# Python 3 version
import requests
import re
import json
import time
import csv
from bs4 import BeautifulSoup
def get_url(pages=(0, 60, 120, 240, 300),
            output_path=r'C:\Users\MY\Desktop\鍵盤數據.csv',
            timeout=10):
    """Scrape Tmall search-result pages and save one CSV row per product.

    A header row is written first. Then, for every value in ``pages``, the
    search page is downloaded and parsed, and the product title, price, shop
    name and monthly-sales text of each listed product are appended to the
    CSV file. Progress messages and the total elapsed time are printed.

    Args:
        pages: Values substituted into the ``s=`` query parameter, one
            request per value (presumably the result offset of each page).
            NOTE(review): the default sequence skips 180 -- confirm whether
            that is intentional.
        output_path: Path of the CSV file to create. An existing file is
            overwritten.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the run forever.

    Returns:
        None. The results are written to ``output_path``.
    """
    # The URL is assembled by plain concatenation because the query string
    # contains literal percent-escapes that must not be treated as
    # formatting directives.
    url_head = 'https://list.tmall.com/search_product.htm?spm=a220m.1000858.0.0.leSoie&cat=50024406&s='
    url_tail = '&q=%BC%FC%C5%CC&sort=d&style=g&industryCatId=50024406&type=pc#J_Filter'
    fieldnames = ['商品名', '價格', '店鋪', '月銷量']

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    n = 0  # running count of rows written across all pages

    with open(output_path, 'w', newline='', encoding='utf-8') as file_1:
        writer = csv.writer(file_1)
        writer.writerow(fieldnames)

        for page in pages:
            html = url_head + str(page) + url_tail
            r = requests.get(html, timeout=timeout)
            soup = BeautifulSoup(r.text, 'lxml')

            shop_names = soup.select('a.productShop-name')  # shop name
            shop_prices = soup.select('p.productPrice em')  # product price
            # Product title: anchors with target="_blank" inside the title div.
            shop_titles = soup.select('div.productTitle a[target=_blank]')
            shop_status = soup.select('p.productStatus')  # monthly sales

            # zip() stops at the shortest list, so a product that lacks one
            # of the four fields truncates the rows taken from this page.
            for title, price, name, status in zip(
                    shop_titles, shop_prices, shop_names, shop_status):
                # NOTE(review): the pause is per row, not per request, so it
                # mostly spaces out the page downloads indirectly. Kept as in
                # the original to avoid changing the request rate.
                time.sleep(1)
                data = [
                    title.get("title"),
                    price.get_text(),
                    name.get_text().strip(),
                    status.get_text().strip(),
                ]
                print(data)
                n += 1
                print('正在寫入第%d條數據' % n)
                writer.writerow(data)  # rows are written one at a time
                print('第%d條數據已寫入' % n)

    # Leaving the with-block has already closed the file; no explicit
    # close() call is needed.
    print('關閉寫入')
    end = time.perf_counter()
    print("本次抓取耗時: %f s" % (end - start))
# Start the scrape. This call sits at module level, so merely importing the
# file triggers it as well.
get_url()