import pickle

import requests
from bs4 import BeautifulSoup
# Save a page's HTML to a local file (for testing).
def html_save(url):
    """Fetch *url* and pickle the list ``[url, html]`` to a local file.

    The file name is read interactively from stdin.

    Side effects: prompts on stdin, performs an HTTP GET, writes a file.
    """
    filename = input('請(qǐng)輸入要保存文件名稱(chēng)')
    # Fetch first so a failed request does not leave an empty file behind.
    html = requests.get(url).text
    # Context manager guarantees the file is closed even on error.
    with open(filename, 'wb') as f:
        pickle.dump([url, html], f)
# Load a locally saved HTML pickle (for testing).
def html_load(filename):
    """Return the ``[url, html]`` list previously written by ``html_save``.

    NOTE(review): ``pickle.load`` on untrusted files can execute arbitrary
    code — only load files this script itself produced.
    """
    # Context manager replaces the original manual open/close pair.
    with open(filename, 'rb') as f:
        return pickle.load(f)
# Parse one listing page.
def html_jx(html, all_list):
    """Append ``[name, price]`` for each listing found in *html* to *all_list*.

    Names come from ``a.t`` elements and prices from ``b.pri`` elements;
    the two node lists are paired positionally.  Returns *all_list* so the
    caller can chain/reassign.
    """
    soup = BeautifulSoup(html, 'lxml')
    name_list = soup.select('a.t')
    price_list = soup.select('b.pri')
    # BUG FIX: the original appended once using an undefined index ``i``
    # (NameError) — iterate the two node lists in lockstep instead.
    for name, price in zip(name_list, price_list):
        all_list.append([name.string, price.string])
    return all_list
# Find the link to the next results page.
def find_next(html):
    """Return the absolute URL of the first ``a.next`` link in *html*.

    The site emits relative hrefs, so the host prefix is prepended.
    Raises IndexError if no ``a.next`` element exists (e.g. on the last page).
    """
    soup = BeautifulSoup(html, 'lxml')
    return 'http://zz.58.com' + soup.select('a.next')[0].get('href')
# --- script body: scrape 12 pages of laptop listings and pickle the result ---
all_list = []
html = requests.get('http://zz.58.com/bijiben/0/?PGTID=0d100000-0015-624b-2e87-3e5214b563a9&ClickID=1').text
for page in range(12):
    all_list = html_jx(html, all_list)
    # Do not follow the "next" link after the final page we want.
    if page != 11:
        url_ne = find_next(html)
        html = requests.get(url_ne).text
# Persist the collected [name, price] pairs locally.
with open('lifile', 'wb') as f:
    pickle.dump(all_list, f)