#coding:utf-8
import urllib
import urllib2
import urlparse

from bs4 import BeautifulSoup
# Crawl listing pages 1..199, collect every <img> on each page and report
# the HTTP status of each image URL.


def _fetch_page(page_url):
    """Return the raw body of *page_url*, or None if it cannot be read.

    A failure is printed rather than raised so that one unreachable
    listing page does not abort the rest of the crawl.
    """
    page = None
    try:
        page = urllib.urlopen(page_url)
        return page.read()
    except IOError as e:
        print('Failed to fetch {}: {}'.format(page_url, e))
        return None
    finally:
        # Always release the connection, even when read() fails.
        if page is not None:
            page.close()


def _check_image(img_url):
    """Request *img_url* and print its final URL and status code.

    Errors are printed, never raised: an error that carries a ``code``
    attribute is reported with its URL, code and headers; one that only
    carries a ``reason`` is reported with that reason; anything else is
    reported as skipped.
    """
    response = None
    try:
        response = urllib2.urlopen(urllib2.Request(img_url), timeout=5)
        print(response.geturl())
        print(response.getcode())
    except urllib2.URLError as e:
        print(e)
        if hasattr(e, 'code'):
            print(e.geturl())
            print('Error code: {}'.format(e.code))
            # info is a method; call it so the headers are printed
            # instead of the bound-method repr.
            print(e.info())
        elif hasattr(e, 'reason'):
            print('Reason: {}'.format(e.reason))
    except Exception as e:
        # Best effort: report the failure (timeouts, malformed URLs, ...)
        # instead of hiding it, and keep crawling. %r keeps the message
        # printable even when the URL contains non-ASCII characters.
        print('Skipped %r: %r' % (img_url, e))
    finally:
        if response is not None:
            response.close()


for pa in range(1, 200):
    url = "http://www.xxxxx.com/page/{}".format(pa)
    html = _fetch_page(url)
    if html is None:
        continue
    soup = BeautifulSoup(html, "html.parser")
    for img in soup.find_all("img"):
        src = img.get('src')
        if not src:
            # An <img> without a src attribute has nothing to check.
            continue
        # Resolve relative and protocol-relative paths against the page URL
        # so they become requestable absolute URLs.
        _check_image(urlparse.urljoin(url, src))
# 初次編寫此代碼,實現在網站爬取所有圖片(翻頁),並判斷其狀態值,若有錯誤,歡迎指正!