爱爱视频香蕉网,欧美日韩婷婷在线

Urllib庫是Python用于操作Url的標準模塊，Python2.x時分為Urllib和Urllib2，Python3.x時合并到Urllib里面。這里把常見的變化列舉一下，便于查找修改。
官方文檔：https://docs.python.org/3/library/urllib.html

Python2.x	Python3.x
import urllib2	import urllib.request, urllib.error
import urllib	import urllib.request, urllib.error, urllib.parse
import urlparse	import urllib.parse
urllib2.urlopen	urllib.request.urlopen
urllib2.Request	urllib.request.Request
urllib.quote	urllib.parse.quote
urllib.urlencode	urllib.parse.urlencode
cookielib.CookieJar	http.cookiejar.CookieJar

簡單讀取網頁信息：urllib需指定內容的解碼方式，requests可自動解碼。

import urllib.request

# urlopen() returns a response whose read() yields raw bytes. The body can be
# consumed only once, so read it a single time and decode the bytes already in
# hand; calling read() a second time would return an empty result.
# The with-block closes the connection when the read is done.
with urllib.request.urlopen('http://python.org/') as f:
    html1 = f.read()               # bytes
html2 = html1.decode('utf-8')      # str; the decoded form is the one normally wanted

# The same read written with a context manager, which closes the response on exit:
import urllib.request
with urllib.request.urlopen('http://python.org') as f:
    html = f.read().decode('utf-8')
    print(f.status)
    print(html)

# html above is the counterpart of r.text in the requests library:
import requests
r = requests.get('http://python.org') 
print(r.status_code)
print(r.text)        # r.text is decoded automatically with the text encoding requests guesses for the response.
print(r.encoding)    # Shows the encoding currently used for the response.
r.encoding = 'utf-8'  # The encoding can be overridden directly by assignment.

2.urllib對含中文的URL進行手動編碼

import urllib.parse

# quote()/unquote() are documented in urllib.parse; urllib.request merely
# re-exports them, so call them from their documented location.
a = urllib.parse.quote("中文")     # percent-encode the UTF-8 bytes of the text
b = urllib.parse.unquote(a)        # decode back to the original text
print(a, b)

結果為：%E4%B8%AD%E6%96%87 中文

3.使用Request對象添加headers進行請求

import urllib.request
# Example header dict; in this fragment it is only referenced by the commented-out alternative below.
hds = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
req = urllib.request.Request('http://python.org')
req.add_header('User-Agent','Mozilla/5.0')  # Note: header name and value are two arguments separated by a comma.
#req.add_header('User-Agent',hds['User-Agent'])  # Alternative: take the value from the hds dict.
with urllib.request.urlopen(req) as f:    # urlopen is given the Request object here rather than a URL string
    html = f.read().decode('utf-8')

# The same request made with the requests library
import requests
hds = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
# Arguments must be separated by an ASCII comma; a full-width "，" is a SyntaxError.
r = requests.get('http://python.org', headers=hds)

4.超時設置

import urllib.request
# Passing the timeout argument is all that is needed.
# NOTE(review): req is not defined in this fragment — presumably the Request object built in the headers example; confirm.
f = urllib.request.urlopen(req,timeout=1)
f = urllib.request.urlopen('http://python.org',timeout=1)

# Full usage (1 second suits a normally responsive site; raise the timeout
# when the server is slow).
import urllib.request
for i in range(10):   # make at most 10 attempts, stopping at the first success
    try:
        # The with-block closes the response even if read()/decode() fails.
        with urllib.request.urlopen('http://python.org', timeout=1) as f:
            print(f.read().decode('utf-8')[:100])
        break
    except Exception as e:   # deliberately broad: any failure just triggers another attempt
        print("出現異常: " + str(e))

# The requests library follows the same pattern
import requests
for i in range(10):   # make at most 10 attempts, stopping at the first success
    try:
        r = requests.get('http://python.org', timeout=0.25)   # author's note: responds faster than urllib.request
        print(r.text[:100])
        break
    except Exception as e:   # deliberately broad: any failure just triggers another attempt
        # i is zero-based, so report the attempt number as i + 1.
        print("第{}次請求出現異常:".format(i + 1) + str(e))
        print(type(e))

5.下載HTML文件到本地
同理：圖片、MP3、視頻等文件格式也是用‘wb’形式下載。

# Method 1:
import urllib.request

# Read inside a with-block so the HTTP response is closed deterministically.
with urllib.request.urlopen("http://www.baidu.com") as resp:
    html = resp.read()
with open("1.html", "wb") as f:     # binary mode: the bytes are written as-is, no decoding needed
    f.write(html)


# Method 2: the most convenient
# urlretrieve(url, filename=None, reporthook=None, data=None)
# reporthook (optional) is a callback that can be used to show download progress.
# data (optional) is the data to POST to the server.

import urllib.request
urllib.request.urlretrieve("http://www.baidu.com",filename="1.html")
#urllib.request.urlretrieve("http://www.baidu.com","1.html") 


# Method 3:
import requests

r = requests.get("http://www.baidu.com")
# r.content is written in binary mode, so no decoding step is involved.
with open("1.html",'wb') as f:
    f.write(r.content)

# Other file formats (the "XXX" prefixes are placeholders, not real URLs):
urllib.request.urlretrieve("XXX.jpg",filename="1.jpg")      # XXX stands for the server address
urllib.request.urlretrieve("XXX.mp3",filename="1.mp3")
urllib.request.urlretrieve("XXX.rmvb",filename="1.rmvb")

6.get請求實例
get請求的url地址格式：http://網址?字段名1=內容1&字段名2=內容2
http://www.baidu.com/s?wd="python"&rqlang=cn # wd代表關鍵字, rqlang代表區域

import urllib.parse
import urllib.request

base_url = "http://www.baidu.com/s?wd="
keyword = "Python爬蟲"
# urlopen() needs an ASCII-only URL, so percent-encode the non-ASCII keyword
# first. quote() is documented in urllib.parse.
url = base_url + urllib.parse.quote(keyword)
# Read inside a with-block so the HTTP response is closed deterministically.
with urllib.request.urlopen(url) as resp:
    html = resp.read()
with open("1.html", "wb") as f:
    f.write(html)

# The same search with the requests library
import requests

base_url = "http://www.baidu.com/s?wd="
keyword = "Python爬蟲"
url = base_url + keyword     # requests encodes the non-ASCII part of the URL itself
r = requests.get(url)
#print(r.url)                # shows the URL as actually requested, after encoding
with open("2.html", "wb") as f:
    f.write(r.content)

7.使用代理：urllib.request.ProxyHandler

import urllib.request  
 
# Build the proxy dictionaries
# NOTE(review): the key 'sock5' does not match the scheme of the http:// URL fetched below — presumably proxy2 was the mapping meant to take effect; confirm against the ProxyHandler documentation.
proxy1={'sock5': 'localhost:1080'}
proxy2={'http': '183.51.191.203:9797'}
# Create the handler object from a proxy dictionary with ProxyHandler
proxy_handler = urllib.request.ProxyHandler(proxy1) 
# Build an opener that uses the proxy handler
opener = urllib.request.build_opener(proxy_handler)  
# Install it as the global default opener so that later urlopen() calls go through it
urllib.request.install_opener(opener) 
  
a = urllib.request.urlopen("http://www.baidu.com").read().decode("utf8")  
print(len(a))

8.開啟Debuglog：urllib.request.HTTPHandler，urllib.request.HTTPSHandler

import urllib.request

# Create one handler per scheme with debuglevel=1, combine them into an opener,
# and install that opener globally so the urlopen() call below uses it.
http_handler = urllib.request.HTTPHandler(debuglevel=1)
https_handler = urllib.request.HTTPSHandler(debuglevel=1)
opener = urllib.request.build_opener(http_handler,https_handler)
urllib.request.install_opener(opener)
urllib.request.urlopen("https://www.baidu.com")

9.異常處理：URLError，子類HTTPError

觸發URLError的原因有以下四種可能：
①連接不上服務器
②遠程URL不存在
③無網絡
④觸發HTTPError

# Form 1:
import urllib.request
import urllib.error

try:
    # urllib.request.urlopen("http://www.google.com")       # author's note: the URLError case
    urllib.request.urlopen("https://login.taobao.com/member")   # author's note: the HTTPError case
# HTTPError is a subclass of URLError, so its handler has to come first.
except urllib.error.HTTPError as e:
    print(e.code,e.reason)
except urllib.error.URLError as e:
    print(e.reason)

# Form 2:
import urllib.request
import urllib.error

try:
    #urllib.request.urlopen("http://www.google.com")
    urllib.request.urlopen("https://login.taobao.com/member")
# A single handler for URLError; the attribute checks decide what to print.
except urllib.error.URLError as e:
    if hasattr(e,"code"):        # hasattr is a built-in function; its help text is quoted below.
        print(e.code)
    if hasattr(e,"reason"):
        print(e.reason)

'''
hasattr(obj, name, /)
    Return whether the object has an attribute with the given name.
    
    This is done by calling getattr(obj, name) and catching AttributeError.
'''

HTTP狀態碼以及含義

狀態碼 (e.code)	英文(e.reason)	含義
200	OK	一切正常
301	Moved Permanently	重定向到新的URL，永久性
302	Found	重定向到新的URL，非永久性
304	Not Modified	請求的資源未更新
400	Bad Request	非法請求
401	Unauthorized	請求未經授權
403	Forbidden	禁止訪問
404	Not Found	沒有找到對應頁面
500	Internal Server Error	服務器內部錯誤
501	Not Implemented	服務器不支持實現請求所需要的功能

10.post請求

import urllib.request
import urllib.parse

url = "https://www.douban.com/accounts/login"
params = {'source':'index_nav',
          'form_email':'XXXX',     # account
          'form_password':'XXXX'   # password
          }
# urlencode() builds the "k=v&k=v" form string; encode() turns it into the
# bytes that Request expects as its data argument.
postdata = urllib.parse.urlencode(params).encode('utf-8')
req = urllib.request.Request(url, postdata)
# Read inside a with-block so the HTTP response is closed deterministically.
with urllib.request.urlopen(req) as resp:
    html = resp.read()
with open('1.html', 'wb') as f:
    f.write(html)

# The same POST with the requests library
import requests
url = "https://www.douban.com/accounts/login"
params = {'source':'index_nav',
          'form_email':'XXXX',     # account
          'form_password':'XXXX'   # password
          }
# The dict is passed as the second positional argument of requests.post.
r = requests.post(url,params)
with open('1.html','wb') as f:
    f.write(r.content)

#注：
urlencode:對key-value的字典數據進行編碼轉換，返回類似“a=XXX&b=XXX”的結果。
quote：對單個字符串進行編碼轉換，返回編碼后的一串字符，多用于中文字符的編碼。

11.使用cookies

import urllib.request
import urllib.parse
import http.cookiejar
url = "https://www.douban.com/accounts/login"
params = {'source':'index_nav',
          'form_email':'XXXX',     # account
          'form_password':'XXXX'   # password
          }
# urlencode() builds the form string; encode() turns it into bytes for the request body.
postdata = urllib.parse.urlencode(params).encode('utf-8')
# Only "import urllib.request" is in effect, so the class must be referenced
# by its full name; a bare "request.Request" raises NameError.
req = urllib.request.Request(url, postdata, method="POST")

# Create the CookieJar and an opener whose HTTPCookieProcessor uses it
cj = http.cookiejar.CookieJar()
pro = urllib.request.HTTPCookieProcessor(cj)
opener = urllib.request.build_opener(pro)
# Install it as the global default opener so that urlopen() below goes through it
urllib.request.install_opener(opener)

# Read inside a with-block so the HTTP response is closed deterministically.
with urllib.request.urlopen(req) as resp:
    html1 = resp.read()
with open('1.html', 'wb') as f:
    f.write(html1)


# With the requests library
import requests
url = "https://www.douban.com/accounts/login"
# The cookie is supplied by hand as a Cookie request header ('xxxxxxx' is a placeholder value).
headers = {
    'Cookie':'xxxxxxx'
}
r = requests.get(url,headers=headers)
print(r.text)

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av

七.Python標(biāo)準(zhǔn)庫(kù)：Urllib庫(kù)

七.Python標(biāo)準(zhǔn)庫(kù)：Urllib庫(kù)

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九 欧美,1769亚洲,黄色成人av

七.Python標(biāo)準(zhǔn)庫(kù)：Urllib庫(kù)

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av