python的web基礎應用

簡單腳本分享

簡單爬行頁面 3.x與2.x

import urllib.request  # Python 3.x version

# Page fetched by the 3.x crawl example.
url = 'http://www.baidu.com/'

def getHtml(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Decoding is strict, so a body that is not valid UTF-8 raises
    ``UnicodeDecodeError``. Errors raised by ``urlopen`` are not caught
    here and propagate to the caller.
    """
    # The context manager closes the response even if read() or decode()
    # raises, so the connection is never left open.
    with urllib.request.urlopen(url) as page:
        return page.read().decode(encoding='utf-8', errors='strict')

# Download the page and print its decoded HTML.
print(getHtml(url))

import requests  # 2.x version

import string  # not used by this snippet; kept from the original

# Browser-like request headers sent along with the GET request.
headers = {
    'Connection': 'Keep-Alive',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 LB'
}

url = "https://www.qq.com"

# The headers must be passed by keyword: the second positional parameter of
# requests.get() is `params`, so a positional dict would be appended to the
# URL as a query string instead of being sent as HTTP headers.
res = requests.get(url, headers=headers)

print(res.text)

post傳參,設置cookie,截取返回頁面固定長度 2.x

# Relies on `requests` already being imported, as in the GET example.
url = 'http://106.75.72.168:2222/index.php'

# Request headers, including the session cookie sent with the POST.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, compress',
    'Accept-Language': 'en-us;q=0.5,en;q=0.3',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
    'cookie': 'Hm_lvt_9d483e9e48ba1faa0dfceaf6333de846=1542198011; role=Zjo1OiJucXp2YSI7'
}

# Form fields submitted in the POST body.
payload = {'filename': '1.php', 'data[]': '<?php phpinfo();?>'}

r = requests.post(url, headers=headers, data=payload)

# Take a fixed-length slice (bytes 82..127) of the returned page and append
# it to the base URL. Presumably this slice is a path reported by the
# server; the offsets are specific to that page, so verify before reuse.
# r.content is bytes on Python 3, so decode it before concatenating with str.
url = "http://106.75.72.168:2222" + r.content[82:128].decode('utf-8')

r = requests.get(url)

# Parenthesised print is valid on both Python 2 and Python 3.
print(r.content)

字典制作 各版本


def candidate_numbers(prefix='1391040', width=4):
    """Yield ``prefix`` followed by every zero-padded ``width``-digit suffix.

    Suffixes are produced in ascending order, from ``'0' * width`` up to
    ``'9' * width``, so the defaults yield 13910400000 .. 13910409999.
    """
    for suffix in range(10 ** width):
        yield prefix + str(suffix).zfill(width)


# One counter with zero padding replaces four nested digit loops. The `with`
# block closes the file on exit, so no explicit close() call is needed.
with open('wordlist.txt', 'w') as f:
    f.writelines(number + '\n' for number in candidate_numbers())

生成以1391040開頭、後四位遍歷0000~9999的字典,並保存到本地的wordlist.txt文件裡。

python登陸網站 3.x

from urllib import request  # the request submodule of urllib
from urllib import parse  # URL-encoding helpers
from urllib.request import urlopen

# Login form fields.
values = {'zhanghao': 'admin', 'mima': 'admin'}

# parse.urlencode() turns the dict into URL parameters. The POST body must be
# bytes rather than str, hence the UTF-8 encode.
data = parse.urlencode(values).encode('utf-8')

url = 'http://127.0.0.1/login.php'

# Use a distinct name so the imported `request` module is not shadowed.
req = request.Request(url, data)

# The context manager closes the connection once the body has been read.
with urlopen(req) as response:
    # decode() so that Chinese text in the page is displayed correctly.
    print(response.read().decode())

整理 3.x版本

import urllib.error  # URLError
import urllib.parse  # urlencode
import urllib.request  # Request, urlopen

# Each triple-quoted string below is a disabled example. Remove the
# surrounding quotes to run it.

'''
response = urllib.request.urlopen("http://127.0.0.1")
print(response.read().decode())
'''

# Set headers and data
'''
url = 'http://127.0.0.1/login.php'
user_agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36'
values = {'zhanghao': 'admin', 'mima': 'admin'}
headers = {'User-Agent': user_agent}
data = urllib.parse.urlencode(values).encode('utf-8')
request = urllib.request.Request(url, data, headers)
response = urllib.request.urlopen(request)
page = response.read()
print(page.decode())
'''

# Set a proxy, to avoid being blocked because one IP made too many requests
'''
enable_proxy = True
proxy_handler = urllib.request.ProxyHandler({"http": 'http://some-proxy.com:8080'})
null_proxy_handler = urllib.request.ProxyHandler({})
if enable_proxy:
    opener = urllib.request.build_opener(proxy_handler)
else:
    opener = urllib.request.build_opener(null_proxy_handler)
urllib.request.install_opener(opener)
'''

# Set a timeout
# Difference between urlopen and Request: https://blog.csdn.net/tao3741/article/details/75207879
'''
response = urllib.request.urlopen('http://127.0.0.1', timeout=10)
print(response.read().decode())
'''

# POST, PUT and other request methods
'''
request = urllib.request.Request(url, data, headers)  # POST: the payload goes in data
request = urllib.request.Request('http://127.0.0.1?a=1')  # GET: parameters go in the URL
request = urllib.request.Request(url, data=data, method='PUT')  # PUT, or method='DELETE'
'''

# Use the debug log to print the sent and received packets on screen
'''
httpHandler = urllib.request.HTTPHandler(debuglevel=1)
httpsHandler = urllib.request.HTTPSHandler(debuglevel=1)
opener = urllib.request.build_opener(httpHandler, httpsHandler)
urllib.request.install_opener(opener)
response = urllib.request.urlopen('http://127.0.0.1', timeout=5)
'''

# Inspect the attributes of a URLError exception
'''
request = urllib.request.Request('http://127.0.0.999')
try:
    urllib.request.urlopen(request)
except urllib.error.URLError as e:
    if hasattr(e, "code"):  # hasattr checks whether the object has the attribute
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
else:
    print("OK")
'''

參考:(https://www.cnblogs.com/dplearning/p/4854746.html)

©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容