簡單腳本分享
簡單爬行頁面 3.x與2.x
# Python 3.x version: fetch a page with the standard library only.
import urllib.request

url = 'http://www.baidu.com/'


def getHtml(url):
    """Download *url* and return its body decoded as UTF-8.

    Decoding is strict, so a body that is not valid UTF-8 raises
    UnicodeDecodeError.
    """
    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(url) as page:
        return page.read().decode(encoding='utf-8', errors='strict')


print(getHtml(url))
# Python 2.x version, using the third-party requests library.
import requests
import string

headers = {
    'Connection': 'Keep-Alive',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 LB'
}
url = "https://www.qq.com"
# The headers must be passed by keyword: the second positional parameter of
# requests.get() is `params`, which would send them as a query string instead.
res = requests.get(url, headers=headers)
print(res.text)
post傳參,設置cookie,截取返回頁面固定長度 2.x
# Python 2.x example (written so it also runs on 3.x): POST form data with a
# cookie header, then request the URL built from a fixed-length slice of the
# response.
import requests

url = 'http://106.75.72.168:2222/index.php'
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, compress',
    'Accept-Language': 'en-us;q=0.5,en;q=0.3',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
    'cookie': 'Hm_lvt_9d483e9e48ba1faa0dfceaf6333de846=1542198011; role=Zjo1OiJucXp2YSI7'
}
payload = {'filename': '1.php', 'data[]': '<?php phpinfo();?>'}
r = requests.post(url, headers=headers, data=payload)
# Bytes 82..127 of the response body are appended to the base URL; the offsets
# are tied to this server's response layout (presumably the returned path --
# verify against the actual response). Slicing the raw bytes keeps the offsets
# exact, and decoding the slice lets the concatenation work on Python 3, where
# bytes and str cannot be added.
url = "http://106.75.72.168:2222" + r.content[82:128].decode('utf-8')
r = requests.get(url)
# print(...) with a single argument behaves the same on Python 2 and 3.
print(r.content)
字典制作 各版本
# Write every number of the form 1391040xxxx (xxxx = 0000..9999), one per
# line and in ascending order, to wordlist.txt. Runs on Python 2 and 3.
with open('wordlist.txt', 'w+') as f:
    # %04d zero-pads the counter to four digits, which yields the same lines
    # as four nested 0-9 loops. The `with` block closes the file on exit, so
    # no explicit close() call is needed.
    f.writelines('1391040%04d\n' % n for n in range(10000))
生成以1391040開頭、后四位從0000到9999的字典,并保存到本地的wordlist.txt文件里。
python登陸網站 3.x
# Python 3.x: log in to a site by POSTing form fields with urllib.
from urllib import request  # the request submodule of urllib
from urllib import parse  # URL-encoding helpers
from urllib.request import urlopen

values = {'zhanghao': 'admin', 'mima': 'admin'}
# The POST body must be bytes, not str: parse.urlencode() turns the dict into
# a URL-encoded parameter string, which is then encoded as UTF-8 so it can be
# sent in the HTTP request.
data = parse.urlencode(values).encode('utf-8')
url = 'http://127.0.0.1/login.php'
# Named `req` so that the imported `request` module is not overwritten.
req = request.Request(url, data)
# The context manager closes the connection after the body has been read.
with urlopen(req) as response:
    # decode() turns the raw bytes into text so Chinese characters display.
    print(response.read().decode())
整理 3.x版本
import urllib.parse  # urlencode
import urllib.request  # Request, urlopen

# Simplest case: open a URL and print the decoded body.
# (Each example below is kept inside a string literal so that it does not run;
# remove the surrounding quotes to try one.)
'''
response = urllib.request.urlopen("http://127.0.0.1")
print(response.read().decode())
'''
# Set request headers and POST data.
'''
url = 'http://127.0.0.1/login.php'
user_agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36'
values = {'zhanghao': 'admin', 'mima': 'admin'}
headers = {'User-Agent': user_agent}
data = urllib.parse.urlencode(values).encode('utf-8')
request = urllib.request.Request(url, data, headers)
response = urllib.request.urlopen(request)
page = response.read()
print(page.decode())
'''
# Use a proxy, to avoid being blocked because a single IP made too many
# requests.
'''
enable_proxy = True
proxy_handler = urllib.request.ProxyHandler({"http": 'http://some-proxy.com:8080'})
null_proxy_handler = urllib.request.ProxyHandler({})
if enable_proxy:
    opener = urllib.request.build_opener(proxy_handler)
else:
    opener = urllib.request.build_opener(null_proxy_handler)
urllib.request.install_opener(opener)
'''
# Set a timeout.
# Difference between urlopen and Request: https://blog.csdn.net/tao3741/article/details/75207879
'''
response = urllib.request.urlopen('http://127.0.0.1', timeout=10)
print(response.read().decode())
'''
# Submitting with POST, GET, PUT, DELETE, ...
'''
request = urllib.request.Request(url, data, headers)  # POST: the fields go in data
request = urllib.request.Request('http://127.0.0.1?a=1')  # GET: the fields go in the URL
# PUT and DELETE: name the verb with the method argument (Python 3.3+)
# instead of overriding request.get_method with a lambda.
request = urllib.request.Request(url, data=data, method='PUT')  # or 'DELETE'
'''
# Use the debug log to print the content of sent and received packets to the
# screen.
'''
httpHandler = urllib.request.HTTPHandler(debuglevel=1)
httpsHandler = urllib.request.HTTPSHandler(debuglevel=1)
opener = urllib.request.build_opener(httpHandler, httpsHandler)
urllib.request.install_opener(opener)
response = urllib.request.urlopen('http://127.0.0.1', timeout=5)
'''
# Inspect the attributes of a URLError.
'''
import urllib.error

request = urllib.request.Request('http://127.0.0.999')
try:
    urllib.request.urlopen(request)
except urllib.error.URLError as e:
    if hasattr(e, "code"):  # hasattr() checks whether the object has the attribute
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
else:
    print("OK")
'''
參考:(https://www.cnblogs.com/dplearning/p/4854746.html)