思路
這裡抓取的是中國天氣網的數據,它是個靜態頁面,請求的URL是:
http://d1.weather.com.cn/sk_2d/101190101.html?_=1527152861827
兩個變量,一個是html前的一串數字,這個數字代表一個城市,因此我們需要城市名和數字編號的轉換關係。後面一串數字很明顯是當前時間(毫秒級時間戳)。
該請求會返回一個字典,每個key的命名還是很容易猜到其意義的,就不多說了。
代碼

weather-result.jpg
weather.py
import re
import Sqlite3api as sqlite3
import Configure as Configs
import requests
from random import choice
from bs4 import BeautifulSoup
import time
import ast
def import_data():
    """Seed the ``weather`` table from the bundled SQL dump.

    Reads ``cityWeather18-03-4.sql`` (UTF-8) from the current working
    directory, extracts every ``<number>,'<text>','<number>'`` triple with a
    regular expression and issues one ``INSERT INTO weather`` per triple
    through the project's ``Sqlite3api`` wrapper (imported as ``sqlite3``).

    Progress is printed to stdout: first the number of triples found, then
    each triple as it is inserted. The function returns ``None``.
    """
    with open('cityWeather18-03-4.sql', 'r', encoding='utf-8') as file:
        content = file.read()

    # Raw string: "\d" inside a normal literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    pattern = re.compile(r"(\d+),'(.*?)','(\d+)'", re.S)
    data = pattern.findall(content)
    print("Retrieve {0:d} weather codes.".format(len(data)))

    conn = sqlite3.sqlite3_init()
    try:
        for d in data:
            print(d)
            # Double embedded single quotes so a name containing "'" cannot
            # terminate the SQL string literal early.
            # NOTE(review): if Sqlite3api supports bound parameters, prefer
            # them over building the statement by hand.
            name = d[1].strip().replace("'", "''")
            sql = "INSERT INTO weather VALUES ({0:s},'{1:s}',{2:s})".format(d[0], name, d[2])
            sqlite3.sqlite3_execute(conn, sql)
    finally:
        # Always release the connection, even if an insert fails midway.
        sqlite3.sqlite3_close(conn)
def get_citycode_by_cityname(cityname):
    """Look up the cities whose name contains *cityname*.

    Args:
        cityname: Full or partial city name (a ``str``), typically typed by
            the user.

    Returns:
        Whatever ``sqlite3.sqlite3_execute`` returns for the SELECT. Callers
        in this file treat it as a sequence of ``(cityname, citycode)`` rows
        that is falsy when nothing matched.
    """
    # The name comes straight from user input: double any single quote so it
    # cannot break out of the SQL string literal.
    # NOTE(review): if Sqlite3api supports bound parameters, prefer them.
    safe_name = cityname.replace("'", "''")
    sql = "select cityname, citycode from weather WHERE cityname LIKE '%{0:s}%'".format(safe_name)

    conn = sqlite3.sqlite3_init()
    try:
        return sqlite3.sqlite3_execute(conn, sql)
    finally:
        # Close the connection even when the query raises.
        sqlite3.sqlite3_close(conn)
def _extract_weather_payload(content):
    """Return the first flat ``{...}`` object found in *content* as a dict, or None."""
    import json  # local import: only this helper needs it

    match = re.search(r'\{.*?\}', content, re.S)
    if match is None:
        return None
    raw = match.group(0)
    try:
        payload = json.loads(raw)
    except ValueError:
        # Fall back to the original parser for Python-literal style bodies
        # (e.g. single-quoted strings) that are not strict JSON.
        try:
            payload = ast.literal_eval(raw)
        except (ValueError, SyntaxError, TypeError):
            return None
    return payload if isinstance(payload, dict) else None


def get_weather_by_citycode(citycode):
    """Fetch the current weather for *citycode* and print it to stdout.

    Requests ``http://d1.weather.com.cn/sk_2d/<citycode>.html`` with a
    millisecond timestamp as the ``_`` query parameter, a randomly chosen
    user agent from ``Configs.FakeUserAgents`` and a fixed ``Referer``
    header, then prints selected fields of the returned object.

    Args:
        citycode: Numeric city code; an ``int`` or anything ``int()`` accepts.

    Returns:
        None. If the request fails, the status is not OK, or the body holds
        no parsable object, a short message is printed instead of raising.
    """
    url = "http://d1.weather.com.cn/sk_2d/{0:d}.html?_={1:d}".format(
        int(citycode), int(time.time() * 1000))
    header = {
        'user-agent': choice(Configs.FakeUserAgents),
        # NOTE(review): the Referer is fixed to one city's page; presumably
        # the endpoint only checks that it comes from the site - confirm.
        'Referer': "http://www.weather.com.cn/weather1d/101190101.shtml",
    }

    # Initialise before the request so a network failure leaves an empty
    # body instead of an undefined name.
    content = ''
    try:
        # A timeout keeps the interactive loop from hanging indefinitely.
        response = requests.get(url, headers=header, timeout=10)
        if response.status_code == requests.codes.ok:
            response.encoding = 'utf-8'
            content = response.text
    except requests.RequestException as e:
        print(e)

    data = _extract_weather_payload(content)
    if data is None:
        print("沒有取得天氣數據,請稍後再試。")
        return

    # (label, key) pairs in display order. Four labels were corrupted in the
    # original source text and are restored here.
    fields = (
        ("城市:", 'cityname'),
        ("日期:", 'date'),
        ("時間:", 'time'),
        ("攝氏溫度:", 'temp'),
        ("華氏溫度:", 'tempf'),
        ("天氣:", 'weather'),
        ("濕度:", 'SD'),
        ("風向:", 'WD'),
        ("風級:", 'WS'),
        ("空氣質量:", 'aqi'),
        ("空氣質量PM2.5:", 'aqi_pm25'),
    )
    for label, key in fields:
        print(label, data.get(key))
def main():
    """Run the interactive city-weather lookup loop.

    Repeatedly prompts for a city name, resolves it with
    ``get_citycode_by_cityname`` and prints the weather through
    ``get_weather_by_citycode``. When several cities match, a numbered list
    is shown and the user picks one by number. The loop ends cleanly on
    end-of-input or Ctrl-C.
    """
    try:
        while True:
            print("-" * 80)
            cityname = input("請輸入要查詢的城市名: ")
            ret = get_citycode_by_cityname(cityname)
            if not ret:
                print("沒有找到該城市,請重新輸入。")
                continue
            if 1 == len(ret):
                get_weather_by_citycode(int(ret[0][1]))
                continue

            # PrettyTable is never imported in this file, so using it here
            # raised NameError; render the numbered list with plain prints.
            print("編號\t城市名字")
            for cnt, r in enumerate(ret, start=1):
                print("{0:d}\t{1}".format(cnt, r[0]))

            try:
                code = int(input("找到不只一個城市,請輸入其編號: "))
            except ValueError:
                print("編號無效,請重新輸入。")
                continue
            # Reject 0 and negatives explicitly: ret[code-1] would otherwise
            # silently wrap around to the end of the list.
            if not 1 <= code <= len(ret):
                print("編號無效,請重新輸入。")
                continue
            get_weather_by_citycode(int(ret[code - 1][1]))
    except (EOFError, KeyboardInterrupt):
        # Leave the prompt on its own line and exit without a traceback.
        print()
if __name__ == '__main__':
    # import_data() loads the SQL dump into the weather table. It is left
    # disabled here - presumably it is run once before first use (confirm).
    # import_data()
    main()