一個簡單的爬蟲代碼
利用Python爬取英雄聯盟的全部皮膚
import json
import os
import re
import pymongo
import requests
# Browser-like User-Agent string used by the HTTP requests in this script.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)"
    " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"
)
# JS file found on the official site; it relates every hero to its hero ID.
FEED_URL = "http://lol.qq.com/biz/hero/champion.js"
# Skins are saved inside the "lol" folder.
FILE_PATH = r"./lol/"
# Connect to MongoDB.
client = pymongo.MongoClient(host='127.0.0.1', port=27017)
# Mongo database that stores the image links.
db = client.lol
def get_heros():
    """Fetch the hero ID -> hero name mapping and create one folder per hero.

    Downloads the JavaScript file at ``FEED_URL``, extracts the JSON object
    stored under its ``"keys"`` entry, and creates a sub-directory of
    ``FILE_PATH`` named after each hero so skins can be saved there.

    Returns:
        dict: The parsed ``"keys"`` object; its items are
        ``(hero_id, hero_name)`` pairs.

    Raises:
        requests.RequestException: If the request fails, times out, or
            returns an HTTP error status.
        ValueError: If the ``"keys"`` section cannot be found in the response.
    """
    # The User-Agent has to be passed via ``headers``: the second positional
    # argument of ``requests.get`` is ``params`` (the query string).
    resp = requests.get(
        FEED_URL,
        headers={'User-Agent': USER_AGENT},
        timeout=10,
    )
    resp.raise_for_status()
    resp_js = resp.content.decode()

    match = re.search(r'"keys":(.*?),"data"', resp_js)
    if match is None:
        raise ValueError('"keys" section not found in %s' % FEED_URL)
    hero_dict = json.loads(match.group(1))

    for hero_name in hero_dict.values():
        hero_file = os.path.join(FILE_PATH, hero_name)
        try:
            # makedirs also creates FILE_PATH itself on the first run and
            # does not complain when the folder is already there.
            os.makedirs(hero_file, exist_ok=True)
        except OSError as e:
            # Best effort: report the problem and carry on with other heroes.
            print(e)
    return hero_dict
def get_skins(hero_dict, max_skins=15):
    """Download the skin images of every hero and record their URLs.

    For each hero, skin indexes ``0 .. max_skins - 1`` are requested. Every
    image answered with HTTP 200 is written to
    ``FILE_PATH/<hero_name>/<skin_id>.jpg``. Once all indexes of a hero have
    been tried, one document with the hero's ID, name and the list of
    downloaded URLs is inserted into the ``skins`` collection of ``db``.

    Args:
        hero_dict: Mapping of hero ID to hero name.
        max_skins: How many skin indexes to probe per hero. The default of
            15 comes from the original author's note that the hero with the
            most skins (Annie) had 11, so none was expected to exceed 15.
    """
    url_template = (
        "http://ossweb-img.qq.com/images/lol/web201310/skin/"
        "big%s0%02d.jpg"
    )
    # The User-Agent has to be passed via ``headers``: the second positional
    # argument of ``requests.get`` is ``params`` (the query string).
    headers = {'User-Agent': USER_AGENT}

    for hero_id, hero_name in hero_dict.items():
        hero_dir = os.path.join(FILE_PATH, hero_name)
        hero_skin_list = []
        for skin_id in range(max_skins):
            skin_url = url_template % (hero_id, skin_id)
            file_name = os.path.join(hero_dir, '%d.jpg' % skin_id)
            try:
                skin = requests.get(skin_url, headers=headers, timeout=10)
                if skin.status_code != 200:
                    # No skin published under this index; try the next one.
                    continue
                # Make sure the target folder exists even if it was not
                # created beforehand.
                os.makedirs(hero_dir, exist_ok=True)
                # Save the image; the ``with`` block closes the file.
                with open(file_name, 'wb') as f:
                    f.write(skin.content)
            except (requests.RequestException, OSError) as e:
                # Best effort: one failed skin must not abort the whole run.
                print(e)
                continue
            print('downloading:%s' % file_name)
            hero_skin_list.append(skin_url)
        # Save this hero's skin links to the database.
        db['skins'].insert_one({
            'hero_id': hero_id,
            'hero_name': hero_name,
            'url': hero_skin_list,
        })
def main():
    """Run the scraper: fetch the hero list, then download every skin."""
    get_skins(get_heros())
    print('Finish....')


if __name__ == '__main__':
    # Only start scraping when the file is executed as a script.
    main()
效果展示:
lolskin.PNG
skin.PNG