Python爬蟲櫻花動漫多線程下載附源碼(超詳細,適合新手練習)

前言

別瞅了!看完你肯定行


一、打開動漫詳細頁面

image.png

二、查看網頁源碼


查看網頁源碼,搜索關鍵詞能夠找到相關內容。我們可以看見詳情頁地址并不完整,所以我們需要拼接出完整url

def url_parse():
    """Ask for an anime page URL and collect the episode links found on it.

    Prompts on stdin for the URL, fetches the page with ``requests`` and
    reads every ``<li>`` below ``div#play_0``.

    Returns:
        dict: anchor ``title`` attribute mapped to the episode page URL
        (``http://www.imomoe.ai`` followed by the anchor's ``href``).
    """
    new_url = input("請(qǐng)粘貼你想下載的動(dòng)漫鏈接")
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
    response = requests.get(url=new_url, headers=headers).text
    tree = etree.HTML(response)
    # The query has to begin with "//" (search all descendants);
    # "http://div[...]" is not a valid XPath expression.
    li_list = tree.xpath("//div[@id='play_0']/ul/li")
    dic = {}
    for li in li_list:
        hrefs = li.xpath("a/@href")
        titles = li.xpath("a/@title")
        if not hrefs or not titles:
            # A list item without a usable anchor is skipped rather than
            # aborting the whole run with an IndexError.
            continue
        dic[titles[0]] = "http://www.imomoe.ai" + hrefs[0]
    return dic

三、進入詳情頁查看

打開開發者工具(F12),點擊視頻,我們可以看到video標簽中src屬性為視頻地址


查看網頁源碼,發現并沒有找到我們想要的video標簽中的屬性,所以我們用selenium獲取網頁源碼

def data(url):
    """Resolve one episode page to its player iframe and pass it to ``videos``.

    Loads ``url`` in Chrome through Selenium, takes the ``src`` of the first
    iframe inside ``div.player``, derives the output file name from that
    address and starts a thread running ``videos(name, iframe_src)``.

    Args:
        url: address of an episode page.

    Raises:
        IndexError: if the page has no matching iframe, or the iframe address
            does not contain a ``.`` to split the episode number from.
    """
    bro = webdriver.Chrome(executable_path="./chromedriver")
    try:
        bro.get(url)
        page_data = bro.page_source
    finally:
        # Close the browser even when loading the page fails, so no
        # Chrome/chromedriver process is left running.
        bro.quit()
    tree1 = etree.HTML(page_data)
    video = tree1.xpath('//div[@class="player"]/iframe/@src')[0]
    # Episode number: last "-"-separated piece of the text right before the
    # final "." in the iframe address.
    name = "第"+video.split(".")[-2].split("-")[-1]+"集.mp4"
    x = threading.Thread(target=videos, args=(name, video,))
    x.start()
    print(name + "解析成功")

四、線程創建

通過selenium提取url需要依次提取,這簡直太費時間了,所以我們創建線程進行同時提取來節省時間

def data_parse(dic):
    """Launch one ``data`` thread for every URL stored as a value in ``dic``.

    All thread objects are created first; they are then started one by one,
    pausing one second after each start.

    Args:
        dic: mapping whose values are episode page URLs.
    """
    workers = [
        threading.Thread(target=data, args=(link,))
        for link in dic.values()
    ]
    for worker in workers:
        worker.start()
        sleep(1)

通過selenium提取的url跟我們剛才所看到的視頻url并不相同,不過跳轉到我們selenium抓取的url,再重復抓取一下就能找到視頻url

def videos(name,url):
    """Open the player page, find the real video address and start a download.

    Loads ``url`` in Chrome through Selenium, reads the ``src`` of the
    ``<video>`` element under the element with id ``a1`` and starts a thread
    running ``download(name, video_src)``.

    Args:
        name: file name the episode will be saved under.
        url: address of the player page (the iframe ``src``).

    Raises:
        IndexError: if the page contains no matching ``<video>`` element.
    """
    bro = webdriver.Chrome(executable_path="./chromedriver.exe")
    try:
        bro.get(url)
        mp4_data = bro.page_source
    finally:
        # Close the browser even when loading the page fails, so no
        # Chrome/chromedriver process is left running.
        bro.quit()
    tree2 = etree.HTML(mp4_data)
    # The query has to begin with "//" (search all descendants);
    # "http://*[...]" is not a valid XPath expression.
    video = tree2.xpath("//*[@id='a1']/div[2]/video/@src")[0]
    x = threading.Thread(target=download, args=(name, video,))
    x.start()
    print(name+"正在下載")

五、視頻下載

def download(name,url):
    """Download ``url`` and write it to the file ``name``.

    The request is sent with a random User-Agent from ``fake_useragent``.

    Args:
        name: path of the output file; it is opened in binary write mode.
        url: direct address of the video file.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    headers = {"User-Agent": UserAgent().random}
    # Stream the body so a whole episode is never held in memory at once.
    with requests.get(url=url, headers=headers, stream=True) as response:
        # Fail instead of saving an HTTP error page as an .mp4 file.
        response.raise_for_status()
        with open(name, "wb") as fp:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                fp.write(chunk)
    print(name+"下載完成")

代碼

import requests
import threading
from fake_useragent import UserAgent
from selenium import webdriver
from  lxml import etree
from time import sleep
# from selenium.webdriver.chrome.options import Options
# chrome_options = Options()
# chrome_options.add_argument('--headless')
# chrome_options.add_argument('--disable-gpu')

def url_parse():
    """Ask for an anime page URL and collect the episode links found on it.

    Prompts on stdin for the URL, fetches the page with ``requests`` and
    reads every ``<li>`` below ``div#play_0``.

    Returns:
        dict: anchor ``title`` attribute mapped to the episode page URL
        (``http://www.imomoe.ai`` followed by the anchor's ``href``).
    """
    new_url = input("請(qǐng)粘貼你想下載的動(dòng)漫鏈接")
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
    response = requests.get(url=new_url, headers=headers).text
    tree = etree.HTML(response)
    # The query has to begin with "//" (search all descendants);
    # "http://div[...]" is not a valid XPath expression.
    li_list = tree.xpath("//div[@id='play_0']/ul/li")
    dic = {}
    for li in li_list:
        hrefs = li.xpath("a/@href")
        titles = li.xpath("a/@title")
        if not hrefs or not titles:
            # A list item without a usable anchor is skipped rather than
            # aborting the whole run with an IndexError.
            continue
        dic[titles[0]] = "http://www.imomoe.ai" + hrefs[0]
    return dic

def data_parse(dic):
    """Launch one ``data`` thread for every URL stored as a value in ``dic``.

    All thread objects are created first; they are then started one by one,
    pausing one second after each start.

    Args:
        dic: mapping whose values are episode page URLs.
    """
    workers = [
        threading.Thread(target=data, args=(link,))
        for link in dic.values()
    ]
    for worker in workers:
        worker.start()
        sleep(1)



def data(url):
    """Resolve one episode page to its player iframe and pass it to ``videos``.

    Loads ``url`` in Chrome through Selenium, takes the ``src`` of the first
    iframe inside ``div.player``, derives the output file name from that
    address and starts a thread running ``videos(name, iframe_src)``.

    Args:
        url: address of an episode page.

    Raises:
        IndexError: if the page has no matching iframe, or the iframe address
            does not contain a ``.`` to split the episode number from.
    """
    bro = webdriver.Chrome(executable_path="./chromedriver")
    try:
        bro.get(url)
        page_data = bro.page_source
    finally:
        # Close the browser even when loading the page fails, so no
        # Chrome/chromedriver process is left running.
        bro.quit()
    tree1 = etree.HTML(page_data)
    video = tree1.xpath('//div[@class="player"]/iframe/@src')[0]
    # Episode number: last "-"-separated piece of the text right before the
    # final "." in the iframe address.
    name = "第"+video.split(".")[-2].split("-")[-1]+"集.mp4"
    x = threading.Thread(target=videos, args=(name, video,))
    x.start()
    print(name + "解析成功")




def videos(name,url):
    """Open the player page, find the real video address and start a download.

    Loads ``url`` in Chrome through Selenium, reads the ``src`` of the
    ``<video>`` element under the element with id ``a1`` and starts a thread
    running ``download(name, video_src)``.

    Args:
        name: file name the episode will be saved under.
        url: address of the player page (the iframe ``src``).

    Raises:
        IndexError: if the page contains no matching ``<video>`` element.
    """
    bro = webdriver.Chrome(executable_path="./chromedriver.exe")
    try:
        bro.get(url)
        mp4_data = bro.page_source
    finally:
        # Close the browser even when loading the page fails, so no
        # Chrome/chromedriver process is left running.
        bro.quit()
    tree2 = etree.HTML(mp4_data)
    # The query has to begin with "//" (search all descendants);
    # "http://*[...]" is not a valid XPath expression.
    video = tree2.xpath("//*[@id='a1']/div[2]/video/@src")[0]
    x = threading.Thread(target=download, args=(name, video,))
    x.start()
    print(name+"正在下載")


def download(name,url):
    """Download ``url`` and write it to the file ``name``.

    The request is sent with a random User-Agent from ``fake_useragent``.

    Args:
        name: path of the output file; it is opened in binary write mode.
        url: direct address of the video file.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    headers = {"User-Agent": UserAgent().random}
    # Stream the body so a whole episode is never held in memory at once.
    with requests.get(url=url, headers=headers, stream=True) as response:
        # Fail instead of saving an HTTP error page as an .mp4 file.
        response.raise_for_status()
        with open(name, "wb") as fp:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                fp.write(chunk)
    print(name+"下載完成")

def main():
    """Collect the episode links, then start a worker thread for each one."""
    data_parse(url_parse())


# Start the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()

©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內(nèi)容