愛奇藝vip視頻爬蟲下載

 # -*- coding: UTF-8 -*-

import configparser
import os
import time
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED

import requests


class abuyun():
    """Abuyun proxy settings read from the ``[abuyun]`` section of ``./config.ini``.

    Everything here is evaluated once, when the class body executes. The
    section must provide the options ``proxyHost``, ``proxyPort``,
    ``proxyUser`` and ``proxyPass``.

    Class attributes:
        config: the ``ConfigParser`` the settings were loaded with.
        conf_abuyun: the ``[abuyun]`` section of that parser.
        proxyMeta: proxy URL of the form ``http://user:pass@host:port``.
        proxies: mapping that uses ``proxyMeta`` for both ``http`` and
            ``https``.
    """

    config = configparser.ConfigParser()
    config.read("./config.ini")
    conf_abuyun = config["abuyun"]

    # Options are looked up in the order host, port, user, pass.
    proxyMeta = "http://{user}:{pass}@{host}:{port}".format(**{
        "host": conf_abuyun["proxyHost"],
        "port": conf_abuyun["proxyPort"],
        "user": conf_abuyun["proxyUser"],
        "pass": conf_abuyun["proxyPass"],
    })

    # The same proxy endpoint serves both schemes.
    # NOTE(review): presumably meant for the `proxies=` argument of
    # requests; nothing in the visible code passes it along - confirm.
    proxies = dict.fromkeys(("http", "https"), proxyMeta)


def run(id, timeout=30):
    """Download one ``.ts`` video segment and save it under ``./tss1/``.

    Args:
        id: Segment index. It is converted to ``str`` and left-padded with
            zeros to at least three characters to build both the URL and
            the file name. (The name shadows the ``id`` builtin; it is kept
            so existing callers are unaffected.)
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block a worker thread forever.

    Returns:
        True if the segment was fetched with HTTP 200 and written to disk,
        False if the request failed or returned any other status.
    """
    header = {
        "Origin": "https://jx.km58.top",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
        "Referer": "https://jx.km58.top/58/?url=https://www.iqiyi.com/v_19rra0h3wg.html",
    }

    # Same result as the manual "00"/"0" prefixing: pad to width 3.
    segment = str(id).rjust(3, "0")
    url = "https://cdn.ziyuan605.com/20190101/gapN0uwa/800kb/hls/SjDqfU3641{}.ts".format(segment)

    try:
        # NOTE(review): verify=False turns off TLS certificate checking.
        # It is kept because the original relied on it; confirm whether the
        # CDN certificate is actually invalid before leaving it disabled.
        response = requests.get(url, headers=header, verify=False, timeout=timeout)
    except requests.RequestException as exc:
        print("{}.ts [Download failed: {}]".format(segment, exc))
        return False

    print(response.status_code)
    if response.status_code != 200:
        # Do not save an error page as if it were video data.
        print("{}.ts [Download failed: HTTP {}]".format(segment, response.status_code))
        return False

    save_path = "./tss1/" + segment + ".ts"  # relative to the current working directory
    # exist_ok=True keeps this safe when several workers create it at once.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, 'wb') as ts_file:
        ts_file.write(response.content)
    print(segment + ".ts [Download successful!]")
    return True


def main(thread_num=5, segment_count=2135):
    """Download all segments concurrently and report the elapsed time.

    Submits ``run(index)`` for every index in ``range(segment_count)`` to a
    thread pool, waits for all of them, then prints a summary.

    Args:
        thread_num: Number of worker threads in the pool.
        segment_count: How many segments to request, starting from index 0.
    """
    # Measure how long the whole crawl takes.
    print('*' * 50)
    start = time.time()

    # The context manager guarantees the pool is shut down even if
    # submitting or waiting raises.
    with ThreadPoolExecutor(max_workers=thread_num) as executor:
        # submit() takes the callable first, then its arguments.
        future_tasks = [executor.submit(run, index) for index in range(segment_count)]

        # Block until every download task has finished.
        wait(future_tasks, return_when=ALL_COMPLETED)

    # An exception raised inside a worker is stored on its future; surface
    # it here instead of letting it disappear silently.
    failures = 0
    for index, task in enumerate(future_tasks):
        error = task.exception()
        if error is not None:
            failures += 1
            print("segment {} failed: {!r}".format(index, error))

    end = time.time()
    print("[All works are done.]")
    if failures:
        print("{} of {} tasks raised an exception".format(failures, len(future_tasks)))
    print('使用多線程,總共耗時:%s' % (end - start))
    print('*' * 50)


# Start the download with the default settings only when this file is
# executed as a script, not when it is imported as a module.
if __name__ == '__main__':
    main()
最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

友情鏈接更多精彩內容