【Python】asyncio協程異步批量下載視頻

#!/usr/bin/env python3
# coding: utf-8

import asyncio
import logging
import os
import pathlib
import time
from urllib import parse

import aiohttp

def to_video_name(url):
    '''
    Create the local storage directory for a URL and return the video file path.

    The directory part of the URL path is mirrored below the current working
    directory (and created if it does not exist yet); the last path segment
    becomes the file name. Example: run from /root, the URL
    http://www.example.com/video/to1.mp4 maps to /root/video/to1.mp4.

    "." and ".." segments in the directory part are resolved before use and
    can never climb above the working directory, so a crafted URL such as
    http://host/../../etc/x.mp4 cannot create directories elsewhere.

    :param url: full URL of the video.
    :return: absolute path (str) the video should be saved to. When the URL
        path has no final segment (empty path or trailing "/"), the directory
        path itself is returned.
    '''
    url_path = parse.urlparse(url).path
    # Split on the last "/" only; URL paths use "/" regardless of the OS.
    dir_part, _, file_name = url_path.rpartition("/")

    segments = []
    for segment in dir_part.split("/"):
        if segment in ("", "."):
            continue
        if segment == "..":
            # Step back one level, but never above the download root.
            if segments:
                segments.pop()
            continue
        segments.append(segment)

    video_dir = pathlib.Path(os.getcwd(), *segments)
    # parents=True builds intermediate directories; exist_ok=True makes the
    # call a no-op when the directory is already there.
    video_dir.mkdir(parents=True, exist_ok=True)
    return str(video_dir / file_name)
    
async def fetch(session, url):
    '''
    Download one video to disk, retrying a fixed number of times on failure.

    The destination path is derived from the URL by to_video_name(). Each
    attempt issues a GET request; on HTTP 200 the body is streamed to the
    destination file in 4 MiB chunks and the response is returned.

    :param session: aiohttp.ClientSession used to issue the request.
    :param url: full URL of the video,
        e.g. http://www.example.com/video/to1.mp4
    :return: the response of the successful (HTTP 200) attempt. If every
        attempt fails, the outcome of the final attempt: its non-200
        response, or None when that attempt raised. The response has left its
        ``async with`` block by the time it is returned, so callers should
        only read metadata such as ``status`` and ``url``.
    '''
    # Request headers. "identity" asks the server not to compress the body.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.1.4753.73 Safari/527.56 Edg/102.1.4753.73",
        "Accept-Encoding": "identity"
    }
    # Maximum number of attempts per URL.
    retry = 5

    # Build the local file name (and its directory) from the full URL.
    video_name = to_video_name(url)

    r = None
    for attempt in range(1, retry + 1):
        # Forget the previous attempt so a stale response is never reported
        # as the result of a later, failed attempt.
        r = None
        try:
            async with session.get(url=url, headers=headers) as r:
                if r.status == 200:
                    with open(video_name, 'wb') as fp:
                        async for chunk in r.content.iter_chunked(4194304):
                            fp.write(chunk)
                    return r
                logging.warning("attempt %d/%d for %s: unexpected HTTP status %s",
                                attempt, retry, url, r.status)
        except (aiohttp.ClientError, asyncio.TimeoutError, OSError) as e:
            # Network, HTTP-status and file-system errors are retried, but
            # made visible instead of being silently discarded.
            logging.warning("attempt %d/%d for %s failed: %r",
                            attempt, retry, url, e)
    return r
            
async def main(url_list):
    '''
    Download every URL in url_list concurrently and print a summary.

    One task per URL is started on a shared aiohttp session. For each URL a
    line "<status> <url>" is printed when fetch() returned a response, or
    "FAILED <url> ..." when it returned None or raised. The total elapsed
    time is printed last.

    :param url_list: list of video URLs (str).
    '''
    start_time = time.perf_counter()
    # Only the connection phase is time-limited (5 s); no other timeout field
    # is set here.
    timeout = aiohttp.ClientTimeout(connect=5)
    # limit=10 caps the number of simultaneous connections.
    # NOTE(review): ssl=False turns off TLS certificate verification - confirm
    # this is intended before using the script against untrusted hosts.
    conn = aiohttp.TCPConnector(ssl=False, limit=10)
    async with aiohttp.ClientSession(timeout=timeout, connector=conn, raise_for_status=True) as session:
        tasks = [asyncio.create_task(fetch(session, url)) for url in url_list]
        # return_exceptions=True keeps one failing download from aborting the
        # report for all the others.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for url, result in zip(url_list, results):
            if isinstance(result, BaseException):
                print("FAILED", url, repr(result))
            elif result is None:
                print("FAILED", url)
            else:
                print(result.status, result.url)
    print("Total time:", time.perf_counter() - start_time)

if __name__ == "__main__":
    # Script entry point: read the URL list file named on the command line
    # (one URL per line) and download everything it lists.
    import sys

    if len(sys.argv) < 2:
        sys.exit("Usage: {} URL_LIST_FILE".format(sys.argv[0]))
    url_file = sys.argv[1]
    with open(url_file, "r") as fp:
        # Strip surrounding whitespace (including "\r" from CRLF files) and
        # skip blank lines so they are not treated as URLs.
        url_list = [line.strip() for line in fp if line.strip()]
    asyncio.run(main(url_list))
root@ubuntu:/tmp/py# cat url_list.txt 
http://192.168.3.120/video/01a.mkv
http://192.168.3.120/video/02a.mkv
http://192.168.3.120/video/03a.mkv
root@ubuntu:/tmp/py# 
root@ubuntu:/tmp/py# ./down2video.py url_list.txt 
200 http://192.168.3.120/video/01a.mkv
200 http://192.168.3.120/video/02a.mkv
200 http://192.168.3.120/video/03a.mkv
Total time: 0.17106056213378906
root@ubuntu:/tmp/py# 
root@ubuntu:/tmp/py# tree 
.
├── down2video.py
├── url_list.txt
└── video
    ├── 01a.mkv
    ├── 02a.mkv
    └── 03a.mkv

1 directory, 5 files
root@ubuntu:/tmp/py# 
©著作權歸作者所有,轉載或內容合作請聯繫作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳並發布,文章內容僅代表作者本人觀點,簡書係信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容