#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import logging
import os
import shutil

import requests
from lxml import etree
# Script: fetch the Baidu home page, locate the logo image with XPath and
# save it to the current directory as ``logo1.png``.
logging.basicConfig(level=logging.INFO)

url = "https://www.baidu.com"
# Send a browser-like User-Agent with the page request.
headers = {}
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"

try:
    # The request lives inside the ``try`` so that a failed connection is
    # actually reported by the handler at the bottom.
    res = requests.get(url, headers=headers)
    if res.status_code == 200:
        # Parse the page source into an element tree.
        selector = etree.HTML(res.text)
        # Serialise the tree again so the whole parsed document can be logged.
        result = etree.tostring(selector)
        logging.info(result)

        # Extract the ``src`` of the first <img> under the element with id "lg".
        img_urls = selector.xpath("//*[@id='lg']/img[1]/@src")
        print(img_urls)
        if img_urls:
            img_url = img_urls[0]
            # Values that do not start with "http" (e.g. protocol-relative
            # "//host/path") get an explicit "http:" scheme prepended.
            img_url = img_url if img_url.startswith("http") else "http:" + img_url
            logging.info(img_url)

            # Remove any previously downloaded copy before downloading again.
            filename = "logo1.png"
            if os.path.isfile(filename):
                os.remove(filename)

            # Stream the image straight to disk; the ``with`` block releases
            # the connection once the copy is finished.
            with requests.get(img_url, stream=True) as res:
                # Have urllib3 undo any gzip/deflate transfer encoding so the
                # bytes written are the image itself.
                res.raw.decode_content = True
                with open(filename, "wb") as out_file:
                    shutil.copyfileobj(res.raw, out_file)
        else:
            logging.info("查找元素失敗")
    else:
        print("網(wǎng)頁(yè)異常")
except requests.exceptions.ConnectionError:
    # requests raises its own ConnectionError type, which is not a subclass
    # of the builtin ``ConnectionError``.
    print("連接異常")