女朋友公司經常需要做各種植物名錄,幾十上百種、不重複,好麻煩啊。試試用 selenium + xpath,簡單利用 chrome,通過植物名稱列表循環查找,爬取植物通數據庫(http://1.zhiwutong.com/)的植物名錄(科、屬、種、拉丁名、中文名)。
輸入列表

圖片.png
#植物名錄自動循環(huán)抓取
from collections import OrderedDict

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
# Location of the chromedriver executable handed to Selenium.
chromedriver = r"C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver"
# Home page of the Zhiwutong plant database; the search form is used from here.
url = "http://1.zhiwutong.com/"
# Launch Chrome through chromedriver and open the home page.
driver = webdriver.Chrome(chromedriver)
driver.get(url)
# Ask the user for the path of the text file that holds the plant names.
name = input("name:")
def file(name):
    """Read the plant-name list file and return its lines.

    Args:
        name: Path of the text file to read (opened in text mode with the
            platform default encoding).

    Returns:
        A list with one string per line of the file. Line endings are kept,
        exactly as ``readlines()`` produces them.
    """
    # The context manager closes the handle even if reading raises; the
    # previous version opened the file and never closed it.
    with open(name, 'r') as handle:
        return handle.readlines()
def catch():
    """Collect the text of the result cells on the page currently loaded.

    Uses the module-level ``driver`` and selects every ``td`` with
    ``align="center"`` in the second row of the element whose id is
    ``table21``.

    Returns:
        A list holding the ``.text`` of each matched cell, in the order
        Selenium returns them; an empty list when nothing matches.
    """
    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3; older Selenium releases accept this form too.
    cells = driver.find_elements(
        By.XPATH, '//*[@id="table21"]/tbody/tr[2]/td[@align="center"]'
    )
    return [cell.text for cell in cells]
# Load the search terms; ``name`` is rebound from the file path to its lines.
name = file(name)
# Maps each plant name to the list of cell texts scraped for it, in input order.
plant = OrderedDict()
for i in name:
    # Strip BEFORE typing: the raw line still ends with its newline, which
    # would otherwise be typed into the search box as well (and may act as
    # Enter, submitting the form before the button below is clicked).
    i = i.strip()
    if not i:
        # Blank lines in the list file are not plant names; skip them rather
        # than running an empty search and storing a '' key.
        continue
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; older Selenium releases accept this form too.
    search_box = driver.find_element(By.XPATH, '//*[@id="form2"]/input[1]')
    search_box.send_keys(i)
    submit = driver.find_element(By.XPATH, '//*[@id="form2"]/input[2]')
    submit.click()
    plant[i] = catch()
    # Return to the home page so the search form is available for the next name.
    driver.get(url)
# Turn the results into [plant name, scraped cell texts] rows and export them.
last1 = [[plant_name, cells] for plant_name, cells in plant.items()]
column = ['name', 'belong']
real = pd.DataFrame(data=last1, columns=column)
# Output file name; utf_8_sig prepends a BOM (presumably so spreadsheet tools
# detect the encoding of the Chinese text -- confirm if that matters).
real.to_csv('real.csv', encoding="utf_8_sig")
輸出CSV(real.csv):

圖片.png
然後用 python 或者 excel 調調格式吧,都很簡單啦。記得調用 chrome 前需要先配置 chromedriver,教程鏈接:https://www.cnblogs.com/x_wukong/p/9103099.html