抓取電話號碼的例子

import os
import sys

import requests
from lxml import etree

# Crawl the index page at root_url, follow every city link found under each
# province block, and print one line per number entry found on a city page.

root_url = 'http://www.51hao.cc/'
PAGE_ENCODING = 'gb2312'  # encoding forced on every response before decoding
REQUEST_TIMEOUT = 10      # seconds; requests never times out unless told to


def _warn(message):
    """Report a skipped item on stderr so stdout stays pure data."""
    print(message, file=sys.stderr)


def _fetch_tree(url):
    """Download *url* and return it parsed as an lxml HTML tree.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = PAGE_ENCODING
    return etree.HTML(response.text)


def _split_type_label(label):
    """Split a section label into ``(name, total)``.

    ``name`` is the text before the first '('.  ``total`` is the text
    between that '(' and the following ')' with the characters '共', '个'
    and '個' removed; it is '' when the label contains no '('.
    The label is presumably of the form ``name(共N个)`` -- TODO confirm
    against the live page.
    """
    parts = label.split('(')
    name = parts[0]
    inner = parts[1].split(')')[0] if len(parts) > 1 else ''
    # GB2312 is a Simplified Chinese charset, so the measure word normally
    # arrives as '个'; the traditional '個' is still stripped for
    # backward compatibility.
    for filler in ('共', '个', '個'):
        inner = inner.replace(filler, '')
    return name, inner


selector = _fetch_tree(root_url)
# position()>1 drops the first "fkt" block.
# NOTE(review): presumably that block is a header rather than a province.
infos = selector.xpath('//div[@class="fkce"]/div[@class="fkt"][position()>1]')
print(len(infos))
for info in infos:
    province_names = info.xpath('div[@class="fkbj"]/p/a/text()')
    if not province_names:
        _warn('skip block without a province link')
        continue
    province = province_names[0]
    citys = info.xpath('div[@class="fklk"]/p/a/text()')
    citys_url = info.xpath('div[@class="fklk"]/p/a/@href')
    # Pair each city name with the href taken from the same <a> elements.
    for city_name, city_url in zip(citys, citys_url):
        print(province, city_name, city_url)

        try:
            selector2 = _fetch_tree(city_url)
        except requests.RequestException as exc:
            # One unreachable city page should not abort the whole crawl.
            _warn('skip %s: %s' % (city_url, exc))
            continue

        # One "num_bg" div per section (the original comment calls these
        # carriers/operators).
        infos2 = selector2.xpath('//div[@class="all"]//div[@class="num_bg"]')
        for i in range(len(infos2)):
            # Sections are addressed positionally: the i-th section is read
            # from the (i + 2)-th child div of div.all.
            div_index = i + 2
            prefixes = selector2.xpath(
                '//div[@class="all"]/div[%s]//span[@class="nums"]/text()' % div_index)
            labels = selector2.xpath(
                '//div[@class="all"]/div[%s]/div[1]/text()' % div_index)
            if not prefixes or not labels:
                _warn('skip section %s of %s: prefix or label missing'
                      % (div_index, city_url))
                continue
            first_3 = prefixes[0]
            types1, total = _split_type_label(labels[0])
            # NOTE(review): this matches the (i + 1)-th <li> of every list
            # under div.all, not only the lists of section i -- confirm that
            # this is the intended selection.
            mobiles = selector2.xpath(
                '//div[@class="all"]//li[%s]/a/text()' % (i + 1))
            for mobile in mobiles:
                print(province, city_name, first_3, types1, total, mobile)

©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容