1.拿到第一個用戶名、鏈接以及include參數(user_url)
# Account identifier the crawl starts from; it is substituted for {user}
# in the URL templates.
start_user = 'gui-mu-zhi'

# Profile endpoint template, filled in with str.format(user=..., include=...).
user_url = 'https://www.zhihu.com/api/v4/members/{user}?include={include}'

# Value substituted for {include} when requesting a user profile:
# a comma-separated list of the extra fields to return.
user_query = ','.join((
    'allow_message',
    'is_followed',
    'is_following',
    'is_org',
    'is_blocking',
    'employments',
    'answer_count',
    'follower_count',
    'articles_count',
    'gender',
    'badge[?(type=best_answerer)].topics',
))
2.拿到粉絲鏈接以及include參數
# Followers-list endpoint template, filled in with
# str.format(user=..., include=..., offset=..., limit=...).
# A stray "t" used to follow the offset placeholder ("offset={offset}t"),
# which put a non-numeric value such as "offset=0t" into every generated URL.
follows_url = 'https://www.zhihu.com/api/v4/members/{user}/followers?include={include}&offset={offset}&limit={limit}'

# Value substituted for {include} when requesting a followers page:
# the per-entry fields to return for each element of the "data" array.
follows_query = 'data[*].answer_count,articles_count,gender,follower_count,is_followed,is_following,badge[?(type=best_answerer)].topics'
3.修改初始請求,返回用戶和粉絲的請求,通過format方法將參數傳遞進去,分別回調解析用戶函數和解析粉絲函數
def start_requests(self):
    """Yield the two requests that seed the crawl.

    The first request fetches the profile of ``self.start_user`` and is
    handled by ``parse_user``. The second fetches the first page of that
    user's followers list (offset 0, limit 20) and is handled by
    ``parse_follow``.
    """
    profile_url = self.user_url.format(
        user=self.start_user,
        include=self.user_query,
    )
    yield Request(profile_url, callback=self.parse_user)

    followers_url = self.follows_url.format(
        user=self.start_user,
        include=self.follows_query,
        offset=0,
        limit=20,
    )
    yield Request(followers_url, callback=self.parse_follow)
4.解析用戶,獲取內容
def parse_user(self, response):
    """Build an item from a user-profile response and queue that user's followers.

    The response body is decoded as JSON. Every field declared on
    ``ZhihuItem`` that also appears as a key in the decoded payload is
    copied onto a new item, which is yielded. A request for the first page
    of the user's followers list (offset 0, limit 20) is then yielded with
    ``parse_follow`` as its callback.

    Args:
        response: The response for a ``user_url`` request; only its
            ``text`` attribute is read.

    Yields:
        The populated ``ZhihuItem``, followed by the followers-list
        ``Request`` when the payload carries a ``url_token``.
    """
    profile = json.loads(response.text)

    item = ZhihuItem()
    for field in item.fields:
        # Copy only the fields the API actually returned.
        if field in profile:
            item[field] = profile.get(field)
    yield item

    # Without a url_token the template would be formatted with None and
    # produce a request for ".../members/None/followers", so skip it.
    url_token = profile.get('url_token')
    if url_token:
        followers_url = self.follows_url.format(
            user=url_token,
            include=self.follows_query,
            offset=0,
            limit=20,
        )
        yield Request(followers_url, callback=self.parse_follow)
5.解析粉絲列表
def parse_follow(self, response):
    """Expand one page of a followers list into profile requests and the next page.

    The response body is decoded as JSON. For each entry of its ``data``
    array a profile request is yielded with ``parse_user`` as its callback.
    If the ``paging`` object reports ``is_end`` as false, a request for its
    ``next`` URL is yielded with this method as the callback.

    Args:
        response: The response for a followers-list request; only its
            ``text`` attribute is read.

    Yields:
        One ``Request`` per listed follower that has a ``url_token``, then
        at most one ``Request`` for the following page.
    """
    results = json.loads(response.text)

    # "data" may be absent or null; treat both as an empty page.
    for follower in results.get('data') or []:
        url_token = follower.get('url_token')
        if not url_token:
            # No identifier to substitute into the profile URL.
            continue
        profile_url = self.user_url.format(
            user=url_token,
            include=self.user_query,
        )
        yield Request(profile_url, callback=self.parse_user)

    # "paging" may be absent or null as well.
    paging = results.get('paging') or {}
    next_page = paging.get('next')
    # Paginate only on an explicit is_end == false; a missing flag is not
    # treated as "more pages", and a missing link cannot be requested.
    if paging.get('is_end') is False and next_page:
        yield Request(next_page, callback=self.parse_follow)
注:使用的是scrapy框架