Python爬蟲:獲取扇貝打卡信息

程序運行截圖
# -*- coding: utf-8 -*-
"""
Created on Tue May 14  2019

@author: YangYang
"""

# 針對扇貝打卡數據統計和分析
# 只統計 單詞、煉句、聽力、閱讀 四部分的數據,閱讀不區分短語和文章

from urllib.request import urlopen
import datetime
import re

# Reporting window for the check-in statistics.
# `now` is the reference ("query") date and defaults to the current moment.
now = datetime.datetime.now()
# now = datetime.date(2019,5,13)      # alternative: pin a custom reference date
# Eight days back: the reporting loop further down compares with both ends
# excluded, which leaves the seven days before the reference date.
time2 = datetime.timedelta(days=8)
# Both bounds are kept as YYYY-MM-DD text so they can be compared directly
# with the date prefix of each check-in timestamp.
day_now = now.strftime("%Y-%m-%d")
day_end = (now - time2).strftime("%Y-%m-%d")

# Integer or decimal literal as it appears in the raw response text.
_NUMBER_RE = re.compile(r"\d+\.?\d*")


def parse_ids(raw):
    """Split the prompt answer into a list of clean user IDs.

    Several IDs may be given, separated by an ASCII comma or a full-width
    comma (U+FF0C).  Surrounding whitespace is stripped and empty entries
    (for example from a trailing comma) are dropped so they never end up in
    a request URL.
    """
    stripped = (part.strip() for part in re.split("[,\uff0c]", raw))
    return [part for part in stripped if part]


def _fetch_checkins(user_id):
    """Download and decode the raw check-in listing of one user."""
    web = "https://www.shanbay.com/api/v1/checkin/user/" + str(user_id) + "/"
    # The context manager closes the HTTP response even if read/decode fails.
    with urlopen(web) as response:
        return response.read().decode()


def _section_stats(day_stats, section):
    """Return ``(num_today, used_time)`` of one stats section, as strings.

    The first two numeric literals found in the section's text are used, in
    order of appearance.  A section that is missing (or that carries fewer
    than two numbers) counts as ``("0", "0.0")``.
    """
    fragments = re.findall('"' + section + '":.*?}', day_stats)
    numbers = _NUMBER_RE.findall(" ".join(fragments))
    if len(numbers) < 2:
        return "0", "0.0"
    return numbers[0], numbers[1]


def _parse_checkin_days(payload):
    """Return ``(date, num_checkin_days)`` string pairs in response order.

    NOTE(review): extraction is positional -- the date is read from the first
    comma-separated field of each record and the day counter from the fourth.
    Confirm against the live response format before changing it.
    """
    time_prefix = len('"checkin_time": "')
    days_prefix = len('"num_checkin_days": ')
    pairs = []
    for record in re.findall('"checkin_time".*?"share_urls"', payload):
        fields = record.split(",")
        # Keep only the 10-character YYYY-MM-DD prefix of the timestamp.
        date_text = fields[0][time_prefix:time_prefix + 10]
        pairs.append((date_text, fields[3][days_prefix:]))
    return pairs


def build_report(payload, day_now, day_end):
    """Return the output lines of one user's report, in print order.

    ``payload`` is the raw response text.  ``day_now`` and ``day_end`` are
    ``YYYY-MM-DD`` strings; only check-ins dated strictly between them are
    reported (ISO dates compare correctly as plain strings).  The result
    holds a header, one line per reported day, and the per-section totals.
    """
    stats_chunks = re.findall('"stats".*?track_object_img', payload)
    checkin_days = _parse_checkin_days(payload)

    # Totals start as int 0 so an empty report prints "0", as before.
    time_read = time_listen = time_bdc = time_sentence = 0
    count_read = count_listen = count_bdc = count_sentence = 0

    lines = ["上周打卡情況:"]
    # zip() pairs the n-th stats chunk with the n-th check-in record and
    # stops at the shorter list, so a length mismatch cannot raise IndexError.
    for day_stats, (day, streak) in zip(stats_chunks, checkin_days):
        if day >= day_now:
            continue  # the reference day itself (or later) is not reported
        if day <= day_end:
            # Stopping at the first too-old record presumably relies on the
            # response listing the newest check-ins first -- TODO confirm.
            break

        read_num, read_time = _section_stats(day_stats, "read")
        listen_num, listen_time = _section_stats(day_stats, "listen")
        bdc_num, bdc_time = _section_stats(day_stats, "bdc")
        sentence_num, sentence_time = _section_stats(day_stats, "sentence")

        # Total study time of this day across the four sections.
        time_total = (float(read_time) + float(listen_time)
                      + float(bdc_time) + float(sentence_time))

        # Running totals per section: time spent and items done.
        time_read = time_read + float(read_time)
        time_listen = time_listen + float(listen_time)
        time_bdc = time_bdc + float(bdc_time)
        time_sentence = time_sentence + float(sentence_time)
        count_read = count_read + float(read_num)
        count_listen = count_listen + float(listen_num)
        count_bdc = count_bdc + float(bdc_num)
        count_sentence = count_sentence + float(sentence_num)

        lines.append("{},打卡{}天:閱讀{}篇,聽力{}句,單詞{}個,煉句{}句,學習時間{}分鐘".format(
            day, streak, read_num, listen_num, bdc_num, sentence_num, time_total))

    # The summary belongs to this user, so it is part of the per-user report
    # rather than being emitted once after all users have been processed.
    lines.append('\n')
    lines.append("單詞:{}分鐘,總計{}個".format(time_bdc, count_bdc))
    lines.append("閱讀:{}分鐘,總計{}篇".format(time_read, count_read))
    lines.append("煉句:{}分鐘,總計{}句".format(time_sentence, count_sentence))
    lines.append("聽力:{}分鐘,總計{}句".format(time_listen, count_listen))
    lines.append('\n')
    lines.append("打卡時長:{}分鐘".format(
        time_read + time_sentence + time_bdc + time_listen))
    return lines


def main():
    """Prompt for one or more user IDs and print each user's report."""
    ID_total = input("請輸入你的扇貝ID:")
    print('\n')
    for user_id in parse_ids(ID_total):
        # day_now / day_end are the module-level reporting-window bounds.
        for line in build_report(_fetch_checkins(user_id), day_now, day_end):
            print(line)

    # Keep the console window open until the user confirms.
    input("Please <Enter>")


if __name__ == "__main__":
    main()

©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容