Python爬蟲:從TXT導入數據

?
程序運行結果

?

# -*- coding: utf-8 -*-
"""
Created on Tue May 14 2019

@author: YangYang
"""

from urllib.request import urlopen
import datetime
import re
import xlwt

# Prepare the output workbook: a single sheet whose first row is the header.
workbook = xlwt.Workbook()
sheet = workbook.add_sheet('單詞群打卡')
# Column titles: Shanbay ID, Shanbay username, word total, average, study time.
head = ['扇貝ID', '扇貝用戶名', '單詞總計', '平均', '學習時間']
for h, title in enumerate(head):
    sheet.write(0, h, title)  # row 0, column h

# Work out the reporting window used when tallying check-ins.
now = datetime.datetime.now()        # audit date: start from today
#now = datetime.date(2019,5,13)      # alternative: pin a custom audit date
print("查卡日期:",now)
print('\n')
# Eight days back serves as an exclusive lower bound: the tally further down
# keeps dates strictly after day_end and strictly before day_now, i.e. the
# seven full days preceding the audit date.
time2 = datetime.timedelta(days=8)
# Keep only the "YYYY-MM-DD" part (the text before the first space).
day_now, day_end = (str(moment).split(" ")[0] for moment in (now, now - time2))

print("開始讀取ID數據")
print("數據位置:")
print("C:/Users/Administrator/Desktop/user.txt")
print('\n')

# Load the user IDs to audit from the text file, one ID per line.
# The context manager closes the file handle even if reading fails.
with open('C:/Users/Administrator/Desktop/user.txt') as ID_total_input:
    # Strip surrounding whitespace and drop blank lines: a trailing newline
    # would otherwise yield an empty ID and a malformed request URL.
    ID_total = [line.strip() for line in ID_total_input if line.strip()]

# For every user ID: download the check-in feed, total the words learned and
# the study time inside the reporting window, print a summary line and append
# one row to the Excel sheet.
i = 1  # next Excel row to fill; row 0 already holds the header

for ID in ID_total:

    # Per-user check-in feed, e.g. https://www.shanbay.com/api/v1/checkin/user/16888030/
    web = "https://www.shanbay.com/api/v1/checkin/user/"+str(ID)+"/"
    # URLError/HTTPError are OSError subclasses: report an unreachable or
    # invalid ID and move on, so that one failure does not abort the run
    # before the workbook is saved. The `with` closes the HTTP response.
    try:
        with urlopen(web) as shanbay:
            raw = shanbay.read()
    except OSError as err:
        print("ID:{},讀取失敗:{}".format(ID, err))
        continue
    shanbay_data = raw.decode()

    # Nickname: take the first `username": "<name>",` occurrence and cut off
    # the key prefix and the trailing `",`. A response without any username
    # falls back to an empty nickname instead of raising IndexError.
    find_username = re.findall("username\".*?,",shanbay_data)
    if find_username:
        username = find_username[0][len("username\": \""):-2]
    else:
        username = ""

    # One "stats" fragment and one "checkin_time" fragment per check-in record.
    find_data = re.findall("\"stats\".*?track_object_img" ,shanbay_data)
    find_checkin = re.findall("\"checkin_time\".*?\"share_urls\"",shanbay_data)

    # Date part (first 10 characters of the value) of every check-in time.
    prefix_len = len("\"checkin_time\": \"")
    checkin_time = [
        checkin.split(",")[0][prefix_len:prefix_len+10]
        for checkin in find_checkin
    ]

    time_bdc = 0
    bdc_total = 0

    # Pair each stats fragment with its check-in date. zip() stops at the
    # shorter list, so a mismatch between the two regex results can no longer
    # index past the end of checkin_time.
    for data, day in zip(find_data, checkin_time):
        if day >= day_now:
            continue  # audit day itself (or later): not part of the window
        if day <= day_end:
            # The original logic stops at the first record outside the window,
            # which presumes the feed is ordered newest first - TODO confirm.
            break

        # The first two numbers inside the "bdc" object are read as the word
        # count and the study time; a record without them counts as zero.
        bdc = re.findall("\"bdc\":.*?}",data)
        numbers = re.findall(r"\d+\.?\d*",str(bdc))
        if len(numbers) < 2:
            numbers = ["0", "0.0"]
        bdc_num, bdc_time = numbers[0], numbers[1]

        time_bdc = time_bdc+float(bdc_time)
        bdc_total = bdc_total+float(bdc_num)

    # Daily average over the seven-day window, rounded to two decimals.
    average = round(bdc_total/7,2)
    print("ID:{},昵稱:{},背單詞總計:{},平均:{},時長:{}分鐘".format(ID,username,bdc_total,average,time_bdc))

    # Store the result in the Excel sheet: row i, columns 0-4.
    sheet.write(i, 0, ID)
    sheet.write(i, 1, username)
    sheet.write(i, 2, bdc_total)
    sheet.write(i, 3, average)
    sheet.write(i, 4, time_bdc)
    i += 1

   # print(ID,username,bdc_total,average,time_bdc)

# Write the workbook to disk, report where it was saved, then wait for Enter
# so the console stays open until the user has read the output.
workbook.save('C:/Users/Administrator/Desktop/單詞群打卡.xls')
for line in (
    '\n',
    '寫入excel成功',
    "文件位置:",
    "C:/Users/Administrator/Desktop/單詞群打卡.xls",
    '\n',
):
    print(line)
input("查卡完畢,點擊回車退出")

小組打卡輸出EXCEL情況如下:(昵稱和ID做了打碼處理)

?
數據保存至Excel

?

©著作權歸作者所有,轉載或內容合作請聯繫作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容