https://github.com/micovey/Quantify
1. 代碼目的
設定一定的時間段,滾動下載數據。
例如,當前時間為2019-11-19,當日未收盤,若設置數據期間為180天,則保存2019-5-23至2019-11-18的數據
明日為2019-11-20,當日未收盤,則自動刪除2019-5-23數據,加入2019-11-19的數據。
便于之后的量化分析
滾動的目的:避免數據文件過大
2. 前期準備
首先安裝python (或者anaconda),但要注意最好不要同時安裝python和anaconda,因為anaconda里面包含純python,容易起沖突。
anaconda網址:https://www.anaconda.com/
若選擇純python, 則需要
pip install datetime
pip install pandas
pip install numpy
pip install baostock
若選擇anaconda
需要設置環境變量

111111.png
然后只需要
pip install baostock
安裝pycharm或者anaconda自帶的spyder
3. 為什么選擇baostock
Tushare、Baostock、joinquant等均可獲得股票日數據
Wind、CSMAR也可以獲得(花錢)
但是Tusharepro需要積分、joinquant只有一年試用期
因此本文選擇Baostock,無需注冊。
http://baostock.com/
4. 代碼
import baostock as bs
import pandas as pd
import numpy as np
import datetime
# ---- Log in to the baostock service ----
lg = bs.login()

# ---- Script-wide settings ----
# Today's date at midnight; only its year is used here, to pick the yearly
# data file.
now_time = datetime.datetime.combine(datetime.date.today(), datetime.time.min)
year = now_time.year

print('請(qǐng)輸入地址')
# Root location under which the Quantify folder lives.
file_place = 'D:\\'
fl = 'Quantify\\idcode.csv'
# Path of the stock-code list, i.e. D:\\Quantify\\idcode.csv
filee = ''.join((file_place, fl))
# Path of the daily-data file for the current year.
fff = ''.join((file_place, '\\Quantify\\daily\\daily', str(year), '.csv'))

# The code list has no header row; more than 10 rows is reported as a
# successful read.
idcode = pd.read_csv(filee, header=None)
print('讀取成功' if len(idcode) > 10 else '讀取失敗')
idcode = np.array(idcode)

# Length of the rolling window in calendar days (used by the functions
# below as a timedelta in days).
datalength = 180
# ---- Function definitions ----
# Rolling deletion of rows that have left the window
def drop_date_row(drop_data, window_days=None):
    """Remove the rows whose date is older than the rolling window.

    Args:
        drop_data: DataFrame with a ``date`` column holding ``'%Y/%m/%d'``
            strings. The caller's frame is not modified.
        window_days: Window length in calendar days, counted back from
            today's midnight. When omitted, the module-level ``datalength``
            is used.

    Returns:
        When at least one row is older than the cut-off: a new frame with
        ``date`` parsed to datetimes and the expired rows removed (original
        index labels are kept). Otherwise, including for an input without
        rows, an empty ``DataFrame``, which callers treat as "nothing to
        delete".

    Raises:
        KeyError: if ``drop_data`` has no ``date`` column.
        ValueError: if a date string does not match ``'%Y/%m/%d'``.
    """
    if window_days is None:
        window_days = datalength
    today = datetime.datetime.combine(datetime.date.today(), datetime.time.min)
    cutoff = today - datetime.timedelta(days=window_days)

    # Work on a copy so the caller's ``date`` column keeps its strings.
    frame = drop_data.copy()
    frame['date'] = pd.to_datetime(frame['date'], format='%Y/%m/%d')

    # A positional boolean mask stays correct even if index labels repeat.
    expired = (frame['date'] < cutoff).to_numpy()
    if not expired.any():
        return pd.DataFrame()
    return frame.loc[~expired]
# Rolling download of daily data
def get_daily_data(id, start_date_find, end_date_find):
    """Download daily K-line rows for one security and add a market value.

    Args:
        id: Security code handed to ``bs.query_history_k_data_plus``. The
            name shadows the builtin but is kept so existing callers are
            unaffected.
        start_date_find: First day requested; passed through ``str()``.
        end_date_find: Last day requested; passed through ``str()``.

    Returns:
        DataFrame with one row per returned record and the requested fields
        as columns. ``turn``, ``volume`` and ``close`` are converted to
        numbers (empty strings become 0) and a ``marketvalue`` column equal
        to ``volume / turn * close * 100`` is appended; rows whose ``turn``
        is 0 get NaN there. An empty ``DataFrame`` is returned when the
        query reports an error.
    """
    rs = bs.query_history_k_data_plus(
        id,
        "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST,peTTM,pbMRQ,psTTM,pcfNcfTTM",
        start_date=str(start_date_find),
        end_date=str(end_date_find),
        frequency="d",
        adjustflag="3",
    )
    if rs.error_code != '0':
        # Report the failure instead of silently continuing; the caller
        # skips frames without rows.
        # NOTE(review): a failed result presumably carries no fields, which
        # would make the column conversions below raise KeyError - confirm.
        print('query_history_k_data_plus failed for ' + str(id) + ': ' + str(rs.error_msg))
        return pd.DataFrame()

    data_list = []
    # ``and`` (not ``&``) so that next() is not called once an error code
    # has been set.
    while rs.error_code == '0' and rs.next():
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)

    # The service returns text; an empty string is treated as 0.
    for column in ("turn", "volume", "close"):
        result[column] = [0 if x == "" else float(x) for x in result[column]]

    # A zero turnover would otherwise produce inf through division by zero;
    # masking it yields NaN for those rows instead.
    # NOTE(review): presumably ``turn`` is a percentage, hence the * 100 -
    # confirm against the baostock field documentation.
    turnover = result["turn"].where(result["turn"] != 0)
    result["marketvalue"] = result["volume"] / turnover * result["close"] * 100
    return result
# Start and end dates of the download
def check_download(check_date, window_days=None):
    """Work out the date range that still has to be downloaded.

    Args:
        check_date: Frame of the data already stored; its ``date`` column
            must hold datetime values. May have no rows.
        window_days: Look-back in calendar days used when ``check_date``
            has no rows. When omitted, the module-level ``datalength`` is
            used.

    Returns:
        ``(start_date, end_date)`` as ``'%Y-%m-%d'`` strings. ``end_date``
        is today. ``start_date`` is the day after the latest stored date,
        or today minus the look-back when nothing is stored yet.
    """
    # Read the clock once so both ends of the range refer to the same day.
    today = datetime.datetime.combine(datetime.date.today(), datetime.time.min)

    if len(check_date) == 0:
        if window_days is None:
            window_days = datalength
        first_day = today - datetime.timedelta(days=window_days)
    else:
        first_day = check_date['date'].max() + datetime.timedelta(days=1)

    return first_day.strftime('%Y-%m-%d'), today.strftime('%Y-%m-%d')
# Remove duplicate rows
def drop_duplicates(drop_data):
    """Remove rows that repeat an earlier (code, date) pair.

    Args:
        drop_data: DataFrame with a ``code`` column and a ``date`` column
            of ``'%Y/%m/%d'`` strings. The caller's frame is not modified.

    Returns:
        When duplicates exist: a new frame with ``date`` parsed to
        datetimes that keeps the first row of every (code, date) pair
        (original index labels are kept). When there are none: an empty
        ``DataFrame``, which callers treat as "nothing to do".

    Raises:
        KeyError: if the ``code`` or ``date`` column is missing.
        ValueError: if a date string does not match ``'%Y/%m/%d'``.
    """
    # Copy first: the previous in-place edits leaked into the caller's data.
    frame = drop_data.copy()
    frame['date'] = pd.to_datetime(frame['date'], format='%Y/%m/%d')

    unique_rows = frame.drop_duplicates(subset=['code', 'date'], keep='first')
    if len(unique_rows) == len(frame):
        return pd.DataFrame()
    return unique_rows
# ---- Main script ----
# Everything runs inside try/finally so the baostock session is closed even
# when a step fails.
try:
    # Step 1: rolling deletion of rows that have left the window.
    drop_data = pd.read_csv(fff)
    drop_data = drop_date_row(drop_data)
    # An empty frame is the "nothing to delete" signal. Testing for exactly
    # zero rows (instead of fewer than two) keeps a one-row result from
    # being mistaken for that signal and never written back.
    # NOTE(review): a prune that removes every row is indistinguishable
    # from the signal, so the file is left untouched in that case.
    if len(drop_data) == 0:
        print("不需要?jiǎng)h除數(shù)據(jù)")
    else:
        print("已刪除數(shù)據(jù)")
        drop_data['date'] = [x.strftime('%Y/%m/%d') for x in drop_data['date']]
        drop_data.to_csv(fff, header=True, index=False)

    # Step 2: work out whether anything has to be downloaded.
    check_date = pd.read_csv(fff)
    check_date['date'] = [datetime.datetime.strptime(x, '%Y/%m/%d') for x in check_date['date']]
    start_date, end_date = check_download(check_date)

    # Step 3: download the missing daily history.
    first_day = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.datetime.strptime(end_date, '%Y-%m-%d')
    if first_day > last_day:
        print("不需要更新數(shù)據(jù),沒有開始新交易")
    elif first_day == last_day and datetime.datetime.now().hour < 18:
        # Only today is missing and it is before 18:00, so today's data is
        # not fetched yet.
        print("不需要更新數(shù)據(jù),當(dāng)日沒有結(jié)束")
    else:
        for row in idcode:
            # Each row is an array such as ['sh.600000']; characters 2..10
            # of its text form are the 9-character code.
            stock_code = str(row)[2:11]
            print('=====' + stock_code + '=====')
            result1 = get_daily_data(stock_code, start_date, end_date)
            if len(result1) == 0:
                continue
            result1['date'] = [datetime.datetime.strptime(x, '%Y-%m-%d') for x in result1['date']]
            result1['quarter'] = [(x.month - 1) // 3 + 1 for x in result1['date']]
            result1['year'] = [int(x.year) for x in result1['date']]
            result1['date'] = [x.strftime('%Y/%m/%d') for x in result1['date']]
            # NOTE(review): the append writes the index as a leading
            # column while the rewrites in steps 1 and 4 use index=False;
            # confirm the file header accounts for that extra column.
            result1.to_csv(fff, mode='a', header=False)

    # Step 4: remove duplicate (code, date) rows.
    drop_data = pd.read_csv(fff)
    drop_data = drop_duplicates(drop_data)
    # Same empty-frame signal as in step 1.
    if len(drop_data) == 0:
        print("沒有重復(fù)項(xiàng)")
    else:
        print("已刪除重復(fù)項(xiàng)")
        drop_data['date'] = [x.strftime('%Y/%m/%d') for x in drop_data['date']]
        drop_data.to_csv(fff, header=True, index=False)
finally:
    bs.logout()