#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
from datetime import datetime, time, timedelta
import h5py
import numpy as np
import pandas as pd
from dateutil import parser
from pymongo import MongoClient
import config
from util import date_range
# Candle periods accepted by candle(); a period not listed here is rejected.
# The commented-out entries are kept as a record of periods that are not
# enabled at the moment.
PERIODS = {
    '1min',
    # '3min',
    # '5min',
    # '15min',
    # '30min',
    # '1day',
    # '3day',
    # '1week',
    # '1hour',
    # '2hour',
    # '4hour',
    # '6hour',
    # '12hour',
}
# Root of the minute-candle store; candle() reads <root>/<exchange>/<sym>.h5.
MIN_CANDLE_FOLDER = os.path.join(config.data_dir, 'bar')
# Root of the tick store; tick() reads <root>/<exchange>/<sym>/<date>.h5.
TICK_FOLDER = os.path.join(config.data_dir, 'tick')
def hist_symbol(date):
    """Fetch the historical symbol list recorded for one date.

    :param str date: date to look up; parsed with ``dateutil.parser``
    :returns: the symbols stored for that date, or None when no record
        (or no ``symbols`` field) is found
    :rtype: list or None
    """
    with MongoClient(config.mongo_read_uri) as client:
        collection = client.master.hist_symbols
        query = {"date": parser.parse(date)}
        # Only the "symbols" field is requested; "_id" is excluded.
        record = collection.find_one(
            query, projection={"symbols": 1, "_id": 0})
        if not record or 'symbols' not in record:
            return None
        return record['symbols']
def candle(symbol, period, begin, end):
    """Load candlestick (K-line) bars for one symbol from its HDF5 file.

    :param symbol: symbol in ``exchange/name`` form, e.g. ``okex/btc.usdt``
    :param str period: bar period; must be a member of ``PERIODS``
    :param str begin: start time (inclusive)
    :param str end: end time (inclusive)
    :returns: index: datetime (naive, converted from the stored timestamps
        as UTC); columns: open, high, low, close, volume
    :rtype: pandas DataFrame
    :raises KeyError: if ``period`` is not in ``PERIODS``
    :raises ValueError: if ``symbol`` is not of the form ``exchange/name``,
        if the symbol's HDF5 file does not exist, or if the file holds no
        data for any date between ``begin`` and ``end``
    """
    begin_dt = parser.parse(begin)
    end_dt = parser.parse(end)

    if period not in PERIODS:
        # Build the message here: extra positional arguments handed to an
        # exception are stored as-is and never %-formatted.
        raise KeyError(
            'argument wrong: period should be in [%s], given value %s'
            % (','.join(sorted(PERIODS)), period))

    exchange, sym = symbol.split('/')
    h5filepath = os.path.join(MIN_CANDLE_FOLDER, exchange, sym + '.h5')
    if not os.path.isfile(h5filepath):
        raise ValueError('file not existed: ' + h5filepath)

    empty_message = 'empty data since {} until {}'.format(begin_dt, end_dt)

    timestamp_chunks = []
    price_chunks = []
    volume_chunks = []
    with h5py.File(h5filepath, 'r') as min_fs:
        # One group per date, keyed by str() of the value yielded by
        # date_range (presumably YYYY-MM-DD -- confirm against util).
        for day in date_range(begin_dt.date(), end_dt.date()):
            key = str(day)
            if key not in min_fs:
                # Dates without a group are skipped, not treated as errors.
                continue
            group = min_fs[key]
            timestamp_chunks.append(group['timestamps'][...])
            price_chunks.append(group['prices'][...])
            volume_chunks.append(group['volumes'][...])

    # np.concatenate rejects an empty sequence with a generic message, so
    # report the missing data explicitly before calling it.
    if not timestamp_chunks:
        raise ValueError(empty_message)

    # timestamps and volumes become single columns; prices is expected to
    # carry four columns (open, high, low, close) so that the combined array
    # matches the six column names below.
    timestamps = np.reshape(np.concatenate(timestamp_chunks), (-1, 1))
    prices = np.concatenate(price_chunks)
    volumes = np.reshape(np.concatenate(volume_chunks), (-1, 1))
    cache = np.concatenate((timestamps, prices, volumes), axis=1)
    if len(cache) == 0:
        # Date groups existed but contained no rows.
        raise ValueError(empty_message)

    df = pd.DataFrame(
        data=cache,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    # NOTE(review): datetime.utcfromtimestamp is deprecated since Python
    # 3.12; it is kept so the resulting index values stay unchanged.
    df['datetime'] = df['timestamp'].apply(datetime.utcfromtimestamp)
    df = df.drop('timestamp', axis=1)
    df = df.set_index('datetime')
    # Whole days were loaded; trim to the requested (inclusive) time window.
    return df[begin_dt:end_dt]
def tick(symbol, begin, end, level=20):
    """Load order-book depth ticks for one symbol from its daily HDF5 files.

    :param symbol: symbol in ``exchange/name`` form, e.g. ``okex/eos.usdt``
    :param str begin: start time (inclusive)
    :param str end: end time (inclusive)
    :param int level: number of depth levels to keep on each side
    :returns: index: datetime (naive, converted from the stored timestamps
        as UTC, sorted ascending); columns: bidpN~1, last, askp1~N,
        bidsN~1, volume, asks1~N, where bidp is the bid price and bids the
        bid size
    :rtype: pandas DataFrame
    :raises ValueError: if ``symbol`` is not of the form ``exchange/name``,
        if the file for any date in the range is missing, if ``level``
        exceeds the levels stored in a file, or if the date range is empty
    """
    start_at = parser.parse(begin)
    stop_at = parser.parse(end)
    first_day = start_at.date()
    last_day = stop_at.date()
    # An end time later than 16:00 also pulls in the following day's file
    # (presumably the daily files roll over at 16:00 -- confirm).
    if stop_at > datetime.combine(last_day, time(16, 0, 0)):
        last_day = last_day + timedelta(days=1)

    exchange, sym = symbol.split('/')

    stamps = []
    prices = []
    volumes = []
    for day in date_range(first_day, last_day):
        path = os.path.join(TICK_FOLDER, exchange, sym, str(day) + '.h5')
        if not os.path.isfile(path):
            raise ValueError('file not existed: ' + path)
        with h5py.File(path, 'r') as store:
            stamps.append(store['timestamps'][...])
            # Columns are assumed to be laid out as N bids, last, N asks
            # (2N + 1 in total), so dropping the same count from both ends
            # keeps the innermost `level` levels.
            stored_levels = int((store['prices'].shape[1] - 1) / 2)
            surplus = stored_levels - level
            if surplus < 0:
                raise ValueError(
                    'level is larger than shape in {} file'.format(path))
            keep = (slice(surplus, -surplus) if surplus > 0
                    else slice(None, None, None))
            prices.append(store['prices'][..., keep])
            volumes.append(store['volumes'][..., keep])

    if not stamps:
        raise ValueError('empty data since {} until {}'.format(
            start_at, stop_at))

    stamp_col = np.reshape(np.concatenate(stamps), (-1, 1))
    cache = np.concatenate(
        (stamp_col, np.concatenate(prices), np.concatenate(volumes)),
        axis=1)

    # Bid columns run from the deepest level down to 1, ask columns from 1
    # up to the deepest level.
    bid_levels = [str(n) for n in range(level, 0, -1)]
    ask_levels = [str(n) for n in range(1, level + 1)]
    columns = (
        ['timestamp']
        + ['bidp' + n for n in bid_levels]
        + ['last']
        + ['askp' + n for n in ask_levels]
        + ['bids' + n for n in bid_levels]
        + ['volume']
        + ['asks' + n for n in ask_levels]
    )

    frame = pd.DataFrame(data=cache, columns=columns)
    frame['datetime'] = frame['timestamp'].apply(datetime.utcfromtimestamp)
    frame = frame.drop('timestamp', axis=1)
    frame = frame.set_index('datetime').sort_index()
    # Whole days were loaded; trim to the requested (inclusive) time window.
    return frame[start_at:stop_at]
#if __name__ == '__main__':
#    # df = candle(
#    #     'okex/btc.usdt', '1min', begin='2018-9-1 1:05:00', end='2018-9-3')
#    # print(df.head())
#    df = tick(
#        'okex/eos.usdt',
#        begin='2018-9-9 00:00:00',
#        end='2018-9-9 00:02:00',
#        level=1)
#    print(df.head())