Pandas使用技巧

添加列并逐行设置值

import tushare as ts
import time
import pandas as pd

def statisticsfordayofweek(code):
    """Show a bar chart of how many down days fall on each weekday.

    Fetches K-line data for ``code`` with ``ts.get_k_data``, labels every row
    with the English weekday name of its ``date`` value, keeps the rows whose
    ``close`` is below ``open``, and plots the number of such rows per
    weekday.

    Args:
        code: Security code, passed unchanged to ``ts.get_k_data``.

    Returns:
        None. The chart is displayed with ``plt.show()``.
    """
    # Imported locally: the module-level imports do not bring ``plt`` into
    # scope, so the function would otherwise fail with a NameError.
    import matplotlib.pyplot as plt

    marketdata = ts.get_k_data(code)
    # Vectorised weekday lookup instead of a per-row strptime/strftime loop.
    # ``day_name()`` returns English names regardless of the process locale.
    marketdata['dayofweek'] = pd.to_datetime(
        marketdata['date'], format="%Y-%m-%d").dt.day_name()

    # A "down" day closes below its opening price.
    downdata = marketdata[marketdata.close < marketdata.open]
    # Group on the first four letters of the weekday name (e.g. "Mond").
    dayweekgroup = downdata['dayofweek'].groupby(
        downdata['dayofweek'].str[:4]).count()

    dayweekgroup.plot(kind="bar")
    # SimHei is a CJK-capable font; disabling unicode_minus keeps the minus
    # sign renderable with it.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.show()

从DataFrame获取特定列数据

data = pd.read_csv('./data/optdigits.tra', header=None)
# Select by position with ``iloc``: x is every row of columns 0-63 (64 columns
# in total) and y is column 64.
# Author's note: under Python 3, ``data[range(64)].values`` raises a
# slice-related exception, hence the positional indexer.
x, y = data.iloc[:, :64].values, data.iloc[:, 64].values
# View each 64-value row as an 8x8 grid.
images = x.reshape(-1, 8, 8)
# ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same dtype
# and needs no NumPy import.
y = y.ravel().astype(int)

完全构建新的DataFrame,并添加数据

# Column layout of the result table, in output order.
_result_columns = [
    'domicile',
    'universe',
    'rawname',
    'suggestid',
    'legalname',
    'Similarity%',
]
# Values of the single example row, in the same order as the columns.
_first_row_values = [
    'UK',
    'ETF',
    'IL Bright Start College Savings (Advisor) Advisor Age Based 15-17 Yrs Port',
    'F123456ABC',
    'IL Bright Start College Savings (Advisor) Advisor Age Based',
    '81.75',
]

# Start from an empty frame that only declares its columns, then add the row
# under index label 0 as a column-name -> value mapping.
dfresult = pd.DataFrame(columns=_result_columns)
dfresult.loc[0] = dict(zip(_result_columns, _first_row_values))

导出csv时,解决中文乱码问题

# The 'utf_8_sig' codec prepends a UTF-8 byte-order mark to the file; this is
# the encoding the article uses to avoid garbled Chinese text in the export.
result_path = './output/result_%s.csv' % searchdate
dfresult.to_csv(result_path, encoding='utf_8_sig')

解决merge过程中,数据类型不匹配问题

# Build the lookup table in one step rather than growing it row by row with
# ``.loc``, which copies the frame on every insert.
# Assumes ``docdate`` is a dict-like mapping of document id -> effective date
# -- TODO confirm against the code that produces it.
dfresult = pd.DataFrame(list(docdate.items()),
                        columns=['documentid', 'effectivedatehat'])

# Cast the join key on BOTH sides to the same integer dtype so the merge does
# not compare mismatched types (e.g. strings against integers). An explicit
# ``astype`` also gives an empty frame the right dtype.
dfresult['documentid'] = dfresult['documentid'].astype('int64')
dfsample = pd.read_csv('./output/sample/rawsupplementdocwitheffectivedate.csv', encoding='utf-8')
dfsample['documentid'] = dfsample['documentid'].astype('int64')

# Left join: every sample row is kept; rows with no match in ``dfresult`` get
# a missing value in ``effectivedatehat``.
dfmerge = pd.merge(dfsample, dfresult, on=['documentid'], how='left')
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容