Numpy&Pandas視頻筆記

數據處理入門

1 簡介

numpy底層由C語言編寫,在tensorflow等應用中更方便、快速。pandas是numpy的升級版

2 安裝

3 基本屬性

import numpy as np

# Build a 2x3 integer matrix. Each inner list is one row, so the rows must be
# separated by a comma; without it Python tries to index the first list with
# the tuple (2, 5, 8) and raises a TypeError.
array = np.array([[1, 2, 3],
                  [2, 5, 8]], dtype=int)
print(array.ndim)   # number of dimensions -> 2
print(array.shape)  # (rows, columns) -> (2, 3)
print(array.size)   # total number of elements -> 6

4 創建數組array

import numpy as np

# 3x4 matrix filled with 0.0 (float by default).
zero_array = np.zeros((3, 4))
# 2x4 matrix where every element is the integer 1.
one_array = np.ones((2, 4), dtype=int)
# np.empty only allocates the 3x2 buffer; its contents are uninitialised and
# must not be assumed to be zero. The result is kept in a name so it can be
# inspected instead of being thrown away.
empty_array = np.empty((3, 2))
# arange(12) yields the integers 0..11 (12 is excluded); reshape them into a
# matrix with 2 rows and 6 columns.
range_array = np.arange(12).reshape((2, 6))
# 5 evenly spaced numbers from 0 to 10, both endpoints included:
# 0, 2.5, 5, 7.5, 10.
line_arr = np.linspace(0, 10, 5)

5 基礎運算

import numpy as np

# Two 2x2 integer matrices to multiply.
a = np.array([[10, 20], [1, 0]])
b = np.arange(4).reshape((2, 2))

# Element-by-element product versus the true matrix product.
c = np.multiply(a, b)
c_dot = a.dot(b)
print(c)
print(c_dot)

# Reductions on a 2x4 matrix of uniform random values from [0, 1).
a = np.random.random((2, 4))
print(a)
print(a.sum())        # sum over every element
print(a.max(axis=1))  # maximum of each row
print(a.min(axis=0))  # minimum of each column

6 基礎運算2

# Integers 14 down to 3 laid out as a 3x4 matrix.
a = np.arange(14, 2, -1).reshape(3, 4)
print(a)
# Values below 5 become 5 and values above 9 become 9.
print(a.clip(5, 9))
# axis=0 collapses the rows: one mean per column.
print(a.mean(axis=0))
# axis=1 collapses the columns: one mean per row.
print(a.mean(axis=1))

7 numpy索引

# 1-D array holding the integers 3..14.
a = np.arange(3, 15)
print('a', a)
print(a[3])  # element at position 3

# The same 12 values viewed as a 3x4 matrix.
b = a.reshape(3, 4)
print(b)
print(b[2, 1])   # row 2, column 1 (indices start at 0)
print(b[:, 2])   # column 2, every row
print(b[0])      # row 0, every column
print(b[:2, 0])  # column 0 of rows 0 and 1 (the stop index 2 is excluded)

# Iterating with for loops.
a = np.arange(3, 15).reshape(3, 4)
print(a)

print('row:')
for row in a:  # iterating a 2-D array yields one row at a time
    print(row)

print('column:')
for column in a.transpose():  # rows of the transpose are the columns of a
    print(column)

print('flat:')
print(a.flatten())  # all elements as a 1-D array
for item in a.flat:  # element-by-element iterator
    print(item)

8 numpy array合并

a = np.array([1] * 3)
b = np.array([2] * 3)

# vstack places the arrays on top of each other (result is 2x3);
# hstack joins them end to end (result has 6 elements).
c = np.vstack([a, b])
d = np.hstack([a, b])
print(c.shape, d.shape)
print(c)
print(d)

# Indexing with None (np.newaxis) inserts a length-1 axis.
print(a[None, :])  # shape (1, 3): a single row
print(a[:, None])  # shape (3, 1): a single column

# concatenate joins any number of arrays along an existing axis; for 1-D
# inputs axis 0 is the only axis, so the arrays are joined end to end.
d = np.concatenate([a, b, b, a], axis=0)
print(d)

9 array分割

# 3x4 matrix holding 0..11.
a = np.arange(12).reshape(3, 4)
print(a)

# Cut along the columns into 2 equal pieces (each 3x2).
print(np.split(a, indices_or_sections=2, axis=1))
# Cut along the rows into 3 equal pieces (each 1x4).
print(np.split(a, indices_or_sections=3, axis=0))
# array_split tolerates uneven division: 4 columns into 3 pieces.
print(np.array_split(a, indices_or_sections=3, axis=1))

10 numpy copy & deep copy

a = np.arange(4)
print(a)

# Plain assignment does not copy: b, c and d are further names for the very
# same array object as a.
b = c = d = a
a[0] = 11
print(a)
print(b is a)
d[1:3] = (22, 33)  # writing through d changes a, b and c as well
print(c is a)

# copy() produces an independent array, so later writes to a leave b alone.
b = a.copy()
a[3] = 15
print(a is b)

11 pandas基本介紹

import pandas as pd
import numpy as np

# A Series is a 1-D sequence of values that carries an index.
s = pd.Series([1, 3, 6, np.nan, 44, 1])
print(s)

# Six consecutive days starting on 2018-03-04.
dates = pd.date_range('20180304', periods=6)
print(dates)

# A DataFrame is a labelled 2-D table: here the rows are labelled by the
# dates and the columns by the letters a..d.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
print(df)

# Without explicit labels both axes are numbered from 0.
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)))
print(df1)

# A dict builds the frame column by column; scalar entries are repeated to
# match the length of the longer columns.
df2 = pd.DataFrame({
    'A': 1,
    'B': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    'C': pd.Timestamp('20180102'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train'] * 2),
    'F': 'foo',
})
print(df2)
print(df2.dtypes)      # dtype of each column
print(df2.index)       # row labels
print(df2.columns)     # column labels
print(df2.values)      # the data as a numpy array
print(df2.describe())  # summary statistics
print(df2.T)           # transpose
# axis=1 sorts by the column labels; ascending=False gives F..A order.
print(df2.sort_index(axis=1, ascending=False))
# Sort the rows by the values in column E.
print(df2.sort_values(by='E'))

12 pandas選擇數據

dates = pd.date_range('20180308', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=list('abcd'),
)
# Slicing the frame directly selects ROWS: by position, or by date label.
print(df[:3], df['20180309':'20180311'])
# A single key or attribute access selects a COLUMN.
print(df['a'], df.a)
# loc selects by label: one row here ...
print(df.loc['20180309'])
# ... and every row of columns b and c here.
print(df.loc[:, ['b', 'c']])
# iloc selects purely by integer position.
print(df.iloc[3:5, 1:3])
# print(df.ix[:3,['a','d']])  # mixed label/position selection, deprecated
# Boolean mask: keep the rows where column a is greater than 8.
print(df[df.a > 8])

13 pandas設置值

# Assign by integer position: row 2, column 2.
df.iloc[2, 2] = 1111
# Assign by label: the row for 2018-03-09, column 'a'.
df.loc['20180309', 'a'] = 2222
# Conditional assignment: set column 'b' to 0 on every row where 'a' > 8.
# This is done in ONE .loc call; the chained form df.b[mask] = 0 writes to an
# intermediate object and is not guaranteed to update df itself.
df.loc[df.a > 8, 'b'] = 0

14 如何處理丟失數據

dates = pd.date_range('20180308', periods=6)
# The values are created as floats because NaN is a float: writing NaN into
# an integer column forces an implicit dtype change, which pandas deprecates.
df = pd.DataFrame(np.arange(24, dtype=float).reshape((6, 4)),
                  index=dates, columns=['a', 'b', 'c', 'd'])
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
# Drop rows containing NaN. how='any' drops a row with at least one NaN;
# how='all' drops it only when every value in the row is NaN.
print(df.dropna(axis=0, how='any'))
# Replace every NaN with 0.
print(df.fillna(value=0))
# Boolean frame marking the missing cells (isnull must be CALLED; printing
# df.isnull without parentheses only shows the bound method).
print(df.isnull())
# True when at least one cell in the whole frame is NaN.
print(df.isnull().values.any())

15 導入導出數據

data = pd.read_csv('filepath')#import: load a CSV file into a DataFrame ('filepath' is a placeholder)
data.to_pickle('filepath')#export: write the DataFrame to a pickle file. NOTE(review): same placeholder as the input above - presumably a different output path is intended; confirm

16 合并concatenating

# Three 3x4 frames with identical columns, filled with 0, 1 and 2.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])

# print(df1)
# print(df2)
# print(df3)

# axis=0 stacks the frames vertically; ignore_index renumbers the rows.
# result = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
# print(result)

# join: 'inner' keeps only the columns shared by every frame, while 'outer'
# (the default) keeps all columns and fills the gaps with NaN.
df4 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['c', 'd', 'e', 'f'])
df5 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
res = pd.concat([df4, df5], axis=0, join='inner', ignore_index=True)
print(res)

# Side-by-side concatenation aligned on df4's row index. The join_axes
# argument of pd.concat was removed from pandas, so the result is reindexed
# to df4.index instead.
res2 = pd.concat([df4, df5], axis=1).reindex(df4.index)
print(res2)

# Appending rows. DataFrame.append was removed from pandas; pd.concat does
# the same job.
res3 = pd.concat([df1, df2], ignore_index=True)
print(res3)

# Appending a single Series as one new row: turn it into a one-row frame
# first so that its index labels line up with the columns of df1.
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
res4 = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print(res4)

17 合并merge

# Merge two frames on one shared key column.
left = pd.DataFrame({
    'key': [f'k{i}' for i in range(4)],
    'A': [f'b{i}' for i in range(4)],
    'B': [f'a{i}' for i in range(4)],
})
right = pd.DataFrame({
    'key': [f'k{i}' for i in range(4)],
    'C': [f'c{i}' for i in range(4)],
    'D': [f'd{i}' for i in range(4)],
})
res = pd.merge(left, right, on='key')
# print(res)

# Merge on two key columns at once; how='inner' keeps only the
# (key1, key2) pairs that occur in both frames.
left2 = pd.DataFrame({
    'key1': ['k0', 'k0', 'k1', 'k2'],
    'key2': ['k0', 'k1', 'k0', 'k1'],
    'A': [f'b{i}' for i in range(4)],
    'B': [f'a{i}' for i in range(4)],
})
right2 = pd.DataFrame({
    'key1': ['k0', 'k0', 'k1', 'k2'],
    'key2': ['k0'] * 4,
    'C': [f'c{i}' for i in range(4)],
    'D': [f'd{i}' for i in range(4)],
})
res2 = pd.merge(left2, right2, how='inner', on=['key1', 'key2'])
# print(res2)

# Merge side by side, matching rows by their index instead of by a column.
res3 = pd.merge(left2, right2, how='outer',
                left_index=True, right_index=True)
print(res3)

# When both frames have a non-key column with the same name but different
# values, suffixes renames the two copies so that both are kept.
boys = pd.DataFrame({'k': ['k0', 'k1', 'k2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['k0', 'k0', 'k3'], 'age': [4, 5, 6]})

res4 = pd.merge(boys, girls, how='inner', on='k',
                suffixes=['_boy', '_girl'])
print(res4)

18 plot圖表

import matplotlib.pyplot as plt
# 1-D data (Series): the running total of 1000 standard-normal samples.
data = pd.Series(np.random.randn(1000), index=np.arange(1000)).cumsum()
# data.plot()
# plt.show()  # display the figure

# 2-D data (DataFrame): four such running totals in columns A..D.
data2 = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=['A', 'B', 'C', 'D'],
).cumsum()
print(data2.head())
# data2.plot()

# Two scatter series on one set of axes: the first call creates the axes and
# the second call draws onto them through ax=.
ax = data2.plot.scatter(x='A', y='B', color='DarkBlue', label='Class1')
data2.plot.scatter(x='A', y='C', color='DarkGreen', label='Class2', ax=ax)
plt.show()  # display the figure

# plot methods: 'bar','hist','box','kde','area','scatter','hexbin','pie'

感謝:

視頻鏈接

莫煩PYTHON

Pandas速查手冊中文版 - CSDN博客

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容