1 刪除列
1.1 用對象的drop方法
# 注:刪除列時需指定axis=1(使用columns參數時可省略),可以用labels或者columns或者都不用。
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
# method1 和method2 和method3和method4 和method5 和method6的寫法效果一致。
df.drop(["A"], axis = 1) #method1
df.drop(labels=["A"], axis = 1) #method2
df.drop(labels="A", axis = 1) #method3
df.drop(columns="A") #method4
df.drop(columns=["A"]) #method5
df.drop(columns=["A"], axis = 1) #method6
# 注:如果用labels,必須指定axis=1(刪除列),如果用columns,則不必指定axis=1,因為columns=X等價於labels=X, axis=1
# 如果刪除多列,需傳入列名的列表,method1、method2、method5、method6的寫法均可,例如:
df.drop(["A", "B"], axis = 1) #method1
df.drop(labels=["A", "B"], axis = 1) #method2
# 如果指定的列不存在,則報錯 KeyError: "['具體指定列'] not found in axis"
# 如果就地修改,可以指定inplace=True

創建數據

image.png

刪除多列

inplace=True,默認為False
1.2 用python內建的del
# 會就地修改原數據且一次只能刪除一列
del df["A"]

image.png
2 增加列
2.1 通過位置([] or loc)<原地修改>
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
df["E"] = 1.0
df.loc[:,"F"] = np.nan
df

image.png
2.2 通過對象的insert方法 <原地修改>
第一參數的值的范圍為:[0,df.shape[1]],包括列的最大索引+1
# 在第一列添加名為“E”的一列,且其值可以為:全是0或None或np.nan的值,或是list-like,或是某些列的運算結果
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
df.insert(0,"E",0) #值全為0
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
df.insert(0,"E",None) #值全為None
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
df.insert(0,"E",np.nan) #值全為NaN
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
df.insert(0,"E",df.A + df.B) #值為A和B列的和
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
df.insert(0,"E",df["A"] + df["B"]) #值為A列和B列的和
#第一參數的值的范圍為:[0,df.shape[1]],包括列的最大索引+1
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
df.insert(4,"E",df["A"] + df["B"])
# A B C D E
#0 -2.162230 1.405092 -0.467515 -0.795393 -0.757138
#1 0.400447 0.397316 0.094859 -0.320334 0.797763
#2 1.113476 0.841143 -1.825015 1.086205 1.954619
#3 -1.152093 -0.948018 -0.232126 -0.814098 -2.100112
#4 0.452588 -1.228917 -0.795181 -0.723586 -0.776329
#5 0.153220 1.956513 -0.387545 -0.525249 2.109733
2.3 pd.concat <不會原地修改>
#推薦用axis=1
pd.concat([df, pd.DataFrame(columns = list("DFE"))], sort =False) # method1
pd.concat([df, pd.DataFrame(columns = list("DFE"))], axis = 0, sort =False) # method2
# method1 and method2 結果一致,如果sort=True,則對列名進行排序,變成ABCDEF
# 如果添加的列,已經存在,則不再重復添加(axis=0)。
A B C D F E
0 -0.162737 0.102415 0.516459 0.505589 NaN NaN
1 1.064120 0.744812 1.760359 -0.604785 NaN NaN
2 1.786787 -1.123441 -2.451674 1.125258 NaN NaN
3 -1.637454 -0.193606 2.038299 -0.116104 NaN NaN
4 0.090663 0.793978 1.206064 -1.026067 NaN NaN
5 0.908653 -1.931487 -1.068116 2.164030 NaN NaN
pd.concat([df, pd.DataFrame(columns = list("DFE"))], axis = 1, sort =False)
#如果添加的列,已經存在,則會再重復添加(axis=1)
A B C D D F E
0 -0.162737 0.102415 0.516459 0.505589 NaN NaN NaN
1 1.064120 0.744812 1.760359 -0.604785 NaN NaN NaN
2 1.786787 -1.123441 -2.451674 1.125258 NaN NaN NaN
3 -1.637454 -0.193606 2.038299 -0.116104 NaN NaN NaN
4 0.090663 0.793978 1.206064 -1.026067 NaN NaN NaN
5 0.908653 -1.931487 -1.068116 2.164030 NaN NaN NaN
2.4 對象的reindex方法<不會原地修改,reindex沒有inplace參數,如需保留結果,需將返回值重新賦值,例如 df = df.reindex(...)>
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
df.reindex(columns = list("ABCDEFG")) #method1
df.reindex(labels = list("ABCDEFG"), axis = 1) #method2
#method1 and method2結果一致
#修改默認添加值:fill_value參數
df.reindex(labels = list("ABCDEFG"), axis = 1, fill_value=21)
A B C D E F G
0 0.946368 1.168997 2.511798 0.661356 21 21 21
1 -0.666867 -0.110801 -0.455017 -0.417373 21 21 21
2 0.566638 -1.092429 1.136306 0.092472 21 21 21
3 0.605805 -0.697731 -0.001785 -0.916009 21 21 21
4 -0.715701 0.752973 0.006688 -0.948602 21 21 21
5 -0.818942 2.388051 0.679956 1.035658 21 21 21
# 修改列的順序
df.reindex(labels = list("AEFGBCD"), axis = 1, fill_value=None)
A E F G B C D
0 0.946368 NaN NaN NaN 1.168997 2.511798 0.661356
1 -0.666867 NaN NaN NaN -0.110801 -0.455017 -0.417373
2 0.566638 NaN NaN NaN -1.092429 1.136306 0.092472
3 0.605805 NaN NaN NaN -0.697731 -0.001785 -0.916009
4 -0.715701 NaN NaN NaN 0.752973 0.006688 -0.948602
5 -0.818942 NaN NaN NaN 2.388051 0.679956 1.035658
3 增加行
3.1 通過位置增加(loc and at)<原地修改>
# 添加的值的形狀(長度)必須和df對象的列數一致。
df = pd.DataFrame(np.random.randn(6,4), columns=list("ABCD"))
df.loc["5"] = [1,2,3,4] # 索引"5"在df對象中不存在,因此會新增一行
df.loc[5] = [1,2,3,3] # 索引5在對象df中存在,因此會覆蓋該行