pandas 和 numpy 庫處理數據(2)

接上:對于數據的基本統計分析(統計元素不再解釋,基本是均值、方差等計算)

>>> df
   num  class   name    sex  english  sport  army  math  possity  space
0   10      1   mary  woman       80     80    90  75.0       60     65
1   28      1   land    man       80     50    69  70.0       58     70
2   15      2   asnx    man       80     69    80  75.0       90     94
3   18      4  david    man       90     80    86  85.0       95     62
4   19      2    gry  woman       90     50    64   NaN       64     85
5   20      2  kitty  woman       84     58    97  94.0       63     21
6   14      3   lury  woman       98     77    88   0.0       55     40
7   21      1   facy    man       55     68    94  52.0       36     48
>>> df['sport'].describe()
count     8.000000
mean     66.500000
std      12.535663
min      50.000000
25%      56.000000
50%      68.500000
75%      77.750000
max      80.000000
Name: sport, dtype: float64
>>> df.describe()
             num     class    english    ...           math    possity      space
count   8.000000  8.000000   8.000000    ...       7.000000   8.000000   8.000000
mean   18.125000  2.000000  82.125000    ...      64.428571  65.125000  60.625000
std     5.383507  1.069045  12.699128    ...      31.245571  19.067081  23.820384
min    10.000000  1.000000  55.000000    ...       0.000000  36.000000  21.000000
25%    14.750000  1.000000  80.000000    ...      61.000000  57.250000  46.000000
50%    18.500000  2.000000  82.000000    ...      75.000000  61.500000  63.500000
75%    20.250000  2.250000  90.000000    ...      80.000000  70.500000  73.750000
max    28.000000  4.000000  98.000000    ...      94.000000  95.000000  94.000000

[8 rows x 8 columns]
>>> df['english'].size
8
>>> df['english'].max()
98
>>> df['english'].min()
55
>>> df['english'].sum()
657
>>> df['english'].mean()
82.125
>>> df['english'].var()
161.26785714285714
>>> df['english'].std()
12.699128204048383
>>> np.mean(df['english'])
82.125
>>> np.average(df['english'])
82.125
>>> df.median()
num        18.5
class       2.0
english    82.0
sport      68.5
army       87.0
math       75.0
possity    61.5
space      63.5
dtype: float64
>>> df.mode()
   num  class   name    sex  english  sport  army  math  possity  space
0   10    1.0   asnx    man     80.0   50.0    64  75.0       36     21
1   14    2.0  david  woman      NaN   80.0    69   NaN       55     40
2   15    NaN   facy    NaN      NaN    NaN    80   NaN       58     48
3   18    NaN    gry    NaN      NaN    NaN    86   NaN       60     62
4   19    NaN  kitty    NaN      NaN    NaN    88   NaN       63     65
5   20    NaN   land    NaN      NaN    NaN    90   NaN       64     70
6   21    NaN   lury    NaN      NaN    NaN    94   NaN       90     85
7   28    NaN   mary    NaN      NaN    NaN    97   NaN       95     94

>>> df.mode()
   num  class   name    sex  english  sport  army  math  possity  space
0   10    1.0   asnx    man     80.0   50.0    64  75.0       36     21
1   14    2.0  david  woman      NaN   80.0    69   NaN       55     40
2   15    NaN   facy    NaN      NaN    NaN    80   NaN       58     48
3   18    NaN    gry    NaN      NaN    NaN    86   NaN       60     62
4   19    NaN  kitty    NaN      NaN    NaN    88   NaN       63     65
5   20    NaN   land    NaN      NaN    NaN    90   NaN       64     70
6   21    NaN   lury    NaN      NaN    NaN    94   NaN       90     85
7   28    NaN   mary    NaN      NaN    NaN    97   NaN       95     94
>>> df
   num  class   name    sex  english  sport  army  math  possity  space
0   10      1   mary  woman       80     80    90  75.0       60     65
1   28      1   land    man       80     50    69  70.0       58     70
2   15      2   asnx    man       80     69    80  75.0       90     94
3   18      4  david    man       90     80    86  85.0       95     62
4   19      2    gry  woman       90     50    64   NaN       64     85
5   20      2  kitty  woman       84     58    97  94.0       63     21
6   14      3   lury  woman       98     77    88   0.0       55     40
7   21      1   facy    man       55     68    94  52.0       36     48
>>> df.groupby('class')[['english','sport','army']].mean()
         english  sport       army
class                             
1      71.666667   66.0  84.333333
2      84.666667   59.0  80.333333
3      98.000000   77.0  88.000000
4      90.000000   80.0  86.000000
>>> df.groupby(['class','sex'])['english'].agg([('total','sum'),('number','size'),('mean','mean'),('var','var')])

             total  number  mean    var
class sex                              
1     man      135       2  67.5  312.5
      woman     80       1  80.0    NaN
2     man       80       1  80.0    NaN
      woman    174       2  87.0   18.0
3     woman     98       1  98.0    NaN
4     man       90       1  90.0    NaN
>>> #建立透視表
>>> df.pivot_table(index=['class','name'])
             army  english  math  num  possity  space  sport
class name                                                  
1     facy     94       55  52.0   21       36     48     68
      land     69       80  70.0   28       58     70     50
      mary     90       80  75.0   10       60     65     80
2     asnx     80       80  75.0   15       90     94     69
      gry      64       90   NaN   19       64     85     50
      kitty    97       84  94.0   20       63     21     58
3     lury     88       98   0.0   14       55     40     77
4     david    86       90  85.0   18       95     62     80
>>> df
   num  class   name    sex  english  sport  army  math  possity  space
0   10      1   mary  woman       80     80    90  75.0       60     65
1   28      1   land    man       80     50    69  70.0       58     70
2   15      2   asnx    man       80     69    80  75.0       90     94
3   18      4  david    man       90     80    86  85.0       95     62
4   19      2    gry  woman       90     50    64   NaN       64     85
5   20      2  kitty  woman       84     58    97  94.0       63     21
6   14      3   lury  woman       98     77    88   0.0       55     40
7   21      1   facy    man       55     68    94  52.0       36     48
>>> #相關系數
>>> df['english'].corr(df['sport'])
0.0785215353368861
>>> df['english'].corr(df['army'])
-0.28518424251841296
>>> df.loc[:,['english','sport','army','math','possity','space']].corr()
          english     sport      army      math   possity     space
english  1.000000  0.078522 -0.285184 -0.210888  0.486667  0.020484
sport    0.078522  1.000000  0.604026 -0.275197  0.239372 -0.140894
army    -0.285184  0.604026  1.000000 -0.010708 -0.191855 -0.744345
math    -0.210888 -0.275197 -0.010708  1.000000  0.449533  0.180691
possity  0.486667  0.239372 -0.191855  0.449533  1.000000  0.445185
space    0.020484 -0.140894 -0.744345  0.180691  0.445185  1.000000

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

友情鏈接更多精彩內容