天天看點

DataFrame:1、dataframe的建立 2、讀取dataframe資料單元

1、dataframe的建立

  • df_1 = pd.DataFrame({'A': [0, 1, 2], 'B': [3, 4, 5]}) 
  • df_2 = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=['A' , 'B' , 'C'], index=[1, 2]),columns是列標籤,index是行標籤
import numpy as np

# 5x5 table of sample values; each inner list is one row.
price = np.array([
    [3, 4, 5, 6, 7],
    [5, 6, 5, 4, 3],
    [4, 4, 5, 4, 3],
    [5, 5, 6, 7, 4],
    [5, 6, 7, 5, 4],
])
print(price)  # the whole 2-D array
print(price[0, 2])  # first row, third element
print(price[1, :])  # every element of the second row
print(price[1, 1:3])  # second row, 2nd through 3rd elements
print(price[:, 1])  # every element of the second column
# Mean of the column in which the first row attains its maximum.
peak_col = price[0, :].argmax()
print(price[:, peak_col].mean())
# Mean of each row, printed one per line.
for row in price:
    print(row.mean())
print(price.mean(axis=0))  # means computed per column
print(price.mean(axis=1))  # means computed per row
# Standard deviation of each row, printed one per line.
for row in price:
    print(row.std())
           
[[3 4 5 6 7]
 [5 6 5 4 3]
 [4 4 5 4 3]
 [5 5 6 7 4]
 [5 6 7 5 4]]
5
[5 6 5 4 3]
[6 5]
[4 6 4 5 6]
4.2
5.0
4.6
4.0
5.4
5.4
[4.4 5.  5.6 5.2 4.2]
[5.  4.6 4.  5.4 5.4]
1.4142135623730951
1.0198039027185568
0.6324555320336759
1.019803902718557
1.019803902718557
           
import numpy as np

# Column labels followed by one labelled row of values per entry.
header = ['date', 's1', 's2', 's3', 's4', 's5']
rows = [
    ['a', 3, 4, 5, 6, 7],
    ['b', 5, 6, 5, 4, 3],
    ['c', 4, 4, 5, 4, 3],
    ['d', 5, 5, 6, 7, 4],
    ['e', 5, 6, 7, 5, 4],
]
price = np.array([header] + rows)
# Inspect the element type: an array's elements must all share one type,
# so every value here has been converted to a string.
# The result is a fixed-width string dtype; once the numbers are strings,
# numeric functions such as mean() can no longer be used on the array.
price.dtype  # bare expression: only displayed in an interactive session
# This is why a DataFrame is introduced next.
           
dtype('<U4')
           
import pandas as pd

# One string label column ('s') plus five numeric columns ('a'..'e').
price = pd.DataFrame({
    's': ['s1', 's2', 's3', 's4', 's5'],
    'a': [3, 4, 5, 6, 7],
    'b': [5, 6, 5, 4, 3],
    'c': [4, 4, 5, 4, 3],
    'd': [5, 5, 6, 7, 4],
    'e': [5, 6, 7, 5, 4],
})
print(price)  # with a DataFrame, numeric calculations are possible again
# Column 's' holds strings, so the reduction is restricted to the numeric
# columns explicitly; pandas 2.0+ raises TypeError instead of silently
# dropping non-numeric columns when numeric_only is left at its default.
# pandas' std differs from NumPy's: it applies Bessel's correction (ddof=1).
print(price.std(numeric_only=True))
# With Bessel's correction turned off (ddof=0) the values match NumPy's.
print(price.std(numeric_only=True, ddof=0))
           
    s  a  b  c  d  e
0  s1  3  5  4  5  5
1  s2  4  6  4  5  6
2  s3  5  5  5  6  7
3  s4  6  4  4  7  5
4  s5  7  3  3  4  4
a    1.581139
b    1.140175
c    0.707107
d    1.140175
e    1.140175
dtype: float64
a    1.414214
b    1.019804
c    0.632456
d    1.019804
e    1.019804
dtype: float64
           

 2、讀取dataframe資料單元

import pandas as pd

# Five numeric columns; rows are labelled 's1'..'s5' through the index.
columns = {
    'a': [3, 4, 5, 6, 7],
    'b': [5, 6, 5, 4, 3],
    'c': [4, 4, 5, 4, 3],
    'd': [5, 5, 6, 7, 4],
    'e': [5, 6, 7, 5, 4],
}
price = pd.DataFrame(columns, index=['s1', 's2', 's3', 's4', 's5'])
print(price)
first_row = price.loc['s1']  # select one row by its index label
print(first_row)
type(first_row)  # bare expression: only displayed in an interactive session
price['d']  # select one column by name (also only displayed interactively)
cells = price.values  # the underlying NumPy ndarray
print(cells)
print(cells.mean())  # mean over every value in the frame
           
    a  b  c  d  e
s1  3  5  4  5  5
s2  4  6  4  5  6
s3  5  5  5  6  7
s4  6  4  4  7  5
s5  7  3  3  4  4
a    3
b    5
c    4
d    5
e    5
Name: s1, dtype: int64
[[3 5 4 5 5]
 [4 6 4 5 6]
 [5 5 5 6 7]
 [6 4 4 7 5]
 [7 3 3 4 4]]
4.88