pandas 是基於 NumPy 的一種工具,該工具是為了解決資料分析任務而建立的。pandas 納入了大量庫和一些標準的資料模型,提供了高效地操作大型資料集所需的工具。pandas 提供了大量能使我們快速便捷地處理資料的函數和方法。—百度百科
基本操作
>>>import pandas as pd
>>>test = pd.Series(['pig', 'girl', 35, -123123123])
>>>test
0 pig
1 girl
2 35
3 -123123123
dtype: object
>>>test = pd.Series(['pig', 'girl', 35, -123123123],
index=['name','name','age','nums'])
>>>test
name pig
name girl
age 35
nums -123123123
dtype: object
>>>test = pd.Series(['pig', 'girl', 35, -123123123],
index=['name','name','age','nums'])
>>>test['name']
name pig
name girl
dtype: object
>>>test[['name','age']]
name pig
name girl
age 35
dtype: object
>>>data = {'year': [2016, 2015, 2017, 2014],
'teams': ['Bears', 'Bears', 'Bears', 'Packers'],
'wins': [11, 8, 10, 15],
'losses': [5, 8, 6, 1]}
>>>football = pd.DataFrame(data)
>>>football
losses teams wins year
0 5 Bears 11 2016
1 8 Bears 8 2015
2 6 Bears 10 2017
3 1 Packers 15 2014
>>>football.dtypes
losses int64
teams object
wins int64
year int64
dtype: object
>>>football.describe()
losses wins year
count 4.00000 4.00000 4.000000
mean 5.00000 11.00000 2015.500000
std 2.94392 2.94392 1.290994
min 1.00000 8.00000 2014.000000
25% 4.00000 9.50000 2014.750000
50% 5.50000 10.50000 2015.500000
75% 6.50000 12.00000 2016.250000
max 8.00000 15.00000 2017.000000
>>>football.head()
losses teams wins year
0 5 Bears 11 2016
1 8 Bears 8 2015
2 6 Bears 10 2017
3 1 Packers 15 2014
>>>football.tail()
losses teams wins year
0 5 Bears 11 2016
1 8 Bears 8 2015
2 6 Bears 10 2017
3 1 Packers 15 2014
>>>import numpy
>>> avg_medal_count = olympic_medal_counts_df[['gold', 'silver', 'bronze']].apply(numpy.mean)
>>>avg_medal_count
gold 3.807692
silver 3.730769
bronze 3.807692
dtype: float64
>>> from pandas import DataFrame, Series
>>>countries = ['Russian Fed.', 'Norway', 'Canada', 'United States',
'Netherlands', 'Germany', 'Switzerland', 'Belarus',
'Austria', 'France', 'Poland', 'China', 'Korea',
'Sweden', 'Czech Republic', 'Slovenia', 'Japan',
'Finland', 'Great Britain', 'Ukraine', 'Slovakia',
'Italy', 'Latvia', 'Australia', 'Croatia', 'Kazakhstan']
>>>gold = [13, 11, 10, 9, 8, 8, 6, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
>>>silver = [11, 5, 10, 7, 7, 6, 3, 0, 8, 4, 1, 4, 3, 7, 4, 2, 4, 3, 1, 0, 0, 2, 2, 2, 1, 0]
>>>bronze = [9, 10, 5, 12, 9, 5, 2, 1, 5, 7, 1, 2, 2, 6, 2, 4, 3, 1, 2, 1, 0, 6, 2, 1, 0, 1]
>>>olympic_medal_counts = {'country_name': Series(countries), 'gold': Series(gold),
'silver': Series(silver), 'bronze': Series(bronze)}
>>>olympic_medal_counts_df = DataFrame(olympic_medal_counts)
>>>olympic_medal_counts_df
bronze country_name gold silver
0 9 Russian Fed. 13 11
1 10 Norway 11 5
2 5 Canada 10 10
3 12 United States 9 7
4 9 Netherlands 8 7
5 5 Germany 8 6
6 2 Switzerland 6 3
7 1 Belarus 5 0
8 5 Austria 4 8
9 7 France 4 4
10 1 Poland 4 1
11 2 China 3 4
12 2 Korea 3 3
13 6 Sweden 2 7
14 2 Czech Republic 2 4
15 4 Slovenia 2 2
16 3 Japan 1 4
17 1 Finland 1 3
18 2 Great Britain 1 1
19 1 Ukraine 1 0
20 0 Slovakia 1 0
21 6 Italy 0 2
22 2 Latvia 0 2
23 1 Australia 0 2
24 0 Croatia 0 1
25 1 Kazakhstan 0 0
>>>import pandas as pd
>>>data = {'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
'team': ['Bears', 'Bears', 'Bears', 'Packers', 'Packers', 'Lions',
'Lions', 'Lions'],
'wins': [11, 8, 10, 15, 11, 6, 10, 4],
'losses': [5, 8, 6, 1, 5, 10, 6, 12]}
>>>football = pd.DataFrame(data)
>>>football['year']
0 2010
1 2011
2 2012
3 2011
4 2012
5 2010
6 2011
7 2012
Name: year, dtype: int64
>>>football.year
0 2010
1 2011
2 2012
3 2011
4 2012
5 2010
6 2011
7 2012
Name: year, dtype: int64
>>>football[['year', 'wins', 'losses']]
year wins losses
0 2010 11 5
1 2011 8 8
2 2012 10 6
3 2011 15 1
4 2012 11 5
5 2010 6 10
6 2011 10 6
7 2012 4 12
>>>data = {'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
'team': ['Bears', 'Bears', 'Bears', 'Packers', 'Packers', 'Lions',
'Lions', 'Lions'],
'wins': [11, 8, 10, 15, 11, 6, 10, 4],
'losses': [5, 8, 6, 1, 5, 10, 6, 12]}
>>>football = pd.DataFrame(data)
>>>football.iloc[[0]]
losses team wins year
0 5 Bears 11 2010
>>>football.loc[[0]]
losses team wins year
0 5 Bears 11 2010
>>>football[3:5]
losses team wins year
3 1 Packers 15 2011
4 5 Packers 11 2012
>>>football[football.wins > 10]
losses team wins year
0 5 Bears 11 2010
3 1 Packers 15 2011
4 5 Packers 11 2012
>>>football[(football.wins > 10) & (football.team == "Packers")]
losses team wins year
3 1 Packers 15 2011
4 5 Packers 11 2012