pandas.
read_csv
(file_path)Читает данные из файла и возвращает pandas.DataFrame()
.
df = pandas.read_csv('data.csv', index_col=0, parse_dates=['date_'])
edu = pandas.read_csv(
'some.csv',
na_values=':',
usecols=['TIME', 'GEO', 'Value'],
)
edu
"""
TIME GEO Value
0 2000 sg11 sv21
1 2000 sg12 sv22
2 2000 sg13 sv23
...
500 2000 sg1500 sv2500
"""
edu.head()
"""
TIME GEO Value
0 2000 sg11 sv21
1 2000 sg12 sv22
2 2000 sg13 sv23
"""
edu.tail()
"""
498 2000 sg1498 sv2498
499 2000 sg1499 sv2499
500 2000 sg1500 sv2500
"""
edu.describe()
"""
TIME Value
count 384.000000 361.000000
mean 2005.500000 5.203989
std 3.456556 1.021694
min 2000.000000 2.880000
25% 2002.750000 4.620000
50% 2005.500000 5.060000
75% 2008.250000 5.660000
max 2011.000000 8.810000
"""
edu['Value']
"""
0 NaN
1 NaN
2 5.00
3 5.03
4 4.95
...
"""
edu[10:14]
"""
TIME GEO Value
10 2000 sg110 sv210
11 2000 sg111 sv211
12 2000 sg112 sv212
13 2000 sg113 sv213
14 2000 sg114 sv214
"""
edu.ix[90:94 , ['TIME ','GEO']]
"""
TIME GEO
90 2000 sg190
91 2000 sg191
92 2000 sg192
93 2000 sg193
94 2000 sg194
"""
edu[edu['Value '] > 6.5].tail()
"""
TIME GEO Value
218 2002 Cyprus 6.60
281 2005 Malta 6.58
94 2010 Belgium 6.58
93 2009 Belgium 6.57
95 2011 Belgium 6.55
"""
edu[edu["Value"].isnull()].head()
"""
TIME GEO Value
0 2000 European Union (28 countries) NaN
1 2001 European Union (28 countries) NaN
36 2000 Euro area (18 countries) NaN
37 2001 Euro area (18 countries) NaN
48 2000 Euro area (17 countries) NaN
"""
edu.max(axis=0)
"""
TIME 2011
GEO Spain
Value 8.81
"""
edu['Value'].max()
# 8.81
"""
count() Number of non-null observations
sum() Sum of values
mean() Mean of values
median() Arithmetic median of values
min() Minimum
max() Maximum
prod() Product of values
std() Unbiased standard deviation
var() Unbiased variance
"""
s = edu['Value']/100
s.head()
"""
0 NaN
1 NaN
2 0.0500
3 0.0503
4 0.0495
"""
s = edu['Value'].apply(numpy.sqrt)
s.head()
"""
0 NaN
1 NaN
2 2.236068
3 2.242766
4 2.224860
"""
edu['ValueNorm'] = edu['Value']/edu['Value'].max()
edu.tail()
"""
TIME GEO Value ValueNorm
379 2007 Finland 5.90 0.669694
380 2008 Finland 6.10 0.692395
381 2009 Finland 6.81 0.772985
382 2010 Finland 6.85 0.777526
383 2011 Finland 6.76 0.767310
"""
pandas.
DataFrame
data = {
'year': [
2010, 2011, 2012,
2010, 2011, 2012,
2010, 2011, 2012
],
'team': [
'FCBarcelona', 'FCBarcelona', 'FCBarcelona',
'RMadrid', 'RMadrid', 'RMadrid',
'ValenciaCF', 'ValenciaCF', 'ValenciaCF'
],
'wins': [
30, 28, 32,
29, 32, 26,
21, 17, 19
],
'draws': [
6, 7, 4,
5, 4, 7,
8, 10, 8
],
'losses': [
2, 3, 2,
4, 2, 5,
9, 11, 11
]
}
football = pd.DataFrame(
data ,
columns = [
'year', 'team', 'wins', 'draws', 'losses'
]
)
# изменяем значения в указанном столбце
football.losses.map(lambda x: x+1)
football.losses.map({
2: A,
3: B,
})
copy
()Возвращает копию объекта
df_copy = df.copy()
drop
()df.drop(columns='val_updated')
head
()Возвращает строку, голову/шапку данных таблицы
edu.head()
"""
TIME GEO Value
0 2000 sg11 sv21
1 2000 sg12 sv22
2 2000 sg13 sv23
"""
edu.head(5)
"""
TIME GEO Value
0 2000 sg11 sv21
...
4 2000 sg13 sv23
"""
set_inex
()df.set_index('date_')
df.set_index(['Date', 'Store'])
df.set_index('UPC EAN', append=True, inplace=True)
sample
()Отображает укзанное количество строк
df.sample(10)
"""
TIME GEO Value
0 2000 sg11 sv21
...
9 2000 sg13 sv23
"""
sort_values
()Сортирует таблицу по указанному стобцу
df.sort_values('date_')