Cheatography
https://cheatography.com
The most important Pandas abilities
This is a draft cheat sheet. It is a work in progress and is not finished yet.
To Start
import numpy as np
import pandas as pd
|
Create
pd.DataFrame( dict/list, index = None, columns = None)
|
create DataFrame from list or dictionary |
|
set custom indexes |
pd.Series( list/np_array/dict, index = None)
|
create series from the list or np_array or dictionary |
Input and Output
pd.read_csv('name', index_col = None)
|
read csv |
pd.read_excel('name')
|
read excel |
df.to_csv('name', index = False)
|
save to csv |
df.to_excel('name', sheet_name = 'name', index = False)
|
save to excel |
Iteration
for lab, row in df.iterrows():
print(lab)
print(row)
|
Functions/Methods
s.drop(row_index, axis = 0)
|
drop values from rows of series |
df.drop(col_name, axis = 1)
|
drop values from columns |
df.drop(columns=[col_names])
|
drop columns from DataFrame |
|
remove duplicate rows (only considers columns) |
df.sort_values(by = col_names)
|
sort by the values along an axis |
df.sort_values( by = col_names, ascending = False)
|
order rows by values of a column high to low |
df.rename(columns = {'old_name':'new_name'})
|
rename the columns of a DataFrame |
|
assign ranks to entries |
|
append rows of DataFrames |
|
number of rows in DataFrame |
|
join two DataFrames |
df['col_name'].unique()
|
return unique values from column |
df[col_name].apply( func/type.method)
|
apply function to column |
df.apply(func/type.method)
|
apply function |
|
|
Extract
|
series of column |
|
select column |
|
select many columns |
|
select row |
|
select rows |
df.loc[[index_names]]
|
select rows |
df.iloc[[index_nums]]
|
select rows |
df.loc[[index_names], [col_names]]
|
select rows and columns |
|
select all rows and few columns |
|
select all rows and few columns |
|
select first n rows |
|
select last n rows |
df.filter(regex = 'regex')
|
select columns whose name matches regular expression regex |
Boolean Operators
df[np.logical_and.reduce([con1, con2, ...])]
|
1 'and' 2 condition ... |
|
1 'and' 2 condition ... |
df[np.logical_or.reduce([con1, con2, ...])]
|
1 'or' 2 condition ... |
|
1 'or' 2 condition ... |
|
'not' condition |
|
'not' condition |
|
for condition |
Get DataFrame Information
|
(rows, columns) |
|
describe index |
|
describe DataFrame columns |
|
info on DataFrame |
|
number of non_NA values for columns |
|
summary statistics |
Math
|
sum of values for columns |
|
cumulative sum of values |
|
minimum values for columns / maximum values for columns |
|
median of values for columns |
|
mean of values for columns |
|
standard deviation of each object |
|
variance of each object |
|