Show Menu
Cheatography

Python Snippets by

Operators

Arithmetic
Addition (+), Subtraction (-), Multiplication (*), Division (/), Modulus (%)
Relational
<, >, <=, >=, ==, != (not equal)
Assignment
=, +=, -=, /=, *=, %=
Logical
and, or, not
Membership
in, not in
Identity (same memory location)
is, is not

Functions

len()
determine the length of a string, a list, an array
split()
split a string into shorter strings using a defined separator
string.split(",")
sum(), mean(), count(), std()
functions that can be used with groupby in pandas
# Aggregate Age per (Team, Pos) group with several functions at once.
grouped_multiple = df.groupby(['Team', 'Pos']).agg({'Age': ['mean', 'min', 'max']})
# Replace the column labels produced by agg() with flat, readable names.
grouped_multiple.columns = ['age_mean', 'age_min', 'age_max']
# Turn the Team/Pos group keys back into ordinary columns.
grouped_multiple = grouped_multiple.reset_index()

# Largest Salary within each (Team, College) group.
df.groupby(["Team", "College"])["Salary"].max()
agg()
Allows for multiple or custom aggregations
def pct30(column):
    """Return the 30th percentile of *column*.

    *column* must provide a ``quantile`` method (for example a pandas
    Series); the value of ``column.quantile(0.3)`` is returned unchanged.
    """
    return column.quantile(0.3)
   
# A custom function is passed to agg() in place of a built-in aggregation.
dogs["weight_kg"].agg(pct30)
keys()
We can use the keys() method of a GroupBy object's groups to see how the rows of a dataset have been split
# The keys of .groups are the distinct group labels (here: the months).
data.groupby(['month']).groups.keys()
join() and ravel()
An effective way to rename columns after a groupby aggregation
# agg() takes a dict mapping each column to the aggregations to compute;
# the aggregations are given by name so no extra imports are needed.
grouped = data.groupby('month').agg({"duration": ["min", "max", "mean"]})

# The result has two-level column labels such as ("duration", "min");
# join each pair into one flat name like "duration_min".
grouped.columns = ["_".join(x) for x in grouped.columns.ravel()]

Custom Functions

User-Defined Functions

By adding * to a parameter, we can pass any number of positional arguments to that parameter

def func_with_var_pos_args(*args):
    """Print every positional argument on its own line.

    Any number of positional arguments (including none) may be passed;
    they are collected into the tuple ``args``. Returns ``None``.
    """
    for arg in args:
        print(arg)

Similarly, by adding * to an argument, we can add any number of arguments to that parameter

def func_with_var_pos_args(*args):
    """Print each of the positional arguments, one per line.

    ``*args`` gathers however many positional arguments the caller
    supplies into a tuple. Nothing is returned.
    """
    for arg in args:
        print(arg)

Naming Conventions

Function
function, my_function
Variable
x, var, my_variable
Class
Model, MyClass
Method
class_method, method
 

Packaging and Displaying

from pprint import pprint

# Pretty-print the list of attribute names that dir() reports for my_dict.
pprint(dir(my_dict))
Pychecker
detects bugs from the source code and warns about its style and complexity
Pylint
Checks whether the module conforms to a coding standard.
Modules
Each Python program file is a module, importing other attributes and objects.
Package
folder of modules

Map, Filter and Lambda

Map
Applies a function to the input list
map(function_to_apply, list_of_inputs)
items = [1, 2, 3, 4, 5]
# map() returns a lazy iterator in Python 3; list() collects the results.
squared = list(map(lambda x: x**2, items))
filter
creates a list of elements for which a function returns true.
filter(function_to_apply, list_to_select_from)
number_list = range(-5, 5)
# Keep only the values for which the lambda returns True.
less_than_zero = list(filter(lambda x: x < 0, number_list))
Reduce
applies a rolling computation to sequential pairs of values in a list
from functools import reduce

# Folds the list from left to right: ((1 * 2) * 3) * 4.
product = reduce((lambda x, y: x * y), [1, 2, 3, 4])

Scikit Learn - Regression

# Polynomial regression: expand the features to degree 2, then fit an
# ordinary linear regression on the expanded matrix.
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(xtrain)
# The transformed array has no predict(); instead the fitted transformer is
# applied to the test features so they get the same polynomial columns.
xtest_poly = poly_reg.transform(xtest)

# Keep at most the first 11900*3 training rows.
# NOTE(review): the reason for this row cap is not visible here -- confirm.
xtrainp = X_poly[:11900*3]


# polynomial regression model
poly_reg_model = LinearRegression()
# Slice the targets to the same number of rows as the capped feature matrix
# so X and y always line up.
poly_reg_model.fit(xtrainp, ytrain[:xtrainp.shape[0]])
poly_pred = poly_reg_model.predict(xtest_poly)
# NOTE(review): this sheet uses both `y_test` and `ytest` for the test
# targets -- confirm which name is actually defined.
print(metrics.mean_squared_error(y_test, poly_pred))


# --- Fit several regressors on the same training data ----------------------

# Support-vector regression with an RBF kernel.
svr_regressor = SVR(kernel='rbf', gamma='auto')
svr_regressor.fit(xtrain, ytrain)

# Single decision tree; random_state pins the tie-breaking.
tree_regressor = DecisionTreeRegressor(random_state=0)
tree_regressor.fit(xtrain, ytrain)

# Random forest of 300 trees.
forest_regressor = RandomForestRegressor(n_estimators=300, random_state=0)
forest_regressor.fit(xtrain, ytrain)

from sklearn import linear_model
# NOTE(review): recent scikit-learn releases no longer accept `normalize`
# here -- confirm against the installed version.
reg = linear_model.LassoLars(alpha=.1, normalize=False)
reg.fit(xtrain, ytrain)

reg.coef_
reg.predict(xtest)

# NOTE(review): SGDClassifier is a classifier, yet it sits in the regression
# section -- confirm this is intended for the targets in ytrain.
est = SGDClassifier()
est.fit(xtrain, ytrain)
est.predict(xtest)

linear_regression = LinearRegression()

y_pred_lr = linear_regression.fit(xtrain, ytrain).predict(xtest)


xgbmodel = xgboost.XGBRegressor(colsample_bytree=0.4,
                                gamma=0,
                                learning_rate=0.07,
                                max_depth=3,
                                min_child_weight=1.5,
                                n_estimators=10000,
                                reg_alpha=0.75,
                                reg_lambda=0.45,
                                subsample=0.6,
                                seed=42)
xgbmodel.fit(xtrain, ytrain)


# --- Predict once per model and reuse the result ---------------------------
svr_pred = svr_regressor.predict(xtest)
tree_pred = tree_regressor.predict(xtest)
forestrev = forest_regressor.predict(xtest)
xgb_pred = xgbmodel.predict(xtest)

print(svr_pred)
print(tree_pred)
print(y_pred_lr)
print(forestrev)
# The bare `model.predict(xtest)` call that used to follow was dropped:
# nothing in this script has bound `model` by this point (the Keras `model`
# is only created further down), so the call could not run here.


# --- Mean squared error of each model on the test set ----------------------
# NOTE(review): this sheet uses both `y_test` and `ytest` for the test
# targets -- confirm which name is actually defined.
print(metrics.mean_squared_error(y_test, svr_pred))

print(metrics.mean_squared_error(y_test, tree_pred))
print(metrics.mean_squared_error(y_test, y_pred_lr))
print(metrics.mean_squared_error(y_test, forestrev))

xgb_pred.mean()
print(metrics.mean_squared_error(y_test, xgb_pred))


# Reference means to compare the predictions against.
ytest.mean()

bas.REVENUE.mean()





# Convert the training data to float32 NumPy arrays.
xtrain = np.array(xtrain).astype(np.float32)
ytrain = np.array(ytrain).astype(np.float32)

# Append a trailing axis of length 1: (shape[0], shape[1]) -> (shape[0], shape[1], 1).
# This is done once; repeating it on the already reshaped array changes nothing.
xtrain = np.reshape(xtrain, (xtrain.shape[0], xtrain.shape[1], 1))

# Build the LSTM network: two stacked 50-unit LSTM layers and a single-unit
# Dense output. The network is only constructed here; it is not compiled or
# fitted in this snippet.
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(xtrain.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(1))
 

Looping Data Structures

1) With One Column:
import pandas as pd

# Load the data set, using the first CSV column as the row index.
brics = pd.read_csv("brics.csv", index_col=0)
# Iterating over a DataFrame directly yields its column labels.
for val in brics:
    print(val)

2) Index then all cols in row:
# iterrows() hands back one (index label, row) pair per row; show the label
# and then the row, each starting on its own line.
for lab, row in brics.iterrows():
    print(lab, row, sep="\n")

3) Index then one col in row:
# For every row, look up its "country" value and store the length of that
# value in the "name_length" column at the same index label.
for lab, row in brics.iterrows():
    country = row["country"]
    brics.loc[lab, "name_length"] = len(country)

4) Apply 
# Loop-free alternative: call len() on every entry of the "country" column.
country_names = brics["country"]
brics["name_length"] = country_names.apply(len)

Scikit Learn - Classification

## Classifier imports
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.linear_model import Ridge 
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Model instances, grouped by the module they are imported from.
# Naive Bayes variants
gnb, MNB, BNB = GaussianNB(), MultinomialNB(), BernoulliNB()
# Nearest neighbour (a single neighbour)
KNN = KNeighborsClassifier(n_neighbors=1)
# Linear models
LR, SDG = LogisticRegression(), SGDClassifier()
# Support-vector machines
LSVC, NSVC = LinearSVC(), NuSVC()
# An SVC(kernel='linear', C=1e3) model stays disabled, as in the original
# sheet; binding it to the name `SVC` would also shadow the imported class.


def _fit_and_report(label, classifier):
    """Fit *classifier* on (x1, y1), print its accuracy on (x2, y2).

    Returns the predictions made for x2 so the caller can keep them.
    """
    classifier.fit(x1, y1)
    predicted = classifier.predict(x2)
    print(label, accuracy_score(y2, predicted))
    return predicted


# Train each classifier in turn and print its accuracy score.
# MNB is constructed above but, as in the original sheet, not trained here.
y2_GNB_model = _fit_and_report("GaussianNB Accuracy :", gnb)
y2_KNN_model = _fit_and_report("KNN Accuracy :", KNN)
y2_BNB_model = _fit_and_report("BNB Accuracy :", BNB)
y2_LR_model = _fit_and_report("LR Accuracy :", LR)
y2_SDG_model = _fit_and_report("SDG Accuracy :", SDG)
y2_LSVC_model = _fit_and_report("LSVC Accuracy :", LSVC)
y2_NSVC_model = _fit_and_report("NSVC Accuracy :", NSVC)
 

Comments

No comments yet. Add yours below!

Add a Comment

Your Comment

Please enter your name.

    Please enter your email address

      Please enter your Comment.

          More Cheat Sheets by datamansam

          Core Cloud Concepts with AWS Cheat Sheet