Libraries to Import: library(readr) | library(dplyr) | library(ggplot2) | library(broom) | library(caret) | library(rpart) | library(splines) | library(party) | library(leaps) | library(glmnet) | library(MASS) | library(class) |
Data Conversion: as.array(x) | as.character(x) | as.data.frame(x) | as.factor(x) | as.logical(x) | as.numeric(x) |
K-Fold: folds <- crossv_kfold(data, k = 5) [k is the number of folds; crossv_kfold() comes from the modelr package] |
Regularization - Ridge & Lasso: lambdas_to_try <- 10^seq(-3, 5, length.out = 100) | ridge_cv <- cv.glmnet(X, y, alpha = 0, lambda = lambdas_to_try, standardize = TRUE, nfolds = 10) [Setting alpha = 0 implements ridge regression] | lasso <- glmnet(data_x, data_y, alpha = 1.0) [Setting alpha = 1 implements lasso regression] |
Random Forest: randomForest(formula, data) [formula is a formula describing the predictor and response variables. data is the name of the data set used. Requires the randomForest package] |
| | Basic Codes: read_csv("path/nhanes.csv") [Read nhanes.csv in the path/ folder (readr)] | View(df) [View tabular data frame df in a graphical viewer] | mean, median, range [Descriptive stats. Remember na.rm=TRUE if desired] | filter(df, ...) [Filters data frame according to condition ... (dplyr)] | factor(x, levels=c("wt", "mutant")) [Create factor specifying level order] | relevel(x, ref="wildtype") [Re-level a factor variable] | t.test(y~grp, data=df) [T-test mean y across grp in data df] | lmfit <- lm(y~x1+x2, data=df) [Fit linear model y against two x’s] | anova(lmfit) [Print ANOVA table on object returned from lm()] | summary(lmfit) [Get summary information about a model fit with lm()] | TukeyHSD(aov(lmfit)) [ANOVA Post-hoc pairwise contrasts] | wilcox.test(y~grp, data=df) [Wilcoxon rank sum / Mann-Whitney U test] | xt <- xtabs(~x1+x2, data=df) [Cross-tabulate a contingency table] | addmargins(xt) [Adds summary margin to a contingency table xt] | addmargins(xt) [Adds summary margin to a contingency table xt] | chisq.test(xt) [Chi-square test on a contingency table xt] | fisher.test(xt) [Fisher’s exact test on a contingency table xt] | mosaicplot(xt) [Mosaic plot for a contingency table xt] | power.t.test(n, power, sd, delta) [T-test power calculations] | power.prop.test(n, power, p1, p2) [Proportions test power calculations] | tidy() augment() glance() [Model tidying functions in the broom package] |
| | Data Information: is.na(x) | is.null(x) | is.nan(x) | is.array(x) | is.data.frame(x) | is.numeric(x) | is.complex(x) | is.character(x) | head(x) | tail(x) | summary(x) | str(x) | length(x) | dim(x) | dimnames(x) | attr(x,which) | nrow(x) | ncol(x) | NROW(x) | NCOL(x) | class(x) | unclass(x) |
Data Splitting: createDataPartition(y, p=0.8) [It splits a vector 'y' with 80 percent of the data in one part and 20 percent in the other part] | trainControl(summaryFunction=<Rfunction>, classProbs=<logical>) [It is used for controlling training parameters like resampling, number of folds, iteration etc.] | densityplot.rfe(x,data,...) [Lattice functions for plotting resampling results of recursive feature selection] | featurePlot(x,y,plot...) [A shortcut to produce lattice plots] |
|
Created By
Metadata
Comments
No comments yet. Add yours below!
Add a Comment
More Cheat Sheets by [deleted]