Import Libraries to Read | library(readr) | library(ggplot2) | library(dplyr) | library(broom) | library(Tmisc) | library(caret) | library(caret) | library(splines) | library(party) | library(leaps) | library(glmnet) |
Apply Functions (m=matrix, a=array, l=list, v=vector, d=dataframe) | apply(x,index,fun) [input: m; output: a or l; applies function fun to rows/cols/cells (index) of x] | lapply(x,fun) [input: l; output: l; applies fun to each element of list x] | sapply(x,fun) [input: l; output: v; user-friendly wrapper for lapply(); see also replicate()] | tapply(x,index,fun) [input: l; output: l; applies fun to subsets of x, as grouped based on index] |
Clustering | wssplot <- function(data, nc=15, seed=1234){ wss <- (nrow(data)-1)*sum(apply(data,2,var)); for (i in 2:nc){ set.seed(seed); wss[i] <- sum(kmeans(data, centers=i)$withinss)}; plot(1:nc, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")} |
GGplot | ggplot(mydata, aes(xvar, yvar)) + geom_point(aes(color=groupvar)) + geom_smooth(method="lm") | qplot(x = cty, y = hwy, data = mpg, geom = "point") [Creates a complete plot with given data, geom, and mappings. Supplies many useful defaults] | last_plot() [Returns the last plot] | ggsave("plot.png", width = 5, height = 5) [Saves last plot as a 5 in x 5 in file named "plot.png" in working directory. Matches file type to file extension] |
Setup | createDummyFeatures(obj=,target=,method=,cols=) [creates (0,1) flags for each non-numeric variable excluding target] | normalizeFeatures(obj=,target=,method=,cols=,range=,on.constant=) | center: subtract mean | scale: divide by std. deviation | standardize: center and scale | range: linear scale to given range | mergeSmallFactorLevels(task=,cols=,min.perc=) [combine infrequent factor levels into single merged level] |
| | Basic Codes | read_csv("path/nhanes.csv") | View(df) | filter(df, ...) [Filters data frame according to condition] | mean, median, range [na.rm=TRUE] | t.test(y~grp, data=df) | wilcox.test(y~grp, data=df) | anova(lmfit) | TukeyHSD(aov(lmfit)) [ANOVA Post-hoc pairwise contrasts] | xt <- xtabs(~x1+x2, data=df) | addmargins(xt) | prop.table(xt) | chisq.test(xt) | fisher.test(xt) | mosaicplot(xt) | factor(x, levels=c("wt", "mutant")) | relevel(x, ref="wildtype") | power.t.test(n, power, sd, delta) | power.prop.test(n, power, p1, p2) | tidy() augment() glance() [Model tidying functions in the broom package] |
Model Functions | aov(formula, data) [analysis of variance model] | lm(formula, data) [fit linear models] | glm(formula, family, data) [fit generalized linear models] | nls(formula, data) [nonlinear least-squares estimates of the nonlinear model parameters] | lmer(formula, data) [fit mixed effects model (lme4); or lme() (nlme)] | anova(fit, data...) [provides sequential sums of squares and corresponding F-test for objects] | contrasts(fit, contrasts = TRUE) [view contrasts associated with a factor] | contrasts(fit, how.many) <- value | glht(fit, linfct) [makes multiple comparisons using a linear function linfct (multcomp)] | summary(fit) [summary of model, often w/ t-values] | confint(parameter) [confidence intervals for one or more parameters in a fitted model] | predict(fit,...) [predictions from fit] |
Decision Tree | ctree(formula,data) [formula is a formula describing the predictor and response variables] |
| | Data Information | is.na(x) | is.nan(x) | is.null(x) | is.array(x) | is.complex(x) | is.character(x) | is.data.frame(x) | is.numeric(x) | head(x) | tail(x) | summary(x) | str(x) | length(x) | dim(x) | dimnames(x) | attr(x,which) | nrow(x) | ncol(x) | NROW(x) | NCOL(x) | class(x) | unclass(x) |
Data Splitting and Manipulating | createDataPartition(y,p=0.8) [It splits a vector 'y' with 80 percent data in one part and 20 percent in other part] | trainControl(summaryFunction = <R function>, classProbs = <logical>) [It is used for controlling training parameters like resampling, number of folds, iteration etc.] | densityplot.rfe(x,data,...) [Lattice functions for plotting resampling results of recursive feature selection] | featurePlot(x,y,plot...) [A shortcut to produce lattice plots] |
Polynomial regression | $medv = b_0 + b_1 \cdot lstat + b_2 \cdot lstat^2$ | lm(medv ~ lstat + I(lstat^2), data = train.data) | lm(medv ~ poly(lstat, 2, raw = TRUE), data = train.data) |
Spline Model | spline(x,y) [cubic spline interpolation] | splineKnots(object) | knots <- quantile(train.data$lstat, p = c(0.25, 0.5, 0.75)) |
Step-wise Selection | null <- lm(Formula~1, data=dtrain) | full <- lm(Formula~., data=dtrain) | step(null, scope=list(lower=null, upper=full), direction="forward") | step(full, scope=list(lower=null, upper=full), direction="backward") |
Preprocessing | Transformations, filters, and other operations can be applied to the predictors with the preProc option. | train(, preProc = c("method1", "method2"), ...) | train determines the order of operations; the order in which the methods are declared does not matter. | The recipes package has a more extensive list of preprocessing operations. |
|
Created By
Metadata
Comments
No comments yet. Add yours below!
Add a Comment
Related Cheat Sheets
More Cheat Sheets by [deleted]