Util Functions
data = read.csv(file.choose()) |
Opens file explorer to get data |
setwd("Path") |
Sets the working directory |
getwd() |
Gets the working directory |
str(variable_name) |
Structure of the variable |
ls() |
Lists the variables |
rm(variable_name) |
Removes the variable |
help.start() |
Opens help |
library("Package Name") |
Loads the package so its functions are available for use |
install.packages("Package Name") |
Installs package |
detach("package:PackageName", unload=TRUE) |
Detaches the package from the search path |
history() |
Shows the history |
DataFrame
d=data.frame(RollNo=1:3, gender=c("F","M","F"), Grade=c(8,4,7)) |
Creates a data frame - all columns must have the same length |
View(d) |
Opens the data frame in a spreadsheet-style viewer |
summary(d) |
Provides the descriptive statistics |
head(d)/tail(d) |
Displays the top or bottom rows |
Strings
toString(x) |
To produce a single character string |
toupper(x)/tolower(x) |
Converts the string to upper case / lower case |
strsplit(word, character, fixed=TRUE) |
Splits the word based on the character given |
substring(word,startpoint,endpoint) |
Retrieves or replaces the substring of the character |
Arrays
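arr1D = array(1:24) |
1 dimensional array |
arr2D = array(1:24, dim = c(6,4)) |
2 dimensional array |
arr3D = array(1:24, dim = c(4,3,2)) |
3 dimensional array |
dim(arr2D) |
Dimensions of the array |
nrow(arr2D) |
Number of rows |
ncol(arr2D) |
Number of columns |
length(arr2D) |
Total number of elements |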
|
|
Vectors
num = c(1,2,3,4,5,6) |
numeric vector |
chr = c("aaa","bbb") |
character vector |
log = c(TRUE,TRUE,FALSE) |
logical vector |
mean(vector) |
mean |
sd(vector) |
standard deviation |
var(vector) |
variance |
range(vector) |
range |
which.min(vec)/which.max(vec) |
Position (index) of the minimum/maximum value in the vector |
rep(1:5,times=3) |
Repeats the sequence 1:5 three times |
Matrix
mat = matrix(1:12, nrow=4, ncol=3) |
matrix |
cbind(mat1,mat2) |
column bind |
rbind(mat1,mat2) |
row bind |
det(matrix(c(1,0,0,1),2)) |
Determinant |
solve(A) |
Inverse |
A%*%B |
Matrix Multiplication |
Lists
x<-list(c(1,2,3),c("A","B","C")) |
Collection of Elements |
x[[1]] |
First element of the list |
Descriptive Statistics
colMeans(data[]) |
column mean |
colSums(data[]) |
column sum |
rowMeans(data[]) |
row mean |
rowSums(data[]) |
row sum |
Hypothesis
t.test(data) |
1 sample t test |
t.test(data1,data2) |
2 sample t test |
t.test(pre,post,paired=TRUE) |
paired sample t test |
cor.test(data1,data2) |
Correlation |
chisq.test(data) |
Chi square |
aov() |
ANOVA |
wilcox.test(data) |
Wilcoxon test |
shapiro.test(data) |
Shapiro-Wilk normality test |
prop.test |
Testing proportions |
|
|
Visualization
qplot(data, line=TRUE,...) |
produces quantile-quantile plot |
qqnorm(column) |
produces quantile-quantile plot |
barplot(table) |
Relationship between a numerical and a categorical variable |
boxplot(x,data=) |
Distribution |
ggplot(data = NULL, mapping = aes(), ...) |
initializes a ggplot object |
geom_bar() |
bar graph |
coord_flip() |
flip x and y coordinates |
facet_grid() |
lay out panels in a grid |
geom_density |
density plot |
geom_histogram |
histogram |
geom_point |
scatter plots |
hist(column) |
histogram (visual check of the distribution, e.g. for normality) |
pie(table(tablename)) |
piecharts |
Loops
for (variable in sequence){ Do something } |
for loop |
while (condition){ Do something } |
while loop |
if (condition){ Do something } else { Do something different } |
if-else statement |
Probability
runif(n, min = 0, max = 1) |
Uniform |
rnorm(n,mean,sd) |
Normal or Gaussian |
rexp(n) |
Exponential |
rbinom(n, size, prob) |
Binomial |
rpois(n,lambda) |
Poisson |
Data Table
library(data.table) |
class(x) |
head(x) |
Statistics
fit <- lm(y ~ x1 + x2 + x3, data=mydata); summary(fit) |
Regression |
glm(y ~ x1 + x2 + x3, family=binomial, data=mydata) |
Classification |
kmeans(data, centers) |
Clustering |
|