forked from ujjwalkarn/DataScienceR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
useful_commands.R
66 lines (47 loc) · 1.72 KB
/
useful_commands.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#Useful R commands
#taken from https://github.com/FlorianMuellerklein/R
# Clear console
# Pushes earlier output out of view by printing `n` empty lines.
#   n: number of blank lines to print (default 50, as before).
# sep = "" is required: cat() otherwise joins its arguments with a space,
# so every line after the first would contain a stray " ".
clc <- function(n = 50L) cat(rep("\n", n), sep = "")
clc()
# Read SPSS
# read.spss() comes from the 'foreign' package, so it is called with an
# explicit namespace; replace `file` with the path to the .sav file.
foreign::read.spss(file, use.value.labels = TRUE, to.data.frame = TRUE)
# Subset based on factor: keep only the rows where the factor equals `whatever`
Newdataframe <- subset(maindataframe, maindataframe$factor == whatever)
# Label factors (categories): convert the column to a factor with readable labels
dataframe$whatevercolumn <- factor(dataframe$whatevercolumn, labels = c('blah', 'blah'))
# Change column names: rename the column at position x
colnames(dataframe)[x] <- "newname"
# Plot with factor colors
# NOTE: qplot() is deprecated as of ggplot2 3.4.0; the long-form equivalent is
#   ggplot(dataframe, aes(x = xcolumn, y = ycolumn, color = factor)) + geom_point()
qplot(dataframe$whatevercolumn, dataframe$whatevercolumn, color = dataframe$factor)
# Pretty Histograms: bars counting each value of datacolumn, filled by factorcolumn
ggplot(dataframe, aes(x = datacolumn, fill = factorcolumn)) + geom_bar()
# Standard Deviation of column
# sd() must receive the whole vector. sapply(column, sd) would call sd() on
# each single value and return NA for every element.
sd(dataframe$column)
# T-test using a factor (the factor splits the column into the two groups)
t.test(dataframe$whatevercolumn ~ dataframe$factor)
# Count values in a factor
table(dataframe$factor)
# Summary stats
summary(dataframe$whatevercolumn)
# Check if data is normal
# shapiro.test() accepts exactly one numeric vector; test each variable separately.
shapiro.test(x)
# ANOVA 1-way
anova <- aov(data ~ factor)
summary(anova)
# ANOVA 2-way: two distinct factors, main effects plus their interaction
anova <- aov(data ~ factor1 * factor2)
summary(anova)
# After running anova you can see the pair-wise comparison.
# TukeyHSD() needs the fitted aov object itself, not its name as a string.
TukeyHSD(anova)
# Fit a linear regression (single predictor)
fit <- lm(y ~ x, data = dataframe)
# Multiple predictors; `...` is shorthand here for the remaining predictor
# names, spell them out in real code
fit <- lm(y ~ x1 + x2 + ... + xn, data = dataframe)
# Predict using fitted regression. The column names in newdata must match the
# predictor names used to fit the model (shown here for the y ~ x model).
predict(fit, newdata = data.frame(x = listofnewvalues))
# Draw the same slice (observations 100 to 150) of two time series on one graph
ts.plot(
  ts(a.ts[100:150]),
  ts(b.ts[100:150]),
  gpars = list(col = c("black", "red"))
)
# Alternative with ggplot2: one coloured line per value of `variable`
ggplot(
  df,
  aes(x = timevariable, y = value, color = variable, group = variable)
) +
  geom_line()
# Test whether a specific value occurs in an array.
# Operator form:
"value" %in% array
# Function form of the same check:
is.element("value", array)