-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
executable file
·127 lines (99 loc) · 4 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# Submitted for the Course Project of "Getting and Cleaning Data"
# offered on Coursera.org by JHU
#
# Required Dataset:
# https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip
#
# Purpose of this file:
# 1.Merges the training and the test sets to create one data set.
# 2.Extracts only the measurements of the mean ("~mean()") and
# standard deviation ("~std()") for each observation.
# 3.Uses descriptive activity names to name the activities in the data set (activity_labels.txt)
# 4.Appropriately labels the data set with descriptive variable names.
#
# 5.From the data set in step 4, creates a second, independent tidy data
# set with the average of each variable for each activity and each subject.
# Reads the raw UCI HAR files, keeps only the mean()/std() measurements and
# combines the test and training partitions into one labelled data.frame.
#
# arguments:
# dataDir: Directory containing the unzipped data set (features.txt,
#          activity_labels.txt and the test/ and train/ sub-directories).
#          Defaults to "UCI HAR Dataset" relative to the working directory.
#
# returns:
# A data.frame with one row per observation (test rows first, then train
# rows), one numeric column per selected feature (named as in features.txt),
# a factor column `Activity` labelled from activity_labels.txt, and an
# integer column `Subject`.
merge_data <- function(dataDir = "UCI HAR Dataset") {
  # r man recommends use of scan() for large matrices.
  # features.txt holds "<index> <name>" pairs; only the names are needed.
  featureNames <- scan(file = file.path(dataDir, "features.txt"),
                       what = list(integer(), character()),
                       quiet = TRUE)[[2]]
  if (length(featureNames) == 0L) {
    stop("No feature names found in ", file.path(dataDir, "features.txt"),
         call. = FALSE)
  }

  # Indices of the mean() and std() features, in ascending order. The
  # escaped "(" keeps names such as meanFreq() out of the selection.
  targetColumns <- grep("(mean|std)\\(", featureNames)

  # Reads one partition ("test" or "train") and returns a numeric matrix of
  # the selected measurements with Activity and Subject columns appended.
  readPartition <- function(partition) {
    partitionDir <- file.path(dataDir, partition)
    partitionFile <- function(prefix) {
      file.path(partitionDir, paste0(prefix, "_", partition, ".txt"))
    }

    values <- scan(file = partitionFile("X"),
                   what = double(),
                   quiet = TRUE,
                   comment.char = "")
    # The row count is derived from the file instead of being hard-coded,
    # so an incomplete file is reported rather than silently recycled.
    if (length(values) %% length(featureNames) != 0L) {
      stop("Measurement file ", partitionFile("X"),
           " does not contain a whole number of rows of ",
           length(featureNames), " features",
           call. = FALSE)
    }
    measurements <- matrix(values, ncol = length(featureNames), byrow = TRUE)
    colnames(measurements) <- featureNames
    # drop = FALSE keeps a matrix even when a single column is selected.
    measurements <- measurements[, targetColumns, drop = FALSE]

    Activity <- scan(file = partitionFile("y"),
                     what = integer(),
                     quiet = TRUE)
    Subject <- scan(file = partitionFile("subject"),
                    what = integer(),
                    quiet = TRUE)
    if (length(Activity) != nrow(measurements) ||
        length(Subject) != nrow(measurements)) {
      stop("Activity/subject files for partition '", partition,
           "' do not match the number of measurement rows",
           call. = FALSE)
    }

    # cbind() names the appended columns after the variables.
    cbind(measurements, Activity, Subject)
  }

  # convert to data.frame, add factors
  harData <- as.data.frame(rbind(readPartition("test"),
                                 readPartition("train")))

  activityLabels <- read.table(file.path(dataDir, "activity_labels.txt"),
                               sep = " ",
                               colClasses = c("integer", "character"))
  harData$Activity <- factor(harData$Activity,
                             levels = activityLabels[[1]],
                             labels = activityLabels[[2]])
  # The numeric matrix stored subject ids as doubles; restore integers.
  harData$Subject <- as.integer(harData$Subject)
  harData
}
# Builds the tidy data set holding the average of every measurement for each
# activity and each subject.
#
# arguments:
# writeToFile: If TRUE, write the data set to `filename`; if FALSE, return it
#              as a data.frame instead.
# filename: Name of the file to be written if writeToFile is TRUE.
#
# returns:
# The aggregated data.frame when writeToFile is FALSE; otherwise the
# (invisible NULL) result of write.table().
run_analysis <- function(writeToFile = TRUE,
                         filename = "dataset.txt") {
  mergedData <- merge_data()

  # Every column except the two grouping keys is a measurement. Selecting by
  # name with drop = FALSE also works with zero or one measurement column.
  isMeasurement <- !(names(mergedData) %in% c("Activity", "Subject"))

  # Aggregate data to means
  # This will create a "wide-form tidy" data set
  tidyData <- aggregate(mergedData[, isMeasurement, drop = FALSE],
                        by = list(Activity = mergedData$Activity,
                                  Subject = mergedData$Subject),
                        FUN = mean)

  # order data by activity then participant
  tidyData <- tidyData[order(tidyData$Activity, tidyData$Subject), ]

  # write out data set as a txt file created with write.table() using
  # row.names = FALSE
  if (writeToFile) {
    write.table(x = tidyData,
                file = filename,
                row.names = FALSE)
  } else {
    tidyData
  }
}