Skip to content

Latest commit

 

History

History
127 lines (91 loc) · 2.71 KB

PA1_template.md

File metadata and controls

127 lines (91 loc) · 2.71 KB

Reproducible Research: Peer Assessment 1

Loading and preprocessing the data

library(lubridate)
# Read activity.csv directly from inside the zip archive.
data <- read.csv(unz("activity.zip", "activity.csv"))
# Parse the `date` column with lubridate's year-month-day parser.
data$date <- ymd(data$date)

What is mean total number of steps taken per day?

library(plyr)
# Total steps for each date. Missing step counts are dropped before summing,
# so a date whose step counts are all NA gets a total of 0.
dataPerDay <- ddply(data, "date", summarise,
                    stepsSum = sum(steps, na.rm = TRUE))
# Distribution of the daily totals.
hist(dataPerDay$stepsSum)

plot of chunk unnamed-chunk-2

# Mean of the per-day step totals.
with(dataPerDay, mean(stepsSum))
## [1] 9354
# Median of the per-day step totals.
with(dataPerDay, median(stepsSum))
## [1] 10395

What is the average daily activity pattern?

### Plot

# Mean number of steps in each interval over all dates, ignoring missing values.
dataPerInterval <- ddply(data, "interval", summarise,
                         stepsMean = mean(steps, na.rm = TRUE))
# Line plot of the interval means against the interval identifier.
plot(dataPerInterval$interval, dataPerInterval$stepsMean, type = "l")

plot of chunk unnamed-chunk-3

### Interval with maximum steps

# Interval(s) whose mean step count equals the largest mean over all intervals.
with(dataPerInterval, interval[which(stepsMean == max(stepsMean))])
## [1] 835

Imputing missing values

How many NAs are there?

# Number of observations whose step count is missing.
with(data, sum(is.na(steps)))
## [1] 2304

We will impute each missing value with the mean number of steps recorded for the same interval, taken across all days.

# Attach each interval's mean step count (stepsMean) to every observation of
# that interval. merge() returns the rows sorted by interval.
imputedData <- merge(data, dataPerInterval, by = "interval")
# Substitute NAs with the mean for the respective interval.
missingSteps <- is.na(imputedData$steps)
imputedData$steps[missingSteps] <- imputedData$stepsMean[missingSteps]
# Remove the helper column by name, so the step does not depend on the
# column order produced by merge().
imputedData$stepsMean <- NULL

# Daily totals after imputation. na.rm is kept so that a value that could not
# be imputed (an interval with no observed steps at all) is still skipped.
imputedDataPerDay <- ddply(imputedData, .(date), summarize,
                           stepsSum = sum(steps, na.rm = TRUE))

# Prepare data for visual comparison: label each set of daily totals with its
# origin and stack them into one long data frame.
imputedDataPerDayComparison <- imputedDataPerDay
imputedDataPerDayComparison$kind <- "imputed"
dataPerDayComparison <- dataPerDay
dataPerDayComparison$kind <- "original"

comparison <- rbind(imputedDataPerDayComparison, dataPerDayComparison)

library(ggplot2)
# position = "identity" overlays the two semi-transparent histograms. The
# default ("stack") piles one distribution on top of the other, which makes the
# bar heights of the upper group unreadable as counts.
ggplot(comparison, aes(x = stepsSum, fill = kind)) +
    geom_histogram(alpha = 0.2, position = "identity",
                   binwidth = diff(range(comparison$stepsSum)) / 10)

plot of chunk unnamed-chunk-6

# Mean daily total: imputed data first, original data second.
vapply(list(imputedDataPerDay, dataPerDay),
       function(d) mean(d$stepsSum), numeric(1))
## [1] 10766  9354
# Median daily total, in the same order.
vapply(list(imputedDataPerDay, dataPerDay),
       function(d) median(d$stepsSum), numeric(1))
## [1] 10766 10395

Are there differences in activity patterns between weekdays and weekends?

# Classify every observation as weekend or weekday. as.POSIXlt()$wday numbers
# the days 0 (Sunday) through 6 (Saturday) independently of the locale, whereas
# weekdays() returns translated day names that only match 'Saturday'/'Sunday'
# in an English locale.
isWeekend <- as.POSIXlt(imputedData$date)$wday %in% c(0, 6)
# Explicit levels keep the factor stable even if one of the two groups is empty.
imputedData$day <- factor(ifelse(isWeekend, "Weekend", "Weekday"),
                          levels = c("Weekday", "Weekend"))
# Mean steps for each interval, computed separately for weekdays and weekends.
imputedDataPerInterval <- ddply(imputedData, .(interval, day), summarize,
                                steps = mean(steps))
# One line per day type so the two activity patterns can be compared.
ggplot(imputedDataPerInterval, aes(x = interval, y = steps, color = day)) +
    geom_line()

plot of chunk unnamed-chunk-7