-
Notifications
You must be signed in to change notification settings - Fork 1
/
R-bios6643-L10.Rmd
executable file
·211 lines (151 loc) · 6.78 KB
/
R-bios6643-L10.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
---
title: "BIOS6643. L10 Model Building"
cheauthor: "EJC"
-- date: ""
header-includes:
- \usepackage{amsmath}
- \usepackage{float}
output: pdf_document
---
\newcommand{\bi}{\begin{itemize}}
\newcommand{\ei}{\end{itemize}}
\newcommand{\itt}{\item}
```{r setup, include=FALSE, echo=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
<!-- -->
```{r installp, echo = FALSE, eval=T, include=F}
library(dplyr)
library(tidyverse)
library(ggplot2)
library(nlme)
library(lme4)
```
# Exercise involving $\pmb G$ and $\pmb R$ matrices
Consider a basic science experiment conducted where cell counts are measured at 4 time points for samples taken from individual subjects or animals. A linear mixed model will be fit for the data (perhaps after log transformation), and fixed effects will be included for time, and treatment group as well as their interaction. (To answer this question we do not need to know the specific form of $\pmb {X\beta}$.)
Determine the structure for $\pmb V_i$ if a random intercept for subjects will be included, plus an AR(1) structure for the error covariance matrix ($\pmb R_i$). What does the combination of non-simple $\pmb R$ and $\pmb G$ allow you to do in modeling covariance that using only one cannot do? Discuss in a few sentences.
# FEV Study
## Description
The data set gives characteristics of children patients with a diagnosis of Cystic Fibrosis (CF) who are patients at the Colorado Children’s Hospital. Data pull was conducted in Summer 2020. This dataset may only be used for BIOS6643.
VARIABLE DESCRIPTIONS:
id: patient ID
race: race
Genotypes1: mutation of copy one of the gene that codes for the CFTR protein
Genotypes2: mutation of copy two of the gene that codes for the CFTR protein.
age: Age in years
gender : gender
fev: \% of predicted forced expiratory volume in 1 second
- **Objectives** of the study included:
- Determine if fev values over time are larger on average for males than for females
- Determine if the rate of change of fev over time is different for males and females.
## Selecting the mean structure
```{r eval=T, echo = T, include=TRUE}
# Read in the data
## Read in the data
dat <- read.csv("/Users/juarezce/Documents/OneDrive - The University of Colorado Denver/BIOS6643/BIOS6643_Notes/data/fev.csv",
header=TRUE)
length(unique(dat$id)) ## number of individuals
head(dat,3)
summary(dat$age) ## age in years
summary(dat$fev)
######
################################################################################################
### selecting the mean form
## --------------------------------------------------------------------------------------------
ggplot(dat, aes(x = age, y = fev, group = id)) +
facet_wrap(~gender) +
geom_line(aes(color = gender), alpha = 0.3) +
geom_smooth(aes(group = 1), method="loess", color="black", se=FALSE) +
theme_classic() +
theme(legend.position = "top") +
ylab("Fev")
## age as linear
fit.lme.0 <- lme(fev ~ -1 + gender + age:gender, data=dat,
random = ~ 1 | id,method="ML")
summary(fit.lme.0)
## adding quadratic terms of age
fit.lme.1 <- lme(fev ~ -1 + gender + age:gender + I(age*age):gender,
data=dat,
random = ~ 1 | id,method="ML")
summary(fit.lme.1)
## adding cubic terms of age
fit.lme.2 <- lme(fev ~ -1 + gender + age:gender + I(age*age):gender +
I(age*age*age):gender, data=dat,
random = ~ 1 | id,method="ML")
summary(fit.lme.2)
## updating model - same as above
## fit.lme.2 <- update(fit.lme.1, fev ~ -1 + gender + age:gender + I(age*age):gender +
## I(age*age*age):gender)
anova(fit.lme.0, fit.lme.1, fit.lme.2)
```
## Preliminary covariance structure $\pmb G$
```{r eval=T, echo = T, fig.height=3.7, include=T}
################################################################################################
###
################################################################################################
### preliminary covariance G
## --------------------------------------------------------------------------------------------
fit.lm.0 <- lm(fev ~ -1 + gender + age:gender + I(age*age):gender +
I(age*age*age):gender, data=dat)
summary(fit.lm.0)
## calculating residuals
dat$resid <- residuals(fit.lm.0)
head(dat)
ggplot(dat, aes(x = age, y = resid, group = id)) +
facet_wrap(~gender) +
geom_line(aes(color = gender), alpha = 0.3) +
geom_smooth(aes(group = 1), method="loess", color="black", se=FALSE) +
theme_classic() +
theme(legend.position = "top") +
ylab("residuals")
ggplot(dat, aes(x = age, y = resid^2, group = id)) +
facet_wrap(~gender) +
geom_point(aes(color = gender), alpha = 0.3) +
geom_smooth(aes(group = 1), method="loess", color="black", se=FALSE) +
theme_classic() +
theme(legend.position = "top") +
ylab("Squared residuals")
## model with quadratic random effects did not converge: adding "+ I(age*age)" to random effects
fit.lme.3 <- lme(fev ~ -1 + gender + age:gender + I(age*age):gender +
I(age*age*age):gender, data=dat,
random = ~ age | id, method="ML")
summary(fit.lme.2)
summary(fit.lme.3)
```
## Residual covariance structure $\pmb R$
```{r eval=T, echo = T, fig.height=3.7, include=T}
################################################################################################
###
## different variance for males and females
fit.lme.4 <- lme(fev ~ -1 + gender + age:gender + I(age*age):gender +
I(age*age*age):gender, data=dat,
random = ~ age | id, method="ML",
weights = varIdent(form = ~ 1 | gender))
summary(fit.lme.4)
R.b.1 <- getVarCov(fit.lme.4,type="conditional",individual=1) # R_1; first male
R.b.2 <- getVarCov(fit.lme.4,type="conditional",individual=12) # R_2; first female
R.b.1
R.b.1
## allowing for AR(1) exp
fit.lme.5 <- lme(fev ~ -1 + gender + age:gender + I(age*age):gender +
I(age*age*age):gender, data=dat,
random = ~ age | id, method="ML",
corExp(form = ~ age | id))
summary(fit.lme.5)
anova(fit.lme.3, fit.lme.4, fit.lme.5)
```
## Model reduction
```{r eval=T, echo = T, include=T}
## do we really random slopes? The variance of the random slopes, is not negligible (~1.8^2), so I would keep random slopes in
## testing if the cubic terms are needed in the mean form
fit.lme.5.1 <- lme(fev ~ -1 + gender + age:gender + I(age*age):gender,
data=dat,
random = ~ age | id, method="ML",
corExp(form = ~ age | id))
summary(fit.lme.5.1)
anova(fit.lme.5, fit.lme.5.1) ## we do seem to need cubic terms
## testing if there is an interaction between age and gender
```
## Final model
```{r eval=T, echo = T, fig.height=5, include=T}
```