-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpcor.R
156 lines (124 loc) · 4 KB
/
pcor.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
pcor.test <- function(x,y,z,use="mat",method="p",na.rm=T){
# The partial correlation coefficient between x and y given z
#
# pcor.test is free and comes with ABSOLUTELY NO WARRANTY.
#
# x and y should be vectors
#
# z can be either a vector or a matrix
#
# use: There are two methods to calculate the partial correlation coefficient.
# One is by using variance-covariance matrix ("mat") and the other is by using recursive formula ("rec").
# Default is "mat".
#
# method: There are three ways to calculate the correlation coefficient,
# which are Pearson's ("p"), Spearman's ("s"), and Kendall's ("k") methods.
# The last two methods which are Spearman's and Kendall's coefficient are based on the non-parametric analysis.
# Default is "p".
#
# na.rm: If na.rm is T, then all the missing samples are deleted from the whole dataset, which is (x,y,z).
# If not, the missing samples will be removed just when the correlation coefficient is calculated.
# However, the number of samples for the p-value is the number of samples after removing
# all the missing samples from the whole dataset.
# Default is "T".
x <- c(x)
y <- c(y)
z <- as.data.frame(z)
if(use == "mat"){
p.use <- "Var-Cov matrix"
pcor = pcor.mat(x,y,z,method=method,na.rm=na.rm)
}else if(use == "rec"){
p.use <- "Recursive formula"
pcor = pcor.rec(x,y,z,method=method,na.rm=na.rm)
}else{
stop("\'use\' should be either \"rec\" or \"mat\"!\n")
}
# print the method
if(gregexpr("p",method)[[1]][1] == 1){
p.method <- "Pearson"
}else if(gregexpr("s",method)[[1]][1] == 1){
p.method <- "Spearman"
}else if(gregexpr("k",method)[[1]][1] == 1){
p.method <- "Kendall"
}else{
stop("\'method\' should be \"pearson\" or \"spearman\" or \"kendall\"!\n")
}
# sample number
n <- dim(na.omit(data.frame(x,y,z)))[1]
# given variables' number
gn <- dim(z)[2]
# p-value
if(p.method == "Kendall"){
statistic <- pcor/sqrt(2*(2*(n-gn)+5)/(9*(n-gn)*(n-1-gn)))
p.value <- 2*pnorm(-abs(statistic))
}else{
statistic <- pcor*sqrt((n-2-gn)/(1-pcor^2))
p.value <- 2*pnorm(-abs(statistic))
}
data.frame(estimate=pcor,p.value=p.value,statistic=statistic,n=n,gn=gn,Method=p.method,Use=p.use)
}
# By using var-cov matrix
pcor.mat <- function(x,y,z,method="p",na.rm=T){
x <- c(x)
y <- c(y)
z <- as.data.frame(z)
if(dim(z)[2] == 0){
stop("There should be given data\n")
}
data <- data.frame(x,y,z)
if(na.rm == T){
data = na.omit(data)
}
xdata <- na.omit(data.frame(data[,c(1,2)]))
Sxx <- cov(xdata,xdata,m=method)
xzdata <- na.omit(data)
xdata <- data.frame(xzdata[,c(1,2)])
zdata <- data.frame(xzdata[,-c(1,2)])
Sxz <- cov(xdata,zdata,m=method)
zdata <- na.omit(data.frame(data[,-c(1,2)]))
Szz <- cov(zdata,zdata,m=method)
# is Szz positive definite?
zz.ev <- eigen(Szz)$values
if(min(zz.ev)[1]<0){
stop("\'Szz\' is not positive definite!\n")
}
# partial correlation
Sxx.z <- Sxx - Sxz %*% solve(Szz) %*% t(Sxz)
rxx.z <- cov2cor(Sxx.z)[1,2]
rxx.z
}
# By using recursive formula
pcor.rec <- function(x,y,z,method="p",na.rm=T){
#
x <- c(x)
y <- c(y)
z <- as.data.frame(z)
if(dim(z)[2] == 0){
stop("There should be given data\n")
}
data <- data.frame(x,y,z)
if(na.rm == T){
data = na.omit(data)
}
# recursive formula
if(dim(z)[2] == 1){
tdata <- na.omit(data.frame(data[,1],data[,2]))
rxy <- cor(tdata[,1],tdata[,2],m=method)
tdata <- na.omit(data.frame(data[,1],data[,-c(1,2)]))
rxz <- cor(tdata[,1],tdata[,2],m=method)
tdata <- na.omit(data.frame(data[,2],data[,-c(1,2)]))
ryz <- cor(tdata[,1],tdata[,2],m=method)
rxy.z <- (rxy - rxz*ryz)/( sqrt(1-rxz^2)*sqrt(1-ryz^2) )
return(rxy.z)
}else{
x <- c(data[,1])
y <- c(data[,2])
z0 <- c(data[,3])
zc <- as.data.frame(data[,-c(1,2,3)])
rxy.zc <- pcor.rec(x,y,zc,method=method,na.rm=na.rm)
rxz0.zc <- pcor.rec(x,z0,zc,method=method,na.rm=na.rm)
ryz0.zc <- pcor.rec(y,z0,zc,method=method,na.rm=na.rm)
rxy.z <- (rxy.zc - rxz0.zc*ryz0.zc)/( sqrt(1-rxz0.zc^2)*sqrt(1-ryz0.zc^2) )
return(rxy.z)
}
}