-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Statistical tests against external simulators
- Loading branch information
1 parent
255539a
commit 21b2bc5
Showing
6 changed files
with
917 additions
and
258 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
if (!require("AlphaSimR")) install.packages("AlphaSimR", repos='http://cran.us.r-project.org') | ||
library(AlphaSimR) | ||
if (!require("reticulate")) install.packages("reticulate", repos='http://cran.us.r-project.org') | ||
library(reticulate) | ||
|
||
# This code is used from verification.py to simulate quantitative traits | ||
# by using AlphaSimR. | ||
# | ||
# The basic simulation step is the following: | ||
# 1. Use the tskit Python package through the R package tskit and load the tree | ||
# sequence data as a founder population in AlphaSimR. The codes of this step are | ||
# largely adapted from | ||
# https://github.com/ ynorr/AlphaSimR_Examples/blob/master/misc/msprime.R | ||
# 2. Simulate quantitative traits of the founder population in AlphaSimR | ||
|
||
# The commandline input has 8 elements | ||
# [num_causal, temporary_directory_name, | ||
# corA, num_trait, h2, h2_2, num_rep, random_seed] | ||
|
||
myArgs <- commandArgs(trailingOnly = TRUE) | ||
# Convert to numerics | ||
num_causal <- as.numeric(myArgs[1]) | ||
directory_name <- myArgs[2] | ||
corA <- as.numeric(myArgs[3]) | ||
num_trait <- as.numeric(myArgs[4]) | ||
h2 <- as.numeric(myArgs[5]) | ||
h2_2 <- as.numeric(myArgs[6]) | ||
num_rep <- as.numeric(myArgs[7]) | ||
random_seed <- as.numeric(myArgs[8]) | ||
|
||
set.seed(random_seed) | ||
|
||
tskit <- import("tskit") | ||
|
||
tree_filename <- paste0(directory_name,"/tree_seq.tree") | ||
ts <- tskit$load(tree_filename) | ||
|
||
sites <- ts$tables$sites$asdict() | ||
pos <- sites$position * 1e-8 # Convert to Morgans | ||
pos <- pos - pos[1] # Set first position to zero | ||
|
||
# Extract haplotypes | ||
haplo <- t(ts$genotype_matrix()) | ||
|
||
# Create an AlphaSimR founder population | ||
founderPop <- newMapPop(genMap=list(pos), haplotypes=list(haplo)) | ||
|
||
num_ind <- nrow(haplo) / 2 | ||
|
||
if (num_trait == 1){ | ||
mean <- 0 | ||
var <- 1 | ||
corA <- NULL | ||
H2 <- h2 | ||
} else if (num_trait == 2){ | ||
mean <- c(0,0) | ||
var <- c(1,1) | ||
corA <- matrix(c(1,corA,corA,1),nrow=2,ncol=2) | ||
H2 <- c(h2,h2_2) | ||
} | ||
|
||
phenotype_result <- c() | ||
trait_result <- c() | ||
|
||
for (i in 1:num_rep) { | ||
SP <- SimParam$ | ||
new(founderPop)$ | ||
addTraitA( | ||
nQtlPerChr=num_causal, | ||
mean=mean, | ||
var=var, | ||
corA=corA | ||
)$ | ||
setVarE(H2=H2) | ||
|
||
individuals <- newPop(founderPop) | ||
|
||
trait_df <- c() | ||
phenotype_df <- c() | ||
|
||
for (trait_id in 1:num_trait){ | ||
qtl_site <- SP$traits[[trait_id]]@lociLoc - 1 | ||
effect_size <- SP$traits[[trait_id]]@addEff | ||
trait_id_df <- data.frame( | ||
effect_size = effect_size, | ||
site_id = qtl_site, | ||
trait_id = rep(trait_id-1, length(effect_size)) | ||
) | ||
trait_df <- rbind(trait_df, trait_id_df) | ||
phenotype <- individuals@pheno[,trait_id] | ||
phenotype_id_df <- data.frame( | ||
phenotype=phenotype, | ||
individual_id = 0:(num_ind-1), | ||
trait_id = rep(trait_id-1, num_ind) | ||
) | ||
phenotype_df <- rbind(phenotype_df, phenotype_id_df) | ||
} | ||
phenotype_result <- rbind(phenotype_result, phenotype_df) | ||
trait_result <- rbind(trait_result, trait_df) | ||
} | ||
|
||
phenotype_filename <- paste0(directory_name,"/phenotype_alphasimr.csv") | ||
write.csv(phenotype_result, phenotype_filename, row.names=FALSE) | ||
|
||
trait_filename <- paste0(directory_name,"/trait_alphasimr.csv") | ||
write.csv(trait_result, trait_filename, row.names=FALSE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
if (!require("simplePHENOTYPES")) install.packages("simplePHENOTYPES", repos='http://cran.us.r-project.org') | ||
library(simplePHENOTYPES) | ||
|
||
# This code is used from verification.py to simulate quantitative traits | ||
# by using simplePHENOTYPES. | ||
|
||
# This code loads the vcf file that is generated in `verification.py` | ||
# and uses the effect size from a normal distribution to simulate | ||
# additive traits. | ||
|
||
# The commandline input has 7 elements | ||
# [num_causal, h2, directory_name, | ||
# num_rep, mean, var, random_seed] | ||
myArgs <- commandArgs(trailingOnly = TRUE) | ||
|
||
num_causal <- as.numeric(myArgs[1]) | ||
h2 <- as.numeric(myArgs[2]) | ||
directory_name <- myArgs[3] | ||
num_rep <- myArgs[4] | ||
mean <- as.numeric(myArgs[5]) | ||
var <- as.numeric(myArgs[6]) | ||
random_seed <- as.numeric(myArgs[7]) | ||
|
||
set.seed(random_seed) | ||
|
||
sd <- sqrt(var) | ||
|
||
# Function to simulate phenotypes from simplePHENOTYPES | ||
# The effect sizes are simulated from a normal distribution, | ||
# as the geometric series is the only effect size distribution | ||
# supported in simplePHENOTYPES. | ||
simulate_simplePHENOTYPE <- function( | ||
num_causal, random_seed | ||
) { | ||
effect_size <- list(rnorm(n=num_causal, mean=mean, sd=sd)) | ||
phenotypes <- suppressMessages(create_phenotypes( | ||
geno_file = paste0(directory_name, "/tree_seq.vcf"), | ||
add_QTN_num = num_causal, | ||
add_effect = effect_size, | ||
rep = 1, | ||
h2 = h2, | ||
model = "A", | ||
seed = random_seed, | ||
vary_QTN = FALSE, | ||
to_r = TRUE, | ||
sim_method = "custom", | ||
quiet = TRUE, | ||
home_dir = directory_name, | ||
verbose = FALSE, | ||
mean = 0 | ||
)) | ||
# The mean is centered at 0 from simplePHENOTYPES simulation | ||
# so we will divide it by the standard deviation to normalise | ||
# the data | ||
phenotypes[,2] <- phenotypes[,2] / sd(phenotypes[,2]) | ||
names(phenotypes)[1:2] <- c("individual_id", "phenotype") | ||
return(phenotypes) | ||
} | ||
|
||
phenotype_result <- c() | ||
|
||
for (i in 1:num_rep) { | ||
simulated_result <- simulate_simplePHENOTYPE( | ||
num_causal=num_causal, random_seed=random_seed+i | ||
) | ||
phenotype_result <- rbind(phenotype_result, simulated_result) | ||
} | ||
|
||
filename = paste0(directory_name, "/simplePHENOTYPES.csv") | ||
write.csv(phenotype_result, filename, row.names=FALSE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Simulation Codes | ||
|
||
This | ||
This directory is used to store the R simulation codes. | ||
|
||
R simulation codes: | ||
|
||
- simulate_AlphaSimR.R: This R code uses AlphaSimR to simulate quantitative traits based on a simulated tree sequence data. | ||
- simulate_simplePHENOTYPES_exact.R: This R code uses simplePHENOTYPES to simulate quantitative traits based on a VCF file. This is used for exact tests. | ||
- simulate_simplePHENOTYPES_qqplot.R: This R code uses simplePHENOTYPES to simulate quantitative traits. This is used in qqplot construction. |
Oops, something went wrong.