diff --git a/articletwo-rrbiome.R b/articletwo-rrbiome.R index 0110d5f..476132d 100644 --- a/articletwo-rrbiome.R +++ b/articletwo-rrbiome.R @@ -53,7 +53,7 @@ filter.phenotype.data <- function(pseq, PULSEPRESSURE = myscale(PULSEPRESSURE), MAP = myscale(MAP), SEX = as.factor(SEX), - Q57X = as.factor(Q57X), + Q57X = factor(Q57X, ordered = FALSE), BL_USE_RX_C03 = as.factor(BL_USE_RX_C03), BL_USE_RX_C07 = as.factor(BL_USE_RX_C07), BL_USE_RX_C08 = as.factor(BL_USE_RX_C08), @@ -379,3 +379,9 @@ deseq.list <- function(var.CL, var.CL.min) { }) l[!duplicated(l)] } + +coretaxa <- function(pseq, detection = 0.1/100, prevalence = 1/100) { + microbiome::transform(pseq, "compositional") %>% + core(detection = detection, prevalence = prevalence) %>% + taxa +} diff --git a/articletwo.Rmd b/articletwo.Rmd index 8d8c0e5..70c5b46 100644 --- a/articletwo.Rmd +++ b/articletwo.Rmd @@ -122,26 +122,20 @@ names.dset <- getdescriptions() eLoading phyloseq object ```{r data -pseq.genus <- import_filter_data("data/phfinrisk_genus_all_drop50k_2018-11-16.RDs") pseq.species <- import_filter_data("data/phfinrisk_species_all_drop50k_2018-12-21.RDs") +pseq.genus <- import_filter_data("data/phfinrisk_genus_all_drop50k_2018-11-16.RDs") +pseq.genus.coretaxa <- coretaxa(pseq.genus, detection = 0.1/100, prevalence = 1/100) ``` Species has average number of reads `r pseq.species %>% sample_sums %>% mean` and -genus `r pseq.genus %>% sample_sums %>% mean`. +genus `r pseq.genus %>% sample_sums %>% mean`. Core has length +`r length(pseq.genus.coretaxa)`. At species level meta has dimensions (`r dim(meta(pseq.species))`) and there ntaxa is `r ntaxa(pseq.species)`. At genus level meta has dimensions (`r dim(meta(pseq.genus))`) and there ntaxa is `r ntaxa(pseq.genus)`. -```{r defining genus core} -pseq.genus.core <- microbiome::transform(pseq.genus, "compositional") %>% - core(detection = 0.1/100, prevalence = 1/100) -``` - -At genus-core level meta has dimensions (`r dim(meta(pseq.genus.core))`) -and there ntaxa is `r ntaxa(pseq.genus.core)`. - ## Variables ```{r my variables} @@ -153,6 +147,8 @@ var.CL <- c("BL_AGE", "SEX", "BMI", "CURR_SMOKE", "Q57X", "PREVAL_DIAB", ## MODELS +### Bray curtis distance matrix + ```{r matrix calculation} if (!file.exists("rds/bray.dist.m.species.rds")) { bray.dist.m.species <- calculate.beta.matrix(pseq.species) @@ -162,6 +158,8 @@ if (!file.exists("rds/bray.dist.m.species.rds")) { } ``` +### Beta diversity + ```{r adonis calculation} if (!file.exists("rds/adonis.species.rds")) { adonis.species <- calculate.betadiversity(pseq = pseq.species, @@ -175,6 +173,8 @@ if (!file.exists("rds/bray.dist.m.species.rds")) { } ``` +### PCoA + ```{r pcoa calculate} if (!file.exists("rds/pcoa.ordinate.rds")) { pcoa.abundances <- microbiome::transform(pseq.species, 'compositional') @@ -185,9 +185,11 @@ if (!file.exists("rds/pcoa.ordinate.rds")) { } ``` +### DeSeq2 + ```{r deseq2} if (!file.exists("rds/dds.rds")) { - pseq.genus.core.deseq <- prune_taxa(taxa(pseq.genus.core), pseq.genus) + pseq.genus.core.deseq <- prune_taxa(pseq.genus.coretaxa, pseq.genus) dds <- lapply(c2l(var.BP), function(x) { dds.data <- phyloseq_to_deseq2(pseq.genus.core.deseq, deseq.formula(x, var.CL)) @@ -195,7 +197,8 @@ if (!file.exists("rds/dds.rds")) { test="Wald", fitType="parametric", parallel = TRUE, - BPPARAM=MulticoreParam(16))}) + BPPARAM=MulticoreParam(16)) + }) saveRDS(dds, file = "rds/dds.rds") } else { @@ -205,7 +208,7 @@ if (!file.exists("rds/dds.rds")) { ```{r comparing deseq2} if (!file.exists("rds/dds3.rds")) { - pseq.genus.core.deseq <- prune_taxa(taxa(pseq.genus.core), pseq.genus) + pseq.genus.core.deseq <- prune_taxa(pseq.genus.coretaxa, pseq.genus) dds3 <- lapply(deseq.list(var.CL, var.CL.min), function(x, dds) { dds.data <- phyloseq_to_deseq2(dds, deseq.formula(x, "HYPERTENSION")) DESeq(dds.data,