diff --git a/R/new_DA_methods.R b/R/new_DA_methods.R index 1771f47..f7ea010 100644 --- a/R/new_DA_methods.R +++ b/R/new_DA_methods.R @@ -257,9 +257,12 @@ DA_lefse <- function( statInfo <- statInfo |> dplyr::mutate(abs_score = abs(.data$scores)) |> dplyr::arrange(abs_score) + ## These raw p-values and adjusted p-values are artificial. + ## I'm only leaving them here for now to help in the ordering of the DA features statInfo$rawP <- seq(0.04, 0, length.out = nrow(statInfo)) - statInfo$adj_pval <- stats::p.adjust(statInfo$rawP, method = "fdr") - rownames(statInfo) <- statInfo[["Names"]] + statInfo$adjP <- seq(0.09, 0, length.out = nrow(statInfo)) + # statInfo$adj_pval <- stats::p.adjust(statInfo$rawP, method = "fdr") + rownames(statInfo) <- statInfo[["features"]] ## check names colnames(statInfo) <- c("Taxa", "LDA_scores", "abs_score", "rawP", "adjP") pValMat <- statInfo[, c("rawP", "adjP")] diff --git a/R/set_DA_methods_list.R b/R/set_DA_methods_list.R index 75b7096..d4c6683 100644 --- a/R/set_DA_methods_list.R +++ b/R/set_DA_methods_list.R @@ -190,13 +190,16 @@ set_DA_methods_list <- function(conditions_col, conditions) { # wilcox.threshold = 1, # lda.threshold = 0 # ), + + ## I really need p-values to help define DA abundant features + ## So I'm using a p-value threshold only for the lefse method lefse.12 = list( method = 'DA_lefse', conditions = conditions, norm = 'CLR', groupCol = conditions_col, - kruskal.threshold = 1, - wilcox.threshold = 1, + kruskal.threshold = 0.05, + wilcox.threshold = 0.05, lda.threshold = 0 ), lefse.13 = list( @@ -204,8 +207,8 @@ set_DA_methods_list <- function(conditions_col, conditions) { conditions = conditions, norm = 'TSS', groupCol = conditions_col, - kruskal.threshold = 1, - wilcox.threshold = 1, + kruskal.threshold = 0.05, + wilcox.threshold = 0.05, lda.threshold = 0 ) ) diff --git a/vignettes/articles/Figure1.pdf b/vignettes/articles/Figure1.pdf index 404413c..4d943ff 100644 Binary files a/vignettes/articles/Figure1.pdf and b/vignettes/articles/Figure1.pdf differ diff --git a/vignettes/articles/HMP_2012_16S_gingival_V35.Rmd b/vignettes/articles/HMP_2012_16S_gingival_V35.Rmd index 0ba40d7..fd80987 100644 --- a/vignettes/articles/HMP_2012_16S_gingival_V35.Rmd +++ b/vignettes/articles/HMP_2012_16S_gingival_V35.Rmd @@ -200,6 +200,40 @@ direction <- get_direction_cols(DA_output, conditions_col, conditions) ## Enrichment (adjP <= 0.1) +Lefse is the strictest method, defining DA with p-values and effect +size thresholds (based on linear discriminant analysis). CLR affects +the magnitud of the effect size, so let's define a new one. + +```{r} +DA_output$lefse.TSS$statInfo$abs_score |> hist() +``` +```{r} +DA_output$lefse.CLR$statInfo$abs_score |> hist() +``` +```{r} +c( + lefse.TSS = median(DA_output$lefse.TSS$statInfo$abs_score), + lefse.CLR = median(DA_output$lefse.CLR$statInfo$abs_score) +) +``` + + +Create variables of thresholds: + +```{r} +adjThr<- rep(0.1, length(DA_output)) +names(adjThr) <- names(DA_output) + +esThr <- rep(0, length(DA_output)) +names(esThr) <- names(DA_output) +esThr[grep("lefse.TSS", names(esThr))] <- 2 +esThr[grep("lefse.CLR", names(esThr))] <- 0.06 + +slotV <- ifelse(grepl("lefse", names(DA_output)), "statInfo", "pValMat") +colNameV <- ifelse(grepl("lefse", names(DA_output)), "LDA_scores", "adjP") +typeV <- ifelse(grepl("lefse", names(DA_output)), "logfc", "pvalue") +``` + Run enrichment: ```{r enrichment} @@ -208,10 +242,11 @@ enrichment <- createEnrichment( priorKnowledge = prior_info, enrichmentCol = "taxon_annotation", namesCol = "new_names", - slot = "pValMat", colName = "adjP", type = "pvalue", + # slot = "pValMat", colName = "adjP", type = "pvalue", + slot = slotV, colName = colNameV, type = typeV, direction = direction, - threshold_pvalue = 0.1, - threshold_logfc = 0, + threshold_pvalue = adjThr, + threshold_logfc = esThr, top = NULL, # No top feature selected alternative = "greater", verbose = FALSE @@ -293,21 +328,41 @@ enPlot <- enrichmentSummary |> ## Calculate TP - FP ratio (no threshold) ```{r} -positives <- createPositives( - object = DA_output, - priorKnowledge = prior_info, - enrichmentCol = "taxon_annotation", namesCol = "new_names", - slot = "pValMat", colName = "rawP", type = "pvalue", - direction = direction, - threshold_pvalue = 1, - threshold_logfc = 0, - top = seq.int(from = 0, to = 50, by = 5), - alternative = "greater", - verbose = FALSE, - TP = list(c("DOWN Abundant", "anaerobic"), c("UP Abundant", "aerobic")), - FP = list(c("DOWN Abundant", "aerobic"), c("UP Abundant", "anaerobic")) -) |> - left_join(get_meth_class(), by = 'method') +positives <- map(1:length(DA_output), .f = function(i) { + positives <- createPositives( + object = DA_output[i], + priorKnowledge = prior_info, + enrichmentCol = "taxon_annotation", namesCol = "new_names", + slot = slotV[i], colName = colNameV[i], type = typeV[i], + direction = direction[i], + threshold_pvalue = 1, + threshold_logfc = 0, + top = seq.int(from = 0, to = 50, by = 5), + alternative = "greater", + verbose = FALSE, + TP = list(c("DOWN Abundant", "anaerobic"), c("UP Abundant", "aerobic")), + FP = list(c("DOWN Abundant", "aerobic"), c("UP Abundant", "anaerobic")) + ) |> + left_join(get_meth_class(), by = 'method') +}) |> bind_rows() +# names(positives) <- names(DA_output) + + +# positives <- createPositives( +# object = DA_output, +# priorKnowledge = prior_info, +# enrichmentCol = "taxon_annotation", namesCol = "new_names", +# slot = slotV, colName = colNameV, type = typeV, +# direction = direction, +# threshold_pvalue = 1, +# threshold_logfc = 0, +# top = seq.int(from = 0, to = 50, by = 5), +# alternative = "greater", +# verbose = FALSE, +# TP = list(c("DOWN Abundant", "anaerobic"), c("UP Abundant", "aerobic")), +# FP = list(c("DOWN Abundant", "aerobic"), c("UP Abundant", "anaerobic")) +# ) |> +# left_join(get_meth_class(), by = 'method') ``` ## Plot TP - FP