Skip to content

Commit

Permalink
Lefse DA are defined by both effect size and p-value (raw) thresholds
Browse files Browse the repository at this point in the history
  • Loading branch information
sdgamboa committed Sep 23, 2024
1 parent 2efb793 commit de319d0
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 24 deletions.
7 changes: 5 additions & 2 deletions R/new_DA_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,12 @@ DA_lefse <- function(
statInfo <- statInfo |>
dplyr::mutate(abs_score = abs(.data$scores)) |>
dplyr::arrange(abs_score)
## These raw p-values and adjusted p-values are artificial.
## I'm only leaving them here for now to help in the ordering of the DA features
statInfo$rawP <- seq(0.04, 0, length.out = nrow(statInfo))
statInfo$adj_pval <- stats::p.adjust(statInfo$rawP, method = "fdr")
rownames(statInfo) <- statInfo[["Names"]]
statInfo$adjP <- seq(0.09, 0, length.out = nrow(statInfo))
# statInfo$adj_pval <- stats::p.adjust(statInfo$rawP, method = "fdr")
rownames(statInfo) <- statInfo[["features"]] ## check names
colnames(statInfo) <- c("Taxa", "LDA_scores", "abs_score", "rawP", "adjP")

pValMat <- statInfo[, c("rawP", "adjP")]
Expand Down
11 changes: 7 additions & 4 deletions R/set_DA_methods_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -190,22 +190,25 @@ set_DA_methods_list <- function(conditions_col, conditions) {
# wilcox.threshold = 1,
# lda.threshold = 0
# ),

## I really need p-values to help define DA abundant features
## So I'm using a p-value threshold only for the lefse method
lefse.12 = list(
method = 'DA_lefse',
conditions = conditions,
norm = 'CLR',
groupCol = conditions_col,
kruskal.threshold = 1,
wilcox.threshold = 1,
kruskal.threshold = 0.05,
wilcox.threshold = 0.05,
lda.threshold = 0
),
lefse.13 = list(
method = 'DA_lefse',
conditions = conditions,
norm = 'TSS',
groupCol = conditions_col,
kruskal.threshold = 1,
wilcox.threshold = 1,
kruskal.threshold = 0.05,
wilcox.threshold = 0.05,
lda.threshold = 0
)
)
Expand Down
Binary file modified vignettes/articles/Figure1.pdf
Binary file not shown.
91 changes: 73 additions & 18 deletions vignettes/articles/HMP_2012_16S_gingival_V35.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,40 @@ direction <- get_direction_cols(DA_output, conditions_col, conditions)

## Enrichment (adjP <= 0.1)

Lefse is the strictest method, defining DA with p-values and effect
size thresholds (based on linear discriminant analysis). CLR affects
the magnitud of the effect size, so let's define a new one.

```{r}
DA_output$lefse.TSS$statInfo$abs_score |> hist()
```
```{r}
DA_output$lefse.CLR$statInfo$abs_score |> hist()
```
```{r}
c(
lefse.TSS = median(DA_output$lefse.TSS$statInfo$abs_score),
lefse.CLR = median(DA_output$lefse.CLR$statInfo$abs_score)
)
```


Create variables of thresholds:

```{r}
adjThr<- rep(0.1, length(DA_output))
names(adjThr) <- names(DA_output)
esThr <- rep(0, length(DA_output))
names(esThr) <- names(DA_output)
esThr[grep("lefse.TSS", names(esThr))] <- 2
esThr[grep("lefse.CLR", names(esThr))] <- 0.06
slotV <- ifelse(grepl("lefse", names(DA_output)), "statInfo", "pValMat")
colNameV <- ifelse(grepl("lefse", names(DA_output)), "LDA_scores", "adjP")
typeV <- ifelse(grepl("lefse", names(DA_output)), "logfc", "pvalue")
```

Run enrichment:

```{r enrichment}
Expand All @@ -208,10 +242,11 @@ enrichment <- createEnrichment(
priorKnowledge = prior_info,
enrichmentCol = "taxon_annotation",
namesCol = "new_names",
slot = "pValMat", colName = "adjP", type = "pvalue",
# slot = "pValMat", colName = "adjP", type = "pvalue",
slot = slotV, colName = colNameV, type = typeV,
direction = direction,
threshold_pvalue = 0.1,
threshold_logfc = 0,
threshold_pvalue = adjThr,
threshold_logfc = esThr,
top = NULL, # No top feature selected
alternative = "greater",
verbose = FALSE
Expand Down Expand Up @@ -293,21 +328,41 @@ enPlot <- enrichmentSummary |>
## Calculate TP - FP ratio (no threshold)

```{r}
positives <- createPositives(
object = DA_output,
priorKnowledge = prior_info,
enrichmentCol = "taxon_annotation", namesCol = "new_names",
slot = "pValMat", colName = "rawP", type = "pvalue",
direction = direction,
threshold_pvalue = 1,
threshold_logfc = 0,
top = seq.int(from = 0, to = 50, by = 5),
alternative = "greater",
verbose = FALSE,
TP = list(c("DOWN Abundant", "anaerobic"), c("UP Abundant", "aerobic")),
FP = list(c("DOWN Abundant", "aerobic"), c("UP Abundant", "anaerobic"))
) |>
left_join(get_meth_class(), by = 'method')
positives <- map(1:length(DA_output), .f = function(i) {
positives <- createPositives(
object = DA_output[i],
priorKnowledge = prior_info,
enrichmentCol = "taxon_annotation", namesCol = "new_names",
slot = slotV[i], colName = colNameV[i], type = typeV[i],
direction = direction[i],
threshold_pvalue = 1,
threshold_logfc = 0,
top = seq.int(from = 0, to = 50, by = 5),
alternative = "greater",
verbose = FALSE,
TP = list(c("DOWN Abundant", "anaerobic"), c("UP Abundant", "aerobic")),
FP = list(c("DOWN Abundant", "aerobic"), c("UP Abundant", "anaerobic"))
) |>
left_join(get_meth_class(), by = 'method')
}) |> bind_rows()
# names(positives) <- names(DA_output)
# positives <- createPositives(
# object = DA_output,
# priorKnowledge = prior_info,
# enrichmentCol = "taxon_annotation", namesCol = "new_names",
# slot = slotV, colName = colNameV, type = typeV,
# direction = direction,
# threshold_pvalue = 1,
# threshold_logfc = 0,
# top = seq.int(from = 0, to = 50, by = 5),
# alternative = "greater",
# verbose = FALSE,
# TP = list(c("DOWN Abundant", "anaerobic"), c("UP Abundant", "aerobic")),
# FP = list(c("DOWN Abundant", "aerobic"), c("UP Abundant", "anaerobic"))
# ) |>
# left_join(get_meth_class(), by = 'method')
```

## Plot TP - FP
Expand Down

0 comments on commit de319d0

Please sign in to comment.