Skip to content

Commit

Permalink
added linearization option
Browse files Browse the repository at this point in the history
  • Loading branch information
trvinh committed Jun 11, 2024
1 parent fc1ac88 commit 7276ee4
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 22 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: PhyloProfile
Version: 1.19.2
Date: 2024-06-07
Date: 2024-06-11
Title: PhyloProfile
Authors@R: c(
person("Vinh", "Tran", role = c("aut", "cre"), email = "[email protected]", comment=c(ORCID="0000-0001-6772-7595")),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export(gridArrangeSharedLegend)
export(heatmapPlotting)
export(highlightProfilePlot)
export(id2name)
export(linearizeArchitecture)
export(mainTaxonomyRank)
export(parseDomainInput)
export(parseInfoProfile)
Expand Down Expand Up @@ -96,6 +97,7 @@ importFrom(data.table,setDT)
importFrom(data.table,setnames)
importFrom(data.table,transpose)
importFrom(dplyr,add_count)
importFrom(dplyr,arrange)
importFrom(dplyr,count)
importFrom(dplyr,group_by)
importFrom(dplyr,left_join)
Expand Down
70 changes: 70 additions & 0 deletions R/createDomainPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,76 @@ resolveOverlapFeatures <- function(domainDf) {
return(domainDf)
}

#' Linearize PFAM/SMART annotations by best e-value/bitscore
#' @description Remove overlapping PFAM/SMART features of one protein so that
#' the remaining PFAM/SMART features do not overlap each other. Features are
#' scanned by increasing start position and each one is compared with the last
#' feature that was kept: if both overlap (end of the kept feature >= start of
#' the current one), the one with the higher e-value (or the lower bitscore) is
#' removed; in case of a tie the earlier feature is kept. Features of other
#' types are returned unchanged. Missing e-values/bitscores are treated as the
#' worst possible score.
#' @export
#' @param domainDf input domain dataframe. Must contain the columns orthoID,
#' start, end, feature_type and the column named by \code{value}
#' @param orthoID ID of protein that needs to be linearized
#' @param value type of values that will be used for linearization, either
#' "evalue" (default) or "bitscore"
#' @return Domain dataframe of the selected protein after linearization, sorted
#' by start position and then by the selected value
#' @author Vinh Tran [email protected]
#' @examples
#' demoDomainDf <- data.frame(
#'     orthoID = rep("protID", 4),
#'     start = c(1, 5, 100, 80),
#'     end = c(30, 40, 130, 110),
#'     evalue = c(0.001, 0.0005, 0.2, 0.004),
#'     feature_type = c(rep("pfam", 2), rep("smart", 2)),
#'     feature_id = c("pf1", "pf2", "sm1", "sm2")
#' )
#' linearizeArchitecture(demoDomainDf, "protID", "evalue")
linearizeArchitecture <- function(
    domainDf = NULL, orthoID = NULL, value = "evalue"
) {
    if (is.null(domainDf) || is.null(orthoID)) stop("Input data is NULL!")
    if (length(value) != 1 || !(value %in% c("evalue", "bitscore")))
        stop("Incorrect value specified! Either 'evalue' or 'bitscore'")
    requiredCols <- c("orthoID", "start", "end", "feature_type", value)
    missingCols <- setdiff(requiredCols, colnames(domainDf))
    if (length(missingCols) > 0) stop(
        "Missing column(s) in domainDf: ", paste(missingCols, collapse = ", ")
    )

    # Keep only the selected protein; %in% never yields NA, so rows with a
    # missing orthoID are simply left out
    protDf <- domainDf[which(domainDf$orthoID %in% orthoID), , drop = FALSE]
    # Sort by start position and then by evalue or bitscore (both ascending)
    protDf <- protDf[order(protDf$start, protDf[[value]]), , drop = FALSE]

    # Only PFAM/SMART features take part in the linearization. Rows are
    # addressed by position, so the result does not depend on row names
    domainRows <- which(protDf$feature_type %in% c("pfam", "smart"))
    isExcluded <- logical(nrow(protDf))
    if (length(domainRows) > 1) {
        starts <- protDf$start
        ends <- protDf$end
        # Turn the score into a "larger is better" quality; NA is the worst
        quality <- protDf[[value]]
        if (value == "evalue") quality <- -quality
        quality[is.na(quality)] <- -Inf

        # Compare each feature with the last one that was KEPT (not merely
        # with its direct predecessor): a long feature is checked against
        # everything it spans and an already removed feature can no longer
        # knock out its neighbours
        lastKept <- domainRows[1]
        for (current in domainRows[-1]) {
            # isTRUE(): a missing coordinate counts as "no overlap"
            if (isTRUE(ends[lastKept] >= starts[current])) {
                if (quality[current] > quality[lastKept]) {
                    isExcluded[lastKept] <- TRUE
                    lastKept <- current
                } else {
                    isExcluded[current] <- TRUE
                }
            } else {
                lastKept <- current
            }
        }
    }
    # return domainDf of the protein without the excluded overlapping features
    return(protDf[!isExcluded, , drop = FALSE])
}

#' Add colors for each feature/domain
#' @description Add colors to features/domains of 2 domain dataframes. Users can
Expand Down
45 changes: 24 additions & 21 deletions inst/PhyloProfile/R/createArchitecturePlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,11 @@ createArchitecturePlotUI <- function(id) {
),
column(
3,
checkboxGroupInput(
ns("showInstance"),
"Show only instances with",
selectInput(
ns("linearizationBy"),
"Linearizing architecture using",
choices = c(
"None" = "none",
"Best E-value" = "evalue",
"Best Bit-score" = "bitscore",
"Paths" = "path"
Expand Down Expand Up @@ -261,10 +262,7 @@ createArchitecturePlot <- function(
updateSelectInput(
session, "excludeNames",
"Exclude feature names of",
choices = c(
"flps","seg","coils","signalp","tmhmm",
"smart","pfam"
)
choices = c("seg","coils","signalp","tmhmm","smart","pfam")
)
} else if (
"axis" %in% input$namePostion | "legend" %in% input$namePostion
Expand All @@ -273,10 +271,9 @@ createArchitecturePlot <- function(
session, "excludeNames",
"Exclude feature names of",
choices = c(
"flps","seg","coils","signalp","tmhmm",
"smart","pfam"
"flps","seg","coils","signalp","tmhmm","smart","pfam"
),
selected = c("tmhmm","signalp","flps","seg","coils")
selected = c("tmhmm","signalp","seg","coils")
)
}
})
Expand Down Expand Up @@ -385,19 +382,25 @@ createArchitecturePlot <- function(
outDf <- rbind(outDf,naOutDf)
}
# get only best instances
if ("evalue" %in% input$showInstance) {
naOutDf <- outDf[is.na(outDf$evalue),]
outDf <- outDf %>% dplyr::group_by(feature, orthoID) %>%
dplyr::filter(evalue == min(evalue))
outDf <- rbind(outDf,naOutDf)
if ("evalue" %in% input$linearizationBy) {
linearizedDfs <- lapply(
levels(as.factor(outDf$orthoID)),
function(orthoID) {
return(linearizeArchitecture(outDf, orthoID, "evalue"))
}
)
outDf <- do.call(rbind, linearizedDfs)
}
if ("bitscore" %in% input$showInstance) {
naOutDf <- outDf[is.na(outDf$bitscore),]
outDf <- outDf %>% dplyr::group_by(feature, orthoID) %>%
dplyr::filter(bitscore == max(bitscore))
outDf <- rbind(outDf,naOutDf)
if ("bitscore" %in% input$linearizationBy) {
linearizedDfs <- lapply(
levels(as.factor(outDf$orthoID)),
function(orthoID) {
return(linearizeArchitecture(outDf, orthoID,"bitscore"))
}
)
outDf <- do.call(rbind, linearizedDfs)
}
if ("path" %in% input$showInstance) {
if ("path" %in% input$linearizationBy) {
outDf <- outDf %>% dplyr::group_by(feature) %>%
dplyr::filter(path == "Y")
}
Expand Down
36 changes: 36 additions & 0 deletions man/linearizeArchitecture.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7276ee4

Please sign in to comment.