From 3fe7e848b3477dd509eba4ca07f1428ec4035b00 Mon Sep 17 00:00:00 2001 From: Chun-Hui Gao Date: Wed, 7 Feb 2024 10:12:08 +0800 Subject: [PATCH] upset plot non-specific items --- DESCRIPTION | 5 +++-- NEWS.md | 4 ++++ R/process_data.R | 18 +++++++++++++----- R/regions.R | 5 +++-- R/upset_plot.R | 29 +++++++++++++++++++++-------- man/process_upset_data.Rd | 12 ++++++++++-- man/upset-plot.Rd | 3 +++ man/venn_data.Rd | 12 +++++++++++- 8 files changed, 68 insertions(+), 20 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 675d565..926209c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ggVennDiagram Type: Package Title: A 'ggplot2' Implement of Venn Diagram -Version: 1.5.1 +Version: 1.5.2 Authors@R: c( person("Chun-Hui","Gao", email="gaospecial@gmail.com", role=c("aut","cre"), comment=c(ORCID = "0000-0002-1445-7939")), person("Guangchuang", "Yu", email = "guangchuangyu@gmail.com", role = c("ctb"), comment = c(ORCID = "0000-0002-6485-8781")), @@ -41,6 +41,7 @@ Suggests: plotly, RColorBrewer, shiny, - rmarkdown + rmarkdown, + tidyr VignetteBuilder: knitr LazyData: true diff --git a/NEWS.md b/NEWS.md index 29789dc..e70e690 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # Changelog of ggVennDiagram +## ggVennDiagram 1.5.2 + +* Add a switch to show non-specific items in `plot_upset`. See #64. 
+ ## ggVennDiagram 1.5.1 * Add more param to `plot_upset()` function diff --git a/R/process_data.R b/R/process_data.R index b792e2d..00103df 100644 --- a/R/process_data.R +++ b/R/process_data.R @@ -104,7 +104,7 @@ venn_region = function(obj){ #' @export #' plotData_add_venn = function(plotData, venn){ - if (!all(c("setLabel","setEdge","regionLabel", "regionEdge") %in% names(plotData))){ + if (!all(c("setLabel","setEdge","regionLabel", "regionEdge") %in% names(plotData))) { stop("Invalid shape data.") } if (!inherits(venn, "Venn")) stop("venn should be a S4 Venn object.") @@ -142,7 +142,7 @@ plotData_add_venn = function(plotData, venn){ #' process_set_data(venn) #' process_region_data(venn) process_set_data = function(venn){ - if(!inherits(venn, "Venn")) stop("venn is not a S4 class 'Venn' object.") + if (!inherits(venn, "Venn")) stop("venn is not a S4 class 'Venn' object.") tibble::tibble( id = as.character(seq_along(venn@sets)), name = venn@names, @@ -152,10 +152,18 @@ process_set_data = function(venn){ } #' @rdname venn_data +#' @param specific whether to return ONLY specific items for a subset, default is TRUE +#' @details +#' ggVennDiagram, by default, only returns the specific subsets of a region. +#' However, sometimes, we want to show all the overlapping items for two or more sets. +#' For example: https://github.com/gaospecial/ggVennDiagram/issues/64 +#' Therefore, we add a 'specific' switch to this function. When 'specific = FALSE', +#' all the overlapping items will be returned (process_upset_data() also changes the +#' separator from "/" to "~" in that case). This feature is useful when plotting an upset plot. 
#' @export -process_region_data = function(venn, sep = "/"){ - if(!inherits(venn, "Venn")) stop("venn is not a S4 class 'Venn' object.") - region_items = get_subset_items(venn) +process_region_data = function(venn, sep = "/", specific = TRUE) { + if (!inherits(venn, "Venn")) stop("venn is not a S4 class 'Venn' object.") + region_items = get_subset_items(venn, specific = specific) counts = sapply(region_items, length) region_ids = get_subset_ids(venn, sep = sep) region_names = get_subset_names(venn, sep = sep) diff --git a/R/regions.R b/R/regions.R index 2352917..e172e74 100644 --- a/R/regions.R +++ b/R/regions.R @@ -17,10 +17,11 @@ matrix2list <- function(matrix){ lapply(seq_len(ncol(matrix)), function(i) matrix[,i]) } -get_subset_items <- function(venn){ +get_subset_items <- function(venn, specific = TRUE){ n = length(venn@sets) c = combinations(n) - lapply(c, function(i) discern_overlap(venn,i)) + fun = ifelse(specific, "discern_overlap", "overlap") + lapply(c, fun, venn = venn) } get_subset_names <- function(venn, sep = "/"){ diff --git a/R/upset_plot.R b/R/upset_plot.R index 214aa71..2df84e0 100644 --- a/R/upset_plot.R +++ b/R/upset_plot.R @@ -34,6 +34,7 @@ #' @param sets.bar.show.numbers default is FALSE #' @param sets.bar.x.label default is "Set Size" #' @param intersection.matrix.color default is "grey30" +#' @param specific whether only include specific items in subsets, default is TRUE. #' @param ... 
useless #' @return an upset plot #' @@ -62,6 +63,7 @@ plot_upset = function(venn, sets.bar.show.numbers = FALSE, sets.bar.x.label = "Set Size", intersection.matrix.color = "grey30", + specific = TRUE, ...){ # process arguments order.intersect.by = match.arg(order.intersect.by) @@ -71,7 +73,8 @@ plot_upset = function(venn, data = process_upset_data(venn, nintersects = nintersects, order.intersect.by = order.intersect.by, - order.set.by = order.set.by) + order.set.by = order.set.by, + specific = specific) p_main = upsetplot_main(data$main_data, intersection.matrix.color = intersection.matrix.color) @@ -183,21 +186,31 @@ theme_upset_left = function(){ #' process upset data #' #' @inheritParams upset-plot -#' @param name_separator will be used to assign subset names +#' @param specific whether to return ONLY specific items for a subset, default is TRUE +#' @details +#' ggVennDiagram, by default, only returns the specific subsets of a region. +#' However, sometimes, we want to show all the overlapping items for two or more sets. +#' For example: https://github.com/gaospecial/ggVennDiagram/issues/64 +#' Therefore, we add a 'specific' switch to this function. When 'specific = FALSE', +#' the separator will be changed from "/" to "~", and all the overlapping items +#' will be returned. This feature is useful when plotting an upset plot. 
#' #' @return a upsetPlotData object process_upset_data = function(venn, nintersects = 30, order.intersect.by = "size", order.set.by = "name", - name_separator = "/"){ - data = process_region_data(venn, sep = name_separator) - data$size = data$count + specific = TRUE){ set_name = venn@names + name_separator = ifelse(specific, "/", "~") + + # region data + data = process_region_data(venn, sep = name_separator, specific = specific) + data$size = data$count # top data top_data = data |> dplyr::select(c('id', 'name', 'item', 'size')) - if (order.intersect.by %in% colnames(top_data)){ + if (order.intersect.by %in% colnames(top_data)) { top_data = dplyr::mutate(top_data, id = forcats::fct_reorder(.data$id, .data[[order.intersect.by]], .desc = TRUE)) } else { @@ -208,7 +221,7 @@ process_upset_data = function(venn, left_data = dplyr::tibble(set = set_name, name = set_name, size = lengths(venn@sets)) - if (order.set.by %in% colnames(left_data)){ + if (order.set.by %in% colnames(left_data)) { left_data = dplyr::mutate(left_data, set = forcats::fct_reorder(.data$set, .data[[order.set.by]], .desc = TRUE)) } else { @@ -224,7 +237,7 @@ process_upset_data = function(venn, main_data$id = factor(main_data$id, levels = levels(top_data$id)) # filter intersections - if (is.numeric(nintersects)){ + if (is.numeric(nintersects)) { keep_id = utils::head(levels(top_data$id), nintersects) main_data = main_data |> dplyr::filter(.data$id %in% keep_id) top_data = top_data |> dplyr::filter(.data$id %in% keep_id) diff --git a/man/process_upset_data.Rd b/man/process_upset_data.Rd index b3b7c2f..06abb10 100644 --- a/man/process_upset_data.Rd +++ b/man/process_upset_data.Rd @@ -9,7 +9,7 @@ process_upset_data( nintersects = 30, order.intersect.by = "size", order.set.by = "name", - name_separator = "/" + specific = TRUE ) } \arguments{ @@ -21,7 +21,7 @@ process_upset_data( \item{order.set.by}{'size', 'name', or "none"} -\item{name_separator}{will be used to assign subset names} 
+\item{specific}{whether to return ONLY specific items for a subset, default is TRUE} } \value{ a upsetPlotData object @@ -29,3 +29,11 @@ a upsetPlotData object \description{ process upset data } +\details{ +ggVennDiagram, by default, only returns the specific subsets of a region. + However, sometimes, we want to show all the overlapping items for two or more sets. + For example: https://github.com/gaospecial/ggVennDiagram/issues/64 + Therefore, we add a 'specific' switch to this function. When 'specific = FALSE', + the separator will be changed from "/" to "~", and all the overlapping items + will be returned. This feature is useful when plotting an upset plot. +} diff --git a/man/upset-plot.Rd b/man/upset-plot.Rd index a05a2c4..03308a5 100644 --- a/man/upset-plot.Rd +++ b/man/upset-plot.Rd @@ -20,6 +20,7 @@ plot_upset( sets.bar.show.numbers = FALSE, sets.bar.x.label = "Set Size", intersection.matrix.color = "grey30", + specific = TRUE, ... ) } @@ -52,6 +53,8 @@ plot_upset( \item{intersection.matrix.color}{default is "grey30"} +\item{specific}{whether to only include specific items in subsets, default is TRUE.} + \item{...}{useless} } \value{ diff --git a/man/venn_data.Rd b/man/venn_data.Rd index 4752649..bde5d2f 100644 --- a/man/venn_data.Rd +++ b/man/venn_data.Rd @@ -8,12 +8,14 @@ \usage{ process_set_data(venn) -process_region_data(venn, sep = "/") +process_region_data(venn, sep = "/", specific = TRUE) } \arguments{ \item{venn}{a Venn object} \item{sep}{name and id separator for intersections} + +\item{specific}{whether to return ONLY specific items for a subset, default is TRUE} } \value{ a tibble @@ -21,6 +23,14 @@ a tibble \description{ Prepare Venn data } +\details{ +ggVennDiagram, by default, only returns the specific subsets of a region. + However, sometimes, we want to show all the overlapping items for two or more sets. + For example: https://github.com/gaospecial/ggVennDiagram/issues/64 + Therefore, we add a 'specific' switch to this function. 
When 'specific = FALSE', + all the overlapping items will be returned (process_upset_data() also changes the + separator from "/" to "~" in that case). This feature is useful when plotting an upset plot. +} \examples{ x = list( A = sample(letters, 8),