Skip to content

Commit

Permalink
Merge pull request #134 from NIDAP-Community/fix_sankey
Browse files Browse the repository at this point in the history
Fix sankey
  • Loading branch information
escauley authored Oct 23, 2023
2 parents b1719d0 + d69b216 commit b96dba8
Show file tree
Hide file tree
Showing 13 changed files with 195 additions and 103 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ Imports:
cowplot (>= 1.1.1),
dplyr (>= 1.0.9),
GeomxTools (>= 3.1.1),
ggforce (== 0.3.4),
ggplot2 (== 3.3.6),
ggforce (>= 0.3.4),
ggplot2 (>= 3.3.6),
gridExtra (>= 2.3),
grid (>= 4.1.3),
gtable (>= 0.3.0),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ importFrom(dplyr,count)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(ggforce,gather_set_data)
importFrom(ggforce,geom_parallel_sets)
Expand Down
57 changes: 39 additions & 18 deletions R/filtering.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,19 @@
#' @details This function will run various filtering parameters for NanoStringGeoMxSet datasets
#'
#' @param object A NanoStringGeoMxSet dataset
#' @param loq.cutoff The number of standard deviations above the negative probe
#' geometric mean to use as a cutoff for the limit of quantification
#' @param loq.min The minimum value for the limit of quantification
#' @param segment.gene.rate.cutoff A decimal for the minimum cutoff for the
#' genes detected in a given segment over the total number of genes in the
#' probe set
#' @param study.gene.rate.cutoff = A decimal for the minimum cutoff for the
#' average amount a given gene is detected in all segments
#' @param sankey.exclude.slide A toggle for including the slide name in the
#' Sankey Plot
#' @param goi A list of genes of interest to evaluate for their study-wide
#' detection rate
#'
#' @importFrom scales percent
#' @importFrom Biobase pData
#' @importFrom Biobase fData
Expand All @@ -18,12 +31,6 @@
#' @export
#' @return A list containing the ....

# To call function, must have data = raw object, dsp.obj = QC demoData,
# loq.cutoff 2 is recommended,
# loq.min 2 is recommend,
# segment.gene.rate.cutoff = remove segments with less than x% of the gene set detected; .05-.1 recommended,
# study.gene.rate.cutoff = remove genes detected in less than x% of segments; .05-.2 recommended,
# goi = goi (genes of interest). Must be a vector of genes (i.e c("PDCD1", "CD274")),
filtering <- function(object,
loq.cutoff = 2,
loq.min = 2,
Expand Down Expand Up @@ -135,32 +142,46 @@ filtering <- function(object,
stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" ))
}

# Gather the data and plot in order: class, slide name, region, segment
# gather_set_data creates x, id, y, and n fields within sankey.count.data
# Establish the levels of the Sankey with or without the slide name
if(sankey.exclude.slide == TRUE){
# Create a dataframe used to make the Sankey plot
sankey.count.data <- gather_set_data(count.mat, 1:3)
sankey.count.data$x <-
factor(
sankey.count.data$x,
levels = c("class", "region", "segment")
)

# Define the annotations to use for the Sankey x axis labels
sankey.count.data$x[sankey.count.data$x == 1] <- "class"
sankey.count.data$x[sankey.count.data$x == 2] <- "region"
sankey.count.data$x[sankey.count.data$x == 3] <- "segment"

factor(
sankey.count.data$x,
levels = c("class", "region", "segment")
)

# For position of Sankey 100 segment scale
adjust.scale.pos = 1
} else {
# Create a dataframe used to make the Sankey plot
sankey.count.data <- gather_set_data(count.mat, 1:4)
sankey.count.data$x <-
factor(
sankey.count.data$x,
levels = c("class", "slide_name", "region", "segment")
)

# Define the annotations to use for the Sankey x axis labels
sankey.count.data$x[sankey.count.data$x == 1] <- "slide_name"
sankey.count.data$x[sankey.count.data$x == 2] <- "class"
sankey.count.data$x[sankey.count.data$x == 3] <- "region"
sankey.count.data$x[sankey.count.data$x == 4] <- "segment"

factor(
sankey.count.data$x,
levels = c("class", "slide_name", "region", "segment")
)

# For position of Sankey 100 segment scale
adjust.scale.pos = 0
}

# plot Sankey
sankey.plot <- ggplot(sankey.count.data, aes(x, id = id, split = y, value = n)) +
geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) +
geom_parallel_sets(aes(fill = class), alpha = 0.5, axis.width = 0.1) +
geom_parallel_sets_axes(axis.width = 0.2) +
geom_parallel_sets_labels(color = "gray", size = 5, angle = 0) +
theme_classic(base_size = 17) +
Expand Down
24 changes: 6 additions & 18 deletions R/spatial_deconvolution.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' @title Spatial Deconvolution
#'
#' Helper functions comes from
#' https://bioconductor.org/packages/release/bioc/vignettes/SpatialDecon/inst/doc/SpatialDecon_vignette_NSCLC.html
#'
#'
#' @title Spatial Deconvolution
#'
#' @description spatialDeconvolution estimate cell composition across DSP
#' samples from reference expression matrix
#'
Expand Down Expand Up @@ -41,18 +41,6 @@
#' @importFrom ComplexHeatmap pheatmap
#'
#' @export
#' @example Do not run: spatialDeconvolution(object = NanostringGeomx,
#' expr.type = "q_norm",
#' prof.mtx = profile_matrix,
#' clust.rows = TRUE,
#' clust.cols = TRUE,
#' group.by = "none",
#' plot.fontsize = 5,
#' use.custom.prof.mtx = FALSE,
#' ref.mtx = reference_matrix,
#' ref.annot = reference_annotation,
#' cell.id.col = "CellID",
#' celltype.col = "LabeledCellType")
#'
#' @return A list dsp.data containing the results of spatial deconvolution,
#' res$beta: matrix of estimated cell abundances
Expand All @@ -66,7 +54,7 @@


spatialDeconvolution <- function(object,
expr.type,
expr.type = "q_norm",
prof.mtx,
clust.rows = TRUE,
clust.cols = TRUE,
Expand All @@ -79,8 +67,8 @@ spatialDeconvolution <- function(object,
min.genes = 10,
ref.mtx,
ref.annot,
cell.id.col,
celltype.col) {
cell.id.col = "CellID",
celltype.col = "LabeledCellType") {

# Check for Parameter Misspecification Error(s)
if (!expr.type %in% names(object@assayData)) {
Expand Down
53 changes: 34 additions & 19 deletions R/study_design.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,14 @@
#' phenoDataFile containing data about the experiment's meta-data.
#' @param slide.name.col The name of the field that contains the slide names
#' @param class.col The name of the field that contains the class annotation
#' @param region.col The name of the field that contains the class annotation
#' @param segment.col The name of the field that contains the class annotation
#'
#'
#' @param region.col The name of the field that contains the region annotation
#' @param segment.col The name of the field that contains the segment name
#' @param area.col The name of the field that contains the segment area
#' @param nuclei.col The name of the field that contains the nuclei number
#' @param sankey.exclude.slide A toggle for including the slide name in the
#' Sankey plot
#' @param segment.id.length The number of characters to use from each of the
#' annotation fields class, region, and segment to create the segment ID
#'
#' @importFrom GeomxTools readNanoStringGeoMxSet
#' @importFrom knitr kable
Expand All @@ -52,8 +56,6 @@
#' @export
#' @return A list containing the NanoString Object and the Sankey plot.



studyDesign <- function(dcc.files,
pkc.files,
pheno.data.file,
Expand Down Expand Up @@ -177,26 +179,39 @@ studyDesign <- function(dcc.files,
rownames(count.mat) <- 1:nrow(count.mat)
}


# Gather the data and plot in order: class, slide name, region, segment
# gather_set_data creates x, id, y, and n fields within sankey.count.data
# Establish the levels of the Sankey with or without the slide name
if(sankey.exclude.slide == TRUE){
# Create a dataframe used to make the Sankey plot
sankey.count.data <- gather_set_data(count.mat, 1:3)
sankey.count.data$x <-
factor(
sankey.count.data$x,
levels = c("class", "region", "segment")
)

# Define the annotations to use for the Sankey x axis labels
sankey.count.data$x[sankey.count.data$x == 1] <- "class"
sankey.count.data$x[sankey.count.data$x == 2] <- "region"
sankey.count.data$x[sankey.count.data$x == 3] <- "segment"

factor(
sankey.count.data$x,
levels = c("class", "region", "segment")
)

# For position of Sankey 100 segment scale
adjust.scale.pos = 1
} else {
# Create a dataframe used to make the Sankey plot
sankey.count.data <- gather_set_data(count.mat, 1:4)
sankey.count.data$x <-
factor(
sankey.count.data$x,
levels = c("class", "slide_name", "region", "segment")
)

# Define the annotations to use for the Sankey x axis labels
sankey.count.data$x[sankey.count.data$x == 1] <- "slide_name"
sankey.count.data$x[sankey.count.data$x == 2] <- "class"
sankey.count.data$x[sankey.count.data$x == 3] <- "region"
sankey.count.data$x[sankey.count.data$x == 4] <- "segment"

factor(
sankey.count.data$x,
levels = c("class", "slide_name", "region", "segment")
)

# For position of Sankey 100 segment scale
adjust.scale.pos = 0
}
Expand All @@ -210,7 +225,7 @@ studyDesign <- function(dcc.files,
split = y,
value = n
)) +
geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) +
geom_parallel_sets(aes(fill = class), alpha = 0.5, axis.width = 0.1) +
geom_parallel_sets_axes(axis.width = 0.2) +
geom_parallel_sets_labels(color = "gray",
size = 5,
Expand Down
11 changes: 3 additions & 8 deletions R/violin_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,13 @@
#' @importFrom gridExtra arrangeGrob
#'
#' @export
#' @example Do not run: violinPlot(object = NanostringGeomx,
#' expr.type = "q_norm",
#' genes = c("FOXP3","CD4"),
#' group = "CellType",
#' facet.by = "segment")
#'
#' @return an arranged grob of violin plots

violinPlot <- function(object,
expr.type,
genes,
group,
expr.type = "q_norm",
genes = c("FOXP3","CD4"),
group = "CellType",
facet.by = "none") {

# Check for Parameter Misspecification Error(s)
Expand Down
8 changes: 4 additions & 4 deletions man/diffExpr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 27 additions & 1 deletion man/filtering.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/geomxNorm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b96dba8

Please sign in to comment.