Skip to content

Commit

Permalink
Added the BunisHSPCData() getter function.
Browse files Browse the repository at this point in the history
  • Loading branch information
dtm2451 authored Apr 29, 2021
1 parent 905ac11 commit 75862a1
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 1 deletion.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ Authors@R: c(
person("Alan", "O'Callaghan", role="ctb"),
person("Jens", "Preussner", role="ctb"),
person("Charlotte", "Soneson", role="ctb"),
person("Stephany", "Orjuela", role="ctb"))
person("Stephany", "Orjuela", role="ctb"),
person("Daniel", "Bunis", role="ctb"))
Description: Gene-level counts for a collection of public scRNA-seq datasets,
provided as SingleCellExperiment objects with cell- and gene-level metadata.
License: CC0
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(BacherTCellData)
export(BaronPancreasData)
export(BhaduriOrganoidData)
export(BuettnerESCData)
export(BunisHSPCData)
export(CampbellBrainData)
export(ChenBrainData)
export(DarmanisBrainData)
Expand Down
62 changes: 62 additions & 0 deletions R/BunisHSPCData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' Obtain the Bunis haematopoietic stem and progenitor cell data
#'
#' Obtain the human fetal, newborn, and adult haematopoietic stem and progenitor cell single-cell RNA-seq dataset from Bunis et al. (2021).
#'
#' @param filtered Logical scalar or "cells" indicating whether to:
#' \itemize{
#' \item \code{TRUE}: filter out cells that were not used by the authors.
#' \item \code{"cells"}: filter out empty droplets as filtered out by Cell Ranger.
#' \item \code{FALSE}: no filtering.
#' }
#'
#' @details
#' Column metadata is recreated from GEO using the author-supplied TSV of per-cell annotations, or retrieved from a processed version of the data shared by authors via figshare.
#' This contains information such as the tissue & sample of origin, age group, likely cell type, and Developmental Stage Scoring.
#' Within the DevStageScoring element of the column metadata are the applied results ('<cell_type>_scores') of random forest regression trained on the fetal (score = 0) and adult (score = 1) cells of individual cell types indicated by ('<cell_type>_inTraining').
#'
#' If \code{filtered=TRUE}, only the cells used by the authors in their final analysis are returned.
#' Otherwise, an additional \code{retained} field will be present in the \code{\link{colData}}, indicating whether the cell was retained by the authors.
#'
#' All data are downloaded from ExperimentHub and cached for local re-use.
#' Specific resources can be retrieved by searching for \code{scRNAseq/bunis-hspc}.
#'
#' @return A \linkS4class{SingleCellExperiment} object with a single matrix of UMI counts.
#'
#' @author Daniel Bunis
#'
#' @references
#' Bunis DG et al. (2021).
#' Single-Cell Mapping of Progressive Fetal-to-Adult Transition in Human Naive T Cells.
#' \emph{Cell Rep.} 34(1): 108573
#'
#' @examples
#' sce <- BunisHSPCData()
#'
#' @export
BunisHSPCData <- function(filtered=TRUE) {
    version <- "2.6.0"

    # The column metadata is attached manually below, after any cell
    # filtering, so it is not requested from .create_sce() here.
    sce <- .create_sce(file.path("bunis-hspc", version), has.rowdata = TRUE, has.coldata = FALSE)

    hub <- ExperimentHub()
    colData.path <- file.path("scRNAseq", "bunis-hspc", version, "coldata.rds")
    colData <- hub[hub$rdatapath==colData.path][[1]]

    if (isTRUE(filtered)) {
        # Keep only the cells flagged as retained by the authors; the flag is
        # then uninformative, so it is dropped from the metadata.
        keep <- colnames(sce) %in% rownames(colData)[colData$retained]
        sce <- sce[,keep]
        colData$retained <- NULL
    } else if (identical(filtered, "cells")) {
        # Keep every cell that has an annotation row.
        keep <- colnames(sce) %in% rownames(colData)
        sce <- sce[,keep]
    }

    # Weird performance issue when directly subsetting with rownames.
    # Also, preserve names when filtered=FALSE, though this takes some time.
    # Cells must be looked up among the annotation's row names (not the gene
    # names of 'sce'); match() yields NA for any cell lacking an annotation row.
    m <- match(colnames(sce), rownames(colData))
    colData <- colData[m,, drop = FALSE]
    rownames(colData) <- colnames(sce)
    colData(sce) <- colData

    sce
}
1 change: 1 addition & 0 deletions inst/extdata/manifest.csv
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Reference,Taxonomy,Part,Number,Call
@baron2016singlecell,10090,pancreas,1886,BaronPancreasData('mouse')
@bhaduri2020cell,9606,cortical organoids,242349,BhaduriOrganoidData()
@buettner2015computational,10090,embryonic stem cells,288,BuettnerESCData()
@bunis2021haematopoietic,9606,haematopoietic stem and progenitor,5183,BunisHSPCData()
@campbell2017molecular,10090,brain,21086,CampbellBrainData()
@chen2017singlecell,10090,brain,14437,ChenBrainData()
@darmanis2015survey,9606,brain,466,DarmanisBrainData()
Expand Down
17 changes: 17 additions & 0 deletions longtests/testthat/test-bunis-hspc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# library(testthat); library(scRNAseq); source('test-bunis-hspc.R')

test_that("Bunis HSPC getter works as expected", {
    # Default: only the cells used by the authors.
    sce <- BunisHSPCData()
    expect_s4_class(sce, "SingleCellExperiment")

    # Only empty droplets removed.
    sce2 <- BunisHSPCData(filtered = "cells")
    expect_s4_class(sce2, "SingleCellExperiment")

    # No filtering at all.
    sce3 <- BunisHSPCData(filtered = FALSE)
    expect_s4_class(sce3, "SingleCellExperiment")

    # Checks cell filtering and that colData is added all at once:
    # each stricter filtering level must leave strictly fewer cells.
    expect_lt(nrow(colData(sce)), nrow(colData(sce2)))
    expect_lt(nrow(colData(sce2)), nrow(colData(sce3)))

    # The 'retained' flag is dropped when filtered=TRUE and kept otherwise.
    expect_false("retained" %in% colnames(colData(sce)))
    expect_true("retained" %in% colnames(colData(sce2)))
    expect_true("retained" %in% colnames(colData(sce3)))

    expect_true(all(grepl("^ENSG", rownames(sce))))
})
45 changes: 45 additions & 0 deletions man/BunisHSPCData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions tests/testthat/test-loading.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ test_that("BuettnerESCData works", {
expect_s4_class(rowRanges(out), "GRangesList")
})

# Smoke test: calls the getter with its default arguments and hands the result
# to CHECK(). NOTE(review): CHECK is not defined in the visible part of this
# file; presumably it validates the returned object - confirm.
test_that("BunisHSPCData works", {
CHECK(BunisHSPCData())
})

# Smoke test: calls the getter with ensembl=TRUE and hands the result to
# CHECK(). NOTE(review): CHECK is not defined in the visible part of this
# file; presumably it validates the returned object - confirm.
test_that("CampbellBrainData works", {
CHECK(CampbellBrainData(ensembl=TRUE))
})
Expand Down

0 comments on commit 75862a1

Please sign in to comment.