diff --git a/DESCRIPTION b/DESCRIPTION index 6d56d9b..d9a9205 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,8 @@ Authors@R: c( person("Alan", "O'Callaghan", role="ctb"), person("Jens", "Preussner", role="ctb"), person("Charlotte", "Soneson", role="ctb"), - person("Stephany", "Orjuela", role="ctb")) + person("Stephany", "Orjuela", role="ctb"), + person("Daniel", "Bunis", role="ctb")) Description: Gene-level counts for a collection of public scRNA-seq datasets, provided as SingleCellExperiment objects with cell- and gene-level metadata. License: CC0 diff --git a/NAMESPACE b/NAMESPACE index 9258bdb..7b55bba 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(BacherTCellData) export(BaronPancreasData) export(BhaduriOrganoidData) export(BuettnerESCData) +export(BunisHSPCData) export(CampbellBrainData) export(ChenBrainData) export(DarmanisBrainData) diff --git a/R/BunisHSPCData.R b/R/BunisHSPCData.R new file mode 100644 index 0000000..6c95a4b --- /dev/null +++ b/R/BunisHSPCData.R @@ -0,0 +1,62 @@ +#' Obtain the Bunis haematopoietic stem and progenitor cell data +#' +#' Obtain the human fetal, newborn, and adult haematopoietic stem and progenitor cell single-cell RNA-seq dataset from Bunis et al. (2021). +#' +#' @param filtered Logical scalar or "cells" indicating whether to: +#' \itemize{ +#' \item \code{TRUE}: filter out cells that were not used by the authors. +#' \item \code{"cells"}: filter out empty droplets as filtered out by cell ranger. +#' \item \code{FALSE}: no filtering +#' } +#' +#' @details +#' Column metadata is recreated from GEO using the author-supplied TSV of per-cell annotations, or retrieved from a processed version of the data shared by authors via figshare. +#' This contains information such as the tissue & sample of origin, age group, likely cell type, and Developmental Stage Scoring. 
+#' Within DevStageScoring element of the column metadata are the applied results ('_scores') of random forest regression trained on the fetal (score = 0) and adult (score = 1) cells of individual cell types indicated by ('_inTraining').
+#'
+#' If \code{filtered=TRUE}, only the cells used by the authors in their final analysis are returned.
+#' Otherwise, an additional \code{retained} field will be present in the \code{\link{colData}}, indicating whether the cell was retained by the authors.
+#'
+#' All data are downloaded from ExperimentHub and cached for local re-use.
+#' Specific resources can be retrieved by searching for \code{scRNAseq/bunis-hspc}.
+#'
+#' @return A \linkS4class{SingleCellExperiment} object with a single matrix of UMI counts.
+#'
+#' @author Daniel Bunis
+#'
+#' @references
+#' Bunis DG et al. (2021).
+#' Single-Cell Mapping of Progressive Fetal-to-Adult Transition in Human Naive T Cells
+#' \emph{Cell Rep.} 34(1): 108573
+#'
+#' @examples
+#' sce <- BunisHSPCData()
+#'
+#' @export
+BunisHSPCData <- function(filtered=TRUE) {
+    version <- "2.6.0"
+
+    sce <- .create_sce(file.path("bunis-hspc", version), has.rowdata = TRUE, has.coldata = FALSE)
+
+    hub <- ExperimentHub()
+    colData.path <- file.path("scRNAseq", "bunis-hspc", version, "coldata.rds")
+    colData <- hub[hub$rdatapath==colData.path][[1]]
+
+    if (isTRUE(filtered)) {
+        keep <- colnames(sce) %in% rownames(colData)[colData$retained]
+        sce <- sce[,keep]
+        colData$retained <- NULL
+    } else if (identical(filtered, "cells")) {
+        keep <- colnames(sce) %in% rownames(colData)
+        sce <- sce[,keep]
+    }
+
+    # Align colData to the columns of 'sce' by integer position (weird performance issue when directly subsetting with rownames).
+    # Also, preserve names when filtered=FALSE, though this takes some time.
+    m <- match(colnames(sce), rownames(colData)) # look cells up among colData's rows, not among the gene IDs in rownames(sce)
+    colData <- colData[m,, drop = FALSE]
+    rownames(colData) <- colnames(sce)
+    colData(sce) <- colData
+
+    sce
+}
diff --git a/inst/extdata/manifest.csv b/inst/extdata/manifest.csv
index 300bfda..f3a2c54 100644
--- a/inst/extdata/manifest.csv
+++ b/inst/extdata/manifest.csv
@@ -6,6 +6,7 @@ Reference,Taxonomy,Part,Number,Call
 @baron2016singlecell,10090,pancreas,1886,BaronPancreasData('mouse')
 @bhaduri2020cell,9606,cortical organoids,242349,BhaduriOrganoidData()
 @buettner2015computational,10090,embryonic stem cells,288,BuettnerESCData()
+@bunis2021haematopoietic,9606,haematopoietic stem and progenitor,5183,BunisHSPCData()
 @campbell2017molecular,10090,brain,21086,CampbellBrainData()
 @chen2017singlecell,10090,brain,14437,ChenBrainData()
 @darmanis2015survey,9606,brain,466,DarmanisBrainData()
diff --git a/longtests/testthat/test-bunis-hspc.R b/longtests/testthat/test-bunis-hspc.R
new file mode 100644
index 0000000..9f1d084
--- /dev/null
+++ b/longtests/testthat/test-bunis-hspc.R
@@ -0,0 +1,17 @@
+# library(testthat); library(scRNAseq); source('test-bunis-hspc.R')
+
+test_that("Bunis HSPC getter works as expected", {
+    sce <- BunisHSPCData()
+    expect_s4_class(sce, "SingleCellExperiment")
+
+    sce2 <- BunisHSPCData(filtered = "cells")
+    expect_s4_class(sce2, "SingleCellExperiment")
+
+    sce3 <- BunisHSPCData(filtered=FALSE)
+
+    # Checks cell filtering and that colData is added all at once
+    expect_true( nrow(colData(sce)) < nrow(colData(sce2)) )
+    expect_true( nrow(colData(sce2)) < nrow(colData(sce3)) )
+
+    expect_true(all(grepl("^ENSG", rownames(sce))))
+})
diff --git a/man/BunisHSPCData.Rd b/man/BunisHSPCData.Rd
new file mode 100644
index 0000000..d07b889
--- /dev/null
+++ b/man/BunisHSPCData.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/BunisHSPCData.R
+\name{BunisHSPCData}
+\alias{BunisHSPCData}
+\title{Obtain the Bunis haematopoietic stem and progenitor cell data}
+\usage{
+BunisHSPCData(filtered = TRUE) +} +\arguments{ +\item{filtered}{Logical scalar or "cells" indicating whether to: +\itemize{ +\item \code{TRUE}: filter out cells that were not used by the authors. +\item \code{"cells"}: filter out empty droplets as filtered out by cell ranger. +\item \code{FALSE}: no filtering +}} +} +\value{ +A \linkS4class{SingleCellExperiment} object with a single matrix of UMI counts. +} +\description{ +Obtain the human fetal, newborn, and adult haematopoietic stem and progenitor cell single-cell RNA-seq dataset from Bunis et al. (2021). +} +\details{ +Column metadata is recreated from GEO using the author-supplied TSV of per-cell annotations, or retrieved from a processed version of the data shared by authors via figshare. +This contains information such as the tissue & sample of origin, age group, likely cell type, and Developmental Stage Scoring. +Within DevStageScoring element of the column metadata are the applied results ('_scores') of random forest regression trained on the fetal (score = 0) and adult (score = 1) cells of individual cell types indicated by ('_inTraining'). + +If \code{filtered=TRUE}, only the cells used by the authors in their final analysis are returned. +Otherwise, an additional \code{retained} field will be present in the \code{\link{colData}}, indicating whether the cell was retained by the authors. + +All data are downloaded from ExperimentHub and cached for local re-use. +Specific resources can be retrieved by searching for \code{scRNAseq/bunis-hspc}. +} +\examples{ +sce <- BunisHSPCData() + +} +\references{ +Bunis DG et al. (2021). 
+Single-Cell Mapping of Progressive Fetal-to-Adult Transition in Human Naive T Cells +\emph{Cell Rep.} 34(1): 108573 +} +\author{ +Daniel Bunis +} diff --git a/tests/testthat/test-loading.R b/tests/testthat/test-loading.R index 2d17577..5ba21c7 100644 --- a/tests/testthat/test-loading.R +++ b/tests/testthat/test-loading.R @@ -21,6 +21,10 @@ test_that("BuettnerESCData works", { expect_s4_class(rowRanges(out), "GRangesList") }) +test_that("BunisHSPCData works", { + CHECK(BunisHSPCData()) +}) + test_that("CampbellBrainData works", { CHECK(CampbellBrainData(ensembl=TRUE)) })