From 34be4788d6abe4a9a34f536a9883b3fd4e2e6f3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Pag=C3=A8s?= Date: Thu, 2 Aug 2018 01:20:42 -0700 Subject: [PATCH 1/2] The TENxMatrix container is now implemented in the HDF5Array package --- DESCRIPTION | 5 +- NAMESPACE | 3 - R/TENxMatrix-class.R | 230 ------------------------------------- man/TENxMatrix-class.Rd | 39 ------- vignettes/TENxGenomics.Rmd | 8 +- 5 files changed, 7 insertions(+), 278 deletions(-) delete mode 100644 R/TENxMatrix-class.R delete mode 100644 man/TENxMatrix-class.Rd diff --git a/DESCRIPTION b/DESCRIPTION index d40d37f..1ce91f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TENxGenomics Title: Interface to 10x Genomics data -Version: 0.0.27 +Version: 0.0.28 Authors@R: c(person( "Martin", "Morgan", email = "Martin.Morgan@RoswellPark.org", @@ -21,13 +21,12 @@ Description: This package provides an interface to 10xGenomics data, Depends: R (>= 3.4.0), methods, Matrix, SummarizedExperiment, DelayedArray (>= 0.3.7) Imports: rhdf5 (>= 2.19.3-2), tools, BiocGenerics, S4Vectors, IRanges, - HDF5Array (>= 1.7.6), BiocParallel (>= 1.11.2) + HDF5Array (>= 1.9.6), BiocParallel (>= 1.11.2) Suggests: BiocStyle, knitr, rmarkdown, BiocFileCache, Rtsne, BiocSklearn, restfulSE Collate: TENxGenomics-class.R coerce.R - TENxMatrix-class.R tenxSummarizedExperiment.R tenxiterate.R License: Artistic-2.0 diff --git a/NAMESPACE b/NAMESPACE index bcffa59..f70021f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,6 @@ S3method(as.matrix,TENxGenomics) export(TENxGenomics) -export(TENxMatrix) export(as.dgCMatrix) export(as.dgCMatrixSummarizedExperiment) export(as.matrixSummarizedExperiment) @@ -11,8 +10,6 @@ export(dgCMatrixSummarizedExperiment) export(matrixSummarizedExperiment) export(tenxSummarizedExperiment) export(tenxiterate) -exportClasses(TENxMatrix) -exportClasses(TENxMatrixSeed) exportMethods("[") exportMethods(coerce) exportMethods(dim) diff --git a/R/TENxMatrix-class.R b/R/TENxMatrix-class.R deleted file mode 100644 index 1dc02df..0000000 --- a/R/TENxMatrix-class.R +++ /dev/null @@ -1,230 +0,0 @@ -### ========================================================================= -### TENxMatrix objects -### ------------------------------------------------------------------------- - -#' @rdname TENxMatrix-class -#' -#' @title TENxMatrix objects -#' -#' @name TENxMatrix-class -#' -#' @aliases TENxMatrixSeed-class -#' -#' @aliases DelayedArray,TENxMatrixSeed-method -#' -#' @description A container for representing 10xGenomics data. -#' TENxMatrix extends \link{DelayedArray} so all the operations -#' available on \link{DelayedArray} objects work on TENxMatrix objects. -#' -#' @import methods BiocGenerics S4Vectors IRanges DelayedArray HDF5Array -#' -#' @importFrom tools file_path_as_absolute -#' -#' @exportClass TENxMatrixSeed -setClass("TENxMatrixSeed", - representation( - file="character", # Absolute path to the HDF5 file so the object - # doesn't break when the user changes the working - # directory (e.g. with setwd()). - group="character", # Name of the group in the HDF5 file containing - # 10xGenomics data. - dim="integer", - dimnames="list", - col_ranges="data.frame" # Can't use an IRanges object for this at the - # moment because they don't support Linteger - # start/end values yet. - ) -) - -### All the 10xGenomics components are monodimensional. -.get_TENx_component <- function(file, group, name, idx=NULL) -{ - name <- paste0(group, "/", name) - if (!is.null(idx)) - idx <- list(idx) - as.vector(h5read(file, name, index=idx)) -} - -.get_barcodes <- function(file, group, idx=NULL) - .get_TENx_component(file, group, "barcodes", idx=idx) - -.get_data <- function(file, group, idx=NULL) - .get_TENx_component(file, group, "data", idx=idx) - -.get_gene_names <- function(file, group, idx=NULL) - .get_TENx_component(file, group, "gene_names", idx=idx) - -.get_genes <- function(file, group, idx=NULL) - .get_TENx_component(file, group, "genes", idx=idx) - -.get_indices <- function(file, group, idx=NULL) - .get_TENx_component(file, group, "indices", idx=idx) - -.get_indptr <- function(file, group, idx=NULL) -{ - name <- paste0(group, "/indptr") - if (!is.null(idx)) - idx <- list(idx) - as.vector(h5read(file, name, index=idx, bit64conversion="double")) -} - -.get_shape <- function(file, group, idx=NULL) - .get_TENx_component(file, group, "shape", idx=idx) - -TENxMatrixSeed <- function(h5path, group="mm10") -{ - if (!isSingleString(h5path)) - stop(wmsg("'h5path' must be a single string specifying the path to ", - "the HDF5 file where the 10xGenomics data is located")) - h5path <- file_path_as_absolute(h5path) - if (!isSingleString(group)) - stop(wmsg("'group' must be a single string specifying the name ", - "of the group in the HDF5 file containing the ", - "10xGenomics data")) - if (group == "") - stop(wmsg("'group' cannot be the empty string")) - - ## dim - dim <- .get_shape(h5path, group) - stopifnot(length(dim) == 2L) - - ## dimnames - rownames <- .get_genes(h5path, group) - stopifnot(length(rownames) == dim[[1L]]) - colnames <- .get_barcodes(h5path, group) - stopifnot(length(colnames) == dim[[2L]]) - dimnames <- list(rownames, colnames) - - ## col_ranges - ## "/data" and "/indices" are monodimensional arrays of length >= 2^31 so - ## we need to call h5dim() with 'check=FALSE'. - data_len <- HDF5Array:::h5dim(h5path, paste0(group, "/data"), - check=FALSE) - stopifnot(length(data_len) == 1L) - indices_len <- HDF5Array:::h5dim(h5path, paste0(group, "/indices"), - check=FALSE) - stopifnot(identical(data_len, indices_len)) - indptr <- .get_indptr(h5path, group) - stopifnot(length(indptr) == dim[[2L]] + 1L, - indptr[[1L]] == 0L, - indptr[[length(indptr)]] == data_len) - col_ranges <- data.frame(start=indptr[-length(indptr)] + 1, - width=as.integer(diff(indptr))) - - new2("TENxMatrixSeed", file=h5path, - group=group, - dim=dim, - dimnames=dimnames, - col_ranges=col_ranges) -} - -#' @rdname TENxMatrix-class -#' -#' @aliases dim,TENxMatrixSeed-method -#' -#' @param x A \code{TENxMatrix-class} or \code{TENxMatrixSeed-class} -#' object. -#' -#' @exportMethod dim -setMethod("dim", "TENxMatrixSeed", function(x) x@dim) - -#' @rdname TENxMatrix-class -#' -#' @aliases dimnames,TENxMatrixSeed-method -#' -#' @exportMethod dimnames -setMethod("dimnames", "TENxMatrixSeed", function(x) x@dimnames) - -### S4Vectors:::fancy_mseq() does not accept 'offset' of type double yet so -### we implement a version that does. -.fancy_mseq <- function(lengths, offset=0) -{ - lengths_len <- length(lengths) - if (lengths_len == 0L) - return(numeric(0)) - offsets <- offset - cumsum(c(0L, lengths[-lengths_len])) - seq_len(sum(lengths)) + rep.int(offsets, lengths) -} - -.extract_array_from_TENxMatrixSeed <- function(x, index) -{ - ans_dim <- DelayedArray:::get_Nindex_lengths(index, dim(x)) - ans <- array(0L, dim=ans_dim) - - j <- index[[2L]] - col_ranges <- x@col_ranges - if (is.null(j)) { - j <- seq_len(ncol(ans)) - } else { - col_ranges <- S4Vectors:::extract_data_frame_rows(col_ranges, j) - } - start2 <- col_ranges[ , "start"] - width2 <- col_ranges[ , "width"] - idx2 <- .fancy_mseq(width2, offset=start2 - 1) - i2 <- .get_indices(x@file, x@group, idx=idx2) + 1L - j2 <- rep.int(seq_along(j), width2) - - i <- index[[1L]] - if (!is.null(i)) { - m <- findMatches(i2, i) - idx2 <- idx2[from(m)] - i2 <- to(m) - j2 <- j2[from(m)] - } - - ans[cbind(i2, j2)] <- .get_data(x@file, x@group, idx=idx2) - ans -} - -setMethod("extract_array", "TENxMatrixSeed", - .extract_array_from_TENxMatrixSeed -) - -#' @rdname TENxMatrix-class -#' -#' @exportClass TENxMatrix -setClass("TENxMatrix", contains="DelayedMatrix") - -.validate_TENxMatrix <- function(x) -{ - if (!is(x@seed, "TENxMatrixSeed")) - return(wmsg("'x@seed' must be a TENxMatrixSeed object")) - if (!DelayedArray:::is_pristine(x)) - return(wmsg("'x' carries delayed operations")) - TRUE -} - -setValidity2("TENxMatrix", .validate_TENxMatrix) - -setMethod("DelayedArray", "TENxMatrixSeed", - function(seed) DelayedArray:::new_DelayedArray(seed, Class="TENxMatrix") -) - -### Works directly on a TENxMatrixSeed object, in which case it must be called -### with a single argument. -#' @rdname TENxMatrix-class -#' -#' @param h5path character(1) path to the 10xGenomics hdf5 file, or a -#' \code{TENxMatrixSeed} instance. -#' -#' @param group character(1) group (e.g., \dQuote{mm10}) containing -#' scRNA-seq data; only allowed when \code{h5path} is a -#' character(1). -#' -#' @return \code{TENxMatrix()} returns a TENxMatrix instance; -#' \code{dim()}, \code{dimnames()} and all other -#' \code{DelayedArray} methods operate on this instance. -#' -#' @export -TENxMatrix <- function(h5path, group="mm10") -{ - if (is(h5path, "TENxMatrixSeed")) { - if (!missing(group)) - stop(wmsg("TENxMatrix() must be called with a single argument ", - "when passed a TENxMatrixSeed object")) - seed <- h5path - } else { - seed <- TENxMatrixSeed(h5path, group) - } - DelayedArray(seed) -} diff --git a/man/TENxMatrix-class.Rd b/man/TENxMatrix-class.Rd deleted file mode 100644 index a211e1e..0000000 --- a/man/TENxMatrix-class.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/TENxMatrix-class.R -\docType{class} -\name{TENxMatrix-class} -\alias{TENxMatrix-class} -\alias{TENxMatrixSeed-class} -\alias{dim,TENxMatrixSeed-method} -\alias{dimnames,TENxMatrixSeed-method} -\alias{TENxMatrix-class} -\alias{TENxMatrix} -\title{TENxMatrix objects} -\usage{ -\S4method{dim}{TENxMatrixSeed}(x) - -\S4method{dimnames}{TENxMatrixSeed}(x) - -TENxMatrix(h5path, group = "mm10") -} -\arguments{ -\item{x}{A \code{TENxMatrix-class} or \code{TENxMatrixSeed-class} -object.} - -\item{h5path}{character(1) path to the 10xGenomics hdf5 file, or a -\code{TENxMatrixSeed} instance.} - -\item{group}{character(1) group (e.g., \dQuote{mm10}) containing -scRNA-seq data; only allowed when \code{h5path} is a -character(1).} -} -\value{ -\code{TENxMatrix()} returns a TENxMatrix instance; - \code{dim()}, \code{dimnames()} and all other - \code{DelayedArray} methods operate on this instance. -} -\description{ -A container for representing 10xGenomics data. - TENxMatrix extends \link{DelayedArray} so all the operations - available on \link{DelayedArray} objects work on TENxMatrix objects. -} diff --git a/vignettes/TENxGenomics.Rmd b/vignettes/TENxGenomics.Rmd index 0c067f6..3ec603b 100644 --- a/vignettes/TENxGenomics.Rmd +++ b/vignettes/TENxGenomics.Rmd @@ -104,16 +104,18 @@ as.matrix(tenx[sample(nrow(tenx), 5), sample(ncol(tenx), 3)]) ## Using a TENxMatrix object An alternative to creating `TENxGenomics` object `tenx` is to wrap the -10xGenomics data in a `TENxMatrix` object. +10xGenomics data in a `TENxMatrix` object. The `TENxMatrix` container is +implemented in the [HDF5Array][] package. ```{r} +library(HDF5Array) tenxmat <- TENxMatrix(path) ``` The `TENxMatrix` class extends the `DelayedArray` class defined in the [DelayedArray][] package so all the operations available on `DelayedArray` -objects work on `TENxMatrix` objects. See `?DelayedArray` for more -information. +objects work on `TENxMatrix` objects. See `?TENxMatrix` and `?DelayedArray` +for more information. ## Rich From 07d1a505d6191323bd1e401bd29db1bd2dfd44a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Pag=C3=A8s?= Date: Thu, 2 Aug 2018 01:27:45 -0700 Subject: [PATCH 2/2] 2 minor tweaks --- vignettes/TENxGenomics.Rmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vignettes/TENxGenomics.Rmd b/vignettes/TENxGenomics.Rmd index 3ec603b..34906f4 100644 --- a/vignettes/TENxGenomics.Rmd +++ b/vignettes/TENxGenomics.Rmd @@ -20,6 +20,7 @@ knitr::opts_chunk$set( suppressPackageStartupMessages({ library(TENxGenomics) library(BiocFileCache) + library(HDF5Array) library(SummarizedExperiment) library(Rtsne) }) @@ -306,3 +307,4 @@ sessionInfo() [BiocFileCache]: https://bioconductor.org/packages/BiocFileCache [SummarizedExperiment]: https://bioconductor.org/packages/SummarizedExperiment [DelayedArray]: https://bioconductor.org/packages/DelayedArray +[HDF5Array]: https://bioconductor.org/packages/HDF5Array