diff --git a/paper/paper.bib b/paper/paper.bib index b4265add..8a67ca19 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -1344,3 +1344,86 @@ @melonora doi = {10.5281/zenodo.8115575}, file = {Snapshot:/Users/aj/Zotero/storage/VX4YHQCL/8115575.html:text/html}, } + +@article{palla_squidpy_2022, + title = {Squidpy: a scalable framework for spatial omics analysis}, + volume = {19}, + rights = {2022 The Author(s)}, + issn = {1548-7105}, + url = {https://www.nature.com/articles/s41592-021-01358-2}, + doi = {10.1038/s41592-021-01358-2}, + shorttitle = {Squidpy}, + abstract = {Spatial omics data are advancing the study of tissue organization and cellular communication at an unprecedented scale. Flexible tools are required to store, integrate and visualize the large diversity of spatial omics data. Here, we present Squidpy, a Python framework that brings together tools from omics and image analysis to enable scalable description of spatial molecular data, such as transcriptome or multivariate proteins. Squidpy provides efficient infrastructure and numerous analysis methods that allow to efficiently store, manipulate and interactively visualize spatial omics data. Squidpy is extensible and can be interfaced with a variety of already existing libraries for the scalable analysis of spatial omics data.}, + pages = {171--178}, + number = {2}, + journaltitle = {Nature Methods}, + shortjournal = {Nat Methods}, + author = {Palla, Giovanni and Spitzer, Hannah and Klein, Michal and Fischer, David and Schaar, Anna Christina and Kuemmerle, Louis Benedikt and Rybakov, Sergei and Ibarra, Ignacio L. and Holmberg, Olle and Virshup, Isaac and Lotfollahi, Mohammad and Richter, Sabrina and Theis, Fabian J.}, + urldate = {2024-05-15}, + date = {2022-02}, + langid = {english}, + note = {Publisher: Nature Publishing Group}, + keywords = {Data integration, Imaging, Software, Transcriptomics}, + file = {Full Text PDF:/Users/aj/Zotero/storage/X2GQX7F5/Palla et al. - 2022 - Squidpy a scalable framework for spatial omics an.pdf:application/pdf}, +} + +@article{dries_giotto_2021, + title = {Giotto: a toolbox for integrative analysis and visualization of spatial expression data}, + volume = {22}, + issn = {1474-760X}, + url = {https://doi.org/10.1186/s13059-021-02286-2}, + doi = {10.1186/s13059-021-02286-2}, + shorttitle = {Giotto}, + abstract = {Spatial transcriptomic and proteomic technologies have provided new opportunities to investigate cells in their native microenvironment. Here we present Giotto, a comprehensive and open-source toolbox for spatial data analysis and visualization. The analysis module provides end-to-end analysis by implementing a wide range of algorithms for characterizing tissue composition, spatial expression patterns, and cellular interactions. Furthermore, single-cell {RNAseq} data can be integrated for spatial cell-type enrichment analysis. The visualization module allows users to interactively visualize analysis outputs and imaging features. To demonstrate its general applicability, we apply Giotto to a wide range of datasets encompassing diverse technologies and platforms.}, + pages = {78}, + number = {1}, + journaltitle = {Genome Biology}, + shortjournal = {Genome Biology}, + author = {Dries, Ruben and Zhu, Qian and Dong, Rui and Eng, Chee-Huat Linus and Li, Huipeng and Liu, Kan and Fu, Yuntian and Zhao, Tianxiao and Sarkar, Arpan and Bao, Feng and George, Rani E. 
and Pierson, Nico and Cai, Long and Yuan, Guo-Cheng}, + urldate = {2024-05-15}, + date = {2021-03-08}, + file = {Full Text PDF:/Users/aj/Zotero/storage/SKSFHISI/Dries et al. - 2021 - Giotto a toolbox for integrative analysis and vis.pdf:application/pdf}, +} + +@article{hao_dictionary_2024, + title = {Dictionary learning for integrative, multimodal and scalable single-cell analysis}, + volume = {42}, + rights = {2023 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + issn = {1546-1696}, + url = {https://www.nature.com/articles/s41587-023-01767-y}, + doi = {10.1038/s41587-023-01767-y}, + abstract = {Mapping single-cell sequencing profiles to comprehensive reference datasets provides a powerful alternative to unsupervised analysis. However, most reference datasets are constructed from single-cell {RNA}-sequencing data and cannot be used to annotate datasets that do not measure gene expression. Here we introduce ‘bridge integration’, a method to integrate single-cell datasets across modalities using a multiomic dataset as a molecular bridge. Each cell in the multiomic dataset constitutes an element in a ‘dictionary’, which is used to reconstruct unimodal datasets and transform them into a shared space. Our procedure accurately integrates transcriptomic data with independent single-cell measurements of chromatin accessibility, histone modifications, {DNA} methylation and protein levels. Moreover, we demonstrate how dictionary learning can be combined with sketching techniques to improve computational scalability and harmonize 8.6 million human immune cell profiles from sequencing and mass cytometry experiments. Our approach, implemented in version 5 of our Seurat toolkit (http://www.satijalab.org/seurat), broadens the utility of single-cell reference datasets and facilitates comparisons across diverse molecular modalities.}, + pages = {293--304}, + number = {2}, + journaltitle = {Nature Biotechnology}, + shortjournal = {Nat Biotechnol}, + author = {Hao, Yuhan and Stuart, Tim and Kowalski, Madeline H. and Choudhary, Saket and Hoffman, Paul and Hartman, Austin and Srivastava, Avi and Molla, Gesmira and Madad, Shaista and Fernandez-Granda, Carlos and Satija, Rahul}, + urldate = {2024-05-15}, + date = {2024-02}, + langid = {english}, + note = {Publisher: Nature Publishing Group}, + keywords = {Epigenomics, Genomics}, + file = {Full Text PDF:/Users/aj/Zotero/storage/CKPPTRT2/Hao et al. - 2024 - Dictionary learning for integrative, multimodal an.pdf:application/pdf}, +} + +@article{stringer_cellpose_2021, + title = {Cellpose: a generalist algorithm for cellular segmentation}, + volume = {18}, + rights = {2020 The Author(s), under exclusive licence to Springer Nature America, Inc.}, + issn = {1548-7105}, + url = {https://www.nature.com/articles/s41592-020-01018-x}, + doi = {10.1038/s41592-020-01018-x}, + shorttitle = {Cellpose}, + abstract = {Many biological applications require the segmentation of cell bodies, membranes and nuclei from microscopy images. Deep learning has enabled great progress on this problem, but current methods are specialized for images that have large training datasets. Here we introduce a generalist, deep learning-based segmentation method called Cellpose, which can precisely segment cells from a wide range of image types and does not require model retraining or parameter adjustments. Cellpose was trained on a new dataset of highly varied images of cells, containing over 70,000 segmented objects. 
We also demonstrate a three-dimensional (3D) extension of Cellpose that reuses the two-dimensional (2D) model and does not require 3D-labeled data. To support community contributions to the training data, we developed software for manual labeling and for curation of the automated results. Periodically retraining the model on the community-contributed data will ensure that Cellpose improves constantly.}, + pages = {100--106}, + number = {1}, + journaltitle = {Nature Methods}, + shortjournal = {Nat Methods}, + author = {Stringer, Carsen and Wang, Tim and Michaelos, Michalis and Pachitariu, Marius}, + urldate = {2024-05-15}, + date = {2021-01}, + langid = {english}, + note = {Publisher: Nature Publishing Group}, + keywords = {Cell biology, Computational biology and bioinformatics}, + file = {Full Text PDF:/Users/aj/Zotero/storage/GA6YHTHZ/Stringer et al. - 2021 - Cellpose a generalist algorithm for cellular segm.pdf:application/pdf}, +} diff --git a/paper/paper.md b/paper/paper.md index 960ce2dd..bbf27166 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -33,7 +33,7 @@ A variety of methods have been introduced for high multiplexed imaging of tissue Spatial feature tables provide the quantitative data for analysis of high-plex data but human inspection of the original image data remains essential. At the current state of the art, many of the critical morphological details in high-resolution images cannot be fully and accurately quantified. Segmentation is also subject to errors identifiable by humans, but not fully resolvable computationally [@baker_quality_2024]. As a consequence, computation of spatial features and relationships must be performed in combination with visualization of the underlying image data. Humans excel at identifying tissue features that correspond to classical histo-morphologies; they are also effective at discriminating foreground signals from variable background [@nirmal_cell_2023] using a process of “visual gating” (perception of high and low-intensity levels while visualizing an image). More generally, effective integration of visualization and computation enables nuanced interpretation of cellular organization in relation to established tissue architectures. -While packages such as squidpy, Giotto, and Seurat have the potential to manage multiplexed imaging data, their functionalities are primarily optimized for spatial transcriptomics data. In contrast, `SCIMAP` is specifically designed to address the unique requirements of multiplexed imaging data analysis, offering features such as image-based visual gating and the integration of prior knowledge for cellular phenotyping, among others. `SCIMAP` uses the Python-based Napari [@chiu_napari_2022; @ahlers_napari_2023] image viewer to leverage these capabilities by providing a seamless interface to inspect and annotate high-plex imaging data alongside computational analysis. For example, we have implemented an image-based gating approach that allows users to visually determine the threshold that discriminates background from a true signal at both a whole-specimen and single-cell level. Users can also select specific regions of interest (ROIs) for selective or deeper analysis. This involves drawing ROIs over images (freehand or geometric) and then selecting the underlying single cell data for further analysis. 
This capability is essential for incorporating histopathological information on common tissue structures (e.g., epidermis, dermis, follicles), immune structures (e.g., secondary and tertiary lymphoid structures), tumor domains (e.g., tumor center, boundary, tumor buds), and tumor grade or stage (e.g., early lesions, invasive regions, established nodules). It also allows for excluding regions affected by significant tissue loss, folding, or artifactual staining [@baker_quality_2024]. `SCIMAP` then performs statistical and spatial analyses on individual ROIs or sets of ROIs. Spatial analysis, including the measurement of distances between cells, analysis of interaction patterns, categorization into neighborhoods, and scoring of these patterns, is crucial for elucidating the cellular communications that underpin the functional aspects of the biology being studied. `SCIMAP` offers various functions to facilitate these analyses. +While packages such as squidpy [@palla_squidpy_2022], Giotto [@dries_giotto_2021], and Seurat [@hao_dictionary_2024] have the potential to manage multiplexed imaging data, their functionalities are primarily optimized for spatial transcriptomics data. In contrast, `SCIMAP` is specifically designed to address the unique requirements of multiplexed imaging data analysis, offering features such as image-based visual gating and the integration of prior knowledge for cellular phenotyping, among others. `SCIMAP` uses the Python-based Napari [@chiu_napari_2022; @ahlers_napari_2023] image viewer to leverage these capabilities by providing a seamless interface to inspect and annotate high-plex imaging data alongside computational analysis. For example, we have implemented an image-based gating approach that allows users to visually determine the threshold that discriminates background from a true signal at both a whole-specimen and single-cell level. Users can also select specific regions of interest (ROIs) for selective or deeper analysis. This involves drawing ROIs over images (freehand or geometric) and then selecting the underlying single cell data for further analysis. This capability is essential for incorporating histopathological information on common tissue structures (e.g., epidermis, dermis, follicles), immune structures (e.g., secondary and tertiary lymphoid structures), tumor domains (e.g., tumor center, boundary, tumor buds), and tumor grade or stage (e.g., early lesions, invasive regions, established nodules). It also allows for excluding regions affected by significant tissue loss, folding, or artifactual staining [@baker_quality_2024]. `SCIMAP` then performs statistical and spatial analyses on individual ROIs or sets of ROIs. Spatial analysis, including the measurement of distances between cells, analysis of interaction patterns, categorization into neighborhoods, and scoring of these patterns, is crucial for elucidating the cellular communications that underpin the functional aspects of the biology being studied. `SCIMAP` offers various functions to facilitate these analyses. Lastly, a single high-plex whole slide image can exceed 100GB and contain more than 10$^6$ cells, necessitating optimized functions for handling large matrices and images. `SCIMAP` employs the well-established AnnData object structure, complemented by Dask and Zarr for efficient image loading in Napari. This approach facilitates seamless viewing of images with overlaid data layers, thus enabling effective analysis of large datasets.
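A minimal sketch of the visual-gating and ROI workflow described above, assuming a pre-segmented spatial feature table: the function names (`sm.pl.gate_finder`, `sm.pp.rescale`, `sm.pl.addROI_image`) follow the public SCIMAP API, but the file paths, marker name, and keyword arguments shown here are illustrative placeholders and should be checked against the SCIMAP documentation.

```python
import anndata as ad
import pandas as pd
import scimap as sm

# Spatial feature table: single-cell expression matrix plus X/Y centroids
# (placeholder file name).
adata = ad.read_h5ad("tonsil_section.h5ad")

# Visual gating: overlay candidate intensity thresholds for one marker on the
# raw image in Napari so the user can pick the value that separates true
# signal from background.
sm.pl.gate_finder(
    image_path="tonsil_section.ome.tif",  # placeholder OME-TIFF
    adata=adata,
    marker_of_interest="CD45",            # example marker
)

# Apply the manually chosen gates to rescale the expression matrix
# (manual_gates.csv is a placeholder table of marker/gate pairs).
manual_gate = pd.read_csv("manual_gates.csv")
adata = sm.pp.rescale(adata, gate=manual_gate)

# Draw freehand or geometric ROIs (e.g., epidermis, tumor boundary) in Napari
# and transfer the annotations back onto the underlying single cells, so that
# downstream statistical and spatial analyses can be run per ROI.
adata = sm.pl.addROI_image(
    image_path="tonsil_section.ome.tif",
    adata=adata,
    label="ROI",
)
```

Because `SCIMAP` hands the image to Napari through Dask/Zarr, whole-slide images in the 100GB range can be browsed with overlaid data layers without loading the full image into memory.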
To date, `SCIMAP` has been used in the analysis of over 5 datasets from 8 tissue and cancer types [@yapp_multiplexed_2024; @nirmal_spatial_2022; @gaglia_lymphocyte_2023; @maliga_immune_2024]. @@ -43,7 +43,7 @@ ![SCIMAP Workflow Overview. The schematic highlights data import, cell classification, spatial analysis, and visualization techniques within the SCIMAP toolbox.\label{fig:workflow}](figure-workflow.png) -`SCIMAP` operates on segmented single-cell data derived from imaging data using tools such as cellpose or MCMICRO. The essential inputs for `SCIMAP` are: (a) a single-cell expression matrix and (b) the X and Y coordinates for each cell. Additionally, multi-stack OME-TIFF or TIFF images can be optionally provided to enable visualization of the data analysis on the original raw images. +`SCIMAP` operates on segmented single-cell data derived from imaging data using tools such as cellpose [@stringer_cellpose_2021] or MCMICRO [@schapiro_mcmicro_2022]. The essential inputs for `SCIMAP` are: (a) a single-cell expression matrix and (b) the X and Y coordinates for each cell. Additionally, multi-stack OME-TIFF or TIFF images can be optionally provided to enable visualization of the data analysis on the original raw images. `SCIMAP` comprises four main modules: preprocessing, analysis tools, visualization, and external methods. The preprocessing tools include functions for normalization, batch correction, and streamlined import from cloud processing pipelines such as MCMICRO [@schapiro_mcmicro_2022]. The analysis tools offer standard single-cell analysis techniques such as dimensionality reduction, clustering, prior knowledge-based cell phenotyping (a method through which cells are classified into specific cell types based on patterns of marker expression defined by the user), and various spatial analysis tools for measuring cellular distances, identifying regions of specific cell type aggregation, and assessing statistical differences in proximity scores or interaction frequencies. `SCIMAP` also includes neighborhood detection algorithms that utilize spatial-LDA [@wang_spatial_2007] for categorical data (cell types or clusters) and spatial lag for continuous data (marker expression values). All tools within the `SCIMAP` package for spatial analysis are compatible with both 2D and 3D data. Most analysis tools come with corresponding visualization functions to plot the results effectively. Additionally, the external methods module facilitates the integration of new tools developed by the community into `SCIMAP`, further extending its utility and applicability to both 2D and 3D data.
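As a rough end-to-end illustration of the four modules (preprocessing, analysis tools, visualization, external methods), the sketch below strings together a typical pass from an MCMICRO feature table to neighborhood analysis. The module and function names mirror the SCIMAP API (`sm.pp`, `sm.tl`, `sm.pl`), but the file paths are placeholders and the calls rely mostly on default arguments; exact signatures should be verified against the package documentation.

```python
import pandas as pd
import scimap as sm

# Preprocessing: import an MCMICRO quantification table (single-cell
# expression matrix plus X/Y centroids) into an AnnData object.
adata = sm.pp.mcmicro_to_scimap(["quantification/sample_1.csv"])  # placeholder path

# Prior-knowledge phenotyping: classify cells using a user-defined
# marker-expression workflow, typically maintained as a CSV.
phenotype_workflow = pd.read_csv("phenotype_workflow.csv")  # placeholder path
adata = sm.tl.phenotype_cells(adata, phenotype=phenotype_workflow, label="phenotype")

# Spatial analysis: distances between cell types, cell-cell interaction
# frequencies, and neighborhood detection via spatial-LDA on the
# categorical phenotype labels.
adata = sm.tl.spatial_distance(adata)
adata = sm.tl.spatial_interaction(adata)
adata = sm.tl.spatial_lda(adata)

# Visualization: most analysis tools have a matching plotting function.
sm.pl.spatial_interaction(adata)
```

Each step returns the same AnnData object with its results attached, which is what lets the plotting functions, and tools wrapped through the external methods module, build on earlier outputs.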