Skip to content

Commit

Permalink
add doc for XeniumReader and ensemblID to gene names (#75)
Browse files Browse the repository at this point in the history
* modify visium HD pooling logiic

* resolve cell_vit bug on windows

* HESTData: provide util to map ensemble ID to gene name (#71)

* HESTData: provide util to map ensemble ID to gene name

* HESTData: filter_na=False as default for ensemble ID mapping

* add unit tests and inplace arg

---------

Co-authored-by: Paul Doucet <[email protected]>

* add xenium_reader doc

* small correction

* remove test

---------

Co-authored-by: Konstantin Hemker <[email protected]>
  • Loading branch information
pauldoucet and konst-int-i authored Nov 23, 2024
1 parent 0512f57 commit c8bef8c
Show file tree
Hide file tree
Showing 7 changed files with 205 additions and 106 deletions.
12 changes: 12 additions & 0 deletions docs/source/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@
HESTData
```

## Pooling of transcripts, binning

```{eval-rst}
.. currentmodule:: hest.readers
.. autosummary::
:toctree: generated
pool_transcripts_xenium
pool_bins_visiumhd
```

## Batch effect visualization/correction

```{eval-rst}
Expand Down
43 changes: 40 additions & 3 deletions src/hest/HESTData.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,10 @@ def read_hest_wsi(wsi: WSI, width, height):

return SpatialData(tables=new_table, images=images, shapes=shapes)

def ensembleID_to_gene(self):
ensembleID_to_gene(self)


class VisiumHESTData(HESTData):
def __init__(self,
adata: sc.AnnData, # type: ignore
Expand Down Expand Up @@ -1239,11 +1243,44 @@ def unify_gene_names(adata: sc.AnnData, species="human", drop=False) -> sc.AnnDa

if drop:
adata = adata[:, ~remaining]

# TODO return dict map of renamed, and remaining

return adata

def ensembleID_to_gene(st: HESTData, filter_na = False) -> HESTData:
"""
Converts ensemble gene IDs of a HESTData object using Biomart annotations and filter out genes with no matching Ensembl ID
Args:
st (HESTData): HESTData object
filter_na (bool): whenever to filter genes that are not valid ensemble IDs. Defaults to False.
Returns:
HESTData: HESTData object with gene names instead of ensemble gene IDs
"""

import scanpy as sc
species = st.meta['species']
org = "hsapiens" if species == "Homo sapiens" else "mmusculus"

annotations = sc.queries.biomart_annotations(org=org,attrs=['ensembl_gene_id', 'external_gene_name'], use_cache=True)
ensembl_to_gene_name = dict(zip(annotations['ensembl_gene_id'], annotations['external_gene_name']))


st.adata.var['gene_name'] = st.adata.var_names.map(ensembl_to_gene_name, na_action=None)

if filter_na:
st.adata.var_names = st.adata.var['gene_name'].fillna('')
else:
st.adata.var['gene_name'] = st.adata.var['gene_name'].where(st.adata.var['gene_name'].notna(), st.adata.var_names)

valid_genes = st.adata.var['gene_name'].notna()
st.adata = st.adata[:, valid_genes]


return st


def save_spatial_plot(adata: sc.AnnData, save_path: str, name: str='', key='total_counts', pl_kwargs={}):
"""Save the spatial plot from that sc.AnnData
Expand All @@ -1255,7 +1292,7 @@ def save_spatial_plot(adata: sc.AnnData, save_path: str, name: str='', key='tota
"""
import scanpy as sc

fig = sc.pl.spatial(adata, show=None, img_key="downscaled_fullres", color=[key], title=f"in_tissue spots", return_fig=True, **pl_kwargs)
fig = sc.pl.spatial(adata, show=False, img_key="downscaled_fullres", color=[key], title=f"in_tissue spots", return_fig=True, **pl_kwargs)

filename = f"{name}spatial_plots.png"

Expand Down
5 changes: 3 additions & 2 deletions src/hest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .utils import tiff_save, find_pixel_size_from_spot_coords, write_10X_h5, get_k_genes, SpotPacking
from .autoalign import autoalign_visium
from .readers import *
from .HESTData import HESTData, read_HESTData, load_hest, iter_hest
from .HESTData import HESTData, read_HESTData, load_hest, iter_hest, ensembleID_to_gene
from .segmentation.cell_segmenters import segment_cellvit

__all__ = [
Expand All @@ -20,5 +20,6 @@
'autoalign_visium',
'write_10X_h5',
'HESTData',
'segment_cellvit'
'segment_cellvit',
'ensembleID_to_gene'
]
12 changes: 7 additions & 5 deletions src/hest/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,12 +308,12 @@ def preprocess_cells_visium_hd(
nuclei_path = None
) -> Tuple[sc.AnnData, gpd.GeoDataFrame, gpd.GeoDataFrame]:

segment_kwargs['save_dir'] = full_exp_dir


if nuclei_path is None:
segmenter = cell_segmenter_factory(segment_method)
logger.info('Segmenting cells...')
path_geojson = segmenter.segment_cells(he_wsi, '', pixel_size, **segment_kwargs)
path_geojson = segmenter.segment_cells(he_wsi, 'seg', pixel_size, save_dir=full_exp_dir, **segment_kwargs)
nuc_gdf = GeojsonCellReader().read_gdf(path_geojson)
else:
nuc_gdf = read_gdf(nuclei_path)
Expand Down Expand Up @@ -342,7 +342,8 @@ def process_meta_df(
segment_tissue=True,
registration_kwargs={},
read_kwargs={},
segment_kwargs={}
segment_kwargs={},
preprocess_kwargs={}
):
"""Internal use method, process all the raw ST data in the meta_df"""
for _, row in tqdm(meta_df.iterrows(), total=len(meta_df)):
Expand Down Expand Up @@ -404,21 +405,22 @@ def process_meta_df(
write_geojson(warped_cells, os.path.join(path, 'processed', f'he_cell_seg.geojson'), '', chunk=True)
write_geojson(warped_nuclei, os.path.join(path, 'processed', f'he_nucleus_seg.geojson'), '', chunk=True)
elif isinstance(st, VisiumHDHESTData):
segment_config = {'method': 'cellvit'}
segment_config = {}
binning_config = {}

bc_matrix_path = find_first_file_endswith(os.path.join(path, 'binned_outputs', 'square_002um'), 'filtered_feature_bc_matrix.h5')
bin_positions_path = find_first_file_endswith(os.path.join(path, 'binned_outputs', 'square_002um', 'spatial'), 'tissue_positions.parquet')

del st.wsi
preprocess_cells_visium_hd(
os.path.join(path, 'processed', ALIGNED_HE_FILENAME),
full_exp_dir,
row['id'],
st.pixel_size,
bc_matrix_path,
bin_positions_path,
segment_config,
binning_config,
**preprocess_kwargs
)

if isinstance(st, XeniumHESTData):
Expand Down
Loading

0 comments on commit c8bef8c

Please sign in to comment.