From 6a5e5cd780843e457d52dc68ef6ac1ed329788e4 Mon Sep 17 00:00:00 2001 From: Benjamin Rombaut Date: Wed, 8 Nov 2023 16:58:55 +0100 Subject: [PATCH 1/7] add macsima reader --- pyproject.toml | 2 + src/spatialdata_io/__init__.py | 2 + src/spatialdata_io/_constants/_constants.py | 10 + src/spatialdata_io/readers/_utils/_utils.py | 115 ++++++++++++ src/spatialdata_io/readers/macsima.py | 193 ++++++++++++++++++++ 5 files changed, 322 insertions(+) create mode 100644 src/spatialdata_io/readers/macsima.py diff --git a/pyproject.toml b/pyproject.toml index 58f6858a..397faff3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ dependencies = [ "dask-image", "pyarrow", "readfcs", + "ome-types", + "aicsimageio", ] [project.optional-dependencies] diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index 7b63c9d4..392c53e7 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -3,6 +3,7 @@ from spatialdata_io.readers.codex import codex from spatialdata_io.readers.cosmx import cosmx from spatialdata_io.readers.curio import curio +from spatialdata_io.readers.macsima import macsima from spatialdata_io.readers.mcmicro import mcmicro from spatialdata_io.readers.merscope import merscope from spatialdata_io.readers.steinbock import steinbock @@ -18,6 +19,7 @@ "mcmicro", "steinbock", "merscope", + "macsima", ] __version__ = version("spatialdata-io") diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index c572fced..4e4bb937 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -18,6 +18,16 @@ class CodexKeys(ModeEnum): IMAGE_TIF = ".tif" +@unique +class MacsimaKeys(ModeEnum): + """Keys for *MACSima* formatted dataset.""" + + # images + METADATA_SUFFIX = ".QiPattern.txt" + IMAGE_OMETIF = ".ome.tif" + IMAGE_QPTIF = ".qptif" + + class CurioKeys(ModeEnum): """Keys for *Curio* formatted dataset.""" diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py index 8b8a6225..4390d1f4 100644 --- a/src/spatialdata_io/readers/_utils/_utils.py +++ b/src/spatialdata_io/readers/_utils/_utils.py @@ -5,8 +5,13 @@ from typing import Any, Optional, Union import numpy as np +import spatialdata as sd +import zarr from anndata import AnnData, read_text from h5py import File +from ome_types import from_tiff +from ome_types.model import Pixels, UnitsLength +from spatialdata._logging import logger from spatialdata_io.readers._utils._read_10x_h5 import _read_10x_h5 @@ -66,3 +71,113 @@ def _read_counts( adata.uns["spatial"] = {library_id: {"metadata": {}}} # can overwrite return adata, library_id + + +def add_ome_attr(path: Path) -> None: + """Add OME metadata to Zarr store of SpatialData object.""" + path = Path(path) + assert path.exists() + # store = zarr.open(path, mode="r") + sdata = sd.SpatialData.read(path) + logger.debug(sdata) + # get subdirs + images = [p for p in path.glob("images/*") if p.is_dir()] + logger.debug(images) + # write the image data + for image in images: + name = image.stem + el = sdata.images[name] + if "scale0" in el: + el = el["scale0"] + logger.debug("picking scale0") + el = el[name] + logger.debug(el) + channel_names = list(el.coords["c"].data) + logger.debug(channel_names) + # get maximum possible value for dtype + dtype = el.dtype + max_value = np.iinfo(dtype).max + logger.debug(f"{dtype} {max_value}") + # store = parse_url(image, mode="w").store + # list of 7 basic colors + colors = ["ff0000", "00ff00", "0000ff", "ffff00", "00ffff", "ff00ff", "ffffff"] + channel_dicts = [ + { + "active": True if i < 3 else False, + "label": c, + "coefficient": 1, + "family": "linear", + "inverted": False, + # get one of the 7 colors, based on i + "color": colors[i % 7], + "window": { + "min": 0, + "start": 0, + "end": max_value, + "max": max_value, + }, + } + for i, c in enumerate(channel_names) + ] + root = zarr.open_group(image) + root.attrs.update( + { + "omero": { + "channels": channel_dicts, + "rdefs": { + "defaultT": 0, # First timepoint to show the user + "defaultZ": 0, # First Z section to show the user + "model": "color", # "color" or "greyscale" + }, + "name": name, + "version": "0.4", + } + } + ) + zarr.consolidate_metadata(path) + + +def calc_scale_factors(stack: Any, min_size: int = 1000, default_scale_factor: int = 2) -> list[int]: + """Calculate scale factors based on image size to get lowest resolution under min_size pixels.""" + # get lowest dimension, ignoring channels + lower_scale_limit = min(stack.shape[1:]) + scale_factor = default_scale_factor + scale_factors = [scale_factor] + lower_scale_limit /= scale_factor + while lower_scale_limit >= min_size: + # scale_factors are cumulative, so we don't need to do e.g. scale_factor *= 2 + scale_factors.append(scale_factor) + lower_scale_limit /= scale_factor + return scale_factors + + +def parse_channels(path: Path) -> list[str]: + """Parse channel names from an OME-TIFF file.""" + images = from_tiff(path).images + if len(images) > 1: + logger.warning("Found multiple images in OME-TIFF file. Only the first one will be used.") + channels = images[0].pixels.channels + logger.debug(channels) + names = [c.name for c in channels] + return names + + +def parse_physical_size(path: Path | None = None, ome_pixels: Pixels | None = None) -> float: + """Parse physical size from OME-TIFF to micrometer.""" + pixels = ome_pixels or from_tiff(path).images[0].pixels + logger.debug(pixels) + if pixels.physical_size_x_unit != pixels.physical_size_y_unit: + logger.error("Physical units for x and y dimensions are not the same.") + raise NotImplementedError + if pixels.physical_size_x != pixels.physical_size_y: + logger.error("Physical sizes for x and y dimensions are the same.") + raise NotImplementedError + # convert to micrometer if needed + if pixels.physical_size_x_unit == UnitsLength.NANOMETER: + physical_size = pixels.physical_size_x / 1000 + elif pixels.physical_size_x_unit == UnitsLength.MICROMETER: + physical_size = pixels.physical_size_x + else: + logger.error(f"Physical unit not recognized: '{pixels.physical_size_x_unit}'.") + raise NotImplementedError + return float(physical_size) diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py new file mode 100644 index 00000000..03a4b92b --- /dev/null +++ b/src/spatialdata_io/readers/macsima.py @@ -0,0 +1,193 @@ +from __future__ import annotations + +from collections.abc import Mapping +from math import e +from pathlib import Path +from types import MappingProxyType +from typing import Any + +import pandas as pd +from dask_image.imread import imread +from spatialdata import SpatialData +from spatialdata._logging import logger + +from spatialdata_io._constants._constants import MacsimaKeys +from spatialdata_io._docs import inject_docs +from spatialdata_io.readers._utils._utils import parse_physical_size, calc_scale_factors + +from spatialdata._logging import logger +import spatialdata as sd +from ome_types import from_tiff +import dask.array as da +from aicsimageio import AICSImage + +__all__ = ["macsima"] + + +@inject_docs(vx=MacsimaKeys) +def macsima( + path: str | Path, + metadata: bool = True, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + subset: int | None = None, + c_subset: int | None = None, + max_chunk_size: int = 1024, + c_chunks_size: int = 1, + multiscale: bool = True, + transformations: bool = True, + scale_factors: list[int] | None = None, + default_scale_factor: int = 2, +) -> SpatialData: + """ + Read *MACSima* formatted dataset. + + This function reads images from a MACSima cyclic imaging experiment. Metadata of the cycles is either parsed from a metadata file or image names. + For parsing .qptiff files, installation of bioformats is adviced. + + .. seealso:: + + - `MACSima output `_. + + Parameters + ---------- + path + Path to the directory containing the data. + metadata + Whether to search for a .txt file with metadata in the folder. If False, the metadata in the image names is used. + imread_kwargs + Keyword arguments passed to :func:`dask_image.imread.imread`. + subset + Subset the image to the first ``subset`` pixels in x and y dimensions. + c_subset + Subset the image to the first ``c_subset`` channels. + max_chunk_size + Maximum chunk size for x and y dimensions. + c_chunks_size + Chunk size for c dimension. + multiscale + Whether to create a multiscale image. + transformations + Whether to add a transformation from pixels to microns to the image. + scale_factors + Scale factors to use for downsampling. If None, scale factors are calculated based on image size. + default_scale_factor + Default scale factor to use for downsampling. + + Returns + ------- + :class:`spatialdata.SpatialData` + """ + path = Path(path) + path_files = [] + pixels_to_microns = None + if metadata: + # read metadata to get list of images and channel names + path_files = list(path.glob(f'*{MacsimaKeys.METADATA_SUFFIX}')) + if len(path_files) > 0: + if len(path_files) > 1: + logger.warning(f"Cannot determine metadata file. Expecting a single file with format .txt. Got multiple files: {path_files}") + path_metadata = list(path.glob(f'*{MacsimaKeys.METADATA_SUFFIX}'))[0] + df = pd.read_csv(path_metadata, sep="\t", header=0, index_col=None) + logger.debug(df) + df['channel'] = df['ch1'].str.split(' ').str[0] + df['round_channel'] = df['Round'] + ' ' + df['channel'] + path_files = [path / p for p in df.filename.values] + assert all([p.exists() for p in path_files]), f"Cannot find all images in metadata file. Missing: {[p for p in path_files if not p.exists()]}" + round_channels = df.round_channel.values + stack, sorted_channels = get_stack(path_files, round_channels, imread_kwargs) + else: + logger.warning(f"Cannot find metadata file. Will try to parse from image names.") + if not metadata or len(path_files) == 0: + # get list of image paths, get channel name from OME data and cycle number from filename + # look for OME-TIFF files + ome_patt = f'*{MacsimaKeys.IMAGE_OMETIF}*' + path_files = list(path.glob(ome_patt)) + if not path_files: + # look for .qptiff files + qptif_patt = f'*{MacsimaKeys.IMAGE_QPTIF}*' + path_files = list(path.glob(qptif_patt)) + logger.debug(path_files) + if not path_files: + raise ValueError("Cannot determine data set. Expecting '{ome_patt}' or '{qptif_patt}' files") + # TODO: warning if not 1 ROI with 1 .qptiff per cycle + # TODO: robuster parsing of {name}_cycle{round}_{scan}.qptiff + rounds = [f"R{int(p.stem.split('_')[1][5:])}" for p in path_files] + # parse .qptiff files + imgs = [AICSImage(img, **imread_kwargs) for img in path_files] + # sort based on cycle number + rounds, imgs = zip(*sorted(zip(rounds, imgs), key=lambda x: int(x[0][1:]))) + channels_per_round = [img.channel_names for img in imgs] + # take first image and first channel to get physical size + ome_data = imgs[0].ome_metadata + logger.debug(ome_data) + pixels_to_microns = parse_physical_size(ome_pixels=ome_data.images[0].pixels) + da_per_round = [img.dask_data[0, :, 0, :, :] for img in imgs] + sorted_channels = [] + for r, cs in zip(rounds, channels_per_round): + for c in cs: + sorted_channels.append(f"{r} {c}") + stack = da.stack(da_per_round).squeeze() + # Parse OME XML + # img.ome_metadata + # arr = img.dask_data[0, :, 0, :, :] + # channel_names = img.channel_names + logger.debug(sorted_channels) + logger.debug(stack) + else: + logger.debug(path_files[0]) + # make sure not to remove round 0 when parsing! + rounds = [f"R{int(p.stem.split('_')[0])}" for p in path_files] + channels = [from_tiff(p).images[0].pixels.channels[0].name for p in path_files] + round_channels = [f"{r} {c}" for r, c in zip(rounds, channels)] + stack, sorted_channels = get_stack(path_files, round_channels, imread_kwargs) + + # do subsetting if needed + if subset: + stack = stack[:, :subset, :subset] + if c_subset: + stack = stack[:c_subset, :, :] + sorted_channels = sorted_channels[:c_subset] + if multiscale and not scale_factors: + scale_factors = calc_scale_factors(stack, default_scale_factor=default_scale_factor) + if not multiscale: + scale_factors = None + logger.debug(f"Scale factors: {scale_factors}") + + t_dict = None + if transformations: + pixels_to_microns = pixels_to_microns or parse_physical_size(path_files[0]) + t_pixels_to_microns = sd.transformations.Scale([pixels_to_microns, pixels_to_microns], axes=("x", "y")) + # 'microns' is also used in merscope example + # no inverse needed as the transformation is already from pixels to microns + t_dict = {"microns": t_pixels_to_microns} + # # chunk_size can be 1 for channels + chunks = { + "x": max_chunk_size, + "y": max_chunk_size, + "c": c_chunks_size, + } + stack = sd.models.Image2DModel.parse( + stack, + # TODO: make sure y and x locations are correct + dims=["c", "y", "x"], + scale_factors=scale_factors, + chunks=chunks, + c_coords=sorted_channels, + transformations=t_dict, + ) + sdata = sd.SpatialData(images={path.stem: stack}, table=None) + + return sdata + + +def get_stack(path_files: list[Path], round_channels: list[str], imread_kwargs: Mapping[str, Any])-> Any: + imgs_channels = list(zip(path_files, round_channels)) + logger.debug(imgs_channels) + # sort based on round number + imgs_channels = sorted(imgs_channels, key=lambda x: int(x[1].split(' ')[0][1:])) + logger.debug(f'Len imgs_channels: {len(imgs_channels)}') + # read in images and merge channels + sorted_paths, sorted_channels = list(zip(*imgs_channels)) + imgs = [imread(img, **imread_kwargs) for img in sorted_paths] + stack = da.stack(imgs).squeeze() + return stack, sorted_channels From 4a8d1fd9136f4d85ebf294064bd5f5335286f4e2 Mon Sep 17 00:00:00 2001 From: Benjamin Rombaut Date: Wed, 8 Nov 2023 16:58:55 +0100 Subject: [PATCH 2/7] add macsima reader --- pyproject.toml | 2 + src/spatialdata_io/__init__.py | 2 + src/spatialdata_io/_constants/_constants.py | 10 + src/spatialdata_io/readers/_utils/_utils.py | 115 +++++++++++- src/spatialdata_io/readers/macsima.py | 193 ++++++++++++++++++++ 5 files changed, 321 insertions(+), 1 deletion(-) create mode 100644 src/spatialdata_io/readers/macsima.py diff --git a/pyproject.toml b/pyproject.toml index 1e7c4c9f..d052fe12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,8 @@ dependencies = [ "dask-image", "pyarrow", "readfcs", + "ome-types", + "aicsimageio", ] [project.optional-dependencies] diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index 9b107061..8832dac8 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -4,6 +4,7 @@ from spatialdata_io.readers.cosmx import cosmx from spatialdata_io.readers.curio import curio from spatialdata_io.readers.dbit import dbit +from spatialdata_io.readers.macsima import macsima from spatialdata_io.readers.mcmicro import mcmicro from spatialdata_io.readers.merscope import merscope from spatialdata_io.readers.steinbock import steinbock @@ -30,6 +31,7 @@ "xenium_explorer_selection", "dbit", "visium_hd", + "macsima", ] __version__ = version("spatialdata-io") diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index fbbf0a52..8be824b3 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -18,6 +18,16 @@ class CodexKeys(ModeEnum): IMAGE_TIF = ".tif" +@unique +class MacsimaKeys(ModeEnum): + """Keys for *MACSima* formatted dataset.""" + + # images + METADATA_SUFFIX = ".QiPattern.txt" + IMAGE_OMETIF = ".ome.tif" + IMAGE_QPTIF = ".qptif" + + class CurioKeys(ModeEnum): """Keys for *Curio* formatted dataset.""" diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py index 63f5bdf2..46a0b175 100644 --- a/src/spatialdata_io/readers/_utils/_utils.py +++ b/src/spatialdata_io/readers/_utils/_utils.py @@ -6,8 +6,13 @@ from typing import Any, Optional, Union import numpy as np +import spatialdata as sd +import zarr from anndata import AnnData, read_text from h5py import File +from ome_types import from_tiff +from ome_types.model import Pixels, UnitsLength +from spatialdata._logging import logger from spatialdata_io.readers._utils._read_10x_h5 import _read_10x_h5 @@ -68,7 +73,6 @@ def _read_counts( adata.uns["spatial"] = {library_id: {"metadata": {}}} # can overwrite return adata, library_id - def _initialize_raster_models_kwargs( image_models_kwargs: Mapping[str, Any], labels_models_kwargs: Mapping[str, Any] ) -> tuple[dict[str, Any], dict[str, Any]]: @@ -84,3 +88,112 @@ def _initialize_raster_models_kwargs( if "scale_factors" not in labels_models_kwargs: labels_models_kwargs["scale_factors"] = [2, 2, 2, 2] return image_models_kwargs, labels_models_kwargs + +def add_ome_attr(path: Path) -> None: + """Add OME metadata to Zarr store of SpatialData object.""" + path = Path(path) + assert path.exists() + # store = zarr.open(path, mode="r") + sdata = sd.SpatialData.read(path) + logger.debug(sdata) + # get subdirs + images = [p for p in path.glob("images/*") if p.is_dir()] + logger.debug(images) + # write the image data + for image in images: + name = image.stem + el = sdata.images[name] + if "scale0" in el: + el = el["scale0"] + logger.debug("picking scale0") + el = el[name] + logger.debug(el) + channel_names = list(el.coords["c"].data) + logger.debug(channel_names) + # get maximum possible value for dtype + dtype = el.dtype + max_value = np.iinfo(dtype).max + logger.debug(f"{dtype} {max_value}") + # store = parse_url(image, mode="w").store + # list of 7 basic colors + colors = ["ff0000", "00ff00", "0000ff", "ffff00", "00ffff", "ff00ff", "ffffff"] + channel_dicts = [ + { + "active": True if i < 3 else False, + "label": c, + "coefficient": 1, + "family": "linear", + "inverted": False, + # get one of the 7 colors, based on i + "color": colors[i % 7], + "window": { + "min": 0, + "start": 0, + "end": max_value, + "max": max_value, + }, + } + for i, c in enumerate(channel_names) + ] + root = zarr.open_group(image) + root.attrs.update( + { + "omero": { + "channels": channel_dicts, + "rdefs": { + "defaultT": 0, # First timepoint to show the user + "defaultZ": 0, # First Z section to show the user + "model": "color", # "color" or "greyscale" + }, + "name": name, + "version": "0.4", + } + } + ) + zarr.consolidate_metadata(path) + + +def calc_scale_factors(stack: Any, min_size: int = 1000, default_scale_factor: int = 2) -> list[int]: + """Calculate scale factors based on image size to get lowest resolution under min_size pixels.""" + # get lowest dimension, ignoring channels + lower_scale_limit = min(stack.shape[1:]) + scale_factor = default_scale_factor + scale_factors = [scale_factor] + lower_scale_limit /= scale_factor + while lower_scale_limit >= min_size: + # scale_factors are cumulative, so we don't need to do e.g. scale_factor *= 2 + scale_factors.append(scale_factor) + lower_scale_limit /= scale_factor + return scale_factors + + +def parse_channels(path: Path) -> list[str]: + """Parse channel names from an OME-TIFF file.""" + images = from_tiff(path).images + if len(images) > 1: + logger.warning("Found multiple images in OME-TIFF file. Only the first one will be used.") + channels = images[0].pixels.channels + logger.debug(channels) + names = [c.name for c in channels] + return names + + +def parse_physical_size(path: Path | None = None, ome_pixels: Pixels | None = None) -> float: + """Parse physical size from OME-TIFF to micrometer.""" + pixels = ome_pixels or from_tiff(path).images[0].pixels + logger.debug(pixels) + if pixels.physical_size_x_unit != pixels.physical_size_y_unit: + logger.error("Physical units for x and y dimensions are not the same.") + raise NotImplementedError + if pixels.physical_size_x != pixels.physical_size_y: + logger.error("Physical sizes for x and y dimensions are the same.") + raise NotImplementedError + # convert to micrometer if needed + if pixels.physical_size_x_unit == UnitsLength.NANOMETER: + physical_size = pixels.physical_size_x / 1000 + elif pixels.physical_size_x_unit == UnitsLength.MICROMETER: + physical_size = pixels.physical_size_x + else: + logger.error(f"Physical unit not recognized: '{pixels.physical_size_x_unit}'.") + raise NotImplementedError + return float(physical_size) diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py new file mode 100644 index 00000000..03a4b92b --- /dev/null +++ b/src/spatialdata_io/readers/macsima.py @@ -0,0 +1,193 @@ +from __future__ import annotations + +from collections.abc import Mapping +from math import e +from pathlib import Path +from types import MappingProxyType +from typing import Any + +import pandas as pd +from dask_image.imread import imread +from spatialdata import SpatialData +from spatialdata._logging import logger + +from spatialdata_io._constants._constants import MacsimaKeys +from spatialdata_io._docs import inject_docs +from spatialdata_io.readers._utils._utils import parse_physical_size, calc_scale_factors + +from spatialdata._logging import logger +import spatialdata as sd +from ome_types import from_tiff +import dask.array as da +from aicsimageio import AICSImage + +__all__ = ["macsima"] + + +@inject_docs(vx=MacsimaKeys) +def macsima( + path: str | Path, + metadata: bool = True, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + subset: int | None = None, + c_subset: int | None = None, + max_chunk_size: int = 1024, + c_chunks_size: int = 1, + multiscale: bool = True, + transformations: bool = True, + scale_factors: list[int] | None = None, + default_scale_factor: int = 2, +) -> SpatialData: + """ + Read *MACSima* formatted dataset. + + This function reads images from a MACSima cyclic imaging experiment. Metadata of the cycles is either parsed from a metadata file or image names. + For parsing .qptiff files, installation of bioformats is adviced. + + .. seealso:: + + - `MACSima output `_. + + Parameters + ---------- + path + Path to the directory containing the data. + metadata + Whether to search for a .txt file with metadata in the folder. If False, the metadata in the image names is used. + imread_kwargs + Keyword arguments passed to :func:`dask_image.imread.imread`. + subset + Subset the image to the first ``subset`` pixels in x and y dimensions. + c_subset + Subset the image to the first ``c_subset`` channels. + max_chunk_size + Maximum chunk size for x and y dimensions. + c_chunks_size + Chunk size for c dimension. + multiscale + Whether to create a multiscale image. + transformations + Whether to add a transformation from pixels to microns to the image. + scale_factors + Scale factors to use for downsampling. If None, scale factors are calculated based on image size. + default_scale_factor + Default scale factor to use for downsampling. + + Returns + ------- + :class:`spatialdata.SpatialData` + """ + path = Path(path) + path_files = [] + pixels_to_microns = None + if metadata: + # read metadata to get list of images and channel names + path_files = list(path.glob(f'*{MacsimaKeys.METADATA_SUFFIX}')) + if len(path_files) > 0: + if len(path_files) > 1: + logger.warning(f"Cannot determine metadata file. Expecting a single file with format .txt. Got multiple files: {path_files}") + path_metadata = list(path.glob(f'*{MacsimaKeys.METADATA_SUFFIX}'))[0] + df = pd.read_csv(path_metadata, sep="\t", header=0, index_col=None) + logger.debug(df) + df['channel'] = df['ch1'].str.split(' ').str[0] + df['round_channel'] = df['Round'] + ' ' + df['channel'] + path_files = [path / p for p in df.filename.values] + assert all([p.exists() for p in path_files]), f"Cannot find all images in metadata file. Missing: {[p for p in path_files if not p.exists()]}" + round_channels = df.round_channel.values + stack, sorted_channels = get_stack(path_files, round_channels, imread_kwargs) + else: + logger.warning(f"Cannot find metadata file. Will try to parse from image names.") + if not metadata or len(path_files) == 0: + # get list of image paths, get channel name from OME data and cycle number from filename + # look for OME-TIFF files + ome_patt = f'*{MacsimaKeys.IMAGE_OMETIF}*' + path_files = list(path.glob(ome_patt)) + if not path_files: + # look for .qptiff files + qptif_patt = f'*{MacsimaKeys.IMAGE_QPTIF}*' + path_files = list(path.glob(qptif_patt)) + logger.debug(path_files) + if not path_files: + raise ValueError("Cannot determine data set. Expecting '{ome_patt}' or '{qptif_patt}' files") + # TODO: warning if not 1 ROI with 1 .qptiff per cycle + # TODO: robuster parsing of {name}_cycle{round}_{scan}.qptiff + rounds = [f"R{int(p.stem.split('_')[1][5:])}" for p in path_files] + # parse .qptiff files + imgs = [AICSImage(img, **imread_kwargs) for img in path_files] + # sort based on cycle number + rounds, imgs = zip(*sorted(zip(rounds, imgs), key=lambda x: int(x[0][1:]))) + channels_per_round = [img.channel_names for img in imgs] + # take first image and first channel to get physical size + ome_data = imgs[0].ome_metadata + logger.debug(ome_data) + pixels_to_microns = parse_physical_size(ome_pixels=ome_data.images[0].pixels) + da_per_round = [img.dask_data[0, :, 0, :, :] for img in imgs] + sorted_channels = [] + for r, cs in zip(rounds, channels_per_round): + for c in cs: + sorted_channels.append(f"{r} {c}") + stack = da.stack(da_per_round).squeeze() + # Parse OME XML + # img.ome_metadata + # arr = img.dask_data[0, :, 0, :, :] + # channel_names = img.channel_names + logger.debug(sorted_channels) + logger.debug(stack) + else: + logger.debug(path_files[0]) + # make sure not to remove round 0 when parsing! + rounds = [f"R{int(p.stem.split('_')[0])}" for p in path_files] + channels = [from_tiff(p).images[0].pixels.channels[0].name for p in path_files] + round_channels = [f"{r} {c}" for r, c in zip(rounds, channels)] + stack, sorted_channels = get_stack(path_files, round_channels, imread_kwargs) + + # do subsetting if needed + if subset: + stack = stack[:, :subset, :subset] + if c_subset: + stack = stack[:c_subset, :, :] + sorted_channels = sorted_channels[:c_subset] + if multiscale and not scale_factors: + scale_factors = calc_scale_factors(stack, default_scale_factor=default_scale_factor) + if not multiscale: + scale_factors = None + logger.debug(f"Scale factors: {scale_factors}") + + t_dict = None + if transformations: + pixels_to_microns = pixels_to_microns or parse_physical_size(path_files[0]) + t_pixels_to_microns = sd.transformations.Scale([pixels_to_microns, pixels_to_microns], axes=("x", "y")) + # 'microns' is also used in merscope example + # no inverse needed as the transformation is already from pixels to microns + t_dict = {"microns": t_pixels_to_microns} + # # chunk_size can be 1 for channels + chunks = { + "x": max_chunk_size, + "y": max_chunk_size, + "c": c_chunks_size, + } + stack = sd.models.Image2DModel.parse( + stack, + # TODO: make sure y and x locations are correct + dims=["c", "y", "x"], + scale_factors=scale_factors, + chunks=chunks, + c_coords=sorted_channels, + transformations=t_dict, + ) + sdata = sd.SpatialData(images={path.stem: stack}, table=None) + + return sdata + + +def get_stack(path_files: list[Path], round_channels: list[str], imread_kwargs: Mapping[str, Any])-> Any: + imgs_channels = list(zip(path_files, round_channels)) + logger.debug(imgs_channels) + # sort based on round number + imgs_channels = sorted(imgs_channels, key=lambda x: int(x[1].split(' ')[0][1:])) + logger.debug(f'Len imgs_channels: {len(imgs_channels)}') + # read in images and merge channels + sorted_paths, sorted_channels = list(zip(*imgs_channels)) + imgs = [imread(img, **imread_kwargs) for img in sorted_paths] + stack = da.stack(imgs).squeeze() + return stack, sorted_channels From 0bbfa8c9692c113617ca14832bc0f5af3f18e29a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:21:34 +0000 Subject: [PATCH 3/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/macsima2m.py | 1103 ++++++++++--------- scripts/nfmacsima.py | 872 ++++++++------- src/spatialdata_io/readers/_utils/_utils.py | 2 + src/spatialdata_io/readers/macsima.py | 45 +- 4 files changed, 1018 insertions(+), 1004 deletions(-) diff --git a/scripts/macsima2m.py b/scripts/macsima2m.py index b691ba75..214208d8 100644 --- a/scripts/macsima2m.py +++ b/scripts/macsima2m.py @@ -1,101 +1,110 @@ #!/usr/bin/python -import ome_types -import numpy as np -import pandas as pd -from pathlib import Path -#from PIL import Image -#import PIL -#from PIL.TiffTags import TAGS -import tifffile as tifff -from bs4 import BeautifulSoup -import os -from uuid import uuid4 +import argparse import copy -from ome_types import from_tiff,to_xml -from ome_types.model import OME,Image,Instrument,Pixels,TiffData,Channel,Plane -import ome_types.model +import os import platform -import argparse - - -#---CLI-BLOCK---# -parser=argparse.ArgumentParser() - - -parser.add_argument('-i', - '--input', - required=True, - help='Directory containing the antigen & bleaching cycles. Use frontslash to specify path.' - ) +from pathlib import Path +from uuid import uuid4 -parser.add_argument('-o', - '--output', - required=True, - help='Directory where the stacks will be saved. Use frontslash to specify path.\ - If directory does not exist it will be created.' - ) +import numpy as np +import ome_types +import ome_types.model +import pandas as pd -parser.add_argument('-c', - '--cycles', - required=True, - type=int, - nargs='*', - help='By default this input accepts two integer numbers which mark the \ +# from PIL import Image +# import PIL +# from PIL.TiffTags import TAGS +import tifffile as tifff +from bs4 import BeautifulSoup +from ome_types import to_xml +from ome_types.model import OME, Channel, Image, Pixels, Plane, TiffData + +# ---CLI-BLOCK---# +parser = argparse.ArgumentParser() + + +parser.add_argument( + "-i", + "--input", + required=True, + help="Directory containing the antigen & bleaching cycles. Use frontslash to specify path.", +) + +parser.add_argument( + "-o", + "--output", + required=True, + help="Directory where the stacks will be saved. Use frontslash to specify path.\ + If directory does not exist it will be created.", +) + +parser.add_argument( + "-c", + "--cycles", + required=True, + type=int, + nargs="*", + help="By default this input accepts two integer numbers which mark the \ start and end of the cycles to be taken. Alternatively, if the flag -il is activated \ - this input will accept a list of any number of specific cycles.' - ) - - -parser.add_argument('-ofl', - '--output_folders_list', - action='store_true', - help='Activate this flag to save the output images in a list of folders rather than in a tree structure. The list structure facilitates pointing to the files to run a batch job.' - ) - -parser.add_argument('-il', - '--input_mode_list', - action='store_true', - help='Activate this flag to provide the cycles argument a list of specific cycles of interest.' - ) - -parser.add_argument('-he', - '--hi_exposure_only', - action='store_true', - help='Activate this flag to extract only the set of images with the highest exposure time.' - ) - -parser.add_argument('-nb', - '--no_bleach_cycles', - action='store_false', - help='Activate this flag to deactivate the extraction of the bleaching cycles, i.e \ - only the antigen images will be extracted.' - ) - -args=parser.parse_args() -#---END_CLI-BLOCK---# - - - -#------INPUT BLOCK-------# -device_name='MACSIMA' -cycles_dir=Path(args.input) -stack_path=Path(args.output) -condition=args.input_mode_list - -if (condition==False and len(args.cycles)==2): - start=min(args.cycles) - end=max(args.cycles) - antigen_cycle_number=list(range(start,1+end)) - -elif condition==True: - antigen_cycle_number=args.cycles + this input will accept a list of any number of specific cycles.", +) + + +parser.add_argument( + "-ofl", + "--output_folders_list", + action="store_true", + help="Activate this flag to save the output images in a list of folders rather than in a tree structure. The list structure facilitates pointing to the files to run a batch job.", +) + +parser.add_argument( + "-il", + "--input_mode_list", + action="store_true", + help="Activate this flag to provide the cycles argument a list of specific cycles of interest.", +) + +parser.add_argument( + "-he", + "--hi_exposure_only", + action="store_true", + help="Activate this flag to extract only the set of images with the highest exposure time.", +) + +parser.add_argument( + "-nb", + "--no_bleach_cycles", + action="store_false", + help="Activate this flag to deactivate the extraction of the bleaching cycles, i.e \ + only the antigen images will be extracted.", +) + +args = parser.parse_args() +# ---END_CLI-BLOCK---# + + +# ------INPUT BLOCK-------# +device_name = "MACSIMA" +cycles_dir = Path(args.input) +stack_path = Path(args.output) +condition = args.input_mode_list + +if condition == False and len(args.cycles) == 2: + start = min(args.cycles) + end = max(args.cycles) + antigen_cycle_number = list(range(start, 1 + end)) + +elif condition == True: + antigen_cycle_number = args.cycles else: - print('Wrong input, try one of the following: \n', - '1) Range mode: Give only two numbers to the cycles argument to define the start and end of cycles to be stacked.\n', - '2) List mode: Activate the optional argument -il in the command line so the numbers are read as a list of specific cycles.') + print( + "Wrong input, try one of the following: \n", + "1) Range mode: Give only two numbers to the cycles argument to define the start and end of cycles to be stacked.\n", + "2) List mode: Activate the optional argument -il in the command line so the numbers are read as a list of specific cycles.", + ) -#------ ENDINPUT BLOCK----# +# ------ ENDINPUT BLOCK----# if os.path.exists(stack_path): pass @@ -103,543 +112,549 @@ os.mkdir(stack_path) -#---- HELPER FUNCTIONS ----# - -def antigen_cycle_info(antigen_cycle_no=antigen_cycle_number,cycles_path=cycles_dir,ref_marker='DAPI'): - antigen_cycle=f'{antigen_cycle_no:03d}' - cycle_folder='_'.join([antigen_cycle,'AntigenCycle']) - workdir=os.path.join(cycles_path,cycle_folder) - images=list(filter(lambda x: x.endswith('.tif'),os.listdir(workdir))) - cycle_info={'img_full_path':[], - 'image':images, - 'marker':[], - 'filter':[], - 'rack':[], - 'well':[], - 'roi':[], - 'fov':[], - 'exposure':[] - } +# ---- HELPER FUNCTIONS ----# + + +def antigen_cycle_info(antigen_cycle_no=antigen_cycle_number, cycles_path=cycles_dir, ref_marker="DAPI"): + antigen_cycle = f"{antigen_cycle_no:03d}" + cycle_folder = "_".join([antigen_cycle, "AntigenCycle"]) + workdir = os.path.join(cycles_path, cycle_folder) + images = list(filter(lambda x: x.endswith(".tif"), os.listdir(workdir))) + cycle_info = { + "img_full_path": [], + "image": images, + "marker": [], + "filter": [], + "rack": [], + "well": [], + "roi": [], + "fov": [], + "exposure": [], + } for im in images: - - marker_info=im.split('AntigenCycle')[-1].split('__') - acq_info=im.split('sensor')[-1].split('_') - #add the information to the cycle_info dictionary - #img full path - cycle_info['img_full_path'].append(os.path.join(workdir,im)) - #marker and fluorophore () - m=marker_info[0].strip('_') + + marker_info = im.split("AntigenCycle")[-1].split("__") + acq_info = im.split("sensor")[-1].split("_") + # add the information to the cycle_info dictionary + # img full path + cycle_info["img_full_path"].append(os.path.join(workdir, im)) + # marker and fluorophore () + m = marker_info[0].strip("_") if ref_marker in m: - cycle_info['marker'].append(ref_marker) - cycle_info['filter'].append(ref_marker) + cycle_info["marker"].append(ref_marker) + cycle_info["filter"].append(ref_marker) else: - cycle_info['marker'].append(m) - #cycle_info['filter'].append(marker_info[-1].split('_')[2]) - cycle_info['filter'].append(marker_info[-1].split('bit')[0].split("_")[-2]) - - #rack - cycle_info['rack'].append(acq_info[2].split('-')[-1]) - #well - cycle_info['well'].append(acq_info[3].split('-')[-1]) - #roi - cycle_info['roi'].append(acq_info[4].split('-')[-1]) - #fov, i.e. tiles - cycle_info['fov'].append(acq_info[5].split('-')[-1]) - #exposure - exposure=cycle_info['exposure'].append(acq_info[6].split('-')[-1].strip('.tif')) - - info=pd.DataFrame(cycle_info) - - - markers= info['marker'].unique() - markers_subset=np.setdiff1d(markers,[ref_marker]) - info.insert(len(cycle_info),'exposure_level',np.zeros(info.shape[0])) - info.loc[ info['marker']==ref_marker, 'exposure_level']='ref' - #info.to_csv(os.path.join(cycles_path,'test_antigen.csv')) - + cycle_info["marker"].append(m) + # cycle_info['filter'].append(marker_info[-1].split('_')[2]) + cycle_info["filter"].append(marker_info[-1].split("bit")[0].split("_")[-2]) + + # rack + cycle_info["rack"].append(acq_info[2].split("-")[-1]) + # well + cycle_info["well"].append(acq_info[3].split("-")[-1]) + # roi + cycle_info["roi"].append(acq_info[4].split("-")[-1]) + # fov, i.e. tiles + cycle_info["fov"].append(acq_info[5].split("-")[-1]) + # exposure + exposure = cycle_info["exposure"].append(acq_info[6].split("-")[-1].strip(".tif")) + + info = pd.DataFrame(cycle_info) + + markers = info["marker"].unique() + markers_subset = np.setdiff1d(markers, [ref_marker]) + info.insert(len(cycle_info), "exposure_level", np.zeros(info.shape[0])) + info.loc[info["marker"] == ref_marker, "exposure_level"] = "ref" + # info.to_csv(os.path.join(cycles_path,'test_antigen.csv')) for m in markers_subset: - exposure=info.loc[info['marker']==m]['exposure'].unique() - val=pd.to_numeric(exposure) - val=np.sort(val) - for level,value in enumerate(val): - info.loc[ (info['marker']==m) & (info['exposure']==str(value)), 'exposure_level']=level+1 - - - - info['rack']=pd.to_numeric(info['rack'],downcast='unsigned') - info['well']=pd.to_numeric(info['well'],downcast='unsigned') - info['roi']=pd.to_numeric(info['roi'],downcast='unsigned') - info['fov']=pd.to_numeric(info['fov'],downcast='unsigned') - info['exposure']=pd.to_numeric(info['exposure'],downcast='unsigned') - + exposure = info.loc[info["marker"] == m]["exposure"].unique() + val = pd.to_numeric(exposure) + val = np.sort(val) + for level, value in enumerate(val): + info.loc[(info["marker"] == m) & (info["exposure"] == str(value)), "exposure_level"] = level + 1 + + info["rack"] = pd.to_numeric(info["rack"], downcast="unsigned") + info["well"] = pd.to_numeric(info["well"], downcast="unsigned") + info["roi"] = pd.to_numeric(info["roi"], downcast="unsigned") + info["fov"] = pd.to_numeric(info["fov"], downcast="unsigned") + info["exposure"] = pd.to_numeric(info["exposure"], downcast="unsigned") + return info -def bleach_cycle_info(antigen_cycle_no=antigen_cycle_number,cycles_path=cycles_dir,ref_marker='DAPI'): - bleach_cycle_no=antigen_cycle_no - bleach_cycle=f'{bleach_cycle_no:03d}' - cycle_folder='_'.join([bleach_cycle,'BleachCycle']) - workdir=os.path.join(cycles_path,cycle_folder) - images=list(filter(lambda x: x.endswith('.tif'),os.listdir(workdir))) - cycle_info={'img_full_path':[], - 'image':images, - 'marker':[], - 'filter':[], - 'rack':[], - 'well':[], - 'roi':[], - 'fov':[], - 'exposure':[] - } +def bleach_cycle_info(antigen_cycle_no=antigen_cycle_number, cycles_path=cycles_dir, ref_marker="DAPI"): + bleach_cycle_no = antigen_cycle_no + bleach_cycle = f"{bleach_cycle_no:03d}" + cycle_folder = "_".join([bleach_cycle, "BleachCycle"]) + workdir = os.path.join(cycles_path, cycle_folder) + images = list(filter(lambda x: x.endswith(".tif"), os.listdir(workdir))) + cycle_info = { + "img_full_path": [], + "image": images, + "marker": [], + "filter": [], + "rack": [], + "well": [], + "roi": [], + "fov": [], + "exposure": [], + } for im in images: - - marker_info=im.split('BleachCycle')[-1].split('_') - acq_info=im.split('sensor')[-1].split('_') - #add the information to the cycle_info dictionary - #img full path - cycle_info['img_full_path'].append(os.path.join(workdir,im)) - #marker and fluorophore - #marker_info=['', 'DAPI', 'V0', 'DAPI', '16bit', 'M-20x-S Fluor full sensor', 'B-1', 'R-1', 'W-2', 'G-1', 'F-10', 'E-16.0.tif'] - m=marker_info[1] - if m==ref_marker: - cycle_info['marker'].append(ref_marker) - else: - cycle_info['marker'].append('_'.join([bleach_cycle,'bleach',marker_info[3]])) - cycle_info['filter'].append(marker_info[3]) - #rack - cycle_info['rack'].append(acq_info[2].split('-')[-1]) - #well - cycle_info['well'].append(acq_info[3].split('-')[-1]) - #roi - cycle_info['roi'].append(acq_info[4].split('-')[-1]) - #fov, i.e. tiles - cycle_info['fov'].append(acq_info[5].split('-')[-1]) - #exposure - exposure=cycle_info['exposure'].append(acq_info[6].split('-')[-1].strip('.tif')) - - info=pd.DataFrame(cycle_info) - - - markers= info['filter'].unique() - markers_subset=np.setdiff1d(markers,[ref_marker]) - info.insert(len(cycle_info),'exposure_level',np.zeros(info.shape[0])) - info.loc[ info['marker']==ref_marker, 'exposure_level']='ref' - #info.to_csv(os.path.join(cycles_path,'test_bleach.csv')) + marker_info = im.split("BleachCycle")[-1].split("_") + acq_info = im.split("sensor")[-1].split("_") + # add the information to the cycle_info dictionary + # img full path + cycle_info["img_full_path"].append(os.path.join(workdir, im)) + # marker and fluorophore + # marker_info=['', 'DAPI', 'V0', 'DAPI', '16bit', 'M-20x-S Fluor full sensor', 'B-1', 'R-1', 'W-2', 'G-1', 'F-10', 'E-16.0.tif'] + m = marker_info[1] + if m == ref_marker: + cycle_info["marker"].append(ref_marker) + else: + cycle_info["marker"].append("_".join([bleach_cycle, "bleach", marker_info[3]])) + cycle_info["filter"].append(marker_info[3]) + # rack + cycle_info["rack"].append(acq_info[2].split("-")[-1]) + # well + cycle_info["well"].append(acq_info[3].split("-")[-1]) + # roi + cycle_info["roi"].append(acq_info[4].split("-")[-1]) + # fov, i.e. tiles + cycle_info["fov"].append(acq_info[5].split("-")[-1]) + # exposure + exposure = cycle_info["exposure"].append(acq_info[6].split("-")[-1].strip(".tif")) + + info = pd.DataFrame(cycle_info) + + markers = info["filter"].unique() + markers_subset = np.setdiff1d(markers, [ref_marker]) + info.insert(len(cycle_info), "exposure_level", np.zeros(info.shape[0])) + info.loc[info["marker"] == ref_marker, "exposure_level"] = "ref" + # info.to_csv(os.path.join(cycles_path,'test_bleach.csv')) for m in markers_subset: - exposure=info.loc[info['filter']==m]['exposure'].unique() - val=pd.to_numeric(exposure) - val=np.sort(val) - for level,value in enumerate(val): - info.loc[ (info['filter']==m) & (info['exposure']==str(value)), 'exposure_level']=level+1 - - - - info['rack']=pd.to_numeric(info['rack'],downcast='unsigned') - info['well']=pd.to_numeric(info['well'],downcast='unsigned') - info['roi']=pd.to_numeric(info['roi'],downcast='unsigned') - info['fov']=pd.to_numeric(info['fov'],downcast='unsigned') - info['exposure']=pd.to_numeric(info['exposure'],downcast='unsigned') - + exposure = info.loc[info["filter"] == m]["exposure"].unique() + val = pd.to_numeric(exposure) + val = np.sort(val) + for level, value in enumerate(val): + info.loc[(info["filter"] == m) & (info["exposure"] == str(value)), "exposure_level"] = level + 1 + + info["rack"] = pd.to_numeric(info["rack"], downcast="unsigned") + info["well"] = pd.to_numeric(info["well"], downcast="unsigned") + info["roi"] = pd.to_numeric(info["roi"], downcast="unsigned") + info["fov"] = pd.to_numeric(info["fov"], downcast="unsigned") + info["exposure"] = pd.to_numeric(info["exposure"], downcast="unsigned") + return info + def create_dir(dir_path): if os.path.exists(dir_path): pass else: os.mkdir(dir_path) -def create_ome(img_info,xy_tile_positions_units,img_path): - img_name=img_info['name'] - device_name=img_info['device'] - no_of_channels=img_info['no_channels'] - img_size=img_info['xy_img_size_pix'] - markers=img_info['markers'] - exposures=img_info['exposure_times'] - bit_depth=img_info['bit_depth'] - pixel_size=img_info['pix_size'] - pixel_units=img_info['pix_units'] - sig_bits=img_info['sig_bits'] - if pixel_units=='mm': - pixel_size=1000*pixel_size - #pixel_units='um' - no_of_tiles=len(xy_tile_positions_units) - tifff.tiffcomment(img_path,'') - #--Generate tiff_data_blocks--# - tiff_block=[] - UUID=ome_types.model.TiffData.UUID(file_name=img_name,value=uuid4().urn) - for ch in range(0,no_of_channels): - tiff_block.append(TiffData(first_c=ch, - ifd=ch, - plane_count=1, - uuid=UUID - ) - - ) - #--Generate planes block (contains the information of each tile)--# - plane_block=[] - #length_units=ome_types.model.simple_types.UnitsLength('µm') - for ch in range(0,no_of_channels): - plane_block.append(Plane(the_c=ch, - the_t=0, - the_z=0, - position_x=0,#x=0 is just a place holder - position_y=0,#y=0 is just a place holder - position_z=0, - exposure_time=0 - #position_x_unit=pixel_units, - #position_y_unit=pixel_units - ) - ) - #--Generate channels block--# - chann_block=[] - for ch in range(0,no_of_channels): - chann_block.append(Channel(id=f'Channel:{ch}', - color=ome_types.model.Color((255,255,255)), - emission_wavelength=1,#place holder - excitation_wavelength=1,#place holder - ) - ) - #--Generate pixels block--# - pix_block=[] - ifd_counter=0 - for t in range(0,no_of_tiles): - template_plane_block=copy.deepcopy(plane_block) - template_chann_block=copy.deepcopy(chann_block) - template_tiffdata_block=copy.deepcopy(tiff_block) - for ch,mark in enumerate(markers): - template_plane_block[ch].position_x=xy_tile_positions_units[t][0] - template_plane_block[ch].position_y=xy_tile_positions_units[t][1] - template_plane_block[ch].exposure_time=exposures[ch] - template_chann_block[ch].id='Channel:{y}:{x}:{marker_name}'.format(x=ch,y=100+t,marker_name=mark) - template_tiffdata_block[ch].ifd=ifd_counter - ifd_counter+=1 - pix_block.append(Pixels(id=f'Pixels:{t}', - dimension_order=ome_types.model.Pixels_DimensionOrder('XYCZT'), - size_c=no_of_channels, - size_t=1, - size_x=img_size[0], - size_y=img_size[1], - size_z=1, - type=bit_depth, - big_endian=False, - channels=template_chann_block, - interleaved=False, - physical_size_x=pixel_size, - #physical_size_x_unit=pixel_units, - physical_size_y=pixel_size, - #physical_size_y_unit=pixel_units, - physical_size_z=1.0, - planes=template_plane_block, - significant_bits=sig_bits, - tiff_data_blocks=template_tiffdata_block - ) - ) - #--Generate image block--# - img_block=[] - for t in range(0,no_of_tiles): - img_block.append(Image(id=f'Image:{t}', - pixels=pix_block[t] - ) - ) - #--Create the OME object with all prebiously defined blocks--# - ome_custom=OME() - ome_custom.creator=" ".join([ome_types.__name__, - ome_types.__version__, - '/ python version-', - platform.python_version() - ] - ) - ome_custom.images=img_block - ome_custom.uuid=uuid4().urn - ome_xml=to_xml(ome_custom) - tifff.tiffcomment(img_path,ome_xml) - -def setup_coords(x,y,pix_units): - if pix_units=='mm': - x_norm=1000*(np.array(x)-np.min(x))#/pixel_size - y_norm=1000*(np.array(y)-np.min(y))#/pixel_size - x_norm=np.rint(x_norm).astype('int') - y_norm=np.rint(y_norm).astype('int') - #invert y - y_norm=np.max(y_norm)-y_norm - xy_tile_positions=[(i, j) for i,j in zip(x_norm,y_norm)] + +def create_ome(img_info, xy_tile_positions_units, img_path): + img_name = img_info["name"] + img_info["device"] + no_of_channels = img_info["no_channels"] + img_size = img_info["xy_img_size_pix"] + markers = img_info["markers"] + exposures = img_info["exposure_times"] + bit_depth = img_info["bit_depth"] + pixel_size = img_info["pix_size"] + pixel_units = img_info["pix_units"] + sig_bits = img_info["sig_bits"] + if pixel_units == "mm": + pixel_size = 1000 * pixel_size + # pixel_units='um' + no_of_tiles = len(xy_tile_positions_units) + tifff.tiffcomment(img_path, "") + # --Generate tiff_data_blocks--# + tiff_block = [] + UUID = ome_types.model.TiffData.UUID(file_name=img_name, value=uuid4().urn) + for ch in range(0, no_of_channels): + tiff_block.append(TiffData(first_c=ch, ifd=ch, plane_count=1, uuid=UUID)) + # --Generate planes block (contains the information of each tile)--# + plane_block = [] + # length_units=ome_types.model.simple_types.UnitsLength('µm') + for ch in range(0, no_of_channels): + plane_block.append( + Plane( + the_c=ch, + the_t=0, + the_z=0, + position_x=0, # x=0 is just a place holder + position_y=0, # y=0 is just a place holder + position_z=0, + exposure_time=0, + # position_x_unit=pixel_units, + # position_y_unit=pixel_units + ) + ) + # --Generate channels block--# + chann_block = [] + for ch in range(0, no_of_channels): + chann_block.append( + Channel( + id=f"Channel:{ch}", + color=ome_types.model.Color((255, 255, 255)), + emission_wavelength=1, # place holder + excitation_wavelength=1, # place holder + ) + ) + # --Generate pixels block--# + pix_block = [] + ifd_counter = 0 + for t in range(0, no_of_tiles): + template_plane_block = copy.deepcopy(plane_block) + template_chann_block = copy.deepcopy(chann_block) + template_tiffdata_block = copy.deepcopy(tiff_block) + for ch, mark in enumerate(markers): + template_plane_block[ch].position_x = xy_tile_positions_units[t][0] + template_plane_block[ch].position_y = xy_tile_positions_units[t][1] + template_plane_block[ch].exposure_time = exposures[ch] + template_chann_block[ch].id = f"Channel:{100 + t}:{ch}:{mark}" + template_tiffdata_block[ch].ifd = ifd_counter + ifd_counter += 1 + pix_block.append( + Pixels( + id=f"Pixels:{t}", + dimension_order=ome_types.model.Pixels_DimensionOrder("XYCZT"), + size_c=no_of_channels, + size_t=1, + size_x=img_size[0], + size_y=img_size[1], + size_z=1, + type=bit_depth, + big_endian=False, + channels=template_chann_block, + interleaved=False, + physical_size_x=pixel_size, + # physical_size_x_unit=pixel_units, + physical_size_y=pixel_size, + # physical_size_y_unit=pixel_units, + physical_size_z=1.0, + planes=template_plane_block, + significant_bits=sig_bits, + tiff_data_blocks=template_tiffdata_block, + ) + ) + # --Generate image block--# + img_block = [] + for t in range(0, no_of_tiles): + img_block.append(Image(id=f"Image:{t}", pixels=pix_block[t])) + # --Create the OME object with all prebiously defined blocks--# + ome_custom = OME() + ome_custom.creator = " ".join( + [ome_types.__name__, ome_types.__version__, "/ python version-", platform.python_version()] + ) + ome_custom.images = img_block + ome_custom.uuid = uuid4().urn + ome_xml = to_xml(ome_custom) + tifff.tiffcomment(img_path, ome_xml) + + +def setup_coords(x, y, pix_units): + if pix_units == "mm": + x_norm = 1000 * (np.array(x) - np.min(x)) # /pixel_size + y_norm = 1000 * (np.array(y) - np.min(y)) # /pixel_size + x_norm = np.rint(x_norm).astype("int") + y_norm = np.rint(y_norm).astype("int") + # invert y + y_norm = np.max(y_norm) - y_norm + xy_tile_positions = [(i, j) for i, j in zip(x_norm, y_norm)] return xy_tile_positions + def tile_position(metadata_string): - #meta_dict = {TAGS[key] : img.tag[key] for key in img.tag_v2} - #ome=BeautifulSoup(meta_dict['ImageDescription'][0],'xml') + # meta_dict = {TAGS[key] : img.tag[key] for key in img.tag_v2} + # ome=BeautifulSoup(meta_dict['ImageDescription'][0],'xml') # TODO replace with ome_types - ome=BeautifulSoup(metadata_string,'xml') - x=float(ome.StageLabel["X"]) - y=float(ome.StageLabel["Y"]) - stage_units=ome.StageLabel["XUnit"] - pixel_size=float(ome.Pixels['PhysicalSizeX']) - pixel_units=ome.Pixels['PhysicalSizeXUnit'] - bit_depth=ome.Pixels['Type'] - significantBits=int(ome.Pixels['SignificantBits']) - tile_info={'x':x, - 'y':y, - 'stage_units':stage_units, - 'pixel_size':pixel_size, - 'pixel_units':pixel_units, - 'bit_depth':bit_depth, - 'sig_bits':significantBits - } + ome = BeautifulSoup(metadata_string, "xml") + x = float(ome.StageLabel["X"]) + y = float(ome.StageLabel["Y"]) + stage_units = ome.StageLabel["XUnit"] + pixel_size = float(ome.Pixels["PhysicalSizeX"]) + pixel_units = ome.Pixels["PhysicalSizeXUnit"] + bit_depth = ome.Pixels["Type"] + significantBits = int(ome.Pixels["SignificantBits"]) + tile_info = { + "x": x, + "y": y, + "stage_units": stage_units, + "pixel_size": pixel_size, + "pixel_units": pixel_units, + "bit_depth": bit_depth, + "sig_bits": significantBits, + } return tile_info -def create_stack(info,exp_level=1, - antigen_cycle_no=antigen_cycle_number, - cycles_path=cycles_dir, - isbleach=False, - offset=0, - device=device_name, - ref_marker='DAPI', - results_path=stack_path): - - racks=info['rack'].unique() - wells=info['well'].unique() - antigen_cycle=f'{antigen_cycle_no:03d}' + +def create_stack( + info, + exp_level=1, + antigen_cycle_no=antigen_cycle_number, + cycles_path=cycles_dir, + isbleach=False, + offset=0, + device=device_name, + ref_marker="DAPI", + results_path=stack_path, +): + + racks = info["rack"].unique() + wells = info["well"].unique() + antigen_cycle = f"{antigen_cycle_no:03d}" if isbleach: - cycle_prefix='Bleach' + cycle_prefix = "Bleach" else: - cycle_prefix='Antigen' - - cycle_folder='_'.join([antigen_cycle,cycle_prefix+'Cycle']) - workdir=os.path.join(cycles_path,cycle_folder) - #img_ref=PIL.Image.open(info['img_full_path'][0]) - #width=img_ref.width - #height=img_ref.height - #dtype_ref=tifff.imread(info['img_full_path'][0]).dtype - with tifff.TiffFile(info['img_full_path'][0]) as tif: - img_ref=tif.pages[0].asarray() - width=img_ref.shape[1] - height=img_ref.shape[0] - dtype_ref=img_ref.dtype - ref_marker= list(info.loc[ info['exposure_level']=='ref', 'marker'])[0] - markers= info['marker'].unique() - markers_subset=np.setdiff1d(markers,[ref_marker]) - sorted_markers=np.insert(markers_subset,0,ref_marker) - sorted_filters=[] + cycle_prefix = "Antigen" + + cycle_folder = "_".join([antigen_cycle, cycle_prefix + "Cycle"]) + os.path.join(cycles_path, cycle_folder) + # img_ref=PIL.Image.open(info['img_full_path'][0]) + # width=img_ref.width + # height=img_ref.height + # dtype_ref=tifff.imread(info['img_full_path'][0]).dtype + with tifff.TiffFile(info["img_full_path"][0]) as tif: + img_ref = tif.pages[0].asarray() + width = img_ref.shape[1] + height = img_ref.shape[0] + dtype_ref = img_ref.dtype + ref_marker = list(info.loc[info["exposure_level"] == "ref", "marker"])[0] + markers = info["marker"].unique() + markers_subset = np.setdiff1d(markers, [ref_marker]) + sorted_markers = np.insert(markers_subset, 0, ref_marker) + sorted_filters = [] for m in sorted_markers: - sorted_filters.append(info.loc[info['marker']==m,'filter'].tolist()[0]) + sorted_filters.append(info.loc[info["marker"] == m, "filter"].tolist()[0]) if isbleach: - target_name='filters' - target='_'.join(sorted_filters) + target_name = "filters" + target = "_".join(sorted_filters) else: - target_name='markers' - target='_'.join(sorted_markers) + target_name = "markers" + target = "_".join(sorted_markers) - for r in racks: - output_levels=[] - rack_no='rack_{n}'.format(n=f'{r:02d}') + output_levels = [] + rack_no = "rack_{n}".format(n=f"{r:02d}") output_levels.append(rack_no) - #rack_path=stack_path / 'rack_{n}'.format(n=f'{r:02d}') - #create_dir(rack_path) + # rack_path=stack_path / 'rack_{n}'.format(n=f'{r:02d}') + # create_dir(rack_path) for w in wells: - well_no='well_{n}'.format(n=f'{w:02d}') + well_no = "well_{n}".format(n=f"{w:02d}") output_levels.append(well_no) - #well_path=rack_path / 'well_{n}'.format(n=f'{w:02d}') - #create_dir(well_path) - - groupA=info.loc[(info['rack']==r) & (info['well']==w)] - rois=groupA['roi'].unique() - + # well_path=rack_path / 'well_{n}'.format(n=f'{w:02d}') + # create_dir(well_path) + + groupA = info.loc[(info["rack"] == r) & (info["well"] == w)] + rois = groupA["roi"].unique() + for roi in rois: - roi_no='roi_{n}'.format(n=f'{roi:02d}') - exp_level_no='exp_level_{n}'.format(n=f'{exp_level:02d}') - #roi_path=well_path / 'roi_{n}'.format(n=f'{roi:02d}') - #create_dir(roi_path) - #exposure_path=roi_path / 'exp_level_{n}'.format(n=f'{exp_level:02d}') - #create_dir(exposure_path) + roi_no = "roi_{n}".format(n=f"{roi:02d}") + exp_level_no = "exp_level_{n}".format(n=f"{exp_level:02d}") + # roi_path=well_path / 'roi_{n}'.format(n=f'{roi:02d}') + # create_dir(roi_path) + # exposure_path=roi_path / 'exp_level_{n}'.format(n=f'{exp_level:02d}') + # create_dir(exposure_path) output_levels.append(roi_no) output_levels.append(exp_level_no) - - counter=0 - groupB=groupA.loc[groupA['roi']==roi] - A=groupB.loc[(groupB['exposure_level']=='ref')] - stack_size_z=A.shape[0]*len(markers) - fov_id=groupB.loc[groupB['marker']==ref_marker,'fov'].unique() - fov_id=np.sort(fov_id) - - stack_name='cycle-{C}-{prefix}-exp-{E}-rack-{R}-well-{W}-roi-{ROI}-{T}-{M}.{img_format}'.format(C=f'{(offset+antigen_cycle_no):03d}', - prefix=cycle_prefix, - E=exp_level, - R=r, - W=w, - ROI=roi, - T=target_name,#markers or filters - M=target, - img_format='ome.tiff' - ) - - - X=[] - Y=[] - stack=np.zeros((stack_size_z,height,width),dtype=dtype_ref) - - exposure_per_marker=[] - exp=groupB.loc[(groupB['marker']==ref_marker) & (groupB['fov']==1) & (groupB['exposure_level']=='ref'),'exposure'].tolist()[0] + counter = 0 + groupB = groupA.loc[groupA["roi"] == roi] + A = groupB.loc[(groupB["exposure_level"] == "ref")] + stack_size_z = A.shape[0] * len(markers) + fov_id = groupB.loc[groupB["marker"] == ref_marker, "fov"].unique() + fov_id = np.sort(fov_id) + + stack_name = "cycle-{C}-{prefix}-exp-{E}-rack-{R}-well-{W}-roi-{ROI}-{T}-{M}.{img_format}".format( + C=f"{(offset+antigen_cycle_no):03d}", + prefix=cycle_prefix, + E=exp_level, + R=r, + W=w, + ROI=roi, + T=target_name, # markers or filters + M=target, + img_format="ome.tiff", + ) + + X = [] + Y = [] + stack = np.zeros((stack_size_z, height, width), dtype=dtype_ref) + + exposure_per_marker = [] + exp = groupB.loc[ + (groupB["marker"] == ref_marker) & (groupB["fov"] == 1) & (groupB["exposure_level"] == "ref"), + "exposure", + ].tolist()[0] exposure_per_marker.append(exp) for s in markers_subset: - exp=groupB.loc[(groupB['marker']==s) & (groupB['fov']==1) & (groupB['exposure_level']==exp_level),'exposure'].tolist()[0] + exp = groupB.loc[ + (groupB["marker"] == s) & (groupB["fov"] == 1) & (groupB["exposure_level"] == exp_level), + "exposure", + ].tolist()[0] exposure_per_marker.append(exp) - + for tile in fov_id: - - img_ref=groupB.loc[(groupB['marker']==ref_marker) & (groupB['fov']==tile) ,'img_full_path'].tolist() - - - if len(img_ref)>0: - #img=tifff.imread(img_ref[0]) - #img_PIL=PIL.Image.open(img_ref[0]) + + img_ref = groupB.loc[ + (groupB["marker"] == ref_marker) & (groupB["fov"] == tile), "img_full_path" + ].tolist() + + if len(img_ref) > 0: + # img=tifff.imread(img_ref[0]) + # img_PIL=PIL.Image.open(img_ref[0]) with tifff.TiffFile(img_ref[0]) as tif: - img=tif.pages[0].asarray() - metadata=tif.ome_metadata - #stack[counter,:,:]=tifff.imread(img_ref[0]) - stack[counter,:,:]=img - tile_data=tile_position(metadata) - X.append(tile_data['x']) - Y.append(tile_data['y']) - counter+=1 - + img = tif.pages[0].asarray() + metadata = tif.ome_metadata + # stack[counter,:,:]=tifff.imread(img_ref[0]) + stack[counter, :, :] = img + tile_data = tile_position(metadata) + X.append(tile_data["x"]) + Y.append(tile_data["y"]) + counter += 1 + for m in markers_subset: - img_marker=groupB.loc[(groupB['marker']==m) & (groupB['fov']==tile) & (groupB['exposure_level']==exp_level),'img_full_path'].tolist()[0] - img=tifff.imread(img_marker) - stack[counter,:,:]=img - counter+=1 - + img_marker = groupB.loc[ + (groupB["marker"] == m) + & (groupB["fov"] == tile) + & (groupB["exposure_level"] == exp_level), + "img_full_path", + ].tolist()[0] + img = tifff.imread(img_marker) + stack[counter, :, :] = img + counter += 1 + if args.output_folders_list: - output_folders_path=stack_path / '--'.join(output_levels) / 'raw' + output_folders_path = stack_path / "--".join(output_levels) / "raw" else: - output_folders_path=stack_path / Path('/'.join(output_levels)) / 'raw' - + output_folders_path = stack_path / Path("/".join(output_levels)) / "raw" if os.path.exists(output_folders_path): pass else: os.makedirs(output_folders_path) - final_stack_path=os.path.join(output_folders_path,stack_name) - tifff.imwrite(final_stack_path, stack, photometric='minisblack') - img_info={'name':stack_name, - 'device':device_name, - 'no_channels':len(markers), - 'markers':sorted_markers, - 'filters':sorted_filters, - 'exposure_times':exposure_per_marker, - 'xy_img_size_pix':(width,height), - 'pix_size':tile_data['pixel_size'], - 'pix_units':tile_data['pixel_units'], - 'bit_depth':tile_data['bit_depth'], - 'sig_bits':tile_data['sig_bits'] - } - create_ome(img_info,setup_coords(X,Y,img_info['pix_units']),img_path=final_stack_path) - - return img_info + final_stack_path = os.path.join(output_folders_path, stack_name) + tifff.imwrite(final_stack_path, stack, photometric="minisblack") + img_info = { + "name": stack_name, + "device": device_name, + "no_channels": len(markers), + "markers": sorted_markers, + "filters": sorted_filters, + "exposure_times": exposure_per_marker, + "xy_img_size_pix": (width, height), + "pix_size": tile_data["pixel_size"], + "pix_units": tile_data["pixel_units"], + "bit_depth": tile_data["bit_depth"], + "sig_bits": tile_data["sig_bits"], + } + create_ome(img_info, setup_coords(X, Y, img_info["pix_units"]), img_path=final_stack_path) + return img_info -def markers_cycle_table(info,antigen_cycle_no=antigen_cycle_number): - ref_marker= list(info.loc[ info['exposure_level']=='ref', 'marker'])[0] - markers= info['marker'].unique() - markers_subset=np.setdiff1d(markers,[ref_marker]) - antigens=[ref_marker] - cycles=[antigen_cycle_no] +def markers_cycle_table(info, antigen_cycle_no=antigen_cycle_number): + ref_marker = list(info.loc[info["exposure_level"] == "ref", "marker"])[0] + markers = info["marker"].unique() + markers_subset = np.setdiff1d(markers, [ref_marker]) + antigens = [ref_marker] + cycles = [antigen_cycle_no] for m in markers_subset: - cycles.append(antigen_cycle_no) - antigens.append(m) - return cycles,antigens - - -def main(): - out_ant={ - 'cycle_number':[], - 'marker_name':[], - 'Filter':[], - 'background':[], - 'exposure':[], - 'remove':[], - 'exposure_level':[] - } - - out_ble=copy.deepcopy(out_ant) - - offset_value=1+max(antigen_cycle_number) + cycles.append(antigen_cycle_no) + antigens.append(m) + return cycles, antigens + + +def main(): + out_ant = { + "cycle_number": [], + "marker_name": [], + "Filter": [], + "background": [], + "exposure": [], + "remove": [], + "exposure_level": [], + } + + out_ble = copy.deepcopy(out_ant) + + offset_value = 1 + max(antigen_cycle_number) for i in antigen_cycle_number: - antigen_info=antigen_cycle_info(antigen_cycle_no=i) - exp=antigen_info['exposure_level'].unique() - exp=exp[exp!='ref'] + antigen_info = antigen_cycle_info(antigen_cycle_no=i) + exp = antigen_info["exposure_level"].unique() + exp = exp[exp != "ref"] exp.sort() if args.hi_exposure_only: - exp=[max(exp)] + exp = [max(exp)] else: pass - print('extracting antigen cycle:',i) + print("extracting antigen cycle:", i) - extract_bleach=args.no_bleach_cycles + extract_bleach = args.no_bleach_cycles if extract_bleach: - bcycle=i-1 - bleach_info=bleach_cycle_info(antigen_cycle_no=bcycle) - + bcycle = i - 1 + bleach_info = bleach_cycle_info(antigen_cycle_no=bcycle) for e in exp: - antigen_stack_info=create_stack(antigen_info,antigen_cycle_no=i,exp_level=e) + antigen_stack_info = create_stack(antigen_info, antigen_cycle_no=i, exp_level=e) - out_ant['cycle_number'].extend(antigen_stack_info['no_channels']*[i]) - out_ant['marker_name'].extend(antigen_stack_info['markers']) - out_ant['Filter'].extend(antigen_stack_info['filters']) - out_ant['remove'].extend(antigen_stack_info['no_channels']*['']) - out_ant['exposure'].extend(antigen_stack_info['exposure_times']) - out_ant['exposure_level'].extend(antigen_stack_info['no_channels']*[e]) + out_ant["cycle_number"].extend(antigen_stack_info["no_channels"] * [i]) + out_ant["marker_name"].extend(antigen_stack_info["markers"]) + out_ant["Filter"].extend(antigen_stack_info["filters"]) + out_ant["remove"].extend(antigen_stack_info["no_channels"] * [""]) + out_ant["exposure"].extend(antigen_stack_info["exposure_times"]) + out_ant["exposure_level"].extend(antigen_stack_info["no_channels"] * [e]) if extract_bleach: - print('extracting bleaching cycle:',bcycle) - bleach_stack_info=create_stack(bleach_info,antigen_cycle_no=bcycle,isbleach=True,offset=offset_value,exp_level=e) - background_channels=['']#the blank string corresponds to the reference marker, it is always the first in the sorted_markers list - for m in antigen_stack_info['filters'][1::]: - ch_name=[x for x in bleach_stack_info['markers'] if m in x] + print("extracting bleaching cycle:", bcycle) + bleach_stack_info = create_stack( + bleach_info, antigen_cycle_no=bcycle, isbleach=True, offset=offset_value, exp_level=e + ) + background_channels = [ + "" + ] # the blank string corresponds to the reference marker, it is always the first in the sorted_markers list + for m in antigen_stack_info["filters"][1::]: + ch_name = [x for x in bleach_stack_info["markers"] if m in x] background_channels.extend(ch_name) - - out_ant['background'].extend(background_channels) - - out_ble['background'].extend(bleach_stack_info['no_channels']*['']) - out_ble['cycle_number'].extend(bleach_stack_info['no_channels']*[offset_value+bcycle]) - out_ble['marker_name'].extend(bleach_stack_info['markers']) - out_ble['Filter'].extend(bleach_stack_info['filters']) - out_ble['remove'].extend(bleach_stack_info['no_channels']*['TRUE']) - out_ble['exposure'].extend(bleach_stack_info['exposure_times']) - out_ble['exposure_level'].extend(bleach_stack_info['no_channels']*[e]) - - else: - out_ant['background'].extend(antigen_stack_info['no_channels']*['']) + out_ant["background"].extend(background_channels) + + out_ble["background"].extend(bleach_stack_info["no_channels"] * [""]) + out_ble["cycle_number"].extend(bleach_stack_info["no_channels"] * [offset_value + bcycle]) + out_ble["marker_name"].extend(bleach_stack_info["markers"]) + out_ble["Filter"].extend(bleach_stack_info["filters"]) + out_ble["remove"].extend(bleach_stack_info["no_channels"] * ["TRUE"]) + out_ble["exposure"].extend(bleach_stack_info["exposure_times"]) + out_ble["exposure_level"].extend(bleach_stack_info["no_channels"] * [e]) + + else: + out_ant["background"].extend(antigen_stack_info["no_channels"] * [""]) for e in exp: if extract_bleach: - df1=pd.DataFrame(out_ant).groupby('exposure_level').get_group(e) - df2=pd.DataFrame(out_ble).groupby('exposure_level').get_group(e) - df=pd.concat([df1,df2],ignore_index=True) + df1 = pd.DataFrame(out_ant).groupby("exposure_level").get_group(e) + df2 = pd.DataFrame(out_ble).groupby("exposure_level").get_group(e) + df = pd.concat([df1, df2], ignore_index=True) else: - df=pd.DataFrame(out_ant).groupby('exposure_level').get_group(e) - df.drop('exposure_level',axis=1,inplace=True) - df.insert(0,'channel_number',list(range(1,1+df.shape[0]))) - df.to_csv(stack_path / 'markers_exp_{level}.csv'.format(level=e),index=False) + df = pd.DataFrame(out_ant).groupby("exposure_level").get_group(e) + df.drop("exposure_level", axis=1, inplace=True) + df.insert(0, "channel_number", list(range(1, 1 + df.shape[0]))) + df.to_csv(stack_path / f"markers_exp_{e}.csv", index=False) + -if __name__=='__main__': +if __name__ == "__main__": main() - - - - \ No newline at end of file diff --git a/scripts/nfmacsima.py b/scripts/nfmacsima.py index 5d17d3f9..51a1c5d2 100644 --- a/scripts/nfmacsima.py +++ b/scripts/nfmacsima.py @@ -1,79 +1,72 @@ #!/usr/bin/python -import ome_types +import argparse +import copy +import os +import platform +from pathlib import Path +from uuid import uuid4 + import numpy as np +import ome_types +import ome_types.model import pandas as pd -from pathlib import Path -#from PIL import Image -#import PIL -#from PIL.TiffTags import TAGS + +# from PIL import Image +# import PIL +# from PIL.TiffTags import TAGS import tifffile as tifff from bs4 import BeautifulSoup -import os -from uuid import uuid4 -import copy -from ome_types import from_tiff,to_xml -from ome_types.model import OME,Image,Instrument,Pixels,TiffData,Channel,Plane +from ome_types import to_xml +from ome_types.model import OME, Channel, Image, Pixels, Plane, TiffData +from ome_types.model.simple_types import ChannelID, Color, ImageID, PixelsID from ome_types.model.tiff_data import UUID -from ome_types.model.simple_types import PixelsID,ImageID,Color,ChannelID -import ome_types.model -import platform -import argparse - - -#---CLI-BLOCK---# -parser=argparse.ArgumentParser() +# ---CLI-BLOCK---# +parser = argparse.ArgumentParser() -parser.add_argument('-i', - '--input', - nargs='*', - required=True, - help='Ordered pairs of paths to the Antigen and corresponding Bleaching cycle, e.g [002_AntigenCycle,001_BleachCycle].' - ) +parser.add_argument( + "-i", + "--input", + nargs="*", + required=True, + help="Ordered pairs of paths to the Antigen and corresponding Bleaching cycle, e.g [002_AntigenCycle,001_BleachCycle].", +) -parser.add_argument('-o', - '--output', - required=True, - help='Directory to save the stack.' - ) +parser.add_argument("-o", "--output", required=True, help="Directory to save the stack.") -parser.add_argument('-c', - '--cycle', - required=True, - type=int, - help='Number of the antigen cycle.' - ) +parser.add_argument("-c", "--cycle", required=True, type=int, help="Number of the antigen cycle.") -parser.add_argument('-rm', - '--ref_marker', - required=False, - type=str, - default='DAPI', - help='Name of the reference marker.Default value is DAPI.' - ) +parser.add_argument( + "-rm", + "--ref_marker", + required=False, + type=str, + default="DAPI", + help="Name of the reference marker.Default value is DAPI.", +) -parser.add_argument('-he', - '--hi_exposure_only', - action='store_true', - help='Activate this flag to extract only the set of images with the highest exposure time.' - ) +parser.add_argument( + "-he", + "--hi_exposure_only", + action="store_true", + help="Activate this flag to extract only the set of images with the highest exposure time.", +) -args=parser.parse_args() -#---END_CLI-BLOCK---# +args = parser.parse_args() +# ---END_CLI-BLOCK---# +# ------FORMAT INPUT BLOCK-------# +device_name = "MACSIMA" +antigen_dir = Path(args.input[0]) +bleach_dir = Path(args.input[1]) +stack_path = Path(args.output) +ref_marker = args.ref_marker +cycle_no = args.cycle -#------FORMAT INPUT BLOCK-------# -device_name='MACSIMA' -antigen_dir=Path(args.input[0]) -bleach_dir=Path(args.input[1]) -stack_path=Path(args.output) -ref_marker=args.ref_marker -cycle_no=args.cycle - -#------ ENDINPUT BLOCK----# +# ------ ENDINPUT BLOCK----# if os.path.exists(stack_path): pass @@ -81,458 +74,461 @@ os.mkdir(stack_path) -#---- HELPER FUNCTIONS ----# +# ---- HELPER FUNCTIONS ----# + -def antigen_cycle_info(antigen_folder=antigen_dir,ref_marker=ref_marker): +def antigen_cycle_info(antigen_folder=antigen_dir, ref_marker=ref_marker): - images=list(filter(lambda x: x.endswith('.tif'),os.listdir(antigen_folder))) + images = list(filter(lambda x: x.endswith(".tif"), os.listdir(antigen_folder))) - cycle_info={'img_full_path':[], - 'image':images, - 'marker':[], - 'filter':[], - 'rack':[], - 'well':[], - 'roi':[], - 'fov':[], - 'exposure':[] - } + cycle_info = { + "img_full_path": [], + "image": images, + "marker": [], + "filter": [], + "rack": [], + "well": [], + "roi": [], + "fov": [], + "exposure": [], + } for im in images: - - #Extraction of marker name info and acquisition info (rack,well,exposure,roi) - marker_info=im.split('AntigenCycle')[-1].split('__') - acq_info=im.split('sensor')[-1].split('_') - #add the information to the cycle_info dictionary - #img full path - cycle_info['img_full_path'].append(os.path.join(antigen_folder,im)) - #marker and fluorophore () - m=marker_info[0].strip('_') + + # Extraction of marker name info and acquisition info (rack,well,exposure,roi) + marker_info = im.split("AntigenCycle")[-1].split("__") + acq_info = im.split("sensor")[-1].split("_") + # add the information to the cycle_info dictionary + # img full path + cycle_info["img_full_path"].append(os.path.join(antigen_folder, im)) + # marker and fluorophore () + m = marker_info[0].strip("_") if ref_marker in m: - cycle_info['marker'].append(ref_marker) - cycle_info['filter'].append(ref_marker) + cycle_info["marker"].append(ref_marker) + cycle_info["filter"].append(ref_marker) else: - cycle_info['marker'].append(m) - cycle_info['filter'].append(marker_info[-1].split('bit')[0].split("_")[-2]) - - #rack - cycle_info['rack'].append(acq_info[2].split('-')[-1]) - #well - cycle_info['well'].append(acq_info[3].split('-')[-1]) - #roi - cycle_info['roi'].append(acq_info[4].split('-')[-1]) - #fov, i.e. tiles - cycle_info['fov'].append(acq_info[5].split('-')[-1]) - #exposure - exposure=cycle_info['exposure'].append(acq_info[6].split('-')[-1].strip('.tif')) - - info=pd.DataFrame(cycle_info) - - - markers= info['marker'].unique() - markers_subset=np.setdiff1d(markers,[ref_marker]) - #insert the exposure level colum at the end of the info dataframe - info.insert(len(cycle_info),'exposure_level',np.zeros(info.shape[0])) - #label the exposure level of the reference markers with "ref" - info.loc[ info['marker']==ref_marker, 'exposure_level']='ref' + cycle_info["marker"].append(m) + cycle_info["filter"].append(marker_info[-1].split("bit")[0].split("_")[-2]) + + # rack + cycle_info["rack"].append(acq_info[2].split("-")[-1]) + # well + cycle_info["well"].append(acq_info[3].split("-")[-1]) + # roi + cycle_info["roi"].append(acq_info[4].split("-")[-1]) + # fov, i.e. tiles + cycle_info["fov"].append(acq_info[5].split("-")[-1]) + # exposure + exposure = cycle_info["exposure"].append(acq_info[6].split("-")[-1].strip(".tif")) + + info = pd.DataFrame(cycle_info) + + markers = info["marker"].unique() + markers_subset = np.setdiff1d(markers, [ref_marker]) + # insert the exposure level colum at the end of the info dataframe + info.insert(len(cycle_info), "exposure_level", np.zeros(info.shape[0])) + # label the exposure level of the reference markers with "ref" + info.loc[info["marker"] == ref_marker, "exposure_level"] = "ref" for m in markers_subset: - exposure=info.loc[info['marker']==m]['exposure'].unique() - val=pd.to_numeric(exposure) - val=np.sort(val) - for level,value in enumerate(val): - info.loc[ (info['marker']==m) & (info['exposure']==str(value)), 'exposure_level']=level+1 - - - #change the data type to numeric - info['rack']=pd.to_numeric(info['rack'],downcast='unsigned') - info['well']=pd.to_numeric(info['well'],downcast='unsigned') - info['roi']=pd.to_numeric(info['roi'],downcast='unsigned') - info['fov']=pd.to_numeric(info['fov'],downcast='unsigned') - info['exposure']=pd.to_numeric(info['exposure'],downcast='unsigned') - + exposure = info.loc[info["marker"] == m]["exposure"].unique() + val = pd.to_numeric(exposure) + val = np.sort(val) + for level, value in enumerate(val): + info.loc[(info["marker"] == m) & (info["exposure"] == str(value)), "exposure_level"] = level + 1 + + # change the data type to numeric + info["rack"] = pd.to_numeric(info["rack"], downcast="unsigned") + info["well"] = pd.to_numeric(info["well"], downcast="unsigned") + info["roi"] = pd.to_numeric(info["roi"], downcast="unsigned") + info["fov"] = pd.to_numeric(info["fov"], downcast="unsigned") + info["exposure"] = pd.to_numeric(info["exposure"], downcast="unsigned") + return info -def bleach_cycle_info(bleach_folder=bleach_dir,ref_marker=ref_marker,antigen_cycle_no=cycle_no): +def bleach_cycle_info(bleach_folder=bleach_dir, ref_marker=ref_marker, antigen_cycle_no=cycle_no): - bleach_cycle=f'{antigen_cycle_no-1:03d}' + bleach_cycle = f"{antigen_cycle_no-1:03d}" - images=list(filter(lambda x: x.endswith('.tif'),os.listdir(bleach_folder))) + images = list(filter(lambda x: x.endswith(".tif"), os.listdir(bleach_folder))) - cycle_info={'img_full_path':[], - 'image':images, - 'marker':[], - 'filter':[], - 'rack':[], - 'well':[], - 'roi':[], - 'fov':[], - 'exposure':[] - } + cycle_info = { + "img_full_path": [], + "image": images, + "marker": [], + "filter": [], + "rack": [], + "well": [], + "roi": [], + "fov": [], + "exposure": [], + } for im in images: - - marker_info=im.split('BleachCycle')[-1].split('_') - acq_info=im.split('sensor')[-1].split('_') - #add the information to the cycle_info dictionary - #img full path - cycle_info['img_full_path'].append(os.path.join(bleach_folder,im)) - #marker and fluorophore - #marker_info=['', 'DAPI', 'V0', 'DAPI', '16bit', 'M-20x-S Fluor full sensor', 'B-1', 'R-1', 'W-2', 'G-1', 'F-10', 'E-16.0.tif'] - m=marker_info[1] - if m==ref_marker: - cycle_info['marker'].append(ref_marker) - else: - cycle_info['marker'].append('_'.join([bleach_cycle,'bleach',marker_info[3]])) - cycle_info['filter'].append(marker_info[3]) - #rack - cycle_info['rack'].append(acq_info[2].split('-')[-1]) - #well - cycle_info['well'].append(acq_info[3].split('-')[-1]) - #roi - cycle_info['roi'].append(acq_info[4].split('-')[-1]) - #fov, i.e. tiles - cycle_info['fov'].append(acq_info[5].split('-')[-1]) - #exposure - exposure=cycle_info['exposure'].append(acq_info[6].split('-')[-1].strip('.tif')) - - info=pd.DataFrame(cycle_info) - - - markers= info['filter'].unique() - markers_subset=np.setdiff1d(markers,[ref_marker]) - info.insert(len(cycle_info),'exposure_level',np.zeros(info.shape[0])) - info.loc[ info['marker']==ref_marker, 'exposure_level']='ref' - #info.to_csv(os.path.join(cycles_path,'test_bleach.csv')) + marker_info = im.split("BleachCycle")[-1].split("_") + acq_info = im.split("sensor")[-1].split("_") + # add the information to the cycle_info dictionary + # img full path + cycle_info["img_full_path"].append(os.path.join(bleach_folder, im)) + # marker and fluorophore + # marker_info=['', 'DAPI', 'V0', 'DAPI', '16bit', 'M-20x-S Fluor full sensor', 'B-1', 'R-1', 'W-2', 'G-1', 'F-10', 'E-16.0.tif'] + m = marker_info[1] + if m == ref_marker: + cycle_info["marker"].append(ref_marker) + else: + cycle_info["marker"].append("_".join([bleach_cycle, "bleach", marker_info[3]])) + cycle_info["filter"].append(marker_info[3]) + # rack + cycle_info["rack"].append(acq_info[2].split("-")[-1]) + # well + cycle_info["well"].append(acq_info[3].split("-")[-1]) + # roi + cycle_info["roi"].append(acq_info[4].split("-")[-1]) + # fov, i.e. tiles + cycle_info["fov"].append(acq_info[5].split("-")[-1]) + # exposure + exposure = cycle_info["exposure"].append(acq_info[6].split("-")[-1].strip(".tif")) + + info = pd.DataFrame(cycle_info) + + markers = info["filter"].unique() + markers_subset = np.setdiff1d(markers, [ref_marker]) + info.insert(len(cycle_info), "exposure_level", np.zeros(info.shape[0])) + info.loc[info["marker"] == ref_marker, "exposure_level"] = "ref" + # info.to_csv(os.path.join(cycles_path,'test_bleach.csv')) for m in markers_subset: - exposure=info.loc[info['filter']==m]['exposure'].unique() - val=pd.to_numeric(exposure) - val=np.sort(val) - for level,value in enumerate(val): - info.loc[ (info['filter']==m) & (info['exposure']==str(value)), 'exposure_level']=level+1 - - - - info['rack']=pd.to_numeric(info['rack'],downcast='unsigned') - info['well']=pd.to_numeric(info['well'],downcast='unsigned') - info['roi']=pd.to_numeric(info['roi'],downcast='unsigned') - info['fov']=pd.to_numeric(info['fov'],downcast='unsigned') - info['exposure']=pd.to_numeric(info['exposure'],downcast='unsigned') - + exposure = info.loc[info["filter"] == m]["exposure"].unique() + val = pd.to_numeric(exposure) + val = np.sort(val) + for level, value in enumerate(val): + info.loc[(info["filter"] == m) & (info["exposure"] == str(value)), "exposure_level"] = level + 1 + + info["rack"] = pd.to_numeric(info["rack"], downcast="unsigned") + info["well"] = pd.to_numeric(info["well"], downcast="unsigned") + info["roi"] = pd.to_numeric(info["roi"], downcast="unsigned") + info["fov"] = pd.to_numeric(info["fov"], downcast="unsigned") + info["exposure"] = pd.to_numeric(info["exposure"], downcast="unsigned") + return info + def create_dir(dir_path): if os.path.exists(dir_path): pass else: os.mkdir(dir_path) -def create_ome(img_info,xy_tile_positions_units,img_path): - img_name=img_info['name'] - device_name=img_info['device'] - no_of_channels=img_info['no_channels'] - img_size=img_info['xy_img_size_pix'] - markers=img_info['markers'] - exposures=img_info['exposure_times'] - bit_depth=img_info['bit_depth'] - pixel_size=img_info['pix_size'] - pixel_units=img_info['pix_units'] - sig_bits=img_info['sig_bits'] - if pixel_units=='mm': - pixel_size=1000*pixel_size - #pixel_units='um' - no_of_tiles=len(xy_tile_positions_units) - tifff.tiffcomment(img_path,'') - #--Generate tiff_data_blocks--# - tiff_block=[] - #UUID=ome_types.model.tiff_data.UUID(file_name=img_name,value=uuid4().urn) - unique_identifier=UUID(file_name=img_name,value=uuid4().urn) - for ch in range(0,no_of_channels): - tiff_block.append(TiffData(first_c=ch, - ifd=ch, - plane_count=1, - uuid=unique_identifier - ) - - ) - #--Generate planes block (contains the information of each tile)--# - plane_block=[] - #length_units=ome_types.model.simple_types.UnitsLength('µm') - for ch in range(0,no_of_channels): - plane_block.append(Plane(the_c=ch, - the_t=0, - the_z=0, - position_x=0,#x=0 is just a place holder - position_y=0,#y=0 is just a place holder - position_z=0, - exposure_time=0 - #position_x_unit=pixel_units, - #position_y_unit=pixel_units - ) - ) - #--Generate channels block--# - chann_block=[] - for ch in range(0,no_of_channels): - chann_block.append(Channel(id=ChannelID('Channel:{x}'.format(x=ch)), - color=Color((255,255,255)), - emission_wavelength=1,#place holder - excitation_wavelength=1,#place holder - - ) - ) - #--Generate pixels block--# - pix_block=[] - ifd_counter=0 - for t in range(0,no_of_tiles): - template_plane_block=copy.deepcopy(plane_block) - template_chann_block=copy.deepcopy(chann_block) - template_tiffdata_block=copy.deepcopy(tiff_block) - for ch,mark in enumerate(markers): - template_plane_block[ch].position_x=xy_tile_positions_units[t][0] - template_plane_block[ch].position_y=xy_tile_positions_units[t][1] - template_plane_block[ch].exposure_time=exposures[ch] - template_chann_block[ch].id='Channel:{y}:{x}:{marker_name}'.format(x=ch,y=100+t,marker_name=mark) - template_tiffdata_block[ch].ifd=ifd_counter - ifd_counter+=1 - pix_block.append(Pixels(id=PixelsID('Pixels:{x}'.format(x=t)), - dimension_order=ome_types.model.pixels.DimensionOrder('XYCZT'), - size_c=no_of_channels, - size_t=1, - size_x=img_size[0], - size_y=img_size[1], - size_z=1, - type=bit_depth, - big_endian=False, - channels=template_chann_block, - interleaved=False, - physical_size_x=pixel_size, - #physical_size_x_unit=pixel_units, - physical_size_y=pixel_size, - #physical_size_y_unit=pixel_units, - physical_size_z=1.0, - planes=template_plane_block, - significant_bits=sig_bits, - tiff_data_blocks=template_tiffdata_block - ) - ) - #--Generate image block--# - img_block=[] - for t in range(0,no_of_tiles): - img_block.append(Image(id=ImageID('Image:{x}'.format(x=t)), - pixels=pix_block[t] - ) - ) - #--Create the OME object with all prebiously defined blocks--# - ome_custom=OME() - ome_custom.creator=" ".join([ome_types.__name__, - ome_types.__version__, - '/ python version-', - platform.python_version() - ] - ) - ome_custom.images=img_block - ome_custom.uuid=uuid4().urn - ome_xml=to_xml(ome_custom) - tifff.tiffcomment(img_path,ome_xml) - -def setup_coords(x,y,pix_units): - if pix_units=='mm': - x_norm=1000*(np.array(x)-np.min(x))#/pixel_size - y_norm=1000*(np.array(y)-np.min(y))#/pixel_size - x_norm=np.rint(x_norm).astype('int') - y_norm=np.rint(y_norm).astype('int') - #invert y - y_norm=np.max(y_norm)-y_norm - xy_tile_positions=[(i, j) for i,j in zip(x_norm,y_norm)] + +def create_ome(img_info, xy_tile_positions_units, img_path): + img_name = img_info["name"] + img_info["device"] + no_of_channels = img_info["no_channels"] + img_size = img_info["xy_img_size_pix"] + markers = img_info["markers"] + exposures = img_info["exposure_times"] + bit_depth = img_info["bit_depth"] + pixel_size = img_info["pix_size"] + pixel_units = img_info["pix_units"] + sig_bits = img_info["sig_bits"] + if pixel_units == "mm": + pixel_size = 1000 * pixel_size + # pixel_units='um' + no_of_tiles = len(xy_tile_positions_units) + tifff.tiffcomment(img_path, "") + # --Generate tiff_data_blocks--# + tiff_block = [] + # UUID=ome_types.model.tiff_data.UUID(file_name=img_name,value=uuid4().urn) + unique_identifier = UUID(file_name=img_name, value=uuid4().urn) + for ch in range(0, no_of_channels): + tiff_block.append(TiffData(first_c=ch, ifd=ch, plane_count=1, uuid=unique_identifier)) + # --Generate planes block (contains the information of each tile)--# + plane_block = [] + # length_units=ome_types.model.simple_types.UnitsLength('µm') + for ch in range(0, no_of_channels): + plane_block.append( + Plane( + the_c=ch, + the_t=0, + the_z=0, + position_x=0, # x=0 is just a place holder + position_y=0, # y=0 is just a place holder + position_z=0, + exposure_time=0, + # position_x_unit=pixel_units, + # position_y_unit=pixel_units + ) + ) + # --Generate channels block--# + chann_block = [] + for ch in range(0, no_of_channels): + chann_block.append( + Channel( + id=ChannelID(f"Channel:{ch}"), + color=Color((255, 255, 255)), + emission_wavelength=1, # place holder + excitation_wavelength=1, # place holder + ) + ) + # --Generate pixels block--# + pix_block = [] + ifd_counter = 0 + for t in range(0, no_of_tiles): + template_plane_block = copy.deepcopy(plane_block) + template_chann_block = copy.deepcopy(chann_block) + template_tiffdata_block = copy.deepcopy(tiff_block) + for ch, mark in enumerate(markers): + template_plane_block[ch].position_x = xy_tile_positions_units[t][0] + template_plane_block[ch].position_y = xy_tile_positions_units[t][1] + template_plane_block[ch].exposure_time = exposures[ch] + template_chann_block[ch].id = f"Channel:{100 + t}:{ch}:{mark}" + template_tiffdata_block[ch].ifd = ifd_counter + ifd_counter += 1 + pix_block.append( + Pixels( + id=PixelsID(f"Pixels:{t}"), + dimension_order=ome_types.model.pixels.DimensionOrder("XYCZT"), + size_c=no_of_channels, + size_t=1, + size_x=img_size[0], + size_y=img_size[1], + size_z=1, + type=bit_depth, + big_endian=False, + channels=template_chann_block, + interleaved=False, + physical_size_x=pixel_size, + # physical_size_x_unit=pixel_units, + physical_size_y=pixel_size, + # physical_size_y_unit=pixel_units, + physical_size_z=1.0, + planes=template_plane_block, + significant_bits=sig_bits, + tiff_data_blocks=template_tiffdata_block, + ) + ) + # --Generate image block--# + img_block = [] + for t in range(0, no_of_tiles): + img_block.append(Image(id=ImageID(f"Image:{t}"), pixels=pix_block[t])) + # --Create the OME object with all prebiously defined blocks--# + ome_custom = OME() + ome_custom.creator = " ".join( + [ome_types.__name__, ome_types.__version__, "/ python version-", platform.python_version()] + ) + ome_custom.images = img_block + ome_custom.uuid = uuid4().urn + ome_xml = to_xml(ome_custom) + tifff.tiffcomment(img_path, ome_xml) + + +def setup_coords(x, y, pix_units): + if pix_units == "mm": + x_norm = 1000 * (np.array(x) - np.min(x)) # /pixel_size + y_norm = 1000 * (np.array(y) - np.min(y)) # /pixel_size + x_norm = np.rint(x_norm).astype("int") + y_norm = np.rint(y_norm).astype("int") + # invert y + y_norm = np.max(y_norm) - y_norm + xy_tile_positions = [(i, j) for i, j in zip(x_norm, y_norm)] return xy_tile_positions + def tile_position(metadata_string): - #meta_dict = {TAGS[key] : img.tag[key] for key in img.tag_v2} - #ome=BeautifulSoup(meta_dict['ImageDescription'][0],'xml') - ome=BeautifulSoup(metadata_string,'xml') - x=float(ome.StageLabel["X"]) - y=float(ome.StageLabel["Y"]) - stage_units=ome.StageLabel["XUnit"] - pixel_size=float(ome.Pixels['PhysicalSizeX']) - pixel_units=ome.Pixels['PhysicalSizeXUnit'] - bit_depth=ome.Pixels['Type'] - significantBits=int(ome.Pixels['SignificantBits']) - tile_info={'x':x, - 'y':y, - 'stage_units':stage_units, - 'pixel_size':pixel_size, - 'pixel_units':pixel_units, - 'bit_depth':bit_depth, - 'sig_bits':significantBits - } + # meta_dict = {TAGS[key] : img.tag[key] for key in img.tag_v2} + # ome=BeautifulSoup(meta_dict['ImageDescription'][0],'xml') + ome = BeautifulSoup(metadata_string, "xml") + x = float(ome.StageLabel["X"]) + y = float(ome.StageLabel["Y"]) + stage_units = ome.StageLabel["XUnit"] + pixel_size = float(ome.Pixels["PhysicalSizeX"]) + pixel_units = ome.Pixels["PhysicalSizeXUnit"] + bit_depth = ome.Pixels["Type"] + significantBits = int(ome.Pixels["SignificantBits"]) + tile_info = { + "x": x, + "y": y, + "stage_units": stage_units, + "pixel_size": pixel_size, + "pixel_units": pixel_units, + "bit_depth": bit_depth, + "sig_bits": significantBits, + } return tile_info -def create_stack(info,exp_level=1, - antigen_cycle_no=cycle_no, - cycle_folder='', - isbleach=False, - offset=0, - device=device_name, - ref_marker=ref_marker, - results_path=stack_path): - - racks=info['rack'].unique() - wells=info['well'].unique() - antigen_cycle=f'{antigen_cycle_no:03d}' + +def create_stack( + info, + exp_level=1, + antigen_cycle_no=cycle_no, + cycle_folder="", + isbleach=False, + offset=0, + device=device_name, + ref_marker=ref_marker, + results_path=stack_path, +): + + racks = info["rack"].unique() + wells = info["well"].unique() + antigen_cycle = f"{antigen_cycle_no:03d}" if isbleach: - cycle_prefix='Bleach' + cycle_prefix = "Bleach" else: - cycle_prefix='Antigen' - - workdir=Path(cycle_folder) - - with tifff.TiffFile(info['img_full_path'][0]) as tif: - img_ref=tif.pages[0].asarray() - width=img_ref.shape[1] - height=img_ref.shape[0] - dtype_ref=img_ref.dtype - ref_marker= list(info.loc[ info['exposure_level']=='ref', 'marker'])[0] - markers= info['marker'].unique() - markers_subset=np.setdiff1d(markers,[ref_marker]) - sorted_markers=np.insert(markers_subset,0,ref_marker) - sorted_filters=[] + cycle_prefix = "Antigen" + + Path(cycle_folder) + + with tifff.TiffFile(info["img_full_path"][0]) as tif: + img_ref = tif.pages[0].asarray() + width = img_ref.shape[1] + height = img_ref.shape[0] + dtype_ref = img_ref.dtype + ref_marker = list(info.loc[info["exposure_level"] == "ref", "marker"])[0] + markers = info["marker"].unique() + markers_subset = np.setdiff1d(markers, [ref_marker]) + sorted_markers = np.insert(markers_subset, 0, ref_marker) + sorted_filters = [] for m in sorted_markers: - sorted_filters.append(info.loc[info['marker']==m,'filter'].tolist()[0]) + sorted_filters.append(info.loc[info["marker"] == m, "filter"].tolist()[0]) if isbleach: - target_name='filters' - target='_'.join(sorted_filters) + target_name = "filters" + target = "_".join(sorted_filters) else: - target_name='markers' - target='_'.join(sorted_markers) + target_name = "markers" + target = "_".join(sorted_markers) - for r in racks: - output_levels=[] - rack_no='rack_{n}'.format(n=f'{r:02d}') + output_levels = [] + rack_no = "rack_{n}".format(n=f"{r:02d}") output_levels.append(rack_no) for w in wells: - well_no='well_{n}'.format(n=f'{w:02d}') + well_no = "well_{n}".format(n=f"{w:02d}") output_levels.append(well_no) - groupA=info.loc[(info['rack']==r) & (info['well']==w)] - rois=groupA['roi'].unique() - + groupA = info.loc[(info["rack"] == r) & (info["well"] == w)] + rois = groupA["roi"].unique() + for roi in rois: - roi_no='roi_{n}'.format(n=f'{roi:02d}') - exp_level_no='exp_level_{n}'.format(n=f'{exp_level:02d}') + roi_no = "roi_{n}".format(n=f"{roi:02d}") + exp_level_no = "exp_level_{n}".format(n=f"{exp_level:02d}") output_levels.append(roi_no) output_levels.append(exp_level_no) - - counter=0 - groupB=groupA.loc[groupA['roi']==roi] - A=groupB.loc[(groupB['exposure_level']=='ref')] - stack_size_z=A.shape[0]*len(markers) - fov_id=groupB.loc[groupB['marker']==ref_marker,'fov'].unique() - fov_id=np.sort(fov_id) - - stack_name='cycle-{C}-{prefix}-exp-{E}-rack-{R}-well-{W}-roi-{ROI}-{T}-{M}.{img_format}'.format(C=f'{(offset+antigen_cycle_no):03d}', - prefix=cycle_prefix, - E=exp_level, - R=r, - W=w, - ROI=roi, - T=target_name,#markers or filters - M=target, - img_format='ome.tiff' - ) - - - X=[] - Y=[] - stack=np.zeros((stack_size_z,height,width),dtype=dtype_ref) - - exposure_per_marker=[] - exp=groupB.loc[(groupB['marker']==ref_marker) & (groupB['fov']==1) & (groupB['exposure_level']=='ref'),'exposure'].tolist()[0] + counter = 0 + groupB = groupA.loc[groupA["roi"] == roi] + A = groupB.loc[(groupB["exposure_level"] == "ref")] + stack_size_z = A.shape[0] * len(markers) + fov_id = groupB.loc[groupB["marker"] == ref_marker, "fov"].unique() + fov_id = np.sort(fov_id) + + stack_name = "cycle-{C}-{prefix}-exp-{E}-rack-{R}-well-{W}-roi-{ROI}-{T}-{M}.{img_format}".format( + C=f"{(offset+antigen_cycle_no):03d}", + prefix=cycle_prefix, + E=exp_level, + R=r, + W=w, + ROI=roi, + T=target_name, # markers or filters + M=target, + img_format="ome.tiff", + ) + + X = [] + Y = [] + stack = np.zeros((stack_size_z, height, width), dtype=dtype_ref) + + exposure_per_marker = [] + exp = groupB.loc[ + (groupB["marker"] == ref_marker) & (groupB["fov"] == 1) & (groupB["exposure_level"] == "ref"), + "exposure", + ].tolist()[0] exposure_per_marker.append(exp) for s in markers_subset: - exp=groupB.loc[(groupB['marker']==s) & (groupB['fov']==1) & (groupB['exposure_level']==exp_level),'exposure'].tolist()[0] + exp = groupB.loc[ + (groupB["marker"] == s) & (groupB["fov"] == 1) & (groupB["exposure_level"] == exp_level), + "exposure", + ].tolist()[0] exposure_per_marker.append(exp) - + for tile in fov_id: - - img_ref=groupB.loc[(groupB['marker']==ref_marker) & (groupB['fov']==tile) ,'img_full_path'].tolist() - - - if len(img_ref)>0: + + img_ref = groupB.loc[ + (groupB["marker"] == ref_marker) & (groupB["fov"] == tile), "img_full_path" + ].tolist() + + if len(img_ref) > 0: with tifff.TiffFile(img_ref[0]) as tif: - img=tif.pages[0].asarray() - metadata=tif.ome_metadata - - stack[counter,:,:]=img - tile_data=tile_position(metadata) - X.append(tile_data['x']) - Y.append(tile_data['y']) - counter+=1 - - for m in markers_subset: - img_marker=groupB.loc[(groupB['marker']==m) & (groupB['fov']==tile) & (groupB['exposure_level']==exp_level),'img_full_path'].tolist()[0] - img=tifff.imread(img_marker) - stack[counter,:,:]=img - counter+=1 - + img = tif.pages[0].asarray() + metadata = tif.ome_metadata + + stack[counter, :, :] = img + tile_data = tile_position(metadata) + X.append(tile_data["x"]) + Y.append(tile_data["y"]) + counter += 1 - output_folders_path=stack_path / '--'.join(output_levels) / 'raw' + for m in markers_subset: + img_marker = groupB.loc[ + (groupB["marker"] == m) + & (groupB["fov"] == tile) + & (groupB["exposure_level"] == exp_level), + "img_full_path", + ].tolist()[0] + img = tifff.imread(img_marker) + stack[counter, :, :] = img + counter += 1 + + output_folders_path = stack_path / "--".join(output_levels) / "raw" if os.path.exists(output_folders_path): pass else: os.makedirs(output_folders_path) - final_stack_path=os.path.join(output_folders_path,stack_name) - tifff.imwrite(final_stack_path, stack, photometric='minisblack') - img_info={'name':stack_name, - 'device':device_name, - 'no_channels':len(markers), - 'markers':sorted_markers, - 'filters':sorted_filters, - 'exposure_times':exposure_per_marker, - 'xy_img_size_pix':(width,height), - 'pix_size':tile_data['pixel_size'], - 'pix_units':tile_data['pixel_units'], - 'bit_depth':tile_data['bit_depth'], - 'sig_bits':tile_data['sig_bits'] - } - create_ome(img_info,setup_coords(X,Y,img_info['pix_units']),img_path=final_stack_path) - + final_stack_path = os.path.join(output_folders_path, stack_name) + tifff.imwrite(final_stack_path, stack, photometric="minisblack") + img_info = { + "name": stack_name, + "device": device_name, + "no_channels": len(markers), + "markers": sorted_markers, + "filters": sorted_filters, + "exposure_times": exposure_per_marker, + "xy_img_size_pix": (width, height), + "pix_size": tile_data["pixel_size"], + "pix_units": tile_data["pixel_units"], + "bit_depth": tile_data["bit_depth"], + "sig_bits": tile_data["sig_bits"], + } + create_ome(img_info, setup_coords(X, Y, img_info["pix_units"]), img_path=final_stack_path) + return img_info -def main(): - - antigen_info=antigen_cycle_info(antigen_folder=antigen_dir) - bleach_info=bleach_cycle_info(bleach_folder=bleach_dir) - exp=antigen_info['exposure_level'].unique() - exp=exp[exp!='ref'] +def main(): + + antigen_info = antigen_cycle_info(antigen_folder=antigen_dir) + bleach_info = bleach_cycle_info(bleach_folder=bleach_dir) + exp = antigen_info["exposure_level"].unique() + exp = exp[exp != "ref"] exp.sort() if args.hi_exposure_only: - exp=[max(exp)] + exp = [max(exp)] else: - pass - - + pass + for e in exp: - antigen_stack_info=create_stack(antigen_info,antigen_cycle_no=cycle_no,exp_level=e) - bleach_stack_info=create_stack(bleach_info,antigen_cycle_no=cycle_no,isbleach=True,exp_level=e) + antigen_stack_info = create_stack(antigen_info, antigen_cycle_no=cycle_no, exp_level=e) + bleach_stack_info = create_stack(bleach_info, antigen_cycle_no=cycle_no, isbleach=True, exp_level=e) - -if __name__=='__main__': +if __name__ == "__main__": main() - - - - \ No newline at end of file diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py index 46a0b175..d2b51330 100644 --- a/src/spatialdata_io/readers/_utils/_utils.py +++ b/src/spatialdata_io/readers/_utils/_utils.py @@ -73,6 +73,7 @@ def _read_counts( adata.uns["spatial"] = {library_id: {"metadata": {}}} # can overwrite return adata, library_id + def _initialize_raster_models_kwargs( image_models_kwargs: Mapping[str, Any], labels_models_kwargs: Mapping[str, Any] ) -> tuple[dict[str, Any], dict[str, Any]]: @@ -89,6 +90,7 @@ def _initialize_raster_models_kwargs( labels_models_kwargs["scale_factors"] = [2, 2, 2, 2] return image_models_kwargs, labels_models_kwargs + def add_ome_attr(path: Path) -> None: """Add OME metadata to Zarr store of SpatialData object.""" path = Path(path) diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py index 03a4b92b..ca51b3f5 100644 --- a/src/spatialdata_io/readers/macsima.py +++ b/src/spatialdata_io/readers/macsima.py @@ -1,25 +1,22 @@ from __future__ import annotations from collections.abc import Mapping -from math import e from pathlib import Path from types import MappingProxyType from typing import Any +import dask.array as da import pandas as pd +import spatialdata as sd +from aicsimageio import AICSImage from dask_image.imread import imread +from ome_types import from_tiff from spatialdata import SpatialData from spatialdata._logging import logger from spatialdata_io._constants._constants import MacsimaKeys from spatialdata_io._docs import inject_docs -from spatialdata_io.readers._utils._utils import parse_physical_size, calc_scale_factors - -from spatialdata._logging import logger -import spatialdata as sd -from ome_types import from_tiff -import dask.array as da -from aicsimageio import AICSImage +from spatialdata_io.readers._utils._utils import calc_scale_factors, parse_physical_size __all__ = ["macsima"] @@ -82,17 +79,21 @@ def macsima( pixels_to_microns = None if metadata: # read metadata to get list of images and channel names - path_files = list(path.glob(f'*{MacsimaKeys.METADATA_SUFFIX}')) + path_files = list(path.glob(f"*{MacsimaKeys.METADATA_SUFFIX}")) if len(path_files) > 0: - if len(path_files) > 1: - logger.warning(f"Cannot determine metadata file. Expecting a single file with format .txt. Got multiple files: {path_files}") - path_metadata = list(path.glob(f'*{MacsimaKeys.METADATA_SUFFIX}'))[0] + if len(path_files) > 1: + logger.warning( + f"Cannot determine metadata file. Expecting a single file with format .txt. Got multiple files: {path_files}" + ) + path_metadata = list(path.glob(f"*{MacsimaKeys.METADATA_SUFFIX}"))[0] df = pd.read_csv(path_metadata, sep="\t", header=0, index_col=None) logger.debug(df) - df['channel'] = df['ch1'].str.split(' ').str[0] - df['round_channel'] = df['Round'] + ' ' + df['channel'] + df["channel"] = df["ch1"].str.split(" ").str[0] + df["round_channel"] = df["Round"] + " " + df["channel"] path_files = [path / p for p in df.filename.values] - assert all([p.exists() for p in path_files]), f"Cannot find all images in metadata file. Missing: {[p for p in path_files if not p.exists()]}" + assert all( + [p.exists() for p in path_files] + ), f"Cannot find all images in metadata file. Missing: {[p for p in path_files if not p.exists()]}" round_channels = df.round_channel.values stack, sorted_channels = get_stack(path_files, round_channels, imread_kwargs) else: @@ -100,11 +101,11 @@ def macsima( if not metadata or len(path_files) == 0: # get list of image paths, get channel name from OME data and cycle number from filename # look for OME-TIFF files - ome_patt = f'*{MacsimaKeys.IMAGE_OMETIF}*' + ome_patt = f"*{MacsimaKeys.IMAGE_OMETIF}*" path_files = list(path.glob(ome_patt)) if not path_files: # look for .qptiff files - qptif_patt = f'*{MacsimaKeys.IMAGE_QPTIF}*' + qptif_patt = f"*{MacsimaKeys.IMAGE_QPTIF}*" path_files = list(path.glob(qptif_patt)) logger.debug(path_files) if not path_files: @@ -140,7 +141,7 @@ def macsima( channels = [from_tiff(p).images[0].pixels.channels[0].name for p in path_files] round_channels = [f"{r} {c}" for r, c in zip(rounds, channels)] stack, sorted_channels = get_stack(path_files, round_channels, imread_kwargs) - + # do subsetting if needed if subset: stack = stack[:, :subset, :subset] @@ -176,16 +177,16 @@ def macsima( transformations=t_dict, ) sdata = sd.SpatialData(images={path.stem: stack}, table=None) - + return sdata -def get_stack(path_files: list[Path], round_channels: list[str], imread_kwargs: Mapping[str, Any])-> Any: +def get_stack(path_files: list[Path], round_channels: list[str], imread_kwargs: Mapping[str, Any]) -> Any: imgs_channels = list(zip(path_files, round_channels)) logger.debug(imgs_channels) # sort based on round number - imgs_channels = sorted(imgs_channels, key=lambda x: int(x[1].split(' ')[0][1:])) - logger.debug(f'Len imgs_channels: {len(imgs_channels)}') + imgs_channels = sorted(imgs_channels, key=lambda x: int(x[1].split(" ")[0][1:])) + logger.debug(f"Len imgs_channels: {len(imgs_channels)}") # read in images and merge channels sorted_paths, sorted_channels = list(zip(*imgs_channels)) imgs = [imread(img, **imread_kwargs) for img in sorted_paths] From cf06fa0e6f6823637ae2453626d133fe2305a846 Mon Sep 17 00:00:00 2001 From: Benjamin Rombaut Date: Wed, 12 Jun 2024 10:42:01 +0200 Subject: [PATCH 4/7] rename microns cs as global is the default --- src/spatialdata_io/readers/macsima.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py index ca51b3f5..7cbd7daf 100644 --- a/src/spatialdata_io/readers/macsima.py +++ b/src/spatialdata_io/readers/macsima.py @@ -97,7 +97,7 @@ def macsima( round_channels = df.round_channel.values stack, sorted_channels = get_stack(path_files, round_channels, imread_kwargs) else: - logger.warning(f"Cannot find metadata file. Will try to parse from image names.") + logger.warning("Cannot find metadata file. Will try to parse from image names.") if not metadata or len(path_files) == 0: # get list of image paths, get channel name from OME data and cycle number from filename # look for OME-TIFF files @@ -160,7 +160,7 @@ def macsima( t_pixels_to_microns = sd.transformations.Scale([pixels_to_microns, pixels_to_microns], axes=("x", "y")) # 'microns' is also used in merscope example # no inverse needed as the transformation is already from pixels to microns - t_dict = {"microns": t_pixels_to_microns} + t_dict = {"global": t_pixels_to_microns} # # chunk_size can be 1 for channels chunks = { "x": max_chunk_size, From 9e6e9e5eb9139e2e7953138f0982e0888be34a10 Mon Sep 17 00:00:00 2001 From: Benjamin Rombaut Date: Wed, 12 Jun 2024 10:42:16 +0200 Subject: [PATCH 5/7] add tests for lung macsima dataset --- tests/test_macsima.py | 60 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/test_macsima.py diff --git a/tests/test_macsima.py b/tests/test_macsima.py new file mode 100644 index 00000000..4d1cf408 --- /dev/null +++ b/tests/test_macsima.py @@ -0,0 +1,60 @@ +import math +import sys +from pathlib import Path + +import pytest + +from spatialdata_io.readers.macsima import ( + macsima, +) + + +# The datasets should be downloaded and placed in the "data" directory; +@pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher") +@pytest.mark.parametrize( + "dataset,expected", + [ + ("Lung_adc_demo", "{'y': (0, 15460), 'x': (0, 13864)}"), + ], +) +def test_image_size(dataset: str, expected: str) -> None: + f = Path("./data") / dataset + assert f.is_dir() + sdata = macsima(f) + from spatialdata import get_extent + + extent: dict[str, tuple[float, float]] = get_extent(sdata, exact=False) + extent = {ax: (math.floor(extent[ax][0]), math.ceil(extent[ax][1])) for ax in extent} + assert str(extent) == expected + +@pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher") +@pytest.mark.parametrize( + "dataset,expected", + [ + ("Lung_adc_demo", 116), + ], +) +def test_total_channels(dataset: str, expected: int) -> None: + f = Path("./data") / dataset + assert f.is_dir() + sdata = macsima(f) + + # get the number of channels + channels: int = len(sdata.images[dataset]["scale0"]["c"]) + assert channels == expected + +@pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher") +@pytest.mark.parametrize( + "dataset,expected", + [ + ("Lung_adc_demo", ['R0 DAPI', 'R1 CD68', 'R1 CD163']), + ], +) +def test_channel_names(dataset: str, expected: list[str]) -> None: + f = Path("./data") / dataset + assert f.is_dir() + sdata = macsima(f, c_subset=3) + + # get the channel names + channels: int = sdata.images[dataset]["scale0"]["c"].values + assert list(channels) == expected \ No newline at end of file From a57d425be4269473984f86002010590817bb482e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Jun 2024 08:43:24 +0000 Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_macsima.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_macsima.py b/tests/test_macsima.py index 4d1cf408..1201ac6b 100644 --- a/tests/test_macsima.py +++ b/tests/test_macsima.py @@ -4,9 +4,7 @@ import pytest -from spatialdata_io.readers.macsima import ( - macsima, -) +from spatialdata_io.readers.macsima import macsima # The datasets should be downloaded and placed in the "data" directory; @@ -27,6 +25,7 @@ def test_image_size(dataset: str, expected: str) -> None: extent = {ax: (math.floor(extent[ax][0]), math.ceil(extent[ax][1])) for ax in extent} assert str(extent) == expected + @pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher") @pytest.mark.parametrize( "dataset,expected", @@ -43,11 +42,12 @@ def test_total_channels(dataset: str, expected: int) -> None: channels: int = len(sdata.images[dataset]["scale0"]["c"]) assert channels == expected + @pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher") @pytest.mark.parametrize( "dataset,expected", [ - ("Lung_adc_demo", ['R0 DAPI', 'R1 CD68', 'R1 CD163']), + ("Lung_adc_demo", ["R0 DAPI", "R1 CD68", "R1 CD163"]), ], ) def test_channel_names(dataset: str, expected: list[str]) -> None: @@ -57,4 +57,4 @@ def test_channel_names(dataset: str, expected: list[str]) -> None: # get the channel names channels: int = sdata.images[dataset]["scale0"]["c"].values - assert list(channels) == expected \ No newline at end of file + assert list(channels) == expected From 1e99c79559ed05239d97c59527d6d5679562a083 Mon Sep 17 00:00:00 2001 From: Benjamin Rombaut Date: Wed, 12 Jun 2024 12:49:11 +0200 Subject: [PATCH 7/7] update for liver cell dataset --- tests/test_macsima.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tests/test_macsima.py b/tests/test_macsima.py index 4d1cf408..523dce65 100644 --- a/tests/test_macsima.py +++ b/tests/test_macsima.py @@ -4,18 +4,14 @@ import pytest -from spatialdata_io.readers.macsima import ( - macsima, -) +from spatialdata_io.readers.macsima import macsima # The datasets should be downloaded and placed in the "data" directory; @pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher") @pytest.mark.parametrize( "dataset,expected", - [ - ("Lung_adc_demo", "{'y': (0, 15460), 'x': (0, 13864)}"), - ], + [("Lung_adc_demo", "{'y': (0, 15460), 'x': (0, 13864)}"), ("HumanLiverH35", "{'y': (0, 1154), 'x': (0, 1396)}")], ) def test_image_size(dataset: str, expected: str) -> None: f = Path("./data") / dataset @@ -27,12 +23,11 @@ def test_image_size(dataset: str, expected: str) -> None: extent = {ax: (math.floor(extent[ax][0]), math.ceil(extent[ax][1])) for ax in extent} assert str(extent) == expected + @pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher") @pytest.mark.parametrize( "dataset,expected", - [ - ("Lung_adc_demo", 116), - ], + [("Lung_adc_demo", 116), ("HumanLiverH35", 151)], ) def test_total_channels(dataset: str, expected: int) -> None: f = Path("./data") / dataset @@ -43,12 +38,11 @@ def test_total_channels(dataset: str, expected: int) -> None: channels: int = len(sdata.images[dataset]["scale0"]["c"]) assert channels == expected + @pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher") @pytest.mark.parametrize( "dataset,expected", - [ - ("Lung_adc_demo", ['R0 DAPI', 'R1 CD68', 'R1 CD163']), - ], + [("Lung_adc_demo", ["R0 DAPI", "R1 CD68", "R1 CD163"]), ("HumanLiverH35", ["R0 DAPI", "R1 CD68", "R1 CD163"])], ) def test_channel_names(dataset: str, expected: list[str]) -> None: f = Path("./data") / dataset @@ -57,4 +51,4 @@ def test_channel_names(dataset: str, expected: list[str]) -> None: # get the channel names channels: int = sdata.images[dataset]["scale0"]["c"].values - assert list(channels) == expected \ No newline at end of file + assert list(channels) == expected