From 6ac65455222137d28d53c699e241227d8423b5bc Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 20 Apr 2022 17:41:08 +0000 Subject: [PATCH 01/16] Implement PatchWSIDataset Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/__init__.py | 1 + monai/data/wsi_datasets.py | 116 +++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 monai/data/wsi_datasets.py diff --git a/monai/data/__init__.py b/monai/data/__init__.py index ca4be87ef6..8dea59ba4f 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -88,3 +88,4 @@ zoom_affine, ) from .wsi_reader import BaseWSIReader, CuCIMWSIReader, WSIReader +from .wsi_datasets import PatchWSIDataset diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py new file mode 100644 index 0000000000..1a03cc4ca8 --- /dev/null +++ b/monai/data/wsi_datasets.py @@ -0,0 +1,116 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Callable, Dict, List, Optional, Tuple, Union + +import numpy as np + +from monai.data import Dataset +from monai.data.wsi_reader import WSIReader +from monai.utils import ensure_tuple_rep + +__all__ = ["PatchWSIDataset"] + + +class PatchWSIDataset(Dataset): + """ + This dataset extracts patches from whole slide images (without loading the whole image) + It also reads labels for each patch and provides each patch with its associated class labels. 
+ + Args: + data: the list of input samples including image, location, and label (see the note below for more details). + size: the size of patch to be extracted from the whole slide image. + level: the level at which the patches to be extracted (default to 0). + transform: transforms to be executed on input data. + reader_name: the name of library to be used for loading whole slide imaging, as the backend of `monai.data.WSIReader` + Defaults to CuCIM. + + Note: + The input data has the following form as an example: + + .. code-block:: python + + [ + {"image": "path/to/image1.tiff", "location": [200, 500], "label": 0}, + {"image": "path/to/image2.tiff", "location": [100, 700], "label": 1} + ] + + """ + + def __init__( + self, + data: List, + size: Optional[Union[int, Tuple[int, int]]] = None, + level: Optional[int] = None, + transform: Optional[Callable] = None, + reader_name: str = "cuCIM", + ): + super().__init__(data, transform) + + # Ensure patch size is a two dimensional tuple + if size is None: + self.size = None + else: + self.size = ensure_tuple_rep(size, 2) + + # Create a default level + self.level = level + + # Setup the WSI reader backend + self.reader_name = reader_name.lower() + self.image_reader = WSIReader(backend=self.reader_name) + + # Initialized an empty whole slide image object dict + self.wsi_object_dict: Dict = {} + + def _get_wsi_object(self, sample): + image_path = sample["image"] + if image_path not in self.wsi_object_dict: + self.wsi_object_dict[image_path] = self.image_reader.read(image_path) + return self.wsi_object_dict[image_path] + + def _empty_wsi_object_dict(self): + self.wsi_object_dict = {} + + def _get_label(self, sample): + return np.array(sample["label"], dtype=np.float32) + + def _get_location(self, sample): + size = self._get_size(sample) + return [sample["location"][i] - size[i] // 2 for i in range(len(size))] + + def _get_level(self, sample): + if self.level is None: + return sample.get("level") + return self.level + + def 
_get_size(self, sample): + if self.size is None: + return ensure_tuple_rep(sample.get("size"), 2) + return self.size + + def _get_data(self, sample): + if self.reader_name == "openslide": + self._empty_wsi_object_dict() + wsi_obj = self._get_wsi_object(sample) + location = self._get_location(sample) + level = self._get_level(sample) + size = self._get_size(sample) + return self.image_reader.get_data(wsi=wsi_obj, location=location, size=size, level=level) + + def __getitem__(self, index): + sample = self.data[index] + image, metadata = self._get_data(sample) + label = self._get_label(sample) + patch = {"image": image, "label": label, "metadata": metadata} + if self.transform: + patch = self.transform(patch) + return patch From d227b551153719e394c3d8802dc027053e634886 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 20 Apr 2022 17:41:29 +0000 Subject: [PATCH 02/16] Add unittests Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_patch_wsi_dataset_new.py | 130 ++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 tests/test_patch_wsi_dataset_new.py diff --git a/tests/test_patch_wsi_dataset_new.py b/tests/test_patch_wsi_dataset_new.py new file mode 100644 index 0000000000..4f5f4be1f7 --- /dev/null +++ b/tests/test_patch_wsi_dataset_new.py @@ -0,0 +1,130 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import unittest +from unittest import skipUnless + +import numpy as np +from numpy.testing import assert_array_equal +from parameterized import parameterized + +from monai.data import PatchWSIDataset +from monai.utils import optional_import +from tests.utils import download_url_or_skip_test, testing_data_config + +cucim, has_cucim = optional_import("cucim") +has_cucim = has_cucim and hasattr(cucim, "CuImage") +openslide, has_osl = optional_import("openslide") +imwrite, has_tiff = optional_import("tifffile", name="imwrite") +_, has_codec = optional_import("imagecodecs") +has_tiff = has_tiff and has_codec + +FILE_KEY = "wsi_img" +FILE_URL = testing_data_config("images", FILE_KEY, "url") +base_name, extension = os.path.basename(f"{FILE_URL}"), ".tiff" +FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + base_name + extension) + +TEST_CASE_0 = [ + { + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], + "size": (1, 1), + "reader_name": "cuCIM", + }, + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, +] + +TEST_CASE_0_L1 = [ + { + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], + "size": (1, 1), + "level": 1, + "reader_name": "cuCIM", + }, + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, +] + +TEST_CASE_0_L2 = [ + { + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], + "size": (1, 1), + "level": 1, + "reader_name": "cuCIM", + }, + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, +] + + +TEST_CASE_1 = [ + { + "data": [{"image": FILE_PATH, "location": [0, 0], "size": 1, "label": [1]}], + }, + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, +] + +TEST_CASE_2 = [ + { + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], + "size": 1, + "reader_name": "cuCIM", + }, + {"image": np.array([[[239]], [[239]], 
[[239]]], dtype=np.uint8), "label": np.array([1])}, +] + +TEST_CASE_3 = [ + { + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], + "size": 1, + "reader_name": "cuCIM", + }, + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, +] + + +@skipUnless(has_cucim or has_osl or has_tiff, "Requires cucim, openslide, or tifffile!") +def setUpModule(): # noqa: N802 + hash_type = testing_data_config("images", FILE_KEY, "hash_type") + hash_val = testing_data_config("images", FILE_KEY, "hash_val") + download_url_or_skip_test(FILE_URL, FILE_PATH, hash_type=hash_type, hash_val=hash_val) + + +class PatchWSIDatasetTests: + class Tests(unittest.TestCase): + backend = None + + @parameterized.expand( + [ + TEST_CASE_0, + TEST_CASE_0_L1, + TEST_CASE_0_L2, + TEST_CASE_1, + TEST_CASE_2, + TEST_CASE_3, + ] + ) + def test_read_patches_cucim(self, input_parameters, expected): + dataset = PatchWSIDataset(**input_parameters) + sample = dataset[0] + self.assertTupleEqual(sample["label"].shape, expected["label"].shape) + self.assertTupleEqual(sample["image"].shape, expected["image"].shape) + self.assertIsNone(assert_array_equal(sample["label"], expected["label"])) + self.assertIsNone(assert_array_equal(sample["image"], expected["image"])) + + +@skipUnless(has_cucim, "Requires cucim") +class TestCuCIM(PatchWSIDatasetTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "cucim" + + +if __name__ == "__main__": + unittest.main() From a2d9a111aa33d0e6a0982ed220dbcd7e78917594 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 20 Apr 2022 17:43:06 +0000 Subject: [PATCH 03/16] Add docs Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- docs/source/data.rst | 46 ++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/docs/source/data.rst b/docs/source/data.rst index c968d72945..4f58e52c1a 100644 --- 
a/docs/source/data.rst +++ b/docs/source/data.rst @@ -152,23 +152,6 @@ PILReader .. autoclass:: PILReader :members: -Whole slide image reader ------------------------- - -BaseWSIReader -~~~~~~~~~~~~~ -.. autoclass:: BaseWSIReader - :members: - -WSIReader -~~~~~~~~~ -.. autoclass:: WSIReader - :members: - -CuCIMWSIReader -~~~~~~~~~~~~~~ -.. autoclass:: CuCIMWSIReader - :members: Image writer ------------ @@ -295,3 +278,32 @@ MetaTensor ---------- .. autoclass:: monai.data.MetaTensor :members: + + + +Whole slide image reader +------------------------ + +BaseWSIReader +~~~~~~~~~~~~~ +.. autoclass:: monai.data.BaseWSIReader + :members: + +WSIReader +~~~~~~~~~ +.. autoclass:: monai.data.WSIReader + :members: + +CuCIMWSIReader +~~~~~~~~~~~~~~ +.. autoclass:: monai.data.CuCIMWSIReader + :members: + + +Whole slide image datasets +-------------------------- + +PatchWSIDataset +~~~~~~~~~~~~~~~ +.. autoclass:: monai.data.PatchWSIDataset + :members: From f1324d1b024f721e17afba2ab81b0733f97b679a Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 20 Apr 2022 17:49:36 +0000 Subject: [PATCH 04/16] Reorder imports Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/__init__.py b/monai/data/__init__.py index 8dea59ba4f..09225b19a0 100644 --- a/monai/data/__init__.py +++ b/monai/data/__init__.py @@ -87,5 +87,5 @@ worker_init_fn, zoom_affine, ) -from .wsi_reader import BaseWSIReader, CuCIMWSIReader, WSIReader from .wsi_datasets import PatchWSIDataset +from .wsi_reader import BaseWSIReader, CuCIMWSIReader, WSIReader From 39a679a788c7eb69553a697f265f3c61cd8208a7 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Wed, 20 Apr 2022 18:07:40 +0000 Subject: [PATCH 05/16] formatting: Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_patch_wsi_dataset_new.py | 27 
++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/tests/test_patch_wsi_dataset_new.py b/tests/test_patch_wsi_dataset_new.py index 4f5f4be1f7..a388aa589e 100644 --- a/tests/test_patch_wsi_dataset_new.py +++ b/tests/test_patch_wsi_dataset_new.py @@ -34,11 +34,7 @@ FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + base_name + extension) TEST_CASE_0 = [ - { - "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], - "size": (1, 1), - "reader_name": "cuCIM", - }, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": (1, 1), "reader_name": "cuCIM"}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] @@ -64,18 +60,12 @@ TEST_CASE_1 = [ - { - "data": [{"image": FILE_PATH, "location": [0, 0], "size": 1, "label": [1]}], - }, + {"data": [{"image": FILE_PATH, "location": [0, 0], "size": 1, "label": [1]}]}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] TEST_CASE_2 = [ - { - "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], - "size": 1, - "reader_name": "cuCIM", - }, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": 1, "reader_name": "cuCIM"}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] @@ -100,16 +90,7 @@ class PatchWSIDatasetTests: class Tests(unittest.TestCase): backend = None - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_0_L1, - TEST_CASE_0_L2, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_0_L1, TEST_CASE_0_L2, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) def test_read_patches_cucim(self, input_parameters, expected): dataset = PatchWSIDataset(**input_parameters) sample = dataset[0] From 688ff044bd7987528d575e081335e7b5d1275c77 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 21 Apr 2022 21:45:42 +0000 
Subject: [PATCH 06/16] Address comments Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 1a03cc4ca8..1da4f378b0 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -71,43 +71,40 @@ def __init__( # Initialized an empty whole slide image object dict self.wsi_object_dict: Dict = {} - def _get_wsi_object(self, sample): + def _get_wsi_object(self, sample: Dict): image_path = sample["image"] if image_path not in self.wsi_object_dict: self.wsi_object_dict[image_path] = self.image_reader.read(image_path) return self.wsi_object_dict[image_path] - def _empty_wsi_object_dict(self): - self.wsi_object_dict = {} - - def _get_label(self, sample): + def _get_label(self, sample: Dict): return np.array(sample["label"], dtype=np.float32) - def _get_location(self, sample): + def _get_location(self, sample: Dict): size = self._get_size(sample) return [sample["location"][i] - size[i] // 2 for i in range(len(size))] - def _get_level(self, sample): + def _get_level(self, sample: Dict): if self.level is None: return sample.get("level") return self.level - def _get_size(self, sample): + def _get_size(self, sample: Dict): if self.size is None: return ensure_tuple_rep(sample.get("size"), 2) return self.size - def _get_data(self, sample): + def _get_data(self, sample: Dict): if self.reader_name == "openslide": - self._empty_wsi_object_dict() + self.wsi_object_dict = {} wsi_obj = self._get_wsi_object(sample) location = self._get_location(sample) level = self._get_level(sample) size = self._get_size(sample) return self.image_reader.get_data(wsi=wsi_obj, location=location, size=size, level=level) - def __getitem__(self, index): - sample = self.data[index] + def __getitem__(self, index: int): + sample: Dict = self.data[index] image, metadata = self._get_data(sample) label = 
self._get_label(sample) patch = {"image": image, "label": label, "metadata": metadata} From 4092159e83a104dd958bc894dc74d3eeb9292362 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 02:59:10 +0000 Subject: [PATCH 07/16] Update to be compatible with Dataset Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 1da4f378b0..ef9dd9b9d0 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -15,6 +15,7 @@ from monai.data import Dataset from monai.data.wsi_reader import WSIReader +from monai.transforms import apply_transform from monai.utils import ensure_tuple_rep __all__ = ["PatchWSIDataset"] @@ -103,11 +104,14 @@ def _get_data(self, sample: Dict): size = self._get_size(sample) return self.image_reader.get_data(wsi=wsi_obj, location=location, size=size, level=level) - def __getitem__(self, index: int): + def _transform(self, index: int): + # Get a single entry of data sample: Dict = self.data[index] + # Extract patch image and associated metadata image, metadata = self._get_data(sample) + # Get the label label = self._get_label(sample) + + # Create put all patch information together and apply transforms patch = {"image": image, "label": label, "metadata": metadata} - if self.transform: - patch = self.transform(patch) - return patch + return apply_transform(self.transform, patch) if self.transform else patch From d9d7b82f10b6d24c865cd5f617eaef0ded254fa3 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 03:56:58 +0000 Subject: [PATCH 08/16] Update reader to accept str, class, object Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 40 ++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 
deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index ef9dd9b9d0..1d90f4d8d5 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -9,12 +9,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import inspect from typing import Callable, Dict, List, Optional, Tuple, Union import numpy as np from monai.data import Dataset -from monai.data.wsi_reader import WSIReader +from monai.data.wsi_reader import BaseWSIReader, WSIReader from monai.transforms import apply_transform from monai.utils import ensure_tuple_rep @@ -31,8 +32,10 @@ class PatchWSIDataset(Dataset): size: the size of patch to be extracted from the whole slide image. level: the level at which the patches to be extracted (default to 0). transform: transforms to be executed on input data. - reader_name: the name of library to be used for loading whole slide imaging, as the backend of `monai.data.WSIReader` - Defaults to CuCIM. + reader: the module to be used for loading whole slide imaging, + - if `reader` is a string, it defines the backend of `monai.data.WSIReader`. Defaults to cuCIM. + - if `reader` is a class (inherited from `BaseWSIReader`), it is initialized and set as wsi_reader. + - if `reader` is an instance of a a class inherited from `BaseWSIReader`, it is set as the wsi_reader. 
Note: The input data has the following form as an example: @@ -52,7 +55,8 @@ def __init__( size: Optional[Union[int, Tuple[int, int]]] = None, level: Optional[int] = None, transform: Optional[Callable] = None, - reader_name: str = "cuCIM", + reader="cuCIM", + **kwargs, ): super().__init__(data, transform) @@ -62,12 +66,23 @@ def __init__( else: self.size = ensure_tuple_rep(size, 2) - # Create a default level + # Create a default level that override all levels if it is not None self.level = level - - # Setup the WSI reader backend - self.reader_name = reader_name.lower() - self.image_reader = WSIReader(backend=self.reader_name) + # Set the default WSIReader's level to 0 if level is not provided + if level is None: + level = 0 + + # Setup the WSI reader + self.wsi_reader: Union[WSIReader, BaseWSIReader] + if isinstance(reader, str): + self.reader_name = reader.lower() + self.wsi_reader = WSIReader(backend=self.reader_name, level=level, **kwargs) + elif inspect.isclass(reader) and issubclass(reader, BaseWSIReader): + self.wsi_reader = reader(level=level, **kwargs) + elif isinstance(reader, BaseWSIReader): + self.wsi_reader = reader + else: + raise ValueError(f"unsupported reader type: {reader}.") # Initialized an empty whole slide image object dict self.wsi_object_dict: Dict = {} @@ -75,7 +90,7 @@ def __init__( def _get_wsi_object(self, sample: Dict): image_path = sample["image"] if image_path not in self.wsi_object_dict: - self.wsi_object_dict[image_path] = self.image_reader.read(image_path) + self.wsi_object_dict[image_path] = self.wsi_reader.read(image_path) return self.wsi_object_dict[image_path] def _get_label(self, sample: Dict): @@ -87,7 +102,7 @@ def _get_location(self, sample: Dict): def _get_level(self, sample: Dict): if self.level is None: - return sample.get("level") + return sample.get("level", 0) return self.level def _get_size(self, sample: Dict): @@ -97,12 +112,13 @@ def _get_size(self, sample: Dict): def _get_data(self, sample: Dict): if 
self.reader_name == "openslide": + # to avoid OpenSlide internal cache self.wsi_object_dict = {} wsi_obj = self._get_wsi_object(sample) location = self._get_location(sample) level = self._get_level(sample) size = self._get_size(sample) - return self.image_reader.get_data(wsi=wsi_obj, location=location, size=size, level=level) + return self.wsi_reader.get_data(wsi=wsi_obj, location=location, size=size, level=level) def _transform(self, index: int): # Get a single entry of data From 31d722ca13ddb8782dcd2cbd373d4a79c0e285d0 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 03:57:24 +0000 Subject: [PATCH 09/16] Add test cases for various reader and level arguments Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_patch_wsi_dataset_new.py | 48 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/tests/test_patch_wsi_dataset_new.py b/tests/test_patch_wsi_dataset_new.py index a388aa589e..1f8adb9f5b 100644 --- a/tests/test_patch_wsi_dataset_new.py +++ b/tests/test_patch_wsi_dataset_new.py @@ -18,6 +18,7 @@ from parameterized import parameterized from monai.data import PatchWSIDataset +from monai.data.wsi_reader import CuCIMWSIReader from monai.utils import optional_import from tests.utils import download_url_or_skip_test, testing_data_config @@ -34,27 +35,17 @@ FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + base_name + extension) TEST_CASE_0 = [ - {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": (1, 1), "reader_name": "cuCIM"}, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1], "level": 0}], "size": (1, 1), "reader": "cuCIM"}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] TEST_CASE_0_L1 = [ - { - "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], - "size": (1, 1), - "level": 1, - "reader_name": "cuCIM", - }, + {"data": 
[{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": (1, 1), "level": 1, "reader": "cuCIM"}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] TEST_CASE_0_L2 = [ - { - "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], - "size": (1, 1), - "level": 1, - "reader_name": "cuCIM", - }, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": (1, 1), "level": 1, "reader": "cuCIM"}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] @@ -65,15 +56,29 @@ ] TEST_CASE_2 = [ - {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": 1, "reader_name": "cuCIM"}, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": 1, "level": 0, "reader": "cuCIM"}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] TEST_CASE_3 = [ + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], "size": 1, "reader": "cuCIM"}, + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, +] + +TEST_CASE_4 = [ + { + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], + "size": 1, + "reader": CuCIMWSIReader, + }, + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, +] + +TEST_CASE_5 = [ { "data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], "size": 1, - "reader_name": "cuCIM", + "reader": CuCIMWSIReader(level=0), }, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, ] @@ -90,7 +95,18 @@ class PatchWSIDatasetTests: class Tests(unittest.TestCase): backend = None - @parameterized.expand([TEST_CASE_0, TEST_CASE_0_L1, TEST_CASE_0_L2, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) + @parameterized.expand( + [ + TEST_CASE_0, + TEST_CASE_0_L1, + TEST_CASE_0_L2, + TEST_CASE_1, + 
TEST_CASE_2, + TEST_CASE_3, + TEST_CASE_4, + TEST_CASE_5, + ] + ) def test_read_patches_cucim(self, input_parameters, expected): dataset = PatchWSIDataset(**input_parameters) sample = dataset[0] From 15a9b28564f06221848903885c2bba14f07281ec Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 03:59:21 +0000 Subject: [PATCH 10/16] Update comment about OpenSlide cache Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 1d90f4d8d5..68672a1c3b 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -111,8 +111,8 @@ def _get_size(self, sample: Dict): return self.size def _get_data(self, sample: Dict): + # Don't store OpenSlide objects to avoid issues with OpenSlide internal cache if self.reader_name == "openslide": - # to avoid OpenSlide internal cache self.wsi_object_dict = {} wsi_obj = self._get_wsi_object(sample) location = self._get_location(sample) From 48dc0a3866a6f9e6ea59c94b52d6a4d18ecd09ee Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 04:05:53 +0000 Subject: [PATCH 11/16] Rename reader_name to backend Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index 68672a1c3b..e723286b24 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -74,15 +74,16 @@ def __init__( # Setup the WSI reader self.wsi_reader: Union[WSIReader, BaseWSIReader] + self.backend = "" if isinstance(reader, str): - self.reader_name = reader.lower() - self.wsi_reader = WSIReader(backend=self.reader_name, level=level, **kwargs) + self.backend = reader.lower() + self.wsi_reader = WSIReader(backend=self.backend, 
level=level, **kwargs) elif inspect.isclass(reader) and issubclass(reader, BaseWSIReader): self.wsi_reader = reader(level=level, **kwargs) elif isinstance(reader, BaseWSIReader): self.wsi_reader = reader else: - raise ValueError(f"unsupported reader type: {reader}.") + raise ValueError(f"Unsupported reader type: {reader}.") # Initialized an empty whole slide image object dict self.wsi_object_dict: Dict = {} @@ -112,7 +113,7 @@ def _get_size(self, sample: Dict): def _get_data(self, sample: Dict): # Don't store OpenSlide objects to avoid issues with OpenSlide internal cache - if self.reader_name == "openslide": + if self.backend == "openslide": self.wsi_object_dict = {} wsi_obj = self._get_wsi_object(sample) location = self._get_location(sample) From ab3472c1bc7642f6bd9a5183277bf68478226578 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 12:31:38 +0000 Subject: [PATCH 12/16] Add new test cases Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_patch_wsi_dataset_new.py | 86 +++++++++++++++++++---------- 1 file changed, 56 insertions(+), 30 deletions(-) diff --git a/tests/test_patch_wsi_dataset_new.py b/tests/test_patch_wsi_dataset_new.py index 1f8adb9f5b..d994c9f93e 100644 --- a/tests/test_patch_wsi_dataset_new.py +++ b/tests/test_patch_wsi_dataset_new.py @@ -18,7 +18,7 @@ from parameterized import parameterized from monai.data import PatchWSIDataset -from monai.data.wsi_reader import CuCIMWSIReader +from monai.data.wsi_reader import CuCIMWSIReader, OpenSlideWSIReader from monai.utils import optional_import from tests.utils import download_url_or_skip_test, testing_data_config @@ -35,51 +35,31 @@ FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + base_name + extension) TEST_CASE_0 = [ - {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1], "level": 0}], "size": (1, 1), "reader": "cuCIM"}, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": 
[1], "level": 0}], "size": (1, 1)}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] TEST_CASE_0_L1 = [ - {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": (1, 1), "level": 1, "reader": "cuCIM"}, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": (1, 1), "level": 1}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] TEST_CASE_0_L2 = [ - {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": (1, 1), "level": 1, "reader": "cuCIM"}, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": (1, 1), "level": 1}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] - - TEST_CASE_1 = [ {"data": [{"image": FILE_PATH, "location": [0, 0], "size": 1, "label": [1]}]}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] TEST_CASE_2 = [ - {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": 1, "level": 0, "reader": "cuCIM"}, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "size": 1, "level": 0}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([1])}, ] TEST_CASE_3 = [ - {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], "size": 1, "reader": "cuCIM"}, - {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, -] - -TEST_CASE_4 = [ - { - "data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], - "size": 1, - "reader": CuCIMWSIReader, - }, - {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, -] - -TEST_CASE_5 = [ - { - "data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], - "size": 1, - "reader": CuCIMWSIReader(level=0), - }, + {"data": [{"image": FILE_PATH, "location": [0, 
0], "label": [[[0, 1], [1, 0]]]}], "size": 1}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, ] @@ -103,12 +83,58 @@ class Tests(unittest.TestCase): TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, - TEST_CASE_4, - TEST_CASE_5, ] ) - def test_read_patches_cucim(self, input_parameters, expected): - dataset = PatchWSIDataset(**input_parameters) + def test_read_patches_str(self, input_parameters, expected): + dataset = PatchWSIDataset(reader=self.backend, **input_parameters) + sample = dataset[0] + self.assertTupleEqual(sample["label"].shape, expected["label"].shape) + self.assertTupleEqual(sample["image"].shape, expected["image"].shape) + self.assertIsNone(assert_array_equal(sample["label"], expected["label"])) + self.assertIsNone(assert_array_equal(sample["image"], expected["image"])) + + @parameterized.expand( + [ + TEST_CASE_0, + TEST_CASE_0_L1, + TEST_CASE_0_L2, + TEST_CASE_1, + TEST_CASE_2, + TEST_CASE_3, + ] + ) + def test_read_patches_class(self, input_parameters, expected): + if self.backend == "openslide": + reader = OpenSlideWSIReader + elif self.backend == "cucim": + reader = CuCIMWSIReader + else: + raise ValueError("Unsupported backend: {self.backend}") + dataset = PatchWSIDataset(reader=reader, **input_parameters) + sample = dataset[0] + self.assertTupleEqual(sample["label"].shape, expected["label"].shape) + self.assertTupleEqual(sample["image"].shape, expected["image"].shape) + self.assertIsNone(assert_array_equal(sample["label"], expected["label"])) + self.assertIsNone(assert_array_equal(sample["image"], expected["image"])) + + @parameterized.expand( + [ + TEST_CASE_0, + TEST_CASE_0_L1, + TEST_CASE_0_L2, + TEST_CASE_1, + TEST_CASE_2, + TEST_CASE_3, + ] + ) + def test_read_patches_object(self, input_parameters, expected): + if self.backend == "openslide": + reader = OpenSlideWSIReader(level=input_parameters.get("level", 0)) + elif self.backend == "cucim": + reader = 
CuCIMWSIReader(level=input_parameters.get("level", 0)) + else: + raise ValueError("Unsupported backend: {self.backend}") + dataset = PatchWSIDataset(reader=reader, **input_parameters) sample = dataset[0] self.assertTupleEqual(sample["label"].shape, expected["label"].shape) self.assertTupleEqual(sample["image"].shape, expected["image"].shape) From 10ee84f83ebe1e562c0c765e600d27c0a95bc340 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 12:35:32 +0000 Subject: [PATCH 13/16] Add unittests for openslide Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_patch_wsi_dataset_new.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_patch_wsi_dataset_new.py b/tests/test_patch_wsi_dataset_new.py index d994c9f93e..5107a24b18 100644 --- a/tests/test_patch_wsi_dataset_new.py +++ b/tests/test_patch_wsi_dataset_new.py @@ -143,11 +143,18 @@ def test_read_patches_object(self, input_parameters, expected): @skipUnless(has_cucim, "Requires cucim") -class TestCuCIM(PatchWSIDatasetTests.Tests): +class TestPatchWSIDatasetCuCIM(PatchWSIDatasetTests.Tests): @classmethod def setUpClass(cls): cls.backend = "cucim" +@skipUnless(has_osl, "Requires cucim") +class TestPatchWSIDatasetOpenSlide(PatchWSIDatasetTests.Tests): + @classmethod + def setUpClass(cls): + cls.backend = "openslide" + + if __name__ == "__main__": unittest.main() From bab87beae061137fb689cfcc34234d1cbb07d3e2 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 13:04:36 +0000 Subject: [PATCH 14/16] Add new test cases Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_patch_wsi_dataset_new.py | 74 +++++++++++++++++------------ 1 file changed, 43 insertions(+), 31 deletions(-) diff --git a/tests/test_patch_wsi_dataset_new.py b/tests/test_patch_wsi_dataset_new.py index 5107a24b18..81ad0ba1e9 100644 --- a/tests/test_patch_wsi_dataset_new.py 
+++ b/tests/test_patch_wsi_dataset_new.py @@ -59,10 +59,40 @@ ] TEST_CASE_3 = [ - {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], "size": 1}, + { + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], + "size": 1, + }, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, ] +TEST_CASE_4 = [ + { + "data": [ + {"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}, + {"image": FILE_PATH, "location": [0, 0], "label": [[[1, 0], [0, 0]]]}, + ], + "size": 1, + }, + [ + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1, 0], [0, 0]]])}, + ], +] + +TEST_CASE_5 = [ + { + "data": [ + {"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]], "size": 1, "level": 1}, + {"image": FILE_PATH, "location": [100, 100], "label": [[[1, 0], [0, 0]]], "size": 1, "level": 1}, + ], + }, + [ + {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, + {"image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), "label": np.array([[[1, 0], [0, 0]]])}, + ], +] + @skipUnless(has_cucim or has_osl or has_tiff, "Requires cucim, openslide, or tifffile!") def setUpModule(): # noqa: N802 @@ -75,16 +105,7 @@ class PatchWSIDatasetTests: class Tests(unittest.TestCase): backend = None - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_0_L1, - TEST_CASE_0_L2, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_0_L1, TEST_CASE_0_L2, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) def test_read_patches_str(self, input_parameters, expected): dataset = PatchWSIDataset(reader=self.backend, **input_parameters) sample = dataset[0] @@ -93,16 +114,7 @@ def test_read_patches_str(self, input_parameters, expected): 
self.assertIsNone(assert_array_equal(sample["label"], expected["label"])) self.assertIsNone(assert_array_equal(sample["image"], expected["image"])) - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_0_L1, - TEST_CASE_0_L2, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_0_L1, TEST_CASE_0_L2, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) def test_read_patches_class(self, input_parameters, expected): if self.backend == "openslide": reader = OpenSlideWSIReader @@ -117,16 +129,7 @@ def test_read_patches_class(self, input_parameters, expected): self.assertIsNone(assert_array_equal(sample["label"], expected["label"])) self.assertIsNone(assert_array_equal(sample["image"], expected["image"])) - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_0_L1, - TEST_CASE_0_L2, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_0_L1, TEST_CASE_0_L2, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) def test_read_patches_object(self, input_parameters, expected): if self.backend == "openslide": reader = OpenSlideWSIReader(level=input_parameters.get("level", 0)) @@ -141,6 +144,15 @@ def test_read_patches_object(self, input_parameters, expected): self.assertIsNone(assert_array_equal(sample["label"], expected["label"])) self.assertIsNone(assert_array_equal(sample["image"], expected["image"])) + @parameterized.expand([TEST_CASE_4, TEST_CASE_5]) + def test_read_patches_str_multi(self, input_parameters, expected): + dataset = PatchWSIDataset(reader=self.backend, **input_parameters) + for i in range(len(dataset)): + self.assertTupleEqual(dataset[i]["label"].shape, expected[i]["label"].shape) + self.assertTupleEqual(dataset[i]["image"].shape, expected[i]["image"].shape) + self.assertIsNone(assert_array_equal(dataset[i]["label"], expected[i]["label"])) + self.assertIsNone(assert_array_equal(dataset[i]["image"], expected[i]["image"])) + @skipUnless(has_cucim, "Requires cucim") class 
TestPatchWSIDatasetCuCIM(PatchWSIDatasetTests.Tests): From c725103c22475f7341249ed3e1af4ce300674e95 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 13:17:35 +0000 Subject: [PATCH 15/16] sorts Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- tests/test_patch_wsi_dataset_new.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_patch_wsi_dataset_new.py b/tests/test_patch_wsi_dataset_new.py index 81ad0ba1e9..0be30536de 100644 --- a/tests/test_patch_wsi_dataset_new.py +++ b/tests/test_patch_wsi_dataset_new.py @@ -59,10 +59,7 @@ ] TEST_CASE_3 = [ - { - "data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], - "size": 1, - }, + {"data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], "size": 1}, {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, ] @@ -85,7 +82,7 @@ "data": [ {"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]], "size": 1, "level": 1}, {"image": FILE_PATH, "location": [100, 100], "label": [[[1, 0], [0, 0]]], "size": 1, "level": 1}, - ], + ] }, [ {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, From 4838ec1c17c6e6c496f494abdb5e3fe0759a2f06 Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Fri, 22 Apr 2022 13:48:22 +0000 Subject: [PATCH 16/16] Add docstring for kwargs Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- monai/data/wsi_datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/monai/data/wsi_datasets.py b/monai/data/wsi_datasets.py index e723286b24..a895e8aa45 100644 --- a/monai/data/wsi_datasets.py +++ b/monai/data/wsi_datasets.py @@ -36,6 +36,7 @@ class PatchWSIDataset(Dataset): - if `reader` is a string, it defines the backend of `monai.data.WSIReader`. Defaults to cuCIM. 
- if `reader` is a class (inherited from `BaseWSIReader`), it is initialized and set as wsi_reader. - if `reader` is an instance of a a class inherited from `BaseWSIReader`, it is set as the wsi_reader. + kwargs: additional arguments to pass to `WSIReader` or to the provided whole slide reader class. Note: The input data has the following form as an example: