Merge pull request #88 from blowekamp/quantiles_parameters

Quantiles parameters
niaid · Nov 14, 2023 · 8390daf · 8390daf
2 parents 926f45d + 935afa5
commit 8390daf
Show file tree

Hide file tree

Showing 3 changed files with 169 additions and 51 deletions.
diff --git a/pytools/HedwigZarrImage.py b/pytools/HedwigZarrImage.py
@@ -15,7 +15,7 @@
 
 import SimpleITK as sitk
 import zarr
-from typing import Tuple, Dict, List
+from typing import Tuple, Dict, List, Optional
 from pytools.utils import OMEInfo
 import logging
 import math
@@ -179,11 +179,11 @@ def extract_2d(
             if is_vector:
                 img.ToScalarImage(True)
 
-            min_max = sitk.MinimumMaximum(img)
+            min, max = sitk.MinimumMaximum(img)
 
-            logger.debug(f"Adjusting output pixel intensity range from {min_max} -> {(0, 255)}.")
+            logger.debug(f"Adjusting output pixel intensity range from {min, max} -> {(0, 255)}.")
 
-            img = sitk.ShiftScale(img, -min_max[0], 255.0 / (min_max[1] - min_max[0]), sitk.sitkUInt8)
+            img = sitk.ShiftScale(img, -min, 255.0 / (max - min), sitk.sitkUInt8)
 
             if is_vector:
                 img.ToVectorImage(True)
@@ -203,11 +203,112 @@ def shader_type(
             return "Grayscale"
         return "MultiChannel"
 
-    def neuroglancer_shader_parameters(self, mad_scale=3) -> dict:
+    def _neuroglancer_shader_parameters_multichannel(
+        self,
+        *,
+        mad_scale=3,
+        middle_quantile: Optional[Tuple[float, float]] = None,
+        zero_black_quantiles=True,
+        upper_quantile=0.9999,
+    ) -> dict:
+        """
+        Produces the "shaderParameters" portion of the metadata for Neuroglancer when the shader type is MultiChannel.
+
+        The output window parameters are used for the visible histogram range. The computation improves the robustness
+         to outliers and the background pixel values with the zero_black_quantiles option and the upper_quantile value.
+
+        :param mad_scale: The scale factor for the robust median absolute deviation (MAD) about the median to produce
+            the minimum and maximum range that is used to select the visible pixel intensities.
+        :param middle_quantile: If not None then the range is computed from the  provided quantiles of the image data.
+            The middle_quantile is a tuple of two floats that are between 0.0 and 1.0. The first value is the lower
+             quantile and the second value is the upper quantile.
+        :param zero_black_quantiles: If True then the zero values are removed from the histogram before computing the
+            quantiles.
+        :param upper_quantile: The upper quantile to use for the "window", which is the extent of the visible histogram.
+        :return: The dictionary of the shader parameters suitable for JSON serialization.
+
+        """
+
+        if middle_quantile:
+            assert len(middle_quantile) == 2
+            assert 0.0 <= middle_quantile[0] <= 1.0
+            assert 0.0 <= middle_quantile[1] <= 1.0
+            assert middle_quantile[0] < middle_quantile[1]
+
+        assert self._ome_ngff_multiscale_dims()[1] == "C"
+
+        if len(list(self.ome_info.channel_names(self.ome_idx))) != self.shape[1]:
+            raise RuntimeError(
+                f"Mismatch of number of Channels! Array has {self.shape[1]} but there"
+                f"are {len(list(self.ome_info.channel_names(self.ome_idx)))} names:"
+                f"{list(self.ome_info.channel_names(self.ome_idx))}"
+            )
+
+        color_sequence = ["red", "green", "blue", "cyan", "yellow", "magenta"]
+
+        if self.shape[1] > len(color_sequence):
+            raise RuntimeError(
+                f"Too many channels! The array has {self.shape[1]} channels but"
+                f" only {len(color_sequence)} is supported!"
+            )
+
+        json_channel_array = []
+
+        for c, c_name in enumerate(self.ome_info.channel_names(self.ome_idx)):
+            logger.debug(f"Processing channel: {c_name}")
+
+            # replace non-alpha numeric with an underscore
+            name = re.sub(r"[^a-zA-Z0-9]+", "_", c_name.lower())
+
+            stats = self._image_statistics(
+                quantiles=[*middle_quantile, upper_quantile] if middle_quantile else [upper_quantile],
+                channel=c,
+                zero_black_quantiles=zero_black_quantiles,
+            )
+            if middle_quantile:
+                range = (stats["quantiles"][middle_quantile[0]], stats["quantiles"][middle_quantile[1]])
+            else:
+                range = (stats["median"] - stats["mad"] * mad_scale, stats["median"] + stats["mad"] * mad_scale)
+
+            range = (max(range[0], stats["min"]), min(range[1], stats["max"]))
+
+            json_channel_array.append(
+                {
+                    "range": [math.floor(range[0]), math.ceil(range[1])],
+                    "window": [math.floor(stats["min"]), math.ceil(stats["quantiles"][upper_quantile])],
+                    "name": name,
+                    "color": color_sequence[c],
+                    "channel": c,
+                    "clamp": False,
+                    "enabled": True,
+                }
+            )
+
+        return {"brightness": 0.0, "contrast": 0.0, "channelArray": json_channel_array}
+
+    def neuroglancer_shader_parameters(
+        self, *, mad_scale=3, middle_quantile: Optional[Tuple[float, float]] = None
+    ) -> dict:
         """
-        Produces the "shaderParameters" portion of the metadata for Neuroglancer
-        returns JSON serializable object
+        Produces the "shaderParameters" portion of the metadata for Neuroglancer.
+
+        Determines which shader type to use to render the image. The shader type is one of: RGB, Grayscale, or
+        MultiChannel. The shader parameters are computed from the full resolution Zarr image. Dask is used for parallel
+        reading and statistics computation. The global scheduler is used for all operations which can be changed with
+        standard Dask configurations.
+
+        For the MultiChannel shader type the default algorithm for the range is to compute the robust median absolute
+        deviation (MAD) about the median to produce the minimum and maximum range. If the middle_quantile is not None
+        then the range is computed from the provided quantiles of the image data.
+
+        :param mad_scale: The scale factor for the robust median absolute deviation (MAD) about the median to produce
+            the minimum and maximum range that is used to select the visible pixel intensities.
+        :param middle_quantile: If not None then the range is computed from the  provided quantiles of the image data.
+         The middle_quantile is a tuple of two floats that are between 0.0 and 1.0. The first value is the lower
+         quantile and the second value is the upper quantile. The range is computed from the lower and upper quantiles.
+        :return: The dictionary of the shader parameters suitable for JSON serialization
         """
+
         if self.ome_info is None:
             return {}
 
@@ -224,47 +325,9 @@ def neuroglancer_shader_parameters(self, mad_scale=3) -> dict:
             }
 
         if _shader_type == "MultiChannel":
-            assert self._ome_ngff_multiscale_dims()[1] == "C"
-
-            if len(list(self.ome_info.channel_names(self.ome_idx))) != self.shape[1]:
-                raise RuntimeError(
-                    f"Mismatch of number of Channels! Array has {self.shape[1]} but there"
-                    f"are {len(list(self.ome_info.channel_names(self.ome_idx)))} names:"
-                    f"{list(self.ome_info.channel_names(self.ome_idx))}"
-                )
-            color_sequence = ["red", "green", "blue", "cyan", "yellow", "magenta"]
-
-            if self.shape[1] > len(color_sequence):
-                raise RuntimeError(
-                    f"Too many channels! The array has {self.shape[1]} channels but"
-                    f" only {len(color_sequence)} is supported!"
-                )
-
-            json_channel_array = []
-
-            for c, c_name in enumerate(self.ome_info.channel_names(self.ome_idx)):
-                logger.debug(f"Processing channel: {c_name}")
-
-                # replace non-alpha numeric with a underscore
-                name = re.sub(r"[^a-zA-Z0-9]+", "_", c_name.lower())
-
-                stats = self._image_statistics(channel=c)
-                range = (stats["median"] - stats["mad"] * mad_scale, stats["median"] + stats["mad"] * mad_scale)
-                range = (max(range[0], stats["min"]), min(range[1], stats["max"]))
-
-                json_channel_array.append(
-                    {
-                        "range": [math.floor(range[0]), math.ceil(range[1])],
-                        "window": [math.floor(stats["min"]), math.ceil(stats["max"])],
-                        "name": name,
-                        "color": color_sequence[c],
-                        "channel": c,
-                        "clamp": False,
-                        "enabled": True,
-                    }
-                )
-
-            return {"brightness": 0.0, "contrast": 0.0, "channelArray": json_channel_array}
+            return self._neuroglancer_shader_parameters_multichannel(
+                mad_scale=mad_scale, middle_quantile=middle_quantile
+            )
 
         raise RuntimeError(f'Unknown shader type: "{self.shader_type}"')
 
@@ -314,11 +377,12 @@ def _chunk_logic_dim(drequest: int, dshape: int) -> int:
             return drequest
         return dshape
 
-    def _image_statistics(self, channel=None) -> Dict[str, List[int]]:
+    def _image_statistics(self, quantiles=None, channel=None, *, zero_black_quantiles=False) -> Dict[str, List[int]]:
         """Processes the full resolution Zarr image. Dask is used for parallel reading and statistics computation. The
          global scheduler is used for all operations which can be changed with standard Dask configurations.
 
         :param channel: The index of the channel to perform calculation on
+        :param quantiles: values of quantiles to return in option "quantiles" element.
 
         :returns: The resulting dictionary will contain the following data elements:
             "min",
@@ -327,7 +391,8 @@ def _image_statistics(self, channel=None) -> Dict[str, List[int]]:
             "mad",
             "mean",
             "var",
-            "sigma"
+            "sigma",
+            "quantiles" (if quantiles is not None)
 
         """
 
@@ -355,6 +420,12 @@ def _image_statistics(self, channel=None) -> Dict[str, List[int]]:
         stats = histogram_robust_stats(h, bins)
         stats.update(histogram_stats(h, bins))
         stats["min"], stats["max"] = weighted_quantile(mids, quantiles=[0.0, 1.0], sample_weight=h, values_sorted=True)
+        if quantiles:
+            if zero_black_quantiles:
+                h[0] = 0
+
+            quantile_value = weighted_quantile(mids, quantiles=quantiles, sample_weight=h, values_sorted=True)
+            stats["quantiles"] = {q: v for q, v in zip(quantiles, quantile_value)}
         logger.debug(f"stats: {stats}")
 
         return stats
diff --git a/test/test_HedwigZarrImage.py b/test/test_HedwigZarrImage.py
@@ -11,8 +11,9 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
-
+import numpy as np
 import pytest
+from pytest import approx
 import shutil
 from pytools.HedwigZarrImages import HedwigZarrImages
 from pytools.HedwigZarrImage import HedwigZarrImage
@@ -79,6 +80,44 @@ def test_HedwigZarrImage_info_for_czi(data_path, zarr_name, attrs):
             shader_params[param_key] == len(z_img.neuroglancer_shader_parameters()[param_key])
 
 
+@pytest.mark.parametrize(
+    "zarr_name, key",
+    [
+        ("KC_M3_S2_ReducedImageSubset2.zarr", "Scene #0"),
+    ],
+)
+def test_HedwigZarrImage_info_for_czi_quantiles(data_path, zarr_name, key):
+    """
+    Test that the quantiles are computed correctly for the neuroglancer shader parameters
+    """
+    zi = HedwigZarrImages(data_path / zarr_name)
+    assert zi.ome_xml_path is not None
+    image_names = list(zi.get_series_keys())
+    assert len(image_names) == 3
+    assert all(image_names)
+
+    z_img = zi[key]
+    assert z_img._ome_ngff_multiscale_dims() == "TCZYX"
+
+    shader_params = z_img._neuroglancer_shader_parameters_multichannel(zero_black_quantiles=False)
+
+    for idx, channel_params in enumerate(shader_params["channelArray"]):
+        param_window_min, param_window_max = channel_params["window"]
+        qvalues = np.quantile(z_img._ome_ngff_multiscale_get_array(0)[:, idx, ...], [0.0, 0.9999])
+
+        assert param_window_min == approx(qvalues[0], abs=1)
+        # not sure why this is not more accurate
+        assert param_window_max == approx(qvalues[1], abs=20)
+
+    shader_params = z_img.neuroglancer_shader_parameters(middle_quantile=(0.25, 0.75))
+
+    for idx, channel_params in enumerate(shader_params["channelArray"]):
+        param_range_min, param_range_max = channel_params["range"]
+        qvalues = np.quantile(z_img._ome_ngff_multiscale_get_array(0)[:, idx, ...], [0.25, 0.75])
+        assert param_range_min == approx(qvalues[0], abs=1)
+        assert param_range_max == approx(qvalues[1], abs=1)
+
+
 @pytest.mark.parametrize("targetx, targety", [(300, 300), (600, 600), (1024, 1024)])
 def test_hedwigimage_extract_2d(data_path, targetx, targety):
     """

diff --git a/test/test_histogram.py b/test/test_histogram.py
@@ -38,6 +38,14 @@ def test_weighted_quantile():
     assert pytools_hist.weighted_quantile(v, [0.5], sample_weight=h) == approx(4)
     assert pytools_hist.weighted_quantile(v, [1], sample_weight=h) == approx(6)
 
+    data = list(range(100))
+    h, b = np.histogram(data, bins=np.arange(-0.5, 100.5))
+    v = 0.5 * (b[1:] + b[:-1])
+    print(b)
+    quantiles_values = list(pytools_hist.weighted_quantile(v, [0.25, 0.5, 0.75], sample_weight=h))
+    for value, baseline in zip(quantiles_values, [24.5, 49.5, 74.5]):
+        assert value == approx(baseline)
+
 
 def test_histogram_stats():
     data = [3]