Skip to content

Commit

Permalink
Merge pull request #88 from blowekamp/quantiles_parameters
Browse files Browse the repository at this point in the history
Quantiles parameters
  • Loading branch information
blowekamp authored Nov 14, 2023
2 parents 926f45d + 935afa5 commit 8390daf
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 51 deletions.
171 changes: 121 additions & 50 deletions pytools/HedwigZarrImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import SimpleITK as sitk
import zarr
from typing import Tuple, Dict, List
from typing import Tuple, Dict, List, Optional
from pytools.utils import OMEInfo
import logging
import math
Expand Down Expand Up @@ -179,11 +179,11 @@ def extract_2d(
if is_vector:
img.ToScalarImage(True)

min_max = sitk.MinimumMaximum(img)
min, max = sitk.MinimumMaximum(img)

logger.debug(f"Adjusting output pixel intensity range from {min_max} -> {(0, 255)}.")
logger.debug(f"Adjusting output pixel intensity range from {min, max} -> {(0, 255)}.")

img = sitk.ShiftScale(img, -min_max[0], 255.0 / (min_max[1] - min_max[0]), sitk.sitkUInt8)
img = sitk.ShiftScale(img, -min, 255.0 / (max - min), sitk.sitkUInt8)

if is_vector:
img.ToVectorImage(True)
Expand All @@ -203,11 +203,112 @@ def shader_type(
return "Grayscale"
return "MultiChannel"

def neuroglancer_shader_parameters(self, mad_scale=3) -> dict:
def _neuroglancer_shader_parameters_multichannel(
self,
*,
mad_scale=3,
middle_quantile: Optional[Tuple[float, float]] = None,
zero_black_quantiles=True,
upper_quantile=0.9999,
) -> dict:
"""
Produces the "shaderParameters" portion of the metadata for Neuroglancer when the shader type is MultiChannel.
The output window parameters are used for the visible histogram range. The computation improves the robustness
to outliers and the background pixel values with the zero_black_quantiles option and the upper_quantile value.
:param mad_scale: The scale factor for the robust median absolute deviation (MAD) about the median to produce
the minimum and maximum range that is used to select the visible pixel intensities.
:param middle_quantile: If not None then the range is computed from the provided quantiles of the image data.
The middle_quantile is a tuple of two floats that are between 0.0 and 1.0. The first value is the lower
quantile and the second value is the upper quantile.
:param zero_black_quantiles: If True then the zero values are removed from the histogram before computing the
quantiles.
:param upper_quantile: The upper quantile to use for the "window", which is the extent of the visible histogram.
:return: The dictionary of the shader parameters suitable for JSON serialization.
"""

if middle_quantile:
assert len(middle_quantile) == 2
assert 0.0 <= middle_quantile[0] <= 1.0
assert 0.0 <= middle_quantile[1] <= 1.0
assert middle_quantile[0] < middle_quantile[1]

assert self._ome_ngff_multiscale_dims()[1] == "C"

if len(list(self.ome_info.channel_names(self.ome_idx))) != self.shape[1]:
raise RuntimeError(
f"Mismatch of number of Channels! Array has {self.shape[1]} but there"
f"are {len(list(self.ome_info.channel_names(self.ome_idx)))} names:"
f"{list(self.ome_info.channel_names(self.ome_idx))}"
)

color_sequence = ["red", "green", "blue", "cyan", "yellow", "magenta"]

if self.shape[1] > len(color_sequence):
raise RuntimeError(
f"Too many channels! The array has {self.shape[1]} channels but"
f" only {len(color_sequence)} is supported!"
)

json_channel_array = []

for c, c_name in enumerate(self.ome_info.channel_names(self.ome_idx)):
logger.debug(f"Processing channel: {c_name}")

# replace non-alpha numeric with an underscore
name = re.sub(r"[^a-zA-Z0-9]+", "_", c_name.lower())

stats = self._image_statistics(
quantiles=[*middle_quantile, upper_quantile] if middle_quantile else [upper_quantile],
channel=c,
zero_black_quantiles=zero_black_quantiles,
)
if middle_quantile:
range = (stats["quantiles"][middle_quantile[0]], stats["quantiles"][middle_quantile[1]])
else:
range = (stats["median"] - stats["mad"] * mad_scale, stats["median"] + stats["mad"] * mad_scale)

range = (max(range[0], stats["min"]), min(range[1], stats["max"]))

json_channel_array.append(
{
"range": [math.floor(range[0]), math.ceil(range[1])],
"window": [math.floor(stats["min"]), math.ceil(stats["quantiles"][upper_quantile])],
"name": name,
"color": color_sequence[c],
"channel": c,
"clamp": False,
"enabled": True,
}
)

return {"brightness": 0.0, "contrast": 0.0, "channelArray": json_channel_array}

def neuroglancer_shader_parameters(
self, *, mad_scale=3, middle_quantile: Optional[Tuple[float, float]] = None
) -> dict:
"""
Produces the "shaderParameters" portion of the metadata for Neuroglancer
returns JSON serializable object
Produces the "shaderParameters" portion of the metadata for Neuroglancer.
Determines which shader type to use to render the image. The shader type is one of: RGB, Grayscale, or
MultiChannel. The shader parameters are computed from the full resolution Zarr image. Dask is used for parallel
reading and statistics computation. The global scheduler is used for all operations which can be changed with
standard Dask configurations.
For the MultiChannel shader type the default algorithm for the range is to compute the robust median absolute
deviation (MAD) about the median to produce the minimum and maximum range. If the middle_quantile is not None
then the range is computed from the provided quantiles of the image data.
:param mad_scale: The scale factor for the robust median absolute deviation (MAD) about the median to produce
the minimum and maximum range that is used to select the visible pixel intensities.
:param middle_quantile: If not None then the range is computed from the provided quantiles of the image data.
The middle_quantile is a tuple of two floats that are between 0.0 and 1.0. The first value is the lower
quantile and the second value is the upper quantile. The range is computed from the lower and upper quantiles.
:return: The dictionary of the shader parameters suitable for JSON serialization
"""

if self.ome_info is None:
return {}

Expand All @@ -224,47 +325,9 @@ def neuroglancer_shader_parameters(self, mad_scale=3) -> dict:
}

if _shader_type == "MultiChannel":
assert self._ome_ngff_multiscale_dims()[1] == "C"

if len(list(self.ome_info.channel_names(self.ome_idx))) != self.shape[1]:
raise RuntimeError(
f"Mismatch of number of Channels! Array has {self.shape[1]} but there"
f"are {len(list(self.ome_info.channel_names(self.ome_idx)))} names:"
f"{list(self.ome_info.channel_names(self.ome_idx))}"
)
color_sequence = ["red", "green", "blue", "cyan", "yellow", "magenta"]

if self.shape[1] > len(color_sequence):
raise RuntimeError(
f"Too many channels! The array has {self.shape[1]} channels but"
f" only {len(color_sequence)} is supported!"
)

json_channel_array = []

for c, c_name in enumerate(self.ome_info.channel_names(self.ome_idx)):
logger.debug(f"Processing channel: {c_name}")

# replace non-alpha numeric with a underscore
name = re.sub(r"[^a-zA-Z0-9]+", "_", c_name.lower())

stats = self._image_statistics(channel=c)
range = (stats["median"] - stats["mad"] * mad_scale, stats["median"] + stats["mad"] * mad_scale)
range = (max(range[0], stats["min"]), min(range[1], stats["max"]))

json_channel_array.append(
{
"range": [math.floor(range[0]), math.ceil(range[1])],
"window": [math.floor(stats["min"]), math.ceil(stats["max"])],
"name": name,
"color": color_sequence[c],
"channel": c,
"clamp": False,
"enabled": True,
}
)

return {"brightness": 0.0, "contrast": 0.0, "channelArray": json_channel_array}
return self._neuroglancer_shader_parameters_multichannel(
mad_scale=mad_scale, middle_quantile=middle_quantile
)

raise RuntimeError(f'Unknown shader type: "{self.shader_type}"')

Expand Down Expand Up @@ -314,11 +377,12 @@ def _chunk_logic_dim(drequest: int, dshape: int) -> int:
return drequest
return dshape

def _image_statistics(self, channel=None) -> Dict[str, List[int]]:
def _image_statistics(self, quantiles=None, channel=None, *, zero_black_quantiles=False) -> Dict[str, List[int]]:
"""Processes the full resolution Zarr image. Dask is used for parallel reading and statistics computation. The
global scheduler is used for all operations which can be changed with standard Dask configurations.
:param channel: The index of the channel to perform calculation on
:param quantiles: values of quantiles to return in option "quantiles" element.
:returns: The resulting dictionary will contain the following data elements:
"min",
Expand All @@ -327,7 +391,8 @@ def _image_statistics(self, channel=None) -> Dict[str, List[int]]:
"mad",
"mean",
"var",
"sigma"
"sigma",
"quantiles" (if quantiles is not None)
"""

Expand Down Expand Up @@ -355,6 +420,12 @@ def _image_statistics(self, channel=None) -> Dict[str, List[int]]:
stats = histogram_robust_stats(h, bins)
stats.update(histogram_stats(h, bins))
stats["min"], stats["max"] = weighted_quantile(mids, quantiles=[0.0, 1.0], sample_weight=h, values_sorted=True)
if quantiles:
if zero_black_quantiles:
h[0] = 0

quantile_value = weighted_quantile(mids, quantiles=quantiles, sample_weight=h, values_sorted=True)
stats["quantiles"] = {q: v for q, v in zip(quantiles, quantile_value)}
logger.debug(f"stats: {stats}")

return stats
41 changes: 40 additions & 1 deletion test/test_HedwigZarrImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np
import pytest
from pytest import approx
import shutil
from pytools.HedwigZarrImages import HedwigZarrImages
from pytools.HedwigZarrImage import HedwigZarrImage
Expand Down Expand Up @@ -79,6 +80,44 @@ def test_HedwigZarrImage_info_for_czi(data_path, zarr_name, attrs):
shader_params[param_key] == len(z_img.neuroglancer_shader_parameters()[param_key])


@pytest.mark.parametrize(
"zarr_name, key",
[
("KC_M3_S2_ReducedImageSubset2.zarr", "Scene #0"),
],
)
def test_HedwigZarrImage_info_for_czi_quantiles(data_path, zarr_name, key):
"""
Test that the quantiles are computed correctly for the neuroglancer shader parameters
"""
zi = HedwigZarrImages(data_path / zarr_name)
assert zi.ome_xml_path is not None
image_names = list(zi.get_series_keys())
assert len(image_names) == 3
assert all(image_names)

z_img = zi[key]
assert z_img._ome_ngff_multiscale_dims() == "TCZYX"

shader_params = z_img._neuroglancer_shader_parameters_multichannel(zero_black_quantiles=False)

for idx, channel_params in enumerate(shader_params["channelArray"]):
param_window_min, param_window_max = channel_params["window"]
qvalues = np.quantile(z_img._ome_ngff_multiscale_get_array(0)[:, idx, ...], [0.0, 0.9999])

assert param_window_min == approx(qvalues[0], abs=1)
# not sure why this is not more accurate
assert param_window_max == approx(qvalues[1], abs=20)

shader_params = z_img.neuroglancer_shader_parameters(middle_quantile=(0.25, 0.75))

for idx, channel_params in enumerate(shader_params["channelArray"]):
param_range_min, param_range_max = channel_params["range"]
qvalues = np.quantile(z_img._ome_ngff_multiscale_get_array(0)[:, idx, ...], [0.25, 0.75])
assert param_range_min == approx(qvalues[0], abs=1)
assert param_range_max == approx(qvalues[1], abs=1)


@pytest.mark.parametrize("targetx, targety", [(300, 300), (600, 600), (1024, 1024)])
def test_hedwigimage_extract_2d(data_path, targetx, targety):
"""
Expand Down
8 changes: 8 additions & 0 deletions test/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ def test_weighted_quantile():
assert pytools_hist.weighted_quantile(v, [0.5], sample_weight=h) == approx(4)
assert pytools_hist.weighted_quantile(v, [1], sample_weight=h) == approx(6)

data = list(range(100))
h, b = np.histogram(data, bins=np.arange(-0.5, 100.5))
v = 0.5 * (b[1:] + b[:-1])
print(b)
quantiles_values = list(pytools_hist.weighted_quantile(v, [0.25, 0.5, 0.75], sample_weight=h))
for value, baseline in zip(quantiles_values, [24.5, 49.5, 74.5]):
assert value == approx(baseline)


def test_histogram_stats():
data = [3]
Expand Down

0 comments on commit 8390daf

Please sign in to comment.