diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..bc8460392 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +presentations/** linguist-documentation \ No newline at end of file diff --git a/asv.conf.json b/asv.conf.json index e5d94ee7b..4d220fc5c 100644 --- a/asv.conf.json +++ b/asv.conf.json @@ -7,7 +7,7 @@ "dvcs": "git", "environment_type": "virtualenv", "install_command": [ - "in-dir={env_dir} python -mpip install {build_dir}[dev]" + "in-dir={env_dir} python -mpip install {build_dir}[benchmark]" ], "show_commit_url": "http://github.com/AllenCellModeling/aicsimageio/commit/", "pythons": ["3.9"], diff --git a/benchmarks/benchmark_chunk_sizes.py b/benchmarks/benchmark_chunk_sizes.py new file mode 100644 index 000000000..a97e262eb --- /dev/null +++ b/benchmarks/benchmark_chunk_sizes.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import dask.array as da +import random +from pathlib import Path + +from aicsimageio import AICSImage + +from .benchmark_image_containers import _ImageContainerTimeSuite + +############################################################################### + +# We only benchmark against local files as remote files are covered by unit tests +# and are generally slower than local but scale at a similar rate. +LOCAL_RESOURCES_DIR = ( + Path(__file__).parent.parent / "aicsimageio" / "tests" / "resources" +) + +############################################################################### + + +class ChunkSuite(_ImageContainerTimeSuite): + # This suite measures the effect that changing the default chunk dims + # has on the duration of various reads. + # We would expect that processing speed can be optimized based off of the + # dimensions of the file and what the user is trying to do with said file. + # i.e. If the user wants to normalize each channel and make a max projection + # through Z, then the default of 'ZYX' is preferred over just 'YX'.
+ # During this suite we not only benchmark the above example but also + # file reading under the various chunk configurations as a monitor + # for general read performance. + + params = ( + [ + str(LOCAL_RESOURCES_DIR / "pre-variance-cfe.ome.tiff"), + str(LOCAL_RESOURCES_DIR / "variance-cfe.ome.tiff"), + ], + # We don't go above chunking by three dims because it would be rare + # to do so... if you can read four-plus dims in a single chunk why can't you + # just read in the whole image at once? + # We also use CYX here to show that chunking with the _wrong_ dimensions can + # result in longer processing times. + [ + "YX", + "ZYX", + "CYX", + ], + ) + + def time_norm_and_project(self, img_path, chunk_dims): + """ + Benchmark how long a percentile norm and max projection through Z takes + under various chunk dims configurations. + """ + # Init image container + r = self.ImageContainer(img_path, chunk_dims=chunk_dims) + + # Store all delayed projections + projs = [] + + # Only run a random sample of two channels instead of all + selected_channels = random.sample(r.channel_names, 2) + for i, channel_name in enumerate(r.channel_names): + if channel_name in selected_channels: + # Pull the ZYX stack for this sampled channel + data = r.get_image_dask_data("ZYX", C=i) + + # Get the 50th and 99.8th percentile pixel values to norm by + min_px_val, max_px_val = da.percentile( + data.flatten(), + [50.0, 99.8], + ).compute() + + # Norm against that percentile range + normed = (data - min_px_val) / (max_px_val - min_px_val) + + # Clip any values outside of 0 and 1 + clipped = da.clip(normed, 0, 1) + + # Scale them between 0 and 255 + scaled = clipped * 255 + + # Create max projection over axis 0 (Z, if ZYX order is returned) + projs.append(scaled.max(axis=0)) + + # Stack the delayed projections and compute them together + projs = da.stack(projs) + projs.compute() + + def setup(self, img_path, chunk_dims): + random.seed(42) + self.ImageContainer = AICSImage diff --git a/benchmarks/benchmark_image_containers.py b/benchmarks/benchmark_image_containers.py index 976e86ab3..c78a3aebe 100644 --- a/benchmarks/benchmark_image_containers.py +++
b/benchmarks/benchmark_image_containers.py @@ -5,7 +5,7 @@ from pathlib import Path from aicsimageio import AICSImage, readers -from aicsimageio.dimensions import DimensionNames +from aicsimageio.dimensions import DEFAULT_CHUNK_DIMS, DimensionNames ############################################################################### @@ -61,25 +61,34 @@ class _ImageContainerTimeSuite: DimensionNames.Samples, ] - def time_init(self, img_path): + def time_init(self, img_path, chunk_dims=None): """ Benchmark how long it takes to validate a file and finish general setup. """ - self.ImageContainer(img_path) + if chunk_dims is None: + chunk_dims = DEFAULT_CHUNK_DIMS - def time_delayed_array_construct(self, img_path): + self.ImageContainer(img_path, chunk_dims=chunk_dims) + + def time_delayed_array_construct(self, img_path, chunk_dims=None): """ Benchmark how long it takes to construct the delayed dask array for a file. """ - self.ImageContainer(img_path).dask_data + if chunk_dims is None: + chunk_dims = DEFAULT_CHUNK_DIMS + + self.ImageContainer(img_path, chunk_dims=chunk_dims).dask_data - def time_random_single_chunk_read(self, img_path): + def time_random_single_chunk_read(self, img_path, chunk_dims=None): """ Benchmark how long it takes to read a single chunk out of a file. I.E. "Pull just the Brightfield channel z-stack. """ - r = self.ImageContainer(img_path) + if chunk_dims is None: + chunk_dims = DEFAULT_CHUNK_DIMS + + r = self.ImageContainer(img_path, chunk_dims=chunk_dims) random_index_selections = {} for dim, size in zip(r.dims.order, r.dims.shape): @@ -91,13 +100,16 @@ def time_random_single_chunk_read(self, img_path): ) r.get_image_dask_data(valid_dims_to_return, **random_index_selections).compute() - def time_random_many_chunk_read(self, img_path): + def time_random_many_chunk_read(self, img_path, chunk_dims=None): """ Open a file, get many chunks out of the file at once. I.E. "Pull the DNA and Nucleus channel z-stacks, for the middle 50% timepoints".
""" - r = self.ImageContainer(img_path) + if chunk_dims is None: + chunk_dims = DEFAULT_CHUNK_DIMS + + r = self.ImageContainer(img_path, chunk_dims=chunk_dims) random_index_selections = {} for dim, size in zip(r.dims.order, r.dims.shape): @@ -133,27 +145,37 @@ class DefaultReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite): def setup(self, img_path): random.seed(42) - self.ImageContainer = readers.DefaultReader + self.ImageContainer = readers.default_reader.DefaultReader class TiffReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite): params = [ - sorted([str(f) for f in LOCAL_RESOURCES_DIR.glob("*.tiff")]), + [ + str( + LOCAL_RESOURCES_DIR + / "image_stack_tpzc_50tp_2p_5z_3c_512k_1_MMStack_2-Pos001_000.ome.tif" + ), + str(LOCAL_RESOURCES_DIR / "variance-cfe.ome.tiff"), + ] ] def setup(self, img_path): random.seed(42) - self.ImageContainer = readers.TiffReader + self.ImageContainer = readers.tiff_reader.TiffReader class OmeTiffReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite): params = [ - sorted([str(f) for f in LOCAL_RESOURCES_DIR.glob("*.ome.tiff")]), + [ + str(LOCAL_RESOURCES_DIR / "actk.ome.tiff"), + str(LOCAL_RESOURCES_DIR / "pre-variance-cfe.ome.tiff"), + str(LOCAL_RESOURCES_DIR / "variance-cfe.ome.tiff"), + ] ] def setup(self, img_path): random.seed(42) - self.ImageContainer = readers.OmeTiffReader + self.ImageContainer = readers.ome_tiff_reader.OmeTiffReader class LifReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite): @@ -163,7 +185,7 @@ class LifReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite): def setup(self, img_path): random.seed(42) - self.ImageContainer = readers.LifReader + self.ImageContainer = readers.lif_reader.LifReader class AICSImageSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite): diff --git a/benchmarks/benchmark_lib.py b/benchmarks/benchmark_lib.py index 78f063f11..16543c720 100644 --- a/benchmarks/benchmark_lib.py +++ b/benchmarks/benchmark_lib.py @@
-2,13 +2,48 @@ # -*- coding: utf-8 -*- """ -Benchmarks for general library operations. +Benchmarks for general library operations and comparisons against other libraries. """ +from functools import partial -class LibSuite: +from aicsimageio import imread_dask as aicsimageio_imread +from dask_image.imread import imread as dask_image_imread + +from .benchmark_image_containers import LOCAL_RESOURCES_DIR + +############################################################################### + +ACTK_OME_TIFF = str(LOCAL_RESOURCES_DIR / "actk.ome.tiff") + +############################################################################### + + +class LibInitSuite: def time_base_import(self): """ Benchmark how long it takes to import the library as a whole. """ import aicsimageio # noqa: F401 + + +class LibCompareSuite: + """ + Compare aicsimageio against other "just-in-time" image reading libs. + """ + + FUNC_LOOKUP = { + "aicsimageio-default-chunks": partial(aicsimageio_imread, chunk_dims="ZYX"), + "aicsimageio-plane-chunks": partial(aicsimageio_imread, chunk_dims="YX"), + "dask-image-imread-default": dask_image_imread, + } + + params = [ + "aicsimageio-default-chunks", + "aicsimageio-plane-chunks", + "dask-image-imread-default", + ] + + def time_lib_config(self, func_name): + func = self.FUNC_LOOKUP[func_name] + func(ACTK_OME_TIFF).compute() diff --git a/setup.py b/setup.py index 87e7d67bc..4c4008393 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,11 @@ "wheel>=0.34.2", ] +benchmark_requirements = [ + *dev_requirements, + "dask-image~=0.6.0", +] + requirements = [ "dask[array]>=2021.4.1", "fsspec>=2021.4.0", @@ -70,6 +75,7 @@ "setup": setup_requirements, "test": test_requirements, "dev": dev_requirements, + "benchmark": benchmark_requirements, **format_libs, "all": all_formats, }