Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

admin/4.0-release-prep-and-benchmark-upgrades #244

Merged
merged 13 commits into from
May 30, 2021
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
presentations/** linguist-documentation
2 changes: 1 addition & 1 deletion asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"dvcs": "git",
"environment_type": "virtualenv",
"install_command": [
"in-dir={env_dir} python -mpip install {build_dir}[dev]"
"in-dir={env_dir} python -mpip install {build_dir}[benchmark]"
],
"show_commit_url": "http://github.com/AllenCellModeling/aicsimageio/commit/",
"pythons": ["3.9"],
Expand Down
93 changes: 93 additions & 0 deletions benchmarks/benchmark_chunk_sizes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import dask.array as da
import random
from pathlib import Path

from aicsimageio import AICSImage

from .benchmark_image_containers import _ImageContainerTimeSuite

###############################################################################

# We only benchmark against local files as remote files are covered by unit tests
# and are generally slower than local but scale at a similar rate.
LOCAL_RESOURCES_DIR = (
Path(__file__).parent.parent / "aicsimageio" / "tests" / "resources"
)

###############################################################################


class ChunkSuite(_ImageContainerTimeSuite):
    # Measures how the choice of default chunk dimensions affects read
    # durations. Processing speed should be tunable to the file layout and
    # the user's workload: e.g. normalizing each channel and then taking a
    # max projection through Z favors 'ZYX' chunks over plain 'YX'.
    # Besides that workload, the inherited read benchmarks also run under
    # each chunk configuration as a general read-performance monitor.

    params = (
        [
            str(LOCAL_RESOURCES_DIR / "pre-variance-cfe.ome.tiff"),
            str(LOCAL_RESOURCES_DIR / "variance-cfe.ome.tiff"),
        ],
        # Chunking is capped at three dims: reading four-plus dims per chunk
        # is rare — at that point one may as well read the whole image.
        # 'CYX' is included to show that chunking on the _wrong_ dims can
        # lengthen processing times.
        [
            "YX",
            "ZYX",
            "CYX",
        ],
    )

    def time_norm_and_project(self, img_path, chunk_dims):
        """
        Benchmark how long a norm and project through Z takes
        under various chunk dims configurations.
        """
        # Build the image container with the chunk layout under test
        container = self.ImageContainer(img_path, chunk_dims=chunk_dims)

        # Sample two channels rather than processing every one
        chosen_channels = random.sample(container.channel_names, 2)

        # Collect one delayed max projection per selected channel
        projections = []
        for idx, channel_name in enumerate(container.channel_names):
            if channel_name not in chosen_channels:
                continue

            # Pull this channel as a delayed ZYX stack
            channel_data = container.get_image_dask_data("ZYX", C=idx)

            # Percentile bounds used for normalization
            lower_px_val, upper_px_val = da.percentile(
                channel_data.flatten(),
                [50.0, 99.8],
            ).compute()

            # Normalize, clip into [0, 1], then rescale to [0, 255]
            normalized = (channel_data - lower_px_val) / (
                upper_px_val - lower_px_val
            )
            scaled = da.clip(normalized, 0, 1) * 255

            # Delayed max projection through Z
            projections.append(scaled.max(axis=0))

        # Trigger computation of every projection at once
        da.stack(projections).compute()

    def setup(self, img_path, chunk_dims):
        # Fixed seed so the channel sampling is reproducible across runs
        random.seed(42)
        self.ImageContainer = AICSImage
52 changes: 37 additions & 15 deletions benchmarks/benchmark_image_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pathlib import Path

from aicsimageio import AICSImage, readers
from aicsimageio.dimensions import DimensionNames
from aicsimageio.dimensions import DEFAULT_CHUNK_DIMS, DimensionNames

###############################################################################

Expand Down Expand Up @@ -61,25 +61,34 @@ class _ImageContainerTimeSuite:
DimensionNames.Samples,
]

def time_init(self, img_path):
def time_init(self, img_path, chunk_dims=None):
"""
Benchmark how long it takes to validate a file and finish general setup.
"""
self.ImageContainer(img_path)
if chunk_dims is None:
chunk_dims = DEFAULT_CHUNK_DIMS

def time_delayed_array_construct(self, img_path):
self.ImageContainer(img_path, chunk_dims=chunk_dims)

def time_delayed_array_construct(self, img_path, chunk_dims=None):
"""
Benchmark how long it takes to construct the delayed dask array for a file.
"""
self.ImageContainer(img_path).dask_data
if chunk_dims is None:
chunk_dims = DEFAULT_CHUNK_DIMS

self.ImageContainer(img_path, chunk_dims=chunk_dims).dask_data

def time_random_single_chunk_read(self, img_path):
def time_random_single_chunk_read(self, img_path, chunk_dims=None):
"""
Benchmark how long it takes to read a single chunk out of a file.

I.e. "Pull just the Brightfield channel z-stack."
"""
r = self.ImageContainer(img_path)
if chunk_dims is None:
chunk_dims = DEFAULT_CHUNK_DIMS

r = self.ImageContainer(img_path, chunk_dims=chunk_dims)

random_index_selections = {}
for dim, size in zip(r.dims.order, r.dims.shape):
Expand All @@ -91,13 +100,16 @@ def time_random_single_chunk_read(self, img_path):
)
r.get_image_dask_data(valid_dims_to_return, **random_index_selections).compute()

def time_random_many_chunk_read(self, img_path):
def time_random_many_chunk_read(self, img_path, chunk_dims=None):
"""
Open a file, get many chunks out of the file at once.

I.E. "Pull the DNA and Nucleus channel z-stacks, for the middle 50% timepoints".
"""
r = self.ImageContainer(img_path)
if chunk_dims is None:
chunk_dims = DEFAULT_CHUNK_DIMS

r = self.ImageContainer(img_path, chunk_dims=chunk_dims)

random_index_selections = {}
for dim, size in zip(r.dims.order, r.dims.shape):
Expand Down Expand Up @@ -133,27 +145,37 @@ class DefaultReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):

def setup(self, img_path):
random.seed(42)
self.ImageContainer = readers.DefaultReader
self.ImageContainer = readers.default_reader.DefaultReader


class TiffReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):
params = [
sorted([str(f) for f in LOCAL_RESOURCES_DIR.glob("*.tiff")]),
[
str(
LOCAL_RESOURCES_DIR
/ "image_stack_tpzc_50tp_2p_5z_3c_512k_1_MMStack_2-Pos001_000.ome.tif"
),
str(LOCAL_RESOURCES_DIR / "variance-cfe.ome.tiff"),
]
]

def setup(self, img_path):
random.seed(42)
self.ImageContainer = readers.TiffReader
self.ImageContainer = readers.tiff_reader.TiffReader


class OmeTiffReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):
params = [
sorted([str(f) for f in LOCAL_RESOURCES_DIR.glob("*.ome.tiff")]),
[
str(LOCAL_RESOURCES_DIR / "actk.ome.tiff"),
str(LOCAL_RESOURCES_DIR / "pre-variance-cfe.ome.tiff"),
str(LOCAL_RESOURCES_DIR / "variance-cfe.ome.tiff"),
]
]

def setup(self, img_path):
random.seed(42)
self.ImageContainer = readers.OmeTiffReader
self.ImageContainer = readers.ome_tiff_reader.OmeTiffReader


class LifReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):
Expand All @@ -163,7 +185,7 @@ class LifReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):

def setup(self, img_path):
random.seed(42)
self.ImageContainer = readers.LifReader
self.ImageContainer = readers.lif_reader.LifReader


class AICSImageSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):
Expand Down
39 changes: 37 additions & 2 deletions benchmarks/benchmark_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,48 @@
# -*- coding: utf-8 -*-

"""
Benchmarks for general library operations.
Benchmarks for general library operations and comparisons against other libraries.
"""

from functools import partial

class LibSuite:
from aicsimageio import imread_dask as aicsimageio_imread
from dask_image.imread import imread as dask_image_imread

from .benchmark_image_containers import LOCAL_RESOURCES_DIR

###############################################################################

ACTK_OME_TIFF = str(LOCAL_RESOURCES_DIR / "actk.ome.tiff")

###############################################################################


class LibInitSuite:
    """
    Benchmarks for the one-time import cost of the library.
    """

    def time_base_import(self):
        """
        Benchmark how long it takes to import the library as a whole.
        """
        # noqa silences the "imported but unused" lint warning; the import
        # itself is the operation being timed.
        import aicsimageio  # noqa: F401


class LibCompareSuite:
    """
    Compare aicsimageio against other "just-in-time" image reading libs.
    """

    # Maps the asv parameter name to the reader callable it benchmarks.
    # aicsimageio is exercised under two chunk configurations; dask-image
    # is run with its defaults.
    FUNC_LOOKUP = {
        "aicsimageio-default-chunks": partial(aicsimageio_imread, chunk_dims="ZYX"),
        "aicsimageio-plane-chunks": partial(aicsimageio_imread, chunk_dims="YX"),
        "dask-image-imread-default": dask_image_imread,
    }

    # Derive the parameter list from the lookup keys so the two can never
    # drift out of sync (dict insertion order is preserved, so the
    # benchmark names stay identical to the previous hand-written list).
    params = list(FUNC_LOOKUP)

    def time_lib_config(self, func_name):
        """
        Benchmark a full delayed read and compute of the same OME-TIFF
        through the reader selected by ``func_name``.
        """
        func = self.FUNC_LOOKUP[func_name]
        func(ACTK_OME_TIFF).compute()
6 changes: 6 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@
"wheel>=0.34.2",
]

benchmark_requirements = [
*dev_requirements,
"dask-image~=0.6.0",
]

requirements = [
"dask[array]>=2021.4.1",
"fsspec>=2021.4.0",
Expand All @@ -70,6 +75,7 @@
"setup": setup_requirements,
"test": test_requirements,
"dev": dev_requirements,
"benchmark": benchmark_requirements,
**format_libs,
"all": all_formats,
}
Expand Down