admin/4.0-release-prep-and-benchmark-upgrades (#244)
* Ignore presentations dir from language

* Try double star for ignore lang

* Add benchmark for chunk size variability

* Fix default and lif bench, add chunk compare

* Benchmark aicsimageio against other libs

* Use variance cfe instead of pipeline 4

* Configure better lib compare bench

* Remove extra deps from benchmark deps

* Cleanup lib compare

* Reduce the amount of files checked during benchs

* Fix benchmark params on TIFF like

* Fix comment in random sample

* Fix typo
Jackson Maxfield Brown authored May 30, 2021
1 parent 738ce92 commit 8c7d4f5
Showing 6 changed files with 175 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
presentations/** linguist-documentation
2 changes: 1 addition & 1 deletion asv.conf.json
@@ -7,7 +7,7 @@
"dvcs": "git",
"environment_type": "virtualenv",
"install_command": [
"in-dir={env_dir} python -mpip install {build_dir}[dev]"
"in-dir={env_dir} python -mpip install {build_dir}[benchmark]"
],
"show_commit_url": "http://github.com/AllenCellModeling/aicsimageio/commit/",
"pythons": ["3.9"],
93 changes: 93 additions & 0 deletions benchmarks/benchmark_chunk_sizes.py
@@ -0,0 +1,93 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import dask.array as da
import random
from pathlib import Path

from aicsimageio import AICSImage

from .benchmark_image_containers import _ImageContainerTimeSuite

###############################################################################

# We only benchmark against local files as remote files are covered by unit tests
# and are generally slower than local but scale at a similar rate.
LOCAL_RESOURCES_DIR = (
Path(__file__).parent.parent / "aicsimageio" / "tests" / "resources"
)

###############################################################################


class ChunkSuite(_ImageContainerTimeSuite):
# This suite measures the effect that changing the default chunk dims
# has on the duration of various reads.
# We would expect that processing speed can be optimized based off of the
# dimensions of the file and what the user is trying to do with said file.
# i.e. If the user wants to normalize each channel and make a max projection
# through Z, then the default of 'ZYX' is preferred over just 'YX'.
# During this suite we not only benchmark the above example but also
# file reading under the various chunk configurations as a monitor
# for general read performance.

params = (
[
str(LOCAL_RESOURCES_DIR / "pre-variance-cfe.ome.tiff"),
str(LOCAL_RESOURCES_DIR / "variance-cfe.ome.tiff"),
],
# We don't go above chunking by three dims because it would be rare
# to do so... if you can read four-plus dims in a single chunk why can't you
# just read in the whole image at once.
# We also use CYX here to show that chunking with the _wrong_ dimensions can
# result in longer processing times.
[
"YX",
"ZYX",
"CYX",
],
)

def time_norm_and_project(self, img_path, chunk_dims):
"""
Benchmark how long a norm and project through Z takes
under various chunk dims configurations.
"""
# Init image container
r = self.ImageContainer(img_path, chunk_dims=chunk_dims)

# Store all delayed projections
projs = []

# Only run a random sample of two channels instead of all
selected_channels = random.sample(r.channel_names, 2)
for i, channel_name in enumerate(r.channel_names):
if channel_name in selected_channels:
# Select each channel
data = r.get_image_dask_data("ZYX", C=i)

# Get percentile norm by values
min_px_val, max_px_val = da.percentile(
data.flatten(),
[50.0, 99.8],
).compute()

# Norm
normed = (data - min_px_val) / (max_px_val - min_px_val)

# Clip any values outside of 0 and 1
clipped = da.clip(normed, 0, 1)

# Scale them between 0 and 255
scaled = clipped * 255

# Create max project
projs.append(scaled.max(axis=0))

# Compute all projections
projs = da.stack(projs)
projs.compute()

def setup(self, img_path, chunk_dims):
random.seed(42)
self.ImageContainer = AICSImage
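
Editor's sketch (not part of the diff): the effect this suite measures shows up directly in the chunking of the delayed array. A minimal illustration, assuming aicsimageio 4.x and that the variance-cfe test resource exists locally; exact chunk shapes depend on the file's dimensions.

# Minimal sketch: how chunk_dims changes the delayed dask array's chunking.
# Assumes the repository's test resources are present on disk.
from pathlib import Path

from aicsimageio import AICSImage

resources = Path("aicsimageio") / "tests" / "resources"
img_path = str(resources / "variance-cfe.ome.tiff")

# Chunk by full Z-stacks: each dask chunk covers roughly one channel's ZYX volume,
# which suits the per-channel normalize-and-project workload timed above.
img_zyx = AICSImage(img_path, chunk_dims="ZYX")
print(img_zyx.dask_data.chunksize)

# Chunk by single planes: each chunk is roughly one YX plane, so the same
# workload issues many more, smaller reads.
img_yx = AICSImage(img_path, chunk_dims="YX")
print(img_yx.dask_data.chunksize)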
52 changes: 37 additions & 15 deletions benchmarks/benchmark_image_containers.py
Expand Up @@ -5,7 +5,7 @@
from pathlib import Path

from aicsimageio import AICSImage, readers
from aicsimageio.dimensions import DimensionNames
from aicsimageio.dimensions import DEFAULT_CHUNK_DIMS, DimensionNames

###############################################################################

@@ -61,25 +61,34 @@ class _ImageContainerTimeSuite:
DimensionNames.Samples,
]

def time_init(self, img_path):
def time_init(self, img_path, chunk_dims=None):
"""
Benchmark how long it takes to validate a file and finish general setup.
"""
self.ImageContainer(img_path)
if chunk_dims is None:
chunk_dims = DEFAULT_CHUNK_DIMS

def time_delayed_array_construct(self, img_path):
self.ImageContainer(img_path, chunk_dims=chunk_dims)

def time_delayed_array_construct(self, img_path, chunk_dims=None):
"""
Benchmark how long it takes to construct the delayed dask array for a file.
"""
self.ImageContainer(img_path).dask_data
if chunk_dims is None:
chunk_dims = DEFAULT_CHUNK_DIMS

self.ImageContainer(img_path, chunk_dims=chunk_dims).dask_data

def time_random_single_chunk_read(self, img_path):
def time_random_single_chunk_read(self, img_path, chunk_dims=None):
"""
Benchmark how long it takes to read a single chunk out of a file.
I.E. "Pull just the Brightfield channel z-stack.
"""
r = self.ImageContainer(img_path)
if chunk_dims is None:
chunk_dims = DEFAULT_CHUNK_DIMS

r = self.ImageContainer(img_path, chunk_dims=chunk_dims)

random_index_selections = {}
for dim, size in zip(r.dims.order, r.dims.shape):
@@ -91,13 +100,16 @@ def time_random_single_chunk_read(self, img_path):
)
r.get_image_dask_data(valid_dims_to_return, **random_index_selections).compute()

def time_random_many_chunk_read(self, img_path):
def time_random_many_chunk_read(self, img_path, chunk_dims=None):
"""
Open a file, get many chunks out of the file at once.
I.E. "Pull the DNA and Nucleus channel z-stacks, for the middle 50% timepoints".
"""
r = self.ImageContainer(img_path)
if chunk_dims is None:
chunk_dims = DEFAULT_CHUNK_DIMS

r = self.ImageContainer(img_path, chunk_dims=chunk_dims)

random_index_selections = {}
for dim, size in zip(r.dims.order, r.dims.shape):
@@ -133,27 +145,37 @@ class DefaultReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):

def setup(self, img_path):
random.seed(42)
self.ImageContainer = readers.DefaultReader
self.ImageContainer = readers.default_reader.DefaultReader


class TiffReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):
params = [
sorted([str(f) for f in LOCAL_RESOURCES_DIR.glob("*.tiff")]),
[
str(
LOCAL_RESOURCES_DIR
/ "image_stack_tpzc_50tp_2p_5z_3c_512k_1_MMStack_2-Pos001_000.ome.tif"
),
str(LOCAL_RESOURCES_DIR / "variance-cfe.ome.tiff"),
]
]

def setup(self, img_path):
random.seed(42)
self.ImageContainer = readers.TiffReader
self.ImageContainer = readers.tiff_reader.TiffReader


class OmeTiffReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):
params = [
sorted([str(f) for f in LOCAL_RESOURCES_DIR.glob("*.ome.tiff")]),
[
str(LOCAL_RESOURCES_DIR / "actk.ome.tiff"),
str(LOCAL_RESOURCES_DIR / "pre-variance-cfe.ome.tiff"),
str(LOCAL_RESOURCES_DIR / "variance-cfe.ome.tiff"),
]
]

def setup(self, img_path):
random.seed(42)
self.ImageContainer = readers.OmeTiffReader
self.ImageContainer = readers.ome_tiff_reader.OmeTiffReader


class LifReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):
@@ -163,7 +185,7 @@ class LifReaderSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):

def setup(self, img_path):
random.seed(42)
self.ImageContainer = readers.LifReader
self.ImageContainer = readers.lif_reader.LifReader


class AICSImageSuite(_ImageContainerTimeSuite, _ImageContainerMemorySuite):
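
Editor's note (illustrative, not part of the diff): asv invokes setup and every time_* method once per combination of entries in params, so making chunk_dims an optional keyword lets the same time_* methods serve both the one-parameter reader suites above and the two-parameter ChunkSuite. A rough sketch of that driving loop; the import paths and the presence of the local test resources are assumptions.

# Rough driver loop mimicking what asv does with these parameterized suites.
import itertools

from benchmarks.benchmark_chunk_sizes import ChunkSuite
from benchmarks.benchmark_image_containers import OmeTiffReaderSuite

# Two parameter axes: file path and chunk_dims.
chunk_suite = ChunkSuite()
for img_path, chunk_dims in itertools.product(*ChunkSuite.params):
    chunk_suite.setup(img_path, chunk_dims)
    chunk_suite.time_random_single_chunk_read(img_path, chunk_dims)

# One parameter axis: file path only; chunk_dims falls back to DEFAULT_CHUNK_DIMS.
reader_suite = OmeTiffReaderSuite()
for (img_path,) in itertools.product(*OmeTiffReaderSuite.params):
    reader_suite.setup(img_path)
    reader_suite.time_random_single_chunk_read(img_path)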
39 changes: 37 additions & 2 deletions benchmarks/benchmark_lib.py
@@ -2,13 +2,48 @@
# -*- coding: utf-8 -*-

"""
Benchmarks for general library operations.
Benchmarks for general library operations and comparisons against other libraries.
"""

from functools import partial

class LibSuite:
from aicsimageio import imread_dask as aicsimageio_imread
from dask_image.imread import imread as dask_image_imread

from .benchmark_image_containers import LOCAL_RESOURCES_DIR

###############################################################################

ACTK_OME_TIFF = str(LOCAL_RESOURCES_DIR / "actk.ome.tiff")

###############################################################################


class LibInitSuite:
def time_base_import(self):
"""
Benchmark how long it takes to import the library as a whole.
"""
import aicsimageio # noqa: F401


class LibCompareSuite:
"""
Compare aicsimageio against other "just-in-time" image reading libs.
"""

FUNC_LOOKUP = {
"aicsimageio-default-chunks": partial(aicsimageio_imread, chunk_dims="ZYX"),
"aicsimageio-plane-chunks": partial(aicsimageio_imread, chunk_dims="YX"),
"dask-image-imread-default": dask_image_imread,
}

params = [
"aicsimageio-default-chunks",
"aicsimageio-plane-chunks",
"dask-image-imread-default",
]

def time_lib_config(self, func_name):
func = self.FUNC_LOOKUP[func_name]
func(ACTK_OME_TIFF).compute()
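
Editor's sketch (not part of the diff): the comparison timed above can be reproduced directly. The calls below mirror the FUNC_LOOKUP entries, assuming aicsimageio, dask-image, and the actk.ome.tiff test resource are available locally.

# Direct version of the library comparison: build each delayed read, then realize it.
from aicsimageio import imread_dask
from dask_image.imread import imread as dask_image_imread

path = "aicsimageio/tests/resources/actk.ome.tiff"  # local test resource (assumed present)

# aicsimageio, chunked by Z-stacks (labeled "default-chunks" in the suite above).
aics_default = imread_dask(path, chunk_dims="ZYX").compute()

# aicsimageio, chunked by single YX planes.
aics_planes = imread_dask(path, chunk_dims="YX").compute()

# dask-image, with its own default per-page chunking.
dask_image_arr = dask_image_imread(path).compute()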
6 changes: 6 additions & 0 deletions setup.py
@@ -53,6 +53,11 @@
"wheel>=0.34.2",
]

benchmark_requirements = [
*dev_requirements,
"dask-image~=0.6.0",
]

requirements = [
"dask[array]>=2021.4.1",
"fsspec>=2021.4.0",
@@ -70,6 +75,7 @@
"setup": setup_requirements,
"test": test_requirements,
"dev": dev_requirements,
"benchmark": benchmark_requirements,
**format_libs,
"all": all_formats,
}