diff --git a/README.md b/README.md
index d0501ba..8711cfb 100644
--- a/README.md
+++ b/README.md
@@ -79,7 +79,12 @@ results = pd.read_json(basic_bench.outfile, lines=True)
 ```
 
 The above example captures the fields `start_time`, `finish_time`,
-`run_durations` and `function_name`. Microbench can capture many
+`run_durations` (the duration of each function call, in seconds by default),
+`function_name`, `timestamp_tz` (the timezone name, see the Timezones section
+of this README), and `duration_counter` (the name of the function used to
+calculate durations, see the Duration timings section of this README).
+
+Microbench can capture many
 other types of metadata from the environment, resource usage, and hardware,
 which are covered below.
 
@@ -90,20 +95,22 @@ names) to extend functionality.
 Note that keyword arguments can be supplied to the constructor (in this case
 `some_info=123`) to specify additional information to capture. We also specify
 `iterations=3`, which means that the called function with be executed 3 times
 (the returned result will always
-be from the final run) with timings captured for each run.
+be from the final run) with timings captured for each run. We specify a custom
+duration counter, `time.monotonic`, instead of the default `time.perf_counter`
+(see the Duration timings section later in this README).
 
 This example also specifies the `outfile` option, which appends metadata
 to a file on disk.
 
 ```python
 from microbench import *
-import numpy, pandas
+import numpy, pandas, time
 
 class MyBench(MicroBench, MBFunctionCall, MBPythonVersion, MBHostInfo):
     outfile = '/home/user/my-benchmarks'
     capture_versions = (numpy, pandas) # Or use MBGlobalPackages/MBInstalledPackages
     env_vars = ('SLURM_ARRAY_TASK_ID', )
 
-benchmark = MyBench(some_info=123, iterations=3)
+benchmark = MyBench(some_info=123, iterations=3, duration_counter=time.monotonic)
 ```
 
 The `env_vars` option from the example above specifies a list of environment
@@ -162,9 +169,16 @@ separate line in the file. The output from the minimal example above
 for a single run will look similar to the following:
 
 ```json
-{"start_time": "2018-08-06T10:28:24.806493+00:00", "finish_time": "2018-08-06T10:28:24.867456+00:00", "run_durations": [0.60857599999999999], "function_name": "my_function"}
+{"start_time": "2018-08-06T10:28:24.806493+00:00", "finish_time": "2018-08-06T10:28:24.867456+00:00", "run_durations": [0.60857599999999999], "function_name": "my_function", "timestamp_tz": "UTC", "duration_counter": "perf_counter"}
 ```
 
+Start and finish times are given as timestamps in ISO 8601 format, in the UTC
+timezone by default (see the Timezones section of this README).
+
+`run_durations` values are given in seconds, captured using the
+`time.perf_counter` function by default; this can be overridden (see the
+Duration timings section of this README).
+
 The simplest way to examine results in detail is to load them into a
 [pandas](https://pandas.pydata.org/) dataframe:
 
@@ -451,11 +465,21 @@ although the latter is a one-off per invocation and typically less than one seco
 Telemetry capture intervals should be kept relatively infrequent (e.g., every
 minute or two, rather than every second) to avoid significant runtime impacts.
 
+### Duration timings
+
+By default, `run_durations` are given in seconds using the `time.perf_counter`
+function, which should be sufficient for most use cases. You can use any function
+that returns time as a float or integer: for example, `time.perf_counter_ns` for
+timings in nanoseconds, or `time.monotonic` for a monotonic clock unaffected by
+system clock adjustments (ideal for very long-running code). Pass it using the
+`duration_counter=...` argument when creating a benchmark suite object, as seen
+in the Extended examples section of this README, above.
+
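+For example, a minimal sketch of a suite that records durations in nanoseconds
+(the decorated function here is just a placeholder):
+
+```python
+import time
+from microbench import MicroBench
+
+# run_durations will be integer nanoseconds from time.perf_counter_ns
+nanosecond_bench = MicroBench(duration_counter=time.perf_counter_ns)
+
+@nanosecond_bench
+def my_function():
+    return sum(range(1000))
+```
+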
 ### Timezones
 
 Microbench captures `start_time` and `finish_time` in the UTC timezone by default.
-This can be overriden by passing a `timezone=tz` argument when creating a benchmark
-class, where `tz` is a timezone object (e.g. created using the `pytz` library).
+This can be overridden by passing a `tz=...` argument when creating a benchmark
+suite object, where the value is a timezone object (e.g. created using the `pytz` library).
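+
+For example, a minimal sketch of a suite that records timestamps in a
+different timezone (the timezone chosen here is arbitrary):
+
+```python
+import pytz
+from microbench import MicroBench
+
+# start_time and finish_time will be captured in US Eastern time
+benchmark = MicroBench(tz=pytz.timezone('US/Eastern'))
+```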
+ """ if args: raise ValueError('Only keyword arguments are allowed') self._bm_static = kwargs @@ -84,12 +103,15 @@ def __init__(self, outfile=None, json_encoder=JSONEncoder, elif not hasattr(self, 'outfile'): self.outfile = io.StringIO() self._json_encoder = json_encoder + self._duration_counter = duration_counter self.tz = tz self.iterations = iterations def pre_start_triggers(self, bm_data): # Store timezone bm_data['timestamp_tz'] = str(self.tz) + # Store duration counter function name + bm_data['duration_counter'] = self._duration_counter.__name__ # Capture environment variables if hasattr(self, 'env_vars'): @@ -144,10 +166,10 @@ def post_finish_triggers(self, bm_data): method(bm_data) def pre_run_triggers(self, bm_data): - bm_data['_run_start'] = datetime.now(self.tz) + bm_data['_run_start'] = self._duration_counter() def post_run_triggers(self, bm_data): - bm_data['run_durations'].append(datetime.now(self.tz) - bm_data['_run_start']) + bm_data['run_durations'].append(self._duration_counter() - bm_data['_run_start']) def capture_function_name(self, bm_data): bm_data['function_name'] = bm_data['_func'].__name__ diff --git a/microbench/tests/test_base.py b/microbench/tests/test_base.py index 26dbfdd..4143747 100644 --- a/microbench/tests/test_base.py +++ b/microbench/tests/test_base.py @@ -34,6 +34,9 @@ def my_function(): runtimes = results['finish_time'] - results['start_time'] assert (runtimes > datetime.timedelta(0)).all() + assert results['timestamp_tz'][0] == 'UTC' + assert results['duration_counter'][0] == 'perf_counter' + def test_multi_iterations(): class MyBench(MicroBench): @@ -58,7 +61,6 @@ def my_function(): assert len(results['run_durations'][0]) == iterations assert all(dur >= 0 for dur in results['run_durations'][0]) - assert sum(results['run_durations'][0]) <= runtimes[0].total_seconds() def test_capture_global_packages():