Result loading and evaluation V1 (#135)
* result loading and evaluation v1

* [pre-commit.ci lite] apply automatic fixes

* more result loading and evaluation

* bound

* fix

* cd diagrams

* time fix and resamples

* remade test results

* sorting and verify parameter

* sorting and verify parameter

* close enough

* fixes

* figure start

* no boxplot

* no boxplot

* fixes

* fixes

* allow for multiple load paths

* email

---------

Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
1 parent aea95ef commit b34e333
Showing 242 changed files with 43,471 additions and 698 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pr_opened.yml
@@ -33,11 +33,11 @@ jobs:

- name: Label pull request
id: label-pr
run: python build_tools/pr_labeler.py ${{ steps.app-token.outputs.token }}
run: python _build_tools/pr_labeler.py ${{ steps.app-token.outputs.token }}
env:
CONTEXT_GITHUB: ${{ toJson(github) }}

- name: Write pull request comment
run: python build_tools/pr_open_commenter.py ${{ steps.app-token.outputs.token }} ${{ steps.label-pr.outputs.title-labels }} ${{ steps.label-pr.outputs.title-labels-new }} ${{ steps.label-pr.outputs.content-labels }} ${{ steps.label-pr.outputs.content-labels-status }}
run: python _build_tools/pr_open_commenter.py ${{ steps.app-token.outputs.token }} ${{ steps.label-pr.outputs.title-labels }} ${{ steps.label-pr.outputs.title-labels-new }} ${{ steps.label-pr.outputs.content-labels }} ${{ steps.label-pr.outputs.content-labels-status }}
env:
CONTEXT_GITHUB: ${{ toJson(github) }}
21 changes: 21 additions & 0 deletions conftest.py
@@ -0,0 +1,21 @@
"""Main configuration file for pytest."""

__author__ = ["MatthewMiddlehurst"]

from tsml_eval.experiments import experiments


def pytest_addoption(parser):
    """Pytest command line parser options adder."""
    parser.addoption(
        "--meminterval",
        type=float,
        default=5.0,
        help="Set the time interval in seconds for recording memory usage "
        "(default: %(default)s).",
    )


def pytest_configure(config):
    """Pytest configuration preamble."""
    experiments.MEMRECORD_INTERVAL = config.getoption("--meminterval")
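With this hook in place, the memory-recording interval can be overridden on any run, e.g. `pytest --meminterval 0.5`. The project's own suite pins it to 0.1 seconds via the `addopts` change to `pyproject.toml` further down.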
Binary file added examples/images/cd_diagram.png
14 changes: 8 additions & 6 deletions pyproject.toml
@@ -7,12 +7,12 @@ name = "tsml-eval"
version = "0.1.1"
description = "A package for benchmarking time series machine learning tools."
authors = [
{name = "Matthew Middlehurst", email = "m.middlehurst@uea.ac.uk"},
{name = "Tony Bagnall", email = "ajb@uea.ac.uk"},
{name = "Matthew Middlehurst", email = "m.b.middlehurst@soton.ac.uk"},
{name = "Tony Bagnall", email = "a.j.bagnall@soton.ac.uk"},
]
maintainers = [
{name = "Matthew Middlehurst", email = "m.middlehurst@uea.ac.uk"},
{name = "Tony Bagnall", email = "ajb@uea.ac.uk"},
{name = "Matthew Middlehurst", email = "m.b.middlehurst@soton.ac.uk"},
{name = "Tony Bagnall", email = "a.j.bagnall@soton.ac.uk"},
]
readme = "README.md"
keywords = [
@@ -43,7 +43,8 @@ requires-python = ">=3.8,<3.12"
dependencies = [
"aeon>=0.5.0,<0.6.0",
"scikit-learn>=1.0.2,<=1.3.2",
"tsml>=0.2.0,<0.3.0",
"tsml>=0.2.1,<0.3.0",
"matplotlib",
"gpustat",
"psutil",
]
@@ -52,7 +53,7 @@ dependencies = [
all_extras = [
"aeon[all_extras,dl]",
"tsml[extras]",
"xgboost<=2.0.2",
"xgboost",
"torch>=1.13.1",
]
unstable_extras = [
@@ -150,4 +151,5 @@ addopts = '''
--showlocals
--doctest-modules
--numprocesses auto
--meminterval 0.1
'''
@@ -2,10 +2,8 @@

from dataclasses import dataclass
from math import floor
from time import perf_counter

import numpy as np
import psutil
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.utils.validation import check_random_state
@@ -18,16 +16,15 @@
"compare_estimators",
]

from tsml_eval.utils.memory_recorder import record_max_memory


@dataclass
class BenchmarkResult:
"""Aggregates runtimes (seconds) and memory usage (bytes)."""
"""Aggregates runtimes (milliseconds) and memory usage (bytes)."""

total_runtime: float
fit_runtime: float
predict_runtime: float

total_memory_usage: int
fit_memory_usage: int
predict_memory_usage: int

@@ -88,20 +85,18 @@ def benchmark_estimator(
random_state=rng,
)

runtime_fit, memory_fit, _ = _benchmark_function_wrapper(
estimator.fit, args=[X_train, y_train], kwargs={}
memory_fit, runtime_fit = record_max_memory(
estimator.fit, args=(X_train, y_train), return_func_time=True
)
runtime_predict, memory_predict, _ = _benchmark_function_wrapper(
estimator.predict, args=[X_test], kwargs={}
memory_predict, runtime_predict = record_max_memory(
estimator.predict, args=(X_test,), return_func_time=True
)

return BenchmarkResult(
fit_runtime=runtime_fit,
predict_runtime=runtime_predict,
total_runtime=runtime_fit + runtime_predict,
fit_memory_usage=memory_fit,
predict_memory_usage=memory_predict,
total_memory_usage=memory_fit + memory_predict,
)


@@ -237,15 +232,3 @@ def compare_estimators(
f"Invalid varying method: {varying}. Allowed values"
+ " are {'total', 'train', 'test'}."
)


def _benchmark_function_wrapper(func, args, kwargs):
process = psutil.Process()

mem_before = process.memory_info().vms
clock_start = perf_counter()
func_output = func(*args, **kwargs)
clock_end = perf_counter()
mem_after = process.memory_info().vms

return clock_end - clock_start, mem_after - mem_before, func_output
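The ad-hoc `_benchmark_function_wrapper` above is replaced by `record_max_memory` from `tsml_eval.utils.memory_recorder`. The diff only shows its call sites, which imply it returns `(memory, runtime)` when `return_func_time=True`, so the following is a hypothetical sketch of such a helper, not the package's actual implementation. The `interval` parameter stands in for the `MEMRECORD_INTERVAL` configured in `conftest.py`.

```python
# Hypothetical sketch only: the real tsml_eval.utils.memory_recorder may differ.
import threading
import time

import psutil


def record_max_memory(func, args=(), kwargs=None, interval=5.0, return_func_time=False):
    """Run func while polling process memory in a background thread.

    Returns the peak memory increase in bytes and, when return_func_time is
    True, the runtime in milliseconds (matching the call sites in the diff).
    """
    kwargs = {} if kwargs is None else kwargs
    process = psutil.Process()
    # The removed wrapper measured vms; rss is used here as an assumption.
    baseline = process.memory_info().rss
    peak = baseline
    stop = threading.Event()

    def _poll():
        nonlocal peak
        while not stop.is_set():
            peak = max(peak, process.memory_info().rss)
            stop.wait(interval)

    poller = threading.Thread(target=_poll, daemon=True)
    poller.start()
    start = time.perf_counter()
    try:
        func(*args, **kwargs)
    finally:
        stop.set()
        poller.join()
    runtime_ms = (time.perf_counter() - start) * 1000

    if return_func_time:
        return peak - baseline, runtime_ms
    return peak - baseline
```

Sampling in a thread captures the peak footprint during the call rather than only the before/after delta, which is why the wrapper's start/end `memory_info()` comparison could be dropped.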
23 changes: 3 additions & 20 deletions tsml_eval/evaluation/metrics.py
@@ -2,33 +2,16 @@

__author__ = ["MatthewMiddlehurst"]

__all__ = ["clustering_accuracy", "davies_bouldin_score_from_file"]
__all__ = ["clustering_accuracy_score"]

import sys

import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import confusion_matrix, davies_bouldin_score
from sklearn.metrics import confusion_matrix


def clustering_accuracy(y_true, y_pred):
def clustering_accuracy_score(y_true, y_pred):
"""Calculate clustering accuracy."""
matrix = confusion_matrix(y_true, y_pred)
row, col = linear_sum_assignment(matrix.max() - matrix)
s = sum([matrix[row[i], col[i]] for i in range(len(row))])
return s / y_pred.size


def davies_bouldin_score_from_file(X, file_path):
"""Calculate Davies-Bouldin score from a results file."""
y = np.zeros(len(X))
with open(file_path, "r") as f:
lines = f.readlines()
for i, line in enumerate(lines[3:]):
y[i] = float(line.split(",")[1])

clusters = len(np.unique(y))
if clusters <= 1:
return sys.float_info.max
else:
return davies_bouldin_score(X, y)
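The renamed `clustering_accuracy_score` pairs predicted clusters with true classes via the Hungarian algorithm (`linear_sum_assignment`) before counting matches, so arbitrary cluster IDs do not hurt the score. A small illustrative usage (the arrays below are made up, not from the source):

```python
import numpy as np

from tsml_eval.evaluation.metrics import clustering_accuracy_score

# Cluster IDs are arbitrary: the metric finds the best one-to-one mapping
# between predicted clusters and true classes, then scores accuracy.
y_true = np.array([0, 0, 0, 1, 1, 1])
y_pred = np.array([1, 1, 1, 0, 0, 2])  # same grouping, relabelled, one stray point
print(clustering_accuracy_score(y_true, y_pred))  # 5/6 ~ 0.833
```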