From a7da30fc9dfd58251b07a87cb7a318a4e1a605a5 Mon Sep 17 00:00:00 2001 From: Jeremy Zucker Date: Thu, 28 Mar 2024 09:41:12 -0700 Subject: [PATCH] 556 pattern matching on var finds multiple options (#557) * Tests pass for find_target_col. Will let CI find out if any other tests fail * Running black --- .../integration_utils/result_processing.py | 4 ++- .../test_result_processing.py | 33 +++++++++++++++---- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/pyciemss/integration_utils/result_processing.py b/pyciemss/integration_utils/result_processing.py index fffbfd6b5..b787a853d 100644 --- a/pyciemss/integration_utils/result_processing.py +++ b/pyciemss/integration_utils/result_processing.py @@ -1,3 +1,4 @@ +import re from typing import Any, Dict, Iterable, List, Mapping, Optional, Union import numpy as np @@ -144,7 +145,8 @@ def find_target_col(var: str, options: List[str]): # TODO: This "underscore-trailing-name matching" seems very fragile.... # It is done this way since you can intervene on params & states # and that will match either. - options = [c for c in options if f"{var}_" in c] + pattern = re.compile(f"(?:^|_){var}_(state|param)") + options = [c for c in options if pattern.search(c)] if len(options) == 0: raise KeyError(f"No target column match found for '{var}'.") if len(options) > 1: diff --git a/tests/integration_utils/test_result_processing.py b/tests/integration_utils/test_result_processing.py index 09ce34f13..ef51ccdfa 100644 --- a/tests/integration_utils/test_result_processing.py +++ b/tests/integration_utils/test_result_processing.py @@ -59,15 +59,36 @@ def test_get_times_for(intervention): @pytest.mark.parametrize("name", ["underscored", "with_underscore", "I", "i"]) def test_find_target_col(name): - columns = [ - "before_underscored", - "underscored_after", - "before_with_underscore_after", - "stuff_I_stuff", + good_columns = [ + "before_underscored_param", + "underscored_after_state", + "sample_with_underscore_state", "i_state", + "sampli_id_state", + "persistent_I_param", ] - result = result_processing.find_target_col(name, columns) + result = result_processing.find_target_col(name, good_columns) assert name in result + multiple_match_columns = [ + "i_state", + "persistent_i_param", + "before_underscored_param", + "underscored_param", + "with_underscore_param", + "not_with_underscore_state", + "With_I_param", + "I_state", + ] + with pytest.raises(ValueError): + result_processing.find_target_col(name, multiple_match_columns) + no_match_columns = [ + "stuff_I_stuff_state", + "sampli_state", + "before_with_underscore_after_param", + "underscored_after_state", + ] + with pytest.raises(KeyError): + result_processing.find_target_col(name, no_match_columns) @pytest.mark.parametrize("logging_step_size", [1, 5, 10, 12, 23])