From a7da30fc9dfd58251b07a87cb7a318a4e1a605a5 Mon Sep 17 00:00:00 2001
From: Jeremy Zucker <djinnome@gmail.com>
Date: Thu, 28 Mar 2024 09:41:12 -0700
Subject: [PATCH] 556 pattern matching on var finds multiple options (#557)

* Tests pass for find_target_col. Will let CI find out if any other tests fail

* Running black
---
 .../integration_utils/result_processing.py    |  4 ++-
 .../test_result_processing.py                 | 33 +++++++++++++++----
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/pyciemss/integration_utils/result_processing.py b/pyciemss/integration_utils/result_processing.py
index fffbfd6b5..b787a853d 100644
--- a/pyciemss/integration_utils/result_processing.py
+++ b/pyciemss/integration_utils/result_processing.py
@@ -1,3 +1,4 @@
+import re
 from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
 
 import numpy as np
@@ -144,7 +145,8 @@ def find_target_col(var: str, options: List[str]):
     # TODO: This "underscore-trailing-name matching" seems very fragile....
     #       It is done this way since you can intervene on params & states
     #       and that will match either.
-    options = [c for c in options if f"{var}_" in c]
+    pattern = re.compile(f"(?:^|_){re.escape(var)}_(state|param)")  # var matched literally
+    options = [c for c in options if pattern.search(c)]
     if len(options) == 0:
         raise KeyError(f"No target column match found for '{var}'.")
     if len(options) > 1:
diff --git a/tests/integration_utils/test_result_processing.py b/tests/integration_utils/test_result_processing.py
index 09ce34f13..ef51ccdfa 100644
--- a/tests/integration_utils/test_result_processing.py
+++ b/tests/integration_utils/test_result_processing.py
@@ -59,15 +59,36 @@ def test_get_times_for(intervention):
 
 @pytest.mark.parametrize("name", ["underscored", "with_underscore", "I", "i"])
 def test_find_target_col(name):
-    columns = [
-        "before_underscored",
-        "underscored_after",
-        "before_with_underscore_after",
-        "stuff_I_stuff",
+    good_columns = [  # exactly one column matches each parametrized name
+        "before_underscored_param",
+        "underscored_after_state",
+        "sample_with_underscore_state",
         "i_state",
+        "sampli_id_state",
+        "persistent_I_param",
     ]
-    result = result_processing.find_target_col(name, columns)
+    result = result_processing.find_target_col(name, good_columns)
     assert name in result
+    multiple_match_columns = [  # two columns match each name (ambiguous)
+        "i_state",
+        "persistent_i_param",
+        "before_underscored_param",
+        "underscored_param",
+        "with_underscore_param",
+        "not_with_underscore_state",
+        "With_I_param",
+        "I_state",
+    ]
+    with pytest.raises(ValueError):
+        result_processing.find_target_col(name, multiple_match_columns)
+    no_match_columns = [  # near misses: name never sits right before _state/_param
+        "stuff_I_stuff_state",
+        "sampli_state",
+        "before_with_underscore_after_param",
+        "underscored_after_state",
+    ]
+    with pytest.raises(KeyError):
+        result_processing.find_target_col(name, no_match_columns)
 
 
 @pytest.mark.parametrize("logging_step_size", [1, 5, 10, 12, 23])