From aab849601d1ff63c1fb3d6177a28c7e44ccd4139 Mon Sep 17 00:00:00 2001
From: "J.J. Allaire" <jj.allaire@gmail.com>
Date: Sat, 23 Nov 2024 11:07:02 -0500
Subject: [PATCH 1/3] handle exponents when converting str to float

---
 src/inspect_ai/_util/text.py     | 46 +++++++++++++++++++++++
 src/inspect_ai/scorer/_common.py |  8 +++-
 tests/util/test_str_to_float.py  | 64 ++++++++++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 2 deletions(-)
 create mode 100644 tests/util/test_str_to_float.py

diff --git a/src/inspect_ai/_util/text.py b/src/inspect_ai/_util/text.py
index 8a2a79ade..25640723d 100644
--- a/src/inspect_ai/_util/text.py
+++ b/src/inspect_ai/_util/text.py
@@ -62,3 +62,49 @@ def truncate_string_to_bytes(input: str, max_bytes: int) -> TruncatedOutput | No
     except Exception as ex:
         logger.warning(f"Unexpected error occurred truncating string: {ex}")
         return None
+
+
+def str_to_float(s: str) -> float:
+    """Convert a str to float, including handling exponent characters.
+
+    The Python is_numeric() function returns True for strings that include exponents
+    (e.g. 5²) however the float() function doesn't handle exponents. This function
+    will correctly handle these exponents when converting from str to float.
+
+    Args:
+       s (str): String to convert to float
+
+    Returns:
+       float: Converted value
+
+    Raises:
+       ValueError: If the string is empty or is not a valid numeric value.
+       OverflowError: If the result is too large to represent as a float.
+    """
+    if not s:
+        raise ValueError("Input string is empty.")
+
+    superscript_map = str.maketrans("⁰¹²³⁴⁵⁶⁷⁸⁹", "0123456789")
+    superscript_chars = "⁰¹²³⁴⁵⁶⁷⁸⁹"
+
+    base_part = ""
+    exponent_part = ""
+    for idx, char in enumerate(s):
+        if char in superscript_chars:
+            base_part = s[:idx]
+            exponent_part = s[idx:]
+            break
+    else:
+        base_part = s
+
+    # handle empty base (e.g., '²')
+    base = float(base_part) if base_part else 1.0
+
+    # handle exponent part
+    if exponent_part:
+        exponent_str = exponent_part.translate(superscript_map)
+        exponent = int(exponent_str)
+    else:
+        exponent = 1  # Default exponent is 1 if no superscript is present
+
+    return base**exponent
diff --git a/src/inspect_ai/scorer/_common.py b/src/inspect_ai/scorer/_common.py
index 50fe816cd..668c0574d 100644
--- a/src/inspect_ai/scorer/_common.py
+++ b/src/inspect_ai/scorer/_common.py
@@ -1,6 +1,10 @@
 from typing import Callable, Literal
 
-from inspect_ai._util.text import strip_numeric_punctuation, strip_punctuation
+from inspect_ai._util.text import (
+    str_to_float,
+    strip_numeric_punctuation,
+    strip_punctuation,
+)
 from inspect_ai.solver._task_state import TaskState
 
 from ._metric import CORRECT, INCORRECT, Score
@@ -96,7 +100,7 @@ def first_number_normalized(words: list[str]) -> str:
 
 def normalize_number(number: str, precision: int = 5) -> str:
     if number.replace(".", "").isnumeric():
-        num = float(number)
+        num = str_to_float(number)
         return format(num, f".{precision}g")
     else:
         return number
diff --git a/tests/util/test_str_to_float.py b/tests/util/test_str_to_float.py
new file mode 100644
index 000000000..467d759a9
--- /dev/null
+++ b/tests/util/test_str_to_float.py
@@ -0,0 +1,64 @@
+import pytest
+
+from inspect_ai._util.text import str_to_float
+
+
+def test_str_to_float_basic():
+    assert str_to_float("1²") == 1.0
+    assert str_to_float("2³") == 8.0
+    assert str_to_float("5⁴") == 625.0
+    assert str_to_float("10⁰") == 1.0
+    assert str_to_float("3") == 3.0
+
+
+def test_str_to_float_decimal_base():
+    assert str_to_float("2.5²") == 2.5**2
+    assert str_to_float("0.1³") == 0.1**3
+
+
+def test_str_to_float_negative_base():
+    assert str_to_float("-2²") == (-2) ** 2
+    assert str_to_float("-2³") == (-2) ** 3
+
+
+def test_str_to_float_multi_digit_exponent():
+    assert str_to_float("2⁴⁵") == 2**45
+    assert str_to_float("3⁰⁰⁰") == 3**0  # Exponent is 0
+
+
+def test_str_to_float_no_exponent():
+    assert str_to_float("7") == 7.0
+    assert str_to_float("0") == 0.0
+
+
+def test_str_to_float_no_base():
+    # When the base is missing, default to 1.0
+    assert str_to_float("⁵") == 1.0**5
+    assert str_to_float("⁰") == 1.0**0
+
+
+def test_str_to_float_zero_exponent():
+    assert str_to_float("5⁰") == 1.0
+    assert str_to_float("0⁰") == 1.0  # 0^0 is considered 1 in this context
+
+
+def test_str_to_float_invalid_input():
+    with pytest.raises(ValueError):
+        str_to_float("abc")
+    with pytest.raises(ValueError):
+        str_to_float("")
+    with pytest.raises(ValueError):
+        str_to_float("2^3")
+    with pytest.raises(ValueError):
+        str_to_float("⁺²")  # Unsupported superscript characters
+
+
+def test_str_to_float_edge_cases():
+    # Exponent with unsupported characters
+    with pytest.raises(ValueError):
+        str_to_float("2⁻³")
+    # Base with unsupported characters
+    with pytest.raises(ValueError):
+        str_to_float("a²")
+    # Superscript exponent applied to a decimal base
+    assert str_to_float("2.5⁴") == 2.5**4

From aaa4d2929ec8a9fb91288d620a16ff784e2f2402 Mon Sep 17 00:00:00 2001
From: "J.J. Allaire" <jj.allaire@gmail.com>
Date: Sat, 23 Nov 2024 11:10:08 -0500
Subject: [PATCH 2/3] correct isnumeric

---
 src/inspect_ai/_util/text.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/inspect_ai/_util/text.py b/src/inspect_ai/_util/text.py
index 25640723d..0afaabfae 100644
--- a/src/inspect_ai/_util/text.py
+++ b/src/inspect_ai/_util/text.py
@@ -67,7 +67,7 @@ def truncate_string_to_bytes(input: str, max_bytes: int) -> TruncatedOutput | No
 def str_to_float(s: str) -> float:
     """Convert a str to float, including handling exponent characters.
 
-    The Python is_numeric() function returns True for strings that include exponents
+    The Python str.isnumeric() method returns True for strings that include exponents
     (e.g. 5²) however the float() function doesn't handle exponents. This function
     will correctly handle these exponents when converting from str to float.
 

From 6077f97a987b47cbb22d3e0611fcdbdb77ca3979 Mon Sep 17 00:00:00 2001
From: jjallaire <jj.allaire@gmail.com>
Date: Mon, 25 Nov 2024 08:55:56 -0500
Subject: [PATCH 3/3] Update CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ac9ed0c5b..2af9cae56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@
 - Consistent behavior for `max_samples` across sandbox and non-sandbox evals (both now apply `max_samples` per task, formerly evals with sandboxes applied `max_samples` globally).
 - Bash tool: add `--login` option so that e.g. .bashrc is read before executing the command.
 - Google/Vertex: Support for `logprobs` and other new 1.5 (002 series) options.
+- Handle exponents in numeric normalisation for match, include, and answer scorers.
 - hf_dataset: added `cached` argument to control whether to use a previously cached version of the dataset if available (defaults to `True`). 
 - hf_dataset: added `revision` option to load a specific branch or commit SHA (when using `revision` datasets are always revalidated on Hugging Face, i.e. `cached` is ignored).
 - Log viewer: display sample ids rather than indexes.