Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite SearchNotationView for new OMR search #870

Merged
merged 6 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ class NgramDocument(TypedDict):
type: The type of the document (corresponds to solr schema's type field)
"""

location: str
location_json: str
pitch_names: str
contour: str
semitone_intervals: str
Expand Down
4 changes: 2 additions & 2 deletions app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""

import uuid
from typing import List, Tuple, Optional, Never, Union
from typing import List, Tuple, Optional
from .mei_parser import MEIParser
from .mei_parsing_types import (
Neume,
Expand Down Expand Up @@ -88,7 +88,7 @@ def _create_document_from_neume_components(
]
location: str = stringify_bounding_boxes(combine_bounding_boxes(zones_with_sys))
return {
"location": location,
"location_json": location,
"pitch_names": pitch_names,
"contour": contour,
"semitone_intervals": intervals,
Expand Down
127 changes: 56 additions & 71 deletions app/public/cantusdata/helpers/search_utils.py
Original file line number Diff line number Diff line change
@@ -1,82 +1,67 @@
import re
# Contains the words that are allowed
# in a neume_name query
VALID_NEUME_NAME_WORDS = {
"punctum",
"pes",
"clivis",
"scandicus",
"torculus",
"porrectus",
"distropha",
"tristopha",
"pressus",
"climacus",
"resupinus",
"flexus",
"subpunctis",
"compound",
}


def get_transpositions(sequence):
"""Given a series of pitch names (no flats or sharps - just abcdefg),
return a list of the 7 possible transpositions of the melody. This is used
when generating an elastic search query to look for all transpositions of a
user specified pitch sequence.
def validate_query(q: list[str], q_type: str) -> bool:
"""
Depending on the type of the query, returns True if the query is valid
"""
match q_type:
case "neume_names":
return all(neume in VALID_NEUME_NAME_WORDS for neume in q)
case "pitch_names" | "pitch_names_transposed":
return all(pitch in "abcdefg" for pitch in q)
case "contour":
return all(contour in "udr" for contour in q)
case _:
return False


The URL for the query will include 'q=pnames:' followed by the returned
transpositions separated by commas.
def transpose_up_unicode(x: int) -> int:
    """
    Transpose up the unicode decimal for a pitch
    name up 1 step. The unicode decimal for "g" is 103,
    so to transpose up from "g" to "a", we need to subtract 6.
    We can transpose up all other pitch names by adding 1.

    :param x: Unicode code point of a lowercase pitch name in "a"-"g".
    :return: Unicode code point of the pitch name one step higher.
    """
    # x is the unicode decimal for "a-f"
    if x < 103:
        return x + 1
    # x is the unicode decimal for "g"
    return x - 6


def get_transpositions(sequence: list[str]) -> list[list[str]]:
    """
    Given a series of pitch names (no flats or sharps - just abcdefg),
    return a list of the 7 possible transpositions of the melody.

    e.g. get_transpositions(["c", "e", "c", "e"]) returns
    [["c", "e", "c", "e"], ["d", "f", "d", "f"], ["e", "g", "e", "g"],
    ["f", "a", "f", "a"], ["g", "b", "g", "b"], ["a", "c", "a", "c"],
    ["b", "d", "b", "d"]]

    :param sequence: Pitch names, one character each, all in "abcdefg".
    :return: A list of 7 lists of pitch names: the original sequence
        followed by its six upward transpositions.
    """
    # Work on unicode code points so that stepping a pitch up is
    # simple integer arithmetic.
    asciinum = [ord(pitch) for pitch in sequence]
    transpositions: list[list[str]] = [sequence]
    # Six further transpositions complete the cycle of 7.
    for _ in range(6):
        asciinum = [transpose_up_unicode(cp) for cp in asciinum]
        transpositions.append([chr(cp) for cp in asciinum])
    return transpositions


def get_neumes_length(neumes):
    """
    Estimate how many pitches a space-separated string of neume
    names covers.

    Neume names are matched case-insensitively; any name that is
    not recognized counts as a single pitch.
    """
    # Number of pitches carried by each recognized neume name.
    lengths = {
        "punctum": 1,
        "virga": 1,
        "bivirga": 2,
        "podatus": 2,
        "pes": 2,
        "clivis": 2,
        "epiphonus": 2,
        "cephalicus": 2,
        "scandicus": 3,
        "salicus": 3,
        "ancus": 3,
        "torculus": 3,
        "porrectus": 3,
        # Treat flexus as a different one so we can have porrectus flexus, etc
        "resupinus": 1,
        "flexus": 1,
        "cavum": 1,
    }
    # Unknown neume names fall back to a length of 1.
    return sum(lengths.get(name, 1) for name in neumes.lower().split(" "))


def valid_pitch_sequence(sequence):
    """Return True if the (already lowercase) sequence contains only a-g."""
    # Valid exactly when no character outside a-g appears.
    return re.search(r"[^a-g]", sequence) is None


def valid_contour_sequence(sequence):
    """Return True if the (already lowercase) sequence contains only r, u, d."""
    # Valid exactly when no character outside r/u/d appears.
    return re.search(r"[^rud]", sequence) is None
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def test_mei_tokenizer(self) -> None:
# <zone xml:id="zone-0000002089367816" ulx="5104" uly="7774" lrx="5175" lry="7824"/>
with self.subTest("First 1-gram"):
expected_1gram: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 2608, "uly": 2399, "width": 70, "height": 49}]
),
"pitch_names": "d",
Expand All @@ -156,7 +156,7 @@ def test_mei_tokenizer(self) -> None:
self.assertEqual(expected_1gram, ngram_docs_1_2[0])
with self.subTest("Ngram of first 3 neumes"):
expected_3gram: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 2608, "uly": 2292, "width": 477, "height": 201}]
),
"neume_names": "punctum_clivis_punctum",
Expand All @@ -170,7 +170,7 @@ def test_mei_tokenizer(self) -> None:
# This 3-gram is constructed from the second three
# pitches of the sample above.
pitch_3gram: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 2725, "uly": 2292, "width": 360, "height": 201}]
),
"pitch_names": "d_c_f",
Expand All @@ -190,7 +190,7 @@ def test_mei_tokenizer(self) -> None:
# This 4-gram is constructed from the last three
# pitches of the test document.
pitch_3gram_1: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 4811, "uly": 7724, "width": 364, "height": 150}]
),
"pitch_names": "c_e_d",
Expand All @@ -209,7 +209,7 @@ def test_mei_tokenizer(self) -> None:
# This 4-gram is constructed from the last four
# pitches of the test document.
pitch_4gram: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 4750, "uly": 7724, "width": 425, "height": 150}]
),
"pitch_names": "d_c_e_d",
Expand Down
54 changes: 54 additions & 0 deletions app/public/cantusdata/test/core/helpers/test_search_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from unittest import TestCase

from cantusdata.helpers.search_utils import validate_query, get_transpositions


class SearchUtilsTestCase(TestCase):
    """Tests for the query-validation and transposition helpers."""

    def test_validate_query(self) -> None:
        with self.subTest("neume_names validation"):
            good_neumes = ["punctum", "flexus", "porrectus"]
            bad_neumes = ["punctum", "flexus", "not_a_neume_name"]
            self.assertTrue(validate_query(good_neumes, "neume_names"))
            self.assertFalse(validate_query(bad_neumes, "neume_names"))
        with self.subTest("pitch_names validation"):
            good_pitches = ["a", "b", "c", "f", "g"]
            bad_pitches = ["d", "e", "x", "f"]
            # The same vocabulary applies to both pitch query types.
            self.assertTrue(validate_query(good_pitches, "pitch_names"))
            self.assertFalse(validate_query(bad_pitches, "pitch_names"))
            self.assertTrue(validate_query(good_pitches, "pitch_names_transposed"))
            self.assertFalse(validate_query(bad_pitches, "pitch_names_transposed"))
        with self.subTest("contour validation"):
            good_contour = ["u", "d", "r"]
            bad_contour = ["u", "d", "s", "r"]
            self.assertTrue(validate_query(good_contour, "contour"))
            self.assertFalse(validate_query(bad_contour, "contour"))
        with self.subTest("invalid query type"):
            self.assertFalse(validate_query(["a", "b", "c"], "not_a_query_type"))

    def test_get_transpositions(self) -> None:
        with self.subTest("Transpositions of 'ga'"):
            expected = [
                list(s) for s in ("ga", "ab", "bc", "cd", "de", "ef", "fg")
            ]
            self.assertEqual(get_transpositions(["g", "a"]), expected)
        with self.subTest("Transpositions of 'fgae'"):
            expected = [
                list(s)
                for s in ("fgae", "gabf", "abcg", "bcda", "cdeb", "defc", "efgd")
            ]
            self.assertEqual(get_transpositions(["f", "g", "a", "e"]), expected)
Loading
Loading