Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite SearchNotationView for new OMR search #870

Merged
merged 6 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ class NgramDocument(TypedDict):
type: The type of the document (corresponds to solr schema's type field)
"""

location: str
location_json: str
pitch_names: str
contour: str
semitone_intervals: str
Expand Down
4 changes: 2 additions & 2 deletions app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""

import uuid
from typing import List, Tuple, Optional, Never, Union
from typing import List, Tuple, Optional
from .mei_parser import MEIParser
from .mei_parsing_types import (
Neume,
Expand Down Expand Up @@ -88,7 +88,7 @@ def _create_document_from_neume_components(
]
location: str = stringify_bounding_boxes(combine_bounding_boxes(zones_with_sys))
return {
"location": location,
"location_json": location,
"pitch_names": pitch_names,
"contour": contour,
"semitone_intervals": intervals,
Expand Down
127 changes: 56 additions & 71 deletions app/public/cantusdata/helpers/search_utils.py
Original file line number Diff line number Diff line change
@@ -1,82 +1,67 @@
import re
# Contains the words that are allowed
# in a neume_name query
VALID_NEUME_NAME_WORDS = {
"punctum",
"pes",
"clivis",
"scandicus",
"torculus",
"porrectus",
"distropha",
"tristopha",
"pressus",
"climacus",
"resupinus",
"flexus",
"subpunctis",
"compound",
}


def get_transpositions(sequence):
"""Given a series of pitch names (no flats or sharps - just abcdefg),
return a list of the 7 possible transpositions of the melody. This is used
when generating an elastic search query to look for all transpositions of a
user specified pitch sequence.
def validate_query(q: list[str], q_type: str) -> bool:
"""
Depending on the type of the query, returns True if the query is valid
"""
match q_type:
case "neume_names":
return all(neume in VALID_NEUME_NAME_WORDS for neume in q)
case "pitch_names" | "pitch_names_transposed":
return all(pitch in "abcdefg" for pitch in q)
case "contour":
return all(contour in "udr" for contour in q)
case _:
return False


The URL for the query will include 'q=pnames:' followed by the returned
transpositions separated by commas.
def transpose_up_unicode(x: int) -> int:
    """
    Transpose up the unicode decimal for a pitch
    name up 1 step. The unicode decimal for "g" is 103,
    so to transpose up from "g" to "a", we need to subtract 6.
    We can transpose up all other pitch names by adding 1.

    :param x: Unicode code point of a lowercase pitch name in "a"-"g".
    :return: Unicode code point of the pitch name one step higher.
    """
    # x is the unicode decimal for "a-f"
    if x < 103:
        return x + 1
    # x is the unicode decimal for "g"
    return x - 6


def get_transpositions(sequence: list[str]) -> list[list[str]]:
    """
    Given a series of pitch names (no flats or sharps - just abcdefg),
    return a list of the 7 possible transpositions of the melody.

    e.g. get_transpositions(["c", "e", "c", "e"]) returns
    [["c", "e", "c", "e"], ["d", "f", "d", "f"], ["e", "g", "e", "g"],
    ["f", "a", "f", "a"], ["g", "b", "g", "b"], ["a", "c", "a", "c"],
    ["b", "d", "b", "d"]]

    :param sequence: Pitch names, one character each, all in "abcdefg".
    :return: A list of 7 lists of pitch names: the original sequence
        followed by its six upward transpositions.
    """
    # Work on unicode code points so that stepping a pitch up is
    # simple integer arithmetic.
    asciinum = [ord(pitch) for pitch in sequence]
    transpositions: list[list[str]] = [sequence]
    # Six further transpositions complete the cycle of 7.
    for _ in range(6):
        asciinum = [transpose_up_unicode(cp) for cp in asciinum]
        transpositions.append([chr(cp) for cp in asciinum])
    return transpositions


def get_neumes_length(neumes):
    """
    Estimate how many pitches a space-separated string of neume
    names covers.

    Neume names are matched case-insensitively; any name that is
    not recognized counts as a single pitch.
    """
    # Number of pitches carried by each recognized neume name.
    lengths = {
        "punctum": 1,
        "virga": 1,
        "bivirga": 2,
        "podatus": 2,
        "pes": 2,
        "clivis": 2,
        "epiphonus": 2,
        "cephalicus": 2,
        "scandicus": 3,
        "salicus": 3,
        "ancus": 3,
        "torculus": 3,
        "porrectus": 3,
        # Treat flexus as a different one so we can have porrectus flexus, etc
        "resupinus": 1,
        "flexus": 1,
        "cavum": 1,
    }
    # Unknown neume names fall back to a length of 1.
    return sum(lengths.get(name, 1) for name in neumes.lower().split(" "))


def valid_pitch_sequence(sequence):
    """Return True if the (already lowercase) sequence contains only a-g."""
    # Valid exactly when no character outside a-g appears.
    return re.search(r"[^a-g]", sequence) is None


def valid_contour_sequence(sequence):
    """Return True if the (already lowercase) sequence contains only r, u, d."""
    # Valid exactly when no character outside r/u/d appears.
    return re.search(r"[^rud]", sequence) is None
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def test_mei_tokenizer(self) -> None:
# <zone xml:id="zone-0000002089367816" ulx="5104" uly="7774" lrx="5175" lry="7824"/>
with self.subTest("First 1-gram"):
expected_1gram: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 2608, "uly": 2399, "width": 70, "height": 49}]
),
"pitch_names": "d",
Expand All @@ -156,7 +156,7 @@ def test_mei_tokenizer(self) -> None:
self.assertEqual(expected_1gram, ngram_docs_1_2[0])
with self.subTest("Ngram of first 3 neumes"):
expected_3gram: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 2608, "uly": 2292, "width": 477, "height": 201}]
),
"neume_names": "punctum_clivis_punctum",
Expand All @@ -170,7 +170,7 @@ def test_mei_tokenizer(self) -> None:
# This 3-gram is constructed from the second three
# pitches of the sample above.
pitch_3gram: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 2725, "uly": 2292, "width": 360, "height": 201}]
),
"pitch_names": "d_c_f",
Expand All @@ -190,7 +190,7 @@ def test_mei_tokenizer(self) -> None:
# This 4-gram is constructed from the last three
# pitches of the test document.
pitch_3gram_1: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 4811, "uly": 7724, "width": 364, "height": 150}]
),
"pitch_names": "c_e_d",
Expand All @@ -209,7 +209,7 @@ def test_mei_tokenizer(self) -> None:
# This 4-gram is constructed from the last four
# pitches of the test document.
pitch_4gram: NgramDocument = {
"location": json.dumps(
"location_json": json.dumps(
[{"ulx": 4750, "uly": 7724, "width": 425, "height": 150}]
),
"pitch_names": "d_c_e_d",
Expand Down
54 changes: 54 additions & 0 deletions app/public/cantusdata/test/core/helpers/test_search_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from unittest import TestCase

from cantusdata.helpers.search_utils import validate_query, get_transpositions


class SearchUtilsTestCase(TestCase):
    """Tests for the query-validation and transposition helpers."""

    def test_validate_query(self) -> None:
        with self.subTest("neume_names validation"):
            good_neumes = ["punctum", "flexus", "porrectus"]
            bad_neumes = ["punctum", "flexus", "not_a_neume_name"]
            self.assertTrue(validate_query(good_neumes, "neume_names"))
            self.assertFalse(validate_query(bad_neumes, "neume_names"))
        with self.subTest("pitch_names validation"):
            good_pitches = ["a", "b", "c", "f", "g"]
            bad_pitches = ["d", "e", "x", "f"]
            # The same vocabulary applies to both pitch query types.
            self.assertTrue(validate_query(good_pitches, "pitch_names"))
            self.assertFalse(validate_query(bad_pitches, "pitch_names"))
            self.assertTrue(validate_query(good_pitches, "pitch_names_transposed"))
            self.assertFalse(validate_query(bad_pitches, "pitch_names_transposed"))
        with self.subTest("contour validation"):
            good_contour = ["u", "d", "r"]
            bad_contour = ["u", "d", "s", "r"]
            self.assertTrue(validate_query(good_contour, "contour"))
            self.assertFalse(validate_query(bad_contour, "contour"))
        with self.subTest("invalid query type"):
            self.assertFalse(validate_query(["a", "b", "c"], "not_a_query_type"))

    def test_get_transpositions(self) -> None:
        with self.subTest("Transpositions of 'ga'"):
            expected = [
                list(s) for s in ("ga", "ab", "bc", "cd", "de", "ef", "fg")
            ]
            self.assertEqual(get_transpositions(["g", "a"]), expected)
        with self.subTest("Transpositions of 'fgae'"):
            expected = [
                list(s)
                for s in ("fgae", "gabf", "abcg", "bcda", "cdeb", "defc", "efgd")
            ]
            self.assertEqual(get_transpositions(["f", "g", "a", "e"]), expected)
Loading
Loading