Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update MEIParser to include intervals and contours between neumes & syllables #833

Merged
merged 4 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
226 changes: 182 additions & 44 deletions app/public/cantusdata/helpers/mei_processing/mei_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"""

from xml.etree import ElementTree as ET
from typing import Tuple, Dict, List, TypedDict, Literal
from typing import Tuple, Dict, List, TypedDict, Literal, Iterator, Optional

# Mapping from pitch names to integer pitch class where C = 0
PITCH_CLASS = {"c": 0, "d": 2, "e": 4, "f": 5, "g": 7, "a": 9, "b": 11}
Expand Down Expand Up @@ -45,22 +45,54 @@


class Zone(TypedDict):
"""A type for zones (bounding boxes) in MEI files"""
"""A type for zones (bounding boxes) in MEI files.

coordinates: The location of the bounding box as
defined in MEI 'zone' elements. The coordinates
of the box are given as four integers designating,
in order:
- the x-coordinate of the upper-left corner of the box
- the y-coordinate of the upper-left corner of the box
- the x-coordinate of the lower-right corner of the box
- the y-coordinate of the lower-right corner of the box
rotate: The rotation of the zone in degrees.
"""

coordinates: CoordinatesType
rotate: float

dchiller marked this conversation as resolved.
Show resolved Hide resolved

class NeumeComponent(TypedDict):
"""A type for neume components"""
"""A type for neume components

pname: The pitch name of the neume component (i.e. "c", "d", "e", etc.)
octave: The octave of the neume component (as an integer, in scientific
pitch notation; e.g. middle c has octave "4")
bounding_box: The bounding box of the neume component
"""

pname: str
octave: int
bounding_box: Zone


class Neume(TypedDict):
"""A type for neumes"""
"""A type for neumes

neume_type: The name of the neume (i.e. "Punctum", "Pes", "Clivis", etc.)
neume_components: A list of neume components (containing pitch information)
intervals: A list of intervals (in semitones) between neume components.
In most cases, the length of this list is the same as the number of neume
components in the neume, with the final element being the interval between
the final component of the current neume and the first component of the
following neume. When there is no following neume (at the end of the mei
file), the list is one element shorter than the number of neume components
(this final element is omitted).
contours: A list of contours ("u"[p], "d"[own], or "s"[tay]) for each interval.
As with the "intervals" list, the length of this list usually includes a final
element that stores the contour between the final component of the current neume
and the first component of the following neume.
"""

neume_type: str
neume_components: List[NeumeComponent]
Expand Down Expand Up @@ -151,64 +183,170 @@ def _get_element_zone(self, element: ET.Element) -> Zone:
return zone
return {"coordinates": (-1, -1, -1, -1), "rotate": 0.0}

def parse_syllable(self, syllable: ET.Element) -> Syllable:
def _parse_syllable_text(self, syl_elem: Optional[ET.Element]) -> SyllableText:
"""
Parse a syllable element from an MEI file into a dictionary.
Get the text of a syllable and its associated bounding box from
a 'syl' element.

:param syllable: An ElementTree element of a syllable
:return: Dictionary of syllable data
:param syl_elem: A 'syl' element from an MEI file (or None if there is none)
:return: Dictionary of syllable text data
"""
# <syl> elements contain the text of the syllable.
syl = syllable.find(f"{self.MEINS}syl")
text_dict: SyllableText
if syl and syl.text:
text_dict = {
"text": syl.text.strip(),
"bounding_box": self._get_element_zone(syl),
if syl_elem is not None and syl_elem.text:
text_dict: SyllableText = {
"text": syl_elem.text.strip(),
"bounding_box": self._get_element_zone(syl_elem),
}
else:
text_dict = {
"text": "",
"bounding_box": {"coordinates": (-1, -1, -1, -1), "rotate": 0.0},
}
# <neume> elements contain the pitches of the syllable.
neumes_list: List[Neume] = []
for neume in syllable.findall(f"{self.MEINS}neume"):
neume_components: List[NeumeComponent] = []
for neume_comp in neume.findall(f"{self.MEINS}nc"):
pname = neume_comp.get("pname")
octave = neume_comp.get("oct")
if pname and octave:
neume_components.append(
{
"pname": pname,
"octave": int(octave),
"bounding_box": self._get_element_zone(neume_comp),
}
)
neume_type, intervals, contours = analyze_neume(neume_components)
neume_dict: Neume = {
"neume_type": neume_type,
"neume_components": neume_components,
"intervals": intervals,
"contours": contours,
return text_dict

def _parse_neume_component(
self, neume_comp: ET.Element
) -> Optional[NeumeComponent]:
"""
Parses an 'nc' element into a NeumeComponent dictionary.

:param neume_comp: An 'nc' element from an MEI file
:return: A dictionary of neume component data (see NeumeComponent for structure)
"""
pname = neume_comp.get("pname")
octave = neume_comp.get("oct")
if pname and octave:
return {
"pname": pname,
"octave": int(octave),
"bounding_box": self._get_element_zone(neume_comp),
}
neumes_list.append(neume_dict)
syllable_dict: Syllable = {
"text": text_dict,
"neumes": neumes_list,
return None

def _parse_neume(
self,
neume_components: List[ET.Element],
next_neume_component: Optional[ET.Element],
) -> Neume:
"""
Gets a Neume dictionary from a series of 'nc' elements (including
the first neume component of the following neume, if it exists)

:param neume_components: A list of 'nc' elements in a given 'neume' element
:param next_neume_component: The first 'nc' element of the next neume
:return: A neume dictionary (see Neume for structure)
"""
parsed_neume_components: List[NeumeComponent] = []
for neume_comp in neume_components:
parsed_neume_component: Optional[NeumeComponent] = (
self._parse_neume_component(neume_comp)
)
if parsed_neume_component:
parsed_neume_components.append(parsed_neume_component)
neume_type, intervals, contours = analyze_neume(parsed_neume_components)
# If the first neume component of the next neume can be parsed,
# add the interval and contour between the final neume component of
# the current neume and the first neume component of the next neume.
if next_neume_component is not None:
parsed_next_neume_comp: Optional[NeumeComponent] = (
self._parse_neume_component(next_neume_component)
)
if parsed_next_neume_comp:
last_neume_comp = parsed_neume_components[-1]
intervals.append(
get_interval_between_neume_components(
last_neume_comp, parsed_next_neume_comp
)
)
contours.append(get_contour_from_interval(intervals[-1]))
parsed_neume: Neume = {
"neume_type": neume_type,
"neume_components": parsed_neume_components,
"intervals": intervals,
"contours": contours,
}
return syllable_dict
return parsed_neume

def _neume_iterator(
    self,
    neumes: List[ET.Element],
    next_syllable_1st_nc: Optional[ET.Element],
) -> Iterator[Tuple[List[ET.Element], Optional[ET.Element]]]:
    """
    Convenience generator for iterating over a syllable's neumes.
    At each iteration step, the generator provides the 'nc' elements
    of the current neume and the first 'nc' element of the following
    neume, so that the interval and contour between the final component
    of the current neume and the first component of the following neume
    can be computed. For the last neume of the syllable, the "following"
    component is the first 'nc' element of the next syllable.

    :param neumes: A list of 'neume' elements in a syllable
    :param next_syllable_1st_nc: The first 'nc' element of the next syllable
        (None if there is no next syllable)

    The generator yields one tuple per 'neume' element, made of:
        - The 'nc' elements of the current neume (an empty list if the
          neume contains no 'nc' elements)
        - The first 'nc' element of the next neume in this syllable
          (None if that neume has no 'nc' element) or, for the last
          neume, next_syllable_1st_nc
    """
    nc_tag = f"{self.MEINS}nc"
    neume_iterator = iter(neumes)
    current_neume = next(neume_iterator, None)
    # Compare against None explicitly: an ElementTree element with no
    # children is falsy, so a plain truth test would stop the iteration
    # early at an empty 'neume' element and drop the neumes after it.
    while current_neume is not None:
        neume_components = current_neume.findall(nc_tag)
        next_neume = next(neume_iterator, None)
        if next_neume is not None:
            next_neume_component = next_neume.find(nc_tag)
        else:
            next_neume_component = next_syllable_1st_nc
        yield neume_components, next_neume_component
        current_neume = next_neume

def _syllable_iterator(
    self,
) -> Iterator[Tuple[Optional[ET.Element], List[ET.Element], Optional[ET.Element]]]:
    """
    Convenience generator for iterating over syllables in an MEI file. At each
    iteration step, the generator provides all data for the current syllable
    and the first neume component of the next syllable (if it exists) so that
    the interval and contour between the final neume component of the current
    syllable and the first neume component of the next syllable can be computed.

    The generator yields one tuple per 'syllable' element, made of:
        - The 'syl' element of the current syllable (containing text information),
        if it exists (None otherwise).
        - A list of 'neume' elements for the current syllable (containing musical
        information); the list is empty if there are none.
        - The first 'nc' element (neume component) of the first 'neume' element
        of the next syllable (if it exists). If there is no next syllable, or
        it has no such element, this value is None.
    """
    syl_tag = f"{self.MEINS}syl"
    neume_tag = f"{self.MEINS}neume"
    nc_tag = f"{self.MEINS}nc"
    syllable_iterator = self.mei.iter(f"{self.MEINS}syllable")
    current_syllable = next(syllable_iterator, None)
    # Compare against None explicitly: an ElementTree element with no
    # children is falsy, so a plain truth test would end the iteration at
    # the first empty 'syllable' element and drop every syllable after it.
    while current_syllable is not None:
        current_syl = current_syllable.find(syl_tag)
        current_neumes = current_syllable.findall(neume_tag)
        next_syllable = next(syllable_iterator, None)
        next_neume = (
            next_syllable.find(neume_tag) if next_syllable is not None else None
        )
        next_nc = next_neume.find(nc_tag) if next_neume is not None else None
        yield current_syl, current_neumes, next_nc
        current_syllable = next_syllable

def parse_mei(self) -> List[Syllable]:
"""
Parses the MEI file into a list of syllables.

:return: A list of syllables
"""
syllables = []
for syllable in self.mei.iter(f"{self.MEINS}syllable"):
syllable_dict = self.parse_syllable(syllable)
syllables: List[Syllable] = []
for text_elem, syllable_neumes, next_neume_comp in self._syllable_iterator():
syllable_text: SyllableText = self._parse_syllable_text(text_elem)
neumes_list: List[Neume] = []
for neume, next_neume_1st_nc in self._neume_iterator(
syllable_neumes, next_neume_comp
):
neumes_list.append(self._parse_neume(neume, next_neume_1st_nc))
syllable_dict: Syllable = {
"text": syllable_text,
"neumes": neumes_list,
}
syllables.append(syllable_dict)
return syllables

Expand Down
Loading
Loading