diff --git a/utilities/mei_cleaning.py b/utilities/mei_cleaning.py
new file mode 100644
index 0000000..19de96a
--- /dev/null
+++ b/utilities/mei_cleaning.py
@@ -0,0 +1,270 @@
+import xml.etree.ElementTree as ET
+from itertools import combinations
+import argparse
+import re
+import os
+
+parser = argparse.ArgumentParser(description="Utilities for cleaning MEI files")
+parser.add_argument(
+    "mei_path",
+    type=str,
+    nargs="?",
+    help="Path to MEI file for cleaning. If a directory, cleans all MEI files in the directory.",
+    action="store",
+)
+parser.add_argument(
+    "--remove_unreferenced_bounding_boxes",
+    action="store_true",
+    help="If flagged, removes zones/bounding boxes that are defined but not referenced anywhere in the body.",
+)
+parser.add_argument(
+    "--remove_identical_duplicates",
+    action="store_true",
+    help="If flagged, removes duplicate zones/bounding boxes and duplicate objects that reference those bounding boxes.",
+)
+parser.add_argument(
+    "--raise_nonidentical_duplicates",
+    action="store_true",
+    help="If flagged, finds and records instances where duplicate zones/bounding boxes are referenced by different, non-identical objects.",
+)
+parser.add_argument(
+    "--destination_path",
+    action="store",
+    default=None,
+    type=str,
+    nargs="?",
+    help="If provided, the cleaned file is saved here. If omitted, the file is saved to the mei_path location. If mei_path is a directory, this should also be a directory.",
+)
+parser.add_argument(
+    "--report_file",
+    action="store",
+    default=None,
+    type=str,
+    nargs="?",
+    help="File in which to report any raised non-identical duplicates. If not given, results are printed.",
+)
+
+MEINS = "{http://www.music-encoding.org/ns/mei}"
+XMLNS = "{http://www.w3.org/XML/1998/namespace}"
+
+ET.register_namespace("", "http://www.music-encoding.org/ns/mei")
+
+
+class MEIFileCleaner:
+    def __init__(
+        self,
+        remove_unreferenced_bounding_boxes,
+        remove_identical_duplicates,
+        raise_nonidentical_duplicates,
+        report_file=None,
+    ):
+        """See the argument parser for a description of these arguments."""
+        self.remove_unreferenced_bounding_boxes = remove_unreferenced_bounding_boxes
+        self.remove_identical_duplicates = remove_identical_duplicates
+        self.raise_nonidentical_duplicates = raise_nonidentical_duplicates
+        self.report_file = report_file
+
+    def parse_zones(self, mei):
+        """Get the zones (bounding boxes) from an MEI root element.
+        Returns a dictionary keyed by "#zone_id" whose values hold each
+        zone's coordinates and rotation."""
+        zones = {}
+        for zone in mei.iter(f"{MEINS}zone"):
+            zone_id = zone.get(f"{XMLNS}id")
+            coordinate_names = ["ulx", "uly", "lrx", "lry"]
+            coordinates = [int(zone.get(c, -1)) for c in coordinate_names]
+            rotate = float(zone.get("rotate", 0.0))
+            zones[f"#{zone_id}"] = {
+                "coordinates": tuple(coordinates),
+                "rotate": rotate,
+            }
+        return zones
+
+    def find_duplicate_zones(self, mei):
+        """Find pairs of zones that share identical coordinates and rotation."""
+        zones = self.parse_zones(mei)
+        dupe_zone_list = []
+        for z1, z2 in combinations(zones.keys(), 2):
+            if zones[z1] == zones[z2]:
+                dupe_zone_list.append((z1, z2))
+        return dupe_zone_list
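+
+    # A minimal sketch of the two methods above (ids and coordinates are
+    # hypothetical). Given two facsimile zones with identical coordinates
+    # and rotation,
+    #
+    #   <zone xml:id="zone-1" ulx="10" uly="20" lrx="30" lry="40"/>
+    #   <zone xml:id="zone-2" ulx="10" uly="20" lrx="30" lry="40"/>
+    #
+    # parse_zones returns
+    #
+    #   {"#zone-1": {"coordinates": (10, 20, 30, 40), "rotate": 0.0},
+    #    "#zone-2": {"coordinates": (10, 20, 30, 40), "rotate": 0.0}}
+    #
+    # and find_duplicate_zones reports the pair ("#zone-1", "#zone-2").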
+
+    def remove_unreferenced_zones(self, mei):
+        """Remove any zones defined in the facsimile section of the MEI
+        (i.e. zone elements for which coordinates are defined) that are
+        not referenced by any element in the score body."""
+        music = mei.find(f"{MEINS}music")
+        surface = music.find(f"{MEINS}facsimile/{MEINS}surface")
+        defined_zones = surface.findall(f"{MEINS}zone")
+        body_str = ET.tostring(music.find(f"{MEINS}body"), encoding="unicode")
+        for def_z in defined_zones:
+            zone_id = def_z.get(f"{XMLNS}id")
+            if zone_id not in body_str:
+                surface.remove(def_z)
+                print(f"Unreferenced zone removed: {zone_id}")
+        return None
+
+    def get_elements_with_duplicate_references(self, mei):
+        """Find elements that reference duplicate bounding boxes.
+        Returns a list of lists, one for each pair of duplicate bounding
+        boxes, each containing two dictionaries. These dictionaries have:
+        - "element": the ElementTree object of one of the elements
+        - "bb_id": the id of the zone/bounding box of that element
+        - "parent": the ElementTree object of the parent of that element
+          (necessary to remove an element)"""
+        duplicate_zones = self.find_duplicate_zones(mei)
+        layer = mei.find(
+            f"./{MEINS}music/{MEINS}body/{MEINS}mdiv/{MEINS}score/{MEINS}section/{MEINS}staff/{MEINS}layer"
+        )
+        duplicate_references_list = []
+        for dup_zone_pair in duplicate_zones:
+            elems = [layer.find(f".//*[@facs='{dup}']") for dup in dup_zone_pair]
+            parent_elems = [
+                layer.find(f".//*[@facs='{dup}']/..") for dup in dup_zone_pair
+            ]
+            dup_ref_list = [
+                {
+                    "element": elems[0],
+                    "bb_id": dup_zone_pair[0],
+                    "parent": parent_elems[0],
+                },
+                {
+                    "element": elems[1],
+                    "bb_id": dup_zone_pair[1],
+                    "parent": parent_elems[1],
+                },
+            ]
+            duplicate_references_list.append(dup_ref_list)
+        return duplicate_references_list
+
+    def check_element_identity(self, elem1, elem2):
+        """Return True if two elements are identical apart from their
+        xml:id and facs attributes."""
+        elem_attribs = [elem1.attrib.copy(), elem2.attrib.copy()]
+        for a in elem_attribs:
+            # Pop with a default so elements missing either attribute
+            # do not raise a KeyError.
+            a.pop(f"{XMLNS}id", None)
+            a.pop("facs", None)
+        return elem_attribs[0] == elem_attribs[1] and elem1.text == elem2.text
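+
+    # A sketch of the identity check (element names, attributes, and ids are
+    # hypothetical). These two elements differ only in xml:id and facs, so
+    # check_element_identity returns True for them:
+    #
+    #   <nc xml:id="nc-1" facs="#zone-1" oct="3" pname="c"/>
+    #   <nc xml:id="nc-2" facs="#zone-2" oct="3" pname="c"/>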
+
+    def delete_element_and_referenced_zone(self, surface, element, parent, zone_id):
+        """Remove an element from its parent and remove the zone it
+        references from the surface."""
+        elem_id = element.attrib[f"{XMLNS}id"]
+        parent.remove(element)
+        zone_to_del = surface.find(f"*[@{XMLNS}id='{zone_id.replace('#', '')}']")
+        surface.remove(zone_to_del)
+        print(f"Identical zone and referencing element removed: {zone_id} & {elem_id}")
+        return None
+
+    def register_nonidentical_duplicates(self, dup_dict_1, dup_dict_2):
+        """Report a pair of non-identical elements that reference duplicate
+        zones, either to the report file or to stdout."""
+        str_to_print = f"""
+        ###### NON-IDENTICAL DUPLICATE FOUND ###### \n
+        {dup_dict_1['bb_id']} > \n
+        \t {dup_dict_1['element'].attrib} {dup_dict_1['element'].text} \n
+        {dup_dict_2['bb_id']} > \n
+        \t {dup_dict_2['element'].attrib} {dup_dict_2['element'].text} \n \n"""
+        if self.report_file:
+            with open(self.report_file, "a") as rf:
+                rf.write(str_to_print)
+        else:
+            print(str_to_print)
+        return None
+
+    def handle_referenced_duplicates(self, mei):
+        """Remove or report each pair of elements that reference duplicate
+        zones, according to the flags set at initialization."""
+        dup_ref_list = self.get_elements_with_duplicate_references(mei)
+        for dup_ref in dup_ref_list:
+            identical = self.check_element_identity(
+                dup_ref[0]["element"], dup_ref[1]["element"]
+            )
+            if identical:
+                if self.remove_identical_duplicates:
+                    surface = mei.find(f"{MEINS}music/{MEINS}facsimile/{MEINS}surface")
+                    self.delete_element_and_referenced_zone(
+                        surface=surface,
+                        element=dup_ref[1]["element"],
+                        parent=dup_ref[1]["parent"],
+                        zone_id=dup_ref[1]["bb_id"],
+                    )
+            elif self.raise_nonidentical_duplicates:
+                self.register_nonidentical_duplicates(dup_ref[0], dup_ref[1])
+        if self.raise_nonidentical_duplicates and self.report_file:
+            print(f"Non-identical duplicates checked and raised in {self.report_file}")
+        return None
+
+    def clean_mei(self, filepath):
+        """Clean a single MEI file and return its root element along with
+        any XML declarations found at the top of the file."""
+        print(f"CLEANING MEI FILE: {filepath}")
+        if self.report_file:
+            with open(self.report_file, "a") as rf:
+                rf.write(f"CLEANING MEI FILE: {filepath} \n")
+        xml_tree, xml_declarations = read_mei_file(filepath)
+        mei = xml_tree.getroot()
+        if self.remove_unreferenced_bounding_boxes:
+            self.remove_unreferenced_zones(mei)
+        self.handle_referenced_duplicates(mei)
+        return mei, xml_declarations
+
+
+def read_mei_file(filepath):
+    """Parse an MEI file, preserving any XML declarations at the top of
+    the file (ElementTree discards these when parsing)."""
+    xml_tree = ET.parse(filepath)
+    declarations = []
+    with open(filepath, "r") as in_file:
+        for f_line in in_file:
+            # Raw string avoids invalid escape sequence warnings; fullmatch
+            # makes the ^/$ anchors unnecessary.
+            if re.fullmatch(r"<\?.*\?>\n", f_line):
+                declarations.append(f_line)
+            else:
+                break
+    xml_declarations = "".join(declarations)
+    return xml_tree, xml_declarations
+
+
+def save_mei_file(mei, xml_declarations, filepath):
+    """Serialize an MEI root element, reattach the preserved XML
+    declarations, and write the result to filepath."""
+    xml_str = ET.tostring(mei, encoding="unicode")
+    # Remove the space ElementTree inserts before self-closing tags.
+    formatted_xml_str = xml_str.replace(" />", "/>")
+    formatted_xml_str = "".join([xml_declarations, formatted_xml_str])
+    with open(filepath, "w") as out_file:
+        out_file.write(formatted_xml_str)
+
+
+def clean_mei_files(
+    path,
+    destination_path=None,
+    remove_unreferenced_bounding_boxes=True,
+    remove_identical_duplicates=True,
+    raise_nonidentical_duplicates=True,
+    report_file=None,
+):
+    mei_cleaner = MEIFileCleaner(
+        remove_unreferenced_bounding_boxes=remove_unreferenced_bounding_boxes,
+        remove_identical_duplicates=remove_identical_duplicates,
+        raise_nonidentical_duplicates=raise_nonidentical_duplicates,
+        report_file=report_file,
+    )
+    if os.path.isfile(path):
+        cleaned_mei, xml_declarations = mei_cleaner.clean_mei(path)
+        if destination_path:
+            save_mei_file(cleaned_mei, xml_declarations, destination_path)
+        else:
+            save_mei_file(cleaned_mei, xml_declarations, path)
+    elif os.path.isdir(path):
+        mei_files = [file for file in os.listdir(path) if file.endswith(".mei")]
+        for mei_f in mei_files:
+            cleaned_mei, xml_declarations = mei_cleaner.clean_mei(
+                os.path.join(path, mei_f)
+            )
+            if destination_path:
+                save_mei_file(
+                    cleaned_mei, xml_declarations, os.path.join(destination_path, mei_f)
+                )
+            else:
+                save_mei_file(cleaned_mei, xml_declarations, os.path.join(path, mei_f))
+    else:
+        raise FileNotFoundError(f"No such file or directory: {path}")
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    clean_mei_files(
+        path=args.mei_path,
+        destination_path=args.destination_path,
+        remove_unreferenced_bounding_boxes=args.remove_unreferenced_bounding_boxes,
+        remove_identical_duplicates=args.remove_identical_duplicates,
+        raise_nonidentical_duplicates=args.raise_nonidentical_duplicates,
+        report_file=args.report_file,
+    )
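+
+# Example invocations (a sketch; file and directory names are hypothetical):
+#
+#   python utilities/mei_cleaning.py chant.mei --remove_identical_duplicates
+#   python utilities/mei_cleaning.py mei_dir/ --remove_unreferenced_bounding_boxes \
+#       --raise_nonidentical_duplicates --report_file duplicates_report.txt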