From 7553fd6d90cf76c864ba25d389b04d9ec643df06 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Mon, 3 Jun 2024 01:02:58 +0200 Subject: [PATCH] #124 standardize annotation ordering and format file fxn calls --- .../English/nouns/format_nouns.py | 8 ++++-- .../English/verbs/format_verbs.py | 8 ++++-- .../French/nouns/format_nouns.py | 27 ++++-------------- .../French/verbs/format_verbs.py | 8 ++++-- .../German/nouns/format_nouns.py | 27 ++++-------------- .../prepositions/format_prepositions.py | 8 ++++-- .../German/verbs/format_verbs.py | 8 ++++-- .../Italian/nouns/format_nouns.py | 27 ++++-------------- .../Italian/verbs/format_verbs.py | 8 ++++-- .../Portuguese/nouns/format_nouns.py | 27 ++++-------------- .../Portuguese/verbs/format_verbs.py | 8 ++++-- .../Russian/nouns/format_nouns.py | 28 ++++--------------- .../prepositions/format_prepositions.py | 8 ++++-- .../Russian/verbs/format_verbs.py | 8 ++++-- .../Spanish/nouns/format_nouns.py | 28 ++++--------------- .../Spanish/verbs/format_verbs.py | 8 ++++-- .../Swedish/nouns/format_nouns.py | 27 ++++-------------- .../Swedish/verbs/format_verbs.py | 8 ++++-- 18 files changed, 97 insertions(+), 182 deletions(-) diff --git a/src/scribe_data/language_data_extraction/English/nouns/format_nouns.py b/src/scribe_data/language_data_extraction/English/nouns/format_nouns.py index 235522b2f..874ce7edc 100644 --- a/src/scribe_data/language_data_extraction/English/nouns/format_nouns.py +++ b/src/scribe_data/language_data_extraction/English/nouns/format_nouns.py @@ -8,10 +8,12 @@ from scribe_data.utils import export_formatted_data, load_queried_data +LANGUAGE = "English" +DATA_TYPE = "nouns" file_path = sys.argv[0] nouns_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="English", data_type="nouns" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) nouns_formatted = {} @@ -79,8 +81,8 @@ export_formatted_data( formatted_data=nouns_formatted, update_data_in_use=update_data_in_use, - language="English", - data_type="nouns", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/English/verbs/format_verbs.py b/src/scribe_data/language_data_extraction/English/verbs/format_verbs.py index aadd3ec7c..c7a410e88 100644 --- a/src/scribe_data/language_data_extraction/English/verbs/format_verbs.py +++ b/src/scribe_data/language_data_extraction/English/verbs/format_verbs.py @@ -8,10 +8,12 @@ from scribe_data.utils import export_formatted_data, load_queried_data +LANGUAGE = "English" +DATA_TYPE = "verbs" file_path = sys.argv[0] verbs_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="English", data_type="verbs" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) verbs_formatted = {} @@ -66,8 +68,8 @@ export_formatted_data( formatted_data=verbs_formatted, update_data_in_use=update_data_in_use, - language="English", - data_type="verbs", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/French/nouns/format_nouns.py b/src/scribe_data/language_data_extraction/French/nouns/format_nouns.py index b4082b406..1e3a85491 100644 --- a/src/scribe_data/language_data_extraction/French/nouns/format_nouns.py +++ b/src/scribe_data/language_data_extraction/French/nouns/format_nouns.py @@ -10,33 +10,18 @@ export_formatted_data, load_queried_data, map_genders, + order_annotations, ) +LANGUAGE = "French" +DATA_TYPE = "nouns" file_path = sys.argv[0] nouns_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="French", data_type="nouns" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) -def order_annotations(annotation): - """ - Standardizes the annotations that are presented to users where more than one is applicable. - - Parameters - ---------- - annotation : str - The annotation to be returned to the user in the command bar. - """ - single_annotations = ["F", "M", "PL"] - if annotation in single_annotations: - return annotation - - annotation_split = sorted([a for a in set(annotation.split("/")) if a != ""]) - - return "/".join(annotation_split) - - nouns_formatted = {} for noun_vals in nouns_list: @@ -102,8 +87,8 @@ def order_annotations(annotation): export_formatted_data( formatted_data=nouns_formatted, update_data_in_use=update_data_in_use, - language="French", - data_type="nouns", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/French/verbs/format_verbs.py b/src/scribe_data/language_data_extraction/French/verbs/format_verbs.py index 9c08f37da..26ade2841 100644 --- a/src/scribe_data/language_data_extraction/French/verbs/format_verbs.py +++ b/src/scribe_data/language_data_extraction/French/verbs/format_verbs.py @@ -8,10 +8,12 @@ from scribe_data.utils import export_formatted_data, load_queried_data +LANGUAGE = "French" +DATA_TYPE = "verbs" file_path = sys.argv[0] verbs_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="French", data_type="verbs" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) verbs_formatted = {} @@ -63,8 +65,8 @@ export_formatted_data( formatted_data=verbs_formatted, update_data_in_use=update_data_in_use, - language="French", - data_type="verbs", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/German/nouns/format_nouns.py b/src/scribe_data/language_data_extraction/German/nouns/format_nouns.py index 13e38a245..a943e39a3 100644 --- a/src/scribe_data/language_data_extraction/German/nouns/format_nouns.py +++ b/src/scribe_data/language_data_extraction/German/nouns/format_nouns.py @@ -10,33 +10,18 @@ export_formatted_data, load_queried_data, map_genders, + order_annotations, ) +LANGUAGE = "German" +DATA_TYPE = "nouns" file_path = sys.argv[0] nouns_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="German", data_type="nouns" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) -def order_annotations(annotation): - """ - Standardizes the annotations that are presented to users where more than one is applicable. - - Parameters - ---------- - annotation : str - The annotation to be returned to the user in the command bar. - """ - single_annotations = ["F", "M", "N", "PL"] - if annotation in single_annotations: - return annotation - - annotation_split = sorted([a for a in set(annotation.split("/")) if a != ""]) - - return "/".join(annotation_split) - - nouns_formatted = {} for noun_vals in nouns_list: @@ -164,8 +149,8 @@ def order_annotations(annotation): export_formatted_data( formatted_data=nouns_formatted, update_data_in_use=update_data_in_use, - language="German", - data_type="nouns", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/German/prepositions/format_prepositions.py b/src/scribe_data/language_data_extraction/German/prepositions/format_prepositions.py index e728d0fc4..04f85c806 100644 --- a/src/scribe_data/language_data_extraction/German/prepositions/format_prepositions.py +++ b/src/scribe_data/language_data_extraction/German/prepositions/format_prepositions.py @@ -13,10 +13,12 @@ order_annotations, ) +LANGUAGE = "German" +DATA_TYPE = "prepositions" file_path = sys.argv[0] prepositions_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="German", data_type="prepositions" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) prepositions_formatted = {} @@ -75,8 +77,8 @@ export_formatted_data( formatted_data=prepositions_formatted, update_data_in_use=update_data_in_use, - language="German", - data_type="prepositions", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/German/verbs/format_verbs.py b/src/scribe_data/language_data_extraction/German/verbs/format_verbs.py index 28bded81c..9f5c31ba3 100644 --- a/src/scribe_data/language_data_extraction/German/verbs/format_verbs.py +++ b/src/scribe_data/language_data_extraction/German/verbs/format_verbs.py @@ -13,10 +13,12 @@ from scribe_data.utils import export_formatted_data, load_queried_data +LANGUAGE = "German" +DATA_TYPE = "verbs" file_path = sys.argv[0] verbs_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="German", data_type="verbs" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) verbs_formatted = {} @@ -142,8 +144,8 @@ def assign_past_participle(verb, tense): export_formatted_data( formatted_data=verbs_formatted, update_data_in_use=update_data_in_use, - language="German", - data_type="verbs", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Italian/nouns/format_nouns.py b/src/scribe_data/language_data_extraction/Italian/nouns/format_nouns.py index 5e35e194a..9c844ffb9 100644 --- a/src/scribe_data/language_data_extraction/Italian/nouns/format_nouns.py +++ b/src/scribe_data/language_data_extraction/Italian/nouns/format_nouns.py @@ -10,33 +10,18 @@ export_formatted_data, load_queried_data, map_genders, + order_annotations, ) +LANGUAGE = "Italian" +DATA_TYPE = "nouns" file_path = sys.argv[0] nouns_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Italian", data_type="nouns" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) -def order_annotations(annotation): - """ - Standardizes the annotations that are presented to users where more than one is applicable. - - Parameters - ---------- - annotation : str - The annotation to be returned to the user in the command bar. - """ - single_annotations = ["F", "M", "PL"] - if annotation in single_annotations: - return annotation - - annotation_split = sorted([a for a in set(annotation.split("/")) if a != ""]) - - return "/".join(annotation_split) - - nouns_formatted = {} for noun_vals in nouns_list: @@ -103,8 +88,8 @@ def order_annotations(annotation): export_formatted_data( formatted_data=nouns_formatted, update_data_in_use=update_data_in_use, - language="Italian", - data_type="nouns", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Italian/verbs/format_verbs.py b/src/scribe_data/language_data_extraction/Italian/verbs/format_verbs.py index f16616bf4..fb8861798 100644 --- a/src/scribe_data/language_data_extraction/Italian/verbs/format_verbs.py +++ b/src/scribe_data/language_data_extraction/Italian/verbs/format_verbs.py @@ -8,10 +8,12 @@ from scribe_data.utils import export_formatted_data, load_queried_data +LANGUAGE = "Italian" +DATA_TYPE = "verbs" file_path = sys.argv[0] verbs_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Italian", data_type="verbs" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) verbs_formatted = {} @@ -57,8 +59,8 @@ export_formatted_data( formatted_data=verbs_formatted, update_data_in_use=update_data_in_use, - language="Italian", - data_type="verbs", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Portuguese/nouns/format_nouns.py b/src/scribe_data/language_data_extraction/Portuguese/nouns/format_nouns.py index 09091e793..606b4a46e 100644 --- a/src/scribe_data/language_data_extraction/Portuguese/nouns/format_nouns.py +++ b/src/scribe_data/language_data_extraction/Portuguese/nouns/format_nouns.py @@ -10,33 +10,18 @@ export_formatted_data, load_queried_data, map_genders, + order_annotations, ) +LANGUAGE = "Portuguese" +DATA_TYPE = "nouns" file_path = sys.argv[0] nouns_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Portuguese", data_type="nouns" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) -def order_annotations(annotation): - """ - Standardizes the annotations that are presented to users where more than one is applicable. - - Parameters - ---------- - annotation : str - The annotation to be returned to the user in the command bar. - """ - single_annotations = ["F", "M", "PL"] - if annotation in single_annotations: - return annotation - - annotation_split = sorted([a for a in set(annotation.split("/")) if a != ""]) - - return "/".join(annotation_split) - - nouns_formatted = {} for noun_vals in nouns_list: @@ -103,8 +88,8 @@ def order_annotations(annotation): export_formatted_data( formatted_data=nouns_formatted, update_data_in_use=update_data_in_use, - language="Portuguese", - data_type="nouns", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Portuguese/verbs/format_verbs.py b/src/scribe_data/language_data_extraction/Portuguese/verbs/format_verbs.py index 4405a111f..eaf259b62 100644 --- a/src/scribe_data/language_data_extraction/Portuguese/verbs/format_verbs.py +++ b/src/scribe_data/language_data_extraction/Portuguese/verbs/format_verbs.py @@ -8,10 +8,12 @@ from scribe_data.utils import export_formatted_data, load_queried_data +LANGUAGE = "Portuguese" +DATA_TYPE = "verbs" file_path = sys.argv[0] verbs_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Portuguese", data_type="verbs" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) verbs_formatted = {} @@ -57,8 +59,8 @@ export_formatted_data( formatted_data=verbs_formatted, update_data_in_use=update_data_in_use, - language="Portuguese", - data_type="verbs", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Russian/nouns/format_nouns.py b/src/scribe_data/language_data_extraction/Russian/nouns/format_nouns.py index af95db3c2..b63a5c073 100644 --- a/src/scribe_data/language_data_extraction/Russian/nouns/format_nouns.py +++ b/src/scribe_data/language_data_extraction/Russian/nouns/format_nouns.py @@ -10,33 +10,17 @@ export_formatted_data, load_queried_data, map_genders, + order_annotations, ) +LANGUAGE = "Russian" +DATA_TYPE = "nouns" file_path = sys.argv[0] nouns_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Russian", data_type="nouns" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) - -def order_annotations(annotation): - """ - Standardizes the annotations that are presented to users where more than one is applicable. - - Parameters - ---------- - annotation : str - The annotation to be returned to the user in the command bar. - """ - single_annotations = ["F", "M", "N", "PL"] - if annotation in single_annotations: - return annotation - - annotation_split = sorted([a for a in set(annotation.split("/")) if a != ""]) - - return "/".join(annotation_split) - - nouns_formatted = {} for noun_vals in nouns_list: @@ -164,8 +148,8 @@ def order_annotations(annotation): export_formatted_data( formatted_data=nouns_formatted, update_data_in_use=update_data_in_use, - language="Russian", - data_type="nouns", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Russian/prepositions/format_prepositions.py b/src/scribe_data/language_data_extraction/Russian/prepositions/format_prepositions.py index 37baa523e..38b1b0c61 100644 --- a/src/scribe_data/language_data_extraction/Russian/prepositions/format_prepositions.py +++ b/src/scribe_data/language_data_extraction/Russian/prepositions/format_prepositions.py @@ -13,10 +13,12 @@ order_annotations, ) +LANGUAGE = "Russian" +DATA_TYPE = "prepositions" file_path = sys.argv[0] prepositions_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Russian", data_type="prepositions" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) prepositions_formatted = {} @@ -41,8 +43,8 @@ export_formatted_data( formatted_data=prepositions_formatted, update_data_in_use=update_data_in_use, - language="Russian", - data_type="prepositions", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Russian/verbs/format_verbs.py b/src/scribe_data/language_data_extraction/Russian/verbs/format_verbs.py index 34f19d57b..6c639f2ed 100644 --- a/src/scribe_data/language_data_extraction/Russian/verbs/format_verbs.py +++ b/src/scribe_data/language_data_extraction/Russian/verbs/format_verbs.py @@ -8,10 +8,12 @@ from scribe_data.utils import export_formatted_data, load_queried_data +LANGUAGE = "Russian" +DATA_TYPE = "verbs" file_path = sys.argv[0] verbs_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Russian", data_type="verbs" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) verbs_formatted = {} @@ -43,8 +45,8 @@ export_formatted_data( formatted_data=verbs_formatted, update_data_in_use=update_data_in_use, - language="Russian", - data_type="verbs", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Spanish/nouns/format_nouns.py b/src/scribe_data/language_data_extraction/Spanish/nouns/format_nouns.py index 29a14e3f8..23a0a017d 100644 --- a/src/scribe_data/language_data_extraction/Spanish/nouns/format_nouns.py +++ b/src/scribe_data/language_data_extraction/Spanish/nouns/format_nouns.py @@ -10,33 +10,17 @@ export_formatted_data, load_queried_data, map_genders, + order_annotations, ) +LANGUAGE = "Spanish" +DATA_TYPE = "nouns" file_path = sys.argv[0] nouns_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Spanish", data_type="nouns" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) - -def order_annotations(annotation): - """ - Standardizes the annotations that are presented to users where more than one is applicable. - - Parameters - ---------- - annotation : str - The annotation to be returned to the user in the command bar. - """ - single_annotations = ["F", "M", "PL"] - if annotation in single_annotations: - return annotation - - annotation_split = sorted([a for a in set(annotation.split("/")) if a != ""]) - - return "/".join(annotation_split) - - nouns_formatted = {} for noun_vals in nouns_list: @@ -103,8 +87,8 @@ def order_annotations(annotation): export_formatted_data( formatted_data=nouns_formatted, update_data_in_use=update_data_in_use, - language="Spanish", - data_type="nouns", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Spanish/verbs/format_verbs.py b/src/scribe_data/language_data_extraction/Spanish/verbs/format_verbs.py index ecfed6a8a..4a5ac737c 100644 --- a/src/scribe_data/language_data_extraction/Spanish/verbs/format_verbs.py +++ b/src/scribe_data/language_data_extraction/Spanish/verbs/format_verbs.py @@ -8,10 +8,12 @@ from scribe_data.utils import export_formatted_data, load_queried_data +LANGUAGE = "Spanish" +DATA_TYPE = "verbs" file_path = sys.argv[0] verbs_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Spanish", data_type="verbs" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) verbs_formatted = {} @@ -57,8 +59,8 @@ export_formatted_data( formatted_data=verbs_formatted, update_data_in_use=update_data_in_use, - language="Spanish", - data_type="verbs", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Swedish/nouns/format_nouns.py b/src/scribe_data/language_data_extraction/Swedish/nouns/format_nouns.py index 3e116e022..712623025 100644 --- a/src/scribe_data/language_data_extraction/Swedish/nouns/format_nouns.py +++ b/src/scribe_data/language_data_extraction/Swedish/nouns/format_nouns.py @@ -10,33 +10,18 @@ export_formatted_data, load_queried_data, map_genders, + order_annotations, ) +LANGUAGE = "Swedish" +DATA_TYPE = "nouns" file_path = sys.argv[0] nouns_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Swedish", data_type="nouns" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) -def order_annotations(annotation): - """ - Standardizes the annotations that are presented to users where more than one is applicable. - - Parameters - ---------- - annotation : str - The annotation to be returned to the user in the command bar. - """ - single_annotations = ["C", "N", "PL"] - if annotation in single_annotations: - return annotation - - annotation_split = sorted([a for a in set(annotation.split("/")) if a != ""]) - - return "/".join(annotation_split) - - nouns_formatted = {} for noun_vals in nouns_list: @@ -176,8 +161,8 @@ def order_annotations(annotation): export_formatted_data( formatted_data=nouns_formatted, update_data_in_use=update_data_in_use, - language="Swedish", - data_type="nouns", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path) diff --git a/src/scribe_data/language_data_extraction/Swedish/verbs/format_verbs.py b/src/scribe_data/language_data_extraction/Swedish/verbs/format_verbs.py index ed25a4733..7b6dda54d 100644 --- a/src/scribe_data/language_data_extraction/Swedish/verbs/format_verbs.py +++ b/src/scribe_data/language_data_extraction/Swedish/verbs/format_verbs.py @@ -8,10 +8,12 @@ from scribe_data.utils import export_formatted_data, load_queried_data +LANGUAGE = "Swedish" +DATA_TYPE = "verbs" file_path = sys.argv[0] verbs_list, update_data_in_use, data_path = load_queried_data( - file_path=file_path, language="Swedish", data_type="verbs" + file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE ) verbs_formatted = {} @@ -50,8 +52,8 @@ export_formatted_data( formatted_data=verbs_formatted, update_data_in_use=update_data_in_use, - language="Swedish", - data_type="verbs", + language=LANGUAGE, + data_type=DATA_TYPE, ) os.remove(data_path)