From d6e7e7c5c169a8c0fc9b65f2412389b29e6a592d Mon Sep 17 00:00:00 2001 From: thorunna Date: Fri, 13 Aug 2021 15:18:56 +0000 Subject: [PATCH 1/8] first handling for agreement-conj added --- src/reynir_correct/pattern.py | 67 +++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/reynir_correct/pattern.py b/src/reynir_correct/pattern.py index 4489d41..c169aa2 100644 --- a/src/reynir_correct/pattern.py +++ b/src/reynir_correct/pattern.py @@ -159,6 +159,7 @@ class PatternMatcher: ctx_uncertain_verbs: ContextDict = cast(ContextDict, None) ctx_confident_verbs: ContextDict = cast(ContextDict, None) ctx_dir_loc: ContextDict = cast(ContextDict, None) + ctx_agreement_conj: ContextDict = cast(ContextDict, None) def __init__(self, ann: List[Annotation], sent: Sentence) -> None: # Annotation list @@ -1303,6 +1304,30 @@ def doubledefinite(self, match: SimpleTree) -> None: ) ) + def agreement_conj(self, match: SimpleTree) -> None: + vp = match.first_match("VP > so_ft") + if vp is None: return + so = vp.first_match("so") + if so is None: return + start, end = so.span + sbj = match.first_match("NP-SUBJ") + variants = [f for f in so.all_variants if f != "vh"] + variants.append("fh") + suggest = self.get_wordform(so.lemma, so.cat, variants) + if not suggest: + return + text = f"Hér á sögnin '{so.lemma}' að samræmast frumlaginu '{sbj.lemma}'" + self._ann.append( + Annotation( + start=start, + end=end, + code="P_NT", + text=text, + original=so.tidy_text, + suggest=suggest, + ) + ) + @classmethod def add_pattern(cls, p: PatternTuple) -> None: """ Validates and adds a pattern to the class global pattern list """ @@ -2287,6 +2312,48 @@ def dir4loc(verbs: Set[str], tree: SimpleTree) -> bool: ) ) + ## # Check errors in dir4loc + # def agreement_conj(verbs: Set[str], tree: SimpleTree) -> bool: + # """ Context matching function for the %noun macro in combination + # with 'að' """ + # lemma = tree.own_lemma + # if not lemma: + # # The passed-in tree node is probably not a terminal + # return False + # return lemma in verbs + + # VERBS: FrozenSet[str] = frozenset(("safna", "kaupa", "læsa", "geyma")) + # # The macro %verb is resolved by calling the function dir4loc() + # # with the potentially matching tree node as an argument. + # cls.ctx_dir_loc = {"verb": partial(dir4loc, VERBS)} + # cls.add_pattern( + # ( + # "út", # Trigger lemma for this pattern + # "VP > { VP > { %verb } NP > { PP > { ADVP > { 'út' } P > { 'í' } NP > { 'búð' } } } }", + # lambda self, match: self.dir_loc(match), + # cls.ctx_dir_loc, + # ) + # ) + + cls.add_pattern( + ( + frozenset(("og", "en", "heldur")), # Trigger lemma for this pattern + "S0 > { S-MAIN > { IP > { NP-SUBJ > { no_et_nf } } } S-MAIN >> [ VP > { so_ft } .* ] }", + lambda self, match: self.agreement_conj(match), + cls.ctx_agreement_conj, + ) + ) + + ## Virkar ekki + cls.add_pattern( + ( + frozenset(("og", "en", "heldur")), # Trigger lemma for this pattern + "S0 > { S-MAIN > { IP > { NP-SUBJ > { no_ft_nf } } } S-MAIN >> [ VP > { so_et } .* ] }", + lambda self, match: self.agreement_conj(match), + cls.ctx_agreement_conj, + ) + ) + def run(self) -> None: """ Apply the patterns to the sentence """ tree = None if self._sent is None else self._sent.tree From ac4e248fdebc3fcb75533f504eee5051ec4438fe Mon Sep 17 00:00:00 2001 From: thorunna Date: Fri, 13 Aug 2021 15:19:15 +0000 Subject: [PATCH 2/8] agreement-conj added --- eval/eval.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eval/eval.py b/eval/eval.py index 6de8028..9612a0e 100755 --- a/eval/eval.py +++ b/eval/eval.py @@ -359,6 +359,7 @@ ], "grammar": [ "agreement", + "agreement-conj", "agreement-concord", "agreement-pred", "agreement-pro", @@ -579,6 +580,7 @@ "collocation": ["collocation", "collocation-idiom", "though",], "grammar": [ "agreement", + "agreement-conj", "agreement-concord", "agreement-pred", "agreement-pro", From 85649026da0b182a71eaad54113d352fe9a5c32e Mon Sep 17 00:00:00 2001 From: thorunna Date: Mon, 16 Aug 2021 11:15:56 +0000 Subject: [PATCH 3/8] added handling for agreement errors --- src/reynir_correct/pattern.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reynir_correct/pattern.py b/src/reynir_correct/pattern.py index c169aa2..8a48e80 100644 --- a/src/reynir_correct/pattern.py +++ b/src/reynir_correct/pattern.py @@ -1310,7 +1310,7 @@ def agreement_conj(self, match: SimpleTree) -> None: so = vp.first_match("so") if so is None: return start, end = so.span - sbj = match.first_match("NP-SUBJ") + sbj = match.first_match("NP-SUBJ > (no|pfn)") variants = [f for f in so.all_variants if f != "vh"] variants.append("fh") suggest = self.get_wordform(so.lemma, so.cat, variants) From 32c2ab4d9360077f072b6d0c9b4368c4cc7a0a5a Mon Sep 17 00:00:00 2001 From: thorunna Date: Tue, 17 Aug 2021 13:57:49 +0000 Subject: [PATCH 4/8] more handling for agreement-conj --- src/reynir_correct/pattern.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/reynir_correct/pattern.py b/src/reynir_correct/pattern.py index 8a48e80..2d4e50b 100644 --- a/src/reynir_correct/pattern.py +++ b/src/reynir_correct/pattern.py @@ -1310,7 +1310,9 @@ def agreement_conj(self, match: SimpleTree) -> None: so = vp.first_match("so") if so is None: return start, end = so.span - sbj = match.first_match("NP-SUBJ > (no|pfn)") + sbj = match.first_match("NP-SUBJ") + if len(sbj) > 1: #TODO: more accurate subject selection + sbj = sbj[0] variants = [f for f in so.all_variants if f != "vh"] variants.append("fh") suggest = self.get_wordform(so.lemma, so.cat, variants) @@ -2313,7 +2315,7 @@ def dir4loc(verbs: Set[str], tree: SimpleTree) -> bool: ) ## # Check errors in dir4loc - # def agreement_conj(verbs: Set[str], tree: SimpleTree) -> bool: + # def dir4loc(verbs: Set[str], tree: SimpleTree) -> bool: # """ Context matching function for the %noun macro in combination # with 'að' """ # lemma = tree.own_lemma @@ -2338,7 +2340,7 @@ def dir4loc(verbs: Set[str], tree: SimpleTree) -> bool: cls.add_pattern( ( frozenset(("og", "en", "heldur")), # Trigger lemma for this pattern - "S0 > { S-MAIN > { IP > { NP-SUBJ > { no_et_nf } } } S-MAIN >> [ VP > { so_ft } .* ] }", + "S0 > { S-MAIN > { IP > { NP-SUBJ > { no_et_nf } } } C S-MAIN >> [ VP > { so_ft } .* ] }", lambda self, match: self.agreement_conj(match), cls.ctx_agreement_conj, ) @@ -2348,12 +2350,33 @@ def dir4loc(verbs: Set[str], tree: SimpleTree) -> bool: cls.add_pattern( ( frozenset(("og", "en", "heldur")), # Trigger lemma for this pattern - "S0 > { S-MAIN > { IP > { NP-SUBJ > { no_ft_nf } } } S-MAIN >> [ VP > { so_et } .* ] }", + "S0 > { S-MAIN > { IP > { NP-SUBJ > { no_ft_nf } } } S-MAIN >> [ VP > { VP-AUX > { so_et_p3 } } .* ] }", lambda self, match: self.agreement_conj(match), cls.ctx_agreement_conj, ) ) + ## Virkar ekki + cls.add_pattern( + ( + "heldur", # Trigger lemma for this pattern + "S0 > { S-MAIN }", + #"S0 > { S-MAIN > { IP > { NP-SUBJ > [ no_et_nf C no_et_nf ] } } C S-MAIN >> [ VP > { so_et } .* ] }", + lambda self, match: self.agreement_conj(match), + cls.ctx_agreement_conj, + ) + ) + + ## Virkar ekki + #cls.add_pattern( + # ( + # frozenset(("og", "en", "heldur")), # Trigger lemma for this pattern + # "S0 > { S-MAIN > { IP > { NP-SUBJ > { no_ft_nf } } } C S-MAIN >> [ VP > { so_et } .* ] }", + # lambda self, match: self.agreement_conj(match), + # cls.ctx_agreement_conj, + # ) + #) + def run(self) -> None: """ Apply the patterns to the sentence """ tree = None if self._sent is None else self._sent.tree From 556a7d8d39498a8c03efc7dbf55372e26061cc6b Mon Sep 17 00:00:00 2001 From: thorunna Date: Wed, 18 Aug 2021 16:15:58 +0000 Subject: [PATCH 5/8] handling for agreement-subpost added --- src/reynir_correct/pattern.py | 50 ++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/src/reynir_correct/pattern.py b/src/reynir_correct/pattern.py index 2d4e50b..0ed4604 100644 --- a/src/reynir_correct/pattern.py +++ b/src/reynir_correct/pattern.py @@ -160,6 +160,7 @@ class PatternMatcher: ctx_confident_verbs: ContextDict = cast(ContextDict, None) ctx_dir_loc: ContextDict = cast(ContextDict, None) ctx_agreement_conj: ContextDict = cast(ContextDict, None) + ctx_agreement_subpost: ContextDict = cast(ContextDict, None) def __init__(self, ann: List[Annotation], sent: Sentence) -> None: # Annotation list @@ -1305,12 +1306,13 @@ def doubledefinite(self, match: SimpleTree) -> None: ) def agreement_conj(self, match: SimpleTree) -> None: - vp = match.first_match("VP > so_ft") + vp = match.first_match("VP > (so_ft|so_et)") if vp is None: return so = vp.first_match("so") if so is None: return start, end = so.span sbj = match.first_match("NP-SUBJ") + if sbj is None: return if len(sbj) > 1: #TODO: more accurate subject selection sbj = sbj[0] variants = [f for f in so.all_variants if f != "vh"] @@ -1330,6 +1332,33 @@ def agreement_conj(self, match: SimpleTree) -> None: ) ) + def agreement_subpost_sing(self, match: SimpleTree) -> None: + vp = match.first_match("VP > (so_ft)") + if vp is None: return + so = vp.first_match("so") + if so is None: return + start, end = so.span + sbj = match.first_match("NP-SUBJ > { (no_nf_et|fn_nf_et) }") + if sbj is None: return + if len(sbj) > 1: #TODO: more accurate subject selection + sbj = sbj[0] + variants = [f for f in so.all_variants if f != "vh"] + variants.append("fh") + suggest = self.get_wordform(so.lemma, so.cat, variants) + if not suggest: + return + text = f"Hér á sögnin '{so.lemma}' að samræmast eintölufrumlaginu '{sbj.lemma}'" + self._ann.append( + Annotation( + start=start, + end=end, + code="P_NT", + text=text, + original=so.tidy_text, + suggest=suggest, + ) + ) + @classmethod def add_pattern(cls, p: PatternTuple) -> None: """ Validates and adds a pattern to the class global pattern list """ @@ -2377,6 +2406,25 @@ def dir4loc(verbs: Set[str], tree: SimpleTree) -> bool: # ) #) + ## Virkar ekki + #cls.add_pattern( + # ( + # "mögulega", # Trigger lemma for this pattern + # "VP > { VP > { so_et_p3 } NP-SUBJ > { (fn_et_nf|no_et_nf) } }", + # lambda self, match: self.agreement_subpost(match), + # cls.ctx_agreement_subpost, + # ) + #) + + # cls.add_pattern( + # ( + # "kolasvæði", # Trigger lemma for this pattern + # "IP > { NP-SUBJ > { fn_nf_et } VP > { VP > { so_ft } } }", + # lambda self, match: self.agreement_subpost_sing(match), + # cls.ctx_agreement_subpost, + # ) + # ) + def run(self) -> None: """ Apply the patterns to the sentence """ tree = None if self._sent is None else self._sent.tree From 6bd5dfc1287f1d0d930d64205f745c4c7347f047 Mon Sep 17 00:00:00 2001 From: thorunna Date: Fri, 27 Aug 2021 15:40:18 +0000 Subject: [PATCH 6/8] handling for agreement-concord added --- src/reynir_correct/pattern.py | 74 +++++++++++++++++++++++++++++------ 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/src/reynir_correct/pattern.py b/src/reynir_correct/pattern.py index 0ed4604..80d2dd7 100644 --- a/src/reynir_correct/pattern.py +++ b/src/reynir_correct/pattern.py @@ -161,6 +161,7 @@ class PatternMatcher: ctx_dir_loc: ContextDict = cast(ContextDict, None) ctx_agreement_conj: ContextDict = cast(ContextDict, None) ctx_agreement_subpost: ContextDict = cast(ContextDict, None) + ctx_agreement_concord: ContextDict = cast(ContextDict, None) def __init__(self, ann: List[Annotation], sent: Sentence) -> None: # Annotation list @@ -1359,6 +1360,29 @@ def agreement_subpost_sing(self, match: SimpleTree) -> None: ) ) + def agreement_concord(self, match: SimpleTree) -> None: + np = match.first_match("NP") + #if vp is None: return + assert np is not None + fn = np.first_match("fn") + assert fn is not None + no = np.first_match("no") + start, end = np.span + suggest = self.get_wordform(fn.lemma, fn.cat, no.lemma, no.cat) + if not suggest: + return + text = f"Hér á fornafnið '{fn.lemma}' að samræmast nafnorðinu '{no.lemma}'" + self._ann.append( + Annotation( + start=start, + end=end, + code="P_NT", + text=text, + original=so.tidy_text, + suggest=suggest, + ) + ) + @classmethod def add_pattern(cls, p: PatternTuple) -> None: """ Validates and adds a pattern to the class global pattern list """ @@ -2375,26 +2399,54 @@ def dir4loc(verbs: Set[str], tree: SimpleTree) -> bool: ) ) - ## Virkar ekki cls.add_pattern( ( - frozenset(("og", "en", "heldur")), # Trigger lemma for this pattern - "S0 > { S-MAIN > { IP > { NP-SUBJ > { no_ft_nf } } } S-MAIN >> [ VP > { VP-AUX > { so_et_p3 } } .* ] }", + "þessi", # Trigger lemma for this pattern + "NP-POSS > { NP-POSS > { fn_et_ef_kk } no_ft_ef_kk }", lambda self, match: self.agreement_conj(match), cls.ctx_agreement_conj, ) ) ## Virkar ekki - cls.add_pattern( - ( - "heldur", # Trigger lemma for this pattern - "S0 > { S-MAIN }", + # cls.add_pattern( + # ( + # "og", # Trigger lemma for this pattern + # "IP > { NP-SUBJ > { 'ég' C > {'og'} } VP >> { so_p3 } }", + # lambda self, match: self.agreement_conj(match), + # cls.ctx_agreement_conj, + # ) + # ) + + # cls.add_pattern( + # ( + # frozenset(("og", "en", "heldur")), # Trigger lemma for this pattern + # "IP > { NP-SUBJ > { 'og' } VP > { VP > { so_et } } }", + # lambda self, match: self.agreement_conj(match), + # cls.ctx_agreement_conj, + # ) + # ) + + ## Virkar ekki + # cls.add_pattern( + # ( + # frozenset(("og", "en", "heldur")), # Trigger lemma for this pattern + # "S0 > { S-MAIN > { IP > { NP-SUBJ > { no_ft_nf } } } S-MAIN >> [ VP > { VP-AUX > { so_et_p3 } } .* ] }", + # lambda self, match: self.agreement_conj(match), + # cls.ctx_agreement_conj, + # ) + # ) + + ## Virkar ekki + # cls.add_pattern( + # ( + # "heldur", # Trigger lemma for this pattern + # "S0 > { S-MAIN }", #"S0 > { S-MAIN > { IP > { NP-SUBJ > [ no_et_nf C no_et_nf ] } } C S-MAIN >> [ VP > { so_et } .* ] }", - lambda self, match: self.agreement_conj(match), - cls.ctx_agreement_conj, - ) - ) + # lambda self, match: self.agreement_conj(match), + # cls.ctx_agreement_conj, + # ) + # ) ## Virkar ekki #cls.add_pattern( From c61c137c268265c1b1826aeb2255f6b7435b1b60 Mon Sep 17 00:00:00 2001 From: thorunna Date: Wed, 25 Oct 2023 09:42:09 +0000 Subject: [PATCH 7/8] responded to comments --- src/reynir_correct/pattern.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/reynir_correct/pattern.py b/src/reynir_correct/pattern.py index 7734187..93de72b 100644 --- a/src/reynir_correct/pattern.py +++ b/src/reynir_correct/pattern.py @@ -1708,6 +1708,7 @@ def né(self, match: SimpleTree) -> None: ) def agreement_conj(self, match: SimpleTree) -> None: + """A verb, whose subject precedes a conjunction, is not in agreement with the subject. E.g. 'Bílarnir eru léttari og gæti verið hraðari.'""" vp = match.first_match("VP > (so_ft|so_et)") if vp is None: return @@ -1738,6 +1739,7 @@ def agreement_conj(self, match: SimpleTree) -> None: ) def agreement_subpost_sing(self, match: SimpleTree) -> None: + """A plural verb which precedes its subject is not in agreement with the subject, which is singular. E.g. 'Í skrúðgöngunni eru fólk klætt...'""" vp = match.first_match("VP > (so_ft)") if vp is None: return @@ -1768,11 +1770,13 @@ def agreement_subpost_sing(self, match: SimpleTree) -> None: ) def agreement_concord(self, match: SimpleTree) -> None: + """A pronoun is not in agreement with the following noun, e.g. 'Við kaupum ákveðin hluti'""" np = match.first_match("NP") - # if vp is None: return - assert np is not None + if np is None: + return fn = np.first_match("fn") - assert fn is not None + if fn is None: + return no = np.first_match("no") start, end = np.span suggest = self.get_wordform(fn.lemma, fn.cat, no.lemma, no.cat) From 287cdacd96d0b6416a13c1835289cbebb46a537e Mon Sep 17 00:00:00 2001 From: thorunna Date: Thu, 26 Oct 2023 09:34:44 +0000 Subject: [PATCH 8/8] responded to comment --- src/reynir_correct/pattern.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/reynir_correct/pattern.py b/src/reynir_correct/pattern.py index 93de72b..2878215 100644 --- a/src/reynir_correct/pattern.py +++ b/src/reynir_correct/pattern.py @@ -1708,7 +1708,8 @@ def né(self, match: SimpleTree) -> None: ) def agreement_conj(self, match: SimpleTree) -> None: - """A verb, whose subject precedes a conjunction, is not in agreement with the subject. E.g. 'Bílarnir eru léttari og gæti verið hraðari.'""" + """A verb, whose subject precedes a conjunction, is not in agreement with the subject. + E.g. 'Bílarnir eru léttari og gæti verið hraðari.'""" vp = match.first_match("VP > (so_ft|so_et)") if vp is None: return @@ -1739,7 +1740,8 @@ def agreement_conj(self, match: SimpleTree) -> None: ) def agreement_subpost_sing(self, match: SimpleTree) -> None: - """A plural verb which precedes its subject is not in agreement with the subject, which is singular. E.g. 'Í skrúðgöngunni eru fólk klætt...'""" + """A plural verb which precedes its subject is not in agreement with the subject, + which is singular. E.g. 'Í skrúðgöngunni eru fólk klætt...'""" vp = match.first_match("VP > (so_ft)") if vp is None: return