From 1f408ccd0e3f644c26610a3830d78daacf4e9ede Mon Sep 17 00:00:00 2001 From: Matt Dahl Date: Thu, 23 Feb 2023 15:49:02 -0500 Subject: [PATCH 1/9] test(find): Adds failing test for court string without space (#135). --- tests/test_FindTest.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index 47339f1..f84b47c 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -121,6 +121,13 @@ def test_find_citations(self): 'defendant': 'test', 'court': 'ca4', 'pin_cite': '347-348'})]), + # Test with court string without space + ('bob lissner v. test 1 U.S. 12, 347-348 (Pa.Super. 1982)', + [case_citation(page='12', year=1982, + metadata={'plaintiff': 'lissner', + 'defendant': 'test', + 'court': 'pasuperct', + 'pin_cite': '347-348'})]), # Parallel cite with parenthetical ('bob lissner v. test 1 U.S. 12, 347-348, 1 S. Ct. 2, 358 (4th Cir. 1982) (overruling foo)', [case_citation(page='12', year=1982, From c9b4d78f9471a5ed4866b165f5ce030f65946470 Mon Sep 17 00:00:00 2001 From: Matt Dahl Date: Thu, 23 Feb 2023 16:08:29 -0500 Subject: [PATCH 2/9] fix(find): Strips whitespace from court strings for matching. --- eyecite/helpers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/eyecite/helpers.py b/eyecite/helpers.py index 4380be6..662f71a 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -42,14 +42,17 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: needs to be handled after disambiguation has been completed. """ court_str = strip_punct(paren_string) + court_str = court_str.replace(" ", "") court_code = None if court_str: # Map the string to a court, if possible. for court in courts: - # Use startswith because citations are often missing final period, - # e.g. "2d Cir" - if court["citation_string"].startswith(court_str): + # Use startswith because citation strings are often missing final + # period, e.g. "2d Cir" + # Remove whitespace because citation strings sometimes lack + # internal spaces, e.g. "Pa.Super." + if court["citation_string"].replace(" ", "").startswith(court_str): court_code = court["id"] break From 9c6b3871a585cbbe99352c2e50fec45e64142416 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 6 Dec 2024 20:00:30 -0600 Subject: [PATCH 3/9] fix(get_court_by_paren): try with an exact match add test --- eyecite/helpers.py | 4 ++++ tests/test_FindTest.py | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/eyecite/helpers.py b/eyecite/helpers.py index 662f71a..fd36f26 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -46,6 +46,10 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: court_code = None if court_str: + # Try with an exact match, e.g. Pa. + exact_match = next((court["id"] for court in courts if strip_punct(court["citation_string"].replace(" ", "")) == court_str), None) + if exact_match: + return exact_match # Map the string to a court, if possible. for court in courts: # Use startswith because citation strings are often missing final diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index f84b47c..de37c54 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -128,6 +128,12 @@ def test_find_citations(self): 'defendant': 'test', 'court': 'pasuperct', 'pin_cite': '347-348'})]), + # Test with court string exact match + ('Commonwealth v. Muniz, 164 A.3d 1189 (Pa. 2017)', + [case_citation(page='1189', reporter='A.3d', volume='164', year=2017, + metadata={'plaintiff': 'Commonwealth', + 'defendant': 'Muniz', + 'court': 'pa'})]), # Parallel cite with parenthetical ('bob lissner v. test 1 U.S. 12, 347-348, 1 S. Ct. 2, 358 (4th Cir. 1982) (overruling foo)', [case_citation(page='12', year=1982, From deac1a7c7a3b56299b5c9aafbf8cdfd5dcef3e3d Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 11 Dec 2024 11:33:11 -0600 Subject: [PATCH 4/9] fix(get_court_by_paren): try with an exact match --- eyecite/helpers.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/eyecite/helpers.py b/eyecite/helpers.py index fd36f26..3dcdb97 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -46,19 +46,20 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: court_code = None if court_str: - # Try with an exact match, e.g. Pa. - exact_match = next((court["id"] for court in courts if strip_punct(court["citation_string"].replace(" ", "")) == court_str), None) - if exact_match: - return exact_match - # Map the string to a court, if possible. for court in courts: - # Use startswith because citation strings are often missing final - # period, e.g. "2d Cir" # Remove whitespace because citation strings sometimes lack # internal spaces, e.g. "Pa.Super." - if court["citation_string"].replace(" ", "").startswith(court_str): + s = strip_punct(court["citation_string"]).replace(" ", "") + + # Check for an exact match first + if s == court_str: + return court["id"] + + # If no exact match, try to record a startswith match for possible eventual return + if s.startswith(court_str): court_code = court["id"] - break + + return court_code return court_code From 33c997c11673622b22d80a6cf21e8b105c485034 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 11 Dec 2024 11:37:47 -0600 Subject: [PATCH 5/9] fix(get_court_by_paren): try with an exact match --- eyecite/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eyecite/helpers.py b/eyecite/helpers.py index 3dcdb97..963ca6b 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -55,7 +55,8 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: if s == court_str: return court["id"] - # If no exact match, try to record a startswith match for possible eventual return + # If no exact match, try to record a startswith match for possible eventual + # return if s.startswith(court_str): court_code = court["id"] From 4943d054147e467f89e078c1b81b45a897be0cb9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 11 Dec 2024 11:43:53 -0600 Subject: [PATCH 6/9] fix(get_court_by_paren): fix comment line length --- eyecite/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eyecite/helpers.py b/eyecite/helpers.py index 963ca6b..7f56633 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -55,8 +55,8 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: if s == court_str: return court["id"] - # If no exact match, try to record a startswith match for possible eventual - # return + # If no exact match, try to record a startswith match for possible + # eventual return if s.startswith(court_str): court_code = court["id"] From d11736f4a38cb7a7b5947340f91c602fa4171b1f Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 11 Dec 2024 11:54:18 -0600 Subject: [PATCH 7/9] fix(get_court_by_paren): cast to str to avoid mypy warning --- eyecite/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eyecite/helpers.py b/eyecite/helpers.py index 7f56633..ea50159 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -53,7 +53,7 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: # Check for an exact match first if s == court_str: - return court["id"] + return str(court["id"]) # If no exact match, try to record a startswith match for possible # eventual return From ce0a409220794ea838249e318b493fe770b5d55a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin?= Date: Wed, 15 Jan 2025 13:12:00 -0600 Subject: [PATCH 8/9] fix(helpers): fix court string matching --- eyecite/helpers.py | 11 +++++------ tests/test_FindTest.py | 1 + 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/eyecite/helpers.py b/eyecite/helpers.py index ea50159..34b9e58 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -24,7 +24,6 @@ POST_SHORT_CITATION_REGEX, YEAR_REGEX, ) -from eyecite.utils import strip_punct BACKWARD_SEEK = 28 # Median case name length in the CL db is 28 (2016-02-26) @@ -41,15 +40,15 @@ def get_court_by_paren(paren_string: str) -> Optional[str]: Does not work on SCOTUS, since that court lacks parentheticals, and needs to be handled after disambiguation has been completed. """ - court_str = strip_punct(paren_string) - court_str = court_str.replace(" ", "") + + # Remove whitespace and punctuation because citation strings sometimes lack + # internal spaces, e.g. "Pa.Super." or "SC" (South Carolina) + court_str = re.sub(r"[^\w]", "", paren_string).lower() court_code = None if court_str: for court in courts: - # Remove whitespace because citation strings sometimes lack - # internal spaces, e.g. "Pa.Super." - s = strip_punct(court["citation_string"]).replace(" ", "") + s = re.sub(r"[^\w]", "", court["citation_string"]).lower() # Check for an exact match first if s == court_str: diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index de37c54..6aa83a5 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -488,6 +488,7 @@ def test_find_citations(self): # Long pin cite -- make sure no catastrophic backtracking in regex ('1 U.S. 1, 2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2286, 2287, 2288, 2289, 2290, 2291', [case_citation(metadata={'pin_cite': '2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2286, 2287, 2288, 2289, 2290, 2291'})]), + ('Foo v. Bar, 1 F.Supp. 1 (SC 1967)', [case_citation(volume='1', reporter='F.Supp.', year=1967, page='1', metadata={'plaintiff': 'Foo', 'defendant': 'Bar', 'court': 'sc'})]), ) # fmt: on self.run_test_pairs(test_pairs, "Citation extraction") From 78448b4330b59d08da539d538acad86851096088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin?= Date: Wed, 15 Jan 2025 13:39:11 -0600 Subject: [PATCH 9/9] test(test_FindTest): add test case --- tests/test_FindTest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index 6aa83a5..cab670e 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -488,6 +488,11 @@ def test_find_citations(self): # Long pin cite -- make sure no catastrophic backtracking in regex ('1 U.S. 1, 2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2286, 2287, 2288, 2289, 2290, 2291', [case_citation(metadata={'pin_cite': '2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2286, 2287, 2288, 2289, 2290, 2291'})]), + ('Commonwealth v. Muniz, 164 A.3d 1189 (Pa. 2017)', [ + case_citation(volume='164', reporter='A.3d', year=2017, + page='1189', + metadata={'plaintiff': 'Commonwealth', 'defendant': 'Muniz', + 'court': 'pa'})]), ('Foo v. Bar, 1 F.Supp. 1 (SC 1967)', [case_citation(volume='1', reporter='F.Supp.', year=1967, page='1', metadata={'plaintiff': 'Foo', 'defendant': 'Bar', 'court': 'sc'})]), ) # fmt: on