Skip to content

Commit

Permalink
fix(find.py): Add fixes for over id'ing reference citations
Browse files Browse the repository at this point in the history
Fix typos
  • Loading branch information
flooie committed Jan 14, 2025
1 parent a785288 commit 9ea2169
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 16 deletions.
32 changes: 23 additions & 9 deletions eyecite/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from eyecite.helpers import (
disambiguate_reporters,
extract_pin_cite,
filter_citations,
joke_cite,
match_on_tokens,
order_citations,
)
from eyecite.models import (
CaseReferenceToken,
Expand Down Expand Up @@ -110,7 +110,7 @@ def get_citations(

citations.append(citation)

citations = order_citations(citations)
citations = filter_citations(citations)

# Remove citations with multiple reporter candidates where we couldn't
# guess correct reporter
Expand Down Expand Up @@ -139,13 +139,23 @@ def _extract_reference_citations(
if not citation.metadata.defendant:
# Skip if no defendant exists
return []
escaped_plaintiff = re.escape(citation.metadata.plaintiff or "")
escaped_defendant = re.escape(citation.metadata.defendant)
plaintiff_regex = (
rf"(?P<plaintiff>{re.escape(citation.metadata.plaintiff)})"
if citation.metadata.plaintiff
else ""
)
defendant_regex = (
rf"(?P<defendant>{re.escape(citation.metadata.defendant)})"
if citation.metadata.defendant
else ""
)

# Combine the components if they are not empty
combined_regex_parts = "|".join(
filter(None, [plaintiff_regex, defendant_regex])
)
pin_cite_regex = (
rf"\b(?:"
rf"(?P<plaintiff>{escaped_plaintiff})|"
rf"(?P<defendant>{escaped_defendant})\s?"
rf")\s+at\s+(?P<page>\d{{1,5}})?\b"
rf"\b(?:{combined_regex_parts})\s+at\s+(?P<page>\d{{1,5}})\b"
)

pin_cite_pattern = re.compile(pin_cite_regex)
Expand All @@ -169,7 +179,11 @@ def _extract_reference_citations(
full_span_end=end + offset,
index=0,
metadata={
"plaintiff": match.group("plaintiff"),
"plaintiff": (
match.group("plaintiff")
if "plaintiff" in match.groupdict()
else None
),
"defendant": match.group("defendant"),
"pin_cite": match.group("page"),
},
Expand Down
21 changes: 16 additions & 5 deletions eyecite/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,14 +307,25 @@ def disambiguate_reporters(
]


def order_citations(citations: List[CitationBase]) -> List[CitationBase]:
"""
Order citations that may have reference citations out or sequential order
def filter_citations(citations: List[CitationBase]) -> List[CitationBase]:
"""Filter and order citations that may have reference cites out of order
:param citations: List of citations
:return: Sorted citations
:return: Sorted and filtered citations
"""
return sorted(citations, key=lambda citation: citation.span())
filtered_citations = []
sorted_citations = sorted(citations, key=lambda citation: citation.span())
for citation in sorted_citations:
if filtered_citations:
last_citation = filtered_citations[-1]
last_span = last_citation.span()
current_span = citation.span()

if current_span[0] <= last_span[1]:
# Remove overlapping citations that can occur in edge cases
continue
filtered_citations.append(citation)
return filtered_citations


joke_cite: List[CitationBase] = [
Expand Down
3 changes: 2 additions & 1 deletion eyecite/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ def _resolve_reference_citation(
reference_citation: ReferenceCitation,
resolved_full_cites: ResolvedFullCites,
) -> Optional[ResourceType]:
"""
"""Resolve reference citations
Try to resolve reference citations by checking whether there is only one
full citation that appears with either the defendant or plaintiff
field of any of the previously resolved full citations.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_AnnotateTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def lower_annotator(before, text, after):
"<0>1 U.S. 1</0>. Foo v. Bar, <1>supra at 2</1>.",
[],
),
# Refernce cite
# Reference cite
(
"Foo v. Bar 1 U.S. 1. In Foo at 2.",
"Foo v. Bar <0>1 U.S. 1</0>. In <1>Foo at 2</1>.",
Expand Down
10 changes: 10 additions & 0 deletions tests/test_FindTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,16 @@ def test_find_citations(self):
reference_citation('Foo at 62',
metadata={'defendant': 'Foo',
"pin_cite": "62"})]),
# Test reference citation that contains at
('In re Foo 1 Mass. 12, 347-348. something something, in at we see that',
[case_citation(page='12', reporter="Mass.", volume="1",
metadata={'defendant': 'Foo', 'pin_cite': '347-348'})]),
# Test U.S. as plaintiff with reference citations
('U.S. v. Boch Oldsmobile, Inc., 909 F.2d 657, 660 (1st Cir.1990); Piper Aircraft, 454 U.S. at 241',
[case_citation(page='657', reporter="F.2d", volume="909",
metadata={'plaintiff': 'U.S.', 'defendant': 'Boch Oldsmobile, Inc.', 'pin_cite': '660'}),
case_citation(volume="454", page='241', reporter_found='U.S.', short=True,
metadata={'antecedent_guess': 'Aircraft', 'court': "scotus", 'pin_cite': None})]),
# Test reference citation after an id citation
('we said in Morton v. Mancari, 417 U. S. 535, 552 (1974) “Literally every piece ....”. “asisovereign tribal entities . . . .” Id. In Mancari at 665',
[case_citation(page='535', year=1974, volume="417",
Expand Down

0 comments on commit 9ea2169

Please sign in to comment.