Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Jan 13, 2025
1 parent d2989e8 commit 1c8ba33
Showing 1 changed file with 43 additions and 37 deletions.
80 changes: 43 additions & 37 deletions juriscraper/opinions/united_states/state/lactapp_2.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
from datetime import datetime, date
from datetime import date, datetime

from juriscraper.OpinionSiteLinear import OpinionSiteLinear
from juriscraper.lib.html_utils import get_row_column_text, get_row_column_links
from juriscraper.lib.date_utils import unique_year_month
from juriscraper.AbstractSite import logger
from juriscraper.lib.date_utils import unique_year_month
from juriscraper.lib.html_utils import (
get_row_column_links,
get_row_column_text,
)
from juriscraper.OpinionSiteLinear import OpinionSiteLinear


class Site(OpinionSiteLinear):
first_opinion_date = datetime(2019, 7, 17)
days_interval = 28 # Monthly interval
abbreviation_to_lower_court = {
abbreviation_to_lower_court = {
"Caddo": "First Judicial District Court for the Parish of Caddo, Louisiana",
"Ouachita": "Fourth Judicial District Court for the Parish of Ouachita, Louisiana",
"Ouachita": "Fourth Judicial District Court for the Parish of Ouachita, Louisiana",
"Bossier": "Twenty-Sixth Judicial District Court for the Parish of Bossier, Louisiana",
"DeSoto": "Forty-Second Judicial District Court for the Parish of DeSoto, Louisiana",
"Lincoln": "Third Judicial District Court for the Parish of Lincoln, Louisiana",
Expand Down Expand Up @@ -40,13 +44,12 @@ class Site(OpinionSiteLinear):
"OUACHITA Monroe City Court": "Monroe City Court for the Parish of Ouachita, Louisiana",
"Franklin OWC District 1-E": "Office of Workers' Compensation District 1-E for the Parish of Franklin, Louisiana",
"Minden City Court Webster": "Minden City Court for the Parish of Webster, Louisiana",
"Morehouse Bastrop City Court": "Bastrop City Court for the Parish of Morehouse, Louisiana",
"Morehouse Bastrop City Court": "Bastrop City Court for the Parish of Morehouse, Louisiana",
"Morehouse OWC District 1-E": "Office of Workers' Compensation District 1-E for the Parish of Morehouse, Louisiana",
"Webster Minden City Court": "Minden City Court for the Parish of Webster, Louisiana",
"Winn OWC District 2": "Office of Workers' Compensation District 2 for the Parish of Winn, Louisiana"
"Winn OWC District 2": "Office of Workers' Compensation District 2 for the Parish of Winn, Louisiana",
}


def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.court_id = self.__module__
Expand All @@ -59,10 +62,10 @@ def __init__(self, *args, **kwargs):

def _download(self):
html = super()._download()
#Currently there are no opinions for 2025, so we need to go back one year
# Currently there are no opinions for 2025, so we need to go back one year
if html is not None:
tables = html.cssselect('table#datatable')
if not tables or not tables[0].cssselect('tbody tr'):
tables = html.cssselect("table#datatable")
if not tables or not tables[0].cssselect("tbody tr"):
self.year -= 1
self.url = f"{self.base_url}?opinion_year={self.year}"
return self._download()
Expand All @@ -71,40 +74,43 @@ def _download(self):
def _process_html(self):
    """Parse the opinions table in ``self.html`` into ``self.cases``.

    Returns early, adding nothing, when ``self.html`` is ``None`` or when
    the page has no ``table#datatable`` element with body rows. Otherwise
    every row that ``skip_row_by_date`` does not reject is appended to
    ``self.cases`` exactly once.

    Raises:
        ValueError: if a row's date cell does not match ``%m/%d/%Y``
            (propagated from ``datetime.strptime``).
    """
    if self.html is None:
        return

    tables = self.html.cssselect("table#datatable")
    if not tables:
        return

    # Select the body rows once and reuse them for the guard and the loop.
    rows = tables[0].cssselect("tbody tr")
    if not rows:
        return

    logger.info(f"Processing cases for year: {self.year}")
    for row in rows:
        # The date cell is read once: parsed for filtering, stored as text.
        date_text = get_row_column_text(row, 1)
        case_date = datetime.strptime(date_text, "%m/%d/%Y").date()

        if self.skip_row_by_date(case_date):
            continue

        # Keep only the part of the author cell that precedes any comma.
        author = self.clean_judge_name(get_row_column_text(row, 4))

        # Replace the lower court abbreviation with its full name; when the
        # abbreviation is not in the mapping the raw cell text is kept.
        lower_court_abbr = get_row_column_text(row, 6)
        lower_court_full = self.abbreviation_to_lower_court.get(
            lower_court_abbr, lower_court_abbr
        )

        self.cases.append(
            {
                "date": date_text,
                "docket": get_row_column_text(row, 2),
                "name": get_row_column_text(row, 3),
                "author": author,
                "disposition": get_row_column_text(row, 5),
                "lower_court": lower_court_full,
                "url": get_row_column_links(row, 8),
            }
        )

def skip_row_by_date(self, case_date):
"""Determine if a row should be skipped based on the case date."""
# Skip if before first opinion date
Expand All @@ -113,14 +119,14 @@ def skip_row_by_date(self, case_date):

def clean_judge_name(self, name):
    """Remove everything after a comma in the judge's name.

    Args:
        name: the raw judge text.

    Returns:
        The text before the first comma with surrounding whitespace
        stripped; the whole stripped text when there is no comma.
    """
    return name.split(",")[0].strip()

def _download_backwards(self, target_year: int) -> None:
    """Download and process the opinions page for one backscrape year.

    Sets ``self.year`` and ``self.url`` for ``target_year``, stores the
    result of ``self._download()`` in ``self.html`` and then calls
    ``self._process_html()``.

    Args:
        target_year: the year to request via the ``opinion_year`` query
            parameter.
    """
    logger.info(f"Backscraping for date: {target_year}")
    self.year = target_year
    self.url = f"{self.base_url}?opinion_year={self.year}"

    # Pagination not required, all the opinions data is sent in the first request
    self.html = self._download()
    self._process_html()

Expand All @@ -139,4 +145,4 @@ def make_backscrape_iterable(self, kwargs: dict) -> None:
end = int(end) + 1 if end else datetime.now().year + 1

# Create a range of years for back scraping
self.back_scrape_iterable = range(start, end)
self.back_scrape_iterable = range(start, end)

0 comments on commit 1c8ba33

Please sign in to comment.