Merge branch 'main' into 4721-free-opinion-scraper-got-multiple-results-while-attempting-save
flooie authored Dec 16, 2024
2 parents d26d51e + b429ae0 commit 1e4a8ba
Showing 5 changed files with 408 additions and 41 deletions.
16 changes: 2 additions & 14 deletions cl/assets/static-global/css/opinions.css
@@ -6,7 +6,6 @@ .opinion-body {

.harvard > * {
  font-family: Merriweather, "Times New Roman", Times, serif;
  font-size: 15px;
  letter-spacing: 0.2px;
  text-align: justify;
@@ -379,24 +378,19 @@ div.footnote:first-of-type {

/*Case Caption CSS*/
#caption-square {
  background-color: #F6F2EE;
  background-color: whitesmoke;
  margin-left: -15px;
  margin-right: -15px;
  margin-top: -20px;
}

#caption-square > ul > li {
  background-color: #fcfaf9;
  background-color: #e7e7e7;
  border-top-right-radius: 5px 5px; /* Rounds the corners */
  border-top-left-radius: 5px 5px; /* Rounds the corners */
  margin-left: 4px;
}

#caption-square > ul > li.active {
  background-color: #ffffff;
  border-bottom: 1px solid lightgrey;
}

#caption-square > ul > li.active {
  background-color: #ffffff;
  border-bottom: 1px solid white;
@@ -407,7 +401,6 @@
}

/*Opinion Date File*/

.case-date-new {
  border: 1px solid #B53C2C;
  padding: 0px 10px;
@@ -439,7 +432,6 @@ div.footnote:first-of-type {

#opinion-caption {
  margin-top: 20px;
  font-family: Merriweather, "Times New Roman", Times, serif;
  font-size: 15px;
  letter-spacing: 0.2px;
  line-height: 2.3em;
@@ -530,15 +522,13 @@ div.footnote:first-of-type {
}

div.subopinion-content > .harvard {
  font-family: Merriweather, "Times New Roman", Times, serif;
  font-size: 15px;
  letter-spacing: 0.2px;
  line-height: 2.3em;
  text-align: justify;
}

#columbia-text {
  font-family: Merriweather, "Times New Roman", Times, serif;
  font-size: 15px;
  letter-spacing: 0.2px;
  line-height: 2.3em;
@@ -684,14 +674,12 @@ div.footnote:first-of-type {
}

.case-details {
  font-family: Merriweather, "Times New Roman", Times, serif;
  letter-spacing: 0.2px;
  line-height:2.3em;
}

.opinion-section-title {
  margin-top: 50px;
  font-family: Merriweather, "Times New Roman", Times, serif;
}

/*Add style to align roman numerals */
16 changes: 14 additions & 2 deletions cl/opinion_page/templates/opinions.html
@@ -62,8 +62,8 @@ <h3><span>Admin</span></h3>
</p>
</div>
{% endif %}

<div id="opinion-toc" class="sidebar-section">
{% if tab == "opinions" %}
<div id="opinion-toc" class="sidebar-section">
<h3> <span>Jump To</span> </h3>
<li class="jump-links active"><a id="nav_top" href="{% if tab != "opinions" %}{% url 'view_case' cluster.pk cluster.slug %}{% endif %}#" class="active">Top</a></li>
<li class="jump-links"><a id="nav_caption" href="{% if tab != "opinions" %}{% url 'view_case' cluster.pk cluster.slug %}{% endif %}#caption" >Caption</a></li>
@@ -115,6 +115,7 @@ <h3> <span>Jump To</span> </h3>
</li>
{% endfor %}
</div>
{% endif %}

{% if cluster.sub_opinions.all.first.extracted_by_ocr or "U" in cluster.source and tab == "opinions" %}
<div class="col-sm-12 alert-warning alert v-offset-above-2">
@@ -160,6 +161,17 @@ <h3> <span>Jump To</span> </h3>
</div>
<div class="clearfix"></div>
{% endif %}

{% if tab == "pdf" %}
<div class="col-sm-12 alert-warning alert v-offset-above-2">
<p class="bottom">
Certain sections of this document, such as headnotes or
other content, may be redacted to comply with copyright
or privacy requirements.
</p>
</div>
<div class="clearfix"></div>
{% endif %}
</div>

<div class="bottom-section">
1 change: 1 addition & 0 deletions cl/recap/mergers.py
@@ -1670,6 +1670,7 @@ async def merge_attachment_page_data(
        main_rd = await RECAPDocument.objects.select_related(
            "docket_entry", "docket_entry__docket"
        ).aget(**params)

    except RECAPDocument.MultipleObjectsReturned as exc:
        if pacer_case_id:
            duplicate_rd_queryset = RECAPDocument.objects.filter(**params)
113 changes: 88 additions & 25 deletions cl/recap/tasks.py
@@ -110,7 +110,9 @@ async def process_recap_upload(pq: ProcessingQueue) -> None:
    if pq.upload_type == UPLOAD_TYPE.DOCKET:
        docket = await process_recap_docket(pq.pk)
    elif pq.upload_type == UPLOAD_TYPE.ATTACHMENT_PAGE:
        await process_recap_attachment(pq.pk)
        sub_docket_att_page_pks = await find_subdocket_att_page_rds(pq.pk)
        for pq_pk in sub_docket_att_page_pks:
            await process_recap_attachment(pq_pk)
    elif pq.upload_type == UPLOAD_TYPE.PDF:
        await process_recap_pdf(pq.pk)
    elif pq.upload_type == UPLOAD_TYPE.DOCKET_HISTORY_REPORT:
@@ -645,14 +647,93 @@ async def process_recap_docket(pk):
    }


async def get_att_data_from_pq(
    pq: ProcessingQueue,
) -> tuple[ProcessingQueue | None, dict | None, str | None]:
    """Extract attachment data from a ProcessingQueue object.
    :param pq: The ProcessingQueue object.
    :return: A tuple containing the updated pq, att_data, and text.
    """
    try:
        with pq.filepath_local.open("rb") as file:
            text = file.read().decode("utf-8")
    except IOError as exc:
        msg = f"Internal processing error ({exc.errno}: {exc.strerror})."
        await mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED)
        return None, None, None

    att_data = get_data_from_att_report(text, pq.court_id)
    if not att_data:
        msg = "Not a valid attachment page upload."
        await mark_pq_status(pq, msg, PROCESSING_STATUS.INVALID_CONTENT)
        return None, None, None

    if pq.pacer_case_id in ["undefined", "null"]:
        pq.pacer_case_id = att_data.get("pacer_case_id")
        await pq.asave()

    return pq, att_data, text


async def find_subdocket_att_page_rds(
    pk: int,
) -> list[int]:
    """Look for RECAP Documents that belong to subdockets, and create a PQ
    object for each additional attachment page that requires processing.
    :param pk: Primary key of the processing queue item.
    :return: A list of ProcessingQueue pks to process.
    """

    pq = await ProcessingQueue.objects.aget(pk=pk)
    court = await Court.objects.aget(id=pq.court_id)
    pq, att_data, text = await get_att_data_from_pq(pq)
    pacer_doc_id = att_data["pacer_doc_id"]
    main_rds = (
        RECAPDocument.objects.select_related("docket_entry__docket")
        .filter(
            pacer_doc_id=pacer_doc_id,
            docket_entry__docket__court=court,
        )
        .order_by("docket_entry__docket__pacer_case_id")
        .distinct("docket_entry__docket__pacer_case_id")
        .only(
            "pacer_doc_id",
            "docket_entry__docket__pacer_case_id",
            "docket_entry__docket__court_id",
        )
        .exclude(docket_entry__docket__pacer_case_id=pq.pacer_case_id)
    )
    pqs_to_process_pks = [
        pq.pk
    ]  # Add the original pq to the list of pqs to process
    original_file_content = text.encode("utf-8")
    original_file_name = pq.filepath_local.name
    async for main_rd in main_rds:
        main_pacer_case_id = main_rd.docket_entry.docket.pacer_case_id
        # Create additional pqs for each subdocket case found.
        pq_created = await ProcessingQueue.objects.acreate(
            uploader_id=pq.uploader_id,
            pacer_doc_id=pacer_doc_id,
            pacer_case_id=main_pacer_case_id,
            court_id=court.pk,
            upload_type=UPLOAD_TYPE.ATTACHMENT_PAGE,
            filepath_local=ContentFile(
                original_file_content, name=original_file_name
            ),
        )
        pqs_to_process_pks.append(pq_created.pk)
    return pqs_to_process_pks


async def process_recap_attachment(
    pk: int,
    tag_names: Optional[List[str]] = None,
    document_number: int | None = None,
) -> Optional[Tuple[int, str, list[RECAPDocument]]]:
    """Process an uploaded attachment page from the RECAP API endpoint.
    :param pk: The primary key of the processing queue item you want to work on
    :param tag_names: A list of tag names to add to all items created or
    modified in this function.
@@ -666,30 +747,11 @@ async def process_recap_attachment(
    await mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS)
    logger.info(f"Processing RECAP item (debug is: {pq.debug}): {pq}")

    try:
        text = pq.filepath_local.read().decode()
    except IOError as exc:
        msg = f"Internal processing error ({exc.errno}: {exc.strerror})."
        pq_status, msg = await mark_pq_status(
            pq, msg, PROCESSING_STATUS.FAILED
        )
        return pq_status, msg, []

    att_data = get_data_from_att_report(text, pq.court_id)
    logger.info(f"Parsing completed for item {pq}")

    if att_data == {}:
        # Bad attachment page.
        msg = "Not a valid attachment page upload."
        pq_status, msg = await mark_pq_status(
            pq, msg, PROCESSING_STATUS.INVALID_CONTENT
        )
        return pq_status, msg, []

    if pq.pacer_case_id in ["undefined", "null"]:
        # Bad data from the client. Fix it with parsed data.
        pq.pacer_case_id = att_data.get("pacer_case_id")
        await pq.asave()
    pq, att_data, text = await get_att_data_from_pq(pq)

    if document_number is None:
        document_number = att_data["document_number"]
@@ -723,6 +785,7 @@ async def process_recap_attachment(
    await add_tags_to_objs(tag_names, rds_affected)
    await associate_related_instances(pq, d_id=de.docket_id, de_id=de.pk)
    pq_status, msg = await mark_pq_successful(pq)

    return pq_status, msg, rds_affected


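Note on the tasks.py change: for attachment pages, process_recap_upload no longer calls process_recap_attachment(pq.pk) once. Instead, find_subdocket_att_page_rds first duplicates the uploaded attachment page into a new ProcessingQueue row for every subdocket case in the same court that shares the pacer_doc_id, returns the original pk followed by the new ones, and each pk is then processed in turn. A minimal sketch of that flow, assuming ProcessingQueue, UPLOAD_TYPE, and the two helpers are importable under the names used in this diff (the wrapper function itself is hypothetical):

from cl.recap.models import UPLOAD_TYPE, ProcessingQueue
from cl.recap.tasks import (
    find_subdocket_att_page_rds,
    process_recap_attachment,
)


async def handle_attachment_page(pq: ProcessingQueue) -> None:
    """Process an attachment page plus any subdocket copies of it."""
    assert pq.upload_type == UPLOAD_TYPE.ATTACHMENT_PAGE
    # find_subdocket_att_page_rds returns the original pq.pk first,
    # followed by the pks of the ProcessingQueue rows it created for
    # each matching subdocket case.
    for pq_pk in await find_subdocket_att_page_rds(pq.pk):
        await process_recap_attachment(pq_pk)

Fanning out to one ProcessingQueue row per subdocket keeps every attachment page on the same process_recap_attachment path, so status tracking and error handling stay per case instead of one upload silently serving several dockets.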
