Implement API endpoint that returns TSV report about submissions that are pending review #1443

Draft · wants to merge 7 commits into main
91 changes: 91 additions & 0 deletions nmdc_server/api.py
@@ -637,6 +637,97 @@ async def download_zip_file(
)


@router.get(
"/metadata_submission/mixs",
Reviewer comment (Collaborator):

I recommend updating the URL path so that the final part describes what is being GET-ed (i.e. gotten/retrieved via the endpoint). In this case, I think of it as a "report" (I'm not too familiar with MIxS and how it relates to what the report contains; my guess is that it is the name of an ontology that the values in the "environment..." fields come from).

Suggested change:
-     "/metadata_submission/mixs",
+     "/metadata_submission/mixs_report",

tags=["metadata_submission"],
)
async def get_metadata_submissions_mixs(
db: Session = Depends(get_db),
):
r"""
Generate a report of NMDC submissions that are submitted pending review to allow review
of submission triads and evaluate for approval
"""
Comment on lines +647 to +650
Reviewer comment (Collaborator):

I revised the endpoint description after looking at the code. Here's what I came up with:

Suggested change:
- r"""
- Generate a report of NMDC submissions that are submitted pending review to allow review
- of submission triads and evaluate for approval
- """
+ r"""
+ Generate a TSV-formatted report of biosamples belonging to submissions
+ that have a status of "Submitted- Pending Review".
+ The report indicates which environmental package/extension, broad scale,
+ local scale, and medium are specified for each biosample. The report is
+ designed to facilitate the review of submissions by NMDC team members.
+ """

# Get the submissions from the database.
q = crud.get_query_for_submitted_pending_review(db)
submissions = q.all()

# Iterate through the submissions, building the data rows for the report.
header_row = [
"Submission ID",
"Status",
"Sample Name",
"Environmental Package/Extension",
"Environmental Broad Scale",
"Environmental Local Scale",
"Environmental Medium",
]
data_rows = []
for s in submissions:

metadata = s.metadata_submission # creates a concise alias
sample_data = metadata["sampleData"] if "sampleData" in metadata else {}
env_package = metadata["packageName"] if "packageName" in metadata else {}

# Get sample names from each sample type
for sample_type in sample_data:
samples = sample_data[sample_type] if sample_type in sample_data else {}
Reviewer comment (Collaborator):

Assuming samples is typically a list (as opposed to a dictionary), I'd recommend defaulting to an empty list (instead of an empty dictionary) here.

Suggested change:
- samples = sample_data[sample_type] if sample_type in sample_data else {}
+ samples = sample_data[sample_type] if sample_type in sample_data else []

# Iterate through each sample and extract the name
for x in samples:

# Get the sample name
name = x["samp_name"] if "samp_name" in x else {}
Reviewer comment (Collaborator):

Assuming name normally contains a string, I'd recommend defaulting to an empty string here (instead of an empty dictionary).

Suggested change:
- name = x["samp_name"] if "samp_name" in x else {}
+ name = x["samp_name"] if "samp_name" in x else ""

I have the same (analogous) thought about broad, local, and medium below.
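As a side note, the `x[key] if key in x else default` pattern used throughout this loop can be collapsed with the built-in `dict.get`; a minimal sketch (the sample record below is hypothetical, not taken from the PR):

```python
# Hypothetical sample record; the keys mirror those read in the loop.
x = {"samp_name": "surface soil", "env_medium": "soil [ENVO:00001998]"}

# dict.get(key, default) replaces `x[key] if key in x else default`.
name = x.get("samp_name", "")
broad = x.get("env_broad_scale", "")  # missing key falls back to ""
medium = x.get("env_medium", "")

print(name, repr(broad), medium)
```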

name = str(name)
name = name.replace("\t", "").replace("\r", "").replace("\n", "").lstrip("_")
# Get the env broad scale
broad = x["env_broad_scale"] if "env_broad_scale" in x else {}
broad = str(broad)
broad = broad.replace("\t", "").replace("\r", "").replace("\n", "").lstrip("_")
# Get the env local scale
local = x["env_local_scale"] if "env_local_scale" in x else {}
local = str(local)
local = local.replace("\t", "").replace("\r", "").replace("\n", "").lstrip("_")
Comment on lines +686 to +689
Reviewer comment (Collaborator):

I'd prefer the variable not be named local—for two reasons:

  1. The word "local" can have a special meaning in programming (i.e. local versus global) and I think future readers (of this code or stack traces involving this code) will find it confusing (at first glance).
  2. I think "local" is an adjective describing "scale". Based on that, I think the value this variable will contain is some kind of "scale".

With those things in mind, I'd recommend renaming this variable to local_scale (or even env_local_scale).

I have the same thought (analogous) about broad.

# Get the env medium
medium = x["env_medium"] if "env_medium" in x else {}
medium = str(medium)
medium = medium.replace("\t", "").replace("\r", "").replace("\n", "").lstrip("_")

# Append each sample as new row (with env data)
data_row = [
s.id,
s.status,
name,
env_package,
broad,
local,
medium,
]
data_rows.append(data_row)

# Build the report as an in-memory TSV "file" (buffer).
# Reference: https://docs.python.org/3/library/csv.html#csv.writer
buffer = StringIO()
writer = csv.writer(buffer, delimiter="\t")
writer.writerow(header_row)
writer.writerows(data_rows)

# Reset the buffer's internal file pointer to the beginning of the buffer, so that,
# when we stream the buffer's contents later, all of its contents are included.
buffer.seek(0)

# Stream the buffer's contents to the HTTP client as a downloadable TSV file.
# Reference: https://fastapi.tiangolo.com/advanced/custom-response
# Reference: https://mimetype.io/text/tab-separated-values
filename = "mixs-report.tsv"
response = StreamingResponse(
buffer,
media_type="text/tab-separated-values",
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
)

return response
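The four cleanup passes above (for the sample name, broad scale, local scale, and medium) are identical; a hypothetical helper could consolidate them. `clean_cell` is not part of the PR, only an illustrative sketch:

```python
def clean_cell(value, default=""):
    """Flatten a raw sample field into a single-line, TSV-safe string.

    Removes tab/CR/LF characters (which would corrupt a TSV row) and
    strips any leading underscores, matching the inline cleanup above.
    """
    text = default if value is None else str(value)
    return text.replace("\t", "").replace("\r", "").replace("\n", "").lstrip("_")


print(clean_cell("_surface\tsoil\n"))  # → surfacesoil
```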

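The `StringIO`/`csv.writer` buffering pattern used by the endpoint is standard-library only and easy to exercise in isolation; a minimal sketch with made-up rows:

```python
import csv
from io import StringIO

header_row = ["Submission ID", "Status", "Sample Name"]
data_rows = [["sub-1", "Submitted- Pending Review", "surface soil"]]

# Build the TSV in an in-memory buffer, then rewind it before reading
# (or streaming) so the full contents are included.
buffer = StringIO()
writer = csv.writer(buffer, delimiter="\t")
writer.writerow(header_row)
writer.writerows(data_rows)
buffer.seek(0)

first_line = buffer.readline().rstrip("\r\n")
print(first_line.split("\t"))  # → ['Submission ID', 'Status', 'Sample Name']
```

Note that `csv.writer` terminates rows with `\r\n` by default, which is why the sketch strips `\r\n` before splitting.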

@router.get(
"/metadata_submission/report",
tags=["metadata_submission"],
13 changes: 13 additions & 0 deletions nmdc_server/crud.py
@@ -686,6 +686,19 @@ def get_query_for_all_submissions(db: Session):
return all_submissions


def get_query_for_submitted_pending_review(db: Session):
Reviewer comment (Collaborator):

I'd prefer this be named...

- def get_query_for_submitted_pending_review(db: Session):
+ def get_query_for_submitted_pending_review_submissions(db: Session):

...as a way of indicating that the function can be used to get submissions.

r"""
Returns a SQLAlchemy query that can be used to retrieve submissions pending review.
Reviewer comment (Collaborator):

👍 Thanks for including a docstring!

Reference: https://fastapi.tiangolo.com/tutorial/sql-databases/#crud-utils
Reference: https://docs.sqlalchemy.org/en/14/orm/session_basics.html
"""
submitted_pending_review = db.query(models.SubmissionMetadata).filter(
models.SubmissionMetadata.status == "Submitted- Pending Review"
)
return submitted_pending_review


def get_roles_for_submission(
db: Session, submission: models.SubmissionMetadata
) -> List[models.SubmissionRole]: