Skip to content

Commit

Permalink
Merge pull request #64 from elixir-europe/process-isa-json-after-bios…
Browse files Browse the repository at this point in the history
…amples

Process isa json after biosamples
  • Loading branch information
kdp-cloud authored Nov 7, 2024
2 parents a180bab + ba58c6c commit b907a74
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 90 deletions.
24 changes: 18 additions & 6 deletions mars-cli/mars_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@ def cli(ctx, development):
help="Name of a credentials file",
)
@click.argument("isa_json_file", type=click.File("r"))
@click.option(
"--submit-to-biosamples",
type=click.BOOL,
default=True,
help="Submit to BioSamples.",
)
@click.option("--submit-to-ena", type=click.BOOL, default=True, help="Submit to ENA.")
@click.option(
"--file-transfer",
Expand All @@ -200,29 +206,34 @@ def cli(ctx, development):
type=click.BOOL,
help="Boolean indicating if the investigation is the root of the ISA JSON. Set this to True if the ISA-JSON does not contain a 'investigation' field.",
)
@click.option(
"--output",
type=click.STRING,
default=f"output_{datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}",
)
@click.pass_context
def submit(
ctx,
credential_service_name,
username_credentials,
credentials_file,
isa_json_file,
submit_to_biosamples,
submit_to_ena,
submit_to_metabolights,
investigation_is_root,
file_transfer,
output,
data_files,
):
"""Start a submission to the target repositories."""
target_repositories = [TargetRepository.BIOSAMPLES]
target_repositories = []

if submit_to_biosamples:
target_repositories.append(TargetRepository.BIOSAMPLES)

if submit_to_ena:
target_repositories.append(TargetRepository.ENA)
target_repositories.remove(TargetRepository.BIOSAMPLES)
print_and_log(
f"Skipping {TargetRepository.BIOSAMPLES} repository due to {TargetRepository.ENA} being present in the list of repositories",
level="debug",
)

if submit_to_metabolights:
target_repositories.append(TargetRepository.METABOLIGHTS)
Expand All @@ -245,6 +256,7 @@ def submit(
investigation_is_root,
urls_dict,
file_transfer,
output,
data_file_paths,
)
except requests.RequestException as err:
Expand Down
62 changes: 30 additions & 32 deletions mars-cli/mars_lib/isa_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,21 @@


def reduce_isa_json_for_target_repo(
input_isa_json: Investigation, target_repo: str
) -> Investigation:
input_isa_json: IsaJson, target_repo: str
) -> IsaJson:
"""
Filters out assays that are not meant to be sent to the specified target repository.
Args:
input_isa_json (Investigation): Input ISA JSON that contains the original information.
input_isa_json (IsaJson): Input ISA JSON that contains the original information.
target_repo (TargetRepository): Target repository as a constant.
Returns:
Investigation: Filtered ISA JSON.
IsaJson: Filtered ISA JSON.
"""
filtered_isa_json = input_isa_json.model_copy(deep=True)
new_studies = []
studies = filtered_isa_json.studies
studies = filtered_isa_json.investigation.studies
for study in studies:
if target_repo == TargetRepository.BIOSAMPLES:
filtered_assays = []
Expand All @@ -51,7 +51,7 @@ def reduce_isa_json_for_target_repo(
study.assays = filtered_assays
new_studies.append(study)

filtered_isa_json.studies = new_studies
filtered_isa_json.investigation.studies = new_studies
return filtered_isa_json


Expand All @@ -64,6 +64,8 @@ def detect_target_repo_comment(comments: List[Comment]) -> Comment:
Returns:
Comment: The comment where the name corresponds with the name of the provided target repo.
"""
if len(comments) < 1:
raise ValueError("No comments found! Not able to detect the target repository!")
return next(comment for comment in comments if comment.name == TARGET_REPO_KEY)


Expand Down Expand Up @@ -188,13 +190,15 @@ def accession_characteristic_present(
f"'where' atribute is missing in path {material_type_path.key}."
)

accession_characteristics = [
char
for char in material.characteristics
if char.category
and char.category.characteristicType
and char.category.characteristicType.annotationValue == "accession"
]
accession_characteristics = []
for char in material.characteristics:
if char.category and char.category.characteristicType:
if char.category.characteristicType.annotationValue:
if char.category.characteristicType.annotationValue == "accession":
accession_characteristics.append(char)
else:
if char.category.characteristicType == "accession":
accession_characteristics.append(char)

if len(accession_characteristics) > 1:
raise AttributeError(
Expand Down Expand Up @@ -255,17 +259,12 @@ def add_accession_to_node(
if not updated_material_accession_characteristic:
raise ValueError("Accession characteristic is not present.")

if updated_material_accession_characteristic.value and hasattr(
updated_material_accession_characteristic.value, "annotationValue"
):
accession_ontology_annotation = OntologyAnnotation()
accession_ontology_annotation.id = (
f"#ontology_annotation/accession_{updated_material.id}"
)
accession_ontology_annotation.annotationValue = accession_number
updated_material_accession_characteristic.value = accession_ontology_annotation
else:
updated_material_accession_characteristic.value = accession_number
accession_ontology_annotation = OntologyAnnotation()
accession_ontology_annotation.id = (
f"#ontology_annotation/accession_{updated_material.id}"
)
accession_ontology_annotation.annotationValue = accession_number
updated_material_accession_characteristic.value = accession_ontology_annotation

updated_material.characteristics.append(updated_material_accession_characteristic)
print(f"{updated_material.id}: {updated_material_accession_characteristic.value}.")
Expand Down Expand Up @@ -352,20 +351,18 @@ def create_accession_characteristic(
updated_material.characteristics.append(new_material_attribute_value)


def update_investigation(
investigation: Investigation, repo_response: RepositoryResponse
) -> Investigation:
def update_isa_json(isa_json: IsaJson, repo_response: RepositoryResponse) -> IsaJson:
"""
Adds the accession to the ISA JSON.
Args:
isa_json (Investigation): The ISA JSON to be updated.
isa_json (IsaJson): The ISA JSON to be updated.
repo_response (RepositoryResponse): The response from the repository.
Returns:
Investigation: The updated ISA JSON.
IsaJson: The updated ISA JSON.
"""
updated_investigation = investigation.model_copy(deep=True)
investigation = isa_json.investigation
for accession in repo_response.accessions:

has_assay_in_path = [p for p in accession.path if p.key == "assays"]
Expand All @@ -380,7 +377,7 @@ def update_investigation(
if not study_filter:
raise ValueError(f"Study filter is not present in {accession.path}.")

updated_node = apply_filter(study_filter, updated_investigation.studies)
updated_node = apply_filter(study_filter, investigation.studies)

if target_level == "assay":
assay_filter = get_filter_for_accession_key(accession, "assays")
Expand All @@ -407,4 +404,5 @@ def update_investigation(

add_accession_to_node(updated_node, accession.value, material_type_path)

return updated_investigation
isa_json.investigation = investigation
return isa_json
94 changes: 70 additions & 24 deletions mars-cli/mars_lib/submit.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
from datetime import datetime
from io import TextIOWrapper
import requests
import json
Expand All @@ -11,8 +13,13 @@
input_json_schema_filepath,
)
from mars_lib.credential import CredentialManager
from mars_lib.isa_json import load_isa_json
from mars_lib.isa_json import (
load_isa_json,
reduce_isa_json_for_target_repo,
update_isa_json,
)
from mars_lib.models.isa_json import IsaJson
from mars_lib.models.repository_response import RepositoryResponse
from mars_lib.target_repo import TargetRepository
from mars_lib.logging import print_and_log
from pydantic import ValidationError
Expand All @@ -22,6 +29,17 @@
from typing import List


def save_step_to_file(time_stamp: float, filename: str, isa_json: IsaJson):
    """
    Writes a snapshot of the ISA JSON to a per-run directory below `tmp/`.

    Args:
        time_stamp (float): POSIX timestamp identifying the run. It determines the directory name, so every step saved with the same value ends up in the same directory.
        filename (str): Name of the file to write, without extension ('.json' is appended).
        isa_json (IsaJson): Object whose `model_dump_json` output is written to the file.
    """
    # Build the directory name from `time_stamp` instead of the current time:
    # steps of one run that are saved in different seconds would otherwise be
    # scattered over several directories.
    run_label = datetime.fromtimestamp(time_stamp).strftime("%Y-%m-%dT%H:%M:%S")
    dir_path = os.path.join("tmp", run_label)
    os.makedirs(dir_path, exist_ok=True)

    # Explicit encoding so the dump does not depend on the platform default.
    file_path = os.path.join(dir_path, f"{filename}.json")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(isa_json.model_dump_json(by_alias=True, exclude_none=True))


# Debug mode is on only when the MARS_DEBUG environment variable is exactly "1"
# (os.getenv yields a string or None, so a string comparison is sufficient).
DEBUG = os.getenv("MARS_DEBUG") == "1"


def submission(
credential_service_name: str,
username_credentials: str,
Expand All @@ -31,8 +49,9 @@ def submission(
investigation_is_root: bool,
urls: dict[str, Any],
file_transfer: str,
output: str,
data_file_paths=None,
):
) -> None:
# If credential manager info found:
# Get password from the credential manager
# Else:
Expand All @@ -59,6 +78,37 @@ def submission(
f"ISA JSON with investigation '{isa_json.investigation.title}' is valid."
)

time_stamp = datetime.timestamp(datetime.now())

if DEBUG:
save_step_to_file(time_stamp, "0_Initial_ISA_JSON_in_model", isa_json)

if all(
repo not in TargetRepository.available_repositories()
for repo in target_repositories
):
raise ValueError("No target repository selected.")

if TargetRepository.BIOSAMPLES in target_repositories:
# Submit to Biosamples
biosamples_result = submit_to_biosamples(
isa_json=isa_json,
biosamples_credentials=user_credentials,
biosamples_url=urls["BIOSAMPLES"]["SUBMISSION"],
webin_token_url=urls["WEBIN"]["TOKEN"],
)
print_and_log(
f"Submission to {TargetRepository.BIOSAMPLES} was successful. Result:\n{biosamples_result.json()}",
level="info",
)
# Update `isa_json`, based on the receipt returned
bs_mars_receipt = RepositoryResponse.model_validate(
json.loads(biosamples_result.content)
)
isa_json = update_isa_json(isa_json, bs_mars_receipt)
if DEBUG:
save_step_to_file(time_stamp, "1_after_biosamples", isa_json)

if TargetRepository.ENA in target_repositories:
# Step 1 : upload data if file paths are provided
if data_file_paths and file_transfer:
Expand All @@ -68,8 +118,8 @@ def submission(
submission_url=urls["ENA"]["DATA-SUBMISSION"],
file_transfer=file_transfer,
)

# Step 2 : submit isa-json to ena
# TODO: Filter out other assays
ena_result = submit_to_ena(
isa_json=isa_json,
user_credentials=user_credentials,
Expand All @@ -78,40 +128,32 @@ def submission(
print_and_log(
f"Submission to {TargetRepository.ENA} was successful. Result:\n{ena_result.json()}"
)
# TODO: Update `isa_json`, based on the receipt returned
# Update `isa_json`, based on the receipt returned
ena_mars_receipt = RepositoryResponse.from_json(str(ena_result.content))
isa_json = update_isa_json(isa_json, ena_mars_receipt)
if DEBUG:
save_step_to_file(time_stamp, "2_after_ena", isa_json)

elif TargetRepository.BIOSAMPLES in target_repositories:
# Submit to Biosamples
biosamples_result = submit_to_biosamples(
isa_json=isa_json,
biosamples_credentials=user_credentials,
biosamples_url=urls["BIOSAMPLES"]["SUBMISSION"],
webin_token_url=urls["WEBIN"]["TOKEN"],
)
print_and_log(
f"Submission to {TargetRepository.BIOSAMPLES} was successful. Result:\n{biosamples_result.json()}",
level="info",
)
# TODO: Update `isa_json`, based on the receipt returned
elif TargetRepository.METABOLIGHTS in target_repositories:
if TargetRepository.METABOLIGHTS in target_repositories:
# Submit to MetaboLights
# TODO: Filter out other assays
print_and_log(
f"Submission to {TargetRepository.METABOLIGHTS} was successful",
level="info",
)
# TODO: Update `isa_json`, based on the receipt returned
elif TargetRepository.EVA in target_repositories:

if TargetRepository.EVA in target_repositories:
# Submit to EVA
# TODO: Filter out other assays
print_and_log(
f"Submission to {TargetRepository.EVA} was successful", level="info"
)
# TODO: Update `isa_json`, based on the receipt returned
else:
raise ValueError("No target repository selected.")

# TODO: Return the updated ISA JSON
# Return the updated ISA JSON
with open(f"{output}.json", "w") as f:
f.write(isa_json.model_dump_json(by_alias=True, exclude_none=True))


def submit_to_biosamples(
Expand All @@ -130,7 +172,9 @@ def submit_to_biosamples(
biosamples_url,
headers=headers,
params=params,
json=isa_json.model_dump(by_alias=True, exclude_none=True),
json=reduce_isa_json_for_target_repo(
isa_json, TargetRepository.BIOSAMPLES
).model_dump(by_alias=True, exclude_none=True),
)

if result.status_code != 200:
Expand Down Expand Up @@ -158,7 +202,9 @@ def submit_to_ena(
submission_url,
headers=headers,
params=params,
json=isa_json.model_dump(by_alias=True, exclude_none=True),
json=reduce_isa_json_for_target_repo(isa_json, TargetRepository.ENA).model_dump(
by_alias=True, exclude_none=True
),
)

if result.status_code != 200:
Expand Down
4 changes: 4 additions & 0 deletions mars-cli/mars_lib/target_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ class TargetRepository(str, Enum):
METABOLIGHTS = "metabolights"
BIOSAMPLES = "biosamples"
EVA = "eva"

@classmethod
def available_repositories(cls):
    """
    Collects the values of all members of this enumeration.

    Returns:
        set: The `value` of every member obtained by iterating over the class.
    """
    values = set()
    for member in cls:
        values.add(member.value)
    return values
11 changes: 9 additions & 2 deletions mars-cli/tests/test_ftp_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,17 @@ def test_upload_login_failure():
uploader.upload([Path("./tests/fixtures/not_a_json_file.txt")])


@pytest.mark.skip(reason="Relies on real ENA credentials in test_credentials_example.json")
@pytest.mark.skip(
reason="Relies on real ENA credentials in test_credentials_example.json"
)
def test_upload_success():
# For local testing, add ENA username/password to test_credentials_example.json
with open("./tests/test_credentials_example.json") as f:
creds = json.load(f)
uploader = FTPUploader("webin2.ebi.ac.uk", creds["username"], creds["password"])
uploader.upload([Path("../test-data/ENA_TEST2.R1.fastq.gz"), Path("./tests/fixtures/not_a_json_file.txt")])
uploader.upload(
[
Path("../test-data/ENA_TEST2.R1.fastq.gz"),
Path("./tests/fixtures/not_a_json_file.txt"),
]
)
Loading

0 comments on commit b907a74

Please sign in to comment.