diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index 08d4b64..55a01e4 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -176,6 +176,12 @@ def cli(ctx, development): help="Name of a credentials file", ) @click.argument("isa_json_file", type=click.File("r")) +@click.option( + "--submit-to-biosamples", + type=click.BOOL, + default=True, + help="Submit to BioSamples.", +) @click.option("--submit-to-ena", type=click.BOOL, default=True, help="Submit to ENA.") @click.option( "--file-transfer", @@ -200,6 +206,11 @@ def cli(ctx, development): type=click.BOOL, help="Boolean indicating if the investigation is the root of the ISA JSON. Set this to True if the ISA-JSON does not contain a 'investigation' field.", ) +@click.option( + "--output", + type=click.STRING, + default=f"output_{datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}", +) @click.pass_context def submit( ctx, @@ -207,22 +218,22 @@ def submit( username_credentials, credentials_file, isa_json_file, + submit_to_biosamples, submit_to_ena, submit_to_metabolights, investigation_is_root, file_transfer, + output, data_files, ): """Start a submission to the target repositories.""" - target_repositories = [TargetRepository.BIOSAMPLES] + target_repositories = [] + + if submit_to_biosamples: + target_repositories.append(TargetRepository.BIOSAMPLES) if submit_to_ena: target_repositories.append(TargetRepository.ENA) - target_repositories.remove(TargetRepository.BIOSAMPLES) - print_and_log( - f"Skipping {TargetRepository.BIOSAMPLES} repository due to {TargetRepository.ENA} being present in the list of repositories", - level="debug", - ) if submit_to_metabolights: target_repositories.append(TargetRepository.METABOLIGHTS) @@ -245,6 +256,7 @@ def submit( investigation_is_root, urls_dict, file_transfer, + output, data_file_paths, ) except requests.RequestException as err: diff --git a/mars-cli/mars_lib/isa_json.py b/mars-cli/mars_lib/isa_json.py index 8507d1a..fdff019 100644 --- a/mars-cli/mars_lib/isa_json.py +++ b/mars-cli/mars_lib/isa_json.py @@ -22,21 +22,21 @@ def reduce_isa_json_for_target_repo( - input_isa_json: Investigation, target_repo: str -) -> Investigation: + input_isa_json: IsaJson, target_repo: str +) -> IsaJson: """ Filters out assays that are not meant to be sent to the specified target repository. Args: - input_isa_json (Investigation): Input ISA JSON that contains the original information. + input_isa_json (IsaJson): Input ISA JSON that contains the original information. target_repo (TargetRepository): Target repository as a constant. Returns: - Investigation: Filtered ISA JSON. + IsaJson: Filtered ISA JSON. """ filtered_isa_json = input_isa_json.model_copy(deep=True) new_studies = [] - studies = filtered_isa_json.studies + studies = filtered_isa_json.investigation.studies for study in studies: if target_repo == TargetRepository.BIOSAMPLES: filtered_assays = [] @@ -51,7 +51,7 @@ def reduce_isa_json_for_target_repo( study.assays = filtered_assays new_studies.append(study) - filtered_isa_json.studies = new_studies + filtered_isa_json.investigation.studies = new_studies return filtered_isa_json @@ -64,6 +64,8 @@ def detect_target_repo_comment(comments: List[Comment]) -> Comment: Returns: Comment: The comment where the name corresponds with the name of the provided target repo. """ + if len(comments) < 1: + raise ValueError("No comments found! Not able to detect the target repository!") return next(comment for comment in comments if comment.name == TARGET_REPO_KEY) @@ -188,13 +190,15 @@ def accession_characteristic_present( f"'where' atribute is missing in path {material_type_path.key}." ) - accession_characteristics = [ - char - for char in material.characteristics - if char.category - and char.category.characteristicType - and char.category.characteristicType.annotationValue == "accession" - ] + accession_characteristics = [] + for char in material.characteristics: + if char.category and char.category.characteristicType: + if char.category.characteristicType.annotationValue: + if char.category.characteristicType.annotationValue == "accession": + accession_characteristics.append(char) + else: + if char.category.characteristicType == "accession": + accession_characteristics.append(char) if len(accession_characteristics) > 1: raise AttributeError( @@ -255,17 +259,12 @@ def add_accession_to_node( if not updated_material_accession_characteristic: raise ValueError("Accession characteristic is not present.") - if updated_material_accession_characteristic.value and hasattr( - updated_material_accession_characteristic.value, "annotationValue" - ): - accession_ontology_annotation = OntologyAnnotation() - accession_ontology_annotation.id = ( - f"#ontology_annotation/accession_{updated_material.id}" - ) - accession_ontology_annotation.annotationValue = accession_number - updated_material_accession_characteristic.value = accession_ontology_annotation - else: - updated_material_accession_characteristic.value = accession_number + accession_ontology_annotation = OntologyAnnotation() + accession_ontology_annotation.id = ( + f"#ontology_annotation/accession_{updated_material.id}" + ) + accession_ontology_annotation.annotationValue = accession_number + updated_material_accession_characteristic.value = accession_ontology_annotation updated_material.characteristics.append(updated_material_accession_characteristic) print(f"{updated_material.id}: {updated_material_accession_characteristic.value}.") @@ -352,20 +351,18 @@ def create_accession_characteristic( updated_material.characteristics.append(new_material_attribute_value) -def update_investigation( - investigation: Investigation, repo_response: RepositoryResponse -) -> Investigation: +def update_isa_json(isa_json: IsaJson, repo_response: RepositoryResponse) -> IsaJson: """ Adds the accession to the ISA JSON. Args: - isa_json (Investigation): The ISA JSON to be updated. + isa_json (IsaJson): The ISA JSON to be updated. repo_response (RepositoryResponse): The response from the repository. Returns: - Investigation: The updated ISA JSON. + IsaJson: The updated ISA JSON. """ - updated_investigation = investigation.model_copy(deep=True) + investigation = isa_json.investigation for accession in repo_response.accessions: has_assay_in_path = [p for p in accession.path if p.key == "assays"] @@ -380,7 +377,7 @@ def update_investigation( if not study_filter: raise ValueError(f"Study filter is not present in {accession.path}.") - updated_node = apply_filter(study_filter, updated_investigation.studies) + updated_node = apply_filter(study_filter, investigation.studies) if target_level == "assay": assay_filter = get_filter_for_accession_key(accession, "assays") @@ -407,4 +404,5 @@ def update_investigation( add_accession_to_node(updated_node, accession.value, material_type_path) - return updated_investigation + isa_json.investigation = investigation + return isa_json diff --git a/mars-cli/mars_lib/submit.py b/mars-cli/mars_lib/submit.py index 031afde..dc347c9 100644 --- a/mars-cli/mars_lib/submit.py +++ b/mars-cli/mars_lib/submit.py @@ -1,3 +1,5 @@ +import os +from datetime import datetime from io import TextIOWrapper import requests import json @@ -11,8 +13,13 @@ input_json_schema_filepath, ) from mars_lib.credential import CredentialManager -from mars_lib.isa_json import load_isa_json +from mars_lib.isa_json import ( + load_isa_json, + reduce_isa_json_for_target_repo, + update_isa_json, +) from mars_lib.models.isa_json import IsaJson +from mars_lib.models.repository_response import RepositoryResponse from mars_lib.target_repo import TargetRepository from mars_lib.logging import print_and_log from pydantic import ValidationError @@ -22,6 +29,17 @@ from typing import List +def save_step_to_file(time_stamp: float, filename: str, isa_json: IsaJson): + dir_path = f"tmp/{datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}" + os.makedirs(dir_path, exist_ok=True) + + with open(f"{dir_path}/{filename}.json", "w") as f: + f.write(isa_json.model_dump_json(by_alias=True, exclude_none=True)) + + +DEBUG = os.getenv("MARS_DEBUG") in ["1", 1] + + def submission( credential_service_name: str, username_credentials: str, @@ -31,8 +49,9 @@ def submission( investigation_is_root: bool, urls: dict[str, Any], file_transfer: str, + output: str, data_file_paths=None, -): +) -> None: # If credential manager info found: # Get password from the credential manager # Else: @@ -59,6 +78,37 @@ def submission( f"ISA JSON with investigation '{isa_json.investigation.title}' is valid." ) + time_stamp = datetime.timestamp(datetime.now()) + + if DEBUG: + save_step_to_file(time_stamp, "0_Initial_ISA_JSON_in_model", isa_json) + + if all( + repo not in TargetRepository.available_repositories() + for repo in target_repositories + ): + raise ValueError("No target repository selected.") + + if TargetRepository.BIOSAMPLES in target_repositories: + # Submit to Biosamples + biosamples_result = submit_to_biosamples( + isa_json=isa_json, + biosamples_credentials=user_credentials, + biosamples_url=urls["BIOSAMPLES"]["SUBMISSION"], + webin_token_url=urls["WEBIN"]["TOKEN"], + ) + print_and_log( + f"Submission to {TargetRepository.BIOSAMPLES} was successful. Result:\n{biosamples_result.json()}", + level="info", + ) + # Update `isa_json`, based on the receipt returned + bs_mars_receipt = RepositoryResponse.model_validate( + json.loads(biosamples_result.content) + ) + isa_json = update_isa_json(isa_json, bs_mars_receipt) + if DEBUG: + save_step_to_file(time_stamp, "1_after_biosamples", isa_json) + if TargetRepository.ENA in target_repositories: # Step 1 : upload data if file paths are provided if data_file_paths and file_transfer: @@ -68,8 +118,8 @@ def submission( submission_url=urls["ENA"]["DATA-SUBMISSION"], file_transfer=file_transfer, ) + # Step 2 : submit isa-json to ena - # TODO: Filter out other assays ena_result = submit_to_ena( isa_json=isa_json, user_credentials=user_credentials, @@ -78,22 +128,13 @@ def submission( print_and_log( f"Submission to {TargetRepository.ENA} was successful. Result:\n{ena_result.json()}" ) - # TODO: Update `isa_json`, based on the receipt returned + # Update `isa_json`, based on the receipt returned + ena_mars_receipt = RepositoryResponse.from_json(str(ena_result.content)) + isa_json = update_isa_json(isa_json, ena_mars_receipt) + if DEBUG: + save_step_to_file(time_stamp, "2_after_ena", isa_json) - elif TargetRepository.BIOSAMPLES in target_repositories: - # Submit to Biosamples - biosamples_result = submit_to_biosamples( - isa_json=isa_json, - biosamples_credentials=user_credentials, - biosamples_url=urls["BIOSAMPLES"]["SUBMISSION"], - webin_token_url=urls["WEBIN"]["TOKEN"], - ) - print_and_log( - f"Submission to {TargetRepository.BIOSAMPLES} was successful. Result:\n{biosamples_result.json()}", - level="info", - ) - # TODO: Update `isa_json`, based on the receipt returned - elif TargetRepository.METABOLIGHTS in target_repositories: + if TargetRepository.METABOLIGHTS in target_repositories: # Submit to MetaboLights # TODO: Filter out other assays print_and_log( @@ -101,17 +142,18 @@ def submission( level="info", ) # TODO: Update `isa_json`, based on the receipt returned - elif TargetRepository.EVA in target_repositories: + + if TargetRepository.EVA in target_repositories: # Submit to EVA # TODO: Filter out other assays print_and_log( f"Submission to {TargetRepository.EVA} was successful", level="info" ) # TODO: Update `isa_json`, based on the receipt returned - else: - raise ValueError("No target repository selected.") - # TODO: Return the updated ISA JSON + # Return the updated ISA JSON + with open(f"{output}.json", "w") as f: + f.write(isa_json.model_dump_json(by_alias=True, exclude_none=True)) def submit_to_biosamples( @@ -130,7 +172,9 @@ def submit_to_biosamples( biosamples_url, headers=headers, params=params, - json=isa_json.model_dump(by_alias=True, exclude_none=True), + json=reduce_isa_json_for_target_repo( + isa_json, TargetRepository.BIOSAMPLES + ).model_dump(by_alias=True, exclude_none=True), ) if result.status_code != 200: @@ -158,7 +202,9 @@ def submit_to_ena( submission_url, headers=headers, params=params, - json=isa_json.model_dump(by_alias=True, exclude_none=True), + json=reduce_isa_json_for_target_repo(isa_json, TargetRepository.ENA).model_dump( + by_alias=True, exclude_none=True + ), ) if result.status_code != 200: diff --git a/mars-cli/mars_lib/target_repo.py b/mars-cli/mars_lib/target_repo.py index ef6e3f6..dd648ec 100644 --- a/mars-cli/mars_lib/target_repo.py +++ b/mars-cli/mars_lib/target_repo.py @@ -13,3 +13,7 @@ class TargetRepository(str, Enum): METABOLIGHTS = "metabolights" BIOSAMPLES = "biosamples" EVA = "eva" + + @classmethod + def available_repositories(cls): + return {item.value for item in cls} diff --git a/mars-cli/tests/test_ftp_upload.py b/mars-cli/tests/test_ftp_upload.py index 13915ab..a5d1672 100644 --- a/mars-cli/tests/test_ftp_upload.py +++ b/mars-cli/tests/test_ftp_upload.py @@ -13,10 +13,17 @@ def test_upload_login_failure(): uploader.upload([Path("./tests/fixtures/not_a_json_file.txt")]) -@pytest.mark.skip(reason="Relies on real ENA credentials in test_credentials_example.json") +@pytest.mark.skip( + reason="Relies on real ENA credentials in test_credentials_example.json" +) def test_upload_success(): # For local testing, add ENA username/password to test_credentials_example.json with open("./tests/test_credentials_example.json") as f: creds = json.load(f) uploader = FTPUploader("webin2.ebi.ac.uk", creds["username"], creds["password"]) - uploader.upload([Path("../test-data/ENA_TEST2.R1.fastq.gz"), Path("./tests/fixtures/not_a_json_file.txt")]) + uploader.upload( + [ + Path("../test-data/ENA_TEST2.R1.fastq.gz"), + Path("./tests/fixtures/not_a_json_file.txt"), + ] + ) diff --git a/mars-cli/tests/test_isa_json.py b/mars-cli/tests/test_isa_json.py index f14f9ba..c4fd0d8 100644 --- a/mars-cli/tests/test_isa_json.py +++ b/mars-cli/tests/test_isa_json.py @@ -3,7 +3,7 @@ from mars_lib.isa_json import ( reduce_isa_json_for_target_repo, load_isa_json, - update_investigation, + update_isa_json, ) from mars_lib.target_repo import TargetRepository, TARGET_REPO_KEY import pytest @@ -44,12 +44,12 @@ def test_reduce_isa_json_for_target_repo(): ) filtered_isa_json = reduce_isa_json_for_target_repo( - good_isa_json.investigation, TargetRepository.ENA + good_isa_json, TargetRepository.ENA ) good_isa_json_study = good_isa_json.investigation.studies[0] - filtered_isa_json_study = filtered_isa_json.studies[0] + filtered_isa_json_study = filtered_isa_json.investigation.studies[0] assert len(good_isa_json_study.assays) == 5 assert len(filtered_isa_json_study.assays) == 1 @@ -61,10 +61,10 @@ def test_reduce_isa_json_for_biosamples(): ) filtered_isa_json = reduce_isa_json_for_target_repo( - good_isa_json.investigation, TargetRepository.BIOSAMPLES + good_isa_json, TargetRepository.BIOSAMPLES ) - assert len(filtered_isa_json.studies[0].assays) == 0 + assert len(filtered_isa_json.investigation.studies[0].assays) == 0 def test_data_type_validator(): @@ -186,21 +186,23 @@ def test_update_study_materials_no_accession_categories(): respose_file_path = "tests/fixtures/json_responses/biosamples_success_reponse.json" repo_response = RepositoryResponse.from_json_file(respose_file_path) - updated_investigation = update_investigation( - validated_isa_json.investigation, repo_response - ) + updated_isa_json = update_isa_json(validated_isa_json, repo_response) # Check the accession number of the source - # Accession characteristic is of type String assert ( - updated_investigation.studies[0].materials.sources[0].characteristics[-1].value + updated_isa_json.investigation.studies[0] + .materials.sources[0] + .characteristics[-1] + .value.annotationValue == repo_response.accessions[0].value ) # Check the accession number of the sample - # Accession characteristic is of type String assert ( - updated_investigation.studies[0].materials.samples[0].characteristics[-1].value + updated_isa_json.investigation.studies[0] + .materials.samples[0] + .characteristics[-1] + .value.annotationValue == repo_response.accessions[1].value ) @@ -213,16 +215,13 @@ def test_update_study_materials_with_accession_categories(): validated_isa_json = IsaJson.model_validate(json_data) - respose_file_path = "tests/fixtures/json_responses/biosamples_success_reponse.json" - repo_response = RepositoryResponse.from_json_file(respose_file_path) + response_file_path = "tests/fixtures/json_responses/biosamples_success_reponse.json" + repo_response = RepositoryResponse.from_json_file(response_file_path) - updated_investigation = update_investigation( - validated_isa_json.investigation, repo_response - ) + updated_isa_json = update_isa_json(validated_isa_json, repo_response) # Check the accession number of the source - # Accession characteristic is of type OntologyAnnotation assert ( - updated_investigation.studies[0] + updated_isa_json.investigation.studies[0] .materials.sources[0] .characteristics[-1] .value.annotationValue @@ -230,9 +229,11 @@ def test_update_study_materials_with_accession_categories(): ) # Check the accession number of the sample - # Accession characteristic is of type String assert ( - updated_investigation.studies[0].materials.samples[0].characteristics[-1].value + updated_isa_json.investigation.studies[0] + .materials.samples[0] + .characteristics[-1] + .value.annotationValue == repo_response.accessions[1].value ) diff --git a/repository-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/controller/WebinIsaToXmlSubmissionController.java b/repository-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/controller/WebinIsaToXmlSubmissionController.java index b378a76..e195d10 100644 --- a/repository-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/controller/WebinIsaToXmlSubmissionController.java +++ b/repository-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/controller/WebinIsaToXmlSubmissionController.java @@ -20,6 +20,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.dom4j.Document; import org.dom4j.DocumentHelper; @@ -146,13 +147,23 @@ public Map getBiosamples(List studies) { } private String getCharacteresticAnnotation(List characteristics) { - for (Characteristic characteristic : characteristics) { - if ("#characteristic_category/accession".equals(characteristic.category.id)) { - return characteristic.value.annotationValue; - } + List filteredCharacteristics = + characteristics.stream() + .filter( + characteristic -> + characteristic.category.id.contains("#characteristic_category/accession")) + .collect(Collectors.toList()); + + if (filteredCharacteristics.isEmpty()) { + log.error("No accession found in the characteristics"); + throw new RuntimeException("No accession found in the characteristics"); } - return ""; + if (filteredCharacteristics.size() > 1) { + log.error("More than one accession found in the characteristics"); + throw new RuntimeException("Too many accessions found in the characteristics"); + } + return filteredCharacteristics.get(0).value.annotationValue; } private static Element startPreparingWebinV2SubmissionXml(Document document) {