Merge pull request #81 from Clinical-Genomics-Lund/80-update-parsing-…

…of-tbprofiler-results-to-support-version-6 Validate TbProfiler schema version.
SMD-Bioinformatics-Lund · Aug 12, 2024 · 95ae98e · 95ae98e
2 parents bfb3214 + 5481b1f
commit 95ae98e
Show file tree

Hide file tree

Showing 7 changed files with 673 additions and 1,166 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ### Added
 
+ - Added flag to set verbosity level.
+ - Validate TbProfiler schema version.
+ - Added CLI command for adding IGV annotation tracks.
+
 ### Fixed
 
 ### Changed

diff --git a/prp/cli.py b/prp/cli.py
@@ -37,23 +37,35 @@
     parse_virulencefinder_stx_typing,
     parse_virulencefinder_vir_pred,
 )
+from .parse.phenotype.tbprofiler import (
+    EXPECTED_SCHEMA_VERSION as EXPECTED_TBPROFILER_SCHEMA_VERSION,
+)
 from .parse.metadata import get_database_info, get_gb_genome_version, parse_run_info
 from .parse.species import get_mykrobe_spp_prediction
 from .parse.utils import _get_path, get_db_version, parse_input_dir
 from .parse.variant import annotate_delly_variants
 
-logging.basicConfig(
-    level=logging.INFO, format="[%(asctime)s] %(levelname)s in %(module)s: %(message)s"
-)
 LOG = logging.getLogger(__name__)
 
 OUTPUT_SCHEMA_VERSION = 1
 
 
 @click.group()
 @click.version_option(__version__)
-def cli():
+@click.option("-s", "--silent", is_flag=True)
+@click.option("-d", "--debug", is_flag=True)
+def cli(silent, debug):
     """Jasen pipeline result processing tool."""
+    # pick the log level from the verbosity flags; --silent is checked first,
+    # so it takes precedence when both flags are given
+    if silent:
+        log_level = logging.WARNING
+    elif debug:
+        log_level = logging.DEBUG
+    else:
+        log_level = logging.INFO
+    # configure logging with the selected level and the message format
+    logging.basicConfig(
+        level=log_level, format="[%(asctime)s] %(levelname)s in %(module)s: %(message)s"
+    )
 
 
 @cli.command()
@@ -287,6 +299,15 @@ def create_bonsai_input(
         LOG.info("Parse tbprofiler results")
         with open(tbprofiler, "r", encoding="utf-8") as tbprofiler_json:
             pred_res = json.load(tbprofiler_json)
+            # check schema version
+            schema_version = pred_res.get("schema_version")
+            if not EXPECTED_TBPROFILER_SCHEMA_VERSION == schema_version:
+                LOG.warning(
+                    "Unsupported TbProfiler schema version - output might be inaccurate; result schema: %s; expected: %s",
+                    schema_version,
+                    EXPECTED_TBPROFILER_SCHEMA_VERSION,
+                )
+            # store pipeline version
             db_info: list[SoupVersion] = []
             db_info = [
                 SoupVersion(
@@ -521,3 +542,41 @@ def annotate_delly(vcf, bed, output):
     annotate_delly_variants(writer, vcf_obj, annotation, annot_chrom=annot_chrom)
 
     click.secho(f"Wrote annotated delly variants to {output}", fg="green")
+
+
+@cli.command()
+@click.option("-n", "--name", type=str, help="Track name.")
+@click.option(
+    "-a", "--annotation-file", type=click.Path(exists=True), help="Path to file."
+)
+@click.option(
+    "-r",
+    "--result",
+    required=True,
+    type=click.Path(writable=True),
+    help="PRP result.",
+)
+@click.argument("output", type=click.File("w"))
+def add_igv_annotation_track(name, annotation_file, result, output):
+    """Add IGV annotation track to result."""
+    with open(result, "r", encoding="utf-8") as jfile:
+        result_obj = PipelineResult(**json.load(jfile))
+
+    # Get genome annotation
+    if result_obj.genome_annotation is None or isinstance(
+        result_obj.genome_annotation, list
+    ):
+        track_info = []
+    else:
+        track_info = result.genome_annotation
+
+    # add new tracks
+    track_info.append({"name": name, "file": annotation_file})
+
+    # update data model
+    upd_result = result_obj.model_copy(update={"genome_annotation": track_info})
+
+    # overwrite result
+    output.write(upd_result.model_dump_json(indent=3))
+
+    click.secho(f"Wrote updated result to {output}", fg="green")
diff --git a/prp/models/sample.py b/prp/models/sample.py
@@ -59,6 +59,13 @@ class ReferenceGenome(RWModel):
     genes: str
 
 
+class IgvAnnotationTrack(RWModel):
+    """IGV annotation track data."""
+
+    name: str  # track name to display
+    file: str  # path to the annotation file
+
+
 class PipelineResult(SampleBase):
     """Input format of sample object from pipeline."""
 
@@ -77,4 +84,4 @@ class PipelineResult(SampleBase):
     # optional alignment info
     reference_genome: Optional[ReferenceGenome] = None
     read_mapping: Optional[str] = None
-    genome_annotation: Optional[list[dict[str, str]]] = None
+    genome_annotation: Optional[list[IgvAnnotationTrack]] = None
diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py
@@ -14,6 +14,7 @@
 from ...models.phenotype import TbProfilerVariant, VariantSubType, VariantType
 
 LOG = logging.getLogger(__name__)
+# schema version of the TbProfiler result that is expected as input
+EXPECTED_SCHEMA_VERSION = "1.0.0"
 
 
 def _get_tbprofiler_amr_sr_profie(tbprofiler_result):

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,3 +1,37 @@
 """Test fixtures."""
 
 from .fixtures import *
+from prp.models import PipelineResult
+from prp.models.metadata import RunMetadata, RunInformation
+from datetime import datetime
+
+
+@pytest.fixture()
+def simple_pipeline_result():
+    """Return a basic analysis result."""
+
+    mock_run_info = RunInformation(
+        pipeline="Jasen",
+        version="0.0.1",
+        commit="commit-hash",
+        analysis_profile="",
+        configuration_files=[],
+        workflow_name="workflow-name",
+        sample_name="sample-name",
+        lims_id="limbs id",
+        sequencing_run="run-id",
+        sequencing_platform="sequencing plattform",
+        sequencing_type="illumina",
+        command="nextflow run ...",
+        date=datetime.now(),
+    )
+    # add run into to metadata model
+    metadata = RunMetadata(run=mock_run_info, databases=[])
+    return PipelineResult(
+        sample_id="mock-sample-001",
+        run_metadata=metadata,
+        qc=[],
+        species_prediction=[],
+        typing_result=[],
+        element_type_result=[],
+    )