Skip to content

Commit

Permalink
Merge pull request #81 from Clinical-Genomics-Lund/80-update-parsing-…
Browse files Browse the repository at this point in the history
…of-tbprofiler-results-to-support-version-6

Validate TbProfiler schema version.
  • Loading branch information
mhkc authored Aug 12, 2024
2 parents bfb3214 + 5481b1f commit 95ae98e
Show file tree
Hide file tree
Showing 7 changed files with 673 additions and 1,166 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

### Added

- Added flag to set verbosity level.
- Validate TbProfiler schema version.
- Added CLI command for adding IGV annotation tracks.

### Fixed

### Changed
Expand Down
67 changes: 63 additions & 4 deletions prp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,35 @@
parse_virulencefinder_stx_typing,
parse_virulencefinder_vir_pred,
)
from .parse.phenotype.tbprofiler import (
EXPECTED_SCHEMA_VERSION as EXPECTED_TBPROFILER_SCHEMA_VERSION,
)
from .parse.metadata import get_database_info, get_gb_genome_version, parse_run_info
from .parse.species import get_mykrobe_spp_prediction
from .parse.utils import _get_path, get_db_version, parse_input_dir
from .parse.variant import annotate_delly_variants

logging.basicConfig(
level=logging.INFO, format="[%(asctime)s] %(levelname)s in %(module)s: %(message)s"
)
LOG = logging.getLogger(__name__)

OUTPUT_SCHEMA_VERSION = 1


@click.group()
@click.version_option(__version__)
def cli():
@click.option("-s", "--silent", is_flag=True)
@click.option("-d", "--debug", is_flag=True)
def cli(silent, debug):
"""Jasen pipeline result processing tool."""
if silent:
log_level = logging.WARNING
elif debug:
log_level = logging.DEBUG
else:
log_level = logging.INFO
# configure logging
logging.basicConfig(
level=log_level, format="[%(asctime)s] %(levelname)s in %(module)s: %(message)s"
)


@cli.command()
Expand Down Expand Up @@ -287,6 +299,15 @@ def create_bonsai_input(
LOG.info("Parse tbprofiler results")
with open(tbprofiler, "r", encoding="utf-8") as tbprofiler_json:
pred_res = json.load(tbprofiler_json)
# check schema version
schema_version = pred_res.get("schema_version")
if not EXPECTED_TBPROFILER_SCHEMA_VERSION == schema_version:
LOG.warning(
"Unsupported TbProfiler schema version - output might be inaccurate; result schema: %s; expected: %s",
schema_version,
EXPECTED_TBPROFILER_SCHEMA_VERSION,
)
# store pipeline version
db_info: list[SoupVersion] = []
db_info = [
SoupVersion(
Expand Down Expand Up @@ -521,3 +542,41 @@ def annotate_delly(vcf, bed, output):
annotate_delly_variants(writer, vcf_obj, annotation, annot_chrom=annot_chrom)

click.secho(f"Wrote annotated delly variants to {output}", fg="green")


@cli.command()
@click.option("-n", "--name", type=str, help="Track name.")
@click.option(
    "-a", "--annotation-file", type=click.Path(exists=True), help="Path to file."
)
@click.option(
    "-r",
    "--result",
    required=True,
    type=click.Path(writable=True),
    help="PRP result.",
)
@click.argument("output", type=click.File("w"))
def add_igv_annotation_track(name, annotation_file, result, output):
    """Add an IGV annotation track to a PRP result.

    The PRP result JSON at ``result`` is loaded into a ``PipelineResult``,
    a track entry built from ``name`` and ``annotation_file`` is appended to
    its ``genome_annotation`` list, and the updated result is written as JSON
    to ``output``.

    :param name: Track name.
    :param annotation_file: Path to the annotation file.
    :param result: Path to the PRP result JSON file to read.
    :param output: File handle, opened for writing, that receives the update.
    """
    with open(result, "r", encoding="utf-8") as jfile:
        result_obj = PipelineResult(**json.load(jfile))

    # Start from the tracks already stored on the parsed result so they are
    # preserved; fall back to an empty list when none have been set. The list
    # is copied so the loaded model is not mutated in place.
    existing_tracks = result_obj.genome_annotation
    track_info = list(existing_tracks) if isinstance(existing_tracks, list) else []

    # add new tracks
    track_info.append({"name": name, "file": annotation_file})

    # update data model
    upd_result = result_obj.model_copy(update={"genome_annotation": track_info})

    # write the updated result to the requested output
    output.write(upd_result.model_dump_json(indent=3))

    # report the output path rather than the repr of the file object
    click.secho(f"Wrote updated result to {output.name}", fg="green")
9 changes: 8 additions & 1 deletion prp/models/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ class ReferenceGenome(RWModel):
genes: str


class IgvAnnotationTrack(RWModel):
    """Describe a single IGV annotation track."""

    # name shown for the track
    name: str
    # path to the annotation file
    file: str


class PipelineResult(SampleBase):
"""Input format of sample object from pipeline."""

Expand All @@ -77,4 +84,4 @@ class PipelineResult(SampleBase):
# optional alignment info
reference_genome: Optional[ReferenceGenome] = None
read_mapping: Optional[str] = None
genome_annotation: Optional[list[dict[str, str]]] = None
genome_annotation: Optional[list[IgvAnnotationTrack]] = None
1 change: 1 addition & 0 deletions prp/parse/phenotype/tbprofiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ...models.phenotype import TbProfilerVariant, VariantSubType, VariantType

LOG = logging.getLogger(__name__)
EXPECTED_SCHEMA_VERSION = "1.0.0"


def _get_tbprofiler_amr_sr_profie(tbprofiler_result):
Expand Down
34 changes: 34 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,37 @@
"""Test fixtures."""

from .fixtures import *
from prp.models import PipelineResult
from prp.models.metadata import RunMetadata, RunInformation
from datetime import datetime


@pytest.fixture()
def simple_pipeline_result():
    """Build a minimal ``PipelineResult`` with mock run metadata and empty results."""
    # mock values describing the pipeline run
    run_fields = {
        "pipeline": "Jasen",
        "version": "0.0.1",
        "commit": "commit-hash",
        "analysis_profile": "",
        "configuration_files": [],
        "workflow_name": "workflow-name",
        "sample_name": "sample-name",
        "lims_id": "limbs id",
        "sequencing_run": "run-id",
        "sequencing_platform": "sequencing plattform",
        "sequencing_type": "illumina",
        "command": "nextflow run ...",
        "date": datetime.now(),
    }
    # wrap the run information in the metadata model, without any databases
    run_metadata = RunMetadata(run=RunInformation(**run_fields), databases=[])

    # every result section is left empty
    return PipelineResult(
        sample_id="mock-sample-001",
        run_metadata=run_metadata,
        qc=[],
        species_prediction=[],
        typing_result=[],
        element_type_result=[],
    )
Loading

0 comments on commit 95ae98e

Please sign in to comment.