From ff08639f6828ed53ac372a2ded5274ec087d8e12 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 27 Dec 2023 17:24:51 +0100 Subject: [PATCH 01/29] Update parse_mykrobe_amr_pred to handle mykrobe csv format --- prp/cli.py | 13 ++++++++----- prp/parse/phenotype/mykrobe.py | 35 +++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/prp/cli.py b/prp/cli.py index 5232ed9..771c821 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -1,6 +1,7 @@ """Definition of the PRP command-line interface.""" import json import logging +import pandas as pd from typing import List import click @@ -188,10 +189,12 @@ def create_bonsai_input( # mykrobe if mykrobe: LOG.info("Parse mykrobe results") - pred_res = json.load(mykrobe) + pred_res = pd.read_csv(mykrobe, quotechar='"', orient='records') + pred_res.columns.values[3] = "variants" + pred_res.columns.values[4] = "genes" # verify that sample id is in prediction result - if not sample_id in pred_res: + if not sample_id in pred_res[0]["sample"]: LOG.warning( "Sample id %s is not in Mykrobe result, possible sample mixup", sample_id, @@ -202,17 +205,17 @@ def create_bonsai_input( results["run_metadata"]["databases"].append( SoupVersion( name="mykrobe-predictor", - version=pred_res[sample_id]["version"]["mykrobe-predictor"], + version=pred_res[0]["mykrobe_version"], type=SoupType.DB, ) ) # parse mykrobe result - amr_res = parse_mykrobe_amr_pred(pred_res[sample_id], ElementType.AMR) + amr_res = parse_mykrobe_amr_pred(pred_res, ElementType.AMR) if amr_res is not None: results["element_type_result"].append(amr_res) lin_res: MethodIndex = parse_mykrobe_lineage_results( - pred_res[sample_id], TypingMethod.LINEAGE + pred_res, TypingMethod.LINEAGE ) results["typing_result"].append(lin_res) diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index d21d6d6..3079780 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -33,22 +33,27 @@ def _parse_mykrobe_amr_genes(mykrobe_result) -> Tuple[ResistanceGene, ...]: results = [] for element_type in mykrobe_result: # skip non-resistance yeilding - if not mykrobe_result[element_type]["predict"].upper() == "R": + if not element_type["susceptibility"].upper() == "R": continue - - hits = mykrobe_result[element_type]["called_by"] - for hit_name, hit in hits.items(): - gene = ResistanceGene( - gene_symbol=hit_name.split("_")[0], - accession=None, - depth=hit["info"]["coverage"]["alternate"]["median_depth"], - identity=None, - coverage=hit["info"]["coverage"]["alternate"]["percent_coverage"], - phenotypes=[element_type.lower()], - element_type=ElementType.AMR, - element_subtype=ElementAmrSubtype.AMR, - ) - results.append(gene) + + try: + depth = element_type["genes"].split(':')[-1] + coverage = element_type["genes"].split(':')[-2] + except KeyError: + depth = None + coverage = None + + gene = ResistanceGene( + gene_symbol=element_type["variants"].split("_")[0], + accession=None, + depth=depth, + identity=None, + coverage=coverage, + phenotypes=[element_type["drug"].lower()], + element_type=ElementType.AMR, + element_subtype=ElementAmrSubtype.AMR, + ) + results.append(gene) return results From 5563589db20654de118d704c375c99357cf9e2b6 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 28 Dec 2023 23:13:46 +0100 Subject: [PATCH 02/29] Fix mykrobe variant parser --- prp/parse/phenotype/mykrobe.py | 97 ++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 41 deletions(-) diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index 3079780..f147c80 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -3,7 +3,7 @@ import re from typing import Any, Dict, Tuple -from ...models.phenotype import ElementAmrSubtype, ElementType, ElementTypeResult +from ...models.phenotype import ElementAmrSubtype, ElementType, ElementTypeResult, PhenotypeInfo from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType from ...models.sample import MethodIndex @@ -21,10 +21,10 @@ def _get_mykrobe_amr_sr_profie(mykrobe_result): return {} for element_type in mykrobe_result: - if mykrobe_result[element_type]["predict"].upper() == "R": - resistant.add(element_type) + if element_type["susceptibility"].upper() == "R": + resistant.add(element_type["drug"]) else: - susceptible.add(element_type) + susceptible.add(element_type["drug"]) return {"susceptible": list(susceptible), "resistant": list(resistant)} @@ -37,19 +37,26 @@ def _parse_mykrobe_amr_genes(mykrobe_result) -> Tuple[ResistanceGene, ...]: continue try: - depth = element_type["genes"].split(':')[-1] - coverage = element_type["genes"].split(':')[-2] - except KeyError: + depth = float(element_type["genes"].split(':')[-1]) + coverage = float(element_type["genes"].split(':')[-2]) + except AttributeError: depth = None coverage = None - + gene = ResistanceGene( gene_symbol=element_type["variants"].split("_")[0], accession=None, depth=depth, identity=None, coverage=coverage, - phenotypes=[element_type["drug"].lower()], + drugs=[element_type["drug"].lower()], + phenotypes=[ + PhenotypeInfo( + type=ElementType.AMR, + group=ElementType.AMR, + name=ElementType.AMR, + ) + ], element_type=ElementType.AMR, element_subtype=ElementAmrSubtype.AMR, ) @@ -95,36 +102,45 @@ def _parse_mykrobe_amr_variants(mykrobe_result) -> Tuple[ResistanceVariant, ...] for element_type in mykrobe_result: # skip non-resistance yeilding - if not mykrobe_result[element_type]["predict"].upper() == "R": + if not element_type["susceptibility"].upper() == "R": + continue + + if element_type["variants"] is not None: continue - hits = mykrobe_result[element_type]["called_by"] - for hit in hits: - if hits[hit]["variant"] is not None: - continue - - var_info = hit.split("-")[1] - _, ref_nt, alt_nt, position = get_mutation_type(var_info) - var_nom = hit.split("-")[0].split("_")[1] - var_type, *_ = get_mutation_type(var_nom) - variant = ResistanceVariant( - variant_type=var_type, - genes=[hit.split("_")[0]], - phenotypes=[element_type], - position=position, - ref_nt=ref_nt, - alt_nt=alt_nt, - depth=hits[hit]["info"]["coverage"]["alternate"]["median_depth"], - ref_database=None, - ref_id=None, - type=None, - change=var_nom, - nucleotide_change=None, - protein_change=None, - annotation=None, - drugs=None, - ) - results.append(variant) + try: + depth = float(element_type["genes"].split(':')[-1]) + except AttributeError: + depth = None + + var_info = element_type["variants"].split("-")[1] + _, ref_nt, alt_nt, position = get_mutation_type(var_info) + var_nom = element_type["variants"].split("-")[0].split("_")[1] + var_type, *_ = get_mutation_type(var_nom) + variant = ResistanceVariant( + variant_type=var_type, + genes=[element_type["variants"].split("_")[0]], + phenotypes=[ + PhenotypeInfo( + type=ElementType.AMR, + group=ElementType.AMR, + name=ElementType.AMR, + ) + ], + position=position, + ref_nt=ref_nt, + alt_nt=alt_nt, + depth=depth, + ref_database=None, + ref_id=None, + type=None, + change=var_nom, + nucleotide_change=None, + protein_change=None, + annotation=None, + drugs=[element_type["drug"].lower()], + ) + results.append(variant) return results @@ -133,11 +149,10 @@ def parse_mykrobe_amr_pred( ) -> ElementTypeResult | None: """Parse mykrobe resistance prediction results.""" LOG.info("Parsing mykrobe prediction") - pred = prediction["susceptibility"] resistance = ElementTypeResult( - phenotypes=_get_mykrobe_amr_sr_profie(pred), - genes=_parse_mykrobe_amr_genes(pred), - mutations=_parse_mykrobe_amr_variants(pred), + phenotypes=_get_mykrobe_amr_sr_profie(prediction), + genes=_parse_mykrobe_amr_genes(prediction), + mutations=_parse_mykrobe_amr_variants(prediction), ) # verify prediction result From 89c94d5deedce7b681e6695b9ae278614defea35 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 28 Dec 2023 23:14:24 +0100 Subject: [PATCH 03/29] Fix mykrobe phenotype model --- prp/models/phenotype.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prp/models/phenotype.py b/prp/models/phenotype.py index 9e1403b..453e27a 100644 --- a/prp/models/phenotype.py +++ b/prp/models/phenotype.py @@ -90,6 +90,7 @@ class GeneBase(BaseModel): coverage: Optional[float] = None ref_start_pos: Optional[int] = None ref_end_pos: Optional[int] = None + drugs: Optional[List[Union[Dict,str]]] = None ref_gene_length: Optional[int] = Field( default=None, alias="target_length", @@ -175,7 +176,7 @@ class VariantBase(DatabaseReference): nucleotide_change: Optional[str] = None protein_change: Optional[str] = None annotation: Optional[List[Dict]] = None - drugs: Optional[List[Dict]] = None + drugs: Optional[List[Union[Dict,str]]] = None class ResistanceVariant(VariantBase): From 3183b1b6138b73bd39113a6f75e6ab13a349245e Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 28 Dec 2023 23:15:00 +0100 Subject: [PATCH 04/29] Fix read_csv in cli --- prp/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prp/cli.py b/prp/cli.py index 771c821..0392686 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -189,9 +189,10 @@ def create_bonsai_input( # mykrobe if mykrobe: LOG.info("Parse mykrobe results") - pred_res = pd.read_csv(mykrobe, quotechar='"', orient='records') + pred_res = pd.read_csv(mykrobe, quotechar='"') pred_res.columns.values[3] = "variants" pred_res.columns.values[4] = "genes" + pred_res = pred_res.to_dict(orient="records") # verify that sample id is in prediction result if not sample_id in pred_res[0]["sample"]: From d4369c571a90c6c28dd19b2c1960c4e2dbbcaa6f Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 28 Dec 2023 23:15:28 +0100 Subject: [PATCH 05/29] Update CHANGELOG.md --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6ffdf0..64a0fd9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,15 @@ -## [Unreleased] +## [0.3.0] ### Added + - Pytest for Mycobacterium tuberculosis + ### Fixed ### Changed + - Mykrobe output parser handles csv format instead of json + ## [0.2.0] ### Added From 8c44c99033177e2cb58960c8a229cd64539ef67b Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 14:49:18 +0100 Subject: [PATCH 06/29] Fix PhenotypeInfo for tbprofiler --- prp/parse/phenotype/tbprofiler.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py index a8b22df..c6546d5 100644 --- a/prp/parse/phenotype/tbprofiler.py +++ b/prp/parse/phenotype/tbprofiler.py @@ -3,7 +3,7 @@ from typing import Any, Dict, Tuple from ...models.metadata import SoupVersions -from ...models.phenotype import ElementTypeResult +from ...models.phenotype import ElementTypeResult, ElementType, PhenotypeInfo from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceVariant from ...models.sample import MethodIndex @@ -50,10 +50,17 @@ def _parse_tbprofiler_amr_variants(tbprofiler_result) -> Tuple[ResistanceVariant for hit in tbprofiler_result["dr_variants"]: var_type = "substitution" + variant = ResistanceVariant( variant_type=var_type, genes=[hit["gene"]], - phenotypes=hit["gene_associated_drugs"], + phenotypes=[ + PhenotypeInfo( + type=ElementType.AMR, + group=ElementType.AMR, + name=ElementType.AMR, + ) + ], position=int(hit["genome_pos"]), ref_nt=hit["ref"], alt_nt=hit["alt"], From b5459684686f0bb59917657bad7f997295762c04 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 14:51:48 +0100 Subject: [PATCH 07/29] Set ref_aa & alt_aa to optional --- prp/models/phenotype.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prp/models/phenotype.py b/prp/models/phenotype.py index 453e27a..9685610 100644 --- a/prp/models/phenotype.py +++ b/prp/models/phenotype.py @@ -148,8 +148,8 @@ class VariantBase(DatabaseReference): position: int ref_nt: str alt_nt: str - ref_aa: str - alt_aa: str + ref_aa: Optional[str] = None + alt_aa: Optional[str] = None # prediction info depth: Optional[float] = None contig_id: Optional[str] = None From 3d0bcecb576434429166ad3ab8bba6cb491086e5 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 14:54:32 +0100 Subject: [PATCH 08/29] Set coverage in LineageInformation to optional --- prp/models/typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prp/models/typing.py b/prp/models/typing.py index 7082c4c..e8fa153 100644 --- a/prp/models/typing.py +++ b/prp/models/typing.py @@ -57,7 +57,7 @@ class LineageInformation(RWModel): rd: str | None = None fraction: float | None = None variant: str | None = None - coverage: Dict[str, Any] = None + coverage: Dict[str, Any] | None = None class ResultMlstBase(RWModel): From da7ae4ca14a9c800576d9828e164e915f073b1b0 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 14:57:46 +0100 Subject: [PATCH 09/29] Fix mykrobe lineage csv parser --- prp/parse/typing.py | 43 ++++++++++++------------------------------- 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/prp/parse/typing.py b/prp/parse/typing.py index 5a4efa6..bbd8fa2 100644 --- a/prp/parse/typing.py +++ b/prp/parse/typing.py @@ -132,37 +132,18 @@ def parse_mykrobe_lineage_results(pred_res: dict, method) -> TypingResultLineage """Parse mykrobe results for lineage object.""" LOG.info("Parsing lineage results") lineages = [] - lineage_pred = pred_res["phylogenetics"]["lineage"] - if "calls_summary" in lineage_pred: - lineage_calls = lineage_pred["calls"] - sublin = list(lineage_calls)[0] - lineage_info = lineage_calls[sublin] - for lineage in lineage_info: - genotypes = list( - list(lineage_pred["calls_summary"].values())[0]["genotypes"].keys() - ) - main_lin = genotypes[0] - try: - variant = list(lineage_info[lineage].keys())[0] - except AttributeError: - variant = None - try: - coverage = lineage_info[lineage][variant]["info"]["coverage"][ - "alternate" - ] - except (KeyError, TypeError): - coverage = None - lin_array = LineageInformation( - lineage=lineage, variant=variant, coverage=coverage - ) - lineages.append(lin_array) - else: - genotypes = list(lineage_pred) - main_lin, sublin = genotypes[0], genotypes[0] - lin_array = LineageInformation( - lineage=genotypes[0], coverage=lineage_pred[genotypes[0]] - ) - lineages.append(lin_array) + for lineage in pred_res: + if not lineage["susceptibility"].upper() == "R": + continue + split_lin = lineage["lineage"].split('.') + main_lin = split_lin[0] + sublin = lineage["lineage"] + lin_idxs = lineage["lineage"].lstrip("lineage").split('.') + try: + coverage = float(lineage["genes"].split(':')[-2]) + except AttributeError: + coverage = None + lineages = [LineageInformation(lineage="lineage" + '.'.join(lin_idxs[:idx+1]), variant=lineage["variants"].split(':')[0], coverage=coverage) for idx in range(len(lin_idxs))] # cast to lineage object result_obj = TypingResultLineage( main_lin=main_lin, From 0b37a9563d91766f1d96b3f0fd6d8176570b1327 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 15:55:24 +0100 Subject: [PATCH 10/29] Add Mtuberculosis test files --- tests/fixtures/__init__.py | 1 + tests/fixtures/mtuberculosis/__init__.py | 35 + .../fixtures/mtuberculosis/analysis_meta.json | 15 + tests/fixtures/mtuberculosis/bracken.out | 90 ++ tests/fixtures/mtuberculosis/bwa.qc | 20 + tests/fixtures/mtuberculosis/mykrobe.csv | 16 + tests/fixtures/mtuberculosis/quast.tsv | 2 + tests/fixtures/mtuberculosis/result.json | 568 +++++++ tests/fixtures/mtuberculosis/tbprofiler.json | 1335 +++++++++++++++++ 9 files changed, 2082 insertions(+) create mode 100644 tests/fixtures/mtuberculosis/__init__.py create mode 100644 tests/fixtures/mtuberculosis/analysis_meta.json create mode 100644 tests/fixtures/mtuberculosis/bracken.out create mode 100644 tests/fixtures/mtuberculosis/bwa.qc create mode 100644 tests/fixtures/mtuberculosis/mykrobe.csv create mode 100644 tests/fixtures/mtuberculosis/quast.tsv create mode 100644 tests/fixtures/mtuberculosis/result.json create mode 100644 tests/fixtures/mtuberculosis/tbprofiler.json diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 84b902f..44a6771 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -2,3 +2,4 @@ from .ecoli import * from .saureus import * +from .mtuberculosis import * diff --git a/tests/fixtures/mtuberculosis/__init__.py b/tests/fixtures/mtuberculosis/__init__.py new file mode 100644 index 0000000..a878644 --- /dev/null +++ b/tests/fixtures/mtuberculosis/__init__.py @@ -0,0 +1,35 @@ +"""mtuberculosis input data fixutres.""" + +import pytest + +from ..fixtures import data_path + + +@pytest.fixture() +def mtuberculosis_analysis_meta_path(data_path): + return str(data_path.joinpath("mtuberculosis", "analysis_meta.json")) + + +@pytest.fixture() +def mtuberculosis_bracken_path(data_path): + return str(data_path.joinpath("mtuberculosis", "bracken.out")) + + +@pytest.fixture() +def mtuberculosis_bwa_path(data_path): + return str(data_path.joinpath("mtuberculosis", "bwa.qc")) + + +@pytest.fixture() +def mtuberculosis_mykrobe_path(data_path): + return str(data_path.joinpath("mtuberculosis", "mykrobe.csv")) + + +@pytest.fixture() +def mtuberculosis_quast_path(data_path): + return str(data_path.joinpath("mtuberculosis", "quast.tsv")) + + +@pytest.fixture() +def mtuberculosis_tbprofiler_path(data_path): + return str(data_path.joinpath("mtuberculosis", "tbprofiler.json")) diff --git a/tests/fixtures/mtuberculosis/analysis_meta.json b/tests/fixtures/mtuberculosis/analysis_meta.json new file mode 100644 index 0000000..3943167 --- /dev/null +++ b/tests/fixtures/mtuberculosis/analysis_meta.json @@ -0,0 +1,15 @@ +{ + "workflow_name": "shrivelled_bassi", + "sample_name": "test_mtuberculosis_1", + "sequencing_platform": "illumina", + "sequencing_type": "PE", + "date": "2023-12-27T13:52:15.507160+01:00", + "pipeline": "main.nf", + "version": "1.0.0", + "commit": "null", + "configuration_files": [ + "/fs1/pipelines/jasen/nextflow.config" + ], + "analysis_profile": "mycobacterium_tuberculosis", + "command": "nextflow run /fs1/pipelines/jasen/main.nf -profile mycobacterium_tuberculosis --csv /fs1/ryan/pipelines/jasen/test-csvs/prp_test_samples_mtuberculosis.csv --queue high -with-singularity /fs1/pipelines/jasen/container/jasen_2023_02-28.sif -with-report /fs1/nextflow//reports/mtuberculosis-val.jasen-mtuberculosis-dev.report.html -with-trace /fs1/nextflow//reports/mtuberculosis-val.jasen-mtuberculosis-dev.trace.txt -with-timeline /fs1/nextflow//reports/mtuberculosis-val.jasen-mtuberculosis-dev.timeline.html -work-dir /fs1/nextflow//mtuberculosis-val.jasen-mtuberculosis-dev -resume" +} \ No newline at end of file diff --git a/tests/fixtures/mtuberculosis/bracken.out b/tests/fixtures/mtuberculosis/bracken.out new file mode 100644 index 0000000..3aa938e --- /dev/null +++ b/tests/fixtures/mtuberculosis/bracken.out @@ -0,0 +1,90 @@ +name taxonomy_id taxonomy_lvl kraken_assigned_reads added_reads new_est_reads fraction_total_reads +Mycobacterium tuberculosis 1773 S 308488 5518842 5827330 0.98023 +Mycobacterium canettii 78331 S 28588 6553 35141 0.00591 +Mycobacterium colombiense 339268 S 1157 3 1160 0.00020 +Mycobacterium avium 1764 S 1090 448 1538 0.00026 +Mycobacterium marseillense 701042 S 801 19 820 0.00014 +Mycobacterium intracellulare 1767 S 295 858 1153 0.00019 +Mycobacterium lepraemurium 64667 S 289 56 345 0.00006 +Mycobacterium chimaera 222805 S 286 394 680 0.00011 +Mycobacterium paraintracellulare 1138383 S 19 83 102 0.00002 +Mycobacterium sp. JS623 212767 S 714 0 714 0.00012 +Mycobacterium sp. KMS 189918 S 441 14 455 0.00008 +Mycobacterium sp. MS1601 1936029 S 415 1 416 0.00007 +Mycobacterium sp. THAF192 2587868 S 387 3 390 0.00007 +Mycobacterium sp. YC-RL4 1682113 S 362 4 366 0.00006 +Mycobacterium sp. ELW1 1547487 S 361 7 368 0.00006 +Mycobacterium sp. DL90 2487344 S 358 1 359 0.00006 +Mycobacterium sp. EPa45 1545728 S 341 8 349 0.00006 +Mycobacterium sp. 3/86Rv 2041046 S 322 35106 35428 0.00596 +Mycobacterium sp. WY10 1920667 S 74 261 335 0.00006 +Mycobacterium sp. PYR15 2051552 S 72 254 326 0.00005 +Mycobacterium sp. MOTT36Y 1168287 S 22 29 51 0.00001 +Mycobacterium sp. QIA-37 1561223 S 15 5 20 0.00000 +Mycobacterium kansasii 1768 S 2832 10 2842 0.00048 +Mycobacterium haemophilum 29311 S 2260 0 2260 0.00038 +Mycobacterium basiliense 2094119 S 2158 0 2158 0.00036 +Mycobacterium paragordonae 1389713 S 1758 3 1761 0.00030 +Mycobacterium shigaense 722731 S 1348 0 1348 0.00023 +Mycobacterium leprae 1769 S 632 0 632 0.00011 +Mycobacterium marinum 1781 S 601 61 662 0.00011 +Mycobacterium dioxanotrophicus 482462 S 431 2 433 0.00007 +Mycobacterium grossiae 1552759 S 387 2 389 0.00007 +Mycobacterium ulcerans 1809 S 159 60 219 0.00004 +Mycobacterium liflandii 261524 S 43 40 83 0.00001 +Mycobacterium pseudoshottsii 265949 S 38 25 63 0.00001 +Mycolicibacterium aurum 1791 S 19 0 19 0.00000 +Mycolicibacterium fortuitum 1766 S 18 0 18 0.00000 +Mycolicibacterium hassiacum 46351 S 18 7 25 0.00000 +Mycolicibacterium gilvum 1804 S 17 2 19 0.00000 +Mycolicibacterium thermoresistibile 1797 S 16 2 18 0.00000 +Mycolicibacterium smegmatis 1772 S 15 1 16 0.00000 +Mycolicibacterium rhodesiae 36814 S 15 0 15 0.00000 +Mycolicibacterium chitae 1792 S 14 0 14 0.00000 +Mycolicibacterium goodii 134601 S 12 1 13 0.00000 +Mycolicibacterium vanbaalenii 110539 S 11 2 13 0.00000 +Mycolicibacter terrae 1788 S 29 0 29 0.00000 +Mycolicibacter sinensis 875328 S 21 0 21 0.00000 +Mycobacteroides abscessus 36809 S 15 2 17 0.00000 +Rhodococcus opacus 37919 S 12 27 39 0.00001 +Corynebacterium sphenisci 191493 S 12 15 27 0.00000 +Rothia mucilaginosa 43675 S 6301 167 6468 0.00109 +Rothia dentocariosa 2047 S 32 1 33 0.00001 +Rothia aeria 172042 S 23 1 24 0.00000 +Schaalia odontolytica 1660 S 853 22 875 0.00015 +Schaalia meyeri 52773 S 46 1 47 0.00001 +Actinomyces pacaensis 1852377 S 172 2 174 0.00003 +Actinomyces hongkongensis 178339 S 15 6 21 0.00000 +Streptomyces sp. ICC4 2099584 S 353 85 438 0.00007 +Streptomyces sp. ICC1 2099583 S 181 39 220 0.00004 +Streptomyces venezuelae 54571 S 14 0 14 0.00000 +Pseudopropionibacterium propionicum 1750 S 143 2 145 0.00002 +Cutibacterium acnes 1747 S 24 1 25 0.00000 +Nocardioides sp. SB3-45 2558918 S 10 1 11 0.00000 +Nocardioides dokdonensis 450734 S 11 2 13 0.00000 +Pseudonocardia sp. HH130630-07 1690815 S 12 13 25 0.00000 +Olsenella uli 133926 S 43 0 43 0.00001 +Olsenella sp. oral taxon 807 712411 S 40 0 40 0.00001 +Streptococcus sp. NPS 308 1902136 S 877 153 1030 0.00017 +Streptococcus sp. 1643 2576376 S 120 53 173 0.00003 +Streptococcus sp. ChDC B345 1433513 S 51 12 63 0.00001 +Streptococcus sp. oral taxon 064 712624 S 46 60 106 0.00002 +Streptococcus sp. 116-D4 2598453 S 45 5 50 0.00001 +Streptococcus sp. oral taxon 431 712633 S 38 2 40 0.00001 +Streptococcus oralis 1303 S 793 238 1031 0.00017 +Streptococcus mitis 28037 S 371 68 439 0.00007 +Streptococcus pneumoniae 1313 S 209 155 364 0.00006 +Streptococcus australis 113107 S 137 25 162 0.00003 +Streptococcus sanguinis 1305 S 104 3 107 0.00002 +Streptococcus gordonii 1302 S 60 2 62 0.00001 +Streptococcus pseudopneumoniae 257758 S 40 14 54 0.00001 +Streptococcus salivarius 1304 S 36 22 58 0.00001 +Streptococcus cristatus 45634 S 10 0 10 0.00000 +Staphylococcus epidermidis 1282 S 16 5 21 0.00000 +Staphylococcus aureus 1280 S 13 12 25 0.00000 +Veillonella dispar 39778 S 154 20 174 0.00003 +Veillonella parvula 29466 S 79 6 85 0.00001 +Veillonella atypica 39777 S 17 1 18 0.00000 +Sorangium cellulosum 56 S 13 50 63 0.00001 +Leptospira santarosai 28183 S 16 0 16 0.00000 +Homo sapiens 9606 S 11046 22 11068 0.00186 diff --git a/tests/fixtures/mtuberculosis/bwa.qc b/tests/fixtures/mtuberculosis/bwa.qc new file mode 100644 index 0000000..10cb802 --- /dev/null +++ b/tests/fixtures/mtuberculosis/bwa.qc @@ -0,0 +1,20 @@ +{ + "ins_size_dev" : "213", + "ins_size" : "264", + "iqr_median" : 0.292929292929293, + "mean_cov" : 199.063794572237, + "sample_id" : "test_mtuberculosis_1", + "pct_above_x" : { + "10" : 99.9898416844028, + "250" : 12.4392427641267, + "1" : 100, + "1000" : 0, + "30" : 99.8976462131901, + "500" : 0.0275675668101908, + "100" : 98.3363831632575 + }, + "dup_reads" : "0", + "dup_pct" : 0, + "tot_reads" : "6136081", + "mapped_reads" : "6022294" +} diff --git a/tests/fixtures/mtuberculosis/mykrobe.csv b/tests/fixtures/mtuberculosis/mykrobe.csv new file mode 100644 index 0000000..417c209 --- /dev/null +++ b/tests/fixtures/mtuberculosis/mykrobe.csv @@ -0,0 +1,16 @@ +"sample","drug","susceptibility","variants (dna_variant-AA_variant:ref_kmer_count:alt_kmer_count:conf) [use --format json for more info]","genes (prot_mut-ref_mut:percent_covg:depth) [use --format json for more info]","mykrobe_version","files","probe_sets","genotype_model","kmer_size","phylo_group","species","lineage","phylo_group_per_covg","species_per_covg","lineage_per_covg","phylo_group_depth","species_depth","lineage_depth" +"test_mtuberculosis_1","Amikacin","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Capreomycin","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Ciprofloxacin","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Delamanid","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Ethambutol","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Ethionamide","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Isoniazid","R","katG_S315T-GCT2155167GGT:0:2624:17362","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Kanamycin","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Levofloxacin","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Linezolid","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Moxifloxacin","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Ofloxacin","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Pyrazinamide","S","","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Rifampicin","R","rpoB_TCATGGA1298T-TCATGGA761104T:152:2108:14397;rpoB_TTCATGGA1297TT-TTCATGGA761103TT:303:1978:13279;rpoB_CTGAGCCAATTCATGGACCAGAACAACCC1288CTGAGCCAATTCCAGAACAACCC-CTGAGCCAATTCATGGACCAGAACAACCC761094CTGAGCCAATTCCAGAACAACCC:1:2505:16677","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" +"test_mtuberculosis_1","Streptomycin","R","rpsL_K88R-AAG781821AGG:17:3759:16769;gid_E173*-CTC4407684CTA:168:3101:18476","","v0.12.2","mtuberculosis_test_1_1.fastq.gz;mtuberculosis_test_1_2.fastq.gz","/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-probe-set-202206.fasta.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz;/usr/local/lib/python3.9/site-packages/mykrobe/data/tb/tb-lineage-bcg-rd1.fasta.gz","kmer_count","21","Mycobacterium_tuberculosis_complex","Mycobacterium_tuberculosis","lineage2.2.4","99.685","98.25","NA","161.0","155","NA" diff --git a/tests/fixtures/mtuberculosis/quast.tsv b/tests/fixtures/mtuberculosis/quast.tsv new file mode 100644 index 0000000..73b9a63 --- /dev/null +++ b/tests/fixtures/mtuberculosis/quast.tsv @@ -0,0 +1,2 @@ +Assembly # contigs (>= 0 bp) # contigs (>= 1000 bp) # contigs (>= 5000 bp) # contigs (>= 10000 bp) # contigs (>= 25000 bp) # contigs (>= 50000 bp) Total length (>= 0 bp) Total length (>= 1000 bp) Total length (>= 5000 bp) Total length (>= 10000 bp) Total length (>= 25000 bp) Total length (>= 50000 bp) # contigs Largest contig Total length Reference length GC (%) Reference GC (%) N50 NG50 N90 NG90 auN auNG L50 LG50 L90 LG90 # misassemblies # misassembled contigs Misassembled contigs length # local misassemblies # scaffold gap ext. mis. # scaffold gap loc. mis. # unaligned mis. contigs # unaligned contigs Unaligned length Genome fraction (%) Duplication ratio # N's per 100 kbp # mismatches per 100 kbp # indels per 100 kbp Largest alignment Total aligned length NA50 NGA50 NA90 NGA90 auNA auNGA LA50 LGA50 LA90 LGA90 +test_mtuberculosis_1 293 231 163 122 61 16 4291275 4257007 4078569 3787721 2709866 1133996 262 121726 4279497 4411532 65.44 65.61 34433 32559 8674 6823 39097.1 37927.0 42 44 129 145 16 16 368608 5 0 0 0 3 + 6 part 19873 96.508 1.000 0.00 35.61 5.38 121726 4257282 32559 32050 7490 5904 37838.5 36706.0 42 44 138 155 diff --git a/tests/fixtures/mtuberculosis/result.json b/tests/fixtures/mtuberculosis/result.json new file mode 100644 index 0000000..1c41bf6 --- /dev/null +++ b/tests/fixtures/mtuberculosis/result.json @@ -0,0 +1,568 @@ +{ + "sample_id": "test_mtuberculosis_1", + "run_metadata": { + "run": { + "pipeline": "main.nf", + "version": "1.0.0", + "commit": "null", + "analysis_profile": "mycobacterium_tuberculosis", + "configuration_files": [ + "/fs1/pipelines/jasen/nextflow.config" + ], + "workflow_name": "shrivelled_bassi", + "sample_name": "test_mtuberculosis_1", + "sequencing_platform": "illumina", + "sequencing_type": "PE", + "command": "nextflow run /fs1/pipelines/jasen/main.nf -profile mycobacterium_tuberculosis --csv /fs1/ryan/pipelines/jasen/test-csvs/prp_test_samples_mtuberculosis.csv --queue high -with-singularity /fs1/pipelines/jasen/container/jasen_2023_02-28.sif -with-report /fs1/nextflow//reports/mtuberculosis-val.jasen-mtuberculosis-dev.report.html -with-trace /fs1/nextflow//reports/mtuberculosis-val.jasen-mtuberculosis-dev.trace.txt -with-timeline /fs1/nextflow//reports/mtuberculosis-val.jasen-mtuberculosis-dev.timeline.html -work-dir /fs1/nextflow//mtuberculosis-val.jasen-mtuberculosis-dev -resume", + "date": "2023-12-27T13:52:15.507160+01:00" + }, + "databases": [ + { + "name": "mykrobe-predictor", + "version": "v0.12.2", + "type": "database" + }, + { + "name": "tbdb", + "version": "c2fb9a2", + "type": "database" + } + ] + }, + "qc": [ + { + "software": "quast", + "version": null, + "result": { + "total_length": 4279497, + "reference_length": 4411532, + "largest_contig": 121726, + "n_contigs": 262, + "n50": 34433, + "assembly_gc": 65.44, + "reference_gc": 65.61, + "duplication_ratio": 1.0 + } + }, + { + "software": "postalignqc", + "version": null, + "result": { + "ins_size": 264, + "ins_size_dev": 213, + "mean_cov": 199, + "pct_above_x": { + "10": 99.9898416844028, + "250": 12.4392427641267, + "1": 100.0, + "1000": 0.0, + "30": 99.8976462131901, + "500": 0.0275675668101908, + "100": 98.3363831632575 + }, + "mapped_reads": 6022294, + "tot_reads": 6136081, + "iqr_median": 0.292929292929293, + "dup_pct": 0.0, + "dup_reads": 0 + } + } + ], + "species_prediction": [ + { + "scientific_name": "Mycobacterium tuberculosis", + "taxonomy_id": 1773, + "taxonomy_lvl": "species", + "kraken_assigned_reads": 308488, + "added_reads": 5518842, + "fraction_total_reads": 0.98023 + }, + { + "scientific_name": "Mycobacterium sp. 3/86Rv", + "taxonomy_id": 2041046, + "taxonomy_lvl": "species", + "kraken_assigned_reads": 322, + "added_reads": 35106, + "fraction_total_reads": 0.00596 + }, + { + "scientific_name": "Mycobacterium canettii", + "taxonomy_id": 78331, + "taxonomy_lvl": "species", + "kraken_assigned_reads": 28588, + "added_reads": 6553, + "fraction_total_reads": 0.00591 + }, + { + "scientific_name": "Homo sapiens", + "taxonomy_id": 9606, + "taxonomy_lvl": "species", + "kraken_assigned_reads": 11046, + "added_reads": 22, + "fraction_total_reads": 0.00186 + }, + { + "scientific_name": "Rothia mucilaginosa", + "taxonomy_id": 43675, + "taxonomy_lvl": "species", + "kraken_assigned_reads": 6301, + "added_reads": 167, + "fraction_total_reads": 0.00109 + } + ], + "schema_version": 1, + "typing_result": [ + { + "type": "lineage", + "software": "mykrobe", + "result": { + "lineages": [ + { + "lin": null, + "family": null, + "spoligotype": null, + "rd": null, + "fraction": null, + "variant": "rpsL_K88R-AAG781821AGG", + "coverage": null + }, + { + "lin": null, + "family": null, + "spoligotype": null, + "rd": null, + "fraction": null, + "variant": "rpsL_K88R-AAG781821AGG", + "coverage": null + }, + { + "lin": null, + "family": null, + "spoligotype": null, + "rd": null, + "fraction": null, + "variant": "rpsL_K88R-AAG781821AGG", + "coverage": null + } + ], + "main_lin": "lineage2", + "sublin": "lineage2.2.4" + } + }, + { + "type": "lineage", + "software": "tbprofiler", + "result": { + "lineages": [ + { + "lin": "lineage2", + "family": "East-Asian", + "spoligotype": "Beijing", + "rd": "RD105", + "fraction": null, + "variant": null, + "coverage": null + }, + { + "lin": "lineage2.2", + "family": "East-Asian (Beijing)", + "spoligotype": "Beijing-RD207", + "rd": "RD105;RD207", + "fraction": null, + "variant": null, + "coverage": null + }, + { + "lin": "lineage2.2.1", + "family": "East-Asian (Beijing)", + "spoligotype": "Beijing-RD181", + "rd": "RD105;RD207;RD181", + "fraction": null, + "variant": null, + "coverage": null + } + ], + "main_lin": "lineage2", + "sublin": "lineage2.2.1" + } + } + ], + "element_type_result": [ + { + "type": "AMR", + "software": "mykrobe", + "result": { + "phenotypes": { + "susceptible": [ + "Kanamycin", + "Ethambutol", + "Levofloxacin", + "Ethionamide", + "Ciprofloxacin", + "Pyrazinamide", + "Ofloxacin", + "Delamanid", + "Capreomycin", + "Linezolid", + "Amikacin", + "Moxifloxacin" + ], + "resistant": [ + "Rifampicin", + "Streptomycin", + "Isoniazid" + ] + }, + "genes": [ + { + "ref_database": null, + "ref_id": null, + "accession": null, + "depth": null, + "identity": null, + "coverage": null, + "ref_start_pos": null, + "ref_end_pos": null, + "drugs": [ + "isoniazid" + ], + "ref_gene_length": null, + "alignment_length": null, + "contig_id": null, + "gene_symbol": "katG", + "sequence_name": null, + "ass_start_pos": null, + "ass_end_pos": null, + "strand": null, + "element_type": "AMR", + "element_subtype": "AMR", + "res_class": null, + "res_subclass": null, + "method": null, + "close_seq_name": null, + "phenotypes": [ + { + "type": "AMR", + "group": "AMR", + "name": "AMR" + } + ] + }, + { + "ref_database": null, + "ref_id": null, + "accession": null, + "depth": null, + "identity": null, + "coverage": null, + "ref_start_pos": null, + "ref_end_pos": null, + "drugs": [ + "rifampicin" + ], + "ref_gene_length": null, + "alignment_length": null, + "contig_id": null, + "gene_symbol": "rpoB", + "sequence_name": null, + "ass_start_pos": null, + "ass_end_pos": null, + "strand": null, + "element_type": "AMR", + "element_subtype": "AMR", + "res_class": null, + "res_subclass": null, + "method": null, + "close_seq_name": null, + "phenotypes": [ + { + "type": "AMR", + "group": "AMR", + "name": "AMR" + } + ] + }, + { + "ref_database": null, + "ref_id": null, + "accession": null, + "depth": null, + "identity": null, + "coverage": null, + "ref_start_pos": null, + "ref_end_pos": null, + "drugs": [ + "streptomycin" + ], + "ref_gene_length": null, + "alignment_length": null, + "contig_id": null, + "gene_symbol": "rpsL", + "sequence_name": null, + "ass_start_pos": null, + "ass_end_pos": null, + "strand": null, + "element_type": "AMR", + "element_subtype": "AMR", + "res_class": null, + "res_subclass": null, + "method": null, + "close_seq_name": null, + "phenotypes": [ + { + "type": "AMR", + "group": "AMR", + "name": "AMR" + } + ] + } + ], + "mutations": [] + } + }, + { + "type": "AMR", + "software": "tbprofiler", + "result": { + "phenotypes": { + "susceptible": [ + "ofloxacin", + "moxifloxacin", + "delamanid", + "kanamycin", + "amikacin", + "ethambutol", + "ethionamide", + "ciprofloxacin", + "levofloxacin", + "pyrazinamide", + "linezolid", + "capreomycin" + ], + "resistant": [ + "streptomycin", + "isoniazid", + "rifampicin" + ] + }, + "genes": [], + "mutations": [ + { + "ref_database": "tbdb", + "ref_id": null, + "variant_type": "substitution", + "genes": [ + "rpoB" + ], + "position": 761104, + "ref_nt": "TCATGGA", + "alt_nt": "T", + "ref_aa": null, + "alt_aa": null, + "depth": 151.0, + "contig_id": null, + "gene_symbol": null, + "sequence_name": null, + "ass_start_pos": null, + "ass_end_pos": null, + "strand": null, + "element_type": null, + "element_subtype": null, + "target_length": null, + "res_class": null, + "res_subclass": null, + "method": null, + "close_seq_name": null, + "type": "conservative_inframe_deletion", + "change": null, + "nucleotide_change": "c.1300_1305delATGGAC", + "protein_change": "p.Met434_Asp435del", + "annotation": [ + { + "type": "who_confidence", + "drug": "rifampicin", + "who_confidence": "Assoc w R - interim" + } + ], + "drugs": [ + { + "type": "drug", + "drug": "rifampicin", + "literature": "https://www.who.int/publications/i/item/9789240028173", + "confers": "resistance", + "who confidence": "Assoc w R - interim" + } + ], + "phenotypes": [ + { + "type": "AMR", + "group": "AMR", + "name": "AMR" + } + ] + }, + { + "ref_database": "tbdb", + "ref_id": null, + "variant_type": "substitution", + "genes": [ + "rpsL" + ], + "position": 781822, + "ref_nt": "A", + "alt_nt": "G", + "ref_aa": null, + "alt_aa": null, + "depth": 236.0, + "contig_id": null, + "gene_symbol": null, + "sequence_name": null, + "ass_start_pos": null, + "ass_end_pos": null, + "strand": null, + "element_type": null, + "element_subtype": null, + "target_length": null, + "res_class": null, + "res_subclass": null, + "method": null, + "close_seq_name": null, + "type": "missense_variant", + "change": null, + "nucleotide_change": "c.263A>G", + "protein_change": "p.Lys88Arg", + "annotation": [ + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Assoc w R" + } + ], + "drugs": [ + { + "type": "drug", + "drug": "streptomycin", + "confers": "resistance", + "who confidence": "Assoc w R" + } + ], + "phenotypes": [ + { + "type": "AMR", + "group": "AMR", + "name": "AMR" + } + ] + }, + { + "ref_database": "tbdb", + "ref_id": null, + "variant_type": "substitution", + "genes": [ + "katG" + ], + "position": 2155168, + "ref_nt": "C", + "alt_nt": "G", + "ref_aa": null, + "alt_aa": null, + "depth": 152.0, + "contig_id": null, + "gene_symbol": null, + "sequence_name": null, + "ass_start_pos": null, + "ass_end_pos": null, + "strand": null, + "element_type": null, + "element_subtype": null, + "target_length": null, + "res_class": null, + "res_subclass": null, + "method": null, + "close_seq_name": null, + "type": "missense_variant", + "change": null, + "nucleotide_change": "c.944G>C", + "protein_change": "p.Ser315Thr", + "annotation": [ + { + "type": "who_confidence", + "drug": "isoniazid", + "who_confidence": "Assoc w R" + } + ], + "drugs": [ + { + "type": "drug", + "drug": "isoniazid", + "confers": "resistance", + "who confidence": "Assoc w R" + } + ], + "phenotypes": [ + { + "type": "AMR", + "group": "AMR", + "name": "AMR" + } + ] + }, + { + "ref_database": "tbdb", + "ref_id": null, + "variant_type": "substitution", + "genes": [ + "gid" + ], + "position": 4407686, + "ref_nt": "C", + "alt_nt": "A", + "ref_aa": null, + "alt_aa": null, + "depth": 221.0, + "contig_id": null, + "gene_symbol": null, + "sequence_name": null, + "ass_start_pos": null, + "ass_end_pos": null, + "strand": null, + "element_type": null, + "element_subtype": null, + "target_length": null, + "res_class": null, + "res_subclass": null, + "method": null, + "close_seq_name": null, + "type": "stop_gained", + "change": null, + "nucleotide_change": "c.517G>T", + "protein_change": "p.Glu173*", + "annotation": [ + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Assoc w R - interim" + } + ], + "drugs": [ + { + "type": "drug", + "drug": "streptomycin", + "literature": "https://www.who.int/publications/i/item/9789240028173", + "confers": "resistance", + "who confidence": "Assoc w R - interim" + } + ], + "phenotypes": [ + { + "type": "AMR", + "group": "AMR", + "name": "AMR" + } + ] + } + ] + } + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/mtuberculosis/tbprofiler.json b/tests/fixtures/mtuberculosis/tbprofiler.json new file mode 100644 index 0000000..4132b99 --- /dev/null +++ b/tests/fixtures/mtuberculosis/tbprofiler.json @@ -0,0 +1,1335 @@ +{ + "id": "test_mtuberculosis_1", + "tbprofiler_version": "4.4.2", + "qc": { + "pct_reads_mapped": 98.26, + "num_reads_mapped": 6004294, + "median_coverage": 194, + "gene_coverage": [ + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0005", "gene": "gyrB" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0006", "gene": "gyrA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0407", "gene": "fgd1" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0486", "gene": "mshA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0529", "gene": "ccsA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0667", "gene": "rpoB" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0668", "gene": "rpoC" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0676c", "gene": "mmpL5" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0677c", "gene": "mmpS5" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0678", "gene": "mmpR5" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0682", "gene": "rpsL" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv0701", "gene": "rplC" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1173", "gene": "fbiC" }, + { + "fraction": 0.0, + "cutoff": 0, + "locus_tag": "Rv1258c", + "gene": "Rv1258c" + }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1267c", "gene": "embR" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1305", "gene": "atpE" }, + { + "fraction": 0.0, + "cutoff": 0, + "locus_tag": "EBG00000313325", + "gene": "rrs" + }, + { + "fraction": 0.0, + "cutoff": 0, + "locus_tag": "EBG00000313339", + "gene": "rrl" + }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1483", "gene": "fabG1" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1484", "gene": "inhA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1630", "gene": "rpsA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1694", "gene": "tlyA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1854c", "gene": "ndh" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1908c", "gene": "katG" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv1918c", "gene": "PPE35" }, + { + "fraction": 0.0, + "cutoff": 0, + "locus_tag": "Rv1979c", + "gene": "Rv1979c" + }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2043c", "gene": "pncA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2245", "gene": "kasA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2416c", "gene": "eis" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2428", "gene": "ahpC" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2447c", "gene": "folC" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2535c", "gene": "pepQ" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2671", "gene": "ribD" }, + { + "fraction": 0.0, + "cutoff": 0, + "locus_tag": "Rv2752c", + "gene": "Rv2752c" + }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2754c", "gene": "thyX" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2764c", "gene": "thyA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2780", "gene": "ald" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv2983", "gene": "fbiD" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3083", "gene": "Rv3083" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3106", "gene": "fprA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3197A", "gene": "whiB7" }, + { + "fraction": 0.0, + "cutoff": 0, + "locus_tag": "Rv3236c", + "gene": "Rv3236c" + }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3261", "gene": "fbiA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3262", "gene": "fbiB" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3423c", "gene": "alr" }, + { + "fraction": 0.016632016632016633, + "cutoff": 0, + "locus_tag": "Rv3457c", + "gene": "rpoA" + }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3547", "gene": "ddn" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3596c", "gene": "clpC1" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3601c", "gene": "panD" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3793", "gene": "embC" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3794", "gene": "embA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3795", "gene": "embB" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3805c", "gene": "aftB" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3806c", "gene": "ubiA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3854c", "gene": "ethA" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3855", "gene": "ethR" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3862c", "gene": "whiB6" }, + { "fraction": 0.0, "cutoff": 0, "locus_tag": "Rv3919c", "gene": "gid" } + ], + "missing_positions": [] + }, + "delly": "success", + "input_data_source": "fastq", + "lineage": [ + { + "lin": "lineage2", + "family": "East-Asian", + "spoligotype": "Beijing", + "rd": "RD105", + "frac": 0.9989017023613399 + }, + { + "lin": "lineage2.2", + "family": "East-Asian (Beijing)", + "spoligotype": "Beijing-RD207", + "rd": "RD105;RD207", + "frac": 0.9980324643384161 + }, + { + "lin": "lineage2.2.1", + "family": "East-Asian (Beijing)", + "spoligotype": "Beijing-RD181", + "rd": "RD105;RD207;RD181", + "frac": 0.998792270531401 + } + ], + "main_lin": "lineage2", + "sublin": "lineage2.2.1", + "dr_variants": [ + { + "chrom": "Chromosome", + "genome_pos": 761104, + "ref": "TCATGGA", + "alt": "T", + "depth": 151, + "freq": 1.0, + "feature_id": "CCP43410", + "type": "conservative_inframe_deletion", + "nucleotide_change": "c.1300_1305delATGGAC", + "protein_change": "p.Met434_Asp435del", + "annotation": [ + { + "type": "who_confidence", + "drug": "rifampicin", + "who_confidence": "Assoc w R - interim" + } + ], + "alternate_consequences": [], + "change": "c.1300_1305delATGGAC", + "locus_tag": "Rv0667", + "gene": "rpoB", + "drugs": [ + { + "type": "drug", + "drug": "rifampicin", + "literature": "https://www.who.int/publications/i/item/9789240028173", + "confers": "resistance", + "who confidence": "Assoc w R - interim" + } + ], + "gene_associated_drugs": ["rifampicin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 781822, + "ref": "A", + "alt": "G", + "depth": 236, + "freq": 0.9957627118644068, + "feature_id": "CCP43425", + "type": "missense_variant", + "nucleotide_change": "c.263A>G", + "protein_change": "p.Lys88Arg", + "annotation": [ + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Lys88Arg", + "locus_tag": "Rv0682", + "gene": "rpsL", + "drugs": [ + { + "type": "drug", + "drug": "streptomycin", + "confers": "resistance", + "who confidence": "Assoc w R" + } + ], + "gene_associated_drugs": ["streptomycin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 2155168, + "ref": "C", + "alt": "G", + "depth": 152, + "freq": 1.0, + "feature_id": "CCP44675", + "type": "missense_variant", + "nucleotide_change": "c.944G>C", + "protein_change": "p.Ser315Thr", + "annotation": [ + { + "type": "who_confidence", + "drug": "isoniazid", + "who_confidence": "Assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Ser315Thr", + "locus_tag": "Rv1908c", + "gene": "katG", + "drugs": [ + { + "type": "drug", + "drug": "isoniazid", + "confers": "resistance", + "who confidence": "Assoc w R" + } + ], + "gene_associated_drugs": ["isoniazid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4407686, + "ref": "C", + "alt": "A", + "depth": 221, + "freq": 1.0, + "feature_id": "CCP46748", + "type": "stop_gained", + "nucleotide_change": "c.517G>T", + "protein_change": "p.Glu173*", + "annotation": [ + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Assoc w R - interim" + } + ], + "alternate_consequences": [], + "change": "p.Glu173*", + "locus_tag": "Rv3919c", + "gene": "gid", + "drugs": [ + { + "type": "drug", + "drug": "streptomycin", + "literature": "https://www.who.int/publications/i/item/9789240028173", + "confers": "resistance", + "who confidence": "Assoc w R - interim" + } + ], + "gene_associated_drugs": ["streptomycin"] + } + ], + "other_variants": [ + { + "chrom": "Chromosome", + "genome_pos": 7296, + "ref": "G", + "alt": "T", + "depth": 160, + "freq": 0.1125, + "feature_id": "CCP42728", + "type": "upstream_gene_variant", + "nucleotide_change": "c.-6G>T", + "protein_change": "", + "alternate_consequences": [], + "change": "c.-6G>T", + "locus_tag": "Rv0006", + "gene": "gyrA", + "gene_associated_drugs": [ + "fluoroquinolones", + "moxifloxacin", + "levofloxacin", + "ciprofloxacin", + "ofloxacin" + ] + }, + { + "chrom": "Chromosome", + "genome_pos": 7362, + "ref": "G", + "alt": "C", + "depth": 181, + "freq": 0.994475138121547, + "feature_id": "CCP42728", + "type": "missense_variant", + "nucleotide_change": "c.61G>C", + "protein_change": "p.Glu21Gln", + "annotation": [ + { + "type": "who_confidence", + "drug": "moxifloxacin", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "levofloxacin", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Glu21Gln", + "locus_tag": "Rv0006", + "gene": "gyrA", + "gene_associated_drugs": [ + "fluoroquinolones", + "moxifloxacin", + "levofloxacin", + "ciprofloxacin", + "ofloxacin" + ] + }, + { + "chrom": "Chromosome", + "genome_pos": 7585, + "ref": "G", + "alt": "C", + "depth": 212, + "freq": 1.0, + "feature_id": "CCP42728", + "type": "missense_variant", + "nucleotide_change": "c.284G>C", + "protein_change": "p.Ser95Thr", + "annotation": [ + { + "type": "who_confidence", + "drug": "levofloxacin", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "moxifloxacin", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Ser95Thr", + "locus_tag": "Rv0006", + "gene": "gyrA", + "gene_associated_drugs": [ + "fluoroquinolones", + "moxifloxacin", + "levofloxacin", + "ciprofloxacin", + "ofloxacin" + ] + }, + { + "chrom": "Chromosome", + "genome_pos": 9304, + "ref": "G", + "alt": "A", + "depth": 221, + "freq": 1.0, + "feature_id": "CCP42728", + "type": "missense_variant", + "nucleotide_change": "c.2003G>A", + "protein_change": "p.Gly668Asp", + "annotation": [ + { + "type": "who_confidence", + "drug": "moxifloxacin", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "levofloxacin", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Gly668Asp", + "locus_tag": "Rv0006", + "gene": "gyrA", + "gene_associated_drugs": [ + "fluoroquinolones", + "moxifloxacin", + "levofloxacin", + "ciprofloxacin", + "ofloxacin" + ] + }, + { + "chrom": "Chromosome", + "genome_pos": 9653, + "ref": "C", + "alt": "A", + "depth": 160, + "freq": 0.1625, + "feature_id": "CCP42728", + "type": "synonymous_variant", + "nucleotide_change": "c.2352C>A", + "protein_change": "p.Ile784Ile", + "alternate_consequences": [], + "change": "c.2352C>A", + "locus_tag": "Rv0006", + "gene": "gyrA", + "gene_associated_drugs": [ + "fluoroquinolones", + "moxifloxacin", + "levofloxacin", + "ciprofloxacin", + "ofloxacin" + ] + }, + { + "chrom": "Chromosome", + "genome_pos": 9675, + "ref": "C", + "alt": "A", + "depth": 139, + "freq": 0.11510791366906475, + "feature_id": "CCP42728", + "type": "missense_variant", + "nucleotide_change": "c.2374C>A", + "protein_change": "p.Arg792Ser", + "alternate_consequences": [], + "change": "p.Arg792Ser", + "locus_tag": "Rv0006", + "gene": "gyrA", + "gene_associated_drugs": [ + "fluoroquinolones", + "moxifloxacin", + "levofloxacin", + "ciprofloxacin", + "ofloxacin" + ] + }, + { + "chrom": "Chromosome", + "genome_pos": 491742, + "ref": "T", + "alt": "C", + "depth": 149, + "freq": 1.0, + "feature_id": "CCP43138", + "type": "synonymous_variant", + "nucleotide_change": "c.960T>C", + "protein_change": "p.Phe320Phe", + "alternate_consequences": [], + "change": "c.960T>C", + "locus_tag": "Rv0407", + "gene": "fgd1", + "gene_associated_drugs": ["delamanid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 575907, + "ref": "C", + "alt": "T", + "depth": 252, + "freq": 1.0, + "feature_id": "CCP43220", + "type": "missense_variant", + "nucleotide_change": "c.560C>T", + "protein_change": "p.Ala187Val", + "annotation": [ + { + "type": "who_confidence", + "drug": "isoniazid", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "ethionamide", + "who_confidence": "Uncertain significance" + } + ], + "alternate_consequences": [], + "change": "p.Ala187Val", + "locus_tag": "Rv0486", + "gene": "mshA", + "gene_associated_drugs": ["ethionamide", "isoniazid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 576626, + "ref": "A", + "alt": "C", + "depth": 79, + "freq": 0.11392405063291139, + "feature_id": "CCP43220", + "type": "missense_variant", + "nucleotide_change": "c.1279A>C", + "protein_change": "p.Thr427Pro", + "alternate_consequences": [], + "change": "p.Thr427Pro", + "locus_tag": "Rv0486", + "gene": "mshA", + "gene_associated_drugs": ["ethionamide", "isoniazid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 576631, + "ref": "C", + "alt": "A", + "depth": 83, + "freq": 0.13253012048192772, + "feature_id": "CCP43220", + "type": "missense_variant", + "nucleotide_change": "c.1284C>A", + "protein_change": "p.Phe428Leu", + "alternate_consequences": [], + "change": "p.Phe428Leu", + "locus_tag": "Rv0486", + "gene": "mshA", + "gene_associated_drugs": ["ethionamide", "isoniazid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 620625, + "ref": "A", + "alt": "G", + "depth": 186, + "freq": 1.0, + "feature_id": "CCP43266", + "type": "missense_variant", + "nucleotide_change": "c.735A>G", + "protein_change": "p.Ile245Met", + "annotation": [ + { + "type": "who_confidence", + "drug": "amikacin", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "capreomycin", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Ile245Met", + "locus_tag": "Rv0529", + "gene": "ccsA", + "gene_associated_drugs": ["capreomycin", "amikacin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 763031, + "ref": "T", + "alt": "C", + "depth": 215, + "freq": 1.0, + "feature_id": "CCP43411", + "type": "upstream_gene_variant", + "nucleotide_change": "c.-339T>C", + "protein_change": "", + "alternate_consequences": [ + { + "gene_name": "rpoB", + "gene_id": "Rv0667", + "feature_id": "CCP43410", + "type": "synonymous_variant", + "nucleotide_change": "c.3225T>C", + "protein_change": "p.Ala1075Ala" + } + ], + "change": "c.-339T>C", + "locus_tag": "Rv0668", + "gene": "rpoC", + "gene_associated_drugs": ["rifampicin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 775639, + "ref": "T", + "alt": "C", + "depth": 207, + "freq": 1.0, + "feature_id": "CCP43419", + "type": "missense_variant", + "nucleotide_change": "c.2842A>G", + "protein_change": "p.Ile948Val", + "annotation": [ + { + "type": "who_confidence", + "drug": "bedaquiline", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "clofazimine", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Ile948Val", + "locus_tag": "Rv0676c", + "gene": "mmpL5", + "gene_associated_drugs": ["clofazimine", "bedaquiline"] + }, + { + "chrom": "Chromosome", + "genome_pos": 776100, + "ref": "G", + "alt": "A", + "depth": 198, + "freq": 1.0, + "feature_id": "CCP43419", + "type": "missense_variant", + "nucleotide_change": "c.2381C>T", + "protein_change": "p.Thr794Ile", + "annotation": [ + { + "type": "who_confidence", + "drug": "clofazimine", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "bedaquiline", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Thr794Ile", + "locus_tag": "Rv0676c", + "gene": "mmpL5", + "gene_associated_drugs": ["clofazimine", "bedaquiline"] + }, + { + "chrom": "Chromosome", + "genome_pos": 776182, + "ref": "C", + "alt": "T", + "depth": 157, + "freq": 1.0, + "feature_id": "CCP43419", + "type": "missense_variant", + "nucleotide_change": "c.2299G>A", + "protein_change": "p.Asp767Asn", + "annotation": [ + { + "type": "who_confidence", + "drug": "clofazimine", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "bedaquiline", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Asp767Asn", + "locus_tag": "Rv0676c", + "gene": "mmpL5", + "gene_associated_drugs": ["clofazimine", "bedaquiline"] + }, + { + "chrom": "Chromosome", + "genome_pos": 779615, + "ref": "G", + "alt": "C", + "depth": 195, + "freq": 1.0, + "feature_id": "CCP43420", + "type": "upstream_gene_variant", + "nucleotide_change": "c.-710C>G", + "protein_change": "", + "alternate_consequences": [], + "change": "c.-710C>G", + "locus_tag": "Rv0677c", + "gene": "mmpS5", + "gene_associated_drugs": ["clofazimine", "bedaquiline"] + }, + { + "chrom": "Chromosome", + "genome_pos": 781395, + "ref": "T", + "alt": "C", + "depth": 224, + "freq": 1.0, + "feature_id": "CCP43425", + "type": "upstream_gene_variant", + "nucleotide_change": "c.-165T>C", + "protein_change": "", + "annotation": [ + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "c.-165T>C", + "locus_tag": "Rv0682", + "gene": "rpsL", + "gene_associated_drugs": ["streptomycin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 1406760, + "ref": "T", + "alt": "TG", + "depth": 193, + "freq": 1.0, + "feature_id": "CCP44014", + "type": "frameshift_variant", + "nucleotide_change": "c.580_581insC", + "protein_change": "p.Glu194fs", + "annotation": [ + { + "type": "who_confidence", + "drug": "isoniazid", + "who_confidence": "Uncertain significance" + }, + { + "type": "who_confidence", + "drug": "isoniazid", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Uncertain significance" + }, + { + "type": "who_confidence", + "drug": "pyrazinamide", + "who_confidence": "Uncertain significance" + }, + { + "type": "who_confidence", + "drug": "pyrazinamide", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "c.580_581insC", + "locus_tag": "Rv1258c", + "gene": "Rv1258c", + "gene_associated_drugs": ["streptomycin", "pyrazinamide", "isoniazid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 1471659, + "ref": "C", + "alt": "T", + "depth": 142, + "freq": 1.0, + "feature_id": "EBG00000313325-1", + "type": "upstream_gene_variant", + "nucleotide_change": "n.-187C>T", + "protein_change": "", + "alternate_consequences": [], + "change": "n.-187C>T", + "locus_tag": "EBG00000313325", + "gene": "rrs", + "gene_associated_drugs": [ + "linezolid", + "kanamycin", + "capreomycin", + "amikacin", + "aminoglycosides", + "streptomycin" + ] + }, + { + "chrom": "Chromosome", + "genome_pos": 1834177, + "ref": "A", + "alt": "C", + "depth": 246, + "freq": 0.991869918699187, + "feature_id": "CCP44394", + "type": "synonymous_variant", + "nucleotide_change": "c.636A>C", + "protein_change": "p.Arg212Arg", + "alternate_consequences": [], + "change": "c.636A>C", + "locus_tag": "Rv1630", + "gene": "rpsA", + "gene_associated_drugs": ["pyrazinamide"] + }, + { + "chrom": "Chromosome", + "genome_pos": 1917972, + "ref": "A", + "alt": "G", + "depth": 166, + "freq": 1.0, + "feature_id": "CCP44459", + "type": "synonymous_variant", + "nucleotide_change": "c.33A>G", + "protein_change": "p.Leu11Leu", + "alternate_consequences": [], + "change": "c.33A>G", + "locus_tag": "Rv1694", + "gene": "tlyA", + "gene_associated_drugs": ["capreomycin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 2154724, + "ref": "C", + "alt": "A", + "depth": 212, + "freq": 1.0, + "feature_id": "CCP44675", + "type": "missense_variant", + "nucleotide_change": "c.1388G>T", + "protein_change": "p.Arg463Leu", + "annotation": [ + { + "type": "who_confidence", + "drug": "isoniazid", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Arg463Leu", + "locus_tag": "Rv1908c", + "gene": "katG", + "gene_associated_drugs": ["isoniazid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 2167926, + "ref": "A", + "alt": "G", + "depth": 142, + "freq": 1.0, + "feature_id": "CCP44685", + "type": "missense_variant", + "nucleotide_change": "c.2687T>C", + "protein_change": "p.Leu896Ser", + "annotation": [ + { + "type": "who_confidence", + "drug": "pyrazinamide", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Leu896Ser", + "locus_tag": "Rv1918c", + "gene": "PPE35", + "gene_associated_drugs": ["pyrazinamide"] + }, + { + "chrom": "Chromosome", + "genome_pos": 2170086, + "ref": "G", + "alt": "C", + "depth": 44, + "freq": 0.11363636363636363, + "feature_id": "CCP44685", + "type": "missense_variant", + "nucleotide_change": "c.527C>G", + "protein_change": "p.Ala176Gly", + "alternate_consequences": [], + "change": "p.Ala176Gly", + "locus_tag": "Rv1918c", + "gene": "PPE35", + "gene_associated_drugs": ["pyrazinamide"] + }, + { + "chrom": "Chromosome", + "genome_pos": 2223293, + "ref": "T", + "alt": "C", + "depth": 163, + "freq": 1.0, + "feature_id": "CCP44748", + "type": "upstream_gene_variant", + "nucleotide_change": "c.-129A>G", + "protein_change": "", + "annotation": [ + { + "type": "who_confidence", + "drug": "bedaquiline", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "clofazimine", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "c.-129A>G", + "locus_tag": "Rv1979c", + "gene": "Rv1979c", + "gene_associated_drugs": ["clofazimine", "bedaquiline"] + }, + { + "chrom": "Chromosome", + "genome_pos": 2714526, + "ref": "GGT", + "alt": "G", + "depth": 196, + "freq": 1.0, + "feature_id": "CCP45207", + "type": "frameshift_variant", + "nucleotide_change": "c.805_806delAC", + "protein_change": "p.Thr269fs", + "annotation": [ + { + "type": "who_confidence", + "drug": "amikacin", + "who_confidence": "Uncertain significance" + }, + { + "type": "who_confidence", + "drug": "amikacin", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "kanamycin", + "who_confidence": "Uncertain significance" + }, + { + "type": "who_confidence", + "drug": "amikacin", + "who_confidence": "Uncertain significance" + } + ], + "alternate_consequences": [], + "change": "c.805_806delAC", + "locus_tag": "Rv2416c", + "gene": "eis", + "gene_associated_drugs": ["kanamycin", "amikacin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 3086788, + "ref": "T", + "alt": "C", + "depth": 168, + "freq": 1.0, + "feature_id": "CCP45579", + "type": "upstream_gene_variant", + "nucleotide_change": "c.-32T>C", + "protein_change": "", + "alternate_consequences": [], + "change": "c.-32T>C", + "locus_tag": "Rv2780", + "gene": "ald", + "gene_associated_drugs": ["cycloserine"] + }, + { + "chrom": "Chromosome", + "genome_pos": 3339726, + "ref": "A", + "alt": "T", + "depth": 56, + "freq": 0.16071428571428573, + "feature_id": "CCP45788", + "type": "synonymous_variant", + "nucleotide_change": "c.609A>T", + "protein_change": "p.Val203Val", + "alternate_consequences": [], + "change": "c.609A>T", + "locus_tag": "Rv2983", + "gene": "fbiD", + "gene_associated_drugs": ["delamanid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 3339744, + "ref": "A", + "alt": "T", + "depth": 31, + "freq": 0.12903225806451613, + "feature_id": "CCP45788", + "type": "synonymous_variant", + "nucleotide_change": "c.627A>T", + "protein_change": "p.Arg209Arg", + "alternate_consequences": [], + "change": "c.627A>T", + "locus_tag": "Rv2983", + "gene": "fbiD", + "gene_associated_drugs": ["delamanid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 3339750, + "ref": "C", + "alt": "G", + "depth": 38, + "freq": 0.15789473684210525, + "feature_id": "CCP45788", + "type": "synonymous_variant", + "nucleotide_change": "c.633C>G", + "protein_change": "p.Val211Val", + "alternate_consequences": [], + "change": "c.633C>G", + "locus_tag": "Rv2983", + "gene": "fbiD", + "gene_associated_drugs": ["delamanid"] + }, + { + "chrom": "Chromosome", + "genome_pos": 3473996, + "ref": "G", + "alt": "GA", + "depth": 154, + "freq": 1.0, + "feature_id": "CCP45916", + "type": "upstream_gene_variant", + "nucleotide_change": "c.-11_-10insA", + "protein_change": "", + "alternate_consequences": [], + "change": "c.-11_-10insA", + "locus_tag": "Rv3106", + "gene": "fprA", + "gene_associated_drugs": ["capreomycin", "amikacin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 3568428, + "ref": "T", + "alt": "C", + "depth": 87, + "freq": 0.1839080459770115, + "feature_id": "CCP46011", + "type": "synonymous_variant", + "nucleotide_change": "c.252A>G", + "protein_change": "p.Gly84Gly", + "alternate_consequences": [], + "change": "c.252A>G", + "locus_tag": "Rv3197A", + "gene": "whiB7", + "gene_associated_drugs": ["streptomycin", "kanamycin", "amikacin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 3568473, + "ref": "C", + "alt": "A", + "depth": 103, + "freq": 0.14563106796116504, + "feature_id": "CCP46011", + "type": "missense_variant", + "nucleotide_change": "c.207G>T", + "protein_change": "p.Glu69Asp", + "alternate_consequences": [], + "change": "p.Glu69Asp", + "locus_tag": "Rv3197A", + "gene": "whiB7", + "gene_associated_drugs": ["streptomycin", "kanamycin", "amikacin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 3612813, + "ref": "T", + "alt": "C", + "depth": 118, + "freq": 1.0, + "feature_id": "CCP46055", + "type": "missense_variant", + "nucleotide_change": "c.304A>G", + "protein_change": "p.Thr102Ala", + "annotation": [ + { + "type": "who_confidence", + "drug": "pyrazinamide", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Thr102Ala", + "locus_tag": "Rv3236c", + "gene": "Rv3236c", + "gene_associated_drugs": ["pyrazinamide"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4038955, + "ref": "C", + "alt": "A", + "depth": 131, + "freq": 0.1450381679389313, + "feature_id": "CCP46419", + "type": "stop_gained", + "nucleotide_change": "c.1750G>T", + "protein_change": "p.Glu584*", + "alternate_consequences": [], + "change": "p.Glu584*", + "locus_tag": "Rv3596c", + "gene": "clpC1", + "gene_associated_drugs": ["pyrazinamide"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4242643, + "ref": "C", + "alt": "T", + "depth": 158, + "freq": 1.0, + "feature_id": "CCP46623", + "type": "upstream_gene_variant", + "nucleotide_change": "c.-590C>T", + "protein_change": "", + "alternate_consequences": [ + { + "gene_name": "embC", + "gene_id": "Rv3793", + "feature_id": "CCP46622", + "type": "synonymous_variant", + "nucleotide_change": "c.2781C>T", + "protein_change": "p.Arg927Arg" + } + ], + "change": "c.-590C>T", + "locus_tag": "Rv3794", + "gene": "embA", + "gene_associated_drugs": ["ethambutol"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4243460, + "ref": "C", + "alt": "T", + "depth": 177, + "freq": 1.0, + "feature_id": "CCP46623", + "type": "synonymous_variant", + "nucleotide_change": "c.228C>T", + "protein_change": "p.Cys76Cys", + "alternate_consequences": [], + "change": "c.228C>T", + "locus_tag": "Rv3794", + "gene": "embA", + "gene_associated_drugs": ["ethambutol"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4247015, + "ref": "T", + "alt": "G", + "depth": 94, + "freq": 0.1702127659574468, + "feature_id": "CCP46624", + "type": "missense_variant", + "nucleotide_change": "c.502T>G", + "protein_change": "p.Ser168Ala", + "alternate_consequences": [], + "change": "p.Ser168Ala", + "locus_tag": "Rv3795", + "gene": "embB", + "gene_associated_drugs": ["ethambutol"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4247028, + "ref": "T", + "alt": "G", + "depth": 86, + "freq": 0.16279069767441862, + "feature_id": "CCP46624", + "type": "missense_variant", + "nucleotide_change": "c.515T>G", + "protein_change": "p.Leu172Arg", + "annotation": [ + { + "type": "who_confidence", + "drug": "ethambutol", + "who_confidence": "Uncertain significance" + } + ], + "alternate_consequences": [], + "change": "p.Leu172Arg", + "locus_tag": "Rv3795", + "gene": "embB", + "gene_associated_drugs": ["ethambutol"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4267647, + "ref": "T", + "alt": "C", + "depth": 125, + "freq": 1.0, + "feature_id": "CCP46634", + "type": "missense_variant", + "nucleotide_change": "c.1190A>G", + "protein_change": "p.Asp397Gly", + "annotation": [ + { + "type": "who_confidence", + "drug": "capreomycin", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "amikacin", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Asp397Gly", + "locus_tag": "Rv3805c", + "gene": "aftB", + "gene_associated_drugs": ["capreomycin", "amikacin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4338219, + "ref": "G", + "alt": "T", + "depth": 137, + "freq": 0.11678832116788321, + "feature_id": "CCP46691", + "type": "synonymous_variant", + "nucleotide_change": "c.303C>A", + "protein_change": "p.Arg101Arg", + "alternate_consequences": [], + "change": "c.303C>A", + "locus_tag": "Rv3862c", + "gene": "whiB6", + "gene_associated_drugs": ["streptomycin", "capreomycin", "amikacin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4338595, + "ref": "GC", + "alt": "G", + "depth": 224, + "freq": 0.9955357142857143, + "feature_id": "CCP46691", + "type": "upstream_gene_variant", + "nucleotide_change": "c.-75delG", + "protein_change": "", + "annotation": [ + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "capreomycin", + "who_confidence": "Uncertain significance" + }, + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Uncertain significance" + }, + { + "type": "who_confidence", + "drug": "capreomycin", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "amikacin", + "who_confidence": "Not assoc w R" + }, + { + "type": "who_confidence", + "drug": "amikacin", + "who_confidence": "Uncertain significance" + } + ], + "alternate_consequences": [], + "change": "c.-75delG", + "locus_tag": "Rv3862c", + "gene": "whiB6", + "gene_associated_drugs": ["streptomycin", "capreomycin", "amikacin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4407588, + "ref": "T", + "alt": "C", + "depth": 233, + "freq": 1.0, + "feature_id": "CCP46748", + "type": "synonymous_variant", + "nucleotide_change": "c.615A>G", + "protein_change": "p.Ala205Ala", + "alternate_consequences": [], + "change": "c.615A>G", + "locus_tag": "Rv3919c", + "gene": "gid", + "gene_associated_drugs": ["streptomycin"] + }, + { + "chrom": "Chromosome", + "genome_pos": 4407927, + "ref": "T", + "alt": "G", + "depth": 247, + "freq": 0.9959514170040485, + "feature_id": "CCP46748", + "type": "missense_variant", + "nucleotide_change": "c.276A>C", + "protein_change": "p.Glu92Asp", + "annotation": [ + { + "type": "who_confidence", + "drug": "streptomycin", + "who_confidence": "Not assoc w R" + } + ], + "alternate_consequences": [], + "change": "p.Glu92Asp", + "locus_tag": "Rv3919c", + "gene": "gid", + "gene_associated_drugs": ["streptomycin"] + } + ], + "drtype": "MDR-TB", + "db_version": { + "name": "tbdb", + "commit": "c2fb9a2", + "Author": "jodyphelan ", + "Date": "Tue Oct 4 11:40:15 2022 +0100" + }, + "pipeline": [ + { "Analysis": "Mapping", "Program": "mtuberculosis_test_1_1.fastq.gz" }, + { "Analysis": "Variant calling", "Program": "freebayes" } + ], + "timestamp": "23-12-2023 21:42:24" +} From 268ea861f229ba48932e364af69037cdc55e7e32 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 15:56:34 +0100 Subject: [PATCH 11/29] Update test function to include create_bonsai_input for mtuberculosis --- tests/test_cli.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 45ebb3b..95e0e80 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -139,3 +139,44 @@ def test_cdm_input_cmd( with open(output_fname) as inpt: cmd_output = json.load(inpt) assert cmd_output == ecoli_cdm_input + + + +def test_create_output_saureus( + mtuberculosis_analysis_meta_path, + mtuberculosis_bracken_path, + mtuberculosis_bwa_path, + mtuberculosis_mykrobe_path, + mtuberculosis_quast_path, + mtuberculosis_tbprofiler_path, +): + """Test creating a analysis summary using M. tuberculosis data. + + The test is intended as an end-to-end test. + """ + sample_id = "test_mtuberculosis_1" + output_file = f"{sample_id}.json" + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke( + create_bonsai_input, + [ + "-i", + sample_id, + "--run-metadata", + mtuberculosis_analysis_meta_path, + "--kraken", + mtuberculosis_bracken_path, + "--quality", + mtuberculosis_bwa_path, + "--mykrobe", + mtuberculosis_mykrobe_path, + "--quast", + mtuberculosis_quast_path, + "--tbprofiler", + mtuberculosis_tbprofiler_path, + "--output", + output_file, + ], + ) + assert result.exit_code == 0 \ No newline at end of file From 6fc6898db0248e98a6a1f208164714b3f72fd2bb Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 16:10:41 +0100 Subject: [PATCH 12/29] Create and add _default_amr_phenotype to tbprofiler & mykrobe --- prp/parse/phenotype/mykrobe.py | 18 +++--------------- prp/parse/phenotype/tbprofiler.py | 10 ++-------- prp/parse/phenotype/utils.py | 8 ++++++++ 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index f147c80..287e89d 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -7,7 +7,7 @@ from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType from ...models.sample import MethodIndex -from .utils import is_prediction_result_empty +from .utils import is_prediction_result_empty, _default_amr_phenotype LOG = logging.getLogger(__name__) @@ -50,13 +50,7 @@ def _parse_mykrobe_amr_genes(mykrobe_result) -> Tuple[ResistanceGene, ...]: identity=None, coverage=coverage, drugs=[element_type["drug"].lower()], - phenotypes=[ - PhenotypeInfo( - type=ElementType.AMR, - group=ElementType.AMR, - name=ElementType.AMR, - ) - ], + phenotypes=[_default_amr_phenotype()], element_type=ElementType.AMR, element_subtype=ElementAmrSubtype.AMR, ) @@ -120,13 +114,7 @@ def _parse_mykrobe_amr_variants(mykrobe_result) -> Tuple[ResistanceVariant, ...] variant = ResistanceVariant( variant_type=var_type, genes=[element_type["variants"].split("_")[0]], - phenotypes=[ - PhenotypeInfo( - type=ElementType.AMR, - group=ElementType.AMR, - name=ElementType.AMR, - ) - ], + phenotypes=[_default_amr_phenotype()], position=position, ref_nt=ref_nt, alt_nt=alt_nt, diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py index c6546d5..10592af 100644 --- a/prp/parse/phenotype/tbprofiler.py +++ b/prp/parse/phenotype/tbprofiler.py @@ -7,7 +7,7 @@ from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceVariant from ...models.sample import MethodIndex -from .utils import _default_variant +from .utils import _default_variant, _default_amr_phenotype LOG = logging.getLogger(__name__) @@ -54,13 +54,7 @@ def _parse_tbprofiler_amr_variants(tbprofiler_result) -> Tuple[ResistanceVariant variant = ResistanceVariant( variant_type=var_type, genes=[hit["gene"]], - phenotypes=[ - PhenotypeInfo( - type=ElementType.AMR, - group=ElementType.AMR, - name=ElementType.AMR, - ) - ], + phenotypes=[_default_amr_phenotype()], position=int(hit["genome_pos"]), ref_nt=hit["ref"], alt_nt=hit["alt"], diff --git a/prp/parse/phenotype/utils.py b/prp/parse/phenotype/utils.py index b761ef1..2b0a3b0 100644 --- a/prp/parse/phenotype/utils.py +++ b/prp/parse/phenotype/utils.py @@ -1,5 +1,6 @@ """Shared utility functions.""" from ...models.phenotype import ElementTypeResult, ResistanceGene +from ...models.phenotype import ElementType, PhenotypeInfo def _default_resistance() -> ElementTypeResult: @@ -49,6 +50,13 @@ def _default_variant() -> ElementTypeResult: mutations = [mutation] return ElementTypeResult(phenotypes=[], genes=[], mutations=mutations) +def _default_amr_phenotype() -> PhenotypeInfo: + return PhenotypeInfo( + type = ElementType.AMR, + group = ElementType.AMR, + name = ElementType.AMR, + ) + def is_prediction_result_empty(result: ElementTypeResult) -> bool: """Check if prediction result is emtpy. From c19f09548f3f3fd4667f79b6ce84e1ae8ac3b9c5 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 16:18:01 +0100 Subject: [PATCH 13/29] Fix test_virulencefinder.py file spelling --- tests/parse/{test_virulecefinder.py => test_virulencefinder.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/parse/{test_virulecefinder.py => test_virulencefinder.py} (100%) diff --git a/tests/parse/test_virulecefinder.py b/tests/parse/test_virulencefinder.py similarity index 100% rename from tests/parse/test_virulecefinder.py rename to tests/parse/test_virulencefinder.py From 3d39291071b1dd10c16c733de8e137717b3b1a9d Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 17:05:18 +0100 Subject: [PATCH 14/29] Add pytest to pylint.yml GA --- .github/workflows/pylint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index ed007ee..ec682d6 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -18,6 +18,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pylint + pip install pytest pip install -e . - name: Analysing the code with pylint run: | From c4f94d50b43e4870b64924429429dfe8aa57ec16 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 17:11:18 +0100 Subject: [PATCH 15/29] Simple pylint.yml GA fix --- .github/workflows/pylint.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index ec682d6..56f0c1c 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -18,8 +18,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pylint - pip install pytest - pip install -e . + pip install -e .[dev] - name: Analysing the code with pylint run: | pylint --fail-under 9 $(git ls-files '*.py') From 076fbef49b292c08b14d4a8aba70c0eeb85c6b84 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 17:13:39 +0100 Subject: [PATCH 16/29] Pylint fixes --- tests/parse/test_virulencefinder.py | 9 ++------- tests/test_cli.py | 8 ++++---- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/tests/parse/test_virulencefinder.py b/tests/parse/test_virulencefinder.py index 833e015..c6b4e81 100644 --- a/tests/parse/test_virulencefinder.py +++ b/tests/parse/test_virulencefinder.py @@ -1,17 +1,12 @@ """Virulencefinder parser test suite.""" -import pytest +#import pytest from prp.models.sample import MethodIndex from prp.models.typing import TypingResultGeneAllele from prp.parse.phenotype.virulencefinder import parse_virulencefinder_vir_pred from prp.parse.typing import parse_virulencefinder_stx_typing -""" -@pytest.mark.parametrize("input", indirect=["ecoli_virulencefinder_wo_sxt_path"]) -def test_parse_virulencefinder_output(input): -""" - - +#@pytest.mark.parametrize("input", indirect=["ecoli_virulencefinder_wo_sxt_path"]) def test_parse_virulencefinder_output(ecoli_virulencefinder_stx_pred_stx_path): """Test parsing virulencefinder output json file.""" result = parse_virulencefinder_vir_pred(ecoli_virulencefinder_stx_pred_stx_path) diff --git a/tests/test_cli.py b/tests/test_cli.py index 95e0e80..117d962 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,7 +4,7 @@ from click.testing import CliRunner -from prp.cli import create_bonsai_input, create_cdm_input, print_schema +from prp.cli import create_bonsai_input, create_cdm_input def test_create_output_saureus( @@ -136,13 +136,13 @@ def test_cdm_input_cmd( assert result.exit_code == 0 # test correct output format - with open(output_fname) as inpt: + with open(output_fname, "rb") as inpt: cmd_output = json.load(inpt) assert cmd_output == ecoli_cdm_input -def test_create_output_saureus( +def test_create_output_mtuberculosis( mtuberculosis_analysis_meta_path, mtuberculosis_bracken_path, mtuberculosis_bwa_path, @@ -179,4 +179,4 @@ def test_create_output_saureus( output_file, ], ) - assert result.exit_code == 0 \ No newline at end of file + assert result.exit_code == 0 From 510165a8489d908bd97616e5d1758927e222ae94 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 29 Dec 2023 17:29:48 +0100 Subject: [PATCH 17/29] More pylint fixes --- .github/workflows/pylint.yml | 1 + prp/cli.py | 2 +- prp/models/phenotype.py | 2 +- prp/parse/phenotype/mykrobe.py | 6 +++--- prp/parse/phenotype/tbprofiler.py | 2 +- prp/parse/phenotype/virulencefinder.py | 5 +++-- prp/parse/typing.py | 9 ++++++--- 7 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 56f0c1c..ece33b0 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -18,6 +18,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pylint + pip install pytest pip install -e .[dev] - name: Analysing the code with pylint run: | diff --git a/prp/cli.py b/prp/cli.py index 0392686..9746af7 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -1,8 +1,8 @@ """Definition of the PRP command-line interface.""" import json import logging -import pandas as pd from typing import List +import pandas as pd import click from pydantic import TypeAdapter, ValidationError diff --git a/prp/models/phenotype.py b/prp/models/phenotype.py index 9685610..ab19a31 100644 --- a/prp/models/phenotype.py +++ b/prp/models/phenotype.py @@ -2,7 +2,7 @@ from enum import Enum from typing import Dict, List, Optional, Union -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, Field from .base import RWModel diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index 287e89d..bdf7886 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -3,7 +3,7 @@ import re from typing import Any, Dict, Tuple -from ...models.phenotype import ElementAmrSubtype, ElementType, ElementTypeResult, PhenotypeInfo +from ...models.phenotype import ElementAmrSubtype, ElementType, ElementTypeResult from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType from ...models.sample import MethodIndex @@ -35,14 +35,14 @@ def _parse_mykrobe_amr_genes(mykrobe_result) -> Tuple[ResistanceGene, ...]: # skip non-resistance yeilding if not element_type["susceptibility"].upper() == "R": continue - + try: depth = float(element_type["genes"].split(':')[-1]) coverage = float(element_type["genes"].split(':')[-2]) except AttributeError: depth = None coverage = None - + gene = ResistanceGene( gene_symbol=element_type["variants"].split("_")[0], accession=None, diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py index 10592af..7fbc746 100644 --- a/prp/parse/phenotype/tbprofiler.py +++ b/prp/parse/phenotype/tbprofiler.py @@ -3,7 +3,7 @@ from typing import Any, Dict, Tuple from ...models.metadata import SoupVersions -from ...models.phenotype import ElementTypeResult, ElementType, PhenotypeInfo +from ...models.phenotype import ElementTypeResult from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceVariant from ...models.sample import MethodIndex diff --git a/prp/parse/phenotype/virulencefinder.py b/prp/parse/phenotype/virulencefinder.py index 9b8b649..43e38df 100644 --- a/prp/parse/phenotype/virulencefinder.py +++ b/prp/parse/phenotype/virulencefinder.py @@ -14,6 +14,7 @@ def parse_vir_gene( info: Dict[str, Any], subtype: ElementVirulenceSubtype = ElementVirulenceSubtype.VIR ) -> VirulenceGene: + """Parse virulence gene prediction results.""" start_pos, end_pos = map(int, info["position_in_ref"].split("..")) # Some genes doesnt have accession numbers accnr = None if info["accession"] == "NA" else info["accession"] @@ -35,7 +36,7 @@ def parse_vir_gene( def _parse_virulencefinder_vir_results(pred: str) -> ElementTypeResult: - """Parse virulence prediction results from ARIBA.""" + """Parse virulence prediction results from virulencefinder.""" # parse virulence finder results species = list(k for k in pred["virulencefinder"]["results"]) vir_genes = [] @@ -66,7 +67,7 @@ def parse_virulencefinder_vir_pred(path: str) -> ElementTypeResult | None: :rtype: ElementTypeResult | None """ LOG.info("Parsing virulencefinder virulence prediction") - with open(path) as inpt: + with open(path, 'rb') as inpt: pred = json.load(inpt) if "virulencefinder" in pred: results: ElementTypeResult = _parse_virulencefinder_vir_results(pred) diff --git a/prp/parse/typing.py b/prp/parse/typing.py index bbd8fa2..8346963 100644 --- a/prp/parse/typing.py +++ b/prp/parse/typing.py @@ -3,7 +3,7 @@ import csv import json import logging -from typing import List, TextIO +from typing import List from ..models.sample import MethodIndex from ..models.typing import ( @@ -154,7 +154,8 @@ def parse_mykrobe_lineage_results(pred_res: dict, method) -> TypingResultLineage def parse_virulencefinder_stx_typing(path: str) -> MethodIndex | None: - with open(path) as inpt: + """Parse virulencefinder's output re stx typing""" + with open(path, "rb") as inpt: pred_obj = json.load(inpt) # if has valid results pred_result = None @@ -175,6 +176,8 @@ def parse_virulencefinder_stx_typing(path: str) -> MethodIndex | None: vir_gene = parse_vir_gene(hit) gene = TypingResultGeneAllele(**vir_gene.model_dump()) pred_result = MethodIndex( - type=TypingMethod.STX, software=Software.VIRULENCEFINDER, result=gene + type=TypingMethod.STX, + software=Software.VIRULENCEFINDER, + result=gene ) return pred_result From 012048fcff978fa53842b896b3889e11abefccde Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 2 Jan 2024 22:54:32 +0100 Subject: [PATCH 18/29] Add docstrings to test functions --- tests/fixtures/ecoli/__init__.py | 17 ++++++++++++++--- tests/fixtures/fixtures.py | 1 + tests/fixtures/mtuberculosis/__init__.py | 9 ++++++--- tests/fixtures/saureus/__init__.py | 14 +++++++++++--- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/tests/fixtures/ecoli/__init__.py b/tests/fixtures/ecoli/__init__.py index 7a1a1e0..e6d91f7 100644 --- a/tests/fixtures/ecoli/__init__.py +++ b/tests/fixtures/ecoli/__init__.py @@ -4,76 +4,87 @@ import pytest -from ..fixtures import data_path - - @pytest.fixture() def ecoli_analysis_meta_path(data_path): + """Get path for ecoli meta file""" return str(data_path.joinpath("ecoli", "analysis_meta.json")) @pytest.fixture() def ecoli_bwa_path(data_path): + """Get path for ecoli bwa qc file""" return str(data_path.joinpath("ecoli", "bwa.qc")) @pytest.fixture() def ecoli_quast_path(data_path): + """Get path for ecoli quast file""" return str(data_path.joinpath("ecoli", "quast.tsv")) @pytest.fixture() def ecoli_amrfinder_path(data_path): + """Get path for ecoli amrfinder file""" return str(data_path.joinpath("ecoli", "amrfinder.out")) @pytest.fixture() def ecoli_resfinder_path(data_path): + """Get path for ecoli resfinder file""" return str(data_path.joinpath("ecoli", "resfinder.json")) @pytest.fixture() def ecoli_resfinder_meta_path(data_path): + """Get path for ecoli resfinder meta file""" return str(data_path.joinpath("ecoli", "resfinder_meta.json")) @pytest.fixture() def ecoli_virulencefinder_wo_stx_path(data_path): + """Get path for ecoli virulencefinder without stx file""" return str(data_path.joinpath("ecoli", "virulencefinder.json")) @pytest.fixture() def ecoli_virulencefinder_stx_pred_stx_path(data_path): + """Get path for ecoli stx prediction file""" return str(data_path.joinpath("ecoli", "virulencefinder.stx_pred.stx.json")) @pytest.fixture() def ecoli_virulencefinder_stx_pred_no_stx_path(data_path): + """Get path for ecoli stx prediction no stx file""" return str(data_path.joinpath("ecoli", "virulencefinder.stx_pred.no_stx.json")) @pytest.fixture() def ecoli_virulencefinder_meta_path(data_path): + """Get path for ecoli virulencefinder meta file""" return str(data_path.joinpath("ecoli", "virulencefinder_meta.json")) @pytest.fixture() def ecoli_mlst_path(data_path): + """Get path for ecoli mlst file""" return str(data_path.joinpath("ecoli", "mlst.json")) @pytest.fixture() def ecoli_chewbbaca_path(data_path): + """Get path for ecoli chewbbaca file""" return str(data_path.joinpath("ecoli", "chewbbaca.out")) @pytest.fixture() def ecoli_bracken_path(data_path): + """Get path for ecoli bracken file""" return str(data_path.joinpath("ecoli", "bracken.out")) @pytest.fixture() def ecoli_cdm_input(data_path): + """Get path for ecoli cdm file""" path = data_path.joinpath("ecoli", "cdm_input.json") with open(path) as inpt: return json.load(inpt) diff --git a/tests/fixtures/fixtures.py b/tests/fixtures/fixtures.py index cc904f6..d1baf3a 100644 --- a/tests/fixtures/fixtures.py +++ b/tests/fixtures/fixtures.py @@ -7,5 +7,6 @@ @pytest.fixture() def data_path(): + """Get path of this file""" conftest_path = Path(__file__) return conftest_path.parent diff --git a/tests/fixtures/mtuberculosis/__init__.py b/tests/fixtures/mtuberculosis/__init__.py index a878644..a657311 100644 --- a/tests/fixtures/mtuberculosis/__init__.py +++ b/tests/fixtures/mtuberculosis/__init__.py @@ -2,34 +2,37 @@ import pytest -from ..fixtures import data_path - - @pytest.fixture() def mtuberculosis_analysis_meta_path(data_path): + """Get path for mtuberculosis meta file""" return str(data_path.joinpath("mtuberculosis", "analysis_meta.json")) @pytest.fixture() def mtuberculosis_bracken_path(data_path): + """Get path for mtuberculosis bracken file""" return str(data_path.joinpath("mtuberculosis", "bracken.out")) @pytest.fixture() def mtuberculosis_bwa_path(data_path): + """Get path for mtuberculosis bwa qc file""" return str(data_path.joinpath("mtuberculosis", "bwa.qc")) @pytest.fixture() def mtuberculosis_mykrobe_path(data_path): + """Get path for mtuberculosis mykrobe file""" return str(data_path.joinpath("mtuberculosis", "mykrobe.csv")) @pytest.fixture() def mtuberculosis_quast_path(data_path): + """Get path for mtuberculosis quast file""" return str(data_path.joinpath("mtuberculosis", "quast.tsv")) @pytest.fixture() def mtuberculosis_tbprofiler_path(data_path): + """Get path for mtuberculosis tbprofiler file""" return str(data_path.joinpath("mtuberculosis", "tbprofiler.json")) diff --git a/tests/fixtures/saureus/__init__.py b/tests/fixtures/saureus/__init__.py index 088e6d7..7c6478e 100644 --- a/tests/fixtures/saureus/__init__.py +++ b/tests/fixtures/saureus/__init__.py @@ -2,59 +2,67 @@ import pytest -from ..fixtures import data_path - - @pytest.fixture() def saureus_analysis_meta_path(data_path): + """Get path for saureus meta file""" return str(data_path.joinpath("saureus", "analysis_meta.json")) @pytest.fixture() def saureus_bwa_path(data_path): + """Get path for saureus bwa qc file""" return str(data_path.joinpath("saureus", "bwa.qc")) @pytest.fixture() def saureus_quast_path(data_path): + """Get path for saureus quast file""" return str(data_path.joinpath("saureus", "quast.tsv")) @pytest.fixture() def saureus_amrfinder_path(data_path): + """Get path for saureus amrfinder file""" return str(data_path.joinpath("saureus", "amrfinder.out")) @pytest.fixture() def saureus_resfinder_path(data_path): + """Get path for saureus resfinder file""" return str(data_path.joinpath("saureus", "resfinder.json")) @pytest.fixture() def saureus_resfinder_meta_path(data_path): + """Get path for saureus resfinder meta file""" return str(data_path.joinpath("saureus", "resfinder_meta.json")) @pytest.fixture() def saureus_virulencefinder_path(data_path): + """Get path for saureus virulencefinder file""" return str(data_path.joinpath("saureus", "virulencefinder.json")) @pytest.fixture() def saureus_virulencefinder_meta_path(data_path): + """Get path for saureus virulencefinder meta file""" return str(data_path.joinpath("saureus", "virulencefinder_meta.json")) @pytest.fixture() def saureus_mlst_path(data_path): + """Get path for saureus mlst file""" return str(data_path.joinpath("saureus", "mlst.json")) @pytest.fixture() def saureus_chewbbaca_path(data_path): + """Get path for saureus chewbbaca file""" return str(data_path.joinpath("saureus", "chewbbaca.out")) @pytest.fixture() def saureus_bracken_path(data_path): + """Get path for saureus bracken file""" return str(data_path.joinpath("saureus", "bracken.out")) From 4b77020ef5809c15db59afad0b70140c89e40c66 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 2 Jan 2024 23:00:43 +0100 Subject: [PATCH 19/29] Fix data_path error --- tests/fixtures/ecoli/__init__.py | 59 +++++++++++++----------- tests/fixtures/mtuberculosis/__init__.py | 27 ++++++----- tests/fixtures/saureus/__init__.py | 47 ++++++++++--------- 3 files changed, 71 insertions(+), 62 deletions(-) diff --git a/tests/fixtures/ecoli/__init__.py b/tests/fixtures/ecoli/__init__.py index e6d91f7..f1af44c 100644 --- a/tests/fixtures/ecoli/__init__.py +++ b/tests/fixtures/ecoli/__init__.py @@ -4,87 +4,90 @@ import pytest +from ..fixtures import data_path + + @pytest.fixture() -def ecoli_analysis_meta_path(data_path): +def ecoli_analysis_meta_path(data_fpath): """Get path for ecoli meta file""" - return str(data_path.joinpath("ecoli", "analysis_meta.json")) + return str(data_fpath.joinpath("ecoli", "analysis_meta.json")) @pytest.fixture() -def ecoli_bwa_path(data_path): +def ecoli_bwa_path(data_fpath): """Get path for ecoli bwa qc file""" - return str(data_path.joinpath("ecoli", "bwa.qc")) + return str(data_fpath.joinpath("ecoli", "bwa.qc")) @pytest.fixture() -def ecoli_quast_path(data_path): +def ecoli_quast_path(data_fpath): """Get path for ecoli quast file""" - return str(data_path.joinpath("ecoli", "quast.tsv")) + return str(data_fpath.joinpath("ecoli", "quast.tsv")) @pytest.fixture() -def ecoli_amrfinder_path(data_path): +def ecoli_amrfinder_path(data_fpath): """Get path for ecoli amrfinder file""" - return str(data_path.joinpath("ecoli", "amrfinder.out")) + return str(data_fpath.joinpath("ecoli", "amrfinder.out")) @pytest.fixture() -def ecoli_resfinder_path(data_path): +def ecoli_resfinder_path(data_fpath): """Get path for ecoli resfinder file""" - return str(data_path.joinpath("ecoli", "resfinder.json")) + return str(data_fpath.joinpath("ecoli", "resfinder.json")) @pytest.fixture() -def ecoli_resfinder_meta_path(data_path): +def ecoli_resfinder_meta_path(data_fpath): """Get path for ecoli resfinder meta file""" - return str(data_path.joinpath("ecoli", "resfinder_meta.json")) + return str(data_fpath.joinpath("ecoli", "resfinder_meta.json")) @pytest.fixture() -def ecoli_virulencefinder_wo_stx_path(data_path): +def ecoli_virulencefinder_wo_stx_path(data_fpath): """Get path for ecoli virulencefinder without stx file""" - return str(data_path.joinpath("ecoli", "virulencefinder.json")) + return str(data_fpath.joinpath("ecoli", "virulencefinder.json")) @pytest.fixture() -def ecoli_virulencefinder_stx_pred_stx_path(data_path): +def ecoli_virulencefinder_stx_pred_stx_path(data_fpath): """Get path for ecoli stx prediction file""" - return str(data_path.joinpath("ecoli", "virulencefinder.stx_pred.stx.json")) + return str(data_fpath.joinpath("ecoli", "virulencefinder.stx_pred.stx.json")) @pytest.fixture() -def ecoli_virulencefinder_stx_pred_no_stx_path(data_path): +def ecoli_virulencefinder_stx_pred_no_stx_path(data_fpath): """Get path for ecoli stx prediction no stx file""" - return str(data_path.joinpath("ecoli", "virulencefinder.stx_pred.no_stx.json")) + return str(data_fpath.joinpath("ecoli", "virulencefinder.stx_pred.no_stx.json")) @pytest.fixture() -def ecoli_virulencefinder_meta_path(data_path): +def ecoli_virulencefinder_meta_path(data_fpath): """Get path for ecoli virulencefinder meta file""" - return str(data_path.joinpath("ecoli", "virulencefinder_meta.json")) + return str(data_fpath.joinpath("ecoli", "virulencefinder_meta.json")) @pytest.fixture() -def ecoli_mlst_path(data_path): +def ecoli_mlst_path(data_fpath): """Get path for ecoli mlst file""" - return str(data_path.joinpath("ecoli", "mlst.json")) + return str(data_fpath.joinpath("ecoli", "mlst.json")) @pytest.fixture() -def ecoli_chewbbaca_path(data_path): +def ecoli_chewbbaca_path(data_fpath): """Get path for ecoli chewbbaca file""" - return str(data_path.joinpath("ecoli", "chewbbaca.out")) + return str(data_fpath.joinpath("ecoli", "chewbbaca.out")) @pytest.fixture() -def ecoli_bracken_path(data_path): +def ecoli_bracken_path(data_fpath): """Get path for ecoli bracken file""" - return str(data_path.joinpath("ecoli", "bracken.out")) + return str(data_fpath.joinpath("ecoli", "bracken.out")) @pytest.fixture() -def ecoli_cdm_input(data_path): +def ecoli_cdm_input(data_fpath): """Get path for ecoli cdm file""" - path = data_path.joinpath("ecoli", "cdm_input.json") + path = data_fpath.joinpath("ecoli", "cdm_input.json") with open(path) as inpt: return json.load(inpt) diff --git a/tests/fixtures/mtuberculosis/__init__.py b/tests/fixtures/mtuberculosis/__init__.py index a657311..1b077f9 100644 --- a/tests/fixtures/mtuberculosis/__init__.py +++ b/tests/fixtures/mtuberculosis/__init__.py @@ -2,37 +2,40 @@ import pytest +from ..fixtures import data_path + + @pytest.fixture() -def mtuberculosis_analysis_meta_path(data_path): +def mtuberculosis_analysis_meta_path(data_fpath): """Get path for mtuberculosis meta file""" - return str(data_path.joinpath("mtuberculosis", "analysis_meta.json")) + return str(data_fpath.joinpath("mtuberculosis", "analysis_meta.json")) @pytest.fixture() -def mtuberculosis_bracken_path(data_path): +def mtuberculosis_bracken_path(data_fpath): """Get path for mtuberculosis bracken file""" - return str(data_path.joinpath("mtuberculosis", "bracken.out")) + return str(data_fpath.joinpath("mtuberculosis", "bracken.out")) @pytest.fixture() -def mtuberculosis_bwa_path(data_path): +def mtuberculosis_bwa_path(data_fpath): """Get path for mtuberculosis bwa qc file""" - return str(data_path.joinpath("mtuberculosis", "bwa.qc")) + return str(data_fpath.joinpath("mtuberculosis", "bwa.qc")) @pytest.fixture() -def mtuberculosis_mykrobe_path(data_path): +def mtuberculosis_mykrobe_path(data_fpath): """Get path for mtuberculosis mykrobe file""" - return str(data_path.joinpath("mtuberculosis", "mykrobe.csv")) + return str(data_fpath.joinpath("mtuberculosis", "mykrobe.csv")) @pytest.fixture() -def mtuberculosis_quast_path(data_path): +def mtuberculosis_quast_path(data_fpath): """Get path for mtuberculosis quast file""" - return str(data_path.joinpath("mtuberculosis", "quast.tsv")) + return str(data_fpath.joinpath("mtuberculosis", "quast.tsv")) @pytest.fixture() -def mtuberculosis_tbprofiler_path(data_path): +def mtuberculosis_tbprofiler_path(data_fpath): """Get path for mtuberculosis tbprofiler file""" - return str(data_path.joinpath("mtuberculosis", "tbprofiler.json")) + return str(data_fpath.joinpath("mtuberculosis", "tbprofiler.json")) diff --git a/tests/fixtures/saureus/__init__.py b/tests/fixtures/saureus/__init__.py index 7c6478e..c28d9d9 100644 --- a/tests/fixtures/saureus/__init__.py +++ b/tests/fixtures/saureus/__init__.py @@ -2,67 +2,70 @@ import pytest +from ..fixtures import data_path + + @pytest.fixture() -def saureus_analysis_meta_path(data_path): +def saureus_analysis_meta_path(data_fpath): """Get path for saureus meta file""" - return str(data_path.joinpath("saureus", "analysis_meta.json")) + return str(data_fpath.joinpath("saureus", "analysis_meta.json")) @pytest.fixture() -def saureus_bwa_path(data_path): +def saureus_bwa_path(data_fpath): """Get path for saureus bwa qc file""" - return str(data_path.joinpath("saureus", "bwa.qc")) + return str(data_fpath.joinpath("saureus", "bwa.qc")) @pytest.fixture() -def saureus_quast_path(data_path): +def saureus_quast_path(data_fpath): """Get path for saureus quast file""" - return str(data_path.joinpath("saureus", "quast.tsv")) + return str(data_fpath.joinpath("saureus", "quast.tsv")) @pytest.fixture() -def saureus_amrfinder_path(data_path): +def saureus_amrfinder_path(data_fpath): """Get path for saureus amrfinder file""" - return str(data_path.joinpath("saureus", "amrfinder.out")) + return str(data_fpath.joinpath("saureus", "amrfinder.out")) @pytest.fixture() -def saureus_resfinder_path(data_path): +def saureus_resfinder_path(data_fpath): """Get path for saureus resfinder file""" - return str(data_path.joinpath("saureus", "resfinder.json")) + return str(data_fpath.joinpath("saureus", "resfinder.json")) @pytest.fixture() -def saureus_resfinder_meta_path(data_path): +def saureus_resfinder_meta_path(data_fpath): """Get path for saureus resfinder meta file""" - return str(data_path.joinpath("saureus", "resfinder_meta.json")) + return str(data_fpath.joinpath("saureus", "resfinder_meta.json")) @pytest.fixture() -def saureus_virulencefinder_path(data_path): +def saureus_virulencefinder_path(data_fpath): """Get path for saureus virulencefinder file""" - return str(data_path.joinpath("saureus", "virulencefinder.json")) + return str(data_fpath.joinpath("saureus", "virulencefinder.json")) @pytest.fixture() -def saureus_virulencefinder_meta_path(data_path): +def saureus_virulencefinder_meta_path(data_fpath): """Get path for saureus virulencefinder meta file""" - return str(data_path.joinpath("saureus", "virulencefinder_meta.json")) + return str(data_fpath.joinpath("saureus", "virulencefinder_meta.json")) @pytest.fixture() -def saureus_mlst_path(data_path): +def saureus_mlst_path(data_fpath): """Get path for saureus mlst file""" - return str(data_path.joinpath("saureus", "mlst.json")) + return str(data_fpath.joinpath("saureus", "mlst.json")) @pytest.fixture() -def saureus_chewbbaca_path(data_path): +def saureus_chewbbaca_path(data_fpath): """Get path for saureus chewbbaca file""" - return str(data_path.joinpath("saureus", "chewbbaca.out")) + return str(data_fpath.joinpath("saureus", "chewbbaca.out")) @pytest.fixture() -def saureus_bracken_path(data_path): +def saureus_bracken_path(data_fpath): """Get path for saureus bracken file""" - return str(data_path.joinpath("saureus", "bracken.out")) + return str(data_fpath.joinpath("saureus", "bracken.out")) From 56a9da3ab11ad9deaeae40ca357224abdb99809c Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 2 Jan 2024 23:07:41 +0100 Subject: [PATCH 20/29] Fix data_fpath error --- tests/fixtures/ecoli/__init__.py | 58 ++++++++++++------------ tests/fixtures/mtuberculosis/__init__.py | 24 +++++----- tests/fixtures/saureus/__init__.py | 44 +++++++++--------- 3 files changed, 63 insertions(+), 63 deletions(-) diff --git a/tests/fixtures/ecoli/__init__.py b/tests/fixtures/ecoli/__init__.py index f1af44c..6679406 100644 --- a/tests/fixtures/ecoli/__init__.py +++ b/tests/fixtures/ecoli/__init__.py @@ -8,86 +8,86 @@ @pytest.fixture() -def ecoli_analysis_meta_path(data_fpath): +def ecoli_analysis_meta_path(data_path): """Get path for ecoli meta file""" - return str(data_fpath.joinpath("ecoli", "analysis_meta.json")) + return str(data_path.joinpath("ecoli", "analysis_meta.json")) @pytest.fixture() -def ecoli_bwa_path(data_fpath): +def ecoli_bwa_path(data_path): """Get path for ecoli bwa qc file""" - return str(data_fpath.joinpath("ecoli", "bwa.qc")) + return str(data_path.joinpath("ecoli", "bwa.qc")) @pytest.fixture() -def ecoli_quast_path(data_fpath): +def ecoli_quast_path(data_path): """Get path for ecoli quast file""" - return str(data_fpath.joinpath("ecoli", "quast.tsv")) + return str(data_path.joinpath("ecoli", "quast.tsv")) @pytest.fixture() -def ecoli_amrfinder_path(data_fpath): +def ecoli_amrfinder_path(data_path): """Get path for ecoli amrfinder file""" - return str(data_fpath.joinpath("ecoli", "amrfinder.out")) + return str(data_path.joinpath("ecoli", "amrfinder.out")) @pytest.fixture() -def ecoli_resfinder_path(data_fpath): +def ecoli_resfinder_path(data_path): """Get path for ecoli resfinder file""" - return str(data_fpath.joinpath("ecoli", "resfinder.json")) + return str(data_path.joinpath("ecoli", "resfinder.json")) @pytest.fixture() -def ecoli_resfinder_meta_path(data_fpath): +def ecoli_resfinder_meta_path(data_path): """Get path for ecoli resfinder meta file""" - return str(data_fpath.joinpath("ecoli", "resfinder_meta.json")) + return str(data_path.joinpath("ecoli", "resfinder_meta.json")) @pytest.fixture() -def ecoli_virulencefinder_wo_stx_path(data_fpath): +def ecoli_virulencefinder_wo_stx_path(data_path): """Get path for ecoli virulencefinder without stx file""" - return str(data_fpath.joinpath("ecoli", "virulencefinder.json")) + return str(data_path.joinpath("ecoli", "virulencefinder.json")) @pytest.fixture() -def ecoli_virulencefinder_stx_pred_stx_path(data_fpath): +def ecoli_virulencefinder_stx_pred_stx_path(data_path): """Get path for ecoli stx prediction file""" - return str(data_fpath.joinpath("ecoli", "virulencefinder.stx_pred.stx.json")) + return str(data_path.joinpath("ecoli", "virulencefinder.stx_pred.stx.json")) @pytest.fixture() -def ecoli_virulencefinder_stx_pred_no_stx_path(data_fpath): +def ecoli_virulencefinder_stx_pred_no_stx_path(data_path): """Get path for ecoli stx prediction no stx file""" - return str(data_fpath.joinpath("ecoli", "virulencefinder.stx_pred.no_stx.json")) + return str(data_path.joinpath("ecoli", "virulencefinder.stx_pred.no_stx.json")) @pytest.fixture() -def ecoli_virulencefinder_meta_path(data_fpath): +def ecoli_virulencefinder_meta_path(data_path): """Get path for ecoli virulencefinder meta file""" - return str(data_fpath.joinpath("ecoli", "virulencefinder_meta.json")) + return str(data_path.joinpath("ecoli", "virulencefinder_meta.json")) @pytest.fixture() -def ecoli_mlst_path(data_fpath): +def ecoli_mlst_path(data_path): """Get path for ecoli mlst file""" - return str(data_fpath.joinpath("ecoli", "mlst.json")) + return str(data_path.joinpath("ecoli", "mlst.json")) @pytest.fixture() -def ecoli_chewbbaca_path(data_fpath): +def ecoli_chewbbaca_path(data_path): """Get path for ecoli chewbbaca file""" - return str(data_fpath.joinpath("ecoli", "chewbbaca.out")) + return str(data_path.joinpath("ecoli", "chewbbaca.out")) @pytest.fixture() -def ecoli_bracken_path(data_fpath): +def ecoli_bracken_path(data_path): """Get path for ecoli bracken file""" - return str(data_fpath.joinpath("ecoli", "bracken.out")) + return str(data_path.joinpath("ecoli", "bracken.out")) @pytest.fixture() -def ecoli_cdm_input(data_fpath): +def ecoli_cdm_input(data_path): """Get path for ecoli cdm file""" - path = data_fpath.joinpath("ecoli", "cdm_input.json") - with open(path) as inpt: + path = data_path.joinpath("ecoli", "cdm_input.json") + with open(path, "rb") as inpt: return json.load(inpt) diff --git a/tests/fixtures/mtuberculosis/__init__.py b/tests/fixtures/mtuberculosis/__init__.py index 1b077f9..5bedbf2 100644 --- a/tests/fixtures/mtuberculosis/__init__.py +++ b/tests/fixtures/mtuberculosis/__init__.py @@ -6,36 +6,36 @@ @pytest.fixture() -def mtuberculosis_analysis_meta_path(data_fpath): +def mtuberculosis_analysis_meta_path(data_path): """Get path for mtuberculosis meta file""" - return str(data_fpath.joinpath("mtuberculosis", "analysis_meta.json")) + return str(data_path.joinpath("mtuberculosis", "analysis_meta.json")) @pytest.fixture() -def mtuberculosis_bracken_path(data_fpath): +def mtuberculosis_bracken_path(data_path): """Get path for mtuberculosis bracken file""" - return str(data_fpath.joinpath("mtuberculosis", "bracken.out")) + return str(data_path.joinpath("mtuberculosis", "bracken.out")) @pytest.fixture() -def mtuberculosis_bwa_path(data_fpath): +def mtuberculosis_bwa_path(data_path): """Get path for mtuberculosis bwa qc file""" - return str(data_fpath.joinpath("mtuberculosis", "bwa.qc")) + return str(data_path.joinpath("mtuberculosis", "bwa.qc")) @pytest.fixture() -def mtuberculosis_mykrobe_path(data_fpath): +def mtuberculosis_mykrobe_path(data_path): """Get path for mtuberculosis mykrobe file""" - return str(data_fpath.joinpath("mtuberculosis", "mykrobe.csv")) + return str(data_path.joinpath("mtuberculosis", "mykrobe.csv")) @pytest.fixture() -def mtuberculosis_quast_path(data_fpath): +def mtuberculosis_quast_path(data_path): """Get path for mtuberculosis quast file""" - return str(data_fpath.joinpath("mtuberculosis", "quast.tsv")) + return str(data_path.joinpath("mtuberculosis", "quast.tsv")) @pytest.fixture() -def mtuberculosis_tbprofiler_path(data_fpath): +def mtuberculosis_tbprofiler_path(data_path): """Get path for mtuberculosis tbprofiler file""" - return str(data_fpath.joinpath("mtuberculosis", "tbprofiler.json")) + return str(data_path.joinpath("mtuberculosis", "tbprofiler.json")) diff --git a/tests/fixtures/saureus/__init__.py b/tests/fixtures/saureus/__init__.py index c28d9d9..6ce22e9 100644 --- a/tests/fixtures/saureus/__init__.py +++ b/tests/fixtures/saureus/__init__.py @@ -6,66 +6,66 @@ @pytest.fixture() -def saureus_analysis_meta_path(data_fpath): +def saureus_analysis_meta_path(data_path): """Get path for saureus meta file""" - return str(data_fpath.joinpath("saureus", "analysis_meta.json")) + return str(data_path.joinpath("saureus", "analysis_meta.json")) @pytest.fixture() -def saureus_bwa_path(data_fpath): +def saureus_bwa_path(data_path): """Get path for saureus bwa qc file""" - return str(data_fpath.joinpath("saureus", "bwa.qc")) + return str(data_path.joinpath("saureus", "bwa.qc")) @pytest.fixture() -def saureus_quast_path(data_fpath): +def saureus_quast_path(data_path): """Get path for saureus quast file""" - return str(data_fpath.joinpath("saureus", "quast.tsv")) + return str(data_path.joinpath("saureus", "quast.tsv")) @pytest.fixture() -def saureus_amrfinder_path(data_fpath): +def saureus_amrfinder_path(data_path): """Get path for saureus amrfinder file""" - return str(data_fpath.joinpath("saureus", "amrfinder.out")) + return str(data_path.joinpath("saureus", "amrfinder.out")) @pytest.fixture() -def saureus_resfinder_path(data_fpath): +def saureus_resfinder_path(data_path): """Get path for saureus resfinder file""" - return str(data_fpath.joinpath("saureus", "resfinder.json")) + return str(data_path.joinpath("saureus", "resfinder.json")) @pytest.fixture() -def saureus_resfinder_meta_path(data_fpath): +def saureus_resfinder_meta_path(data_path): """Get path for saureus resfinder meta file""" - return str(data_fpath.joinpath("saureus", "resfinder_meta.json")) + return str(data_path.joinpath("saureus", "resfinder_meta.json")) @pytest.fixture() -def saureus_virulencefinder_path(data_fpath): +def saureus_virulencefinder_path(data_path): """Get path for saureus virulencefinder file""" - return str(data_fpath.joinpath("saureus", "virulencefinder.json")) + return str(data_path.joinpath("saureus", "virulencefinder.json")) @pytest.fixture() -def saureus_virulencefinder_meta_path(data_fpath): +def saureus_virulencefinder_meta_path(data_path): """Get path for saureus virulencefinder meta file""" - return str(data_fpath.joinpath("saureus", "virulencefinder_meta.json")) + return str(data_path.joinpath("saureus", "virulencefinder_meta.json")) @pytest.fixture() -def saureus_mlst_path(data_fpath): +def saureus_mlst_path(data_path): """Get path for saureus mlst file""" - return str(data_fpath.joinpath("saureus", "mlst.json")) + return str(data_path.joinpath("saureus", "mlst.json")) @pytest.fixture() -def saureus_chewbbaca_path(data_fpath): +def saureus_chewbbaca_path(data_path): """Get path for saureus chewbbaca file""" - return str(data_fpath.joinpath("saureus", "chewbbaca.out")) + return str(data_path.joinpath("saureus", "chewbbaca.out")) @pytest.fixture() -def saureus_bracken_path(data_fpath): +def saureus_bracken_path(data_path): """Get path for saureus bracken file""" - return str(data_fpath.joinpath("saureus", "bracken.out")) + return str(data_path.joinpath("saureus", "bracken.out")) From 5eec581db4759e76862958f23dfcf6a87637fed6 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 2 Jan 2024 23:13:06 +0100 Subject: [PATCH 21/29] Update CHANGELOG.md --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64a0fd9..0ea65d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## [Unreleased] + +### Added + +### Fixed + +### Changed + ## [0.3.0] ### Added From b90fb4dfb1c98df42be045fe0bcac042a792b645 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 3 Jan 2024 13:09:23 +0100 Subject: [PATCH 22/29] Fix parsers --- prp/parse/phenotype/mykrobe.py | 20 ++++--------- prp/parse/phenotype/resfinder.py | 8 +---- prp/parse/phenotype/tbprofiler.py | 7 ++--- prp/parse/phenotype/utils.py | 49 +------------------------------ prp/parse/typing.py | 4 +-- 5 files changed, 12 insertions(+), 76 deletions(-) diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index bdf7886..fc03013 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -7,7 +7,7 @@ from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType from ...models.sample import MethodIndex -from .utils import is_prediction_result_empty, _default_amr_phenotype +from .utils import is_prediction_result_empty LOG = logging.getLogger(__name__) @@ -45,12 +45,9 @@ def _parse_mykrobe_amr_genes(mykrobe_result) -> Tuple[ResistanceGene, ...]: gene = ResistanceGene( gene_symbol=element_type["variants"].split("_")[0], - accession=None, depth=depth, - identity=None, coverage=coverage, drugs=[element_type["drug"].lower()], - phenotypes=[_default_amr_phenotype()], element_type=ElementType.AMR, element_subtype=ElementAmrSubtype.AMR, ) @@ -99,7 +96,7 @@ def _parse_mykrobe_amr_variants(mykrobe_result) -> Tuple[ResistanceVariant, ...] if not element_type["susceptibility"].upper() == "R": continue - if element_type["variants"] is not None: + if element_type["variants"] is None: continue try: @@ -107,25 +104,18 @@ def _parse_mykrobe_amr_variants(mykrobe_result) -> Tuple[ResistanceVariant, ...] except AttributeError: depth = None - var_info = element_type["variants"].split("-")[1] + var_info = element_type["variants"].split("-")[1].split(":")[0] _, ref_nt, alt_nt, position = get_mutation_type(var_info) var_nom = element_type["variants"].split("-")[0].split("_")[1] var_type, *_ = get_mutation_type(var_nom) variant = ResistanceVariant( variant_type=var_type, - genes=[element_type["variants"].split("_")[0]], - phenotypes=[_default_amr_phenotype()], + gene_symbol=element_type["variants"].split("_")[0], position=position, ref_nt=ref_nt, alt_nt=alt_nt, depth=depth, - ref_database=None, - ref_id=None, - type=None, change=var_nom, - nucleotide_change=None, - protein_change=None, - annotation=None, drugs=[element_type["drug"].lower()], ) results.append(variant) @@ -139,7 +129,7 @@ def parse_mykrobe_amr_pred( LOG.info("Parsing mykrobe prediction") resistance = ElementTypeResult( phenotypes=_get_mykrobe_amr_sr_profie(prediction), - genes=_parse_mykrobe_amr_genes(prediction), + genes=[], mutations=_parse_mykrobe_amr_variants(prediction), ) diff --git a/prp/parse/phenotype/resfinder.py b/prp/parse/phenotype/resfinder.py index 17f03d7..aa9c224 100644 --- a/prp/parse/phenotype/resfinder.py +++ b/prp/parse/phenotype/resfinder.py @@ -14,7 +14,6 @@ from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType from ...models.sample import MethodIndex -from .utils import _default_resistance LOG = logging.getLogger(__name__) @@ -221,7 +220,7 @@ def _parse_resfinder_amr_genes( """Get resistance genes from resfinder result.""" results = [] if not "seq_regions" in resfinder_result: - return _default_resistance().genes + return [ResistanceGene()] for info in resfinder_result["seq_regions"].values(): # Get only acquired resistance genes @@ -327,7 +326,6 @@ def _parse_resfinder_amr_variants( ) -> Tuple[ResistanceVariant, ...]: """Get resistance genes from resfinder result.""" results = [] - igenes = [] for info in resfinder_result["seq_variations"].values(): # Get only variants from desired phenotypes if limit_to_phenotypes is not None: @@ -350,9 +348,6 @@ def _parse_resfinder_amr_variants( var_type = VariantType.DELETION else: raise ValueError("Output has no known mutation type") - if not "seq_regions" in info: - # igenes = _default_resistance().genes - igenes = [""] # get gene symbol and accession nr gene_symbol, _, gene_accnr = info["seq_regions"][0].split(";;") @@ -376,7 +371,6 @@ def _parse_resfinder_amr_variants( gene_symbol=gene_symbol, accession=gene_accnr, close_seq_name=gene_accnr, - genes=igenes, phenotypes=phenotype, position=info["ref_start_pos"], ref_nt=ref_nt, diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py index 7fbc746..1a62021 100644 --- a/prp/parse/phenotype/tbprofiler.py +++ b/prp/parse/phenotype/tbprofiler.py @@ -7,7 +7,6 @@ from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceVariant from ...models.sample import MethodIndex -from .utils import _default_variant, _default_amr_phenotype LOG = logging.getLogger(__name__) @@ -53,8 +52,8 @@ def _parse_tbprofiler_amr_variants(tbprofiler_result) -> Tuple[ResistanceVariant variant = ResistanceVariant( variant_type=var_type, - genes=[hit["gene"]], - phenotypes=[_default_amr_phenotype()], + gene_symbol=hit["gene"], + phenotypes=[], position=int(hit["genome_pos"]), ref_nt=hit["ref"], alt_nt=hit["alt"], @@ -69,7 +68,7 @@ def _parse_tbprofiler_amr_variants(tbprofiler_result) -> Tuple[ResistanceVariant results.append(variant) if not results: - results = _default_variant().mutations + results = [ResistanceVariant()] return results return results diff --git a/prp/parse/phenotype/utils.py b/prp/parse/phenotype/utils.py index 2b0a3b0..6ea7bd7 100644 --- a/prp/parse/phenotype/utils.py +++ b/prp/parse/phenotype/utils.py @@ -1,55 +1,8 @@ """Shared utility functions.""" -from ...models.phenotype import ElementTypeResult, ResistanceGene +from ...models.phenotype import ElementTypeResult from ...models.phenotype import ElementType, PhenotypeInfo -def _default_resistance() -> ElementTypeResult: - gene = ResistanceGene( - name=None, - virulence_category=None, - accession=None, - depth=None, - identity=None, - coverage=None, - ref_start_pos=None, - ref_end_pos=None, - ref_gene_length=None, - alignment_length=None, - ref_database=None, - phenotypes=[], - ref_id=None, - contig_id=None, - sequence_name=None, - ass_start_pos=None, - ass_end_pos=None, - strand=None, - element_type=None, - element_subtype=None, - target_length=None, - res_class=None, - res_subclass=None, - method=None, - close_seq_name=None, - ) - genes = [ - gene, - ] - return ElementTypeResult(phenotypes=[], genes=genes, mutations=[]) - - -def _default_variant() -> ElementTypeResult: - mutation = ResistanceGene( - variant_type=None, - genes=None, - phenotypes=[], - position=None, - ref_nt=None, - alt_nt=None, - depth=None, - ) - mutations = [mutation] - return ElementTypeResult(phenotypes=[], genes=[], mutations=mutations) - def _default_amr_phenotype() -> PhenotypeInfo: return PhenotypeInfo( type = ElementType.AMR, diff --git a/prp/parse/typing.py b/prp/parse/typing.py index 8346963..1790e29 100644 --- a/prp/parse/typing.py +++ b/prp/parse/typing.py @@ -176,8 +176,8 @@ def parse_virulencefinder_stx_typing(path: str) -> MethodIndex | None: vir_gene = parse_vir_gene(hit) gene = TypingResultGeneAllele(**vir_gene.model_dump()) pred_result = MethodIndex( - type=TypingMethod.STX, - software=Software.VIRULENCEFINDER, + type=TypingMethod.STX, + software=Software.VIRULENCEFINDER, result=gene ) return pred_result From 023c38520ca4cf158daec791df05631558bedfcb Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 3 Jan 2024 13:10:18 +0100 Subject: [PATCH 23/29] Remove genes from VariantBase --- prp/models/phenotype.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prp/models/phenotype.py b/prp/models/phenotype.py index ab19a31..8f11814 100644 --- a/prp/models/phenotype.py +++ b/prp/models/phenotype.py @@ -144,7 +144,6 @@ class VariantBase(DatabaseReference): """Container for mutation information""" variant_type: VariantType - genes: List[str] position: int ref_nt: str alt_nt: str From aee230af3fc5b9e5e5ce8c53106a3ee88b6ad5b2 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 3 Jan 2024 13:23:55 +0100 Subject: [PATCH 24/29] Remove _parse_mykrobe_amr_genes --- prp/parse/phenotype/mykrobe.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index fc03013..84d4e03 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -28,33 +28,6 @@ def _get_mykrobe_amr_sr_profie(mykrobe_result): return {"susceptible": list(susceptible), "resistant": list(resistant)} -def _parse_mykrobe_amr_genes(mykrobe_result) -> Tuple[ResistanceGene, ...]: - """Get resistance genes from mykrobe result.""" - results = [] - for element_type in mykrobe_result: - # skip non-resistance yeilding - if not element_type["susceptibility"].upper() == "R": - continue - - try: - depth = float(element_type["genes"].split(':')[-1]) - coverage = float(element_type["genes"].split(':')[-2]) - except AttributeError: - depth = None - coverage = None - - gene = ResistanceGene( - gene_symbol=element_type["variants"].split("_")[0], - depth=depth, - coverage=coverage, - drugs=[element_type["drug"].lower()], - element_type=ElementType.AMR, - element_subtype=ElementAmrSubtype.AMR, - ) - results.append(gene) - return results - - def get_mutation_type(var_nom: str) -> Tuple[VariantType, str, str, int]: """Extract mutation type from Mykrobe mutation description. From 6def2ee1978ef09046169f688e0c87155af40e5f Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 3 Jan 2024 16:30:24 +0100 Subject: [PATCH 25/29] Review fixes regarding mykrobe variant parser --- prp/parse/phenotype/mykrobe.py | 19 ++++++++++--------- prp/parse/typing.py | 6 +----- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index 84d4e03..a924f02 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -3,9 +3,9 @@ import re from typing import Any, Dict, Tuple -from ...models.phenotype import ElementAmrSubtype, ElementType, ElementTypeResult +from ...models.phenotype import ElementType, ElementTypeResult from ...models.phenotype import PredictionSoftware as Software -from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType +from ...models.phenotype import ResistanceVariant, VariantType from ...models.sample import MethodIndex from .utils import is_prediction_result_empty @@ -72,23 +72,24 @@ def _parse_mykrobe_amr_variants(mykrobe_result) -> Tuple[ResistanceVariant, ...] if element_type["variants"] is None: continue - try: - depth = float(element_type["genes"].split(':')[-1]) - except AttributeError: - depth = None - var_info = element_type["variants"].split("-")[1].split(":")[0] _, ref_nt, alt_nt, position = get_mutation_type(var_info) var_nom = element_type["variants"].split("-")[0].split("_")[1] - var_type, *_ = get_mutation_type(var_nom) + var_type, ref_aa, alt_aa, _ = get_mutation_type(var_nom) variant = ResistanceVariant( variant_type=var_type, gene_symbol=element_type["variants"].split("_")[0], position=position, ref_nt=ref_nt, alt_nt=alt_nt, - depth=depth, + ref_aa=ref_aa if len(ref_aa)==1 and len(alt_aa)==1 else None, + alt_aa=alt_aa if len(ref_aa)==1 and len(alt_aa)==1 else None, + depth=float(element_type["variants"].split(':')[-1]), change=var_nom, + nucleotide_change="c." + var_info, + protein_change="p." + var_nom, + element_type=ElementType.AMR, + method=element_type["genotype_model"], drugs=[element_type["drug"].lower()], ) results.append(variant) diff --git a/prp/parse/typing.py b/prp/parse/typing.py index 1790e29..2ba46cb 100644 --- a/prp/parse/typing.py +++ b/prp/parse/typing.py @@ -139,11 +139,7 @@ def parse_mykrobe_lineage_results(pred_res: dict, method) -> TypingResultLineage main_lin = split_lin[0] sublin = lineage["lineage"] lin_idxs = lineage["lineage"].lstrip("lineage").split('.') - try: - coverage = float(lineage["genes"].split(':')[-2]) - except AttributeError: - coverage = None - lineages = [LineageInformation(lineage="lineage" + '.'.join(lin_idxs[:idx+1]), variant=lineage["variants"].split(':')[0], coverage=coverage) for idx in range(len(lin_idxs))] + lineages = [LineageInformation(lineage="lineage" + '.'.join(lin_idxs[:idx+1])) for idx in range(len(lin_idxs))] # cast to lineage object result_obj = TypingResultLineage( main_lin=main_lin, From 84f17124da636da656b351d0e268d53319e35ea9 Mon Sep 17 00:00:00 2001 From: mhkc Date: Thu, 4 Jan 2024 08:47:26 +0100 Subject: [PATCH 26/29] Restored unreleased version --- CHANGELOG.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ea65d0..2c2ea53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,5 @@ ## [Unreleased] -### Added - -### Fixed - -### Changed - -## [0.3.0] - ### Added - Pytest for Mycobacterium tuberculosis From 7e0770162b3129327b1868bbf0d454f4e8ff7714 Mon Sep 17 00:00:00 2001 From: mhkc Date: Thu, 4 Jan 2024 08:49:16 +0100 Subject: [PATCH 27/29] minor refactoring --- prp/cli.py | 2 +- prp/models/phenotype.py | 4 +- prp/parse/phenotype/mykrobe.py | 6 +-- prp/parse/phenotype/resfinder.py | 53 +--------------------- prp/parse/phenotype/utils.py | 62 +++++++++++++++++++++++--- prp/parse/phenotype/virulencefinder.py | 2 +- prp/parse/typing.py | 11 +++-- 7 files changed, 72 insertions(+), 68 deletions(-) diff --git a/prp/cli.py b/prp/cli.py index 9746af7..ff63198 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -2,9 +2,9 @@ import json import logging from typing import List -import pandas as pd import click +import pandas as pd from pydantic import TypeAdapter, ValidationError from .models.metadata import SoupType, SoupVersion diff --git a/prp/models/phenotype.py b/prp/models/phenotype.py index 8f11814..584b9ea 100644 --- a/prp/models/phenotype.py +++ b/prp/models/phenotype.py @@ -90,7 +90,7 @@ class GeneBase(BaseModel): coverage: Optional[float] = None ref_start_pos: Optional[int] = None ref_end_pos: Optional[int] = None - drugs: Optional[List[Union[Dict,str]]] = None + drugs: Optional[List[Union[Dict, str]]] = None ref_gene_length: Optional[int] = Field( default=None, alias="target_length", @@ -175,7 +175,7 @@ class VariantBase(DatabaseReference): nucleotide_change: Optional[str] = None protein_change: Optional[str] = None annotation: Optional[List[Dict]] = None - drugs: Optional[List[Union[Dict,str]]] = None + drugs: Optional[List[Union[Dict, str]]] = None class ResistanceVariant(VariantBase): diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index a924f02..cb04b06 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -82,9 +82,9 @@ def _parse_mykrobe_amr_variants(mykrobe_result) -> Tuple[ResistanceVariant, ...] position=position, ref_nt=ref_nt, alt_nt=alt_nt, - ref_aa=ref_aa if len(ref_aa)==1 and len(alt_aa)==1 else None, - alt_aa=alt_aa if len(ref_aa)==1 and len(alt_aa)==1 else None, - depth=float(element_type["variants"].split(':')[-1]), + ref_aa=ref_aa if len(ref_aa) == 1 and len(alt_aa) == 1 else None, + alt_aa=alt_aa if len(ref_aa) == 1 and len(alt_aa) == 1 else None, + depth=float(element_type["variants"].split(":")[-1]), change=var_nom, nucleotide_change="c." + var_info, protein_change="p." + var_nom, diff --git a/prp/parse/phenotype/resfinder.py b/prp/parse/phenotype/resfinder.py index aa9c224..34d9d79 100644 --- a/prp/parse/phenotype/resfinder.py +++ b/prp/parse/phenotype/resfinder.py @@ -14,6 +14,7 @@ from ...models.phenotype import PredictionSoftware as Software from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType from ...models.sample import MethodIndex +from .utils import format_nt_change, get_nt_change LOG = logging.getLogger(__name__) @@ -269,58 +270,6 @@ def _parse_resfinder_amr_genes( return results -def get_nt_change(ref_codon: str, alt_codon: str) -> Tuple[str, str]: - """Get nucleotide change from codons - - Ref: TCG, Alt: TTG => Tuple[C, T] - - :param ref_codon: Reference codeon - :type ref_codon: str - :param str: Alternatve codon - :type str: str - :return: Returns nucleotide changed from the reference. - :rtype: Tuple[str, str] - """ - ref_nt = "" - alt_nt = "" - for ref, alt in zip(ref_codon, alt_codon): - if not ref == alt: - ref_nt += ref - alt_nt += alt - return ref_nt.upper(), alt_nt.upper() - - -def format_nt_change( - ref: str, - alt: str, - var_type: VariantType, - start_pos: int, - end_pos: int = None, -) -> str: - """Format nucleotide change - - :param ref: Reference sequence - :type ref: str - :param alt: Alternate sequence - :type alt: str - :param pos: Position - :type pos: int - :param var_type: Type of change - :type var_type: VariantType - :return: Formatted nucleotide - :rtype: str - """ - fmt_change = "" - match var_type: - case VariantType.SUBSTITUTION: - f"g.{start_pos}{ref}>{alt}" - case VariantType.DELETION: - f"g.{start_pos}_{end_pos}del" - case VariantType.INSERTION: - f"g.{start_pos}_{end_pos}ins{alt}" - return fmt_change - - def _parse_resfinder_amr_variants( resfinder_result, limit_to_phenotypes=None ) -> Tuple[ResistanceVariant, ...]: diff --git a/prp/parse/phenotype/utils.py b/prp/parse/phenotype/utils.py index 6ea7bd7..88cd155 100644 --- a/prp/parse/phenotype/utils.py +++ b/prp/parse/phenotype/utils.py @@ -1,13 +1,13 @@ """Shared utility functions.""" -from ...models.phenotype import ElementTypeResult -from ...models.phenotype import ElementType, PhenotypeInfo +from typing import Tuple +from ...models.phenotype import ElementType, ElementTypeResult, PhenotypeInfo, VariantType def _default_amr_phenotype() -> PhenotypeInfo: return PhenotypeInfo( - type = ElementType.AMR, - group = ElementType.AMR, - name = ElementType.AMR, + type=ElementType.AMR, + group=ElementType.AMR, + name=ElementType.AMR, ) @@ -21,3 +21,55 @@ def is_prediction_result_empty(result: ElementTypeResult) -> bool: """ n_entries = len(result.genes) + len(result.mutations) return n_entries == 1 + + +def get_nt_change(ref_codon: str, alt_codon: str) -> Tuple[str, str]: + """Get nucleotide change from codons + + Ref: TCG, Alt: TTG => Tuple[C, T] + + :param ref_codon: Reference codeon + :type ref_codon: str + :param str: Alternatve codon + :type str: str + :return: Returns nucleotide changed from the reference. + :rtype: Tuple[str, str] + """ + ref_nt = "" + alt_nt = "" + for ref, alt in zip(ref_codon, alt_codon): + if not ref == alt: + ref_nt += ref + alt_nt += alt + return ref_nt.upper(), alt_nt.upper() + + +def format_nt_change( + ref: str, + alt: str, + var_type: VariantType, + start_pos: int, + end_pos: int = None, +) -> str: + """Format nucleotide change + + :param ref: Reference sequence + :type ref: str + :param alt: Alternate sequence + :type alt: str + :param pos: Position + :type pos: int + :param var_type: Type of change + :type var_type: VariantType + :return: Formatted nucleotide + :rtype: str + """ + fmt_change = "" + match var_type: + case VariantType.SUBSTITUTION: + f"g.{start_pos}{ref}>{alt}" + case VariantType.DELETION: + f"g.{start_pos}_{end_pos}del" + case VariantType.INSERTION: + f"g.{start_pos}_{end_pos}ins{alt}" + return fmt_change diff --git a/prp/parse/phenotype/virulencefinder.py b/prp/parse/phenotype/virulencefinder.py index 43e38df..e365ed4 100644 --- a/prp/parse/phenotype/virulencefinder.py +++ b/prp/parse/phenotype/virulencefinder.py @@ -67,7 +67,7 @@ def parse_virulencefinder_vir_pred(path: str) -> ElementTypeResult | None: :rtype: ElementTypeResult | None """ LOG.info("Parsing virulencefinder virulence prediction") - with open(path, 'rb') as inpt: + with open(path, "rb") as inpt: pred = json.load(inpt) if "virulencefinder" in pred: results: ElementTypeResult = _parse_virulencefinder_vir_results(pred) diff --git a/prp/parse/typing.py b/prp/parse/typing.py index 2ba46cb..bf9ee8e 100644 --- a/prp/parse/typing.py +++ b/prp/parse/typing.py @@ -135,11 +135,14 @@ def parse_mykrobe_lineage_results(pred_res: dict, method) -> TypingResultLineage for lineage in pred_res: if not lineage["susceptibility"].upper() == "R": continue - split_lin = lineage["lineage"].split('.') + split_lin = lineage["lineage"].split(".") main_lin = split_lin[0] sublin = lineage["lineage"] - lin_idxs = lineage["lineage"].lstrip("lineage").split('.') - lineages = [LineageInformation(lineage="lineage" + '.'.join(lin_idxs[:idx+1])) for idx in range(len(lin_idxs))] + lin_idxs = lineage["lineage"].lstrip("lineage").split(".") + lineages = [ + LineageInformation(lineage="lineage" + ".".join(lin_idxs[: idx + 1])) + for idx in range(len(lin_idxs)) + ] # cast to lineage object result_obj = TypingResultLineage( main_lin=main_lin, @@ -174,6 +177,6 @@ def parse_virulencefinder_stx_typing(path: str) -> MethodIndex | None: pred_result = MethodIndex( type=TypingMethod.STX, software=Software.VIRULENCEFINDER, - result=gene + result=gene, ) return pred_result From 2337a3d5f1ec468c486c4986976b0669d7fdf6b5 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 4 Jan 2024 11:39:07 +0100 Subject: [PATCH 28/29] Fix conflicts --- prp/parse/phenotype/mykrobe.py | 46 +++++++++++++++++-------------- prp/parse/phenotype/tbprofiler.py | 1 + 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index cb04b06..24b1fe0 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -72,27 +72,31 @@ def _parse_mykrobe_amr_variants(mykrobe_result) -> Tuple[ResistanceVariant, ...] if element_type["variants"] is None: continue - var_info = element_type["variants"].split("-")[1].split(":")[0] - _, ref_nt, alt_nt, position = get_mutation_type(var_info) - var_nom = element_type["variants"].split("-")[0].split("_")[1] - var_type, ref_aa, alt_aa, _ = get_mutation_type(var_nom) - variant = ResistanceVariant( - variant_type=var_type, - gene_symbol=element_type["variants"].split("_")[0], - position=position, - ref_nt=ref_nt, - alt_nt=alt_nt, - ref_aa=ref_aa if len(ref_aa) == 1 and len(alt_aa) == 1 else None, - alt_aa=alt_aa if len(ref_aa) == 1 and len(alt_aa) == 1 else None, - depth=float(element_type["variants"].split(":")[-1]), - change=var_nom, - nucleotide_change="c." + var_info, - protein_change="p." + var_nom, - element_type=ElementType.AMR, - method=element_type["genotype_model"], - drugs=[element_type["drug"].lower()], - ) - results.append(variant) + variants = element_type["variants"].split(";") + for var in variants: + var_info = var.split("-")[1].split(":")[0] + _, ref_nt, alt_nt, position = get_mutation_type(var_info) + var_nom = var.split("-")[0].split("_")[1] + var_type, ref_aa, alt_aa, _ = get_mutation_type(var_nom) + variant = ResistanceVariant( + variant_type=var_type, + gene_symbol=var.split("_")[0], + position=position, + ref_nt=ref_nt, + alt_nt=alt_nt, + ref_aa=ref_aa if len(ref_aa) == 1 and len(alt_aa) == 1 else None, + alt_aa=alt_aa if len(ref_aa) == 1 and len(alt_aa) == 1 else None, + conf=float(var.split(":")[-1]), + alt_kmer_count=float(var.split(":")[-2]), + ref_kmer_count=float(var.split(":")[-3]), + change=var_nom, + nucleotide_change="c." + var_info, + protein_change="p." + var_nom, + element_type=ElementType.AMR, + method=element_type["genotype_model"], + drugs=[element_type["drug"].lower()], + ) + results.append(variant) return results diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py index 1a62021..f98f413 100644 --- a/prp/parse/phenotype/tbprofiler.py +++ b/prp/parse/phenotype/tbprofiler.py @@ -58,6 +58,7 @@ def _parse_tbprofiler_amr_variants(tbprofiler_result) -> Tuple[ResistanceVariant ref_nt=hit["ref"], alt_nt=hit["alt"], depth=hit["depth"], + freq=float(hit["freq"]), ref_database=tbprofiler_result["db_version"]["name"], type=hit["type"], nucleotide_change=hit["nucleotide_change"], From 4850c8650edfac7b949c5c95a2a36bd95a886661 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 4 Jan 2024 11:17:07 +0100 Subject: [PATCH 29/29] Update models to handle freq, kmer counts & conf --- prp/models/phenotype.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prp/models/phenotype.py b/prp/models/phenotype.py index 584b9ea..76c10e6 100644 --- a/prp/models/phenotype.py +++ b/prp/models/phenotype.py @@ -150,7 +150,11 @@ class VariantBase(DatabaseReference): ref_aa: Optional[str] = None alt_aa: Optional[str] = None # prediction info + conf: Optional[int] = None + alt_kmer_count: Optional[int] = None + ref_kmer_count: Optional[int] = None depth: Optional[float] = None + freq: Optional[float] = None contig_id: Optional[str] = None gene_symbol: Optional[str] = None sequence_name: Optional[str] = Field(