From cd5cd00f7842cb02f665903e360bfd8f6120e3b1 Mon Sep 17 00:00:00 2001 From: mhkc Date: Tue, 13 Aug 2024 09:49:13 +0200 Subject: [PATCH 01/18] Reorganised sample metadata in the output --- prp/cli.py | 17 +++++------ prp/models/metadata.py | 43 +++++++++++---------------- prp/models/sample.py | 13 +++++++-- prp/parse/metadata.py | 48 +++++++++++++++++++++++++++---- prp/parse/phenotype/tbprofiler.py | 4 +-- tests/conftest.py | 22 ++++++++------ 6 files changed, 95 insertions(+), 52 deletions(-) diff --git a/prp/cli.py b/prp/cli.py index 60383df..2fedcc7 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -166,14 +166,15 @@ def create_bonsai_input( ): # pylint: disable=too-many-arguments """Combine pipeline results into a standardized json output file.""" LOG.info("Start generating pipeline result json") + # Get basic sample object + sample_info, seq_info, pipeline_info = parse_run_info(run_metadata, process_metadata) results = { - "run_metadata": { - "run": parse_run_info(run_metadata), - "databases": get_database_info(process_metadata), - }, + "sequencing": seq_info, + "pipeline": pipeline_info, "qc": [], "typing_result": [], "element_type_result": [], + **sample_info # add sample_name & lims_id } # qc if quast: @@ -274,8 +275,8 @@ def create_bonsai_input( ) raise click.Abort() - # add mykrobe db version - results["run_metadata"]["databases"].append( + # add mykrobe db version to the list of softwares + results['pipeline'].softwares.append( SoupVersion( name="mykrobe-predictor", version=pred_res[0]["mykrobe_version"], @@ -316,7 +317,7 @@ def create_bonsai_input( type=SoupType.DB, ) ] - results["run_metadata"]["databases"].extend(db_info) + sw_list = results["pipeline"].softwares.extend(db_info) lin_res: MethodIndex = parse_tbprofiler_lineage_results(pred_res) results["typing_result"].append(lin_res) amr_res: MethodIndex = parse_tbprofiler_amr_pred(pred_res) @@ -361,7 +362,7 @@ def create_bonsai_input( sample_id=sample_id, schema_version=OUTPUT_SCHEMA_VERSION, **results ) except ValidationError as err: - click.secho("Input failed Validation", fg="red") + click.secho("Generated result failed validation", fg="red") click.secho(err) raise click.Abort LOG.info("Storing results to: %s", output) diff --git a/prp/models/metadata.py b/prp/models/metadata.py index 972258b..62a0785 100644 --- a/prp/models/metadata.py +++ b/prp/models/metadata.py @@ -1,6 +1,7 @@ """Metadata models.""" from datetime import datetime from enum import Enum +from typing import Optional from pydantic import BaseModel, Field @@ -22,35 +23,25 @@ class SoupVersion(BaseModel): type: SoupType -class RunInformation(RWModel): - """Store information on a run how the run was conducted.""" +class SequencingInfo(RWModel): + """Information on the sample was sequenced.""" + + run_id: str + platform: str + instrument: Optional[str] + method: dict[str, str] = {} + date: datetime | None + + +class PipelineInfo(RWModel): + """Information on the sample was analysed.""" pipeline: str version: str commit: str - analysis_profile: str = Field( - ..., - alias="analysisProfile", - description="The analysis profile used when starting the pipeline", - ) - configuration_files: list[str] = Field( - ..., alias="configurationFiles", description="Nextflow configuration used" - ) + analysis_profile: str + configuration_files: list[str] workflow_name: str - sample_name: str - lims_id: str - sequencing_run: str - sequencing_platform: str - sequencing_type: str command: str - date: datetime - - -SoupVersions = list[SoupVersion] - - -class RunMetadata(BaseModel): - 
"""Run metadata""" - - run: RunInformation - databases: SoupVersions + softwares: list[SoupVersion] + date: datetime \ No newline at end of file diff --git a/prp/models/sample.py b/prp/models/sample.py index 7d4f3a6..57328f4 100644 --- a/prp/models/sample.py +++ b/prp/models/sample.py @@ -5,7 +5,7 @@ from pydantic import Field from .base import RWModel -from .metadata import RunMetadata +from .metadata import SequencingInfo, PipelineInfo from .phenotype import ( AMRMethodIndex, StressMethodIndex, @@ -44,8 +44,17 @@ class SampleBase(RWModel): """Base datamodel for sample data structure""" sample_id: str = Field(..., alias="sampleId", min_length=3, max_length=100) - run_metadata: RunMetadata = Field(..., alias="runMetadata") + sample_name: str + lims_id: str + + # metadata + sequencing: SequencingInfo + pipeline: PipelineInfo + + # quality qc: list[QcMethodIndex] = Field(...) + + # species identification species_prediction: list[SppMethodIndex] = Field(..., alias="speciesPrediction") diff --git a/prp/parse/metadata.py b/prp/parse/metadata.py index 3dce1b8..4fb9e6c 100644 --- a/prp/parse/metadata.py +++ b/prp/parse/metadata.py @@ -1,10 +1,11 @@ """Parse metadata passed to pipeline.""" import json import logging +from datetime import datetime from Bio import SeqIO -from ..models.metadata import RunInformation, SoupVersion +from ..models.metadata import PipelineInfo, SequencingInfo, SoupVersion LOG = logging.getLogger(__name__) @@ -29,8 +30,22 @@ def get_database_info(process_metadata: list[str]) -> list[SoupVersion]: return db_info -def parse_run_info(run_metadata: str) -> RunInformation: - """Parse nextflow analysis information +def parse_sequence_date_from_run_id(run_id: str) -> datetime | None: + err_msg = 'Unrecognized format of run_id, sequence time cant be determined' + if '_' not in run_id: + LOG.warning(err_msg) + return None + # parse date string + try: + seq_date = datetime.strptime(run_id.split('_')[0], r'%y%m%d') + except ValueError: + LOG.warning(err_msg) + seq_date = None + return seq_date + + +def parse_run_info(run_metadata: str, process_metadata: list[str]) -> tuple[SequencingInfo, PipelineInfo]: + """Parse nextflow analysis information. :param run_metadata: Nextflow analysis metadata in json format. 
:type run_metadata: str @@ -39,8 +54,31 @@ def parse_run_info(run_metadata: str) -> RunInformation: """ LOG.info("Parse run metadata.") with open(run_metadata, encoding="utf-8") as jsonfile: - run_info = RunInformation(**json.load(jsonfile)) - return run_info + run_info = json.load(jsonfile) + # get sample info + sample_info = {"sample_name": run_info["sample_name"], "lims_id": run_info["lims_id"]} + # get sequencing info + seq_info = SequencingInfo( + run_id=run_info['sequencing_run'], + platform=run_info['sequencing_platform'], + instrument=None, + method={'method': run_info['sequencing_type']}, + date=parse_sequence_date_from_run_id(run_info['sequencing_run']) + ) + # get pipeline info + soup_versions = get_database_info(process_metadata) + pipeline_info = PipelineInfo( + pipeline=run_info['pipeline'], + version=run_info['version'], + commit=run_info['commit'], + analysis_profile=run_info['analysis_profile'], + configuration_files=run_info['configuration_files'], + workflow_name=run_info['commit'], + command=run_info['command'], + softwares=soup_versions, + date=datetime.fromisoformat(run_info['date']), + ) + return sample_info, seq_info, pipeline_info def get_gb_genome_version(fasta_path: str) -> str: diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py index 731b82b..5b63d78 100644 --- a/prp/parse/phenotype/tbprofiler.py +++ b/prp/parse/phenotype/tbprofiler.py @@ -2,7 +2,7 @@ import logging from typing import Any -from ...models.metadata import SoupVersions +from ...models.metadata import SoupVersion from ...models.phenotype import ( AMRMethodIndex, AnnotationType, @@ -157,7 +157,7 @@ def parse_drug_resistance_info(drugs: list[dict[str, str]]) -> list[PhenotypeInf def parse_tbprofiler_amr_pred( prediction: dict[str, Any] -) -> tuple[SoupVersions, ElementTypeResult]: +) -> tuple[tuple[SoupVersion, ...], ElementTypeResult]: """Parse tbprofiler resistance prediction results.""" LOG.info("Parsing tbprofiler prediction") resistance = ElementTypeResult( diff --git a/tests/conftest.py b/tests/conftest.py index 9d217e4..e0234f1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,7 @@ from .fixtures import * from prp.models import PipelineResult -from prp.models.metadata import RunMetadata, RunInformation +from prp.models.metadata import PipelineInfo, SequencingInfo from datetime import datetime @@ -10,26 +10,30 @@ def simple_pipeline_result(): """Return a basic analysis result.""" - mock_run_info = RunInformation( + mock_pipeline_info = PipelineInfo( pipeline="Jasen", version="0.0.1", commit="commit-hash", analysis_profile="", configuration_files=[], workflow_name="workflow-name", - sample_name="sample-name", - lims_id="limbs id", - sequencing_run="run-id", - sequencing_platform="sequencing plattform", - sequencing_type="illumina", command="nextflow run ...", + softwares=[], date=datetime.now(), ) + seq_info = SequencingInfo( + run_id="run-id", + platform="sequencing plattform", + instrument="illumina", + date=datetime.now() + ) # add run into to metadata model - metadata = RunMetadata(run=mock_run_info, databases=[]) return PipelineResult( sample_id="mock-sample-001", - run_metadata=metadata, + sample_name="sample-name", + lims_id="limbs id", + sequencing=seq_info, + pipeline=mock_pipeline_info, qc=[], species_prediction=[], typing_result=[], From 06ca3e712c43131a2f50e9143e5dcc5a5063ca6f Mon Sep 17 00:00:00 2001 From: mhkc Date: Tue, 13 Aug 2024 09:51:10 +0200 Subject: [PATCH 02/18] Updated CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84e010a..e7ade80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ ### Changed + - Updated sample metadata in the output format. RunMetadata was split into two fields, one for sequencing and one for pipeline information. Lims id and sample name are now included in the SampleBase object. + ## [0.9.3] ### Added From c09e7745883c17edfb6009d787ef27eebc7773dc Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 16 Aug 2024 11:17:02 +0200 Subject: [PATCH 03/18] Update argument flags --- prp/cli.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/prp/cli.py b/prp/cli.py index 2fedcc7..9b16fee 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -131,6 +131,7 @@ def cli(silent, debug): multiple=True, help="Genome annotations bed format", ) +@click.option("--vcf", type=click.Path(), help="VCF filepath") @click.option("--snv-vcf", type=click.Path(), help="VCF with SNV variants") @click.option("--sv-vcf", type=click.Path(), help="VCF with SV variants") @click.option("--symlink-dir", type=click.Path(), help="Dir for symlink") @@ -158,6 +159,7 @@ def create_bonsai_input( reference_genome_fasta, reference_genome_gff, genome_annotation, + vcf, snv_vcf, sv_vcf, symlink_dir, @@ -349,11 +351,12 @@ def create_bonsai_input( {"name": f"annotation_{i}", "file": Path(annot).name} for i, annot in enumerate(genome_annotation, start=1) ] - for vcf in [sv_vcf, snv_vcf]: - if vcf: - vcf = _get_path(symlink_dir, "vcf", vcf) - name = "SNV" if vcf == sv_vcf else "SV" - annotations.append({"name": name, "file": vcf}) + vcf_dict = {"SV": sv_vcf, "SNV": snv_vcf, "VCF": vcf} + for name in vcf_dict: + vcf_filepath = vcf_dict[name] + if vcf_filepath: + vcf_filepath = _get_path(symlink_dir, "vcf", vcf_filepath) + annotations.append({"name": name, "file": vcf_filepath}) # store annotation results results["genome_annotation"] = annotations if annotations else None @@ -546,21 +549,21 @@ def annotate_delly(vcf, bed, output): @cli.command() -@click.option("-n", "--name", type=str, help="Track name.") +@click.option("-n", "--track-name", type=str, help="Track name.") @click.option( "-a", "--annotation-file", type=click.Path(exists=True), help="Path to file." 
) @click.option( - "-r", - "--result", + "-b", + "--bonsai-input-file", required=True, type=click.Path(writable=True), - help="PRP result.", + help="PRP result file (used as bonsai input).", ) @click.argument("output", type=click.File("w")) -def add_igv_annotation_track(name, annotation_file, result, output): +def add_igv_annotation_track(track_name, annotation_file, bonsai_input_file, output): """Add IGV annotation track to result.""" - with open(result, "r", encoding="utf-8") as jfile: + with open(bonsai_input_file, "r", encoding="utf-8") as jfile: result_obj = PipelineResult(**json.load(jfile)) # Get genome annotation @@ -569,10 +572,10 @@ def add_igv_annotation_track(name, annotation_file, result, output): ): track_info = [] else: - track_info = result.genome_annotation + track_info = bonsai_input_file.genome_annotation # add new tracks - track_info.append({"name": name, "file": annotation_file}) + track_info.append({"name": track_name, "file": annotation_file}) # update data model upd_result = result_obj.model_copy(update={"genome_annotation": track_info}) From 7044a0a2db3229880e996e90cb26ce2271cadcf9 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 16 Aug 2024 11:30:30 +0200 Subject: [PATCH 04/18] Fix pytest bugs re updated arg flags --- tests/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index e72b4dc..caa1b95 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -232,11 +232,11 @@ def test_add_igv_annotation_track(mtuberculosis_snv_vcf_path, simple_pipeline_re output_fname = "after_update.json" args = [ - "--name", + "--track-name", "snv", "--annotation-file", mtuberculosis_snv_vcf_path, - "--result", + "--bonsai-input-file", result_fname, output_fname, ] From 0609e117ad882baf1dd8274e4f9847feb1fb21de Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 16 Aug 2024 15:08:15 +0200 Subject: [PATCH 05/18] Fix genome_annotation appending bug --- prp/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/prp/cli.py b/prp/cli.py index 9b16fee..a265225 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -562,17 +562,18 @@ def annotate_delly(vcf, bed, output): ) @click.argument("output", type=click.File("w")) def add_igv_annotation_track(track_name, annotation_file, bonsai_input_file, output): - """Add IGV annotation track to result.""" + """Add IGV annotation track to result (bonsai input file).""" with open(bonsai_input_file, "r", encoding="utf-8") as jfile: result_obj = PipelineResult(**json.load(jfile)) + print(result_obj.genome_annotation) # Get genome annotation - if result_obj.genome_annotation is None or isinstance( + if not isinstance( result_obj.genome_annotation, list ): track_info = [] else: - track_info = bonsai_input_file.genome_annotation + track_info = result_obj.genome_annotation # add new tracks track_info.append({"name": track_name, "file": annotation_file}) From 208621fcdd733d67dabf7007bfd87e16ec1f2969 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Mon, 19 Aug 2024 14:46:53 +0200 Subject: [PATCH 06/18] Split SV and SNV from tbprofiler output --- prp/parse/phenotype/tbprofiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py index 5b63d78..ae6e189 100644 --- a/prp/parse/phenotype/tbprofiler.py +++ b/prp/parse/phenotype/tbprofiler.py @@ -74,7 +74,7 @@ def _parse_tbprofiler_amr_variants(predictions) -> tuple[TbProfilerVariant, ...] 
for hit in predictions.get(result_type, []): ref_nt = hit["ref"] alt_nt = hit["alt"] - var_type = VariantType.SNV + var_type = VariantType.SNV if not bool(hit["sv"]) else VariantType.SV if len(ref_nt) == len(alt_nt): var_sub_type = VariantSubType.SUBSTITUTION elif len(ref_nt) > len(alt_nt): From c8780c20b18a22e6d5a87d672a4734f01074b1b8 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Mon, 19 Aug 2024 15:04:17 +0200 Subject: [PATCH 07/18] Remove debugging line --- prp/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prp/cli.py b/prp/cli.py index a265225..e746ca3 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -565,7 +565,6 @@ def add_igv_annotation_track(track_name, annotation_file, bonsai_input_file, out """Add IGV annotation track to result (bonsai input file).""" with open(bonsai_input_file, "r", encoding="utf-8") as jfile: result_obj = PipelineResult(**json.load(jfile)) - print(result_obj.genome_annotation) # Get genome annotation if not isinstance( From eff17f0e9d8363179827be0683a1c2b3e55f7a10 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Tue, 20 Aug 2024 18:22:40 +0200 Subject: [PATCH 08/18] Update annotate_delly to extract SVs from vcf --- prp/cli.py | 33 ++++++++++++++++--- prp/models/phenotype.py | 4 +-- prp/parse/variant.py | 71 +++++++++++++++++++++++++++++++---------- 3 files changed, 85 insertions(+), 23 deletions(-) diff --git a/prp/cli.py b/prp/cli.py index e746ca3..c6e17ff 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -491,6 +491,13 @@ def create_qc_result(sample_id, bam, bed, baits, reference, cpus, output) -> Non @cli.command() @click.option("-v", "--vcf", type=click.Path(exists=True), help="VCF file") @click.option("-b", "--bed", type=click.Path(exists=True), help="BED file") +@click.option( + "-b", + "--bonsai-input-file", + required=True, + type=click.Path(writable=True), + help="PRP result file (used as bonsai input).", +) @click.option( "-o", "--output", @@ -498,7 +505,7 @@ def create_qc_result(sample_id, bam, bed, baits, reference, cpus, output) -> Non type=click.Path(writable=True), help="output filepath", ) -def annotate_delly(vcf, bed, output): +def annotate_delly(vcf, bed, bonsai_input_file, output): """Annotate Delly SV varinats with genes in BED file.""" output = Path(output) # load annotation @@ -542,9 +549,21 @@ def annotate_delly(vcf, bed, output): ) # open vcf writer - writer = Writer(output.absolute(), vcf_obj) - annotate_delly_variants(writer, vcf_obj, annotation, annot_chrom=annot_chrom) + with open(bonsai_input_file, "r", encoding="utf-8") as jfile: + result_obj = PipelineResult(**json.load(jfile)) + + # Get genome annotation + if not isinstance( + result_obj.sv_variants, list + ): + sv_variants_info = [] + else: + sv_variants_info = result_obj.sv_variants + sv_variants = annotate_delly_variants(vcf_obj, annotation, annot_chrom=annot_chrom) + sv_variants_info.extend(sv_variants) + upd_result = result_obj.model_copy(update={"sv_variants": sv_variants_info}) + output.write(upd_result.model_dump_json(indent=3)) click.secho(f"Wrote annotated delly variants to {output}", fg="green") @@ -560,7 +579,13 @@ def annotate_delly(vcf, bed, output): type=click.Path(writable=True), help="PRP result file (used as bonsai input).", ) -@click.argument("output", type=click.File("w")) +@click.option( + "-o", + "--output", + required=True, + type=click.Path(writable=True), + help="output filepath", +) def add_igv_annotation_track(track_name, annotation_file, bonsai_input_file, output): """Add IGV annotation track to result (bonsai input file).""" with 
open(bonsai_input_file, "r", encoding="utf-8") as jfile: diff --git a/prp/models/phenotype.py b/prp/models/phenotype.py index 0aa8e9d..2e33f4d 100644 --- a/prp/models/phenotype.py +++ b/prp/models/phenotype.py @@ -215,7 +215,7 @@ class VariantBase(RWModel): phenotypes: list[PhenotypeInfo] = [] # variant location - reference_sequence: str = Field( + reference_sequence: str | None = Field( ..., description="Reference sequence such as chromosome, gene or contig id.", alias="gene_symbol", @@ -251,7 +251,7 @@ class MykrobeVariant(VariantBase): class TbProfilerVariant(VariantBase): """Container for TbProfiler variant information""" - variant_effect: str + variant_effect: str | None = None hgvs_nt_change: Optional[str] = Field(..., description="DNA change in HGVS format") hgvs_aa_change: Optional[str] = Field( ..., description="Protein change in HGVS format" diff --git a/prp/parse/variant.py b/prp/parse/variant.py index 5c6f531..c326b8d 100644 --- a/prp/parse/variant.py +++ b/prp/parse/variant.py @@ -6,6 +6,7 @@ from cyvcf2 import VCF, Variant from prp.models.phenotype import VariantBase, VariantType +from prp.models.phenotype import TbProfilerVariant, VariantBase, VariantType LOG = logging.getLogger(__name__) SOURCE_PATTERN = r"##source=(.+)\n" @@ -80,27 +81,63 @@ def load_variants(variant_file: str) -> list[VariantBase]: return variants -def annotate_delly_variants(writer, vcf, annotation, annot_chrom=False): +def annotate_delly_variants(vcf, annotation, annot_chrom=False): """Annotate a variant called by Delly.""" + results = [] + var_id = 1 locus_tag = 3 gene_symbol = 4 # annotate variant n_annotated = 0 for variant in vcf: - # update chromosome - if annot_chrom: - variant.CHROM = annotation.contigs[0] - # get genes intersecting with SV - genes = [ - {"gene_symbol": gene[gene_symbol], "locus_tag": gene[locus_tag]} - for gene in annotation.fetch(variant.CHROM, variant.start, variant.end) - ] - # add overlapping genes to INFO - if len(genes) > 0: - variant.INFO["gene"] = ",".join([gene["gene_symbol"] for gene in genes]) - variant.INFO["locus_tag"] = ",".join([gene["locus_tag"] for gene in genes]) - n_annotated += 1 - - # write variant - writer.write_record(variant) + var_sub_type = variant.INFO.get('TYPE').upper() if variant.INFO.get('TYPE') else variant.INFO.get('SVTYPE') + if var_sub_type == "DEL" or var_sub_type == "INS": + # update chromosome + if annot_chrom: + variant.CHROM = annotation.contigs[0] + # get genes intersecting with SV + genes = [ + {"gene_symbol": gene[gene_symbol], "locus_tag": gene[locus_tag]} + for gene in annotation.fetch(variant.CHROM, variant.start, variant.end) + ] + # add overlapping genes to INFO + if len(genes) > 0: + variant.INFO["gene"] = ",".join([gene["gene_symbol"] for gene in genes]) + variant.INFO["locus_tag"] = ",".join([gene["locus_tag"] for gene in genes]) + n_annotated += 1 + + if len(variant.FILTERS) == 0: + passed_qc = None + elif "PASS" in variant.FILTERS: + passed_qc = True + else: + passed_qc = False + print(variant.CHROM, type(variant.start), type(variant.end)) + print(genes) + var = TbProfilerVariant( + id=var_id, + variant_type=VariantType.SV, + variant_subtype=var_sub_type, + phenotypes=[], + reference_sequence=variant.INFO.get("gene"), + accession=variant.INFO.get("locus_tag"), + start=variant.start, + end=variant.end + len(variant.ALT), + ref_nt=variant.REF, + alt_nt=variant.ALT[0], + variant_effect=None, + hgvs_nt_change=None, + hgvs_aa_change=None, + depth=variant.INFO.get("DP"), + frequency=variant.INFO.get("AF"), + method="delly", + 
passed_qc=passed_qc, + ) + var_id += 1 # increment variant id + results.append(var) + + variants = sorted( + results, key=lambda entry: (entry.reference_sequence, entry.start) + ) LOG.info("Annotated %d SV variants", n_annotated) + return variants From 5fe3ded0205d4c8ea1763ed51d15e951ddd81fa3 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 21 Aug 2024 16:28:24 +0200 Subject: [PATCH 09/18] Fix variant sorting and output bug --- prp/cli.py | 10 ++++------ prp/parse/variant.py | 4 +--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/prp/cli.py b/prp/cli.py index c6e17ff..1fee0b3 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -502,13 +502,11 @@ def create_qc_result(sample_id, bam, bed, baits, reference, cpus, output) -> Non "-o", "--output", required=True, - type=click.Path(writable=True), + type=click.File("w"), help="output filepath", ) def annotate_delly(vcf, bed, bonsai_input_file, output): """Annotate Delly SV varinats with genes in BED file.""" - output = Path(output) - # load annotation if bed is not None: annotation = pysam.TabixFile(bed, parser=pysam.asTuple()) else: @@ -564,7 +562,7 @@ def annotate_delly(vcf, bed, bonsai_input_file, output): sv_variants_info.extend(sv_variants) upd_result = result_obj.model_copy(update={"sv_variants": sv_variants_info}) output.write(upd_result.model_dump_json(indent=3)) - click.secho(f"Wrote annotated delly variants to {output}", fg="green") + click.secho(f"Wrote annotated delly variants to {output.name}", fg="green") @cli.command() @@ -583,7 +581,7 @@ def annotate_delly(vcf, bed, bonsai_input_file, output): "-o", "--output", required=True, - type=click.Path(writable=True), + type=click.File("w"), help="output filepath", ) def add_igv_annotation_track(track_name, annotation_file, bonsai_input_file, output): @@ -608,4 +606,4 @@ def add_igv_annotation_track(track_name, annotation_file, bonsai_input_file, out # overwrite result output.write(upd_result.model_dump_json(indent=3)) - click.secho(f"Wrote updated result to {output}", fg="green") + click.secho(f"Wrote updated result to {output.name}", fg="green") diff --git a/prp/parse/variant.py b/prp/parse/variant.py index c326b8d..31de4a9 100644 --- a/prp/parse/variant.py +++ b/prp/parse/variant.py @@ -112,8 +112,6 @@ def annotate_delly_variants(vcf, annotation, annot_chrom=False): passed_qc = True else: passed_qc = False - print(variant.CHROM, type(variant.start), type(variant.end)) - print(genes) var = TbProfilerVariant( id=var_id, variant_type=VariantType.SV, @@ -137,7 +135,7 @@ def annotate_delly_variants(vcf, annotation, annot_chrom=False): results.append(var) variants = sorted( - results, key=lambda entry: (entry.reference_sequence, entry.start) + results, key=lambda entry: entry.start ) LOG.info("Annotated %d SV variants", n_annotated) return variants From e29e9610096139122eb8ce3be92be6202f2e0d7c Mon Sep 17 00:00:00 2001 From: mhkc Date: Thu, 22 Aug 2024 08:57:29 +0200 Subject: [PATCH 10/18] Renamed duplicated option name in annoatate_delly --- prp/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prp/cli.py b/prp/cli.py index 1fee0b3..e4e3cb5 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -492,7 +492,7 @@ def create_qc_result(sample_id, bam, bed, baits, reference, cpus, output) -> Non @click.option("-v", "--vcf", type=click.Path(exists=True), help="VCF file") @click.option("-b", "--bed", type=click.Path(exists=True), help="BED file") @click.option( - "-b", + "-i", "--bonsai-input-file", required=True, type=click.Path(writable=True), From 
1ebe440208546121dc92085a75e84e8af4878f25 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 22 Aug 2024 11:22:41 +0200 Subject: [PATCH 11/18] Revert annotate_delly changes --- prp/cli.py | 29 ++++--------------- prp/parse/variant.py | 69 +++++++++++--------------------------------- 2 files changed, 23 insertions(+), 75 deletions(-) diff --git a/prp/cli.py b/prp/cli.py index e4e3cb5..6ffbce9 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -491,22 +491,17 @@ def create_qc_result(sample_id, bam, bed, baits, reference, cpus, output) -> Non @cli.command() @click.option("-v", "--vcf", type=click.Path(exists=True), help="VCF file") @click.option("-b", "--bed", type=click.Path(exists=True), help="BED file") -@click.option( - "-i", - "--bonsai-input-file", - required=True, - type=click.Path(writable=True), - help="PRP result file (used as bonsai input).", -) @click.option( "-o", "--output", required=True, - type=click.File("w"), + type=click.Path(writable=True), help="output filepath", ) -def annotate_delly(vcf, bed, bonsai_input_file, output): +def annotate_delly(vcf, bed, output): """Annotate Delly SV varinats with genes in BED file.""" + output = Path(output) + # load annotation if bed is not None: annotation = pysam.TabixFile(bed, parser=pysam.asTuple()) else: @@ -547,21 +542,9 @@ def annotate_delly(vcf, bed, bonsai_input_file, output): ) # open vcf writer - with open(bonsai_input_file, "r", encoding="utf-8") as jfile: - result_obj = PipelineResult(**json.load(jfile)) - - # Get genome annotation - if not isinstance( - result_obj.sv_variants, list - ): - sv_variants_info = [] - else: - sv_variants_info = result_obj.sv_variants + writer = Writer(output.absolute(), vcf_obj) + annotate_delly_variants(writer, vcf_obj, annotation, annot_chrom=annot_chrom) - sv_variants = annotate_delly_variants(vcf_obj, annotation, annot_chrom=annot_chrom) - sv_variants_info.extend(sv_variants) - upd_result = result_obj.model_copy(update={"sv_variants": sv_variants_info}) - output.write(upd_result.model_dump_json(indent=3)) click.secho(f"Wrote annotated delly variants to {output.name}", fg="green") diff --git a/prp/parse/variant.py b/prp/parse/variant.py index 31de4a9..8de5986 100644 --- a/prp/parse/variant.py +++ b/prp/parse/variant.py @@ -81,61 +81,26 @@ def load_variants(variant_file: str) -> list[VariantBase]: return variants -def annotate_delly_variants(vcf, annotation, annot_chrom=False): +def annotate_delly_variants(writer, vcf, annotation, annot_chrom=False): """Annotate a variant called by Delly.""" - results = [] - var_id = 1 locus_tag = 3 gene_symbol = 4 # annotate variant n_annotated = 0 for variant in vcf: - var_sub_type = variant.INFO.get('TYPE').upper() if variant.INFO.get('TYPE') else variant.INFO.get('SVTYPE') - if var_sub_type == "DEL" or var_sub_type == "INS": - # update chromosome - if annot_chrom: - variant.CHROM = annotation.contigs[0] - # get genes intersecting with SV - genes = [ - {"gene_symbol": gene[gene_symbol], "locus_tag": gene[locus_tag]} - for gene in annotation.fetch(variant.CHROM, variant.start, variant.end) - ] - # add overlapping genes to INFO - if len(genes) > 0: - variant.INFO["gene"] = ",".join([gene["gene_symbol"] for gene in genes]) - variant.INFO["locus_tag"] = ",".join([gene["locus_tag"] for gene in genes]) - n_annotated += 1 - - if len(variant.FILTERS) == 0: - passed_qc = None - elif "PASS" in variant.FILTERS: - passed_qc = True - else: - passed_qc = False - var = TbProfilerVariant( - id=var_id, - variant_type=VariantType.SV, - variant_subtype=var_sub_type, - 
phenotypes=[], - reference_sequence=variant.INFO.get("gene"), - accession=variant.INFO.get("locus_tag"), - start=variant.start, - end=variant.end + len(variant.ALT), - ref_nt=variant.REF, - alt_nt=variant.ALT[0], - variant_effect=None, - hgvs_nt_change=None, - hgvs_aa_change=None, - depth=variant.INFO.get("DP"), - frequency=variant.INFO.get("AF"), - method="delly", - passed_qc=passed_qc, - ) - var_id += 1 # increment variant id - results.append(var) - - variants = sorted( - results, key=lambda entry: entry.start - ) - LOG.info("Annotated %d SV variants", n_annotated) - return variants + # update chromosome + if annot_chrom: + variant.CHROM = annotation.contigs[0] + # get genes intersecting with SV + genes = [ + {"gene_symbol": gene[gene_symbol], "locus_tag": gene[locus_tag]} + for gene in annotation.fetch(variant.CHROM, variant.start, variant.end) + ] + # add overlapping genes to INFO + if len(genes) > 0: + variant.INFO["gene"] = ",".join([gene["gene_symbol"] for gene in genes]) + variant.INFO["locus_tag"] = ",".join([gene["locus_tag"] for gene in genes]) + n_annotated += 1 + + # write variant + writer.write_record(variant) From 0fd848776ee909ab6b13344f775c7324c3798545 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 22 Aug 2024 17:01:36 +0200 Subject: [PATCH 12/18] Update filtering of vcf file --- prp/cli.py | 7 ++-- prp/parse/variant.py | 80 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 68 insertions(+), 19 deletions(-) diff --git a/prp/cli.py b/prp/cli.py index 6ffbce9..e4976c3 100644 --- a/prp/cli.py +++ b/prp/cli.py @@ -327,10 +327,13 @@ def create_bonsai_input( # parse SNV and SV variants. if snv_vcf: - results["snv_variants"] = load_variants(snv_vcf) + results["snv_variants"] = load_variants(snv_vcf)["snv_variants"] if sv_vcf: - results["sv_variants"] = load_variants(sv_vcf) + results["sv_variants"] = load_variants(sv_vcf)["sv_variants"] + + if vcf: + results.update(load_variants(vcf)) # entries for reference genome and read mapping if all([bam, reference_genome_fasta, reference_genome_gff]): diff --git a/prp/parse/variant.py b/prp/parse/variant.py index 8de5986..7612880 100644 --- a/prp/parse/variant.py +++ b/prp/parse/variant.py @@ -12,6 +12,27 @@ SOURCE_PATTERN = r"##source=(.+)\n" +def _filter_variants(variant_list): + # Initialize the results dictionary + filetered_variants = { + 'sv_variants': [], + 'mnv_variants': [], + 'snv_variants': [] + } + + # Iterate through each variant in the list + for variant in variant_list: + variant_type = dict(variant).get('variant_type') # Extract the variant_type + # Append the variant to the appropriate key in the results dictionary + if variant_type == "SV": + filetered_variants['sv_variants'].append(variant) + elif variant_type == "MNV": + filetered_variants['mnv_variants'].append(variant) + elif variant_type == "SNV": + filetered_variants['snv_variants'].append(variant) + return filetered_variants + + def _get_variant_type(variant) -> VariantType: """Parse variant type.""" match variant.var_type: @@ -19,13 +40,30 @@ def _get_variant_type(variant) -> VariantType: var_type = VariantType.SNV case "mnp": var_type = VariantType.MNV + case "indel": + var_type = VariantType.SV case _: var_type = VariantType(variant.var_type.upper()) return var_type +def _get_variant_subtype(ref_base, alt_base): + # Define purines and pyrimidines + purines = {'A', 'G'} + pyrimidines = {'C', 'T'} + + # Check for transition substitution + if (ref_base in purines and alt_base in purines) or (ref_base in pyrimidines and alt_base in 
pyrimidines): + return "TS" + + # If it's not a transition, it must be a transversion + return "TV" + + def parse_variant(variant: Variant, var_id: int, caller: str | None = None): """Parse variant info from VCF row.""" + + var_objs = [] # check if variant passed qc filtering if len(variant.FILTERS) == 0: passed_qc = None @@ -36,20 +74,29 @@ def parse_variant(variant: Variant, var_id: int, caller: str | None = None): var_type: VariantType = _get_variant_type(variant) - var_obj = VariantBase( - id=var_id, - variant_type=var_type, - variant_subtype=variant.var_subtype.upper(), - gene_symbol=variant.CHROM, - start=variant.start, - end=variant.end, - ref_nt=variant.REF, - alt_nt=variant.ALT[0], # haploid - method=variant.INFO.get("SVMETHOD", caller), - confidence=variant.QUAL, - passed_qc=passed_qc, - ) - return var_obj + for alt_idx, alt_var in enumerate(variant.ALT): + possible_minority_var = False + var_subtype = variant.var_subtype.upper() + if var_subtype == "UNKNOWN": + var_subtype = _get_variant_subtype(variant.REF, alt_var) + possible_minority_var = True + var_obj = VariantBase( + id=var_id, + variant_type=var_type, + variant_subtype=var_subtype, + gene_symbol=variant.CHROM, + start=variant.start, + end=variant.end, + ref_nt=variant.REF, + alt_nt=alt_var, + frequency=variant.INFO.get("AF") if type(variant.INFO.get("AF")) != tuple else variant.INFO.get("AF")[alt_idx], + depth=variant.INFO.get("DP") if type(variant.INFO.get("DP")) != tuple else variant.INFO.get("DP")[alt_idx], + method=variant.INFO.get("SVMETHOD", caller), + confidence=variant.QUAL, + passed_qc=passed_qc, + ) + var_objs.append(var_obj) + return var_objs def _get_variant_caller(vcf_obj: VCF) -> str | None: @@ -76,9 +123,8 @@ def load_variants(variant_file: str) -> list[VariantBase]: # parse header from vcf file variants = [] for var_id, variant in enumerate(vcf_obj, start=1): - variants.append(parse_variant(variant, var_id=var_id, caller=variant_caller)) - - return variants + variants.extend(parse_variant(variant, var_id=var_id, caller=variant_caller)) + return _filter_variants(variants) def annotate_delly_variants(writer, vcf, annotation, annot_chrom=False): From ff103304413b3716181c13a7b02df983fbae2de5 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 22 Aug 2024 22:52:31 +0200 Subject: [PATCH 13/18] Update var_type determination --- prp/parse/phenotype/mykrobe.py | 28 ++++++++++++++++------------ prp/parse/phenotype/tbprofiler.py | 7 +++++++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/prp/parse/phenotype/mykrobe.py b/prp/parse/phenotype/mykrobe.py index 6afee60..fbd0895 100644 --- a/prp/parse/phenotype/mykrobe.py +++ b/prp/parse/phenotype/mykrobe.py @@ -45,36 +45,40 @@ def get_mutation_type(var_nom: str) -> tuple[str, Union[VariantSubType, str, int :param var_nom: Mykrobe mutation description :type var_nom: str - :return: Return variant type, ref_codon, alt_codont and position + :return: Return variant type, ref_nt, alt_ntt and position :rtype: dict[str, Union[VariantSubType, str, int]] """ mut_type = None - ref_codon = None - alt_codon = None + ref_nt = None + alt_nt = None position = None try: ref_idx = re.search(r"\d", var_nom, 1).start() alt_idx = re.search(r"\d(?=[^\d]*$)", var_nom).start() + 1 except AttributeError: - return mut_type, ref_codon, alt_codon, position + return mut_type, ref_nt, alt_nt, position - ref_codon = var_nom[:ref_idx] - alt_codon = var_nom[alt_idx:] + ref_nt = var_nom[:ref_idx] + alt_nt = var_nom[alt_idx:] position = int(var_nom[ref_idx:alt_idx]) - if 
len(ref_codon) > len(alt_codon): + var_len = abs(len(ref_nt) - len(alt_nt)) + if var_len >= 50: var_type = VariantType.SV + elif 1 < var_len < 50: + var_type = VariantType.INDEL + else: + var_type = VariantType.SNV + if len(ref_nt) > len(alt_nt): var_sub_type = VariantSubType.DELETION - elif len(ref_codon) < len(alt_codon): - var_type = VariantType.SV + elif len(ref_nt) < len(alt_nt): var_sub_type = VariantSubType.INSERTION else: - var_type = VariantType.SNV var_sub_type = VariantSubType.SUBSTITUTION return { "type": var_type, "subtype": var_sub_type, - "ref": ref_codon, - "alt": alt_codon, + "ref": ref_nt, + "alt": alt_nt, "pos": position, } diff --git a/prp/parse/phenotype/tbprofiler.py b/prp/parse/phenotype/tbprofiler.py index ae6e189..fe2073c 100644 --- a/prp/parse/phenotype/tbprofiler.py +++ b/prp/parse/phenotype/tbprofiler.py @@ -75,6 +75,13 @@ def _parse_tbprofiler_amr_variants(predictions) -> tuple[TbProfilerVariant, ...] ref_nt = hit["ref"] alt_nt = hit["alt"] var_type = VariantType.SNV if not bool(hit["sv"]) else VariantType.SV + var_len = abs(len(ref_nt) - len(alt_nt)) + if var_len >= 50 or bool(hit["sv"]): + var_type = VariantType.SV + elif 1 < var_len < 50: + var_type = VariantType.INDEL + else: + var_type = VariantType.SNV if len(ref_nt) == len(alt_nt): var_sub_type = VariantSubType.SUBSTITUTION elif len(ref_nt) > len(alt_nt): From f3493c544d594e79d1423de05c30deeb6f905c54 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Thu, 22 Aug 2024 22:54:46 +0200 Subject: [PATCH 14/18] Add indel_variants to result json --- prp/models/phenotype.py | 1 + prp/models/sample.py | 1 + prp/parse/variant.py | 8 ++++---- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/prp/models/phenotype.py b/prp/models/phenotype.py index 2e33f4d..2496d87 100644 --- a/prp/models/phenotype.py +++ b/prp/models/phenotype.py @@ -31,6 +31,7 @@ class VariantType(str, Enum): SNV = "SNV" MNV = "MNV" SV = "SV" + INDEL = "INDEL" STR = "STR" diff --git a/prp/models/sample.py b/prp/models/sample.py index 57328f4..ecdd8e2 100644 --- a/prp/models/sample.py +++ b/prp/models/sample.py @@ -90,6 +90,7 @@ class PipelineResult(SampleBase): # optional variant info snv_variants: Optional[list[VariantBase]] = None sv_variants: Optional[list[VariantBase]] = None + indel_variants: Optional[list[VariantBase]] = None # optional alignment info reference_genome: Optional[ReferenceGenome] = None read_mapping: Optional[str] = None diff --git a/prp/parse/variant.py b/prp/parse/variant.py index 7612880..d74f181 100644 --- a/prp/parse/variant.py +++ b/prp/parse/variant.py @@ -16,7 +16,7 @@ def _filter_variants(variant_list): # Initialize the results dictionary filetered_variants = { 'sv_variants': [], - 'mnv_variants': [], + 'indel_variants': [], 'snv_variants': [] } @@ -26,8 +26,8 @@ def _filter_variants(variant_list): # Append the variant to the appropriate key in the results dictionary if variant_type == "SV": filetered_variants['sv_variants'].append(variant) - elif variant_type == "MNV": - filetered_variants['mnv_variants'].append(variant) + elif variant_type == "INDEL": + filetered_variants['indel_variants'].append(variant) elif variant_type == "SNV": filetered_variants['snv_variants'].append(variant) return filetered_variants @@ -41,7 +41,7 @@ def _get_variant_type(variant) -> VariantType: case "mnp": var_type = VariantType.MNV case "indel": - var_type = VariantType.SV + var_type = VariantType.INDEL case _: var_type = VariantType(variant.var_type.upper()) return var_type From 86cf8333ddd71473de48471f95ff849bdfabf306 Mon 
Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Fri, 23 Aug 2024 15:26:56 +0200 Subject: [PATCH 15/18] Fix pytest errors --- tests/parse/test_variant.py | 4 ++-- tests/test_cli.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/parse/test_variant.py b/tests/parse/test_variant.py index 96e77b7..f0a19b6 100644 --- a/tests/parse/test_variant.py +++ b/tests/parse/test_variant.py @@ -6,11 +6,11 @@ def test_parse_sv_variants(mtuberculosis_sv_vcf_path): """Test loading of varians.""" variants = load_variants(mtuberculosis_sv_vcf_path) - assert len(variants) == 2 + assert len(variants) == 3 def test_parse_snv_variants(mtuberculosis_snv_vcf_path): """Test loading of varians.""" variants = load_variants(mtuberculosis_snv_vcf_path) - assert len(variants) == 2 + assert len(variants) == 3 diff --git a/tests/test_cli.py b/tests/test_cli.py index caa1b95..2888b10 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -238,6 +238,7 @@ def test_add_igv_annotation_track(mtuberculosis_snv_vcf_path, simple_pipeline_re mtuberculosis_snv_vcf_path, "--bonsai-input-file", result_fname, + "--output", output_fname, ] result = runner.invoke(add_igv_annotation_track, args) From a001a264243768c0f5618f60ccc9604effadecf8 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Sat, 24 Aug 2024 02:11:15 +0200 Subject: [PATCH 16/18] Update CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7ade80..f3a5fe5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,18 @@ - Added flag to set verbosity level. - Validate TbProfiler schema version. - Added CLI command for adding IGV annotation tracks + - Added `indel_variants` to json result and ability to filter vcf into various categories ### Fixed + - Fixed genome_annotation appending bug + - Fixed variant sorting as `reference_sequence` can be None + ### Changed - Updated sample metadata in the output format. RunMetadata was split into two fields, one for sequencing and one for pipeline information. Lims id and sample name are now included in the SampleBase object. 
+ - Split SV and SNV from tbprofiler output + - Update annotate_delly to extract SVs from vcf ## [0.9.3] From 735c3f151c3f3737f784625dabeb8c635707cc31 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Sat, 24 Aug 2024 02:50:35 +0200 Subject: [PATCH 17/18] Update README documentation --- README.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f7808f7..419b495 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ prp --help ``` -### Use the method help argument for information regarding the input for each of prp's methods (`create-bonsai-input`, `create-cdm-input`, `create-qc-result`, `print-schema`, `validate`) +### Use the method help argument for information regarding the input for each of prp's methods (`add-igv-annotation-track`, `annotate-delly`, `create-bonsai-input`, `create-cdm-input`, `create-qc-result`, `print-schema`, `rerun-bonsai-input`, `validate`) ``` prp --help ``` @@ -34,3 +34,18 @@ prp create-cdm-input -q QUAST_FILENAME -c CGMLST_FILE -p POSTALIGNQC_FILE [--cor ``` prp create-qc-result -i SAMPLE_ID --b BAM_FILE [-e BED_FILE] [-a BAITS_FILE] -r REFERENCE_FILE [-c CPUS] -o OUTPUT_FILE [-h] ``` + +### Rerun bonsai input creation for all samples +``` +prp rerun-bonsai-input -i INPUT_DIR -j JASEN_DIR -s SYMLINK_DIR -o OUTPUT_DIR +``` + +### Add IGV annotation track to result +``` +prp add-igv-annotation-track -n, TRACK_NAME -a, ANNOTATION_FILE -b, BONSAI_INPUT_FILE +``` + +### Validate output format of result json file +``` +prp validate -o OUTPUT +``` From 3186b2cd7875dc8c6428344699f4599a262c37dd Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 28 Aug 2024 11:01:37 +0200 Subject: [PATCH 18/18] Update README --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 419b495..38d1b43 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ prp --help ### Create bonsai input from pipeline data ``` -prp create-bonsai-input -i SAMPLE_ID -u RUN_METADATA_FILE -q QUAST_FILENAME -d PROCESS_METADATA_FILE -k KRAKEN_FILE -a AMRFINDER_FILE -m MLST_FILE -c CGMLST_FILE -v VIRULENCEFINDER_FILE -r RESFINDER_FILE -p POSTALIGNQC_FILE -k MYKROBE_FILE -t TBPROFILER_FILE [--correct_alleles] -o OUTPUT_FILE [-h] +prp create-bonsai-input -i SAMPLE_ID -u RUN_METADATA_FILE -q QUAST_FILENAME -d PROCESS_METADATA_FILE -k KRAKEN_FILE -a AMRFINDER_FILE -m MLST_FILE -c CGMLST_FILE -v VIRULENCEFINDER_FILE -r RESFINDER_FILE -p POSTALIGNQC_FILE -k MYKROBE_FILE -t TBPROFILER_FILE --vcf VCF_FILE [--snv-vcf SNV_VCF_FILE] [--sv-vcf SV_VCF_FILE] [--symlink-dir SYMLINK_DIR] [--correct_alleles] -o OUTPUT_FILE [-h] ``` ### Create CDM input from pipeline data @@ -37,15 +37,15 @@ prp create-qc-result -i SAMPLE_ID --b BAM_FILE [-e BED_FILE] [-a BAITS_FILE] -r ### Rerun bonsai input creation for all samples ``` -prp rerun-bonsai-input -i INPUT_DIR -j JASEN_DIR -s SYMLINK_DIR -o OUTPUT_DIR +prp rerun-bonsai-input -i INPUT_DIR -j JASEN_DIR -s SYMLINK_DIR -o OUTPUT_DIR -o OUTPUT_FILE [-h] ``` ### Add IGV annotation track to result ``` -prp add-igv-annotation-track -n, TRACK_NAME -a, ANNOTATION_FILE -b, BONSAI_INPUT_FILE +prp add-igv-annotation-track -n TRACK_NAME -a ANNOTATION_FILE -b BONSAI_INPUT_FILE -o OUTPUT_FILE [-h] ``` ### Validate output format of result json file ``` -prp validate -o OUTPUT +prp validate -o OUTPUT_FILE [-h] ```
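
As a reference for the metadata reorganisation introduced in patch 01, below is a minimal sketch of how the new metadata models can be populated. It assumes the `prp` package at this revision is importable; all concrete values (run id, versions, dates, database name, profile) are placeholders rather than real pipeline output.

```python
from datetime import datetime

from prp.models.metadata import PipelineInfo, SequencingInfo, SoupType, SoupVersion

# Sequencing-related fields that previously lived in RunInformation
seq_info = SequencingInfo(
    run_id="240812_example-run",   # placeholder run id
    platform="illumina",
    instrument=None,
    method={"method": "WGS"},      # placeholder sequencing type
    date=datetime(2024, 8, 12),
)

# Pipeline-related fields; software and database versions now live in `softwares`
pipeline_info = PipelineInfo(
    pipeline="Jasen",
    version="0.0.1",
    commit="commit-hash",
    analysis_profile="default",    # placeholder analysis profile
    configuration_files=[],
    workflow_name="workflow-name",
    command="nextflow run ...",
    softwares=[SoupVersion(name="tbdb", version="commit-hash", type=SoupType.DB)],
    date=datetime.now(),
)
```

Together with `sample_name` and `lims_id`, which now sit directly on the sample object, these two blocks replace the previous nested `run_metadata` field of the result JSON.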