Skip to content

Commit

Permalink
Merge pull request #483 from genomic-medicine-sweden/fp_cnvkit_html
Browse files Browse the repository at this point in the history
feat: Added an FP-flag to the html report
  • Loading branch information
monikaBrandt authored Sep 23, 2024
2 parents 0c4a350 + ef5d439 commit d92c538
Show file tree
Hide file tree
Showing 12 changed files with 1,095 additions and 39 deletions.
13 changes: 8 additions & 5 deletions .tests/units/test_cnv_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def test_create_tsv_report(self):
cnv = tempfile.mkdtemp() + "/tcnv.txt"
out_additional_only = open(tempfile.mkdtemp() + "/out_additional_only.txt", "w")
out_tsv_chrom_arms = tempfile.mkdtemp() + "/cnv_chromosome_arms.tsv"
out_vcf_filename = tempfile.mkdtemp() + "/out_vcf.vcf"
create_tsv_report(
[".tests/units/vcf/test.cnv1.vcf"],
[".tests/units/vcf/test.cnv2.vcf"],
Expand All @@ -27,6 +28,7 @@ def test_create_tsv_report(self):
cnv,
out_additional_only,
out_tsv_chrom_arms,
out_vcf_filename,
1.5,
0.5,
0.3,
Expand All @@ -39,6 +41,7 @@ def test_create_tsv_report(self):
0.2,
0.2,
0.5,
15000000,
)

@dataclass
Expand All @@ -53,23 +56,23 @@ class TestCase:
),
TestCase(
name="variant 1",
expected=("FGFR1", "chr8", "34370199-43930232", "cnvkit", "0.01", "8.59")
expected=("FGFR1", "chr8", "34370199-43930232", "cnvkit", "0.01", "8.59", "-")
),
TestCase(
name="variant 2",
expected=("FGFR1,MYC", "chr8", "35008818-146144253", "gatk", "0.01", "7.01")
expected=("FGFR1,MYC", "chr8", "35008818-146144253", "gatk", "0.01", "7.01", "-")
),
TestCase(
name="variant 3",
expected=("MYC", "chr8", "46689525-146144003", "cnvkit", "0.09", "5.06")
expected=("MYC", "chr8", "46689525-146144003", "cnvkit", "0.09", "5.06", "-")
),
TestCase(
name="small deletion",
expected=("CDKN2A,CDKN2B", "chr9", "21968207-22008972", "small_deletion", "NA", "-0.28")
expected=("CDKN2A,CDKN2B", "chr9", "21968207-22008972", "small_deletion", "NA", "-0.28", "-")
),
TestCase(
name="small amplification",
expected=("MYCN", "chr2", "16968207-17008972", "small_amplification", "NA", "7.29")
expected=("MYCN", "chr2", "16968207-17008972", "small_amplification", "NA", "7.29", "-")
),
]

Expand Down
24 changes: 15 additions & 9 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,20 +86,20 @@ cnv_html_report:
Also warnings of baseline skewness and detection of polyploidy in the sample.
path: "cnv_sv/svdb_query/{sample}_{type}.{tc_method}.cnv_loh_genes_all.cnv_chromosome_arms.tsv"


cnv_tsv_report:
amp_cn_limit: 6.0
baseline_fraction_limit: 0.2
del_1p19q_cn_limit: 1.4
del_1p19q_chr_arm_fraction: 0.3
chr_arm_fraction: 0.3
del_chr_arm_cn_limit: 1.4
amp_chr_arm_cn_limit: 2.5
normal_baf_lower_limit: 0.4
normal_baf_upper_limit: 0.6
amp_chr_arm_cn_limit: 2.6
normal_baf_lower_limit: 0.3
normal_baf_upper_limit: 0.7
normal_cn_lower_limit: 1.7
normal_cn_upper_limit: 2.25
polyploidy_fraction_limit: 0.2
max_cnv_fp_size: 15000000

fastp_pe:
container: "docker://hydragenetics/fastp:0.20.1"
Expand Down Expand Up @@ -204,12 +204,18 @@ manta_run_workflow_t:

merge_cnv_json:
filtered_cnv_vcfs:
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.filter.cnv_hard_filter_amp.vcf.gz
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_loh_genes_all.filter.cnv_hard_filter_loh.vcf.gz
germline_vcf: snv_indels/bcbio_variation_recall_ensemble/{sample}_{type}.ensembled.vep_annotated.filter.germline.exclude.blacklist.vcf.gz
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.filter.cnv_hard_filter_amp.fp_tag.vcf.gz
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_loh_genes_all.filter.cnv_hard_filter_loh.fp_tag.annotate_fp.vcf.gz
filtered_cnv_vcfs_tbi:
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.filter.cnv_hard_filter_amp.fp_tag.vcf.gz.tbi
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_loh_genes_all.filter.cnv_hard_filter_loh.fp_tag.annotate_fp.vcf.gz.tbi
unfiltered_cnv_vcfs:
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.vcf.gz
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_loh_genes_all.vcf.gz
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.fp_tag.vcf.gz
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_loh_genes_all.fp_tag.vcf.gz
unfiltered_cnv_vcfs_tbi:
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.fp_tag.vcf.gz.tbi
- cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_loh_genes_all.fp_tag.vcf.gz.tbi
germline_vcf: snv_indels/bcbio_variation_recall_ensemble/{sample}_{type}.ensembled.vep_annotated.filter.germline.exclude.blacklist.vcf.gz

mosdepth:
container: "docker://hydragenetics/mosdepth:0.3.2"
Expand Down
2 changes: 1 addition & 1 deletion docs/softwares.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ The CNVkit and GATK CNV caller often miss small deletions of four exons or small
---

## cnv_tsv_report
Collect all CNV calls into an excel friendly text file. Adds potential 1p19q calls. See further [CNV tsv report info](dna_cnvs.md#cnv-report).
Collect all CNV calls into an excel friendly text file. Adds potential 1p19q calls. Also add a FP flag for variants called by CNVkit that in GATK CNV does not have any signal in that region. See further [CNV tsv report info](dna_cnvs.md#cnv-report).

### :snake: Rule

Expand Down
54 changes: 51 additions & 3 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -619,15 +619,25 @@ module reports:
config


use rule * from reports exclude all as reports_*
use rule * from reports exclude all, merge_cnv_json as reports_*


use rule cnv_html_report from reports as reports_cnv_html_report with:
input:
json=workflow.get_rule("reports_cnv_html_report").input.json,
html_template=workflow.get_rule("reports_cnv_html_report").input.html_template,
js_files=workflow.get_rule("reports_cnv_html_report").input.js_files,
css_files=workflow.get_rule("reports_cnv_html_report").input.css_files,
js_files=[
workflow.get_rule("reports_cnv_html_report").input.js_files[0],
workflow.get_rule("reports_cnv_html_report").input.js_files[1],
workflow.get_rule("reports_cnv_html_report").input.js_files[2],
workflow.source_path("templates/cnv_html_report/03-results-table.js"),
workflow.get_rule("reports_cnv_html_report").input.js_files[4],
workflow.get_rule("reports_cnv_html_report").input.js_files[5],
],
css_files=[
workflow.get_rule("reports_cnv_html_report").input.css_files[0],
workflow.source_path("templates/cnv_html_report/style.css"),
],
tc_file=get_tc_file,
extra_table_files=[t["path"] for t in config.get("cnv_html_report", {}).get("extra_tables", [])],
params:
Expand All @@ -638,6 +648,44 @@ use rule cnv_html_report from reports as reports_cnv_html_report with:
tc_method=lambda wildcards: wildcards.tc_method,


rule merge_cnv_json:
input:
json=reports.get_json_for_merge_cnv_json,
fai=config.get("reference", {}).get("fai", ""),
annotation_bed=config.get("merge_cnv_json", {}).get("annotations", []),
germline_vcf=config.get("merge_cnv_json", {}).get("germline_vcf", []),
filtered_cnv_vcfs=config.get("merge_cnv_json", {}).get("filtered_cnv_vcfs", []),
filtered_cnv_vcfs_tbi=config.get("merge_cnv_json", {}).get("filtered_cnv_vcfs_tbi", []),
cnv_vcfs=config.get("merge_cnv_json", {}).get("unfiltered_cnv_vcfs", []),
cnv_vcfs_tbi=config.get("merge_cnv_json", {}).get("unfiltered_cnv_vcfs_tbi", []),
cytobands=config.get("merge_cnv_json", {}).get("cytobands", []),
output:
json=temp("reports/cnv_html_report/{sample}_{type}.{tc_method}.merged.json"),
params:
skip_chromosomes=config.get("reference", {}).get("skip_chrs", []),
cytobands=config.get("cnv_html_report", {}).get("cytobands", False),
log:
"reports/cnv_html_report/{sample}_{type}.{tc_method}.merged.json.log",
benchmark:
repeat(
"reports/cnv_html_report/{sample}_{type}.{tc_method}.merged.json.benchmark.tsv",
config.get("merge_cnv_json", {}).get("benchmark_repeats", 1),
)
threads: config.get("merge_cnv_json", {}).get("threads", config["default_resources"]["threads"])
resources:
mem_mb=config.get("merge_cnv_json", {}).get("mem_mb", config["default_resources"]["mem_mb"]),
mem_per_cpu=config.get("merge_cnv_json", {}).get("mem_per_cpu", config["default_resources"]["mem_per_cpu"]),
partition=config.get("merge_cnv_json", {}).get("partition", config["default_resources"]["partition"]),
threads=config.get("merge_cnv_json", {}).get("threads", config["default_resources"]["threads"]),
time=config.get("merge_cnv_json", {}).get("time", config["default_resources"]["time"]),
container:
config.get("merge_cnv_json", {}).get("container", config["default_container"])
message:
"{rule}: Merge CNV JSON data for {wildcards.sample}_{wildcards.type}"
script:
"scripts/merge_cnv_json.py"


module misc:
snakefile:
get_module_snakefile(config, "hydra-genetics/misc", path="workflow/Snakefile", tag="v0.2.0")
Expand Down
38 changes: 34 additions & 4 deletions workflow/rules/cnv_tsv_report.smk
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,19 @@ rule cnv_tsv_report:
deletions="cnv_sv/call_small_cnv_deletions/{sample}_{type}.deletions.tsv",
gatk_cnr="cnv_sv/gatk_denoise_read_counts/{sample}_{type}.clean.denoisedCR.tsv",
org_vcfs=[
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.vcf",
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{tag}.vcf",
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.fp_tag.vcf",
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{tag}.fp_tag.vcf",
],
tc_file=get_tc_file,
vcfs=[
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.filter.cnv_hard_filter_amp.vcf",
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{tag}.filter.cnv_hard_filter_loh.vcf",
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.cnv_amp_genes.filter.cnv_hard_filter_amp.fp_tag.vcf",
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{tag}.filter.cnv_hard_filter_loh.fp_tag.vcf",
],
output:
tsv=temp("cnv_sv/svdb_query/{sample}_{type}.{tc_method}.{tag}.cnv_report.tsv"),
tsv_additional_only=temp("cnv_sv/svdb_query/{sample}_{type}.{tc_method}.{tag}.cnv_additional_variants_only.tsv"),
tsv_chrom_arms=temp("cnv_sv/svdb_query/{sample}_{type}.{tc_method}.{tag}.cnv_chromosome_arms.tsv"),
vcf_del="cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{tag}.filter.cnv_hard_filter_loh.fp_tag.annotate_fp.vcf",
params:
amp_chr_arm_cn_limit=config.get("cnv_tsv_report", {}).get("amp_chr_arm_cn_limit", ""),
baseline_fraction_limit=config.get("cnv_tsv_report", {}).get("baseline_fraction_limit", ""),
Expand All @@ -31,6 +32,7 @@ rule cnv_tsv_report:
del_chr_arm_cn_limit=config.get("cnv_tsv_report", {}).get("del_chr_arm_cn_limit", ""),
del_1p19q_cn_limit=config.get("cnv_tsv_report", {}).get("del_1p19q_cn_limit", ""),
del_1p19q_chr_arm_fraction=config.get("cnv_tsv_report", {}).get("del_1p19q_chr_arm_fraction", ""),
max_cnv_fp_size=config.get("cnv_tsv_report", {}).get("max_cnv_fp_size", ""),
normal_cn_lower_limit=config.get("cnv_tsv_report", {}).get("normal_cn_lower_limit", ""),
normal_cn_upper_limit=config.get("cnv_tsv_report", {}).get("normal_cn_upper_limit", ""),
normal_baf_lower_limit=config.get("cnv_tsv_report", {}).get("normal_baf_lower_limit", ""),
Expand All @@ -57,3 +59,31 @@ rule cnv_tsv_report:
"{rule}: Convert cnv vcf to a tsv file: {output.tsv}"
script:
"../scripts/cnv_report.py"


rule cnv_add_fp_header:
input:
vcf="cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{post_fix}.vcf.gz",
vcf_tbi="cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{post_fix}.vcf.gz.tbi",
output:
vcf="cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{post_fix}.fp_tag.vcf",
log:
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{post_fix}.fp_tag.vcf.log",
benchmark:
repeat(
"cnv_sv/svdb_query/{sample}_{type}.{tc_method}.svdb_query.annotate_cnv.{post_fix}.fp_tag.vcf.benchmark.tsv",
config.get("cnv_add_fp_header", {}).get("benchmark_repeats", 1),
)
threads: config.get("cnv_add_fp_header", {}).get("threads", config["default_resources"]["threads"])
resources:
mem_mb=config.get("cnv_add_fp_header", {}).get("mem_mb", config["default_resources"]["mem_mb"]),
mem_per_cpu=config.get("cnv_add_fp_header", {}).get("mem_per_cpu", config["default_resources"]["mem_per_cpu"]),
partition=config.get("cnv_add_fp_header", {}).get("partition", config["default_resources"]["partition"]),
threads=config.get("cnv_add_fp_header", {}).get("threads", config["default_resources"]["threads"]),
time=config.get("cnv_add_fp_header", {}).get("time", config["default_resources"]["time"]),
container:
config.get("cnv_add_fp_header", {}).get("container", config["default_container"])
message:
"{rule}: Add FP_FLAG to cnv vcf header in {output.vcf}"
script:
"../scripts/cnv_add_fp_header.py"
3 changes: 3 additions & 0 deletions workflow/schemas/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ properties:
del_chr_arm_cn_limit:
type: number
description: Upper limit of copy number to be included in calculation of chr_arm_fraction for deletions
max_cnv_fp_size:
type: number
description: Max size of cnvkit cnv to be tested for false positive
normal_cn_lower_limit:
type: number
description: Lower limit of copy number to be counted as a normal segment
Expand Down
4 changes: 2 additions & 2 deletions workflow/schemas/rules.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ properties:
properties:
vcf:
type: string
description: Vcf with pileup informations for all positions in input bed file
description: Vcf with pileup information for all positions in input bed file

call_small_cnv_amplifications:
type: object
Expand All @@ -50,7 +50,7 @@ properties:
properties:
amplifications:
type: string
description: Text file with the called amplifictions
description: Text file with the called amplifications

call_small_cnv_deletions:
type: object
Expand Down
18 changes: 18 additions & 0 deletions workflow/scripts/cnv_add_fp_header.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from pysam import VariantFile


def add_fp_header(in_vcf_filename, out_vcf_filename):
variants = VariantFile(in_vcf_filename)
header = variants.header
header.add_meta(
'INFO', items=[('ID', "FP_FLAG"), ('Number', "1"), ('Type', 'String'), ('Description', 'CNV false positive flag')]
)
out_vcf = VariantFile(out_vcf_filename, "w", header=header)
for variant in variants.fetch():
out_vcf.write(variant)


if __name__ == "__main__":
in_vcf_filename = snakemake.input.vcf
out_vcf_filename = snakemake.output.vcf
add_fp_header(in_vcf_filename, out_vcf_filename)
Loading

0 comments on commit d92c538

Please sign in to comment.