diff --git a/test/benchmark/caller_comparison/workflow/rules/plots.smk b/test/benchmark/caller_comparison/workflow/rules/plots.smk deleted file mode 100644 index dd5b19d3..00000000 --- a/test/benchmark/caller_comparison/workflow/rules/plots.smk +++ /dev/null @@ -1,12 +0,0 @@ -rule plot_pr_all_results: - input: - "results/caller_comparison/eval/all_results.txt" - output: - "results/caller_comparison/eval/results.all.png" - log: - "logs/rplot.all.log" - shell: - """ - Rscript --vanilla Repos/iGenVar/test/benchmark/caller_comparison/workflow/scripts/plot_all_results.R \ - {input} Repos/iGenVar/test/benchmark/F1_score.csv {output} > {log} - """ diff --git a/test/benchmark/caller_comparison/Snakefile b/test/benchmark/caller_comparison_long_read/Snakefile similarity index 88% rename from test/benchmark/caller_comparison/Snakefile rename to test/benchmark/caller_comparison_long_read/Snakefile index bc2ce45d..fa9b67f1 100644 --- a/test/benchmark/caller_comparison/Snakefile +++ b/test/benchmark/caller_comparison_long_read/Snakefile @@ -1,4 +1,4 @@ -configfile: "Repos/iGenVar/test/benchmark/caller_comparison/config/config.yaml" +configfile: "Repos/iGenVar/test/benchmark/config/caller_comparison_config.yaml" include: "workflow/rules/callers.smk" include: "workflow/rules/eval.smk" diff --git a/test/benchmark/caller_comparison/config/config.yaml b/test/benchmark/caller_comparison_long_read/config/config.yaml similarity index 100% rename from test/benchmark/caller_comparison/config/config.yaml rename to test/benchmark/caller_comparison_long_read/config/config.yaml diff --git a/test/benchmark/caller_comparison/workflow/rules/callers.smk b/test/benchmark/caller_comparison_long_read/workflow/rules/callers.smk similarity index 52% rename from test/benchmark/caller_comparison/workflow/rules/callers.smk rename to test/benchmark/caller_comparison_long_read/workflow/rules/callers.smk index 915ba0ce..20327c34 100644 --- a/test/benchmark/caller_comparison/workflow/rules/callers.smk +++ b/test/benchmark/caller_comparison_long_read/workflow/rules/callers.smk @@ -6,16 +6,12 @@ rule run_igenvar: input: bam = config["long_bam"] output: - vcf = "results/caller_comparison/iGenVar/variants.vcf" + vcf = "results/caller_comparison_long_read/iGenVar/variants.vcf" threads: 1 shell: """ - ./build/iGenVar/bin/iGenVar --input_long_reads {input.bam} --output {output.vcf} \ - --vcf_sample_name {sample} \ - --threads {threads} \ - --min_var_length {min_var_length} \ - --max_var_length {max_var_length} \ - --min_qual 1 + ./build/iGenVar/bin/iGenVar --input_long_reads {input.bam} --output {output.vcf} --vcf_sample_name {sample} \ + --threads {threads} --min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 """ # Defaults: # --method cigar_string --method split_read --method read_pairs --method read_depth @@ -29,28 +25,21 @@ rule run_svim: bai = config["long_bai"], genome = config["reference_fa"] # [E::fai_build3_core] Cannot index files compressed with gzip, please use bgzip output: - "results/caller_comparison/SVIM/variants.vcf" + "results/caller_comparison_long_read/SVIM/variants.vcf" resources: mem_mb = 20000, time_min = 600, io_gb = 100 params: - working_dir = "results/caller_comparison/SVIM/" + working_dir = "results/caller_comparison_long_read/SVIM/" threads: 1 # conda: # "../../../envs/svim.yaml" shell: """ - svim alignment --sample {sample} \ - --partition_max_distance 1000 \ - --cluster_max_distance 0.5 \ - --min_sv_size {min_var_length} \ - --segment_gap_tolerance 20 \ - --segment_overlap_tolerance 20 \ - --read_names \ - --max_sv_size {max_var_length} \ - {params.working_dir} {input.bam} {input.genome} \ - &>> logs/svim_output.log + svim alignment --sample {sample} --partition_max_distance 1000 --cluster_max_distance 0.5 \ + --min_sv_size {min_var_length} --segment_gap_tolerance 20 --segment_overlap_tolerance 20 --read_names \ + --max_sv_size {max_var_length} {params.working_dir} {input.bam} {input.genome} &>> logs/svim_output.log """ # Defaults: # --position_distance_normalizer 900 --edit_distance_normalizer 1.0 @@ -60,7 +49,7 @@ rule run_sniffles: input: bam = config["long_md_bam"] output: - "results/caller_comparison/Sniffles/raw_variants_{min_qual}.vcf" + "results/caller_comparison_long_read/Sniffles/raw_variants_{min_qual}.vcf" resources: mem_mb = 400000, time_min = 1200, @@ -71,18 +60,18 @@ rule run_sniffles: shell: """ sniffles --mapped_reads {input.bam} \ - --vcf results/caller_comparison/Sniffles/raw_variants_{wildcards.min_qual}.vcf \ - --min_support {wildcards.min_qual} --min_length {min_var_length} --threads {threads} --genotype \ - >> logs/sniffles_output.log + --vcf results/caller_comparison_long_read/Sniffles/raw_variants_{wildcards.min_qual}.vcf \ + --min_support {wildcards.min_qual} --min_length {min_var_length} --threads {threads} --genotype \ + >> logs/sniffles_output.log """ # see https://github.com/spiralgenetics/truvari/issues/43 # see https://github.com/fritzsedlazeck/Sniffles/issues/209 (Fixed, but there just hasn't been a release since the fix.) rule fix_sniffles: input: - "results/caller_comparison/Sniffles/raw_variants_{min_qual}.vcf" + "results/caller_comparison_long_read/Sniffles/raw_variants_{min_qual}.vcf" output: - "results/caller_comparison/Sniffles/variants.unsorted.min_qual_{min_qual}.vcf" + "results/caller_comparison_long_read/Sniffles/variants.unsorted.min_qual_{min_qual}.vcf" run: shell("sed 's/##INFO= {output}") shell("sed -i '4i##FILTER=' {output}") @@ -90,9 +79,9 @@ rule fix_sniffles: # Split to SV classes rule fix_sniffles_2: input: - "results/caller_comparison/Sniffles/variants.unsorted.min_qual_{min_qual}.vcf" + "results/caller_comparison_long_read/Sniffles/variants.unsorted.min_qual_{min_qual}.vcf" output: - "results/caller_comparison/Sniffles/variants.min_qual_{min_qual}.vcf" + "results/caller_comparison_long_read/Sniffles/variants.min_qual_{min_qual}.vcf" shell: "bcftools sort {input} > {output}" @@ -101,8 +90,8 @@ rule run_pbsv_dicsover: input: bam = config["long_bam"] output: - svsig_gz = "results/caller_comparison/pbsv/signatures.svsig.gz" - # svsig_gz = dynamic("results/caller_comparison/pbsv/signatures.{region}.svsig.gz") + svsig_gz = "results/caller_comparison_long_read/pbsv/signatures.svsig.gz" + # svsig_gz = dynamic("results/caller_comparison_long_read/pbsv/signatures.{region}.svsig.gz") resources: mem_mb = 400000, time_min = 2000, @@ -114,17 +103,17 @@ rule run_pbsv_dicsover: "pbsv discover {input.bam} {output.svsig_gz}" # """ # for i in $(samtools view -H {input.bam} | grep '^@SQ' | cut -f2 | cut -d':' -f2); do - # pbsv discover --region $i {input.bam} results/caller_comparison/pbsv/signatures.$i.svsig.gz + # pbsv discover --region $i {input.bam} results/caller_comparison_long_read/pbsv/signatures.$i.svsig.gz # done # """ rule run_pbsv_call: input: genome = config["reference_fa"], - svsig_gz = "results/caller_comparison/pbsv/signatures.svsig.gz" - # svsig_gz = dynamic("results/caller_comparison/pbsv/signatures.{region}.svsig.gz") + svsig_gz = "results/caller_comparison_long_read/pbsv/signatures.svsig.gz" + # svsig_gz = dynamic("results/caller_comparison_long_read/pbsv/signatures.{region}.svsig.gz") output: - "results/caller_comparison/pbsv/variants.min_qual_{min_qual}.vcf" + "results/caller_comparison_long_read/pbsv/variants.min_qual_{min_qual}.vcf" resources: mem_mb = 400000, time_min = 2000, @@ -135,19 +124,19 @@ rule run_pbsv_call: shell: """ pbsv call --types DEL,INS,DUP --min-sv-length {min_var_length} --max-ins-length 100K \ - --call-min-reads-all-samples {wildcards.min_qual} --call-min-reads-one-sample {wildcards.min_qual} \ - --call-min-reads-per-strand-all-samples 0 --call-min-bnd-reads-all-samples 0 --call-min-read-perc-one-sample 0 \ - --num-threads {threads} {input.genome} {input.svsig_gz} \ - results/caller_comparison/pbsv/variants.min_qual_{wildcards.min_qual}.vcf + --call-min-reads-all-samples {wildcards.min_qual} --call-min-reads-one-sample {wildcards.min_qual} \ + --call-min-reads-per-strand-all-samples 0 --call-min-bnd-reads-all-samples 0 --call-min-read-perc-one-sample 0 \ + --num-threads {threads} {input.genome} {input.svsig_gz} \ + results/caller_comparison_long_read/pbsv/variants.min_qual_{wildcards.min_qual}.vcf """ rule run_pbsv_call_without_DUP: input: genome = config["reference_fa"], - svsig_gz = "results/caller_comparison/pbsv/signatures.svsig.gz" - # svsig_gz = dynamic("results/caller_comparison/pbsv/signatures.{region}.svsig.gz") + svsig_gz = "results/caller_comparison_long_read/pbsv/signatures.svsig.gz" + # svsig_gz = dynamic("results/caller_comparison_long_read/pbsv/signatures.{region}.svsig.gz") output: - "results/caller_comparison/pbsv_without_DUP/variants.min_qual_{min_qual}.vcf" + "results/caller_comparison_long_read/pbsv_without_DUP/variants.min_qual_{min_qual}.vcf" resources: mem_mb = 400000, time_min = 2000, @@ -158,8 +147,8 @@ rule run_pbsv_call_without_DUP: shell: """ pbsv call --types DEL,INS --min-sv-length {min_var_length} --max-ins-length 100K \ - --call-min-reads-all-samples {wildcards.min_qual} --call-min-reads-one-sample {wildcards.min_qual} \ - --call-min-reads-per-strand-all-samples 0 --call-min-bnd-reads-all-samples 0 --call-min-read-perc-one-sample 0 \ - --num-threads {threads} {input.genome} {input.svsig_gz} \ - results/caller_comparison/pbsv_without_DUP/variants.min_qual_{wildcards.min_qual}.vcf + --call-min-reads-all-samples {wildcards.min_qual} --call-min-reads-one-sample {wildcards.min_qual} \ + --call-min-reads-per-strand-all-samples 0 --call-min-bnd-reads-all-samples 0 --call-min-read-perc-one-sample 0 \ + --num-threads {threads} {input.genome} {input.svsig_gz} \ + results/caller_comparison_long_read/pbsv_without_DUP/variants.min_qual_{wildcards.min_qual}.vcf """ diff --git a/test/benchmark/caller_comparison/workflow/rules/eval.smk b/test/benchmark/caller_comparison_long_read/workflow/rules/eval.smk similarity index 52% rename from test/benchmark/caller_comparison/workflow/rules/eval.smk rename to test/benchmark/caller_comparison_long_read/workflow/rules/eval.smk index 4c837963..b8bdd6ce 100644 --- a/test/benchmark/caller_comparison/workflow/rules/eval.smk +++ b/test/benchmark/caller_comparison_long_read/workflow/rules/eval.smk @@ -1,8 +1,8 @@ rule filter_vcf: input: - "results/caller_comparison/{caller,iGenVar|SVIM}/variants.vcf" + "results/caller_comparison_long_read/{caller,iGenVar|SVIM}/variants.vcf" output: - "results/caller_comparison/{caller,iGenVar|SVIM}/variants.min_qual_{min_qual}.vcf" + "results/caller_comparison_long_read/{caller,iGenVar|SVIM}/variants.min_qual_{min_qual}.vcf" shell: "bcftools view -i 'QUAL>={wildcards.min_qual}' {input} > {output}" @@ -24,60 +24,61 @@ rule tabix: rule truvari: input: - vcf = "results/caller_comparison/{caller}/variants.min_qual_{min_qual}.vcf.gz", - index = "results/caller_comparison/{caller}/variants.min_qual_{min_qual}.vcf.gz.tbi" + vcf = "results/caller_comparison_long_read/{caller}/variants.min_qual_{min_qual}.vcf.gz", + index = "results/caller_comparison_long_read/{caller}/variants.min_qual_{min_qual}.vcf.gz.tbi" params: - output_dir = "results/caller_comparison/eval/{caller}/min_qual_{min_qual}" + output_dir = "results/caller_comparison_long_read/eval/{caller}/min_qual_{min_qual}" output: - summary = "results/caller_comparison/eval/{caller}/min_qual_{min_qual}/summary.txt" + summary = "results/caller_comparison_long_read/eval/{caller}/min_qual_{min_qual}/summary.txt" shell: """ rm -rf {params.output_dir} && truvari bench -b data/truth_set/HG002_SVs_Tier1_v0.6.vcf.gz \ - -c {input.vcf} -o {params.output_dir} --passonly --includebed data/truth_set/HG002_SVs_Tier1_v0.6.bed -p 0 \ - &>> logs/truvari_output.log + -c {input.vcf} -o {params.output_dir} --passonly --includebed data/truth_set/HG002_SVs_Tier1_v0.6.bed -p 0 \ + &>> logs/truvari_output.log """ rule reformat_truvari_results: input: - "results/caller_comparison/eval/{caller}/min_qual_{min_qual}/summary.txt" + "results/caller_comparison_long_read/eval/{caller}/min_qual_{min_qual}/summary.txt" output: - "results/caller_comparison/eval/{caller}/min_qual_{min_qual}/pr_rec.txt" + "results/caller_comparison_long_read/eval/{caller}/min_qual_{min_qual}/pr_rec.txt" threads: 1 shell: """ - cat {input} | grep '\\|\' | tr -d ',' |sed 's/^[ \t]*//' | tr -d '\"' | tr -d ' ' \ - | tr ':' '\t' | awk 'OFS=\"\\t\" {{ print \"{wildcards.caller}\", \"{wildcards.min_qual}\", $1, $2 }}' > {output} + cat {input} | grep '\\|\' | tr -d ',' |sed 's/^[ \t]*//' | tr -d '\"' | tr -d ' ' \ + | tr ':' '\t' | awk 'OFS=\"\\t\" {{ print \"{wildcards.caller}\", \"{wildcards.min_qual}\", $1, $2 }}' \ + > {output} """ rule cat_truvari_results_all: input: - igenvar = expand("results/caller_comparison/eval/iGenVar/min_qual_{min_qual}/pr_rec.txt", + igenvar = expand("results/caller_comparison_long_read/eval/iGenVar/min_qual_{min_qual}/pr_rec.txt", min_qual=list(range(config["quality_ranges"]["from"], config["quality_ranges"]["to"]+1, config["quality_ranges"]["step"]))), - svim = expand("results/caller_comparison/eval/SVIM/min_qual_{min_qual}/pr_rec.txt", + svim = expand("results/caller_comparison_long_read/eval/SVIM/min_qual_{min_qual}/pr_rec.txt", min_qual=list(range(config["quality_ranges"]["from"], config["quality_ranges"]["to"]+1, config["quality_ranges"]["step"]))), - sniffles = expand("results/caller_comparison/eval/Sniffles/min_qual_{min_qual}/pr_rec.txt", + sniffles = expand("results/caller_comparison_long_read/eval/Sniffles/min_qual_{min_qual}/pr_rec.txt", min_qual=list(range(config["quality_ranges"]["from"], config["quality_ranges"]["to"]+1, config["quality_ranges"]["step"]))), - pbsv = expand("results/caller_comparison/eval/pbsv/min_qual_{min_qual}/pr_rec.txt", + pbsv = expand("results/caller_comparison_long_read/eval/pbsv/min_qual_{min_qual}/pr_rec.txt", min_qual=list(range(config["quality_ranges"]["from"], config["quality_ranges"]["to"]+1, config["quality_ranges"]["step"]))), - pbsv_without_DUP = expand("results/caller_comparison/eval/pbsv_without_DUP/min_qual_{min_qual}/pr_rec.txt", + pbsv_without_DUP = expand("results/caller_comparison_long_read/eval/pbsv_without_DUP/min_qual_{min_qual}/pr_rec.txt", min_qual=list(range(config["quality_ranges"]["from"], config["quality_ranges"]["to"]+1, config["quality_ranges"]["step"]))) output: - igenvar = temp("results/caller_comparison/eval/igenvar.all_results.txt"), - svim = temp("results/caller_comparison/eval/svim.all_results.txt"), - sniffles = temp("results/caller_comparison/eval/sniffles.all_results.txt"), - pbsv = temp("results/caller_comparison/eval/pbsv.all_results.txt"), - pbsv_without_DUP = temp("results/caller_comparison/eval/pbsv_without_DUP.all_results.txt"), - all = "results/caller_comparison/eval/all_results.txt" + igenvar = temp("results/caller_comparison_long_read/eval/igenvar.all_results.txt"), + svim = temp("results/caller_comparison_long_read/eval/svim.all_results.txt"), + sniffles = temp("results/caller_comparison_long_read/eval/sniffles.all_results.txt"), + pbsv = temp("results/caller_comparison_long_read/eval/pbsv.all_results.txt"), + pbsv_without_DUP = temp("results/caller_comparison_long_read/eval/pbsv_without_DUP.all_results.txt"), + all = "results/caller_comparison_long_read/eval/all_results.txt" threads: 1 run: shell("cat {input.igenvar} > {output.igenvar}") diff --git a/test/benchmark/caller_comparison_long_read/workflow/rules/plots.smk b/test/benchmark/caller_comparison_long_read/workflow/rules/plots.smk new file mode 100644 index 00000000..cf0cf7fe --- /dev/null +++ b/test/benchmark/caller_comparison_long_read/workflow/rules/plots.smk @@ -0,0 +1,12 @@ +rule plot_pr_all_results: + input: + "results/caller_comparison_long_read/eval/all_results.txt" + output: + "results/caller_comparison_long_read/eval/results.all.png" + log: + "logs/rplot.all.log" + shell: + """ + Rscript --vanilla Repos/iGenVar/test/benchmark/caller_comparison_long_read/workflow/scripts/plot_all_results.R \ + {input} Repos/iGenVar/test/benchmark/F1_score.csv {output} > {log} + """ diff --git a/test/benchmark/caller_comparison/workflow/scripts/plot_all_results.R b/test/benchmark/caller_comparison_long_read/workflow/scripts/plot_all_results.R similarity index 100% rename from test/benchmark/caller_comparison/workflow/scripts/plot_all_results.R rename to test/benchmark/caller_comparison_long_read/workflow/scripts/plot_all_results.R diff --git a/test/benchmark/caller_comparison_short_read/Snakefile b/test/benchmark/caller_comparison_short_read/Snakefile new file mode 100644 index 00000000..6d2b3e0f --- /dev/null +++ b/test/benchmark/caller_comparison_short_read/Snakefile @@ -0,0 +1,13 @@ +configfile: "Repos/iGenVar/test/benchmark/config/caller_comparison_config.yaml" + +include: "workflow/rules/callers.smk" +include: "workflow/rules/eval.smk" +include: "workflow/rules/plots.smk" + +##### Target rules ##### + +rule all: + input: + # SV calling + expand("results/caller_comparison/SVIM/variants.vcf"), + expand("results/caller_comparison/eval/results.all.png") diff --git a/test/benchmark/caller_comparison_short_read/workflow/rules/callers.smk b/test/benchmark/caller_comparison_short_read/workflow/rules/callers.smk new file mode 100644 index 00000000..28960757 --- /dev/null +++ b/test/benchmark/caller_comparison_short_read/workflow/rules/callers.smk @@ -0,0 +1,34 @@ +sample = config["parameters"]["sample"], +min_var_length = config["parameters"]["min_var_length"], +max_var_length = config["parameters"]["max_var_length"] + +# iGenVar +# TODO (irallia 07.03.2022): change this to short read input +rule run_iGenVar_S: + input: + bam = config["long_bam"] + output: + vcf = "results/caller_comparison_short_read/iGenVar/variants.vcf" + threads: 1 + shell: + """ + ./build/iGenVar/bin/iGenVar --input_long_reads {input.bam} --output {output.vcf} --vcf_sample_name {sample} \ + --threads {threads} --min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 + """ + +# TODO (irallia 07.03.2022): add short read input +rule run_iGenVar_SL: + input: + bam = config["long_bam"] + output: + vcf = "results/caller_comparison_short_read/iGenVar/variants.vcf" + threads: 1 + shell: + """ + ./build/iGenVar/bin/iGenVar --input_long_reads {input.bam} --output {output.vcf} --vcf_sample_name {sample} \ + --threads {threads} --min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 + """ + # Defaults: + # --method cigar_string --method split_read --method read_pairs --method read_depth + # --clustering_methods hierarchical_clustering --refinement_methods no_refinement + # --max_tol_inserted_length 50 --max_overlap 10 --hierarchical_clustering_cutoff 0.5 diff --git a/test/benchmark/caller_comparison_short_read/workflow/rules/eval.smk b/test/benchmark/caller_comparison_short_read/workflow/rules/eval.smk new file mode 100644 index 00000000..c68ac627 --- /dev/null +++ b/test/benchmark/caller_comparison_short_read/workflow/rules/eval.smk @@ -0,0 +1,71 @@ +rule filter_vcf: + input: + "results/caller_comparison_short_read/{caller,iGenVar_S|iGenVar_SL}/variants.vcf" + output: + "results/caller_comparison_short_read/{caller,iGenVar_S|iGenVar_SL}/variants.min_qual_{min_qual}.vcf" + shell: + "bcftools view -i 'QUAL>={wildcards.min_qual}' {input} > {output}" + +rule bgzip: + input: + "{name}.vcf" + output: + "{name}.vcf.gz" + shell: + "pbgzip -c {input} > {output}" + +rule tabix: + input: + "{name}.vcf.gz" + output: + "{name}.vcf.gz.tbi" + shell: + "tabix -p vcf {input}" + +rule truvari: + input: + vcf = "results/caller_comparison_short_read/{caller}/variants.min_qual_{min_qual}.vcf.gz", + index = "results/caller_comparison_short_read/{caller}/variants.min_qual_{min_qual}.vcf.gz.tbi" + params: + output_dir = "results/caller_comparison_short_read/eval/{caller}/min_qual_{min_qual}" + output: + summary = "results/caller_comparison_short_read/eval/{caller}/min_qual_{min_qual}/summary.txt" + shell: + """ + rm -rf {params.output_dir} && truvari bench -b data/truth_set/HG002_SVs_Tier1_v0.6.vcf.gz \ + -c {input.vcf} -o {params.output_dir} --passonly --includebed data/truth_set/HG002_SVs_Tier1_v0.6.bed -p 0 \ + &>> logs/truvari_output.log + """ + +rule reformat_truvari_results: + input: + "results/caller_comparison_short_read/eval/{caller}/min_qual_{min_qual}/summary.txt" + output: + "results/caller_comparison_short_read/eval/{caller}/min_qual_{min_qual}/pr_rec.txt" + threads: 1 + shell: + """ + cat {input} | grep '\\|\' | tr -d ',' |sed 's/^[ \t]*//' | tr -d '\"' | tr -d ' ' \ + | tr ':' '\t' | awk 'OFS=\"\\t\" {{ print \"{wildcards.caller}\", \"{wildcards.min_qual}\", $1, $2 }}' \ + > {output} + """ + +rule cat_truvari_results_all: + input: + iGenVar_S = expand("results/caller_comparison_short_read/eval/iGenVar_S/min_qual_{min_qual}/pr_rec.txt", + min_qual=list(range(config["quality_ranges"]["from"], + config["quality_ranges"]["to"], + config["quality_ranges"]["step"]))), + iGenVar_SL = expand("results/caller_comparison_short_read/eval/iGenVar_SL/min_qual_{min_qual}/pr_rec.txt", + min_qual=list(range(config["quality_ranges"]["from"], + config["quality_ranges"]["to"], + config["quality_ranges"]["step"]))) + output: + iGenVar_S = temp("results/caller_comparison_short_read/eval/igenvar_s.all_results.txt"), + iGenVar_SL = temp("results/caller_comparison_short_read/eval/igenvar_sl.all_results.txt") + all = "results/caller_comparison_short_read/eval/all_results.txt" + threads: 1 + run: + shell("cat {input.iGenVar_S} > {output.iGenVar_S}") + shell("cat {input.iGenVar_SL} > {output.iGenVar_SL}") + shell("cat {output.iGenVar_S} {output.iGenVar_SL} > {output.all}") diff --git a/test/benchmark/caller_comparison_short_read/workflow/rules/plots.smk b/test/benchmark/caller_comparison_short_read/workflow/rules/plots.smk new file mode 100644 index 00000000..8f27dabf --- /dev/null +++ b/test/benchmark/caller_comparison_short_read/workflow/rules/plots.smk @@ -0,0 +1,12 @@ +rule plot_pr_all_results: + input: + "results/caller_comparison_short_read/eval/all_results.txt" + output: + "results/caller_comparison_short_read/eval/results.all.png" + log: + "logs/rplot.all.log" + shell: + """ + Rscript --vanilla Repos/iGenVar/test/benchmark/caller_comparison_short_read/workflow/scripts/plot_all_results.R \ + {input} Repos/iGenVar/test/benchmark/F1_score.csv {output} > {log} + """ diff --git a/test/benchmark/caller_comparison_short_read/workflow/scripts/plot_all_results.R b/test/benchmark/caller_comparison_short_read/workflow/scripts/plot_all_results.R new file mode 100644 index 00000000..55685c65 --- /dev/null +++ b/test/benchmark/caller_comparison_short_read/workflow/scripts/plot_all_results.R @@ -0,0 +1,50 @@ +library(directlabels) +library(tidyverse) + +args = commandArgs(trailingOnly=TRUE) + +res <- read_tsv(args[1], col_names = c("caller", "min_qual", "metric", "value")) +f1 <- read_csv(args[2], col_names = c("caller", "precision", "recall")) + +res <- res %>% + filter(metric %in% c("recall", "precision")) %>% + pivot_wider(names_from=metric, values_from=value) %>% + filter(recall!=0 | precision!=0) + +# Creating an empty column: +f1 <- add_column(f1, min_qual = NA, .after="caller") + +# Combine res & f1 +total <- rbind(res, f1) +total$caller = factor(total$caller, + levels=c('iGenVar_S', 'iGenVar_SL', + '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9'), + labels=c('iGenVar_S 0.0.3', 'iGenVar_SL 0.0.3', + '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9')) + +scale_custom <- list( + # https://stackoverflow.com/questions/46803260/assigning-40-shapes-or-more-in-scale-shape-manual + scale_shape_manual(values = c(15, 16, 17, 18, 5, 46, 46, 46, 46, 46, 46, 46, 46, 46)), + scale_color_manual(breaks = c('iGenVar_S 0.0.3', 'iGenVar_SL 0.0.3'), + values = c("chartreuse3", "chartreuse1", + "tan", "tan", "tan", "tan", "tan", "tan", "tan", "tan", "tan"), + na.value = "gray80") +) + +ggplot(total, aes(recall, precision, color = caller)) + + geom_point(size=0.5) + + scale_custom + + # Add labels to curves + # cex: character expansion; rot: rotation + geom_dl(aes(label = caller), method = list("last.points", rot=-20, cex = 0.5, dl.trans(x=x+0.1))) + + geom_path() + + labs(y = "Precision", x = "Recall", color = "Tool", + title="Caller Comparison", subtitle="AshkenazimTrio - HG002 NA24385 Son - PacBio CCS 10kb") + + lims(x=c(0,1), y=c(0,1)) + + theme_bw() + + theme(panel.spacing = unit(0.75, "lines")) + + theme(text = element_text(size=14), axis.text.x = element_text(size=9), axis.text.y = element_text(size=9)) + + theme(plot.title = element_text(size=20, hjust=0.5, face="bold", color="black")) + + theme(plot.subtitle = element_text(size=10, hjust=0.5, face="italic", color="black")) + +ggsave(args[3], width=10, height=8) diff --git a/test/benchmark/config/caller_comparison_config.yaml b/test/benchmark/config/caller_comparison_config.yaml new file mode 100644 index 00000000..fcaa8000 --- /dev/null +++ b/test/benchmark/config/caller_comparison_config.yaml @@ -0,0 +1,16 @@ +long_bam: data/long_reads/HG002.Sequel.10kb.pbmm2.hs37d5.whatshap.haplotag.RTG.10x.trio.bam +long_md_bam: data/long_reads/HG002.Sequel.10kb.pbmm2.hs37d5.whatshap.haplotag.RTG.10x.trio.md.bam +long_bai: data/long_reads/HG002.Sequel.10kb.pbmm2.hs37d5.whatshap.haplotag.RTG.10x.trio.bam.bai + +reference_fa_gz: data/reference/hs37d5.fa.gz +reference_fa: data/reference/hs37d5.fa + +parameters: + sample: HG002 + min_var_length: 40 + max_var_length: 1000000 + +quality_ranges: + from: 1 + to: 80 + step: 1 diff --git a/test/benchmark/caller_comparison/results/SVIM_variants.min_qual_3.vcf b/test/benchmark/results/SVIM_variants.min_qual_3.vcf similarity index 100% rename from test/benchmark/caller_comparison/results/SVIM_variants.min_qual_3.vcf rename to test/benchmark/results/SVIM_variants.min_qual_3.vcf diff --git a/test/benchmark/caller_comparison/results/Sniffles_variants.min_qual_3.vcf b/test/benchmark/results/Sniffles_variants.min_qual_3.vcf similarity index 100% rename from test/benchmark/caller_comparison/results/Sniffles_variants.min_qual_3.vcf rename to test/benchmark/results/Sniffles_variants.min_qual_3.vcf diff --git a/test/benchmark/caller_comparison/results/caller_comparison-runtime.md b/test/benchmark/results/caller_comparison_long_read-runtime.md similarity index 100% rename from test/benchmark/caller_comparison/results/caller_comparison-runtime.md rename to test/benchmark/results/caller_comparison_long_read-runtime.md diff --git a/test/benchmark/results/caller_comparison_short_read-runtime.md b/test/benchmark/results/caller_comparison_short_read-runtime.md new file mode 100644 index 00000000..67f9bbcb --- /dev/null +++ b/test/benchmark/results/caller_comparison_short_read-runtime.md @@ -0,0 +1,3 @@ +# 2021-11-26 + +... diff --git a/test/benchmark/caller_comparison/results/iGenVar_variants.min_qual_3.vcf b/test/benchmark/results/iGenVar_variants.min_qual_3.vcf similarity index 100% rename from test/benchmark/caller_comparison/results/iGenVar_variants.min_qual_3.vcf rename to test/benchmark/results/iGenVar_variants.min_qual_3.vcf diff --git a/test/benchmark/caller_comparison/results/pbsv_variants.min_qual_3.vcf b/test/benchmark/results/pbsv_variants.min_qual_3.vcf similarity index 100% rename from test/benchmark/caller_comparison/results/pbsv_variants.min_qual_3.vcf rename to test/benchmark/results/pbsv_variants.min_qual_3.vcf diff --git a/test/benchmark/caller_comparison/results/pbsv_without_DUP_variants.min_qual_3.vcf b/test/benchmark/results/pbsv_without_DUP_variants.min_qual_3.vcf similarity index 100% rename from test/benchmark/caller_comparison/results/pbsv_without_DUP_variants.min_qual_3.vcf rename to test/benchmark/results/pbsv_without_DUP_variants.min_qual_3.vcf