Skip to content

Commit

Permalink
Merge pull request #517 from genomic-medicine-sweden/develop
Browse files Browse the repository at this point in the history
chore: dev to master
  • Loading branch information
jonca79 authored Oct 24, 2024
2 parents f9c4859 + 5872f10 commit bc606ce
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 17 deletions.
4 changes: 2 additions & 2 deletions config/output_reference_files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ files:
types:
- N
- name: jumble_pon
input: references/jumble_reference/design.bed.reference.RDS
output: result/jumble.PoN.RDS
input: references/jumble_reference/{design}.reference.RDS
output: result/jumble.{design}.PoN.RDS
types:
- N
- name: gatk_pon
Expand Down
10 changes: 10 additions & 0 deletions config/reports/multiqc_config_dna.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
title: "Clinical Genomics MultiQC Report"
subtitle: "Reference used: GRCh37"
intro_text: "The MultiQC DNA report summarises analysis results from GMS560 panel data that has been analysed by the Twist Solid pipeline (https://github.com/genomic-medicine-sweden/Twist_Solid)."

report_header_info:
- Contact E-mail: "[email protected]"
- Application Type: "Bioinformatic analysis of GMS560 panel for solid cancers"

show_analysis_paths: True

#decimalPoint_format: ','
extra_fn_clean_exts: ##from this until end
- '.duplication_metrics'
Expand Down
19 changes: 15 additions & 4 deletions config/reports/multiqc_config_rna.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
title: "Clinical Genomics MultiQC Report"
subtitle: "Reference used: GRCh37"
intro_text: "The MultiQC RNA report summarises analysis results from GMS560 panel data that has been analysed by the Twist Solid pipeline (https://github.com/genomic-medicine-sweden/Twist_Solid)."

report_header_info:
- Contact E-mail: "[email protected]"
- Application Type: "Bioinformatic analysis of GMS560 panel for solid cancers"

show_analysis_paths: True


#decimalPoint_format: ','
extra_fn_clean_exts: ##from this until end
- '.duplication_metrics'
- '.HsMetrics'
- '.alignment_summary_metrics'
- type: regex
pattern: '_fastq[12]'
- type: regex_keep
pattern: '[0-9A-Z-]+'
#extra_fn_clean_trim: #if found in beginning or end
#fn_ignore_dirs:
#fn_ignore_files:
Expand Down Expand Up @@ -33,8 +44,8 @@ table_columns_visible:
"Samtools: stats":
error_rate: False
non-primary_alignments: False
reads_mapped: False
reads_mapped_percent: False
reads_mapped: True
reads_mapped_percent: True
reads_properly_paired_percent: False
reads_MQ0_percent: False
raw_total_sequences: False
Expand Down
2 changes: 1 addition & 1 deletion workflow/Snakefile_references.smk
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ use rule cnvkit_batch from cnv_sv as cnv_sv_cnvkit_batch with:
input:
bam="alignment/samtools_merge_bam/{sample}_{type}.bam",
bai="alignment/samtools_merge_bam/{sample}_{type}.bam.bai",
cnv_reference="references/cnvkit_build_normal_reference/cnvkit.PoN.cnn",
reference="references/cnvkit_build_normal_reference/cnvkit.PoN.cnn",


use rule background_annotation from annotation as annotation_background_annotation with:
Expand Down
4 changes: 3 additions & 1 deletion workflow/rules/common_references.smk
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ def compile_output_list(wildcards):
for filedef in output_spec["files"]:
output_files += set(
[
filedef["output"].format(sample=sample, type=unit_type, caller=caller)
filedef["output"].format(
sample=sample, type=unit_type, caller=caller, design=config["reference"]["design_bed"].split("/")[-1]
)
for sample in get_samples(samples)
for unit_type in get_unit_types(units, sample)
if unit_type in set(filedef["types"]).intersection(types)
Expand Down
16 changes: 8 additions & 8 deletions workflow/scripts/report_fusions.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,16 +190,16 @@
if int(Junction_read_count) < housekeeping_genes[gene2][0]:
continue
# Min AF for frequent FP gene fusions and housekeeping gene
if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1]):
if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1] and artefact_gene_dict[gene1][gene2][3] > 0):
if int(Junction_read_count) / artefact_gene_dict[gene1][gene2][3] < artefact_gene_dict[gene1][gene2][2]:
continue
if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2]):
if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2] and artefact_gene_dict[gene2][gene1][3] > 0):
if int(Junction_read_count) / artefact_gene_dict[gene2][gene1][3] < artefact_gene_dict[gene2][gene1][2]:
continue
if gene1 in housekeeping_genes:
if gene1 in housekeeping_genes and housekeeping_genes[gene1][3] > 0:
if int(Junction_read_count) / housekeeping_genes[gene1][3] < housekeeping_genes[gene1][2]:
continue
if gene2 in housekeeping_genes:
if gene2 in housekeeping_genes and housekeeping_genes[gene2][3] > 0:
if int(Junction_read_count) / housekeeping_genes[gene2][3] < housekeeping_genes[gene2][2]:
continue
breakpoint1 = lline[7][:-2]
Expand Down Expand Up @@ -282,16 +282,16 @@
if int(Spanning_reads_unique) < housekeeping_genes[gene2][1]:
continue
# Min AF for frequent FP gene fusions and housekeeping gene
if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1]):
if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1] and artefact_gene_dict[gene1][gene2][3] > 0):
if int(Spanning_reads_unique) / artefact_gene_dict[gene1][gene2][3] < artefact_gene_dict[gene1][gene2][2]:
continue
if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2]):
if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2] and artefact_gene_dict[gene2][gene1][3] > 0):
if int(Spanning_reads_unique) / artefact_gene_dict[gene2][gene1][3] < artefact_gene_dict[gene2][gene1][2]:
continue
if gene1 in housekeeping_genes:
if gene1 in housekeeping_genes and housekeeping_genes[gene1][3] > 0:
if int(Spanning_reads_unique) / housekeeping_genes[gene1][3] < housekeeping_genes[gene1][2]:
continue
if gene2 in housekeeping_genes:
if gene2 in housekeeping_genes and housekeeping_genes[gene2][3] > 0:
if int(Spanning_reads_unique) / housekeeping_genes[gene2][3] < housekeeping_genes[gene2][2]:
continue
# Flag fusions annotated that are fusions with very high probability
Expand Down
2 changes: 1 addition & 1 deletion workflow/scripts/sample_mixup_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def read_vcf(vcf_filename, vcf_dict, samples):
if rna_samples[rna_sample][dna_sample] > best_gt_match:
best_dna_sample = dna_sample
best_gt_match = rna_samples[rna_sample][dna_sample]
p_match = round(best_gt_match / 42.0, 1)
p_match = round(best_gt_match * 100 / 42.0, 1)
report.write(f"{rna_sample}\t{best_dna_sample}\t{best_gt_match}\t{p_match}%\t")
if p_match > match_cutoff:
report.write(f"yes\n")
Expand Down

0 comments on commit bc606ce

Please sign in to comment.