Skip to content

Commit

Permalink
Merge pull request #18 from eastgenomics/CNV_integration_dev
Browse files Browse the repository at this point in the history
GCNV-18 CNV integration (#18)

Co-Authored-By: sophie22 <[email protected]>
  • Loading branch information
mattgarner and sophie22 authored Oct 3, 2022
2 parents dda56f8 + 677eff7 commit 31f8a2d
Showing 1 changed file with 129 additions and 2 deletions.
131 changes: 129 additions & 2 deletions egg5_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
)

# Name and version of the assay this config describes.
assay_name = "CEN" # Core Endo Neuro
# NOTE(review): assay_version is assigned twice in this view (it looks like a
# diff showing the removed v1.1.5 line next to the added v1.2.0 line); if both
# lines are executed the later assignment, "v1.2.0", is the value that sticks.
assay_version = "v1.1.5"
assay_version = "v1.2.0"

# Project ID used as the "<project>:" prefix when building "<project>:<file>"
# style identifiers elsewhere in this config.
ref_project_id = "project-Fkb6Gkj433GVVvj73J7x8KbV"

Expand Down Expand Up @@ -75,6 +75,24 @@
# presumably MultiQC ("mqc") applet and its config file -- TODO confirm
mqc_applet_id = "app-G6FyybQ4f4xqqpFfGqg34y2Y"
# config file addressed as "<ref project>:<file id>"
mqc_config_file = ref_project_id + ":file-G82027Q433Gfx69zGvjq7PqQ"


# CNV calling

cnvcall_app_id = "app-GF3J3Kj4jq2VZkJ2P46fJ9vv"

# Fixed inputs for CNV calling; every value is a "<ref project>:<file id>"
# string rooted in ref_project_id.
cnvcalling_fixed_inputs = dict(
    # tar of the GATK Docker image
    gatk_docker=ref_project_id + ":file-GBBP9JQ433GxV97xBpQkzYZx",
    # CEN interval list used for CNV calling, plus its annotation
    interval_list=ref_project_id + ":file-GFPxzKj4V50pJX3F4vV58yyg",
    annotation_tsv=ref_project_id + ":file-GFPxzPQ4V50z4pv230p82G0q",
)

# Describes where the CNV calling input files come from: an app name plus the
# file-name patterns (one for .bam, one for .bai) to look for.
# NOTE(review): presumably "app" is the upstream app whose output is searched
# with these patterns -- confirm against the code that consumes this dict.
cnvcalling_input_dict = dict(
    app="sentieon-dnaseq",
    patterns=[
        "-E '(.*).bam$'",
        "-E '(.*).bai$'",
    ],
)


# Reports

# NOTE(review): presumably the flank size applied when producing the xlsx
# reports; the unit is not shown here -- confirm against the consumer.
xlsx_flanks = 495
Expand Down Expand Up @@ -172,4 +190,113 @@
# inputs for athena
"{}.exons_file ID".format(athena_stage_id): cds_file_for_athena,
"{}.exons_file".format(athena_stage_id): ""
}
}


# CNV Reports

# CNV reports workflow, addressed as "<ref project>:<workflow id>"
cnv_rpt_workflow_id = ref_project_id + ":workflow-GGpkk40433GQqXQj4j2FyV36"

# Stage IDs used below as the "<stage id>." prefix of stage input names
# (presumably the stages of the workflow above -- TODO confirm)
cnv_generate_bed_excluded_stage_id = "stage-GFZQB7Q4qq8X6yjKG2pFQ58x"
cnv_generate_bed_vep_stage_id = "stage-GG39Gq04qq8ZkfgV31yQy93v"
cnv_annotate_excluded_regions_stage_id = "stage-GG1qYz84qq8yKzF1J2X48q62"
cnv_vep_stage_id = "stage-GFYvJF04qq8VKgq34j30pZZ3"
cnv_generate_workbook_stage_id = "stage-GFfYY9j4qq8ZxpFpP8zKG7G0"

# Reference files, each addressed as "<ref project>:<file id>"
cnv_vep_config = ref_project_id + ":file-GGkJqk84GVVGqG6VFz60gkFF"
additional_regions = ref_project_id + ":file-GGkz5B84GVV4KbX64gzBXqZ2"

# Stage inputs for the CNV reports workflow that are looked up by pattern.
# Keys are "<stage id>.<input name>"; each value names an app, a subdirectory
# and a file-name pattern.
# The "{}" inside some patterns is left unformatted here (presumably filled
# in by the code that consumes this dict -- TODO confirm).
# Non-empty "subdir" values must end with a trailing forward slash.
cnv_rpt_stage_input_dict = {
    # sample_file for the generate_bed stage that feeds VEP
    cnv_generate_bed_vep_stage_id + ".sample_file": dict(
        app="mosdepth",
        subdir="",
        pattern="-E '{}(.*).per-base.bed.gz.csi$'",
    ),
    # sample_file for the generate_bed stage that feeds excluded regions
    cnv_generate_bed_excluded_stage_id + ".sample_file": dict(
        app="mosdepth",
        subdir="",
        pattern="-E '{}(.*).per-base.bed.gz.csi$'",
    ),
    # vcf for the VEP stage
    cnv_vep_stage_id + ".vcf": dict(
        app="eggd_GATKgCNV_call",
        subdir="CNV_vcfs/",
        pattern="-E '{}(.*)_segments.vcf$'",
    ),
    # excluded_regions for the annotate excluded regions stage
    cnv_annotate_excluded_regions_stage_id + ".excluded_regions": dict(
        app="eggd_GATKgCNV_call",
        subdir="CNV_summary/",
        pattern="-E '(.*)_excluded_intervals.bed$'",
    ),
}

# File inputs for the CNV reports workflow stages.
# Every input appears as a pair of keys: "<stage id>.<input name> ID" mapped
# to the file variable, immediately followed by "<stage id>.<input name>"
# mapped to an empty string.
cnv_rpt_dynamic_files = {
    # generate_bed stage feeding VEP
    cnv_generate_bed_vep_stage_id + ".exons_nirvana ID": cds_file,
    cnv_generate_bed_vep_stage_id + ".exons_nirvana": "",
    cnv_generate_bed_vep_stage_id + ".nirvana_genes2transcripts ID":
        genes2transcripts,
    cnv_generate_bed_vep_stage_id + ".nirvana_genes2transcripts": "",
    cnv_generate_bed_vep_stage_id + ".gene_panels ID": genepanels_file,
    cnv_generate_bed_vep_stage_id + ".gene_panels": "",
    cnv_generate_bed_vep_stage_id + ".manifest ID": bioinformatic_manifest,
    cnv_generate_bed_vep_stage_id + ".manifest": "",
    # generate_bed stage feeding the excluded regions app
    cnv_generate_bed_excluded_stage_id + ".exons_nirvana ID": cds_file,
    cnv_generate_bed_excluded_stage_id + ".exons_nirvana": "",
    cnv_generate_bed_excluded_stage_id + ".nirvana_genes2transcripts ID":
        genes2transcripts,
    cnv_generate_bed_excluded_stage_id + ".nirvana_genes2transcripts": "",
    cnv_generate_bed_excluded_stage_id + ".gene_panels ID": genepanels_file,
    cnv_generate_bed_excluded_stage_id + ".gene_panels": "",
    cnv_generate_bed_excluded_stage_id + ".manifest ID":
        bioinformatic_manifest,
    cnv_generate_bed_excluded_stage_id + ".manifest": "",
    # annotate excluded regions stage
    cnv_annotate_excluded_regions_stage_id + ".cds_hgnc ID": cds_file,
    cnv_annotate_excluded_regions_stage_id + ".cds_hgnc": "",
    cnv_annotate_excluded_regions_stage_id + ".cds_gene ID":
        cds_file_for_athena,
    cnv_annotate_excluded_regions_stage_id + ".cds_gene": "",
    cnv_annotate_excluded_regions_stage_id + ".additional_regions ID":
        additional_regions,
    cnv_annotate_excluded_regions_stage_id + ".additional_regions": "",
}

# CNV reanalysis

# Pattern-located stage inputs for CNV reanalysis.
# Keys are "<stage id>.<input name>"; each value names an app, a subdirectory
# and a file-name pattern.
# The "{}" inside the vcf pattern is left unformatted here (presumably filled
# in by the code that consumes this dict -- TODO confirm).
# "subdir" values must end with a trailing forward slash.
cnv_rea_stage_input_dict = {
    # vcf for the VEP stage
    cnv_vep_stage_id + ".vcf": dict(
        app="eggd_GATKgCNV_call",
        subdir="CNV_vcfs/",
        pattern="-E '{}(.*)_segments.vcf$'",
    ),
    # excluded_regions for the annotate excluded regions stage
    cnv_annotate_excluded_regions_stage_id + ".excluded_regions": dict(
        app="eggd_GATKgCNV_call",
        subdir="CNV_summary/",
        pattern="-E '(.*)_excluded_intervals.bed$'",
    ),
}
# File inputs for the CNV reanalysis stages.
# Every input appears as a pair of keys: "<stage id>.<input name> ID" mapped
# to the file variable, immediately followed by "<stage id>.<input name>"
# mapped to an empty string.
# NOTE(review): "manifest" is supplied for the excluded-regions generate_bed
# stage but not for the VEP generate_bed stage -- confirm this asymmetry is
# intended.
cnv_rea_dynamic_files = {
    # generate_bed stage feeding VEP
    cnv_generate_bed_vep_stage_id + ".exons_nirvana ID": cds_file,
    cnv_generate_bed_vep_stage_id + ".exons_nirvana": "",
    cnv_generate_bed_vep_stage_id + ".nirvana_genes2transcripts ID":
        genes2transcripts,
    cnv_generate_bed_vep_stage_id + ".nirvana_genes2transcripts": "",
    cnv_generate_bed_vep_stage_id + ".gene_panels ID": genepanels_file,
    cnv_generate_bed_vep_stage_id + ".gene_panels": "",
    # generate_bed stage feeding the excluded regions app
    cnv_generate_bed_excluded_stage_id + ".exons_nirvana ID": cds_file,
    cnv_generate_bed_excluded_stage_id + ".exons_nirvana": "",
    cnv_generate_bed_excluded_stage_id + ".nirvana_genes2transcripts ID":
        genes2transcripts,
    cnv_generate_bed_excluded_stage_id + ".nirvana_genes2transcripts": "",
    cnv_generate_bed_excluded_stage_id + ".gene_panels ID": genepanels_file,
    cnv_generate_bed_excluded_stage_id + ".gene_panels": "",
    cnv_generate_bed_excluded_stage_id + ".manifest ID":
        bioinformatic_manifest,
    cnv_generate_bed_excluded_stage_id + ".manifest": "",
    # annotate excluded regions stage
    cnv_annotate_excluded_regions_stage_id + ".cds_hgnc ID": cds_file,
    cnv_annotate_excluded_regions_stage_id + ".cds_hgnc": "",
    cnv_annotate_excluded_regions_stage_id + ".cds_gene ID":
        cds_file_for_athena,
    cnv_annotate_excluded_regions_stage_id + ".cds_gene": "",
    cnv_annotate_excluded_regions_stage_id + ".additional_regions ID":
        additional_regions,
    cnv_annotate_excluded_regions_stage_id + ".additional_regions": "",
}

0 comments on commit 31f8a2d

Please sign in to comment.