diff --git a/CHANGELOG.md b/CHANGELOG.md index 53949c87..bf6b540e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#431](https://github.com/genomic-medicine-sweden/nallo/pull/431) - Added files needed to automatically build and publish docs to GitHub Pages - [#435](https://github.com/genomic-medicine-sweden/nallo/pull/435) - Added nf-test to rank variants - [#445](https://github.com/genomic-medicine-sweden/nallo/pull/445) - Added FOUND_IN tag and nf-test to rank variants +- [#450](https://github.com/genomic-medicine-sweden/nallo/pull/450) - Added ranking of SVs (and CNVs) ### `Changed` diff --git a/README.md b/README.md index d038d89d..ef9224ed 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ ##### Ranking -- Rank SNVs with [GENMOD](https://github.com/Clinical-Genomics/genmod) +- Rank SNVs, INDELs and SVs with [GENMOD](https://github.com/Clinical-Genomics/genmod) ## Usage diff --git a/conf/modules/annotate_consequence_pli.config b/conf/modules/annotate_consequence_pli.config index 3158c4f4..648c98e6 100644 --- a/conf/modules/annotate_consequence_pli.config +++ b/conf/modules/annotate_consequence_pli.config @@ -12,6 +12,10 @@ */ process { + + // + // SNVs + // withName: '.*:ANN_CSQ_PLI_SNV:.*' { publishDir = [ enabled: false @@ -29,4 +33,25 @@ process { withName: '.*ANN_CSQ_PLI_SNV:TABIX_BGZIPTABIX' { ext.prefix = { "${meta.id}_snv_csq_pli" } } + + // + // SVs + // + withName: '.*:ANN_CSQ_PLI_SVS:.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*ANN_CSQ_PLI_SVS:ADD_MOST_SEVERE_CSQ' { + ext.prefix = { "${meta.id}_svs_csq" } + } + + withName: '.*ANN_CSQ_PLI_SVS:ADD_MOST_SEVERE_PLI' { + ext.prefix = { "${meta.id}_svs_csq_pli" } + } + + withName: '.*ANN_CSQ_PLI_SVS:TABIX_BGZIPTABIX' { + ext.prefix = { "${meta.id}_svs_csq_pli" } + } } diff --git a/conf/modules/annotate_svs.config b/conf/modules/annotate_svs.config index f1a4e7c1..1a672a65 100644 --- 
a/conf/modules/annotate_svs.config +++ b/conf/modules/annotate_svs.config @@ -42,7 +42,7 @@ process { publishDir = [ path: { "${params.outdir}/svs/family/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') || !params.skip_rank_variants ? null : filename } ] } @@ -50,7 +50,7 @@ process { publishDir = [ path: { "${params.outdir}/svs/family/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') || !params.skip_rank_variants ? null : filename } ] } diff --git a/conf/modules/rank_variants.config b/conf/modules/rank_variants.config index 30aad2fa..deea5785 100644 --- a/conf/modules/rank_variants.config +++ b/conf/modules/rank_variants.config @@ -11,18 +11,16 @@ ---------------------------------------------------------------------------------------- */ -// -// Score and rank SNVs -// - process { + // + // Score and rank SNVs + // withName: '.*:RANK_VARIANTS_SNV:.*' { publishDir = [ enabled: false, ] } - withName: '.*:RANK_VARIANTS_SNV:GENMOD_ANNOTATE' { ext.prefix = { "${meta.id}_snv_genmod_annotate" } ext.args = { [ @@ -31,20 +29,53 @@ process { '--temp_dir ./' ].join(' ') } } - withName: '.*:RANK_VARIANTS_SNV:GENMOD_MODELS' { ext.prefix = { "${meta.id}_snv_genmod_models" } ext.args = "--whole_gene --temp_dir ./" } - withName: '.*:RANK_VARIANTS_SNV:GENMOD_SCORE' { ext.prefix = { "${meta.id}_snv_genmod_score" } ext.args = "--rank_results" } - withName: '.*:RANK_VARIANTS_SNV:GENMOD_COMPOUND' { ext.prefix = { "${meta.id}_snv_genmod_compound" } ext.args = "--temp_dir ./" } + // + // Score and rank SVs + // + withName: '.*:RANK_VARIANTS_SVS:.*' { + publishDir = [ + enabled: false, + ] + } + withName: '.*:RANK_VARIANTS_SVS:GENMOD_ANNOTATE' { + ext.prefix = { "${meta.id}_svs_genmod_annotate" } + ext.args = { [ +
'--annotate_regions', + '--genome-build 38', + '--temp_dir ./' + ].join(' ') } + } + withName: '.*:RANK_VARIANTS_SVS:GENMOD_MODELS' { + ext.prefix = { "${meta.id}_svs_genmod_models" } + ext.args = "--whole_gene --temp_dir ./" + } + withName: '.*:RANK_VARIANTS_SVS:GENMOD_SCORE' { + ext.prefix = { "${meta.id}_svs_genmod_score" } + ext.args = "--rank_results" + } + withName: '.*:RANK_VARIANTS_SVS:GENMOD_COMPOUND' { + ext.prefix = { "${meta.id}_svs_genmod_compound" } + ext.args = "--temp_dir ./" + } + withName: '.*:RANK_VARIANTS_SVS:TABIX_BGZIPTABIX' { + ext.prefix = { params.skip_cnv_calling ? "${meta.id}_svs_merged_annotated_ranked" : "${meta.id}_svs_cnvs_merged_annotated_ranked" } + publishDir = [ + path: { "${params.outdir}/svs/family/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } diff --git a/conf/test.config b/conf/test.config index 9f91f654..5e30ea1b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,7 +18,7 @@ params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' // Base directory for genomic-medicine-sweden/nallo test data - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/7be7114cb132be8cae9343f225bcf42ec11ecc1b/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/22fb5b8a1a358df96e49f8d01a9c6e18770fbd6d/' // References fasta = params.pipelines_testdata_base_path + 'reference/hg38.test.fa.gz' @@ -36,7 +36,9 @@ params { svdb_dbs = params.pipelines_testdata_base_path + 'testdata/svdb_dbs.csv' reduced_penetrance = params.pipelines_testdata_base_path + 'reference/reduced_penetrance.tsv' score_config_snv = params.pipelines_testdata_base_path + 'reference/rank_model_snv.ini' + score_config_svs = params.pipelines_testdata_base_path + 'reference/rank_model_svs.ini' variant_consequences_snv = 
params.pipelines_testdata_base_path + 'reference/variant_consequences_v2.txt' + variant_consequences_svs = params.pipelines_testdata_base_path + 'reference/variant_consequences_v2.txt' somalier_sites = params.pipelines_testdata_base_path + 'reference/somalier_sites.vcf.gz' // Pipeline options diff --git a/docs/output.md b/docs/output.md index a08bfc99..294ef823 100644 --- a/docs/output.md +++ b/docs/output.md @@ -206,10 +206,11 @@ If the pipeline is run with phasing, the aligned reads will be happlotagged usin [Severus](https://github.com/KolmogorovLab/Severus) or [Sniffles](https://github.com/fritzsedlazeck/Sniffles) is used to call structural variants. [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV) is used to call CNVs. It also produces copy number, depth, and MAF [visualization tracks](#visualization-tracks). [SVDB](https://github.com/J35P312/SVDB) is used to combine and merge SVs and CNVs within and between samples. +[GENMOD](https://github.com/Clinical-Genomics/genmod) is used to rank the annotated SVs. !!!note - Variants are only output without annotation if that subworkflow is turned off. + Variants are only output without annotation and/or ranking if the corresponding subworkflows are turned off. !!!note @@ -237,6 +238,15 @@ If the pipeline is run with phasing, the aligned reads will be happlotagged usin | `svs/family/{family_id}/{family_id}_svs_merged_annotated.vcf.gz` | VCF file with merged and annotated SVs per family (output if CNV-calling is off) | | `svs/family/{family_id}/{family_id}_svs_merged_annotated.vcf.gz.tbi` | Index of the merged VCF file | +[GENMOD](https://github.com/Clinical-Genomics/genmod) is used to rank the annotated SVs.
+ | Path | Description | | -------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | | `svs/family/{family_id}/{family_id}_svs_cnvs_merged_annotated_ranked.vcf.gz` | VCF file with merged, annotated and ranked CNVs and SVs per family | | `svs/family/{family_id}/{family_id}_svs_cnvs_merged_annotated_ranked.vcf.gz.tbi` | Index of the merged VCF file | | `svs/family/{family_id}/{family_id}_svs_merged_annotated_ranked.vcf.gz` | VCF file with merged, annotated and ranked SVs per family (output if CNV-calling is off) | | `svs/family/{family_id}/{family_id}_svs_merged_annotated_ranked.vcf.gz.tbi` | Index of the merged VCF file | + ## Visualization Tracks [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV) is used to call CNVs, but it also produces copy number, depth, and MAF tracks that can be visualized in for example IGV. diff --git a/docs/parameters.md b/docs/parameters.md index 96498185..3a07f156 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -41,6 +41,7 @@ Define where the pipeline should find input data and save output data. | `svdb_dbs` | Databases used for structural variant annotation in vcf format.
HelpPath to comma-separated file containing information about the databases used for structural variant annotation.
| `string` | | | | | `variant_catalog` | A variant catalog json-file for stranger | `string` | | | | | `variant_consequences_snv` | File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic SNVs. For more information check https://ensembl.org/info/genome/variation/prediction/predicted_data.html | `string` | | | | +| `variant_consequences_svs` | File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic SVs. For more information check https://ensembl.org/info/genome/variation/prediction/predicted_data.html | `string` | | | | | `vep_cache` | A path to the VEP cache location | `string` | | | | | `bed` | A BED file with regions of interest, used to limit short variant calling. | `string` | | | | | `hificnv_xy` | A BED file containing expected copy number regions for XY samples. | `string` | | | | @@ -48,8 +49,9 @@ Define where the pipeline should find input data and save output data. | `hificnv_exclude` | A BED file specifying regions to exclude with HiFiCNV, such as centromeres. | `string` | | | | | `reduced_penetrance` | A file with gene ids that have reduced penetrance. For use with genmod. | `string` | | | | | `score_config_snv` | A SNV rank model config file for genmod. | `string` | | | | +| `score_config_svs` | An SV rank model config file for genmod.
| `string` | | | | | `somalier_sites` | A VCF of known polymorphic sites for somalier | `string` | | | | -| `pipelines_testdata_base_path` | Base URL or local path to location of pipeline test dataset files | `string` | https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/7be7114cb132be8cae9343f225bcf42ec11ecc1b/ | | True | +| `pipelines_testdata_base_path` | Base URL or local path to location of pipeline test dataset files | `string` | https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/22fb5b8a1a358df96e49f8d01a9c6e18770fbd6d/ | | True | ## Reference genome options @@ -106,7 +108,7 @@ Workflow options specific to genomic-medicine-sweden/nallo | `vep_cache_version` | VEP cache version | `integer` | 110 | | | | `vep_plugin_files` | A csv file with vep_plugins as header, and then paths to vep plugin files. Paths to pLI_values.txt and LoFtool_scores.txt are required. | `string` | | | | | `deepvariant_model_type` | Sets the model type used for DeepVariant. This is set automatically using `--preset` by default. | `string` | PACBIO | | True | -| `minimap2_read_mapping_preset` | Sets the minimap2-preset (-x) for read alignment. This is set automatically using the pipeline `--preset` by default. | `string` | | | True | +| `minimap2_read_mapping_preset` | Sets the minimap2-preset (-x) for read alignment. This is set automatically using the pipeline `--preset` by default. | `string` | map-hifi | | True | | `extra_modkit_options` | Extra options to modkit, used for test profile. | `string` | | | True | | `extra_vep_options` | Extra options to VEP, used for test profile. | `string` | | | True | | `extra_paraphase_options` | Extra options to Paraphase, used for test profile. 
| `string` | | | True | diff --git a/docs/usage.md b/docs/usage.md index b11265dd..b5a9fe4d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -298,6 +298,17 @@ These databases could for example come from [CoLoRSdb](https://zenodo.org/record Turned off with `--skip_sv_annotation`. +### Rank SVs + +This subworkflow ranks SVs, and relies on the mapping, SV calling and SV annotation subworkflows, and requires the following additional files: + +| Parameter | Description | +| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `score_config_svs` | Used by GENMOD when ranking variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_sv.ini). | +| `reduced_penetrance` | A list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv) | + +Turned off with `--skip_rank_variants`. + ## Other highlighted parameters - Limit SNV calling to regions in BED file (`--bed`).
diff --git a/nextflow.config b/nextflow.config index 5b0db077..d424c0eb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,9 +20,11 @@ params { variant_catalog = null reduced_penetrance = null score_config_snv = null + score_config_svs = null snp_db = null svdb_dbs = null variant_consequences_snv = null + variant_consequences_svs = null vep_cache = null vep_plugin_files = null hificnv_xy = null @@ -86,7 +88,7 @@ params { help_full = false show_hidden = false version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/7be7114cb132be8cae9343f225bcf42ec11ecc1b/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/22fb5b8a1a358df96e49f8d01a9c6e18770fbd6d/' // Config options config_profile_name = null diff --git a/nextflow_schema.json b/nextflow_schema.json index ec1fa4b2..65f282cc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -189,6 +189,11 @@ "description": "File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic SNVs. For more information check https://ensembl.org/info/genome/variation/prediction/predicted_data.html", "fa_icon": "fas fa-file-csv" }, + "variant_consequences_svs": { + "type": "string", + "description": "File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic SVs. For more information check https://ensembl.org/info/genome/variation/prediction/predicted_data.html", + "fa_icon": "fas fa-file-csv" + }, "vep_cache": { "type": "string", "description": "A path to the VEP cache location", @@ -234,6 +239,13 @@ "fa_icon": "fas fa-file", "description": "A SNV rank model config file for genmod." }, + "score_config_svs": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "description": "An SV rank model config file for genmod."
+ }, "somalier_sites": { "type": "string", "pattern": "^\\S+\\.vcf(\\.gz)?$", @@ -245,7 +257,7 @@ "type": "string", "fa_icon": "far fa-check-circle", "description": "Base URL or local path to location of pipeline test dataset files", - "default": "https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/7be7114cb132be8cae9343f225bcf42ec11ecc1b/", + "default": "https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/22fb5b8a1a358df96e49f8d01a9c6e18770fbd6d/", "hidden": true } } @@ -467,7 +479,8 @@ "type": "string", "description": "Sets the minimap2-preset (-x) for read alignment. This is set automatically using the pipeline `--preset` by default.", "hidden": true, - "enum": ["map-hifi", "map-ont", "lr:hq", "lr:hqae"] + "enum": ["map-hifi", "map-ont", "lr:hq", "lr:hqae"], + "default": "map-hifi" }, "extra_modkit_options": { "type": "string", diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf index 3061e3c0..315f86ba 100644 --- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf @@ -73,9 +73,9 @@ def fileDependencies = [ assembly : ["fasta", "par_regions"], // The assembly workflow should be split into two - assembly and variant calling (requires ref) snv_calling : ["fasta", "par_regions"], snv_annotation : ["snp_db", "vep_cache", "vep_plugin_files", "variant_consequences_snv"], - sv_annotation : ["svdb_dbs", "vep_cache", "vep_plugin_files"], + sv_annotation : ["svdb_dbs", "vep_cache", "vep_plugin_files", "variant_consequences_svs"], cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"], - rank_variants : ["reduced_penetrance", "score_config_snv"], + rank_variants : ["reduced_penetrance", "score_config_snv", "score_config_svs"], repeat_calling : ["trgt_repeats"], repeat_annotation: ["variant_catalog"], ] @@ -108,10 +108,11 @@ def parameterStatus = [ fasta : params.fasta, trgt_repeats : 
params.trgt_repeats, variant_catalog : params.variant_catalog, - score_config_snv : params.score_config_snv, reduced_penetrance : params.reduced_penetrance, score_config_snv : params.score_config_snv, + score_config_svs : params.score_config_svs, variant_consequences_snv: params.variant_consequences_snv, + variant_consequences_svs: params.variant_consequences_svs, ] ] diff --git a/tests/samplesheet.nf.test b/tests/samplesheet.nf.test index 1b3448e4..3d4c9648 100644 --- a/tests/samplesheet.nf.test +++ b/tests/samplesheet.nf.test @@ -9,7 +9,7 @@ nextflow_pipeline { when { params { - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/7be7114cb132be8cae9343f225bcf42ec11ecc1b/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/22fb5b8a1a358df96e49f8d01a9c6e18770fbd6d/' input = params.pipelines_testdata_base_path + 'testdata/samplesheet.csv' outdir = "$outputDir" } diff --git a/tests/samplesheet.nf.test.snap b/tests/samplesheet.nf.test.snap index 838d5318..e97c63f2 100644 --- a/tests/samplesheet.nf.test.snap +++ b/tests/samplesheet.nf.test.snap @@ -1,7 +1,7 @@ { "test profile": { "content": [ - 104, + 112, { "ADD_FOUND_IN_TAG": { "bcftools": 1.2, @@ -382,8 +382,8 @@ "svs", "svs/family", "svs/family/FAM", - "svs/family/FAM/FAM_svs_cnvs_merged_annotated.vcf.gz", - "svs/family/FAM/FAM_svs_cnvs_merged_annotated.vcf.gz.tbi", + "svs/family/FAM/FAM_svs_cnvs_merged_annotated_ranked.vcf.gz", + "svs/family/FAM/FAM_svs_cnvs_merged_annotated_ranked.vcf.gz.tbi", "svs/single_sample", "svs/single_sample/HG002_Revio", "svs/single_sample/HG002_Revio/HG002_Revio_cnvs.vcf.gz", @@ -507,7 +507,7 @@ "VcfFile [chromosomes=[chrX, chr16], sampleCount=1, variantCount=100, phased=false, phasedAutodetect=false]" ], [ - "FAM_svs_cnvs_merged_annotated.vcf.gz", + "FAM_svs_cnvs_merged_annotated_ranked.vcf.gz", "VcfFile [chromosomes=[chrX, chr16], sampleCount=1, variantCount=87, phased=false, 
phasedAutodetect=false]" ], [ @@ -546,6 +546,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-30T10:27:37.120618269" + "timestamp": "2024-10-30T11:26:11.367381989" } } \ No newline at end of file diff --git a/tests/samplesheet_multisample_bam.nf.test b/tests/samplesheet_multisample_bam.nf.test index 205348e6..4fa19978 100644 --- a/tests/samplesheet_multisample_bam.nf.test +++ b/tests/samplesheet_multisample_bam.nf.test @@ -9,7 +9,7 @@ nextflow_pipeline { when { params { - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/7be7114cb132be8cae9343f225bcf42ec11ecc1b/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/22fb5b8a1a358df96e49f8d01a9c6e18770fbd6d/' input = params.pipelines_testdata_base_path + 'testdata/samplesheet_multisample_bam.csv' outdir = "$outputDir" phaser = "hiphase" diff --git a/tests/samplesheet_multisample_bam.nf.test.snap b/tests/samplesheet_multisample_bam.nf.test.snap index 75143c8d..3aa42ad2 100644 --- a/tests/samplesheet_multisample_bam.nf.test.snap +++ b/tests/samplesheet_multisample_bam.nf.test.snap @@ -1,7 +1,7 @@ { "samplesheet_multisample_bam | --phaser hiphase": { "content": [ - 150, + 158, { "ADD_FOUND_IN_TAG": { "bcftools": 1.2, @@ -456,8 +456,8 @@ "svs", "svs/family", "svs/family/FAM", - "svs/family/FAM/FAM_svs_cnvs_merged_annotated.vcf.gz", - "svs/family/FAM/FAM_svs_cnvs_merged_annotated.vcf.gz.tbi", + "svs/family/FAM/FAM_svs_cnvs_merged_annotated_ranked.vcf.gz", + "svs/family/FAM/FAM_svs_cnvs_merged_annotated_ranked.vcf.gz.tbi", "svs/single_sample", "svs/single_sample/HG002_Revio_A", "svs/single_sample/HG002_Revio_A/HG002_Revio_A_cnvs.vcf.gz", @@ -683,7 +683,7 @@ "VcfFile [chromosomes=[chrX, chr16], sampleCount=1, variantCount=100, phased=false, phasedAutodetect=false]" ], [ - "FAM_svs_cnvs_merged_annotated.vcf.gz", + "FAM_svs_cnvs_merged_annotated_ranked.vcf.gz", "VcfFile [chromosomes=[chrX, 
chr16], sampleCount=2, variantCount=87, phased=false, phasedAutodetect=false]" ], [ @@ -746,6 +746,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-30T10:29:12.353783346" + "timestamp": "2024-10-30T11:27:50.204023027" } } \ No newline at end of file diff --git a/tests/samplesheet_multisample_ont_bam.nf.test b/tests/samplesheet_multisample_ont_bam.nf.test index e5216f0f..5afa04af 100644 --- a/tests/samplesheet_multisample_ont_bam.nf.test +++ b/tests/samplesheet_multisample_ont_bam.nf.test @@ -9,7 +9,7 @@ nextflow_pipeline { when { params { - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/7be7114cb132be8cae9343f225bcf42ec11ecc1b/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/22fb5b8a1a358df96e49f8d01a9c6e18770fbd6d/' input = params.pipelines_testdata_base_path + 'testdata/samplesheet_multisample_bam_ont.csv' outdir = "$outputDir" preset = 'ONT_R10' diff --git a/tests/samplesheet_multisample_ont_bam.nf.test.snap b/tests/samplesheet_multisample_ont_bam.nf.test.snap index 7bce4132..1c19f9b8 100644 --- a/tests/samplesheet_multisample_ont_bam.nf.test.snap +++ b/tests/samplesheet_multisample_ont_bam.nf.test.snap @@ -1,7 +1,7 @@ { "samplesheet_multisample_ont_bam | --preset ONT_R10 --phaser whatshap --parallel_alignments 1 --parallel_snv 1": { "content": [ - 97, + 105, { "ADD_FOUND_IN_TAG": { "bcftools": 1.2, @@ -339,8 +339,8 @@ "svs", "svs/family", "svs/family/FAM", - "svs/family/FAM/FAM_svs_cnvs_merged_annotated.vcf.gz", - "svs/family/FAM/FAM_svs_cnvs_merged_annotated.vcf.gz.tbi", + "svs/family/FAM/FAM_svs_cnvs_merged_annotated_ranked.vcf.gz", + "svs/family/FAM/FAM_svs_cnvs_merged_annotated_ranked.vcf.gz.tbi", "svs/single_sample", "svs/single_sample/HG002_ONT_A", "svs/single_sample/HG002_ONT_A/HG002_ONT_A_cnvs.vcf.gz", @@ -462,7 +462,7 @@ "VcfFile [chromosomes=[chrX, chr16], sampleCount=1, variantCount=99, phased=false, 
phasedAutodetect=false]" ], [ - "FAM_svs_cnvs_merged_annotated.vcf.gz", + "FAM_svs_cnvs_merged_annotated_ranked.vcf.gz", "VcfFile [chromosomes=[chrX, chr16], sampleCount=2, variantCount=98, phased=false, phasedAutodetect=false]" ], [ @@ -490,6 +490,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-30T08:37:01.633018038" + "timestamp": "2024-10-30T11:29:20.61580529" } } \ No newline at end of file diff --git a/workflows/nallo.nf b/workflows/nallo.nf index 45587e62..28fa679c 100644 --- a/workflows/nallo.nf +++ b/workflows/nallo.nf @@ -7,6 +7,7 @@ include { samplesheetToList } from 'plugin/nf-schema' */ include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli' +include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SVS } from '../subworkflows/local/annotate_consequence_pli' include { ANNOTATE_SVS } from '../subworkflows/local/annotate_svs' include { ANNOTATE_REPEAT_EXPANSIONS } from '../subworkflows/local/annotate_repeat_expansions' include { ASSEMBLY } from '../subworkflows/local/genome_assembly' @@ -22,6 +23,7 @@ include { PHASING } from '../subworkflows/local/ include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' include { QC_ALIGNED_READS } from '../subworkflows/local/qc_aligned_reads' include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants' +include { RANK_VARIANTS as RANK_VARIANTS_SVS } from '../subworkflows/local/rank_variants' include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome' include { SHORT_VARIANT_CALLING } from '../subworkflows/local/short_variant_calling' include { SNV_ANNOTATION } from '../subworkflows/local/snv_annotation' @@ -91,6 +93,8 @@ workflow NALLO { : '' ch_variant_consequences_snv = params.variant_consequences_snv ? Channel.fromPath(params.variant_consequences_snv).map { it -> [ it.simpleName, it ] }.collect() : Channel.value([]) + ch_variant_consequences_svs = params.variant_consequences_svs ? 
Channel.fromPath(params.variant_consequences_svs).map { it -> [ it.simpleName, it ] }.collect() + : Channel.value([]) ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [ [ id:'vep_cache' ], it ] }.collect() : Channel.value([[],[]]) ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect() @@ -105,6 +109,8 @@ workflow NALLO { : Channel.value([]) ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).map { it -> [ it.simpleName, it ] }.collect() : Channel.value([]) + ch_score_config_svs = params.score_config_svs ? Channel.fromPath(params.score_config_svs).map { it -> [ it.simpleName, it ] }.collect() + : Channel.value([]) ch_somalier_sites = params.somalier_sites ? Channel.fromPath(params.somalier_sites).map { [ it.simpleName, it ] }.collect() : '' ch_svdb_dbs = params.svdb_dbs ? Channel.fromPath(params.svdb_dbs).map { [ it.simpleName, it ] }.collect() @@ -388,8 +394,8 @@ workflow NALLO { .set { ch_vcf_tbi_per_region } } else { // otherwise grab the VCF that should have gone into RANK_VARIANTS - ANN_CSQ_PLI_SNV.out.vcf_ann - .join( ANN_CSQ_PLI_SNV.out.tbi_ann ) + ANN_CSQ_PLI_SNV.out.vcf + .join( ANN_CSQ_PLI_SNV.out.tbi ) .set { ch_vcf_tbi_per_region } } } else { @@ -532,6 +538,23 @@ workflow NALLO { params.vep_cache_version, PREPARE_GENOME.out.vep_extra_files ) + + ANN_CSQ_PLI_SVS ( + ANNOTATE_SVS.out.vcf, + ch_variant_consequences_svs + ) + ch_versions = ch_versions.mix(ANN_CSQ_PLI_SVS.out.versions) + + if (!params.skip_rank_variants) { + RANK_VARIANTS_SVS ( + ANN_CSQ_PLI_SVS.out.vcf, + ch_updated_pedfile, + ch_reduced_penetrance, + ch_score_config_svs + ) + ch_versions = ch_versions.mix(RANK_VARIANTS_SVS.out.versions) + } + } }