Skip to content

Commit

Permalink
Update modules and subworkflows for compatibility with 1.8.0
Browse files Browse the repository at this point in the history
  • Loading branch information
fellen31 committed Jan 8, 2025
1 parent 5c0366f commit 560dfdf
Show file tree
Hide file tree
Showing 16 changed files with 224 additions and 223 deletions.
15 changes: 1 addition & 14 deletions modules/nf-core/deepvariant/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,7 @@ These module subcommands incorporate the individual steps of the DeepVariant pip
## makeexamples

This process imports the data used for calling, and thus decides what information is available to the
deep neural network. It's important to import the correct channels for the model you want to use.

The script `run_deepvariant` (not used in the subworkflow) does this automatically. You can refer to
the implementation in the DeepVariant repo:

https://github.com/google/deepvariant/blob/bf9ed7e6de97cf6c8381694cb996317a740625ad/scripts/run_deepvariant.py#L367

For WGS and WES models you need to enable the `insert_size` channel. Specify the following in the config:

```
withName: "DEEPVARIANT_MAKEEXAMPLES" {
ext.args = '--channels "insert_size"'
}
```
deep neural network. It's important to use the correct settings at each step for the model you want to use. The script [`run_deepvariant.py`](https://github.com/google/deepvariant/blob/r1.8/scripts/run_deepvariant.py) does this automatically. To find out which flags each step needs, you can run `run_deepvariant.py` with the `--dry_run=true` flag, which prints the command used for each step, as described [here](https://github.com/google/deepvariant/blob/r1.8/docs/deepvariant-pacbio-model-case-study.md).

## callvariants

Expand Down
14 changes: 7 additions & 7 deletions modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
"versions": {
"content": [
[
"versions.yml:md5,5ff99ffba1e56e4e919d3dfc2d0f3cbb"
"versions.yml:md5,384f8c54b3d1b03f7bdb583cb3c93e5c"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
"nextflow": "24.10.3"
},
"timestamp": "2024-08-09T16:38:47.927241"
"timestamp": "2025-01-08T10:33:45.081424542"
},
"homo_sapiens-wgs-call_variants_tfrecords-filenames": {
"content": [
Expand All @@ -34,7 +34,7 @@
]
],
"1": [
"versions.yml:md5,5ff99ffba1e56e4e919d3dfc2d0f3cbb"
"versions.yml:md5,384f8c54b3d1b03f7bdb583cb3c93e5c"
],
"call_variants_tfrecords": [
[
Expand All @@ -46,14 +46,14 @@
]
],
"versions": [
"versions.yml:md5,5ff99ffba1e56e4e919d3dfc2d0f3cbb"
"versions.yml:md5,384f8c54b3d1b03f7bdb583cb3c93e5c"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
"nextflow": "24.10.3"
},
"timestamp": "2024-08-13T21:07:17.335788301"
"timestamp": "2025-01-08T10:07:27.993998742"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ process {
}
process {
withName: "DEEPVARIANT_MAKEEXAMPLES" {
ext.args = '--channels "insert_size"'
ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"'
}
}
14 changes: 7 additions & 7 deletions modules/nf-core/deepvariant/makeexamples/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ nextflow_process {
{ assert process.out.examples.get(0).get(0) == [ id:'test', single_end:false ] },
{ assert process.out.gvcf.get(0).get(0) == [ id:'test', single_end:false ] },
{ assert process.out.examples.get(0).get(1).size() == 4 },
{ assert snapshot( // Check examples (tfrecord / json) file name list
{ assert snapshot( // Check examples (tfrecord / json) file name list
file(process.out.examples.get(0).get(1).get(0)).name,
file(process.out.examples.get(0).get(1).get(1)).name,
file(process.out.examples.get(0).get(1).get(2)).name,
file(process.out.examples.get(0).get(1).get(3)).name,
).match("test1-exaamples-filenames")},

{ assert process.out.gvcf.get(0).get(0) == [ id:'test', single_end:false ] },
{ assert process.out.gvcf.get(0).get(1).size() == 2 },
{ assert snapshot( // Check gvcf file name list
Expand Down Expand Up @@ -154,7 +154,7 @@ nextflow_process {
{ assert process.out.examples.get(0).get(0) == [ id:'test', single_end:false ] },
// The test is always run with 2 cpus
{ assert process.out.examples.get(0).get(1).size() == 4 },
{ assert snapshot( // Check examples (tfrecord / json) file name list
{ assert snapshot( // Check examples (tfrecord / json) file name list
file(process.out.examples.get(0).get(1).get(0)).name,
file(process.out.examples.get(0).get(1).get(1)).name,
file(process.out.examples.get(0).get(1).get(2)).name,
Expand All @@ -173,7 +173,7 @@ nextflow_process {
}

test("stub") {

options "-stub"

when {
Expand Down Expand Up @@ -208,13 +208,13 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert process.out.examples.get(0).get(1).size() == 4 },
{ assert snapshot( // Check examples (tfrecord / json) file name list
{ assert snapshot( // Check examples (tfrecord / json) file name list
file(process.out.examples.get(0).get(1).get(0)).name,
file(process.out.examples.get(0).get(1).get(1)).name,
file(process.out.examples.get(0).get(1).get(2)).name,
file(process.out.examples.get(0).get(1).get(3)).name,
).match("test4-examples-filenames")},

{ assert process.out.gvcf.get(0).get(0) == [ id:'test', single_end:false ] },
{ assert process.out.gvcf.get(0).get(1).size() == 2 },
{ assert snapshot( // Check gvcf file name list
Expand All @@ -225,4 +225,4 @@ nextflow_process {
}
}

}
}
80 changes: 40 additions & 40 deletions modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@
},
"timestamp": "2024-09-04T16:09:47.885995"
},
"test3-versions": {
"content": [
[
"versions.yml:md5,2bfe7f3902fb3d9e2dc1d97dc6347c9c"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.3"
},
"timestamp": "2025-01-08T10:34:31.031697972"
},
"test2-examples-filenames": {
"content": [
"test.examples.tfrecord-00000-of-00002.gz",
Expand All @@ -26,14 +38,27 @@
"test2-versions": {
"content": [
[
"versions.yml:md5,842dca9323f25aa3cfd67789d18e7e33"
"versions.yml:md5,2bfe7f3902fb3d9e2dc1d97dc6347c9c"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.3"
},
"timestamp": "2025-01-08T10:34:17.998740352"
},
"test1-exaamples-filenames": {
"content": [
"test.examples.tfrecord-00000-of-00002.gz",
"test.examples.tfrecord-00000-of-00002.gz.example_info.json",
"test.examples.tfrecord-00001-of-00002.gz",
"test.examples.tfrecord-00001-of-00002.gz.example_info.json"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-09T16:39:28.960959"
"timestamp": "2024-09-04T16:09:47.874585"
},
"test4-examples-filenames": {
"content": [
Expand All @@ -51,14 +76,25 @@
"test1-versions": {
"content": [
[
"versions.yml:md5,842dca9323f25aa3cfd67789d18e7e33"
"versions.yml:md5,2bfe7f3902fb3d9e2dc1d97dc6347c9c"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.10.3"
},
"timestamp": "2025-01-08T10:34:04.940271042"
},
"test3-gvcf-filenames": {
"content": [
"test.gvcf.tfrecord-00000-of-00002.gz",
"test.gvcf.tfrecord-00001-of-00002.gz"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-09T16:39:13.57526"
"timestamp": "2024-09-04T16:10:17.714443"
},
"test3-examples-filenames": {
"content": [
Expand Down Expand Up @@ -94,41 +130,5 @@
"nextflow": "24.04.4"
},
"timestamp": "2024-09-04T16:10:27.423442"
},
"test3-versions": {
"content": [
[
"versions.yml:md5,842dca9323f25aa3cfd67789d18e7e33"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-09T16:39:44.83616"
},
"test1-exaamples-filenames": {
"content": [
"test.examples.tfrecord-00000-of-00002.gz",
"test.examples.tfrecord-00000-of-00002.gz.example_info.json",
"test.examples.tfrecord-00001-of-00002.gz",
"test.examples.tfrecord-00001-of-00002.gz.example_info.json"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-04T16:09:47.874585"
},
"test3-gvcf-filenames": {
"content": [
"test.gvcf.tfrecord-00000-of-00002.gz",
"test.gvcf.tfrecord-00001-of-00002.gz"
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-04T16:10:17.714443"
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process {
withName: "DEEPVARIANT_MAKEEXAMPLES" {
ext.args = '--channels "insert_size"'
cpus = 2 // The number of output files is determined by cpus - keep it the same for tests
ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"'
}
}
4 changes: 3 additions & 1 deletion modules/nf-core/deepvariant/postprocessvariants/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ process DEEPVARIANT_POSTPROCESSVARIANTS {
container "docker.io/google/deepvariant:1.8.0"

input:
tuple val(meta), path(variant_calls_tfrecord_files), path(gvcf_tfrecords)
tuple val(meta), path(variant_calls_tfrecord_files), path(gvcf_tfrecords), path(intervals)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(gzi)
Expand All @@ -30,6 +30,7 @@ process DEEPVARIANT_POSTPROCESSVARIANTS {
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"

def regions = intervals ? "--regions ${intervals}" : ""
def variant_calls_tfrecord_name = variant_calls_tfrecord_files[0].name.replaceFirst(/-\d{5}-of-\d{5}/, "")

def gvcf_matcher = gvcf_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/
Expand All @@ -49,6 +50,7 @@ process DEEPVARIANT_POSTPROCESSVARIANTS {
--outfile "${prefix}.vcf.gz" \\
--nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\
--gvcf_outfile "${prefix}.g.vcf.gz" \\
${regions} \\
--cpus $task.cpus
cat <<-END_VERSIONS > versions.yml
Expand Down
4 changes: 4 additions & 0 deletions modules/nf-core/deepvariant/postprocessvariants/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ input:
description: |
Sharded tfrecord file from DEEPVARIANT_MAKEEXAMPLES with the coverage information used for GVCF output
pattern: "*.gz"
- intervals:
type: file
description: Interval file for targeted regions
pattern: "*.bed"
- - meta2:
type: map
description: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ nextflow_process {
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
[]
[],
]
input[1] = [
[ id:'genome'],
Expand Down Expand Up @@ -56,7 +57,7 @@ nextflow_process {
input[0] = DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.join(
DEEPVARIANT_MAKEEXAMPLES.out.gvcf,
failOnMismatch: true
)
).map { meta, tf, gvcf -> [ meta, tf, gvcf, [] ] }
input[1] = [
[ id:'genome'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
Expand All @@ -82,7 +83,7 @@ nextflow_process {

test("homo_sapiens - wgs - stub") {
options "-stub"

when {
process {
"""
Expand All @@ -91,6 +92,7 @@ nextflow_process {
[],
[],
[],
[],
]
input[1] = [
[ id:'genome'],
Expand All @@ -115,4 +117,4 @@ nextflow_process {
}
}

}
}
Loading

0 comments on commit 560dfdf

Please sign in to comment.