Skip to content

Commit

Permalink
Add found_in tag to call repeat expansions
Browse files Browse the repository at this point in the history
  • Loading branch information
fellen31 committed Oct 29, 2024
1 parent 79b836b commit 9d4989b
Show file tree
Hide file tree
Showing 12 changed files with 318 additions and 46 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ jobs:
- "CALL_SVS"
- "ANNOTATE_SVS"
- "RANK_VARIANTS"
- "CALL_REPEAT_EXPANSIONS"
profile:
- "docker"

Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#430](https://github.com/genomic-medicine-sweden/nallo/pull/430) - Added a GitHub action to build and publish docs to GitHub Pages
- [#431](https://github.com/genomic-medicine-sweden/nallo/pull/431) - Added files needed to automatically build and publish docs to GitHub Pages
- [#435](https://github.com/genomic-medicine-sweden/nallo/pull/435) - Added nf-test to rank variants
- [#445](https://github.com/genomic-medicine-sweden/nallo/pull/445) - Added FOUND_IN tag and nf-test to call repeat expansions

### `Changed`

Expand Down
15 changes: 14 additions & 1 deletion modules/local/trgt/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ process TRGT {
tuple val(meta), path(bam), path(bai), val(sex)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
path(repeats)
tuple val(meta4), path(repeats)

output:
tuple val(meta), path("${meta.id}.spanning.bam"), emit: bam
Expand Down Expand Up @@ -40,6 +40,19 @@ process TRGT {
--output-prefix ${meta.id}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
trgt: \$(echo \$(trgt -V) | sed 's/trgt //' )
END_VERSIONS
"""

stub:
prefix = task.ext.prefix ?: "${meta.id}"

"""
touch ${prefix}.spanning.bam
echo | gzip > ${prefix}.vcf.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
trgt: \$(echo \$(trgt -V) | sed 's/trgt //' )
Expand Down
51 changes: 36 additions & 15 deletions subworkflows/local/call_repeat_expansions/main.nf
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
include { ADD_FOUND_IN_TAG } from '../../../modules/local/add_found_in_tag/main'
include { TRGT } from '../../../modules/local/trgt'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_TRGT } from '../../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_SORT as SAMTOOLS_SORT_TRGT } from '../../../modules/nf-core/samtools/sort/main'
Expand All @@ -13,40 +14,60 @@ workflow CALL_REPEAT_EXPANSIONS {
ch_trgt_bed // channel: [mandatory] [ val(meta), path(bed) ]

main:
ch_repeat_calls_vcf = Channel.empty()
ch_versions = Channel.empty()
ch_versions = Channel.empty()

ch_bam_bai
.map { meta, bam, bai -> [meta, bam, bai, meta.sex] }
.set { ch_trgt_input }

// Run TRGT
TRGT ( ch_trgt_input, ch_fasta, ch_fai, ch_trgt_bed.map { it[1] } )
TRGT (
ch_trgt_input,
ch_fasta,
ch_fai,
ch_trgt_bed
)
ch_versions = ch_versions.mix(TRGT.out.versions)

// Sort and index bam
SAMTOOLS_SORT_TRGT ( TRGT.out.bam, [[],[]] )
SAMTOOLS_INDEX_TRGT(SAMTOOLS_SORT_TRGT.out.bam)
SAMTOOLS_SORT_TRGT (
TRGT.out.bam,
[[],[]]
)
ch_versions = ch_versions.mix(SAMTOOLS_SORT_TRGT.out.versions)

SAMTOOLS_INDEX_TRGT ( SAMTOOLS_SORT_TRGT.out.bam )
ch_versions = ch_versions.mix(SAMTOOLS_INDEX_TRGT.out.versions)

// Add FOUND_IN=TRGT tag
ADD_FOUND_IN_TAG (
TRGT.out.vcf.map { meta, vcf -> [ meta, vcf, [] ] },
"TRGT"
)

// Sort and index bcf
BCFTOOLS_SORT_TRGT(TRGT.out.vcf)
BCFTOOLS_SORT_TRGT ( ADD_FOUND_IN_TAG.out.vcf )
ch_versions = ch_versions.mix(BCFTOOLS_SORT_TRGT.out.versions)

BCFTOOLS_SORT_TRGT.out.vcf
.join( BCFTOOLS_SORT_TRGT.out.tbi )
.map { meta, bcf, csi -> [ [ id : meta.project ], bcf, csi ] }
.groupTuple()
.set{ ch_bcftools_merge_in }

BCFTOOLS_MERGE ( ch_bcftools_merge_in, ch_fasta, ch_fai, [[],[]] )


ch_versions = ch_versions.mix(TRGT.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_SORT_TRGT.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_INDEX_TRGT.out.versions)
ch_versions = ch_versions.mix(BCFTOOLS_SORT_TRGT.out.versions)
BCFTOOLS_MERGE (
ch_bcftools_merge_in,
ch_fasta,
ch_fai,
[[],[]]
)
ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions)

emit:
vcf = BCFTOOLS_SORT_TRGT.out.vcf // channel: [ val(meta), path(vcf) ]
versions = ch_versions // channel: [ versions.yml ]
sample_vcf = BCFTOOLS_SORT_TRGT.out.vcf // channel: [ val(meta), path(vcf) ]
project_vcf = BCFTOOLS_MERGE.out.vcf // channel: [ val(meta), path(vcf) ]
sample_bam = SAMTOOLS_SORT_TRGT.out.bam // channel: [ val(meta), path(bam) ]
sample_bai = SAMTOOLS_INDEX_TRGT.out.bai // channel: [ val(meta), path(bai) ]
versions = ch_versions // channel: [ versions.yml ]
}

99 changes: 99 additions & 0 deletions subworkflows/local/call_repeat_expansions/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// nf-test suite for the CALL_REPEAT_EXPANSIONS subworkflow.
//
// Inputs supplied to the workflow in both tests:
//   input[0]  [ meta, bam, bai ]  - meta carries `project` and `sex`, which the
//                                   subworkflow reads (meta.sex is passed to TRGT,
//                                   meta.project becomes the id of the merged VCF)
//   input[1]  [ meta, fasta ]     - uncompressed reference from GUNZIP
//   input[2]  [ meta, fai ]       - reference index from SAMTOOLS_FAIDX
//   input[3]  [ meta, bed ]       - repeat catalogue passed to TRGT
//
// NOTE: test names are used as keys in main.nf.test.snap - renaming a test
// requires regenerating the snapshot.
nextflow_workflow {

    name "Test Workflow CALL_REPEAT_EXPANSIONS"
    script "../main.nf"
    config "./nextflow.config"
    workflow "CALL_REPEAT_EXPANSIONS"

    // Prepare the reference inputs used by the tests: decompress the test
    // reference and index it.
    setup {
        run("GUNZIP") {
            script "../../../../modules/nf-core/gunzip/main.nf"
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.pipelines_testdata_base_path + 'reference/hg38.test.fa.gz', checkIfExists: true)
                ]
                """
            }

        }

        run("SAMTOOLS_FAIDX") {
            script "../../../../modules/nf-core/samtools/faidx/main.nf"
            process {
                """
                input[0] = GUNZIP.out.gunzip
                input[1] = [[],[]]
                """
            }

        }
    }

    // Real run: snapshot content-level checksums (VCF variants, BAM reads)
    // instead of whole-file md5s.
    test("[bam, bai], fasta, fai, bed") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test', single_end:false, project: 'project', sex: 1 ], // meta map
                    file(params.pipelines_testdata_base_path + 'testdata/HG002_PacBio_Revio.bam', checkIfExists: true),
                    file(params.pipelines_testdata_base_path + 'testdata/HG002_PacBio_Revio.bam.bai', checkIfExists: true)
                ])
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                input[3] = Channel.of([
                    [ id: 'pathogenic_repeats' ],
                    file(params.pipelines_testdata_base_path + 'reference/pathogenic_repeats.hg38.bed', checkIfExists: true)
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                // The index is only checked by file name, not by content.
                { assert workflow.out.sample_bai.get(0).get(1).endsWith(".bai") },
                // The order of the snapshot arguments must match the order of
                // the entries stored in main.nf.test.snap.
                { assert snapshot(
                    path(workflow.out.sample_vcf.get(0).get(1)).vcf.variantsMD5,
                    path(workflow.out.project_vcf.get(0).get(1)).vcf.variantsMD5,
                    bam(workflow.out.sample_bam.get(0).get(1), stringency: 'silent').getReadsMD5(),
                    workflow.out.versions,
                ).match() }
            )
        }

    }

    // Stub run: same inputs, executed with `-stub`; the complete set of
    // workflow outputs is snapshotted.
    test("[bam, bai], fasta, fai, bed - stub") {

        options "-stub"

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test', single_end:false, project: 'project', sex: 1 ], // meta map
                    file(params.pipelines_testdata_base_path + 'testdata/HG002_PacBio_Revio.bam', checkIfExists: true),
                    file(params.pipelines_testdata_base_path + 'testdata/HG002_PacBio_Revio.bam.bai', checkIfExists: true)
                ])
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                input[3] = Channel.of([
                    [ id: 'pathogenic_repeats' ],
                    file(params.pipelines_testdata_base_path + 'reference/pathogenic_repeats.hg38.bed', checkIfExists: true)
                ])
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(workflow.out).match() }
            )
        }

    }
}
128 changes: 128 additions & 0 deletions subworkflows/local/call_repeat_expansions/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
{
"[bam, bai], fasta, fai, bed": {
"content": [
"502b7befd528ce70b8269a8e4b9281df",
"1de84bd070a82b37e6fac25d19ae604e",
"65999ab8f2bc7841de8172468bf23ab6",
[
"versions.yml:md5,52272b464e62cb9e5d41622ea76cd070",
"versions.yml:md5,6576546ea5cf2a0cb6438b4c6758fd1e",
"versions.yml:md5,799b136592e7434ff7eb9ddcc70e7e41",
"versions.yml:md5,8a4b29c3089d4b00cfe6c5c39b88d1ab",
"versions.yml:md5,b9424dde80b33e84164cc956a14aa459"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-29T10:47:08.425030144"
},
"[bam, bai], fasta, fai, bed - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false,
"project": "project",
"sex": 1
},
"test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"1": [
[
{
"id": "project"
},
"project.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"2": [
[
{
"id": "test",
"single_end": false,
"project": "project",
"sex": 1
},
"test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"3": [
[
{
"id": "test",
"single_end": false,
"project": "project",
"sex": 1
},
"test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"4": [
"versions.yml:md5,52272b464e62cb9e5d41622ea76cd070",
"versions.yml:md5,6576546ea5cf2a0cb6438b4c6758fd1e",
"versions.yml:md5,799b136592e7434ff7eb9ddcc70e7e41",
"versions.yml:md5,8a4b29c3089d4b00cfe6c5c39b88d1ab",
"versions.yml:md5,b9424dde80b33e84164cc956a14aa459"
],
"project_vcf": [
[
{
"id": "project"
},
"project.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"sample_bai": [
[
{
"id": "test",
"single_end": false,
"project": "project",
"sex": 1
},
"test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"sample_bam": [
[
{
"id": "test",
"single_end": false,
"project": "project",
"sex": 1
},
"test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"sample_vcf": [
[
{
"id": "test",
"single_end": false,
"project": "project",
"sex": 1
},
"test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"versions": [
"versions.yml:md5,52272b464e62cb9e5d41622ea76cd070",
"versions.yml:md5,6576546ea5cf2a0cb6438b4c6758fd1e",
"versions.yml:md5,799b136592e7434ff7eb9ddcc70e7e41",
"versions.yml:md5,8a4b29c3089d4b00cfe6c5c39b88d1ab",
"versions.yml:md5,b9424dde80b33e84164cc956a14aa459"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-29T15:35:39.127660234"
}
}
11 changes: 11 additions & 0 deletions subworkflows/local/call_repeat_expansions/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Process settings applied when running the CALL_REPEAT_EXPANSIONS nf-test suite.
process {
// Merge into a compressed VCF (`--output-type z`).
// NOTE(review): `--force-single` is presumably needed because the test merges a
// single sample, and `--no-version` presumably keeps the header reproducible
// for snapshotting - confirm against the bcftools manual.
withName: 'CALL_REPEAT_EXPANSIONS:BCFTOOLS_MERGE' {
ext.args = '--output-type z --force-single --no-version'
}
// Write a compressed VCF together with a tbi index; the subworkflow joins the
// sorted VCF with BCFTOOLS_SORT_TRGT.out.tbi before merging.
withName: 'CALL_REPEAT_EXPANSIONS:BCFTOOLS_SORT_TRGT' {
ext.args = '--output-type z --write-index=tbi'
}
// Closure so that the sample name is resolved from each task's meta map.
withName: 'CALL_REPEAT_EXPANSIONS:TRGT' {
ext.args = { "--sample-name ${meta.id}" }
}
}
2 changes: 1 addition & 1 deletion tests/.nftignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ paraphase/**/*.{vcf.gz,tbi,bam,bai,json}
phased_variants/**/*.{vcf.gz,tbi}
pipeline_info/*.{html,json,txt,yml}
qc/cramino/**/*.txt
qc/fastqc/**/*.zip
qc/fastqc/**/*.{zip,html}
qc/somalier/**/*.{html,tsv}
repeat_annotation/**/*.{vcf.gz,tbi}
repeat_calling/**/*.{vcf.gz,tbi,bam,bai}
Expand Down
Loading

0 comments on commit 9d4989b

Please sign in to comment.