Skip to content

Commit

Permalink
Merge pull request #1 from charles-plessy/compareMasks
Browse files Browse the repository at this point in the history
Compare and combine masked regions
  • Loading branch information
U13bs1125 authored Jul 11, 2024
2 parents 0967b82 + 538fd6f commit 179be18
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 2 deletions.
13 changes: 13 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,19 @@ process {
withName: 'WINDOWMASKER_USTAT' {
ext.args = { "-outfmt fasta" }
}

// The three *_BED selectors below configure aliases of the same SEQTK_CUTN
// module, which names its output "${prefix}.mask.bed.gz". Giving each alias a
// distinct prefix keeps the three BED files distinguishable when they are
// staged together as inputs of BEDTOOLS_CUSTOM.

// Soft-masked regions derived from the RepeatMasker-masked FASTA.
withName: 'REPEATMODELER_BED' {
ext.prefix = { "${meta.id}.repeatmasker" }
}

// Soft-masked regions derived from the tantan-masked FASTA.
withName: 'TANTAN_BED' {
ext.prefix = { "${meta.id}.tantan" }
}

// Soft-masked regions derived from the WindowMasker-masked FASTA.
withName: 'WINDOWMASKER_BED' {
ext.prefix = { "${meta.id}.windowmasker" }
}

withName: 'MULTIQC' {
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
Expand Down
48 changes: 48 additions & 0 deletions modules/local/bedtools.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
 * BEDTOOLS_CUSTOM
 *
 * Compares and combines the masked-region BED files produced by three maskers
 * (tantan, WindowMasker, RepeatMasker) for one genome.
 *
 * Input:
 *   meta         - sample metadata map; meta.id is used as tag and default prefix
 *   genome       - FASTA passed to `bedtools maskfasta -fi`
 *   tantan       - gzipped BED of tantan-masked regions
 *   windowmasker - gzipped BED of WindowMasker-masked regions
 *   repeatmasker - gzipped BED of RepeatMasker-masked regions
 *
 * Output:
 *   fasta    - ${prefix}_allmaskers.fasta.gz, the genome soft-masked with the
 *              union of the three BED files
 *   txt      - three pairwise `bedtools jaccard` reports (*_jaccard.txt)
 *   bed_gz   - merged BED files: three pairwise unions plus the union of all
 *   versions - versions.yml recording the bedtools version
 */
process BEDTOOLS_CUSTOM {
    tag "$meta.id"
    label 'process_single'

    // NOTE(review): this module shares its directory with other local modules,
    // so "${moduleDir}/environment.yml" must declare bedtools - confirm.
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' :
        'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }"

    input:
    tuple val(meta), path(genome), path(tantan), path(windowmasker), path(repeatmasker)

    output:
    tuple val(meta), path("*.fasta.gz")    , emit: fasta
    tuple val(meta), path("*_jaccard.txt") , emit: txt
    tuple val(meta), path("*.bed.gz")      , emit: bed_gz
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    bedtools jaccard -nonamecheck -a $tantan -b $windowmasker > ${prefix}_tantan_windowmasker_jaccard.txt
    bedtools jaccard -nonamecheck -a $tantan -b $repeatmasker > ${prefix}_tantan_repeatmasker_jaccard.txt
    bedtools jaccard -nonamecheck -a $repeatmasker -b $windowmasker > ${prefix}_repeatmasker_windowmasker_jaccard.txt
    zcat $tantan $windowmasker | sort -k1,1 -k2,2n | bedtools merge | gzip --best > ${prefix}_tantan_windowmasker.bed.gz
    zcat $tantan $repeatmasker | sort -k1,1 -k2,2n | bedtools merge | gzip --best > ${prefix}_tantan_repeatmasker.bed.gz
    zcat $windowmasker $repeatmasker | sort -k1,1 -k2,2n | bedtools merge | gzip --best > ${prefix}_windowmasker_repeatmasker.bed.gz
    zcat $tantan $windowmasker $repeatmasker | sort -k1,1 -k2,2n | bedtools merge | gzip --best > ${prefix}_allmaskers.bed.gz
    bedtools \\
        maskfasta \\
        -soft \\
        -fi $genome \\
        -bed ${prefix}_allmaskers.bed.gz \\
        -fo /dev/stdout |
        gzip --best > ${prefix}_allmaskers.fasta.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
    END_VERSIONS
    """

    // Stub section so that `-stub-run` creates placeholder files for every
    // declared output instead of running the real commands.
    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}_tantan_windowmasker_jaccard.txt
    touch ${prefix}_tantan_repeatmasker_jaccard.txt
    touch ${prefix}_repeatmasker_windowmasker_jaccard.txt
    echo "" | gzip > ${prefix}_tantan_windowmasker.bed.gz
    echo "" | gzip > ${prefix}_tantan_repeatmasker.bed.gz
    echo "" | gzip > ${prefix}_windowmasker_repeatmasker.bed.gz
    echo "" | gzip > ${prefix}_allmaskers.bed.gz
    echo "" | gzip > ${prefix}_allmaskers.fasta.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
    END_VERSIONS
    """
}
49 changes: 49 additions & 0 deletions modules/local/seqtk.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
 * SEQTK_CUTN
 *
 * Extracts the coordinates of soft-masked (lowercase a/c/g/t) regions of a
 * FASTA file as a sorted, gzip-compressed BED file.
 *
 * Input:
 *   meta  - sample metadata map; meta.id is used as tag and default prefix
 *   fasta - soft-masked FASTA, read as plain text by awk
 *
 * Output:
 *   bed_gz   - ${prefix}.mask.bed.gz
 *   versions - versions.yml recording the seqtk version
 *
 * task.ext.args is appended to the `seqtk cutN` call after the default
 * options; it defaults to an empty string.
 */
process SEQTK_CUTN {
    tag "$meta.id"
    label 'process_low'

    // NOTE(review): this module shares its directory with other local modules,
    // so "${moduleDir}/environment.yml" must declare seqtk - confirm.
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' :
        'biocontainers/seqtk:1.4--he4a0461_1' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("*.bed.gz") , emit: bed_gz
    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // The awk step rewrites lowercase a/c/g/t on sequence lines to "N" (header
    // lines are passed through untouched) and the result is piped to
    // `seqtk cutN -gn 1`.
    // NOTE(review): N runs already present in the input would presumably be
    // reported as well - confirm that this is intended.
    """
    # Produces a compressed BED-3 file with the coordinates of soft-masked regions.
    awk '/^>/ {print; next} {gsub(/[acgt]/, "N"); print}' $fasta |
        seqtk cutN -gn 1 $args - |
        sort -k1,1 -k2,2n | gzip --best > ${prefix}.mask.bed.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"

    """
    touch ${prefix}.mask.bed
    gzip --best ${prefix}.mask.bed

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
    END_VERSIONS
    """

}
44 changes: 42 additions & 2 deletions workflows/pairgenomealignmask.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ include { REPEATMODELER_REPEATMODELER } from '../modules/nf-core/repeatmodeler/r
include { REPEATMODELER_MASKER } from '../modules/nf-core/repeatmodeler/repeatmasker/main'
include { REPEATMODELER_BUILDDATABASE } from '../modules/nf-core/repeatmodeler/builddatabase/main'
include { TANTAN } from '../modules/local/tantan.nf'
include { BEDTOOLS_CUSTOM } from '../modules/local/bedtools.nf'
include { CUSTOMMODULE } from '../modules/local/custommodule.nf'
include { SEQTK_CUTN as TANTAN_BED } from '../modules/local/seqtk.nf'
include { SEQTK_CUTN as WINDOWMASKER_BED } from '../modules/local/seqtk.nf'
include { SEQTK_CUTN as REPEATMODELER_BED } from '../modules/local/seqtk.nf'
include { GFASTATS as GFSTTANTAN } from '../modules/nf-core/gfastats/main'
include { GFASTATS as GFSTREPEATMOD } from '../modules/nf-core/gfastats/main'
include { GFASTATS as GFSTWINDOWMASK } from '../modules/nf-core/gfastats/main'
Expand Down Expand Up @@ -49,7 +53,14 @@ workflow PAIRGENOMEALIGNMASK {
GFSTTANTAN (
TANTAN.out.masked_fa
)


//
// MODULE: tantan_bed
//
// Invoke with a parenthesised argument list, like the other process calls in
// this workflow, rather than passing a closure literal.
TANTAN_BED (
    TANTAN.out.masked_fa
)

// MODULE: repeatmodeler_builddatabase
//
REPEATMODELER_BUILDDATABASE (
Expand Down Expand Up @@ -77,6 +88,13 @@ workflow PAIRGENOMEALIGNMASK {
REPEATMODELER_MASKER.out.fasta
)

//
// MODULE: repeatmodeler_bed
//
// Invoke with a parenthesised argument list, like the other process calls in
// this workflow, rather than passing a closure literal.
REPEATMODELER_BED (
    REPEATMODELER_MASKER.out.fasta
)

//
// MODULE: windowmasker_mkcounts
//
Expand All @@ -98,6 +116,13 @@ workflow PAIRGENOMEALIGNMASK {
WINDOWMASKER_USTAT.out.intervals
)

//
// MODULE: windowmasker_bed
//
// Invoke with a parenthesised argument list, like the other process calls in
// this workflow, rather than passing a closure literal.
WINDOWMASKER_BED (
    WINDOWMASKER_USTAT.out.intervals
)

//
// MODULE: CUSTOMMODULE
//
Expand All @@ -108,8 +133,23 @@ workflow PAIRGENOMEALIGNMASK {
)
ch_multiqc_files = ch_multiqc_files.mix(CUSTOMMODULE.out.tsv)

//
// MODULE: bedtools_custom
//
// Build the single input tuple expected by BEDTOOLS_CUSTOM:
//   (meta, genome, tantan, windowmasker, repeatmasker)
// The joins are chained in the same order as the process input declaration,
// so the three BED files land in the matching positions.
// NOTE(review): assumes ch_samplesheet emits (meta, genome) tuples whose meta
// matches the meta emitted by the three *_BED processes - confirm.
BEDTOOLS_CUSTOM (
ch_samplesheet
.join(TANTAN_BED.out.bed_gz)
.join(WINDOWMASKER_BED.out.bed_gz)
.join(REPEATMODELER_BED.out.bed_gz)
)

// Collect one versions.yml per tool into ch_versions. `.first()` keeps a
// single versions file per process. WINDOWMASKER_MKCOUNTS is mixed in exactly
// once here.
ch_versions = ch_versions
    .mix(WINDOWMASKER_MKCOUNTS.out.versions.first())
    .mix(TANTAN.out.versions.first())
    .mix(REPEATMODELER_REPEATMODELER.out.versions.first())
    .mix(GFSTWINDOWMASK.out.versions.first())
    .mix(TANTAN_BED.out.versions.first())
    .mix(BEDTOOLS_CUSTOM.out.versions.first())

//
// Collate and save software versions
Expand Down

0 comments on commit 179be18

Please sign in to comment.