forked from nf-core/modules
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
new module : jvarkit/vcffilterjdk (nf-core#6621)
* vcffilterdjdk * update params * update params * oppsss tag and TODO * target/region * answers to review * f...g space * fix conda problem https://nfcore.slack.com/archives/CJRH30T6V/p1726233311260959 * add test+bed * reset polyx * prevent test exception md5sum for empty file * update main.nf.test * update meta.yml * remove suggestion --------- Co-authored-by: James A. Fellows Yates <[email protected]>
- Loading branch information
1 parent
6866c4e
commit af02735
Showing
7 changed files
with
385 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
# Conda environment for the module: jvarkit plus the bcftools used on both sides of the pipe. | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
dependencies: | ||
- "bioconda::jvarkit=2024.08.25" | ||
# channel and package are separated by a double colon, same as the jvarkit entry | ||
- "bioconda::bcftools=1.20" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
// Filters a VCF/BCF with jvarkit vcffilterjdk, piped between two `bcftools view` calls. | ||
// Extra command-line arguments are taken from the module configuration: | ||
//   task.ext.args1 -> first `bcftools view` (reads the input, applies the optional regions/targets file) | ||
//   task.ext.args2 -> `jvarkit vcffilterjdk` | ||
//   task.ext.args3 -> last `bcftools view` (writes the output; its output type selects the file extension) | ||
process JVARKIT_VCFFILTERJDK { | ||
tag "$meta.id" | ||
label 'process_single' | ||
|
||
conda "${moduleDir}/environment.yml" | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/jvarkit:2024.08.25--hdfd78af_1': | ||
'biocontainers/jvarkit:2024.08.25--hdfd78af_1' }" | ||
|
||
input: | ||
// vcf is required; tbi and regions_file may be empty ([]) | ||
tuple val(meta), path(vcf), path(tbi), path(regions_file) | ||
// fasta, fai and dict are staged in the work directory but are not referenced on the command line below | ||
tuple val(meta2), path(fasta) | ||
tuple val(meta3), path(fai) | ||
tuple val(meta4), path(dict) | ||
// code and pedigree may be empty ([]); when present they become --script / --pedigree | ||
tuple val(meta5), path(code) | ||
tuple val(meta6), path(pedigree) | ||
|
||
output: | ||
tuple val(meta), path("*.${extension}"), emit: vcf | ||
// no indexing command is run explicitly; an index only appears if args3 makes bcftools write one | ||
tuple val(meta), path("*.tbi") , emit: tbi, optional: true | ||
tuple val(meta), path("*.csi") , emit: csi, optional: true | ||
path "versions.yml" , emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
script: | ||
def args1 = task.ext.args1 ?: '' | ||
def args2 = task.ext.args2 ?: '' | ||
def args3 = task.ext.args3 ?: '' | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
def script_file = code ? "--script \"${code}\"" : "" | ||
def pedigree_file = pedigree ? " --pedigree \"${pedigree}\" " : "" | ||
// with an index the file is passed as --regions-file, without one as --targets-file | ||
def regions_cmd = regions_file ? (tbi ? " --regions-file" : " --targets-file") + " \"${regions_file}\" " : "" | ||
// task.memory is null when no memory directive is configured; only set the heap size when it is known. | ||
// Megabytes are used so that allocations below 1 GB do not collapse to -Xmx0g. | ||
def jvm_heap = task.memory ? "-Xmx${task.memory.mega}m" : "" | ||
|
||
// declared without `def` on purpose so the output block can use it in "*.${extension}" | ||
extension = getVcfExtension(args3); /* custom function, see below */ | ||
|
||
if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" | ||
""" | ||
mkdir -p TMP | ||
bcftools view \\ | ||
-O v \\ | ||
${regions_cmd} \\ | ||
${args1} \\ | ||
"${vcf}" |\\ | ||
jvarkit ${jvm_heap} -XX:-UsePerfData -Djava.io.tmpdir=TMP vcffilterjdk \\ | ||
${pedigree_file} \\ | ||
${script_file} \\ | ||
${args2} |\\ | ||
bcftools view \\ | ||
--output "${prefix}.${extension}" \\ | ||
${args3} | ||
rm -rf TMP | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') | ||
jvarkit: \$(jvarkit -v) | ||
END_VERSIONS | ||
""" | ||
|
||
stub: | ||
// creates an empty output file with the same name the real script would produce | ||
def args3 = task.ext.args3 ?: '' | ||
extension = getVcfExtension(args3); /* custom function, see below */ | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
""" | ||
touch "${prefix}.${extension}" | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') | ||
jvarkit: \$(jvarkit -v) | ||
END_VERSIONS | ||
""" | ||
} | ||
|
||
|
||
|
||
// Custom function to get the VCF extension. | ||
// Maps the bcftools output type found in `args` to the output file extension: | ||
//   b -> "bcf.gz", u -> "bcf", z -> "vcf.gz", v -> "vcf"; "vcf" when no output type is given. | ||
// Recognised spellings: `-Ob`, `-O b`, `--output-type b`, `--output-type=b`. | ||
// A null `args` is treated as an empty string. | ||
String getVcfExtension(String args) { | ||
// insertion order is the lookup precedence: b, then u, then z, then v | ||
def extensions = [b: "bcf.gz", u: "bcf", z: "vcf.gz", v: "vcf"] | ||
def requested = args ?: "" | ||
def hit = extensions.find { type, ext -> (requested =~ /(?:--output-type[ =]|-O ?)${type}/).find() } | ||
return hit ? hit.value : "vcf"; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||
name: "jvarkit_vcffilterjdk" | ||
description: Filtering VCF with dynamically-compiled java expressions | ||
keywords: | ||
- vcf | ||
- bcf | ||
- filter | ||
- variant | ||
- java | ||
- script | ||
tools: | ||
- "jvarkit": | ||
description: "Java utilities for Bioinformatics." | ||
homepage: "https://github.com/lindenb/jvarkit" | ||
documentation: "https://jvarkit.readthedocs.io/" | ||
tool_dev_url: "https://github.com/lindenb/jvarkit" | ||
doi: "10.1093/bioinformatics/btx734" | ||
licence: ["MIT License"] | ||
args_id: "$args2" | ||
|
||
- "bcftools": | ||
description: | | ||
View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF | ||
homepage: "http://samtools.github.io/bcftools/bcftools.html" | ||
documentation: "http://www.htslib.org/doc/bcftools.html" | ||
doi: "10.1093/bioinformatics/btp352" | ||
licence: ["MIT"] | ||
args_id: ["$args1", "$args3"] | ||
input: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing VCF information | ||
e.g. [ id:'test_reference' ] | ||
- vcf: | ||
type: file | ||
description: Input VCF/BCF file | ||
pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" | ||
- tbi: | ||
type: file | ||
description: Optional VCF/BCF index file | ||
pattern: "*.{tbi,csi}" | ||
- regions_file: | ||
type: file | ||
description: Optional. Restrict to regions listed in a file | ||
pattern: "*.{bed,bed.gz,txt,tsv}" | ||
- meta2: | ||
type: map | ||
description: | | ||
Groovy Map containing fasta information | ||
e.g. [ id:'test_reference' ] | ||
- fasta: | ||
type: file | ||
description: Fasta reference file | ||
pattern: "*.fasta" | ||
- meta3: | ||
type: map | ||
description: | | ||
Groovy Map containing fasta.fai information | ||
e.g. [ id:'test_reference' ] | ||
- fai: | ||
type: file | ||
description: Fasta file index | ||
pattern: "*.fasta.fai" | ||
- meta4: | ||
type: map | ||
description: | | ||
Groovy Map containing fasta.dict information | ||
e.g. [ id:'test_reference' ] | ||
- dict: | ||
type: file | ||
description: GATK sequence dictionary | ||
pattern: "*.dict" | ||
- meta5: | ||
type: map | ||
description: | | ||
Groovy Map containing code information | ||
e.g. [ id:'test_reference' ] | ||
- code: | ||
type: file | ||
description: File containing custom user code. May be empty if the script is provided via `task.ext.args2`. | ||
pattern: "*.{code,script,txt,tsv,java,js}" | ||
- meta6: | ||
type: map | ||
description: | | ||
Groovy Map containing pedigree information | ||
e.g. [ id:'test_reference' ] | ||
- pedigree: | ||
type: file | ||
description: Optional jvarkit pedigree. | ||
pattern: "*.{tsv,ped,pedigree}" | ||
output: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing VCF information | ||
e.g. [ id:'test', single_end:false ] | ||
- vcf: | ||
type: file | ||
description: VCF filtered output file | ||
pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" | ||
- csi: | ||
type: file | ||
description: Default VCF file index | ||
pattern: "*.csi" | ||
- tbi: | ||
type: file | ||
description: Alternative VCF file index | ||
pattern: "*.tbi" | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
authors: | ||
- "@lindenb" | ||
maintainers: | ||
- "@lindenb" |
119 changes: 119 additions & 0 deletions
119
modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
// nf-core modules test jvarkit/vcffilterjdk | ||
nextflow_process { | ||
|
||
name "Test Process JVARKIT_VCFFILTERJDK" | ||
script "../main.nf" | ||
process "JVARKIT_VCFFILTERJDK" | ||
config "./nextflow.config" | ||
|
||
|
||
tag "modules" | ||
tag "modules_nfcore" | ||
tag "jvarkit" | ||
tag "jvarkit/vcffilterjdk" | ||
|
||
// Plain VCF input with no index, regions file, code file or pedigree. | ||
// Snapshots the variants MD5 of the output VCF together with versions.yml. | ||
test("sarscov2 - vcf") { | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] =[ | ||
[id:"vcf_test"], | ||
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), | ||
[], | ||
[] | ||
] | ||
input[1] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] | ||
input[2] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] | ||
input[3] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ] | ||
input[4] = [ [] , []] | ||
input[5] = [ [] , []] | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot( | ||
path(process.out.vcf[0][1]).vcf.variantsMD5, | ||
process.out.versions | ||
).match() | ||
} | ||
) | ||
} | ||
|
||
} | ||
|
||
|
||
|
||
// Same VCF with a BED regions file and no index. | ||
// Only checks that the output file exists and snapshots versions.yml; the VCF content is not snapshotted. | ||
test("sarscov2 - vcf+bed") { | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] =[ | ||
[id:"vcf_test"], | ||
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), | ||
[], | ||
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) | ||
] | ||
input[1] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] | ||
input[2] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] | ||
input[3] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ] | ||
input[4] = [ [] , []] | ||
input[5] = [ [] , []] | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert file(process.out.vcf[0][1]).exists() }, | ||
{ assert snapshot(process.out.versions).match() | ||
} | ||
) | ||
} | ||
} | ||
|
||
|
||
|
||
|
||
// Stub run (-stub) with the same inputs as the plain VCF test. | ||
// Snapshots the stub output file together with versions.yml. | ||
test("sarscov2 - vcf - stub") { | ||
|
||
options "-stub" | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] =[ | ||
[id:"vcf_test"], | ||
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), | ||
[], | ||
[] | ||
] | ||
input[1] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] | ||
input[2] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] | ||
input[3] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ] | ||
input[4] = [ [] , []] | ||
input[5] = [ [] , []] | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot( | ||
path(process.out.vcf[0][1]), | ||
process.out.versions | ||
).match() | ||
} | ||
) | ||
} | ||
|
||
} | ||
|
||
|
||
} |
45 changes: 45 additions & 0 deletions
45
modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
{ | ||
"sarscov2 - vcf": { | ||
"content": [ | ||
"335cdc0f8c403378e1e9d75c41c3736f", | ||
[ | ||
"versions.yml:md5,3601751995727e2ee7102d8ef18e5304" | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-03T14:00:13.118369362" | ||
}, | ||
|
||
|
||
"sarscov2 - vcf+bed": { | ||
"content": [ | ||
[ | ||
"versions.yml:md5,3601751995727e2ee7102d8ef18e5304" | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-03T14:00:13.118369362" | ||
}, | ||
|
||
|
||
"sarscov2 - vcf - stub": { | ||
"content": [ | ||
"vcf_test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
[ | ||
"versions.yml:md5,3601751995727e2ee7102d8ef18e5304" | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-03T14:00:13.118369362" | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// Test configuration for JVARKIT_VCFFILTERJDK. | ||
// Sets ext.args2 to a --expression that returns true for variants whose start position is odd. | ||
process { | ||
withName: JVARKIT_VCFFILTERJDK { | ||
ext.args2=" --expression 'return variant.getStart()%2==1;' " | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
jvarkit/vcffilterjdk: | ||
- "modules/nf-core/jvarkit/vcffilterjdk/**" |