diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7a0134154..a2e74c7a3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,7 @@ jobs: - "-profile test,docker --mapping_tool bowtie2 --damagecalculation_tool mapdamage --damagecalculation_mapdamage_downsample 100 --run_genotyping --genotyping_tool 'hc' --genotyping_source 'raw'" - "-profile test,docker --skip_preprocessing" - "-profile test_humanbam,docker --run_mtnucratio --run_contamination_estimation_angsd --snpcapture_bed 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' --run_genotyping --genotyping_tool 'pileupcaller' --genotyping_source 'raw'" + - "-profile test_humanbam,docker --run_sexdeterrmine" - "-profile test_multiref,docker" ## TODO add damage manipulation here instead once it goes multiref steps: - name: Check out pipeline code diff --git a/CITATIONS.md b/CITATIONS.md index c6e0b8b68..3715b56a8 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -122,6 +122,10 @@ > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics (2011) 27(21) 2987-93.doi: [10.1093/bioinformatics/btr509](https://doi.org/10.1093/bioinformatics/btr509). + - [Sex.DetERRmine.py](http://dx.doi.org/10.1038/s41467-018-07483-5) + + > Sex.DetERRmine.py Lamnidis, T.C. et al., 2018. Ancient Fennoscandian genomes reveal origin and spread of Siberian ancestry in Europe. Nature communications, 9(1), p.5018. Available at: http://dx.doi.org/10.1038/s41467-018-07483-5. Download: https://github.com/TCLamnidis/Sex.DetERRmine + ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/conf/modules.config b/conf/modules.config index 621a984e9..efdc216a4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -919,6 +919,29 @@ process { ] } + // + // RUN SEXDETERRMINE + // + withName: SAMTOOLS_DEPTH_SEXDETERRMINE { + tag = { "${meta1.reference}|${meta1.sample_id}_${meta1.library_id}" } + ext.prefix = { "${meta2.id}_samtoolsdepth" } + ext.args = '-aa -q30 -Q30 -H' + publishDir = [ + enabled: false + ] + } + + withName: SEXDETERRMINE { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.prefix = { "${meta.reference}_sexdeterrmine" } + publishDir = [ + path: { "${params.outdir}/sex_determination/" }, + mode: params.publish_dir_mode, + pattern: '*{_sexdeterrmine}*', + enabled: true + ] + } + // // LIBRARY MERGE // diff --git a/conf/test_humanbam.config b/conf/test_humanbam.config index d2badc5c1..68df288f2 100644 --- a/conf/test_humanbam.config +++ b/conf/test_humanbam.config @@ -36,7 +36,7 @@ params { // TODO Reactivate sexDet and genotyping params when those steps get implemented. // //Sex Determination - // sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' + sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' // // Genotyping genotyping_pileupcaller_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' genotyping_pileupcaller_snpfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K_covered_in_JK2067_downsampled_s0.1.numeric_chromosomes.snp' diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md index a71a5afe0..41aa2defa 100644 --- a/docs/development/manual_tests.md +++ b/docs/development/manual_tests.md @@ -110,6 +110,12 @@ Tool Specific combinations - single reference: with damage manipulation (pmd + trimming), on pmd filtered data ✅ - multi reference: no damage manipulation ✅ +- Sex determination + + - With sexdeterrmine + + - with default parameters + ### Multi-reference tests ```bash @@ -739,6 +745,14 @@ nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume -- nextflow run main.nf -profile test_multiref,docker --outdir ./results -w work/ -resume --genotyping_source 'raw' -ansi-log false -dump-channels ``` +# Run Sexdeterrmine + +```bash +## Running sex determination subworkflow from deduplicated bams +## Expect: sex_deterrmine/sexdeterrmine directory with tsv summary table for all individuals. +nextflow run main.nf -profile test_humanbam,arm,docker --outdir ./results --run_sexdeterrmine +``` + # GENOTYPING These tests were ran before library merging was implemented. diff --git a/docs/output.md b/docs/output.md index 144c7bc21..691e4db12 100644 --- a/docs/output.md +++ b/docs/output.md @@ -541,9 +541,25 @@ is a tool which calculates a variety of standard 'aDNA' metrics from a BAM file. [ANGSD](http://www.popgen.dk/angsd/index.php/ANGSD) is a software for analyzing next generation sequencing data. Among other functions, ANGSD can estimate contamination for chromosomes for which one copy exists, i.e. X-chromosome for humans with karyotype XY. To do this, we first generate a binary count file for the X-chromosome (`angsd`) and then perform a Fisher's exact test for finding a p-value and jackknife to get an estimate of contamination (`contamination`). Contamination is estimated with Method of Moments (MOM) and Maximum Likelihood (ML) for both Method1 and Method2. Method1 compares the total number of minor and major reads at SNP sites with the number of minor and major reads at adjacent sites, assuming independent errors between reads and sites, while Method2 only samples one read at each site to remove the previous assumption. The results of all methods for each library, as well as respective standard errors are summarised in `nuclear_contamination.txt` and `nuclear_contamination_mqc.json`. +### Sex Determination + +
+Output files + +- `sex_determination/`: this contains the output for the sex determination run. This is a single `.tsv` file that includes a table with the sample name, the number of autosomal SNPs, number of SNPs on the X/Y chromosome, the number of reads mapping to the autosomes, the number of reads mapping to the X/Y chromosome, the relative coverage on the X/Y chromosomes, and the standard error associated with the relative coverages. These measures are provided for each bam file, one row per file. If the `sexdeterrmine_bedfile` option has not been provided, the error bars cannot be trusted! +-
+ +#### Sex.DetERRmine + +Sex.DetERRmine calculates the coverage of your mapped reads on the X and Y chromosomes relative to the coverage on the autosomes (X-/Y-rate). This metric can be thought of as the number of copies of chromosomes X and Y that is found within each cell, relative to the autosomal copies. The number of autosomal copies is assumed to be two, meaning that an X-rate of 1.0 means there are two X chromosomes in each cell, while 0.5 means there is a single copy of the X chromosome per cell. Human females have two copies of the X chromosome and no Y chromosome (XX), while human males have one copy of each of the X and Y chromosomes (XY). + +When a bedfile of specific sites is provided, Sex.DetERRmine runs much faster and additionally calculates error bars around each relative coverage estimate. For this estimate to be trustworthy, the sites included in the bedfile should be spaced apart enough that a single sequencing read cannot overlap multiple sites. Hence, when a bedfile has not been provided, this error should be ignored. When a suitable bedfile is provided, each observation of a covered site is independent, and the error around the coverage is equal to the binomial error estimate. This error is then propagated during the calculation of relative coverage for the X and Y chromosomes. + +> Note that in nf-core/eager this will be run on single- and double-stranded variants of the same library separately. This can also help assess for differential contamination between libraries. + ### Genotyping -### pileupCaller +#### pileupCaller
Output files @@ -561,7 +577,7 @@ is a tool which calculates a variety of standard 'aDNA' metrics from a BAM file. When using pileupCaller for genotyping, single-stranded and double-stranded libraries are genotyped separately. Single-stranded libraries are genotyped with the additional option `--singeStrandMode`, which ensure that deamination damage artefactts cannot affect the genotype calls, by only using the forward- or reverse-mapping reads when genotyping on transitions (depending on the alleles of the transition). -### GATK UnifiedGenotyper +#### GATK UnifiedGenotyper
Output files @@ -576,7 +592,7 @@ When using pileupCaller for genotyping, single-stranded and double-stranded libr [GATK's UnifiedGenotyper](https://github.com/broadinstitute/gatk-docs/blob/master/gatk3-tooldocs/3.5-0/org_broadinstitute_gatk_tools_walkers_genotyper_UnifiedGenotyper.html) uses a Bayesian genotype likelihood model to estimate simultaneously the most likely genotypes and allele frequency in a population of N samples, emitting a genotype for each sample. The system can either emit just the variant sites or complete genotypes (which includes homozygous reference calls) satisfying some phred-scaled confidence value. This tool has been deprecated by the GATK developers in favour of HaplotypeCaller, but is still cosidered a preferable genotyper for ancient DNA, given its ability to handle low coverage data. The output provided is a bgzipped VCF file for containing the genotype calls of each sample, it's index file, as well as the statistics of the VCF file generated by `bcftools stats`. -### GATK HaplotypeCaller +#### GATK HaplotypeCaller
Output files @@ -591,7 +607,7 @@ When using pileupCaller for genotyping, single-stranded and double-stranded libr [GATK's HaplotypeCaller](https://gatk.broadinstitute.org/hc/en-us/articles/13832687299739-HaplotypeCaller) is capable of calling SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region. In addition, HaplotypeCaller is able to handle non-diploid organisms as well as pooled experiment data. This is the preferred genotyper for modern DNA. The output provided is a bgzipped VCF file for containing the genotype calls of each sample, it's index file, as well as the statistics of the VCF file generated by `bcftools stats`. -### FreeBayes +#### FreeBayes
Output files diff --git a/modules.json b/modules.json index f966fd082..1b9488483 100644 --- a/modules.json +++ b/modules.json @@ -62,12 +62,12 @@ }, "bwa/aln": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "6278bf9afd4a4b2d00fa6052250e73da3d91546f", "installed_by": ["fastq_align_bwaaln"] }, "bwa/index": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "6278bf9afd4a4b2d00fa6052250e73da3d91546f", "installed_by": ["modules"] }, "bwa/mem": { @@ -117,7 +117,7 @@ }, "fastp": { "branch": "master", - "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "git_sha": "003920c7f9a8ae19b69a97171922880220bedf56", "installed_by": ["modules"] }, "fastqc": { @@ -205,6 +205,11 @@ "git_sha": "6b0e4fe14ca1b12e131f64608f0bbaf36fd11451", "installed_by": ["modules"] }, + "samtools/depth": { + "branch": "master", + "git_sha": "a1ffbc1fd87bd5a829e956cc26ec9cc53af3e817", + "installed_by": ["modules"] + }, "samtools/faidx": { "branch": "master", "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", @@ -212,27 +217,27 @@ }, "samtools/fastq": { "branch": "master", - "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "git_sha": "8d8f0ae52d6c9342bd41c33dda0b74b07e32153d", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "git_sha": "8d8f0ae52d6c9342bd41c33dda0b74b07e32153d", "installed_by": ["modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "git_sha": "8d8f0ae52d6c9342bd41c33dda0b74b07e32153d", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "git_sha": "8d8f0ae52d6c9342bd41c33dda0b74b07e32153d", "installed_by": ["bam_split_by_region", "fastq_align_bwaaln"] }, "samtools/merge": { "branch": "master", - "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "git_sha": "8d8f0ae52d6c9342bd41c33dda0b74b07e32153d", "installed_by": ["modules"] }, "samtools/mpileup": { @@ -242,13 +247,13 @@ }, "samtools/sort": { "branch": "master", - "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", + "git_sha": "8d8f0ae52d6c9342bd41c33dda0b74b07e32153d", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", - "installed_by": ["bam_split_by_region", "modules"] + "git_sha": "8d8f0ae52d6c9342bd41c33dda0b74b07e32153d", + "installed_by": ["bam_split_by_region"] }, "seqkit/split2": { "branch": "master", @@ -259,6 +264,11 @@ "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] + }, + "sexdeterrmine": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/samtools/depth/main.nf b/modules/nf-core/samtools/depth/main.nf new file mode 100644 index 000000000..64b0e3a53 --- /dev/null +++ b/modules/nf-core/samtools/depth/main.nf @@ -0,0 +1,39 @@ +process SAMTOOLS_DEPTH { + tag "$meta1.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta1), path(bam) + tuple val(meta2), path(intervals) + + output: + tuple val(meta1), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta1.id}" + def positions = intervals ? "-b ${intervals}" : "" + """ + samtools \\ + depth \\ + --threads ${task.cpus-1} \\ + $args \\ + $positions \\ + -o ${prefix}.tsv \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/depth/meta.yml b/modules/nf-core/samtools/depth/meta.yml new file mode 100644 index 000000000..f774c0f2c --- /dev/null +++ b/modules/nf-core/samtools/depth/meta.yml @@ -0,0 +1,54 @@ +name: samtools_depth +description: Computes the depth at each position or region. +keywords: + - depth + - samtools + - statistics + - coverage +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files; samtools depth – computes the read depth at each position or region + homepage: http://www.htslib.org + documentation: http://www.htslib.org/doc/samtools-depth.html + tool_dev_url: https://github.com/samtools/samtools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + +input: + - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - intervals: + type: file + description: list of positions or regions in specified bed file + pattern: "*.{bed}" + +output: + - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tsv: + type: file + description: The output of samtools depth has three columns - the name of the contig or chromosome, the position and the number of reads aligned at that position + pattern: "*.{tsv}" + +authors: + - "@louperelo" + - "@nevinwu" diff --git a/modules/nf-core/sexdeterrmine/main.nf b/modules/nf-core/sexdeterrmine/main.nf new file mode 100644 index 000000000..3f3ef2b79 --- /dev/null +++ b/modules/nf-core/sexdeterrmine/main.nf @@ -0,0 +1,40 @@ +process SEXDETERRMINE { + tag "$meta.id" + label 'process_single' + + conda "bioconda::sexdeterrmine=1.1.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sexdeterrmine:1.1.2--hdfd78af_1': + 'biocontainers/sexdeterrmine:1.1.2--hdfd78af_1' }" + + input: + tuple val(meta), path(depth) + path sample_list_file + + output: + tuple val(meta), path("*.json"), emit: json + tuple val(meta), path("*.tsv") , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sample_list = sample_list_file ? '-f ${sample_list_file}' : '' + if ("$depth" == "${prefix}.tsv") error "Input depth and output TSV names are the same, set prefix in module configuration to disambiguate!" + + """ + sexdeterrmine \\ + -I $depth \\ + $sample_list \\ + $args \\ + > ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sexdeterrmine: \$(echo \$(sexdeterrmine --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/sexdeterrmine/meta.yml b/modules/nf-core/sexdeterrmine/meta.yml new file mode 100644 index 000000000..b2b74eff4 --- /dev/null +++ b/modules/nf-core/sexdeterrmine/meta.yml @@ -0,0 +1,48 @@ +name: "sexdeterrmine" +description: Calculate the relative coverage on the Gonosomes vs Autosomes from the output of samtools depth, with error bars. +keywords: + - sex determination + - genetic sex + - relative coverage + - ancient dna +tools: + - "sexdeterrmine": + description: "A python script carry out calculate the relative coverage of X and Y chromosomes, and their associated error bars, out of capture data." + homepage: "https://github.com/TCLamnidis/Sex.DetERRmine" + documentation: "https://github.com/TCLamnidis/Sex.DetERRmine/README.md" + tool_dev_url: "https://github.com/TCLamnidis/Sex.DetERRmine" + doi: "10.1038/s41467-018-07483-5" + licence: "['GPL v3']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - depth: + type: file + description: Output from samtools depth (with header) + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - json: + type: file + description: JSON formatted table of relative coverages on the X and Y, with associated error bars. + pattern: "*.json" + - tsv: + type: file + description: TSV table of relative coverages on the X and Y, with associated error bars. + pattern: "*.tsv" + +authors: + - "@TCLamnidis" diff --git a/nextflow.config b/nextflow.config index 573cf211d..5865a2943 100644 --- a/nextflow.config +++ b/nextflow.config @@ -200,8 +200,11 @@ params { damage_manipulation_bamutils_trim_single_stranded_half_udg_right = 0 damage_manipulation_bamutils_softclip = false + // Sex determination + run_sexdeterrmine = false + sexdeterrmine_bedfile = null + // Genotyping - // TODO GATK and FREEBAYES ploidy options should be merged run_genotyping = false genotyping_tool = null genotyping_source = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 89c5a5062..11348bfd2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1276,6 +1276,29 @@ "fa_icon": "fas fa-map" } } + }, + "human_sex_determination": { + "title": "Human Sex Determination", + "type": "object", + "description": "Options for the calculation of biological sex of human individuals.", + "default": "", + "properties": { + "run_sexdeterrmine": { + "type": "boolean", + "default": false, + "fa_icon": "fas fa-transgender-alt", + "description": "Turn on sex determination for human reference genomes. This will run on single- and double-stranded variants of a library separately.", + "help_text": "Specify to run the optional process of sex determination." + }, + "sexdeterrmine_bedfile": { + "type": "string", + "fa_icon": "fas fa-bed", + "description": "Specify path to SNP panel in bed format for error bar calculation. Optional (see documentation).", + "help_text": "Specify an optional bedfile of the list of SNPs to be used for X-/Y-rate calculation. Running without this parameter will considerably increase runtime, and render the resulting error bars untrustworthy. Theoretically, any set of SNPs that are distant enough that two SNPs are unlikely to be covered by the same read can be used here. The programme was coded with the 1240K panel in mind." + } + }, + "fa_icon": "fas fa-transgender-alt", + "help_text": "" } }, "allOf": [ @@ -1330,6 +1353,12 @@ { "$ref": "#/definitions/host_removal" }, + { + "$ref": "#/definitions/human_sex_determination" + }, + { + "$ref": "#/definitions/contamination_estimation" + }, { "$ref": "#/definitions/contamination_estimation" } diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index b3a292f08..4206b340c 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -87,7 +87,7 @@ workflow REFERENCE_INDEXING_SINGLE { def capture_bed = params.snpcapture_bed != null ? file(params.snpcapture_bed, checkIfExists: true ) : "" def pileupcaller_bed = params.genotyping_pileupcaller_bedfile != null ? file(params.genotyping_pileupcaller_bedfile, checkIfExists: true ) : "" def pileupcaller_snp = params.genotyping_pileupcaller_snpfile != null ? file(params.genotyping_pileupcaller_snpfile, checkIfExists: true ) : "" - def sexdet_bed = "" + def sexdet_bed = params.sexdeterrmine_bedfile != null ? file(params.sexdeterrmine_bedfile, checkIfExists: true ) : "" def bedtools_feature = params.mapstats_bedtools_featurefile != null ? file(params.mapstats_bedtools_featurefile, checkIfExists: true ) : "" def genotyping_reference_ploidy = params.genotyping_reference_ploidy def genotyping_gatk_dbsnp = params.genotyping_gatk_dbsnp != null ? file(params.genotyping_gatk_dbsnp, checkIfExists: true ) : "" diff --git a/subworkflows/local/run_sex_determination.nf b/subworkflows/local/run_sex_determination.nf new file mode 100644 index 000000000..8917c1575 --- /dev/null +++ b/subworkflows/local/run_sex_determination.nf @@ -0,0 +1,66 @@ +// +// Run sex deterrmine +// + +include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' + +include { SAMTOOLS_DEPTH as SAMTOOLS_DEPTH_SEXDETERRMINE } from '../../modules/nf-core/samtools/depth/main' +include { SEXDETERRMINE } from '../../modules/nf-core/sexdeterrmine/main' + +workflow RUN_SEXDETERRMINE { + + take: + sexdeterrmine_bam // channel: [ val(meta1), [ bam ] ] + sexdeterrmine_bed // channel: [ [ meta2 ] [ intervals_bed ] ] + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( params.run_sexdeterrmine ) { + // Generate sex_determine input + // Break here the input not in the workflow + + ch_bed = sexdeterrmine_bed + .map { + // Prepend a new meta that contains the meta.id value as the new_meta.reference attribute + addNewMetaFromAttributes( it, "id" , "reference" , false ) + } + + ch_input_sexdeterrmine = sexdeterrmine_bam + .map { + // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute + addNewMetaFromAttributes( it, "reference" , "reference" , false ) + } + .groupTuple(by:0) + .combine( ch_bed, by: 0 ) // [ [meta], bam, bai, [ref_meta], bed ] + .map { + combo_meta, metas, bam, bai, ref_meta, bed -> + def ids = metas.collect { meta -> meta.id } + [ combo_meta + [id: ids], bam, ref_meta, bed ] // Drop bais + } + + ch_samtoolsdepth_input = ch_input_sexdeterrmine + .multiMap { + combo_meta, bam, ref_meta, bed -> + bam: [ combo_meta, bam ] + bed: [ ref_meta, bed ] + } + + SAMTOOLS_DEPTH_SEXDETERRMINE(ch_samtoolsdepth_input.bam, ch_samtoolsdepth_input.bed) + ch_sex_determine_input = SAMTOOLS_DEPTH_SEXDETERRMINE.out.tsv + ch_versions = ch_versions.mix( SAMTOOLS_DEPTH_SEXDETERRMINE.out.versions ) + + // Run sex determination with samtools depth input + SEXDETERRMINE(ch_sex_determine_input, []) + ch_coverages = SEXDETERRMINE.out.tsv + ch_multiqc_files = ch_multiqc_files.mix( SEXDETERRMINE.out.json ) + ch_versions = ch_versions.mix( SEXDETERRMINE.out.versions ) + + } + + emit: + coverages = ch_coverages // channel: [ val(meta), path("*.tsv") ] + mqc = ch_multiqc_files // channel: [ val(meta), path("*.json") ] + versions = ch_versions // channel: path(versions.yml) +} diff --git a/workflows/eager.nf b/workflows/eager.nf index bf56f48bd..d2e25ce20 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -30,6 +30,7 @@ include { MANIPULATE_DAMAGE } from '../subworkflows/local/manipulate include { METAGENOMICS_COMPLEXITYFILTER } from '../subworkflows/local/metagenomics_complexityfilter' include { ESTIMATE_CONTAMINATION } from '../subworkflows/local/estimate_contamination' include { CALCULATE_DAMAGE } from '../subworkflows/local/calculate_damage' +include { RUN_SEXDETERRMINE } from '../subworkflows/local/run_sex_determination' include { MERGE_LIBRARIES } from '../subworkflows/local/merge_libraries' include { GENOTYPE } from '../subworkflows/local/genotype' @@ -459,6 +460,18 @@ workflow EAGER { } + // + // SUBWORKFLOW: Run Sex Determination + // + + if ( params.run_sexdeterrmine ) { + ch_sexdeterrmine_input = ch_dedupped_bams + + RUN_SEXDETERRMINE(ch_sexdeterrmine_input, REFERENCE_INDEXING.out.sexdeterrmine_bed ) + ch_versions = ch_versions.mix( RUN_SEXDETERRMINE.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( RUN_SEXDETERRMINE.out.mqc.collect{it[1]}.ifEmpty([]) ) + } + // // SUBWORKFLOW: Contamination estimation //