Skip to content

Commit

Permalink
Modifications after extensively iterations on Virtual Machine (#86)
Browse files Browse the repository at this point in the history
* reformat reads input handling

* dev-iteration on input handling

* Revert "reformat reads input handling"

This reverts commit 311be5a.

* fix container configurations

* make tbfull use more cpus than a normal process

* fix genome name handling on cohor analysis

* replace cohort sample generation by the same used on normal analysis

* tweak input name on cohort analysis

* iterate around sample tsv file generation

* replace reads_ch by genome_names

* testing if adding `$` will fix the genome_names error

* use meta.id instead of it

* Revert "use meta.id instead of it"

This reverts commit 2c9a4e8.

* remove stilystic change

* Accommodating the new `QC` workflow

* Accommodate `REPORT` workflow and import MTBSEQ_NF on the main workflow

* remove version collection from mtbseq modules

* remove unecessary comment on main.nf

* fix module importation on qc and report

* fix typo in report workflow

* comment run main workflow

* fix QC.out.ch_versions

* move includes to the report subworkflow

* tweak on includes

* early optimization before benchmark

* emit multiqc_report on mtbseqnf

* implement base labels before benchmark

* itereation on tbpile after first benchmark

* implement optimization after first benchmark

* tweak the labels [ci skip]

---------

Co-authored-by: Abhinav Sharma <[email protected]>
  • Loading branch information
Mxrcon and abhi18av authored May 2, 2024
1 parent e7038ee commit 25440bc
Show file tree
Hide file tree
Showing 18 changed files with 112 additions and 104 deletions.
18 changes: 14 additions & 4 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,31 @@ process {
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
}
withLabel:process_single_high_memory {
cpus = 1
memory = { check_max( 10.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
}
withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
}
withLabel:process_medium {
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
memory = { check_max( 36.GB * task.attempt, 'memory' ) }
memory = { check_max( 10.GB * task.attempt, 'memory' ) }
time = { check_max( 8.h * task.attempt, 'time' ) }
}
withLabel:process_high {
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
memory = { check_max( 20.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}
withLabel:process_max {
cpus = params.max_cpus
memory = params.max_memory
time = params.max_time
}
withLabel:process_long {
time = { check_max( 20.h * task.attempt, 'time' ) }
}
Expand Down
5 changes: 1 addition & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,7 @@ process {
]
}

withName: '.*TB.*' {
withName: 'TBAMEND|TBBWA|TBFULL|TBGROUPS|TBJOIN|TBLIST|TBPILE|TBREFINE|TBSTATS|TBSTRAINS|TBVARIANTS' {
container = 'quay.io/biocontainers/mtbseq:1.1.0--hdfd78af_0'

}


}
33 changes: 3 additions & 30 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,37 +15,10 @@ nextflow.enable.dsl = 2
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { MTBSEQNF } from './workflows/mtbseqnf'
include { MTBSEQ_NF } from './workflows/mtbseqnf'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_mtbseqnf_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_mtbseqnf_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NAMED WORKFLOWS FOR PIPELINE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// WORKFLOW: Run main analysis pipeline depending on type of input
//
workflow MTBSEQNF_MTBSEQNF {

take:
samplesheet // channel: samplesheet read in from --input

main:

//
// WORKFLOW: Run pipeline
//
MTBSEQNF (
samplesheet
)

emit:
multiqc_report = MTBSEQNF.out.multiqc_report // channel: /path/to/multiqc_report.html

}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN MAIN WORKFLOW
Expand All @@ -72,7 +45,7 @@ workflow {
//
// WORKFLOW: Run main workflow
//
MTBSEQNF_MTBSEQNF (
MTBSEQ_NF (
PIPELINE_INITIALISATION.out.samplesheet
)

Expand All @@ -86,7 +59,7 @@ workflow {
params.outdir,
params.monochrome_logs,
params.hook_url,
MTBSEQNF_MTBSEQNF.out.multiqc_report
MTBSEQ_NF.out.multiqc_report
)
}

Expand Down
4 changes: 2 additions & 2 deletions modules/mtbseq/tbamend.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process TBAMEND {
tag "${params.project}"
label 'process_high'
tag "cohort"
label 'process_single'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

input:
Expand Down
2 changes: 1 addition & 1 deletion modules/mtbseq/tbbwa.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
process TBBWA {
tag "${meta.id} - ${params.project}"
tag "${meta.id}"
label 'process_medium'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

Expand Down
4 changes: 2 additions & 2 deletions modules/mtbseq/tbfull.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process TBFULL {
tag "${params.project}"
label 'process_high'
tag "cohort"
label 'process_max'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

input:
Expand Down
4 changes: 2 additions & 2 deletions modules/mtbseq/tbgroups.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process TBGROUPS {
tag "${params.project}"
label 'process_high'
tag "cohort"
label 'process_single'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

input:
Expand Down
4 changes: 2 additions & 2 deletions modules/mtbseq/tbjoin.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process TBJOIN {
tag "${params.project}"
label 'process_high'
tag "cohort"
label 'process_high_memory'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

input:
Expand Down
4 changes: 2 additions & 2 deletions modules/mtbseq/tblist.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process TBLIST {
tag "${meta.id} - ${params.project}"
label 'process_medium'
tag "${meta.id}"
label 'process_single_high_memory'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

input:
Expand Down
4 changes: 2 additions & 2 deletions modules/mtbseq/tbpile.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process TBPILE {
tag "${meta.id} - ${params.project}"
label 'process_medium'
tag "${meta.id}"
label 'process_single'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish
stageInMode 'copy'

Expand Down
2 changes: 1 addition & 1 deletion modules/mtbseq/tbrefine.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
process TBREFINE {
tag "${meta.id} - ${params.project}"
tag "${meta.id}"
label 'process_medium'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

Expand Down
4 changes: 2 additions & 2 deletions modules/mtbseq/tbstats.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process TBSTATS {
tag "${params.project}"
label 'process_medium'
tag "cohort"
label 'process_single_high_memory'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

input:
Expand Down
4 changes: 2 additions & 2 deletions modules/mtbseq/tbstrains.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process TBSTRAINS {
tag "${params.project}"
label 'process_medium'
tag "cohort"
label 'process_single'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

input:
Expand Down
4 changes: 2 additions & 2 deletions modules/mtbseq/tbvariants.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process TBVARIANTS {
tag "${meta.id} - ${params.project}"
label 'process_medium'
tag "${meta.id}"
label 'process_single'
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

input:
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/mtbseq-nf-modes/cohort_analysis.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ workflow COHORT_ANALYSIS {
main:
samples_tsv_file = genome_names
.collect()
.flatten().map { n -> $n.id + "\t" + "${params.library_name}" + "\n" }
.flatten().map { n -> "$n" + "\t" + "${params.library_name}" + "\n" }
.collectFile(name: params.cohort_tsv, newLine: false, storeDir: "${params.outdir}", cache: false)

TBJOIN(position_variants.collect(),
Expand Down
22 changes: 22 additions & 0 deletions subworkflows/local/qc/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
include { FASTQC } from '../../../modules/nf-core/fastqc/main'




workflow QC {
take:
ch_samplesheet

main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

FASTQC (ch_samplesheet)

ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
ch_versions = ch_versions.mix(FASTQC.out.versions.first())

emit:
ch_multiqc_files
ch_versions
}
41 changes: 41 additions & 0 deletions subworkflows/local/report/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
include { MULTIQC } from '../../../modules/nf-core/multiqc/main'
include { paramsSummaryMap } from 'plugin/nf-validation'
include { paramsSummaryMultiqc } from '../../nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../../nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../utils_nfcore_mtbseqnf_pipeline'


workflow REPORT {
take:
ch_multiqc_files
ch_versions
main:
softwareVersionsToYAML(ch_versions)
.collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true)
.set { ch_collated_versions }

//
// MODULE: MultiQC
//
ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty()
summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description))
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false))

MULTIQC (
ch_multiqc_files.collect(),
ch_multiqc_config.toList(),
ch_multiqc_custom_config.toList(),
ch_multiqc_logo.toList()
)

emit:
multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
versions = ch_versions // channel: [ path(versions.yml) ]
}
55 changes: 10 additions & 45 deletions workflows/mtbseqnf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,8 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { FASTQC } from '../modules/nf-core/fastqc/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { paramsSummaryMap } from 'plugin/nf-validation'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mtbseqnf_pipeline'
include { QC } from '../subworkflows/local/qc'
include { REPORT } from '../subworkflows/local/report'
include { PARALLEL_ANALYSIS } from "../subworkflows/local/mtbseq-nf-modes/parallel_analysis.nf"
include { NORMAL_ANALYSIS } from "../subworkflows/local/mtbseq-nf-modes/normal_analysis.nf"
/*
Expand All @@ -18,24 +14,17 @@ include { NORMAL_ANALYSIS } from "../subworkflows/local/mtbseq-nf-modes/normal_a
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

workflow MTBSEQNF {
workflow MTBSEQ_NF {

take:
ch_samplesheet // channel: samplesheet read in from --input

main:

ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

//
// MODULE: Run FastQC
//
FASTQC (
ch_samplesheet
)
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
QC(ch_samplesheet)
ch_versions.mix(QC.out.ch_versions)

// MTBSEQ run modes
if( params.parallel && !params.only_qc ) {
Expand All @@ -45,6 +34,7 @@ workflow MTBSEQNF {
params.intregions,
params.categories,
params.basecalib])
// ch_versions = ch_versions.mix(PARALLEL_ANALYSIS.out.versions)

} else {

Expand All @@ -54,41 +44,16 @@ workflow MTBSEQNF {
params.intregions,
params.categories,
params.basecalib])
// ch_versions = ch_versions.mix(NORMAL_ANALYSIS.out.versions)

}
/// END MTBSEQ ANALYSIS

//
// Collate and save software versions
//
softwareVersionsToYAML(ch_versions)
.collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true)
.set { ch_collated_versions }

//
// MODULE: MultiQC
//
ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty()
summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description))
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false))

MULTIQC (
ch_multiqc_files.collect(),
ch_multiqc_config.toList(),
ch_multiqc_custom_config.toList(),
ch_multiqc_logo.toList()
)
REPORT (QC.out.ch_multiqc_files, ch_versions)
multiqc_report = REPORT.out.multiqc_report

emit:
multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
versions = ch_versions // channel: [ path(versions.yml) ]
multiqc_report
}

/*
Expand Down

0 comments on commit 25440bc

Please sign in to comment.