-
Notifications
You must be signed in to change notification settings - Fork 13
/
main.nf
278 lines (220 loc) · 9.81 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
// Expand a leading "~" in user-supplied paths to the user's home directory
outDIR = "${params.outDIR}".replaceFirst("^~", System.getProperty("user.home"))
readDIR = "${params.readDIR}".replaceFirst("^~", System.getProperty("user.home"))
// Set boilerplate parameters (defaults; overridable on the command line)
params.QC_only = false
params.reads = "${readDIR}/*_R{1,2}*.fastq.gz"
params.amplicon_info = "$projectDir/resources/${params.target}/${params.target}_amplicon_info.tsv"
params.resmarkers_amplicon = "$projectDir/resources/${params.target}/resistance_markers_amplicon_${params.target}.txt"
params.help = false
// Files
// NOTE(review): assigned to the script binding, presumably read by the
// included subworkflows — confirm before removing.
cutadapt_minlen = params.cutadapt_minlen
/*
The 'read_pairs' channel (pair_id, R1, R2 tuples) is created inside the
main workflow below via channel.fromFilePairs().
*/
// subworkflows used by the main (full-pipeline) entry point
include { DEMULTIPLEX_AMPLICONS } from './workflows/demultiplex_amplicons.nf'
include { DENOISE_AMPLICONS_1 } from './workflows/denoise_amplicons_1.nf'
include { DENOISE_AMPLICONS_2 } from './workflows/denoise_amplicons_2.nf'
include { RESISTANCE_MARKER_MODULE } from './workflows/resistance_marker_module.nf'
include { QUALITY_CONTROL} from './workflows/quality_control.nf'
// alternate entry-point workflows (--QC_only / --denoised_asvs modes)
include { QC_ONLY } from './workflows/qc_only.nf'
include { POSTPROC_ONLY } from './workflows/postproc_only.nf'
// modules
include { BUILD_ALLELETABLE } from './modules/local/build_alleletable.nf'
/*
 * Print usage, argument descriptions, and example command lines to the log.
 * Invoked from the main workflow when --help is passed; the caller exits
 * immediately afterwards. The message text is user-facing and kept verbatim.
 */
def helpMessage() {
log.info """
Usage:
The typical command for running the pipeline is as follows:
nextflow run main.nf --readDIR data/testdata --target v4
Mandatory arguments:
--readDIR Path to folder containing fastq files
--target The amplicon panel version that was used. [Options: 4cast, ama1, v1, v2, v3, v4]
--sequencer The sequencer used to produce your data. [Options: miseq, nextseq]
Optional arguments:
--outDIR Path to folder to place final output [Default: results]
--QC_only Runs just the QC workflow when set [Default: Not set]
--denoised_asvs Path to denoised ASVs from DADA2. Used to only run the postprocessing workflow
(Nextflow parameters. Note the flags have "-" and not "--" at the start)
-profile Runtime profile [Options: sge,apptainer, docker, conda]
-config Resource configurations for each process
(DADA2 parameters)
--omega_a Level of statistical evidence required for DADA2 to infer a new ASV [Default: 1e-120]
--pool Pooling method for DADA2 to process ASVs [pseudo (default), true, false]
--band_size Limit on the net cumulative number of insertions of one sequence relative to the other in DADA2 [Default: 16]
--maxEE Limit on number of expected errors within a read during filtering and trimming within DADA2 [Default: 3]
(Post processing parameters)
--concat_non_overlaps Whether to concatenate or discard any sequences that DADA2 was unable to be merge
--refseq_fasta Path to targeted reference
--genome Path to full genome covering all targets
--homopolymer_threshold The length a homopolymer must reach to be masked [Default: 5]
--trf_min_score The alignment of a tandem repeat must meet or exceed this alignment score to be masked [Default: 25]
--trf_max_period The pattern size must be less than this to be masked [Default: 3]
Examples:
More advanced usage
nextflow run main.nf --readDIR data/testdata --outDIR results --target v4 --sequencer nextseq -config custom.config
Change runtime param to use Docker
nextflow run main.nf --readDIR data/testdata --outDIR results --target v4 -profile docker
Only run the QC workflow
nextflow run main.nf --readDIR data/testdata --outDIR results --target v4 --QC_only
Only run the Postprocessing workflow
nextflow run main.nf --outDIR postprocessing_results --target v4 --denoised_asvs results/raw_dada2_output/dada2.clusters.txt
Alter Dada2 params
nextflow run main.nf --readDIR data/testdata --outDIR results --target v4 --omega_a 1e-40 --pool false --band_size 20 --maxEE 4
Set full genome
nextflow run main.nf --readDIR data/testdata --outDIR results --target v4 --genome data/reference/v1/PkPfPmPoPv.fasta
Set targeted reference
nextflow run main.nf --readDIR data/testdata --outDIR results --target v4 --refseq_fasta resources/v4/v4_reference.fasta
Alter Masking parameters
nextflow run main.nf --readDIR data/testdata --outDIR results --target v4 --homopolymer_threshold 2 --trf_min_score 30 --trf_max_period 5
""".stripIndent()
}
// main workflow: dispatches to one of three modes based on parameters —
// QC-only (--QC_only), postprocessing-only (--denoised_asvs), or the full
// demultiplex -> denoise -> allele table -> QC pipeline.
workflow {
// Print help if requested, then stop without running anything
if (params.help) {
helpMessage()
exit 0
}
if (params.QC_only) {
// Make sure required inputs are present
check_readdir_presence(should_exist: true)
check_target()
check_sequencer()
// Run QC Only Workflow
QC_ONLY(params.reads)
} else if (params.denoised_asvs != null) {
// Postprocessing mode reads ASVs from a file, so --readDIR must NOT be set
check_readdir_presence(should_exist: false)
// Make sure the target is specified
check_target()
// Run Postprocessing only
POSTPROC_ONLY(params.denoised_asvs)
} else {
// Full pipeline: validate all required inputs first
check_readdir_presence(should_exist: true)
check_target()
check_sequencer()
// Create read pairs channel from fastq data; fails fast if no pairs match
read_pairs = channel.fromFilePairs( params.reads, checkIfExists: true )
// Trim and demultiplex amplicons by amplicon
DEMULTIPLEX_AMPLICONS(read_pairs)
// Denoising (DADA specific)
DENOISE_AMPLICONS_1(
DEMULTIPLEX_AMPLICONS.out.demux_fastqs_ch
)
// Masking, collapsing ASVs
DENOISE_AMPLICONS_2(
DENOISE_AMPLICONS_1.out.denoise_ch
)
// Finally create the final allele table from both denoising stages
BUILD_ALLELETABLE(
DENOISE_AMPLICONS_1.out.denoise_ch,
DENOISE_AMPLICONS_2.out.results_ch
)
// Create the quality report now
QUALITY_CONTROL(
DEMULTIPLEX_AMPLICONS.out.sample_summary_ch,
DEMULTIPLEX_AMPLICONS.out.amplicon_summary_ch,
BUILD_ALLELETABLE.out.alleledata,
DENOISE_AMPLICONS_1.out.denoise_ch
)
// By default, run the resistance marker module in the main workflow
// Only panel V4 is supported at the moment
if (params.target == "v4") {
RESISTANCE_MARKER_MODULE(
BUILD_ALLELETABLE.out.alleledata,
DENOISE_AMPLICONS_2.out.aligned_asv_table,
DENOISE_AMPLICONS_2.out.reference_ch
)
}
}
}
// On pipeline completion, persist run metadata (parameters + runtime info)
// under <outDIR>/run. Uses the tilde-expanded `outDIR` computed at the top
// of the script — `params.outDIR` would leave a literal "~" in the path.
workflow.onComplete {
    def outputDir = new File("${outDIR}/run")
    if (!outputDir.exists()) {
        outputDir.mkdirs()
    }
    record_params()
    record_runtime()
}
/* Record parameters
 *
 * Writes every pipeline parameter for the run to
 * <outDIR>/run/parameters.tsv as one "key<TAB>value" line per parameter.
 * Uses the tilde-expanded `outDIR` for consistency with the rest of the
 * pipeline output paths.
 */
def record_params() {
    def output = new File("${outDIR}/run/parameters.tsv")
    // Remove any file from a previous run so repeated/resumed executions
    // do not append duplicate rows.
    if (output.exists()) {
        output.delete()
    }
    params.each { k, v ->
        output.append("${k}\t${v}\n")
    }
}
/* Record runtime information
 *
 * Records runtime and environment information and writes the summary to a
 * tab-separated (tsv) file at <outDIR>/run/runtime.tsv, one
 * "key<TAB>value" line per field.
 */
def record_runtime() {
    def output = new File("${outDIR}/run/runtime.tsv")
    // Remove any file from a previous run so repeated/resumed executions
    // do not append duplicate rows.
    if (output.exists()) {
        output.delete()
    }
    // Groovy map literals are LinkedHashMaps, so insertion order is kept
    // and the report layout matches the original field ordering.
    def fields = [
        PipelineVersion   : workflow.manifest.version,
        ContainerEngine   : workflow.containerEngine,
        Duration          : workflow.duration,
        CommandLine       : workflow.commandLine,
        CommitId          : workflow.commitId,
        Complete          : workflow.complete,
        ConfigFiles       : workflow.configFiles.join(', '),
        Container         : workflow.container,
        ErrorMessage      : workflow.errorMessage,
        ErrorReport       : workflow.errorReport,
        ExitStatus        : workflow.exitStatus,
        HomeDir           : workflow.homeDir,
        LaunchDir         : workflow.launchDir,
        Manifest          : workflow.manifest,
        Profile           : workflow.profile,
        ProjectDir        : workflow.projectDir,
        Repository        : workflow.repository,
        Resume            : workflow.resume,
        Revision          : workflow.revision,
        RunName           : workflow.runName,
        ScriptFile        : workflow.scriptFile,
        ScriptId          : workflow.scriptId,
        ScriptName        : workflow.scriptName,
        SessionId         : workflow.sessionId,
        Start             : workflow.start,
        StubRun           : workflow.stubRun,
        Success           : workflow.success,
        UserName          : workflow.userName,
        WorkDir           : workflow.workDir,
        NextflowBuild     : nextflow.build,
        NextflowTimestamp : nextflow.timestamp,
        NextflowVersion   : nextflow.version,
    ]
    fields.each { k, v ->
        output.append("${k}\t${v}\n")
    }
}
/* Parameter check helper functions
 *
 * Check the readDIR and target parameters before executing the workflow.
 *
 */
def check_readdir_presence(should_exist) {
    // Called with a named argument, so `should_exist` arrives as a map,
    // e.g. [should_exist: true]; containsValue() inspects the flag.
    // If readDIR MUST be provided and is not, exit with an error.
    // Exit status must be non-zero: the original `exit 0` reported success
    // to the shell on a fatal validation failure.
    if ( should_exist.containsValue(true) && params.readDIR == null ) {
        log.error("`--readDIR` must be specified but is missing.")
        exit 1
    }
    // If readDIR MUST NOT be provided and is, exit with an error
    if ( should_exist.containsValue(false) && params.readDIR != null ) {
        log.error("`--readDIR` must not be specified but is present.")
        exit 1
    }
}
// Abort with a non-zero exit status if the mandatory --target parameter
// is missing (the original `exit 0` reported success on failure).
def check_target() {
    if ( params.target == null ) {
        log.error("`--target` must be specified.")
        exit 1
    }
}
// Abort with a non-zero exit status if the mandatory --sequencer parameter
// is missing (the original `exit 0` reported success on failure).
def check_sequencer() {
    if ( params.sequencer == null ) {
        log.error("`--sequencer` must be specified.")
        exit 1
    }
}