Skip to content

Commit

Permalink
Add collect_outputs flag (#29)
Browse files Browse the repository at this point in the history
  • Loading branch information
dfornika authored Dec 20, 2022
1 parent cceb828 commit 26558ac
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 5 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,26 @@ When the `--skip_bracken` flag is used, abundances will be calculated directly f
estimates directly from kraken2 reports may under-estimate the actual abundances. Detailed rationale for including bracken analysis
can be found in the [bracken paper](https://peerj.com/articles/cs-104/).

### Collecting Outputs

By default, separate output files will be created for each sample, in independent output sub-directories under the directory provided with the `--outdir` flag.

To generate summary files for all samples, add the `--collect_outputs` flag. The following files will be written to the output directory:

```
collected_<taxonomic_level>_<kraken|bracken>_abundances.csv
collected_<taxonomic_level>_<kraken|bracken>_abundances_top_5.csv
collected_fastp.csv
```

An alternative prefix (instead of `collected`) may be supplied using the `--collected_outputs_prefix` flag. For example, with `--collected_outputs_prefix test`:

```
test_<taxonomic_level>_<kraken|bracken>_abundances.csv
test_<taxonomic_level>_<kraken|bracken>_abundances_top_5.csv
test_fastp.csv
```

## Outputs

An output directory will be created for each sample. Within those directories,
Expand Down
2 changes: 1 addition & 1 deletion bin/adjust_bracken_percentages_for_unclassified_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def main(args):
'fraction_total_reads',
]

writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames)
writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames, dialect='unix', quoting=csv.QUOTE_MINIMAL)
writer.writeheader()

for b in bracken_abundances:
Expand Down
11 changes: 11 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,17 @@ workflow {
extract_reads(ch_fastq.join(kraken2.out.report).combine(ch_to_extract, by: 0))
}

    // Optionally merge per-sample outputs into single summary CSVs in params.outdir.
    if (params.collect_outputs) {
        // Concatenate each sample's fastp CSV (dropping repeated headers via keepHeader/skip),
        // sorted by the first data line of each file (the sample row).
        fastp.out.csv.map{ it -> it[1] }.collectFile(name: params.collected_outputs_prefix + "_fastp.csv", storeDir: params.outdir, keepHeader: true, skip: 1, sort: { it -> it.readLines()[1] })
        if (!params.skip_bracken) {
            // Bracken path: merge abundance CSVs, sorted by the first comma-separated field
            // (presumably sample_id — confirm against the bracken process output format).
            ch_abundances.map{ it -> it[1] }.collectFile(name: params.collected_outputs_prefix + "_" + params.taxonomic_level + "_bracken_abundances.csv", storeDir: params.outdir, keepHeader: true, skip: 1, sort: { it -> it.readLines()[1].split(',')[0] })
            abundance_top_5.out.map{ it -> it[1] }.collectFile(name: params.collected_outputs_prefix + "_" + params.taxonomic_level + "_bracken_abundances_top_5.csv", storeDir: params.outdir, keepHeader: true, skip: 1, sort: { it -> it.readLines()[1] })
        } else {
            // Kraken-only path (--skip_bracken): same merging, but files are named *_kraken_* 
            // and sourced from the kraken-derived channels.
            ch_abundances.map{ it -> it[1] }.collectFile(name: params.collected_outputs_prefix + "_" + params.taxonomic_level + "_kraken_abundances.csv", storeDir: params.outdir, keepHeader: true, skip: 1, sort: { it -> it.readLines()[1].split(',')[0] })
            abundance_top_5_kraken.out.map{ it -> it[1] }.collectFile(name: params.collected_outputs_prefix + "_" + params.taxonomic_level + "_kraken_abundances_top_5.csv", storeDir: params.outdir, keepHeader: true, skip: 1, sort: { it -> it.readLines()[1] })
        }
    }

ch_provenance = fastp.out.provenance

ch_provenance = ch_provenance.join(kraken2.out.provenance).map{ it -> [it[0], [it[1], it[2]]] }
Expand Down
7 changes: 6 additions & 1 deletion modules/taxon_abundance.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ process fastp {
tuple val(sample_id), path(reads_1), path(reads_2)

output:
tuple val(sample_id), path("${sample_id}_fastp.{json,csv}"), emit: fastp_reports
tuple val(sample_id), path("${sample_id}_fastp.json"), emit: json
tuple val(sample_id), path("${sample_id}_fastp.csv"), emit: csv
tuple val(sample_id), path("${sample_id}_trimmed_R1.fastq.gz"), path("${sample_id}_trimmed_R2.fastq.gz"), emit: reads
tuple val(sample_id), path("${sample_id}_fastp_provenance.yml"), emit: provenance

Expand Down Expand Up @@ -77,15 +78,19 @@ process bracken {
-o ${sample_id}_${params.taxonomic_level}_bracken_abundances_unsorted.tsv \
-r ${params.read_length} \
-l ${params.taxonomic_level}
paste <(echo "sample_id") <(head -n 1 ${sample_id}_${params.taxonomic_level}_bracken_abundances_unsorted.tsv) | tr \$'\\t' ',' > bracken_abundances_header.csv
adjust_bracken_percentages_for_unclassified_reads.py \
-k ${kraken2_report} \
-b ${sample_id}_${params.taxonomic_level}_bracken.txt \
-a ${sample_id}_${params.taxonomic_level}_bracken_abundances_unsorted.tsv \
> ${sample_id}_${params.taxonomic_level}_bracken_abundances_unsorted_with_unclassified.csv
tail -n+2 ${sample_id}_${params.taxonomic_level}_bracken_abundances_unsorted_with_unclassified.csv | \
sort -t ',' -nrk 7,7 | \
awk -F ',' 'BEGIN {OFS=FS}; {print "${sample_id}",\$0}' > ${sample_id}_${params.taxonomic_level}_bracken_abundances_data.csv
cat bracken_abundances_header.csv ${sample_id}_${params.taxonomic_level}_bracken_abundances_data.csv > ${sample_id}_${params.taxonomic_level}_bracken_abundances.csv
"""
}
Expand Down
8 changes: 5 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ manifest {
description = 'Taxon Abundance'
mainScript = 'main.nf'
nextflowVersion = '>=20.01.0'
version = '0.1.4'
version = '0.1.5'
}

params {
Expand All @@ -14,15 +14,17 @@ params {
fastq_exts = ['.fastq.gz', '.fq.gz', '.fastq', '.fq']
fastq_search_path = makeFastqSearchPath( illumina_suffixes, fastq_exts )
samplesheet_input = 'NO_FILE'
kraken_db = '/data/ref_databases/kraken2/2021-05-17_standard'
bracken_db = '/data/ref_databases/kraken2/2021-05-17_standard'
kraken_db = '/data/ref_databases/kraken2/latest_standard'
bracken_db = '/data/ref_databases/kraken2/latest_standard'
confidence = 0.0
taxonomic_level = "S"
read_length = 150
versioned_outdir = false
extract_reads = false
extract_reads_threshold = 1.0
skip_bracken = false
collect_outputs = false
collected_outputs_prefix = 'collected'
pipeline_short_name = parsePipelineName(manifest.toMap().get('name'))
pipeline_minor_version = parseMinorVersion(manifest.toMap().get('version'))
}
Expand Down

0 comments on commit 26558ac

Please sign in to comment.