diff --git a/bin/DEA.R b/bin/DEA.R index b40dbfa7..ea5960b7 100755 --- a/bin/DEA.R +++ b/bin/DEA.R @@ -283,7 +283,7 @@ DESeq2 <- function(inputdata, data_type){ rm(tmp) dds <- DESeqDataSetFromMatrix( - countData=inputdata$circ, + countData=round(inputdata$circ), colData=inputdata$pheno, design = inputdata$design) diff --git a/bin/counts_combined.py b/bin/counts_combined.py index 99a9304e..f50fe3db 100755 --- a/bin/counts_combined.py +++ b/bin/counts_combined.py @@ -11,20 +11,18 @@ args = parser.parse_args() -columns = ['chr', 'start', 'end', 'strand', 'count', 'tools'] +columns = ['chr', 'start', 'end', 'name', 'count', 'strand'] dfs = {os.path.basename(bed).split('.')[0]: pd.read_csv(bed, sep='\t', header=None, - names=columns, - index_col=[0, 1, 2, 3]) - .drop('tools', axis=1) for bed in args.beds} + index_col=["chr", "start", "end", "strand"], + usecols=["chr", "start", "end", "strand", "count"], + names=columns) for bed in args.beds} dfs = [df.rename(columns={'count': sample}) for sample, df in dfs.items()] - df = pd.concat(dfs, axis=1) -df = df.fillna(0).astype(int) - -df.to_csv(args.out_bed, sep='\t') +df = df.fillna(0) +df.to_csv(args.out_bed, sep='\t', header=True, index=True) df.index = df.index.map(lambda x: f'{x[0]}:{x[1]}-{x[2]}:{x[3]}') df.index.name = 'ID' diff --git a/bin/merge_tools.py b/bin/merge_tools.py index fe81353d..32bacab1 100755 --- a/bin/merge_tools.py +++ b/bin/merge_tools.py @@ -21,4 +21,9 @@ 'tool_count': 'sum'}).reset_index() df = df[df['tool_count'] >= args.tool_filter] +df.drop('tool_count', axis=1, inplace=True) +df["name"] = df["chr"].astype(str) + ":" + df["start"].astype(str) + "-" + df["end"].astype(str) + ":" + df["strand"] + +df = df[['chr', 'start', 'end', 'name', 'count', 'strand']] + df.to_csv(args.output, sep='\t', index=False, header=False) diff --git a/conf/modules.config b/conf/modules.config index 1d82c383..68535f27 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -562,6 +562,14 @@ if (!params.skip_trimming) { ] } + 
withName: UPSET_SAMPLES { + ext.when = { params.tool.split(',').length > 1 } + } + + withName: UPSET_ALL { + ext.when = { params.tool.split(',').length > 1 } + } + withName: INTERSECT_ANNOTATION { ext.args = "-loj" ext.suffix = "intersect.bed" @@ -601,6 +609,11 @@ if (!params.skip_trimming) { ext.suffix = "gtf" } + withName: EXCLUDE_OVERLONG_TRANSCRIPTS { + ext.args = "-v FS='\\t' -v OFS='\\t' '\$5-\$4 <= 10000 { print }'" + ext.suffix = "filtered.gtf" + } + withName: MARK_CIRCULAR { // GAWK process that marks FASTA headers. // Leaves headers starting with "ENS" and non-header lines as is. diff --git a/modules/local/annotation/full_annotation/main.nf b/modules/local/annotation/full_annotation/main.nf index 43030ca4..207cd463 100644 --- a/modules/local/annotation/full_annotation/main.nf +++ b/modules/local/annotation/full_annotation/main.nf @@ -1,5 +1,5 @@ process ANNOTATION { - tag "$meta.id" + tag "$meta.id:$meta.tool" label 'process_single' conda "bioconda::pandas=1.5.2" diff --git a/modules/local/quantification/merge_experiments/environment.yml b/modules/local/quantification/merge_experiments/environment.yml index 381dba4d..cfc21aba 100644 --- a/modules/local/quantification/merge_experiments/environment.yml +++ b/modules/local/quantification/merge_experiments/environment.yml @@ -6,4 +6,4 @@ channels: - bioconda - defaults dependencies: - - "bioconda::bioconductor-summarizedexperiment=1.32.0" + - "bioconda::bioconductor-rtracklayer=1.62.0" diff --git a/modules/local/quantification/merge_experiments/main.nf b/modules/local/quantification/merge_experiments/main.nf index 367a3082..9e14df7e 100644 --- a/modules/local/quantification/merge_experiments/main.nf +++ b/modules/local/quantification/merge_experiments/main.nf @@ -4,12 +4,14 @@ process MERGE_EXPERIMENTS { - conda "${moduleDir}/environment.yaml" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bioconductor-summarizedexperiment:1.32.0--r43hdfd78af_0' : - 'biocontainers/bioconductor-summarizedexperiment:1.32.0--r43hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/bioconductor-rtracklayer:1.62.0--r43ha9d7317_0' : + 'biocontainers/bioconductor-rtracklayer:1.62.0--r43ha9d7317_0' }" input: - tuple val(meta), path(experiments) + tuple val(meta), path(experiments) tuple val(meta2), path(phenotype) + tuple val(meta3), path(gtf) + tuple val(meta4), path(tpm) output: tuple val(meta), path("${meta.id}.merged.rds"), emit: merged diff --git a/modules/local/quantification/merge_experiments/templates/merge_experiments.r b/modules/local/quantification/merge_experiments/templates/merge_experiments.r index 51b5b666..a166190b 100644 --- a/modules/local/quantification/merge_experiments/templates/merge_experiments.r +++ b/modules/local/quantification/merge_experiments/templates/merge_experiments.r @@ -6,6 +6,8 @@ paths <- c('${experiments.join("\', \'")}') experiments <- lapply(paths, readRDS) phenotype <- read.csv('${phenotype}', stringsAsFactors = FALSE) +annotation <- rtracklayer::import('${gtf}') +tpm <- read.table('${tpm}', header=TRUE, row.names=1, check.names=FALSE)[, -1, drop=FALSE] se_assays <- list() @@ -37,6 +39,13 @@ for (col in colnames(colData(se))) { } } +# Add transcript annotation +annotation <- annotation[match(rownames(se), annotation\$transcript_id),] +rowData(se) <- annotation + +# Add TPM as a numeric matrix (sample names kept verbatim, dimensions never dropped) +assay(se, "tpm", withDimnames = FALSE) <- as.matrix(tpm[rownames(se), colData(se)\$names, drop=FALSE]) + saveRDS(se, '${meta.id}.merged.rds') writeLines( diff --git a/modules/local/upset/main.nf b/modules/local/upset/main.nf new file mode 100644 index 00000000..08babf76 --- /dev/null +++ b/modules/local/upset/main.nf @@ -0,0 +1,22 @@ +process UPSET { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::python=3.8.3 conda-forge::numpy=1.20.* conda-forge::pandas=1.2.* conda-forge::upsetplot=0.4.4" + container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-f42a44964bca5225c7860882e231a7b5488b5485:47ef981087c59f79fdbcab4d9d7316e9ac2e688d-0' : + 'biocontainers/mulled-v2-f42a44964bca5225c7860882e231a7b5488b5485:47ef981087c59f79fdbcab4d9d7316e9ac2e688d-0' }" + input: + tuple val(meta), val(tools), path(beds) + + when: + task.ext.when == null || task.ext.when + + output: + tuple val(meta), path("*.png"), emit: plot + path "*.upset_mqc.json" , emit: multiqc + path "versions.yml" , emit: versions + + script: + template "upset.py" +} diff --git a/modules/local/upset/templates/upset.py b/modules/local/upset/templates/upset.py new file mode 100644 index 00000000..f0378bf8 --- /dev/null +++ b/modules/local/upset/templates/upset.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 + +import pandas as pd +import platform +import upsetplot +import matplotlib +import matplotlib.pyplot as plt +import distutils.version +import base64 +import json + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +df_tools = pd.DataFrame( + { + "tool": "${tools.join(' ')}".split(" "), + "file": "${beds.join(' ')}".split(" ") + } +) + +tool_files = df_tools.groupby("tool").agg(lambda x: x.tolist())["file"].to_dict() +tool_ids = {} + +for tool, files in tool_files.items(): + df_tool = pd.concat([pd.read_csv(f, sep="\\t", header=None) for f in files]) + tool_ids[tool] = set(df_tool[3].unique()) + +dataset = upsetplot.from_contents(tool_ids) + +upsetplot.plot(dataset, orientation='horizontal', show_counts=True) +plot_file = "${meta.id}.upset.png" +plt.savefig(plot_file) + +image_string = base64.b64encode(open(plot_file, "rb").read()).decode("utf-8") +image_html = f'
' + +multiqc = { + 'id': "${meta.id}_upset", + 'parent_id': "upset_plots", + 'parent_name': 'UpSet Plots', + 'parent_description': 'UpSet plots showing the overlap between tools for each sample', + 'section_name': 'UpSet: ${meta.id}', + 'description': 'UpSet plot showing the overlap between tools for sample ${meta.id}', + 'plot_type': 'image', + 'data': image_html +} + +with open("${meta.id}.upset_mqc.json", "w") as f: + f.write(json.dumps(multiqc, indent=4)) + +# Create version file +versions = { + "${task.process}" : { + "python": platform.python_version(), + "pandas": pd.__version__, + "upsetplot": upsetplot.__version__, + "matplotlib": matplotlib.__version__ + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/subworkflows/local/circrna_discovery.nf b/subworkflows/local/circrna_discovery.nf index 8ac26e69..35e2fe72 100644 --- a/subworkflows/local/circrna_discovery.nf +++ b/subworkflows/local/circrna_discovery.nf @@ -1,43 +1,45 @@ -include { ANNOTATION } from '../../modules/local/annotation/full_annotation/main' -include { GNU_SORT as COMBINE_ANNOTATION_BEDS } from '../../modules/nf-core/gnu/sort/main' -include { GNU_SORT as COMBINE_ANNOTATION_GTFS } from '../../modules/nf-core/gnu/sort/main' -include { GAWK as REMOVE_SCORE_STRAND } from '../../modules/nf-core/gawk/main' -include { BEDTOOLS_INTERSECT as INTERSECT_ANNOTATION } from '../../modules/nf-core/bedtools/intersect/main' -include { BOWTIE2_ALIGN as FIND_CIRC_ALIGN } from '../../modules/nf-core/bowtie2/align/main' -include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' -include { FIND_CIRC_ANCHORS } from '../../modules/local/find_circ/anchors/main' -include { FIND_CIRC } from '../../modules/local/find_circ/find_circ/main' -include { FIND_CIRC_FILTER } from '../../modules/local/find_circ/filter/main' -include { CIRIQUANT } from 
'../../modules/local/ciriquant/ciriquant/main' -include { CIRIQUANT_FILTER } from '../../modules/local/ciriquant/filter/main' -include { CIRCRNA_FINDER_FILTER } from '../../modules/local/circrna_finder/filter/main' -include { SEGEMEHL_ALIGN } from '../../modules/nf-core/segemehl/align/main' -include { SEGEMEHL_FILTER } from '../../modules/local/segemehl/filter/main' -include { STAR_ALIGN as STAR_1ST_PASS } from '../../modules/nf-core/star/align/main' -include { STAR_ALIGN as STAR_2ND_PASS } from '../../modules/nf-core/star/align/main' -include { SJDB as STAR_SJDB } from '../../modules/local/star/sjdb/main' -include { STAR_ALIGN as DCC_MATE1_1ST_PASS } from '../../modules/nf-core/star/align/main' -include { STAR_ALIGN as DCC_MATE1_2ND_PASS } from '../../modules/nf-core/star/align/main' -include { SJDB as DCC_MATE1_SJDB } from '../../modules/local/star/sjdb/main' -include { STAR_ALIGN as DCC_MATE2_1ST_PASS } from '../../modules/nf-core/star/align/main' -include { STAR_ALIGN as DCC_MATE2_2ND_PASS } from '../../modules/nf-core/star/align/main' -include { SJDB as DCC_MATE2_SJDB } from '../../modules/local/star/sjdb/main' -include { DCC } from '../../modules/local/dcc/dcc/main' -include { DCC_FILTER } from '../../modules/local/dcc/filter/main' -include { MAPSPLICE_ALIGN } from '../../modules/local/mapsplice/align/main' -include { FASTA } from '../../modules/local/fasta/main' -include { MERGE_TOOLS } from '../../modules/local/count_matrix/merge_tools/main' -include { COUNTS_COMBINED } from '../../modules/local/count_matrix/combined/main' -include { CIRCEXPLORER2_REFERENCE as CIRCEXPLORER2_REF } from '../../modules/local/circexplorer2/reference/main' -include { CIRCEXPLORER2_PARSE as CIRCEXPLORER2_PAR } from '../../modules/nf-core/circexplorer2/parse/main' -include { CIRCEXPLORER2_ANNOTATE as CIRCEXPLORER2_ANN } from '../../modules/nf-core/circexplorer2/annotate/main' -include { CIRCEXPLORER2_FILTER as CIRCEXPLORER2_FLT } from 
'../../modules/local/circexplorer2/filter/main' -include { CIRCEXPLORER2_REFERENCE as MAPSPLICE_REFERENCE } from '../../modules/local/circexplorer2/reference/main' -include { CIRCEXPLORER2_PARSE as MAPSPLICE_PARSE } from '../../modules/nf-core/circexplorer2/parse/main' -include { CIRCEXPLORER2_ANNOTATE as MAPSPLICE_ANNOTATE } from '../../modules/nf-core/circexplorer2/annotate/main' -include { CIRCEXPLORER2_FILTER as MAPSPLICE_FILTER } from '../../modules/local/circexplorer2/filter/main' +include { ANNOTATION } from '../../modules/local/annotation/full_annotation' +include { GNU_SORT as COMBINE_ANNOTATION_BEDS } from '../../modules/nf-core/gnu/sort' +include { GNU_SORT as COMBINE_ANNOTATION_GTFS } from '../../modules/nf-core/gnu/sort' +include { GAWK as REMOVE_SCORE_STRAND } from '../../modules/nf-core/gawk' +include { BEDTOOLS_INTERSECT as INTERSECT_ANNOTATION } from '../../modules/nf-core/bedtools/intersect' +include { BOWTIE2_ALIGN as FIND_CIRC_ALIGN } from '../../modules/nf-core/bowtie2/align' +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index' +include { FIND_CIRC_ANCHORS } from '../../modules/local/find_circ/anchors' +include { FIND_CIRC } from '../../modules/local/find_circ/find_circ' +include { FIND_CIRC_FILTER } from '../../modules/local/find_circ/filter' +include { CIRIQUANT } from '../../modules/local/ciriquant/ciriquant' +include { CIRIQUANT_FILTER } from '../../modules/local/ciriquant/filter' +include { CIRCRNA_FINDER_FILTER } from '../../modules/local/circrna_finder/filter' +include { SEGEMEHL_ALIGN } from '../../modules/nf-core/segemehl/align' +include { SEGEMEHL_FILTER } from '../../modules/local/segemehl/filter' +include { STAR_ALIGN as STAR_1ST_PASS } from '../../modules/nf-core/star/align' +include { STAR_ALIGN as STAR_2ND_PASS } from '../../modules/nf-core/star/align' +include { SJDB as STAR_SJDB } from '../../modules/local/star/sjdb' +include { STAR_ALIGN 
as DCC_MATE1_1ST_PASS } from '../../modules/nf-core/star/align' +include { STAR_ALIGN as DCC_MATE1_2ND_PASS } from '../../modules/nf-core/star/align' +include { SJDB as DCC_MATE1_SJDB } from '../../modules/local/star/sjdb' +include { STAR_ALIGN as DCC_MATE2_1ST_PASS } from '../../modules/nf-core/star/align' +include { STAR_ALIGN as DCC_MATE2_2ND_PASS } from '../../modules/nf-core/star/align' +include { SJDB as DCC_MATE2_SJDB } from '../../modules/local/star/sjdb' +include { DCC } from '../../modules/local/dcc/dcc' +include { DCC_FILTER } from '../../modules/local/dcc/filter' +include { MAPSPLICE_ALIGN } from '../../modules/local/mapsplice/align' +include { FASTA } from '../../modules/local/fasta' +include { MERGE_TOOLS } from '../../modules/local/count_matrix/merge_tools' +include { COUNTS_COMBINED } from '../../modules/local/count_matrix/combined' +include { CIRCEXPLORER2_REFERENCE as CIRCEXPLORER2_REF } from '../../modules/local/circexplorer2/reference' +include { CIRCEXPLORER2_PARSE as CIRCEXPLORER2_PAR } from '../../modules/nf-core/circexplorer2/parse' +include { CIRCEXPLORER2_ANNOTATE as CIRCEXPLORER2_ANN } from '../../modules/nf-core/circexplorer2/annotate' +include { CIRCEXPLORER2_FILTER as CIRCEXPLORER2_FLT } from '../../modules/local/circexplorer2/filter' +include { CIRCEXPLORER2_REFERENCE as MAPSPLICE_REFERENCE } from '../../modules/local/circexplorer2/reference' +include { CIRCEXPLORER2_PARSE as MAPSPLICE_PARSE } from '../../modules/nf-core/circexplorer2/parse' +include { CIRCEXPLORER2_ANNOTATE as MAPSPLICE_ANNOTATE } from '../../modules/nf-core/circexplorer2/annotate' +include { CIRCEXPLORER2_FILTER as MAPSPLICE_FILTER } from '../../modules/local/circexplorer2/filter' +include { UPSET as UPSET_SAMPLES } from '../../modules/local/upset' +include { UPSET as UPSET_ALL } from '../../modules/local/upset' workflow CIRCRNA_DISCOVERY { @@ -59,6 +61,7 @@ workflow CIRCRNA_DISCOVERY { main: ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() fasta = 
ch_fasta.map{meta, fasta -> fasta} gtf = ch_gtf.map{meta, gtf -> gtf} @@ -201,24 +204,63 @@ workflow CIRCRNA_DISCOVERY { ch_versions = ch_versions.mix(MAPSPLICE_ANNOTATE.out.versions) ch_versions = ch_versions.mix(MAPSPLICE_FILTER.out.versions) + // + // COUNT MATRIX WORKFLOW: + // + + ch_matrix = CIRCEXPLORER2_FLT.out.matrix.mix(SEGEMEHL_FILTER.out.matrix, + CIRCRNA_FINDER_FILTER.out.matrix, + FIND_CIRC_FILTER.out.matrix, + CIRIQUANT_FILTER.out.matrix, + DCC_FILTER.out.matrix, + MAPSPLICE_FILTER.out.matrix) + + tools_selected = params.tool.split(',').collect{it.trim().toLowerCase()} + + MERGE_TOOLS( ch_matrix.map{ meta, bed -> [ [id: meta.id], bed ] }.groupTuple(), + tools_selected.size() > 1 ? tool_filter : 1, duplicates_fun ) + COUNTS_COMBINED( MERGE_TOOLS.out.merged.map{ meta, bed -> bed }.collect() ) + + ch_versions = ch_versions.mix(MERGE_TOOLS.out.versions) + ch_versions = ch_versions.mix(COUNTS_COMBINED.out.versions) + // // ANNOTATION WORKFLOW: // ch_biotypes = Channel.fromPath("${projectDir}/bin/unwanted_biotypes.txt") - circrna_filtered = CIRCEXPLORER2_FLT.out.results.mix(SEGEMEHL_FILTER.out.results, + circrna_tools = CIRCEXPLORER2_FLT.out.results.mix(SEGEMEHL_FILTER.out.results, CIRCRNA_FINDER_FILTER.out.results, FIND_CIRC_FILTER.out.results, CIRIQUANT_FILTER.out.results, DCC_FILTER.out.results, MAPSPLICE_FILTER.out.results) - INTERSECT_ANNOTATION( circrna_filtered.combine(gtf), [[], []]) + UPSET_SAMPLES( circrna_tools.map{ meta, bed -> [meta.id, meta.tool, bed]} + .groupTuple() + .map{ sample, tools, beds -> [[id: sample], tools, beds]} ) + UPSET_ALL( circrna_tools.map{ meta, bed -> ["all", meta.tool, bed] } + .groupTuple() + .map{ sample, tools, beds -> [[id: sample], tools, beds]} ) + + ch_multiqc_files = ch_multiqc_files.mix(UPSET_SAMPLES.out.multiqc) + ch_multiqc_files = ch_multiqc_files.mix(UPSET_ALL.out.multiqc) + ch_versions = ch_versions.mix(UPSET_SAMPLES.out.versions) + ch_versions = ch_versions.mix(UPSET_ALL.out.versions) + + 
circrna_incl_merged = circrna_tools.mix( + MERGE_TOOLS.out.merged.map{ meta, bed -> [meta + [tool: "merged"], bed] }) + + INTERSECT_ANNOTATION( circrna_incl_merged.combine(gtf), [[], []]) ANNOTATION( INTERSECT_ANNOTATION.out.intersect, exon_boundary ) - COMBINE_ANNOTATION_BEDS(ANNOTATION.out.bed.map{ meta, bed -> bed}.collect().map{[[id: "annotation"], it]}) + + ch_annotation_bed_merged = ANNOTATION.out.bed.filter{ meta, bed -> meta.tool == "merged" } + ch_annotation_gtf_merged = ANNOTATION.out.gtf.filter{ meta, gtf -> meta.tool == "merged" } + + COMBINE_ANNOTATION_BEDS(ch_annotation_bed_merged.map{ meta, bed -> bed}.collect().map{[[id: "annotation"], it]}) REMOVE_SCORE_STRAND( COMBINE_ANNOTATION_BEDS.out.sorted, []) - COMBINE_ANNOTATION_GTFS(ANNOTATION.out.gtf.map{ meta, gtf -> gtf}.collect().map{[[id: "annotation"], it]}) + COMBINE_ANNOTATION_GTFS(ch_annotation_gtf_merged.map{ meta, gtf -> gtf}.collect().map{[[id: "annotation"], it]}) ch_versions = ch_versions.mix(INTERSECT_ANNOTATION.out.versions) ch_versions = ch_versions.mix(ANNOTATION.out.versions) @@ -230,40 +272,18 @@ workflow CIRCRNA_DISCOVERY { // FASTA WORKFLOW: // - FASTA( ANNOTATION.out.bed, fasta ) + FASTA( ch_annotation_bed_merged, fasta ) ch_versions = ch_versions.mix(FASTA.out.versions) - // - // COUNT MATRIX WORKFLOW: - // - - ch_matrix = CIRCEXPLORER2_FLT.out.matrix.mix(SEGEMEHL_FILTER.out.matrix, - CIRCRNA_FINDER_FILTER.out.matrix, - FIND_CIRC_FILTER.out.matrix, - CIRIQUANT_FILTER.out.matrix, - DCC_FILTER.out.matrix, - MAPSPLICE_FILTER.out.matrix) - - tools_selected = params.tool.split(',').collect{it.trim().toLowerCase()} - - MERGE_TOOLS( ch_matrix.map{ meta, bed -> [ [id: meta.id], bed ] }.groupTuple(), - tools_selected.size() > 1 ? 
tool_filter : 1, duplicates_fun ) - - COUNTS_COMBINED( MERGE_TOOLS.out.merged.map{ meta, bed -> bed }.collect() ) - - counts_bed = COUNTS_COMBINED.out.counts_bed - counts_tsv = COUNTS_COMBINED.out.counts_tsv - ch_versions = ch_versions.mix(MERGE_TOOLS.out.versions) - ch_versions = ch_versions.mix(COUNTS_COMBINED.out.versions) - emit: - circrna_bed12 = ANNOTATION.out.bed + circrna_bed12 = ch_annotation_bed_merged fasta = FASTA.out.analysis_fasta annotation_bed = REMOVE_SCORE_STRAND.out.output annotation_gtf = COMBINE_ANNOTATION_GTFS.out.sorted - counts_bed - counts_tsv + counts_bed = COUNTS_COMBINED.out.counts_bed + counts_tsv = COUNTS_COMBINED.out.counts_tsv + multiqc_files = ch_multiqc_files versions = ch_versions } diff --git a/subworkflows/local/quantification.nf b/subworkflows/local/quantification.nf index 86cee630..ee9ad8be 100644 --- a/subworkflows/local/quantification.nf +++ b/subworkflows/local/quantification.nf @@ -1,4 +1,5 @@ include { GNU_SORT as COMBINE_TRANSCRIPTOME_GTFS } from '../../modules/nf-core/gnu/sort' +include { GAWK as EXCLUDE_OVERLONG_TRANSCRIPTS } from '../../modules/nf-core/gawk' include { TRANSCRIPTOME } from '../../modules/local/quantification/transcriptome' include { GAWK as MARK_CIRCULAR } from '../../modules/nf-core/gawk' include { PSIRC_INDEX } from '../../modules/local/psirc/index' @@ -33,13 +34,18 @@ workflow QUANTIFICATION { ch_gtf.mix(circ_annotation_gtf).map{meta, gtf -> gtf}.collect().map{[[id: "transcriptome"], it]}, ) - TRANSCRIPTOME(COMBINE_TRANSCRIPTOME_GTFS.out.sorted, ch_fasta) + EXCLUDE_OVERLONG_TRANSCRIPTS( + COMBINE_TRANSCRIPTOME_GTFS.out.sorted, [] + ) + + TRANSCRIPTOME(EXCLUDE_OVERLONG_TRANSCRIPTS.out.output, ch_fasta) MARK_CIRCULAR(TRANSCRIPTOME.out.transcriptome, []) ch_versions = ch_versions.mix( COMBINE_TRANSCRIPTOME_GTFS.out.versions, TRANSCRIPTOME.out.versions, - MARK_CIRCULAR.out.versions + MARK_CIRCULAR.out.versions, + EXCLUDE_OVERLONG_TRANSCRIPTS.out.versions ) PSIRC_INDEX(MARK_CIRCULAR.out.output) @@ -58,11 
+64,6 @@ workflow QUANTIFICATION { "kallisto" ) - MERGE_EXPERIMENTS( - TXIMETA_TXIMETA.out.se.map{meta, se -> se}.collect().map{[[id: "experiments"], it]}, - ch_phenotype - ) - TXIMETA_TXIMPORT( PSIRC_QUANT.out.directory, CUSTOM_TX2GENE.out.tx2gene, @@ -74,8 +75,7 @@ workflow QUANTIFICATION { PSIRC_QUANT.out.versions, CUSTOM_TX2GENE.out.versions, TXIMETA_TXIMETA.out.versions, - TXIMETA_TXIMPORT.out.versions, - MERGE_EXPERIMENTS.out.versions + TXIMETA_TXIMPORT.out.versions ) JOIN_GENE_COUNTS( @@ -102,13 +102,22 @@ workflow QUANTIFICATION { JOIN_TX_TPM.out.csv ) + + MERGE_EXPERIMENTS( + TXIMETA_TXIMETA.out.se.map{meta, se -> se}.collect().map{[[id: "experiments"], it]}, + ch_phenotype, + EXCLUDE_OVERLONG_TRANSCRIPTS.out.output, + JOIN_TX_TPM.out.csv + ) + ch_versions = ch_versions.mix( JOIN_GENE_COUNTS.out.versions, JOIN_GENE_TPM.out.versions, JOIN_TX_COUNTS.out.versions, JOIN_TX_TPM.out.versions, SPLIT_TYPES_COUNTS.out.versions, - SPLIT_TYPES_TPM.out.versions + SPLIT_TYPES_TPM.out.versions, + MERGE_EXPERIMENTS.out.versions ) emit: diff --git a/workflows/circrna/main.nf b/workflows/circrna/main.nf index b8bfa62d..1b0f89cd 100644 --- a/workflows/circrna/main.nf +++ b/workflows/circrna/main.nf @@ -151,6 +151,7 @@ workflow CIRCRNA { params.exon_boundary ) + ch_multiqc_files = ch_multiqc_files.mix(CIRCRNA_DISCOVERY.out.multiqc_files) ch_versions = ch_versions.mix(CIRCRNA_DISCOVERY.out.versions) //