From 11c419506105c1cf6e7ab71b913fe5e87463db9d Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 21 Mar 2024 13:15:33 +0100
Subject: [PATCH 01/32] first steps & sc run

---
 modules.json                            | 30 +++++---
 modules/local/trust4.nf                 | 81 +++++++++++++++++++++
 nextflow.config                         |  9 ++-
 nextflow_schema.json                    | 60 +++++++++++++---
 subworkflows/local/fastq_input_check.nf | 26 ++++++-
 subworkflows/local/rnaseq_input.nf      | 94 +++++++++++++++++++++++++
 workflows/airrflow.nf                   | 31 +++++++-
 7 files changed, 307 insertions(+), 24 deletions(-)
 create mode 100644 modules/local/trust4.nf
 create mode 100644 subworkflows/local/rnaseq_input.nf

diff --git a/modules.json b/modules.json
index 19799719..cef85ae4 100644
--- a/modules.json
+++ b/modules.json
@@ -8,40 +8,54 @@
                     "cat/fastq": {
                         "branch": "master",
                         "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "cellranger/mkvdjref": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "cellranger/vdj": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "custom/dumpsoftwareversions": {
                         "branch": "master",
                         "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "fastp": {
                         "branch": "master",
                         "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "c9488585ce7bd35ccd2a30faa2371454c8112fb9",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     }
                 }
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/local/trust4.nf b/modules/local/trust4.nf
new file mode 100644
index 00000000..9947484c
--- /dev/null
+++ b/modules/local/trust4.nf
@@ -0,0 +1,81 @@
+process TRUST4 { // runs `run-trust4` once per input tuple, on a BAM and/or FASTQ reads
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::trust4=1.0.13"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/trust4:1.0.13--h43eeafb_0':
+        'biocontainers/trust4:1.0.13--h43eeafb_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(reads) // bam/reads are tested for truthiness in the script, so either may be empty
+    tuple val(meta2), path(fasta) // handed to run-trust4 as -f
+    tuple val(meta3), path(vdj_reference)
+    
+    output:
+    tuple val(meta), path("*.tsv")              , emit: tsv
+    tuple val(meta), path("*_airr.tsv")         , emit: airr_files
+    tuple val(meta), path("${meta.id}_airr.tsv") , emit: airr_tsv
+    tuple val(meta), path("*_report.tsv")       , emit: report_tsv
+    tuple val(meta), path("*.fa")               , emit: fasta
+    tuple val(meta), path("*.out")              , emit: out
+    tuple val(meta), path("*.fq")               , emit: fq
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra CLI options, appended verbatim at the end of the command
+    def prefix = task.ext.prefix ?: "${meta.id}" // output prefix given to -o; defaults to the sample id
+    def bam_mode = bam ? "-b ${bam}" : '' // -b is only emitted when a BAM was staged
+    def single_end_mode = reads && meta.single_end ? "-u ${reads}" : '' // -u is only emitted for single-end reads
+    // reference is optional for fastq input
+    def reference = vdj_reference ? "--ref ${vdj_reference}" : "" // --ref is only emitted when a reference was staged
+    // separate forward from reverse pairs: chunk the staged reads in twos, then transpose
+    def (forward, reverse) = reads.collate(2).transpose()
+    def paired_end_mode = reads && (meta.single_end == false) ? "-1 ${forward[0]} -2 ${reverse[0]}" : '' // only the first pair is passed
+    def barcode = meta.barcode_read ? "--barcode ${meta.barcode_read}" : '' // value comes from meta, set by the caller
+    def readFormat = params.read_format ? "--readFormat ${params.read_format}" : '' // read from pipeline params, not from meta
+    def umi_position = meta.umi_position ? "--UMI ${meta.umi_position}" : '' // value comes from meta, set by the caller
+
+    """
+    run-trust4 \\
+        ${bam_mode} \\
+        ${single_end_mode} \\
+        ${paired_end_mode} \\
+        ${barcode} \\
+        ${readFormat} \\
+        ${umi_position} \\
+        -t $task.cpus \\
+        -f ${fasta} \\
+        -o ${prefix} \\
+        ${reference} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        trust4: \$(run-trust4 2>&1 | grep -o 'v[0-9.]*-r[0-9]*' | sed 's/^/TRUST4 using /' )
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: '' // NOTE(review): not referenced by the stub script below
+    def prefix = task.ext.prefix ?: "${meta.id}" // same prefix rule as the real script
+    """
+    touch ${prefix}_airr.tsv
+    touch ${prefix}_airr_align.tsv
+    touch ${prefix}_report.tsv
+    touch ${prefix}_assembled_reads.fa
+    touch ${prefix}_annot.fa
+    touch ${prefix}_cdr3.out
+    touch ${prefix}_raw.out
+    touch ${prefix}_final.out
+    touch ${prefix}_toassemble.fq
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        trust4: \$(run-trust4 2>&1 | grep -o 'v[0-9.]*-r[0-9]*' | sed 's/^/TRUST4 using /' )
+    END_VERSIONS
+    """
+}
diff --git a/nextflow.config b/nextflow.config
index d84a0c59..e64b5769 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -31,7 +31,7 @@ params {
     primer_revpr = false
 
     // UMI and primer handling
-    umi_position = 'R1'
+    umi_position = null
     umi_length = -1
     umi_start = 0
 
@@ -121,6 +121,13 @@ params {
     // -----------------------
     reference_10x = null
 
+    // -----------------------
+    // raw RNA seq input options
+    // -----------------------
+    barcode_read = null
+    read_format = null
+    coord_fasta = null
+
 
     // -----------------------
     // generic nf-core options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4224ebd1..d9490390 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -45,7 +45,7 @@
                 },
                 "miairr": {
                     "type": "string",
-                    "default": "${projectDir}/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv",
+                    "default": "${projectDir}/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv",
                     "description": "Path to MiAIRR-BioSample mapping",
                     "fa_icon": "fas fa-table"
                 }
@@ -61,7 +61,14 @@
                     "type": "string",
                     "fa_icon": "fas fa-flask",
                     "description": "Protocol used for the V(D)J amplicon sequencing library generation.",
-                    "enum": ["specific_pcr_umi", "specific_pcr", "dt_5p_race", "dt_5p_race_umi", "sc_10x_genomics"],
+                    "enum": [
+                        "specific_pcr_umi",
+                        "specific_pcr",
+                        "dt_5p_race",
+                        "dt_5p_race_umi",
+                        "sc_10x_genomics",
+                        "trust4"
+                    ],
                     "help_text": "Available protocols are:\n- `specific_pcr_umi`: RT-PCR using transcript-specific primers containing UMIs.\n- `specific_pcr`: RT-PCR using transcript-specific primers.\n- `dt_5p_race_umi`: 5\u2019-RACE PCR using oligo-dT primers and template switch primers containing UMI.\n- `dt_5p_race`: 5\u2019-RACE PCR (i.e. RT is followed by a template switch (TS) step) using oligo-dT primers.\n- `sc_10x_genomics`:10x genomics library preparation protocol for scVDJ sequencing."
                 },
                 "race_linker": {
@@ -336,19 +343,22 @@
                 "save_databases": {
                     "type": "boolean",
                     "description": "Save databases so you can use the cache in future runs.",
-                    "fa_icon": "fas fa-file-download"
+                    "fa_icon": "fas fa-file-download",
+                    "default": true
                 },
                 "imgtdb_base": {
                     "type": "string",
                     "description": "Path to the cached IMGT database.",
                     "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom IMGT reference database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.",
-                    "fa_icon": "fas fa-database"
+                    "fa_icon": "fas fa-database",
+                    "default": "https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip"
                 },
                 "igblast_base": {
                     "type": "string",
                     "description": "Path to the cached igblast database.",
                     "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom IMGT reference database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.",
-                    "fa_icon": "fas fa-database"
+                    "fa_icon": "fas fa-database",
+                    "default": "https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip"
                 },
                 "fetch_imgt": {
                     "type": "boolean",
@@ -464,6 +474,34 @@
             "help_text": "Options for running raw single cell data.",
             "fa_icon": "fab fa-pagelines"
         },
+        "rnaseq_based_analysis_options": {
+            "title": "RNA seq based analysis options",
+            "type": "object",
+            "description": "Options specific for raw RNA seq input.",
+            "default": "",
+            "properties": {
+                "barcode_read": {
+                    "type": "string",
+                    "description": "Specifies which read holds the barcodes",
+                    "enum": ["R1", "R2"],
+                    "fa_icon": "fas fa-terminal"
+                },
+                "read_format": {
+                    "type": "string",
+                    "description": "Specifies where in the read the barcodes and UMIs can be found.",
+                    "help_text": "For further information see the TRUST4 [docs](https://github.com/liulab-dfci/TRUST4?tab=readme-ov-file#10x-genomics-data-and-barcode-based-single-cell-data).",
+                    "fa_icon": "fas fa-terminal"
+                },
+                "coord_fasta": {
+                    "type": "string",
+                    "description": "path to the fasta file coordinate and sequence of V/D/J/C genes.",
+                    "help_text": "For further information see the TRUST4 [docs](https://github.com/liulab-dfci/TRUST4).",
+                    "fa_icon": "fas fa-database"
+                }
+            },
+            "help_text": "Options for running raw RNA seq data.",
+            "fa_icon": "fab fa-pagelines"
+        },
         "report_options": {
             "title": "Report options",
             "type": "object",
@@ -472,25 +510,25 @@
             "properties": {
                 "report_rmd": {
                     "type": "string",
-                    "default": "${projectDir}/assets/repertoire_comparison.Rmd",
+                    "default": "${projectDir}/assets/repertoire_comparison.Rmd",
                     "description": "Custom report Rmarkdown file.",
                     "fa_icon": "far fa-file-code"
                 },
                 "report_css": {
                     "type": "string",
-                    "default": "${projectDir}/assets/nf-core_style.css",
+                    "default": "${projectDir}/assets/nf-core_style.css",
                     "description": "Custom report style file in css format.",
                     "fa_icon": "far fa-file-code"
                 },
                 "report_logo": {
                     "type": "string",
-                    "default": "${projectDir}/assets/nf-core-airrflow_logo_light.png",
+                    "default": "${projectDir}/assets/nf-core-airrflow_logo_light.png",
                     "description": "Custom logo for the report.",
                     "fa_icon": "far fa-file-code"
                 },
                 "report_logo_img": {
                     "type": "string",
-                    "default": "${projectDir}/assets/nf-core-airrflow_logo_reports.png",
+                    "default": "${projectDir}/assets/nf-core-airrflow_logo_reports.png",
                     "description": "Custom logo for the EnchantR reports.",
                     "fa_icon": "far fa-file-code"
                 },
@@ -574,7 +612,6 @@
                 "config_profile_url": {
                     "type": "string",
                     "description": "Directory to keep pipeline Nextflow logs and reports.",
-                    "default": "${params.outdir}/pipeline_info",
                     "fa_icon": "fas fa-cogs",
                     "hidden": true
                 }
@@ -769,6 +806,9 @@
         {
             "$ref": "#/definitions/single_cell_analysis_options"
         },
+        {
+            "$ref": "#/definitions/rnaseq_based_analysis_options"
+        },
         {
             "$ref": "#/definitions/institutional_config_options"
         },
diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf
index b5165871..bd46bae0 100644
--- a/subworkflows/local/fastq_input_check.nf
+++ b/subworkflows/local/fastq_input_check.nf
@@ -27,8 +27,8 @@ workflow FASTQ_INPUT_CHECK {
         .set { ch_reads }
     ch_versions = SAMPLESHEET_CHECK.out.versions
 
-    // Merge multi-lane sample fastq for protocols except for 10x genomics (cellranger handles multi-fastq per sample)
-    if (params.library_generation_method == 'sc_10x_genomics') {
+    // Merge multi-lane sample fastq for protocols except for 10x genomics, trust4 (cellranger handles multi-fastq per sample)
+    if (params.library_generation_method == 'sc_10x_genomics' || params.library_generation_method == 'trust4')  {
 
         ch_merged_reads = ch_reads.single.mix( ch_reads.multiple )
 
@@ -82,8 +82,28 @@ def create_fastq_channels(LinkedHashMap col) {
             error "ERROR: Please check input samplesheet -> Index read FastQ file does not exist!\n${col.filename_I1}"
         }
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2), file(col.filename_I1) ] ]
-    } else {
+    }
+    if (params.library_generation_method == "trust4") {
+        if (params.barcode_read == "R1") {
 
+            meta.barcode_read = file(col.filename_R1).name
+        }
+        else if (params.barcode_read == "R2") {
+            meta.barcode_read = file(col.filename_R2).name
+        }
+        if (params.umi_position == "R1") {
+            meta.umi_position = file(col.filename_R1).name
+        }
+        else if (params.umi_position == "R2") {
+            meta.umi_position = file(col.filename_R2).name
+        }
+        else if (params.umi_position == "") {
+            meta.umi_position = null 
+        } else {
+            error "ERROR: UMI read must be specified as either R1 or R2!"
+        }
+        array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
+    } else {
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
         if (params.index_file) {
             error "ERROR: --index_file was provided but the index file path is not specified in the samplesheet!"
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
new file mode 100644
index 00000000..64ff529a
--- /dev/null
+++ b/subworkflows/local/rnaseq_input.nf
@@ -0,0 +1,94 @@
+include { TRUST4                                                        } from '../../modules/local/trust4'
+include { FASTQ_INPUT_CHECK                                             } from '../../subworkflows/local/fastq_input_check'
+include { RENAME_FILE as RENAME_FILE_TSV                                } from '../../modules/local/rename_file'
+include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR  } from '../../modules/local/changeo/changeo_convertdb_fasta'
+
+workflow RNASEQ_INPUT { // samplesheet check -> TRUST4 -> renamed AIRR tsv -> fasta for downstream annotation
+
+    take:
+    ch_input
+
+    main:
+
+    ch_versions = Channel.empty()
+    ch_logs = Channel.empty()
+
+    //
+    // read in samplesheet, validate and stage input files
+    //
+    FASTQ_INPUT_CHECK(
+        ch_input
+    )
+    ch_versions = ch_versions.mix(FASTQ_INPUT_CHECK.out.versions)
+
+    ch_reads = FASTQ_INPUT_CHECK.out.reads
+
+    // validate library generation method parameter
+    if (params.vprimers) {
+        error "The TRUST4 library generation method does not require V-region primers, please provide a reference file instead or select another library method option."
+    } else if (params.race_linker) {
+        error "The TRUST4 library generation method does not require the --race_linker parameter, please provide a reference file instead or select another library method option."
+    }
+    if (params.cprimers)  {
+        error "The TRUST4 library generation method does not require C-region primers, please provide a reference file instead or select another library method option."
+    }
+    if (params.umi_length > 0)  {
+        error "TRUST4 library generation method does not require to set the UMI length, please provide a reference file instead or select another library method option."
+    }
+    if (params.reference_10x)  {
+        // reference_10x is not consumed anywhere in this subworkflow, so reject it early
+        error "The TRUST4 library generation method does not require this reference, please provide a compliant reference file instead or select another library method option."
+    }
+    if (!params.coord_fasta) {
+        error "Please provide a reference file for the TRUST4 library generation method."
+    }
+    if (!params.read_format) {
+        error "Please provide a a read-format as seen in the TRUST4 docs for the TRUST4 library generation method."
+    }
+
+
+    ch_reads.map{ meta, input_file  ->
+        [ meta, [], input_file ] }
+    .set { ch_reads_new }
+
+    // ch_reads_new.view()
+
+    // if (params.vdj_reference != null) {
+    //     ch_vdjref = Channel.of([[], file(params.vdj_reference)])
+    // }
+    // else {
+    //     ch_vdjref = Channel.of([[], []])
+    // }
+
+    TRUST4(
+        ch_reads_new,
+        Channel.value([[], file(params.coord_fasta, checkIfExists: true)]), // value channel: reused for every sample
+        Channel.value([[], []]) // no V(D)J reference; value channel so it never limits the number of tasks
+    )
+
+    ch_trust4_out = TRUST4.out.out
+    ch_trust4_airr = TRUST4.out.airr_tsv
+
+    // rename tsv file to unique name
+    RENAME_FILE_TSV(
+                ch_trust4_airr
+            )
+        .set { ch_renamed_tsv }
+
+    // convert the renamed AIRR tsv to fasta; this is what the subworkflow emits as `fasta`
+    CHANGEO_CONVERTDB_FASTA_FROM_AIRR(
+                RENAME_FILE_TSV.out.file
+            )
+    ch_versions = ch_versions.mix(TRUST4.out.versions)
+    ch_fasta = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta
+
+    emit:
+    versions = ch_versions
+    // complete trust4 output
+    outs = ch_trust4_out
+    // trust4 airr file
+    airr = ch_trust4_airr
+    // trust4 output converted to FASTA format
+    fasta = ch_fasta
+    samplesheet = FASTQ_INPUT_CHECK.out.samplesheet
+}
diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index ede383fa..a263e535 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -64,6 +64,7 @@ include { CLONAL_ANALYSIS               } from '../subworkflows/local/clonal_ana
 include { REPERTOIRE_ANALYSIS_REPORTING } from '../subworkflows/local/repertoire_analysis_reporting'
 include { SC_RAW_INPUT                  } from '../subworkflows/local/sc_raw_input'
 include { FASTQ_INPUT_CHECK             } from '../subworkflows/local/fastq_input_check'
+include { RNASEQ_INPUT                  } from '../subworkflows/local/rnaseq_input'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -122,7 +123,32 @@ workflow AIRRFLOW {
             ch_fastp_html                           = Channel.empty()
             ch_fastp_json                           = Channel.empty()
             ch_fastqc_postassembly_mqc              = Channel.empty()
-        } else {
+        
+        }  else if (params.library_generation_method == "trust4") {
+            // Extract VDJ sequences from "general" RNA seq data using TRUST4
+            RNASEQ_INPUT (
+                ch_input
+            )
+
+            ch_fasta                                = RNASEQ_INPUT.out.fasta
+            ch_versions                             = ch_versions.mix(RNASEQ_INPUT.out.versions)
+
+            ch_validated_samplesheet                = RNASEQ_INPUT.out.samplesheet.collect()
+
+            ch_presto_filterseq_logs                = Channel.empty()
+            ch_presto_maskprimers_logs              = Channel.empty()
+            ch_presto_pairseq_logs                  = Channel.empty()
+            ch_presto_clustersets_logs              = Channel.empty()
+            ch_presto_buildconsensus_logs           = Channel.empty()
+            ch_presto_postconsensus_pairseq_logs    = Channel.empty()
+            ch_presto_assemblepairs_logs            = Channel.empty()
+            ch_presto_collapseseq_logs              = Channel.empty()
+            ch_presto_splitseq_logs                 = Channel.empty()
+            ch_fastp_html                           = Channel.empty()
+            ch_fastp_json                           = Channel.empty()
+            ch_fastqc_postassembly_mqc              = Channel.empty()
+        }
+        else {
             // Perform sequence assembly if input type is fastq from bulk sequencing data
             SEQUENCE_ASSEMBLY(
                 ch_input,
@@ -187,6 +213,7 @@ workflow AIRRFLOW {
     } else {
         error "Mode parameter value not valid."
     }
+
     // Perform V(D)J annotation and filtering
     VDJ_ANNOTATION(
         ch_fasta,
@@ -238,7 +265,7 @@ workflow AIRRFLOW {
     ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions)
 
     if (!params.skip_report){
-        REPERTOIRE_ANALYSIS_REPORTING(
+        REPERTOIRE_ANALYSIS_REPORTING (
             ch_presto_filterseq_logs.collect().ifEmpty([]),
             ch_presto_maskprimers_logs.collect().ifEmpty([]),
             ch_presto_pairseq_logs.collect().ifEmpty([]),

From 6639f0fd53a0d98ead367ada9c9db16db2b08ded Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Mon, 25 Mar 2024 09:26:42 +0100
Subject: [PATCH 02/32] input change

---
 subworkflows/local/fastq_input_check.nf | 2 --
 subworkflows/local/rnaseq_input.nf      | 5 +----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf
index bd46bae0..459a64e4 100644
--- a/subworkflows/local/fastq_input_check.nf
+++ b/subworkflows/local/fastq_input_check.nf
@@ -99,8 +99,6 @@ def create_fastq_channels(LinkedHashMap col) {
         }
         else if (params.umi_position == "") {
             meta.umi_position = null 
-        } else {
-            error "ERROR: UMI read must be specified as either R1 or R2!"
         }
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
     } else {
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 64ff529a..67663626 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -23,7 +23,7 @@ workflow RNASEQ_INPUT {
 
     ch_reads = FASTQ_INPUT_CHECK.out.reads
 
-    // validate library generation method parameter
+    // validate library generation method parameter
     if (params.vprimers) {
         error "The TRUST4 library generation method does not require V-region primers, please provide a reference file instead or select another library method option."
     } else if (params.race_linker) {
@@ -42,9 +42,6 @@ workflow RNASEQ_INPUT {
     if (!params.coord_fasta) {
         error "Please provide a reference file for the TRUST4 library generation method."
     }
-    if (!params.read_format) {
-        error "Please provide a a read-format as seen in the TRUST4 docs for the TRUST4 library generation method."
-    }
 
 
     ch_reads.map{ meta, input_file  ->

From 2a42c32f4234e942b734abe9ce4508f97e56870c Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Wed, 10 Apr 2024 16:44:17 +0200
Subject: [PATCH 03/32] stage before pull

---
 modules/local/trust4.nf            | 18 ++++++++++--------
 subworkflows/local/rnaseq_input.nf |  4 +++-
 subworkflows/local/sc_raw_input.nf |  2 +-
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/modules/local/trust4.nf b/modules/local/trust4.nf
index 9947484c..5f5dacd3 100644
--- a/modules/local/trust4.nf
+++ b/modules/local/trust4.nf
@@ -13,14 +13,16 @@ process TRUST4 {
     tuple val(meta3), path(vdj_reference)
     
     output:
-    tuple val(meta), path("*.tsv")              , emit: tsv
-    tuple val(meta), path("*_airr.tsv")         , emit: airr_files
-    tuple val(meta), path("${meta.id}_airr.tsv") , emit: airr_tsv
-    tuple val(meta), path("*_report.tsv")       , emit: report_tsv
-    tuple val(meta), path("*.fa")               , emit: fasta
-    tuple val(meta), path("*.out")              , emit: out
-    tuple val(meta), path("*.fq")               , emit: fq
-    path "versions.yml"                         , emit: versions
+    tuple val(meta), path("*.tsv")                          , emit: tsv
+    tuple val(meta), path("*_airr.tsv")                     , emit: airr_files
+    tuple val(meta), path("${meta.id}_airr.tsv")            , emit: airr_tsv
+    tuple val(meta), path("${meta.id}_barcode_airr.tsv")    , emit: barcode_airr
+    tuple val(meta), path("*_report.tsv")                   , emit: report_tsv
+    tuple val(meta), path("*.fa")                           , emit: fasta
+    tuple val(meta), path("*.out")                          , emit: out
+    tuple val(meta), path("*.fq")                           , emit: fq
+    tuple val(meta), path("**")                             , emit: outs
+    path "versions.yml"                                     , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 67663626..0ee851c9 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -64,8 +64,10 @@ workflow RNASEQ_INPUT {
     )
 
     ch_trust4_out = TRUST4.out.out
-    ch_trust4_airr = TRUST4.out.airr_tsv
+    //ch_trust4_airr = TRUST4.out.airr_tsv
+    ch_trust4_airr = TRUST4.out.barcode_airr
 
+    ch_trust4_airr.view()
     // rename tsv file to unique name
     RENAME_FILE_TSV(
                 ch_trust4_airr
diff --git a/subworkflows/local/sc_raw_input.nf b/subworkflows/local/sc_raw_input.nf
index 8f46cbfd..22976dea 100644
--- a/subworkflows/local/sc_raw_input.nf
+++ b/subworkflows/local/sc_raw_input.nf
@@ -65,7 +65,7 @@ workflow SC_RAW_INPUT {
                 [ meta, out_files.find { it.endsWith("airr_rearrangement.tsv") } ]
             }
         .set { ch_cellranger_airr }
-
+    
     // TODO : add VALIDATE_INPUT Module
     // this module requires input in csv format... Might need to create this in an extra module
 

From a5af20285407ef47cba07af009cd32b92e7b15ee Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 11 Apr 2024 14:10:25 +0200
Subject: [PATCH 04/32] barcode airr test

---
 subworkflows/local/rnaseq_input.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 0ee851c9..8cc53156 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -64,7 +64,7 @@ workflow RNASEQ_INPUT {
     )
 
     ch_trust4_out = TRUST4.out.out
-    //ch_trust4_airr = TRUST4.out.airr_tsv
+    // ch_trust4_airr = TRUST4.out.airr_tsv
     ch_trust4_airr = TRUST4.out.barcode_airr
 
     ch_trust4_airr.view()

From 6242a6f605b244ed8f8a0d44edf9ffe9d3c5ce18 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Tue, 7 May 2024 11:08:28 +0200
Subject: [PATCH 05/32] skip presto reporting, which doesn't happen when trust4
 is used

---
 subworkflows/local/repertoire_analysis_reporting.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf
index 2a796751..905c2d85 100644
--- a/subworkflows/local/repertoire_analysis_reporting.nf
+++ b/subworkflows/local/repertoire_analysis_reporting.nf
@@ -30,7 +30,7 @@ workflow REPERTOIRE_ANALYSIS_REPORTING {
     main:
     ch_versions = Channel.empty()
 
-    if (params.mode == "fastq" && params.library_generation_method != "sc_10x_genomics") {
+    if (params.mode == "fastq" && params.library_generation_method != "sc_10x_genomics" && params.library_generation_method != "trust4" ) {
         PARSE_LOGS(
             ch_presto_filterseq_logs,
             ch_presto_maskprimers_logs,

From 761eccc2cb78e2050daeba7526166625bcb4108f Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Wed, 8 May 2024 15:16:45 +0200
Subject: [PATCH 06/32] linting

---
 modules.json                                       | 14 ++++----------
 subworkflows/nf-core/utils_nfcore_pipeline/main.nf |  8 +++++++-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/modules.json b/modules.json
index d2ed633e..15b7dc89 100644
--- a/modules.json
+++ b/modules.json
@@ -8,23 +8,17 @@
                     "cat/fastq": {
                         "branch": "master",
                         "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "cellranger/mkvdjref": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "cellranger/vdj": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "fastp": {
                         "branch": "master",
@@ -52,7 +46,7 @@
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
-                        "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+                        "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
                         "installed_by": ["subworkflows"]
                     },
                     "utils_nfvalidation_plugin": {
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
index a8b55d6f..14558c39 100644
--- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) {
 // Citation string for pipeline
 //
 def workflowCitation() {
+    def temp_doi_ref = ""
+    String[] manifest_doi = workflow.manifest.doi.tokenize(",")
+    // Using a loop to handle multiple DOIs
+    // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+    // Removing ` ` since the manifest.doi is a string and not a proper list
+    for (String doi_ref: manifest_doi) temp_doi_ref += "  https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n"
     return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
         "* The pipeline\n" +
-        "  ${workflow.manifest.doi}\n\n" +
+        temp_doi_ref + "\n" +
         "* The nf-core framework\n" +
         "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
         "* Software dependencies\n" +

From 2da37e203a987da3c1d4920f4614927a0f9e8742 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Wed, 8 May 2024 16:37:42 +0200
Subject: [PATCH 07/32] fastp before trust4

---
 modules/local/rename_fastq_trust4.nf | 24 ++++++++++++++++++++
 subworkflows/local/rnaseq_input.nf   | 34 ++++++++++++++++++++++++----
 2 files changed, 54 insertions(+), 4 deletions(-)
 create mode 100644 modules/local/rename_fastq_trust4.nf

diff --git a/modules/local/rename_fastq_trust4.nf b/modules/local/rename_fastq_trust4.nf
new file mode 100644
index 00000000..58e9d7a7
--- /dev/null
+++ b/modules/local/rename_fastq_trust4.nf
@@ -0,0 +1,24 @@
+// Rename the staged read pair (R1, R2) to the file names of the second input's pair (orig_r1, orig_r2)
+process RENAME_FASTQ_TRUST4 {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "conda-forge::python=3.8.0 conda-forge::biopython=1.74"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' :
+        'biocontainers/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' }"
+
+    input:
+    tuple val(meta), path(R1), path(R2)               // files that get renamed; meta is passed through to the output
+    tuple val(meta_2), path(orig_r1), path(orig_r2)   // files providing the target names; meta_2 is unused. NOTE(review): separate input channels pair by emission order - confirm both emit samples in the same order
+
+    output:
+    tuple val(meta), path(orig_r1), path(orig_r2) , emit: reads
+
+    script:
+    """
+    mv ${R1} ${orig_r1}
+    mv ${R2} ${orig_r2}
+
+    """
+}
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 8cc53156..f0b89a2c 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -2,6 +2,10 @@ include { TRUST4                                                        } from '
 include { FASTQ_INPUT_CHECK                                             } from '../../subworkflows/local/fastq_input_check'
 include { RENAME_FILE as RENAME_FILE_TSV                                } from '../../modules/local/rename_file'
 include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR  } from '../../modules/local/changeo/changeo_convertdb_fasta'
+include { FASTP                                                         } from '../../modules/nf-core/fastp/main'
+include { RENAME_FASTQ_TRUST4                                           } from '../../modules/local/rename_fastq_trust4'
+
+
 
 workflow RNASEQ_INPUT {
 
@@ -43,10 +47,32 @@ workflow RNASEQ_INPUT {
         error "Please provide a reference file for the TRUST4 library generation method."
     }
 
+    // Fastp
+    save_merged = false
+    FASTP (
+        ch_reads,
+        [],
+        [],
+        save_merged
+    )
+    ch_versions = ch_versions.mix(FASTP.out.versions)
+    
+    ch_rename_fastq = FASTP.out.reads.map { meta, reads -> [meta, reads[0], reads[1]] }
+    ch_rename_original = ch_reads.map{ meta,reads -> [meta, reads[0], reads[1]] }
+
+    RENAME_FASTQ_TRUST4(
+        ch_rename_fastq,
+        ch_rename_original
+    )
+
+    ch_reads_fastp_filtered = RENAME_FASTQ_TRUST4.out.reads
+
+    ch_reads_fastp_filtered.view()
+
 
-    ch_reads.map{ meta, input_file  ->
-        [ meta, [], input_file ] }
-    .set { ch_reads_new }
+    ch_reads_fastp_filtered.map{ meta, read_1, read_2  ->
+        [ meta, [], [read_1, read_2] ] }
+    .set { ch_reads_trust4 }
 
     // ch_reads_new.view()
 
@@ -58,7 +84,7 @@ workflow RNASEQ_INPUT {
     // }
 
     TRUST4(
-        ch_reads_new,
+        ch_reads_trust4,
         Channel.of([[], file(params.coord_fasta)]),
         Channel.of([[], []])
     )

From d262a387693739f13e77051c849a9b75429bfd1e Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Fri, 10 May 2024 10:07:12 +0200
Subject: [PATCH 08/32] fastp before trust4

---
 modules/local/rename_fastq_trust4.nf | 1 -
 subworkflows/local/rnaseq_input.nf   | 4 ++++
 workflows/airrflow.nf                | 4 ++--
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/modules/local/rename_fastq_trust4.nf b/modules/local/rename_fastq_trust4.nf
index 58e9d7a7..5ce36d63 100644
--- a/modules/local/rename_fastq_trust4.nf
+++ b/modules/local/rename_fastq_trust4.nf
@@ -19,6 +19,5 @@ process RENAME_FASTQ_TRUST4 {
     """
     mv ${R1} ${orig_r1}
     mv ${R2} ${orig_r2}
-
     """
 }
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index f0b89a2c..b90aba92 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -60,6 +60,7 @@ workflow RNASEQ_INPUT {
     ch_rename_fastq = FASTP.out.reads.map { meta, reads -> [meta, reads[0], reads[1]] }
     ch_rename_original = ch_reads.map{ meta,reads -> [meta, reads[0], reads[1]] }
 
+    // need to rename to input names in case barcodes are present
     RENAME_FASTQ_TRUST4(
         ch_rename_fastq,
         ch_rename_original
@@ -109,6 +110,9 @@ workflow RNASEQ_INPUT {
 
     emit:
     versions = ch_versions
+    // fastp
+    fastp_reads_json = FASTP.out.json.collect{ meta,json -> json }
+    fastp_reads_html = FASTP.out.html.collect{ meta,html -> html }
     // complete trust4 output
     outs = ch_trust4_out
     // trust4 airr file
diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index a975d802..cbd2fab7 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -128,8 +128,8 @@ workflow AIRRFLOW {
             ch_presto_assemblepairs_logs            = Channel.empty()
             ch_presto_collapseseq_logs              = Channel.empty()
             ch_presto_splitseq_logs                 = Channel.empty()
-            ch_fastp_html                           = Channel.empty()
-            ch_fastp_json                           = Channel.empty()
+            ch_fastp_html                           = RNASEQ_INPUT.out.fastp_reads_html
+            ch_fastp_json                           = RNASEQ_INPUT.out.fastp_reads_json
             ch_fastqc_postassembly_mqc              = Channel.empty()
         }
         else {

From 50418e1a70ae17bef71c358be0d06a58d9ad1a52 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 16 May 2024 08:18:24 +0200
Subject: [PATCH 09/32] bulk and sc rnaseq input

---
 modules/local/trust4.nf            |  1 -
 subworkflows/local/rnaseq_input.nf | 54 +++++++++++++++++-------------
 workflows/airrflow.nf              |  1 +
 3 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/modules/local/trust4.nf b/modules/local/trust4.nf
index 5f5dacd3..c3189e8f 100644
--- a/modules/local/trust4.nf
+++ b/modules/local/trust4.nf
@@ -16,7 +16,6 @@ process TRUST4 {
     tuple val(meta), path("*.tsv")                          , emit: tsv
     tuple val(meta), path("*_airr.tsv")                     , emit: airr_files
     tuple val(meta), path("${meta.id}_airr.tsv")            , emit: airr_tsv
-    tuple val(meta), path("${meta.id}_barcode_airr.tsv")    , emit: barcode_airr
     tuple val(meta), path("*_report.tsv")                   , emit: report_tsv
     tuple val(meta), path("*.fa")                           , emit: fasta
     tuple val(meta), path("*.out")                          , emit: out
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index b90aba92..876b1dfa 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -27,7 +27,8 @@ workflow RNASEQ_INPUT {
 
     ch_reads = FASTQ_INPUT_CHECK.out.reads
 
-    // validate library generation method parameteç
+
+    // validate library generation method parameters
     if (params.vprimers) {
         error "The TRUST4 library generation method does not require V-region primers, please provide a reference file instead or select another library method option."
     } else if (params.race_linker) {
@@ -40,12 +41,17 @@ workflow RNASEQ_INPUT {
         error "TRUST4 library generation method does not require to set the UMI length, please provide a reference file instead or select another library method option."
     }
     if (params.reference_10x)  {
-        // necessary to allow tar.gz files as input so that tests can run
         error "The TRUST4 library generation method does not require this reference, please provide a compliant reference file instead or select another library method option."
     }
     if (!params.coord_fasta) {
         error "Please provide a reference file for the TRUST4 library generation method."
     }
+    else {
+        ch_reads.map {
+            meta, reads -> [meta, file(params.coord_fasta)]
+        }
+        .set { ch_coord_fasta }
+    }
 
     // Fastp
     save_merged = false
@@ -68,33 +74,33 @@ workflow RNASEQ_INPUT {
 
     ch_reads_fastp_filtered = RENAME_FASTQ_TRUST4.out.reads
 
-    ch_reads_fastp_filtered.view()
-
-
-    ch_reads_fastp_filtered.map{ meta, read_1, read_2  ->
-        [ meta, [], [read_1, read_2] ] }
-    .set { ch_reads_trust4 }
-
-    // ch_reads_new.view()
-
-    // if (params.vdj_reference != null) {
-    //     ch_vdjref = Channel.of([[], file(params.vdj_reference)])
-    // }
-    // else {
-    //     ch_vdjref = Channel.of([[], []])
-    // }
+    // create trust4 input
+    ch_reads_trust4 = ch_reads_fastp_filtered.map{ meta, read_1, read_2  -> [ meta, [], [read_1, read_2] ] }
 
     TRUST4(
         ch_reads_trust4,
-        Channel.of([[], file(params.coord_fasta)]),
-        Channel.of([[], []])
+        ch_coord_fasta,
+        Channel.of([[], []]).collect()
     )
 
-    ch_trust4_out = TRUST4.out.out
-    // ch_trust4_airr = TRUST4.out.airr_tsv
-    ch_trust4_airr = TRUST4.out.barcode_airr
+    ch_trust4_out = TRUST4.out.outs
 
-    ch_trust4_airr.view()
+    // check whether input is sc or bulk and extract respective airr file for downstream processing
+    ch_trust4_out
+        .branch {
+            meta, out_files ->
+                bulk : meta["single_cell"] == "false"
+                    return [ meta, out_files.find { it.endsWith("${meta.id}_airr.tsv") } ]
+                sc : meta["single_cell"] == "true"
+                    return [ meta, out_files.find { it.endsWith("${meta.id}_barcode_airr.tsv") } ]
+        }
+        .set { ch_trust4_airr_file }
+
+    
+    // create channel with airr file
+    ch_trust4_airr_file.bulk.mix ( ch_trust4_airr_file.sc ).set { ch_trust4_airr }
+        
+    
     // rename tsv file to unique name
     RENAME_FILE_TSV(
                 ch_trust4_airr
@@ -108,6 +114,7 @@ workflow RNASEQ_INPUT {
 
     ch_fasta = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta
 
+
     emit:
     versions = ch_versions
     // fastp
@@ -120,4 +127,5 @@ workflow RNASEQ_INPUT {
     // trust4 output converted to FASTA format
     fasta = ch_fasta
     samplesheet = FASTQ_INPUT_CHECK.out.samplesheet
+    
 }
diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index cbd2fab7..72a79530 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -110,6 +110,7 @@ workflow AIRRFLOW {
         
         }  else if (params.library_generation_method == "trust4") {
             // Extract VDJ sequences from "general" RNA seq data using TRUST4
+
             RNASEQ_INPUT (
                 ch_input
             )

From c6f95bf1669c814df18f506991bbaccb715741b2 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 16 May 2024 08:19:09 +0200
Subject: [PATCH 10/32] bulk rnaseq test workflow

---
 .github/workflows/ci.yml     |  1 +
 conf/test_rnaseq_bulk.config | 27 +++++++++++++++++++++++++++
 nextflow.config              |  2 ++
 3 files changed, 30 insertions(+)
 create mode 100644 conf/test_rnaseq_bulk.config

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 515482aa..84d26cb5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -62,6 +62,7 @@ jobs:
             "test_10x_sc",
             "test_clontech_umi",
             "test_nebnext_umi",
+            "test_rnaseq_bulk",
           ]
       fail-fast: false
     steps:
diff --git a/conf/test_rnaseq_bulk.config b/conf/test_rnaseq_bulk.config
new file mode 100644
index 00000000..2dd6740e
--- /dev/null
+++ b/conf/test_rnaseq_bulk.config
@@ -0,0 +1,27 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for running tests
+ * -------------------------------------------------
+ * Defines bundled input files and everything required
+ * to run a fast and simple test. Use as follows:
+ *   nextflow run nf-core/airrflow -profile test_10x_sc,<docker/singularity>
+ */
+
+params {
+    config_profile_name = 'Test bulk RNA-seq based workflow using TRUST4'
+    config_profile_description = 'Minimal test dataset to check pipeline function with raw bulk RNA-seq data'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus = 2
+    max_memory = 6.GB
+    max_time = 48.h
+
+    // Pipeline parameters: fastq mode with the trust4 library generation method
+    mode = 'fastq'
+    library_generation_method = 'trust4'
+    clonal_threshold = 0
+
+    // Input data: samplesheet and the reference fasta passed as coord_fasta
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-rnaseq/rnaseq_metadata.tsv'
+    coord_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-rnaseq/IMGT+C.fa'
+}
diff --git a/nextflow.config b/nextflow.config
index 559865be..7db8af95 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -306,10 +306,12 @@ profiles {
     test_10x_sc { includeConfig 'conf/test_10x_sc.config' }
     test_clontech_umi { includeConfig 'conf/test_clontech_umi.config' }
     test_nebnext_umi { includeConfig 'conf/test_nebnext_umi.config' }
+    test_rnaseq_bulk { includeConfig 'conf/test_rnaseq_bulk.config' }
     nebnext_umi_tcr { includeConfig 'conf/nebnext_umi_tcr.config' }
     nebnext_umi_bcr { includeConfig 'conf/nebnext_umi_bcr.config' }
     clontech_umi_bcr { includeConfig 'conf/clontech_umi_bcr.config' }
     clontech_umi_tcr { includeConfig 'conf/clontech_umi_tcr.config' }
+    
 }
 
 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile

From c72146c931bbada00cfee68c6b27970594d1cc39 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 16 May 2024 08:55:07 +0200
Subject: [PATCH 11/32] sc rnaseq tests

---
 .github/workflows/ci.yml     |  1 +
 conf/test_rnaseq_bulk.config |  2 +-
 conf/test_rnaseq_sc.config   | 31 +++++++++++++++++++++++++++++++
 nextflow.config              |  1 +
 4 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 conf/test_rnaseq_sc.config

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 84d26cb5..6a3ffa98 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -63,6 +63,7 @@ jobs:
             "test_clontech_umi",
             "test_nebnext_umi",
             "test_rnaseq_bulk",
+            "test_rnaseq_sc",
           ]
       fail-fast: false
     steps:
diff --git a/conf/test_rnaseq_bulk.config b/conf/test_rnaseq_bulk.config
index 2dd6740e..83d0eaf2 100644
--- a/conf/test_rnaseq_bulk.config
+++ b/conf/test_rnaseq_bulk.config
@@ -4,7 +4,7 @@
  * -------------------------------------------------
  * Defines bundled input files and everything required
  * to run a fast and simple test. Use as follows:
- *   nextflow run nf-core/airrflow -profile test_10x_sc,<docker/singularity>
+ *   nextflow run nf-core/airrflow -profile test_rnaseq_bulk,<docker/singularity>
  */
 
 params {
diff --git a/conf/test_rnaseq_sc.config b/conf/test_rnaseq_sc.config
new file mode 100644
index 00000000..0ca7618f
--- /dev/null
+++ b/conf/test_rnaseq_sc.config
@@ -0,0 +1,31 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for running tests
+ * -------------------------------------------------
+ * Defines bundled input files and everything required
+ * to run a fast and simple test. Use as follows:
+ *   nextflow run nf-core/airrflow -profile test_rnaseq_sc,<docker/singularity>
+ */
+
+params {
+    config_profile_name = 'Test single-cell RNA-seq based workflow using TRUST4'
+    config_profile_description = 'Minimal test dataset to check pipeline function with raw single-cell RNA-seq data'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus = 2
+    max_memory = 6.GB
+    max_time = 48.h
+
+    // Pipeline parameters (strings must be quoted and booleans lower-case, otherwise they are read as undefined identifiers)
+    mode = 'fastq'
+    library_generation_method = 'trust4'
+    clonal_threshold = 0
+    barcode_read = 'R1'
+    umi_position = 'R1'
+    read_format = "bc:0:15,um:16:27"
+    skip_lineage = true
+
+    // Input data: samplesheet and the reference fasta passed as coord_fasta
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-rnaseq/sc_rnaseq_metadata.tsv'
+    coord_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-rnaseq/IMGT+C.fa'
+}
diff --git a/nextflow.config b/nextflow.config
index 7db8af95..4384b769 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -307,6 +307,7 @@ profiles {
     test_clontech_umi { includeConfig 'conf/test_clontech_umi.config' }
     test_nebnext_umi { includeConfig 'conf/test_nebnext_umi.config' }
     test_rnaseq_bulk { includeConfig 'conf/test_rnaseq_bulk.config' }
+    test_rnaseq_sc { includeConfig 'conf/test_rnaseq_sc.config' }
     nebnext_umi_tcr { includeConfig 'conf/nebnext_umi_tcr.config' }
     nebnext_umi_bcr { includeConfig 'conf/nebnext_umi_bcr.config' }
     clontech_umi_bcr { includeConfig 'conf/clontech_umi_bcr.config' }

From 7746378fea3062930c677aaccd13f6db61fd35d7 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 16 May 2024 12:50:12 +0200
Subject: [PATCH 12/32] docs

---
 docs/usage.md        | 50 +++++++++++++++++++++++++++++++++++++++++---
 nextflow_schema.json |  4 ++--
 2 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 69b7696b..c854811c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -42,13 +42,13 @@ nextflow run nf-core/airrflow \
 A typical command to run the pipeline from **single cell raw fastq files** is:
 
 ```bash
-nextflow run nf-core/airrflow -r dev \
+nextflow run nf-core/airrflow \
 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> \
 --mode fastq \
 --input input_samplesheet.tsv \
 --library_generation_method sc_10x_genomics \
 --reference_10x reference/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0.tar.gz \
---outdir ./results
+--outdir results
 ```
 
 A typical command for running the pipeline departing from **single-cell AIRR rearrangement tables or assembled bulk sequencing fasta** data is:
@@ -123,7 +123,7 @@ If you wish to share such profile (such as upload as supplementary material for
 
 ## Input samplesheet
 
-### Fastq input samplesheet (bulk sequencing)
+### Fastq input samplesheet (bulk AIRR and bulk/sc RNA sequencing)
 
 The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. An example samplesheet is:
 
@@ -511,6 +511,50 @@ nextflow run nf-core/airrflow -r dev \
 - The 10xGenomics reference can be downloaded from the [download page](https://www.10xgenomics.com/support/software/cell-ranger/downloads)
 - To generate a V(D)J segment fasta file as reference from IMGT one can follow the [cellranger docs](https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/advanced/references#imgt).
 
+
+## Supported unselected RNA-seq based methods
+
+nf-core/airrflow supports unselected bulk or single-cell RNA-seq fastq files as input. [TRUST4](https://github.com/liulab-dfci/TRUST4) is used to extract TCR/BCR sequences from these files. The resulting AIRR tables are then fed into airrflow's Immcantation based workflow. <br>
+To use unselected RNA-seq based input, specify `--library_generation_method trust4`.
+
+### Bulk RNA-seq
+
+A typical command to run the pipeline from **bulk RNA-seq fastq files** is:
+
+```bash
+nextflow run nf-core/airrflow \
+-profile <docker/singularity/podman/shifter/charliecloud/conda/institute> \
+--mode fastq \
+--input input_samplesheet.tsv \
+--library_generation_method trust4 \
+--coord_fasta reference/IMGT+C.fa \
+--outdir results
+```
+
+### Single-cell RNA-seq
+
+A typical command to run the pipeline from **single-cell RNA-seq fastq files** is:
+
+```bash
+nextflow run nf-core/airrflow \
+-profile <docker/singularity/podman/shifter/charliecloud/conda/institute> \
+--mode fastq \
+--input input_samplesheet.tsv \
+--library_generation_method trust4 \
+--umi_position R1 \
+--read_format bc:0:15,um:16:27 \
+--coord_fasta reference/IMGT+C.fa \
+--outdir results
+```
+
+* If UMI's are present, the read containing them must be specified using the `--umi_position` parameter.
+* The `--read_format` parameter can be used to specify the Barcode and UMI position within the reads (see TRUST4 [docs](https://github.com/liulab-dfci/TRUST4?tab=readme-ov-file#10x-genomics-data-and-barcode-based-single-cell-data))
+
+#### Reference file
+
+TRUST4 requires a reference. This can be provided using the `--coord_fasta` parameter.
+The reference fasta can be downloaded from IMGT and created using [TRUST4](https://github.com/liulab-dfci/TRUST4?tab=readme-ov-file#build-custom-vjc-gene-database-files-for--f-and---ref).
+
 ## Core Nextflow arguments
 
 :::note
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 776dfdf0..072a532e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -487,9 +487,9 @@
             "fa_icon": "fab fa-pagelines"
         },
         "rnaseq_based_analysis_options": {
-            "title": "RNA seq based analysis options",
+            "title": "Unselected RNA-seq based analysis options",
             "type": "object",
-            "description": "Options specific for raw RNA seq input.",
+            "description": "Options specific for raw unselected RNA-seq input.",
             "default": "",
             "properties": {
                 "barcode_read": {

From 5ef2f60afd25f4c2bf796f8b7259e66f9d747d58 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 16 May 2024 12:57:40 +0200
Subject: [PATCH 13/32] linting & prettier

---
 docs/usage.md        | 5 ++---
 nextflow_schema.json | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index c854811c..4df6e0e7 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -511,7 +511,6 @@ nextflow run nf-core/airrflow -r dev \
 - The 10xGenomics reference can be downloaded from the [download page](https://www.10xgenomics.com/support/software/cell-ranger/downloads)
 - To generate a V(D)J segment fasta file as reference from IMGT one can follow the [cellranger docs](https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/advanced/references#imgt).
 
-
 ## Supported unselected RNA-seq based methods
 
 nf-core/airrflow supports unselected bulk or single-cell RNA-seq fastq files as input. [TRUST4](https://github.com/liulab-dfci/TRUST4) is used to extract TCR/BCR sequences from these files. The resulting AIRR tables are then fed into airrflow's Immcantation based workflow. <br>
@@ -547,8 +546,8 @@ nextflow run nf-core/airrfow \
 --outdir results
 ```
 
-* If UMI's are present, the read containing them must be specified using the `--umi_position` parameter.
-* The `--read_format` parameter can be used to specify the Barcode and UMI position within the reads (see TRUST4 [docs](https://github.com/liulab-dfci/TRUST4?tab=readme-ov-file#10x-genomics-data-and-barcode-based-single-cell-data))
+- If UMI's are present, the read containing them must be specified using the `--umi_position` parameter.
+- The `--read_format` parameter can be used to specify the Barcode and UMI position within the reads (see TRUST4 [docs](https://github.com/liulab-dfci/TRUST4?tab=readme-ov-file#10x-genomics-data-and-barcode-based-single-cell-data))
 
 #### Reference file
 
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 072a532e..a33aec38 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -130,7 +130,6 @@
             "properties": {
                 "umi_position": {
                     "type": "string",
-                    "default": "R1",
                     "description": "Indicate if UMI indices are recorded in the R1 (default) or R1 fastq file.",
                     "help_text": "The pipeline requires UMI barcodes for identifying unique transcripts. These barcodes are typically read from an index file but sometimes can be provided merged with the start of the R1 or R2 reads. If provided in an additional index file, set the `--index_file` parameter, if provided merged with the R1 or R2 reads, set the `--umi_position` parameter to R1 or R2, respectively.",
                     "enum": ["R1", "R2"],

From 0bc5c193211a1d35d15a10d3ed0e4d61d7255012 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 16 May 2024 13:07:45 +0200
Subject: [PATCH 14/32] trailing whitespace

---
 modules/local/trust4.nf            |  2 +-
 nextflow.config                    |  1 -
 subworkflows/local/rnaseq_input.nf | 10 +++++-----
 subworkflows/local/sc_raw_input.nf |  2 +-
 workflows/airrflow.nf              |  2 +-
 5 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/modules/local/trust4.nf b/modules/local/trust4.nf
index c3189e8f..d139f2eb 100644
--- a/modules/local/trust4.nf
+++ b/modules/local/trust4.nf
@@ -11,7 +11,7 @@ process TRUST4 {
     tuple val(meta), path(bam), path(reads)
     tuple val(meta2), path(fasta)
     tuple val(meta3), path(vdj_reference)
-    
+
     output:
     tuple val(meta), path("*.tsv")                          , emit: tsv
     tuple val(meta), path("*_airr.tsv")                     , emit: airr_files
diff --git a/nextflow.config b/nextflow.config
index 4384b769..1a389435 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -312,7 +312,6 @@ profiles {
     nebnext_umi_bcr { includeConfig 'conf/nebnext_umi_bcr.config' }
     clontech_umi_bcr { includeConfig 'conf/clontech_umi_bcr.config' }
     clontech_umi_tcr { includeConfig 'conf/clontech_umi_tcr.config' }
-    
 }
 
 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 876b1dfa..f9aa61ac 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -62,7 +62,7 @@ workflow RNASEQ_INPUT {
         save_merged
     )
     ch_versions = ch_versions.mix(FASTP.out.versions)
-    
+
     ch_rename_fastq = FASTP.out.reads.map { meta, reads -> [meta, reads[0], reads[1]] }
     ch_rename_original = ch_reads.map{ meta,reads -> [meta, reads[0], reads[1]] }
 
@@ -96,11 +96,11 @@ workflow RNASEQ_INPUT {
         }
         .set { ch_trust4_airr_file }
 
-    
+
     // create channel with airr file
     ch_trust4_airr_file.bulk.mix ( ch_trust4_airr_file.sc ).set { ch_trust4_airr }
-        
-    
+
+
     // rename tsv file to unique name
     RENAME_FILE_TSV(
                 ch_trust4_airr
@@ -127,5 +127,5 @@ workflow RNASEQ_INPUT {
     // trust4 output converted to FASTA format
     fasta = ch_fasta
     samplesheet = FASTQ_INPUT_CHECK.out.samplesheet
-    
+
 }
diff --git a/subworkflows/local/sc_raw_input.nf b/subworkflows/local/sc_raw_input.nf
index ef69ea38..735a8c10 100644
--- a/subworkflows/local/sc_raw_input.nf
+++ b/subworkflows/local/sc_raw_input.nf
@@ -65,7 +65,7 @@ workflow SC_RAW_INPUT {
                 [ meta, out_files.find { it.endsWith("airr_rearrangement.tsv") } ]
             }
         .set { ch_cellranger_airr }
-    
+
     // TODO : add VALIDATE_INPUT Module
     // this module requires input in csv format... Might need to create this in an extra module
 
diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index 72a79530..0b5ab76a 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -107,7 +107,7 @@ workflow AIRRFLOW {
             ch_fastp_html                           = Channel.empty()
             ch_fastp_json                           = Channel.empty()
             ch_fastqc_postassembly_mqc              = Channel.empty()
-        
+
         }  else if (params.library_generation_method == "trust4") {
             // Extract VDJ sequences from "general" RNA seq data using TRUST4
 

From 82c0688860a8366cfdee43dad3ef6bb10f101129 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 16 May 2024 13:10:38 +0200
Subject: [PATCH 15/32] trailing whitespace

---
 subworkflows/local/fastq_input_check.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf
index 1dcc6917..6ed28f4a 100644
--- a/subworkflows/local/fastq_input_check.nf
+++ b/subworkflows/local/fastq_input_check.nf
@@ -100,7 +100,7 @@ def create_fastq_channels(LinkedHashMap col) {
             meta.umi_position = file(col.filename_R2).name
         }
         else if (params.umi_position == "") {
-            meta.umi_position = null 
+            meta.umi_position = null
         }
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
     } else {

From 1a3a6298f197cb6c3cac1969ee245c50ca89f5a3 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Tue, 21 May 2024 11:20:48 +0200
Subject: [PATCH 16/32] bugfix

---
 modules/local/trust4.nf                 | 25 +++++++++++++++++++++++--
 subworkflows/local/fastq_input_check.nf | 21 ++++-----------------
 subworkflows/local/rnaseq_input.nf      |  9 ++++++---
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/modules/local/trust4.nf b/modules/local/trust4.nf
index d139f2eb..85c9ab70 100644
--- a/modules/local/trust4.nf
+++ b/modules/local/trust4.nf
@@ -36,9 +36,30 @@ process TRUST4 {
     // separate forward from reverse pairs
     def (forward, reverse) = reads.collate(2).transpose()
     def paired_end_mode = reads && (meta.single_end == false) ? "-1 ${forward[0]} -2 ${reverse[0]}" : ''
-    def barcode = meta.barcode_read ? "--barcode ${meta.barcode_read}" : ''
     def readFormat = params.read_format ? "--readFormat ${params.read_format}" : ''
-    def umi_position = meta.umi_position ? "--UMI ${meta.umi_position}" : ''
+    def barcode = ''
+    if (meta.barcode_read) {
+        if (meta.barcode_read == "R1") {
+            barcode = "--barcode ${forward[0]}"
+        } else if (meta.barcode_read == "R2") {
+            barcode = "--barcode ${reverse[0]}"
+        }
+    }
+    else {
+        barcode = ''
+    }
+    
+    def umi_position = ''
+    if (meta.umi_position) {
+        if (meta.umi_position == "R1") {
+            umi_position = "--UMI ${forward[0]}"
+        } else if (meta.umi_position == "R2") {
+            umi_position = "--UMI ${reverse[0]}"
+        }
+    }
+    else {
+        umi_position = ''
+    }
 
     """
     run-trust4 \\
diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf
index 6ed28f4a..060c8038 100644
--- a/subworkflows/local/fastq_input_check.nf
+++ b/subworkflows/local/fastq_input_check.nf
@@ -86,28 +86,15 @@ def create_fastq_channels(LinkedHashMap col) {
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2), file(col.filename_I1) ] ]
     }
     if (params.library_generation_method == "trust4") {
-        if (params.barcode_read == "R1") {
-
-            meta.barcode_read = file(col.filename_R1).name
-        }
-        else if (params.barcode_read == "R2") {
-            meta.barcode_read = file(col.filename_R2).name
-        }
-        if (params.umi_position == "R1") {
-            meta.umi_position = file(col.filename_R1).name
-        }
-        else if (params.umi_position == "R2") {
-            meta.umi_position = file(col.filename_R2).name
-        }
-        else if (params.umi_position == "") {
+        meta.barcode_read = params.barcode_read
+        if (params.umi_position == "") {
             meta.umi_position = null
+        } else {
+            meta.umi_position = params.umi_position
         }
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
     } else {
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
-        if (params.index_file) {
-            error "ERROR: --index_file was provided but the index file path is not specified in the samplesheet!"
-        }
     }
     return array
 }
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index f9aa61ac..6651507a 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -3,7 +3,7 @@ include { FASTQ_INPUT_CHECK                                             } from '
 include { RENAME_FILE as RENAME_FILE_TSV                                } from '../../modules/local/rename_file'
 include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR  } from '../../modules/local/changeo/changeo_convertdb_fasta'
 include { FASTP                                                         } from '../../modules/nf-core/fastp/main'
-include { RENAME_FASTQ_TRUST4                                           } from '../../modules/local/rename_fastq_trust4'
+include { RENAME_FASTQ as RENAME_FASTQ_TRUST4                           } from '../../modules/local/rename_fastq'
 
 
@@ -64,19 +64,22 @@ workflow RNASEQ_INPUT {
     ch_versions = ch_versions.mix(FASTP.out.versions)
 
     ch_rename_fastq = FASTP.out.reads.map { meta, reads -> [meta, reads[0], reads[1]] }
-    ch_rename_original = ch_reads.map{ meta,reads -> [meta, reads[0], reads[1]] }
+    // ch_rename_original = ch_reads.map{ meta,reads -> [meta, reads[0], reads[1]] }
 
     // need to rename to input names in case barcodes are present
     RENAME_FASTQ_TRUST4(
         ch_rename_fastq,
-        ch_rename_original
     )
 
     ch_reads_fastp_filtered = RENAME_FASTQ_TRUST4.out.reads
 
+    ch_reads_fastp_filtered.view()
+
+
     // create trust4 input
     ch_reads_trust4 = ch_reads_fastp_filtered.map{ meta, read_1, read_2  -> [ meta, [], [read_1, read_2] ] }
 
+
     TRUST4(
         ch_reads_trust4,
         ch_coord_fasta,

From d822084e4afc76fcd80c4304c94c1dc69f0a1230 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Tue, 21 May 2024 11:27:46 +0200
Subject: [PATCH 17/32] trailing whitespace

---
 modules/local/trust4.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/local/trust4.nf b/modules/local/trust4.nf
index 85c9ab70..d4e567d3 100644
--- a/modules/local/trust4.nf
+++ b/modules/local/trust4.nf
@@ -48,7 +48,6 @@ process TRUST4 {
     else {
         barcode = ''
     }
-    
     def umi_position = ''
     if (meta.umi_position) {
         if (meta.umi_position == "R1") {

From 90cada8a40c45c205f017c269e2bf3b46d5f399c Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Tue, 21 May 2024 16:53:53 +0200
Subject: [PATCH 18/32] view statement removed

---
 subworkflows/local/rnaseq_input.nf | 2 --
 1 file changed, 2 deletions(-)

diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 6651507a..34fa5351 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -73,8 +73,6 @@ workflow RNASEQ_INPUT {
 
     ch_reads_fastp_filtered = RENAME_FASTQ_TRUST4.out.reads
 
-    ch_reads_fastp_filtered.view()
-
 
     // create trust4 input
     ch_reads_trust4 = ch_reads_fastp_filtered.map{ meta, read_1, read_2  -> [ meta, [], [read_1, read_2] ] }

From 35cf7f46f6378d7a5a5f10c801071d2f916650a9 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 23 May 2024 10:08:33 +0200
Subject: [PATCH 19/32] incorporating review comments

---
 conf/test_rnaseq_sc.config              |  2 +-
 docs/usage.md                           |  2 +-
 modules/local/rename_fastq_trust4.nf    | 23 -----------------------
 modules/local/trust4.nf                 | 16 ++++++++--------
 nextflow.config                         |  3 ++-
 nextflow_schema.json                    | 18 +++++++++++++-----
 subworkflows/local/fastq_input_check.nf |  6 +++---
 subworkflows/local/rnaseq_input.nf      | 13 ++++---------
 8 files changed, 32 insertions(+), 51 deletions(-)
 delete mode 100644 modules/local/rename_fastq_trust4.nf

diff --git a/conf/test_rnaseq_sc.config b/conf/test_rnaseq_sc.config
index 0ca7618f..82d6ceef 100644
--- a/conf/test_rnaseq_sc.config
+++ b/conf/test_rnaseq_sc.config
@@ -21,7 +21,7 @@ params {
     library_generation_method = 'trust4'
     clonal_threshold = 0
     barcode_read = R1
-    umi_position = R1
+    umi_read = R1
     read_format = "bc:0:15,um:16:27"
     skip_lineage = True
 
diff --git a/docs/usage.md b/docs/usage.md
index 4df6e0e7..ba4eb4f6 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -540,7 +540,7 @@ nextflow run nf-core/airrfow \
 --mode fastq \
 --input input_samplesheet.tsv \
 --library_generation_method trust4 \
---umi_position R1 \
+--umi_read R1 \
 --read_format bc:0:15,um:16:27
 --coord_fasta reference/IMGT+C.fa \
 --outdir results
diff --git a/modules/local/rename_fastq_trust4.nf b/modules/local/rename_fastq_trust4.nf
deleted file mode 100644
index 5ce36d63..00000000
--- a/modules/local/rename_fastq_trust4.nf
+++ /dev/null
@@ -1,23 +0,0 @@
-// Import generic module functions
-process RENAME_FASTQ_TRUST4 {
-    tag "$meta.id"
-    label 'process_low'
-
-    conda "conda-forge::python=3.8.0 conda-forge::biopython=1.74"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' :
-        'biocontainers/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' }"
-
-    input:
-    tuple val(meta), path(R1), path(R2)
-    tuple val(meta_2), path(orig_r1), path(orig_r2)
-
-    output:
-    tuple val(meta), path(orig_r1), path(orig_r2) , emit: reads
-
-    script:
-    """
-    mv ${R1} ${orig_r1}
-    mv ${R2} ${orig_r2}
-    """
-}
diff --git a/modules/local/trust4.nf b/modules/local/trust4.nf
index d4e567d3..49035c74 100644
--- a/modules/local/trust4.nf
+++ b/modules/local/trust4.nf
@@ -48,16 +48,16 @@ process TRUST4 {
     else {
         barcode = ''
     }
-    def umi_position = ''
-    if (meta.umi_position) {
-        if (meta.umi_position == "R1") {
-            umi_position = "--UMI ${forward[0]}"
-        } else if (meta.umi_position == "R2") {
-            umi_position = "--UMI ${reverse[0]}"
+    def umi_read = ''
+    if (meta.umi_read) {
+        if (meta.umi_read == "R1") {
+            umi_read = "--UMI ${forward[0]}"
+        } else if (meta.umi_read == "R2") {
+            umi_read = "--UMI ${reverse[0]}"
         }
     }
     else {
-        umi_position = ''
+        umi_read = ''
     }
 
     """
@@ -67,7 +67,7 @@ process TRUST4 {
         ${paired_end_mode} \\
         ${barcode} \\
         ${readFormat} \\
-        ${umi_position} \\
+        ${umi_read} \\
         -t $task.cpus \\
         -f ${fasta} \\
         -o ${prefix} \\
diff --git a/nextflow.config b/nextflow.config
index 1a389435..8db319a6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -31,7 +31,7 @@ params {
     primer_revpr = false
 
     // UMI and primer handling
-    umi_position = null
+    umi_position = 'R1'
     umi_length = -1
     umi_start = 0
 
@@ -129,6 +129,7 @@ params {
     barcode_read = null
     read_format = null
     coord_fasta = null
+    umi_read = null
 
 
     // -----------------------
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a33aec38..443f6cca 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -45,7 +45,7 @@
                 },
                 "miairr": {
                     "type": "string",
-                    "default": "/home/kymmp01/workdir/pipeline_dev/trust42airrflow/airrflow/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv",
+                    "default": "${projectDir}/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv",
                     "description": "Path to MiAIRR-BioSample mapping",
                     "fa_icon": "fas fa-table"
                 }
@@ -130,6 +130,7 @@
             "properties": {
                 "umi_position": {
                     "type": "string",
+                    "default": "R1",
                     "description": "Indicate if UMI indices are recorded in the R1 (default) or R1 fastq file.",
                     "help_text": "The pipeline requires UMI barcodes for identifying unique transcripts. These barcodes are typically read from an index file but sometimes can be provided merged with the start of the R1 or R2 reads. If provided in an additional index file, set the `--index_file` parameter, if provided merged with the R1 or R2 reads, set the `--umi_position` parameter to R1 or R2, respectively.",
                     "enum": ["R1", "R2"],
@@ -497,6 +498,13 @@
                     "enum": ["R1", "R2"],
                     "fa_icon": "fas fa-terminal"
                 },
+                "umi_read": {
+                    "type": "string",
+                    "description": "Indicate if UMI indices are recorded in the R1 or R2 fastq file.",
+                    "help_text": "Read file (R1 or R2) containing 10x Genomics-like UMIs.",
+                    "enum": ["R1", "R2"],
+                    "fa_icon": "fas fa-barcode"
+                },
                 "read_format": {
                     "type": "string",
                     "description": "Specifies where in the read the barcodes and UMIs can be found.",
@@ -521,25 +529,25 @@
             "properties": {
                 "report_rmd": {
                     "type": "string",
-                    "default": "/home/kymmp01/workdir/pipeline_dev/trust42airrflow/airrflow/assets/repertoire_comparison.Rmd",
+                    "default": "${projectDir}/assets/repertoire_comparison.Rmd",
                     "description": "Custom report Rmarkdown file.",
                     "fa_icon": "far fa-file-code"
                 },
                 "report_css": {
                     "type": "string",
-                    "default": "/home/kymmp01/workdir/pipeline_dev/trust42airrflow/airrflow/assets/nf-core_style.css",
+                    "default": "${projectDir}/assets/nf-core_style.css",
                     "description": "Custom report style file in css format.",
                     "fa_icon": "far fa-file-code"
                 },
                 "report_logo": {
                     "type": "string",
-                    "default": "/home/kymmp01/workdir/pipeline_dev/trust42airrflow/airrflow/assets/nf-core-airrflow_logo_light.png",
+                    "default": "${projectDir}/assets/nf-core-airrflow_logo_light.png",
                     "description": "Custom logo for the report.",
                     "fa_icon": "far fa-file-code"
                 },
                 "report_logo_img": {
                     "type": "string",
-                    "default": "/home/kymmp01/workdir/pipeline_dev/trust42airrflow/airrflow/assets/nf-core-airrflow_logo_reports.png",
+                    "default": "${projectDir}/assets/nf-core-airrflow_logo_reports.png",
                     "description": "Custom logo for the EnchantR reports.",
                     "fa_icon": "far fa-file-code"
                 },
diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf
index 060c8038..18016f60 100644
--- a/subworkflows/local/fastq_input_check.nf
+++ b/subworkflows/local/fastq_input_check.nf
@@ -87,10 +87,10 @@ def create_fastq_channels(LinkedHashMap col) {
     }
     if (params.library_generation_method == "trust4") {
         meta.barcode_read = params.barcode_read
-        if (params.umi_position == "") {
-            meta.umi_position = null
+        if (params.umi_read == "") {
+            meta.umi_read = null
         } else {
-            meta.umi_position = params.umi_position
+            meta.umi_read = params.umi_read
         }
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
     } else {
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 34fa5351..bbb91b56 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -1,6 +1,5 @@
 include { TRUST4                                                        } from '../../modules/local/trust4'
 include { FASTQ_INPUT_CHECK                                             } from '../../subworkflows/local/fastq_input_check'
-include { RENAME_FILE as RENAME_FILE_TSV                                } from '../../modules/local/rename_file'
 include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR  } from '../../modules/local/changeo/changeo_convertdb_fasta'
 include { FASTP                                                         } from '../../modules/nf-core/fastp/main'
 include { RENAME_FASTQ as RENAME_FASTQ_TRUST4                           } from '../../modules/local/rename_fastq'
@@ -66,7 +65,7 @@ workflow RNASEQ_INPUT {
     ch_rename_fastq = FASTP.out.reads.map { meta, reads -> [meta, reads[0], reads[1]] }
     // ch_rename_original = ch_reads.map{ meta,reads -> [meta, reads[0], reads[1]] }
 
-    // need to rename to input names in case barcodes are present
+    // rename fastp output
     RENAME_FASTQ_TRUST4(
         ch_rename_fastq,
     )
@@ -77,6 +76,8 @@ workflow RNASEQ_INPUT {
     // create trust4 input
     ch_reads_trust4 = ch_reads_fastp_filtered.map{ meta, read_1, read_2  -> [ meta, [], [read_1, read_2] ] }
 
+    ch_reads_trust4.view()
+
 
     TRUST4(
         ch_reads_trust4,
@@ -102,15 +103,9 @@ workflow RNASEQ_INPUT {
     ch_trust4_airr_file.bulk.mix ( ch_trust4_airr_file.sc ).set { ch_trust4_airr }
 
 
-    // rename tsv file to unique name
-    RENAME_FILE_TSV(
-                ch_trust4_airr
-            )
-        .set { ch_renamed_tsv }
-
     // convert airr tsv to fasta (cellranger does not create any fasta with clonotype information)
     CHANGEO_CONVERTDB_FASTA_FROM_AIRR(
-                RENAME_FILE_TSV.out.file
+                ch_trust4_airr
             )
 
     ch_fasta = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta

From cbdc415bce7a30e8c25152a2f18adee89d8509cb Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 23 May 2024 11:12:31 +0200
Subject: [PATCH 20/32] fix test

---
 conf/test_rnaseq_sc.config | 6 +++---
 nextflow_schema.json       | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/conf/test_rnaseq_sc.config b/conf/test_rnaseq_sc.config
index 82d6ceef..9a55b5da 100644
--- a/conf/test_rnaseq_sc.config
+++ b/conf/test_rnaseq_sc.config
@@ -20,10 +20,10 @@ params {
     mode = 'fastq'
     library_generation_method = 'trust4'
     clonal_threshold = 0
-    barcode_read = R1
-    umi_read = R1
+    barcode_read = "R1"
+    umi_read = "R1"
     read_format = "bc:0:15,um:16:27"
-    skip_lineage = True
+    skip_lineage = true
 
     // Input data
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-rnaseq/sc_rnaseq_metadata.tsv'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 443f6cca..3de67940 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -496,7 +496,8 @@
                     "type": "string",
                     "description": "Specifies which read holds the barcodes",
                     "enum": ["R1", "R2"],
-                    "fa_icon": "fas fa-terminal"
+                    "fa_icon": "fas fa-terminal",
+                    "help_text": "Read file (R1 or R2) that contains the barcodes."
                 },
                 "umi_read": {
                     "type": "string",

From 948bb53d7830f9122bf8e88ec197b97656c7c094 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Thu, 23 May 2024 12:58:34 +0200
Subject: [PATCH 21/32] removed view statement

---
 subworkflows/local/rnaseq_input.nf | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index bbb91b56..92d2794f 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -76,9 +76,6 @@ workflow RNASEQ_INPUT {
     // create trust4 input
     ch_reads_trust4 = ch_reads_fastp_filtered.map{ meta, read_1, read_2  -> [ meta, [], [read_1, read_2] ] }
 
-    ch_reads_trust4.view()
-
-
     TRUST4(
         ch_reads_trust4,
         ch_coord_fasta,

From 616349ff9a7fba3d1d234149f45f1443e23bb176 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Tue, 28 May 2024 13:05:24 -0400
Subject: [PATCH 22/32] generate reference fasta

---
 modules/local/prepare_trust4_reference.nf | 24 +++++++++++++++++++++++
 subworkflows/local/rnaseq_input.nf        | 13 ++++++++----
 workflows/airrflow.nf                     |  3 ++-
 3 files changed, 35 insertions(+), 5 deletions(-)
 create mode 100644 modules/local/prepare_trust4_reference.nf

diff --git a/modules/local/prepare_trust4_reference.nf b/modules/local/prepare_trust4_reference.nf
new file mode 100644
index 00000000..3e5fe846
--- /dev/null
+++ b/modules/local/prepare_trust4_reference.nf
@@ -0,0 +1,24 @@
+process PREPARE_TRUST4_REFERENCE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::trust4=1.0.13"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/trust4:1.0.13--h43eeafb_0':
+        'biocontainers/trust4:1.0.13--h43eeafb_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    path(reference_fasta)
+
+    output:
+    path("trust4_reference.fa") , emit: trust4_reference
+
+    script:
+    """
+    cat ${reference_fasta}/${meta.species.toLowerCase()}/vdj/*.fasta \\
+    ${reference_fasta}/${meta.species.toLowerCase()}/constant/*.fasta > trust4_reference.fa
+    """
+
+
+}
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 92d2794f..c61ac75f 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -10,6 +10,7 @@ workflow RNASEQ_INPUT {
 
     take:
     ch_input
+    ch_reference_fasta
 
     main:
 
@@ -63,22 +64,26 @@ workflow RNASEQ_INPUT {
     ch_versions = ch_versions.mix(FASTP.out.versions)
 
     ch_rename_fastq = FASTP.out.reads.map { meta, reads -> [meta, reads[0], reads[1]] }
-    // ch_rename_original = ch_reads.map{ meta,reads -> [meta, reads[0], reads[1]] }
 
     // rename fastp output
     RENAME_FASTQ_TRUST4(
-        ch_rename_fastq,
+        ch_rename_fastq
     )
 
     ch_reads_fastp_filtered = RENAME_FASTQ_TRUST4.out.reads
 
+    PREPARE_TRUST4_REFERENCE(
+        ch_reference_fasta,
+        ch_reads_fastp_filtered
+    )
+
 
     // create trust4 input
     ch_reads_trust4 = ch_reads_fastp_filtered.map{ meta, read_1, read_2  -> [ meta, [], [read_1, read_2] ] }
 
     TRUST4(
         ch_reads_trust4,
-        ch_coord_fasta,
+        PREPARE_TRUST4_REFERENCE.out.trust4_reference,
         Channel.of([[], []]).collect()
     )
 
@@ -100,7 +105,7 @@ workflow RNASEQ_INPUT {
     ch_trust4_airr_file.bulk.mix ( ch_trust4_airr_file.sc ).set { ch_trust4_airr }
 
 
-    // convert airr tsv to fasta (cellranger does not create any fasta with clonotype information)
+    // convert airr tsv to fasta
     CHANGEO_CONVERTDB_FASTA_FROM_AIRR(
                 ch_trust4_airr
             )
diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index 0b5ab76a..64c3e99b 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -85,7 +85,8 @@ workflow AIRRFLOW {
             if (params.library_generation_method == "sc_10x_genomics") {
 
                 SC_RAW_INPUT(
-                    ch_input
+                    ch_input,
+                    DATABASES.out.reference_fasta.collect()
                 )
 
                 ch_fasta                                = SC_RAW_INPUT.out.fasta

From 9443bc8821073100dea259f5afcb57ec0e31e7b8 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Tue, 28 May 2024 13:12:22 -0400
Subject: [PATCH 23/32] channel in wrong subworkflow

---
 workflows/airrflow.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index 64c3e99b..f524004c 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -85,8 +85,7 @@ workflow AIRRFLOW {
             if (params.library_generation_method == "sc_10x_genomics") {
 
                 SC_RAW_INPUT(
-                    ch_input,
-                    DATABASES.out.reference_fasta.collect()
+                    ch_input
                 )
 
                 ch_fasta                                = SC_RAW_INPUT.out.fasta
@@ -113,7 +112,8 @@ workflow AIRRFLOW {
             // Extract VDJ sequences from "general" RNA seq data using TRUST4
 
             RNASEQ_INPUT (
-                ch_input
+                ch_input,
+                DATABASES.out.reference_fasta.collect()
             )
 
             ch_fasta                                = RNASEQ_INPUT.out.fasta

From 310e3026f72203b0eb8fc9224d69d9d6da94c921 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Tue, 28 May 2024 14:09:24 -0400
Subject: [PATCH 24/32] generate trust4 reference

---
 conf/test_rnaseq_bulk.config              |  1 -
 conf/test_rnaseq_sc.config                |  1 -
 docs/usage.md                             |  9 +--------
 modules/local/prepare_trust4_reference.nf |  8 ++++----
 modules/local/trust4.nf                   |  2 +-
 nextflow.config                           |  1 -
 nextflow_schema.json                      |  6 ------
 subworkflows/local/rnaseq_input.nf        | 20 +++++++-------------
 workflows/airrflow.nf                     |  2 +-
 9 files changed, 14 insertions(+), 36 deletions(-)

diff --git a/conf/test_rnaseq_bulk.config b/conf/test_rnaseq_bulk.config
index 83d0eaf2..eb10e0d9 100644
--- a/conf/test_rnaseq_bulk.config
+++ b/conf/test_rnaseq_bulk.config
@@ -23,5 +23,4 @@ params {
 
     // Input data
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-rnaseq/rnaseq_metadata.tsv'
-    coord_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-rnaseq/IMGT+C.fa'
 }
diff --git a/conf/test_rnaseq_sc.config b/conf/test_rnaseq_sc.config
index 9a55b5da..de2bd2f5 100644
--- a/conf/test_rnaseq_sc.config
+++ b/conf/test_rnaseq_sc.config
@@ -27,5 +27,4 @@ params {
 
     // Input data
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-rnaseq/sc_rnaseq_metadata.tsv'
-    coord_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-rnaseq/IMGT+C.fa'
 }
diff --git a/docs/usage.md b/docs/usage.md
index ba4eb4f6..8aa0d5fd 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -526,7 +526,6 @@ nextflow run nf-core/airrfow \
 --mode fastq \
 --input input_samplesheet.tsv \
 --library_generation_method trust4 \
---coord_fasta reference/IMGT+C.fa \
 --outdir results
 ```
 
@@ -541,19 +540,13 @@ nextflow run nf-core/airrfow \
 --input input_samplesheet.tsv \
 --library_generation_method trust4 \
 --umi_read R1 \
---read_format bc:0:15,um:16:27
---coord_fasta reference/IMGT+C.fa \
+--read_format bc:0:15,um:16:27 \
 --outdir results
 ```
 
 - If UMI's are present, the read containing them must be specified using the `--umi_position` parameter.
 - The `--read_format` parameter can be used to specify the Barcode and UMI position within the reads (see TRUST4 [docs](https://github.com/liulab-dfci/TRUST4?tab=readme-ov-file#10x-genomics-data-and-barcode-based-single-cell-data))
 
-#### Reference file
-
-TRUST4 requires a reference. This can provided using the `--coord_fasta` parameter.
-The reference fasta can be downloaded from IMGT and created using [TRUST4](https://github.com/liulab-dfci/TRUST4?tab=readme-ov-file#build-custom-vjc-gene-database-files-for--f-and---ref)
-
 ## Core Nextflow arguments
 
 :::note
diff --git a/modules/local/prepare_trust4_reference.nf b/modules/local/prepare_trust4_reference.nf
index 3e5fe846..e0c2306c 100644
--- a/modules/local/prepare_trust4_reference.nf
+++ b/modules/local/prepare_trust4_reference.nf
@@ -8,16 +8,16 @@ process PREPARE_TRUST4_REFERENCE {
         'biocontainers/trust4:1.0.13--h43eeafb_0' }"
 
     input:
-    tuple val(meta), path(reads)
-    path(reference_fasta)
+    tuple val(meta), path(R1), path(R2)
+    path(reference_igblast)
 
     output:
     path("trust4_reference.fa") , emit: trust4_reference
 
     script:
     """
-    cat ${reference_fasta}/${meta.species.toLowerCase()}/vdj/*.fasta \\
-    ${reference_fasta}/${meta.species.toLowerCase()}/constant/*.fasta > trust4_reference.fa
+    cat ${reference_igblast}/fasta/imgt_${meta.species.toLowerCase()}_*.fasta \\
+    ${reference_igblast}/fasta/imgt_${meta.species.toLowerCase()}_*.fasta >> trust4_reference.fa
     """
 
 
diff --git a/modules/local/trust4.nf b/modules/local/trust4.nf
index 49035c74..dd145c6b 100644
--- a/modules/local/trust4.nf
+++ b/modules/local/trust4.nf
@@ -9,7 +9,7 @@ process TRUST4 {
 
     input:
     tuple val(meta), path(bam), path(reads)
-    tuple val(meta2), path(fasta)
+    path(fasta)
     tuple val(meta3), path(vdj_reference)
 
     output:
diff --git a/nextflow.config b/nextflow.config
index 8db319a6..fcbd3b8d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -128,7 +128,6 @@ params {
     // -----------------------
     barcode_read = null
     read_format = null
-    coord_fasta = null
     umi_read = null
 
 
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 3de67940..e894902f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -511,12 +511,6 @@
                     "description": "Specifies where in the read the barcodes and UMIs can be found.",
                     "help_text": "For further information see the TRUST4 [docs](https://github.com/liulab-dfci/TRUST4?tab=readme-ov-file#10x-genomics-data-and-barcode-based-single-cell-data).",
                     "fa_icon": "fas fa-terminal"
-                },
-                "coord_fasta": {
-                    "type": "string",
-                    "description": "path to the fasta file coordinate and sequence of V/D/J/C genes.",
-                    "help_text": "For further information see the TRUST4 [docs](https://github.com/liulab-dfci/TRUST4).",
-                    "fa_icon": "fas fa-database"
                 }
             },
             "help_text": "Options for running raw RNA seq data.",
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index c61ac75f..4db81c01 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -1,3 +1,4 @@
+include { PREPARE_TRUST4_REFERENCE                                      } from '../../modules/local/prepare_trust4_reference'
 include { TRUST4                                                        } from '../../modules/local/trust4'
 include { FASTQ_INPUT_CHECK                                             } from '../../subworkflows/local/fastq_input_check'
 include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR  } from '../../modules/local/changeo/changeo_convertdb_fasta'
@@ -10,7 +11,7 @@ workflow RNASEQ_INPUT {
 
     take:
     ch_input
-    ch_reference_fasta
+    ch_igblast_reference
 
     main:
 
@@ -43,15 +44,6 @@ workflow RNASEQ_INPUT {
     if (params.reference_10x)  {
         error "The TRUST4 library generation method does not require this reference, please provide a compliant reference file instead or select another library method option."
     }
-    if (!params.coord_fasta) {
-        error "Please provide a reference file for the TRUST4 library generation method."
-    }
-    else {
-        ch_reads.map {
-            meta, reads -> [meta, file(params.coord_fasta)]
-        }
-        .set { ch_coord_fasta }
-    }
 
     // Fastp
     save_merged = false
@@ -70,17 +62,19 @@ workflow RNASEQ_INPUT {
         ch_rename_fastq
     )
 
-    ch_reads_fastp_filtered = RENAME_FASTQ_TRUST4.out.reads
+    ch_reads_fastp_filtered = RENAME_FASTQ_TRUST4.out.reads.dump(tag: "fastp_filtered")
 
     PREPARE_TRUST4_REFERENCE(
-        ch_reference_fasta,
-        ch_reads_fastp_filtered
+        ch_reads_fastp_filtered,
+        ch_igblast_reference
     )
 
 
     // create trust4 input
     ch_reads_trust4 = ch_reads_fastp_filtered.map{ meta, read_1, read_2  -> [ meta, [], [read_1, read_2] ] }
 
+    PREPARE_TRUST4_REFERENCE.out.trust4_reference.dump(tag: "trust4_reference")
+
     TRUST4(
         ch_reads_trust4,
         PREPARE_TRUST4_REFERENCE.out.trust4_reference,
diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index f524004c..80d0b6eb 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -113,7 +113,7 @@ workflow AIRRFLOW {
 
             RNASEQ_INPUT (
                 ch_input,
-                DATABASES.out.reference_fasta.collect()
+                DATABASES.out.igblast.collect()
             )
 
             ch_fasta                                = RNASEQ_INPUT.out.fasta

From 0d7e636cdc9085eaaf2ba96f8f0e7d0a9c895a46 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Fri, 31 May 2024 10:08:33 -0400
Subject: [PATCH 25/32] merge dev


From bd3bbe25ab3ec293ac95e080ec550e6f8d81718f Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Fri, 31 May 2024 10:23:32 -0400
Subject: [PATCH 26/32] add param skip alignment filter

---
 nextflow.config                      |  1 +
 nextflow_schema.json                 | 13 ++++++++++---
 subworkflows/local/vdj_annotation.nf | 26 ++++++++++++++++----------
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 40fb8f0b..2751e888 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -86,6 +86,7 @@ params {
     fetch_imgt = false
     save_databases = true
     isotype_column = 'c_call'
+    skip_alignment_filter = false
 
     // -----------------------
     // bulk filtering options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index aeb50894..b8d87edf 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -350,13 +350,15 @@
                     "type": "string",
                     "description": "Path to the germline reference fasta.",
                     "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom reference fasta database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.",
-                    "fa_icon": "fas fa-database"
+                    "fa_icon": "fas fa-database",
+                    "default": "https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip"
                 },
                 "reference_igblast": {
                     "type": "string",
                     "description": "Path to the cached igblast database.",
                     "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom reference fasta database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.",
-                    "fa_icon": "fas fa-database"
+                    "fa_icon": "fas fa-database",
+                    "default": "https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip"
                 },
                 "fetch_imgt": {
                     "type": "boolean",
@@ -839,5 +841,10 @@
         {
             "$ref": "#/definitions/generic_options"
         }
-    ]
+    ],
+    "properties": {
+        "skip_alignment_filter": {
+            "type": "boolean"
+        }
+    }
 }
diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf
index 692320ec..18d052bf 100644
--- a/subworkflows/local/vdj_annotation.nf
+++ b/subworkflows/local/vdj_annotation.nf
@@ -38,19 +38,25 @@ workflow VDJ_ANNOTATION {
     ch_assigned_tab = CHANGEO_MAKEDB.out.tab
     ch_assignment_logs = CHANGEO_MAKEDB.out.logs
 
-    // Apply quality filters:
-    // - locus should match v_call chain
-    // - seq alignment min length informative positions 200
-    // - max 10% N nucleotides
-    FILTER_QUALITY(
-        ch_assigned_tab
-    )
-    ch_logs = ch_logs.mix(FILTER_QUALITY.out.logs)
-    ch_versions = ch_versions.mix(FILTER_QUALITY.out.versions)
+    if (!params.skip_alignment_filter){
+        // Apply quality filters:
+        // - locus should match v_call chain
+        // - seq alignment min length informative positions 200
+        // - max 10% N nucleotides
+        FILTER_QUALITY(
+            ch_assigned_tab
+        )
+        ch_for_parsedb_split = FILTER_QUALITY.out.tab
+        ch_logs = ch_logs.mix(FILTER_QUALITY.out.logs)
+        ch_versions = ch_versions.mix(FILTER_QUALITY.out.versions)
+    } else {
+        ch_for_parsedb_split = ch_assigned_tab
+    }
+
 
     if (params.productive_only) {
         CHANGEO_PARSEDB_SPLIT (
-            FILTER_QUALITY.out.tab
+            ch_for_parsedb_split
         )
         ch_logs = ch_logs.mix(CHANGEO_PARSEDB_SPLIT.out.logs)
         ch_versions = ch_versions.mix(CHANGEO_PARSEDB_SPLIT.out.versions)

From 46a5fa2c4e02638f3603b987e5fd428ae8c9f62f Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Mon, 3 Jun 2024 10:12:20 +0200
Subject: [PATCH 27/32] add 'config_profile_url' default

---
 nextflow_schema.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index b8d87edf..566dafa2 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -628,6 +628,7 @@
                 "config_profile_url": {
                     "type": "string",
                     "description": "Directory to keep pipeline Nextflow logs and reports.",
+                    "default": "${params.outdir}/pipeline_info",
                     "fa_icon": "fas fa-cogs",
                     "hidden": true
                 }

From ba3eea19930930c4cc53751f406eeab2c1736336 Mon Sep 17 00:00:00 2001
From: mapo9 <mark.polster@qbic.uni-tuebingen.de>
Date: Wed, 5 Jun 2024 14:30:16 +0200
Subject: [PATCH 28/32] trust4 nf-core module

---
 modules.json                                  |  5 ++
 modules/local/prepare_trust4_reference.nf     |  2 +-
 modules/nf-core/trust4/environment.yml        |  9 +++
 .../trust4.nf => nf-core/trust4/main.nf}      | 47 ++++++------
 modules/nf-core/trust4/meta.yml               | 75 +++++++++++++++++++
 subworkflows/local/fastq_input_check.nf       |  6 +-
 subworkflows/local/rnaseq_input.nf            | 12 ++-
 7 files changed, 126 insertions(+), 30 deletions(-)
 create mode 100644 modules/nf-core/trust4/environment.yml
 rename modules/{local/trust4.nf => nf-core/trust4/main.nf} (67%)
 create mode 100644 modules/nf-core/trust4/meta.yml

diff --git a/modules.json b/modules.json
index 3a6e053c..e561d2ec 100644
--- a/modules.json
+++ b/modules.json
@@ -34,6 +34,11 @@
                         "branch": "master",
                         "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
                         "installed_by": ["modules"]
+                    },
+                    "trust4": {
+                        "branch": "master",
+                        "git_sha": "bbb9636dbe460f45fe786d0866f8fd7337e4fc7a",
+                        "installed_by": ["modules"]
                     }
                 }
             },
diff --git a/modules/local/prepare_trust4_reference.nf b/modules/local/prepare_trust4_reference.nf
index e0c2306c..dce204c8 100644
--- a/modules/local/prepare_trust4_reference.nf
+++ b/modules/local/prepare_trust4_reference.nf
@@ -12,7 +12,7 @@ process PREPARE_TRUST4_REFERENCE {
     path(reference_igblast)
 
     output:
-    path("trust4_reference.fa") , emit: trust4_reference
+    tuple val(meta), path("trust4_reference.fa") , emit: trust4_reference
 
     script:
     """
diff --git a/modules/nf-core/trust4/environment.yml b/modules/nf-core/trust4/environment.yml
new file mode 100644
index 00000000..9270eee2
--- /dev/null
+++ b/modules/nf-core/trust4/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "trust4"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::trust4=1.0.13"
diff --git a/modules/local/trust4.nf b/modules/nf-core/trust4/main.nf
similarity index 67%
rename from modules/local/trust4.nf
rename to modules/nf-core/trust4/main.nf
index dd145c6b..1d822fb8 100644
--- a/modules/local/trust4.nf
+++ b/modules/nf-core/trust4/main.nf
@@ -9,19 +9,21 @@ process TRUST4 {
 
     input:
     tuple val(meta), path(bam), path(reads)
-    path(fasta)
+    tuple val(meta2), path(fasta)
     tuple val(meta3), path(vdj_reference)
+    tuple val(meta4), val(barcode_read)
+    tuple val(meta5), val(umi_read)
 
     output:
-    tuple val(meta), path("*.tsv")                          , emit: tsv
-    tuple val(meta), path("*_airr.tsv")                     , emit: airr_files
-    tuple val(meta), path("${meta.id}_airr.tsv")            , emit: airr_tsv
-    tuple val(meta), path("*_report.tsv")                   , emit: report_tsv
-    tuple val(meta), path("*.fa")                           , emit: fasta
-    tuple val(meta), path("*.out")                          , emit: out
-    tuple val(meta), path("*.fq")                           , emit: fq
-    tuple val(meta), path("**")                             , emit: outs
-    path "versions.yml"                                     , emit: versions
+    tuple val(meta), path("*.tsv")                  , emit: tsv
+    tuple val(meta), path("*_airr.tsv")             , emit: airr_files
+    tuple val(meta), path("${meta.id}_airr.tsv")    , emit: airr_tsv
+    tuple val(meta), path("*_report.tsv")           , emit: report_tsv
+    tuple val(meta), path("*.fa")                   , emit: fasta
+    tuple val(meta), path("*.out")                  , emit: out
+    tuple val(meta), path("*.fq")                   , emit: fq
+    tuple val(meta), path("**")                     , emit: outs
+    path "versions.yml"                             , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -36,28 +38,29 @@ process TRUST4 {
     // separate forward from reverse pairs
     def (forward, reverse) = reads.collate(2).transpose()
     def paired_end_mode = reads && (meta.single_end == false) ? "-1 ${forward[0]} -2 ${reverse[0]}" : ''
+    // read format is optional
     def readFormat = params.read_format ? "--readFormat ${params.read_format}" : ''
-    def barcode = ''
-    if (meta.barcode_read) {
-        if (meta.barcode_read == "R1") {
+    // add barcode information if present
+    if (barcode_read) {
+        if (barcode_read == "R1") {
             barcode = "--barcode ${forward[0]}"
-        } else if (meta.barcode_read == "R2") {
+        } else if (barcode_read == "R2") {
             barcode = "--barcode ${reverse[0]}"
         }
     }
     else {
         barcode = ''
     }
-    def umi_read = ''
-    if (meta.umi_read) {
-        if (meta.umi_read == "R1") {
-            umi_read = "--UMI ${forward[0]}"
-        } else if (meta.umi_read == "R2") {
-            umi_read = "--UMI ${reverse[0]}"
+    // add umi information if present
+    if (umi_read) {
+        if (umi_read == "R1") {
+            umi = "--UMI ${forward[0]}"
+        } else if (umi_read == "R2") {
+            umi = "--UMI ${reverse[0]}"
         }
     }
     else {
-        umi_read = ''
+        umi = ''
     }
 
     """
@@ -67,7 +70,7 @@ process TRUST4 {
         ${paired_end_mode} \\
         ${barcode} \\
         ${readFormat} \\
-        ${umi_read} \\
+        ${umi} \\
         -t $task.cpus \\
         -f ${fasta} \\
         -o ${prefix} \\
diff --git a/modules/nf-core/trust4/meta.yml b/modules/nf-core/trust4/meta.yml
new file mode 100644
index 00000000..89bc4d29
--- /dev/null
+++ b/modules/nf-core/trust4/meta.yml
@@ -0,0 +1,75 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "trust4"
+description: Run TRUST4 on RNA-seq data
+keywords:
+  - sort
+  - example
+  - genomics
+tools:
+  - "trust4":
+      description: "TCR and BCR assembly from bulk or single-cell RNA-seq data"
+      homepage: "https://github.com/liulab-dfci/TRUST4"
+      documentation: "https://github.com/liulab-dfci/TRUST4"
+      tool_dev_url: "https://github.com/liulab-dfci/TRUST4"
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - bam:
+      type: file
+      description: BAM file from bulk or single-cell RNA-seq data
+      pattern: "*.bam"
+  - reads:
+      type: file
+      description: List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively
+  - fasta:
+      type: file
+      description: Path to the fasta file coordinate and sequence of V/D/J/C genes
+  - ref:
+      type: file
+      description: Path to detailed V/D/J/C gene reference file, such as from IMGT database.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - tsv:
+      type: file
+      description: tsv files created by TRUST4
+      pattern: "*.tsv"
+  - airr_tsv:
+      type: file
+      description: TRUST4 results in AIRR format
+      pattern: "*_airr.tsv"
+  - report_tsv:
+      type: file
+      description: TRUST4 report in tsv format
+      pattern: "*_report.tsv"
+  - fasta:
+      type: file
+      description: Fasta files created by TRUST4
+      pattern: "*.fa"
+  - out:
+      type: file
+      description: Further report files
+      pattern: "*.out"
+  - fq:
+      type: file
+      description: Fastq files created by TRUST4
+      pattern: "*.fq"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@mapo9, @Joaodemeirelles"
+maintainers:
+  - "@mapo9"
diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf
index 18016f60..1f142211 100644
--- a/subworkflows/local/fastq_input_check.nf
+++ b/subworkflows/local/fastq_input_check.nf
@@ -87,11 +87,7 @@ def create_fastq_channels(LinkedHashMap col) {
     }
     if (params.library_generation_method == "trust4") {
         meta.barcode_read = params.barcode_read
-        if (params.umi_read == "") {
-            meta.umi_read = null
-        } else {
-            meta.umi_read = params.umi_read
-        }
+        meta.umi_read = params.umi_read
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
     } else {
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 4db81c01..a04898dd 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -1,5 +1,5 @@
 include { PREPARE_TRUST4_REFERENCE                                      } from '../../modules/local/prepare_trust4_reference'
-include { TRUST4                                                        } from '../../modules/local/trust4'
+include { TRUST4                                                        } from '../../modules/nf-core/trust4/main'
 include { FASTQ_INPUT_CHECK                                             } from '../../subworkflows/local/fastq_input_check'
 include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR  } from '../../modules/local/changeo/changeo_convertdb_fasta'
 include { FASTP                                                         } from '../../modules/nf-core/fastp/main'
@@ -75,10 +75,18 @@ workflow RNASEQ_INPUT {
 
     PREPARE_TRUST4_REFERENCE.out.trust4_reference.dump(tag: "trust4_reference")
 
+    ch_reads_trust4.dump(tag: "trust4_input")
+
+    // create barcode and umi channels for nf-core trust4 module
+    barcode_channel = ch_reads_fastp_filtered.map { meta, read_1, read_2 ->  [meta, meta.barcode_read] }
+    umi_channel = ch_reads_fastp_filtered.map { meta, read_1, read_2 -> [meta, meta.umi_read] }
+
     TRUST4(
         ch_reads_trust4,
         PREPARE_TRUST4_REFERENCE.out.trust4_reference,
-        Channel.of([[], []]).collect()
+        Channel.of([[], []]).collect(),
+        barcode_channel,
+        umi_channel
     )
 
     ch_trust4_out = TRUST4.out.outs

From c186a69ad6a2b80de129145164b5008a93095082 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 17 Jul 2024 10:58:34 -0400
Subject: [PATCH 29/32] improve docs

---
 docs/usage.md | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 889d74a6..d94bac5a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -121,7 +121,7 @@ If you wish to share such profile (such as upload as supplementary material for
 
 ## Input samplesheet
 
-### Fastq input samplesheet (bulk AIRR and bulk/sc RNA sequencing)
+### Fastq input samplesheet (bulk AIRR sequencing)
 
 The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. An example samplesheet is:
 
@@ -143,7 +143,7 @@ The required input file for processing raw BCR or TCR bulk targeted sequencing d
 - `age`: Subject biological age.
 - `single_cell`: TRUE or FALSE.
 
-Other optional columns can be added. These columns will be available when building the contrasts for the repertoire comparison report. It is recommended that these columns also follow the AIRR nomenclature. Examples are:
+Other optional columns can be added. These columns will be available as metadata in the final repertoire table. It is recommended that these columns also follow the AIRR nomenclature. Examples are:
 
 - `intervention`: Description of intervention.
 - `disease_diagnosis`: Diagnosis of subject.
@@ -151,19 +151,19 @@ Other optional columns can be added. These columns will be available when buildi
 - `collection_time_point_reference`: Event in the study schedule to which `Sample collection time` relates to (e.g. primary vaccination, intervention start).
 - `cell_subset`: Commonly-used designation of isolated cell population.
 
-The metadata specified in the input file will then be automatically annotated in a column with the same header in the tables generated by the pipeline.
+It is possible to provide several fastq files per sample (e.g. sequenced over different chips or lanes). In this case the different fastq files per sample will be merged together prior to processing. Provide one fastq pair R1/R2 per row, and the same `sample_id` field for these rows.
 
 ### Fastq input samplesheet (single cell sequencing)
 
-The required input file for processing raw BCR or TCR single cell targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. You can refer to the bulk fastq input section for documentation on the individual columns.
+The required input file for processing raw BCR or TCR single cell targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. Any other columns you add will be available in the final repertoire file as extra metadata fields. You can refer to the bulk fastq input section for documentation on the individual columns.
 An example samplesheet is:
 
-| sample_id | filename_R1                     | filename_R2                     | subject_id | species | pcr_target_locus | tissue | sex    | age | biomaterial_provider | single_cell | intervention   | collection_time_point_relative | cell_subset  |
-| --------- | ------------------------------- | ------------------------------- | ---------- | ------- | ---------------- | ------ | ------ | --- | -------------------- | ----------- | -------------- | ------------------------------ | ------------ |
-| sample01  | sample1_S1_L001_R1_001.fastq.gz | sample1_S1_L001_R2_001.fastq.gz | Subject02  | human   | IG               | blood  | NA     | 53  | sequencing_facility  | FALSE       | Drug_treatment | Baseline                       | plasmablasts |
-| sample02  | sample2_S1_L001_R1_001.fastq.gz | sample2_S1_L001_R2_001.fastq.gz | Subject02  | human   | TR               | blood  | female | 78  | sequencing_facility  | FALSE       | Drug_treatment | Baseline                       | plasmablasts |
+| sample_id | filename_R1                     | filename_R2                     | subject_id | species | pcr_target_locus | tissue | sex    | age | biomaterial_provider | single_cell |
+| --------- | ------------------------------- | ------------------------------- | ---------- | ------- | ---------------- | ------ | ------ | --- | -------------------- | ----------- |
+| sample01  | sample01_S1_L001_R1_001.fastq.gz | sample01_S1_L001_R2_001.fastq.gz | Subject02  | human   | IG               | blood  | NA     | 53  | sequencing_facility  | TRUE       |
+| sample02  | sample02_S1_L001_R1_001.fastq.gz | sample02_S1_L001_R2_001.fastq.gz | Subject02  | human   | TR               | blood  | female | 78  | sequencing_facility  | TRUE       |
 
-> FASTQ files must confirm the 10xGenomics cellranger naming conventions<br> >**`[SAMPLE-NAME]`_S1_L00`[LANE-NUMBER]` _`[READ-TYPE]`\_001.fastq.gz**
+> FASTQ files must conform with the 10xGenomics cellranger naming conventions with the same sample name as provided in the sample_id column <br> >**`[SAMPLE-NAME]`_ S`[CHIP-NUMBER]`_ L00`[LANE-NUMBER]`_`[R1/R2]`\_001.fastq.gz**
 >
 > Read type is one of
 >
@@ -172,6 +172,13 @@ An example samplesheet is:
 > - `R1`: Read 1
 > - `R2`: Read 2
 
+It is possible to provide several fastq files per sample (e.g. sequenced over different chips or lanes). In this case the different fastq files per sample will be provided to the same cellranger process. These rows should then have an identical `sample_id` field.
+
+### Fastq input samplesheet (untargeted bulk or sc RNA sequencing)
+
+When running the untargeted protocol, BCR or TCR sequences will be extracted from the untargeted bulk or single-cell RNA sequencing with tools such as [TRUST4](https://github.com/liulab-dfci/TRUST4).
+The required input file is the same as for the [Fastq bulk AIRR samplesheet](#fastq-input-samplesheet-bulk-airr-sequencing).
+
 ### Assembled input samplesheet (bulk or single-cell sequencing)
 
 The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename`, `subject_id`, `species`, `tissue`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. All fields are explained in the previous section, with the only difference being that there is only one `filename` column for the assembled input samplesheet. The provided file will be different from assembled single-cell or bulk data:

From 5e6e1a98265211db77dc2cf7caeb487e3f572772 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 17 Jul 2024 11:01:02 -0400
Subject: [PATCH 30/32] fix prettier

---
 docs/usage.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index d94bac5a..a054653f 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -158,12 +158,12 @@ It is possible to provide several fastq files per sample (e.g. sequenced over di
 The required input file for processing raw BCR or TCR single cell targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. Any other columns you add will be available in the final repertoire file as extra metadata fields. You can refer to the bulk fastq input section for documentation on the individual columns.
 An example samplesheet is:
 
-| sample_id | filename_R1                     | filename_R2                     | subject_id | species | pcr_target_locus | tissue | sex    | age | biomaterial_provider | single_cell |
-| --------- | ------------------------------- | ------------------------------- | ---------- | ------- | ---------------- | ------ | ------ | --- | -------------------- | ----------- |
-| sample01  | sample01_S1_L001_R1_001.fastq.gz | sample01_S1_L001_R2_001.fastq.gz | Subject02  | human   | IG               | blood  | NA     | 53  | sequencing_facility  | TRUE       |
-| sample02  | sample02_S1_L001_R1_001.fastq.gz | sample02_S1_L001_R2_001.fastq.gz | Subject02  | human   | TR               | blood  | female | 78  | sequencing_facility  | TRUE       |
+| sample_id | filename_R1                      | filename_R2                      | subject_id | species | pcr_target_locus | tissue | sex    | age | biomaterial_provider | single_cell |
+| --------- | -------------------------------- | -------------------------------- | ---------- | ------- | ---------------- | ------ | ------ | --- | -------------------- | ----------- |
+| sample01  | sample01_S1_L001_R1_001.fastq.gz | sample01_S1_L001_R2_001.fastq.gz | Subject02  | human   | IG               | blood  | NA     | 53  | sequencing_facility  | TRUE        |
+| sample02  | sample02_S1_L001_R1_001.fastq.gz | sample02_S1_L001_R2_001.fastq.gz | Subject02  | human   | TR               | blood  | female | 78  | sequencing_facility  | TRUE        |
 
-> FASTQ files must conform with the 10xGenomics cellranger naming conventions with the same sample name as provided in the sample_id column <br> >**`[SAMPLE-NAME]`_ S`[CHIP-NUMBER]`_ L00`[LANE-NUMBER]`_`[R1/R2]`\_001.fastq.gz**
+> FASTQ files must conform with the 10xGenomics cellranger naming conventions with the same sample name as provided in the `sample_id` column <br> >**`[SAMPLE-NAME]`\_S`[CHIP-NUMBER]`\_L00`[LANE-NUMBER]`\_`[R1/R2]`\_001.fastq.gz**
 >
 > Read type is one of
 >

From b8dc66ec8ed9d3755f52ddf42068928de8233bb2 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 17 Jul 2024 13:35:23 -0400
Subject: [PATCH 31/32] add locus selection to rnaseq workflow

---
 conf/modules.config                                    |  8 ++++++++
 ...rsedb_select.nf => changeo_parsedb_select_locus.nf} | 10 +++-------
 subworkflows/local/rnaseq_input.nf                     |  6 +++++-
 3 files changed, 16 insertions(+), 8 deletions(-)
 rename modules/local/changeo/{changeo_parsedb_select.nf => changeo_parsedb_select_locus.nf} (72%)

diff --git a/conf/modules.config b/conf/modules.config
index 3dc63fa9..dd315b85 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -422,6 +422,14 @@ process {
         ]
     }
 
+    withName: CHANGEO_PARSEDB_SELECT_LOCUS {
+        publishDir = [
+            path: { "${params.outdir}/vdj_annotation/select-locus/${meta.id}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
     withName: CHANGEO_PARSEDB_SPLIT {
         publishDir = [
             path: { "${params.outdir}/vdj_annotation/04-select-productive/${meta.id}" },
diff --git a/modules/local/changeo/changeo_parsedb_select.nf b/modules/local/changeo/changeo_parsedb_select_locus.nf
similarity index 72%
rename from modules/local/changeo/changeo_parsedb_select.nf
rename to modules/local/changeo/changeo_parsedb_select_locus.nf
index 2bba4916..32805c26 100644
--- a/modules/local/changeo/changeo_parsedb_select.nf
+++ b/modules/local/changeo/changeo_parsedb_select_locus.nf
@@ -1,4 +1,4 @@
-process CHANGEO_PARSEDB_SELECT {
+process CHANGEO_PARSEDB_SELECT_LOCUS {
     tag "$meta.id"
     label 'process_low'
     label 'immcantation'
@@ -18,25 +18,21 @@ process CHANGEO_PARSEDB_SELECT {
     path "versions.yml" , emit: versions
 
     script:
-    def args = task.ext.args ?: ''
-    def args2 = task.ext.args2 ?: ''
     if (meta.locus.toUpperCase() == 'IG'){
         """
-        ParseDb.py select -d $tab $args --outname ${meta.id} > ${meta.id}_select_command_log.txt
+        ParseDb.py select -d $tab -f locus -u "IG[HKL]" --regex --outname ${meta.id} > ${meta.id}_select_command_log.txt
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
-            igblastn: \$( igblastn -version | grep -o "igblast[0-9\\. ]\\+" | grep -o "[0-9\\. ]\\+" )
             changeo: \$( ParseDb.py --version | awk -F' '  '{print \$2}' )
         END_VERSIONS
         """
     } else if (meta.locus.toUpperCase() == 'TR'){
         """
-        ParseDb.py select -d $tab $args2 --outname ${meta.id} > "${meta.id}_command_log.txt"
+        ParseDb.py select -d $tab -f locus -u "TR[ABDG]" --regex --outname ${meta.id} > "${meta.id}_command_log.txt"
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
-            igblastn: \$( igblastn -version | grep -o "igblast[0-9\\. ]\\+" | grep -o "[0-9\\. ]\\+" )
             changeo: \$( ParseDb.py --version | awk -F' '  '{print \$2}' )
         END_VERSIONS
         """
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index a04898dd..12253f5a 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -1,6 +1,7 @@
 include { PREPARE_TRUST4_REFERENCE                                      } from '../../modules/local/prepare_trust4_reference'
 include { TRUST4                                                        } from '../../modules/nf-core/trust4/main'
 include { FASTQ_INPUT_CHECK                                             } from '../../subworkflows/local/fastq_input_check'
+include { CHANGEO_PARSEDB_SELECT_LOCUS                                  } from '../../modules/local/changeo/changeo_parsedb_select_locus'
 include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR  } from '../../modules/local/changeo/changeo_convertdb_fasta'
 include { FASTP                                                         } from '../../modules/nf-core/fastp/main'
 include { RENAME_FASTQ as RENAME_FASTQ_TRUST4                           } from '../../modules/local/rename_fastq'
@@ -106,10 +107,13 @@ workflow RNASEQ_INPUT {
     // create channel with airr file
     ch_trust4_airr_file.bulk.mix ( ch_trust4_airr_file.sc ).set { ch_trust4_airr }
 
+    // select only provided locus
+    CHANGEO_PARSEDB_SELECT_LOCUS(ch_trust4_airr)
+
 
     // convert airr tsv to fasta
     CHANGEO_CONVERTDB_FASTA_FROM_AIRR(
-                ch_trust4_airr
+                CHANGEO_PARSEDB_SELECT_LOCUS.out.tab
             )
 
     ch_fasta = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta

From cfa4f73b6ce514f5bcb5f70a607d05eedaf0599b Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Wed, 17 Jul 2024 15:35:46 -0400
Subject: [PATCH 32/32] fix issue with merge_UMI

---
 docs/usage.md                           | 2 +-
 subworkflows/local/fastq_input_check.nf | 8 +++-----
 subworkflows/local/rnaseq_input.nf      | 4 ++--
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index a054653f..3f6f7963 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -177,7 +177,7 @@ It is possible to provide several fastq files per sample (e.g. sequenced over di
 ### Fastq input samplesheet (untargeted bulk or sc RNA sequencing)
 
 When running the untargeted protocol, BCR or TCR sequences will be extracted from the untargeted bulk or single-cell RNA sequencing with tools such as [TRUST4](https://github.com/liulab-dfci/TRUST4).
-The required input file is the same as for the [Fastq bulk AIRR samplesheet](#fastq-input-samplesheet-bulk-airr-sequencing).
+The required input file is the same as for the [Fastq bulk AIRR samplesheet](#fastq-input-samplesheet-bulk-airr-sequencing) or the [Fastq single-cell AIRR samplesheet](#fastq-input-samplesheet-single-cell-sequencing), depending on the input data type (bulk RNAseq or single-cell RNAseq).
 
 ### Assembled input samplesheet (bulk or single-cell sequencing)
 
diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf
index 1f142211..91412b15 100644
--- a/subworkflows/local/fastq_input_check.nf
+++ b/subworkflows/local/fastq_input_check.nf
@@ -84,13 +84,11 @@ def create_fastq_channels(LinkedHashMap col) {
             error "ERROR: Please check input samplesheet -> Index read FastQ file does not exist!\n${col.filename_I1}"
         }
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2), file(col.filename_I1) ] ]
-    }
-    if (params.library_generation_method == "trust4") {
-        meta.barcode_read = params.barcode_read
-        meta.umi_read = params.umi_read
-        array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
     } else {
         array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ]
+        if (params.index_file) {
+            error "ERROR: Index file path was provided but the index file path is not specified in the samplesheet!"
+        }
     }
     return array
 }
diff --git a/subworkflows/local/rnaseq_input.nf b/subworkflows/local/rnaseq_input.nf
index 12253f5a..6469e6b5 100644
--- a/subworkflows/local/rnaseq_input.nf
+++ b/subworkflows/local/rnaseq_input.nf
@@ -79,8 +79,8 @@ workflow RNASEQ_INPUT {
     ch_reads_trust4.dump(tag: "trust4_input")
 
     // create barcode and umi channels for nf-core trust4 module
-    barcode_channel = ch_reads_fastp_filtered.map { meta, read_1, read_2 ->  [meta, meta.barcode_read] }
-    umi_channel = ch_reads_fastp_filtered.map { meta, read_1, read_2 -> [meta, meta.umi_read] }
+    barcode_channel = ch_reads_fastp_filtered.map { meta, read_1, read_2 ->  [meta, params.barcode_read] }
+    umi_channel = ch_reads_fastp_filtered.map { meta, read_1, read_2 -> [meta, params.umi_read] }
 
     TRUST4(
         ch_reads_trust4,