From 52fa9ca8bb94e6e8b564d03ec41f2a1ae08291ab Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 19 Aug 2024 11:22:53 +0200
Subject: [PATCH 01/66] comment out for development

---
 workflows/scrnaseq.nf | 62 +++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 10ced221..4c1ef914 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -290,37 +290,37 @@ workflow SCRNASEQ {
     }
 
     // Run emptydrops calling module
-    if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) {
-
-        //
-        // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
-        //
-        if ( params.aligner in [ 'cellranger', 'cellrangermulti', 'kallisto', 'star' ] ) {
-            ch_mtx_matrices_for_emptydrops =
-                ch_mtx_matrices.filter { meta, mtx_files ->
-                    mtx_files.toString().contains("raw_feature_bc_matrix") || // cellranger
-                    mtx_files.toString().contains("counts_unfiltered")     || // kallisto
-                    mtx_files.toString().contains("raw")                      // star
-                }
-        } else {
-            ch_mtx_matrices_for_emptydrops = ch_mtx_matrices
-        }
-
-        EMPTYDROPS_CELL_CALLING( ch_mtx_matrices_for_emptydrops )
-        ch_mtx_matrices = ch_mtx_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )
-
-    }
-
-    // Run mtx to h5ad conversion subworkflow
-    MTX_CONVERSION (
-        ch_mtx_matrices,
-        ch_input,
-        ch_txp2gene,
-        ch_star_index
-    )
-
-    //Add Versions from MTX Conversion workflow too
-    ch_versions.mix(MTX_CONVERSION.out.ch_versions)
+    // if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) {
+
+    //     //
+    //     // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
+    //     //
+    //     if ( params.aligner in [ 'cellranger', 'cellrangermulti', 'kallisto', 'star' ] ) {
+    //         ch_mtx_matrices_for_emptydrops =
+    //             ch_mtx_matrices.filter { meta, mtx_files ->
+    //                 mtx_files.toString().contains("raw_feature_bc_matrix") || // cellranger
+    //                 mtx_files.toString().contains("counts_unfiltered")     || // kallisto
+    //                 mtx_files.toString().contains("raw")                      // star
+    //             }
+    //     } else {
+    //         ch_mtx_matrices_for_emptydrops = ch_mtx_matrices
+    //     }
+
+    //     EMPTYDROPS_CELL_CALLING( ch_mtx_matrices_for_emptydrops )
+    //     ch_mtx_matrices = ch_mtx_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )
+
+    // }
+
+    // // Run mtx to h5ad conversion subworkflow
+    // MTX_CONVERSION (
+    //     ch_mtx_matrices,
+    //     ch_input,
+    //     ch_txp2gene,
+    //     ch_star_index
+    // )
+
+    // //Add Versions from MTX Conversion workflow too
+    // ch_versions.mix(MTX_CONVERSION.out.ch_versions)
 
     //
     // Collate and save software versions

From bbf299c5ea80a4cd03e3e9add5d6d761ece9a1a2 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 19 Aug 2024 12:19:37 +0200
Subject: [PATCH 02/66] refact modules for STAR aligner

---
 bin/mtx_to_h5ad_star.py           | 89 +++++++++++++++++++++++++++++++
 modules/local/mtx_to_h5ad_star.nf | 51 ++++++++++++++++++
 subworkflows/local/starsolo.nf    | 14 +++--
 workflows/scrnaseq.nf             |  3 --
 4 files changed, 150 insertions(+), 7 deletions(-)
 create mode 100755 bin/mtx_to_h5ad_star.py
 create mode 100644 modules/local/mtx_to_h5ad_star.nf

diff --git a/bin/mtx_to_h5ad_star.py b/bin/mtx_to_h5ad_star.py
new file mode 100755
index 00000000..a8c2ec22
--- /dev/null
+++ b/bin/mtx_to_h5ad_star.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+
+# Set numba chache dir to current working directory (which is a writable mount also in containers)
+import os
+
+os.environ["NUMBA_CACHE_DIR"] = "."
+
+import scanpy as sc
+import pandas as pd
+import argparse
+from scipy import io
+from anndata import AnnData
+
+def _mtx_to_adata(
+    mtx_file: str,
+    barcode_file: str,
+    feature_file: str,
+    sample: str,
+):
+    adata = sc.read_mtx(mtx_file)
+    adata = adata.transpose()
+    adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values
+    adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values
+    adata.obs["sample"] = sample
+
+    return adata
+
+
+def input_to_adata(
+    input_data: str,
+    barcode_file: str,
+    feature_file: str,
+    sample: str,
+    star_index: str,
+):
+    print("Reading in {}".format(input_data))
+
+    # open main data
+    adata = _mtx_to_adata(input_data, barcode_file, feature_file, sample)
+
+    # open gene information
+    txp2gene = "{}/geneInfo.tab".format(star_index)
+    print("Reading in {}".format(txp2gene))
+    t2g = pd.read_table("{}".format(txp2gene), header=None, skiprows=1, names=["gene_id", "gene_symbol"], usecols=[0, 1])
+    t2g = t2g.drop_duplicates(subset="gene_id").set_index("gene_id")
+
+    # standard format
+    adata.var.index = t2g.index.values.tolist()
+    adata.var["gene_symbol"] = t2g["gene_symbol"]
+
+    return adata
+
+def dump_versions(task_process):
+    import pkg_resources
+
+    with open("versions.yml", "w") as f:
+        f.write(f"{task_process}:\n\t")
+        f.write("\n\t".join([f"{pkg.key}: {pkg.version}" for pkg in pkg_resources.working_set]))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
+
+    parser.add_argument("-i", "--input_data", dest="input_data", help="Path to either mtx or mtx h5 file.")
+    parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.", nargs="?", const="")
+    parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.", nargs="?", const="")
+    parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
+    parser.add_argument("-o", "--out", dest="out", help="Output path.")
+    parser.add_argument("--task_process", dest="task_process", help="Task process name.")
+    parser.add_argument("--star_index", dest="star_index", help="Star index folder containing geneInfo.tab.", nargs="?", const="")
+
+    args = vars(parser.parse_args())
+
+    # create the directory with the sample name
+    os.makedirs(os.path.dirname(args["out"]), exist_ok=True)
+
+    adata = input_to_adata(
+        input_data=args["input_data"],
+        barcode_file=args["barcode"],
+        feature_file=args["feature"],
+        sample=args["sample"],
+        star_index=args["star_index"]
+    )
+
+    adata.write_h5ad(args["out"], compression="gzip")
+
+    print("Wrote h5ad file to {}".format(args["out"]))
+
+    dump_versions(task_process=args["task_process"])
diff --git a/modules/local/mtx_to_h5ad_star.nf b/modules/local/mtx_to_h5ad_star.nf
new file mode 100644
index 00000000..ae42262f
--- /dev/null
+++ b/modules/local/mtx_to_h5ad_star.nf
@@ -0,0 +1,51 @@
+process MTX_TO_H5AD_STAR {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
+        'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(inputs)
+    path star_index
+
+    output:
+    tuple val(meta2), path("${meta.id}/*h5ad"), emit: h5ad
+    path  "versions.yml"                      , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Get a file to check input type. Some aligners bring arrays instead of a single file.
+    def input_to_check = (inputs instanceof String) ? inputs : inputs[0]
+
+    // check input type of inputs
+    input_type   = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
+    meta2        = meta + [input_type: input_type]
+    mtx_matrix   = "${input_type}/matrix.mtx.gz"
+    barcodes_tsv = "${input_type}/barcodes.tsv.gz"
+    features_tsv = "${input_type}/features.tsv.gz"
+
+
+    """
+    # convert file types
+    mtx_to_h5ad_star.py \\
+        --task_process ${task.process} \\
+        --sample ${meta.id} \\
+        --input $mtx_matrix \\
+        --barcode $barcodes_tsv \\
+        --feature $features_tsv \\
+        --star_index ${star_index} \\
+        --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad
+    """
+
+    stub:
+    """
+    mkdir ${meta.id}
+    touch ${meta.id}/${meta.id}_matrix.h5ad
+    touch versions.yml
+    """
+}
diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 0c11acd1..19dc9221 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -1,5 +1,6 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { STAR_ALIGN }                  from '../../modules/local/star_align'
+include { STAR_ALIGN }                      from '../../modules/local/star_align'
+include { MTX_TO_H5AD_STAR as MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad_star'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/gunzip/main'
@@ -53,14 +54,19 @@ workflow STARSOLO {
     )
     ch_versions = ch_versions.mix(STAR_ALIGN.out.versions)
 
+    /*
+    * Perform h5ad conversion
+    */
+    MTX_TO_H5AD (
+        STAR_ALIGN.out.raw_counts.mix( STAR_ALIGN.out.filtered_counts ),
+        star_index.map{ meta, index -> index }
+    )
+
 
     emit:
     ch_versions
     // get rid of meta for star index
-    star_index  = star_index.map{ meta, index -> index }
     star_result = STAR_ALIGN.out.tab
     star_counts = STAR_ALIGN.out.counts
-    raw_counts = STAR_ALIGN.out.raw_counts
-    filtered_counts = STAR_ALIGN.out.filtered_counts
     for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it }
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 4c1ef914..e58978da 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -171,8 +171,6 @@ workflow SCRNASEQ {
             protocol_config.get('extra_args', ""),
         )
         ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.raw_counts, STARSOLO.out.filtered_counts)
-        ch_star_index = STARSOLO.out.star_index
         ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)
     }
 
@@ -186,7 +184,6 @@ workflow SCRNASEQ {
             protocol_config['protocol']
         )
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_matrices)
         ch_star_index = CELLRANGER_ALIGN.out.star_index
         ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map{
             meta, outs -> outs.findAll{ it -> it.name == "web_summary.html"}

From c3eb5eac38b09f916f9bb7a0091493053f4d5365 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 19 Aug 2024 12:42:17 +0200
Subject: [PATCH 03/66] directly pass txp2gene

---
 bin/mtx_to_h5ad_star.py           | 7 +++----
 modules/local/mtx_to_h5ad_star.nf | 3 +--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/bin/mtx_to_h5ad_star.py b/bin/mtx_to_h5ad_star.py
index a8c2ec22..0e388e61 100755
--- a/bin/mtx_to_h5ad_star.py
+++ b/bin/mtx_to_h5ad_star.py
@@ -31,7 +31,7 @@ def input_to_adata(
     barcode_file: str,
     feature_file: str,
     sample: str,
-    star_index: str,
+    txp2gene: str,
 ):
     print("Reading in {}".format(input_data))
 
@@ -39,7 +39,6 @@ def input_to_adata(
     adata = _mtx_to_adata(input_data, barcode_file, feature_file, sample)
 
     # open gene information
-    txp2gene = "{}/geneInfo.tab".format(star_index)
     print("Reading in {}".format(txp2gene))
     t2g = pd.read_table("{}".format(txp2gene), header=None, skiprows=1, names=["gene_id", "gene_symbol"], usecols=[0, 1])
     t2g = t2g.drop_duplicates(subset="gene_id").set_index("gene_id")
@@ -67,7 +66,7 @@ def dump_versions(task_process):
     parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
     parser.add_argument("-o", "--out", dest="out", help="Output path.")
     parser.add_argument("--task_process", dest="task_process", help="Task process name.")
-    parser.add_argument("--star_index", dest="star_index", help="Star index folder containing geneInfo.tab.", nargs="?", const="")
+    parser.add_argument("--txp2gene", dest="txp2gene", help="Star index folder containing geneInfo.tab.", nargs="?", const="")
 
     args = vars(parser.parse_args())
 
@@ -79,7 +78,7 @@ def dump_versions(task_process):
         barcode_file=args["barcode"],
         feature_file=args["feature"],
         sample=args["sample"],
-        star_index=args["star_index"]
+        txp2gene=args["txp2gene"]
     )
 
     adata.write_h5ad(args["out"], compression="gzip")
diff --git a/modules/local/mtx_to_h5ad_star.nf b/modules/local/mtx_to_h5ad_star.nf
index ae42262f..35d6f2ef 100644
--- a/modules/local/mtx_to_h5ad_star.nf
+++ b/modules/local/mtx_to_h5ad_star.nf
@@ -29,7 +29,6 @@ process MTX_TO_H5AD_STAR {
     barcodes_tsv = "${input_type}/barcodes.tsv.gz"
     features_tsv = "${input_type}/features.tsv.gz"
 
-
     """
     # convert file types
     mtx_to_h5ad_star.py \\
@@ -38,7 +37,7 @@ process MTX_TO_H5AD_STAR {
         --input $mtx_matrix \\
         --barcode $barcodes_tsv \\
         --feature $features_tsv \\
-        --star_index ${star_index} \\
+        --txp2gene ${star_index}/geneInfo.tab \\
         --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad
     """
 

From 4771ed1fa1dc7301a856bfe8edcc04a0f2d9c342 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 19 Aug 2024 13:02:50 +0200
Subject: [PATCH 04/66] simplify module lines

---
 modules/local/mtx_to_h5ad_star.nf | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/modules/local/mtx_to_h5ad_star.nf b/modules/local/mtx_to_h5ad_star.nf
index 35d6f2ef..2ad9d4f0 100644
--- a/modules/local/mtx_to_h5ad_star.nf
+++ b/modules/local/mtx_to_h5ad_star.nf
@@ -25,18 +25,15 @@ process MTX_TO_H5AD_STAR {
     // check input type of inputs
     input_type   = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
     meta2        = meta + [input_type: input_type]
-    mtx_matrix   = "${input_type}/matrix.mtx.gz"
-    barcodes_tsv = "${input_type}/barcodes.tsv.gz"
-    features_tsv = "${input_type}/features.tsv.gz"
 
     """
     # convert file types
     mtx_to_h5ad_star.py \\
         --task_process ${task.process} \\
         --sample ${meta.id} \\
-        --input $mtx_matrix \\
-        --barcode $barcodes_tsv \\
-        --feature $features_tsv \\
+        --input ${input_type}/matrix.mtx.gz \\
+        --barcode ${input_type}/barcodes.tsv.gz \\
+        --feature ${input_type}/features.tsv.gz \\
         --txp2gene ${star_index}/geneInfo.tab \\
         --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad
     """

From af85f87512f06a1320db8083f111c23054ef473f Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 19 Aug 2024 13:03:53 +0200
Subject: [PATCH 05/66] emit h5ad on starsolo

---
 subworkflows/local/starsolo.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 19dc9221..1226ce5d 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -68,5 +68,6 @@ workflow STARSOLO {
     // get rid of meta for star index
     star_result = STAR_ALIGN.out.tab
     star_counts = STAR_ALIGN.out.counts
+    star_h5ad   = MTX_TO_H5AD.out.h5ad
     for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it }
 }

From d2a53861a73114c2656cad6999f08d74eb3a9e8b Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 19 Aug 2024 13:04:41 +0200
Subject: [PATCH 06/66] add versions emition

---
 subworkflows/local/starsolo.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 1226ce5d..8236632d 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -61,6 +61,7 @@ workflow STARSOLO {
         STAR_ALIGN.out.raw_counts.mix( STAR_ALIGN.out.filtered_counts ),
         star_index.map{ meta, index -> index }
     )
+    ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
 
 
     emit:

From 0e5505197854b2da1271bf90bd2a5cf89357c7b8 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 19 Aug 2024 13:35:07 +0200
Subject: [PATCH 07/66] update module to use templates and cleanup way of
 priting versions

---
 bin/mtx_to_h5ad_star.py                     |  88 -----------------
 modules/local/mtx_to_h5ad_star.nf           |  12 +--
 modules/local/templates/mtx_to_h5ad_star.py | 104 ++++++++++++++++++++
 3 files changed, 105 insertions(+), 99 deletions(-)
 delete mode 100755 bin/mtx_to_h5ad_star.py
 create mode 100755 modules/local/templates/mtx_to_h5ad_star.py

diff --git a/bin/mtx_to_h5ad_star.py b/bin/mtx_to_h5ad_star.py
deleted file mode 100755
index 0e388e61..00000000
--- a/bin/mtx_to_h5ad_star.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-
-# Set numba chache dir to current working directory (which is a writable mount also in containers)
-import os
-
-os.environ["NUMBA_CACHE_DIR"] = "."
-
-import scanpy as sc
-import pandas as pd
-import argparse
-from scipy import io
-from anndata import AnnData
-
-def _mtx_to_adata(
-    mtx_file: str,
-    barcode_file: str,
-    feature_file: str,
-    sample: str,
-):
-    adata = sc.read_mtx(mtx_file)
-    adata = adata.transpose()
-    adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values
-    adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values
-    adata.obs["sample"] = sample
-
-    return adata
-
-
-def input_to_adata(
-    input_data: str,
-    barcode_file: str,
-    feature_file: str,
-    sample: str,
-    txp2gene: str,
-):
-    print("Reading in {}".format(input_data))
-
-    # open main data
-    adata = _mtx_to_adata(input_data, barcode_file, feature_file, sample)
-
-    # open gene information
-    print("Reading in {}".format(txp2gene))
-    t2g = pd.read_table("{}".format(txp2gene), header=None, skiprows=1, names=["gene_id", "gene_symbol"], usecols=[0, 1])
-    t2g = t2g.drop_duplicates(subset="gene_id").set_index("gene_id")
-
-    # standard format
-    adata.var.index = t2g.index.values.tolist()
-    adata.var["gene_symbol"] = t2g["gene_symbol"]
-
-    return adata
-
-def dump_versions(task_process):
-    import pkg_resources
-
-    with open("versions.yml", "w") as f:
-        f.write(f"{task_process}:\n\t")
-        f.write("\n\t".join([f"{pkg.key}: {pkg.version}" for pkg in pkg_resources.working_set]))
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
-
-    parser.add_argument("-i", "--input_data", dest="input_data", help="Path to either mtx or mtx h5 file.")
-    parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.", nargs="?", const="")
-    parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.", nargs="?", const="")
-    parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
-    parser.add_argument("-o", "--out", dest="out", help="Output path.")
-    parser.add_argument("--task_process", dest="task_process", help="Task process name.")
-    parser.add_argument("--txp2gene", dest="txp2gene", help="Star index folder containing geneInfo.tab.", nargs="?", const="")
-
-    args = vars(parser.parse_args())
-
-    # create the directory with the sample name
-    os.makedirs(os.path.dirname(args["out"]), exist_ok=True)
-
-    adata = input_to_adata(
-        input_data=args["input_data"],
-        barcode_file=args["barcode"],
-        feature_file=args["feature"],
-        sample=args["sample"],
-        txp2gene=args["txp2gene"]
-    )
-
-    adata.write_h5ad(args["out"], compression="gzip")
-
-    print("Wrote h5ad file to {}".format(args["out"]))
-
-    dump_versions(task_process=args["task_process"])
diff --git a/modules/local/mtx_to_h5ad_star.nf b/modules/local/mtx_to_h5ad_star.nf
index 2ad9d4f0..ded91538 100644
--- a/modules/local/mtx_to_h5ad_star.nf
+++ b/modules/local/mtx_to_h5ad_star.nf
@@ -26,17 +26,7 @@ process MTX_TO_H5AD_STAR {
     input_type   = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
     meta2        = meta + [input_type: input_type]
 
-    """
-    # convert file types
-    mtx_to_h5ad_star.py \\
-        --task_process ${task.process} \\
-        --sample ${meta.id} \\
-        --input ${input_type}/matrix.mtx.gz \\
-        --barcode ${input_type}/barcodes.tsv.gz \\
-        --feature ${input_type}/features.tsv.gz \\
-        --txp2gene ${star_index}/geneInfo.tab \\
-        --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad
-    """
+    template 'mtx_to_h5ad_star.py'
 
     stub:
     """
diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py
new file mode 100755
index 00000000..88bd91ec
--- /dev/null
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+# Set numba chache dir to current working directory (which is a writable mount also in containers)
+import os
+
+os.environ["NUMBA_CACHE_DIR"] = "."
+
+import scanpy as sc
+import pandas as pd
+import argparse
+from anndata import AnnData
+import platform
+
+def _mtx_to_adata(
+    mtx_file: str,
+    barcode_file: str,
+    feature_file: str,
+    sample: str,
+):
+    adata = sc.read_mtx(mtx_file)
+    adata = adata.transpose()
+    adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values
+    adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values
+    adata.obs["sample"] = sample
+
+    return adata
+
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string.
+    Args:
+        data (dict): The dictionary to format.
+        indent (int): The current indentation level.
+    Returns:
+        str: A string formatted as YAML.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+def dump_versions():
+    versions = {
+        "${task.process}": {
+            "python": platform.python_version(),
+            "scanpy": sc.__version__,
+            "pandas": pd.__version__
+        }
+    }
+
+    with open("versions.yml", "w") as f:
+        f.write(format_yaml_like(versions))
+
+def input_to_adata(
+    input_data: str,
+    output: str,
+    barcode_file: str,
+    feature_file: str,
+    sample: str,
+    txp2gene: str,
+):
+    print(f"Reading in {input_data}")
+
+    # open main data
+    adata = _mtx_to_adata(input_data, barcode_file, feature_file, sample)
+
+    # open gene information
+    print(f"Reading in {txp2gene}")
+    t2g = pd.read_table(f"{txp2gene}", header=None, skiprows=1, names=["gene_id", "gene_symbol"], usecols=[0, 1])
+    t2g = t2g.drop_duplicates(subset="gene_id").set_index("gene_id")
+
+    # standard format
+    adata.var.index = t2g.index.values.tolist()
+    adata.var["gene_symbol"] = t2g["gene_symbol"]
+
+    # write results
+    adata.write_h5ad(f"{output}", compression="gzip")
+    print(f"Wrote h5ad file to {output}")
+
+    # dump versions
+    dump_versions()
+
+    return adata
+
+#
+# Run main script
+#
+
+# create the directory with the sample name
+os.makedirs("${meta.id}", exist_ok=True)
+
+# input_type comes from NF module
+adata = input_to_adata(
+    input_data="${input_type}/matrix.mtx.gz",
+    output="${meta.id}/${meta.id}_${input_type}_matrix.h5ad",
+    barcode_file="${input_type}/barcodes.tsv.gz",
+    feature_file="${input_type}/features.tsv.gz",
+    sample="${meta.id}",
+    txp2gene="${star_index}/geneInfo.tab"
+)

From 994d5a530d969c864c9c9465ef2d09cd083d9f49 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Tue, 20 Aug 2024 12:41:36 +0200
Subject: [PATCH 08/66] fix h5ad generator script

---
 modules/local/mtx_to_h5ad_star.nf           |  6 ++--
 modules/local/templates/mtx_to_h5ad_star.py | 33 +++++++--------------
 2 files changed, 12 insertions(+), 27 deletions(-)

diff --git a/modules/local/mtx_to_h5ad_star.nf b/modules/local/mtx_to_h5ad_star.nf
index ded91538..1895cb87 100644
--- a/modules/local/mtx_to_h5ad_star.nf
+++ b/modules/local/mtx_to_h5ad_star.nf
@@ -2,10 +2,8 @@ process MTX_TO_H5AD_STAR {
     tag "$meta.id"
     label 'process_medium'
 
-    conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
-        'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+    conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"
+    container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538"
 
     input:
     tuple val(meta), path(inputs)
diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py
index 88bd91ec..d6889671 100755
--- a/modules/local/templates/mtx_to_h5ad_star.py
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -12,15 +12,10 @@
 import platform
 
 def _mtx_to_adata(
-    mtx_file: str,
-    barcode_file: str,
-    feature_file: str,
+    input: str,
     sample: str,
 ):
-    adata = sc.read_mtx(mtx_file)
-    adata = adata.transpose()
-    adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values
-    adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values
+    adata = sc.read_10x_mtx(input)
     adata.obs["sample"] = sample
 
     return adata
@@ -58,24 +53,19 @@ def dump_versions():
 def input_to_adata(
     input_data: str,
     output: str,
-    barcode_file: str,
-    feature_file: str,
     sample: str,
-    txp2gene: str,
 ):
     print(f"Reading in {input_data}")
 
     # open main data
-    adata = _mtx_to_adata(input_data, barcode_file, feature_file, sample)
-
-    # open gene information
-    print(f"Reading in {txp2gene}")
-    t2g = pd.read_table(f"{txp2gene}", header=None, skiprows=1, names=["gene_id", "gene_symbol"], usecols=[0, 1])
-    t2g = t2g.drop_duplicates(subset="gene_id").set_index("gene_id")
+    adata = _mtx_to_adata(input_data, sample)
 
     # standard format
-    adata.var.index = t2g.index.values.tolist()
-    adata.var["gene_symbol"] = t2g["gene_symbol"]
+    # index are gene IDs and symbols are a column
+    adata.var["gene_symbol"] = adata.var.index
+    adata.var[['gene_ids', 'gene_versions']] = adata.var["gene_ids"].apply(lambda x: pd.Series(str(x).split(".")))
+    adata.var.index = adata.var["gene_ids"].values
+    adata.var = adata.var.drop("gene_ids", axis=1)
 
     # write results
     adata.write_h5ad(f"{output}", compression="gzip")
@@ -95,10 +85,7 @@ def input_to_adata(
 
 # input_type comes from NF module
 adata = input_to_adata(
-    input_data="${input_type}/matrix.mtx.gz",
+    input_data="${input_type}",
     output="${meta.id}/${meta.id}_${input_type}_matrix.h5ad",
-    barcode_file="${input_type}/barcodes.tsv.gz",
-    feature_file="${input_type}/features.tsv.gz",
-    sample="${meta.id}",
-    txp2gene="${star_index}/geneInfo.tab"
+    sample="${meta.id}"
 )

From b317165c43e47999f78198df51f37059e8fd1388 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Tue, 20 Aug 2024 12:43:01 +0200
Subject: [PATCH 09/66] simplify check

---
 modules/local/mtx_to_h5ad_star.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/mtx_to_h5ad_star.nf b/modules/local/mtx_to_h5ad_star.nf
index 1895cb87..84474ed0 100644
--- a/modules/local/mtx_to_h5ad_star.nf
+++ b/modules/local/mtx_to_h5ad_star.nf
@@ -21,7 +21,7 @@ process MTX_TO_H5AD_STAR {
     def input_to_check = (inputs instanceof String) ? inputs : inputs[0]
 
     // check input type of inputs
-    input_type   = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
+    input_type   = (input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
     meta2        = meta + [input_type: input_type]
 
     template 'mtx_to_h5ad_star.py'

From 72e9d50f91e8fb8fc870b9ed223f4bcc8fc77211 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Wed, 21 Aug 2024 09:23:17 +0200
Subject: [PATCH 10/66] Fix h5ad structure

---
 modules/local/templates/mtx_to_h5ad_star.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py
index d6889671..48749114 100755
--- a/modules/local/templates/mtx_to_h5ad_star.py
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -63,7 +63,8 @@ def input_to_adata(
     # standard format
     # index are gene IDs and symbols are a column
     adata.var["gene_symbol"] = adata.var.index
-    adata.var[['gene_ids', 'gene_versions']] = adata.var["gene_ids"].apply(lambda x: pd.Series(str(x).split(".")))
+    adata.var['gene_versions'] = adata.var["gene_ids"]
+    adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0]
     adata.var.index = adata.var["gene_ids"].values
     adata.var = adata.var.drop("gene_ids", axis=1)
 

From 882908024464e740fa5dd271ff78b9574450f76e Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 30 Aug 2024 10:49:57 +0200
Subject: [PATCH 11/66] updated concat module

---
 modules/local/concat_h5ad.nf                  | 15 ++-----
 .../local/templates}/concat_h5ad.py           | 25 +++--------
 subworkflows/local/mtx_conversion.nf          | 31 +++----------
 workflows/scrnaseq.nf                         | 45 +++++++------------
 4 files changed, 30 insertions(+), 86 deletions(-)
 rename {bin => modules/local/templates}/concat_h5ad.py (53%)

diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
index cd08cbbe..37b14156 100644
--- a/modules/local/concat_h5ad.nf
+++ b/modules/local/concat_h5ad.nf
@@ -1,13 +1,11 @@
 process CONCAT_H5AD {
     label 'process_medium'
 
-    conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
-        'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+    conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"
+    container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538"
 
     input:
-    tuple val(input_type), path(h5ad)
+    tuple val(meta), path(h5ad)
     path samplesheet
 
     output:
@@ -17,12 +15,7 @@ process CONCAT_H5AD {
     task.ext.when == null || task.ext.when
 
     script:
-    """
-    concat_h5ad.py \\
-        --input $samplesheet \\
-        --out combined_${input_type}_matrix.h5ad \\
-        --suffix "_matrix.h5ad"
-    """
+    template 'concat_h5ad.py'
 
     stub:
     """
diff --git a/bin/concat_h5ad.py b/modules/local/templates/concat_h5ad.py
similarity index 53%
rename from bin/concat_h5ad.py
rename to modules/local/templates/concat_h5ad.py
index 43ea071a..033bc89a 100755
--- a/bin/concat_h5ad.py
+++ b/modules/local/templates/concat_h5ad.py
@@ -7,7 +7,6 @@
 
 import scanpy as sc, anndata as ad, pandas as pd
 from pathlib import Path
-import argparse
 
 
 def read_samplesheet(samplesheet):
@@ -17,36 +16,24 @@ def read_samplesheet(samplesheet):
     # samplesheet may contain replicates, when it has,
     # group information from replicates and collapse with commas
     # only keep unique values using set()
-    df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column)))
+    df = df.groupby(["sample"]).agg(lambda column: ",".join(set(str(column))))
 
     return df
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
-
-    parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv")
-    parser.add_argument("-o", "--out", dest="out", help="Output path.")
-    parser.add_argument(
-        "-s",
-        "--suffix",
-        dest="suffix",
-        help="Suffix of matrices to remove and get sample name",
-    )
-
-    args = vars(parser.parse_args())
 
     # Open samplesheet as dataframe
-    df_samplesheet = read_samplesheet(args["input"])
+    df_samplesheet = read_samplesheet("${samplesheet}")
 
     # find all h5ad and append to dict
-    dict_of_h5ad = {str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")}
+    dict_of_h5ad = {str(path).replace("_matrix.h5ad", ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")}
 
     # concat h5ad files
     adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_")
 
     # merge with data.frame, on sample information
-    adata.obs = adata.obs.join(df_samplesheet, on="sample")
-    adata.write_h5ad(args["out"], compression="gzip")
+    adata.obs = adata.obs.join(df_samplesheet, on="sample").astype(str)
+    adata.write_h5ad("combined_${meta.input_type}_matrix.h5ad", compression="gzip")
 
-    print("Wrote h5ad file to {}".format(args["out"]))
+    print("Wrote h5ad file to {}".format("combined_${meta.input_type}_matrix.h5ad"))
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 98e49a2e..0d076db0 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -8,35 +8,14 @@ workflow MTX_CONVERSION {
     take:
     mtx_matrices
     samplesheet
-    txp2gene
-    star_index
 
     main:
         ch_versions = Channel.empty()
 
-        // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need.
-        // Keeping backwards compatibility with cellranger-arc.
-        // TODO: Adapt cellranger-arc subworkflow like cellranger to remove this snippet here.
-        if (params.aligner in [ 'cellrangerarc' ]) {
-            mtx_matrices = mtx_matrices.map { meta, mtx_files ->
-                    [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ]
-                }
-                .filter { meta, mtx_files -> mtx_files } // Remove any that are missing the relevant files
-        }
-
-        //
-        // Convert matrix to h5ad
-        //
-        MTX_TO_H5AD (
-            mtx_matrices,
-            txp2gene,
-            star_index
-        )
-
         //
         // Concat sample-specific h5ad in one
         //
-        ch_concat_h5ad_input = MTX_TO_H5AD.out.h5ad.groupTuple() // gather all sample-specific files / per type
+        ch_concat_h5ad_input = mtx_matrices.groupTuple() // gather all sample-specific files / per type
         if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') {
             // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] )
             // which nextflow break because it is not a valid 'path' thus, we have to remove one level
@@ -51,12 +30,12 @@ workflow MTX_CONVERSION {
         //
         // Convert matrix do seurat
         //
-        MTX_TO_SEURAT (
-            mtx_matrices
-        )
+        // MTX_TO_SEURAT (
+        //     mtx_matrices
+        // )
 
         //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output
-        ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions)
+        // ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions)
 
     emit:
     ch_versions
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index e58978da..333699a7 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -86,8 +86,8 @@ workflow SCRNASEQ {
     ch_multi_samplesheet              = params.cellranger_multi_barcodes ? file(params.cellranger_multi_barcodes, checkIfExists: true) : []
     empty_file                        = file("$projectDir/assets/EMPTY", checkIfExists: true)
 
-    ch_versions     = Channel.empty()
-    ch_mtx_matrices = Channel.empty()
+    ch_versions      = Channel.empty()
+    ch_h5ad_matrices = Channel.empty()
 
     // Run FastQC
     if (!params.skip_fastqc) {
@@ -137,7 +137,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts)
         ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene
     }
 
@@ -155,7 +155,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it })
-        ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
     }
 
     // Run STARSolo pipeline
@@ -172,6 +172,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)
+        ch_h5ad_matrices = STARSOLO.out.star_h5ad
     }
 
     // Run cellranger pipeline
@@ -200,7 +201,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(UNIVERSC_ALIGN.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(UNIVERSC_ALIGN.out.universc_out)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(UNIVERSC_ALIGN.out.universc_out)
     }
 
     // Run cellrangerarc pipeline
@@ -214,7 +215,7 @@ workflow SCRNASEQ {
             ch_cellrangerarc_config
         )
         ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out)
     }
 
     // Run cellrangermulti pipeline
@@ -282,39 +283,23 @@ workflow SCRNASEQ {
         ch_multiqc_files = ch_multiqc_files.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_out.map{
             meta, outs -> outs.findAll{ it -> it.name == "web_summary.html" }
         })
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx)
 
     }
 
     // Run emptydrops calling module
     // if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) {
 
-    //     //
-    //     // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
-    //     //
-    //     if ( params.aligner in [ 'cellranger', 'cellrangermulti', 'kallisto', 'star' ] ) {
-    //         ch_mtx_matrices_for_emptydrops =
-    //             ch_mtx_matrices.filter { meta, mtx_files ->
-    //                 mtx_files.toString().contains("raw_feature_bc_matrix") || // cellranger
-    //                 mtx_files.toString().contains("counts_unfiltered")     || // kallisto
-    //                 mtx_files.toString().contains("raw")                      // star
-    //             }
-    //     } else {
-    //         ch_mtx_matrices_for_emptydrops = ch_mtx_matrices
-    //     }
-
-    //     EMPTYDROPS_CELL_CALLING( ch_mtx_matrices_for_emptydrops )
-    //     ch_mtx_matrices = ch_mtx_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )
+    //     EMPTYDROPS_CELL_CALLING( ch_h5ad_matrices.filter{ it[0].input_type == 'raw' } )
+    //     ch_h5ad_matrices = ch_h5ad_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )
 
     // }
 
-    // // Run mtx to h5ad conversion subworkflow
-    // MTX_CONVERSION (
-    //     ch_mtx_matrices,
-    //     ch_input,
-    //     ch_txp2gene,
-    //     ch_star_index
-    // )
+    // Run mtx to h5ad conversion subworkflow
+    MTX_CONVERSION (
+        ch_h5ad_matrices,
+        ch_input
+    )
 
     // //Add Versions from MTX Conversion workflow too
     // ch_versions.mix(MTX_CONVERSION.out.ch_versions)

From 9fc75b482b6142a6e76eeb8d82bb11814024d059 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 30 Aug 2024 10:59:25 +0200
Subject: [PATCH 12/66] workflow misses emptydrops and seurat & mtx conversion
 modules

---
 workflows/scrnaseq.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 333699a7..10569c35 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -301,8 +301,8 @@ workflow SCRNASEQ {
         ch_input
     )
 
-    // //Add Versions from MTX Conversion workflow too
-    // ch_versions.mix(MTX_CONVERSION.out.ch_versions)
+    //Add Versions from MTX Conversion workflow too
+    ch_versions.mix(MTX_CONVERSION.out.ch_versions)
 
     //
     // Collate and save software versions

From dc69f47f1ae8c080fdad3084ebc9cedea5a13ede Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Thu, 12 Sep 2024 13:55:14 +0200
Subject: [PATCH 13/66] start emptydrops cellbender subworkflow

---
 modules.json                                  |   5 +
 modules/adata_barcodes.nf                     |  23 ++
 modules/local/templates/barcodes.py           |  44 ++++
 .../removebackground/environment.yml          |   5 +
 .../cellbender/removebackground/main.nf       |  65 ++++++
 .../cellbender/removebackground/meta.yml      |  75 +++++++
 .../removebackground/tests/epochs.config      |   6 +
 .../removebackground/tests/main.nf.test       |  66 ++++++
 .../removebackground/tests/main.nf.test.snap  | 196 ++++++++++++++++++
 .../removebackground/tests/tags.yml           |   2 +
 subworkflows/local/emptydrops_removal.nf      |  25 +++
 subworkflows/local/mtx_conversion.nf          |   7 +-
 workflows/scrnaseq.nf                         |   9 -
 13 files changed, 516 insertions(+), 12 deletions(-)
 create mode 100644 modules/adata_barcodes.nf
 create mode 100644 modules/local/templates/barcodes.py
 create mode 100644 modules/nf-core/cellbender/removebackground/environment.yml
 create mode 100644 modules/nf-core/cellbender/removebackground/main.nf
 create mode 100644 modules/nf-core/cellbender/removebackground/meta.yml
 create mode 100644 modules/nf-core/cellbender/removebackground/tests/epochs.config
 create mode 100644 modules/nf-core/cellbender/removebackground/tests/main.nf.test
 create mode 100644 modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/cellbender/removebackground/tests/tags.yml
 create mode 100644 subworkflows/local/emptydrops_removal.nf

diff --git a/modules.json b/modules.json
index aa186d98..b9680cfe 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "cellbender/removebackground": {
+                        "branch": "master",
+                        "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48",
+                        "installed_by": ["modules"]
+                    },
                     "cellranger/count": {
                         "branch": "master",
                         "git_sha": "90dad5491658049282ceb287a3d7732c1ce39837",
diff --git a/modules/adata_barcodes.nf b/modules/adata_barcodes.nf
new file mode 100644
index 00000000..2aef8ec9
--- /dev/null
+++ b/modules/adata_barcodes.nf
@@ -0,0 +1,23 @@
+process ADATA_BARCODES {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/anndata:0.10.7--e9840a94592528c8':
+        'community.wave.seqera.io/library/anndata:0.10.7--336c6c1921a0632b' }"
+
+    input:
+    tuple val(meta), path(h5ad), path(barcodes_csv)
+
+    output:
+    tuple val(meta), path("*.h5ad"), emit: h5ad
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    template 'barcodes.py'
+}
diff --git a/modules/local/templates/barcodes.py b/modules/local/templates/barcodes.py
new file mode 100644
index 00000000..8a9b10a7
--- /dev/null
+++ b/modules/local/templates/barcodes.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+import platform
+import anndata as ad
+import pandas as pd
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string.
+
+    Args:
+        data (dict): The dictionary to format.
+        indent (int): The current indentation level.
+
+    Returns:
+        str: A string formatted as YAML.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+df = pd.read_csv("${barcodes_csv}", header=None)
+adata = ad.read_h5ad("${h5ad}")
+
+adata = adata[df[0].values]
+
+adata.write_h5ad("${prefix}.h5ad")
+
+# Versions
+
+versions = {
+    "${task.process}": {
+        "python": platform.python_version(),
+        "anndata": ad.__version__,
+        "pandas": pd.__version__
+    }
+}
+
+with open("versions.yml", "w") as f:
+    f.write(format_yaml_like(versions))
diff --git a/modules/nf-core/cellbender/removebackground/environment.yml b/modules/nf-core/cellbender/removebackground/environment.yml
new file mode 100644
index 00000000..a157c522
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::cellbender=0.3.0
diff --git a/modules/nf-core/cellbender/removebackground/main.nf b/modules/nf-core/cellbender/removebackground/main.nf
new file mode 100644
index 00000000..f3cfd1ff
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/main.nf
@@ -0,0 +1,65 @@
+process CELLBENDER_REMOVEBACKGROUND {
+    tag "$meta.id"
+    label 'process_medium'
+    label 'process_gpu'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/cellbender:0.3.0--c4addb97ab2d83fe':
+        'community.wave.seqera.io/library/cellbender:0.3.0--41318a055fc3aacb' }"
+
+    input:
+    tuple val(meta), path(h5ad)
+
+    output:
+    tuple val(meta), path("${prefix}.h5")               , emit: h5
+    tuple val(meta), path("${prefix}_filtered.h5")      , emit: filtered_h5
+    tuple val(meta), path("${prefix}_posterior.h5")     , emit: posterior_h5
+    tuple val(meta), path("${prefix}_cell_barcodes.csv"), emit: barcodes
+    tuple val(meta), path("${prefix}_metrics.csv")      , emit: metrics
+    tuple val(meta), path("${prefix}_report.html")      , emit: report
+    tuple val(meta), path("${prefix}.pdf")              , emit: pdf
+    tuple val(meta), path("${prefix}.log")              , emit: log
+    tuple val(meta), path("ckpt.tar.gz")                , emit: checkpoint
+    path "versions.yml"                                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    args = task.ext.args ?: ""
+    use_gpu = task.ext.use_gpu ? "--cuda" : ""
+    """
+    TMPDIR=. cellbender remove-background \
+        ${args} \
+        --cpu-threads ${task.cpus} \
+        ${use_gpu} \
+        --input ${h5ad} \
+        --output ${prefix}.h5
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cellbender: \$(cellbender --version)
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch "${prefix}.h5"
+    touch "${prefix}_filtered.h5"
+    touch "${prefix}_posterior.h5"
+    touch "${prefix}_cell_barcodes.csv"
+    touch "${prefix}_metrics.csv"
+    touch "${prefix}_report.html"
+    touch "${prefix}.pdf"
+    touch "${prefix}.log"
+    touch "ckpt.tar.gz"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cellbender: \$(cellbender --version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cellbender/removebackground/meta.yml b/modules/nf-core/cellbender/removebackground/meta.yml
new file mode 100644
index 00000000..d70fa3fd
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/meta.yml
@@ -0,0 +1,75 @@
+name: cellbender_removebackground
+description: Module to use CellBender to estimate ambient RNA from single-cell RNA-seq data
+keywords:
+  - single-cell
+  - scRNA-seq
+  - ambient RNA removal
+tools:
+  - cellbender:
+      description: CellBender is a software package for eliminating technical artifacts from high-throughput single-cell RNA sequencing (scRNA-seq) data.
+      documentation: https://cellbender.readthedocs.io/en/latest/
+      tool_dev_url: https://github.com/broadinstitute/CellBender
+      licence: ["BSD-3-Clause"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - h5ad:
+      type: file
+      description: AnnData file containing unfiltered data (with empty droplets)
+      pattern: "*.h5ad"
+output:
+  - h5:
+      type: file
+      description: Full count matrix as an h5 file, with background RNA removed. This file contains all the original droplet barcodes.
+      pattern: "*.h5"
+  - filtered_h5:
+      type: file
+      description: |
+        Full count matrix as an h5 file, with background RNA removed. This file contains only the droplet barcodes which were determined to have a > 50% posterior probability of containing cells.
+      pattern: "*.h5"
+  - posterior_h5:
+      type: file
+      description: |
+        The full posterior probability of noise counts. This is not normally used downstream.
+      pattern: "*.h5"
+  - barcodes:
+      type: file
+      description: |
+        CSV file containing all the droplet barcodes which were determined to have a > 50% posterior probability of containing cells. |
+        Barcodes are written in plain text. This information is also contained in each of the above outputs, |
+        but is included as a separate output for convenient use in certain downstream applications.
+      pattern: "*.csv"
+  - metrics:
+      type: file
+      description: |
+        Metrics describing the run, potentially to be used to flag problematic runs |
+        when using CellBender as part of a large-scale automated pipeline.
+      pattern: "*.csv"
+  - report:
+      type: file
+      description: |
+        HTML report including plots and commentary, along with any warnings or suggestions for improved parameter settings.
+      pattern: "*.html"
+  - pdf:
+      type: file
+      description: PDF file that provides a standard graphical summary of the inference procedure.
+      pattern: "*.pdf"
+  - log:
+      type: file
+      description: Log file produced by the cellbender remove-background run.
+      pattern: "*.log"
+  - checkpoint:
+      type: file
+      description: Checkpoint file which contains the trained model and the full posterior.
+      pattern: "*.ckpt"
+  - versions:
+      type: file
+      description: File containing software version
+      pattern: "versions.yml"
+authors:
+  - "@nictru"
+maintainers:
+  - "@nictru"
diff --git a/modules/nf-core/cellbender/removebackground/tests/epochs.config b/modules/nf-core/cellbender/removebackground/tests/epochs.config
new file mode 100644
index 00000000..96282b07
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/tests/epochs.config
@@ -0,0 +1,6 @@
+
+process {
+    withName: CELLBENDER_REMOVEBACKGROUND {
+        ext.args = '--epochs 20'
+    }
+}
diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test b/modules/nf-core/cellbender/removebackground/tests/main.nf.test
new file mode 100644
index 00000000..1afa6f3b
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test
@@ -0,0 +1,66 @@
+nextflow_process {
+    name 'Test Process CELLBENDER_REMOVEBACKGROUND'
+    script '../main.nf'
+    process 'CELLBENDER_REMOVEBACKGROUND'
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cellbender/removebackground"
+    tag "cellbender"
+
+    test("test_cellbender_removebackground") {
+        config './epochs.config'
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true)
+                ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                {assert process.success},
+                {assert file(process.out.h5.get(0).get(1)).exists()},
+                {assert file(process.out.filtered_h5.get(0).get(1)).exists()},
+                {assert file(process.out.posterior_h5.get(0).get(1)).exists()},
+                {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes")},
+                {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics")},
+                {assert file(process.out.report.get(0).get(1)).exists()},
+                {assert file(process.out.pdf.get(0).get(1)).exists()},
+                {assert file(process.out.log.get(0).get(1)).exists()},
+                {assert snapshot(process.out.versions).match("cellbender_removebackground_versions")}
+            )
+        }
+    }
+
+    test("test_cellbender_removebackground - stub") {
+        options '-stub'
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true)
+                ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                {assert process.success},
+                {assert snapshot(process.out.h5).match("cellbender_removebackground_h5_stub")},
+                {assert snapshot(process.out.filtered_h5).match("cellbender_removebackground_filtered_h5_stub")},
+                {assert snapshot(process.out.posterior_h5).match("cellbender_removebackground_posterior_h5_stub")},
+                {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes_stub")},
+                {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics_stub")},
+                {assert snapshot(process.out.report).match("cellbender_removebackground_report_stub")},
+                {assert snapshot(process.out.pdf).match("cellbender_removebackground_pdf_stub")},
+                {assert snapshot(process.out.log).match("cellbender_removebackground_log_stub")},
+                {assert snapshot(process.out.versions).match("cellbender_removebackground_versions_stub")}
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap
new file mode 100644
index 00000000..fdb51d66
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap
@@ -0,0 +1,196 @@
+{
+    "cellbender_removebackground_versions": {
+        "content": [
+            [
+                "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:09.33127881"
+    },
+    "cellbender_removebackground_filtered_h5_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_filtered.h5:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.833598082"
+    },
+    "cellbender_removebackground_pdf_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.pdf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.891829278"
+    },
+    "cellbender_removebackground_metrics": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_metrics.csv:md5,88272bde1c157528b0b0ab2abe5ad26f"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:09.327155805"
+    },
+    "cellbender_removebackground_versions_stub": {
+        "content": [
+            [
+                "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.904614838"
+    },
+    "cellbender_removebackground_h5_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.829304361"
+    },
+    "cellbender_removebackground_metrics_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_metrics.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.870469733"
+    },
+    "cellbender_removebackground_log_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.899293304"
+    },
+    "cellbender_removebackground_barcodes": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_cell_barcodes.csv:md5,c8e8df9d0f9aea976d6f6aa36d329429"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:09.316098811"
+    },
+    "cellbender_removebackground_report_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_report.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.885307244"
+    },
+    "cellbender_removebackground_posterior_h5_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_posterior.h5:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.838032754"
+    },
+    "cellbender_removebackground_barcodes_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_cell_barcodes.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.861284979"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cellbender/removebackground/tests/tags.yml b/modules/nf-core/cellbender/removebackground/tests/tags.yml
new file mode 100644
index 00000000..d935083b
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/tests/tags.yml
@@ -0,0 +1,2 @@
+cellbender/removebackground:
+  - modules/nf-core/cellbender/removebackground/**
diff --git a/subworkflows/local/emptydrops_removal.nf b/subworkflows/local/emptydrops_removal.nf
new file mode 100644
index 00000000..202612d6
--- /dev/null
+++ b/subworkflows/local/emptydrops_removal.nf
@@ -0,0 +1,25 @@
+include { CELLBENDER_REMOVEBACKGROUND } from '../../modules/nf-core/cellbender/removebackground'
+include { ADATA_BARCODES              } from '../../modules/local/adata_barcodes'
+
+workflow EMPTY_DROPLET_REMOVAL {
+    take:
+    ch_unfiltered
+
+    main:
+    ch_versions = Channel.empty()
+
+    CELLBENDER_REMOVEBACKGROUND(ch_unfiltered)
+    ch_versions = ch_versions.mix(CELLBENDER_REMOVEBACKGROUND.out.versions)
+
+    ch_combined = ch_unfiltered.join(CELLBENDER_REMOVEBACKGROUND.out.barcodes)
+
+    ADATA_BARCODES(ch_combined)
+    ch_versions = ch_versions.mix(ADATA_BARCODES.out.versions)
+
+    ch_h5ad = ADATA_BARCODES.out.h5ad
+
+    emit:
+    h5ad = ch_h5ad
+
+    versions = ch_versions
+}
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 0d076db0..7cca836d 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -1,7 +1,8 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { MTX_TO_H5AD   }             from '../../modules/local/mtx_to_h5ad.nf'
-include { CONCAT_H5AD   }             from '../../modules/local/concat_h5ad.nf'
-include { MTX_TO_SEURAT }             from '../../modules/local/mtx_to_seurat.nf'
+include { MTX_TO_H5AD           } from '../../modules/local/mtx_to_h5ad.nf'
+include { CONCAT_H5AD           } from '../../modules/local/concat_h5ad.nf'
+include { MTX_TO_SEURAT         } from '../../modules/local/mtx_to_seurat.nf'
+include { EMPTY_DROPLET_REMOVAL } from '../subworkflows/local/emptydrops_removal'
 
 workflow MTX_CONVERSION {
 
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 10569c35..bc5eaef6 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -9,7 +9,6 @@ include { CELLRANGERARC_ALIGN                } from "../subworkflows/local/align
 include { UNIVERSC_ALIGN                     } from "../subworkflows/local/align_universc"
 include { MTX_CONVERSION                     } from "../subworkflows/local/mtx_conversion"
 include { GTF_GENE_FILTER                    } from '../modules/local/gtf_gene_filter'
-include { EMPTYDROPS_CELL_CALLING            } from '../modules/local/emptydrops'
 include { GUNZIP as GUNZIP_FASTA             } from '../modules/nf-core/gunzip/main'
 include { GUNZIP as GUNZIP_GTF               } from '../modules/nf-core/gunzip/main'
 include { paramsSummaryMultiqc               } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -287,14 +286,6 @@ workflow SCRNASEQ {
 
     }
 
-    // Run emptydrops calling module
-    // if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) {
-
-    //     EMPTYDROPS_CELL_CALLING( ch_h5ad_matrices.filter{ it[0].input_type == 'raw' } )
-    //     ch_h5ad_matrices = ch_h5ad_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )
-
-    // }
-
     // Run mtx to h5ad conversion subworkflow
     MTX_CONVERSION (
         ch_h5ad_matrices,

From 9c204200d0f86f7bc716343404bce2ad9e1c1636 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 10:57:43 +0200
Subject: [PATCH 14/66] fix paths

---
 modules/{ => local}/adata_barcodes.nf | 0
 subworkflows/local/mtx_conversion.nf  | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename modules/{ => local}/adata_barcodes.nf (100%)

diff --git a/modules/adata_barcodes.nf b/modules/local/adata_barcodes.nf
similarity index 100%
rename from modules/adata_barcodes.nf
rename to modules/local/adata_barcodes.nf
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 7cca836d..4d3ba721 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -2,7 +2,7 @@
 include { MTX_TO_H5AD           } from '../../modules/local/mtx_to_h5ad.nf'
 include { CONCAT_H5AD           } from '../../modules/local/concat_h5ad.nf'
 include { MTX_TO_SEURAT         } from '../../modules/local/mtx_to_seurat.nf'
-include { EMPTY_DROPLET_REMOVAL } from '../subworkflows/local/emptydrops_removal'
+include { EMPTY_DROPLET_REMOVAL } from './emptydrops_removal'
 
 workflow MTX_CONVERSION {
 

From 683119598e67e5073f43b488e457a8f4d868f14f Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 12:46:51 +0200
Subject: [PATCH 15/66] started the anndatar standardization module

---
 modules/local/anndatar_convert.nf          | 24 ++++++++++++++++++++++
 modules/local/templates/anndatar_convert.R | 20 ++++++++++++++++++
 subworkflows/local/mtx_conversion.nf       |  9 ++++++++
 3 files changed, 53 insertions(+)
 create mode 100644 modules/local/anndatar_convert.nf
 create mode 100755 modules/local/templates/anndatar_convert.R

diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
new file mode 100644
index 00000000..f3552ef0
--- /dev/null
+++ b/modules/local/anndatar_convert.nf
@@ -0,0 +1,24 @@
+process ANNDATAR_CONVERT {
+    label 'process_medium'
+
+    container "fmalmeida/anndatar:dev"
+
+    input:
+    tuple val(meta), path(h5ad)
+
+    output:
+    tuple val(meta), path("${meta.id}_standardized.h5ad"), emit: h5ad
+    tuple val(meta), path("${meta.id}_standardized.Rds"), emit: rds
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    template 'anndatar_convert.R'
+
+    stub:
+    """
+    touch ${meta.id}_standardized.h5ad
+    touch ${meta.id}_standardized.Rds
+    """
+}
diff --git a/modules/local/templates/anndatar_convert.R b/modules/local/templates/anndatar_convert.R
new file mode 100755
index 00000000..016f8bb4
--- /dev/null
+++ b/modules/local/templates/anndatar_convert.R
@@ -0,0 +1,20 @@
+#!/usr/bin/env Rscript
+
+# to use nf variables: "${meta.id}"
+
+# load libraries
+library(anndataR)
+
+# read input
+adata <- read_h5ad("${h5ad}")
+
+#
+# scope to perform standardization options
+#
+
+# convert to Rds
+obj <- adata\$to_Seurat()
+
+# save files
+write_h5ad(adata, "${meta.id}_standardized.h5ad")
+saveRDS(obj, file = "${meta.id}_standardized.Rds")
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 4d3ba721..6fc0ffb9 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -1,4 +1,5 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
+include { ANNDATAR_CONVERT      } from '../../modules/local/anndatar_convert'
 include { MTX_TO_H5AD           } from '../../modules/local/mtx_to_h5ad.nf'
 include { CONCAT_H5AD           } from '../../modules/local/concat_h5ad.nf'
 include { MTX_TO_SEURAT         } from '../../modules/local/mtx_to_seurat.nf'
@@ -13,6 +14,13 @@ workflow MTX_CONVERSION {
     main:
         ch_versions = Channel.empty()
 
+        //
+        // MODULE: Standardize h5ad and convert with AnndataR package
+        //
+        ANNDATAR_CONVERT (
+            mtx_matrices
+        )
+
         //
         // Concat sample-specific h5ad in one
         //
@@ -23,6 +31,7 @@ workflow MTX_CONVERSION {
             // making it as [ mtx_1, mtx_2 ]
             ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] }
         }
+
         CONCAT_H5AD (
             ch_concat_h5ad_input,
             samplesheet

From 237d1ca3c65f1fc3b236e8d7ab8ac552e447779c Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 12:49:09 +0200
Subject: [PATCH 16/66] concat h5ad with anndatar h5ad

---
 subworkflows/local/mtx_conversion.nf | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 6fc0ffb9..0178868f 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -24,7 +24,7 @@ workflow MTX_CONVERSION {
         //
         // Concat sample-specific h5ad in one
         //
-        ch_concat_h5ad_input = mtx_matrices.groupTuple() // gather all sample-specific files / per type
+        ch_concat_h5ad_input = ANNDATAR_CONVERT.out.h5ad.groupTuple() // gather all sample-specific files / per type
         if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') {
             // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] )
             // which nextflow break because it is not a valid 'path' thus, we have to remove one level
@@ -37,13 +37,6 @@ workflow MTX_CONVERSION {
             samplesheet
         )
 
-        //
-        // Convert matrix do seurat
-        //
-        // MTX_TO_SEURAT (
-        //     mtx_matrices
-        // )
-
         //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output
         // ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions)
 

From f5fb4a27cb3b70211403e62fbc7ffc485ad9f64a Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 12:51:03 +0200
Subject: [PATCH 17/66] update tag information

---
 modules/local/anndatar_convert.nf | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
index f3552ef0..3f985b64 100644
--- a/modules/local/anndatar_convert.nf
+++ b/modules/local/anndatar_convert.nf
@@ -1,4 +1,6 @@
 process ANNDATAR_CONVERT {
+    tag "${meta.id}"
+
     label 'process_medium'
 
     container "fmalmeida/anndatar:dev"

From 752666bff34142a2bc2b6a947c5bad4a3c3ba236 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 12:58:07 +0200
Subject: [PATCH 18/66] update tags

---
 modules/local/concat_h5ad.nf | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
index 37b14156..41310553 100644
--- a/modules/local/concat_h5ad.nf
+++ b/modules/local/concat_h5ad.nf
@@ -1,4 +1,6 @@
 process CONCAT_H5AD {
+    tag "${meta.id}"
+
     label 'process_medium'
 
     conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"

From d0ad7f6a90056903f2b7ee473232dcfc202523ff Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 12:59:19 +0200
Subject: [PATCH 19/66] module is only to convert to rds

---
 modules/local/anndatar_convert.nf          | 4 +---
 modules/local/templates/anndatar_convert.R | 5 -----
 subworkflows/local/mtx_conversion.nf       | 2 +-
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
index 3f985b64..dfe5fce9 100644
--- a/modules/local/anndatar_convert.nf
+++ b/modules/local/anndatar_convert.nf
@@ -3,13 +3,12 @@ process ANNDATAR_CONVERT {
 
     label 'process_medium'
 
-    container "fmalmeida/anndatar:dev"
+    container "fmalmeida/anndatar:dev" // TODO: Fix
 
     input:
     tuple val(meta), path(h5ad)
 
     output:
-    tuple val(meta), path("${meta.id}_standardized.h5ad"), emit: h5ad
     tuple val(meta), path("${meta.id}_standardized.Rds"), emit: rds
 
     when:
@@ -20,7 +19,6 @@ process ANNDATAR_CONVERT {
 
     stub:
     """
-    touch ${meta.id}_standardized.h5ad
     touch ${meta.id}_standardized.Rds
     """
 }
diff --git a/modules/local/templates/anndatar_convert.R b/modules/local/templates/anndatar_convert.R
index 016f8bb4..479ac912 100755
--- a/modules/local/templates/anndatar_convert.R
+++ b/modules/local/templates/anndatar_convert.R
@@ -8,13 +8,8 @@ library(anndataR)
 # read input
 adata <- read_h5ad("${h5ad}")
 
-#
-# scope to perform standardization options
-#
-
 # convert to Rds
 obj <- adata\$to_Seurat()
 
 # save files
-write_h5ad(adata, "${meta.id}_standardized.h5ad")
 saveRDS(obj, file = "${meta.id}_standardized.Rds")
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 0178868f..4e85a1a7 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -24,7 +24,7 @@ workflow MTX_CONVERSION {
         //
         // Concat sample-specific h5ad in one
         //
-        ch_concat_h5ad_input = ANNDATAR_CONVERT.out.h5ad.groupTuple() // gather all sample-specific files / per type
+        ch_concat_h5ad_input = mtx_matrices.groupTuple() // gather all sample-specific files / per type
         if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') {
             // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] )
             // which nextflow break because it is not a valid 'path' thus, we have to remove one level

From 20ac4ae8a5eea676428c28945a684b55444cd8f9 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 13:37:19 +0200
Subject: [PATCH 20/66] update directives

---
 conf/modules.config | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index 81395a1d..1f83f2a6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -45,12 +45,13 @@ process {
         }
     }
 
-    withName: 'MTX_TO_H5AD|CONCAT_H5AD|MTX_TO_SEURAT' {
+    withName: 'MTX_TO_H5AD*|CONCAT_H5AD|ANNDATAR_CONVERT' {
         publishDir = [
             path: { "${params.outdir}/${params.aligner}/mtx_conversions" },
             mode: params.publish_dir_mode
         ]
     }
+
     withName: 'GTF_GENE_FILTER' {
         publishDir = [
             path: { "${params.outdir}/gtf_filter" },

From da5b0363dcfd972b317056cd9eae1fd431926658 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 13:39:05 +0200
Subject: [PATCH 21/66] update comments

---
 subworkflows/local/mtx_conversion.nf | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 4e85a1a7..a2ff58c7 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -1,8 +1,6 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { ANNDATAR_CONVERT      } from '../../modules/local/anndatar_convert'
-include { MTX_TO_H5AD           } from '../../modules/local/mtx_to_h5ad.nf'
 include { CONCAT_H5AD           } from '../../modules/local/concat_h5ad.nf'
-include { MTX_TO_SEURAT         } from '../../modules/local/mtx_to_seurat.nf'
+include { ANNDATAR_CONVERT      } from '../../modules/local/anndatar_convert'
 include { EMPTY_DROPLET_REMOVAL } from './emptydrops_removal'
 
 workflow MTX_CONVERSION {
@@ -15,7 +13,7 @@ workflow MTX_CONVERSION {
         ch_versions = Channel.empty()
 
         //
-        // MODULE: Standardize h5ad and convert with AnndataR package
+        // MODULE: Convert to Rds with AnndataR package
         //
         ANNDATAR_CONVERT (
             mtx_matrices

From b90f3884770bddcea033d9e59829c9179d7ff391 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 14:28:05 +0200
Subject: [PATCH 22/66] add cellbender to workflow

---
 subworkflows/local/mtx_conversion.nf |  1 -
 workflows/scrnaseq.nf                | 31 +++++++++++++++++++++++-----
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index a2ff58c7..6435966d 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -1,7 +1,6 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { CONCAT_H5AD           } from '../../modules/local/concat_h5ad.nf'
 include { ANNDATAR_CONVERT      } from '../../modules/local/anndatar_convert'
-include { EMPTY_DROPLET_REMOVAL } from './emptydrops_removal'
 
 workflow MTX_CONVERSION {
 
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index bc5eaef6..1e7ea9ef 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -3,11 +3,12 @@ include { FASTQC_CHECK                       } from '../subworkflows/local/fastq
 include { KALLISTO_BUSTOOLS                  } from '../subworkflows/local/kallisto_bustools'
 include { SCRNASEQ_ALEVIN                    } from '../subworkflows/local/alevin'
 include { STARSOLO                           } from '../subworkflows/local/starsolo'
-include { CELLRANGER_ALIGN                   } from "../subworkflows/local/align_cellranger"
-include { CELLRANGER_MULTI_ALIGN             } from "../subworkflows/local/align_cellrangermulti"
-include { CELLRANGERARC_ALIGN                } from "../subworkflows/local/align_cellrangerarc"
-include { UNIVERSC_ALIGN                     } from "../subworkflows/local/align_universc"
-include { MTX_CONVERSION                     } from "../subworkflows/local/mtx_conversion"
+include { CELLRANGER_ALIGN                   } from '../subworkflows/local/align_cellranger'
+include { CELLRANGER_MULTI_ALIGN             } from '../subworkflows/local/align_cellrangermulti'
+include { CELLRANGERARC_ALIGN                } from '../subworkflows/local/align_cellrangerarc'
+include { UNIVERSC_ALIGN                     } from '../subworkflows/local/align_universc'
+include { EMPTY_DROPLET_REMOVAL              } from '../subworkflows/local/emptydrops_removal'
+include { MTX_CONVERSION                     } from '../subworkflows/local/mtx_conversion'
 include { GTF_GENE_FILTER                    } from '../modules/local/gtf_gene_filter'
 include { GUNZIP as GUNZIP_FASTA             } from '../modules/nf-core/gunzip/main'
 include { GUNZIP as GUNZIP_GTF               } from '../modules/nf-core/gunzip/main'
@@ -286,6 +287,26 @@ workflow SCRNASEQ {
 
     }
 
+    // SUBWORKFLOW: Run cellbender emptydrops filter
+    if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) {
+
+        //
+        // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
+        //
+        if ( params.aligner in [ 'cellranger', 'cellrangermulti', 'kallisto', 'star' ] ) {
+            ch_h5ad_matrices_for_emptydrops =
+                ch_h5ad_matrices.filter { meta, mtx_files -> meta.input_type == 'raw' }
+        } else {
+            ch_h5ad_matrices_for_emptydrops = ch_h5ad_matrices
+        }
+
+        EMPTY_DROPLET_REMOVAL (
+            ch_h5ad_matrices_for_emptydrops
+        )
+        // ch_h5ad_matrices = ch_h5ad_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )
+
+    }
+
     // Run mtx to h5ad conversion subworkflow
     MTX_CONVERSION (
         ch_h5ad_matrices,

From 7c304cc2c259709f5fd09921cafb630280fd9661 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 13 Sep 2024 14:41:58 +0200
Subject: [PATCH 23/66] start organisation of files

---
 conf/modules.config | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 1f83f2a6..8a53c285 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -32,15 +32,17 @@ process {
     }
 
     if (!params.skip_emptydrops) {
-        withName: EMPTYDROPS_CELL_CALLING {
+        withName: 'CELLBENDER_REMOVEBACKGROUND' {
             publishDir = [
-                path: { "${params.outdir}/${params.aligner}" },
-                mode: params.publish_dir_mode,
-                saveAs: { filename ->
-                    if ( params.aligner == 'cellranger' ) "count/${meta.id}/${filename}"
-                    else if ( params.aligner == 'kallisto' ) "${meta.id}.count/${filename}"
-                    else "${meta.id}/${filename}"
-                }
+                path: { "${params.outdir}/${params.aligner}/${meta.id}/emptydrops_filter" },
+                mode: params.publish_dir_mode
+            ]
+        }
+        withName: 'ADATA_BARCODES' {
+            ext.prefix = { "${meta.id}_custom_emptydrops_filter_matrix" }
+            publishDir = [
+                path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" },
+                mode: params.publish_dir_mode
             ]
         }
     }

From 83204317227513880fc330429658e256761b93df Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Thu, 26 Sep 2024 09:25:25 +0200
Subject: [PATCH 24/66] fix file naming

---
 modules/local/anndatar_convert.nf          | 4 ++--
 modules/local/templates/anndatar_convert.R | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
index dfe5fce9..a3a67a08 100644
--- a/modules/local/anndatar_convert.nf
+++ b/modules/local/anndatar_convert.nf
@@ -9,7 +9,7 @@ process ANNDATAR_CONVERT {
     tuple val(meta), path(h5ad)
 
     output:
-    tuple val(meta), path("${meta.id}_standardized.Rds"), emit: rds
+    tuple val(meta), path("${meta.id}/${meta.id}_${meta.input_type}_matrix.Rds"), emit: rds
 
     when:
     task.ext.when == null || task.ext.when
@@ -19,6 +19,6 @@ process ANNDATAR_CONVERT {
 
     stub:
     """
-    touch ${meta.id}_standardized.Rds
+    touch ${meta.id}.Rds
     """
 }
diff --git a/modules/local/templates/anndatar_convert.R b/modules/local/templates/anndatar_convert.R
index 479ac912..06a723d7 100755
--- a/modules/local/templates/anndatar_convert.R
+++ b/modules/local/templates/anndatar_convert.R
@@ -12,4 +12,5 @@ adata <- read_h5ad("${h5ad}")
 obj <- adata\$to_Seurat()
 
 # save files
-saveRDS(obj, file = "${meta.id}_standardized.Rds")
+dir.create(file.path("$meta.id"), showWarnings = FALSE)
+saveRDS(obj, file = "${meta.id}/${meta.id}_${meta.input_type}_matrix.Rds")

From 1c95e858d822dde703b4e25106dd3cae1226fb89 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Thu, 26 Sep 2024 09:36:57 +0200
Subject: [PATCH 25/66] resolve emptydrops naming

---
 conf/modules.config                      |  2 +-
 subworkflows/local/emptydrops_removal.nf | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 8a53c285..ac26f731 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -39,7 +39,7 @@ process {
             ]
         }
         withName: 'ADATA_BARCODES' {
-            ext.prefix = { "${meta.id}_custom_emptydrops_filter_matrix" }
+            ext.prefix = { "${meta.id}_${meta.input_type}_matrix" }
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" },
                 mode: params.publish_dir_mode
diff --git a/subworkflows/local/emptydrops_removal.nf b/subworkflows/local/emptydrops_removal.nf
index 202612d6..7171c3f3 100644
--- a/subworkflows/local/emptydrops_removal.nf
+++ b/subworkflows/local/emptydrops_removal.nf
@@ -11,7 +11,15 @@ workflow EMPTY_DROPLET_REMOVAL {
     CELLBENDER_REMOVEBACKGROUND(ch_unfiltered)
     ch_versions = ch_versions.mix(CELLBENDER_REMOVEBACKGROUND.out.versions)
 
-    ch_combined = ch_unfiltered.join(CELLBENDER_REMOVEBACKGROUND.out.barcodes)
+    ch_combined =
+    ch_unfiltered
+        .join(CELLBENDER_REMOVEBACKGROUND.out.barcodes)
+        .map { meta, h5ad, csv ->
+            def meta_clone = meta.clone()
+            meta_clone.input_type = 'emptydrops_filter'
+
+            [ meta_clone, h5ad, csv ]
+        }
 
     ADATA_BARCODES(ch_combined)
     ch_versions = ch_versions.mix(ADATA_BARCODES.out.versions)

From e6fff346698a321e8305d0ae6f78e58adcfd4b41 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Thu, 26 Sep 2024 09:38:17 +0200
Subject: [PATCH 26/66] also convert emptydrops filter matrices

---
 workflows/scrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 1e7ea9ef..a0e7c958 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -303,7 +303,7 @@ workflow SCRNASEQ {
         EMPTY_DROPLET_REMOVAL (
             ch_h5ad_matrices_for_emptydrops
         )
-        // ch_h5ad_matrices = ch_h5ad_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )
+        ch_h5ad_matrices = ch_h5ad_matrices.mix( EMPTY_DROPLET_REMOVAL.out.h5ad )
 
     }
 

From e9c88e89891a9858bed3fea721432c56d6ea6301 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Thu, 26 Sep 2024 09:40:03 +0200
Subject: [PATCH 27/66] move files to BKP since they will be replaced

---
 modules/local/{ => BKP}/emptydrops.nf    | 0
 modules/local/{ => BKP}/mtx_to_h5ad.nf   | 0
 modules/local/{ => BKP}/mtx_to_seurat.nf | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename modules/local/{ => BKP}/emptydrops.nf (100%)
 rename modules/local/{ => BKP}/mtx_to_h5ad.nf (100%)
 rename modules/local/{ => BKP}/mtx_to_seurat.nf (100%)

diff --git a/modules/local/emptydrops.nf b/modules/local/BKP/emptydrops.nf
similarity index 100%
rename from modules/local/emptydrops.nf
rename to modules/local/BKP/emptydrops.nf
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/BKP/mtx_to_h5ad.nf
similarity index 100%
rename from modules/local/mtx_to_h5ad.nf
rename to modules/local/BKP/mtx_to_h5ad.nf
diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/BKP/mtx_to_seurat.nf
similarity index 100%
rename from modules/local/mtx_to_seurat.nf
rename to modules/local/BKP/mtx_to_seurat.nf

From f7f5fa520bb016f99de2bf8e0872aff7bf2eb902 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Thu, 26 Sep 2024 12:16:11 +0200
Subject: [PATCH 28/66] add conversion for alevin

---
 modules/local/{ => alevin}/alevinqc.nf        |  0
 modules/local/alevin/mtx_to_h5ad.nf           | 34 +++++++
 modules/local/{ => alevin}/simpleaf_index.nf  |  0
 modules/local/{ => alevin}/simpleaf_quant.nf  |  0
 modules/local/alevin/templates/mtx_to_h5ad.py | 96 +++++++++++++++++++
 subworkflows/local/alevin.nf                  | 25 +++--
 workflows/scrnaseq.nf                         |  2 +-
 7 files changed, 148 insertions(+), 9 deletions(-)
 rename modules/local/{ => alevin}/alevinqc.nf (100%)
 create mode 100644 modules/local/alevin/mtx_to_h5ad.nf
 rename modules/local/{ => alevin}/simpleaf_index.nf (100%)
 rename modules/local/{ => alevin}/simpleaf_quant.nf (100%)
 create mode 100755 modules/local/alevin/templates/mtx_to_h5ad.py

diff --git a/modules/local/alevinqc.nf b/modules/local/alevin/alevinqc.nf
similarity index 100%
rename from modules/local/alevinqc.nf
rename to modules/local/alevin/alevinqc.nf
diff --git a/modules/local/alevin/mtx_to_h5ad.nf b/modules/local/alevin/mtx_to_h5ad.nf
new file mode 100644
index 00000000..0c4fc844
--- /dev/null
+++ b/modules/local/alevin/mtx_to_h5ad.nf
@@ -0,0 +1,34 @@
+process MTX_TO_H5AD {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"
+    container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538"
+
+    input:
+    tuple val(meta), path(inputs)
+    path star_index
+
+    output:
+    tuple val(meta2), path("${meta.id}/*h5ad"), emit: h5ad
+    path  "versions.yml"                      , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Get a file to check input type. Some aligners bring arrays instead of a single file.
+    def input_to_check = (inputs instanceof String) ? inputs : inputs[0]
+
+    // alevin has a single output
+    meta2 = meta + [input_type: 'raw']
+
+    template 'mtx_to_h5ad.py'
+
+    stub:
+    """
+    mkdir ${meta.id}
+    touch ${meta.id}/${meta.id}_matrix.h5ad
+    touch versions.yml
+    """
+}
diff --git a/modules/local/simpleaf_index.nf b/modules/local/alevin/simpleaf_index.nf
similarity index 100%
rename from modules/local/simpleaf_index.nf
rename to modules/local/alevin/simpleaf_index.nf
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/alevin/simpleaf_quant.nf
similarity index 100%
rename from modules/local/simpleaf_quant.nf
rename to modules/local/alevin/simpleaf_quant.nf
diff --git a/modules/local/alevin/templates/mtx_to_h5ad.py b/modules/local/alevin/templates/mtx_to_h5ad.py
new file mode 100755
index 00000000..fb784fde
--- /dev/null
+++ b/modules/local/alevin/templates/mtx_to_h5ad.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+# Set numba chache dir to current working directory (which is a writable mount also in containers)
+import os
+
+os.environ["NUMBA_CACHE_DIR"] = "."
+
+import scanpy as sc
+import pandas as pd
+import argparse
+from anndata import AnnData
+import platform
+
+def _mtx_to_adata(
+    input: str,
+    sample: str,
+):
+
+    adata = sc.read_mtx(f"{input}/quants_mat.mtx")
+    adata.obs_names = pd.read_csv(f"{input}/quants_mat_rows.txt", header=None, sep="\t")[0].values
+    adata.var_names = pd.read_csv(f"{input}/quants_mat_cols.txt", header=None, sep="\t")[0].values
+    adata.obs["sample"] = sample
+
+    return adata
+
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string.
+    Args:
+        data (dict): The dictionary to format.
+        indent (int): The current indentation level.
+    Returns:
+        str: A string formatted as YAML.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+def dump_versions():
+    versions = {
+        "${task.process}": {
+            "python": platform.python_version(),
+            "scanpy": sc.__version__,
+            "pandas": pd.__version__
+        }
+    }
+
+    with open("versions.yml", "w") as f:
+        f.write(format_yaml_like(versions))
+
+def input_to_adata(
+    input_data: str,
+    output: str,
+    sample: str,
+):
+    print(f"Reading in {input_data}")
+
+    # open main data
+    adata = _mtx_to_adata(input_data, sample)
+
+    # standard format
+    # index are gene IDs and symbols are a column
+    # TODO: how to get gene_symbols for alevin?
+    adata.var['gene_versions'] = adata.var.index
+    adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0]
+    adata.var.index = adata.var["gene_ids"].values
+    adata.var = adata.var.drop("gene_ids", axis=1)
+    adata.var_names_make_unique()
+
+    # write results
+    adata.write_h5ad(f"{output}", compression="gzip")
+    print(f"Wrote h5ad file to {output}")
+
+    # dump versions
+    dump_versions()
+
+    return adata
+
+#
+# Run main script
+#
+
+# create the directory with the sample name
+os.makedirs("${meta2.id}", exist_ok=True)
+
+# input_type comes from NF module
+adata = input_to_adata(
+    input_data="${meta2.id}_alevin_results/af_quant/alevin/",
+    output="${meta2.id}/${meta2.id}_${meta2.input_type}_matrix.h5ad",
+    sample="${meta2.id}"
+)
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index 764c08f8..d8848dc4 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -1,11 +1,12 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { GFFREAD_TRANSCRIPTOME }             from '../../modules/local/gffread_transcriptome'
-include { ALEVINQC              }             from '../../modules/local/alevinqc'
-include { SIMPLEAF_INDEX        }             from '../../modules/local/simpleaf_index'
-include { SIMPLEAF_QUANT        }             from '../../modules/local/simpleaf_quant'
+include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome'
+include { ALEVINQC              } from '../../modules/local/alevin/alevinqc'
+include { SIMPLEAF_INDEX        } from '../../modules/local/alevin/simpleaf_index'
+include { SIMPLEAF_QUANT        } from '../../modules/local/alevin/simpleaf_quant'
+include { MTX_TO_H5AD           } from '../../modules/local/alevin/mtx_to_h5ad'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
-include { GUNZIP }                      from '../../modules/nf-core/gunzip/main'
+include { GUNZIP                      } from '../../modules/nf-core/gunzip/main'
 include { GFFREAD as GFFREAD_TXP2GENE } from '../../modules/nf-core/gffread/main'
 
 def multiqc_report    = []
@@ -44,8 +45,6 @@ workflow SCRNASEQ_ALEVIN {
         }
     }
 
-
-
     /*
     * Perform quantification with salmon alevin
     */
@@ -58,6 +57,15 @@ workflow SCRNASEQ_ALEVIN {
     )
     ch_versions = ch_versions.mix(SIMPLEAF_QUANT.out.versions)
 
+    /*
+    * Perform h5ad conversion
+    */
+    MTX_TO_H5AD (
+        SIMPLEAF_QUANT.out.alevin_results,
+        []
+    )
+    ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
+
     /*
     * Run alevinQC
     */
@@ -67,5 +75,6 @@ workflow SCRNASEQ_ALEVIN {
     emit:
     ch_versions
     alevin_results = SIMPLEAF_QUANT.out.alevin_results
-    alevinqc = ALEVINQC.out.report
+    alevin_h5ad    = MTX_TO_H5AD.out.h5ad
+    alevinqc       = ALEVINQC.out.report
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index a0e7c958..c4a63d7d 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -155,7 +155,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it })
-        ch_h5ad_matrices = ch_h5ad_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_h5ad)
     }
 
     // Run STARSolo pipeline

From 73c91c27e0e6798f46d44ce05fdf5155821b68a8 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Thu, 26 Sep 2024 12:28:43 +0200
Subject: [PATCH 29/66] re-organise star modules

---
 .../local/{mtx_to_h5ad_star.nf => star/mtx_to_h5ad.nf}    | 8 ++++----
 modules/local/{ => star}/star_align.nf                    | 0
 .../mtx_to_h5ad_star.py => star/templates/mtx_to_h5ad.py} | 8 ++++----
 subworkflows/local/starsolo.nf                            | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)
 rename modules/local/{mtx_to_h5ad_star.nf => star/mtx_to_h5ad.nf} (80%)
 rename modules/local/{ => star}/star_align.nf (100%)
 rename modules/local/{templates/mtx_to_h5ad_star.py => star/templates/mtx_to_h5ad.py} (92%)

diff --git a/modules/local/mtx_to_h5ad_star.nf b/modules/local/star/mtx_to_h5ad.nf
similarity index 80%
rename from modules/local/mtx_to_h5ad_star.nf
rename to modules/local/star/mtx_to_h5ad.nf
index 84474ed0..8e77749d 100644
--- a/modules/local/mtx_to_h5ad_star.nf
+++ b/modules/local/star/mtx_to_h5ad.nf
@@ -1,4 +1,4 @@
-process MTX_TO_H5AD_STAR {
+process MTX_TO_H5AD {
     tag "$meta.id"
     label 'process_medium'
 
@@ -21,10 +21,10 @@ process MTX_TO_H5AD_STAR {
     def input_to_check = (inputs instanceof String) ? inputs : inputs[0]
 
     // check input type of inputs
-    input_type   = (input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
-    meta2        = meta + [input_type: input_type]
+    input_type = (input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
+    meta2      = meta + [input_type: input_type]
 
-    template 'mtx_to_h5ad_star.py'
+    template 'mtx_to_h5ad.py'
 
     stub:
     """
diff --git a/modules/local/star_align.nf b/modules/local/star/star_align.nf
similarity index 100%
rename from modules/local/star_align.nf
rename to modules/local/star/star_align.nf
diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/star/templates/mtx_to_h5ad.py
similarity index 92%
rename from modules/local/templates/mtx_to_h5ad_star.py
rename to modules/local/star/templates/mtx_to_h5ad.py
index 48749114..3a84295d 100755
--- a/modules/local/templates/mtx_to_h5ad_star.py
+++ b/modules/local/star/templates/mtx_to_h5ad.py
@@ -82,11 +82,11 @@ def input_to_adata(
 #
 
 # create the directory with the sample name
-os.makedirs("${meta.id}", exist_ok=True)
+os.makedirs("${meta2.id}", exist_ok=True)
 
 # input_type comes from NF module
 adata = input_to_adata(
-    input_data="${input_type}",
-    output="${meta.id}/${meta.id}_${input_type}_matrix.h5ad",
-    sample="${meta.id}"
+    input_data="${meta2.input_type}",
+    output="${meta2.id}/${meta2.id}_${meta2.input_type}_matrix.h5ad",
+    sample="${meta2.id}"
 )
diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 8236632d..3499f707 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -1,6 +1,6 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { STAR_ALIGN }                      from '../../modules/local/star_align'
-include { MTX_TO_H5AD_STAR as MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad_star'
+include { STAR_ALIGN  } from '../../modules/local/star/star_align'
+include { MTX_TO_H5AD } from '../../modules/local/star/mtx_to_h5ad'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/gunzip/main'

From 417bf698f44419f7ac23a806608738f8ba822de9 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 27 Sep 2024 09:52:37 +0200
Subject: [PATCH 30/66] fixed publishDir directives

---
 conf/modules.config | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index ac26f731..a6fc3411 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -35,22 +35,25 @@ process {
         withName: 'CELLBENDER_REMOVEBACKGROUND' {
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}/${meta.id}/emptydrops_filter" },
-                mode: params.publish_dir_mode
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
         withName: 'ADATA_BARCODES' {
             ext.prefix = { "${meta.id}_${meta.input_type}_matrix" }
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" },
-                mode: params.publish_dir_mode
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
     }
 
-    withName: 'MTX_TO_H5AD*|CONCAT_H5AD|ANNDATAR_CONVERT' {
+    withName: 'MTX_TO_H5AD|CONCAT_H5AD|ANNDATAR_CONVERT' {
         publishDir = [
             path: { "${params.outdir}/${params.aligner}/mtx_conversions" },
-            mode: params.publish_dir_mode
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
 
@@ -164,8 +167,9 @@ if (params.aligner == "alevin") {
         }
         withName: 'SIMPLEAF_QUANT' {
             publishDir = [
-                path: { "${params.outdir}/${params.aligner}" },
-                mode: params.publish_dir_mode
+                path: { "${params.outdir}/${params.aligner}/${meta.id}" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
             ext.args = "-r cr-like"
         }

From 94bf9c11ed6909d6615f059e3aebcc2ac12d4566 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 30 Sep 2024 13:47:53 +0200
Subject: [PATCH 31/66] add h5ad conversion module

---
 modules/local/mtx_to_h5ad.nf                  |  35 ++++++
 .../local/templates/mtx_to_h5ad_cellranger.py | 101 ++++++++++++++++++
 subworkflows/local/align_cellranger.nf        |  21 +++-
 3 files changed, 152 insertions(+), 5 deletions(-)
 create mode 100644 modules/local/mtx_to_h5ad.nf
 create mode 100755 modules/local/templates/mtx_to_h5ad_cellranger.py

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
new file mode 100644
index 00000000..cb3a0a4f
--- /dev/null
+++ b/modules/local/mtx_to_h5ad.nf
@@ -0,0 +1,35 @@
+process MTX_TO_H5AD {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
+        'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+
+    input:
+    // inputs from cellranger nf-core module does not come in a single sample dir
+    // for each sample, the sub-folders and files come directly in array.
+    tuple val(meta), path(inputs)
+    path txp2gene
+    path star_index
+
+    output:
+    tuple val(input_type), path("${meta.id}/*h5ad") , emit: h5ad
+    path  "versions.yml"                            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def aligner = (params.aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ]) ? 'cellranger' : params.aligner
+
+    template "mtx_to_h5ad_${aligner}.py"
+
+    stub:
+    """
+    mkdir ${meta.id}
+    touch ${meta.id}/${meta.id}_matrix.h5ad
+    touch versions.yml
+    """
+}
diff --git a/modules/local/templates/mtx_to_h5ad_cellranger.py b/modules/local/templates/mtx_to_h5ad_cellranger.py
new file mode 100755
index 00000000..a91fe0cf
--- /dev/null
+++ b/modules/local/templates/mtx_to_h5ad_cellranger.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+# Set numba chache dir to current working directory (which is a writable mount also in containers)
+import os
+
+os.environ["NUMBA_CACHE_DIR"] = "."
+
+import scanpy as sc
+import pandas as pd
+import argparse
+from anndata import AnnData
+import platform
+
+def _mtx_to_adata(
+    input: str,
+    sample: str,
+):
+
+    adata = sc.read_10x_h5(input)
+    adata.var["gene_symbols"] = adata.var_names
+    adata.var.set_index("gene_ids", inplace=True)
+    adata.obs["sample"] = sample
+
+    # reorder columns for 10x mtx files
+    adata.var = adata.var[["gene_symbols", "feature_types", "genome"]]
+
+    return adata
+
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string.
+    Args:
+        data (dict): The dictionary to format.
+        indent (int): The current indentation level.
+    Returns:
+        str: A string formatted as YAML.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+def dump_versions():
+    versions = {
+        "${task.process}": {
+            "python": platform.python_version(),
+            "scanpy": sc.__version__,
+            "pandas": pd.__version__
+        }
+    }
+
+    with open("versions.yml", "w") as f:
+        f.write(format_yaml_like(versions))
+
+def input_to_adata(
+    input_data: str,
+    output: str,
+    sample: str,
+):
+    print(f"Reading in {input_data}")
+
+    # open main data
+    adata = _mtx_to_adata(input_data, sample)
+
+    # standard format
+    # index are gene IDs and symbols are a column
+    adata.var['gene_versions'] = adata.var.index
+    adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0]
+    adata.var.index = adata.var["gene_ids"].values
+    adata.var = adata.var.drop("gene_ids", axis=1)
+    adata.var_names_make_unique()
+
+    print(adata)
+    print(adata.var)
+
+    # write results
+    adata.write_h5ad(f"{output}", compression="gzip")
+    print(f"Wrote h5ad file to {output}")
+
+    # dump versions
+    dump_versions()
+
+    return adata
+
+#
+# Run main script
+#
+
+# create the directory with the sample name
+os.makedirs("${meta.id}", exist_ok=True)
+
+# input_type comes from NF module
+adata = input_to_adata(
+    input_data="${meta.input_type}_feature_bc_matrix.h5",
+    output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
+    sample="${meta.id}"
+)
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 2461373b..413efc02 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -2,9 +2,10 @@
  * Alignment with Cellranger
  */
 
-include {CELLRANGER_MKGTF} from "../../modules/nf-core/cellranger/mkgtf/main.nf"
-include {CELLRANGER_MKREF} from "../../modules/nf-core/cellranger/mkref/main.nf"
-include {CELLRANGER_COUNT} from "../../modules/nf-core/cellranger/count/main.nf"
+include { CELLRANGER_MKGTF } from "../../modules/nf-core/cellranger/mkgtf/main.nf"
+include { CELLRANGER_MKREF } from "../../modules/nf-core/cellranger/mkref/main.nf"
+include { CELLRANGER_COUNT } from "../../modules/nf-core/cellranger/count/main.nf"
+include { MTX_TO_H5AD      } from '../../modules/local/mtx_to_h5ad'
 
 // Define workflow to subset and index a genome region fasta file
 workflow CELLRANGER_ALIGN {
@@ -49,7 +50,7 @@ workflow CELLRANGER_ALIGN {
             mtx_files.each{
                 if ( it.toString().contains("raw_feature_bc_matrix") ) { desired_files.add( it ) }
             }
-            [ meta, desired_files ]
+            [ meta + [input_type: 'raw'], desired_files ]
         }
 
         ch_matrices_filtered =
@@ -58,9 +59,19 @@ workflow CELLRANGER_ALIGN {
             mtx_files.each{
                 if ( it.toString().contains("filtered_feature_bc_matrix") ) { desired_files.add( it ) }
             }
-            [ meta, desired_files ]
+            [ meta + [input_type: 'filtered'], desired_files ]
         }
 
+        /*
+        * Perform h5ad conversion
+        */
+        MTX_TO_H5AD (
+            ch_matrices_raw.mix( ch_matrices_filtered ),
+            [],
+            []
+        )
+        ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
+
     emit:
         ch_versions
         cellranger_out      = CELLRANGER_COUNT.out.outs

From 43a29c22e632dcdbfbe452debd836b750efcb54b Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 30 Sep 2024 14:32:39 +0200
Subject: [PATCH 32/66] integrate to mtx_conversion module

currently receiving the error: 'H5AD encoding information is missing. This file may have been created with Python anndata<0.8.0.'
---
 modules/local/anndatar_convert.nf                 | 2 +-
 modules/local/mtx_to_h5ad.nf                      | 4 ++--
 modules/local/templates/mtx_to_h5ad_cellranger.py | 3 ---
 subworkflows/local/align_cellranger.nf            | 1 +
 workflows/scrnaseq.nf                             | 2 +-
 5 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
index a3a67a08..d6a8271d 100644
--- a/modules/local/anndatar_convert.nf
+++ b/modules/local/anndatar_convert.nf
@@ -3,7 +3,7 @@ process ANNDATAR_CONVERT {
 
     label 'process_medium'
 
-    container "fmalmeida/anndatar:dev" // TODO: Fix
+    container "docker://fmalmeida/anndatar:dev" // TODO: Fix
 
     input:
     tuple val(meta), path(h5ad)
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index cb3a0a4f..f708162d 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -15,8 +15,8 @@ process MTX_TO_H5AD {
     path star_index
 
     output:
-    tuple val(input_type), path("${meta.id}/*h5ad") , emit: h5ad
-    path  "versions.yml"                            , emit: versions
+    tuple val(meta), path("${meta.id}/*h5ad"), emit: h5ad
+    path  "versions.yml"                     , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/templates/mtx_to_h5ad_cellranger.py b/modules/local/templates/mtx_to_h5ad_cellranger.py
index a91fe0cf..294a15e7 100755
--- a/modules/local/templates/mtx_to_h5ad_cellranger.py
+++ b/modules/local/templates/mtx_to_h5ad_cellranger.py
@@ -74,9 +74,6 @@ def input_to_adata(
     adata.var = adata.var.drop("gene_ids", axis=1)
     adata.var_names_make_unique()
 
-    print(adata)
-    print(adata.var)
-
     # write results
     adata.write_h5ad(f"{output}", compression="gzip")
     print(f"Wrote h5ad file to {output}")
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 413efc02..09442313 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -76,5 +76,6 @@ workflow CELLRANGER_ALIGN {
         ch_versions
         cellranger_out      = CELLRANGER_COUNT.out.outs
         cellranger_matrices = ch_matrices_raw.mix( ch_matrices_filtered )
+        cellranger_h5ad     = MTX_TO_H5AD.out.h5ad
         star_index          = cellranger_index
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index c4a63d7d..4b6323ee 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -185,7 +185,7 @@ workflow SCRNASEQ {
             protocol_config['protocol']
         )
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
-        ch_star_index = CELLRANGER_ALIGN.out.star_index
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGER_ALIGN.out.cellranger_h5ad)
         ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map{
             meta, outs -> outs.findAll{ it -> it.name == "web_summary.html"}
         })

From 7fc32f0a7bd6b701739156bd16c15fa6394a37ab Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 30 Sep 2024 14:48:11 +0200
Subject: [PATCH 33/66] reorganize module levels and fix docker image

---
 modules/local/alevin/mtx_to_h5ad.nf           | 34 -------------------
 modules/local/{alevin => }/alevinqc.nf        |  0
 modules/local/mtx_to_h5ad.nf                  |  6 ++--
 modules/local/{alevin => }/simpleaf_index.nf  |  0
 modules/local/{alevin => }/simpleaf_quant.nf  |  0
 .../mtx_to_h5ad_alevin.py}                    |  8 ++---
 subworkflows/local/alevin.nf                  | 11 +++---
 7 files changed, 12 insertions(+), 47 deletions(-)
 delete mode 100644 modules/local/alevin/mtx_to_h5ad.nf
 rename modules/local/{alevin => }/alevinqc.nf (100%)
 rename modules/local/{alevin => }/simpleaf_index.nf (100%)
 rename modules/local/{alevin => }/simpleaf_quant.nf (100%)
 rename modules/local/{alevin/templates/mtx_to_h5ad.py => templates/mtx_to_h5ad_alevin.py} (92%)

diff --git a/modules/local/alevin/mtx_to_h5ad.nf b/modules/local/alevin/mtx_to_h5ad.nf
deleted file mode 100644
index 0c4fc844..00000000
--- a/modules/local/alevin/mtx_to_h5ad.nf
+++ /dev/null
@@ -1,34 +0,0 @@
-process MTX_TO_H5AD {
-    tag "$meta.id"
-    label 'process_medium'
-
-    conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"
-    container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538"
-
-    input:
-    tuple val(meta), path(inputs)
-    path star_index
-
-    output:
-    tuple val(meta2), path("${meta.id}/*h5ad"), emit: h5ad
-    path  "versions.yml"                      , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    // Get a file to check input type. Some aligners bring arrays instead of a single file.
-    def input_to_check = (inputs instanceof String) ? inputs : inputs[0]
-
-    // alevin has a single output
-    meta2 = meta + [input_type: 'raw']
-
-    template 'mtx_to_h5ad.py'
-
-    stub:
-    """
-    mkdir ${meta.id}
-    touch ${meta.id}/${meta.id}_matrix.h5ad
-    touch versions.yml
-    """
-}
diff --git a/modules/local/alevin/alevinqc.nf b/modules/local/alevinqc.nf
similarity index 100%
rename from modules/local/alevin/alevinqc.nf
rename to modules/local/alevinqc.nf
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index f708162d..751cf9a5 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -2,10 +2,8 @@ process MTX_TO_H5AD {
     tag "$meta.id"
     label 'process_medium'
 
-    conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
-        'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+    conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"
+    container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538"
 
     input:
     // inputs from cellranger nf-core module does not come in a single sample dir
diff --git a/modules/local/alevin/simpleaf_index.nf b/modules/local/simpleaf_index.nf
similarity index 100%
rename from modules/local/alevin/simpleaf_index.nf
rename to modules/local/simpleaf_index.nf
diff --git a/modules/local/alevin/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
similarity index 100%
rename from modules/local/alevin/simpleaf_quant.nf
rename to modules/local/simpleaf_quant.nf
diff --git a/modules/local/alevin/templates/mtx_to_h5ad.py b/modules/local/templates/mtx_to_h5ad_alevin.py
similarity index 92%
rename from modules/local/alevin/templates/mtx_to_h5ad.py
rename to modules/local/templates/mtx_to_h5ad_alevin.py
index fb784fde..fba5a34f 100755
--- a/modules/local/alevin/templates/mtx_to_h5ad.py
+++ b/modules/local/templates/mtx_to_h5ad_alevin.py
@@ -86,11 +86,11 @@ def input_to_adata(
 #
 
 # create the directory with the sample name
-os.makedirs("${meta2.id}", exist_ok=True)
+os.makedirs("${meta.id}", exist_ok=True)
 
 # input_type comes from NF module
 adata = input_to_adata(
-    input_data="${meta2.id}_alevin_results/af_quant/alevin/",
-    output="${meta2.id}/${meta2.id}_${meta2.input_type}_matrix.h5ad",
-    sample="${meta2.id}"
+    input_data="${meta.id}_alevin_results/af_quant/alevin/",
+    output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
+    sample="${meta.id}"
 )
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index d8848dc4..0aa2483f 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -1,9 +1,9 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome'
-include { ALEVINQC              } from '../../modules/local/alevin/alevinqc'
-include { SIMPLEAF_INDEX        } from '../../modules/local/alevin/simpleaf_index'
-include { SIMPLEAF_QUANT        } from '../../modules/local/alevin/simpleaf_quant'
-include { MTX_TO_H5AD           } from '../../modules/local/alevin/mtx_to_h5ad'
+include { ALEVINQC              } from '../../modules/local/alevinqc'
+include { SIMPLEAF_INDEX        } from '../../modules/local/simpleaf_index'
+include { SIMPLEAF_QUANT        } from '../../modules/local/simpleaf_quant'
+include { MTX_TO_H5AD           } from '../../modules/local/mtx_to_h5ad'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP                      } from '../../modules/nf-core/gunzip/main'
@@ -61,7 +61,8 @@ workflow SCRNASEQ_ALEVIN {
     * Perform h5ad conversion
     */
     MTX_TO_H5AD (
-        SIMPLEAF_QUANT.out.alevin_results,
+        SIMPLEAF_QUANT.out.alevin_results.map{ meta, files -> [meta + [input_type: 'raw'], files] },
+        [],
         []
     )
     ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())

From e36639681a00c8f959671cad2d85d2e9b19c5a48 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 30 Sep 2024 15:04:22 +0200
Subject: [PATCH 34/66] reorganize star modules

---
 modules/local/star/mtx_to_h5ad.nf             | 35 -------------------
 modules/local/{star => }/star_align.nf        |  0
 .../mtx_to_h5ad_star.py}                      |  8 ++---
 subworkflows/local/starsolo.nf                |  9 +++--
 4 files changed, 10 insertions(+), 42 deletions(-)
 delete mode 100644 modules/local/star/mtx_to_h5ad.nf
 rename modules/local/{star => }/star_align.nf (100%)
 rename modules/local/{star/templates/mtx_to_h5ad.py => templates/mtx_to_h5ad_star.py} (92%)

diff --git a/modules/local/star/mtx_to_h5ad.nf b/modules/local/star/mtx_to_h5ad.nf
deleted file mode 100644
index 8e77749d..00000000
--- a/modules/local/star/mtx_to_h5ad.nf
+++ /dev/null
@@ -1,35 +0,0 @@
-process MTX_TO_H5AD {
-    tag "$meta.id"
-    label 'process_medium'
-
-    conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"
-    container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538"
-
-    input:
-    tuple val(meta), path(inputs)
-    path star_index
-
-    output:
-    tuple val(meta2), path("${meta.id}/*h5ad"), emit: h5ad
-    path  "versions.yml"                      , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    // Get a file to check input type. Some aligners bring arrays instead of a single file.
-    def input_to_check = (inputs instanceof String) ? inputs : inputs[0]
-
-    // check input type of inputs
-    input_type = (input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
-    meta2      = meta + [input_type: input_type]
-
-    template 'mtx_to_h5ad.py'
-
-    stub:
-    """
-    mkdir ${meta.id}
-    touch ${meta.id}/${meta.id}_matrix.h5ad
-    touch versions.yml
-    """
-}
diff --git a/modules/local/star/star_align.nf b/modules/local/star_align.nf
similarity index 100%
rename from modules/local/star/star_align.nf
rename to modules/local/star_align.nf
diff --git a/modules/local/star/templates/mtx_to_h5ad.py b/modules/local/templates/mtx_to_h5ad_star.py
similarity index 92%
rename from modules/local/star/templates/mtx_to_h5ad.py
rename to modules/local/templates/mtx_to_h5ad_star.py
index 3a84295d..49a1837e 100755
--- a/modules/local/star/templates/mtx_to_h5ad.py
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -82,11 +82,11 @@ def input_to_adata(
 #
 
 # create the directory with the sample name
-os.makedirs("${meta2.id}", exist_ok=True)
+os.makedirs("${meta.id}", exist_ok=True)
 
 # input_type comes from NF module
 adata = input_to_adata(
-    input_data="${meta2.input_type}",
-    output="${meta2.id}/${meta2.id}_${meta2.input_type}_matrix.h5ad",
-    sample="${meta2.id}"
+    input_data="${meta.input_type}",
+    output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
+    sample="${meta.id}"
 )
diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 3499f707..6b71dbdd 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -1,6 +1,6 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { STAR_ALIGN  } from '../../modules/local/star/star_align'
-include { MTX_TO_H5AD } from '../../modules/local/star/mtx_to_h5ad'
+include { STAR_ALIGN  } from '../../modules/local/star_align'
+include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/gunzip/main'
@@ -58,7 +58,10 @@ workflow STARSOLO {
     * Perform h5ad conversion
     */
     MTX_TO_H5AD (
-        STAR_ALIGN.out.raw_counts.mix( STAR_ALIGN.out.filtered_counts ),
+        STAR_ALIGN.out.raw_counts
+            .map{ meta, files -> [meta + [input_type: 'raw'], files] }
+            .mix( STAR_ALIGN.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] } ),
+        [],
         star_index.map{ meta, index -> index }
     )
     ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())

From 76f126ebc0d804266bcaa49cce5ca71718e37080 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 4 Oct 2024 09:36:44 +0200
Subject: [PATCH 35/66] re-structure and include cellranger

---
 subworkflows/local/align_cellranger.nf | 12 -------
 subworkflows/local/mtx_conversion.nf   | 33 ++++++++++++++++++--
 workflows/scrnaseq.nf                  | 43 +++++++-------------------
 3 files changed, 43 insertions(+), 45 deletions(-)

diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 09442313..86b69a38 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -5,7 +5,6 @@
 include { CELLRANGER_MKGTF } from "../../modules/nf-core/cellranger/mkgtf/main.nf"
 include { CELLRANGER_MKREF } from "../../modules/nf-core/cellranger/mkref/main.nf"
 include { CELLRANGER_COUNT } from "../../modules/nf-core/cellranger/count/main.nf"
-include { MTX_TO_H5AD      } from '../../modules/local/mtx_to_h5ad'
 
 // Define workflow to subset and index a genome region fasta file
 workflow CELLRANGER_ALIGN {
@@ -62,20 +61,9 @@ workflow CELLRANGER_ALIGN {
             [ meta + [input_type: 'filtered'], desired_files ]
         }
 
-        /*
-        * Perform h5ad conversion
-        */
-        MTX_TO_H5AD (
-            ch_matrices_raw.mix( ch_matrices_filtered ),
-            [],
-            []
-        )
-        ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
-
     emit:
         ch_versions
         cellranger_out      = CELLRANGER_COUNT.out.outs
         cellranger_matrices = ch_matrices_raw.mix( ch_matrices_filtered )
-        cellranger_h5ad     = MTX_TO_H5AD.out.h5ad
         star_index          = cellranger_index
 }
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 6435966d..3ecd1b36 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -1,27 +1,56 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
+include { MTX_TO_H5AD           } from '../../modules/local/mtx_to_h5ad'
 include { CONCAT_H5AD           } from '../../modules/local/concat_h5ad.nf'
 include { ANNDATAR_CONVERT      } from '../../modules/local/anndatar_convert'
+include { EMPTY_DROPLET_REMOVAL } from '../../subworkflows/local/emptydrops_removal'
 
 workflow MTX_CONVERSION {
 
     take:
     mtx_matrices
+    txp2gene
+    star_index
     samplesheet
 
     main:
         ch_versions = Channel.empty()
+        ch_h5ads    = Channel.empty()
+
+        //
+        // MODULE: Convert matrices to h5ad
+        //
+        MTX_TO_H5AD (
+            mtx_matrices,
+            txp2gene,
+            star_index
+        )
+        ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
+        ch_h5ads    = MTX_TO_H5AD.out.h5ad
+
+        //
+        // SUBWORKFLOW: Run cellbender emptydrops filter
+        //
+        if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) {
+
+            // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
+            EMPTY_DROPLET_REMOVAL (
+                MTX_TO_H5AD.out.h5ad.filter { meta, mtx_files -> meta.input_type == 'raw' }
+            )
+            ch_h5ads = ch_h5ads.mix( EMPTY_DROPLET_REMOVAL.out.h5ad )
+
+        }
 
         //
         // MODULE: Convert to Rds with AnndataR package
         //
         ANNDATAR_CONVERT (
-            mtx_matrices
+            ch_h5ads
         )
 
         //
         // Concat sample-specific h5ad in one
         //
-        ch_concat_h5ad_input = mtx_matrices.groupTuple() // gather all sample-specific files / per type
+        ch_concat_h5ad_input = ch_h5ads.groupTuple() // gather all sample-specific files / per type
         if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') {
             // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] )
             // which nextflow break because it is not a valid 'path' thus, we have to remove one level
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 4b6323ee..282a7ab9 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -7,7 +7,6 @@ include { CELLRANGER_ALIGN                   } from '../subworkflows/local/align
 include { CELLRANGER_MULTI_ALIGN             } from '../subworkflows/local/align_cellrangermulti'
 include { CELLRANGERARC_ALIGN                } from '../subworkflows/local/align_cellrangerarc'
 include { UNIVERSC_ALIGN                     } from '../subworkflows/local/align_universc'
-include { EMPTY_DROPLET_REMOVAL              } from '../subworkflows/local/emptydrops_removal'
 include { MTX_CONVERSION                     } from '../subworkflows/local/mtx_conversion'
 include { GTF_GENE_FILTER                    } from '../modules/local/gtf_gene_filter'
 include { GUNZIP as GUNZIP_FASTA             } from '../modules/nf-core/gunzip/main'
@@ -87,7 +86,7 @@ workflow SCRNASEQ {
     empty_file                        = file("$projectDir/assets/EMPTY", checkIfExists: true)
 
     ch_versions      = Channel.empty()
-    ch_h5ad_matrices = Channel.empty()
+    ch_mtx_matrices = Channel.empty()
 
     // Run FastQC
     if (!params.skip_fastqc) {
@@ -137,7 +136,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions)
-        ch_h5ad_matrices = ch_h5ad_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts)
+        ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts)
         ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene
     }
 
@@ -155,7 +154,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it })
-        ch_h5ad_matrices = ch_h5ad_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_h5ad)
+        ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_h5ad)
     }
 
     // Run STARSolo pipeline
@@ -172,7 +171,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)
-        ch_h5ad_matrices = STARSOLO.out.star_h5ad
+        ch_mtx_matrices = STARSOLO.out.star_h5ad
     }
 
     // Run cellranger pipeline
@@ -185,8 +184,8 @@ workflow SCRNASEQ {
             protocol_config['protocol']
         )
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
-        ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGER_ALIGN.out.cellranger_h5ad)
-        ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map{
+        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_matrices)
+        ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map {
             meta, outs -> outs.findAll{ it -> it.name == "web_summary.html"}
         })
     }
@@ -201,7 +200,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(UNIVERSC_ALIGN.out.ch_versions)
-        ch_h5ad_matrices = ch_h5ad_matrices.mix(UNIVERSC_ALIGN.out.universc_out)
+        ch_mtx_matrices = ch_mtx_matrices.mix(UNIVERSC_ALIGN.out.universc_out)
     }
 
     // Run cellrangerarc pipeline
@@ -215,7 +214,7 @@ workflow SCRNASEQ {
             ch_cellrangerarc_config
         )
         ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions)
-        ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out)
+        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out)
     }
 
     // Run cellrangermulti pipeline
@@ -283,33 +282,15 @@ workflow SCRNASEQ {
         ch_multiqc_files = ch_multiqc_files.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_out.map{
             meta, outs -> outs.findAll{ it -> it.name == "web_summary.html" }
         })
-        ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx)
-
-    }
-
-    // SUBWORKFLOW: Run cellbender emptydrops filter
-    if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) {
-
-        //
-        // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
-        //
-        if ( params.aligner in [ 'cellranger', 'cellrangermulti', 'kallisto', 'star' ] ) {
-            ch_h5ad_matrices_for_emptydrops =
-                ch_h5ad_matrices.filter { meta, mtx_files -> meta.input_type == 'raw' }
-        } else {
-            ch_h5ad_matrices_for_emptydrops = ch_h5ad_matrices
-        }
-
-        EMPTY_DROPLET_REMOVAL (
-            ch_h5ad_matrices_for_emptydrops
-        )
-        ch_h5ad_matrices = ch_h5ad_matrices.mix( EMPTY_DROPLET_REMOVAL.out.h5ad )
+        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx)
 
     }
 
     // Run mtx to h5ad conversion subworkflow
     MTX_CONVERSION (
-        ch_h5ad_matrices,
+        ch_mtx_matrices,
+        ch_txp2gene,
+        ch_star_index,
         ch_input
     )
 

From b92af8b43e9716b9d1e90e29441f7a531170e564 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 4 Oct 2024 09:47:58 +0200
Subject: [PATCH 36/66] add alevin to new structure

---
 subworkflows/local/alevin.nf | 14 +-------------
 workflows/scrnaseq.nf        |  2 +-
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index 0aa2483f..ae98cc85 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -3,7 +3,6 @@ include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptom
 include { ALEVINQC              } from '../../modules/local/alevinqc'
 include { SIMPLEAF_INDEX        } from '../../modules/local/simpleaf_index'
 include { SIMPLEAF_QUANT        } from '../../modules/local/simpleaf_quant'
-include { MTX_TO_H5AD           } from '../../modules/local/mtx_to_h5ad'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP                      } from '../../modules/nf-core/gunzip/main'
@@ -57,16 +56,6 @@ workflow SCRNASEQ_ALEVIN {
     )
     ch_versions = ch_versions.mix(SIMPLEAF_QUANT.out.versions)
 
-    /*
-    * Perform h5ad conversion
-    */
-    MTX_TO_H5AD (
-        SIMPLEAF_QUANT.out.alevin_results.map{ meta, files -> [meta + [input_type: 'raw'], files] },
-        [],
-        []
-    )
-    ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
-
     /*
     * Run alevinQC
     */
@@ -75,7 +64,6 @@ workflow SCRNASEQ_ALEVIN {
 
     emit:
     ch_versions
-    alevin_results = SIMPLEAF_QUANT.out.alevin_results
-    alevin_h5ad    = MTX_TO_H5AD.out.h5ad
+    alevin_results = SIMPLEAF_QUANT.out.alevin_results.map{ meta, files -> [meta + [input_type: 'raw'], files] }
     alevinqc       = ALEVINQC.out.report
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 282a7ab9..a55029ea 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -154,7 +154,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it })
-        ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_h5ad)
+        ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
     }
 
     // Run STARSolo pipeline

From c57b09fc98fe0b21c54234dcf3db729f1ab04f60 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 4 Oct 2024 10:29:37 +0200
Subject: [PATCH 37/66] add star to new structure

---
 subworkflows/local/starsolo.nf | 18 +++++-------------
 workflows/scrnaseq.nf          |  2 +-
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 6b71dbdd..d69ce9d9 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -54,24 +54,16 @@ workflow STARSOLO {
     )
     ch_versions = ch_versions.mix(STAR_ALIGN.out.versions)
 
-    /*
-    * Perform h5ad conversion
-    */
-    MTX_TO_H5AD (
-        STAR_ALIGN.out.raw_counts
-            .map{ meta, files -> [meta + [input_type: 'raw'], files] }
-            .mix( STAR_ALIGN.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] } ),
-        [],
-        star_index.map{ meta, index -> index }
-    )
-    ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
+    // generate channel of star counts with correct metadata
+    ch_star_counts =
+        STAR_ALIGN.out.raw_counts.map{ meta, files -> [meta + [input_type: 'raw'], files] }
+        .mix( STAR_ALIGN.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] } )
 
 
     emit:
     ch_versions
     // get rid of meta for star index
     star_result = STAR_ALIGN.out.tab
-    star_counts = STAR_ALIGN.out.counts
-    star_h5ad   = MTX_TO_H5AD.out.h5ad
+    star_counts = ch_star_counts
     for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it }
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index a55029ea..2f2301c1 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -171,7 +171,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)
-        ch_mtx_matrices = STARSOLO.out.star_h5ad
+        ch_mtx_matrices = STARSOLO.out.star_counts
     }
 
     // Run cellranger pipeline

From d0d1f814b56971d1757e681a6a29f022a80080fd Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 4 Oct 2024 13:04:06 +0200
Subject: [PATCH 38/66] Add kallisto standard workflow to structure

---
 modules/local/templates/mtx_to_h5ad_alevin.py |   4 +-
 .../local/templates/mtx_to_h5ad_kallisto.py   | 103 ++++++++++++++++++
 subworkflows/local/kallisto_bustools.nf       |   8 +-
 workflows/scrnaseq.nf                         |   2 +-
 4 files changed, 109 insertions(+), 8 deletions(-)
 create mode 100755 modules/local/templates/mtx_to_h5ad_kallisto.py

diff --git a/modules/local/templates/mtx_to_h5ad_alevin.py b/modules/local/templates/mtx_to_h5ad_alevin.py
index fba5a34f..50107e0c 100755
--- a/modules/local/templates/mtx_to_h5ad_alevin.py
+++ b/modules/local/templates/mtx_to_h5ad_alevin.py
@@ -17,8 +17,8 @@ def _mtx_to_adata(
 ):
 
     adata = sc.read_mtx(f"{input}/quants_mat.mtx")
-    adata.obs_names = pd.read_csv(f"{input}/quants_mat_rows.txt", header=None, sep="\t")[0].values
-    adata.var_names = pd.read_csv(f"{input}/quants_mat_cols.txt", header=None, sep="\t")[0].values
+    adata.obs_names = pd.read_csv(f"{input}/quants_mat_rows.txt", header=None, sep="\\t")[0].values
+    adata.var_names = pd.read_csv(f"{input}/quants_mat_cols.txt", header=None, sep="\\t")[0].values
     adata.obs["sample"] = sample
 
     return adata
diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py
new file mode 100755
index 00000000..cf27f13c
--- /dev/null
+++ b/modules/local/templates/mtx_to_h5ad_kallisto.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+# Set numba chache dir to current working directory (which is a writable mount also in containers)
+import os
+
+os.environ["NUMBA_CACHE_DIR"] = "."
+
+import scanpy as sc
+import pandas as pd
+import argparse
+from anndata import AnnData
+import platform
+import glob
+
+def _mtx_to_adata(
+    input: str,
+    sample: str,
+    t2g: str
+):
+
+    adata = sc.read_mtx(glob.glob(f"{input}/*.mtx")[0])
+    adata.obs_names = pd.read_csv(glob.glob(f"{input}/*.barcodes.txt")[0], header=None, sep="\\t")[0].values
+    adata.var_names = pd.read_csv(glob.glob(f"{input}/*.genes.txt")[0], header=None, sep="\\t")[0].values
+    adata.obs["sample"] = sample
+
+    txp2gene = pd.read_table(glob.glob(f"{t2g}")[0], header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2])
+    txp2gene = txp2gene.drop_duplicates(subset="gene_id").set_index("gene_id")
+    adata.var = pd.merge(adata.var, txp2gene, left_index=True, right_index=True)
+
+    return adata
+
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string.
+    Args:
+        data (dict): The dictionary to format.
+        indent (int): The current indentation level.
+    Returns:
+        str: A string formatted as YAML.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+def dump_versions():
+    versions = {
+        "${task.process}": {
+            "python": platform.python_version(),
+            "scanpy": sc.__version__,
+            "pandas": pd.__version__
+        }
+    }
+
+    with open("versions.yml", "w") as f:
+        f.write(format_yaml_like(versions))
+
+def input_to_adata(
+    input_data: str,
+    output: str,
+    sample: str,
+    t2g: str
+):
+    print(f"Reading in {input_data}")
+
+    # open main data
+    adata = _mtx_to_adata(input_data, sample, t2g)
+
+    # standard format
+    # index are gene IDs and symbols are a column
+    adata.var['gene_versions'] = adata.var.index
+    adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0]
+    adata.var.index = adata.var["gene_ids"].values
+    adata.var = adata.var.drop("gene_ids", axis=1)
+    adata.var_names_make_unique()
+
+    # write results
+    adata.write_h5ad(f"{output}", compression="gzip")
+    print(f"Wrote h5ad file to {output}")
+
+    # dump versions
+    dump_versions()
+
+    return adata
+
+#
+# Run main script
+#
+
+# create the directory with the sample name
+os.makedirs("${meta.id}", exist_ok=True)
+
+# input_type comes from NF module
+adata = input_to_adata(
+    input_data="${inputs}",
+    output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
+    sample="${meta.id}",
+    t2g="${txp2gene}"
+)
diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index 3deee2c5..d6b171a6 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -55,20 +55,18 @@ workflow KALLISTO_BUSTOOLS {
     // get raw/filtered counts
     ch_raw_counts = KALLISTOBUSTOOLS_COUNT.out.count.map{ meta, kb_dir ->
         if (file("${kb_dir.toUriString()}/counts_unfiltered").exists()) {
-            [meta, file("${kb_dir.toUriString()}/counts_unfiltered")]
+            [meta + [input_type: 'raw'], file("${kb_dir.toUriString()}/counts_unfiltered")]
         }
     }
     ch_filtered_counts = KALLISTOBUSTOOLS_COUNT.out.count.map{ meta, kb_dir ->
         if (file("${kb_dir.toUriString()}/counts_filtered").exists()) {
-            [meta, file("${kb_dir.toUriString()}/counts_filtered")]
+            [meta + [input_type: 'filtered'], file("${kb_dir.toUriString()}/counts_filtered")]
         }
     }
 
     emit:
     ch_versions
-    counts          = KALLISTOBUSTOOLS_COUNT.out.count
-    raw_counts      = ch_raw_counts
-    filtered_counts = ch_filtered_counts
+    counts          = ch_raw_counts.mix (ch_filtered_counts)
     txp2gene        = txp2gene.collect()
 
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 2f2301c1..3f3c1017 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -136,7 +136,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts)
+        ch_mtx_matrices = KALLISTO_BUSTOOLS.out.counts
         ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene
     }
 

From aa99cd17eac212e350d1d20fe251ccd187c9343c Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 4 Oct 2024 16:16:11 +0200
Subject: [PATCH 39/66] Account for non-standard kallisto workflows

---
 .../local/templates/mtx_to_h5ad_kallisto.py   | 58 ++++++++++++-------
 subworkflows/local/emptydrops_removal.nf      |  2 +-
 subworkflows/local/mtx_conversion.nf          | 20 ++++++-
 3 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py
index cf27f13c..24efe7e0 100755
--- a/modules/local/templates/mtx_to_h5ad_kallisto.py
+++ b/modules/local/templates/mtx_to_h5ad_kallisto.py
@@ -13,14 +13,16 @@
 import glob
 
 def _mtx_to_adata(
-    input: str,
-    sample: str,
-    t2g: str
+    matrix: str,
+    barcodes: str,
+    features: str,
+    t2g: str,
+    sample: str
 ):
 
-    adata = sc.read_mtx(glob.glob(f"{input}/*.mtx")[0])
-    adata.obs_names = pd.read_csv(glob.glob(f"{input}/*.barcodes.txt")[0], header=None, sep="\\t")[0].values
-    adata.var_names = pd.read_csv(glob.glob(f"{input}/*.genes.txt")[0], header=None, sep="\\t")[0].values
+    adata = sc.read_mtx(matrix)
+    adata.obs_names = pd.read_csv(barcodes, header=None, sep="\\t")[0].values
+    adata.var_names = pd.read_csv(features, header=None, sep="\\t")[0].values
     adata.obs["sample"] = sample
 
     txp2gene = pd.read_table(glob.glob(f"{t2g}")[0], header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2])
@@ -60,15 +62,17 @@ def dump_versions():
         f.write(format_yaml_like(versions))
 
 def input_to_adata(
-    input_data: str,
+    matrix: str,
+    barcodes: str,
+    features: str,
+    t2g: str,
     output: str,
     sample: str,
-    t2g: str
 ):
-    print(f"Reading in {input_data}")
+    print(f"Reading in {matrix}")
 
     # open main data
-    adata = _mtx_to_adata(input_data, sample, t2g)
+    adata = _mtx_to_adata(matrix=matrix, barcodes=barcodes, features=features, sample=sample, t2g=t2g)
 
     # standard format
     # index are gene IDs and symbols are a column
@@ -82,11 +86,6 @@ def input_to_adata(
     adata.write_h5ad(f"{output}", compression="gzip")
     print(f"Wrote h5ad file to {output}")
 
-    # dump versions
-    dump_versions()
-
-    return adata
-
 #
 # Run main script
 #
@@ -95,9 +94,26 @@ def input_to_adata(
 os.makedirs("${meta.id}", exist_ok=True)
 
 # input_type comes from NF module
-adata = input_to_adata(
-    input_data="${inputs}",
-    output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
-    sample="${meta.id}",
-    t2g="${txp2gene}"
-)
+if "${params.kb_workflow}" == "standard":
+    input_to_adata(
+        matrix=glob.glob("${inputs}/*.mtx")[0],
+        barcodes=glob.glob("${inputs}/*.barcodes.txt")[0],
+        features=glob.glob("${inputs}/*.genes.txt")[0],
+        output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
+        sample="${meta.id}",
+        t2g="${txp2gene}"
+    )
+
+else:
+    for type in ['spliced', 'unspliced']:
+        input_to_adata(
+            matrix=glob.glob("${inputs}/" + f"{type}*.mtx")[0],
+            barcodes=glob.glob("${inputs}/" + f"{type}*.barcodes.txt")[0],
+            features=glob.glob("${inputs}/" + f"{type}*.genes.txt")[0],
+            output="${meta.id}/${meta.id}_${meta.input_type}" + f"_{type}_matrix.h5ad",
+            sample="${meta.id}",
+            t2g="${txp2gene}"
+        )
+
+# dump versions
+dump_versions()
diff --git a/subworkflows/local/emptydrops_removal.nf b/subworkflows/local/emptydrops_removal.nf
index 7171c3f3..2ccacc26 100644
--- a/subworkflows/local/emptydrops_removal.nf
+++ b/subworkflows/local/emptydrops_removal.nf
@@ -16,7 +16,7 @@ workflow EMPTY_DROPLET_REMOVAL {
         .join(CELLBENDER_REMOVEBACKGROUND.out.barcodes)
         .map { meta, h5ad, csv ->
             def meta_clone = meta.clone()
-            meta_clone.input_type = 'emptydrops_filter'
+            meta_clone.input_type = meta['input_type'].toString().replaceAll('raw', 'emptydrops_filter')
 
             [ meta_clone, h5ad, csv ]
         }
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 3ecd1b36..55bd68dc 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -25,7 +25,23 @@ workflow MTX_CONVERSION {
             star_index
         )
         ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
-        ch_h5ads    = MTX_TO_H5AD.out.h5ad
+
+        // fix channel size when kallisto non-standard workflow
+        if (params.aligner == 'kallisto' && !(params.kb_workflow == 'standard')) {
+            ch_h5ads =
+            MTX_TO_H5AD.out.h5ad
+                .transpose()
+                .map { meta, h5ad ->
+                    def meta_clone = meta.clone()
+                    def spc_prefix = h5ad.toString().contains('unspliced') ? 'un' : ''
+
+                    meta_clone["input_type"] = "${meta.input_type}_${spc_prefix}spliced"
+
+                    [ meta_clone, h5ad ]
+                }
+        } else {
+            ch_h5ads = MTX_TO_H5AD.out.h5ad
+        }
 
         //
         // SUBWORKFLOW: Run cellbender emptydrops filter
@@ -34,7 +50,7 @@ workflow MTX_CONVERSION {
 
             // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
             EMPTY_DROPLET_REMOVAL (
-                MTX_TO_H5AD.out.h5ad.filter { meta, mtx_files -> meta.input_type == 'raw' }
+                ch_h5ads.filter { meta, mtx_files -> meta.input_type.contains('raw') }
             )
             ch_h5ads = ch_h5ads.mix( EMPTY_DROPLET_REMOVAL.out.h5ad )
 

From 0ca1161dbac0722cd0e46afe270cb9cc40260d90 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Fri, 4 Oct 2024 16:19:31 +0200
Subject: [PATCH 40/66] Simplify alevin template

---
 modules/local/templates/mtx_to_h5ad_alevin.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/modules/local/templates/mtx_to_h5ad_alevin.py b/modules/local/templates/mtx_to_h5ad_alevin.py
index 50107e0c..db295f1b 100755
--- a/modules/local/templates/mtx_to_h5ad_alevin.py
+++ b/modules/local/templates/mtx_to_h5ad_alevin.py
@@ -76,11 +76,6 @@ def input_to_adata(
     adata.write_h5ad(f"{output}", compression="gzip")
     print(f"Wrote h5ad file to {output}")
 
-    # dump versions
-    dump_versions()
-
-    return adata
-
 #
 # Run main script
 #
@@ -89,8 +84,11 @@ def input_to_adata(
 os.makedirs("${meta.id}", exist_ok=True)
 
 # input_type comes from NF module
-adata = input_to_adata(
+input_to_adata(
     input_data="${meta.id}_alevin_results/af_quant/alevin/",
     output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
     sample="${meta.id}"
 )
+
+# dump versions
+dump_versions()

From 28251b068a5df1f12661195fbb0b60eb29656337 Mon Sep 17 00:00:00 2001
From: "zxBIB Almeida,Felipe (GCBDS) EXTERNAL"
 <felipe.almeida.ext@boehringer-ingelheim.com>
Date: Mon, 7 Oct 2024 09:21:35 +0200
Subject: [PATCH 41/66] reorganise star template

---
 modules/local/templates/mtx_to_h5ad_star.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py
index 49a1837e..0cc9a969 100755
--- a/modules/local/templates/mtx_to_h5ad_star.py
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -72,11 +72,6 @@ def input_to_adata(
     adata.write_h5ad(f"{output}", compression="gzip")
     print(f"Wrote h5ad file to {output}")
 
-    # dump versions
-    dump_versions()
-
-    return adata
-
 #
 # Run main script
 #
@@ -85,8 +80,11 @@ def input_to_adata(
 os.makedirs("${meta.id}", exist_ok=True)
 
 # input_type comes from NF module
-adata = input_to_adata(
+input_to_adata(
     input_data="${meta.input_type}",
     output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
     sample="${meta.id}"
 )
+
+# dump versions
+dump_versions()

From c1e8357112b8e389bab9e5a6503b052b6147594c Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 09:02:53 +0100
Subject: [PATCH 42/66] write uncompressed

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 modules/local/templates/concat_h5ad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/templates/concat_h5ad.py b/modules/local/templates/concat_h5ad.py
index 033bc89a..2a483e7b 100755
--- a/modules/local/templates/concat_h5ad.py
+++ b/modules/local/templates/concat_h5ad.py
@@ -34,6 +34,6 @@ def read_samplesheet(samplesheet):
 
     # merge with data.frame, on sample information
     adata.obs = adata.obs.join(df_samplesheet, on="sample").astype(str)
-    adata.write_h5ad("combined_${meta.input_type}_matrix.h5ad", compression="gzip")
+    adata.write_h5ad("combined_${meta.input_type}_matrix.h5ad")
 
     print("Wrote h5ad file to {}".format("combined_${meta.input_type}_matrix.h5ad"))

From ae857102b56cc8f68def3bb45b1f167edb672c07 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 09:04:52 +0100
Subject: [PATCH 43/66] use .astype(str)

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 modules/local/templates/concat_h5ad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/templates/concat_h5ad.py b/modules/local/templates/concat_h5ad.py
index 2a483e7b..e2cce156 100755
--- a/modules/local/templates/concat_h5ad.py
+++ b/modules/local/templates/concat_h5ad.py
@@ -16,7 +16,7 @@ def read_samplesheet(samplesheet):
     # samplesheet may contain replicates, when it has,
     # group information from replicates and collapse with commas
     # only keep unique values using set()
-    df = df.groupby(["sample"]).agg(lambda column: ",".join(set(str(column))))
+    df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column.astype(str)))
 
     return df
 

From ae8809a828b5543dc1c5ed968fa057e962fb13bc Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 09:05:14 +0100
Subject: [PATCH 44/66] simplify iteration

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 modules/local/templates/mtx_to_h5ad_cellranger.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modules/local/templates/mtx_to_h5ad_cellranger.py b/modules/local/templates/mtx_to_h5ad_cellranger.py
index 294a15e7..be870829 100755
--- a/modules/local/templates/mtx_to_h5ad_cellranger.py
+++ b/modules/local/templates/mtx_to_h5ad_cellranger.py
@@ -69,9 +69,7 @@ def input_to_adata(
     # standard format
     # index are gene IDs and symbols are a column
     adata.var['gene_versions'] = adata.var.index
-    adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0]
-    adata.var.index = adata.var["gene_ids"].values
-    adata.var = adata.var.drop("gene_ids", axis=1)
+    adata.var.index = adata.var['gene_versions'].str.split('.').str[0]
     adata.var_names_make_unique()
 
     # write results

From 68464de7d8d0a71a827379f2d15d1f8f4b310cfc Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 09:05:35 +0100
Subject: [PATCH 45/66] perform join left operation

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 modules/local/templates/mtx_to_h5ad_kallisto.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py
index 24efe7e0..87511823 100755
--- a/modules/local/templates/mtx_to_h5ad_kallisto.py
+++ b/modules/local/templates/mtx_to_h5ad_kallisto.py
@@ -27,7 +27,7 @@ def _mtx_to_adata(
 
     txp2gene = pd.read_table(glob.glob(f"{t2g}")[0], header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2])
     txp2gene = txp2gene.drop_duplicates(subset="gene_id").set_index("gene_id")
-    adata.var = pd.merge(adata.var, txp2gene, left_index=True, right_index=True)
+    adata.var = adata.var.join(txp2gene, how="left")
 
     return adata
 

From fdedc4d3385e59b2f2670a7d16525928f0dc384f Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 09:05:51 +0100
Subject: [PATCH 46/66] do not compress output h5ad

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 modules/local/templates/mtx_to_h5ad_kallisto.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py
index 87511823..fab07f59 100755
--- a/modules/local/templates/mtx_to_h5ad_kallisto.py
+++ b/modules/local/templates/mtx_to_h5ad_kallisto.py
@@ -83,7 +83,7 @@ def input_to_adata(
     adata.var_names_make_unique()
 
     # write results
-    adata.write_h5ad(f"{output}", compression="gzip")
+    adata.write_h5ad(f"{output}")
     print(f"Wrote h5ad file to {output}")
 
 #

From 187dbf6d04d8fe73a4b4cabbca2b957656035ed6 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 09:06:26 +0100
Subject: [PATCH 47/66] perform join left operation

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 modules/local/templates/concat_h5ad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/templates/concat_h5ad.py b/modules/local/templates/concat_h5ad.py
index e2cce156..ac5da7f6 100755
--- a/modules/local/templates/concat_h5ad.py
+++ b/modules/local/templates/concat_h5ad.py
@@ -33,7 +33,7 @@ def read_samplesheet(samplesheet):
     adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_")
 
     # merge with data.frame, on sample information
-    adata.obs = adata.obs.join(df_samplesheet, on="sample").astype(str)
+    adata.obs = adata.obs.join(df_samplesheet, on="sample", how="left").astype(str)
     adata.write_h5ad("combined_${meta.input_type}_matrix.h5ad")
 
     print("Wrote h5ad file to {}".format("combined_${meta.input_type}_matrix.h5ad"))

From 98af608abbc3aaa562b052ccecebb0030c9ef0b6 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 08:21:15 +0000
Subject: [PATCH 48/66] not compress h5ad output

---
 modules/local/templates/mtx_to_h5ad_alevin.py     | 2 +-
 modules/local/templates/mtx_to_h5ad_cellranger.py | 2 +-
 modules/local/templates/mtx_to_h5ad_star.py       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/local/templates/mtx_to_h5ad_alevin.py b/modules/local/templates/mtx_to_h5ad_alevin.py
index db295f1b..a4563d6b 100755
--- a/modules/local/templates/mtx_to_h5ad_alevin.py
+++ b/modules/local/templates/mtx_to_h5ad_alevin.py
@@ -73,7 +73,7 @@ def input_to_adata(
     adata.var_names_make_unique()
 
     # write results
-    adata.write_h5ad(f"{output}", compression="gzip")
+    adata.write_h5ad(f"{output}")
     print(f"Wrote h5ad file to {output}")
 
 #
diff --git a/modules/local/templates/mtx_to_h5ad_cellranger.py b/modules/local/templates/mtx_to_h5ad_cellranger.py
index be870829..4a40201d 100755
--- a/modules/local/templates/mtx_to_h5ad_cellranger.py
+++ b/modules/local/templates/mtx_to_h5ad_cellranger.py
@@ -73,7 +73,7 @@ def input_to_adata(
     adata.var_names_make_unique()
 
     # write results
-    adata.write_h5ad(f"{output}", compression="gzip")
+    adata.write_h5ad(f"{output}")
     print(f"Wrote h5ad file to {output}")
 
     # dump versions
diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py
index 0cc9a969..5614e698 100755
--- a/modules/local/templates/mtx_to_h5ad_star.py
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -69,7 +69,7 @@ def input_to_adata(
     adata.var = adata.var.drop("gene_ids", axis=1)
 
     # write results
-    adata.write_h5ad(f"{output}", compression="gzip")
+    adata.write_h5ad(f"{output}")
     print(f"Wrote h5ad file to {output}")
 
 #

From efd6299bd54cee59d68568f6fab20f1d9339eb7b Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 08:22:03 +0000
Subject: [PATCH 49/66] simplify iteration

---
 modules/local/templates/mtx_to_h5ad_alevin.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modules/local/templates/mtx_to_h5ad_alevin.py b/modules/local/templates/mtx_to_h5ad_alevin.py
index a4563d6b..d36d8797 100755
--- a/modules/local/templates/mtx_to_h5ad_alevin.py
+++ b/modules/local/templates/mtx_to_h5ad_alevin.py
@@ -67,9 +67,7 @@ def input_to_adata(
     # index are gene IDs and symbols are a column
     # TODO: how to get gene_symbols for alevin?
     adata.var['gene_versions'] = adata.var.index
-    adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0]
-    adata.var.index = adata.var["gene_ids"].values
-    adata.var = adata.var.drop("gene_ids", axis=1)
+    adata.var.index = adata.var['gene_versions'].str.split('.').str[0]
     adata.var_names_make_unique()
 
     # write results

From f357fd7ccbed89b715d1f0e1fa7b7b9ca9efeda1 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 08:23:53 +0000
Subject: [PATCH 50/66] simplify index iteration

---
 modules/local/templates/mtx_to_h5ad_kallisto.py | 4 +---
 modules/local/templates/mtx_to_h5ad_star.py     | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py
index fab07f59..ac71fc42 100755
--- a/modules/local/templates/mtx_to_h5ad_kallisto.py
+++ b/modules/local/templates/mtx_to_h5ad_kallisto.py
@@ -77,9 +77,7 @@ def input_to_adata(
     # standard format
     # index are gene IDs and symbols are a column
     adata.var['gene_versions'] = adata.var.index
-    adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0]
-    adata.var.index = adata.var["gene_ids"].values
-    adata.var = adata.var.drop("gene_ids", axis=1)
+    adata.var.index = adata.var['gene_versions'].str.split('.').str[0]
     adata.var_names_make_unique()
 
     # write results
diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py
index 5614e698..aa0cd02f 100755
--- a/modules/local/templates/mtx_to_h5ad_star.py
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -64,9 +64,7 @@ def input_to_adata(
     # index are gene IDs and symbols are a column
     adata.var["gene_symbol"] = adata.var.index
     adata.var['gene_versions'] = adata.var["gene_ids"]
-    adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0]
-    adata.var.index = adata.var["gene_ids"].values
-    adata.var = adata.var.drop("gene_ids", axis=1)
+    adata.var.index = adata.var['gene_versions'].str.split('.').str[0]
 
     # write results
     adata.write_h5ad(f"{output}")

From bd1a74cebe0fd8bcf725f2f8be2a461cedb598ea Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 08:24:22 +0000
Subject: [PATCH 51/66] fix unmatched parenthesis

---
 modules/local/templates/concat_h5ad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/templates/concat_h5ad.py b/modules/local/templates/concat_h5ad.py
index ac5da7f6..087f7fde 100755
--- a/modules/local/templates/concat_h5ad.py
+++ b/modules/local/templates/concat_h5ad.py
@@ -16,7 +16,7 @@ def read_samplesheet(samplesheet):
     # samplesheet may contain replicates, when it has,
     # group information from replicates and collapse with commas
     # only keep unique values using set()
-    df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column.astype(str)))
+    df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column.astype(str))))
 
     return df
 

From 4731e009f2264b64356f490f58aa1479ac3dbc31 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 08:50:33 +0000
Subject: [PATCH 52/66] fix use of igenomes ... pipeline was not properly
 selecting igenomes options

---
 .../utils_nfcore_scrnaseq_pipeline/main.nf    |  5 +++-
 workflows/scrnaseq.nf                         | 29 +++++++++----------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf
index dd54d352..16569d5a 100644
--- a/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf
@@ -202,9 +202,12 @@ def getGenomeAttribute(attribute) {
     if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
         if (params.genomes[ params.genome ].containsKey(attribute)) {
             return params.genomes[ params.genome ][ attribute ]
+        } else {
+            return null
         }
+    } else {
+        return null
     }
-    return null
 }
 
 //
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 3f3c1017..61401185 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -31,12 +31,11 @@ workflow SCRNASEQ {
         error "Only cellranger supports `protocol = 'auto'`. Please specify the protocol manually!"
     }
 
-    params.fasta = getGenomeAttribute('fasta')
-    params.gtf = getGenomeAttribute('gtf')
-    params.star_index = getGenomeAttribute('star')
-
-    ch_genome_fasta = params.fasta ? file(params.fasta, checkIfExists: true) : []
-    ch_gtf = params.gtf ? file(params.gtf, checkIfExists: true) : []
+    // search igenomes, but overwrite with user paths
+    // cannot use 'params.<something> = ' in workflow, it does not overwrite parameter
+    def fasta_file = params.fasta      ? params.fasta      : getGenomeAttribute('fasta')
+    def gtf_file   = params.gtf        ? params.gtf        : getGenomeAttribute('gtf')
+    def star_index = params.star_index ? params.star_index : getGenomeAttribute('star')
 
     // general input and params
     ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): []
@@ -70,7 +69,7 @@ workflow SCRNASEQ {
     ch_salmon_index = params.salmon_index ? file(params.salmon_index) : []
 
     //star params
-    star_index = params.star_index ? file(params.star_index, checkIfExists: true) : null
+    star_index = star_index ? file(star_index, checkIfExists: true) : null
     ch_star_index = star_index ? [[id: star_index.baseName], star_index] : []
     star_feature = params.star_feature
 
@@ -98,24 +97,24 @@ workflow SCRNASEQ {
     //
     // Uncompress genome fasta file if required
     //
-    if (params.fasta) {
-        if (params.fasta.endsWith('.gz')) {
-            ch_genome_fasta    = GUNZIP_FASTA ( [ [:], file(params.fasta) ] ).gunzip.map { it[1] }
+    if (fasta_file) {
+        if (fasta_file.endsWith('.gz')) {
+            ch_genome_fasta    = GUNZIP_FASTA ( [ [:], file(fasta_file) ] ).gunzip.map { it[1] }
             ch_versions        = ch_versions.mix(GUNZIP_FASTA.out.versions)
         } else {
-            ch_genome_fasta = Channel.value( file(params.fasta) )
+            ch_genome_fasta = Channel.value( file(fasta_file) )
         }
     }
 
     //
     // Uncompress GTF annotation file or create from GFF3 if required
     //
-    if (params.gtf) {
-        if (params.gtf.endsWith('.gz')) {
-            ch_gtf      = GUNZIP_GTF ( [ [:], file(params.gtf) ] ).gunzip.map { it[1] }
+    if (gtf_file) {
+        if (gtf_file.endsWith('.gz')) {
+            ch_gtf      = GUNZIP_GTF ( [ [:], file(gtf_file) ] ).gunzip.map { it[1] }
             ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
         } else {
-            ch_gtf = Channel.value( file(params.gtf) )
+            ch_gtf = Channel.value( file(gtf_file) )
         }
     }
 

From a71348fe887a3798d07a391a7c42f53973e308d8 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 09:44:43 +0000
Subject: [PATCH 53/66] correct values parsing

---
 modules/local/templates/mtx_to_h5ad_alevin.py     | 2 +-
 modules/local/templates/mtx_to_h5ad_cellranger.py | 2 +-
 modules/local/templates/mtx_to_h5ad_kallisto.py   | 2 +-
 modules/local/templates/mtx_to_h5ad_star.py       | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/local/templates/mtx_to_h5ad_alevin.py b/modules/local/templates/mtx_to_h5ad_alevin.py
index d36d8797..83cbd946 100755
--- a/modules/local/templates/mtx_to_h5ad_alevin.py
+++ b/modules/local/templates/mtx_to_h5ad_alevin.py
@@ -67,7 +67,7 @@ def input_to_adata(
     # index are gene IDs and symbols are a column
     # TODO: how to get gene_symbols for alevin?
     adata.var['gene_versions'] = adata.var.index
-    adata.var.index = adata.var['gene_versions'].str.split('.').str[0]
+    adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values
     adata.var_names_make_unique()
 
     # write results
diff --git a/modules/local/templates/mtx_to_h5ad_cellranger.py b/modules/local/templates/mtx_to_h5ad_cellranger.py
index 4a40201d..76ec4998 100755
--- a/modules/local/templates/mtx_to_h5ad_cellranger.py
+++ b/modules/local/templates/mtx_to_h5ad_cellranger.py
@@ -69,7 +69,7 @@ def input_to_adata(
     # standard format
     # index are gene IDs and symbols are a column
     adata.var['gene_versions'] = adata.var.index
-    adata.var.index = adata.var['gene_versions'].str.split('.').str[0]
+    adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values
     adata.var_names_make_unique()
 
     # write results
diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py
index ac71fc42..0fc606ba 100755
--- a/modules/local/templates/mtx_to_h5ad_kallisto.py
+++ b/modules/local/templates/mtx_to_h5ad_kallisto.py
@@ -77,7 +77,7 @@ def input_to_adata(
     # standard format
     # index are gene IDs and symbols are a column
     adata.var['gene_versions'] = adata.var.index
-    adata.var.index = adata.var['gene_versions'].str.split('.').str[0]
+    adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values
     adata.var_names_make_unique()
 
     # write results
diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py
index aa0cd02f..b4c71e9e 100755
--- a/modules/local/templates/mtx_to_h5ad_star.py
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -64,7 +64,7 @@ def input_to_adata(
     # index are gene IDs and symbols are a column
     adata.var["gene_symbol"] = adata.var.index
     adata.var['gene_versions'] = adata.var["gene_ids"]
-    adata.var.index = adata.var['gene_versions'].str.split('.').str[0]
+    adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values
 
     # write results
     adata.write_h5ad(f"{output}")

From d2f9cfdfa0c1af8e0c14f1b99dd7a9e071ff7452 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 09:50:20 +0000
Subject: [PATCH 54/66] fix container registry

---
 modules/local/anndatar_convert.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
index d6a8271d..8fb6dac3 100644
--- a/modules/local/anndatar_convert.nf
+++ b/modules/local/anndatar_convert.nf
@@ -3,7 +3,7 @@ process ANNDATAR_CONVERT {
 
     label 'process_medium'
 
-    container "docker://fmalmeida/anndatar:dev" // TODO: Fix
+    container "docker.io/fmalmeida/anndatar:dev" // TODO: Fix
 
     input:
     tuple val(meta), path(h5ad)

From 5c31226a18f9d57d9f6d5971b480095f81d66a4c Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 10:13:49 +0000
Subject: [PATCH 55/66] do not save versions files

---
 conf/modules.config | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index a6fc3411..4a230680 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -80,13 +80,15 @@ if(params.aligner == "cellranger") {
         withName: CELLRANGER_MKREF {
             publishDir = [
                 path: "${params.outdir}/${params.aligner}/mkref",
-                mode: params.publish_dir_mode
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
         withName: CELLRANGER_COUNT {
             publishDir = [
                 path: "${params.outdir}/${params.aligner}/count",
-                mode: params.publish_dir_mode
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
             ext.args = {"--chemistry ${meta.chemistry} --create-bam true " + (meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : '')}
             time   = { check_max( 240.h  * task.attempt, 'time'    ) }
@@ -183,20 +185,20 @@ if (params.aligner == "alevin") {
 
 if (params.aligner == "star") {
     process {
-        withName: STAR_ALIGN {
-            ext.args = "--readFilesCommand zcat --runDirPerm All_RWX --outWigType bedGraph --twopassMode Basic --outSAMtype BAM SortedByCoordinate"
-        }
         withName: STAR_GENOMEGENERATE {
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}/genome_generate" },
                 mode: params.publish_dir_mode,
-                enabled: params.save_reference
+                enabled: params.save_reference,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
         withName: STAR_ALIGN {
+            ext.args = "--readFilesCommand zcat --runDirPerm All_RWX --outWigType bedGraph --twopassMode Basic --outSAMtype BAM SortedByCoordinate"
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}/${meta.id}" },
-                mode: params.publish_dir_mode
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
     }
@@ -208,14 +210,16 @@ if (params.aligner == 'kallisto') {
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode,
-                enabled: params.save_reference
+                enabled: params.save_reference,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
         withName: KALLISTOBUSTOOLS_COUNT {
             def kb_filter = (params.kb_filter) ? '--filter' : ''
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}" },
-                mode: params.publish_dir_mode
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
             ext.args = "--workflow ${params.kb_workflow} ${kb_filter}"
         }
@@ -254,7 +258,8 @@ if (params.aligner == 'cellrangermulti') {
         withName: CELLRANGER_MKVDJREF {
             publishDir = [
                 path: "${params.outdir}/${params.aligner}/mkvdjref",
-                mode: params.publish_dir_mode
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
     }

From 82784295364ffc436afd2cb7ac35258f59763efa Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 10:13:59 +0000
Subject: [PATCH 56/66] also convert concat h5ads

---
 modules/local/concat_h5ad.nf         |  2 +-
 subworkflows/local/mtx_conversion.nf | 19 ++++++++++++-------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
index 41310553..393a7353 100644
--- a/modules/local/concat_h5ad.nf
+++ b/modules/local/concat_h5ad.nf
@@ -11,7 +11,7 @@ process CONCAT_H5AD {
     path samplesheet
 
     output:
-    path "*.h5ad", emit: h5ad
+    tuple val(meta), path("*.h5ad"), emit: h5ad
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 55bd68dc..9891536d 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -56,13 +56,6 @@ workflow MTX_CONVERSION {
 
         }
 
-        //
-        // MODULE: Convert to Rds with AnndataR package
-        //
-        ANNDATAR_CONVERT (
-            ch_h5ads
-        )
-
         //
         // Concat sample-specific h5ad in one
         //
@@ -78,6 +71,18 @@ workflow MTX_CONVERSION {
             ch_concat_h5ad_input,
             samplesheet
         )
+        ch_h5ad_concat = CONCAT_H5AD.out.h5ad.map{ meta, file ->
+            def meta_clone = meta.clone()
+            meta_clone.id = 'combined' // maintain output prefix
+            [ meta_clone, file ]
+        }
+
+        //
+        // MODULE: Convert to Rds with AnndataR package
+        //
+        ANNDATAR_CONVERT (
+            ch_h5ads.mix( ch_h5ad_concat )
+        )
 
         //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output
         // ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions)

From 2b199d053d011c785b71d45cb1d04ba8eaf2f440 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 10:23:02 +0000
Subject: [PATCH 57/66] manage subdirectory in publishDir

---
 conf/modules.config                        | 6 +++++-
 modules/local/anndatar_convert.nf          | 2 +-
 modules/local/mtx_to_h5ad.nf               | 4 ++--
 modules/local/templates/anndatar_convert.R | 2 +-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 4a230680..ed0c1b92 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -53,7 +53,11 @@ process {
         publishDir = [
             path: { "${params.outdir}/${params.aligner}/mtx_conversions" },
             mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            saveAs: { filename ->
+                if (!filename.contains('combined_')) { "${meta.id}/${filename}" }
+                else if (filename.equals('versions.yml')) { null }
+                else filename
+            }
         ]
     }
 
diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
index 8fb6dac3..12a2b1b4 100644
--- a/modules/local/anndatar_convert.nf
+++ b/modules/local/anndatar_convert.nf
@@ -9,7 +9,7 @@ process ANNDATAR_CONVERT {
     tuple val(meta), path(h5ad)
 
     output:
-    tuple val(meta), path("${meta.id}/${meta.id}_${meta.input_type}_matrix.Rds"), emit: rds
+    tuple val(meta), path("${meta.id}_${meta.input_type}_matrix.Rds"), emit: rds
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 751cf9a5..ea56c7ae 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -13,8 +13,8 @@ process MTX_TO_H5AD {
     path star_index
 
     output:
-    tuple val(meta), path("${meta.id}/*h5ad"), emit: h5ad
-    path  "versions.yml"                     , emit: versions
+    tuple val(meta), path("${meta.id}_${meta.input_type}_matrix.h5ad"), emit: h5ad
+    path  "versions.yml"                                              , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/templates/anndatar_convert.R b/modules/local/templates/anndatar_convert.R
index 06a723d7..5be46163 100755
--- a/modules/local/templates/anndatar_convert.R
+++ b/modules/local/templates/anndatar_convert.R
@@ -13,4 +13,4 @@ obj <- adata\$to_Seurat()
 
 # save files
 dir.create(file.path("$meta.id"), showWarnings = FALSE)
-saveRDS(obj, file = "${meta.id}/${meta.id}_${meta.input_type}_matrix.Rds")
+saveRDS(obj, file = "${meta.id}_${meta.input_type}_matrix.Rds")

From cb677974e480ef386349efc70f7f4ac79b7bb75b Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 10:24:35 +0000
Subject: [PATCH 58/66] match template scripts with new publishDir

---
 modules/local/templates/mtx_to_h5ad_alevin.py     | 2 +-
 modules/local/templates/mtx_to_h5ad_cellranger.py | 2 +-
 modules/local/templates/mtx_to_h5ad_kallisto.py   | 4 ++--
 modules/local/templates/mtx_to_h5ad_star.py       | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/modules/local/templates/mtx_to_h5ad_alevin.py b/modules/local/templates/mtx_to_h5ad_alevin.py
index 83cbd946..d54a1667 100755
--- a/modules/local/templates/mtx_to_h5ad_alevin.py
+++ b/modules/local/templates/mtx_to_h5ad_alevin.py
@@ -84,7 +84,7 @@ def input_to_adata(
 # input_type comes from NF module
 input_to_adata(
     input_data="${meta.id}_alevin_results/af_quant/alevin/",
-    output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
+    output="${meta.id}_${meta.input_type}_matrix.h5ad",
     sample="${meta.id}"
 )
 
diff --git a/modules/local/templates/mtx_to_h5ad_cellranger.py b/modules/local/templates/mtx_to_h5ad_cellranger.py
index 76ec4998..ecc5c077 100755
--- a/modules/local/templates/mtx_to_h5ad_cellranger.py
+++ b/modules/local/templates/mtx_to_h5ad_cellranger.py
@@ -91,6 +91,6 @@ def input_to_adata(
 # input_type comes from NF module
 adata = input_to_adata(
     input_data="${meta.input_type}_feature_bc_matrix.h5",
-    output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
+    output="${meta.id}_${meta.input_type}_matrix.h5ad",
     sample="${meta.id}"
 )
diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py
index 0fc606ba..37f66c5f 100755
--- a/modules/local/templates/mtx_to_h5ad_kallisto.py
+++ b/modules/local/templates/mtx_to_h5ad_kallisto.py
@@ -97,7 +97,7 @@ def input_to_adata(
         matrix=glob.glob("${inputs}/*.mtx")[0],
         barcodes=glob.glob("${inputs}/*.barcodes.txt")[0],
         features=glob.glob("${inputs}/*.genes.txt")[0],
-        output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
+        output="${meta.id}_${meta.input_type}_matrix.h5ad",
         sample="${meta.id}",
         t2g="${txp2gene}"
     )
@@ -108,7 +108,7 @@ def input_to_adata(
             matrix=glob.glob("${inputs}/" + f"{type}*.mtx")[0],
             barcodes=glob.glob("${inputs}/" + f"{type}*.barcodes.txt")[0],
             features=glob.glob("${inputs}/" + f"{type}*.genes.txt")[0],
-            output="${meta.id}/${meta.id}_${meta.input_type}" + f"_{type}_matrix.h5ad",
+            output="${meta.id}_${meta.input_type}" + f"_{type}_matrix.h5ad",
             sample="${meta.id}",
             t2g="${txp2gene}"
         )
diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py
index b4c71e9e..e44d2478 100755
--- a/modules/local/templates/mtx_to_h5ad_star.py
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -80,7 +80,7 @@ def input_to_adata(
 # input_type comes from NF module
 input_to_adata(
     input_data="${meta.input_type}",
-    output="${meta.id}/${meta.id}_${meta.input_type}_matrix.h5ad",
+    output="${meta.id}_${meta.input_type}_matrix.h5ad",
     sample="${meta.id}"
 )
 

From 7230095677ba87f5325ed7bd4d853b00ee4255fe Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 10:31:25 +0000
Subject: [PATCH 59/66] have outputs separated

---
 subworkflows/local/starsolo.nf | 14 +++++---------
 workflows/scrnaseq.nf          |  4 +++-
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index d69ce9d9..4cbf7aea 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -54,16 +54,12 @@ workflow STARSOLO {
     )
     ch_versions = ch_versions.mix(STAR_ALIGN.out.versions)
 
-    // generate channel of star counts with correct metadata
-    ch_star_counts =
-        STAR_ALIGN.out.raw_counts.map{ meta, files -> [meta + [input_type: 'raw'], files] }
-        .mix( STAR_ALIGN.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] } )
-
-
     emit:
     ch_versions
     // get rid of meta for star index
-    star_result = STAR_ALIGN.out.tab
-    star_counts = ch_star_counts
-    for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it }
+    star_result     = STAR_ALIGN.out.tab
+    star_counts     = STAR_ALIGN.out.counts
+    raw_counts      = STAR_ALIGN.out.raw_counts
+    filtered_counts = STAR_ALIGN.out.filtered_counts
+    for_multiqc     = STAR_ALIGN.out.log_final.map{ meta, it -> it }
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 61401185..f7bbe692 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -170,7 +170,9 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)
-        ch_mtx_matrices = STARSOLO.out.star_counts
+        ch_mtx_matrices =
+            STARSOLO.out.raw_counts.map{ meta, files -> [meta + [input_type: 'raw'], files] }
+            .mix( STARSOLO.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] } )
     }
 
     // Run cellranger pipeline

From 206a7c1d8f50914d70a873bf66dfd184219d17f5 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 10:36:48 +0000
Subject: [PATCH 60/66] make parsing inside sub-workflow

---
 subworkflows/local/starsolo.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 4cbf7aea..aadda6b6 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -59,7 +59,7 @@ workflow STARSOLO {
     // get rid of meta for star index
     star_result     = STAR_ALIGN.out.tab
     star_counts     = STAR_ALIGN.out.counts
-    raw_counts      = STAR_ALIGN.out.raw_counts
-    filtered_counts = STAR_ALIGN.out.filtered_counts
+    raw_counts      = STAR_ALIGN.out.raw_counts.map{ meta, files -> [meta + [input_type: 'raw'], files] }
+    filtered_counts = STAR_ALIGN.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] }
     for_multiqc     = STAR_ALIGN.out.log_final.map{ meta, it -> it }
 }

From c4a09e07baba8b2a1176e6e8429a3f133128cd55 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 10:43:28 +0000
Subject: [PATCH 61/66] added kallisto to correct structure

---
 subworkflows/local/kallisto_bustools.nf | 4 +++-
 workflows/scrnaseq.nf                   | 6 ++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index d6b171a6..5f8d9bcc 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -66,7 +66,9 @@ workflow KALLISTO_BUSTOOLS {
 
     emit:
     ch_versions
-    counts          = ch_raw_counts.mix (ch_filtered_counts)
+    counts          = KALLISTOBUSTOOLS_COUNT.out.count
+    counts_raw      = ch_raw_counts
+    counts_filtered = ch_filtered_counts
     txp2gene        = txp2gene.collect()
 
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index f7bbe692..070565f4 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -135,7 +135,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions)
-        ch_mtx_matrices = KALLISTO_BUSTOOLS.out.counts
+        ch_mtx_matrices = KALLISTO_BUSTOOLS.out.counts_raw.mix( KALLISTO_BUSTOOLS.out.counts_filtered )
         ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene
     }
 
@@ -170,9 +170,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)
-        ch_mtx_matrices =
-            STARSOLO.out.raw_counts.map{ meta, files -> [meta + [input_type: 'raw'], files] }
-            .mix( STARSOLO.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] } )
+        ch_mtx_matrices = STARSOLO.out.raw_counts.mix( STARSOLO.out.filtered_counts )
     }
 
     // Run cellranger pipeline

From 971667b454be7960ccbc57a9eb5bef7239fa6beb Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 10:47:33 +0000
Subject: [PATCH 62/66] correct mix of channels

---
 subworkflows/local/align_cellranger.nf | 7 ++++---
 workflows/scrnaseq.nf                  | 8 ++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 86b69a38..d787e0f0 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -63,7 +63,8 @@ workflow CELLRANGER_ALIGN {
 
     emit:
         ch_versions
-        cellranger_out      = CELLRANGER_COUNT.out.outs
-        cellranger_matrices = ch_matrices_raw.mix( ch_matrices_filtered )
-        star_index          = cellranger_index
+        cellranger_out               = CELLRANGER_COUNT.out.outs
+        cellranger_matrices_raw      = ch_matrices_raw
+        cellranger_matrices_filtered = ch_matrices_filtered
+        star_index                   = cellranger_index
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 070565f4..20776d6b 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -135,7 +135,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions)
-        ch_mtx_matrices = KALLISTO_BUSTOOLS.out.counts_raw.mix( KALLISTO_BUSTOOLS.out.counts_filtered )
+        ch_mtx_matrices = ch_mtx_matrices.mix( KALLISTO_BUSTOOLS.out.counts_raw, KALLISTO_BUSTOOLS.out.counts_filtered )
         ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene
     }
 
@@ -153,7 +153,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it })
-        ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
+        ch_mtx_matrices = ch_mtx_matrices.mix( SCRNASEQ_ALEVIN.out.alevin_results )
     }
 
     // Run STARSolo pipeline
@@ -170,7 +170,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)
-        ch_mtx_matrices = STARSOLO.out.raw_counts.mix( STARSOLO.out.filtered_counts )
+        ch_mtx_matrices = ch_mtx_matrices.mix( STARSOLO.out.raw_counts, STARSOLO.out.filtered_counts )
     }
 
     // Run cellranger pipeline
@@ -183,7 +183,7 @@ workflow SCRNASEQ {
             protocol_config['protocol']
         )
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_matrices)
+        ch_mtx_matrices = ch_mtx_matrices.mix( CELLRANGER_ALIGN.out.cellranger_matrices_raw, CELLRANGER_ALIGN.out.cellranger_matrices_filtered )
         ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map {
             meta, outs -> outs.findAll{ it -> it.name == "web_summary.html"}
         })

From 81221135336857206c7eaa24cd58dfc1c1b17810 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 10:49:17 +0000
Subject: [PATCH 63/66] correct for cellranger multi

---
 subworkflows/local/align_cellrangermulti.nf | 5 +++--
 workflows/scrnaseq.nf                       | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/align_cellrangermulti.nf b/subworkflows/local/align_cellrangermulti.nf
index 977bf478..f13c7bf1 100644
--- a/subworkflows/local/align_cellrangermulti.nf
+++ b/subworkflows/local/align_cellrangermulti.nf
@@ -204,8 +204,9 @@ workflow CELLRANGER_MULTI_ALIGN {
 
     emit:
         ch_versions
-        cellrangermulti_out = CELLRANGER_MULTI.out.outs
-        cellrangermulti_mtx = ch_matrices_raw.mix( ch_matrices_filtered )
+        cellrangermulti_out          = CELLRANGER_MULTI.out.outs
+        cellrangermulti_mtx_raw      = ch_matrices_raw
+        cellrangermulti_mtx_filtered = ch_matrices_filtered
 }
 
 def parse_demultiplexed_output_channels(in_ch, pattern) {
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 20776d6b..418f6711 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -281,7 +281,7 @@ workflow SCRNASEQ {
         ch_multiqc_files = ch_multiqc_files.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_out.map{
             meta, outs -> outs.findAll{ it -> it.name == "web_summary.html" }
         })
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx)
+        ch_mtx_matrices = ch_mtx_matrices.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx_raw, CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx_filtered )
 
     }
 

From 44d1cb745c0d2907a97b992df0f217313ad8333b Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 11:04:24 +0000
Subject: [PATCH 64/66] correct stub

---
 modules/local/mtx_to_h5ad.nf | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index ea56c7ae..27e1070a 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -26,8 +26,7 @@ process MTX_TO_H5AD {
 
     stub:
     """
-    mkdir ${meta.id}
-    touch ${meta.id}/${meta.id}_matrix.h5ad
+    touch ${meta.id}_raw_matrix.h5ad
     touch versions.yml
     """
 }

From 18cfdd7571ddb80234ff7e6ed2c58f6360605292 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 11:04:53 +0000
Subject: [PATCH 65/66] remove glob in txp2gene

---
 modules/local/templates/mtx_to_h5ad_kallisto.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py
index 37f66c5f..8d0f0909 100755
--- a/modules/local/templates/mtx_to_h5ad_kallisto.py
+++ b/modules/local/templates/mtx_to_h5ad_kallisto.py
@@ -25,7 +25,7 @@ def _mtx_to_adata(
     adata.var_names = pd.read_csv(features, header=None, sep="\\t")[0].values
     adata.obs["sample"] = sample
 
-    txp2gene = pd.read_table(glob.glob(f"{t2g}")[0], header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2])
+    txp2gene = pd.read_table(f"{t2g}", header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2])
     txp2gene = txp2gene.drop_duplicates(subset="gene_id").set_index("gene_id")
     adata.var = adata.var.join(txp2gene, how="left")
 

From 079bb7ec7a7dd5f4a2009463106e785fabbdd178 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 30 Oct 2024 11:15:13 +0000
Subject: [PATCH 66/66] added small comment to local modules

---
 modules/local/adata_barcodes.nf                    | 6 ++++++
 modules/local/alevinqc.nf                          | 5 +++++
 modules/local/anndatar_convert.nf                  | 5 +++++
 modules/local/concat_h5ad.nf                       | 6 ++++++
 modules/local/gffread_transcriptome.nf             | 5 +++++
 modules/local/gtf_gene_filter.nf                   | 5 +++++
 modules/local/mtx_to_h5ad.nf                       | 5 +++++
 modules/local/parse_cellrangermulti_samplesheet.nf | 5 +++++
 modules/local/simpleaf_index.nf                    | 5 +++++
 modules/local/simpleaf_quant.nf                    | 5 +++++
 modules/local/star_align.nf                        | 5 +++++
 11 files changed, 57 insertions(+)

diff --git a/modules/local/adata_barcodes.nf b/modules/local/adata_barcodes.nf
index 2aef8ec9..630d90ae 100644
--- a/modules/local/adata_barcodes.nf
+++ b/modules/local/adata_barcodes.nf
@@ -1,4 +1,10 @@
 process ADATA_BARCODES {
+
+    //
+    // Module from nf-core/scdownstream.
+    // This module performs the subset of the h5ad file to only contain barcodes that passed emptydrops filter with cellbender
+    //
+
     tag "$meta.id"
     label 'process_single'
 
diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf
index 9000d79e..777a1371 100644
--- a/modules/local/alevinqc.nf
+++ b/modules/local/alevinqc.nf
@@ -1,4 +1,9 @@
 process ALEVINQC {
+
+    //
+    // This module executes alevinfry QC reporting tool on alevin results
+    //
+
     tag "$meta.id"
     label 'process_low'
 
diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
index 12a2b1b4..8e4c242e 100644
--- a/modules/local/anndatar_convert.nf
+++ b/modules/local/anndatar_convert.nf
@@ -1,4 +1,9 @@
 process ANNDATAR_CONVERT {
+
+    //
+    // This module uses the anndata R package to convert h5ad files in different formats
+    //
+
     tag "${meta.id}"
 
     label 'process_medium'
diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
index 393a7353..c875ba3c 100644
--- a/modules/local/concat_h5ad.nf
+++ b/modules/local/concat_h5ad.nf
@@ -1,4 +1,10 @@
 process CONCAT_H5AD {
+
+    //
+    // This module concatenates all h5ad, per type (raw, filtered, etc.) files generated during pipeline execution
+    //
+
+
     tag "${meta.id}"
 
     label 'process_medium'
diff --git a/modules/local/gffread_transcriptome.nf b/modules/local/gffread_transcriptome.nf
index ab573b07..671b6726 100644
--- a/modules/local/gffread_transcriptome.nf
+++ b/modules/local/gffread_transcriptome.nf
@@ -1,4 +1,9 @@
 process GFFREAD_TRANSCRIPTOME {
+
+    //
+    // This module uses gffread to filter input to generate a transcripts fasta
+    //
+
     tag "${genome_fasta}"
     label 'process_low'
 
diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf
index 063bd228..10af352b 100644
--- a/modules/local/gtf_gene_filter.nf
+++ b/modules/local/gtf_gene_filter.nf
@@ -1,4 +1,9 @@
 process GTF_GENE_FILTER {
+
+    //
+    // This module executes a custom script to filter input gtf to contain only annotations present in input genome
+    //
+
     tag "$fasta"
     label 'process_low'
 
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 27e1070a..f54318f1 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -1,4 +1,9 @@
 process MTX_TO_H5AD {
+
+    //
+    // This module executes different conversion template scripts (per aligner) for converting output mtx files into h5ad files
+    //
+
     tag "$meta.id"
     label 'process_medium'
 
diff --git a/modules/local/parse_cellrangermulti_samplesheet.nf b/modules/local/parse_cellrangermulti_samplesheet.nf
index df616995..e8f56b67 100644
--- a/modules/local/parse_cellrangermulti_samplesheet.nf
+++ b/modules/local/parse_cellrangermulti_samplesheet.nf
@@ -1,4 +1,9 @@
 process PARSE_CELLRANGERMULTI_SAMPLESHEET {
+
+    //
+    // This module contains a custom script for checking special cellranger multi samplesheet
+    //
+
     label 'process_low'
     publishDir = [ enabled: false ]
 
diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index 8e8bd519..5c362c99 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -1,4 +1,9 @@
 process SIMPLEAF_INDEX {
+
+    //
+    // This module executes simpleaf to generate alevin genome index
+    //
+
     tag "$transcript_gtf"
     label "process_medium"
 
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index abb58404..9241b210 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -1,4 +1,9 @@
 process SIMPLEAF_QUANT {
+
+    //
+    // This module executes simpleaf to perform quantification with alevin
+    //
+
     tag "$meta.id"
     label 'process_high'
 
diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf
index 4b3df1e1..70d6770c 100644
--- a/modules/local/star_align.nf
+++ b/modules/local/star_align.nf
@@ -1,4 +1,9 @@
 process STAR_ALIGN {
+
+    //
+    // This module executes STAR align quantification
+    //
+
     tag "$meta.id"
     label 'process_high'