nf-core · fmalmeida · Aug 19, 2024 · Aug 19, 2024 · Aug 19, 2024 · Aug 19, 2024
diff --git a/conf/modules.config b/conf/modules.config
@@ -32,25 +32,31 @@ process {
     }
 
     if (!params.skip_emptydrops) {
-        withName: EMPTYDROPS_CELL_CALLING {
+        // Settings in this block are only applied when params.skip_emptydrops is false.
+        // CELLBENDER_REMOVEBACKGROUND results are published per sample under
+        // <outdir>/<aligner>/<sample id>/emptydrops_filter; the saveAs closure returns null for
+        // versions.yml and the unchanged file name for everything else.
+        withName: 'CELLBENDER_REMOVEBACKGROUND' {
             publishDir = [
-                path: { "${params.outdir}/${params.aligner}" },
+                path: { "${params.outdir}/${params.aligner}/${meta.id}/emptydrops_filter" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+        // ADATA_BARCODES output files are prefixed "<sample id>_<input type>_matrix" and published
+        // under <outdir>/<aligner>/mtx_conversions/<sample id>, with the same versions.yml handling.
+        withName: 'ADATA_BARCODES' {
+            ext.prefix = { "${meta.id}_${meta.input_type}_matrix" }
+            publishDir = [
+                path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" },
                 mode: params.publish_dir_mode,
-                saveAs: { filename ->
-                    if ( params.aligner == 'cellranger' ) "count/${meta.id}/${filename}"
-                    else if ( params.aligner == 'kallisto' ) "${meta.id}.count/${filename}"
-                    else "${meta.id}/${filename}"
-                }
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
         }
     }
 
-    withName: 'MTX_TO_H5AD|CONCAT_H5AD|MTX_TO_SEURAT' {
+    // Shared publishing rule for the matrix conversion processes: files go to
+    // <outdir>/<aligner>/mtx_conversions; the saveAs closure returns null for versions.yml.
+    withName: 'MTX_TO_H5AD|CONCAT_H5AD|ANNDATAR_CONVERT' {
         publishDir = [
             path: { "${params.outdir}/${params.aligner}/mtx_conversions" },
-            mode: params.publish_dir_mode
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
+
     withName: 'GTF_GENE_FILTER' {
         publishDir = [
             path: { "${params.outdir}/gtf_filter" },
@@ -161,8 +167,9 @@ if (params.aligner == "alevin") {
         }
         withName: 'SIMPLEAF_QUANT' {
+            // Publish into a per-sample sub-directory (<outdir>/<aligner>/<sample id>);
+            // the saveAs closure returns null for versions.yml.
             publishDir = [
-                path: { "${params.outdir}/${params.aligner}" },
-                mode: params.publish_dir_mode
+                path: { "${params.outdir}/${params.aligner}/${meta.id}" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
             ext.args = "-r cr-like"
         }

diff --git a/modules.json b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "cellbender/removebackground": {
+                        "branch": "master",
+                        "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48",
+                        "installed_by": ["modules"]
+                    },
                     "cellranger/count": {
                         "branch": "master",
                         "git_sha": "90dad5491658049282ceb287a3d7732c1ce39837",

diff --git a/modules/local/emptydrops.nf → modules/local/BKP/emptydrops.nf b/modules/local/emptydrops.nf → modules/local/BKP/emptydrops.nf
diff --git a/modules/local/BKP/mtx_to_h5ad.nf b/modules/local/BKP/mtx_to_h5ad.nf
@@ -0,0 +1,139 @@
+// Convert one sample's aligner count matrix into a .h5ad file by calling mtx_to_h5ad.py.
+// The input type ('raw', 'filtered' or 'custom_emptydrops_filter') is inferred from the path of
+// the staged input and is emitted together with the resulting file.
+//
+// Inputs:
+//   meta, inputs - sample metadata map and the aligner output path(s) for that sample
+//   txp2gene     - passed to mtx_to_h5ad.py as --txp2gene (blanked for cellranger emptydrops input)
+//   star_index   - passed to mtx_to_h5ad.py as --star_index (blanked for cellranger emptydrops input)
+// Outputs:
+//   h5ad     - tuple of the inferred input_type and the "<sample id>/*h5ad" file(s)
+//   versions - versions.yml
+process MTX_TO_H5AD {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
+        'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+
+    input:
+    // The cellranger nf-core module does not deliver its outputs inside a single sample directory:
+    // for each sample, the sub-folders and files arrive directly as an array.
+    tuple val(meta), path(inputs)
+    path txp2gene
+    path star_index
+
+    output:
+    tuple val(input_type), path("${meta.id}/*h5ad") , emit: h5ad
+    path  "versions.yml"                            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Pick one path to inspect when inferring the input type; some aligners provide an array
+    // instead of a single file.
+    def input_to_check = (inputs instanceof String) ? inputs : inputs[0]
+
+    // Infer the input type from the path: 'unfiltered' or 'raw' in the URI string means 'raw',
+    // anything else means 'filtered'. The two overrides below take precedence, in order.
+    input_type = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
+    if ( params.aligner == 'alevin' ) { input_type = 'raw' } // alevin has its own filtering methods and mostly outputs a single mtx; 'raw' here means the base tool output
+    if (input_to_check.toUriString().contains('emptydrops')) { input_type = 'custom_emptydrops_filter' }
+
+    // Define the file paths for each aligner. Cellranger is normally converted from its .h5 files;
+    // however, the emptydrops step always generates .mtx files, so cellranger 'emptydrops' results
+    // need their own paths.
+    if (params.aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ] && input_type == 'custom_emptydrops_filter') {
+
+        aligner      = 'cellranger'
+        txp2gene     = ''
+        star_index   = ''
+        mtx_matrix   = "emptydrops_filtered/matrix.mtx"
+        barcodes_tsv = "emptydrops_filtered/barcodes.tsv"
+        features_tsv = "emptydrops_filtered/features.tsv"
+
+    } else if (params.aligner == 'kallisto') {
+
+        kb_pattern   = (input_type == 'raw') ? 'un' : ''
+        mtx_dir      = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "counts_${kb_pattern}filtered"
+        if ((input_type == 'custom_emptydrops_filter') && (params.kb_workflow != 'standard')) { mtx_dir = 'emptydrops_filtered/\${input_type}' } // dir has subdirs for non-standard workflows
+        mtx_matrix   = "${mtx_dir}/*.mtx"
+        barcodes_tsv = "${mtx_dir}/*.barcodes.txt"
+        features_tsv = "${mtx_dir}/*.genes.names.txt"
+
+        // kallisto allows the following workflows: ["standard", "lamanno", "nac"]
+        // lamanno creates "spliced" and "unspliced"
+        // nac creates "nascent", "ambiguous" and "mature"
+        // Also, lamanno produces a barcodes and a genes file for both spliced and unspliced,
+        // while nac keeps only one of each for all the different .mtx files produced.
+        //
+        // The escaped \${input_type} below is left for bash to expand: it refers to the shell
+        // loop variable of the non-standard kallisto script branch, not to the Groovy variable.
+        // That branch reads `matrix` (not `mtx_matrix`) for its --input argument.
+        kb_non_standard_files = ""
+        if (params.kb_workflow == "lamanno") {
+            kb_non_standard_files = "spliced unspliced"
+            matrix       = "${mtx_dir}/\${input_type}.mtx"
+            barcodes_tsv = "${mtx_dir}/\${input_type}.barcodes.txt"
+            features_tsv = "${mtx_dir}/\${input_type}.genes.txt"
+        }
+        if (params.kb_workflow == "nac") {
+            kb_non_standard_files = "nascent ambiguous mature"
+            matrix       = "${mtx_dir}/*\${input_type}.mtx"
+            features_tsv = "${mtx_dir}/*.genes.txt"
+        } // barcodes tsv has same pattern as standard workflow
+
+    } else if (params.aligner == 'alevin') {
+
+        // alevin does not have filtered/unfiltered results
+        mtx_dir      = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : '*_alevin_results/af_quant/alevin'
+        mtx_matrix   = "${mtx_dir}/quants_mat.mtx"
+        barcodes_tsv = "${mtx_dir}/quants_mat_rows.txt"
+        features_tsv = "${mtx_dir}/quants_mat_cols.txt"
+
+    } else if (params.aligner == 'star') {
+
+        // Files under the emptydrops directory are referenced without the .gz suffix;
+        // the aligner's own raw/filtered files are referenced with it.
+        mtx_dir      = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "${input_type}"
+        suffix       = (input_type == 'custom_emptydrops_filter') ? '' : '.gz'
+        mtx_matrix   = "${mtx_dir}/matrix.mtx${suffix}"
+        barcodes_tsv = "${mtx_dir}/barcodes.tsv${suffix}"
+        features_tsv = "${mtx_dir}/features.tsv${suffix}"
+
+    }
+
+    //
+    // run script
+    //
+    // Three mutually exclusive command variants follow:
+    //   1. cellranger-family input that is not emptydrops-filtered: convert the .h5 matrix;
+    //   2. kallisto with a non-standard workflow: loop in bash over the matrix kinds;
+    //   3. everything else: convert the mtx/barcodes/features triplet resolved above.
+    if (params.aligner in [ "cellranger", "cellrangerarc", "cellrangermulti"] && input_type != 'custom_emptydrops_filter')
+    """
+    # convert file types
+    mtx_to_h5ad.py \\
+        --aligner cellranger \\
+        --input *${input_type}_feature_bc_matrix.h5 \\
+        --sample ${meta.id} \\
+        --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad
+    """
+
+    else if (params.aligner == 'kallisto' && params.kb_workflow != 'standard')
+    """
+    # convert file types
+    for input_type in ${kb_non_standard_files} ; do
+        mtx_to_h5ad.py \\
+            --aligner ${params.aligner} \\
+            --sample ${meta.id} \\
+            --input ${matrix} \\
+            --barcode ${barcodes_tsv} \\
+            --feature ${features_tsv} \\
+            --txp2gene ${txp2gene} \\
+            --star_index ${star_index} \\
+            --out ${meta.id}/${meta.id}_\${input_type}_matrix.h5ad ;
+    done
+    """
+
+    else
+    """
+    # convert file types
+    mtx_to_h5ad.py \\
+        --task_process ${task.process} \\
+        --aligner ${params.aligner} \\
+        --sample ${meta.id} \\
+        --input $mtx_matrix \\
+        --barcode $barcodes_tsv \\
+        --feature $features_tsv \\
+        --txp2gene ${txp2gene} \\
+        --star_index ${star_index} \\
+        --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad
+    """
+
+    // NOTE(review): the stub block never assigns input_type, although the h5ad output tuple
+    // declares val(input_type) — confirm that stub runs can resolve this output.
+    stub:
+    """
+    mkdir ${meta.id}
+    touch ${meta.id}/${meta.id}_matrix.h5ad
+    touch versions.yml
+    """
+}
diff --git a/modules/local/mtx_to_seurat.nf → modules/local/BKP/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf → modules/local/BKP/mtx_to_seurat.nf
diff --git a/modules/local/adata_barcodes.nf b/modules/local/adata_barcodes.nf
@@ -0,0 +1,23 @@
+// Run the 'barcodes.py' template on one sample's .h5ad file together with a barcodes CSV and
+// emit the resulting .h5ad file(s) plus versions.yml.
+// NOTE(review): presumably the template restricts the AnnData object to the listed barcodes —
+// confirm against templates/barcodes.py.
+//
+// Inputs:
+//   meta, h5ad, barcodes_csv - sample metadata map, the .h5ad file and the barcodes CSV
+// Outputs:
+//   h5ad     - tuple of meta and every "*.h5ad" file found in the work directory
+//   versions - versions.yml
+process ADATA_BARCODES {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/anndata:0.10.7--e9840a94592528c8':
+        'community.wave.seqera.io/library/anndata:0.10.7--336c6c1921a0632b' }"
+
+    input:
+    tuple val(meta), path(h5ad), path(barcodes_csv)
+
+    output:
+    tuple val(meta), path("*.h5ad"), emit: h5ad
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Falls back to the sample id when no ext.prefix is configured for this process.
+    prefix = task.ext.prefix ?: "${meta.id}"
+    template 'barcodes.py'
+}
diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
@@ -0,0 +1,24 @@
+// Convert one sample's .h5ad file into an .Rds file by running the 'anndatar_convert.R' template.
+//
+// Inputs:
+//   meta, h5ad - sample metadata map (meta.id and meta.input_type are used to build the output
+//                file name) and the .h5ad file to convert
+// Outputs:
+//   rds - tuple of meta and "<sample id>/<sample id>_<input type>_matrix.Rds"
+process ANNDATAR_CONVERT {
+    tag "${meta.id}"
+
+    label 'process_medium'
+
+    container "docker://fmalmeida/anndatar:dev" // TODO: Fix
+
+    input:
+    tuple val(meta), path(h5ad)
+
+    output:
+    tuple val(meta), path("${meta.id}/${meta.id}_${meta.input_type}_matrix.Rds"), emit: rds
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    template 'anndatar_convert.R'
+
+    stub:
+    // The stub must create exactly the path declared in the output block; otherwise a stub run
+    // fails because the expected output file is missing.
+    """
+    mkdir -p ${meta.id}
+    touch ${meta.id}/${meta.id}_${meta.input_type}_matrix.Rds
+    """
+}
diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
@@ -1,13 +1,13 @@
 process CONCAT_H5AD {
+    tag "${meta.id}"
+
     label 'process_medium'
 
-    conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
-        'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+    conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"
+    container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538"
 
     input:
-    tuple val(input_type), path(h5ad)
+    tuple val(meta), path(h5ad)
     path samplesheet
 
     output:
@@ -17,12 +17,7 @@ process CONCAT_H5AD {
     task.ext.when == null || task.ext.when
 
     script:
-    """
-    concat_h5ad.py \\
-        --input $samplesheet \\
-        --out combined_${input_type}_matrix.h5ad \\
-        --suffix "_matrix.h5ad"
-    """
+    template 'concat_h5ad.py'
 
     stub:
     """