From 2462a9040de785f254893f96453ae12ebeb36619 Mon Sep 17 00:00:00 2001
From: Ksenia Krasheninnikova <kk16@sanger.ac.uk>
Date: Thu, 15 Aug 2024 16:27:38 +0100
Subject: [PATCH] Merge in from dev

---
 .github/workflows/ci.yml                      |   6 +-
 assets/test.yaml                              |  13 ++-
 assets/test_github.yaml                       |  19 ----
 assets/test_gsMetZobe1.yaml                   |   5 +-
 bin/generate_cram_csv.sh                      |   7 +-
 conf/modules.config                           |  35 +-----
 conf/test.config                              |   7 +-
 conf/test_full.config                         |   1 -
 conf/test_github.config                       |  28 -----
 docs/usage.md                                 |   9 +-
 modules/local/generate_cram_csv.nf            |   4 +-
 modules/local/longranger/align/main.nf        |   4 +-
 .../batch_summary.txt                         |   2 -
 .../logs/busco.log                            | 107 ------------------
 modules/nf-core/busco/main.nf_                |  83 --------------
 nextflow.config                               |   1 -
 subworkflows/local/organelles.nf              |  12 +-
 subworkflows/local/prepare_input.nf           |   6 +-
 workflows/genomeassembly.nf                   |   2 +-
 19 files changed, 38 insertions(+), 313 deletions(-)
 delete mode 100644 assets/test_github.yaml
 delete mode 100644 conf/test_github.config
 delete mode 100644 modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/batch_summary.txt
 delete mode 100644 modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/logs/busco.log
 delete mode 100644 modules/nf-core/busco/main.nf_

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 845d8539..ea139834 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -48,10 +48,6 @@ jobs:
         run: |
           nextflow secrets set NCBI_API_KEY ${{ secrets.NCBI_API_KEY }}
 
-      - name: Download test data
-        run: |
-          curl https://tolit.cog.sanger.ac.uk/test-data/resources/genomeassembly/genomeassembly_test_data.tar.gz | tar xzf -
-
       - name: Setup apptainer
         uses: eWaterCycle/setup-apptainer@main
 
@@ -62,4 +58,4 @@ jobs:
 
       - name: Run pipeline with test data
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test_github,singularity --outdir ./results
+          nextflow run ${GITHUB_WORKSPACE} -profile test,singularity --outdir ./results
diff --git a/assets/test.yaml b/assets/test.yaml
index aff832fd..423fdc22 100644
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -1,13 +1,16 @@
 dataset:
   id: baUndUnlc1
   illumina_10X:
-    reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/
+    reads:
+      - https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_R1_001.fastq.gz
+      - https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_R2_001.fastq.gz
+      - https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_I1_001.fastq.gz
   pacbio:
     reads:
-      - reads: /lustre/scratch124/tol/projects/darwin/users/kk16/development/test/test/HiFi.reads.BIG.fasta
+      - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta
   HiC:
     reads:
-      - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram
+      - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram
 hic_motif: GATC,GANTC,CTNAG,TTAA
 hic_aligner: bwamem2
 busco:
@@ -16,6 +19,6 @@ mito:
   species: Caradrina clavipalpis
   min_length: 15000
   code: 5
-  fam: /lustre/scratch124/tol/projects/darwin/users/cz3/organelle_asm/hmm_db/insecta_mito.fam
+  fam: https://github.com/c-zhou/OatkDB/raw/main/v20230921/insecta_mito.fam
 plastid:
-  fam: /lustre/scratch124/tol/projects/darwin/users/cz3/organelle_asm/hmm_db/acrogymnospermae_pltd.fam
+  fam: https://github.com/c-zhou/OatkDB/raw/main/v20230921/acrogymnospermae_pltd.fam
diff --git a/assets/test_github.yaml b/assets/test_github.yaml
deleted file mode 100644
index 3bf048f3..00000000
--- a/assets/test_github.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-dataset:
-  id: baUndUnlc1
-  illumina_10X:
-    reads: /home/runner/work/genomeassembly/genomeassembly/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/
-  pacbio:
-    reads:
-      - reads: /home/runner/work/genomeassembly/genomeassembly/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta
-  HiC:
-    reads:
-      - reads: /home/runner/work/genomeassembly/genomeassembly/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram
-hic_motif: GATC,GANTC,CTNAG,TTAA
-hic_aligner: minimap2
-busco:
-  lineage: bacteria_odb10
-mito:
-  species: Caradrina clavipalpis
-  min_length: 15000
-  code: 5
-  fam: /home/runner/work/genomeassembly/genomeassembly/Undibacterium_unclassified/hmm_db/insecta_mito.fam
diff --git a/assets/test_gsMetZobe1.yaml b/assets/test_gsMetZobe1.yaml
index c115395f..56f7a484 100644
--- a/assets/test_gsMetZobe1.yaml
+++ b/assets/test_gsMetZobe1.yaml
@@ -1,7 +1,10 @@
 dataset:
   id: gsMetZobe1
   illumina_10X:
-    reads: /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/10x/
+    reads:
+      - /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/10x/gsMetZobe1_S6_L008_R1_001.fastq.gz
+      - /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/10x/gsMetZobe1_S6_L008_R2_001.fastq.gz
+      - /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/10x/gsMetZobe1_S6_L008_I1_001.fastq.gz
   pacbio:
     reads:
       - reads: /lustre/scratch123/tol/resources/genomeassembly/testdata/gsMetZobe1/pacbio/m64125_200823_145825.ccs.bc1019_BAK8B_OA--bc1019_BAK8B_OA.filtered.fasta.gz
diff --git a/bin/generate_cram_csv.sh b/bin/generate_cram_csv.sh
index 81eaad34..15e51694 100755
--- a/bin/generate_cram_csv.sh
+++ b/bin/generate_cram_csv.sh
@@ -12,8 +12,9 @@ for cram in "$@"; do
     rgline=$(samtools view -H $cram|grep "RG"|sed 's/\t/\\t/g'|sed "s/'//g")
 
     crampath=$(readlink -f ${cram})
+    craipath=$(readlink -f "${cram}.crai")  # quoted: path may contain spaces/globs
 
-    ncontainers=$(zcat ${crampath}.crai|wc -l)
+    ncontainers=$(zcat "${craipath}" | wc -l)
     base=$(basename $cram .cram)
 
     from=0
@@ -22,7 +23,7 @@ for cram in "$@"; do
 
     while [ $to -lt $ncontainers ]
     do
-        echo $crampath,${crampath}.crai,${from},${to},${base},${chunkn},${rgline}
+        echo $crampath,${craipath},${from},${to},${base},${chunkn},${rgline}
         from=$((to+1))
         ((to+=10000))
         ((chunkn++))
@@ -30,7 +31,7 @@ for cram in "$@"; do
 
     if [ $from -le $ncontainers ]
     then
-        echo $crampath,${crampath}.crai,${from},${ncontainers},${base},${chunkn},${rgline}
+        echo $crampath,${craipath},${from},${ncontainers},${base},${chunkn},${rgline}
         ((chunkn++))
     fi
 done
diff --git a/conf/modules.config b/conf/modules.config
index 2c30e6a3..21cb4af8 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -372,6 +372,9 @@ process {
     // Set up of the polishing pipeline
     if (params.polishing_on) {
         withName: LONGRANGER_MKREF {
+            if (System.getenv('GITHUB_ACTIONS') != null) { // GITHUB_ACTIONS is the documented "running in CI" flag
+                container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
+            }
             publishDir = [
                 path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" },
                 mode: params.publish_dir_mode,
@@ -382,6 +385,9 @@ process {
         withName: LONGRANGER_ALIGN {
             // Keep in sync with `longranger_lsf_sanger.config`
             ext.args = "--disable-ui --nopreflight"
+            if (System.getenv('GITHUB_ACTIONS') != null) { // GITHUB_ACTIONS is the documented "running in CI" flag
+                container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
+            }
             publishDir = [
                 path: { "${params.outdir}/${meta.id}.${params.hifiasm}/polishing" },
                 mode: params.publish_dir_mode,
@@ -843,33 +849,4 @@ profiles {
             }
         }
     }
-
-    test_github {
-        process {
-            // Set up of the scaffolding pipeline
-            withName: 'YAHS' {
-                // Skip the initial assembly error correction step
-                ext.args = '-r 1000,2000,5000'
-            }
-
-            withName: '.*HIFIASM.*' {
-                // Skip bloom filter
-                ext.args = '--primary -f0'
-            }
-
-            withName: '.*OATK' {
-                // Set kmer size and minimal coverage
-                ext.args = "-k1001 -c5 -Ttmp"
-            }
-
-            if (params.polishing_on) {
-                withName: LONGRANGER_MKREF {
-                    container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
-                }
-                withName: LONGRANGER_ALIGN {
-                    container = "ghcr.io/sanger-tol/longranger:2.2.2-c4"
-                }
-            }
-        }
-    }
 }
diff --git a/conf/test.config b/conf/test.config
index 03941e71..84874df3 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -14,15 +14,14 @@ params {
     config_profile_name        = 'Test profile'
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
-    // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = '6.GB'
+    // Match resource limits with the ubuntu2204-4c runner
+    max_cpus   = 4
+    max_memory = '15.GB'
     max_time   = '6.h'
 
     // Input data
     input = "${projectDir}/assets/test.yaml"
     bed_chunks_polishing = 2
     organelles_on = true
-    polishing_on = false
     hifiasm_hic_on = true
 }
diff --git a/conf/test_full.config b/conf/test_full.config
index e83aac0d..eb2551fb 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -22,6 +22,5 @@ params {
 
     // Input data for full size test
     input = "${projectDir}/assets/test_gsMetZobe1.yaml"
-    polishing_on = true
     hifiasm_hic_on = true
 }
diff --git a/conf/test_github.config b/conf/test_github.config
deleted file mode 100644
index 8086af2d..00000000
--- a/conf/test_github.config
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Nextflow config file for running minimal tests
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Defines input files and everything required to run a fast and simple pipeline test.
-
-    Use as follows:
-        nextflow run sanger-tol/genomeassembly -profile test,<docker/singularity> --outdir <OUTDIR>
-
-----------------------------------------------------------------------------------------
-*/
-
-params {
-    config_profile_name        = 'Github test profile'
-    config_profile_description = 'Minimal test dataset to check pipeline function'
-
-    max_cpus   = 4
-    max_memory = '15.GB'
-    max_time   = '6.h'
-    // Limit resources so that this can run on GitHub Actions
-
-    // Input data
-    input = "${projectDir}/assets/test_github.yaml"
-    bed_chunks_polishing = 2
-    polishing_on = false
-    hifiasm_hic_on = true
-    organelles_on = true
-}
diff --git a/docs/usage.md b/docs/usage.md
index 0bc8e117..660ebf5a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -86,17 +86,12 @@ mito:
 The pipeline can be tested locally using a provided small test dataset:
 
 ```
-cd ${GENOMEASSEMBLY_TEST_DATA}
-curl https://darwin.cog.sanger.ac.uk/genomeassembly_test_data.tar.gz | tar xzf -
-
 git clone git@github.com:sanger-tol/genomeassembly.git
 cd genomeassembly/
-sed -i "s|/home/runner/work/genomeassembly/genomeassembly|${GENOMEASSEMBLY_TEST_DATA}|" assets/test_github.yaml
-nextflow run main.nf -profile test_github,singularity --outdir ${OUTDIR} {OTHER ARGUMENTS}
+nextflow run main.nf -profile test,singularity --outdir ${OUTDIR} {OTHER ARGUMENTS}
 ```
 
-These command line steps will download and decompress the test data first, then download the pipeline and modify YAML so that it matches dataset location in your file system.
-The last command line runs the test.
+These command line steps will download the pipeline and run the test.
 
 You should now be able to run the pipeline as you see fit.
 
diff --git a/modules/local/generate_cram_csv.nf b/modules/local/generate_cram_csv.nf
index 860dfe65..85d78516 100644
--- a/modules/local/generate_cram_csv.nf
+++ b/modules/local/generate_cram_csv.nf
@@ -13,7 +13,7 @@ process GENERATE_CRAM_CSV {
         'biocontainers/samtools:1.17--h00cdaf9_0' }"
 
     input:
-    tuple val(meta), path(crampaths, stageAs: "?/*")
+    tuple val(meta), path(crampaths, stageAs: "?/*"), path(craipaths, stageAs: "?/*")
 
 
     output:
@@ -23,7 +23,7 @@ process GENERATE_CRAM_CSV {
     script:
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    generate_cram_csv.sh $crampaths >> ${prefix}_cram.csv
+    generate_cram_csv.sh $crampaths > ${prefix}_cram.csv
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/local/longranger/align/main.nf b/modules/local/longranger/align/main.nf
index 728cd9af..0f3009a7 100644
--- a/modules/local/longranger/align/main.nf
+++ b/modules/local/longranger/align/main.nf
@@ -11,7 +11,7 @@ process LONGRANGER_ALIGN {
 
     input:
     tuple val(meta), path(reference)
-    path(fastqs)
+    path(fastqs, stageAs: "10X_inputs/*")
 
     output:
     tuple val(meta), path("${meta.id}/outs/possorted_bam.bam"), emit: bam
@@ -26,7 +26,7 @@ process LONGRANGER_ALIGN {
     def args = task.ext.args ?: ''
     def sample = "${meta.id}"
     """
-    longranger align --id=$sample --fastqs=$fastqs \
+    longranger align --id=$sample --fastqs=10X_inputs \
         --sample=$sample --reference=$reference \
         ${args}
 
diff --git a/modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/batch_summary.txt b/modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/batch_summary.txt
deleted file mode 100644
index 226d64fd..00000000
--- a/modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/batch_summary.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Input_file	Dataset	Complete	Single	Duplicated	Fragmented	Missing	n_markers	Scaffold N50	Contigs N50	Percent gaps	Number of scaffolds
-iyVesGerm1_scaffolds_final.fa	Run failed; check logs													
diff --git a/modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/logs/busco.log b/modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/logs/busco.log
deleted file mode 100644
index 583cbbff..00000000
--- a/modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco/logs/busco.log
+++ /dev/null
@@ -1,107 +0,0 @@
-2023-03-31 12:35:30 DEBUG:busco.run_BUSCO	Command line: /usr/local/bin/busco --cpu 2 --in input_seqs --out iyVesGerm1-insecta_odb10-busco --out_path /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/./workflows/../subworkflows/local/../../modules/nf-core/busco --lineage_dataset /lustre/scratch123/tol/resources/busco/v5/lineages/insecta_odb10 --mode genome
-2023-03-31 12:35:30 INFO:busco.run_BUSCO	***** Start a BUSCO v5.4.3 analysis, current time: 03/31/2023 12:35:30 *****
-2023-03-31 12:35:30 DEBUG:busco.ConfigManager	Getting config file
-2023-03-31 12:35:30 INFO:busco.ConfigManager	Configuring BUSCO with local environment
-2023-03-31 12:35:30 INFO:busco.BuscoConfig	Mode is genome
-2023-03-31 12:35:30 INFO:busco.BuscoDownloadManager	Downloading information on latest versions of BUSCO data...
-2023-03-31 12:35:32 DEBUG:busco.BuscoConfig	State of BUSCO config before run:
-2023-03-31 12:35:32 DEBUG:busco.BuscoConfig	{'_allow_no_value': False,
- '_comment_prefixes': ('#', ';'),
- '_converters': <configparser.ConverterMapping object at 0x7fb477f15910>,
- '_defaults': {},
- '_delimiters': ('=', ':'),
- '_dict': <class 'dict'>,
- '_empty_lines_in_values': True,
- '_inline_comment_prefixes': (),
- '_input_filepath': '/lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/work/06/5d8ceb4658862a248b20dcc4c3b27a/input_seqs',
- '_interpolation': <configparser.BasicInterpolation object at 0x7fb5002eda60>,
- '_mode': 'genome',
- '_optcre': re.compile('\n        (?P<option>.*?)                    # very permissive!\n        \\s*(?P<vi>=|:)\\s*              # any number of space/tab,\n                                           # followed by any of t, re.VERBOSE),
- '_proxies': {'DEFAULT': <Section: DEFAULT>,
-              'augustus': <Section: augustus>,
-              'bbtools': <Section: bbtools>,
-              'busco_run': <Section: busco_run>,
-              'etraining': <Section: etraining>,
-              'gff2gbSmallDNA.pl': <Section: gff2gbSmallDNA.pl>,
-              'hmmsearch': <Section: hmmsearch>,
-              'makeblastdb': <Section: makeblastdb>,
-              'metaeuk': <Section: metaeuk>,
-              'new_species.pl': <Section: new_species.pl>,
-              'optimize_augustus.pl': <Section: optimize_augustus.pl>,
-              'prodigal': <Section: prodigal>,
-              'sepp': <Section: sepp>,
-              'tblastn': <Section: tblastn>},
- '_sections': {'augustus': {'command': '', 'path': ''},
-               'bbtools': {'command': '', 'path': ''},
-               'busco_run': {'auto-lineage': 'False',
-                             'auto-lineage-euk': 'False',
-                             'auto-lineage-prok': 'False',
-                             'batch_mode': 'True',
-                             'cpu': '2',
-                             'datasets_version': 'odb10',
-                             'download_base_url': 'https://busco-data.ezlab.org/v5/data/',
-                             'download_path': '/lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/work/06/5d8ceb4658862a248b20dcc4c3b27a/busco_downloads',
-                             'force': 'False',
-                             'in': '/lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/work/06/5d8ceb4658862a248b20dcc4c3b27a/input_seqs',
-                             'lineage_dataset': '/lustre/scratch123/tol/resources/busco/v5/lineages/insecta_odb10',
-                             'main_out': '/lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco',
-                             'mode': 'genome',
-                             'offline': 'False',
-                             'out': 'iyVesGerm1-insecta_odb10-busco',
-                             'out_path': '/lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/modules/nf-core/busco',
-                             'quiet': 'False',
-                             'restart': 'False',
-                             'tar': 'False',
-                             'update-data': 'False',
-                             'use_augustus': 'False'},
-               'etraining': {'command': '', 'path': ''},
-               'gff2gbSmallDNA.pl': {'command': '', 'path': ''},
-               'hmmsearch': {'command': '', 'path': ''},
-               'makeblastdb': {'command': '', 'path': ''},
-               'metaeuk': {'command': '', 'path': ''},
-               'new_species.pl': {'command': '', 'path': ''},
-               'optimize_augustus.pl': {'command': '', 'path': ''},
-               'prodigal': {'command': '', 'path': ''},
-               'sepp': {'command': '', 'path': ''},
-               'tblastn': {'command': '', 'path': ''}},
- '_strict': True,
- 'conf_file': 'local environment',
- 'default_section': 'DEFAULT',
- 'downloader': <busco.BuscoDownloadManager.BuscoDownloadManager object at 0x7fb477f15520>,
- 'main_out': '/lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/modules/nf-core/busco/iyVesGerm1-insecta_odb10-busco',
- 'params': {'augustus_parameters': None,
-            'augustus_species': None,
-            'auto-lineage': False,
-            'auto-lineage-euk': False,
-            'auto-lineage-prok': False,
-            'config_file': None,
-            'contig_break': None,
-            'cpu': 2,
-            'datasets_version': None,
-            'download': '==SUPPRESS==',
-            'download_base_url': None,
-            'download_path': None,
-            'evalue': None,
-            'force': False,
-            'help': '==SUPPRESS==',
-            'in': 'input_seqs',
-            'limit': None,
-            'lineage_dataset': '/lustre/scratch123/tol/resources/busco/v5/lineages/insecta_odb10',
-            'list_datasets': '==SUPPRESS==',
-            'long': False,
-            'metaeuk_parameters': None,
-            'metaeuk_rerun_parameters': None,
-            'mode': 'genome',
-            'offline': False,
-            'out': 'iyVesGerm1-insecta_odb10-busco',
-            'out_path': '/lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/./workflows/../subworkflows/local/../../modules/nf-core/busco',
-            'quiet': False,
-            'restart': False,
-            'scaffold_composition': False,
-            'tar': False,
-            'update-data': False,
-            'use_augustus': False,
-            'version': '==SUPPRESS=='}}
-2023-03-31 12:35:32 INFO:busco.BuscoRunner	Running in batch mode. 1 input files found in /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/work/06/5d8ceb4658862a248b20dcc4c3b27a/input_seqs
-2023-03-31 12:35:32 INFO:busco.BuscoRunner	Input file is /lustre/scratch124/tol/projects/darwin/users/kk16/development/nextflow/dev/genomeassembly/work/06/5d8ceb4658862a248b20dcc4c3b27a/input_seqs/iyVesGerm1_scaffolds_final.fa
-2023-03-31 12:35:32 ERROR:busco.BuscoRunner	/lustre/scratch123/tol/resources/busco/v5/lineages/insecta_odb10 does not exist
diff --git a/modules/nf-core/busco/main.nf_ b/modules/nf-core/busco/main.nf_
deleted file mode 100644
index 23b75b25..00000000
--- a/modules/nf-core/busco/main.nf_
+++ /dev/null
@@ -1,83 +0,0 @@
-process BUSCO {
-    tag "$meta.id"
-    label 'process_medium'
-
-    conda "bioconda::busco=5.4.3"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/busco:5.4.3--pyhdfd78af_0':
-        'quay.io/biocontainers/busco:5.4.3--pyhdfd78af_0' }"
-
-    input:
-    tuple val(meta), path('tmp_input/*')
-    val lineage                           // Required:    lineage to check against, "auto" enables --auto-lineage instead
-    val busco_lineages_path              // Recommended: path to busco lineages - downloads if not set
-    path config_file                      // Optional:    busco configuration file
-
-    output:
-    tuple val(meta), path("*-busco.batch_summary.txt"), emit: batch_summary
-    tuple val(meta), path("short_summary.*.txt")      , emit: short_summaries_txt, optional: true
-    tuple val(meta), path("short_summary.*.json")     , emit: short_summaries_json, optional: true
-    tuple val(meta), path("*-busco")                  , emit: busco_dir
-    path "versions.yml"                               , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}-${lineage}"
-    def busco_config = config_file ? "--config $config_file" : ''
-    def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--offline --lineage_dataset ${busco_lineages_path}/${lineage}"
-    """
-    # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
-    # Check for container variable initialisation script and source it.
-    if [ -f "/usr/local/env-activate.sh" ]; then
-        set +u  # Otherwise, errors out because of various unbound variables
-        . "/usr/local/env-activate.sh"
-        set -u
-    fi
-
-    # If the augustus config directory is not writable, then copy to writeable area
-    if [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then
-        # Create writable tmp directory for augustus
-        AUG_CONF_DIR=\$( mktemp -d -p \$PWD )
-        cp -r \$AUGUSTUS_CONFIG_PATH/* \$AUG_CONF_DIR
-        export AUGUSTUS_CONFIG_PATH=\$AUG_CONF_DIR
-        echo "New AUGUSTUS_CONFIG_PATH=\${AUGUSTUS_CONFIG_PATH}"
-    fi
-
-    # Ensure the input is uncompressed
-    INPUT_SEQS=input_seqs
-    mkdir "\$INPUT_SEQS"
-    cd "\$INPUT_SEQS"
-    for FASTA in ../tmp_input/*; do
-        if [ "\${FASTA##*.}" == 'gz' ]; then
-            gzip -cdf "\$FASTA" > \$( basename "\$FASTA" .gz )
-        else
-            ln -s "\$FASTA" .
-        fi
-    done
-    cd ..
-
-    busco \\
-        --cpu $task.cpus \\
-        --in "\$INPUT_SEQS" \\
-        --out ${prefix}-busco \\
-        --out_path ./ \\
-        $busco_lineage \\
-        $busco_config \\
-        $args
-
-    # clean up
-    rm -rf "\$INPUT_SEQS"
-
-    # Move files to avoid staging/publishing issues
-    mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt
-    mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found."
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
-    END_VERSIONS
-    """
-}
diff --git a/nextflow.config b/nextflow.config
index 3595f319..46455dc4 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -173,7 +173,6 @@ profiles {
         executor.memory        = 60.GB
     }
     test          { includeConfig 'conf/test.config'      }
-    test_github   { includeConfig 'conf/test_github.config'  }
     test_full     { includeConfig 'conf/test_full.config' }
 }
 
diff --git a/subworkflows/local/organelles.nf b/subworkflows/local/organelles.nf
index 4b5eb96e..cfc90e0b 100644
--- a/subworkflows/local/organelles.nf
+++ b/subworkflows/local/organelles.nf
@@ -50,17 +50,9 @@ workflow ORGANELLES {
     //
     // LOGIC: PREPARE OATK INPUT
     //
-    mito_info.map{ species, min_length, code, email, fam -> [ file(fam.toString(), checkIfExists: true), 
-                                                            file(fam.toString()+'.h3f', checkIfExists: true), 
-                                                            file(fam.toString()+'.h3i', checkIfExists: true), 
-                                                            file(fam.toString()+'.h3m', checkIfExists: true), 
-                                                            file(fam.toString()+'.h3p', checkIfExists: true) ]}
+    mito_info.map{ species, min_length, code, email, fam -> fam ? ([ fam ] + ['h3f', 'h3i', 'h3m', 'h3p'].collect {fam.resolveSibling(fam.name + '.' + it)} ) : [[],[],[],[],[]] }
                                                             .set { mito_hmm_input }
-    plastid_info.map{ fam -> fam ? [ file(fam.toString(), checkIfExists: true), 
-                               file(fam.toString()+'.h3f', checkIfExists: true), 
-                               file(fam.toString()+'.h3i', checkIfExists: true), 
-                               file(fam.toString()+'.h3m', checkIfExists: true), 
-                               file(fam.toString()+'.h3p', checkIfExists: true) ] : [[],[],[],[],[]]}
+    plastid_info.map{ fam -> fam ? ([ fam ] + ['h3f', 'h3i', 'h3m', 'h3p'].collect {fam.resolveSibling(fam.name + '.' + it)} ) : [[],[],[],[],[]] }
                                .set { plastid_hmm_input }
     //
     // MODULE: RUN OATK TO IDENTIFY MITO
diff --git a/subworkflows/local/prepare_input.nf b/subworkflows/local/prepare_input.nf
index 6d04e697..63d36902 100644
--- a/subworkflows/local/prepare_input.nf
+++ b/subworkflows/local/prepare_input.nf
@@ -26,8 +26,8 @@ workflow PREPARE_INPUT {
     ymlfile.multiMap{ data -> 
         dataset : (data.dataset ? data.dataset : []) 
         busco : (data.busco ? data.busco : [])
-        mito: ( data.mito ? ['\"'+data.mito.species+'\"', data.mito.min_length, data.mito.code, data.mito.email ? data.mito.email : "\"\"", data.mito.fam ? data.mito.fam : "\"\"" ] : [])
-        plastid : ( data.plastid ? ( data.plastid.fam ? data.plastid.fam : "\"\"" ) : [])
+        mito: ( data.mito ? ['\"'+data.mito.species+'\"', data.mito.min_length, data.mito.code, data.mito.email ? data.mito.email : "\"\"", data.mito.fam ? file(data.mito.fam, checkIfExists: true) : [] ] : [])
+        plastid : ( data.plastid ? ( data.plastid.fam ? file(data.plastid.fam, checkIfExists: true) : [] ) : [])
         hic_motif : (data.hic_motif ? data.hic_motif : [])
         hic_aligner : (data.hic_aligner ? data.hic_aligner :[])
     }
@@ -40,7 +40,7 @@ workflow PREPARE_INPUT {
             .multiMap { data -> 
             id_ch : (data.id ? [id: data.id] : [])
             illumina_10X_ch : ( data.illumina_10X ? [ [id: data.id ], 
-                                                       file(data.illumina_10X.reads, checkIfExists: true),
+                                                       [data.illumina_10X.reads].flatten().collect { file(it, checkIfExists: true) }, // accept a single path or a list
                                                        data.illumina_10X.kmer_pref ? data.illumina_10X.kmer_pref : [] ] 
                                 : [] )
             pacbio_ch: ( data.pacbio ? [ [id: data.id ], 
diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf
index 7f900af0..2e5e3899 100644
--- a/workflows/genomeassembly.nf
+++ b/workflows/genomeassembly.nf
@@ -258,7 +258,7 @@ workflow GENOMEASSEMBLY {
         //
         // LOGIC: REFACTOR ILLUMINA CHANNEL TO PASS IT INTO THE POLISHING SUBWORKFLOW
         //
-        PREPARE_INPUT.out.illumina_10X.map{ meta, reads, kmers -> [reads] }
+        PREPARE_INPUT.out.illumina_10X.map{ meta, reads, kmers -> reads }
                         .set{ illumina_10X_ch }
         
         //