From 9aa718678abe815e914c363f2eb343006f588c15 Mon Sep 17 00:00:00 2001
From: Samuel Lampa <samuel.lampa@scilifelab.se>
Date: Fri, 25 Oct 2024 15:18:35 +0200
Subject: [PATCH 1/4] Add GitHub Actions CI config

Adapted from JASEN, keeping the relevant stuff and adapting a bit to
this pipeline.
---
 .github/workflows/ci.yml | 77 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 .github/workflows/ci.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..483e1bd
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,77 @@
+name: CI
+# This workflow runs the pipeline with the minimal test dataset to check that
+# it completes without any syntax errors
+on:
+  push:
+    branches:
+      - main
+      - dev
+  pull_request:
+    branches:
+      - main
+      - dev
+    types: [opened, synchronize, reopened]  # GitHub's default activity types, which also fire for draft PRs ('draft' is not a valid key here)
+
+env:
+  NXF_ANSI_LOG: false
+
+concurrency:
+  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"  # one group per workflow and PR number (falls back to the ref when there is no PR)
+  cancel-in-progress: true  # a newer run in the same group cancels the one still in progress
+
+jobs:
+  test:
+    name: Install and run self-test pipeline
+    # Run only in the main repository (the condition applies to every trigger, not just push)
+    if: "${{ github.repository == 'genomic-medicine-sweden/gms_16s' }}"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        NXF_VER:
+          - "23.10.1"  # Nextflow version handed to the "Install Nextflow" step below
+    steps:
+      - name: Install Nextflow
+        # For running (locally) with ACT, we use a docker image with Nextflow pre-installed
+        if: "${{ ! github.event.act }}"
+        uses: nf-core/setup-nextflow@v1
+        with:
+          version: "${{ matrix.NXF_VER }}"
+
+      - name: Check out pipeline code
+        uses: actions/checkout@v4  # v3 targets the Node 16 runtime, which GitHub has retired
+
+      - name: Install Singularity
+        # For running (locally) with ACT, we use a docker image with Singularity already installed
+        if: "${{ ! github.event.act }}"
+        run: |
+          wget https://github.com/apptainer/singularity/releases/download/v3.8.7/singularity-container_3.8.7_amd64.deb && sudo dpkg -i singularity-container_3.8.7_amd64.deb
+
+      - name: Report disk-usage before make install
+        run:
+          df -h
+
+      - name: Run Make install
+        run:
+          make install
+
+      - name: Report disk-usage after make install
+        run:
+          df -h
+
+      - name: Adapt paths in samplelist.csv
+        # We have to use '#' as a separator, as there are slashes in the $PWD
+        # path, messing up with the s///-replace syntax
+        run: |
+          sed -i "s#PATH_TO_GMS16S#$PWD#g" assets/test_data/samplelist.csv
+
+      - name: Run pipeline with test data
+        run: |
+          nextflow run main.nf \
+            --input assets/examples/samplesheet_medium.csv \
+            --outdir results \
+            --db assets/databases/emu_database \
+            --seqtype map-ont \
+             -profile singularity,test \
+            --quality_filtering \
+            --longread_qc_qualityfilter_minlength 1200 \
+            --longread_qc_qualityfilter_maxlength 1800

From 5a7d52f1ef5988c86fb978fad8f2dd8248dcd7e8 Mon Sep 17 00:00:00 2001
From: Samuel Lampa <samuel.lampa@scilifelab.se>
Date: Mon, 28 Oct 2024 14:16:43 +0100
Subject: [PATCH 2/4] Add Makefile

---
 Makefile | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 Makefile

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e812de6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,158 @@
+# ==============================================================================
+# Brief explanation of Makefile syntax
+# ==============================================================================
+# Since Makefiles are a bit special, and there are not a lot of easy to read
+# tutorials out there, here follows a very brief introduction to the syntax in
+# Makefiles.
+#
+# Basics
+# ------------------------------------------------------------------------------
+#
+# Makefiles are in many ways similar to bash-scripts, but unlike bash-scripts
+# they are not written in a linear sequential fashion, but rather divided into
+# so called rules, that are typically tightly connected to specific output file
+# paths or file path patterns.
+#
+# Firstly, Makefiles should be named `Makefile` and be put inside a folder
+# where one wants to run the make command.
+#
+# The rule syntax
+# ------------------------------------------------------------------------------
+#
+# The syntax of a rule is at its core very simple:
+#
+# output file(s)/target(s) : any dependent input files
+#     commands to produce the output files from input files
+#     possibly more commands
+#
+# So, the significant part of the rule syntax is the ':'-character, as well as
+# the indentation of the rows following the head of the rule, to indicate the
+# "recipe" or the commands to produce the outputs of the rule.
+#
+# A rule can also have just a name, and no concrete output file. That is, it
+# would have the form:
+#
+# name_of_the_rule : any dependent input files
+#     one command
+#     more commands
+#
+# Now, there is one big caveat here, related to our scripts: Make will rebuild
+# a target as soon as any of its inputs are updated, or have a newer timestamp
+# than the target. This is typically not desired for us, since we might have
+# files unpacked with zip with all kinds of different dates, and for a one-run
+# installation, we are mostly interested in whether an output file already
+# exists or not, not so much about timestamps.
+#
+# To change so that Make only cares about whether files exist, and not timestamps,
+# one can add a | character before those input files, like so:
+#
+# name_of_the_rule : input files where timestamp matters | input files where only existence matters
+#     one command
+#     more commands
+#
+# Of course, one can have everything on the right side of the | character, so:
+#
+# name_of_the_rule : | input files where only existence matters
+#     one command
+#     more commands
+#
+# Running Makefiles
+# ------------------------------------------------------------------------------
+#
+# To run a named rule, you would then do:
+#
+# $ make name_of_the_rule
+#
+# For normal rules, you would hit:
+#
+# $ make <outputfile>
+#
+# Tip: Type "make" and hit tab twice in the shell, to show available targets to
+# run in the current makefile.
+#
+# Special variables and more
+# ------------------------------------------------------------------------------
+#
+# Inside the commands of the rules, one can write pretty much bash, especially
+# if setting `SHELL := /bin/bash` in the beginning of the Makefile which we have
+# done below.
+#
+# There are some differences though:
+#
+# 1. Variable syntax using only a single $-character refers to MAKE-variables.
+#    Thus, to use variables set in bash, you have to use double-$.
+#    So:
+#    echo $(this-is-a-make-variable) and $${this_is_a_bash_variable}
+#
+#    This also goes for command substitution, so rather than:
+#
+#    	echo "Lines in file:" $(wc -l somefile.txt)
+#
+#    ... you would write:
+#
+#    	echo "Lines in file:" $$(wc -l somefile.txt)
+#
+# 2. Makefiles also use some special variables with special meaning, to access
+#    things like the output and input files:
+#
+#    $@   The output target (In case of multiple targets, the same command will
+#         be run once for every output file, feeding this variable with only a
+#         single one at a time)
+#
+#    $<   The first input file or dependency
+#
+#    $^   ALL input files / dependencies.
+#
+# 3. Another speciality is that adding a '@' character before any command, will
+#    stop this command from being printed out when it is executed (only its output
+#    will be printed).
+#
+# That's all for now. For the official docs, see:
+# https://www.gnu.org/software/make/manual/make.html
+#
+# Some more caveats
+# ------------------------------------------------------------------------------
+#  Here are some things that might not be obvious at first:
+#
+#  To create a rule with a single output but many dependencies, you can add
+#  these dependencies on multiple lines by using the continuation character \
+#  like so:
+#
+#  name_of_rule: dependency1 \
+#  		dependency2 \
+#  		dependency3 \
+#  		dependency4
+
+# ==============================================================================
+# Various definitions
+# ==============================================================================
+# Make sure bash is used as shell, for consistency and to make some more
+# advanced scripting possible than with /bin/sh
+SHELL := /bin/bash
+
+# Define path variables (the shell's realpath is used throughout; make's built-in $(realpath) gives "" for a missing path)
+SCRIPT_DIR := $(shell pwd)
+ASSETS_DIR := $(shell realpath $(SCRIPT_DIR)/assets/)
+CONTAINER_DIR := $(shell realpath $(SCRIPT_DIR)/container/)
+# The root folder where the pipeline is currently located. To be mounted into
+# the Singularity containers below.
+MNT_ROOT := /$(shell readlink -f . | cut -d"/" -f2)
+INSTALL_LOG := "$(SCRIPT_DIR)/.install.log"
+# log_message: recipe lines that print a timestamped message ($1) between two rulers and append the same output to $(INSTALL_LOG); presumably invoked via $(call log_message,<text>)
+define log_message
+	@echo "--------------------------------------------------------------------------------" | tee -a $(INSTALL_LOG);
+	@echo "$$(date "+%Y-%m-%d %H:%M:%S"): $1" | tee -a $(INSTALL_LOG);
+	@echo "--------------------------------------------------------------------------------" | tee -a $(INSTALL_LOG);
+endef
+
+# ==============================================================================
+# Main rules
+# ==============================================================================
+.PHONY: install # 'install' names a task, not a file, so a stray ./install must never mask it
+install: assets/databases/emu_database/species_taxid.fasta assets/databases/emu_database/taxonomy.tsv
+# Decompress into a temp file and rename it, so a failed run cannot leave a truncated target that make would treat as done
+assets/databases/emu_database/species_taxid.fasta: assets/databases/emu_database/species_taxid.fasta.gz
+	gzip -dc $< > $@.tmp && mv $@.tmp $@
+
+assets/databases/emu_database/taxonomy.tsv: assets/databases/emu_database/taxonomy.tsv.gz
+	gzip -dc $< > $@.tmp && mv $@.tmp $@

From d1906f7d5aef6314fa0b675becbc22c67c0800d5 Mon Sep 17 00:00:00 2001
From: Samuel Lampa <samuel.lampa@scilifelab.se>
Date: Mon, 28 Oct 2024 14:26:40 +0100
Subject: [PATCH 3/4] Organize test assets for running test

---
 .github/workflows/ci.yml                            |  10 ++--------
 .../{ => barcode01}/small_test_data2.fastq.gz       | Bin
 .../{ => barcode01}/small_test_data3.fastq.gz       | Bin
 .../medium_Mock_dil_1_2_BC1.fastq.gz                | Bin
 .../medium_Mock_dil_1_2_BC3.fastq.gz                | Bin
 .../{ => barcode03}/Mock_dil_1_2_BC1.fastq.gz       | Bin
 .../{ => barcode03}/Mock_dil_1_2_BC3.fastq.gz       | Bin
 7 files changed, 2 insertions(+), 8 deletions(-)
 rename assets/test_assets/{ => barcode01}/small_test_data2.fastq.gz (100%)
 rename assets/test_assets/{ => barcode01}/small_test_data3.fastq.gz (100%)
 rename assets/test_assets/{ => barcode02}/medium_Mock_dil_1_2_BC1.fastq.gz (100%)
 rename assets/test_assets/{ => barcode02}/medium_Mock_dil_1_2_BC3.fastq.gz (100%)
 rename assets/test_assets/{ => barcode03}/Mock_dil_1_2_BC1.fastq.gz (100%)
 rename assets/test_assets/{ => barcode03}/Mock_dil_1_2_BC3.fastq.gz (100%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 483e1bd..28182de 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -58,20 +58,14 @@ jobs:
         run:
           df -h
 
-      - name: Adapt paths in samplelist.csv
-        # We have to use '#' as a separator, as there are slashes in the $PWD
-        # path, messing up with the s///-replace syntax
-        run: |
-          sed -i "s#PATH_TO_GMS16S#$PWD#g" assets/test_data/samplelist.csv
-
       - name: Run pipeline with test data
         run: |
           nextflow run main.nf \
-            --input assets/examples/samplesheet_medium.csv \
             --outdir results \
             --db assets/databases/emu_database \
             --seqtype map-ont \
              -profile singularity,test \
             --quality_filtering \
             --longread_qc_qualityfilter_minlength 1200 \
-            --longread_qc_qualityfilter_maxlength 1800
+            --longread_qc_qualityfilter_maxlength 1800 \
+            --merge_fastq_pass assets/test_assets
diff --git a/assets/test_assets/small_test_data2.fastq.gz b/assets/test_assets/barcode01/small_test_data2.fastq.gz
similarity index 100%
rename from assets/test_assets/small_test_data2.fastq.gz
rename to assets/test_assets/barcode01/small_test_data2.fastq.gz
diff --git a/assets/test_assets/small_test_data3.fastq.gz b/assets/test_assets/barcode01/small_test_data3.fastq.gz
similarity index 100%
rename from assets/test_assets/small_test_data3.fastq.gz
rename to assets/test_assets/barcode01/small_test_data3.fastq.gz
diff --git a/assets/test_assets/medium_Mock_dil_1_2_BC1.fastq.gz b/assets/test_assets/barcode02/medium_Mock_dil_1_2_BC1.fastq.gz
similarity index 100%
rename from assets/test_assets/medium_Mock_dil_1_2_BC1.fastq.gz
rename to assets/test_assets/barcode02/medium_Mock_dil_1_2_BC1.fastq.gz
diff --git a/assets/test_assets/medium_Mock_dil_1_2_BC3.fastq.gz b/assets/test_assets/barcode02/medium_Mock_dil_1_2_BC3.fastq.gz
similarity index 100%
rename from assets/test_assets/medium_Mock_dil_1_2_BC3.fastq.gz
rename to assets/test_assets/barcode02/medium_Mock_dil_1_2_BC3.fastq.gz
diff --git a/assets/test_assets/Mock_dil_1_2_BC1.fastq.gz b/assets/test_assets/barcode03/Mock_dil_1_2_BC1.fastq.gz
similarity index 100%
rename from assets/test_assets/Mock_dil_1_2_BC1.fastq.gz
rename to assets/test_assets/barcode03/Mock_dil_1_2_BC1.fastq.gz
diff --git a/assets/test_assets/Mock_dil_1_2_BC3.fastq.gz b/assets/test_assets/barcode03/Mock_dil_1_2_BC3.fastq.gz
similarity index 100%
rename from assets/test_assets/Mock_dil_1_2_BC3.fastq.gz
rename to assets/test_assets/barcode03/Mock_dil_1_2_BC3.fastq.gz

From 2fbef62250d203efc5cfbc872d9ae64a8b22a9c1 Mon Sep 17 00:00:00 2001
From: Samuel Lampa <samuel.lampa@scilifelab.se>
Date: Mon, 28 Oct 2024 14:29:55 +0100
Subject: [PATCH 4/4] Send absolute path to workflow variables

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 28182de..57d4110 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -62,10 +62,10 @@ jobs:
         run: |
           nextflow run main.nf \
             --outdir results \
-            --db assets/databases/emu_database \
+            --db $(pwd)/assets/databases/emu_database \
             --seqtype map-ont \
              -profile singularity,test \
             --quality_filtering \
             --longread_qc_qualityfilter_minlength 1200 \
             --longread_qc_qualityfilter_maxlength 1800 \
-            --merge_fastq_pass assets/test_assets
+            --merge_fastq_pass $(pwd)/assets/test_assets