From 9aa718678abe815e914c363f2eb343006f588c15 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Fri, 25 Oct 2024 15:18:35 +0200 Subject: [PATCH 1/4] Add GitHub Actions CI config Adapted from JASEN, keeping the relevant stuff and adapting a bit to this pipeline. --- .github/workflows/ci.yml | 77 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..483e1bd --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,77 @@ +name: CI +# This workflow runs the pipeline with the minimal test dataset to check that +# it completes without any syntax errors +on: + push: + branches: + - main + - dev + pull_request: + branches: + - main + - dev + draft: true + +env: + NXF_ANSI_LOG: false + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + +jobs: + test: + name: Install and run self-test pipeline + # Only run when the workflow executes in the genomic-medicine-sweden/gms_16s repository + if: "${{ github.repository == 'genomic-medicine-sweden/gms_16s' }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "23.10.1" + steps: + - name: Install Nextflow + # Skipped when running (locally) with ACT, where we use a docker image with Nextflow pre-installed + if: "${{ ! github.event.act }}" + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Check out pipeline code + uses: actions/checkout@v3 + + - name: Install Singularity + # Skipped when running (locally) with ACT, where we use a docker image with Singularity already installed + if: "${{ ! 
github.event.act }}" + run: | + wget https://github.com/apptainer/singularity/releases/download/v3.8.7/singularity-container_3.8.7_amd64.deb && sudo dpkg -i singularity-container_3.8.7_amd64.deb + + - name: Report disk-usage before make install + run: + df -h + + - name: Run Make install + run: + make install + + - name: Report disk-usage after make install + run: + df -h + + - name: Adapt paths in samplelist.csv + # We have to use '#' as a separator, as there are slashes in the $PWD + # path, messing up with the s///-replace syntax + run: | + sed -i "s#PATH_TO_GMS16S#$PWD#g" assets/test_data/samplelist.csv + + - name: Run pipeline with test data + run: | + nextflow run main.nf \ + --input assets/examples/samplesheet_medium.csv \ + --outdir results \ + --db assets/databases/emu_database \ + --seqtype map-ont \ + -profile singularity,test \ + --quality_filtering \ + --longread_qc_qualityfilter_minlength 1200 \ + --longread_qc_qualityfilter_maxlength 1800 From 5a7d52f1ef5988c86fb978fad8f2dd8248dcd7e8 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Mon, 28 Oct 2024 14:16:43 +0100 Subject: [PATCH 2/4] Add Makefile --- Makefile | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e812de6 --- /dev/null +++ b/Makefile @@ -0,0 +1,158 @@ +# ============================================================================== +# Brief explanation of Makefile syntax +# ============================================================================== +# Since Makefiles are a bit special, and there are not a lot of easy-to-read +# tutorials out there, here follows a very brief introduction to the syntax in +# Makefiles. 
+# +# Basics +# ------------------------------------------------------------------------------ +# +# Makefiles are in many ways similar to bash-scripts, but unlike bash-scripts +# they are not written in a linear sequential fashion, but rather divided into +# so-called rules that are typically tightly connected to specific output file +# paths or file path patterns. +# +# Firstly, Makefiles should be named `Makefile` and be put inside a folder +# where one wants to run the make command. +# +# The rule syntax +# ------------------------------------------------------------------------------ +# +# The syntax of a rule is at its core very simple: +# +# output file(s)/target(s) : any dependent input files +# commands to produce the output files from input files +# possibly more commands +# +# So, the significant part of the rule syntax is the ':'-character, as well as +# the indentation of the rows following the head of the rule, to indicate the +# "recipe" or the commands to produce the outputs of the rule. +# +# A rule can also have just a name, and no concrete output file. That is, it +# would have the form: +# +# name_of_the_rule : any dependent input files +# one command +# more commands +# +# Now, there is one big caveat here, related to our scripts: Make will rebuild +# a target as soon as any of its inputs are updated, or have a newer timestamp +# than the target. This is typically not desired for us, since we might have +# files unpacked with zip with all kinds of different dates, and for a one-run +# installation, we are mostly interested in whether an output file already +# exists or not, not so much about timestamps. 
+# +# To change this so that Make only cares about whether files exist, and not timestamps, +# one can add a | character before those input files, like so: +# +# name_of_the_rule : input files where timestamp matters | input files where only existence matters +# one command +# more commands +# +# Of course, one can have everything on the right side of the | character, so: +# +# name_of_the_rule : | input files where only existence matters +# one command +# more commands +# +# Running Makefiles +# ------------------------------------------------------------------------------ +# +# To run a named rule, you would then do: +# +# $ make name_of_the_rule +# +# For normal rules, you would hit: +# +# $ make +# +# Tip: Type "make" and hit tab twice in the shell, to show available targets to +# run in the current makefile. +# +# Special variables and more +# ------------------------------------------------------------------------------ +# +# Inside the commands of the rules, one can write pretty much bash, especially +# if setting `SHELL := /bin/bash` in the beginning of the Makefile which we have +# done below. +# +# There are some differences though: +# +# 1. Variable syntax using only a single $-character refers to MAKE-variables. +# Thus, to use variables set in bash, you have to use double-$. +# So: +# echo $(this-is-a-make-variable) and $${this_is_a_bash_variable} +# +# This also goes for command substitution, so rather than: +# +# echo "Lines in file:" $(wc -l somefile.txt) +# +# ... you would write: +# +# echo "Lines in file:" $$(wc -l somefile.txt) +# +# 2. Makefiles also use some special variables with special meaning, to access +# things like the output and input files: +# +# $@ The output target (In case of multiple targets, the same command will +# be run once for every output file, feeding this variable with only a +# single one at a time) +# +# $< The first input file or dependency +# +# $^ ALL input files / dependencies. +# +# 3. 
Another speciality is that adding a '@' character before any command will +# stop this command from being printed out when it is executed (only its output +# will be printed). +# +# That's all for now. For the official docs, see: +# https://www.gnu.org/software/make/manual/make.html +# +# Some more caveats +# ------------------------------------------------------------------------------ +# Here are some things that might not be obvious at first: +# +# To create a rule with a single output but many dependencies, you can add +# these dependencies on multiple lines by using the continuation character \ +# like so: +# +# name_of_rule: dependency1 \ +# dependency2 \ +# dependency3 \ +# dependency4 + +# ============================================================================== +# Various definitions +# ============================================================================== +# Make sure bash is used as shell, for consistency and to make some more +# advanced scripting possible than with /bin/sh +SHELL := /bin/bash + +# Define path variables +SCRIPT_DIR := $(shell pwd) +ASSETS_DIR := $(shell realpath $(SCRIPT_DIR)/assets/) +CONTAINER_DIR := $(realpath $(SCRIPT_DIR)/container/) +# The top-level directory (first path component) of the resolved path to the +# current directory. Intended to be mounted into Singularity containers. +MNT_ROOT := /$(shell readlink -f . 
| cut -d"/" -f2) +INSTALL_LOG := "$(SCRIPT_DIR)/.install.log" + +define log_message + @echo "--------------------------------------------------------------------------------" | tee -a $(INSTALL_LOG); + @echo "$$(date "+%Y-%m-%d %H:%M:%S"): $1" | tee -a $(INSTALL_LOG); + @echo "--------------------------------------------------------------------------------" | tee -a $(INSTALL_LOG); +endef + +# ============================================================================== +# Main rules +# ============================================================================== + +install: assets/databases/emu_database/species_taxid.fasta assets/databases/emu_database/taxonomy.tsv + +assets/databases/emu_database/species_taxid.fasta: assets/databases/emu_database/species_taxid.fasta.gz + zcat $< > $@ + +assets/databases/emu_database/taxonomy.tsv: assets/databases/emu_database/taxonomy.tsv.gz + zcat $< > $@ From d1906f7d5aef6314fa0b675becbc22c67c0800d5 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Mon, 28 Oct 2024 14:26:40 +0100 Subject: [PATCH 3/4] Organize test assets for running test --- .github/workflows/ci.yml | 10 ++-------- .../{ => barcode01}/small_test_data2.fastq.gz | Bin .../{ => barcode01}/small_test_data3.fastq.gz | Bin .../medium_Mock_dil_1_2_BC1.fastq.gz | Bin .../medium_Mock_dil_1_2_BC3.fastq.gz | Bin .../{ => barcode03}/Mock_dil_1_2_BC1.fastq.gz | Bin .../{ => barcode03}/Mock_dil_1_2_BC3.fastq.gz | Bin 7 files changed, 2 insertions(+), 8 deletions(-) rename assets/test_assets/{ => barcode01}/small_test_data2.fastq.gz (100%) rename assets/test_assets/{ => barcode01}/small_test_data3.fastq.gz (100%) rename assets/test_assets/{ => barcode02}/medium_Mock_dil_1_2_BC1.fastq.gz (100%) rename assets/test_assets/{ => barcode02}/medium_Mock_dil_1_2_BC3.fastq.gz (100%) rename assets/test_assets/{ => barcode03}/Mock_dil_1_2_BC1.fastq.gz (100%) rename assets/test_assets/{ => barcode03}/Mock_dil_1_2_BC3.fastq.gz (100%) diff --git a/.github/workflows/ci.yml 
b/.github/workflows/ci.yml index 483e1bd..28182de 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,20 +58,14 @@ jobs: run: df -h - - name: Adapt paths in samplelist.csv - # We have to use '#' as a separator, as there are slashes in the $PWD - # path, messing up with the s///-replace syntax - run: | - sed -i "s#PATH_TO_GMS16S#$PWD#g" assets/test_data/samplelist.csv - - name: Run pipeline with test data run: | nextflow run main.nf \ - --input assets/examples/samplesheet_medium.csv \ --outdir results \ --db assets/databases/emu_database \ --seqtype map-ont \ -profile singularity,test \ --quality_filtering \ --longread_qc_qualityfilter_minlength 1200 \ - --longread_qc_qualityfilter_maxlength 1800 + --longread_qc_qualityfilter_maxlength 1800 \ + --merge_fastq_pass assets/test_assets diff --git a/assets/test_assets/small_test_data2.fastq.gz b/assets/test_assets/barcode01/small_test_data2.fastq.gz similarity index 100% rename from assets/test_assets/small_test_data2.fastq.gz rename to assets/test_assets/barcode01/small_test_data2.fastq.gz diff --git a/assets/test_assets/small_test_data3.fastq.gz b/assets/test_assets/barcode01/small_test_data3.fastq.gz similarity index 100% rename from assets/test_assets/small_test_data3.fastq.gz rename to assets/test_assets/barcode01/small_test_data3.fastq.gz diff --git a/assets/test_assets/medium_Mock_dil_1_2_BC1.fastq.gz b/assets/test_assets/barcode02/medium_Mock_dil_1_2_BC1.fastq.gz similarity index 100% rename from assets/test_assets/medium_Mock_dil_1_2_BC1.fastq.gz rename to assets/test_assets/barcode02/medium_Mock_dil_1_2_BC1.fastq.gz diff --git a/assets/test_assets/medium_Mock_dil_1_2_BC3.fastq.gz b/assets/test_assets/barcode02/medium_Mock_dil_1_2_BC3.fastq.gz similarity index 100% rename from assets/test_assets/medium_Mock_dil_1_2_BC3.fastq.gz rename to assets/test_assets/barcode02/medium_Mock_dil_1_2_BC3.fastq.gz diff --git a/assets/test_assets/Mock_dil_1_2_BC1.fastq.gz 
b/assets/test_assets/barcode03/Mock_dil_1_2_BC1.fastq.gz similarity index 100% rename from assets/test_assets/Mock_dil_1_2_BC1.fastq.gz rename to assets/test_assets/barcode03/Mock_dil_1_2_BC1.fastq.gz diff --git a/assets/test_assets/Mock_dil_1_2_BC3.fastq.gz b/assets/test_assets/barcode03/Mock_dil_1_2_BC3.fastq.gz similarity index 100% rename from assets/test_assets/Mock_dil_1_2_BC3.fastq.gz rename to assets/test_assets/barcode03/Mock_dil_1_2_BC3.fastq.gz From 2fbef62250d203efc5cfbc872d9ae64a8b22a9c1 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Mon, 28 Oct 2024 14:29:55 +0100 Subject: [PATCH 4/4] Send absolute path to workflow variables Quote the $(pwd)-based paths so that they are not word-split if the checkout path contains whitespace. --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28182de..57d4110 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,10 +62,10 @@ jobs: run: | nextflow run main.nf \ --outdir results \ - --db assets/databases/emu_database \ + --db "$(pwd)/assets/databases/emu_database" \ --seqtype map-ont \ -profile singularity,test \ --quality_filtering \ --longread_qc_qualityfilter_minlength 1200 \ --longread_qc_qualityfilter_maxlength 1800 \ - --merge_fastq_pass assets/test_assets + --merge_fastq_pass "$(pwd)/assets/test_assets"