Skip to content

Commit

Permalink
Add patched MultiQC to the repo
Browse files Browse the repository at this point in the history
  • Loading branch information
dialvarezs committed Aug 26, 2024
1 parent 8b4ad74 commit 2b3d848
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 0 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/publish_multiqc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Builds the MultiQC container image and publishes it to the
# GitHub Container Registry via the shared reusable workflow.
name: MultiQC to GitHub Container Registry

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  # Run on pushes to main that touch files directly inside the MultiQC
  # container directory.
  push:
    branches:
      - main
    paths:
      - 'containers/multiqc/*'

jobs:
  call-workflow-passing-data:
    # The build/push logic lives in the reusable workflow; this job only
    # supplies the package-specific inputs and the registry credential.
    uses: ./.github/workflows/reusable_publish_pypkg.yml
    secrets:
      GH_TOKEN: ${{ secrets.GH_TOKEN }}
    with:
      package_name: multiqc
      directory: ./containers/multiqc/
      image: ${{ github.repository }}/multiqc
62 changes: 62 additions & 0 deletions .github/workflows/reusable_publish_pypkg.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Reusable workflow: builds the Containerfile found in `directory` and pushes
# the image to a container registry, tagged with both the version pinned for
# `package_name` in `<directory>/requirements.txt` and `latest`.
name: Reusable Workflow for Publish python packages to GitHub Container Registry

on:
  workflow_call:
    inputs:
      # Name of the Python package whose `==` pin provides the image version tag.
      package_name:
        required: true
        type: string
      # Registry host to log into and push to.
      registry:
        required: false
        type: string
        default: ghcr.io
      # Image path below the registry host (without tag).
      image:
        required: true
        type: string
      # Build context; must contain `Containerfile` and `requirements.txt`.
      directory:
        required: true
        type: string
    secrets:
      # Token used as the registry password.
      GH_TOKEN:
        required: true

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Log into Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ inputs.registry }}
          username: ${{ github.actor }}
          password: ${{ secrets.GH_TOKEN }}

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3

      - name: Get version
        id: getversion
        # Pass inputs through the environment instead of expanding
        # `${{ ... }}` inside the script, so their content is never parsed
        # as shell code.
        env:
          PACKAGE: ${{ inputs.package_name }}
          DIRECTORY: ${{ inputs.directory }}
        run: |
          # Take the first `<package>==<version>` pin; a multi-line value
          # would corrupt the GITHUB_OUTPUT entry.
          VERSION=$(grep -Po "(?<=${PACKAGE}==)[0-9.]+" "./${DIRECTORY}/requirements.txt" | head -n 1)
          # The pipeline's status is that of `head`, so detect a missing pin
          # explicitly rather than pushing an image with an empty tag.
          if [ -z "${VERSION}" ]; then
            echo "::error::No '${PACKAGE}==<version>' pin found in ./${DIRECTORY}/requirements.txt"
            exit 1
          fi
          echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"

      - name: Build and push
        id: docker_build
        uses: docker/build-push-action@v6
        with:
          context: ${{ inputs.directory }}
          file: ${{ inputs.directory }}/Containerfile
          push: true
          tags: |
            ${{ inputs.registry }}/${{ inputs.image }}:${{ steps.getversion.outputs.version }}
            ${{ inputs.registry }}/${{ inputs.image }}:latest

      - name: Image digest
        env:
          DIGEST: ${{ steps.docker_build.outputs.digest }}
        run: echo "${DIGEST}"
24 changes: 24 additions & 0 deletions containers/multiqc/Containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# MultiQC image with a patched nanoq module (bar chart for read qualities).
FROM python:3.11-slim

# Build-time only: keeps apt from prompting without persisting the setting
# in the final image environment.
ARG DEBIAN_FRONTEND="noninteractive"

LABEL author="Diego Alvarez ([email protected])"
LABEL description="MultiQC (https://github.com/MultiQC/MultiQC) with nanoq patched module"
LABEL org.opencontainers.image.source="https://github.com/dialvarezs/containers"

# `patch` is required by the patching step below.
# Skip recommended packages to keep the image small, and drop the apt lists
# in the same layer so they are not stored in the image.
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y --no-install-recommends procps patch && \
    rm -fr /var/lib/apt/lists/*

# Put the virtual environment first on PATH so `python` and the installed
# entry points resolve to it.
ENV VIRTUAL_ENV=/venv
ENV PATH="${VIRTUAL_ENV}/bin:$PATH"

# uv is bind-mounted from its published image for this step only, so the
# binary is not part of the final image.
# NOTE(review): consider pinning the uv image tag for reproducible builds.
COPY requirements.txt /tmp/requirements.txt
RUN --mount=from=ghcr.io/astral-sh/uv:latest,source=/uv,target=/bin/uv \
    uv venv ${VIRTUAL_ENV} && \
    uv pip install --no-cache -r /tmp/requirements.txt

# Patch the nanoq module to use a bar chart instead of a line chart.
# The site-packages directory is resolved from the active interpreter rather
# than hard-coded, so the step keeps working if VIRTUAL_ENV or the base
# image's Python version changes.
COPY nanoq_bargraph.patch /tmp/nanoq_bargraph.patch
RUN cd "$(python -c 'import sysconfig; print(sysconfig.get_path("purelib"))')/multiqc" && \
    patch -p1 < /tmp/nanoq_bargraph.patch
132 changes: 132 additions & 0 deletions containers/multiqc/nanoq_bargraph.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
diff --git a/modules/nanoq/nanoq.py b/modules/nanoq/nanoq.py
index 398b2a24..bbc0d711 100644
--- a/modules/nanoq/nanoq.py
+++ b/modules/nanoq/nanoq.py
@@ -5,8 +5,9 @@ from copy import deepcopy
from typing import Dict, List, Any

from multiqc.base_module import BaseMultiqcModule, ModuleNoSamplesFound
-from multiqc.plots import table, linegraph
+from multiqc.plots import table, bargraph, linegraph
from multiqc.plots.table_object import TableConfig
+from multiqc.utils import mqc_colour

log = logging.getLogger(__name__)

@@ -131,7 +132,7 @@ class MultiqcModule(BaseMultiqcModule):
)

general_stats_headers = deepcopy(headers)
- for k, h in general_stats_headers.items():
+ for h in general_stats_headers.values():
h["hidden"] = True
general_stats_headers["Number of reads"]["hidden"] = False
general_stats_headers["N50 read length"]["hidden"] = False
@@ -141,16 +142,39 @@ class MultiqcModule(BaseMultiqcModule):

def reads_by_quality_plot(self, data_by_sample: Dict[str, Dict[str, float]]) -> None:
# Get data for plot
- lineplot_data: Dict[str, Dict[int, float]] = defaultdict(dict)
+ barplot_data: Dict[str, Dict[str, float]] = defaultdict(dict)
+ min_quality = 10
+
for name, d in data_by_sample.items():
- reads_by_q = {k: v for k, v in d.items() if k.startswith("Reads > Q")}
- if len(reads_by_q) == 0:
+ reads_by_q = {
+ int(k.split("> Q")[1]): v for k, v in d.items() if k.startswith("Reads > Q")
+ }
+ if not reads_by_q:
+ continue
+
+ thresholds = sorted(th for th in reads_by_q if th >= min_quality)
+ if not thresholds:
continue

- total_reads = d["Number of reads"]
- for k, v in reads_by_q.items():
- threshold = int(k.split("> Q")[1])
- lineplot_data[name][threshold] = v / total_reads * 100
+ keys = [f"<Q{min_quality}"]
+
+ for th, thn in zip(thresholds[:-1], thresholds[1:]):
+ key = f"Q{th}-{thn}"
+ keys.append(key)
+ barplot_data[name][key] = reads_by_q[th] - reads_by_q[thn]
+
+ last_key = f">Q{thresholds[-1]}"
+ keys.append(last_key)
+ barplot_data[name][f"<Q{min_quality}"] = (
+ d["Number of reads"] - reads_by_q[thresholds[0]]
+ )
+ barplot_data[name][last_key] = reads_by_q[thresholds[-1]]
+
+ colours = mqc_colour.mqc_colour_scale("RdYlGn-rev", 0, len(keys))
+ cats = {
+ k: {"name": f"Reads {k}", "color": colours.get_colour(idx, lighten=1)}
+ for idx, k in enumerate(keys[::-1])
+ }

# Plot
self.add_section(
@@ -163,9 +187,10 @@ class MultiqcModule(BaseMultiqcModule):
The phred score represents the liklelyhood that a given read contains errors.
High quality reads have a high score.
""",
- plot=linegraph.plot(
- lineplot_data,
- linegraph.LinePlotConfig(
+ plot=bargraph.plot(
+ barplot_data,
+ cats,
+ pconfig=dict(
id="nanoq_plot_quality_plot",
title="Nanoq: read qualities",
),
@@ -173,6 +198,7 @@ class MultiqcModule(BaseMultiqcModule):
)

def reads_by_length_plot(self, data_by_sample: Dict[str, Dict[str, float]]) -> None:
+ # sourcery skip: simplify-len-comparison
# Get data for plot
linegraph_data: Dict[str, Dict[int, float]] = defaultdict(dict)
for name, d in data_by_sample.items():
@@ -235,12 +261,18 @@ def parse_nanoq_log(f) -> Dict[str, float]:

# Helper function to parse thresholds part
def parse_thresholds(lines: List[str], threshold_type: str) -> Dict[str, List[Any]]:
- _thresholds: Dict[str, List[Any]] = {"Threshold": [], "Number of Reads": [], "Percentage": []}
+ _thresholds: Dict[str, List[Any]] = {
+ "Threshold": [],
+ "Number of Reads": [],
+ "Percentage": [],
+ }
for _line in lines:
match = re.match(r">\s*(\d+)\s+(\d+)\s+(\d+\.\d+)%", _line)
if match:
_threshold, _num_reads, _percentage = match.groups()
- _thresholds["Threshold"].append(_threshold + (threshold_type if threshold_type == "bp" else ""))
+ _thresholds["Threshold"].append(
+ _threshold + (threshold_type if threshold_type == "bp" else "")
+ )
_thresholds["Number of Reads"].append(int(_num_reads))
_thresholds["Percentage"].append(float(_percentage))
return _thresholds
@@ -248,10 +280,15 @@ def parse_nanoq_log(f) -> Dict[str, float]:
read_length_thresholds = parse_thresholds(length_threshold_lines, "bp")
read_quality_thresholds = parse_thresholds(quality_threshold_lines, "")

- for threshold, num_reads in zip(read_length_thresholds["Threshold"], read_length_thresholds["Number of Reads"]):
+ for threshold, num_reads in zip(
+ read_length_thresholds["Threshold"], read_length_thresholds["Number of Reads"]
+ ):
stats[f"Reads > {threshold}"] = num_reads

- for threshold, num_reads in zip(read_quality_thresholds["Threshold"], read_quality_thresholds["Number of Reads"]):
+ for threshold, num_reads in zip(
+ read_quality_thresholds["Threshold"], read_quality_thresholds["Number of Reads"]
+ ):
stats[f"Reads > Q{threshold}"] = num_reads

return stats
+
1 change: 1 addition & 0 deletions containers/multiqc/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
multiqc==1.24.1

0 comments on commit 2b3d848

Please sign in to comment.