Commit

Change supplementary material to Quarto
fasterius committed Oct 28, 2023
1 parent d2bc32f commit 366163c
Showing 4 changed files with 56 additions and 52 deletions.
16 changes: 13 additions & 3 deletions tutorials/containers/Dockerfile
@@ -13,8 +13,18 @@ WORKDIR /course
ENV TZ="Europe/Stockholm"
ENV DEBIAN_FRONTEND=noninteractive

# Install package for setting timezone
RUN apt-get update && apt-get install -y tzdata && apt-get clean
# Install packages required for timezone and Quarto installation
RUN apt-get update \
&& apt-get install -y tzdata curl \
&& apt-get clean

# Install Quarto
ARG QUARTO_VERSION="1.3.450"
RUN mkdir -p /opt/quarto/${QUARTO_VERSION} \
&& curl -o quarto.tar.gz -L "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-amd64.tar.gz" \
&& tar -zxvf quarto.tar.gz -C "/opt/quarto/${QUARTO_VERSION}" --strip-components=1 \
&& rm quarto.tar.gz
ENV PATH /opt/quarto/${QUARTO_VERSION}/bin:${PATH}

# Configure Conda/Mamba
RUN mamba init bash && conda config --set channel_priority strict && \
@@ -40,4 +50,4 @@ COPY code ./code/
# Open up port 8888
EXPOSE 8888

CMD snakemake -rp -c 1 --configfile config.yml
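
As a quick sanity check of the new Quarto layer, the image can be built and asked for its Quarto version. This is a minimal sketch, assuming the build is run from the repository root; the image tag is a placeholder, not something defined in the repository:

```bash
# Build the tutorial image and confirm Quarto is on PATH inside the container.
# The tag "containers-tutorial" is a placeholder assumption.
docker build -t containers-tutorial tutorials/containers/
docker run --rm containers-tutorial quarto --version   # expected: 1.3.450
```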
34 changes: 16 additions & 18 deletions tutorials/containers/Snakefile
@@ -19,16 +19,16 @@ def get_sample_url(wildcards):
rule get_SRA_by_accession:
"""
Retrieve a single-read FASTQ file from a remote repository
The fastq file is retrieved with wget and piped directly to the
seqtk program which samples a number of reads defined by the
max_reads parameter. The fastq output from seqtk is in turn piped
to gzip and stored as a compressed *.fastq.gz file.
The actual URL for the file is obtained from the config which
requires that each sample_id is defined in the configfile as for
example:
sample_id: "https://url/to/file"
"""
output:
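
For reference, a minimal sketch of the download-and-subsample pipeline that the docstring above describes; the URL, read count, and output path are placeholder assumptions, since the rule takes its actual values from the workflow config:

```bash
# Placeholder values; in the workflow these come from config.yml and the
# rule's params/wildcards.
url="https://url/to/file.fastq.gz"
max_reads=25000

# Stream the remote FASTQ file, subsample max_reads reads with seqtk,
# and store the result as a compressed fastq.gz file.
wget -q -O - "$url" \
    | seqtk sample - "$max_reads" \
    | gzip -c > sample.fastq.gz
```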
@@ -206,16 +206,14 @@ rule make_supplementary:
GSM_IDs = config["sample_ids_geo"]
shell:
"""
echo 'rmarkdown::render("code/supplementary_material.Rmd", \
output_file="supplementary.html", \
output_dir="results/", \
params=list(counts_file="{input.counts}", \
multiqc_file="{input.multiqc_file}", \
summary_file="{input.summary_file}", \
rulegraph_file="{input.rulegraph}", \
SRR_IDs="{params.SRR_IDs}", \
GSM_IDs="{params.GSM_IDs}"))' \
| R --vanilla > {log} 2>&1
quarto render code/supplementary_material.qmd \
-P counts_file:../{input.counts} \
-P multiqc_file:../{input.multiqc_file} \
-P summary_file:../{input.summary_file} \
-P rulegraph_file:../{input.rulegraph} \
-P srr_ids:"{params.SRR_IDs}" \
-P gsm_ids:"{params.GSM_IDs}"
mv code/supplementary_material.html {output}
"""

rule generate_rulegraph:
@@ -227,4 +225,4 @@ rule generate_rulegraph:
shell:
"""
snakemake --rulegraph --configfile config.yml | dot -Tpng > {output}
"""
"""
7 changes: 0 additions & 7 deletions tutorials/containers/code/header.tex

This file was deleted.

tutorials/containers/code/supplementary_material.Rmd → tutorials/containers/code/supplementary_material.qmd
@@ -1,37 +1,39 @@
---
title: "Supplementary Material"
output: html_document
format:
html:
echo: false
embed-resources: true
engine: knitr
params:
counts_file: "results/tables/counts.tsv"
multiqc_file: "intermediate/multiqc_general_stats.txt"
summary_file: "results/tables/counts.tsv.summary"
rulegraph_file: "results/rulegraph.png"
SRR_IDs: "SRR935090 SRR935091 SRR935092"
GSM_IDs: "GSM1186459 GSM1186460 GSM1186461"
srr_ids: "SRR935090 SRR935091 SRR935092"
gsm_ids: "GSM1186459 GSM1186460 GSM1186461"
---

```{r Setup, include = FALSE}
knitr::opts_knit$set(root.dir = "../")
knitr::opts_chunk$set(echo = FALSE)
```

```{r Dependencies, include = FALSE}
```{r Dependencies}
#| include: false
library("ggplot2")
library("reshape2")
library("pheatmap")
library("GEOquery")
```

```{r Read parameters, include = FALSE}
```{r Read parameters}
#| include: false
counts_file <- params$counts_file
multiqc_file <- params$multiqc_file
summary_file <- params$summary_file
rulegraph_file <- params$rulegraph_file
SRR_IDs <- unlist(strsplit(params$SRR_IDs, " "))
GSM_IDs <- unlist(strsplit(params$GSM_IDs, " "))
srr_ids <- unlist(strsplit(params$srr_ids, " "))
gsm_ids <- unlist(strsplit(params$gsm_ids, " "))
```

```{r Read data, include = FALSE}
```{r Read data}
#| include: false
# Read counts
counts <- read.delim(counts_file,
header = TRUE,
@@ -50,13 +52,13 @@ colnames(counts_summary) <- gsub(".*(SRR[0-9]+)\\..*", "\\1",
# Read metadata
meta <- data.frame()
for (GSM in GSM_IDs) {
for (GSM in gsm_ids) {
gsm <- Meta(getGEO(GSM))
current_meta <- as.data.frame(do.call(cbind, gsm))
meta <- rbind(meta, current_meta)
}
meta <- meta[c("title", "geo_accession", "source_name_ch1", "characteristics_ch1")]
gsm2srr <- data.frame(geo_accession = GSM_IDs, SRR = SRR_IDs)
gsm2srr <- data.frame(geo_accession = gsm_ids, SRR = srr_ids)
meta <- merge(meta, gsm2srr, by = "geo_accession")
# Read FastQC data and update column names
@@ -69,7 +71,7 @@ patterns <- c(".+percent_duplicates.*",
subs <- c("Percent duplicates", "Percent GC", "Avg sequence length",
"Percent fails", "Total sequences")
for (i in 1:length(patterns)) {
colnames(qc) <- gsub(patterns[i], subs[i], colnames(qc))
}
meta <- merge(meta, qc, by.x = "SRR", by.y = "Sample")
```
@@ -106,7 +108,8 @@ ggplot(count_data, aes(x = Sample, y = Reads, fill = Feature)) +
geom_bar(stat = "identity")
```

```{r Gene heatmap, fig.height = 14}
```{r Gene heatmap}
#| fig-height: 14
cv_cutoff <- 1.2
max_cutoff <- 5
heatmap_data <-
@@ -127,9 +130,9 @@ show(gg)
The code for reproducing this analysis is available in this [GitHub repo](https://github.com/NBISweden/workshop-reproducible-research/tree/master/docker).
The repo contains:

* A Snakemake workflow for running all analysis steps
* A Conda environment file for installing all needed dependencies
* A Docker file for running the analysis in a well-defined and isolated system
- A Snakemake workflow for running all analysis steps
- A Conda environment file for installing all needed dependencies
- A Docker file for running the analysis in a well-defined and isolated system

The results in this supplementary were generated in the following R environment:

