From 485159c0c8e41fd4e138b5f76b9fab6960136bfa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20M=C3=A4hler?=
<2573608+maehler@users.noreply.github.com>
Date: Mon, 4 Dec 2023 09:41:15 +0100
Subject: [PATCH 01/12] Add scaffolding for a general report
---
.tests/integration/Snakefile_module | 1 +
.tests/integration/config.yaml | 3 ++
.../config/NA12878_N.general_report.json | 0
config/config.yaml | 3 ++
config/output_files.yaml | 3 ++
docs/softwares.md | 22 ++++++++++++
workflow/Snakefile | 1 +
workflow/rules/general_html_report.smk | 34 +++++++++++++++++++
workflow/schemas/config.schema.yaml | 25 ++++++++++++++
workflow/schemas/resources.schema.yaml | 20 +++++++++++
workflow/schemas/rules.schema.yaml | 23 +++++++++++++
workflow/scripts/general_html_report.py | 22 ++++++++++++
.../templates/general_html_report/index.html | 14 ++++++++
13 files changed, 171 insertions(+)
create mode 100644 .tests/integration/config/NA12878_N.general_report.json
create mode 100644 workflow/rules/general_html_report.smk
create mode 100644 workflow/scripts/general_html_report.py
create mode 100644 workflow/templates/general_html_report/index.html
diff --git a/.tests/integration/Snakefile_module b/.tests/integration/Snakefile_module
index 302a1d3..399a432 100644
--- a/.tests/integration/Snakefile_module
+++ b/.tests/integration/Snakefile_module
@@ -4,6 +4,7 @@ from hydra_genetics.utils.misc import get_module_snakefile
rule all:
input:
"reports/cnv_html_report/NA12878_N.pathology.cnv_report.html",
+ "reports/general_html_report/NA12878_N.general_report.html",
module reports:
diff --git a/.tests/integration/config.yaml b/.tests/integration/config.yaml
index 33db5d0..9e7891f 100644
--- a/.tests/integration/config.yaml
+++ b/.tests/integration/config.yaml
@@ -9,6 +9,9 @@ reference:
cnv_html_report:
cytobands: true
+general_html_report:
+ json: "config/{sample}_{type}.general_report.json"
+
merge_cnv_json:
annotations:
- config/amp_genes.bed
diff --git a/.tests/integration/config/NA12878_N.general_report.json b/.tests/integration/config/NA12878_N.general_report.json
new file mode 100644
index 0000000..e69de29
diff --git a/config/config.yaml b/config/config.yaml
index 656224c..da93564 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -14,6 +14,9 @@ cnv_html_report:
cytobands: false
show_table: true
+general_html_report:
+ json: "reports/general_html_report/{sample}_{type}.general_report.json"
+
merge_cnv_json:
annotations:
cytobands:
diff --git a/config/output_files.yaml b/config/output_files.yaml
index 136b1fb..8a836e9 100644
--- a/config/output_files.yaml
+++ b/config/output_files.yaml
@@ -2,3 +2,6 @@ files:
- name: "CNV HTML report"
input: "reports/cnv_html_report/{sample}_{type}.pathology.cnv_report.html"
output: "results/cnv/{sample}_{type}.pathology.cnv_report.html"
+ - name: "General HTML report"
+ input: "reports/general_html_report/{sample}_{type}.general_report.html"
+ output: "results/reports/{sample}_{type}.general_report.html"
diff --git a/docs/softwares.md b/docs/softwares.md
index 6ce2413..1caf123 100644
--- a/docs/softwares.md
+++ b/docs/softwares.md
@@ -65,3 +65,25 @@ Merge JSON files from multiple CNV callers and add annotations and other sample
#### Resources settings (`resources.yaml`)
#RESOURCESSCHEMA__merge_cnv_json#
+
+## general_html_report
+
+Generate a general HTML report for a single sample.
+
+### :snake: Rule
+
+#SNAKEMAKE_RULE_SOURCE__general_html_report__general_html_report#
+
+#### :left_right_arrow: input / output files
+
+#SNAKEMAKE_RULE_TABLE__general_html_report__general_html_report#
+
+### :wrench: Configuration
+
+#### Software settings (`config.yaml`)
+
+#CONFIGSCHEMA__general_html_report#
+
+#### Resources settings (`resources.yaml`)
+
+#RESOURCESSCHEMA__general_html_report#
diff --git a/workflow/Snakefile b/workflow/Snakefile
index f0d23b0..34eaf3f 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -6,6 +6,7 @@ __license__ = "GPL-3"
# Include pipeline specific rules
include: "rules/common.smk"
+include: "rules/general_html_report.smk"
include: "rules/cnv_html_report.smk"
diff --git a/workflow/rules/general_html_report.smk b/workflow/rules/general_html_report.smk
new file mode 100644
index 0000000..39184c4
--- /dev/null
+++ b/workflow/rules/general_html_report.smk
@@ -0,0 +1,34 @@
+__author__ = "Niklas Mähler"
+__copyright__ = "Copyright 2023, Niklas Mähler"
+__email__ = "niklas.mahler@regionvasterbotten.se"
+__license__ = "GPL-3"
+
+
+rule general_html_report:
+ input:
+ json=config.get("general_html_report", {}).get("json"),
+ html_template=workflow.source_path("../templates/general_html_report/index.html"),
+ output:
+ html="reports/general_html_report/{sample}_{type}.general_report.html",
+ params:
+ extra=config.get("general_html_report", {}).get("extra", ""),
+ log:
+ "reports/general_html_report/{sample}_{type}.general_report.log",
+ benchmark:
+ repeat(
+ "reports/general_html_report/{sample}_{type}.output.benchmark.tsv",
+ config.get("general_html_report", {}).get("benchmark_repeats", 1),
+ )
+ threads: config.get("general_html_report", {}).get("threads", config["default_resources"]["threads"])
+ resources:
+ mem_mb=config.get("general_html_report", {}).get("mem_mb", config["default_resources"]["mem_mb"]),
+ mem_per_cpu=config.get("general_html_report", {}).get("mem_per_cpu", config["default_resources"]["mem_per_cpu"]),
+ partition=config.get("general_html_report", {}).get("partition", config["default_resources"]["partition"]),
+ threads=config.get("general_html_report", {}).get("threads", config["default_resources"]["threads"]),
+ time=config.get("general_html_report", {}).get("time", config["default_resources"]["time"]),
+ container:
+ config.get("general_html_report", {}).get("container", config["default_container"])
+ message:
+ "{rule}: generate general html report from json config {input.json}"
+ script:
+ "../scripts/general_html_report.py"
diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml
index 677d0ba..06d95ce 100644
--- a/workflow/schemas/config.schema.yaml
+++ b/workflow/schemas/config.schema.yaml
@@ -62,6 +62,31 @@ properties:
type: string
description: name or path to docker/singularity container
+ general_html_report:
+ type: object
+ description: parameters for general_html_report
+ properties:
+ json:
+ type: string
+ format: uri-reference
+ description: |
+ Path to the sample-specific configuration of the report. The wildcards
+ `sample` and `type` are supported.
+ examples:
+ - "report_configs/{sample}_{type}/general_report.json"
+ - "report_configs/general_html_report/{sample}_{type}.general_report.json"
+ benchmark_repeats:
+ type: integer
+ description: set number of times benchmark should be repeated
+ container:
+ type: string
+ description: name or path to docker/singularity container
+ extra:
+ type: string
+ description: parameters that should be forwarded
+ required:
+ - json
+
merge_cnv_json:
type: object
description: parameters for merge_cnv_json
diff --git a/workflow/schemas/resources.schema.yaml b/workflow/schemas/resources.schema.yaml
index 842e6d7..8ecdf7f 100644
--- a/workflow/schemas/resources.schema.yaml
+++ b/workflow/schemas/resources.schema.yaml
@@ -67,6 +67,26 @@ properties:
type: string
description: max execution time
+ general_html_report:
+ type: object
+ description: resource definitions for general_html_report
+ properties:
+ mem_mb:
+ type: integer
+ description: max memory in MB to be available
+ mem_per_cpu:
+ type: integer
+ description: memory in MB used per cpu
+ partition:
+ type: string
+ description: partition to use on cluster
+ threads:
+ type: integer
+ description: number of threads to be available
+ time:
+ type: string
+ description: max execution time
+
merge_cnv_json:
type: object
description: resource definitions for merge_cnv_json
diff --git a/workflow/schemas/rules.schema.yaml b/workflow/schemas/rules.schema.yaml
index e5fe303..b816a02 100644
--- a/workflow/schemas/rules.schema.yaml
+++ b/workflow/schemas/rules.schema.yaml
@@ -83,6 +83,29 @@ properties:
description: >
A JSON representation of the CNV results from a specific caller.
+ general_html_report:
+ type: object
+ description: input and output parameters for general_html_report
+ properties:
+ input:
+ type: object
+ description: list of inputs
+ properties:
+ json:
+ type: string
+ description: sample-specific configuration of the report
+ html_template:
+ type: string
+ description: path to the html template to use for the report
+
+ output:
+ type: object
+ description: list of outputs
+ properties:
+ html:
+ type: string
+ description: path to the generated report
+
merge_cnv_json:
type: object
description: input and output parameters for merge_cnv_json
diff --git a/workflow/scripts/general_html_report.py b/workflow/scripts/general_html_report.py
new file mode 100644
index 0000000..57ed703
--- /dev/null
+++ b/workflow/scripts/general_html_report.py
@@ -0,0 +1,22 @@
+from jinja2 import Template
+
+
+def generate_report(template_filename, json):
+ with open(template_filename) as f:
+ template = Template(source=f.read())
+
+ return template.render(dict(sample=snakemake.wildcards.sample))
+
+
+def main():
+ html_template = snakemake.input.html_template
+ json = snakemake.input.json
+
+ report_content = generate_report(html_template, json)
+
+ with open(snakemake.output.html, "w") as f:
+ f.write(report_content)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/workflow/templates/general_html_report/index.html b/workflow/templates/general_html_report/index.html
new file mode 100644
index 0000000..be34175
--- /dev/null
+++ b/workflow/templates/general_html_report/index.html
@@ -0,0 +1,14 @@
+
+
+
+ {{ sample }} – General Report
+
+
+
+
+
+
+
+
From 021b0c6a06ba7b284690cdbe9c287a8d6d37d3b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20M=C3=A4hler?=
<2573608+maehler@users.noreply.github.com>
Date: Mon, 4 Dec 2023 13:14:05 +0100
Subject: [PATCH 02/12] Parse and validate report config
---
.../config/NA12878_N.general_report.json | 16 ++++++
workflow/rules/general_html_report.smk | 3 +-
.../general_html_report_json.schema.yaml | 49 +++++++++++++++++++
workflow/scripts/general_html_report.py | 32 ++++++++++--
.../templates/general_html_report/index.html | 32 +++++++++++-
5 files changed, 125 insertions(+), 7 deletions(-)
create mode 100644 workflow/schemas/general_html_report_json.schema.yaml
diff --git a/.tests/integration/config/NA12878_N.general_report.json b/.tests/integration/config/NA12878_N.general_report.json
index e69de29..7ee8ce9 100644
--- a/.tests/integration/config/NA12878_N.general_report.json
+++ b/.tests/integration/config/NA12878_N.general_report.json
@@ -0,0 +1,16 @@
+{
+ "sample": "NA12878",
+ "analysis_date": "2023-11-17",
+ "pipeline": {
+ "name": "Twist Solid",
+ "version": "0.9.0",
+ "uri": "https://github.com/genomic-medicine-sweden/Twist_Solid/tree/v0.9.0"
+ },
+ "file_links": [
+ {
+ "name": "CNV HTML report",
+ "description": "HTML report summarising the results of the CNV analysis",
+ "uri": "results/cnv/NA12878_N.pathology.cnv_report.html"
+ }
+ ]
+}
diff --git a/workflow/rules/general_html_report.smk b/workflow/rules/general_html_report.smk
index 39184c4..1d3b88b 100644
--- a/workflow/rules/general_html_report.smk
+++ b/workflow/rules/general_html_report.smk
@@ -6,8 +6,9 @@ __license__ = "GPL-3"
rule general_html_report:
input:
- json=config.get("general_html_report", {}).get("json"),
+ config_schema=workflow.source_path("../schemas/general_html_report_json.schema.yaml"),
html_template=workflow.source_path("../templates/general_html_report/index.html"),
+ json=config.get("general_html_report", {}).get("json"),
output:
html="reports/general_html_report/{sample}_{type}.general_report.html",
params:
diff --git a/workflow/schemas/general_html_report_json.schema.yaml b/workflow/schemas/general_html_report_json.schema.yaml
new file mode 100644
index 0000000..4f0b75e
--- /dev/null
+++ b/workflow/schemas/general_html_report_json.schema.yaml
@@ -0,0 +1,49 @@
+$schema: https://json-schema.org/draft/2020-12/schema
+title: General Report JSON Config
+description: Configuration of a general HTML report for a Hydra Genetics pipeline
+type: object
+properties:
+ sample:
+ type: string
+ description: Name of the sample
+
+ analysis_date:
+ type: string
+ description: Date of the analysis
+
+ pipeline:
+ type: object
+ description: Pipeline information
+ properties:
+ name:
+ type: string
+ description: Name of the pipeline
+ version:
+ type: string
+ description: Version of the pipeline
+ uri:
+ type: string
+ format: uri-reference
+ description: URI of the pipeline
+
+ file_links:
+ type: array
+ description: List of file links
+ items:
+ type: object
+ properties:
+ name:
+ type: string
+ description: Descriptive name of the file
+ description:
+ type: string
+ description: Description of the file
+ uri:
+ type: string
+ format: uri-reference
+ description: URI of the file
+
+required:
+ - sample
+ - analysis_date
+ - pipeline
diff --git a/workflow/scripts/general_html_report.py b/workflow/scripts/general_html_report.py
index 57ed703..a02e5e4 100644
--- a/workflow/scripts/general_html_report.py
+++ b/workflow/scripts/general_html_report.py
@@ -1,18 +1,42 @@
from jinja2 import Template
+import json
+from jsonschema import validate
+import time
+import yaml
-def generate_report(template_filename, json):
+def validate_dict(d: dict, schema_path: str):
+ with open(schema_path) as f:
+ validate(instance=d, schema=yaml.safe_load(f))
+
+
+def generate_report(template_filename: str, config: dict):
with open(template_filename) as f:
template = Template(source=f.read())
- return template.render(dict(sample=snakemake.wildcards.sample))
+ return template.render(
+ dict(
+ metadata=dict(
+ analysis_date=config["analysis_date"],
+ report_date=time.strftime("%Y-%m-%d %H:%M", time.localtime()),
+ sample=config["sample"],
+ ),
+ pipeline=config["pipeline"],
+ file_links=config["file_links"],
+ )
+ )
def main():
html_template = snakemake.input.html_template
- json = snakemake.input.json
+ json_file = snakemake.input.json
+
+ with open(json_file) as f:
+ config = json.load(f)
+
+ validate_dict(config, snakemake.input.config_schema)
- report_content = generate_report(html_template, json)
+ report_content = generate_report(html_template, config)
with open(snakemake.output.html, "w") as f:
f.write(report_content)
diff --git a/workflow/templates/general_html_report/index.html b/workflow/templates/general_html_report/index.html
index be34175..f4680cc 100644
--- a/workflow/templates/general_html_report/index.html
+++ b/workflow/templates/general_html_report/index.html
@@ -1,14 +1,42 @@
- {{ sample }} – General Report
+ {{ metadata.sample }} – General Report
- {{ sample }}
+ {{ metadata.sample }}
+ Report generated at {{ metadata.report_date }}
+ Sample analysed at {{ metadata.analysis_date }}
+
+
+
+ hydra-genetics/reports v0.2.0
+
+
+
+
+
+
Pipeline
+
+ Name: {{ pipeline.name }}
+ Version: {{ pipeline.version }}
+ Source: {{ pipeline.uri }}
+
+
+
+ {% for fl in file_links %}
+
+ {% endfor %}
From e5b3251616409197a57e37f6fe4ae0d513064d55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20M=C3=A4hler?=
<2573608+maehler@users.noreply.github.com>
Date: Mon, 4 Dec 2023 13:59:36 +0100
Subject: [PATCH 03/12] Add parameter for final directory depth of the report
The depth is needed to correctly resolve relative paths in the report's
JSON config from the final location of the report. This could probably
be automated, for example by inspecting the output files of the
pipeline, but an explicit parameter works as a start.
---
.tests/integration/config.yaml | 1 +
docs/{reports.md => cnv_report.md} | 0
workflow/rules/general_html_report.smk | 1 +
workflow/schemas/config.schema.yaml | 8 ++++++++
workflow/scripts/general_html_report.py | 9 +++++++--
5 files changed, 17 insertions(+), 2 deletions(-)
rename docs/{reports.md => cnv_report.md} (100%)
diff --git a/.tests/integration/config.yaml b/.tests/integration/config.yaml
index 9e7891f..b138112 100644
--- a/.tests/integration/config.yaml
+++ b/.tests/integration/config.yaml
@@ -11,6 +11,7 @@ cnv_html_report:
general_html_report:
json: "config/{sample}_{type}.general_report.json"
+ final_directory_depth: 2
merge_cnv_json:
annotations:
diff --git a/docs/reports.md b/docs/cnv_report.md
similarity index 100%
rename from docs/reports.md
rename to docs/cnv_report.md
diff --git a/workflow/rules/general_html_report.smk b/workflow/rules/general_html_report.smk
index 1d3b88b..135ab1a 100644
--- a/workflow/rules/general_html_report.smk
+++ b/workflow/rules/general_html_report.smk
@@ -12,6 +12,7 @@ rule general_html_report:
output:
html="reports/general_html_report/{sample}_{type}.general_report.html",
params:
+ final_directory_depth=config.get("general_html_report", {}).get("final_directory_depth", 1),
extra=config.get("general_html_report", {}).get("extra", ""),
log:
"reports/general_html_report/{sample}_{type}.general_report.log",
diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml
index 06d95ce..2426efc 100644
--- a/workflow/schemas/config.schema.yaml
+++ b/workflow/schemas/config.schema.yaml
@@ -75,6 +75,13 @@ properties:
examples:
- "report_configs/{sample}_{type}/general_report.json"
- "report_configs/general_html_report/{sample}_{type}.general_report.json"
+ final_directory_depth:
+ type: integer
+ description: |
+ Number of directory levels between the base directory of the file
+ link URIs in the JSON config and the final location of the report.
+ One `../` per level is prepended to each file link URI. For example,
+ if the report is located in `results/reports`, the depth is 2.
benchmark_repeats:
type: integer
description: set number of times benchmark should be repeated
@@ -86,6 +93,7 @@ properties:
description: parameters that should be forwarded
required:
- json
+ - final_directory_depth
merge_cnv_json:
type: object
diff --git a/workflow/scripts/general_html_report.py b/workflow/scripts/general_html_report.py
index a02e5e4..ee9915b 100644
--- a/workflow/scripts/general_html_report.py
+++ b/workflow/scripts/general_html_report.py
@@ -10,10 +10,14 @@ def validate_dict(d: dict, schema_path: str):
validate(instance=d, schema=yaml.safe_load(f))
-def generate_report(template_filename: str, config: dict):
+def generate_report(template_filename: str, config: dict, final_directory_depth: int):
with open(template_filename) as f:
template = Template(source=f.read())
+ if final_directory_depth != 0:
+ for d in config["file_links"]:
+ d["uri"] = final_directory_depth * "../" + d["uri"]
+
return template.render(
dict(
metadata=dict(
@@ -30,13 +34,14 @@ def generate_report(template_filename: str, config: dict):
def main():
html_template = snakemake.input.html_template
json_file = snakemake.input.json
+ final_directory_depth = snakemake.params.final_directory_depth
with open(json_file) as f:
config = json.load(f)
validate_dict(config, snakemake.input.config_schema)
- report_content = generate_report(html_template, config)
+ report_content = generate_report(html_template, config, final_directory_depth)
with open(snakemake.output.html, "w") as f:
f.write(report_content)
From e7bdbc444f68ee38b5225a74b0b56801369aff30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20M=C3=A4hler?=
<2573608+maehler@users.noreply.github.com>
Date: Mon, 4 Dec 2023 14:01:59 +0100
Subject: [PATCH 04/12] Urlencode URLs and trim `../` from the file links
---
workflow/templates/general_html_report/index.html | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/workflow/templates/general_html_report/index.html b/workflow/templates/general_html_report/index.html
index f4680cc..1e21cb2 100644
--- a/workflow/templates/general_html_report/index.html
+++ b/workflow/templates/general_html_report/index.html
@@ -27,7 +27,10 @@ Pipeline
@@ -35,7 +38,7 @@ Pipeline
{% endfor %}