Skip to content

Commit

Permalink
2.2.0: Config update, make_log_useful & watchdog update, Dockerfile, …
Browse files Browse the repository at this point in the history
…formatting & linting
  • Loading branch information
weber8thomas committed Aug 9, 2023
1 parent b85f954 commit 83de295
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 74 deletions.
105 changes: 105 additions & 0 deletions github-actions-runner/Dockerfile-2.2.0.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Base image: conda-forge's Mambaforge distribution, which supplies the `mamba`
# command used further down to build the workflow's conda environments.
#
# The tag is pinned instead of tracking the floating `latest`, so that a rebuild
# of this file yields the same toolchain. Override it at build time with
#   docker build --build-arg MAMBAFORGE_TAG=<tag> ...
# NOTE(review): 23.1.0-4 is assumed to be the Mambaforge release current when
# this file was generated — confirm on Docker Hub, and consider pinning by
# digest (`@sha256:...`) for full reproducibility.
ARG MAMBAFORGE_TAG=23.1.0-4
FROM condaforge/mambaforge:${MAMBAFORGE_TAG}

# Snakemake containerization metadata attached to the image.
# NOTE(review): conda_env_hash presumably identifies the exact set of conda
# environments baked in below; regenerate it if any environment file changes.
LABEL io.github.snakemake.containerized="true"
LABEL io.github.snakemake.conda_env_hash="285fb274e2e9c667a310dbacd0e76c015875280fa7c764272824fc39421dde0d"

# Step 1: Retrieve conda environments

# Conda environment:
# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml
# prefix: /conda-envs/5681728a49bd83ceed09ba194330c858
# channels:
# - bioconda
# - conda-forge
# - defaults
# dependencies:
# - bwa ==0.7.17
#
# ADD creates every missing directory in the destination path, so the former
# `RUN mkdir -p` (an extra layer with no effect on the result) is dropped.
# NOTE(review): the URL is pinned to the v1.7.0 tag but is downloaded without
# an integrity check; consider `ADD --checksum=sha256:<digest>` (BuildKit) once
# the digest of this file has been recorded.
ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml

# Conda environment:
# source: workflow/envs/ashleys_base.yaml
# prefix: /conda-envs/87c04f5d115eff742eca84455513deba
# name: ashleys_base
# channels:
# - conda-forge
# - bioconda
# dependencies:
# - samtools
# - tabix
# - bwa
# - sambamba
# - mosaicatcher
# # - alfred
# - ashleys-qc
# - pandas
# # PUBLISHDIR
# - rsync
# # MULTIQC
# - multiqc
# # Fix sklearn update
# - scikit-learn=1.2.2
#
# The listing above mirrors the environment file copied below; the path is
# relative to the build context, so build from the repository root.
# COPY creates every missing directory in the destination path, so the former
# `RUN mkdir -p` (an extra layer with no effect on the result) is dropped.
COPY workflow/envs/ashleys_base.yaml /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml

# Conda environment:
# source: workflow/envs/ashleys_rtools.yaml
# prefix: /conda-envs/9b847fc31baae8e01dfb7ce438a56b71
# name: rtools
# channels:
# - conda-forge
# - bioconda
# - r
# - anaconda
# dependencies:
# # - bioconductor-biocparallel
# # - bioconductor-bsgenome
# # - bioconductor-bsgenome.hsapiens.ucsc.hg19
# # - bioconductor-bsgenome.hsapiens.ucsc.hg38
# # - bioconductor-fastseg
# # - bioconductor-genomicalignments
# - bioconductor-genomicranges
# # - bioconductor-rsamtools
# # - bioconductor-s4vectors
# - r-assertthat
# - r-base
# # - r-biocmanager
# - r-cowplot
# - r-data.table
# # - r-devtools
# # - r-doparallel
# # - r-foreach
# - r-ggplot2
# # - r-gtools
# - r-reshape2
# # - r-zoo
# # - r-dplyr
# # - r-mc2d
# # - r-pheatmap
# # - bioconductor-complexheatmap
# # - r-gplots
# - r-scales
# - r-rcolorbrewer
# # - r-stringr
# - r-cairo
# - fonts-anaconda
# # NEW
# - bioconductor-edger
# - r-r.utils
# # PLATE PLOT
# - r-dplyr
# - r-platetools
# - r-viridis
# # GC_correction
# - r-tidyr
# - r-ggpubr
# # SOLVE R lib issue
# - r-stringi=1.7.12
#
# The listing above mirrors the environment file copied below; the path is
# relative to the build context, so build from the repository root.
# COPY creates every missing directory in the destination path, so the former
# `RUN mkdir -p` (an extra layer with no effect on the result) is dropped.
COPY workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml

# Step 2: Generate conda environments

# Each environment is created at the prefix that already holds its
# environment.yaml. The chain stops at the first failing command, and the
# final `mamba clean` runs in this same layer so the caches it removes are
# never stored in the image.
RUN mamba env create \
        --prefix /conda-envs/5681728a49bd83ceed09ba194330c858 \
        --file /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml \
    && mamba env create \
        --prefix /conda-envs/87c04f5d115eff742eca84455513deba \
        --file /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml \
    && mamba env create \
        --prefix /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 \
        --file /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml \
    && mamba clean --all -y
12 changes: 0 additions & 12 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ configfile: "config/config.yaml"


if config["mosaicatcher_pipeline"] is False:

docker_container = "docker://weber8thomas/ashleys-qc-pipeline:{version}".format(
version=str(config["version"])
)
Expand All @@ -16,20 +15,10 @@ if config["mosaicatcher_pipeline"] is False:


include: "rules/common.smk"


include: "rules/aggregate_fct.smk"


include: "rules/rules.smk"


include: "rules/gc.smk"


include: "rules/count.smk"


include: "rules/multiqc.smk"


Expand All @@ -44,7 +33,6 @@ if config["list_commands"] is False:
input:
get_final_output(),


if config["email"]:

onsuccess:
Expand Down
45 changes: 15 additions & 30 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import yaml
import subprocess

if config["mosaicatcher_pipeline"] == False:

exclude = [
"._.DS_Store",
".DS_Store",
Expand Down Expand Up @@ -50,27 +49,20 @@ if config["mosaicatcher_pipeline"] == False:
pipeline_aesthetic_start_ashleys.pipeline_aesthetic_start(config)
subprocess.Popen(
"mkdir -p {folder_path}/config".format(
folder_path=config["data_location"]
),
shell=True,
stdout=subprocess.PIPE,
)
folder_path=config["data_location"]
),
shell=True,
stdout=subprocess.PIPE,
)
subprocess.Popen(
"rsync --ignore-existing -avzh config/config.yaml {folder_path}/config".format(
folder_path=config["data_location"]
),
shell=True,
stdout=subprocess.PIPE,
)

# for sample in [e for e in os.listdir(config["data_location"]) if e not in exclude]:
# print(sample)
# if len(sample.split("_")) == 4:
# assert len(sample.split("_")) != 4, "Your sample name is using 4 times the '_' character, which is currently not supported by ashleys-qc"

folder_path=config["data_location"]
),
shell=True,
stdout=subprocess.PIPE,
)

def onsuccess_fct(log):

make_log_useful_ashleys.make_log_useful(log, "SUCCESS", config)
shell(
'mail -s "[Snakemake] smk-wf-catalog/ashleys-qc-pipeline v{} - Run on {} - SUCCESS" {} < {{log}}'.format(
Expand Down Expand Up @@ -280,14 +272,12 @@ class HandleInput:
if f.endswith(ext)
]


for f in l_files_all:
if len(f.split("_")) == 4:
assert (
len(f.split("_")) != 4
), "Your file name is using 4 times the '_' character, which is currently not supported by ashleys-qc, please rename your files"


# print(l_files_all)
# Dataframe creation
df = pd.DataFrame([{"File": f} for f in l_files_all])
Expand All @@ -314,7 +304,6 @@ class HandleInput:


def findstem(arr):

# Determine size of the array
n = len(arr)

Expand All @@ -327,13 +316,11 @@ def findstem(arr):

for i in range(l):
for j in range(i + 1, l + 1):

# generating all possible substrings
# of our reference string arr[0] i.e s
stem = s[i:j]
k = 1
for k in range(1, n):

# Check if the generated stem is
# common to all words
if stem not in arr[k]:
Expand Down Expand Up @@ -403,8 +390,10 @@ def get_final_output():
),
)

if config["mosaicatcher_pipeline"] is False or config["ashleys_pipeline_only"] is True:

if (
config["mosaicatcher_pipeline"] is False
or config["ashleys_pipeline_only"] is True
):
final_list.extend(
expand(
"{path}/{sample}/cell_selection/labels.tsv",
Expand All @@ -427,9 +416,7 @@ def get_final_output():
# Plate plots

for sample in samples:

if len(cell_per_sample[sample]) in [96, 384]:

final_list.extend(
[
sub_e
Expand All @@ -453,7 +440,7 @@ def get_final_output():
sample=samples,
)
)

# print(final_list)
return final_list

Expand Down Expand Up @@ -484,7 +471,6 @@ def publishdir_fct():
)

if config["use_light_data"] is False:

final_list.extend(
expand(
"{folder}/{sample}/plots/plate/ashleys_plate_{plate_plot}.pdf",
Expand All @@ -508,7 +494,6 @@ def publishdir_fct():
)
)


return final_list


Expand Down
6 changes: 3 additions & 3 deletions workflow/rules/multiqc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,8 @@ rule multiqc:
samtools_idxstats="{folder}/{sample}/multiqc/samtools_idxstats/config/samtools_idxstats_aggr_touch.ok",
samtools_stats="{folder}/{sample}/multiqc/samtools_stats/config/samtools_stats_aggr_touch.ok",
samtools_flagstats="{folder}/{sample}/multiqc/samtools_flagstats/config/samtools_flagstats_aggr_touch.ok",
# multiqc_input="{folder}/{sample}/multiqc/",
output:
report="{folder}/{sample}/multiqc/multiqc_report/multiqc_report.html",
# outdir=directory("{folder}/{sample}/multiqc/multiqc_report/"),
outdir=report(
directory("{folder}/{sample}/multiqc/multiqc_report"),
htmlindex="multiqc_report.html",
Expand All @@ -140,7 +138,9 @@ rule multiqc:
log:
"{folder}/{sample}/log/multiqc/{sample}.log",
params:
multiqc_input = lambda wc, input: "{abs_path}".format(abs_path=config["abs_path"]).join(input.fastqc.split("/")[:-3])
multiqc_input=lambda wc, input: "{abs_path}".format(
abs_path=config["abs_path"]
).join(input.fastqc.split("/")[:-3]),
conda:
"../envs/ashleys_base.yaml"
shell:
Expand Down
29 changes: 0 additions & 29 deletions workflow/rules/rules.smk
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ if config["genecore"] is True and config["genecore_date_folder"]:
.tolist(),
output:
"{folder}/{sample}/fastq/{cell}.{pair}.fastq.gz",
# wildcard_constraints:
# cell="^((?!\.sort\.mdup).*)$"
log:
"{folder}/log/genecore_symlink/{sample}/{cell}_{pair}.log",
shell:
Expand Down Expand Up @@ -142,28 +140,6 @@ rule mark_duplicates:
"sambamba markdup {input.bam} {output} 2>&1 > {log}"


# if config["use_light_data"] == True:

# rule samtools_idxstats_aggr:
# input:
# bam=lambda wc: expand(
# "{folder}/{sample}/samtools_idxstats/{cell}.txt",
# folder=config["data_location"],
# sample=wc.sample,
# cell=cell_per_sample[str(wc.sample)],
# ),
# output:
# "{folder}/{sample}/bam_stats/{sample}.txt",
# log:
# "{folder}/{sample}/log/samtools_idxstats_aggr/{cell}.log",
# resources:
# mem_mb=get_mem_mb,
# conda:
# "../envs/ashleys_base.yaml"
# script:
# ""


if config["mosaicatcher_pipeline"] is False:

rule samtools_index:
Expand Down Expand Up @@ -200,10 +176,6 @@ rule symlink_bam_ashleys:
rule generate_features:
input:
bam=selected_input_bam,
# plot=expand(
# "{{folder}}/{{sample}}/plots/counts/CountComplete.{plottype}.pdf",
# plottype=plottype_counts,
# ),
output:
"{folder}/{sample}/predictions/ashleys_features.tsv",
log:
Expand Down Expand Up @@ -357,7 +329,6 @@ if config["publishdir"] != "":
list_publishdir=publishdir_fct(),
output:
touch("{folder}/config/publishdir_outputs.ok"),

log:
"{folder}/log/publishdir_outputs/publishdir_outputs.log",
conda:
Expand Down

0 comments on commit 83de295

Please sign in to comment.