diff --git a/github-actions-runner/Dockerfile-2.2.0.dockerfile b/github-actions-runner/Dockerfile-2.2.0.dockerfile new file mode 100644 index 0000000..2fa4fb0 --- /dev/null +++ b/github-actions-runner/Dockerfile-2.2.0.dockerfile @@ -0,0 +1,105 @@ +FROM condaforge/mambaforge:latest +LABEL io.github.snakemake.containerized="true" +LABEL io.github.snakemake.conda_env_hash="285fb274e2e9c667a310dbacd0e76c015875280fa7c764272824fc39421dde0d" + +# Step 1: Retrieve conda environments + +# Conda environment: +# source: https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml +# prefix: /conda-envs/5681728a49bd83ceed09ba194330c858 +# channels: +# - bioconda +# - conda-forge +# - defaults +# dependencies: +# - bwa ==0.7.17 +RUN mkdir -p /conda-envs/5681728a49bd83ceed09ba194330c858 +ADD https://github.com/snakemake/snakemake-wrappers/raw/v1.7.0/bio/bwa/index/environment.yaml /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml + +# Conda environment: +# source: workflow/envs/ashleys_base.yaml +# prefix: /conda-envs/87c04f5d115eff742eca84455513deba +# name: ashleys_base +# channels: +# - conda-forge +# - bioconda +# dependencies: +# - samtools +# - tabix +# - bwa +# - sambamba +# - mosaicatcher +# # - alfred +# - ashleys-qc +# - pandas +# # PUBLISHDIR +# - rsync +# # MULTIQC +# - multiqc +# # Fix sklearn update +# - scikit-learn=1.2.2 +RUN mkdir -p /conda-envs/87c04f5d115eff742eca84455513deba +COPY workflow/envs/ashleys_base.yaml /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml + +# Conda environment: +# source: workflow/envs/ashleys_rtools.yaml +# prefix: /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 +# name: rtools +# channels: +# - conda-forge +# - bioconda +# - r +# - anaconda +# dependencies: +# # - bioconductor-biocparallel +# # - bioconductor-bsgenome +# # - bioconductor-bsgenome.hsapiens.ucsc.hg19 +# # - bioconductor-bsgenome.hsapiens.ucsc.hg38 +# # - bioconductor-fastseg +# # - bioconductor-genomicalignments +# - bioconductor-genomicranges +# # - bioconductor-rsamtools +# # - bioconductor-s4vectors +# - r-assertthat +# - r-base +# # - r-biocmanager +# - r-cowplot +# - r-data.table +# # - r-devtools +# # - r-doparallel +# # - r-foreach +# - r-ggplot2 +# # - r-gtools +# - r-reshape2 +# # - r-zoo +# # - r-dplyr +# # - r-mc2d +# # - r-pheatmap +# # - bioconductor-complexheatmap +# # - r-gplots +# - r-scales +# - r-rcolorbrewer +# # - r-stringr +# - r-cairo +# - fonts-anaconda +# # NEW +# - bioconductor-edger +# - r-r.utils +# # PLATE PLOT +# - r-dplyr +# - r-platetools +# - r-viridis +# # GC_correction +# - r-tidyr +# - r-ggpubr +# # SOLVE R lib issue +# - r-stringi=1.7.12 +RUN mkdir -p /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 +COPY workflow/envs/ashleys_rtools.yaml /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml + +# Step 2: Generate conda environments + +RUN mamba env create --prefix /conda-envs/5681728a49bd83ceed09ba194330c858 --file /conda-envs/5681728a49bd83ceed09ba194330c858/environment.yaml && \ + mamba env create --prefix /conda-envs/87c04f5d115eff742eca84455513deba --file /conda-envs/87c04f5d115eff742eca84455513deba/environment.yaml && \ + mamba env create --prefix /conda-envs/9b847fc31baae8e01dfb7ce438a56b71 --file /conda-envs/9b847fc31baae8e01dfb7ce438a56b71/environment.yaml && \ + mamba clean --all -y \ No newline at end of file diff --git a/workflow/Snakefile b/workflow/Snakefile index 1fe7e45..847a6c3 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -7,7 +7,6 @@ configfile: "config/config.yaml" if config["mosaicatcher_pipeline"] is False: - docker_container = "docker://weber8thomas/ashleys-qc-pipeline:{version}".format( version=str(config["version"]) ) @@ -16,20 +15,10 @@ if config["mosaicatcher_pipeline"] is False: include: "rules/common.smk" - - include: "rules/aggregate_fct.smk" - - include: "rules/rules.smk" - - include: "rules/gc.smk" - - include: "rules/count.smk" - - include: "rules/multiqc.smk" @@ -44,7 +33,6 @@ if config["list_commands"] is False: input: get_final_output(), - if config["email"]: onsuccess: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index b5c3e5c..e41ec2b 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -5,7 +5,6 @@ import yaml import subprocess if config["mosaicatcher_pipeline"] == False: - exclude = [ "._.DS_Store", ".DS_Store", @@ -50,27 +49,20 @@ if config["mosaicatcher_pipeline"] == False: pipeline_aesthetic_start_ashleys.pipeline_aesthetic_start(config) subprocess.Popen( "mkdir -p {folder_path}/config".format( - folder_path=config["data_location"] - ), - shell=True, - stdout=subprocess.PIPE, - ) + folder_path=config["data_location"] + ), + shell=True, + stdout=subprocess.PIPE, + ) subprocess.Popen( "rsync --ignore-existing -avzh config/config.yaml {folder_path}/config".format( - folder_path=config["data_location"] - ), - shell=True, - stdout=subprocess.PIPE, - ) - - # for sample in [e for e in os.listdir(config["data_location"]) if e not in exclude]: - # print(sample) - # if len(sample.split("_")) == 4: - # assert len(sample.split("_")) != 4, "Your sample name is using 4 times the '_' character, which is currently not supported by ashleys-qc" - + folder_path=config["data_location"] + ), + shell=True, + stdout=subprocess.PIPE, + ) def onsuccess_fct(log): - make_log_useful_ashleys.make_log_useful(log, "SUCCESS", config) shell( 'mail -s "[Snakemake] smk-wf-catalog/ashleys-qc-pipeline v{} - Run on {} - SUCCESS" {} < {{log}}'.format( @@ -280,14 +272,12 @@ class HandleInput: if f.endswith(ext) ] - for f in l_files_all: if len(f.split("_")) == 4: assert ( len(f.split("_")) != 4 ), "Your file name is using 4 times the '_' character, which is currently not supported by ashleys-qc, please rename your files" - # print(l_files_all) # Dataframe creation df = pd.DataFrame([{"File": f} for f in l_files_all]) @@ -314,7 +304,6 @@ class HandleInput: def findstem(arr): - # Determine size of the array n = len(arr) @@ -327,13 +316,11 @@ def findstem(arr): for i in range(l): for j in range(i + 1, l + 1): - # generating all possible substrings # of our reference string arr[0] i.e s stem = s[i:j] k = 1 for k in range(1, n): - # Check if the generated stem is # common to all words if stem not in arr[k]: @@ -403,8 +390,10 @@ def get_final_output(): ), ) - if config["mosaicatcher_pipeline"] is False or config["ashleys_pipeline_only"] is True: - + if ( + config["mosaicatcher_pipeline"] is False + or config["ashleys_pipeline_only"] is True + ): final_list.extend( expand( "{path}/{sample}/cell_selection/labels.tsv", @@ -427,9 +416,7 @@ def get_final_output(): # Plate plots for sample in samples: - if len(cell_per_sample[sample]) in [96, 384]: - final_list.extend( [ sub_e @@ -453,7 +440,7 @@ def get_final_output(): sample=samples, ) ) - + # print(final_list) return final_list @@ -484,7 +471,6 @@ def publishdir_fct(): ) if config["use_light_data"] is False: - final_list.extend( expand( "{folder}/{sample}/plots/plate/ashleys_plate_{plate_plot}.pdf", @@ -508,7 +494,6 @@ def publishdir_fct(): ) ) - return final_list diff --git a/workflow/rules/multiqc.smk b/workflow/rules/multiqc.smk index 0e7e340..0ffb49a 100644 --- a/workflow/rules/multiqc.smk +++ b/workflow/rules/multiqc.smk @@ -127,10 +127,8 @@ rule multiqc: samtools_idxstats="{folder}/{sample}/multiqc/samtools_idxstats/config/samtools_idxstats_aggr_touch.ok", samtools_stats="{folder}/{sample}/multiqc/samtools_stats/config/samtools_stats_aggr_touch.ok", samtools_flagstats="{folder}/{sample}/multiqc/samtools_flagstats/config/samtools_flagstats_aggr_touch.ok", - # multiqc_input="{folder}/{sample}/multiqc/", output: report="{folder}/{sample}/multiqc/multiqc_report/multiqc_report.html", - # outdir=directory("{folder}/{sample}/multiqc/multiqc_report/"), outdir=report( directory("{folder}/{sample}/multiqc/multiqc_report"), htmlindex="multiqc_report.html", @@ -140,7 +138,9 @@ rule multiqc: log: "{folder}/{sample}/log/multiqc/{sample}.log", params: - multiqc_input = lambda wc, input: "{abs_path}".format(abs_path=config["abs_path"]).join(input.fastqc.split("/")[:-3]) + multiqc_input=lambda wc, input: "{abs_path}".format( + abs_path=config["abs_path"] + ).join(input.fastqc.split("/")[:-3]), conda: "../envs/ashleys_base.yaml" shell: diff --git a/workflow/rules/rules.smk b/workflow/rules/rules.smk index 4567975..2ba5e2e 100644 --- a/workflow/rules/rules.smk +++ b/workflow/rules/rules.smk @@ -26,8 +26,6 @@ if config["genecore"] is True and config["genecore_date_folder"]: .tolist(), output: "{folder}/{sample}/fastq/{cell}.{pair}.fastq.gz", - # wildcard_constraints: - # cell="^((?!\.sort\.mdup).*)$" log: "{folder}/log/genecore_symlink/{sample}/{cell}_{pair}.log", shell: @@ -142,28 +140,6 @@ rule mark_duplicates: "sambamba markdup {input.bam} {output} 2>&1 > {log}" -# if config["use_light_data"] == True: - -# rule samtools_idxstats_aggr: -# input: -# bam=lambda wc: expand( -# "{folder}/{sample}/samtools_idxstats/{cell}.txt", -# folder=config["data_location"], -# sample=wc.sample, -# cell=cell_per_sample[str(wc.sample)], -# ), -# output: -# "{folder}/{sample}/bam_stats/{sample}.txt", -# log: -# "{folder}/{sample}/log/samtools_idxstats_aggr/{cell}.log", -# resources: -# mem_mb=get_mem_mb, -# conda: -# "../envs/ashleys_base.yaml" -# script: -# "" - - if config["mosaicatcher_pipeline"] is False: rule samtools_index: @@ -200,10 +176,6 @@ rule symlink_bam_ashleys: rule generate_features: input: bam=selected_input_bam, - # plot=expand( - # "{{folder}}/{{sample}}/plots/counts/CountComplete.{plottype}.pdf", - # plottype=plottype_counts, - # ), output: "{folder}/{sample}/predictions/ashleys_features.tsv", log: @@ -357,7 +329,6 @@ if config["publishdir"] != "": list_publishdir=publishdir_fct(), output: touch("{folder}/config/publishdir_outputs.ok"), - log: "{folder}/log/publishdir_outputs/publishdir_outputs.log", conda: