chore: R filestyle precommit turned off
kopardev committed Nov 24, 2023
1 parent 997dd04 commit e517196
Showing 7 changed files with 257 additions and 115 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -12,10 +12,10 @@ repos:
hooks:
- id: black
# R formatting
- repo: https://github.com/lorenzwalthert/precommit
rev: v0.1.2
hooks:
- id: style-files
#- repo: https://github.com/lorenzwalthert/precommit
# rev: v0.1.2
# hooks:
# - id: style-files
# general linting
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v2.7.1
74 changes: 47 additions & 27 deletions workflow/Snakefile
@@ -4,7 +4,7 @@ include: join("rules/align.smk")
include: join("rules/qc.smk")
include: join("rules/peakcall.smk")
include: join("rules/diffatac.smk")
localrules: all
#localrules: all
rule all:
input:
#trim
@@ -20,6 +20,7 @@ rule all:
expand(join(QCDIR,"{replicate}.filt.bam.flagstat"),replicate=REPLICATES),
expand(join(QCDIR,"{replicate}.dupmetric"),replicate=REPLICATES),
expand(join(QCDIR,"preseq","{replicate}.nrf"),replicate=REPLICATES),

#nreads stats
expand(join(QCDIR,"{replicate}.nreads.txt"),replicate=REPLICATES),
#fastqc
@@ -36,44 +37,63 @@
expand(join(QCDIR,"tss","{replicate}.tss.txt"),replicate=REPLICATES),
# #fld
expand(join(QCDIR,"fld","{replicate}.fld.txt"),replicate=REPLICATES),
# #macs2 peaks
# # #macs2 peaks
expand(join(RESULTSDIR,"peaks","macs2","{sample}.consensus.macs2.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","macs2","{sample}.replicate.macs2.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","macs2","{sample}.macs2.tn5nicksbedfiles"),sample=SAMPLES),
#genrich peaks
expand(join(RESULTSDIR,"peaks","genrich","{sample}.consensus.genrich.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","{sample}.replicate.genrich.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","{sample}.genrich.tn5nicksbedfiles"),sample=SAMPLES),
#jaccard comparison of called peaks
join(QCDIR,"jaccard","macs2.replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","macs2.consensus.jaccard.pca.html"),
join(QCDIR,"jaccard","macs2.consensus_replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","genrich.replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","genrich.consensus.jaccard.pca.html"),
join(QCDIR,"jaccard","genrich.consensus_replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","allmethods.replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","allmethods.consensus.jaccard.pca.html"),
join(QCDIR,"jaccard","allmethods.consensus_replicate.jaccard.pca.html"),
# fixed width peaks
expand(join(RESULTSDIR,"peaks","macs2","fixed_width","{sample}.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","macs2","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# roi gtf for genrich-only
join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.gtf"),
# counts matrix
join(RESULTSDIR,"peaks","genrich","ROI.counts.tsv"),
# #jaccard comparison of called peaks
# join(QCDIR,"jaccard","macs2.replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","macs2.consensus.jaccard.pca.html"),
# join(QCDIR,"jaccard","macs2.consensus_replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","genrich.replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","genrich.consensus.jaccard.pca.html"),
# join(QCDIR,"jaccard","genrich.consensus_replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","allmethods.replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","allmethods.consensus.jaccard.pca.html"),
# join(QCDIR,"jaccard","allmethods.consensus_replicate.jaccard.pca.html"),
# # fixed width peaks
# expand(join(RESULTSDIR,"peaks","macs2","fixed_width","{sample}.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# expand(join(RESULTSDIR,"peaks","macs2","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# # roi gtf for genrich-only
# join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.gtf"),
# # counts matrix
# join(RESULTSDIR,"peaks","genrich","ROI.counts.tsv"),
# diffatac
join(RESULTSDIR,"peaks","genrich","DiffATAC","sampleinfo.tsv"),
join(RESULTSDIR,"peaks","genrich","DiffATAC","contrasts.lst"),
#motif enrichment
# get_diffatac_input(),
# #motif enrichment
expand(join(QCDIR,"{sample}.motif_enrichment"),sample=SAMPLES),
#frip
# #frip
expand(join(QCDIR,"frip","{sample}.frip"),sample=SAMPLES),
#multiqc
# #multiqc
join(QCDIR,"multiqc_report.html"),
join(QCDIR,"QCStats.tsv")

# create jobby tables
jobby_cmd = "run_jobby_on_snakemake_log " + join(WORKDIR, 'snakemake.log') + " | tee " + join(WORKDIR, 'snakemake.log.jobby') + " | cut -f2,3,18 > " + join(WORKDIR, 'snakemake.log.jobby.short')
spook_cmd = "spooker " + WORKDIR + " ASPEN | tee " + join(WORKDIR, 'spooker.log')

onsuccess:
#subprocess.run(shlex.split(jobby_cmd),capture_output=False,shell=False,text=True)
print("OnSuccess")
shell("printenv")
shell("module list")
print(jobby_cmd)
shell(jobby_cmd)
print(spook_cmd)
shell(spook_cmd)


onerror:
#subprocess.run(shlex.split(jobby_cmd),capture_output=False,shell=False,text=True)
print("OnError")
shell("printenv")
shell("module list")
print(jobby_cmd)
shell(jobby_cmd)
print(spook_cmd)
shell(spook_cmd)
16 changes: 9 additions & 7 deletions workflow/rules/align.smk
@@ -34,7 +34,7 @@ rule trim:
replicate="{replicate}",
scriptsdir=SCRIPTSDIR,
script="ccbr_cutadapt_pe.bash"
container: config["masterdocker"]
container: config["masterdocker"]
threads: getthreads("trim")
shell:"""
if [ -w "/lscratch/${{SLURM_JOB_ID}}" ];then cd /lscratch/${{SLURM_JOB_ID}};else cd /dev/shm;fi
@@ -61,7 +61,7 @@ rule create_BL_index:
params:
# bldir=join(RESULTSDIR,"tmp","BL"),
genome=GENOME,
container: config["masterdocker"]
container: config["masterdocker"]
threads: getthreads("create_BL_index")
shell:"""
blindexdir=$(dirname {output.sa})
@@ -77,7 +77,7 @@ rule remove_BL:
# """
# Second step of TAD
# Input: trimmed fastq files
# Output: not aligning to blacklisted regions, trimmed fastq files
# Output: not aligning to blacklisted regions, trimmed fastq files
# in "tmp" folder, so they can be deleted if no longer required.
# """
# group: "TAD"
@@ -92,7 +92,7 @@ rule remove_BL:
replicate="{replicate}",
scriptsdir=SCRIPTSDIR,
script="ccbr_remove_blacklisted_reads_pe.bash"
container: config["masterdocker"]
container: config["masterdocker"]
threads: getthreads("remove_BL")
shell:"""
if [ -w "/lscratch/${{SLURM_JOB_ID}}" ];then cd /lscratch/${{SLURM_JOB_ID}};else cd /dev/shm;fi
@@ -114,9 +114,9 @@ rule align:
# """
# Third (final) step of TAD
# Alignment is handled by bowtie
# Filtering is based off of ENCODEs pipeline (atac_assign_multimappers.py is direclty pulled from ENCODE)
# Filtering is based off of ENCODEs pipeline (atac_assign_multimappers.py is directly pulled from ENCODE)
# Input: not aligning to blacklisted regions, trimmed fastq files
# Output:
# Output:
# 1. aligned and filtered alignment files:
# * tagAlign --> for MACS2
# * dedup bam --> for TOBIAS
@@ -152,6 +152,8 @@ rule align:
container: config["masterdocker"]
threads: getthreads("align")
shell:"""
set -exo pipefail
if [ -w "/lscratch/${{SLURM_JOB_ID}}" ];then cd /lscratch/${{SLURM_JOB_ID}};else cd /dev/shm;fi
# make folder if they do not exist
@@ -220,4 +222,4 @@ nreads=`grep -m1 total {input.fs2}|awk '{{print $1}}'`
echo "$nreads"|awk '{{printf("%d\\tAfter deduplication\\n",$1)}}' >> {output.nreads}
"""

#########################################################
#########################################################
81 changes: 65 additions & 16 deletions workflow/rules/diffatac.smk
@@ -1,3 +1,16 @@
# functions
def get_diffatac_input():
expected_filelist = list()
deffiles = list()
# diffatac only works for hg38 and mm10
if GENOME == "mm10" or GENOME == "hg38":
if CONTRASTS.shape[0] != 0:
expected_filelist = [ join(RESULTSDIR,"peaks","genrich","DiffATAC","degs.done") ]
expected_filelist.append([ join(RESULTSDIR,"peaks","genrich","DiffATAC","all_diff_atacs.html"),
join(RESULTSDIR,"peaks","genrich","DiffATAC","all_diff_atacs.tsv")])
return expected_filelist
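
# A minimal sketch (not part of this commit) of how get_diffatac_input() is meant
# to be consumed: the helper returns an empty list unless GENOME is hg38 or mm10
# and at least one contrast is defined, so its return value can be listed directly
# under rule all's inputs (the matching call in workflow/Snakefile above is
# currently commented out). The rule body below is illustrative only and assumes
# GENOME, CONTRASTS, RESULTSDIR and join() are in scope as defined above.
#
#   rule all:
#       input:
#           # ... existing QC and peak-calling targets ...
#           get_diffatac_input(),  # expands to no extra targets for unsupported genomes or empty contrasts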



rule atac_genrich_calculate_regions_of_interest_for_diffatac:
# """
@@ -6,17 +19,17 @@ rule atac_genrich_calculate_regions_of_interest_for_diffatac:
input:
expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES)
output:
roi=join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.narrowPeak"),
roi_bed=join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.bed"),
roi_gtf=join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.gtf"),
roi_renormalized=join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.renormalized.narrowPeak")
roi = join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.narrowPeak"),
roi_bed = join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.bed"),
roi_gtf = join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.gtf"),
roi_renormalized = join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.renormalized.narrowPeak")
params:
script = "ccbr_atac_get_fixedwidth_peaks.sh",
scriptsdir = SCRIPTSDIR,
width = config["fixed_width"],
roi_min_rep = config['roi_min_replicates'],
roi_min_spm = config['roi_min_spm'],
container: config["masterdocker"]
container: config["masterdocker"]
shell:"""
set -exo pipefail
TMPDIR="/lscratch/$SLURM_JOB_ID"
@@ -103,30 +116,28 @@ cat ${{TMPDIR}}/`basename {output.counts}` | python {params.scriptsdir}/_feature

rule diffatac:
input:
files = expand(join(RESULTSDIR,"peaks","genrich","{sample}.genrich.tn5nicksbedfiles"),sample=SAMPLES),
counts = rules.get_counts_table.output.counts,
counts = rules.get_counts_table.output.counts,
output:
sampleinfo = join(RESULTSDIR,"peaks","genrich","DiffATAC","sampleinfo.tsv"),
degfilelist = join(RESULTSDIR,"peaks","genrich","DiffATAC","contrasts.lst")
degsdone = join(RESULTSDIR,"peaks","genrich","DiffATAC","degs.done"),
sampleinfo = join(RESULTSDIR,"peaks","genrich","DiffATAC","sampleinfo.txt"),
params:
contrasts = config['contrasts'],
scriptsdir = SCRIPTSDIR,
genome = config['genome'],
fc_cutoff = config['contrasts_fc_cutoff'],
fdr_cutoff = config['contrasts_fdr_cutoff'],
manifest = config['samplemanifest'],
container: config['baser']
shell:"""
set -exo pipefail
TMPDIR="/lscratch/$SLURM_JOB_ID"
if [ ! -d $TMPDIR ];then
TMPDIR="/dev/shm"
fi
outdir=$(dirname {output.sampleinfo})
outdir=$(dirname {output.degsdone})
if [ ! -d $outdir ];then mkdir $outdir;fi
cd $outdir
if [ -f {output.degfilelist} ];then rm -f {output.degfilelist};fi
cat {input.files} | cut -f1-2 | sort > {output.sampleinfo}
tail -n +2 {params.manifest} | cut -f1-2 | sort > {output.sampleinfo}
while read g1 g2;do
Rscript {params.scriptsdir}/DESeq2_runner.R \\
@@ -139,8 +150,46 @@ while read g1 g2;do
--fdr {params.fdr_cutoff} \\
--indexcols Geneid \\
--excludecols Chr,Start,End,Strand,Length \\
--outdir $outdir
echo -ne "$g1\\t$g2\\t${{outdir}}/${{g1}}_vs_${{g2}}.tsv\\n" >> {output.degfilelist}
done < {params.contrasts}
--outdir $outdir \\
--tmpdir $TMPDIR \\
--scriptsdir {params.scriptsdir}
done < {params.contrasts} && \
touch {output.degsdone}
"""

rule diffatac_aggregate:
input:
counts = rules.get_counts_table.output.counts,
degsdone = rules.diffatac.output.degsdone,
sampleinfo = rules.diffatac.output.sampleinfo,
output:
alldegshtml = join(RESULTSDIR,"peaks","genrich","DiffATAC","all_diff_atacs.html"),
alldegstsv = join(RESULTSDIR,"peaks","genrich","DiffATAC","all_diff_atacs.tsv"),
params:
contrasts = config['contrasts'],
scriptsdir = SCRIPTSDIR,
genome = config['genome'],
fc_cutoff = config['contrasts_fc_cutoff'],
fdr_cutoff = config['contrasts_fdr_cutoff'],
container:config['baser']
shell:"""
set -exo pipefail
TMPDIR="/lscratch/$SLURM_JOB_ID"
if [ ! -d $TMPDIR ];then
TMPDIR="/dev/shm"
fi
outdir=$(dirname {input.degsdone})
cd $outdir
Rscript {params.scriptsdir}/aggregate_results_runner.R \\
--countsmatrix {input.counts} \\
--diffatacdir $outdir \\
--coldata {input.sampleinfo} \\
--foldchange {params.fc_cutoff} \\
--fdr {params.fdr_cutoff} \\
--indexcols Geneid \\
--excludecols Chr,Start,End,Strand,Length \\
--diffatacdir $outdir \\
--tmpdir $TMPDIR \\
--scriptsdir {params.scriptsdir}
"""