chore: R filestyle precommit turned off
kopardev committed Nov 24, 2023
1 parent 997dd04 commit e517196
Showing 7 changed files with 257 additions and 115 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -12,10 +12,10 @@ repos:
hooks:
- id: black
# R formatting
- repo: https://github.com/lorenzwalthert/precommit
rev: v0.1.2
hooks:
- id: style-files
#- repo: https://github.com/lorenzwalthert/precommit
# rev: v0.1.2
# hooks:
# - id: style-files
# general linting
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v2.7.1
74 changes: 47 additions & 27 deletions workflow/Snakefile
@@ -4,7 +4,7 @@ include: join("rules/align.smk")
include: join("rules/qc.smk")
include: join("rules/peakcall.smk")
include: join("rules/diffatac.smk")
localrules: all
#localrules: all
rule all:
input:
#trim
@@ -20,6 +20,7 @@ rule all:
expand(join(QCDIR,"{replicate}.filt.bam.flagstat"),replicate=REPLICATES),
expand(join(QCDIR,"{replicate}.dupmetric"),replicate=REPLICATES),
expand(join(QCDIR,"preseq","{replicate}.nrf"),replicate=REPLICATES),

#nreads stats
expand(join(QCDIR,"{replicate}.nreads.txt"),replicate=REPLICATES),
#fastqc
@@ -36,44 +37,63 @@
expand(join(QCDIR,"tss","{replicate}.tss.txt"),replicate=REPLICATES),
# #fld
expand(join(QCDIR,"fld","{replicate}.fld.txt"),replicate=REPLICATES),
# #macs2 peaks
# # #macs2 peaks
expand(join(RESULTSDIR,"peaks","macs2","{sample}.consensus.macs2.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","macs2","{sample}.replicate.macs2.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","macs2","{sample}.macs2.tn5nicksbedfiles"),sample=SAMPLES),
#genrich peaks
expand(join(RESULTSDIR,"peaks","genrich","{sample}.consensus.genrich.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","{sample}.replicate.genrich.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","{sample}.genrich.tn5nicksbedfiles"),sample=SAMPLES),
#jaccard comparison of called peaks
join(QCDIR,"jaccard","macs2.replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","macs2.consensus.jaccard.pca.html"),
join(QCDIR,"jaccard","macs2.consensus_replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","genrich.replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","genrich.consensus.jaccard.pca.html"),
join(QCDIR,"jaccard","genrich.consensus_replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","allmethods.replicate.jaccard.pca.html"),
join(QCDIR,"jaccard","allmethods.consensus.jaccard.pca.html"),
join(QCDIR,"jaccard","allmethods.consensus_replicate.jaccard.pca.html"),
# fixed width peaks
expand(join(RESULTSDIR,"peaks","macs2","fixed_width","{sample}.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","macs2","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# roi gtf for genrich-only
join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.gtf"),
# counts matrix
join(RESULTSDIR,"peaks","genrich","ROI.counts.tsv"),
# #jaccard comparison of called peaks
# join(QCDIR,"jaccard","macs2.replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","macs2.consensus.jaccard.pca.html"),
# join(QCDIR,"jaccard","macs2.consensus_replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","genrich.replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","genrich.consensus.jaccard.pca.html"),
# join(QCDIR,"jaccard","genrich.consensus_replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","allmethods.replicate.jaccard.pca.html"),
# join(QCDIR,"jaccard","allmethods.consensus.jaccard.pca.html"),
# join(QCDIR,"jaccard","allmethods.consensus_replicate.jaccard.pca.html"),
# # fixed width peaks
# expand(join(RESULTSDIR,"peaks","macs2","fixed_width","{sample}.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# expand(join(RESULTSDIR,"peaks","macs2","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES),
# # roi gtf for genrich-only
# join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.gtf"),
# # counts matrix
# join(RESULTSDIR,"peaks","genrich","ROI.counts.tsv"),
# diffatac
join(RESULTSDIR,"peaks","genrich","DiffATAC","sampleinfo.tsv"),
join(RESULTSDIR,"peaks","genrich","DiffATAC","contrasts.lst"),
#motif enrichment
# get_diffatac_input(),
# #motif enrichment
expand(join(QCDIR,"{sample}.motif_enrichment"),sample=SAMPLES),
#frip
# #frip
expand(join(QCDIR,"frip","{sample}.frip"),sample=SAMPLES),
#multiqc
# #multiqc
join(QCDIR,"multiqc_report.html"),
join(QCDIR,"QCStats.tsv")

# create jobby tables
jobby_cmd = "run_jobby_on_snakemake_log " + join(WORKDIR, 'snakemake.log') + " | tee " + join(WORKDIR, 'snakemake.log.jobby') + " | cut -f2,3,18 > " + join(WORKDIR, 'snakemake.log.jobby.short')
spook_cmd = "spooker " + WORKDIR + " ASPEN | tee " + join(WORKDIR, 'spooker.log')

onsuccess:
#subprocess.run(shlex.split(jobby_cmd),capture_output=False,shell=False,text=True)
print("OnSuccess")
shell("printenv")
shell("module list")
print(jobby_cmd)
shell(jobby_cmd)
print(spook_cmd)
shell(spook_cmd)


onerror:
#subprocess.run(shlex.split(jobby_cmd),capture_output=False,shell=False,text=True)
print("OnError")
shell("printenv")
shell("module list")
print(jobby_cmd)
shell(jobby_cmd)
print(spook_cmd)
shell(spook_cmd)
16 changes: 9 additions & 7 deletions workflow/rules/align.smk
@@ -34,7 +34,7 @@ rule trim:
replicate="{replicate}",
scriptsdir=SCRIPTSDIR,
script="ccbr_cutadapt_pe.bash"
container: config["masterdocker"]
container: config["masterdocker"]
threads: getthreads("trim")
shell:"""
if [ -w "/lscratch/${{SLURM_JOB_ID}}" ];then cd /lscratch/${{SLURM_JOB_ID}};else cd /dev/shm;fi
@@ -61,7 +61,7 @@ rule create_BL_index:
params:
# bldir=join(RESULTSDIR,"tmp","BL"),
genome=GENOME,
container: config["masterdocker"]
container: config["masterdocker"]
threads: getthreads("create_BL_index")
shell:"""
blindexdir=$(dirname {output.sa})
@@ -77,7 +77,7 @@ rule remove_BL:
# """
# Second step of TAD
# Input: trimmed fastq files
# Output: not aligning to blacklisted regions, trimmed fastq files
# Output: not aligning to blacklisted regions, trimmed fastq files
# in "tmp" folder, so they can be deleted if no longer required.
# """
# group: "TAD"
@@ -92,7 +92,7 @@ rule remove_BL:
replicate="{replicate}",
scriptsdir=SCRIPTSDIR,
script="ccbr_remove_blacklisted_reads_pe.bash"
container: config["masterdocker"]
container: config["masterdocker"]
threads: getthreads("remove_BL")
shell:"""
if [ -w "/lscratch/${{SLURM_JOB_ID}}" ];then cd /lscratch/${{SLURM_JOB_ID}};else cd /dev/shm;fi
@@ -114,9 +114,9 @@ rule align:
# """
# Third (final) step of TAD
# Alignment is handled by bowtie
# Filtering is based off of ENCODEs pipeline (atac_assign_multimappers.py is direclty pulled from ENCODE)
# Filtering is based off of ENCODEs pipeline (atac_assign_multimappers.py is directly pulled from ENCODE)
# Input: not aligning to blacklisted regions, trimmed fastq files
# Output:
# Output:
# 1. aligned and filtered alignment files:
# * tagAlign --> for MACS2
# * dedup bam --> for TOBIAS
@@ -152,6 +152,8 @@ rule align:
container: config["masterdocker"]
threads: getthreads("align")
shell:"""
set -exo pipefail
if [ -w "/lscratch/${{SLURM_JOB_ID}}" ];then cd /lscratch/${{SLURM_JOB_ID}};else cd /dev/shm;fi
# make folder if they do not exist
@@ -220,4 +222,4 @@ nreads=`grep -m1 total {input.fs2}|awk '{{print $1}}'`
echo "$nreads"|awk '{{printf("%d\\tAfter deduplication\\n",$1)}}' >> {output.nreads}
"""

#########################################################
#########################################################
81 changes: 65 additions & 16 deletions workflow/rules/diffatac.smk
@@ -1,3 +1,16 @@
# functions
def get_diffatac_input():
expected_filelist = list()
deffiles = list()
# diffatac only works for hg38 and mm10
if GENOME == "mm10" or GENOME == "hg38":
if CONTRASTS.shape[0] != 0:
expected_filelist = [ join(RESULTSDIR,"peaks","genrich","DiffATAC","degs.done") ]
expected_filelist.append([ join(RESULTSDIR,"peaks","genrich","DiffATAC","all_diff_atacs.html"),
join(RESULTSDIR,"peaks","genrich","DiffATAC","all_diff_atacs.tsv")])
return expected_filelist
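
# A minimal sketch (not part of this commit) of how get_diffatac_input() is meant
# to be consumed: the helper returns an empty list unless GENOME is hg38 or mm10
# and at least one contrast is defined, so its return value can be listed directly
# under rule all's inputs (the matching call in workflow/Snakefile above is
# currently commented out). The rule body below is illustrative only and assumes
# GENOME, CONTRASTS, RESULTSDIR and join() are in scope as defined above.
#
#   rule all:
#       input:
#           # ... existing QC and peak-calling targets ...
#           get_diffatac_input(),  # expands to no extra targets for unsupported genomes or empty contrasts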



rule atac_genrich_calculate_regions_of_interest_for_diffatac:
# """
@@ -6,17 +19,17 @@ rule atac_genrich_calculate_regions_of_interest_for_diffatac:
input:
expand(join(RESULTSDIR,"peaks","genrich","fixed_width","{sample}.renormalized.fixed_width.consensus.narrowPeak"),sample=SAMPLES)
output:
roi=join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.narrowPeak"),
roi_bed=join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.bed"),
roi_gtf=join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.gtf"),
roi_renormalized=join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.renormalized.narrowPeak")
roi = join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.narrowPeak"),
roi_bed = join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.bed"),
roi_gtf = join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.gtf"),
roi_renormalized = join(RESULTSDIR,"peaks","genrich","fixed_width","ROI.renormalized.narrowPeak")
params:
script = "ccbr_atac_get_fixedwidth_peaks.sh",
scriptsdir = SCRIPTSDIR,
width = config["fixed_width"],
roi_min_rep = config['roi_min_replicates'],
roi_min_spm = config['roi_min_spm'],
container: config["masterdocker"]
container: config["masterdocker"]
shell:"""
set -exo pipefail
TMPDIR="/lscratch/$SLURM_JOB_ID"
@@ -103,30 +116,28 @@ cat ${{TMPDIR}}/`basename {output.counts}` | python {params.scriptsdir}/_feature

rule diffatac:
input:
files = expand(join(RESULTSDIR,"peaks","genrich","{sample}.genrich.tn5nicksbedfiles"),sample=SAMPLES),
counts = rules.get_counts_table.output.counts,
counts = rules.get_counts_table.output.counts,
output:
sampleinfo = join(RESULTSDIR,"peaks","genrich","DiffATAC","sampleinfo.tsv"),
degfilelist = join(RESULTSDIR,"peaks","genrich","DiffATAC","contrasts.lst")
degsdone = join(RESULTSDIR,"peaks","genrich","DiffATAC","degs.done"),
sampleinfo = join(RESULTSDIR,"peaks","genrich","DiffATAC","sampleinfo.txt"),
params:
contrasts = config['contrasts'],
scriptsdir = SCRIPTSDIR,
genome = config['genome'],
fc_cutoff = config['contrasts_fc_cutoff'],
fdr_cutoff = config['contrasts_fdr_cutoff'],
manifest = config['samplemanifest'],
container: config['baser']
shell:"""
set -exo pipefail
TMPDIR="/lscratch/$SLURM_JOB_ID"
if [ ! -d $TMPDIR ];then
TMPDIR="/dev/shm"
fi
outdir=$(dirname {output.sampleinfo})
outdir=$(dirname {output.degsdone})
if [ ! -d $outdir ];then mkdir $outdir;fi
cd $outdir
if [ -f {output.degfilelist} ];then rm -f {output.degfilelist};fi
cat {input.files} | cut -f1-2 | sort > {output.sampleinfo}
tail -n +2 {params.manifest} | cut -f1-2 | sort > {output.sampleinfo}
while read g1 g2;do
Rscript {params.scriptsdir}/DESeq2_runner.R \\
@@ -139,8 +150,46 @@ while read g1 g2;do
--fdr {params.fdr_cutoff} \\
--indexcols Geneid \\
--excludecols Chr,Start,End,Strand,Length \\
--outdir $outdir
echo -ne "$g1\\t$g2\\t${{outdir}}/${{g1}}_vs_${{g2}}.tsv\\n" >> {output.degfilelist}
done < {params.contrasts}
--outdir $outdir \\
--tmpdir $TMPDIR \\
--scriptsdir {params.scriptsdir}
done < {params.contrasts} && \
touch {output.degsdone}
"""

rule diffatac_aggregate:
input:
counts = rules.get_counts_table.output.counts,
degsdone = rules.diffatac.output.degsdone,
sampleinfo = rules.diffatac.output.sampleinfo,
output:
alldegshtml = join(RESULTSDIR,"peaks","genrich","DiffATAC","all_diff_atacs.html"),
alldegstsv = join(RESULTSDIR,"peaks","genrich","DiffATAC","all_diff_atacs.tsv"),
params:
contrasts = config['contrasts'],
scriptsdir = SCRIPTSDIR,
genome = config['genome'],
fc_cutoff = config['contrasts_fc_cutoff'],
fdr_cutoff = config['contrasts_fdr_cutoff'],
container:config['baser']
shell:"""
set -exo pipefail
TMPDIR="/lscratch/$SLURM_JOB_ID"
if [ ! -d $TMPDIR ];then
TMPDIR="/dev/shm"
fi
outdir=$(dirname {input.degsdone})
cd $outdir
Rscript {params.scriptsdir}/aggregate_results_runner.R \\
--countsmatrix {input.counts} \\
--diffatacdir $outdir \\
--coldata {input.sampleinfo} \\
--foldchange {params.fc_cutoff} \\
--fdr {params.fdr_cutoff} \\
--indexcols Geneid \\
--excludecols Chr,Start,End,Strand,Length \\
--diffatacdir $outdir \\
--tmpdir $TMPDIR \\
--scriptsdir {params.scriptsdir}
"""