Skip to content

Commit

Permalink
Copied files from PRJNA509779 after resetting for phoenix
Browse files Browse the repository at this point in the history
  • Loading branch information
smped committed Jun 28, 2023
1 parent d7b93c2 commit 16d96b9
Show file tree
Hide file tree
Showing 17 changed files with 307 additions and 129 deletions.
10 changes: 9 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,14 @@ ALL_HTML = expand(

ALL_OUTPUTS = []
# ALL_OUTPUTS.extend(ALL_QC)
# ALL_OUTPUTS.extend(ALL_BAM)
ALL_OUTPUTS.extend(ALL_BAM)
ALL_OUTPUTS.extend(ALL_PEAKS)
ALL_OUTPUTS.extend(
expand(
os.path.join("output", "annotations", "{accession}_greylist.bed"),
accession = input
)
)
ALL_OUTPUTS.extend(ALL_BIGWIG)
ALL_OUTPUTS.extend(ALL_HTML)

Expand All @@ -134,4 +140,6 @@ include: "rules/samtools.smk"
include: "rules/picard_markduplicates.smk"
include: "rules/peak_stats.smk"
include: "rules/macs2.smk"
include: "rules/bedgraph_to_bigwig.smk"
include: "rules/rmarkdown.smk"
include: "rules/make_greylist.smk"
3 changes: 2 additions & 1 deletion workflow/envs/adapterremoval.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
name: adapterremoval
channels:
- defaults
- bioconda
- conda-forge
dependencies:
- adapterremoval
- adapterremoval=2.3.2

5 changes: 3 additions & 2 deletions workflow/envs/bedgraph_to_bigwig.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: bedgraph_to_bigwig
channels:
- defaults
- conda-forge
- bioconda
- nodefaults
dependencies:
- ucsc-bedgraphtobigwig
- ucsc-bedgraphtobigwig=366
5 changes: 3 additions & 2 deletions workflow/envs/fastqc.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: fastqc
channels:
- defaults
- bioconda
- conda-forge
- bioconda
- nodefaults
dependencies:
- fastqc<=0.11.9
9 changes: 9 additions & 0 deletions workflow/envs/greylist.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Conda environment used by the make_greylist rule (workflow/rules/make_greylist.smk).
# Every package is version-pinned; `nodefaults` keeps the Anaconda `defaults`
# channel out of the solve so only conda-forge and bioconda are searched.
name: greylist
channels:
- conda-forge
- bioconda
- nodefaults
dependencies:
- r-base=4.2.3
- r-here=1.0
- bioconductor-diffbind=3.8
38 changes: 19 additions & 19 deletions workflow/envs/rmarkdown.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
name: rmarkdown
channels:
- bioconda
- conda-forge
- r
- bioconda
- nodefaults
dependencies:
- r-base=4.2.3
- icu =72
- r-base =4.2.3
- r-biocmanager
- r-complexupset
- r-diagrammer
- r-glue
- r-here
- r-knitr
- r-pander
- r-polychrome
- r-reactable
- r-rmarkdown
- r-scales
- r-tidyverse
- r-yaml
- bioconductor-extrachips
- bioconductor-ngsReports
- bioconductor-plyranges
- bioconductor-rtracklayer
- r-complexupset =1.3.3
- r-diagrammer =1.0
- r-glue =1.6
- r-here =1.0
- r-knitr =1.43
- r-pander = 0.6.5
- r-polychrome =1.5
- r-reactable =0.4
- r-rmarkdown =2.21
- r-scales =1.2
- r-tidyverse =2.0
- r-yaml =2.3
- bioconductor-extrachips =1.2
- bioconductor-ngsReports =2.0
- bioconductor-plyranges =1.18
- bioconductor-rtracklayer =1.58
2 changes: 1 addition & 1 deletion workflow/rules/adapterremoval.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ rule adapterremoval:
input:
sample = os.path.join(raw_path, "{accession}.fastq.gz")
output:
fastq = os.path.join(trim_path, "{accession}.fastq.gz"),
fastq = temp(os.path.join(trim_path, "{accession}.fastq.gz")),
settings = "output/adapterremoval/{accession}.settings"
conda: "../envs/adapterremoval.yml"
log:
Expand Down
43 changes: 43 additions & 0 deletions workflow/rules/bedgraph_to_bigwig.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Sort a bedGraph by chromosome, then numerically by start position, keeping
# only records whose chromosome matches chr<digits/X/Y>. The sorted file is a
# temp() output consumed by bedgraph_to_bigwig.
#
# Resources scale with the input size (in MB) and the retry attempt. The
# max(256, ...) floor stops inputs smaller than 1 MB from evaluating to 0,
# which would otherwise produce `sort -S 0M` and a zero memory request.
rule sort_bedgraph:
    input:
        bdg = "{file}.bdg",
    output:
        bdg = temp("{file}.sorted.bdg")
    log: "workflow/logs/sort_bedgraph/{file}.log"
    threads: 1
    resources:
        runtime = "1h",
        mem_mb = lambda wildcards, input, attempt: max(256, (input.size//1000000) * attempt * 8),
        disk_mb = lambda wildcards, input, attempt: max(256, (input.size//1000000) * attempt * 4),
    shell:
        """
        ## Sort the file
        echo -e "Started sorting at $(date)" >> {log}
        ## LC_ALL=C gives the byte-order (case-sensitive) chromosome ordering
        ## that bedGraphToBigWig expects; sort's stderr goes to the log
        LC_ALL=C sort \
            -k1,1 -k2,2n \
            -S {resources.mem_mb}M \
            {input.bdg} 2>> {log} | \
            grep -E $'^chr[0-9XY]+\t' > {output.bdg}
        echo -e "Finished sorting at $(date)" >> {log}
        """


# Convert a sorted bedGraph into a bigWig using UCSC bedGraphToBigWig and the
# chromosome sizes file defined elsewhere in the workflow (`chrom_sizes`).
#
# Resources scale with the total input size (in MB) and the retry attempt. The
# max(256, ...) floor stops inputs smaller than 1 MB from requesting 0 MB.
rule bedgraph_to_bigwig:
    input:
        bedgraph = "{file}.sorted.bdg",
        chrom_sizes = chrom_sizes
    output:
        bigwig = "{file}.bw"
    conda: "../envs/bedgraph_to_bigwig.yml"
    log: "workflow/logs/bedgraph_to_bigwig/{file}.log"
    threads: 1
    resources:
        runtime = "3h",
        mem_mb = lambda wildcards, input, attempt: max(256, (input.size//1000000) * attempt * 8),
        disk_mb = lambda wildcards, input, attempt: max(256, (input.size//1000000) * attempt * 4),
    shell:
        """
        echo -e "Started conversion at $(date)" >> {log}
        ## Send the tool's own messages and errors to the rule log as well
        bedGraphToBigWig {input.bedgraph} {input.chrom_sizes} {output.bigwig} >> {log} 2>&1
        echo -e "Finished conversion at $(date)" >> {log}
        """
2 changes: 2 additions & 0 deletions workflow/rules/bowtie2.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ rule bowtie2_align:
params:
extra = config['params']['bowtie2'],
threads: 8
resources:
mem_mb = lambda wildcards, threads: (threads * 2048)
script:
"../scripts/bowtie2.py"

Expand Down
1 change: 1 addition & 0 deletions workflow/rules/fasterq-dump.smk
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ rule get_fastq:
params:
extra="--skip-technical",
threads: 2
retries: 3
resources:
runtime = "2h"
script:
Expand Down
58 changes: 18 additions & 40 deletions workflow/rules/macs2.smk
Original file line number Diff line number Diff line change
Expand Up @@ -59,27 +59,26 @@ rule macs2_callpeak:
),
log = os.path.join(macs2_path, "{sample}", "{sample}_callpeak.log")
conda: "../envs/macs2.yml"
log:
"workflow/logs/macs2_callpeak/{sample}_callpeak.log"
params:
extra = config['params']['macs2']['callpeak'],
prefix = "{sample}",
outdir = os.path.join(macs2_path, "{sample}")
log:
"workflow/logs/macs2_callpeak/{sample}_callpeak.log"
threads: 1
resources:
mem_mb = 8192,
mem_mb = 16384,
runtime = "1h",
shell:
"""
echo -e "Running macs2 call peak on:\n{input.bam}" >> {log}
echo -e "The specified control sample is:\n{input.control}" >> {log}
macs2 callpeak \
-t {input.bam}\
-c {input.control} \
-f BAM --bdg --SPMR \
{params.extra} \
-n {params.prefix} \
--outdir {params.outdir} 2> {output.log}
--outdir {params.outdir} 2> {log}
cp {log} {output.log}
"""

rule macs2_callpeak_merged:
Expand Down Expand Up @@ -113,15 +112,14 @@ rule macs2_callpeak_merged:
runtime = "1h"
shell:
"""
echo -e "Running macs2 call peak on:\n{input.bam}" >> {log}
echo -e "The specified control sample is:\n{input.control}" >> {log}
macs2 callpeak \
-t {input.bam}\
-c {input.control} \
-f BAM --bdg --SPMR \
{params.extra} \
-n {params.prefix} \
--outdir {params.outdir} 2> {output.log}
--outdir {params.outdir} 2> {log}
cp {log} {output.log}
"""

rule macs2_bdgcmp_merged:
Expand Down Expand Up @@ -157,38 +155,18 @@ rule macs2_bdgcmp_merged:
-o {output} 2> {log}
"""


rule bedgraph_to_bigwig:
input:
bedgraph = "{file}.bdg",
chrom_sizes = chrom_sizes
output:
bigwig = "{file}.bw"
conda: "../envs/bedgraph_to_bigwig.yml"
log: "workflow/logs/bedgraph_to_bigwig/{file}.log"
rule check_callpeak_logs:
input:
log = "{file}_callpeak.log",
script = "workflow/scripts/check_callpeak_logs.R"
output: temp("{file}_callpeak.chk")
threads: 1
log: "workflow/logs/check_callpeak_logs/{file}.log"
conda: "../envs/rmarkdown.yml"
resources:
runtime = "4h",
mem_mb = 8192
mem_mb = 1024,
runtime = "5m"
shell:
"""
echo -e "Converting {input.bedgraph} to BigWig\n" >> {log}
echo -e "Starting conversion at $(date)\n" >> {log}
TEMPDIR=$(mktemp -d -t bdgXXXXXXXXXX)
SORTED_BDG=$TEMPDIR/temp.bdg
## Sort the file
echo -e "Sorting as $SORTED_BDG...\n" >> {log}
sort -k1,1 -k2,2n {input.bedgraph} | egrep $'^chr[0-9XY]+\t' > $SORTED_BDG
## Convert the file
echo -e "Sorting complete\nConverting to bigWig...\n" >> {log}
bedGraphToBigWig $SORTED_BDG {input.chrom_sizes} {output.bigwig}
echo -e "Finished conversion at $(date)\n" >> {log}
## Remove the temp sorted file
echo -e "Cleaning up temp files\n" >> {log}
rm -rf $TEMPDIR
echo -e "Done" >> {log}
"""
Rscript --vanilla {input.script} {input.log} {output} >> {log} 2>&1
"""
26 changes: 26 additions & 0 deletions workflow/rules/make_greylist.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Build a per-accession grey list BED file by running
# workflow/scripts/make_greylist.R on the deduplicated, indexed BAM.
# The script receives, in order: the BAM, the chromosome sizes file, the
# reference genome name from the config, and the output path. The .bai is
# declared as an input so the index exists before the script runs.
rule make_greylist:
    input:
        bam = os.path.join(dedup_path, "{accession}.sorted.bam"),
        bai = os.path.join(dedup_path, "{accession}.sorted.bam.bai"),
        chrom_sizes = chrom_sizes,
        script = "workflow/scripts/make_greylist.R"
    output:
        greylist = os.path.join("output", "annotations", "{accession}_greylist.bed")
    params:
        genome = config['reference']['name']
    conda: "../envs/greylist.yml"
    threads: 1
    log: "workflow/logs/make_greylist/{accession}.log"
    resources:
        runtime = "40m",
        # Integer, like every other rule: a quoted "8192" is a string
        # resource and cannot be used in numeric resource accounting
        mem_mb = 8192
    shell:
        """
        Rscript --vanilla \
            {input.script} \
            {input.bam} \
            {input.chrom_sizes} \
            {params.genome} \
            {output.greylist} >> {log} 2>&1
        """

Loading

0 comments on commit 16d96b9

Please sign in to comment.