Skip to content

Commit

Permalink
2.2.0: Config update, make_log_useful & watchdog update
Browse files Browse the repository at this point in the history
  • Loading branch information
weber8thomas committed Aug 9, 2023
1 parent 4c9124b commit b85f954
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 77 deletions.
46 changes: 30 additions & 16 deletions afac/watchdog_ashleys.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import json
import pandas as pd
import threading


os.makedirs("watchdog/logs", exist_ok=True)
Expand Down Expand Up @@ -180,24 +181,37 @@ def run_second_command(self, cmd, profile_slurm, data_location, date_folder):
subprocess.run(["chmod", "-R", "777", f"{data_location}/{date_folder}"])


# Create the event handler
# NOTE(review): pre-2.2.0 remnant shown on the deletion side of this diff —
# in 2.2.0 this setup (and the initial unprocessed-folder scan) moved inside
# main() / its hourly periodic_scan thread.
event_handler = MyHandler()
event_handler.check_unprocessed_folder()
def main():
    """Watch `path_to_watch` for new plates and rescan hourly for unprocessed folders.

    Starts a watchdog Observer on `path_to_watch` (non-recursive) plus a
    background thread that calls ``event_handler.check_unprocessed_folder()``
    once an hour, then blocks until interrupted with Ctrl-C.
    """
    # Create the event handler that reacts to filesystem events.
    event_handler = MyHandler()

    # Create an observer and bind it to the watched path and the handler.
    observer = Observer()
    observer.schedule(event_handler, path_to_watch, recursive=False)

    # Start the observer (runs in its own thread).
    observer.start()

    # Periodic directory scan: catches folders that appeared while the
    # watcher was down or whose filesystem events were missed.
    def periodic_scan():
        while True:
            event_handler.check_unprocessed_folder()
            time.sleep(3600)  # Scan the directory every hour

    # daemon=True so this thread does not keep the process alive: without it,
    # the infinite periodic_scan loop makes the program unkillable — after
    # KeyboardInterrupt stops the observer, the non-daemon scan thread would
    # still block interpreter shutdown forever.
    scan_thread = threading.Thread(target=periodic_scan, daemon=True)
    scan_thread.start()

    try:
        while True:
            logging.info("Waiting for new plate ...")
            time.sleep(3600)
    except KeyboardInterrupt:
        # Ctrl-C: ask the observer thread to stop.
        observer.stop()

    # Wait for the observer thread to finish its shutdown.
    observer.join()

# NOTE(review): deletion-side remnant of the diff — this pre-2.2.0 top-level
# wait loop was replaced by the equivalent try/except block inside main().
# It references a module-level `observer` that no longer exists in 2.2.0.
try:
    while True:
        logging.info("Waiting for new plate ...")
        time.sleep(3600)
except KeyboardInterrupt:
    observer.stop()

observer.join()
if __name__ == "__main__":
main()
136 changes: 77 additions & 59 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,59 +1,39 @@
version: 2.1.3
# Option to display all potential options - listed in config_metadata.yaml
list_commands: False
## Data location - MUST BE AN ABSOLUTE PATH (due to snakemake-symlink issues) - PLEASE MODIFY IT
# input_bam_location: ".tests/data_CHR17"
data_location: ".tests/data_CHR17"
# Reference genome used by BWA to map FASTQ files
# reference: sandbox.zenodo.org/record/1074721/files/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
# Enable / Disable download of external files (1000G SNV & Fasta ref genome)
dl_external_files: False
# Enable / Disable multistep normalisation analysis
multistep_normalisation: False
# Ashleys-qc binary classification threshold
ashleys_threshold: 0.5
# Enable / Disable MultiQC reporting (aggregates FastQC results)
MultiQC: False
# To be informed of pipeline status
# --------------------------------------------------------
# Ashleys-QC pipeline Configuration
# --------------------------------------------------------
version: 2.2.0

# Email for notifications about the pipeline's status
email: ""
# Others
abs_path: "/"
############################################################################
# ADVANCED PARAMETERS
############################################################################

reference: "hg38"
# List of samples to process if multiple are specified
samples_to_process: []

references_data:
"hg38":
reference_fasta: "workflow/data/ref_genomes/hg38.fa"
"hg19":
reference_fasta: "workflow/data/ref_genomes/hg19.fa"
"T2T":
reference_fasta: "workflow/data/ref_genomes/T2T.fa"
"mm10":
reference_fasta: "workflow/data/ref_genomes/mm10.fa"

# Boolean parameters
## Is the pipeline called used as a submodule in mosaicatcher-pipeline?
mosaicatcher_pipeline: False
## Enable/Disable hand selection through Jupyter Notebook
hand_selection: False
# --------------------------------------------------------
# Data location & I/O
# --------------------------------------------------------

# Window size used by mosaic binning algorithm
window: 200000
# Absolute path to the data location (modify as needed)
data_location: ".tests/data_CHR17"

plottype_counts:
- "raw"
- "normalised"
# Directory to publish important data (e.g., stats, plots, counts). Leave empty if not required.
publishdir: ""

# --------------------------------------------------------
# Reference Data Configuration
# --------------------------------------------------------

alfred_plots:
- "dist"
- "devi"
# Reference genome used by BWA to map FASTQ files
reference: "hg38"

plate_orientation: landscape
# Reference genome files' location
references_data:
"hg38": { reference_fasta: "workflow/data/ref_genomes/hg38.fa" }
"T2T": { reference_fasta: "workflow/data/ref_genomes/T2T.fa" }
"hg19": { reference_fasta: "workflow/data/ref_genomes/hg19.fa" }
"mm10": { reference_fasta: "workflow/data/ref_genomes/mm10.fa" }

# Chromosomes list to process
# List of chromosomes to process
chromosomes:
- chr1
- chr2
Expand All @@ -80,26 +60,64 @@ chromosomes:
- chrX
- chrY

# Specify any chromosomes to exclude from processing
chromosomes_to_exclude: []

# GENECORE
# --------------------------------------------------------
# Quality Control Configuration
# --------------------------------------------------------

# Threshold for Ashleys-qc binary classification
ashleys_threshold: 0.5

# Enable or disable MultiQC reporting (aggregates FastQC results)
MultiQC: False

# --------------------------------------------------------
# Counts Configuration
# --------------------------------------------------------

# Enable or disable multistep normalization analysis
multistep_normalisation: False

# Advanced parameters for multi-step normalisation
multistep_normalisation_options:
min_reads_bin: 5
n_subsample: 1000
min_reads_cell: 100000

# Window size used by the mosaic binning algorithm
window: 200000

# Enable or disable hand selection through the Jupyter Notebook
hand_selection: False

# --------------------------------------------------------
# GENECORE Configuration
# --------------------------------------------------------

genecore: False
samples_to_process: []
genecore_date_folder: ""
# genecore_prefix: "/g/korbel/shared/genecore"
genecore_prefix: "/g/korbel/STOCKS/Data/Assay/sequencing/2023"
genecore_regex_element: "PE20"

##### DEV only
# --------------------------------------------------------
# Internal Parameters
# --------------------------------------------------------

# Is the pipeline used as a submodule in mosaicatcher-pipeline?
mosaicatcher_pipeline: False

# Overwrite ASHLEYS PREDICTIONS for GitHub & smoke dataset purposes
use_light_data: False

# If specified, will copy important data (stats, plots, counts file) to a second place
publishdir: ""
# For snakemake linting
abs_path: "/"

# Multi-step normalisation advanced parameters
multistep_normalisation_options:
min_reads_bin: 5
n_subsample: 1000
min_reads_cell: 100000
# Type of plots for counts
plottype_counts:
- "raw"
- "normalised"

# Option to display all potential commands (as listed in config_metadata.yaml)
list_commands: False
5 changes: 3 additions & 2 deletions workflow/scripts/utils/make_log_useful_ashleys.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


def make_log_useful(log_path, status, config):
log_path_new = "/".join(log_path.split("/")[:-1]) + "/processed_logs_for_mail/" + log_path.split("/")[-1]
error_buffer = []
record = 0
with open(log_path, "r") as logfile:
Expand All @@ -24,12 +25,12 @@ def make_log_useful(log_path, status, config):
else:
continue

with open(log_path, "w") as logfile:
with open(log_path_new, "w") as logfile:
_ = logfile.write("\n".join(error_buffer))
_ = logfile.write("\n\n")

my_env = dict(os.environ)
with open(log_path, "a") as logfile:
with open(log_path_new, "a") as logfile:
_ = logfile.write("=======[{}]=======\n".format(status))
_ = logfile.write("\n===[{}]===\n".format("Infrastructure information"))
_ = logfile.write("Host: {}\n".format(my_env.get("HOST", "N/A")))
Expand Down

0 comments on commit b85f954

Please sign in to comment.