Skip to content

Commit

Permalink
2.2.0: Config update, make_log_useful & watchdog update
Browse files Browse the repository at this point in the history
  • Loading branch information
weber8thomas committed Aug 9, 2023
1 parent 4c9124b commit b85f954
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 77 deletions.
46 changes: 30 additions & 16 deletions afac/watchdog_ashleys.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import json
import pandas as pd
import threading


os.makedirs("watchdog/logs", exist_ok=True)
Expand Down Expand Up @@ -180,24 +181,37 @@ def run_second_command(self, cmd, profile_slurm, data_location, date_folder):
subprocess.run(["chmod", "-R", "777", f"{data_location}/{date_folder}"])


# Create the event handler
# NOTE(review): pre-2.2.0 remnant shown on the deletion side of this diff —
# in 2.2.0 this setup (and the initial unprocessed-folder scan) moved inside
# main() / its hourly periodic_scan thread.
event_handler = MyHandler()
event_handler.check_unprocessed_folder()
def main():
    """Watch `path_to_watch` for new plates and rescan hourly for unprocessed folders.

    Starts a watchdog Observer on `path_to_watch` (non-recursive) plus a
    background thread that calls ``event_handler.check_unprocessed_folder()``
    once an hour, then blocks until interrupted with Ctrl-C.
    """
    # Create the event handler that reacts to filesystem events.
    event_handler = MyHandler()

    # Create an observer and bind it to the watched path and the handler.
    observer = Observer()
    observer.schedule(event_handler, path_to_watch, recursive=False)

    # Start the observer (runs in its own thread).
    observer.start()

    # Periodic directory scan: catches folders that appeared while the
    # watcher was down or whose filesystem events were missed.
    def periodic_scan():
        while True:
            event_handler.check_unprocessed_folder()
            time.sleep(3600)  # Scan the directory every hour

    # daemon=True so this thread does not keep the process alive: without it,
    # the infinite periodic_scan loop makes the program unkillable — after
    # KeyboardInterrupt stops the observer, the non-daemon scan thread would
    # still block interpreter shutdown forever.
    scan_thread = threading.Thread(target=periodic_scan, daemon=True)
    scan_thread.start()

    try:
        while True:
            logging.info("Waiting for new plate ...")
            time.sleep(3600)
    except KeyboardInterrupt:
        # Ctrl-C: ask the observer thread to stop.
        observer.stop()

    # Wait for the observer thread to finish its shutdown.
    observer.join()

# NOTE(review): deletion-side remnant of the diff — this pre-2.2.0 top-level
# wait loop was replaced by the equivalent try/except block inside main().
# It references a module-level `observer` that no longer exists in 2.2.0.
try:
    while True:
        logging.info("Waiting for new plate ...")
        time.sleep(3600)
except KeyboardInterrupt:
    observer.stop()

observer.join()
if __name__ == "__main__":
main()
136 changes: 77 additions & 59 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,59 +1,39 @@
version: 2.1.3
# Option to display all potential options - listed in config_metadata.yaml
list_commands: False
## Data location - MUST BE AN ABSOLUTE PATH (due to snakemake-symlink issues) - PLEASE MODIFY IT
# input_bam_location: ".tests/data_CHR17"
data_location: ".tests/data_CHR17"
# Reference genome used by BWA to map FASTQ files
# reference: sandbox.zenodo.org/record/1074721/files/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
# Enable / Disable download of external files (1000G SNV & Fasta ref genome)
dl_external_files: False
# Enable / Disable multistep normalisation analysis
multistep_normalisation: False
# Ashleys-qc binary classification threshold
ashleys_threshold: 0.5
# Enable / Disable MultiQC reporting (aggregates FastQC results)
MultiQC: False
# To be informed of pipeline status
# --------------------------------------------------------
# Ashleys-QC pipeline Configuration
# --------------------------------------------------------
version: 2.2.0

# Email for notifications about the pipeline's status
email: ""
# Others
abs_path: "/"
############################################################################
# ADVANCED PARAMETERS
############################################################################

reference: "hg38"
# List of samples to process if multiple are specified
samples_to_process: []

references_data:
"hg38":
reference_fasta: "workflow/data/ref_genomes/hg38.fa"
"hg19":
reference_fasta: "workflow/data/ref_genomes/hg19.fa"
"T2T":
reference_fasta: "workflow/data/ref_genomes/T2T.fa"
"mm10":
reference_fasta: "workflow/data/ref_genomes/mm10.fa"

# Boolean parameters
## Is the pipeline called used as a submodule in mosaicatcher-pipeline?
mosaicatcher_pipeline: False
## Enable/Disable hand selection through Jupyter Notebook
hand_selection: False
# --------------------------------------------------------
# Data location & I/O
# --------------------------------------------------------

# Window size used by mosaic binning algorithm
window: 200000
# Absolute path to the data location (modify as needed)
data_location: ".tests/data_CHR17"

plottype_counts:
- "raw"
- "normalised"
# Directory to publish important data (e.g., stats, plots, counts). Leave empty if not required.
publishdir: ""

# --------------------------------------------------------
# Reference Data Configuration
# --------------------------------------------------------

alfred_plots:
- "dist"
- "devi"
# Reference genome used by BWA to map FASTQ files
reference: "hg38"

plate_orientation: landscape
# Reference genome files' location
references_data:
"hg38": { reference_fasta: "workflow/data/ref_genomes/hg38.fa" }
"T2T": { reference_fasta: "workflow/data/ref_genomes/T2T.fa" }
"hg19": { reference_fasta: "workflow/data/ref_genomes/hg19.fa" }
"mm10": { reference_fasta: "workflow/data/ref_genomes/mm10.fa" }

# Chromosomes list to process
# List of chromosomes to process
chromosomes:
- chr1
- chr2
Expand All @@ -80,26 +60,64 @@ chromosomes:
- chrX
- chrY

# Specify any chromosomes to exclude from processing
chromosomes_to_exclude: []

# GENECORE
# --------------------------------------------------------
# Quality Control Configuration
# --------------------------------------------------------

# Threshold for Ashleys-qc binary classification
ashleys_threshold: 0.5

# Enable or disable MultiQC reporting (aggregates FastQC results)
MultiQC: False

# --------------------------------------------------------
# Counts Configuration
# --------------------------------------------------------

# Enable or disable multistep normalization analysis
multistep_normalisation: False

# Advanced parameters for multi-step normalisation
multistep_normalisation_options:
min_reads_bin: 5
n_subsample: 1000
min_reads_cell: 100000

# Window size used by the mosaic binning algorithm
window: 200000

# Enable or disable hand selection through the Jupyter Notebook
hand_selection: False

# --------------------------------------------------------
# GENECORE Configuration
# --------------------------------------------------------

genecore: False
samples_to_process: []
genecore_date_folder: ""
# genecore_prefix: "/g/korbel/shared/genecore"
genecore_prefix: "/g/korbel/STOCKS/Data/Assay/sequencing/2023"
genecore_regex_element: "PE20"

##### DEV only
# --------------------------------------------------------
# Internal Parameters
# --------------------------------------------------------

# Is the pipeline used as a submodule in mosaicatcher-pipeline?
mosaicatcher_pipeline: False

# Overwrite ASHLEYS PREDICTIONS for GitHub & smoke dataset purposes
use_light_data: False

# If specified, will copy important data (stats, plots, counts file) to a second place
publishdir: ""
# For snakemake linting
abs_path: "/"

# Multi-step normalisation advanced parameters
multistep_normalisation_options:
min_reads_bin: 5
n_subsample: 1000
min_reads_cell: 100000
# Type of plots for counts
plottype_counts:
- "raw"
- "normalised"

# Option to display all potential commands (as listed in config_metadata.yaml)
list_commands: False
5 changes: 3 additions & 2 deletions workflow/scripts/utils/make_log_useful_ashleys.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


def make_log_useful(log_path, status, config):
log_path_new = "/".join(log_path.split("/")[:-1]) + "/processed_logs_for_mail/" + log_path.split("/")[-1]
error_buffer = []
record = 0
with open(log_path, "r") as logfile:
Expand All @@ -24,12 +25,12 @@ def make_log_useful(log_path, status, config):
else:
continue

with open(log_path, "w") as logfile:
with open(log_path_new, "w") as logfile:
_ = logfile.write("\n".join(error_buffer))
_ = logfile.write("\n\n")

my_env = dict(os.environ)
with open(log_path, "a") as logfile:
with open(log_path_new, "a") as logfile:
_ = logfile.write("=======[{}]=======\n".format(status))
_ = logfile.write("\n===[{}]===\n".format("Infrastructure information"))
_ = logfile.write("Host: {}\n".format(my_env.get("HOST", "N/A")))
Expand Down

0 comments on commit b85f954

Please sign in to comment.