From b71b6528f84a903f2289a61d7e03145eef453502 Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Wed, 27 Nov 2024 15:44:44 +0100 Subject: [PATCH 1/3] Add ONT FCs to bioinfo tab --- taca/utils/bioinfo_tab.py | 60 ++++++++++++++++++++++++++++++++++++++- taca/utils/cli.py | 10 +++++-- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/taca/utils/bioinfo_tab.py b/taca/utils/bioinfo_tab.py index 99b2b32f..f878f106 100644 --- a/taca/utils/bioinfo_tab.py +++ b/taca/utils/bioinfo_tab.py @@ -8,6 +8,7 @@ from flowcell_parser.classes import RunParametersParser, SampleSheetParser from taca.element.Aviti_Runs import Aviti_Run +from taca.nanopore.ONT_run_classes import ONT_run from taca.utils import statusdb from taca.utils.config import CONFIG from taca.utils.misc import send_mail @@ -47,6 +48,9 @@ def collect_runs(): continue logger.info(f"Working on {run_dir}") update_statusdb(run_dir, inst_brand) + elif inst_brand == "ont": + logger.info(f"Working on {run_dir}") + update_statusdb(run_dir, inst_brand) nosync_data_dir = os.path.join(data_dir, "nosync") potential_nosync_run_dirs = glob.glob( @@ -59,7 +63,7 @@ def collect_runs(): and illumina_rundir_re.match( os.path.basename(os.path.abspath(run_dir)) ) - ) or inst_brand == "element": + ) or (inst_brand == "element" or inst_brand == "ont"): # Skip archived dirs if run_dir == os.path.join(nosync_data_dir, "archived"): continue @@ -79,6 +83,20 @@ def update_statusdb(run_dir, inst_brand): # Logger in Aviti_Run.parse_run_parameters() will print the warning # WARNING - Run parameters file not found for ElementRun(), might not be ready yet return + elif inst_brand == "ont": + base_name = os.path.basename(os.path.abspath(run_dir)) + # Skip archived, no_backup, nosync and qc folders + if base_name in ["archived", "no_backup", "nosync", "qc"]: + return + + run_dir = os.path.abspath(run_dir) + try: + ont_run = ONT_run(run_dir) + except AssertionError as e: + logger.error(f"ONT Run folder error: {e}") + return + + run_id = ont_run.run_name statusdb_conf = CONFIG.get("statusdb") couch_connection = statusdb.StatusdbSession(statusdb_conf).connection @@ -91,6 +109,8 @@ def update_statusdb(run_dir, inst_brand): project_info = get_ss_projects_illumina(run_dir) elif inst_brand == "element": project_info = get_ss_projects_element(aviti_run) + elif inst_brand == "ont": + project_info = get_ss_projects_ont(ont_run, couch_connection) # Construction and sending of individual records, if samplesheet is incorrectly formatted the loop is skipped if project_info: for flowcell in project_info: @@ -103,6 +123,8 @@ def update_statusdb(run_dir, inst_brand): sample_status = get_status(run_dir) elif inst_brand == "element": sample_status = get_status_element(aviti_run) + elif inst_brand == "ont": + sample_status = get_status_ont(ont_run) project_info[flowcell][lane][sample].value = sample_status obj = { "run_id": run_id, @@ -234,6 +256,42 @@ def get_status_element(aviti_run): return status +def get_status_ont(ont_run): + """Gets status of a ONT sample run, based on flowcell info.""" + # Default state, should never occur + status = "ERROR" + run_status = ont_run.get_demultiplexing_status() + + if run_status in ["finished"]: + status = "New" + elif run_status in ["ongoing"]: + status = "Sequencing" + + return status + + +def get_ss_projects_ont(ont_run, couch_connection): + """Fetches project, FC, lane & sample (sample-run) status for a given folder for ONT runs""" + proj_tree = Tree() + flowcell_id = ont_run.run_name + flowcell_info = ( + couch_connection["nanopore_runs"].view("info/lims")[flowcell_id].rows[0] + ) + if flowcell_info.value and "sample_data" in flowcell_info.value["loading"][0]: + samples = flowcell_info.value["loading"][0]["sample_data"] + for sample_dict in samples: + sample_id = sample_dict["sample_name"] + project = sample_id.split("_")[0] + # Use default lane of 0 for ONT + proj_tree[flowcell_id]["0"][sample_id][project] + + if list(proj_tree.keys()) == []: + logger.info( + f"There was no data in StatusDB for the ONT run, CHECK {flowcell_id}" + ) + return proj_tree + + def get_ss_projects_element(aviti_run): """Fetches project, FC, lane & sample (sample-run) status for a given folder for element runs""" proj_tree = Tree() diff --git a/taca/utils/cli.py b/taca/utils/cli.py index e7f46069..7b3397e5 100644 --- a/taca/utils/cli.py +++ b/taca/utils/cli.py @@ -14,9 +14,15 @@ def bioinfo_deliveries(): # bioinfo subcommands @bioinfo_deliveries.command() @click.argument("rundir") -def updaterun(rundir): +@click.option( + "-i", + "--inst_type", + type=click.Choice(["illumina", "element", "ont"]), + required=True, +) +def updaterun(rundir, inst_type): """Saves the bioinfo data to statusdb.""" - bt.update_statusdb(rundir) + bt.update_statusdb(rundir, inst_brand=inst_type) @bioinfo_deliveries.command() From 2b0a860f87aacd01dc2ac7bc991263a943582f91 Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Fri, 6 Dec 2024 10:20:07 +0100 Subject: [PATCH 2/3] Incorporate review comments --- taca/utils/bioinfo_tab.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/taca/utils/bioinfo_tab.py b/taca/utils/bioinfo_tab.py index f878f106..1c19ae1f 100644 --- a/taca/utils/bioinfo_tab.py +++ b/taca/utils/bioinfo_tab.py @@ -8,7 +8,7 @@ from flowcell_parser.classes import RunParametersParser, SampleSheetParser from taca.element.Aviti_Runs import Aviti_Run -from taca.nanopore.ONT_run_classes import ONT_run +from taca.nanopore.ONT_run_classes import ONT_run, ONT_RUN_PATTERN from taca.utils import statusdb from taca.utils.config import CONFIG from taca.utils.misc import send_mail @@ -49,8 +49,10 @@ def collect_runs(): logger.info(f"Working on {run_dir}") update_statusdb(run_dir, inst_brand) elif inst_brand == "ont": - logger.info(f"Working on {run_dir}") - update_statusdb(run_dir, inst_brand) + # Skip archived, no_backup, nosync and qc folders + if re.match(ONT_RUN_PATTERN, os.path.basename(os.path.abspath(run_dir))): + logger.info(f"Working on {run_dir}") + update_statusdb(run_dir, inst_brand) nosync_data_dir = os.path.join(data_dir, "nosync") potential_nosync_run_dirs = glob.glob( @@ -84,11 +86,6 @@ def update_statusdb(run_dir, inst_brand): # WARNING - Run parameters file not found for ElementRun(), might not be ready yet return elif inst_brand == "ont": - base_name = os.path.basename(os.path.abspath(run_dir)) - # Skip archived, no_backup, nosync and qc folders - if base_name in ["archived", "no_backup", "nosync", "qc"]: - return - run_dir = os.path.abspath(run_dir) try: ont_run = ONT_run(run_dir) @@ -277,8 +274,8 @@ def get_ss_projects_ont(ont_run, couch_connection): flowcell_info = ( couch_connection["nanopore_runs"].view("info/lims")[flowcell_id].rows[0] ) - if flowcell_info.value and "sample_data" in flowcell_info.value["loading"][0]: - samples = flowcell_info.value["loading"][0]["sample_data"] + if flowcell_info.value and flowcell_info.value.get("loading", []) and "sample_data" in flowcell_info.value["loading"][-1]: + samples = flowcell_info.value["loading"][-1]["sample_data"] for sample_dict in samples: sample_id = sample_dict["sample_name"] project = sample_id.split("_")[0] From dcd636bc7b06e07a63450a464a04591f0dff1f4f Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Fri, 6 Dec 2024 10:22:19 +0100 Subject: [PATCH 3/3] Update versionlog and ruff --- VERSIONLOG.md | 4 ++++ taca/utils/bioinfo_tab.py | 13 ++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 597087c0..924e7b13 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # TACA Version Log +## 20241206.1 + +Add ONT instrument data to bioinfo_tab + ## 20241112.1 Add support for backing up Element Aviti data to PDC diff --git a/taca/utils/bioinfo_tab.py b/taca/utils/bioinfo_tab.py index 1c19ae1f..4e80c052 100644 --- a/taca/utils/bioinfo_tab.py +++ b/taca/utils/bioinfo_tab.py @@ -8,7 +8,7 @@ from flowcell_parser.classes import RunParametersParser, SampleSheetParser from taca.element.Aviti_Runs import Aviti_Run -from taca.nanopore.ONT_run_classes import ONT_run, ONT_RUN_PATTERN +from taca.nanopore.ONT_run_classes import ONT_RUN_PATTERN, ONT_run from taca.utils import statusdb from taca.utils.config import CONFIG from taca.utils.misc import send_mail @@ -50,7 +50,10 @@ def collect_runs(): update_statusdb(run_dir, inst_brand) elif inst_brand == "ont": # Skip archived, no_backup, nosync and qc folders - if re.match(ONT_RUN_PATTERN, os.path.basename(os.path.abspath(run_dir))): + if re.match( + ONT_RUN_PATTERN, + os.path.basename(os.path.abspath(run_dir)), + ): logger.info(f"Working on {run_dir}") update_statusdb(run_dir, inst_brand) @@ -274,7 +277,11 @@ def get_ss_projects_ont(ont_run, couch_connection): flowcell_info = ( couch_connection["nanopore_runs"].view("info/lims")[flowcell_id].rows[0] ) - if flowcell_info.value and flowcell_info.value.get("loading", []) and "sample_data" in flowcell_info.value["loading"][-1]: + if ( + flowcell_info.value + and flowcell_info.value.get("loading", []) + and "sample_data" in flowcell_info.value["loading"][-1] + ): samples = flowcell_info.value["loading"][-1]["sample_data"] for sample_dict in samples: sample_id = sample_dict["sample_name"]