From cfa0b071e204a25bf0265af0fbdd8d2ff9f54155 Mon Sep 17 00:00:00 2001 From: Charles Monod-Broca Date: Wed, 17 Jul 2024 13:55:42 +0200 Subject: [PATCH] Improved validation widget --- .../validation_methods/validation_ppi.json | 119 ++++++++++-- query.py | 10 +- validation_widget.py | 173 ++++++++++++------ 3 files changed, 222 insertions(+), 80 deletions(-) diff --git a/config_folder/validation_methods/validation_ppi.json b/config_folder/validation_methods/validation_ppi.json index f2989ce..03975b0 100644 --- a/config_folder/validation_methods/validation_ppi.json +++ b/config_folder/validation_methods/validation_ppi.json @@ -69,8 +69,8 @@ }, "steps": [ { - "title": "Etape 1", - "description": "Première étape de la validation.\nLes filtres appliqués montrent uniquement les variants dont la fréquence allélique en population générale est inférieure à 1%.", + "title": "Presets d'Alix", + "description": "Première étape de la validation.
Les filtres appliqués montrent uniquement les variants dont la fréquence allélique en population générale est inférieure à 1%.", "query": { "select": { "fields": [ @@ -166,6 +166,105 @@ ] } } + }, + { + "title": "Presets de Pascale", + "description": "Première étape de la validation.\nLes filtres appliqués montrent uniquement les variants dont la fréquence allélique en population générale est inférieure à 1%.", + "query": { + "select": { + "fields": [ + "main_table.run_name", + "main_table.sample_name", + "main_table.snpeff_Gene_Name", + "main_table.snpeff_Feature_ID", + "main_table.chromosome", + "main_table.position", + "main_table.reference", + "main_table.alternate", + "main_table.cv_AF", + "main_table.snpeff_Annotation", + "main_table.cv_GT", + "main_table.snpeff_Annotation_Impact", + "agg.ref_count", + "agg.var_count", + "recur.count", + "clinvar.CLNACC", + "clinvar.CLNSIG", + "main_table.\"snpeff_HGVS.c\" AS 'c. de l anomalie'" + ], + "tables": [ + { + "expression": "{main_table}", + "alias": "main_table" + }, + { + "expression": "read_parquet('{pwd}/aggregates/variants.parquet')", + "alias": "agg", + "on": "main_table.variant_hash = agg.variant_hash", + "how": "" + }, + { + "select": { + "fields": [ + "variant_hash", + "COUNT(*) as count" + ], + "tables": [ + { + "select": { + "fields": [ + "DISTINCT(sample_name)", + "variant_hash" + ], + "tables": [ + { + "expression": "{main_table}", + "alias": "" + } + ] + }, + "alias": "" + } + ], + "group_by": [ + "variant_hash" + ] + }, + "alias": "recur", + "on": "main_table.variant_hash = recur.variant_hash", + "how": "" + }, + { + "select": { + "fields": [ + "variant_hash", + "rcvAcc AS CLNACC", + "clinSign AS CLNSIG" + ], + "tables": [ + { + "expression": "read_parquet('{pwd}/annotations/clinvarMain.parquet')", + "alias": "" + } + ] + }, + "alias": "clinvar", + "on": "main_table.variant_hash = clinvar.variant_hash", + "how": "" + } + ], + "order_by": [ + { + "field": "main_table.chromosome", + "order": "DESC" + }, + { + "field": "main_table.position", + "order": "DESC" + } + ] + } + } } ], "final": { @@ -212,25 +311,13 @@ "how": "" }, { - "expression": "{pwd}/annotations/hgnc.hg19.parquet", + "expression": "read_parquet('{pwd}/annotations/hgnc.hg19.parquet')", "alias": "hgnc", "on": "main_table.chromosome = hgnc.chrom AND main_table.position >= hgnc.chromStart AND main_table.position <= hgnc.chromEnd", "how": "" }, { - "select": { - "fields": [ - "variant_hash", - "rcvAcc AS CLNACC", - "clinSign AS CLNSIG" - ], - "tables": [ - { - "expression": "read_parquet('{pwd}/clinvarMain.parquet')", - "alias": "" - } - ] - }, + "expression": "read_parquet('{pwd}/annotations/clinvarMain.parquet')", "alias": "clinvar", "on": "main_table.variant_hash = clinvar.variant_hash", "how": "" diff --git a/query.py b/query.py index fe04e06..3d1d081 100644 --- a/query.py +++ b/query.py @@ -231,15 +231,18 @@ def generate_query_template_from_json(self, data: dict) -> "Query": self.internal_changed.emit() return self - def select_query(self): + def select_query(self, paginated=True): + """Generates the select query to run on the database. Set paginated to False if you need a query that returns all rows.""" if not self.readonly_table: return "" + pagination = f" LIMIT {self.limit} OFFSET {self.offset}" if paginated else "" + additional_where = ( - f" WHERE {str(self.filter_model.root)} " if self.filter_model.root else "" + f" WHERE {str(self.filter_model.root)}" if self.filter_model.root else "" ) - return f"SELECT * FROM ({self.query_template}){additional_where}LIMIT {self.limit} OFFSET {self.offset}".format( + return f"SELECT * FROM ({self.query_template}){additional_where}{pagination}".format( **{ "main_table": self.readonly_table, "user_table": f'"{self.editable_table_name}"', @@ -273,7 +276,6 @@ def to_do(self): return "Please select a validation table" def update_data(self): - print("Updating data") # Empty data before updating self.header = [] self.data = [] diff --git a/validation_widget.py b/validation_widget.py index 11573d5..a2abd1d 100644 --- a/validation_widget.py +++ b/validation_widget.py @@ -1,20 +1,16 @@ import json from pathlib import Path +from typing import List import duckdb as db import PySide6.QtCore as qc import PySide6.QtWidgets as qw import datalake as dl -from common_widgets.multiline_display import MultiLineDisplay from common_widgets.multiwidget_holder import MultiWidgetHolder from common_widgets.searchable_table import SearchableTable from commons import get_config_folder, load_user_prefs, save_user_prefs -from validation_model import ( - VALIDATION_TABLE_COLUMNS, - ValidationModel, - get_validation_from_table_uuid, -) +from validation_model import VALIDATION_TABLE_COLUMNS, ValidationModel from validation_wizard import ValidationWizard @@ -139,6 +135,57 @@ def get_selected_validation(self): return None +class StepModel(qc.QAbstractListModel): + + def __init__(self, steps: List[dict], parent=None): + super().__init__(parent) + self.steps = steps + + def rowCount(self, parent: qc.QModelIndex): + if parent.isValid(): + return 0 + return len(self.steps) + + def columnCount(self, parent: qc.QModelIndex): + if parent.isValid(): + return 0 + return 1 + + def data(self, index: qc.QModelIndex, role: int): + if not index.isValid(): + return None + if role == qc.Qt.ItemDataRole.DisplayRole: + return self.steps[index.row()]["title"] + if role == qc.Qt.ItemDataRole.ToolTipRole: + return self.steps[index.row()]["description"] + if role == qc.Qt.ItemDataRole.UserRole: + return self.steps[index.row()]["query"] + return None + + def headerData(self, section: int, orientation: qc.Qt.Orientation, role: int): + if ( + orientation == qc.Qt.Orientation.Horizontal + and role == qc.Qt.ItemDataRole.DisplayRole + and section == 0 + ): + return "Steps" + return None + + def set_steps(self, steps): + self.beginResetModel() + self.steps = steps + self.endResetModel() + + def flags(self, index: qc.QModelIndex): + return qc.Qt.ItemFlag.ItemIsSelectable | qc.Qt.ItemFlag.ItemIsEnabled + + def get_step(self, index): + return self.steps[index] + + def get_current_step(self, index): + return self.steps[index] + + class ValidationWidget(qw.QWidget): return_to_validation = qc.Signal() @@ -150,8 +197,16 @@ def __init__(self, datalake: dl.DataLake, parent=None): self._layout = qw.QVBoxLayout(self) - self.title_label = qw.QLabel("") - self.description_text = MultiLineDisplay(self) + self.step_model = StepModel([], self) + self.step_selection_list_view = qw.QListView(self) + + self.step_selection_list_view.setModel(self.step_model) + self.step_selection_list_view.setSelectionMode( + qw.QAbstractItemView.SelectionMode.SingleSelection + ) + self.step_selection_list_view.selectionModel().currentChanged.connect( + self.update_step + ) self.validate_button = qw.QPushButton("", self) self.validate_button.clicked.connect(self.validate) @@ -166,9 +221,8 @@ def __init__(self, datalake: dl.DataLake, parent=None): self.setup_layout() def setup_layout(self): - self._layout.addWidget(self.title_label) - self._layout.addWidget(self.description_text) + self._layout.addWidget(self.step_selection_list_view) # Add vertical spacer self._layout.addStretch() @@ -196,38 +250,44 @@ def init_state(self): self.completed = False - def on_finish(self): - self.title_label.setText(qc.QCoreApplication.tr("Validation terminée")) - self.description_text.text_edit.setText( - qc.QCoreApplication.tr( - "Validation terminée. Vous ne pouvez rien ajouter au panier.\nLes résultats sont présentés dans la table ci-contre.\nVous pouvez exporter ces résultats vers Genno en cliquant sur le bouton ci-dessous." - ) - ) - self.validate_button.setText(qc.QCoreApplication.tr("Exporter vers Genno")) - - conn = self.datalake.get_database("validation") - - validation = get_validation_from_table_uuid(conn, self.validation_table_uuid) - if not validation: + def setup_finish(self): + if ( + not self.validation_name + or not self.validation_parquet_files + or not self.method + or not self.datalake + ): return - - parquet_files = validation["parquet_files"] - conn.close() + self.validate_button.setText(qc.QCoreApplication.tr("Exporter vers Genno")) last_step_definition = self.method["final"] - self.query.mute().set_readonly_table(parquet_files).set_editable_table_name( + self.query.mute().set_readonly_table( + self.validation_parquet_files + ).set_editable_table_name( self.validation_table_uuid - ).unmute().generate_query_template_from_json(last_step_definition["query"]) + ).unmute().generate_query_template_from_json( + last_step_definition["query"] + ) def validate(self): - pass + conn = self.datalake.get_database("validation") + try: + finish_validation(conn, self.validation_table_uuid) + self.completed = True + except Exception as e: + print(e) + finally: + conn.close() def on_return_to_validation(self): self.init_state() self.return_to_validation.emit() def export_csv(self): + if not self.datalake: + # WTF ? This should never happen + return user_prefs = load_user_prefs() if "genno_export_folder" not in user_prefs: qw.QMessageBox.warning( @@ -251,30 +311,15 @@ def export_csv(self): ), ) return - # Now, export final CSV to Genno - if self.datalake: - conn = self.datalake.get_database("validation") - validation = get_validation_from_table_uuid( - conn, self.validation_table_uuid - ) - if not validation: - return - conn.close() - - # def on_next_step_clicked(self): - # # Export to genno - # if self.completed: - # self.export_csv() - # return - - # # Decide whether we continue or if we reached the end - # if self.current_step_id < len(self.method["steps"]): - # self.setup_step() - # # Increment the step index - # self.current_step_id += 1 - # return - # else: - # self.on_finish() + else: + genno_export_folder = Path(user_prefs["genno_export_folder"]) + + # Now, export final CSV to Genno + + final_query = self.query.select_query(paginated=False) + db.sql( + f"COPY ({final_query}) TO '{genno_export_folder / self.validation_name}.csv' (FORMAT CSV, HEADER)" + ) def set_method_path(self, method_path: Path): if not method_path.exists(): @@ -289,6 +334,11 @@ def set_method_path(self, method_path: Path): ) with open(method_path, "r") as f: self.method = json.load(f) + self.step_model.set_steps(self.method["steps"]) + + def update_step(self, index: qc.QModelIndex): + self.current_step_id = index.row() + self.setup_step() def setup_step(self): """Modifies the query to match the current step definition.""" @@ -302,9 +352,6 @@ def setup_step(self): step_definition = self.method["steps"][self.current_step_id] - self.title_label.setText(step_definition["title"]) - self.description_text.text_edit.setText(step_definition["description"]) - self.query.mute().set_readonly_table( self.validation_parquet_files ).set_editable_table_name( @@ -337,9 +384,16 @@ def start_validation(self, selected_validation: dict): / "validation_methods" / (selected_validation["validation_method"] + ".json") ) - try: - conn = self.datalake.get_database("validation") + conn = self.datalake.get_database("validation") + + # Might be a good place to load previous step ID + self.step_selection_list_view.selectionModel().setCurrentIndex( + self.step_model.index(self.current_step_id), + qc.QItemSelectionModel.SelectionFlag.Select, + ) + + try: self.completed = ( conn.sql( f"SELECT completed FROM validations WHERE table_uuid = '{self.validation_table_uuid}'" @@ -348,11 +402,10 @@ def start_validation(self, selected_validation: dict): .to_dicts()[0]["completed"] ) if self.completed: - self.on_finish() + self.setup_finish() else: self.setup_step() except IndexError: - self.current_step_id = 0 print(self.validation_table_uuid) finally: conn.close()