Skip to content

Commit

Permalink
Implemented filtering on selected genes and samples
Browse files Browse the repository at this point in the history
  • Loading branch information
SteampunkIslande committed Jul 17, 2024
1 parent cfa0b07 commit 8cf5475
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 65 deletions.
6 changes: 5 additions & 1 deletion commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@
import PySide6.QtWidgets as qw


def duck_db_literal_string_list(l: typing.List) -> str:
def duck_db_literal_string_list(l: typing.Iterable) -> str:
    """Render the elements of ``l`` as a DuckDB list literal of strings.

    Every element is converted with ``str()`` and wrapped in single quotes.
    Single quotes inside a value are doubled (the SQL escaping rule), so a
    value such as ``O'Neil`` cannot terminate the literal early.

    Args:
        l: Any iterable of values; it is consumed once.

    Returns:
        A string such as ``['a', 'b''c']``. An empty iterable yields ``[]``.
    """
    # Double embedded quotes so the generated SQL stays well-formed.
    quoted = ("'" + str(e).replace("'", "''") + "'" for e in l)
    return "[" + ", ".join(quoted) + "]"


def duck_db_literal_string_tuple(l: typing.Iterable) -> str:
    """Render the elements of ``l`` as a parenthesised SQL list of strings.

    Every element is converted with ``str()`` and wrapped in single quotes.
    Single quotes inside a value are doubled (the SQL escaping rule), so a
    value such as ``O'Neil`` cannot terminate the literal early.

    Args:
        l: Any iterable of values; it is consumed once.

    Returns:
        A string such as ``('a', 'b''c')``. An empty iterable yields ``()``.
        NOTE(review): ``x IN ()`` may not be accepted by the SQL engine;
        confirm how callers handle an empty selection.
    """
    # Double embedded quotes so the generated SQL stays well-formed.
    quoted = ("'" + str(e).replace("'", "''") + "'" for e in l)
    return "(" + ", ".join(quoted) + ")"


def dict_add_value(d: dict, key: str, value: typing.Any):
"""Pythonic way to add a value to an arbitrarly nested dictionary (and without using defaultdict)
Expand Down
51 changes: 34 additions & 17 deletions config_folder/validation_methods/validation_ppi.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,23 @@
"query": {
"select": {
"fields": [
"main_table.run_name",
"main_table.sample_name",
"main_table.snpeff_Gene_Name",
"main_table.snpeff_Feature_ID",
"main_table.chromosome",
"main_table.position",
"main_table.reference",
"main_table.alternate",
"main_table.cv_AF",
"main_table.snpeff_Annotation",
"main_table.cv_GT",
"main_table.snpeff_Annotation_Impact",
"agg.ref_count",
"agg.var_count",
"recur.count",
"clinvar.CLNACC",
"clinvar.CLNSIG",
"main_table.run_name AS 'Nom du run'",
"main_table.sample_name AS 'Échantillon'",
"main_table.snpeff_Gene_Name AS 'Nom du gène'",
"main_table.snpeff_Feature_ID AS 'NM'",
"main_table.chromosome AS 'Chromosome'",
"main_table.position AS 'Position'",
"main_table.reference AS 'Allèle de référence'",
"main_table.alternate AS 'Allèle alternatif'",
"main_table.cv_AF AS 'Fréquence allélique'",
"main_table.snpeff_Annotation AS 'Annotation'",
"main_table.cv_GT AS 'Génotype'",
"main_table.snpeff_Annotation_Impact AS 'Annotation impact'",
"agg.ref_count AS 'Nombre d''allèles de référence dans la base'",
"agg.var_count AS 'Nombre de variants détectés dans la base'",
"recur.count AS 'Nombre d''observations du variant dans ce run'",
"clinvar.CLNACC AS 'Numéro d''accession clinvar'",
"clinvar.CLNSIG AS 'Signification CLINVAR'",
"main_table.\"snpeff_HGVS.c\" AS 'c. de l anomalie'"
],
"tables": [
Expand Down Expand Up @@ -152,8 +152,25 @@
"alias": "clinvar",
"on": "main_table.variant_hash = clinvar.variant_hash",
"how": ""
},
{
"expression": "read_csv('{pwd}/lists/liste_nms.csv',sep=',',header=false,names=['NM'])",
"alias": "nms",
"on": "main_table.snpeff_Feature_ID = nms.NM",
"how": "right"
}
],
"filter": {
"filter_type": "AND",
"children": [
{
"expression": "main_table.sample_name IN {selected_samples}"
},
{
"expression": "main_table.snpeff_Gene_Name IN {selected_genes}"
}
]
},
"order_by": [
{
"field": "main_table.chromosome",
Expand Down
43 changes: 31 additions & 12 deletions query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import datalake as dl
import filters_model as fm
from commons import duck_db_literal_string_list
from commons import duck_db_literal_string_list, duck_db_literal_string_tuple
from filters import FilterItem, FilterType


Expand Down Expand Up @@ -76,7 +76,13 @@ def run_sql(query: str, conn: db.DuckDBPyConnection = None) -> Union[List[dict],

class Query(qc.QObject):

RESERVED_VARIABLES = ["main_table", "user_table", "pwd"]
RESERVED_VARIABLES = [
"main_table",
"user_table",
"pwd",
"selected_genes",
"selected_samples",
]

# Signal for external use (tell the UI to update)
query_changed = qc.Signal()
Expand All @@ -95,8 +101,11 @@ def init_state(self):
# When we create a new Query, we want to reset everything, except for the datalake path...
self.query_template = None
self.order_by = None

self.readonly_table = None
self.editable_table_name = None
self.selected_samples = []
self.selected_genes = []

self.limit = 10
self.offset = 0
Expand Down Expand Up @@ -220,6 +229,22 @@ def set_editable_table_name(self, name: str):
self.internal_changed.emit()
return self

def set_selected_samples(self, samples: List[str]):
    """Store the selected sample names and signal the change.

    Args:
        samples: Sample names to keep on this object.

    Returns:
        ``self``, so calls can be chained.
    """
    self.selected_samples = samples
    # Emitted after the attribute is updated, as for the other setters.
    self.internal_changed.emit()
    return self

def get_selected_samples(self) -> List[str]:
    """Return the sample names currently stored in ``selected_samples``."""
    return self.selected_samples

def set_selected_genes(self, genes: List[str]):
    """Store the selected gene names and signal the change.

    Args:
        genes: Gene names to keep on this object.

    Returns:
        ``self``, so calls can be chained.
    """
    self.selected_genes = genes
    # Emitted after the attribute is updated, as for the other setters.
    self.internal_changed.emit()
    return self

def get_selected_genes(self) -> List[str]:
    """Return the gene names currently stored in ``selected_genes``."""
    return self.selected_genes

def generate_query_template_from_json(self, data: dict) -> "Query":
"""Builds a query template from a json object.
Provided json object must have a select key at the root level.
Expand Down Expand Up @@ -247,21 +272,15 @@ def select_query(self, paginated=True):
"main_table": self.readonly_table,
"user_table": f'"{self.editable_table_name}"',
"pwd": self.datalake.datalake_path,
"selected_genes": duck_db_literal_string_tuple(self.selected_genes),
"selected_samples": duck_db_literal_string_tuple(self.selected_samples),
**{k: v for k, v in self.variables.items()},
}
)

def count_query(self):
additional_where = (
f" WHERE {str(self.filter_model.root)} " if self.filter_model.root else ""
)
return f"SELECT COUNT(*) AS count_star FROM ({self.query_template}) {additional_where}".format(
**{
"main_table": self.readonly_table,
"user_table": f'"{self.editable_table_name}"',
"pwd": self.datalake.datalake_path,
**{k: v for k, v in self.variables.items()},
}
return (
f"SELECT COUNT(*) AS count_star FROM ({self.select_query(paginated=False)})"
)

def is_valid(self):
Expand Down
60 changes: 25 additions & 35 deletions validation_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,6 @@ def set_steps(self, steps):
def flags(self, index: qc.QModelIndex):
    """Return the item flags for ``index``.

    The same value is returned for every index: selectable and enabled.
    """
    item_flag = qc.Qt.ItemFlag
    return item_flag.ItemIsSelectable | item_flag.ItemIsEnabled

def get_step(self, index):
return self.steps[index]

def get_current_step(self, index):
return self.steps[index]


class ValidationWidget(qw.QWidget):

Expand All @@ -204,6 +198,7 @@ def __init__(self, datalake: dl.DataLake, parent=None):
self.step_selection_list_view.setSelectionMode(
qw.QAbstractItemView.SelectionMode.SingleSelection
)

self.step_selection_list_view.selectionModel().currentChanged.connect(
self.update_step
)
Expand Down Expand Up @@ -242,6 +237,8 @@ def init_state(self):
self.validation_table_uuid = None
self.validation_name = None
self.validation_parquet_files = None
self.validation_sample_names = None
self.validation_gene_names = None

self.validate_button.setText(qc.QCoreApplication.tr("Valider le panier"))
self.return_to_validation_button.setText(
Expand All @@ -250,31 +247,13 @@ def init_state(self):

self.completed = False

def setup_finish(self):
if (
not self.validation_name
or not self.validation_parquet_files
or not self.method
or not self.datalake
):
return
self.validate_button.setText(qc.QCoreApplication.tr("Exporter vers Genno"))

last_step_definition = self.method["final"]

self.query.mute().set_readonly_table(
self.validation_parquet_files
).set_editable_table_name(
self.validation_table_uuid
).unmute().generate_query_template_from_json(
last_step_definition["query"]
)

def validate(self):
conn = self.datalake.get_database("validation")
try:
finish_validation(conn, self.validation_table_uuid)
self.completed = True
step_definition = self.method["final"]["query"]
self.setup_step(step_definition)
except Exception as e:
print(e)
finally:
Expand Down Expand Up @@ -338,26 +317,28 @@ def set_method_path(self, method_path: Path):

def update_step(self, index: qc.QModelIndex):
self.current_step_id = index.row()
self.setup_step()
step_definition = index.data(qc.Qt.ItemDataRole.UserRole)
self.setup_step(step_definition)

def setup_step(self):
def setup_step(self, step_definition: dict):
"""Modifies the query to match the current step definition."""
if (
not self.validation_name
or not self.validation_parquet_files
or not self.method
or not self.datalake
or not self.query
):
return

step_definition = self.method["steps"][self.current_step_id]

self.query.mute().set_readonly_table(
self.validation_parquet_files
).set_editable_table_name(
self.validation_table_uuid
).set_editable_table_name(self.validation_table_uuid).set_selected_genes(
self.validation_gene_names
).set_selected_samples(
self.validation_sample_names
).unmute().generate_query_template_from_json(
step_definition["query"]
step_definition
)

def start_validation(self, selected_validation: dict):
Expand All @@ -378,6 +359,8 @@ def start_validation(self, selected_validation: dict):
self.validation_name = selected_validation["validation_name"]
self.validation_parquet_files = selected_validation["parquet_files"]
self.validation_table_uuid = selected_validation["table_uuid"]
self.validation_sample_names = selected_validation["sample_names"]
self.validation_gene_names = selected_validation["gene_names"]

self.set_method_path(
Path(config_folder)
Expand All @@ -402,9 +385,16 @@ def start_validation(self, selected_validation: dict):
.to_dicts()[0]["completed"]
)
if self.completed:
self.setup_finish()
self.validate_button.setText(
qc.QCoreApplication.tr("Exporter vers Genno")
)
step_definition = self.method["final"]["query"]
self.setup_step(step_definition)
else:
self.setup_step()
step_definition = self.step_model.index(self.current_step_id).data(
qc.Qt.ItemDataRole.UserRole
)
self.setup_step(step_definition)
except IndexError:
print(self.validation_table_uuid)
finally:
Expand Down

0 comments on commit 8cf5475

Please sign in to comment.