Skip to content

Commit

Permalink
Refactored remaining core tables (#166)
Browse files Browse the repository at this point in the history
* Refactored remaining core tables

* self review cleanup

* tightening up medication, docref, encounter

* unit tests, athena testing, PR feedback

* jinja trim, remove write_queries, regression update

* sqlfluff pass

* black 2024

* pin black
  • Loading branch information
dogversioning authored Jan 26, 2024
1 parent e637633 commit 178295f
Show file tree
Hide file tree
Showing 96 changed files with 28,972 additions and 6,307 deletions.
10 changes: 10 additions & 0 deletions .sqlfluffignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
/scratch/
# This is a common destination for debugging sql generation
output.sql

# These files trigger an error related to accessing a two-level-deep
# date column, which is likely a sqlfluff bug
documentreference.sql.jinja
encounter.sql.jinja

# This file has namespace collisions with vars in other sqlfluff templates.
# Ignoring for now - could be addressed with an in-folder .sqlfluff config
# or by a refactor of variable names
count.sql.jinja
7 changes: 4 additions & 3 deletions cumulus_library/.sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,27 @@ code_system_tables = [{table_name":"hasarray","column_name":"acol","is_bare_codi
column_name = 'bar'
column_names = ['foo', 'bar']
conditions = ["1 > 0", "1 < 2"]
config = {"medication_datasources" : {"by_contained_ref" : True, "by_external_ref" : True}, 'has_userselected': False}
count_ref = count_ref
count_table = count_table
dataset = [["foo","foo"],["bar","bar"]]
dependent_variable = is_flu
ext_systems = ["omb", "text"]
field = 'column_name'
filter_table = filter_table
fhir_extension = fhir_extension
filter_resource = True
fhir_extension = condition
fhir_resource = patient
id = 'id'
join_cols_by_table = { "join_table": { "join_id": "enc_ref","included_cols": [["a"], ["b", "c"]]}}
join_id = subject_ref
medication_datasources = {"by_contained_ref" : True, "by_external_ref" : True}
neg_source_table = neg_source_table
output_table_name = 'created_table'
prefix = Test
primary_ref = encounter_ref
pos_source_table = pos_source_table
schema_name = test_schema
schema = {'condition': {'category': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'clinicalstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'id': True, 'recordeddate': True, 'verificationstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'subject': {'reference': True, 'display': False, 'type': True}, 'encounter': {'reference': True, 'display': False, 'type': True}}}
schema = {'condition': {'category': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'clinicalstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'id': True, 'recordeddate': True, 'verificationstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'subject': {'reference': True, 'display': False, 'type': True}, 'encounter': {'reference': True, 'display': False, 'type': True}}, 'documentreference': {'id': True, 'type': True, 'status': True, 'docstatus': True, 'context': {'period': True, 'start': True}, 'subject': {'reference': True}}, 'encounter': {'status': True, 'period': {'start': True, 'end': False}, 'class': {'code': True, 'system': True, 'display': False, 'userSelected': True, 'version': True}, 'subject': {'reference': True, 'display': False, 'type': True}, 'id': True}, 'medicationrequest': {'id': True, 'status': True, 'intent': True, 'authoredon': True, 'category': {'code': True, 'system': True, 'display': False}, 'subject': {'reference': True}}, 'observation': {'id': True, 'category': {'coding': True, 'code': True, 'display': True, 'system': True, 'text': True}, 'status': True, 'code': {'coding': True, 'code': True, 'display': True, 'system': True, 'text': True}, 'interpretation': {'coding': True, 'code': True, 'display': True, 'system': True, 'text': True}, 'referencerange': {'low': False, 'high': False, 'normalvalue': False, 'type': False, 'appliesto': False, 'age': False, 'text': True}, 'effectivedatetime': True, 'valuequantity': {'value': True, 'comparator': False, 'unit': False, 'system': False, 'code': False}, 'valuecodeableconcept': {'coding': True, 'code': True, 'display': True, 'system': True}, 'subject': {'reference': True}, 'encounter': {'reference': True}}, 'patient': {'id': True, 'gender': True, 'address': True, 'birthdate': True}}
source_table = source_table
source_id = source_id
table_cols = ["a","b"]
Expand Down
1 change: 1 addition & 0 deletions cumulus_library/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
"""Package metadata"""

__version__ = "2.0.0"
11 changes: 10 additions & 1 deletion cumulus_library/base_table_builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" abstract base for python-based study executors """

import re
import sys

Expand Down Expand Up @@ -57,8 +58,16 @@ def execute_queries(
# Get the first non-whitespace word after create table
table_name = re.search(
'(?i)(?<=create table )(([a-zA-Z0-9_".-]+))', query
) # [0]
)

if table_name:
if table_name[0] == "IF":
# Edge case - if we're doing an empty conditional CTAS creation,
# we need to run a slightly different regex
table_name = re.search(
'(?i)(?<=not exists )(([a-zA-Z0-9_".-]+))', query
)

table_name = table_name[0]
# if it contains a schema, remove it (usually it won't, but some CTAS
# forms may)
Expand Down
1 change: 1 addition & 0 deletions cumulus_library/cli_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Manages configuration for argparse"""

import argparse


Expand Down
1 change: 1 addition & 0 deletions cumulus_library/enums.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Holds enums used across more than one module """

from enum import Enum


Expand Down
1 change: 1 addition & 0 deletions cumulus_library/helper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Collection of small commonly used utility functions """

import datetime
import os
import json
Expand Down
1 change: 1 addition & 0 deletions cumulus_library/protected_table_builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Builder for creating tables for tracking state/logging changes"""

from cumulus_library.base_table_builder import BaseTableBuilder
from cumulus_library.enums import ProtectedTables
from cumulus_library.template_sql.templates import (
Expand Down
34 changes: 31 additions & 3 deletions cumulus_library/statistics/counts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Class for generating counts tables from templates"""

import sys

from pathlib import Path
Expand Down Expand Up @@ -122,15 +123,15 @@ def count_condition(
filter_resource="encounter",
)

def count_document(
def count_documentreference(
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses: Union[list, None] = None,
min_subject: int = 10,
) -> str:
"""wrapper method for constructing document counts tables
"""wrapper method for constructing documentreference counts tables
:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
Expand All @@ -145,7 +146,8 @@ def count_document(
table_cols,
where_clauses=where_clauses,
min_subject=min_subject,
fhir_resource="document",
fhir_resource="documentreference",
filter_resource="encounter",
)

def count_encounter(
Expand Down Expand Up @@ -174,6 +176,32 @@ def count_encounter(
fhir_resource="encounter",
)

def count_medicationrequest(
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses: Union[list, None] = None,
min_subject: int = 10,
) -> str:
"""wrapper method for constructing medicationrequest counts tables

Passes its arguments straight through to get_count_query, with
fhir_resource fixed to "medicationrequest".

:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
:param table_cols: The columns from the source table to add to the count table
:param where_clauses: An array of where clauses to use for filtering the data
:param min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
:returns: the value returned by get_count_query for these arguments
"""
return self.get_count_query(
table_name,
source_table,
table_cols,
where_clauses=where_clauses,
min_subject=min_subject,
fhir_resource="medicationrequest",
)

def count_observation(
self,
table_name: str,
Expand Down
18 changes: 6 additions & 12 deletions cumulus_library/studies/core/builder_condition.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from cumulus_library import base_table_builder
from cumulus_library import databases
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import templates
from cumulus_library.template_sql import templates, utils


expected_table_cols = {
Expand Down Expand Up @@ -42,8 +42,10 @@


class CoreConditionBuilder(base_table_builder.BaseTableBuilder):
display_text = "Creating Condition tables..."

def denormalize_codes(self):
preferred_config = templates.CodeableConceptConfig(
preferred_config = utils.CodeableConceptConfig(
source_table="condition",
source_id="id",
column_name="code",
Expand All @@ -60,7 +62,7 @@ def denormalize_codes(self):
templates.get_codeable_concept_denormalize_query(preferred_config)
)

all_config = templates.CodeableConceptConfig(
all_config = utils.CodeableConceptConfig(
source_table="condition",
source_id="id",
column_name="code",
Expand All @@ -72,14 +74,6 @@ def denormalize_codes(self):
templates.get_codeable_concept_denormalize_query(all_config)
)

def validate_schema(self, cursor: object, schema: str, expected_table_cols, parser) -> dict:
"""Builds a per-table validation result for a set of expected columns

For every table in expected_table_cols, a query is generated with
templates.get_column_datatype_query, executed on the cursor, and the
fetched rows are handed to parser.validate_table_schema.

:param cursor: An object whose execute(query) result supports fetchall()
:param schema: The schema name passed to the column datatype query
:param expected_table_cols: A dict keyed by table name; each value is a
dict whose keys are the column names to look up
:param parser: An object providing validate_table_schema(cols, table_schema)
:returns: A dict mapping each table name to the value returned by
parser.validate_table_schema for that table
"""
validated_schema = {}
for table, cols in expected_table_cols.items():
# Only the column names (the keys of cols) are sent to the query template;
# the full cols dict is given to the parser along with the query results.
query = templates.get_column_datatype_query(schema, table, cols.keys())
table_schema = cursor.execute(query).fetchall()
validated_schema[table] = parser.validate_table_schema(cols, table_schema)
return validated_schema

def prepare_queries(
self,
cursor: object,
Expand All @@ -89,7 +83,7 @@ def prepare_queries(
**kwargs,
):
self.denormalize_codes()
validated_schema = self.validate_schema(
validated_schema = core_templates.validate_schema(
cursor, schema, expected_table_cols, parser
)
self.queries.append(
Expand Down
176 changes: 0 additions & 176 deletions cumulus_library/studies/core/builder_condition.sql

This file was deleted.

Loading

0 comments on commit 178295f

Please sign in to comment.