Refactored remaining core tables (#166)

* Refactored remaining core tables * self review cleanup * tightening up medication, docref, encounter * unit tests, athena testing, PR feedback * jinja trim, remove write_queries, regression update * sqlfluff pass * black 2024 * pin black
smart-on-fhir · Jan 26, 2024 · 178295f · 178295f
1 parent e637633
commit 178295f
Show file tree

Hide file tree

Showing 96 changed files with 28,972 additions and 6,307 deletions.
diff --git a/.sqlfluffignore b/.sqlfluffignore
@@ -1,3 +1,13 @@
 /scratch/
 # This is a common destination for debugging sql generation
 output.sql
+
+# These files trigger an error related to accessing a two-level-deep
+# date column, which is likely a SQLFluff bug
+documentreference.sql.jinja
+encounter.sql.jinja
+
+# This file has namespace collisions with vars in other sqlfluff templates.
+# Ignoring for now - could be addressed with an in-folder .sqlfluff config
+# or by a refactor of variable names
+count.sql.jinja
diff --git a/cumulus_library/.sqlfluff b/cumulus_library/.sqlfluff
@@ -29,26 +29,27 @@ code_system_tables = [{table_name":"hasarray","column_name":"acol","is_bare_codi
 column_name = 'bar'
 column_names = ['foo', 'bar']
 conditions = ["1 > 0", "1 < 2"]
+config = {"medication_datasources" : {"by_contained_ref" : True, "by_external_ref" : True}, 'has_userselected': False}
 count_ref = count_ref
 count_table = count_table
 dataset = [["foo","foo"],["bar","bar"]]
 dependent_variable = is_flu
 ext_systems = ["omb", "text"]
 field = 'column_name'
 filter_table = filter_table
-fhir_extension = fhir_extension
+filter_resource = True
+fhir_extension = condition
 fhir_resource = patient
 id = 'id'
 join_cols_by_table = { "join_table": { "join_id": "enc_ref","included_cols": [["a"], ["b", "c"]]}}
 join_id = subject_ref
-medication_datasources = {"by_contained_ref" : True, "by_external_ref" : True}
 neg_source_table = neg_source_table
 output_table_name = 'created_table'
 prefix = Test
 primary_ref = encounter_ref
 pos_source_table = pos_source_table
 schema_name = test_schema
-schema = {'condition': {'category': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'clinicalstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'id': True, 'recordeddate': True, 'verificationstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'subject': {'reference': True, 'display': False, 'type': True}, 'encounter': {'reference': True, 'display': False, 'type': True}}}
+schema = {'condition': {'category': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'clinicalstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'id': True, 'recordeddate': True, 'verificationstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'subject': {'reference': True, 'display': False, 'type': True}, 'encounter': {'reference': True, 'display': False, 'type': True}}, 'documentreference': {'id': True, 'type': True, 'status': True, 'docstatus': True, 'context': {'period': True, 'start': True}, 'subject': {'reference': True}}, 'encounter': {'status': True, 'period': {'start': True, 'end': False}, 'class': {'code': True, 'system': True, 'display': False, 'userSelected': True, 'version': True}, 'subject': {'reference': True, 'display': False, 'type': True}, 'id': True}, 'medicationrequest': {'id': True, 'status': True, 'intent': True, 'authoredon': True, 'category': {'code': True, 'system': True, 'display': False}, 'subject': {'reference': True}}, 'observation': {'id': True, 'category': {'coding': True, 'code': True, 'display': True, 'system': True, 'text': True}, 'status': True, 'code': {'coding': True, 'code': True, 'display': True, 'system': True, 'text': True}, 'interpretation': {'coding': True, 'code': True, 'display': True, 'system': True, 'text': True}, 'referencerange': {'low': False, 'high': False, 'normalvalue': False, 'type': False, 'appliesto': False, 'age': False, 'text': True}, 'effectivedatetime': True, 'valuequantity': {'value': True, 'comparator': False, 'unit': False, 'system': False, 'code': False}, 'valuecodeableconcept': {'coding': True, 'code': True, 'display': True, 'system': True}, 'subject': {'reference': True}, 'encounter': {'reference': True}}, 'patient': {'id': True, 'gender': True, 'address': True, 'birthdate': True}}
 source_table = source_table
 source_id = source_id
 table_cols = ["a","b"]

diff --git a/cumulus_library/__init__.py b/cumulus_library/__init__.py
@@ -1,2 +1,3 @@
 """Package metadata"""
+
 __version__ = "2.0.0"
diff --git a/cumulus_library/base_table_builder.py b/cumulus_library/base_table_builder.py
@@ -1,4 +1,5 @@
 """ abstract base for python-based study executors """
+
 import re
 import sys
 
@@ -57,8 +58,16 @@ def execute_queries(
                 # Get the first non-whitespace word after create table
                 table_name = re.search(
                     '(?i)(?<=create table )(([a-zA-Z0-9_".-]+))', query
-                )  # [0]
+                )
+
                 if table_name:
+                    if table_name[0] == "IF":
+                        # Edge case - if we're doing an empty conditional CTAS creation,
+                        # we need to run a slightly different regex
+                        table_name = re.search(
+                            '(?i)(?<=not exists )(([a-zA-Z0-9_".-]+))', query
+                        )
+
                     table_name = table_name[0]
                     # if it contains a schema, remove it (usually it won't, but some CTAS
                     # forms may)

diff --git a/cumulus_library/cli_parser.py b/cumulus_library/cli_parser.py
@@ -1,4 +1,5 @@
 """Manages configuration for argparse"""
+
 import argparse
 
 

diff --git a/cumulus_library/enums.py b/cumulus_library/enums.py
@@ -1,4 +1,5 @@
 """ Holds enums used across more than one module """
+
 from enum import Enum
 
 

diff --git a/cumulus_library/helper.py b/cumulus_library/helper.py
@@ -1,4 +1,5 @@
 """ Collection of small commonly used utility functions """
+
 import datetime
 import os
 import json

diff --git a/cumulus_library/protected_table_builder.py b/cumulus_library/protected_table_builder.py
@@ -1,4 +1,5 @@
 """ Builder for creating tables for tracking state/logging changes"""
+
 from cumulus_library.base_table_builder import BaseTableBuilder
 from cumulus_library.enums import ProtectedTables
 from cumulus_library.template_sql.templates import (

diff --git a/cumulus_library/statistics/counts.py b/cumulus_library/statistics/counts.py
@@ -1,4 +1,5 @@
 """Class for generating counts tables from templates"""
+
 import sys
 
 from pathlib import Path
@@ -122,15 +123,15 @@ def count_condition(
             filter_resource="encounter",
         )
 
-    def count_document(
+    def count_documentreference(
         self,
         table_name: str,
         source_table: str,
         table_cols: list,
         where_clauses: Union[list, None] = None,
         min_subject: int = 10,
     ) -> str:
-        """wrapper method for constructing document counts tables
+        """wrapper method for constructing documentreference counts tables
 
         :param table_name: The name of the table to create. Must start with study prefix
         :param source_table: The table to create counts data from
@@ -145,7 +146,8 @@ def count_document(
             table_cols,
             where_clauses=where_clauses,
             min_subject=min_subject,
-            fhir_resource="document",
+            fhir_resource="documentreference",
+            filter_resource="encounter",
         )
 
     def count_encounter(
@@ -174,6 +176,32 @@ def count_encounter(
             fhir_resource="encounter",
         )
 
+    def count_medicationrequest(
+        self,
+        table_name: str,
+        source_table: str,
+        table_cols: list,
+        where_clauses: Union[list, None] = None,
+        min_subject: int = 10,
+    ) -> str:
+        """wrapper method for constructing medicationrequests counts tables
+
+        :param table_name: The name of the table to create. Must start with study prefix
+        :param source_table: The table to create counts data from
+        :param table_cols: The columns from the source table to add to the count table
+        :param where_clauses: An array of where clauses to use for filtering the data
+        :param min_subject: An integer setting the minimum bin size for inclusion
+            (default: 10)
+        """
+        return self.get_count_query(
+            table_name,
+            source_table,
+            table_cols,
+            where_clauses=where_clauses,
+            min_subject=min_subject,
+            fhir_resource="medicationrequest",
+        )
+
     def count_observation(
         self,
         table_name: str,

diff --git a/cumulus_library/studies/core/builder_condition.py b/cumulus_library/studies/core/builder_condition.py
@@ -1,7 +1,7 @@
 from cumulus_library import base_table_builder
 from cumulus_library import databases
 from cumulus_library.studies.core.core_templates import core_templates
-from cumulus_library.template_sql import templates
+from cumulus_library.template_sql import templates, utils
 
 
 expected_table_cols = {
@@ -42,8 +42,10 @@
 
 
 class CoreConditionBuilder(base_table_builder.BaseTableBuilder):
+    display_text = "Creating Condition tables..."
+
     def denormalize_codes(self):
-        preferred_config = templates.CodeableConceptConfig(
+        preferred_config = utils.CodeableConceptConfig(
             source_table="condition",
             source_id="id",
             column_name="code",
@@ -60,7 +62,7 @@ def denormalize_codes(self):
             templates.get_codeable_concept_denormalize_query(preferred_config)
         )
 
-        all_config = templates.CodeableConceptConfig(
+        all_config = utils.CodeableConceptConfig(
             source_table="condition",
             source_id="id",
             column_name="code",
@@ -72,14 +74,6 @@ def denormalize_codes(self):
             templates.get_codeable_concept_denormalize_query(all_config)
         )
 
-    def validate_schema(self, cursor: object, schema: str, expected_table_cols, parser):
-        validated_schema = {}
-        for table, cols in expected_table_cols.items():
-            query = templates.get_column_datatype_query(schema, table, cols.keys())
-            table_schema = cursor.execute(query).fetchall()
-            validated_schema[table] = parser.validate_table_schema(cols, table_schema)
-        return validated_schema
-
     def prepare_queries(
         self,
         cursor: object,
@@ -89,7 +83,7 @@ def prepare_queries(
         **kwargs,
     ):
         self.denormalize_codes()
-        validated_schema = self.validate_schema(
+        validated_schema = core_templates.validate_schema(
             cursor, schema, expected_table_cols, parser
         )
         self.queries.append(

diff --git a/cumulus_library/studies/core/builder_condition.sql b/cumulus_library/studies/core/builder_condition.sql
Original file line number	Diff line number	Diff line change
		@@ -1,4 +1,5 @@
		"""Manages configuration for argparse"""

		import argparse


Expand Down
Original file line number	Diff line number	Diff line change
		@@ -1,4 +1,5 @@
		""" Holds enums used across more than one module """

		from enum import Enum


Expand Down