From 025bdaefb9c0dd40d6362153a0ca36123df7f9d1 Mon Sep 17 00:00:00 2001 From: Michael Terry Date: Tue, 23 Apr 2024 09:23:27 -0400 Subject: [PATCH] Update for Library 2.0 (#22) * Add a smoke-test basic test suite * Update for cumulus_library 2.0 --- .gitignore | 1 + cumulus_library_hypertension/htn/counts.py | 4 +- cumulus_library_hypertension/htn/counts.sql | 2365 +++++++++++++---- .../htn/study_period.sql | 36 +- cumulus_library_hypertension/htn/table_bp.sql | 26 +- .../htn/table_comorbidity.sql | 6 +- cumulus_library_hypertension/htn/table_dx.sql | 12 +- pyproject.toml | 15 +- tests/__init__.py | 0 tests/data/basic/condition/0.ndjson | 2 + tests/data/basic/documentreference/0.ndjson | 1 + tests/data/basic/encounter/0.ndjson | 1 + tests/data/basic/expected_bp.csv | 2 + tests/data/basic/expected_comorbidity.csv | 2 + .../basic/expected_comorbidity_period.csv | 2 + tests/data/basic/expected_dx.csv | 2 + tests/data/basic/expected_dx_period.csv | 2 + tests/data/basic/expected_meta_version.csv | 2 + tests/data/basic/expected_prevalence.csv | 2 + tests/data/basic/expected_study_period.csv | 2 + tests/data/basic/medicationrequest/0.ndjson | 1 + tests/data/basic/observation/0.ndjson | 1 + tests/data/basic/patient/0.ndjson | 1 + tests/data/basic/procedure/0.ndjson | 11 + tests/test_basic.py | 73 + 25 files changed, 2060 insertions(+), 512 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/data/basic/condition/0.ndjson create mode 100644 tests/data/basic/documentreference/0.ndjson create mode 100644 tests/data/basic/encounter/0.ndjson create mode 100644 tests/data/basic/expected_bp.csv create mode 100644 tests/data/basic/expected_comorbidity.csv create mode 100644 tests/data/basic/expected_comorbidity_period.csv create mode 100644 tests/data/basic/expected_dx.csv create mode 100644 tests/data/basic/expected_dx_period.csv create mode 100644 tests/data/basic/expected_meta_version.csv create mode 100644 tests/data/basic/expected_prevalence.csv create 
mode 100644 tests/data/basic/expected_study_period.csv create mode 100644 tests/data/basic/medicationrequest/0.ndjson create mode 100644 tests/data/basic/observation/0.ndjson create mode 100644 tests/data/basic/patient/0.ndjson create mode 100644 tests/data/basic/procedure/0.ndjson create mode 100644 tests/test_basic.py diff --git a/.gitignore b/.gitignore index fd59499..ec11fc6 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ data_export/ .python-version .DS_Store cumulus_library_columns.json +/.idea/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/cumulus_library_hypertension/htn/counts.py b/cumulus_library_hypertension/htn/counts.py index c2c053b..580391b 100644 --- a/cumulus_library_hypertension/htn/counts.py +++ b/cumulus_library_hypertension/htn/counts.py @@ -1,5 +1,5 @@ from pathlib import Path -from cumulus_library.schema.counts import CountsBuilder +from cumulus_library.statistics.counts import CountsBuilder class HtnCountsBuilder(CountsBuilder): display_text = "Creating htn counts..." 
@@ -137,7 +137,7 @@ def count_procedure(self, duration=None): return self.count_encounter(view_name, from_table, cols) - def prepare_queries(self, cursor=None, schema=None): + def prepare_queries(self, *args, **kwargs): self.queries =[ self.count_study_period(), self.count_study_period('month'), diff --git a/cumulus_library_hypertension/htn/counts.sql b/cumulus_library_hypertension/htn/counts.sql index 800fc52..bd5ed3f 100644 --- a/cumulus_library_hypertension/htn/counts.sql +++ b/cumulus_library_hypertension/htn/counts.sql @@ -1,48 +1,204 @@ -- noqa: disable=all - CREATE TABLE htn__count_study_period AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display" + --noqa: enable=RF03, AL02 + FROM htn__study_period AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display + FROM filtered_table + ), + secondary_powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + concat_ws( + '-', + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + 
COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",'') + ) AS id + FROM null_replacement + GROUP BY + cube( "enc_class_display", "enc_type_display", "age_at_visit", "gender", "race_display", "ethnicity_display" - FROM htn__study_period + ) + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + concat_ws( + '-', + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",'') + ) AS id + FROM null_replacement GROUP BY cube( - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display" + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display" ) ) SELECT - cnt_encounter AS cnt, - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_study_period_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."start_month" + --noqa: enable=RF03, AL02 + FROM htn__study_period AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(enc_class_display AS varchar), + 
'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(start_month AS varchar), + 'cumulus__none' + ) AS start_month + FROM filtered_table + ), + secondary_powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_month", + concat_ws( + '-', + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("start_month",'') + ) AS id + FROM null_replacement + GROUP BY + cube( "enc_class_display", "enc_type_display", "age_at_visit", @@ -50,40 +206,135 @@ CREATE TABLE htn__count_study_period_month AS ( "race_display", "ethnicity_display", "start_month" - FROM htn__study_period + ) + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_month", + concat_ws( + '-', + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("start_month",'') + ) AS id + FROM null_replacement GROUP BY cube( - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - 
"ethnicity_display", - "start_month" + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_month" ) ) SELECT - cnt_encounter AS cnt, - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_month" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."start_month" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_study_period_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."start_week" + --noqa: enable=RF03, AL02 + FROM htn__study_period AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, - count(DISTINCT encounter_ref) AS cnt_encounter, + subject_ref, + encounter_ref, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(start_week AS varchar), + 'cumulus__none' + ) AS start_week + FROM filtered_table + ), + secondary_powerset AS ( + SELECT + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + 
"enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_week", + concat_ws( + '-', + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("start_week",'') + ) AS id + FROM null_replacement + GROUP BY + cube( "enc_class_display", "enc_type_display", "age_at_visit", @@ -91,39 +342,118 @@ CREATE TABLE htn__count_study_period_week AS ( "race_display", "ethnicity_display", "start_week" - FROM htn__study_period + ) + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_week", + concat_ws( + '-', + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("start_week",'') + ) AS id + FROM null_replacement GROUP BY cube( - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_week" + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_week" ) ) SELECT - cnt_encounter AS cnt, - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_week" - FROM powerset + s.cnt_encounter_ref AS cnt, + p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."start_week" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_bp AS ( - WITH powerset AS ( + WITH + 
filtered_table AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + s.subject_ref, + --noqa: disable=RF03, AL02 + s."hypertension", + s."hypotension", + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display" + --noqa: enable=RF03, AL02 + FROM htn__bp AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(hypertension AS varchar), + 'cumulus__none' + ) AS hypertension, + coalesce( + cast(hypotension AS varchar), + 'cumulus__none' + ) AS hypotension, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "hypertension", "hypotension", "enc_class_display", @@ -131,42 +461,113 @@ CREATE TABLE htn__count_bp AS ( "age_at_visit", "gender", "race_display", - "ethnicity_display" - FROM htn__bp + "ethnicity_display", + concat_ws( + '-', + COALESCE("hypertension",''), + COALESCE("hypotension",''), + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",'') + ) AS id + FROM null_replacement GROUP BY cube( - "hypertension", - "hypotension", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display" + "hypertension", + "hypotension", + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + 
"ethnicity_display" ) ) SELECT - cnt_subject AS cnt, - "hypertension", - "hypotension", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display" - FROM powerset + p.cnt_subject_ref AS cnt, + p."hypertension", + p."hypotension", + p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_bp_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."hypertension", + s."hypotension", + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."obs_month" + --noqa: enable=RF03, AL02 + FROM htn__bp AS s + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + subject_ref, + coalesce( + cast(hypertension AS varchar), + 'cumulus__none' + ) AS hypertension, + coalesce( + cast(hypotension AS varchar), + 'cumulus__none' + ) AS hypotension, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(obs_month AS varchar), + 'cumulus__none' + ) AS obs_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "hypertension", "hypotension", "enc_class_display", @@ -175,44 +576,116 @@ CREATE TABLE htn__count_bp_month 
AS ( "gender", "race_display", "ethnicity_display", - "obs_month" - FROM htn__bp + "obs_month", + concat_ws( + '-', + COALESCE("hypertension",''), + COALESCE("hypotension",''), + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("obs_month",'') + ) AS id + FROM null_replacement GROUP BY cube( - "hypertension", - "hypotension", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "obs_month" + "hypertension", + "hypotension", + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "obs_month" ) ) SELECT - cnt_subject AS cnt, - "hypertension", - "hypotension", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "obs_month" - FROM powerset + p.cnt_subject_ref AS cnt, + p."hypertension", + p."hypotension", + p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."obs_month" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_bp_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + s.subject_ref, + --noqa: disable=RF03, AL02 + s."hypertension", + s."hypotension", + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."obs_week" + --noqa: enable=RF03, AL02 + FROM htn__bp AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(hypertension AS varchar), + 'cumulus__none' + ) AS hypertension, + coalesce( + cast(hypotension AS varchar), + 'cumulus__none' + ) AS hypotension, + coalesce( + 
cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(obs_week AS varchar), + 'cumulus__none' + ) AS obs_week + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "hypertension", "hypotension", "enc_class_display", @@ -221,44 +694,116 @@ CREATE TABLE htn__count_bp_week AS ( "gender", "race_display", "ethnicity_display", - "obs_week" - FROM htn__bp + "obs_week", + concat_ws( + '-', + COALESCE("hypertension",''), + COALESCE("hypotension",''), + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("obs_week",'') + ) AS id + FROM null_replacement GROUP BY cube( - "hypertension", - "hypotension", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "obs_week" + "hypertension", + "hypotension", + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "obs_week" ) ) SELECT - cnt_subject AS cnt, - "hypertension", - "hypotension", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "obs_week" - FROM powerset + p.cnt_subject_ref AS cnt, + p."hypertension", + p."hypotension", + p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."obs_week" + FROM 
powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_bp_date AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."hypertension", + s."hypotension", + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."obs_date" + --noqa: enable=RF03, AL02 + FROM htn__bp AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(hypertension AS varchar), + 'cumulus__none' + ) AS hypertension, + coalesce( + cast(hypotension AS varchar), + 'cumulus__none' + ) AS hypotension, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(obs_date AS varchar), + 'cumulus__none' + ) AS obs_date + FROM filtered_table + ), + + powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT subject_ref) AS cnt_subject_ref, "hypertension", "hypotension", "enc_class_display", @@ -267,137 +812,340 @@ CREATE TABLE htn__count_bp_date AS ( "gender", "race_display", "ethnicity_display", - "obs_date" - FROM htn__bp + "obs_date", + concat_ws( + '-', + COALESCE("hypertension",''), + COALESCE("hypotension",''), + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("obs_date",'') + ) AS id + FROM 
null_replacement GROUP BY cube( - "hypertension", - "hypotension", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "obs_date" + "hypertension", + "hypotension", + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "obs_date" ) ) SELECT - cnt_subject AS cnt, - "hypertension", - "hypotension", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "obs_date" - FROM powerset + p.cnt_subject_ref AS cnt, + p."hypertension", + p."hypotension", + p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."obs_date" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_dx_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."category_display", + s."cond_display", + s."cond_system_display", + s."cond_month" + --noqa: enable=RF03, AL02 + FROM htn__dx AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(category_display AS varchar), + 'cumulus__none' + ) AS category_display, + coalesce( + cast(cond_display AS varchar), + 'cumulus__none' + ) AS cond_display, + coalesce( + cast(cond_system_display AS varchar), + 'cumulus__none' + ) AS cond_system_display, + coalesce( + cast(cond_month AS varchar), + 'cumulus__none' + ) AS cond_month + FROM filtered_table + ), + + powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT subject_ref) AS cnt_subject_ref, "category_display", "cond_display", "cond_system_display", - "cond_month" - FROM htn__dx + "cond_month", + concat_ws( + '-', + COALESCE("category_display",''), + COALESCE("cond_display",''), + COALESCE("cond_system_display",''), + 
COALESCE("cond_month",'') + ) AS id + FROM null_replacement GROUP BY cube( - "category_display", - "cond_display", - "cond_system_display", - "cond_month" + "category_display", + "cond_display", + "cond_system_display", + "cond_month" ) ) SELECT - cnt_subject AS cnt, - "category_display", - "cond_display", - "cond_system_display", - "cond_month" - FROM powerset + p.cnt_subject_ref AS cnt, + p."category_display", + p."cond_display", + p."cond_system_display", + p."cond_month" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_dx_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."category_display", + s."cond_display", + s."cond_system_display", + s."cond_week" + --noqa: enable=RF03, AL02 + FROM htn__dx AS s + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + subject_ref, + coalesce( + cast(category_display AS varchar), + 'cumulus__none' + ) AS category_display, + coalesce( + cast(cond_display AS varchar), + 'cumulus__none' + ) AS cond_display, + coalesce( + cast(cond_system_display AS varchar), + 'cumulus__none' + ) AS cond_system_display, + coalesce( + cast(cond_week AS varchar), + 'cumulus__none' + ) AS cond_week + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "category_display", "cond_display", "cond_system_display", - "cond_week" - FROM htn__dx + "cond_week", + concat_ws( + '-', + COALESCE("category_display",''), + COALESCE("cond_display",''), + COALESCE("cond_system_display",''), + COALESCE("cond_week",'') + ) AS id + FROM null_replacement GROUP BY cube( - "category_display", - "cond_display", - "cond_system_display", - "cond_week" + "category_display", + "cond_display", + "cond_system_display", + "cond_week" ) ) SELECT - cnt_subject AS cnt, - "category_display", - "cond_display", - 
"cond_system_display", - "cond_week" - FROM powerset + p.cnt_subject_ref AS cnt, + p."category_display", + p."cond_display", + p."cond_system_display", + p."cond_week" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_dx_date AS ( - WITH powerset AS ( + WITH + filtered_table AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + s.subject_ref, + --noqa: disable=RF03, AL02 + s."category_display", + s."cond_display", + s."cond_system_display", + s."cond_date" + --noqa: enable=RF03, AL02 + FROM htn__dx AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(category_display AS varchar), + 'cumulus__none' + ) AS category_display, + coalesce( + cast(cond_display AS varchar), + 'cumulus__none' + ) AS cond_display, + coalesce( + cast(cond_system_display AS varchar), + 'cumulus__none' + ) AS cond_system_display, + coalesce( + cast(cond_date AS varchar), + 'cumulus__none' + ) AS cond_date + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "category_display", "cond_display", "cond_system_display", - "cond_date" - FROM htn__dx + "cond_date", + concat_ws( + '-', + COALESCE("category_display",''), + COALESCE("cond_display",''), + COALESCE("cond_system_display",''), + COALESCE("cond_date",'') + ) AS id + FROM null_replacement GROUP BY cube( - "category_display", - "cond_display", - "cond_system_display", - "cond_date" + "category_display", + "cond_display", + "cond_system_display", + "cond_date" ) ) SELECT - cnt_subject AS cnt, - "category_display", - "cond_display", - "cond_system_display", - "cond_date" - FROM powerset + p.cnt_subject_ref AS cnt, + p."category_display", + p."cond_display", + p."cond_system_display", + p."cond_date" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE 
htn__count_dx_period_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + s.subject_ref, + --noqa: disable=RF03, AL02 + s."category_display", + s."cond_display", + s."cond_system_display", + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."cond_month" + --noqa: enable=RF03, AL02 + FROM htn__dx_period AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(category_display AS varchar), + 'cumulus__none' + ) AS category_display, + coalesce( + cast(cond_display AS varchar), + 'cumulus__none' + ) AS cond_display, + coalesce( + cast(cond_system_display AS varchar), + 'cumulus__none' + ) AS cond_system_display, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(cond_month AS varchar), + 'cumulus__none' + ) AS cond_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "category_display", "cond_display", "cond_system_display", @@ -407,46 +1155,124 @@ CREATE TABLE htn__count_dx_period_month AS ( "gender", "race_display", "ethnicity_display", - "cond_month" - FROM htn__dx_period + "cond_month", + concat_ws( + '-', + COALESCE("category_display",''), + COALESCE("cond_display",''), + COALESCE("cond_system_display",''), + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + 
COALESCE("ethnicity_display",''), + COALESCE("cond_month",'') + ) AS id + FROM null_replacement GROUP BY cube( - "category_display", - "cond_display", - "cond_system_display", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "cond_month" + "category_display", + "cond_display", + "cond_system_display", + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "cond_month" ) ) SELECT - cnt_subject AS cnt, - "category_display", - "cond_display", - "cond_system_display", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "cond_month" - FROM powerset + p.cnt_subject_ref AS cnt, + p."category_display", + p."cond_display", + p."cond_system_display", + p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."cond_month" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_dx_period_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + s.subject_ref, + --noqa: disable=RF03, AL02 + s."category_display", + s."cond_display", + s."cond_system_display", + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."cond_week" + --noqa: enable=RF03, AL02 + FROM htn__dx_period AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(category_display AS varchar), + 'cumulus__none' + ) AS category_display, + coalesce( + cast(cond_display AS varchar), + 'cumulus__none' + ) AS cond_display, + coalesce( + cast(cond_system_display AS varchar), + 'cumulus__none' + ) AS cond_system_display, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS 
enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(cond_week AS varchar), + 'cumulus__none' + ) AS cond_week + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "category_display", "cond_display", "cond_system_display", @@ -456,46 +1282,124 @@ CREATE TABLE htn__count_dx_period_week AS ( "gender", "race_display", "ethnicity_display", - "cond_week" - FROM htn__dx_period + "cond_week", + concat_ws( + '-', + COALESCE("category_display",''), + COALESCE("cond_display",''), + COALESCE("cond_system_display",''), + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("cond_week",'') + ) AS id + FROM null_replacement GROUP BY cube( - "category_display", - "cond_display", - "cond_system_display", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "cond_week" + "category_display", + "cond_display", + "cond_system_display", + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "cond_week" ) ) SELECT - cnt_subject AS cnt, - "category_display", - "cond_display", - "cond_system_display", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "cond_week" - FROM powerset + p.cnt_subject_ref AS cnt, + p."category_display", + p."cond_display", + p."cond_system_display", + 
p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."cond_week" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_dx_period_date AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."category_display", + s."cond_display", + s."cond_system_display", + s."enc_class_display", + s."enc_type_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."cond_date" + --noqa: enable=RF03, AL02 + FROM htn__dx_period AS s + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + subject_ref, + coalesce( + cast(category_display AS varchar), + 'cumulus__none' + ) AS category_display, + coalesce( + cast(cond_display AS varchar), + 'cumulus__none' + ) AS cond_display, + coalesce( + cast(cond_system_display AS varchar), + 'cumulus__none' + ) AS cond_system_display, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__none' + ) AS enc_type_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(cond_date AS varchar), + 'cumulus__none' + ) AS cond_date + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "category_display", "cond_display", "cond_system_display", @@ -505,46 +1409,114 @@ CREATE TABLE htn__count_dx_period_date AS ( "gender", "race_display", "ethnicity_display", - "cond_date" - FROM htn__dx_period + "cond_date", + 
concat_ws( + '-', + COALESCE("category_display",''), + COALESCE("cond_display",''), + COALESCE("cond_system_display",''), + COALESCE("enc_class_display",''), + COALESCE("enc_type_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("cond_date",'') + ) AS id + FROM null_replacement GROUP BY cube( - "category_display", - "cond_display", - "cond_system_display", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "cond_date" + "category_display", + "cond_display", + "cond_system_display", + "enc_class_display", + "enc_type_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "cond_date" ) ) SELECT - cnt_subject AS cnt, - "category_display", - "cond_display", - "cond_system_display", - "enc_class_display", - "enc_type_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "cond_date" - FROM powerset + p.cnt_subject_ref AS cnt, + p."category_display", + p."cond_display", + p."cond_system_display", + p."enc_class_display", + p."enc_type_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."cond_date" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_prevalence_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."hypertension", + s."hypertension_lab", + s."hypertension_dx", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."start_month" + --noqa: enable=RF03, AL02 + FROM htn__prevalence AS s + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + subject_ref, + coalesce( + cast(hypertension AS varchar), + 'cumulus__none' + ) AS hypertension, + coalesce( + cast(hypertension_lab AS 
varchar), + 'cumulus__none' + ) AS hypertension_lab, + coalesce( + cast(hypertension_dx AS varchar), + 'cumulus__none' + ) AS hypertension_dx, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(start_month AS varchar), + 'cumulus__none' + ) AS start_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "hypertension", "hypertension_lab", "hypertension_dx", @@ -552,42 +1524,108 @@ CREATE TABLE htn__count_prevalence_month AS ( "gender", "race_display", "ethnicity_display", - "start_month" - FROM htn__prevalence + "start_month", + concat_ws( + '-', + COALESCE("hypertension",''), + COALESCE("hypertension_lab",''), + COALESCE("hypertension_dx",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("start_month",'') + ) AS id + FROM null_replacement GROUP BY cube( - "hypertension", - "hypertension_lab", - "hypertension_dx", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_month" + "hypertension", + "hypertension_lab", + "hypertension_dx", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_month" ) ) SELECT - cnt_subject AS cnt, - "hypertension", - "hypertension_lab", - "hypertension_dx", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_month" - FROM powerset + p.cnt_subject_ref AS cnt, + p."hypertension", + p."hypertension_lab", + p."hypertension_dx", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."start_month" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- 
########################################################### CREATE TABLE htn__count_prevalence_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."hypertension", + s."hypertension_lab", + s."hypertension_dx", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."start_week" + --noqa: enable=RF03, AL02 + FROM htn__prevalence AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(hypertension AS varchar), + 'cumulus__none' + ) AS hypertension, + coalesce( + cast(hypertension_lab AS varchar), + 'cumulus__none' + ) AS hypertension_lab, + coalesce( + cast(hypertension_dx AS varchar), + 'cumulus__none' + ) AS hypertension_dx, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(start_week AS varchar), + 'cumulus__none' + ) AS start_week + FROM filtered_table + ), + + powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT subject_ref) AS cnt_subject_ref, "hypertension", "hypertension_lab", "hypertension_dx", @@ -595,159 +1633,399 @@ CREATE TABLE htn__count_prevalence_week AS ( "gender", "race_display", "ethnicity_display", - "start_week" - FROM htn__prevalence + "start_week", + concat_ws( + '-', + COALESCE("hypertension",''), + COALESCE("hypertension_lab",''), + COALESCE("hypertension_dx",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("start_week",'') + ) AS id + FROM null_replacement GROUP BY cube( - "hypertension", - "hypertension_lab", - "hypertension_dx", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - 
"start_week" + "hypertension", + "hypertension_lab", + "hypertension_dx", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_week" ) ) SELECT - cnt_subject AS cnt, - "hypertension", - "hypertension_lab", - "hypertension_dx", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_week" - FROM powerset + p.cnt_subject_ref AS cnt, + p."hypertension", + p."hypertension_lab", + p."hypertension_dx", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."start_week" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_comorbidity AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."comorbidity_category_display", + s."comorbidity_system_display", + s."comorbidity_display", + s."gender", + s."race_display", + s."ethnicity_display" + --noqa: enable=RF03, AL02 + FROM htn__comorbidity AS s + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + subject_ref, + coalesce( + cast(comorbidity_category_display AS varchar), + 'cumulus__none' + ) AS comorbidity_category_display, + coalesce( + cast(comorbidity_system_display AS varchar), + 'cumulus__none' + ) AS comorbidity_system_display, + coalesce( + cast(comorbidity_display AS varchar), + 'cumulus__none' + ) AS comorbidity_display, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "comorbidity_category_display", "comorbidity_system_display", "comorbidity_display", "gender", "race_display", - "ethnicity_display" - FROM htn__comorbidity + "ethnicity_display", 
+ concat_ws( + '-', + COALESCE("comorbidity_category_display",''), + COALESCE("comorbidity_system_display",''), + COALESCE("comorbidity_display",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",'') + ) AS id + FROM null_replacement GROUP BY cube( - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "gender", - "race_display", - "ethnicity_display" + "comorbidity_category_display", + "comorbidity_system_display", + "comorbidity_display", + "gender", + "race_display", + "ethnicity_display" ) ) SELECT - cnt_subject AS cnt, - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "gender", - "race_display", - "ethnicity_display" - FROM powerset + p.cnt_subject_ref AS cnt, + p."comorbidity_category_display", + p."comorbidity_system_display", + p."comorbidity_display", + p."gender", + p."race_display", + p."ethnicity_display" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_comorbidity_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."comorbidity_category_display", + s."comorbidity_system_display", + s."comorbidity_display", + s."gender", + s."race_display", + s."ethnicity_display", + s."comorbidity_month" + --noqa: enable=RF03, AL02 + FROM htn__comorbidity AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(comorbidity_category_display AS varchar), + 'cumulus__none' + ) AS comorbidity_category_display, + coalesce( + cast(comorbidity_system_display AS varchar), + 'cumulus__none' + ) AS comorbidity_system_display, + coalesce( + cast(comorbidity_display AS varchar), + 'cumulus__none' + ) AS comorbidity_display, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS 
race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(comorbidity_month AS varchar), + 'cumulus__none' + ) AS comorbidity_month + FROM filtered_table + ), + + powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT subject_ref) AS cnt_subject_ref, "comorbidity_category_display", "comorbidity_system_display", "comorbidity_display", "gender", "race_display", "ethnicity_display", - "comorbidity_month" - FROM htn__comorbidity + "comorbidity_month", + concat_ws( + '-', + COALESCE("comorbidity_category_display",''), + COALESCE("comorbidity_system_display",''), + COALESCE("comorbidity_display",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("comorbidity_month",'') + ) AS id + FROM null_replacement GROUP BY cube( - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "gender", - "race_display", - "ethnicity_display", - "comorbidity_month" + "comorbidity_category_display", + "comorbidity_system_display", + "comorbidity_display", + "gender", + "race_display", + "ethnicity_display", + "comorbidity_month" ) ) SELECT - cnt_subject AS cnt, - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "gender", - "race_display", - "ethnicity_display", - "comorbidity_month" - FROM powerset + p.cnt_subject_ref AS cnt, + p."comorbidity_category_display", + p."comorbidity_system_display", + p."comorbidity_display", + p."gender", + p."race_display", + p."ethnicity_display", + p."comorbidity_month" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_comorbidity_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."comorbidity_category_display", + s."comorbidity_system_display", + 
s."comorbidity_display", + s."gender", + s."race_display", + s."ethnicity_display", + s."comorbidity_week" + --noqa: enable=RF03, AL02 + FROM htn__comorbidity AS s + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + subject_ref, + coalesce( + cast(comorbidity_category_display AS varchar), + 'cumulus__none' + ) AS comorbidity_category_display, + coalesce( + cast(comorbidity_system_display AS varchar), + 'cumulus__none' + ) AS comorbidity_system_display, + coalesce( + cast(comorbidity_display AS varchar), + 'cumulus__none' + ) AS comorbidity_display, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(comorbidity_week AS varchar), + 'cumulus__none' + ) AS comorbidity_week + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "comorbidity_category_display", "comorbidity_system_display", "comorbidity_display", "gender", "race_display", "ethnicity_display", - "comorbidity_week" - FROM htn__comorbidity + "comorbidity_week", + concat_ws( + '-', + COALESCE("comorbidity_category_display",''), + COALESCE("comorbidity_system_display",''), + COALESCE("comorbidity_display",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("comorbidity_week",'') + ) AS id + FROM null_replacement GROUP BY cube( - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "gender", - "race_display", - "ethnicity_display", - "comorbidity_week" + "comorbidity_category_display", + "comorbidity_system_display", + "comorbidity_display", + "gender", + "race_display", + "ethnicity_display", + "comorbidity_week" ) ) SELECT - cnt_subject AS cnt, - "comorbidity_category_display", - "comorbidity_system_display", - 
"comorbidity_display", - "gender", - "race_display", - "ethnicity_display", - "comorbidity_week" - FROM powerset + p.cnt_subject_ref AS cnt, + p."comorbidity_category_display", + p."comorbidity_system_display", + p."comorbidity_display", + p."gender", + p."race_display", + p."ethnicity_display", + p."comorbidity_week" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_comorbidity_period AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."comorbidity_category_display", + s."comorbidity_system_display", + s."comorbidity_display", + s."enc_class_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display" + --noqa: enable=RF03, AL02 + FROM htn__comorbidity_period AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(comorbidity_category_display AS varchar), + 'cumulus__none' + ) AS comorbidity_category_display, + coalesce( + cast(comorbidity_system_display AS varchar), + 'cumulus__none' + ) AS comorbidity_system_display, + coalesce( + cast(comorbidity_display AS varchar), + 'cumulus__none' + ) AS comorbidity_display, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display + FROM filtered_table + ), + + powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT subject_ref) AS cnt_subject_ref, "comorbidity_category_display", "comorbidity_system_display", "comorbidity_display", @@ -755,42 +2033,113 @@ CREATE TABLE htn__count_comorbidity_period AS ( 
"age_at_visit", "gender", "race_display", - "ethnicity_display" - FROM htn__comorbidity_period + "ethnicity_display", + concat_ws( + '-', + COALESCE("comorbidity_category_display",''), + COALESCE("comorbidity_system_display",''), + COALESCE("comorbidity_display",''), + COALESCE("enc_class_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",'') + ) AS id + FROM null_replacement GROUP BY cube( - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "enc_class_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display" + "comorbidity_category_display", + "comorbidity_system_display", + "comorbidity_display", + "enc_class_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display" ) ) SELECT - cnt_subject AS cnt, - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "enc_class_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display" - FROM powerset + p.cnt_subject_ref AS cnt, + p."comorbidity_category_display", + p."comorbidity_system_display", + p."comorbidity_display", + p."enc_class_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_comorbidity_period_month AS ( - WITH powerset AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."comorbidity_category_display", + s."comorbidity_system_display", + s."comorbidity_display", + s."enc_class_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."start_month" + --noqa: enable=RF03, AL02 + FROM htn__comorbidity_period AS s + ), + + null_replacement AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + subject_ref, + coalesce( + 
cast(comorbidity_category_display AS varchar), + 'cumulus__none' + ) AS comorbidity_category_display, + coalesce( + cast(comorbidity_system_display AS varchar), + 'cumulus__none' + ) AS comorbidity_system_display, + coalesce( + cast(comorbidity_display AS varchar), + 'cumulus__none' + ) AS comorbidity_display, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(start_month AS varchar), + 'cumulus__none' + ) AS start_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "comorbidity_category_display", "comorbidity_system_display", "comorbidity_display", @@ -799,44 +2148,116 @@ CREATE TABLE htn__count_comorbidity_period_month AS ( "gender", "race_display", "ethnicity_display", - "start_month" - FROM htn__comorbidity_period + "start_month", + concat_ws( + '-', + COALESCE("comorbidity_category_display",''), + COALESCE("comorbidity_system_display",''), + COALESCE("comorbidity_display",''), + COALESCE("enc_class_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("start_month",'') + ) AS id + FROM null_replacement GROUP BY cube( - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "enc_class_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_month" + "comorbidity_category_display", + "comorbidity_system_display", + "comorbidity_display", + "enc_class_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_month" ) ) SELECT 
- cnt_subject AS cnt, - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "enc_class_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_month" - FROM powerset + p.cnt_subject_ref AS cnt, + p."comorbidity_category_display", + p."comorbidity_system_display", + p."comorbidity_display", + p."enc_class_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."start_month" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); -- ########################################################### CREATE TABLE htn__count_comorbidity_period_week AS ( - WITH powerset AS ( + WITH + filtered_table AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject, + s.subject_ref, + --noqa: disable=RF03, AL02 + s."comorbidity_category_display", + s."comorbidity_system_display", + s."comorbidity_display", + s."enc_class_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display", + s."start_week" + --noqa: enable=RF03, AL02 + FROM htn__comorbidity_period AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(comorbidity_category_display AS varchar), + 'cumulus__none' + ) AS comorbidity_category_display, + coalesce( + cast(comorbidity_system_display AS varchar), + 'cumulus__none' + ) AS comorbidity_system_display, + coalesce( + cast(comorbidity_display AS varchar), + 'cumulus__none' + ) AS comorbidity_display, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__none' + ) AS enc_class_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__none' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__none' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__none' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__none' + ) AS ethnicity_display, + coalesce( + cast(start_week AS varchar), + 'cumulus__none' + ) AS start_week + 
FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject_ref, "comorbidity_category_display", "comorbidity_system_display", "comorbidity_display", @@ -845,34 +2266,46 @@ CREATE TABLE htn__count_comorbidity_period_week AS ( "gender", "race_display", "ethnicity_display", - "start_week" - FROM htn__comorbidity_period + "start_week", + concat_ws( + '-', + COALESCE("comorbidity_category_display",''), + COALESCE("comorbidity_system_display",''), + COALESCE("comorbidity_display",''), + COALESCE("enc_class_display",''), + COALESCE("age_at_visit",''), + COALESCE("gender",''), + COALESCE("race_display",''), + COALESCE("ethnicity_display",''), + COALESCE("start_week",'') + ) AS id + FROM null_replacement GROUP BY cube( - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "enc_class_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_week" + "comorbidity_category_display", + "comorbidity_system_display", + "comorbidity_display", + "enc_class_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display", + "start_week" ) ) SELECT - cnt_subject AS cnt, - "comorbidity_category_display", - "comorbidity_system_display", - "comorbidity_display", - "enc_class_display", - "age_at_visit", - "gender", - "race_display", - "ethnicity_display", - "start_week" - FROM powerset + p.cnt_subject_ref AS cnt, + p."comorbidity_category_display", + p."comorbidity_system_display", + p."comorbidity_display", + p."enc_class_display", + p."age_at_visit", + p."gender", + p."race_display", + p."ethnicity_display", + p."start_week" + FROM powerset AS p WHERE - cnt_subject >= 10 + cnt_subject_ref >= 10 ); diff --git a/cumulus_library_hypertension/htn/study_period.sql b/cumulus_library_hypertension/htn/study_period.sql index 8db0f7f..1113ef9 100644 --- a/cumulus_library_hypertension/htn/study_period.sql +++ b/cumulus_library_hypertension/htn/study_period.sql @@ -1,32 +1,32 
@@ CREATE table htn__study_period AS SELECT DISTINCT - e.start_date, - e.start_week, - e.start_month, - e.end_date, + e.period_start_day as start_date, + e.period_start_week as start_week, + e.period_start_month as start_month, + e.period_end_day as end_date, e.age_at_visit, e.status, p.gender, p.race_display, p.ethnicity_display, - e.enc_class_code, - e.enc_class_display, - COALESCE(e.enc_type_system, 'None') as enc_type_system, - COALESCE(e.enc_type_code, 'None') as enc_type_code, - COALESCE(e.enc_type_display, 'None') as enc_type_display, - COALESCE(e.enc_service_system, 'None') as enc_service_system, - COALESCE(e.enc_service_code, 'None') as enc_service_code, - COALESCE(e.enc_service_display, 'None') as enc_service_display, - COALESCE(e.enc_priority_system, 'None') as enc_priority_system, - COALESCE(e.enc_priority_code, 'None') as enc_priority_code, - COALESCE(e.enc_priority_display, 'None') as enc_priority_display, + e.class_code AS enc_class_code, + e.class_display AS enc_class_display, + COALESCE(e.type_code_system, 'None') as enc_type_system, + COALESCE(e.type_code, 'None') as enc_type_code, + COALESCE(e.type_display, 'None') as enc_type_display, + COALESCE(e.serviceType_code_system, 'None') as enc_service_system, + COALESCE(e.serviceType_code, 'None') as enc_service_code, + COALESCE(e.serviceType_display, 'None') as enc_service_display, + COALESCE(e.priority_code_system, 'None') as enc_priority_system, + COALESCE(e.priority_code, 'None') as enc_priority_code, + COALESCE(e.priority_display, 'None') as enc_priority_display, p.subject_ref, e.encounter_ref FROM core__patient AS p, - core__encounter_type AS e + core__encounter AS e WHERE (p.subject_ref = e.subject_ref) - AND (e.start_date BETWEEN date('2016-01-01') AND current_date) - AND (e.end_date BETWEEN date('2016-01-01') AND current_date) + AND (e.period_start_day BETWEEN date('2016-01-01') AND current_date) + AND (e.period_end_day BETWEEN date('2016-01-01') AND current_date) ; \ No newline at end of file 
diff --git a/cumulus_library_hypertension/htn/table_bp.sql b/cumulus_library_hypertension/htn/table_bp.sql index d442403..1d2440a 100644 --- a/cumulus_library_hypertension/htn/table_bp.sql +++ b/cumulus_library_hypertension/htn/table_bp.sql @@ -24,13 +24,12 @@ define_component as (select * from htn__define_bp where component!='panel') select distinct define_component.*, - component_code, - component_part.valueQuantity, - component_part.valueQuantity.value as mmHg, - O.obs_date, - O.obs_week, - O.obs_month, - O.obs_year, + OCC.code as component_code, + OCVQ.value as mmHg, + O.effectiveDateTime_day as obs_date, + O.effectiveDateTime_week as obs_week, + O.effectiveDateTime_month as obs_month, + O.effectiveDateTime_year as obs_year, status, O.observation_ref, O.encounter_ref, @@ -38,10 +37,15 @@ select distinct from define_panel, define_component, core__observation_vital_signs as O, - UNNEST(O.component) t (component_part), - UNNEST(component_part.code.coding) t (component_code) -where define_panel.code = O.obs_code.code -and define_component.code = component_code.code + core__observation_component_code as OCC, + core__observation_component_valuequantity as OCVQ +where define_panel.code = O.observation_code +and define_panel.system = O.observation_code_system +and define_component.code = OCC.code +and define_component.system = OCC.code_system +and O.id = OCC.id +and O.id = OCVQ.id +and OCC.row = OCVQ.row ; -- ######################################################################### diff --git a/cumulus_library_hypertension/htn/table_comorbidity.sql b/cumulus_library_hypertension/htn/table_comorbidity.sql index 385499a..c5c05f1 100644 --- a/cumulus_library_hypertension/htn/table_comorbidity.sql +++ b/cumulus_library_hypertension/htn/table_comorbidity.sql @@ -13,9 +13,9 @@ WITH condition_row AS c.code AS comorbidity_code, c.code_display as comorbidity_display, fhirspec.code_system as comorbidity_system_display, - c.recorded_month AS comorbidity_month, - c.recorded_week
AS comorbidity_week, - c.recordeddate as comorbidity_date + c.recordedDate_month AS comorbidity_month, + c.recordedDate_week AS comorbidity_week, + c.recordedDate as comorbidity_date FROM core__condition AS c, core__fhir_mapping_code_system_uri as fhirspec diff --git a/cumulus_library_hypertension/htn/table_dx.sql b/cumulus_library_hypertension/htn/table_dx.sql index 52bbbeb..8029910 100644 --- a/cumulus_library_hypertension/htn/table_dx.sql +++ b/cumulus_library_hypertension/htn/table_dx.sql @@ -15,9 +15,9 @@ SELECT DISTINCT dx.display as cond_display, fhirspec.uri as cond_system, fhirspec.code_system as cond_system_display, - c.recorded_month AS cond_month, - c.recorded_week AS cond_week, - c.recordeddate as cond_date + c.recordedDate_month AS cond_month, + c.recordedDate_week AS cond_week, + c.recordedDate as cond_date FROM htn__define_dx AS dx, core__fhir_mapping_code_system_uri as fhirspec, @@ -37,9 +37,9 @@ SELECT DISTINCT dx.display as cond_display, fhirspec.uri as cond_system, fhirspec.code_system as cond_system_display, - c.recordeddate as cond_date, - c.recorded_month AS cond_month, - c.recorded_week AS cond_week, + c.recordedDate as cond_date, + c.recordedDate_month AS cond_month, + c.recordedDate_week AS cond_week, s.enc_class_code, s.enc_class_display, s.enc_type_display, diff --git a/pyproject.toml b/pyproject.toml index 94e0f7a..75d49d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,12 @@ [project] name = "cumulus-library-hypertension" -version = "0.1.3" -requires-python = ">= 3.9" +version = "1.0.0" +requires-python = ">= 3.10" dependencies = [ - "cumulus-library >= 1.4.0", - "sqlfluff == 2.0.2" + "cumulus-library >= 2.1, < 3", + "sqlfluff >= 3", ] -description = "SQL generation for cumulus covid symptom analysis" +description = "SQL generation for Cumulus hypertension analysis" readme = "README.md" license = { text="Apache License 2.0" } classifiers = [ @@ -17,7 +17,7 @@ classifiers = [ ] [project.urls] -Home = 
"https://smarthealthit.org/cumulus-a-universal-sidecar-for-a-smart-learning-healthcare-system/" +Home = "https://smarthealthit.org/cumulus/" Documentation = "https://docs.smarthealthit.org/cumulus/" Source = "https://github.com/smart-on-fhir/cumulus-library-hypertension" @@ -27,6 +27,9 @@ requires = ["flit_core >=3.4,<4"] build-backend = "flit_core.buildapi" [project.optional-dependencies] +tests = [ + "pytest", +] dev = [ "black", "pylint", diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/basic/condition/0.ndjson b/tests/data/basic/condition/0.ndjson new file mode 100644 index 0000000..6524868 --- /dev/null +++ b/tests/data/basic/condition/0.ndjson @@ -0,0 +1,2 @@ +{"id": "A.1", "clinicalStatus": {"coding": [{"code": "active", "system": "http://terminology.hl7.org/CodeSystem/condition-clinical"}]}, "category": [{"coding": [{"display": "Encounter Diagnosis", "code": "encounter-diagnosis", "system": "http://terminology.hl7.org/CodeSystem/condition-category"}]}], "code": {"coding": [{"code": "I10", "system": "http://hl7.org/fhir/sid/icd-10-cm"}]}, "subject": {"reference": "Patient/A"}, "encounter": {"reference": "Encounter/A"}, "recordedDate": "2023-10-17T12:00:00+05:00"} +{"id": "A.2", "clinicalStatus": {"coding": [{"code": "active", "system": "http://terminology.hl7.org/CodeSystem/condition-clinical"}]}, "category": [{"coding": [{"display": "Encounter Diagnosis", "code": "encounter-diagnosis", "system": "http://terminology.hl7.org/CodeSystem/condition-category"}]}], "code": {"coding": [{"display": "Right upper lobe pneumonia", "code": "J18.1", "system": "http://hl7.org/fhir/sid/icd-10-cm"}]}, "subject": {"reference": "Patient/A"}, "encounter": {"reference": "Encounter/A"}, "recordedDate": "2023-10-17T12:00:00+05:00"} diff --git a/tests/data/basic/documentreference/0.ndjson b/tests/data/basic/documentreference/0.ndjson new file mode 100644 index 0000000..1abe9e6 --- /dev/null +++ 
b/tests/data/basic/documentreference/0.ndjson @@ -0,0 +1 @@ +{"id": "Just.Here.So.The.Core.Study.Builds", "status": "current", "type": {"coding": [{"system": "http://loinc.org", "code": "34111-5", "display": "Emergency department note"}, {"system": "http://loinc.org", "code": "51847-2", "display": "Evaluation + Plan note"}]}, "category": [{"coding": [{"system": "http://hl7.org/fhir/us/core/CodeSystem/us-core-documentreference-category", "code": "clinical-note", "display": "Clinical Note"}]}], "subject": {"reference": "Patient/A"}, "date": "2018-07-13T13:30:43.931-04:00", "content": [{"attachment": {"contentType": "text/plain; charset=utf-8", "url": "nope"}}], "context": {"encounter": [{"reference": "Encounter/A"}], "period": {"start": "2018-07-13T13:30:43-04:00", "end": "2018-07-13T13:45:43-04:00"}}} diff --git a/tests/data/basic/encounter/0.ndjson b/tests/data/basic/encounter/0.ndjson new file mode 100644 index 0000000..ae09b0e --- /dev/null +++ b/tests/data/basic/encounter/0.ndjson @@ -0,0 +1 @@ +{"id": "A", "status": "finished", "period": {"start": "2023-10-17T11:35:00+05:00", "end": "2023-10-17T12:20:00+05:00"}, "class": {"display": "Ambulatory", "code": "AMB", "system": "http://terminology.hl7.org/CodeSystem/v3-ActCode"}, "type": [{"coding": [{"display": "Outpatient Kenacort injection", "code": "OKI", "system": "http://terminology.hl7.org/CodeSystem/encounter-type"}]}], "subject": {"reference": "Patient/A"}, "serviceType": {"coding": [{"display": "High Blood Pressure", "code": "356", "system": "http://terminology.hl7.org/CodeSystem/service-type"}]}, "priority": {"coding": [{"display": "routine", "code": "R", "system": "http://terminology.hl7.org/CodeSystem/v3-ActPriority"}]}} diff --git a/tests/data/basic/expected_bp.csv b/tests/data/basic/expected_bp.csv new file mode 100644 index 0000000..d693983 --- /dev/null +++ b/tests/data/basic/expected_bp.csv @@ -0,0 +1,2 @@ 
+bp_display,hypertension,hypotension,systolic_high,systolic_low,diastolic_high,diastolic_low,obs_date,obs_week,obs_month,subject_ref,encounter_ref,observation_ref,enc_class_code,enc_class_display,enc_type_display,age_at_visit,gender,race_display,ethnicity_display +89.0/59.0,false,true,false,true,false,true,2023-10-17,2023-10-16,2023-10-01,Patient/A,Encounter/A,Observation/A.hypotension,AMB,ambulatory,Outpatient Kenacort injection,23,female,asian,hispanic or latino diff --git a/tests/data/basic/expected_comorbidity.csv b/tests/data/basic/expected_comorbidity.csv new file mode 100644 index 0000000..9e43404 --- /dev/null +++ b/tests/data/basic/expected_comorbidity.csv @@ -0,0 +1,2 @@ +subject_ref,encounter_ref,comorbidity_category_display,comorbidity_code,comorbidity_display,comorbidity_system_display,comorbidity_month,comorbidity_week,comorbidity_date,gender,race_display,ethnicity_display +Patient/A,Encounter/A,Encounter Diagnosis,J18.1,Right upper lobe pneumonia,ICD10,2023-10-01,2023-10-16,2023-10-17,female,asian,hispanic or latino diff --git a/tests/data/basic/expected_comorbidity_period.csv b/tests/data/basic/expected_comorbidity_period.csv new file mode 100644 index 0000000..ade418a --- /dev/null +++ b/tests/data/basic/expected_comorbidity_period.csv @@ -0,0 +1,2 @@ +comorbidity_category_display,comorbidity_code,comorbidity_display,comorbidity_system_display,hypertension,hypertension_lab,hypertension_dx,cond_category_display,cond_code,cond_display,start_date,start_week,start_month,end_date,age_at_visit,status,gender,race_display,ethnicity_display,enc_class_code,enc_class_display,enc_type_system,enc_type_code,enc_type_display,enc_service_system,enc_service_code,enc_service_display,enc_priority_system,enc_priority_code,enc_priority_display,subject_ref,encounter_ref +Encounter Diagnosis,J18.1,Right upper lobe pneumonia,ICD10,Hypertension,None,Hypertension,Encounter Diagnosis,I10,Essential (primary) 
hypertension,2023-10-17,2023-10-16,2023-10-01,2023-10-17,23,finished,female,asian,hispanic or latino,AMB,ambulatory,http://terminology.hl7.org/CodeSystem/encounter-type,OKI,Outpatient Kenacort injection,http://terminology.hl7.org/CodeSystem/service-type,356,High Blood Pressure,http://terminology.hl7.org/CodeSystem/v3-ActPriority,R,routine,Patient/A,Encounter/A diff --git a/tests/data/basic/expected_dx.csv b/tests/data/basic/expected_dx.csv new file mode 100644 index 0000000..6fcd747 --- /dev/null +++ b/tests/data/basic/expected_dx.csv @@ -0,0 +1,2 @@ +subject_ref,encounter_ref,category_display,cond_code,cond_display,cond_system,cond_system_display,cond_month,cond_week,cond_date +Patient/A,Encounter/A,Encounter Diagnosis,I10,Essential (primary) hypertension,http://hl7.org/fhir/sid/icd-10-cm,ICD10,2023-10-01,2023-10-16,2023-10-17 diff --git a/tests/data/basic/expected_dx_period.csv b/tests/data/basic/expected_dx_period.csv new file mode 100644 index 0000000..2f0bc21 --- /dev/null +++ b/tests/data/basic/expected_dx_period.csv @@ -0,0 +1,2 @@ +subject_ref,encounter_ref,category_display,cond_code,cond_display,cond_system,cond_system_display,cond_date,cond_month,cond_week,enc_class_code,enc_class_display,enc_type_display,age_at_visit,gender,race_display,ethnicity_display +Patient/A,Encounter/A,Encounter Diagnosis,I10,Essential (primary) hypertension,http://hl7.org/fhir/sid/icd-10-cm,ICD10,2023-10-17,2023-10-01,2023-10-16,AMB,ambulatory,Outpatient Kenacort injection,23,female,asian,hispanic or latino diff --git a/tests/data/basic/expected_meta_version.csv b/tests/data/basic/expected_meta_version.csv new file mode 100644 index 0000000..adf9f2b --- /dev/null +++ b/tests/data/basic/expected_meta_version.csv @@ -0,0 +1,2 @@ +data_package_version +1 diff --git a/tests/data/basic/expected_prevalence.csv b/tests/data/basic/expected_prevalence.csv new file mode 100644 index 0000000..8f59db6 --- /dev/null +++ b/tests/data/basic/expected_prevalence.csv @@ -0,0 +1,2 @@ 
+hypertension,hypertension_lab,hypertension_dx,cond_category_display,cond_code,cond_display,start_date,start_week,start_month,end_date,age_at_visit,status,gender,race_display,ethnicity_display,enc_class_code,enc_class_display,enc_type_system,enc_type_code,enc_type_display,enc_service_system,enc_service_code,enc_service_display,enc_priority_system,enc_priority_code,enc_priority_display,subject_ref,encounter_ref +Hypertension,None,Hypertension,Encounter Diagnosis,I10,Essential (primary) hypertension,2023-10-17,2023-10-16,2023-10-01,2023-10-17,23,finished,female,asian,hispanic or latino,AMB,ambulatory,http://terminology.hl7.org/CodeSystem/encounter-type,OKI,Outpatient Kenacort injection,http://terminology.hl7.org/CodeSystem/service-type,356,High Blood Pressure,http://terminology.hl7.org/CodeSystem/v3-ActPriority,R,routine,Patient/A,Encounter/A diff --git a/tests/data/basic/expected_study_period.csv b/tests/data/basic/expected_study_period.csv new file mode 100644 index 0000000..691d592 --- /dev/null +++ b/tests/data/basic/expected_study_period.csv @@ -0,0 +1,2 @@ +start_date,start_week,start_month,end_date,age_at_visit,status,gender,race_display,ethnicity_display,enc_class_code,enc_class_display,enc_type_system,enc_type_code,enc_type_display,enc_service_system,enc_service_code,enc_service_display,enc_priority_system,enc_priority_code,enc_priority_display,subject_ref,encounter_ref +2023-10-17,2023-10-16,2023-10-01,2023-10-17,23,finished,female,asian,hispanic or latino,AMB,ambulatory,http://terminology.hl7.org/CodeSystem/encounter-type,OKI,Outpatient Kenacort injection,http://terminology.hl7.org/CodeSystem/service-type,356,High Blood Pressure,http://terminology.hl7.org/CodeSystem/v3-ActPriority,R,routine,Patient/A,Encounter/A diff --git a/tests/data/basic/medicationrequest/0.ndjson b/tests/data/basic/medicationrequest/0.ndjson new file mode 100644 index 0000000..e309f97 --- /dev/null +++ b/tests/data/basic/medicationrequest/0.ndjson @@ -0,0 +1 @@ +{"id": 
"Just.Here.So.The.Core.Study.Builds", "status": "stopped", "intent": "order", "category": [{"coding": [{"system": "http://terminology.hl7.org/CodeSystem/medicationrequest-category", "code": "community", "display": "Community"}], "text": "Community"}], "medicationCodeableConcept": {"coding": [{"system": "http://www.nlm.nih.gov/research/umls/rxnorm", "code": "106892", "display": "insulin isophane, human 70 UNT/ML / insulin, regular, human 30 UNT/ML Injectable Suspension [Humulin]"}], "text": "insulin isophane, human 70 UNT/ML / insulin, regular, human 30 UNT/ML Injectable Suspension [Humulin]"}, "subject": {"reference": "Patient/A"}, "encounter": {"reference": "Encounter/A"}, "authoredOn": "2018-07-13T13:30:43-04:00"} diff --git a/tests/data/basic/observation/0.ndjson b/tests/data/basic/observation/0.ndjson new file mode 100644 index 0000000..82f55a2 --- /dev/null +++ b/tests/data/basic/observation/0.ndjson @@ -0,0 +1 @@ +{"id": "A.hypotension", "effectiveDateTime": "2023-10-17T11:40:00+05:00", "status": "amended", "code": {"coding": [{"display": "Panel Blood Pressure", "code": "85354-9", "system": "http://loinc.org"}]}, "subject": {"reference": "Patient/A"}, "encounter": {"reference": "Encounter/A"}, "component": [{"code": {"coding": [{"display": "Systolic Blood Pressure", "code": "8480-6", "system": "http://loinc.org"}]}, "valueQuantity": {"value": 89}}, {"code": {"coding": [{"display": "Diastolic Blood Pressure", "code": "8462-4", "system": "http://loinc.org"}]}, "valueQuantity": {"value": 59}}], "category": [{"coding": [{"code": "vital-signs", "system": "http://terminology.hl7.org/CodeSystem/observation-category"}]}]} diff --git a/tests/data/basic/patient/0.ndjson b/tests/data/basic/patient/0.ndjson new file mode 100644 index 0000000..1904629 --- /dev/null +++ b/tests/data/basic/patient/0.ndjson @@ -0,0 +1 @@ +{"id": "A", "active": true, "gender": "female", "birthDate": "2000-04-04", "extension": [{"url": 
"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", "extension": [{"url": "ombCategory", "valueCoding": {"display": "Asian", "code": "2028-9", "system": "urn:oid:2.16.840.1.113883.6.238"}}]}, {"url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", "extension": [{"url": "ombCategory", "valueCoding": {"display": "Hispanic or Latino", "code": "2135-2", "system": "urn:oid:2.16.840.1.113883.6.238"}}]}], "address": [{"postalCode": "02139"}]} diff --git a/tests/data/basic/procedure/0.ndjson b/tests/data/basic/procedure/0.ndjson new file mode 100644 index 0000000..2344c84 --- /dev/null +++ b/tests/data/basic/procedure/0.ndjson @@ -0,0 +1,11 @@ +{"id": "valid-with-datetime", "performedDateTime": "2015-02-07T13:28:17-05:00", "status": "completed", "code": {"text": "X"}, "subject": {"reference": "Patient/A"}} +{"id": "valid-with-period", "performedPeriod": {"start": "2020"}, "status": "completed", "code": {"text": "X"}, "subject": {"reference": "Patient/A"}} +{"id": "valid-with-string", "performedString": "yo", "status": "completed", "code": {"text": "X"}, "subject": {"reference": "Patient/A"}} +{"id": "valid-with-age", "performedAge": {"value": 1.5}, "status": "completed", "code": {"text": "X"}, "subject": {"reference": "Patient/A"}} +{"id": "valid-with-range", "performedRange": {"low": {"value": 1.5}}, "status": "completed", "code": {"text": "X"}, "subject": {"reference": "Patient/A"}} +{"id": "no-status", "code": {"text": "X"}, "subject": {"reference": "Patient/A"}, "performedString": "20th century"} +{"id": "no-code", "status": "completed", "subject": {"reference": "Patient/A"}, "performedString": "20th century"} +{"id": "no-subject", "status": "completed", "code": {"text": "X"}, "performedString": "20th century"} +{"id": "group-subject", "status": "completed", "code": {"text": "X"}, "subject": {"reference": "Group/A"}, "performedString": "20th century"} +{"id": "no-performed-time", "status": "completed", "code": {"text": "X"}, "subject": 
{"reference": "Patient/A"}}
+{"id": "nothing"}
diff --git a/tests/test_basic.py b/tests/test_basic.py
new file mode 100644
index 0000000..bc2f04a
--- /dev/null
+++ b/tests/test_basic.py
@@ -0,0 +1,83 @@
+"""Unit tests"""
+
+import glob
+import os
+import tempfile
+import unittest
+
+import ddt
+import duckdb
+
+from cumulus_library import cli
+
+
+@ddt.ddt
+class HypertensionTestCase(unittest.TestCase):
+    """Test case for the hypertension study writ large."""
+
+    def setUp(self):
+        super().setUp()
+        # Never truncate assertEqual diffs; whole CSV files get compared.
+        self.maxDiff = None
+
+    @staticmethod
+    def _read_expected(path: str) -> str:
+        """Return an expected CSV's text with '#' comment lines removed."""
+        with open(path, "r", encoding="utf8") as f:
+            return "".join(line for line in f if not line.startswith("#"))
+
+    @staticmethod
+    def _dump_table(db, table_name: str, csv_path: str) -> str:
+        """Export a table to CSV in a fixed row order and return the text."""
+        db.table(table_name).order("ALL DESC NULLS FIRST").to_csv(csv_path)
+        with open(csv_path, "r", encoding="utf8") as f:
+            return f.read()
+
+    def test_happy_path(self) -> None:
+        """Runs the study on some input data and spot-checks the results"""
+        test_dir = os.path.dirname(__file__)
+        root_dir = os.path.dirname(test_dir)
+        data_dir = f"{test_dir}/data/basic"
+        study_dir = f"{root_dir}/cumulus_library_hypertension/htn"
+
+        # OK which tables are we going to compare in this test?
+        # expected_bp.csv -> "_bp" -> htn__bp. Work from the basename so the
+        # mapping does not depend on the path separator glob hands back.
+        expected_tables = {}
+        for path in sorted(glob.glob(f"{data_dir}/expected*.csv")):
+            file_name = os.path.basename(path)
+            short_name = file_name.removeprefix("expected").removesuffix(".csv")
+            expected_tables[short_name] = f"htn_{short_name}"
+        # Guard against the test silently comparing nothing at all.
+        self.assertTrue(expected_tables, f"no expected*.csv in {data_dir}")
+
+        # Set up and run the study!
+        with tempfile.TemporaryDirectory() as tmpdir:
+            cli.main(
+                [
+                    "build",
+                    # "--verbose",
+                    "--target=core",
+                    "--target=htn",
+                    f"--study-dir={study_dir}",
+                    "--db-type=duckdb",
+                    f"--database={tmpdir}/duck.db",
+                    f"--load-ndjson-dir={data_dir}",
+                ]
+            )
+            db = duckdb.connect(f"{tmpdir}/duck.db")
+            try:
+                # Uncomment this for extra debugging
+                # df = db.execute("select * from htn__count_prevalence_month").df()
+                # print(df.to_string())
+
+                # Check each output with the saved & expected version
+                for short_name, full_name in expected_tables.items():
+                    csv = self._dump_table(db, full_name, f"{tmpdir}/{full_name}.csv")
+                    expected_path = f"{data_dir}/expected{short_name}.csv"
+                    expected = self._read_expected(expected_path)
+                    explanation = f"{short_name}:\n{csv}"
+                    self.assertEqual(expected, csv, explanation)
+            finally:
+                # Release the database file before the temp dir is removed.
+                db.close()