Skip to content

Commit

Permalink
Fix CQL tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ibacher committed Jun 19, 2024
1 parent 83e5665 commit a53ce0e
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 46 deletions.
19 changes: 19 additions & 0 deletions tests/data/example_fsh/HIV27_library.fsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Instance: HIVIND27Logic
InstanceOf: Library
Title: "HIV.IND.27 Logic"
Description: "Number and % of people on ART among all people living with HIV at the end of the reporting period"
Usage: #definition
* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-shareablelibrary"
* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-publishablelibrary"
* meta.profile[+] = "http://hl7.org/fhir/uv/cql/StructureDefinition/cql-library"
* meta.profile[+] = "http://hl7.org/fhir/uv/cql/StructureDefinition/cql-module"
* url = "http://smart.who.int/immunizations-measles/Library/HIVIND27Logic"
* extension[+]
* url = "http://hl7.org/fhir/StructureDefinition/cqf-knowledgeCapability"
* valueCode = #computable
* name = "HIVIND27Logic"
* status = #draft
* experimental = true
* publisher = "World Health Organization (WHO)"
* type = $library-type#logic-library
* content.id = "ig-loader-HIVIND27Logic.cql"
46 changes: 46 additions & 0 deletions tests/data/example_fsh/HIV27_measure.fsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
Instance: HIVIND27
InstanceOf: http://hl7.org/fhir/us/cqfmeasures/StructureDefinition/proportion-measure-cqfm
Title: "HIV.IND.27 People living with HIV on ART"
* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-shareablemeasure"
* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-publishablemeasure"
* extension[http://hl7.org/fhir/us/cqfmeasures/StructureDefinition/cqfm-populationBasis].valueCode = #boolean
* description = "Number and % of people on ART among all people living with HIV at the end of the reporting period"
* url = "http://smart.who.int/immunizations-measles/Measure/HIVIND27"
* status = #draft
* experimental = true
* date = "2024-06-14"
* name = "HIVIND27"
* title = "HIV.IND.27 People living with HIV on ART"
* publisher = "World Health Organization (WHO)"
* library = "http://smart.who.int/immunizations-measles/Library/HIVIND27Logic"
* scoring = $measure-scoring#proportion "Proportion"
* group[+]
* population[denominator]
* id = "HIV.IND.27.DEN"
* description = "1. To determine treatment coverage: estimated number of people living with HIV (from models, such as Spectrum AIM)
2. To gauge progress toward the second 95 target: number of people living with HIV who know their HIV status (from surveys or models)"
* code = $measure-population#denominator "Denominator"
* criteria.language = #text/cql-identifier
* criteria.expression = "Denominator"
* population[numerator]
* id = "HIV.IND.27.NUM"
* description = "Number of people on ART at the end of the reporting period (HIV patient monitoring data from, for example, ART registers, patient records or EMRs). For key populations survey data may be required."
* code = $measure-population#numerator "Numerator"
* criteria.language = #text/cql-identifier
* criteria.expression = "Numerator"
* stratifier[+]
* id = "HIV.IND.27.S.AG"
* criteria.language = #text/cql-identifier
* criteria.expression = "Administrative Gender Stratifier"
* stratifier[+]
* id = "HIV.IND.27.S.A"
* criteria.language = #text/cql-identifier
* criteria.expression = "Age Stratifier"
* stratifier[+]
* id = "HIV.IND.27.S.GR"
* criteria.language = #text/cql-identifier
* criteria.expression = "Geographic Region Stratifier"
* stratifier[+]
* id = "HIV.IND.27.S.P"
* criteria.language = #text/cql-identifier
* criteria.expression = "patientGroups Stratifier"
70 changes: 41 additions & 29 deletions tests/test_cql_tools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Generated by CodiumAI
import datetime
import os
import re
from who_l3_smart_tools.core.indicator_generation.cql_tools import (
Expand All @@ -24,7 +25,7 @@ def test_generate_cql_file_headers(self):

generator.print_to_files(output_dir)

self.assertTrue(os.path.exists(output_dir + "HIV.IND.2.cql"))
assert os.path.exists(os.path.join(output_dir, "HIVIND2Logic.cql"))

def test_generate_cql_template(self):
input_file = "tests/data/indicator_dak_input_MINI.xlsx"
Expand All @@ -38,30 +39,34 @@ def test_generate_cql_template(self):
# Test the first row
cql_template = generator.generate_cql_template(indicator_artifact.iloc[0])

self.assertIsNotNone(cql_template)
assert cql_template is not None


class TestCqlResourceGenerator(unittest.TestCase):
def setUp(self):
# since we're comparing text, it's useful to have large diffs
self.maxDiff=5000

# Load example CQL from data directory
cql_file_path = "tests/data/example_cql_HIV27.cql"
indicator_file_path = "tests/data/indicator_dak_input_MINI.xlsx"

# Load content and close file
cql_file = open(cql_file_path, "r")
self.cql_content = cql_file.read()
cql_file.close()
with open(cql_file_path, "r") as cql_file:
self.cql_content = cql_file.read()

indicator_file = pd.read_excel(
indicator_file_path, sheet_name="Indicator definitions"
)

self.indicator_row = indicator_file.iloc[2]
self.indicator_row = indicator_file[indicator_file['DAK ID'] == 'HIV.IND.27'].head(1).squeeze()

self.generator = CQLResourceGenerator(self.indicator_row, self.cql_content)
self.generator = CQLResourceGenerator(self.cql_content, {
self.indicator_row['DAK ID']: self.indicator_row
})

def test_parse_cql_with_valid_content(self):
parsed_cql = self.generator.parse_cql()
parsed_cql = self.generator.parsed_cql

self.assertIsNotNone(parsed_cql)
self.assertEqual(parsed_cql["library_name"], "HIV.IND.27")
Expand All @@ -70,13 +75,12 @@ def test_parse_cql_with_valid_content(self):
self.assertIn("numerator", parsed_cql.keys())
self.assertIn("populations", parsed_cql.keys())
self.assertGreater(len(parsed_cql["stratifiers"]), 0)
self.assertGreater(len(parsed_cql["populations"]), 0)
# self.assertGreater(len(parsed_cql["populations"]), 0)
self.assertIsNotNone(parsed_cql["denominator"])
self.assertIsNotNone(parsed_cql["numerator"])

def test_generate_library_fsh(self):
p = self.generator.parse_cql()
library_fsh = self.generator.generate_library_fsh()
library_fsh = self.generator.generate_library_fsh().strip()

output_file = "tests/output/fsh/HIV27_library.fsh"

Expand All @@ -85,35 +89,40 @@ def test_generate_library_fsh(self):
with open(output_file, "w") as f:
f.write(library_fsh)

expected_lib_file = f"tests/data/example_fsh/{stringcase.alphanumcase(p["library_name"])}_library.fsh"
expected_lib_file = open(expected_lib_file, "r")

expected_library_fsh = expected_lib_file.read()
expected_lib_file = f"tests/data/example_fsh/HIV27_library.fsh"
with open(expected_lib_file, "r") as expected_lib_fsh_file:
expected_library_fsh = expected_lib_fsh_file.read().rstrip()

self.assertIsNotNone(library_fsh)

self.assertEqual(library_fsh.strip(), expected_library_fsh.strip())
self.assertEqual(expected_library_fsh, library_fsh)

def test_generate_measure_fsh(self):
p = self.generator.parse_cql()
p = self.generator.parsed_cql
measure_fsh = self.generator.generate_measure_fsh()

assert measure_fsh is not None

output_file = f"tests/output/fsh/{stringcase.alphanumcase(p["library_name"])}_measure.fsh"

if os.path.exists(output_file):
os.remove(output_file)
with open(output_file, "w") as f:
f.write(measure_fsh)
f.write(measure_fsh.strip())

expected_measure_file = "tests/data/example_fsh/HIV27_measure.fsh"
expected_measure_file = open(expected_measure_file, "r")

expected_measure_fsh = expected_measure_file.read()
with open(expected_measure_file, "r") as expected_measure_file:
expected_measure_fsh = expected_measure_file.read().rstrip()
# The date is always the date the measure was generated, so we need to update it
expected_measure_fsh = expected_measure_fsh.replace(
'* date = "2024-06-14"',
f'* date = "{datetime.datetime.now(datetime.timezone.utc).date():%Y-%m-%d}"'
)

self.assertIsNotNone(measure_fsh)
self.assertEqual(measure_fsh.strip(), expected_measure_fsh.strip())
self.assertEqual(expected_measure_fsh, measure_fsh)

class TestCqlGeneratorOnAllFiles(unittest.TestCase):

def test_resource_gen_for_all(self):
input_directory = "tests/data/cql/"
indicator_file_path = "tests/data/l2/test_indicators.xlsx"
Expand All @@ -128,9 +137,12 @@ def test_resource_gen_for_all(self):
indicator_dict[row["DAK ID"]] = row

# Create output dir if not exists
output_directory = "tests/output/fsh/"
output_directory = os.path.join("tests", "output", "fsh")
if not os.path.exists(output_directory):
os.makedirs(output_directory)
for subfolder in ["measures", "libraries"]:
if not os.path.exists(os.path.join(output_directory, subfolder)):
os.makedirs(os.path.join(output_directory, subfolder))


# For each cql file, generate library resources. Only generate measures for
Expand All @@ -147,9 +159,9 @@ def test_resource_gen_for_all(self):

# Create Library file and save to file
library_fsh = generator.generate_library_fsh()
if(library_fsh):
if library_fsh:
file_name = f"{stringcase.alphanumcase(generator.get_library_name())}"
if(generator.is_indicator()):
if generator.is_indicator():
file_name += "Logic"
file_name += ".fsh"
output_file = os.path.join(output_directory, "libraries", file_name)
Expand All @@ -158,10 +170,10 @@ def test_resource_gen_for_all(self):

# Create Measure file and save to file
measure_fsh = generator.generate_measure_fsh()
if(measure_fsh):
if measure_fsh:
output_file = os.path.join(output_directory, "measures", f"{stringcase.alphanumcase(generator.get_library_name())}.fsh")
with open(output_file, "w") as f:
f.write(measure_fsh)
f.write(measure_fsh)

if __name__ == "__main__":
unittest.main()
49 changes: 32 additions & 17 deletions who_l3_smart_tools/core/indicator_generation/cql_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import json
from datetime import datetime, timezone
from typing import Any
import stringcase
import pandas as pd

Expand All @@ -10,7 +11,7 @@
/*
* Library: {DAK ID} Logic
* Short Name: {Short name}
*
*
* Definition: {Indicator definition}
*
* Numerator: {Numerator definition}
Expand All @@ -20,16 +21,16 @@
* Denominator: {Denominator definition}
* Denominator Calculation: {Denominator calculation}
* Denominator Exclusions: {Denominator exclusions}
*
*
* Disaggregations:
* {Disaggregation description}
* Disaggregation Elements: {Disaggregation data elements}
*
* Numerator and Denominator Elements:
* Numerator and Denominator Elements:
* {List of all data elements included in numerator and denominator}
*
* Reference: {Reference}
*
* Reference: {Reference}
*
* Additional Context
* - what it measures: {What it measures}
* - rationale: {Rationale}
Expand Down Expand Up @@ -113,7 +114,6 @@
* criteria.expression = "{index}"
"""


class CqlScaffoldGenerator:
def __init__(self, indicator_artifact_file):
self.indicator_artifact_file = indicator_artifact_file
Expand Down Expand Up @@ -201,6 +201,17 @@ def generate_cql_template(self, row_content):

return row_dict["DAK ID"], filled_template

# Get indicator DAK ID from CQL file with first instance of DAK ID pattern HIV.IND.X
DAK_INDICATOR_ID_PATTERN = re.compile(r'(HIV\.IND\.\d+)')

class EmptyItem:
    """Placeholder mapping used as a "nothing here yet" sentinel.

    Every subscript lookup yields ``None`` and ``keys()`` yields an empty
    list, so code that reads from the placeholder does not need to special
    case it. Iteration and membership tests behave like those of an empty
    container.
    """

    def __getitem__(self, item) -> Any:
        """Return ``None`` for any key or index."""
        return None

    def __iter__(self):
        """Return an iterator over no elements."""
        # Without an explicit __iter__, Python falls back to the legacy
        # sequence protocol and calls __getitem__(0), __getitem__(1), ...
        # until IndexError is raised. __getitem__ never raises here, so
        # iterating over the placeholder would never terminate.
        return iter(())

    def __contains__(self, item) -> bool:
        """Report that nothing is contained in the placeholder."""
        # Membership tests would otherwise iterate through __getitem__ and
        # hang for any value other than None.
        return False

    def keys(self):
        """Return an empty list of keys."""
        return []

__empty__ = EmptyItem()

class CQLResourceGenerator:
"""
Expand All @@ -212,9 +223,10 @@ class CQLResourceGenerator:
indicator_dictionary (dict[str, Any]): Indicator artifact rows keyed by DAK ID.
"""

def __init__(self, cql_content, indicator_dictionary):
def __init__(self, cql_content, indicator_dictionary: dict[str, Any]):
self.cql_content = cql_content
self.parsed_cql = self.parse_cql()
self.parsed_cql = __empty__
self.parse_cql()
self.indicator_dictionary = indicator_dictionary

def parseRow(self, row):
Expand All @@ -227,6 +239,9 @@ def parse_cql(self):
"""
Parse the CQL file to extract relevant information.
"""
if self.parsed_cql is not __empty__:
return self.parsed_cql

parsed_data = {
"stratifiers": {},
"populations": {},
Expand All @@ -235,9 +250,7 @@ def parse_cql(self):
"library_name": None,
}

# Get indicator DAK ID from CQL file with first instance of DAK ID pattern HIV.IND.X
dak_id_indicator_pattern = r"(HIV\.IND\.\d+)"
indicator_match = re.search(dak_id_indicator_pattern, self.cql_content)
indicator_match = DAK_INDICATOR_ID_PATTERN.search(self.cql_content)

if indicator_match:
parsed_data["library_name"] = indicator_match.group(1)
Expand Down Expand Up @@ -278,22 +291,24 @@ def parse_cql(self):
for population in population_matches:
parsed_data["populations"][population] = True

return parsed_data
self.parsed_cql = parsed_data
return self.parsed_cql

def generate_library_fsh(self):
"""
Generate the Library FSH file content.
"""

library_name = f"{self.parsed_cql['library_name'].replace('.', '')}Logic"
raw_library_name = self.parsed_cql['library_name']
library_name = f"{raw_library_name.replace('.', '')}Logic"

# Treat as indicator
if library_name in self.indicator_dictionary.keys():
header_variables = self.parseRow(self.indicator_dictionary[library_name])
title = header_variables["Short name"]
if raw_library_name in self.indicator_dictionary.keys():
header_variables = self.parseRow(self.indicator_dictionary[raw_library_name])
title = raw_library_name
description = header_variables["Indicator definition"]
else:
title = library_name
title = raw_library_name
description = f"Description not yet available for {library_name}."

library_fsh = library_fsh_template.format(
Expand Down

0 comments on commit a53ce0e

Please sign in to comment.