Skip to content

Commit

Permalink
Merge pull request #26 from I-TECH-UW/WSG-94-refactor-questionnaire-g…
Browse files Browse the repository at this point in the history
…enerator

WSG-94 refactor questionnaire generator. add questionnaire command
  • Loading branch information
xkmato authored Jul 25, 2024
2 parents 445fbdb + b2ed139 commit 760436f
Show file tree
Hide file tree
Showing 8 changed files with 216 additions and 122 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,5 @@ who_ocl.py
# Output for testing files

test_output/
tests/data/l2/csv_files/
tests/data/l2/csv_files/
data/
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ ignore=CVS,setup.py
# ignore-list. The regex matches against paths and can be in Posix or Windows
# format. Because '\\' represents the directory delimiter on Windows systems,
# it can't be used as an escape character.
ignore-paths=tests,scripts,who_l3_smart_tools/core/indicator_testing,who_l3_smart_tools/core/cql_tool,who_l3_smart_tools/core/requirements,who_l3_smart_tools/core/questionnaires
ignore-paths=tests,scripts,who_l3_smart_tools/core/indicator_testing,who_l3_smart_tools/core/cql_tool,who_l3_smart_tools/core/requirements

# Files or directories matching the regular expression patterns are skipped.
# The regex matches against base names, not paths. The default value ignores
Expand Down
15 changes: 3 additions & 12 deletions who_l3_smart_tools/cli/logical_model_gen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import argparse

from who_l3_smart_tools.cli.utils import add_common_args
from who_l3_smart_tools.core.logical_models.logical_model_generator import (
LogicalModelAndTerminologyGenerator,
)
Expand All @@ -8,18 +10,7 @@ def main():
parser = argparse.ArgumentParser(
description="Generate Logical Model FSH from L3 Data Dictionary Excel file."
)
parser.add_argument(
"-i",
"--input",
default="./l3-data/test-data.xlsx",
help="Input Data Dictionary file location",
)
parser.add_argument(
"-o",
"--output",
default="./data/output",
help="Output Logical Model FSH file location",
)
add_common_args(parser)

args = parser.parse_args()

Expand Down
22 changes: 22 additions & 0 deletions who_l3_smart_tools/cli/questionnaire.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#! /usr/bin/env python
import argparse

from who_l3_smart_tools.cli.utils import add_common_args
from who_l3_smart_tools.core.questionnaires.questionnaire_generator import (
QuestionnaireGenerator,
)


def main():
    """Command-line entry point for the questionnaire generator.

    Builds the argument parser, registers the shared input/output options,
    and then runs the questionnaire FSH generation with the parsed values.
    """
    cli = argparse.ArgumentParser(
        description="Generate Questionnaire FSH from L3 Data Dictionary Excel file."
    )
    # -i/--input and -o/--output are shared with the other CLI commands.
    add_common_args(cli)

    options = cli.parse_args()

    generator = QuestionnaireGenerator(options.input, options.output)
    generator.generate_fsh_from_excel()


if __name__ == "__main__":
main()
22 changes: 22 additions & 0 deletions who_l3_smart_tools/cli/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
def add_common_args(parser):
    """
    Add the arguments shared by the command-line tools to ``parser``.

    Two options are registered:

    * ``-i`` / ``--input``: required; location of the input Data Dictionary file.
    * ``-o`` / ``--output``: optional; defaults to ``./data/output``.

    Args:
        parser (argparse.ArgumentParser): The argument parser object. It is
            modified in place.

    Returns:
        None
    """
    parser.add_argument(
        "-i",
        "--input",
        required=True,
        help="Input Data Dictionary file location",
    )
    # This helper is shared by several generators, so the help text must not
    # name one specific kind of output (it previously said "Logical Model").
    parser.add_argument(
        "-o",
        "--output",
        default="./data/output",
        help="Output location for the generated FSH files",
    )
221 changes: 113 additions & 108 deletions who_l3_smart_tools/core/questionnaires/questionnaire_generator.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,83 @@
import os
import re
from typing import List, Union
import pandas as pd
import os
from who_l3_smart_tools.utils import camel_case

from openpyxl import load_workbook

data_type_map = {
"Boolean": "boolean",
"String": "string",
"Date": "date",
"DateTime": "dateTime",
"Coding": "choice",
"ID": "string",
"Quantity": "integer",
}

questionnaire_template = """Instance: {activity_id}
InstanceOf: sdc-questionnaire-extr-smap
Title: "{activity_title}"
Description: "Questionnaire for {activity_title_description}"
Usage: #definition
* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-shareablequestionnaire"
* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-publishablequestionnaire"
* subjectType = #Patient
* language = #en
* status = #draft
* experimental = true"""

questionnaire_item_template = """
* item[+]
* id = "{data_element_id}"
* linkId = "{data_element_id}"
* type = #{data_type}
* text = "{data_element_label}"
* required = {required}
* repeats = false
* readOnly = false"""

questionnaire_item_valueset = """
* answerValueSet = "#{data_element_id}" """.rstrip()
from who_l3_smart_tools.utils import camel_case
from who_l3_smart_tools.utils.jinja2 import (
DATA_TYPE_MAP,
initalize_jinja_env,
render_to_file,
)

jinja2_env = initalize_jinja_env(__name__)


# pylint: disable=too-few-public-methods
class QuestionnaireGenerator:
"""
A class that generates FHIR Questionnaire resources from an Excel file.
Args:
input_file (str): The path to the input Excel file.
output_dir (str): The directory where the generated FHIR Questionnaire
resources will be saved.
Attributes:
input_file (str): The path to the input Excel file.
output_dir (str): The directory where the generated FHIR Questionnaire
resources will be saved.
_activities (dict): A dictionary to store the activities and their associated
questionnaire items.
workbook: The loaded Excel workbook.
Methods:
generate_fsh_from_excel: Generates FHIR Questionnaire resources from the Excel file.
_add_items_to_activity: Adds questionnaire items to the specified activity.
"""

def __init__(self, input_file, output_dir):
self.input_file = input_file
self.output_dir = output_dir
self._activities = {}
self.workbook = load_workbook(self.input_file)

def generate_fsh_from_excel(self):
"""
Generates FHIR Questionnaire resources from the Excel file.
This method iterates through each sheet in the workbook and extracts
the questionnaire items.
It then organizes the questionnaire items into activities and generates FHIR
Questionnaire resources
for each activity. The generated resources are saved in the specified
output directory.
"""
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)

# Load the Excel file
dd_xls = pd.read_excel(self.input_file, sheet_name=None)

for sheet_name in dd_xls.keys():
for sheet_name in self.workbook.sheetnames:
if not re.match(r"HIV\.[A-Z\-]+\s", sheet_name):
continue

df = dd_xls[sheet_name]
current_activity_id = None
questionnaire_items = []

for i, row in df.iterrows():
sheet = self.workbook[sheet_name]
header = None
for row in sheet.iter_rows(values_only=True):
if header is None:
header = row
continue
row = dict(zip(header, row))
activity_id = row["Activity ID"]

# handle an activity change
if type(activity_id) == str and activity_id != current_activity_id:
if isinstance(activity_id, str) and activity_id != current_activity_id:
# write out any existing activity
self._write_current_activity(
self._add_items_to_activity(
current_activity_id, questionnaire_items
)

Expand All @@ -77,88 +86,84 @@ def generate_fsh_from_excel(self):
# NB The template gets formatted when written
questionnaire_items = []

data_type = str(row["Data Type"])
data_type = row["Data Type"]

# we only want questions on the questionnaires
if data_type == "Codes":
continue

data_element_id = row["Data Element ID"]

if type(data_element_id) != str or not data_element_id:
if not isinstance(data_element_id, str) or not data_element_id:
continue

questionnaire_items.append(
questionnaire_item_template.format(
data_element_id=data_element_id,
data_element_label=str(row["Data Element Label"])
.replace("*", "")
.replace("[", "")
.replace("]", "")
.replace('"', "'")
.strip(),
data_type=data_type_map[data_type],
required="true" if str(row["Required"]) == "R" else "false",
)
)
questionnaire_item = {
"data_element_id": data_element_id,
"data_element_label": str(row["Data Element Label"])
.replace("*", "")
.replace("[", "")
.replace("]", "")
.replace('"', "'")
.strip(),
"data_type": DATA_TYPE_MAP[data_type],
"required": "true" if str(row["Required"]) == "R" else "false",
}

# coded answers should be bound to a dataset
if data_type == "Coding":
questionnaire_items.append(
questionnaire_item_valueset.format(
data_element_id=data_element_id
)
)
questionnaire_item["has_valueset"] = True

self._write_current_activity(current_activity_id, questionnaire_items)
questionnaire_items.append(questionnaire_item)

for activity_code, activity in self._activities.items():
questionnaire_items = activity.pop("questionnaire_items")
with open(os.path.join(self.output_dir, f"{activity_code}.fsh"), "w") as f:
f.write(
questionnaire_template.format(**activity)
+ (
"\n" + "".join(questionnaire_items)
if len(questionnaire_items) > 0
else ""
)
+ "\n"
)
self._add_items_to_activity(current_activity_id, questionnaire_items)

def _write_current_activity(
for activity_code, activity in self._activities.items():
_filename = os.path.join(self.output_dir, f"{activity_code}.fsh")
render_to_file(
jinja2_env.get_template("questionnaire.fsh.j2"),
activity,
_filename,
)

def _add_items_to_activity(
self, current_activity_id: Union[str, None], questionnaire_items: List[str]
):
"""
Adds questionnaire items to the specified activity.
Args:
current_activity_id (str or None): The ID of the current activity.
questionnaire_items (list): The list of questionnaire items to be added.
"""
if current_activity_id is not None:
if "\n" in current_activity_id:
activities = current_activity_id.split("\n")
else:
activities = [current_activity_id]

if activities:
for activity in activities:
if " " in activity:
activity_code, activity_description = activity.split(" ", 1)
activity_desc_camel = camel_case(activity_description)
activity_desc_camel = (
activity_desc_camel[0].upper() + activity_desc_camel[1:]
)
else:
activity_code = activity
activity_description = activity_desc_camel = activity.split(
".", 1
)[1]

if activity_code not in self._activities:
self._activities[activity_code] = {
"activity_id": f"{activity_code}{activity_desc_camel}",
"activity_title": activity_description,
"activity_title_description": activity_description[
0
].lower()
+ activity_description[1:],
"questionnaire_items": [],
}

self._activities[activity_code][
"questionnaire_items"
] += questionnaire_items
for activity in activities:
if " " in activity:
activity_code, activity_description = activity.split(" ", 1)
activity_desc_camel = camel_case(activity_description)
activity_desc_camel = (
activity_desc_camel[0].upper() + activity_desc_camel[1:]
)
else:
activity_code = activity
activity_description = activity_desc_camel = activity.split(".", 1)[
1
]

if activity_code not in self._activities:
self._activities[activity_code] = {
"activity_id": f"{activity_code}{activity_desc_camel}",
"activity_title": activity_description,
"activity_title_description": activity_description[0].lower()
+ activity_description[1:],
"questionnaire_items": [],
}

self._activities[activity_code][
"questionnaire_items"
] += questionnaire_items
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{#-
  FSH Questionnaire instance template.

  Context variables read by this template:
    activity_id                - used as the FSH Instance name
    activity_title             - used as the Title
    activity_title_description - appended to "Questionnaire for " in Description
    questionnaire_items        - iterable of items; each item provides
                                 data_element_id (used for both id and linkId),
                                 data_type, data_element_label, required and,
                                 optionally, has_valueset

  An answerValueSet line is emitted only for items whose has_valueset is truthy.

  NOTE(review): this appears to be the "questionnaire.fsh.j2" template that
  QuestionnaireGenerator renders once per activity; the file name is not shown
  in this view, so confirm.

  The "-" markers on this comment strip the surrounding whitespace so that
  adding it does not change the rendered output.
-#}
Instance: {{activity_id}}
InstanceOf: sdc-questionnaire-extr-smap
Title: "{{activity_title}}"
Description: "Questionnaire for {{activity_title_description}}"
Usage: #definition
* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-shareablequestionnaire"
* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-publishablequestionnaire"
* subjectType = #Patient
* language = #en
* status = #draft
* experimental = true

{% for item in questionnaire_items %}
* item[+]
* id = "{{item.data_element_id}}"
* linkId = "{{item.data_element_id}}"
* type = #{{item.data_type}}
* text = "{{item.data_element_label}}"
* required = {{item.required}}
* repeats = false
* readOnly = false
{% if item.has_valueset %}
* answerValueSet = "#{{item.data_element_id}}"
{% endif %}

{% endfor %}
Loading

0 comments on commit 760436f

Please sign in to comment.