From cd52051e0dda192e48f639af2d27e61614b05fdf Mon Sep 17 00:00:00 2001 From: xkmato Date: Wed, 24 Jul 2024 14:11:12 +0300 Subject: [PATCH] WSG-94 refactor questionnaire generator. add questionnaire command --- .gitignore | 3 +- .pylintrc | 2 +- who_l3_smart_tools/cli/logical_model_gen.py | 15 +- who_l3_smart_tools/cli/questionnaire.py | 22 ++ who_l3_smart_tools/cli/utils.py | 22 ++ .../questionnaires/questionnaire_generator.py | 221 +++++++++--------- .../questionnaires/templates/questionnaire.j2 | 26 +++ who_l3_smart_tools/utils/jinja2.py | 32 +++ 8 files changed, 221 insertions(+), 122 deletions(-) create mode 100755 who_l3_smart_tools/cli/questionnaire.py create mode 100644 who_l3_smart_tools/cli/utils.py create mode 100644 who_l3_smart_tools/core/questionnaires/templates/questionnaire.j2 create mode 100644 who_l3_smart_tools/utils/jinja2.py diff --git a/.gitignore b/.gitignore index 87f3612..0b9c044 100644 --- a/.gitignore +++ b/.gitignore @@ -178,4 +178,5 @@ who_ocl.py # Output for testing files test_output/ -tests/data/l2/csv_files/ \ No newline at end of file +tests/data/l2/csv_files/ +data/ \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index 3cfde1e..c7d5090 100644 --- a/.pylintrc +++ b/.pylintrc @@ -52,7 +52,7 @@ ignore=CVS,setup.py # ignore-list. The regex matches against paths and can be in Posix or Windows # format. Because '\\' represents the directory delimiter on Windows systems, # it can't be used as an escape character. -ignore-paths=tests,scripts,who_l3_smart_tools/core/indicator_testing,who_l3_smart_tools/core/cql_tool,who_l3_smart_tools/core/requirements,who_l3_smart_tools/core/questionnaires +ignore-paths=tests,scripts,who_l3_smart_tools/core/indicator_testing,who_l3_smart_tools/core/cql_tool,who_l3_smart_tools/core/requirements # Files or directories matching the regular expression patterns are skipped. # The regex matches against base names, not paths. The default value ignores diff --git a/who_l3_smart_tools/cli/logical_model_gen.py b/who_l3_smart_tools/cli/logical_model_gen.py index 293fd85..e7ecb2f 100644 --- a/who_l3_smart_tools/cli/logical_model_gen.py +++ b/who_l3_smart_tools/cli/logical_model_gen.py @@ -1,4 +1,6 @@ import argparse + +from who_l3_smart_tools.cli.utils import add_common_args from who_l3_smart_tools.core.logical_models.logical_model_generator import ( LogicalModelAndTerminologyGenerator, ) @@ -8,18 +10,7 @@ def main(): parser = argparse.ArgumentParser( description="Generate Logical Model FSH from L3 Data Dictionary Excel file." ) - parser.add_argument( - "-i", - "--input", - default="./l3-data/test-data.xlsx", - help="Input Data Dictionary file location", - ) - parser.add_argument( - "-o", - "--output", - default="./data/output", - help="Output Logical Model FSH file location", - ) + add_common_args(parser) args = parser.parse_args() diff --git a/who_l3_smart_tools/cli/questionnaire.py b/who_l3_smart_tools/cli/questionnaire.py new file mode 100755 index 0000000..a2550e7 --- /dev/null +++ b/who_l3_smart_tools/cli/questionnaire.py @@ -0,0 +1,22 @@ +#! /usr/bin/env python +import argparse + +from who_l3_smart_tools.cli.utils import add_common_args +from who_l3_smart_tools.core.questionnaires.questionnaire_generator import ( + QuestionnaireGenerator, +) + + +def main(): + parser = argparse.ArgumentParser( + description="Generate Questionnaire FSH from L3 Data Dictionary Excel file." + ) + add_common_args(parser) + + args = parser.parse_args() + + QuestionnaireGenerator(args.input, args.output).generate_fsh_from_excel() + + +if __name__ == "__main__": + main() diff --git a/who_l3_smart_tools/cli/utils.py b/who_l3_smart_tools/cli/utils.py new file mode 100644 index 0000000..accfc6d --- /dev/null +++ b/who_l3_smart_tools/cli/utils.py @@ -0,0 +1,22 @@ +def add_common_args(parser): + """ + Add common arguments to the argument parser. + + Args: + parser (argparse.ArgumentParser): The argument parser object. + + Returns: + None + """ + parser.add_argument( + "-i", + "--input", + required=True, + help="Input Data Dictionary file location", + ) + parser.add_argument( + "-o", + "--output", + default="./data/output", + help="Output Logical Model FSH file location", + ) diff --git a/who_l3_smart_tools/core/questionnaires/questionnaire_generator.py b/who_l3_smart_tools/core/questionnaires/questionnaire_generator.py index af5f1e8..bdef29e 100644 --- a/who_l3_smart_tools/core/questionnaires/questionnaire_generator.py +++ b/who_l3_smart_tools/core/questionnaires/questionnaire_generator.py @@ -1,74 +1,83 @@ +import os import re from typing import List, Union -import pandas as pd -import os -from who_l3_smart_tools.utils import camel_case +from openpyxl import load_workbook -data_type_map = { - "Boolean": "boolean", - "String": "string", - "Date": "date", - "DateTime": "dateTime", - "Coding": "choice", - "ID": "string", - "Quantity": "integer", -} - -questionnaire_template = """Instance: {activity_id} -InstanceOf: sdc-questionnaire-extr-smap -Title: "{activity_title}" -Description: "Questionnaire for {activity_title_description}" -Usage: #definition -* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-shareablequestionnaire" -* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-publishablequestionnaire" -* subjectType = #Patient -* language = #en -* status = #draft -* experimental = true""" - -questionnaire_item_template = """ -* item[+] - * id = "{data_element_id}" - * linkId = "{data_element_id}" - * type = #{data_type} - * text = "{data_element_label}" - * required = {required} - * repeats = false - * readOnly = false""" - -questionnaire_item_valueset = """ - * answerValueSet = "#{data_element_id}" """.rstrip() +from who_l3_smart_tools.utils import camel_case +from who_l3_smart_tools.utils.jinja2 import ( + DATA_TYPE_MAP, + initalize_jinja_env, + render_to_file, +) + +jinja2_env = initalize_jinja_env(__file__) +# pylint: disable=too-few-public-methods class QuestionnaireGenerator: + """ + A class that generates FHIR Questionnaire resources from an Excel file. + + Args: + input_file (str): The path to the input Excel file. + output_dir (str): The directory where the generated FHIR Questionnaire + resources will be saved. + + Attributes: + input_file (str): The path to the input Excel file. + output_dir (str): The directory where the generated FHIR Questionnaire + resources will be saved. + _activities (dict): A dictionary to store the activities and their associated + questionnaire items. + workbook: The loaded Excel workbook. + + Methods: + generate_fsh_from_excel: Generates FHIR Questionnaire resources from the Excel file. + _add_items_to_activity: Adds questionnaire items to the specified activity. + + """ + def __init__(self, input_file, output_dir): self.input_file = input_file self.output_dir = output_dir self._activities = {} + self.workbook = load_workbook(self.input_file) def generate_fsh_from_excel(self): + """ + Generates FHIR Questionnaire resources from the Excel file. + + This method iterates through each sheet in the workbook and extracts + the questionnaire items. + It then organizes the questionnaire items into activities and generates FHIR + Questionnaire resources + for each activity. The generated resources are saved in the specified + output directory. + + """ if not os.path.exists(self.output_dir): os.makedirs(self.output_dir) - # Load the Excel file - dd_xls = pd.read_excel(self.input_file, sheet_name=None) - - for sheet_name in dd_xls.keys(): + for sheet_name in self.workbook.sheetnames: if not re.match(r"HIV\.[A-Z\-]+\s", sheet_name): continue - - df = dd_xls[sheet_name] current_activity_id = None questionnaire_items = [] - for i, row in df.iterrows(): + sheet = self.workbook[sheet_name] + header = None + for row in sheet.iter_rows(values_only=True): + if header is None: + header = row + continue + row = dict(zip(header, row)) activity_id = row["Activity ID"] # handle an activity change - if type(activity_id) == str and activity_id != current_activity_id: + if isinstance(activity_id, str) and activity_id != current_activity_id: # write out any existing activity - self._write_current_activity( + self._add_items_to_activity( current_activity_id, questionnaire_items ) @@ -77,7 +86,7 @@ def generate_fsh_from_excel(self): # NB The template gets formatted when written questionnaire_items = [] - data_type = str(row["Data Type"]) + data_type = row["Data Type"] # we only want questions on the questionnaires if data_type == "Codes": @@ -85,80 +94,76 @@ def generate_fsh_from_excel(self): data_element_id = row["Data Element ID"] - if type(data_element_id) != str or not data_element_id: + if not isinstance(data_element_id, str) or not data_element_id: continue - questionnaire_items.append( - questionnaire_item_template.format( - data_element_id=data_element_id, - data_element_label=str(row["Data Element Label"]) - .replace("*", "") - .replace("[", "") - .replace("]", "") - .replace('"', "'") - .strip(), - data_type=data_type_map[data_type], - required="true" if str(row["Required"]) == "R" else "false", - ) - ) + questionnaire_item = { + "data_element_id": data_element_id, + "data_element_label": str(row["Data Element Label"]) + .replace("*", "") + .replace("[", "") + .replace("]", "") + .replace('"', "'") + .strip(), + "data_type": DATA_TYPE_MAP[data_type], + "required": "true" if str(row["Required"]) == "R" else "false", + } # coded answers should be bound to a dataset if data_type == "Coding": - questionnaire_items.append( - questionnaire_item_valueset.format( - data_element_id=data_element_id - ) - ) + questionnaire_item["has_valueset"] = True - self._write_current_activity(current_activity_id, questionnaire_items) + questionnaire_items.append(questionnaire_item) - for activity_code, activity in self._activities.items(): - questionnaire_items = activity.pop("questionnaire_items") - with open(os.path.join(self.output_dir, f"{activity_code}.fsh"), "w") as f: - f.write( - questionnaire_template.format(**activity) - + ( - "\n" + "".join(questionnaire_items) - if len(questionnaire_items) > 0 - else "" - ) - + "\n" - ) + self._add_items_to_activity(current_activity_id, questionnaire_items) - def _write_current_activity( + for activity_code, activity in self._activities.items(): + _filename = os.path.join(self.output_dir, f"{activity_code}.fsh") + render_to_file( + jinja2_env.get_template("questionnaire.j2"), + activity, + _filename, + ) + + def _add_items_to_activity( self, current_activity_id: Union[str, None], questionnaire_items: List[str] ): + """ + Adds questionnaire items to the specified activity. + + Args: + current_activity_id (str or None): The ID of the current activity. + questionnaire_items (list): The list of questionnaire items to be added. + + """ if current_activity_id is not None: if "\n" in current_activity_id: activities = current_activity_id.split("\n") else: activities = [current_activity_id] - if activities: - for activity in activities: - if " " in activity: - activity_code, activity_description = activity.split(" ", 1) - activity_desc_camel = camel_case(activity_description) - activity_desc_camel = ( - activity_desc_camel[0].upper() + activity_desc_camel[1:] - ) - else: - activity_code = activity - activity_description = activity_desc_camel = activity.split( - ".", 1 - )[1] - - if activity_code not in self._activities: - self._activities[activity_code] = { - "activity_id": f"{activity_code}{activity_desc_camel}", - "activity_title": activity_description, - "activity_title_description": activity_description[ - 0 - ].lower() - + activity_description[1:], - "questionnaire_items": [], - } - - self._activities[activity_code][ - "questionnaire_items" - ] += questionnaire_items + for activity in activities: + if " " in activity: + activity_code, activity_description = activity.split(" ", 1) + activity_desc_camel = camel_case(activity_description) + activity_desc_camel = ( + activity_desc_camel[0].upper() + activity_desc_camel[1:] + ) + else: + activity_code = activity + activity_description = activity_desc_camel = activity.split(".", 1)[ + 1 + ] + + if activity_code not in self._activities: + self._activities[activity_code] = { + "activity_id": f"{activity_code}{activity_desc_camel}", + "activity_title": activity_description, + "activity_title_description": activity_description[0].lower() + + activity_description[1:], + "questionnaire_items": [], + } + + self._activities[activity_code][ + "questionnaire_items" + ] += questionnaire_items diff --git a/who_l3_smart_tools/core/questionnaires/templates/questionnaire.j2 b/who_l3_smart_tools/core/questionnaires/templates/questionnaire.j2 new file mode 100644 index 0000000..1f4b2f3 --- /dev/null +++ b/who_l3_smart_tools/core/questionnaires/templates/questionnaire.j2 @@ -0,0 +1,26 @@ +Instance: {{activity_id}} +InstanceOf: sdc-questionnaire-extr-smap +Title: "{{activity_title}}" +Description: "Questionnaire for {{activity_title_description}}" +Usage: #definition +* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-shareablequestionnaire" +* meta.profile[+] = "http://hl7.org/fhir/uv/crmi/StructureDefinition/crmi-publishablequestionnaire" +* subjectType = #Patient +* language = #en +* status = #draft +* experimental = true + +{% for item in questionnaire_items %} +* item[+] + * id = "{{item.data_element_id}}" + * linkId = "{{item.data_element_id}}" + * type = #{{item.data_type}} + * text = "{{item.data_element_label}}" + * required = {{item.required}} + * repeats = false + * readOnly = false + {% if item.has_valueset %} + * answerValueSet = "#{{item.data_element_id}}" + {% endif %} + +{% endfor %} \ No newline at end of file diff --git a/who_l3_smart_tools/utils/jinja2.py b/who_l3_smart_tools/utils/jinja2.py new file mode 100644 index 0000000..a529bf9 --- /dev/null +++ b/who_l3_smart_tools/utils/jinja2.py @@ -0,0 +1,32 @@ +import os + +from jinja2 import Environment, FileSystemLoader + +DATA_TYPE_MAP = { + "Boolean": "boolean", + "String": "string", + "Date": "date", + "DateTime": "dateTime", + "Coding": "choice", + "ID": "string", + "Quantity": "integer", +} + + +def initalize_jinja_env(file_name): + template_dir = os.path.join( + os.path.dirname(os.path.abspath(file_name)), "templates" + ) + return Environment( + loader=FileSystemLoader(template_dir), + trim_blocks=True, + lstrip_blocks=True, + ) + + +def render_to_file(template, context, output_file): + """ + Render a template to a file. + """ + with open(output_file, "w") as f: + f.write(template.render(context))