From 5aae8ef41127ebfdb507038bf583628381e4b090 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 5 Jul 2024 23:00:03 +0100
Subject: [PATCH 001/218] try to add one scenario of expanding hcw

---
 .../ResourceFile_HR_scaling_by_level_and_officer_type.xlsx    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
index 3d804bbc77..853a830849 100644
--- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
+++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af86c2c2af5c291c18c5d481681d6d316526b81806c8c8e898517e850160e6fd
-size 12465
+oid sha256:89f1927bdbfa524ddf62dab23e6e30704c87baf4b0892a8110e01ad2f383a446
+size 12504

From 146631e83a9f95cb9fa4f53d4d92e751eb1d10f9 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 5 Jul 2024 23:33:10 +0100
Subject: [PATCH 002/218] try to add the scenario script

---
 .../scenario_of_expanding_funed_plus_hcw.py   | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
new file mode 100644
index 0000000000..6c06ce9cc9
--- /dev/null
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -0,0 +1,91 @@
+"""
+This file defines a batch run of a large population for a long time with all disease modules and full use of HSIs
+It's used for analysis of impact of expanding funded hcw.
+
+Run on the batch system using:
+```
+tlo batch-submit src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+```
+
+or locally using:
+```
+tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+```
+
+"""
+from pathlib import Path
+from typing import Dict
+
+from tlo import Date, logging
+from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios
+from tlo.methods.fullmodel import fullmodel
+from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher
+from tlo.scenario import BaseScenario
+
+
+class LongRun(BaseScenario):
+    def __init__(self):
+        super().__init__()
+        self.seed = 0
+        self.start_date = Date(2010, 1, 1)
+        self.end_date = Date(2020, 1, 1)
+        self.pop_size = 20_000
+        self._scenarios = self._get_scenarios()
+        self.number_of_draws = len(self._scenarios)
+        self.runs_per_draw = 10
+
+    def log_configuration(self):
+        return {
+            'filename': 'scenario_run_for_hcw_expansion_analysis',
+            'directory': Path('./outputs'),  # <- (specified only for local running)
+            'custom_levels': {
+                '*': logging.WARNING,
+                'tlo.methods.demography': logging.INFO,
+                'tlo.methods.demography.detail': logging.WARNING,
+                'tlo.methods.healthburden': logging.INFO,
+                'tlo.methods.healthsystem': logging.INFO,
+                'tlo.methods.healthsystem.summary': logging.INFO,
+            }
+        }
+
+    def modules(self):
+        return (fullmodel(resourcefilepath=self.resources) +
+                [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)])
+
+    def draw_parameters(self, draw_number, rng):
+        return list(self._scenarios.values())[draw_number]
+
+    def _get_scenarios(self) -> Dict[str, Dict]:
+        """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
+
+        return {
+            "Establishment HCW":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'default'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded'
+                    }
+                    }
+                ),
+        }
+
+
+if __name__ == '__main__':
+    from tlo.cli import scenario_run
+
+    scenario_run([__file__])

From 7b09c1bfeb37bd6f3d58e48323f2988052e1391e Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 5 Jul 2024 23:45:23 +0100
Subject: [PATCH 003/218] add todo note to create more HCW expansion scenarios

---
 .../scenario_of_expanding_funed_plus_hcw.py                     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
index 6c06ce9cc9..a9764136c1 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -55,7 +55,7 @@ def modules(self):
     def draw_parameters(self, draw_number, rng):
         return list(self._scenarios.values())[draw_number]
 
-    def _get_scenarios(self) -> Dict[str, Dict]:
+    def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of expanding HCW
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
         return {

From 75b88fcb6c183ef40a5dd03cb3392efd9455fa90 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sat, 6 Jul 2024 15:34:54 +0100
Subject: [PATCH 004/218] input scenario data for increasing Clinical and
 Pharmacy cadres

---
 .../ResourceFile_HR_scaling_by_level_and_officer_type.xlsx    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
index 853a830849..5658cad547 100644
--- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
+++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89f1927bdbfa524ddf62dab23e6e30704c87baf4b0892a8110e01ad2f383a446
-size 12504
+oid sha256:c4b4f49d06ec7ddf897d197ce93bd51cd420ef933f03131aef7927eae8853bbd
+size 22000

From 2a52eaa05085c667cc3e421eb5dba3908c51f4d6 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sat, 6 Jul 2024 15:59:29 +0100
Subject: [PATCH 005/218] input scenario data for increasing Clinical and
 Pharmacy cadres

---
 .../ResourceFile_HR_scaling_by_level_and_officer_type.xlsx    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
index 5658cad547..151dce6a35 100644
--- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
+++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4b4f49d06ec7ddf897d197ce93bd51cd420ef933f03131aef7927eae8853bbd
-size 22000
+oid sha256:f9d5003253c8a76bd8ab36c1f4fb1805645739853733b484c2ff15d776dfd35d
+size 29666

From ac6e5efeac53a7c4efcf1a7ca74384bfe290beb3 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sat, 6 Jul 2024 16:16:21 +0100
Subject: [PATCH 006/218] update the scenario script: add C/P expansion scenarios

---
 .../scenario_of_expanding_funed_plus_hcw.py   | 193 +++++++++++++++++-
 1 file changed, 188 insertions(+), 5 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
index a9764136c1..0e9b599a20 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -1,6 +1,6 @@
 """
 This file defines a batch run of a large population for a long time with all disease modules and full use of HSIs
-It's used for analysis of impact of expanding funded hcw.
+It's used for analysis of impact of expanding funded hcw, assuming all other setting as default.
 
 Run on the batch system using:
 ```
@@ -43,7 +43,6 @@ def log_configuration(self):
                 'tlo.methods.demography': logging.INFO,
                 'tlo.methods.demography.detail': logging.WARNING,
                 'tlo.methods.healthburden': logging.INFO,
-                'tlo.methods.healthsystem': logging.INFO,
                 'tlo.methods.healthsystem.summary': logging.INFO,
             }
         }
@@ -55,7 +54,7 @@ def modules(self):
     def draw_parameters(self, draw_number, rng):
         return list(self._scenarios.values())[draw_number]
 
-    def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of expanding HCW
+    def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of expanding HCW (C, NM, P)
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
         return {
@@ -63,6 +62,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of e
                 mix_scenarios(
                     get_parameters_for_status_quo(),
                     {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
                         'use_funded_or_actual_staffing': 'funded_plus',
                         'yearly_HR_scaling_mode': 'no_scaling',
                         'mode_appt_constraints': 2,
@@ -71,14 +71,197 @@ def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of e
                     }
                 ),
 
-            "Establishment HCW Expansion":
+            "Establishment HCW Expansion C1":
                 mix_scenarios(
                     get_parameters_for_status_quo(),
                     {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
                         'use_funded_or_actual_staffing': 'funded_plus',
                         'yearly_HR_scaling_mode': 'no_scaling',
                         'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C2":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C3":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion P1":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p1'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion P2":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p2'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion P3":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p3'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C1P1":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p1'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C2P1":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p1'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C3P1":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p1'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C1P2":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p2'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C2P2":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p2'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C3P2":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p2'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C1P3":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p3'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C2P3":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p3'
+                    }
+                    }
+                ),
+
+            "Establishment HCW Expansion C3P3":
+                mix_scenarios(
+                    get_parameters_for_status_quo(),
+                    {'HealthSystem': {
+                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                        'use_funded_or_actual_staffing': 'funded_plus',
+                        'yearly_HR_scaling_mode': 'no_scaling',
+                        'mode_appt_constraints': 2,
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p3'
                     }
                     }
                 ),

From 1aa232a693549432afc85100b7948326d0b31626 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sun, 7 Jul 2024 22:48:26 +0100
Subject: [PATCH 007/218] recover the 'custom' sheet in the resource file to
 fix test failure

---
 .../ResourceFile_HR_scaling_by_level_and_officer_type.xlsx    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
index 151dce6a35..2f6abbe5ce 100644
--- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
+++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_scaling_by_level_and_officer_type.xlsx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9d5003253c8a76bd8ab36c1f4fb1805645739853733b484c2ff15d776dfd35d
-size 29666
+oid sha256:dd2459617f2cc3c545e658f106df057cfba8c183f2500269102a11a9afdf6053
+size 30968

From db6c1450104bab8b9ebe0393199f29ebf87d23d1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sun, 7 Jul 2024 23:37:36 +0100
Subject: [PATCH 008/218] update the scenario file: run to 2040, use shared baseline, set HR scaling year

---
 .../scenario_of_expanding_funed_plus_hcw.py   | 167 ++++++++----------
 1 file changed, 69 insertions(+), 98 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
index 0e9b599a20..49a8d07519 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -11,8 +11,8 @@
 ```
 tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
 ```
-
 """
+
 from pathlib import Path
 from typing import Dict
 
@@ -28,7 +28,7 @@ def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = Date(2020, 1, 1)
+        self.end_date = Date(2040, 1, 1)
         self.pop_size = 20_000
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
@@ -57,216 +57,187 @@ def draw_parameters(self, draw_number, rng):
     def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of expanding HCW (C, NM, P)
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
+        self.YEAR_OF_CHANGE = 2030  # This is the year to change the HR scaling mode
+
         return {
             "Establishment HCW":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'default'
+                        'HR_scaling_by_level_and_officer_type_mode': 'default',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C1":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C2":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C3":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion P1":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p1'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p1',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion P2":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p2'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p2',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion P3":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p3'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p3',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C1P1":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p1'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p1',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C2P1":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p1'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p1',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C3P1":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p1'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p1',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C1P2":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p2'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p2',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C2P2":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p2'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p2',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C3P2":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p2'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p2',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C1P3":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p3'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p3',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C2P3":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p3'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p3',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
 
             "Establishment HCW Expansion C3P3":
                 mix_scenarios(
-                    get_parameters_for_status_quo(),
+                    self._baseline(),
                     {'HealthSystem': {
-                        'equip_availability': 'default',  # if not specify here, the value will be 'all'
-                        'use_funded_or_actual_staffing': 'funded_plus',
-                        'yearly_HR_scaling_mode': 'no_scaling',
-                        'mode_appt_constraints': 2,
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p3'
+                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p3',
+                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
                     }
                     }
                 ),
         }
 
+    def _baseline(self) -> Dict:
+        return mix_scenarios(
+            get_parameters_for_status_quo(),
+            {'HealthSystem': {
+                'equip_availability': 'default',  # if not specify here, the value will be 'all'
+                'use_funded_or_actual_staffing': 'actual',
+                'use_funded_or_actual_staffing_postSwitch': 'funded_plus',
+                'year_use_funded_or_actual_staffing_switch': 2020,
+                'mode_appt_constraints': 1,
+                'mode_appt_constraints_postSwitch': 2,
+                "year_mode_switch": 2020,
+                'yearly_HR_scaling_mode': 'no_scaling',
+
+            }
+            },
+        )
+
 
 if __name__ == '__main__':
     from tlo.cli import scenario_run

From a7bf52bd2554987bdf5c7405d98c094d17f6ff78 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 8 Jul 2024 00:14:20 +0100
Subject: [PATCH 009/218] update comments

---
 .../scenario_of_expanding_funed_plus_hcw.py                   | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
index 49a8d07519..68c11fd1b4 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -57,7 +57,9 @@ def draw_parameters(self, draw_number, rng):
     def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of expanding HCW (C, NM, P)
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
-        self.YEAR_OF_CHANGE = 2030  # This is the year to change the HR scaling mode
+        self.YEAR_OF_CHANGE = 2030  # This is the year to change HR scaling mode.
+        # Year 2030 is when the Establishment HCW will be met as estimated by Berman 2022.
+        # But it can be 2020 to reduce running time (2010-2030 instead of 2010-2040).
 
         return {
             "Establishment HCW":

From 825535efbcbe2bb036bad16ea810b4af573fd97d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 10 Jul 2024 13:53:55 +0100
Subject: [PATCH 010/218] update baseline scenario

---
 .../scenario_of_expanding_funed_plus_hcw.py                   | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
index 68c11fd1b4..5d7b24d668 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -227,13 +227,15 @@ def _baseline(self) -> Dict:
         return mix_scenarios(
             get_parameters_for_status_quo(),
             {'HealthSystem': {
-                'equip_availability': 'default',  # if not specify here, the value will be 'all'
                 'use_funded_or_actual_staffing': 'actual',
                 'use_funded_or_actual_staffing_postSwitch': 'funded_plus',
                 'year_use_funded_or_actual_staffing_switch': 2020,
                 'mode_appt_constraints': 1,
                 'mode_appt_constraints_postSwitch': 2,
                 "year_mode_switch": 2020,
+                'cons_availability': 'default',
+                'cons_availability_postSwitch': 'all',
+                'year_cons_availability_switch': 2020,
                 'yearly_HR_scaling_mode': 'no_scaling',
 
             }

From 6d44efc782894c5bea9728e821ea65d981a3b20b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 10 Jul 2024 15:25:23 +0100
Subject: [PATCH 011/218] Reduce the running period to 2010-2030

---
 .../scenario_of_expanding_funed_plus_hcw.py          | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
index 5d7b24d668..4620cd6ce7 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -28,7 +28,7 @@ def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = Date(2040, 1, 1)
+        self.end_date = Date(2030, 1, 1)
         self.pop_size = 20_000
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
@@ -57,9 +57,9 @@ def draw_parameters(self, draw_number, rng):
     def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of expanding HCW (C, NM, P)
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
-        self.YEAR_OF_CHANGE = 2030  # This is the year to change HR scaling mode.
+        self.YEAR_OF_CHANGE = 2020  # This is the year to change HR scaling mode.
         # Year 2030 is when the Establishment HCW will be met as estimated by Berman 2022.
-        # But it can be 2020 to reduce running time (2010-2030 instead of 2010-2040).
+        # But it can be 2020, or 2019, to reduce running time (2010-2030 instead of 2010-2040).
 
         return {
             "Establishment HCW":
@@ -229,13 +229,13 @@ def _baseline(self) -> Dict:
             {'HealthSystem': {
                 'use_funded_or_actual_staffing': 'actual',
                 'use_funded_or_actual_staffing_postSwitch': 'funded_plus',
-                'year_use_funded_or_actual_staffing_switch': 2020,
+                'year_use_funded_or_actual_staffing_switch': self.YEAR_OF_CHANGE,
                 'mode_appt_constraints': 1,
                 'mode_appt_constraints_postSwitch': 2,
-                "year_mode_switch": 2020,
+                "year_mode_switch": self.YEAR_OF_CHANGE,
                 'cons_availability': 'default',
                 'cons_availability_postSwitch': 'all',
-                'year_cons_availability_switch': 2020,
+                'year_cons_availability_switch': self.YEAR_OF_CHANGE,
                 'yearly_HR_scaling_mode': 'no_scaling',
 
             }

From 12cb85db6193386c8816d7a28febd3b4428b004c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 11 Jul 2024 23:45:30 +0100
Subject: [PATCH 012/218] try create scenarios given a budget

---
 .../ResourceFile_Annual_Salary_Per_Cadre.csv  |  3 +
 .../salary_cost_of_hr.py                      | 93 +++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv
 create mode 100644 src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py

diff --git a/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv b/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv
new file mode 100644
index 0000000000..ae50af04f5
--- /dev/null
+++ b/resources/costing/ResourceFile_Annual_Salary_Per_Cadre.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2457b9b914a1b356ba64168790f99467a86f459760268a729a6ddaf719b45b7
+size 245
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py
new file mode 100644
index 0000000000..e0fde7711c
--- /dev/null
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py
@@ -0,0 +1,93 @@
+"""
+We calculate the salary cost of current and funded plus HCW.
+"""
+
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+resourcefilepath = Path('./resources')
+
+hr_salary = pd.read_csv(resourcefilepath /
+                        'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv', index_col=False)
+hr_current = pd.read_csv(resourcefilepath /
+                         'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv')
+hr_established = pd.read_csv(resourcefilepath /
+                             'healthsystem' / 'human_resources' / 'funded_plus' / 'ResourceFile_Daily_Capabilities.csv')
+
+hr_curr_count = hr_current.groupby('Officer_Category').agg({'Staff_Count': 'sum'})
+hr_estab_count = (hr_established.groupby('Officer_Category').agg({'Staff_Count': 'sum'}))
+
+hr = hr_curr_count.merge(hr_estab_count, on='Officer_Category', how='outer'
+                         ).merge(hr_salary, on='Officer_Category', how='left')
+
+hr['total_curr_salary'] = hr['Staff_Count_x'] * hr['Annual_Salary_USD']
+hr['total_estab_salary'] = hr['Staff_Count_y'] * hr['Annual_Salary_USD']
+
+total_curr_salary = hr['total_curr_salary'].sum()  # 107.82 million
+total_estab_salary = hr['total_estab_salary'].sum()  # 201.36 million
+
+# now consider expanding establishment HCW
+# assuming annual GDP growth rate is 4.2% and
+# a fixed proportion of GDP is allocated to human resource expansion, thus assuming
+# the annual growth rate of HR salary cost is also 4.2%.
+
+# the annual extra budget, and the number of extra staff it affords
+# if it is spent on expanding one individual cadre in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']
+hr['extra_budget'] = total_estab_salary * 4.2 / 100  # 8.46 million
+hr['individual_increase'] = np.floor(hr['extra_budget'] / hr['Annual_Salary_USD'])
+# do not increase other cadres
+for c in hr.Officer_Category:
+    if c not in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']:
+        hr.loc[hr.Officer_Category == c, 'individual_increase'] = 0
+hr['individual_scale_up_factor'] = (hr['individual_increase'] + hr['Staff_Count_y']) / hr['Staff_Count_y']
+hr['individual_increase_%'] = hr['individual_increase'] * 100 / hr['Staff_Count_y']
+
+# if to expand multiple cadres in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']
+hr_expand = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
+                   ['Officer_Category', 'individual_increase']].copy()
+hr_expand['individual_increase_0%'] = np.floor(hr_expand['individual_increase'] * 0)
+hr_expand['individual_increase_25%'] = np.floor(hr_expand['individual_increase'] * 0.25)
+hr_expand['individual_increase_50%'] = np.floor(hr_expand['individual_increase'] * 0.5)
+hr_expand['individual_increase_75%'] = np.floor(hr_expand['individual_increase'] * 0.75)
+hr_expand['individual_increase_100%'] = np.floor(hr_expand['individual_increase'] * 1.0)
+hr_expand.drop(columns='individual_increase', inplace=True)
+hr_expand.set_index('Officer_Category', inplace=True)
+
+c_array = hr_expand.loc['Clinical'].values
+nm_array = hr_expand.loc['Nursing_and_Midwifery'].values
+p_array = hr_expand.loc['Pharmacy'].values
+d_array = hr_expand.loc['DCSA'].values
+hr_meshgrid = np.array(
+    np.meshgrid(c_array, nm_array, p_array, d_array)).T.reshape(-1, 4)
+
+hr_expand_scenario = pd.DataFrame({'Clinical': hr_meshgrid[:, 0],
+                                   'Nursing_and_Midwifery': hr_meshgrid[:, 1],
+                                   'Pharmacy': hr_meshgrid[:, 2],
+                                   'DCSA': hr_meshgrid[:, 3]}).T
+
+hr_expand_salary = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
+                          ['Officer_Category', 'Annual_Salary_USD']].copy()
+hr_expand_salary.set_index('Officer_Category', inplace=True)
+
+hr_expand_scenario_cost = hr_expand_salary.merge(hr_expand_scenario, left_index=True, right_index=True)
+hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[1:]] = \
+    hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[1:]].multiply(
+    hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[0]], axis='index')
+hr_expand_scenario_cost.loc['Total'] = hr_expand_scenario_cost.sum()
+# hr_expand_scenario_cost.drop(columns=['Annual_Salary_USD'], inplace=True)
+
+cond = (hr_expand_scenario_cost.loc['Total'] <= total_estab_salary * 4.2 / 100)
+hr_expand_scenario_budget = hr_expand_scenario_cost.loc[:, cond].copy()
+hr_expand_scenario_budget.drop(index='Total', inplace=True)
+hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]] = \
+    hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]].div(
+    hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[0]], axis='index')
+
+# todo: further reduce scenarios
+
+
+
+
+

From 9fda127caf244ce98bbc65f373ad4513145499e1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 12 Jul 2024 14:32:11 +0100
Subject: [PATCH 013/218] update the selection of scenarios

---
 .../salary_cost_of_hr.py                                   | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py
index e0fde7711c..6e46dac98d 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py
@@ -84,9 +84,12 @@
 hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]] = \
     hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]].div(
     hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[0]], axis='index')
+hr_expand_scenario_budget.loc['Total'] = hr_expand_scenario_budget.sum()
 
-# todo: further reduce scenarios
-
+# further reduce scenarios
+# to examine marginal impact of each cadre, do keep the individual increase (0%, 25%, 50%, 100%) of the four cadres
+# to examine combined impact of multiple cadres, do keep the increase of a cadre that is as large as possible
+# to do this selection in Excel
 
 
 

From 55c47d2c42e06b4b4a96e01ebfd87019adbae396 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 12 Jul 2024 14:33:16 +0100
Subject: [PATCH 014/218] rename file

---
 ...t_of_hr.py => create_establishment_hcw_expansion_scenarios.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/{salary_cost_of_hr.py => create_establishment_hcw_expansion_scenarios.py} (100%)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
similarity index 100%
rename from src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/salary_cost_of_hr.py
rename to src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py

From 246e64ea5f63be841d23326b96a32ed3f844db95 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 12 Jul 2024 15:35:27 +0100
Subject: [PATCH 015/218] reduce individual increase to 4 options per cadre

---
 .../create_establishment_hcw_expansion_scenarios.py  | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
index 6e46dac98d..f8d8233406 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
@@ -47,11 +47,10 @@
 # if to expand multiple cadres in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']
 hr_expand = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
                    ['Officer_Category', 'individual_increase']].copy()
-hr_expand['individual_increase_0%'] = np.floor(hr_expand['individual_increase'] * 0)
-hr_expand['individual_increase_25%'] = np.floor(hr_expand['individual_increase'] * 0.25)
-hr_expand['individual_increase_50%'] = np.floor(hr_expand['individual_increase'] * 0.5)
-hr_expand['individual_increase_75%'] = np.floor(hr_expand['individual_increase'] * 0.75)
-hr_expand['individual_increase_100%'] = np.floor(hr_expand['individual_increase'] * 1.0)
+hr_expand['individual_increase_0'] = np.floor(hr_expand['individual_increase'] * 0)
+hr_expand['individual_increase_1/3'] = np.floor(hr_expand['individual_increase'] * 1/3)
+hr_expand['individual_increase_2/3'] = np.floor(hr_expand['individual_increase'] * 2/3)
+hr_expand['individual_increase_1'] = np.floor(hr_expand['individual_increase'] * 1.0)
 hr_expand.drop(columns='individual_increase', inplace=True)
 hr_expand.set_index('Officer_Category', inplace=True)
 
@@ -91,6 +90,5 @@
 # to examine combined impact of multiple cadres, do keep the increase of a cadre that is as large as possible
 # to do this selection in Excel
 
-
-
+# if the resulted 48 scenarios are too many, try reducing the individual increase of each cadre into 4 cases.
 

From 71cb109d519c0b4f1bc7207be911202ed346b98d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 12 Jul 2024 15:50:56 +0100
Subject: [PATCH 016/218] reduce individual increase to 3 options per cadre

---
 .../create_establishment_hcw_expansion_scenarios.py        | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
index f8d8233406..8716d4e0c0 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
@@ -48,9 +48,9 @@
 hr_expand = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
                    ['Officer_Category', 'individual_increase']].copy()
 hr_expand['individual_increase_0'] = np.floor(hr_expand['individual_increase'] * 0)
-hr_expand['individual_increase_1/3'] = np.floor(hr_expand['individual_increase'] * 1/3)
-hr_expand['individual_increase_2/3'] = np.floor(hr_expand['individual_increase'] * 2/3)
-hr_expand['individual_increase_1'] = np.floor(hr_expand['individual_increase'] * 1.0)
+hr_expand['individual_increase_1/2'] = np.floor(hr_expand['individual_increase'] * 1/2)
+hr_expand['individual_increase_1'] = np.floor(hr_expand['individual_increase'] * 1)
+
 hr_expand.drop(columns='individual_increase', inplace=True)
 hr_expand.set_index('Officer_Category', inplace=True)
 
@@ -91,4 +91,5 @@
 # to do this selection in Excel
 
 # if the resulted 48 scenarios are too many, try reducing the individual increase of each cadre into 4 cases.
+# if the resulted 29 scenarios are too many, try reducing the individual increase of each cadre into 3 cases.
 

From cdb1d9f3bfdda1d3ffe15170c8e7b4452adedca2 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 15 Jul 2024 13:41:47 +0100
Subject: [PATCH 017/218] try increasing the individual increase of each cadre
 into 6 options

---
 .../create_establishment_hcw_expansion_scenarios.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
index 8716d4e0c0..ce780dd014 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
@@ -48,7 +48,10 @@
 hr_expand = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
                    ['Officer_Category', 'individual_increase']].copy()
 hr_expand['individual_increase_0'] = np.floor(hr_expand['individual_increase'] * 0)
-hr_expand['individual_increase_1/2'] = np.floor(hr_expand['individual_increase'] * 1/2)
+hr_expand['individual_increase_20%'] = np.floor(hr_expand['individual_increase'] * 0.2)
+hr_expand['individual_increase_40%'] = np.floor(hr_expand['individual_increase'] * 0.4)
+hr_expand['individual_increase_60%'] = np.floor(hr_expand['individual_increase'] * 0.6)
+hr_expand['individual_increase_80%'] = np.floor(hr_expand['individual_increase'] * 0.8)
 hr_expand['individual_increase_1'] = np.floor(hr_expand['individual_increase'] * 1)
 
 hr_expand.drop(columns='individual_increase', inplace=True)
@@ -90,6 +93,10 @@
 # to examine combined impact of multiple cadres, do keep the increase of a cadre that is as large as possible
 # to do this selection in Excel
 
-# if the resulted 48 scenarios are too many, try reducing the individual increase of each cadre into 4 cases.
-# if the resulted 29 scenarios are too many, try reducing the individual increase of each cadre into 3 cases.
+# if the resulted 48 scenarios are too many, try reducing the individual increase of each cadre into 4 cases,
+# step by 1/3
+# if the resulted 29 scenarios are too many, try reducing the individual increase of each cadre into 3 cases,
+# step by 50%.
+# if try increasing the individual increase of each cadre into 6 cases, a step by 20%
+
 

From cee37a6cc824a767cdf5a5cc8e497f642bfbb524 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 15 Jul 2024 17:09:39 +0100
Subject: [PATCH 018/218] update todo

---
 .../scenario_of_expanding_funed_plus_hcw.py            | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
index 4620cd6ce7..6d491da3a7 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -28,11 +28,11 @@ def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = Date(2030, 1, 1)
-        self.pop_size = 20_000
+        self.end_date = Date(2025, 1, 1)  # todo: TBC
+        self.pop_size = 20_000  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 10
+        self.runs_per_draw = 10  # todo: TBC
 
     def log_configuration(self):
         return {
@@ -49,12 +49,12 @@ def log_configuration(self):
 
     def modules(self):
         return (fullmodel(resourcefilepath=self.resources) +
-                [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)])
+                [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)])  # todo: TBC
 
     def draw_parameters(self, draw_number, rng):
         return list(self._scenarios.values())[draw_number]
 
-    def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of expanding HCW (C, NM, P)
+    def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of expanding HCW (C, NM, P, DCSA)
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
         self.YEAR_OF_CHANGE = 2020  # This is the year to change HR scaling mode.

From ba0b6542019ac478b8dda32f89b31f842f0de3cb Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 16 Jul 2024 10:02:01 +0100
Subject: [PATCH 019/218] try increasing the individual increase of each cadre
 into 11 options

---
 .../create_establishment_hcw_expansion_scenarios.py          | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
index ce780dd014..64125fd2e2 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
@@ -48,10 +48,15 @@
 hr_expand = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
                    ['Officer_Category', 'individual_increase']].copy()
 hr_expand['individual_increase_0'] = np.floor(hr_expand['individual_increase'] * 0)
+hr_expand['individual_increase_10%'] = np.floor(hr_expand['individual_increase'] * 0.1)
 hr_expand['individual_increase_20%'] = np.floor(hr_expand['individual_increase'] * 0.2)
+hr_expand['individual_increase_30%'] = np.floor(hr_expand['individual_increase'] * 0.3)
 hr_expand['individual_increase_40%'] = np.floor(hr_expand['individual_increase'] * 0.4)
+hr_expand['individual_increase_50%'] = np.floor(hr_expand['individual_increase'] * 0.5)
 hr_expand['individual_increase_60%'] = np.floor(hr_expand['individual_increase'] * 0.6)
+hr_expand['individual_increase_70%'] = np.floor(hr_expand['individual_increase'] * 0.7)
 hr_expand['individual_increase_80%'] = np.floor(hr_expand['individual_increase'] * 0.8)
+hr_expand['individual_increase_90%'] = np.floor(hr_expand['individual_increase'] * 0.9)
 hr_expand['individual_increase_1'] = np.floor(hr_expand['individual_increase'] * 1)
 
 hr_expand.drop(columns='individual_increase', inplace=True)

From 66ef671878cff042fafefd53363bb0e0086aa724 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 17 Jul 2024 17:12:55 +0100
Subject: [PATCH 020/218] calculate minute salary per cadre per level

---
 .../create_establishment_hcw_expansion_scenarios.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
index 64125fd2e2..63c40a81f1 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
@@ -11,6 +11,8 @@
 
 hr_salary = pd.read_csv(resourcefilepath /
                         'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv', index_col=False)
+hr_salary_per_level = pd.read_excel(resourcefilepath /
+                                    'costing' / 'ResourceFile_Costing.xlsx', sheet_name='human_resources')
 hr_current = pd.read_csv(resourcefilepath /
                          'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv')
 hr_established = pd.read_csv(resourcefilepath /
@@ -104,4 +106,15 @@
 # step by 50%.
 # if try increasing the individual increase of each cadre into 6 cases, a step by 20%
 
+# to get minute salary per cadre per level
+Annual_PFT = hr_current.groupby(['Facility_Level', 'Officer_Category']).agg(
+    {'Total_Mins_Per_Day': 'sum', 'Staff_Count': 'sum'}).reset_index()
+Annual_PFT['Annual_Mins_Per_Staff'] = 365.25 * Annual_PFT['Total_Mins_Per_Day']/Annual_PFT['Staff_Count']
 
+hr_salary_per_level['Facility_Level'] = hr_salary_per_level['Facility_Level'].astype(str)
+
+Minute_Salary = Annual_PFT.merge(hr_salary_per_level, on=['Facility_Level', 'Officer_Category'], how='outer')
+Minute_Salary['Minute_Salary_USD'] = Minute_Salary['Salary_USD']/Minute_Salary['Annual_Mins_Per_Staff']
+
+Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].to_csv(
+    resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)

From 28b31e89e59b771dea325b9f268b6f96d090f5f1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 17 Jul 2024 17:18:58 +0100
Subject: [PATCH 021/218] create parameters for the HR expansion function

---
 src/tlo/methods/healthsystem.py | 46 +++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 8099346ddf..85b328beb8 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -307,6 +307,32 @@ class HealthSystem(Module):
                           "a worksheet of the file `ResourceFile_dynamic_HR_scaling.xlsx`."
         ),
 
+        'HR_expansion_by_officer_type': Parameter(
+            Types.LIST, "This list comprises of four floats, which specifies the proportions of extra budget "
+                        "allocated to four cadres - Clinical, DCSA, Nursing_and_Midwifery and Pharmacy - in order, "
+                        "every year from start_year_HR_expansion_by_officer_type and onwards. "
+                        "The extra budget for this year is 4.2% of the total salary of these cadres in last year, "
+                        "assuming the annual GDP growth rate is 4.2% and the proportion of GDP expenditure on "
+                        "expanding these cadres is fixed. Given the allocated extra budget and annual salary, "
+                        "we calculate the extra staff and minutes for these cadres of this year. The expansion is done "
+                        "on 1 Jan of every year from start_year_HR_expansion_by_officer_type."
+        ),
+
+        'start_year_HR_expansion_by_officer_type': Parameter(
+            Types.INT, "Year from which the HR expansion by officer type will take place. The change happens "
+                       "on 1 Jan of every year onwards."
+        ),
+
+        'end_year_HR_expansion_by_officer_type': Parameter(
+            Types.INT, "Year in which the HR expansion by officer type will stop. This happens on 1 Jan of "
+                       "that year. When submit the scenario to run, this should be the same year of the end year of "
+                       "the run."
+        ),
+
+        'minute_salary': Parameter(
+            Types.DATA_FRAME, "This specifies the minute salary in USD per officer type per level."
+        ),
+
         'tclose_overwrite': Parameter(
             Types.INT, "Decide whether to overwrite tclose variables assigned by disease modules"),
 
@@ -627,6 +653,10 @@ def read_parameters(self, data_folder):
         # Ensure that a value for the year at the start of the simulation is provided.
         assert all(2010 in sheet['year'].values for sheet in self.parameters['yearly_HR_scaling'].values())
 
+        # Read in Minute_Salary_HR.csv (minute salary in USD per officer type)
+        self.parameters['minute_salary'] = pd.read_csv(
+            Path(self.resourcefilepath) / 'costing' / 'Minute_Salary_HR.csv')
+
     def pre_initialise_population(self):
         """Generate the accessory classes used by the HealthSystem and pass to them the data that has been read."""
 
@@ -775,6 +805,12 @@ def initialise_simulation(self, sim):
         # whilst the actual scaling will only take effect from 2011 onwards.
         sim.schedule_event(DynamicRescalingHRCapabilities(self), Date(sim.date))
 
+        # Schedule recurring event that expands HR by officer type
+        # from the start_year_HR_expansion_by_officer_type to the end_year_HR_expansion_by_officer_type.
+        for yr in range(self.parameters["start_year_HR_expansion_by_officer_type"],
+                        self.parameters["end_year_HR_expansion_by_officer_type"]):
+            sim.schedule_event(HRExpansionByOfficerType(self), Date(yr, 1, 1))
+
     def on_birth(self, mother_id, child_id):
         self.bed_days.on_birth(self.sim.population.props, mother_id, child_id)
 
@@ -2921,6 +2957,16 @@ def apply(self, population):
                 self.module._daily_capabilities[officer] *= HR_scaling_factor_by_district[district]
 
 
+class HRExpansionByOfficerType(Event, PopulationScopeEventMixin):
+    """ This event exists to expand the HR by officer type (Clinical, DCSA, Nursing_and_Midwifery, Pharmacy)
+    given an extra budget."""
+    def __init__(self, module):
+        super().__init__(module)
+
+    def apply(self, population):
+        # get total cost of last year
+
+
 class HealthSystemChangeMode(RegularEvent, PopulationScopeEventMixin):
     """ This event exists to change the priority policy adopted by the
     HealthSystem at a given year.    """

From b4c32aebb9d7d353050d728559e18987d74a3624 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 17 Jul 2024 23:44:51 +0100
Subject: [PATCH 022/218] design steps for the HR expansion function

---
 src/tlo/methods/healthsystem.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 85b328beb8..ee21f1dea7 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2964,7 +2964,17 @@ def __init__(self, module):
         super().__init__(module)
 
     def apply(self, population):
-        # get total cost of last year
+
+        # get total minutes per cadre per facility id, for only the four cadres
+        # get total cost of last year = total minutes per cadre per facility id * minute salary per cadre per level (summing up cadres and facility ids)
+        # get extra budget for this year = 4.2% * total cost
+
+        # get proportional extra budget for each of the four cadre = extra budget * proportion of a cadre
+        # get the scale up factor for each cadre, assumed to be the same for each facility id of that cadre
+        # r * total minutes per cadre per facility id  * minute salary per cadre per facility id (summing up facility ids) = \
+        # total minutes per cadre per facility id * minute salary per cadre per facility id (summing up facility ids) + proportional extra budget
+
+        # scale up the minutes per cadre per facility id, by multiplying the current values with r
 
 
 class HealthSystemChangeMode(RegularEvent, PopulationScopeEventMixin):

From 912252452c04d0806a8da4b1ca2eafd2029afed9 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 18 Jul 2024 00:27:51 +0100
Subject: [PATCH 023/218] update steps

---
 src/tlo/methods/healthsystem.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index ee21f1dea7..d888a34098 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2966,15 +2966,15 @@ def __init__(self, module):
     def apply(self, population):
 
         # get total minutes per cadre per facility id, for only the four cadres
-        # get total cost of last year = total minutes per cadre per facility id * minute salary per cadre per level (summing up cadres and facility ids)
-        # get extra budget for this year = 4.2% * total cost
+        # get total daily cost of last year = minutes per cadre per facility id * minute salary per cadre per level (summing up cadres and facility ids)
+        # get daily extra budget for this year = 4.2% * total cost
 
-        # get proportional extra budget for each of the four cadre = extra budget * proportion of a cadre
+        # get proportional daily extra budget for each of the four cadre = extra budget * proportion of a cadre
         # get the scale up factor for each cadre, assumed to be the same for each facility id of that cadre
-        # r * total minutes per cadre per facility id  * minute salary per cadre per facility id (summing up facility ids) = \
-        # total minutes per cadre per facility id * minute salary per cadre per facility id (summing up facility ids) + proportional extra budget
+        # r * minutes per cadre per facility id  * minute salary per cadre per facility id (summing up facility ids) = \
+        # minutes per cadre per facility id * minute salary per cadre per facility id (summing up facility ids) + proportional extra budget
 
-        # scale up the minutes per cadre per facility id, by multiplying the current values with r
+        # scale up the daily minutes per cadre per facility id, by multiplying the current values with r
 
 
 class HealthSystemChangeMode(RegularEvent, PopulationScopeEventMixin):

From 8395512af591094c81bb103b3d2e7e9cf182dd1b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 18 Jul 2024 00:29:33 +0100
Subject: [PATCH 024/218] add todo

---
 .../create_establishment_hcw_expansion_scenarios.py         | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
index 63c40a81f1..d5232361c8 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
@@ -118,3 +118,9 @@
 
 Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].to_csv(
     resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)
+
+# todo: get the minute salary per facility id; note levels 1b and 2 are now treated as level 2 in daily capabilities;
+# note should fill level 4 and 5 with values of level 3;
+# ask Sakshi if salary of 1b and 2 can to transferred to level 2, or if to treat level 1b/2 have same salary
+# note that dental and radio cadre has 2 salary in hr_salary_per level
+# need to group level 1b and 2 capabilities

From ddb4602a0615da9c334ee654482d71a7ca512a17 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 18 Jul 2024 15:01:52 +0100
Subject: [PATCH 025/218] create hr minute salary data

---
 ...e_establishment_hcw_expansion_scenarios.py | 126 -----------------
 ...nute_salary_by_officer_type_facility_id.py | 129 ++++++++++++++++++
 2 files changed, 129 insertions(+), 126 deletions(-)
 delete mode 100644 src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
 create mode 100644 src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
deleted file mode 100644
index d5232361c8..0000000000
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_establishment_hcw_expansion_scenarios.py
+++ /dev/null
@@ -1,126 +0,0 @@
-"""
-We calculate the salar cost of current and funded plus HCW.
-"""
-
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-
-resourcefilepath = Path('./resources')
-
-hr_salary = pd.read_csv(resourcefilepath /
-                        'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv', index_col=False)
-hr_salary_per_level = pd.read_excel(resourcefilepath /
-                                    'costing' / 'ResourceFile_Costing.xlsx', sheet_name='human_resources')
-hr_current = pd.read_csv(resourcefilepath /
-                         'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv')
-hr_established = pd.read_csv(resourcefilepath /
-                             'healthsystem' / 'human_resources' / 'funded_plus' / 'ResourceFile_Daily_Capabilities.csv')
-
-hr_curr_count = hr_current.groupby('Officer_Category').agg({'Staff_Count': 'sum'})
-hr_estab_count = (hr_established.groupby('Officer_Category').agg({'Staff_Count': 'sum'}))
-
-hr = hr_curr_count.merge(hr_estab_count, on='Officer_Category', how='outer'
-                         ).merge(hr_salary, on='Officer_Category', how='left')
-
-hr['total_curr_salary'] = hr['Staff_Count_x'] * hr['Annual_Salary_USD']
-hr['total_estab_salary'] = hr['Staff_Count_y'] * hr['Annual_Salary_USD']
-
-total_curr_salary = hr['total_curr_salary'].sum()  # 107.82 million
-total_estab_salary = hr['total_estab_salary'].sum()  # 201.36 million
-
-# now consider expanding establishment HCW
-# assuming annual GDP growth rate is 4.2% and
-# a fixed proportion of GDP is allocated to human resource expansion, thus assuming
-# the annual growth rate of HR salary cost is also 4.2%.
-
-# the annual extra budget and
-# if to expand one individual cadre in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']
-hr['extra_budget'] = total_estab_salary * 4.2 / 100  # 8.46 million
-hr['individual_increase'] = np.floor(hr['extra_budget'] / hr['Annual_Salary_USD'])
-# do not increase other cadres
-for c in hr.Officer_Category:
-    if c not in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']:
-        hr.loc[hr.Officer_Category == c, 'individual_increase'] = 0
-hr['individual_scale_up_factor'] = (hr['individual_increase'] + hr['Staff_Count_y']) / hr['Staff_Count_y']
-hr['individual_increase_%'] = hr['individual_increase'] * 100 / hr['Staff_Count_y']
-
-# if to expand multiple cadres in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']
-hr_expand = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
-                   ['Officer_Category', 'individual_increase']].copy()
-hr_expand['individual_increase_0'] = np.floor(hr_expand['individual_increase'] * 0)
-hr_expand['individual_increase_10%'] = np.floor(hr_expand['individual_increase'] * 0.1)
-hr_expand['individual_increase_20%'] = np.floor(hr_expand['individual_increase'] * 0.2)
-hr_expand['individual_increase_30%'] = np.floor(hr_expand['individual_increase'] * 0.3)
-hr_expand['individual_increase_40%'] = np.floor(hr_expand['individual_increase'] * 0.4)
-hr_expand['individual_increase_50%'] = np.floor(hr_expand['individual_increase'] * 0.5)
-hr_expand['individual_increase_60%'] = np.floor(hr_expand['individual_increase'] * 0.6)
-hr_expand['individual_increase_70%'] = np.floor(hr_expand['individual_increase'] * 0.7)
-hr_expand['individual_increase_80%'] = np.floor(hr_expand['individual_increase'] * 0.8)
-hr_expand['individual_increase_90%'] = np.floor(hr_expand['individual_increase'] * 0.9)
-hr_expand['individual_increase_1'] = np.floor(hr_expand['individual_increase'] * 1)
-
-hr_expand.drop(columns='individual_increase', inplace=True)
-hr_expand.set_index('Officer_Category', inplace=True)
-
-c_array = hr_expand.loc['Clinical'].values
-nm_array = hr_expand.loc['Nursing_and_Midwifery'].values
-p_array = hr_expand.loc['Pharmacy'].values
-d_array = hr_expand.loc['DCSA'].values
-hr_meshgrid = np.array(
-    np.meshgrid(c_array, nm_array, p_array, d_array)).T.reshape(-1, 4)
-
-hr_expand_scenario = pd.DataFrame({'Clinical': hr_meshgrid[:, 0],
-                                   'Nursing_and_Midwifery': hr_meshgrid[:, 1],
-                                   'Pharmacy': hr_meshgrid[:, 2],
-                                   'DCSA': hr_meshgrid[:, 3]}).T
-
-hr_expand_salary = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
-                          ['Officer_Category', 'Annual_Salary_USD']].copy()
-hr_expand_salary.set_index('Officer_Category', inplace=True)
-
-hr_expand_scenario_cost = hr_expand_salary.merge(hr_expand_scenario, left_index=True, right_index=True)
-hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[1:]] = \
-    hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[1:]].multiply(
-    hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[0]], axis='index')
-hr_expand_scenario_cost.loc['Total'] = hr_expand_scenario_cost.sum()
-# hr_expand_scenario_cost.drop(columns=['Annual_Salary_USD'], inplace=True)
-
-cond = (hr_expand_scenario_cost.loc['Total'] <= total_estab_salary * 4.2 / 100)
-hr_expand_scenario_budget = hr_expand_scenario_cost.loc[:, cond].copy()
-hr_expand_scenario_budget.drop(index='Total', inplace=True)
-hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]] = \
-    hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]].div(
-    hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[0]], axis='index')
-hr_expand_scenario_budget.loc['Total'] = hr_expand_scenario_budget.sum()
-
-# further reduce scenarios
-# to examine marginal impact of each cadre, do keep the individual increase (0%, 25%, 50%, 100%) of the four cadres
-# to examine combined impact of multiple cadres, do keep the increase of a cadre that is as large as possible
-# to do this selection in Excel
-
-# if the resulted 48 scenarios are too many, try reducing the individual increase of each cadre into 4 cases,
-# step by 1/3
-# if the resulted 29 scenarios are too many, try reducing the individual increase of each cadre into 3 cases,
-# step by 50%.
-# if try increasing the individual increase of each cadre into 6 cases, a step by 20%
-
-# to get minute salary per cadre per level
-Annual_PFT = hr_current.groupby(['Facility_Level', 'Officer_Category']).agg(
-    {'Total_Mins_Per_Day': 'sum', 'Staff_Count': 'sum'}).reset_index()
-Annual_PFT['Annual_Mins_Per_Staff'] = 365.25 * Annual_PFT['Total_Mins_Per_Day']/Annual_PFT['Staff_Count']
-
-hr_salary_per_level['Facility_Level'] = hr_salary_per_level['Facility_Level'].astype(str)
-
-Minute_Salary = Annual_PFT.merge(hr_salary_per_level, on=['Facility_Level', 'Officer_Category'], how='outer')
-Minute_Salary['Minute_Salary_USD'] = Minute_Salary['Salary_USD']/Minute_Salary['Annual_Mins_Per_Staff']
-
-Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].to_csv(
-    resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)
-
-# todo: get the minute salary per facility id; note levels 1b and 2 are now treated as level 2 in daily capabilities;
-# note should fill level 4 and 5 with values of level 3;
-# ask Sakshi if salary of 1b and 2 can to transferred to level 2, or if to treat level 1b/2 have same salary
-# note that dental and radio cadre has 2 salary in hr_salary_per level
-# need to group level 1b and 2 capabilities
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
new file mode 100644
index 0000000000..007d429c3c
--- /dev/null
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -0,0 +1,129 @@
+"""
+We calculate the salary cost of current and funded plus HCW.
+"""
+
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+resourcefilepath = Path('./resources')
+
+mfl = pd.read_csv(resourcefilepath / 'healthsystem' / 'organisation' / 'ResourceFile_Master_Facilities_List.csv')
+
+hr_salary = pd.read_csv(resourcefilepath /
+                        'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv', index_col=False)
+hr_salary_per_level = pd.read_excel(resourcefilepath /
+                                    'costing' / 'ResourceFile_Costing.xlsx', sheet_name='human_resources')
+hr_current = pd.read_csv(resourcefilepath /
+                         'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv')
+hr_established = pd.read_csv(resourcefilepath /
+                             'healthsystem' / 'human_resources' / 'funded_plus' / 'ResourceFile_Daily_Capabilities.csv')
+
+# experiment ============================================================================
+# hr_curr_count = hr_current.groupby('Officer_Category').agg({'Staff_Count': 'sum'})
+# hr_estab_count = (hr_established.groupby('Officer_Category').agg({'Staff_Count': 'sum'}))
+#
+# hr = hr_curr_count.merge(hr_estab_count, on='Officer_Category', how='outer'
+#                          ).merge(hr_salary, on='Officer_Category', how='left')
+#
+# hr['total_curr_salary'] = hr['Staff_Count_x'] * hr['Annual_Salary_USD']
+# hr['total_estab_salary'] = hr['Staff_Count_y'] * hr['Annual_Salary_USD']
+#
+# total_curr_salary = hr['total_curr_salary'].sum()  # 107.82 million
+# total_estab_salary = hr['total_estab_salary'].sum()  # 201.36 million
+#
+# # now consider expanding establishment HCW
+# # assuming annual GDP growth rate is 4.2% and
+# # a fixed proportion of GDP is allocated to human resource expansion, thus assuming
+# # the annual growth rate of HR salary cost is also 4.2%.
+#
+# # the annual extra budget and
+# # if to expand one individual cadre in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']
+# hr['extra_budget'] = total_estab_salary * 4.2 / 100  # 8.46 million
+# hr['individual_increase'] = np.floor(hr['extra_budget'] / hr['Annual_Salary_USD'])
+# # do not increase other cadres
+# for c in hr.Officer_Category:
+#     if c not in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']:
+#         hr.loc[hr.Officer_Category == c, 'individual_increase'] = 0
+# hr['individual_scale_up_factor'] = (hr['individual_increase'] + hr['Staff_Count_y']) / hr['Staff_Count_y']
+# hr['individual_increase_%'] = hr['individual_increase'] * 100 / hr['Staff_Count_y']
+#
+# # if to expand multiple cadres in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']
+# hr_expand = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
+#                    ['Officer_Category', 'individual_increase']].copy()
+# hr_expand['individual_increase_0'] = np.floor(hr_expand['individual_increase'] * 0)
+# hr_expand['individual_increase_10%'] = np.floor(hr_expand['individual_increase'] * 0.1)
+# hr_expand['individual_increase_20%'] = np.floor(hr_expand['individual_increase'] * 0.2)
+# hr_expand['individual_increase_30%'] = np.floor(hr_expand['individual_increase'] * 0.3)
+# hr_expand['individual_increase_40%'] = np.floor(hr_expand['individual_increase'] * 0.4)
+# hr_expand['individual_increase_50%'] = np.floor(hr_expand['individual_increase'] * 0.5)
+# hr_expand['individual_increase_60%'] = np.floor(hr_expand['individual_increase'] * 0.6)
+# hr_expand['individual_increase_70%'] = np.floor(hr_expand['individual_increase'] * 0.7)
+# hr_expand['individual_increase_80%'] = np.floor(hr_expand['individual_increase'] * 0.8)
+# hr_expand['individual_increase_90%'] = np.floor(hr_expand['individual_increase'] * 0.9)
+# hr_expand['individual_increase_1'] = np.floor(hr_expand['individual_increase'] * 1)
+#
+# hr_expand.drop(columns='individual_increase', inplace=True)
+# hr_expand.set_index('Officer_Category', inplace=True)
+#
+# c_array = hr_expand.loc['Clinical'].values
+# nm_array = hr_expand.loc['Nursing_and_Midwifery'].values
+# p_array = hr_expand.loc['Pharmacy'].values
+# d_array = hr_expand.loc['DCSA'].values
+# hr_meshgrid = np.array(
+#     np.meshgrid(c_array, nm_array, p_array, d_array)).T.reshape(-1, 4)
+#
+# hr_expand_scenario = pd.DataFrame({'Clinical': hr_meshgrid[:, 0],
+#                                    'Nursing_and_Midwifery': hr_meshgrid[:, 1],
+#                                    'Pharmacy': hr_meshgrid[:, 2],
+#                                    'DCSA': hr_meshgrid[:, 3]}).T
+#
+# hr_expand_salary = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
+#                           ['Officer_Category', 'Annual_Salary_USD']].copy()
+# hr_expand_salary.set_index('Officer_Category', inplace=True)
+#
+# hr_expand_scenario_cost = hr_expand_salary.merge(hr_expand_scenario, left_index=True, right_index=True)
+# hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[1:]] = \
+#     hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[1:]].multiply(
+#     hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[0]], axis='index')
+# hr_expand_scenario_cost.loc['Total'] = hr_expand_scenario_cost.sum()
+# # hr_expand_scenario_cost.drop(columns=['Annual_Salary_USD'], inplace=True)
+#
+# cond = (hr_expand_scenario_cost.loc['Total'] <= total_estab_salary * 4.2 / 100)
+# hr_expand_scenario_budget = hr_expand_scenario_cost.loc[:, cond].copy()
+# hr_expand_scenario_budget.drop(index='Total', inplace=True)
+# hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]] = \
+#     hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]].div(
+#     hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[0]], axis='index')
+# hr_expand_scenario_budget.loc['Total'] = hr_expand_scenario_budget.sum()
+
+# further reduce scenarios
+# to examine marginal impact of each cadre, do keep the individual increase (0%, 25%, 50%, 100%) of the four cadres
+# to examine combined impact of multiple cadres, do keep the increase of a cadre that is as large as possible
+# to do this selection in Excel
+
+# if the resulted 48 scenarios are too many, try reducing the individual increase of each cadre into 4 cases,
+# step by 1/3
+# if the resulted 29 scenarios are too many, try reducing the individual increase of each cadre into 3 cases,
+# step by 50%.
+# if try increasing the individual increase of each cadre into 6 cases, a step by 20%
+
+# experiment ends =====================================================================================================
+
+# to get minute salary per cadre per level
+Annual_PFT = hr_current.groupby(['Facility_Level', 'Officer_Category']).agg(
+    {'Total_Mins_Per_Day': 'sum', 'Staff_Count': 'sum'}).reset_index()
+Annual_PFT['Annual_Mins_Per_Staff'] = 365.25 * Annual_PFT['Total_Mins_Per_Day']/Annual_PFT['Staff_Count']
+
+# the hr salary per level might need update from Sakshi; use the average salary for now
+# hr_salary_per_level['Facility_Level'] = hr_salary_per_level['Facility_Level'].astype(str)
+# Minute_Salary = Annual_PFT.merge(hr_salary_per_level, on=['Facility_Level', 'Officer_Category'], how='outer')
+Minute_Salary = Annual_PFT.merge(hr_salary, on=['Officer_Category'], how='outer')
+Minute_Salary['Minute_Salary_USD'] = Minute_Salary['Annual_Salary_USD']/Minute_Salary['Annual_Mins_Per_Staff']
+Minute_Salary = Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].merge(
+    mfl[['Facility_Level', 'Facility_ID']], on=['Facility_Level'], how='outer'
+)
+Minute_Salary[['Facility_ID', 'Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].to_csv(
+    resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)
+

From f5e21965a999b31b38f34a8ec4c0f1df9824160d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 18 Jul 2024 23:47:46 +0100
Subject: [PATCH 026/218] write the hr expansion function

---
 src/tlo/methods/healthsystem.py | 66 +++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 8 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index d888a34098..0cbd9d9af3 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -330,7 +330,7 @@ class HealthSystem(Module):
         ),
 
         'minute_salary': Parameter(
-            Types.DATA_FRAME, "This specifies the minute salary in USD per officer type per level."
+            Types.DATA_FRAME, "This specifies the minute salary in USD per officer type per facility id."
         ),
 
         'tclose_overwrite': Parameter(
@@ -657,6 +657,12 @@ def read_parameters(self, data_folder):
         self.parameters['minute_salary'] = pd.read_csv(
             Path(self.resourcefilepath) / 'costing' / 'Minute_Salary_HR.csv')
 
+        # Set default values for HR_expansion_by_officer_type, start_year_HR_expansion_by_officer_type,
+        # end_year_HR_expansion_by_officer_type
+        self.parameters['HR_expansion_by_officer_type'] = [0, 0, 0, 0]
+        self.parameters['start_year_HR_expansion_by_officer_type'] = 2020
+        self.parameters['end_year_HR_expansion_by_officer_type'] = 2030
+
     def pre_initialise_population(self):
         """Generate the accessory classes used by the HealthSystem and pass to them the data that has been read."""
 
@@ -2959,22 +2965,66 @@ def apply(self, population):
 
 class HRExpansionByOfficerType(Event, PopulationScopeEventMixin):
     """ This event exists to expand the HR by officer type (Clinical, DCSA, Nursing_and_Midwifery, Pharmacy)
-    given an extra budget."""
+    given an extra budget. This is done for daily capabilities, as a year consists of 365.25 equal days."""
     def __init__(self, module):
         super().__init__(module)
 
     def apply(self, population):
 
-        # get total minutes per cadre per facility id, for only the four cadres
-        # get total daily cost of last year = minutes per cadre per facility id * minute salary per cadre per level (summing up cadres and facility ids)
+        # get minute salary for the four cadres
+        minute_salary_by_officer_facility_id = self.module.parameters['minute_salary']
+
+        # get current daily minutes and format it to be consistent with minute salary
+        daily_minutes = pd.DataFrame(self.module._daily_capabilities).reset_index().rename(
+            columns={'index': 'facilityid_officer'})
+        daily_minutes[['Facility_ID', 'Officer_Type_Code']] = daily_minutes.facilityid_officer.str.split(
+            pat='_', n=3, expand=True)[[1, 3]]
+
+        # get daily cost per officer per facility id
+        daily_cost = minute_salary_by_officer_facility_id.merge(
+            daily_minutes, on=['Facility_ID', 'Officer_Type_Code'], how='outer')
+        daily_cost['Total_Cost_Per_Day'] = daily_cost['Minute_Salary_USD'] * daily_cost['Total_Minutes_Per_Day']
+
+        # get daily cost per officer type of the four cadres
+        daily_cost = daily_cost.groupby('Officer_Type_Code').agg({'Total_Cost_Per_Day': 'sum'}).reset_index()
+        daily_cost = daily_cost.loc[daily_cost.Officer_Type_Code.isin(
+            ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])]
+
+        # get total daily cost of each of the four cadres
+        total_cost_clinical = daily_cost.loc[daily_cost.Officer_Type_Code == 'Clinical', 'Total_Cost_Per_Day'].sum()
+        total_cost_dcsa = daily_cost.loc[daily_cost.Officer_Type_Code == 'DCSA', 'Total_Cost_Per_Day'].sum()
+        total_cost_nursing = daily_cost.loc[
+            daily_cost.Officer_Type_Code == 'Nursing_and_Midwifery', 'Total_Cost_Per_Day'].sum()
+        total_cost_pharmacy = daily_cost.loc[daily_cost.Officer_Type_Code == 'Pharmacy', 'Total_Cost_Per_Day'].sum()
+
         # get daily extra budget for this year = 4.2% * total cost
+        daily_extra_budget = 0.042*(total_cost_clinical + total_cost_dcsa + total_cost_nursing + total_cost_pharmacy)
+
+        # get proportional daily extra budget for each of the four cadres
+        daily_extra_budget_by_officer = daily_extra_budget * self.parameters['HR_expansion_by_officer_type']
 
-        # get proportional daily extra budget for each of the four cadre = extra budget * proportion of a cadre
         # get the scale up factor for each cadre, assumed to be the same for each facility id of that cadre
-        # r * minutes per cadre per facility id  * minute salary per cadre per facility id (summing up facility ids) = \
-        # minutes per cadre per facility id * minute salary per cadre per facility id (summing up facility ids) + proportional extra budget
+        sf_clinical = (total_cost_clinical + daily_extra_budget_by_officer[0])/total_cost_clinical
+        sf_dcsa = (total_cost_dcsa + daily_extra_budget_by_officer[1]) / total_cost_dcsa
+        sf_nursing = (total_cost_nursing + daily_extra_budget_by_officer[2]) / total_cost_nursing
+        sf_pharmacy = (total_cost_pharmacy + daily_extra_budget_by_officer[3]) / total_cost_pharmacy
 
-        # scale up the daily minutes per cadre per facility id, by multiplying the current values with r
+        # scale up the daily minutes per cadre per facility id
+        pattern = r"FacilityID_(\w+)_Officer_(\w+)"
+        for officer in self.module._daily_capabilities.keys():
+            matches = re.match(pattern, officer)
+            # Extract officer type
+            officer_type = matches.group(2)
+            if officer_type == 'Clinical':
+                self.module._daily_capabilities[officer] *= sf_clinical
+            elif officer_type == 'DCSA':
+                self.module._daily_capabilities[officer] *= sf_dcsa
+            elif officer_type == 'Nursing_and_Midwifery':
+                self.module._daily_capabilities[officer] *= sf_nursing
+            elif officer_type == 'Pharmacy':
+                self.module._daily_capabilities[officer] *= sf_pharmacy
+            else:
+                self.module._daily_capabilities[officer] *= 1
 
 
 class HealthSystemChangeMode(RegularEvent, PopulationScopeEventMixin):

From 937d33d5d291f26b4d67904023f0d09ca420d40a Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 18 Jul 2024 23:50:33 +0100
Subject: [PATCH 027/218] update the minute salary file

---
 ...nute_salary_by_officer_type_facility_id.py | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index 007d429c3c..1543599139 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -4,8 +4,9 @@
 
 from pathlib import Path
 
-import numpy as np
+# import numpy as np
 import pandas as pd
+# import re
 
 resourcefilepath = Path('./resources')
 
@@ -124,6 +125,21 @@
 Minute_Salary = Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].merge(
     mfl[['Facility_Level', 'Facility_ID']], on=['Facility_Level'], how='outer'
 )
-Minute_Salary[['Facility_ID', 'Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].to_csv(
-    resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)
+Minute_Salary.drop(columns=['Facility_Level'], inplace=True)
+Minute_Salary = Minute_Salary.fillna(0)
 
+# Minute_Salary = Minute_Salary.loc[Minute_Salary.Officer_Category.isin(
+#     ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])]
+# Minute_Salary = Minute_Salary.set_index(
+#     'FacilityID_'
+#     + Minute_Salary['Facility_ID'].astype(str)
+#     + '_Officer_'
+#     + Minute_Salary['Officer_Category']
+# )
+# Minute_Salary = Minute_Salary['Minute_Salary_USD']
+# Minute_Salary = pd.DataFrame(Minute_Salary).reset_index().rename(columns={'index': 'facilityid_officer'})
+# Minute_Salary[['Facility_ID', 'Officer_Type_Code']] = Minute_Salary.facilityid_officer.str.split(
+#     pat='_', n=3, expand=True)[[1, 3]]
+
+Minute_Salary.rename(columns={'Officer_Category': 'Officer_Type_Code'}, inplace=True)
+Minute_Salary.to_csv(resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)

From 71dc92c0b2483df4df1895bb534eb10df12a3894 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 18 Jul 2024 23:56:31 +0100
Subject: [PATCH 028/218] save the minute salary data

---
 resources/costing/Minute_Salary_HR.csv | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 resources/costing/Minute_Salary_HR.csv

diff --git a/resources/costing/Minute_Salary_HR.csv b/resources/costing/Minute_Salary_HR.csv
new file mode 100644
index 0000000000..0e248a312f
--- /dev/null
+++ b/resources/costing/Minute_Salary_HR.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23d959dabe6cfc86ff2604a3a01298cccf00ef3587a15afaf4e487c06d3b9df0
+size 35276

From 50df28a03521a5f0318cb825e627410a36e02939 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 10:41:32 +0100
Subject: [PATCH 029/218] calculate the current cost distribution of the four
 cadres

---
 ...reate_hr_minute_salary_by_officer_type_facility_id.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index 1543599139..58a0477e72 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -143,3 +143,12 @@
 
 Minute_Salary.rename(columns={'Officer_Category': 'Officer_Type_Code'}, inplace=True)
 Minute_Salary.to_csv(resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)
+
+# calculate the current cost distribution of the four cadres
+staff_count = hr_current.groupby('Officer_Category')['Staff_Count'].sum().reset_index()
+staff_cost = staff_count.merge(hr_salary, on=['Officer_Category'], how='outer')
+staff_cost['annual_cost'] = staff_cost['Staff_Count'] * staff_cost['Annual_Salary_USD']
+four_cadres_cost = staff_cost.loc[
+    staff_cost.Officer_Category.isin(['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])].reset_index(drop=True)
+four_cadres_cost['cost_frac'] = four_cadres_cost['annual_cost'] / four_cadres_cost['annual_cost'].sum()
+assert four_cadres_cost.cost_frac.sum() == 1

From 232b2ab3aa0f8a1e8cd1bdcdfb17d781180602b6 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 12:48:44 +0100
Subject: [PATCH 030/218] create the resource file for hr expansion scenarios
 based on fractions of extra budget

---
 ...rceFile_HR_expansion_by_officer_type_given_extra_budget.csv | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv

diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv
new file mode 100644
index 0000000000..71dddf055c
--- /dev/null
+++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93e647b3e6973976f65e0a91124655fd38633207d0424f01cc87b62ff4da9eb4
+size 490

From 9e90e83a74110f792c4d7153e32599b5403c37e0 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 12:54:53 +0100
Subject: [PATCH 031/218] update resource file

---
 ...ceFile_HR_expansion_by_officer_type_given_extra_budget.csv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv
index 71dddf055c..265e58141d 100644
--- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv
+++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93e647b3e6973976f65e0a91124655fd38633207d0424f01cc87b62ff4da9eb4
-size 490
+oid sha256:1f910b61227901d43dbf8c1a5101dfc4490206ec23381715b546db14e267ac25
+size 363

From 0f44842075b0e156e1b2caaa4775581457c22ebc Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 13:15:21 +0100
Subject: [PATCH 032/218] update scenario file

---
 .../scenario_of_expanding_funed_plus_hcw.py   | 172 ++----------------
 1 file changed, 13 insertions(+), 159 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
index 6d491da3a7..31fba3fc92 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -22,13 +22,15 @@
 from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher
 from tlo.scenario import BaseScenario
 
+import pandas as pd
+
 
 class LongRun(BaseScenario):
     def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = Date(2025, 1, 1)  # todo: TBC
+        self.end_date = Date(2030, 1, 1)
         self.pop_size = 20_000  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
@@ -54,173 +56,25 @@ def modules(self):
     def draw_parameters(self, draw_number, rng):
         return list(self._scenarios.values())[draw_number]
 
-    def _get_scenarios(self) -> Dict[str, Dict]:  # todo: create many scenarios of expanding HCW (C, NM, P, DCSA)
+    def _get_scenarios(self) -> Dict[str, Dict]:
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
-        self.YEAR_OF_CHANGE = 2020  # This is the year to change HR scaling mode.
-        # Year 2030 is when the Establishment HCW will be met as estimated by Berman 2022.
-        # But it can be 2020, or 2019, to reduce running time (2010-2030 instead of 2010-2040).
-
-        return {
-            "Establishment HCW":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'default',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C1":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C2":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C3":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion P1":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p1',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion P2":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p2',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion P3":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_p3',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C1P1":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p1',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C2P1":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p1',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C3P1":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p1',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C1P2":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p2',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C2P2":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p2',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
+        self.YEAR_OF_CHANGE = 2020  # This is the year to change run settings and to start hr expansion.
 
-            "Establishment HCW Expansion C3P2":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p2',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
+        self.scenarios = pd.read_csv(Path('./resources')
+                                     / 'healthsystem' / 'human_resources' / 'scaling_capabilities'
+                                     / 'ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv'
+                                     ).set_index('Officer_Category')  # do we need 'self' or not?
 
-            "Establishment HCW Expansion C1P3":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c1p3',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C2P3":
-                mix_scenarios(
-                    self._baseline(),
-                    {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c2p3',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
-                    }
-                    }
-                ),
-
-            "Establishment HCW Expansion C3P3":
+        return {
+            self.scenarios.columns[i]:
                 mix_scenarios(
                     self._baseline(),
                     {'HealthSystem': {
-                        'HR_scaling_by_level_and_officer_type_mode': 'expand_funded_c3p3',
-                        'year_HR_scaling_by_level_and_officer_type': self.YEAR_OF_CHANGE
+                        'HR_expansion_by_officer_type': list(self.scenarios.iloc[:, i])
                     }
                     }
-                ),
+                ) for i in range(len(self.scenarios.columns))
         }
 
     def _baseline(self) -> Dict:

From f3bbaa60b6e31b8a0792845fd4d265be7b840f8c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 13:17:27 +0100
Subject: [PATCH 033/218] todo in test_healthsystem

---
 tests/test_healthsystem.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py
index ae212a4f48..c6fdafcf73 100644
--- a/tests/test_healthsystem.py
+++ b/tests/test_healthsystem.py
@@ -2517,3 +2517,7 @@ def run_sim(dynamic_HR_scaling_factor: Dict[int, float]) -> tuple:
     ratio_in_sim = caps / initial_caps
 
     assert np.allclose(ratio_in_sim, expected_overall_scaling)
+
+
+# todo: test the function of HR expansion by officer type
+# def test_HR_expansion_by_officer_type(seed, tmpdir):

From fece9acef2b104ead9e6093307e9346ba01c2513 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 13:31:11 +0100
Subject: [PATCH 034/218] clean the data preparation file

---
 ...nute_salary_by_officer_type_facility_id.py | 110 +-----------------
 1 file changed, 2 insertions(+), 108 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index 58a0477e72..654964727b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -21,105 +21,12 @@
 hr_established = pd.read_csv(resourcefilepath /
                              'healthsystem' / 'human_resources' / 'funded_plus' / 'ResourceFile_Daily_Capabilities.csv')
 
-# experiment ============================================================================
-# hr_curr_count = hr_current.groupby('Officer_Category').agg({'Staff_Count': 'sum'})
-# hr_estab_count = (hr_established.groupby('Officer_Category').agg({'Staff_Count': 'sum'}))
-#
-# hr = hr_curr_count.merge(hr_estab_count, on='Officer_Category', how='outer'
-#                          ).merge(hr_salary, on='Officer_Category', how='left')
-#
-# hr['total_curr_salary'] = hr['Staff_Count_x'] * hr['Annual_Salary_USD']
-# hr['total_estab_salary'] = hr['Staff_Count_y'] * hr['Annual_Salary_USD']
-#
-# total_curr_salary = hr['total_curr_salary'].sum()  # 107.82 million
-# total_estab_salary = hr['total_estab_salary'].sum()  # 201.36 million
-#
-# # now consider expanding establishment HCW
-# # assuming annual GDP growth rate is 4.2% and
-# # a fixed proportion of GDP is allocated to human resource expansion, thus assuming
-# # the annual growth rate of HR salary cost is also 4.2%.
-#
-# # the annual extra budget and
-# # if to expand one individual cadre in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']
-# hr['extra_budget'] = total_estab_salary * 4.2 / 100  # 8.46 million
-# hr['individual_increase'] = np.floor(hr['extra_budget'] / hr['Annual_Salary_USD'])
-# # do not increase other cadres
-# for c in hr.Officer_Category:
-#     if c not in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']:
-#         hr.loc[hr.Officer_Category == c, 'individual_increase'] = 0
-# hr['individual_scale_up_factor'] = (hr['individual_increase'] + hr['Staff_Count_y']) / hr['Staff_Count_y']
-# hr['individual_increase_%'] = hr['individual_increase'] * 100 / hr['Staff_Count_y']
-#
-# # if to expand multiple cadres in ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']
-# hr_expand = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
-#                    ['Officer_Category', 'individual_increase']].copy()
-# hr_expand['individual_increase_0'] = np.floor(hr_expand['individual_increase'] * 0)
-# hr_expand['individual_increase_10%'] = np.floor(hr_expand['individual_increase'] * 0.1)
-# hr_expand['individual_increase_20%'] = np.floor(hr_expand['individual_increase'] * 0.2)
-# hr_expand['individual_increase_30%'] = np.floor(hr_expand['individual_increase'] * 0.3)
-# hr_expand['individual_increase_40%'] = np.floor(hr_expand['individual_increase'] * 0.4)
-# hr_expand['individual_increase_50%'] = np.floor(hr_expand['individual_increase'] * 0.5)
-# hr_expand['individual_increase_60%'] = np.floor(hr_expand['individual_increase'] * 0.6)
-# hr_expand['individual_increase_70%'] = np.floor(hr_expand['individual_increase'] * 0.7)
-# hr_expand['individual_increase_80%'] = np.floor(hr_expand['individual_increase'] * 0.8)
-# hr_expand['individual_increase_90%'] = np.floor(hr_expand['individual_increase'] * 0.9)
-# hr_expand['individual_increase_1'] = np.floor(hr_expand['individual_increase'] * 1)
-#
-# hr_expand.drop(columns='individual_increase', inplace=True)
-# hr_expand.set_index('Officer_Category', inplace=True)
-#
-# c_array = hr_expand.loc['Clinical'].values
-# nm_array = hr_expand.loc['Nursing_and_Midwifery'].values
-# p_array = hr_expand.loc['Pharmacy'].values
-# d_array = hr_expand.loc['DCSA'].values
-# hr_meshgrid = np.array(
-#     np.meshgrid(c_array, nm_array, p_array, d_array)).T.reshape(-1, 4)
-#
-# hr_expand_scenario = pd.DataFrame({'Clinical': hr_meshgrid[:, 0],
-#                                    'Nursing_and_Midwifery': hr_meshgrid[:, 1],
-#                                    'Pharmacy': hr_meshgrid[:, 2],
-#                                    'DCSA': hr_meshgrid[:, 3]}).T
-#
-# hr_expand_salary = hr.loc[hr.Officer_Category.isin(['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA']),
-#                           ['Officer_Category', 'Annual_Salary_USD']].copy()
-# hr_expand_salary.set_index('Officer_Category', inplace=True)
-#
-# hr_expand_scenario_cost = hr_expand_salary.merge(hr_expand_scenario, left_index=True, right_index=True)
-# hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[1:]] = \
-#     hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[1:]].multiply(
-#     hr_expand_scenario_cost.loc[:, hr_expand_scenario_cost.columns[0]], axis='index')
-# hr_expand_scenario_cost.loc['Total'] = hr_expand_scenario_cost.sum()
-# # hr_expand_scenario_cost.drop(columns=['Annual_Salary_USD'], inplace=True)
-#
-# cond = (hr_expand_scenario_cost.loc['Total'] <= total_estab_salary * 4.2 / 100)
-# hr_expand_scenario_budget = hr_expand_scenario_cost.loc[:, cond].copy()
-# hr_expand_scenario_budget.drop(index='Total', inplace=True)
-# hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]] = \
-#     hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[1:]].div(
-#     hr_expand_scenario_budget.loc[:, hr_expand_scenario_budget.columns[0]], axis='index')
-# hr_expand_scenario_budget.loc['Total'] = hr_expand_scenario_budget.sum()
-
-# further reduce scenarios
-# to examine marginal impact of each cadre, do keep the individual increase (0%, 25%, 50%, 100%) of the four cadres
-# to examine combined impact of multiple cadres, do keep the increase of a cadre that is as large as possible
-# to do this selection in Excel
-
-# if the resulted 48 scenarios are too many, try reducing the individual increase of each cadre into 4 cases,
-# step by 1/3
-# if the resulted 29 scenarios are too many, try reducing the individual increase of each cadre into 3 cases,
-# step by 50%.
-# if try increasing the individual increase of each cadre into 6 cases, a step by 20%
-
-# experiment ends =====================================================================================================
-
 # to get minute salary per cadre per level
 Annual_PFT = hr_current.groupby(['Facility_Level', 'Officer_Category']).agg(
     {'Total_Mins_Per_Day': 'sum', 'Staff_Count': 'sum'}).reset_index()
 Annual_PFT['Annual_Mins_Per_Staff'] = 365.25 * Annual_PFT['Total_Mins_Per_Day']/Annual_PFT['Staff_Count']
 
-# the hr salary per level might need update from Sakshi; use the average salary for now
-# hr_salary_per_level['Facility_Level'] = hr_salary_per_level['Facility_Level'].astype(str)
-# Minute_Salary = Annual_PFT.merge(hr_salary_per_level, on=['Facility_Level', 'Officer_Category'], how='outer')
+# the hr salary by minute and facility id
 Minute_Salary = Annual_PFT.merge(hr_salary, on=['Officer_Category'], how='outer')
 Minute_Salary['Minute_Salary_USD'] = Minute_Salary['Annual_Salary_USD']/Minute_Salary['Annual_Mins_Per_Staff']
 Minute_Salary = Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].merge(
@@ -127,21 +34,8 @@
 )
 Minute_Salary.drop(columns=['Facility_Level'], inplace=True)
 Minute_Salary = Minute_Salary.fillna(0)
-
-# Minute_Salary = Minute_Salary.loc[Minute_Salary.Officer_Category.isin(
-#     ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])]
-# Minute_Salary = Minute_Salary.set_index(
-#     'FacilityID_'
-#     + Minute_Salary['Facility_ID'].astype(str)
-#     + '_Officer_'
-#     + Minute_Salary['Officer_Category']
-# )
-# Minute_Salary = Minute_Salary['Minute_Salary_USD']
-# Minute_Salary = pd.DataFrame(Minute_Salary).reset_index().rename(columns={'index': 'facilityid_officer'})
-# Minute_Salary[['Facility_ID', 'Officer_Type_Code']] = Minute_Salary.facilityid_officer.str.split(
-#     pat='_', n=3, expand=True)[[1, 3]]
-
 Minute_Salary.rename(columns={'Officer_Category': 'Officer_Type_Code'}, inplace=True)
+
 Minute_Salary.to_csv(resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)
 
 # calculate the current cost distribution of the four cadres

From f107449a20ac5803f537a4227a7e65f032e80e0c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 13:36:47 +0100
Subject: [PATCH 035/218] fix checks failure

---
 .../scenario_of_expanding_funed_plus_hcw.py                   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
index 31fba3fc92..b4eab86196 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
@@ -16,14 +16,14 @@
 from pathlib import Path
 from typing import Dict
 
+import pandas as pd
+
 from tlo import Date, logging
 from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios
 from tlo.methods.fullmodel import fullmodel
 from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher
 from tlo.scenario import BaseScenario
 
-import pandas as pd
-
 
 class LongRun(BaseScenario):
     def __init__(self):

From bf2bceafd4a66d4156198c43889b62a9c2680e48 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 13:47:59 +0100
Subject: [PATCH 036/218] fix checks failure

---
 .../create_hr_minute_salary_by_officer_type_facility_id.py     | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index 654964727b..ded61ec72a 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -4,9 +4,8 @@
 
 from pathlib import Path
 
-# import numpy as np
 import pandas as pd
-# import re
+
 
 resourcefilepath = Path('./resources')
 

From 8a14ec0811b7b54cf9e37ad86d2363976d6d9b8f Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 13:52:16 +0100
Subject: [PATCH 037/218] fix checks failure

---
 .../create_hr_minute_salary_by_officer_type_facility_id.py       | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index ded61ec72a..ac2499c2b1 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -6,7 +6,6 @@
 
 import pandas as pd
 
-
 resourcefilepath = Path('./resources')
 
 mfl = pd.read_csv(resourcefilepath / 'healthsystem' / 'organisation' / 'ResourceFile_Master_Facilities_List.csv')

From 9abbe4348ed28b93a07af0126dfec9e2c5340fa8 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 14:03:25 +0100
Subject: [PATCH 038/218] fix checks failure

---
 src/tlo/methods/healthsystem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 0b9ccc6492..aebda58718 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3010,7 +3010,7 @@ def apply(self, population):
         daily_extra_budget = 0.042*(total_cost_clinical + total_cost_dcsa + total_cost_nursing + total_cost_pharmacy)
 
         # get proportional daily extra budget for each of the four cadres
-        daily_extra_budget_by_officer = daily_extra_budget * self.parameters['HR_expansion_by_officer_type']
+        daily_extra_budget_by_officer = daily_extra_budget * self.module.parameters['HR_expansion_by_officer_type']
 
         # get the scale up factor for each cadre, assumed to be the same for each facility id of that cadre
         sf_clinical = (total_cost_clinical + daily_extra_budget_by_officer[0])/total_cost_clinical

From d1a7823910e9de75587c26c16348090f392afda5 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 14:25:14 +0100
Subject: [PATCH 039/218] renaming the scenario file

---
 ... scenario_of_expanding_current_hcw_with_extra_budget.py} | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
 rename src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/{scenario_of_expanding_funed_plus_hcw.py => scenario_of_expanding_current_hcw_with_extra_budget.py} (96%)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
similarity index 96%
rename from src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
rename to src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
index b4eab86196..36666c447b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
@@ -4,12 +4,14 @@
 
 Run on the batch system using:
 ```
-tlo batch-submit src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+tlo batch-submit src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/
+scenario_of_expanding_current_hcw_with_extra_budget.py
 ```
 
 or locally using:
 ```
-tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_funed_plus_hcw.py
+tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/
+scenario_of_expanding_current_hcw_with_extra_budget.py
 ```
 """
 

From 97869b0219c0a5c1476ef0e3f58706d89ec492ac Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 5 Aug 2024 21:36:57 +0100
Subject: [PATCH 040/218] fix failing tests

---
 src/tlo/methods/healthsystem.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index aebda58718..a32a11a403 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2988,6 +2988,7 @@ def apply(self, population):
             columns={'index': 'facilityid_officer'})
         daily_minutes[['Facility_ID', 'Officer_Type_Code']] = daily_minutes.facilityid_officer.str.split(
             pat='_', n=3, expand=True)[[1, 3]]
+        daily_minutes['Facility_ID'] = daily_minutes['Facility_ID'].astype(int)
 
         # get daily cost per officer per facility id
         daily_cost = minute_salary_by_officer_facility_id.merge(
@@ -3010,7 +3011,8 @@ def apply(self, population):
         daily_extra_budget = 0.042*(total_cost_clinical + total_cost_dcsa + total_cost_nursing + total_cost_pharmacy)
 
         # get proportional daily extra budget for each of the four cadres
-        daily_extra_budget_by_officer = daily_extra_budget * self.module.parameters['HR_expansion_by_officer_type']
+        daily_extra_budget_by_officer = [
+            daily_extra_budget * i for i in self.module.parameters['HR_expansion_by_officer_type']]
 
         # get the scale up factor for each cadre, assumed to be the same for each facility id of that cadre
         sf_clinical = (total_cost_clinical + daily_extra_budget_by_officer[0])/total_cost_clinical

From c3d04d0544dce3d36f01d06d236505b3fc1434a9 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 6 Aug 2024 14:28:37 +0100
Subject: [PATCH 041/218] create the test of the hr expansion function

---
 tests/test_healthsystem.py | 82 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py
index 6568d1df56..6dab2c0654 100644
--- a/tests/test_healthsystem.py
+++ b/tests/test_healthsystem.py
@@ -2570,3 +2570,85 @@ def get_capabilities(yearly_scaling: bool, scaling_by_level: bool, rescaling: bo
     caps_scaling_by_both_with_rescaling = get_capabilities(yearly_scaling=True, scaling_by_level=True, rescaling=True)
     assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_level_with_rescaling
     assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_year_with_rescaling
+
+
+def test_HR_expansion_by_officer_type(seed, tmpdir):
+    """Check that we can use the parameter `HR_expansion_by_officer_type` to update the minutes of time available
+    for healthcare workers."""
+
+    def get_initial_capabilities() -> pd.DataFrame:
+        sim = Simulation(start_date=start_date, seed=seed)
+        sim.register(
+            demography.Demography(resourcefilepath=resourcefilepath),
+            healthsystem.HealthSystem(resourcefilepath=resourcefilepath)
+        )
+        popsize=100
+        sim.make_initial_population(n=popsize)
+        sim.simulate(end_date=start_date + pd.DateOffset(days=0))
+
+        caps = pd.DataFrame(sim.modules['HealthSystem'].capabilities_today)
+        caps = caps[caps != 0]
+
+        return caps
+
+    def get_capabilities_after_update(end_year, HR_expansion_by_officer_type) -> pd.Series:
+        sim = Simulation(start_date=start_date, seed=seed)
+        sim.register(
+            demography.Demography(resourcefilepath=resourcefilepath),
+            healthsystem.HealthSystem(resourcefilepath=resourcefilepath),
+            simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
+
+        )
+        params = sim.modules['HealthSystem'].parameters
+        params['start_year_HR_expansion_by_officer_type'] = 2011
+        params['end_year_HR_expansion_by_officer_type'] = end_year
+        params['HR_expansion_by_officer_type'] = HR_expansion_by_officer_type
+
+        popsize = 100
+        sim.make_initial_population(n=popsize)
+
+        sim.simulate(end_date=Date(end_year, 1, 2))
+
+        caps = pd.DataFrame(sim.modules['HealthSystem'].capabilities_today)
+        caps = caps[caps != 0]
+
+        return caps
+
+    initial_caps = get_initial_capabilities()
+    caps_clinical_one_update = get_capabilities_after_update(2012, [1, 0, 0, 0])
+    caps_clinical_dcsa_one_update = get_capabilities_after_update(2012, [0.5, 0.5, 0, 0])
+    caps_clinical_two_updates = get_capabilities_after_update(2013, [1, 0, 0, 0])
+
+    # check that the cadres are expanded as expected
+    def compare(cadre, caps_1, caps_2) -> tuple:
+
+        assert (caps_1.index == caps_2.index).all()
+        comp_caps_0 = caps_1.merge(caps_2, left_index=True, right_index=True)
+        comp_caps_0 = comp_caps_0[comp_caps_0.index.str.contains(cadre, regex=True)]
+        ratio = (comp_caps_0.iloc[:, 1] / comp_caps_0.iloc[:, 0]).dropna()
+
+        return (ratio > 1).all(), (abs(ratio - ratio.unique()[0]) < 1e-6).all()
+
+    # initial_caps vs caps_clinical_one_update
+    # check if the clinical cadre of each facility id is expanded
+    assert compare('Clinical', initial_caps, caps_clinical_one_update)[0]
+    # check if the cadre is expanded by the same ratio of each facilty id
+    assert compare('Clinical', initial_caps, caps_clinical_one_update)[1]
+
+    # caps_clinical_one_update vs caps_clinical_two_updates
+    # check if the clinical cadre of each facility id is expanded more in the latter scenario with two updates
+    assert compare('Clinical', caps_clinical_one_update, caps_clinical_two_updates)[0]
+    # check if the cadre is expanded by the same ratio of each facilty id
+    assert compare('Clinical', caps_clinical_one_update, caps_clinical_two_updates)[1]
+
+    # initial_caps vs caps_clinical_dcsa_one_update
+    # check if the DCSA cadre of each facility id is expanded
+    assert compare('DCSA', initial_caps, caps_clinical_dcsa_one_update)[0]
+    # check if the cadre is expanded by the same ratio of each facilty id
+    assert compare('DCSA', initial_caps, caps_clinical_dcsa_one_update)[1]
+
+    # caps_clinical_one_update vs caps_clinical_dcsa_one_update
+    # check if the cadre of each facility id is expanded less in the latter scenario with a smaller frac of extra budget
+    assert compare('Clinical', caps_clinical_dcsa_one_update, caps_clinical_one_update)[0]
+    # check if the cadre is expanded by the same ratio of each facilty id
+    assert compare('Clinical', caps_clinical_dcsa_one_update, caps_clinical_one_update)[1]

From bc6d4290069d18aa6520016dc8aec4c835dc4ce3 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 13 Aug 2024 00:41:32 +0100
Subject: [PATCH 042/218] calculate scale up factor outside healthsystem module

---
 ...nute_salary_by_officer_type_facility_id.py | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index ac2499c2b1..7fc57c4888 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -44,3 +44,47 @@
     staff_cost.Officer_Category.isin(['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])].reset_index(drop=True)
 four_cadres_cost['cost_frac'] = four_cadres_cost['annual_cost'] / four_cadres_cost['annual_cost'].sum()
 assert four_cadres_cost.cost_frac.sum() == 1
+
+
+# calculate hr scale up factor for years 2020-2030 (10 years in total) outside the healthsystem module
+
+def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFrame:
+    """This function calculates the yearly hr scale up factor for Clinical, DCSA, Nursing_and_Midwifery,
+    and Pharmacy cadres for a year yr, given a fraction of an extra budget allocated to each cadre and
+    a yearly budget growth rate of 4.2%.
+    Parameter extra_budget_frac (list) is a list of four floats, representing the fractions.
+    Parameter yr (int) is a year between 2020 and 2030.
+    Parameter scenario (string) is a column name in the extra budget fractions resource file.
+    Output dataframe stores scale up factors and relevant for the year yr.
+    """
+    # get data of previous year
+    prev_year = yr - 1
+    prev_data = scale_up_factor_dict[scenario][prev_year].copy()
+
+    # calculate and update scale_up_factor
+    prev_data['extra_budget_frac'] = extra_budget_frac
+    prev_data['extra_budget'] = 0.042 * prev_data.annual_cost.sum() * prev_data.extra_budget_frac
+    prev_data['extra_staff'] = prev_data.extra_budget / prev_data.Annual_Salary_USD
+    prev_data['scale_up_factor'] = (prev_data.Staff_Count + prev_data.extra_staff) / prev_data.Staff_Count
+
+    # store the updated data for the year yr
+    new_data = prev_data[['Officer_Category', 'Annual_Salary_USD', 'scale_up_factor']].copy()
+    new_data['Staff_Count'] = prev_data.Staff_Count + prev_data.extra_staff
+    new_data['annual_cost'] = prev_data.annual_cost + prev_data.extra_budget
+
+    return new_data
+
+
+# calculate scale up factors for all defined scenarios and years
+extra_budget_frac_data = pd.read_csv(resourcefilepath
+                                     / 'healthsystem' / 'human_resources' / 'scaling_capabilities'
+                                     / 'ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv'
+                                     ).set_index('Officer_Category')
+four_cadres_cost['scale_up_factor'] = 1
+scale_up_factor_dict = {s: {y: {} for y in range(2019, 2030)} for s in extra_budget_frac_data.columns}
+for s in extra_budget_frac_data.columns:
+    # for the initial/current year of 2019
+    scale_up_factor_dict[s][2019] = four_cadres_cost.drop(columns='cost_frac').copy()
+    # for the years with scaled up hr
+    for y in range(2020, 2030):
+        scale_up_factor_dict[s][y] = calculate_hr_scale_up_factor(list(extra_budget_frac_data[s]), y, s)

From 7dfc7f3c517db86a61d55ec0d78fe040656c7298 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 13 Aug 2024 12:50:06 +0100
Subject: [PATCH 043/218] local run that takes 12 hours

---
 ...ario_of_expanding_current_hcw_with_extra_budget.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
index 36666c447b..8ca41fcf61 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
@@ -10,8 +10,7 @@
 
 or locally using:
 ```
-tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/
-scenario_of_expanding_current_hcw_with_extra_budget.py
+tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
 ```
 """
 
@@ -32,11 +31,11 @@ def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = Date(2030, 1, 1)
-        self.pop_size = 20_000  # todo: TBC
+        self.end_date = Date(2023, 1, 1)
+        self.pop_size = 100  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 10  # todo: TBC
+        self.runs_per_draw = 2  # todo: TBC
 
     def log_configuration(self):
         return {
@@ -76,7 +75,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
                         'HR_expansion_by_officer_type': list(self.scenarios.iloc[:, i])
                     }
                     }
-                ) for i in range(len(self.scenarios.columns))
+                ) for i in range(len(self.scenarios.columns) - 14)
         }
 
     def _baseline(self) -> Dict:

From 4eb3b08cdc9dbb7cb2d32edcf15acee98cdb82f8 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 13 Aug 2024 13:49:41 +0100
Subject: [PATCH 044/218] save and read pickle file

---
 ...e_hr_minute_salary_by_officer_type_facility_id.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index 7fc57c4888..e81a8b82f3 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -1,7 +1,7 @@
 """
 We calculate the salar cost of current and funded plus HCW.
 """
-
+import pickle
 from pathlib import Path
 
 import pandas as pd
@@ -88,3 +88,13 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
     # for the years with scaled up hr
     for y in range(2020, 2030):
         scale_up_factor_dict[s][y] = calculate_hr_scale_up_factor(list(extra_budget_frac_data[s]), y, s)
+
+# save and read pickle file
+pickle_file_path = Path(resourcefilepath / 'healthsystem' / 'human_resources' / 'scaling_capabilities' /
+                        'ResourceFile_HR_expansion_by_officer_type_yearly_scale_up_factors.pickle')
+
+with open(pickle_file_path, 'wb') as f:
+    pickle.dump(scale_up_factor_dict, f)
+
+with open(pickle_file_path, 'rb') as f:
+    x = pickle.load(f)

From 37f39e589a2e3d9326aee12c90fb9fa38a9af83b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 13 Aug 2024 14:59:45 +0100
Subject: [PATCH 045/218] create logger for hr scale up factor

---
 src/tlo/methods/healthsystem.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index a32a11a403..780fc44389 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3037,6 +3037,17 @@ def apply(self, population):
             else:
                 self.module._daily_capabilities[officer] *= 1
 
+        # save the scale up factor into logger
+        logger_summary.info(key='HRScaling',
+                            description='The HR scale up factor by office type - '
+                                        'Clinical, DCSA, Nursing_and_Midwifery, Pharmacy - '
+                                        'given fractions of an extra budget',
+                            data={
+                                'Scale up factor': [sf_clinical, sf_dcsa, sf_nursing, sf_pharmacy],
+                                'Year of scaling up': self.sim.date.year,
+                            }
+                            )
+
 
 class HealthSystemChangeMode(RegularEvent, PopulationScopeEventMixin):
     """ This event exists to change the priority policy adopted by the

From 3b9ba9ab25e28c43a6c40a22c63697463b34757c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 13 Aug 2024 15:01:51 +0100
Subject: [PATCH 046/218] Revert "local run that takes 12 hours"

This reverts commit 7dfc7f3c517db86a61d55ec0d78fe040656c7298.
---
 ...ario_of_expanding_current_hcw_with_extra_budget.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
index 8ca41fcf61..36666c447b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
@@ -10,7 +10,8 @@
 
 or locally using:
 ```
-tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
+tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/
+scenario_of_expanding_current_hcw_with_extra_budget.py
 ```
 """
 
@@ -31,11 +32,11 @@ def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = Date(2023, 1, 1)
-        self.pop_size = 100  # todo: TBC
+        self.end_date = Date(2030, 1, 1)
+        self.pop_size = 20_000  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 2  # todo: TBC
+        self.runs_per_draw = 10  # todo: TBC
 
     def log_configuration(self):
         return {
@@ -75,7 +76,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
                         'HR_expansion_by_officer_type': list(self.scenarios.iloc[:, i])
                     }
                     }
-                ) for i in range(len(self.scenarios.columns) - 14)
+                ) for i in range(len(self.scenarios.columns))
         }
 
     def _baseline(self) -> Dict:

From f244f70f52bbdbaeafc3134d3cbeeb339b626561 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 14 Aug 2024 16:03:22 +0100
Subject: [PATCH 047/218] update the scenario script

---
 ...of_expanding_current_hcw_with_extra_budget.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
index 36666c447b..40bc7c9ff9 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
@@ -4,14 +4,12 @@
 
 Run on the batch system using:
 ```
-tlo batch-submit src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/
-scenario_of_expanding_current_hcw_with_extra_budget.py
+tlo batch-submit src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
 ```
 
 or locally using:
 ```
-tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/
-scenario_of_expanding_current_hcw_with_extra_budget.py
+tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
 ```
 """
 
@@ -36,7 +34,7 @@ def __init__(self):
         self.pop_size = 20_000  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 10  # todo: TBC
+        self.runs_per_draw = 5  # todo: TBC
 
     def log_configuration(self):
         return {
@@ -83,9 +81,6 @@ def _baseline(self) -> Dict:
         return mix_scenarios(
             get_parameters_for_status_quo(),
             {'HealthSystem': {
-                'use_funded_or_actual_staffing': 'actual',
-                'use_funded_or_actual_staffing_postSwitch': 'funded_plus',
-                'year_use_funded_or_actual_staffing_switch': self.YEAR_OF_CHANGE,
                 'mode_appt_constraints': 1,
                 'mode_appt_constraints_postSwitch': 2,
                 "year_mode_switch": self.YEAR_OF_CHANGE,
@@ -93,8 +88,9 @@ def _baseline(self) -> Dict:
                 'cons_availability_postSwitch': 'all',
                 'year_cons_availability_switch': self.YEAR_OF_CHANGE,
                 'yearly_HR_scaling_mode': 'no_scaling',
-
-            }
+                'start_year_HR_expansion_by_officer_type': self.YEAR_OF_CHANGE,
+                'end_year_HR_expansion_by_officer_type': self.end_date.year,
+            }  # as to expand current hr and analyse the impact, we keep using 'actual' hr capabilities
             },
         )
 

From 4cac903e283745e68324721ef7ce9fae098024a9 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 16 Aug 2024 15:58:19 +0100
Subject: [PATCH 048/218] a run for only two scenarios

---
 .../scenario_of_expanding_current_hcw_with_extra_budget.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
index 40bc7c9ff9..0a1fdf95e7 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
@@ -74,7 +74,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
                         'HR_expansion_by_officer_type': list(self.scenarios.iloc[:, i])
                     }
                     }
-                ) for i in range(len(self.scenarios.columns))
+                ) for i in range(len(self.scenarios.columns) - 15)
         }
 
     def _baseline(self) -> Dict:

From fde641e1b13414dd540396b5c711c88347afad51 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 19 Aug 2024 10:53:59 +0100
Subject: [PATCH 049/218] scale to effective capabilities

---
 .../scenario_of_expanding_current_hcw_with_extra_budget.py       | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
index 0a1fdf95e7..ba11c7b559 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
@@ -83,6 +83,7 @@ def _baseline(self) -> Dict:
             {'HealthSystem': {
                 'mode_appt_constraints': 1,
                 'mode_appt_constraints_postSwitch': 2,
+                "scale_to_effective_capabilities": True,  # todo: TBC
                 "year_mode_switch": self.YEAR_OF_CHANGE,
                 'cons_availability': 'default',
                 'cons_availability_postSwitch': 'all',

From 90d590fd7fb0754251717dd8c98ddd0ed179fbe2 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 19 Aug 2024 11:34:19 +0100
Subject: [PATCH 050/218] rename file

---
 ...xpanding_current_hcw_by_officer_type_with_extra_budget.py} | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 rename src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/{scenario_of_expanding_current_hcw_with_extra_budget.py => scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py} (95%)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
similarity index 95%
rename from src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
rename to src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index ba11c7b559..7dfcfa53cd 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -4,12 +4,12 @@
 
 Run on the batch system using:
 ```
-tlo batch-submit src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
+tlo batch-submit src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
 ```
 
 or locally using:
 ```
-tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_with_extra_budget.py
+tlo scenario-run src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
 ```
 """
 

From 3dcd2d9208adc2fd01edd7bfa63a337f65465a58 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 19 Aug 2024 11:46:44 +0100
Subject: [PATCH 051/218] rename long run class in scenario file

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 7dfcfa53cd..0ead19d18b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -25,7 +25,7 @@
 from tlo.scenario import BaseScenario
 
 
-class LongRun(BaseScenario):
+class HRHExpansionByCadreWithExtraBudget(BaseScenario):
     def __init__(self):
         super().__init__()
         self.seed = 0

From 51883c12fd279f0cf04f95764382944725c79f4b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 19 Aug 2024 16:02:37 +0100
Subject: [PATCH 052/218] update test

---
 tests/test_healthsystem.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py
index 6dab2c0654..a3f8418f76 100644
--- a/tests/test_healthsystem.py
+++ b/tests/test_healthsystem.py
@@ -2600,14 +2600,14 @@ def get_capabilities_after_update(end_year, HR_expansion_by_officer_type) -> pd.
 
         )
         params = sim.modules['HealthSystem'].parameters
-        params['start_year_HR_expansion_by_officer_type'] = 2011
-        params['end_year_HR_expansion_by_officer_type'] = end_year
+        params['start_year_HR_expansion_by_officer_type'] = 2011  # first update happens on 1 Jan 2011
+        params['end_year_HR_expansion_by_officer_type'] = end_year  # last update happens on 1 Jan (end_year - 1)
         params['HR_expansion_by_officer_type'] = HR_expansion_by_officer_type
 
         popsize = 100
         sim.make_initial_population(n=popsize)
 
-        sim.simulate(end_date=Date(end_year, 1, 2))
+        sim.simulate(end_date=Date(end_year, 1, 1))
 
         caps = pd.DataFrame(sim.modules['HealthSystem'].capabilities_today)
         caps = caps[caps != 0]
@@ -2615,6 +2615,7 @@ def get_capabilities_after_update(end_year, HR_expansion_by_officer_type) -> pd.
         return caps
 
     initial_caps = get_initial_capabilities()
+    caps_clinical_no_update = get_capabilities_after_update(2012, [0, 0, 0, 0])
     caps_clinical_one_update = get_capabilities_after_update(2012, [1, 0, 0, 0])
     caps_clinical_dcsa_one_update = get_capabilities_after_update(2012, [0.5, 0.5, 0, 0])
     caps_clinical_two_updates = get_capabilities_after_update(2013, [1, 0, 0, 0])
@@ -2629,6 +2630,10 @@ def compare(cadre, caps_1, caps_2) -> tuple:
 
         return (ratio > 1).all(), (abs(ratio - ratio.unique()[0]) < 1e-6).all()
 
+    # initial_caps vs caps_clinical_no_update
+    # check if the clinical cadre of each facility id is not expanded
+    assert not compare('Clinical', initial_caps, caps_clinical_no_update)[0]
+
     # initial_caps vs caps_clinical_one_update
     # check if the clinical cadre of each facility id is expanded
     assert compare('Clinical', initial_caps, caps_clinical_one_update)[0]

From 74d8c6263dd3ef969779b2efd125f99141a39f21 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 20 Aug 2024 10:22:45 +0100
Subject: [PATCH 053/218] initiate the analysis script - learn from Tim's
 branch 1414

---
 ...dsion_by_officer_type_with_extra_budget.py | 152 ++++++++++++++++++
 1 file changed, 152 insertions(+)
 create mode 100644 src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
new file mode 100644
index 0000000000..8957b33d2b
--- /dev/null
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -0,0 +1,152 @@
+"""
+This file analyses and plots the services, DALYs, Deaths within different scenarios of expanding current hr by officer
+type given some extra budget. Return on investment and marginal productivity of each officer type will be examined.
+
+The scenarios are defined in scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py.
+"""
+
+import argparse
+import textwrap
+from pathlib import Path
+from typing import Tuple
+
+import numpy as np
+import pandas as pd
+from matplotlib import pyplot as plt
+
+from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import (
+    HRHExpansionByCadreWithExtraBudget,
+)
+from tlo import Date
+from tlo.analysis.utils import extract_results, make_age_grp_lookup, summarize
+
+
+def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None,
+          the_target_period: Tuple[Date, Date] = None):
+    """
+    Extract results of number of services by appt type, number of DALYs, number of Deaths in the target period.
+    (To see whether to extract these results by short treatment id and/or disease.)
+    Calculate the extra budget allocated, extra staff by cadre, return on investment and marginal productivity by cadre.
+    """
+    TARGET_PERIOD = the_target_period
+
+    # Definitions of general helper functions
+    make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png"  # noqa: E731
+
+    def target_period() -> str:
+        """Returns the target period as a string of the form YYYY-YYYY"""
+        return "-".join(str(t.year) for t in TARGET_PERIOD)
+
+    def get_parameter_names_from_scenario_file() -> Tuple[str]:
+        """Get the tuple of names of the scenarios from `Scenario` class used to create the results."""
+        e = HRHExpansionByCadreWithExtraBudget()
+        return tuple(e._scenarios.keys())
+
+    def get_num_appts(_df):
+        """Return the number of appointments (total within the TARGET_PERIOD)"""
+        return _df \
+            .loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] \
+            .apply(pd.Series) \
+            .sum()
+
+    def get_num_deaths(_df):
+        """Return total number of Deaths (total within the TARGET_PERIOD)"""
+        return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)]))
+
+    def get_num_dalys(_df):
+        """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD).
+        Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using
+        results from runs that crashed mid-way through the simulation.
+        """
+        years_needed = [i.year for i in TARGET_PERIOD]
+        assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded."
+        return pd.Series(
+            data=_df
+            .loc[_df.year.between(*years_needed)]
+            .drop(columns=['date', 'sex', 'age_range', 'year'])
+            .sum().sum()
+        )
+
+    def set_param_names_as_column_index_level_0(_df):
+        """Set the columns index (level 0) as the param_names."""
+        ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)}
+        names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]]
+        assert len(names_of_cols_level0) == len(_df.columns.levels[0])
+        _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0)
+        return _df
+
+    def find_difference_relative_to_comparison_series(
+        _ser: pd.Series,
+        comparison: str,
+        scaled: bool = False,
+        drop_comparison: bool = True,
+    ):
+        """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0)
+        within the runs (level 1), relative to where draw = `comparison`.
+        The comparison is `X - COMPARISON`."""
+        return _ser \
+            .unstack(level=0) \
+            .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \
+            .drop(columns=([comparison] if drop_comparison else [])) \
+            .stack()
+
+    def find_difference_relative_to_comparison_series_dataframe(_df: pd.DataFrame, **kwargs):
+        """Apply `find_difference_relative_to_comparison_series` to each row in a dataframe"""
+        return pd.concat({
+            _idx: find_difference_relative_to_comparison_series(row, **kwargs)
+            for _idx, row in _df.iterrows()
+        }, axis=1).T
+
+    # Get parameter/scenario names
+    param_names = get_parameter_names_from_scenario_file()
+
+    # Absolute Number of Deaths and DALYs and Services
+    num_deaths = extract_results(
+        results_folder,
+        module='tlo.methods.demography',
+        key='death',
+        custom_generate_series=get_num_deaths,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_dalys = extract_results(
+        results_folder,
+        module='tlo.methods.healthburden',
+        key='dalys_stacked',
+        custom_generate_series=get_num_dalys,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_appts = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event',
+        custom_generate_series=get_num_appts,
+        do_scaling=True
+        ).pipe(set_param_names_as_column_index_level_0)
+
+    num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names)
+    num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names)
+    num_appts_summarized = summarize(num_appts).T.unstack().reindex(param_names)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("results_folder", type=Path)  # outputs/bshe@ic.ac.uk/scenario_run_for_hcw_expansion_analysis-2024-08-16T160132Z
+    args = parser.parse_args()
+
+    # Produce results for short-term analysis
+    apply(
+        results_folder=args.results_folder,
+        output_folder=args.results_folder,
+        resourcefilepath=Path('./resources'),
+        the_target_period=(Date(2020, 1, 1), Date(2024, 12, 31))
+    )
+
+    # Produce results for long-term analysis
+    apply(
+        results_folder=args.results_folder,
+        output_folder=args.results_folder,
+        resourcefilepath=Path('./resources'),
+        the_target_period=(Date(2020, 1, 1), Date(2029, 12, 31))
+    )

From 9e047b191f809976ffc6df20830d6c6dfa5396a7 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 20 Aug 2024 11:09:36 +0100
Subject: [PATCH 054/218] extract more results

---
 ...dsion_by_officer_type_with_extra_budget.py | 39 +++++++++++++++----
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 8957b33d2b..2ab4c698e7 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -43,11 +43,16 @@ def get_parameter_names_from_scenario_file() -> Tuple[str]:
         return tuple(e._scenarios.keys())
 
     def get_num_appts(_df):
-        """Return the number of appointments (total within the TARGET_PERIOD)"""
-        return _df \
-            .loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code'] \
-            .apply(pd.Series) \
-            .sum()
+        """Return the number of appointments per appt type (total within the TARGET_PERIOD)"""
+        return (_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code']
+                .apply(pd.Series).sum())
+
+    def get_num_services(_df):
+        """Return the number of appointments in total of all appt types (total within the TARGET_PERIOD)"""
+        return pd.Series(
+            data=_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code']
+            .apply(pd.Series).sum().sum()
+        )
 
     def get_num_deaths(_df):
         """Return total number of Deaths (total within the TARGET_PERIOD)"""
@@ -125,9 +130,18 @@ def find_difference_relative_to_comparison_series_dataframe(_df: pd.DataFrame, *
         do_scaling=True
         ).pipe(set_param_names_as_column_index_level_0)
 
+    num_services = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event',
+        custom_generate_series=get_num_services,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
     num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names)
     num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names)
     num_appts_summarized = summarize(num_appts).T.unstack().reindex(param_names)
+    num_services_summarize = summarize(num_services).loc[0].unstack().reindex(param_names)
 
 
 if __name__ == "__main__":
@@ -135,7 +149,17 @@ def find_difference_relative_to_comparison_series_dataframe(_df: pd.DataFrame, *
     parser.add_argument("results_folder", type=Path)  # outputs/bshe@ic.ac.uk/scenario_run_for_hcw_expansion_analysis-2024-08-16T160132Z
     args = parser.parse_args()
 
-    # Produce results for short-term analysis
+    # Produce results for short-term analysis: 5 years
+
+    # 2015-2019
+    apply(
+        results_folder=args.results_folder,
+        output_folder=args.results_folder,
+        resourcefilepath=Path('./resources'),
+        the_target_period=(Date(2015, 1, 1), Date(2019, 12, 31))
+    )
+
+    # 2020-2024
     apply(
         results_folder=args.results_folder,
         output_folder=args.results_folder,
@@ -143,7 +167,8 @@ def find_difference_relative_to_comparison_series_dataframe(_df: pd.DataFrame, *
         the_target_period=(Date(2020, 1, 1), Date(2024, 12, 31))
     )
 
-    # Produce results for long-term analysis
+    # Produce results for long-term analysis: 10 years
+    # 2020-2029
     apply(
         results_folder=args.results_folder,
         output_folder=args.results_folder,

From d14599ce01f713221f6601a86b12a2c2fbb1d562 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 20 Aug 2024 22:24:44 +0100
Subject: [PATCH 055/218] initial plots and todo

---
 ...dsion_by_officer_type_with_extra_budget.py | 186 ++++++++++++++++--
 1 file changed, 167 insertions(+), 19 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 2ab4c698e7..397ad8bfbd 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -18,7 +18,34 @@
     HRHExpansionByCadreWithExtraBudget,
 )
 from tlo import Date
-from tlo.analysis.utils import extract_results, make_age_grp_lookup, summarize
+from tlo.analysis.utils import (
+    APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
+    COARSE_APPT_TYPE_TO_COLOR_MAP,
+    extract_results,
+    make_age_grp_lookup,
+    summarize,
+)
+
+# rename scenarios
+substitute_labels = {
+    's_1': 'no_expansion',
+    's_2': 'CDNP_expansion_current',
+    's_3': 'C_expansion',
+    's_4': 'D_expansion',
+    's_5': 'N_expansion',
+    's_6': 'P_expansion',
+    's_7': 'CD_expansion',
+    's_8': 'CN_expansion',
+    's_9': 'CP_expansion',
+    's_10': 'DN_expansion',
+    's_11': 'DP_expansion',
+    's_12': 'NP_expansion',
+    's_13': 'CDN_expansion',
+    's_14': 'CDP_expansion',
+    's_15': 'CNP_expansion',
+    's_16': 'DNP_expansion',
+    's_17': 'CDNP_expansion_equal'
+}
 
 
 def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None,
@@ -45,7 +72,10 @@ def get_parameter_names_from_scenario_file() -> Tuple[str]:
     def get_num_appts(_df):
         """Return the number of appointments per appt type (total within the TARGET_PERIOD)"""
         return (_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code']
-                .apply(pd.Series).sum())
+                .apply(pd.Series)
+                .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP)
+                .groupby(level=0, axis=1).sum()
+                .sum())
 
     def get_num_services(_df):
         """Return the number of appointments in total of all appt types (total within the TARGET_PERIOD)"""
@@ -102,6 +132,62 @@ def find_difference_relative_to_comparison_series_dataframe(_df: pd.DataFrame, *
             for _idx, row in _df.iterrows()
         }, axis=1).T
 
+    def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False, put_labels_in_legend=True):
+        """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the
+         extent of the error bar."""
+
+        yerr = np.array([
+            (_df['mean'] - _df['lower']).values,
+            (_df['upper'] - _df['mean']).values,
+        ])
+
+        xticks = {(i + 0.5): k for i, k in enumerate(_df.index)}
+
+        # Define colormap (used only with option `put_labels_in_legend=True`)
+        cmap = plt.get_cmap("tab20")
+        rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y))  # noqa: E731
+        colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None
+
+        fig, ax = plt.subplots(figsize=(10, 5))
+        ax.bar(
+            xticks.keys(),
+            _df['mean'].values,
+            yerr=yerr,
+            alpha=0.8,
+            ecolor='black',
+            color=colors,
+            capsize=10,
+            label=xticks.values(),
+            zorder=100,
+        )
+        if annotations:
+            for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations):
+                ax.text(xpos, ypos*1.15, text, horizontalalignment='center', rotation='vertical', fontsize='x-small')
+        ax.set_xticks(list(xticks.keys()))
+
+        if put_labels_in_legend:
+            # Update xticks label with substitute labels
+            # Insert legend with updated labels that shows correspondence between substitute label and original label
+            xtick_values = [substitute_labels[v] for v in xticks.values()]
+            xtick_legend = [f'{v}: {substitute_labels[v]}' for v in xticks.values()]
+            h, _ = ax.get_legend_handles_labels()
+            ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5))
+            ax.set_xticklabels(list(xtick_values))
+        else:
+            if not xticklabels_horizontal_and_wrapped:
+                # xticklabels will be vertical and not wrapped
+                ax.set_xticklabels(list(xticks.values()), rotation=90)
+            else:
+                wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()]
+                ax.set_xticklabels(wrapped_labs)
+
+        ax.grid(axis="y")
+        ax.spines['top'].set_visible(False)
+        ax.spines['right'].set_visible(False)
+        fig.tight_layout()
+
+        return fig, ax
+
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
 
@@ -140,8 +226,70 @@ def find_difference_relative_to_comparison_series_dataframe(_df: pd.DataFrame, *
 
     num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names)
     num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names)
-    num_appts_summarized = summarize(num_appts).T.unstack().reindex(param_names)
-    num_services_summarize = summarize(num_services).loc[0].unstack().reindex(param_names)
+    num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names)
+    num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names)
+
+    # plot absolute numbers for scenarios
+
+    name_of_plot = f'Services, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(num_services_summarized / 1e6, xticklabels_horizontal_and_wrapped=True,
+                                  put_labels_in_legend=True)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('(Millions)')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Deaths, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6, xticklabels_horizontal_and_wrapped=True,
+                                  put_labels_in_legend=True)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('(Millions)')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'DALYs, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6, xticklabels_horizontal_and_wrapped=True,
+                                  put_labels_in_legend=True)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('(Millions)')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Services by appointment type, {target_period()}'
+    num_appts_summarized_in_millions = num_appts_summarized / 1e6
+    appt_color = {
+        appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized_in_millions.columns
+    }
+    yerr_services = np.array([
+        (num_services_summarized['mean'].values - num_services_summarized['lower']).values,
+        (num_services_summarized['upper'].values - num_services_summarized['mean']).values,
+    ])/1e6
+    fig, ax = plt.subplots()
+    num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    ax.errorbar([0, 1], num_services_summarized['mean'].values / 1e6, yerr=yerr_services,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in num_appts_summarized_in_millions.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    # todo
+    # get data of extra budget, extra staff
+    # calculate return on investment
+    # plot DALYS by causes
 
 
 if __name__ == "__main__":
@@ -151,21 +299,21 @@ def find_difference_relative_to_comparison_series_dataframe(_df: pd.DataFrame, *
 
     # Produce results for short-term analysis: 5 years
 
-    # 2015-2019
-    apply(
-        results_folder=args.results_folder,
-        output_folder=args.results_folder,
-        resourcefilepath=Path('./resources'),
-        the_target_period=(Date(2015, 1, 1), Date(2019, 12, 31))
-    )
-
-    # 2020-2024
-    apply(
-        results_folder=args.results_folder,
-        output_folder=args.results_folder,
-        resourcefilepath=Path('./resources'),
-        the_target_period=(Date(2020, 1, 1), Date(2024, 12, 31))
-    )
+    # # 2015-2019, before change, incl. mode, hr expansion, etc.
+    # apply(
+    #     results_folder=args.results_folder,
+    #     output_folder=args.results_folder,
+    #     resourcefilepath=Path('./resources'),
+    #     the_target_period=(Date(2015, 1, 1), Date(2019, 12, 31))
+    # )
+    #
+    # # 2020-2024
+    # apply(
+    #     results_folder=args.results_folder,
+    #     output_folder=args.results_folder,
+    #     resourcefilepath=Path('./resources'),
+    #     the_target_period=(Date(2020, 1, 1), Date(2024, 12, 31))
+    # )
 
     # Produce results for long-term analysis: 10 years
     # 2020-2029

From 708ef84449d2575baf011cf88e21f035d94dab5a Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 20 Aug 2024 22:27:04 +0100
Subject: [PATCH 056/218] change to 10 runs per draw

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 0ead19d18b..73062069ca 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -34,7 +34,7 @@ def __init__(self):
         self.pop_size = 20_000  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 5  # todo: TBC
+        self.runs_per_draw = 10  # todo: TBC
 
     def log_configuration(self):
         return {

From a1e25d39f855a212694ebd95d598f7fba569a171 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 20 Aug 2024 22:52:43 +0100
Subject: [PATCH 057/218] fix failing checks

---
 .../analysis_hr_expandsion_by_officer_type_with_extra_budget.py  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 397ad8bfbd..a299c7cfe8 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -22,7 +22,6 @@
     APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
     COARSE_APPT_TYPE_TO_COLOR_MAP,
     extract_results,
-    make_age_grp_lookup,
     summarize,
 )
 

From 8cc937df5b44ff071a9053993945acf745f727a7 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 21 Aug 2024 11:42:06 +0100
Subject: [PATCH 058/218] plot dalys by cause

---
 ...dsion_by_officer_type_with_extra_budget.py | 64 ++++++++++++++++++-
 1 file changed, 61 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index a299c7cfe8..1ae40a99f5 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -20,6 +20,7 @@
 from tlo import Date
 from tlo.analysis.utils import (
     APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
+    CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP,
     COARSE_APPT_TYPE_TO_COLOR_MAP,
     extract_results,
     summarize,
@@ -88,9 +89,9 @@ def get_num_deaths(_df):
         return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)]))
 
     def get_num_dalys(_df):
-        """Return total number of DALYS (Stacked) by label (total within the TARGET_PERIOD).
+        """Return total number of DALYS (Stacked) (total within the TARGET_PERIOD).
         Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using
-        results from runs that crashed mid-way through the simulation.
+        results from runs that crashed mid-way through the simulation).
         """
         years_needed = [i.year for i in TARGET_PERIOD]
         assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded."
@@ -101,6 +102,18 @@ def get_num_dalys(_df):
             .sum().sum()
         )
 
+    def get_num_dalys_by_cause(_df):
+        """Return total number of DALYS by cause (Stacked) (total within the TARGET_PERIOD).
+        Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using
+        results from runs that crashed mid-way through the simulation).
+        """
+        years_needed = [i.year for i in TARGET_PERIOD]
+        assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded."
+        return (_df
+                .loc[_df.year.between(*years_needed)].drop(columns=['date', 'year', 'li_wealth'])
+                .sum(axis=0)
+                )
+
     def set_param_names_as_column_index_level_0(_df):
         """Set the columns index (level 0) as the param_names."""
         ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)}
@@ -207,6 +220,14 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
+    num_dalys_by_cause = extract_results(
+        results_folder,
+        module="tlo.methods.healthburden",
+        key="dalys_by_wealth_stacked_by_age_and_time",
+        custom_generate_series=get_num_dalys_by_cause,
+        do_scaling=True,
+    ).pipe(set_param_names_as_column_index_level_0)
+
     num_appts = extract_results(
         results_folder,
         module='tlo.methods.healthsystem.summary',
@@ -224,6 +245,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
     ).pipe(set_param_names_as_column_index_level_0)
 
     num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names)
+    num_dalys_by_cause_summarized = summarize(num_dalys_by_cause, only_mean=True).T.reindex(param_names)
     num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names)
     num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names)
     num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names)
@@ -285,10 +307,46 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'DALYs by cause, {target_period()}'
+    num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6
+    cause_color = {
+        cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
+        for cause in num_dalys_by_cause_summarized_in_millions.columns
+    }
+    yerr_dalys = np.array([
+        (num_dalys_summarized['mean'].values - num_dalys_summarized['lower']).values,
+        (num_dalys_summarized['upper'].values - num_dalys_summarized['mean']).values,
+    ])/1e6
+    fig, ax = plt.subplots()
+    num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
+    ax.errorbar([0, 1], num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_summarized_in_millions.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    fig.subplots_adjust(right=0.7)
+    ax.legend(
+        loc="center left",
+        bbox_to_anchor=(0.705, 0.520),
+        bbox_transform=fig.transFigure,
+        title='Cause of death or injury',
+        title_fontsize='x-small',
+        fontsize='x-small',
+        reverse=True,
+        ncol=1
+    )
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     # todo
     # get data of extra budget, extra staff
     # calculate return on investment
-    # plot DALYS by causes
+    # get and plot services by short treatment id
+    # get and plot comparison results
 
 
 if __name__ == "__main__":

From c7a0a5a24032faeef9296dadd56a36d3e1cfbe2c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 21 Aug 2024 15:54:40 +0100
Subject: [PATCH 059/218] get comparison results

---
 ...dsion_by_officer_type_with_extra_budget.py | 81 ++++++++++++++++---
 1 file changed, 71 insertions(+), 10 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 1ae40a99f5..d6d9c537f4 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -70,7 +70,7 @@ def get_parameter_names_from_scenario_file() -> Tuple[str]:
         return tuple(e._scenarios.keys())
 
     def get_num_appts(_df):
-        """Return the number of appointments per appt type (total within the TARGET_PERIOD)"""
+        """Return the number of services by appt type (total within the TARGET_PERIOD)"""
         return (_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code']
                 .apply(pd.Series)
                 .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP)
@@ -78,7 +78,7 @@ def get_num_appts(_df):
                 .sum())
 
     def get_num_services(_df):
-        """Return the number of appointments in total of all appt types (total within the TARGET_PERIOD)"""
+        """Return the number of services in total of all appt types (total within the TARGET_PERIOD)"""
         return pd.Series(
             data=_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code']
             .apply(pd.Series).sum().sum()
@@ -131,13 +131,14 @@ def find_difference_relative_to_comparison_series(
         """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0)
         within the runs (level 1), relative to where draw = `comparison`.
         The comparison is `X - COMPARISON`."""
-        return _ser \
-            .unstack(level=0) \
-            .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \
-            .drop(columns=([comparison] if drop_comparison else [])) \
-            .stack()
+        return (_ser
+                .unstack(level=0)
+                .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1)
+                .drop(columns=([comparison] if drop_comparison else []))
+                .stack()
+                )
 
-    def find_difference_relative_to_comparison_series_dataframe(_df: pd.DataFrame, **kwargs):
+    def find_difference_relative_to_comparison_dataframe(_df: pd.DataFrame, **kwargs):
         """Apply `find_difference_relative_to_comparison_series` to each row in a dataframe"""
         return pd.concat({
             _idx: find_difference_relative_to_comparison_series(row, **kwargs)
@@ -244,11 +245,71 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
+    # get absolute numbers for scenarios
     num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names)
     num_dalys_by_cause_summarized = summarize(num_dalys_by_cause, only_mean=True).T.reindex(param_names)
+
     num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names)
-    num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names)
+
     num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names)
+    num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names)
+
+    # get relative numbers for scenarios, compared to no_expansion scenario: s_1
+    num_services_increased = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_services.loc[0],
+                comparison='s_1')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).drop(['s_1'])
+
+    num_deaths_averted = summarize(
+        -1.0 *
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_deaths.loc[0],
+                comparison='s_1')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).drop(['s_1'])
+
+    num_dalys_averted = summarize(
+        -1.0 *
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_dalys.loc[0],
+                comparison='s_1')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).drop(['s_1'])
+
+    num_dalys_by_cause_averted = summarize(
+        -1.0 * find_difference_relative_to_comparison_dataframe(
+            num_dalys_by_cause,
+            comparison='s_1',
+        ),
+        only_mean=True
+    )
+
+    num_appts_increased = summarize(
+        find_difference_relative_to_comparison_dataframe(
+            num_appts,
+            comparison='s_1',
+        ),
+        only_mean=True
+    )
+
+    # Check that when we sum across the causes/appt types,
+    # we get the same total as calculated when we didn't split by cause/appt type.
+    assert (
+        (num_appts_increased.sum(axis=0).sort_index()
+         - num_services_increased['mean'].sort_index()
+         ) < 1e-6
+    ).all()
+
+    assert (
+        (num_dalys_by_cause_averted.sum(axis=0).sort_index()
+         - num_dalys_averted['mean'].sort_index()
+         ) < 1e-6
+    ).all()
 
     # plot absolute numbers for scenarios
 
@@ -346,7 +407,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
     # get data of extra budget, extra staff
     # calculate return on investment
     # get and plot services by short treatment id
-    # get and plot comparison results
+    # plot comparison results
 
 
 if __name__ == "__main__":

From bec9530b7586d69768ab2e4031fbf3997187cd8c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 21 Aug 2024 16:45:04 +0100
Subject: [PATCH 060/218] plot comparison results

---
 ...dsion_by_officer_type_with_extra_budget.py | 82 ++++++++++++++++---
 1 file changed, 70 insertions(+), 12 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index d6d9c537f4..bd49f91f23 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -287,7 +287,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
             comparison='s_1',
         ),
         only_mean=True
-    )
+    ).T
 
     num_appts_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
@@ -295,22 +295,31 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
             comparison='s_1',
         ),
         only_mean=True
-    )
+    ).T
 
     # Check that when we sum across the causes/appt types,
     # we get the same total as calculated when we didn't split by cause/appt type.
     assert (
-        (num_appts_increased.sum(axis=0).sort_index()
+        (num_appts_increased.sum(axis=1).sort_index()
          - num_services_increased['mean'].sort_index()
          ) < 1e-6
     ).all()
 
     assert (
-        (num_dalys_by_cause_averted.sum(axis=0).sort_index()
+        (num_dalys_by_cause_averted.sum(axis=1).sort_index()
          - num_dalys_averted['mean'].sort_index()
          ) < 1e-6
     ).all()
 
+    # prepare colors for plots
+    appt_color = {
+        appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns
+    }
+    cause_color = {
+        cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
+        for cause in num_dalys_by_cause_summarized.columns
+    }
+
     # plot absolute numbers for scenarios
 
     name_of_plot = f'Services, {target_period()}'
@@ -345,9 +354,6 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
 
     name_of_plot = f'Services by appointment type, {target_period()}'
     num_appts_summarized_in_millions = num_appts_summarized / 1e6
-    appt_color = {
-        appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized_in_millions.columns
-    }
     yerr_services = np.array([
         (num_services_summarized['mean'].values - num_services_summarized['lower']).values,
         (num_services_summarized['upper'].values - num_services_summarized['mean']).values,
@@ -370,10 +376,6 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
 
     name_of_plot = f'DALYs by cause, {target_period()}'
     num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6
-    cause_color = {
-        cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
-        for cause in num_dalys_by_cause_summarized_in_millions.columns
-    }
     yerr_dalys = np.array([
         (num_dalys_summarized['mean'].values - num_dalys_summarized['lower']).values,
         (num_dalys_summarized['upper'].values - num_dalys_summarized['mean']).values,
@@ -403,11 +405,67 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
     fig.show()
     plt.close(fig)
 
+    # plot relative numbers for scenarios
+
+    name_of_plot = f'Services increased by appointment type, {target_period()}'
+    num_appts_increased_in_millions = num_appts_increased / 1e6
+    yerr_services = np.array([
+        (num_services_increased['mean'].values - num_services_increased['lower']).values,
+        (num_services_increased['upper'].values - num_services_increased['mean']).values,
+    ]) / 1e6
+    fig, ax = plt.subplots()
+    num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    ax.errorbar(0, num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+               fontsize='small')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'DALYs averted by cause, {target_period()}'
+    num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
+    yerr_dalys = np.array([
+        (num_dalys_averted['mean'].values - num_dalys_averted['lower']).values,
+        (num_dalys_averted['upper'].values - num_dalys_averted['mean']).values,
+    ]) / 1e6
+    fig, ax = plt.subplots()
+    num_dalys_by_cause_averted_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
+    ax.errorbar(0, num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_averted.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    fig.subplots_adjust(right=0.7)
+    ax.legend(
+        loc="center left",
+        bbox_to_anchor=(0.705, 0.520),
+        bbox_transform=fig.transFigure,
+        title='Cause of death or injury',
+        title_fontsize='x-small',
+        fontsize='x-small',
+        ncol=1
+    )
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     # todo
     # get data of extra budget, extra staff
     # calculate return on investment
     # get and plot services by short treatment id
-    # plot comparison results
+    # plot comparison results: there are negative changes of some appts and causes, try increase runs and see
+    # as we have 17 scenarios in total, \
+    # design comparison groups of scenarios to examine marginal/combined productivity of cadres
 
 
 if __name__ == "__main__":

From 9a5d6af965fb797826d3972616360a3da4b7f58f Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 22 Aug 2024 11:32:56 +0100
Subject: [PATCH 061/218] update logger format of scale up factors

---
 src/tlo/methods/healthsystem.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 780fc44389..a9ca87ab74 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3043,8 +3043,9 @@ def apply(self, population):
                                         'Clinical, DCSA, Nursing_and_Midwifery, Pharmacy - '
                                         'given fractions of an extra budget',
                             data={
-                                'Scale up factor': [sf_clinical, sf_dcsa, sf_nursing, sf_pharmacy],
-                                'Year of scaling up': self.sim.date.year,
+                                'scale_up_factor': {'Clinical': sf_clinical, 'DCSA': sf_dcsa,
+                                                    'Nursing_and_Midwifery': sf_nursing, 'Pharmacy': sf_pharmacy},
+                                'year_of_scale_up': self.sim.date.year,
                             }
                             )
 

From c1741a7e68d2266d781382f1d86bcf0476024467 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 22 Aug 2024 13:37:37 +0100
Subject: [PATCH 062/218] check if scale to effective capabilities = True will
 change 2019 capabilities

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 73062069ca..da49ed85b9 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -83,7 +83,7 @@ def _baseline(self) -> Dict:
             {'HealthSystem': {
                 'mode_appt_constraints': 1,
                 'mode_appt_constraints_postSwitch': 2,
-                "scale_to_effective_capabilities": True,  # todo: TBC
+                "scale_to_effective_capabilities": True,  # todo: TBC; will this change the capabilities of 2019?
                 "year_mode_switch": self.YEAR_OF_CHANGE,
                 'cons_availability': 'default',
                 'cons_availability_postSwitch': 'all',

From 853abc6f46c8aba886ee5f1701e09d7458c45ca0 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 22 Aug 2024 13:58:14 +0100
Subject: [PATCH 063/218] Revert "update logger format of scale up factors"

This reverts commit 9a5d6af965fb797826d3972616360a3da4b7f58f.
---
 src/tlo/methods/healthsystem.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index a9ca87ab74..780fc44389 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3043,9 +3043,8 @@ def apply(self, population):
                                         'Clinical, DCSA, Nursing_and_Midwifery, Pharmacy - '
                                         'given fractions of an extra budget',
                             data={
-                                'scale_up_factor': {'Clinical': sf_clinical, 'DCSA': sf_dcsa,
-                                                    'Nursing_and_Midwifery': sf_nursing, 'Pharmacy': sf_pharmacy},
-                                'year_of_scale_up': self.sim.date.year,
+                                'Scale up factor': [sf_clinical, sf_dcsa, sf_nursing, sf_pharmacy],
+                                'Year of scaling up': self.sim.date.year,
                             }
                             )
 

From fcb5b75ad99eb785bb78b053495bc3eeadbb3592 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 22 Aug 2024 14:00:32 +0100
Subject: [PATCH 064/218] get hr scale up factors

---
 ...dsion_by_officer_type_with_extra_budget.py | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index bd49f91f23..3b2c7a8691 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -201,9 +201,32 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
 
         return fig, ax
 
+    def get_scale_up_factor(_df):
+        """
+        Return a series of yearly scale up factors for four cadres - Clinical, DCSA, Nursing_and_Midwifery, Pharmacy,
+        with index of year and value of list of the four scale up factors.
+        """
+        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['Year of scaling up', 'Scale up factor']]
+        return pd.Series(
+            _df['Scale up factor'].values, index=_df['Year of scaling up']
+        )
+
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
 
+    # Get scale up factors for all scenarios
+    scale_up_factors = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HRScaling',
+        custom_generate_series=get_scale_up_factor,
+        do_scaling=False
+    ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
+    # check that the scale up factors are the same between each run within each draw
+    assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all()
+    # keep scale up factors of only one run within each draw
+    scale_up_factors = scale_up_factors.iloc[:, 0].unstack()
+
     # Absolute Number of Deaths and DALYs and Services
     num_deaths = extract_results(
         results_folder,
@@ -466,6 +489,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
     # plot comparison results: there are negative changes of some appts and causes, try increase runs and see
     # as we have 17 scenarios in total, \
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres
+    # do update HRScaling logger: year_of_scale_up, scale_up_factor, and get_scale_up_factor function
 
 
 if __name__ == "__main__":

From 025f018e624b5504dea53bcf69eb846f66f4c03b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 22 Aug 2024 14:58:24 +0100
Subject: [PATCH 065/218] get current hr count by cadre

---
 ...xpandsion_by_officer_type_with_extra_budget.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 3b2c7a8691..4f8cb32b90 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -211,9 +211,24 @@ def get_scale_up_factor(_df):
             _df['Scale up factor'].values, index=_df['Year of scaling up']
         )
 
+    def get_current_hr(cadres):
+        """
+        Return current (year of 2019) staff counts and capabilities for the cadres specified.
+        """
+        curr_hr_path = Path(resourcefilepath
+                            / 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv')
+        curr_hr = pd.read_csv(curr_hr_path).groupby('Officer_Category').agg(
+            {'Staff_Count': 'sum', 'Total_Mins_Per_Day': 'sum'}).reset_index()
+        curr_hr['Total_Minutes_Per_Year'] = curr_hr['Total_Mins_Per_Day'] * 365.25
+        curr_hr.drop(['Total_Mins_Per_Day'], axis=1, inplace=True)
+        return curr_hr.loc[curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Staff_Count']]
+
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
 
+    # Get current (year of 2019) hr counts
+    curr_hr = get_current_hr(['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])
+
     # Get scale up factors for all scenarios
     scale_up_factors = extract_results(
         results_folder,

From 40219f4535f83ed7142584ac297fd9651b09f816 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 22 Aug 2024 15:58:45 +0100
Subject: [PATCH 066/218] get hr salary and update scale up factors

---
 ...pandsion_by_officer_type_with_extra_budget.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 4f8cb32b90..13b25284b8 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -223,12 +223,24 @@ def get_current_hr(cadres):
         curr_hr.drop(['Total_Mins_Per_Day'], axis=1, inplace=True)
         return curr_hr.loc[curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Staff_Count']]
 
+    def get_hr_salary(cadres):
+        """
+        Return annual salary for the cadres specified.
+        """
+        salary_path = Path(resourcefilepath
+                           / 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv')
+        salary = pd.read_csv(salary_path, index_col=False)
+        return salary.loc[salary['Officer_Category'].isin(cadres), ['Officer_Category', 'Annual_Salary_USD']]
+
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
 
     # Get current (year of 2019) hr counts
     curr_hr = get_current_hr(['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])
 
+    # Get salary
+    salary = get_hr_salary(['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])
+
     # Get scale up factors for all scenarios
     scale_up_factors = extract_results(
         results_folder,
@@ -240,7 +252,9 @@ def get_current_hr(cadres):
     # check that the scale up factors are the same between each run within each draw
     assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all()
     # keep scale up factors of only one run within each draw
-    scale_up_factors = scale_up_factors.iloc[:, 0].unstack()
+    scale_up_factors = scale_up_factors.iloc[:, 0].unstack().reset_index().melt(id_vars='Year of scaling up')
+    scale_up_factors[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy']] = scale_up_factors.value.tolist()
+    scale_up_factors.drop(columns='value', inplace=True)
 
     # Absolute Number of Deaths and DALYs and Services
     num_deaths = extract_results(

From c4a78988ae084a389468758b94d3fc11a2350f72 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 23 Aug 2024 00:17:45 +0100
Subject: [PATCH 067/218] get extra staff and extra cost

---
 ...dsion_by_officer_type_with_extra_budget.py | 32 +++++++++++++++----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 13b25284b8..5aac621797 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -10,6 +10,7 @@
 from pathlib import Path
 from typing import Tuple
 
+import numpy
 import numpy as np
 import pandas as pd
 from matplotlib import pyplot as plt
@@ -221,7 +222,10 @@ def get_current_hr(cadres):
             {'Staff_Count': 'sum', 'Total_Mins_Per_Day': 'sum'}).reset_index()
         curr_hr['Total_Minutes_Per_Year'] = curr_hr['Total_Mins_Per_Day'] * 365.25
         curr_hr.drop(['Total_Mins_Per_Day'], axis=1, inplace=True)
-        return curr_hr.loc[curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Staff_Count']]
+        curr_hr = curr_hr.loc[
+            curr_hr['Officer_Category'].isin(cadres), ['Officer_Category', 'Staff_Count']
+        ].set_index('Officer_Category').T
+        return curr_hr[cadres]
 
     def get_hr_salary(cadres):
         """
@@ -230,16 +234,20 @@ def get_hr_salary(cadres):
         salary_path = Path(resourcefilepath
                            / 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv')
         salary = pd.read_csv(salary_path, index_col=False)
-        return salary.loc[salary['Officer_Category'].isin(cadres), ['Officer_Category', 'Annual_Salary_USD']]
+        salary = salary.loc[
+            salary['Officer_Category'].isin(cadres), ['Officer_Category', 'Annual_Salary_USD']
+        ].set_index('Officer_Category').T
+        return salary[cadres]
 
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
 
     # Get current (year of 2019) hr counts
-    curr_hr = get_current_hr(['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])
+    cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy']
+    curr_hr = get_current_hr(cadres)
 
     # Get salary
-    salary = get_hr_salary(['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])
+    salary = get_hr_salary(cadres)
 
     # Get scale up factors for all scenarios
     scale_up_factors = extract_results(
@@ -253,9 +261,22 @@ def get_hr_salary(cadres):
     assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all()
     # keep scale up factors of only one run within each draw
     scale_up_factors = scale_up_factors.iloc[:, 0].unstack().reset_index().melt(id_vars='Year of scaling up')
-    scale_up_factors[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy']] = scale_up_factors.value.tolist()
+    scale_up_factors[cadres] = scale_up_factors.value.tolist()
     scale_up_factors.drop(columns='value', inplace=True)
 
+    # Get total extra staff counts by officer type and total extra budget within the target period for all scenarios
+    years = range(2020, the_target_period[1].year + 1)
+    integrated_scale_up_factor = pd.DataFrame(index=list(param_names), columns=cadres)
+    for s in integrated_scale_up_factor.index:
+        integrated_scale_up_factor.loc[s] = scale_up_factors.loc[
+            (scale_up_factors['Year of scaling up'].isin(years)) & (scale_up_factors['draw'] == s), cadres
+        ].product()
+
+    total_staff = pd.DataFrame(integrated_scale_up_factor.mul(curr_hr.values, axis=1))
+    total_cost = pd.DataFrame(total_staff.mul(salary.values, axis=1))
+    extra_staff = pd.DataFrame(total_staff.subtract(total_staff.loc['s_1'], axis=1))
+    extra_cost = pd.DataFrame(total_cost.subtract(total_cost.loc['s_1'], axis=1))
+
     # Absolute Number of Deaths and DALYs and Services
     num_deaths = extract_results(
         results_folder,
@@ -512,7 +533,6 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     # todo
-    # get data of extra budget, extra staff
     # calculate return on investment
     # get and plot services by short treatment id
     # plot comparison results: there are negative changes of some appts and causes, try increase runs and see

From 1ebd47e23f8073cd7cee6bdb9e3cb9dc7e46a371 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 23 Aug 2024 12:56:49 +0100
Subject: [PATCH 068/218] manual calculation of total cost and total staff
 counts for each year and each scenario

---
 ...e_hr_minute_salary_by_officer_type_facility_id.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index e81a8b82f3..bca43154c8 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -42,7 +42,8 @@
 staff_cost['annual_cost'] = staff_cost['Staff_Count'] * staff_cost['Annual_Salary_USD']
 four_cadres_cost = staff_cost.loc[
     staff_cost.Officer_Category.isin(['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])].reset_index(drop=True)
-four_cadres_cost['cost_frac'] = four_cadres_cost['annual_cost'] / four_cadres_cost['annual_cost'].sum()
+four_cadres_cost['cost_frac'] = (four_cadres_cost['annual_cost'] / four_cadres_cost['annual_cost'].sum())
+# x = four_cadres_cost.loc[0, 'cost_frac'].as_integer_ratio()
 assert four_cadres_cost.cost_frac.sum() == 1
 
 
@@ -89,6 +90,15 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
     for y in range(2020, 2030):
         scale_up_factor_dict[s][y] = calculate_hr_scale_up_factor(list(extra_budget_frac_data[s]), y, s)
 
+# get the total cost and staff count for each year between 2020-2030 and each scenario
+total_cost = pd.DataFrame(index=range(2020, 2030), columns=extra_budget_frac_data.columns)
+total_staff = pd.DataFrame(index=range(2020, 2030), columns=extra_budget_frac_data.columns)
+for y in total_cost.index:
+    for s in extra_budget_frac_data.columns:
+        total_cost.loc[y, s] = scale_up_factor_dict[s][y].annual_cost.sum()
+        total_staff.loc[y, s] = scale_up_factor_dict[s][y].Staff_Count.sum()
+
+
 # save and read pickle file
 pickle_file_path = Path(resourcefilepath / 'healthsystem' / 'human_resources' / 'scaling_capabilities' /
                         'ResourceFile_HR_expansion_by_officer_type_yearly_scale_up_factors.pickle')

From d4d89e0c3969bd6de87eb7f61b9271c69751bb1b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 23 Aug 2024 12:58:39 +0100
Subject: [PATCH 069/218] calculate total cost and staff counts by the end of
 target period

---
 ...is_hr_expandsion_by_officer_type_with_extra_budget.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 5aac621797..e948ca65f7 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -274,9 +274,17 @@ def get_hr_salary(cadres):
 
     total_staff = pd.DataFrame(integrated_scale_up_factor.mul(curr_hr.values, axis=1))
     total_cost = pd.DataFrame(total_staff.mul(salary.values, axis=1))
+    total_staff['all_four_cadres'] = total_staff.sum(axis=1)
+    total_cost['all_four_cadres'] = total_cost.sum(axis=1)
+
     extra_staff = pd.DataFrame(total_staff.subtract(total_staff.loc['s_1'], axis=1))
     extra_cost = pd.DataFrame(total_cost.subtract(total_cost.loc['s_1'], axis=1))
 
+    # check total cost calculated is increased as expected - approximate float of a fraction can sacrifice some budget
+    # for s in param_names[1:]:
+    #     assert abs(total_cost.loc[s, 'all_four_cadres'] -
+    #                (1 + 0.042) ** (len(years)) * total_cost.loc['s_1', 'all_four_cadres']) < 1e6
+
     # Absolute Number of Deaths and DALYs and Services
     num_deaths = extract_results(
         results_folder,
@@ -539,6 +547,7 @@ def get_hr_salary(cadres):
     # as we have 17 scenarios in total, \
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres
     # do update HRScaling logger: year_of_scale_up, scale_up_factor, and get_scale_up_factor function
+    # update the extra budget fraction file so that floats have more digits, more close to the expected fractions.
 
 
 if __name__ == "__main__":

From 6e5f345efd3979ee8d5652b6244609d5403f90c1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 11:37:23 +0100
Subject: [PATCH 070/218] Calculate and plot ROI and CE

---
 ...dsion_by_officer_type_with_extra_budget.py | 80 +++++++++++++------
 1 file changed, 57 insertions(+), 23 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index e948ca65f7..6c1e543e35 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -158,9 +158,11 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
         xticks = {(i + 0.5): k for i, k in enumerate(_df.index)}
 
         # Define colormap (used only with option `put_labels_in_legend=True`)
+        # todo: could refine colors for each scenario once scenarios are confirmed
         cmap = plt.get_cmap("tab20")
         rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y))  # noqa: E731
-        colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None
+        colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend and len(xticks) > 1 \
+            else None
 
         fig, ax = plt.subplots(figsize=(10, 5))
         ax.bar(
@@ -180,13 +182,12 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
         ax.set_xticks(list(xticks.keys()))
 
         if put_labels_in_legend:
-            # Update xticks label with substitute labels
-            # Insert legend with updated labels that shows correspondence between substitute label and original label
-            xtick_values = [substitute_labels[v] for v in xticks.values()]
+            # Set x-axis labels as simple scenario names
+            # Insert legend to explain scenarios
             xtick_legend = [f'{v}: {substitute_labels[v]}' for v in xticks.values()]
             h, _ = ax.get_legend_handles_labels()
             ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5))
-            ax.set_xticklabels(list(xtick_values))
+            ax.set_xticklabels(list(xticks.values()))
         else:
             if not xticklabels_horizontal_and_wrapped:
                 # xticklabels will be vertical and not wrapped
@@ -277,10 +278,11 @@ def get_hr_salary(cadres):
     total_staff['all_four_cadres'] = total_staff.sum(axis=1)
     total_cost['all_four_cadres'] = total_cost.sum(axis=1)
 
-    extra_staff = pd.DataFrame(total_staff.subtract(total_staff.loc['s_1'], axis=1))
-    extra_cost = pd.DataFrame(total_cost.subtract(total_cost.loc['s_1'], axis=1))
+    extra_staff = pd.DataFrame(total_staff.subtract(total_staff.loc['s_1'], axis=1).drop(index='s_1').all_four_cadres)
+    extra_cost = pd.DataFrame(total_cost.subtract(total_cost.loc['s_1'], axis=1).drop(index='s_1').all_four_cadres)
 
     # check total cost calculated is increased as expected - approximate float of a fraction can sacrifice some budget
+    # to run the following checks once the approximate float issue is solved
     # for s in param_names[1:]:
     #     assert abs(total_cost.loc[s, 'all_four_cadres'] -
     #                (1 + 0.042) ** (len(years)) * total_cost.loc['s_1', 'all_four_cadres']) < 1e6
@@ -392,6 +394,17 @@ def get_hr_salary(cadres):
          ) < 1e-6
     ).all()
 
+    # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
+    # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
+    ROI = pd.DataFrame(index=num_deaths_averted.index, columns=num_dalys_averted.columns)
+    CE = pd.DataFrame(index=num_deaths_averted.index, columns=num_dalys_averted.columns)
+    assert (ROI.index == extra_cost.index).all()
+    for i in ROI.index:
+        ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost.loc[i, 'all_four_cadres']
+        CE.loc[i, 'mean'] = extra_cost.loc[i, 'all_four_cadres'] / num_dalys_averted.loc[i, 'mean']
+        CE.loc[i, 'lower'] = extra_cost.loc[i, 'all_four_cadres'] / num_dalys_averted.loc[i, 'upper']
+        CE.loc[i, 'upper'] = extra_cost.loc[i, 'all_four_cadres'] / num_dalys_averted.loc[i, 'lower']
+
     # prepare colors for plots
     appt_color = {
         appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns
@@ -403,16 +416,6 @@ def get_hr_salary(cadres):
 
     # plot absolute numbers for scenarios
 
-    name_of_plot = f'Services, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(num_services_summarized / 1e6, xticklabels_horizontal_and_wrapped=True,
-                                  put_labels_in_legend=True)
-    ax.set_title(name_of_plot)
-    ax.set_ylabel('(Millions)')
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
-
     name_of_plot = f'Deaths, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6, xticklabels_horizontal_and_wrapped=True,
                                   put_labels_in_legend=True)
@@ -487,6 +490,15 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     # plot relative numbers for scenarios
+    name_of_plot = f'DALYs averted, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, xticklabels_horizontal_and_wrapped=True,
+                                  put_labels_in_legend=True)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('(Millions)')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
 
     name_of_plot = f'Services increased by appointment type, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6
@@ -540,14 +552,36 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    # plot ROI and CE for all expansion scenarios
+
+    name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(ROI, xticklabels_horizontal_and_wrapped=True,
+                                  put_labels_in_legend=True)
+    ax.set_title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Cost per every DALYs averted, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(CE, xticklabels_horizontal_and_wrapped=True,
+                                  put_labels_in_legend=True)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('USD dollars')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     # todo
-    # calculate return on investment
-    # get and plot services by short treatment id
-    # plot comparison results: there are negative changes of some appts and causes, try increase runs and see
-    # as we have 17 scenarios in total, \
+    # Get and plot services by short treatment id?
+    # Plot comparison results: there are negative changes of some appts and causes, try increase runs and see
+    # As we have 17 scenarios in total, \
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres
-    # do update HRScaling logger: year_of_scale_up, scale_up_factor, and get_scale_up_factor function
-    # update the extra budget fraction file so that floats have more digits, more close to the expected fractions.
+    # Do update HRScaling logger: year_of_scale_up, scale_up_factor, and get_scale_up_factor function
+    # Update extra budget fraction scenarios so that floats have more digits, more close to the expected fractions?
+    # Update extra budget fraction scenarios so that fractions always reflect cost distributions among two/three/four cadres?
+    # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary?
 
 
 if __name__ == "__main__":

From 1e9113260586afdf5be20a4528161b84063d7c85 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 11:43:41 +0100
Subject: [PATCH 071/218] fix failing checks and plot deaths averted

---
 ...xpandsion_by_officer_type_with_extra_budget.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 6c1e543e35..cf6765f4ac 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -10,7 +10,6 @@
 from pathlib import Path
 from typing import Tuple
 
-import numpy
 import numpy as np
 import pandas as pd
 from matplotlib import pyplot as plt
@@ -275,10 +274,10 @@ def get_hr_salary(cadres):
 
     total_staff = pd.DataFrame(integrated_scale_up_factor.mul(curr_hr.values, axis=1))
     total_cost = pd.DataFrame(total_staff.mul(salary.values, axis=1))
-    total_staff['all_four_cadres'] = total_staff.sum(axis=1)
+    # total_staff['all_four_cadres'] = total_staff.sum(axis=1)
     total_cost['all_four_cadres'] = total_cost.sum(axis=1)
 
-    extra_staff = pd.DataFrame(total_staff.subtract(total_staff.loc['s_1'], axis=1).drop(index='s_1').all_four_cadres)
+    # extra_staff = pd.DataFrame(total_staff.subtract(total_staff.loc['s_1'], axis=1).drop(index='s_1').all_four_cadres)
     extra_cost = pd.DataFrame(total_cost.subtract(total_cost.loc['s_1'], axis=1).drop(index='s_1').all_four_cadres)
 
     # check total cost calculated is increased as expected - approximate float of a fraction can sacrifice some budget
@@ -500,6 +499,16 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Deaths averted, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, xticklabels_horizontal_and_wrapped=True,
+                                  put_labels_in_legend=True)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('(Millions)')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Services increased by appointment type, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6
     yerr_services = np.array([

From 5d003d6fcdb176e2d23494334bce488df3caa2dc Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 17:45:35 +0100
Subject: [PATCH 072/218] add todo

---
 .../analysis_hr_expandsion_by_officer_type_with_extra_budget.py  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index cf6765f4ac..baed6136aa 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -27,6 +27,7 @@
 )
 
 # rename scenarios
+# todo: to update once scenarios confirmed
 substitute_labels = {
     's_1': 'no_expansion',
     's_2': 'CDNP_expansion_current',

From 22483a046e0882d3d9d0dadcd873b6e0612acb10 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 18:03:01 +0100
Subject: [PATCH 073/218] prepare scenarios of fractions that reflect current
 cost distribution among two/three/four cadres

---
 ...nute_salary_by_officer_type_facility_id.py | 75 +++++++++++++++----
 1 file changed, 61 insertions(+), 14 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index bca43154c8..1a325573d5 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -1,9 +1,11 @@
 """
 We calculate the salar cost of current and funded plus HCW.
 """
+import itertools
 import pickle
 from pathlib import Path
 
+import numpy as np
 import pandas as pd
 
 resourcefilepath = Path('./resources')
@@ -46,6 +48,37 @@
 # x = four_cadres_cost.loc[0, 'cost_frac'].as_integer_ratio()
 assert four_cadres_cost.cost_frac.sum() == 1
 
+# Calculate the current cost distribution of one/two/three/four cadres and define them as scenarios
+# We confirmed/can prove that in such expansion scenarios of two/three/four cadres,
+# the annual scale up factors are actually equal for cadres,
+# equal to 1 + annual extra cost / total current cost of two/three/four cadres.
+# One possible issue is that Pharmacy cost has only small fractions in all multi-cadre scenarios,
+# as its current fraction is small; we have estimated that Pharmacy cadre is extremely in shortage,
+# thus these scenarios might still face huge shortages.
+cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy']
+combination_list = ['']
+for n in range(1, len(cadres)+1):
+    for subset in itertools.combinations(cadres, n):
+        combination_list.append(str(subset))
+
+cadre_to_expand = pd.DataFrame(index=cadres, columns=combination_list).fillna(0)
+cadre_to_expand.loc[:, ''] = 0  # no_expansion scenario
+for c in cadres:
+    for i in cadre_to_expand.columns:
+        if c in i:
+            cadre_to_expand.loc[c, i] = staff_cost.loc[staff_cost.Officer_Category == c, 'annual_cost'].values[0]
+
+extra_budget_frac = pd.DataFrame(index=cadre_to_expand.index, columns=cadre_to_expand.columns).fillna(0)
+for i in extra_budget_frac.columns[1:]:
+    extra_budget_frac.loc[:, i] = cadre_to_expand.loc[:, i] / cadre_to_expand.loc[:, i].sum()
+
+assert (abs(extra_budget_frac.iloc[:, 1:len(extra_budget_frac.columns)].sum(axis=0) - 1.0) < 1/1e10).all()
+
+simple_scenario_name = {}
+for i in range(len(extra_budget_frac.columns)):
+    simple_scenario_name[extra_budget_frac.columns[i]] = 's_' + str(i+1)  # name scenario from s_1
+extra_budget_frac.rename(columns=simple_scenario_name, inplace=True)
+
 
 # calculate hr scale up factor for years 2020-2030 (10 years in total) outside the healthsystem module
 
@@ -77,10 +110,11 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
 
 
 # calculate scale up factors for all defined scenarios and years
-extra_budget_frac_data = pd.read_csv(resourcefilepath
-                                     / 'healthsystem' / 'human_resources' / 'scaling_capabilities'
-                                     / 'ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv'
-                                     ).set_index('Officer_Category')
+# extra_budget_frac_data = pd.read_csv(resourcefilepath
+#                                      / 'healthsystem' / 'human_resources' / 'scaling_capabilities'
+#                                      / 'ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv'
+#                                      ).set_index('Officer_Category')
+extra_budget_frac_data = extra_budget_frac.copy()
 four_cadres_cost['scale_up_factor'] = 1
 scale_up_factor_dict = {s: {y: {} for y in range(2019, 2030)} for s in extra_budget_frac_data.columns}
 for s in extra_budget_frac_data.columns:
@@ -98,13 +132,26 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
         total_cost.loc[y, s] = scale_up_factor_dict[s][y].annual_cost.sum()
         total_staff.loc[y, s] = scale_up_factor_dict[s][y].Staff_Count.sum()
 
-
-# save and read pickle file
-pickle_file_path = Path(resourcefilepath / 'healthsystem' / 'human_resources' / 'scaling_capabilities' /
-                        'ResourceFile_HR_expansion_by_officer_type_yearly_scale_up_factors.pickle')
-
-with open(pickle_file_path, 'wb') as f:
-    pickle.dump(scale_up_factor_dict, f)
-
-with open(pickle_file_path, 'rb') as f:
-    x = pickle.load(f)
+# check the total cost after 10 years are increased as expected
+assert (
+    abs(total_cost.loc[2029, total_cost.columns[1:]] - (1 + 0.042) ** 10 * total_cost.loc[2029, 's_1']) < 1/1e6
+).all()
+
+# get the integrated scale up factors for year 2029 and each scenario
+integrated_scale_up_factor = pd.DataFrame(index=cadres, columns=total_cost.columns).fillna(1.0)
+for s in total_cost.columns[1:]:
+    for yr in range(2020, 2030):
+        integrated_scale_up_factor.loc[:, s] = np.multiply(
+            integrated_scale_up_factor.loc[:, s].values,
+            scale_up_factor_dict[s][yr].loc[:, 'scale_up_factor'].values
+        )
+
+# # save and read pickle file
+# pickle_file_path = Path(resourcefilepath / 'healthsystem' / 'human_resources' / 'scaling_capabilities' /
+#                         'ResourceFile_HR_expansion_by_officer_type_yearly_scale_up_factors.pickle')
+#
+# with open(pickle_file_path, 'wb') as f:
+#     pickle.dump(scale_up_factor_dict, f)
+#
+# with open(pickle_file_path, 'rb') as f:
+#     x = pickle.load(f)

From aae8d7b206e92cf42db265421939c7e8adadb5c0 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 22:09:47 +0100
Subject: [PATCH 074/218] instead of saving and reloading extra budget
 fractions data, try importing the results directly from a script

---
 ...nute_salary_by_officer_type_facility_id.py | 31 ++++++++-----------
 ...t_hcw_by_officer_type_with_extra_budget.py | 14 ++++-----
 2 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
index 1a325573d5..c03388ef1d 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
@@ -68,16 +68,16 @@
         if c in i:
             cadre_to_expand.loc[c, i] = staff_cost.loc[staff_cost.Officer_Category == c, 'annual_cost'].values[0]
 
-extra_budget_frac = pd.DataFrame(index=cadre_to_expand.index, columns=cadre_to_expand.columns).fillna(0)
-for i in extra_budget_frac.columns[1:]:
-    extra_budget_frac.loc[:, i] = cadre_to_expand.loc[:, i] / cadre_to_expand.loc[:, i].sum()
+extra_budget_fracs = pd.DataFrame(index=cadre_to_expand.index, columns=cadre_to_expand.columns).fillna(0)
+for i in extra_budget_fracs.columns[1:]:
+    extra_budget_fracs.loc[:, i] = cadre_to_expand.loc[:, i] / cadre_to_expand.loc[:, i].sum()
 
-assert (abs(extra_budget_frac.iloc[:, 1:len(extra_budget_frac.columns)].sum(axis=0) - 1.0) < 1/1e10).all()
+assert (abs(extra_budget_fracs.iloc[:, 1:len(extra_budget_fracs.columns)].sum(axis=0) - 1.0) < 1/1e10).all()
 
 simple_scenario_name = {}
-for i in range(len(extra_budget_frac.columns)):
-    simple_scenario_name[extra_budget_frac.columns[i]] = 's_' + str(i+1)  # name scenario from s_1
-extra_budget_frac.rename(columns=simple_scenario_name, inplace=True)
+for i in range(len(extra_budget_fracs.columns)):
+    simple_scenario_name[extra_budget_fracs.columns[i]] = 's_' + str(i+1)  # name scenario from s_1
+extra_budget_fracs.rename(columns=simple_scenario_name, inplace=True)
 
 
 # calculate hr scale up factor for years 2020-2030 (10 years in total) outside the healthsystem module
@@ -110,25 +110,20 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
 
 
 # calculate scale up factors for all defined scenarios and years
-# extra_budget_frac_data = pd.read_csv(resourcefilepath
-#                                      / 'healthsystem' / 'human_resources' / 'scaling_capabilities'
-#                                      / 'ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv'
-#                                      ).set_index('Officer_Category')
-extra_budget_frac_data = extra_budget_frac.copy()
 four_cadres_cost['scale_up_factor'] = 1
-scale_up_factor_dict = {s: {y: {} for y in range(2019, 2030)} for s in extra_budget_frac_data.columns}
-for s in extra_budget_frac_data.columns:
+scale_up_factor_dict = {s: {y: {} for y in range(2019, 2030)} for s in extra_budget_fracs.columns}
+for s in extra_budget_fracs.columns:
     # for the initial/current year of 2019
     scale_up_factor_dict[s][2019] = four_cadres_cost.drop(columns='cost_frac').copy()
     # for the years with scaled up hr
     for y in range(2020, 2030):
-        scale_up_factor_dict[s][y] = calculate_hr_scale_up_factor(list(extra_budget_frac_data[s]), y, s)
+        scale_up_factor_dict[s][y] = calculate_hr_scale_up_factor(list(extra_budget_fracs[s]), y, s)
 
 # get the total cost and staff count for each year between 2020-2030 and each scenario
-total_cost = pd.DataFrame(index=range(2020, 2030), columns=extra_budget_frac_data.columns)
-total_staff = pd.DataFrame(index=range(2020, 2030), columns=extra_budget_frac_data.columns)
+total_cost = pd.DataFrame(index=range(2020, 2030), columns=extra_budget_fracs.columns)
+total_staff = pd.DataFrame(index=range(2020, 2030), columns=extra_budget_fracs.columns)
 for y in total_cost.index:
-    for s in extra_budget_frac_data.columns:
+    for s in extra_budget_fracs.columns:
         total_cost.loc[y, s] = scale_up_factor_dict[s][y].annual_cost.sum()
         total_staff.loc[y, s] = scale_up_factor_dict[s][y].Staff_Count.sum()
 
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index da49ed85b9..3c4b58b99e 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -16,8 +16,9 @@
 from pathlib import Path
 from typing import Dict
 
-import pandas as pd
-
+from scripts.healthsystem.impact_of_hcw_capabilities_expansion.create_hr_minute_salary_by_officer_type_facility_id import (
+    extra_budget_fracs,
+)
 from tlo import Date, logging
 from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios
 from tlo.methods.fullmodel import fullmodel
@@ -61,10 +62,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
 
         self.YEAR_OF_CHANGE = 2020  # This is the year to change run settings and to start hr expansion.
 
-        self.scenarios = pd.read_csv(Path('./resources')
-                                     / 'healthsystem' / 'human_resources' / 'scaling_capabilities'
-                                     / 'ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv'
-                                     ).set_index('Officer_Category')  # do we need 'self' or not?
+        self.scenarios = extra_budget_fracs
 
         return {
             self.scenarios.columns[i]:
@@ -74,7 +72,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
                         'HR_expansion_by_officer_type': list(self.scenarios.iloc[:, i])
                     }
                     }
-                ) for i in range(len(self.scenarios.columns) - 15)
+                ) for i in range(len(self.scenarios.columns))
         }
 
     def _baseline(self) -> Dict:
@@ -83,7 +81,7 @@ def _baseline(self) -> Dict:
             {'HealthSystem': {
                 'mode_appt_constraints': 1,
                 'mode_appt_constraints_postSwitch': 2,
-                "scale_to_effective_capabilities": True,  # todo: TBC; will this change the capabilities of 2019?
+                # "scale_to_effective_capabilities": True,  # todo: TBC; will this change the capabilities of 2019?
                 "year_mode_switch": self.YEAR_OF_CHANGE,
                 'cons_availability': 'default',
                 'cons_availability_postSwitch': 'all',

From 5726154a1247f38b8695a79261e4f3052ae48537 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 22:15:21 +0100
Subject: [PATCH 075/218] rename file

---
 ...d.py => prepare_minute_salary_and_extra_budget_frac_data.py} | 0
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/{create_hr_minute_salary_by_officer_type_facility_id.py => prepare_minute_salary_and_extra_budget_frac_data.py} (100%)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
similarity index 100%
rename from src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/create_hr_minute_salary_by_officer_type_facility_id.py
rename to src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 3c4b58b99e..b639749a9b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -16,7 +16,7 @@
 from pathlib import Path
 from typing import Dict
 
-from scripts.healthsystem.impact_of_hcw_capabilities_expansion.create_hr_minute_salary_by_officer_type_facility_id import (
+from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
     extra_budget_fracs,
 )
 from tlo import Date, logging

From ec6332ac37cb2f7d466dddc1136230c839ba13d9 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 22:21:42 +0100
Subject: [PATCH 076/218] update todo list

---
 ...hr_expandsion_by_officer_type_with_extra_budget.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index baed6136aa..8ee1995664 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -158,7 +158,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
         xticks = {(i + 0.5): k for i, k in enumerate(_df.index)}
 
         # Define colormap (used only with option `put_labels_in_legend=True`)
-        # todo: could refine colors for each scenario once scenarios are confirmed
+        # todo: could re-define colors for each scenario once scenarios are confirmed
         cmap = plt.get_cmap("tab20")
         rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y))  # noqa: E731
         colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend and len(xticks) > 1 \
@@ -584,14 +584,13 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     # todo
-    # Get and plot services by short treatment id?
+    # Do update healthsystem HRScaling logger: year_of_scale_up, scale_up_factor, and get_scale_up_factor function here
     # Plot comparison results: there are negative changes of some appts and causes, try increase runs and see
-    # As we have 17 scenarios in total, \
+    # As we have 16 scenarios in total, \
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres
-    # Do update HRScaling logger: year_of_scale_up, scale_up_factor, and get_scale_up_factor function
-    # Update extra budget fraction scenarios so that floats have more digits, more close to the expected fractions?
-    # Update extra budget fraction scenarios so that fractions always reflect cost distributions among two/three/four cadres?
+    # To design more scenarios so that Pharmacy cadre can be expanded more than the 16 scenarios?
     # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary?
+    # Get and plot services by short treatment id?
 
 
 if __name__ == "__main__":

From d30f73ceb97548184a17880e0a085939e2979dee Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 22:23:30 +0100
Subject: [PATCH 077/218] update todo list

---
 .../analysis_hr_expandsion_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 8ee1995664..0715143a90 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -282,7 +282,7 @@ def get_hr_salary(cadres):
     extra_cost = pd.DataFrame(total_cost.subtract(total_cost.loc['s_1'], axis=1).drop(index='s_1').all_four_cadres)
 
     # check total cost calculated is increased as expected - approximate float of a fraction can sacrifice some budget
-    # to run the following checks once the approximate float issue is solved
+    # todo: to run the following checks once the scenarios are confirmed and re-run
     # for s in param_names[1:]:
     #     assert abs(total_cost.loc[s, 'all_four_cadres'] -
     #                (1 + 0.042) ** (len(years)) * total_cost.loc['s_1', 'all_four_cadres']) < 1e6

From a9f4e55786c6f4c4019aae1f6445088fd1d6ef51 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 22:56:28 +0100
Subject: [PATCH 078/218] temporarily fix param_names to be consistent with
 latest scenario run output, so that the analysis can run

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index b639749a9b..ef35c22d59 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -72,7 +72,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
                         'HR_expansion_by_officer_type': list(self.scenarios.iloc[:, i])
                     }
                     }
-                ) for i in range(len(self.scenarios.columns))
+                ) for i in range(len(self.scenarios.columns) - 14)
         }
 
     def _baseline(self) -> Dict:

From 9784abdb7191b222a79a4f843222781fb4d694f0 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 23:03:05 +0100
Subject: [PATCH 079/218] delete unnecessary file

---
 ...rceFile_HR_expansion_by_officer_type_given_extra_budget.csv | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv

diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv
deleted file mode 100644
index 265e58141d..0000000000
--- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_HR_expansion_by_officer_type_given_extra_budget.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1f910b61227901d43dbf8c1a5101dfc4490206ec23381715b546db14e267ac25
-size 363

From f36015e80ba7ad26898628bd7703ac44b9d8d981 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 27 Aug 2024 23:12:23 +0100
Subject: [PATCH 080/218] fix failed checks

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index c03388ef1d..49289a5ef0 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -2,7 +2,7 @@
 We calculate the salar cost of current and funded plus HCW.
 """
 import itertools
-import pickle
+# import pickle
 from pathlib import Path
 
 import numpy as np

From d205aa456d7bab4f688f3c833adf2570ad53a45a Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 28 Aug 2024 11:54:13 +0100
Subject: [PATCH 081/218] name logger elements consistently

---
 src/tlo/methods/healthsystem.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 780fc44389..ef6e43c702 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3043,8 +3043,8 @@ def apply(self, population):
                                         'Clinical, DCSA, Nursing_and_Midwifery, Pharmacy - '
                                         'given fractions of an extra budget',
                             data={
-                                'Scale up factor': [sf_clinical, sf_dcsa, sf_nursing, sf_pharmacy],
-                                'Year of scaling up': self.sim.date.year,
+                                'scale_up_factor': [sf_clinical, sf_dcsa, sf_nursing, sf_pharmacy],
+                                'year_of_scale_up': self.sim.date.year,
                             }
                             )
 

From d728c1e9cd3ac2219ef67121b51c1ee18b737184 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 28 Aug 2024 11:54:27 +0100
Subject: [PATCH 082/218] update todo

---
 ...nalysis_hr_expandsion_by_officer_type_with_extra_budget.py | 2 +-
 .../prepare_minute_salary_and_extra_budget_frac_data.py       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 0715143a90..4cb45e3912 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -208,6 +208,7 @@ def get_scale_up_factor(_df):
         Return a series of yearly scale up factors for four cadres - Clinical, DCSA, Nursing_and_Midwifery, Pharmacy,
         with index of year and value of list of the four scale up factors.
         """
+        # todo: once job re-run, update columns name as the logger recorded: year_of_scale_up, scale_up_factor
         _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['Year of scaling up', 'Scale up factor']]
         return pd.Series(
             _df['Scale up factor'].values, index=_df['Year of scaling up']
@@ -584,7 +585,6 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     # todo
-    # Do update healthsystem HRScaling logger: year_of_scale_up, scale_up_factor, and get_scale_up_factor function here
     # Plot comparison results: there are negative changes of some appts and causes, try increase runs and see
     # As we have 16 scenarios in total, \
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 49289a5ef0..42616d5965 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -52,9 +52,9 @@
 # We confirmed/can prove that in such expansion scenarios of two/three/four cadres,
 # the annual scale up factors are actually equal for cadres,
 # equal to 1 + annual extra cost / total current cost of two/three/four cadres.
-# One possible issue is that Pharmacy cost has only small fractions in all multi-cadre scenarios,
+# todo: One possible issue is that Pharmacy cost has only small fractions in all multi-cadre scenarios,
 # as its current fraction is small; we have estimated that Pharmacy cadre is extremely in shortage,
-# thus these scenarios might still face huge shortages.
+# thus these scenarios might still face huge shortages (However, we can not estimate hcw shortage in mode 2?).
 cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy']
 combination_list = ['']
 for n in range(1, len(cadres)+1):

From ce07e94db0e5fc1cc00d44190993ed0d7d97cee1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 28 Aug 2024 13:11:02 +0100
Subject: [PATCH 083/218] update plot name

---
 .../analysis_hr_expandsion_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 4cb45e3912..7c81bf6626 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -574,7 +574,7 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Cost per every DALYs averted, {target_period()}'
+    name_of_plot = f'Cost per DALY averted, {target_period()}'
     fig, ax = do_bar_plot_with_ci(CE, xticklabels_horizontal_and_wrapped=True,
                                   put_labels_in_legend=True)
     ax.set_title(name_of_plot)

From b0aec4fa184bdcb8074d499a4ba3ae0e5255d422 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 28 Aug 2024 14:56:06 +0100
Subject: [PATCH 084/218] update todo

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index ef35c22d59..0fae1468c2 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -84,7 +84,7 @@ def _baseline(self) -> Dict:
                 # "scale_to_effective_capabilities": True,  # todo: TBC; will this change the capabilities of 2019?
                 "year_mode_switch": self.YEAR_OF_CHANGE,
                 'cons_availability': 'default',
-                'cons_availability_postSwitch': 'all',
+                'cons_availability_postSwitch': 'all',  # todo: how to argue for this setting?
                 'year_cons_availability_switch': self.YEAR_OF_CHANGE,
                 'yearly_HR_scaling_mode': 'no_scaling',
                 'start_year_HR_expansion_by_officer_type': self.YEAR_OF_CHANGE,

From c22495a0bef706be914ff9b8715858571f72d399 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 28 Aug 2024 15:03:27 +0100
Subject: [PATCH 085/218] plot numbers of staff and budget

---
 ...dsion_by_officer_type_with_extra_budget.py | 94 ++++++++++++++++++-
 1 file changed, 92 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 7c81bf6626..50e1ebfa67 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -276,12 +276,29 @@ def get_hr_salary(cadres):
 
     total_staff = pd.DataFrame(integrated_scale_up_factor.mul(curr_hr.values, axis=1))
     total_cost = pd.DataFrame(total_staff.mul(salary.values, axis=1))
-    # total_staff['all_four_cadres'] = total_staff.sum(axis=1)
+    total_staff['all_four_cadres'] = total_staff.sum(axis=1)
     total_cost['all_four_cadres'] = total_cost.sum(axis=1)
 
-    # extra_staff = pd.DataFrame(total_staff.subtract(total_staff.loc['s_1'], axis=1).drop(index='s_1').all_four_cadres)
     extra_cost = pd.DataFrame(total_cost.subtract(total_cost.loc['s_1'], axis=1).drop(index='s_1').all_four_cadres)
 
+    extra_staff_by_cadre = pd.DataFrame(
+        total_staff.subtract(total_staff.loc['s_1'], axis=1).drop(index='s_1').drop(columns='all_four_cadres')
+    )
+    extra_cost_by_cadre = pd.DataFrame(
+        total_cost.subtract(total_cost.loc['s_1'], axis=1).drop(index='s_1').drop(columns='all_four_cadres')
+    )
+
+    # As checked below, the increase percentages per cadre should be equal to each other and to the overall percentage
+    # because we set the extra budget fractions the same as the current cost distribution. Especially, in the scenario
+    # of expanding all four cadres, the yearly percentage increase is 4.2%, which is exactly the budget increasing rate.
+    # staff_increase_percents = pd.DataFrame(
+    #     total_staff.subtract(
+    #         total_staff.loc['s_1'], axis=1
+    #     ).divide(
+    #         total_staff.loc['s_1'], axis=1
+    #     ).multiply(100).drop(index='s_1')
+    # )
+
     # check total cost calculated is increased as expected - approximate float of a fraction can sacrifice some budget
     # todo: to run the following checks once the scenarios are confirmed and re-run
     # for s in param_names[1:]:
@@ -414,6 +431,12 @@ def get_hr_salary(cadres):
         cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
         for cause in num_dalys_by_cause_summarized.columns
     }
+    officer_category_color = {
+        'Clinical': 'blue',
+        'DCSA': 'orange',
+        'Nursing_and_Midwifery': 'red',
+        'Pharmacy': 'green'
+    }
 
     # plot absolute numbers for scenarios
 
@@ -459,6 +482,38 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Number of staff by cadre, {target_period()}'
+    total_staff_to_plot = (total_staff / 1000).drop(columns='all_four_cadres')
+    fig, ax = plt.subplots()
+    total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Thousands', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
+    total_cost_to_plot = (total_cost / 1e6).drop(columns='all_four_cadres')
+    fig, ax = plt.subplots()
+    total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'DALYs by cause, {target_period()}'
     num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6
     yerr_dalys = np.array([
@@ -511,6 +566,38 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Extra staff by cadre, {target_period()}'
+    extra_staff_by_cadre_to_plot = extra_staff_by_cadre / 1e3
+    fig, ax = plt.subplots()
+    extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Thousands', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Extra budget by cadre, {target_period()}'
+    extra_cost_by_cadre_to_plot = extra_cost_by_cadre / 1e6
+    fig, ax = plt.subplots()
+    extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Services increased by appointment type, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6
     yerr_services = np.array([
@@ -590,7 +677,10 @@ def get_hr_salary(cadres):
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres
     # To design more scenarios so that Pharmacy cadre can be expanded more than the 16 scenarios?
     # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary?
+    # To plot time series of staff and budget in the target period to show \
+    # how many staff and how much budget to increase yearly?
     # Get and plot services by short treatment id?
+    # Later, to explain the cause of differences in scenarios, might consider hcw time flow?
 
 
 if __name__ == "__main__":

From ef9e2c85b8599aea060225e259bf33ad7744c433 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 29 Aug 2024 10:09:23 +0100
Subject: [PATCH 086/218] update todo

---
 ...ysis_hr_expandsion_by_officer_type_with_extra_budget.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 50e1ebfa67..7a77ffad79 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -672,15 +672,16 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     # todo
-    # Plot comparison results: there are negative changes of some appts and causes, try increase runs and see
+    # Plot comparison results: there are negative changes of some appts and causes, try increase runs and see.
     # As we have 16 scenarios in total, \
-    # design comparison groups of scenarios to examine marginal/combined productivity of cadres
+    # design comparison groups of scenarios to examine marginal/combined productivity of cadres.
     # To design more scenarios so that Pharmacy cadre can be expanded more than the 16 scenarios?
     # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary?
     # To plot time series of staff and budget in the target period to show \
-    # how many staff and how much budget to increase yearly?
+    # how many staff and how much budget to increase yearly (choose the best scenario to illustrate)?
     # Get and plot services by short treatment id?
     # Later, to explain the cause of differences in scenarios, might consider hcw time flow?
+    # Before submitting a run, merge in the remote master.
 
 
 if __name__ == "__main__":

From 2d6143365594528d0479d968705753dd19eef658 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 29 Aug 2024 16:35:17 +0100
Subject: [PATCH 087/218] update todo

---
 src/tlo/methods/healthsystem.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index ef6e43c702..2818a554a7 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3008,6 +3008,8 @@ def apply(self, population):
         total_cost_pharmacy = daily_cost.loc[daily_cost.Officer_Type_Code == 'Pharmacy', 'Total_Cost_Per_Day'].sum()
 
         # get daily extra budget for this year = 4.2% * total cost
+        # todo: could make the default growth rate 0.042 as an explicit variable
+        #  so that we can also analyse impacts of different budget growth rates
         daily_extra_budget = 0.042*(total_cost_clinical + total_cost_dcsa + total_cost_nursing + total_cost_pharmacy)
 
         # get proportional daily extra budget for each of the four cadres

From 54db97f617486fe786db12255daf4c0acfea4b90 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 29 Aug 2024 16:41:07 +0100
Subject: [PATCH 088/218] update todo

---
 ...analysis_hr_expandsion_by_officer_type_with_extra_budget.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 7a77ffad79..9d1cf8b20a 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -673,9 +673,10 @@ def get_hr_salary(cadres):
 
     # todo
     # Plot comparison results: there are negative changes of some appts and causes, try increase runs and see.
+    # To design more scenarios so that Pharmacy cadre can be expanded more than the 16 scenarios
+    # and so that each cadre has different scale up factor (the one in more shortage will need to be scaled up more)?
     # As we have 16 scenarios in total, \
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres.
-    # To design more scenarios so that Pharmacy cadre can be expanded more than the 16 scenarios?
     # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary?
     # To plot time series of staff and budget in the target period to show \
     # how many staff and how much budget to increase yearly (choose the best scenario to illustrate)?

From e2a9febc3fe47548a8b4252f694847ad577db5e5 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 3 Sep 2024 15:23:01 +0100
Subject: [PATCH 089/218] turn up scale to effective capabilities

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 0fae1468c2..4952edabeb 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -81,7 +81,7 @@ def _baseline(self) -> Dict:
             {'HealthSystem': {
                 'mode_appt_constraints': 1,
                 'mode_appt_constraints_postSwitch': 2,
-                # "scale_to_effective_capabilities": True,  # todo: TBC; will this change the capabilities of 2019?
+                "scale_to_effective_capabilities": True,  # Will this change the capabilities of 2019?
                 "year_mode_switch": self.YEAR_OF_CHANGE,
                 'cons_availability': 'default',
                 'cons_availability_postSwitch': 'all',  # todo: how to argue for this setting?

From b9e310f1a0591f70585d0c59b696dae5361bc983 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 4 Sep 2024 17:33:36 +0100
Subject: [PATCH 090/218] update the preparation file to also expand all other
 cadres

---
 ...inute_salary_and_extra_budget_frac_data.py | 125 +++++++++++-------
 1 file changed, 76 insertions(+), 49 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 42616d5965..3b3ae32cd6 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -38,56 +38,83 @@
 
 Minute_Salary.to_csv(resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)
 
-# calculate the current cost distribution of the four cadres
+# calculate the current cost distribution of all cadres
+cadre_all = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+             'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
 staff_count = hr_current.groupby('Officer_Category')['Staff_Count'].sum().reset_index()
 staff_cost = staff_count.merge(hr_salary, on=['Officer_Category'], how='outer')
 staff_cost['annual_cost'] = staff_cost['Staff_Count'] * staff_cost['Annual_Salary_USD']
-four_cadres_cost = staff_cost.loc[
-    staff_cost.Officer_Category.isin(['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])].reset_index(drop=True)
-four_cadres_cost['cost_frac'] = (four_cadres_cost['annual_cost'] / four_cadres_cost['annual_cost'].sum())
-# x = four_cadres_cost.loc[0, 'cost_frac'].as_integer_ratio()
-assert four_cadres_cost.cost_frac.sum() == 1
-
-# Calculate the current cost distribution of one/two/three/four cadres and define them as scenarios
-# We confirmed/can prove that in such expansion scenarios of two/three/four cadres,
-# the annual scale up factors are actually equal for cadres,
-# equal to 1 + annual extra cost / total current cost of two/three/four cadres.
-# todo: One possible issue is that Pharmacy cost has only small fractions in all multi-cadre scenarios,
-# as its current fraction is small; we have estimated that Pharmacy cadre is extremely in shortage,
-# thus these scenarios might still face huge shortages (However, we can not estimate hcw shortage in mode 2?).
-cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy']
-combination_list = ['']
-for n in range(1, len(cadres)+1):
-    for subset in itertools.combinations(cadres, n):
-        combination_list.append(str(subset))
-
-cadre_to_expand = pd.DataFrame(index=cadres, columns=combination_list).fillna(0)
-cadre_to_expand.loc[:, ''] = 0  # no_expansion scenario
-for c in cadres:
-    for i in cadre_to_expand.columns:
+staff_cost['cost_frac'] = (staff_cost['annual_cost'] / staff_cost['annual_cost'].sum())
+assert staff_cost.cost_frac.sum() == 1
+staff_cost.set_index('Officer_Category', inplace=True)
+staff_cost = staff_cost.reindex(index=cadre_all)
+
+# No expansion scenario, or zero-extra-budget-fraction scenario, "s_1"
+# Define the current cost fractions among all cadres as extra-budget-fraction scenario "s_2" \
+# to be matched with Margherita's 4.2% scenario.
+# Define all other scenarios so that the extra budget fraction of each cadre, \
+# i.e., four main cadres and the "Other" cadre that groups up all other cadres, is the same (fair allocation)
+
+cadre_group = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']  # main cadres
+
+# create scenarios
+combination_list = ['s_1', 's_2']  # two baseline scenarios
+for n in range(1, len(cadre_group)+1):
+    for subset in itertools.combinations(cadre_group, n):
+        combination_list.append(str(subset))  # other equal-fraction scenarios
+
+# cadre groups to expand
+cadre_to_expand = pd.DataFrame(index=cadre_group, columns=combination_list).fillna(0.0)
+for c in cadre_group:
+    for i in cadre_to_expand.columns[2:]:
         if c in i:
-            cadre_to_expand.loc[c, i] = staff_cost.loc[staff_cost.Officer_Category == c, 'annual_cost'].values[0]
+            cadre_to_expand.loc[c, i] = 1  # value 1 indicates the cadre group will be expanded
 
-extra_budget_fracs = pd.DataFrame(index=cadre_to_expand.index, columns=cadre_to_expand.columns).fillna(0)
-for i in extra_budget_fracs.columns[1:]:
-    extra_budget_fracs.loc[:, i] = cadre_to_expand.loc[:, i] / cadre_to_expand.loc[:, i].sum()
+# prepare auxiliary dataframe for equal extra budget fractions scenarios
+auxiliary = cadre_to_expand.copy()
+for i in auxiliary.columns[2:]:
+    auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum()
+
+
+# define extra budget fracs for each cadre
+extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list)
+assert (extra_budget_fracs.columns == auxiliary.columns).all()
+assert (extra_budget_fracs.index[0:4] == auxiliary.index[0:4]).all()
+
+extra_budget_fracs.loc[:, 's_1'] = 0
+assert (staff_cost.index == extra_budget_fracs.index).all()
+extra_budget_fracs.loc[:, 's_2'] = staff_cost.loc[:, 'cost_frac'].values
+
+for i in extra_budget_fracs.columns[2:]:
+    for c in extra_budget_fracs.index:
+        if c in auxiliary.index:  # the four main cadres
+            extra_budget_fracs.loc[c, i] = auxiliary.loc[c, i]
+        else:  # the other 5 cadres
+            extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] / 5  # equal fracs among the 5 other cadres; could
+            # set non-equal fracs
 
 assert (abs(extra_budget_fracs.iloc[:, 1:len(extra_budget_fracs.columns)].sum(axis=0) - 1.0) < 1/1e10).all()
 
-simple_scenario_name = {}
-for i in range(len(extra_budget_fracs.columns)):
-    simple_scenario_name[extra_budget_fracs.columns[i]] = 's_' + str(i+1)  # name scenario from s_1
+# rename scenarios
+# make the scenario of equal fracs for all five cadre groups (i.e., the last column) to be s_3
+simple_scenario_name = {extra_budget_fracs.columns[-1]: 's_3'}
+for i in range(2, len(extra_budget_fracs.columns)-1):
+    simple_scenario_name[extra_budget_fracs.columns[i]] = 's_' + str(i+2)  # name scenario from s_4
 extra_budget_fracs.rename(columns=simple_scenario_name, inplace=True)
 
+# reorder columns
+col_order = ['s_' + str(i) for i in range(1, len(extra_budget_fracs.columns)+1)]
+assert len(col_order) == len(extra_budget_fracs.columns)
+extra_budget_fracs = extra_budget_fracs.reindex(columns=col_order)
+
 
 # calculate hr scale up factor for years 2020-2030 (10 years in total) outside the healthsystem module
 
 def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFrame:
-    """This function calculates the yearly hr scale up factor for Clinical, DCSA, Nursing_and_Midwifery,
-    and Pharmacy cadres for a year yr, given a fraction of an extra budget allocated to each cadre and
-    a yearly budget growth rate of 4.2%.
-    Parameter extra_budget_frac (list) is a list of four floats, representing the fractions.
-    Parameter yr (int) is a year between 2020 and 2030.
+    """This function calculates the yearly hr scale up factor for cadres for a year yr,
+    given a fraction of an extra budget allocated to each cadre and a yearly budget growth rate of 4.2%.
+    Parameter extra_budget_frac (list) is a list of 9 floats, representing the fractions.
+    Parameter yr (int) is a year between 2019 and 2030.
     Parameter scenario (string) is a column name in the extra budget fractions resource file.
     Output dataframe stores scale up factors and relevant for the year yr.
     """
@@ -102,7 +129,7 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
     prev_data['scale_up_factor'] = (prev_data.Staff_Count + prev_data.extra_staff) / prev_data.Staff_Count
 
     # store the updated data for the year yr
-    new_data = prev_data[['Officer_Category', 'Annual_Salary_USD', 'scale_up_factor']].copy()
+    new_data = prev_data[['Annual_Salary_USD', 'scale_up_factor']].copy()
     new_data['Staff_Count'] = prev_data.Staff_Count + prev_data.extra_staff
     new_data['annual_cost'] = prev_data.annual_cost + prev_data.extra_budget
 
@@ -110,32 +137,32 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
 
 
 # calculate scale up factors for all defined scenarios and years
-four_cadres_cost['scale_up_factor'] = 1
-scale_up_factor_dict = {s: {y: {} for y in range(2019, 2030)} for s in extra_budget_fracs.columns}
+staff_cost['scale_up_factor'] = 1
+scale_up_factor_dict = {s: {y: {} for y in range(2018, 2030)} for s in extra_budget_fracs.columns}
 for s in extra_budget_fracs.columns:
-    # for the initial/current year of 2019
-    scale_up_factor_dict[s][2019] = four_cadres_cost.drop(columns='cost_frac').copy()
+    # for the initial/current year of 2018
+    scale_up_factor_dict[s][2018] = staff_cost.drop(columns='cost_frac').copy()
     # for the years with scaled up hr
-    for y in range(2020, 2030):
+    for y in range(2019, 2030):
         scale_up_factor_dict[s][y] = calculate_hr_scale_up_factor(list(extra_budget_fracs[s]), y, s)
 
 # get the total cost and staff count for each year between 2020-2030 and each scenario
-total_cost = pd.DataFrame(index=range(2020, 2030), columns=extra_budget_fracs.columns)
-total_staff = pd.DataFrame(index=range(2020, 2030), columns=extra_budget_fracs.columns)
+total_cost = pd.DataFrame(index=range(2018, 2030), columns=extra_budget_fracs.columns)
+total_staff = pd.DataFrame(index=range(2018, 2030), columns=extra_budget_fracs.columns)
 for y in total_cost.index:
     for s in extra_budget_fracs.columns:
         total_cost.loc[y, s] = scale_up_factor_dict[s][y].annual_cost.sum()
         total_staff.loc[y, s] = scale_up_factor_dict[s][y].Staff_Count.sum()
 
-# check the total cost after 10 years are increased as expected
+# check the total cost after 11 years are increased as expected
 assert (
-    abs(total_cost.loc[2029, total_cost.columns[1:]] - (1 + 0.042) ** 10 * total_cost.loc[2029, 's_1']) < 1/1e6
+    abs(total_cost.loc[2029, total_cost.columns[1:]] - (1 + 0.042) ** 11 * total_cost.loc[2029, 's_1']) < 1/1e7
 ).all()
 
-# get the integrated scale up factors for year 2029 and each scenario
-integrated_scale_up_factor = pd.DataFrame(index=cadres, columns=total_cost.columns).fillna(1.0)
+# get the integrated scale up factors by the end of year 2029 and each scenario
+integrated_scale_up_factor = pd.DataFrame(index=cadre_all, columns=total_cost.columns).fillna(1.0)
 for s in total_cost.columns[1:]:
-    for yr in range(2020, 2030):
+    for yr in range(2019, 2030):
         integrated_scale_up_factor.loc[:, s] = np.multiply(
             integrated_scale_up_factor.loc[:, s].values,
             scale_up_factor_dict[s][yr].loc[:, 'scale_up_factor'].values

From b366357888e6e6ed499b42b80e1978866af80e1c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 5 Sep 2024 12:07:42 +0100
Subject: [PATCH 091/218] update the function to expand officer type

---
 src/tlo/methods/healthsystem.py | 94 +++++++++++++++------------------
 1 file changed, 42 insertions(+), 52 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 2818a554a7..655fac0798 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -308,14 +308,17 @@ class HealthSystem(Module):
         ),
 
         'HR_expansion_by_officer_type': Parameter(
-            Types.LIST, "This list comprises of four floats, which specifies the proportions of extra budget "
-                        "allocated to four cadres - Clinical, DCSA, Nursing_and_Midwifery and Pharmacy - in order, "
-                        "every year from start_year_HR_expansion_by_officer_type and onwards. "
-                        "The extra budget for this year is 4.2% of the total salary of these cadres in last year, "
-                        "assuming the annual GDP growth rate is 4.2% and the proportion of GDP expenditure on "
-                        "expanding these cadres is fixed. Given the allocated extra budget and annual salary, "
-                        "we calculate the extra staff and minutes for these cadres of this year. The expansion is done "
-                        "on 1 Jan of every year from start_year_HR_expansion_by_officer_type."
+            Types.SERIES, "This series is indexed by nine officer types, each with a float value that "
+                          "specifies the proportion of extra budget allocated to that officer type."
+                          "The extra budget for this year is (100 * HR_budget_growth_rate) of the total salary "
+                          "of these officers in last year. Given the allocated extra budget and annual salary, "
+                          "we calculate the extra minutes for these staff of this year. The expansion is done "
+                          "on 1 Jan of every year from start_year_HR_expansion_by_officer_type."
+        ),
+        "HR_budget_growth_rate": Parameter(
+            Types.REAL, "This number is the annual growth rate of HR budget. "
+                         "The default value is 0.042 (4.2%), assuming the annual GDP growth rate is 4.2% and "
+                         "the proportion of GDP expenditure on paying salaries of these staff is fixed "
         ),
 
         'start_year_HR_expansion_by_officer_type': Parameter(
@@ -659,8 +662,14 @@ def read_parameters(self, data_folder):
 
         # Set default values for HR_expansion_by_officer_type, start_year_HR_expansion_by_officer_type,
         # end_year_HR_expansion_by_officer_type
-        self.parameters['HR_expansion_by_officer_type'] = [0, 0, 0, 0]
-        self.parameters['start_year_HR_expansion_by_officer_type'] = 2020
+        self.parameters['HR_expansion_by_officer_type'] = pd.Series(
+            index=['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+                   'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'],
+            data=[0, 0, 0, 0,
+                  0, 0, 0, 0, 0]
+        )
+        self.parameters['HR_budget_growth_rate'] = 0.042
+        self.parameters['start_year_HR_expansion_by_officer_type'] = 2019
         self.parameters['end_year_HR_expansion_by_officer_type'] = 2030
 
     def pre_initialise_population(self):
@@ -2980,7 +2989,7 @@ def __init__(self, module):
 
     def apply(self, population):
 
-        # get minute salary for the four cadres
+        # get minute salary
         minute_salary_by_officer_facility_id = self.module.parameters['minute_salary']
 
         # get current daily minutes and format it to be consistent with minute salary
@@ -2990,37 +2999,29 @@ def apply(self, population):
             pat='_', n=3, expand=True)[[1, 3]]
         daily_minutes['Facility_ID'] = daily_minutes['Facility_ID'].astype(int)
 
-        # get daily cost per officer per facility id
+        # get daily cost per officer type per facility id
         daily_cost = minute_salary_by_officer_facility_id.merge(
             daily_minutes, on=['Facility_ID', 'Officer_Type_Code'], how='outer')
         daily_cost['Total_Cost_Per_Day'] = daily_cost['Minute_Salary_USD'] * daily_cost['Total_Minutes_Per_Day']
 
-        # get daily cost per officer type of the four cadres
-        daily_cost = daily_cost.groupby('Officer_Type_Code').agg({'Total_Cost_Per_Day': 'sum'}).reset_index()
-        daily_cost = daily_cost.loc[daily_cost.Officer_Type_Code.isin(
-            ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy'])]
-
-        # get total daily cost of each of the four cadres
-        total_cost_clinical = daily_cost.loc[daily_cost.Officer_Type_Code == 'Clinical', 'Total_Cost_Per_Day'].sum()
-        total_cost_dcsa = daily_cost.loc[daily_cost.Officer_Type_Code == 'DCSA', 'Total_Cost_Per_Day'].sum()
-        total_cost_nursing = daily_cost.loc[
-            daily_cost.Officer_Type_Code == 'Nursing_and_Midwifery', 'Total_Cost_Per_Day'].sum()
-        total_cost_pharmacy = daily_cost.loc[daily_cost.Officer_Type_Code == 'Pharmacy', 'Total_Cost_Per_Day'].sum()
-
-        # get daily extra budget for this year = 4.2% * total cost
-        # todo: could make the default growth rate 0.042 as an explicit variable
-        #  so that we can also analyse impacts of different budget growth rates
-        daily_extra_budget = 0.042*(total_cost_clinical + total_cost_dcsa + total_cost_nursing + total_cost_pharmacy)
-
-        # get proportional daily extra budget for each of the four cadres
-        daily_extra_budget_by_officer = [
-            daily_extra_budget * i for i in self.module.parameters['HR_expansion_by_officer_type']]
-
-        # get the scale up factor for each cadre, assumed to be the same for each facility id of that cadre
-        sf_clinical = (total_cost_clinical + daily_extra_budget_by_officer[0])/total_cost_clinical
-        sf_dcsa = (total_cost_dcsa + daily_extra_budget_by_officer[1]) / total_cost_dcsa
-        sf_nursing = (total_cost_nursing + daily_extra_budget_by_officer[2]) / total_cost_nursing
-        sf_pharmacy = (total_cost_pharmacy + daily_extra_budget_by_officer[3]) / total_cost_pharmacy
+        # get daily cost per officer type
+        daily_cost = daily_cost.groupby('Officer_Type_Code').agg({'Total_Cost_Per_Day': 'sum'})
+
+        # get daily extra budget for this year
+        daily_extra_budget = (self.module.parameters['HR_budget_growth_rate']
+                              * daily_cost.Total_Cost_Per_Day.sum())
+
+        # get proportional daily extra budget for each officer type
+        extra_budget_fraction = self.module.parameters['HR_expansion_by_officer_type']
+        daily_cost = daily_cost.reindex(index=extra_budget_fraction.index)
+        daily_cost['extra_budget_per_day'] = daily_extra_budget * extra_budget_fraction
+
+        # get the scale up factor for each officer type, assumed to be the same for each facility id of that
+        # officer type (note "cost = available minutes * minute salary", thus we could directly calculate
+        # scale up factor using cost)
+        daily_cost['scale_up_factor'] = (
+            (daily_cost.extra_budget_per_day + daily_cost.Total_Cost_Per_Day) / daily_cost.Total_Cost_Per_Day
+        )
 
         # scale up the daily minutes per cadre per facility id
         pattern = r"FacilityID_(\w+)_Officer_(\w+)"
@@ -3028,24 +3029,13 @@ def apply(self, population):
             matches = re.match(pattern, officer)
             # Extract officer type
             officer_type = matches.group(2)
-            if officer_type == 'Clinical':
-                self.module._daily_capabilities[officer] *= sf_clinical
-            elif officer_type == 'DCSA':
-                self.module._daily_capabilities[officer] *= sf_dcsa
-            elif officer_type == 'Nursing_and_Midwifery':
-                self.module._daily_capabilities[officer] *= sf_nursing
-            elif officer_type == 'Pharmacy':
-                self.module._daily_capabilities[officer] *= sf_pharmacy
-            else:
-                self.module._daily_capabilities[officer] *= 1
+            self.module._daily_capabilities[officer] *= daily_cost.loc[officer_type, 'scale_up_factor']
 
         # save the scale up factor into logger
         logger_summary.info(key='HRScaling',
-                            description='The HR scale up factor by office type - '
-                                        'Clinical, DCSA, Nursing_and_Midwifery, Pharmacy - '
-                                        'given fractions of an extra budget',
+                            description='The HR scale up factor by office type given fractions of an extra budget',
                             data={
-                                'scale_up_factor': [sf_clinical, sf_dcsa, sf_nursing, sf_pharmacy],
+                                'scale_up_factor': daily_cost.scale_up_factor,
                                 'year_of_scale_up': self.sim.date.year,
                             }
                             )

From d7cced904ad8b96dba6ebf93498a808c9705488e Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 5 Sep 2024 12:56:27 +0100
Subject: [PATCH 092/218] update the logger

---
 src/tlo/methods/healthsystem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 655fac0798..a3817e9b50 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3035,7 +3035,7 @@ def apply(self, population):
         logger_summary.info(key='HRScaling',
                             description='The HR scale up factor by office type given fractions of an extra budget',
                             data={
-                                'scale_up_factor': daily_cost.scale_up_factor,
+                                'scale_up_factor': daily_cost.scale_up_factor.to_dict(),
                                 'year_of_scale_up': self.sim.date.year,
                             }
                             )

From 1f003b09190481bbbaf9900288500680840efb50 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 5 Sep 2024 13:26:15 +0100
Subject: [PATCH 093/218] update test

---
 tests/test_healthsystem.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py
index a3f8418f76..79f82fa99b 100644
--- a/tests/test_healthsystem.py
+++ b/tests/test_healthsystem.py
@@ -2615,10 +2615,19 @@ def get_capabilities_after_update(end_year, HR_expansion_by_officer_type) -> pd.
         return caps
 
     initial_caps = get_initial_capabilities()
-    caps_clinical_no_update = get_capabilities_after_update(2012, [0, 0, 0, 0])
-    caps_clinical_one_update = get_capabilities_after_update(2012, [1, 0, 0, 0])
-    caps_clinical_dcsa_one_update = get_capabilities_after_update(2012, [0.5, 0.5, 0, 0])
-    caps_clinical_two_updates = get_capabilities_after_update(2013, [1, 0, 0, 0])
+    test_fracs = pd.DataFrame(
+            index=['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+                   'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'],
+            data={'no_update': [0, 0, 0, 0, 0, 0, 0, 0, 0],
+                  'clinical_one_update': [1, 0, 0, 0, 0, 0, 0, 0, 0],
+                  'clinical_dcsa_one_update': [0.5, 0.5, 0, 0, 0, 0, 0, 0, 0],
+                  'clinical_two_updates': [1, 0, 0, 0, 0, 0, 0, 0, 0]}
+        )
+    caps_clinical_no_update = get_capabilities_after_update(2012, test_fracs.no_update)
+    caps_clinical_one_update = get_capabilities_after_update(2012, test_fracs.clinical_one_update)
+    caps_clinical_dcsa_one_update = get_capabilities_after_update(2012,
+                                                                  test_fracs.clinical_dcsa_one_update)
+    caps_clinical_two_updates = get_capabilities_after_update(2013, test_fracs.clinical_two_updates)
 
     # check that the cadres are expanded as expected
     def compare(cadre, caps_1, caps_2) -> tuple:

From c8569685d7ae9244e608a5200980241a1a497369 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 5 Sep 2024 13:29:26 +0100
Subject: [PATCH 094/218] update scenario run settings

---
 ...rrent_hcw_by_officer_type_with_extra_budget.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 4952edabeb..7cf754936b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -32,7 +32,7 @@ def __init__(self):
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
         self.end_date = Date(2030, 1, 1)
-        self.pop_size = 20_000  # todo: TBC
+        self.pop_size = 100_000  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
         self.runs_per_draw = 10  # todo: TBC
@@ -60,7 +60,7 @@ def draw_parameters(self, draw_number, rng):
     def _get_scenarios(self) -> Dict[str, Dict]:
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
-        self.YEAR_OF_CHANGE = 2020  # This is the year to change run settings and to start hr expansion.
+        self.YEAR_OF_CHANGE = 2019  # This is the year to change run settings and to start hr expansion.
 
         self.scenarios = extra_budget_fracs
 
@@ -69,10 +69,10 @@ def _get_scenarios(self) -> Dict[str, Dict]:
                 mix_scenarios(
                     self._baseline(),
                     {'HealthSystem': {
-                        'HR_expansion_by_officer_type': list(self.scenarios.iloc[:, i])
+                        'HR_expansion_by_officer_type': self.scenarios.iloc[:, i]
                     }
                     }
-                ) for i in range(len(self.scenarios.columns) - 14)
+                ) for i in range(len(self.scenarios.columns))
         }
 
     def _baseline(self) -> Dict:
@@ -81,7 +81,7 @@ def _baseline(self) -> Dict:
             {'HealthSystem': {
                 'mode_appt_constraints': 1,
                 'mode_appt_constraints_postSwitch': 2,
-                "scale_to_effective_capabilities": True,  # Will this change the capabilities of 2019?
+                "scale_to_effective_capabilities": True,  # todo: what if set it False?
                 "year_mode_switch": self.YEAR_OF_CHANGE,
                 'cons_availability': 'default',
                 'cons_availability_postSwitch': 'all',  # todo: how to argue for this setting?
@@ -89,7 +89,10 @@ def _baseline(self) -> Dict:
                 'yearly_HR_scaling_mode': 'no_scaling',
                 'start_year_HR_expansion_by_officer_type': self.YEAR_OF_CHANGE,
                 'end_year_HR_expansion_by_officer_type': self.end_date.year,
-            }  # as to expand current hr and analyse the impact, we keep using 'actual' hr capabilities
+                "policy_name": "Naive",
+                "tclose_overwrite": 1,
+                "tclose_days_offset_overwrite": 7,
+            }
             },
         )
 

From 2899b027ce4c315a8fb52368c10bd39f482e9610 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 5 Sep 2024 15:28:47 +0100
Subject: [PATCH 095/218] change data type to fix json error

---
 src/tlo/methods/healthsystem.py | 28 +++++++++++++---------------
 tests/test_healthsystem.py      |  2 +-
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index a3817e9b50..0097d61d32 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -308,17 +308,17 @@ class HealthSystem(Module):
         ),
 
         'HR_expansion_by_officer_type': Parameter(
-            Types.SERIES, "This series is indexed by nine officer types, each with a float value that "
-                          "specifies the proportion of extra budget allocated to that officer type."
-                          "The extra budget for this year is (100 * HR_budget_growth_rate) of the total salary "
-                          "of these officers in last year. Given the allocated extra budget and annual salary, "
-                          "we calculate the extra minutes for these staff of this year. The expansion is done "
-                          "on 1 Jan of every year from start_year_HR_expansion_by_officer_type."
+            Types.DICT, "This DICT has keys of nine officer types, each with a float value that "
+                        "specifies the proportion of extra budget allocated to that officer type."
+                        "The extra budget for this year is (100 * HR_budget_growth_rate) of the total salary "
+                        "of these officers in last year. Given the allocated extra budget and annual salary, "
+                        "we calculate the extra minutes for these staff of this year. The expansion is done "
+                        "on 1 Jan of every year from start_year_HR_expansion_by_officer_type."
         ),
         "HR_budget_growth_rate": Parameter(
             Types.REAL, "This number is the annual growth rate of HR budget. "
-                         "The default value is 0.042 (4.2%), assuming the annual GDP growth rate is 4.2% and "
-                         "the proportion of GDP expenditure on paying salaries of these staff is fixed "
+                        "The default value is 0.042 (4.2%), assuming the annual GDP growth rate is 4.2% and "
+                        "the proportion of GDP expenditure on paying salaries of these staff is fixed "
         ),
 
         'start_year_HR_expansion_by_officer_type': Parameter(
@@ -662,12 +662,10 @@ def read_parameters(self, data_folder):
 
         # Set default values for HR_expansion_by_officer_type, start_year_HR_expansion_by_officer_type,
         # end_year_HR_expansion_by_officer_type
-        self.parameters['HR_expansion_by_officer_type'] = pd.Series(
-            index=['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
-                   'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography'],
-            data=[0, 0, 0, 0,
-                  0, 0, 0, 0, 0]
-        )
+        self.parameters['HR_expansion_by_officer_type'] = {
+            'Clinical': 0, 'DCSA': 0, 'Nursing_and_Midwifery': 0, 'Pharmacy': 0,
+            'Dental': 0, 'Laboratory': 0, 'Mental': 0, 'Nutrition': 0, 'Radiography': 0
+        }
         self.parameters['HR_budget_growth_rate'] = 0.042
         self.parameters['start_year_HR_expansion_by_officer_type'] = 2019
         self.parameters['end_year_HR_expansion_by_officer_type'] = 2030
@@ -3012,7 +3010,7 @@ def apply(self, population):
                               * daily_cost.Total_Cost_Per_Day.sum())
 
         # get proportional daily extra budget for each officer type
-        extra_budget_fraction = self.module.parameters['HR_expansion_by_officer_type']
+        extra_budget_fraction = pd.Series(self.module.parameters['HR_expansion_by_officer_type'])
         daily_cost = daily_cost.reindex(index=extra_budget_fraction.index)
         daily_cost['extra_budget_per_day'] = daily_extra_budget * extra_budget_fraction
 
diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py
index 79f82fa99b..13999b097a 100644
--- a/tests/test_healthsystem.py
+++ b/tests/test_healthsystem.py
@@ -2602,7 +2602,7 @@ def get_capabilities_after_update(end_year, HR_expansion_by_officer_type) -> pd.
         params = sim.modules['HealthSystem'].parameters
         params['start_year_HR_expansion_by_officer_type'] = 2011  # first update happens on 1 Jan 2011
         params['end_year_HR_expansion_by_officer_type'] = end_year  # last update happens on 1 Jan (end_year - 1)
-        params['HR_expansion_by_officer_type'] = HR_expansion_by_officer_type
+        params['HR_expansion_by_officer_type'] = HR_expansion_by_officer_type.to_dict()
 
         popsize = 100
         sim.make_initial_population(n=popsize)

From d735bd3e04b82ce1952ccd8d614114c4565dd266 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 5 Sep 2024 16:05:10 +0100
Subject: [PATCH 096/218] update logger

---
 src/tlo/methods/healthsystem.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 0097d61d32..4475920610 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3029,12 +3029,15 @@ def apply(self, population):
             officer_type = matches.group(2)
             self.module._daily_capabilities[officer] *= daily_cost.loc[officer_type, 'scale_up_factor']
 
-        # save the scale up factor into logger
+        # save the scale up factor, updated cost and updated capabilities into logger
+        total_cost_this_year = 365.25 * (daily_cost.Total_Cost_Per_Day.sum() + daily_extra_budget)
         logger_summary.info(key='HRScaling',
                             description='The HR scale up factor by office type given fractions of an extra budget',
                             data={
                                 'scale_up_factor': daily_cost.scale_up_factor.to_dict(),
                                 'year_of_scale_up': self.sim.date.year,
+                                'total_hr_salary': total_cost_this_year,
+                                'daily_capabilities': self.module._daily_capabilities.to_dict()
                             }
                             )
 

From c5dcdc2433bc8d909265a4a09bb9d429d0623d9e Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 5 Sep 2024 21:56:35 +0100
Subject: [PATCH 097/218] scale down minute salary when scale up to effective
 capabilities

---
 src/tlo/methods/healthsystem.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 4475920610..cb5b5847cc 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -1065,6 +1065,11 @@ def _rescale_capabilities_to_capture_effective_capability(self):
         # Note: Currently relying on module variable rather than parameter for
         # scale_to_effective_capabilities, in order to facilitate testing. However
         # this may eventually come into conflict with the Switcher functions.
+
+        # In addition, for Class HRExpansionByOfficerType,
+        # for the purpose of keep cost not scaled, need to scale down minute salary when capabilities are scaled up
+
+        minute_salary = self.parameters['minute_salary']
         pattern = r"FacilityID_(\w+)_Officer_(\w+)"
         for officer in self._daily_capabilities.keys():
             matches = re.match(pattern, officer)
@@ -1079,6 +1084,9 @@ def _rescale_capabilities_to_capture_effective_capability(self):
             )
             if rescaling_factor > 1 and rescaling_factor != float("inf"):
                 self._daily_capabilities[officer] *= rescaling_factor
+                minute_salary.loc[(minute_salary.Facility_ID == facility_id)
+                                  & (minute_salary.Officer_Type_Code == officer_type),
+                                  'Minute_Salary_USD'] /= rescaling_factor
 
     def update_consumables_availability_to_represent_merging_of_levels_1b_and_2(self, df_original):
         """To represent that facility levels '1b' and '2' are merged together under the label '2', we replace the

From 460436d23c261ed58c6f3a91c9916b5628aa17d5 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 5 Sep 2024 23:41:44 +0100
Subject: [PATCH 098/218] update logger

---
 src/tlo/methods/healthsystem.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index cb5b5847cc..c85b7c23bb 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3038,14 +3038,13 @@ def apply(self, population):
             self.module._daily_capabilities[officer] *= daily_cost.loc[officer_type, 'scale_up_factor']
 
         # save the scale up factor, updated cost and updated capabilities into logger
-        total_cost_this_year = 365.25 * (daily_cost.Total_Cost_Per_Day.sum() + daily_extra_budget)
+        total_cost_this_year = 365.25 * (daily_cost.Total_Cost_Per_Day + daily_cost.extra_budget_per_day)
         logger_summary.info(key='HRScaling',
                             description='The HR scale up factor by office type given fractions of an extra budget',
                             data={
                                 'scale_up_factor': daily_cost.scale_up_factor.to_dict(),
                                 'year_of_scale_up': self.sim.date.year,
-                                'total_hr_salary': total_cost_this_year,
-                                'daily_capabilities': self.module._daily_capabilities.to_dict()
+                                'total_hr_salary': total_cost_this_year.to_dict()
                             }
                             )
 

From e34a2769fea03dfe9a2522260a735eae76cb8864 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 6 Sep 2024 00:08:31 +0100
Subject: [PATCH 099/218] update scenario run file

---
 ...panding_current_hcw_by_officer_type_with_extra_budget.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 7cf754936b..aead18cc57 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -35,7 +35,7 @@ def __init__(self):
         self.pop_size = 100_000  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 10  # todo: TBC
+        self.runs_per_draw = 5  # todo: TBC
 
     def log_configuration(self):
         return {
@@ -69,10 +69,10 @@ def _get_scenarios(self) -> Dict[str, Dict]:
                 mix_scenarios(
                     self._baseline(),
                     {'HealthSystem': {
-                        'HR_expansion_by_officer_type': self.scenarios.iloc[:, i]
+                        'HR_expansion_by_officer_type': self.scenarios.iloc[:, i].to_dict()
                     }
                     }
-                ) for i in range(len(self.scenarios.columns))
+                ) for i in range(len(self.scenarios.columns) - 25)  # run first 8 scenarios
         }
 
     def _baseline(self) -> Dict:

From 6ced85b6f3e8cff36c94f4a171c3e3b7cf4075a7 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 6 Sep 2024 10:58:35 +0100
Subject: [PATCH 100/218] add comments in test

---
 tests/test_healthsystem.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py
index 13999b097a..353b2259c9 100644
--- a/tests/test_healthsystem.py
+++ b/tests/test_healthsystem.py
@@ -2604,6 +2604,10 @@ def get_capabilities_after_update(end_year, HR_expansion_by_officer_type) -> pd.
         params['end_year_HR_expansion_by_officer_type'] = end_year  # last update happens on 1 Jan (end_year - 1)
         params['HR_expansion_by_officer_type'] = HR_expansion_by_officer_type.to_dict()
 
+        # for testing _rescale_capabilities_to_capture_effective_capability
+        # params['year_mode_switch'] = 2011
+        # params['scale_to_effective_capabilities'] = True
+
         popsize = 100
         sim.make_initial_population(n=popsize)
 

From 10e6f95eb2e1e288c91280777cf65415a5e768a7 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 6 Sep 2024 22:28:48 +0100
Subject: [PATCH 101/218] submit a run with 3 draws

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index aead18cc57..787835e07b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -72,7 +72,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
                         'HR_expansion_by_officer_type': self.scenarios.iloc[:, i].to_dict()
                     }
                     }
-                ) for i in range(len(self.scenarios.columns) - 25)  # run first 8 scenarios
+                ) for i in range(len(self.scenarios.columns) - 30)  # run first 3 scenarios of 33 scenarios
         }
 
     def _baseline(self) -> Dict:

From d49e4718f5a207e353e427898244a37d8c7b9cce Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sat, 7 Sep 2024 10:53:55 +0100
Subject: [PATCH 102/218] assert to make sure the input the officer types match
 the defined

---
 src/tlo/methods/healthsystem.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index c85b7c23bb..a2d5ecb716 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3019,6 +3019,8 @@ def apply(self, population):
 
         # get proportional daily extra budget for each officer type
         extra_budget_fraction = pd.Series(self.module.parameters['HR_expansion_by_officer_type'])
+        assert set(extra_budget_fraction.index) == set(daily_cost.index), \
+            "Input officer types do not match the defined officer types"
         daily_cost = daily_cost.reindex(index=extra_budget_fraction.index)
         daily_cost['extra_budget_per_day'] = daily_extra_budget * extra_budget_fraction
 

From 96783c6f8602eabb0ec10bb41240111bb84e0711 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sat, 7 Sep 2024 12:14:51 +0100
Subject: [PATCH 103/218] test minute salary scale down

---
 tests/test_healthsystem.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py
index 353b2259c9..b4e6a4da36 100644
--- a/tests/test_healthsystem.py
+++ b/tests/test_healthsystem.py
@@ -2605,8 +2605,8 @@ def get_capabilities_after_update(end_year, HR_expansion_by_officer_type) -> pd.
         params['HR_expansion_by_officer_type'] = HR_expansion_by_officer_type.to_dict()
 
         # for testing _rescale_capabilities_to_capture_effective_capability
-        # params['year_mode_switch'] = 2011
-        # params['scale_to_effective_capabilities'] = True
+        params['year_mode_switch'] = 2011
+        params['scale_to_effective_capabilities'] = True
 
         popsize = 100
         sim.make_initial_population(n=popsize)

From 8b938ca1f11e05d0e7d997edf69596caf6959759 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 11 Sep 2024 10:16:01 +0100
Subject: [PATCH 104/218] add comment on creating alternative scenarios

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py    | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 3b3ae32cd6..f171f03345 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -56,6 +56,7 @@
 # i.e., four main cadres and the "Other" cadre that groups up all other cadres, is the same (fair allocation)
 
 cadre_group = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']  # main cadres
+other_group = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
 
 # create scenarios
 combination_list = ['s_1', 's_2']  # two baseline scenarios
@@ -90,8 +91,10 @@
         if c in auxiliary.index:  # the four main cadres
             extra_budget_fracs.loc[c, i] = auxiliary.loc[c, i]
         else:  # the other 5 cadres
-            extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] / 5  # equal fracs among the 5 other cadres; could
-            # set non-equal fracs
+            # extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] * (
+            #     staff_cost.loc[c, 'cost_frac'] / staff_cost.loc[staff_cost.index.isin(other_group), 'cost_frac'].sum()
+            # )  # current cost distribution among the 5 other cadres
+            extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] / 5  # equal fracs among the 5 other cadres
 
 assert (abs(extra_budget_fracs.iloc[:, 1:len(extra_budget_fracs.columns)].sum(axis=0) - 1.0) < 1/1e10).all()
 

From 8f12ad59f977ddfa512ffaf3def8b37f02d28781 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 12 Sep 2024 01:52:17 +0100
Subject: [PATCH 105/218] temporary update plots for 3 scenarios

---
 ...dsion_by_officer_type_with_extra_budget.py | 382 +++++++++---------
 1 file changed, 195 insertions(+), 187 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 9d1cf8b20a..fdce8bcda6 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -30,22 +30,8 @@
 # todo: to update once scenarios confirmed
 substitute_labels = {
     's_1': 'no_expansion',
-    's_2': 'CDNP_expansion_current',
-    's_3': 'C_expansion',
-    's_4': 'D_expansion',
-    's_5': 'N_expansion',
-    's_6': 'P_expansion',
-    's_7': 'CD_expansion',
-    's_8': 'CN_expansion',
-    's_9': 'CP_expansion',
-    's_10': 'DN_expansion',
-    's_11': 'DP_expansion',
-    's_12': 'NP_expansion',
-    's_13': 'CDN_expansion',
-    's_14': 'CDP_expansion',
-    's_15': 'CNP_expansion',
-    's_16': 'DNP_expansion',
-    's_17': 'CDNP_expansion_equal'
+    's_2': 'all_expansion_current',
+    's_3': 'all_expansion_equal',
 }
 
 
@@ -159,10 +145,12 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
 
         # Define colormap (used only with option `put_labels_in_legend=True`)
         # todo: could re-define colors for each scenario once scenarios are confirmed
-        cmap = plt.get_cmap("tab20")
-        rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y))  # noqa: E731
-        colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend and len(xticks) > 1 \
-            else None
+        if put_labels_in_legend and len(xticks) == 3:
+            colors = ['orange', 'blue', 'green']
+        elif put_labels_in_legend and len(xticks) == 2:
+            colors = ['blue', 'green']
+        else:
+            colors = None  # no explicit colours outside the 2- and 3-bar legend cases
 
         fig, ax = plt.subplots(figsize=(10, 5))
         ax.bar(
@@ -205,13 +193,32 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
 
     def get_scale_up_factor(_df):
         """
-        Return a series of yearly scale up factors for four cadres - Clinical, DCSA, Nursing_and_Midwifery, Pharmacy,
-        with index of year and value of list of the four scale up factors.
+        Return a series of yearly scale up factors for all cadres,
+        with index of year and value of list of scale up factors.
+        """
+        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'scale_up_factor']
+                      ].set_index('year_of_scale_up')
+        _df = _df['scale_up_factor'].apply(pd.Series)
+        assert (_df.columns == cadres).all()
+        _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index}
+        _df_1 = pd.DataFrame(data=_dict).T
+        return pd.Series(
+            _df_1.loc[:, 0], index=_df_1.index
+        )
+
+    def get_total_cost(_df):
+        """
+        Return a series of yearly total cost for all cadres,
+        with index of year and values of list of total cost.
         """
-        # todo: once job re-run, update columns name as the logger recorded: year_of_scale_up, scale_up_factor
-        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['Year of scaling up', 'Scale up factor']]
+        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'total_hr_salary']
+                      ].set_index('year_of_scale_up')
+        _df = _df['total_hr_salary'].apply(pd.Series)
+        assert (_df.columns == cadres).all()
+        _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index}
+        _df_1 = pd.DataFrame(data=_dict).T
         return pd.Series(
-            _df['Scale up factor'].values, index=_df['Year of scaling up']
+            _df_1.loc[:, 0], index=_df_1.index
         )
 
     def get_current_hr(cadres):
@@ -244,66 +251,53 @@ def get_hr_salary(cadres):
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
 
-    # Get current (year of 2019) hr counts
-    cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy']
-    curr_hr = get_current_hr(cadres)
-
-    # Get salary
-    salary = get_hr_salary(cadres)
-
-    # Get scale up factors for all scenarios
-    scale_up_factors = extract_results(
+    # Define cadres in order
+    cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+              'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
+
+    # # Get current (year of 2019) hr counts
+    # curr_hr = get_current_hr(cadres)
+
+    # # Get salary
+    # salary = get_hr_salary(cadres)
+
+    # # Get scale up factors for all scenarios
+    # scale_up_factors = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='HRScaling',
+    #     custom_generate_series=get_scale_up_factor,
+    #     do_scaling=False
+    # ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
+    # # check that the scale up factors are almost the same between each run within each draw
+    # # assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all()
+    # # keep scale up factors of only one run within each draw
+    # scale_up_factors = scale_up_factors.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
+    # scale_up_factors[cadres] = scale_up_factors.value.tolist()
+    # scale_up_factors.drop(columns='value', inplace=True)
+
+    # Get total cost for all scenarios
+    total_cost = extract_results(
         results_folder,
         module='tlo.methods.healthsystem.summary',
         key='HRScaling',
-        custom_generate_series=get_scale_up_factor,
+        custom_generate_series=get_total_cost,
         do_scaling=False
     ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
-    # check that the scale up factors are the same between each run within each draw
-    assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all()
-    # keep scale up factors of only one run within each draw
-    scale_up_factors = scale_up_factors.iloc[:, 0].unstack().reset_index().melt(id_vars='Year of scaling up')
-    scale_up_factors[cadres] = scale_up_factors.value.tolist()
-    scale_up_factors.drop(columns='value', inplace=True)
-
-    # Get total extra staff counts by officer type and total extra budget within the target period for all scenarios
-    years = range(2020, the_target_period[1].year + 1)
-    integrated_scale_up_factor = pd.DataFrame(index=list(param_names), columns=cadres)
-    for s in integrated_scale_up_factor.index:
-        integrated_scale_up_factor.loc[s] = scale_up_factors.loc[
-            (scale_up_factors['Year of scaling up'].isin(years)) & (scale_up_factors['draw'] == s), cadres
-        ].product()
-
-    total_staff = pd.DataFrame(integrated_scale_up_factor.mul(curr_hr.values, axis=1))
-    total_cost = pd.DataFrame(total_staff.mul(salary.values, axis=1))
-    total_staff['all_four_cadres'] = total_staff.sum(axis=1)
-    total_cost['all_four_cadres'] = total_cost.sum(axis=1)
-
-    extra_cost = pd.DataFrame(total_cost.subtract(total_cost.loc['s_1'], axis=1).drop(index='s_1').all_four_cadres)
-
-    extra_staff_by_cadre = pd.DataFrame(
-        total_staff.subtract(total_staff.loc['s_1'], axis=1).drop(index='s_1').drop(columns='all_four_cadres')
-    )
-    extra_cost_by_cadre = pd.DataFrame(
-        total_cost.subtract(total_cost.loc['s_1'], axis=1).drop(index='s_1').drop(columns='all_four_cadres')
-    )
-
-    # As checked below, the increase percentages per cadre should be equal to each other and to the overall percentage
-    # because we set the extra budget fractions the same as the current cost distribution. Especially, in the scenario
-    # of expanding all four cadres, the yearly percentage increase if 4.2%, which is exactly the budget increasing rate.
-    # staff_increase_percents = pd.DataFrame(
-    #     total_staff.subtract(
-    #         total_staff.loc['s_1'], axis=1
-    #     ).divide(
-    #         total_staff.loc['s_1'], axis=1
-    #     ).multiply(100).drop(index='s_1')
-    # )
-
-    # check total cost calculated is increased as expected - approximate float of a fraction can sacrifice some budget
-    # todo: to run the following checks once the scenarios are confirmed and re-run
-    # for s in param_names[1:]:
-    #     assert abs(total_cost.loc[s, 'all_four_cadres'] -
-    #                (1 + 0.042) ** (len(years)) * total_cost.loc['s_1', 'all_four_cadres']) < 1e6
+    total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
+    total_cost[cadres] = total_cost.value.tolist()
+    total_cost.drop(columns='value', inplace=True)
+    total_cost['all_cadres'] = total_cost[total_cost.columns[2:11]].sum(axis=1)
+    total_cost.rename(columns={'index': 'year'}, inplace=True)
+
+    # check total cost calculated is increased as expected
+    years = range(2019, the_target_period[1].year + 1)
+    for s in param_names[1:]:
+        assert (abs(
+            total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] -
+            (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_1'),
+                                                       'all_cadres'].values[0]
+        ) < 1e6).all()
 
     # Absolute Number of Deaths and DALYs and Services
     num_deaths = extract_results(
@@ -412,16 +406,16 @@ def get_hr_salary(cadres):
          ) < 1e-6
     ).all()
 
-    # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
-    # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
-    ROI = pd.DataFrame(index=num_deaths_averted.index, columns=num_dalys_averted.columns)
-    CE = pd.DataFrame(index=num_deaths_averted.index, columns=num_dalys_averted.columns)
-    assert (ROI.index == extra_cost.index).all()
-    for i in ROI.index:
-        ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost.loc[i, 'all_four_cadres']
-        CE.loc[i, 'mean'] = extra_cost.loc[i, 'all_four_cadres'] / num_dalys_averted.loc[i, 'mean']
-        CE.loc[i, 'lower'] = extra_cost.loc[i, 'all_four_cadres'] / num_dalys_averted.loc[i, 'upper']
-        CE.loc[i, 'upper'] = extra_cost.loc[i, 'all_four_cadres'] / num_dalys_averted.loc[i, 'lower']
+    # # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
+    # # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
+    # ROI = pd.DataFrame(index=num_deaths_averted.index, columns=num_dalys_averted.columns)
+    # CE = pd.DataFrame(index=num_deaths_averted.index, columns=num_dalys_averted.columns)
+    # assert (ROI.index == extra_cost.index).all()
+    # for i in ROI.index:
+    #     ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost.loc[i, 'all_cadres']
+    #     CE.loc[i, 'mean'] = extra_cost.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean']
+    #     CE.loc[i, 'lower'] = extra_cost.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper']
+    #     CE.loc[i, 'upper'] = extra_cost.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower']
 
     # prepare colors for plots
     appt_color = {
@@ -435,7 +429,17 @@ def get_hr_salary(cadres):
         'Clinical': 'blue',
         'DCSA': 'orange',
         'Nursing_and_Midwifery': 'red',
-        'Pharmacy': 'green'
+        'Pharmacy': 'green',
+        'Dental': 'gray',
+        'Laboratory': 'gray',
+        'Mental': 'gray',
+        'Nutrition': 'gray',
+        'Radiography': 'gray',
+    }
+    scenario_color = {
+        's_1': 'orange',
+        's_2': 'blue',
+        's_3': 'green',
     }
 
     # plot absolute numbers for scenarios
@@ -468,7 +472,7 @@ def get_hr_salary(cadres):
     ])/1e6
     fig, ax = plt.subplots()
     num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
-    ax.errorbar([0, 1], num_services_summarized['mean'].values / 1e6, yerr=yerr_services,
+    ax.errorbar([0, 1, 2], num_services_summarized['mean'].values / 1e6, yerr=yerr_services,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -482,37 +486,37 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Number of staff by cadre, {target_period()}'
-    total_staff_to_plot = (total_staff / 1000).drop(columns='all_four_cadres')
-    fig, ax = plt.subplots()
-    total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Thousands', fontsize='small')
-    ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
-
-    name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
-    total_cost_to_plot = (total_cost / 1e6).drop(columns='all_four_cadres')
-    fig, ax = plt.subplots()
-    total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Number of staff by cadre, {target_period()}'
+    # total_staff_to_plot = (total_staff / 1000).drop(columns='all_cadres')
+    # fig, ax = plt.subplots()
+    # total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Thousands', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+    #
+    # name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
+    # total_cost_to_plot = (total_cost / 1e6).drop(columns='all_cadres')
+    # fig, ax = plt.subplots()
+    # total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     name_of_plot = f'DALYs by cause, {target_period()}'
     num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6
@@ -522,7 +526,7 @@ def get_hr_salary(cadres):
     ])/1e6
     fig, ax = plt.subplots()
     num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
-    ax.errorbar([0, 1], num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys,
+    ax.errorbar([0, 1, 2], num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -546,7 +550,7 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     # plot relative numbers for scenarios
-    name_of_plot = f'DALYs averted, {target_period()}'
+    name_of_plot = f'DALYs averted against no expansion, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, xticklabels_horizontal_and_wrapped=True,
                                   put_labels_in_legend=True)
     ax.set_title(name_of_plot)
@@ -556,7 +560,7 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Deaths averted, {target_period()}'
+    name_of_plot = f'Deaths averted against no expansion, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, xticklabels_horizontal_and_wrapped=True,
                                   put_labels_in_legend=True)
     ax.set_title(name_of_plot)
@@ -566,39 +570,39 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Extra staff by cadre, {target_period()}'
-    extra_staff_by_cadre_to_plot = extra_staff_by_cadre / 1e3
-    fig, ax = plt.subplots()
-    extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Thousands', fontsize='small')
-    ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small')
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
-
-    name_of_plot = f'Extra budget by cadre, {target_period()}'
-    extra_cost_by_cadre_to_plot = extra_cost_by_cadre / 1e6
-    fig, ax = plt.subplots()
-    extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small')
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
-
-    name_of_plot = f'Services increased by appointment type, {target_period()}'
+    # name_of_plot = f'Extra staff by cadre, {target_period()}'
+    # extra_staff_by_cadre_to_plot = extra_staff_by_cadre / 1e3
+    # fig, ax = plt.subplots()
+    # extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Thousands', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small')
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+    #
+    # name_of_plot = f'Extra budget by cadre, {target_period()}'
+    # extra_cost_by_cadre_to_plot = extra_cost_by_cadre / 1e6
+    # fig, ax = plt.subplots()
+    # extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small')
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    name_of_plot = f'Services increased by appointment type \n against no expansion, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6
     yerr_services = np.array([
         (num_services_increased['mean'].values - num_services_increased['lower']).values,
@@ -606,7 +610,7 @@ def get_hr_salary(cadres):
     ]) / 1e6
     fig, ax = plt.subplots()
     num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
-    ax.errorbar(0, num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    ax.errorbar([0, 1], num_services_increased['mean'].values / 1e6, yerr=yerr_services,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -616,11 +620,13 @@ def get_hr_salary(cadres):
                fontsize='small')
     plt.title(name_of_plot)
     fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'DALYs averted by cause, {target_period()}'
+    name_of_plot = f'DALYs averted by cause against no expansion, {target_period()}'
     num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
     yerr_dalys = np.array([
         (num_dalys_averted['mean'].values - num_dalys_averted['lower']).values,
@@ -628,7 +634,7 @@ def get_hr_salary(cadres):
     ]) / 1e6
     fig, ax = plt.subplots()
     num_dalys_by_cause_averted_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
-    ax.errorbar(0, num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
+    ax.errorbar([0, 1], num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -650,34 +656,36 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    # plot ROI and CE for all expansion scenarios
-
-    name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(ROI, xticklabels_horizontal_and_wrapped=True,
-                                  put_labels_in_legend=True)
-    ax.set_title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
-
-    name_of_plot = f'Cost per DALY averted, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(CE, xticklabels_horizontal_and_wrapped=True,
-                                  put_labels_in_legend=True)
-    ax.set_title(name_of_plot)
-    ax.set_ylabel('USD dollars')
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # # plot ROI and CE for all expansion scenarios
+    #
+    # name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(ROI, xticklabels_horizontal_and_wrapped=True,
+    #                               put_labels_in_legend=True)
+    # ax.set_title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+    #
+    # name_of_plot = f'Cost per DALY averted, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(CE, xticklabels_horizontal_and_wrapped=True,
+    #                               put_labels_in_legend=True)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('USD dollars')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     # todo
     # Plot comparison results: there are negative changes of some appts and causes, try increase runs and see.
-    # To design more scenarios so that Pharmacy cadre can be expanded more than the 16 scenarios
+    # To design more scenarios so that Pharmacy cadre can be expanded more than the 33 scenarios
     # and so that each cadre has different scale up factor (the one in more shortage will need to be scaled up more)?
-    # As we have 16 scenarios in total, \
+    # As we have 33 scenarios in total, \
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres.
-    # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary?
+    # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary? The \
+    # inflation rate of GDP and health workforce budget and the increase rate of salary could be assumed to be \
+    # the same, thus no need to consider the increase rate of salary if GDP inflation is not considered.
     # To plot time series of staff and budget in the target period to show \
     # how many staff and how much budget to increase yearly (choose the best scenario to illustrate)?
     # Get and plot services by short treatment id?
@@ -714,5 +722,5 @@ def get_hr_salary(cadres):
         results_folder=args.results_folder,
         output_folder=args.results_folder,
         resourcefilepath=Path('./resources'),
-        the_target_period=(Date(2020, 1, 1), Date(2029, 12, 31))
+        the_target_period=(Date(2019, 1, 1), Date(2029, 12, 31))
     )

From 639ba141bae9ebf2c1c1ce23499a1905d9b021ce Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 12 Sep 2024 01:57:15 +0100
Subject: [PATCH 106/218] distribute extra budget for Other group matching
 their cost distribution

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py   | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index f171f03345..fade2e59cc 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -91,10 +91,10 @@
         if c in auxiliary.index:  # the four main cadres
             extra_budget_fracs.loc[c, i] = auxiliary.loc[c, i]
         else:  # the other 5 cadres
-            # extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] * (
-            #     staff_cost.loc[c, 'cost_frac'] / staff_cost.loc[staff_cost.index.isin(other_group), 'cost_frac'].sum()
-            # )  # current cost distribution among the 5 other cadres
-            extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] / 5  # equal fracs among the 5 other cadres
+            extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] * (
+                staff_cost.loc[c, 'cost_frac'] / staff_cost.loc[staff_cost.index.isin(other_group), 'cost_frac'].sum()
+            )  # current cost distribution among the 5 other cadres
+            # extra_budget_fracs.loc[c, i] = auxiliary.loc['Other', i] / 5  # equal fracs among the 5 other cadres
 
 assert (abs(extra_budget_fracs.iloc[:, 1:len(extra_budget_fracs.columns)].sum(axis=0) - 1.0) < 1/1e10).all()
 

From 72c9bc5cc7d93f803a965f134e48702f1e9de530 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 12 Sep 2024 02:12:19 +0100
Subject: [PATCH 107/218] submit a run of all 33 scenarios

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 787835e07b..2cb559ecc9 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -72,7 +72,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
                         'HR_expansion_by_officer_type': self.scenarios.iloc[:, i].to_dict()
                     }
                     }
-                ) for i in range(len(self.scenarios.columns) - 30)  # run first 3 scenarios of 33 scenarios
+                ) for i in range(len(self.scenarios.columns))  # run 33 scenarios
         }
 
     def _baseline(self) -> Dict:

From 261c1e9d9255252a1d07b2a9fd1159b7cda2e940 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 12 Sep 2024 02:21:45 +0100
Subject: [PATCH 108/218] fix checks failure

---
 ...dsion_by_officer_type_with_extra_budget.py | 34 +++++++++----------
 src/tlo/methods/healthsystem.py               |  2 +-
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index fdce8bcda6..e080674bb3 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -150,7 +150,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
         elif put_labels_in_legend and len(xticks) == 2:
             colors = ['blue', 'green']
         else:
-            colore = None
+            colors = None
 
         fig, ax = plt.subplots(figsize=(10, 5))
         ax.bar(
@@ -425,22 +425,22 @@ def get_hr_salary(cadres):
         cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
         for cause in num_dalys_by_cause_summarized.columns
     }
-    officer_category_color = {
-        'Clinical': 'blue',
-        'DCSA': 'orange',
-        'Nursing_and_Midwifery': 'red',
-        'Pharmacy': 'green',
-        'Dental': 'gray',
-        'Laboratory': 'gray',
-        'Mental': 'gray',
-        'Nutrition': 'gray',
-        'Radiography': 'gray',
-    }
-    scenario_color = {
-        's_1': 'orange',
-        's_2': 'blue',
-        's_3': 'green',
-    }
+    # officer_category_color = {
+    #     'Clinical': 'blue',
+    #     'DCSA': 'orange',
+    #     'Nursing_and_Midwifery': 'red',
+    #     'Pharmacy': 'green',
+    #     'Dental': 'gray',
+    #     'Laboratory': 'gray',
+    #     'Mental': 'gray',
+    #     'Nutrition': 'gray',
+    #     'Radiography': 'gray',
+    # }
+    # scenario_color = {
+    #     's_1': 'orange',
+    #     's_2': 'blue',
+    #     's_3': 'green',
+    # }
 
     # plot absolute numbers for scenarios
 
diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index e7b6d92c5c..b957c2fb38 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3041,7 +3041,7 @@ def apply(self, population):
         # get proportional daily extra budget for each officer type
         extra_budget_fraction = pd.Series(self.module.parameters['HR_expansion_by_officer_type'])
         assert set(extra_budget_fraction.index) == set(daily_cost.index), \
-            f"Input officer types do not match the defined officer types"
+            "Input officer types do not match the defined officer types"
         daily_cost = daily_cost.reindex(index=extra_budget_fraction.index)
         daily_cost['extra_budget_per_day'] = daily_extra_budget * extra_budget_fraction
 

From 4d45b469dd1714f5df7408dfd5a2ce3f54fc09b5 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 17 Sep 2024 11:31:29 +0100
Subject: [PATCH 109/218] plot extra staff by cadre

---
 ...dsion_by_officer_type_with_extra_budget.py | 97 ++++++++++++-------
 1 file changed, 60 insertions(+), 37 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index e080674bb3..c80d332541 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -27,11 +27,16 @@
 )
 
 # rename scenarios
-# todo: to update once scenarios confirmed
 substitute_labels = {
     's_1': 'no_expansion',
     's_2': 'all_expansion_current',
     's_3': 'all_expansion_equal',
+    's_4': 'Clinical', 's_5': 'DCSA', 's_6': 'Nursing_and_Midwifery', 's_7': 'Pharmacy', 's_8': 'Other',
+    's_9': 'CD_equal', 's_10': 'CN_equal', 's_11': 'CP_equal', 's_12': 'CO_equal', 's_13': 'DN_equal',
+    's_14': 'DP_equal', 's_15': 'DO_equal', 's_16': 'NP_equal', 's_17': 'NO_equal', 's_18': 'PO_equal',
+    's_19': 'CDN_equal', 's_20': 'CDP_equal', 's_21': 'CDO_equal', 's_22': 'CNP_equal', 's_23': 'CNO_equal',
+    's_24': 'CPO_equal', 's_25': 'DNP_equal', 's_26': 'DNO_equal', 's_27': 'DPO_equal', 's_28': 'NPO_equal',
+    's_29': 'CDNP_equal', 's_30': 'CDNO_equal', 's_31': 'CDPO_equal', 's_32': 'CNPO_equal', 's_33': 'DNPO_equal',
 }
 
 
@@ -223,7 +228,7 @@ def get_total_cost(_df):
 
     def get_current_hr(cadres):
         """
-        Return current (year of 2019) staff counts and capabilities for the cadres specified.
+        Return current (year of 2018/2019) staff counts and capabilities for the cadres specified.
         """
         curr_hr_path = Path(resourcefilepath
                             / 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv')
@@ -249,18 +254,15 @@ def get_hr_salary(cadres):
         return salary[cadres]
 
     # Get parameter/scenario names
-    param_names = get_parameter_names_from_scenario_file()
+    param_names = ('s_1', 's_2', 's_3')#get_parameter_names_from_scenario_file()
 
     # Define cadres in order
     cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
               'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
 
-    # # Get current (year of 2019) hr counts
+    # # Get current (year of 2018/2019) hr counts
     # curr_hr = get_current_hr(cadres)
 
-    # # Get salary
-    # salary = get_hr_salary(cadres)
-
     # # Get scale up factors for all scenarios
     # scale_up_factors = extract_results(
     #     results_folder,
@@ -276,6 +278,9 @@ def get_hr_salary(cadres):
     # scale_up_factors[cadres] = scale_up_factors.value.tolist()
     # scale_up_factors.drop(columns='value', inplace=True)
 
+    # Get salary
+    salary = get_hr_salary(cadres)
+
     # Get total cost for all scenarios
     total_cost = extract_results(
         results_folder,
@@ -287,9 +292,25 @@ def get_hr_salary(cadres):
     total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
     total_cost[cadres] = total_cost.value.tolist()
     total_cost.drop(columns='value', inplace=True)
-    total_cost['all_cadres'] = total_cost[total_cost.columns[2:11]].sum(axis=1)
+    total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1)
     total_cost.rename(columns={'index': 'year'}, inplace=True)
 
+    # get staff count = total cost / salary
+    staff_count = total_cost.copy()
+    for c in cadres:
+        staff_count.loc[:, c] = total_cost.loc[:, c] / salary[c].values[0]
+    staff_count.loc[:, 'all_cadres'] = staff_count[[c for c in staff_count.columns if c in cadres]].sum(axis=1)
+
+    # get extra count = staff count - staff count of no expansion s_1
+    # note that annual staff increase rate = scale up factor - 1
+    extra_staff = staff_count.copy()
+    for i in staff_count.index:
+        extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
+
+    extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].copy()
+    extra_staff_2029 = extra_staff_2029.drop(index=extra_staff_2029[extra_staff_2029.draw == 's_1'].index).drop(
+        columns='year')
+
     # check total cost calculated is increased as expected
     years = range(2019, the_target_period[1].year + 1)
     for s in param_names[1:]:
@@ -425,17 +446,17 @@ def get_hr_salary(cadres):
         cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
         for cause in num_dalys_by_cause_summarized.columns
     }
-    # officer_category_color = {
-    #     'Clinical': 'blue',
-    #     'DCSA': 'orange',
-    #     'Nursing_and_Midwifery': 'red',
-    #     'Pharmacy': 'green',
-    #     'Dental': 'gray',
-    #     'Laboratory': 'gray',
-    #     'Mental': 'gray',
-    #     'Nutrition': 'gray',
-    #     'Radiography': 'gray',
-    # }
+    officer_category_color = {
+        'Clinical': 'blue',
+        'DCSA': 'orange',
+        'Nursing_and_Midwifery': 'red',
+        'Pharmacy': 'green',
+        'Dental': 'purple',
+        'Laboratory': 'orchid',
+        'Mental': 'plum',
+        'Nutrition': 'thistle',
+        'Radiography': 'lightgray',
+    }
     # scenario_color = {
     #     's_1': 'orange',
     #     's_2': 'blue',
@@ -570,22 +591,22 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    # name_of_plot = f'Extra staff by cadre, {target_period()}'
-    # extra_staff_by_cadre_to_plot = extra_staff_by_cadre / 1e3
-    # fig, ax = plt.subplots()
-    # extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    # ax.set_ylabel('Thousands', fontsize='small')
-    # ax.set(xlabel=None)
-    # xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
-    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-    #            fontsize='small')
-    # plt.title(name_of_plot)
-    # fig.tight_layout()
-    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    # fig.show()
-    # plt.close(fig)
-    #
+    name_of_plot = f'Extra staff by cadre, {target_period()}'
+    extra_staff_by_cadre_to_plot = extra_staff_2029.set_index('draw').drop(columns='all_cadres') / 1e3
+    fig, ax = plt.subplots()
+    extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Thousands', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     # name_of_plot = f'Extra budget by cadre, {target_period()}'
     # extra_cost_by_cadre_to_plot = extra_cost_by_cadre / 1e6
     # fig, ax = plt.subplots()
@@ -679,16 +700,18 @@ def get_hr_salary(cadres):
 
     # todo
     # Plot comparison results: there are negative changes of some appts and causes, try increase runs and see.
-    # To design more scenarios so that Pharmacy cadre can be expanded more than the 33 scenarios
-    # and so that each cadre has different scale up factor (the one in more shortage will need to be scaled up more)?
     # As we have 33 scenarios in total, \
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres.
+    # To vary the HRH budget growth rate (default: 4.2%) and do sensitivity analysis \
+    # (around the best possible extra budget allocation scenario)?
     # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary? The \
     # inflation rate of GDP and health workforce budget and the increase rate of salary could be assumed to be \
     # the same, thus no need to consider the increase rate of salary if GDP inflation is not considered.
     # To plot time series of staff and budget in the target period to show \
     # how many staff and how much budget to increase yearly (choose the best scenario to illustrate)?
     # Get and plot services by short treatment id?
+    # To design more scenarios so that Pharmacy cadre can be expanded more than the 33 scenarios
+    # and so that each cadre has different scale up factor (the one in more shortage will need to be scaled up more)?
     # Later, to explain the cause of differences in scenarios, might consider hcw time flow?
     # Before submit a run, merge in the remote master.
 

From 3b11f79870e40f01d4d0f519c089029fde021318 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 18 Sep 2024 16:08:33 +0100
Subject: [PATCH 110/218] plot extra cost by cadre

---
 ...dsion_by_officer_type_with_extra_budget.py | 54 +++++++++++++------
 1 file changed, 38 insertions(+), 16 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index c80d332541..9343846148 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -39,6 +39,18 @@
     's_29': 'CDNP_equal', 's_30': 'CDNO_equal', 's_31': 'CDPO_equal', 's_32': 'CNPO_equal', 's_33': 'DNPO_equal',
 }
 
+# group scenarios for presentation
+scenario_groups = {
+    'no_expansion': 's_1',
+    'all_cadres_expansion': {'s_2', 's_3'},
+    'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'},
+    'two_cadres_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13',
+                             's_14', 's_15', 's_16', 's_17', 's_18'},
+    'three_cadres_expansion': {'s_19', 's_20', 's_21', 's_22', 's_23',
+                               's_24', 's_25', 's_26', 's_27', 's_28'},
+    'four_cadres_expansion': {'s_29', 's_30', 's_31', 's_32', 's_33'}
+}
+
 
 def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None,
           the_target_period: Tuple[Date, Date] = None):
@@ -295,6 +307,16 @@ def get_hr_salary(cadres):
     total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1)
     total_cost.rename(columns={'index': 'year'}, inplace=True)
 
+    # get extra cost = total cost - total cost of no expansion s_1
+    # note that annual staff increase rate = scale up factor - 1
+    extra_cost = total_cost.copy()
+    for i in total_cost.index:
+        extra_cost.iloc[i, 2:] = extra_cost.iloc[i, 2:] - extra_cost.iloc[0, 2:]
+
+    extra_cost_2029 = extra_cost.loc[extra_cost.year == 2029, :].copy()
+    extra_cost_2029 = extra_cost_2029.drop(index=extra_cost_2029[extra_cost_2029.draw == 's_1'].index).drop(
+        columns='year')
+
     # get staff count = total cost / salary
     staff_count = total_cost.copy()
     for c in cadres:
@@ -591,7 +613,7 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Extra staff by cadre, {target_period()}'
+    name_of_plot = f'Extra staff by cadre against no expansion, {target_period()}'
     extra_staff_by_cadre_to_plot = extra_staff_2029.set_index('draw').drop(columns='all_cadres') / 1e3
     fig, ax = plt.subplots()
     extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
@@ -607,21 +629,21 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    # name_of_plot = f'Extra budget by cadre, {target_period()}'
-    # extra_cost_by_cadre_to_plot = extra_cost_by_cadre / 1e6
-    # fig, ax = plt.subplots()
-    # extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    # ax.set_ylabel('Millions', fontsize='small')
-    # ax.set(xlabel=None)
-    # xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
-    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-    #            fontsize='small')
-    # plt.title(name_of_plot)
-    # fig.tight_layout()
-    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    # fig.show()
-    # plt.close(fig)
+    name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
+    extra_cost_by_cadre_to_plot = extra_cost_2029.set_index('draw').drop(columns='all_cadres') / 1e6
+    fig, ax = plt.subplots()
+    extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
 
     name_of_plot = f'Services increased by appointment type \n against no expansion, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6

From ba2262be8143cc484a2cb7fe8fb4296d082e596d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 20 Sep 2024 15:19:17 +0100
Subject: [PATCH 111/218] initial plots for all scenarios

---
 ...dsion_by_officer_type_with_extra_budget.py | 171 ++++++++----------
 1 file changed, 78 insertions(+), 93 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 9343846148..db94d9eaa9 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -160,14 +160,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
 
         xticks = {(i + 0.5): k for i, k in enumerate(_df.index)}
 
-        # Define colormap (used only with option `put_labels_in_legend=True`)
-        # todo: could re-define colors for each scenario once scenarios are confirmed
-        if put_labels_in_legend and len(xticks) == 3:
-            colors = ['orange', 'blue', 'green']
-        elif put_labels_in_legend and len(xticks) == 2:
-            colors = ['blue', 'green']
-        else:
-            colors = None
+        colors = None
 
         fig, ax = plt.subplots(figsize=(10, 5))
         ax.bar(
@@ -182,24 +175,13 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
             zorder=100,
         )
         if annotations:
-            for xpos, ypos, text in zip(xticks.keys(), _df['upper'].values, annotations):
+            for xpos, ypos, text in zip(xticks.keys(), _df['mean'].values, annotations):
                 ax.text(xpos, ypos*1.15, text, horizontalalignment='center', rotation='vertical', fontsize='x-small')
+
         ax.set_xticks(list(xticks.keys()))
 
-        if put_labels_in_legend:
-            # Set x-axis labels as simple scenario names
-            # Insert legend to explain scenarios
-            xtick_legend = [f'{v}: {substitute_labels[v]}' for v in xticks.values()]
-            h, _ = ax.get_legend_handles_labels()
-            ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5))
-            ax.set_xticklabels(list(xticks.values()))
-        else:
-            if not xticklabels_horizontal_and_wrapped:
-                # xticklabels will be vertical and not wrapped
-                ax.set_xticklabels(list(xticks.values()), rotation=90)
-            else:
-                wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()]
-                ax.set_xticklabels(wrapped_labs)
+        xtick_label_detail = [substitute_labels[v] for v in xticks.values()]
+        ax.set_xticklabels(xtick_label_detail, rotation=90)
 
         ax.grid(axis="y")
         ax.spines['top'].set_visible(False)
@@ -208,20 +190,20 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
 
         return fig, ax
 
-    def get_scale_up_factor(_df):
-        """
-        Return a series of yearly scale up factors for all cadres,
-        with index of year and value of list of scale up factors.
-        """
-        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'scale_up_factor']
-                      ].set_index('year_of_scale_up')
-        _df = _df['scale_up_factor'].apply(pd.Series)
-        assert (_df.columns == cadres).all()
-        _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index}
-        _df_1 = pd.DataFrame(data=_dict).T
-        return pd.Series(
-            _df_1.loc[:, 0], index=_df_1.index
-        )
+    # def get_scale_up_factor(_df):
+    #     """
+    #     Return a series of yearly scale up factors for all cadres,
+    #     with index of year and value of list of scale up factors.
+    #     """
+    #     _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'scale_up_factor']
+    #                   ].set_index('year_of_scale_up')
+    #     _df = _df['scale_up_factor'].apply(pd.Series)
+    #     assert (_df.columns == cadres).all()
+    #     _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index}
+    #     _df_1 = pd.DataFrame(data=_dict).T
+    #     return pd.Series(
+    #         _df_1.loc[:, 0], index=_df_1.index
+    #     )
 
     def get_total_cost(_df):
         """
@@ -266,7 +248,7 @@ def get_hr_salary(cadres):
         return salary[cadres]
 
     # Get parameter/scenario names
-    param_names = ('s_1', 's_2', 's_3')#get_parameter_names_from_scenario_file()
+    param_names = get_parameter_names_from_scenario_file()
 
     # Define cadres in order
     cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
@@ -311,11 +293,7 @@ def get_hr_salary(cadres):
     # note that annual staff increase rate = scale up factor - 1
     extra_cost = total_cost.copy()
     for i in total_cost.index:
-        extra_cost.iloc[i, 2:] = extra_cost.iloc[i, 2:] - extra_cost.iloc[0, 2:]
-
-    extra_cost_2029 = extra_cost.loc[extra_cost.year == 2029, :].copy()
-    extra_cost_2029 = extra_cost_2029.drop(index=extra_cost_2029[extra_cost_2029.draw == 's_1'].index).drop(
-        columns='year')
+        extra_cost.iloc[i, 2:] = total_cost.iloc[i, 2:] - total_cost.iloc[0, 2:]
 
     # get staff count = total cost / salary
     staff_count = total_cost.copy()
@@ -331,7 +309,7 @@ def get_hr_salary(cadres):
 
     extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].copy()
     extra_staff_2029 = extra_staff_2029.drop(index=extra_staff_2029[extra_staff_2029.draw == 's_1'].index).drop(
-        columns='year')
+        columns='year').set_index('draw')
 
     # check total cost calculated is increased as expected
     years = range(2019, the_target_period[1].year + 1)
@@ -384,13 +362,22 @@ def get_hr_salary(cadres):
     ).pipe(set_param_names_as_column_index_level_0)
 
     # get absolute numbers for scenarios
-    num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names)
-    num_dalys_by_cause_summarized = summarize(num_dalys_by_cause, only_mean=True).T.reindex(param_names)
+    # sort the scenarios according to their DALYs values, in ascending order
+    num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names).sort_values(by='mean')
+    num_dalys_by_cause_summarized = summarize(num_dalys_by_cause, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
 
-    num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names)
+    num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
 
-    num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names)
-    num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names)
+    num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
 
     # get relative numbers for scenarios, compared to no_expansion scenario: s_1
     num_services_increased = summarize(
@@ -399,7 +386,7 @@ def get_hr_salary(cadres):
                 num_services.loc[0],
                 comparison='s_1')
         ).T
-    ).iloc[0].unstack().reindex(param_names).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_deaths_averted = summarize(
         -1.0 *
@@ -408,7 +395,7 @@ def get_hr_salary(cadres):
                 num_deaths.loc[0],
                 comparison='s_1')
         ).T
-    ).iloc[0].unstack().reindex(param_names).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_dalys_averted = summarize(
         -1.0 *
@@ -417,7 +404,7 @@ def get_hr_salary(cadres):
                 num_dalys.loc[0],
                 comparison='s_1')
         ).T
-    ).iloc[0].unstack().reindex(param_names).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_dalys_by_cause_averted = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
@@ -425,7 +412,7 @@ def get_hr_salary(cadres):
             comparison='s_1',
         ),
         only_mean=True
-    ).T
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_appts_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
@@ -433,7 +420,7 @@ def get_hr_salary(cadres):
             comparison='s_1',
         ),
         only_mean=True
-    ).T
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     # Check that when we sum across the causes/appt types,
     # we get the same total as calculated when we didn't split by cause/appt type.
@@ -449,16 +436,16 @@ def get_hr_salary(cadres):
          ) < 1e-6
     ).all()
 
-    # # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
-    # # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
-    # ROI = pd.DataFrame(index=num_deaths_averted.index, columns=num_dalys_averted.columns)
-    # CE = pd.DataFrame(index=num_deaths_averted.index, columns=num_dalys_averted.columns)
-    # assert (ROI.index == extra_cost.index).all()
+    # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
+    # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
+    # ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
+    # todo: for the bad scenarios, the dalys averted are negative (to find out why), thus CE does not make sense.
+    # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
     # for i in ROI.index:
-    #     ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost.loc[i, 'all_cadres']
-    #     CE.loc[i, 'mean'] = extra_cost.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean']
-    #     CE.loc[i, 'lower'] = extra_cost.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper']
-    #     CE.loc[i, 'upper'] = extra_cost.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower']
+    #     ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_2029.loc[i, 'all_cadres']
+    #     CE.loc[i, 'mean'] = extra_cost_2029.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean']
+    #     CE.loc[i, 'lower'] = extra_cost_2029.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper']
+    #     CE.loc[i, 'upper'] = extra_cost_2029.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower']
 
     # prepare colors for plots
     appt_color = {
@@ -479,11 +466,6 @@ def get_hr_salary(cadres):
         'Nutrition': 'thistle',
         'Radiography': 'lightgray',
     }
-    # scenario_color = {
-    #     's_1': 'orange',
-    #     's_2': 'blue',
-    #     's_3': 'green',
-    # }
 
     # plot absolute numbers for scenarios
 
@@ -515,7 +497,7 @@ def get_hr_salary(cadres):
     ])/1e6
     fig, ax = plt.subplots()
     num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
-    ax.errorbar([0, 1, 2], num_services_summarized['mean'].values / 1e6, yerr=yerr_services,
+    ax.errorbar(range(len(param_names)), num_services_summarized['mean'].values / 1e6, yerr=yerr_services,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -544,7 +526,7 @@ def get_hr_salary(cadres):
     # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
     # fig.show()
     # plt.close(fig)
-    #
+
     # name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
     # total_cost_to_plot = (total_cost / 1e6).drop(columns='all_cadres')
     # fig, ax = plt.subplots()
@@ -569,7 +551,7 @@ def get_hr_salary(cadres):
     ])/1e6
     fig, ax = plt.subplots()
     num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
-    ax.errorbar([0, 1, 2], num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys,
+    ax.errorbar(range(len(param_names)), num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -614,7 +596,8 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     name_of_plot = f'Extra staff by cadre against no expansion, {target_period()}'
-    extra_staff_by_cadre_to_plot = extra_staff_2029.set_index('draw').drop(columns='all_cadres') / 1e3
+    extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex(
+        num_dalys_summarized.index).drop(['s_1']) / 1e3
     fig, ax = plt.subplots()
     extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Thousands', fontsize='small')
@@ -629,23 +612,24 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
-    extra_cost_by_cadre_to_plot = extra_cost_2029.set_index('draw').drop(columns='all_cadres') / 1e6
-    fig, ax = plt.subplots()
-    extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small')
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
+    # extra_cost_by_cadre_to_plot = extra_cost_2029.drop(columns='all_cadres').reindex(
+    #     num_dalys_summarized.index).drop(['s_1']) / 1e6
+    # fig, ax = plt.subplots()
+    # extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small')
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'Services increased by appointment type \n against no expansion, {target_period()}'
+    name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6
     yerr_services = np.array([
         (num_services_increased['mean'].values - num_services_increased['lower']).values,
@@ -653,7 +637,7 @@ def get_hr_salary(cadres):
     ]) / 1e6
     fig, ax = plt.subplots()
     num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
-    ax.errorbar([0, 1], num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -677,7 +661,7 @@ def get_hr_salary(cadres):
     ]) / 1e6
     fig, ax = plt.subplots()
     num_dalys_by_cause_averted_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
-    ax.errorbar([0, 1], num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
+    ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -699,8 +683,8 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    # # plot ROI and CE for all expansion scenarios
-    #
+    # plot ROI and CE for all expansion scenarios
+
     # name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
     # fig, ax = do_bar_plot_with_ci(ROI, xticklabels_horizontal_and_wrapped=True,
     #                               put_labels_in_legend=True)
@@ -709,7 +693,7 @@ def get_hr_salary(cadres):
     # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
     # fig.show()
     # plt.close(fig)
-    #
+
     # name_of_plot = f'Cost per DALY averted, {target_period()}'
     # fig, ax = do_bar_plot_with_ci(CE, xticklabels_horizontal_and_wrapped=True,
     #                               put_labels_in_legend=True)
@@ -736,6 +720,7 @@ def get_hr_salary(cadres):
     # and so that each cadre has different scale up factor (the one in more shortage will need to be scaled up more)?
     # Later, to explain the cause of differences in scenarios, might consider hcw time flow?
     # Before submit a run, merge in the remote master.
+    # When calculate total cost and extra budget, should submit over all years from start to end
 
 
 if __name__ == "__main__":

From a558cc7a5db76b473fd9714d04e99f25c1b47768 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 20 Sep 2024 17:56:20 +0100
Subject: [PATCH 112/218] initial plots for all scenarios

---
 ...dsion_by_officer_type_with_extra_budget.py | 141 +++++++++---------
 1 file changed, 72 insertions(+), 69 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index db94d9eaa9..1814de603d 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -289,11 +289,14 @@ def get_hr_salary(cadres):
     total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1)
     total_cost.rename(columns={'index': 'year'}, inplace=True)
 
-    # get extra count = staff count - staff count of no expansion s_1
-    # note that annual staff increase rate = scale up factor - 1
-    extra_cost = total_cost.copy()
-    for i in total_cost.index:
-        extra_cost.iloc[i, 2:] = total_cost.iloc[i, 2:] - total_cost.iloc[0, 2:]
+    # total cost of all expansion years
+    total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year')
+
+    # total extra cost of all expansion years
+    extra_cost_all_yrs = total_cost_all_yrs.copy()
+    for s in param_names[1:]:
+        extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_1', :]
+    extra_cost_all_yrs.drop(index='s_1', inplace=True)
 
     # get staff count = total cost / salary
     staff_count = total_cost.copy()
@@ -307,9 +310,10 @@ def get_hr_salary(cadres):
     for i in staff_count.index:
         extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
 
-    extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].copy()
-    extra_staff_2029 = extra_staff_2029.drop(index=extra_staff_2029[extra_staff_2029.draw == 's_1'].index).drop(
-        columns='year').set_index('draw')
+    extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
+        index='s_1'
+    )
+    staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
 
     # check total cost calculated is increased as expected
     years = range(2019, the_target_period[1].year + 1)
@@ -438,14 +442,14 @@ def get_hr_salary(cadres):
 
     # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
     # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
-    # ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
+    ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
     # todo: for the bad scenarios, the dalys averted are negative (to find out why), thus CE does not make sense.
     # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
-    # for i in ROI.index:
-    #     ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_2029.loc[i, 'all_cadres']
-    #     CE.loc[i, 'mean'] = extra_cost_2029.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean']
-    #     CE.loc[i, 'lower'] = extra_cost_2029.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper']
-    #     CE.loc[i, 'upper'] = extra_cost_2029.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower']
+    for i in ROI.index:
+        ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_all_yrs.loc[i, 'all_cadres']
+    #     CE.loc[i, 'mean'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean']
+    #     CE.loc[i, 'lower'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper']
+    #     CE.loc[i, 'upper'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower']
 
     # prepare colors for plots
     appt_color = {
@@ -511,37 +515,37 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    # name_of_plot = f'Number of staff by cadre, {target_period()}'
-    # total_staff_to_plot = (total_staff / 1000).drop(columns='all_cadres')
-    # fig, ax = plt.subplots()
-    # total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    # ax.set_ylabel('Thousands', fontsize='small')
-    # ax.set(xlabel=None)
-    # xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index]
-    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-    #            fontsize='small', reverse=True)
-    # plt.title(name_of_plot)
-    # fig.tight_layout()
-    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    # fig.show()
-    # plt.close(fig)
+    name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}'
+    total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
+    fig, ax = plt.subplots()
+    total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Thousands', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
 
-    # name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
-    # total_cost_to_plot = (total_cost / 1e6).drop(columns='all_cadres')
-    # fig, ax = plt.subplots()
-    # total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    # ax.set_ylabel('Millions', fontsize='small')
-    # ax.set(xlabel=None)
-    # xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index]
-    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-    #            fontsize='small', reverse=True)
-    # plt.title(name_of_plot)
-    # fig.tight_layout()
-    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    # fig.show()
-    # plt.close(fig)
+    name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
+    total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
+    fig, ax = plt.subplots()
+    total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
 
     name_of_plot = f'DALYs by cause, {target_period()}'
     num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6
@@ -595,7 +599,7 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Extra staff by cadre against no expansion, {target_period()}'
+    name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}'
     extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex(
         num_dalys_summarized.index).drop(['s_1']) / 1e3
     fig, ax = plt.subplots()
@@ -612,22 +616,21 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
-    # name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
-    # extra_cost_by_cadre_to_plot = extra_cost_2029.drop(columns='all_cadres').reindex(
-    #     num_dalys_summarized.index).drop(['s_1']) / 1e6
-    # fig, ax = plt.subplots()
-    # extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    # ax.set_ylabel('Millions', fontsize='small')
-    # ax.set(xlabel=None)
-    # xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
-    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-    #            fontsize='small')
-    # plt.title(name_of_plot)
-    # fig.tight_layout()
-    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    # fig.show()
-    # plt.close(fig)
+    name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
+    extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres') / 1e6
+    fig, ax = plt.subplots()
+    extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
 
     name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6
@@ -685,14 +688,14 @@ def get_hr_salary(cadres):
 
     # plot ROI and CE for all expansion scenarios
 
-    # name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
-    # fig, ax = do_bar_plot_with_ci(ROI, xticklabels_horizontal_and_wrapped=True,
-    #                               put_labels_in_legend=True)
-    # ax.set_title(name_of_plot)
-    # fig.tight_layout()
-    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    # fig.show()
-    # plt.close(fig)
+    name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(ROI, xticklabels_horizontal_and_wrapped=True,
+                                  put_labels_in_legend=True)
+    ax.set_title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
 
     # name_of_plot = f'Cost per DALY averted, {target_period()}'
     # fig, ax = do_bar_plot_with_ci(CE, xticklabels_horizontal_and_wrapped=True,

From 494b6b8e7c78b99e8a2420ea9f71a509e0dfb24c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 23 Sep 2024 10:04:18 +0100
Subject: [PATCH 113/218] update comments

---
 ...lysis_hr_expandsion_by_officer_type_with_extra_budget.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 1814de603d..51d077a127 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -443,7 +443,9 @@ def get_hr_salary(cadres):
     # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
     # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
     ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
-    # todo: for the bad scenarios, the dalys averted are negative (to find out why), thus CE does not make sense.
+    # todo: for the bad scenarios (s_5, s_8, s_15), the dalys averted are negative
+    #  (maybe only due to statistical variation; relative difference to s_1 are close to 0%),
+    #  thus CE does not make sense.
     # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
     for i in ROI.index:
         ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_all_yrs.loc[i, 'all_cadres']
@@ -723,7 +725,7 @@ def get_hr_salary(cadres):
     # and so that each cadre has different scale up factor (the one in more shortage will need to be scaled up more)?
     # Later, to explain the cause of differences in scenarios, might consider hcw time flow?
     # Before submit a run, merge in the remote master.
-    # When calculate total cost and extra budget, should submit over all years from start to end
+    # Think about a measure of Universal Health Service Coverage for the scenarios.
 
 
 if __name__ == "__main__":

From 45eebaf6766aeec838dccc62bdd980902321e992 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 23 Sep 2024 10:13:48 +0100
Subject: [PATCH 114/218] fix failing checks

---
 .../analysis_hr_expandsion_by_officer_type_with_extra_budget.py  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 51d077a127..24c6bc1eb8 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -6,7 +6,6 @@
 """
 
 import argparse
-import textwrap
 from pathlib import Path
 from typing import Tuple
 

From 78dcb80980dd0a9871efe5b3da71b0e7b8880971 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 23 Sep 2024 22:48:58 +0100
Subject: [PATCH 115/218] add scenario settings with historical scaling from
 2019 to 2024 and expansion by cadre from 2025 to 2030 - todo: might need
 to update scenarios of extra budget allocation and HRH cost growth rate

---
 .../ResourceFile_dynamic_HR_scaling.xlsx                    | 4 ++--
 ...panding_current_hcw_by_officer_type_with_extra_budget.py | 6 +++---
 src/tlo/methods/healthsystem.py                             | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx
index a633e6fc92..5af44f5b35 100644
--- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx
+++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2d74390498e497ee0bf68773327868f6b199c1c9569337b173fa330c0f2f926
-size 24593
+oid sha256:f7462bfb5740df3f5ffbabd1cdf10e81342f6da146170cc9648de0fbedffb454
+size 25434
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 2cb559ecc9..1efca4e900 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -31,7 +31,7 @@ def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = Date(2030, 1, 1)
+        self.end_date = Date(2031, 1, 1)
         self.pop_size = 100_000  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
@@ -60,7 +60,7 @@ def draw_parameters(self, draw_number, rng):
     def _get_scenarios(self) -> Dict[str, Dict]:
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
-        self.YEAR_OF_CHANGE = 2019  # This is the year to change run settings and to start hr expansion.
+        self.YEAR_OF_CHANGE = 2025  # This is the year to change run settings and to start hr expansion.
 
         self.scenarios = extra_budget_fracs
 
@@ -86,7 +86,7 @@ def _baseline(self) -> Dict:
                 'cons_availability': 'default',
                 'cons_availability_postSwitch': 'all',  # todo: how to argue for this setting?
                 'year_cons_availability_switch': self.YEAR_OF_CHANGE,
-                'yearly_HR_scaling_mode': 'no_scaling',
+                'yearly_HR_scaling_mode': 'historical_scaling',
                 'start_year_HR_expansion_by_officer_type': self.YEAR_OF_CHANGE,
                 'end_year_HR_expansion_by_officer_type': self.end_date.year,
                 "policy_name": "Naive",
diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index b957c2fb38..e88de7f416 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -667,8 +667,8 @@ def read_parameters(self, data_folder):
             'Dental': 0, 'Laboratory': 0, 'Mental': 0, 'Nutrition': 0, 'Radiography': 0
         }
         self.parameters['HR_budget_growth_rate'] = 0.042
-        self.parameters['start_year_HR_expansion_by_officer_type'] = 2019
-        self.parameters['end_year_HR_expansion_by_officer_type'] = 2030
+        self.parameters['start_year_HR_expansion_by_officer_type'] = 2025
+        self.parameters['end_year_HR_expansion_by_officer_type'] = 2031
 
     def pre_initialise_population(self):
         """Generate the accessory classes used by the HealthSystem and pass to them the data that has been read."""

From f650a2eca2eee938c69f77f3c045f3f6762cb5c3 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 24 Sep 2024 22:33:00 +0100
Subject: [PATCH 116/218] annotate bar plot for DALYs averted

---
 ...dsion_by_officer_type_with_extra_budget.py | 48 ++++++++++++-------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 24c6bc1eb8..d3b077f334 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -148,9 +148,10 @@ def find_difference_relative_to_comparison_dataframe(_df: pd.DataFrame, **kwargs
             for _idx, row in _df.iterrows()
         }, axis=1).T
 
-    def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrapped=False, put_labels_in_legend=True):
+    def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
         """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the
-         extent of the error bar."""
+         extent of the error bar.
+         Annotated with percent statistics from _df_percent, if annotation=True and _df_percent not None."""
 
         yerr = np.array([
             (_df['mean'] - _df['lower']).values,
@@ -161,7 +162,7 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
 
         colors = None
 
-        fig, ax = plt.subplots(figsize=(10, 5))
+        fig, ax = plt.subplots(figsize=(18, 6))
         ax.bar(
             xticks.keys(),
             _df['mean'].values,
@@ -173,9 +174,15 @@ def do_bar_plot_with_ci(_df, annotations=None, xticklabels_horizontal_and_wrappe
             label=xticks.values(),
             zorder=100,
         )
-        if annotations:
-            for xpos, ypos, text in zip(xticks.keys(), _df['mean'].values, annotations):
-                ax.text(xpos, ypos*1.15, text, horizontalalignment='center', rotation='vertical', fontsize='x-small')
+
+        if annotation:
+            assert (_df.index == _df_percent.index).all()
+            for xpos, ypos, text1, text2, text3 in zip(xticks.keys(), _df['upper'].values,
+                                                       _df_percent['mean'].values,
+                                                       _df_percent['lower'].values,
+                                                       _df_percent['upper'].values):
+                text = f"{int(round(text1 * 100, 2))}%\n{[round(text2, 2),round(text3, 2)]}"
+                ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize='xx-small')
 
         ax.set_xticks(list(xticks.keys()))
 
@@ -409,6 +416,17 @@ def get_hr_salary(cadres):
         ).T
     ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    num_dalys_averted_percent = summarize(
+        -1.0 *
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_dalys.loc[0],
+                comparison='s_1',
+                scaled=True
+            )
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
     num_dalys_by_cause_averted = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
             num_dalys_by_cause,
@@ -475,8 +493,7 @@ def get_hr_salary(cadres):
     # plot absolute numbers for scenarios
 
     name_of_plot = f'Deaths, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6, xticklabels_horizontal_and_wrapped=True,
-                                  put_labels_in_legend=True)
+    fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6)
     ax.set_title(name_of_plot)
     ax.set_ylabel('(Millions)')
     fig.tight_layout()
@@ -485,8 +502,7 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     name_of_plot = f'DALYs, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6, xticklabels_horizontal_and_wrapped=True,
-                                  put_labels_in_legend=True)
+    fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6)
     ax.set_title(name_of_plot)
     ax.set_ylabel('(Millions)')
     fig.tight_layout()
@@ -581,8 +597,7 @@ def get_hr_salary(cadres):
 
     # plot relative numbers for scenarios
     name_of_plot = f'DALYs averted against no expansion, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, xticklabels_horizontal_and_wrapped=True,
-                                  put_labels_in_legend=True)
+    fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
     ax.set_ylabel('(Millions)')
     fig.tight_layout()
@@ -591,8 +606,7 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     name_of_plot = f'Deaths averted against no expansion, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, xticklabels_horizontal_and_wrapped=True,
-                                  put_labels_in_legend=True)
+    fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6)
     ax.set_title(name_of_plot)
     ax.set_ylabel('(Millions)')
     fig.tight_layout()
@@ -690,8 +704,7 @@ def get_hr_salary(cadres):
     # plot ROI and CE for all expansion scenarios
 
     name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(ROI, xticklabels_horizontal_and_wrapped=True,
-                                  put_labels_in_legend=True)
+    fig, ax = do_bar_plot_with_ci(ROI)
     ax.set_title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
@@ -699,8 +712,7 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     # name_of_plot = f'Cost per DALY averted, {target_period()}'
-    # fig, ax = do_bar_plot_with_ci(CE, xticklabels_horizontal_and_wrapped=True,
-    #                               put_labels_in_legend=True)
+    # fig, ax = do_bar_plot_with_ci(CE)
     # ax.set_title(name_of_plot)
     # ax.set_ylabel('USD dollars')
     # fig.tight_layout()

From 587cdb1de17b40855db6943d481da552b875071b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 25 Sep 2024 16:35:43 +0100
Subject: [PATCH 117/218] plot for treatment counts

---
 ...dsion_by_officer_type_with_extra_budget.py | 110 +++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index d3b077f334..224a89dc54 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -21,6 +21,7 @@
     APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
     CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP,
     COARSE_APPT_TYPE_TO_COLOR_MAP,
+    SHORT_TREATMENT_ID_TO_COLOR_MAP,
     extract_results,
     summarize,
 )
@@ -87,6 +88,20 @@ def get_num_services(_df):
             .apply(pd.Series).sum().sum()
         )
 
+    def get_num_treatments(_df):
+        """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)"""
+        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum()
+        _df.index = _df.index.map(lambda x: x.split('_')[0] + "*")
+        _df = _df.groupby(level=0).sum()
+        return _df
+
+    def get_num_treatments_total(_df):
+        """Return the number of treatments in total of all treatments (total within the TARGET_PERIOD)"""
+        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum()
+        _df.index = _df.index.map(lambda x: x.split('_')[0] + "*")
+        _df = _df.groupby(level=0).sum().sum()
+        return pd.Series(_df)
+
     def get_num_deaths(_df):
         """Return total number of Deaths (total within the TARGET_PERIOD)"""
         return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)]))
@@ -371,6 +386,22 @@ def get_hr_salary(cadres):
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
+    num_treatments = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event',
+        custom_generate_series=get_num_treatments,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_treatments_total = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event',
+        custom_generate_series=get_num_treatments_total,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
     # get absolute numbers for scenarios
     # sort the scenarios according to their DALYs values, in ascending order
     num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names).sort_values(by='mean')
@@ -388,6 +419,12 @@ def get_hr_salary(cadres):
     num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
+    num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_treatments_total_summarized = summarize(num_treatments_total).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
 
     # get relative numbers for scenarios, compared to no_expansion scenario: s_1
     num_services_increased = summarize(
@@ -443,6 +480,22 @@ def get_hr_salary(cadres):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    num_treatments_increased = summarize(
+        find_difference_relative_to_comparison_dataframe(
+            num_treatments,
+            comparison='s_1',
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    num_treatments_total_increased = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_treatments_total.loc[0],
+                comparison='s_1')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
     # Check that when we sum across the causes/appt types,
     # we get the same total as calculated when we didn't split by cause/appt type.
     assert (
@@ -457,6 +510,12 @@ def get_hr_salary(cadres):
          ) < 1e-6
     ).all()
 
+    assert (
+        (num_treatments_increased.sum(axis=1).sort_index()
+         - num_treatments_total_increased['mean'].sort_index()
+         ) < 1e-6
+    ).all()
+
     # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
     # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
     ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
@@ -474,6 +533,10 @@ def get_hr_salary(cadres):
     appt_color = {
         appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns
     }
+    treatment_color = {
+        treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan)
+        for treatment in num_treatments_summarized.columns
+    }
     cause_color = {
         cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
         for cause in num_dalys_by_cause_summarized.columns
@@ -532,6 +595,28 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Services by treatment type, {target_period()}'
+    num_treatments_summarized_in_millions = num_treatments_summarized / 1e6
+    yerr_services = np.array([
+        (num_treatments_total_summarized['mean'].values - num_treatments_total_summarized['lower']).values,
+        (num_treatments_total_summarized['upper'].values - num_treatments_total_summarized['mean']).values,
+    ]) / 1e6
+    fig, ax = plt.subplots(figsize=(10, 6))
+    num_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    ax.errorbar(range(len(param_names)), num_treatments_total_summarized['mean'].values / 1e6, yerr=yerr_services,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in num_treatments_summarized_in_millions.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}'
     total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
     fig, ax = plt.subplots()
@@ -671,6 +756,30 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Services increased by treatment type \nagainst no expansion, {target_period()}'
+    num_treatments_increased_in_millions = num_treatments_increased / 1e6
+    yerr_services = np.array([
+        (num_treatments_total_increased['mean'].values - num_treatments_total_increased['lower']).values,
+        (num_treatments_total_increased['upper'].values - num_treatments_total_increased['mean']).values,
+    ]) / 1e6
+    fig, ax = plt.subplots(figsize=(10, 6))
+    num_treatments_increased_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in num_treatments_increased_in_millions.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'DALYs averted by cause against no expansion, {target_period()}'
     num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
     yerr_dalys = np.array([
@@ -721,7 +830,6 @@ def get_hr_salary(cadres):
     # plt.close(fig)
 
     # todo
-    # Plot comparison results: there are negative changes of some appts and causes, try increase runs and see.
     # As we have 33 scenarios in total, \
     # design comparison groups of scenarios to examine marginal/combined productivity of cadres.
     # To vary the HRH budget growth rate (default: 4.2%) and do sensitivity analysis \

From 231138696d4f753c69d7b419271a81ca5d0c672a Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 26 Sep 2024 14:48:10 +0100
Subject: [PATCH 118/218] update scenario colors

---
 ...dsion_by_officer_type_with_extra_budget.py | 25 ++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 224a89dc54..5acf357ea9 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -41,7 +41,7 @@
 
 # group scenarios for presentation
 scenario_groups = {
-    'no_expansion': 's_1',
+    'no_expansion': {'s_1'},
     'all_cadres_expansion': {'s_2', 's_3'},
     'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'},
     'two_cadres_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13',
@@ -175,7 +175,7 @@ def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
 
         xticks = {(i + 0.5): k for i, k in enumerate(_df.index)}
 
-        colors = None
+        colors = [scenario_color[s] for s in _df.index]
 
         fig, ax = plt.subplots(figsize=(18, 6))
         ax.bar(
@@ -204,6 +204,11 @@ def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
         xtick_label_detail = [substitute_labels[v] for v in xticks.values()]
         ax.set_xticklabels(xtick_label_detail, rotation=90)
 
+        legend_labels = list(scenario_groups_color.keys())
+        legend_handles = [plt.Rectangle((0, 0), 1, 1,
+                                        color=scenario_groups_color[label]) for label in legend_labels]
+        ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5))
+
         ax.grid(axis="y")
         ax.spines['top'].set_visible(False)
         ax.spines['right'].set_visible(False)
@@ -552,6 +557,19 @@ def get_hr_salary(cadres):
         'Nutrition': 'thistle',
         'Radiography': 'lightgray',
     }
+    scenario_groups_color = {
+        'no_expansion': 'gray',
+        'one_cadre_expansion': 'lightpink',
+        'two_cadres_expansion': 'violet',
+        'three_cadres_expansion': 'darkorchid',
+        'four_cadres_expansion': 'paleturquoise',
+        'all_cadres_expansion': 'darkturquoise'
+    }
+    scenario_color = {}
+    for s in param_names:
+        for k in scenario_groups_color.keys():
+            if s in scenario_groups[k]:
+                scenario_color[s] = scenario_groups_color[k]
 
     # plot absolute numbers for scenarios
 
@@ -717,7 +735,8 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
-    extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres') / 1e6
+    extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
+        num_dalys_summarized.index).drop(index='s_1') / 1e6
     fig, ax = plt.subplots()
     extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Millions', fontsize='small')

From 116154cb1efaf6920d151487a9220da4604050fb Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 26 Sep 2024 23:03:11 +0100
Subject: [PATCH 119/218] plot yearly dalys and staff

---
 ...dsion_by_officer_type_with_extra_budget.py | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 5acf357ea9..b7e91ba06d 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -120,6 +120,20 @@ def get_num_dalys(_df):
             .sum().sum()
         )
 
+    def get_num_dalys_yearly(_df):
+        """Return total number of DALYS (Stacked) for every year in the TARGET_PERIOD.
+        Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using
+        results from runs that crashed mid-way through the simulation).
+        """
+        years_needed = [i.year for i in TARGET_PERIOD]
+        assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded."
+        _df = (_df.loc[_df.year.between(*years_needed)]
+               .drop(columns=['date', 'sex', 'age_range'])
+               .groupby('year').sum()
+               .sum(axis=1)
+               )
+        return _df
+
     def get_num_dalys_by_cause(_df):
         """Return total number of DALYS by cause (Stacked) (total within the TARGET_PERIOD).
         Throw error if not a record for every year in the TARGET PERIOD (to guard against inadvertently using
@@ -367,6 +381,14 @@ def get_hr_salary(cadres):
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
+    num_dalys_yearly = extract_results(
+        results_folder,
+        module='tlo.methods.healthburden',
+        key='dalys_stacked',
+        custom_generate_series=get_num_dalys_yearly,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
     num_dalys_by_cause = extract_results(
         results_folder,
         module="tlo.methods.healthburden",
@@ -414,6 +436,11 @@ def get_hr_salary(cadres):
         num_dalys_summarized.index
     )
 
+    num_dalys_yearly_summarized = (summarize(num_dalys_yearly)
+                                   .stack([0, 1])
+                                   .rename_axis(['year', 'scenario', 'stat'])
+                                   .reset_index(name='count'))
+
     num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names).reindex(
         num_dalys_summarized.index
     )
@@ -571,6 +598,11 @@ def get_hr_salary(cadres):
             if s in scenario_groups[k]:
                 scenario_color[s] = scenario_groups_color[k]
 
+    best_scenarios_color = {'s_1': 'black'}
+    cmap_list = list(map(plt.get_cmap("Set1"), range(9)))
+    for i in range(9):
+        best_scenarios_color[num_dalys_summarized.index[i]] = cmap_list[i]
+
     # plot absolute numbers for scenarios
 
     name_of_plot = f'Deaths, {target_period()}'
@@ -591,6 +623,54 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    # plot yearly DALYs for best 9 scenarios
+    name_of_plot = f'Yearly DALYs, {target_period()}'
+    fig, ax = plt.subplots(figsize=(9, 6))
+    best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1']
+    for s in best_scenarios:
+        data = (num_dalys_yearly_summarized.loc[num_dalys_yearly_summarized.scenario == s, :]
+                .drop(columns='scenario')
+                .pivot(index='year', columns='stat')
+                .droplevel(0, axis=1))
+        ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=best_scenarios_color[s])
+        # ax.fill_between(data.index.to_numpy(),
+        #                 (data['lower'] / 1e6).to_numpy(),
+        #                 (data['upper'] / 1e6).to_numpy(),
+        #                 color=best_scenarios_color[s],
+        #                 alpha=0.2)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('(Millions)')
+    ax.set_xticks(data.index)
+    legend_labels = [substitute_labels[v] for v in best_scenarios]
+    legend_handles = [plt.Rectangle((0, 0), 1, 1,
+                                    color=best_scenarios_color[v]) for v in best_scenarios]
+    ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5))
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    # plot yearly staff count (Clinical/Pharmacy/Nursing and Midwifery) for best 9 scenarios
+    best_cadres = ['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']
+    name_of_plot = f'Yearly staff count for C+P+N, {target_period()}'
+    fig, ax = plt.subplots(figsize=(9, 6))
+    best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1']
+    for s in best_scenarios:
+        data = staff_count.loc[staff_count.draw == s].set_index('year').drop(columns='draw').loc[:, best_cadres].sum(
+            axis=1)
+        ax.plot(data.index, data.values / 1e3, label=substitute_labels[s], color=best_scenarios_color[s])
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('(Thousands)')
+    ax.set_xticks(data.index)
+    legend_labels = [substitute_labels[v] for v in best_scenarios]
+    legend_handles = [plt.Rectangle((0, 0), 1, 1,
+                                    color=best_scenarios_color[v]) for v in best_scenarios]
+    ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5))
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Services by appointment type, {target_period()}'
     num_appts_summarized_in_millions = num_appts_summarized / 1e6
     yerr_services = np.array([
@@ -637,6 +717,8 @@ def get_hr_salary(cadres):
 
     name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}'
     total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
+    column_dcsa = total_staff_to_plot.pop('DCSA')
+    total_staff_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots()
     total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Thousands', fontsize='small')

From cc5db6f97555ceb8c3532079dbe35a2cc02c28c8 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 26 Sep 2024 23:30:36 +0100
Subject: [PATCH 120/218] adjust figsize

---
 ...dsion_by_officer_type_with_extra_budget.py | 32 ++++++++++++-------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index b7e91ba06d..99e348f340 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -476,6 +476,16 @@ def get_hr_salary(cadres):
         ).T
     ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    num_deaths_averted_percent = summarize(
+        -1.0 *
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_deaths.loc[0],
+                comparison='s_1',
+                scaled=True)
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
     num_dalys_averted = summarize(
         -1.0 *
         pd.DataFrame(
@@ -677,7 +687,7 @@ def get_hr_salary(cadres):
         (num_services_summarized['mean'].values - num_services_summarized['lower']).values,
         (num_services_summarized['upper'].values - num_services_summarized['mean']).values,
     ])/1e6
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(9, 6))
     num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
     ax.errorbar(range(len(param_names)), num_services_summarized['mean'].values / 1e6, yerr=yerr_services,
                 fmt=".", color="black", zorder=100)
@@ -719,7 +729,7 @@ def get_hr_salary(cadres):
     total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
     column_dcsa = total_staff_to_plot.pop('DCSA')
     total_staff_to_plot.insert(3, "DCSA", column_dcsa)
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(9, 6))
     total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Thousands', fontsize='small')
     ax.set(xlabel=None)
@@ -735,7 +745,7 @@ def get_hr_salary(cadres):
 
     name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
     total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(9, 6))
     total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -755,7 +765,7 @@ def get_hr_salary(cadres):
         (num_dalys_summarized['mean'].values - num_dalys_summarized['lower']).values,
         (num_dalys_summarized['upper'].values - num_dalys_summarized['mean']).values,
     ])/1e6
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(9, 6))
     num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
     ax.errorbar(range(len(param_names)), num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys,
                 fmt=".", color="black", zorder=100)
@@ -766,7 +776,7 @@ def get_hr_salary(cadres):
     fig.subplots_adjust(right=0.7)
     ax.legend(
         loc="center left",
-        bbox_to_anchor=(0.705, 0.520),
+        bbox_to_anchor=(0.750, 0.6),
         bbox_transform=fig.transFigure,
         title='Cause of death or injury',
         title_fontsize='x-small',
@@ -791,7 +801,7 @@ def get_hr_salary(cadres):
     plt.close(fig)
 
     name_of_plot = f'Deaths averted against no expansion, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6)
+    fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
     ax.set_ylabel('(Millions)')
     fig.tight_layout()
@@ -802,7 +812,7 @@ def get_hr_salary(cadres):
     name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}'
     extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex(
         num_dalys_summarized.index).drop(['s_1']) / 1e3
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(9, 6))
     extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Thousands', fontsize='small')
     ax.set(xlabel=None)
@@ -819,7 +829,7 @@ def get_hr_salary(cadres):
     name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
     extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
         num_dalys_summarized.index).drop(index='s_1') / 1e6
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(9, 6))
     extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
@@ -839,7 +849,7 @@ def get_hr_salary(cadres):
         (num_services_increased['mean'].values - num_services_increased['lower']).values,
         (num_services_increased['upper'].values - num_services_increased['mean']).values,
     ]) / 1e6
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(9, 6))
     num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
     ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
                 fmt=".", color="black", zorder=100)
@@ -887,7 +897,7 @@ def get_hr_salary(cadres):
         (num_dalys_averted['mean'].values - num_dalys_averted['lower']).values,
         (num_dalys_averted['upper'].values - num_dalys_averted['mean']).values,
     ]) / 1e6
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(9, 6))
     num_dalys_by_cause_averted_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
     ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
                 fmt=".", color="black", zorder=100)
@@ -898,7 +908,7 @@ def get_hr_salary(cadres):
     fig.subplots_adjust(right=0.7)
     ax.legend(
         loc="center left",
-        bbox_to_anchor=(0.705, 0.520),
+        bbox_to_anchor=(0.750, 0.6),
         bbox_transform=fig.transFigure,
         title='Cause of death or injury',
         title_fontsize='x-small',

From 3327b2ce1b526027d9a8ee0d942155e3e0526029 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 26 Sep 2024 23:43:58 +0100
Subject: [PATCH 121/218] reverse legend order

---
 ...is_hr_expandsion_by_officer_type_with_extra_budget.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 99e348f340..e21c4a08f2 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -812,6 +812,8 @@ def get_hr_salary(cadres):
     name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}'
     extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex(
         num_dalys_summarized.index).drop(['s_1']) / 1e3
+    column_dcsa = extra_staff_by_cadre_to_plot.pop('DCSA')
+    extra_staff_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots(figsize=(9, 6))
     extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Thousands', fontsize='small')
@@ -819,7 +821,7 @@ def get_hr_salary(cadres):
     xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small')
+               fontsize='small', reverse=True)
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
@@ -836,7 +838,7 @@ def get_hr_salary(cadres):
     xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small')
+               fontsize='small', reverse=True)
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
@@ -913,7 +915,8 @@ def get_hr_salary(cadres):
         title='Cause of death or injury',
         title_fontsize='x-small',
         fontsize='x-small',
-        ncol=1
+        ncol=1,
+        reverse=True
     )
     plt.title(name_of_plot)
     fig.tight_layout()

From 5c526051da49e0ef723779a9ec9010e802d12563 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 26 Sep 2024 23:53:56 +0100
Subject: [PATCH 122/218] minor adjust

---
 ...lysis_hr_expandsion_by_officer_type_with_extra_budget.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index e21c4a08f2..b072837c09 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -745,6 +745,8 @@ def get_hr_salary(cadres):
 
     name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
     total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
+    column_dcsa = total_cost_to_plot.pop('DCSA')
+    total_cost_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots(figsize=(9, 6))
     total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Millions', fontsize='small')
@@ -831,6 +833,8 @@ def get_hr_salary(cadres):
     name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
     extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
         num_dalys_summarized.index).drop(index='s_1') / 1e6
+    column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA')
+    extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots(figsize=(9, 6))
     extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Millions', fontsize='small')
@@ -860,7 +864,7 @@ def get_hr_salary(cadres):
     xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
-               fontsize='small')
+               fontsize='small', reverse=True)
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(

From db27d5a6d94ae94eda8dcaaae52f7fa2b9561d92 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 2 Oct 2024 15:18:47 +0100
Subject: [PATCH 123/218] update scenario names

---
 ...dsion_by_officer_type_with_extra_budget.py | 56 ++++++++++++++++---
 1 file changed, 47 insertions(+), 9 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index b072837c09..339dac1c45 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -29,14 +29,16 @@
 # rename scenarios
 substitute_labels = {
     's_1': 'no_expansion',
-    's_2': 'all_expansion_current',
-    's_3': 'all_expansion_equal',
-    's_4': 'Clinical', 's_5': 'DCSA', 's_6': 'Nursing_and_Midwifery', 's_7': 'Pharmacy', 's_8': 'Other',
-    's_9': 'CD_equal', 's_10': 'CN_equal', 's_11': 'CP_equal', 's_12': 'CO_equal', 's_13': 'DN_equal',
-    's_14': 'DP_equal', 's_15': 'DO_equal', 's_16': 'NP_equal', 's_17': 'NO_equal', 's_18': 'PO_equal',
-    's_19': 'CDN_equal', 's_20': 'CDP_equal', 's_21': 'CDO_equal', 's_22': 'CNP_equal', 's_23': 'CNO_equal',
-    's_24': 'CPO_equal', 's_25': 'DNP_equal', 's_26': 'DNO_equal', 's_27': 'DPO_equal', 's_28': 'NPO_equal',
-    's_29': 'CDNP_equal', 's_30': 'CDNO_equal', 's_31': 'CDPO_equal', 's_32': 'CNPO_equal', 's_33': 'DNPO_equal',
+    's_2': 'all_cadres_current_allocation',
+    's_3': 'all_cadres_equal_allocation',
+    's_4': 'Clinical (C)', 's_5': 'DCSA (D)', 's_6': 'Nursing_and_Midwifery (N&M)', 's_7': 'Pharmacy (P)',
+    's_8': 'Other (O)',
+    's_9': 'C + D', 's_10': 'C + N&M', 's_11': 'C + P', 's_12': 'C + O', 's_13': 'D + N&M',
+    's_14': 'D + P', 's_15': 'D + O', 's_16': 'N&M + P', 's_17': 'N&M + O', 's_18': 'P + O',
+    's_19': 'C + D + N&M', 's_20': 'C + D + P', 's_21': 'C + D + O', 's_22': 'C + N&M + P', 's_23': 'C + N&M + O',
+    's_24': 'C + P + O', 's_25': 'D + N&M + P', 's_26': 'D + N&M + O', 's_27': 'D + P + O', 's_28': 'N&M + P + O',
+    's_29': 'C + D + N&M + P', 's_30': 'C + D + N&M + O', 's_31': 'C + D + P + O', 's_32': 'C + N&M + P + O',
+    's_33': 'D + N&M + P + O',
 }
 
 # group scenarios for presentation
@@ -467,6 +469,15 @@ def get_hr_salary(cadres):
         ).T
     ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    # num_services_increased_percent = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             num_services.loc[0],
+    #             comparison='s_1',
+    #             scaled=True)
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
     num_deaths_averted = summarize(
         -1.0 *
         pd.DataFrame(
@@ -514,6 +525,15 @@ def get_hr_salary(cadres):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    # num_dalys_by_cause_averted_percent = summarize(
+    #     -1.0 * find_difference_relative_to_comparison_dataframe(
+    #         num_dalys_by_cause,
+    #         comparison='s_1',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
     num_appts_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
             num_appts,
@@ -522,6 +542,15 @@ def get_hr_salary(cadres):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    # num_appts_increased_percent = summarize(
+    #     find_difference_relative_to_comparison_dataframe(
+    #         num_appts,
+    #         comparison='s_1',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
     num_treatments_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
             num_treatments,
@@ -538,6 +567,15 @@ def get_hr_salary(cadres):
         ).T
     ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    # num_treatments_total_increased_percent = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             num_treatments_total.loc[0],
+    #             comparison='s_1',
+    #             scaled=True)
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
     # Check that when we sum across the causes/appt types,
     # we get the same total as calculated when we didn't split by cause/appt type.
     assert (
@@ -642,7 +680,7 @@ def get_hr_salary(cadres):
                 .drop(columns='scenario')
                 .pivot(index='year', columns='stat')
                 .droplevel(0, axis=1))
-        ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=best_scenarios_color[s])
+        ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=best_scenarios_color[s], linewidth=2)
         # ax.fill_between(data.index.to_numpy(),
         #                 (data['lower'] / 1e6).to_numpy(),
         #                 (data['upper'] / 1e6).to_numpy(),

From 15a69a845924e3a7c215e85ea46d0f4075a02e98 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 2 Oct 2024 17:34:11 +0100
Subject: [PATCH 124/218] try to plot 3D plots using 4D data

---
 ...dsion_by_officer_type_with_extra_budget.py | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 339dac1c45..fadba88e3f 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -349,12 +349,16 @@ def get_hr_salary(cadres):
     # get extra count = staff count - staff count of no expansion s_1
     # note that annual staff increase rate = scale up factor - 1
     extra_staff = staff_count.copy()
+    extra_staff_percent = staff_count.copy()
     for i in staff_count.index:
         extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
+        extra_staff_percent.iloc[i, 2:] = (staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]) / staff_count.iloc[0, 2:]
 
     extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
         index='s_1'
     )
+    extra_staff_percent_2029 = extra_staff_percent.loc[extra_staff_percent.year == 2029, :].drop(
+        columns='year').set_index('draw').drop(index='s_1')
     staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
 
     # check total cost calculated is increased as expected
@@ -651,6 +655,25 @@ def get_hr_salary(cadres):
     for i in range(9):
         best_scenarios_color[num_dalys_summarized.index[i]] = cmap_list[i]
 
+    # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\
+    # percentage of DALYs averted decides the color of that scatter point
+    heat_data = pd.merge(num_dalys_averted_percent['mean'],
+                         extra_staff_percent_2029[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
+                         left_index=True, right_index=True, how='inner')
+    scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    fig = plt.figure()
+    ax = fig.add_subplot(111, projection='3d')
+    img = ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'],
+                     marker='o', s=heat_data['mean'] * 2000,
+                     c=heat_data['mean'] * 100, cmap='viridis', alpha=0.5)
+    ax.set_xlabel('relative increase of Clinical cadre')
+    ax.set_ylabel('Pharmacy cadre')
+    ax.set_zlabel('Nursing and Midwifery')
+    plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.15)
+    plt.title('DALYs averted (%) against no expansion, 2019-2029')
+    plt.show()
+
     # plot absolute numbers for scenarios
 
     name_of_plot = f'Deaths, {target_period()}'

From 2241d46958cc2b680fba51da3c85f884069fd79d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 4 Oct 2024 09:32:25 +0100
Subject: [PATCH 125/218] update the 3D plot

---
 ...dsion_by_officer_type_with_extra_budget.py | 39 +++++++++++++++----
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index fadba88e3f..a66d645580 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -13,6 +13,10 @@
 import pandas as pd
 from matplotlib import pyplot as plt
 
+from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
+    extra_budget_fracs,
+)
+
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import (
     HRHExpansionByCadreWithExtraBudget,
 )
@@ -657,22 +661,43 @@ def get_hr_salary(cadres):
 
     # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\
     # percentage of DALYs averted decides the color of that scatter point
+    extra_budget_allocation = extra_budget_fracs.T
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
-                         extra_staff_percent_2029[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
+                         #extra_staff_percent_2029[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
+                         extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
     scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
     heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    name_of_plot = f'DALYs averted (%) against no expansion, {target_period()}'
     fig = plt.figure()
     ax = fig.add_subplot(111, projection='3d')
     img = ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'],
-                     marker='o', s=heat_data['mean'] * 2000,
-                     c=heat_data['mean'] * 100, cmap='viridis', alpha=0.5)
-    ax.set_xlabel('relative increase of Clinical cadre')
+                     alpha=0.8, marker='o', #s=heat_data['mean'] * 2000,
+                     c=heat_data['mean'] * 100, cmap='viridis')
+    # plot lines from the best point to three axes panes
+    ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
+              [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
+              [0, heat_data['Nursing_and_Midwifery'][0]],
+              linestyle='--', color='gray', alpha=0.8)
+    ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
+              [0, heat_data['Pharmacy'][0]],
+              [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
+              linestyle='--', color='gray', alpha=0.8)
+    ax.plot3D([0, heat_data['Clinical'][0]],
+              [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
+              [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
+              linestyle='--', color='gray', alpha=0.8)
+    ax.set_xlabel('Fraction of extra budget allocated to \nClinical cadre')
     ax.set_ylabel('Pharmacy cadre')
+    #ax.invert_yaxis()
     ax.set_zlabel('Nursing and Midwifery')
-    plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.15)
-    plt.title('DALYs averted (%) against no expansion, 2019-2029')
-    plt.show()
+    ax.plot3D([0, 1], [0, 1], [0, 1], linestyle='--', color='red', alpha=0.3, label='the line x=y=z')
+    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2))
+    plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.25)
+    plt.title(name_of_plot)
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
 
     # plot absolute numbers for scenarios
 

From eb9bac2889639e59bf9b424b4ff930fa235da79e Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 4 Oct 2024 14:52:43 +0100
Subject: [PATCH 126/218] try multiple linear regression

---
 ...dsion_by_officer_type_with_extra_budget.py | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index a66d645580..1f0aace825 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -20,6 +20,7 @@
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import (
     HRHExpansionByCadreWithExtraBudget,
 )
+import statsmodels.api as sm
 from tlo import Date
 from tlo.analysis.utils import (
     APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
@@ -353,16 +354,12 @@ def get_hr_salary(cadres):
     # get extra count = staff count - staff count of no expansion s_1
     # note that annual staff increase rate = scale up factor - 1
     extra_staff = staff_count.copy()
-    extra_staff_percent = staff_count.copy()
     for i in staff_count.index:
         extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
-        extra_staff_percent.iloc[i, 2:] = (staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]) / staff_count.iloc[0, 2:]
 
     extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
         index='s_1'
     )
-    extra_staff_percent_2029 = extra_staff_percent.loc[extra_staff_percent.year == 2029, :].drop(
-        columns='year').set_index('draw').drop(index='s_1')
     staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
 
     # check total cost calculated is increased as expected
@@ -663,7 +660,6 @@ def get_hr_salary(cadres):
     # percentage of DALYs averted decides the color of that scatter point
     extra_budget_allocation = extra_budget_fracs.T
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
-                         #extra_staff_percent_2029[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
     scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
@@ -699,6 +695,23 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
+    regression_data = pd.merge(num_dalys_averted_percent['mean'],
+                               extra_budget_allocation,
+                               left_index=True, right_index=True, how='inner')
+    regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
+    regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery']
+    regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
+    regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
+                                * regression_data['Nursing_and_Midwifery'])
+    cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
+    regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True)
+    predictor = regression_data[regression_data.columns[1:]]
+    outcome = regression_data['mean']
+    predictor = sm.add_constant(predictor)
+    est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit()
+    print(est.summary())
+
     # plot absolute numbers for scenarios
 
     name_of_plot = f'Deaths, {target_period()}'

From 7f4cf85be7da71e1a3a91b2655b6f7797f945aba Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 7 Oct 2024 19:24:21 +0100
Subject: [PATCH 127/218] try to extract more results and plot them to
 understand the results

---
 ...dsion_by_officer_type_with_extra_budget.py | 373 +++++++++++++++---
 1 file changed, 310 insertions(+), 63 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 1f0aace825..bf69875a23 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -47,7 +47,7 @@
 }
 
 # group scenarios for presentation
-scenario_groups = {
+scenario_groups_init = {
     'no_expansion': {'s_1'},
     'all_cadres_expansion': {'s_2', 's_3'},
     'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'},
@@ -58,6 +58,14 @@
     'four_cadres_expansion': {'s_29', 's_30', 's_31', 's_32', 's_33'}
 }
 
+# group scenarios based on whether expand Clinical/Pharmacy
+scenario_groups = {
+    'C + P + D/N&M/O/None': {'s_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'},
+    'C + D/N&M/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'},
+    'P + D/N&M/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'},
+    'D/N&M/O/None': {'s_5', 's_6', 's_8', 's_13', 's_15', 's_17', 's_26', 's_1'}
+}
+
 
 def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None,
           the_target_period: Tuple[Date, Date] = None):
@@ -294,6 +302,13 @@ def get_hr_salary(cadres):
         ].set_index('Officer_Category').T
         return salary[cadres]
 
+    # def get_hcw_time_usage(_df):
+    #     """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)"""
+    #     _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum()
+    #     _df.index = _df.index.map(lambda x: x.split('_')[0] + "*")
+    #     _df = _df.groupby(level=0).sum()
+    #     return _df
+
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
 
@@ -407,7 +422,7 @@ def get_hr_salary(cadres):
     num_appts = extract_results(
         results_folder,
         module='tlo.methods.healthsystem.summary',
-        key='HSI_Event',
+        key='HSI_Event_non_blank_appt_footprint',
         custom_generate_series=get_num_appts,
         do_scaling=True
         ).pipe(set_param_names_as_column_index_level_0)
@@ -415,7 +430,7 @@ def get_hr_salary(cadres):
     num_services = extract_results(
         results_folder,
         module='tlo.methods.healthsystem.summary',
-        key='HSI_Event',
+        key='HSI_Event_non_blank_appt_footprint',
         custom_generate_series=get_num_services,
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
@@ -423,7 +438,7 @@ def get_hr_salary(cadres):
     num_treatments = extract_results(
         results_folder,
         module='tlo.methods.healthsystem.summary',
-        key='HSI_Event',
+        key='HSI_Event_non_blank_appt_footprint',
         custom_generate_series=get_num_treatments,
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
@@ -431,11 +446,56 @@ def get_hr_salary(cadres):
     num_treatments_total = extract_results(
         results_folder,
         module='tlo.methods.healthsystem.summary',
-        key='HSI_Event',
+        key='HSI_Event_non_blank_appt_footprint',
         custom_generate_series=get_num_treatments_total,
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
+    num_never_ran_appts = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Never_ran_HSI_Event',
+        custom_generate_series=get_num_appts,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_never_ran_services = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Never_ran_HSI_Event',
+        custom_generate_series=get_num_services,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_never_ran_treatments_total = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Never_ran_HSI_Event',
+        custom_generate_series=get_num_treatments_total,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_never_ran_treatments = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Never_ran_HSI_Event',
+        custom_generate_series=get_num_treatments,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    assert len(num_services) == len(num_never_ran_services) == 1
+    assert (num_services.columns == num_never_ran_services.columns).all()
+    num_services_demand = num_services + num_never_ran_services
+    ratio_services = num_services / num_services_demand
+
+    # hcw_time_usage = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='Capacity',#'Capacity_By_OfficerType_And_FacilityLevel',
+    #     custom_generate_series=get_hcw_time_usage,
+    #     do_scaling=False
+    # ).pipe(set_param_names_as_column_index_level_0)
+
     # get absolute numbers for scenarios
     # sort the scenarios according to their DALYs values, in ascending order
     num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names).sort_values(by='mean')
@@ -465,6 +525,25 @@ def get_hr_salary(cadres):
         num_dalys_summarized.index
     )
 
+    num_never_ran_services_summarized = summarize(num_never_ran_services).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_never_ran_appts_summarized = summarize(num_never_ran_appts, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_never_ran_treatments_summarized = summarize(num_never_ran_treatments, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_service_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+
     # get relative numbers for scenarios, compared to no_expansion scenario: s_1
     num_services_increased = summarize(
         pd.DataFrame(
@@ -474,14 +553,14 @@ def get_hr_salary(cadres):
         ).T
     ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
-    # num_services_increased_percent = summarize(
-    #     pd.DataFrame(
-    #         find_difference_relative_to_comparison_series(
-    #             num_services.loc[0],
-    #             comparison='s_1',
-    #             scaled=True)
-    #     ).T
-    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    num_services_increased_percent = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_services.loc[0],
+                comparison='s_1',
+                scaled=True)
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_deaths_averted = summarize(
         -1.0 *
@@ -530,14 +609,14 @@ def get_hr_salary(cadres):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
-    # num_dalys_by_cause_averted_percent = summarize(
-    #     -1.0 * find_difference_relative_to_comparison_dataframe(
-    #         num_dalys_by_cause,
-    #         comparison='s_1',
-    #         scaled=True
-    #     ),
-    #     only_mean=True
-    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    num_dalys_by_cause_averted_percent = summarize(
+        -1.0 * find_difference_relative_to_comparison_dataframe(
+            num_dalys_by_cause,
+            comparison='s_1',
+            scaled=True
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_appts_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
@@ -547,14 +626,14 @@ def get_hr_salary(cadres):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
-    # num_appts_increased_percent = summarize(
-    #     find_difference_relative_to_comparison_dataframe(
-    #         num_appts,
-    #         comparison='s_1',
-    #         scaled=True
-    #     ),
-    #     only_mean=True
-    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    num_appts_increased_percent = summarize(
+        find_difference_relative_to_comparison_dataframe(
+            num_appts,
+            comparison='s_1',
+            scaled=True
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_treatments_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
@@ -564,6 +643,15 @@ def get_hr_salary(cadres):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    num_treatments_increased_percent = summarize(
+        find_difference_relative_to_comparison_dataframe(
+            num_treatments,
+            comparison='s_1',
+            scaled=True
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
     num_treatments_total_increased = summarize(
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
@@ -572,14 +660,31 @@ def get_hr_salary(cadres):
         ).T
     ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
-    # num_treatments_total_increased_percent = summarize(
-    #     pd.DataFrame(
-    #         find_difference_relative_to_comparison_series(
-    #             num_treatments_total.loc[0],
-    #             comparison='s_1',
-    #             scaled=True)
-    #     ).T
-    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    num_treatments_total_increased_percent = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_treatments_total.loc[0],
+                comparison='s_1',
+                scaled=True)
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    service_ratio_increased = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                ratio_services.loc[0],
+                comparison='s_1')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    service_ratio_increased_percent = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                ratio_services.loc[0],
+                comparison='s_1',
+                scaled=True)
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     # Check that when we sum across the causes/appt types,
     # we get the same total as calculated when we didn't split by cause/appt type.
@@ -637,7 +742,7 @@ def get_hr_salary(cadres):
         'Nutrition': 'thistle',
         'Radiography': 'lightgray',
     }
-    scenario_groups_color = {
+    scenario_groups_color_init = {
         'no_expansion': 'gray',
         'one_cadre_expansion': 'lightpink',
         'two_cadres_expansion': 'violet',
@@ -645,6 +750,13 @@ def get_hr_salary(cadres):
         'four_cadres_expansion': 'paleturquoise',
         'all_cadres_expansion': 'darkturquoise'
     }
+    scenario_groups_color = {
+        'D/N&M/O/None': 'lightpink',
+        'P + D/N&M/O/None': 'violet',
+        'C + D/N&M/O/None': 'darkorchid',
+        'C + P + D/N&M/O/None': 'darkturquoise',
+    }
+
     scenario_color = {}
     for s in param_names:
         for k in scenario_groups_color.keys():
@@ -659,44 +771,152 @@ def get_hr_salary(cadres):
     # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\
     # percentage of DALYs averted decides the color of that scatter point
     extra_budget_allocation = extra_budget_fracs.T
+    name_of_plot = f'Dalys averted (%) against no expansion, {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
-    scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
-    heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
-    name_of_plot = f'DALYs averted (%) against no expansion, {target_period()}'
+    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    colors = [scenario_color[s] for s in heat_data.index]
     fig = plt.figure()
     ax = fig.add_subplot(111, projection='3d')
     img = ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'],
-                     alpha=0.8, marker='o', #s=heat_data['mean'] * 2000,
-                     c=heat_data['mean'] * 100, cmap='viridis')
+                     alpha=0.8, marker='o', s=heat_data['mean'] * 2000,
+                     #c=heat_data['mean'] * 100, cmap='viridis',
+                     c=colors)
     # plot lines from the best point to three axes panes
-    ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
-              [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
-              [0, heat_data['Nursing_and_Midwifery'][0]],
-              linestyle='--', color='gray', alpha=0.8)
-    ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
-              [0, heat_data['Pharmacy'][0]],
-              [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
-              linestyle='--', color='gray', alpha=0.8)
-    ax.plot3D([0, heat_data['Clinical'][0]],
-              [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
-              [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
-              linestyle='--', color='gray', alpha=0.8)
-    ax.set_xlabel('Fraction of extra budget allocated to \nClinical cadre')
-    ax.set_ylabel('Pharmacy cadre')
-    #ax.invert_yaxis()
-    ax.set_zlabel('Nursing and Midwifery')
-    ax.plot3D([0, 1], [0, 1], [0, 1], linestyle='--', color='red', alpha=0.3, label='the line x=y=z')
-    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2))
-    plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.25)
+    # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
+    #           [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
+    #           [0, heat_data['Nursing_and_Midwifery'][0]],
+    #           linestyle='--', color='gray', alpha=0.8)
+    # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
+    #           [0, heat_data['Pharmacy'][0]],
+    #           [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
+    #           linestyle='--', color='gray', alpha=0.8)
+    # ax.plot3D([0, heat_data['Clinical'][0]],
+    #           [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
+    #           [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
+    #           linestyle='--', color='gray', alpha=0.8)
+    ax.set_xlabel('Fraction of extra budget allocated to \nClinical cadre (C)')
+    ax.set_ylabel('Pharmacy cadre (P)')
+    #ax.invert_xaxis()
+    ax.invert_yaxis()
+    ax.set_zlabel('Nursing and Midwifery (N&M)')
+    ax.plot3D([0, 1], [0, 1], [0, 1], linestyle='-', color='orange', alpha=1.0, linewidth=2)
+    legend_labels = list(scenario_groups_color.keys()) + ['line of C = P = N&M']
+    legend_handles = [plt.Line2D([0, 0], [0, 0],
+                                 linestyle='none', marker='o', color=scenario_groups_color[label]
+                                 ) for label in legend_labels[0:len(legend_labels) - 1]
+                      ] + [plt.Line2D([0, 1], [0, 0], linestyle='-', color='orange')]
+    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    # plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.25)
+    plt.title(name_of_plot)
+    plt.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'1 Dalys averted, Services increased and Treatment increased, {target_period()}'
+    heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+                           num_treatments_total_increased_percent['mean']], axis=1)
+    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    colors = [scenario_color[s] for s in heat_data.index]
+    fig = plt.figure()
+    ax = fig.add_subplot(111, projection='3d')
+    ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0],
+               alpha=0.8, marker='o',
+               c=colors)
+    ax.set_xlabel('Services increased %')
+    ax.set_ylabel('Treatments increased %')
+    ax.set_zlabel('DALYs averted %')
+    legend_labels = list(scenario_groups_color.keys())
+    legend_handles = [plt.Line2D([0, 0], [0, 0],
+                                 linestyle='none', marker='o', color=scenario_groups_color[label]
+                                 ) for label in legend_labels
+                      ]
+    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
     plt.title(name_of_plot)
+    plt.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'0 Dalys averted, Services increased and Treatment increased, {target_period()}'
+    heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+                           num_treatments_total_increased_percent['mean']], axis=1)
+    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    colors = [scenario_color[s] for s in heat_data.index]
+    fig, ax = plt.subplots()
+    ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2],
+               alpha=0.8, marker='o', s=2000 * heat_data.iloc[:, 0],
+               c=colors)
+    ax.set_xlabel('Services increased %')
+    ax.set_ylabel('Treatments increased %')
+    legend_labels = list(scenario_groups_color.keys())
+    legend_handles = [plt.Line2D([0, 0], [0, 0],
+                                 linestyle='none', marker='o', color=scenario_groups_color[label]
+                                 ) for label in legend_labels
+                      ]
+    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    plt.title(name_of_plot)
+    plt.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Dalys averted and Services increased, {target_period()}'
+    heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+                           num_treatments_total_increased_percent['mean']], axis=1)
+    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    colors = [scenario_color[s] for s in heat_data.index]
+    fig, ax = plt.subplots()
+    ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
+               alpha=0.8, marker='o', c=colors)
+    ax.set_xlabel('Services increased %')
+    ax.set_ylabel('DLAYs averted %')
+    legend_labels = list(scenario_groups_color.keys())
+    legend_handles = [plt.Line2D([0, 0], [0, 0],
+                                 linestyle='none', marker='o', color=scenario_groups_color[label]
+                                 ) for label in legend_labels
+                      ]
+    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    plt.title(name_of_plot)
+    plt.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Dalys averted and Treatments increased, {target_period()}'
+    heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+                           num_treatments_total_increased_percent['mean']], axis=1)
+    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    colors = [scenario_color[s] for s in heat_data.index]
+    fig, ax = plt.subplots()
+    ax.scatter(100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0],
+               alpha=0.8, marker='o', c=colors)
+    ax.set_xlabel('Treatments increased %')
+    ax.set_ylabel('DLAYs averted %')
+    legend_labels = list(scenario_groups_color.keys())
+    legend_handles = [plt.Line2D([0, 0], [0, 0],
+                                 linestyle='none', marker='o', color=scenario_groups_color[label]
+                                 ) for label in legend_labels
+                      ]
+    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    plt.title(name_of_plot)
+    plt.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
     fig.show()
     plt.close(fig)
 
     # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
-    regression_data = pd.merge(num_dalys_averted_percent['mean'],
+    outcome_data = num_dalys_averted_percent['mean']
+    # outcome = num_services_increased_percent['mean']
+    # outcome = num_treatments_total_increased_percent['mean']
+    regression_data = pd.merge(outcome_data,
                                extra_budget_allocation,
                                left_index=True, right_index=True, how='inner')
     regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
@@ -732,6 +952,24 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Service demand, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(num_service_demand_summarized / 1e6)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('(Millions)')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Service delivery ratio, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(ratio_service_summarized)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('services delivered / demand')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     # plot yearly DALYs for best 9 scenarios
     name_of_plot = f'Yearly DALYs, {target_period()}'
     fig, ax = plt.subplots(figsize=(9, 6))
@@ -910,6 +1148,15 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Service delivery ratio against no expansion, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(service_ratio_increased * 100, service_ratio_increased_percent, annotation=True)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('Percentage')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}'
     extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex(
         num_dalys_summarized.index).drop(['s_1']) / 1e3

From 2fcb3c4623a9204221f9f6e50966f720e1e27549 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 7 Oct 2024 23:03:21 +0100
Subject: [PATCH 128/218] ANOVA analysis and more plots

---
 ...dsion_by_officer_type_with_extra_budget.py | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index bf69875a23..d6cfbfb328 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -21,6 +21,7 @@
     HRHExpansionByCadreWithExtraBudget,
 )
 import statsmodels.api as sm
+import statsmodels.stats as ss
 from tlo import Date
 from tlo.analysis.utils import (
     APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
@@ -912,6 +913,28 @@ def get_hr_salary(cadres):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Dalys averted and Services ratio increased, {target_period()}'
+    heat_data = pd.concat([num_dalys_averted_percent['mean'], service_ratio_increased_percent['mean']], axis=1)
+    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    colors = [scenario_color[s] for s in heat_data.index]
+    fig, ax = plt.subplots()
+    ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
+               alpha=0.8, marker='o', c=colors)
+    ax.set_xlabel('Service delivery ratio increased %')
+    ax.set_ylabel('DLAYs averted %')
+    legend_labels = list(scenario_groups_color.keys())
+    legend_handles = [plt.Line2D([0, 0], [0, 0],
+                                 linestyle='none', marker='o', color=scenario_groups_color[label]
+                                 ) for label in legend_labels
+                      ]
+    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    plt.title(name_of_plot)
+    plt.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
     outcome_data = num_dalys_averted_percent['mean']
     # outcome = num_services_increased_percent['mean']
@@ -932,6 +955,20 @@ def get_hr_salary(cadres):
     est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit()
     print(est.summary())
 
+    # do anova analysis to test the difference of scenario groups
+    def anova_oneway(df=num_dalys_averted_percent):
+        best = df.loc[list(scenario_groups['C + P + D/N&M/O/None']), 'mean']
+        middle_C = df.loc[list(scenario_groups['C + D/N&M/O/None']), 'mean']
+        middle_P = df.loc[list(scenario_groups['P + D/N&M/O/None']), 'mean']
+        worst = df.loc[df.index.isin(scenario_groups['D/N&M/O/None']), 'mean']
+
+        return ss.oneway.anova_oneway((best, middle_C, middle_P, worst),
+                                      groups=None, use_var='unequal', welch_correction=True, trim_frac=0)
+
+    anova_dalys = anova_oneway()
+    anova_services = anova_oneway(num_services_increased_percent)
+    anova_treatments = anova_oneway(num_treatments_total_increased_percent)
+
     # plot absolute numbers for scenarios
 
     name_of_plot = f'Deaths, {target_period()}'

From 7b71b5e959e1904221c5ba7ce3081903d1be5304 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 9 Oct 2024 17:23:12 +0100
Subject: [PATCH 129/218] plot never-ran appts and the HCW time gap (time needed to run
 those appts)

---
 ...dsion_by_officer_type_with_extra_budget.py | 98 +++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index d6cfbfb328..5eaf3ebb39 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -97,6 +97,20 @@ def get_num_appts(_df):
                 .groupby(level=0, axis=1).sum()
                 .sum())
 
+    def get_num_appts_by_level(_df):
+        """Return the number of services by appt type and facility level (total within the TARGET_PERIOD)"""
+        def unpack_nested_dict_in_series(_raw: pd.Series):
+            return pd.concat(
+                {
+                  idx: pd.DataFrame.from_dict(mydict) for idx, mydict in _raw.items()
+                 }
+             ).unstack().fillna(0.0).astype(int)
+
+        return _df \
+            .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code_And_Level'] \
+            .pipe(unpack_nested_dict_in_series) \
+            .sum(axis=0)
+
     def get_num_services(_df):
         """Return the number of services in total of all appt types (total within the TARGET_PERIOD)"""
         return pd.Series(
@@ -303,6 +317,17 @@ def get_hr_salary(cadres):
         ].set_index('Officer_Category').T
         return salary[cadres]
 
+    def format_appt_time():
+        """
+        Return the formatted appointment time requirements
+        """
+        file_path = Path(resourcefilepath
+                         / 'healthsystem' / 'human_resources' / 'definitions' / 'ResourceFile_Appt_Time_Table.csv')
+        _df = pd.read_csv(file_path, index_col=False)
+        _df = _df.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category',
+                        values='Time_Taken_Mins').fillna(0).T
+        return _df
+
     # def get_hcw_time_usage(_df):
     #     """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)"""
     #     _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum()
@@ -317,6 +342,9 @@ def get_hr_salary(cadres):
     cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
               'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
 
+    # Get appointment time requirement
+    appt_time = format_appt_time()
+
     # # Get current (year of 2018/2019) hr counts
     # curr_hr = get_current_hr(cadres)
 
@@ -460,6 +488,14 @@ def get_hr_salary(cadres):
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
+    num_never_ran_appts_by_level = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Never_ran_HSI_Event',
+        custom_generate_series=get_num_appts_by_level,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
     num_never_ran_services = extract_results(
         results_folder,
         module='tlo.methods.healthsystem.summary',
@@ -519,6 +555,9 @@ def get_hr_salary(cadres):
     num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
+    num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    ).fillna(0.0)
     num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
@@ -707,6 +746,25 @@ def get_hr_salary(cadres):
          ) < 1e-6
     ).all()
 
+    # get HCW time needed to run the never run appts
+    cols_1 = num_never_ran_appts_by_level_summarized.columns
+    cols_2 = appt_time.columns
+    # check that never ran appts (at a level) not in appt_time (as defined) have count 0 and drop them
+    assert (num_never_ran_appts_by_level_summarized[list(set(cols_1) - set(cols_2))] == 0).all().all()
+    num_never_ran_appts_by_level_summarized.drop(columns=list(set(cols_1) - set(cols_2)), inplace=True)
+    assert set(num_never_ran_appts_by_level_summarized.columns).issubset(set(cols_2))
+    # calculate hcw time gap
+    hcw_time_gap = pd.DataFrame(index=num_never_ran_appts_by_level_summarized.index,
+                                columns=appt_time.index)
+    for i in hcw_time_gap.index:
+        for j in hcw_time_gap.columns:
+            hcw_time_gap.loc[i, j] = num_never_ran_appts_by_level_summarized.loc[i, :].mul(
+                appt_time.loc[j, num_never_ran_appts_by_level_summarized.columns]
+            ).sum()
+    # reorder columns to be consistent with cadres
+    hcw_time_gap = hcw_time_gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+                                 'Dental', 'Laboratory', 'Mental', 'Radiography']]
+
     # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
     # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
     ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
@@ -1077,6 +1135,28 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Never ran services by appointment type, {target_period()}'
+    num_never_ran_appts_summarized_in_millions = num_never_ran_appts_summarized / 1e6
+    yerr_services = np.array([
+        (num_never_ran_services_summarized['mean'].values - num_never_ran_services_summarized['lower']).values,
+        (num_never_ran_services_summarized['upper'].values - num_never_ran_services_summarized['mean']).values,
+    ])/1e6
+    fig, ax = plt.subplots(figsize=(9, 6))
+    num_never_ran_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    ax.errorbar(range(len(param_names)), num_never_ran_services_summarized['mean'].values / 1e6, yerr=yerr_services,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_summarized_in_millions.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Services by treatment type, {target_period()}'
     num_treatments_summarized_in_millions = num_treatments_summarized / 1e6
     yerr_services = np.array([
@@ -1117,6 +1197,24 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}'
+    hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index)
+    column_dcsa =  hcw_time_gap_to_plot.pop('DCSA')
+    hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa)
+    fig, ax = plt.subplots(figsize=(9, 6))
+    hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Minutes in Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
     total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
     column_dcsa = total_cost_to_plot.pop('DCSA')

From ac18fb5e04b810194fdd21cb7804e61fa26a5cb4 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 10 Oct 2024 10:34:31 +0100
Subject: [PATCH 130/218] update data preparation file

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py   | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index fade2e59cc..4757231719 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -1,5 +1,5 @@
 """
-We calculate the salar cost of current and funded plus HCW.
+We calculate the salary cost of current and funded plus HCW.
 """
 import itertools
 # import pickle
@@ -29,11 +29,15 @@
 # the hr salary by minute and facility id
 Minute_Salary = Annual_PFT.merge(hr_salary, on=['Officer_Category'], how='outer')
 Minute_Salary['Minute_Salary_USD'] = Minute_Salary['Annual_Salary_USD']/Minute_Salary['Annual_Mins_Per_Staff']
+# store the minute salary by cadre and level
+Minute_Salary_by_Cadre_Level = Minute_Salary[
+    ['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']
+].copy().fillna(0.0)
 Minute_Salary = Minute_Salary[['Facility_Level', 'Officer_Category', 'Minute_Salary_USD']].merge(
     mfl[['Facility_Level', 'Facility_ID']], on=['Facility_Level'], how='outer'
 )
 Minute_Salary.drop(columns=['Facility_Level'], inplace=True)
-Minute_Salary = Minute_Salary.fillna(0)
+Minute_Salary = Minute_Salary.fillna(0.0)
 Minute_Salary.rename(columns={'Officer_Category': 'Officer_Type_Code'}, inplace=True)
 
 Minute_Salary.to_csv(resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)

From b569d71240bcc96e4e8a489b05dffe1d442b539f Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 11 Oct 2024 00:06:22 +0100
Subject: [PATCH 131/218] plot more and delete some

---
 ...dsion_by_officer_type_with_extra_budget.py | 655 ++++++++++++------
 1 file changed, 433 insertions(+), 222 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 5eaf3ebb39..ae2566a031 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -14,7 +14,7 @@
 from matplotlib import pyplot as plt
 
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
-    extra_budget_fracs,
+    extra_budget_fracs, Minute_Salary_by_Cadre_Level,
 )
 
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import (
@@ -317,16 +317,23 @@ def get_hr_salary(cadres):
         ].set_index('Officer_Category').T
         return salary[cadres]
 
-    def format_appt_time():
+    def format_appt_time_and_cost():
         """
-        Return the formatted appointment time requirements
+        Return the formatted appointment time requirements and costs per cadre
         """
         file_path = Path(resourcefilepath
                          / 'healthsystem' / 'human_resources' / 'definitions' / 'ResourceFile_Appt_Time_Table.csv')
         _df = pd.read_csv(file_path, index_col=False)
-        _df = _df.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category',
-                        values='Time_Taken_Mins').fillna(0).T
-        return _df
+
+        time = _df.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category',
+                         values='Time_Taken_Mins').fillna(0.0).T
+        minute_salary = Minute_Salary_by_Cadre_Level
+        cost = _df.merge(minute_salary, on=['Facility_Level', 'Officer_Category'], how='left')
+        cost['cost_USD'] = cost['Time_Taken_Mins'] * cost['Minute_Salary_USD']
+        cost = cost.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category',
+                          values='cost_USD').fillna(0.0).T
+
+        return time, cost
 
     # def get_hcw_time_usage(_df):
     #     """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)"""
@@ -342,8 +349,8 @@ def format_appt_time():
     cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
               'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
 
-    # Get appointment time requirement
-    appt_time = format_appt_time()
+    # Get appointment time and cost requirement
+    appt_time, appt_cost = format_appt_time_and_cost()
 
     # # Get current (year of 2018/2019) hr counts
     # curr_hr = get_current_hr(cadres)
@@ -520,11 +527,18 @@ def format_appt_time():
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
+    # get total service demand
     assert len(num_services) == len(num_never_ran_services) == 1
     assert (num_services.columns == num_never_ran_services.columns).all()
     num_services_demand = num_services + num_never_ran_services
     ratio_services = num_services / num_services_demand
 
+    assert (num_appts.columns == num_never_ran_appts.columns).all()
+    num_never_ran_appts.loc['Lab / Diagnostics', :] = 0
+    num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0)
+    assert (num_appts.index == num_never_ran_appts.index).all()
+    num_appts_demand = num_appts + num_never_ran_appts
+
     # hcw_time_usage = extract_results(
     #     results_folder,
     #     module='tlo.methods.healthsystem.summary',
@@ -558,6 +572,9 @@ def format_appt_time():
     num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     ).fillna(0.0)
+    num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
     num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
@@ -666,6 +683,22 @@ def format_appt_time():
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    num_never_ran_appts_reduced = summarize(
+        -1.0 * find_difference_relative_to_comparison_dataframe(
+            num_never_ran_appts,
+            comparison='s_1',
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    num_never_ran_treatments_reduced = summarize(
+        -1.0 * find_difference_relative_to_comparison_dataframe(
+            num_never_ran_treatments,
+            comparison='s_1',
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
     num_appts_increased_percent = summarize(
         find_difference_relative_to_comparison_dataframe(
             num_appts,
@@ -746,24 +779,49 @@ def format_appt_time():
          ) < 1e-6
     ).all()
 
-    # get HCW time needed to run the never run appts
-    cols_1 = num_never_ran_appts_by_level_summarized.columns
-    cols_2 = appt_time.columns
-    # check that never ran appts (at a level) not in appt_time (as defined) have count 0 and drop them
-    assert (num_never_ran_appts_by_level_summarized[list(set(cols_1) - set(cols_2))] == 0).all().all()
-    num_never_ran_appts_by_level_summarized.drop(columns=list(set(cols_1) - set(cols_2)), inplace=True)
-    assert set(num_never_ran_appts_by_level_summarized.columns).issubset(set(cols_2))
-    # calculate hcw time gap
-    hcw_time_gap = pd.DataFrame(index=num_never_ran_appts_by_level_summarized.index,
-                                columns=appt_time.index)
-    for i in hcw_time_gap.index:
-        for j in hcw_time_gap.columns:
-            hcw_time_gap.loc[i, j] = num_never_ran_appts_by_level_summarized.loc[i, :].mul(
-                appt_time.loc[j, num_never_ran_appts_by_level_summarized.columns]
-            ).sum()
-    # reorder columns to be consistent with cadres
-    hcw_time_gap = hcw_time_gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
-                                 'Dental', 'Laboratory', 'Mental', 'Radiography']]
+    # get HCW time and cost needed to run the never run appts
+    def hcw_time_or_cost_gap(_df=appt_time):
+        cols_1 = num_never_ran_appts_by_level_summarized.columns
+        cols_2 = _df.columns
+        # check that never ran appts (at a level) not in _df (appt_time or appt_cost, as defined) have count 0 and drop them
+        assert (num_never_ran_appts_by_level_summarized[list(set(cols_1) - set(cols_2))] == 0).all().all()
+        num_never_ran_appts_by_level_summarized.drop(columns=list(set(cols_1) - set(cols_2)), inplace=True)
+        assert set(num_never_ran_appts_by_level_summarized.columns).issubset(set(cols_2))
+        # calculate hcw time (or cost) gap, depending on _df
+        gap = pd.DataFrame(index=num_never_ran_appts_by_level_summarized.index,
+                           columns=_df.index)
+        for i in gap.index:
+            for j in gap.columns:
+                gap.loc[i, j] = num_never_ran_appts_by_level_summarized.loc[i, :].mul(
+                    _df.loc[j, num_never_ran_appts_by_level_summarized.columns]
+                ).sum()
+        # reorder columns to be consistent with cadres
+        gap = gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+                   'Dental', 'Laboratory', 'Mental', 'Radiography']]
+
+        return gap
+
+    hcw_time_gap = hcw_time_or_cost_gap(appt_time)
+    hcw_cost_gap = hcw_time_or_cost_gap(appt_cost)
+
+    # cost gap proportions of cadres within each scenario
+    hcw_cost_gap_percent = pd.DataFrame(index=hcw_cost_gap.index, columns=hcw_cost_gap.columns)
+    for i in hcw_cost_gap_percent.index:
+        hcw_cost_gap_percent.loc[i, :] = hcw_cost_gap.loc[i, :] / hcw_cost_gap.loc[i, :].sum()
+    # add a column of 'other' to sum up other cadres
+    hcw_cost_gap_percent['Other'] = hcw_cost_gap_percent[
+        ['Dental', 'Laboratory', 'Mental', 'Radiography']
+    ].sum(axis=1)
+
+    # find appts that need both Clinical and Pharmacy
+    # then calculate hcw time needed for these appts (or treatments; needs treatments and their appt footprints) in the never ran set
+    # also consider plotting service demand by appt for all scenarios to see if they are similar,
+    # so that we can explain that expanding C+P reduces the never ran appts and brings health benefits across scenarios
+    # then the next question is: what proportions for C and P?
+    appts_need_C_P = []
+    for col in appt_time.columns:
+        if (appt_time.loc[['Clinical', 'Pharmacy'], col] > 0).all():
+            appts_need_C_P.append(col)
 
     # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
     # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
@@ -800,6 +858,7 @@ def format_appt_time():
         'Mental': 'plum',
         'Nutrition': 'thistle',
         'Radiography': 'lightgray',
+        'Other': 'gray'
     }
     scenario_groups_color_init = {
         'no_expansion': 'gray',
@@ -829,7 +888,10 @@ def format_appt_time():
 
     # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\
     # percentage of DALYs averted decides the color of that scatter point
-    extra_budget_allocation = extra_budget_fracs.T
+    extra_budget_allocation = extra_budget_fracs.T.reindex(num_dalys_summarized.index)
+    extra_budget_allocation['Other'] = extra_budget_allocation[
+        ['Dental', 'Laboratory', 'Mental', 'Radiography']
+    ].sum(axis=1)
     name_of_plot = f'Dalys averted (%) against no expansion, {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
@@ -875,7 +937,7 @@ def format_appt_time():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'1 Dalys averted, Services increased and Treatment increased, {target_period()}'
+    name_of_plot = f'3D Dalys averted, Services increased and Treatment increased, {target_period()}'
     heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
                            num_treatments_total_increased_percent['mean']], axis=1)
     # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
@@ -901,7 +963,7 @@ def format_appt_time():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'0 Dalys averted, Services increased and Treatment increased, {target_period()}'
+    name_of_plot = f'2D Dalys averted, Services increased and Treatment increased, {target_period()}'
     heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
                            num_treatments_total_increased_percent['mean']], axis=1)
     # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
@@ -948,50 +1010,50 @@ def format_appt_time():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Dalys averted and Treatments increased, {target_period()}'
-    heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
-                           num_treatments_total_increased_percent['mean']], axis=1)
-    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
-    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
-    colors = [scenario_color[s] for s in heat_data.index]
-    fig, ax = plt.subplots()
-    ax.scatter(100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0],
-               alpha=0.8, marker='o', c=colors)
-    ax.set_xlabel('Treatments increased %')
-    ax.set_ylabel('DLAYs averted %')
-    legend_labels = list(scenario_groups_color.keys())
-    legend_handles = [plt.Line2D([0, 0], [0, 0],
-                                 linestyle='none', marker='o', color=scenario_groups_color[label]
-                                 ) for label in legend_labels
-                      ]
-    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
-    plt.title(name_of_plot)
-    plt.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Dalys averted and Treatments increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+    #                        num_treatments_total_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig, ax = plt.subplots()
+    # ax.scatter(100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0],
+    #            alpha=0.8, marker='o', c=colors)
+    # ax.set_xlabel('Treatments increased %')
+    # ax.set_ylabel('DLAYs averted %')
+    # legend_labels = list(scenario_groups_color.keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'Dalys averted and Services ratio increased, {target_period()}'
-    heat_data = pd.concat([num_dalys_averted_percent['mean'], service_ratio_increased_percent['mean']], axis=1)
-    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
-    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
-    colors = [scenario_color[s] for s in heat_data.index]
-    fig, ax = plt.subplots()
-    ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
-               alpha=0.8, marker='o', c=colors)
-    ax.set_xlabel('Service delivery ratio increased %')
-    ax.set_ylabel('DLAYs averted %')
-    legend_labels = list(scenario_groups_color.keys())
-    legend_handles = [plt.Line2D([0, 0], [0, 0],
-                                 linestyle='none', marker='o', color=scenario_groups_color[label]
-                                 ) for label in legend_labels
-                      ]
-    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
-    plt.title(name_of_plot)
-    plt.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Dalys averted and Services ratio increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], service_ratio_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig, ax = plt.subplots()
+    # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
+    #            alpha=0.8, marker='o', c=colors)
+    # ax.set_xlabel('Service delivery ratio increased %')
+    # ax.set_ylabel('DLAYs averted %')
+    # legend_labels = list(scenario_groups_color.keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
     outcome_data = num_dalys_averted_percent['mean']
@@ -1005,7 +1067,7 @@ def format_appt_time():
     regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
     regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
                                 * regression_data['Nursing_and_Midwifery'])
-    cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
+    cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other']
     regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True)
     predictor = regression_data[regression_data.columns[1:]]
     outcome = regression_data['mean']
@@ -1029,41 +1091,41 @@ def anova_oneway(df=num_dalys_averted_percent):
 
     # plot absolute numbers for scenarios
 
-    name_of_plot = f'Deaths, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6)
-    ax.set_title(name_of_plot)
-    ax.set_ylabel('(Millions)')
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Deaths, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Millions)')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'DALYs, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6)
-    ax.set_title(name_of_plot)
-    ax.set_ylabel('(Millions)')
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'DALYs, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Millions)')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'Service demand, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(num_service_demand_summarized / 1e6)
-    ax.set_title(name_of_plot)
-    ax.set_ylabel('(Millions)')
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Service demand, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(num_service_demand_summarized / 1e6)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Millions)')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'Service delivery ratio, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(ratio_service_summarized)
-    ax.set_title(name_of_plot)
-    ax.set_ylabel('services delivered / demand')
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Service delivery ratio, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(ratio_service_summarized)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('services delivered / demand')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     # plot yearly DALYs for best 9 scenarios
     name_of_plot = f'Yearly DALYs, {target_period()}'
@@ -1113,33 +1175,55 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Services by appointment type, {target_period()}'
-    num_appts_summarized_in_millions = num_appts_summarized / 1e6
-    yerr_services = np.array([
-        (num_services_summarized['mean'].values - num_services_summarized['lower']).values,
-        (num_services_summarized['upper'].values - num_services_summarized['mean']).values,
-    ])/1e6
-    fig, ax = plt.subplots(figsize=(9, 6))
-    num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
-    ax.errorbar(range(len(param_names)), num_services_summarized['mean'].values / 1e6, yerr=yerr_services,
-                fmt=".", color="black", zorder=100)
-    ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in num_appts_summarized_in_millions.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Services by appointment type, {target_period()}'
+    # num_appts_summarized_in_millions = num_appts_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_services_summarized['mean'] - num_services_summarized['lower']).values,
+    #     (num_services_summarized['upper'] - num_services_summarized['mean']).values,
+    # ])/1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_services_summarized['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_appts_summarized_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Services demand by appointment type, {target_period()}'
+    # num_appts_demand_to_plot = num_appts_demand_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_service_demand_summarized['mean'] - num_service_demand_summarized['lower']).values,
+    #     (num_service_demand_summarized['upper'] - num_service_demand_summarized['mean']).values,
+    # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_appts_demand_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_service_demand_summarized['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_appts_demand_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     name_of_plot = f'Never ran services by appointment type, {target_period()}'
     num_never_ran_appts_summarized_in_millions = num_never_ran_appts_summarized / 1e6
     yerr_services = np.array([
-        (num_never_ran_services_summarized['mean'].values - num_never_ran_services_summarized['lower']).values,
-        (num_never_ran_services_summarized['upper'].values - num_never_ran_services_summarized['mean']).values,
+        (num_never_ran_services_summarized['mean'] - num_never_ran_services_summarized['lower']).values,
+        (num_never_ran_services_summarized['upper'] - num_never_ran_services_summarized['mean']).values,
     ])/1e6
     fig, ax = plt.subplots(figsize=(9, 6))
     num_never_ran_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
@@ -1157,49 +1241,72 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Services by treatment type, {target_period()}'
-    num_treatments_summarized_in_millions = num_treatments_summarized / 1e6
-    yerr_services = np.array([
-        (num_treatments_total_summarized['mean'].values - num_treatments_total_summarized['lower']).values,
-        (num_treatments_total_summarized['upper'].values - num_treatments_total_summarized['mean']).values,
-    ]) / 1e6
-    fig, ax = plt.subplots(figsize=(10, 6))
-    num_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
-    ax.errorbar(range(len(param_names)), num_treatments_total_summarized['mean'].values / 1e6, yerr=yerr_services,
-                fmt=".", color="black", zorder=100)
-    ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in num_treatments_summarized_in_millions.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Services by treatment type, {target_period()}'
+    # num_treatments_summarized_in_millions = num_treatments_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_treatments_total_summarized['mean'] - num_treatments_total_summarized['lower']).values,
+    #     (num_treatments_total_summarized['upper'] - num_treatments_total_summarized['mean']).values,
+    # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(10, 6))
+    # num_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_treatments_total_summarized['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_treatments_summarized_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}'
-    total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
-    column_dcsa = total_staff_to_plot.pop('DCSA')
-    total_staff_to_plot.insert(3, "DCSA", column_dcsa)
-    fig, ax = plt.subplots(figsize=(9, 6))
-    total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Thousands', fontsize='small')
-    ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Never ran services by treatment type, {target_period()}'
+    # num_never_ran_treatments_summarized_in_millions = num_never_ran_treatments_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_never_ran_treatments_total_summarized['mean'] - num_never_ran_treatments_total_summarized['lower']).values,
+    #     (num_never_ran_treatments_total_summarized['upper'] - num_never_ran_treatments_total_summarized['mean']).values,
+    # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(10, 6))
+    # num_never_ran_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_never_ran_treatments_total_summarized['mean'].values / 1e6,
+    #             yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_summarized_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}'
+    # total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
+    # column_dcsa = total_staff_to_plot.pop('DCSA')
+    # total_staff_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Thousands', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}'
     hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index)
-    column_dcsa =  hcw_time_gap_to_plot.pop('DCSA')
+    column_dcsa = hcw_time_gap_to_plot.pop('DCSA')
     hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots(figsize=(9, 6))
     hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
@@ -1215,15 +1322,15 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
-    total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
-    column_dcsa = total_cost_to_plot.pop('DCSA')
-    total_cost_to_plot.insert(3, "DCSA", column_dcsa)
+    name_of_plot = f'HCW cost needed to deliver never ran appointments, {target_period()}'
+    hcw_cost_gap_to_plot = (hcw_cost_gap / 1e6).reindex(num_dalys_summarized.index)
+    column_dcsa = hcw_cost_gap_to_plot.pop('DCSA')
+    hcw_cost_gap_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots(figsize=(9, 6))
-    total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Millions', fontsize='small')
+    hcw_cost_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('USD in Millions', fontsize='small')
     ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index]
+    xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
                fontsize='small', reverse=True)
@@ -1233,37 +1340,93 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'DALYs by cause, {target_period()}'
-    num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6
-    yerr_dalys = np.array([
-        (num_dalys_summarized['mean'].values - num_dalys_summarized['lower']).values,
-        (num_dalys_summarized['upper'].values - num_dalys_summarized['mean']).values,
-    ])/1e6
-    fig, ax = plt.subplots(figsize=(9, 6))
-    num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
-    ax.errorbar(range(len(param_names)), num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys,
-                fmt=".", color="black", zorder=100)
-    ax.set_ylabel('Millions', fontsize='small')
+    name_of_plot = f'HCW cost gap distribution among cadres, {target_period()}'
+    cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
+    hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylim(0, 100)
+    ax.set_ylabel('Percentage %')
     ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_summarized_in_millions.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    fig.subplots_adjust(right=0.7)
-    ax.legend(
-        loc="center left",
-        bbox_to_anchor=(0.750, 0.6),
-        bbox_transform=fig.transFigure,
-        title='Cause of death or injury',
-        title_fontsize='x-small',
-        fontsize='x-small',
-        reverse=True,
-        ncol=1
-    )
+    xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_percent_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    # plot the average proportions of all scenarios
+    for c in cadres_to_plot:
+        plt.axhline(y=hcw_cost_gap_percent_to_plot[c].mean(),
+                    linestyle='--', color=officer_category_color[c], alpha=0.8,
+                    label=c)
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Extra budget allocation among cadres, {target_period()}'
+    cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
+    extra_budget_allocation_to_plot = extra_budget_allocation[cadres_to_plot] * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    extra_budget_allocation_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Percentage %')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in extra_budget_allocation_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    # name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
+    # total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
+    # column_dcsa = total_cost_to_plot.pop('DCSA')
+    # total_cost_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'DALYs by cause, {target_period()}'
+    # num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6
+    # yerr_dalys = np.array([
+    #     (num_dalys_summarized['mean'] - num_dalys_summarized['lower']).values,
+    #     (num_dalys_summarized['upper'] - num_dalys_summarized['mean']).values,
+    # ])/1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_summarized_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # fig.subplots_adjust(right=0.7)
+    # ax.legend(
+    #     loc="center left",
+    #     bbox_to_anchor=(0.750, 0.6),
+    #     bbox_transform=fig.transFigure,
+    #     title='Cause of death or injury',
+    #     title_fontsize='x-small',
+    #     fontsize='x-small',
+    #     reverse=True,
+    #     ncol=1
+    # )
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
     # plot relative numbers for scenarios
     name_of_plot = f'DALYs averted against no expansion, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True)
@@ -1283,14 +1446,14 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Service delivery ratio against no expansion, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(service_ratio_increased * 100, service_ratio_increased_percent, annotation=True)
-    ax.set_title(name_of_plot)
-    ax.set_ylabel('Percentage')
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Service delivery ratio against no expansion, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(service_ratio_increased * 100, service_ratio_increased_percent, annotation=True)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('Percentage')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}'
     extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex(
@@ -1333,8 +1496,8 @@ def anova_oneway(df=num_dalys_averted_percent):
     name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6
     yerr_services = np.array([
-        (num_services_increased['mean'].values - num_services_increased['lower']).values,
-        (num_services_increased['upper'].values - num_services_increased['mean']).values,
+        (num_services_increased['mean'] - num_services_increased['lower']).values,
+        (num_services_increased['upper'] - num_services_increased['mean']).values,
     ]) / 1e6
     fig, ax = plt.subplots(figsize=(9, 6))
     num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
@@ -1354,21 +1517,45 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Services increased by treatment type \nagainst no expansion, {target_period()}'
-    num_treatments_increased_in_millions = num_treatments_increased / 1e6
-    yerr_services = np.array([
-        (num_treatments_total_increased['mean'].values - num_treatments_total_increased['lower']).values,
-        (num_treatments_total_increased['upper'].values - num_treatments_total_increased['mean']).values,
-    ]) / 1e6
-    fig, ax = plt.subplots(figsize=(10, 6))
-    num_treatments_increased_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
-    ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services,
-                fmt=".", color="black", zorder=100)
+    name_of_plot = f'Never ran services reduced by appointment type \nagainst no expansion, {target_period()}'
+    num_never_ran_appts_reduced_to_plot = num_never_ran_appts_reduced / 1e6
+    # yerr_services = np.array([
+    #     (num_services_increased['mean'] - num_services_increased['lower']).values,
+    #     (num_services_increased['upper'] - num_services_increased['mean']).values,
+    # ]) / 1e6
+    fig, ax = plt.subplots(figsize=(9, 6))
+    num_never_ran_appts_reduced_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_reduced_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Never ran services reduced by treatment type \nagainst no expansion, {target_period()}'
+    num_never_ran_treatments_reduced_to_plot = num_never_ran_treatments_reduced / 1e6
+    # yerr_services = np.array([
+    #     (num_services_increased['mean'] - num_services_increased['lower']).values,
+    #     (num_services_increased['upper'] - num_services_increased['mean']).values,
+    # ]) / 1e6
+    fig, ax = plt.subplots(figsize=(9, 6))
+    num_never_ran_treatments_reduced_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in num_treatments_increased_in_millions.index]
+    xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_reduced_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment type', title_fontsize='small',
                fontsize='small', reverse=True)
     plt.title(name_of_plot)
     fig.tight_layout()
@@ -1378,11 +1565,35 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
+    # name_of_plot = f'Services increased by treatment type \nagainst no expansion, {target_period()}'
+    # num_treatments_increased_in_millions = num_treatments_increased / 1e6
+    # yerr_services = np.array([
+    #     (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values,
+    #     (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values,
+    # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(10, 6))
+    # num_treatments_increased_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_treatments_increased_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(
+    #     name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    # )
+    # fig.show()
+    # plt.close(fig)
+
     name_of_plot = f'DALYs averted by cause against no expansion, {target_period()}'
     num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
     yerr_dalys = np.array([
-        (num_dalys_averted['mean'].values - num_dalys_averted['lower']).values,
-        (num_dalys_averted['upper'].values - num_dalys_averted['mean']).values,
+        (num_dalys_averted['mean'] - num_dalys_averted['lower']).values,
+        (num_dalys_averted['upper'] - num_dalys_averted['mean']).values,
     ]) / 1e6
     fig, ax = plt.subplots(figsize=(9, 6))
     num_dalys_by_cause_averted_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)

From 96a072c47a869167db8867facd991636efee21f1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 11 Oct 2024 23:24:01 +0100
Subject: [PATCH 132/218] plot more never ran appts results

---
 ...dsion_by_officer_type_with_extra_budget.py | 104 +++++++++++++++---
 1 file changed, 86 insertions(+), 18 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index ae2566a031..2f8fe5d24f 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -780,20 +780,23 @@ def format_appt_time_and_cost():
     ).all()
 
     # get HCW time and cost needed to run the never run appts
-    def hcw_time_or_cost_gap(_df=appt_time):
-        cols_1 = num_never_ran_appts_by_level_summarized.columns
-        cols_2 = _df.columns
+    def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized):
+        cols_1 = count_df.columns
+        cols_2 = time_cost_df.columns
         # check that never ran appts (at a level) not in appt_time (as defined) have count 0 and drop them
-        assert (num_never_ran_appts_by_level_summarized[list(set(cols_1) - set(cols_2))] == 0).all().all()
-        num_never_ran_appts_by_level_summarized.drop(columns=list(set(cols_1) - set(cols_2)), inplace=True)
-        assert set(num_never_ran_appts_by_level_summarized.columns).issubset(set(cols_2))
+        assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all()
+        if len(list(set(cols_1) - set(cols_2))) > 0:
+            _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2)))
+        else:
+            _count_df = count_df.copy()
+        assert set(_count_df.columns).issubset(set(cols_2))
         # calculate hcw time gap
-        gap = pd.DataFrame(index=num_never_ran_appts_by_level_summarized.index,
-                           columns=_df.index)
+        gap = pd.DataFrame(index=_count_df.index,
+                           columns=time_cost_df.index)
         for i in gap.index:
             for j in gap.columns:
-                gap.loc[i, j] = num_never_ran_appts_by_level_summarized.loc[i, :].mul(
-                    _df.loc[j, num_never_ran_appts_by_level_summarized.columns]
+                gap.loc[i, j] = _count_df.loc[i, :].mul(
+                    time_cost_df.loc[j, _count_df.columns]
                 ).sum()
         # reorder columns to be consistent with cadres
         gap = gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
@@ -813,15 +816,58 @@ def hcw_time_or_cost_gap(_df=appt_time):
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
 
-    # find appts that need Clinical + Pharmacy
-    # then calculate hcw time needed for these appts (or treatments, need treatment and their appt footprint) in never run set
-    # also consider plot service demand by appt of all scenarios to see if they are similar
+    # find appts that need Clinical + Pharmacy (+ Nursing_and_Midwifery)
+    # then calculate hcw time needed for these appts (or treatments, need treatment and their appt footprint)
+    # in never run set
     # so we can explain that expand C+P is reducing the never run appts and bring health benefits across scenarios
-    # then the next question is what proportion for C and P?
-    appts_need_C_P = []
-    for col in appt_time.columns:
-        if (appt_time.loc[['Clinical', 'Pharmacy'], col] > 0).all():
-            appts_need_C_P.append(col)
+    # then the next question is what proportion for C and P and any indication for better extra budget allocation
+    # so that never ran appts will be reduced and DALYs could be averted further?
+    def get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical', 'Pharmacy'], appts_count_all=num_never_ran_appts_by_level_summarized
+    ):
+        # find the appts that need all cadres in cadres_to_find
+        def find_never_ran_appts_that_need_specific_cadres():
+            appts_to_find = []
+            _common_cols = appt_time.columns.intersection(appts_count_all.columns)
+            # already checked above that columns in the latter that are not in the former have 0 count
+            for col in _common_cols:
+                if (appt_time.loc[cadres_to_find, col] > 0).all():
+                    appts_to_find.append(col)
+
+            return appts_to_find
+
+        # counts and count proportions
+        _appts = find_never_ran_appts_that_need_specific_cadres()
+        _counts = (appts_count_all[_appts].groupby(level=1, axis=1).sum()
+                   .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum()
+                   .reindex(num_dalys_summarized.index))
+        _counts_all = (appts_count_all.groupby(level=1, axis=1).sum()
+                       .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum()
+                       .reindex(num_dalys_summarized.index))
+        assert (_counts.index == _counts_all.index).all()
+        _proportions = _counts / _counts_all[_counts.columns]
+
+        # hcw time gap and proportions
+        _time_gap = hcw_time_or_cost_gap(appt_time, appts_count_all[_appts])
+        assert (_time_gap.index == hcw_time_gap.index).all()
+        _time_gap_proportions = _time_gap / hcw_time_gap[_time_gap.columns]
+
+        # hcw cost gap and proportions
+        _cost_gap = hcw_time_or_cost_gap(appt_cost, appts_count_all[_appts])
+        assert (_cost_gap.index == hcw_cost_gap.index).all()
+        _cost_gap_proportions = _cost_gap / hcw_cost_gap[_cost_gap.columns]
+
+        return _appts, _counts, _proportions, _time_gap, _time_gap_proportions, _cost_gap, _cost_gap_proportions
+
+    never_ran_appts_info_that_need_CP = get_never_ran_appts_info_that_need_specific_cadres()
+    never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'])
+    # Checked that never ran appts that need CP = never ran appts that need CNP + ('3', 'TBNew'),
+    # whereas never ran TBNew at level 3 = 0, thus the proportions info are the same in the two cases
+
+    # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results
+
+    # hcw time flow to treatments?
 
     # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
     # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
@@ -1362,6 +1408,28 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Cost proportions of appointments that need CNP in never ran appointments, {target_period()}'
+    cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']
+    data_to_plot = never_ran_appts_info_that_need_CP[6][cadres_to_plot] * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylim(0, 100)
+    ax.set_ylabel('Percentage %')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    # plot the average proportions of all scenarios
+    for c in cadres_to_plot:
+        plt.axhline(y=data_to_plot[c].mean(),
+                    linestyle='--', color=officer_category_color[c], alpha=0.8,
+                    label=c)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Extra budget allocation among cadres, {target_period()}'
     cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
     extra_budget_allocation_to_plot = extra_budget_allocation[cadres_to_plot] * 100

From 2b03be02334311026e3ed16f0b08789d7fce9fa2 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 15 Oct 2024 22:32:10 +0100
Subject: [PATCH 133/218] check summary logger Capacity

---
 ...xpandsion_by_officer_type_with_extra_budget.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 2f8fe5d24f..35938d92a2 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -337,9 +337,14 @@ def format_appt_time_and_cost():
 
     # def get_hcw_time_usage(_df):
     #     """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)"""
-    #     _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), 'TREATMENT_ID'].apply(pd.Series).sum()
-    #     _df.index = _df.index.map(lambda x: x.split('_')[0] + "*")
-    #     _df = _df.groupby(level=0).sum()
+    #     CNP_cols = ['date']
+    #     for col in _df.columns[1:]:
+    #         if ('Clinical' in col) | ('Nursing_and_Midwifery' in col) | ('Pharmacy' in col):
+    #             CNP_cols.append(col)
+    #
+    #     _df = _df[CNP_cols].copy()
+    #
+    #
     #     return _df
 
     # Get parameter/scenario names
@@ -538,11 +543,11 @@ def format_appt_time_and_cost():
     num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0)
     assert (num_appts.index == num_never_ran_appts.index).all()
     num_appts_demand = num_appts + num_never_ran_appts
-
+    #
     # hcw_time_usage = extract_results(
     #     results_folder,
     #     module='tlo.methods.healthsystem.summary',
-    #     key='Capacity',#'Capacity_By_OfficerType_And_FacilityLevel',
+    #     key='Capacity_By_OfficerType_And_FacilityLevel',#'Capacity',#'Capacity_By_OfficerType_And_FacilityLevel',
     #     custom_generate_series=get_hcw_time_usage,
     #     do_scaling=False
     # ).pipe(set_param_names_as_column_index_level_0)

From 27d5404b7062b7cf5cfb6241e93fc8ea36e95b30 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 16 Oct 2024 16:38:28 +0100
Subject: [PATCH 134/218] plot never ran appts info

---
 ...dsion_by_officer_type_with_extra_budget.py | 165 ++++++++++++++----
 1 file changed, 132 insertions(+), 33 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 35938d92a2..45664ec0d5 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -806,6 +806,8 @@ def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by
         # reorder columns to be consistent with cadres
         gap = gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
                    'Dental', 'Laboratory', 'Mental', 'Radiography']]
+        # reorder index to be consistent with
+        gap = gap.reindex(num_dalys_summarized.index)
 
         return gap
 
@@ -836,12 +838,13 @@ def find_never_ran_appts_that_need_specific_cadres():
             _common_cols = appt_time.columns.intersection(appts_count_all.columns)
             # already checked above that columns in the latter that are not in the former have 0 count
             for col in _common_cols:
-                if (appt_time.loc[cadres_to_find, col] > 0).all():
+                if ((appt_time.loc[cadres_to_find, col] > 0).all()
+                    and (appt_time.loc[~appt_time.index.isin(cadres_to_find), col] == 0).all()):
                     appts_to_find.append(col)
 
             return appts_to_find
 
-        # counts and count proportions
+        # counts and count proportions of all never ran
         _appts = find_never_ran_appts_that_need_specific_cadres()
         _counts = (appts_count_all[_appts].groupby(level=1, axis=1).sum()
                    .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0, axis=1).sum()
@@ -861,12 +864,66 @@ def find_never_ran_appts_that_need_specific_cadres():
         _cost_gap = hcw_time_or_cost_gap(appt_cost, appts_count_all[_appts])
         assert (_cost_gap.index == hcw_cost_gap.index).all()
         _cost_gap_proportions = _cost_gap / hcw_cost_gap[_cost_gap.columns]
+        # cost gap distribution among cadres
+        _cost_gap_percent = pd.DataFrame(index=_cost_gap.index, columns=_cost_gap.columns)
+        for i in _cost_gap_percent.index:
+            _cost_gap_percent.loc[i, :] = _cost_gap.loc[i, :] / _cost_gap.loc[i, :].sum()
 
-        return _appts, _counts, _proportions, _time_gap, _time_gap_proportions, _cost_gap, _cost_gap_proportions
+        # if sum up all appt types/cadres
+        _proportions_total = _counts.sum(axis=1) / _counts_all.sum(axis=1)
+        _cost_gap_proportions_total = _cost_gap.sum(axis=1) / hcw_cost_gap.sum(axis=1)
+
+        return _proportions_total, _cost_gap_proportions_total, _cost_gap_percent
 
-    never_ran_appts_info_that_need_CP = get_never_ran_appts_info_that_need_specific_cadres()
     never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres(
         cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'])
+    never_ran_appts_info_that_need_CP = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical', 'Pharmacy'])
+    never_ran_appts_info_that_need_CN = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical', 'Nursing_and_Midwifery'])
+    never_ran_appts_info_that_need_NP = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Nursing_and_Midwifery', 'Pharmacy'])
+    never_ran_appts_info_that_need_C = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical'])
+    never_ran_appts_info_that_need_N = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Nursing_and_Midwifery'])
+    never_ran_appts_info_that_need_P = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Pharmacy'])
+
+    # cost proportions within never ran appts, in total of all cadres
+    p_cost = pd.DataFrame(index=num_services_summarized.index)
+    p_cost['C + N&M + P'] = never_ran_appts_info_that_need_CNP[1]
+    p_cost['C + P'] = never_ran_appts_info_that_need_CP[1]
+    p_cost['C + N&M'] = never_ran_appts_info_that_need_CN[1]
+    p_cost['N&M + P'] = never_ran_appts_info_that_need_NP[1]
+    p_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[1]
+    p_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[1]
+    p_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[1]
+    p_cost['Other cases'] = 1 - p_cost[p_cost.columns[0:7]].sum(axis=1)
+
+    # appts count proportions within never ran appts, in total of all cadres
+    p_count = pd.DataFrame(index=num_services_summarized.index)
+    p_count['C + N&M + P'] = never_ran_appts_info_that_need_CNP[0]
+    p_count['C + P'] = never_ran_appts_info_that_need_CP[0]
+    p_count['C + N&M'] = never_ran_appts_info_that_need_CN[0]
+    p_count['N&M + P'] = never_ran_appts_info_that_need_NP[0]
+    p_count['Clinical (C)'] = never_ran_appts_info_that_need_C[0]
+    p_count['Pharmacy (P)'] = never_ran_appts_info_that_need_P[0]
+    p_count['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[0]
+    p_count['Other cases'] = 1 - p_count[p_count.columns[0:7]].sum(axis=1)
+
+    # define color for the cadres combinations above
+    cadre_comb_color = {
+        'C + N&M + P': 'royalblue',
+        'C + P': 'turquoise',
+        'C + N&M': 'gold',
+        'N&M + P': 'yellowgreen',
+        'Clinical (C)': 'mediumpurple',
+        'Pharmacy (P)': 'limegreen',
+        'Nursing_and_Midwifery (N&M)': 'pink',
+        'Other cases': 'gray',
+    }
+
     # Checked that never ran appts that need CP = never ran appts that need CNP + ('3', 'TBNew'),
     # whereas never ran TBNew at level 3 = 0, thus the proportions info are the same in the two cases
 
@@ -1048,7 +1105,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
                alpha=0.8, marker='o', c=colors)
     ax.set_xlabel('Services increased %')
-    ax.set_ylabel('DLAYs averted %')
+    ax.set_ylabel('DALYs averted %')
     legend_labels = list(scenario_groups_color.keys())
     legend_handles = [plt.Line2D([0, 0], [0, 0],
                                  linestyle='none', marker='o', color=scenario_groups_color[label]
@@ -1071,7 +1128,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # ax.scatter(100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0],
     #            alpha=0.8, marker='o', c=colors)
     # ax.set_xlabel('Treatments increased %')
-    # ax.set_ylabel('DLAYs averted %')
+    # ax.set_ylabel('DALYs averted %')
     # legend_labels = list(scenario_groups_color.keys())
     # legend_handles = [plt.Line2D([0, 0], [0, 0],
     #                              linestyle='none', marker='o', color=scenario_groups_color[label]
@@ -1093,7 +1150,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
     #            alpha=0.8, marker='o', c=colors)
     # ax.set_xlabel('Service delivery ratio increased %')
-    # ax.set_ylabel('DLAYs averted %')
+    # ax.set_ylabel('DALYs averted %')
     # legend_labels = list(scenario_groups_color.keys())
     # legend_handles = [plt.Line2D([0, 0], [0, 0],
     #                              linestyle='none', marker='o', color=scenario_groups_color[label]
@@ -1355,23 +1412,23 @@ def anova_oneway(df=num_dalys_averted_percent):
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}'
-    hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index)
-    column_dcsa = hcw_time_gap_to_plot.pop('DCSA')
-    hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa)
-    fig, ax = plt.subplots(figsize=(9, 6))
-    hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Minutes in Millions', fontsize='small')
-    ax.set(xlabel=None)
-    xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}'
+    # hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index)
+    # column_dcsa = hcw_time_gap_to_plot.pop('DCSA')
+    # hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Minutes in Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     name_of_plot = f'HCW cost needed to deliver never ran appointments, {target_period()}'
     hcw_cost_gap_to_plot = (hcw_cost_gap / 1e6).reindex(num_dalys_summarized.index)
@@ -1391,12 +1448,54 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'HCW cost gap distribution among cadres, {target_period()}'
+    name_of_plot = f'Count proportions of never ran appointments that require specific cadres only, {target_period()}'
+    data_to_plot = p_count * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    ax.set_ylim(0, 100)
+    ax.set_ylabel('Percentage %')
+    ax.set_xlabel('Extra budget allocation scenario')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # plot the average proportions of all scenarios
+    for c in data_to_plot.columns:
+        plt.axhline(y=data_to_plot[c].mean(),
+                    linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+                    label=c)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Cost proportions of never ran appointments that require specific cadres only, {target_period()}'
+    data_to_plot = p_cost * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    ax.set_ylim(0, 100)
+    ax.set_ylabel('Percentage %')
+    ax.set_xlabel('Extra budget allocation scenario')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # plot the average proportions of all scenarios
+    for c in data_to_plot.columns:
+        plt.axhline(y=data_to_plot[c].mean(),
+                    linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+                    label=c)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'HCW cost gap distribution of never ran appointments, {target_period()}'
     cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
     hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100
     fig, ax = plt.subplots(figsize=(12, 8))
-    hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylim(0, 100)
+    hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    #ax.set_ylim(0, 100)
     ax.set_ylabel('Percentage %')
     ax.set(xlabel=None)
     xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_percent_to_plot.index]
@@ -1405,7 +1504,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     # plot the average proportions of all scenarios
     for c in cadres_to_plot:
         plt.axhline(y=hcw_cost_gap_percent_to_plot[c].mean(),
-                    linestyle='--', color=officer_category_color[c], alpha=0.8,
+                    linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2,
                     label=c)
     plt.title(name_of_plot)
     fig.tight_layout()
@@ -1413,12 +1512,12 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Cost proportions of appointments that need CNP in never ran appointments, {target_period()}'
+    name_of_plot = f'HCW cost gap distribution of never ran appointments that require CNP only, {target_period()}'
     cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']
-    data_to_plot = never_ran_appts_info_that_need_CP[6][cadres_to_plot] * 100
+    data_to_plot = never_ran_appts_info_that_need_CNP[2][cadres_to_plot] * 100
     fig, ax = plt.subplots(figsize=(12, 8))
-    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylim(0, 100)
+    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    #ax.set_ylim(0, 100)
     ax.set_ylabel('Percentage %')
     ax.set(xlabel=None)
     xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
@@ -1427,7 +1526,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     # plot the average proportions of all scenarios
     for c in cadres_to_plot:
         plt.axhline(y=data_to_plot[c].mean(),
-                    linestyle='--', color=officer_category_color[c], alpha=0.8,
+                    linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2,
                     label=c)
     plt.title(name_of_plot)
     fig.tight_layout()

From 0ebfd120fb16d57a99acff9b1c92302304b64e06 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 16 Oct 2024 16:58:55 +0100
Subject: [PATCH 135/218] minor formatting

---
 ...dsion_by_officer_type_with_extra_budget.py | 43 ++++++++++---------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 45664ec0d5..6c9d4a0d50 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -251,7 +251,8 @@ def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
         legend_labels = list(scenario_groups_color.keys())
         legend_handles = [plt.Rectangle((0, 0), 1, 1,
                                         color=scenario_groups_color[label]) for label in legend_labels]
-        ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5))
+        ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
+                  title='Scenario groups')
 
         ax.grid(axis="y")
         ax.spines['top'].set_visible(False)
@@ -1000,7 +1001,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     extra_budget_allocation['Other'] = extra_budget_allocation[
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
-    name_of_plot = f'Dalys averted (%) against no expansion, {target_period()}'
+    name_of_plot = f'DALYs averted (%) against no expansion, {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
@@ -1045,7 +1046,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'3D Dalys averted, Services increased and Treatment increased, {target_period()}'
+    name_of_plot = f'3D DALYs averted, Services increased and Treatment increased, {target_period()}'
     heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
                            num_treatments_total_increased_percent['mean']], axis=1)
     # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
@@ -1071,7 +1072,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'2D Dalys averted, Services increased and Treatment increased, {target_period()}'
+    name_of_plot = f'2D DALYs averted, Services increased and Treatment increased, {target_period()}'
     heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
                            num_treatments_total_increased_percent['mean']], axis=1)
     # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
@@ -1095,7 +1096,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Dalys averted and Services increased, {target_period()}'
+    name_of_plot = f'DALYs averted and Services increased, {target_period()}'
     heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
                            num_treatments_total_increased_percent['mean']], axis=1)
     # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
@@ -1118,7 +1119,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    # name_of_plot = f'Dalys averted and Treatments increased, {target_period()}'
+    # name_of_plot = f'DALYs averted and Treatments increased, {target_period()}'
     # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
     #                        num_treatments_total_increased_percent['mean']], axis=1)
     # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
@@ -1141,7 +1142,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    # name_of_plot = f'Dalys averted and Services ratio increased, {target_period()}'
+    # name_of_plot = f'DALYs averted and Services ratio increased, {target_period()}'
     # heat_data = pd.concat([num_dalys_averted_percent['mean'], service_ratio_increased_percent['mean']], axis=1)
     # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
     # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
@@ -1338,7 +1339,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     ax.errorbar(range(len(param_names)), num_never_ran_services_summarized['mean'].values / 1e6, yerr=yerr_services,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_summarized_in_millions.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
@@ -1437,7 +1438,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig, ax = plt.subplots(figsize=(9, 6))
     hcw_cost_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('USD in Millions', fontsize='small')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
@@ -1497,7 +1498,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
     #ax.set_ylim(0, 100)
     ax.set_ylabel('Percentage %')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_percent_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90)
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
@@ -1519,7 +1520,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
     #ax.set_ylim(0, 100)
     ax.set_ylabel('Percentage %')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90)
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
@@ -1540,7 +1541,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig, ax = plt.subplots(figsize=(12, 8))
     extra_budget_allocation_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Percentage %')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in extra_budget_allocation_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90)
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
@@ -1603,7 +1604,8 @@ def anova_oneway(df=num_dalys_averted_percent):
     name_of_plot = f'DALYs averted against no expansion, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
-    ax.set_ylabel('(Millions)')
+    ax.set_ylabel('Millions')
+    ax.set_xlabel('Extra budget allocation scenario')
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
     fig.show()
@@ -1612,7 +1614,8 @@ def anova_oneway(df=num_dalys_averted_percent):
     name_of_plot = f'Deaths averted against no expansion, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
-    ax.set_ylabel('(Millions)')
+    ax.set_ylabel('Millions')
+    ax.set_xlabel('Extra budget allocation scenario')
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
     fig.show()
@@ -1635,7 +1638,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig, ax = plt.subplots(figsize=(9, 6))
     extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Thousands', fontsize='small')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
@@ -1654,7 +1657,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig, ax = plt.subplots(figsize=(9, 6))
     extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
@@ -1676,7 +1679,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
@@ -1700,7 +1703,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
     #             fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_reduced_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
@@ -1724,7 +1727,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
     #             fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_reduced_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment type', title_fontsize='small',
@@ -1772,7 +1775,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
                 fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
-    ax.set(xlabel=None)
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
     xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_averted.index]
     ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
     fig.subplots_adjust(right=0.7)

From a59f29f5d1b666656cf74af6462e93e8b32711c1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 16 Oct 2024 23:51:06 +0100
Subject: [PATCH 136/218] extract fraction of hcw time used

---
 ...dsion_by_officer_type_with_extra_budget.py | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 6c9d4a0d50..15d5cc64b7 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -336,17 +336,18 @@ def format_appt_time_and_cost():
 
         return time, cost
 
-    # def get_hcw_time_usage(_df):
-    #     """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)"""
-    #     CNP_cols = ['date']
-    #     for col in _df.columns[1:]:
-    #         if ('Clinical' in col) | ('Nursing_and_Midwifery' in col) | ('Pharmacy' in col):
-    #             CNP_cols.append(col)
-    #
-    #     _df = _df[CNP_cols].copy()
-    #
-    #
-    #     return _df
+    def get_hcw_time_usage(_df):
+        """Return the mean over TARGET_PERIOD of each Clinical, Nursing_and_Midwifery or Pharmacy column of the log"""
+        CNP_cols = ['date']
+        for col in _df.columns[1:]:  # skip the first column; keep columns whose name mentions a C/N&M/P cadre
+            if ('Clinical' in col) | ('Nursing_and_Midwifery' in col) | ('Pharmacy' in col):
+                CNP_cols.append(col)
+
+        _df = _df[CNP_cols].copy()
+        _df = _df.loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), :]
+        _df = _df.set_index('date').mean(axis=0) # average over years
+
+        return _df
 
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
@@ -544,14 +545,14 @@ def format_appt_time_and_cost():
     num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0)
     assert (num_appts.index == num_never_ran_appts.index).all()
     num_appts_demand = num_appts + num_never_ran_appts
-    #
-    # hcw_time_usage = extract_results(
-    #     results_folder,
-    #     module='tlo.methods.healthsystem.summary',
-    #     key='Capacity_By_OfficerType_And_FacilityLevel',#'Capacity',#'Capacity_By_OfficerType_And_FacilityLevel',
-    #     custom_generate_series=get_hcw_time_usage,
-    #     do_scaling=False
-    # ).pipe(set_param_names_as_column_index_level_0)
+
+    hcw_time_usage = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Capacity_By_OfficerType_And_FacilityLevel',#'Capacity',#'Capacity_By_OfficerType_And_FacilityLevel',
+        custom_generate_series=get_hcw_time_usage,
+        do_scaling=False
+    ).pipe(set_param_names_as_column_index_level_0)
 
     # get absolute numbers for scenarios
     # sort the scenarios according to their DALYs values, in ascending order
@@ -606,6 +607,9 @@ def format_appt_time_and_cost():
     ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex(
         num_dalys_summarized.index
     )
+    hcw_time_usage_summarized = summarize(hcw_time_usage, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
 
     # get relative numbers for scenarios, compared to no_expansion scenario: s_1
     num_services_increased = summarize(

From 8c98f0ce8ee8ada9f9b0bbc076320c32aa1404b1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 17 Oct 2024 23:12:07 +0100
Subject: [PATCH 137/218] plot fraction of hcw time used

---
 ...dsion_by_officer_type_with_extra_budget.py | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 15d5cc64b7..a8836501b6 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -610,6 +610,9 @@ def get_hcw_time_usage(_df):
     hcw_time_usage_summarized = summarize(hcw_time_usage, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
+    hcw_time_usage_summarized.columns = [col.replace('OfficerType=', '').replace('FacilityLevel=', '')
+                                         for col in hcw_time_usage_summarized.columns]
+    hcw_time_usage_summarized.columns = hcw_time_usage_summarized.columns.str.split(pat='|', expand=True)
 
     # get relative numbers for scenarios, compared to no_expansion scenario: s_1
     num_services_increased = summarize(
@@ -1539,6 +1542,38 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Average fractions of HCW time used (CNP, level 1a), {target_period()}'
+    data_to_plot = hcw_time_usage_summarized.xs('1a', axis=1, level=1, drop_level=True) * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    #ax.set_ylim(0, 100)
+    ax.set_ylabel('Percentage %')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Average fractions of HCW time used (CNP, level 2), {target_period()}'
+    data_to_plot = hcw_time_usage_summarized.xs('2', axis=1, level=1, drop_level=True) * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    # ax.set_ylim(0, 100)
+    ax.set_ylabel('Percentage %')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Extra budget allocation among cadres, {target_period()}'
     cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
     extra_budget_allocation_to_plot = extra_budget_allocation[cadres_to_plot] * 100

From e85cae05b18e1f264fe4b68882af3717fa8f36b1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sat, 19 Oct 2024 23:45:29 +0100
Subject: [PATCH 138/218] extract and plot results of hcw time by cadre and
 treatment

---
 ...dsion_by_officer_type_with_extra_budget.py | 91 ++++++++++++++++++-
 1 file changed, 87 insertions(+), 4 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index a8836501b6..4ec6324f5f 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -6,6 +6,7 @@
 """
 
 import argparse
+from collections import Counter, defaultdict
 from pathlib import Path
 from typing import Tuple
 
@@ -28,6 +29,8 @@
     CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP,
     COARSE_APPT_TYPE_TO_COLOR_MAP,
     SHORT_TREATMENT_ID_TO_COLOR_MAP,
+    bin_hsi_event_details,
+    compute_mean_across_runs,
     extract_results,
     summarize,
 )
@@ -349,6 +352,72 @@ def get_hcw_time_usage(_df):
 
         return _df
 
+    def get_hcw_time_by_treatment():  # Treatment x Cadre table: minutes in draw 21 minus minutes in draw 0
+        appointment_time_table = pd.read_csv(
+            resourcefilepath
+            / 'healthsystem'
+            / 'human_resources'
+            / 'definitions'
+            / 'ResourceFile_Appt_Time_Table.csv',
+            index_col=["Appt_Type_Code", "Facility_Level", "Officer_Category"]
+        )
+
+        appt_type_facility_level_officer_category_to_appt_time = (
+            appointment_time_table.Time_Taken_Mins.to_dict()  # keyed by (appt type, facility level, officer category)
+        )
+
+        officer_categories = appointment_time_table.index.levels[
+            appointment_time_table.index.names.index("Officer_Category")
+        ].to_list()
+
+        times_by_officer_category_treatment_id_per_draw_run = bin_hsi_event_details(
+            results_folder,
+            lambda event_details, count: sum(
+                [
+                    Counter({
+                        (
+                            officer_category,
+                            event_details["treatment_id"].split("_")[0]  # text before the first "_"
+                        ):
+                            count
+                            * appt_number
+                            * appt_type_facility_level_officer_category_to_appt_time.get(
+                                (
+                                    appt_type,
+                                    event_details["facility_level"],
+                                    officer_category
+                                ),
+                                0  # no entry in the time table -> contributes 0 minutes
+                            )
+                        for officer_category in officer_categories
+                    })
+                    for appt_type, appt_number in event_details["appt_footprint"]
+                ],
+                Counter()
+            ),
+            *TARGET_PERIOD,
+            True  # NOTE(review): meaning of this positional flag is set by bin_hsi_event_details - confirm
+        )
+
+        time_by_cadre_treatment_per_draw = compute_mean_across_runs(times_by_officer_category_treatment_id_per_draw_run)
+        time_by_cadre_treatment_no_expansion = pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[0],
+                                                                      orient='index')  # draw 0: presumably no expansion - confirm
+        time_by_cadre_treatment_CNP = pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[21],
+                                                             orient='index')  # draw 21: presumably C + N&M + P - confirm
+        time_by_cadre_treatment_CNP = time_by_cadre_treatment_CNP.reindex(time_by_cadre_treatment_no_expansion.index)
+        assert (time_by_cadre_treatment_CNP.index == time_by_cadre_treatment_no_expansion.index).all()
+        increased_time_by_cadre_treatment_CNP = time_by_cadre_treatment_CNP - time_by_cadre_treatment_no_expansion
+        increased_time_by_cadre_treatment_CNP.reset_index(drop=False, inplace=True)
+        for i in increased_time_by_cadre_treatment_CNP.index:  # split each (cadre, treatment) key
+            increased_time_by_cadre_treatment_CNP.loc[i, 'Cadre'] = \
+                increased_time_by_cadre_treatment_CNP.loc[i, 'index'][0]
+            increased_time_by_cadre_treatment_CNP.loc[i, 'Treatment'] = \
+                increased_time_by_cadre_treatment_CNP.loc[i, 'index'][1]
+        increased_time_by_cadre_treatment_CNP = increased_time_by_cadre_treatment_CNP.drop('index', axis=1).rename(
+            columns={0: 'value'}).pivot(index='Treatment', columns='Cadre', values='value').fillna(0.0)
+
+        return increased_time_by_cadre_treatment_CNP
+
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
 
@@ -932,12 +1001,10 @@ def find_never_ran_appts_that_need_specific_cadres():
         'Other cases': 'gray',
     }
 
-    # Checked that never ran appts that need CP = never ran appts that need CNP + ('3', 'TBNew'),
-    # whereas never ran TBNew at level 3 = 0, thus the proportions info are the same in the two cases
-
     # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results
 
-    # hcw time flow to treatments?
+    # hcw time by cadre and treatment: C + N + P vs no expandsion
+    time_increased_by_cadre_treatment = get_hcw_time_by_treatment()
 
     # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
     # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
@@ -1707,6 +1774,22 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Time increased by cadre and treatment: C + N + P vs no expansion, {target_period()}'
+    data_to_plot = time_increased_by_cadre_treatment / 1e6
+    data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery',
+                                 'DCSA', 'Laboratory', 'Mental', 'Radiography']]
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Millions Minutes')
+    ax.set_xlabel('Treatment')
+    ax.set_xticklabels(data_to_plot.index, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6
     yerr_services = np.array([

From 737496828dc530b547f487915f811434e3ec8d12 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sun, 20 Oct 2024 15:25:09 +0100
Subject: [PATCH 139/218] minor update

---
 ...dsion_by_officer_type_with_extra_budget.py | 31 +++++++++++++------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 4ec6324f5f..c21f3ed08c 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -1075,7 +1075,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     extra_budget_allocation['Other'] = extra_budget_allocation[
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
-    name_of_plot = f'DALYs averted (%) against no expansion, {target_period()}'
+    name_of_plot = f'3D DALYs averted (%) against no expansion, {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
@@ -1112,7 +1112,9 @@ def find_never_ran_appts_that_need_specific_cadres():
                                  linestyle='none', marker='o', color=scenario_groups_color[label]
                                  ) for label in legend_labels[0:len(legend_labels) - 1]
                       ] + [plt.Line2D([0, 1], [0, 0], linestyle='-', color='orange')]
-    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    plt.legend(legend_handles, legend_labels,
+               loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+               title='Scenario groups')
     # plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.25)
     plt.title(name_of_plot)
     plt.tight_layout()
@@ -1139,7 +1141,9 @@ def find_never_ran_appts_that_need_specific_cadres():
                                  linestyle='none', marker='o', color=scenario_groups_color[label]
                                  ) for label in legend_labels
                       ]
-    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    plt.legend(legend_handles, legend_labels,
+               loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+               title='Scenario groups')
     plt.title(name_of_plot)
     plt.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
@@ -1163,7 +1167,9 @@ def find_never_ran_appts_that_need_specific_cadres():
                                  linestyle='none', marker='o', color=scenario_groups_color[label]
                                  ) for label in legend_labels
                       ]
-    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    plt.legend(legend_handles, legend_labels,
+               loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+               title='Scenario groups')
     plt.title(name_of_plot)
     plt.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
@@ -1186,7 +1192,9 @@ def find_never_ran_appts_that_need_specific_cadres():
                                  linestyle='none', marker='o', color=scenario_groups_color[label]
                                  ) for label in legend_labels
                       ]
-    plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    plt.legend(legend_handles, legend_labels,
+               loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+               title='Scenario groups')
     plt.title(name_of_plot)
     plt.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
@@ -1331,7 +1339,9 @@ def anova_oneway(df=num_dalys_averted_percent):
     legend_labels = [substitute_labels[v] for v in best_scenarios]
     legend_handles = [plt.Rectangle((0, 0), 1, 1,
                                     color=best_scenarios_color[v]) for v in best_scenarios]
-    ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5))
+    ax.legend(legend_handles, legend_labels,
+              loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
+              title='Best scenario group')
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
     fig.show()
@@ -1339,7 +1349,7 @@ def anova_oneway(df=num_dalys_averted_percent):
 
     # plot yearly staff count (Clinical/Pharmacy/Nursing and Midwifery) for best 9 scenarios
     best_cadres = ['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']
-    name_of_plot = f'Yearly staff count for C+P+N, {target_period()}'
+    name_of_plot = f'Yearly staff count for C+P+N total, {target_period()}'
     fig, ax = plt.subplots(figsize=(9, 6))
     best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1']
     for s in best_scenarios:
@@ -1352,7 +1362,9 @@ def anova_oneway(df=num_dalys_averted_percent):
     legend_labels = [substitute_labels[v] for v in best_scenarios]
     legend_handles = [plt.Rectangle((0, 0), 1, 1,
                                     color=best_scenarios_color[v]) for v in best_scenarios]
-    ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5))
+    ax.legend(legend_handles, legend_labels,
+              loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
+              title='Best scenario group')
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
     fig.show()
@@ -1786,7 +1798,8 @@ def anova_oneway(df=num_dalys_averted_percent):
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True)
     plt.title(name_of_plot)
     fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+        ':', '_')))
     fig.show()
     plt.close(fig)
 

From 0d5611f245d7d447f838095b13685c98b73feb35 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sun, 20 Oct 2024 19:33:12 +0100
Subject: [PATCH 140/218] plot the increase in time used for the C + P scenario

---
 ...dsion_by_officer_type_with_extra_budget.py | 35 ++++++++++---------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index c21f3ed08c..15f2f93428 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -352,7 +352,7 @@ def get_hcw_time_usage(_df):
 
         return _df
 
-    def get_hcw_time_by_treatment():
+    def get_hcw_time_by_treatment(draw=21):
         appointment_time_table = pd.read_csv(
             resourcefilepath
             / 'healthsystem'
@@ -402,21 +402,21 @@ def get_hcw_time_by_treatment():
         time_by_cadre_treatment_per_draw = compute_mean_across_runs(times_by_officer_category_treatment_id_per_draw_run)
         time_by_cadre_treatment_no_expansion = pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[0],
                                                                       orient='index')
-        time_by_cadre_treatment_CNP = pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[21],
-                                                             orient='index')
-        time_by_cadre_treatment_CNP = time_by_cadre_treatment_CNP.reindex(time_by_cadre_treatment_no_expansion.index)
-        assert (time_by_cadre_treatment_CNP.index == time_by_cadre_treatment_no_expansion.index).all()
-        increased_time_by_cadre_treatment_CNP = time_by_cadre_treatment_CNP - time_by_cadre_treatment_no_expansion
-        increased_time_by_cadre_treatment_CNP.reset_index(drop=False, inplace=True)
-        for i in increased_time_by_cadre_treatment_CNP.index:
-            increased_time_by_cadre_treatment_CNP.loc[i, 'Cadre'] = \
-                increased_time_by_cadre_treatment_CNP.loc[i, 'index'][0]
-            increased_time_by_cadre_treatment_CNP.loc[i, 'Treatment'] = \
-                increased_time_by_cadre_treatment_CNP.loc[i, 'index'][1]
-        increased_time_by_cadre_treatment_CNP = increased_time_by_cadre_treatment_CNP.drop('index', axis=1).rename(
+        time_by_cadre_treatment = pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[draw],
+                                                         orient='index')
+        time_by_cadre_treatment = time_by_cadre_treatment.reindex(time_by_cadre_treatment_no_expansion.index)
+        assert (time_by_cadre_treatment.index == time_by_cadre_treatment_no_expansion.index).all()
+        increased_time_by_cadre_treatment = time_by_cadre_treatment - time_by_cadre_treatment_no_expansion
+        increased_time_by_cadre_treatment.reset_index(drop=False, inplace=True)
+        for i in increased_time_by_cadre_treatment.index:
+            increased_time_by_cadre_treatment.loc[i, 'Cadre'] = \
+                increased_time_by_cadre_treatment.loc[i, 'index'][0]
+            increased_time_by_cadre_treatment.loc[i, 'Treatment'] = \
+                increased_time_by_cadre_treatment.loc[i, 'index'][1]
+        increased_time_by_cadre_treatment = increased_time_by_cadre_treatment.drop('index', axis=1).rename(
             columns={0: 'value'}).pivot(index='Treatment', columns='Cadre', values='value').fillna(0.0)
 
-        return increased_time_by_cadre_treatment_CNP
+        return increased_time_by_cadre_treatment
 
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
@@ -1003,8 +1003,8 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results
 
-    # hcw time by cadre and treatment: C + N + P vs no expandsion
-    time_increased_by_cadre_treatment = get_hcw_time_by_treatment()
+    # hcw time by cadre and treatment: draw = 21: C + N + P vs no expansion, draw = 10, C + P vs no expansion
+    time_increased_by_cadre_treatment = get_hcw_time_by_treatment(10)
 
     # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
     # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
@@ -1786,7 +1786,8 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Time increased by cadre and treatment: C + N + P vs no expansion, {target_period()}'
+    # name_of_plot = f'Time used increased by cadre and treatment: C + N + P vs no expansion, {target_period()}'
+    name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}'
     data_to_plot = time_increased_by_cadre_treatment / 1e6
     data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery',
                                  'DCSA', 'Laboratory', 'Mental', 'Radiography']]

From b466dc00adfb21f4c076de717730bc5bf88c2b9d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 21 Oct 2024 14:47:41 +0100
Subject: [PATCH 141/218] more plots of the increase in time used for the C + P scenario

---
 ...dsion_by_officer_type_with_extra_budget.py | 48 +++++++++++++++++--
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 15f2f93428..9c8a9773db 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -748,6 +748,9 @@ def get_hcw_time_by_treatment(draw=21):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_22', :].sort_values(ascending=False)
+    num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
+
     num_dalys_by_cause_averted_percent = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
             num_dalys_by_cause,
@@ -1004,7 +1007,8 @@ def find_never_ran_appts_that_need_specific_cadres():
     # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results
 
     # hcw time by cadre and treatment: draw = 21: C + N + P vs no expansion, draw = 10, C + P vs no expansion
-    time_increased_by_cadre_treatment = get_hcw_time_by_treatment(10)
+    time_increased_by_cadre_treatment_CNP = get_hcw_time_by_treatment(21)
+    time_increased_by_cadre_treatment_CP = get_hcw_time_by_treatment(10)
 
     # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
     # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
@@ -1786,9 +1790,10 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    # name_of_plot = f'Time used increased by cadre and treatment: C + N + P vs no expansion, {target_period()}'
-    name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}'
-    data_to_plot = time_increased_by_cadre_treatment / 1e6
+    name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no expansion, {target_period()}'
+    data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6
+    # name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}'
+    # data_to_plot = time_increased_by_cadre_treatment_CP / 1e6
     data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery',
                                  'DCSA', 'Laboratory', 'Mental', 'Radiography']]
     fig, ax = plt.subplots(figsize=(12, 8))
@@ -1800,7 +1805,40 @@ def anova_oneway(df=num_dalys_averted_percent):
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
-        ':', '_')))
+        ':', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Time used increased by treatment and cadre: C + N&M + P vs no expansion, {target_period()}'
+    # name_of_plot = f'Time used increased by treatment and cadre: C + P vs no expansion, {target_period()}'
+    data_to_plot = data_to_plot.T
+    data_to_plot = data_to_plot.add_suffix('*')
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    ax.set_ylabel('Millions Minutes')
+    ax.set_xlabel('Treatment')
+    ax.set_xticklabels(data_to_plot.index, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+        ':', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'DALYs by cause averted: C + N + P vs no expansion, {target_period()}'
+    data_to_plot = num_dalys_by_cause_averted_CNP / 1e6
+    # name_of_plot = f'DALYs by cause averted: C + P vs no expansion, {target_period()}'
+    # data_to_plot = num_dalys_by_cause_averted_CP / 1e6
+    fig, ax = plt.subplots()
+    data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values)
+    ax.set_ylabel('Millions')
+    ax.set_xlabel('Treatment')
+    ax.set_xticklabels(data_to_plot.index, rotation=90)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+        ':', '')))
     fig.show()
     plt.close(fig)
 

From 5e2ab0687b46c66d2a0e488bd447e42be2c3ebd0 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 21 Oct 2024 22:27:02 +0100
Subject: [PATCH 142/218] more plots

---
 ...dsion_by_officer_type_with_extra_budget.py | 52 +++++++++++++++----
 1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 9c8a9773db..0a993406e6 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -748,8 +748,21 @@ def get_hcw_time_by_treatment(draw=21):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
+    num_dalys_by_cause_averted_percent = summarize(
+        -1.0 * find_difference_relative_to_comparison_dataframe(
+            num_dalys_by_cause,
+            comparison='s_1',
+            scaled=True
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
     num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_22', :].sort_values(ascending=False)
     num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
+    num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_22', :].sort_values(
+        ascending=False)
+    num_dalys_by_cause_averted__percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values(
+        ascending=False)
 
     num_dalys_by_cause_averted_percent = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
@@ -1790,10 +1803,13 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no expansion, {target_period()}'
-    data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6
-    # name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}'
-    # data_to_plot = time_increased_by_cadre_treatment_CP / 1e6
+    # name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no expansion, {target_period()}'
+    # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6
+    name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}'
+    data_to_plot = time_increased_by_cadre_treatment_CP / 1e6
+    data_to_plot['total'] = data_to_plot.sum(axis=1)
+    data_to_plot.sort_values(by='total', inplace=True, ascending=False)
+    data_to_plot.drop('total', axis=1, inplace=True)
     data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery',
                                  'DCSA', 'Laboratory', 'Mental', 'Radiography']]
     fig, ax = plt.subplots(figsize=(12, 8))
@@ -1842,6 +1858,20 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'DALYs by cause averted %: C + N + P vs no expansion, {target_period()}'
+    data_to_plot = num_dalys_by_cause_averted_percent_CNP * 100
+    fig, ax = plt.subplots()
+    data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values)
+    ax.set_ylabel('Percentage %')
+    ax.set_xlabel('Treatment')
+    ax.set_xticklabels(data_to_plot.index, rotation=90)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+        ':', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}'
     num_appts_increased_in_millions = num_appts_increased / 1e6
     yerr_services = np.array([
@@ -1971,13 +2001,13 @@ def anova_oneway(df=num_dalys_averted_percent):
 
     # plot ROI and CE for all expansion scenarios
 
-    name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
-    fig, ax = do_bar_plot_with_ci(ROI)
-    ax.set_title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(ROI)
+    # ax.set_title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     # name_of_plot = f'Cost per DALY averted, {target_period()}'
     # fig, ax = do_bar_plot_with_ci(CE)

From 6e68e32f1ff81a3c0afb541e38719f4d2a049229 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 22 Oct 2024 16:13:38 +0100
Subject: [PATCH 143/218] comment out results and plots that may be less relevant

---
 ...dsion_by_officer_type_with_extra_budget.py | 912 +++++++++---------
 1 file changed, 455 insertions(+), 457 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 0a993406e6..39b1e5ea75 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -6,7 +6,7 @@
 """
 
 import argparse
-from collections import Counter, defaultdict
+from collections import Counter
 from pathlib import Path
 from typing import Tuple
 
@@ -484,10 +484,10 @@ def get_hcw_time_by_treatment(draw=21):
     for i in staff_count.index:
         extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
 
-    extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
-        index='s_1'
-    )
-    staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
+    # extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
+    #     index='s_1'
+    # )
+    # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
 
     # check total cost calculated is increased as expected
     years = range(2019, the_target_period[1].year + 1)
@@ -515,13 +515,13 @@ def get_hcw_time_by_treatment(draw=21):
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
-    num_dalys_yearly = extract_results(
-        results_folder,
-        module='tlo.methods.healthburden',
-        key='dalys_stacked',
-        custom_generate_series=get_num_dalys_yearly,
-        do_scaling=True
-    ).pipe(set_param_names_as_column_index_level_0)
+    # num_dalys_yearly = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthburden',
+    #     key='dalys_stacked',
+    #     custom_generate_series=get_num_dalys_yearly,
+    #     do_scaling=True
+    # ).pipe(set_param_names_as_column_index_level_0)
 
     num_dalys_by_cause = extract_results(
         results_folder,
@@ -587,13 +587,13 @@ def get_hcw_time_by_treatment(draw=21):
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
-    num_never_ran_treatments_total = extract_results(
-        results_folder,
-        module='tlo.methods.healthsystem.summary',
-        key='Never_ran_HSI_Event',
-        custom_generate_series=get_num_treatments_total,
-        do_scaling=True
-    ).pipe(set_param_names_as_column_index_level_0)
+    # num_never_ran_treatments_total = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='Never_ran_HSI_Event',
+    #     custom_generate_series=get_num_treatments_total,
+    #     do_scaling=True
+    # ).pipe(set_param_names_as_column_index_level_0)
 
     num_never_ran_treatments = extract_results(
         results_folder,
@@ -613,7 +613,7 @@ def get_hcw_time_by_treatment(draw=21):
     num_never_ran_appts.loc['Lab / Diagnostics', :] = 0
     num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0)
     assert (num_appts.index == num_never_ran_appts.index).all()
-    num_appts_demand = num_appts + num_never_ran_appts
+    # num_appts_demand = num_appts + num_never_ran_appts
 
     hcw_time_usage = extract_results(
         results_folder,
@@ -630,14 +630,14 @@ def get_hcw_time_by_treatment(draw=21):
         num_dalys_summarized.index
     )
 
-    num_dalys_yearly_summarized = (summarize(num_dalys_yearly)
-                                   .stack([0, 1])
-                                   .rename_axis(['year', 'scenario', 'stat'])
-                                   .reset_index(name='count'))
-
-    num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
+    # num_dalys_yearly_summarized = (summarize(num_dalys_yearly)
+    #                                .stack([0, 1])
+    #                                .rename_axis(['year', 'scenario', 'stat'])
+    #                                .reset_index(name='count'))
+    #
+    # num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
 
     num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names).reindex(
         num_dalys_summarized.index
@@ -648,15 +648,15 @@ def get_hcw_time_by_treatment(draw=21):
     num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     ).fillna(0.0)
-    num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
-    num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
-    num_treatments_total_summarized = summarize(num_treatments_total).loc[0].unstack().reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
+    # num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+    # num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+    # num_treatments_total_summarized = summarize(num_treatments_total).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
 
     num_never_ran_services_summarized = summarize(num_never_ran_services).loc[0].unstack().reindex(param_names).reindex(
         num_dalys_summarized.index
@@ -664,18 +664,18 @@ def get_hcw_time_by_treatment(draw=21):
     num_never_ran_appts_summarized = summarize(num_never_ran_appts, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
-    num_never_ran_treatments_summarized = summarize(num_never_ran_treatments, only_mean=True).T.reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
-    num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
-    num_service_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
-    ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
+    # num_never_ran_treatments_summarized = summarize(num_never_ran_treatments, only_mean=True).T.reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+    # num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+    # num_service_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+    # ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
     hcw_time_usage_summarized = summarize(hcw_time_usage, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
@@ -692,14 +692,14 @@ def get_hcw_time_by_treatment(draw=21):
         ).T
     ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
-    num_services_increased_percent = summarize(
-        pd.DataFrame(
-            find_difference_relative_to_comparison_series(
-                num_services.loc[0],
-                comparison='s_1',
-                scaled=True)
-        ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    # num_services_increased_percent = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             num_services.loc[0],
+    #             comparison='s_1',
+    #             scaled=True)
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_deaths_averted = summarize(
         -1.0 *
@@ -758,20 +758,20 @@ def get_hcw_time_by_treatment(draw=21):
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_22', :].sort_values(ascending=False)
-    num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
+    # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
     num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_22', :].sort_values(
         ascending=False)
-    num_dalys_by_cause_averted__percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values(
-        ascending=False)
-
-    num_dalys_by_cause_averted_percent = summarize(
-        -1.0 * find_difference_relative_to_comparison_dataframe(
-            num_dalys_by_cause,
-            comparison='s_1',
-            scaled=True
-        ),
-        only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    # num_dalys_by_cause_averted__percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values(
+    #     ascending=False)
+
+    # num_dalys_by_cause_averted_percent = summarize(
+    #     -1.0 * find_difference_relative_to_comparison_dataframe(
+    #         num_dalys_by_cause,
+    #         comparison='s_1',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_appts_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
@@ -781,81 +781,81 @@ def get_hcw_time_by_treatment(draw=21):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
-    num_never_ran_appts_reduced = summarize(
-        -1.0 * find_difference_relative_to_comparison_dataframe(
-            num_never_ran_appts,
-            comparison='s_1',
-        ),
-        only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
-
-    num_never_ran_treatments_reduced = summarize(
-        -1.0 * find_difference_relative_to_comparison_dataframe(
-            num_never_ran_treatments,
-            comparison='s_1',
-        ),
-        only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
-
-    num_appts_increased_percent = summarize(
-        find_difference_relative_to_comparison_dataframe(
-            num_appts,
-            comparison='s_1',
-            scaled=True
-        ),
-        only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
-
-    num_treatments_increased = summarize(
-        find_difference_relative_to_comparison_dataframe(
-            num_treatments,
-            comparison='s_1',
-        ),
-        only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
-
-    num_treatments_increased_percent = summarize(
-        find_difference_relative_to_comparison_dataframe(
-            num_treatments,
-            comparison='s_1',
-            scaled=True
-        ),
-        only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
-
-    num_treatments_total_increased = summarize(
-        pd.DataFrame(
-            find_difference_relative_to_comparison_series(
-                num_treatments_total.loc[0],
-                comparison='s_1')
-        ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
-
-    num_treatments_total_increased_percent = summarize(
-        pd.DataFrame(
-            find_difference_relative_to_comparison_series(
-                num_treatments_total.loc[0],
-                comparison='s_1',
-                scaled=True)
-        ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
-
-    service_ratio_increased = summarize(
-        pd.DataFrame(
-            find_difference_relative_to_comparison_series(
-                ratio_services.loc[0],
-                comparison='s_1')
-        ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
-
-    service_ratio_increased_percent = summarize(
-        pd.DataFrame(
-            find_difference_relative_to_comparison_series(
-                ratio_services.loc[0],
-                comparison='s_1',
-                scaled=True)
-        ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    # num_never_ran_appts_reduced = summarize(
+    #     -1.0 * find_difference_relative_to_comparison_dataframe(
+    #         num_never_ran_appts,
+    #         comparison='s_1',
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # num_never_ran_treatments_reduced = summarize(
+    #     -1.0 * find_difference_relative_to_comparison_dataframe(
+    #         num_never_ran_treatments,
+    #         comparison='s_1',
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # num_appts_increased_percent = summarize(
+    #     find_difference_relative_to_comparison_dataframe(
+    #         num_appts,
+    #         comparison='s_1',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # num_treatments_increased = summarize(
+    #     find_difference_relative_to_comparison_dataframe(
+    #         num_treatments,
+    #         comparison='s_1',
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # num_treatments_increased_percent = summarize(
+    #     find_difference_relative_to_comparison_dataframe(
+    #         num_treatments,
+    #         comparison='s_1',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # num_treatments_total_increased = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             num_treatments_total.loc[0],
+    #             comparison='s_1')
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # num_treatments_total_increased_percent = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             num_treatments_total.loc[0],
+    #             comparison='s_1',
+    #             scaled=True)
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # service_ratio_increased = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             ratio_services.loc[0],
+    #             comparison='s_1')
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # service_ratio_increased_percent = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             ratio_services.loc[0],
+    #             comparison='s_1',
+    #             scaled=True)
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     # Check that when we sum across the causes/appt types,
     # we get the same total as calculated when we didn't split by cause/appt type.
@@ -871,11 +871,11 @@ def get_hcw_time_by_treatment(draw=21):
          ) < 1e-6
     ).all()
 
-    assert (
-        (num_treatments_increased.sum(axis=1).sort_index()
-         - num_treatments_total_increased['mean'].sort_index()
-         ) < 1e-6
-    ).all()
+    # assert (
+    #     (num_treatments_increased.sum(axis=1).sort_index()
+    #      - num_treatments_total_increased['mean'].sort_index()
+    #      ) < 1e-6
+    # ).all()
 
     # get HCW time and cost needed to run the never run appts
     def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized):
@@ -1020,30 +1020,30 @@ def find_never_ran_appts_that_need_specific_cadres():
     # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results
 
     # hcw time by cadre and treatment: draw = 21: C + N + P vs no expansion, draw = 10, C + P vs no expansion
-    time_increased_by_cadre_treatment_CNP = get_hcw_time_by_treatment(21)
-    time_increased_by_cadre_treatment_CP = get_hcw_time_by_treatment(10)
-
-    # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
-    # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
-    ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
-    # todo: for the bad scenarios (s_5, s_8, s_15), the dalys averted are negative
-    #  (maybe only due to statistical variation; relative difference to s_1 are close to 0%),
-    #  thus CE does not make sense.
-    # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
-    for i in ROI.index:
-        ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_all_yrs.loc[i, 'all_cadres']
-    #     CE.loc[i, 'mean'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean']
-    #     CE.loc[i, 'lower'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper']
-    #     CE.loc[i, 'upper'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower']
+    # time_increased_by_cadre_treatment_CNP = get_hcw_time_by_treatment(21)
+    # time_increased_by_cadre_treatment_CP = get_hcw_time_by_treatment(10)
+
+    # # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
+    # # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
+    # ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
+    # # todo: for the bad scenarios (s_5, s_8, s_15), the dalys averted are negative
+    # #  (maybe only due to statistical variation; relative difference to s_1 are close to 0%),
+    # #  thus CE does not make sense.
+    # # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
+    # for i in ROI.index:
+    #     ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_all_yrs.loc[i, 'all_cadres']
+    # #     CE.loc[i, 'mean'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean']
+    # #     CE.loc[i, 'lower'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper']
+    # #     CE.loc[i, 'upper'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower']
 
     # prepare colors for plots
     appt_color = {
         appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns
     }
-    treatment_color = {
-        treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan)
-        for treatment in num_treatments_summarized.columns
-    }
+    # treatment_color = {
+    #     treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan)
+    #     for treatment in num_treatments_summarized.columns
+    # }
     cause_color = {
         cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
         for cause in num_dalys_by_cause_summarized.columns
@@ -1060,14 +1060,14 @@ def find_never_ran_appts_that_need_specific_cadres():
         'Radiography': 'lightgray',
         'Other': 'gray'
     }
-    scenario_groups_color_init = {
-        'no_expansion': 'gray',
-        'one_cadre_expansion': 'lightpink',
-        'two_cadres_expansion': 'violet',
-        'three_cadres_expansion': 'darkorchid',
-        'four_cadres_expansion': 'paleturquoise',
-        'all_cadres_expansion': 'darkturquoise'
-    }
+    # scenario_groups_color_init = {
+    #     'no_expansion': 'gray',
+    #     'one_cadre_expansion': 'lightpink',
+    #     'two_cadres_expansion': 'violet',
+    #     'three_cadres_expansion': 'darkorchid',
+    #     'four_cadres_expansion': 'paleturquoise',
+    #     'all_cadres_expansion': 'darkturquoise'
+    # }
     scenario_groups_color = {
         'D/N&M/O/None': 'lightpink',
         'P + D/N&M/O/None': 'violet',
@@ -1101,10 +1101,10 @@ def find_never_ran_appts_that_need_specific_cadres():
     colors = [scenario_color[s] for s in heat_data.index]
     fig = plt.figure()
     ax = fig.add_subplot(111, projection='3d')
-    img = ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'],
-                     alpha=0.8, marker='o', s=heat_data['mean'] * 2000,
-                     #c=heat_data['mean'] * 100, cmap='viridis',
-                     c=colors)
+    ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'],
+               alpha=0.8, marker='o', s=heat_data['mean'] * 2000,
+               #c=heat_data['mean'] * 100, cmap='viridis',
+               c=colors)
     # plot lines from the best point to three axes panes
     # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
     #           [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
@@ -1139,84 +1139,84 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'3D DALYs averted, Services increased and Treatment increased, {target_period()}'
-    heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
-                           num_treatments_total_increased_percent['mean']], axis=1)
-    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
-    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
-    colors = [scenario_color[s] for s in heat_data.index]
-    fig = plt.figure()
-    ax = fig.add_subplot(111, projection='3d')
-    ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0],
-               alpha=0.8, marker='o',
-               c=colors)
-    ax.set_xlabel('Services increased %')
-    ax.set_ylabel('Treatments increased %')
-    ax.set_zlabel('DALYs averted %')
-    legend_labels = list(scenario_groups_color.keys())
-    legend_handles = [plt.Line2D([0, 0], [0, 0],
-                                 linestyle='none', marker='o', color=scenario_groups_color[label]
-                                 ) for label in legend_labels
-                      ]
-    plt.legend(legend_handles, legend_labels,
-               loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
-               title='Scenario groups')
-    plt.title(name_of_plot)
-    plt.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'3D DALYs averted, Services increased and Treatment increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+    #                        num_treatments_total_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig = plt.figure()
+    # ax = fig.add_subplot(111, projection='3d')
+    # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0],
+    #            alpha=0.8, marker='o',
+    #            c=colors)
+    # ax.set_xlabel('Services increased %')
+    # ax.set_ylabel('Treatments increased %')
+    # ax.set_zlabel('DALYs averted %')
+    # legend_labels = list(scenario_groups_color.keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels,
+    #            loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+    #            title='Scenario groups')
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'2D DALYs averted, Services increased and Treatment increased, {target_period()}'
-    heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
-                           num_treatments_total_increased_percent['mean']], axis=1)
-    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
-    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
-    colors = [scenario_color[s] for s in heat_data.index]
-    fig, ax = plt.subplots()
-    ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2],
-               alpha=0.8, marker='o', s=2000 * heat_data.iloc[:, 0],
-               c=colors)
-    ax.set_xlabel('Services increased %')
-    ax.set_ylabel('Treatments increased %')
-    legend_labels = list(scenario_groups_color.keys())
-    legend_handles = [plt.Line2D([0, 0], [0, 0],
-                                 linestyle='none', marker='o', color=scenario_groups_color[label]
-                                 ) for label in legend_labels
-                      ]
-    plt.legend(legend_handles, legend_labels,
-               loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
-               title='Scenario groups')
-    plt.title(name_of_plot)
-    plt.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'2D DALYs averted, Services increased and Treatment increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+    #                        num_treatments_total_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig, ax = plt.subplots()
+    # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2],
+    #            alpha=0.8, marker='o', s=2000 * heat_data.iloc[:, 0],
+    #            c=colors)
+    # ax.set_xlabel('Services increased %')
+    # ax.set_ylabel('Treatments increased %')
+    # legend_labels = list(scenario_groups_color.keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels,
+    #            loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+    #            title='Scenario groups')
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'DALYs averted and Services increased, {target_period()}'
-    heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
-                           num_treatments_total_increased_percent['mean']], axis=1)
-    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
-    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
-    colors = [scenario_color[s] for s in heat_data.index]
-    fig, ax = plt.subplots()
-    ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
-               alpha=0.8, marker='o', c=colors)
-    ax.set_xlabel('Services increased %')
-    ax.set_ylabel('DALYs averted %')
-    legend_labels = list(scenario_groups_color.keys())
-    legend_handles = [plt.Line2D([0, 0], [0, 0],
-                                 linestyle='none', marker='o', color=scenario_groups_color[label]
-                                 ) for label in legend_labels
-                      ]
-    plt.legend(legend_handles, legend_labels,
-               loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
-               title='Scenario groups')
-    plt.title(name_of_plot)
-    plt.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'DALYs averted and Services increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+    #                        num_treatments_total_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig, ax = plt.subplots()
+    # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
+    #            alpha=0.8, marker='o', c=colors)
+    # ax.set_xlabel('Services increased %')
+    # ax.set_ylabel('DALYs averted %')
+    # legend_labels = list(scenario_groups_color.keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels,
+    #            loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+    #            title='Scenario groups')
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     # name_of_plot = f'DALYs averted and Treatments increased, {target_period()}'
     # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
@@ -1283,6 +1283,8 @@ def find_never_ran_appts_that_need_specific_cadres():
     est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit()
     print(est.summary())
 
+    # todo: could do regression analysis of DALYs averted and Services increased
+
     # do anova analysis to test the difference of scenario groups
     def anova_oneway(df=num_dalys_averted_percent):
         best = df.loc[list(scenario_groups['C + P + D/N&M/O/None']), 'mean']
@@ -1293,9 +1295,9 @@ def anova_oneway(df=num_dalys_averted_percent):
         return ss.oneway.anova_oneway((best, middle_C, middle_P, worst),
                                       groups=None, use_var='unequal', welch_correction=True, trim_frac=0)
 
-    anova_dalys = anova_oneway()
-    anova_services = anova_oneway(num_services_increased_percent)
-    anova_treatments = anova_oneway(num_treatments_total_increased_percent)
+    # anova_dalys = anova_oneway()
+    # anova_services = anova_oneway(num_services_increased_percent)
+    # anova_treatments = anova_oneway(num_treatments_total_increased_percent)
 
     # plot absolute numbers for scenarios
 
@@ -1335,57 +1337,57 @@ def anova_oneway(df=num_dalys_averted_percent):
     # fig.show()
     # plt.close(fig)
 
-    # plot yearly DALYs for best 9 scenarios
-    name_of_plot = f'Yearly DALYs, {target_period()}'
-    fig, ax = plt.subplots(figsize=(9, 6))
-    best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1']
-    for s in best_scenarios:
-        data = (num_dalys_yearly_summarized.loc[num_dalys_yearly_summarized.scenario == s, :]
-                .drop(columns='scenario')
-                .pivot(index='year', columns='stat')
-                .droplevel(0, axis=1))
-        ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=best_scenarios_color[s], linewidth=2)
-        # ax.fill_between(data.index.to_numpy(),
-        #                 (data['lower'] / 1e6).to_numpy(),
-        #                 (data['upper'] / 1e6).to_numpy(),
-        #                 color=best_scenarios_color[s],
-        #                 alpha=0.2)
-    ax.set_title(name_of_plot)
-    ax.set_ylabel('(Millions)')
-    ax.set_xticks(data.index)
-    legend_labels = [substitute_labels[v] for v in best_scenarios]
-    legend_handles = [plt.Rectangle((0, 0), 1, 1,
-                                    color=best_scenarios_color[v]) for v in best_scenarios]
-    ax.legend(legend_handles, legend_labels,
-              loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
-              title='Best scenario group')
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # # plot yearly DALYs for best 9 scenarios
+    # name_of_plot = f'Yearly DALYs, {target_period()}'
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1']
+    # for s in best_scenarios:
+    #     data = (num_dalys_yearly_summarized.loc[num_dalys_yearly_summarized.scenario == s, :]
+    #             .drop(columns='scenario')
+    #             .pivot(index='year', columns='stat')
+    #             .droplevel(0, axis=1))
+    #     ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=best_scenarios_color[s], linewidth=2)
+    #     # ax.fill_between(data.index.to_numpy(),
+    #     #                 (data['lower'] / 1e6).to_numpy(),
+    #     #                 (data['upper'] / 1e6).to_numpy(),
+    #     #                 color=best_scenarios_color[s],
+    #     #                 alpha=0.2)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Millions)')
+    # ax.set_xticks(data.index)
+    # legend_labels = [substitute_labels[v] for v in best_scenarios]
+    # legend_handles = [plt.Rectangle((0, 0), 1, 1,
+    #                                 color=best_scenarios_color[v]) for v in best_scenarios]
+    # ax.legend(legend_handles, legend_labels,
+    #           loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
+    #           title='Best scenario group')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    # plot yearly staff count (Clinical/Pharmacy/Nursing and Midwifery) for best 9 scenarios
-    best_cadres = ['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']
-    name_of_plot = f'Yearly staff count for C+P+N total, {target_period()}'
-    fig, ax = plt.subplots(figsize=(9, 6))
-    best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1']
-    for s in best_scenarios:
-        data = staff_count.loc[staff_count.draw == s].set_index('year').drop(columns='draw').loc[:, best_cadres].sum(
-            axis=1)
-        ax.plot(data.index, data.values / 1e3, label=substitute_labels[s], color=best_scenarios_color[s])
-    ax.set_title(name_of_plot)
-    ax.set_ylabel('(Thousands)')
-    ax.set_xticks(data.index)
-    legend_labels = [substitute_labels[v] for v in best_scenarios]
-    legend_handles = [plt.Rectangle((0, 0), 1, 1,
-                                    color=best_scenarios_color[v]) for v in best_scenarios]
-    ax.legend(legend_handles, legend_labels,
-              loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
-              title='Best scenario group')
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # # plot yearly staff count (Clinical/Pharmacy/Nursing and Midwifery) for best 9 scenarios
+    # best_cadres = ['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']
+    # name_of_plot = f'Yearly staff count for C+P+N total, {target_period()}'
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1']
+    # for s in best_scenarios:
+    #     data = staff_count.loc[staff_count.draw == s].set_index('year').drop(columns='draw').loc[:, best_cadres].sum(
+    #         axis=1)
+    #     ax.plot(data.index, data.values / 1e3, label=substitute_labels[s], color=best_scenarios_color[s])
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Thousands)')
+    # ax.set_xticks(data.index)
+    # legend_labels = [substitute_labels[v] for v in best_scenarios]
+    # legend_handles = [plt.Rectangle((0, 0), 1, 1,
+    #                                 color=best_scenarios_color[v]) for v in best_scenarios]
+    # ax.legend(legend_handles, legend_labels,
+    #           loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
+    #           title='Best scenario group')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     # name_of_plot = f'Services by appointment type, {target_period()}'
     # num_appts_summarized_in_millions = num_appts_summarized / 1e6
@@ -1616,27 +1618,27 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'HCW cost gap distribution of never ran appointments that require CNP only, {target_period()}'
-    cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']
-    data_to_plot = never_ran_appts_info_that_need_CNP[2][cadres_to_plot] * 100
-    fig, ax = plt.subplots(figsize=(12, 8))
-    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
-    #ax.set_ylim(0, 100)
-    ax.set_ylabel('Percentage %')
-    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
-    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90)
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
-    # plot the average proportions of all scenarios
-    for c in cadres_to_plot:
-        plt.axhline(y=data_to_plot[c].mean(),
-                    linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2,
-                    label=c)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'HCW cost gap distribution of never ran appointments that require CNP only, {target_period()}'
+    # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']
+    # data_to_plot = never_ran_appts_info_that_need_CNP[2][cadres_to_plot] * 100
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    # #ax.set_ylim(0, 100)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    # # plot the average proportions of all scenarios
+    # for c in cadres_to_plot:
+    #     plt.axhline(y=data_to_plot[c].mean(),
+    #                 linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2,
+    #                 label=c)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     name_of_plot = f'Average fractions of HCW time used (CNP, level 1a), {target_period()}'
     data_to_plot = hcw_time_usage_summarized.xs('1a', axis=1, level=1, drop_level=True) * 100
@@ -1756,6 +1758,8 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
+    # todo: plot Deaths averted by cause
+
     # name_of_plot = f'Service delivery ratio against no expansion, {target_period()}'
     # fig, ax = do_bar_plot_with_ci(service_ratio_increased * 100, service_ratio_increased_percent, annotation=True)
     # ax.set_title(name_of_plot)
@@ -1765,24 +1769,24 @@ def anova_oneway(df=num_dalys_averted_percent):
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}'
-    extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex(
-        num_dalys_summarized.index).drop(['s_1']) / 1e3
-    column_dcsa = extra_staff_by_cadre_to_plot.pop('DCSA')
-    extra_staff_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
-    fig, ax = plt.subplots(figsize=(9, 6))
-    extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Thousands', fontsize='small')
-    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
-    xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}'
+    # extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex(
+    #     num_dalys_summarized.index).drop(['s_1']) / 1e3
+    # column_dcsa = extra_staff_by_cadre_to_plot.pop('DCSA')
+    # extra_staff_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Thousands', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
     extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
@@ -1803,44 +1807,44 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    # name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no expansion, {target_period()}'
-    # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6
-    name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}'
-    data_to_plot = time_increased_by_cadre_treatment_CP / 1e6
-    data_to_plot['total'] = data_to_plot.sum(axis=1)
-    data_to_plot.sort_values(by='total', inplace=True, ascending=False)
-    data_to_plot.drop('total', axis=1, inplace=True)
-    data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery',
-                                 'DCSA', 'Laboratory', 'Mental', 'Radiography']]
-    fig, ax = plt.subplots(figsize=(12, 8))
-    data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Millions Minutes')
-    ax.set_xlabel('Treatment')
-    ax.set_xticklabels(data_to_plot.index, rotation=90)
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
-        ':', '')))
-    fig.show()
-    plt.close(fig)
+    # # name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no expansion, {target_period()}'
+    # # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6
+    # name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}'
+    # data_to_plot = time_increased_by_cadre_treatment_CP / 1e6
+    # data_to_plot['total'] = data_to_plot.sum(axis=1)
+    # data_to_plot.sort_values(by='total', inplace=True, ascending=False)
+    # data_to_plot.drop('total', axis=1, inplace=True)
+    # data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery',
+    #                              'DCSA', 'Laboratory', 'Mental', 'Radiography']]
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions Minutes')
+    # ax.set_xlabel('Treatment')
+    # ax.set_xticklabels(data_to_plot.index, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+    #     ':', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'Time used increased by treatment and cadre: C + N&M + P vs no expansion, {target_period()}'
-    # name_of_plot = f'Time used increased by treatment and cadre: C + P vs no expansion, {target_period()}'
-    data_to_plot = data_to_plot.T
-    data_to_plot = data_to_plot.add_suffix('*')
-    fig, ax = plt.subplots(figsize=(12, 8))
-    data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
-    ax.set_ylabel('Millions Minutes')
-    ax.set_xlabel('Treatment')
-    ax.set_xticklabels(data_to_plot.index, rotation=90)
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
-        ':', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Time used increased by treatment and cadre: C + N&M + P vs no expansion, {target_period()}'
+    # # name_of_plot = f'Time used increased by treatment and cadre: C + P vs no expansion, {target_period()}'
+    # data_to_plot = data_to_plot.T
+    # data_to_plot = data_to_plot.add_suffix('*')
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions Minutes')
+    # ax.set_xlabel('Treatment')
+    # ax.set_xticklabels(data_to_plot.index, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+    #     ':', '')))
+    # fig.show()
+    # plt.close(fig)
 
     name_of_plot = f'DALYs by cause averted: C + N + P vs no expansion, {target_period()}'
     data_to_plot = num_dalys_by_cause_averted_CNP / 1e6
@@ -1872,77 +1876,77 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}'
-    num_appts_increased_in_millions = num_appts_increased / 1e6
-    yerr_services = np.array([
-        (num_services_increased['mean'] - num_services_increased['lower']).values,
-        (num_services_increased['upper'] - num_services_increased['mean']).values,
-    ]) / 1e6
-    fig, ax = plt.subplots(figsize=(9, 6))
-    num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
-    ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
-                fmt=".", color="black", zorder=100)
-    ax.set_ylabel('Millions', fontsize='small')
-    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
-    xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(
-        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
-    )
-    fig.show()
-    plt.close(fig)
-
-    name_of_plot = f'Never ran services reduced by appointment type \nagainst no expansion, {target_period()}'
-    num_never_ran_appts_reduced_to_plot = num_never_ran_appts_reduced / 1e6
+    # name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}'
+    # num_appts_increased_in_millions = num_appts_increased / 1e6
     # yerr_services = np.array([
     #     (num_services_increased['mean'] - num_services_increased['lower']).values,
     #     (num_services_increased['upper'] - num_services_increased['mean']).values,
     # ]) / 1e6
-    fig, ax = plt.subplots(figsize=(9, 6))
-    num_never_ran_appts_reduced_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
-    # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
     #             fmt=".", color="black", zorder=100)
-    ax.set_ylabel('Millions', fontsize='small')
-    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
-    xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_reduced_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(
-        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
-    )
-    fig.show()
-    plt.close(fig)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(
+    #     name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    # )
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'Never ran services reduced by treatment type \nagainst no expansion, {target_period()}'
-    num_never_ran_treatments_reduced_to_plot = num_never_ran_treatments_reduced / 1e6
-    # yerr_services = np.array([
-    #     (num_services_increased['mean'] - num_services_increased['lower']).values,
-    #     (num_services_increased['upper'] - num_services_increased['mean']).values,
-    # ]) / 1e6
-    fig, ax = plt.subplots(figsize=(9, 6))
-    num_never_ran_treatments_reduced_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
-    # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
-    #             fmt=".", color="black", zorder=100)
-    ax.set_ylabel('Millions', fontsize='small')
-    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
-    xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_reduced_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment type', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(
-        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
-    )
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Never ran services reduced by appointment type \nagainst no expansion, {target_period()}'
+    # num_never_ran_appts_reduced_to_plot = num_never_ran_appts_reduced / 1e6
+    # # yerr_services = np.array([
+    # #     (num_services_increased['mean'] - num_services_increased['lower']).values,
+    # #     (num_services_increased['upper'] - num_services_increased['mean']).values,
+    # # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_never_ran_appts_reduced_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    # #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_reduced_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(
+    #     name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    # )
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Never ran services reduced by treatment type \nagainst no expansion, {target_period()}'
+    # num_never_ran_treatments_reduced_to_plot = num_never_ran_treatments_reduced / 1e6
+    # # yerr_services = np.array([
+    # #     (num_services_increased['mean'] - num_services_increased['lower']).values,
+    # #     (num_services_increased['upper'] - num_services_increased['mean']).values,
+    # # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_never_ran_treatments_reduced_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    # #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_reduced_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(
+    #     name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    # )
+    # fig.show()
+    # plt.close(fig)
 
     # name_of_plot = f'Services increased by treatment type \nagainst no expansion, {target_period()}'
     # num_treatments_increased_in_millions = num_treatments_increased / 1e6
@@ -2019,8 +2023,6 @@ def anova_oneway(df=num_dalys_averted_percent):
     # plt.close(fig)
 
     # todo
-    # As we have 33 scenarios in total, \
-    # design comparison groups of scenarios to examine marginal/combined productivity of cadres.
     # To vary the HRH budget growth rate (default: 4.2%) and do sensitivity analysis \
     # (around the best possible extra budget allocation scenario)?
     # As it is analysis of 10 year results, it would be better to consider increasing annual/minute salary? The \
@@ -2028,12 +2030,8 @@ def anova_oneway(df=num_dalys_averted_percent):
     # the same, thus no need to consider the increase rate of salary if GDP inflation is not considered.
     # To plot time series of staff and budget in the target period to show \
     # how many staff and how much budget to increase yearly (choose the best scenario to illustrate)?
-    # Get and plot services by short treatment id?
-    # To design more scenarios so that Pharmacy cadre can be expanded more than the 33 scenarios
-    # and so that each cadre has different scale up factor (the one in more shortage will need to be scaled up more)?
-    # Later, to explain the cause of differences in scenarios, might consider hcw time flow?
     # Before submit a run, merge in the remote master.
-    # Think about a measure of Universal Health Service Coverage for the scenarios.
+    # Think about a measure of Universal Health Service Coverage for the scenarios?
 
 
 if __name__ == "__main__":

From 50338c56853a72e8afc8c14db4d4a7a0199d2ab1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 22 Oct 2024 16:20:38 +0100
Subject: [PATCH 144/218] reformat

---
 ...hr_expandsion_by_officer_type_with_extra_budget.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 39b1e5ea75..066428f71f 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -12,23 +12,22 @@
 
 import numpy as np
 import pandas as pd
+import statsmodels.api as sm
+import statsmodels.stats as ss
 from matplotlib import pyplot as plt
 
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
-    extra_budget_fracs, Minute_Salary_by_Cadre_Level,
+    Minute_Salary_by_Cadre_Level,
+    extra_budget_fracs,
 )
-
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import (
     HRHExpansionByCadreWithExtraBudget,
 )
-import statsmodels.api as sm
-import statsmodels.stats as ss
 from tlo import Date
-from tlo.analysis.utils import (
+from tlo.analysis.utils import (  # SHORT_TREATMENT_ID_TO_COLOR_MAP,
     APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
     CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP,
     COARSE_APPT_TYPE_TO_COLOR_MAP,
-    SHORT_TREATMENT_ID_TO_COLOR_MAP,
     bin_hsi_event_details,
     compute_mean_across_runs,
     extract_results,

From 8d45f0bb8840e949ea88d6f6206e62c677be9edb Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 22 Oct 2024 16:22:47 +0100
Subject: [PATCH 145/218] add todo for new loggers

---
 src/tlo/methods/healthsystem.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index e88de7f416..aa94678de1 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -1802,6 +1802,7 @@ def write_to_hsi_log(
                 squeeze_factor=squeeze_factor,
                 appt_footprint=event_details.appt_footprint,
                 level=event_details.facility_level,
+                # todo: to log the facility ID to get district level info
             )
 
     def call_and_record_never_ran_hsi_event(self, hsi_event, priority=None):
@@ -1900,6 +1901,8 @@ def log_current_capabilities_and_usage(self):
         ).replace([np.inf, -np.inf, np.nan], 0.0)
         summary_by_officer.index.names = ['Officer_Type', 'Facility_Level']
 
+        # todo: Compute and log the fraction of time used by officer type, facility level and district
+
         logger.info(key='Capacity',
                     data={
                         'Frac_Time_Used_Overall': fraction_time_used_overall,

From 3ca37a2c8e647f2575cc66a9de3ca3b204240aca Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 22 Oct 2024 17:22:22 +0100
Subject: [PATCH 146/218] fix failing checks

---
 ...dsion_by_officer_type_with_extra_budget.py | 48 +++++++++----------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 066428f71f..cd87e1aec0 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -546,21 +546,21 @@ def get_hcw_time_by_treatment(draw=21):
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
-    num_treatments = extract_results(
-        results_folder,
-        module='tlo.methods.healthsystem.summary',
-        key='HSI_Event_non_blank_appt_footprint',
-        custom_generate_series=get_num_treatments,
-        do_scaling=True
-    ).pipe(set_param_names_as_column_index_level_0)
-
-    num_treatments_total = extract_results(
-        results_folder,
-        module='tlo.methods.healthsystem.summary',
-        key='HSI_Event_non_blank_appt_footprint',
-        custom_generate_series=get_num_treatments_total,
-        do_scaling=True
-    ).pipe(set_param_names_as_column_index_level_0)
+    # num_treatments = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='HSI_Event_non_blank_appt_footprint',
+    #     custom_generate_series=get_num_treatments,
+    #     do_scaling=True
+    # ).pipe(set_param_names_as_column_index_level_0)
+    #
+    # num_treatments_total = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='HSI_Event_non_blank_appt_footprint',
+    #     custom_generate_series=get_num_treatments_total,
+    #     do_scaling=True
+    # ).pipe(set_param_names_as_column_index_level_0)
 
     num_never_ran_appts = extract_results(
         results_folder,
@@ -594,19 +594,19 @@ def get_hcw_time_by_treatment(draw=21):
     #     do_scaling=True
     # ).pipe(set_param_names_as_column_index_level_0)
 
-    num_never_ran_treatments = extract_results(
-        results_folder,
-        module='tlo.methods.healthsystem.summary',
-        key='Never_ran_HSI_Event',
-        custom_generate_series=get_num_treatments,
-        do_scaling=True
-    ).pipe(set_param_names_as_column_index_level_0)
+    # num_never_ran_treatments = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='Never_ran_HSI_Event',
+    #     custom_generate_series=get_num_treatments,
+    #     do_scaling=True
+    # ).pipe(set_param_names_as_column_index_level_0)
 
     # get total service demand
     assert len(num_services) == len(num_never_ran_services) == 1
     assert (num_services.columns == num_never_ran_services.columns).all()
-    num_services_demand = num_services + num_never_ran_services
-    ratio_services = num_services / num_services_demand
+    # num_services_demand = num_services + num_never_ran_services
+    # ratio_services = num_services / num_services_demand
 
     assert (num_appts.columns == num_never_ran_appts.columns).all()
     num_never_ran_appts.loc['Lab / Diagnostics', :] = 0

From 411a25667b4ec1f8b2780d1bce7e736f5f32b7b6 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 22 Oct 2024 22:59:49 +0100
Subject: [PATCH 147/218] prepare the "better scenario" indicated by never ran
 services cost gap

---
 ...dsion_by_officer_type_with_extra_budget.py |  5 +++
 ...inute_salary_and_extra_budget_frac_data.py | 31 ++++++++++++-------
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index cd87e1aec0..6240e42900 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -915,6 +915,11 @@ def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
 
+    # # store the proportions of no expansion scenario as the "best" scenario that is to be tested
+    # hcw_cost_gap_percent_no_expansion = hcw_cost_gap_percent.loc[
+    #     's_1', ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']
+    # ].copy()  # [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]
+
     # find appts that need Clinical + Pharmacy (+ Nursing_and_Midwifery)
     # then calculate hcw time needed for these appts (or treatments, need treatment and their appt footprint)
     # in never run set
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 4757231719..e66c069de6 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -53,9 +53,11 @@
 staff_cost.set_index('Officer_Category', inplace=True)
 staff_cost = staff_cost.reindex(index=cadre_all)
 
-# No expansion scenario, or zero-extra-budget-fraction scenario, "s_1"
-# Define the current cost fractions among all cadres as extra-budget-fraction scenario "s_2" \
+# No expansion scenario, or zero-extra-budget-fraction scenario, "s_0"
+# Define the current cost fractions among all cadres as extra-budget-fraction scenario "s_1" \
 # to be matched with Margherita's 4.2% scenario.
+# Add the scenario indicated by the hcw cost gap distribution \
+# that results from never ran services in the no expansion scenario, "s_2"
 # Define all other scenarios so that the extra budget fraction of each cadre, \
 # i.e., four main cadres and the "Other" cadre that groups up all other cadres, is the same (fair allocation)
 
@@ -63,7 +65,7 @@
 other_group = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
 
 # create scenarios
-combination_list = ['s_1', 's_2']  # two baseline scenarios
+combination_list = ['s_0', 's_1', 's_2']  # the three special scenarios
 for n in range(1, len(cadre_group)+1):
     for subset in itertools.combinations(cadre_group, n):
         combination_list.append(str(subset))  # other equal-fraction scenarios
@@ -71,24 +73,24 @@
 # cadre groups to expand
 cadre_to_expand = pd.DataFrame(index=cadre_group, columns=combination_list).fillna(0.0)
 for c in cadre_group:
-    for i in cadre_to_expand.columns[2:]:
+    for i in cadre_to_expand.columns[3:]:  # for all equal-fraction scenarios
         if c in i:
             cadre_to_expand.loc[c, i] = 1  # value 1 indicate the cadre group will be expanded
 
 # prepare auxiliary dataframe for equal extra budget fractions scenarios
 auxiliary = cadre_to_expand.copy()
-for i in auxiliary.columns[2:]:
+for i in auxiliary.columns[3:]:  # for all equal-fraction scenarios
     auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum()
-
+auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]
 
 # define extra budget fracs for each cadre
 extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list)
 assert (extra_budget_fracs.columns == auxiliary.columns).all()
 assert (extra_budget_fracs.index[0:4] == auxiliary.index[0:4]).all()
 
-extra_budget_fracs.loc[:, 's_1'] = 0
+extra_budget_fracs.loc[:, 's_0'] = 0
 assert (staff_cost.index == extra_budget_fracs.index).all()
-extra_budget_fracs.loc[:, 's_2'] = staff_cost.loc[:, 'cost_frac'].values
+extra_budget_fracs.loc[:, 's_1'] = staff_cost.loc[:, 'cost_frac'].values
 
 for i in extra_budget_fracs.columns[2:]:
     for c in extra_budget_fracs.index:
@@ -105,12 +107,12 @@
 # rename scenarios
 # make the scenario of equal fracs for all five cadre groups (i.e., the last column) to be s_3
 simple_scenario_name = {extra_budget_fracs.columns[-1]: 's_3'}
-for i in range(2, len(extra_budget_fracs.columns)-1):
-    simple_scenario_name[extra_budget_fracs.columns[i]] = 's_' + str(i+2)  # name scenario from s_4
+for i in range(3, len(extra_budget_fracs.columns)-1):
+    simple_scenario_name[extra_budget_fracs.columns[i]] = 's_' + str(i+1)  # name scenario from s_4
 extra_budget_fracs.rename(columns=simple_scenario_name, inplace=True)
 
 # reorder columns
-col_order = ['s_' + str(i) for i in range(1, len(extra_budget_fracs.columns)+1)]
+col_order = ['s_' + str(i) for i in range(0, len(extra_budget_fracs.columns))]
 assert len(col_order) == len(extra_budget_fracs.columns)
 extra_budget_fracs = extra_budget_fracs.reindex(columns=col_order)
 
@@ -163,7 +165,7 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
 
 # check the total cost after 11 years are increased as expected
 assert (
-    abs(total_cost.loc[2029, total_cost.columns[1:]] - (1 + 0.042) ** 11 * total_cost.loc[2029, 's_1']) < 1/1e7
+    abs(total_cost.loc[2029, total_cost.columns[1:]] - (1 + 0.042) ** 11 * total_cost.loc[2029, 's_0']) < 1/1e7
 ).all()
 
 # get the integrated scale up factors by the end of year 2029 and each scenario
@@ -175,6 +177,11 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
             scale_up_factor_dict[s][yr].loc[:, 'scale_up_factor'].values
         )
 
+# Checked that for s_2, the integrated scale up factors of C/N/P cadres are comparable with shortage estimates from \
+# She et al 2024: https://human-resources-health.biomedcentral.com/articles/10.1186/s12960-024-00949-2
+# todo: This might provide a short-cut way (no simulation, but mathematical calculation) to calculate \
+# an extra budget allocation scenario 's_2+' that is comparable with s_2.
+
 # # save and read pickle file
 # pickle_file_path = Path(resourcefilepath / 'healthsystem' / 'human_resources' / 'scaling_capabilities' /
 #                         'ResourceFile_HR_expansion_by_officer_type_yearly_scale_up_factors.pickle')

From c55076a67bebe373b01e385ed1e14e74e4fc1c22 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 23 Oct 2024 00:12:28 +0100
Subject: [PATCH 148/218] undo the changes for historical scaling

---
 ...panding_current_hcw_by_officer_type_with_extra_budget.py | 6 +++---
 src/tlo/methods/healthsystem.py                             | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 1efca4e900..2cb559ecc9 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -31,7 +31,7 @@ def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = Date(2031, 1, 1)
+        self.end_date = Date(2030, 1, 1)
         self.pop_size = 100_000  # todo: TBC
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
@@ -60,7 +60,7 @@ def draw_parameters(self, draw_number, rng):
     def _get_scenarios(self) -> Dict[str, Dict]:
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
-        self.YEAR_OF_CHANGE = 2025  # This is the year to change run settings and to start hr expansion.
+        self.YEAR_OF_CHANGE = 2019  # This is the year to change run settings and to start hr expansion.
 
         self.scenarios = extra_budget_fracs
 
@@ -86,7 +86,7 @@ def _baseline(self) -> Dict:
                 'cons_availability': 'default',
                 'cons_availability_postSwitch': 'all',  # todo: how to argue for this setting?
                 'year_cons_availability_switch': self.YEAR_OF_CHANGE,
-                'yearly_HR_scaling_mode': 'historical_scaling',
+                'yearly_HR_scaling_mode': 'no_scaling',
                 'start_year_HR_expansion_by_officer_type': self.YEAR_OF_CHANGE,
                 'end_year_HR_expansion_by_officer_type': self.end_date.year,
                 "policy_name": "Naive",
diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 9a69785d04..c7075092f0 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -667,8 +667,8 @@ def read_parameters(self, data_folder):
             'Dental': 0, 'Laboratory': 0, 'Mental': 0, 'Nutrition': 0, 'Radiography': 0
         }
         self.parameters['HR_budget_growth_rate'] = 0.042
-        self.parameters['start_year_HR_expansion_by_officer_type'] = 2025
-        self.parameters['end_year_HR_expansion_by_officer_type'] = 2031
+        self.parameters['start_year_HR_expansion_by_officer_type'] = 2019
+        self.parameters['end_year_HR_expansion_by_officer_type'] = 2030
 
     def pre_initialise_population(self):
         """Generate the accessory classes used by the HealthSystem and pass to them the data that has been read."""

From 62e5f72cb6f19de50e17b1e0d7ce838523c0713d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 23 Oct 2024 00:14:18 +0100
Subject: [PATCH 149/218] to test the "best" scenario

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 2cb559ecc9..11f3295b2d 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -62,7 +62,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
 
         self.YEAR_OF_CHANGE = 2019  # This is the year to change run settings and to start hr expansion.
 
-        self.scenarios = extra_budget_fracs
+        self.scenarios = extra_budget_fracs['s_2']  # test the "best" scenario
 
         return {
             self.scenarios.columns[i]:

From 603a4806ea2c834eea8ebee5f83004b9220516c0 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 23 Oct 2024 00:23:29 +0100
Subject: [PATCH 150/218] fix series have no columns

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 11f3295b2d..b6f32fdafc 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -62,7 +62,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
 
         self.YEAR_OF_CHANGE = 2019  # This is the year to change run settings and to start hr expansion.
 
-        self.scenarios = extra_budget_fracs['s_2']  # test the "best" scenario
+        self.scenarios = extra_budget_fracs['s_2'].to_frame()  # test the "best" scenario
 
         return {
             self.scenarios.columns[i]:

From a7e748b7bc5cf4624dc185c8d620e67c859d7115 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 23 Oct 2024 10:58:08 +0100
Subject: [PATCH 151/218] fix the issue of running only one scenario

---
 ..._expanding_current_hcw_by_officer_type_with_extra_budget.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index b6f32fdafc..8cb693d4e2 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -55,7 +55,8 @@ def modules(self):
                 [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)])  # todo: TBC
 
     def draw_parameters(self, draw_number, rng):
-        return list(self._scenarios.values())[draw_number]
+        if draw_number < len(self._scenarios):
+            return list(self._scenarios.values())[draw_number]
 
     def _get_scenarios(self) -> Dict[str, Dict]:
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""

From f189f17ff3ebfa6cec080f51b3ec785cc88f979b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 24 Oct 2024 15:35:26 +0100
Subject: [PATCH 152/218] try add summary logger of fraction of time used by
 officer, level and district

---
 src/tlo/methods/healthsystem.py | 86 ++++++++++++++++++++++++++++-----
 1 file changed, 73 insertions(+), 13 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index c7075092f0..e19bcdeb36 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -1914,24 +1914,36 @@ def log_current_capabilities_and_usage(self):
             comparison['Minutes_Used'].sum() / total_available if total_available > 0 else 0
         )
 
-        # Compute Fraction of Time Used In Each Facility
+        def compute_fraction_of_time_used(groups):
+            """
+            This will take in the groups for the groupby and calculate the fraction of time used for each group.
+            :param groups: list of groups
+            :return: dataframe with groups as the index and time measures as the columns
+            """
+            _summary = comparison.groupby(by=groups)[['Total_Minutes_Per_Day', 'Minutes_Used']].sum()
+            _summary['Fraction_Time_Used'] = (
+                _summary['Minutes_Used'] / _summary['Total_Minutes_Per_Day']
+            ).replace([np.inf, -np.inf, np.nan], 0.0)
+
+            return _summary
+
+        # Get facility id, officer, level, district groups
         facility_id = [_f.split('_')[1] for _f in comparison.index]
-        summary_by_fac_id = comparison.groupby(by=facility_id)[['Total_Minutes_Per_Day', 'Minutes_Used']].sum()
-        summary_by_fac_id['Fraction_Time_Used'] = (
-            summary_by_fac_id['Minutes_Used'] / summary_by_fac_id['Total_Minutes_Per_Day']
-        ).replace([np.inf, -np.inf, np.nan], 0.0)
-
-        # Compute Fraction of Time For Each Officer and level
         officer = [_f.rsplit('Officer_')[1] for _f in comparison.index]
         level = [self._facility_by_facility_id[int(_fac_id)].level for _fac_id in facility_id]
         level = list(map(lambda x: x.replace('1b', '2'), level))
-        summary_by_officer = comparison.groupby(by=[officer, level])[['Total_Minutes_Per_Day', 'Minutes_Used']].sum()
-        summary_by_officer['Fraction_Time_Used'] = (
-            summary_by_officer['Minutes_Used'] / summary_by_officer['Total_Minutes_Per_Day']
-        ).replace([np.inf, -np.inf, np.nan], 0.0)
+        district = [self._facility_by_facility_id[int(_fac_id)].name.split('_')[-1] for _fac_id in facility_id]
+
+        # Compute Fraction of Time Used In Each Facility
+        summary_by_fac_id = compute_fraction_of_time_used(facility_id)
+
+        # Compute Fraction of Time For Each Officer and Level
+        summary_by_officer = compute_fraction_of_time_used([officer, level])
         summary_by_officer.index.names = ['Officer_Type', 'Facility_Level']
 
-        # todo: Compute and log fraction of time used for each officer and level and district
+        # Compute Fraction of Time by Officer, Level and District
+        summary_by_officer_level_district = compute_fraction_of_time_used([officer, level, district])
+        summary_by_officer_level_district.index.names = ['Officer_Type', 'Facility_Level', 'District']
 
         logger.info(key='Capacity',
                     data={
@@ -1945,7 +1957,9 @@ def log_current_capabilities_and_usage(self):
 
         self._summary_counter.record_hs_status(
             fraction_time_used_across_all_facilities=fraction_time_used_overall,
-            fraction_time_used_by_officer_type_and_level=summary_by_officer["Fraction_Time_Used"].to_dict()
+            fraction_time_used_by_officer_type_and_level=summary_by_officer["Fraction_Time_Used"].to_dict(),
+            fraction_time_used_by_officer_level_district=summary_by_officer_level_district[
+                'Fraction_Time_Used'].to_dict(),
         )
 
     def remove_beddays_footprint(self, person_id):
@@ -2808,12 +2822,15 @@ def record_hs_status(
         self,
         fraction_time_used_across_all_facilities: float,
         fraction_time_used_by_officer_type_and_level: Dict[Tuple[str, int], float],
+        fraction_time_used_by_officer_level_district: Dict[Tuple[str, str, str], float],
     ) -> None:
         """Record a current status metric of the HealthSystem."""
         # The fraction of all healthcare worker time that is used:
         self._frac_time_used_overall.append(fraction_time_used_across_all_facilities)
         for officer_type_facility_level, fraction_time in fraction_time_used_by_officer_type_and_level.items():
             self._sum_of_daily_frac_time_used_by_officer_type_and_level[officer_type_facility_level] += fraction_time
+        for officer_level_district, fraction_time in fraction_time_used_by_officer_level_district.items():
+            self._sum_of_daily_frac_time_used_by_officer_level_district[officer_level_district] += fraction_time
 
     def write_to_log_and_reset_counters(self):
         """Log summary statistics reset the data structures. This usually occurs at the end of the year."""
@@ -2874,6 +2891,16 @@ def write_to_log_and_reset_counters(self):
                 self.frac_time_used_by_officer_type_and_level()),
         )
 
+        # Log mean of 'fraction time used by officer type and facility level and district' from daily entries from the
+        # previous year.
+        logger_summary.info(
+            key="Capacity_By_OfficerType_And_FacilityLevel_And_District",
+            description="The fraction of healthcare worker time that is used each day, averaged over this "
+                        "calendar year, for each officer type at each facility level in each district.",
+            data=flatten_multi_index_series_into_dict_for_logging(
+                self.frac_time_used_by_officer_level_district()),
+        )
+
         self._reset_internal_stores()
 
     def frac_time_used_by_officer_type_and_level(
@@ -2906,6 +2933,39 @@ def frac_time_used_by_officer_type_and_level(
                 data=mean_frac_time_used.values()
             ).sort_index()
 
+    def frac_time_used_by_officer_level_district(
+        self,
+        officer_type: Optional[str]=None,
+        level: Optional[str]=None,
+        district: Optional[str]=None,
+    ) -> Union[float, pd.Series]:
+        """Average fraction of time used by officer, level and district since last reset.
+        If `officer_type` and/or `level` and/or `district` is not provided (left to default to `None`),
+        then a pd.Series with a multi-index is returned giving the result for all officer_types/levels/districts."""
+
+        if (officer_type is not None) and (level is not None) and (district is not None):
+            return (
+                self._sum_of_daily_frac_time_used_by_officer_level_district[officer_type, level, district]
+                / len(self._frac_time_used_overall)
+                # Use len(self._frac_time_used_overall) as proxy for number of days in past year.
+            )
+        else:
+            # Return multiple in the form of a pd.Series with multiindex
+            mean_frac_time_used = {
+                (_officer_type, _level, _district): v / len(self._frac_time_used_overall)
+                for (_officer_type, _level, _district), v in self._sum_of_daily_frac_time_used_by_officer_level_district.items()
+                if (_officer_type == officer_type or officer_type is None) and (_level == level or level is None) and (
+                    _district == district or district is None)
+            }
+            return pd.Series(
+                index=pd.MultiIndex.from_tuples(
+                    mean_frac_time_used.keys(),
+                    names=['OfficerType', 'FacilityLevel', 'District']
+                ),
+                data=mean_frac_time_used.values()
+            ).sort_index()
+
+
 class HealthSystemChangeParameters(Event, PopulationScopeEventMixin):
     """Event that causes certain internal parameters of the HealthSystem to be changed; specifically:
         * `mode_appt_constraints`

From 30261f9263a428cb13396575e91b9d1fc378c12a Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 24 Oct 2024 16:23:47 +0100
Subject: [PATCH 153/218] add fraction of time used by officer, level and
 district to summary counter

---
 src/tlo/methods/healthsystem.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index e19bcdeb36..83787c804b 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2769,6 +2769,7 @@ def _reset_internal_stores(self) -> None:
 
         self._frac_time_used_overall = []  # Running record of the usage of the healthcare system
         self._sum_of_daily_frac_time_used_by_officer_type_and_level = Counter()
+        self._sum_of_daily_frac_time_used_by_officer_level_district = Counter()
         self._squeeze_factor_by_hsi_event_name = defaultdict(list)  # Running record the squeeze-factor applying to each
         #                                                           treatment_id. Key is of the form:
         #                                                           "<TREATMENT_ID>:<HSI_EVENT_NAME>"

From 1df79e568c1414da828fbb24115244e4bde01912 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 24 Oct 2024 17:30:04 +0100
Subject: [PATCH 154/218] try add summary logger of appt num by appt and
 facility id

---
 src/tlo/methods/healthsystem.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 83787c804b..89a3a3c2cb 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -1832,7 +1832,7 @@ def write_to_hsi_log(
                 squeeze_factor=squeeze_factor,
                 appt_footprint=event_details.appt_footprint,
                 level=event_details.facility_level,
-                # todo: to log the facility ID to get district level info
+                fac_id=facility_id if facility_id is not None else -99,
             )
 
     def call_and_record_never_ran_hsi_event(self, hsi_event, priority=None):
@@ -1891,6 +1891,7 @@ def write_to_never_ran_hsi_log(
             hsi_event_name=event_details.event_name,
             appt_footprint=event_details.appt_footprint,
             level=event_details.facility_level,
+            fac_id=facility_id if facility_id is not None else -99,
         )
 
     def log_current_capabilities_and_usage(self):
@@ -2761,11 +2762,14 @@ def _reset_internal_stores(self) -> None:
         self._no_blank_appt_treatment_ids = defaultdict(int)  # As above, but for `HSI_Event`s with non-blank footprint
         self._no_blank_appt_appts = defaultdict(int)  # As above, but for `HSI_Event`s that with non-blank footprint
         self._no_blank_appt_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')}
+        fac_ids = list(self._facility_by_facility_id.keys()) + [-99]
+        self._no_blank_appt_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids}
 
         # Log HSI_Events that never ran to monitor shortcoming of Health System
         self._never_ran_treatment_ids = defaultdict(int)  # As above, but for `HSI_Event`s that never ran
         self._never_ran_appts = defaultdict(int)  # As above, but for `HSI_Event`s that have never ran
         self._never_ran_appts_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')}
+        self._no_blank_appt_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids}
 
         self._frac_time_used_overall = []  # Running record of the usage of the healthcare system
         self._sum_of_daily_frac_time_used_by_officer_type_and_level = Counter()
@@ -2779,7 +2783,8 @@ def record_hsi_event(self,
                          hsi_event_name: str,
                          squeeze_factor: float,
                          appt_footprint: Counter,
-                         level: str
+                         level: str,
+                         fac_id: int,
                          ) -> None:
         """Add information about an `HSI_Event` to the running summaries."""
 
@@ -2802,12 +2807,14 @@ def record_hsi_event(self,
             for appt_type, number in appt_footprint:
                 self._no_blank_appt_appts[appt_type] += number
                 self._no_blank_appt_by_level[level][appt_type] += number
+                self._no_blank_appt_by_fac_id[fac_id][appt_type] += number
 
     def record_never_ran_hsi_event(self,
                                    treatment_id: str,
                                    hsi_event_name: str,
                                    appt_footprint: Counter,
-                                   level: str
+                                   level: str,
+                                   fac_id: int,
                                    ) -> None:
         """Add information about a never-ran `HSI_Event` to the running summaries."""
 
@@ -2818,6 +2825,7 @@ def record_never_ran_hsi_event(self,
         for appt_type, number in appt_footprint:
             self._never_ran_appts[appt_type] += number
             self._never_ran_appts_by_level[level][appt_type] += number
+            self._never_ran_appts_by_fac_id[fac_id][appt_type] += number
 
     def record_hs_status(
         self,

From b17b774343d2ba74a04b9c7a2b85115fcf7b3328 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 24 Oct 2024 19:11:53 +0100
Subject: [PATCH 155/218] modify summary logger of appt num by appt and
 facility id

---
 src/tlo/methods/healthsystem.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 89a3a3c2cb..54b470d94b 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2769,7 +2769,7 @@ def _reset_internal_stores(self) -> None:
         self._never_ran_treatment_ids = defaultdict(int)  # As above, but for `HSI_Event`s that never ran
         self._never_ran_appts = defaultdict(int)  # As above, but for `HSI_Event`s that have never ran
         self._never_ran_appts_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')}
-        self._no_blank_appt_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids}
+        self._never_ran_appts_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids}
 
         self._frac_time_used_overall = []  # Running record of the usage of the healthcare system
         self._sum_of_daily_frac_time_used_by_officer_type_and_level = Counter()
@@ -2862,9 +2862,10 @@ def write_to_log_and_reset_counters(self):
             key="HSI_Event_non_blank_appt_footprint",
             description="Same as for key 'HSI_Event' but limited to HSI_Event that have non-blank footprints",
             data={
-            "TREATMENT_ID": self._no_blank_appt_treatment_ids,
-            "Number_By_Appt_Type_Code": self._no_blank_appt_appts,
-            "Number_By_Appt_Type_Code_And_Level": self._no_blank_appt_by_level,
+                "TREATMENT_ID": self._no_blank_appt_treatment_ids,
+                "Number_By_Appt_Type_Code": self._no_blank_appt_appts,
+                "Number_By_Appt_Type_Code_And_Level": self._no_blank_appt_by_level,
+                "Number_By_Appt_Type_Code_And_FacilityID": self._no_blank_appt_by_fac_id,
             },
         )
 
@@ -2877,6 +2878,7 @@ def write_to_log_and_reset_counters(self):
                 "TREATMENT_ID": self._never_ran_treatment_ids,
                 "Number_By_Appt_Type_Code": self._never_ran_appts,
                 "Number_By_Appt_Type_Code_And_Level": self._never_ran_appts_by_level,
+                "Number_By_Appt_Type_Code_And_FacilityID": self._never_ran_appts_by_fac_id,
             },
         )
 

From d231bdbe01b7155fd1d564280a24e872c76ca95e Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 24 Oct 2024 22:39:26 +0100
Subject: [PATCH 156/218] modify summary logger of appt num by appt and
 facility id

---
 src/tlo/methods/healthsystem.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 54b470d94b..65ed2ed60a 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2762,14 +2762,13 @@ def _reset_internal_stores(self) -> None:
         self._no_blank_appt_treatment_ids = defaultdict(int)  # As above, but for `HSI_Event`s with non-blank footprint
         self._no_blank_appt_appts = defaultdict(int)  # As above, but for `HSI_Event`s that with non-blank footprint
         self._no_blank_appt_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')}
-        fac_ids = list(self._facility_by_facility_id.keys()) + [-99]
-        self._no_blank_appt_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids}
+        self._no_blank_appt_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in range(133)}  # 133 facilities
 
         # Log HSI_Events that never ran to monitor shortcoming of Health System
         self._never_ran_treatment_ids = defaultdict(int)  # As above, but for `HSI_Event`s that never ran
         self._never_ran_appts = defaultdict(int)  # As above, but for `HSI_Event`s that have never ran
         self._never_ran_appts_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')}
-        self._never_ran_appts_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids}
+        self._never_ran_appts_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in range(133)}  # 133 facilities
 
         self._frac_time_used_overall = []  # Running record of the usage of the healthcare system
         self._sum_of_daily_frac_time_used_by_officer_type_and_level = Counter()

From 77e26abc68f254a2f17a0bf1c6b17ab2d95002cd Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 24 Oct 2024 23:09:19 +0100
Subject: [PATCH 157/218] a possible issue

---
 src/tlo/methods/healthsystem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 65ed2ed60a..e6f9a97cd1 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3106,7 +3106,7 @@ def apply(self, population):
             matches = re.match(pattern, officer)
             # Extract ID and officer type from
             facility_id = int(matches.group(1))
-            district = self.module._facility_by_facility_id[facility_id].district
+            district = self.module._facility_by_facility_id[facility_id].district  # todo: check if district callable
             if district in HR_scaling_factor_by_district:
                 self.module._daily_capabilities[officer] *= HR_scaling_factor_by_district[district]
 

From d73a6e997af7ccd1518b749f621c2fcbcd8977c2 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 24 Oct 2024 23:42:41 +0100
Subject: [PATCH 158/218] add summary logger of fraction of time used by
 officer district

---
 src/tlo/methods/healthsystem.py | 50 +++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index e6f9a97cd1..d951aa71ae 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -1942,6 +1942,10 @@ def compute_fraction_of_time_used(groups):
         summary_by_officer = compute_fraction_of_time_used([officer, level])
         summary_by_officer.index.names = ['Officer_Type', 'Facility_Level']
 
+        # Compute Fraction of Time For Each Officer and District
+        summary_by_officer_district = compute_fraction_of_time_used([officer, district])
+        summary_by_officer_district.index.names = ['Officer_Type', 'District']
+
         # Compute Fraction of Time by Officer, Level and District
         summary_by_officer_level_district = compute_fraction_of_time_used([officer, level, district])
         summary_by_officer_level_district.index.names = ['Officer_Type', 'Facility_Level', 'District']
@@ -1959,6 +1963,7 @@ def compute_fraction_of_time_used(groups):
         self._summary_counter.record_hs_status(
             fraction_time_used_across_all_facilities=fraction_time_used_overall,
             fraction_time_used_by_officer_type_and_level=summary_by_officer["Fraction_Time_Used"].to_dict(),
+            fraction_time_used_by_officer_district=summary_by_officer_district["Fraction_Time_Used"].to_dict(),
             fraction_time_used_by_officer_level_district=summary_by_officer_level_district[
                 'Fraction_Time_Used'].to_dict(),
         )
@@ -2772,6 +2777,7 @@ def _reset_internal_stores(self) -> None:
 
         self._frac_time_used_overall = []  # Running record of the usage of the healthcare system
         self._sum_of_daily_frac_time_used_by_officer_type_and_level = Counter()
+        self._sum_of_daily_frac_time_used_by_officer_district = Counter()
         self._sum_of_daily_frac_time_used_by_officer_level_district = Counter()
         self._squeeze_factor_by_hsi_event_name = defaultdict(list)  # Running record the squeeze-factor applying to each
         #                                                           treatment_id. Key is of the form:
@@ -2830,6 +2836,7 @@ def record_hs_status(
         self,
         fraction_time_used_across_all_facilities: float,
         fraction_time_used_by_officer_type_and_level: Dict[Tuple[str, int], float],
+        fraction_time_used_by_officer_district: Dict[Tuple[str, str], float],
         fraction_time_used_by_officer_level_district: Dict[Tuple[str, str, str], float],
     ) -> None:
         """Record a current status metric of the HealthSystem."""
@@ -2837,6 +2844,8 @@ def record_hs_status(
         self._frac_time_used_overall.append(fraction_time_used_across_all_facilities)
         for officer_type_facility_level, fraction_time in fraction_time_used_by_officer_type_and_level.items():
             self._sum_of_daily_frac_time_used_by_officer_type_and_level[officer_type_facility_level] += fraction_time
+        for officer_district, fraction_time in fraction_time_used_by_officer_district.items():
+            self._sum_of_daily_frac_time_used_by_officer_district[officer_district] += fraction_time
         for officer_level_district, fraction_time in fraction_time_used_by_officer_level_district.items():
             self._sum_of_daily_frac_time_used_by_officer_level_district[officer_level_district] += fraction_time
 
@@ -2901,6 +2910,16 @@ def write_to_log_and_reset_counters(self):
                 self.frac_time_used_by_officer_type_and_level()),
         )
 
+        # Log mean of 'fraction time used by officer type and district' from daily entries from the previous
+        # year.
+        logger_summary.info(
+            key="Capacity_By_OfficerType_And_District",
+            description="The fraction of healthcare worker time that is used each day, averaged over this "
+                        "calendar year, for each officer type in each district.",
+            data=flatten_multi_index_series_into_dict_for_logging(
+                self.frac_time_used_by_officer_district()),
+        )
+
         # Log mean of 'fraction time used by officer type and facility level and district' from daily entries from the
         # previous year.
         logger_summary.info(
@@ -2943,6 +2962,37 @@ def frac_time_used_by_officer_type_and_level(
                 data=mean_frac_time_used.values()
             ).sort_index()
 
+    def frac_time_used_by_officer_district(
+        self,
+        officer_type: Optional[str]=None,
+        district: Optional[str]=None,
+    ) -> Union[float, pd.Series]:
+        """Average fraction of time used by officer type and district since last reset.
+        If `officer_type` and/or `district` is not provided (left to default to `None`) then a pd.Series with a multi-index
+        is returned giving the result for all officer_types/districts."""
+
+        if (officer_type is not None) and (district is not None):
+            return (
+                self._sum_of_daily_frac_time_used_by_officer_district[officer_type, district]
+                / len(self._frac_time_used_overall)
+                # Use len(self._frac_time_used_overall) as proxy for number of days in past year.
+            )
+        else:
+            # Return multiple in the form of a pd.Series with multiindex
+            mean_frac_time_used = {
+                (_officer_type, _district): v / len(self._frac_time_used_overall)
+                for (_officer_type, _district), v in self._sum_of_daily_frac_time_used_by_officer_district.items()
+                if (_officer_type == officer_type or officer_type is None) and (
+                    _district == district or district is None)
+            }
+            return pd.Series(
+                index=pd.MultiIndex.from_tuples(
+                    mean_frac_time_used.keys(),
+                    names=['OfficerType', 'District']
+                ),
+                data=mean_frac_time_used.values()
+            ).sort_index()
+
     def frac_time_used_by_officer_level_district(
         self,
         officer_type: Optional[str]=None,

From 83b05048cfe23a0ccf1df09c765514c6fd9ee01f Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 25 Oct 2024 10:32:11 +0100
Subject: [PATCH 159/218] try fixing error of non-usual facilities

---
 src/tlo/methods/healthsystem.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index d951aa71ae..4eeb6bb8c8 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2767,13 +2767,14 @@ def _reset_internal_stores(self) -> None:
         self._no_blank_appt_treatment_ids = defaultdict(int)  # As above, but for `HSI_Event`s with non-blank footprint
         self._no_blank_appt_appts = defaultdict(int)  # As above, but for `HSI_Event`s that with non-blank footprint
         self._no_blank_appt_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')}
-        self._no_blank_appt_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in range(133)}  # 133 facilities
+        fac_ids = list(range(133)) + [-1, -99]  # 133 "real" facilities + 2 dummy facilities
+        self._no_blank_appt_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids}
 
         # Log HSI_Events that never ran to monitor shortcoming of Health System
         self._never_ran_treatment_ids = defaultdict(int)  # As above, but for `HSI_Event`s that never ran
         self._never_ran_appts = defaultdict(int)  # As above, but for `HSI_Event`s that have never ran
         self._never_ran_appts_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')}
-        self._never_ran_appts_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in range(133)}  # 133 facilities
+        self._never_ran_appts_by_fac_id = {_fac_id: defaultdict(int) for _fac_id in fac_ids}
 
         self._frac_time_used_overall = []  # Running record of the usage of the healthcare system
         self._sum_of_daily_frac_time_used_by_officer_type_and_level = Counter()

From ce98147e8d2f49d82b38173e45307465fe371473 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 25 Oct 2024 10:58:12 +0100
Subject: [PATCH 160/218] add a small comment on rescaling capabilities

---
 src/tlo/methods/healthsystem.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 4eeb6bb8c8..feb8d3afbd 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -1104,6 +1104,9 @@ def _rescale_capabilities_to_capture_effective_capability(self):
             level = self._facility_by_facility_id[facility_id].level
             # Only rescale if rescaling factor is greater than 1 (i.e. don't reduce
             # available capabilities if these were under-used the previous year).
+            # Later, we might want to rescale capabilities by a rescaling factor specific to officer type and
+            # facility id (i.e., officer type, district and level specific),
+            # which will require the fraction of time used by officer type and facility id.
             rescaling_factor = self._summary_counter.frac_time_used_by_officer_type_and_level(
                 officer_type=officer_type, level=level
             )

From 4f6583e667a81e6edafab0bbf371c8af23b475bf Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 25 Oct 2024 11:04:15 +0100
Subject: [PATCH 161/218] add todo tasks for RescalingHRCapabilities_ByDistrict

---
 src/tlo/methods/healthsystem.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index feb8d3afbd..3ce379d40b 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3153,6 +3153,8 @@ def apply(self, population):
         HR_scaling_factor_by_district = self.module.parameters['HR_scaling_by_district_table'][
             self.module.parameters['HR_scaling_by_district_mode']
         ].set_index('District').to_dict()
+        # todo: add entries for facilities at and beyond level 3,
+        #  so that the district list would match the facility IDs fully.
 
         pattern = r"FacilityID_(\w+)_Officer_(\w+)"
 
@@ -3160,7 +3162,9 @@ def apply(self, population):
             matches = re.match(pattern, officer)
             # Extract ID and officer type from
             facility_id = int(matches.group(1))
-            district = self.module._facility_by_facility_id[facility_id].district  # todo: check if district callable
+            district = self.module._facility_by_facility_id[facility_id].district
+            # todo: check if district callable; a fix might be
+            #  district = self.module._facility_by_facility_id[facility_id].name.split('_')[-1]
             if district in HR_scaling_factor_by_district:
                 self.module._daily_capabilities[officer] *= HR_scaling_factor_by_district[district]
 

From 2f92ddcac190c688800c425642d1f2688666d40a Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 28 Oct 2024 11:00:29 +0000
Subject: [PATCH 162/218] plots for representative scenarios inc. the gap
 allocation

---
 ...dsion_by_officer_type_with_extra_budget.py | 198 ++++++++++--------
 1 file changed, 115 insertions(+), 83 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 6240e42900..725514b621 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -36,8 +36,9 @@
 
 # rename scenarios
 substitute_labels = {
-    's_1': 'no_expansion',
-    's_2': 'all_cadres_current_allocation',
+    's_0': 'no_extra_budget_allocation',
+    's_1': 'all_cadres_current_allocation',
+    's_2': 'all_cadres_gap_allocation',
     's_3': 'all_cadres_equal_allocation',
     's_4': 'Clinical (C)', 's_5': 'DCSA (D)', 's_6': 'Nursing_and_Midwifery (N&M)', 's_7': 'Pharmacy (P)',
     's_8': 'Other (O)',
@@ -51,8 +52,8 @@
 
 # group scenarios for presentation
 scenario_groups_init = {
-    'no_expansion': {'s_1'},
-    'all_cadres_expansion': {'s_2', 's_3'},
+    'no_expansion': {'s_0'},
+    'all_cadres_expansion': {'s_1', 's_2', 's_3'},
     'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'},
     'two_cadres_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13',
                              's_14', 's_15', 's_16', 's_17', 's_18'},
@@ -63,10 +64,10 @@
 
 # group scenarios based on whether expand Clinical/Pharmacy
 scenario_groups = {
-    'C + P + D/N&M/O/None': {'s_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'},
+    'C + P + D/N&M/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'},
     'C + D/N&M/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'},
     'P + D/N&M/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'},
-    'D/N&M/O/None': {'s_5', 's_6', 's_8', 's_13', 's_15', 's_17', 's_26', 's_1'}
+    'D/N&M/O/None': {'s_5', 's_6', 's_8', 's_13', 's_15', 's_17', 's_26', 's_0'}
 }
 
 
@@ -418,7 +419,7 @@ def get_hcw_time_by_treatment(draw=21):
         return increased_time_by_cadre_treatment
 
     # Get parameter/scenario names
-    param_names = get_parameter_names_from_scenario_file()
+    param_names = ('s_0', 's_1', 's_2', 's_3', 's_11', 's_22')  # get_parameter_names_from_scenario_file()
 
     # Define cadres in order
     cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
@@ -468,8 +469,8 @@ def get_hcw_time_by_treatment(draw=21):
     # total extra cost of all expansion years
     extra_cost_all_yrs = total_cost_all_yrs.copy()
     for s in param_names[1:]:
-        extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_1', :]
-    extra_cost_all_yrs.drop(index='s_1', inplace=True)
+        extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_0', :]
+    extra_cost_all_yrs.drop(index='s_0', inplace=True)
 
     # get staff count = total cost / salary
     staff_count = total_cost.copy()
@@ -493,7 +494,7 @@ def get_hcw_time_by_treatment(draw=21):
     for s in param_names[1:]:
         assert (abs(
             total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] -
-            (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_1'),
+            (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_0'),
                                                        'all_cadres'].values[0]
         ) < 1e6).all()
 
@@ -687,9 +688,9 @@ def get_hcw_time_by_treatment(draw=21):
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_services.loc[0],
-                comparison='s_1')
+                comparison='s_0')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     # num_services_increased_percent = summarize(
     #     pd.DataFrame(
@@ -705,60 +706,60 @@ def get_hcw_time_by_treatment(draw=21):
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_deaths.loc[0],
-                comparison='s_1')
+                comparison='s_0')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_deaths_averted_percent = summarize(
         -1.0 *
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_deaths.loc[0],
-                comparison='s_1',
+                comparison='s_0',
                 scaled=True)
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_dalys_averted = summarize(
         -1.0 *
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_dalys.loc[0],
-                comparison='s_1')
+                comparison='s_0')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_dalys_averted_percent = summarize(
         -1.0 *
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_dalys.loc[0],
-                comparison='s_1',
+                comparison='s_0',
                 scaled=True
             )
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_dalys_by_cause_averted = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
             num_dalys_by_cause,
-            comparison='s_1',
+            comparison='s_0',
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_dalys_by_cause_averted_percent = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
             num_dalys_by_cause,
-            comparison='s_1',
+            comparison='s_0',
             scaled=True
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
-    num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_22', :].sort_values(ascending=False)
+    num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_2', :].sort_values(ascending=False)
     # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
-    num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_22', :].sort_values(
+    num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_2', :].sort_values(
         ascending=False)
     # num_dalys_by_cause_averted__percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values(
     #     ascending=False)
@@ -775,10 +776,10 @@ def get_hcw_time_by_treatment(draw=21):
     num_appts_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
             num_appts,
-            comparison='s_1',
+            comparison='s_0',
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
     # num_never_ran_appts_reduced = summarize(
     #     -1.0 * find_difference_relative_to_comparison_dataframe(
@@ -970,7 +971,7 @@ def find_never_ran_appts_that_need_specific_cadres():
         _proportions_total = _counts.sum(axis=1) / _counts_all.sum(axis=1)
         _cost_gap_proportions_total = _cost_gap.sum(axis=1) / hcw_cost_gap.sum(axis=1)
 
-        return _proportions_total, _cost_gap_proportions_total, _cost_gap_percent
+        return _proportions_total, _cost_gap_proportions_total, _cost_gap
 
     never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres(
         cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'])
@@ -998,6 +999,17 @@ def find_never_ran_appts_that_need_specific_cadres():
     p_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[1]
     p_cost['Other cases'] = 1 - p_cost[p_cost.columns[0:7]].sum(axis=1)
 
+    # absolute cost gap within never ran appts
+    a_cost = pd.DataFrame(index=num_services_summarized.index)
+    a_cost['C + N&M + P'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1)
+    a_cost['C + P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1)
+    a_cost['C + N&M'] = never_ran_appts_info_that_need_CN[2].sum(axis=1)
+    a_cost['N&M + P'] = never_ran_appts_info_that_need_NP[2].sum(axis=1)
+    a_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[2].sum(axis=1)
+    a_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[2].sum(axis=1)
+    a_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[2].sum(axis=1)
+    a_cost['Other cases'] = hcw_cost_gap.sum(axis=1) - a_cost.sum(axis=1)
+
     # appts count proportions within never ran appts, in total of all cadres
     p_count = pd.DataFrame(index=num_services_summarized.index)
     p_count['C + N&M + P'] = never_ran_appts_info_that_need_CNP[0]
@@ -1085,10 +1097,10 @@ def find_never_ran_appts_that_need_specific_cadres():
             if s in scenario_groups[k]:
                 scenario_color[s] = scenario_groups_color[k]
 
-    best_scenarios_color = {'s_1': 'black'}
-    cmap_list = list(map(plt.get_cmap("Set1"), range(9)))
-    for i in range(9):
-        best_scenarios_color[num_dalys_summarized.index[i]] = cmap_list[i]
+    # representative_scenarios_color = {}
+    # cmap_list = list(map(plt.get_cmap("Set3"), range(len(param_names))))
+    # for i in range(len(param_names)):
+    #     representative_scenarios_color[num_dalys_summarized.index[i]] = cmap_list[i]
 
     # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\
     # percentage of DALYs averted decides the color of that scatter point
@@ -1096,7 +1108,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     extra_budget_allocation['Other'] = extra_budget_allocation[
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
-    name_of_plot = f'3D DALYs averted (%) against no expansion, {target_period()}'
+    name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
@@ -1267,37 +1279,37 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
-    outcome_data = num_dalys_averted_percent['mean']
-    # outcome = num_services_increased_percent['mean']
-    # outcome = num_treatments_total_increased_percent['mean']
-    regression_data = pd.merge(outcome_data,
-                               extra_budget_allocation,
-                               left_index=True, right_index=True, how='inner')
-    regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
-    regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery']
-    regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
-    regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
-                                * regression_data['Nursing_and_Midwifery'])
-    cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other']
-    regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True)
-    predictor = regression_data[regression_data.columns[1:]]
-    outcome = regression_data['mean']
-    predictor = sm.add_constant(predictor)
-    est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit()
-    print(est.summary())
+    # # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
+    # outcome_data = num_dalys_averted_percent['mean']
+    # # outcome = num_services_increased_percent['mean']
+    # # outcome = num_treatments_total_increased_percent['mean']
+    # regression_data = pd.merge(outcome_data,
+    #                            extra_budget_allocation,
+    #                            left_index=True, right_index=True, how='inner')
+    # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
+    # regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery']
+    # regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
+    # regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
+    #                             * regression_data['Nursing_and_Midwifery'])
+    # cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other']
+    # regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True)
+    # predictor = regression_data[regression_data.columns[1:]]
+    # outcome = regression_data['mean']
+    # predictor = sm.add_constant(predictor)
+    # est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit()
+    # print(est.summary())
 
     # todo: could do regression analysis of DALYs averted and Services increased
 
-    # do anova analysis to test the difference of scenario groups
-    def anova_oneway(df=num_dalys_averted_percent):
-        best = df.loc[list(scenario_groups['C + P + D/N&M/O/None']), 'mean']
-        middle_C = df.loc[list(scenario_groups['C + D/N&M/O/None']), 'mean']
-        middle_P = df.loc[list(scenario_groups['P + D/N&M/O/None']), 'mean']
-        worst = df.loc[df.index.isin(scenario_groups['D/N&M/O/None']), 'mean']
-
-        return ss.oneway.anova_oneway((best, middle_C, middle_P, worst),
-                                      groups=None, use_var='unequal', welch_correction=True, trim_frac=0)
+    # # do anova analysis to test the difference of scenario groups
+    # def anova_oneway(df=num_dalys_averted_percent):
+    #     best = df.loc[list(scenario_groups['C + P + D/N&M/O/None']), 'mean']
+    #     middle_C = df.loc[list(scenario_groups['C + D/N&M/O/None']), 'mean']
+    #     middle_P = df.loc[list(scenario_groups['P + D/N&M/O/None']), 'mean']
+    #     worst = df.loc[df.index.isin(scenario_groups['D/N&M/O/None']), 'mean']
+    #
+    #     return ss.oneway.anova_oneway((best, middle_C, middle_P, worst),
+    #                                   groups=None, use_var='unequal', welch_correction=True, trim_frac=0)
 
     # anova_dalys = anova_oneway()
     # anova_services = anova_oneway(num_services_increased_percent)
@@ -1540,7 +1552,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'HCW cost needed to deliver never ran appointments, {target_period()}'
+    name_of_plot = f'HCW cost needed by cadre to deliver never ran appointments, {target_period()}'
     hcw_cost_gap_to_plot = (hcw_cost_gap / 1e6).reindex(num_dalys_summarized.index)
     column_dcsa = hcw_cost_gap_to_plot.pop('DCSA')
     hcw_cost_gap_to_plot.insert(3, "DCSA", column_dcsa)
@@ -1568,11 +1580,11 @@ def anova_oneway(df=num_dalys_averted_percent):
     xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90)
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
-    # plot the average proportions of all scenarios
-    for c in data_to_plot.columns:
-        plt.axhline(y=data_to_plot[c].mean(),
-                    linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
-                    label=c)
+    # # plot the average proportions of all scenarios
+    # for c in data_to_plot.columns:
+    #     plt.axhline(y=data_to_plot[c].mean(),
+    #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    #                 label=c)
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
@@ -1589,18 +1601,38 @@ def anova_oneway(df=num_dalys_averted_percent):
     xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
     ax.set_xticklabels(xtick_labels, rotation=90)
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
-    # plot the average proportions of all scenarios
-    for c in data_to_plot.columns:
-        plt.axhline(y=data_to_plot[c].mean(),
-                    linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
-                    label=c)
+    # # plot the average proportions of all scenarios
+    # for c in data_to_plot.columns:
+    #     plt.axhline(y=data_to_plot[c].mean(),
+    #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    #                 label=c)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Cost distribution of never ran appointments that require specific cadres only, {target_period()}'
+    data_to_plot = a_cost / 1e6
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    ax.set_ylabel('USD in millions')
+    ax.set_xlabel('Extra budget allocation scenario')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # # plot the average cost of all scenarios
+    # for c in data_to_plot.columns:
+    #     plt.axhline(y=data_to_plot[c].mean(),
+    #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    #                 label=c)
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'HCW cost gap distribution of never ran appointments, {target_period()}'
+    name_of_plot = f'HCW cost gap by cadre distribution of never ran appointments, {target_period()}'
     cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
     hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100
     fig, ax = plt.subplots(figsize=(12, 8))
@@ -1742,7 +1774,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     # plt.close(fig)
 
     # plot relative numbers for scenarios
-    name_of_plot = f'DALYs averted against no expansion, {target_period()}'
+    name_of_plot = f'DALYs averted vs no extra budget allocation, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
     ax.set_ylabel('Millions')
@@ -1752,7 +1784,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Deaths averted against no expansion, {target_period()}'
+    name_of_plot = f'Deaths averted vs no extra budget allocation, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
     ax.set_ylabel('Millions')
@@ -1792,9 +1824,9 @@ def anova_oneway(df=num_dalys_averted_percent):
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'Extra budget by cadre against no expansion, {target_period()}'
+    name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}'
     extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
-        num_dalys_summarized.index).drop(index='s_1') / 1e6
+        num_dalys_summarized.index).drop(index='s_0') / 1e6
     column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA')
     extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots(figsize=(9, 6))
@@ -1850,7 +1882,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'DALYs by cause averted: C + N + P vs no expansion, {target_period()}'
+    name_of_plot = f'DALYs by cause averted: \nall cadres gap allocation vs no extra budget allocation, {target_period()}'
     data_to_plot = num_dalys_by_cause_averted_CNP / 1e6
     # name_of_plot = f'DALYs by cause averted: C + P vs no expansion, {target_period()}'
     # data_to_plot = num_dalys_by_cause_averted_CP / 1e6
@@ -1862,11 +1894,11 @@ def anova_oneway(df=num_dalys_averted_percent):
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
-        ':', '')))
+        ':', '').replace('\n', '')))
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'DALYs by cause averted %: C + N + P vs no expansion, {target_period()}'
+    name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs no extra budget allocation, {target_period()}'
     data_to_plot = num_dalys_by_cause_averted_percent_CNP * 100
     fig, ax = plt.subplots()
     data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values)
@@ -1876,7 +1908,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
-        ':', '')))
+        ':', '').replace('\n', '')))
     fig.show()
     plt.close(fig)
 
@@ -1976,7 +2008,7 @@ def anova_oneway(df=num_dalys_averted_percent):
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'DALYs averted by cause against no expansion, {target_period()}'
+    name_of_plot = f'DALYs by cause averted vs no extra budget allocation, {target_period()}'
     num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
     yerr_dalys = np.array([
         (num_dalys_averted['mean'] - num_dalys_averted['lower']).values,

From 4a1923a7c2cb6ec171a4de84d4bb1a3dd13041e5 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 28 Oct 2024 15:34:58 +0000
Subject: [PATCH 163/218] update comments

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py          | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index e66c069de6..be4ede8319 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -179,6 +179,7 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
 
 # Checked that for s_2, the integrated scale up factors of C/N/P cadres are comparable with shortage estimates from \
 # She et al 2024: https://human-resources-health.biomedcentral.com/articles/10.1186/s12960-024-00949-2
+# C: 2.21, N: 1.44, P: 4.14 vs C: 2.83, N: 1.57, P: 6.37
 # todo: This might provide a short-cut way (no simulation, but mathematical calculation) to calculate \
 # an extra budget allocation scenario 's_2+' that is comparable with s_2.
 

From 1794516a9d11224940dda8477251ff34bfd7fb8c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 28 Oct 2024 22:04:05 +0000
Subject: [PATCH 164/218] recover the plot of treatments, service demand, etc.

---
 ...dsion_by_officer_type_with_extra_budget.py | 173 ++++++++++--------
 1 file changed, 99 insertions(+), 74 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 725514b621..47a95af5f0 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -24,10 +24,11 @@
     HRHExpansionByCadreWithExtraBudget,
 )
 from tlo import Date
-from tlo.analysis.utils import (  # SHORT_TREATMENT_ID_TO_COLOR_MAP,
+from tlo.analysis.utils import (
     APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
     CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP,
     COARSE_APPT_TYPE_TO_COLOR_MAP,
+    SHORT_TREATMENT_ID_TO_COLOR_MAP,
     bin_hsi_event_details,
     compute_mean_across_runs,
     extract_results,
@@ -419,7 +420,9 @@ def get_hcw_time_by_treatment(draw=21):
         return increased_time_by_cadre_treatment
 
     # Get parameter/scenario names
-    param_names = ('s_0', 's_1', 's_2', 's_3', 's_11', 's_22')  # get_parameter_names_from_scenario_file()
+    param_names = get_parameter_names_from_scenario_file()
+    # param_names = ('s_0', 's_1', 's_2', 's_3', 's_11', 's_22')
+    # param_names = ('s_1', 's_2', 's_3', 's_11', 's_22')
 
     # Define cadres in order
     cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
@@ -547,21 +550,21 @@ def get_hcw_time_by_treatment(draw=21):
         do_scaling=True
     ).pipe(set_param_names_as_column_index_level_0)
 
-    # num_treatments = extract_results(
-    #     results_folder,
-    #     module='tlo.methods.healthsystem.summary',
-    #     key='HSI_Event_non_blank_appt_footprint',
-    #     custom_generate_series=get_num_treatments,
-    #     do_scaling=True
-    # ).pipe(set_param_names_as_column_index_level_0)
-    #
-    # num_treatments_total = extract_results(
-    #     results_folder,
-    #     module='tlo.methods.healthsystem.summary',
-    #     key='HSI_Event_non_blank_appt_footprint',
-    #     custom_generate_series=get_num_treatments_total,
-    #     do_scaling=True
-    # ).pipe(set_param_names_as_column_index_level_0)
+    num_treatments = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event_non_blank_appt_footprint',
+        custom_generate_series=get_num_treatments,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_treatments_total = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event_non_blank_appt_footprint',
+        custom_generate_series=get_num_treatments_total,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
 
     num_never_ran_appts = extract_results(
         results_folder,
@@ -606,14 +609,14 @@ def get_hcw_time_by_treatment(draw=21):
     # get total service demand
     assert len(num_services) == len(num_never_ran_services) == 1
     assert (num_services.columns == num_never_ran_services.columns).all()
-    # num_services_demand = num_services + num_never_ran_services
+    num_services_demand = num_services + num_never_ran_services
     # ratio_services = num_services / num_services_demand
 
     assert (num_appts.columns == num_never_ran_appts.columns).all()
     num_never_ran_appts.loc['Lab / Diagnostics', :] = 0
     num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0)
     assert (num_appts.index == num_never_ran_appts.index).all()
-    # num_appts_demand = num_appts + num_never_ran_appts
+    num_appts_demand = num_appts + num_never_ran_appts
 
     hcw_time_usage = extract_results(
         results_folder,
@@ -648,12 +651,12 @@ def get_hcw_time_by_treatment(draw=21):
     num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     ).fillna(0.0)
-    # num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex(
-    #     num_dalys_summarized.index
-    # )
-    # num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex(
-    #     num_dalys_summarized.index
-    # )
+    num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
     # num_treatments_total_summarized = summarize(num_treatments_total).loc[0].unstack().reindex(param_names).reindex(
     #     num_dalys_summarized.index
     # )
@@ -670,9 +673,9 @@ def get_hcw_time_by_treatment(draw=21):
     # num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex(
     #     num_dalys_summarized.index
     # )
-    # num_service_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex(
-    #     num_dalys_summarized.index
-    # )
+    num_services_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
     # ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex(
     #     num_dalys_summarized.index
     # )
@@ -806,13 +809,13 @@ def get_hcw_time_by_treatment(draw=21):
     #     only_mean=True
     # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
-    # num_treatments_increased = summarize(
-    #     find_difference_relative_to_comparison_dataframe(
-    #         num_treatments,
-    #         comparison='s_1',
-    #     ),
-    #     only_mean=True
-    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    num_treatments_increased = summarize(
+        find_difference_relative_to_comparison_dataframe(
+            num_treatments,
+            comparison='s_0',
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
     # num_treatments_increased_percent = summarize(
     #     find_difference_relative_to_comparison_dataframe(
@@ -823,13 +826,13 @@ def get_hcw_time_by_treatment(draw=21):
     #     only_mean=True
     # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
-    # num_treatments_total_increased = summarize(
-    #     pd.DataFrame(
-    #         find_difference_relative_to_comparison_series(
-    #             num_treatments_total.loc[0],
-    #             comparison='s_1')
-    #     ).T
-    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    num_treatments_total_increased = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_treatments_total.loc[0],
+                comparison='s_0')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     # num_treatments_total_increased_percent = summarize(
     #     pd.DataFrame(
@@ -871,11 +874,11 @@ def get_hcw_time_by_treatment(draw=21):
          ) < 1e-6
     ).all()
 
-    # assert (
-    #     (num_treatments_increased.sum(axis=1).sort_index()
-    #      - num_treatments_total_increased['mean'].sort_index()
-    #      ) < 1e-6
-    # ).all()
+    assert (
+        (num_treatments_increased.sum(axis=1).sort_index()
+         - num_treatments_total_increased['mean'].sort_index()
+         ) < 1e-6
+    ).all()
 
     # get HCW time and cost needed to run the never run appts
     def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized):
@@ -1056,10 +1059,10 @@ def find_never_ran_appts_that_need_specific_cadres():
     appt_color = {
         appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns
     }
-    # treatment_color = {
-    #     treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan)
-    #     for treatment in num_treatments_summarized.columns
-    # }
+    treatment_color = {
+        treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan)
+        for treatment in num_treatments_summarized.columns
+    }
     cause_color = {
         cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
         for cause in num_dalys_by_cause_summarized.columns
@@ -1471,6 +1474,28 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Total services demand by appointment type, {target_period()}'
+    data_to_plot = num_appts_demand_summarized / 1e6
+    yerr_services = np.array([
+        (num_services_demand_summarized['mean'] - num_services_demand_summarized['lower']).values,
+        (num_services_demand_summarized['upper'] - num_services_demand_summarized['mean']).values,
+    ])/1e6
+    fig, ax = plt.subplots(figsize=(9, 6))
+    data_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    ax.errorbar(range(len(param_names)), num_services_demand_summarized['mean'].values / 1e6, yerr=yerr_services,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     # name_of_plot = f'Services by treatment type, {target_period()}'
     # num_treatments_summarized_in_millions = num_treatments_summarized / 1e6
     # yerr_services = np.array([
@@ -1984,29 +2009,29 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    # name_of_plot = f'Services increased by treatment type \nagainst no expansion, {target_period()}'
-    # num_treatments_increased_in_millions = num_treatments_increased / 1e6
-    # yerr_services = np.array([
-    #     (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values,
-    #     (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values,
-    # ]) / 1e6
-    # fig, ax = plt.subplots(figsize=(10, 6))
-    # num_treatments_increased_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
-    # ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services,
-    #             fmt=".", color="black", zorder=100)
-    # ax.set_ylabel('Millions', fontsize='small')
-    # ax.set(xlabel=None)
-    # xtick_labels = [substitute_labels[v] for v in num_treatments_increased_in_millions.index]
-    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
-    #            fontsize='small', reverse=True)
-    # plt.title(name_of_plot)
-    # fig.tight_layout()
-    # fig.savefig(make_graph_file_name(
-    #     name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
-    # )
-    # fig.show()
-    # plt.close(fig)
+    name_of_plot = f'Services increased by treatment type \nagainst no expansion, {target_period()}'
+    data_to_plot = num_treatments_increased / 1e6
+    yerr_services = np.array([
+        (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values,
+        (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values,
+    ]) / 1e6
+    fig, ax = plt.subplots(figsize=(10, 6))
+    data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
 
     name_of_plot = f'DALYs by cause averted vs no extra budget allocation, {target_period()}'
     num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6

From 45b0d1664f93ba10423b1ebee1e8cb6866871d9e Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 28 Oct 2024 23:07:40 +0000
Subject: [PATCH 165/218] plots against current allocation scenario

---
 ...dsion_by_officer_type_with_extra_budget.py | 210 +++++++++---------
 1 file changed, 105 insertions(+), 105 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 47a95af5f0..3deea18e8b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -422,7 +422,7 @@ def get_hcw_time_by_treatment(draw=21):
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
     # param_names = ('s_0', 's_1', 's_2', 's_3', 's_11', 's_22')
-    # param_names = ('s_1', 's_2', 's_3', 's_11', 's_22')
+    param_names = ('s_1', 's_2', 's_3', 's_11', 's_22')
 
     # Define cadres in order
     cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
@@ -449,57 +449,57 @@ def get_hcw_time_by_treatment(draw=21):
     # scale_up_factors[cadres] = scale_up_factors.value.tolist()
     # scale_up_factors.drop(columns='value', inplace=True)
 
-    # Get salary
-    salary = get_hr_salary(cadres)
-
-    # Get total cost for all scenarios
-    total_cost = extract_results(
-        results_folder,
-        module='tlo.methods.healthsystem.summary',
-        key='HRScaling',
-        custom_generate_series=get_total_cost,
-        do_scaling=False
-    ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
-    total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
-    total_cost[cadres] = total_cost.value.tolist()
-    total_cost.drop(columns='value', inplace=True)
-    total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1)
-    total_cost.rename(columns={'index': 'year'}, inplace=True)
-
-    # total cost of all expansion years
-    total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year')
-
-    # total extra cost of all expansion years
-    extra_cost_all_yrs = total_cost_all_yrs.copy()
-    for s in param_names[1:]:
-        extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_0', :]
-    extra_cost_all_yrs.drop(index='s_0', inplace=True)
-
-    # get staff count = total cost / salary
-    staff_count = total_cost.copy()
-    for c in cadres:
-        staff_count.loc[:, c] = total_cost.loc[:, c] / salary[c].values[0]
-    staff_count.loc[:, 'all_cadres'] = staff_count[[c for c in staff_count.columns if c in cadres]].sum(axis=1)
-
-    # get extra count = staff count - staff count of no expansion s_1
-    # note that annual staff increase rate = scale up factor - 1
-    extra_staff = staff_count.copy()
-    for i in staff_count.index:
-        extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
-
-    # extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
-    #     index='s_1'
-    # )
-    # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
-
-    # check total cost calculated is increased as expected
-    years = range(2019, the_target_period[1].year + 1)
-    for s in param_names[1:]:
-        assert (abs(
-            total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] -
-            (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_0'),
-                                                       'all_cadres'].values[0]
-        ) < 1e6).all()
+    # # Get salary
+    # salary = get_hr_salary(cadres)
+    #
+    # # Get total cost for all scenarios
+    # total_cost = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='HRScaling',
+    #     custom_generate_series=get_total_cost,
+    #     do_scaling=False
+    # ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
+    # total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
+    # total_cost[cadres] = total_cost.value.tolist()
+    # total_cost.drop(columns='value', inplace=True)
+    # total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1)
+    # total_cost.rename(columns={'index': 'year'}, inplace=True)
+    #
+    # # total cost of all expansion years
+    # total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year')
+    #
+    # # total extra cost of all expansion years
+    # extra_cost_all_yrs = total_cost_all_yrs.copy()
+    # for s in param_names[1:]:
+    #     extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_0', :]
+    # extra_cost_all_yrs.drop(index='s_0', inplace=True)
+    #
+    # # get staff count = total cost / salary
+    # staff_count = total_cost.copy()
+    # for c in cadres:
+    #     staff_count.loc[:, c] = total_cost.loc[:, c] / salary[c].values[0]
+    # staff_count.loc[:, 'all_cadres'] = staff_count[[c for c in staff_count.columns if c in cadres]].sum(axis=1)
+    #
+    # # get extra count = staff count - staff count of no expansion s_1
+    # # note that annual staff increase rate = scale up factor - 1
+    # extra_staff = staff_count.copy()
+    # for i in staff_count.index:
+    #     extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
+    #
+    # # extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
+    # #     index='s_1'
+    # # )
+    # # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
+    #
+    # # check total cost calculated is increased as expected
+    # years = range(2019, the_target_period[1].year + 1)
+    # for s in param_names[1:]:
+    #     assert (abs(
+    #         total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] -
+    #         (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_0'),
+    #                                                    'all_cadres'].values[0]
+    #     ) < 1e6).all()
 
     # Absolute Number of Deaths and DALYs and Services
     num_deaths = extract_results(
@@ -691,9 +691,9 @@ def get_hcw_time_by_treatment(draw=21):
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_services.loc[0],
-                comparison='s_0')
+                comparison='s_1')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     # num_services_increased_percent = summarize(
     #     pd.DataFrame(
@@ -709,56 +709,56 @@ def get_hcw_time_by_treatment(draw=21):
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_deaths.loc[0],
-                comparison='s_0')
+                comparison='s_1')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_deaths_averted_percent = summarize(
         -1.0 *
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_deaths.loc[0],
-                comparison='s_0',
+                comparison='s_1',
                 scaled=True)
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_dalys_averted = summarize(
         -1.0 *
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_dalys.loc[0],
-                comparison='s_0')
+                comparison='s_1')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_dalys_averted_percent = summarize(
         -1.0 *
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_dalys.loc[0],
-                comparison='s_0',
+                comparison='s_1',
                 scaled=True
             )
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_dalys_by_cause_averted = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
             num_dalys_by_cause,
-            comparison='s_0',
+            comparison='s_1',
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_dalys_by_cause_averted_percent = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
             num_dalys_by_cause,
-            comparison='s_0',
+            comparison='s_1',
             scaled=True
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_2', :].sort_values(ascending=False)
     # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
@@ -779,10 +779,10 @@ def get_hcw_time_by_treatment(draw=21):
     num_appts_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
             num_appts,
-            comparison='s_0',
+            comparison='s_1',
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     # num_never_ran_appts_reduced = summarize(
     #     -1.0 * find_difference_relative_to_comparison_dataframe(
@@ -812,27 +812,27 @@ def get_hcw_time_by_treatment(draw=21):
     num_treatments_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
             num_treatments,
-            comparison='s_0',
+            comparison='s_1',
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
-    # num_treatments_increased_percent = summarize(
-    #     find_difference_relative_to_comparison_dataframe(
-    #         num_treatments,
-    #         comparison='s_1',
-    #         scaled=True
-    #     ),
-    #     only_mean=True
-    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    num_treatments_increased_percent = summarize(
+        find_difference_relative_to_comparison_dataframe(
+            num_treatments,
+            comparison='s_1',
+            scaled=True
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_treatments_total_increased = summarize(
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_treatments_total.loc[0],
-                comparison='s_0')
+                comparison='s_1')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
 
     # num_treatments_total_increased_percent = summarize(
     #     pd.DataFrame(
@@ -1111,7 +1111,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     extra_budget_allocation['Other'] = extra_budget_allocation[
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
-    name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}'
+    name_of_plot = f'3D DALYs averted (%) vs current allocation, {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
@@ -1799,7 +1799,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # plt.close(fig)
 
     # plot relative numbers for scenarios
-    name_of_plot = f'DALYs averted vs no extra budget allocation, {target_period()}'
+    name_of_plot = f'DALYs averted vs current allocation, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
     ax.set_ylabel('Millions')
@@ -1809,7 +1809,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Deaths averted vs no extra budget allocation, {target_period()}'
+    name_of_plot = f'Deaths averted vs current allocation, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
     ax.set_ylabel('Millions')
@@ -1849,24 +1849,24 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}'
-    extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
-        num_dalys_summarized.index).drop(index='s_0') / 1e6
-    column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA')
-    extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
-    fig, ax = plt.subplots(figsize=(9, 6))
-    extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Millions', fontsize='small')
-    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
-    xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-               fontsize='small', reverse=True)
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}'
+    # extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
+    #     num_dalys_summarized.index).drop(index='s_0') / 1e6
+    # column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA')
+    # extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     # # name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no expansion, {target_period()}'
     # # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6
@@ -1907,7 +1907,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'DALYs by cause averted: \nall cadres gap allocation vs no extra budget allocation, {target_period()}'
+    name_of_plot = f'DALYs by cause averted: \nall cadres gap allocation vs current allocation, {target_period()}'
     data_to_plot = num_dalys_by_cause_averted_CNP / 1e6
     # name_of_plot = f'DALYs by cause averted: C + P vs no expansion, {target_period()}'
     # data_to_plot = num_dalys_by_cause_averted_CP / 1e6
@@ -1923,7 +1923,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs no extra budget allocation, {target_period()}'
+    name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs current allocation, {target_period()}'
     data_to_plot = num_dalys_by_cause_averted_percent_CNP * 100
     fig, ax = plt.subplots()
     data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values)
@@ -2009,7 +2009,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'Services increased by treatment type \nagainst no expansion, {target_period()}'
+    name_of_plot = f'Services increased by treatment type vs current allocation, {target_period()}'
     data_to_plot = num_treatments_increased / 1e6
     yerr_services = np.array([
         (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values,
@@ -2033,7 +2033,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'DALYs by cause averted vs no extra budget allocation, {target_period()}'
+    name_of_plot = f'DALYs by cause averted vs current allocation, {target_period()}'
     num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
     yerr_dalys = np.array([
         (num_dalys_averted['mean'] - num_dalys_averted['lower']).values,

From 9cc522087a1eeabedc1a893ce81ef5a85abdeb9e Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 28 Oct 2024 23:13:54 +0000
Subject: [PATCH 166/218] Revert "plots against current allocation scenario"

This reverts commit 45b0d1664f93ba10423b1ebee1e8cb6866871d9e.
---
 ...dsion_by_officer_type_with_extra_budget.py | 210 +++++++++---------
 1 file changed, 105 insertions(+), 105 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 3deea18e8b..47a95af5f0 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -422,7 +422,7 @@ def get_hcw_time_by_treatment(draw=21):
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
     # param_names = ('s_0', 's_1', 's_2', 's_3', 's_11', 's_22')
-    param_names = ('s_1', 's_2', 's_3', 's_11', 's_22')
+    # param_names = ('s_1', 's_2', 's_3', 's_11', 's_22')
 
     # Define cadres in order
     cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
@@ -449,57 +449,57 @@ def get_hcw_time_by_treatment(draw=21):
     # scale_up_factors[cadres] = scale_up_factors.value.tolist()
     # scale_up_factors.drop(columns='value', inplace=True)
 
-    # # Get salary
-    # salary = get_hr_salary(cadres)
-    #
-    # # Get total cost for all scenarios
-    # total_cost = extract_results(
-    #     results_folder,
-    #     module='tlo.methods.healthsystem.summary',
-    #     key='HRScaling',
-    #     custom_generate_series=get_total_cost,
-    #     do_scaling=False
-    # ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
-    # total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
-    # total_cost[cadres] = total_cost.value.tolist()
-    # total_cost.drop(columns='value', inplace=True)
-    # total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1)
-    # total_cost.rename(columns={'index': 'year'}, inplace=True)
-    #
-    # # total cost of all expansion years
-    # total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year')
-    #
-    # # total extra cost of all expansion years
-    # extra_cost_all_yrs = total_cost_all_yrs.copy()
-    # for s in param_names[1:]:
-    #     extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_0', :]
-    # extra_cost_all_yrs.drop(index='s_0', inplace=True)
-    #
-    # # get staff count = total cost / salary
-    # staff_count = total_cost.copy()
-    # for c in cadres:
-    #     staff_count.loc[:, c] = total_cost.loc[:, c] / salary[c].values[0]
-    # staff_count.loc[:, 'all_cadres'] = staff_count[[c for c in staff_count.columns if c in cadres]].sum(axis=1)
-    #
-    # # get extra count = staff count - staff count of no expansion s_1
-    # # note that annual staff increase rate = scale up factor - 1
-    # extra_staff = staff_count.copy()
-    # for i in staff_count.index:
-    #     extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
-    #
-    # # extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
-    # #     index='s_1'
-    # # )
-    # # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
-    #
-    # # check total cost calculated is increased as expected
-    # years = range(2019, the_target_period[1].year + 1)
-    # for s in param_names[1:]:
-    #     assert (abs(
-    #         total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] -
-    #         (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_0'),
-    #                                                    'all_cadres'].values[0]
-    #     ) < 1e6).all()
+    # Get salary
+    salary = get_hr_salary(cadres)
+
+    # Get total cost for all scenarios
+    total_cost = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HRScaling',
+        custom_generate_series=get_total_cost,
+        do_scaling=False
+    ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
+    total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
+    total_cost[cadres] = total_cost.value.tolist()
+    total_cost.drop(columns='value', inplace=True)
+    total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1)
+    total_cost.rename(columns={'index': 'year'}, inplace=True)
+
+    # total cost of all expansion years
+    total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year')
+
+    # total extra cost of all expansion years
+    extra_cost_all_yrs = total_cost_all_yrs.copy()
+    for s in param_names[1:]:
+        extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_0', :]
+    extra_cost_all_yrs.drop(index='s_0', inplace=True)
+
+    # get staff count = total cost / salary
+    staff_count = total_cost.copy()
+    for c in cadres:
+        staff_count.loc[:, c] = total_cost.loc[:, c] / salary[c].values[0]
+    staff_count.loc[:, 'all_cadres'] = staff_count[[c for c in staff_count.columns if c in cadres]].sum(axis=1)
+
+    # get extra count = staff count - staff count of no expansion s_1
+    # note that annual staff increase rate = scale up factor - 1
+    extra_staff = staff_count.copy()
+    for i in staff_count.index:
+        extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
+
+    # extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
+    #     index='s_1'
+    # )
+    # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
+
+    # check total cost calculated is increased as expected
+    years = range(2019, the_target_period[1].year + 1)
+    for s in param_names[1:]:
+        assert (abs(
+            total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] -
+            (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_0'),
+                                                       'all_cadres'].values[0]
+        ) < 1e6).all()
 
     # Absolute Number of Deaths and DALYs and Services
     num_deaths = extract_results(
@@ -691,9 +691,9 @@ def get_hcw_time_by_treatment(draw=21):
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_services.loc[0],
-                comparison='s_1')
+                comparison='s_0')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     # num_services_increased_percent = summarize(
     #     pd.DataFrame(
@@ -709,56 +709,56 @@ def get_hcw_time_by_treatment(draw=21):
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_deaths.loc[0],
-                comparison='s_1')
+                comparison='s_0')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_deaths_averted_percent = summarize(
         -1.0 *
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_deaths.loc[0],
-                comparison='s_1',
+                comparison='s_0',
                 scaled=True)
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_dalys_averted = summarize(
         -1.0 *
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_dalys.loc[0],
-                comparison='s_1')
+                comparison='s_0')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_dalys_averted_percent = summarize(
         -1.0 *
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_dalys.loc[0],
-                comparison='s_1',
+                comparison='s_0',
                 scaled=True
             )
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_dalys_by_cause_averted = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
             num_dalys_by_cause,
-            comparison='s_1',
+            comparison='s_0',
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_dalys_by_cause_averted_percent = summarize(
         -1.0 * find_difference_relative_to_comparison_dataframe(
             num_dalys_by_cause,
-            comparison='s_1',
+            comparison='s_0',
             scaled=True
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_2', :].sort_values(ascending=False)
     # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
@@ -779,10 +779,10 @@ def get_hcw_time_by_treatment(draw=21):
     num_appts_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
             num_appts,
-            comparison='s_1',
+            comparison='s_0',
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
     # num_never_ran_appts_reduced = summarize(
     #     -1.0 * find_difference_relative_to_comparison_dataframe(
@@ -812,27 +812,27 @@ def get_hcw_time_by_treatment(draw=21):
     num_treatments_increased = summarize(
         find_difference_relative_to_comparison_dataframe(
             num_treatments,
-            comparison='s_1',
+            comparison='s_0',
         ),
         only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
-    num_treatments_increased_percent = summarize(
-        find_difference_relative_to_comparison_dataframe(
-            num_treatments,
-            comparison='s_1',
-            scaled=True
-        ),
-        only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+    # num_treatments_increased_percent = summarize(
+    #     find_difference_relative_to_comparison_dataframe(
+    #         num_treatments,
+    #         comparison='s_1',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
 
     num_treatments_total_increased = summarize(
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
                 num_treatments_total.loc[0],
-                comparison='s_1')
+                comparison='s_0')
         ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     # num_treatments_total_increased_percent = summarize(
     #     pd.DataFrame(
@@ -1111,7 +1111,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     extra_budget_allocation['Other'] = extra_budget_allocation[
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
-    name_of_plot = f'3D DALYs averted (%) vs current allocation, {target_period()}'
+    name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
@@ -1799,7 +1799,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # plt.close(fig)
 
     # plot relative numbers for scenarios
-    name_of_plot = f'DALYs averted vs current allocation, {target_period()}'
+    name_of_plot = f'DALYs averted vs no extra budget allocation, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
     ax.set_ylabel('Millions')
@@ -1809,7 +1809,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Deaths averted vs current allocation, {target_period()}'
+    name_of_plot = f'Deaths averted vs no extra budget allocation, {target_period()}'
     fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True)
     ax.set_title(name_of_plot)
     ax.set_ylabel('Millions')
@@ -1849,24 +1849,24 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    # name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}'
-    # extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
-    #     num_dalys_summarized.index).drop(index='s_0') / 1e6
-    # column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA')
-    # extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
-    # fig, ax = plt.subplots(figsize=(9, 6))
-    # extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    # ax.set_ylabel('Millions', fontsize='small')
-    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
-    # xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
-    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-    #            fontsize='small', reverse=True)
-    # plt.title(name_of_plot)
-    # fig.tight_layout()
-    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    # fig.show()
-    # plt.close(fig)
+    name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}'
+    extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
+        num_dalys_summarized.index).drop(index='s_0') / 1e6
+    column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA')
+    extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
+    fig, ax = plt.subplots(figsize=(9, 6))
+    extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
 
     # # name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no expansion, {target_period()}'
     # # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6
@@ -1907,7 +1907,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'DALYs by cause averted: \nall cadres gap allocation vs current allocation, {target_period()}'
+    name_of_plot = f'DALYs by cause averted: \nall cadres gap allocation vs no extra budget allocation, {target_period()}'
     data_to_plot = num_dalys_by_cause_averted_CNP / 1e6
     # name_of_plot = f'DALYs by cause averted: C + P vs no expansion, {target_period()}'
     # data_to_plot = num_dalys_by_cause_averted_CP / 1e6
@@ -1923,7 +1923,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs current allocation, {target_period()}'
+    name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs no extra budget allocation, {target_period()}'
     data_to_plot = num_dalys_by_cause_averted_percent_CNP * 100
     fig, ax = plt.subplots()
     data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values)
@@ -2009,7 +2009,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'Services increased by treatment type vs current allocation, {target_period()}'
+    name_of_plot = f'Services increased by treatment type \nagainst no expansion, {target_period()}'
     data_to_plot = num_treatments_increased / 1e6
     yerr_services = np.array([
         (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values,
@@ -2033,7 +2033,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'DALYs by cause averted vs current allocation, {target_period()}'
+    name_of_plot = f'DALYs by cause averted vs no extra budget allocation, {target_period()}'
     num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
     yerr_dalys = np.array([
         (num_dalys_averted['mean'] - num_dalys_averted['lower']).values,

From 6c32a78c1070571ed861113251e92b19951c6346 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 28 Oct 2024 23:28:04 +0000
Subject: [PATCH 167/218] minor reformat

---
 ...lysis_hr_expandsion_by_officer_type_with_extra_budget.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 47a95af5f0..19bf89635d 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -974,7 +974,7 @@ def find_never_ran_appts_that_need_specific_cadres():
         _proportions_total = _counts.sum(axis=1) / _counts_all.sum(axis=1)
         _cost_gap_proportions_total = _cost_gap.sum(axis=1) / hcw_cost_gap.sum(axis=1)
 
-        return _proportions_total, _cost_gap_proportions_total, _cost_gap
+        return _proportions_total, _cost_gap_proportions_total, _cost_gap, _cost_gap_percent
 
     never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres(
         cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'])
@@ -1681,7 +1681,7 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # name_of_plot = f'HCW cost gap distribution of never ran appointments that require CNP only, {target_period()}'
     # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']
-    # data_to_plot = never_ran_appts_info_that_need_CNP[2][cadres_to_plot] * 100
+    # data_to_plot = never_ran_appts_info_that_need_CNP[3][cadres_to_plot] * 100
     # fig, ax = plt.subplots(figsize=(12, 8))
     # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
     # #ax.set_ylim(0, 100)
@@ -2009,7 +2009,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'Services increased by treatment type \nagainst no expansion, {target_period()}'
+    name_of_plot = f'Services increased by treatment type \nvs no extra budget allocation, {target_period()}'
     data_to_plot = num_treatments_increased / 1e6
     yerr_services = np.array([
         (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values,

From 8eb06cecbcb8e0ccf75e6994001e6f7f8e33477d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 29 Oct 2024 23:32:16 +0000
Subject: [PATCH 168/218] calculate time used

---
 ...dsion_by_officer_type_with_extra_budget.py | 61 ++++++++++++++++---
 1 file changed, 54 insertions(+), 7 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 19bf89635d..d5ec9825a9 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -340,8 +340,8 @@ def format_appt_time_and_cost():
 
         return time, cost
 
-    def get_hcw_time_usage(_df):
-        """Return the number of treatments by short treatment id (total within the TARGET_PERIOD)"""
+    def get_frac_of_hcw_time_used(_df):
+        """Return the fraction of time used by cadre and facility level"""
         CNP_cols = ['date']
         for col in _df.columns[1:]:
             if ('Clinical' in col) | ('Nursing_and_Midwifery' in col) | ('Pharmacy' in col):
@@ -542,6 +542,14 @@ def get_hcw_time_by_treatment(draw=21):
         do_scaling=True
         ).pipe(set_param_names_as_column_index_level_0)
 
+    num_appts_by_level = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event_non_blank_appt_footprint',
+        custom_generate_series=get_num_appts_by_level,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
     num_services = extract_results(
         results_folder,
         module='tlo.methods.healthsystem.summary',
@@ -622,7 +630,7 @@ def get_hcw_time_by_treatment(draw=21):
         results_folder,
         module='tlo.methods.healthsystem.summary',
         key='Capacity_By_OfficerType_And_FacilityLevel',#'Capacity',#'Capacity_By_OfficerType_And_FacilityLevel',
-        custom_generate_series=get_hcw_time_usage,
+        custom_generate_series=get_frac_of_hcw_time_used,
         do_scaling=False
     ).pipe(set_param_names_as_column_index_level_0)
 
@@ -648,9 +656,10 @@ def get_hcw_time_by_treatment(draw=21):
     num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
-    num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex(param_names).reindex(
-        num_dalys_summarized.index
-    ).fillna(0.0)
+    num_appts_by_level_summarized = summarize(num_appts_by_level, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index).fillna(0.0)
+    num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex(
+        param_names).reindex(num_dalys_summarized.index).fillna(0.0)
     num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
@@ -764,7 +773,7 @@ def get_hcw_time_by_treatment(draw=21):
     # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
     num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_2', :].sort_values(
         ascending=False)
-    # num_dalys_by_cause_averted__percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values(
+    # num_dalys_by_cause_averted_percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values(
     #     ascending=False)
 
     # num_dalys_by_cause_averted_percent = summarize(
@@ -880,6 +889,39 @@ def get_hcw_time_by_treatment(draw=21):
          ) < 1e-6
     ).all()
 
+    # get time used by services delivered
+    def hcw_time_or_cost_used(time_cost_df=appt_time, count_df=num_appts_by_level_summarized):
+        cols_1 = count_df.columns
+        cols_2 = time_cost_df.columns
+        # check that appts (at a level) not in appt_time (as defined) have count 0 and drop them
+        # assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all() -> ('2', 'Tomography')
+        # replace Tomography from level 2 to level 3
+        count_df.loc[:, ('3', 'Tomography')] += count_df.loc[:, ('2', 'Tomography')]
+        count_df.loc[:, ('2', 'Tomography')] = 0
+        assert (count_df[list(set(cols_1) - set(cols_2))] == 0).all().all()
+        if len(list(set(cols_1) - set(cols_2))) > 0:
+            _count_df = count_df.drop(columns=list(set(cols_1) - set(cols_2)))
+        else:
+            _count_df = count_df.copy()
+        assert set(_count_df.columns).issubset(set(cols_2))
+        # calculate hcw time gap
+        use = pd.DataFrame(index=_count_df.index,
+                           columns=time_cost_df.index)
+        for i in use.index:
+            for j in use.columns:
+                use.loc[i, j] = _count_df.loc[i, :].mul(
+                    time_cost_df.loc[j, _count_df.columns]
+                ).sum()
+        # reorder columns to be consistent with cadres
+        use = use[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+                   'Dental', 'Laboratory', 'Mental', 'Radiography']]
+        # reorder index to be consistent with
+        use = use.reindex(num_dalys_summarized.index)
+
+        return use
+
+    hcw_time_used = hcw_time_or_cost_used(appt_time)
+
     # get HCW time and cost needed to run the never run appts
     def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized):
         cols_1 = count_df.columns
@@ -910,6 +952,11 @@ def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by
     hcw_time_gap = hcw_time_or_cost_gap(appt_time)
     hcw_cost_gap = hcw_time_or_cost_gap(appt_cost)
 
+    # hcw time demand to meet ran + never ran services
+    # assert (hcw_time_used.index == hcw_time_gap.index).all()
+    # assert (hcw_time_used.columns == hcw_time_gap.columns).all()
+    # hcw_time_demand = hcw_time_used + hcw_time_gap
+
     # cost gap proportions of cadres within each scenario
     hcw_cost_gap_percent = pd.DataFrame(index=hcw_cost_gap.index, columns=hcw_cost_gap.columns)
     for i in hcw_cost_gap_percent.index:

From 84392837711031322b26719fb65d37f4f02b2cce Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 30 Oct 2024 11:11:47 +0000
Subject: [PATCH 169/218] format scenario grouping and coloring

---
 ...dsion_by_officer_type_with_extra_budget.py | 108 +++++++++---------
 1 file changed, 56 insertions(+), 52 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index d5ec9825a9..6758249659 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -51,26 +51,6 @@
     's_33': 'D + N&M + P + O',
 }
 
-# group scenarios for presentation
-scenario_groups_init = {
-    'no_expansion': {'s_0'},
-    'all_cadres_expansion': {'s_1', 's_2', 's_3'},
-    'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'},
-    'two_cadres_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13',
-                             's_14', 's_15', 's_16', 's_17', 's_18'},
-    'three_cadres_expansion': {'s_19', 's_20', 's_21', 's_22', 's_23',
-                               's_24', 's_25', 's_26', 's_27', 's_28'},
-    'four_cadres_expansion': {'s_29', 's_30', 's_31', 's_32', 's_33'}
-}
-
-# group scenarios based on whether expand Clinical/Pharmacy
-scenario_groups = {
-    'C + P + D/N&M/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'},
-    'C + D/N&M/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'},
-    'P + D/N&M/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'},
-    'D/N&M/O/None': {'s_5', 's_6', 's_8', 's_13', 's_15', 's_17', 's_26', 's_0'}
-}
-
 
 def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None,
           the_target_period: Tuple[Date, Date] = None):
@@ -211,6 +191,43 @@ def find_difference_relative_to_comparison_dataframe(_df: pd.DataFrame, **kwargs
             for _idx, row in _df.iterrows()
         }, axis=1).T
 
+    # group scenarios for presentation
+    def scenario_grouping_coloring(by='effect'):
+        if by == 'effect':  # based on DALYs averted/whether to  expand Clinical + Pharmacy
+            grouping = {
+                'C + P + D/N&M/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'},
+                'C + D/N&M/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'},
+                'P + D/N&M/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'},
+                'D/N&M/O/None': {'s_5', 's_6', 's_8', 's_13', 's_15', 's_17', 's_26', 's_0'}
+            }
+            grouping_color = {
+                'D/N&M/O/None': 'lightpink',
+                'P + D/N&M/O/None': 'violet',
+                'C + D/N&M/O/None': 'darkorchid',
+                'C + P + D/N&M/O/None': 'darkturquoise',
+            }
+        elif by == 'expansion':  # based on how many cadres are expanded
+            grouping = {
+                'no_expansion': {'s_0'},
+                'all_cadres_expansion': {'s_1', 's_2', 's_3'},
+                'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'},
+                'two_cadres_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13',
+                                         's_14', 's_15', 's_16', 's_17', 's_18'},
+                'three_cadres_expansion': {'s_19', 's_20', 's_21', 's_22', 's_23',
+                                           's_24', 's_25', 's_26', 's_27', 's_28'},
+                'four_cadres_expansion': {'s_29', 's_30', 's_31', 's_32', 's_33'}
+
+            }
+            grouping_color = {
+                'no_expansion': 'gray',
+                'one_cadre_expansion': 'lightpink',
+                'two_cadres_expansion': 'violet',
+                'three_cadres_expansion': 'darkorchid',
+                'four_cadres_expansion': 'paleturquoise',
+                'all_cadres_expansion': 'darkturquoise'
+            }
+        return grouping, grouping_color
+
     def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
         """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the
          extent of the error bar.
@@ -252,9 +269,9 @@ def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
         xtick_label_detail = [substitute_labels[v] for v in xticks.values()]
         ax.set_xticklabels(xtick_label_detail, rotation=90)
 
-        legend_labels = list(scenario_groups_color.keys())
+        legend_labels = list(scenario_groups[1].keys())
         legend_handles = [plt.Rectangle((0, 0), 1, 1,
-                                        color=scenario_groups_color[label]) for label in legend_labels]
+                                        color=scenario_groups[1][label]) for label in legend_labels]
         ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
                   title='Scenario groups')
 
@@ -1126,26 +1143,13 @@ def find_never_ran_appts_that_need_specific_cadres():
         'Radiography': 'lightgray',
         'Other': 'gray'
     }
-    # scenario_groups_color_init = {
-    #     'no_expansion': 'gray',
-    #     'one_cadre_expansion': 'lightpink',
-    #     'two_cadres_expansion': 'violet',
-    #     'three_cadres_expansion': 'darkorchid',
-    #     'four_cadres_expansion': 'paleturquoise',
-    #     'all_cadres_expansion': 'darkturquoise'
-    # }
-    scenario_groups_color = {
-        'D/N&M/O/None': 'lightpink',
-        'P + D/N&M/O/None': 'violet',
-        'C + D/N&M/O/None': 'darkorchid',
-        'C + P + D/N&M/O/None': 'darkturquoise',
-    }
-
+    # get scenario color
+    scenario_groups = scenario_grouping_coloring(by='effect')
     scenario_color = {}
     for s in param_names:
-        for k in scenario_groups_color.keys():
-            if s in scenario_groups[k]:
-                scenario_color[s] = scenario_groups_color[k]
+        for k in scenario_groups[1].keys():
+            if s in scenario_groups[0][k]:
+                scenario_color[s] = scenario_groups[1][k]
 
     # representative_scenarios_color = {}
     # cmap_list = list(map(plt.get_cmap("Set3"), range(len(param_names))))
@@ -1190,9 +1194,9 @@ def find_never_ran_appts_that_need_specific_cadres():
     ax.invert_yaxis()
     ax.set_zlabel('Nursing and Midwifery (N&M)')
     ax.plot3D([0, 1], [0, 1], [0, 1], linestyle='-', color='orange', alpha=1.0, linewidth=2)
-    legend_labels = list(scenario_groups_color.keys()) + ['line of C = P = N&M']
+    legend_labels = list(scenario_groups[1].keys()) + ['line of C = P = N&M']
     legend_handles = [plt.Line2D([0, 0], [0, 0],
-                                 linestyle='none', marker='o', color=scenario_groups_color[label]
+                                 linestyle='none', marker='o', color=scenario_groups[1][label]
                                  ) for label in legend_labels[0:len(legend_labels) - 1]
                       ] + [plt.Line2D([0, 1], [0, 0], linestyle='-', color='orange')]
     plt.legend(legend_handles, legend_labels,
@@ -1219,9 +1223,9 @@ def find_never_ran_appts_that_need_specific_cadres():
     # ax.set_xlabel('Services increased %')
     # ax.set_ylabel('Treatments increased %')
     # ax.set_zlabel('DALYs averted %')
-    # legend_labels = list(scenario_groups_color.keys())
+    # legend_labels = list(scenario_groups[1].keys())
     # legend_handles = [plt.Line2D([0, 0], [0, 0],
-    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
     #                              ) for label in legend_labels
     #                   ]
     # plt.legend(legend_handles, legend_labels,
@@ -1245,9 +1249,9 @@ def find_never_ran_appts_that_need_specific_cadres():
     #            c=colors)
     # ax.set_xlabel('Services increased %')
     # ax.set_ylabel('Treatments increased %')
-    # legend_labels = list(scenario_groups_color.keys())
+    # legend_labels = list(scenario_groups[1].keys())
     # legend_handles = [plt.Line2D([0, 0], [0, 0],
-    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
     #                              ) for label in legend_labels
     #                   ]
     # plt.legend(legend_handles, legend_labels,
@@ -1270,9 +1274,9 @@ def find_never_ran_appts_that_need_specific_cadres():
     #            alpha=0.8, marker='o', c=colors)
     # ax.set_xlabel('Services increased %')
     # ax.set_ylabel('DALYs averted %')
-    # legend_labels = list(scenario_groups_color.keys())
+    # legend_labels = list(scenario_groups[1].keys())
     # legend_handles = [plt.Line2D([0, 0], [0, 0],
-    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
     #                              ) for label in legend_labels
     #                   ]
     # plt.legend(legend_handles, legend_labels,
@@ -1295,9 +1299,9 @@ def find_never_ran_appts_that_need_specific_cadres():
     #            alpha=0.8, marker='o', c=colors)
     # ax.set_xlabel('Treatments increased %')
     # ax.set_ylabel('DALYs averted %')
-    # legend_labels = list(scenario_groups_color.keys())
+    # legend_labels = list(scenario_groups[1].keys())
     # legend_handles = [plt.Line2D([0, 0], [0, 0],
-    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
     #                              ) for label in legend_labels
     #                   ]
     # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
@@ -1317,9 +1321,9 @@ def find_never_ran_appts_that_need_specific_cadres():
     #            alpha=0.8, marker='o', c=colors)
     # ax.set_xlabel('Service delivery ratio increased %')
     # ax.set_ylabel('DALYs averted %')
-    # legend_labels = list(scenario_groups_color.keys())
+    # legend_labels = list(scenario_groups[1].keys())
     # legend_handles = [plt.Line2D([0, 0], [0, 0],
-    #                              linestyle='none', marker='o', color=scenario_groups_color[label]
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
     #                              ) for label in legend_labels
     #                   ]
     # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)

From cfb29b64012f461f4e84dd1b945edcb9292736c1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 30 Oct 2024 11:43:41 +0000
Subject: [PATCH 170/218] format scenario grouping and coloring

---
 ...dsion_by_officer_type_with_extra_budget.py | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 6758249659..ef9e98cd3d 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -209,22 +209,26 @@ def scenario_grouping_coloring(by='effect'):
         elif by == 'expansion':  # based on how many cadres are expanded
             grouping = {
                 'no_expansion': {'s_0'},
-                'all_cadres_expansion': {'s_1', 's_2', 's_3'},
+                'all_cadres_equal_expansion': {'s_3'},
+                'all_cadres_gap_expansion': {'s_2'},
+                'all_cadres_current_expansion': {'s_1'},
                 'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'},
-                'two_cadres_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13',
-                                         's_14', 's_15', 's_16', 's_17', 's_18'},
-                'three_cadres_expansion': {'s_19', 's_20', 's_21', 's_22', 's_23',
-                                           's_24', 's_25', 's_26', 's_27', 's_28'},
-                'four_cadres_expansion': {'s_29', 's_30', 's_31', 's_32', 's_33'}
+                'two_cadres_equal_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13',
+                                               's_14', 's_15', 's_16', 's_17', 's_18'},
+                'three_cadres_equal_expansion': {'s_19', 's_20', 's_21', 's_22', 's_23',
+                                                 's_24', 's_25', 's_26', 's_27', 's_28'},
+                'four_cadres_equal_expansion': {'s_29', 's_30', 's_31', 's_32', 's_33'}
 
             }
             grouping_color = {
                 'no_expansion': 'gray',
                 'one_cadre_expansion': 'lightpink',
-                'two_cadres_expansion': 'violet',
-                'three_cadres_expansion': 'darkorchid',
-                'four_cadres_expansion': 'paleturquoise',
-                'all_cadres_expansion': 'darkturquoise'
+                'two_cadres_equal_expansion': 'violet',
+                'three_cadres_equal_expansion': 'darkorchid',
+                'four_cadres_equal_expansion': 'paleturquoise',
+                'all_cadres_equal_expansion': 'darkturquoise',
+                'all_cadres_current_expansion': 'deepskyblue',
+                'all_cadres_gap_expansion': 'royalblue',
             }
         return grouping, grouping_color
 
@@ -1144,7 +1148,8 @@ def find_never_ran_appts_that_need_specific_cadres():
         'Other': 'gray'
     }
     # get scenario color
-    scenario_groups = scenario_grouping_coloring(by='effect')
+    # scenario_groups = scenario_grouping_coloring(by='effect')
+    scenario_groups = scenario_grouping_coloring(by='expansion')
     scenario_color = {}
     for s in param_names:
         for k in scenario_groups[1].keys():

From 0345754063d487de5ecaa801d1592f5d79aecb94 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 30 Oct 2024 23:58:06 +0000
Subject: [PATCH 171/218] plot for time info

---
 ...dsion_by_officer_type_with_extra_budget.py | 257 ++++++++++++++----
 1 file changed, 198 insertions(+), 59 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index ef9e98cd3d..2b0e058045 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -374,7 +374,7 @@ def get_frac_of_hcw_time_used(_df):
 
         return _df
 
-    def get_hcw_time_by_treatment(draw=21):
+    def get_hcw_time_by_treatment():
         appointment_time_table = pd.read_csv(
             resourcefilepath
             / 'healthsystem'
@@ -422,23 +422,38 @@ def get_hcw_time_by_treatment(draw=21):
         )
 
         time_by_cadre_treatment_per_draw = compute_mean_across_runs(times_by_officer_category_treatment_id_per_draw_run)
-        time_by_cadre_treatment_no_expansion = pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[0],
-                                                                      orient='index')
-        time_by_cadre_treatment = pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[draw],
-                                                         orient='index')
-        time_by_cadre_treatment = time_by_cadre_treatment.reindex(time_by_cadre_treatment_no_expansion.index)
-        assert (time_by_cadre_treatment.index == time_by_cadre_treatment_no_expansion.index).all()
-        increased_time_by_cadre_treatment = time_by_cadre_treatment - time_by_cadre_treatment_no_expansion
-        increased_time_by_cadre_treatment.reset_index(drop=False, inplace=True)
-        for i in increased_time_by_cadre_treatment.index:
-            increased_time_by_cadre_treatment.loc[i, 'Cadre'] = \
-                increased_time_by_cadre_treatment.loc[i, 'index'][0]
-            increased_time_by_cadre_treatment.loc[i, 'Treatment'] = \
-                increased_time_by_cadre_treatment.loc[i, 'index'][1]
-        increased_time_by_cadre_treatment = increased_time_by_cadre_treatment.drop('index', axis=1).rename(
-            columns={0: 'value'}).pivot(index='Treatment', columns='Cadre', values='value').fillna(0.0)
-
-        return increased_time_by_cadre_treatment
+
+        # transform counter to dataframe
+        def format_time_by_cadre_treatment(_df):
+            _df.reset_index(drop=False, inplace=True)
+            for idx in _df.index:
+                _df.loc[idx, 'Cadre'] = _df.loc[idx, 'index'][0]
+                _df.loc[idx, 'Treatment'] = _df.loc[idx, 'index'][1]
+            _df = _df.drop('index', axis=1).rename(columns={0: 'value'}).pivot(
+                index='Treatment', columns='Cadre', values='value').fillna(0.0)
+
+            _series = _df.sum(axis=1)  # sum up cadres
+
+            return _df, _series
+
+        time_by_cadre_treatment_all_scenarios = {
+            f's_{key}': format_time_by_cadre_treatment(
+                pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index')
+            )[0] for key in range(len(param_names))
+        }
+
+        time_by_treatment_all_scenarios = {
+            f's_{key}': format_time_by_cadre_treatment(
+                pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index')
+            )[1] for key in range(len(param_names))
+
+        }
+        time_by_treatment_all_scenarios = pd.DataFrame(time_by_treatment_all_scenarios).T
+
+        time_increased_by_treatment = time_by_treatment_all_scenarios.subtract(
+            time_by_treatment_all_scenarios.loc['s_0', :], axis=1).drop('s_0', axis=0).add_suffix('*')
+
+        return time_increased_by_treatment
 
     # Get parameter/scenario names
     param_names = get_parameter_names_from_scenario_file()
@@ -716,7 +731,7 @@ def get_hcw_time_by_treatment(draw=21):
                                          for col in hcw_time_usage_summarized.columns]
     hcw_time_usage_summarized.columns = hcw_time_usage_summarized.columns.str.split(pat='|', expand=True)
 
-    # get relative numbers for scenarios, compared to no_expansion scenario: s_1
+    # get relative numbers for scenarios, compared to no_expansion scenario: s_0
     num_services_increased = summarize(
         pd.DataFrame(
             find_difference_relative_to_comparison_series(
@@ -725,6 +740,8 @@ def get_hcw_time_by_treatment(draw=21):
         ).T
     ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
+    hcw_time_increased_by_treatment_type = get_hcw_time_by_treatment().reindex(num_dalys_summarized.index).drop(['s_0'])
+
     # num_services_increased_percent = summarize(
     #     pd.DataFrame(
     #         find_difference_relative_to_comparison_series(
@@ -936,12 +953,15 @@ def hcw_time_or_cost_used(time_cost_df=appt_time, count_df=num_appts_by_level_su
         # reorder columns to be consistent with cadres
         use = use[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
                    'Dental', 'Laboratory', 'Mental', 'Radiography']]
-        # reorder index to be consistent with
+        # reorder index to be consistent with descending order of DALYs averted
         use = use.reindex(num_dalys_summarized.index)
 
-        return use
+        use_increased = use.subtract(use.loc['s_0', :], axis=1).drop('s_0', axis=0)
+
+        return use, use_increased
 
-    hcw_time_used = hcw_time_or_cost_used(appt_time)
+    hcw_time_used = hcw_time_or_cost_used(time_cost_df=appt_time)[0]
+    hcw_time_increased_by_cadre = hcw_time_or_cost_used(time_cost_df=appt_time)[1]
 
     # get HCW time and cost needed to run the never run appts
     def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized):
@@ -1041,8 +1061,10 @@ def find_never_ran_appts_that_need_specific_cadres():
         # if sum up all appt types/cadres
         _proportions_total = _counts.sum(axis=1) / _counts_all.sum(axis=1)
         _cost_gap_proportions_total = _cost_gap.sum(axis=1) / hcw_cost_gap.sum(axis=1)
+        _time_gap_proportions_total = _time_gap.sum(axis=1) / hcw_time_gap.sum(axis=1)
 
-        return _proportions_total, _cost_gap_proportions_total, _cost_gap, _cost_gap_percent
+        return (_proportions_total, _cost_gap_proportions_total, _cost_gap, _cost_gap_percent,
+                _time_gap_proportions_total, _time_gap)
 
     never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres(
         cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'])
@@ -1059,34 +1081,54 @@ def find_never_ran_appts_that_need_specific_cadres():
     never_ran_appts_info_that_need_P = get_never_ran_appts_info_that_need_specific_cadres(
         cadres_to_find=['Pharmacy'])
 
-    # cost proportions within never ran appts, in total of all cadres
+    # cost/time proportions within never ran appts, in total of all cadres
     p_cost = pd.DataFrame(index=num_services_summarized.index)
-    p_cost['C + N&M + P'] = never_ran_appts_info_that_need_CNP[1]
-    p_cost['C + P'] = never_ran_appts_info_that_need_CP[1]
-    p_cost['C + N&M'] = never_ran_appts_info_that_need_CN[1]
-    p_cost['N&M + P'] = never_ran_appts_info_that_need_NP[1]
+    p_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[1]
+    p_cost['C and P'] = never_ran_appts_info_that_need_CP[1]
+    p_cost['C and N&M'] = never_ran_appts_info_that_need_CN[1]
+    p_cost['N&M and P'] = never_ran_appts_info_that_need_NP[1]
     p_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[1]
     p_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[1]
     p_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[1]
     p_cost['Other cases'] = 1 - p_cost[p_cost.columns[0:7]].sum(axis=1)
 
-    # absolute cost gap within never ran appts
+    p_time = pd.DataFrame(index=num_services_summarized.index)
+    p_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[4]
+    p_time['C and P'] = never_ran_appts_info_that_need_CP[4]
+    p_time['C and N&M'] = never_ran_appts_info_that_need_CN[4]
+    p_time['N&M and P'] = never_ran_appts_info_that_need_NP[4]
+    p_time['Clinical (C)'] = never_ran_appts_info_that_need_C[4]
+    p_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[4]
+    p_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[4]
+    p_time['Other cases'] = 1 - p_time[p_time.columns[0:7]].sum(axis=1)
+
+    # absolute cost/time gap within never ran appts
     a_cost = pd.DataFrame(index=num_services_summarized.index)
-    a_cost['C + N&M + P'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1)
-    a_cost['C + P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1)
-    a_cost['C + N&M'] = never_ran_appts_info_that_need_CN[2].sum(axis=1)
-    a_cost['N&M + P'] = never_ran_appts_info_that_need_NP[2].sum(axis=1)
+    a_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1)
+    a_cost['C and P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1)
+    a_cost['C and N&M'] = never_ran_appts_info_that_need_CN[2].sum(axis=1)
+    a_cost['N&M and P'] = never_ran_appts_info_that_need_NP[2].sum(axis=1)
     a_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[2].sum(axis=1)
     a_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[2].sum(axis=1)
     a_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[2].sum(axis=1)
     a_cost['Other cases'] = hcw_cost_gap.sum(axis=1) - a_cost.sum(axis=1)
 
+    a_time = pd.DataFrame(index=num_services_summarized.index)
+    a_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[5].sum(axis=1)
+    a_time['C and P'] = never_ran_appts_info_that_need_CP[5].sum(axis=1)
+    a_time['C and N&M'] = never_ran_appts_info_that_need_CN[5].sum(axis=1)
+    a_time['N&M and P'] = never_ran_appts_info_that_need_NP[5].sum(axis=1)
+    a_time['Clinical (C)'] = never_ran_appts_info_that_need_C[5].sum(axis=1)
+    a_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[5].sum(axis=1)
+    a_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[5].sum(axis=1)
+    a_time['Other cases'] = hcw_time_gap.sum(axis=1) - a_time.sum(axis=1)
+
     # appts count proportions within never ran appts, in total of all cadres
     p_count = pd.DataFrame(index=num_services_summarized.index)
-    p_count['C + N&M + P'] = never_ran_appts_info_that_need_CNP[0]
-    p_count['C + P'] = never_ran_appts_info_that_need_CP[0]
-    p_count['C + N&M'] = never_ran_appts_info_that_need_CN[0]
-    p_count['N&M + P'] = never_ran_appts_info_that_need_NP[0]
+    p_count['C and P and N&M'] = never_ran_appts_info_that_need_CNP[0]
+    p_count['C and P'] = never_ran_appts_info_that_need_CP[0]
+    p_count['C and N&M'] = never_ran_appts_info_that_need_CN[0]
+    p_count['N&M and P'] = never_ran_appts_info_that_need_NP[0]
     p_count['Clinical (C)'] = never_ran_appts_info_that_need_C[0]
     p_count['Pharmacy (P)'] = never_ran_appts_info_that_need_P[0]
     p_count['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[0]
@@ -1094,10 +1136,10 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # define color for the cadres combinations above
     cadre_comb_color = {
-        'C + N&M + P': 'royalblue',
-        'C + P': 'turquoise',
-        'C + N&M': 'gold',
-        'N&M + P': 'yellowgreen',
+        'C and P and N&M': 'royalblue',
+        'C and P': 'turquoise',
+        'C and N&M': 'gold',
+        'N&M and P': 'yellowgreen',
         'Clinical (C)': 'mediumpurple',
         'Pharmacy (P)': 'limegreen',
         'Nursing_and_Midwifery (N&M)': 'pink',
@@ -1106,7 +1148,7 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results
 
-    # hcw time by cadre and treatment: draw = 21: C + N + P vs no expansion, draw = 10, C + P vs no expansion
+    # hcw time by cadre and treatment: draw = 22: C + N + P vs no expansion, draw = 11, C + P vs no expansion
     # time_increased_by_cadre_treatment_CNP = get_hcw_time_by_treatment(21)
     # time_increased_by_cadre_treatment_CP = get_hcw_time_by_treatment(10)
 
@@ -1615,23 +1657,41 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    # name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}'
-    # hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index)
-    # column_dcsa = hcw_time_gap_to_plot.pop('DCSA')
-    # hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa)
-    # fig, ax = plt.subplots(figsize=(9, 6))
-    # hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    # ax.set_ylabel('Minutes in Millions', fontsize='small')
-    # ax.set(xlabel=None)
-    # xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index]
-    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
-    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
-    #            fontsize='small', reverse=True)
-    # plt.title(name_of_plot)
-    # fig.tight_layout()
-    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    # fig.show()
-    # plt.close(fig)
+    name_of_plot = f'HCW time used by cadre in delivering services , {target_period()}'
+    data_to_plot = (hcw_time_used / 1e6).reindex(num_dalys_summarized.index)
+    column_dcsa = data_to_plot.pop('DCSA')
+    data_to_plot.insert(3, "DCSA", column_dcsa)
+    fig, ax = plt.subplots(figsize=(9, 6))
+    data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Minutes in Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}'
+    hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index)
+    column_dcsa = hcw_time_gap_to_plot.pop('DCSA')
+    hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa)
+    fig, ax = plt.subplots(figsize=(9, 6))
+    hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Minutes in Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
 
     name_of_plot = f'HCW cost needed by cadre to deliver never ran appointments, {target_period()}'
     hcw_cost_gap_to_plot = (hcw_cost_gap / 1e6).reindex(num_dalys_summarized.index)
@@ -1693,6 +1753,27 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Time proportions of never ran appointments that require specific cadres only, {target_period()}'
+    data_to_plot = p_time * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    ax.set_ylim(0, 100)
+    ax.set_ylabel('Percentage %')
+    ax.set_xlabel('Extra budget allocation scenario')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # # plot the average proportions of all scenarios
+    # for c in data_to_plot.columns:
+    #     plt.axhline(y=data_to_plot[c].mean(),
+    #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    #                 label=c)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'Cost distribution of never ran appointments that require specific cadres only, {target_period()}'
     data_to_plot = a_cost / 1e6
     fig, ax = plt.subplots(figsize=(12, 8))
@@ -1713,6 +1794,26 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'Time distribution of never ran appointments that require specific cadres only, {target_period()}'
+    data_to_plot = a_time / 1e6
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    ax.set_ylabel('minutes in millions')
+    ax.set_xlabel('Extra budget allocation scenario')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # # plot the average time of all scenarios
+    # for c in data_to_plot.columns:
+    #     plt.axhline(y=data_to_plot[c].mean(),
+    #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    #                 label=c)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'HCW cost gap by cadre distribution of never ran appointments, {target_period()}'
     cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
     hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100
@@ -2089,6 +2190,44 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
+    name_of_plot = f'HCW time-used increased by treatment type \nvs no extra budget allocation, {target_period()}'
+    data_to_plot = hcw_time_increased_by_treatment_type / 1e6
+    fig, ax = plt.subplots(figsize=(10, 6))
+    data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    ax.set_ylabel('Million minutes', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'HCW time-used increased by cadre \nvs no extra budget allocation, {target_period()}'
+    data_to_plot = hcw_time_increased_by_cadre / 1e6
+    column_dcsa = data_to_plot.pop('DCSA')
+    data_to_plot.insert(3, "DCSA", column_dcsa)
+    fig, ax = plt.subplots(figsize=(9, 6))
+    data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
+
     name_of_plot = f'DALYs by cause averted vs no extra budget allocation, {target_period()}'
     num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
     yerr_dalys = np.array([

From 23e485c22d32360ea058a1ac17ebf6e90995c16b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sun, 3 Nov 2024 00:07:19 +0000
Subject: [PATCH 172/218] plot for CNP permutation scenarios

---
 .../analysis_CNP_permutation.py               | 2447 +++++++++++++++++
 1 file changed, 2447 insertions(+)
 create mode 100644 src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py
new file mode 100644
index 0000000000..0ac2ebdc8d
--- /dev/null
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py
@@ -0,0 +1,2447 @@
+"""
+This file analyses and plots the services, DALYs, Deaths within different scenarios of expanding current hr by officer
+type given some extra budget. Return on investment and marginal productivity of each officer type will be examined.
+
+The scenarios are defined in scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py.
+"""
+
+import argparse
+from collections import Counter
+from pathlib import Path
+from typing import Tuple
+
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+import statsmodels.stats as ss
+from matplotlib import pyplot as plt
+
+from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
+    Minute_Salary_by_Cadre_Level,
+    extra_budget_fracs,
+)
+from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import (
+    HRHExpansionByCadreWithExtraBudget,
+)
+from tlo import Date
+from tlo.analysis.utils import (
+    APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
+    CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP,
+    COARSE_APPT_TYPE_TO_COLOR_MAP,
+    SHORT_TREATMENT_ID_TO_COLOR_MAP,
+    bin_hsi_event_details,
+    compute_mean_across_runs,
+    extract_results,
+    summarize,
+)
+
+# labels shown in place of the scenario names s_0 ... s_33 (e.g. as the x tick labels of the plots)
+substitute_labels = {
+    's_0': 'no_extra_budget_allocation',
+    's_1': 'all_cadres_current_allocation',
+    's_2': 'all_cadres_gap_allocation',
+    's_3': 'all_cadres_equal_allocation',
+    's_4': 'Clinical (C)', 's_5': 'DCSA (D)', 's_6': 'Nursing_and_Midwifery (N&M)', 's_7': 'Pharmacy (P)',
+    's_8': 'Other (O)',
+    's_9': 'C + D', 's_10': 'C + N&M', 's_11': 'C + P', 's_12': 'C + O', 's_13': 'D + N&M',
+    's_14': 'D + P', 's_15': 'D + O', 's_16': 'N&M + P', 's_17': 'N&M + O', 's_18': 'P + O',
+    's_19': 'C + D + N&M', 's_20': 'C + D + P', 's_21': 'C + D + O', 's_22': 'C + N&M + P', 's_23': 'C + N&M + O',
+    's_24': 'C + P + O', 's_25': 'D + N&M + P', 's_26': 'D + N&M + O', 's_27': 'D + P + O', 's_28': 'N&M + P + O',
+    's_29': 'C + D + N&M + P', 's_30': 'C + D + N&M + O', 's_31': 'C + D + P + O', 's_32': 'C + N&M + P + O',
+    's_33': 'D + N&M + P + O',
+}
+
+
+def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None,
+          the_target_period: Tuple[Date, Date] = None):
+    """
+    Extract results of number of services by appt type, number of DALYs, number of Deaths in the target period.
+    (To see whether to extract these results by short treatment id and/or disease.)
+    Calculate the extra budget allocated, extra staff by cadre, return on investment and marginal productivity by cadre.
+    """
+    TARGET_PERIOD = the_target_period
+
+    def make_graph_file_name(stub):  # general helper: path of the PNG in output_folder for the plot name `stub`
+        return output_folder / f"{stub.replace('*', '_star_')}.png"
+
+    def target_period() -> str:
+        """Format TARGET_PERIOD as a string of the form YYYY-YYYY, joining the year of each of its dates."""
+        return "-".join([f"{_date.year}" for _date in TARGET_PERIOD])
+
+    def get_parameter_names_from_scenario_file() -> Tuple[str, ...]:
+        """Return, as a tuple, the keys of `_scenarios` of the `Scenario` class used to create the results."""
+        scenario = HRHExpansionByCadreWithExtraBudget()
+        return tuple(scenario._scenarios.keys())
+
+    def get_num_appts(_df):
+        """Return the number of services by coarse appt type (total within the TARGET_PERIOD)"""
+        in_period = pd.to_datetime(_df.date).between(*TARGET_PERIOD)
+        return (_df.loc[in_period, 'Number_By_Appt_Type_Code']
+                .apply(pd.Series)
+                .rename(columns=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP)
+                .T.groupby(level=0).sum().T.sum())  # columns grouped without the deprecated groupby(axis=1)
+
+    def get_num_appts_by_level(_df):
+        """Total the nested counts logged in 'Number_By_Appt_Type_Code_And_Level' over the TARGET_PERIOD.
+
+        Each logged value is expanded with `pd.DataFrame.from_dict`; the frames are concatenated keyed by their
+        row label and unstacked into one row per logged entry (gaps filled with 0), then summed column-wise.
+        """
+        in_period = pd.to_datetime(_df['date']).between(*TARGET_PERIOD)
+        logged = _df.loc[in_period, 'Number_By_Appt_Type_Code_And_Level']
+        per_entry = pd.concat(
+            {row_label: pd.DataFrame.from_dict(nested) for row_label, nested in logged.items()}
+        )
+        per_entry = per_entry.unstack().fillna(0.0).astype(int)
+        return per_entry.sum(axis=0)
+
+    def get_num_services(_df):
+        """Return a one-element Series: the number of services of all appt types within the TARGET_PERIOD."""
+        in_period = pd.to_datetime(_df.date).between(*TARGET_PERIOD)
+        counts_by_appt_type = _df.loc[in_period, 'Number_By_Appt_Type_Code'].apply(pd.Series)
+        grand_total = counts_by_appt_type.sum().sum()
+        return pd.Series(data=grand_total)
+
+    def get_num_treatments(_df):
+        """Sum the 'TREATMENT_ID' counts over the TARGET_PERIOD by short treatment id (prefix before '_' + '*')."""
+        in_period = pd.to_datetime(_df.date).between(*TARGET_PERIOD)
+        totals = _df.loc[in_period, 'TREATMENT_ID'].apply(pd.Series).sum()
+        totals.index = totals.index.map(lambda treatment_id: treatment_id.split('_')[0] + "*")
+        return totals.groupby(level=0).sum()
+
+    def get_num_treatments_total(_df):
+        """Return a one-element Series: the number of treatments of all kinds within the TARGET_PERIOD."""
+        in_period = pd.to_datetime(_df.date).between(*TARGET_PERIOD)
+        totals = _df.loc[in_period, 'TREATMENT_ID'].apply(pd.Series).sum()
+        totals.index = totals.index.map(lambda treatment_id: treatment_id.split('_')[0] + "*")
+        return pd.Series(totals.groupby(level=0).sum().sum())
+
+    def get_num_deaths(_df):
+        """Return a one-element Series: the number of logged rows dated within the TARGET_PERIOD."""
+        return pd.Series(data=_df.loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD)].shape[0])
+
+    def get_num_dalys(_df):
+        """Return a one-element Series holding the total number of DALYS (Stacked) within the TARGET_PERIOD.
+
+        Raises an AssertionError when the year of one of the TARGET_PERIOD dates has no record (to guard against
+        inadvertently using results from runs that crashed mid-way through the simulation).
+        """
+        period_years = [_date.year for _date in TARGET_PERIOD]
+        recorded_years = set(_df.year.unique())
+        assert recorded_years.issuperset(period_years), "Some years are not recorded."
+
+        in_period = _df.year.between(*period_years)
+        daly_columns = _df.loc[in_period].drop(columns=['date', 'sex', 'age_range', 'year'])
+        return pd.Series(data=daly_columns.sum().sum())
+
+    def get_num_dalys_yearly(_df):
+        """Return the total number of DALYS (Stacked) of each year in the TARGET_PERIOD, indexed by year.
+
+        Raises an AssertionError when the year of one of the TARGET_PERIOD dates has no record (to guard against
+        inadvertently using results from runs that crashed mid-way through the simulation).
+        """
+        period_years = [_date.year for _date in TARGET_PERIOD]
+        recorded_years = set(_df.year.unique())
+        assert recorded_years.issuperset(period_years), "Some years are not recorded."
+
+        in_period = _df.year.between(*period_years)
+        dalys_by_year = _df.loc[in_period].drop(columns=['date', 'sex', 'age_range']).groupby('year').sum()
+        return dalys_by_year.sum(axis=1)
+
+    def get_num_dalys_by_cause(_df):
+        """Return the total number of DALYS (Stacked) of each cause column within the TARGET_PERIOD.
+
+        Raises an AssertionError when the year of one of the TARGET_PERIOD dates has no record (to guard against
+        inadvertently using results from runs that crashed mid-way through the simulation).
+        """
+        period_years = [_date.year for _date in TARGET_PERIOD]
+        recorded_years = set(_df.year.unique())
+        assert recorded_years.issuperset(period_years), "Some years are not recorded."
+        in_period = _df.year.between(*period_years)
+        return _df.loc[in_period].drop(columns=['date', 'year', 'li_wealth']).sum(axis=0)
+
+    def set_param_names_as_column_index_level_0(_df):
+        """Relabel level 0 of the columns of `_df` (in place) with the entry of `param_names` at that position."""
+        name_of_draw = dict(enumerate(param_names))
+        new_level0 = [name_of_draw.get(draw) for draw in _df.columns.levels[0]]
+        assert len(new_level0) == len(_df.columns.levels[0])
+        _df.columns = _df.columns.set_levels(new_level0, level=0)
+        return _df
+
+    def find_difference_relative_to_comparison_series(
+        _ser: pd.Series,
+        comparison: str,
+        scaled: bool = False,
+        drop_comparison: bool = True,
+    ):
+        """For a pd.Series with a multi-index of draw (level 0) and run (level 1), find within each run the
+        difference `X - COMPARISON` between every draw and the draw `comparison`. If `scaled`, the difference is
+        divided by the value of `comparison`. The `comparison` draw itself is dropped if `drop_comparison`."""
+        def _relative_to_comparison(per_run):
+            return (per_run - per_run[comparison]) / (per_run[comparison] if scaled else 1.0)
+
+        by_run = _ser.unstack(level=0).apply(_relative_to_comparison, axis=1)
+        to_drop = [comparison] if drop_comparison else []
+        return by_run.drop(columns=to_drop).stack()
+
+    def find_difference_relative_to_comparison_dataframe(_df: pd.DataFrame, **kwargs):
+        """Run `find_difference_relative_to_comparison_series` on every row of `_df`; one result row per row."""
+        differences = {}
+        for _idx, row in _df.iterrows():
+            differences[_idx] = find_difference_relative_to_comparison_series(row, **kwargs)
+        return pd.concat(differences, axis=1).T
+
+    def scenario_grouping_coloring(by='effect'):
+        """Group scenarios for presentation: return (scenarios of each group, colour of each group) for `by`."""
+        if by == 'effect':  # based on DALYs averted/whether to expand Clinical + Pharmacy
+            grouping = {
+                'C + P + D/N&M/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'},
+                'C + D/N&M/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'},
+                'P + D/N&M/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'},
+                'D/N&M/O/None': {'s_5', 's_6', 's_8', 's_13', 's_15', 's_17', 's_26', 's_0'}
+            }
+            grouping_color = {
+                'D/N&M/O/None': 'lightpink',
+                'P + D/N&M/O/None': 'violet',
+                'C + D/N&M/O/None': 'darkorchid',
+                'C + P + D/N&M/O/None': 'darkturquoise',
+            }
+        elif by == 'expansion':  # based on how many cadres are expanded
+            grouping = {
+                'no_expansion': {'s_0'}, 'all_cadres_equal_expansion': {'s_3'},
+                'all_cadres_gap_expansion': {'s_2'},
+                'all_cadres_current_expansion': {'s_1'},
+                'one_cadre_expansion': {'s_4', 's_5', 's_6', 's_7', 's_8'},
+                'two_cadres_equal_expansion': {'s_9', 's_10', 's_11', 's_12', 's_13',
+                                               's_14', 's_15', 's_16', 's_17', 's_18'},
+                'three_cadres_equal_expansion': {'s_19', 's_20', 's_21', 's_22', 's_23',
+                                                 's_24', 's_25', 's_26', 's_27', 's_28'},
+                'four_cadres_equal_expansion': {'s_29', 's_30', 's_31', 's_32', 's_33'}
+            }
+            grouping_color = {
+                'no_expansion': 'gray',
+                'one_cadre_expansion': 'lightpink',
+                'two_cadres_equal_expansion': 'violet',
+                'three_cadres_equal_expansion': 'darkorchid',
+                'four_cadres_equal_expansion': 'paleturquoise',
+                'all_cadres_equal_expansion': 'darkturquoise',
+                'all_cadres_current_expansion': 'deepskyblue',
+                'all_cadres_gap_expansion': 'royalblue',
+            }
+        else:  # fail clearly rather than with an UnboundLocalError at the return below
+            raise ValueError(f"Unknown grouping '{by}': expected 'effect' or 'expansion'.")
+        return grouping, grouping_color
+
+    def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
+        """Make a vertical bar plot with one bar per row of _df: column 'mean' gives the height of the bar and
+         columns 'lower'/'upper' the extent of the error bar. Return (fig, ax).
+         Annotated with percent statistics from _df_percent, if annotation=True and _df_percent is not None."""
+
+        yerr = np.array([
+            (_df['mean'] - _df['lower']).values,
+            (_df['upper'] - _df['mean']).values,
+        ])
+
+        xticks = {(i + 0.5): k for i, k in enumerate(_df.index)}
+
+        colors = [scenario_color[s] for s in _df.index]
+
+        fig, ax = plt.subplots(figsize=(18, 6))
+        ax.bar(
+            xticks.keys(),
+            _df['mean'].values,
+            yerr=yerr,
+            alpha=0.8,
+            ecolor='black',
+            color=colors,
+            capsize=10,
+            label=xticks.values(),
+            zorder=100,
+        )
+
+        if annotation and _df_percent is not None:  # without the statistics there is nothing to annotate
+            assert (_df.index == _df_percent.index).all()
+            for xpos, ypos, text1, text2, text3 in zip(xticks.keys(), _df['upper'].values,
+                                                       _df_percent['mean'].values,
+                                                       _df_percent['lower'].values,
+                                                       _df_percent['upper'].values):
+                text = f"{int(round(text1 * 100, 2))}%\n{[round(text2, 2),round(text3, 2)]}"
+                ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize='xx-small')
+
+        ax.set_xticks(list(xticks.keys()))
+
+        xtick_label_detail = [substitute_labels[v] for v in xticks.values()]
+        ax.set_xticklabels(xtick_label_detail, rotation=90)
+
+        legend_labels = list(scenario_groups[1].keys())
+        legend_handles = [plt.Rectangle((0, 0), 1, 1,
+                                        color=scenario_groups[1][label]) for label in legend_labels]
+        ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
+                  title='Scenario groups')
+
+        ax.grid(axis="y")
+        ax.spines['top'].set_visible(False)
+        ax.spines['right'].set_visible(False)
+        fig.tight_layout()
+
+        return fig, ax
+
+    def get_scale_up_factor(_df):
+        """
+        Return a series indexed by 'year_of_scale_up' (rows logged within the TARGET_PERIOD only),
+        whose values are the lists of the scale up factors of that year, in the order of `cadres`.
+        """
+        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'scale_up_factor']
+                      ].set_index('year_of_scale_up')
+        _df = _df['scale_up_factor'].apply(pd.Series)  # expand each logged entry into one column per key
+        assert (_df.columns == cadres).all()  # the columns must be exactly `cadres`, in that order
+        _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index}  # one single-item list per year
+        _df_1 = pd.DataFrame(data=_dict).T  # one row per year; column 0 holds the list of factors
+        return pd.Series(
+            _df_1.loc[:, 0], index=_df_1.index
+        )
+
+    def get_total_cost(_df):
+        """
+        Return a series indexed by 'year_of_scale_up' (rows logged within the TARGET_PERIOD only),
+        whose values are the lists of the 'total_hr_salary' entries of that year, in the order of `cadres`.
+        """
+        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'total_hr_salary']
+                      ].set_index('year_of_scale_up')
+        _df = _df['total_hr_salary'].apply(pd.Series)  # expand each logged entry into one column per key
+        assert (_df.columns == cadres).all()  # the columns must be exactly `cadres`, in that order
+        _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index}  # one single-item list per year
+        _df_1 = pd.DataFrame(data=_dict).T  # one row per year; column 0 holds the list of costs
+        return pd.Series(
+            _df_1.loc[:, 0], index=_df_1.index
+        )
+
+    def get_current_hr(cadres):
+        """
+        Return two one-row frames with one column per cadre of `cadres`, from ResourceFile_Daily_Capabilities.csv:
+        the summed 'Staff_Count', and the minutes per year (the summed 'Total_Mins_Per_Day' * 365.25).
+        """
+        capabilities_file = Path(
+            resourcefilepath / 'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv'
+        )
+        by_category = pd.read_csv(capabilities_file).groupby('Officer_Category').agg(
+            {'Staff_Count': 'sum', 'Total_Mins_Per_Day': 'sum'}).reset_index()
+        by_category['Total_Minutes_Per_Year'] = by_category['Total_Mins_Per_Day'] * 365.25
+        by_category = by_category.drop(['Total_Mins_Per_Day'], axis=1)
+        selected = by_category.loc[by_category['Officer_Category'].isin(cadres)]
+
+        def _one_row(column):
+            return selected[['Officer_Category', column]].set_index('Officer_Category').T
+
+        return _one_row('Staff_Count')[cadres], _one_row('Total_Minutes_Per_Year')[cadres]
+
+    def get_hr_salary(cadres):
+        """
+        Return a one-row frame of 'Annual_Salary_USD' with one column per cadre of `cadres`, read from
+        ResourceFile_Annual_Salary_Per_Cadre.csv.
+        """
+        salary_file = Path(resourcefilepath / 'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv')
+        all_salaries = pd.read_csv(salary_file, index_col=False)
+        selected = all_salaries.loc[all_salaries['Officer_Category'].isin(cadres)]
+        one_row = selected[['Officer_Category', 'Annual_Salary_USD']].set_index('Officer_Category').T
+
+        return one_row[cadres]
+
+    def format_appt_time_and_cost():
+        """
+        Return two frames with officer categories as rows and (facility level, appt type) as columns: the minutes
+        taken per appt, and the cost in USD of those minutes priced with Minute_Salary_by_Cadre_Level.
+        """
+        appt_table = pd.read_csv(
+            Path(resourcefilepath / 'healthsystem' / 'human_resources' / 'definitions'
+                 / 'ResourceFile_Appt_Time_Table.csv'), index_col=False)
+
+        def _wide(frame, values):
+            return frame.pivot(index=['Facility_Level', 'Appt_Type_Code'], columns='Officer_Category',
+                               values=values).fillna(0.0).T
+
+        time = _wide(appt_table, 'Time_Taken_Mins')
+        costed = appt_table.merge(Minute_Salary_by_Cadre_Level, on=['Facility_Level', 'Officer_Category'], how='left')
+        costed['cost_USD'] = costed['Time_Taken_Mins'] * costed['Minute_Salary_USD']
+        return time, _wide(costed, 'cost_USD')
+
+    def get_frac_of_hcw_time_used(_df):
+        """Return the mean of every logged column over the log entries dated within the TARGET_PERIOD.
+
+        NOTE(review): the columns are presumably the fractions of time used, one per cadre and facility level -
+        confirm against the log that is passed in. All columns are kept: an earlier filter that retained only
+        the columns naming 'Clinical', 'Nursing_and_Midwifery' or 'Pharmacy' is no longer applied.
+        """
+        in_period = pd.to_datetime(_df['date']).between(*TARGET_PERIOD)
+        mean_over_entries = _df.loc[in_period, :].set_index('date').mean(axis=0)  # average over years
+
+        return mean_over_entries
+
+
+    def get_hcw_time_by_treatment():
+        """
+        Return the increase in healthcare worker time used per short treatment id, relative to scenario 's_0'.
+
+        The time of an HSI event for an officer category is its count * the number of appts of each type in its
+        footprint * the minutes per appt from ResourceFile_Appt_Time_Table.csv (0 if the appt type, facility level
+        and officer category combination is not in the table). Events are binned over the TARGET_PERIOD by
+        `bin_hsi_event_details`, averaged across runs by `compute_mean_across_runs`, and summed over cadres.
+
+        The frame returned has one row per scenario of `param_names` other than 's_0', and one column per short
+        treatment id (the text before the first underscore of the treatment id) suffixed with '*'.
+
+        The scenario labels are taken from `param_names` by draw number rather than from a hard-coded mapping.
+        """
+        appointment_time_table = pd.read_csv(
+            resourcefilepath
+            / 'healthsystem'
+            / 'human_resources'
+            / 'definitions'
+            / 'ResourceFile_Appt_Time_Table.csv',
+            index_col=["Appt_Type_Code", "Facility_Level", "Officer_Category"]
+        )
+
+        appt_type_facility_level_officer_category_to_appt_time = (
+            appointment_time_table.Time_Taken_Mins.to_dict()
+        )
+
+        officer_categories = appointment_time_table.index.levels[
+            appointment_time_table.index.names.index("Officer_Category")
+        ].to_list()
+
+        times_by_officer_category_treatment_id_per_draw_run = bin_hsi_event_details(
+            results_folder,
+            lambda event_details, count: sum(
+                [
+                    Counter({
+                        (
+                            officer_category,
+                            event_details["treatment_id"].split("_")[0]
+                        ):
+                            count
+                            * appt_number
+                            * appt_type_facility_level_officer_category_to_appt_time.get(
+                                (
+                                    appt_type,
+                                    event_details["facility_level"],
+                                    officer_category
+                                ),
+                                0
+                            )
+                        for officer_category in officer_categories
+                    })
+                    for appt_type, appt_number in event_details["appt_footprint"]
+                ],
+                Counter()
+            ),
+            *TARGET_PERIOD,
+            True
+        )
+
+        time_by_cadre_treatment_per_draw = compute_mean_across_runs(times_by_officer_category_treatment_id_per_draw_run)
+
+        # transform counter to dataframe
+        def format_time_by_cadre_treatment(_df):
+            """Return (frame of time by treatment (rows) and cadre (columns), its row sums over the cadres)."""
+            _df.reset_index(drop=False, inplace=True)
+            for idx in _df.index:
+                _df.loc[idx, 'Cadre'] = _df.loc[idx, 'index'][0]
+                _df.loc[idx, 'Treatment'] = _df.loc[idx, 'index'][1]
+            _df = _df.drop('index', axis=1).rename(columns={0: 'value'}).pivot(
+                index='Treatment', columns='Cadre', values='value').fillna(0.0)
+
+            _series = _df.sum(axis=1)  # sum up cadres
+
+            return _df, _series
+
+        # label each draw with its scenario name: the draw number is the position of the scenario in
+        # `param_names`, so that the labels always match the scenarios that are analysed
+        # (only the time summed over cadres is needed here)
+        time_by_treatment_all_scenarios = {
+            param_names[key]: format_time_by_cadre_treatment(
+                pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index')
+            )[1] for key in range(len(param_names))
+        }
+        time_by_treatment_all_scenarios = pd.DataFrame(time_by_treatment_all_scenarios).T
+
+        time_increased_by_treatment = time_by_treatment_all_scenarios.subtract(
+            time_by_treatment_all_scenarios.loc['s_0', :], axis=1).drop('s_0', axis=0).add_suffix('*')
+
+        return time_increased_by_treatment
+
+    # Get parameter/scenario names
+    param_names = ('s_0', 's_10', 's_11', 's_16', 's_22')
+
+    # Define cadres in order
+    cadres = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+              'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
+
+    # Get appointment time and cost requirement
+    appt_time, appt_cost = format_appt_time_and_cost()
+
+    # Get current (year of 2018/2019) hr counts and capabilities
+    # (a single call is unpacked, so that the resource file is read and aggregated once only)
+    curr_hr, curr_hr_cap = get_current_hr(cadres)
+
+    # Get scale up factors for all scenarios
+    scale_up_factors = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HRScaling',
+        custom_generate_series=get_scale_up_factor,
+        do_scaling=False
+    ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
+    # check that the scale up factors are almost the same between each run within each draw
+    # assert scale_up_factors.eq(scale_up_factors.iloc[:, 0], axis=0).all().all()
+    # keep scale up factors of only one run within each draw
+    scale_up_factors = scale_up_factors.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
+    scale_up_factors[cadres] = scale_up_factors.value.tolist()
+    scale_up_factors.drop(columns='value', inplace=True)
+
+    # get total capabilities by cadre in the target period
+    hcw_time_capabilities = scale_up_factors.copy()
+    assert (hcw_time_capabilities.columns[2:] == curr_hr_cap.columns).all()
+    hcw_time_capabilities[hcw_time_capabilities.columns[2:]] = (
+        hcw_time_capabilities[hcw_time_capabilities.columns[2:]].mul(curr_hr_cap.values, axis=1))
+    hcw_time_capabilities = hcw_time_capabilities.groupby(by=['draw']).sum().drop(columns=['index'])  # sum up years
+
+    # # Get salary
+    # salary = get_hr_salary(cadres)
+    #
+    # # Get total cost for all scenarios
+    # total_cost = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='HRScaling',
+    #     custom_generate_series=get_total_cost,
+    #     do_scaling=False
+    # ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
+    # total_cost = total_cost.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
+    # total_cost[cadres] = total_cost.value.tolist()
+    # total_cost.drop(columns='value', inplace=True)
+    # total_cost['all_cadres'] = total_cost[[c for c in total_cost.columns if c in cadres]].sum(axis=1)
+    # total_cost.rename(columns={'index': 'year'}, inplace=True)
+    #
+    # # total cost of all expansion years
+    # total_cost_all_yrs = total_cost.groupby('draw').sum().drop(columns='year')
+    #
+    # # total extra cost of all expansion years
+    # extra_cost_all_yrs = total_cost_all_yrs.copy()
+    # for s in param_names[1:]:
+    #     extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_0', :]
+    # extra_cost_all_yrs.drop(index='s_0', inplace=True)
+    #
+    # # get staff count = total cost / salary
+    # staff_count = total_cost.copy()
+    # for c in cadres:
+    #     staff_count.loc[:, c] = total_cost.loc[:, c] / salary[c].values[0]
+    # staff_count.loc[:, 'all_cadres'] = staff_count[[c for c in staff_count.columns if c in cadres]].sum(axis=1)
+    #
+    # # get extra count = staff count - staff count of no expansion s_1
+    # # note that annual staff increase rate = scale up factor - 1
+    # extra_staff = staff_count.copy()
+    # for i in staff_count.index:
+    #     extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
+    #
+    # # extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
+    # #     index='s_1'
+    # # )
+    # # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
+    #
+    # # check total cost calculated is increased as expected
+    # years = range(2019, the_target_period[1].year + 1)
+    # for s in param_names[1:]:
+    #     assert (abs(
+    #         total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] -
+    #         (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_0'),
+    #                                                    'all_cadres'].values[0]
+    #     ) < 1e6).all()
+
+    # Absolute Number of Deaths and DALYs and Services
+    num_deaths = extract_results(
+        results_folder,
+        module='tlo.methods.demography',
+        key='death',
+        custom_generate_series=get_num_deaths,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_dalys = extract_results(
+        results_folder,
+        module='tlo.methods.healthburden',
+        key='dalys_stacked',
+        custom_generate_series=get_num_dalys,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    # num_dalys_yearly = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthburden',
+    #     key='dalys_stacked',
+    #     custom_generate_series=get_num_dalys_yearly,
+    #     do_scaling=True
+    # ).pipe(set_param_names_as_column_index_level_0)
+
+    num_dalys_by_cause = extract_results(
+        results_folder,
+        module="tlo.methods.healthburden",
+        key="dalys_by_wealth_stacked_by_age_and_time",
+        custom_generate_series=get_num_dalys_by_cause,
+        do_scaling=True,
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_appts = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event_non_blank_appt_footprint',
+        custom_generate_series=get_num_appts,
+        do_scaling=True
+        ).pipe(set_param_names_as_column_index_level_0)
+
+    num_appts_by_level = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event_non_blank_appt_footprint',
+        custom_generate_series=get_num_appts_by_level,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_services = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event_non_blank_appt_footprint',
+        custom_generate_series=get_num_services,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_treatments = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event_non_blank_appt_footprint',
+        custom_generate_series=get_num_treatments,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_treatments_total = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='HSI_Event_non_blank_appt_footprint',
+        custom_generate_series=get_num_treatments_total,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_never_ran_appts = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Never_ran_HSI_Event',
+        custom_generate_series=get_num_appts,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_never_ran_appts_by_level = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Never_ran_HSI_Event',
+        custom_generate_series=get_num_appts_by_level,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    num_never_ran_services = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Never_ran_HSI_Event',
+        custom_generate_series=get_num_services,
+        do_scaling=True
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    # num_never_ran_treatments_total = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='Never_ran_HSI_Event',
+    #     custom_generate_series=get_num_treatments_total,
+    #     do_scaling=True
+    # ).pipe(set_param_names_as_column_index_level_0)
+
+    # num_never_ran_treatments = extract_results(
+    #     results_folder,
+    #     module='tlo.methods.healthsystem.summary',
+    #     key='Never_ran_HSI_Event',
+    #     custom_generate_series=get_num_treatments,
+    #     do_scaling=True
+    # ).pipe(set_param_names_as_column_index_level_0)
+
+    # get total service demand
+    assert len(num_services) == len(num_never_ran_services) == 1
+    assert (num_services.columns == num_never_ran_services.columns).all()
+    num_services_demand = num_services + num_never_ran_services
+    # ratio_services = num_services / num_services_demand
+
+    assert (num_appts.columns == num_never_ran_appts.columns).all()
+    num_never_ran_appts.loc['Lab / Diagnostics', :] = 0
+    num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0)
+    assert (num_appts.index == num_never_ran_appts.index).all()
+    num_appts_demand = num_appts + num_never_ran_appts
+
+    hcw_time_usage = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='Capacity_By_OfficerType_And_FacilityLevel',  # other log key tried here: 'Capacity'
+        custom_generate_series=get_frac_of_hcw_time_used,
+        do_scaling=False
+    ).pipe(set_param_names_as_column_index_level_0)
+
+    # get absolute numbers for scenarios
+    # sort the scenarios according to their DALYs values, in ascending order
+    num_dalys_summarized = summarize(num_dalys).loc[0].unstack().reindex(param_names).sort_values(by='mean')
+    num_dalys_by_cause_summarized = summarize(num_dalys_by_cause, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+
+    # num_dalys_yearly_summarized = (summarize(num_dalys_yearly)
+    #                                .stack([0, 1])
+    #                                .rename_axis(['year', 'scenario', 'stat'])
+    #                                .reset_index(name='count'))
+    #
+    # num_deaths_summarized = summarize(num_deaths).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+
+    num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_appts_by_level_summarized = summarize(num_appts_by_level, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index).fillna(0.0)
+    num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex(
+        param_names).reindex(num_dalys_summarized.index).fillna(0.0)
+    num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    # num_treatments_total_summarized = summarize(num_treatments_total).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+
+    num_never_ran_services_summarized = summarize(num_never_ran_services).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    num_never_ran_appts_summarized = summarize(num_never_ran_appts, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    # num_never_ran_treatments_summarized = summarize(num_never_ran_treatments, only_mean=True).T.reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+    # num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+    num_services_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    # ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+    hcw_time_usage_summarized = summarize(hcw_time_usage, only_mean=True).T.reindex(param_names).reindex(
+        num_dalys_summarized.index
+    )
+    hcw_time_usage_summarized.columns = [col.replace('OfficerType=', '').replace('FacilityLevel=', '')
+                                         for col in hcw_time_usage_summarized.columns]
+    hcw_time_usage_summarized.columns = hcw_time_usage_summarized.columns.str.split(pat='|', expand=True)
+
+    # get relative numbers for scenarios, compared to no_expansion scenario: s_0
+    num_services_increased = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_services.loc[0],
+                comparison='s_0')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    hcw_time_increased_by_treatment_type = get_hcw_time_by_treatment().reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    # num_services_increased_percent = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             num_services.loc[0],
+    #             comparison='s_1',
+    #             scaled=True)
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    num_deaths_averted = summarize(
+        -1.0 *
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_deaths.loc[0],
+                comparison='s_0')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    num_deaths_averted_percent = summarize(
+        -1.0 *
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_deaths.loc[0],
+                comparison='s_0',
+                scaled=True)
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    num_dalys_averted = summarize(
+        -1.0 *
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_dalys.loc[0],
+                comparison='s_0')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    num_dalys_averted_percent = summarize(
+        -1.0 *
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_dalys.loc[0],
+                comparison='s_0',
+                scaled=True
+            )
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    num_dalys_by_cause_averted = summarize(
+        -1.0 * find_difference_relative_to_comparison_dataframe(
+            num_dalys_by_cause,
+            comparison='s_0',
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    num_dalys_by_cause_averted_percent = summarize(
+        -1.0 * find_difference_relative_to_comparison_dataframe(
+            num_dalys_by_cause,
+            comparison='s_0',
+            scaled=True
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    # num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_2', :].sort_values(ascending=False)
+    # # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
+    # num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_2', :].sort_values(
+    #     ascending=False)
+    # # num_dalys_by_cause_averted_percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values(
+    # #     ascending=False)
+
+    # num_dalys_by_cause_averted_percent = summarize(
+    #     -1.0 * find_difference_relative_to_comparison_dataframe(
+    #         num_dalys_by_cause,
+    #         comparison='s_1',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    num_appts_increased = summarize(
+        find_difference_relative_to_comparison_dataframe(
+            num_appts,
+            comparison='s_0',
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    # num_never_ran_appts_reduced = summarize(
+    #     -1.0 * find_difference_relative_to_comparison_dataframe(
+    #         num_never_ran_appts,
+    #         comparison='s_1',
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # num_never_ran_treatments_reduced = summarize(
+    #     -1.0 * find_difference_relative_to_comparison_dataframe(
+    #         num_never_ran_treatments,
+    #         comparison='s_1',
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # num_appts_increased_percent = summarize(
+    #     find_difference_relative_to_comparison_dataframe(
+    #         num_appts,
+    #         comparison='s_1',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    num_treatments_increased = summarize(
+        find_difference_relative_to_comparison_dataframe(
+            num_treatments,
+            comparison='s_0',
+        ),
+        only_mean=True
+    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    # num_treatments_increased_percent = summarize(
+    #     find_difference_relative_to_comparison_dataframe(
+    #         num_treatments,
+    #         comparison='s_1',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    num_treatments_total_increased = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_treatments_total.loc[0],
+                comparison='s_0')
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+
+    # num_treatments_total_increased_percent = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             num_treatments_total.loc[0],
+    #             comparison='s_1',
+    #             scaled=True)
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # service_ratio_increased = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             ratio_services.loc[0],
+    #             comparison='s_1')
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # service_ratio_increased_percent = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             ratio_services.loc[0],
+    #             comparison='s_1',
+    #             scaled=True)
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+
+    # Check that when we sum across the causes/appt types,
+    # we get the same total as calculated when we didn't split by cause/appt type (two-sided tolerance check).
+    assert (
+        (num_appts_increased.sum(axis=1).sort_index()
+         - num_services_increased['mean'].sort_index()
+         ).abs() < 1e-6
+    ).all()
+
+    assert (
+        (num_dalys_by_cause_averted.sum(axis=1).sort_index()
+         - num_dalys_averted['mean'].sort_index()
+         ).abs() < 1e-6
+    ).all()
+
+    assert (
+        (num_treatments_increased.sum(axis=1).sort_index()
+         - num_treatments_total_increased['mean'].sort_index()
+         ).abs() < 1e-6
+    ).all()
+
+    # get time used by services delivered
+    def hcw_time_or_cost_used(time_cost_df=appt_time, count_df=num_appts_by_level_summarized):
+        """Return the HCW time (or cost) used by the appointments counted in ``count_df``.
+
+        ``time_cost_df`` has cadres as rows and (level, appt type) as columns; ``count_df`` has the same kind
+        of columns and one row per scenario. Returns ``(use, used_by_cadre_level)``: the totals per scenario
+        and cadre, and the totals per scenario and (cadre, level) with level '1b' merged into level '2'.
+        NOTE: ``count_df`` is modified in place (Tomography counts are moved from level 2 to level 3).
+        """
+        # replace Tomography from level 2 to level 3, so that ('2', 'Tomography') passes the zero-count check
+        count_df.loc[:, ('3', 'Tomography')] += count_df.loc[:, ('2', 'Tomography')]
+        count_df.loc[:, ('2', 'Tomography')] = 0
+        # check that appts (at a level) not in time_cost_df have count 0 and drop them
+        extra_cols = list(set(count_df.columns) - set(time_cost_df.columns))
+        assert (count_df[extra_cols] == 0).all().all()
+        _count_df = count_df.drop(columns=extra_cols)  # returns a new frame even when nothing is dropped
+        assert set(_count_df.columns).issubset(set(time_cost_df.columns))
+        _time_cost = time_cost_df[_count_df.columns]
+        # calculate hcw time used per scenario and cadre: sum over appts of count * unit time (NaN skipped)
+        use = pd.DataFrame({
+            cadre: _count_df.mul(_time_cost.loc[cadre, :], axis=1).sum(axis=1) for cadre in time_cost_df.index
+        })
+        # reorder columns to be consistent with cadres
+        use = use[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+                   'Dental', 'Laboratory', 'Mental', 'Radiography']]
+        # reorder index to follow the scenario order of num_dalys_summarized
+        use = use.reindex(num_dalys_summarized.index)
+
+        # calculate time used by cadre and level; the frame is transposed before grouping because
+        # DataFrame.groupby(axis=1) is deprecated in recent pandas
+        used_by_cadre_level = {
+            key: _time_cost.mul(_count_df.loc[key, :].values, axis=1).rename(
+                columns={'1b': '2'}, level=0).T.groupby(level=0).sum().unstack()
+            for key in _count_df.index
+        }
+        used_by_cadre_level = pd.DataFrame.from_dict(used_by_cadre_level, orient='index')
+
+        return use, used_by_cadre_level
+
+    hcw_time_used = hcw_time_or_cost_used(time_cost_df=appt_time)[0]
+    hcw_time_used_increased = pd.DataFrame(
+        hcw_time_used.subtract(hcw_time_used.loc['s_0', :], axis=1).drop('s_0', axis=0)
+    )
+    hcw_time_used_by_cadre_level = hcw_time_or_cost_used(time_cost_df=appt_time)[1]
+
+    # get hcw capabilities rescaled
+    assert set(hcw_time_used_by_cadre_level.columns).issubset(set(hcw_time_usage_summarized.columns))
+    assert (hcw_time_usage_summarized[
+        list(set(hcw_time_usage_summarized.columns) - set(hcw_time_used_by_cadre_level.columns))
+    ] == 0.0).all().all()
+    hcw_time_capabilities_rescaled = (hcw_time_used_by_cadre_level /
+                                      hcw_time_usage_summarized[hcw_time_used_by_cadre_level.columns])
+    hcw_time_capabilities_rescaled = hcw_time_capabilities_rescaled.groupby(level=0, axis=1).sum()
+    hcw_time_capabilities_rescaled = hcw_time_capabilities_rescaled[hcw_time_used.columns]
+    hcw_time_capabilities_increased = pd.DataFrame(
+        hcw_time_capabilities_rescaled.subtract(
+            hcw_time_capabilities_rescaled.loc['s_0', :], axis=1).drop('s_0', axis=0)
+    )
+
+    # get hcw capabilities not rescaled
+    hcw_time_capabilities = hcw_time_capabilities.reindex(num_dalys_summarized.index).drop(columns='Nutrition')
+
+    # get HCW time and cost needed to run the never run appts
+    def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by_level_summarized):
+        """Return the HCW time (or cost) needed to run the never ran appointments counted in ``count_df``.
+
+        ``time_cost_df`` has cadres as rows and (level, appt type) as columns; ``count_df`` has the same kind
+        of columns and one row per scenario. The result has one row per scenario, ordered as
+        ``num_dalys_summarized``, and one column per cadre.
+        """
+        # check that never ran appts (at a level) not in time_cost_df have count 0 and drop them
+        extra_cols = list(set(count_df.columns) - set(time_cost_df.columns))
+        assert (count_df[extra_cols] == 0).all().all()
+        _count_df = count_df.drop(columns=extra_cols)  # returns a new frame even when nothing is dropped
+        assert set(_count_df.columns).issubset(set(time_cost_df.columns))
+
+        # calculate hcw time gap per scenario and cadre: sum over appts of count * unit time (NaN skipped)
+        gap = pd.DataFrame({
+            cadre: _count_df.mul(time_cost_df.loc[cadre, _count_df.columns], axis=1).sum(axis=1)
+            for cadre in time_cost_df.index
+        })
+        # reorder columns to be consistent with cadres
+        gap = gap[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
+                   'Dental', 'Laboratory', 'Mental', 'Radiography']]
+        # reorder index to be consistent with the scenario order of num_dalys_summarized
+        gap = gap.reindex(num_dalys_summarized.index)
+
+        return gap
+
+    hcw_time_gap = hcw_time_or_cost_gap(appt_time)
+    hcw_cost_gap = hcw_time_or_cost_gap(appt_cost)
+
+    # hcw time demand to meet ran + never ran services
+    assert (hcw_time_used.index == hcw_time_gap.index).all()
+    assert (hcw_time_used.columns == hcw_time_gap.columns).all()
+    hcw_time_demand = hcw_time_used + hcw_time_gap
+    hcw_time_demand_increased = pd.DataFrame(
+        hcw_time_demand.subtract(hcw_time_demand.loc['s_0', :], axis=1).drop('s_0', axis=0)
+    )
+
+    # cost gap proportions of cadres within each scenario
+    hcw_cost_gap_percent = pd.DataFrame(index=hcw_cost_gap.index, columns=hcw_cost_gap.columns)
+    for i in hcw_cost_gap_percent.index:
+        hcw_cost_gap_percent.loc[i, :] = hcw_cost_gap.loc[i, :] / hcw_cost_gap.loc[i, :].sum()
+    # add a column of 'other' to sum up other cadres
+    hcw_cost_gap_percent['Other'] = hcw_cost_gap_percent[
+        ['Dental', 'Laboratory', 'Mental', 'Radiography']
+    ].sum(axis=1)
+
+    # # store the proportions of no expansion scenario as the "best" scenario that is to be tested
+    # hcw_cost_gap_percent_no_expansion = hcw_cost_gap_percent.loc[
+    #     's_1', ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']
+    # ].copy()  # [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]
+
+    # find appts that need Clinical + Pharmacy (+ Nursing_and_Midwifery)
+    # then calculate hcw time needed for these appts (or treatments, need treatment and their appt footprint)
+    # in never run set
+    # so we can explain that expand C+P is reducing the never run appts and bring health benefits across scenarios
+    # then the next question is what proportion for C and P and any indication for better extra budget allocation
+    # so that never ran appts will be reduced and DALYs could be averted further?
+    def get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical', 'Pharmacy'], appts_count_all=num_never_ran_appts_by_level_summarized
+    ):
+        """Summarise the never ran appts whose time footprint involves exactly the cadres in ``cadres_to_find``.
+
+        An appt (at a level) is selected when, in ``appt_time``, every cadre in ``cadres_to_find`` has a positive
+        entry and every other cadre has a zero entry. Returns a 6-tuple, each item indexed by scenario:
+        (0) count share of the selected appts among all never ran appts, (1) their share of the total cost gap,
+        (2) their cost gap by cadre, (3) the split of (2) across cadres, (4) their share of the total time gap,
+        (5) their time gap by cadre.
+        """
+        def _coarse_counts(counts_by_level):
+            # sum over levels, then over coarse appt types; DataFrame.groupby(axis=1) is deprecated in recent
+            # pandas, so the frame is transposed, grouped on its index and transposed back
+            return (counts_by_level.T.groupby(level=1).sum()
+                    .rename(index=APPT_TYPE_TO_COARSE_APPT_TYPE_MAP).groupby(level=0).sum()
+                    .T.reindex(num_dalys_summarized.index))
+
+        # find the appts that need all cadres in cadres_to_find and no other cadre
+        # already checked above that columns of appts_count_all that are not in appt_time have 0 count
+        other_cadres = ~appt_time.index.isin(cadres_to_find)
+        _appts = [
+            col for col in appt_time.columns.intersection(appts_count_all.columns)
+            if (appt_time.loc[cadres_to_find, col] > 0).all() and (appt_time.loc[other_cadres, col] == 0).all()
+        ]
+
+        # counts of the selected never ran appts and of all never ran appts
+        _counts = _coarse_counts(appts_count_all[_appts])
+        _counts_all = _coarse_counts(appts_count_all)
+        assert (_counts.index == _counts_all.index).all()
+
+        # hcw time gap and hcw cost gap of the selected appts
+        _time_gap = hcw_time_or_cost_gap(appt_time, appts_count_all[_appts])
+        assert (_time_gap.index == hcw_time_gap.index).all()
+        _cost_gap = hcw_time_or_cost_gap(appt_cost, appts_count_all[_appts])
+        assert (_cost_gap.index == hcw_cost_gap.index).all()
+
+        # cost gap distribution among cadres within each scenario
+        _cost_gap_percent = _cost_gap.div(_cost_gap.sum(axis=1), axis=0)
+
+        # if sum up all appt types/cadres
+        _proportions_total = _counts.sum(axis=1) / _counts_all.sum(axis=1)
+        _cost_gap_proportions_total = _cost_gap.sum(axis=1) / hcw_cost_gap.sum(axis=1)
+        _time_gap_proportions_total = _time_gap.sum(axis=1) / hcw_time_gap.sum(axis=1)
+
+        return (_proportions_total, _cost_gap_proportions_total, _cost_gap, _cost_gap_percent,
+                _time_gap_proportions_total, _time_gap)
+
+    never_ran_appts_info_that_need_CNP = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical', 'Nursing_and_Midwifery', 'Pharmacy'])
+    never_ran_appts_info_that_need_CP = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical', 'Pharmacy'])
+    never_ran_appts_info_that_need_CN = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical', 'Nursing_and_Midwifery'])
+    never_ran_appts_info_that_need_NP = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Nursing_and_Midwifery', 'Pharmacy'])
+    never_ran_appts_info_that_need_C = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Clinical'])
+    never_ran_appts_info_that_need_N = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Nursing_and_Midwifery'])
+    never_ran_appts_info_that_need_P = get_never_ran_appts_info_that_need_specific_cadres(
+        cadres_to_find=['Pharmacy'])
+
+    # cost/time proportions within never ran appts, in total of all cadres
+    p_cost = pd.DataFrame(index=num_services_summarized.index)
+    p_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[1]
+    p_cost['C and P'] = never_ran_appts_info_that_need_CP[1]
+    p_cost['C and N&M'] = never_ran_appts_info_that_need_CN[1]
+    p_cost['N&M and P'] = never_ran_appts_info_that_need_NP[1]
+    p_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[1]
+    p_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[1]
+    p_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[1]
+    p_cost['Other cases'] = 1 - p_cost[p_cost.columns[0:7]].sum(axis=1)
+
+    p_time = pd.DataFrame(index=num_services_summarized.index)
+    p_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[4]
+    p_time['C and P'] = never_ran_appts_info_that_need_CP[4]
+    p_time['C and N&M'] = never_ran_appts_info_that_need_CN[4]
+    p_time['N&M and P'] = never_ran_appts_info_that_need_NP[4]
+    p_time['Clinical (C)'] = never_ran_appts_info_that_need_C[4]
+    p_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[4]
+    p_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[4]
+    p_time['Other cases'] = 1 - p_time[p_time.columns[0:7]].sum(axis=1)
+
+    # absolute cost/time gap within never ran appts
+    a_cost = pd.DataFrame(index=num_services_summarized.index)
+    a_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1)
+    a_cost['C and P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1)
+    a_cost['C and N&M'] = never_ran_appts_info_that_need_CN[2].sum(axis=1)
+    a_cost['N&M and P'] = never_ran_appts_info_that_need_NP[2].sum(axis=1)
+    a_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[2].sum(axis=1)
+    a_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[2].sum(axis=1)
+    a_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[2].sum(axis=1)
+    a_cost['Other cases'] = hcw_cost_gap.sum(axis=1) - a_cost.sum(axis=1)
+
+    a_time = pd.DataFrame(index=num_services_summarized.index)
+    a_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[5].sum(axis=1)
+    a_time['C and P'] = never_ran_appts_info_that_need_CP[5].sum(axis=1)
+    a_time['C and N&M'] = never_ran_appts_info_that_need_CN[5].sum(axis=1)
+    a_time['N&M and P'] = never_ran_appts_info_that_need_NP[5].sum(axis=1)
+    a_time['Clinical (C)'] = never_ran_appts_info_that_need_C[5].sum(axis=1)
+    a_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[5].sum(axis=1)
+    a_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[5].sum(axis=1)
+    a_time['Other cases'] = hcw_time_gap.sum(axis=1) - a_time.sum(axis=1)
+
+    # appts count proportions within never ran appts, in total of all cadres
+    p_count = pd.DataFrame(index=num_services_summarized.index)
+    p_count['C and P and N&M'] = never_ran_appts_info_that_need_CNP[0]
+    p_count['C and P'] = never_ran_appts_info_that_need_CP[0]
+    p_count['C and N&M'] = never_ran_appts_info_that_need_CN[0]
+    p_count['N&M and P'] = never_ran_appts_info_that_need_NP[0]
+    p_count['Clinical (C)'] = never_ran_appts_info_that_need_C[0]
+    p_count['Pharmacy (P)'] = never_ran_appts_info_that_need_P[0]
+    p_count['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[0]
+    p_count['Other cases'] = 1 - p_count[p_count.columns[0:7]].sum(axis=1)
+
+    # define color for the cadres combinations above
+    cadre_comb_color = {
+        'C and P and N&M': 'royalblue',
+        'C and P': 'turquoise',
+        'C and N&M': 'gold',
+        'N&M and P': 'yellowgreen',
+        'Clinical (C)': 'mediumpurple',
+        'Pharmacy (P)': 'limegreen',
+        'Nursing_and_Midwifery (N&M)': 'pink',
+        'Other cases': 'gray',
+    }
+
+    # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results
+
+    # hcw time by cadre and treatment: draw = 22: C + N + P vs no expansion, draw = 11, C + P vs no expansion
+    # time_increased_by_cadre_treatment_CNP = get_hcw_time_by_treatment(21)
+    # time_increased_by_cadre_treatment_CP = get_hcw_time_by_treatment(10)
+
+    # # get Return (in terms of DALYs averted) On Investment (extra cost) for all expansion scenarios, excluding s_1
+    # # get Cost-Effectiveness, i.e., cost of every daly averted, for all expansion scenarios
+    # ROI = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
+    # # todo: for the bad scenarios (s_5, s_8, s_15), the dalys averted are negative
+    # #  (maybe only due to statistical variation; relative difference to s_1 are close to 0%),
+    # #  thus CE does not make sense.
+    # # CE = pd.DataFrame(index=num_dalys_averted.index, columns=num_dalys_averted.columns)
+    # for i in ROI.index:
+    #     ROI.loc[i, :] = num_dalys_averted.loc[i, :] / extra_cost_all_yrs.loc[i, 'all_cadres']
+    # #     CE.loc[i, 'mean'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'mean']
+    # #     CE.loc[i, 'lower'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'upper']
+    # #     CE.loc[i, 'upper'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower']
+
+    # prepare colors for plots
+    appt_color = {
+        appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns
+    }
+    treatment_color = {
+        treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan)
+        for treatment in num_treatments_summarized.columns
+    }
+    cause_color = {
+        cause: CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP.get(cause, np.nan)
+        for cause in num_dalys_by_cause_summarized.columns
+    }
+    officer_category_color = {
+        'Clinical': 'blue',
+        'DCSA': 'orange',
+        'Nursing_and_Midwifery': 'red',
+        'Pharmacy': 'green',
+        'Dental': 'purple',
+        'Laboratory': 'orchid',
+        'Mental': 'plum',
+        'Nutrition': 'thistle',
+        'Radiography': 'lightgray',
+        'Other': 'gray'
+    }
+    # get scenario color
+    # scenario_groups = scenario_grouping_coloring(by='effect')
+    scenario_groups = scenario_grouping_coloring(by='expansion')
+    scenario_color = {}
+    for s in param_names:
+        for k in scenario_groups[1].keys():
+            if s in scenario_groups[0][k]:
+                scenario_color[s] = scenario_groups[1][k]
+
+    # representative_scenarios_color = {}
+    # cmap_list = list(map(plt.get_cmap("Set3"), range(len(param_names))))
+    # for i in range(len(param_names)):
+    #     representative_scenarios_color[num_dalys_summarized.index[i]] = cmap_list[i]
+
+    # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\
+    # percentage of DALYs averted decides the color of that scatter point
+    extra_budget_allocation = extra_budget_fracs.T.reindex(num_dalys_summarized.index)
+    extra_budget_allocation['Other'] = extra_budget_allocation[
+        ['Dental', 'Laboratory', 'Mental', 'Radiography']
+    ].sum(axis=1)
+    name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}'
+    heat_data = pd.merge(num_dalys_averted_percent['mean'],
+                         extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
+                         left_index=True, right_index=True, how='inner')
+    # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    colors = [scenario_color[s] for s in heat_data.index]
+    fig = plt.figure()
+    ax = fig.add_subplot(111, projection='3d')
+    ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'],
+               alpha=0.8, marker='o', s=heat_data['mean'] * 2000,
+               #c=heat_data['mean'] * 100, cmap='viridis',
+               c=colors)
+    # plot lines from the best point to three axes panes
+    # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
+    #           [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
+    #           [0, heat_data['Nursing_and_Midwifery'][0]],
+    #           linestyle='--', color='gray', alpha=0.8)
+    # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
+    #           [0, heat_data['Pharmacy'][0]],
+    #           [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
+    #           linestyle='--', color='gray', alpha=0.8)
+    # ax.plot3D([0, heat_data['Clinical'][0]],
+    #           [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
+    #           [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
+    #           linestyle='--', color='gray', alpha=0.8)
+    ax.set_xlabel('Fraction of extra budget allocated to \nClinical cadre (C)')
+    ax.set_ylabel('Pharmacy cadre (P)')
+    #ax.invert_xaxis()
+    ax.invert_yaxis()
+    ax.set_zlabel('Nursing and Midwifery (N&M)')
+    ax.plot3D([0, 1], [0, 1], [0, 1], linestyle='-', color='orange', alpha=1.0, linewidth=2)
+    legend_labels = list(scenario_groups[1].keys()) + ['line of C = P = N&M']
+    legend_handles = [plt.Line2D([0, 0], [0, 0],
+                                 linestyle='none', marker='o', color=scenario_groups[1][label]
+                                 ) for label in legend_labels[0:len(legend_labels) - 1]
+                      ] + [plt.Line2D([0, 1], [0, 0], linestyle='-', color='orange')]
+    plt.legend(legend_handles, legend_labels,
+               loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+               title='Scenario groups')
+    # plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.25)
+    plt.title(name_of_plot)
+    plt.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    # name_of_plot = f'3D DALYs averted, Services increased and Treatment increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+    #                        num_treatments_total_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_0, 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig = plt.figure()
+    # ax = fig.add_subplot(111, projection='3d')
+    # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0],
+    #            alpha=0.8, marker='o',
+    #            c=colors)
+    # ax.set_xlabel('Services increased %')
+    # ax.set_ylabel('Treatments increased %')
+    # ax.set_zlabel('DALYs averted %')
+    # legend_labels = list(scenario_groups[1].keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels,
+    #            loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+    #            title='Scenario groups')
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'2D DALYs averted, Services increased and Treatment increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+    #                        num_treatments_total_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig, ax = plt.subplots()
+    # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 2],
+    #            alpha=0.8, marker='o', s=2000 * heat_data.iloc[:, 0],
+    #            c=colors)
+    # ax.set_xlabel('Services increased %')
+    # ax.set_ylabel('Treatments increased %')
+    # legend_labels = list(scenario_groups[1].keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels,
+    #            loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+    #            title='Scenario groups')
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'DALYs averted and Services increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+    #                        num_treatments_total_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig, ax = plt.subplots()
+    # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
+    #            alpha=0.8, marker='o', c=colors)
+    # ax.set_xlabel('Services increased %')
+    # ax.set_ylabel('DALYs averted %')
+    # legend_labels = list(scenario_groups[1].keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels,
+    #            loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+    #            title='Scenario groups')
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'DALYs averted and Treatments increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], num_services_increased_percent['mean'],
+    #                        num_treatments_total_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig, ax = plt.subplots()
+    # ax.scatter(100 * heat_data.iloc[:, 2], 100 * heat_data.iloc[:, 0],
+    #            alpha=0.8, marker='o', c=colors)
+    # ax.set_xlabel('Treatments increased %')
+    # ax.set_ylabel('DALYs averted %')
+    # legend_labels = list(scenario_groups[1].keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'DALYs averted and Services ratio increased, {target_period()}'
+    # heat_data = pd.concat([num_dalys_averted_percent['mean'], service_ratio_increased_percent['mean']], axis=1)
+    # # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
+    # # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
+    # colors = [scenario_color[s] for s in heat_data.index]
+    # fig, ax = plt.subplots()
+    # ax.scatter(100 * heat_data.iloc[:, 1], 100 * heat_data.iloc[:, 0],
+    #            alpha=0.8, marker='o', c=colors)
+    # ax.set_xlabel('Service delivery ratio increased %')
+    # ax.set_ylabel('DALYs averted %')
+    # legend_labels = list(scenario_groups[1].keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels, loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2)
+    # plt.title(name_of_plot)
+    # plt.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
+    # outcome_data = num_dalys_averted_percent['mean']
+    # # outcome = num_services_increased_percent['mean']
+    # # outcome = num_treatments_total_increased_percent['mean']
+    # regression_data = pd.merge(outcome_data,
+    #                            extra_budget_allocation,
+    #                            left_index=True, right_index=True, how='inner')
+    # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
+    # regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery']
+    # regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
+    # regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
+    #                             * regression_data['Nursing_and_Midwifery'])
+    # cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other']
+    # regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True)
+    # predictor = regression_data[regression_data.columns[1:]]
+    # outcome = regression_data['mean']
+    # predictor = sm.add_constant(predictor)
+    # est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit()
+    # print(est.summary())
+
+    # todo: could do regression analysis of DALYs averted and Services increased
+
+    # # do anova analysis to test the difference of scenario groups
+    # def anova_oneway(df=num_dalys_averted_percent):
+    #     best = df.loc[list(scenario_groups['C + P + D/N&M/O/None']), 'mean']
+    #     middle_C = df.loc[list(scenario_groups['C + D/N&M/O/None']), 'mean']
+    #     middle_P = df.loc[list(scenario_groups['P + D/N&M/O/None']), 'mean']
+    #     worst = df.loc[df.index.isin(scenario_groups['D/N&M/O/None']), 'mean']
+    #
+    #     return ss.oneway.anova_oneway((best, middle_C, middle_P, worst),
+    #                                   groups=None, use_var='unequal', welch_correction=True, trim_frac=0)
+
+    # anova_dalys = anova_oneway()
+    # anova_services = anova_oneway(num_services_increased_percent)
+    # anova_treatments = anova_oneway(num_treatments_total_increased_percent)
+
+    # plot absolute numbers for scenarios
+
+    # name_of_plot = f'Deaths, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(num_deaths_summarized / 1e6)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Millions)')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'DALYs, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(num_dalys_summarized / 1e6)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Millions)')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Service demand, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(num_service_demand_summarized / 1e6)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Millions)')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Service delivery ratio, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(ratio_service_summarized)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('services delivered / demand')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # # plot yearly DALYs for best 9 scenarios
+    # name_of_plot = f'Yearly DALYs, {target_period()}'
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1']
+    # for s in best_scenarios:
+    #     data = (num_dalys_yearly_summarized.loc[num_dalys_yearly_summarized.scenario == s, :]
+    #             .drop(columns='scenario')
+    #             .pivot(index='year', columns='stat')
+    #             .droplevel(0, axis=1))
+    #     ax.plot(data.index, data['mean'] / 1e6, label=substitute_labels[s], color=best_scenarios_color[s], linewidth=2)
+    #     # ax.fill_between(data.index.to_numpy(),
+    #     #                 (data['lower'] / 1e6).to_numpy(),
+    #     #                 (data['upper'] / 1e6).to_numpy(),
+    #     #                 color=best_scenarios_color[s],
+    #     #                 alpha=0.2)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Millions)')
+    # ax.set_xticks(data.index)
+    # legend_labels = [substitute_labels[v] for v in best_scenarios]
+    # legend_handles = [plt.Rectangle((0, 0), 1, 1,
+    #                                 color=best_scenarios_color[v]) for v in best_scenarios]
+    # ax.legend(legend_handles, legend_labels,
+    #           loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
+    #           title='Best scenario group')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # # plot yearly staff count (Clinical/Pharmacy/Nursing and Midwifery) for best 9 scenarios
+    # best_cadres = ['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']
+    # name_of_plot = f'Yearly staff count for C+P+N total, {target_period()}'
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # best_scenarios = list(num_dalys_summarized.index[0:9]) + ['s_1']
+    # for s in best_scenarios:
+    #     data = staff_count.loc[staff_count.draw == s].set_index('year').drop(columns='draw').loc[:, best_cadres].sum(
+    #         axis=1)
+    #     ax.plot(data.index, data.values / 1e3, label=substitute_labels[s], color=best_scenarios_color[s])
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('(Thousands)')
+    # ax.set_xticks(data.index)
+    # legend_labels = [substitute_labels[v] for v in best_scenarios]
+    # legend_handles = [plt.Rectangle((0, 0), 1, 1,
+    #                                 color=best_scenarios_color[v]) for v in best_scenarios]
+    # ax.legend(legend_handles, legend_labels,
+    #           loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
+    #           title='Best scenario group')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Services by appointment type, {target_period()}'
+    # num_appts_summarized_in_millions = num_appts_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_services_summarized['mean'] - num_services_summarized['lower']).values,
+    #     (num_services_summarized['upper'] - num_services_summarized['mean']).values,
+    # ])/1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_services_summarized['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_appts_summarized_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Services demand by appointment type, {target_period()}'
+    # num_appts_demand_to_plot = num_appts_demand_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_service_demand_summarized['mean'] - num_service_demand_summarized['lower']).values,
+    #     (num_service_demand_summarized['upper'] - num_service_demand_summarized['mean']).values,
+    # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_appts_demand_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_service_demand_summarized['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_appts_demand_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Never ran services by appointment type, {target_period()}'
+    # num_never_ran_appts_summarized_in_millions = num_never_ran_appts_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_never_ran_services_summarized['mean'] - num_never_ran_services_summarized['lower']).values,
+    #     (num_never_ran_services_summarized['upper'] - num_never_ran_services_summarized['mean']).values,
+    # ])/1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_never_ran_appts_summarized_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_never_ran_services_summarized['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_summarized_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Total services demand by appointment type, {target_period()}'
+    # data_to_plot = num_appts_demand_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_services_demand_summarized['mean'] - num_services_demand_summarized['lower']).values,
+    #     (num_services_demand_summarized['upper'] - num_services_demand_summarized['mean']).values,
+    # ])/1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # data_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_services_demand_summarized['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Services by treatment type, {target_period()}'
+    # num_treatments_summarized_in_millions = num_treatments_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_treatments_total_summarized['mean'] - num_treatments_total_summarized['lower']).values,
+    #     (num_treatments_total_summarized['upper'] - num_treatments_total_summarized['mean']).values,
+    # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(10, 6))
+    # num_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_treatments_total_summarized['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_treatments_summarized_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Never ran services by treatment type, {target_period()}'
+    # num_never_ran_treatments_summarized_in_millions = num_never_ran_treatments_summarized / 1e6
+    # yerr_services = np.array([
+    #     (num_never_ran_treatments_total_summarized['mean'] - num_never_ran_treatments_total_summarized['lower']).values,
+    #     (num_never_ran_treatments_total_summarized['upper'] - num_never_ran_treatments_total_summarized['mean']).values,
+    # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(10, 6))
+    # num_never_ran_treatments_summarized_in_millions.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_never_ran_treatments_total_summarized['mean'].values / 1e6,
+    #             yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_summarized_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Number of staff by cadre, {TARGET_PERIOD[1].year}'
+    # total_staff_to_plot = (staff_count_2029 / 1000).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
+    # column_dcsa = total_staff_to_plot.pop('DCSA')
+    # total_staff_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # total_staff_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Thousands', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in total_staff_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'HCW time used by cadre in delivering services , {target_period()}'
+    # data_to_plot = (hcw_time_used / 1e6).reindex(num_dalys_summarized.index)
+    # column_dcsa = data_to_plot.pop('DCSA')
+    # data_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Minutes in Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'HCW time needed to deliver never ran appointments, {target_period()}'
+    # hcw_time_gap_to_plot = (hcw_time_gap / 1e6).reindex(num_dalys_summarized.index)
+    # column_dcsa = hcw_time_gap_to_plot.pop('DCSA')
+    # hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Minutes in Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    name_of_plot = f'HCW time needed to deliver ran + never ran appointments, {target_period()}'
+    hcw_time_gap_to_plot = (hcw_time_demand / 1e9).reindex(num_dalys_summarized.index)
+    column_dcsa = hcw_time_gap_to_plot.pop('DCSA')
+    hcw_time_gap_to_plot.insert(3, "DCSA", column_dcsa)
+    fig, ax = plt.subplots(figsize=(9, 6))
+    hcw_time_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Billion minutes', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in hcw_time_gap_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    # name_of_plot = f'HCW cost needed by cadre to deliver never ran appointments, {target_period()}'
+    # hcw_cost_gap_to_plot = (hcw_cost_gap / 1e6).reindex(num_dalys_summarized.index)
+    # column_dcsa = hcw_cost_gap_to_plot.pop('DCSA')
+    # hcw_cost_gap_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # hcw_cost_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('USD in Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+    #
+    # name_of_plot = f'Count proportions of never ran appointments that require specific cadres only, {target_period()}'
+    # data_to_plot = p_count * 100
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    # ax.set_ylim(0, 100)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Extra budget allocation scenario')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # # # plot the average proportions of all scenarios
+    # # for c in data_to_plot.columns:
+    # #     plt.axhline(y=data_to_plot[c].mean(),
+    # #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    # #                 label=c)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+    #
+    # name_of_plot = f'Cost proportions of never ran appointments that require specific cadres only, {target_period()}'
+    # data_to_plot = p_cost * 100
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    # ax.set_ylim(0, 100)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Extra budget allocation scenario')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # # # plot the average proportions of all scenarios
+    # # for c in data_to_plot.columns:
+    # #     plt.axhline(y=data_to_plot[c].mean(),
+    # #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    # #                 label=c)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Time proportions of never ran appointments that require specific cadres only, {target_period()}'
+    # data_to_plot = p_time * 100
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    # ax.set_ylim(0, 100)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Extra budget allocation scenario')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # # # plot the average proportions of all scenarios
+    # # for c in data_to_plot.columns:
+    # #     plt.axhline(y=data_to_plot[c].mean(),
+    # #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    # #                 label=c)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+    #
+    # name_of_plot = f'Cost distribution of never ran appointments that require specific cadres only, {target_period()}'
+    # data_to_plot = a_cost / 1e6
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    # ax.set_ylabel('USD in millions')
+    # ax.set_xlabel('Extra budget allocation scenario')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # # # plot the average cost of all scenarios
+    # # for c in data_to_plot.columns:
+    # #     plt.axhline(y=data_to_plot[c].mean(),
+    # #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    # #                 label=c)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Time distribution of never ran appointments that require specific cadres only, {target_period()}'
+    # data_to_plot = a_time / 1e6
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
+    # ax.set_ylabel('minutes in millions')
+    # ax.set_xlabel('Extra budget allocation scenario')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
+    # # # plot the average time of all scenarios
+    # # for c in data_to_plot.columns:
+    # #     plt.axhline(y=data_to_plot[c].mean(),
+    # #                 linestyle='--', color=cadre_comb_color[c], alpha=1.0, linewidth=2,
+    # #                 label=c)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+    #
+    # name_of_plot = f'HCW cost gap by cadre distribution of never ran appointments, {target_period()}'
+    # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
+    # hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    # #ax.set_ylim(0, 100)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_percent_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    # # plot the average proportions of all scenarios
+    # for c in cadres_to_plot:
+    #     plt.axhline(y=hcw_cost_gap_percent_to_plot[c].mean(),
+    #                 linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2,
+    #                 label=c)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'HCW cost gap distribution of never ran appointments that require CNP only, {target_period()}'
+    # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy']
+    # data_to_plot = never_ran_appts_info_that_need_CNP[3][cadres_to_plot] * 100
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    # #ax.set_ylim(0, 100)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    # # plot the average proportions of all scenarios
+    # for c in cadres_to_plot:
+    #     plt.axhline(y=data_to_plot[c].mean(),
+    #                 linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2,
+    #                 label=c)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    name_of_plot = f'Average fractions of HCW time used (CNP, level 1a), {target_period()}'
+    data_to_plot = hcw_time_usage_summarized.xs('1a', axis=1, level=1, drop_level=True) * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    #ax.set_ylim(0, 100)
+    ax.set_ylabel('Percentage %')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Average fractions of HCW time used (CNP, level 2), {target_period()}'
+    data_to_plot = hcw_time_usage_summarized.xs('2', axis=1, level=1, drop_level=True) * 100
+    fig, ax = plt.subplots(figsize=(12, 8))
+    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    # ax.set_ylim(0, 100)
+    ax.set_ylabel('Percentage %')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90)
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    # name_of_plot = f'Extra budget allocation among cadres, {target_period()}'
+    # cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
+    # extra_budget_allocation_to_plot = extra_budget_allocation[cadres_to_plot] * 100
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # extra_budget_allocation_to_plot.plot(kind='bar', color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in extra_budget_allocation_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Total budget in USD dollars by cadre, {target_period()}'
+    # total_cost_to_plot = (total_cost_all_yrs / 1e6).drop(columns='all_cadres').reindex(num_dalys_summarized.index)
+    # column_dcsa = total_cost_to_plot.pop('DCSA')
+    # total_cost_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # total_cost_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in total_cost_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'DALYs by cause, {target_period()}'
+    # num_dalys_by_cause_summarized_in_millions = num_dalys_by_cause_summarized / 1e6
+    # yerr_dalys = np.array([
+    #     (num_dalys_summarized['mean'] - num_dalys_summarized['lower']).values,
+    #     (num_dalys_summarized['upper'] - num_dalys_summarized['mean']).values,
+    # ])/1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_dalys_by_cause_summarized_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)), num_dalys_summarized['mean'].values / 1e6, yerr=yerr_dalys,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set(xlabel=None)
+    # xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_summarized_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # fig.subplots_adjust(right=0.7)
+    # ax.legend(
+    #     loc="center left",
+    #     bbox_to_anchor=(0.750, 0.6),
+    #     bbox_transform=fig.transFigure,
+    #     title='Cause of death or injury',
+    #     title_fontsize='x-small',
+    #     fontsize='x-small',
+    #     reverse=True,
+    #     ncol=1
+    # )
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # plot relative numbers for scenarios
+    name_of_plot = f'DALYs averted vs no extra budget allocation, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(num_dalys_averted / 1e6, num_dalys_averted_percent, annotation=True)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('Millions')
+    ax.set_xlabel('Extra budget allocation scenario')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'Deaths averted vs no extra budget allocation, {target_period()}'
+    fig, ax = do_bar_plot_with_ci(num_deaths_averted / 1e6, num_deaths_averted_percent, annotation=True)
+    ax.set_title(name_of_plot)
+    ax.set_ylabel('Millions')
+    ax.set_xlabel('Extra budget allocation scenario')
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    # todo: plot Deaths averted by cause
+
+    # name_of_plot = f'Service delivery ratio against no expansion, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(service_ratio_increased * 100, service_ratio_increased_percent, annotation=True)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('Percentage')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Extra staff by cadre against no expansion, {TARGET_PERIOD[1].year}'
+    # extra_staff_by_cadre_to_plot = extra_staff_2029.drop(columns='all_cadres').reindex(
+    #     num_dalys_summarized.index).drop(['s_1']) / 1e3
+    # column_dcsa = extra_staff_by_cadre_to_plot.pop('DCSA')
+    # extra_staff_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # extra_staff_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Thousands', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in extra_staff_by_cadre_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Extra budget by cadre vs no extra budget allocation, {target_period()}'
+    # extra_cost_by_cadre_to_plot = extra_cost_all_yrs.drop(columns='all_cadres').reindex(
+    #     num_dalys_summarized.index).drop(index='s_0') / 1e6
+    # column_dcsa = extra_cost_by_cadre_to_plot.pop('DCSA')
+    # extra_cost_by_cadre_to_plot.insert(3, "DCSA", column_dcsa)
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # extra_cost_by_cadre_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in extra_cost_by_cadre_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # # name_of_plot = f'Time used increased by cadre and treatment: C + N&M + P vs no expansion, {target_period()}'
+    # # data_to_plot = time_increased_by_cadre_treatment_CNP / 1e6
+    # name_of_plot = f'Time used increased by cadre and treatment: C + P vs no expansion, {target_period()}'
+    # data_to_plot = time_increased_by_cadre_treatment_CP / 1e6
+    # data_to_plot['total'] = data_to_plot.sum(axis=1)
+    # data_to_plot.sort_values(by='total', inplace=True, ascending=False)
+    # data_to_plot.drop('total', axis=1, inplace=True)
+    # data_to_plot = data_to_plot[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery',
+    #                              'DCSA', 'Laboratory', 'Mental', 'Radiography']]
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions Minutes')
+    # ax.set_xlabel('Treatment')
+    # ax.set_xticklabels(data_to_plot.index, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+    #     ':', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Time used increased by treatment and cadre: C + N&M + P vs no expansion, {target_period()}'
+    # # name_of_plot = f'Time used increased by treatment and cadre: C + P vs no expansion, {target_period()}'
+    # data_to_plot = data_to_plot.T
+    # data_to_plot = data_to_plot.add_suffix('*')
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # ax.set_ylabel('Millions Minutes')
+    # ax.set_xlabel('Treatment')
+    # ax.set_xticklabels(data_to_plot.index, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+    #     ':', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'DALYs by cause averted: \nall cadres gap allocation vs no extra budget allocation, {target_period()}'
+    # data_to_plot = num_dalys_by_cause_averted_CNP / 1e6
+    # # name_of_plot = f'DALYs by cause averted: C + P vs no expansion, {target_period()}'
+    # # data_to_plot = num_dalys_by_cause_averted_CP / 1e6
+    # fig, ax = plt.subplots()
+    # data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values)
+    # ax.set_ylabel('Millions')
+    # ax.set_xlabel('Treatment')
+    # ax.set_xticklabels(data_to_plot.index, rotation=90)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+    #     ':', '').replace('\n', '')))
+    # fig.show()
+    # plt.close(fig)
+    #
+    # name_of_plot = f'DALYs by cause averted %: \nall cadres gap allocation vs no extra budget allocation, {target_period()}'
+    # data_to_plot = num_dalys_by_cause_averted_percent_CNP * 100
+    # fig, ax = plt.subplots()
+    # data_to_plot.plot.bar(ax=ax, x=data_to_plot.index, y=data_to_plot.values)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Treatment')
+    # ax.set_xticklabels(data_to_plot.index, rotation=90)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '').replace(
+    #     ':', '').replace('\n', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Services increased by appointment type \nagainst no expansion, {target_period()}'
+    # num_appts_increased_in_millions = num_appts_increased / 1e6
+    # yerr_services = np.array([
+    #     (num_services_increased['mean'] - num_services_increased['lower']).values,
+    #     (num_services_increased['upper'] - num_services_increased['mean']).values,
+    # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_appts_increased_in_millions.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # ax.errorbar(range(len(param_names)-1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in num_appts_increased_in_millions.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(
+    #     name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    # )
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Never ran services reduced by appointment type \nagainst no expansion, {target_period()}'
+    # num_never_ran_appts_reduced_to_plot = num_never_ran_appts_reduced / 1e6
+    # # yerr_services = np.array([
+    # #     (num_services_increased['mean'] - num_services_increased['lower']).values,
+    # #     (num_services_increased['upper'] - num_services_increased['mean']).values,
+    # # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_never_ran_appts_reduced_to_plot.plot(kind='bar', stacked=True, color=appt_color, rot=0, ax=ax)
+    # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    # #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in num_never_ran_appts_reduced_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Appointment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(
+    #     name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    # )
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Never ran services reduced by treatment type \nagainst no expansion, {target_period()}'
+    # num_never_ran_treatments_reduced_to_plot = num_never_ran_treatments_reduced / 1e6
+    # # yerr_services = np.array([
+    # #     (num_services_increased['mean'] - num_services_increased['lower']).values,
+    # #     (num_services_increased['upper'] - num_services_increased['mean']).values,
+    # # ]) / 1e6
+    # fig, ax = plt.subplots(figsize=(9, 6))
+    # num_never_ran_treatments_reduced_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    # # ax.errorbar(range(len(param_names) - 1), num_services_increased['mean'].values / 1e6, yerr=yerr_services,
+    # #             fmt=".", color="black", zorder=100)
+    # ax.set_ylabel('Millions', fontsize='small')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in num_never_ran_treatments_reduced_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Treatment type', title_fontsize='small',
+    #            fontsize='small', reverse=True)
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(
+    #     name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    # )
+    # fig.show()
+    # plt.close(fig)
+
+    name_of_plot = f'Services increased by treatment type \nvs no extra budget allocation, {target_period()}'
+    data_to_plot = num_treatments_increased / 1e6
+    yerr_services = np.array([
+        (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values,
+        (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values,
+    ]) / 1e6
+    fig, ax = plt.subplots(figsize=(10, 6))
+    data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'HCW time-used increased by treatment type \nvs no extra budget allocation, {target_period()}'
+    data_to_plot = hcw_time_increased_by_treatment_type / 1e9
+    fig, ax = plt.subplots(figsize=(10, 6))
+    data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
+    ax.set_ylabel('Billion minutes', fontsize='small')
+    ax.set(xlabel=None)
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'HCW time-used increased by cadre \nvs no extra budget allocation, {target_period()}'
+    data_to_plot = hcw_time_used_increased / 1e9
+    column_dcsa = data_to_plot.pop('DCSA')
+    data_to_plot.insert(3, "DCSA", column_dcsa)
+    fig, ax = plt.subplots(figsize=(9, 6))
+    data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Billion minutes', fontsize='small')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'HCW capabilities increased by cadre \nvs no extra budget allocation, {target_period()}'
+    data_to_plot = hcw_time_capabilities_increased / 1e9
+    column_dcsa = data_to_plot.pop('DCSA')
+    data_to_plot.insert(3, "DCSA", column_dcsa)
+    fig, ax = plt.subplots(figsize=(9, 6))
+    data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
+    ax.set_ylabel('Billion minutes', fontsize='small')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
+               fontsize='small', reverse=True)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'HCW time - used, needed, capabilities rescaled, capabilities - by cadre \nvs no extra budget allocation, {target_period()}'
+    # name_of_plot = f'HCW time - used, needed - by cadre \nvs no extra budget allocation, {target_period()}'
+    assert (hcw_time_used.index == hcw_time_capabilities_rescaled.index).all().all()
+    assert (hcw_time_used.index == hcw_time_demand.index).all().all()
+    assert (hcw_time_used.index == hcw_time_capabilities.index).all().all()
+    assert (hcw_time_used.columns == hcw_time_capabilities_rescaled.columns).all().all()
+    assert (hcw_time_used.columns == hcw_time_demand.columns).all().all()
+    assert (hcw_time_used.columns == hcw_time_capabilities.columns).all().all()
+    use_to_plot = hcw_time_used / 1e9
+    cap_to_plot = hcw_time_capabilities / 1e9
+    cap_rescaled_to_plot = hcw_time_capabilities_rescaled / 1e9
+    demand_to_plot = hcw_time_demand / 1e9
+    fig, ax = plt.subplots(figsize=(8, 5))
+    use_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=1.0, position=3,
+                     width=0.15, edgecolor='dimgrey', rot=0, ax=ax)
+    cap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=0.3, position=0,
+                     width=0.15, edgecolor='dimgrey', rot=0, ax=ax)
+    cap_rescaled_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=0.6, position=1,
+                              width=0.15, edgecolor='dimgrey', rot=0,  ax=ax)
+    demand_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, alpha=0.8, position=2,
+                        width=0.15, edgecolor='dimgrey', rot=0, ax=ax)
+    ax.set_xlim(right=len(use_to_plot) - 0.45)
+    ax.set_ylabel('Billion minutes', fontsize='small')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in use_to_plot.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    legend_1 = plt.legend(use_to_plot.columns, loc='center left', bbox_to_anchor=(1.0, 0.5), fontsize='small',
+                          title='Officer category', title_fontsize='small', reverse=True)
+    fig.add_artist(legend_1)
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(
+        name_of_plot.replace(' ', '_').replace(',', '').replace('\n', ''))
+    )
+    fig.show()
+    plt.close(fig)
+
+    name_of_plot = f'DALYs by cause averted vs no extra budget allocation, {target_period()}'
+    num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
+    yerr_dalys = np.array([
+        (num_dalys_averted['mean'] - num_dalys_averted['lower']).values,
+        (num_dalys_averted['upper'] - num_dalys_averted['mean']).values,
+    ]) / 1e6
+    fig, ax = plt.subplots(figsize=(9, 6))
+    num_dalys_by_cause_averted_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
+    ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
+                fmt=".", color="black", zorder=100)
+    ax.set_ylabel('Millions', fontsize='small')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_averted.index]
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    fig.subplots_adjust(right=0.7)
+    ax.legend(
+        loc="center left",
+        bbox_to_anchor=(0.750, 0.6),
+        bbox_transform=fig.transFigure,
+        title='Cause of death or injury',
+        title_fontsize='x-small',
+        fontsize='x-small',
+        ncol=1,
+        reverse=True
+    )
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
+    # plot ROI and CE for all expansion scenarios
+
+    # name_of_plot = f'DALYs averted per extra USD dollar invested, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(ROI)
+    # ax.set_title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # name_of_plot = f'Cost per DALY averted, {target_period()}'
+    # fig, ax = do_bar_plot_with_ci(CE)
+    # ax.set_title(name_of_plot)
+    # ax.set_ylabel('USD dollars')
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
+
+    # todo
+    # To vary the HRH budget growth rate (default: 4.2%) and do a sensitivity analysis
+    # (around the best possible extra budget allocation scenario)?
+    # As this is an analysis of 10-year results, would it be better to consider an increasing annual/minute salary?
+    # The inflation rate of GDP and of the health workforce budget and the rate of salary increase could be assumed
+    # to be the same, so there is no need to consider the salary increase rate if GDP inflation is not considered.
+    # To plot time series of staff and budget in the target period to show
+    # how many staff and how much budget to add yearly (choose the best scenario to illustrate)?
+    # Before submitting a run, merge in the remote master.
+    # Think about a measure of Universal Health Service Coverage for the scenarios?
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("results_folder", type=Path)  # outputs/bshe@ic.ac.uk/scenario_run_for_hcw_expansion_analysis-2024-08-16T160132Z
+    args = parser.parse_args()
+
+    # Produce results for short-term analysis: 5 years
+
+    # # 2015-2019, before change, incl. mode, hr expansion, etc.
+    # apply(
+    #     results_folder=args.results_folder,
+    #     output_folder=args.results_folder,
+    #     resourcefilepath=Path('./resources'),
+    #     the_target_period=(Date(2015, 1, 1), Date(2019, 12, 31))
+    # )
+    #
+    # # 2020-2024
+    # apply(
+    #     results_folder=args.results_folder,
+    #     output_folder=args.results_folder,
+    #     resourcefilepath=Path('./resources'),
+    #     the_target_period=(Date(2020, 1, 1), Date(2024, 12, 31))
+    # )
+
+    # Produce results for long-term analysis: 10 years
+    # 2020-2029 -- NOTE(review): the period passed below is 2019-01-01 to 2029-12-31 (11 calendar years); confirm
+    apply(
+        results_folder=args.results_folder,
+        output_folder=args.results_folder,
+        resourcefilepath=Path('./resources'),
+        the_target_period=(Date(2019, 1, 1), Date(2029, 12, 31))
+    )

From 3b270d53513d9663e1019bec5b6e9052eb9d602b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 5 Nov 2024 08:48:40 +0000
Subject: [PATCH 173/218] log the capabilities

---
 src/tlo/methods/healthsystem.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 3ce379d40b..c4ea2dd6a2 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3222,13 +3222,17 @@ def apply(self, population):
             self.module._daily_capabilities[officer] *= daily_cost.loc[officer_type, 'scale_up_factor']
 
         # save the scale up factor, updated cost and updated capabilities into logger
+        # note that cost and capabilities are on the actual scale,
+        # not normalised by the self.capabilities_coefficient parameter
         total_cost_this_year = 365.25 * (daily_cost.Total_Cost_Per_Day + daily_cost.extra_budget_per_day)
+        total_capabilities_this_year = (365.25 * self.module._daily_capabilities)
         logger_summary.info(key='HRScaling',
                             description='The HR scale up factor by office type given fractions of an extra budget',
                             data={
                                 'scale_up_factor': daily_cost.scale_up_factor.to_dict(),
                                 'year_of_scale_up': self.sim.date.year,
-                                'total_hr_salary': total_cost_this_year.to_dict()
+                                'total_hr_salary': total_cost_this_year.to_dict(),
+                                'total_hr_capabilities': total_capabilities_this_year.to_dict()
                             }
                             )
 

From d4f18bdc89b721619590b07e4a0813aba0ed17e9 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 6 Nov 2024 15:13:01 +0000
Subject: [PATCH 174/218] comment and uncomment some lines

---
 ...dsion_by_officer_type_with_extra_budget.py | 112 +++++++++---------
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 2b0e058045..11c9d6050b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -742,14 +742,14 @@ def format_time_by_cadre_treatment(_df):
 
     hcw_time_increased_by_treatment_type = get_hcw_time_by_treatment().reindex(num_dalys_summarized.index).drop(['s_0'])
 
-    # num_services_increased_percent = summarize(
-    #     pd.DataFrame(
-    #         find_difference_relative_to_comparison_series(
-    #             num_services.loc[0],
-    #             comparison='s_1',
-    #             scaled=True)
-    #     ).T
-    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_1'])
+    num_services_increased_percent = summarize(
+        pd.DataFrame(
+            find_difference_relative_to_comparison_series(
+                num_services.loc[0],
+                comparison='s_0',
+                scaled=True)
+        ).T
+    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_deaths_averted = summarize(
         -1.0 *
@@ -1380,25 +1380,25 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    # # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
+    # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
     # outcome_data = num_dalys_averted_percent['mean']
-    # # outcome = num_services_increased_percent['mean']
-    # # outcome = num_treatments_total_increased_percent['mean']
-    # regression_data = pd.merge(outcome_data,
-    #                            extra_budget_allocation,
-    #                            left_index=True, right_index=True, how='inner')
-    # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
-    # regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery']
-    # regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
-    # regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
-    #                             * regression_data['Nursing_and_Midwifery'])
-    # cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other']
-    # regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True)
-    # predictor = regression_data[regression_data.columns[1:]]
-    # outcome = regression_data['mean']
-    # predictor = sm.add_constant(predictor)
-    # est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit()
-    # print(est.summary())
+    outcome_data = num_services_increased_percent['mean']
+    # outcome_data = num_treatments_total_increased_percent['mean']
+    regression_data = pd.merge(outcome_data,
+                               extra_budget_allocation,
+                               left_index=True, right_index=True, how='inner')
+    regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
+    regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery']
+    regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
+    regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
+                                * regression_data['Nursing_and_Midwifery'])
+    cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other']
+    regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True)
+    predictor = regression_data[regression_data.columns[1:]]
+    outcome = regression_data['mean']
+    predictor = sm.add_constant(predictor)
+    est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit()
+    print(est.summary())
 
     # todo: could do regression analysis of DALYs averted and Services increased
 
@@ -1858,37 +1858,37 @@ def find_never_ran_appts_that_need_specific_cadres():
     # fig.show()
     # plt.close(fig)
 
-    name_of_plot = f'Average fractions of HCW time used (CNP, level 1a), {target_period()}'
-    data_to_plot = hcw_time_usage_summarized.xs('1a', axis=1, level=1, drop_level=True) * 100
-    fig, ax = plt.subplots(figsize=(12, 8))
-    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
-    #ax.set_ylim(0, 100)
-    ax.set_ylabel('Percentage %')
-    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
-    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90)
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Average fractions of HCW time used (CNP, level 1a), {target_period()}'
+    # data_to_plot = hcw_time_usage_summarized.xs('1a', axis=1, level=1, drop_level=True) * 100
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    # #ax.set_ylim(0, 100)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
-    name_of_plot = f'Average fractions of HCW time used (CNP, level 2), {target_period()}'
-    data_to_plot = hcw_time_usage_summarized.xs('2', axis=1, level=1, drop_level=True) * 100
-    fig, ax = plt.subplots(figsize=(12, 8))
-    data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
-    # ax.set_ylim(0, 100)
-    ax.set_ylabel('Percentage %')
-    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
-    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90)
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
-    plt.title(name_of_plot)
-    fig.tight_layout()
-    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
-    fig.show()
-    plt.close(fig)
+    # name_of_plot = f'Average fractions of HCW time used (CNP, level 2), {target_period()}'
+    # data_to_plot = hcw_time_usage_summarized.xs('2', axis=1, level=1, drop_level=True) * 100
+    # fig, ax = plt.subplots(figsize=(12, 8))
+    # data_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    # # ax.set_ylim(0, 100)
+    # ax.set_ylabel('Percentage %')
+    # ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+    # xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    # ax.set_xticklabels(xtick_labels, rotation=90)
+    # plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    # plt.title(name_of_plot)
+    # fig.tight_layout()
+    # fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    # fig.show()
+    # plt.close(fig)
 
     name_of_plot = f'Extra budget allocation among cadres, {target_period()}'
     cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']

From 3da4a00133781f9e6fa8c8546d8036ed014ae3d3 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 7 Nov 2024 10:24:56 +0000
Subject: [PATCH 175/218] no need to log the year of scale up in HRH expansion
 HSI

---
 src/tlo/methods/healthsystem.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index c4ea2dd6a2..613282c7c0 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -3230,7 +3230,6 @@ def apply(self, population):
                             description='The HR scale up factor by office type given fractions of an extra budget',
                             data={
                                 'scale_up_factor': daily_cost.scale_up_factor.to_dict(),
-                                'year_of_scale_up': self.sim.date.year,
                                 'total_hr_salary': total_cost_this_year.to_dict(),
                                 'total_hr_capabilities': total_capabilities_this_year.to_dict()
                             }

From 26a9cd2dd5db3b3aa9c65b2ccf62381ad4492153 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 7 Nov 2024 11:21:15 +0000
Subject: [PATCH 176/218] turn on historical scaling for years 2020-2024

---
 ...t_hcw_by_officer_type_with_extra_budget.py | 26 ++++++++++---------
 src/tlo/methods/healthsystem.py               |  4 +--
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 8cb693d4e2..90d9338943 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -31,11 +31,11 @@ def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = Date(2030, 1, 1)
-        self.pop_size = 100_000  # todo: TBC
+        self.end_date = Date(2035, 1, 1)
+        self.pop_size = 100_000
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 5  # todo: TBC
+        self.runs_per_draw = 5
 
     def log_configuration(self):
         return {
@@ -52,7 +52,7 @@ def log_configuration(self):
 
     def modules(self):
         return (fullmodel(resourcefilepath=self.resources) +
-                [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)])  # todo: TBC
+                [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)])
 
     def draw_parameters(self, draw_number, rng):
         if draw_number < len(self._scenarios):
@@ -61,9 +61,10 @@ def draw_parameters(self, draw_number, rng):
     def _get_scenarios(self) -> Dict[str, Dict]:
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
-        self.YEAR_OF_CHANGE = 2019  # This is the year to change run settings and to start hr expansion.
+        self.YEAR_OF_MODE_CHANGE = 2020  # HCW capabilities data are for year of 2019, before the Covid-19 pandemic
 
-        self.scenarios = extra_budget_fracs['s_2'].to_frame()  # test the "best" scenario
+        self.scenarios = extra_budget_fracs['s_0'].to_frame()
+        # run no extra budget allocation scenarios first to get the never ran services and 'gap' allocation strategies
 
         return {
             self.scenarios.columns[i]:
@@ -82,13 +83,14 @@ def _baseline(self) -> Dict:
             {'HealthSystem': {
                 'mode_appt_constraints': 1,
                 'mode_appt_constraints_postSwitch': 2,
-                "scale_to_effective_capabilities": True,  # todo: what if set it False?
-                "year_mode_switch": self.YEAR_OF_CHANGE,
+                "scale_to_effective_capabilities": True,
+                # This happens in the year before mode change, as the model calibration is done by that year
+                "year_mode_switch": self.YEAR_OF_MODE_CHANGE,
                 'cons_availability': 'default',
-                'cons_availability_postSwitch': 'all',  # todo: how to argue for this setting?
-                'year_cons_availability_switch': self.YEAR_OF_CHANGE,
-                'yearly_HR_scaling_mode': 'no_scaling',
-                'start_year_HR_expansion_by_officer_type': self.YEAR_OF_CHANGE,
+                'cons_availability_postSwitch': 'all',
+                'year_cons_availability_switch': self.YEAR_OF_MODE_CHANGE,  # todo: or the HRH expansion start year?
+                'yearly_HR_scaling_mode': 'historical_scaling',  # for 5 years of 2020-2024; source data year 2019
+                'start_year_HR_expansion_by_officer_type': 2025,  # start expansion from 2025
                 'end_year_HR_expansion_by_officer_type': self.end_date.year,
                 "policy_name": "Naive",
                 "tclose_overwrite": 1,
diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 613282c7c0..2ed2a702c2 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -667,8 +667,8 @@ def read_parameters(self, data_folder):
             'Dental': 0, 'Laboratory': 0, 'Mental': 0, 'Nutrition': 0, 'Radiography': 0
         }
         self.parameters['HR_budget_growth_rate'] = 0.042
-        self.parameters['start_year_HR_expansion_by_officer_type'] = 2019
-        self.parameters['end_year_HR_expansion_by_officer_type'] = 2030
+        self.parameters['start_year_HR_expansion_by_officer_type'] = 2025
+        self.parameters['end_year_HR_expansion_by_officer_type'] = 2035
 
     def pre_initialise_population(self):
         """Generate the accessory classes used by the HealthSystem and pass to them the data that has been read."""

From 117a91688d7614dbe58fe1485469586e29e7d90a Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 7 Nov 2024 11:27:59 +0000
Subject: [PATCH 177/218] recover the scaling factors in historical scaling for
 years 2020-2024

---
 .../scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx
index 5af44f5b35..36b9fd0dc2 100644
--- a/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx
+++ b/resources/healthsystem/human_resources/scaling_capabilities/ResourceFile_dynamic_HR_scaling.xlsx
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7462bfb5740df3f5ffbabd1cdf10e81342f6da146170cc9648de0fbedffb454
-size 25434
+oid sha256:b8388ef18f073c9470c01f8408bff572017484763cfc4c87bb0212c38ee0b6d7
+size 25488

From ad030561f0ec2004191555b5ea366c99da71c93e Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 7 Nov 2024 15:33:20 +0000
Subject: [PATCH 178/218] fix failing checks

---
 .../analysis_CNP_permutation.py               | 96 +++++++++----------
 ...dsion_by_officer_type_with_extra_budget.py | 12 +--
 2 files changed, 53 insertions(+), 55 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py
index 0ac2ebdc8d..ed8187e7aa 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py
@@ -12,8 +12,6 @@
 
 import numpy as np
 import pandas as pd
-import statsmodels.api as sm
-import statsmodels.stats as ss
 from matplotlib import pyplot as plt
 
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
@@ -440,16 +438,16 @@ def format_time_by_cadre_treatment(_df):
 
             return _df, _series
 
-        time_by_cadre_treatment_all_scenarios = {
-            f's_{key}': format_time_by_cadre_treatment(
-                pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index')
-            )[0] for key in range(len(param_names))
-        }
-
-        time_increased_by_cadre_treatment = {
-            key: time_by_cadre_treatment_all_scenarios[key] - time_by_cadre_treatment_all_scenarios['s_2']
-            for key in time_by_cadre_treatment_all_scenarios.keys()
-        }
+        # time_by_cadre_treatment_all_scenarios = {
+        #     f's_{key}': format_time_by_cadre_treatment(
+        #         pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index')
+        #     )[0] for key in range(len(param_names))
+        # }
+        #
+        # time_increased_by_cadre_treatment = {
+        #     key: time_by_cadre_treatment_all_scenarios[key] - time_by_cadre_treatment_all_scenarios['s_2']
+        #     for key in time_by_cadre_treatment_all_scenarios.keys()
+        # }
 
         time_by_treatment_all_scenarios = {
             f's_{key}': format_time_by_cadre_treatment(
@@ -480,7 +478,7 @@ def format_time_by_cadre_treatment(_df):
     appt_time, appt_cost = format_appt_time_and_cost()
 
     # Get current (year of 2018/2019) hr counts
-    curr_hr = get_current_hr(cadres)[0]
+    # curr_hr = get_current_hr(cadres)[0]
     curr_hr_cap = get_current_hr(cadres)[1]
 
     # Get scale up factors for all scenarios
@@ -716,9 +714,9 @@ def format_time_by_cadre_treatment(_df):
         num_dalys_summarized.index).fillna(0.0)
     num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex(
         param_names).reindex(num_dalys_summarized.index).fillna(0.0)
-    num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
+    # num_appts_demand_summarized = summarize(num_appts_demand, only_mean=True).T.reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
     num_treatments_summarized = summarize(num_treatments, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index
     )
@@ -726,21 +724,21 @@ def format_time_by_cadre_treatment(_df):
     #     num_dalys_summarized.index
     # )
 
-    num_never_ran_services_summarized = summarize(num_never_ran_services).loc[0].unstack().reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
-    num_never_ran_appts_summarized = summarize(num_never_ran_appts, only_mean=True).T.reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
+    # num_never_ran_services_summarized = summarize(num_never_ran_services).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
+    # num_never_ran_appts_summarized = summarize(num_never_ran_appts, only_mean=True).T.reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
     # num_never_ran_treatments_summarized = summarize(num_never_ran_treatments, only_mean=True).T.reindex(param_names).reindex(
     #     num_dalys_summarized.index
     # )
     # num_never_ran_treatments_total_summarized = summarize(num_never_ran_treatments_total).loc[0].unstack().reindex(param_names).reindex(
     #     num_dalys_summarized.index
     # )
-    num_services_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
+    # num_services_demand_summarized = summarize(num_services_demand).loc[0].unstack().reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
     # ratio_service_summarized = summarize(ratio_services).loc[0].unstack().reindex(param_names).reindex(
     #     num_dalys_summarized.index
     # )
@@ -818,14 +816,14 @@ def format_time_by_cadre_treatment(_df):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
-    num_dalys_by_cause_averted_percent = summarize(
-        -1.0 * find_difference_relative_to_comparison_dataframe(
-            num_dalys_by_cause,
-            comparison='s_0',
-            scaled=True
-        ),
-        only_mean=True
-    ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
+    # num_dalys_by_cause_averted_percent = summarize(
+    #     -1.0 * find_difference_relative_to_comparison_dataframe(
+    #         num_dalys_by_cause,
+    #         comparison='s_0',
+    #         scaled=True
+    #     ),
+    #     only_mean=True
+    # ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
     # num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_2', :].sort_values(ascending=False)
     # # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
@@ -1043,9 +1041,9 @@ def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by
     assert (hcw_time_used.index == hcw_time_gap.index).all()
     assert (hcw_time_used.columns == hcw_time_gap.columns).all()
     hcw_time_demand = hcw_time_used + hcw_time_gap
-    hcw_time_demand_increased = pd.DataFrame(
-        hcw_time_demand.subtract(hcw_time_demand.loc['s_0', :], axis=1).drop('s_0', axis=0)
-    )
+    # hcw_time_demand_increased = pd.DataFrame(
+    #     hcw_time_demand.subtract(hcw_time_demand.loc['s_0', :], axis=1).drop('s_0', axis=0)
+    # )
 
     # cost gap proportions of cadres within each scenario
     hcw_cost_gap_percent = pd.DataFrame(index=hcw_cost_gap.index, columns=hcw_cost_gap.columns)
@@ -1184,16 +1182,16 @@ def find_never_ran_appts_that_need_specific_cadres():
     p_count['Other cases'] = 1 - p_count[p_count.columns[0:7]].sum(axis=1)
 
     # define color for the cadres combinations above
-    cadre_comb_color = {
-        'C and P and N&M': 'royalblue',
-        'C and P': 'turquoise',
-        'C and N&M': 'gold',
-        'N&M and P': 'yellowgreen',
-        'Clinical (C)': 'mediumpurple',
-        'Pharmacy (P)': 'limegreen',
-        'Nursing_and_Midwifery (N&M)': 'pink',
-        'Other cases': 'gray',
-    }
+    # cadre_comb_color = {
+    #     'C and P and N&M': 'royalblue',
+    #     'C and P': 'turquoise',
+    #     'C and N&M': 'gold',
+    #     'N&M and P': 'yellowgreen',
+    #     'Clinical (C)': 'mediumpurple',
+    #     'Pharmacy (P)': 'limegreen',
+    #     'Nursing_and_Midwifery (N&M)': 'pink',
+    #     'Other cases': 'gray',
+    # }
 
     # Checked that Number_By_Appt_Type_Code and Number_By_Appt_Type_Code_And_Level have not exactly same results
 
@@ -1215,9 +1213,9 @@ def find_never_ran_appts_that_need_specific_cadres():
     # #     CE.loc[i, 'upper'] = extra_cost_all_yrs.loc[i, 'all_cadres'] / num_dalys_averted.loc[i, 'lower']
 
     # prepare colors for plots
-    appt_color = {
-        appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns
-    }
+    # appt_color = {
+    #     appt: COARSE_APPT_TYPE_TO_COLOR_MAP.get(appt, np.nan) for appt in num_appts_summarized.columns
+    # }
     treatment_color = {
         treatment: SHORT_TREATMENT_ID_TO_COLOR_MAP.get(treatment, np.nan)
         for treatment in num_treatments_summarized.columns
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 11c9d6050b..6e61530fab 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -13,7 +13,7 @@
 import numpy as np
 import pandas as pd
 import statsmodels.api as sm
-import statsmodels.stats as ss
+# import statsmodels.stats as ss
 from matplotlib import pyplot as plt
 
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
@@ -436,11 +436,11 @@ def format_time_by_cadre_treatment(_df):
 
             return _df, _series
 
-        time_by_cadre_treatment_all_scenarios = {
-            f's_{key}': format_time_by_cadre_treatment(
-                pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index')
-            )[0] for key in range(len(param_names))
-        }
+        # time_by_cadre_treatment_all_scenarios = {
+        #     f's_{key}': format_time_by_cadre_treatment(
+        #         pd.DataFrame.from_dict(time_by_cadre_treatment_per_draw[key], orient='index')
+        #     )[0] for key in range(len(param_names))
+        # }
 
         time_by_treatment_all_scenarios = {
             f's_{key}': format_time_by_cadre_treatment(

From ff698d1af27a087379e25a9d648b0dd9cc686bac Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 7 Nov 2024 16:01:05 +0000
Subject: [PATCH 179/218] fix failing checks

---
 .../analysis_CNP_permutation.py                       | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py
index ed8187e7aa..fddfd2eddd 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_CNP_permutation.py
@@ -25,7 +25,6 @@
 from tlo.analysis.utils import (
     APPT_TYPE_TO_COARSE_APPT_TYPE_MAP,
     CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP,
-    COARSE_APPT_TYPE_TO_COLOR_MAP,
     SHORT_TREATMENT_ID_TO_COLOR_MAP,
     bin_hsi_event_details,
     compute_mean_across_runs,
@@ -671,14 +670,14 @@ def format_time_by_cadre_treatment(_df):
     # get total service demand
     assert len(num_services) == len(num_never_ran_services) == 1
     assert (num_services.columns == num_never_ran_services.columns).all()
-    num_services_demand = num_services + num_never_ran_services
+    # num_services_demand = num_services + num_never_ran_services
     # ratio_services = num_services / num_services_demand
 
     assert (num_appts.columns == num_never_ran_appts.columns).all()
     num_never_ran_appts.loc['Lab / Diagnostics', :] = 0
     num_never_ran_appts = num_never_ran_appts.reindex(num_appts.index).fillna(0.0)
     assert (num_appts.index == num_never_ran_appts.index).all()
-    num_appts_demand = num_appts + num_never_ran_appts
+    # num_appts_demand = num_appts + num_never_ran_appts
 
     hcw_time_usage = extract_results(
         results_folder,
@@ -707,9 +706,9 @@ def format_time_by_cadre_treatment(_df):
     num_services_summarized = summarize(num_services).loc[0].unstack().reindex(param_names).reindex(
         num_dalys_summarized.index
     )
-    num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names).reindex(
-        num_dalys_summarized.index
-    )
+    # num_appts_summarized = summarize(num_appts, only_mean=True).T.reindex(param_names).reindex(
+    #     num_dalys_summarized.index
+    # )
     num_appts_by_level_summarized = summarize(num_appts_by_level, only_mean=True).T.reindex(param_names).reindex(
         num_dalys_summarized.index).fillna(0.0)
     num_never_ran_appts_by_level_summarized = summarize(num_never_ran_appts_by_level, only_mean=True).T.reindex(

From 321a40b93c6610bd154ab604f7ace599d62f62d4 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 7 Nov 2024 22:31:31 +0000
Subject: [PATCH 180/218] update scenario run settings

---
 ...t_hcw_by_officer_type_with_extra_budget.py | 119 ++++++++++++++----
 1 file changed, 97 insertions(+), 22 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 90d9338943..a7c2a20e29 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -61,41 +61,116 @@ def draw_parameters(self, draw_number, rng):
     def _get_scenarios(self) -> Dict[str, Dict]:
         """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario."""
 
-        self.YEAR_OF_MODE_CHANGE = 2020  # HCW capabilities data are for year of 2019, before the Covid-19 pandemic
+        self.YEAR_OF_MODE_CHANGE = 2020
+        # HCW capabilities from data source are for year 2019,
+        # and we want to rescale to effective capabilities in the end of 2019 considering model calibration
+        self.YEAR_OF_HRH_EXPANSION = 2025
+        # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling
 
         self.scenarios = extra_budget_fracs['s_0'].to_frame()
-        # run no extra budget allocation scenarios first to get the never ran services and 'gap' allocation strategies
+        # Run no extra budget allocation scenarios first to get never ran services and 'gap' allocation strategies
+
+        self.cons_availability = ['all', 'default']
+        self.policy = ['Naive', 'EHP_III']  # TBC, not clear the differences or the implementation/change year
+        self.hr_budget = [0.042, 0.058, 0.026]
+        self.hs_function = [[False, False], [False, True]]
+
+        self.baselines = {
+            'baseline': self._baseline_of_baseline(),
+            'default_cons': self._baseline_default_cons(),
+            'more_budget': self._baseline_more_budget(),
+            'less_budget': self._baseline_less_budget(),
+            'efficient_policy': self._baseline_efficient_policy(),
+            'max_hs_function': self._baseline_max_hs_function(),
+        }
 
         return {
-            self.scenarios.columns[i]:
+            b + ' ' + self.scenarios.columns[i]:
                 mix_scenarios(
-                    self._baseline(),
+                    self.baselines[b],
                     {'HealthSystem': {
                         'HR_expansion_by_officer_type': self.scenarios.iloc[:, i].to_dict()
                     }
                     }
-                ) for i in range(len(self.scenarios.columns))  # run 33 scenarios
+                ) for b in self.baselines.keys() for i in range(len(self.scenarios.columns))
         }
 
-    def _baseline(self) -> Dict:
+    def _baseline_of_baseline(self) -> Dict:
         return mix_scenarios(
             get_parameters_for_status_quo(),
-            {'HealthSystem': {
-                'mode_appt_constraints': 1,
-                'mode_appt_constraints_postSwitch': 2,
-                "scale_to_effective_capabilities": True,
-                # This happens in the year before mode change, as the model calibration is done by that year
-                "year_mode_switch": self.YEAR_OF_MODE_CHANGE,
-                'cons_availability': 'default',
-                'cons_availability_postSwitch': 'all',
-                'year_cons_availability_switch': self.YEAR_OF_MODE_CHANGE,  # todo: or the HRH expansion start year?
-                'yearly_HR_scaling_mode': 'historical_scaling',  # for 5 years of 2020-2024; source data year 2019
-                'start_year_HR_expansion_by_officer_type': 2025,  # start expansion from 2025
-                'end_year_HR_expansion_by_officer_type': self.end_date.year,
-                "policy_name": "Naive",
-                "tclose_overwrite": 1,
-                "tclose_days_offset_overwrite": 7,
-            }
+            {
+                'HealthSystem': {
+                    'mode_appt_constraints': 1,
+                    'mode_appt_constraints_postSwitch': 2,
+                    "scale_to_effective_capabilities": True,
+                    # This happens in the year before mode change, as the model calibration is done by that year
+                    "year_mode_switch": self.YEAR_OF_MODE_CHANGE,
+                    'cons_availability': 'default',
+                    'cons_availability_postSwitch': self.cons_availability[0],
+                    'year_cons_availability_switch': self.YEAR_OF_HRH_EXPANSION,  # TBC: or YEAR_OF_MODE_CHANGE?
+                    'HR_budget_growth_rate': self.hr_budget[0],
+                    'yearly_HR_scaling_mode': 'historical_scaling',  # for 5 years of 2020-2024; source data year 2019
+                    'start_year_HR_expansion_by_officer_type': self.YEAR_OF_HRH_EXPANSION,
+                    'end_year_HR_expansion_by_officer_type': self.end_date.year,
+                    "policy_name": self.policy[0],
+                    "tclose_overwrite": 1,
+                    "tclose_days_offset_overwrite": 7,
+                },
+                'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': {
+                    'max_healthcare_seeking': [False, False],
+                    'max_healthsystem_function': self.hs_function[0],
+                    'year_of_switch': self.YEAR_OF_HRH_EXPANSION,  # TBC: or YEAR_OF_MODE_CHANGE?
+                }
+            },
+        )
+
+    def _baseline_default_cons(self) -> Dict:
+        return mix_scenarios(
+            self._baseline_of_baseline(),
+            {
+                'HealthSystem': {
+                    'cons_availability_postSwitch': self.cons_availability[1],
+                },
+            },
+        )
+
+    def _baseline_more_budget(self) -> Dict:
+        return mix_scenarios(
+            self._baseline_of_baseline(),
+            {
+                'HealthSystem': {
+                    'HR_budget_growth_rate': self.hr_budget[1],
+                },
+            },
+        )
+
+    def _baseline_less_budget(self) -> Dict:
+        return mix_scenarios(
+            self._baseline_of_baseline(),
+            {
+                'HealthSystem': {
+                    'HR_budget_growth_rate': self.hr_budget[2],
+                },
+            },
+        )
+
+    def _baseline_efficient_policy(self) -> Dict:
+        return mix_scenarios(
+            self._baseline_of_baseline(),
+            {
+                'HealthSystem': {
+                    "policy_name": self.policy[1],
+                },
+            },
+        )
+
+    def _baseline_max_hs_function(self) -> Dict:
+        return mix_scenarios(
+            self._baseline_of_baseline(),
+            {
+                'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': {
+                    'max_healthsystem_function': self.hs_function[1],
+                }
             },
         )
 

From ef3aae05f97cedc3bff5620adaef97911abb6854 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 8 Nov 2024 16:57:39 +0000
Subject: [PATCH 181/218] update scenario run settings and TBC

---
 ...t_hcw_by_officer_type_with_extra_budget.py | 25 ++++++-------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index a7c2a20e29..a3152a2ba6 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -35,7 +35,7 @@ def __init__(self):
         self.pop_size = 100_000
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 5
+        self.runs_per_draw = 10  # TBC
 
     def log_configuration(self):
         return {
@@ -70,17 +70,16 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.scenarios = extra_budget_fracs['s_0'].to_frame()
         # Run no extra budget allocation scenarios first to get never ran services and 'gap' allocation strategies
 
+        # Baseline settings for change
         self.cons_availability = ['all', 'default']
-        self.policy = ['Naive', 'EHP_III']  # TBC, not clear the differences or the implementation/change year
         self.hr_budget = [0.042, 0.058, 0.026]
         self.hs_function = [[False, False], [False, True]]
 
         self.baselines = {
             'baseline': self._baseline_of_baseline(),
             'default_cons': self._baseline_default_cons(),
-            'more_budget': self._baseline_more_budget(),
-            'less_budget': self._baseline_less_budget(),
-            'efficient_policy': self._baseline_efficient_policy(),
+            # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
+            # 'less_budget': self._baseline_less_budget(),
             'max_hs_function': self._baseline_max_hs_function(),
         }
 
@@ -107,19 +106,19 @@ def _baseline_of_baseline(self) -> Dict:
                     "year_mode_switch": self.YEAR_OF_MODE_CHANGE,
                     'cons_availability': 'default',
                     'cons_availability_postSwitch': self.cons_availability[0],
-                    'year_cons_availability_switch': self.YEAR_OF_HRH_EXPANSION,  # TBC: or YEAR_OF_MODE_CHANGE?
+                    'year_cons_availability_switch': self.YEAR_OF_HRH_EXPANSION,
                     'HR_budget_growth_rate': self.hr_budget[0],
                     'yearly_HR_scaling_mode': 'historical_scaling',  # for 5 years of 2020-2024; source data year 2019
                     'start_year_HR_expansion_by_officer_type': self.YEAR_OF_HRH_EXPANSION,
                     'end_year_HR_expansion_by_officer_type': self.end_date.year,
-                    "policy_name": self.policy[0],
+                    "policy_name": 'Naive',
                     "tclose_overwrite": 1,
                     "tclose_days_offset_overwrite": 7,
                 },
                 'ImprovedHealthSystemAndCareSeekingScenarioSwitcher': {
                     'max_healthcare_seeking': [False, False],
                     'max_healthsystem_function': self.hs_function[0],
-                    'year_of_switch': self.YEAR_OF_HRH_EXPANSION,  # TBC: or YEAR_OF_MODE_CHANGE?
+                    'year_of_switch': self.YEAR_OF_HRH_EXPANSION,
                 }
             },
         )
@@ -154,16 +153,6 @@ def _baseline_less_budget(self) -> Dict:
             },
         )
 
-    def _baseline_efficient_policy(self) -> Dict:
-        return mix_scenarios(
-            self._baseline_of_baseline(),
-            {
-                'HealthSystem': {
-                    "policy_name": self.policy[1],
-                },
-            },
-        )
-
     def _baseline_max_hs_function(self) -> Dict:
         return mix_scenarios(
             self._baseline_of_baseline(),

From 2b6e96e9d61ee0dc148fd5871cd296ee1b018a78 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 8 Nov 2024 16:58:13 +0000
Subject: [PATCH 182/218] update param_names

---
 ...analysis_hr_expandsion_by_officer_type_with_extra_budget.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 6e61530fab..bc0003c83a 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -456,7 +456,8 @@ def format_time_by_cadre_treatment(_df):
         return time_increased_by_treatment
 
     # Get parameter/scenario names
-    param_names = get_parameter_names_from_scenario_file()
+    param_names = tuple(extra_budget_fracs.columns)
+    # param_names = get_parameter_names_from_scenario_file()
     # param_names = ('s_0', 's_1', 's_2', 's_3', 's_11', 's_22')
     # param_names = ('s_1', 's_2', 's_3', 's_11', 's_22')
 

From 780b9a8ac517f4d6f8179ce0d01dd4457ff32b69 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 11 Nov 2024 10:02:01 +0000
Subject: [PATCH 183/218] update comment

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index a3152a2ba6..c3713d8bd6 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -35,7 +35,7 @@ def __init__(self):
         self.pop_size = 100_000
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 10  # TBC
+        self.runs_per_draw = 10  # TBC: considering convergence and cost
 
     def log_configuration(self):
         return {

From 0726246734c07dc3baaf6feb7a69017872dc33e4 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 12 Nov 2024 10:20:50 +0000
Subject: [PATCH 184/218] format labels

---
 ...dsion_by_officer_type_with_extra_budget.py | 83 +++++++++----------
 1 file changed, 41 insertions(+), 42 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index bc0003c83a..feb2514dd5 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -43,12 +43,12 @@
     's_3': 'all_cadres_equal_allocation',
     's_4': 'Clinical (C)', 's_5': 'DCSA (D)', 's_6': 'Nursing_and_Midwifery (N&M)', 's_7': 'Pharmacy (P)',
     's_8': 'Other (O)',
-    's_9': 'C + D', 's_10': 'C + N&M', 's_11': 'C + P', 's_12': 'C + O', 's_13': 'D + N&M',
-    's_14': 'D + P', 's_15': 'D + O', 's_16': 'N&M + P', 's_17': 'N&M + O', 's_18': 'P + O',
-    's_19': 'C + D + N&M', 's_20': 'C + D + P', 's_21': 'C + D + O', 's_22': 'C + N&M + P', 's_23': 'C + N&M + O',
-    's_24': 'C + P + O', 's_25': 'D + N&M + P', 's_26': 'D + N&M + O', 's_27': 'D + P + O', 's_28': 'N&M + P + O',
-    's_29': 'C + D + N&M + P', 's_30': 'C + D + N&M + O', 's_31': 'C + D + P + O', 's_32': 'C + N&M + P + O',
-    's_33': 'D + N&M + P + O',
+    's_9': 'C = D', 's_10': 'C = N&M', 's_11': 'C = P', 's_12': 'C = O', 's_13': 'N&M = D',
+    's_14': 'P = D', 's_15': 'D = O', 's_16': 'P = N&M', 's_17': 'N&M = O', 's_18': 'P = O',
+    's_19': 'C = N&M = D', 's_20': 'C = P = D', 's_21': 'C = D = O', 's_22': 'C = P = N&M', 's_23': 'C = N&M = O',
+    's_24': 'C = P = O', 's_25': 'P = N&M = D', 's_26': 'N&M = D = O', 's_27': 'P = D = O', 's_28': 'P = N&M = O',
+    's_29': 'C = P = N&M = D', 's_30': 'C = N&M = D = O', 's_31': 'C = P = D = O', 's_32': 'C = P = N&M = O',
+    's_33': 'P = N&M = D = O',
 }
 
 
@@ -195,16 +195,16 @@ def find_difference_relative_to_comparison_dataframe(_df: pd.DataFrame, **kwargs
     def scenario_grouping_coloring(by='effect'):
         if by == 'effect':  # based on DALYs averted/whether to  expand Clinical + Pharmacy
             grouping = {
-                'C + P + D/N&M/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'},
-                'C + D/N&M/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'},
-                'P + D/N&M/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'},
+                'C & P & D/N&M/O/None': {'s_1', 's_2', 's_3', 's_11', 's_20', 's_22', 's_24', 's_29', 's_31', 's_32'},
+                'C & D/N&M/O/None': {'s_4', 's_9', 's_10', 's_12', 's_19', 's_21', 's_23', 's_30'},
+                'P & D/N&M/O/None': {'s_7', 's_14', 's_16', 's_18', 's_25', 's_27', 's_28', 's_33'},
                 'D/N&M/O/None': {'s_5', 's_6', 's_8', 's_13', 's_15', 's_17', 's_26', 's_0'}
             }
             grouping_color = {
                 'D/N&M/O/None': 'lightpink',
-                'P + D/N&M/O/None': 'violet',
-                'C + D/N&M/O/None': 'darkorchid',
-                'C + P + D/N&M/O/None': 'darkturquoise',
+                'P & D/N&M/O/None': 'violet',
+                'C & D/N&M/O/None': 'darkorchid',
+                'C & P & D/N&M/O/None': 'darkturquoise',
             }
         elif by == 'expansion':  # based on how many cadres are expanded
             grouping = {
@@ -1084,20 +1084,20 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # cost/time proportions within never ran appts, in total of all cadres
     p_cost = pd.DataFrame(index=num_services_summarized.index)
-    p_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[1]
-    p_cost['C and P'] = never_ran_appts_info_that_need_CP[1]
-    p_cost['C and N&M'] = never_ran_appts_info_that_need_CN[1]
-    p_cost['N&M and P'] = never_ran_appts_info_that_need_NP[1]
+    p_cost['C & P & N&M'] = never_ran_appts_info_that_need_CNP[1]
+    p_cost['C & P'] = never_ran_appts_info_that_need_CP[1]
+    p_cost['C & N&M'] = never_ran_appts_info_that_need_CN[1]
+    p_cost['P & N&M'] = never_ran_appts_info_that_need_NP[1]
     p_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[1]
     p_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[1]
     p_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[1]
     p_cost['Other cases'] = 1 - p_cost[p_cost.columns[0:7]].sum(axis=1)
 
     p_time = pd.DataFrame(index=num_services_summarized.index)
-    p_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[4]
-    p_time['C and P'] = never_ran_appts_info_that_need_CP[4]
-    p_time['C and N&M'] = never_ran_appts_info_that_need_CN[4]
-    p_time['N&M and P'] = never_ran_appts_info_that_need_NP[4]
+    p_time['C & P & N&M'] = never_ran_appts_info_that_need_CNP[4]
+    p_time['C & P'] = never_ran_appts_info_that_need_CP[4]
+    p_time['C & N&M'] = never_ran_appts_info_that_need_CN[4]
+    p_time['P & N&M'] = never_ran_appts_info_that_need_NP[4]
     p_time['Clinical (C)'] = never_ran_appts_info_that_need_C[4]
     p_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[4]
     p_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[4]
@@ -1105,20 +1105,20 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # absolute cost/time gap within never ran appts
     a_cost = pd.DataFrame(index=num_services_summarized.index)
-    a_cost['C and P and N&M'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1)
-    a_cost['C and P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1)
-    a_cost['C and N&M'] = never_ran_appts_info_that_need_CN[2].sum(axis=1)
-    a_cost['N&M and P'] = never_ran_appts_info_that_need_NP[2].sum(axis=1)
+    a_cost['C & P & N&M'] = never_ran_appts_info_that_need_CNP[2].sum(axis=1)
+    a_cost['C & P'] = never_ran_appts_info_that_need_CP[2].sum(axis=1)
+    a_cost['C & N&M'] = never_ran_appts_info_that_need_CN[2].sum(axis=1)
+    a_cost['P & N&M'] = never_ran_appts_info_that_need_NP[2].sum(axis=1)
     a_cost['Clinical (C)'] = never_ran_appts_info_that_need_C[2].sum(axis=1)
     a_cost['Pharmacy (P)'] = never_ran_appts_info_that_need_P[2].sum(axis=1)
     a_cost['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[2].sum(axis=1)
     a_cost['Other cases'] = hcw_cost_gap.sum(axis=1) - a_cost.sum(axis=1)
 
     a_time = pd.DataFrame(index=num_services_summarized.index)
-    a_time['C and P and N&M'] = never_ran_appts_info_that_need_CNP[5].sum(axis=1)
-    a_time['C and P'] = never_ran_appts_info_that_need_CP[5].sum(axis=1)
-    a_time['C and N&M'] = never_ran_appts_info_that_need_CN[5].sum(axis=1)
-    a_time['N&M and P'] = never_ran_appts_info_that_need_NP[5].sum(axis=1)
+    a_time['C & P & N&M'] = never_ran_appts_info_that_need_CNP[5].sum(axis=1)
+    a_time['C & P'] = never_ran_appts_info_that_need_CP[5].sum(axis=1)
+    a_time['C & N&M'] = never_ran_appts_info_that_need_CN[5].sum(axis=1)
+    a_time['P & N&M'] = never_ran_appts_info_that_need_NP[5].sum(axis=1)
     a_time['Clinical (C)'] = never_ran_appts_info_that_need_C[5].sum(axis=1)
     a_time['Pharmacy (P)'] = never_ran_appts_info_that_need_P[5].sum(axis=1)
     a_time['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[5].sum(axis=1)
@@ -1126,10 +1126,10 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # appts count proportions within never ran appts, in total of all cadres
     p_count = pd.DataFrame(index=num_services_summarized.index)
-    p_count['C and P and N&M'] = never_ran_appts_info_that_need_CNP[0]
-    p_count['C and P'] = never_ran_appts_info_that_need_CP[0]
-    p_count['C and N&M'] = never_ran_appts_info_that_need_CN[0]
-    p_count['N&M and P'] = never_ran_appts_info_that_need_NP[0]
+    p_count['C & P & N&M'] = never_ran_appts_info_that_need_CNP[0]
+    p_count['C & P'] = never_ran_appts_info_that_need_CP[0]
+    p_count['C & N&M'] = never_ran_appts_info_that_need_CN[0]
+    p_count['P & N&M'] = never_ran_appts_info_that_need_NP[0]
     p_count['Clinical (C)'] = never_ran_appts_info_that_need_C[0]
     p_count['Pharmacy (P)'] = never_ran_appts_info_that_need_P[0]
     p_count['Nursing_and_Midwifery (N&M)'] = never_ran_appts_info_that_need_N[0]
@@ -1137,10 +1137,10 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # define color for the cadres combinations above
     cadre_comb_color = {
-        'C and P and N&M': 'royalblue',
-        'C and P': 'turquoise',
-        'C and N&M': 'gold',
-        'N&M and P': 'yellowgreen',
+        'C & P & N&M': 'royalblue',
+        'C & P': 'turquoise',
+        'C & N&M': 'gold',
+        'P & N&M': 'yellowgreen',
         'Clinical (C)': 'mediumpurple',
         'Pharmacy (P)': 'limegreen',
         'Nursing_and_Midwifery (N&M)': 'pink',
@@ -1191,8 +1191,8 @@ def find_never_ran_appts_that_need_specific_cadres():
         'Other': 'gray'
     }
     # get scenario color
-    # scenario_groups = scenario_grouping_coloring(by='effect')
-    scenario_groups = scenario_grouping_coloring(by='expansion')
+    scenario_groups = scenario_grouping_coloring(by='effect')
+    # scenario_groups = scenario_grouping_coloring(by='expansion')
     scenario_color = {}
     for s in param_names:
         for k in scenario_groups[1].keys():
@@ -1241,12 +1241,11 @@ def find_never_ran_appts_that_need_specific_cadres():
     #ax.invert_xaxis()
     ax.invert_yaxis()
     ax.set_zlabel('Nursing and Midwifery (N&M)')
-    ax.plot3D([0, 1], [0, 1], [0, 1], linestyle='-', color='orange', alpha=1.0, linewidth=2)
-    legend_labels = list(scenario_groups[1].keys()) + ['line of C = P = N&M']
+    legend_labels = list(scenario_groups[1].keys())
     legend_handles = [plt.Line2D([0, 0], [0, 0],
                                  linestyle='none', marker='o', color=scenario_groups[1][label]
-                                 ) for label in legend_labels[0:len(legend_labels) - 1]
-                      ] + [plt.Line2D([0, 1], [0, 0], linestyle='-', color='orange')]
+                                 ) for label in legend_labels
+                      ]
     plt.legend(legend_handles, legend_labels,
                loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
                title='Scenario groups')

From e13b262d27578698b92ab8c64e7dec417986ff3e Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 12 Nov 2024 18:40:43 +0000
Subject: [PATCH 185/218] update scenario run settings and first run to test
 historical scaling only

---
 ...rrent_hcw_by_officer_type_with_extra_budget.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index c3713d8bd6..463441eacf 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -29,13 +29,13 @@
 class HRHExpansionByCadreWithExtraBudget(BaseScenario):
     def __init__(self):
         super().__init__()
-        self.seed = 0
+        self.seed = 0  # change seed to 1 if to do another 5 runs per draw
         self.start_date = Date(2010, 1, 1)
         self.end_date = Date(2035, 1, 1)
         self.pop_size = 100_000
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 10  # TBC: considering convergence and cost
+        self.runs_per_draw = 5
 
     def log_configuration(self):
         return {
@@ -67,7 +67,10 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.YEAR_OF_HRH_EXPANSION = 2025
         # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling
 
-        self.scenarios = extra_budget_fracs['s_0'].to_frame()
+        self.scenarios = extra_budget_fracs.drop(columns='s_2')
+        # Test historical scaling changes; do not run 'gap' scenario that's based on "no historical scaling"
+
+        # self.scenarios = extra_budget_fracs['s_0'].to_frame()
         # Run no extra budget allocation scenarios first to get never ran services and 'gap' allocation strategies
 
         # Baseline settings for change
@@ -76,11 +79,11 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.hs_function = [[False, False], [False, True]]
 
         self.baselines = {
-            'baseline': self._baseline_of_baseline(),
-            'default_cons': self._baseline_default_cons(),
+            'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
+            # 'default_cons': self._baseline_default_cons(),
             # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
             # 'less_budget': self._baseline_less_budget(),
-            'max_hs_function': self._baseline_max_hs_function(),
+            # 'max_hs_function': self._baseline_max_hs_function(),
         }
 
         return {

From c53f9054cb71cea78d9e3c8bdb3b8b5a41c55989 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 14 Nov 2024 11:33:39 +0000
Subject: [PATCH 186/218] update 3D plot

---
 ...dsion_by_officer_type_with_extra_budget.py | 59 ++++++++++---------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index feb2514dd5..43d647731f 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -1211,45 +1211,46 @@ def find_never_ran_appts_that_need_specific_cadres():
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
     name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}'
+    # name_of_plot = f'DALYs averted (%) vs no HCW expansion investment, {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
     # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
     # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
-    colors = [scenario_color[s] for s in heat_data.index]
+    # colors = [scenario_color[s] for s in heat_data.index]
     fig = plt.figure()
     ax = fig.add_subplot(111, projection='3d')
-    ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'],
-               alpha=0.8, marker='o', s=heat_data['mean'] * 2000,
-               #c=heat_data['mean'] * 100, cmap='viridis',
-               c=colors)
+    img = ax.scatter(heat_data['Clinical'], heat_data['Pharmacy'], heat_data['Nursing_and_Midwifery'],
+                     alpha=0.8, marker='o', #s=heat_data['mean'] * 2000, c=colors,
+                     c=heat_data['mean'] * 100, cmap='viridis'
+                     )
     # plot lines from the best point to three axes panes
-    # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
-    #           [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
-    #           [0, heat_data['Nursing_and_Midwifery'][0]],
-    #           linestyle='--', color='gray', alpha=0.8)
-    # ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
-    #           [0, heat_data['Pharmacy'][0]],
-    #           [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
-    #           linestyle='--', color='gray', alpha=0.8)
-    # ax.plot3D([0, heat_data['Clinical'][0]],
-    #           [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
-    #           [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
-    #           linestyle='--', color='gray', alpha=0.8)
-    ax.set_xlabel('Fraction of extra budget allocated to \nClinical cadre (C)')
-    ax.set_ylabel('Pharmacy cadre (P)')
+    ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
+              [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
+              [0, heat_data['Nursing_and_Midwifery'][0]],
+              linestyle='--', color='gray', alpha=0.8)
+    ax.plot3D([heat_data['Clinical'][0], heat_data['Clinical'][0]],
+              [0, heat_data['Pharmacy'][0]],
+              [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
+              linestyle='--', color='gray', alpha=0.8)
+    ax.plot3D([0, heat_data['Clinical'][0]],
+              [heat_data['Pharmacy'][0], heat_data['Pharmacy'][0]],
+              [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
+              linestyle='--', color='gray', alpha=0.8)
+    ax.set_xlabel('Fraction of extra budget allocated to \nClinical cadre', fontsize='small')
+    ax.set_ylabel('Pharmacy cadre', fontsize='small')
     #ax.invert_xaxis()
     ax.invert_yaxis()
-    ax.set_zlabel('Nursing and Midwifery (N&M)')
-    legend_labels = list(scenario_groups[1].keys())
-    legend_handles = [plt.Line2D([0, 0], [0, 0],
-                                 linestyle='none', marker='o', color=scenario_groups[1][label]
-                                 ) for label in legend_labels
-                      ]
-    plt.legend(legend_handles, legend_labels,
-               loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
-               title='Scenario groups')
-    # plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.25)
+    ax.set_zlabel('Nursing and Midwifery cadre', fontsize='small')
+    # legend_labels = list(scenario_groups[1].keys())
+    # legend_handles = [plt.Line2D([0, 0], [0, 0],
+    #                              linestyle='none', marker='o', color=scenario_groups[1][label]
+    #                              ) for label in legend_labels
+    #                   ]
+    # plt.legend(legend_handles, legend_labels,
+    #            loc='upper center', fontsize='small', bbox_to_anchor=(0.5, -0.2), ncol=2,
+    #            title='Scenario groups')
+    plt.colorbar(img, orientation='horizontal', fraction=0.046, pad=0.1, label='DALYs averted %')
     plt.title(name_of_plot)
     plt.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))

From 7910145baa76dcaeacf7271adbb5a839c849760b Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 19 Nov 2024 15:56:17 +0000
Subject: [PATCH 187/218] correct typo

---
 src/tlo/methods/healthsystem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 2ed2a702c2..90f63c678c 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -310,7 +310,7 @@ class HealthSystem(Module):
         'HR_expansion_by_officer_type': Parameter(
             Types.DICT, "This DICT has keys of nine officer types, each with a float value that "
                         "specifies the proportion of extra budget allocated to that officer type."
-                        "The extra budget for this year is (100 * HR_budget_growth_rate) of the total salary "
+                        "The extra budget for this year is (100 * HR_budget_growth_rate) percent of the total salary "
                         "of these officers in last year. Given the allocated extra budget and annual salary, "
                         "we calculate the extra minutes for these staff of this year. The expansion is done "
                         "on 1 Jan of every year from start_year_HR_expansion_by_officer_type."

From f6bf44925147b6db07cbe8a8314e74eb394a7f5d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 19 Nov 2024 15:57:37 +0000
Subject: [PATCH 188/218] update analysis file to run new results

---
 ...dsion_by_officer_type_with_extra_budget.py | 28 +++++++++++--------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 43d647731f..50f9b573eb 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -291,8 +291,9 @@ def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
     #     Return a series of yearly scale up factors for all cadres,
     #     with index of year and value of list of scale up factors.
     #     """
-    #     _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'scale_up_factor']
-    #                   ].set_index('year_of_scale_up')
+    #     _df['year'] = _df['date'].dt.year
+    #     _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year', 'scale_up_factor']
+    #                   ].set_index('year')
     #     _df = _df['scale_up_factor'].apply(pd.Series)
     #     assert (_df.columns == cadres).all()
     #     _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index}
@@ -306,8 +307,8 @@ def get_total_cost(_df):
         Return a series of yearly total cost for all cadres,
         with index of year and values of list of total cost.
         """
-        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year_of_scale_up', 'total_hr_salary']
-                      ].set_index('year_of_scale_up')
+        _df['year'] = _df['date'].dt.year
+        _df = _df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD), ['year', 'total_hr_salary']].set_index('year')
         _df = _df['total_hr_salary'].apply(pd.Series)
         assert (_df.columns == cadres).all()
         _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index}
@@ -450,13 +451,18 @@ def format_time_by_cadre_treatment(_df):
         }
         time_by_treatment_all_scenarios = pd.DataFrame(time_by_treatment_all_scenarios).T
 
+        # rename scenarios according to param_names
+        time_by_treatment_all_scenarios.rename(
+            index={time_by_treatment_all_scenarios.index[i]: param_names[i]
+                   for i in range(len(time_by_treatment_all_scenarios.index))}, inplace=True)
+
         time_increased_by_treatment = time_by_treatment_all_scenarios.subtract(
             time_by_treatment_all_scenarios.loc['s_0', :], axis=1).drop('s_0', axis=0).add_suffix('*')
 
         return time_increased_by_treatment
 
     # Get parameter/scenario names
-    param_names = tuple(extra_budget_fracs.columns)
+    param_names = tuple(extra_budget_fracs.drop(columns='s_2'))
     # param_names = get_parameter_names_from_scenario_file()
     # param_names = ('s_0', 's_1', 's_2', 's_3', 's_11', 's_22')
     # param_names = ('s_1', 's_2', 's_3', 's_11', 's_22')
@@ -530,11 +536,11 @@ def format_time_by_cadre_treatment(_df):
     # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
 
     # check total cost calculated is increased as expected
-    years = range(2019, the_target_period[1].year + 1)
+    years = range(2025, the_target_period[1].year + 1)
     for s in param_names[1:]:
         assert (abs(
-            total_cost.loc[(total_cost.year == 2029) & (total_cost.draw == s), 'all_cadres'].values[0] -
-            (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2019) & (total_cost.draw == 's_0'),
+            total_cost.loc[(total_cost.year == 2034) & (total_cost.draw == s), 'all_cadres'].values[0] -
+            (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2025) & (total_cost.draw == 's_0'),
                                                        'all_cadres'].values[0]
         ) < 1e6).all()
 
@@ -808,9 +814,9 @@ def format_time_by_cadre_treatment(_df):
         only_mean=True
     ).T.reindex(num_dalys_summarized.index).drop(['s_0'])
 
-    num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_2', :].sort_values(ascending=False)
+    num_dalys_by_cause_averted_CNP = num_dalys_by_cause_averted.loc['s_22', :].sort_values(ascending=False)
     # num_dalys_by_cause_averted_CP = num_dalys_by_cause_averted.loc['s_11', :].sort_values(ascending=False)
-    num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_2', :].sort_values(
+    num_dalys_by_cause_averted_percent_CNP = num_dalys_by_cause_averted_percent.loc['s_22', :].sort_values(
         ascending=False)
     # num_dalys_by_cause_averted_percent_CP = num_dalys_by_cause_averted_percent.loc['s_11', :].sort_values(
     #     ascending=False)
@@ -2320,5 +2326,5 @@ def find_never_ran_appts_that_need_specific_cadres():
         results_folder=args.results_folder,
         output_folder=args.results_folder,
         resourcefilepath=Path('./resources'),
-        the_target_period=(Date(2019, 1, 1), Date(2029, 12, 31))
+        the_target_period=(Date(2025, 1, 1), Date(2034, 12, 31))
     )

From 11b7710802605537e5fa489152aafb21fafcb2bd Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 19 Nov 2024 15:59:04 +0000
Subject: [PATCH 189/218] update comment of the "gap" strategies

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py        | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index be4ede8319..98b825ecf2 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -81,7 +81,8 @@
 auxiliary = cadre_to_expand.copy()
 for i in auxiliary.columns[3:]:  # for all equal-fraction scenarios
     auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum()
-auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]
+auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]  # without historical scaling
+# auxiliary.loc[:, 's_2'] = [0.4322, 0.0201, 0.3701, 0.1408, 0.0368]  # with historical scaling
 
 # define extra budget fracs for each cadre
 extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list)

From da8785c483c2d0e3c5bb280f90d9d0aaceea9735 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 21 Nov 2024 10:05:06 +0000
Subject: [PATCH 190/218] update regression

---
 ...expandsion_by_officer_type_with_extra_budget.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 50f9b573eb..dc95fad800 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -1388,17 +1388,17 @@ def find_never_ran_appts_that_need_specific_cadres():
     # plt.close(fig)
 
     # do some linear regression to see the marginal effects of individual cadres and combined effects of C, N, P cadres
-    # outcome_data = num_dalys_averted_percent['mean']
-    outcome_data = num_services_increased_percent['mean']
+    outcome_data = num_dalys_averted_percent['mean']
+    # outcome_data = num_services_increased_percent['mean']
     # outcome_data = num_treatments_total_increased_percent['mean']
     regression_data = pd.merge(outcome_data,
                                extra_budget_allocation,
                                left_index=True, right_index=True, how='inner')
-    regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
-    regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery']
-    regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
-    regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
-                                * regression_data['Nursing_and_Midwifery'])
+    # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
+    # regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery']
+    # regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
+    # regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
+    #                             * regression_data['Nursing_and_Midwifery'])
     cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other']
     regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True)
     predictor = regression_data[regression_data.columns[1:]]

From 84de376ecf239a115fd62b8096aa0d094e34d29f Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 21 Nov 2024 10:15:03 +0000
Subject: [PATCH 191/218] set the "gap" allocation strategy for default +
 historical scaling

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py      | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 98b825ecf2..a89ba0e8fe 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -81,8 +81,9 @@
 auxiliary = cadre_to_expand.copy()
 for i in auxiliary.columns[3:]:  # for all equal-fraction scenarios
     auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum()
-auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]  # without historical scaling
-# auxiliary.loc[:, 's_2'] = [0.4322, 0.0201, 0.3701, 0.1408, 0.0368]  # with historical scaling
+# for "gap" allocation strategy
+# auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]  # without historical scaling; "default" settings
+auxiliary.loc[:, 's_2'] = [0.4322, 0.0201, 0.3701, 0.1408, 0.0368]  # with historical scaling; "default" settings
 
 # define extra budget fracs for each cadre
 extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list)

From ca634f568f9e27a27634c6126d69bf6eae63e13d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 21 Nov 2024 10:20:17 +0000
Subject: [PATCH 192/218] to run the "gap" allocation strategy for default +
 historical scaling

---
 ...expanding_current_hcw_by_officer_type_with_extra_budget.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 463441eacf..4d9aa95786 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -67,8 +67,8 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.YEAR_OF_HRH_EXPANSION = 2025
         # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling
 
-        self.scenarios = extra_budget_fracs.drop(columns='s_2')
-        # Test historical scaling changes; do not run 'gap' scenario that's based on "no historical scaling"
+        self.scenarios = extra_budget_fracs['s_2'].to_frame()
+        # Run 'gap' scenario that's based on "no historical scaling" + baseline of baseline settings
 
         # self.scenarios = extra_budget_fracs['s_0'].to_frame()
         # Run no extra budget allocation scenarios first to get never ran services and 'gap' allocation strategies

From 1d4077bbd1efd079a6ec6e6bf87a002f932e3748 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 21 Nov 2024 10:25:51 +0000
Subject: [PATCH 193/218] fix failing checks

---
 ...pandsion_by_officer_type_with_extra_budget.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index dc95fad800..e35c13a9ea 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -749,14 +749,14 @@ def format_time_by_cadre_treatment(_df):
 
     hcw_time_increased_by_treatment_type = get_hcw_time_by_treatment().reindex(num_dalys_summarized.index).drop(['s_0'])
 
-    num_services_increased_percent = summarize(
-        pd.DataFrame(
-            find_difference_relative_to_comparison_series(
-                num_services.loc[0],
-                comparison='s_0',
-                scaled=True)
-        ).T
-    ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
+    # num_services_increased_percent = summarize(
+    #     pd.DataFrame(
+    #         find_difference_relative_to_comparison_series(
+    #             num_services.loc[0],
+    #             comparison='s_0',
+    #             scaled=True)
+    #     ).T
+    # ).iloc[0].unstack().reindex(param_names).reindex(num_dalys_summarized.index).drop(['s_0'])
 
     num_deaths_averted = summarize(
         -1.0 *

From 4374217ad9d6be929fdd267735040300096575f1 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 29 Nov 2024 00:19:52 +0000
Subject: [PATCH 194/218] run 5 more runs for baseline of baseline draws excl.
 "gap" strategy

---
 ...anding_current_hcw_by_officer_type_with_extra_budget.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 4d9aa95786..a37e66d7b2 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -29,7 +29,7 @@
 class HRHExpansionByCadreWithExtraBudget(BaseScenario):
     def __init__(self):
         super().__init__()
-        self.seed = 0  # change seed to 1 if to do another 5 runs per draw
+        self.seed = 1  # change seed to 1 if to do another 5 runs per draw
         self.start_date = Date(2010, 1, 1)
         self.end_date = Date(2035, 1, 1)
         self.pop_size = 100_000
@@ -67,7 +67,10 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.YEAR_OF_HRH_EXPANSION = 2025
         # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling
 
-        self.scenarios = extra_budget_fracs['s_2'].to_frame()
+        self.scenarios = extra_budget_fracs.drop(columns=['s_2'])
+        # Run another 5 runs for "no historical scaling" + baseline of baseline settings
+
+        # self.scenarios = extra_budget_fracs['s_2'].to_frame()
         # Run 'gap' scenario that's based on "no historical scaling" + baseline of baseline settings
 
         # self.scenarios = extra_budget_fracs['s_0'].to_frame()

From e0eab5e74fda2514b40351e80343b5be37ac1838 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 29 Nov 2024 10:22:26 +0000
Subject: [PATCH 195/218] run for sensitivity analysis - baselines + no
 expansion - to get gap strategy

---
 ...nt_hcw_by_officer_type_with_extra_budget.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index a37e66d7b2..8522ad3da0 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -29,13 +29,13 @@
 class HRHExpansionByCadreWithExtraBudget(BaseScenario):
     def __init__(self):
         super().__init__()
-        self.seed = 1  # change seed to 1 if to do another 5 runs per draw
+        self.seed = 0  # change seed to 1 if to do another 5 runs per draw
         self.start_date = Date(2010, 1, 1)
         self.end_date = Date(2035, 1, 1)
         self.pop_size = 100_000
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 5
+        self.runs_per_draw = 10
 
     def log_configuration(self):
         return {
@@ -67,13 +67,13 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.YEAR_OF_HRH_EXPANSION = 2025
         # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling
 
-        self.scenarios = extra_budget_fracs.drop(columns=['s_2'])
+        # self.scenarios = extra_budget_fracs.drop(columns=['s_2'])
         # Run another 5 runs for "no historical scaling" + baseline of baseline settings
 
         # self.scenarios = extra_budget_fracs['s_2'].to_frame()
         # Run 'gap' scenario that's based on "no historical scaling" + baseline of baseline settings
 
-        # self.scenarios = extra_budget_fracs['s_0'].to_frame()
+        self.scenarios = extra_budget_fracs['s_0'].to_frame()
         # Run no extra budget allocation scenarios first to get never ran services and 'gap' allocation strategies
 
         # Baseline settings for change
@@ -82,11 +82,11 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.hs_function = [[False, False], [False, True]]
 
         self.baselines = {
-            'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
-            # 'default_cons': self._baseline_default_cons(),
-            # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
-            # 'less_budget': self._baseline_less_budget(),
-            # 'max_hs_function': self._baseline_max_hs_function(),
+            # 'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
+            'default_cons': self._baseline_default_cons(),
+            'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
+            'less_budget': self._baseline_less_budget(),
+            'max_hs_function': self._baseline_max_hs_function(),
         }
 
         return {

From 2c9f2ed8b3ab31de27740b1be7ba31c622e7dac0 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 2 Dec 2024 11:28:09 +0000
Subject: [PATCH 196/218] update the preparation file to incorporate historical
 scaling, which does not change the extra budget fracs

---
 ...inute_salary_and_extra_budget_frac_data.py | 47 +++++++++++++------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index a89ba0e8fe..436a164425 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -16,17 +16,30 @@
                         'costing' / 'ResourceFile_Annual_Salary_Per_Cadre.csv', index_col=False)
 hr_salary_per_level = pd.read_excel(resourcefilepath /
                                     'costing' / 'ResourceFile_Costing.xlsx', sheet_name='human_resources')
+# as of 2019
 hr_current = pd.read_csv(resourcefilepath /
                          'healthsystem' / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv')
 hr_established = pd.read_csv(resourcefilepath /
                              'healthsystem' / 'human_resources' / 'funded_plus' / 'ResourceFile_Daily_Capabilities.csv')
+# for 2020-2024
+historical_scaling = pd.read_excel(resourcefilepath /
+                                   'healthsystem' / 'human_resources' / 'scaling_capabilities' /
+                                   'ResourceFile_dynamic_HR_scaling.xlsx', sheet_name='historical_scaling'
+                                   ).set_index('year')
+integrated_historical_scaling = (
+    historical_scaling.loc[2020, 'dynamic_HR_scaling_factor'] *
+    historical_scaling.loc[2021, 'dynamic_HR_scaling_factor'] *
+    historical_scaling.loc[2022, 'dynamic_HR_scaling_factor'] *
+    historical_scaling.loc[2023, 'dynamic_HR_scaling_factor'] *
+    historical_scaling.loc[2024, 'dynamic_HR_scaling_factor']
+)
 
 # to get minute salary per cadre per level
 Annual_PFT = hr_current.groupby(['Facility_Level', 'Officer_Category']).agg(
     {'Total_Mins_Per_Day': 'sum', 'Staff_Count': 'sum'}).reset_index()
 Annual_PFT['Annual_Mins_Per_Staff'] = 365.25 * Annual_PFT['Total_Mins_Per_Day']/Annual_PFT['Staff_Count']
 
-# the hr salary by minute and facility id
+# the hr salary by minute and facility id, as of 2019
 Minute_Salary = Annual_PFT.merge(hr_salary, on=['Officer_Category'], how='outer')
 Minute_Salary['Minute_Salary_USD'] = Minute_Salary['Annual_Salary_USD']/Minute_Salary['Annual_Mins_Per_Staff']
 # store the minute salary by cadre and level
@@ -42,14 +55,18 @@
 
 Minute_Salary.to_csv(resourcefilepath / 'costing' / 'Minute_Salary_HR.csv', index=False)
 
-# calculate the current cost distribution of all cadres
+# implement historical scaling to hr_current
+hr_current['Total_Mins_Per_Day'] *= integrated_historical_scaling
+hr_current['Staff_Count'] *= integrated_historical_scaling
+
+# calculate the current cost distribution of all cadres, as of 2024
 cadre_all = ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy',
              'Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
 staff_count = hr_current.groupby('Officer_Category')['Staff_Count'].sum().reset_index()
 staff_cost = staff_count.merge(hr_salary, on=['Officer_Category'], how='outer')
 staff_cost['annual_cost'] = staff_cost['Staff_Count'] * staff_cost['Annual_Salary_USD']
 staff_cost['cost_frac'] = (staff_cost['annual_cost'] / staff_cost['annual_cost'].sum())
-assert staff_cost.cost_frac.sum() == 1
+assert abs(staff_cost.cost_frac.sum() - 1) < 1/1e8
 staff_cost.set_index('Officer_Category', inplace=True)
 staff_cost = staff_cost.reindex(index=cadre_all)
 
@@ -125,7 +142,7 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
     """This function calculates the yearly hr scale up factor for cadres for a year yr,
     given a fraction of an extra budget allocated to each cadre and a yearly budget growth rate of 4.2%.
     Parameter extra_budget_frac (list) is a list of 9 floats, representing the fractions.
-    Parameter yr (int) is a year between 2019 and 2030.
+    Parameter yr (int) is a year between 2025 and 2035 (exclusive).
     Parameter scenario (string) is a column name in the extra budget fractions resource file.
     Output dataframe stores scale up factors and relevant for the year yr.
     """
@@ -149,31 +166,31 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
 
 # calculate scale up factors for all defined scenarios and years
 staff_cost['scale_up_factor'] = 1
-scale_up_factor_dict = {s: {y: {} for y in range(2018, 2030)} for s in extra_budget_fracs.columns}
+scale_up_factor_dict = {s: {y: {} for y in range(2025, 2035)} for s in extra_budget_fracs.columns}
 for s in extra_budget_fracs.columns:
-    # for the initial/current year of 2018
-    scale_up_factor_dict[s][2018] = staff_cost.drop(columns='cost_frac').copy()
+    # for the initial/current year of 2024
+    scale_up_factor_dict[s][2024] = staff_cost.drop(columns='cost_frac').copy()
     # for the years with scaled up hr
-    for y in range(2019, 2030):
+    for y in range(2025, 2035):
         scale_up_factor_dict[s][y] = calculate_hr_scale_up_factor(list(extra_budget_fracs[s]), y, s)
 
-# get the total cost and staff count for each year between 2020-2030 and each scenario
-total_cost = pd.DataFrame(index=range(2018, 2030), columns=extra_budget_fracs.columns)
-total_staff = pd.DataFrame(index=range(2018, 2030), columns=extra_budget_fracs.columns)
+# get the total cost and staff count for each year between 2024-2034 and each scenario
+total_cost = pd.DataFrame(index=range(2024, 2035), columns=extra_budget_fracs.columns)
+total_staff = pd.DataFrame(index=range(2024, 2035), columns=extra_budget_fracs.columns)
 for y in total_cost.index:
     for s in extra_budget_fracs.columns:
         total_cost.loc[y, s] = scale_up_factor_dict[s][y].annual_cost.sum()
         total_staff.loc[y, s] = scale_up_factor_dict[s][y].Staff_Count.sum()
 
-# check the total cost after 11 years are increased as expected
+# check the total cost after 10 years are increased as expected
 assert (
-    abs(total_cost.loc[2029, total_cost.columns[1:]] - (1 + 0.042) ** 11 * total_cost.loc[2029, 's_0']) < 1/1e7
+    abs(total_cost.loc[2034, total_cost.columns[1:]] - (1 + 0.042) ** 10 * total_cost.loc[2024, 's_0']) < 1/1e6
 ).all()
 
-# get the integrated scale up factors by the end of year 2029 and each scenario
+# get the integrated scale up factors by the end of year 2034 and each scenario
 integrated_scale_up_factor = pd.DataFrame(index=cadre_all, columns=total_cost.columns).fillna(1.0)
 for s in total_cost.columns[1:]:
-    for yr in range(2019, 2030):
+    for yr in range(2025, 2035):
         integrated_scale_up_factor.loc[:, s] = np.multiply(
             integrated_scale_up_factor.loc[:, s].values,
             scale_up_factor_dict[s][yr].loc[:, 'scale_up_factor'].values

From f513567d3c4631a3e64e0b51f8358be47ebb9cc0 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 2 Dec 2024 23:15:29 +0000
Subject: [PATCH 197/218] get the expected hrh increase rate

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py        | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 436a164425..58ac532d2c 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -196,6 +196,9 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
             scale_up_factor_dict[s][yr].loc[:, 'scale_up_factor'].values
         )
 
+# get the staff increase rate: 2034 vs 2025
+hr_increase_rates_2034 = pd.DataFrame(integrated_scale_up_factor - 1.0)
+
 # Checked that for s_2, the integrated scale up factors of C/N/P cadres are comparable with shortage estimates from \
 # She et al 2024: https://human-resources-health.biomedcentral.com/articles/10.1186/s12960-024-00949-2
 # C: 2.21, N: 1.44, P: 4.14 vs C: 2.83, N: 1.57, P:6.37

From b968ffd5dae60bbe4a8404949c52bd0a341fde99 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 2 Dec 2024 23:24:31 +0000
Subject: [PATCH 198/218] get yearly hrh count from logger

---
 ...dsion_by_officer_type_with_extra_budget.py | 81 ++++++++++++++-----
 1 file changed, 61 insertions(+), 20 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index e35c13a9ea..3c8cc662f7 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -19,6 +19,7 @@
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
     Minute_Salary_by_Cadre_Level,
     extra_budget_fracs,
+    hr_increase_rates_2034,
 )
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import (
     HRHExpansionByCadreWithExtraBudget,
@@ -317,6 +318,28 @@ def get_total_cost(_df):
             _df_1.loc[:, 0], index=_df_1.index
         )
 
+    def get_yearly_hr_count(_df):
+        """
+        Return a series of yearly total cost for all cadres,
+        with index of year and values of list of total cost.
+        """
+        # format
+        _df['year'] = _df['date'].dt.year
+        _df = _df.drop(columns='date').set_index('year').fillna(0)
+        _df.columns = _df.columns.map(lambda x: x.split('_')[-1])
+        _df.rename(columns={'Midwifery': 'Nursing_and_Midwifery'}, inplace=True)
+        _df = _df.groupby(level=0, axis=1).sum()
+        assert set(_df.columns) == set(cadres)
+        _df = _df[cadres]
+        # get multiplier for popsize=100,000: 145.39609000000002
+        _df = _df * 145.39609000000002
+        # reformat as a series
+        _dict = {idx: [list(_df.loc[idx, :])] for idx in _df.index}
+        _df_1 = pd.DataFrame(data=_dict).T
+        return pd.Series(
+            _df_1.loc[:, 0], index=_df_1.index
+        )
+
     def get_current_hr(cadres):
         """
         Return current (year of 2018/2019) staff counts and capabilities for the cadres specified.
@@ -462,7 +485,7 @@ def format_time_by_cadre_treatment(_df):
         return time_increased_by_treatment
 
     # Get parameter/scenario names
-    param_names = tuple(extra_budget_fracs.drop(columns='s_2'))
+    param_names = tuple(extra_budget_fracs)
     # param_names = get_parameter_names_from_scenario_file()
     # param_names = ('s_0', 's_1', 's_2', 's_3', 's_11', 's_22')
     # param_names = ('s_1', 's_2', 's_3', 's_11', 's_22')
@@ -474,9 +497,6 @@ def format_time_by_cadre_treatment(_df):
     # Get appointment time and cost requirement
     appt_time, appt_cost = format_appt_time_and_cost()
 
-    # # Get current (year of 2018/2019) hr counts
-    # curr_hr = get_current_hr(cadres)
-
     # # Get scale up factors for all scenarios
     # scale_up_factors = extract_results(
     #     results_folder,
@@ -492,9 +512,6 @@ def format_time_by_cadre_treatment(_df):
     # scale_up_factors[cadres] = scale_up_factors.value.tolist()
     # scale_up_factors.drop(columns='value', inplace=True)
 
-    # Get salary
-    salary = get_hr_salary(cadres)
-
     # Get total cost for all scenarios
     total_cost = extract_results(
         results_folder,
@@ -518,24 +535,42 @@ def format_time_by_cadre_treatment(_df):
         extra_cost_all_yrs.loc[s, :] = total_cost_all_yrs.loc[s, :] - total_cost_all_yrs.loc['s_0', :]
     extra_cost_all_yrs.drop(index='s_0', inplace=True)
 
-    # get staff count = total cost / salary
-    staff_count = total_cost.copy()
-    for c in cadres:
-        staff_count.loc[:, c] = total_cost.loc[:, c] / salary[c].values[0]
-    staff_count.loc[:, 'all_cadres'] = staff_count[[c for c in staff_count.columns if c in cadres]].sum(axis=1)
+    # get yearly hr count
+    yearly_hr_count = extract_results(
+        results_folder,
+        module='tlo.methods.healthsystem.summary',
+        key='number_of_hcw_staff',
+        custom_generate_series=get_yearly_hr_count,
+        do_scaling=False
+    ).pipe(set_param_names_as_column_index_level_0).stack(level=0)
+    # check that the staff counts are the same between each run within each draw
+    for i in range(len(yearly_hr_count.index)):
+        for j in yearly_hr_count.columns[1:]:
+            for k in range(len(cadres)):
+                assert abs(yearly_hr_count.iloc[i, j][k] - yearly_hr_count.iloc[i, 0][k]) < 1/1e8
+    # store results for only one run per draw
+    yearly_hr_count = yearly_hr_count.iloc[:, 0].unstack().reset_index().melt(id_vars='index')
+    yearly_hr_count[cadres] = yearly_hr_count.value.tolist()
+    yearly_hr_count.drop(columns='value', inplace=True)
+    yearly_hr_count['all_cadres'] = yearly_hr_count[[c for c in yearly_hr_count.columns if c in cadres]].sum(axis=1)
+    yearly_hr_count.rename(columns={'index': 'year'}, inplace=True)
 
     # get extra count = staff count - staff count of no expansion s_1
     # note that annual staff increase rate = scale up factor - 1
-    extra_staff = staff_count.copy()
-    for i in staff_count.index:
-        extra_staff.iloc[i, 2:] = staff_count.iloc[i, 2:] - staff_count.iloc[0, 2:]
-
-    # extra_staff_2029 = extra_staff.loc[extra_staff.year == 2029, :].drop(columns='year').set_index('draw').drop(
-    #     index='s_1'
-    # )
-    # staff_count_2029 = staff_count.loc[staff_count.year == 2029, :].drop(columns='year').set_index('draw')
+    extra_staff = yearly_hr_count.drop(
+        yearly_hr_count[yearly_hr_count.year.isin(range(2010, 2024))].index, axis=0
+    ).reset_index(drop=True)
+    staff_increase_rate = extra_staff.copy()
+    staff_2024 = pd.DataFrame(extra_staff.loc[(extra_staff.year == 2024)
+                                              & (extra_staff.draw == 's_0'), :])
+    for i in extra_staff.index:
+        extra_staff.iloc[i, 2:] = extra_staff.iloc[i, 2:] - staff_2024.iloc[0, 2:]
+        staff_increase_rate.iloc[i, 2:] = (extra_staff.iloc[i, 2:] / staff_2024.iloc[0, 2:])
+        # checked that this is slightly different with hr_increase_rates from preparation script, due the calculation
+        # process are not the same
 
     # check total cost calculated is increased as expected
+    # also checked (in excel) that the yearly_hr_count (s_0 and s_1) are expanded as expected
     years = range(2025, the_target_period[1].year + 1)
     for s in param_names[1:]:
         assert (abs(
@@ -1212,10 +1247,15 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # plot 4D data: relative increases of Clinical, Pharmacy, and Nursing_and_Midwifery as three coordinates,\
     # percentage of DALYs averted decides the color of that scatter point
+    # prepare extra budget allocation
     extra_budget_allocation = extra_budget_fracs.T.reindex(num_dalys_summarized.index)
     extra_budget_allocation['Other'] = extra_budget_allocation[
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
+    # prepare hrh increase rates in the same format for regression analysis
+    hr_increase_rates = hr_increase_rates_2034.T.reindex(num_dalys_summarized.index)
+    hr_increase_rates['Other'] = hr_increase_rates['Dental'].copy()
+
     name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}'
     # name_of_plot = f'DALYs averted (%) vs no HCW expansion investment, {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
@@ -1392,6 +1432,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # outcome_data = num_services_increased_percent['mean']
     # outcome_data = num_treatments_total_increased_percent['mean']
     regression_data = pd.merge(outcome_data,
+                               # hr_increase_rates,
                                extra_budget_allocation,
                                left_index=True, right_index=True, how='inner')
     # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']

From 5670ed97c0fabb40da77a8c4410e01bf4a64ec83 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 6 Dec 2024 17:29:29 +0000
Subject: [PATCH 199/218] record the gap strategies for main and sensitivity
 analysis

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py     | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 58ac532d2c..18ddb7aa42 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -100,7 +100,11 @@
     auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum()
 # for "gap" allocation strategy
 # auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]  # without historical scaling; "default" settings
-auxiliary.loc[:, 's_2'] = [0.4322, 0.0201, 0.3701, 0.1408, 0.0368]  # with historical scaling; "default" settings
+auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + main settings
+# auxiliary.loc[:, 'default_cons s_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373]  # historical scaling + default_cons
+# auxiliary.loc[:, 'more_budget s_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + more_budget
+# auxiliary.loc[:, 'less_budget s_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + less_budget
+# auxiliary.loc[:, 'max hs function s_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073]  # historical scaling + less_budget
 
 # define extra budget fracs for each cadre
 extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list)

From 2ad95cf5e00689f94063924cc52f4ba715af4050 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 6 Dec 2024 17:45:19 +0000
Subject: [PATCH 200/218] correct typo

---
 .../analysis_hr_expandsion_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 3c8cc662f7..5d670b2e02 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -577,7 +577,7 @@ def format_time_by_cadre_treatment(_df):
             total_cost.loc[(total_cost.year == 2034) & (total_cost.draw == s), 'all_cadres'].values[0] -
             (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2025) & (total_cost.draw == 's_0'),
                                                        'all_cadres'].values[0]
-        ) < 1e6).all()
+        ) < 1e-6).all()
 
     # Absolute Number of Deaths and DALYs and Services
     num_deaths = extract_results(

From bf1a3a179b026c33fd3170c97dd3adabad255520 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 6 Dec 2024 18:05:08 +0000
Subject: [PATCH 201/218] rerun gap strategy for main analysis as the
 proportions have changed due to another 5 runs of no expansion

---
 ...t_hcw_by_officer_type_with_extra_budget.py | 20 +++++++------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 8522ad3da0..7e08b9b746 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -67,14 +67,8 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.YEAR_OF_HRH_EXPANSION = 2025
         # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling
 
-        # self.scenarios = extra_budget_fracs.drop(columns=['s_2'])
-        # Run another 5 runs for "no historical scaling" + baseline of baseline settings
-
-        # self.scenarios = extra_budget_fracs['s_2'].to_frame()
-        # Run 'gap' scenario that's based on "no historical scaling" + baseline of baseline settings
-
-        self.scenarios = extra_budget_fracs['s_0'].to_frame()
-        # Run no extra budget allocation scenarios first to get never ran services and 'gap' allocation strategies
+        self.scenarios = extra_budget_fracs['s_2'].to_frame()
+        # Run 'gap' scenario that's based on "historical scaling" + baseline of baseline settings
 
         # Baseline settings for change
         self.cons_availability = ['all', 'default']
@@ -82,11 +76,11 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.hs_function = [[False, False], [False, True]]
 
         self.baselines = {
-            # 'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
-            'default_cons': self._baseline_default_cons(),
-            'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
-            'less_budget': self._baseline_less_budget(),
-            'max_hs_function': self._baseline_max_hs_function(),
+            'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
+            # 'default_cons': self._baseline_default_cons(),
+            # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
+            # 'less_budget': self._baseline_less_budget(),
+            # 'max_hs_function': self._baseline_max_hs_function(),
         }
 
         return {

From c04e001123ce9d6b57888741174f4a485cc1e94d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 6 Dec 2024 23:18:50 +0000
Subject: [PATCH 202/218] run all non-gap strategies for more budget

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py   | 8 ++++----
 ...nding_current_hcw_by_officer_type_with_extra_budget.py | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 18ddb7aa42..edfcd1ced3 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -101,10 +101,10 @@
 # for "gap" allocation strategy
 # auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]  # without historical scaling; "default" settings
 auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + main settings
-# auxiliary.loc[:, 'default_cons s_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373]  # historical scaling + default_cons
-# auxiliary.loc[:, 'more_budget s_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + more_budget
-# auxiliary.loc[:, 'less_budget s_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + less_budget
-# auxiliary.loc[:, 'max hs function s_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073]  # historical scaling + less_budget
+# auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + more_budget; same as above
+# auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + less_budget; same as above
+# auxiliary.loc[:, 's_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373]  # historical scaling + default_cons
+# auxiliary.loc[:, 's_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073]  # historical scaling + max_hs_function
 
 # define extra budget fracs for each cadre
 extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list)
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 7e08b9b746..9bdea355ee 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -67,7 +67,7 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.YEAR_OF_HRH_EXPANSION = 2025
         # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling
 
-        self.scenarios = extra_budget_fracs['s_2'].to_frame()
+        self.scenarios = extra_budget_fracs.drop(columns='s_0')
         # Run 'gap' scenario that's based on "historical scaling" + baseline of baseline settings
 
         # Baseline settings for change
@@ -76,10 +76,10 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.hs_function = [[False, False], [False, True]]
 
         self.baselines = {
-            'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
+            # 'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
             # 'default_cons': self._baseline_default_cons(),
-            # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
-            # 'less_budget': self._baseline_less_budget(),
+            'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
+            # 'less_budget': self._baseline_less_budget(),  # turn off when run baseline scenarios with no expansion
             # 'max_hs_function': self._baseline_max_hs_function(),
         }
 

From 101f6ce2d4846b0590762226dfa0cf819f006523 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 6 Dec 2024 23:28:52 +0000
Subject: [PATCH 203/218] run all non-gap strategies for less budget

---
 ...expanding_current_hcw_by_officer_type_with_extra_budget.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 9bdea355ee..8cd00afc0b 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -78,8 +78,8 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.baselines = {
             # 'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
             # 'default_cons': self._baseline_default_cons(),
-            'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
-            # 'less_budget': self._baseline_less_budget(),  # turn off when run baseline scenarios with no expansion
+            # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
+            'less_budget': self._baseline_less_budget(),  # turn off when run baseline scenarios with no expansion
             # 'max_hs_function': self._baseline_max_hs_function(),
         }
 

From dacdd77a1c4853c683b5d61b8338a500b5abf994 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 11 Dec 2024 00:07:31 +0000
Subject: [PATCH 204/218] calculate average yearly increase rate per cadre
 during 2025-2034

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py          | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index edfcd1ced3..bd1abb8132 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -202,6 +202,7 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
 
 # get the staff increase rate: 2034 vs 2025
 hr_increase_rates_2034 = pd.DataFrame(integrated_scale_up_factor - 1.0)
+hr_avg_yearly_increase_rate = pd.DataFrame(integrated_scale_up_factor**(1/10) - 1.0)
 
 # Checked that for s_2, the integrated scale up factors of C/N/P cadres are comparable with shortage estimates from \
 # She et al 2024: https://human-resources-health.biomedcentral.com/articles/10.1186/s12960-024-00949-2

From b138a22f2e083bd356bfd2c4f5fd605ff79096b8 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 11 Dec 2024 10:07:40 +0000
Subject: [PATCH 205/218] reduce runs per draw to 5

---
 ...f_expanding_current_hcw_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 8cd00afc0b..f8cee53117 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -35,7 +35,7 @@ def __init__(self):
         self.pop_size = 100_000
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 10
+        self.runs_per_draw = 5
 
     def log_configuration(self):
         return {

From bd271c2f7b6fecdee3d46b24c58969ec78ea9ba5 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Wed, 11 Dec 2024 23:43:09 +0000
Subject: [PATCH 206/218] run default_cons for sensitivity analysis

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py       | 4 ++--
 ...expanding_current_hcw_by_officer_type_with_extra_budget.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index bd1abb8132..2821490079 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -100,10 +100,10 @@
     auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum()
 # for "gap" allocation strategy
 # auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]  # without historical scaling; "default" settings
-auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + main settings
+# auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + main settings
 # auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + more_budget; same as above
 # auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + less_budget; same as above
-# auxiliary.loc[:, 's_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373]  # historical scaling + default_cons
+auxiliary.loc[:, 's_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373]  # historical scaling + default_cons
 # auxiliary.loc[:, 's_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073]  # historical scaling + max_hs_function
 
 # define extra budget fracs for each cadre
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index f8cee53117..9e05667acf 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -77,9 +77,9 @@ def _get_scenarios(self) -> Dict[str, Dict]:
 
         self.baselines = {
             # 'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
-            # 'default_cons': self._baseline_default_cons(),
+            'default_cons': self._baseline_default_cons(),
             # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
-            'less_budget': self._baseline_less_budget(),  # turn off when run baseline scenarios with no expansion
+            # 'less_budget': self._baseline_less_budget(),  # turn off when run baseline scenarios with no expansion
             # 'max_hs_function': self._baseline_max_hs_function(),
         }
 

From c846eb63361b96e414ef56e06a2f35f216d0e8d2 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 12 Dec 2024 13:48:52 +0000
Subject: [PATCH 207/218] update analysis file to include budget growth rate

---
 ...alysis_hr_expandsion_by_officer_type_with_extra_budget.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 5d670b2e02..2b17061161 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -572,11 +572,12 @@ def format_time_by_cadre_treatment(_df):
     # check total cost calculated is increased as expected
     # also checked (in excel) that the yearly_hr_count (s_0 and s_1) are expanded as expected
     years = range(2025, the_target_period[1].year + 1)
+    budget_growth_rate = 0.042  # 0.042, 0.058, 0.026
     for s in param_names[1:]:
         assert (abs(
             total_cost.loc[(total_cost.year == 2034) & (total_cost.draw == s), 'all_cadres'].values[0] -
-            (1 + 0.042) ** len(years) * total_cost.loc[(total_cost.year == 2025) & (total_cost.draw == 's_0'),
-                                                       'all_cadres'].values[0]
+            (1 + budget_growth_rate) ** len(years) * total_cost.loc[
+                (total_cost.year == 2025) & (total_cost.draw == 's_0'), 'all_cadres'].values[0]
         ) < 1e-6).all()
 
     # Absolute Number of Deaths and DALYs and Services

From e0550bb543de70f3b3929b5467be02f85c6a4b85 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sat, 14 Dec 2024 02:05:36 +0000
Subject: [PATCH 208/218] run max hs function for sensitivity analysis

---
 .../prepare_minute_salary_and_extra_budget_frac_data.py       | 4 ++--
 ...expanding_current_hcw_by_officer_type_with_extra_budget.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 2821490079..75362e1ed1 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -103,8 +103,8 @@
 # auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + main settings
 # auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + more_budget; same as above
 # auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + less_budget; same as above
-auxiliary.loc[:, 's_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373]  # historical scaling + default_cons
-# auxiliary.loc[:, 's_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073]  # historical scaling + max_hs_function
+# auxiliary.loc[:, 's_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373]  # historical scaling + default_cons
+auxiliary.loc[:, 's_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073]  # historical scaling + max_hs_function
 
 # define extra budget fracs for each cadre
 extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list)
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index 9e05667acf..f008eaa7b6 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -77,10 +77,10 @@ def _get_scenarios(self) -> Dict[str, Dict]:
 
         self.baselines = {
             # 'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
-            'default_cons': self._baseline_default_cons(),
+            # 'default_cons': self._baseline_default_cons(),
             # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
             # 'less_budget': self._baseline_less_budget(),  # turn off when run baseline scenarios with no expansion
-            # 'max_hs_function': self._baseline_max_hs_function(),
+            'max_hs_function': self._baseline_max_hs_function(),
         }
 
         return {

From 25b85568d0a9717ca6d4b57ab3e1c12beb726582 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Sat, 14 Dec 2024 02:11:55 +0000
Subject: [PATCH 209/218] the 17th decimal of some cadres' minute salary has
 changed - due to the same change to HRH capabilities

---
 resources/costing/Minute_Salary_HR.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/costing/Minute_Salary_HR.csv b/resources/costing/Minute_Salary_HR.csv
index 0e248a312f..64fec2c8f1 100644
--- a/resources/costing/Minute_Salary_HR.csv
+++ b/resources/costing/Minute_Salary_HR.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23d959dabe6cfc86ff2604a3a01298cccf00ef3587a15afaf4e487c06d3b9df0
+oid sha256:1731535fc81a7918dcaf6eceda21452999828515bb1b781c433361af6acd00e2
 size 35276

From 8177c0d8e7eda2af1952e9ca7dfd2f34b6394473 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 20 Dec 2024 00:52:09 +0000
Subject: [PATCH 210/218] use standard error for CI

---
 src/tlo/analysis/utils.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py
index 7895185d78..8b86a9ab21 100644
--- a/src/tlo/analysis/utils.py
+++ b/src/tlo/analysis/utils.py
@@ -18,6 +18,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import scipy.stats as st
 import squarify
 
 from tlo import Date, Simulation, logging, util
@@ -364,6 +365,7 @@ def compute_summary_statistics(
     width_of_range: float = 0.95,
     only_central: bool = False,
     collapse_columns: bool = False,
+    use_standard_error: bool = False,
 ) -> pd.DataFrame:
     """Utility function to compute summary statistics
 
@@ -388,9 +390,16 @@ def compute_summary_statistics(
     else:
         raise ValueError(f"Unknown stat: {central_measure}")
 
-    lower_quantile = (1. - width_of_range) / 2.
-    stats["lower"] = grouped_results.quantile(lower_quantile)
-    stats["upper"] = grouped_results.quantile(1 - lower_quantile)
+    if not use_standard_error:
+        lower_quantile = (1. - width_of_range) / 2.
+        stats["lower"] = grouped_results.quantile(lower_quantile)
+        stats["upper"] = grouped_results.quantile(1 - lower_quantile)
+    else:
+        std_deviation = grouped_results.std()
+        std_error = std_deviation / np.sqrt(len(grouped_results))
+        z_value = st.norm.ppf(1 - (1. - width_of_range) / 2.)  # (import scipy.stats as st)
+        stats["lower"] = stats['central'] - z_value * std_error
+        stats["upper"] = stats['central'] + z_value * std_error
 
     summary = pd.concat(stats, axis=1)
     summary.columns = summary.columns.swaplevel(1, 0)
@@ -419,7 +428,8 @@ def compute_summary_statistics(
 def summarize(
     results: pd.DataFrame,
     only_mean: bool = False,
-    collapse_columns: bool = False
+    collapse_columns: bool = False,
+    use_standard_error: bool = True,
 ):
     """Utility function to compute summary statistics
 
@@ -440,6 +450,7 @@ def summarize(
         central_measure='mean',
         only_central=only_mean,
         collapse_columns=collapse_columns,
+        use_standard_error=use_standard_error,
     )
     if output.columns.nlevels > 1:
         output = output.rename(columns={'central': 'mean'}, level=1)  # rename 'central' to 'mean'

From 80a1acaa35ccfbf3b26c138e980b163bac2d1e1f Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Fri, 20 Dec 2024 00:52:28 +0000
Subject: [PATCH 211/218] update analysis script

---
 ...dsion_by_officer_type_with_extra_budget.py | 180 +++++++++++++-----
 ...inute_salary_and_extra_budget_frac_data.py |  34 +++-
 2 files changed, 161 insertions(+), 53 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 2b17061161..342ed9b073 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -19,7 +19,7 @@
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
     Minute_Salary_by_Cadre_Level,
     extra_budget_fracs,
-    hr_increase_rates_2034,
+    avg_increase_rate_exp,
 )
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import (
     HRHExpansionByCadreWithExtraBudget,
@@ -247,7 +247,7 @@ def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
 
         colors = [scenario_color[s] for s in _df.index]
 
-        fig, ax = plt.subplots(figsize=(18, 6))
+        fig, ax = plt.subplots(figsize=(21, 7))
         ax.bar(
             xticks.keys(),
             _df['mean'].values,
@@ -267,7 +267,7 @@ def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
                                                        _df_percent['lower'].values,
                                                        _df_percent['upper'].values):
                 text = f"{int(round(text1 * 100, 2))}%\n{[round(text2, 2),round(text3, 2)]}"
-                ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize='xx-small')
+                ax.text(xpos, ypos * 1.05, text, horizontalalignment='center', fontsize='x-small')
 
         ax.set_xticks(list(xticks.keys()))
 
@@ -277,7 +277,7 @@ def do_bar_plot_with_ci(_df, _df_percent=None, annotation=False):
         legend_labels = list(scenario_groups[1].keys())
         legend_handles = [plt.Rectangle((0, 0), 1, 1,
                                         color=scenario_groups[1][label]) for label in legend_labels]
-        ax.legend(legend_handles, legend_labels, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5),
+        ax.legend(legend_handles, legend_labels, loc='center left', bbox_to_anchor=(1, 0.5),
                   title='Scenario groups')
 
         ax.grid(axis="y")
@@ -999,8 +999,15 @@ def hcw_time_or_cost_used(time_cost_df=appt_time, count_df=num_appts_by_level_su
         # reorder index to be consistent with descending order of DALYs averted
         use = use.reindex(num_dalys_summarized.index)
 
+        # add column 'all' (row total) and column 'Other' (sum of Dental, Laboratory, Mental and Radiography)
+        use['all'] = use.sum(axis=1)
+        use['Other'] = use[['Dental', 'Laboratory', 'Mental', 'Radiography']].sum(axis=1)
+        use.drop(columns=['Dental', 'Laboratory', 'Mental', 'Radiography'], inplace=True)
+
         use_increased = use.subtract(use.loc['s_0', :], axis=1).drop('s_0', axis=0)
 
+        use_increase_percent = use.subtract(use.loc['s_0', :], axis=1).divide(use.loc['s_0', :], axis=1).drop('s_0', axis=0)
+
         return use, use_increased
 
     hcw_time_used = hcw_time_or_cost_used(time_cost_df=appt_time)[0]
@@ -1049,6 +1056,9 @@ def hcw_time_or_cost_gap(time_cost_df=appt_time, count_df=num_never_ran_appts_by
     hcw_cost_gap_percent['Other'] = hcw_cost_gap_percent[
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
+    hcw_cost_gap['Other'] = hcw_cost_gap[
+        ['Dental', 'Laboratory', 'Mental', 'Radiography']
+    ].sum(axis=1)
 
     # # store the proportions of no expansion scenario as the "best" scenario that is to be tested
     # hcw_cost_gap_percent_no_expansion = hcw_cost_gap_percent.loc[
@@ -1254,8 +1264,8 @@ def find_never_ran_appts_that_need_specific_cadres():
         ['Dental', 'Laboratory', 'Mental', 'Radiography']
     ].sum(axis=1)
     # prepare hrh increase rates in the same format for regression analysis
-    hr_increase_rates = hr_increase_rates_2034.T.reindex(num_dalys_summarized.index)
-    hr_increase_rates['Other'] = hr_increase_rates['Dental'].copy()
+    increase_rate_avg_exp = avg_increase_rate_exp.T.reindex(num_dalys_summarized.index)
+    increase_rate_avg_exp['Other'] = increase_rate_avg_exp['Dental'].copy()
 
     name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}'
     # name_of_plot = f'DALYs averted (%) vs no HCW expansion investment, {target_period()}'
@@ -1433,15 +1443,16 @@ def find_never_ran_appts_that_need_specific_cadres():
     # outcome_data = num_services_increased_percent['mean']
     # outcome_data = num_treatments_total_increased_percent['mean']
     regression_data = pd.merge(outcome_data,
-                               # hr_increase_rates,
-                               extra_budget_allocation,
+                               increase_rate_avg_exp,
+                               # extra_budget_allocation,
                                left_index=True, right_index=True, how='inner')
+    # regression_data.drop(index='s_2', inplace=True)
     # regression_data['C*P'] = regression_data['Clinical'] * regression_data['Pharmacy']
     # regression_data['C*N'] = regression_data['Clinical'] * regression_data['Nursing_and_Midwifery']
     # regression_data['N*P'] = regression_data['Pharmacy'] * regression_data['Nursing_and_Midwifery']
     # regression_data['C*N*P'] = (regression_data['Clinical'] * regression_data['Pharmacy']
-    #                             * regression_data['Nursing_and_Midwifery'])
-    cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography', 'Other']
+    #                              * regression_data['Nursing_and_Midwifery'])
+    cadres_to_drop_due_to_multicollinearity = ['Dental', 'Laboratory', 'Mental', 'Nutrition', 'Radiography']
     regression_data.drop(columns=cadres_to_drop_due_to_multicollinearity, inplace=True)
     predictor = regression_data[regression_data.columns[1:]]
     outcome = regression_data['mean']
@@ -1449,6 +1460,40 @@ def find_never_ran_appts_that_need_specific_cadres():
     est = sm.OLS(outcome.astype(float), predictor.astype(float)).fit()
     print(est.summary())
 
+    # calculate the predicted DALYs based on the regression results
+    for i in regression_data.index:
+        regression_data.loc[i, 'predicted'] = (
+            regression_data.loc[i, ['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']].dot(
+                est.params[['Clinical', 'DCSA', 'Nursing_and_Midwifery', 'Pharmacy', 'Other']]
+            )
+            + est.params['const']
+        )
+
+    # plot mean and predicted DALYs from regression analysis
+    # name_of_plot = f'DALYs-averted simulated vs predicted from linear regression on extra budget allocation'
+    name_of_plot = f'DALYs-averted simulated vs predicted from linear regression on HRH increase rate (exp)'
+    fig, ax = plt.subplots(figsize=(9, 6))
+    data_to_plot = regression_data[['mean', 'predicted']] * 100
+    data_to_plot['strategy'] = data_to_plot.index
+    data_to_plot.rename(columns={'mean': 'simulated'}, inplace=True)
+    data_to_plot.plot.scatter(x='strategy', y='simulated', color='blue', label= 'simulated', ax=ax)
+    data_to_plot.plot.scatter(x='strategy', y='predicted', color='orange', label='predicted', ax=ax)
+    ax.set_ylabel('DALYs averted %', fontsize='small')
+    ax.set(xlabel=None)
+
+    xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
+    xtick_colors = [scenario_color[v] for v in data_to_plot.index]
+    for xtick, color in zip(ax.get_xticklabels(), xtick_colors):
+        xtick.set_color(color)  # color scenarios based on the group info
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')  # re-label scenarios
+
+    plt.legend(loc='upper right')
+    plt.title(name_of_plot)
+    fig.tight_layout()
+    fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
+    fig.show()
+    plt.close(fig)
+
     # todo: could do regression analysis of DALYs averted and Services increased
 
     # # do anova analysis to test the difference of scenario groups
@@ -1707,7 +1752,7 @@ def find_never_ran_appts_that_need_specific_cadres():
     # plt.close(fig)
 
     name_of_plot = f'HCW time used by cadre in delivering services , {target_period()}'
-    data_to_plot = (hcw_time_used / 1e6).reindex(num_dalys_summarized.index)
+    data_to_plot = (hcw_time_used.drop(columns='all') / 1e6).reindex(num_dalys_summarized.index)
     column_dcsa = data_to_plot.pop('DCSA')
     data_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots(figsize=(9, 6))
@@ -1742,16 +1787,22 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'HCW cost needed by cadre to deliver never ran appointments, {target_period()}'
-    hcw_cost_gap_to_plot = (hcw_cost_gap / 1e6).reindex(num_dalys_summarized.index)
+    name_of_plot = f'HCW cost gap by cadre to deliver never ran appointments, {target_period()}'
+    cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
+    hcw_cost_gap_to_plot = (hcw_cost_gap[cadres_to_plot] / 1e6).reindex(num_dalys_summarized.index)
     column_dcsa = hcw_cost_gap_to_plot.pop('DCSA')
     hcw_cost_gap_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots(figsize=(9, 6))
     hcw_cost_gap_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     ax.set_ylabel('USD in Millions', fontsize='small')
     ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+
     xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    xtick_colors = [scenario_color[v] for v in hcw_cost_gap_to_plot.index]
+    for xtick, color in zip(ax.get_xticklabels(), xtick_colors):
+        xtick.set_color(color)  # color scenarios based on the group info
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')  # re-label scenarios
+
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
                fontsize='small', reverse=True)
     plt.title(name_of_plot)
@@ -1781,15 +1832,20 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Cost proportions of never ran appointments that require specific cadres only, {target_period()}'
+    name_of_plot = f'HCW cost proportions of never ran appointments that require specific cadres only, {target_period()}'
     data_to_plot = p_cost * 100
-    fig, ax = plt.subplots(figsize=(12, 8))
+    fig, ax = plt.subplots(figsize=(9, 6))
     data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
-    ax.set_ylim(0, 100)
+    # ax.set_ylim(0, 100)
     ax.set_ylabel('Percentage %')
-    ax.set_xlabel('Extra budget allocation scenario')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+
     xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90)
+    xtick_colors = [scenario_color[v] for v in data_to_plot.index]
+    for xtick, color in zip(ax.get_xticklabels(), xtick_colors):
+        xtick.set_color(color)  # color scenarios based on the group info
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')  # re-label scenarios
+
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
     # # plot the average proportions of all scenarios
     # for c in data_to_plot.columns:
@@ -1823,14 +1879,19 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'Cost distribution of never ran appointments that require specific cadres only, {target_period()}'
+    name_of_plot = f'HCW cost of never ran appointments that require specific cadres only, {target_period()}'
     data_to_plot = a_cost / 1e6
-    fig, ax = plt.subplots(figsize=(12, 8))
+    fig, ax = plt.subplots(figsize=(9, 6))
     data_to_plot.plot(kind='bar', stacked=True, color=cadre_comb_color, rot=0, ax=ax)
     ax.set_ylabel('USD in millions')
-    ax.set_xlabel('Extra budget allocation scenario')
+    ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+
     xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90)
+    xtick_colors = [scenario_color[v] for v in data_to_plot.index]
+    for xtick, color in zip(ax.get_xticklabels(), xtick_colors):
+        xtick.set_color(color)  # color scenarios based on the group info
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')  # re-label scenarios
+
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Cadre combination', reverse=True)
     # # plot the average cost of all scenarios
     # for c in data_to_plot.columns:
@@ -1863,22 +1924,28 @@ def find_never_ran_appts_that_need_specific_cadres():
     fig.show()
     plt.close(fig)
 
-    name_of_plot = f'HCW cost gap by cadre distribution of never ran appointments, {target_period()}'
+    name_of_plot = f'HCW cost gap proportion by cadre to deliver never ran appointments, {target_period()}'
     cadres_to_plot = ['Clinical', 'Nursing_and_Midwifery', 'Pharmacy', 'DCSA', 'Other']
     hcw_cost_gap_percent_to_plot = hcw_cost_gap_percent[cadres_to_plot] * 100
-    fig, ax = plt.subplots(figsize=(12, 8))
-    hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    fig, ax = plt.subplots(figsize=(9, 6))
+    # hcw_cost_gap_percent_to_plot.plot(kind='bar', color=officer_category_color, rot=0, alpha=0.6, ax=ax)
+    hcw_cost_gap_percent_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
     #ax.set_ylim(0, 100)
     ax.set_ylabel('Percentage %')
     ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+
     xtick_labels = [substitute_labels[v] for v in hcw_cost_gap_percent_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90)
-    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category')
+    xtick_colors = [scenario_color[v] for v in hcw_cost_gap_percent_to_plot.index]
+    for xtick, color in zip(ax.get_xticklabels(), xtick_colors):
+        xtick.set_color(color)  # color scenarios based on the group info
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')  # re-label scenarios
+
+    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', reverse=True)
     # plot the average proportions of all scenarios
-    for c in cadres_to_plot:
-        plt.axhline(y=hcw_cost_gap_percent_to_plot[c].mean(),
-                    linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2,
-                    label=c)
+    # for c in cadres_to_plot:
+    #     plt.axhline(y=hcw_cost_gap_percent_to_plot[c].mean(),
+    #                 linestyle='--', color=officer_category_color[c], alpha=1.0, linewidth=2,
+    #                 label=c)
     plt.title(name_of_plot)
     fig.tight_layout()
     fig.savefig(make_graph_file_name(name_of_plot.replace(' ', '_').replace(',', '')))
@@ -2217,18 +2284,23 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     name_of_plot = f'Services increased by treatment type \nvs no extra budget allocation, {target_period()}'
     data_to_plot = num_treatments_increased / 1e6
-    yerr_services = np.array([
-        (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values,
-        (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values,
-    ]) / 1e6
+    # yerr_services = np.array([
+    #     (num_treatments_total_increased['mean'] - num_treatments_total_increased['lower']).values,
+    #     (num_treatments_total_increased['upper'] - num_treatments_total_increased['mean']).values,
+    # ]) / 1e6
     fig, ax = plt.subplots(figsize=(10, 6))
     data_to_plot.plot(kind='bar', stacked=True, color=treatment_color, rot=0, ax=ax)
-    ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services,
-                fmt=".", color="black", zorder=100)
+    # ax.errorbar(range(len(param_names)-1), num_treatments_total_increased['mean'].values / 1e6, yerr=yerr_services,
+    #             fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set(xlabel=None)
+
     xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    xtick_colors = [scenario_color[v] for v in data_to_plot.index]
+    for xtick, color in zip(ax.get_xticklabels(), xtick_colors):
+        xtick.set_color(color)  # color scenarios based on the group info
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')  # re-label scenarios
+
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.4), title='Treatment type', title_fontsize='small',
                fontsize='small', reverse=True)
     plt.title(name_of_plot)
@@ -2258,15 +2330,20 @@ def find_never_ran_appts_that_need_specific_cadres():
     plt.close(fig)
 
     name_of_plot = f'HCW time-used increased by cadre \nvs no extra budget allocation, {target_period()}'
-    data_to_plot = hcw_time_increased_by_cadre / 1e6
+    data_to_plot = hcw_time_increased_by_cadre.drop(columns='all') / 1e6
     column_dcsa = data_to_plot.pop('DCSA')
     data_to_plot.insert(3, "DCSA", column_dcsa)
     fig, ax = plt.subplots(figsize=(9, 6))
     data_to_plot.plot(kind='bar', stacked=True, color=officer_category_color, rot=0, ax=ax)
-    ax.set_ylabel('Millions', fontsize='small')
+    ax.set_ylabel('Millions minutes', fontsize='small')
     ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+
     xtick_labels = [substitute_labels[v] for v in data_to_plot.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    xtick_colors = [scenario_color[v] for v in data_to_plot.index]
+    for xtick, color in zip(ax.get_xticklabels(), xtick_colors):
+        xtick.set_color(color)  # color scenarios based on the group info
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')  # re-label scenarios
+
     plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), title='Officer category', title_fontsize='small',
                fontsize='small', reverse=True)
     plt.title(name_of_plot)
@@ -2279,18 +2356,23 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     name_of_plot = f'DALYs by cause averted vs no extra budget allocation, {target_period()}'
     num_dalys_by_cause_averted_in_millions = num_dalys_by_cause_averted / 1e6
-    yerr_dalys = np.array([
-        (num_dalys_averted['mean'] - num_dalys_averted['lower']).values,
-        (num_dalys_averted['upper'] - num_dalys_averted['mean']).values,
-    ]) / 1e6
+    # yerr_dalys = np.array([
+    #     (num_dalys_averted['mean'] - num_dalys_averted['lower']).values,
+    #     (num_dalys_averted['upper'] - num_dalys_averted['mean']).values,
+    # ]) / 1e6
     fig, ax = plt.subplots(figsize=(9, 6))
     num_dalys_by_cause_averted_in_millions.plot(kind='bar', stacked=True, color=cause_color, rot=0, ax=ax)
-    ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
-                fmt=".", color="black", zorder=100)
+    # ax.errorbar(range(len(param_names)-1), num_dalys_averted['mean'].values / 1e6, yerr=yerr_dalys,
+    #             fmt=".", color="black", zorder=100)
     ax.set_ylabel('Millions', fontsize='small')
     ax.set_xlabel('Extra budget allocation scenario', fontsize='small')
+
     xtick_labels = [substitute_labels[v] for v in num_dalys_by_cause_averted.index]
-    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')
+    xtick_colors = [scenario_color[v] for v in num_dalys_by_cause_averted.index]
+    for xtick, color in zip(ax.get_xticklabels(), xtick_colors):
+        xtick.set_color(color)  # color scenarios based on the group info
+    ax.set_xticklabels(xtick_labels, rotation=90, fontsize='small')  # re-label scenarios
+
     fig.subplots_adjust(right=0.7)
     ax.legend(
         loc="center left",
diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 75362e1ed1..63e905f9b0 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -100,11 +100,11 @@
     auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum()
 # for "gap" allocation strategy
 # auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]  # without historical scaling; "default" settings
-# auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + main settings
+auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + main settings
 # auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + more_budget; same as above
 # auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + less_budget; same as above
 # auxiliary.loc[:, 's_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373]  # historical scaling + default_cons
-auxiliary.loc[:, 's_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073]  # historical scaling + max_hs_function
+# auxiliary.loc[:, 's_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073]  # historical scaling + max_hs_function
 
 # define extra budget fracs for each cadre
 extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list)
@@ -164,12 +164,14 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
     new_data = prev_data[['Annual_Salary_USD', 'scale_up_factor']].copy()
     new_data['Staff_Count'] = prev_data.Staff_Count + prev_data.extra_staff
     new_data['annual_cost'] = prev_data.annual_cost + prev_data.extra_budget
+    new_data['increase_rate'] = new_data['scale_up_factor'] - 1.0
 
     return new_data
 
 
 # calculate scale up factors for all defined scenarios and years
 staff_cost['scale_up_factor'] = 1
+staff_cost['increase_rate'] = 0.0
 scale_up_factor_dict = {s: {y: {} for y in range(2025, 2035)} for s in extra_budget_fracs.columns}
 for s in extra_budget_fracs.columns:
     # for the initial/current year of 2024
@@ -200,9 +202,33 @@ def calculate_hr_scale_up_factor(extra_budget_frac, yr, scenario) -> pd.DataFram
             scale_up_factor_dict[s][yr].loc[:, 'scale_up_factor'].values
         )
 
+# get the arithmetic mean of the yearly increase rates over 2025-2034
+sum_increase_rate = pd.DataFrame(index=cadre_all, columns=total_cost.columns).fillna(0.0)
+for s in total_cost.columns[1:]:
+    for yr in range(2025, 2035):
+        sum_increase_rate.loc[:, s] = (
+            sum_increase_rate.loc[:, s].values +
+            scale_up_factor_dict[s][yr].loc[:, 'increase_rate'].values
+        )
+avg_increase_rate = pd.DataFrame(sum_increase_rate / 10)
+
 # get the staff increase rate: 2034 vs 2025
-hr_increase_rates_2034 = pd.DataFrame(integrated_scale_up_factor - 1.0)
-hr_avg_yearly_increase_rate = pd.DataFrame(integrated_scale_up_factor**(1/10) - 1.0)
+increase_rate_2034 = pd.DataFrame(integrated_scale_up_factor - 1.0)
+avg_increase_rate_exp = pd.DataFrame(integrated_scale_up_factor**(1/10) - 1.0)
+
+
+def func_of_avg_increase_rate(cadre, scenario='s_2', r=0.042):
+    """
+    Return the average annual growth rate of the staff of a cadre from 2025 to 2034 under a given scenario.
+    r is the annual growth rate of the total HRH cost.
+    """
+    overall_scale_up = 1 + (staff_cost.annual_cost.sum()
+                            * extra_budget_fracs.loc[cadre, scenario]
+                            / staff_cost.loc[cadre, 'annual_cost']
+                            * ((1+r)**10 - 1)
+                            )
+
+    return overall_scale_up ** (1/10) - 1.0
 
 # Checked that for s_2, the integrated scale up factors of C/N/P cadres are comparable with shortage estimates from \
 # She et al 2024: https://human-resources-health.biomedcentral.com/articles/10.1186/s12960-024-00949-2

From 99220bc44a7c01ab5e7e349ae35c1af3a2f58ff7 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Mon, 6 Jan 2025 10:42:46 +0000
Subject: [PATCH 212/218] try plot 3D-plot on avg. increase rate

---
 ...nalysis_hr_expandsion_by_officer_type_with_extra_budget.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 342ed9b073..e40d43bc4d 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -1268,9 +1268,10 @@ def find_never_ran_appts_that_need_specific_cadres():
     increase_rate_avg_exp['Other'] = increase_rate_avg_exp['Dental'].copy()
 
     name_of_plot = f'3D DALYs averted (%) vs no extra budget allocation, {target_period()}'
-    # name_of_plot = f'DALYs averted (%) vs no HCW expansion investment, {target_period()}'
+    # name_of_plot = f'DALYs averted (%) vs no HCW expansion investment (avg. HCW increase rate), {target_period()}'
     heat_data = pd.merge(num_dalys_averted_percent['mean'],
                          extra_budget_allocation[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
+                         # increase_rate_avg_exp[['Clinical', 'Pharmacy', 'Nursing_and_Midwifery']],
                          left_index=True, right_index=True, how='inner')
     # scenarios_with_CNP_only = ['s_4', 's_6', 's_7', 's_10', 's_11', 's_16', 's_22']
     # heat_data = heat_data.loc[heat_data.index.isin(scenarios_with_CNP_only)]
@@ -1295,6 +1296,7 @@ def find_never_ran_appts_that_need_specific_cadres():
               [heat_data['Nursing_and_Midwifery'][0], heat_data['Nursing_and_Midwifery'][0]],
               linestyle='--', color='gray', alpha=0.8)
     ax.set_xlabel('Fraction of extra budget allocated to \nClinical cadre', fontsize='small')
+    # ax.set_xlabel('Avg. annual increase rate of \nClinical cadre', fontsize='small')
     ax.set_ylabel('Pharmacy cadre', fontsize='small')
     #ax.invert_xaxis()
     ax.invert_yaxis()

From 817d496053f15932d34d16bbcf4f2824d240fa30 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Thu, 9 Jan 2025 15:21:20 +0000
Subject: [PATCH 213/218] add more info to staff cost 2024

---
 ...re_minute_salary_and_extra_budget_frac_data.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index 63e905f9b0..b725dd02ed 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -230,6 +230,21 @@ def func_of_avg_increase_rate(cadre, scenario='s_2', r=0.042):
 
     return overall_scale_up ** (1/10) - 1.0
 
+
+# prepare 2024 cost info for Other cadre and Total
+extra_rows = pd.DataFrame(columns=staff_cost.columns, index=['Other', 'Total'])
+staff_cost = pd.concat([staff_cost, extra_rows], axis=0)
+staff_cost.loc['Other', 'annual_cost'] = staff_cost.loc[staff_cost.index.isin(other_group), 'annual_cost'].sum()
+staff_cost.loc['Total', 'annual_cost'] = staff_cost.loc[staff_cost.index.isin(cadre_all), 'annual_cost'].sum()
+staff_cost.loc['Other', 'Staff_Count'] = staff_cost.loc[staff_cost.index.isin(other_group), 'Staff_Count'].sum()
+staff_cost.loc['Total', 'Staff_Count'] = staff_cost.loc[staff_cost.index.isin(cadre_all), 'Staff_Count'].sum()
+staff_cost.loc['Other', 'cost_frac'] = (staff_cost.loc['Other', 'annual_cost']
+                                        / staff_cost.loc[staff_cost.index.isin(cadre_all), 'annual_cost'].sum())
+staff_cost.loc['Total', 'cost_frac'] = (staff_cost.loc['Total', 'annual_cost']
+                                        / staff_cost.loc[staff_cost.index.isin(cadre_all), 'annual_cost'].sum())
+staff_cost.annual_cost = staff_cost.annual_cost.astype(str)
+staff_cost.cost_frac = staff_cost.cost_frac.astype(str)
+
 # Checked that for s_2, the integrated scale up factors of C/N/P cadres are comparable with shortage estimates from \
 # She et al 2024: https://human-resources-health.biomedcentral.com/articles/10.1186/s12960-024-00949-2
 # C: 2.21, N: 1.44, P: 4.14 vs C: 2.83, N: 1.57, P:6.37

From 10436018f7770db93bbb54334de14df835bbb7cf Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 14 Jan 2025 10:05:23 +0000
Subject: [PATCH 214/218] add in optimal strategies

---
 ...inute_salary_and_extra_budget_frac_data.py | 28 +++++++++++--------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
index b725dd02ed..084d6b2213 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/prepare_minute_salary_and_extra_budget_frac_data.py
@@ -75,6 +75,7 @@
 # to be matched with Margherita's 4.2% scenario.
 # Add in the scenario that is indicated by hcw cost gap distribution \
 # resulted from never ran services in no expansion scenario, "s_2"
+# Add in the scenario that is indicated by the regression analysis of all other scenarios, "s_*"
 # Define all other scenarios so that the extra budget fraction of each cadre, \
 # i.e., four main cadres and the "Other" cadre that groups up all other cadres, is the same (fair allocation)
 
@@ -86,17 +87,19 @@
 for n in range(1, len(cadre_group)+1):
     for subset in itertools.combinations(cadre_group, n):
         combination_list.append(str(subset))  # other equal-fraction scenarios
+# add in "s_*" at the end
+combination_list.append('s_*')
 
 # cadre groups to expand
 cadre_to_expand = pd.DataFrame(index=cadre_group, columns=combination_list).fillna(0.0)
 for c in cadre_group:
-    for i in cadre_to_expand.columns[3:]:  # for all equal-fraction scenarios
+    for i in cadre_to_expand.columns[3:len(combination_list) - 1]:  # for all equal-fraction scenarios
         if c in i:
             cadre_to_expand.loc[c, i] = 1  # value 1 indicate the cadre group will be expanded
 
 # prepare auxiliary dataframe for equal extra budget fractions scenarios
 auxiliary = cadre_to_expand.copy()
-for i in auxiliary.columns[3:]:  # for all equal-fraction scenarios
+for i in auxiliary.columns[3:len(combination_list) - 1]:  # for all equal-fraction scenarios
     auxiliary.loc[:, i] = auxiliary.loc[:, i] / auxiliary.loc[:, i].sum()
 # for "gap" allocation strategy
 # auxiliary.loc[:, 's_2'] = [0.4586, 0.0272, 0.3502, 0.1476, 0.0164]  # without historical scaling; "default" settings
@@ -105,6 +108,12 @@
 # auxiliary.loc[:, 's_2'] = [0.4314, 0.0214, 0.3701, 0.1406, 0.0365]  # historical scaling + less_budget; same as above
 # auxiliary.loc[:, 's_2'] = [0.4252, 0.0261, 0.3752, 0.1362, 0.0373]  # historical scaling + default_cons
 # auxiliary.loc[:, 's_2'] = [0.5133, 0.0085, 0.2501, 0.1551, 0.073]  # historical scaling + max_hs_function
+# for "optimal" allocation strategy
+auxiliary.loc[:, 's_*'] = [0.6068, 0.0, 0.0830, 0.2496, 0.0606]  # historical scaling + main settings
+# auxiliary.loc[:, 's_*'] = [0.5827, 0.0, 0.1083, 0.2409, 0.0681]  # historical scaling + more_budget; same as above
+# auxiliary.loc[:, 's_*'] = [0.5981, 0.0, 0.0902, 0.2649, 0.0468]  # historical scaling + less_budget; same as above
+# auxiliary.loc[:, 's_*'] = [0.6109, 0.0, 0.1494, 0.2033, 0.0364]  # historical scaling + default_cons
+# auxiliary.loc[:, 's_*'] = [0.5430, 0.0, 0.3631, 0.0939, 0.0]  # historical scaling + max_hs_function
 
 # define extra budget fracs for each cadre
 extra_budget_fracs = pd.DataFrame(index=cadre_all, columns=combination_list)
@@ -129,13 +138,14 @@
 
 # rename scenarios
 # make the scenario of equal fracs for all five cadre groups (i.e., the last column) to be s_3
-simple_scenario_name = {extra_budget_fracs.columns[-1]: 's_3'}
-for i in range(3, len(extra_budget_fracs.columns)-1):
-    simple_scenario_name[extra_budget_fracs.columns[i]] = 's_' + str(i+1)  # name scenario from s_4
+simple_scenario_name = {extra_budget_fracs.columns[-2]: 's_3'}
+for i in range(3, len(extra_budget_fracs.columns)-2):
+    simple_scenario_name[extra_budget_fracs.columns[i]] = 's_' + str(i+1)  # name scenario from s_4 to s_33
 extra_budget_fracs.rename(columns=simple_scenario_name, inplace=True)
 
 # reorder columns
-col_order = ['s_' + str(i) for i in range(0, len(extra_budget_fracs.columns))]
+col_order = ['s_' + str(i) for i in range(0, len(extra_budget_fracs.columns) - 1)]
+col_order += ['s_*']
 assert len(col_order) == len(extra_budget_fracs.columns)
 extra_budget_fracs = extra_budget_fracs.reindex(columns=col_order)
 
@@ -245,12 +255,6 @@ def func_of_avg_increase_rate(cadre, scenario='s_2', r=0.042):
 staff_cost.annual_cost = staff_cost.annual_cost.astype(str)
 staff_cost.cost_frac = staff_cost.cost_frac.astype(str)
 
-# Checked that for s_2, the integrated scale up factors of C/N/P cadres are comparable with shortage estimates from \
-# She et al 2024: https://human-resources-health.biomedcentral.com/articles/10.1186/s12960-024-00949-2
-# C: 2.21, N: 1.44, P: 4.14 vs C: 2.83, N: 1.57, P:6.37
-# todo: This might provide a short-cut way (no simulation, but mathematical calculation) to calculate \
-# an extra budget allocation scenario 's_2+' that is comparable with s_2.
-
 # # save and read pickle file
 # pickle_file_path = Path(resourcefilepath / 'healthsystem' / 'human_resources' / 'scaling_capabilities' /
 #                         'ResourceFile_HR_expansion_by_officer_type_yearly_scale_up_factors.pickle')

From cd7520d0baab5b6d9b44559f7b438542e4d851ec Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 14 Jan 2025 15:11:35 +0000
Subject: [PATCH 215/218] comments on DALYs causes and treatment types grouping

---
 ...dsion_by_officer_type_with_extra_budget.py | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index e40d43bc4d..78b719b288 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -52,6 +52,26 @@
     's_33': 'P = N&M = D = O',
 }
 
+# TODO: group causes of DALYs and types of treatments (mappings below are unfilled placeholders)
+# cause_group = {
+#     'HIV':,
+#     'TB':,
+#     'malaria':,
+#     'RMNCH':,
+#     'NCDs':'',
+#     'Other':'',
+# }
+# treatment_group = {
+#     'HIV':,
+#     'TB':,
+#     'malaria':,
+#      'RMNCH':,
+#      'NCDs': ,
+#      'Other': ,
+# }
+# cause_group_color = {
+#
+# }
 
 def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None,
           the_target_period: Tuple[Date, Date] = None):

From 1b64e4cf7433f4e81c1546fe6926167ca62da619 Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 14 Jan 2025 15:12:25 +0000
Subject: [PATCH 216/218] prepare the scenario script to run the "optimal"
 strategy

---
 ...nding_current_hcw_by_officer_type_with_extra_budget.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
index f008eaa7b6..4b197010ae 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget.py
@@ -67,8 +67,8 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.YEAR_OF_HRH_EXPANSION = 2025
         # The start year to expand HRH by cadre given the extra budget, which is after the historical HRH scaling
 
-        self.scenarios = extra_budget_fracs.drop(columns='s_0')
-        # Run 'gap' scenario that's based on "historical scaling" + baseline of baseline settings
+        self.scenarios = extra_budget_fracs['s_*'].to_frame()
+        # Run 'optimal' scenario for main analysis
 
         # Baseline settings for change
         self.cons_availability = ['all', 'default']
@@ -76,11 +76,11 @@ def _get_scenarios(self) -> Dict[str, Dict]:
         self.hs_function = [[False, False], [False, True]]
 
         self.baselines = {
-            # 'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
+            'baseline': self._baseline_of_baseline(),  # test historical scaling changes first
             # 'default_cons': self._baseline_default_cons(),
             # 'more_budget': self._baseline_more_budget(),  # turn off when run baseline scenarios with no expansion
             # 'less_budget': self._baseline_less_budget(),  # turn off when run baseline scenarios with no expansion
-            'max_hs_function': self._baseline_max_hs_function(),
+            # 'max_hs_function': self._baseline_max_hs_function(),
         }
 
         return {

From 1d9621308426386f481a9073e68a46623786cc9c Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 14 Jan 2025 15:31:09 +0000
Subject: [PATCH 217/218] fix checks errors

---
 ...nalysis_hr_expandsion_by_officer_type_with_extra_budget.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 78b719b288..0e9e64fc15 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -1026,7 +1026,7 @@ def hcw_time_or_cost_used(time_cost_df=appt_time, count_df=num_appts_by_level_su
 
         use_increased = use.subtract(use.loc['s_0', :], axis=1).drop('s_0', axis=0)
 
-        use_increase_percent = use.subtract(use.loc['s_0', :], axis=1).divide(use.loc['s_0', :], axis=1).drop('s_0', axis=0)
+        # use_increase_percent = use.subtract(use.loc['s_0', :], axis=1).divide(use.loc['s_0', :], axis=1).drop('s_0', axis=0)
 
         return use, use_increased
 
@@ -1493,7 +1493,7 @@ def find_never_ran_appts_that_need_specific_cadres():
 
     # plot mean and predicted DALYs from regression analysis
     # name_of_plot = f'DALYs-averted simulated vs predicted from linear regression on extra budget allocation'
-    name_of_plot = f'DALYs-averted simulated vs predicted from linear regression on HRH increase rate (exp)'
+    name_of_plot = 'DALYs-averted simulated vs predicted from linear regression on HRH increase rate (exp)'
     fig, ax = plt.subplots(figsize=(9, 6))
     data_to_plot = regression_data[['mean', 'predicted']] * 100
     data_to_plot['strategy'] = data_to_plot.index

From 4a9a5e625d7c0928c3e41a2bc65c8ce9da698f4d Mon Sep 17 00:00:00 2001
From: Bingling <b.she@imperial.ac.uk>
Date: Tue, 14 Jan 2025 15:40:00 +0000
Subject: [PATCH 218/218] fix checks errors

---
 .../analysis_hr_expandsion_by_officer_type_with_extra_budget.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
index 0e9e64fc15..7f1e89e5ea 100644
--- a/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
+++ b/src/scripts/healthsystem/impact_of_hcw_capabilities_expansion/analysis_hr_expandsion_by_officer_type_with_extra_budget.py
@@ -18,8 +18,8 @@
 
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.prepare_minute_salary_and_extra_budget_frac_data import (
     Minute_Salary_by_Cadre_Level,
-    extra_budget_fracs,
     avg_increase_rate_exp,
+    extra_budget_fracs,
 )
 from scripts.healthsystem.impact_of_hcw_capabilities_expansion.scenario_of_expanding_current_hcw_by_officer_type_with_extra_budget import (
     HRHExpansionByCadreWithExtraBudget,