From 7b8c3be3cbc56cdf696feb9839c058ff5710d4d4 Mon Sep 17 00:00:00 2001
From: Joe Moorhouse <5102656+joemoorhouse@users.noreply.github.com>
Date: Wed, 18 Dec 2024 20:54:27 +0000
Subject: [PATCH] Support historical scenario for score-based risk measures.

Signed-off-by: Joe Moorhouse <5102656+joemoorhouse@users.noreply.github.com>
---
 pyproject.toml                                |  2 +-
 .../data/pregenerated_hazard_model.py         | 12 ++++---
 src/physrisk/kernel/risk.py                   | 34 ++++++++++---------
 src/physrisk/requests.py                      |  2 +-
 tests/risk_models/risk_models_test.py         |  6 ++--
 5 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f6ca5f17..f1889256 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "physrisk-lib"
 # Could test changing the below to be sourced "dynamically"
 # dynamic = ['version']
-version = "0.41.0"
+version = "0.42.0"
 description = "OS-Climate Physical Risk Library"
 authors = [
     {name = "Joe Moorhouse",email = "5102656+joemoorhouse@users.noreply.github.com"},
diff --git a/src/physrisk/data/pregenerated_hazard_model.py b/src/physrisk/data/pregenerated_hazard_model.py
index 671dce1b..a7fe1b84 100644
--- a/src/physrisk/data/pregenerated_hazard_model.py
+++ b/src/physrisk/data/pregenerated_hazard_model.py
@@ -138,19 +138,21 @@ def _get_hazard_data_batch(
                     )
         except Exception as err:
             # e.g. the requested data is unavailable
-            for _i, req in enumerate(batch):
+            for _, req in enumerate(batch):
                 failed_response = HazardDataFailedResponse(err)
                 responses[req] = failed_response
                 failures.append(failed_response)
 
         if any(failures):
-            logger.error(
-                f"{len(failures)} errors in batch (hazard_type={hazard_type.__name__}, indicator_id={indicator_id}, "
+            # only a warning: perhaps the caller does not expect data to be present for all
+            # year/scenario combinations.
+            logger.warning(
+                f"{len(failures)} requests failed in batch (hazard_type={hazard_type.__name__}, indicator_id={indicator_id}, "
                 f"scenario={scenario}, year={year}): (logs limited to first 3)"
             )
             errors = (str(i.error) for i in failures)
-            for _ in range(3):
-                logger.error(next(errors))
+            for _ in range(min(len(failures), 3)):
+                logger.warning(next(errors))
         return
 
 
diff --git a/src/physrisk/kernel/risk.py b/src/physrisk/kernel/risk.py
index c4712d97..a1730ee6 100644
--- a/src/physrisk/kernel/risk.py
+++ b/src/physrisk/kernel/risk.py
@@ -103,7 +103,7 @@ def _calculate_single_impact(
 class MeasureKey(NamedTuple):
     asset: Asset
     prosp_scen: str  # prospective scenario
-    year: int
+    year: Optional[int]
     hazard_type: type
 
 
@@ -238,10 +238,10 @@ def get_measure_id(
         return measure_ids_for_hazard, measure_id_lookup
 
     def calculate_risk_measures(
-        self, assets: Sequence[Asset], prosp_scens: Sequence[str], years: Sequence[int]
+        self, assets: Sequence[Asset], scenarios: Sequence[str], years: Sequence[int]
     ):
         impacts = self._calculate_all_impacts(
-            assets, prosp_scens, years, include_histo=True
+            assets, scenarios, years, include_histo=True
         )
         measures: Dict[MeasureKey, Measure] = {}
         aggregated_measures: Dict[MeasureKey, Measure] = {}
@@ -249,8 +249,8 @@ def calculate_risk_measures(
             if type(asset) not in self._measure_calculators:
                 continue
             measure_calc = self._measure_calculators[type(asset)]
-            for prosp_scen in prosp_scens:
-                for year in years:
+            for scenario in scenarios:
+                for year in [None] if scenario == "historical" else years:
                     for hazard_type in measure_calc.supported_hazards():
                         base_impacts = impacts.get(
                             ImpactKey(
@@ -260,20 +260,24 @@ def calculate_risk_measures(
                                 key_year=None,
                             )
                         )
-                        prosp_impacts = impacts.get(
+                        # 'future' impacts may themselves be historical, i.e. when scenario == "historical"
+                        fut_impacts = impacts.get(
                             ImpactKey(
                                 asset=asset,
                                 hazard_type=hazard_type,
-                                scenario=prosp_scen,
+                                scenario=scenario,
                                 key_year=year,
                             )
                         )
+                        if base_impacts is None or fut_impacts is None:
+                            # should only happen if we are working with limited hazard scope
+                            continue
                         risk_inds = [
                             measure_calc.calc_measure(
                                 hazard_type, base_impact, prosp_impact
                             )
                             for base_impact, prosp_impact in zip(
-                                base_impacts, prosp_impacts
+                                base_impacts, fut_impacts
                             )
                         ]
                         risk_ind = [
@@ -281,12 +285,10 @@ def calculate_risk_measures(
                         ]
                         if len(risk_ind) > 0:
                             # TODO: Aggregate  measures instead of picking the first value.
-                            measures[
-                                MeasureKey(asset, prosp_scen, year, hazard_type)
-                            ] = risk_ind[0]
-            aggregated_measures.update(
-                measure_calc.aggregate_risk_measures(
-                    measures, assets, prosp_scens, years
-                )
-            )
+                            measures[MeasureKey(asset, scenario, year, hazard_type)] = (
+                                risk_ind[0]
+                            )
+        aggregated_measures.update(
+            measure_calc.aggregate_risk_measures(measures, assets, scenarios, years)
+        )
         return impacts, aggregated_measures
diff --git a/src/physrisk/requests.py b/src/physrisk/requests.py
index 59624ab6..5a34adbb 100644
--- a/src/physrisk/requests.py
+++ b/src/physrisk/requests.py
@@ -556,7 +556,7 @@ def _create_risk_measures(
     measures_for_assets: List[RiskMeasuresForAssets] = []
     for hazard_type in hazard_types:
         for scenario_id in scenarios:
-            for year in years:
+            for year in [None] if scenario_id == "historical" else years:
                 # we calculate and tag results for each scenario, year and hazard
                 score_key = RiskMeasureKey(
                     hazard_type=hazard_type.__name__,
diff --git a/tests/risk_models/risk_models_test.py b/tests/risk_models/risk_models_test.py
index 2eee66bd..cd3dd53f 100644
--- a/tests/risk_models/risk_models_test.py
+++ b/tests/risk_models/risk_models_test.py
@@ -64,7 +64,7 @@ def test_risk_indicator_model(self):
         )
         measure_ids_for_asset, definitions = model.populate_measure_definitions(assets)
         _, measures = model.calculate_risk_measures(
-            assets, prosp_scens=scenarios, years=years
+            assets, scenarios=scenarios, years=years
         )
 
         # how to get a score using the MeasureKey
@@ -293,7 +293,7 @@ def sp_precipitation(scenario, year):
         )
 
     def test_via_requests(self):
-        scenarios = ["ssp585"]
+        scenarios = ["ssp585", "historical"]
         years = [2050]
 
         assets = self._create_assets()
@@ -386,7 +386,7 @@ def test_generic_model(self):
         )
         measure_ids_for_asset, definitions = model.populate_measure_definitions(assets)
         _, measures = model.calculate_risk_measures(
-            assets, prosp_scens=scenarios, years=years
+            assets, scenarios=scenarios, years=years
         )
         np.testing.assert_approx_equal(
             measures[MeasureKey(assets[0], scenarios[0], years[0], Wind)].measure_0,