From 776a20670f8fa631812484f1db70c109b69d6766 Mon Sep 17 00:00:00 2001
From: Daniel Young <danyoung@utexas.edu>
Date: Wed, 26 Jun 2024 09:51:06 -0700
Subject: [PATCH] Get prescriptor experiments working with new PrescriptorManager

---
 .../experiments/prescriptor_experiments.ipynb | 117 +++++++++---------
 1 file changed, 60 insertions(+), 57 deletions(-)

diff --git a/use_cases/eluc/experiments/prescriptor_experiments.ipynb b/use_cases/eluc/experiments/prescriptor_experiments.ipynb
index 6fb9059..9106777 100644
--- a/use_cases/eluc/experiments/prescriptor_experiments.ipynb
+++ b/use_cases/eluc/experiments/prescriptor_experiments.ipynb
@@ -26,12 +26,13 @@
     "\n",
     "from data import constants\n",
     "from data.eluc_data import ELUCData\n",
-    "from data.eluc_encoder import ELUCEncoder\n",
+    "from persistence.serializers.neural_network_serializer import NeuralNetSerializer\n",
     "from prescriptors.nsga2.candidate import Candidate\n",
     "from prescriptors.nsga2.land_use_prescriptor import LandUsePrescriptor\n",
     "from prescriptors.prescriptor_manager import PrescriptorManager\n",
     "from prescriptors.heuristics.heuristics import EvenHeuristic, PerfectHeuristic\n",
-    "from predictors.neural_network.neural_net_predictor import NeuralNetPredictor"
+    "from predictors.neural_network.neural_net_predictor import NeuralNetPredictor\n",
+    "from predictors.percent_change.percent_change_predictor import PercentChangePredictor"
    ]
   },
   {
@@ -41,7 +42,7 @@
    "outputs": [],
    "source": [
     "dataset = ELUCData.from_hf()\n",
-    "encoder = ELUCEncoder.from_pandas(dataset.train_df)"
+    "encoder = dataset.encoder"
    ]
   },
   {
@@ -168,10 +169,9 @@
     "        pareto = True\n",
     "        for j in range(i+1, len(pareto_list)):\n",
     "            p = pareto_list[j]\n",
-    "            # if (((p['ELUC'] < row['ELUC']) and (p['change'] <= row['change'])) or \\\n",
-    "            #     ((p['ELUC'] <= row['ELUC']) and (p['change'] < row['change'])) or \\\n",
-    "            #     ((p['ELUC'] == row['ELUC']) and (p['change'] == row['change']))):\n",
-    "            if p['ELUC'] <= row['ELUC'] and p['change'] <= row['change']:\n",
+    "            if (((p['ELUC'] < row['ELUC']) and (p['change'] <= row['change'])) or \\\n",
+    "                ((p['ELUC'] <= row['ELUC']) and (p['change'] < row['change'])) or \\\n",
+    "                ((p['ELUC'] == row['ELUC']) and (p['change'] == row['change']))):\n",
     "                pareto = False\n",
     "                break\n",
     "        if pareto:\n",
@@ -348,13 +348,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nnp = NeuralNetPredictor.from_pretrained(\"predictors/neural_network/trained_models/no_overlap_nn\")\n",
+    "nn_serializer = NeuralNetSerializer()\n",
+    "nnp = nn_serializer.load(Path(\"predictors/neural_network/trained_models/no_overlap_nn\"))\n",
+    "change_predictor = PercentChangePredictor()\n",
+    "predictors = {\"ELUC\": nnp, \"change\": change_predictor}\n",
     "\n",
     "candidate_params = {\"in_size\": len(constants.CAO_MAPPING[\"context\"]), \"hidden_size\": 16, \"out_size\": len(constants.RECO_COLS)}\n",
     "# Set up new PrescriptorManager\n",
     "cands = [load_candidate(results_dir, cand_id, candidate_params) for cand_id in all_pareto_df[\"id\"]]\n",
     "prescs = {cand.cand_id: LandUsePrescriptor(cand, encoder) for cand in cands}\n",
-    "torch_manager = PrescriptorManager(prescs, nnp)"
+    "torch_manager = PrescriptorManager(prescs, predictors)"
    ]
   },
   {
@@ -385,8 +388,8 @@
    "source": [
     "def evaluate_prescriptor(prescriptor_manager: PrescriptorManager, cand_id: str, context_df: pd.DataFrame):\n",
     "    context_actions_df = prescriptor_manager.prescribe(cand_id, context_df)\n",
-    "    eluc_df, change_df = prescriptor_manager.predict_metrics(context_actions_df)\n",
-    "    return eluc_df[\"ELUC\"].mean(), change_df[\"change\"].mean()"
+    "    outcome_df = prescriptor_manager.predict_metrics(context_actions_df)\n",
+    "    return outcome_df[\"ELUC\"].mean(), outcome_df[\"change\"].mean()"
    ]
   },
   {
@@ -398,7 +401,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 179/179 [00:50<00:00,  3.57it/s]\n"
+      "100%|██████████| 179/179 [00:50<00:00,  3.52it/s]\n"
      ]
     }
    ],
@@ -423,7 +426,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -437,20 +440,20 @@
     "for col in constants.RECO_COLS:\n",
     "    reco_coefs.append(coef_dict[col])\n",
     "\n",
-    "even_manager = PrescriptorManager({str(pct): EvenHeuristic(pct, \"secdf\") for pct in pcts}, nnp)\n",
-    "perfect_manager = PrescriptorManager({str(pct): PerfectHeuristic(pct, reco_coefs) for pct in pcts}, nnp)"
+    "even_manager = PrescriptorManager({str(pct): EvenHeuristic(pct, \"secdf\") for pct in pcts}, predictors)\n",
+    "perfect_manager = PrescriptorManager({str(pct): PerfectHeuristic(pct, reco_coefs) for pct in pcts}, predictors)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 179/179 [01:14<00:00,  2.41it/s]\n"
+      "100%|██████████| 179/179 [01:17<00:00,  2.30it/s]\n"
      ]
     }
    ],
@@ -477,7 +480,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -489,7 +492,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -498,7 +501,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -516,7 +519,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -543,7 +546,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -609,16 +612,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Even hypervolume: 19.87827885124927\n",
-      "Perfect hypervolume: 20.445184246654176\n",
-      "Trained hypervolume: 20.612026679495887\n"
+      "Even hypervolume: 19.878278830833306\n",
+      "Perfect hypervolume: 20.445184251826745\n",
+      "Trained hypervolume: 20.61202668767982\n"
      ]
     }
    ],
@@ -640,7 +643,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -665,7 +668,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -698,21 +701,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
     "def trained_prescribe_and_predict(prescriptor_manager: PrescriptorManager, cand_id: str, context_df: pd.DataFrame):\n",
     "    context_actions_df = prescriptor_manager.prescribe(cand_id, context_df)\n",
-    "    eluc_df, change_df = prescriptor_manager.predict_metrics(context_actions_df)\n",
-    "    context_actions_df[\"ELUC\"] = eluc_df[\"ELUC\"]\n",
-    "    context_actions_df[\"change\"] = change_df[\"change\"]\n",
+    "    outcome_df = prescriptor_manager.predict_metrics(context_actions_df)\n",
+    "    context_actions_df[\"ELUC\"] = outcome_df[\"ELUC\"]\n",
+    "    context_actions_df[\"change\"] = outcome_df[\"change\"]\n",
     "    return context_actions_df"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -726,7 +729,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -750,7 +753,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -782,7 +785,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -796,7 +799,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -805,7 +808,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -830,7 +833,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [
     {
@@ -852,7 +855,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -874,7 +877,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [
     {
@@ -895,7 +898,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -920,7 +923,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [
     {
@@ -953,7 +956,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -982,14 +985,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[0.833829812009902, 0.6505255840955172, -0.25363241947260634, -0.2565692273582894, -0.04820242793010668, -0.07578107423921765, 0.0306483354548232, 0.14394597756353772, 0.31080526300406486, -0.2708375110699364, -0.1873337078723743, 0.009221983705203095]\n"
+      "[0.8338298117955503, 0.6505255838780966, -0.2536324191991408, -0.25656922753721734, -0.04820242826920596, -0.07578107397990458, 0.030648335354318348, 0.14394597861778466, 0.3108052624795656, -0.27083751052628097, -0.18733370786819345, 0.009221983051331711]\n"
      ]
     },
     {
@@ -1011,7 +1014,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1031,7 +1034,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 47,
    "metadata": {},
    "outputs": [
     {
@@ -1193,14 +1196,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 50,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 6/6 [00:08<00:00,  1.42s/it]\n"
+      "100%|██████████| 6/6 [00:10<00:00,  1.67s/it]\n"
      ]
     }
    ],
@@ -1208,12 +1211,12 @@
     "pcts = [0.01, 0.05, 0.1, 0.2, 0.5, 1]\n",
     "total_emissions = []\n",
     "total_changes = []\n",
-    "warming_manager = PrescriptorManager({str(pct): PerfectHeuristic(pct, reco_coefs) for pct in pcts}, nnp)\n",
+    "warming_manager = PrescriptorManager({str(pct): PerfectHeuristic(pct, reco_coefs) for pct in pcts}, predictors)\n",
     "for pct in tqdm(pcts):\n",
     "    result_df = warming_manager.prescribe(str(pct), dataset.test_df.loc[2021][constants.CAO_MAPPING[\"context\"]])\n",
-    "    eluc_df, change_df = perfect_manager.predict_metrics(result_df)\n",
-    "    result_df[\"ELUC\"] = eluc_df[\"ELUC\"]\n",
-    "    result_df[\"change\"] = change_df[\"change\"]\n",
+    "    outcome_df = warming_manager.predict_metrics(result_df)  # use the manager built in this cell, not one left over from an earlier cell\n",
+    "    result_df[\"ELUC\"] = outcome_df[\"ELUC\"]\n",
+    "    result_df[\"change\"] = outcome_df[\"change\"]\n",
     "    result_df[\"total_emissions\"] = result_df[\"ELUC\"] * result_df[\"cell_area\"]\n",
     "    result_df[\"total_change\"] = result_df[\"change\"] * result_df[\"cell_area\"]\n",
     "    total_emissions.append(result_df[\"total_emissions\"].sum())\n",
@@ -1222,7 +1225,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 51,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1237,7 +1240,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 52,
    "metadata": {},
    "outputs": [
     {
@@ -1265,7 +1268,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 53,
    "metadata": {},
    "outputs": [
     {