From 6e5fe290b7dc7f43babcb85a827323bc847fc29d Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Sat, 24 Aug 2024 11:43:16 +0200 Subject: [PATCH 01/11] First steps towards simulateneous nuisance and estimator search --- causaltune/optimiser.py | 64 +++------ causaltune/search/__init__.py | 0 causaltune/search/component.py | 128 ++++++++++++++++++ causaltune/{ => search}/params.py | 0 tests/causaltune/test_custom_outcome_model.py | 6 +- tests/causaltune/test_endtoend.py | 10 +- .../test_endtoend_automl_propensity.py | 6 +- tests/causaltune/test_endtoend_inference.py | 2 +- tests/causaltune/test_estimator_list.py | 38 ++---- .../test_sklearn_propensity_model.py | 6 +- 10 files changed, 166 insertions(+), 94 deletions(-) create mode 100644 causaltune/search/__init__.py create mode 100644 causaltune/search/component.py rename causaltune/{ => search}/params.py (100%) diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py index 08b921ae..a87f033b 100644 --- a/causaltune/optimiser.py +++ b/causaltune/optimiser.py @@ -19,7 +19,7 @@ from joblib import Parallel, delayed -from causaltune.params import SimpleParamService +from causaltune.search.params import SimpleParamService from causaltune.scoring import Scorer from causaltune.r_score import RScoreWrapper from causaltune.utils import clean_config, treatment_is_multivalue @@ -34,9 +34,7 @@ # Patched from sklearn.linear_model._base to adjust rtol and atol values -def _check_precomputed_gram_matrix( - X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2 -): +def _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2): n_features = X.shape[1] f1 = n_features // 2 f2 = min(f1 + 1, n_features - 1) @@ -181,17 +179,13 @@ def __init__( resources_per_trial if resources_per_trial is not None else {"cpu": 0.5} ) self._settings["try_init_configs"] = try_init_configs - self._settings["include_experimental_estimators"] = ( - include_experimental_estimators - ) + self._settings["include_experimental_estimators"] = include_experimental_estimators # params for FLAML on component models: self._settings["component_models"] = {} self._settings["component_models"]["task"] = components_task self._settings["component_models"]["verbose"] = components_verbose - self._settings["component_models"][ - "pred_time_limit" - ] = components_pred_time_limit + self._settings["component_models"]["pred_time_limit"] = components_pred_time_limit self._settings["component_models"]["n_jobs"] = components_njobs self._settings["component_models"]["time_budget"] = components_time_budget self._settings["component_models"]["eval_method"] = "holdout" @@ -236,9 +230,7 @@ def init_propensity_model(self, propensity_model: str): self.propensity_model = AutoML( **{**self._settings["component_models"], "task": "classification"} ) - elif hasattr(propensity_model, "fit") and hasattr( - propensity_model, "predict_proba" - ): + elif hasattr(propensity_model, "fit") and hasattr(propensity_model, "predict_proba"): self.propensity_model = propensity_model else: raise ValueError( @@ -270,9 +262,7 @@ def init_outcome_model(self, outcome_model): else: outcome_model_class = AutoML - self.outcome_model = outcome_model_class( - **self._settings["component_models"] - ) + self.outcome_model = outcome_model_class(**self._settings["component_models"]) def fit( self, @@ -330,9 +320,7 @@ def fit( if preprocess: data = copy.deepcopy(data) self.dataset_processor = CausalityDatasetProcessor() - self.dataset_processor.fit( - data, encoder_type=encoder_type, outcome=encoder_outcome - ) + 
self.dataset_processor.fit(data, encoder_type=encoder_type, outcome=encoder_outcome) data = self.dataset_processor.transform(data) else: self.dataset_processor = None @@ -340,9 +328,7 @@ def fit( self.data = data treatment_values = data.treatment_values - assert ( - len(treatment_values) > 1 - ), "Treatment must take at least 2 values, eg 0 and 1!" + assert len(treatment_values) > 1, "Treatment must take at least 2 values, eg 0 and 1!" self._control_value = treatment_values[0] self._treatment_values = list(treatment_values[1:]) @@ -365,8 +351,8 @@ def fit( self.init_propensity_model(self._settings["propensity_model"]) self.init_outcome_model(self._settings["outcome_model"]) - self.identified_estimand: IdentifiedEstimand = ( - self.causal_model.identify_effect(proceed_when_unidentifiable=True) + self.identified_estimand: IdentifiedEstimand = self.causal_model.identify_effect( + proceed_when_unidentifiable=True ) if bool(self.identified_estimand.estimands["iv"]) and bool(data.instruments): @@ -438,9 +424,7 @@ def fit( and self._settings["tuner"]["num_samples"] == -1 ): self._settings["tuner"]["time_budget_s"] = ( - 2.5 - * len(self.estimator_list) - * self._settings["component_models"]["time_budget"] + 2.5 * len(self.estimator_list) * self._settings["component_models"]["time_budget"] ) cmtb = self._settings["component_models"]["time_budget"] @@ -489,17 +473,9 @@ def fit( self._tune_with_config, search_space, metric=self.metric, - points_to_evaluate=( - init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg - ), - evaluated_rewards=( - [] if len(self.resume_scores) == 0 else self.resume_scores - ), - mode=( - "min" - if self.metric in ["energy_distance", "psw_energy_distance"] - else "max" - ), + points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg), + evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores), + mode=("min" if self.metric in ["energy_distance", "psw_energy_distance"] else "max"), low_cost_partial_config={}, **self._settings["tuner"], ) @@ -625,9 +601,7 @@ def _estimate_effect(self, config): } def _compute_metrics(self, estimator, df: pd.DataFrame) -> dict: - return self.scorer.make_scores( - estimator, df, self.metrics_to_report, r_scorer=None - ) + return self.scorer.make_scores(estimator, df, self.metrics_to_report, r_scorer=None) def score_dataset(self, df: pd.DataFrame, dataset_name: str): """ @@ -714,9 +688,7 @@ def effect(self, df, *args, **kwargs): """ return self.model.effect(df, *args, **kwargs) - def predict( - self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs - ): + def predict(self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs): """Heterogeneous Treatment Effects for data CausalityDataset Args: @@ -796,9 +768,7 @@ def effect_stderr(self, df, n_bootstrap_samples=5, n_jobs=1, *args, **kwargs): n_bootstrap_samples=n_bootstrap_samples, n_jobs=n_jobs ) - best_cfg = { - k: v for k, v in self.best_config.items() if k not in ["estimator"] - } + best_cfg = {k: v for k, v in self.best_config.items() if k not in ["estimator"]} method_params = { "init_params": {**best_cfg, **cfg.init_params}, "fit_params": {"inference": bootstrap}, diff --git a/causaltune/search/__init__.py b/causaltune/search/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/causaltune/search/component.py b/causaltune/search/component.py new file mode 100644 index 00000000..c4f367b0 --- /dev/null +++ b/causaltune/search/component.py @@ -0,0 +1,128 @@ +from typing import Tuple 
+import copy + +import numpy as np +import pandas as pd + +from flaml import tune +from flaml.automl.model import ( + XGBoostSklearnEstimator, + XGBoostLimitDepthEstimator, + RandomForestEstimator, + LGBMEstimator, + CatBoostEstimator, + ExtraTreesEstimator, +) +from flaml.automl.task.factory import task_factory + +import flaml + + +def flaml_config_to_tune_config(flaml_config: dict) -> Tuple[dict, dict, dict]: + cfg = {} + init_params = {} + low_cost_init_params = {} + for key, value in flaml_config.items(): + if isinstance(value["domain"], dict): + raise NotImplementedError("Nested dictionaries are not supported yet") + cfg[key] = value["domain"] + if "init_value" in value: + init_params[key] = value["init_value"] + if "low_cost_init_value" in value: + low_cost_init_params[key] = value["low_cost_init_value"] + + return cfg, init_params, low_cost_init_params + + +estimators = { + "xgboost": XGBoostSklearnEstimator, + "xgboost_limit_depth": XGBoostLimitDepthEstimator, + "random_forest": RandomForestEstimator, + "lgbm": LGBMEstimator, + "catboost": CatBoostEstimator, + "extra_trees": ExtraTreesEstimator, +} + + +def joint_config(data_size: Tuple[int, int], estimator_list=None): + joint_cfg = [] + joint_init_params = [] + joint_low_cost_init_params = {} + for name, cls in estimators.items(): + if estimator_list is not None and name not in estimator_list: + continue + task = task_factory("regression") + cfg, init_params, low_cost_init_params = flaml_config_to_tune_config( + cls.search_space(data_size, task=task) + ) + + # Test if the estimator instantiates fine + try: + cls(task=task, **init_params) + cfg["estimator_name"] = name + joint_cfg.append(cfg) + init_params["estimator_name"] = name + joint_init_params.append({"estimator": init_params}) + joint_low_cost_init_params[name] = low_cost_init_params + except ImportError as e: + print(f"Error instantiating {name}: {e}") + + return {"estimator": tune.choice(joint_cfg)}, joint_init_params, joint_low_cost_init_params + + +def model_from_cfg(cfg: dict): + cfg = copy.deepcopy(cfg) + model_name = cfg["estimator"].pop("estimator_name") + out = estimators[model_name](task=task_factory("regression"), **cfg["estimator"]) + return out + + +def config2score(cfg: dict, X, y): + model = model_from_cfg(cfg) + model.fit(X, y) + ypred = model.predict(X) + err = y - ypred + return {"score": np.mean(err**2)} + + +def make_fake_data(): + + # Set random seed for reproducibility + np.random.seed(42) + + # Parameters for the DataFrame + num_samples = 1000 # Number of rows (samples) + num_features = 5 # Number of features (columns) + + # Generate random float features + X = np.random.rand(num_samples, num_features) + + # Define the coefficients for each feature to generate the target variable + coefficients = np.random.rand(num_features) + + # Generate the target variable y as a linear combination of the features plus some noise + noise = np.random.normal(0, 0.1, num_samples) # Add some Gaussian noise + y = np.dot(X, coefficients) + noise + + # Create a DataFrame + column_names = [f"feature_{i + 1}" for i in range(num_features)] + df = pd.DataFrame(X, columns=column_names) + + return df, y + + +if __name__ == "__main__": + + # Create fake data + X, y = make_fake_data() + cfg, init_params, low_cost_init_params = joint_config(data_size=X.shape) + flaml.tune.run( + evaluation_function=lambda cfg: config2score(cfg, X, y), + metric="score", + mode="min", + config=cfg, + points_to_evaluate=init_params, + num_samples=10, + ) + + print("yay!") diff --git 
a/causaltune/params.py b/causaltune/search/params.py similarity index 100% rename from causaltune/params.py rename to causaltune/search/params.py diff --git a/tests/causaltune/test_custom_outcome_model.py b/tests/causaltune/test_custom_outcome_model.py index 138d8d83..ae2ea651 100644 --- a/tests/causaltune/test_custom_outcome_model.py +++ b/tests/causaltune/test_custom_outcome_model.py @@ -5,7 +5,7 @@ from causaltune import CausalTune from causaltune.datasets import linear_multi_dataset, generate_synthetic_data -from causaltune.params import SimpleParamService +from causaltune.search.params import SimpleParamService warnings.filterwarnings("ignore") # suppress sklearn deprecation warnings for now.. @@ -62,9 +62,7 @@ def test_custom_outcome_model_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns( - "backdoor", "all", data_rows=len(data) - ) + estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) ct = CausalTune( outcome_model=LinearRegression(), diff --git a/tests/causaltune/test_endtoend.py b/tests/causaltune/test_endtoend.py index 741cc035..08da3c57 100644 --- a/tests/causaltune/test_endtoend.py +++ b/tests/causaltune/test_endtoend.py @@ -1,10 +1,9 @@ -import pytest import warnings from causaltune import CausalTune from causaltune.datasets import generate_non_random_dataset, linear_multi_dataset -from causaltune.params import SimpleParamService +from causaltune.search.params import SimpleParamService warnings.filterwarnings("ignore") # suppress sklearn deprecation warnings for now.. @@ -84,9 +83,7 @@ def test_endtoend_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns( - "backdoor", "all", data_rows=len(data) - ) + estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) ct = CausalTune( estimator_list="all", @@ -101,5 +98,6 @@ def test_endtoend_multivalue(self): if __name__ == "__main__": - pytest.main([__file__]) + TestEndToEnd().test_endtoend_cate() + # pytest.main([__file__]) # TestEndToEnd().test_endtoend_iv() diff --git a/tests/causaltune/test_endtoend_automl_propensity.py b/tests/causaltune/test_endtoend_automl_propensity.py index f5b2f893..baafcb82 100644 --- a/tests/causaltune/test_endtoend_automl_propensity.py +++ b/tests/causaltune/test_endtoend_automl_propensity.py @@ -3,7 +3,7 @@ from causaltune import CausalTune from causaltune.datasets import linear_multi_dataset -from causaltune.params import SimpleParamService +from causaltune.search.params import SimpleParamService warnings.filterwarnings("ignore") # suppress sklearn deprecation warnings for now.. 
@@ -60,9 +60,7 @@ def test_endtoend_multivalue_propensity(self): multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns( - "backdoor", "all", data_rows=len(data) - ) + estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) ct = CausalTune( estimator_list="all", diff --git a/tests/causaltune/test_endtoend_inference.py b/tests/causaltune/test_endtoend_inference.py index 154fe748..aa9bc954 100644 --- a/tests/causaltune/test_endtoend_inference.py +++ b/tests/causaltune/test_endtoend_inference.py @@ -5,7 +5,7 @@ from causaltune import CausalTune from causaltune.datasets import linear_multi_dataset -from causaltune.params import SimpleParamService +from causaltune.search.params import SimpleParamService warnings.filterwarnings("ignore") # suppress sklearn deprecation warnings for now.. diff --git a/tests/causaltune/test_estimator_list.py b/tests/causaltune/test_estimator_list.py index 8db6b30a..55911951 100644 --- a/tests/causaltune/test_estimator_list.py +++ b/tests/causaltune/test_estimator_list.py @@ -2,7 +2,7 @@ import pandas as pd from causaltune import CausalTune -from causaltune.params import SimpleParamService +from causaltune.search.params import SimpleParamService class TestEstimatorListGenerator: @@ -10,9 +10,7 @@ class TestEstimatorListGenerator: def test_auto_list(self): """tests if "auto" setting yields all available estimators""" - cfg = SimpleParamService( - propensity_model=None, outcome_model=None, multivalue=False - ) + cfg = SimpleParamService(propensity_model=None, outcome_model=None, multivalue=False) auto_estimators_iv = cfg.estimator_names_from_patterns("iv", "auto") auto_estimators_backdoor = cfg.estimator_names_from_patterns("backdoor", "auto") # verify that returned estimator list includes all available estimators @@ -40,9 +38,7 @@ def test_auto_list(self): def test_substring_group(self): """tests if substring match to group of estimators works""" - cfg = SimpleParamService( - propensity_model=None, outcome_model=None, multivalue=False - ) + cfg = SimpleParamService(propensity_model=None, outcome_model=None, multivalue=False) estimator_list = cfg.estimator_names_from_patterns("backdoor", ["dml"]) available_estimators = [e for e in cfg._configs().keys() if "dml" in e] @@ -57,21 +53,13 @@ def test_substring_group(self): def test_substring_single(self): """tests if substring match to single estimators works""" - cfg = SimpleParamService( - propensity_model=None, outcome_model=None, multivalue=False - ) - estimator_list = cfg.estimator_names_from_patterns( - "backdoor", ["DomainAdaptationLearner"] - ) - assert estimator_list == [ - "backdoor.econml.metalearners.DomainAdaptationLearner" - ] + cfg = SimpleParamService(propensity_model=None, outcome_model=None, multivalue=False) + estimator_list = cfg.estimator_names_from_patterns("backdoor", ["DomainAdaptationLearner"]) + assert estimator_list == ["backdoor.econml.metalearners.DomainAdaptationLearner"] def test_checkduplicates(self): """tests if duplicates are removed""" - cfg = SimpleParamService( - propensity_model=None, outcome_model=None, multivalue=False - ) + cfg = SimpleParamService(propensity_model=None, outcome_model=None, multivalue=False) estimator_list = cfg.estimator_names_from_patterns( "backdoor", [ @@ -87,14 +75,10 @@ def test_invalid_choice(self): # this should raise a ValueError # unavailable estimators: - cfg = SimpleParamService( - propensity_model=None, outcome_model=None, multivalue=False - ) + cfg = SimpleParamService(propensity_model=None, 
outcome_model=None, multivalue=False) with pytest.raises(ValueError): - cfg.estimator_names_from_patterns( - "backdoor", ["linear_regression", "pasta", 12] - ) + cfg.estimator_names_from_patterns("backdoor", ["linear_regression", "pasta", 12]) with pytest.raises(ValueError): cfg.estimator_names_from_patterns("backdoor", 5) @@ -104,9 +88,7 @@ def test_invalid_choice_fitter(self): """tests if empty list is correctly handled""" ct = CausalTune(components_time_budget=10) ct.fit( - pd.DataFrame( - {"treatment": [0, 1], "outcome": [0.5, 1.5], "dummy": [0.1, 0.2]} - ), + pd.DataFrame({"treatment": [0, 1], "outcome": [0.5, 1.5], "dummy": [0.1, 0.2]}), treatment="treatment", outcome="outcome", common_causes=["dummy"], diff --git a/tests/causaltune/test_sklearn_propensity_model.py b/tests/causaltune/test_sklearn_propensity_model.py index 7a1ce204..942a7746 100644 --- a/tests/causaltune/test_sklearn_propensity_model.py +++ b/tests/causaltune/test_sklearn_propensity_model.py @@ -9,7 +9,7 @@ generate_synth_data_with_categories, linear_multi_dataset, ) -from causaltune.params import SimpleParamService +from causaltune.search.params import SimpleParamService warnings.filterwarnings("ignore") # suppress sklearn deprecation warnings for now.. @@ -66,9 +66,7 @@ def test_sklearn_propensity_model_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns( - "backdoor", "all", data_rows=len(data) - ) + estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) ct = CausalTune( propensity_model=LogisticRegression(), From 819a7c900806cc81970ceeaeee8afc912e869e42 Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Sun, 25 Aug 2024 17:14:11 +0200 Subject: [PATCH 02/11] Move full cunstruction of method_params into params.py As a prelude to dynamic construction of outcome_function --- causaltune/optimiser.py | 20 +++------ causaltune/search/params.py | 85 ++++++++++++++++++++++--------------- causaltune/utils.py | 1 + 3 files changed, 59 insertions(+), 47 deletions(-) diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py index a87f033b..e6fb27eb 100644 --- a/causaltune/optimiser.py +++ b/causaltune/optimiser.py @@ -519,7 +519,7 @@ def _tune_with_config(self, config: dict) -> dict: # to spawn a separate process to prevent cross-talk between tuner and automl on component models: estimates = Parallel(n_jobs=2, backend="threading")( - delayed(self._estimate_effect)(config["estimator"]) for i in range(1) + delayed(self._estimate_effect)(config) for i in range(1) )[0] # estimates = self._estimate_effect(config["estimator"]) @@ -558,17 +558,10 @@ def _est_effect_stub(self, method_params): def _estimate_effect(self, config): """estimates effect with chosen estimator""" - # add params that are tuned by flaml: - config = clean_config(copy.copy(config)) - self.estimator_name = config.pop("estimator_name") - # params_to_tune = { - # k: v for k, v in config.items() if (not k == "estimator_name") - # } - cfg = self.cfg.method_params(self.estimator_name) - method_params = { - "init_params": {**deepcopy(config), **cfg.init_params}, - "fit_params": {}, - } + # Do we need an boject property for this, instead of a local var? 
+ self.estimator_name = config["estimator"]["estimator_name"] + method_params = self.cfg.method_params(config, self.outcome_model) + try: # # if True: # estimate = self._est_effect_stub(method_params) @@ -589,7 +582,8 @@ def _estimate_effect(self, config): "estimator": estimate, "estimator_name": scores.pop("estimator_name"), "scores": scores, - "config": config, + # TODO: return full config! + "config": config["estimator"], } except Exception as e: print("Evaluation failed!\n", config, traceback.format_exc()) diff --git a/causaltune/search/params.py b/causaltune/search/params.py index 36403d0f..e66ef5e5 100644 --- a/causaltune/search/params.py +++ b/causaltune/search/params.py @@ -1,15 +1,19 @@ from flaml import tune -from copy import deepcopy -from typing import Optional, Sequence, Union, Iterable, Dict +from copy import deepcopy, copy +from typing import Optional, Sequence, Union, Iterable, Dict, Any from dataclasses import dataclass, field import warnings from econml.inference import BootstrapInference # noqa F401 from sklearn import linear_model +from causaltune.utils import clean_config + @dataclass class EstimatorConfig: + outcome_model_name: str = None + final_model_name: str = None init_params: dict = field(default_factory=dict) fit_params: dict = field(default_factory=dict) search_space: dict = field(default_factory=dict) @@ -175,13 +179,29 @@ def default_configs(self, estimator_list: Iterable[str]): def method_params( self, - estimator: str, + config: dict, + outcome_model: Any, ): - return self._configs()[estimator] + config = clean_config(deepcopy(config["estimator"])) + estimator_name = config.pop("estimator_name") + + cfg = self._configs()[estimator_name] + + # Insert the outcome model dynamically (prelude to spawning it dynamically) + if cfg.outcome_model_name is not None and cfg.outcome_model_name not in cfg.init_params: + cfg.init_params[cfg.outcome_model_name] = outcome_model + if cfg.final_model_name is not None and cfg.final_model_name not in cfg.init_params: + cfg.init_params[cfg.final_model_name] = outcome_model + + method_params = { + "init_params": {**deepcopy(config), **cfg.init_params}, + "fit_params": {}, + } + return method_params def _configs(self) -> Dict[str, EstimatorConfig]: propensity_model = deepcopy(self.propensity_model) - outcome_model = deepcopy(self.outcome_model) + # outcome_model = deepcopy(self.outcome_model) if self.n_bootstrap_samples is not None: # TODO Egor please look into this # bootstrap is causing recursion errors (see notes below) @@ -190,10 +210,10 @@ def _configs(self) -> Dict[str, EstimatorConfig]: # ) pass - if self.final_model is None: - final_model = deepcopy(self.outcome_model) - else: - final_model = deepcopy(self.final_model) + # if self.final_model is None: + # final_model = deepcopy(self.outcome_model) + # else: + # final_model = deepcopy(self.final_model) configs: dict[str:EstimatorConfig] = { "backdoor.causaltune.models.NaiveDummy": EstimatorConfig(), @@ -206,32 +226,34 @@ def _configs(self) -> Dict[str, EstimatorConfig]: experimental=True, ), "backdoor.econml.metalearners.SLearner": EstimatorConfig( - init_params={"overall_model": outcome_model}, + outcome_model_name="overall_model", + init_params={}, supports_multivalue=True, ), "backdoor.econml.metalearners.TLearner": EstimatorConfig( - init_params={"models": outcome_model}, + outcome_model_name="models", + init_params={}, supports_multivalue=True, ), "backdoor.econml.metalearners.XLearner": EstimatorConfig( + outcome_model_name="models", init_params={ "propensity_model": 
propensity_model, - "models": outcome_model, }, supports_multivalue=True, ), "backdoor.econml.metalearners.DomainAdaptationLearner": EstimatorConfig( + outcome_model_name="models", + final_model_name="final_models", init_params={ "propensity_model": propensity_model, - "models": outcome_model, - "final_models": final_model, }, supports_multivalue=True, ), "backdoor.econml.dr.ForestDRLearner": EstimatorConfig( + outcome_model_name="model_regression", init_params={ "model_propensity": propensity_model, - "model_regression": outcome_model, # putting these here for now, until default values can be reconciled with search space "mc_iters": None, "max_depth": None, @@ -268,9 +290,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: inference="blb", ), "backdoor.econml.dr.LinearDRLearner": EstimatorConfig( + outcome_model_name="model_regression", init_params={ "model_propensity": propensity_model, - "model_regression": outcome_model, "mc_iters": None, }, search_space={ @@ -286,9 +308,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: inference="auto", ), "backdoor.econml.dr.SparseLinearDRLearner": EstimatorConfig( + outcome_model_name="model_regression", init_params={ "model_propensity": propensity_model, - "model_regression": outcome_model, "mc_iters": None, }, search_space={ @@ -314,9 +336,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: inference="auto", ), "backdoor.econml.dml.LinearDML": EstimatorConfig( + outcome_model_name="model_y", init_params={ "model_t": propensity_model, - "model_y": outcome_model, "discrete_treatment": True, # it runs out of memory fast if the below is not set "linear_first_stages": False, @@ -335,9 +357,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: inference="statsmodels", ), "backdoor.econml.dml.SparseLinearDML": EstimatorConfig( + outcome_model_name="model_y", init_params={ "model_t": propensity_model, - "model_y": outcome_model, "discrete_treatment": True, # it runs out of memory fast if the below is not set "linear_first_stages": False, @@ -364,9 +386,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: inference="auto", ), "backdoor.econml.dml.CausalForestDML": EstimatorConfig( + outcome_model_name="model_y", init_params={ "model_t": propensity_model, - "model_y": outcome_model, # "max_depth": self.max_depth, # "n_estimators": self.n_estimators, "discrete_treatment": True, @@ -416,9 +438,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: inference="auto", ), "backdoor.causaltune.models.TransformedOutcome": EstimatorConfig( + outcome_model_name="outcome_model", init_params={ "propensity_model": propensity_model, - "outcome_model": outcome_model, }, ), # leaving out DML and NonParamDML as they're base classes for the 3 @@ -436,9 +458,7 @@ def _configs(self) -> Dict[str, EstimatorConfig]: "backdoor.econml.orf.DROrthoForest": EstimatorConfig( init_params={ "propensity_model": propensity_model, - "model_Y": linear_model.Ridge( - alpha=0.01 - ), # WeightedLasso(alpha=0.01), # + "model_Y": linear_model.Ridge(alpha=0.01), # WeightedLasso(alpha=0.01), # "n_jobs": self.n_jobs, # "max_depth": self.max_depth, # "n_trees": self.n_estimators, @@ -467,9 +487,7 @@ def _configs(self) -> Dict[str, EstimatorConfig]: "backdoor.econml.orf.DMLOrthoForest": EstimatorConfig( init_params={ "model_T": propensity_model, - "model_Y": linear_model.Ridge( - alpha=0.01 - ), # WeightedLasso(alpha=0.01), # + "model_Y": linear_model.Ridge(alpha=0.01), # WeightedLasso(alpha=0.01), # "discrete_treatment": True, "n_jobs": self.n_jobs, # "max_depth": self.max_depth, @@ 
-499,8 +517,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: inference="blb", ), "iv.econml.iv.dr.LinearDRIV": EstimatorConfig( + outcome_model_name="model_y_xw", init_params={ - "model_y_xw": outcome_model, "model_t_xw": propensity_model, }, search_space={ @@ -509,8 +527,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: defaults={"projection": True}, ), "iv.econml.iv.dml.OrthoIV": EstimatorConfig( + outcome_model_name="model_y_xw", init_params={ - "model_y_xw": outcome_model, "model_t_xw": propensity_model, }, search_space={ @@ -521,8 +539,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: }, ), "iv.econml.iv.dml.DMLIV": EstimatorConfig( + outcome_model_name="model_y_xw", init_params={ - "model_y_xw": outcome_model, "model_t_xw": propensity_model, }, search_space={ @@ -533,8 +551,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: }, ), "iv.econml.iv.dr.SparseLinearDRIV": EstimatorConfig( + outcome_model_name="model_y_xw", init_params={ - "model_y_xw": outcome_model, "model_t_xw": propensity_model, }, search_space={ @@ -549,9 +567,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: }, ), "iv.econml.iv.dr.LinearIntentToTreatDRIV": EstimatorConfig( - init_params={ - "model_y_xw": outcome_model, - }, + outcome_model_name="model_y_xw", + init_params={}, search_space={ "cov_clip": tune.quniform(0.08, 0.2, 0.01), "opt_reweighted": tune.choice([0, 1]), diff --git a/causaltune/utils.py b/causaltune/utils.py index df21f89b..6af2097b 100644 --- a/causaltune/utils.py +++ b/causaltune/utils.py @@ -8,6 +8,7 @@ def clean_config(params: dict): + # TODO: move this to formal constraints in tune? if "subforest_size" in params and "n_estimators" in params: params["n_estimators"] = params["subforest_size"] * math.ceil( params["n_estimators"] / params["subforest_size"] From 14dea52654850c067c50c2b7ab0367eb9a61389c Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Sun, 25 Aug 2024 17:44:01 +0200 Subject: [PATCH 03/11] More refactor. 
test_endtoend.py passes --- causaltune/optimiser.py | 73 +++++++++++++++------------- causaltune/search/params.py | 80 ++++++++++++------------------- tests/causaltune/test_endtoend.py | 2 - 3 files changed, 72 insertions(+), 83 deletions(-) diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py index e6fb27eb..d03bdb5e 100644 --- a/causaltune/optimiser.py +++ b/causaltune/optimiser.py @@ -246,23 +246,27 @@ def init_outcome_model(self, outcome_model): # TODO: implement filtering like below, when there are propensity-only features # feature_filter below acts on classes not instances # to preserve all the extra methods through inheritance - self.outcome_model = outcome_model + return outcome_model else: - data = self.data - propensity_only_cols = [ - p - for p in data.propensity_modifiers - if p not in data.common_causes + data.effect_modifiers - ] - - if len(propensity_only_cols): - outcome_model_class = feature_filter( - AutoML, data.effect_modifiers + data.common_causes, first_cols=True - ) - else: - outcome_model_class = AutoML + return self.auto_outcome_model() + + def auto_outcome_model(self): + data = self.data + propensity_only_cols = [ + p + for p in data.propensity_modifiers + if p not in data.common_causes + data.effect_modifiers + ] + + if len(propensity_only_cols): + # TODO: implement feature_filter for arbitrary outcome models + outcome_model_class = feature_filter( + AutoML, data.effect_modifiers + data.common_causes, first_cols=True + ) + else: + outcome_model_class = AutoML - self.outcome_model = outcome_model_class(**self._settings["component_models"]) + return outcome_model_class(**self._settings["component_models"]) def fit( self, @@ -349,7 +353,9 @@ def fit( ) self.init_propensity_model(self._settings["propensity_model"]) - self.init_outcome_model(self._settings["outcome_model"]) + + # Is that state needed at all? + # self.outcome_model = self.init_outcome_model(self._settings["outcome_model"]) self.identified_estimand: IdentifiedEstimand = self.causal_model.identify_effect( proceed_when_unidentifiable=True @@ -392,8 +398,6 @@ def fit( # config with method-specific params self.cfg = SimpleParamService( - self.propensity_model, - self.outcome_model, n_jobs=self._settings["component_models"]["n_jobs"], include_experimental=self._settings["include_experimental_estimators"], multivalue=treatment_is_multivalue(self._treatment_values), @@ -438,20 +442,24 @@ def fit( if self._settings["test_size"] is not None: self.test_df = self.test_df.sample(self._settings["test_size"]) - self.r_scorer = ( - None - if "r_scorer" not in self.metrics_to_report - else RScoreWrapper( - self.outcome_model, - self.propensity_model, - self.train_df, - self.test_df, - outcome, - treatment, - common_causes, - effect_modifiers, + if "r_scorer" in self.metrics_to_report: + raise NotImplementedError( + "R-squared scorer no longer suported, please raise an issue if you want it back" ) - ) + # self.r_scorer = ( + # None + # if "r_scorer" not in self.metrics_to_report + # else RScoreWrapper( + # self.outcome_model, + # self.propensity_model, + # self.train_df, + # self.test_df, + # outcome, + # treatment, + # common_causes, + # effect_modifiers, + # ) + # ) search_space = self.cfg.search_space(self.estimator_list) init_cfg = ( @@ -560,7 +568,8 @@ def _estimate_effect(self, config): # Do we need an boject property for this, instead of a local var? 
self.estimator_name = config["estimator"]["estimator_name"] - method_params = self.cfg.method_params(config, self.outcome_model) + outcome_model = self.init_outcome_model(self._settings["outcome_model"]) + method_params = self.cfg.method_params(config, outcome_model, self.propensity_model) try: # # if True: # diff --git a/causaltune/search/params.py b/causaltune/search/params.py index e66ef5e5..2154a13a 100644 --- a/causaltune/search/params.py +++ b/causaltune/search/params.py @@ -14,6 +14,7 @@ class EstimatorConfig: outcome_model_name: str = None final_model_name: str = None + propensity_model_name: str = None init_params: dict = field(default_factory=dict) fit_params: dict = field(default_factory=dict) search_space: dict = field(default_factory=dict) @@ -26,17 +27,11 @@ class EstimatorConfig: class SimpleParamService: def __init__( self, - propensity_model, - outcome_model, multivalue: bool, - final_model=None, n_bootstrap_samples: Optional[int] = None, n_jobs: Optional[int] = None, include_experimental=False, ): - self.propensity_model = propensity_model - self.outcome_model = outcome_model - self.final_model = final_model self.n_jobs = n_jobs self.include_experimental = include_experimental self.n_bootstrap_samples = n_bootstrap_samples @@ -181,6 +176,8 @@ def method_params( self, config: dict, outcome_model: Any, + propensity_model: Any, + final_model: Any = None, ): config = clean_config(deepcopy(config["estimator"])) estimator_name = config.pop("estimator_name") @@ -189,9 +186,18 @@ def method_params( # Insert the outcome model dynamically (prelude to spawning it dynamically) if cfg.outcome_model_name is not None and cfg.outcome_model_name not in cfg.init_params: - cfg.init_params[cfg.outcome_model_name] = outcome_model + cfg.init_params[cfg.outcome_model_name] = deepcopy(outcome_model) + + if ( + cfg.propensity_model_name is not None + and cfg.propensity_model_name not in cfg.init_params + ): + cfg.init_params[cfg.propensity_model_name] = deepcopy(propensity_model) + if cfg.final_model_name is not None and cfg.final_model_name not in cfg.init_params: - cfg.init_params[cfg.final_model_name] = outcome_model + cfg.init_params[cfg.final_model_name] = ( + deepcopy(final_model) if final_model is not None else deepcopy(outcome_model) + ) method_params = { "init_params": {**deepcopy(config), **cfg.init_params}, @@ -200,8 +206,6 @@ def method_params( return method_params def _configs(self) -> Dict[str, EstimatorConfig]: - propensity_model = deepcopy(self.propensity_model) - # outcome_model = deepcopy(self.outcome_model) if self.n_bootstrap_samples is not None: # TODO Egor please look into this # bootstrap is causing recursion errors (see notes below) @@ -210,50 +214,39 @@ def _configs(self) -> Dict[str, EstimatorConfig]: # ) pass - # if self.final_model is None: - # final_model = deepcopy(self.outcome_model) - # else: - # final_model = deepcopy(self.final_model) - configs: dict[str:EstimatorConfig] = { "backdoor.causaltune.models.NaiveDummy": EstimatorConfig(), "backdoor.causaltune.models.Dummy": EstimatorConfig( - init_params={"propensity_model": propensity_model}, + propensity_model_name="propensity_model", experimental=False, ), "backdoor.propensity_score_weighting": EstimatorConfig( - init_params={"propensity_model": propensity_model}, + propensity_model_name="propensity_model", experimental=True, ), "backdoor.econml.metalearners.SLearner": EstimatorConfig( outcome_model_name="overall_model", - init_params={}, supports_multivalue=True, ), "backdoor.econml.metalearners.TLearner": 
EstimatorConfig( outcome_model_name="models", - init_params={}, supports_multivalue=True, ), "backdoor.econml.metalearners.XLearner": EstimatorConfig( outcome_model_name="models", - init_params={ - "propensity_model": propensity_model, - }, + propensity_model_name="propensity_model", supports_multivalue=True, ), "backdoor.econml.metalearners.DomainAdaptationLearner": EstimatorConfig( outcome_model_name="models", + propensity_model_name="propensity_model", final_model_name="final_models", - init_params={ - "propensity_model": propensity_model, - }, supports_multivalue=True, ), "backdoor.econml.dr.ForestDRLearner": EstimatorConfig( outcome_model_name="model_regression", + propensity_model_name="model_propensity", init_params={ - "model_propensity": propensity_model, # putting these here for now, until default values can be reconciled with search space "mc_iters": None, "max_depth": None, @@ -291,8 +284,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "backdoor.econml.dr.LinearDRLearner": EstimatorConfig( outcome_model_name="model_regression", + propensity_model_name="model_propensity", init_params={ - "model_propensity": propensity_model, "mc_iters": None, }, search_space={ @@ -309,8 +302,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "backdoor.econml.dr.SparseLinearDRLearner": EstimatorConfig( outcome_model_name="model_regression", + propensity_model_name="model_propensity", init_params={ - "model_propensity": propensity_model, "mc_iters": None, }, search_space={ @@ -337,8 +330,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "backdoor.econml.dml.LinearDML": EstimatorConfig( outcome_model_name="model_y", + propensity_model_name="model_t", init_params={ - "model_t": propensity_model, "discrete_treatment": True, # it runs out of memory fast if the below is not set "linear_first_stages": False, @@ -358,8 +351,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "backdoor.econml.dml.SparseLinearDML": EstimatorConfig( outcome_model_name="model_y", + propensity_model_name="model_t", init_params={ - "model_t": propensity_model, "discrete_treatment": True, # it runs out of memory fast if the below is not set "linear_first_stages": False, @@ -387,8 +380,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "backdoor.econml.dml.CausalForestDML": EstimatorConfig( outcome_model_name="model_y", + propensity_model_name="model_t", init_params={ - "model_t": propensity_model, # "max_depth": self.max_depth, # "n_estimators": self.n_estimators, "discrete_treatment": True, @@ -439,9 +432,7 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "backdoor.causaltune.models.TransformedOutcome": EstimatorConfig( outcome_model_name="outcome_model", - init_params={ - "propensity_model": propensity_model, - }, + propensity_model_name="propensity_model", ), # leaving out DML and NonParamDML as they're base classes for the 3 # above @@ -456,8 +447,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: # "fit_params": {}, # }, "backdoor.econml.orf.DROrthoForest": EstimatorConfig( + propensity_model_name="propensity_model", init_params={ - "propensity_model": propensity_model, "model_Y": linear_model.Ridge(alpha=0.01), # WeightedLasso(alpha=0.01), # "n_jobs": self.n_jobs, # "max_depth": self.max_depth, @@ -485,8 +476,8 @@ def _configs(self) -> Dict[str, EstimatorConfig]: inference="blb", ), "backdoor.econml.orf.DMLOrthoForest": EstimatorConfig( + propensity_model_name="model_T", init_params={ - "model_T": propensity_model, "model_Y": linear_model.Ridge(alpha=0.01), # 
WeightedLasso(alpha=0.01), # "discrete_treatment": True, "n_jobs": self.n_jobs, @@ -518,9 +509,7 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "iv.econml.iv.dr.LinearDRIV": EstimatorConfig( outcome_model_name="model_y_xw", - init_params={ - "model_t_xw": propensity_model, - }, + propensity_model_name="model_t_xw", search_space={ "projection": tune.choice([0, 1]), }, @@ -528,9 +517,7 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "iv.econml.iv.dml.OrthoIV": EstimatorConfig( outcome_model_name="model_y_xw", - init_params={ - "model_t_xw": propensity_model, - }, + propensity_model_name="model_t_xw", search_space={ "mc_agg": tune.choice(["mean", "median"]), }, @@ -540,9 +527,7 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "iv.econml.iv.dml.DMLIV": EstimatorConfig( outcome_model_name="model_y_xw", - init_params={ - "model_t_xw": propensity_model, - }, + propensity_model_name="model_t_xw", search_space={ "mc_agg": tune.choice(["mean", "median"]), }, @@ -552,9 +537,7 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "iv.econml.iv.dr.SparseLinearDRIV": EstimatorConfig( outcome_model_name="model_y_xw", - init_params={ - "model_t_xw": propensity_model, - }, + propensity_model_name="model_t_xw", search_space={ "projection": tune.choice([0, 1]), "opt_reweighted": tune.choice([0, 1]), @@ -568,7 +551,6 @@ def _configs(self) -> Dict[str, EstimatorConfig]: ), "iv.econml.iv.dr.LinearIntentToTreatDRIV": EstimatorConfig( outcome_model_name="model_y_xw", - init_params={}, search_space={ "cov_clip": tune.quniform(0.08, 0.2, 0.01), "opt_reweighted": tune.choice([0, 1]), diff --git a/tests/causaltune/test_endtoend.py b/tests/causaltune/test_endtoend.py index 08da3c57..862e82c3 100644 --- a/tests/causaltune/test_endtoend.py +++ b/tests/causaltune/test_endtoend.py @@ -41,8 +41,6 @@ def test_endtoend_cate(self): data.preprocess_dataset() cfg = SimpleParamService( - propensity_model=None, - outcome_model=None, n_jobs=-1, include_experimental=False, multivalue=False, From c7b3e9a098b5d6fa582fa6da3552cd0fd4eda2e4 Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Sun, 25 Aug 2024 19:20:10 +0200 Subject: [PATCH 04/11] First rough cut of simultaneous estimator and outcome_function search.
--- causaltune/optimiser.py | 28 +++-- causaltune/search/component.py | 26 +++-- causaltune/search/params.py | 24 +++-- tests/causaltune/test_custom_outcome_model.py | 4 - tests/causaltune/test_endtoend.py | 2 - .../test_endtoend_automl_propensity.py | 2 - tests/causaltune/test_endtoend_flat_search.py | 100 ++++++++++++++++++ tests/causaltune/test_endtoend_inference.py | 4 - tests/causaltune/test_estimator_list.py | 10 +- .../test_sklearn_propensity_model.py | 4 - 10 files changed, 161 insertions(+), 43 deletions(-) create mode 100644 tests/causaltune/test_endtoend_flat_search.py diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py index d03bdb5e..7a60ba0b 100644 --- a/causaltune/optimiser.py +++ b/causaltune/optimiser.py @@ -94,7 +94,7 @@ def __init__( test_size=None, num_samples=-1, propensity_model="dummy", - outcome_model=None, + outcome_model="nested", components_task="regression", components_verbose=0, components_pred_time_limit=10 / 1e6, @@ -238,17 +238,24 @@ def init_propensity_model(self, propensity_model: str): ) def init_outcome_model(self, outcome_model): + # TODO: implement filtering like below, when there are propensity-only features + # feature_filter below acts on classes not instances + # to preserve all the extra methods through inheritance # if we are only supplying certain features to the propensity function, # make them invisible to the outcome component model # This is a workaround for the DoWhy/EconML data model which doesn't # support that out of the box - if outcome_model is not None: - # TODO: implement filtering like below, when there are propensity-only features - # feature_filter below acts on classes not instances - # to preserve all the extra methods through inheritance + + if hasattr(outcome_model, "fit") and hasattr(outcome_model, "predict"): return outcome_model - else: + elif outcome_model == "auto": + # Will be dynamically chosen at optimization time + return outcome_model + elif outcome_model == "nested": + # The current default behavior return self.auto_outcome_model() + else: + raise ValueError('outcome_model valid values are None, "auto", or an estimator object') def auto_outcome_model(self): data = self.data @@ -354,9 +361,6 @@ def fit( self.init_propensity_model(self._settings["propensity_model"]) - # Is that state needed at all? - # self.outcome_model = self.init_outcome_model(self._settings["outcome_model"]) - self.identified_estimand: IdentifiedEstimand = self.causal_model.identify_effect( proceed_when_unidentifiable=True ) @@ -401,6 +405,7 @@ def fit( n_jobs=self._settings["component_models"]["n_jobs"], include_experimental=self._settings["include_experimental_estimators"], multivalue=treatment_is_multivalue(self._treatment_values), + sample_outcome_estimators=self._settings["outcome_model"] == "auto", ) self.estimator_list = self.cfg.estimator_names_from_patterns( @@ -618,7 +623,10 @@ def score_dataset(self, df: pd.DataFrame, dataset_name: str): None. 
""" for scr in self.scores.values(): - scr["scores"][dataset_name] = self._compute_metrics(scr["estimator"], df) + if scr["estimator"] is None: + warnings.warn("Skipping scoring for estimator %s", scr["estimator_name"]) + else: + scr["scores"][dataset_name] = self._compute_metrics(scr["estimator"], df) @property def best_estimator(self) -> str: diff --git a/causaltune/search/component.py b/causaltune/search/component.py index c4f367b0..ccc537c4 100644 --- a/causaltune/search/component.py +++ b/causaltune/search/component.py @@ -1,3 +1,4 @@ +import warnings from typing import Tuple import copy @@ -67,18 +68,31 @@ def joint_config(data_size: Tuple[int, int], estimator_list=None): except ImportError as e: print(f"Error instantiating {name}: {e}") - return {"estimator": tune.choice(joint_cfg)}, joint_init_params, joint_low_cost_init_params + return tune.choice(joint_cfg), joint_init_params, joint_low_cost_init_params def model_from_cfg(cfg: dict): cfg = copy.deepcopy(cfg) - model_name = cfg["estimator"].pop("estimator_name") - out = estimators[model_name](task=task_factory("regression"), **cfg["estimator"]) + model_name = cfg.pop("estimator_name") + estimator_class = estimators[model_name] + + # Some Econml estimators pass a weights vector as an unnamed third argument, + # which is not supported by flaml. We need to wrap the estimator to ignore + # TODO: expose better estimator wrappers that support weights + class FlamlEstimatorWrapper(estimator_class): + wrapped_class = estimator_class + + def fit(self, X, y, *args, **kwargs): + if len(kwargs): + warnings.warn(f"Extra args {args} {kwargs} are being ignored") + return self.wrapped_class.fit(self, X, y) + + out = FlamlEstimatorWrapper(task=task_factory("regression"), **cfg) return out def config2score(cfg: dict, X, y): - model = model_from_cfg(cfg) + model = model_from_cfg(cfg["estimator"]) model.fit(X, y) ypred = model.predict(X) err = y - ypred @@ -117,10 +131,10 @@ def make_fake_data(): X, y = make_fake_data() cfg, init_params, low_cost_init_params = joint_config(data_size=X.shape) flaml.tune.run( - evaluation_function=lambda cfg: config2score(cfg, X, y), + evaluation_function=lambda cfgs: config2score(cfgs, X, y), metric="score", mode="min", - config=cfg, + config={"estimator": cfg}, points_to_evaluate=init_params, num_samples=10, ) diff --git a/causaltune/search/params.py b/causaltune/search/params.py index 2154a13a..a4fbbb8e 100644 --- a/causaltune/search/params.py +++ b/causaltune/search/params.py @@ -8,6 +8,7 @@ from sklearn import linear_model from causaltune.utils import clean_config +from causaltune.search.component import model_from_cfg, joint_config @dataclass @@ -31,11 +32,13 @@ def __init__( n_bootstrap_samples: Optional[int] = None, n_jobs: Optional[int] = None, include_experimental=False, + sample_outcome_estimators: bool = False, ): self.n_jobs = n_jobs self.include_experimental = include_experimental self.n_bootstrap_samples = n_bootstrap_samples self.multivalue = multivalue + self.sample_outcome_estimators = sample_outcome_estimators def estimator_names_from_patterns( self, @@ -128,7 +131,9 @@ def estimator_names(self): else: return [est for est, cfg in cfgs.items() if not cfg.experimental] - def search_space(self, estimator_list: Iterable[str]): + def search_space( + self, estimator_list: Iterable[str], outcome_estimator_list: Iterable[str] = None + ): """Constructs search space with estimators and their respective configs Args: @@ -146,7 +151,11 @@ def search_space(self, estimator_list: Iterable[str]): if est in 
estimator_list ] - return {"estimator": tune.choice(search_space)} + out = {"estimator": tune.choice(search_space)} + if self.sample_outcome_estimators: + out["outcome_estimator"], _, _ = joint_config((10000, 10), outcome_estimator_list) + + return out def default_configs(self, estimator_list: Iterable[str]): """Creates list with initial configs to try before moving @@ -179,12 +188,15 @@ def method_params( propensity_model: Any, final_model: Any = None, ): - config = clean_config(deepcopy(config["estimator"])) - estimator_name = config.pop("estimator_name") + est_config = clean_config(deepcopy(config["estimator"])) + estimator_name = est_config.pop("estimator_name") + + if outcome_model == "auto": + # Spawn the outcome model dynamically + outcome_model = model_from_cfg(config["outcome_estimator"]) cfg = self._configs()[estimator_name] - # Insert the outcome model dynamically (prelude to spawning it dynamically) if cfg.outcome_model_name is not None and cfg.outcome_model_name not in cfg.init_params: cfg.init_params[cfg.outcome_model_name] = deepcopy(outcome_model) @@ -200,7 +212,7 @@ def method_params( ) method_params = { - "init_params": {**deepcopy(config), **cfg.init_params}, + "init_params": {**deepcopy(est_config), **cfg.init_params}, "fit_params": {}, } return method_params diff --git a/tests/causaltune/test_custom_outcome_model.py b/tests/causaltune/test_custom_outcome_model.py index ae2ea651..8a9e7aab 100644 --- a/tests/causaltune/test_custom_outcome_model.py +++ b/tests/causaltune/test_custom_outcome_model.py @@ -20,8 +20,6 @@ def test_custom_outcome_model(self): data.preprocess_dataset() cfg = SimpleParamService( - propensity_model=None, - outcome_model=None, n_jobs=-1, include_experimental=False, multivalue=False, @@ -56,8 +54,6 @@ def test_custom_outcome_model(self): def test_custom_outcome_model_multivalue(self): data = linear_multi_dataset(10000) cfg = SimpleParamService( - propensity_model=None, - outcome_model=None, n_jobs=-1, include_experimental=False, multivalue=True, diff --git a/tests/causaltune/test_endtoend.py b/tests/causaltune/test_endtoend.py index 862e82c3..90f41b3c 100644 --- a/tests/causaltune/test_endtoend.py +++ b/tests/causaltune/test_endtoend.py @@ -75,8 +75,6 @@ def test_endtoend_cate(self): def test_endtoend_multivalue(self): data = linear_multi_dataset(5000) cfg = SimpleParamService( - propensity_model=None, - outcome_model=None, n_jobs=-1, include_experimental=False, multivalue=True, diff --git a/tests/causaltune/test_endtoend_automl_propensity.py b/tests/causaltune/test_endtoend_automl_propensity.py index baafcb82..6e23c01d 100644 --- a/tests/causaltune/test_endtoend_automl_propensity.py +++ b/tests/causaltune/test_endtoend_automl_propensity.py @@ -53,8 +53,6 @@ def test_endtoend_multivalue_propensity(self): data = linear_multi_dataset(10000) cfg = SimpleParamService( - propensity_model=None, - outcome_model=None, n_jobs=-1, include_experimental=False, multivalue=True, diff --git a/tests/causaltune/test_endtoend_flat_search.py b/tests/causaltune/test_endtoend_flat_search.py new file mode 100644 index 00000000..daca9c1a --- /dev/null +++ b/tests/causaltune/test_endtoend_flat_search.py @@ -0,0 +1,100 @@ +import warnings + + +from causaltune import CausalTune +from causaltune.datasets import generate_non_random_dataset, linear_multi_dataset +from causaltune.search.params import SimpleParamService + +warnings.filterwarnings("ignore") # suppress sklearn deprecation warnings for now.. 
+ + +class TestEndToEnd(object): + """tests causaltune model end-to-end + 1/ import causaltune object + 2/ preprocess data + 3/ init causaltune object + 4/ run causaltune on data + """ + + def test_imports(self): + """tests if CausalTune can be imported""" + + from causaltune import CausalTune # noqa F401 + + def test_data_preprocessing(self): + """tests data preprocessing routines""" + data = generate_non_random_dataset() # noqa F484 + + def test_init_causaltune(self): + """tests if causaltune object can be instantiated without errors""" + + from causaltune import CausalTune # noqa F401 + + ct = CausalTune(time_budget=0) # noqa F484 + + def test_endtoend_cate(self): + """tests if CATE model can be instantiated and fit to data""" + + from causaltune.shap import shap_values # noqa F401 + + data = generate_non_random_dataset() + data.preprocess_dataset() + + cfg = SimpleParamService( + n_jobs=-1, + include_experimental=False, + multivalue=False, + ) + estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", 1) + # outcome = targets[0] + ct = CausalTune( + num_samples=len(estimator_list), + components_time_budget=10, + estimator_list=estimator_list, # "all", # + outcome_model="auto", + use_ray=False, + verbose=3, + components_verbose=2, + resources_per_trial={"cpu": 0.5}, + ) + + ct.fit(data) + # ct.fit(data, resume=True) + ct.effect(data.data) + ct.score_dataset(data.data, "test") + + # now let's test Shapley ct calculation + for est_name, scores in ct.scores.items(): + # Dummy model doesn't support Shapley values + # Orthoforest shapley calc is VERY slow + if "Dummy" not in est_name and "Ortho" not in est_name: + print("Calculating Shapley values for", est_name) + shap_values(scores["estimator"], data.data[:10]) + + print(f"Best estimator: {ct.best_estimator}") + + def test_endtoend_multivalue(self): + data = linear_multi_dataset(5000) + cfg = SimpleParamService( + n_jobs=-1, + include_experimental=False, + multivalue=True, + ) + estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + + ct = CausalTune( + estimator_list="all", + num_samples=len(estimator_list), + components_time_budget=10, + ) + ct.fit(data) + # ct.fit(data, resume=True) + + # TODO add an effect() call and an effect_tt call + print("yay!") + + +if __name__ == "__main__": + TestEndToEnd().test_endtoend_cate() + # pytest.main([__file__]) + # TestEndToEnd().test_endtoend_iv() diff --git a/tests/causaltune/test_endtoend_inference.py b/tests/causaltune/test_endtoend_inference.py index aa9bc954..ff142fb6 100644 --- a/tests/causaltune/test_endtoend_inference.py +++ b/tests/causaltune/test_endtoend_inference.py @@ -21,8 +21,6 @@ def test_endtoend_inference_nobootstrap(self): data.preprocess_dataset() cfg = SimpleParamService( - propensity_model=None, - outcome_model=None, n_jobs=-1, include_experimental=False, multivalue=False, @@ -71,8 +69,6 @@ def test_endtoend_inference_bootstrap(self): def test_endtoend_multivalue_nobootstrap(self): data = linear_multi_dataset(1000) cfg = SimpleParamService( - propensity_model=None, - outcome_model=None, n_jobs=-1, include_experimental=False, multivalue=True, diff --git a/tests/causaltune/test_estimator_list.py b/tests/causaltune/test_estimator_list.py index 55911951..5193f2f8 100644 --- a/tests/causaltune/test_estimator_list.py +++ b/tests/causaltune/test_estimator_list.py @@ -10,7 +10,7 @@ class TestEstimatorListGenerator: def test_auto_list(self): """tests if "auto" setting yields all available estimators""" - cfg = 
SimpleParamService(propensity_model=None, outcome_model=None, multivalue=False) + cfg = SimpleParamService(multivalue=False) auto_estimators_iv = cfg.estimator_names_from_patterns("iv", "auto") auto_estimators_backdoor = cfg.estimator_names_from_patterns("backdoor", "auto") # verify that returned estimator list includes all available estimators @@ -38,7 +38,7 @@ def test_auto_list(self): def test_substring_group(self): """tests if substring match to group of estimators works""" - cfg = SimpleParamService(propensity_model=None, outcome_model=None, multivalue=False) + cfg = SimpleParamService(multivalue=False) estimator_list = cfg.estimator_names_from_patterns("backdoor", ["dml"]) available_estimators = [e for e in cfg._configs().keys() if "dml" in e] @@ -53,13 +53,13 @@ def test_substring_group(self): def test_substring_single(self): """tests if substring match to single estimators works""" - cfg = SimpleParamService(propensity_model=None, outcome_model=None, multivalue=False) + cfg = SimpleParamService(multivalue=False) estimator_list = cfg.estimator_names_from_patterns("backdoor", ["DomainAdaptationLearner"]) assert estimator_list == ["backdoor.econml.metalearners.DomainAdaptationLearner"] def test_checkduplicates(self): """tests if duplicates are removed""" - cfg = SimpleParamService(propensity_model=None, outcome_model=None, multivalue=False) + cfg = SimpleParamService(multivalue=False) estimator_list = cfg.estimator_names_from_patterns( "backdoor", [ @@ -75,7 +75,7 @@ def test_invalid_choice(self): # this should raise a ValueError # unavailable estimators: - cfg = SimpleParamService(propensity_model=None, outcome_model=None, multivalue=False) + cfg = SimpleParamService(multivalue=False) with pytest.raises(ValueError): cfg.estimator_names_from_patterns("backdoor", ["linear_regression", "pasta", 12]) diff --git a/tests/causaltune/test_sklearn_propensity_model.py b/tests/causaltune/test_sklearn_propensity_model.py index 942a7746..4ca96449 100644 --- a/tests/causaltune/test_sklearn_propensity_model.py +++ b/tests/causaltune/test_sklearn_propensity_model.py @@ -24,8 +24,6 @@ def test_sklearn_propensity_model(self): data.preprocess_dataset() cfg = SimpleParamService( - propensity_model=None, - outcome_model=None, n_jobs=-1, include_experimental=False, multivalue=False, @@ -60,8 +58,6 @@ def test_sklearn_propensity_model(self): def test_sklearn_propensity_model_multivalue(self): data = linear_multi_dataset(5000) cfg = SimpleParamService( - propensity_model=None, - outcome_model=None, n_jobs=-1, include_experimental=False, multivalue=True, From 442e535ad0247f068a2ea0a267c8e24c14883965 Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Sun, 25 Aug 2024 19:31:22 +0200 Subject: [PATCH 05/11] Linter fix --- causaltune/optimiser.py | 4 +--- causaltune/search/params.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py index 7a60ba0b..1e30a5c4 100644 --- a/causaltune/optimiser.py +++ b/causaltune/optimiser.py @@ -1,5 +1,4 @@ import copy -from copy import deepcopy import warnings from typing import List, Optional, Union from collections import defaultdict @@ -21,8 +20,7 @@ from causaltune.search.params import SimpleParamService from causaltune.scoring import Scorer -from causaltune.r_score import RScoreWrapper -from causaltune.utils import clean_config, treatment_is_multivalue +from causaltune.utils import treatment_is_multivalue from causaltune.models.monkey_patches import ( AutoML, apply_multitreatment, diff --git 
a/causaltune/search/params.py b/causaltune/search/params.py index a4fbbb8e..956bec43 100644 --- a/causaltune/search/params.py +++ b/causaltune/search/params.py @@ -1,5 +1,5 @@ from flaml import tune -from copy import deepcopy, copy +from copy import deepcopy from typing import Optional, Sequence, Union, Iterable, Dict, Any from dataclasses import dataclass, field From b46b392db7cccf38570d53ba861d52a0a8b2681c Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Mon, 26 Aug 2024 11:00:28 +0200 Subject: [PATCH 06/11] Better init values generation for flat search --- causaltune/optimiser.py | 4 +- causaltune/search/component.py | 2 +- causaltune/search/params.py | 51 +++++++++++++++---- tests/causaltune/test_endtoend_flat_search.py | 2 +- 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py index 1e30a5c4..4cc6d485 100644 --- a/causaltune/optimiser.py +++ b/causaltune/optimiser.py @@ -464,9 +464,9 @@ def fit( # ) # ) - search_space = self.cfg.search_space(self.estimator_list) + search_space = self.cfg.search_space(self.estimator_list, data_size=data.data.shape) init_cfg = ( - self.cfg.default_configs(self.estimator_list) + self.cfg.default_configs(self.estimator_list, data_size=data.data.shape) if self._settings["try_init_configs"] else [] ) diff --git a/causaltune/search/component.py b/causaltune/search/component.py index ccc537c4..607d1332 100644 --- a/causaltune/search/component.py +++ b/causaltune/search/component.py @@ -63,7 +63,7 @@ def joint_config(data_size: Tuple[int, int], estimator_list=None): cfg["estimator_name"] = name joint_cfg.append(cfg) init_params["estimator_name"] = name - joint_init_params.append({"estimator": init_params}) + joint_init_params.append(init_params) joint_low_cost_init_params[name] = low_cost_init_params except ImportError as e: print(f"Error instantiating {name}: {e}") diff --git a/causaltune/search/params.py b/causaltune/search/params.py index 956bec43..49a578e8 100644 --- a/causaltune/search/params.py +++ b/causaltune/search/params.py @@ -1,6 +1,7 @@ +import numpy as np from flaml import tune from copy import deepcopy -from typing import Optional, Sequence, Union, Iterable, Dict, Any +from typing import Optional, Sequence, Union, Iterable, Dict, Any, Tuple from dataclasses import dataclass, field import warnings @@ -109,7 +110,7 @@ def problem_match(est_name: str, problem: str) -> bool: assert isinstance(p, str) except Exception: raise ValueError( - "Invalid estimator list, must be 'auto', 'all', or a list of strings" + "Invalid estimator list, must be 'auto', 'all', 'cheap_inference' or a list of strings" ) out = [ @@ -132,7 +133,10 @@ def estimator_names(self): return [est for est, cfg in cfgs.items() if not cfg.experimental] def search_space( - self, estimator_list: Iterable[str], outcome_estimator_list: Iterable[str] = None + self, + estimator_list: Iterable[str], + data_size: Tuple[int, int], + outcome_estimator_list: Iterable[str] = None, ): """Constructs search space with estimators and their respective configs @@ -153,11 +157,17 @@ def search_space( out = {"estimator": tune.choice(search_space)} if self.sample_outcome_estimators: - out["outcome_estimator"], _, _ = joint_config((10000, 10), outcome_estimator_list) + out["outcome_estimator"], _, _ = joint_config(data_size, outcome_estimator_list) return out - def default_configs(self, estimator_list: Iterable[str]): + def default_configs( + self, + estimator_list: Iterable[str], + data_size: Tuple[int, int], + outcome_estimator_list: 
Iterable[str] = None, + num_outcome_samples: int = 3, + ): """Creates list with initial configs to try before moving on to hierarchical HPO. The list has been identified by evaluating performance of all @@ -172,13 +182,34 @@ def default_configs(self, estimator_list: Iterable[str]): Returns: list: list of dicts with promising initial configs """ - points = [ + pre_points = [ {"estimator": {"estimator_name": est, **est_params.defaults}} for est, est_params in self._configs().items() if est in estimator_list ] - print("Initial configs:", points) + cfgs = self._configs() + + if self.sample_outcome_estimators: + points = [] + _, init_params, _ = joint_config(data_size, outcome_estimator_list) + for p in pre_points: + if cfgs[p["estimator"]["estimator_name"]].outcome_model_name is None: + this_p = deepcopy(p) + # this won't have any effect, so pick any valid config to mitigate sampling bias + this_p["outcome_estimator"] = np.random.choice(init_params) + points.append(p) + continue + else: # Sample different outcome functions for first pass + for outcome_est in np.random.choice( + init_params, size=num_outcome_samples, replace=False + ): + this_p = deepcopy(p) + this_p["outcome_estimator"] = outcome_est + points.append(this_p) + else: + points = pre_points + return points def method_params( @@ -191,12 +222,12 @@ def method_params( est_config = clean_config(deepcopy(config["estimator"])) estimator_name = est_config.pop("estimator_name") - if outcome_model == "auto": + cfg = self._configs()[estimator_name] + + if outcome_model == "auto" and cfg.outcome_model_name is not None: # Spawn the outcome model dynamically outcome_model = model_from_cfg(config["outcome_estimator"]) - cfg = self._configs()[estimator_name] - if cfg.outcome_model_name is not None and cfg.outcome_model_name not in cfg.init_params: cfg.init_params[cfg.outcome_model_name] = deepcopy(outcome_model) diff --git a/tests/causaltune/test_endtoend_flat_search.py b/tests/causaltune/test_endtoend_flat_search.py index daca9c1a..677d5c2c 100644 --- a/tests/causaltune/test_endtoend_flat_search.py +++ b/tests/causaltune/test_endtoend_flat_search.py @@ -48,8 +48,8 @@ def test_endtoend_cate(self): estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", 1) # outcome = targets[0] ct = CausalTune( - num_samples=len(estimator_list), components_time_budget=10, + num_samples=len(estimator_list) * 4, estimator_list=estimator_list, # "all", # outcome_model="auto", use_ray=False, From c8e9e75a8e997555566ac7fa61e5dfb0583f8396 Mon Sep 17 00:00:00 2001 From: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> Date: Tue, 27 Aug 2024 12:09:38 +0100 Subject: [PATCH 07/11] Update setup.py Signed-off-by: AlxdrPolyakov <122611538+AlxdrPolyakov@users.noreply.github.com> --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 08299e37..0ad67cc4 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ install_requires=[ "dowhy==0.9.1", "econml==0.14.1", - "FLAML==1.0.14", + "FLAML==2.2.0", "xgboost==1.7.6", "numpy==1.23.5", "pandas", From fd600d71bf035ecd3e7c1625b63b75cb32478f5d Mon Sep 17 00:00:00 2001 From: Alexander Polyakov Date: Tue, 27 Aug 2024 13:07:48 +0100 Subject: [PATCH 08/11] fixed formatting --- causaltune/dataset_processor.py | 5 +- causaltune/optimiser.py | 78 ++++++++++++++----- causaltune/search/params.py | 26 +++++-- tests/causaltune/test_custom_outcome_model.py | 4 +- tests/causaltune/test_endtoend.py | 4 +- .../test_endtoend_automl_propensity.py | 4 +- 
tests/causaltune/test_endtoend_flat_search.py | 4 +- tests/causaltune/test_estimator_list.py | 16 +++- .../test_sklearn_propensity_model.py | 4 +- 9 files changed, 109 insertions(+), 36 deletions(-) diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index 08b0b31c..11ef5313 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -14,7 +14,10 @@ def __init__(self): self.encoder = None def fit( - self, cd: CausalityDataset, encoder_type: Optional[str] = "onehot", outcome: str = None + self, + cd: CausalityDataset, + encoder_type: Optional[str] = "onehot", + outcome: str = None, ): cd = copy.deepcopy(cd) self.preprocess_dataset( diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py index 4cc6d485..a0f19b75 100644 --- a/causaltune/optimiser.py +++ b/causaltune/optimiser.py @@ -32,7 +32,9 @@ # Patched from sklearn.linear_model._base to adjust rtol and atol values -def _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2): +def _check_precomputed_gram_matrix( + X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2 +): n_features = X.shape[1] f1 = n_features // 2 f2 = min(f1 + 1, n_features - 1) @@ -177,13 +179,17 @@ def __init__( resources_per_trial if resources_per_trial is not None else {"cpu": 0.5} ) self._settings["try_init_configs"] = try_init_configs - self._settings["include_experimental_estimators"] = include_experimental_estimators + self._settings[ + "include_experimental_estimators" + ] = include_experimental_estimators # params for FLAML on component models: self._settings["component_models"] = {} self._settings["component_models"]["task"] = components_task self._settings["component_models"]["verbose"] = components_verbose - self._settings["component_models"]["pred_time_limit"] = components_pred_time_limit + self._settings["component_models"][ + "pred_time_limit" + ] = components_pred_time_limit self._settings["component_models"]["n_jobs"] = components_njobs self._settings["component_models"]["time_budget"] = components_time_budget self._settings["component_models"]["eval_method"] = "holdout" @@ -228,7 +234,9 @@ def init_propensity_model(self, propensity_model: str): self.propensity_model = AutoML( **{**self._settings["component_models"], "task": "classification"} ) - elif hasattr(propensity_model, "fit") and hasattr(propensity_model, "predict_proba"): + elif hasattr(propensity_model, "fit") and hasattr( + propensity_model, "predict_proba" + ): self.propensity_model = propensity_model else: raise ValueError( @@ -253,7 +261,9 @@ def init_outcome_model(self, outcome_model): # The current default behavior return self.auto_outcome_model() else: - raise ValueError('outcome_model valid values are None, "auto", or an estimator object') + raise ValueError( + 'outcome_model valid values are None, "auto", or an estimator object' + ) def auto_outcome_model(self): data = self.data @@ -329,7 +339,9 @@ def fit( if preprocess: data = copy.deepcopy(data) self.dataset_processor = CausalityDatasetProcessor() - self.dataset_processor.fit(data, encoder_type=encoder_type, outcome=encoder_outcome) + self.dataset_processor.fit( + data, encoder_type=encoder_type, outcome=encoder_outcome + ) data = self.dataset_processor.transform(data) else: self.dataset_processor = None @@ -337,7 +349,9 @@ def fit( self.data = data treatment_values = data.treatment_values - assert len(treatment_values) > 1, "Treatment must take at least 2 values, eg 0 and 1!" 
+ assert ( + len(treatment_values) > 1 + ), "Treatment must take at least 2 values, eg 0 and 1!" self._control_value = treatment_values[0] self._treatment_values = list(treatment_values[1:]) @@ -359,8 +373,8 @@ def fit( self.init_propensity_model(self._settings["propensity_model"]) - self.identified_estimand: IdentifiedEstimand = self.causal_model.identify_effect( - proceed_when_unidentifiable=True + self.identified_estimand: IdentifiedEstimand = ( + self.causal_model.identify_effect(proceed_when_unidentifiable=True) ) if bool(self.identified_estimand.estimands["iv"]) and bool(data.instruments): @@ -431,7 +445,9 @@ def fit( and self._settings["tuner"]["num_samples"] == -1 ): self._settings["tuner"]["time_budget_s"] = ( - 2.5 * len(self.estimator_list) * self._settings["component_models"]["time_budget"] + 2.5 + * len(self.estimator_list) + * self._settings["component_models"]["time_budget"] ) cmtb = self._settings["component_models"]["time_budget"] @@ -464,7 +480,9 @@ def fit( # ) # ) - search_space = self.cfg.search_space(self.estimator_list, data_size=data.data.shape) + search_space = self.cfg.search_space( + self.estimator_list, data_size=data.data.shape + ) init_cfg = ( self.cfg.default_configs(self.estimator_list, data_size=data.data.shape) if self._settings["try_init_configs"] @@ -484,9 +502,17 @@ def fit( self._tune_with_config, search_space, metric=self.metric, - points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg), - evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores), - mode=("min" if self.metric in ["energy_distance", "psw_energy_distance"] else "max"), + points_to_evaluate=( + init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg + ), + evaluated_rewards=( + [] if len(self.resume_scores) == 0 else self.resume_scores + ), + mode=( + "min" + if self.metric in ["energy_distance", "psw_energy_distance"] + else "max" + ), low_cost_partial_config={}, **self._settings["tuner"], ) @@ -572,7 +598,9 @@ def _estimate_effect(self, config): # Do we need an boject property for this, instead of a local var? 
self.estimator_name = config["estimator"]["estimator_name"] outcome_model = self.init_outcome_model(self._settings["outcome_model"]) - method_params = self.cfg.method_params(config, outcome_model, self.propensity_model) + method_params = self.cfg.method_params( + config, outcome_model, self.propensity_model + ) try: # # if True: # @@ -607,7 +635,9 @@ def _estimate_effect(self, config): } def _compute_metrics(self, estimator, df: pd.DataFrame) -> dict: - return self.scorer.make_scores(estimator, df, self.metrics_to_report, r_scorer=None) + return self.scorer.make_scores( + estimator, df, self.metrics_to_report, r_scorer=None + ) def score_dataset(self, df: pd.DataFrame, dataset_name: str): """ @@ -622,9 +652,13 @@ def score_dataset(self, df: pd.DataFrame, dataset_name: str): """ for scr in self.scores.values(): if scr["estimator"] is None: - warnings.warn("Skipping scoring for estimator %s", scr["estimator_name"]) + warnings.warn( + "Skipping scoring for estimator %s", scr["estimator_name"] + ) else: - scr["scores"][dataset_name] = self._compute_metrics(scr["estimator"], df) + scr["scores"][dataset_name] = self._compute_metrics( + scr["estimator"], df + ) @property def best_estimator(self) -> str: @@ -697,7 +731,9 @@ def effect(self, df, *args, **kwargs): """ return self.model.effect(df, *args, **kwargs) - def predict(self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs): + def predict( + self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs + ): """Heterogeneous Treatment Effects for data CausalityDataset Args: @@ -777,7 +813,9 @@ def effect_stderr(self, df, n_bootstrap_samples=5, n_jobs=1, *args, **kwargs): n_bootstrap_samples=n_bootstrap_samples, n_jobs=n_jobs ) - best_cfg = {k: v for k, v in self.best_config.items() if k not in ["estimator"]} + best_cfg = { + k: v for k, v in self.best_config.items() if k not in ["estimator"] + } method_params = { "init_params": {**best_cfg, **cfg.init_params}, "fit_params": {"inference": bootstrap}, diff --git a/causaltune/search/params.py b/causaltune/search/params.py index 49a578e8..020506b0 100644 --- a/causaltune/search/params.py +++ b/causaltune/search/params.py @@ -157,7 +157,9 @@ def search_space( out = {"estimator": tune.choice(search_space)} if self.sample_outcome_estimators: - out["outcome_estimator"], _, _ = joint_config(data_size, outcome_estimator_list) + out["outcome_estimator"], _, _ = joint_config( + data_size, outcome_estimator_list + ) return out @@ -228,7 +230,10 @@ def method_params( # Spawn the outcome model dynamically outcome_model = model_from_cfg(config["outcome_estimator"]) - if cfg.outcome_model_name is not None and cfg.outcome_model_name not in cfg.init_params: + if ( + cfg.outcome_model_name is not None + and cfg.outcome_model_name not in cfg.init_params + ): cfg.init_params[cfg.outcome_model_name] = deepcopy(outcome_model) if ( @@ -237,9 +242,14 @@ def method_params( ): cfg.init_params[cfg.propensity_model_name] = deepcopy(propensity_model) - if cfg.final_model_name is not None and cfg.final_model_name not in cfg.init_params: + if ( + cfg.final_model_name is not None + and cfg.final_model_name not in cfg.init_params + ): cfg.init_params[cfg.final_model_name] = ( - deepcopy(final_model) if final_model is not None else deepcopy(outcome_model) + deepcopy(final_model) + if final_model is not None + else deepcopy(outcome_model) ) method_params = { @@ -492,7 +502,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: "backdoor.econml.orf.DROrthoForest": EstimatorConfig( 
propensity_model_name="propensity_model", init_params={ - "model_Y": linear_model.Ridge(alpha=0.01), # WeightedLasso(alpha=0.01), # + "model_Y": linear_model.Ridge( + alpha=0.01 + ), # WeightedLasso(alpha=0.01), # "n_jobs": self.n_jobs, # "max_depth": self.max_depth, # "n_trees": self.n_estimators, @@ -521,7 +533,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: "backdoor.econml.orf.DMLOrthoForest": EstimatorConfig( propensity_model_name="model_T", init_params={ - "model_Y": linear_model.Ridge(alpha=0.01), # WeightedLasso(alpha=0.01), # + "model_Y": linear_model.Ridge( + alpha=0.01 + ), # WeightedLasso(alpha=0.01), # "discrete_treatment": True, "n_jobs": self.n_jobs, # "max_depth": self.max_depth, diff --git a/tests/causaltune/test_custom_outcome_model.py b/tests/causaltune/test_custom_outcome_model.py index 8a9e7aab..898832e9 100644 --- a/tests/causaltune/test_custom_outcome_model.py +++ b/tests/causaltune/test_custom_outcome_model.py @@ -58,7 +58,9 @@ def test_custom_outcome_model_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( outcome_model=LinearRegression(), diff --git a/tests/causaltune/test_endtoend.py b/tests/causaltune/test_endtoend.py index 90f41b3c..ccf2ebec 100644 --- a/tests/causaltune/test_endtoend.py +++ b/tests/causaltune/test_endtoend.py @@ -79,7 +79,9 @@ def test_endtoend_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( estimator_list="all", diff --git a/tests/causaltune/test_endtoend_automl_propensity.py b/tests/causaltune/test_endtoend_automl_propensity.py index 6e23c01d..c6a6d4f1 100644 --- a/tests/causaltune/test_endtoend_automl_propensity.py +++ b/tests/causaltune/test_endtoend_automl_propensity.py @@ -58,7 +58,9 @@ def test_endtoend_multivalue_propensity(self): multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( estimator_list="all", diff --git a/tests/causaltune/test_endtoend_flat_search.py b/tests/causaltune/test_endtoend_flat_search.py index 677d5c2c..7a20436d 100644 --- a/tests/causaltune/test_endtoend_flat_search.py +++ b/tests/causaltune/test_endtoend_flat_search.py @@ -80,7 +80,9 @@ def test_endtoend_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( estimator_list="all", diff --git a/tests/causaltune/test_estimator_list.py b/tests/causaltune/test_estimator_list.py index 5193f2f8..2b28c726 100644 --- a/tests/causaltune/test_estimator_list.py +++ b/tests/causaltune/test_estimator_list.py @@ -54,8 +54,12 @@ def test_substring_group(self): def test_substring_single(self): """tests if substring match to single estimators works""" cfg = SimpleParamService(multivalue=False) - estimator_list = cfg.estimator_names_from_patterns("backdoor", ["DomainAdaptationLearner"]) - assert estimator_list == 
["backdoor.econml.metalearners.DomainAdaptationLearner"] + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", ["DomainAdaptationLearner"] + ) + assert estimator_list == [ + "backdoor.econml.metalearners.DomainAdaptationLearner" + ] def test_checkduplicates(self): """tests if duplicates are removed""" @@ -78,7 +82,9 @@ def test_invalid_choice(self): cfg = SimpleParamService(multivalue=False) with pytest.raises(ValueError): - cfg.estimator_names_from_patterns("backdoor", ["linear_regression", "pasta", 12]) + cfg.estimator_names_from_patterns( + "backdoor", ["linear_regression", "pasta", 12] + ) with pytest.raises(ValueError): cfg.estimator_names_from_patterns("backdoor", 5) @@ -88,7 +94,9 @@ def test_invalid_choice_fitter(self): """tests if empty list is correctly handled""" ct = CausalTune(components_time_budget=10) ct.fit( - pd.DataFrame({"treatment": [0, 1], "outcome": [0.5, 1.5], "dummy": [0.1, 0.2]}), + pd.DataFrame( + {"treatment": [0, 1], "outcome": [0.5, 1.5], "dummy": [0.1, 0.2]} + ), treatment="treatment", outcome="outcome", common_causes=["dummy"], diff --git a/tests/causaltune/test_sklearn_propensity_model.py b/tests/causaltune/test_sklearn_propensity_model.py index 4ca96449..94ef157e 100644 --- a/tests/causaltune/test_sklearn_propensity_model.py +++ b/tests/causaltune/test_sklearn_propensity_model.py @@ -62,7 +62,9 @@ def test_sklearn_propensity_model_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( propensity_model=LogisticRegression(), From 1215d65a6d1394d429d27325f7454e48ea5d0ff8 Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Thu, 29 Aug 2024 09:19:09 +0200 Subject: [PATCH 09/11] Fix stderr calculation --- causaltune/optimiser.py | 23 +++++++++++---------- causaltune/search/component.py | 2 ++ causaltune/search/params.py | 4 ++++ tests/causaltune/test_endtoend_inference.py | 5 +++-- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py index 4cc6d485..c16e08be 100644 --- a/causaltune/optimiser.py +++ b/causaltune/optimiser.py @@ -575,7 +575,7 @@ def _estimate_effect(self, config): method_params = self.cfg.method_params(config, outcome_model, self.propensity_model) try: # - # if True: # + # This calls the causal model's estimate_effect method estimate = self._est_effect_stub(method_params) scores = { "estimator_name": self.estimator_name, @@ -592,10 +592,10 @@ def _estimate_effect(self, config): return { self.metric: scores["validation"][self.metric], "estimator": estimate, - "estimator_name": scores.pop("estimator_name"), + "estimator_name": self.estimator_name, "scores": scores, # TODO: return full config! 
- "config": config["estimator"], + "config": config, } except Exception as e: print("Evaluation failed!\n", config, traceback.format_exc()) @@ -769,19 +769,20 @@ def effect_stderr(self, df, n_bootstrap_samples=5, n_jobs=1, *args, **kwargs): if "Econml" in str(type(self.model)): # Get a list of "Inference" objects from EconML, one per treatment self.model.__class__.effect_stderr = effect_stderr - cfg = self.cfg.method_params(self.best_estimator) + outcome_model = self.init_outcome_model(self._settings["outcome_model"]) + method_params = self.cfg.method_params( + self.best_config, outcome_model, self.propensity_model + ) - if cfg.inference == "bootstrap": + if self.cfg.full_config(self.best_estimator).inference == "bootstrap": # TODO: before bootstrapping, check whether that's already been done bootstrap = BootstrapInference( n_bootstrap_samples=n_bootstrap_samples, n_jobs=n_jobs ) - - best_cfg = {k: v for k, v in self.best_config.items() if k not in ["estimator"]} - method_params = { - "init_params": {**best_cfg, **cfg.init_params}, - "fit_params": {"inference": bootstrap}, - } + method_params["fit_params"]["inference"] = bootstrap + self.estimator_name = ( + self.best_estimator + ) # needed for _est_effect_stub, just in case self.bootstrapped_estimate = self._est_effect_stub(method_params) est = self.bootstrapped_estimate.estimator else: diff --git a/causaltune/search/component.py b/causaltune/search/component.py index 607d1332..4e51568a 100644 --- a/causaltune/search/component.py +++ b/causaltune/search/component.py @@ -7,6 +7,7 @@ from flaml import tune from flaml.automl.model import ( + KNeighborsEstimator, XGBoostSklearnEstimator, XGBoostLimitDepthEstimator, RandomForestEstimator, @@ -36,6 +37,7 @@ def flaml_config_to_tune_config(flaml_config: dict) -> Tuple[dict, dict, dict]: estimators = { + "knn": KNeighborsEstimator, "xgboost": XGBoostSklearnEstimator, "xgboost_limit_depth": XGBoostLimitDepthEstimator, "random_forest": RandomForestEstimator, diff --git a/causaltune/search/params.py b/causaltune/search/params.py index 49a578e8..6684fd8a 100644 --- a/causaltune/search/params.py +++ b/causaltune/search/params.py @@ -248,6 +248,10 @@ def method_params( } return method_params + def full_config(self, estimator_name: str): + cfg = self._configs()[estimator_name] + return cfg + def _configs(self) -> Dict[str, EstimatorConfig]: if self.n_bootstrap_samples is not None: # TODO Egor please look into this diff --git a/tests/causaltune/test_endtoend_inference.py b/tests/causaltune/test_endtoend_inference.py index ff142fb6..b5d3f41a 100644 --- a/tests/causaltune/test_endtoend_inference.py +++ b/tests/causaltune/test_endtoend_inference.py @@ -107,6 +107,7 @@ def test_endtoend_multivalue_bootstrap(self): estimator_list=[e], use_ray=False, verbose=3, + outcome_model="auto", components_verbose=2, resources_per_trial={"cpu": 0.5}, ) @@ -119,5 +120,5 @@ def test_endtoend_multivalue_bootstrap(self): if __name__ == "__main__": - pytest.main([__file__]) - # TestEndToEnd().test_endtoend_iv() + # pytest.main([__file__]) + TestEndToEndInference().test_endtoend_multivalue_bootstrap() From 8afa4badb7d339c5ffdffca748a6fca3e4d022b5 Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Thu, 29 Aug 2024 10:28:10 +0200 Subject: [PATCH 10/11] Fix linter --- tests/causaltune/test_endtoend_inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/causaltune/test_endtoend_inference.py b/tests/causaltune/test_endtoend_inference.py index b5d3f41a..659fee7f 100644 --- 
a/tests/causaltune/test_endtoend_inference.py +++ b/tests/causaltune/test_endtoend_inference.py @@ -120,5 +120,5 @@ def test_endtoend_multivalue_bootstrap(self): if __name__ == "__main__": - # pytest.main([__file__]) - TestEndToEndInference().test_endtoend_multivalue_bootstrap() + pytest.main([__file__]) + # TestEndToEndInference().test_endtoend_multivalue_bootstrap() From cb6677e9c5ddcc38775d5b4203cf051d4a51306b Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Thu, 29 Aug 2024 10:53:47 +0200 Subject: [PATCH 11/11] Add ElasticNet and LassoLars output model options --- causaltune/models/regression.py | 86 +++++++++++++++++++++++++++++++++ causaltune/search/component.py | 7 ++- 2 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 causaltune/models/regression.py diff --git a/causaltune/models/regression.py b/causaltune/models/regression.py new file mode 100644 index 00000000..28aa53a8 --- /dev/null +++ b/causaltune/models/regression.py @@ -0,0 +1,86 @@ +from sklearn.linear_model import ElasticNet, LassoLars + + +from flaml.automl.model import SKLearnEstimator +from flaml import tune + +# These models are for some reason not in the deployed version of flaml 2.2.0, +# but in the source code they are there +# So keep this file in the project for now + + +class ElasticNetEstimator(SKLearnEstimator): + """The class for tuning Elastic Net regression model.""" + + """Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html""" + + ITER_HP = "max_iter" + + @classmethod + def search_space(cls, data_size, task="regresssion", **params): + return { + "alpha": { + "domain": tune.loguniform(lower=0.0001, upper=1.0), + "init_value": 0.1, + }, + "l1_ratio": { + "domain": tune.uniform(lower=0.0, upper=1.0), + "init_value": 0.5, + }, + "selection": { + "domain": tune.choice(["cyclic", "random"]), + "init_value": "cyclic", + }, + } + + def config2params(self, config: dict) -> dict: + params = super().config2params(config) + params["tol"] = params.get("tol", 0.0001) + if "n_jobs" in params: + params.pop("n_jobs") + return params + + def __init__(self, task="regression", **config): + super().__init__(task, **config) + assert self._task.is_regression(), "ElasticNet for regression task only" + self.estimator_class = ElasticNet + + +class LassoLarsEstimator(SKLearnEstimator): + """The class for tuning Lasso model fit with Least Angle Regression a.k.a. 
Lars.""" + + """Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLars.html""" + + ITER_HP = "max_iter" + + @classmethod + def search_space(cls, task=None, **params): + return { + "alpha": { + "domain": tune.loguniform(lower=1e-4, upper=1.0), + "init_value": 0.1, + }, + "fit_intercept": { + "domain": tune.choice([True, False]), + "init_value": True, + }, + "eps": { + "domain": tune.loguniform(lower=1e-16, upper=1e-4), + "init_value": 2.220446049250313e-16, + }, + } + + def config2params(self, config: dict) -> dict: + params = super().config2params(config) + if "n_jobs" in params: + params.pop("n_jobs") + return params + + def __init__(self, task="regression", **config): + super().__init__(task, **config) + assert self._task.is_regression(), "LassoLars for regression task only" + self.estimator_class = LassoLars + + def predict(self, X, **kwargs): + X = self._preprocess(X) + return self._model.predict(X, **kwargs) diff --git a/causaltune/search/component.py b/causaltune/search/component.py index 4e51568a..f3920aab 100644 --- a/causaltune/search/component.py +++ b/causaltune/search/component.py @@ -16,9 +16,10 @@ ExtraTreesEstimator, ) from flaml.automl.task.factory import task_factory - import flaml +from causaltune.models.regression import ElasticNetEstimator, LassoLarsEstimator + def flaml_config_to_tune_config(flaml_config: dict) -> Tuple[dict, dict, dict]: cfg = {} @@ -37,6 +38,8 @@ def flaml_config_to_tune_config(flaml_config: dict) -> Tuple[dict, dict, dict]: estimators = { + "elastic_net": ElasticNetEstimator, + "lasso_lars": LassoLarsEstimator, "knn": KNeighborsEstimator, "xgboost": XGBoostSklearnEstimator, "xgboost_limit_depth": XGBoostLimitDepthEstimator, @@ -56,7 +59,7 @@ def joint_config(data_size: Tuple[int, int], estimator_list=None): continue task = task_factory("regression") cfg, init_params, low_cost_init_params = flaml_config_to_tune_config( - cls.search_space(data_size, task=task) + cls.search_space(data_size=data_size, task=task) ) # Test if the estimator instantiates fine