Commit ab561aa

Merge branch 'flat_search' of https://github.com/ZmeiGorynych/causaltune

# Conflicts:
#	causaltune/optimiser.py
ZmeiGorynych committed Aug 29, 2024
2 parents 1215d65 + fd600d7 commit ab561aa
Showing 10 changed files with 107 additions and 36 deletions.
5 changes: 4 additions & 1 deletion causaltune/dataset_processor.py
@@ -14,7 +14,10 @@ def __init__(self):
         self.encoder = None
 
     def fit(
-        self, cd: CausalityDataset, encoder_type: Optional[str] = "onehot", outcome: str = None
+        self,
+        cd: CausalityDataset,
+        encoder_type: Optional[str] = "onehot",
+        outcome: str = None,
     ):
         cd = copy.deepcopy(cd)
         self.preprocess_dataset(
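For context, a minimal usage sketch of the reformatted fit() signature above. The CausalityDataset construction follows the causaltune README; the import paths are assumptions, not part of this commit.

import pandas as pd

from causaltune.data_utils import CausalityDataset  # assumed import path
from causaltune.dataset_processor import CausalityDatasetProcessor

df = pd.DataFrame(
    {"treatment": [0, 1, 0, 1], "outcome": [0.2, 0.9, 0.1, 1.1], "x": [1.0, 2.0, 3.0, 4.0]}
)
cd = CausalityDataset(data=df, treatment="treatment", outcomes=["outcome"])

processor = CausalityDatasetProcessor()
# Same call semantics as before; only the source layout of fit() changed.
processor.fit(cd, encoder_type="onehot")
encoded = processor.transform(cd)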
74 changes: 55 additions & 19 deletions causaltune/optimiser.py
@@ -32,7 +32,9 @@
 
 
 # Patched from sklearn.linear_model._base to adjust rtol and atol values
-def _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2):
+def _check_precomputed_gram_matrix(
+    X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2
+):
     n_features = X.shape[1]
     f1 = n_features // 2
     f2 = min(f1 + 1, n_features - 1)
@@ -177,13 +179,17 @@ def __init__(
             resources_per_trial if resources_per_trial is not None else {"cpu": 0.5}
         )
         self._settings["try_init_configs"] = try_init_configs
-        self._settings["include_experimental_estimators"] = include_experimental_estimators
+        self._settings[
+            "include_experimental_estimators"
+        ] = include_experimental_estimators
 
         # params for FLAML on component models:
         self._settings["component_models"] = {}
         self._settings["component_models"]["task"] = components_task
         self._settings["component_models"]["verbose"] = components_verbose
-        self._settings["component_models"]["pred_time_limit"] = components_pred_time_limit
+        self._settings["component_models"][
+            "pred_time_limit"
+        ] = components_pred_time_limit
         self._settings["component_models"]["n_jobs"] = components_njobs
         self._settings["component_models"]["time_budget"] = components_time_budget
         self._settings["component_models"]["eval_method"] = "holdout"
@@ -228,7 +234,9 @@ def init_propensity_model(self, propensity_model: str):
             self.propensity_model = AutoML(
                 **{**self._settings["component_models"], "task": "classification"}
             )
-        elif hasattr(propensity_model, "fit") and hasattr(propensity_model, "predict_proba"):
+        elif hasattr(propensity_model, "fit") and hasattr(
+            propensity_model, "predict_proba"
+        ):
             self.propensity_model = propensity_model
         else:
             raise ValueError(
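The rewrapped elif above duck-types the propensity model: any object with fit() and predict_proba() is accepted. A hedged sketch mirroring the sklearn-based test touched later in this commit:

from sklearn.linear_model import LogisticRegression

from causaltune import CausalTune  # top-level export, as in the README

# LogisticRegression passes both hasattr checks; an object lacking
# predict_proba would fall through to the ValueError branch instead.
ct = CausalTune(propensity_model=LogisticRegression(), components_time_budget=10)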
@@ -253,7 +261,9 @@ def init_outcome_model(self, outcome_model):
             # The current default behavior
             return self.auto_outcome_model()
         else:
-            raise ValueError('outcome_model valid values are None, "auto", or an estimator object')
+            raise ValueError(
+                'outcome_model valid values are None, "auto", or an estimator object'
+            )
 
     def auto_outcome_model(self):
         data = self.data
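The reformatted ValueError states the full contract of init_outcome_model; in sketch form (the estimator choice is illustrative):

from sklearn.linear_model import LinearRegression

from causaltune import CausalTune  # assumed top-level export

CausalTune(outcome_model=None)                # current default: auto selection
CausalTune(outcome_model="auto")              # explicit auto selection
CausalTune(outcome_model=LinearRegression())  # any estimator object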
@@ -329,15 +339,19 @@ def fit(
         if preprocess:
             data = copy.deepcopy(data)
             self.dataset_processor = CausalityDatasetProcessor()
-            self.dataset_processor.fit(data, encoder_type=encoder_type, outcome=encoder_outcome)
+            self.dataset_processor.fit(
+                data, encoder_type=encoder_type, outcome=encoder_outcome
+            )
             data = self.dataset_processor.transform(data)
         else:
             self.dataset_processor = None
 
         self.data = data
         treatment_values = data.treatment_values
 
-        assert len(treatment_values) > 1, "Treatment must take at least 2 values, eg 0 and 1!"
+        assert (
+            len(treatment_values) > 1
+        ), "Treatment must take at least 2 values, eg 0 and 1!"
 
         self._control_value = treatment_values[0]
         self._treatment_values = list(treatment_values[1:])
@@ -359,8 +373,8 @@ def fit(
 
         self.init_propensity_model(self._settings["propensity_model"])
 
-        self.identified_estimand: IdentifiedEstimand = self.causal_model.identify_effect(
-            proceed_when_unidentifiable=True
+        self.identified_estimand: IdentifiedEstimand = (
+            self.causal_model.identify_effect(proceed_when_unidentifiable=True)
         )
 
         if bool(self.identified_estimand.estimands["iv"]) and bool(data.instruments):
@@ -431,7 +445,9 @@ def fit(
             and self._settings["tuner"]["num_samples"] == -1
         ):
             self._settings["tuner"]["time_budget_s"] = (
-                2.5 * len(self.estimator_list) * self._settings["component_models"]["time_budget"]
+                2.5
+                * len(self.estimator_list)
+                * self._settings["component_models"]["time_budget"]
             )
 
         cmtb = self._settings["component_models"]["time_budget"]
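The rewrapped product above is the default tuner budget: absent an explicit time_budget_s or num_samples, the tuner gets roughly 2.5 component-model budgets per candidate estimator. As a standalone sketch (the helper name is hypothetical; the commit computes this inline):

def default_tuner_budget_s(n_estimators: int, component_time_budget_s: float) -> float:
    # 2.5 x the per-component-model budget for each estimator searched over
    return 2.5 * n_estimators * component_time_budget_s

assert default_tuner_budget_s(8, 60) == 1200.0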
@@ -464,7 +480,9 @@ def fit(
         #     )
         # )
 
-        search_space = self.cfg.search_space(self.estimator_list, data_size=data.data.shape)
+        search_space = self.cfg.search_space(
+            self.estimator_list, data_size=data.data.shape
+        )
         init_cfg = (
             self.cfg.default_configs(self.estimator_list, data_size=data.data.shape)
             if self._settings["try_init_configs"]
@@ -484,9 +502,17 @@
             self._tune_with_config,
             search_space,
             metric=self.metric,
-            points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
-            evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
-            mode=("min" if self.metric in ["energy_distance", "psw_energy_distance"] else "max"),
+            points_to_evaluate=(
+                init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg
+            ),
+            evaluated_rewards=(
+                [] if len(self.resume_scores) == 0 else self.resume_scores
+            ),
+            mode=(
+                "min"
+                if self.metric in ["energy_distance", "psw_energy_distance"]
+                else "max"
+            ),
             low_cost_partial_config={},
             **self._settings["tuner"],
         )
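The rewrapped mode argument encodes the optimization direction per metric: the two energy-distance metrics are minimized, everything else is maximized. Isolated as a sketch (the function name is hypothetical):

def tune_mode(metric: str) -> str:
    # distance-style metrics improve downwards, score-style metrics upwards
    return "min" if metric in ["energy_distance", "psw_energy_distance"] else "max"

assert tune_mode("energy_distance") == "min"
assert tune_mode("psw_energy_distance") == "min"
assert tune_mode("any_other_metric") == "max"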
@@ -572,7 +598,9 @@ def _estimate_effect(self, config):
         # Do we need an object property for this, instead of a local var?
         self.estimator_name = config["estimator"]["estimator_name"]
         outcome_model = self.init_outcome_model(self._settings["outcome_model"])
-        method_params = self.cfg.method_params(config, outcome_model, self.propensity_model)
+        method_params = self.cfg.method_params(
+            config, outcome_model, self.propensity_model
+        )
 
         try:  #
             # This calls the causal model's estimate_effect method
@@ -607,7 +635,9 @@ def _estimate_effect(self, config):
         }
 
     def _compute_metrics(self, estimator, df: pd.DataFrame) -> dict:
-        return self.scorer.make_scores(estimator, df, self.metrics_to_report, r_scorer=None)
+        return self.scorer.make_scores(
+            estimator, df, self.metrics_to_report, r_scorer=None
+        )
 
     def score_dataset(self, df: pd.DataFrame, dataset_name: str):
         """
@@ -622,9 +652,13 @@ def score_dataset(self, df: pd.DataFrame, dataset_name: str):
         """
         for scr in self.scores.values():
             if scr["estimator"] is None:
-                warnings.warn("Skipping scoring for estimator %s", scr["estimator_name"])
+                warnings.warn(
+                    "Skipping scoring for estimator %s", scr["estimator_name"]
+                )
             else:
-                scr["scores"][dataset_name] = self._compute_metrics(scr["estimator"], df)
+                scr["scores"][dataset_name] = self._compute_metrics(
+                    scr["estimator"], df
+                )
 
     @property
     def best_estimator(self) -> str:
@@ -697,7 +731,9 @@ def effect(self, df, *args, **kwargs):
         """
         return self.model.effect(df, *args, **kwargs)
 
-    def predict(self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs):
+    def predict(
+        self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs
+    ):
         """Heterogeneous Treatment Effects for data CausalityDataset
 
         Args:
26 changes: 20 additions & 6 deletions causaltune/search/params.py
@@ -157,7 +157,9 @@ def search_space(
 
         out = {"estimator": tune.choice(search_space)}
         if self.sample_outcome_estimators:
-            out["outcome_estimator"], _, _ = joint_config(data_size, outcome_estimator_list)
+            out["outcome_estimator"], _, _ = joint_config(
+                data_size, outcome_estimator_list
+            )
 
         return out

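For orientation, the returned search space looks roughly as follows. flaml's tune.choice is real; the single estimator entry is an illustrative assumption (only its estimator_name key is visible elsewhere in this commit):

from flaml import tune

search_space = {
    "estimator": tune.choice(
        [{"estimator_name": "backdoor.econml.metalearners.DomainAdaptationLearner"}]
    ),
    # plus an "outcome_estimator" sub-space from joint_config(...)
    # when sample_outcome_estimators is enabled
}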
@@ -228,7 +230,10 @@ def method_params(
             # Spawn the outcome model dynamically
             outcome_model = model_from_cfg(config["outcome_estimator"])
 
-        if cfg.outcome_model_name is not None and cfg.outcome_model_name not in cfg.init_params:
+        if (
+            cfg.outcome_model_name is not None
+            and cfg.outcome_model_name not in cfg.init_params
+        ):
             cfg.init_params[cfg.outcome_model_name] = deepcopy(outcome_model)
 
         if (
@@ -237,9 +242,14 @@
         ):
             cfg.init_params[cfg.propensity_model_name] = deepcopy(propensity_model)
 
-        if cfg.final_model_name is not None and cfg.final_model_name not in cfg.init_params:
+        if (
+            cfg.final_model_name is not None
+            and cfg.final_model_name not in cfg.init_params
+        ):
             cfg.init_params[cfg.final_model_name] = (
-                deepcopy(final_model) if final_model is not None else deepcopy(outcome_model)
+                deepcopy(final_model)
+                if final_model is not None
+                else deepcopy(outcome_model)
             )
 
         method_params = {
@@ -496,7 +506,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]:
             "backdoor.econml.orf.DROrthoForest": EstimatorConfig(
                 propensity_model_name="propensity_model",
                 init_params={
-                    "model_Y": linear_model.Ridge(alpha=0.01),  # WeightedLasso(alpha=0.01), #
+                    "model_Y": linear_model.Ridge(
+                        alpha=0.01
+                    ),  # WeightedLasso(alpha=0.01), #
                     "n_jobs": self.n_jobs,
                     # "max_depth": self.max_depth,
                     # "n_trees": self.n_estimators,
@@ -525,7 +537,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]:
             "backdoor.econml.orf.DMLOrthoForest": EstimatorConfig(
                 propensity_model_name="model_T",
                 init_params={
-                    "model_Y": linear_model.Ridge(alpha=0.01),  # WeightedLasso(alpha=0.01), #
+                    "model_Y": linear_model.Ridge(
+                        alpha=0.01
+                    ),  # WeightedLasso(alpha=0.01), #
                     "discrete_treatment": True,
                     "n_jobs": self.n_jobs,
                     # "max_depth": self.max_depth,
2 changes: 1 addition & 1 deletion setup.py
@@ -21,7 +21,7 @@
     install_requires=[
         "dowhy==0.9.1",
         "econml==0.14.1",
-        "FLAML==1.0.14",
+        "FLAML==2.2.0",
         "xgboost==1.7.6",
         "numpy==1.23.5",
         "pandas",
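Note that the FLAML pin jumps a major version (1.0.14 to 2.2.0). A quick environment sanity check, offered as an aside rather than part of the commit:

import flaml

assert flaml.__version__ == "2.2.0", flaml.__version__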
4 changes: 3 additions & 1 deletion tests/causaltune/test_custom_outcome_model.py
@@ -58,7 +58,9 @@ def test_custom_outcome_model_multivalue(self):
             include_experimental=False,
             multivalue=True,
         )
-        estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data))
+        estimator_list = cfg.estimator_names_from_patterns(
+            "backdoor", "all", data_rows=len(data)
+        )
 
         ct = CausalTune(
             outcome_model=LinearRegression(),
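The call rewrapped above, shown standalone: the "all" pattern expands to every backdoor estimator appropriate for the given data size. The import path is assumed from the repository layout:

from causaltune.search.params import SimpleParamService

cfg = SimpleParamService(multivalue=True)
estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=1000)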
4 changes: 3 additions & 1 deletion tests/causaltune/test_endtoend.py
@@ -79,7 +79,9 @@ def test_endtoend_multivalue(self):
             include_experimental=False,
             multivalue=True,
         )
-        estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data))
+        estimator_list = cfg.estimator_names_from_patterns(
+            "backdoor", "all", data_rows=len(data)
+        )
 
         ct = CausalTune(
             estimator_list="all",
4 changes: 3 additions & 1 deletion tests/causaltune/test_endtoend_automl_propensity.py
@@ -58,7 +58,9 @@ def test_endtoend_multivalue_propensity(self):
             multivalue=True,
         )
 
-        estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data))
+        estimator_list = cfg.estimator_names_from_patterns(
+            "backdoor", "all", data_rows=len(data)
+        )
 
         ct = CausalTune(
             estimator_list="all",
4 changes: 3 additions & 1 deletion tests/causaltune/test_endtoend_flat_search.py
@@ -80,7 +80,9 @@ def test_endtoend_multivalue(self):
             include_experimental=False,
             multivalue=True,
         )
-        estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data))
+        estimator_list = cfg.estimator_names_from_patterns(
+            "backdoor", "all", data_rows=len(data)
+        )
 
         ct = CausalTune(
             estimator_list="all",
16 changes: 12 additions & 4 deletions tests/causaltune/test_estimator_list.py
@@ -54,8 +54,12 @@ def test_substring_group(self):
     def test_substring_single(self):
         """tests if substring match to single estimators works"""
         cfg = SimpleParamService(multivalue=False)
-        estimator_list = cfg.estimator_names_from_patterns("backdoor", ["DomainAdaptationLearner"])
-        assert estimator_list == ["backdoor.econml.metalearners.DomainAdaptationLearner"]
+        estimator_list = cfg.estimator_names_from_patterns(
+            "backdoor", ["DomainAdaptationLearner"]
+        )
+        assert estimator_list == [
+            "backdoor.econml.metalearners.DomainAdaptationLearner"
+        ]
 
     def test_checkduplicates(self):
         """tests if duplicates are removed"""
@@ -78,7 +82,9 @@ def test_invalid_choice(self):
         cfg = SimpleParamService(multivalue=False)
 
         with pytest.raises(ValueError):
-            cfg.estimator_names_from_patterns("backdoor", ["linear_regression", "pasta", 12])
+            cfg.estimator_names_from_patterns(
+                "backdoor", ["linear_regression", "pasta", 12]
+            )
 
         with pytest.raises(ValueError):
             cfg.estimator_names_from_patterns("backdoor", 5)
@@ -88,7 +94,9 @@ def test_invalid_choice_fitter(self):
         """tests if empty list is correctly handled"""
         ct = CausalTune(components_time_budget=10)
         ct.fit(
-            pd.DataFrame({"treatment": [0, 1], "outcome": [0.5, 1.5], "dummy": [0.1, 0.2]}),
+            pd.DataFrame(
+                {"treatment": [0, 1], "outcome": [0.5, 1.5], "dummy": [0.1, 0.2]}
+            ),
             treatment="treatment",
             outcome="outcome",
             common_causes=["dummy"],
4 changes: 3 additions & 1 deletion tests/causaltune/test_sklearn_propensity_model.py
@@ -62,7 +62,9 @@ def test_sklearn_propensity_model_multivalue(self):
             include_experimental=False,
             multivalue=True,
         )
-        estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data))
+        estimator_list = cfg.estimator_names_from_patterns(
+            "backdoor", "all", data_rows=len(data)
+        )
 
         ct = CausalTune(
             propensity_model=LogisticRegression(),
