diff --git a/causaltune/dataset_processor.py b/causaltune/dataset_processor.py index 08b0b31c..11ef5313 100644 --- a/causaltune/dataset_processor.py +++ b/causaltune/dataset_processor.py @@ -14,7 +14,10 @@ def __init__(self): self.encoder = None def fit( - self, cd: CausalityDataset, encoder_type: Optional[str] = "onehot", outcome: str = None + self, + cd: CausalityDataset, + encoder_type: Optional[str] = "onehot", + outcome: str = None, ): cd = copy.deepcopy(cd) self.preprocess_dataset( diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py index c16e08be..9331295c 100644 --- a/causaltune/optimiser.py +++ b/causaltune/optimiser.py @@ -32,7 +32,9 @@ # Patched from sklearn.linear_model._base to adjust rtol and atol values -def _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2): +def _check_precomputed_gram_matrix( + X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2 +): n_features = X.shape[1] f1 = n_features // 2 f2 = min(f1 + 1, n_features - 1) @@ -177,13 +179,17 @@ def __init__( resources_per_trial if resources_per_trial is not None else {"cpu": 0.5} ) self._settings["try_init_configs"] = try_init_configs - self._settings["include_experimental_estimators"] = include_experimental_estimators + self._settings[ + "include_experimental_estimators" + ] = include_experimental_estimators # params for FLAML on component models: self._settings["component_models"] = {} self._settings["component_models"]["task"] = components_task self._settings["component_models"]["verbose"] = components_verbose - self._settings["component_models"]["pred_time_limit"] = components_pred_time_limit + self._settings["component_models"][ + "pred_time_limit" + ] = components_pred_time_limit self._settings["component_models"]["n_jobs"] = components_njobs self._settings["component_models"]["time_budget"] = components_time_budget self._settings["component_models"]["eval_method"] = "holdout" @@ -228,7 +234,9 @@ def init_propensity_model(self, propensity_model: str): self.propensity_model = AutoML( **{**self._settings["component_models"], "task": "classification"} ) - elif hasattr(propensity_model, "fit") and hasattr(propensity_model, "predict_proba"): + elif hasattr(propensity_model, "fit") and hasattr( + propensity_model, "predict_proba" + ): self.propensity_model = propensity_model else: raise ValueError( @@ -253,7 +261,9 @@ def init_outcome_model(self, outcome_model): # The current default behavior return self.auto_outcome_model() else: - raise ValueError('outcome_model valid values are None, "auto", or an estimator object') + raise ValueError( + 'outcome_model valid values are None, "auto", or an estimator object' + ) def auto_outcome_model(self): data = self.data @@ -329,7 +339,9 @@ def fit( if preprocess: data = copy.deepcopy(data) self.dataset_processor = CausalityDatasetProcessor() - self.dataset_processor.fit(data, encoder_type=encoder_type, outcome=encoder_outcome) + self.dataset_processor.fit( + data, encoder_type=encoder_type, outcome=encoder_outcome + ) data = self.dataset_processor.transform(data) else: self.dataset_processor = None @@ -337,7 +349,9 @@ def fit( self.data = data treatment_values = data.treatment_values - assert len(treatment_values) > 1, "Treatment must take at least 2 values, eg 0 and 1!" + assert ( + len(treatment_values) > 1 + ), "Treatment must take at least 2 values, eg 0 and 1!" self._control_value = treatment_values[0] self._treatment_values = list(treatment_values[1:]) @@ -359,8 +373,8 @@ def fit( self.init_propensity_model(self._settings["propensity_model"]) - self.identified_estimand: IdentifiedEstimand = self.causal_model.identify_effect( - proceed_when_unidentifiable=True + self.identified_estimand: IdentifiedEstimand = ( + self.causal_model.identify_effect(proceed_when_unidentifiable=True) ) if bool(self.identified_estimand.estimands["iv"]) and bool(data.instruments): @@ -431,7 +445,9 @@ def fit( and self._settings["tuner"]["num_samples"] == -1 ): self._settings["tuner"]["time_budget_s"] = ( - 2.5 * len(self.estimator_list) * self._settings["component_models"]["time_budget"] + 2.5 + * len(self.estimator_list) + * self._settings["component_models"]["time_budget"] ) cmtb = self._settings["component_models"]["time_budget"] @@ -464,7 +480,9 @@ def fit( # ) # ) - search_space = self.cfg.search_space(self.estimator_list, data_size=data.data.shape) + search_space = self.cfg.search_space( + self.estimator_list, data_size=data.data.shape + ) init_cfg = ( self.cfg.default_configs(self.estimator_list, data_size=data.data.shape) if self._settings["try_init_configs"] @@ -484,9 +502,17 @@ def fit( self._tune_with_config, search_space, metric=self.metric, - points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg), - evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores), - mode=("min" if self.metric in ["energy_distance", "psw_energy_distance"] else "max"), + points_to_evaluate=( + init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg + ), + evaluated_rewards=( + [] if len(self.resume_scores) == 0 else self.resume_scores + ), + mode=( + "min" + if self.metric in ["energy_distance", "psw_energy_distance"] + else "max" + ), low_cost_partial_config={}, **self._settings["tuner"], ) @@ -572,7 +598,9 @@ def _estimate_effect(self, config): # Do we need an boject property for this, instead of a local var? self.estimator_name = config["estimator"]["estimator_name"] outcome_model = self.init_outcome_model(self._settings["outcome_model"]) - method_params = self.cfg.method_params(config, outcome_model, self.propensity_model) + method_params = self.cfg.method_params( + config, outcome_model, self.propensity_model + ) try: # # This calls the causal model's estimate_effect method @@ -607,7 +635,9 @@ def _estimate_effect(self, config): } def _compute_metrics(self, estimator, df: pd.DataFrame) -> dict: - return self.scorer.make_scores(estimator, df, self.metrics_to_report, r_scorer=None) + return self.scorer.make_scores( + estimator, df, self.metrics_to_report, r_scorer=None + ) def score_dataset(self, df: pd.DataFrame, dataset_name: str): """ @@ -622,9 +652,13 @@ def score_dataset(self, df: pd.DataFrame, dataset_name: str): """ for scr in self.scores.values(): if scr["estimator"] is None: - warnings.warn("Skipping scoring for estimator %s", scr["estimator_name"]) + warnings.warn( + "Skipping scoring for estimator %s", scr["estimator_name"] + ) else: - scr["scores"][dataset_name] = self._compute_metrics(scr["estimator"], df) + scr["scores"][dataset_name] = self._compute_metrics( + scr["estimator"], df + ) @property def best_estimator(self) -> str: @@ -697,7 +731,9 @@ def effect(self, df, *args, **kwargs): """ return self.model.effect(df, *args, **kwargs) - def predict(self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs): + def predict( + self, cd: CausalityDataset, preprocess: Optional[bool] = False, *args, **kwargs + ): """Heterogeneous Treatment Effects for data CausalityDataset Args: diff --git a/causaltune/search/params.py b/causaltune/search/params.py index 6684fd8a..c75f8993 100644 --- a/causaltune/search/params.py +++ b/causaltune/search/params.py @@ -157,7 +157,9 @@ def search_space( out = {"estimator": tune.choice(search_space)} if self.sample_outcome_estimators: - out["outcome_estimator"], _, _ = joint_config(data_size, outcome_estimator_list) + out["outcome_estimator"], _, _ = joint_config( + data_size, outcome_estimator_list + ) return out @@ -228,7 +230,10 @@ def method_params( # Spawn the outcome model dynamically outcome_model = model_from_cfg(config["outcome_estimator"]) - if cfg.outcome_model_name is not None and cfg.outcome_model_name not in cfg.init_params: + if ( + cfg.outcome_model_name is not None + and cfg.outcome_model_name not in cfg.init_params + ): cfg.init_params[cfg.outcome_model_name] = deepcopy(outcome_model) if ( @@ -237,9 +242,14 @@ def method_params( ): cfg.init_params[cfg.propensity_model_name] = deepcopy(propensity_model) - if cfg.final_model_name is not None and cfg.final_model_name not in cfg.init_params: + if ( + cfg.final_model_name is not None + and cfg.final_model_name not in cfg.init_params + ): cfg.init_params[cfg.final_model_name] = ( - deepcopy(final_model) if final_model is not None else deepcopy(outcome_model) + deepcopy(final_model) + if final_model is not None + else deepcopy(outcome_model) ) method_params = { @@ -496,7 +506,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: "backdoor.econml.orf.DROrthoForest": EstimatorConfig( propensity_model_name="propensity_model", init_params={ - "model_Y": linear_model.Ridge(alpha=0.01), # WeightedLasso(alpha=0.01), # + "model_Y": linear_model.Ridge( + alpha=0.01 + ), # WeightedLasso(alpha=0.01), # "n_jobs": self.n_jobs, # "max_depth": self.max_depth, # "n_trees": self.n_estimators, @@ -525,7 +537,9 @@ def _configs(self) -> Dict[str, EstimatorConfig]: "backdoor.econml.orf.DMLOrthoForest": EstimatorConfig( propensity_model_name="model_T", init_params={ - "model_Y": linear_model.Ridge(alpha=0.01), # WeightedLasso(alpha=0.01), # + "model_Y": linear_model.Ridge( + alpha=0.01 + ), # WeightedLasso(alpha=0.01), # "discrete_treatment": True, "n_jobs": self.n_jobs, # "max_depth": self.max_depth, diff --git a/setup.py b/setup.py index 08299e37..0ad67cc4 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ install_requires=[ "dowhy==0.9.1", "econml==0.14.1", - "FLAML==1.0.14", + "FLAML==2.2.0", "xgboost==1.7.6", "numpy==1.23.5", "pandas", diff --git a/tests/causaltune/test_custom_outcome_model.py b/tests/causaltune/test_custom_outcome_model.py index 8a9e7aab..898832e9 100644 --- a/tests/causaltune/test_custom_outcome_model.py +++ b/tests/causaltune/test_custom_outcome_model.py @@ -58,7 +58,9 @@ def test_custom_outcome_model_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( outcome_model=LinearRegression(), diff --git a/tests/causaltune/test_endtoend.py b/tests/causaltune/test_endtoend.py index 90f41b3c..ccf2ebec 100644 --- a/tests/causaltune/test_endtoend.py +++ b/tests/causaltune/test_endtoend.py @@ -79,7 +79,9 @@ def test_endtoend_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( estimator_list="all", diff --git a/tests/causaltune/test_endtoend_automl_propensity.py b/tests/causaltune/test_endtoend_automl_propensity.py index 6e23c01d..c6a6d4f1 100644 --- a/tests/causaltune/test_endtoend_automl_propensity.py +++ b/tests/causaltune/test_endtoend_automl_propensity.py @@ -58,7 +58,9 @@ def test_endtoend_multivalue_propensity(self): multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( estimator_list="all", diff --git a/tests/causaltune/test_endtoend_flat_search.py b/tests/causaltune/test_endtoend_flat_search.py index 677d5c2c..7a20436d 100644 --- a/tests/causaltune/test_endtoend_flat_search.py +++ b/tests/causaltune/test_endtoend_flat_search.py @@ -80,7 +80,9 @@ def test_endtoend_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( estimator_list="all", diff --git a/tests/causaltune/test_estimator_list.py b/tests/causaltune/test_estimator_list.py index 5193f2f8..2b28c726 100644 --- a/tests/causaltune/test_estimator_list.py +++ b/tests/causaltune/test_estimator_list.py @@ -54,8 +54,12 @@ def test_substring_group(self): def test_substring_single(self): """tests if substring match to single estimators works""" cfg = SimpleParamService(multivalue=False) - estimator_list = cfg.estimator_names_from_patterns("backdoor", ["DomainAdaptationLearner"]) - assert estimator_list == ["backdoor.econml.metalearners.DomainAdaptationLearner"] + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", ["DomainAdaptationLearner"] + ) + assert estimator_list == [ + "backdoor.econml.metalearners.DomainAdaptationLearner" + ] def test_checkduplicates(self): """tests if duplicates are removed""" @@ -78,7 +82,9 @@ def test_invalid_choice(self): cfg = SimpleParamService(multivalue=False) with pytest.raises(ValueError): - cfg.estimator_names_from_patterns("backdoor", ["linear_regression", "pasta", 12]) + cfg.estimator_names_from_patterns( + "backdoor", ["linear_regression", "pasta", 12] + ) with pytest.raises(ValueError): cfg.estimator_names_from_patterns("backdoor", 5) @@ -88,7 +94,9 @@ def test_invalid_choice_fitter(self): """tests if empty list is correctly handled""" ct = CausalTune(components_time_budget=10) ct.fit( - pd.DataFrame({"treatment": [0, 1], "outcome": [0.5, 1.5], "dummy": [0.1, 0.2]}), + pd.DataFrame( + {"treatment": [0, 1], "outcome": [0.5, 1.5], "dummy": [0.1, 0.2]} + ), treatment="treatment", outcome="outcome", common_causes=["dummy"], diff --git a/tests/causaltune/test_sklearn_propensity_model.py b/tests/causaltune/test_sklearn_propensity_model.py index 4ca96449..94ef157e 100644 --- a/tests/causaltune/test_sklearn_propensity_model.py +++ b/tests/causaltune/test_sklearn_propensity_model.py @@ -62,7 +62,9 @@ def test_sklearn_propensity_model_multivalue(self): include_experimental=False, multivalue=True, ) - estimator_list = cfg.estimator_names_from_patterns("backdoor", "all", data_rows=len(data)) + estimator_list = cfg.estimator_names_from_patterns( + "backdoor", "all", data_rows=len(data) + ) ct = CausalTune( propensity_model=LogisticRegression(),