XGBooster Invalid missing value: null #23

Peccer · 2022-10-11T12:08:43Z

Running:

print('\n[Create 2 models for treatment and untreatment and estimate CATE (Conditional Average Treatment Effects)]')
train_df, test_df = cl.estimate_cate_by_2_models()

gives the error below. I ran the example notebook from the GitHub project.

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /tmp/ipykernel_121/3555275851.py:5 in │
│ │
│ [Errno 2] No such file or directory: '/tmp/ipykernel_121/3555275851.py' │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/causal_lift.py:654 in │
│ estimate_cate_by_2_models │
│ │
│ 651 │ │ │ ) │
│ 652 │ │ │
│ 653 │ │ if self.runner: │
│ ❱ 654 │ │ │ self.kedro_context.run(tags=["311_fit", "312_bundle_2_models"]) │
│ 655 │ │ │ self.uplift_models_dict = self.kedro_context.catalog.load( │
│ 656 │ │ │ │ "uplift_models_dict" │
│ 657 │ │ │ ) │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:178 in run │
│ │
│ 175 │ │ │ + "only_missing: {}".format(only_missing) │
│ 176 │ │ │ + ")" │
│ 177 │ │ ) │
│ ❱ 178 │ │ return super().run( │
│ 179 │ │ │ tags=tags, runner=runner, node_names=node_names, only_missing=only_missing │
│ 180 │ │ ) │
│ 181 │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:141 in run │
│ │
│ 138 │ │ self, **kwargs # type: Any │
│ 139 │ ): │
│ 140 │ │ # type: (...) -> Dict[str, Any] │
│ ❱ 141 │ │ d = super().run(**kwargs) │
│ 142 │ │ self.catalog.add_feed_dict(d, replace=True) │
│ 143 │ │ return d │
│ 144 │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:131 in run │
│ │
│ 128 │ │ │ runner = ( │
│ 129 │ │ │ │ ParallelRunner() if runner == "ParallelRunner" else SequentialRunner() │
│ 130 │ │ │ ) │
│ ❱ 131 │ │ return super().run(runner=runner, **kwargs) │
│ 132 │
│ 133 │
│ 134 class ProjectContext2(ProjectContext1): │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:106 in run │
│ │
│ 103 │ │ runner = runner or SequentialRunner() │
│ 104 │ │ if only_missing and _skippable(self.catalog): │
│ 105 │ │ │ return runner.run_only_missing(pipeline, self.catalog) │
│ ❱ 106 │ │ return runner.run(pipeline, self.catalog) │
│ 107 │
│ 108 │
│ 109 def _skippable( │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:88 in run │
│ │
│ 85 │ │ │ self._logger.info( │
│ 86 │ │ │ │ "Asynchronous mode is enabled for loading and saving data" │
│ 87 │ │ │ ) │
│ ❱ 88 │ │ self._run(pipeline, catalog, hook_manager, session_id) │
│ 89 │ │ │
│ 90 │ │ self._logger.info("Pipeline execution completed successfully.") │
│ 91 │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/sequential_runner.py:70 in _run │
│ │
│ 67 │ │ │
│ 68 │ │ for exec_index, node in enumerate(nodes): │
│ 69 │ │ │ try: │
│ ❱ 70 │ │ │ │ run_node(node, catalog, hook_manager, self._is_async, session_id) │
│ 71 │ │ │ │ done_nodes.add(node) │
│ 72 │ │ │ except Exception: │
│ 73 │ │ │ │ self._suggest_resume_scenario(pipeline, done_nodes, catalog) │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:304 in run_node │
│ │
│ 301 │ if is_async: │
│ 302 │ │ node = _run_node_async(node, catalog, hook_manager, session_id) │
│ 303 │ else: │
│ ❱ 304 │ │ node = _run_node_sequential(node, catalog, hook_manager, session_id) │
│ 305 │ │
│ 306 │ for name in node.confirms: │
│ 307 │ │ catalog.confirm(name) │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:398 in _run_node_sequential │
│ │
│ 395 │ ) │
│ 396 │ inputs.update(additional_inputs) │
│ 397 │ │
│ ❱ 398 │ outputs = _call_node_run( │
│ 399 │ │ node, catalog, inputs, is_async, hook_manager, session_id=session_id │
│ 400 │ ) │
│ 401 │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:366 in _call_node_run │
│ │
│ 363 │ │ │ is_async=is_async, │
│ 364 │ │ │ session_id=session_id, │
│ 365 │ │ ) │
│ ❱ 366 │ │ raise exc │
│ 367 │ hook_manager.hook.after_node_run( │
│ 368 │ │ node=node, │
│ 369 │ │ catalog=catalog, │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:356 in _call_node_run │
│ │
│ 353 ) -> Dict[str, Any]: │
│ 354 │ # pylint: disable=too-many-arguments │
│ 355 │ try: │
│ ❱ 356 │ │ outputs = node.run(inputs) │
│ 357 │ except Exception as exc: │
│ 358 │ │ hook_manager.hook.on_node_error( │
│ 359 │ │ │ error=exc, │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/pipeline/node.py:353 in run │
│ │
│ 350 │ │ # purposely catch all exceptions │
│ 351 │ │ except Exception as exc: │
│ 352 │ │ │ self._logger.error("Node '%s' failed with error: \n%s", str(self), str(exc)) │
│ ❱ 353 │ │ │ raise exc │
│ 354 │ │
│ 355 │ def _run_with_no_inputs(self, inputs: Dict[str, Any]): │
│ 356 │ │ if inputs: │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/pipeline/node.py:344 in run │
│ │
│ 341 │ │ │ elif isinstance(self._inputs, str): │
│ 342 │ │ │ │ outputs = self._run_with_one_input(inputs, self._inputs) │
│ 343 │ │ │ elif isinstance(self._inputs, list): │
│ ❱ 344 │ │ │ │ outputs = self._run_with_list(inputs, self._inputs) │
│ 345 │ │ │ elif isinstance(self._inputs, dict): │
│ 346 │ │ │ │ outputs = self._run_with_dict(inputs, self._inputs) │
│ 347 │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/pipeline/node.py:384 in _run_with_list │
│ │
│ 381 │ │ │ │ f"{sorted(inputs.keys())}." │
│ 382 │ │ │ ) │
│ 383 │ │ # Ensure the function gets the inputs in the correct order │
│ ❱ 384 │ │ return self._func(*(inputs[item] for item in node_inputs)) │
│ 385 │ │
│ 386 │ def _run_with_dict(self, inputs: Dict[str, Any], node_inputs: Dict[str, str]): │
│ 387 │ │ # Node inputs and provided run inputs should completely overlap │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/nodes/model_for_each.py:234 in │
│ model_for_treated_fit │
│ │
│ 231 │
│ 232 │
│ 233 def model_for_treated_fit(*posargs, **kwargs): │
│ ❱ 234 │ return ModelForTreated().fit(*posargs, **kwargs) │
│ 235 │
│ 236 │
│ 237 def model_for_treated_predict_proba(*posargs, **kwargs): │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/nodes/model_for_each.py:94 in fit │
│ │
│ 91 │ │ │ else: │
│ 92 │ │ │ │ log.info("## Feature importances not available.") │
│ 93 │ │ │
│ ❱ 94 │ │ y_pred_train = model.predict(X_train) │
│ 95 │ │ │
│ 96 │ │ y_test = None │
│ 97 │ │ y_pred_test = None │
│ │
│ /shared-libs/python3.9/py/lib/python3.9/site-packages/sklearn/model_selection/search.py:500 in │
│ predict │
│ │
│ 497 │ │ │ the best found parameters. │
│ 498 │ │ """ │
│ 499 │ │ check_is_fitted(self) │
│ ❱ 500 │ │ return self.best_estimator.predict(X) │
│ 501 │ │
│ 502 │ @available_if(_estimator_has("predict_proba")) │
│ 503 │ def predict_proba(self, X): │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/sklearn.py:1434 in predict │
│ │
│ 1431 │ │ base_margin: Optional[ArrayLike] = None, │
│ 1432 │ │ iteration_range: Optional[Tuple[int, int]] = None, │
│ 1433 │ ) -> np.ndarray: │
│ ❱ 1434 │ │ class_probs = super().predict( │
│ 1435 │ │ │ X=X, │
│ 1436 │ │ │ output_margin=output_margin, │
│ 1437 │ │ │ ntree_limit=ntree_limit, │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/sklearn.py:1049 in predict │
│ │
│ 1046 │ │ iteration_range = self._get_iteration_range(iteration_range) │
│ 1047 │ │ if self._can_use_inplace_predict(): │
│ 1048 │ │ │ try: │
│ ❱ 1049 │ │ │ │ predts = self.get_booster().inplace_predict( │
│ 1050 │ │ │ │ │ data=X, │
│ 1051 │ │ │ │ │ iteration_range=iteration_range, │
│ 1052 │ │ │ │ │ predict_type="margin" if output_margin else "value", │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/core.py:2147 in inplace_predict │
│ │
│ 2144 │ │ if isinstance(data, np.ndarray): │
│ 2145 │ │ │ from .data import _ensure_np_dtype │
│ 2146 │ │ │ data, _ = _ensure_np_dtype(data, data.dtype) │
│ ❱ 2147 │ │ │ _check_call( │
│ 2148 │ │ │ │ _LIB.XGBoosterPredictFromDense( │
│ 2149 │ │ │ │ │ self.handle, │
│ 2150 │ │ │ │ │ _array_interface(data), │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/core.py:246 in _check_call │
│ │
│ 243 │ │ return value from API calls │
│ 244 │ """ │
│ 245 │ if ret != 0: │
│ ❱ 246 │ │ raise XGBoostError(py_str(_LIB.XGBGetLastError())) │
│ 247 │
│ 248 │
│ 249 def _has_categorical(booster: "Booster", data: DataType) -> bool: │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
XGBoostError: [12:04:08] ../src/c_api/c_api_utils.h:159: Invalid missing value: null
Stack trace:
[bt] (0) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xbbec9) [0x7f5d31953ec9]
[bt] (1) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xdeb90) [0x7f5d31976b90]
[bt] (2) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xe45d8) [0x7f5d3197c5d8]
[bt] (3) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDense+0x330)
[0x7f5d3195c4d0]
[bt] (4) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call_unix64+0x4c) [0x7f5dccad38ee]
[bt] (5) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call+0x22f) [0x7f5dccad32bf]
[bt] (6) /usr/local/lib/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x13111) [0x7f5dccaf1111]
[bt] (7) /usr/local/lib/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x81ed) [0x7f5dccae61ed]
[bt] (8) /usr/local/lib/libpython3.9.so.1.0(_PyObject_MakeTpCall+0x79) [0x7f5dcdd1ced9]

Minyus · 2022-10-11T14:10:08Z

Hi @Peccer,

Thank you for reporting the issue.
Recent versions of xgboost may not work. Could you try an older version released 1-2 years ago?

Besides, Python 3.9 may or may not work with CausalLift.
The latest tested version of Python is 3.7.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

XGBooster Invalid missing value: null #23

XGBooster Invalid missing value: null #23

Peccer commented Oct 11, 2022

Minyus commented Oct 11, 2022

XGBooster Invalid missing value: null #23

XGBooster Invalid missing value: null #23

Comments

Peccer commented Oct 11, 2022

Minyus commented Oct 11, 2022