Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XGBooster Invalid missing value: null #23

Open
Peccer opened this issue Oct 11, 2022 · 1 comment
Open

XGBooster Invalid missing value: null #23

Peccer opened this issue Oct 11, 2022 · 1 comment

Comments

@Peccer
Copy link

Peccer commented Oct 11, 2022

Running:

print('\n[Create 2 models for treatment and untreatment and estimate CATE (Conditional Average Treatment Effects)]')
train_df, test_df = cl.estimate_cate_by_2_models()

gives the error below. I ran the example notebook from the GitHub project.

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /tmp/ipykernel_121/3555275851.py:5 in <module> │
│ │
│ [Errno 2] No such file or directory: '/tmp/ipykernel_121/3555275851.py' │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/causal_lift.py:654 in │
│ estimate_cate_by_2_models │
│ │
│ 651 │ │ │ ) │
│ 652 │ │ │
│ 653 │ │ if self.runner: │
│ ❱ 654 │ │ │ self.kedro_context.run(tags=["311_fit", "312_bundle_2_models"]) │
│ 655 │ │ │ self.uplift_models_dict = self.kedro_context.catalog.load( │
│ 656 │ │ │ │ "uplift_models_dict" │
│ 657 │ │ │ ) │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:178 in run │
│ │
│ 175 │ │ │ + "only_missing: {}".format(only_missing) │
│ 176 │ │ │ + ")" │
│ 177 │ │ ) │
│ ❱ 178 │ │ return super().run( │
│ 179 │ │ │ tags=tags, runner=runner, node_names=node_names, only_missing=only_missing │
│ 180 │ │ ) │
│ 181 │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:141 in run │
│ │
│ 138 │ │ self, **kwargs # type: Any │
│ 139 │ ): │
│ 140 │ │ # type: (...) -> Dict[str, Any] │
│ ❱ 141 │ │ d = super().run(**kwargs) │
│ 142 │ │ self.catalog.add_feed_dict(d, replace=True) │
│ 143 │ │ return d │
│ 144 │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:131 in run │
│ │
│ 128 │ │ │ runner = ( │
│ 129 │ │ │ │ ParallelRunner() if runner == "ParallelRunner" else SequentialRunner() │
│ 130 │ │ │ ) │
│ ❱ 131 │ │ return super().run(runner=runner, **kwargs) │
│ 132 │
│ 133 │
│ 134 class ProjectContext2(ProjectContext1): │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/context/flexible_context.py:106 in run │
│ │
│ 103 │ │ runner = runner or SequentialRunner() │
│ 104 │ │ if only_missing and _skippable(self.catalog): │
│ 105 │ │ │ return runner.run_only_missing(pipeline, self.catalog) │
│ ❱ 106 │ │ return runner.run(pipeline, self.catalog) │
│ 107 │
│ 108 │
│ 109 def _skippable( │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:88 in run │
│ │
│ 85 │ │ │ self._logger.info( │
│ 86 │ │ │ │ "Asynchronous mode is enabled for loading and saving data" │
│ 87 │ │ │ ) │
│ ❱ 88 │ │ self._run(pipeline, catalog, hook_manager, session_id) │
│ 89 │ │ │
│ 90 │ │ self._logger.info("Pipeline execution completed successfully.") │
│ 91 │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/sequential_runner.py:70 in _run │
│ │
│ 67 │ │ │
│ 68 │ │ for exec_index, node in enumerate(nodes): │
│ 69 │ │ │ try: │
│ ❱ 70 │ │ │ │ run_node(node, catalog, hook_manager, self._is_async, session_id) │
│ 71 │ │ │ │ done_nodes.add(node) │
│ 72 │ │ │ except Exception: │
│ 73 │ │ │ │ self._suggest_resume_scenario(pipeline, done_nodes, catalog) │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:304 in run_node │
│ │
│ 301 │ if is_async: │
│ 302 │ │ node = _run_node_async(node, catalog, hook_manager, session_id) │
│ 303 │ else: │
│ ❱ 304 │ │ node = _run_node_sequential(node, catalog, hook_manager, session_id) │
│ 305 │ │
│ 306 │ for name in node.confirms: │
│ 307 │ │ catalog.confirm(name) │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:398 in _run_node_sequential │
│ │
│ 395 │ ) │
│ 396 │ inputs.update(additional_inputs) │
│ 397 │ │
│ ❱ 398 │ outputs = _call_node_run( │
│ 399 │ │ node, catalog, inputs, is_async, hook_manager, session_id=session_id │
│ 400 │ ) │
│ 401 │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:366 in _call_node_run │
│ │
│ 363 │ │ │ is_async=is_async, │
│ 364 │ │ │ session_id=session_id, │
│ 365 │ │ ) │
│ ❱ 366 │ │ raise exc │
│ 367 │ hook_manager.hook.after_node_run( │
│ 368 │ │ node=node, │
│ 369 │ │ catalog=catalog, │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/runner/runner.py:356 in _call_node_run │
│ │
│ 353 ) -> Dict[str, Any]: │
│ 354 │ # pylint: disable=too-many-arguments │
│ 355 │ try: │
│ ❱ 356 │ │ outputs = node.run(inputs) │
│ 357 │ except Exception as exc: │
│ 358 │ │ hook_manager.hook.on_node_error( │
│ 359 │ │ │ error=exc, │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/pipeline/node.py:353 in run │
│ │
│ 350 │ │ # purposely catch all exceptions │
│ 351 │ │ except Exception as exc: │
│ 352 │ │ │ self._logger.error("Node '%s' failed with error: \n%s", str(self), str(exc)) │
│ ❱ 353 │ │ │ raise exc │
│ 354 │ │
│ 355 │ def _run_with_no_inputs(self, inputs: Dict[str, Any]): │
│ 356 │ │ if inputs: │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/pipeline/node.py:344 in run │
│ │
│ 341 │ │ │ elif isinstance(self._inputs, str): │
│ 342 │ │ │ │ outputs = self._run_with_one_input(inputs, self._inputs) │
│ 343 │ │ │ elif isinstance(self._inputs, list): │
│ ❱ 344 │ │ │ │ outputs = self._run_with_list(inputs, self._inputs) │
│ 345 │ │ │ elif isinstance(self._inputs, dict): │
│ 346 │ │ │ │ outputs = self._run_with_dict(inputs, self._inputs) │
│ 347 │
│ │
│ /root/venv/lib/python3.9/site-packages/kedro/pipeline/node.py:384 in _run_with_list │
│ │
│ 381 │ │ │ │ f"{sorted(inputs.keys())}." │
│ 382 │ │ │ ) │
│ 383 │ │ # Ensure the function gets the inputs in the correct order │
│ ❱ 384 │ │ return self._func(*(inputs[item] for item in node_inputs)) │
│ 385 │ │
│ 386 │ def _run_with_dict(self, inputs: Dict[str, Any], node_inputs: Dict[str, str]): │
│ 387 │ │ # Node inputs and provided run inputs should completely overlap │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/nodes/model_for_each.py:234 in │
│ model_for_treated_fit │
│ │
│ 231 │
│ 232 │
│ 233 def model_for_treated_fit(*posargs, **kwargs): │
│ ❱ 234 │ return ModelForTreated().fit(*posargs, **kwargs) │
│ 235 │
│ 236 │
│ 237 def model_for_treated_predict_proba(*posargs, **kwargs): │
│ │
│ /root/venv/lib/python3.9/site-packages/causallift/nodes/model_for_each.py:94 in fit │
│ │
│ 91 │ │ │ else: │
│ 92 │ │ │ │ log.info("## Feature importances not available.") │
│ 93 │ │ │
│ ❱ 94 │ │ y_pred_train = model.predict(X_train) │
│ 95 │ │ │
│ 96 │ │ y_test = None │
│ 97 │ │ y_pred_test = None │
│ │
│ /shared-libs/python3.9/py/lib/python3.9/site-packages/sklearn/model_selection/_search.py:500 in │
│ predict │
│ │
│ 497 │ │ │ the best found parameters. │
│ 498 │ │ """ │
│ 499 │ │ check_is_fitted(self) │
│ ❱ 500 │ │ return self.best_estimator_.predict(X) │
│ 501 │ │
│ 502 │ @available_if(_estimator_has("predict_proba")) │
│ 503 │ def predict_proba(self, X): │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/sklearn.py:1434 in predict │
│ │
│ 1431 │ │ base_margin: Optional[ArrayLike] = None, │
│ 1432 │ │ iteration_range: Optional[Tuple[int, int]] = None, │
│ 1433 │ ) -> np.ndarray: │
│ ❱ 1434 │ │ class_probs = super().predict( │
│ 1435 │ │ │ X=X, │
│ 1436 │ │ │ output_margin=output_margin, │
│ 1437 │ │ │ ntree_limit=ntree_limit, │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/sklearn.py:1049 in predict │
│ │
│ 1046 │ │ iteration_range = self._get_iteration_range(iteration_range) │
│ 1047 │ │ if self._can_use_inplace_predict(): │
│ 1048 │ │ │ try: │
│ ❱ 1049 │ │ │ │ predts = self.get_booster().inplace_predict( │
│ 1050 │ │ │ │ │ data=X, │
│ 1051 │ │ │ │ │ iteration_range=iteration_range, │
│ 1052 │ │ │ │ │ predict_type="margin" if output_margin else "value", │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/core.py:2147 in inplace_predict │
│ │
│ 2144 │ │ if isinstance(data, np.ndarray): │
│ 2145 │ │ │ from .data import _ensure_np_dtype │
│ 2146 │ │ │ data, _ = _ensure_np_dtype(data, data.dtype) │
│ ❱ 2147 │ │ │ _check_call( │
│ 2148 │ │ │ │ _LIB.XGBoosterPredictFromDense( │
│ 2149 │ │ │ │ │ self.handle, │
│ 2150 │ │ │ │ │ _array_interface(data), │
│ │
│ /root/venv/lib/python3.9/site-packages/xgboost/core.py:246 in _check_call │
│ │
│ 243 │ │ return value from API calls │
│ 244 │ """ │
│ 245 │ if ret != 0: │
│ ❱ 246 │ │ raise XGBoostError(py_str(_LIB.XGBGetLastError())) │
│ 247 │
│ 248 │
│ 249 def _has_categorical(booster: "Booster", data: DataType) -> bool: │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
XGBoostError: [12:04:08] ../src/c_api/c_api_utils.h:159: Invalid missing value: null
Stack trace:
[bt] (0) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xbbec9) [0x7f5d31953ec9]
[bt] (1) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xdeb90) [0x7f5d31976b90]
[bt] (2) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xe45d8) [0x7f5d3197c5d8]
[bt] (3) /root/venv/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDense+0x330) [0x7f5d3195c4d0]
[bt] (4) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call_unix64+0x4c) [0x7f5dccad38ee]
[bt] (5) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call+0x22f) [0x7f5dccad32bf]
[bt] (6) /usr/local/lib/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x13111) [0x7f5dccaf1111]
[bt] (7) /usr/local/lib/python3.9/lib-dynload/_ctypes.cpython-39-x86_64-linux-gnu.so(+0x81ed) [0x7f5dccae61ed]
[bt] (8) /usr/local/lib/libpython3.9.so.1.0(_PyObject_MakeTpCall+0x79) [0x7f5dcdd1ced9]

@Minyus
Copy link
Owner

Minyus commented Oct 11, 2022

Hi @Peccer,

Thank you for reporting the issue.
Recent versions of xgboost may not work. Could you try an older version released 1-2 years ago?

Besides, Python 3.9 may or may not work with CausalLift.
The latest tested version of Python is 3.7.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants