From c267387e56a7840533e534e6abeed3724d2ec004 Mon Sep 17 00:00:00 2001 From: Jeremy Goh Date: Sun, 9 Jul 2023 12:09:15 +0800 Subject: [PATCH 1/6] try to only use NodeToJson directly --- src/io/tree.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/io/tree.cpp b/src/io/tree.cpp index ce45d20cf454..7928a86d9ef1 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -415,16 +415,16 @@ std::string Tree::ToJSON() const { str_buf << "\"num_leaves\":" << num_leaves_ << "," << '\n'; str_buf << "\"num_cat\":" << num_cat_ << "," << '\n'; str_buf << "\"shrinkage\":" << shrinkage_ << "," << '\n'; - if (num_leaves_ == 1) { - if (is_linear_) { - str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << ", " << "\n"; - str_buf << LinearModelToJSON(0) << "}" << "\n"; - } else { - str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << "}" << '\n'; - } - } else { - str_buf << "\"tree_structure\":" << NodeToJSON(0) << '\n'; - } + // if (num_leaves_ == 1) { + // if (is_linear_) { + // str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << ", " << "\n"; + // str_buf << LinearModelToJSON(0) << "}" << "\n"; + // } else { + // str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << "}" << '\n'; + // } + // } else { + str_buf << "\"tree_structure\":" << NodeToJSON(0) << '\n'; + // } return str_buf.str(); } From d0f4fef886b5767937f58e903505eb3c5eb5157c Mon Sep 17 00:00:00 2001 From: Jeremy Goh Date: Sun, 9 Jul 2023 12:10:11 +0800 Subject: [PATCH 2/6] add tests for stump --- tests/python_package_test/test_engine.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index e87cea3bfcbb..58e7c52ef675 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -3597,24 +3597,39 @@ def test_reset_params_works_with_metric_num_class_and_boosting(): assert new_bst.params == expected_params -def test_dump_model(): +@pytest.mark.parametrize("stump", [True, False]) +def test_dump_model(stump): X, y = load_breast_cancer(return_X_y=True) + if stump: + # intentionally create a stump (tree with only a root-node) + # using restricted # samples + subidx = random.sample(range(len(y)), 30) + X = X[subidx] + y = y[subidx] + train_data = lgb.Dataset(X, label=y) params = { "objective": "binary", - "verbose": -1 + "verbose": -1, } bst = lgb.train(params, train_data, num_boost_round=5) - dumped_model_str = str(bst.dump_model(5, 0)) + dumped_model = bst.dump_model(5, 0) + if stump: + assert len(dumped_model["tree_structure"]) == 1 + dumped_model_str = str(dumped_model) assert "leaf_features" not in dumped_model_str assert "leaf_coeff" not in dumped_model_str assert "leaf_const" not in dumped_model_str assert "leaf_value" in dumped_model_str assert "leaf_count" in dumped_model_str + params['linear_tree'] = True train_data = lgb.Dataset(X, label=y) bst = lgb.train(params, train_data, num_boost_round=5) - dumped_model_str = str(bst.dump_model(5, 0)) + dumped_model = bst.dump_model(5, 0) + if stump: + assert len(dumped_model["tree_structure"]) == 1 + dumped_model_str = str(dumped_model) assert "leaf_features" in dumped_model_str assert "leaf_coeff" in dumped_model_str assert "leaf_const" in dumped_model_str From 3d94a7929d87b4f5f2890be8ad332d1762ae2f1c Mon Sep 17 00:00:00 2001 From: Jeremy Goh Date: Sun, 9 Jul 2023 15:52:20 +0800 Subject: [PATCH 3/6] add leaf count in ToJSON instead --- src/io/tree.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/io/tree.cpp b/src/io/tree.cpp index 7928a86d9ef1..3ca6f7bdb5c0 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -415,16 +415,20 @@ std::string Tree::ToJSON() const { str_buf << "\"num_leaves\":" << num_leaves_ << "," << '\n'; str_buf << "\"num_cat\":" << num_cat_ << "," << '\n'; str_buf << "\"shrinkage\":" << shrinkage_ << "," << '\n'; - // if (num_leaves_ == 1) { - // if (is_linear_) { - // str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << ", " << "\n"; - // str_buf << LinearModelToJSON(0) << "}" << "\n"; - // } else { - // str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << "}" << '\n'; - // } - // } else { - str_buf << "\"tree_structure\":" << NodeToJSON(0) << '\n'; - // } + if (num_leaves_ == 1) { + str_buf << "\"tree_structure\":{"; + if (is_linear_) { + str_buf << "\"leaf_value\":" << leaf_value_[0] << ", " << '\n'; + str_buf << "\"leaf_count\":" << leaf_count_[0] << ", " << '\n'; + str_buf << LinearModelToJSON(0); + } else { + str_buf << "\"leaf_value\":" << leaf_value_[0] << ", " << '\n'; + str_buf << "\"leaf_count\":" << leaf_count_[0]; + } + str_buf << "}" << '\n'; + } else { + str_buf << "\"tree_structure\":" << NodeToJSON(0) << '\n'; + } return str_buf.str(); } From b512cbb99205f72283387ceeb027d5e59f05d058 Mon Sep 17 00:00:00 2001 From: Jeremy Goh <30731072+thatlittleboy@users.noreply.github.com> Date: Sat, 15 Jul 2023 18:20:08 +0800 Subject: [PATCH 4/6] ensure that leaf_count_ exists for stumps --- src/io/tree.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/io/tree.cpp b/src/io/tree.cpp index 3ca6f7bdb5c0..fd33335c8260 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -735,6 +735,12 @@ Tree::Tree(const char* str, size_t* used_len) { is_linear_ = false; } + if (key_vals.count("leaf_count")) { + leaf_count_ = CommonC::StringToArrayFast(key_vals["leaf_count"], num_leaves_); + } else { + leaf_count_.resize(num_leaves_); + } + #ifdef USE_CUDA is_cuda_tree_ = false; #endif // USE_CUDA @@ -797,12 +803,6 @@ Tree::Tree(const char* str, size_t* used_len) { leaf_weight_.resize(num_leaves_); } - if (key_vals.count("leaf_count")) { - leaf_count_ = CommonC::StringToArrayFast(key_vals["leaf_count"], num_leaves_); - } else { - leaf_count_.resize(num_leaves_); - } - if (key_vals.count("decision_type")) { decision_type_ = CommonC::StringToArrayFast(key_vals["decision_type"], num_leaves_ - 1); } else { From 28b5f5f3bdd6cc375127f8690127cb40624680fe Mon Sep 17 00:00:00 2001 From: Jeremy Goh <30731072+thatlittleboy@users.noreply.github.com> Date: Sat, 15 Jul 2023 18:21:11 +0800 Subject: [PATCH 5/6] make stump test its own test --- tests/python_package_test/test_engine.py | 37 ++++++++++++++++-------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 58e7c52ef675..25eeb4614877 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -3597,15 +3597,32 @@ def test_reset_params_works_with_metric_num_class_and_boosting(): assert new_bst.params == expected_params -@pytest.mark.parametrize("stump", [True, False]) -def test_dump_model(stump): +def test_dump_model_stump(): + X, y = load_breast_cancer(return_X_y=True) + # intentionally create a stump (tree with only a root-node) + # using restricted # samples + subidx = random.sample(range(len(y)), 30) + X = X[subidx] + y = y[subidx] + + train_data = lgb.Dataset(X, label=y) + params = { + "objective": "binary", + "verbose": -1, + "n_jobs": 1, + } + bst = lgb.train(params, train_data, num_boost_round=5) + dumped_model = bst.dump_model(5, 0) + print(dumped_model) + assert len(dumped_model["tree_info"]) == 1 + tree_structure = dumped_model["tree_info"][0]["tree_structure"] + assert "leaf_value" in tree_structure + assert "leaf_count" in tree_structure + assert tree_structure["leaf_count"] == 30 + + +def test_dump_model(): X, y = load_breast_cancer(return_X_y=True) - if stump: - # intentionally create a stump (tree with only a root-node) - # using restricted # samples - subidx = random.sample(range(len(y)), 30) - X = X[subidx] - y = y[subidx] train_data = lgb.Dataset(X, label=y) params = { @@ -3614,8 +3631,6 @@ def test_dump_model(stump): } bst = lgb.train(params, train_data, num_boost_round=5) dumped_model = bst.dump_model(5, 0) - if stump: - assert len(dumped_model["tree_structure"]) == 1 dumped_model_str = str(dumped_model) assert "leaf_features" not in dumped_model_str assert "leaf_coeff" not in dumped_model_str @@ -3627,8 +3642,6 @@ def test_dump_model(stump): train_data = lgb.Dataset(X, label=y) bst = lgb.train(params, train_data, num_boost_round=5) dumped_model = bst.dump_model(5, 0) - if stump: - assert len(dumped_model["tree_structure"]) == 1 dumped_model_str = str(dumped_model) assert "leaf_features" in dumped_model_str assert "leaf_coeff" in dumped_model_str From ae6e37e792e1ed4ca1c0d06e2e92b8c6e768d4f4 Mon Sep 17 00:00:00 2001 From: Jeremy Goh <30731072+thatlittleboy@users.noreply.github.com> Date: Sat, 15 Jul 2023 18:25:15 +0800 Subject: [PATCH 6/6] revert the original test --- tests/python_package_test/test_engine.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 25eeb4614877..bf2bd6a8b01d 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -3623,15 +3623,13 @@ def test_dump_model_stump(): def test_dump_model(): X, y = load_breast_cancer(return_X_y=True) - train_data = lgb.Dataset(X, label=y) params = { "objective": "binary", "verbose": -1, } bst = lgb.train(params, train_data, num_boost_round=5) - dumped_model = bst.dump_model(5, 0) - dumped_model_str = str(dumped_model) + dumped_model_str = str(bst.dump_model(5, 0)) assert "leaf_features" not in dumped_model_str assert "leaf_coeff" not in dumped_model_str assert "leaf_const" not in dumped_model_str @@ -3641,8 +3639,7 @@ def test_dump_model(): params['linear_tree'] = True train_data = lgb.Dataset(X, label=y) bst = lgb.train(params, train_data, num_boost_round=5) - dumped_model = bst.dump_model(5, 0) - dumped_model_str = str(dumped_model) + dumped_model_str = str(bst.dump_model(5, 0)) assert "leaf_features" in dumped_model_str assert "leaf_coeff" in dumped_model_str assert "leaf_const" in dumped_model_str