From 15259283f8339df539df9c4b09fe680d0692007d Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 26 Jan 2022 20:33:52 +0300 Subject: [PATCH 1/6] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3576451a..2297bd9e 100644 --- a/README.md +++ b/README.md @@ -86,12 +86,11 @@ The output is consistent with the output of the `predict_proba` method of `Decis Here's a simple example of how a linear model trained in Python environment can be represented in Java code: ```python -from sklearn.datasets import load_boston +from sklearn.datasets import load_diabetes from sklearn import linear_model import m2cgen as m2c -boston = load_boston() -X, y = boston.data, boston.target +X, y = load_diabetes(return_X_y=True) estimator = linear_model.LinearRegression() estimator.fit(X, y) @@ -102,9 +101,8 @@ code = m2c.export_to_java(estimator) Generated Java code: ```java public class Model { - public static double score(double[] input) { - return (((((((((((((36.45948838508965) + ((input[0]) * (-0.10801135783679647))) + ((input[1]) * (0.04642045836688297))) + ((input[2]) * (0.020558626367073608))) + ((input[3]) * (2.6867338193449406))) + ((input[4]) * (-17.76661122830004))) + ((input[5]) * (3.8098652068092163))) + ((input[6]) * (0.0006922246403454562))) + ((input[7]) * (-1.475566845600257))) + ((input[8]) * (0.30604947898516943))) + ((input[9]) * (-0.012334593916574394))) + ((input[10]) * (-0.9527472317072884))) + ((input[11]) * (0.009311683273794044))) + ((input[12]) * (-0.5247583778554867)); + return ((((((((((152.1334841628965) + ((input[0]) * (-10.012197817470472))) + ((input[1]) * (-239.81908936565458))) + ((input[2]) * (519.8397867901342))) + ((input[3]) * (324.39042768937657))) + ((input[4]) * (-792.1841616283054))) + ((input[5]) * (476.74583782366153))) + ((input[6]) * (101.04457032134408))) + ((input[7]) * (177.06417623225025))) + ((input[8]) * (751.2793210873945))) + ((input[9]) * (67.62538639104406)); } } ``` From d2cd7c9ec9b7542b971ed09204c8cb62d6ec58d6 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 26 Jan 2022 21:10:32 +0300 Subject: [PATCH 2/6] Update utils.py --- tests/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 4c5630d6..e82ff966 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -69,12 +69,12 @@ def __init__(self, dataset_name, test_fraction): self.test_fraction = test_fraction additional_test_data = None np.random.seed(seed=7) - if dataset_name == "boston": + if dataset_name == "diabetes": self.name = "train_model_regression" - self.X, self.y = datasets.load_boston(return_X_y=True) - elif dataset_name == "boston_y_bounded": + self.X, self.y = datasets.load_diabetes(return_X_y=True) + elif dataset_name == "diabetes_y_bounded": self.name = "train_model_regression_bounded" - self.X, self.y = datasets.load_boston(return_X_y=True) + self.X, self.y = datasets.load_diabetes(return_X_y=True) self.y = np.arctan(self.y) / np.pi + 0.5 # (0; 1) elif dataset_name == "diabetes": self.name = "train_model_regression_w_missing_values" @@ -216,7 +216,7 @@ def assert_code_equal(actual, expected): assert actual.strip() == expected.strip() -get_regression_model_trainer = partial(ModelTrainer.get_instance, "boston") +get_regression_model_trainer = partial(ModelTrainer.get_instance, "diabetes") get_classification_model_trainer = partial(ModelTrainer.get_instance, "iris") @@ -234,7 +234,7 @@ def assert_code_equal(actual, expected): get_classification_binary_random_data_model_trainer = partial(ModelTrainer.get_instance, "classification_binary_rnd") -get_bounded_regression_model_trainer = partial(ModelTrainer.get_instance, "boston_y_bounded") +get_bounded_regression_model_trainer = partial(ModelTrainer.get_instance, "diabetes_y_bounded") get_regression_w_missing_values_model_trainer = partial(ModelTrainer.get_instance, "diabetes") From 85525c694a2c9ba8e447ffa2ad277a9c1c411a10 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 26 Jan 2022 21:13:01 +0300 Subject: [PATCH 3/6] Update utils.py --- tests/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index e82ff966..eea5d062 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -76,7 +76,7 @@ def __init__(self, dataset_name, test_fraction): self.name = "train_model_regression_bounded" self.X, self.y = datasets.load_diabetes(return_X_y=True) self.y = np.arctan(self.y) / np.pi + 0.5 # (0; 1) - elif dataset_name == "diabetes": + elif dataset_name == "diabetes_w_missing_values": self.name = "train_model_regression_w_missing_values" self.X, self.y = datasets.load_diabetes(return_X_y=True) additional_test_data = np.array([ @@ -237,7 +237,7 @@ def assert_code_equal(actual, expected): get_bounded_regression_model_trainer = partial(ModelTrainer.get_instance, "diabetes_y_bounded") -get_regression_w_missing_values_model_trainer = partial(ModelTrainer.get_instance, "diabetes") +get_regression_w_missing_values_model_trainer = partial(ModelTrainer.get_instance, "diabetes_w_missing_values") get_classification_random_w_missing_values_model_trainer = partial( From 3f96e58654739e1c0225eafec21e0582febe1852 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 26 Jan 2022 22:41:15 +0300 Subject: [PATCH 4/6] Update utils.py --- tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index eea5d062..a1b393c3 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -265,7 +265,7 @@ def verify_python_model_is_expected(model_code, input, expected_output): context = {} exec(code, context) - + print(context["result"]) assert np.isclose(context["result"], expected_output) From 106ddf9ac29505e658029b2d51eec6687c1dacc0 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 26 Jan 2022 23:38:36 +0300 Subject: [PATCH 5/6] update tests --- tests/assemblers/test_boosting_lightgbm.py | 108 ++++++++++----------- tests/test_cli.py | 8 +- tests/utils.py | 2 +- 3 files changed, 59 insertions(+), 59 deletions(-) diff --git a/tests/assemblers/test_boosting_lightgbm.py b/tests/assemblers/test_boosting_lightgbm.py index 743c83c5..09349225 100644 --- a/tests/assemblers/test_boosting_lightgbm.py +++ b/tests/assemblers/test_boosting_lightgbm.py @@ -64,18 +64,18 @@ def test_regression(): expected = ast.BinNumExpr( ast.IfExpr( ast.CompExpr( - ast.FeatureRef(12), - ast.NumVal(9.725), + ast.FeatureRef(8), + ast.NumVal(1.0000000180025095e-35), ast.CompOpType.GT), - ast.NumVal(22.030283219508686), - ast.NumVal(23.27840740210207)), + ast.NumVal(156.64462853604854), + ast.NumVal(148.40956590509697)), ast.IfExpr( ast.CompExpr( - ast.FeatureRef(5), - ast.NumVal(6.8375), + ast.FeatureRef(2), + ast.NumVal(0.00780560282464346), ast.CompOpType.GT), - ast.NumVal(1.2777791671888081), - ast.NumVal(-0.2686772850549309)), + ast.NumVal(4.996373375352607), + ast.NumVal(-3.1063596100284814)), ast.BinNumOpType.ADD) assert utils.cmp_exprs(actual, expected) @@ -93,18 +93,18 @@ def test_regression_random_forest(): ast.BinNumExpr( ast.IfExpr( ast.CompExpr( - ast.FeatureRef(12), - ast.NumVal(9.605), + ast.FeatureRef(2), + ast.NumVal(0.00780560282464346), ast.CompOpType.GT), - ast.NumVal(17.398543657369768), - ast.NumVal(29.851408659650296)), + ast.NumVal(210.27118647591766), + ast.NumVal(120.45454548930705)), ast.IfExpr( ast.CompExpr( - ast.FeatureRef(5), - ast.NumVal(6.888), - ast.CompOpType.GT), - ast.NumVal(37.2235298136268), - ast.NumVal(19.948122884684025)), + ast.FeatureRef(2), + ast.NumVal(-0.007822672246629598), + ast.CompOpType.LTE), + ast.NumVal(114.24161077349474), + ast.NumVal(194.84868424576604)), ast.BinNumOpType.ADD), ast.NumVal(0.5), ast.BinNumOpType.MUL) @@ -159,18 +159,18 @@ def test_simple_sigmoid_output_transform(): ast.BinNumExpr( ast.IfExpr( ast.CompExpr( - ast.FeatureRef(12), - ast.NumVal(19.23), - ast.CompOpType.GT), - ast.NumVal(4.002437528537838), - ast.NumVal(4.090096709787509)), + ast.FeatureRef(8), + ast.NumVal(-0.0028501970360456344), + ast.CompOpType.LTE), + ast.NumVal(5.8325360677435345), + ast.NumVal(5.891973988308211)), ast.IfExpr( ast.CompExpr( - ast.FeatureRef(12), - ast.NumVal(14.895), - ast.CompOpType.GT), - ast.NumVal(-0.0417499606641773), - ast.NumVal(0.02069953712454655)), + ast.FeatureRef(8), + ast.NumVal(-0.005612778088288765), + ast.CompOpType.LTE), + ast.NumVal(-0.027170480653266372), + ast.NumVal(0.026423953384869338)), ast.BinNumOpType.ADD)) assert utils.cmp_exprs(actual, expected) @@ -188,18 +188,18 @@ def test_log1p_exp_output_transform(): ast.BinNumExpr( ast.IfExpr( ast.CompExpr( - ast.FeatureRef(12), - ast.NumVal(19.23), - ast.CompOpType.GT), - ast.NumVal(0.6622623010380544), - ast.NumVal(0.6684065452877841)), + ast.FeatureRef(8), + ast.NumVal(-0.0028501970360456344), + ast.CompOpType.LTE), + ast.NumVal(0.693713164308067), + ast.NumVal(0.694435273176687)), ast.IfExpr( ast.CompExpr( - ast.FeatureRef(12), - ast.NumVal(15.145), - ast.CompOpType.GT), - ast.NumVal(0.1404975120475147), - ast.NumVal(0.14535916856709272)), + ast.FeatureRef(8), + ast.NumVal(-0.005612778088288765), + ast.CompOpType.LTE), + ast.NumVal(0.14830023030115363), + ast.NumVal(0.14902176200722345)), ast.BinNumOpType.ADD))) assert utils.cmp_exprs(actual, expected) @@ -216,18 +216,18 @@ def test_maybe_sqr_output_transform(): ast.BinNumExpr( ast.IfExpr( ast.CompExpr( - ast.FeatureRef(12), - ast.NumVal(9.725), + ast.FeatureRef(8), + ast.NumVal(1.0000000180025095e-35), ast.CompOpType.GT), - ast.NumVal(4.569350528717041), - ast.NumVal(4.663526439666748)), + ast.NumVal(12.094032478332519), + ast.NumVal(11.671793556213379)), ast.IfExpr( ast.CompExpr( - ast.FeatureRef(12), - ast.NumVal(11.655), - ast.CompOpType.GT), - ast.NumVal(-0.04462450027465819), - ast.NumVal(0.033305134773254384)), + ast.FeatureRef(8), + ast.NumVal(-0.00468258384360457), + ast.CompOpType.LTE), + ast.NumVal(-0.18738342285156248), + ast.NumVal(0.19059675216674812)), ast.BinNumOpType.ADD), to_reuse=True) @@ -250,18 +250,18 @@ def test_exp_output_transform(): ast.BinNumExpr( ast.IfExpr( ast.CompExpr( - ast.FeatureRef(12), - ast.NumVal(9.725), + ast.FeatureRef(8), + ast.NumVal(1.0000000180025095e-35), ast.CompOpType.GT), - ast.NumVal(3.1043985065105892), - ast.NumVal(3.1318783133960197)), + ast.NumVal(5.040167360736721), + ast.NumVal(5.013324518244505)), ast.IfExpr( ast.CompExpr( - ast.FeatureRef(5), - ast.NumVal(6.8375), + ast.FeatureRef(2), + ast.NumVal(0.00780560282464346), ast.CompOpType.GT), - ast.NumVal(0.028409619436010138), - ast.NumVal(-0.0060740730485278754)), + ast.NumVal(0.016475080997255653), + ast.NumVal(-0.010346335106608635)), ast.BinNumOpType.ADD)) assert utils.cmp_exprs(actual, expected) diff --git a/tests/test_cli.py b/tests/test_cli.py index 7b7681b0..4dc0fc0a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -76,8 +76,8 @@ def test_generate_code(pickled_model): verify_python_model_is_expected( generated_code, - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - expected_output=-44.40540274041321) + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + expected_output=11089.941259597403) def test_function_name(pickled_model): @@ -151,5 +151,5 @@ def test_unsupported_args_are_ignored(pickled_model): verify_python_model_is_expected( generated_code, - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], - expected_output=-44.40540274041321) + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + expected_output=11089.941259597403) diff --git a/tests/utils.py b/tests/utils.py index a1b393c3..eea5d062 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -265,7 +265,7 @@ def verify_python_model_is_expected(model_code, input, expected_output): context = {} exec(code, context) - print(context["result"]) + assert np.isclose(context["result"], expected_output) From 0828074104a2791105b3f17ee3622457e1183cd8 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 29 Jan 2022 04:23:33 +0300 Subject: [PATCH 6/6] Update utils.py --- tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index eea5d062..5774917a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -68,7 +68,7 @@ def __init__(self, dataset_name, test_fraction): self.dataset_name = dataset_name self.test_fraction = test_fraction additional_test_data = None - np.random.seed(seed=7) + np.random.seed(seed=42) if dataset_name == "diabetes": self.name = "train_model_regression" self.X, self.y = datasets.load_diabetes(return_X_y=True)