BayesWitnesses · StrikerRUS · Jan 26, 2022 · Jan 26, 2022 · Jan 26, 2022 · Jan 26, 2022
diff --git a/README.md b/README.md
@@ -86,12 +86,11 @@ The output is consistent with the output of the `predict_proba` method of `Decis
 
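-Here's a simple example of how a linear model trained in Python environment can be represented in Java code:
+Here's a simple example of how a linear model trained in a Python environment can be represented in Java code: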
 ```python
-from sklearn.datasets import load_boston
+from sklearn.datasets import load_diabetes
 from sklearn import linear_model
 import m2cgen as m2c
 
-boston = load_boston()
-X, y = boston.data, boston.target
+X, y = load_diabetes(return_X_y=True)
 
 estimator = linear_model.LinearRegression()
 estimator.fit(X, y)
@@ -102,9 +101,8 @@ code = m2c.export_to_java(estimator)
 Generated Java code:
 ```java
 public class Model {
-
     public static double score(double[] input) {
-        return (((((((((((((36.45948838508965) + ((input[0]) * (-0.10801135783679647))) + ((input[1]) * (0.04642045836688297))) + ((input[2]) * (0.020558626367073608))) + ((input[3]) * (2.6867338193449406))) + ((input[4]) * (-17.76661122830004))) + ((input[5]) * (3.8098652068092163))) + ((input[6]) * (0.0006922246403454562))) + ((input[7]) * (-1.475566845600257))) + ((input[8]) * (0.30604947898516943))) + ((input[9]) * (-0.012334593916574394))) + ((input[10]) * (-0.9527472317072884))) + ((input[11]) * (0.009311683273794044))) + ((input[12]) * (-0.5247583778554867));
+        return ((((((((((152.1334841628965) + ((input[0]) * (-10.012197817470472))) + ((input[1]) * (-239.81908936565458))) + ((input[2]) * (519.8397867901342))) + ((input[3]) * (324.39042768937657))) + ((input[4]) * (-792.1841616283054))) + ((input[5]) * (476.74583782366153))) + ((input[6]) * (101.04457032134408))) + ((input[7]) * (177.06417623225025))) + ((input[8]) * (751.2793210873945))) + ((input[9]) * (67.62538639104406));
     }
 }
 ```

diff --git a/tests/assemblers/test_boosting_lightgbm.py b/tests/assemblers/test_boosting_lightgbm.py
@@ -64,18 +64,18 @@ def test_regression():
     expected = ast.BinNumExpr(
         ast.IfExpr(
             ast.CompExpr(
-                ast.FeatureRef(12),
-                ast.NumVal(9.725),
+                ast.FeatureRef(8),
+                ast.NumVal(1.0000000180025095e-35),
                 ast.CompOpType.GT),
-            ast.NumVal(22.030283219508686),
-            ast.NumVal(23.27840740210207)),
+            ast.NumVal(156.64462853604854),
+            ast.NumVal(148.40956590509697)),
         ast.IfExpr(
             ast.CompExpr(
-                ast.FeatureRef(5),
-                ast.NumVal(6.8375),
+                ast.FeatureRef(2),
+                ast.NumVal(0.00780560282464346),
                 ast.CompOpType.GT),
-            ast.NumVal(1.2777791671888081),
-            ast.NumVal(-0.2686772850549309)),
+            ast.NumVal(4.996373375352607),
+            ast.NumVal(-3.1063596100284814)),
         ast.BinNumOpType.ADD)
 
     assert utils.cmp_exprs(actual, expected)
@@ -93,18 +93,18 @@ def test_regression_random_forest():
         ast.BinNumExpr(
             ast.IfExpr(
                 ast.CompExpr(
-                    ast.FeatureRef(12),
-                    ast.NumVal(9.605),
+                    ast.FeatureRef(2),
+                    ast.NumVal(0.00780560282464346),
                     ast.CompOpType.GT),
-                ast.NumVal(17.398543657369768),
-                ast.NumVal(29.851408659650296)),
+                ast.NumVal(210.27118647591766),
+                ast.NumVal(120.45454548930705)),
             ast.IfExpr(
                 ast.CompExpr(
-                    ast.FeatureRef(5),
-                    ast.NumVal(6.888),
-                    ast.CompOpType.GT),
-                ast.NumVal(37.2235298136268),
-                ast.NumVal(19.948122884684025)),
+                    ast.FeatureRef(2),
+                    ast.NumVal(-0.007822672246629598),
+                    ast.CompOpType.LTE),
+                ast.NumVal(114.24161077349474),
+                ast.NumVal(194.84868424576604)),
             ast.BinNumOpType.ADD),
         ast.NumVal(0.5),
         ast.BinNumOpType.MUL)
@@ -159,18 +159,18 @@ def test_simple_sigmoid_output_transform():
         ast.BinNumExpr(
             ast.IfExpr(
                 ast.CompExpr(
-                    ast.FeatureRef(12),
-                    ast.NumVal(19.23),
-                    ast.CompOpType.GT),
-                ast.NumVal(4.002437528537838),
-                ast.NumVal(4.090096709787509)),
+                    ast.FeatureRef(8),
+                    ast.NumVal(-0.0028501970360456344),
+                    ast.CompOpType.LTE),
+                ast.NumVal(5.8325360677435345),
+                ast.NumVal(5.891973988308211)),
             ast.IfExpr(
                 ast.CompExpr(
-                    ast.FeatureRef(12),
-                    ast.NumVal(14.895),
-                    ast.CompOpType.GT),
-                ast.NumVal(-0.0417499606641773),
-                ast.NumVal(0.02069953712454655)),
+                    ast.FeatureRef(8),
+                    ast.NumVal(-0.005612778088288765),
+                    ast.CompOpType.LTE),
+                ast.NumVal(-0.027170480653266372),
+                ast.NumVal(0.026423953384869338)),
             ast.BinNumOpType.ADD))
 
     assert utils.cmp_exprs(actual, expected)
@@ -188,18 +188,18 @@ def test_log1p_exp_output_transform():
             ast.BinNumExpr(
                 ast.IfExpr(
                     ast.CompExpr(
-                        ast.FeatureRef(12),
-                        ast.NumVal(19.23),
-                        ast.CompOpType.GT),
-                    ast.NumVal(0.6622623010380544),
-                    ast.NumVal(0.6684065452877841)),
+                        ast.FeatureRef(8),
+                        ast.NumVal(-0.0028501970360456344),
+                        ast.CompOpType.LTE),
+                    ast.NumVal(0.693713164308067),
+                    ast.NumVal(0.694435273176687)),
                 ast.IfExpr(
                     ast.CompExpr(
-                        ast.FeatureRef(12),
-                        ast.NumVal(15.145),
-                        ast.CompOpType.GT),
-                    ast.NumVal(0.1404975120475147),
-                    ast.NumVal(0.14535916856709272)),
+                        ast.FeatureRef(8),
+                        ast.NumVal(-0.005612778088288765),
+                        ast.CompOpType.LTE),
+                    ast.NumVal(0.14830023030115363),
+                    ast.NumVal(0.14902176200722345)),
                 ast.BinNumOpType.ADD)))
 
     assert utils.cmp_exprs(actual, expected)
@@ -216,18 +216,18 @@ def test_maybe_sqr_output_transform():
         ast.BinNumExpr(
             ast.IfExpr(
                 ast.CompExpr(
-                    ast.FeatureRef(12),
-                    ast.NumVal(9.725),
+                    ast.FeatureRef(8),
+                    ast.NumVal(1.0000000180025095e-35),
                     ast.CompOpType.GT),
-                ast.NumVal(4.569350528717041),
-                ast.NumVal(4.663526439666748)),
+                ast.NumVal(12.094032478332519),
+                ast.NumVal(11.671793556213379)),
             ast.IfExpr(
                 ast.CompExpr(
-                    ast.FeatureRef(12),
-                    ast.NumVal(11.655),
-                    ast.CompOpType.GT),
-                ast.NumVal(-0.04462450027465819),
-                ast.NumVal(0.033305134773254384)),
+                    ast.FeatureRef(8),
+                    ast.NumVal(-0.00468258384360457),
+                    ast.CompOpType.LTE),
+                ast.NumVal(-0.18738342285156248),
+                ast.NumVal(0.19059675216674812)),
             ast.BinNumOpType.ADD),
         to_reuse=True)
 
@@ -250,18 +250,18 @@ def test_exp_output_transform():
         ast.BinNumExpr(
             ast.IfExpr(
                 ast.CompExpr(
-                    ast.FeatureRef(12),
-                    ast.NumVal(9.725),
+                    ast.FeatureRef(8),
+                    ast.NumVal(1.0000000180025095e-35),
                     ast.CompOpType.GT),
-                ast.NumVal(3.1043985065105892),
-                ast.NumVal(3.1318783133960197)),
+                ast.NumVal(5.040167360736721),
+                ast.NumVal(5.013324518244505)),
             ast.IfExpr(
                 ast.CompExpr(
-                    ast.FeatureRef(5),
-                    ast.NumVal(6.8375),
+                    ast.FeatureRef(2),
+                    ast.NumVal(0.00780560282464346),
                     ast.CompOpType.GT),
-                ast.NumVal(0.028409619436010138),
-                ast.NumVal(-0.0060740730485278754)),
+                ast.NumVal(0.016475080997255653),
+                ast.NumVal(-0.010346335106608635)),
             ast.BinNumOpType.ADD))
 
     assert utils.cmp_exprs(actual, expected)

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -76,8 +76,8 @@ def test_generate_code(pickled_model):
 
     verify_python_model_is_expected(
         generated_code,
-        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
-        expected_output=-44.40540274041321)
+        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+        expected_output=11089.941259597403)
 
 
 def test_function_name(pickled_model):
@@ -151,5 +151,5 @@ def test_unsupported_args_are_ignored(pickled_model):
 
     verify_python_model_is_expected(
         generated_code,
-        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
-        expected_output=-44.40540274041321)
+        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+        expected_output=11089.941259597403)
diff --git a/tests/utils.py b/tests/utils.py
@@ -68,15 +68,15 @@ def __init__(self, dataset_name, test_fraction):
         self.dataset_name = dataset_name
         self.test_fraction = test_fraction
         additional_test_data = None
-        np.random.seed(seed=7)
-        if dataset_name == "boston":
+        np.random.seed(seed=42)
+        if dataset_name == "diabetes":
             self.name = "train_model_regression"
-            self.X, self.y = datasets.load_boston(return_X_y=True)
-        elif dataset_name == "boston_y_bounded":
+            self.X, self.y = datasets.load_diabetes(return_X_y=True)
+        elif dataset_name == "diabetes_y_bounded":
             self.name = "train_model_regression_bounded"
-            self.X, self.y = datasets.load_boston(return_X_y=True)
+            self.X, self.y = datasets.load_diabetes(return_X_y=True)
             self.y = np.arctan(self.y) / np.pi + 0.5  # (0; 1)
-        elif dataset_name == "diabetes":
+        elif dataset_name == "diabetes_w_missing_values":
             self.name = "train_model_regression_w_missing_values"
             self.X, self.y = datasets.load_diabetes(return_X_y=True)
             additional_test_data = np.array([
@@ -216,7 +216,7 @@ def assert_code_equal(actual, expected):
     assert actual.strip() == expected.strip()
 
 
-get_regression_model_trainer = partial(ModelTrainer.get_instance, "boston")
+get_regression_model_trainer = partial(ModelTrainer.get_instance, "diabetes")
 
 
 get_classification_model_trainer = partial(ModelTrainer.get_instance, "iris")
@@ -234,10 +234,10 @@ def assert_code_equal(actual, expected):
 get_classification_binary_random_data_model_trainer = partial(ModelTrainer.get_instance, "classification_binary_rnd")
 
 
-get_bounded_regression_model_trainer = partial(ModelTrainer.get_instance, "boston_y_bounded")
+get_bounded_regression_model_trainer = partial(ModelTrainer.get_instance, "diabetes_y_bounded")
 
 
-get_regression_w_missing_values_model_trainer = partial(ModelTrainer.get_instance, "diabetes")
+get_regression_w_missing_values_model_trainer = partial(ModelTrainer.get_instance, "diabetes_w_missing_values")
 
 
 get_classification_random_w_missing_values_model_trainer = partial(