Skip to content

Commit

Permalink
add custom json encoder, fixes (#496)
Browse files: browse the repository at this point in the history
  • Loading branch information
pplonski committed Sep 26, 2023
1 parent 90afa45 commit 5775bac
Show file tree
Hide file tree
Showing 40 changed files with 363 additions and 209 deletions.
9 changes: 6 additions & 3 deletions supervised/algorithms/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
from sklearn.base import ClassifierMixin, RegressorMixin
from sklearn.dummy import DummyClassifier, DummyRegressor

from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.algorithms.sklearn import SklearnAlgorithm
from supervised.utils.config import LOG_LEVEL

Expand Down
21 changes: 13 additions & 8 deletions supervised/algorithms/catboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,21 @@
from sklearn.base import ClassifierMixin, RegressorMixin

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.preprocessing.preprocessing_utils import PreprocessingUtils
from supervised.utils.config import LOG_LEVEL
from supervised.utils.metric import (CatBoostEvalMetricAveragePrecision,
CatBoostEvalMetricMSE,
CatBoostEvalMetricPearson,
CatBoostEvalMetricSpearman,
CatBoostEvalMetricUserDefined)
from supervised.utils.metric import (
CatBoostEvalMetricAveragePrecision,
CatBoostEvalMetricMSE,
CatBoostEvalMetricPearson,
CatBoostEvalMetricSpearman,
CatBoostEvalMetricUserDefined,
)

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)
Expand Down
9 changes: 6 additions & 3 deletions supervised/algorithms/decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.algorithms.sklearn import SklearnAlgorithm
from supervised.utils.config import LOG_LEVEL

Expand Down
12 changes: 8 additions & 4 deletions supervised/algorithms/extra_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@
from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.algorithms.sklearn import (
SklearnTreesEnsembleClassifierAlgorithm,
SklearnTreesEnsembleRegressorAlgorithm)
SklearnTreesEnsembleRegressorAlgorithm,
)
from supervised.utils.config import LOG_LEVEL

logger = logging.getLogger(__name__)
Expand Down
3 changes: 1 addition & 2 deletions supervised/algorithms/factory.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging

from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
AlgorithmsRegistry)
from supervised.algorithms.registry import BINARY_CLASSIFICATION, AlgorithmsRegistry

logger = logging.getLogger(__name__)

Expand Down
9 changes: 6 additions & 3 deletions supervised/algorithms/knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.algorithms.sklearn import SklearnAlgorithm
from supervised.utils.config import LOG_LEVEL

Expand Down
25 changes: 15 additions & 10 deletions supervised/algorithms/lightgbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,22 @@
from sklearn.base import ClassifierMixin, RegressorMixin

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.utils.config import LOG_LEVEL
from supervised.utils.metric import (lightgbm_eval_metric_accuracy,
lightgbm_eval_metric_average_precision,
lightgbm_eval_metric_f1,
lightgbm_eval_metric_pearson,
lightgbm_eval_metric_r2,
lightgbm_eval_metric_spearman,
lightgbm_eval_metric_user_defined)
from supervised.utils.metric import (
lightgbm_eval_metric_accuracy,
lightgbm_eval_metric_average_precision,
lightgbm_eval_metric_f1,
lightgbm_eval_metric_pearson,
lightgbm_eval_metric_r2,
lightgbm_eval_metric_spearman,
lightgbm_eval_metric_user_defined,
)

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)
Expand Down
9 changes: 6 additions & 3 deletions supervised/algorithms/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@
from sklearn.linear_model import LinearRegression, LogisticRegression

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.algorithms.sklearn import SklearnAlgorithm
from supervised.utils.config import LOG_LEVEL

Expand Down
9 changes: 6 additions & 3 deletions supervised/algorithms/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
from sklearn.neural_network import MLPClassifier, MLPRegressor

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.algorithms.sklearn import SklearnAlgorithm
from supervised.utils.config import LOG_LEVEL

Expand Down
12 changes: 8 additions & 4 deletions supervised/algorithms/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.algorithms.sklearn import (
SklearnTreesEnsembleClassifierAlgorithm,
SklearnTreesEnsembleRegressorAlgorithm)
SklearnTreesEnsembleRegressorAlgorithm,
)
from supervised.utils.config import LOG_LEVEL

logger = logging.getLogger(__name__)
Expand Down
1 change: 1 addition & 0 deletions supervised/algorithms/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def get_eval_metric(algorithm_name, ml_task, automl_eval_metric):
import supervised.algorithms.lightgbm
import supervised.algorithms.linear
import supervised.algorithms.nn

# Import algorithm to be registered
import supervised.algorithms.random_forest
import supervised.algorithms.xgboost
8 changes: 5 additions & 3 deletions supervised/algorithms/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
import pandas as pd

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
)
from supervised.utils.config import LOG_LEVEL

logger = logging.getLogger(__name__)
Expand Down
27 changes: 16 additions & 11 deletions supervised/algorithms/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,23 @@
from sklearn.base import ClassifierMixin, RegressorMixin

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.utils.config import LOG_LEVEL
from supervised.utils.metric import (xgboost_eval_metric_accuracy,
xgboost_eval_metric_average_precision,
xgboost_eval_metric_f1,
xgboost_eval_metric_mse,
xgboost_eval_metric_pearson,
xgboost_eval_metric_r2,
xgboost_eval_metric_spearman,
xgboost_eval_metric_user_defined)
from supervised.utils.metric import (
xgboost_eval_metric_accuracy,
xgboost_eval_metric_average_precision,
xgboost_eval_metric_f1,
xgboost_eval_metric_mse,
xgboost_eval_metric_pearson,
xgboost_eval_metric_r2,
xgboost_eval_metric_spearman,
xgboost_eval_metric_user_defined,
)

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)
Expand Down
6 changes: 4 additions & 2 deletions supervised/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@

from collections.abc import Iterable
from re import U

# libraries for type hints
from typing import List, Optional, Union

import numpy
import pandas
from typing_extensions import \
Literal # typing_extensions is used for using Literal from python 3.7
from typing_extensions import (
Literal,
) # typing_extensions is used for using Literal from python 3.7

from supervised.base_automl import BaseAutoML
from supervised.utils.config import LOG_LEVEL
Expand Down
29 changes: 18 additions & 11 deletions supervised/base_automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,21 @@
from sklearn.utils.validation import check_array
from tabulate import tabulate

from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION, AlgorithmsRegistry)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
AlgorithmsRegistry,
)
from supervised.callbacks.early_stopping import EarlyStopping
from supervised.callbacks.learner_time_constraint import LearnerTimeConstraint
from supervised.callbacks.metric_logger import MetricLogger
from supervised.callbacks.total_time_constraint import TotalTimeConstraint
from supervised.ensemble import Ensemble
from supervised.exceptions import AutoMLException, NotTrainedException
from supervised.model_framework import ModelFramework
from supervised.preprocessing.exclude_missing_target import \
ExcludeRowsMissingTarget
from supervised.preprocessing.exclude_missing_target import ExcludeRowsMissingTarget

# disable EDA
# from supervised.preprocessing.eda import EDA
from supervised.preprocessing.preprocessing_utils import PreprocessingUtils
Expand All @@ -39,12 +42,16 @@
from supervised.utils.automl_plots import AutoMLPlots
from supervised.utils.config import LOG_LEVEL, mem
from supervised.utils.data_validation import (
check_bool, check_greater_than_zero_integer,
check_greater_than_zero_integer_or_float, check_integer,
check_positive_integer)
check_bool,
check_greater_than_zero_integer,
check_greater_than_zero_integer_or_float,
check_integer,
check_positive_integer,
)
from supervised.utils.leaderboard_plots import LeaderboardPlots
from supervised.utils.metric import Metric, UserDefinedEvalMetric
from supervised.utils.utils import dump_data, load_data
from supervised.utils.jsonencoder import MLJSONEncoder

try:
import matplotlib.font_manager as font_manager
Expand Down Expand Up @@ -698,7 +705,7 @@ def _save_data_info(self, X, y, sample_weight=None, sensitive_features=None):
self._data_info["num_class"] = columns_and_target_info["num_class"]
data_info_path = os.path.join(self._results_path, "data_info.json")
with open(data_info_path, "w") as fout:
fout.write(json.dumps(self._data_info, indent=4))
fout.write(json.dumps(self._data_info, indent=4, cls=MLJSONEncoder))

def save_progress(self, step=None, generated_params=None):
if step is not None and generated_params is not None:
Expand All @@ -713,7 +720,7 @@ def save_progress(self, step=None, generated_params=None):

fname = os.path.join(self._results_path, "progress.json")
with open(fname, "w") as fout:
fout.write(json.dumps(state, indent=4))
fout.write(json.dumps(state, indent=4, cls=MLJSONEncoder))

def load_progress(self):
state = {}
Expand Down Expand Up @@ -1367,7 +1374,7 @@ def select_and_save_best(self, show_warnings=False):

if self._stacked_models is not None:
params["stacked"] = [m.get_name() for m in self._stacked_models]
fout.write(json.dumps(params, indent=4))
fout.write(json.dumps(params, indent=4, cls=MLJSONEncoder))

if self._models:
ldb = self.get_leaderboard(original_metric_values=True)
Expand Down
11 changes: 7 additions & 4 deletions supervised/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,17 @@
import pandas as pd

from supervised.algorithms.algorithm import BaseAlgorithm
from supervised.algorithms.registry import (BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION)
from supervised.algorithms.registry import (
BINARY_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
REGRESSION,
)
from supervised.exceptions import NotTrainedException
from supervised.model_framework import ModelFramework
from supervised.utils.additional_metrics import AdditionalMetrics
from supervised.utils.config import LOG_LEVEL
from supervised.utils.metric import Metric
from supervised.utils.jsonencoder import MLJSONEncoder

logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)
Expand Down Expand Up @@ -527,7 +530,7 @@ def save(self, results_path, model_subpath):

if self._threshold is not None:
desc["threshold"] = self._threshold
fout.write(json.dumps(desc, indent=4))
fout.write(json.dumps(desc, indent=4, cls=MLJSONEncoder))

LearningCurves.plot_for_ensemble(self._scores, self.metric.name, model_path)

Expand Down
Loading

0 comments on commit 5775bac

Please sign in to comment.