-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Loading status checks…
Merge pull request #231 from DashAISoftware/feat/regression
Feat/regression
Showing
33 changed files
with
2,580 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
"""DashAI MAE regression metric implementation.""" | ||
|
||
import numpy as np | ||
from sklearn.metrics import mean_absolute_error | ||
|
||
from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset | ||
from DashAI.back.metrics.regression_metric import RegressionMetric, prepare_to_metric | ||
|
||
|
||
class MAE(RegressionMetric):
    """Regression metric that reports the Mean Absolute Error (MAE)."""

    @staticmethod
    def score(true_values: DashAIDataset, predicted_values: np.ndarray) -> float:
        """Compute the mean absolute error of a set of predictions.

        Parameters
        ----------
        true_values : DashAIDataset
            A DashAI dataset holding the ground-truth values.
        predicted_values : np.ndarray
            A one-dimensional array with the value predicted
            for each instance.

        Returns
        -------
        float
            The MAE between the true and the predicted values.
        """
        # prepare_to_metric turns the dataset into an array and checks that
        # both inputs have the same length before the metric is computed.
        targets, predictions = prepare_to_metric(true_values, predicted_values)
        return mean_absolute_error(targets, predictions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
"""DashAI RMSE regression metric implementation.""" | ||
|
||
import numpy as np | ||
from sklearn.metrics import mean_squared_error | ||
|
||
from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset | ||
from DashAI.back.metrics.regression_metric import RegressionMetric, prepare_to_metric | ||
|
||
|
||
class RMSE(RegressionMetric):
    """Root Mean Squared Error metric for regression tasks."""

    @staticmethod
    def score(true_values: DashAIDataset, predicted_values: np.ndarray) -> float:
        """Calculate the RMSE between true values and predicted values.

        Parameters
        ----------
        true_values : DashAIDataset
            A DashAI dataset with true values.
        predicted_values : np.ndarray
            A one-dimensional array with the predicted values
            for each instance.

        Returns
        -------
        float
            RMSE score between true values and predicted values.
        """
        true_values, pred_values = prepare_to_metric(true_values, predicted_values)
        # The `squared=False` keyword of mean_squared_error was deprecated in
        # scikit-learn 1.4 and removed in 1.6. Taking the square root of the
        # MSE works on every version and is equivalent for the single-target
        # arrays handled here.
        return float(np.sqrt(mean_squared_error(true_values, pred_values)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
from typing import Tuple | ||
|
||
import numpy as np | ||
|
||
from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset | ||
from DashAI.back.metrics.base_metric import BaseMetric | ||
|
||
|
||
class RegressionMetric(BaseMetric):
    """Common base class for the metrics used to evaluate regression models."""

    # Components this family of metrics declares itself compatible with.
    COMPATIBLE_COMPONENTS = ["RegressionTask"]
|
||
|
||
def validate_inputs(true_values: np.ndarray, pred_values: np.ndarray) -> None:
    """Check that true and predicted values contain the same number of items.

    Parameters
    ----------
    true_values : ndarray
        Ground-truth values.
    pred_values : ndarray
        Values predicted by the model.

    Raises
    ------
    ValueError
        If the two inputs do not have the same length.
    """
    # Nothing to report when both inputs line up.
    if len(true_values) == len(pred_values):
        return

    raise ValueError(
        "The length of the true and the predicted values must be equal, "
        f"given: len(true_values) = {len(true_values)} and "
        f"len(pred_values) = {len(pred_values)}."
    )
|
||
|
||
def prepare_to_metric(
    y: DashAIDataset, predicted_values: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """Convert the true values to numpy and pair them with the predictions.

    Parameters
    ----------
    y : DashAIDataset
        A DashAIDataset with the output columns of the data. Only its first
        column is read.
    predicted_values : np.ndarray
        A one-dimensional array with the predicted values for each instance.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        The true values as a numpy array and the predicted values, which are
        returned unchanged.

    Raises
    ------
    ValueError
        If the true and the predicted values differ in length.
    """
    # Only the first output column is used as the regression target.
    target_column = y.column_names[0]
    targets = np.array(y[target_column])

    validate_inputs(targets, predicted_values)
    return targets, predicted_values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
239 changes: 239 additions & 0 deletions
239
DashAI/back/models/parameters/models_schemas/GradientBoostingR.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
{ | ||
"additionalProperties": false, | ||
"error_msg": "The parameters for Gradient Boosting regression must be one or more of ['loss', 'learning_rate', 'n_estimators', 'subsample', 'criterion', 'min_samples_split', 'min_samples_leaf', 'min_weight_fraction_leaf', 'max_depth', 'min_impurity_decrease', 'init', 'random_state', 'max_features', 'alpha', 'verbose', 'max_leaf_nodes', 'warm_start', 'validation_fraction', 'n_iter_no_change', 'tol', 'ccp_alpha'].", | ||
"description": "Gradient Boosting regression builds an additive model in a forward stage-wise fashion; it allows for the optimization of arbitrary differentiable loss functions.", | ||
"properties": { | ||
"loss": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'loss' parameter must be one of 'squared_error', 'absolute_error', 'huber', or 'quantile'.", | ||
"description": "The 'loss' parameter specifies the loss function to be optimized.", | ||
"type": "string", | ||
"default": "squared_error", | ||
"enum": ["squared_error", "absolute_error", "huber", "quantile"] | ||
} | ||
] | ||
}, | ||
"learning_rate": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'learning_rate' parameter must be a positive number.", | ||
"description": "The 'learning_rate' parameter specifies the learning rate, which shrinks the contribution of each tree.", | ||
"type": "number", | ||
"minimum": 0, | ||
"default": 0.1 | ||
} | ||
] | ||
}, | ||
"n_estimators": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'n_estimators' parameter must be a positive integer.", | ||
"description": "The 'n_estimators' parameter specifies the number of boosting stages to be run.", | ||
"type": "integer", | ||
"minimum": 1, | ||
"default": 100 | ||
} | ||
] | ||
}, | ||
"subsample": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'subsample' parameter must be a number between 0 and 1.", | ||
"description": "The 'subsample' parameter specifies the fraction of samples to be used for fitting the individual base learners.", | ||
"type": "number", | ||
"minimum": 0, | ||
"maximum": 1, | ||
"default": 1.0 | ||
} | ||
] | ||
}, | ||
"criterion": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'criterion' parameter must be one of 'friedman_mse' or 'squared_error'.", | ||
"description": "The 'criterion' parameter specifies the function to measure the quality of a split.", | ||
"type": "string", | ||
"default": "friedman_mse", | ||
"enum": ["friedman_mse", "squared_error"] | ||
} | ||
] | ||
}, | ||
"min_samples_split": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'min_samples_split' parameter must be a number greater than or equal to 2.", | ||
"description": "The 'min_samples_split' parameter specifies the minimum number of samples required to split an internal node.", | ||
"type": "number", | ||
"minimum": 2, | ||
"default": 2 | ||
} | ||
] | ||
}, | ||
"min_samples_leaf": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'min_samples_leaf' parameter must be a number greater than or equal to 1.", | ||
"description": "The 'min_samples_leaf' parameter specifies the minimum number of samples required to be at a leaf node.", | ||
"type": "number", | ||
"minimum": 1, | ||
"default": 1 | ||
} | ||
] | ||
}, | ||
"min_weight_fraction_leaf": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'min_weight_fraction_leaf' parameter must be a number between 0 and 0.5.", | ||
"description": "The 'min_weight_fraction_leaf' parameter specifies the minimum weighted fraction of the sum total of weights required to be at a leaf node.", | ||
"type": "number", | ||
"minimum": 0, | ||
"maximum": 0.5, | ||
"default": 0.0 | ||
} | ||
] | ||
}, | ||
"max_depth": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_depth' parameter must be an integer greater than or equal to 1, or null.", | ||
"description": "The 'max_depth' parameter specifies the maximum depth of the individual regression estimators.", | ||
"type": ["integer", "null"], | ||
"minimum": 1, | ||
"default": 3 | ||
} | ||
] | ||
}, | ||
"min_impurity_decrease": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'min_impurity_decrease' parameter must be a non-negative number.", | ||
"description": "The 'min_impurity_decrease' parameter specifies a node will be split if this split induces a decrease of the impurity greater than or equal to this value.", | ||
"type": "number", | ||
"minimum": 0, | ||
"default": 0.0 | ||
} | ||
] | ||
}, | ||
"init": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'init' parameter must be a string, estimator object implementing 'fit', 'partial_fit', 'predict', or None.", | ||
"description": "The 'init' parameter specifies the estimator object to use for the initial predictions.", | ||
"type": ["string", "null"], | ||
"default": null, | ||
"enum": ["fit", "partial_fit", "predict"] | ||
} | ||
] | ||
}, | ||
"random_state": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'random_state' parameter must be an integer or null.", | ||
"description": "The 'random_state' parameter controls the random number generator.", | ||
"type": ["integer", "null"], | ||
"default": null | ||
} | ||
] | ||
}, | ||
"max_features": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_features' parameter must be a number or null.", | ||
"description": "The 'max_features' parameter specifies the number of features to consider when looking for the best split.", | ||
"type": ["number", "null"], | ||
"default": null | ||
} | ||
] | ||
}, | ||
"alpha": { | ||
"oneOf": [ | ||
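{ | ||
"error_msg": "The 'alpha' parameter must be a number between 0 and 1.", | ||
"description": "The 'alpha' parameter specifies the alpha-quantile of the huber loss function and the quantile loss function. It is only used when 'loss' is 'huber' or 'quantile'.", | ||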
"type": "number", | ||
"minimum": 0, | ||
"maximum": 1, | ||
"default": 0.9 | ||
} | ||
] | ||
}, | ||
"verbose": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'verbose' parameter must be an integer.", | ||
"description": "The 'verbose' parameter specifies the verbosity level.", | ||
"type": "integer", | ||
"default": 0 | ||
} | ||
] | ||
}, | ||
"max_leaf_nodes": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_leaf_nodes' parameter must be an integer greater than 1, or null.", | ||
"description": "The 'max_leaf_nodes' parameter specifies the maximum number of leaf nodes.", | ||
"type": ["integer", "null"], | ||
"minimum": 2, | ||
"default": null | ||
} | ||
] | ||
}, | ||
"warm_start": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'warm_start' parameter must be of type boolean.", | ||
"description": "The 'warm_start' parameter specifies whether to reuse the solution of the previous call to fit and add more estimators to the ensemble.", | ||
"type": "boolean", | ||
"default": false | ||
} | ||
] | ||
}, | ||
"validation_fraction": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'validation_fraction' parameter must be a number between 0 and 1.", | ||
"description": "The 'validation_fraction' parameter specifies the proportion of training data to set aside as validation set for early stopping.", | ||
"type": "number", | ||
"minimum": 0, | ||
"maximum": 1, | ||
"default": 0.1 | ||
} | ||
] | ||
}, | ||
"n_iter_no_change": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'n_iter_no_change' parameter must be a positive integer, or null.", | ||
"description": "The 'n_iter_no_change' parameter specifies the number of iterations with no improvement to wait before early stopping.", | ||
"type": ["integer", "null"], | ||
"minimum": 1, | ||
"default": null | ||
} | ||
] | ||
}, | ||
"tol": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'tol' parameter must be a positive number.", | ||
"description": "The 'tol' parameter specifies the tolerance for the early stopping.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 0.0001 | ||
} | ||
] | ||
}, | ||
"ccp_alpha": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'ccp_alpha' parameter must be a non-negative number.", | ||
"description": "The 'ccp_alpha' parameter specifies the complexity parameter used for Minimal Cost-Complexity Pruning.", | ||
"type": "number", | ||
"minimum": 0, | ||
"default": 0.0 | ||
} | ||
] | ||
} | ||
}, | ||
"type": "object" | ||
} |
49 changes: 49 additions & 0 deletions
49
DashAI/back/models/parameters/models_schemas/LinearRegression.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
{ | ||
"additionalProperties": false, | ||
"error_msg": "The parameters for Linear Regression must be one or more of ['fit_intercept', 'copy_X', 'n_jobs', 'positive'].", | ||
"description": "Linear Regression is a linear approach for modeling the relationship between a dependent variable and one or more independent variables.", | ||
"properties": { | ||
"fit_intercept": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'fit_intercept' parameter must be of type boolean.", | ||
"description": "The 'fit_intercept' parameter determines whether to calculate the intercept for this model. It must be of type boolean.", | ||
"type": "boolean", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"copy_X": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'copy_X' parameter must be of type boolean.", | ||
"description": "The 'copy_X' parameter determines whether to copy the input variables. It must be of type boolean.", | ||
"type": "boolean", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"n_jobs": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'n_jobs' parameter must be an integer or null.", | ||
"description": "The 'n_jobs' parameter specifies the number of jobs to use for computation. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors.", | ||
"type": ["integer", "null"], | ||
"default": null, | ||
"minimum": -1 | ||
} | ||
] | ||
}, | ||
"positive": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'positive' parameter must be of type boolean.", | ||
"description": "The 'positive' parameter, when set to true, forces the coefficients to be positive. It must be of type boolean.", | ||
"type": "boolean", | ||
"default": false | ||
} | ||
] | ||
} | ||
}, | ||
"type": "object" | ||
} |
115 changes: 115 additions & 0 deletions
115
DashAI/back/models/parameters/models_schemas/LinearSVR.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
{ | ||
"additionalProperties": false, | ||
"error_msg": "The parameters for LinearSVR must be one or more of ['epsilon', 'tol', 'C', 'loss', 'fit_intercept', 'intercept_scaling', 'dual', 'verbose', 'random_state', 'max_iter'].", | ||
"description": "Linear Support Vector Regression (LinearSVR) is a linear model that applies Support Vector Machine regression using a linear kernel.", | ||
"properties": { | ||
"epsilon": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'epsilon' parameter must be a non-negative number.", | ||
"description": "The 'epsilon' parameter specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value.", | ||
"type": "number", | ||
"minimum": 0, | ||
"default": 0.0 | ||
} | ||
] | ||
}, | ||
"tol": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'tol' parameter must be a positive number.", | ||
"description": "The 'tol' parameter specifies the tolerance for stopping criterion.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 0.0001 | ||
} | ||
] | ||
}, | ||
"C": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'C' parameter must be a positive number.", | ||
"description": "The 'C' parameter specifies the regularization strength. It must be a positive number.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 1.0 | ||
} | ||
] | ||
}, | ||
"loss": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'loss' parameter must be one of 'epsilon_insensitive', 'squared_epsilon_insensitive'.", | ||
"description": "The 'loss' parameter specifies the loss function. It must be one of 'epsilon_insensitive' or 'squared_epsilon_insensitive'.", | ||
"type": "string", | ||
"default": "epsilon_insensitive", | ||
"enum": ["epsilon_insensitive", "squared_epsilon_insensitive"] | ||
} | ||
] | ||
}, | ||
"fit_intercept": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'fit_intercept' parameter must be of type boolean.", | ||
"description": "The 'fit_intercept' parameter specifies whether to calculate the intercept for this model.", | ||
"type": "boolean", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"intercept_scaling": { | ||
"oneOf": [ | ||
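{ | ||
"error_msg": "The 'intercept_scaling' parameter must be a positive number.", | ||
"description": "The 'intercept_scaling' parameter is only used when 'fit_intercept' is true: a synthetic feature with this constant value is appended to each instance vector, so the intercept becomes intercept_scaling times the weight of that synthetic feature.", | ||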
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 1.0 | ||
} | ||
] | ||
}, | ||
"dual": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'dual' parameter must be of type boolean.", | ||
"description": "The 'dual' parameter selects the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples > n_features.", | ||
"type": "boolean", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"verbose": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'verbose' parameter must be of type boolean.", | ||
"description": "The 'verbose' parameter enables verbose output.", | ||
"type": "boolean", | ||
"default": false | ||
} | ||
] | ||
}, | ||
"random_state": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'random_state' parameter must be an integer greater than or equal to 0, or null.", | ||
"description": "The 'random_state' parameter determines the seed used by the random number generator.", | ||
"type": ["integer", "null"], | ||
"default": null, | ||
"minimum": 0 | ||
} | ||
] | ||
}, | ||
"max_iter": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_iter' parameter must be a positive integer.", | ||
"description": "The 'max_iter' parameter specifies the maximum number of iterations to run.", | ||
"type": "integer", | ||
"default": 1000, | ||
"minimum": 1 | ||
} | ||
] | ||
} | ||
}, | ||
"type": "object" | ||
} |
260 changes: 260 additions & 0 deletions
260
DashAI/back/models/parameters/models_schemas/MLPRegression.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,260 @@ | ||
{ | ||
"additionalProperties": false, | ||
"error_msg": "The parameters for MLP regression must be one or more of ['hidden_layer_sizes', 'activation', 'solver', 'alpha', 'batch_size', 'learning_rate', 'learning_rate_init', 'power_t', 'max_iter', 'shuffle', 'random_state', 'tol', 'verbose', 'warm_start', 'momentum', 'nesterovs_momentum', 'early_stopping', 'validation_fraction', 'beta_1', 'beta_2', 'epsilon', 'n_iter_no_change', 'max_fun'].", | ||
"description": "MLP regression is a model that uses multi-layer perceptron to predict continuous values.", | ||
"properties": { | ||
"activation": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'activation' parameter must be one of 'identity', 'logistic', 'tanh', or 'relu'.", | ||
"description": "The 'activation' parameter specifies the activation function for the hidden layer.", | ||
"type": "string", | ||
"default": "relu", | ||
"enum": ["identity", "logistic", "tanh", "relu"] | ||
} | ||
] | ||
}, | ||
"solver": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'solver' parameter must be one of 'lbfgs', 'sgd', or 'adam'.", | ||
"description": "The 'solver' parameter specifies the solver for weight optimization.", | ||
"type": "string", | ||
"default": "adam", | ||
"enum": ["lbfgs", "sgd", "adam"] | ||
} | ||
] | ||
}, | ||
"alpha": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'alpha' parameter must be a positive number.", | ||
"description": "The 'alpha' parameter specifies the L2 penalty (regularization term) parameter.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 0.0001 | ||
} | ||
] | ||
}, | ||
"batch_size": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'batch_size' parameter must be an integer.", | ||
"description": "The 'batch_size' parameter specifies the size of minibatches for stochastic optimizers.", | ||
"type": "integer", | ||
"default": null | ||
}, | ||
{ | ||
"error_msg": "The 'batch_size' parameter must be 'auto'.", | ||
"description": "The 'batch_size' parameter specifies the size of minibatches for stochastic optimizers.", | ||
"type": "string", | ||
"enum": ["auto"], | ||
"default": "auto" | ||
} | ||
] | ||
}, | ||
"learning_rate": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'learning_rate' parameter must be one of 'constant', 'invscaling', or 'adaptive'.", | ||
"description": "The 'learning_rate' parameter specifies the learning rate schedule for weight updates.", | ||
"type": "string", | ||
"default": "constant", | ||
"enum": ["constant", "invscaling", "adaptive"] | ||
} | ||
] | ||
}, | ||
"learning_rate_init": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'learning_rate_init' parameter must be a positive number.", | ||
"description": "The 'learning_rate_init' parameter specifies the initial learning rate used.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 0.001 | ||
} | ||
] | ||
}, | ||
"power_t": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'power_t' parameter must be a positive number.", | ||
"description": "The 'power_t' parameter specifies the exponent for inverse scaling learning rate.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 0.5 | ||
} | ||
] | ||
}, | ||
"max_iter": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_iter' parameter must be a positive integer.", | ||
"description": "The 'max_iter' parameter specifies the maximum number of iterations.", | ||
"type": "integer", | ||
"minimum": 1, | ||
"default": 200 | ||
} | ||
] | ||
}, | ||
"shuffle": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'shuffle' parameter must be of type boolean.", | ||
"description": "The 'shuffle' parameter specifies whether to shuffle samples in each iteration.", | ||
"type": "boolean", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"random_state": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'random_state' parameter must be an integer.", | ||
"description": "The 'random_state' parameter controls the random number generator.", | ||
"type": "integer", | ||
"default": null | ||
}, | ||
{ | ||
"error_msg": "The 'random_state' parameter must be null.", | ||
"description": "The 'random_state' parameter controls the random number generator.", | ||
"type": "null", | ||
"default": null | ||
} | ||
] | ||
}, | ||
"tol": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'tol' parameter must be a positive number.", | ||
"description": "The 'tol' parameter specifies the tolerance for the optimization.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 0.0001 | ||
} | ||
] | ||
}, | ||
"verbose": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'verbose' parameter must be of type boolean.", | ||
"description": "The 'verbose' parameter specifies whether to print progress messages to stdout.", | ||
"type": "boolean", | ||
"default": false | ||
} | ||
] | ||
}, | ||
"warm_start": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'warm_start' parameter must be of type boolean.", | ||
"description": "The 'warm_start' parameter specifies whether to reuse the solution of the previous call to fit as initialization instead of erasing the previous solution.", | ||
"type": "boolean", | ||
"default": false | ||
} | ||
] | ||
}, | ||
"momentum": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'momentum' parameter must be a number between 0 and 1.", | ||
"description": "The 'momentum' parameter specifies the momentum for gradient descent update.", | ||
"type": "number", | ||
"minimum": 0, | ||
"maximum": 1, | ||
"default": 0.9 | ||
} | ||
] | ||
}, | ||
"nesterovs_momentum": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'nesterovs_momentum' parameter must be of type boolean.", | ||
"description": "The 'nesterovs_momentum' parameter specifies whether to use Nesterov's momentum.", | ||
"type": "boolean", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"early_stopping": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'early_stopping' parameter must be of type boolean.", | ||
"description": "The 'early_stopping' parameter specifies whether to use early stopping to terminate training when validation score is not improving.", | ||
"type": "boolean", | ||
"default": false | ||
} | ||
] | ||
}, | ||
"validation_fraction": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'validation_fraction' parameter must be a number between 0 and 1.", | ||
"description": "The 'validation_fraction' parameter specifies the proportion of training data to set aside as validation set for early stopping.", | ||
"type": "number", | ||
"minimum": 0, | ||
"maximum": 1, | ||
"default": 0.1 | ||
} | ||
] | ||
}, | ||
"beta_1": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'beta_1' parameter must be a number between 0 and 1.", | ||
"description": "The 'beta_1' parameter specifies the exponential decay rate for estimates of first moment vector in Adam, should be in [0, 1).", | ||
"type": "number", | ||
"minimum": 0, | ||
"maximum": 1, | ||
"default": 0.9 | ||
} | ||
] | ||
}, | ||
"beta_2": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'beta_2' parameter must be a number between 0 and 1.", | ||
"description": "The 'beta_2' parameter specifies the exponential decay rate for estimates of second moment vector in Adam, should be in [0, 1).", | ||
"type": "number", | ||
"minimum": 0, | ||
"maximum": 1, | ||
"default": 0.999 | ||
} | ||
] | ||
}, | ||
"epsilon": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'epsilon' parameter must be a positive number.", | ||
"description": "The 'epsilon' parameter specifies the value for numerical stability in Adam.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 1e-8 | ||
} | ||
] | ||
}, | ||
"n_iter_no_change": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'n_iter_no_change' parameter must be a positive integer.", | ||
"description": "The 'n_iter_no_change' parameter specifies the number of iterations with no improvement to wait before stopping.", | ||
"type": "integer", | ||
"minimum": 1, | ||
"default": 10 | ||
} | ||
] | ||
}, | ||
"max_fun": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_fun' parameter must be a positive integer.", | ||
"description": "The 'max_fun' parameter specifies the maximum number of function evaluations.", | ||
"type": "integer", | ||
"minimum": 1, | ||
"default": 15000 | ||
} | ||
] | ||
} | ||
}, | ||
"type": "object" | ||
} |
189 changes: 189 additions & 0 deletions
189
DashAI/back/models/parameters/models_schemas/RandomForestRegression.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
{ | ||
"additionalProperties": false, | ||
"error_msg": "The parameters for Random Forest regression must be one or more of ['n_estimators', 'criterion', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'min_weight_fraction_leaf', 'max_features', 'max_leaf_nodes', 'min_impurity_decrease', 'bootstrap', 'oob_score', 'n_jobs', 'random_state', 'verbose', 'warm_start', 'ccp_alpha', 'max_samples', 'monotonic_cst'].", | ||
"description": "Random Forest regression is an ensemble learning method that fits multiple decision trees and averages their predictions.", | ||
"properties": { | ||
"n_estimators": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'n_estimators' parameter must be a positive integer greater than or equal to 1.", | ||
"description": "The 'n_estimators' parameter specifies the number of trees in the forest. It must be a positive integer greater than or equal to 1.", | ||
"type": "integer", | ||
"default": 100, | ||
"minimum": 1 | ||
} | ||
] | ||
}, | ||
"criterion": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'criterion' parameter must be one of 'squared_error', 'absolute_error', 'friedman_mse' or 'poisson'.", | ||
"description": "The 'criterion' parameter specifies the function to measure the quality of a split.", | ||
"type": "string", | ||
"default": "squared_error", | ||
"enum": ["squared_error", "absolute_error", "friedman_mse", "poisson"] | ||
} | ||
] | ||
}, | ||
"max_depth": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_depth' parameter must be an integer greater than or equal to 1, or null.", | ||
"description": "The 'max_depth' parameter corresponds to the maximum depth of the tree. It must be an integer greater than or equal to 1, or null.", | ||
"type": ["integer", "null"], | ||
"default": null, | ||
"minimum": 1 | ||
} | ||
] | ||
}, | ||
"min_samples_split": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'min_samples_split' parameter must be a number greater than or equal to 2.", | ||
"description": "The 'min_samples_split' parameter is the minimum number of samples required to split an internal node. It must be a number greater than or equal to 2.", | ||
"type": "integer", | ||
"default": 2, | ||
"minimum": 2 | ||
} | ||
] | ||
}, | ||
"min_samples_leaf": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'min_samples_leaf' parameter must be a number greater than or equal to 1.", | ||
"description": "The 'min_samples_leaf' parameter is the minimum number of samples required to be at a leaf node. It must be a number greater than or equal to 1.", | ||
"type": "integer", | ||
"default": 1, | ||
"minimum": 1 | ||
} | ||
] | ||
}, | ||
"min_weight_fraction_leaf": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'min_weight_fraction_leaf' parameter must be a number between 0 and 0.5.", | ||
"description": "The 'min_weight_fraction_leaf' parameter specifies the minimum weighted fraction of the sum total of weights required to be at a leaf node. It must be a number between 0 and 0.5.", | ||
"type": "number", | ||
"minimum": 0, | ||
"maximum": 0.5, | ||
"default": 0.0 | ||
} | ||
] | ||
}, | ||
"max_features": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_features' parameter must be an integer or a float.", | ||
"description": "The 'max_features' parameter specifies the number of features to consider when looking for the best split.", | ||
"type": "integer", | ||
"default": 1.0 | ||
} | ||
] | ||
}, | ||
"max_leaf_nodes": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_leaf_nodes' parameter must be an integer greater than 2, or null.", | ||
"description": "The 'max_leaf_nodes' parameter parameter specifies the maximum number of leaf nodes. It must be an integer greater than 2, or null.", | ||
"type": ["integer", "null"], | ||
"default": null, | ||
"minimum": 2 | ||
} | ||
] | ||
}, | ||
"min_impurity_decrease": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'min_impurity_decrease' parameter must be a positive number.", | ||
"description": "The 'min_impurity_decrease' parameter specifies a node will be split if this split induces a decrease of the impurity greater than or equal to this value. It must be a positive number.", | ||
"type": "number", | ||
"minimum": 0, | ||
"default": 0.0 | ||
} | ||
] | ||
}, | ||
"bootstrap": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'bootstrap' parameter must be of type boolean.", | ||
"description": "The 'bootstrap' parameter specifies whether bootstrap samples are used when building trees.", | ||
"type": "boolean", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"oob_score": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'oob_score' parameter must be of type boolean.", | ||
"description": "The 'oob_score' parameter specifies whether to use out-of-bag samples to estimate the generalization score.", | ||
"type": "boolean", | ||
"default": false | ||
} | ||
] | ||
}, | ||
"n_jobs": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'n_jobs' parameter must be an integer or null.", | ||
"description": "The 'n_jobs' parameter specifies the number of jobs to run in parallel. None means 1, -1 means using all processors.", | ||
"type": ["integer", "null"], | ||
"default": null | ||
} | ||
] | ||
}, | ||
"random_state": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'random_state' parameter must be an integer greater than or equal to 0, a RandomState instance, or null.", | ||
"description": "The 'random_state' parameter controls the random number generator. It must be an integer greater than or equal to 0, a RandomState instance, or null.", | ||
"type": ["integer", "null"], | ||
"default": null, | ||
"minimum": 0 | ||
} | ||
] | ||
}, | ||
"verbose": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'verbose' parameter must be an integer.", | ||
"description": "The 'verbose' parameter specifies the verbosity level. It must be an integer.", | ||
"type": "integer", | ||
"default": 0 | ||
} | ||
] | ||
}, | ||
"warm_start": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'warm_start' parameter must be of type boolean.", | ||
"description": "The 'warm_start' parameter specifies whether to reuse the solution of the previous call to fit and add more estimators to the ensemble.", | ||
"type": "boolean", | ||
"default": false | ||
} | ||
] | ||
}, | ||
"ccp_alpha": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'ccp_alpha' parameter must be a non-negative number.", | ||
"description": "The 'ccp_alpha' parameter specifies the complexity parameter used for Minimal Cost-Complexity Pruning. It must be a non-negative number.", | ||
"type": "number", | ||
"minimum": 0, | ||
"default": 0.0 | ||
} | ||
] | ||
}, | ||
"max_samples": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_samples' parameter must be a positive integer or float, or null.", | ||
"description": "The 'max_samples' parameter specifies the number of samples to draw from X to train each base estimator. It must be a positive integer or float, or null.", | ||
"type": ["number", "null"], | ||
"default": null | ||
} | ||
] | ||
} | ||
}, | ||
"type": "object" | ||
} |
93 changes: 93 additions & 0 deletions
93
DashAI/back/models/parameters/models_schemas/RidgeRegression.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
{ | ||
"additionalProperties": false, | ||
"error_msg": "The parameters for Ridge regression must be one or more of ['alpha', 'fit_intercept', 'normalize', 'copy_X', 'max_iter', 'tol', 'solver', 'random_state'].", | ||
"description": "Ridge regression is a linear model that includes L2 regularization, which can help mitigate issues of multicollinearity in linear regression.", | ||
"properties": { | ||
"alpha": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'alpha' parameter must be a positive number.", | ||
"description": "The 'alpha' parameter specifies the regularization strength. It must be a positive number.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 1.0 | ||
} | ||
] | ||
}, | ||
"fit_intercept": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'fit_intercept' parameter must be of type boolean.", | ||
"description": "The 'fit_intercept' parameter determines whether to calculate the intercept for this model. It must be of type boolean.", | ||
"type": "boolean", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"copy_X": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'copy_X' parameter must be of type boolean.", | ||
"description": "The 'copy_X' parameter determines whether to copy the input variables. It must be of type boolean.", | ||
"type": "boolean", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"max_iter": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'max_iter' parameter must be a positive integer, or -1 to indicate that there is no iteration limit.", | ||
"description": "The 'max_iter' parameter determines the maximum number of iterations for the solver. It must be a positive integer or -1 to indicate no limit.", | ||
"type": "integer", | ||
"default": null, | ||
"minimum": 1 | ||
} | ||
] | ||
}, | ||
"tol": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'tol' parameter must be a positive number.", | ||
"description": "The 'tol' parameter determines the tolerance for the optimization. It must be a positive number.", | ||
"type": "number", | ||
"exclusiveMinimum": 0, | ||
"default": 0.001 | ||
} | ||
] | ||
}, | ||
"solver": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'solver' parameter must be one of 'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', or 'lbfgs'.", | ||
"description": "The 'solver' parameter determines the solver to use in the computational routines. It must be one of 'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', or 'lbfgs'.", | ||
"type": "string", | ||
"default": "auto", | ||
"enum": ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga", "lbfgs"] | ||
} | ||
] | ||
}, | ||
"positive": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'positive' parameter must be of type boolean.", | ||
"description": "The 'positive' parameter determines when set to True, forces the coefficients to be positive. It must be of type boolean.", | ||
"type": "boolean", | ||
"default": false | ||
} | ||
] | ||
}, | ||
"random_state": { | ||
"oneOf": [ | ||
{ | ||
"error_msg": "The 'random_state' parameter must be an integer greater than or equal to 0, or null.", | ||
"description": "The 'random_state' parameter determines the seed used by the random number generator. It must be an integer greater than or equal to 0, or null.", | ||
"type": ["integer", "null"], | ||
"default": null, | ||
"minimum": 0 | ||
} | ||
] | ||
} | ||
}, | ||
"type": "object" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from DashAI.back.models.base_model import BaseModel | ||
|
||
|
||
class RegressionModel(BaseModel):
    """Base class for every model that can be used with a RegressionTask."""

    # Names of the components this family of models declares itself
    # compatible with.
    COMPATIBLE_COMPONENTS = ["RegressionTask"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
222 changes: 222 additions & 0 deletions
222
DashAI/back/models/scikit_learn/gradient_boosting_regression.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
from sklearn.ensemble import GradientBoostingRegressor as _GBRegressor | ||
|
||
from DashAI.back.core.schema_fields import ( | ||
BaseSchema, | ||
bool_field, | ||
enum_field, | ||
none_type, | ||
optimizer_float_field, | ||
optimizer_int_field, | ||
schema_field, | ||
union_type, | ||
) | ||
from DashAI.back.models.regression_model import RegressionModel | ||
from DashAI.back.models.scikit_learn.sklearn_like_regressor import ( | ||
SklearnLikeRegressor, | ||
) | ||
|
||
|
||
class GradientBoostingRSchema(BaseSchema):
    """Hyperparameter schema for the Gradient Boosting regressor.

    Every attribute declares one hyperparameter: its validator (allowed type
    and range), the placeholder value and a human readable description.
    The validators mirror the ranges accepted by scikit-learn's
    ``GradientBoostingRegressor`` so that a configuration accepted here is not
    rejected later by the estimator itself.
    """

    loss: schema_field(
        enum_field(enum=["squared_error", "absolute_error", "huber", "quantile"]),
        placeholder="squared_error",
        description="Loss function to be optimized.",
    )  # type: ignore

    learning_rate: schema_field(
        optimizer_float_field(gt=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.1,
            "lower_bound": 0.01,
            "upper_bound": 1.0,
        },
        description="Learning rate shrinks the contribution of each tree.",
    )  # type: ignore

    n_estimators: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 100,
            "lower_bound": 10,
            "upper_bound": 1000,
        },
        description="The number of boosting stages to be run.",
    )  # type: ignore

    subsample: schema_field(
        optimizer_float_field(gt=0.0, le=1.0),
        placeholder={
            "optimize": False,
            "fixed_value": 1.0,
            "lower_bound": 0.1,
            "upper_bound": 1.0,
        },
        description="The fraction of samples to be used for fitting the "
        "individual base learners.",
    )  # type: ignore

    # "mse" and "mae" are no longer accepted by scikit-learn; the valid
    # criteria use the same renamed values already used by ``loss`` above.
    criterion: schema_field(
        enum_field(enum=["friedman_mse", "squared_error"]),
        placeholder="friedman_mse",
        description="The function to measure the quality of a split.",
    )  # type: ignore

    # Expressed as an absolute number of samples (the placeholder is 2..20),
    # so it must be validated as an integer, not as a (0, 1] fraction.
    min_samples_split: schema_field(
        optimizer_int_field(ge=2),
        placeholder={
            "optimize": False,
            "fixed_value": 2,
            "lower_bound": 2,
            "upper_bound": 20,
        },
        description="The minimum number of samples required to split "
        "an internal node.",
    )  # type: ignore

    # Same reasoning as min_samples_split: an integer count of samples.
    min_samples_leaf: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 1,
            "lower_bound": 1,
            "upper_bound": 20,
        },
        description="The minimum number of samples required to be at a leaf node.",
    )  # type: ignore

    min_weight_fraction_leaf: schema_field(
        optimizer_float_field(ge=0.0, le=0.5),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0,
            "lower_bound": 0.0,
            "upper_bound": 0.5,
        },
        description="The minimum weighted fraction of the sum total of weights"
        " (of all the input samples) required to be at a leaf node.",
    )  # type: ignore

    max_depth: schema_field(
        union_type(optimizer_int_field(ge=1), none_type(int)),
        placeholder=3,
        description="The maximum depth of the individual regression estimators.",
    )  # type: ignore

    min_impurity_decrease: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0,
            "lower_bound": 0.0,
            "upper_bound": 0.5,
        },
        description="A node will be split if this split induces a decrease of "
        "the impurity greater than or equal to this value.",
    )  # type: ignore

    random_state: schema_field(
        union_type(optimizer_int_field(ge=0), none_type(int)),
        placeholder=None,
        description="The seed of the pseudo-random number generator to use"
        " when shuffling the data.",
    )  # type: ignore

    max_features: schema_field(
        union_type(
            optimizer_float_field(gt=0.0, le=1.0),
            enum_field(enum=["sqrt", "log2", None]),
        ),
        placeholder=None,
        description="The number of features to consider when looking for "
        "the best split.",
    )  # type: ignore

    # scikit-learn requires alpha in the open interval (0, 1); 1.0 itself is
    # rejected, so both the validator and the search upper bound exclude it.
    alpha: schema_field(
        optimizer_float_field(gt=0.0, lt=1.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.9,
            "lower_bound": 0.1,
            "upper_bound": 0.99,
        },
        description="The alpha-quantile of the Huber loss function and the"
        " quantile loss function.",
    )  # type: ignore

    verbose: schema_field(
        optimizer_int_field(ge=0),
        placeholder={
            "optimize": False,
            "fixed_value": 0,
            "lower_bound": 0,
            "upper_bound": 100,
        },
        description="Enable verbose output.",
    )  # type: ignore

    # A tree needs at least two leaves; scikit-learn rejects max_leaf_nodes=1.
    max_leaf_nodes: schema_field(
        union_type(optimizer_int_field(ge=2), none_type(int)),
        placeholder=None,
        description="Grow trees with max_leaf_nodes in best-first fashion.",
    )  # type: ignore

    warm_start: schema_field(
        bool_field,
        placeholder=False,
        description="When set to True, reuse the solution of the previous call"
        " to fit and add more estimators to the ensemble.",
    )  # type: ignore

    # Must be strictly below 1: a fraction of 1.0 would leave no training data.
    validation_fraction: schema_field(
        optimizer_float_field(gt=0.0, lt=1.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.1,
            "lower_bound": 0.1,
            "upper_bound": 0.5,
        },
        description="The proportion of training data to set aside as "
        "validation set for early stopping.",
    )  # type: ignore

    n_iter_no_change: schema_field(
        union_type(optimizer_int_field(ge=1), none_type(int)),
        placeholder=None,
        description="The number of iterations with no improvement to wait "
        "before stopping the training.",
    )  # type: ignore

    tol: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0001,
            "lower_bound": 1e-5,
            "upper_bound": 1e-1,
        },
        description="Tolerance for the early stopping.",
    )  # type: ignore

    ccp_alpha: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0,
            "lower_bound": 0.0,
            "upper_bound": 1.0,
        },
        description="Complexity parameter used for Minimal Cost-Complexity Pruning.",
    )  # type: ignore
|
||
|
||
class GradientBoostingR(RegressionModel, SklearnLikeRegressor, _GBRegressor):
    """Scikit-learn's Gradient Boosting Regressor wrapper for DashAI.

    The accepted hyperparameters are described by ``GradientBoostingRSchema``.
    """

    SCHEMA = GradientBoostingRSchema

    def __init__(self, **kwargs) -> None:
        # Forward all keyword arguments unchanged to the parent initializers.
        super().__init__(**kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
from sklearn.svm import LinearSVR as _LinearSVR | ||
|
||
from DashAI.back.core.schema_fields import ( | ||
BaseSchema, | ||
bool_field, | ||
enum_field, | ||
none_type, | ||
optimizer_float_field, | ||
optimizer_int_field, | ||
schema_field, | ||
union_type, | ||
) | ||
from DashAI.back.models.regression_model import RegressionModel | ||
from DashAI.back.models.scikit_learn.sklearn_like_regressor import ( | ||
SklearnLikeRegressor, | ||
) | ||
|
||
|
||
class LinearSVRSchema(BaseSchema):
    """Hyperparameter schema for Support Vector Regression with a linear kernel.

    Every attribute declares one hyperparameter: its validator (allowed type
    and range), the placeholder value and a human readable description.
    """

    epsilon: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0,
            "lower_bound": 0.0,
            "upper_bound": 1,
        },
        description="Epsilon parameter that specifies the epsilon-tube within "
        "which no penalty is associated.",
    )  # type: ignore

    tol: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0001,
            "lower_bound": 1e-5,
            "upper_bound": 1e-1,
        },
        description="Tolerance for stopping criterion.",
    )  # type: ignore

    C: schema_field(
        optimizer_float_field(gt=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 1.0,
            "lower_bound": 0.1,
            "upper_bound": 10,
        },
        description="Regularization parameter. The strength of the regularization "
        "is inversely proportional to C.",
    )  # type: ignore

    loss: schema_field(
        enum_field(enum=["epsilon_insensitive", "squared_epsilon_insensitive"]),
        placeholder="epsilon_insensitive",
        description="Specifies the loss function. 'epsilon_insensitive' is "
        "the standard SVR loss.",
    )  # type: ignore

    fit_intercept: schema_field(
        bool_field,
        placeholder=True,
        description="Whether to calculate the intercept for this model.",
    )  # type: ignore

    intercept_scaling: schema_field(
        optimizer_float_field(gt=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 1.0,
            "lower_bound": 0.1,
            "upper_bound": 10,
        },
        description="When fit_intercept is True, instance vector x becomes "
        "[x, self.intercept_scaling] in the primal problem.",
    )  # type: ignore

    dual: schema_field(
        bool_field,
        placeholder=True,
        description="Select the algorithm to either solve the dual or primal"
        " optimization problem.",
    )  # type: ignore

    # LinearSVR is backed by liblinear (not libsvm), so the description names
    # the library whose runtime setting is actually affected.
    verbose: schema_field(
        optimizer_int_field(ge=0),
        placeholder={
            "optimize": False,
            "fixed_value": 0,
            "lower_bound": 0,
            "upper_bound": 100,
        },
        description="Enable verbose output. Note that this setting takes "
        "advantage of a per-process runtime setting in liblinear.",
    )  # type: ignore

    random_state: schema_field(
        union_type(optimizer_int_field(ge=0), none_type(int)),
        placeholder=None,
        description="The seed of the pseudo-random number generator to use"
        " when shuffling the data.",
    )  # type: ignore

    max_iter: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 1000,
            "lower_bound": 100,
            "upper_bound": 10000,
        },
        description="The maximum number of iterations to be run.",
    )  # type: ignore
|
||
|
||
class LinearSVR(RegressionModel, SklearnLikeRegressor, _LinearSVR):
    """DashAI wrapper around scikit-learn's Linear Support Vector Regression.

    The accepted hyperparameters are described by ``LinearSVRSchema``.
    """

    SCHEMA = LinearSVRSchema

    def __init__(self, **kwargs) -> None:
        # Forward all keyword arguments unchanged to the parent initializers.
        super().__init__(**kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from sklearn.linear_model import LinearRegression as _LinearRegression | ||
|
||
from DashAI.back.core.schema_fields import ( | ||
BaseSchema, | ||
bool_field, | ||
none_type, | ||
optimizer_int_field, | ||
schema_field, | ||
union_type, | ||
) | ||
from DashAI.back.models.regression_model import RegressionModel | ||
from DashAI.back.models.scikit_learn.sklearn_like_regressor import ( | ||
SklearnLikeRegressor, | ||
) | ||
|
||
|
||
class LinearRegressionSchema(BaseSchema):
    """Hyperparameter schema for the linear regression model.

    Every attribute declares one hyperparameter: its validator, the
    placeholder value and a human readable description. Field names must match
    the keyword arguments of scikit-learn's ``LinearRegression`` exactly,
    because the wrapper forwards them as ``**kwargs``.
    """

    fit_intercept: schema_field(
        bool_field,
        placeholder=True,
        description="Whether to calculate the intercept for this model. "
        "If set to False, no intercept will be used in calculations "
        "(e.g., data is expected to be centered).",
    )  # type: ignore

    # scikit-learn spells this keyword with a capital X; a lowercase
    # ``copy_x`` is not a valid LinearRegression argument.
    copy_X: schema_field(  # noqa: N815
        bool_field,
        placeholder=True,
        description="If True, X will be copied; else, it may be overwritten.",
    )  # type: ignore

    # NOTE(review): ge=1 rejects the -1 value mentioned in the description;
    # confirm the intended range before relaxing the validator.
    n_jobs: schema_field(
        union_type(optimizer_int_field(ge=1), none_type(int)),
        placeholder=None,
        description="The number of jobs to use for the computation. "
        "None means 1 job, while -1 means using all processors.",
    )  # type: ignore

    positive: schema_field(
        bool_field,
        placeholder=False,
        description="When set to True, forces the coefficients to be positive.",
    )  # type: ignore
|
||
|
||
class LinearRegression(RegressionModel, SklearnLikeRegressor, _LinearRegression):
    """DashAI wrapper around scikit-learn's LinearRegression estimator.

    The accepted hyperparameters are described by ``LinearRegressionSchema``.
    """

    SCHEMA = LinearRegressionSchema

    def __init__(self, **kwargs) -> None:
        # Forward all keyword arguments unchanged to the parent initializers.
        super().__init__(**kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,228 @@ | ||
from sklearn.neural_network import MLPRegressor as _MLPregressor | ||
|
||
from DashAI.back.core.schema_fields import ( | ||
BaseSchema, | ||
bool_field, | ||
enum_field, | ||
none_type, | ||
optimizer_float_field, | ||
optimizer_int_field, | ||
schema_field, | ||
union_type, | ||
) | ||
from DashAI.back.models.regression_model import RegressionModel | ||
from DashAI.back.models.scikit_learn.sklearn_like_regressor import ( | ||
SklearnLikeRegressor, | ||
) | ||
|
||
|
||
class MLPRegressorSchema(BaseSchema):
    """Hyperparameter schema for the multi-layer perceptron regressor.

    Every attribute declares one hyperparameter: its validator (allowed type
    and range), the placeholder value and a human readable description.
    The validators mirror the ranges accepted by scikit-learn's
    ``MLPRegressor`` so that a configuration accepted here is not rejected
    later by the estimator itself.
    """

    activation: schema_field(
        enum_field(enum=["identity", "logistic", "tanh", "relu"]),
        placeholder="relu",
        description="Activation function for the hidden layer.",
    )  # type: ignore

    solver: schema_field(
        enum_field(enum=["lbfgs", "sgd", "adam"]),
        placeholder="adam",
        description="The solver for weight optimization.",
    )  # type: ignore

    alpha: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0001,
            "lower_bound": 1e-6,
            "upper_bound": 1e-1,
        },
        description="L2 penalty (regularization term) parameter.",
    )  # type: ignore

    batch_size: schema_field(
        union_type(optimizer_int_field(ge=1), enum_field(enum=["auto"])),
        placeholder="auto",
        description="Size of minibatches for stochastic optimizers.",
    )  # type: ignore

    learning_rate: schema_field(
        enum_field(enum=["constant", "invscaling", "adaptive"]),
        placeholder="constant",
        description="Learning rate schedule for weight updates.",
    )  # type: ignore

    # A learning rate of exactly 0 is rejected by scikit-learn (and would
    # never update the weights), so the bound is strict.
    learning_rate_init: schema_field(
        optimizer_float_field(gt=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.001,
            "lower_bound": 1e-5,
            "upper_bound": 1e-1,
        },
        description="The initial learning rate used.",
    )  # type: ignore

    power_t: schema_field(
        optimizer_float_field(gt=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.5,
            "lower_bound": 0.1,
            "upper_bound": 0.9,
        },
        description="The exponent for inverse scaling learning rate.",
    )  # type: ignore

    max_iter: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 200,
            "lower_bound": 50,
            "upper_bound": 1000,
        },
        description="Maximum number of iterations.",
    )  # type: ignore

    shuffle: schema_field(
        bool_field,
        placeholder=True,
        description="Whether to shuffle samples in each iteration.",
    )  # type: ignore

    random_state: schema_field(
        union_type(optimizer_int_field(ge=0), none_type(int)),
        placeholder=None,
        description="The seed of the pseudo-random number generator to use "
        "when shuffling the data.",
    )  # type: ignore

    tol: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0001,
            "lower_bound": 1e-6,
            "upper_bound": 1e-2,
        },
        description="Tolerance for the optimization.",
    )  # type: ignore

    verbose: schema_field(
        bool_field,
        placeholder=False,
        description="Whether to print progress messages to stdout.",
    )  # type: ignore

    warm_start: schema_field(
        bool_field,
        placeholder=False,
        description="When set to True, reuse the solution of the previous call"
        " to fit as initialization.",
    )  # type: ignore

    momentum: schema_field(
        optimizer_float_field(ge=0.0, le=1.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.9,
            "lower_bound": 0.0,
            "upper_bound": 1.0,
        },
        description="Momentum for gradient descent update.",
    )  # type: ignore

    nesterovs_momentum: schema_field(
        bool_field,
        placeholder=True,
        description="Whether to use Nesterov’s momentum.",
    )  # type: ignore

    early_stopping: schema_field(
        bool_field,
        placeholder=False,
        description="Whether to use early stopping to terminate training when"
        " validation score is not improving.",
    )  # type: ignore

    # Must be strictly below 1: a fraction of 1.0 would leave no training data.
    validation_fraction: schema_field(
        optimizer_float_field(gt=0.0, lt=1.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.1,
            "lower_bound": 0.1,
            "upper_bound": 0.5,
        },
        description="The proportion of training data to set aside as "
        "validation set for early stopping.",
    )  # type: ignore

    beta_1: schema_field(
        optimizer_float_field(gt=0.0, lt=1.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.9,
            "lower_bound": 0.1,
            "upper_bound": 0.999,
        },
        description="Exponential decay rate for estimates of first moment"
        " vector in Adam optimizer.",
    )  # type: ignore

    beta_2: schema_field(
        optimizer_float_field(gt=0.0, lt=1.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.999,
            "lower_bound": 0.1,
            "upper_bound": 0.999,
        },
        description="Exponential decay rate for estimates of second moment"
        " vector in Adam optimizer.",
    )  # type: ignore

    # Epsilon guards a division in Adam, so it must be strictly positive.
    epsilon: schema_field(
        optimizer_float_field(gt=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 1e-08,
            "lower_bound": 1e-10,
            "upper_bound": 1e-6,
        },
        description="Value for numerical stability in Adam optimizer.",
    )  # type: ignore

    n_iter_no_change: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 10,
            "lower_bound": 1,
            "upper_bound": 50,
        },
        description="Maximum number of epochs to not meet tol improvement.",
    )  # type: ignore

    max_fun: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 15000,
            "lower_bound": 1000,
            "upper_bound": 20000,
        },
        description="Maximum number of loss function calls. Only used "
        "if solver='lbfgs'.",
    )  # type: ignore
|
||
|
||
class MLPRegression(RegressionModel, SklearnLikeRegressor, _MLPregressor):
    """DashAI wrapper around scikit-learn's MLP regressor.

    The class only combines its three bases and advertises the
    hyperparameter schema; it adds no behaviour of its own.
    """

    # Schema describing the hyperparameters accepted by this model.
    SCHEMA = MLPRegressorSchema

    def __init__(self, **kwargs) -> None:
        """Forward every keyword argument to the parent initializers."""
        super().__init__(**kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
184 changes: 184 additions & 0 deletions
184
DashAI/back/models/scikit_learn/random_forest_regression.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
from sklearn.ensemble import RandomForestRegressor as _RandomForestRegressor | ||
|
||
from DashAI.back.core.schema_fields import ( | ||
BaseSchema, | ||
bool_field, | ||
enum_field, | ||
none_type, | ||
optimizer_float_field, | ||
optimizer_int_field, | ||
schema_field, | ||
union_type, | ||
) | ||
from DashAI.back.models.regression_model import RegressionModel | ||
from DashAI.back.models.scikit_learn.sklearn_like_regressor import SklearnLikeRegressor | ||
|
||
|
||
class RandomForestRegressionSchema(BaseSchema):
    """Hyperparameter schema of the Random Forest Regressor for DashAI.

    Every attribute declares one hyperparameter through ``schema_field``:
    the field type with its validation bounds, the placeholder shown as the
    initial value, and a human readable description.
    """

    n_estimators: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 100,
            "lower_bound": 10,
            "upper_bound": 1000,
        },
        description="The number of trees in the forest.",
    )  # type: ignore

    criterion: schema_field(
        enum_field(enum=["squared_error", "absolute_error", "poisson"]),
        placeholder="squared_error",
        description="The function to measure the quality of a split.",
    )  # type: ignore

    max_depth: schema_field(
        union_type(optimizer_int_field(ge=1), none_type(int)),
        placeholder=None,
        description="The maximum depth of the tree.",
    )  # type: ignore

    min_samples_split: schema_field(
        optimizer_int_field(ge=2),
        placeholder={
            "optimize": False,
            "fixed_value": 2,
            "lower_bound": 2,
            "upper_bound": 20,
        },
        description="The minimum number of samples required to split "
        "an internal node.",
    )  # type: ignore

    min_samples_leaf: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 1,
            "lower_bound": 1,
            "upper_bound": 20,
        },
        description="The minimum number of samples required to be at a leaf node.",
    )  # type: ignore

    min_weight_fraction_leaf: schema_field(
        optimizer_float_field(ge=0.0, le=0.5),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0,
            "lower_bound": 0.0,
            "upper_bound": 0.5,
        },
        description="The minimum weighted fraction of the sum total of weights"
        " required to be at a leaf node.",
    )  # type: ignore

    # "auto" is deliberately not offered: scikit-learn deprecated it for
    # forest regressors in 1.1 and removed it in 1.3, so it is rejected by
    # every release that also supports ``monotonic_cst`` (added in 1.4).
    # ``None`` keeps the meaning "auto" had for regressors: use all features.
    max_features: schema_field(
        union_type(
            optimizer_float_field(gt=0.0, le=1.0),
            enum_field(enum=["sqrt", "log2", None]),
        ),
        placeholder=None,
        description="The number of features to consider when looking for the"
        " best split.",
    )  # type: ignore

    # scikit-learn requires max_leaf_nodes to be None or at least 2, so the
    # lower bound is 2 rather than 1.
    max_leaf_nodes: schema_field(
        union_type(optimizer_int_field(ge=2), none_type(int)),
        placeholder=None,
        description="Grow trees with max_leaf_nodes in best-first fashion.",
    )  # type: ignore

    min_impurity_decrease: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0,
            "lower_bound": 0.0,
            "upper_bound": 0.5,
        },
        description="A node will be split if this split induces a decrease of"
        " the impurity greater than or equal to this value.",
    )  # type: ignore

    bootstrap: schema_field(
        bool_field,
        placeholder=True,
        description="Whether bootstrap samples are used when building trees.",
    )  # type: ignore

    oob_score: schema_field(
        bool_field,
        placeholder=False,
        description="Whether to use out-of-bag samples to estimate the "
        "generalization score.",
    )  # type: ignore

    n_jobs: schema_field(
        union_type(optimizer_int_field(ge=1), none_type(int)),
        placeholder=None,
        description="The number of jobs to run in parallel for both fit and predict.",
    )  # type: ignore

    random_state: schema_field(
        union_type(optimizer_int_field(ge=0), none_type(int)),
        placeholder=None,
        description="The seed of the pseudo-random number generator to use"
        " when shuffling the data.",
    )  # type: ignore

    verbose: schema_field(
        optimizer_int_field(ge=0),
        placeholder={
            "optimize": False,
            "fixed_value": 0,
            "lower_bound": 0,
            "upper_bound": 100,
        },
        description="Controls the verbosity when fitting and predicting.",
    )  # type: ignore

    warm_start: schema_field(
        bool_field,
        placeholder=False,
        description="When set to True, reuse the solution of the previous "
        "call to fit and add more estimators to the ensemble.",
    )  # type: ignore

    ccp_alpha: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.0,
            "lower_bound": 0.0,
            "upper_bound": 1.0,
        },
        description="Complexity parameter used for Minimal Cost-Complexity Pruning.",
    )  # type: ignore

    max_samples: schema_field(
        union_type(optimizer_float_field(gt=0.0, le=1.0), none_type(float)),
        placeholder=None,
        description="If bootstrap is True, the number of samples to draw from"
        " X to train each base estimator.",
    )  # type: ignore

    monotonic_cst: schema_field(
        none_type(float),
        placeholder=None,
        description="A constraint vector indicating the monotonicity "
        "constraint on each feature.",
    )  # type: ignore
|
||
|
||
class RandomForestRegression(
    RegressionModel, SklearnLikeRegressor, _RandomForestRegressor
):
    """Scikit-learn's Random Forest regressor wrapper for DashAI.

    The class only combines its three bases and advertises the
    hyperparameter schema; it adds no behaviour of its own.
    """

    # Schema describing the hyperparameters accepted by this model.
    SCHEMA = RandomForestRegressionSchema

    def __init__(self, **kwargs) -> None:
        """Forward every keyword argument to the parent initializers."""
        super().__init__(**kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
from sklearn.linear_model import Ridge as _Ridge | ||
|
||
from DashAI.back.core.schema_fields import ( | ||
BaseSchema, | ||
bool_field, | ||
enum_field, | ||
none_type, | ||
optimizer_float_field, | ||
optimizer_int_field, | ||
schema_field, | ||
union_type, | ||
) | ||
from DashAI.back.models.regression_model import RegressionModel | ||
from DashAI.back.models.scikit_learn.sklearn_like_regressor import ( | ||
SklearnLikeRegressor, | ||
) | ||
|
||
|
||
class RidgeRegressionSchema(BaseSchema):
    """Ridge regression is a linear model that includes L2 regularization.

    Every attribute declares one hyperparameter through ``schema_field``:
    the field type with its validation bounds, the placeholder shown as the
    initial value, and a human readable description.
    """

    # The bound is ``ge=0.0``, so zero (no regularization) is accepted; the
    # description says "non-negative" to match.
    alpha: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 1.0,
            "lower_bound": 0.1,
            "upper_bound": 10.0,
        },
        description="Regularization strength; must be a non-negative float. "
        "Larger values specify stronger regularization.",
    )  # type: ignore

    fit_intercept: schema_field(
        bool_field,
        placeholder=True,
        description="Whether to calculate the intercept for this model. "
        "If set to False, no intercept will be used in calculations "
        "(e.g., data is expected to be centered).",
    )  # type: ignore

    # NOTE(review): scikit-learn's Ridge spells this parameter ``copy_X``;
    # confirm the lower-case name is translated before it reaches the
    # estimator's constructor.
    copy_x: schema_field(
        bool_field,
        placeholder=True,
        description="If True, X will be copied; else, it may be overwritten.",
    )  # type: ignore

    max_iter: schema_field(
        optimizer_int_field(ge=1),
        placeholder={
            "optimize": False,
            "fixed_value": 1000,
            "lower_bound": 100,
            "upper_bound": 10000,
        },
        description="Maximum number of iterations for conjugate gradient solver.",
    )  # type: ignore

    tol: schema_field(
        optimizer_float_field(ge=0.0),
        placeholder={
            "optimize": False,
            "fixed_value": 0.001,
            "lower_bound": 1e-5,
            "upper_bound": 1e-1,
        },
        description="Precision of the solution.",
    )  # type: ignore

    # "lbfgs" is included because it is the only scikit-learn Ridge solver
    # that supports ``positive=True``; scikit-learn rejects it when
    # ``positive`` is False.
    solver: schema_field(
        enum_field(
            enum=[
                "auto",
                "svd",
                "cholesky",
                "lsqr",
                "sparse_cg",
                "sag",
                "saga",
                "lbfgs",
            ]
        ),
        placeholder="auto",
        description="Solver to use in the computation. ‘auto’ chooses the "
        "solver automatically based on the type of data.",
    )  # type: ignore

    positive: schema_field(
        bool_field,
        placeholder=False,
        description="When set to True, forces the coefficients to be positive.",
    )  # type: ignore

    random_state: schema_field(
        union_type(optimizer_int_field(ge=0), none_type(int)),
        placeholder=None,
        description="The seed of the pseudo random number generator to use "
        "when shuffling the data. Pass an int for reproducible output across "
        "multiple function calls, or None to not set a specific seed.",
    )  # type: ignore
|
||
|
||
class RidgeRegression(RegressionModel, SklearnLikeRegressor, _Ridge):
    """Scikit-learn's Ridge regression wrapper for DashAI."""

    # Schema describing the hyperparameters accepted by this model.
    SCHEMA = RidgeRegressionSchema

    def __init__(self, **kwargs) -> None:
        """Initialize the model from schema-named hyperparameters.

        Parameters
        ----------
        **kwargs
            Hyperparameters named as in ``RidgeRegressionSchema``. The
            schema's ``copy_x`` key is renamed to ``copy_X`` before the
            arguments are forwarded to the parent initializers.
        """
        # scikit-learn's Ridge only accepts ``copy_X`` (capital X); passing
        # the schema's lower-case ``copy_x`` through unchanged would raise a
        # TypeError for an unexpected keyword argument. An explicitly given
        # ``copy_X`` takes precedence over the translated value.
        if "copy_x" in kwargs:
            kwargs.setdefault("copy_X", kwargs.pop("copy_x"))
        super().__init__(**kwargs)
25 changes: 25 additions & 0 deletions
25
DashAI/back/models/scikit_learn/sklearn_like_classifier.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import numpy as np | ||
|
||
from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset | ||
from DashAI.back.models.scikit_learn.sklearn_like_model import SklearnLikeModel | ||
|
||
|
||
class SklearnLikeClassifier(SklearnLikeModel):
    """Class for handling sklearn-like classifier models."""

    def predict(self, x_pred: DashAIDataset) -> np.ndarray:
        """Run the model on the given inputs via ``predict_proba``.

        Parameters
        ----------
        x_pred : DashAIDataset
            Dataset with the input data columns. A DashAIDataset is converted
            to a pandas DataFrame first; any other object is forwarded
            unchanged.

        Returns
        -------
        np.ndarray
            The value returned by the parent class' ``predict_proba`` for
            x_pred.
        """
        features = x_pred.to_pandas() if isinstance(x_pred, DashAIDataset) else x_pred
        return super().predict_proba(features)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import numpy as np | ||
|
||
from DashAI.back.dataloaders.classes.dashai_dataset import DashAIDataset | ||
from DashAI.back.models.scikit_learn.sklearn_like_model import SklearnLikeModel | ||
|
||
|
||
class SklearnLikeRegressor(SklearnLikeModel):
    """Class for handling sklearn-like regressor models."""

    def predict(self, x_pred: DashAIDataset) -> np.ndarray:
        """Make a prediction with the model.

        Parameters
        ----------
        x_pred : DashAIDataset
            Dataset with the input data columns. Objects exposing
            ``to_pandas`` are converted to a pandas DataFrame first; anything
            else (for example an already converted DataFrame) is forwarded
            unchanged.

        Returns
        -------
        np.ndarray
            Array with the predicted target values for x_pred
        """
        # Convert only when possible so inputs that were already converted
        # do not fail with AttributeError, matching the tolerance of
        # SklearnLikeClassifier.predict.
        if hasattr(x_pred, "to_pandas"):
            x_pred = x_pred.to_pandas()
        return super().predict(x_pred)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
# flake8: noqa | ||
from DashAI.back.tasks.base_task import BaseTask | ||
from DashAI.back.tasks.image_classification_task import ImageClassificationTask | ||
from DashAI.back.tasks.regression_task import RegressionTask | ||
from DashAI.back.tasks.tabular_classification_task import TabularClassificationTask | ||
from DashAI.back.tasks.text_classification_task import TextClassificationTask | ||
from DashAI.back.tasks.translation_task import TranslationTask |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from typing import List | ||
|
||
from datasets import DatasetDict, Value | ||
|
||
from DashAI.back.tasks.base_task import BaseTask | ||
|
||
|
||
class RegressionTask(BaseTask):
    """Base class for regression tasks.

    Here you can change the methods provided by class Task.
    """

    DESCRIPTION: str = """
    Regression in machine learning involves predicting continuous values for
    structured data organized in tabular form (rows and columns).
    Models are trained to learn patterns and relationships in the data,
    enabling accurate prediction of new instances."""
    # Declares the column types and cardinalities of the task's inputs and
    # outputs: any number ("n") of inputs and exactly one output.
    metadata: dict = {
        "inputs_types": [Value],
        "outputs_types": [Value],
        "inputs_cardinality": "n",
        "outputs_cardinality": 1,
    }

    def prepare_for_task(
        self, datasetdict: DatasetDict, outputs_columns: List[str]
    ) -> DatasetDict:
        """Prepare the dataset for the regression task.

        No column types are changed and no copy is made: the dataset is
        handed back exactly as received.

        Parameters
        ----------
        datasetdict : DatasetDict
            Dataset to be prepared.
        outputs_columns : List[str]
            Names of the output columns. Currently unused.

        Returns
        -------
        DatasetDict
            The very same ``datasetdict`` object that was passed in.
        """
        return datasetdict