Skip to content

Commit

Permalink
adding mlp classifier code and tests, updating pre-commit file, and f…
Browse files Browse the repository at this point in the history
…ixing pylint issues
  • Loading branch information
rafaelgreca committed Sep 30, 2024
1 parent 210cc86 commit 02c816f
Show file tree
Hide file tree
Showing 14 changed files with 952 additions and 29 deletions.
7 changes: 0 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,6 @@ repos:
- id: black
args: [--safe]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.910
hooks:
- id: mypy
files: .
args: [--strict, --ignore-missing-imports]

- repo: local
hooks:
- id: pylint
Expand Down
5 changes: 4 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,10 @@ disable=raw-checker-failed,
too-many-return-statements,
import-error,
no-name-in-module,
duplicate-code
duplicate-code,
too-few-public-methods,
too-many-public-methods,
arguments-differ

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ RUN pip install --no-cache-dir -U pip

COPY . .

RUN pip install -r requirements/requirements.txt
RUN pip install -r requirements/requirements.txt
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Run the Docker container using the following command:
```bash
sudo docker run -d -p 8000:5000 --name scratchml scratchml
```

## Usage/Examples

See the `examples` folder for some use cases.
Expand Down Expand Up @@ -111,9 +111,9 @@ Implementations:
- [x] Leaky ReLU
- [x] SoftPlus
- [x] SELU
- [ ] Loss functions
- [x] Loss functions
- [x] Binary Cross Entropy
- [ ] Cross Entropy
- [x] Cross Entropy
- [x] Metrics
- [x] Regression Metrics
- [x] Mean Squared Error (MSE)
Expand Down Expand Up @@ -161,7 +161,7 @@ Implementations:
- [x] Decision Tree Regressor
- [x] Perceptron
- [ ] MLP
- [ ] MLP Classifier
- [x] MLP Classifier
- [ ] MLP Regressor
- [x] KMeans
- [x] PCA
Expand Down Expand Up @@ -213,4 +213,3 @@ Distributed under the [MIT](https://choosealicense.com/licenses/mit/) License. S
## Authors

- [@rafaelgreca](https://www.github.com/rafaelgreca)

2 changes: 0 additions & 2 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
from .scratchml import *
from .tests import *
55 changes: 55 additions & 0 deletions examples/mlp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from scratchml.models.multilayer_perceptron import MLPClassifier
from scratchml.utils import KFold
from sklearn.datasets import make_classification


def example_mlp_classifier() -> None:
    """
    Walks through a complete use of the Multilayer Perceptron (MLP) Classifier:
    builds a synthetic binary classification dataset, splits it into five
    folds, and for each fold trains a fresh model and prints its accuracy.
    """
    # synthetic dataset for the classification task
    features, targets = make_classification(
        n_samples=1000,
        n_features=5,
        n_classes=2,
        n_clusters_per_class=1,
        n_informative=2,
        n_redundant=1,
        n_repeated=0,
        shuffle=True,
    )

    # each element yielded by KFold is a (train indexes, test indexes) pair
    splits = KFold(features, targets, stratify=True, shuffle=True, n_splits=5)

    for fold, (train_indexes, test_indexes) in enumerate(splits):
        # a brand new model per fold so that no state leaks between folds
        model = MLPClassifier(
            loss_function="cross_entropy",
            hidden_layer_sizes=(32, 64),
            max_iter=100,
            batch_size=64,
            verbose=0,
        )

        # train on the fold's training partition
        model.fit(X=features[train_indexes], y=targets[train_indexes])

        # evaluate on the fold's held-out partition
        score = model.score(
            X=features[test_indexes],
            y=targets[test_indexes],
            metric="accuracy",
        )

        print(f"The model achieved an accuracy score of {score} on the fold {fold}.\n")


# run the example only when this file is executed as a script
if __name__ == "__main__":
    example_mlp_classifier()
33 changes: 32 additions & 1 deletion scratchml/losses.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,37 @@ def binary_cross_entropy(
if derivative:
return y_hat - y

y1 = y * np.log(y_hat) + epsilon
y1 = y * np.log(y_hat + epsilon)
y2 = (1 - y) * np.log(1 - y_hat + epsilon)
return (-1 * (1 / y.shape[0])) * np.sum(y1, y2)


def cross_entropy(
    y: np.ndarray,
    y_hat: np.ndarray,
    derivative: bool = False,
    epsilon: np.float32 = 1e-9,
) -> np.ndarray:
    """
    Applies the Cross Entropy (CE) loss function element-wise.

    The loss of each element is
    -y * log(y_hat + epsilon) - (1 - y) * log(1 - y_hat + epsilon)
    and its derivative with respect to y_hat is
    -(y / y_hat) + (1 - y) / (1 - y_hat).
    No reduction (sum or mean) is applied to the result.

    Args:
        y (np.ndarray): the true targets. Any array-like is accepted,
            including boolean and unsigned integer label arrays.
        y_hat (np.ndarray): the predicted targets.
        derivative (bool, optional): whether to use the
            derivative function or not. Defaults to False.
        epsilon (np.float32): a really small value (called epsilon)
            used to avoid calculating the log of 0. Only used when
            computing the loss itself, not its derivative.
            Defaults to 1e-9.

    Returns:
        np.ndarray: the output of the loss function (or of its
            derivative) with respect to the real targets and the
            predicted targets, with one value per element.
    """
    # accept plain sequences as targets (e.g. "1 - y" fails on a list)
    y = np.asarray(y)

    # keep the predictions inside [1e-15, 1 - 1e-15] so that the
    # divisions and logarithms below are not evaluated at exactly
    # 0 or 1 (for float64 predictions)
    y_hat = np.clip(y_hat, 1e-15, 1 - 1e-15)

    if derivative:
        return -(y / y_hat) + (1 - y) / (1 - y_hat)

    # negate the product, never the labels themselves: "-y" wraps around
    # for unsigned integer labels and raises a TypeError for boolean ones
    positive_term = y * np.log(y_hat + epsilon)
    negative_term = (1 - y) * np.log(1 - y_hat + epsilon)
    return -positive_term - negative_term
15 changes: 9 additions & 6 deletions scratchml/models/decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,9 @@ def __init__(
looking for the best split. Defaults to None.
max_leaf_nodes (int, optional): Grow a tree with max_leaf_nodes in best-first fashion.
Defaults to None.
min_impurity_decrease (Union[int, float], optional): A node will be split if this split induces a
decrease of the impurity greater than or equal to this value. Defaults to 0.0.
min_impurity_decrease (Union[int, float], optional): A node will be split if this split
induces a decrease of the impurity greater than or equal to this value.
Defaults to 0.0.
verbose (int, optional): how much information should be printed.
Should be 0, 1, or 2. Defaults to 0.
"""
Expand Down Expand Up @@ -552,8 +553,9 @@ def __init__(
looking for the best split. Defaults to None.
max_leaf_nodes (int, optional): Grow a tree with max_leaf_nodes in best-first fashion.
Defaults to None.
min_impurity_decrease (Union[int, float], optional): A node will be split if this split induces a
decrease of the impurity greater than or equal to this value. Defaults to 0.0.
min_impurity_decrease (Union[int, float], optional): A node will be split if this split
induces a decrease of the impurity greater than or equal to this value.
Defaults to 0.0.
verbose (int, optional): how much information should be printed.
Should be 0, 1, or 2. Defaults to 0.
"""
Expand Down Expand Up @@ -666,8 +668,9 @@ def __init__(
looking for the best split. Defaults to None.
max_leaf_nodes (int, optional): Grow a tree with max_leaf_nodes in best-first fashion.
Defaults to None.
min_impurity_decrease (Union[int, float], optional): A node will be split if this split induces a
decrease of the impurity greater than or equal to this value. Defaults to 0.0.
min_impurity_decrease (Union[int, float], optional): A node will be split if this split
induces a decrease of the impurity greater than or equal to this value.
Defaults to 0.0.
verbose (int, optional): how much information should be printed.
Should be 0, 1, or 2. Defaults to 0.
"""
Expand Down
Loading

0 comments on commit 02c816f

Please sign in to comment.