adding mlp classifier code and tests, updating pre-commit file, and fixing pylint issues
rafaelgreca · Sep 30, 2024 · 02c816f · 02c816f
1 parent 210cc86
commit 02c816f
Show file tree

Hide file tree

Showing 14 changed files with 952 additions and 29 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,13 +16,6 @@ repos:
       -   id: black
           args: [--safe]
 
-  -   repo: https://github.com/pre-commit/mirrors-mypy
-      rev: v0.910
-      hooks:
-      -   id: mypy
-          files: .
-          args: [--strict, --ignore-missing-imports]
-
   -   repo: local
       hooks:
       -   id: pylint

diff --git a/.pylintrc b/.pylintrc
@@ -450,7 +450,10 @@ disable=raw-checker-failed,
         too-many-return-statements,
         import-error,
         no-name-in-module,
-        duplicate-code
+        duplicate-code,
+        too-few-public-methods,
+        too-many-public-methods,
+        arguments-differ
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option

diff --git a/Dockerfile b/Dockerfile
@@ -8,4 +8,4 @@ RUN pip install --no-cache-dir -U pip
 
 COPY . .
 
-RUN pip install -r requirements/requirements.txt
+RUN pip install -r requirements/requirements.txt
diff --git a/README.md b/README.md
@@ -60,7 +60,7 @@ Run the Docker container using the following command:
 ```bash
 sudo docker run -d -p 8000:5000 --name scratchml scratchml
 ```
-    
+
 ## Usage/Examples
 
 See the `examples` folder to see some use cases.
@@ -111,9 +111,9 @@ Implementations:
     - [x] Leaky ReLU
     - [x] SoftPlus
     - [x] SELU
-- [ ] Loss functions
+- [x] Loss functions
     - [x] Binary Cross Entropy
-    - [ ] Cross Entropy
+    - [x] Cross Entropy
 - [x] Metrics
     - [x] Regression Metrics
         - [x] Mean Squared Error (MSE)
@@ -161,7 +161,7 @@ Implementations:
         - [x] Decision Tree Regressor
     - [x] Perceptron
     - [ ] MLP
-        - [ ] MLP Classifier
+        - [x] MLP Classifier
         - [ ] MLP Regressor
     - [x] KMeans
     - [x] PCA
@@ -213,4 +213,3 @@ Distributed under the [MIT](https://choosealicense.com/licenses/mit/) License. S
 ## Authors
 
 - [@rafaelgreca](https://www.github.com/rafaelgreca)
-
diff --git a/__init__.py b/__init__.py
@@ -1,2 +0,0 @@
-from .scratchml import *
-from .tests import *

diff --git a/examples/mlp.py b/examples/mlp.py
@@ -0,0 +1,55 @@
+from scratchml.models.multilayer_perceptron import MLPClassifier
+from scratchml.utils import KFold
+from sklearn.datasets import make_classification
+
+
+def example_mlp_classifier() -> None:
+    """
+    Practical example of how to use the Multilayer Perceptron (MLP) Classifier model.
+    """
+    # generating a synthetic two-class dataset (1000 samples, 5 features)
+    X, y = make_classification(
+        n_samples=1000,
+        n_features=5,
+        n_classes=2,
+        n_clusters_per_class=1,
+        n_informative=2,
+        n_redundant=1,
+        n_repeated=0,
+        shuffle=True,
+    )
+
+    # requesting 5 stratified, shuffled train/test splits from KFold
+    folds = KFold(X, y, stratify=True, shuffle=True, n_splits=5)
+
+    for fold, (train_indexes, test_indexes) in enumerate(folds):
+        # selecting the training and test samples of this fold by index
+        X_train = X[train_indexes]
+        y_train = y[train_indexes]
+
+        X_test = X[test_indexes]
+        y_test = y[test_indexes]
+
+        # creating a new MLP model instance for each fold
+        mlp = MLPClassifier(
+            loss_function="cross_entropy",
+            hidden_layer_sizes=(  # presumably two hidden layers (32 and 64 units) - TODO confirm
+                32,
+                64,
+            ),
+            max_iter=100,
+            batch_size=64,
+            verbose=0,
+        )
+
+        # fitting the model on the training part of the fold
+        mlp.fit(X=X_train, y=y_train)
+
+        # assessing the model's accuracy on the held-out part of the fold
+        score = mlp.score(X=X_test, y=y_test, metric="accuracy")
+
+        print(f"The model achieved an accuracy score of {score} on the fold {fold}.\n")
+
+
+if __name__ == "__main__":  # run the example only when executed as a script
+    example_mlp_classifier()
diff --git a/scratchml/losses.py b/scratchml/losses.py
@@ -25,6 +25,37 @@ def binary_cross_entropy(
     if derivative:
         return y_hat - y
 
-    y1 = y * np.log(y_hat) + epsilon
+    y1 = y * np.log(y_hat + epsilon)
     y2 = (1 - y) * np.log(1 - y_hat + epsilon)
     return (-1 * (1 / y.shape[0])) * np.sum(y1, y2)
+
+
+def cross_entropy(
+    y: np.ndarray,
+    y_hat: np.ndarray,
+    derivative: bool = False,
+    epsilon: np.float32 = 1e-9,
+) -> np.ndarray:
+    """
+    Applies the Cross Entropy (CE) loss function.
+
+    The loss is returned element-wise (no sum or mean is applied here).
+    When `derivative` is True, the element-wise gradient of the loss with
+    respect to `y_hat` is returned instead; `epsilon` is not used in
+    that branch.
+
+    Args:
+        y (np.ndarray): the true targets.
+        y_hat (np.ndarray): the predicted targets.
+        derivative (bool, optional): whether to use the
+            derivative function or not. Defaults to False.
+        epsilon (np.float32): a really small value (called epsilon)
+            used to avoid calculating the log of 0. Defaults to 1e-9.
+
+    Returns:
+        np.ndarray: the output of the loss function with respect
+            to the real targets and the predicted targets.
+    """
+    if derivative:
+        y_hat = np.clip(y_hat, 1e-15, 1 - 1e-15)  # keep predictions away from 0 and 1 before dividing
+        return -(y / y_hat) + (1 - y) / (1 - y_hat)  # d/dy_hat of -y*log(y_hat) - (1-y)*log(1-y_hat)
+
+    y_hat = np.clip(y_hat, 1e-15, 1 - 1e-15)  # clip predictions to [1e-15, 1 - 1e-15] before the logs
+    y1 = -y * np.log(y_hat + epsilon)
+    y2 = (1 - y) * np.log(1 - y_hat + epsilon)
+    return y1 - y2  # element-wise loss; no reduction is applied
diff --git a/scratchml/models/decision_tree.py b/scratchml/models/decision_tree.py
@@ -89,8 +89,9 @@ def __init__(
                 looking for the best split. Defaults to None.
             max_leaf_nodes (int, optional): Grow a tree with max_leaf_nodes in best-first fashion.
                 Defaults to None.
-            min_impurity_decrease (Union[int, float], optional): A node will be split if this split induces a
-                decrease of the impurity greater than or equal to this value. Defaults to 0.0.
+            min_impurity_decrease (Union[int, float], optional): A node will be split if this split
+                induces a decrease of the impurity greater than or equal to this value.
+                Defaults to 0.0.
             verbose (int, optional): how much information should be printed.
                 Should be 0, 1, or 2. Defaults to 0.
         """
@@ -552,8 +553,9 @@ def __init__(
                 looking for the best split. Defaults to None.
             max_leaf_nodes (int, optional): Grow a tree with max_leaf_nodes in best-first fashion.
                 Defaults to None.
-            min_impurity_decrease (Union[int, float], optional): A node will be split if this split induces a
-                decrease of the impurity greater than or equal to this value. Defaults to 0.0.
+            min_impurity_decrease (Union[int, float], optional): A node will be split if this split
+                induces a decrease of the impurity greater than or equal to this value.
+                Defaults to 0.0.
             verbose (int, optional): how much information should be printed.
                 Should be 0, 1, or 2. Defaults to 0.
         """
@@ -666,8 +668,9 @@ def __init__(
                 looking for the best split. Defaults to None.
             max_leaf_nodes (int, optional): Grow a tree with max_leaf_nodes in best-first fashion.
                 Defaults to None.
-            min_impurity_decrease (Union[int, float], optional): A node will be split if this split induces a
-                decrease of the impurity greater than or equal to this value. Defaults to 0.0.
+            min_impurity_decrease (Union[int, float], optional): A node will be split if this split
+                induces a decrease of the impurity greater than or equal to this value.
+                Defaults to 0.0.
             verbose (int, optional): how much information should be printed.
                 Should be 0, 1, or 2. Defaults to 0.
         """
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,4 +8,4 @@ RUN pip install --no-cache-dir -U pip

		COPY . .

		RUN pip install -r requirements/requirements.txt
		RUN pip install -r requirements/requirements.txt