Neural networks perceptron classifier
Michał Żarnecki committed Dec 17, 2024
1 parent 94ef610, commit 97888b7
Showing 7 changed files with 324 additions and 0 deletions.

192 changes: 192 additions & 0 deletions
NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php
@@ -0,0 +1,192 @@
<?php

namespace NeuralNetworks\PerceptronClassifier;

/**
 * This class implements a simple neural network with a single output neuron (a perceptron).
 * The network uses the sigmoid activation function and performs binary classification.
 * (https://cw.fel.cvut.cz/b211/courses/be5b33rpz/labs/07_perceptron/start)
 *
 * @author Michał Żarnecki https://github.com/rzarno
 */
class NeuralNetworkPerceptronClassifier
{
    /**
     * @param array $X Input features, shape (n x m)
     * @param array $Y Target labels, shape (1 x m)
     * @param int $iterations Number of gradient descent iterations
     * @param float $learningRate Step size for parameter updates
     * @return array [$W, $b] Trained weights and bias
     */
    public function trainModel(array $X, array $Y, int $iterations, float $learningRate): array
    {
        [$W, $b] = $this->initParams(count($X));

        for ($i = 0; $i < $iterations; $i++) {
            // Forward propagation
            $A = $this->forwardPropagation($X, $W, $b);

            // Compute cost
            $cost = $this->computeCost($A, $Y);

            // Backward propagation
            [$dW, $db] = $this->backwardPropagation($A, $X, $Y);

            // Update parameters
            [$W, $b] = $this->updateParams($W, $b, $dW, $db, $learningRate);

            if ($i % 100 === 0) {
                echo "Iteration {$i} - Cost: {$cost}\n";
            }
        }

        return [$W, $b];
    }

    /**
     * @param array $X Input features, shape (n x m)
     * @param array $W Trained weights
     * @param float $b Trained bias
     * @return array Predicted labels (0 or 1) for each sample
     */
    public function predict(array $X, array $W, float $b): array
    {
        $A = $this->forwardPropagation($X, $W, $b);
        return array_map(fn($a) => $a > 0.5 ? 1 : 0, $A);
    }

    /**
     * Stage 1. Prepare dataset
     * @return array[] [$X, $Y] Features and labels
     */
    public function generateTrainingSet(): array
    {
        $m = 50;

        // Generate a 2 x m matrix with binary values (0 or 1)
        $X = [];
        for ($i = 0; $i < 2; $i++) {
            for ($j = 0; $j < $m; $j++) {
                $X[$i][$j] = rand(0, 1);
            }
        }

        // Compute Y: label is 1 exactly when X[0] == 1 and X[1] == 0
        $Y = [];
        for ($j = 0; $j < $m; $j++) {
            $Y[$j] = ($X[0][$j] == 1 && $X[1][$j] == 0) ? 1 : 0;
        }

        return [$X, $Y];
    }

    /**
     * Stage 2. Initialize model parameters
     * @param int $n Number of features
     * @return array [$W, $b] Weight and bias arrays
     */
    private function initParams(int $n): array
    {
        $W = [];
        for ($i = 0; $i < $n; $i++) {
            $W[$i] = mt_rand() / mt_getrandmax(); // Random values in [0, 1]
        }
        $b = 0.0; // Bias initialized to zero
        return [$W, $b];
    }

    /**
     * Sigmoid Activation Function
     * @param float $z
     * @return float
     */
    private function sigmoid(float $z): float
    {
        return 1 / (1 + exp(-$z));
    }

    /**
     * Stage 3. Forward Propagation
     * @param array $X
     * @param array $W
     * @param float $b
     * @return array Activations for each sample
     */
    private function forwardPropagation(array $X, array $W, float $b): array
    {
        $Z = [];
        for ($j = 0; $j < count($X[0]); $j++) {
            $sum = $b;
            for ($i = 0; $i < count($W); $i++) {
                $sum += $W[$i] * $X[$i][$j];
            }
            $Z[$j] = $this->sigmoid($sum);
        }
        return $Z;
    }

    /**
     * Stage 4. Compute Cost Function (Binary Cross-Entropy Loss)
     * @param array $A Activations from forward propagation
     * @param array $Y Target labels
     * @return float
     */
    private function computeCost(array $A, array $Y): float
    {
        $m = count($Y);
        $cost = 0.0;
        for ($i = 0; $i < $m; $i++) {
            // Clamp activations away from exactly 0 and 1 to avoid log(0) when the sigmoid saturates
            $a = min(max($A[$i], 1e-15), 1 - 1e-15);
            $cost += -($Y[$i] * log($a) + (1 - $Y[$i]) * log(1 - $a));
        }
        return $cost / $m;
    }

    /**
     * Stage 5. Backward Propagation
     * @param array $A Activations from forward propagation
     * @param array $X Input features
     * @param array $Y Target labels
     * @return array [$dW, $db] Gradients of the cost w.r.t. weights and bias
     */
    private function backwardPropagation(array $A, array $X, array $Y): array
    {
        $m = count($Y);
        $dW = array_fill(0, count($X), 0.0);
        $db = 0.0;

        for ($j = 0; $j < $m; $j++) {
            $dZ = $A[$j] - $Y[$j];
            for ($i = 0; $i < count($X); $i++) {
                $dW[$i] += $dZ * $X[$i][$j];
            }
            $db += $dZ;
        }

        // Average gradients
        for ($i = 0; $i < count($dW); $i++) {
            $dW[$i] /= $m;
        }
        $db /= $m;

        return [$dW, $db];
    }

    /**
     * Stage 6. Update Parameters
     * @param array $W
     * @param float $b
     * @param array $dW
     * @param float $db
     * @param float $learningRate
     * @return array [$W, $b] Updated parameters
     */
    private function updateParams(array $W, float $b, array $dW, float $db, float $learningRate): array
    {
        for ($i = 0; $i < count($W); $i++) {
            $W[$i] -= $learningRate * $dW[$i];
        }
        $b -= $learningRate * $db;

        return [$W, $b];
    }
}
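
A minimal usage sketch of the class above (the file name `demo.php` and the require path are illustrative assumptions; adjust them to your project layout):

```php
<?php
// demo.php (hypothetical entry point) -- train on generated data, then predict
require_once 'NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php';

use NeuralNetworks\PerceptronClassifier\NeuralNetworkPerceptronClassifier;

$classifier = new NeuralNetworkPerceptronClassifier();
[$X, $Y] = $classifier->generateTrainingSet();

// 1000 iterations with learning rate 0.1, as in the accompanying test
[$W, $b] = $classifier->trainModel($X, $Y, 1000, 0.1);

// Columns are the samples (0,0), (0,1), (1,1), (1,0); only the last is class 1
print_r($classifier->predict([[0, 0, 1, 1], [0, 1, 1, 0]], $W, $b));
```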

@@ -0,0 +1,100 @@
## Maths behind the single Perceptron Neural Network with Activation Function

This work is based on examples from the course https://www.coursera.org/learn/machine-learning-calculus by Luis Serrano.

Linear separation refers to data points in binary classification problems that can be separated by a linear decision boundary. If the data points can be separated by a line, linear function, or flat hyperplane, they are said to be linearly separable.

Data points in an n-dimensional space are linearly separable if there exists a hyperplane

$$w_1x_1 + w_2x_2 + \dots + w_nx_n + b = 0$$

that separates the two classes.

For two-dimensional input data, if there is a line with equation $$w_1x_1 + w_2x_2 + b = 0$$ that separates all samples of one class from the other class, then the class of an observation can be determined from which side of the line it lies on. Such classification problems are called "linearly separable", i.e. separable by a linear combination of the features. For example, the labeling rule used later in this document ($y = 1$ exactly when $x_1 = 1$ and $x_2 = 0$) is linearly separable: the line $x_1 - x_2 - 0.5 = 0$ places $(1, 0)$ on its positive side and $(0, 0)$, $(0, 1)$, $(1, 1)$ on its negative side.

<img src="chart/linear-separated.png" />

The input layer contains two nodes $x_1$ and $x_2$. Weight vector $W = \begin{bmatrix} w_1 & w_2\end{bmatrix}$ and bias ($b$) are the parameters to be updated during model training.

$$z^{(i)} = w_1x_1^{(i)} + w_2x_2^{(i)} + b = Wx^{(i)} + b.\tag{1}$$

To be able to perform classification we need a nonlinear approach. This can be achieved with the sigmoid activation function, which maps most inputs to values close to either 0 or 1, with a smooth transition in a narrow range around 0.

The sigmoid activation function is defined as

$$a = \sigma\left(z\right) = \frac{1}{1+e^{-z}}.\tag{2}$$

<img src="chart/sigmoid.png" />

A threshold value of $0.5$ can be used for predictions: $1$ (red) if $a > 0.5$ and $0$ (blue) otherwise.
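
As a quick numerical check of this saturation behaviour, here is a minimal PHP sketch (using the same formula as the `sigmoid` method in `NeuralNetworkPerceptronClassifier.php`; the standalone closure is just for illustration):

```php
<?php
// Sigmoid saturates: large negative inputs map near 0, large positive near 1
$sigmoid = fn(float $z): float => 1 / (1 + exp(-$z));

foreach ([-5, -1, 0, 1, 5] as $z) {
    printf("sigmoid(%5.1f) = %.4f\n", $z, $sigmoid($z));
}
// sigmoid(-5.0) = 0.0067, sigmoid(0.0) = 0.5000, sigmoid(5.0) = 0.9933
```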

The single perceptron neural network with sigmoid activation function can be expressed as:

\begin{align}
z^{(i)} &= W x^{(i)} + b,\\
a^{(i)} &= \sigma\left(z^{(i)}\right).\tag{3}
\end{align}

With $m$ training examples organised in the columns of the ($2 \times m$) matrix $X$, the activation function can be applied element-wise, so the model can be written as:

\begin{align}
Z &= W X + b,\\
A &= \sigma\left(Z\right).\tag{4}
\end{align}
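
PHP has no built-in matrix type, so equation (4) unrolls into loops over samples and weights. A sketch consistent with the `forwardPropagation` method above (the standalone function is illustrative):

```php
<?php
// Z = W X + b applied column by column, then the sigmoid element-wise (eq. 4)
function forward(array $X, array $W, float $b): array
{
    $A = [];
    for ($j = 0; $j < count($X[0]); $j++) {  // each column of X is one sample
        $z = $b;
        for ($i = 0; $i < count($W); $i++) { // dot product W . x^(i)
            $z += $W[$i] * $X[$i][$j];
        }
        $A[$j] = 1 / (1 + exp(-$z));         // sigmoid activation
    }
    return $A;
}
```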

When dealing with classification problems, the most commonly used cost function is the **log loss** (binary cross-entropy), which is described by the following equation:

$$\mathcal{L}\left(W, b\right) = \frac{1}{m}\sum_{i=1}^{m} \left( -y^{(i)}\log\left(a^{(i)}\right) - \left(1-y^{(i)}\right)\log\left(1- a^{(i)}\right) \right),\tag{5}$$

where $y^{(i)} \in \{0,1\}$ are the original labels and $a^{(i)}$ are the continuous output values of the forward propagation step (elements of array $A$).
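
A small worked example of equation (5) with hypothetical values: for $Y = [1, 0]$ and $A = [0.9, 0.2]$ the cost is $\frac{1}{2}\left(-\log 0.9 - \log 0.8\right) \approx 0.164$. A sketch mirroring the `computeCost` method:

```php
<?php
// Binary cross-entropy (eq. 5) averaged over m samples
function cost(array $A, array $Y): float
{
    $m = count($Y);
    $sum = 0.0;
    for ($i = 0; $i < $m; $i++) {
        $sum += -($Y[$i] * log($A[$i]) + (1 - $Y[$i]) * log(1 - $A[$i]));
    }
    return $sum / $m;
}

echo cost([0.9, 0.2], [1, 0]); // ~0.1643
```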

We want to minimize the cost function during training. To implement gradient descent, calculate the partial derivatives using the chain rule:

\begin{align}
\frac{\partial \mathcal{L} }{ \partial w_1 } &= \frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_1^{(i)},\\
\frac{\partial \mathcal{L} }{ \partial w_2 } &= \frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_2^{(i)},\\
\frac{\partial \mathcal{L} }{ \partial b } &= \frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right).\tag{7}
\end{align}

The equations above can be rewritten in matrix form:

\begin{align}
\frac{\partial \mathcal{L} }{ \partial W } &= \begin{bmatrix} \frac{\partial \mathcal{L} }{ \partial w_1 } & \frac{\partial \mathcal{L} }{ \partial w_2 }\end{bmatrix} = \frac{1}{m}\left(A - Y\right)X^T,\\
\frac{\partial \mathcal{L} }{ \partial b } &= \frac{1}{m}\left(A - Y\right)\mathbf{1},\tag{8}
\end{align}

where $\left(A - Y\right)$ is an array of shape ($1 \times m$), $X^T$ is an array of shape ($m \times 2$), and $\mathbf{1}$ is an ($m \times 1$) vector of ones.
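
In PHP the matrix products of equation (8) again become plain loops; a sketch consistent with the `backwardPropagation` method above (the standalone function name is illustrative):

```php
<?php
// dW = (1/m)(A - Y)X^T and db = (1/m) sum(A - Y)  (eq. 8), written as loops
function gradients(array $A, array $X, array $Y): array
{
    $m  = count($Y);
    $dW = array_fill(0, count($X), 0.0);
    $db = 0.0;
    for ($j = 0; $j < $m; $j++) {
        $dZ = $A[$j] - $Y[$j];               // error on sample j
        for ($i = 0; $i < count($X); $i++) {
            $dW[$i] += $dZ * $X[$i][$j];
        }
        $db += $dZ;
    }
    return [array_map(fn($g) => $g / $m, $dW), $db / $m];
}
```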

Then you can update the parameters:

\begin{align}
W &= W - \alpha \frac{\partial \mathcal{L} }{ \partial W },\\
b &= b - \alpha \frac{\partial \mathcal{L} }{ \partial b },\tag{9}
\end{align}

where $\alpha$ is the learning rate. Repeat the process in a loop until the cost function stops decreasing.
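
Equation (9) as a short PHP sketch (one gradient-descent step; `step` is an illustrative standalone function mirroring `updateParams`):

```php
<?php
// One gradient-descent step: move W and b against the gradient (eq. 9)
function step(array $W, float $b, array $dW, float $db, float $alpha): array
{
    for ($i = 0; $i < count($W); $i++) {
        $W[$i] -= $alpha * $dW[$i];
    }
    return [$W, $b - $alpha * $db];
}
```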

In the last step, apply the threshold to turn the continuous activations into class predictions:

$$\hat{y} = \begin{cases} 1 & \mbox{if } a > 0.5 \\ 0 & \mbox{otherwise} \end{cases}\tag{10}$$

### Dataset

As a dataset we will generate $m=50$ data points $(x_1, x_2)$, where $x_1, x_2 \in \{0,1\}$, and save them in a PHP array `X` of shape $(2 \times m)$. The labels ($0$: blue, $1$: red) will be calculated so that $y = 1$ if $x_1 = 1$ and $x_2 = 0$; in all other cases $y = 0$. The labels will be saved in the array `Y` of shape $(1 \times m)$.

<img src="chart/dataset.png" />
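
A sketch of this dataset generation in PHP (equivalent to the `generateTrainingSet` method above; the class-balance printout is an illustrative addition):

```php
<?php
// Generate m random binary points and label them: y = 1 iff x1 = 1 and x2 = 0
$m = 50;
$X = [[], []];
$Y = [];
for ($j = 0; $j < $m; $j++) {
    $X[0][$j] = rand(0, 1);
    $X[1][$j] = rand(0, 1);
    $Y[$j] = ($X[0][$j] == 1 && $X[1][$j] == 0) ? 1 : 0;
}
printf("positive: %d of %d\n", array_sum($Y), $m);
```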

23 changes: 23 additions & 0 deletions
tests/NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifierTest.php
@@ -0,0 +1,23 @@
<?php

namespace NeuralNetworks\PerceptronClassifier;

require_once __DIR__ . '/../../../vendor/autoload.php';
require_once __DIR__ . '/../../../NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php';

use PHPUnit\Framework\TestCase;

class NeuralNetworkPerceptronClassifierTest extends TestCase
{
    public function testNeuralNetworkPerceptronClassification()
    {
        $nnClassifier = new NeuralNetworkPerceptronClassifier();
        [$X, $Y] = $nnClassifier->generateTrainingSet();
        // Train the model
        [$W, $b] = $nnClassifier->trainModel($X, $Y, 1000, 0.1);

        // Make predictions
        $predictions = $nnClassifier->predict([[0, 0, 1, 1], [0, 1, 1, 0]], $W, $b);
        $this->assertEquals([0, 0, 0, 1], $predictions);
    }
}