Neural networks perceptron classifier
Michał Żarnecki committed Dec 17, 2024
1 parent 94ef610, commit 97888b7
Showing 7 changed files with 324 additions and 0 deletions.

192 changes: 192 additions & 0 deletions
NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php
@@ -0,0 +1,192 @@
<?php

namespace NeuralNetworks\PerceptronClassifier;

/**
 * This class implements a simple neural network with a single output neuron (a perceptron).
 * The network uses the sigmoid activation function and performs binary classification.
 * (https://cw.fel.cvut.cz/b211/courses/be5b33rpz/labs/07_perceptron/start)
 *
 * @author Michał Żarnecki https://github.com/rzarno
 */
class NeuralNetworkPerceptronClassifier
{
    /**
     * @param array $X Input features, shape (n x m)
     * @param array $Y Target labels, shape (1 x m)
     * @param int $iterations Number of gradient descent iterations
     * @param float $learningRate Step size for parameter updates
     * @return array [$W, $b] Trained weights and bias
     */
    public function trainModel(array $X, array $Y, int $iterations, float $learningRate): array
    {
        [$W, $b] = $this->initParams(count($X));

        for ($i = 0; $i < $iterations; $i++) {
            // Forward propagation
            $A = $this->forwardPropagation($X, $W, $b);

            // Compute cost
            $cost = $this->computeCost($A, $Y);

            // Backward propagation
            [$dW, $db] = $this->backwardPropagation($A, $X, $Y);

            // Update parameters
            [$W, $b] = $this->updateParams($W, $b, $dW, $db, $learningRate);

            if ($i % 100 === 0) {
                echo "Iteration {$i} - Cost: {$cost}\n";
            }
        }

        return [$W, $b];
    }

    /**
     * @param array $X Input features, shape (n x m)
     * @param array $W Trained weights
     * @param float $b Trained bias
     * @return array Predicted labels (0 or 1) for each sample
     */
    public function predict(array $X, array $W, float $b): array
    {
        $A = $this->forwardPropagation($X, $W, $b);
        return array_map(fn($a) => $a > 0.5 ? 1 : 0, $A);
    }

    /**
     * Stage 1. Prepare dataset
     * @return array[] [$X, $Y] Features and labels
     */
    public function generateTrainingSet(): array
    {
        $m = 50;

        // Generate a 2 x m matrix with binary values (0 or 1)
        $X = [];
        for ($i = 0; $i < 2; $i++) {
            for ($j = 0; $j < $m; $j++) {
                $X[$i][$j] = rand(0, 1);
            }
        }

        // Compute Y: label is 1 exactly when X[0] == 1 and X[1] == 0
        $Y = [];
        for ($j = 0; $j < $m; $j++) {
            $Y[$j] = ($X[0][$j] == 1 && $X[1][$j] == 0) ? 1 : 0;
        }

        return [$X, $Y];
    }

    /**
     * Stage 2. Initialize model parameters
     * @param int $n Number of features
     * @return array [$W, $b] Weight and bias arrays
     */
    private function initParams(int $n): array
    {
        $W = [];
        for ($i = 0; $i < $n; $i++) {
            $W[$i] = mt_rand() / mt_getrandmax(); // Random values in [0, 1]
        }
        $b = 0.0; // Bias initialized to zero
        return [$W, $b];
    }

    /**
     * Sigmoid Activation Function
     * @param float $z
     * @return float
     */
    private function sigmoid(float $z): float
    {
        return 1 / (1 + exp(-$z));
    }

    /**
     * Stage 3. Forward Propagation
     * @param array $X
     * @param array $W
     * @param float $b
     * @return array Activations for each sample
     */
    private function forwardPropagation(array $X, array $W, float $b): array
    {
        $Z = [];
        for ($j = 0; $j < count($X[0]); $j++) {
            $sum = $b;
            for ($i = 0; $i < count($W); $i++) {
                $sum += $W[$i] * $X[$i][$j];
            }
            $Z[$j] = $this->sigmoid($sum);
        }
        return $Z;
    }

    /**
     * Stage 4. Compute Cost Function (Binary Cross-Entropy Loss)
     * @param array $A Activations from forward propagation
     * @param array $Y Target labels
     * @return float
     */
    private function computeCost(array $A, array $Y): float
    {
        $m = count($Y);
        $cost = 0.0;
        for ($i = 0; $i < $m; $i++) {
            // Clamp activations away from exactly 0 and 1 to avoid log(0) when the sigmoid saturates
            $a = min(max($A[$i], 1e-15), 1 - 1e-15);
            $cost += -($Y[$i] * log($a) + (1 - $Y[$i]) * log(1 - $a));
        }
        return $cost / $m;
    }

    /**
     * Stage 5. Backward Propagation
     * @param array $A Activations from forward propagation
     * @param array $X Input features
     * @param array $Y Target labels
     * @return array [$dW, $db] Gradients of the cost w.r.t. weights and bias
     */
    private function backwardPropagation(array $A, array $X, array $Y): array
    {
        $m = count($Y);
        $dW = array_fill(0, count($X), 0.0);
        $db = 0.0;

        for ($j = 0; $j < $m; $j++) {
            $dZ = $A[$j] - $Y[$j];
            for ($i = 0; $i < count($X); $i++) {
                $dW[$i] += $dZ * $X[$i][$j];
            }
            $db += $dZ;
        }

        // Average gradients
        for ($i = 0; $i < count($dW); $i++) {
            $dW[$i] /= $m;
        }
        $db /= $m;

        return [$dW, $db];
    }

    /**
     * Stage 6. Update Parameters
     * @param array $W
     * @param float $b
     * @param array $dW
     * @param float $db
     * @param float $learningRate
     * @return array [$W, $b] Updated parameters
     */
    private function updateParams(array $W, float $b, array $dW, float $db, float $learningRate): array
    {
        for ($i = 0; $i < count($W); $i++) {
            $W[$i] -= $learningRate * $dW[$i];
        }
        $b -= $learningRate * $db;

        return [$W, $b];
    }
}
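
A minimal usage sketch of the class above (the file name `demo.php` and the require path are illustrative assumptions; adjust them to your project layout):

```php
<?php
// demo.php (hypothetical entry point) -- train on generated data, then predict
require_once 'NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php';

use NeuralNetworks\PerceptronClassifier\NeuralNetworkPerceptronClassifier;

$classifier = new NeuralNetworkPerceptronClassifier();
[$X, $Y] = $classifier->generateTrainingSet();

// 1000 iterations with learning rate 0.1, as in the accompanying test
[$W, $b] = $classifier->trainModel($X, $Y, 1000, 0.1);

// Columns are the samples (0,0), (0,1), (1,1), (1,0); only the last is class 1
print_r($classifier->predict([[0, 0, 1, 1], [0, 1, 1, 0]], $W, $b));
```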

@@ -0,0 +1,100 @@
## Maths behind the single Perceptron Neural Network with Activation Function

This work is based on examples from the course https://www.coursera.org/learn/machine-learning-calculus by Luis Serrano.

Linear separation refers to data points in binary classification problems that can be separated by a linear decision boundary. If the data points can be separated by a line, linear function, or flat hyperplane, they are said to be linearly separable.

Data points in an n-dimensional space are linearly separable if there exists a hyperplane

$$w_1x_1 + w_2x_2 + \dots + w_nx_n + b = 0$$

that separates the two classes.

For two-dimensional input data, if there is a line with equation $$w_1x_1 + w_2x_2 + b = 0$$ that separates all samples of one class from the other class, then the class of an observation can be determined from which side of the line it lies on. Such classification problems are called "linearly separable", i.e. separable by a linear combination of the features. For example, the labeling rule used later in this document ($y = 1$ exactly when $x_1 = 1$ and $x_2 = 0$) is linearly separable: the line $x_1 - x_2 - 0.5 = 0$ places $(1, 0)$ on its positive side and $(0, 0)$, $(0, 1)$, $(1, 1)$ on its negative side.

<img src="chart/linear-separated.png" />

The input layer contains two nodes $x_1$ and $x_2$. Weight vector $W = \begin{bmatrix} w_1 & w_2\end{bmatrix}$ and bias ($b$) are the parameters to be updated during model training.

$$z^{(i)} = w_1x_1^{(i)} + w_2x_2^{(i)} + b = Wx^{(i)} + b.\tag{1}$$

To be able to perform classification we need a nonlinear approach. This can be achieved with the sigmoid activation function, which maps most inputs to values close to either 0 or 1, with a smooth transition in a narrow range around 0.

The sigmoid activation function is defined as

$$a = \sigma\left(z\right) = \frac{1}{1+e^{-z}}.\tag{2}$$

<img src="chart/sigmoid.png" />

A threshold value of $0.5$ can be used for predictions: $1$ (red) if $a > 0.5$ and $0$ (blue) otherwise.
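
As a quick numerical check of this saturation behaviour, here is a minimal PHP sketch (using the same formula as the `sigmoid` method in `NeuralNetworkPerceptronClassifier.php`; the standalone closure is just for illustration):

```php
<?php
// Sigmoid saturates: large negative inputs map near 0, large positive near 1
$sigmoid = fn(float $z): float => 1 / (1 + exp(-$z));

foreach ([-5, -1, 0, 1, 5] as $z) {
    printf("sigmoid(%5.1f) = %.4f\n", $z, $sigmoid($z));
}
// sigmoid(-5.0) = 0.0067, sigmoid(0.0) = 0.5000, sigmoid(5.0) = 0.9933
```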

The single perceptron neural network with sigmoid activation function can be expressed as:

\begin{align}
z^{(i)} &= W x^{(i)} + b,\\
a^{(i)} &= \sigma\left(z^{(i)}\right).\tag{3}
\end{align}

With $m$ training examples organised in the columns of the ($2 \times m$) matrix $X$, the activation function can be applied element-wise, so the model can be written as:

\begin{align}
Z &= W X + b,\\
A &= \sigma\left(Z\right).\tag{4}
\end{align}
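
PHP has no built-in matrix type, so equation (4) unrolls into loops over samples and weights. A sketch consistent with the `forwardPropagation` method above (the standalone function is illustrative):

```php
<?php
// Z = W X + b applied column by column, then the sigmoid element-wise (eq. 4)
function forward(array $X, array $W, float $b): array
{
    $A = [];
    for ($j = 0; $j < count($X[0]); $j++) {  // each column of X is one sample
        $z = $b;
        for ($i = 0; $i < count($W); $i++) { // dot product W . x^(i)
            $z += $W[$i] * $X[$i][$j];
        }
        $A[$j] = 1 / (1 + exp(-$z));         // sigmoid activation
    }
    return $A;
}
```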

When dealing with classification problems, the most commonly used cost function is the **log loss** (binary cross-entropy), which is described by the following equation:

$$\mathcal{L}\left(W, b\right) = \frac{1}{m}\sum_{i=1}^{m} \left( -y^{(i)}\log\left(a^{(i)}\right) - \left(1-y^{(i)}\right)\log\left(1- a^{(i)}\right) \right),\tag{5}$$

where $y^{(i)} \in \{0,1\}$ are the original labels and $a^{(i)}$ are the continuous output values of the forward propagation step (elements of array $A$).
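
A small worked example of equation (5) with hypothetical values: for $Y = [1, 0]$ and $A = [0.9, 0.2]$ the cost is $\frac{1}{2}\left(-\log 0.9 - \log 0.8\right) \approx 0.164$. A sketch mirroring the `computeCost` method:

```php
<?php
// Binary cross-entropy (eq. 5) averaged over m samples
function cost(array $A, array $Y): float
{
    $m = count($Y);
    $sum = 0.0;
    for ($i = 0; $i < $m; $i++) {
        $sum += -($Y[$i] * log($A[$i]) + (1 - $Y[$i]) * log(1 - $A[$i]));
    }
    return $sum / $m;
}

echo cost([0.9, 0.2], [1, 0]); // ~0.1643
```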

We want to minimize the cost function during training. To implement gradient descent, calculate the partial derivatives using the chain rule:

\begin{align}
\frac{\partial \mathcal{L} }{ \partial w_1 } &= \frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_1^{(i)},\\
\frac{\partial \mathcal{L} }{ \partial w_2 } &= \frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_2^{(i)},\\
\frac{\partial \mathcal{L} }{ \partial b } &= \frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right).\tag{7}
\end{align}

The equations above can be rewritten in matrix form:

\begin{align}
\frac{\partial \mathcal{L} }{ \partial W } &= \begin{bmatrix} \frac{\partial \mathcal{L} }{ \partial w_1 } & \frac{\partial \mathcal{L} }{ \partial w_2 }\end{bmatrix} = \frac{1}{m}\left(A - Y\right)X^T,\\
\frac{\partial \mathcal{L} }{ \partial b } &= \frac{1}{m}\left(A - Y\right)\mathbf{1},\tag{8}
\end{align}

where $\left(A - Y\right)$ is an array of shape ($1 \times m$), $X^T$ is an array of shape ($m \times 2$), and $\mathbf{1}$ is an ($m \times 1$) vector of ones.
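
In PHP the matrix products of equation (8) again become plain loops; a sketch consistent with the `backwardPropagation` method above (the standalone function name is illustrative):

```php
<?php
// dW = (1/m)(A - Y)X^T and db = (1/m) sum(A - Y)  (eq. 8), written as loops
function gradients(array $A, array $X, array $Y): array
{
    $m  = count($Y);
    $dW = array_fill(0, count($X), 0.0);
    $db = 0.0;
    for ($j = 0; $j < $m; $j++) {
        $dZ = $A[$j] - $Y[$j];               // error on sample j
        for ($i = 0; $i < count($X); $i++) {
            $dW[$i] += $dZ * $X[$i][$j];
        }
        $db += $dZ;
    }
    return [array_map(fn($g) => $g / $m, $dW), $db / $m];
}
```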

Then you can update the parameters:

\begin{align}
W &= W - \alpha \frac{\partial \mathcal{L} }{ \partial W },\\
b &= b - \alpha \frac{\partial \mathcal{L} }{ \partial b },\tag{9}
\end{align}

where $\alpha$ is the learning rate. Repeat the process in a loop until the cost function stops decreasing.
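
Equation (9) as a short PHP sketch (one gradient-descent step; `step` is an illustrative standalone function mirroring `updateParams`):

```php
<?php
// One gradient-descent step: move W and b against the gradient (eq. 9)
function step(array $W, float $b, array $dW, float $db, float $alpha): array
{
    for ($i = 0; $i < count($W); $i++) {
        $W[$i] -= $alpha * $dW[$i];
    }
    return [$W, $b - $alpha * $db];
}
```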

In the last step, apply the threshold to turn the continuous activations into class predictions:

$$\hat{y} = \begin{cases} 1 & \mbox{if } a > 0.5 \\ 0 & \mbox{otherwise} \end{cases}\tag{10}$$

### Dataset

As a dataset we will generate $m=50$ data points $(x_1, x_2)$, where $x_1, x_2 \in \{0,1\}$, and save them in a PHP array `X` of shape $(2 \times m)$. The labels ($0$: blue, $1$: red) will be calculated so that $y = 1$ if $x_1 = 1$ and $x_2 = 0$; in all other cases $y = 0$. The labels will be saved in the array `Y` of shape $(1 \times m)$.

<img src="chart/dataset.png" />
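
A sketch of this dataset generation in PHP (equivalent to the `generateTrainingSet` method above; the class-balance printout is an illustrative addition):

```php
<?php
// Generate m random binary points and label them: y = 1 iff x1 = 1 and x2 = 0
$m = 50;
$X = [[], []];
$Y = [];
for ($j = 0; $j < $m; $j++) {
    $X[0][$j] = rand(0, 1);
    $X[1][$j] = rand(0, 1);
    $Y[$j] = ($X[0][$j] == 1 && $X[1][$j] == 0) ? 1 : 0;
}
printf("positive: %d of %d\n", array_sum($Y), $m);
```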

23 changes: 23 additions & 0 deletions
tests/NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifierTest.php
@@ -0,0 +1,23 @@
<?php

namespace NeuralNetworks\PerceptronClassifier;

require_once __DIR__ . '/../../../vendor/autoload.php';
require_once __DIR__ . '/../../../NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php';

use PHPUnit\Framework\TestCase;

class NeuralNetworkPerceptronClassifierTest extends TestCase
{
    public function testNeuralNetworkPerceptronClassification()
    {
        $nnClassifier = new NeuralNetworkPerceptronClassifier();
        [$X, $Y] = $nnClassifier->generateTrainingSet();
        // Train the model
        [$W, $b] = $nnClassifier->trainModel($X, $Y, 1000, 0.1);

        // Make predictions
        $predictions = $nnClassifier->predict([[0, 0, 1, 1], [0, 1, 1, 0]], $W, $b);
        $this->assertEquals([0, 0, 0, 1], $predictions);
    }
}