diff --git a/DIRECTORY.md b/DIRECTORY.md
index 63e2121..456d390 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -98,6 +98,15 @@
   * [Problem9](./Maths/ProjectEuler/Problem9.php)
   * [Eratosthenessieve](./Maths/EratosthenesSieve.php)
 
+## NeuralNetworks
+  * PerceptronClassifier
+    * [NeuralNetworkPerceptronClassifier.php](NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php)
+    * [README.md](NeuralNetworks/PerceptronClassifier/README.md)
+    * chart
+      * [dataset.png](NeuralNetworks/PerceptronClassifier/chart/dataset.png)
+      * [linear-separated.png](NeuralNetworks/PerceptronClassifier/chart/linear-separated.png)
+      * [sigmoid.png](NeuralNetworks/PerceptronClassifier/chart/sigmoid.png)
+
 ## Searches
   * [Binarysearch](./Searches/BinarySearch.php)
   * [Exponentialsearch](./Searches/ExponentialSearch.php)
diff --git a/NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php b/NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php
new file mode 100644
index 0000000..6b94e54
--- /dev/null
+++ b/NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php
@@ -0,0 +1,192 @@
+<?php
+
+/**
+ * Single-neuron perceptron classifier with sigmoid activation,
+ * trained by gradient descent on the binary cross-entropy loss.
+ */
+class NeuralNetworkPerceptronClassifier
+{
+    /**
+     * Train the model with gradient descent.
+     * @param array $X Features, shape (n x m): one row per feature, one column per example
+     * @param array $Y Labels, shape (1 x m)
+     * @param int $iterations Number of gradient-descent steps
+     * @param float $learningRate Step size
+     * @return array [$W, $b] Trained weights and bias
+     */
+    public function trainModel(array $X, array $Y, int $iterations, float $learningRate): array
+    {
+        [$W, $b] = $this->initParams(count($X));
+
+        for ($i = 0; $i < $iterations; $i++) {
+            // Forward propagation
+            $A = $this->forwardPropagation($X, $W, $b);
+
+            // Compute cost
+            $cost = $this->computeCost($A, $Y);
+
+            // Backward propagation
+            [$dW, $db] = $this->backwardPropagation($A, $X, $Y);
+
+            // Update parameters
+            [$W, $b] = $this->updateParams($W, $b, $dW, $db, $learningRate);
+
+            if ($i % 100 == 0) {
+                echo "Iteration {$i} - Cost: {$cost}\n";
+            }
+        }
+
+        return [$W, $b];
+    }
+
+    /**
+     * Predict a 0/1 label for each column of $X using trained parameters.
+     * @param array $X
+     * @param array $W
+     * @param float $b
+     * @return array
+     */
+    public function predict(array $X, array $W, float $b): array
+    {
+        $A = $this->forwardPropagation($X, $W, $b);
+        return array_map(fn($a) => $a > 0.5 ? 1 : 0, $A);
+    }
+
+    /**
+     * Stage 1. Prepare dataset
+     * @return array[] [$X, $Y] Features and labels
+     */
+    public function generateTrainingSet(): array
+    {
+        $m = 50;
+
+        // Generate a 2 x m matrix with binary values (0 or 1)
+        $X = [];
+        for ($i = 0; $i < 2; $i++) {
+            for ($j = 0; $j < $m; $j++) {
+                $X[$i][$j] = rand(0, 1);
+            }
+        }
+
+        // Compute Y: label is 1 when X[0] == 1 and X[1] == 0 (x1 AND NOT x2), else 0
+        $Y = [];
+        for ($j = 0; $j < $m; $j++) {
+            $Y[$j] = ($X[0][$j] == 1 && $X[1][$j] == 0) ? 1 : 0;
+        }
+
+        return [$X, $Y];
+    }
+
+    /**
+     * Stage 2. Initialize model parameters
+     * @param int $n Number of features
+     * @return array [$W, $b] Weight and bias arrays
+     */
+    private function initParams(int $n): array
+    {
+        $W = [];
+        for ($i = 0; $i < $n; $i++) {
+            $W[$i] = mt_rand() / mt_getrandmax(); // Random values in [0, 1]
+        }
+        $b = 0.0; // Bias initialized to zero
+        return [$W, $b];
+    }
+
+    /**
+     * Sigmoid Activation Function
+     * @param float $z
+     * @return float
+     */
+    private function sigmoid(float $z): float
+    {
+        return 1 / (1 + exp(-$z));
+    }
+
+    /**
+     * Stage 3. Forward Propagation
+     * @param array $X
+     * @param array $W
+     * @param float $b
+     * @return array Activations, one per training example
+     */
+    private function forwardPropagation(array $X, array $W, float $b): array
+    {
+        $Z = [];
+        for ($j = 0; $j < count($X[0]); $j++) {
+            $sum = $b;
+            for ($i = 0; $i < count($W); $i++) {
+                $sum += $W[$i] * $X[$i][$j];
+            }
+            $Z[$j] = $this->sigmoid($sum);
+        }
+        return $Z;
+    }
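+
+    // Worked example (hypothetical numbers, added for illustration):
+    // with $W = [0.5, -0.5], $b = 0.0 and a single data column x = (1, 0),
+    // forwardPropagation() computes z = 0.5 * 1 + (-0.5) * 0 + 0.0 = 0.5
+    // and returns sigmoid(0.5) = 1 / (1 + e^{-0.5}) ≈ 0.62, which the
+    // 0.5 threshold in predict() maps to class 1.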
+
+    /**
+     * Stage 4. Compute Cost Function (Binary Cross-Entropy Loss)
+     * @param array $A
+     * @param array $Y
+     * @return float
+     */
+    private function computeCost(array $A, array $Y): float
+    {
+        $m = count($Y);
+        $cost = 0.0;
+        for ($i = 0; $i < $m; $i++) {
+            // Clamp activations away from exactly 0 and 1 to avoid log(0)
+            $a = max(min($A[$i], 1 - 1e-15), 1e-15);
+            $cost += -($Y[$i] * log($a) + (1 - $Y[$i]) * log(1 - $a));
+        }
+        return $cost / $m;
+    }
+
+    /**
+     * Stage 5. Backward Propagation
+     * @param array $A
+     * @param array $X
+     * @param array $Y
+     * @return array [$dW, $db] Gradients of the cost w.r.t. weights and bias
+     */
+    private function backwardPropagation(array $A, array $X, array $Y): array
+    {
+        $m = count($Y);
+        $dW = array_fill(0, count($X), 0.0);
+        $db = 0.0;
+
+        for ($j = 0; $j < $m; $j++) {
+            $dZ = $A[$j] - $Y[$j];
+            for ($i = 0; $i < count($X); $i++) {
+                $dW[$i] += $dZ * $X[$i][$j];
+            }
+            $db += $dZ;
+        }
+
+        // Average gradients
+        for ($i = 0; $i < count($dW); $i++) {
+            $dW[$i] /= $m;
+        }
+        $db /= $m;
+
+        return [$dW, $db];
+    }
+
+    /**
+     * Stage 6. Update Parameters
+     * @param array $W
+     * @param float $b
+     * @param array $dW
+     * @param float $db
+     * @param float $learningRate
+     * @return array [$W, $b] Updated parameters
+     */
+    private function updateParams(array $W, float $b, array $dW, float $db, float $learningRate): array
+    {
+        for ($i = 0; $i < count($W); $i++) {
+            $W[$i] -= $learningRate * $dW[$i];
+        }
+        $b -= $learningRate * $db;
+
+        return [$W, $b];
+    }
+}
diff --git a/NeuralNetworks/PerceptronClassifier/README.md b/NeuralNetworks/PerceptronClassifier/README.md
new file mode 100644
index 0000000..870c972
--- /dev/null
+++ b/NeuralNetworks/PerceptronClassifier/README.md
@@ -0,0 +1,100 @@
+## Maths behind the single Perceptron Neural Network with Activation Function
+
+This work is based on examples from the course https://www.coursera.org/learn/machine-learning-calculus by Luis Serrano.
+
+Linear separation refers to binary classification problems in which the data points can be separated by a linear decision boundary.
+If the data points can be separated by a line, linear function, or flat hyperplane, they are said to be linearly separable.
+
+Points in an n-dimensional space are linearly separable if there exists a hyperplane
+
+$$w_1x_1 + w_2x_2 + \ldots + w_nx_n + b = 0$$
+
+that separates the two classes. For two-dimensional input data, this is a line with equation
+
+$$w_1x_1 + w_2x_2 + b = 0$$
+
+If such a line separates all samples of one class from the other, the class of an observation can be read off from the side of the line on which it falls.
+Such classification problems are called "linearly separable", i.e. separable by a linear combination of the features.
+
+![Linearly separated dataset](chart/linear-separated.png)
+
+The input layer contains two nodes $x_1$ and $x_2$. The weight vector $W = \begin{bmatrix} w_1 & w_2\end{bmatrix}$ and bias ($b$) are the parameters to be updated during model training.
+
+$$z^{(i)} = w_1x_1^{(i)} + w_2x_2^{(i)} + b = Wx^{(i)} + b.\tag{1}$$
+
+To perform classification we need a nonlinear step. This is achieved with the sigmoid activation function, which maps large negative inputs to values near $0$, large positive inputs to values near $1$, and transitions smoothly in a narrow range around $0$. It is defined as
+
+$$a = \sigma\left(z\right) = \frac{1}{1+e^{-z}}.\tag{2}$$
+
+![Sigmoid activation function](chart/sigmoid.png)
+
+A threshold value of $0.5$ can then be used for predictions: $1$ (red) if $a > 0.5$ and $0$ (blue) otherwise:
+
+$$\hat{y} = \begin{cases} 1 & \mbox{if } a > 0.5 \\ 0 & \mbox{otherwise} \end{cases}\tag{10}$$
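+
+As a quick illustration (a standalone sketch, not part of the classifier class added in this PR), the sigmoid of equation $(2)$ and the $0.5$ threshold of equation $(10)$ can be written in a few lines of PHP:
+
+```php
+<?php
+
+// Sigmoid squashes any real z into the open interval (0, 1).
+function sigmoid(float $z): float
+{
+    return 1 / (1 + exp(-$z));
+}
+
+// Threshold the activation to obtain a hard 0/1 class label.
+function label(float $a): int
+{
+    return $a > 0.5 ? 1 : 0;
+}
+
+echo label(sigmoid(2.0));  // 1, since sigmoid(2.0) ≈ 0.88 > 0.5
+echo label(sigmoid(-2.0)); // 0, since sigmoid(-2.0) ≈ 0.12 < 0.5
+```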
+
+The single perceptron neural network with sigmoid activation function can be expressed as:
+
+\begin{align}
+z^{(i)} &= W x^{(i)} + b,\\
+a^{(i)} &= \sigma\left(z^{(i)}\right).\tag{3}
+\end{align}
+
+With $m$ training examples organised in the columns of the ($2 \times m$) matrix $X$, the activation function can be applied element-wise, so the model can be written as:
+
+\begin{align}
+Z &= W X + b,\\
+A &= \sigma\left(Z\right).\tag{4}
+\end{align}
+
+When dealing with classification problems, the most commonly used cost function is the **log loss**, described by the following equation:
+
+$$\mathcal{L}\left(W, b\right) = \frac{1}{m}\sum_{i=1}^{m} L^{(i)}\left(W, b\right) = \frac{1}{m}\sum_{i=1}^{m} \large\left(\small -y^{(i)}\log\left(a^{(i)}\right) - (1-y^{(i)})\log\left(1- a^{(i)}\right) \large \right) \small,\tag{5}$$
+
+where $y^{(i)} \in \{0,1\}$ are the original labels and $a^{(i)}$ are the continuous output values of the forward propagation step (elements of array $A$).
+
+We want to minimize the cost function during training. To implement gradient descent, calculate the partial derivatives using the chain rule:
+
+\begin{align}
+\frac{\partial \mathcal{L} }{ \partial w_1 } &=
+\frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_1^{(i)},\\
+\frac{\partial \mathcal{L} }{ \partial w_2 } &=
+\frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_2^{(i)},\tag{7}\\
+\frac{\partial \mathcal{L} }{ \partial b } &=
+\frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right).
+\end{align}
+
+The equations above can be rewritten in matrix form:
+
+\begin{align}
+\frac{\partial \mathcal{L} }{ \partial W } &=
+\begin{bmatrix} \frac{\partial \mathcal{L} }{ \partial w_1 } &
+\frac{\partial \mathcal{L} }{ \partial w_2 }\end{bmatrix} = \frac{1}{m}\left(A - Y\right)X^T,\\
+\frac{\partial \mathcal{L} }{ \partial b } &= \frac{1}{m}\left(A - Y\right)\mathbf{1},
+\tag{8}
+\end{align}
+
+where $\left(A - Y\right)$ is an array of shape ($1 \times m$), $X^T$ is an array of shape ($m \times 2$) and $\mathbf{1}$ is a ($m \times 1$) vector of ones.
+
+Then you can update the parameters:
+
+\begin{align}
+W &= W - \alpha \frac{\partial \mathcal{L} }{ \partial W },\\
+b &= b - \alpha \frac{\partial \mathcal{L} }{ \partial b },
+\tag{9}
+\end{align}
+
+where $\alpha$ is the learning rate. Repeat the process in a loop until the cost function stops decreasing.
+
+In the last step, apply the threshold rule from equation $(10)$ to the final activations to obtain the predicted class labels.
+
+### Dataset
+
+As a dataset we generate $m=50$ data points $(x_1, x_2)$, where $x_1, x_2 \in \{0,1\}$, and save them in a ($2 \times m$) array `X`. The labels ($0$: blue, $1$: red) are calculated so that $y = 1$ if $x_1 = 1$ and $x_2 = 0$, and $y = 0$ in all other cases. The labels are saved in the array `Y` of shape $(1 \times m)$.
+
+![Training dataset](chart/dataset.png)
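+
+### Usage
+
+A minimal end-to-end sketch of how the class added in this PR can be used (the `require_once` path is an assumption; adjust it to your file layout or autoloader):
+
+```php
+<?php
+
+// Hypothetical include path; adjust to your setup.
+require_once 'NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php';
+
+$classifier = new NeuralNetworkPerceptronClassifier();
+
+// Stage 1: generate the (2 x m) features X and the (1 x m) labels Y.
+[$X, $Y] = $classifier->generateTrainingSet();
+
+// Stages 2-6: run gradient descent for 1000 iterations with learning rate 0.1.
+[$W, $b] = $classifier->trainModel($X, $Y, 1000, 0.1);
+
+// Predict labels for four new points, given as a (2 x 4) array.
+$predictions = $classifier->predict([[0, 0, 1, 1], [0, 1, 1, 0]], $W, $b);
+print_r($predictions); // expected: [0, 0, 0, 1], i.e. y = (x1 AND NOT x2)
+```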
diff --git a/NeuralNetworks/PerceptronClassifier/chart/dataset.png b/NeuralNetworks/PerceptronClassifier/chart/dataset.png
new file mode 100644
index 0000000..95a5042
Binary files /dev/null and b/NeuralNetworks/PerceptronClassifier/chart/dataset.png differ
diff --git a/NeuralNetworks/PerceptronClassifier/chart/linear-separated.png b/NeuralNetworks/PerceptronClassifier/chart/linear-separated.png
new file mode 100644
index 0000000..061be4b
Binary files /dev/null and b/NeuralNetworks/PerceptronClassifier/chart/linear-separated.png differ
diff --git a/NeuralNetworks/PerceptronClassifier/chart/sigmoid.png b/NeuralNetworks/PerceptronClassifier/chart/sigmoid.png
new file mode 100644
index 0000000..9856031
Binary files /dev/null and b/NeuralNetworks/PerceptronClassifier/chart/sigmoid.png differ
diff --git a/tests/NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifierTest.php b/tests/NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifierTest.php
new file mode 100644
index 0000000..61eacc4
--- /dev/null
+++ b/tests/NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifierTest.php
@@ -0,0 +1,23 @@
+<?php
+
+use PHPUnit\Framework\TestCase;
+
+require_once __DIR__ . '/../../../NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php';
+
+class NeuralNetworkPerceptronClassifierTest extends TestCase
+{
+    public function testPerceptronClassifier(): void
+    {
+        $nnClassifier = new NeuralNetworkPerceptronClassifier();
+        // Generate a randomised training set
+        [$X, $Y] = $nnClassifier->generateTrainingSet();
+        // Train the model
+        [$W, $b] = $nnClassifier->trainModel($X, $Y, 1000, 0.1);
+
+        // Make predictions
+        $predictions = $nnClassifier->predict([[0, 0, 1, 1], [0, 1, 1, 0]], $W, $b);
+        $this->assertEquals([0, 0, 0, 1], $predictions);
+    }
+}