From 19771c7f62fedf6aa167ad2cb4a9cad840d5d36f Mon Sep 17 00:00:00 2001
From: Muhammad Ammar Nabil
Date: Mon, 28 Aug 2023 08:19:13 +0700
Subject: [PATCH] Upload C2W4 Lab 2 (MultiGPU Mirrored)

---
 ...W4_Lab_2_multi_GPU_mirrored_strategy.ipynb | 422 ++++++++++++++++++
 1 file changed, 422 insertions(+)
 create mode 100644 Custom and Distributed Training with Tensorflow/Week 4/C2_W4_Lab_2_multi_GPU_mirrored_strategy.ipynb

diff --git a/Custom and Distributed Training with Tensorflow/Week 4/C2_W4_Lab_2_multi_GPU_mirrored_strategy.ipynb b/Custom and Distributed Training with Tensorflow/Week 4/C2_W4_Lab_2_multi_GPU_mirrored_strategy.ipynb
new file mode 100644
index 0000000..427096b
--- /dev/null
+++ b/Custom and Distributed Training with Tensorflow/Week 4/C2_W4_Lab_2_multi_GPU_mirrored_strategy.ipynb
@@ -0,0 +1,422 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "view-in-github",
+    "colab_type": "text"
+   },
+   "source": [
+    "Open In Colab"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "MX7rfuJ28EGn"
+   },
+   "source": [
+    "# Multi-GPU Mirrored Strategy\n",
+    "\n",
+    "In this ungraded lab, you'll go through how to set up a Multi-GPU Mirrored Strategy. The lab environment only has a CPU, but we placed the code here in case you want to try this out for yourself on a multi-GPU device.\n",
+    "\n",
+    "**Notes:**\n",
+    "- If you are running this on Coursera, you'll see a warning that no GPU devices are present.\n",
+    "- If you are running this in Colab, make sure you have set your runtime to `GPU`.\n",
+    "- In both cases, you'll see that only 1 device is available.\n",
+    "- One device is sufficient for helping you understand these distribution strategies."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "UTAQd7DC8EGt"
+   },
+   "source": [
+    "## Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "id": "GkzALuf_8EGy"
+   },
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "import numpy as np\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "BKUMwQEL8EG4"
+   },
+   "source": [
+    "## Setup Distribution Strategy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "uV2ADYo18EG7",
+    "outputId": "064d7c32-e9f9-41c2-da72-51bf7ed0c1c1"
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Number of devices: 1\n"
+     ]
+    }
+   ],
+   "source": [
+    "# By default, TensorFlow only uses GPUs with at least 8 multiprocessors\n",
+    "# (cores). If your GPU has fewer, you need to lower this threshold.\n",
+    "# In this case one of my GPUs has 4 cores.\n",
+    "os.environ[\"TF_MIN_GPU_MULTIPROCESSOR_COUNT\"] = \"4\"\n",
+    "\n",
+    "# If the list of devices is not specified in the\n",
+    "# `tf.distribute.MirroredStrategy` constructor, it will be auto-detected.\n",
+    "# If you have *different* GPUs in your system, you probably have to set up\n",
+    "# cross_device_ops like this:\n",
+    "strategy: tf.distribute.MirroredStrategy = tf.distribute.MirroredStrategy(\n",
+    "    cross_device_ops=tf.distribute.HierarchicalCopyAllReduce()\n",
+    ")\n",
+    "print('Number of devices: {}'.format(strategy.num_replicas_in_sync))"
+   ]
+  },
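+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick sanity check (a small illustrative addition), you can list the devices TensorFlow can see. `MirroredStrategy` creates one replica per visible GPU and falls back to the CPU when none are available, which is why `num_replicas_in_sync` is 1 in this environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List the devices TensorFlow detected. On a multi-GPU machine you\n",
+    "# would see one PhysicalDevice entry per GPU here.\n",
+    "print(\"GPUs:\", tf.config.list_physical_devices('GPU'))\n",
+    "print(\"All devices:\", tf.config.list_logical_devices())"
+   ]
+  },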
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "V6wkEJNr8EHB"
+   },
+   "source": [
+    "## Prepare the Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "id": "jcvYJxLK8EHF"
+   },
+   "outputs": [],
+   "source": [
+    "# Get the data\n",
+    "fa_mnist: object = tf.keras.datasets.fashion_mnist\n",
+    "train_images: np.ndarray\n",
+    "train_labels: np.ndarray\n",
+    "test_images: np.ndarray\n",
+    "test_labels: np.ndarray\n",
+    "(train_images, train_labels), (test_images, test_labels) = fa_mnist.load_data()\n",
+    "\n",
+    "# Add a dimension to the array -> new shape == (28, 28, 1).\n",
+    "# We are doing this because the first layer in our model is a convolutional\n",
+    "# layer and it requires a 4D input (batch_size, height, width, channels).\n",
+    "# The batch_size dimension will be added later on.\n",
+    "train_images = train_images[..., None]\n",
+    "test_images = test_images[..., None]\n",
+    "\n",
+    "# Normalize the images to the [0, 1] range.\n",
+    "train_images = train_images / np.float32(255)\n",
+    "test_images = test_images / np.float32(255)\n",
+    "\n",
+    "# Set the shuffle buffer and batch sizes\n",
+    "BUFFER_SIZE: int = len(train_images)\n",
+    "BATCH_SIZE_PER_REPLICA: int = 64\n",
+    "GLOBAL_BATCH_SIZE: int = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync\n",
+    "\n",
+    "# Create batched Datasets from the arrays\n",
+    "train_dataset = tf.data.Dataset.from_tensor_slices(\n",
+    "    (train_images, train_labels)\n",
+    ").shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE)\n",
+    "test_dataset = tf.data.Dataset.from_tensor_slices(\n",
+    "    (test_images, test_labels)\n",
+    ").batch(GLOBAL_BATCH_SIZE)\n",
+    "\n",
+    "# Create Distributed Datasets from the datasets\n",
+    "DistriDataset = tf.distribute.DistributedDataset\n",
+    "train_dist_dataset: DistriDataset = strategy.experimental_distribute_dataset(\n",
+    "    train_dataset\n",
+    ")\n",
+    "test_dist_dataset: DistriDataset = strategy.experimental_distribute_dataset(\n",
+    "    test_dataset\n",
+    ")"
+   ]
+  },
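+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To see what distribution does to the input pipeline, here is a small illustrative check (an optional addition): each element of a distributed dataset is a per-replica structure, and `strategy.experimental_local_results` unwraps it into one value per replica. With the single device in this environment, you'll see one shard holding the full global batch of 64 examples."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Peek at one distributed batch. Each replica receives its own shard\n",
+    "# of the global batch, i.e. BATCH_SIZE_PER_REPLICA examples.\n",
+    "sample_images, sample_labels = next(iter(train_dist_dataset))\n",
+    "local_images = strategy.experimental_local_results(sample_images)\n",
+    "local_labels = strategy.experimental_local_results(sample_labels)\n",
+    "print(\"Shards in this batch:\", len(local_images))\n",
+    "print(\"Shape of the first image shard:\", local_images[0].shape)\n",
+    "print(\"Shape of the first label shard:\", local_labels[0].shape)"
+   ]
+  },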
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "9YV6YPUt8EHK"
+   },
+   "source": [
+    "## Define the Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "id": "Hh8629W_8EHT"
+   },
+   "outputs": [],
+   "source": [
+    "# Create the model architecture\n",
+    "def create_model() -> tf.keras.Sequential:\n",
+    "    model: tf.keras.Sequential = tf.keras.Sequential([\n",
+    "        tf.keras.layers.Conv2D(32, 3, activation='relu'),\n",
+    "        tf.keras.layers.MaxPooling2D(),\n",
+    "        tf.keras.layers.Conv2D(64, 3, activation='relu'),\n",
+    "        tf.keras.layers.MaxPooling2D(),\n",
+    "        tf.keras.layers.Flatten(),\n",
+    "        tf.keras.layers.Dense(64, activation='relu'),\n",
+    "        tf.keras.layers.Dense(10)\n",
+    "    ])\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "NRfvqRhj8EHY"
+   },
+   "source": [
+    "## Configure custom training\n",
+    "\n",
+    "Instead of `model.compile()`, we're going to do custom training, so let's do that within a strategy scope."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "id": "d-d4yI5Y8EHb"
+   },
+   "outputs": [],
+   "source": [
+    "with strategy.scope():\n",
+    "    # We will use sparse categorical crossentropy as usual. But instead of\n",
+    "    # having the loss function manage the map-reduce across GPUs for us,\n",
+    "    # we'll do it ourselves with a simple algorithm.\n",
+    "    # Remember -- the map-reduce is how the losses get aggregated.\n",
+    "    # Set reduction to `none` so we can do the reduction afterwards and\n",
+    "    # divide by the global batch size.\n",
+    "    Losses = tf.keras.losses.Loss\n",
+    "    loss_object: Losses = tf.keras.losses.SparseCategoricalCrossentropy(\n",
+    "        from_logits=True, reduction=tf.keras.losses.Reduction.NONE\n",
+    "    )\n",
+    "\n",
+    "    def compute_loss(labels: tf.Tensor, predictions: tf.Tensor) -> tf.Tensor:\n",
+    "        # compute_loss uses the loss object to compute the loss.\n",
+    "        # Notice that per_example_loss will have an entry per GPU, so on a\n",
+    "        # 2-GPU machine there'll be 2 -- i.e. one loss tensor per replica.\n",
+    "        per_example_loss: tf.Tensor = loss_object(labels, predictions)\n",
+    "        # You can print it to see it -- you'll get output like this:\n",
+    "        # Tensor(\"sparse_categorical_crossentropy/weighted_loss/Mul:0\",\n",
+    "        #        shape=(48,), dtype=float32,\n",
+    "        #        device=/job:localhost/replica:0/task:0/device:GPU:0)\n",
+    "        # Tensor(\"replica_1/sparse_categorical_crossentropy/weighted_loss/Mul:0\",\n",
+    "        #        shape=(48,), dtype=float32,\n",
+    "        #        device=/job:localhost/replica:0/task:0/device:GPU:1)\n",
+    "        # Note in particular that replica_0 isn't named in the weighted_loss\n",
+    "        # -- the first is unnamed, the second is replica_1, etc.\n",
+    "        print(\"per_example_loss:\", per_example_loss)\n",
+    "        return tf.nn.compute_average_loss(\n",
+    "            per_example_loss, global_batch_size=GLOBAL_BATCH_SIZE\n",
+    "        )\n",
+    "\n",
+    "    # We'll just reduce by getting the average of the losses\n",
+    "    Metrics = tf.keras.metrics.Metric\n",
+    "    test_loss: Metrics = tf.keras.metrics.Mean(name='test_loss')\n",
+    "\n",
+    "    # Accuracy on train and test will be SparseCategoricalAccuracy\n",
+    "    train_accuracy: Metrics = tf.keras.metrics.SparseCategoricalAccuracy(\n",
+    "        name='train_accuracy'\n",
+    "    )\n",
+    "    test_accuracy: Metrics = tf.keras.metrics.SparseCategoricalAccuracy(\n",
+    "        name='test_accuracy'\n",
+    "    )\n",
+    "\n",
+    "    # Optimizer will be Adam\n",
+    "    optimizer: tf.keras.optimizers.Optimizer = tf.keras.optimizers.Adam()\n",
+    "\n",
+    "    # Create the model within the scope\n",
+    "    model: tf.keras.Sequential = create_model()"
+   ]
+  },
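+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To make the loss scaling concrete, here is a tiny numeric sketch (an optional addition using made-up values): `tf.nn.compute_average_loss` divides the *sum* of one replica's per-example losses by the *global* batch size, not by the size of that replica's shard. Summing these per-replica results across replicas therefore recovers exactly the mean loss over the full global batch."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Made-up per-example losses for one replica's shard of 4 examples,\n",
+    "# with a hypothetical global batch size of 8 (i.e. 2 replicas).\n",
+    "fake_losses = tf.constant([1.0, 2.0, 3.0, 6.0])\n",
+    "scaled = tf.nn.compute_average_loss(fake_losses, global_batch_size=8)\n",
+    "# sum = 12.0, divided by the global batch size 8 -> 1.5\n",
+    "print(scaled.numpy())"
+   ]
+  },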
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JmmjZtxf8EHf"
+   },
+   "source": [
+    "## Train and Test Step Functions\n",
+    "\n",
+    "Let's define a few utilities to facilitate the training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "id": "k0VOdqKP2NEz"
+   },
+   "outputs": [],
+   "source": [
+    "# `strategy.run` replicates the provided computation and runs it\n",
+    "# with the distributed input.\n",
+    "@tf.function\n",
+    "def distributed_train_step(dataset_inputs) -> tf.Tensor:\n",
+    "    # `dataset_inputs` is one batch from the distributed dataset:\n",
+    "    # a per-replica (images, labels) structure.\n",
+    "    per_replica_losses = strategy.run(\n",
+    "        train_step, args=(dataset_inputs,)\n",
+    "    )\n",
+    "    # tf.print(\"per_replica_losses\", per_replica_losses.values)\n",
+    "    return strategy.reduce(\n",
+    "        tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None\n",
+    "    )\n",
+    "\n",
+    "def train_step(inputs) -> tf.Tensor:\n",
+    "    # Within a replica, `inputs` is an ordinary (images, labels) tuple.\n",
+    "    images: tf.Tensor\n",
+    "    labels: tf.Tensor\n",
+    "    images, labels = inputs\n",
+    "    with tf.GradientTape() as tape:\n",
+    "        predictions: tf.Tensor = model(images, training=True)\n",
+    "        loss: tf.Tensor = compute_loss(labels, predictions)\n",
+    "\n",
+    "    gradients: list = tape.gradient(loss, model.trainable_variables)\n",
+    "    optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
+    "\n",
+    "    train_accuracy.update_state(labels, predictions)\n",
+    "    return loss\n",
+    "\n",
+    "#######################\n",
+    "# Test Step Functions\n",
+    "#######################\n",
+    "@tf.function\n",
+    "def distributed_test_step(dataset_inputs) -> None:\n",
+    "    strategy.run(test_step, args=(dataset_inputs,))\n",
+    "\n",
+    "def test_step(inputs) -> None:\n",
+    "    images: tf.Tensor\n",
+    "    labels: tf.Tensor\n",
+    "    images, labels = inputs\n",
+    "\n",
+    "    predictions: tf.Tensor = model(images, training=False)\n",
+    "    t_loss: tf.Tensor = loss_object(labels, predictions)\n",
+    "\n",
+    "    test_loss.update_state(t_loss)\n",
+    "    test_accuracy.update_state(labels, predictions)\n"
+   ]
+  },
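+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an optional aside, here is what `strategy.reduce` does with the per-replica values returned by `strategy.run`: it combines them into a single tensor, in our case by summing. Because each replica's loss was already divided by the global batch size in `compute_loss`, summing across replicas recovers the mean loss over the whole global batch."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run a trivial computation on every replica and sum the results.\n",
+    "# With N replicas this prints N.0; here, with 1 replica, it prints 1.0.\n",
+    "per_replica_ones = strategy.run(lambda: tf.constant(1.0))\n",
+    "print(strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_ones, axis=None))"
+   ]
+  },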
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "rHN2D-MF8EHl"
+   },
+   "source": [
+    "## Training Loop\n",
+    "\n",
+    "We can now start training the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "XEowKX6m8EHm",
+    "outputId": "95e1b74e-d3ab-4b20-8f13-e6f6661eb624"
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "per_example_loss: Tensor(\"sparse_categorical_crossentropy/weighted_loss/Mul:0\", shape=(64,), dtype=float32, device=/job:localhost/replica:0/task:0/device:GPU:0)\n",
+      "per_example_loss: Tensor(\"sparse_categorical_crossentropy/weighted_loss/Mul:0\", shape=(64,), dtype=float32, device=/job:localhost/replica:0/task:0/device:GPU:0)\n",
+      "per_example_loss: Tensor(\"sparse_categorical_crossentropy/weighted_loss/Mul:0\", shape=(32,), dtype=float32, device=/job:localhost/replica:0/task:0/device:GPU:0)\n",
+      "Epoch 1:\n",
+      "Loss: 0.5070505738258362, Accuracy: 81.67666625976562, Test Loss: 0.38398948311805725, Test Accuracy: 85.86000061035156\n",
+      "Epoch 2:\n",
+      "Loss: 0.33926278352737427, Accuracy: 87.7249984741211, Test Loss: 0.3299911618232727, Test Accuracy: 88.09000396728516\n",
+      "Epoch 3:\n",
+      "Loss: 0.29532328248023987, Accuracy: 89.12666320800781, Test Loss: 0.32005777955055237, Test Accuracy: 88.44000244140625\n",
+      "Epoch 4:\n",
+      "Loss: 0.2660471200942993, Accuracy: 90.19667053222656, Test Loss: 0.28988128900527954, Test Accuracy: 89.4800033569336\n",
+      "Epoch 5:\n",
+      "Loss: 0.24343512952327728, Accuracy: 91.13166809082031, Test Loss: 0.2764923870563507, Test Accuracy: 89.9000015258789\n",
+      "Epoch 6:\n",
+      "Loss: 0.22115500271320343, Accuracy: 91.75833129882812, Test Loss: 0.2748279571533203, Test Accuracy: 90.09000396728516\n",
+      "Epoch 7:\n",
+      "Loss: 0.2055717408657074, Accuracy: 92.35166931152344, Test Loss: 0.2636153995990753, Test Accuracy: 90.44999694824219\n",
+      "Epoch 8:\n",
+      "Loss: 0.1894248127937317, Accuracy: 92.99166870117188, Test Loss: 0.2818222641944885, Test Accuracy: 89.75\n",
+      "Epoch 9:\n",
+      "Loss: 0.173929363489151, Accuracy: 93.461669921875, Test Loss: 0.26498162746429443, Test Accuracy: 90.94999694824219\n",
+      "Epoch 10:\n",
+      "Loss: 0.15865449607372284, Accuracy: 94.09666442871094, Test Loss: 0.26011180877685547, Test Accuracy: 91.11000061035156\n"
+     ]
+    }
+   ],
+   "source": [
+    "EPOCHS: int = 10\n",
+    "for epoch in range(EPOCHS):\n",
+    "    # Do Training\n",
+    "    total_loss = 0.0\n",
+    "    num_batches: int = 0\n",
+    "    for batch in train_dist_dataset:\n",
+    "        total_loss += distributed_train_step(batch)\n",
+    "        num_batches += 1\n",
+    "    train_loss: tf.Tensor = total_loss / num_batches\n",
+    "\n",
+    "    # Do Testing\n",
+    "    for batch in test_dist_dataset:\n",
+    "        distributed_test_step(batch)\n",
+    "\n",
+    "    print(f\"Epoch {epoch+1}:\\nLoss: {train_loss}, \", end=\"\")\n",
+    "    print(f\"Accuracy: {train_accuracy.result()*100}, \", end=\"\")\n",
+    "    print(f\"Test Loss: {test_loss.result()}, \", end=\"\")\n",
+    "    print(f\"Test Accuracy: {test_accuracy.result()*100}\")\n",
+    "\n",
+    "    # Reset the metrics for the next epoch\n",
+    "    test_loss.reset_states()\n",
+    "    train_accuracy.reset_states()\n",
+    "    test_accuracy.reset_states()"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": [],
+   "gpuType": "T4",
+   "include_colab_link": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  },
+  "accelerator": "GPU"
+ },
+ "nbformat": 4,
"nbformat_minor": 0 +} \ No newline at end of file