From 21d3e094f5e96b5d4d6137ea6826310e58b1d27d Mon Sep 17 00:00:00 2001 From: James Chao Date: Thu, 21 Sep 2023 18:22:36 -0400 Subject: [PATCH 1/4] support for pytorch dataset --- ...board-standard-pytorch-model-dataset.ipynb | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb diff --git a/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb b/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb new file mode 100644 index 0000000000..fc1ff49ccc --- /dev/null +++ b/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "c758ccdb", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Files already downloaded and verified\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 60%|██████████████████████████████████████████████▍ | 1508/2500 [00:17<00:08, 118.56it/s]" + ] + } + ], + "source": [ + "import os\n", + "import torch\n", + "import torchvision\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from PIL import Image\n", + "from fastai.learner import load_learner\n", + "from torchvision.transforms import ToTensor\n", + "from responsibleai_vision.common.constants import ImageColumns\n", + "from tqdm import tqdm\n", + "\n", + "BATCH_SIZE = 4\n", + "DATASET_NAME = \"CIFAR\"\n", + "\n", + "dataset = torchvision.datasets.CIFAR10(root=\"data\", train=False, download=True, transform=ToTensor())\n", + "dataloader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)\n", + "\n", + "data = pd.DataFrame(columns=[ImageColumns.IMAGE.value,\n", + " ImageColumns.LABEL.value])\n", + "\n", + "cnts = {}\n", + "data_path = \"data/\" + DATASET_NAME + \"/\" \n", + "\n", + "for batch in tqdm(dataloader):\n", + " images, labels = batch\n", + " for i in range(BATCH_SIZE):\n", + " img, label = images[i], labels[i]\n", + " label = str(label.numpy())\n", + " img = img.numpy()\n", + " img = np.transpose(img, (1, 2, 0))\n", + " img = Image.fromarray((img * 255).astype(np.uint8))\n", + " \n", + " if label not in cnts:\n", + " os.makedirs(data_path + label, exist_ok=True)\n", + " cnts[label] = 1\n", + " else:\n", + " cnts[label] += 1\n", + " \n", + " img_path = data_path + label + \"/class_\" + label + \"_img_\" + str(cnts[label]) + \".jpeg\"\n", + " img.save(img_path)\n", + " item = pd.DataFrame(data={ImageColumns.IMAGE.value: [img_path], \n", + " ImageColumns.LABEL.value: [label]})\n", + " data = pd.concat([data, item], ignore_index=True)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e08447a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d97d3da3dfb8a4520795857426039bdbc279767d Mon Sep 17 00:00:00 2001 From: James Chao Date: Thu, 21 Sep 2023 
18:27:33 -0400 Subject: [PATCH 2/4] pytorch dataset support --- ...board-standard-pytorch-model-dataset.ipynb | 160 +++++++++++++----- 1 file changed, 114 insertions(+), 46 deletions(-) diff --git a/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb b/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb index fc1ff49ccc..afb7ce956b 100644 --- a/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb +++ b/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb @@ -3,35 +3,24 @@ { "cell_type": "code", "execution_count": null, - "id": "c758ccdb", + "id": "73d012de", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Files already downloaded and verified\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 60%|██████████████████████████████████████████████▍ | 1508/2500 [00:17<00:08, 118.56it/s]" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import torch\n", "import torchvision\n", "import numpy as np\n", "import pandas as pd\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import pickle\n", + "\n", + "from fastai.vision.all import *\n", "\n", "from PIL import Image\n", - "from fastai.learner import load_learner\n", "from torchvision.transforms import ToTensor\n", "from responsibleai_vision.common.constants import ImageColumns\n", "from tqdm import tqdm\n", @@ -40,44 +29,123 @@ "DATASET_NAME = \"CIFAR\"\n", "\n", "dataset = torchvision.datasets.CIFAR10(root=\"data\", train=False, download=True, transform=ToTensor())\n", - "dataloader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)\n", - "\n", - "data = pd.DataFrame(columns=[ImageColumns.IMAGE.value,\n", - " ImageColumns.LABEL.value])\n", + "data_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)\n", "\n", - "cnts = {}\n", - "data_path = \"data/\" + DATASET_NAME + \"/\" \n", - "\n", - "for batch in tqdm(dataloader):\n", - " images, labels = batch\n", - " for i in range(BATCH_SIZE):\n", - " img, label = images[i], labels[i]\n", - " label = str(label.numpy())\n", - " img = img.numpy()\n", - " img = np.transpose(img, (1, 2, 0))\n", - " img = Image.fromarray((img * 255).astype(np.uint8))\n", + "def convert_torch_model(model, df, train=False):\n", + " dls = ImageDataLoaders.from_df(df, path='./')\n", + " learn = Learner(dls, model)\n", + " \n", + " if train:\n", + " xb,yb = learn.dls.one_batch()\n", + " init_loss = learn.loss_func(learn.model(xb), yb)\n", + " learn.fit_one_cycle(10)\n", + " xb,yb = learn.dls.one_batch()\n", + " final_loss = learn.loss_func(learn.model(xb), yb)\n", + " print(final_loss)\n", + " \n", + " return learn\n", + " \n", " \n", - " if label not in cnts:\n", - " os.makedirs(data_path + label, exist_ok=True)\n", - " cnts[label] = 1\n", - " else:\n", - " cnts[label] += 1\n", + "def convert_torch_data(data_loader):\n", + " \n", + " data = pd.DataFrame(columns=[ImageColumns.IMAGE.value,\n", + " ImageColumns.LABEL.value])\n", + "\n", + " cnts = {}\n", + " data_path = \"data/\" + DATASET_NAME + \"/\" \n", + " \n", + " for batch in tqdm(data_loader):\n", " \n", - " img_path = data_path + label + \"/class_\" + label + \"_img_\" + str(cnts[label]) + \".jpeg\"\n", - " img.save(img_path)\n", - " item = pd.DataFrame(data={ImageColumns.IMAGE.value: [img_path], 
\n", - " ImageColumns.LABEL.value: [label]})\n", - " data = pd.concat([data, item], ignore_index=True)\n", - " " + " images, labels = batch\n", + " for i in range(BATCH_SIZE):\n", + " img, label = images[i], labels[i]\n", + " label = str(label.numpy())\n", + " img = img.numpy()\n", + " img = np.transpose(img, (1, 2, 0))\n", + " img = Image.fromarray((img * 255).astype(np.uint8))\n", + "\n", + " if label not in cnts:\n", + " os.makedirs(data_path + label, exist_ok=True)\n", + " cnts[label] = 1\n", + " else:\n", + " cnts[label] += 1\n", + "\n", + " img_path = data_path + label + \"/class_\" + label + \"_img_\" + str(cnts[label]) + \".jpeg\"\n", + " img.save(img_path)\n", + " item = pd.DataFrame(data={ImageColumns.IMAGE.value: [img_path], \n", + " ImageColumns.LABEL.value: [label]})\n", + " data = pd.concat([data, item], ignore_index=True)\n", + " \n", + " return data\n", + "\n", + "data = convert_torch_data(data_loader)\n", + "\n", + "test_data = data\n", + "class_names = data[ImageColumns.LABEL.value].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87572a47", + "metadata": {}, + "outputs": [], + "source": [ + "device = 'cuda'\n", + "\n", + "class CNN(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.conv1 = nn.Conv2d(3, 6, 5)\n", + " self.pool = nn.MaxPool2d(2, 2)\n", + " self.conv2 = nn.Conv2d(6, 16, 5)\n", + " self.fc1 = nn.Linear(16 * 5 * 5, 120)\n", + " self.fc2 = nn.Linear(120, 84)\n", + " self.fc3 = nn.Linear(84, 10)\n", + "\n", + " def forward(self, x):\n", + " x = self.pool(F.relu(self.conv1(x)))\n", + " x = self.pool(F.relu(self.conv2(x)))\n", + " x = torch.flatten(x, 1)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.relu(self.fc2(x))\n", + " x = self.fc3(x)\n", + " return x\n", + "\n", + "model = CNN().to(device)\n", + "\n", + "model = convert_torch_model(model, data, train=False)" ] }, { "cell_type": "code", "execution_count": null, - "id": "6e08447a", + "id": "5b9184ef", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from raiwidgets import ResponsibleAIDashboard\n", + "from responsibleai_vision import ModelTask, RAIVisionInsights\n", + "\n", + "rai_insights = RAIVisionInsights(model, data.sample(10, random_state=42),\n", + " \"label\", task_type=ModelTask.IMAGE_CLASSIFICATION,\n", + " classes=class_names)\n", + "rai_insights.explainer.add()\n", + "rai_insights.error_analysis.add()\n", + "rai_insights.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cb92832", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "ResponsibleAIDashboard(rai_insights)" + ] } ], "metadata": { From 47dd3a44de06512d83bf49e7c377c565fc5fc205 Mon Sep 17 00:00:00 2001 From: James Chao Date: Tue, 26 Sep 2023 18:21:13 -0400 Subject: [PATCH 3/4] pytorch model/dataset --- .../responsibleaidashboard-standard-pytorch-model-dataset.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb b/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb index afb7ce956b..9129e1d548 100644 --- a/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb +++ b/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb @@ -91,7 +91,7 @@ "metadata": {}, "outputs": [], "source": [ - "device = 'cuda'\n", + "device = 'cpu'\n", "\n", "class CNN(nn.Module):\n", " 
def __init__(self):\n", From ce790cc6b8d6187fd7621093a20b271014c8ab80 Mon Sep 17 00:00:00 2001 From: James Chao Date: Tue, 26 Sep 2023 18:24:01 -0400 Subject: [PATCH 4/4] pytorch model/dataset --- .../responsibleaidashboard-standard-pytorch-model-dataset.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb b/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb index 9129e1d548..afb7ce956b 100644 --- a/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb +++ b/notebooks/responsibleaidashboard/vision/responsibleaidashboard-standard-pytorch-model-dataset.ipynb @@ -91,7 +91,7 @@ "metadata": {}, "outputs": [], "source": [ - "device = 'cpu'\n", + "device = 'cuda'\n", "\n", "class CNN(nn.Module):\n", " def __init__(self):\n",
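Taken together, the four patches add a notebook that bridges a standard torchvision dataset and a plain PyTorch model into the inputs RAIVisionInsights expects: convert_torch_data writes each CIFAR-10 test image to disk under data/CIFAR/<label>/ and records its path and label in a pandas DataFrame, while convert_torch_model wraps the PyTorch CNN in a fastai Learner via ImageDataLoaders.from_df. For reference, here is a minimal sketch of the flow the final notebook arrives at. It is not standalone: it assumes the convert_torch_data, convert_torch_model, and CNN definitions from patch 2 are in scope, that torchvision, fastai, responsibleai_vision, and raiwidgets are installed, and (per patch 4) that a CUDA device is available — otherwise set device back to 'cpu' as patch 3 did.

import torch
import torchvision
from torchvision.transforms import ToTensor
from responsibleai_vision import ModelTask, RAIVisionInsights
from responsibleai_vision.common.constants import ImageColumns
from raiwidgets import ResponsibleAIDashboard

BATCH_SIZE = 4

# CIFAR-10 test split, loaded the same way as in patch 2.
dataset = torchvision.datasets.CIFAR10(root="data", train=False,
                                       download=True, transform=ToTensor())
data_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Notebook helpers (patch 2): convert_torch_data saves each image as a JPEG and
# returns a DataFrame of image paths and labels; convert_torch_model wraps the
# CNN in a fastai Learner (train=True would additionally run fit_one_cycle(10)).
data = convert_torch_data(data_loader)
class_names = data[ImageColumns.LABEL.value].unique()

device = 'cuda'  # patch 4 setting; patch 3 briefly used 'cpu'
learner = convert_torch_model(CNN().to(device), data, train=False)

# Responsible AI dashboard over a small sample, as in the final notebook cells.
rai_insights = RAIVisionInsights(learner, data.sample(10, random_state=42),
                                 "label", task_type=ModelTask.IMAGE_CLASSIFICATION,
                                 classes=class_names)
rai_insights.explainer.add()
rai_insights.error_analysis.add()
rai_insights.compute()
ResponsibleAIDashboard(rai_insights)

One caveat worth noting about the conversion loop: it iterates with `for i in range(BATCH_SIZE)`, which assumes every batch is full. That holds here (CIFAR-10's 10,000 test images divide evenly by 4), but with other batch sizes or datasets the last, partial batch would raise an IndexError; iterating over `range(len(images))` would be the safer choice.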