From 04358d690f5483f5522255e28d77efa51bbd936e Mon Sep 17 00:00:00 2001 From: patel-zeel Date: Sun, 26 Nov 2023 20:40:16 +0530 Subject: [PATCH] add torch dataloaders notebook --- .gitignore | 8 +- posts/2023-11-26-Torch-DataLoaders.ipynb | 1196 ++++++++++++++++++++++ 2 files changed, 1203 insertions(+), 1 deletion(-) create mode 100644 posts/2023-11-26-Torch-DataLoaders.ipynb diff --git a/.gitignore b/.gitignore index 11a4213..7a86fc8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ /.quarto/ _site/ -posts/logs \ No newline at end of file +posts/logs + +data/ +*.pdf +*.xlsx +launch.json +settings.json \ No newline at end of file diff --git a/posts/2023-11-26-Torch-DataLoaders.ipynb b/posts/2023-11-26-Torch-DataLoaders.ipynb new file mode 100644 index 0000000..214e58a --- /dev/null +++ b/posts/2023-11-26-Torch-DataLoaders.ipynb @@ -0,0 +1,1196 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "author: Zeel B Patel\n", + "badges: true\n", + "categories: ML\n", + "description: An exploratory analysis of various dataset handling processes to optimize memory, diskspace and speed.\n", + "title: Data Handling for Large Scale ML\n", + "date: '2023-09-30'\n", + "toc: true\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "from numcodecs import GZip, Zstd, Blosc\n", + "\n", + "from time import time, sleep\n", + "from tqdm import tqdm\n", + "from glob import glob\n", + "from os.path import join\n", + "from torch.utils.data import DataLoader, Dataset\n", + "from joblib import Parallel, delayed\n", + "import xarray as xr\n", + "import numpy as np\n", + "\n", + "from torchvision.models import vit_b_16\n", + "from astra.torch.models import ViTClassifier\n", + "from astra.torch.utils import train_fn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating Custom Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:  (channel: 3, col: 224, lat_lag: 5, lon_lag: 5, row: 224)\n",
+       "Coordinates:\n",
+       "  * channel  (channel) uint8 0 1 2\n",
+       "  * col      (col) uint8 0 1 2 3 4 5 6 7 8 ... 216 217 218 219 220 221 222 223\n",
+       "    lat      float64 ...\n",
+       "  * lat_lag  (lat_lag) int8 -2 -1 0 1 2\n",
+       "    lon      float64 ...\n",
+       "  * lon_lag  (lon_lag) int8 -2 -1 0 1 2\n",
+       "  * row      (row) uint8 0 1 2 3 4 5 6 7 8 ... 216 217 218 219 220 221 222 223\n",
+       "Data variables:\n",
+       "    data     (lat_lag, lon_lag, row, col, channel) uint8 dask.array<chunksize=(3, 3, 112, 112, 3), meta=np.ndarray>\n",
+       "    label    (lat_lag, lon_lag) int8 dask.array<chunksize=(5, 5), meta=np.ndarray>
" + ], + "text/plain": [ + "\n", + "Dimensions: (channel: 3, col: 224, lat_lag: 5, lon_lag: 5, row: 224)\n", + "Coordinates:\n", + " * channel (channel) uint8 0 1 2\n", + " * col (col) uint8 0 1 2 3 4 5 6 7 8 ... 216 217 218 219 220 221 222 223\n", + " lat float64 ...\n", + " * lat_lag (lat_lag) int8 -2 -1 0 1 2\n", + " lon float64 ...\n", + " * lon_lag (lon_lag) int8 -2 -1 0 1 2\n", + " * row (row) uint8 0 1 2 3 4 5 6 7 8 ... 216 217 218 219 220 221 222 223\n", + "Data variables:\n", + " data (lat_lag, lon_lag, row, col, channel) uint8 dask.array\n", + " label (lat_lag, lon_lag) int8 dask.array" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "base_path = \"/home/patel_zeel/bkdb/bangladesh_pnas_pred/team1\"\n", + "xr.open_zarr(join(base_path, \"21.11,92.18.zarr\"), consolidated=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "class XarrayDataset(Dataset):\n", + " def __init__(self, path, max_files):\n", + " self.base_path = path\n", + " self.all_files = glob(join(path, \"*.zarr\"))[:max_files]\n", + " self.all_files.sort()\n", + " self.lat_lags = [-2, -1, 0, 1, 2]\n", + " self.lon_lags = [-2, -1, 0, 1, 2]\n", + " \n", + " def __len__(self):\n", + " return len(self.all_files) * 25\n", + " \n", + " def __getitem__(self, idx):\n", + " file_idx = idx // 25\n", + " local_idx = idx % 25\n", + " lat_lag = self.lat_lags[local_idx // 5]\n", + " lon_lag = self.lon_lags[local_idx % 5]\n", + " \n", + " with xr.open_zarr(self.all_files[file_idx], consolidated=False) as ds:\n", + " img = ds.isel(lat_lag=lat_lag, lon_lag=lon_lag)['data']\n", + " # swap dims to make it [\"channel\", \"row\", \"col\"]\n", + " img = img.transpose(\"channel\", \"row\", \"col\").values\n", + " return img.astype(np.float32) / 255" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def process_it(dataset, batch_size, num_workers):\n", + " dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True, pin_memory_device='cuda', prefetch_factor=num_workers//2)\n", + "\n", + " model = ViTClassifier(vit_b_16, None, 2).to('cuda')\n", + " optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\n", + "\n", + " pbar = tqdm(dataloader)\n", + "\n", + " train_init = time()\n", + " iter_times = []\n", + " for batch in pbar:\n", + " init = time()\n", + " optimizer.zero_grad()\n", + " out = model(batch.to('cuda'))\n", + " loss = nn.CrossEntropyLoss()(out, torch.randint(0, 2, (batch.shape[0],)).to('cuda'))\n", + " loss.backward()\n", + " optimizer.step()\n", + " time_taken = time() - init\n", + " pbar.set_description(f\"Time: {time_taken:.4f}\")\n", + " iter_times.append(time_taken)\n", + " \n", + " total_time = time() - train_init\n", + " print(f\"Average Iteration Processing Time: {np.mean(iter_times):.4f} +- {np.std(iter_times):.4f}\")\n", + " print(f\"Total time for all iterations: {np.sum(iter_times):.4f}\")\n", + " print(f\"Total Wall Time per iteration: {total_time / len(dataloader):.4f}\")\n", + " print(f\"Total Wall Time: {total_time:.4f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Global config" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "max_files = 500" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "Time: 1.5727: 100%|██████████| 49/49 [01:27<00:00, 1.78s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Iteration Processing Time: 1.6474 +- 0.2618\n", + "Total time for all iterations: 80.7246\n", + "Total Wall Time per iteration: 1.7799\n", + "Total Wall Time: 87.2134\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "batch_size = 256\n", + "num_workers = 32\n", + "\n", + "dataset = XarrayDataset(base_path, max_files=max_files)\n", + "process_it(dataset, batch_size, num_workers)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Time: 2.6731: 100%|██████████| 25/25 [01:32<00:00, 3.69s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Iteration Processing Time: 3.1956 +- 0.3949\n", + "Total time for all iterations: 79.8897\n", + "Total Wall Time per iteration: 3.6910\n", + "Total Wall Time: 92.2762\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "batch_size = 512\n", + "num_workers = 16\n", + "\n", + "dataset = XarrayDataset(base_path, max_files=max_files)\n", + "process_it(dataset, batch_size, num_workers)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Time: 2.6726: 100%|██████████| 25/25 [01:32<00:00, 3.69s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Iteration Processing Time: 3.1938 +- 0.4043\n", + "Total time for all iterations: 79.8451\n", + "Total Wall Time per iteration: 3.6908\n", + "Total Wall Time: 92.2689\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "batch_size = 512\n", + "num_workers = 32\n", + "\n", + "dataset = XarrayDataset(base_path, max_files=max_files)\n", + "process_it(dataset, batch_size, num_workers)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Time: 0.8377: 9%|▉ | 9/98 [00:11<01:19, 1.12it/s]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Time: 0.7455: 100%|██████████| 98/98 [01:25<00:00, 1.15it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Iteration Processing Time: 0.8269 +- 0.0551\n", + "Total time for all iterations: 81.0315\n", + "Total Wall Time per iteration: 0.8716\n", + "Total Wall Time: 85.4156\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "batch_size = 128\n", + "num_workers = 32\n", + "\n", + "dataset = XarrayDataset(base_path, max_files=max_files)\n", + "process_it(dataset, batch_size, num_workers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Is .nc better than zarr?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.8G\t/home/patel_zeel/bkdb/bangladesh_pnas_pred/team1\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.system(f\"du -sh {base_path}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/1501 [00:00chw\", img).astype(np.float32) / 255)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "nc_path = \"/tmp/nc_check_compressed\"" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 500/500 [00:02<00:00, 246.27it/s]\n", + "Time: 0.7414: 100%|██████████| 98/98 [01:25<00:00, 1.15it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Iteration Processing Time: 0.8260 +- 0.0530\n", + "Total time for all iterations: 80.9527\n", + "Total Wall Time per iteration: 0.8725\n", + "Total Wall Time: 85.5034\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "batch_size = 128\n", + "num_workers = 32\n", + "\n", + "dataset = XarrayDatasetWithNC(nc_path, max_files=max_files)\n", + "process_it(dataset, batch_size, num_workers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Additional experiments" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Time to process 60000 images: 6.793048000000001 minutes\n" + ] + } + ], + "source": [ + "n_images = 60000\n", + "t = 84.9131/500/25 * n_images\n", + "print(f\"Time to process {n_images} images: \", t/60, \"minutes\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1501/1501 [02:44<00:00, 9.13it/s]\n" + ] + } + ], + "source": [ + "files = glob(join(base_path, \"*.zarr\"))\n", + "data_tensors = []\n", + "for file in tqdm(files):\n", + " with xr.open_zarr(file, consolidated=False) as ds:\n", + " # print(ds['data'].values.reshape(-1, 224, 224, 3))\n", + " data_tensors.append(torch.tensor(np.einsum(\"nhwc->nchw\", ds['data'].values.reshape(-1, 224, 224, 3)).astype(np.float16) / 255))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([37525, 3, 224, 224])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_in_one = torch.concat(data_tensors, dim=0)\n", + "all_in_one.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "all_in_one = all_in_one.to('cuda')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Insights" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* GPU Memory consumption is `17776MiB / 81920MiB` for batch size 128 for ViT model\n", + "* Uploading torch.Size([37525, 3, 224, 224]) of float32 data to GPU takes `22054MiB / 81920MiB` of GPU Memory. 
Same data with float16 takes `11202MiB / 81920MiB` of GPU Memory.\n", + "* It seems the choice between `.nc` and `.zarr` does not make much difference in terms of time or memory." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}
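
As a quick sanity check on the GPU-memory insight above, the nvidia-smi readings quoted in the notebook line up with the raw size of the torch.Size([37525, 3, 224, 224]) tensor. Below is a minimal sketch of that arithmetic; the shape and the two MiB readings are taken from the notebook output, while attributing the leftover few hundred MiB to the CUDA context and allocator is an assumption.

    import numpy as np

    # Shape of all_in_one as reported in the notebook
    shape = (37525, 3, 224, 224)
    n_elements = int(np.prod(shape, dtype=np.int64))

    # nvidia-smi readings quoted in the Insights cell
    for dtype, observed_mib in [(np.float32, 22054), (np.float16, 11202)]:
        tensor_mib = n_elements * np.dtype(dtype).itemsize / 1024**2
        print(f"{np.dtype(dtype).name}: tensor alone ~{tensor_mib:.0f} MiB, "
              f"observed {observed_mib} MiB "
              f"(~{observed_mib - tensor_mib:.0f} MiB presumably CUDA context/allocator overhead)")

By the same arithmetic, a single float32 batch of 128 images is only about 74 MiB, so the 17776MiB consumed while training the ViT at batch size 128 is presumably dominated by weights, activations, and optimizer state rather than the input batch itself.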