diff --git a/.nojekyll b/.nojekyll index 914a0ea..652c8cf 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -8547ea76 \ No newline at end of file +6462097d \ No newline at end of file diff --git a/index.html b/index.html index b65ca60..3ac7aab 100644 --- a/index.html +++ b/index.html @@ -158,7 +158,7 @@

blog

+
Categories
All (42)
Academic (2)
Docker (1)
GP (2)
Gcloud (1)
GitHub (2)
ML (27)
ML, GP (2)
Python (1)
macOS (1)
@@ -171,7 +171,7 @@
Categories
-
+
@@ -191,7 +191,42 @@

-
+
+
+

+
+ + +
+
@@ -226,7 +261,7 @@

-
+
@@ -261,7 +296,7 @@

-
+
@@ -296,7 +331,7 @@

-
+
@@ -331,7 +366,7 @@

-
+
@@ -366,7 +401,7 @@

-
+
@@ -401,7 +436,7 @@

-
+
@@ -436,7 +471,7 @@

-
+
@@ -471,7 +506,7 @@

-
+
@@ -506,7 +541,7 @@

-
+
@@ -541,7 +576,7 @@

-
+
@@ -576,7 +611,7 @@

-
+
@@ -611,7 +646,7 @@

-
+
@@ -649,7 +684,7 @@

-
+
@@ -684,7 +719,7 @@

-
+
@@ -719,7 +754,7 @@

-
+
@@ -754,7 +789,7 @@

-
+
@@ -789,7 +824,7 @@

-
+
@@ -824,7 +859,7 @@

-
+
@@ -859,7 +894,7 @@

-
+
@@ -894,7 +929,7 @@

-
+
@@ -929,7 +964,7 @@

-
+
@@ -964,7 +999,7 @@

-
+
@@ -999,9 +1034,9 @@

-
+
-
+
@@ -1069,7 +1104,7 @@

-
+
@@ -1104,7 +1139,7 @@

-
+
@@ -1139,7 +1174,7 @@

-
+
@@ -1174,7 +1209,7 @@

-
+
@@ -1209,7 +1244,7 @@

-
+
@@ -1244,7 +1279,7 @@

-
+
@@ -1279,7 +1314,7 @@

-
+
@@ -1314,7 +1349,7 @@

-
+
@@ -1349,7 +1384,7 @@

-
+
@@ -1384,7 +1419,7 @@

-
+
@@ -1412,7 +1447,7 @@

-
+
@@ -1447,7 +1482,7 @@

-
+
@@ -1475,7 +1510,7 @@

-
+
@@ -1510,7 +1545,7 @@

-
+
@@ -1538,7 +1573,7 @@

-
+
diff --git a/listings.json b/listings.json index 79777c5..baa6ff5 100644 --- a/listings.json +++ b/listings.json @@ -3,6 +3,7 @@ "listing": "/index.html", "items": [ "/posts/2023-07-03-Brick_Kilns_identification.html", + "/posts/2023-11-28-learnings_from_brick_kiln_project.html", "/posts/2023-11-26-Torch-DataLoaders.html", "/posts/2023-08-31-bayesian-gaussian-basis-regression.html", "/posts/2023-07-26-PurpleAir.html", diff --git a/posts/2023-11-28-learnings_from_brick_kiln_project.html b/posts/2023-11-28-learnings_from_brick_kiln_project.html new file mode 100644 index 0000000..f7d82ee --- /dev/null +++ b/posts/2023-11-28-learnings_from_brick_kiln_project.html @@ -0,0 +1,417 @@ + + + + + + + + + + + + +blog - Learnings from the Brick Kiln Project + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+
+
+

Learnings from the Brick Kiln Project

+
+
+ Learnings from the Brick Kiln Project +
+
+
+
ML
+
+
+
+ + +
+ +
+
Author
+
+

Zeel B Patel

+
+
+ +
+
Published
+
+

November 28, 2023

+
+
+ + +
+ + +
+ + + + +
+ + + + +
+

Points

+
+

Labeling

+
    +
  • Labeling is the most important and most effort-intensive part of the project. It also becomes the most confusing part if the criteria for the images are not defined properly. For example, we needed to make this decision for the images of the brick kilns: “If the brick kiln firing chamber is visible, fully or partially, at a level where a human would be able to identify it as a brick kiln, we mark it as a brick kiln”.
  • +
  • To ensure good label quality, have a small subset of images labeled by multiple people and then compare their labels. This helps to catch mistakes in the labeling process and to improve the labeling instructions; a minimal agreement-check sketch is given after this list.
  • +
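A minimal sketch of the agreement check described above, assuming two hypothetical annotators who labeled the same overlap set of images with a binary kiln / no-kiln label. The array values and the use of scikit-learn's cohen_kappa_score for chance-corrected agreement are illustrative assumptions, not the project's actual pipeline.

# Inter-annotator agreement on a shared subset of images (sketch, hypothetical data).
import numpy as np
from sklearn.metrics import cohen_kappa_score

labels_a = np.array([1, 0, 1, 1, 0, 0, 1, 0])  # annotator A: 1 = brick kiln, 0 = no kiln
labels_b = np.array([1, 0, 0, 1, 0, 0, 1, 1])  # annotator B on the same images

raw_agreement = (labels_a == labels_b).mean()  # fraction of matching labels
kappa = cohen_kappa_score(labels_a, labels_b)  # agreement corrected for chance
print(f"raw agreement = {raw_agreement:.2f}, Cohen's kappa = {kappa:.2f}")

# Disagreements are the images worth revisiting; they usually expose gaps in the
# labeling instructions (e.g. partially visible firing chambers).
to_review = np.where(labels_a != labels_b)[0]
print("image indices to re-check:", to_review)

Low agreement on the overlap set is a signal to refine the instructions (such as the firing-chamber rule above) before labeling the full dataset.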
+ + +
+
+ +
+ +
+ + + + \ No newline at end of file diff --git a/search.json b/search.json index fdfd736..322b7c9 100644 --- a/search.json +++ b/search.json @@ -371,221 +371,235 @@ "text": "Predict\n\nloc, scale = model.apply(params, x, y, x_test)\nlower, upper = loc - 2*scale, loc + 2*scale\n\nplt.scatter(x, y, label='train', alpha=0.5)\nplt.scatter(x_test, y_test, label='test', alpha=0.5)\nplt.plot(x_test, loc);\nplt.fill_between(x_test.flatten(), lower, upper, alpha=0.4);\nplt.ylim(-5, 5);" }, { - "objectID": "posts/2021-10-12-sparsegps.html", - "href": "posts/2021-10-12-sparsegps.html", - "title": "SparseGPs in Stheno", + "objectID": "posts/2023-11-28-learnings_from_brick_kiln_project.html", + "href": "posts/2023-11-28-learnings_from_brick_kiln_project.html", + "title": "Learnings from the Brick Kiln Project", "section": "", - "text": "# !pip install -U regdata\n\n\nimport regdata as rd\nimport torch\nimport matplotlib.pyplot as plt\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\nimport wbml.out as out\nfrom wbml.plot import tweak\n\nfrom stheno import B, GP, EQ, PseudoObsVFE, PseudoObsFITC\nfrom varz.torch import Vars, minimise_l_bfgs_b, parametrised, Positive\nimport lab.torch" + "text": "Labeling is the most important and effort-taking part of the project. It is also most confusing part if not done properly for the images. For example, we needed to make this decision for the images of the brick kilns: “If brick kiln firing chamber is visible fully or partially at a level where a human would be able to identify it as a brick kiln, we mark it as a brick kiln”.\nTo ensure good quality of labels, one should allow a small number of images to be labeled by multiple people and then compare the labels. This will help in identifying the mistakes in the labeling process and also help in improving the labeling instructions." }, { - "objectID": "posts/2021-10-12-sparsegps.html#imports", - "href": "posts/2021-10-12-sparsegps.html#imports", - "title": "SparseGPs in Stheno", + "objectID": "posts/2023-11-28-learnings_from_brick_kiln_project.html#points", + "href": "posts/2023-11-28-learnings_from_brick_kiln_project.html#points", + "title": "Learnings from the Brick Kiln Project", "section": "", - "text": "# !pip install -U regdata\n\n\nimport regdata as rd\nimport torch\nimport matplotlib.pyplot as plt\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\nimport wbml.out as out\nfrom wbml.plot import tweak\n\nfrom stheno import B, GP, EQ, PseudoObsVFE, PseudoObsFITC\nfrom varz.torch import Vars, minimise_l_bfgs_b, parametrised, Positive\nimport lab.torch" + "text": "Labeling is the most important and effort-taking part of the project. It is also most confusing part if not done properly for the images. For example, we needed to make this decision for the images of the brick kilns: “If brick kiln firing chamber is visible fully or partially at a level where a human would be able to identify it as a brick kiln, we mark it as a brick kiln”.\nTo ensure good quality of labels, one should allow a small number of images to be labeled by multiple people and then compare the labels. This will help in identifying the mistakes in the labeling process and also help in improving the labeling instructions." 
}, { - "objectID": "posts/2021-10-12-sparsegps.html#data-preperation", - "href": "posts/2021-10-12-sparsegps.html#data-preperation", - "title": "SparseGPs in Stheno", - "section": "Data preperation", - "text": "Data preperation\n\n# Define points to predict at.\nx = B.linspace(0, 10, 100)\nx_obs = B.linspace(0, 7, 50_000)\nx_ind = B.linspace(0, 10, 20)\n\n# Construct a prior.\nf = GP(EQ().periodic(2 * B.pi))\n\n# Sample a true, underlying function and observations.\nf_true = B.sin(x)\ny_obs = B.sin(x_obs) + B.sqrt(0.5) * B.randn(*x_obs.shape)" + "objectID": "posts/2023-07-01-climate-modeling-with-SpecialGP.html", + "href": "posts/2023-07-01-climate-modeling-with-SpecialGP.html", + "title": "Climate Modeling with GPs", + "section": "", + "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\nimport pyproj\nimport numpy as np\nimport xarray as xr\n\nfrom skgpytorch.models import GPRegression\n\nimport matplotlib.pyplot as plt\n\n\n# def haversine(lon1, lat1, lon2, lat2):\n# \"\"\"\n# Calculate the great circle distance in kilometers between two points \n# on the earth (specified in decimal degrees)\n# \"\"\"\n# # convert decimal degrees to radians \n# lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])\n\n# # haversine formula \n# dlon = lon2 - lon1 \n# dlat = lat2 - lat1 \n# a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2\n# c = 2 * np.arcsin(np.sqrt(a)) \n# r = 6371 # Radius of earth in kilometers. Use 3956 for miles. Determines return value units.\n# return c * r\n\n# def new_coords(lat1, long1):\n# new_lat1 = haversine(0, 0, 0, lat1)\n# new_long1 = haversine(0, 0, long1, 0)\n# return new_lat1, new_long1\n\ndef lat_long_to_cartesian(latitude, longitude):\n # Convert latitude and longitude to radians\n phi = np.radians(latitude)\n lam = np.radians(longitude)\n\n # Constants for WGS 84 ellipsoid\n a = 6378137.0 # equatorial radius in meters\n e = 0.0818191908426 # eccentricity\n\n # Calculate Earth's radius at the given latitude\n R = a / np.sqrt(1 - (e ** 2) * (np.sin(phi) ** 2))\n\n # Convert to Cartesian coordinates\n X = R * np.sin(lam)\n Y = R * np.tan(phi)\n\n return X, Y\n\ndef wgs84_coords(lat, lon): \n # Define coordinate systems\n wgs84 = pyproj.CRS.from_epsg(4326) # WGS 84 lat-long system\n utm_zone_32n = pyproj.CRS.from_string(\"+proj=utm +zone=32 +ellps=WGS84 +datum=WGS84 +units=m +no_defs\")\n\n # Create a transformer object\n transformer = pyproj.Transformer.from_crs(wgs84, utm_zone_32n)\n\n # Convert lat-long coordinates to UTM coordinates\n utm_easting, utm_northing = transformer.transform(lon, lat)\n\n return utm_northing, utm_easting\n\n# Copyright (c) Meta Platforms, Inc. 
and affiliates.\n# All rights reserved.\n\n# This source code is licensed under the license found in the\n# LICENSE file in the root directory of this source tree.\n# --------------------------------------------------------\n# Position embedding utils\n# --------------------------------------------------------\n\n\n# --------------------------------------------------------\n# 2D sine-cosine position embedding\n# References:\n# Transformer: https://github.com/tensorflow/models/blob/master/official/nlp/transformer/model_utils.py\n# MoCo v3: https://github.com/facebookresearch/moco-v3\n# --------------------------------------------------------\ndef get_2d_sincos_pos_embed(embed_dim, grid_size_h, grid_size_w, cls_token=False):\n \"\"\"\n grid_size: int of the grid height and width\n return:\n pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)\n \"\"\"\n grid_h = np.arange(grid_size_h, dtype=np.float32)\n grid_w = np.arange(grid_size_w, dtype=np.float32)\n grid = np.meshgrid(grid_w, grid_h) # here w goes first\n grid = np.stack(grid, axis=0)\n\n grid = grid.reshape([2, 1, grid_size_h, grid_size_w])\n pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)\n if cls_token:\n pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)\n return pos_embed\n\n\ndef get_2d_sincos_pos_embed_from_grid(embed_dim, grid):\n assert embed_dim % 2 == 0\n\n # use half of dimensions to encode grid_h\n emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2)\n emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2)\n\n emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D)\n return emb\n\n\ndef get_1d_sincos_pos_embed_from_grid(embed_dim, pos):\n \"\"\"\n embed_dim: output dimension for each position\n pos: a list of positions to be encoded: size (M,)\n out: (M, D)\n \"\"\"\n assert embed_dim % 2 == 0\n omega = np.arange(embed_dim // 2, dtype=np.float)\n omega /= embed_dim / 2.0\n omega = 1.0 / 10000**omega # (D/2,)\n\n pos = pos.reshape(-1) # (M,)\n out = np.einsum(\"m,d->md\", pos, omega) # (M, D/2), outer product\n\n emb_sin = np.sin(out) # (M, D/2)\n emb_cos = np.cos(out) # (M, D/2)\n\n emb = np.concatenate([emb_sin, emb_cos], axis=1) # (M, D)\n return emb\n\n\n# --------------------------------------------------------\n# Interpolate position embeddings for high-resolution\n# References:\n# DeiT: https://github.com/facebookresearch/deit\n# --------------------------------------------------------\ndef interpolate_pos_embed(model, checkpoint_model, new_size=(64, 128)):\n if \"net.pos_embed\" in checkpoint_model:\n pos_embed_checkpoint = checkpoint_model[\"net.pos_embed\"]\n embedding_size = pos_embed_checkpoint.shape[-1]\n orig_num_patches = pos_embed_checkpoint.shape[-2]\n patch_size = model.patch_size\n w_h_ratio = 2\n orig_h = int((orig_num_patches // w_h_ratio) ** 0.5)\n orig_w = w_h_ratio * orig_h\n orig_size = (orig_h, orig_w)\n new_size = (new_size[0] // patch_size, new_size[1] // patch_size)\n # print (orig_size)\n # print (new_size)\n if orig_size[0] != new_size[0]:\n print(\"Interpolate PEs from %dx%d to %dx%d\" % (orig_size[0], orig_size[1], new_size[0], new_size[1]))\n pos_tokens = pos_embed_checkpoint.reshape(-1, orig_size[0], orig_size[1], embedding_size).permute(\n 0, 3, 1, 2\n )\n new_pos_tokens = torch.nn.functional.interpolate(\n pos_tokens, size=(new_size[0], new_size[1]), mode=\"bicubic\", align_corners=False\n )\n new_pos_tokens = new_pos_tokens.permute(0, 2, 3, 
1).flatten(1, 2)\n checkpoint_model[\"net.pos_embed\"] = new_pos_tokens\n\n\ndef interpolate_channel_embed(checkpoint_model, new_len):\n if \"net.channel_embed\" in checkpoint_model:\n channel_embed_checkpoint = checkpoint_model[\"net.channel_embed\"]\n old_len = channel_embed_checkpoint.shape[1]\n if new_len <= old_len:\n checkpoint_model[\"net.channel_embed\"] = channel_embed_checkpoint[:, :new_len]\n\n\ndef SIREN(input_dim, output_dim, features, activation_scale, dropout):\n model = tf.keras.Sequential()\n model.add(layers.Dense(features[0], input_shape=(input_dim,), kernel_initializer=initializers.RandomUniform(-1 / input_dim, 1 / input_dim), activation=tf.sin))\n for i in range(1, len(features)):\n model.add(layers.Dense(features[i], kernel_initializer=initializers.RandomUniform(-np.sqrt(6 / features[i-1]) / activation_scale, np.sqrt(6 / features[i-1]) / activation_scale), activation=tf.sin))\n model.add(layers.Dropout(dropout))\n model.add(layers.Dense(output_dim, kernel_initializer=initializers.RandomUniform(-np.sqrt(6 / features[-1]) / activation_scale, np.sqrt(6 / features[-1]) / activation_scale), activation='linear'))\n return model\n\ndef MLP(input_dim, output_dim, features, activation_scale, dropout):\n model = tf.keras.Sequential()\n model.add(layers.Dense(features[0], input_shape=(input_dim,), activation=activations.relu))\n for i in range(1, len(features)):\n model.add(layers.Dense(features[i], activation=activations.relu))\n model.add(layers.Dropout(dropout))\n model.add(layers.Dense(output_dim, activation='linear'))\n return model\n \ndef ResNet():\n resnet = ResNet50(include_top=False, weights=None, input_shape=(64, 32, 1), pooling='avg')\n model = tf.keras.Sequential()\n model.add(resnet)\n model.add(layers.Dense(2048, activation='relu'))\n model.add(layers.Dense(32768, activation='linear'))\n return model\n\n\ndata5 = xr.open_dataset(\"../data/2m_temperature_2018_5.625deg_Jan.nc\").to_dataframe().reset_index()\ndata1 = xr.open_dataset(\"../data/2m_temperature_2018_1.40625deg_Jan.nc\").to_dataframe().reset_index()\n\n\ndata5.head()\n\n\n\n\n\n\n\n\nlon\nlat\ntime\nt2m\n\n\n\n\n0\n0.0\n-87.1875\n2018-01-01 00:00:00\n250.728180\n\n\n1\n0.0\n-87.1875\n2018-01-01 01:00:00\n250.468552\n\n\n2\n0.0\n-87.1875\n2018-01-01 02:00:00\n250.250931\n\n\n3\n0.0\n-87.1875\n2018-01-01 03:00:00\n250.040314\n\n\n4\n0.0\n-87.1875\n2018-01-01 04:00:00\n249.993790\n\n\n\n\n\n\n\n\ntime_stamp = \"2018-01-01 01:00:00\"\ntrain_df = data5[data5.time == time_stamp]\ntest_df = data1[data1.time == time_stamp]\n\nX = np.stack([train_df.lat.values, train_df.lon.values], axis=1)\ny = train_df[[\"t2m\"]].values\nprint(f\"{X.shape=}, {y.shape=}\")\n\nX_test = np.stack([test_df.lat.values, test_df.lon.values], axis=1)\ny_test = test_df[[\"t2m\"]].values\nprint(f\"{X_test.shape=}, {y_test.shape=}\")\n\nrff = np.random.normal(size=(2, 16)) * 0.01\n# X = np.concatenate([np.sin(X @ rff), np.cos(X @ rff)], axis=1)\n# print(f\"{sin_cos.shape=}\")\n# X = X @ sin_cos\n# X_test = np.concatenate([np.sin(X_test @ rff), np.cos(X_test @ rff)], axis=1)\n\nprint(f\"{X.shape=}, {X_test.shape=}\")\n\nX.shape=(2048, 2), y.shape=(2048, 1)\nX_test.shape=(32768, 2), y_test.shape=(32768, 1)\nX.shape=(2048, 2), X_test.shape=(32768, 2)\n\n\n\nX_max = np.max(X, axis=0, keepdims=True)\nX_min = np.min(X, axis=0, keepdims=True)\n\nX_scaled = (X - X_min) / (X_max - X_min)\nX_test_scaled = (X_test - X_min) / (X_max - X_min)\n\ny_min = np.min(y, axis=0, keepdims=True)\ny_max = np.max(y, axis=0, keepdims=True)\n\ny_scaled = (y - y_min) 
/ (y_max - y_min)\n\n# y_mean = np.mean(y, axis=0, keepdims=True)\n# y_std = np.std(y, axis=0, keepdims=True)\n\n# y_scaled = (y - y_mean) / y_std\n\n\nmodel = MLP(2, 1, [256]*4, 30.0, 0.0)\n# model = ResNet()\nmodel.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss='mse')\n\n\nhistory = model.fit(X_scaled, y_scaled, epochs=5000, batch_size=X_scaled.shape[0], verbose=0)\n\n\nplt.plot(history.history['loss']);\n\n\n\n\n\ny_pred = model.predict(X_test_scaled) * (y_max - y_min) + y_min\nplt.imshow(y_pred.reshape(256, 128), origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\n\n1024/1024 [==============================] - 1s 1ms/step\n\n\n\n\n\n\nplt.imshow(y.reshape(64, 32), origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\n\n\n\n\n\ndiff = y_pred.reshape(256, 128) - y_test.reshape(256, 128)\nplt.imshow(diff, origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\nplt.colorbar();\nplt.title(\"Diff\")\n\nText(0.5, 1.0, 'Diff')\n\n\n\n\n\n\n# rmse = np.sqrt(np.mean(np.abs(X_test[:, 0:1])*(y_pred.ravel() - y_test.ravel())**2))/np.mean(y_test.ravel() * np.abs(X_test[:, 0:1]))\nrmse = np.sqrt(np.mean((y_pred.ravel() - y_test.ravel())**2))\nprint(f\"{rmse=}\")\n\nrmse=2.7606046\n\n\n\nmean_bias = np.mean(y_pred.ravel() - y_test.ravel())\nprint(f\"{mean_bias=}\")\n\nmean_bias=0.10866926" }, { - "objectID": "posts/2021-10-12-sparsegps.html#plotting-function", - "href": "posts/2021-10-12-sparsegps.html#plotting-function", - "title": "SparseGPs in Stheno", - "section": "Plotting function", - "text": "Plotting function\n\ndef plot(method):\n if method == 'VFE':\n # Plot result.\n plt.plot(x, f_true, label=\"True\", style=\"test\")\n plt.scatter(\n x_obs,\n y_obs,\n label=\"Observations\",\n style=\"train\",\n c=\"tab:green\",\n alpha=0.35,\n )\n plt.scatter(\n x_ind,\n obs.mu(f.measure)[:, 0],\n label=\"Inducing Points\",\n style=\"train\",\n s=20,\n )\n plt.plot(x, mean, label=\"Prediction\", style=\"pred\")\n plt.fill_between(x, lower, upper, style=\"pred\")\n tweak()\n\n plt.show()\n else:\n # Plot result.\n plt.plot(x, f_true, label=\"True\", style=\"test\")\n plt.scatter(\n x_obs,\n y_obs,\n label=\"Observations\",\n style=\"train\",\n c=\"tab:green\",\n alpha=0.35,\n )\n plt.scatter(\n x_ind,\n B.dense(f_post(x_ind).mean),\n label=\"Inducing Points\",\n style=\"train\",\n s=20,\n )\n plt.plot(x, mean, label=\"Prediction\", style=\"pred\")\n plt.fill_between(x, lower, upper, style=\"pred\")\n tweak()\n\n plt.show()" + "objectID": "posts/2022-01-24-query_by_committee.html", + "href": "posts/2022-01-24-query_by_committee.html", + "title": "Query by Committee", + "section": "", + "text": "# Common imports\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\n\nplt.style.use('fivethirtyeight')\nrc('animation', html='jshtml')\n\n# Copy the models\nfrom copy import deepcopy\n\n# Sklearn imports\nfrom sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\nfrom sklearn.datasets import make_classification\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score, f1_score\n\n# Entropy function\nfrom scipy.stats import entropy\n\n# Progress helper\nfrom IPython.display import clear_output" }, { - "objectID": "posts/2021-10-12-sparsegps.html#sparse-regression-with-variational-free-energy-vfe-method", - "href": 
"posts/2021-10-12-sparsegps.html#sparse-regression-with-variational-free-energy-vfe-method", - "title": "SparseGPs in Stheno", - "section": "Sparse regression with Variational Free Energy (VFE) method", - "text": "Sparse regression with Variational Free Energy (VFE) method\n\n# Compute a pseudo-point approximation of the posterior.\nobs = PseudoObsVFE(f(x_ind), (f(x_obs, 0.5), y_obs))\n\n# Compute the ELBO.\nout.kv(\"ELBO\", obs.elbo(f.measure))\n\n# Compute the approximate posterior.\nf_post = f | obs\n\n# Make predictions with the approximate posterior.\nmean, lower, upper = f_post(x, 0.5).marginal_credible_bounds()\nplot('VFE')\n\nELBO: -5.345e+04" + "objectID": "posts/2022-01-24-query_by_committee.html#qbc-by-posterior-sampling", + "href": "posts/2022-01-24-query_by_committee.html#qbc-by-posterior-sampling", + "title": "Query by Committee", + "section": "QBC by posterior sampling", + "text": "QBC by posterior sampling\n\nInteresting fact: For probabilistic models, QBC is similar to uncertainty sampling. How?\n\nDraw \\(k\\) parameter sets from the posterior distribution representing \\(k\\) different models.\nQuery a point which shows maximum disagreement among the points." }, { - "objectID": "posts/2021-10-12-sparsegps.html#sparse-regression-with-fully-independent-training-conditional-fitc-mehod", - "href": "posts/2021-10-12-sparsegps.html#sparse-regression-with-fully-independent-training-conditional-fitc-mehod", - "title": "SparseGPs in Stheno", - "section": "Sparse Regression with Fully Independent Training Conditional (FITC) mehod", - "text": "Sparse Regression with Fully Independent Training Conditional (FITC) mehod\n\n# Compute a pseudo-point approximation of the posterior.\nobs = PseudoObsFITC(f(x_ind), (f(x_obs, 0.5), y_obs))\n\n# Compute the ELBO.\nout.kv(\"ELBO\", obs.elbo(f.measure))\n\n# Compute the approximate posterior.\nf_post = f | obs\n\n# Make predictions with the approximate posterior.\nmean, lower, upper = f_post(x, 0.5).marginal_credible_bounds()\nplot('FITC')\n\nELBO: -5.345e+04" + "objectID": "posts/2022-01-24-query_by_committee.html#an-example-bayesian-linear-regression", + "href": "posts/2022-01-24-query_by_committee.html#an-example-bayesian-linear-regression", + "title": "Query by Committee", + "section": "An example: Bayesian linear regression", + "text": "An example: Bayesian linear regression\n\nnp.random.seed(0)\nN = 10\nX = np.linspace(-1,1,N).reshape(-1,1)\n\nt0 = 3\nt1 = 2\n\ny = X * t1 + t0 + np.random.rand(N,1)\n\nplt.scatter(X, y);\n\n\n\n\n\nAssume a posterior\n\nn_samples = 50\n\nt0_dist_samples = np.random.normal(t0, 0.1, size=n_samples)\nt1_dist_samples = np.random.normal(t1, 1, size=n_samples)\n\n\n\nPlot the models\n\nplt.scatter(X, y)\n\nfor i in range(len(t0_dist_samples)):\n sample_t0 = t0_dist_samples[i]\n sample_t1 = t1_dist_samples[i]\n \n plt.plot(X, X * sample_t1 + sample_t0,alpha=0.1)" }, { - "objectID": "posts/2021-10-12-sparsegps.html#hyperparameter-tuning-noisy-sine-data", - "href": "posts/2021-10-12-sparsegps.html#hyperparameter-tuning-noisy-sine-data", - "title": "SparseGPs in Stheno", - "section": "Hyperparameter tuning (Noisy Sine data)", - "text": "Hyperparameter tuning (Noisy Sine data)\n\ndef model(vs):\n \"\"\"Constuct a model with learnable parameters.\"\"\"\n return vs['variance']*GP(EQ().stretch(vs['length_scale']))\n\n\ntorch.manual_seed(123)\n\ndataObj = rd.SineNoisy(scale_X=False, scale_y=False, return_test=True, backend='torch')\nx_obs, y_obs, x = dataObj.get_data()\n\n\nplt.scatter(x_obs, y_obs, 
s=2);\n\n\n\n\n\nVFE\n\nvs = Vars(torch.float64)\nvs.positive(name=\"noise\")\nvs.positive(name=\"length_scale\");\nvs.positive(name=\"variance\");\nvs.positive(init=torch.linspace(0.4,0.6,10), shape=(10,), name='x_ind')\nvs.requires_grad(True)\n\noptimizer = torch.optim.Adam(vs.get_latent_vars(), lr=0.1)\nfig, ax = plt.subplots(1,2,figsize=(15,5))\nlosses = []\n\ndef update(i):\n optimizer.zero_grad()\n gp = model(vs)\n obs = PseudoObsVFE(gp(vs['x_ind']), (gp(x_obs, vs['noise']), y_obs))\n loss = -obs.elbo(gp.measure)\n losses.append(loss.item())\n loss.backward()\n optimizer.step()\n \n gp_post = gp | obs\n mean, lower, upper = gp_post(x, vs['noise']).marginal_credible_bounds()\n ind_mean = B.dense(gp_post(vs['x_ind']).mean)\n \n ax[0].cla();ax[1].cla();\n ax[0].scatter(x_obs, y_obs, s=2)\n with torch.no_grad():\n ax[0].plot()\n ax[0].plot(x, B.dense(mean), label='Prediction')\n ax[0].fill_between(x.ravel(), lower, upper, alpha=0.2, label='Uncertainty')\n ax[0].plot(x, dataObj.f(x), label='True')\n ax[0].scatter(vs['x_ind'], ind_mean, label='Inducing points')\n ax[0].set_xlabel('X')\n ax[0].legend()\n \n ax[1].plot(losses, label='loss')\n ax[1].set_xlabel('Iterations')\n ax[1].legend()\n \nanim = FuncAnimation(fig, update, range(50))\nrc('animation', html='jshtml')\nplt.close()\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect\n \n \n\n\n\n\n\n\n\n\nFITC\n\nvs = Vars(torch.float64)\nvs.positive(name=\"noise\")\nvs.positive(name=\"length_scale\");\nvs.positive(name=\"variance\");\nvs.positive(init=torch.linspace(0.4,0.6,10), shape=(10,), name='x_ind')\nvs.requires_grad(True)\n\noptimizer = torch.optim.Adam(vs.get_latent_vars(), lr=0.1)\nfig, ax = plt.subplots(1,2,figsize=(15,5))\nlosses = []\n\ndef update(i):\n optimizer.zero_grad()\n gp = model(vs)\n obs = PseudoObsFITC(gp(vs['x_ind']), (gp(x_obs, vs['noise']), y_obs))\n loss = -obs.elbo(gp.measure)\n losses.append(loss.item())\n loss.backward()\n optimizer.step()\n \n gp_post = gp | obs\n mean, lower, upper = gp_post(x, vs['noise']).marginal_credible_bounds()\n ind_mean = B.dense(gp_post(vs['x_ind']).mean)\n \n ax[0].cla();ax[1].cla();\n ax[0].scatter(x_obs, y_obs, s=2)\n with torch.no_grad():\n ax[0].plot()\n ax[0].plot(x, B.dense(mean), label='Prediction')\n ax[0].fill_between(x.ravel(), lower, upper, alpha=0.2, label='Uncertainty')\n ax[0].plot(x, dataObj.f(x), label='True')\n ax[0].scatter(vs['x_ind'], ind_mean, label='Inducing points')\n ax[0].set_xlabel('X')\n ax[0].legend()\n \n ax[1].plot(losses, label='loss')\n ax[1].set_xlabel('Iterations')\n ax[1].legend()\n \nanim = FuncAnimation(fig, update, range(50))\nrc('animation', html='jshtml')\nplt.close()\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect" + "objectID": "posts/2022-01-24-query_by_committee.html#qbc-by-bootstrapping", + "href": "posts/2022-01-24-query_by_committee.html#qbc-by-bootstrapping", + "title": "Query by Committee", + "section": "QBC by bootstrapping", + "text": "QBC by bootstrapping\n\n2 class dataset\n\nX, y = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0, random_state=3, shuffle=True)\n\nplt.figure()\nplt.scatter(X[:,0], X[:,1], c=y);\n\n\n\n\n\n\nFull data fit with RF\n\nmodel = RandomForestClassifier(random_state=0)\nmodel.fit(X, y);\n\nRandomForestClassifier(random_state=0)\n\n\n\n\nVisualize decision boundary\n\ngrid_X1, grid_X2 = 
np.meshgrid(np.linspace(X[:,0].min()-0.1, X[:,0].max()+0.1, 100), \n np.linspace(X[:,1].min()-0.1, X[:,1].max()+0.1, 100))\n\ngrid_X = [(x1, x2) for x1, x2 in zip(grid_X1.ravel(), grid_X2.ravel())]\n\ngrid_pred = model.predict(grid_X)\n\nplt.figure(figsize=(6,5))\nplt.scatter(X[:,0], X[:,1], c=y);\nplt.contourf(grid_X1, grid_X2, grid_pred.reshape(*grid_X1.shape), alpha=0.2);\n\n\n\n\n\n\nTrain, pool, test split\n\nX_train_pool, X_test, y_train_pool, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)\nX_train, X_pool, y_train, y_pool = train_test_split(X_train_pool, y_train_pool, train_size=20, random_state=0)\n\nX_list = [X_train, X_pool, X_test]\ny_list = [y_train, y_pool, y_test]\nt_list = ['Train', 'Pool', 'Test']\n\nfig, ax = plt.subplots(1,3,figsize=(15,4), sharex=True, sharey=True)\nfor i in range(3):\n ax[i].scatter(X_list[i][:,0], X_list[i][:,1], c=y_list[i])\n ax[i].set_title(t_list[i])\n \n\n\n\n\n\n\nFitting a model on initial train data\n\nAL_model = RandomForestClassifier(n_jobs=28, random_state=0)\n\nAL_model.fit(X_train, y_train);\n\nRandomForestClassifier(n_jobs=28, random_state=0)\n\n\n\n\nGet the votes from trees on pool dataset\n\nvotes = np.zeros(shape=(X_pool.shape[0], len(AL_model.estimators_)))\n\nfor learner_idx, learner in enumerate(AL_model.estimators_):\n votes[:, learner_idx] = learner.predict(X_pool)\n\n\nvotes.shape\n\n(780, 100)\n\n\n\nvotes\n\narray([[0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [1., 1., 1., ..., 0., 1., 1.],\n ...,\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.]])\n\n\n\n\nConvert to probabilities\n\np_vote = np.zeros(shape=(X_pool.shape[0], X_pool.shape[1]))\n\nfor vote_idx, vote in enumerate(votes):\n vote_counter = {0 : (1-vote).sum(), 1 : vote.sum()}\n\n for class_idx, class_label in enumerate(range(X.shape[1])):\n p_vote[vote_idx, class_idx] = vote_counter[class_label]/len(AL_model.estimators_)\n\n\np_vote\n\narray([[1. , 0. ],\n [0.89, 0.11],\n [0.06, 0.94],\n ...,\n [0.93, 0.07],\n [1. , 0. ],\n [1. , 0. 
]])\n\n\n\n\nCalculate dissimilarity (entropy)\n\nexample_id = 2\n\n\nans = 0\nfor category in range(X_pool.shape[1]):\n ans += (-p_vote[example_id][category] * np.log(p_vote[example_id][category]))\n\nans\n\n0.22696752250060448\n\n\n\nentr = entropy(p_vote, axis=1)\n\n\nentr[example_id]\n\n0.22696752250060448\n\n\n\n\nActive Learning Flow\n\ndef get_query_idx():\n # Gather the votes\n votes = np.zeros(shape=(X_pool.shape[0], len(AL_model.estimators_)))\n for learner_idx, learner in enumerate(AL_model.estimators_):\n votes[:, learner_idx] = learner.predict(X_pool)\n \n # Calcuate probability of votes\n p_vote = np.zeros(shape=(X_pool.shape[0], X_pool.shape[1]))\n for vote_idx, vote in enumerate(votes):\n vote_counter = {0 : (1-vote).sum(), \n 1 : vote.sum()}\n\n for class_idx, class_label in enumerate(range(X.shape[1])):\n p_vote[vote_idx, class_idx] = vote_counter[class_label]/len(AL_model.estimators_)\n \n # Calculate entropy for each example\n entr = entropy(p_vote, axis=1)\n \n # Choose example with highest entropy (disagreement)\n return entr.argmax()\n\n\n\nPrepare data for random sampling\n\nX_train_rand = X_train.copy()\ny_train_rand = y_train.copy()\nX_pool_rand = X_pool.copy()\ny_pool_rand = y_pool.copy()\n\nrandom_model = RandomForestClassifier(n_jobs=28, random_state=0)\n\n\n\nRun active learning\n\nAL_iters = 100\nnp.random.seed(0)\n\nAL_inds = []\nAL_models = []\nrandom_inds = []\nrandom_models = []\n\nfor iteration in range(AL_iters):\n clear_output(wait=True)\n print(\"iteration\", iteration)\n ######## Active Learning ############\n # Fit the model\n AL_model.fit(X_train, y_train)\n AL_models.append(deepcopy(AL_model))\n \n # Query a point\n query_idx = get_query_idx()\n AL_inds.append(query_idx)\n \n # Add it to the train data\n X_train = np.concatenate([X_train, X_pool[query_idx:query_idx+1, :]], axis=0)\n y_train = np.concatenate([y_train, y_pool[query_idx:query_idx+1]], axis=0)\n \n # Remove it from the pool data\n X_pool = np.delete(X_pool, query_idx, axis=0)\n y_pool = np.delete(y_pool, query_idx, axis=0)\n \n ######## Random Sampling ############\n # Fit the model\n random_model.fit(X_train_rand, y_train_rand)\n random_models.append(deepcopy(random_model))\n \n # Query a point\n query_idx = np.random.choice(len(X_pool))\n random_inds.append(query_idx)\n # Add it to the train data\n X_train_rand = np.concatenate([X_train_rand, X_pool_rand[query_idx:query_idx+1, :]], axis=0)\n y_train_rand = np.concatenate([y_train_rand, y_pool_rand[query_idx:query_idx+1]], axis=0)\n \n # Remove it from the pool data\n X_pool_rand = np.delete(X_pool_rand, query_idx, axis=0)\n y_pool_rand = np.delete(y_pool_rand, query_idx, axis=0)\n\niteration 99\n\n\n\n\nPlot accuracy\n\nrandom_scores = []\nAL_scores = []\nfor iteration in range(AL_iters):\n clear_output(wait=True)\n print(\"iteration\", iteration)\n AL_scores.append(accuracy_score(y_test, AL_models[iteration].predict(X_test)))\n random_scores.append(accuracy_score(y_test, random_models[iteration].predict(X_test)))\n \nplt.plot(AL_scores, label='Active Learning');\nplt.plot(random_scores, label='Random Sampling');\nplt.legend();\nplt.xlabel('Iterations');\nplt.ylabel('Accuracy\\n(Higher is better)');\n\niteration 99\n\n\n\n\n\n\n\nPlot decision boundary\n\ndef update(i):\n for each in ax:\n each.cla()\n \n AL_grid_preds = AL_models[i].predict(grid_X)\n random_grid_preds = random_models[i].predict(grid_X)\n \n # Active learning\n ax[0].scatter(X_train[:n_train,0], X_train[:n_train,1], c=y_train[:n_train], label='initial_train', 
alpha=0.2)\n ax[0].scatter(X_train[n_train:n_train+i, 0], X_train[n_train:n_train+i, 1], \n c=y_train[n_train:n_train+i], label='new_points')\n ax[0].contourf(grid_X1, grid_X2, AL_grid_preds.reshape(*grid_X1.shape), alpha=0.2);\n ax[0].set_title('New points')\n \n ax[1].scatter(X_test[:, 0], X_test[:, 1], c=y_test, label='test_set')\n ax[1].contourf(grid_X1, grid_X2, AL_grid_preds.reshape(*grid_X1.shape), alpha=0.2);\n ax[1].set_title('Test points');\n ax[0].text(locs[0],locs[1],'Active Learning')\n \n # Random sampling\n ax[2].scatter(X_train_rand[:n_train,0], X_train_rand[:n_train,1], c=y_train_rand[:n_train], label='initial_train', alpha=0.2)\n ax[2].scatter(X_train_rand[n_train:n_train+i, 0], X_train_rand[n_train:n_train+i, 1], \n c=y_train_rand[n_train:n_train+i], label='new_points')\n ax[2].contourf(grid_X1, grid_X2, random_grid_preds.reshape(*grid_X1.shape), alpha=0.2);\n ax[2].set_title('New points')\n \n ax[3].scatter(X_test[:, 0], X_test[:, 1], c=y_test, label='test_set')\n ax[3].contourf(grid_X1, grid_X2, random_grid_preds.reshape(*grid_X1.shape), alpha=0.2);\n ax[3].set_title('Test points');\n ax[2].text(locs[0],locs[1],'Random Sampling');\n\n\nlocs = (2.7, 4)\nfig, ax = plt.subplots(2,2,figsize=(12,6), sharex=True, sharey=True)\nax = ax.ravel()\nn_train = X_train.shape[0]-AL_iters\n\nanim = FuncAnimation(fig, func=update, frames=range(100))\nplt.close()\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect" }, { - "objectID": "posts/2023-05-14-ssh-macos.html", - "href": "posts/2023-05-14-ssh-macos.html", - "title": "Passwordless SSH setup for MacOS Hosts", + "objectID": "posts/2022-03-06-probabilistic-machine-learning.html", + "href": "posts/2022-03-06-probabilistic-machine-learning.html", + "title": "Probabilistic Machine Learning", "section": "", - "text": "HOST: The computer physically present with you.\nREMOTE: The remote computer that you’d like to access via ssh.\nREMOTE-IP: Ip address of the REMOTE.\nPORT: The port on which the ssh server is running on REMOTE." + "text": "An inference problem requires statements about the value of an unobserved (latent) variable x based on observations y which are related to x, but may not be sufficient to fully determine x. This requires a notion of uncertainty.\n\nWe can define the following rules because \\(p(E) = 1\\) for any event \\(E\\).\n\nSum rule: \\(p(E) = p(E|A) + p(E|\\neg A)\\)\n\nProduct rule: \\(p(E, A) = p(E|A)p(A) = p(A|E)p(E)\\)\n\nBayes’ theorem: \\(p(E|A) = \\frac{p(A|E)p(E)}{p(A)}\\)" }, { - "objectID": "posts/2023-05-14-ssh-macos.html#terminology", - "href": "posts/2023-05-14-ssh-macos.html#terminology", - "title": "Passwordless SSH setup for MacOS Hosts", + "objectID": "posts/2022-03-06-probabilistic-machine-learning.html#introduction", + "href": "posts/2022-03-06-probabilistic-machine-learning.html#introduction", + "title": "Probabilistic Machine Learning", "section": "", - "text": "HOST: The computer physically present with you.\nREMOTE: The remote computer that you’d like to access via ssh.\nREMOTE-IP: Ip address of the REMOTE.\nPORT: The port on which the ssh server is running on REMOTE." 
- }, - { - "objectID": "posts/2023-05-14-ssh-macos.html#what-is-the-problem", - "href": "posts/2023-05-14-ssh-macos.html#what-is-the-problem", - "title": "Passwordless SSH setup for MacOS Hosts", - "section": "What is the problem?", - "text": "What is the problem?\nSimilar to Windows machines, one can run the following commands on a macOS HOST for setting up the passwordless ssh:\nssh-keygen\nssh-copy-id -i ~/.ssh/id_rsa.pub -p PORT USERANAME@REMOTE-IP\nBut this does not work out of the box without the following command which lets your HOST know about the private key.\nssh-add ~/.ssh/id_rsa\nAfter this, connection works fine from macOS CLI. However, if you are trying to connect to REMOTE from VS code, make sure you restart VS code before attempting to connect (quit from the Dock as well).\nSo far so good. But this setup fails when you reboot your HOST since ssh-add is not perstistently adding the pirvate key to HOST.\nSo, what to do now?" + "text": "An inference problem requires statements about the value of an unobserved (latent) variable x based on observations y which are related to x, but may not be sufficient to fully determine x. This requires a notion of uncertainty.\n\nWe can define the following rules because \\(p(E) = 1\\) for any event \\(E\\).\n\nSum rule: \\(p(E) = p(E|A) + p(E|\\neg A)\\)\n\nProduct rule: \\(p(E, A) = p(E|A)p(A) = p(A|E)p(E)\\)\n\nBayes’ theorem: \\(p(E|A) = \\frac{p(A|E)p(E)}{p(A)}\\)" }, { - "objectID": "posts/2023-05-14-ssh-macos.html#permenant-solution", - "href": "posts/2023-05-14-ssh-macos.html#permenant-solution", - "title": "Passwordless SSH setup for MacOS Hosts", - "section": "Permenant solution", - "text": "Permenant solution\nI found a permenant and full-proof solution here. For each REMOTE you add in your HOST’s ~/.ssh/config, after generating a key pair and copying it to REMOTE with ssh-copy-id command, modify its entry in ~/.ssh/config like the following and the issue should be permenently resolved.\nHost REMOTE\n UseKeychain yes\n AddKeysToAgent yes\n IdentityFile ~/.ssh/id_rsa\n HostName REMOTE-IP\n Port PORT\n User USERNAME" + "objectID": "posts/2023-11-26-Torch-DataLoaders.html", + "href": "posts/2023-11-26-Torch-DataLoaders.html", + "title": "Data Handling for Large Scale ML", + "section": "", + "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n\nimport torch\nimport torch.nn as nn\nfrom numcodecs import GZip, Zstd, Blosc\n\nfrom time import time, sleep\nfrom tqdm import tqdm\nfrom glob import glob\nfrom os.path import join\nfrom torch.utils.data import DataLoader, Dataset\nfrom joblib import Parallel, delayed\nimport xarray as xr\nimport numpy as np\n\nfrom torchvision.models import vit_b_16\nfrom astra.torch.models import ViTClassifier\nfrom astra.torch.utils import train_fn" }, { - "objectID": "posts/2022-01-20-kl-divergence.html", - "href": "posts/2022-01-20-kl-divergence.html", - "title": "KL divergence v/s cross-entropy", + "objectID": "posts/2023-11-26-Torch-DataLoaders.html#imports", + "href": "posts/2023-11-26-Torch-DataLoaders.html#imports", + "title": "Data Handling for Large Scale ML", "section": "", - "text": "In a classification problem, for a data-point \\(\\mathbf{x}_i\\), we have the true label \\(y_i\\) associated with it.\nLet us assume that we have three possible outcomes \\(\\{L1, L2, L3\\}\\) and for current \\(\\mathbf{x}_i\\), corresponding \\(y_i\\) is \\(L2\\). 
Then Ground truth probability distribution is the following:\n\\[\np_G(y = L1) = 0\\\\\np_G(y = L2) = 1\\\\\np_G(y=L3) = 0\n\\]\nLet us assume that our classifier model Predicted the following distribution:\n\\[\np_P(y = L1) = 0.1\\\\\np_P(y = L2) = 0.8\\\\\np_P(y=L3) = 0.1\n\\]" + "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n\nimport torch\nimport torch.nn as nn\nfrom numcodecs import GZip, Zstd, Blosc\n\nfrom time import time, sleep\nfrom tqdm import tqdm\nfrom glob import glob\nfrom os.path import join\nfrom torch.utils.data import DataLoader, Dataset\nfrom joblib import Parallel, delayed\nimport xarray as xr\nimport numpy as np\n\nfrom torchvision.models import vit_b_16\nfrom astra.torch.models import ViTClassifier\nfrom astra.torch.utils import train_fn" }, { - "objectID": "posts/2022-01-20-kl-divergence.html#ground", - "href": "posts/2022-01-20-kl-divergence.html#ground", - "title": "KL divergence v/s cross-entropy", - "section": "", - "text": "In a classification problem, for a data-point \\(\\mathbf{x}_i\\), we have the true label \\(y_i\\) associated with it.\nLet us assume that we have three possible outcomes \\(\\{L1, L2, L3\\}\\) and for current \\(\\mathbf{x}_i\\), corresponding \\(y_i\\) is \\(L2\\). Then Ground truth probability distribution is the following:\n\\[\np_G(y = L1) = 0\\\\\np_G(y = L2) = 1\\\\\np_G(y=L3) = 0\n\\]\nLet us assume that our classifier model Predicted the following distribution:\n\\[\np_P(y = L1) = 0.1\\\\\np_P(y = L2) = 0.8\\\\\np_P(y=L3) = 0.1\n\\]" + "objectID": "posts/2023-11-26-Torch-DataLoaders.html#is-.nc-better-than-zarr", + "href": "posts/2023-11-26-Torch-DataLoaders.html#is-.nc-better-than-zarr", + "title": "Data Handling for Large Scale ML", + "section": "Is .nc better than zarr?", + "text": "Is .nc better than zarr?\n\nos.system(f\"du -sh {base_path}\")\n\n1.8G /home/patel_zeel/bkdb/bangladesh_pnas_pred/team1\n\n\n0\n\n\n\nsave_path = \"/tmp/nc_check_uncompressed\"\nos.makedirs(save_path, exist_ok=True)\nfiles = []\ndef zarr_to_nc(file):\n with xr.open_zarr(file, consolidated=False) as ds:\n ds.to_netcdf(join(save_path, file.split(\"/\")[-1].replace(\".zarr\", \".nc\")))\n\n_ = Parallel(n_jobs=32)(delayed(zarr_to_nc)(file) for file in tqdm(glob(join(base_path, \"*.zarr\"))))\n\nos.system(f\"du -sh {save_path}\")\n\n 0%| | 0/1501 [00:00<?, ?it/s]100%|██████████| 1501/1501 [00:24<00:00, 62.47it/s] \n\n\n5.3G /tmp/nc_check_uncompressed\n\n\n0\n\n\n\nsave_path = \"/tmp/nc_check_compressed\"\nos.system(f\"rm -rf {save_path}\")\nos.makedirs(save_path, exist_ok=True)\n\nencoding = {var: {\"zlib\": True, \"complevel\": 1} for var in [\"data\"]}\n\nfiles = []\ndef zarr_to_nc(file):\n with xr.open_zarr(file, consolidated=False) as ds:\n ds.to_netcdf(join(save_path, file.split(\"/\")[-1].replace(\".zarr\", \".nc\")), encoding=encoding)\n\n_ = Parallel(n_jobs=32)(delayed(zarr_to_nc)(file) for file in tqdm(glob(join(base_path, \"*.zarr\"))))\n\nos.system(f\"du -sh {save_path}\")\n\n100%|██████████| 1501/1501 [00:04<00:00, 311.18it/s]\n\n\n1.8G /tmp/nc_check_compressed\n\n\n0\n\n\n\nclass XarrayDatasetWithNC(Dataset):\n def __init__(self, path, max_files):\n self.base_path = path\n self.all_files = glob(join(path, \"*.nc\"))[:max_files]\n self.all_files.sort()\n self.all_ds = [xr.open_dataset(file) for file in tqdm(self.all_files)]\n self.lat_lags = [-2, -1, 0, 1, 2]\n self.lon_lags = [-2, -1, 0, 1, 2]\n \n def __len__(self):\n return len(self.all_files) * 25\n \n def __getitem__(self, idx):\n file_idx = idx // 25\n local_idx = idx 
% 25\n lat_lag = self.lat_lags[local_idx // 5]\n lon_lag = self.lon_lags[local_idx % 5]\n \n ds = self.all_ds[file_idx]\n img = ds.isel(lat_lag=lat_lag, lon_lag=lon_lag)['data'].values\n return torch.tensor(np.einsum(\"hwc->chw\", img).astype(np.float32) / 255)\n\n\nnc_path = \"/tmp/nc_check_compressed\"\n\n\nbatch_size = 128\nnum_workers = 32\n\ndataset = XarrayDatasetWithNC(nc_path, max_files=max_files)\nprocess_it(dataset, batch_size, num_workers)\n\n100%|██████████| 500/500 [00:02<00:00, 246.27it/s]\nTime: 0.7414: 100%|██████████| 98/98 [01:25<00:00, 1.15it/s]\n\n\nAverage Iteration Processing Time: 0.8260 +- 0.0530\nTotal time for all iterations: 80.9527\nTotal Wall Time per iteration: 0.8725\nTotal Wall Time: 85.5034" }, { - "objectID": "posts/2022-01-20-kl-divergence.html#kl-divergence", - "href": "posts/2022-01-20-kl-divergence.html#kl-divergence", - "title": "KL divergence v/s cross-entropy", - "section": "KL divergence", - "text": "KL divergence\nWe can use KL divergence to check how good is our model. The formula is:\n\\[\nD_{KL}(p_G\\;\\rVert\\;p_P) = \\sum_{y_i \\in \\{L1, L2, L3\\}} p_G(y_i)\\log\\frac{p_G(y_i)}{p_P(y_i)}\n\\]\nFor our example,\n\\[\nD_{KL}(p_G\\;\\rVert\\;p_P) = \\log\\frac{1}{0.8}\n\\]\nIt is evident that if \\(p_P(y = L2)\\) decreses from \\(0.8\\), \\(D_{KL}(p_G\\;\\rVert\\;p_P)\\) will increase and vice versa. Note that KL divergence is not symmetric which means \\(D_{KL}(p_G\\;\\rVert\\;p_P) \\ne D_{KL}(p_P\\;\\rVert\\;p_G)\\)." + "objectID": "posts/2023-11-26-Torch-DataLoaders.html#additional-experiments", + "href": "posts/2023-11-26-Torch-DataLoaders.html#additional-experiments", + "title": "Data Handling for Large Scale ML", + "section": "Additional experiments", + "text": "Additional experiments\n\nn_images = 60000\nt = 84.9131/500/25 * n_images\nprint(f\"Time to process {n_images} images: \", t/60, \"minutes\")\n\nTime to process 60000 images: 6.793048000000001 minutes\n\n\n\nfiles = glob(join(base_path, \"*.zarr\"))\ndata_tensors = []\nfor file in tqdm(files):\n with xr.open_zarr(file, consolidated=False) as ds:\n # print(ds['data'].values.reshape(-1, 224, 224, 3))\n data_tensors.append(torch.tensor(np.einsum(\"nhwc->nchw\", ds['data'].values.reshape(-1, 224, 224, 3)).astype(np.float16) / 255))\n\n100%|██████████| 1501/1501 [02:44<00:00, 9.13it/s]\n\n\n\nall_in_one = torch.concat(data_tensors, dim=0)\nall_in_one.shape\n\ntorch.Size([37525, 3, 224, 224])\n\n\n\nall_in_one = all_in_one.to('cuda')" }, { - "objectID": "posts/2022-01-20-kl-divergence.html#cross-entory", - "href": "posts/2022-01-20-kl-divergence.html#cross-entory", - "title": "KL divergence v/s cross-entropy", - "section": "Cross-entory", - "text": "Cross-entory\nCross-entropy is another measure for distribution similarity. The formula is:\n\\[\nH(p_G, p_P) = \\sum_{y_i \\in \\{L1, L2, L3\\}} - p_G(y_i)\\log p_P(y_i)\n\\]\nFor our example:\n\\[\nH(p_G, p_P) = -\\log 0.8 = \\log \\frac{1}{0.8}\n\\]" + "objectID": "posts/2023-11-26-Torch-DataLoaders.html#insights", + "href": "posts/2023-11-26-Torch-DataLoaders.html#insights", + "title": "Data Handling for Large Scale ML", + "section": "Insights", + "text": "Insights\n\nGPU Memory consumption is 17776MiB / 81920MiB for batch size 128 for ViT model\nUploading torch.Size([37525, 3, 224, 224]) of float32 data to GPU takes 22054MiB / 81920MiB of GPU Memory. Same data with float16 takes 11202MiB / 81920MiB of GPU Memory.\nIt seems .nc or .zarr are not making much difference in terms of time and/or memory." 
}, { - "objectID": "posts/2022-01-20-kl-divergence.html#kl-divergence-vs-cross-entropy", - "href": "posts/2022-01-20-kl-divergence.html#kl-divergence-vs-cross-entropy", - "title": "KL divergence v/s cross-entropy", - "section": "KL divergence v/s cross-entropy", - "text": "KL divergence v/s cross-entropy\nThis shows that KL divergence and cross-entropy will return the same values for a simple classification problem. Then why do we use cross-entropy as a loss function and not KL divergence?\nThat’s because KL divergence will compute additional constant terms (zero here) that are not adding any value in minimization." + "objectID": "posts/2022-01-29-presentation_tips.html", + "href": "posts/2022-01-29-presentation_tips.html", + "title": "Conference Presentation Tips", + "section": "", + "text": "General\n\nFirst page goes like this:\n\nTitle\nAuthors (Underline presenting author, no need to put * in case of equal contribution)\nAffiliations\nConference name\n\nIf importing figures from paper, avoid including the captions.\nInclude lot of images and less maths\nTalk should end with summary and not the future work or thank you slide or something.\nCite the references on the same slide in bottom.\n\nRefer to “Giving talks” section of this blog.\n\n\nDos and Don’ts\n\nNever put too detailed information difficult to grasp: a table with many numbers, a complex derivation all in one go, very complicated diagram." }, { - "objectID": "posts/2023-07-03-Brick_Kilns_identification.html", - "href": "posts/2023-07-03-Brick_Kilns_identification.html", - "title": "blog", + "objectID": "posts/2022-03-08-torch-essentials.html", + "href": "posts/2022-03-08-torch-essentials.html", + "title": "Torch essentials", "section": "", - "text": "# %pip install segment-geospatial groundingdino-py leafmap localtileserver\nimport leafmap\nfrom samgeo import tms_to_geotiff\nfrom samgeo.text_sam import LangSAM\n\n\nm = leafmap.Map(center=[28.6139, 77.2090], zoom=10, height=\"600px\")\nm.add_basemap(\"SATELLITE\")\nm\n\n\n\n\nSATELLITE has been already added before.\nSATELLITE has been already added before.\n\n\nTypeError: can only concatenate list (not \"NoneType\") to list\n\n\n\nm.user_roi_bounds()\n\n[77.0217, 28.5583, 77.3015, 28.6635]" + "text": "import torch\nimport numpy as np\n\n\ntensor1 = torch.tensor([1,2,3.], dtype=torch.float32)\ntensor2 = torch.tensor([5,6,7.], dtype=torch.float64)\ndisplay(tensor1, tensor2)\n\ntensor([1., 2., 3.])\n\n\ntensor([5., 6., 7.], dtype=torch.float64)\n\n\n\ndisplay(type(tensor1), type(tensor2))\n\ntorch.Tensor\n\n\ntorch.Tensor\n\n\n\ndisplay(tensor1.dtype, tensor2.dtype)\n\ntorch.float32\n\n\ntorch.float64\n\n\n\nlong_tensor = tensor1.to(torch.int32) # device, dtype, tensor\ndisplay(long_tensor)\n\ntensor([1, 2, 3], dtype=torch.int32)\n\n\n\nlong_tensor.device\n\ndevice(type='cpu')\n\n\n\ndevice = 'cuda' if torch.cuda.is_available() else 'cpu'\nlong_tensor_gpu = long_tensor.to(device)\nlong_tensor_gpu\n\ntensor([1, 2, 3], device='cuda:0', dtype=torch.int32)\n\n\n\nlong_tensor_born_on_gpu = torch.zeros(2,10, device=device).to(torch.float64)\nlong_tensor_born_on_gpu\n\ntensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0',\n dtype=torch.float64)\n\n\n\ninspired_tensor = torch.tensor([1.,2.]).to(long_tensor_born_on_gpu)\ninspired_tensor\n\ntensor([1., 2.], device='cuda:0', dtype=torch.float64)\n\n\n\nnp_array = np.array([1,2,3.])\nnp_array.log()\n\nAttributeError: 'numpy.ndarray' object has no attribute 'log'\n\n\n\npt_array = 
torch.tensor([1,2,3.])\npt_array.log() # sin(), cos(), tan(), exp()\n\ntensor([0.0000, 0.6931, 1.0986])" }, { - "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html", - "href": "posts/2022-05-17-contributors_sorted_by_prs.html", - "title": "Get a list of contributors from a repo", + "objectID": "posts/2022-03-08-torch-essentials.html#lets-go-hands-on", + "href": "posts/2022-03-08-torch-essentials.html#lets-go-hands-on", + "title": "Torch essentials", "section": "", - "text": "import pandas as pd" + "text": "import torch\nimport numpy as np\n\n\ntensor1 = torch.tensor([1,2,3.], dtype=torch.float32)\ntensor2 = torch.tensor([5,6,7.], dtype=torch.float64)\ndisplay(tensor1, tensor2)\n\ntensor([1., 2., 3.])\n\n\ntensor([5., 6., 7.], dtype=torch.float64)\n\n\n\ndisplay(type(tensor1), type(tensor2))\n\ntorch.Tensor\n\n\ntorch.Tensor\n\n\n\ndisplay(tensor1.dtype, tensor2.dtype)\n\ntorch.float32\n\n\ntorch.float64\n\n\n\nlong_tensor = tensor1.to(torch.int32) # device, dtype, tensor\ndisplay(long_tensor)\n\ntensor([1, 2, 3], dtype=torch.int32)\n\n\n\nlong_tensor.device\n\ndevice(type='cpu')\n\n\n\ndevice = 'cuda' if torch.cuda.is_available() else 'cpu'\nlong_tensor_gpu = long_tensor.to(device)\nlong_tensor_gpu\n\ntensor([1, 2, 3], device='cuda:0', dtype=torch.int32)\n\n\n\nlong_tensor_born_on_gpu = torch.zeros(2,10, device=device).to(torch.float64)\nlong_tensor_born_on_gpu\n\ntensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0',\n dtype=torch.float64)\n\n\n\ninspired_tensor = torch.tensor([1.,2.]).to(long_tensor_born_on_gpu)\ninspired_tensor\n\ntensor([1., 2.], device='cuda:0', dtype=torch.float64)\n\n\n\nnp_array = np.array([1,2,3.])\nnp_array.log()\n\nAttributeError: 'numpy.ndarray' object has no attribute 'log'\n\n\n\npt_array = torch.tensor([1,2,3.])\npt_array.log() # sin(), cos(), tan(), exp()\n\ntensor([0.0000, 0.6931, 1.0986])" }, { - "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#config", - "href": "posts/2022-05-17-contributors_sorted_by_prs.html#config", - "title": "Get a list of contributors from a repo", - "section": "Config", - "text": "Config\n\nowner = \"probml\"\nrepo = \"pyprobml\"" + "objectID": "posts/2022-03-08-torch-essentials.html#gradient-is-all-you-need", + "href": "posts/2022-03-08-torch-essentials.html#gradient-is-all-you-need", + "title": "Torch essentials", + "section": "Gradient is all you need", + "text": "Gradient is all you need\n\nimport matplotlib.pyplot as plt\n\n\nx = torch.rand(5,1)\ny = 3 * x + 2 + torch.randn_like(x)*0.1\n\nplt.scatter(x, y);\n\n\n\n\n\nx_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)\nx_plus_ones.shape\n\ntorch.Size([5, 2])\n\n\n\ntheta = torch.zeros(2,1, requires_grad=True)\ntheta\n\ntensor([[0.],\n [0.]], requires_grad=True)\n\n\n\ntheta.grad\n\n\ntheta.grad_fn\n\n\nlr = 0.1\n\ny_pred = x_plus_ones@theta\nloss = ((y_pred - y)**2).mean()\nloss.backward()\n# y_pred = torch.matmul(x_plus_ones, theta)\n# y_pred = torch.mm(x_plus_ones, theta)\n\n\ntheta.grad # dloss/dtheta\n\ntensor([[-6.3681],\n [-2.8128]])\n\n\n\ntheta.grad_fn\n\n\ntheta.data -= lr * theta.grad.data\n\n\ntheta\n\ntensor([[0.6368],\n [0.2813]], requires_grad=True)\n\n\n\ntheta.grad_fn\n\n\nwith torch.no_grad():\n plt.scatter(x, y)\n plt.plot(x, x_plus_ones@theta)\n\n\n\n\n\nfor i in range(10):\n theta.grad.data.zero_()\n y_pred = x_plus_ones@theta\n loss = ((y_pred - y)**2).mean()\n loss.backward()\n theta.data -= lr * theta.grad\n\n\nwith torch.no_grad():\n plt.scatter(x, y)\n plt.plot(x, 
x_plus_ones@theta)" }, { - "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#get-all-contributors-to-a-repo", - "href": "posts/2022-05-17-contributors_sorted_by_prs.html#get-all-contributors-to-a-repo", - "title": "Get a list of contributors from a repo", - "section": "Get all contributors to a repo", - "text": "Get all contributors to a repo\n\ncontributors = pd.read_json(f\"https://api.github.com/repos/{owner}/{repo}/contributors?per_page=100\")\ncontributors = contributors.set_index(\"login\")\nprint(f\"Number of contributors: {len(contributors.index.unique())}\")\ncontributors.head(2)\n\nNumber of contributors: 47\n\n\n\n \n \n \n\n\n\n\n\n\nid\nnode_id\navatar_url\ngravatar_id\nurl\nhtml_url\nfollowers_url\nfollowing_url\ngists_url\nstarred_url\nsubscriptions_url\norganizations_url\nrepos_url\nevents_url\nreceived_events_url\ntype\nsite_admin\ncontributions\n\n\nlogin\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nmurphyk\n4632336\nMDQ6VXNlcjQ2MzIzMzY=\nhttps://avatars.githubusercontent.com/u/463233...\n\nhttps://api.github.com/users/murphyk\nhttps://github.com/murphyk\nhttps://api.github.com/users/murphyk/followers\nhttps://api.github.com/users/murphyk/following...\nhttps://api.github.com/users/murphyk/gists{/gi...\nhttps://api.github.com/users/murphyk/starred{/...\nhttps://api.github.com/users/murphyk/subscript...\nhttps://api.github.com/users/murphyk/orgs\nhttps://api.github.com/users/murphyk/repos\nhttps://api.github.com/users/murphyk/events{/p...\nhttps://api.github.com/users/murphyk/received_...\nUser\nFalse\n1777\n\n\nNeoanarika\n5188337\nMDQ6VXNlcjUxODgzMzc=\nhttps://avatars.githubusercontent.com/u/518833...\n\nhttps://api.github.com/users/Neoanarika\nhttps://github.com/Neoanarika\nhttps://api.github.com/users/Neoanarika/followers\nhttps://api.github.com/users/Neoanarika/follow...\nhttps://api.github.com/users/Neoanarika/gists{...\nhttps://api.github.com/users/Neoanarika/starre...\nhttps://api.github.com/users/Neoanarika/subscr...\nhttps://api.github.com/users/Neoanarika/orgs\nhttps://api.github.com/users/Neoanarika/repos\nhttps://api.github.com/users/Neoanarika/events...\nhttps://api.github.com/users/Neoanarika/receiv...\nUser\nFalse\n184" + "objectID": "posts/2022-03-08-torch-essentials.html#advanced", + "href": "posts/2022-03-08-torch-essentials.html#advanced", + "title": "Torch essentials", + "section": "Advanced", + "text": "Advanced\n\nclass LinearRegression(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.theta = torch.nn.Parameter(torch.zeros(2,1))\n# self.register_parameter(theta, torch.zeros(2,1))\n \n def forward(self, x): # Don't call directly. 
it is called by __call__ method\n x_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)\n y_pred = x_plus_ones@self.theta\n return y_pred\n\n\nmodel = LinearRegression()\nmodel\n\nLinearRegression()\n\n\n\nfor name, value in model.named_parameters():\n print(name, value)\n\ntheta Parameter containing:\ntensor([[0.],\n [0.]], requires_grad=True)\n\n\n\noptimizer = torch.optim.Adam(model.parameters(), lr=0.1)\nloss_fn = torch.nn.MSELoss() # torch.nn.CrossEntropyLoss()\n\nfor i in range(10):\n optimizer.zero_grad()\n \n y_pred = model(x)\n loss = loss_fn(y_pred, y)\n loss.backward()\n \n optimizer.step()\n\n\nmodel.state_dict()\n\nOrderedDict([('theta',\n tensor([[0.9799],\n [0.9808]]))])" }, { - "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#fetch-all-prs-from-a-repo", - "href": "posts/2022-05-17-contributors_sorted_by_prs.html#fetch-all-prs-from-a-repo", - "title": "Get a list of contributors from a repo", - "section": "Fetch all PRs from a repo", - "text": "Fetch all PRs from a repo\n\npage_range = range(1, 6)\nget_pr_df = lambda page: pd.read_json(f\"https://api.github.com/repos/probml/pyprobml/pulls?state=all&per_page=100&page={page}\")\npull_requests = pd.concat(map(get_pr_df, page_range))\nprint(f\"Number of PRs: {len(pull_requests)}\")\npull_requests.head(2)\n\nNumber of PRs: 497\n\n\n\n \n \n \n\n\n\n\n\n\nurl\nid\nnode_id\nhtml_url\ndiff_url\npatch_url\nissue_url\nnumber\nstate\nlocked\n...\nreview_comments_url\nreview_comment_url\ncomments_url\nstatuses_url\nhead\nbase\n_links\nauthor_association\nauto_merge\nactive_lock_reason\n\n\n\n\n0\nhttps://api.github.com/repos/probml/pyprobml/p...\n938329819\nPR_kwDOA-3vB8437cbb\nhttps://github.com/probml/pyprobml/pull/841\nhttps://github.com/probml/pyprobml/pull/841.diff\nhttps://github.com/probml/pyprobml/pull/841.patch\nhttps://api.github.com/repos/probml/pyprobml/i...\n841\nclosed\nFalse\n...\nhttps://api.github.com/repos/probml/pyprobml/p...\nhttps://api.github.com/repos/probml/pyprobml/p...\nhttps://api.github.com/repos/probml/pyprobml/i...\nhttps://api.github.com/repos/probml/pyprobml/s...\n{'label': 'karm-patel:posrprocessing', 'ref': ...\n{'label': 'probml:master', 'ref': 'master', 's...\n{'self': {'href': 'https://api.github.com/repo...\nCONTRIBUTOR\nNaN\nNaN\n\n\n1\nhttps://api.github.com/repos/probml/pyprobml/p...\n938317389\nPR_kwDOA-3vB8437ZZN\nhttps://github.com/probml/pyprobml/pull/840\nhttps://github.com/probml/pyprobml/pull/840.diff\nhttps://github.com/probml/pyprobml/pull/840.patch\nhttps://api.github.com/repos/probml/pyprobml/i...\n840\nclosed\nFalse\n...\nhttps://api.github.com/repos/probml/pyprobml/p...\nhttps://api.github.com/repos/probml/pyprobml/p...\nhttps://api.github.com/repos/probml/pyprobml/i...\nhttps://api.github.com/repos/probml/pyprobml/s...\n{'label': 'karm-patel:master', 'ref': 'master'...\n{'label': 'probml:master', 'ref': 'master', 's...\n{'self': {'href': 'https://api.github.com/repo...\nCONTRIBUTOR\nNaN\nNaN\n\n\n\n\n\n2 rows × 36 columns" + "objectID": "posts/2022-03-08-torch-essentials.html#wanna-run-on-gpu", + "href": "posts/2022-03-08-torch-essentials.html#wanna-run-on-gpu", + "title": "Torch essentials", + "section": "Wanna run on GPU?", + "text": "Wanna run on GPU?\n\nx_gpu = x.to(device)\ny_gpu = y.to(device)\n\n\nprint(model.theta)\nmodel.to(device)\nprint(model.theta)\n\nParameter containing:\ntensor([[0.9799],\n [0.9808]], requires_grad=True)\nParameter containing:\ntensor([[0.9799],\n [0.9808]], device='cuda:0', requires_grad=True)\n\n\n\noptimizer = 
torch.optim.Adam(model.parameters(), lr=0.1)\nloss_fn = torch.nn.MSELoss() # torch.nn.CrossEntropyLoss()\n\nfor i in range(10):\n optimizer.zero_grad()\n \n y_pred = model(x_gpu)\n loss = loss_fn(y_pred, y_gpu)\n loss.backward()\n \n optimizer.step()" }, { - "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#get-a-list-of-contributors-sorted-by-count-of-prs", - "href": "posts/2022-05-17-contributors_sorted_by_prs.html#get-a-list-of-contributors-sorted-by-count-of-prs", - "title": "Get a list of contributors from a repo", - "section": "Get a list of contributors sorted by count of PRs", - "text": "Get a list of contributors sorted by count of PRs\n\npull_requests['login'] = pull_requests['user'].apply(lambda x: x[\"login\"])\nsorted_by_pr_count = pull_requests.groupby(\"login\").agg({'url': len}).sort_values(by='url', ascending=False)\nsorted_by_pr_count.rename(columns={'url': 'Number of PRs'}, inplace=True)\nsorted_by_pr_count.head(5)\n\n\n \n \n \n\n\n\n\n\n\nNumber of PRs\n\n\nlogin\n\n\n\n\n\nDrishttii\n79\n\n\ngerdm\n55\n\n\nkaralleyna\n43\n\n\nalways-newbie161\n29\n\n\nkarm-patel\n29" + "objectID": "posts/2022-03-08-torch-essentials.html#state-dictionary", + "href": "posts/2022-03-08-torch-essentials.html#state-dictionary", + "title": "Torch essentials", + "section": "State dictionary", + "text": "State dictionary\n\n# torch.save(model.state_dict(), path)\n# model.load_state_dict(torch.load(path))" }, { - "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#create-a-dashboard", - "href": "posts/2022-05-17-contributors_sorted_by_prs.html#create-a-dashboard", - "title": "Get a list of contributors from a repo", - "section": "Create a dashboard", - "text": "Create a dashboard\n\ndef get_href_user(user):\n username, profile_link = user.split(\"|\")\n return f\"[{username}]({profile_link})\"\n\ndashboard = pd.DataFrame(index=sorted_by_pr_count.index)\ndashboard[\"Avatar\"] = contributors.avatar_url.apply(lambda url: f'<img width=\"25\" alt=\"image\" src=\"{url}\">')\ndashboard[\"Contributor\"] = (contributors.index +\"|\"+ contributors['html_url']).apply(get_href_user)\ndashboard[\"Number of PRs\"] = sorted_by_pr_count[\"Number of PRs\"]\nprint(dashboard.dropna().T.to_markdown())\n\n| | Drishttii | gerdm | karalleyna | always-newbie161 | karm-patel | Duane321 | Nirzu97 | patel-zeel | animesh-007 | ashishpapanai | shivaditya-meduri | Neoanarika | andrewnc | nappaillav | Abdelrahman350 | mjsML | jdf22 | kzymgch | nalzok | nitish1295 | Garvit9000c | AnkitaKumariJain14 | rohit-khoiwal-30 | shobro | raymondyeh07 | khanshehjad | alenm10 | firatoncel | AnandShegde | Aadesh-1404 | nealmcb | nipunbatra | petercerno | posgnu | mvervuurt | hieuza | Prahitha | TripleTop | UmarJ | Vishal987595 | a-fakhri | adamnemecek | galv | jlh2018 | krasserm | yuanx749 
|\n|:--------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------
-------------------------------------------------|:----------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|\n| Avatar | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/35187749?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/4108759?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/36455180?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/66471669?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/59387624?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/19956442?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/28842790?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/59758528?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/53366877?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/52123364?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/77324692?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/5188337?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/7716402?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/43855961?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/47902062?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/7131192?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/1637094?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/10054419?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/13443062?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/21181046?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/68856476?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/62535006?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/87682045?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/54628243?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/5696982?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/31896767?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/42214173?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/9141211?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/79975787?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/68186100?v=4\"> | <img width=\"25\" alt=\"image\" 
src=\"https://avatars.githubusercontent.com/u/119472?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/60985?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/1649209?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/30136201?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/6399881?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/1021144?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/44160152?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/48208522?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/34779641?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/97757583?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/65111198?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/182415?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/4767568?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/40842099?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/202907?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/47032563?v=4\"> |\n| Contributor | [Drishttii](https://github.com/Drishttii) | [gerdm](https://github.com/gerdm) | [karalleyna](https://github.com/karalleyna) | [always-newbie161](https://github.com/always-newbie161) | [karm-patel](https://github.com/karm-patel) | [Duane321](https://github.com/Duane321) | [Nirzu97](https://github.com/Nirzu97) | [patel-zeel](https://github.com/patel-zeel) | [animesh-007](https://github.com/animesh-007) | [ashishpapanai](https://github.com/ashishpapanai) | [shivaditya-meduri](https://github.com/shivaditya-meduri) | [Neoanarika](https://github.com/Neoanarika) | [andrewnc](https://github.com/andrewnc) | [nappaillav](https://github.com/nappaillav) | [Abdelrahman350](https://github.com/Abdelrahman350) | [mjsML](https://github.com/mjsML) | [jdf22](https://github.com/jdf22) | [kzymgch](https://github.com/kzymgch) | [nalzok](https://github.com/nalzok) | [nitish1295](https://github.com/nitish1295) | [Garvit9000c](https://github.com/Garvit9000c) | [AnkitaKumariJain14](https://github.com/AnkitaKumariJain14) | [rohit-khoiwal-30](https://github.com/rohit-khoiwal-30) | [shobro](https://github.com/shobro) | [raymondyeh07](https://github.com/raymondyeh07) | [khanshehjad](https://github.com/khanshehjad) | [alenm10](https://github.com/alenm10) | [firatoncel](https://github.com/firatoncel) | [AnandShegde](https://github.com/AnandShegde) | [Aadesh-1404](https://github.com/Aadesh-1404) | [nealmcb](https://github.com/nealmcb) | [nipunbatra](https://github.com/nipunbatra) | [petercerno](https://github.com/petercerno) | [posgnu](https://github.com/posgnu) | [mvervuurt](https://github.com/mvervuurt) | [hieuza](https://github.com/hieuza) | [Prahitha](https://github.com/Prahitha) | [TripleTop](https://github.com/TripleTop) | [UmarJ](https://github.com/UmarJ) | [Vishal987595](https://github.com/Vishal987595) | [a-fakhri](https://github.com/a-fakhri) | [adamnemecek](https://github.com/adamnemecek) | [galv](https://github.com/galv) | [jlh2018](https://github.com/jlh2018) | [krasserm](https://github.com/krasserm) | 
[yuanx749](https://github.com/yuanx749) |\n| Number of PRs | 79 | 55 | 43 | 29 | 29 | 29 | 25 | 23 | 18 | 17 | 16 | 10 | 10 | 10 | 8 | 7 | 7 | 6 | 6 | 5 | 4 | 4 | 3 | 3 | 2 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |" + "objectID": "posts/2022-03-08-torch-essentials.html#nn-way", + "href": "posts/2022-03-08-torch-essentials.html#nn-way", + "title": "Torch essentials", + "section": "NN way", + "text": "NN way\n\nclass LinearRegression(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.layer = torch.nn.Linear(2, 1) # torch.nn.Linear(128, 64)\n # What else? \n# self.activation = torch.nn.ReLU()\n# torch.nn.LSTM()\n# torch.nn.Conv2d()\n \n def forward(self, x): # Don't call directly. it is called by __call__ method\n x_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)\n y_pred = self.layer(x_plus_ones)\n return y_pred" }, { - "objectID": "posts/2023-06-23-GNNs_and_GPs.html", - "href": "posts/2023-06-23-GNNs_and_GPs.html", - "title": "GNNs and GPs", + "objectID": "posts/2023-05-31-CNPs_for_Images.html", + "href": "posts/2023-05-31-CNPs_for_Images.html", + "title": "Conditional Neural Processes for Image Interpolation", "section": "", - "text": "import GPy\nimport numpy as np\nimport pandas as pd\n\nfrom sklearn.preprocessing import MinMaxScaler, StandardScaler\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestRegressor\n\nimport regdata as rd\nimport matplotlib.pyplot as plt\n\nimport torch\nimport torch.nn as nn\n\n\nx_train, y_train, x_test = rd.Step().get_data()\ny_train = y_train.reshape(-1, 1)\nx_test = x_test * 1.5\nprint(x_train.shape, y_train.shape, x_test.shape)\n\nplt.scatter(x_train, y_train, label='train');\n\n(50, 1) (50, 1) (100, 1)\n\n\n\n\n\n\nkernel = GPy.kern.RBF(1, variance=1, lengthscale=1)\nmodel = GPy.models.GPRegression(x_train, y_train.reshape(-1, 1), kernel)\nmodel.Gaussian_noise.variance = 0.1\n\ny_pred_gp, y_var = model.predict(x_test)\n\nplt.scatter(x_train, y_train, label='train');\nplt.plot(x_test, y_pred_gp, label='pred');\n\n\n\n\n\nclass GCN_Forward(nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.fc = nn.Linear(in_features, out_features)\n \n def forward(self, x, A):\n x = self.fc(x)\n x = torch.matmul(A, x)\n return x\n \nclass GCN_Reverse(nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.fc = nn.Linear(in_features, out_features)\n \n def forward(self, x, A):\n x = torch.matmul(A, x)\n x = self.fc(x)\n return x\n\nclass NN(nn.Module):\n def __init__(self, features):\n super().__init__()\n self.features = features\n \n for i, (in_features, out_features) in enumerate(zip(features[:-1], features[1:])):\n setattr(self, f'layer_{i}', nn.Linear(in_features, out_features))\n \n self.last_layer = nn.Linear(features[-1], 1)\n \n def forward(self, x, A):\n for i in range(len(self.features) - 1):\n if isinstance(getattr(self, f'layer_{i}'), GCN_Forward):\n x = getattr(self, f'layer_{i}')(x, A)\n else:\n x = getattr(self, f'layer_{i}')(x)\n x = nn.functional.gelu(x)\n \n x = self.last_layer(x)\n return x\n\nclass GCN(NN):\n def __init__(self, features):\n super().__init__(features)\n for i, (in_features, out_features) in enumerate(zip(features[:-1], features[1:])):\n setattr(self, f'layer_{i}', GCN_Forward(in_features, out_features))\n\n\nA = torch.tensor(kernel.K(x_train, x_train)).float()\n# A.fill_diagonal_(0)\nA = A / A.sum(dim=0, keepdim=True)\n# A.fill_diagonal_(1)\n\nnum_epochs = 
500\nfeatures = [1, 1024]\n\ngcn_model = GCN(features=features)\nnn_model = NN(features=features)\n\ngcn_optimizer = torch.optim.Adam(gcn_model.parameters(), lr=0.01)\nnn_optimizer = torch.optim.Adam(nn_model.parameters(), lr=0.01)\n\ncriterion = nn.MSELoss()\n\nx_train_torch = torch.from_numpy(x_train).float()\ny_train_torch = torch.from_numpy(y_train).float()\n\ngcn_losses = []\nnn_losses = []\nfor epoch in range(num_epochs):\n gcn_optimizer.zero_grad()\n nn_optimizer.zero_grad()\n \n y_out_gcn = gcn_model(x_train_torch, A)\n y_out_nn = nn_model(x_train_torch, A)\n gcn_loss = criterion(y_out_gcn, y_train_torch)\n nn_loss = criterion(y_out_nn, y_train_torch)\n \n gcn_loss.backward()\n nn_loss.backward()\n \n gcn_losses.append(gcn_loss.item())\n nn_losses.append(nn_loss.item())\n \n gcn_optimizer.step()\n nn_optimizer.step()\n \nplt.plot(gcn_losses, label='gcn');\nplt.plot(nn_losses, label='nn');\nplt.legend();\n\n\n\n\n\nA_test = torch.tensor(kernel.K(x_test, x_test)).float()\n# A_test.fill_diagonal_(0)\nA_test = A_test / A_test.sum(dim=0, keepdim=True)\n# A_test.fill_diagonal_(1)\n\ny_pred_nn = nn_model(torch.from_numpy(x_test).float(), A_test).detach().numpy()\ny_pred_gcn = gcn_model(torch.from_numpy(x_test).float(), A_test).detach().numpy()\n\nplt.figure(figsize=(10, 6))\nplt.scatter(x_train, y_train, label='train');\nplt.plot(x_train, y_out_gcn.detach().numpy(), label='pred GCN train');\nplt.plot(x_train, y_out_nn.detach().numpy(), label='pred NN train');\nplt.plot(x_test, y_pred_gp, label='pred GP', linestyle='--');\nplt.plot(x_test, y_pred_nn, label='pred NN');\nplt.plot(x_test, y_pred_gcn, label='pred GCN');\nplt.ylim(-3, 3);\nplt.legend();" + "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n# turn off preallocation by JAX\nos.environ[\"XLA_PYTHON_CLIENT_PREALLOCATE\"] = \"false\"\n\nimport numpy as np\nimport pandas as pd\n\nfrom tqdm import tqdm\nimport jax\nimport jax.numpy as jnp\nimport flax.linen as nn\n\nimport distrax as dx\n\nimport optax\n\n# load mnist dataset from tensorflow datasets\nimport tensorflow_datasets as tfds\n\nfrom sklearn.model_selection import train_test_split\n\nimport matplotlib.pyplot as plt\n# define initializers\ndef first_layer_init(key, shape, dtype=jnp.float32):\n num_input = shape[0] # reverse order compared to torch\n return jax.random.uniform(key, shape, dtype, minval=-1.0/num_input, maxval=1.0/num_input)\n\ndef other_layers_init(key, shape, dtype=jnp.float32):\n num_input = shape[0] # reverse order compared to torch\n return jax.random.uniform(key, shape, dtype, minval=-np.sqrt(6 / num_input)/30, maxval=np.sqrt(6 / num_input)/30)\n\nclass Encoder(nn.Module):\n features: list\n encoding_dims: int\n\n @nn.compact\n def __call__(self, x_context, y_context):\n x = jnp.hstack([x_context, y_context.reshape(x_context.shape[0], -1)])\n \n x = nn.Dense(self.features[0], kernel_init=first_layer_init, bias_init=first_layer_init)(x)\n x = jnp.sin(30*x)\n # x = nn.Dense(self.features[0])(x)\n # x = nn.relu(x)\n \n \n for n_features in self.features[1:]:\n x = nn.Dense(n_features, kernel_init=other_layers_init, bias_init=other_layers_init)(x)\n x = jnp.sin(30*x)\n # x = nn.Dense(n_features)(x)\n # x = nn.relu(x)\n\n x = nn.Dense(self.encoding_dims)(x)\n\n representation = x.mean(axis=0, keepdims=True) # option 1\n return representation # (1, encoding_dims)\n\nclass Decoder(nn.Module):\n features: list\n output_dim: int\n\n @nn.compact\n def __call__(self, representation, x):\n representation = jnp.repeat(representation, x.shape[0], axis=0)\n 
x = jnp.hstack([representation, x])\n \n x = nn.Dense(self.features[0], kernel_init=first_layer_init, bias_init=first_layer_init)(x)\n x = jnp.sin(30*x)\n # x = nn.Dense(self.features[0])(x)\n # x = nn.relu(x)\n\n for n_features in self.features:\n x = nn.Dense(n_features, kernel_init=other_layers_init, bias_init=other_layers_init)(x)\n x = jnp.sin(30*x)\n # x = nn.Dense(n_features)(x)\n # x = nn.relu(x)\n\n x = nn.Dense(self.output_dim*2)(x)\n loc, raw_scale = x[:, :self.output_dim], x[:, self.output_dim:]\n scale = jnp.exp(raw_scale)\n \n return loc, scale\n\nclass CNP(nn.Module):\n encoder_features: list\n encoding_dims: int\n decoder_features: list\n output_dim: int\n\n @nn.compact\n def __call__(self, x_content, y_context, x_target):\n representation = Encoder(self.encoder_features, self.encoding_dims)(x_content, y_context)\n loc, scale = Decoder(self.decoder_features, self.output_dim)(representation, x_target)\n return loc, scale\n\n def loss_fn(self, params, x_context, y_context, x_target, y_target):\n loc, scale = self.apply(params, x_context, y_context, x_target)\n predictive_distribution = dx.MultivariateNormalDiag(loc=loc, scale_diag=0.005+scale)\n return -predictive_distribution.log_prob(y_target)" }, { - "objectID": "posts/2022-10-31-stochastic-variational-gp.html", - "href": "posts/2022-10-31-stochastic-variational-gp.html", - "title": "Stochastic Variational Gaussian processes in JAX", - "section": "", - "text": "I recently read a compact and clean explanation of SVGP in the following blog post by Dr. Martin Ingram:\nNow, I am attempting to implement a practical code from scratch for the same (What is practical about it? Sometimes math does not simply translate to code without careful modifications). I am assuming that you have read the blog post cited above before moving further. Let’s go for coding!" + "objectID": "posts/2023-05-31-CNPs_for_Images.html#load-mnist", + "href": "posts/2023-05-31-CNPs_for_Images.html#load-mnist", + "title": "Conditional Neural Processes for Image Interpolation", + "section": "Load MNIST", + "text": "Load MNIST\n\nds = tfds.load('mnist')\n\n\ndef dataset_to_arrays(dataset):\n data = []\n labels = []\n stopper = 0\n end = 100\n for sample in dataset:\n data.append(sample[\"image\"].numpy())\n labels.append(sample[\"label\"].numpy())\n stopper += 1\n if stopper == end:\n break\n return np.array(data), np.array(labels)[..., None]\n\ntrain_data, train_labels = dataset_to_arrays(ds[\"train\"])\ntest_data, test_labels = dataset_to_arrays(ds[\"test\"])\n\ntrain_data.shape, train_labels.shape, test_data.shape, test_labels.shape\n\n2023-06-02 09:58:48.609001: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.\n2023-06-02 09:58:48.681190: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. 
You should use `dataset.take(k).cache().repeat()` instead.\n\n\n((100, 28, 28, 1), (100, 1), (100, 28, 28, 1), (100, 1))\n\n\n\ncoords = np.linspace(-1, 1, 28)\nx, y = np.meshgrid(coords, coords)\ntrain_X = jnp.stack([x, y], axis=-1).reshape(-1, 2)\n\ntrain_y = jax.vmap(lambda x: x.reshape(-1, 1))(train_data) / 255.0\ntrain_X.shape, train_y.shape, type(train_X), type(train_y)\n\n((784, 2),\n (100, 784, 1),\n jaxlib.xla_extension.ArrayImpl,\n jaxlib.xla_extension.ArrayImpl)\n\n\n\niterations = 10000\n\ndef loss_fn(params, context_X, context_y, target_X, target_y):\n def loss_fn_per_sample(context_X, context_y, target_X, target_y):\n loc, scale = model.apply(params, context_X, context_y, target_X)\n # predictive_distribution = dx.MultivariateNormalDiag(loc=loc, scale_diag=scale)\n # return -predictive_distribution.log_prob(target_y)\n return jnp.square(loc.ravel() - target_y.ravel()).mean()\n \n return jax.vmap(loss_fn_per_sample, in_axes=(None, 0, None, 0))(context_X, context_y, target_X, target_y).mean()\n\nvalue_and_grad_fn = jax.jit(jax.value_and_grad(loss_fn))\nmodel = CNP([256]*2, 128, [256]*4, 1)\nparams = model.init(jax.random.PRNGKey(0), train_X, train_y[0], train_X)\noptimizer = optax.adam(1e-5)\nstate = optimizer.init(params)\n\n# losses = []\n# for iter in tqdm(range(iterations)):\n# tmp_index = jax.random.permutation(jax.random.PRNGKey(iter), index)\n# context_X = train_X[tmp_index][:int(train_X.shape[0]*0.05)]\n# context_y = train_y[:, tmp_index, :][:, :int(train_X.shape[0]*0.05), :]\n# target_X = train_X[tmp_index][int(train_X.shape[0]*0.05):]\n# target_y = train_y[:, tmp_index, :][:, int(train_X.shape[0]*0.05):, :]\n \n# # print(context_X.shape, context_y.shape, target_X.shape, target_y.shape)\n# # print(loss_fn(params, context_X, context_y, target_X, target_y).shape)\n \n# loss, grads = value_and_grad_fn(params, context_X, context_y, target_X, target_y)\n# updates, state = optimizer.update(grads, state)\n# params = optax.apply_updates(params, updates)\n# losses.append(loss.item())\n\ndef one_step(params_and_state, key):\n params, state = params_and_state\n tmp_index = jax.random.permutation(key, train_X.shape[0])\n context_X = train_X[tmp_index][:int(train_X.shape[0]*0.05)]\n context_y = train_y[:, tmp_index, :][:, :int(train_X.shape[0]*0.05), :]\n target_X = train_X[tmp_index][int(train_X.shape[0]*0.05):]\n target_y = train_y[:, tmp_index, :][:, int(train_X.shape[0]*0.05):, :]\n loss, grads = value_and_grad_fn(params, context_X, context_y, target_X, target_y)\n updates, state = optimizer.update(grads, state)\n params = optax.apply_updates(params, updates)\n return (params, state), loss\n\n(params, state), loss_history = jax.lax.scan(one_step, (params, state), jax.random.split(jax.random.PRNGKey(0), iterations))\n\n\nplt.plot(loss_history[10:]);\n\n\n\n\n\ntest_key = jax.random.PRNGKey(0)\ntmp_index = jax.random.permutation(test_key, train_X.shape[0])\ncontext_X = train_X[tmp_index][:int(train_X.shape[0]*0.5)]\ncontext_y = train_y[:, tmp_index, :][:, :int(train_X.shape[0]*0.5), :]\ntarget_X = train_X#[tmp_index][int(train_X.shape[0]*0.5):]\ntarget_y = train_y#[:, tmp_index, :][:, int(train_X.shape[0]*0.5):, :]\n\nid = 91\nplt.imshow(train_y[id].reshape(28, 28), cmap=\"gray\", interpolation=None);\n\nlocs, scales = jax.vmap(model.apply, in_axes=(None, None, 0, None))(params, context_X, context_y, target_X)\n# full_preds = jnp.concatenate([context_y, locs], axis=1)\n# full_preds = full_preds.at[:, tmp_index, 
:].set(full_preds).__array__()\n\nplt.figure()\nplt.imshow(locs[id].reshape(28, 28), cmap=\"gray\", interpolation=None);" }, { - "objectID": "posts/2022-10-31-stochastic-variational-gp.html#imports", - "href": "posts/2022-10-31-stochastic-variational-gp.html#imports", - "title": "Stochastic Variational Gaussian processes in JAX", - "section": "Imports", - "text": "Imports\n\n# JAX\nimport jax\nfrom jax.flatten_util import ravel_pytree\nimport jax.numpy as jnp\nimport jax.scipy as jsp\n\n# Partially initialize functions\nfrom functools import partial\n\n# TFP\nimport tensorflow_probability.substrates.jax as tfp\ntfd = tfp.distributions\ntfb = tfp.bijectors\n\n# GP Kernels\nfrom tinygp import kernels\n\n# sklearn\nfrom sklearn.datasets import make_moons, make_blobs, make_circles\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.metrics import accuracy_score\n\n# Optimization\nimport optax\n\n# Plotting\nimport matplotlib.pyplot as plt\nplt.rcParams['scatter.edgecolors'] = \"k\"\n\n# Progress bar\nfrom tqdm import tqdm\n\n# Jitter\nJITTER = 1e-6\n\n# Enable JAX 64bit\njax.config.update(\"jax_enable_x64\", True)" + "objectID": "posts/2023-06-12-GNN_for_regression.html", + "href": "posts/2023-06-12-GNN_for_regression.html", + "title": "Graph Neural Networks for Regression", + "section": "", + "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n\nimport GPy\n\nimport torch\nimport torch.nn as nn\n\nfrom tqdm import trange\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.model_selection import train_test_split\n\ndevice = \"cuda\"" }, { - "objectID": "posts/2022-10-31-stochastic-variational-gp.html#dataset", - "href": "posts/2022-10-31-stochastic-variational-gp.html#dataset", - "title": "Stochastic Variational Gaussian processes in JAX", - "section": "Dataset", - "text": "Dataset\nFor this blog post, we will stick to the classification problem and pick a reasonable classification dataset.\n\nn_samples = 100\nnoise = 0.1\nrandom_state = 0\nshuffle = True\n\nX, y = make_moons(\n n_samples=n_samples, random_state=random_state, noise=noise, shuffle=shuffle\n)\nX = StandardScaler().fit_transform(X) # Yes, this is useful for GPs\n\nX, y = map(jnp.array, (X, y))\n\nplt.scatter(X[:, 0], X[:, 1], c=y);\n\nWARNING:absl:No GPU/TPU found, falling back to CPU. 
(Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)"
  },
  {
    "objectID": "posts/2022-10-31-stochastic-variational-gp.html#methodology",
    "href": "posts/2022-10-31-stochastic-variational-gp.html#methodology",
    "title": "Stochastic Variational Gaussian processes in JAX",
    "section": "Methodology",
    "text": "Methodology\nTo define a GP, we need a kernel function. Let us use the RBF or Exponentiated Quadratic or Squared Exponential kernel.\n\nlengthscale = 1.0\nvariance = 1.0\n\nkernel_fn = variance * kernels.ExpSquared(scale=lengthscale)\n\nkernel_fn(X, X).shape\n\n(100, 100)\n\n\nAs explained in the blog post, we want to minimize the following loss function:\n\\[\nKL[q(u|\\eta) || p(u|y, \\theta)] = KL[q(u|\\eta) || p(u | \\theta)] - \\mathbb{E}_{u \\sim q(u|\\eta)} \\log p(y | u, \\theta) + const\n\\]\nLet us break down the loss and discuss each component.\n\nKL divergence\nIn the first term, we want to compute the KL divergence between the prior and the variational distribution of the GP at the inducing points. First, we need to define the inducing points.\n\nkey = jax.random.PRNGKey(0)\nn_inducing = 10\nn_dim = X.shape[1]\n\nX_inducing = jax.random.normal(key, shape=(n_inducing, n_dim))\nX_inducing.shape\n\n(10, 2)\n\n\nNow, defining the prior and variational distributions.\n\ngp_mean = 0.43 # a scalar parameter to train\n\nprior_mean = gp_mean * jnp.zeros(n_inducing)\nprior_cov = kernel_fn(X_inducing, X_inducing)\n\nprior_distribution = tfd.MultivariateNormalFullCovariance(prior_mean, prior_cov)\n\n\nvariational_mean = jax.random.uniform(key, shape=(n_inducing,)) # a vector parameter to train\n\nA covariance matrix cannot be learned directly due to the positive definite constraint. We can decompose a covariance matrix in the following way:\n\\[\n\\begin{aligned}\nK &= diag(\\boldsymbol{\\sigma})\\Sigma diag(\\boldsymbol{\\sigma})\\\\\n &= diag(\\boldsymbol{\\sigma})LL^T diag(\\boldsymbol{\\sigma})\n\\end{aligned}\n\\]\nwhere \\(\\Sigma\\) is a correlation matrix, \\(L\\) is the lower triangular Cholesky decomposition of \\(\\Sigma\\), and \\(\\boldsymbol{\\sigma}\\) is the variance vector. We can use tfb.CorrelationCholesky to generate \\(L\\) from an unconstrained vector:\n\nrandom_vector = jax.random.normal(key, shape=(3,))\ncorr_chol = tfb.CorrelationCholesky()(random_vector)\ncorrelation = corr_chol@corr_chol.T\ncorrelation\n\nDeviceArray([[ 1. 
, 0.54464529, -0.7835968 ],\n [ 0.54464529, 1. , -0.33059078],\n [-0.7835968 , -0.33059078, 1. ]], dtype=float64)\n\n\nTo constrain \\(\\boldsymbol{\\sigma}\\), any positivity constraint would suffice. So, combining these tricks, we can model the covariance as following:\n\nrandom_vector = jax.random.normal(\n key, shape=(n_inducing * (n_inducing - 1) // 2,)\n) # a trainable parameter\nlog_sigma = jax.random.normal(key, shape=(n_inducing, 1)) # a trainable parameter\n\n\nsigma = jnp.exp(log_sigma)\ncorr_chol = tfb.CorrelationCholesky()(random_vector)\nvariational_cov = sigma * sigma.T * (corr_chol @ corr_chol.T)\nprint(variational_cov.shape)\n\nvariational_distribution = tfd.MultivariateNormalFullCovariance(variational_mean, variational_cov\n)\n\n(10, 10)\n\n\nNow, we can compute the KL divergence:\n\nvariational_distribution.kl_divergence(prior_distribution)\n\nDeviceArray(416.89357355, dtype=float64)\n\n\n\n\nExpectation over the likelihood\nWe want to compute the following expectation:\n\\[\n-\\sum_{i=1}^N \\mathbb{E}_{f_i \\sim q(f_i | \\eta, \\theta)} \\log p(y_i| f_i, \\theta)\n\\]\nNote that, \\(p(y_i| f_i, \\theta)\\) can be any likelihood depending upon the problem, but for classification, we may use a Bernoulli likelihood.\n\nf = jax.random.normal(key, shape=y.shape)\nlikelihood_distribution = tfd.Bernoulli(logits=f)\n\nlog_likelihood = likelihood_distribution.log_prob(y).sum()\nlog_likelihood\n\nDeviceArray(-72.04665624, dtype=float64)\n\n\nWe need to sample \\(f_i\\) from \\(q(f_i | \\eta, \\theta)\\) which has the following form:\n\\[\n\\begin{aligned}\nq(u) &\\sim \\mathcal{N}(\\boldsymbol{m}, S)\\\\\nq(f_i | \\eta, \\theta) &\\sim \\mathcal{N}(\\mu_i, \\sigma_i^2)\\\\\n\\mu_i &= A\\boldsymbol{m}\\\\\n\\sigma_i^2 &= K_{ii} + A(S - K_{mm})A^T\\\\\nA &= K_{im}K_{mm}^{-1}\n\\end{aligned}\n\\]\nNote that matrix inversion is often unstable with jnp.linalg.inv and thus we will use cholesky tricks to compute \\(A\\).\n\ndef q_f(x_i):\n x_i = x_i.reshape(1, -1) # ensure correct shape\n K_im = kernel_fn(x_i, X_inducing)\n K_mm = kernel_fn(X_inducing, X_inducing)\n chol_mm = jnp.linalg.cholesky(K_mm + jnp.eye(K_mm.shape[0])*JITTER)\n A = jsp.linalg.cho_solve((chol_mm, True), K_im.T).T\n \n mu_i = A@variational_mean\n sigma_sqr_i = kernel_fn(x_i, x_i) + A@(variational_cov - prior_cov)@A.T\n \n return tfd.Normal(loc=mu_i, scale=sigma_sqr_i**0.5)\n\nHere is a function to compute log likelihood for a single data-point:\n\ndef log_likelihood(x_i, y_i, seed):\n sample = q_f(x_i).sample(seed=seed)\n log_likelihood = tfd.Bernoulli(logits=sample).log_prob(y_i)\n return log_likelihood.squeeze()\n\n\nlog_likelihood(X[0], y[0], seed=key)\n\nDeviceArray(-0.17831203, dtype=float64)\n\n\nWe can use jax.vmap to compute log_likelihood over a batch. With that, we can leverage the stochastic variational inference following section 10.3.1 (Eq. 10.108) from pml book2. Basically, in each iteration, we need to multiply the batch log likelihood with \\(\\frac{N}{B}\\) to get an unbiased minibatch approximation where \\(N\\) is size of the full dataset and \\(B\\) is the batch size.\n\nbatch_size = 10\n\nseeds = jax.random.split(key, num=batch_size)\n\nll = len(y)/batch_size * jax.vmap(log_likelihood)(X[:batch_size], y[:batch_size], seeds).sum()\nll\n\nDeviceArray(-215.46520331, dtype=float64)\n\n\nNote that, once the parameters are optimized, we can use the derivations of \\(q(f_i | \\eta, \\theta)\\) to compute the posterior distribution. 
We have figured out all the pieces by now, so it is time to put it together in a single class. Some pointers to note are the following:\n\nWe define a single function get_constrained_params to transform all unconstrained parameters.\njax.lax.scan gives a huge boost to the training loop.\nThere is some repetition of code since it is not heavily optimized. You can refactor it at your end if needed."
  },
  {
    "objectID": "posts/2022-10-31-stochastic-variational-gp.html#all-in-one",
    "href": "posts/2022-10-31-stochastic-variational-gp.html#all-in-one",
    "title": "Stochastic Variational Gaussian processes in JAX",
    "section": "All in one",
    "text": "All in one\n\nclass SVGP:\n def __init__(self, X_inducing, data_size):\n self.X_inducing = X_inducing\n self.n_inducing = len(X_inducing)\n self.data_size = data_size\n \n def init_params(self, seed):\n variational_corr_chol_param = tfb.CorrelationCholesky().inverse(jnp.eye(self.n_inducing))\n \n dummy_params = {\"log_variance\": jnp.zeros(()),\n \"log_scale\": jnp.zeros(()), \n \"mean\": jnp.zeros(()),\n \"X_inducing\": self.X_inducing,\n \"variational_mean\": jnp.zeros(self.n_inducing),\n \"variational_corr_chol_param\": variational_corr_chol_param,\n \"log_variational_sigma\": jnp.zeros((self.n_inducing, 1)),\n }\n \n flat_params, unravel_fn = ravel_pytree(dummy_params)\n random_params = jax.random.normal(key, shape=(len(flat_params), ))\n params = unravel_fn(random_params)\n return params\n \n @staticmethod\n def get_constrained_params(params):\n return {\"mean\": params[\"mean\"],\n \"variance\": jnp.exp(params['log_variance']), \n \"scale\": jnp.exp(params['log_scale']), \n \"X_inducing\": params[\"X_inducing\"],\n \"variational_mean\": params[\"variational_mean\"],\n \"variational_corr_chol_param\": 
params[\"variational_corr_chol_param\"],\n \"variational_sigma\": jnp.exp(params[\"log_variational_sigma\"])}\n \n @staticmethod\n def get_q_f(params, x_i, prior_distribution, variational_distribution):\n x_i = x_i.reshape(1, -1) # ensure correct shape\n \n kernel_fn = params['variance'] * kernels.ExpSquared(scale=params[\"scale\"])\n K_im = kernel_fn(x_i, params[\"X_inducing\"])\n K_mm = prior_distribution.covariance()\n chol_mm = jnp.linalg.cholesky(K_mm)\n A = jsp.linalg.cho_solve((chol_mm, True), K_im.T).T\n\n mu_i = A@params[\"variational_mean\"]\n sigma_sqr_i = kernel_fn(x_i, x_i) + A@(variational_distribution.covariance() - K_mm)@A.T\n\n return tfd.Normal(loc=mu_i, scale=sigma_sqr_i**0.5)\n \n def get_distributions(self, params):\n kernel_fn = params['variance'] * kernels.ExpSquared(scale=params[\"scale\"])\n prior_mean = params[\"mean\"]\n prior_cov = kernel_fn(params[\"X_inducing\"], params[\"X_inducing\"]) + jnp.eye(self.n_inducing)*JITTER\n prior_distribution = tfd.MultivariateNormalFullCovariance(prior_mean, prior_cov)\n\n corr_chol = tfb.CorrelationCholesky()(params[\"variational_corr_chol_param\"])\n sigma = jnp.diag(params[\"variational_sigma\"])\n variational_cov = sigma*sigma.T*(corr_chol@corr_chol.T) + jnp.eye(self.n_inducing)*JITTER\n variational_distribution = tfd.MultivariateNormalFullCovariance(params[\"variational_mean\"], variational_cov)\n \n return prior_distribution, variational_distribution\n \n def loss_fn(self, params, X_batch, y_batch, seed):\n params = self.get_constrained_params(params)\n \n # Get distributions\n prior_distribution, variational_distribution = self.get_distributions(params)\n \n # Compute kl\n kl = variational_distribution.kl_divergence(prior_distribution)\n\n # Compute log likelihood\n def log_likelihood_fn(x_i, y_i, seed):\n q_f = self.get_q_f(params, x_i, prior_distribution, variational_distribution)\n sample = q_f.sample(seed=seed)\n log_likelihood = tfd.Bernoulli(logits=sample).log_prob(y_i)\n return log_likelihood.squeeze()\n \n seeds = jax.random.split(seed, num=len(y_batch))\n log_likelihood = jax.vmap(log_likelihood_fn)(X_batch, y_batch, seeds).sum() * self.data_size/len(y_batch)\n\n return kl - log_likelihood\n \n def fit_fn(self, X, y, init_params, optimizer, n_iters, batch_size, seed):\n state = optimizer.init(init_params)\n value_and_grad_fn = jax.value_and_grad(self.loss_fn)\n \n def one_step(params_and_state, seed):\n params, state = params_and_state\n idx = jax.random.choice(seed, self.data_size, (batch_size,), replace=False)\n X_batch, y_batch = X[idx], y[idx]\n \n seed2 = jax.random.split(seed, 1)[0]\n loss, grads = value_and_grad_fn(params, X_batch, y_batch, seed2)\n updates, state = optimizer.update(grads, state)\n params = optax.apply_updates(params, updates)\n return (params, state), (loss, params)\n \n seeds = jax.random.split(seed, num=n_iters)\n (best_params, _), (loss_history, params_history) = jax.lax.scan(one_step, (init_params, state), xs=seeds)\n return best_params, loss_history, params_history\n\n def predict_fn(self, params, X_new):\n constrained_params = self.get_constrained_params(params)\n prior_distribution, variational_distribution = self.get_distributions(constrained_params)\n \n def _predict_fn(x_i): \n # Get posterior\n q_f = self.get_q_f(constrained_params, x_i, prior_distribution, variational_distribution)\n return q_f.mean().squeeze(), q_f.variance().squeeze()\n \n mean, var = jax.vmap(_predict_fn)(X_new)\n return mean.squeeze(), var.squeeze()" + "objectID": 
"posts/2023-06-12-GNN_for_regression.html#create-a-gcn-layer", + "href": "posts/2023-06-12-GNN_for_regression.html#create-a-gcn-layer", + "title": "Graph Neural Networks for Regression", + "section": "Create a GCN layer", + "text": "Create a GCN layer\n\nclass GCNLayer(nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.linear = nn.Linear(in_features, out_features)\n \n def forward(self, x, A): \n return self.linear(A @ x)\n \n \nclass GCN(nn.Module):\n def __init__(self, features):\n super().__init__()\n layers = [GCNLayer(1, features[0]), nn.ReLU()]\n for in_features, out_features in zip(features, features[1:]):\n layers.append(GCNLayer(in_features, out_features))\n layers.append(nn.ReLU())\n \n layers.append(nn.Linear(features[-1], 1))\n self.layers = nn.Sequential(*layers)\n \n def forward(self, x, A):\n for layer in self.layers:\n if isinstance(layer, GCNLayer):\n x = layer(x, A)\n else:\n x = layer(x)\n return x\n \ndef get_eucledean_A(x, exponent):\n d = ((x - x.T)**2)**0.5\n d = torch.where(d==0, torch.min(d[d!=0])/2, d) # self distance is 0, so replace it with half of the min distance\n A = 1/(d**exponent)\n return A/A.sum(dim=1, keepdim=True)\n\ndef get_KNN_A(x, k):\n d = torch.abs(x - x.T)\n A = torch.zeros_like(d)\n _, indices = torch.topk(d, k, dim=1, largest=False)\n for i, index in enumerate(indices):\n A[i, index] = 1\n return A/A.sum(dim=1, keepdim=True)\n\ndef fit_and_plot(title):\n model = GCN([10, 10, 10]).to(device)\n losses = fit(model, train_x, train_y, A=A_train, lr=0.001, epochs=3000);\n\n pred_y = model(x, A_all)\n\n fig, ax = plt.subplots(1, 2, figsize=(12, 4))\n axes = ax[0]\n axes.plot(losses)\n axes.set_title(\"Losses\")\n\n (x_, y_, train_x_, train_y_, test_x_, test_y_, pred_y_) = map(lambda x: x.cpu().detach().numpy(), (x, y, train_x, train_y, test_x, test_y, pred_y))\n axes = ax[1]\n axes.plot(x_, y_, label=\"True\");\n axes.plot(train_x_, train_y_, 'o', label='train')\n axes.plot(test_x_, test_y_, 'o', label='test')\n axes.plot(x_, pred_y_, label='pred')\n axes.set_title(title)\n axes.legend();" }, { - "objectID": "posts/2022-10-31-stochastic-variational-gp.html#train-and-predict", - "href": "posts/2022-10-31-stochastic-variational-gp.html#train-and-predict", - "title": "Stochastic Variational Gaussian processes in JAX", - "section": "Train and predict", - "text": "Train and predict\n\nn_inducing = 20\nn_epochs = 100\nbatch_size = 10\ndata_size = len(y)\nn_iters = n_epochs*(data_size/batch_size)\nn_iters\n\n1000.0\n\n\n\nkey = jax.random.PRNGKey(0)\nkey2, subkey = jax.random.split(key)\noptimizer = optax.adam(learning_rate=0.01)\n\nX_inducing = jax.random.choice(key, X, (n_inducing,), replace=False)\nmodel = SVGP(X_inducing, data_size)\n\ninit_params = model.init_params(key2)\n\nmodel.loss_fn(init_params, X, y, key)\nbest_params, loss_history, params_history = model.fit_fn(X, y, init_params, optimizer, n_iters, batch_size, subkey)\n\nplt.figure()\nplt.plot(loss_history);\nplt.title(\"Loss\");\n\n\n\n\n\nx = jnp.linspace(-3.5, 3.5, 100)\nseed = jax.random.PRNGKey(123)\n\nX1, X2 = jnp.meshgrid(x, x)\nf = lambda x1, x2: model.predict_fn(best_params, jnp.array([x1, x2]).reshape(1, -1))\npred_mean, pred_var = jax.vmap(jax.vmap(f))(X1, X2)\nlogits = tfd.Normal(pred_mean, pred_var**0.5).sample(seed=seed, sample_shape=(10000,))\nproba = jax.nn.sigmoid(logits)\n\nproba_mean = proba.mean(axis=0)\nproba_std2 = proba.std(axis=0)*2\n\n\nfig, ax = plt.subplots(1, 2, figsize=(12,4))\ncplot1 = ax[0].contourf(X1, X2, 
proba_mean.squeeze(), alpha=0.5, levels=20)\nplt.colorbar(cplot1, ax=ax[0])\n\ncplot2 = ax[1].contourf(X1, X2, proba_std2.squeeze(), alpha=0.5, levels=20)\nplt.colorbar(cplot2, ax=ax[1])\n\nax[0].scatter(X[:, 0], X[:, 1], c=y);\nax[1].scatter(X[:, 0], X[:, 1], c=y);\n\nax[0].set_title(\"Posterior $\\mu$\");\nax[1].set_title(\"Posterior $\\mu \\pm 2*\\sigma$\");" + "objectID": "posts/2023-06-12-GNN_for_regression.html#idw-setting", + "href": "posts/2023-06-12-GNN_for_regression.html#idw-setting", + "title": "Graph Neural Networks for Regression", + "section": "IDW setting", + "text": "IDW setting\n\nexponent = 1\nA_train = get_eucledean_A(train_x, exponent).to(device)\nA_all = get_eucledean_A(x, exponent).to(device)\ntitle = f\"Distance based adjacency matrix with exponent {exponent}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.05447980388998985: 100%|██████████| 3000/3000 [00:07<00:00, 390.93it/s] \n\n\n\n\n\n\nexponent = 2\nA_train = get_eucledean_A(train_x, exponent).to(device)\nA_all = get_eucledean_A(x, exponent).to(device)\ntitle = f\"Distance based adjacency matrix with exponent {exponent}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.06475391983985901: 100%|██████████| 3000/3000 [00:07<00:00, 413.49it/s]\n\n\n\n\n\n\nexponent = 3\nA_train = get_eucledean_A(train_x, exponent).to(device)\nA_all = get_eucledean_A(x, exponent).to(device)\ntitle = f\"Distance based adjacency matrix with exponent {exponent}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.043554823845624924: 100%|██████████| 3000/3000 [00:08<00:00, 367.28it/s]" }, { - "objectID": "posts/2022-10-31-stochastic-variational-gp.html#some-more-datasets", - "href": "posts/2022-10-31-stochastic-variational-gp.html#some-more-datasets", - "title": "Stochastic Variational Gaussian processes in JAX", - "section": "Some more datasets", - "text": "Some more datasets\n\ndef fit_and_plot(X, y):\n X = StandardScaler().fit_transform(X) # Yes, this is useful for GPs\n X, y = map(jnp.array, (X, y))\n\n X_inducing = jax.random.choice(key, X, (n_inducing,), replace=False)\n model = SVGP(X_inducing, data_size)\n\n init_params = model.init_params(key2)\n\n model.loss_fn(init_params, X, y, key)\n best_params, loss_history, params_history = model.fit_fn(X, y, init_params, optimizer, n_iters, batch_size, subkey)\n\n plt.figure()\n plt.plot(loss_history);\n plt.title(\"Loss\");\n \n f = lambda x1, x2: model.predict_fn(best_params, jnp.array([x1, x2]).reshape(1, -1))\n pred_mean, pred_var = jax.vmap(jax.vmap(f))(X1, X2)\n logits = tfd.Normal(pred_mean, pred_var**0.5).sample(seed=seed, sample_shape=(10000,))\n proba = jax.nn.sigmoid(logits)\n\n proba_mean = proba.mean(axis=0)\n proba_std2 = proba.std(axis=0)*2\n \n fig, ax = plt.subplots(1, 2, figsize=(12,4))\n cplot1 = ax[0].contourf(X1, X2, proba_mean.squeeze(), alpha=0.5, levels=20)\n plt.colorbar(cplot1, ax=ax[0])\n\n cplot2 = ax[1].contourf(X1, X2, proba_std2.squeeze(), alpha=0.5, levels=20)\n plt.colorbar(cplot2, ax=ax[1])\n\n ax[0].scatter(X[:, 0], X[:, 1], c=y);\n ax[1].scatter(X[:, 0], X[:, 1], c=y);\n\n ax[0].set_title(\"Posterior $\\mu$\");\n ax[1].set_title(\"Posterior $\\mu \\pm 2*\\sigma$\");\n\n\nmake_blobs\n\nX, y = make_blobs(n_samples=n_samples, random_state=random_state, centers=2)\n\nplt.scatter(X[:, 0], X[:, 1], c=y);\nfit_and_plot(X, y)\n\n\n\n\n\n\n\n\n\n\n\n\nmake_circles\n\nX, y = make_circles(n_samples=n_samples, random_state=random_state, noise=noise, factor=0.1)\n\nplt.scatter(X[:, 0], X[:, 1], c=y);\nfit_and_plot(X, y)" + "objectID": 
"posts/2023-06-12-GNN_for_regression.html#knn-setting", + "href": "posts/2023-06-12-GNN_for_regression.html#knn-setting", + "title": "Graph Neural Networks for Regression", + "section": "KNN Setting", + "text": "KNN Setting\n\nK = 1\nA_train = get_KNN_A(train_x, K).to(device)\nA_all = get_KNN_A(x, K).to(device)\ntitle = f\"KNN based adjacency matrix with K={K}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.04107221961021423: 100%|██████████| 3000/3000 [00:07<00:00, 383.88it/s] \n\n\n\n\n\n\nK = 3\nA_train = get_KNN_A(train_x, K).to(device)\nA_all = get_KNN_A(x, K).to(device)\ntitle = f\"KNN based adjacency matrix with K={K}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.14372628927230835: 100%|██████████| 3000/3000 [00:07<00:00, 404.74it/s]\n\n\n\n\n\n\nK = 7\nA_train = get_KNN_A(train_x, K).to(device)\nA_all = get_KNN_A(x, K).to(device)\ntitle = f\"KNN based adjacency matrix with K={K}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.13950258493423462: 100%|██████████| 3000/3000 [00:07<00:00, 381.66it/s]\n\n\n\n\n\n\nK = 15\nA_train = get_KNN_A(train_x, K).to(device)\nA_all = get_KNN_A(x, K).to(device)\ntitle = f\"KNN based adjacency matrix with K={K}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.33879855275154114: 100%|██████████| 3000/3000 [00:07<00:00, 376.56it/s]" + }, + { + "objectID": "about.html", + "href": "about.html", + "title": "About", + "section": "", + "text": "Hi, I am Zeel. This is my blog, where I add coding + other resources related to my research. Head over to this page for my personal website." + }, + { + "objectID": "index.html", + "href": "index.html", + "title": "blog", + "section": "", + "text": "Learnings from the Brick Kiln Project\n\n\n\n\n\n\n\nML\n\n\n\n\nLearnings from the Brick Kiln Project\n\n\n\n\n\n\nNov 28, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nData Handling for Large Scale ML\n\n\n\n\n\n\n\nML\n\n\n\n\nAn exploratory analysis of various dataset handling processes to optimize memory, diskspace and speed.\n\n\n\n\n\n\nSep 30, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nBayesian Gaussian Basis Regression\n\n\n\n\n\n\n\nML\n\n\n\n\nBayesian Gaussian Basis Regression\n\n\n\n\n\n\nAug 31, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nDownload low-cost data from OpenAQ\n\n\n\n\n\n\n\nML, GP\n\n\n\n\nA guide to download low-cost sensor data from OpenAQ\n\n\n\n\n\n\nJul 26, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nClimate Modeling with GPs\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring the use of GPs for climate modeling\n\n\n\n\n\n\nJul 4, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nMulti-class classification with Gaussian Processes\n\n\n\n\n\n\n\nML, GP\n\n\n\n\nMulti-class GP classification with different strategies\n\n\n\n\n\n\nJul 4, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nClimate Modeling with SIRENs\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring the use of SIRENs for climate modeling\n\n\n\n\n\n\nJul 1, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGNNs and GPs\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring similarities between GNNs and GPs\n\n\n\n\n\n\nJun 23, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGraph Neural Networks for Regression\n\n\n\n\n\n\n\nML\n\n\n\n\nChallenges in using GNNs for regression using various strategies\n\n\n\n\n\n\nJun 12, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nBasis functions\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring basis functions\n\n\n\n\n\n\nJun 12, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nConditional Neural Processes for Image Interpolation\n\n\n\n\n\n\n\nML\n\n\n\n\nExtreme Image Interpolation with 
Conditional Neural processes\n\n\n\n\n\n\nMay 31, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nPasswordless SSH setup for MacOS Hosts\n\n\n\n\n\n\n\nmacOS\n\n\n\n\nA tiny handbook to setup passwordless ssh in MacOS\n\n\n\n\n\n\nMay 14, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nSine Combination Networks\n\n\n\n\n\n\n\nML\n\n\n\n\nChallenges in fitting to a combination of sine waves\n\n\n\n\n\n\nApr 29, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nNeural Network Gaussian Process\n\n\n\n\n\n\n\nGP\n\n\nML\n\n\n\n\nExploring NTK kernels + GPJax with toy datasets\n\n\n\n\n\n\nMar 28, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nStochastic Variational Gaussian processes in JAX\n\n\n\n\n\n\n\nGP\n\n\n\n\nA practical implementation of Hensman et al. 2015 from scratch in JAX\n\n\n\n\n\n\nOct 31, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nMulti-Output Gaussian Processes\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring MOGPs from scratch\n\n\n\n\n\n\nOct 27, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGaussian Processes - A no-skip-math version\n\n\n\n\n\n\n\nML\n\n\n\n\nEnd-to-end math derivations for Gaussian process regression and classification\n\n\n\n\n\n\nOct 21, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nTrain NN with KFAC-Laplace in JAX\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring KFAC-Laplace approximation on simple problems in JAX\n\n\n\n\n\n\nOct 18, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nConditional Neural Processes in JAX\n\n\n\n\n\n\n\nML\n\n\n\n\nImplementing conditional neural processes from scratch in JAX\n\n\n\n\n\n\nAug 1, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nJAX Optimizers\n\n\n\n\n\n\n\nML\n\n\n\n\nPros and cons of several jax optimizers.\n\n\n\n\n\n\nJun 10, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGet a list of contributors from a repo\n\n\n\n\n\n\n\nGitHub\n\n\n\n\nGet contributors’ list using GitHub API and pandas\n\n\n\n\n\n\nMay 17, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nIteratively reweighted least squares (IRLS) logistic regression\n\n\n\n\n\n\n\nML\n\n\n\n\nImplementation of IRLS from Probabilistic ML book of Dr. Kevin Murphy and its comparison with naive second order implementation.\n\n\n\n\n\n\nMay 14, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGcloud cheatsheet\n\n\n\n\n\n\n\nGcloud\n\n\n\n\nMost used commands while working with gcloud\n\n\n\n\n\n\nApr 9, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGitHub Contrubuting FAQs\n\n\n\n\n\n\n\nGitHub\n\n\n\n\nThis is a collection of FAQs/road-blocks/queries/issues I had over the past 2 years of engagement with GitHub.\n\n\n\n\n\n\nApr 6, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nTorch essentials\n\n\n\n\n\n\n\nML\n\n\n\n\nPractical and direct introduction to PyTorch\n\n\n\n\n\n\nMar 8, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nProbabilistic Machine Learning\n\n\n\n\n\n\n\nML\n\n\n\n\nA video lecture series from Prof. Philipp Hennig\n\n\n\n\n\n\nMar 6, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nUncertainty in Deep Learning\n\n\n\n\n\n\n\nML\n\n\n\n\nReview of PhD thesis of Dr. 
Yarin Gal\n\n\n\n\n\n\nMar 5, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nPyTorch Tips\n\n\n\n\n\n\n\nML\n\n\n\n\nPyTorch zen tips\n\n\n\n\n\n\nFeb 25, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nConference Presentation Tips\n\n\n\n\n\n\n\nAcademic\n\n\n\n\nConference Presentation Tips\n\n\n\n\n\n\nJan 29, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nComparing Gaussian Process Regression Frameworks\n\n\n\n\n\n\n\nML\n\n\n\n\nA basic comparison among GPy, GPyTorch and TinyGP\n\n\n\n\n\n\nJan 25, 2022\n\n\nZeel B Patel, Harsh Patel, Shivam Sahni\n\n\n\n\n\n\n \n\n\n\n\nQuery by Committee\n\n\n\n\n\n\n\nML\n\n\n\n\nA programming introduction to QBC with Random Forest Classifier.\n\n\n\n\n\n\nJan 24, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nKL divergence v/s cross-entropy\n\n\n\n\n\n\n\nML\n\n\n\n\nUnderstanding KL divergence\n\n\n\n\n\n\nJan 20, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nWhy .py files are better than .ipynb files for ML codebase\n\n\n\n\n\n\n\nPython\n\n\n\n\nWhere .py files are better than .ipynb files?\n\n\n\n\n\n\nJan 15, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nAnonymization tips for double-blind submission\n\n\n\n\n\n\n\nAcademic\n\n\n\n\nA last-minute help list\n\n\n\n\n\n\nOct 26, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nInput Warped GPs - A failed idea\n\n\n\n\n\n\n\nML\n\n\n\n\nAn idea of input warping GPs\n\n\n\n\n\n\nOct 23, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nSparseGPs in Stheno\n\n\n\n\n\nA simple demo of sparse regression in stheno with VFE and FITC methods.\n\n\n\n\n\n\nOct 12, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nDocker Cheatsheet\n\n\n\n\n\n\n\nDocker\n\n\n\n\nMost used command while working with Docker\n\n\n\n\n\n\nSep 28, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nHow to apply constraint on parameters in various GP libraries\n\n\n\n\n\nApply constraints in GPy, GPFlow, GPyTorch\n\n\n\n\n\n\nSep 27, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nUnderstanding Kernels in Gaussian Processes\n\n\n\n\n\n\n\nML\n\n\n\n\nAn exploratory analysis of kernels in GPs\n\n\n\n\n\n\nMar 22, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nProgramatically download OpenAQ data\n\n\n\n\n\nProgramatically download OpenAQ data\n\n\n\n\n\n\nSep 21, 2020\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nActive Learning with Bayesian Linear Regression\n\n\n\n\n\n\n\nML\n\n\n\n\nA programming introduction to Active Learning with Bayesian Linear Regression.\n\n\n\n\n\n\nMar 28, 2020\n\n\nZeel B Patel, Nipun Batra\n\n\n\n\n\n\nNo matching items" + }, + { + "objectID": "posts/2023-07-01-climate-modeling-with-siren.html", + "href": "posts/2023-07-01-climate-modeling-with-siren.html", + "title": "Climate Modeling with SIRENs", + "section": "", + "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n\nimport pyproj\nimport numpy as np\nimport xarray as xr\n\nimport tensorflow as tf\nfrom tensorflow.keras import layers, initializers, activations\nfrom tensorflow.keras.applications.resnet50 import ResNet50\n\nimport matplotlib.pyplot as plt\n\n2023-07-05 10:53:19.821615: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\nTo enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n2023-07-05 10:53:20.503492: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n\n\n\ndef SIREN(input_dim, output_dim, features, 
activation_scale, dropout):\n first_init = lambda input_dim: initializers.RandomUniform(-1 / input_dim, 1 / input_dim)\n other_init = lambda input_dim: initializers.RandomUniform(-np.sqrt(6 / input_dim) / activation_scale, np.sqrt(6 / input_dim) / activation_scale)\n model = tf.keras.Sequential()\n model.add(layers.Dense(features[0], input_shape=(input_dim,), kernel_initializer=first_init(input_dim), activation=tf.sin))\n for i in range(1, len(features)):\n model.add(layers.Dense(features[i], kernel_initializer=other_init(features[i-1]), activation=tf.sin))\n model.add(layers.Dropout(dropout))\n model.add(layers.Dense(output_dim, kernel_initializer=other_init(features[-1]), activation='linear'))\n return model\n\ndef MLP(input_dim, output_dim, features, dropout):\n model = tf.keras.Sequential()\n model.add(layers.Dense(features[0], input_shape=(input_dim,), activation=activations.relu))\n for i in range(1, len(features)):\n model.add(layers.Dense(features[i], activation=activations.relu))\n model.add(layers.Dropout(dropout))\n model.add(layers.Dense(output_dim, activation='linear'))\n return model\n \ndef ResNet():\n resnet = ResNet50(include_top=False, weights=None, input_shape=(64, 32, 1), pooling='avg')\n model = tf.keras.Sequential()\n model.add(resnet)\n model.add(layers.Dense(2048, activation='relu'))\n model.add(layers.Dense(32768, activation='linear'))\n return model\n\n\ndata5 = xr.open_dataset(\"../data/2m_temperature_2018_5.625deg_Jan.nc\").to_dataframe().reset_index()\ndata1 = xr.open_dataset(\"../data/2m_temperature_2018_1.40625deg_Jan.nc\").to_dataframe().reset_index()\n\n\ndata5.head()\n\n\n\n\n\n\n\n\nlon\nlat\ntime\nt2m\n\n\n\n\n0\n0.0\n-87.1875\n2018-01-01 00:00:00\n250.728180\n\n\n1\n0.0\n-87.1875\n2018-01-01 01:00:00\n250.468552\n\n\n2\n0.0\n-87.1875\n2018-01-01 02:00:00\n250.250931\n\n\n3\n0.0\n-87.1875\n2018-01-01 03:00:00\n250.040314\n\n\n4\n0.0\n-87.1875\n2018-01-01 04:00:00\n249.993790\n\n\n\n\n\n\n\n\ntime_stamp = \"2018-01-01 01:00:00\"\ntrain_df = data5[data5.time == time_stamp]\ntest_df = data1[data1.time == time_stamp]\n\nX = np.stack([train_df.lat.values, train_df.lon.values], axis=1)\ny = train_df[[\"t2m\"]].values\nprint(f\"{X.shape=}, {y.shape=}\")\n\nX_test = np.stack([test_df.lat.values, test_df.lon.values], axis=1)\ny_test = test_df[[\"t2m\"]].values\nprint(f\"{X_test.shape=}, {y_test.shape=}\")\n\n# rff = np.random.normal(size=(2, 16)) * 0.01\n# X = np.concatenate([np.sin(X @ rff), np.cos(X @ rff)], axis=1)\n# print(f\"{sin_cos.shape=}\")\n# X = X @ sin_cos\n# X_test = np.concatenate([np.sin(X_test @ rff), np.cos(X_test @ rff)], axis=1)\n\nprint(f\"{X.shape=}, {X_test.shape=}\")\n\nX.shape=(2048, 2), y.shape=(2048, 1)\nX_test.shape=(32768, 2), y_test.shape=(32768, 1)\nX.shape=(2048, 2), X_test.shape=(32768, 2)\n\n\n\nX_max = np.max(X, axis=0, keepdims=True)\nX_min = np.min(X, axis=0, keepdims=True)\n\nX_scaled = (X - X_min) / (X_max - X_min)\nX_test_scaled = (X_test - X_min) / (X_max - X_min)\n\ny_min = np.min(y, axis=0, keepdims=True)\ny_max = np.max(y, axis=0, keepdims=True)\n\ny_scaled = (y - y_min) / (y_max - y_min)\n\n# y_mean = np.mean(y, axis=0, keepdims=True)\n# y_std = np.std(y, axis=0, keepdims=True)\n\n# y_scaled = (y - y_mean) / y_std\n\n\n# model = SIREN(2, 1, [256]*4, 30.0, 0.0)\nmodel = MLP(2, 1, [256]*4, 0.0)\n# model = ResNet()\nmodel.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss='mse')\n\n2023-07-05 10:53:35.788674: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device 
/job:localhost/replica:0/task:0/device:GPU:0 with 78915 MB memory: -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:41:00.0, compute capability: 8.0\n\n\n\nhistory = model.fit(X_scaled, y_scaled, epochs=5000, batch_size=X_scaled.shape[0], verbose=0)\n\n2023-07-05 10:53:41.293413: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n2023-07-05 10:53:41.295380: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7fda0dc018f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n2023-07-05 10:53:41.295395: I tensorflow/compiler/xla/service/service.cc:177] StreamExecutor device (0): NVIDIA A100-SXM4-80GB, Compute Capability 8.0\n2023-07-05 10:53:41.300089: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n2023-07-05 10:53:41.443317: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600\n2023-07-05 10:53:41.587834: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n\n\n\nplt.plot(history.history['loss']);\n\n\n\n\n\ny_pred = model.predict(X_test_scaled) * (y_max - y_min) + y_min\nplt.imshow(y_pred.reshape(256, 128), origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\n\n1024/1024 [==============================] - 2s 1ms/step\n\n\n\n\n\n\nplt.imshow(y.reshape(64, 32), origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\n\n\n\n\n\ndiff = y_pred.reshape(256, 128) - y_test.reshape(256, 128)\nplt.imshow(diff, origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\nplt.colorbar();\nplt.title(\"Diff\")\n\nText(0.5, 1.0, 'Diff')\n\n\n\n\n\n\n# rmse = np.sqrt(np.mean(np.abs(X_test[:, 0:1])*(y_pred.ravel() - y_test.ravel())**2))/np.mean(y_test.ravel() * np.abs(X_test[:, 0:1]))\nrmse = np.sqrt(np.mean((y_pred.ravel() - y_test.ravel())**2))\nprint(f\"{rmse=}\")\n\nrmse=2.781035\n\n\n\nmean_bias = np.mean(y_pred.ravel() - y_test.ravel())\nprint(f\"{mean_bias=}\")\n\nmean_bias=-0.22927472" }, { "objectID": "posts/2023-04-29-sine-combination-netowrks.html", @@ -616,221 +630,221 @@ "text": "Plot loss surface\n\nw1 = jnp.linspace(0, 3, 100)\nw2 = jnp.linspace(0, 3, 100)\nW1, W2 = jnp.meshgrid(w1, w2)\nloss = jax.vmap(jax.vmap(lambda w1, w2: loss_fn((w1, w2), x, y)))(W1, W2)\n\n# plot the loss surface in 3D\nfig = plt.figure(figsize=(8, 6))\nax = fig.add_subplot(111, projection='3d')\nax.plot_surface(W1, W2, loss, cmap=\"viridis\", alpha=0.9);\nax.set_xlabel(\"w1\");\nax.set_ylabel(\"w2\");\n# top view\nax.view_init(30, 45)" }, { - "objectID": "posts/2023-07-01-climate-modeling-with-siren.html", - "href": "posts/2023-07-01-climate-modeling-with-siren.html", - "title": "Climate Modeling with SIRENs", + "objectID": "posts/2022-10-31-stochastic-variational-gp.html", + "href": "posts/2022-10-31-stochastic-variational-gp.html", + "title": "Stochastic Variational Gaussian processes in JAX", "section": "", - "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n\nimport pyproj\nimport numpy as np\nimport xarray as xr\n\nimport tensorflow as tf\nfrom tensorflow.keras import layers, initializers, activations\nfrom tensorflow.keras.applications.resnet50 import ResNet50\n\nimport matplotlib.pyplot as plt\n\n2023-07-05 
10:53:19.821615: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\nTo enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n2023-07-05 10:53:20.503492: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n\n\n\ndef SIREN(input_dim, output_dim, features, activation_scale, dropout):\n first_init = lambda input_dim: initializers.RandomUniform(-1 / input_dim, 1 / input_dim)\n other_init = lambda input_dim: initializers.RandomUniform(-np.sqrt(6 / input_dim) / activation_scale, np.sqrt(6 / input_dim) / activation_scale)\n model = tf.keras.Sequential()\n model.add(layers.Dense(features[0], input_shape=(input_dim,), kernel_initializer=first_init(input_dim), activation=tf.sin))\n for i in range(1, len(features)):\n model.add(layers.Dense(features[i], kernel_initializer=other_init(features[i-1]), activation=tf.sin))\n model.add(layers.Dropout(dropout))\n model.add(layers.Dense(output_dim, kernel_initializer=other_init(features[-1]), activation='linear'))\n return model\n\ndef MLP(input_dim, output_dim, features, dropout):\n model = tf.keras.Sequential()\n model.add(layers.Dense(features[0], input_shape=(input_dim,), activation=activations.relu))\n for i in range(1, len(features)):\n model.add(layers.Dense(features[i], activation=activations.relu))\n model.add(layers.Dropout(dropout))\n model.add(layers.Dense(output_dim, activation='linear'))\n return model\n \ndef ResNet():\n resnet = ResNet50(include_top=False, weights=None, input_shape=(64, 32, 1), pooling='avg')\n model = tf.keras.Sequential()\n model.add(resnet)\n model.add(layers.Dense(2048, activation='relu'))\n model.add(layers.Dense(32768, activation='linear'))\n return model\n\n\ndata5 = xr.open_dataset(\"../data/2m_temperature_2018_5.625deg_Jan.nc\").to_dataframe().reset_index()\ndata1 = xr.open_dataset(\"../data/2m_temperature_2018_1.40625deg_Jan.nc\").to_dataframe().reset_index()\n\n\ndata5.head()\n\n\n\n\n\n\n\n\nlon\nlat\ntime\nt2m\n\n\n\n\n0\n0.0\n-87.1875\n2018-01-01 00:00:00\n250.728180\n\n\n1\n0.0\n-87.1875\n2018-01-01 01:00:00\n250.468552\n\n\n2\n0.0\n-87.1875\n2018-01-01 02:00:00\n250.250931\n\n\n3\n0.0\n-87.1875\n2018-01-01 03:00:00\n250.040314\n\n\n4\n0.0\n-87.1875\n2018-01-01 04:00:00\n249.993790\n\n\n\n\n\n\n\n\ntime_stamp = \"2018-01-01 01:00:00\"\ntrain_df = data5[data5.time == time_stamp]\ntest_df = data1[data1.time == time_stamp]\n\nX = np.stack([train_df.lat.values, train_df.lon.values], axis=1)\ny = train_df[[\"t2m\"]].values\nprint(f\"{X.shape=}, {y.shape=}\")\n\nX_test = np.stack([test_df.lat.values, test_df.lon.values], axis=1)\ny_test = test_df[[\"t2m\"]].values\nprint(f\"{X_test.shape=}, {y_test.shape=}\")\n\n# rff = np.random.normal(size=(2, 16)) * 0.01\n# X = np.concatenate([np.sin(X @ rff), np.cos(X @ rff)], axis=1)\n# print(f\"{sin_cos.shape=}\")\n# X = X @ sin_cos\n# X_test = np.concatenate([np.sin(X_test @ rff), np.cos(X_test @ rff)], axis=1)\n\nprint(f\"{X.shape=}, {X_test.shape=}\")\n\nX.shape=(2048, 2), y.shape=(2048, 1)\nX_test.shape=(32768, 2), y_test.shape=(32768, 1)\nX.shape=(2048, 2), X_test.shape=(32768, 2)\n\n\n\nX_max = np.max(X, axis=0, keepdims=True)\nX_min = np.min(X, axis=0, keepdims=True)\n\nX_scaled = (X - X_min) / (X_max - X_min)\nX_test_scaled = (X_test - X_min) / (X_max - X_min)\n\ny_min = np.min(y, axis=0, keepdims=True)\ny_max = np.max(y, axis=0, 
keepdims=True)\n\ny_scaled = (y - y_min) / (y_max - y_min)\n\n# y_mean = np.mean(y, axis=0, keepdims=True)\n# y_std = np.std(y, axis=0, keepdims=True)\n\n# y_scaled = (y - y_mean) / y_std\n\n\n# model = SIREN(2, 1, [256]*4, 30.0, 0.0)\nmodel = MLP(2, 1, [256]*4, 0.0)\n# model = ResNet()\nmodel.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss='mse')\n\n2023-07-05 10:53:35.788674: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78915 MB memory: -> device: 0, name: NVIDIA A100-SXM4-80GB, pci bus id: 0000:41:00.0, compute capability: 8.0\n\n\n\nhistory = model.fit(X_scaled, y_scaled, epochs=5000, batch_size=X_scaled.shape[0], verbose=0)\n\n2023-07-05 10:53:41.293413: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n2023-07-05 10:53:41.295380: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7fda0dc018f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n2023-07-05 10:53:41.295395: I tensorflow/compiler/xla/service/service.cc:177] StreamExecutor device (0): NVIDIA A100-SXM4-80GB, Compute Capability 8.0\n2023-07-05 10:53:41.300089: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n2023-07-05 10:53:41.443317: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600\n2023-07-05 10:53:41.587834: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n\n\n\nplt.plot(history.history['loss']);\n\n\n\n\n\ny_pred = model.predict(X_test_scaled) * (y_max - y_min) + y_min\nplt.imshow(y_pred.reshape(256, 128), origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\n\n1024/1024 [==============================] - 2s 1ms/step\n\n\n\n\n\n\nplt.imshow(y.reshape(64, 32), origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\n\n\n\n\n\ndiff = y_pred.reshape(256, 128) - y_test.reshape(256, 128)\nplt.imshow(diff, origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\nplt.colorbar();\nplt.title(\"Diff\")\n\nText(0.5, 1.0, 'Diff')\n\n\n\n\n\n\n# rmse = np.sqrt(np.mean(np.abs(X_test[:, 0:1])*(y_pred.ravel() - y_test.ravel())**2))/np.mean(y_test.ravel() * np.abs(X_test[:, 0:1]))\nrmse = np.sqrt(np.mean((y_pred.ravel() - y_test.ravel())**2))\nprint(f\"{rmse=}\")\n\nrmse=2.781035\n\n\n\nmean_bias = np.mean(y_pred.ravel() - y_test.ravel())\nprint(f\"{mean_bias=}\")\n\nmean_bias=-0.22927472" + "text": "I recently read a compact and clean explanation of SVGP in the following blog post by Dr. Martin Ingram:\nNow, I am attempting to implement a practical code from scratch for the same (What is practical about it? Sometimes math does not simply translate to code without careful modifications). I am assuming that you have read the blog post cited above before moving further. Let’s go for coding!" 
}, { - "objectID": "index.html", - "href": "index.html", - "title": "blog", - "section": "", - "text": "Data Handling for Large Scale ML\n\n\n\n\n\n\n\nML\n\n\n\n\nAn exploratory analysis of various dataset handling processes to optimize memory, diskspace and speed.\n\n\n\n\n\n\nSep 30, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nBayesian Gaussian Basis Regression\n\n\n\n\n\n\n\nML\n\n\n\n\nBayesian Gaussian Basis Regression\n\n\n\n\n\n\nAug 31, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nDownload low-cost data from OpenAQ\n\n\n\n\n\n\n\nML, GP\n\n\n\n\nA guide to download low-cost sensor data from OpenAQ\n\n\n\n\n\n\nJul 26, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nClimate Modeling with GPs\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring the use of GPs for climate modeling\n\n\n\n\n\n\nJul 4, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nMulti-class classification with Gaussian Processes\n\n\n\n\n\n\n\nML, GP\n\n\n\n\nMulti-class GP classification with different strategies\n\n\n\n\n\n\nJul 4, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nClimate Modeling with SIRENs\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring the use of SIRENs for climate modeling\n\n\n\n\n\n\nJul 1, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGNNs and GPs\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring similarities between GNNs and GPs\n\n\n\n\n\n\nJun 23, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGraph Neural Networks for Regression\n\n\n\n\n\n\n\nML\n\n\n\n\nChallenges in using GNNs for regression using various strategies\n\n\n\n\n\n\nJun 12, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nBasis functions\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring basis functions\n\n\n\n\n\n\nJun 12, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nConditional Neural Processes for Image Interpolation\n\n\n\n\n\n\n\nML\n\n\n\n\nExtreme Image Interpolation with Conditional Neural processes\n\n\n\n\n\n\nMay 31, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nPasswordless SSH setup for MacOS Hosts\n\n\n\n\n\n\n\nmacOS\n\n\n\n\nA tiny handbook to setup passwordless ssh in MacOS\n\n\n\n\n\n\nMay 14, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nSine Combination Networks\n\n\n\n\n\n\n\nML\n\n\n\n\nChallenges in fitting to a combination of sine waves\n\n\n\n\n\n\nApr 29, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nNeural Network Gaussian Process\n\n\n\n\n\n\n\nGP\n\n\nML\n\n\n\n\nExploring NTK kernels + GPJax with toy datasets\n\n\n\n\n\n\nMar 28, 2023\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nStochastic Variational Gaussian processes in JAX\n\n\n\n\n\n\n\nGP\n\n\n\n\nA practical implementation of Hensman et al. 
2015 from scratch in JAX\n\n\n\n\n\n\nOct 31, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nMulti-Output Gaussian Processes\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring MOGPs from scratch\n\n\n\n\n\n\nOct 27, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGaussian Processes - A no-skip-math version\n\n\n\n\n\n\n\nML\n\n\n\n\nEnd-to-end math derivations for Gaussian process regression and classification\n\n\n\n\n\n\nOct 21, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nTrain NN with KFAC-Laplace in JAX\n\n\n\n\n\n\n\nML\n\n\n\n\nExploring KFAC-Laplace approximation on simple problems in JAX\n\n\n\n\n\n\nOct 18, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nConditional Neural Processes in JAX\n\n\n\n\n\n\n\nML\n\n\n\n\nImplementing conditional neural processes from scratch in JAX\n\n\n\n\n\n\nAug 1, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nJAX Optimizers\n\n\n\n\n\n\n\nML\n\n\n\n\nPros and cons of several jax optimizers.\n\n\n\n\n\n\nJun 10, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGet a list of contributors from a repo\n\n\n\n\n\n\n\nGitHub\n\n\n\n\nGet contributors’ list using GitHub API and pandas\n\n\n\n\n\n\nMay 17, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nIteratively reweighted least squares (IRLS) logistic regression\n\n\n\n\n\n\n\nML\n\n\n\n\nImplementation of IRLS from Probabilistic ML book of Dr. Kevin Murphy and its comparison with naive second order implementation.\n\n\n\n\n\n\nMay 14, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGcloud cheatsheet\n\n\n\n\n\n\n\nGcloud\n\n\n\n\nMost used commands while working with gcloud\n\n\n\n\n\n\nApr 9, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nGitHub Contrubuting FAQs\n\n\n\n\n\n\n\nGitHub\n\n\n\n\nThis is a collection of FAQs/road-blocks/queries/issues I had over the past 2 years of engagement with GitHub.\n\n\n\n\n\n\nApr 6, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nTorch essentials\n\n\n\n\n\n\n\nML\n\n\n\n\nPractical and direct introduction to PyTorch\n\n\n\n\n\n\nMar 8, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nProbabilistic Machine Learning\n\n\n\n\n\n\n\nML\n\n\n\n\nA video lecture series from Prof. Philipp Hennig\n\n\n\n\n\n\nMar 6, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nUncertainty in Deep Learning\n\n\n\n\n\n\n\nML\n\n\n\n\nReview of PhD thesis of Dr. 
Yarin Gal\n\n\n\n\n\n\nMar 5, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nPyTorch Tips\n\n\n\n\n\n\n\nML\n\n\n\n\nPyTorch zen tips\n\n\n\n\n\n\nFeb 25, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nConference Presentation Tips\n\n\n\n\n\n\n\nAcademic\n\n\n\n\nConference Presentation Tips\n\n\n\n\n\n\nJan 29, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nComparing Gaussian Process Regression Frameworks\n\n\n\n\n\n\n\nML\n\n\n\n\nA basic comparison among GPy, GPyTorch and TinyGP\n\n\n\n\n\n\nJan 25, 2022\n\n\nZeel B Patel, Harsh Patel, Shivam Sahni\n\n\n\n\n\n\n \n\n\n\n\nQuery by Committee\n\n\n\n\n\n\n\nML\n\n\n\n\nA programming introduction to QBC with Random Forest Classifier.\n\n\n\n\n\n\nJan 24, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nKL divergence v/s cross-entropy\n\n\n\n\n\n\n\nML\n\n\n\n\nUnderstanding KL divergence\n\n\n\n\n\n\nJan 20, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nWhy .py files are better than .ipynb files for ML codebase\n\n\n\n\n\n\n\nPython\n\n\n\n\nWhere .py files are better than .ipynb files?\n\n\n\n\n\n\nJan 15, 2022\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nAnonymization tips for double-blind submission\n\n\n\n\n\n\n\nAcademic\n\n\n\n\nA last-minute help list\n\n\n\n\n\n\nOct 26, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nInput Warped GPs - A failed idea\n\n\n\n\n\n\n\nML\n\n\n\n\nAn idea of input warping GPs\n\n\n\n\n\n\nOct 23, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nSparseGPs in Stheno\n\n\n\n\n\nA simple demo of sparse regression in stheno with VFE and FITC methods.\n\n\n\n\n\n\nOct 12, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nDocker Cheatsheet\n\n\n\n\n\n\n\nDocker\n\n\n\n\nMost used command while working with Docker\n\n\n\n\n\n\nSep 28, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nHow to apply constraint on parameters in various GP libraries\n\n\n\n\n\nApply constraints in GPy, GPFlow, GPyTorch\n\n\n\n\n\n\nSep 27, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nUnderstanding Kernels in Gaussian Processes\n\n\n\n\n\n\n\nML\n\n\n\n\nAn exploratory analysis of kernels in GPs\n\n\n\n\n\n\nMar 22, 2021\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nProgramatically download OpenAQ data\n\n\n\n\n\nProgramatically download OpenAQ data\n\n\n\n\n\n\nSep 21, 2020\n\n\nZeel B Patel\n\n\n\n\n\n\n \n\n\n\n\nActive Learning with Bayesian Linear Regression\n\n\n\n\n\n\n\nML\n\n\n\n\nA programming introduction to Active Learning with Bayesian Linear Regression.\n\n\n\n\n\n\nMar 28, 2020\n\n\nZeel B Patel, Nipun Batra\n\n\n\n\n\n\nNo matching items" + "objectID": "posts/2022-10-31-stochastic-variational-gp.html#imports", + "href": "posts/2022-10-31-stochastic-variational-gp.html#imports", + "title": "Stochastic Variational Gaussian processes in JAX", + "section": "Imports", + "text": "Imports\n\n# JAX\nimport jax\nfrom jax.flatten_util import ravel_pytree\nimport jax.numpy as jnp\nimport jax.scipy as jsp\n\n# Partially initialize functions\nfrom functools import partial\n\n# TFP\nimport tensorflow_probability.substrates.jax as tfp\ntfd = tfp.distributions\ntfb = tfp.bijectors\n\n# GP Kernels\nfrom tinygp import kernels\n\n# sklearn\nfrom sklearn.datasets import make_moons, make_blobs, make_circles\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.metrics import accuracy_score\n\n# Optimization\nimport optax\n\n# Plotting\nimport matplotlib.pyplot as plt\nplt.rcParams['scatter.edgecolors'] = \"k\"\n\n# Progress bar\nfrom tqdm import tqdm\n\n# Jitter\nJITTER = 1e-6\n\n# Enable 
JAX 64bit\njax.config.update(\"jax_enable_x64\", True)" }, { - "objectID": "about.html", - "href": "about.html", - "title": "About", - "section": "", - "text": "Hi, I am Zeel. This is my blog, where I add coding + other resources related to my research. Head over to this page for my personal website." + "objectID": "posts/2022-10-31-stochastic-variational-gp.html#dataset", + "href": "posts/2022-10-31-stochastic-variational-gp.html#dataset", + "title": "Stochastic Variational Gaussian processes in JAX", + "section": "Dataset", + "text": "Dataset\nFor this blog post, we will stick to the classification problem and pick a reasonable classification dataset.\n\nn_samples = 100\nnoise = 0.1\nrandom_state = 0\nshuffle = True\n\nX, y = make_moons(\n n_samples=n_samples, random_state=random_state, noise=noise, shuffle=shuffle\n)\nX = StandardScaler().fit_transform(X) # Yes, this is useful for GPs\n\nX, y = map(jnp.array, (X, y))\n\nplt.scatter(X[:, 0], X[:, 1], c=y);\n\nWARNING:absl:No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)" }, { - "objectID": "posts/2023-06-12-GNN_for_regression.html", - "href": "posts/2023-06-12-GNN_for_regression.html", - "title": "Graph Neural Networks for Regression", - "section": "", - "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n\nimport GPy\n\nimport torch\nimport torch.nn as nn\n\nfrom tqdm import trange\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.model_selection import train_test_split\n\ndevice = \"cuda\"" + "objectID": "posts/2022-10-31-stochastic-variational-gp.html#methodology", + "href": "posts/2022-10-31-stochastic-variational-gp.html#methodology", + "title": "Stochastic Variational Gaussian processes in JAX", + "section": "Methodology", + "text": "Methodology\nTo define a GP, we need a kernel function. Let us use the RBF or Exponentiated Quadratic or Squared Exponential kernel.\n\nlengthscale = 1.0\nvariance = 1.0\n\nkernel_fn = variance * kernels.ExpSquared(scale=lengthscale)\n\nkernel_fn(X, X).shape\n\n(100, 100)\n\n\nAs explained in the blog post, we want to minimize the following loss function:\n\\[\nKL[q(u|\\eta) || p(u|y, \\theta)] = KL[q(u|\\eta) || p(u | \\theta)] - \\mathbb{E}_{u \\sim q(u|\\eta)} \\log p(y | u, \\theta) + const\n\\]\nLet us break down the loss and discuss each componant.\n\nKL divergence\nIn the first term, we want to compute the KL divergence between prior and variational distribution of GP at inducing points. First, we need to define the inducing points.\n\nkey = jax.random.PRNGKey(0)\nn_inducing = 10\nn_dim = X.shape[1]\n\nX_inducing = jax.random.normal(key, shape=(n_inducing, n_dim))\nX_inducing.shape\n\n(10, 2)\n\n\nNow, defining the prior and variational distributions.\n\ngp_mean = 0.43 # a scalar parameter to train\n\nprior_mean = gp_mean * jnp.zeros(n_inducing)\nprior_cov = kernel_fn(X_inducing, X_inducing)\n\nprior_distribution = tfd.MultivariateNormalFullCovariance(prior_mean, prior_cov)\n\n\nvariational_mean = jax.random.uniform(key, shape=(n_inducing,)) # a vector parameter to train\n\nA covariance matrix can not be learned directly due to positive definite constraint. 
We can decompose a covariance matrix in a following way:\n\\[\n\\begin{aligned}\nK &= diag(\\boldsymbol{\\sigma})\\Sigma diag(\\boldsymbol{\\sigma})\\\\\n &= diag(\\boldsymbol{\\sigma})LL^T diag(\\boldsymbol{\\sigma})\n\\end{aligned}\n\\]\nWhere, \\(\\Sigma\\) is a correlation matrix, \\(L\\) is a lower triangular cholesky decomposition of \\(\\Sigma\\) and \\(\\boldsymbol{\\sigma}\\) is the variance vector. We can use tfb.CorrelationCholesky to generate \\(L\\) from an unconstrained vector:\n\nrandom_vector = jax.random.normal(key, shape=(3,))\ncorr_chol = tfb.CorrelationCholesky()(random_vector)\ncorrelation = corr_chol@corr_chol.T\ncorrelation\n\nDeviceArray([[ 1. , 0.54464529, -0.7835968 ],\n [ 0.54464529, 1. , -0.33059078],\n [-0.7835968 , -0.33059078, 1. ]], dtype=float64)\n\n\nTo constrain \\(\\boldsymbol{\\sigma}\\), any positivity constraint would suffice. So, combining these tricks, we can model the covariance as following:\n\nrandom_vector = jax.random.normal(\n key, shape=(n_inducing * (n_inducing - 1) // 2,)\n) # a trainable parameter\nlog_sigma = jax.random.normal(key, shape=(n_inducing, 1)) # a trainable parameter\n\n\nsigma = jnp.exp(log_sigma)\ncorr_chol = tfb.CorrelationCholesky()(random_vector)\nvariational_cov = sigma * sigma.T * (corr_chol @ corr_chol.T)\nprint(variational_cov.shape)\n\nvariational_distribution = tfd.MultivariateNormalFullCovariance(variational_mean, variational_cov\n)\n\n(10, 10)\n\n\nNow, we can compute the KL divergence:\n\nvariational_distribution.kl_divergence(prior_distribution)\n\nDeviceArray(416.89357355, dtype=float64)\n\n\n\n\nExpectation over the likelihood\nWe want to compute the following expectation:\n\\[\n-\\sum_{i=1}^N \\mathbb{E}_{f_i \\sim q(f_i | \\eta, \\theta)} \\log p(y_i| f_i, \\theta)\n\\]\nNote that, \\(p(y_i| f_i, \\theta)\\) can be any likelihood depending upon the problem, but for classification, we may use a Bernoulli likelihood.\n\nf = jax.random.normal(key, shape=y.shape)\nlikelihood_distribution = tfd.Bernoulli(logits=f)\n\nlog_likelihood = likelihood_distribution.log_prob(y).sum()\nlog_likelihood\n\nDeviceArray(-72.04665624, dtype=float64)\n\n\nWe need to sample \\(f_i\\) from \\(q(f_i | \\eta, \\theta)\\) which has the following form:\n\\[\n\\begin{aligned}\nq(u) &\\sim \\mathcal{N}(\\boldsymbol{m}, S)\\\\\nq(f_i | \\eta, \\theta) &\\sim \\mathcal{N}(\\mu_i, \\sigma_i^2)\\\\\n\\mu_i &= A\\boldsymbol{m}\\\\\n\\sigma_i^2 &= K_{ii} + A(S - K_{mm})A^T\\\\\nA &= K_{im}K_{mm}^{-1}\n\\end{aligned}\n\\]\nNote that matrix inversion is often unstable with jnp.linalg.inv and thus we will use cholesky tricks to compute \\(A\\).\n\ndef q_f(x_i):\n x_i = x_i.reshape(1, -1) # ensure correct shape\n K_im = kernel_fn(x_i, X_inducing)\n K_mm = kernel_fn(X_inducing, X_inducing)\n chol_mm = jnp.linalg.cholesky(K_mm + jnp.eye(K_mm.shape[0])*JITTER)\n A = jsp.linalg.cho_solve((chol_mm, True), K_im.T).T\n \n mu_i = A@variational_mean\n sigma_sqr_i = kernel_fn(x_i, x_i) + A@(variational_cov - prior_cov)@A.T\n \n return tfd.Normal(loc=mu_i, scale=sigma_sqr_i**0.5)\n\nHere is a function to compute log likelihood for a single data-point:\n\ndef log_likelihood(x_i, y_i, seed):\n sample = q_f(x_i).sample(seed=seed)\n log_likelihood = tfd.Bernoulli(logits=sample).log_prob(y_i)\n return log_likelihood.squeeze()\n\n\nlog_likelihood(X[0], y[0], seed=key)\n\nDeviceArray(-0.17831203, dtype=float64)\n\n\nWe can use jax.vmap to compute log_likelihood over a batch. 
With that, we can leverage the stochastic variational inference following section 10.3.1 (Eq. 10.108) from pml book2. Basically, in each iteration, we need to multiply the batch log likelihood with \\(\\frac{N}{B}\\) to get an unbiased minibatch approximation where \\(N\\) is size of the full dataset and \\(B\\) is the batch size.\n\nbatch_size = 10\n\nseeds = jax.random.split(key, num=batch_size)\n\nll = len(y)/batch_size * jax.vmap(log_likelihood)(X[:batch_size], y[:batch_size], seeds).sum()\nll\n\nDeviceArray(-215.46520331, dtype=float64)\n\n\nNote that, once the parameters are optimized, we can use the derivations of \\(q(f_i | \\eta, \\theta)\\) to compute the posterior distribution. We have figured out all the pieces by now so it is the time to put it togather in a single class. Some pointers to note are the following:\n\nWe define a single function get_constrained_params to transform all unconstrained parameters.\njax.lax.scan gives a huge boost to a training loop.\nThere is some repeatation of code due to lack of super code optimization. You can do it at your end if needed." }, { - "objectID": "posts/2023-06-12-GNN_for_regression.html#create-a-synthetic-dataset", - "href": "posts/2023-06-12-GNN_for_regression.html#create-a-synthetic-dataset", - "title": "Graph Neural Networks for Regression", - "section": "Create a synthetic dataset", - "text": "Create a synthetic dataset\n\nnp.random.seed(0)\ntorch.random.manual_seed(4)\n\nN = 50\nx = np.linspace(-1, 1, N).reshape(-1, 1)\nkernel = GPy.kern.RBF(input_dim=1, variance=1, lengthscale=0.1)\ny = np.random.multivariate_normal(np.zeros(N), kernel.K(x)).reshape(-1, 1)\ny_noisy = y + np.random.normal(0, 0.1, N).reshape(-1, 1)\n\ntrain_x, test_x, train_y, test_y = train_test_split(x, y_noisy, test_size=0.4, random_state=0)\n\nplt.plot(x, y, label=\"True\");\nplt.plot(train_x, train_y, 'o', label='train')\nplt.plot(test_x, test_y, 'o', label='test')\nplt.legend();\n\nx, y, y_noisy = map(lambda x: torch.tensor(x).float().to(device), (x, y, y_noisy))\ntrain_x, test_x, train_y, test_y = map(lambda x: torch.tensor(x).float().to(device), (train_x, test_x, train_y, test_y))\nprint(x.shape, y.shape, y_noisy.shape)\n\ntorch.Size([50, 1]) torch.Size([50, 1]) torch.Size([50, 1])" + "objectID": "posts/2022-10-31-stochastic-variational-gp.html#all-in-one", + "href": "posts/2022-10-31-stochastic-variational-gp.html#all-in-one", + "title": "Stochastic Variational Gaussian processes in JAX", + "section": "All in one", + "text": "All in one\n\nclass SVGP:\n def __init__(self, X_inducing, data_size):\n self.X_inducing = X_inducing\n self.n_inducing = len(X_inducing)\n self.data_size = data_size\n \n def init_params(self, seed):\n variational_corr_chol_param = tfb.CorrelationCholesky().inverse(jnp.eye(self.n_inducing))\n \n dummy_params = {\"log_variance\": jnp.zeros(()),\n \"log_scale\": jnp.zeros(()), \n \"mean\": jnp.zeros(()),\n \"X_inducing\": self.X_inducing,\n \"variational_mean\": jnp.zeros(self.n_inducing),\n \"variational_corr_chol_param\": variational_corr_chol_param,\n \"log_variational_sigma\": jnp.zeros((self.n_inducing, 1)),\n }\n \n flat_params, unravel_fn = ravel_pytree(dummy_params)\n random_params = jax.random.normal(key, shape=(len(flat_params), ))\n params = unravel_fn(random_params)\n return params\n \n @staticmethod\n def get_constrained_params(params):\n return {\"mean\": params[\"mean\"],\n \"variance\": jnp.exp(params['log_variance']), \n \"scale\": jnp.exp(params['log_scale']), \n \"X_inducing\": params[\"X_inducing\"],\n 
\"variational_mean\": params[\"variational_mean\"],\n \"variational_corr_chol_param\": params[\"variational_corr_chol_param\"],\n \"variational_sigma\": jnp.exp(params[\"log_variational_sigma\"])}\n \n @staticmethod\n def get_q_f(params, x_i, prior_distribution, variational_distribution):\n x_i = x_i.reshape(1, -1) # ensure correct shape\n \n kernel_fn = params['variance'] * kernels.ExpSquared(scale=params[\"scale\"])\n K_im = kernel_fn(x_i, params[\"X_inducing\"])\n K_mm = prior_distribution.covariance()\n chol_mm = jnp.linalg.cholesky(K_mm)\n A = jsp.linalg.cho_solve((chol_mm, True), K_im.T).T\n\n mu_i = A@params[\"variational_mean\"]\n sigma_sqr_i = kernel_fn(x_i, x_i) + A@(variational_distribution.covariance() - K_mm)@A.T\n\n return tfd.Normal(loc=mu_i, scale=sigma_sqr_i**0.5)\n \n def get_distributions(self, params):\n kernel_fn = params['variance'] * kernels.ExpSquared(scale=params[\"scale\"])\n prior_mean = params[\"mean\"]\n prior_cov = kernel_fn(params[\"X_inducing\"], params[\"X_inducing\"]) + jnp.eye(self.n_inducing)*JITTER\n prior_distribution = tfd.MultivariateNormalFullCovariance(prior_mean, prior_cov)\n\n corr_chol = tfb.CorrelationCholesky()(params[\"variational_corr_chol_param\"])\n sigma = jnp.diag(params[\"variational_sigma\"])\n variational_cov = sigma*sigma.T*(corr_chol@corr_chol.T) + jnp.eye(self.n_inducing)*JITTER\n variational_distribution = tfd.MultivariateNormalFullCovariance(params[\"variational_mean\"], variational_cov)\n \n return prior_distribution, variational_distribution\n \n def loss_fn(self, params, X_batch, y_batch, seed):\n params = self.get_constrained_params(params)\n \n # Get distributions\n prior_distribution, variational_distribution = self.get_distributions(params)\n \n # Compute kl\n kl = variational_distribution.kl_divergence(prior_distribution)\n\n # Compute log likelihood\n def log_likelihood_fn(x_i, y_i, seed):\n q_f = self.get_q_f(params, x_i, prior_distribution, variational_distribution)\n sample = q_f.sample(seed=seed)\n log_likelihood = tfd.Bernoulli(logits=sample).log_prob(y_i)\n return log_likelihood.squeeze()\n \n seeds = jax.random.split(seed, num=len(y_batch))\n log_likelihood = jax.vmap(log_likelihood_fn)(X_batch, y_batch, seeds).sum() * self.data_size/len(y_batch)\n\n return kl - log_likelihood\n \n def fit_fn(self, X, y, init_params, optimizer, n_iters, batch_size, seed):\n state = optimizer.init(init_params)\n value_and_grad_fn = jax.value_and_grad(self.loss_fn)\n \n def one_step(params_and_state, seed):\n params, state = params_and_state\n idx = jax.random.choice(seed, self.data_size, (batch_size,), replace=False)\n X_batch, y_batch = X[idx], y[idx]\n \n seed2 = jax.random.split(seed, 1)[0]\n loss, grads = value_and_grad_fn(params, X_batch, y_batch, seed2)\n updates, state = optimizer.update(grads, state)\n params = optax.apply_updates(params, updates)\n return (params, state), (loss, params)\n \n seeds = jax.random.split(seed, num=n_iters)\n (best_params, _), (loss_history, params_history) = jax.lax.scan(one_step, (init_params, state), xs=seeds)\n return best_params, loss_history, params_history\n\n def predict_fn(self, params, X_new):\n constrained_params = self.get_constrained_params(params)\n prior_distribution, variational_distribution = self.get_distributions(constrained_params)\n \n def _predict_fn(x_i): \n # Get posterior\n q_f = self.get_q_f(constrained_params, x_i, prior_distribution, variational_distribution)\n return q_f.mean().squeeze(), q_f.variance().squeeze()\n \n mean, var = jax.vmap(_predict_fn)(X_new)\n return 
mean.squeeze(), var.squeeze()" }, { - "objectID": "posts/2023-06-12-GNN_for_regression.html#fit-with-a-simple-mlp", - "href": "posts/2023-06-12-GNN_for_regression.html#fit-with-a-simple-mlp", - "title": "Graph Neural Networks for Regression", - "section": "Fit with a simple MLP", - "text": "Fit with a simple MLP\n\ndef fit(model, x, y, A=None, lr=0.01, epochs=100):\n optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n loss_fn = nn.MSELoss()\n \n if A is None:\n inputs = (x,)\n else:\n inputs = (x, A)\n \n losses = []\n pbar = trange(epochs)\n for epoch in pbar:\n optimizer.zero_grad()\n y_hat = model(*inputs)\n loss = loss_fn(y_hat, y)\n losses.append(loss.item())\n pbar.set_description(f\"Epoch {epoch} Loss: {loss.item()}\")\n loss.backward()\n optimizer.step()\n \n return losses\n\nclass SimpleMLP(nn.Module):\n def __init__(self, features):\n super().__init__()\n layers = [nn.Linear(1, features[0]), nn.ReLU()]\n for in_features, out_features in zip(features, features[1:]):\n layers.append(nn.Linear(in_features, out_features))\n layers.append(nn.ReLU())\n \n layers.append(nn.Linear(features[-1], 1))\n \n self.layers = nn.Sequential(*layers)\n \n def forward(self, x):\n return self.layers(x)\n\n\ntorch.manual_seed(0)\nmodel = SimpleMLP([10, 10, 10]).to(device)\nfit(model, train_x, train_y, lr=0.01, epochs=1000);\n\npred_y = model(x)\n\n(x_, y_, train_x_, train_y_, test_x_, test_y_, pred_y_) = map(lambda x: x.cpu().detach().numpy(), (x, y, train_x, train_y, test_x, test_y, pred_y))\nplt.plot(x_, y_, label=\"True\");\nplt.plot(train_x_, train_y_, 'o', label='train')\nplt.plot(test_x_, test_y_, 'o', label='test')\nplt.plot(x_, pred_y_, label='pred')\nplt.legend();\n\nEpoch 999 Loss: 0.07143261283636093: 100%|██████████| 1000/1000 [00:02<00:00, 410.79it/s]" + "objectID": "posts/2022-10-31-stochastic-variational-gp.html#train-and-predict", + "href": "posts/2022-10-31-stochastic-variational-gp.html#train-and-predict", + "title": "Stochastic Variational Gaussian processes in JAX", + "section": "Train and predict", + "text": "Train and predict\n\nn_inducing = 20\nn_epochs = 100\nbatch_size = 10\ndata_size = len(y)\nn_iters = n_epochs*(data_size/batch_size)\nn_iters\n\n1000.0\n\n\n\nkey = jax.random.PRNGKey(0)\nkey2, subkey = jax.random.split(key)\noptimizer = optax.adam(learning_rate=0.01)\n\nX_inducing = jax.random.choice(key, X, (n_inducing,), replace=False)\nmodel = SVGP(X_inducing, data_size)\n\ninit_params = model.init_params(key2)\n\nmodel.loss_fn(init_params, X, y, key)\nbest_params, loss_history, params_history = model.fit_fn(X, y, init_params, optimizer, n_iters, batch_size, subkey)\n\nplt.figure()\nplt.plot(loss_history);\nplt.title(\"Loss\");\n\n\n\n\n\nx = jnp.linspace(-3.5, 3.5, 100)\nseed = jax.random.PRNGKey(123)\n\nX1, X2 = jnp.meshgrid(x, x)\nf = lambda x1, x2: model.predict_fn(best_params, jnp.array([x1, x2]).reshape(1, -1))\npred_mean, pred_var = jax.vmap(jax.vmap(f))(X1, X2)\nlogits = tfd.Normal(pred_mean, pred_var**0.5).sample(seed=seed, sample_shape=(10000,))\nproba = jax.nn.sigmoid(logits)\n\nproba_mean = proba.mean(axis=0)\nproba_std2 = proba.std(axis=0)*2\n\n\nfig, ax = plt.subplots(1, 2, figsize=(12,4))\ncplot1 = ax[0].contourf(X1, X2, proba_mean.squeeze(), alpha=0.5, levels=20)\nplt.colorbar(cplot1, ax=ax[0])\n\ncplot2 = ax[1].contourf(X1, X2, proba_std2.squeeze(), alpha=0.5, levels=20)\nplt.colorbar(cplot2, ax=ax[1])\n\nax[0].scatter(X[:, 0], X[:, 1], c=y);\nax[1].scatter(X[:, 0], X[:, 1], c=y);\n\nax[0].set_title(\"Posterior 
$\\mu$\");\nax[1].set_title(\"Posterior $\\mu \\pm 2*\\sigma$\");" }, { - "objectID": "posts/2023-06-12-GNN_for_regression.html#create-a-gcn-layer", - "href": "posts/2023-06-12-GNN_for_regression.html#create-a-gcn-layer", - "title": "Graph Neural Networks for Regression", - "section": "Create a GCN layer", - "text": "Create a GCN layer\n\nclass GCNLayer(nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.linear = nn.Linear(in_features, out_features)\n \n def forward(self, x, A): \n return self.linear(A @ x)\n \n \nclass GCN(nn.Module):\n def __init__(self, features):\n super().__init__()\n layers = [GCNLayer(1, features[0]), nn.ReLU()]\n for in_features, out_features in zip(features, features[1:]):\n layers.append(GCNLayer(in_features, out_features))\n layers.append(nn.ReLU())\n \n layers.append(nn.Linear(features[-1], 1))\n self.layers = nn.Sequential(*layers)\n \n def forward(self, x, A):\n for layer in self.layers:\n if isinstance(layer, GCNLayer):\n x = layer(x, A)\n else:\n x = layer(x)\n return x\n \ndef get_eucledean_A(x, exponent):\n d = ((x - x.T)**2)**0.5\n d = torch.where(d==0, torch.min(d[d!=0])/2, d) # self distance is 0, so replace it with half of the min distance\n A = 1/(d**exponent)\n return A/A.sum(dim=1, keepdim=True)\n\ndef get_KNN_A(x, k):\n d = torch.abs(x - x.T)\n A = torch.zeros_like(d)\n _, indices = torch.topk(d, k, dim=1, largest=False)\n for i, index in enumerate(indices):\n A[i, index] = 1\n return A/A.sum(dim=1, keepdim=True)\n\ndef fit_and_plot(title):\n model = GCN([10, 10, 10]).to(device)\n losses = fit(model, train_x, train_y, A=A_train, lr=0.001, epochs=3000);\n\n pred_y = model(x, A_all)\n\n fig, ax = plt.subplots(1, 2, figsize=(12, 4))\n axes = ax[0]\n axes.plot(losses)\n axes.set_title(\"Losses\")\n\n (x_, y_, train_x_, train_y_, test_x_, test_y_, pred_y_) = map(lambda x: x.cpu().detach().numpy(), (x, y, train_x, train_y, test_x, test_y, pred_y))\n axes = ax[1]\n axes.plot(x_, y_, label=\"True\");\n axes.plot(train_x_, train_y_, 'o', label='train')\n axes.plot(test_x_, test_y_, 'o', label='test')\n axes.plot(x_, pred_y_, label='pred')\n axes.set_title(title)\n axes.legend();" + "objectID": "posts/2022-10-31-stochastic-variational-gp.html#some-more-datasets", + "href": "posts/2022-10-31-stochastic-variational-gp.html#some-more-datasets", + "title": "Stochastic Variational Gaussian processes in JAX", + "section": "Some more datasets", + "text": "Some more datasets\n\ndef fit_and_plot(X, y):\n X = StandardScaler().fit_transform(X) # Yes, this is useful for GPs\n X, y = map(jnp.array, (X, y))\n\n X_inducing = jax.random.choice(key, X, (n_inducing,), replace=False)\n model = SVGP(X_inducing, data_size)\n\n init_params = model.init_params(key2)\n\n model.loss_fn(init_params, X, y, key)\n best_params, loss_history, params_history = model.fit_fn(X, y, init_params, optimizer, n_iters, batch_size, subkey)\n\n plt.figure()\n plt.plot(loss_history);\n plt.title(\"Loss\");\n \n f = lambda x1, x2: model.predict_fn(best_params, jnp.array([x1, x2]).reshape(1, -1))\n pred_mean, pred_var = jax.vmap(jax.vmap(f))(X1, X2)\n logits = tfd.Normal(pred_mean, pred_var**0.5).sample(seed=seed, sample_shape=(10000,))\n proba = jax.nn.sigmoid(logits)\n\n proba_mean = proba.mean(axis=0)\n proba_std2 = proba.std(axis=0)*2\n \n fig, ax = plt.subplots(1, 2, figsize=(12,4))\n cplot1 = ax[0].contourf(X1, X2, proba_mean.squeeze(), alpha=0.5, levels=20)\n plt.colorbar(cplot1, ax=ax[0])\n\n cplot2 = ax[1].contourf(X1, X2, proba_std2.squeeze(), alpha=0.5, 
levels=20)\n plt.colorbar(cplot2, ax=ax[1])\n\n ax[0].scatter(X[:, 0], X[:, 1], c=y);\n ax[1].scatter(X[:, 0], X[:, 1], c=y);\n\n ax[0].set_title(\"Posterior $\\mu$\");\n ax[1].set_title(\"Posterior $\\mu \\pm 2*\\sigma$\");\n\n\nmake_blobs\n\nX, y = make_blobs(n_samples=n_samples, random_state=random_state, centers=2)\n\nplt.scatter(X[:, 0], X[:, 1], c=y);\nfit_and_plot(X, y)\n\n\n\n\n\n\n\n\n\n\n\n\nmake_circles\n\nX, y = make_circles(n_samples=n_samples, random_state=random_state, noise=noise, factor=0.1)\n\nplt.scatter(X[:, 0], X[:, 1], c=y);\nfit_and_plot(X, y)" }, { - "objectID": "posts/2023-06-12-GNN_for_regression.html#idw-setting", - "href": "posts/2023-06-12-GNN_for_regression.html#idw-setting", - "title": "Graph Neural Networks for Regression", - "section": "IDW setting", - "text": "IDW setting\n\nexponent = 1\nA_train = get_eucledean_A(train_x, exponent).to(device)\nA_all = get_eucledean_A(x, exponent).to(device)\ntitle = f\"Distance based adjacency matrix with exponent {exponent}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.05447980388998985: 100%|██████████| 3000/3000 [00:07<00:00, 390.93it/s] \n\n\n\n\n\n\nexponent = 2\nA_train = get_eucledean_A(train_x, exponent).to(device)\nA_all = get_eucledean_A(x, exponent).to(device)\ntitle = f\"Distance based adjacency matrix with exponent {exponent}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.06475391983985901: 100%|██████████| 3000/3000 [00:07<00:00, 413.49it/s]\n\n\n\n\n\n\nexponent = 3\nA_train = get_eucledean_A(train_x, exponent).to(device)\nA_all = get_eucledean_A(x, exponent).to(device)\ntitle = f\"Distance based adjacency matrix with exponent {exponent}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.043554823845624924: 100%|██████████| 3000/3000 [00:08<00:00, 367.28it/s]" + "objectID": "posts/2023-06-23-GNNs_and_GPs.html", + "href": "posts/2023-06-23-GNNs_and_GPs.html", + "title": "GNNs and GPs", + "section": "", + "text": "import GPy\nimport numpy as np\nimport pandas as pd\n\nfrom sklearn.preprocessing import MinMaxScaler, StandardScaler\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestRegressor\n\nimport regdata as rd\nimport matplotlib.pyplot as plt\n\nimport torch\nimport torch.nn as nn\n\n\nx_train, y_train, x_test = rd.Step().get_data()\ny_train = y_train.reshape(-1, 1)\nx_test = x_test * 1.5\nprint(x_train.shape, y_train.shape, x_test.shape)\n\nplt.scatter(x_train, y_train, label='train');\n\n(50, 1) (50, 1) (100, 1)\n\n\n\n\n\n\nkernel = GPy.kern.RBF(1, variance=1, lengthscale=1)\nmodel = GPy.models.GPRegression(x_train, y_train.reshape(-1, 1), kernel)\nmodel.Gaussian_noise.variance = 0.1\n\ny_pred_gp, y_var = model.predict(x_test)\n\nplt.scatter(x_train, y_train, label='train');\nplt.plot(x_test, y_pred_gp, label='pred');\n\n\n\n\n\nclass GCN_Forward(nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.fc = nn.Linear(in_features, out_features)\n \n def forward(self, x, A):\n x = self.fc(x)\n x = torch.matmul(A, x)\n return x\n \nclass GCN_Reverse(nn.Module):\n def __init__(self, in_features, out_features):\n super().__init__()\n self.fc = nn.Linear(in_features, out_features)\n \n def forward(self, x, A):\n x = torch.matmul(A, x)\n x = self.fc(x)\n return x\n\nclass NN(nn.Module):\n def __init__(self, features):\n super().__init__()\n self.features = features\n \n for i, (in_features, out_features) in enumerate(zip(features[:-1], features[1:])):\n setattr(self, f'layer_{i}', nn.Linear(in_features, out_features))\n \n 
self.last_layer = nn.Linear(features[-1], 1)\n \n def forward(self, x, A):\n for i in range(len(self.features) - 1):\n if isinstance(getattr(self, f'layer_{i}'), GCN_Forward):\n x = getattr(self, f'layer_{i}')(x, A)\n else:\n x = getattr(self, f'layer_{i}')(x)\n x = nn.functional.gelu(x)\n \n x = self.last_layer(x)\n return x\n\nclass GCN(NN):\n def __init__(self, features):\n super().__init__(features)\n for i, (in_features, out_features) in enumerate(zip(features[:-1], features[1:])):\n setattr(self, f'layer_{i}', GCN_Forward(in_features, out_features))\n\n\nA = torch.tensor(kernel.K(x_train, x_train)).float()\n# A.fill_diagonal_(0)\nA = A / A.sum(dim=0, keepdim=True)\n# A.fill_diagonal_(1)\n\nnum_epochs = 500\nfeatures = [1, 1024]\n\ngcn_model = GCN(features=features)\nnn_model = NN(features=features)\n\ngcn_optimizer = torch.optim.Adam(gcn_model.parameters(), lr=0.01)\nnn_optimizer = torch.optim.Adam(nn_model.parameters(), lr=0.01)\n\ncriterion = nn.MSELoss()\n\nx_train_torch = torch.from_numpy(x_train).float()\ny_train_torch = torch.from_numpy(y_train).float()\n\ngcn_losses = []\nnn_losses = []\nfor epoch in range(num_epochs):\n gcn_optimizer.zero_grad()\n nn_optimizer.zero_grad()\n \n y_out_gcn = gcn_model(x_train_torch, A)\n y_out_nn = nn_model(x_train_torch, A)\n gcn_loss = criterion(y_out_gcn, y_train_torch)\n nn_loss = criterion(y_out_nn, y_train_torch)\n \n gcn_loss.backward()\n nn_loss.backward()\n \n gcn_losses.append(gcn_loss.item())\n nn_losses.append(nn_loss.item())\n \n gcn_optimizer.step()\n nn_optimizer.step()\n \nplt.plot(gcn_losses, label='gcn');\nplt.plot(nn_losses, label='nn');\nplt.legend();\n\n\n\n\n\nA_test = torch.tensor(kernel.K(x_test, x_test)).float()\n# A_test.fill_diagonal_(0)\nA_test = A_test / A_test.sum(dim=0, keepdim=True)\n# A_test.fill_diagonal_(1)\n\ny_pred_nn = nn_model(torch.from_numpy(x_test).float(), A_test).detach().numpy()\ny_pred_gcn = gcn_model(torch.from_numpy(x_test).float(), A_test).detach().numpy()\n\nplt.figure(figsize=(10, 6))\nplt.scatter(x_train, y_train, label='train');\nplt.plot(x_train, y_out_gcn.detach().numpy(), label='pred GCN train');\nplt.plot(x_train, y_out_nn.detach().numpy(), label='pred NN train');\nplt.plot(x_test, y_pred_gp, label='pred GP', linestyle='--');\nplt.plot(x_test, y_pred_nn, label='pred NN');\nplt.plot(x_test, y_pred_gcn, label='pred GCN');\nplt.ylim(-3, 3);\nplt.legend();" }, { - "objectID": "posts/2023-06-12-GNN_for_regression.html#knn-setting", - "href": "posts/2023-06-12-GNN_for_regression.html#knn-setting", - "title": "Graph Neural Networks for Regression", - "section": "KNN Setting", - "text": "KNN Setting\n\nK = 1\nA_train = get_KNN_A(train_x, K).to(device)\nA_all = get_KNN_A(x, K).to(device)\ntitle = f\"KNN based adjacency matrix with K={K}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.04107221961021423: 100%|██████████| 3000/3000 [00:07<00:00, 383.88it/s] \n\n\n\n\n\n\nK = 3\nA_train = get_KNN_A(train_x, K).to(device)\nA_all = get_KNN_A(x, K).to(device)\ntitle = f\"KNN based adjacency matrix with K={K}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.14372628927230835: 100%|██████████| 3000/3000 [00:07<00:00, 404.74it/s]\n\n\n\n\n\n\nK = 7\nA_train = get_KNN_A(train_x, K).to(device)\nA_all = get_KNN_A(x, K).to(device)\ntitle = f\"KNN based adjacency matrix with K={K}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.13950258493423462: 100%|██████████| 3000/3000 [00:07<00:00, 381.66it/s]\n\n\n\n\n\n\nK = 15\nA_train = get_KNN_A(train_x, K).to(device)\nA_all = get_KNN_A(x, K).to(device)\ntitle = f\"KNN 
based adjacency matrix with K={K}\"\n\nfit_and_plot(title)\n\nEpoch 2999 Loss: 0.33879855275154114: 100%|██████████| 3000/3000 [00:07<00:00, 376.56it/s]" + "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html", + "href": "posts/2022-05-17-contributors_sorted_by_prs.html", + "title": "Get a list of contributors from a repo", + "section": "", + "text": "import pandas as pd" }, { - "objectID": "posts/2023-05-31-CNPs_for_Images.html", - "href": "posts/2023-05-31-CNPs_for_Images.html", - "title": "Conditional Neural Processes for Image Interpolation", - "section": "", - "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n# turn off preallocation by JAX\nos.environ[\"XLA_PYTHON_CLIENT_PREALLOCATE\"] = \"false\"\n\nimport numpy as np\nimport pandas as pd\n\nfrom tqdm import tqdm\nimport jax\nimport jax.numpy as jnp\nimport flax.linen as nn\n\nimport distrax as dx\n\nimport optax\n\n# load mnist dataset from tensorflow datasets\nimport tensorflow_datasets as tfds\n\nfrom sklearn.model_selection import train_test_split\n\nimport matplotlib.pyplot as plt\n# define initializers\ndef first_layer_init(key, shape, dtype=jnp.float32):\n num_input = shape[0] # reverse order compared to torch\n return jax.random.uniform(key, shape, dtype, minval=-1.0/num_input, maxval=1.0/num_input)\n\ndef other_layers_init(key, shape, dtype=jnp.float32):\n num_input = shape[0] # reverse order compared to torch\n return jax.random.uniform(key, shape, dtype, minval=-np.sqrt(6 / num_input)/30, maxval=np.sqrt(6 / num_input)/30)\n\nclass Encoder(nn.Module):\n features: list\n encoding_dims: int\n\n @nn.compact\n def __call__(self, x_context, y_context):\n x = jnp.hstack([x_context, y_context.reshape(x_context.shape[0], -1)])\n \n x = nn.Dense(self.features[0], kernel_init=first_layer_init, bias_init=first_layer_init)(x)\n x = jnp.sin(30*x)\n # x = nn.Dense(self.features[0])(x)\n # x = nn.relu(x)\n \n \n for n_features in self.features[1:]:\n x = nn.Dense(n_features, kernel_init=other_layers_init, bias_init=other_layers_init)(x)\n x = jnp.sin(30*x)\n # x = nn.Dense(n_features)(x)\n # x = nn.relu(x)\n\n x = nn.Dense(self.encoding_dims)(x)\n\n representation = x.mean(axis=0, keepdims=True) # option 1\n return representation # (1, encoding_dims)\n\nclass Decoder(nn.Module):\n features: list\n output_dim: int\n\n @nn.compact\n def __call__(self, representation, x):\n representation = jnp.repeat(representation, x.shape[0], axis=0)\n x = jnp.hstack([representation, x])\n \n x = nn.Dense(self.features[0], kernel_init=first_layer_init, bias_init=first_layer_init)(x)\n x = jnp.sin(30*x)\n # x = nn.Dense(self.features[0])(x)\n # x = nn.relu(x)\n\n for n_features in self.features:\n x = nn.Dense(n_features, kernel_init=other_layers_init, bias_init=other_layers_init)(x)\n x = jnp.sin(30*x)\n # x = nn.Dense(n_features)(x)\n # x = nn.relu(x)\n\n x = nn.Dense(self.output_dim*2)(x)\n loc, raw_scale = x[:, :self.output_dim], x[:, self.output_dim:]\n scale = jnp.exp(raw_scale)\n \n return loc, scale\n\nclass CNP(nn.Module):\n encoder_features: list\n encoding_dims: int\n decoder_features: list\n output_dim: int\n\n @nn.compact\n def __call__(self, x_content, y_context, x_target):\n representation = Encoder(self.encoder_features, self.encoding_dims)(x_content, y_context)\n loc, scale = Decoder(self.decoder_features, self.output_dim)(representation, x_target)\n return loc, scale\n\n def loss_fn(self, params, x_context, y_context, x_target, y_target):\n loc, scale = self.apply(params, x_context, y_context, x_target)\n 
predictive_distribution = dx.MultivariateNormalDiag(loc=loc, scale_diag=0.005+scale)\n return -predictive_distribution.log_prob(y_target)" + "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#config", + "href": "posts/2022-05-17-contributors_sorted_by_prs.html#config", + "title": "Get a list of contributors from a repo", + "section": "Config", + "text": "Config\n\nowner = \"probml\"\nrepo = \"pyprobml\"" }, { - "objectID": "posts/2023-05-31-CNPs_for_Images.html#load-mnist", - "href": "posts/2023-05-31-CNPs_for_Images.html#load-mnist", - "title": "Conditional Neural Processes for Image Interpolation", - "section": "Load MNIST", - "text": "Load MNIST\n\nds = tfds.load('mnist')\n\n\ndef dataset_to_arrays(dataset):\n data = []\n labels = []\n stopper = 0\n end = 100\n for sample in dataset:\n data.append(sample[\"image\"].numpy())\n labels.append(sample[\"label\"].numpy())\n stopper += 1\n if stopper == end:\n break\n return np.array(data), np.array(labels)[..., None]\n\ntrain_data, train_labels = dataset_to_arrays(ds[\"train\"])\ntest_data, test_labels = dataset_to_arrays(ds[\"test\"])\n\ntrain_data.shape, train_labels.shape, test_data.shape, test_labels.shape\n\n2023-06-02 09:58:48.609001: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.\n2023-06-02 09:58:48.681190: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. 
You should use `dataset.take(k).cache().repeat()` instead.\n\n\n((100, 28, 28, 1), (100, 1), (100, 28, 28, 1), (100, 1))\n\n\n\ncoords = np.linspace(-1, 1, 28)\nx, y = np.meshgrid(coords, coords)\ntrain_X = jnp.stack([x, y], axis=-1).reshape(-1, 2)\n\ntrain_y = jax.vmap(lambda x: x.reshape(-1, 1))(train_data) / 255.0\ntrain_X.shape, train_y.shape, type(train_X), type(train_y)\n\n((784, 2),\n (100, 784, 1),\n jaxlib.xla_extension.ArrayImpl,\n jaxlib.xla_extension.ArrayImpl)\n\n\n\niterations = 10000\n\ndef loss_fn(params, context_X, context_y, target_X, target_y):\n def loss_fn_per_sample(context_X, context_y, target_X, target_y):\n loc, scale = model.apply(params, context_X, context_y, target_X)\n # predictive_distribution = dx.MultivariateNormalDiag(loc=loc, scale_diag=scale)\n # return -predictive_distribution.log_prob(target_y)\n return jnp.square(loc.ravel() - target_y.ravel()).mean()\n \n return jax.vmap(loss_fn_per_sample, in_axes=(None, 0, None, 0))(context_X, context_y, target_X, target_y).mean()\n\nvalue_and_grad_fn = jax.jit(jax.value_and_grad(loss_fn))\nmodel = CNP([256]*2, 128, [256]*4, 1)\nparams = model.init(jax.random.PRNGKey(0), train_X, train_y[0], train_X)\noptimizer = optax.adam(1e-5)\nstate = optimizer.init(params)\n\n# losses = []\n# for iter in tqdm(range(iterations)):\n# tmp_index = jax.random.permutation(jax.random.PRNGKey(iter), index)\n# context_X = train_X[tmp_index][:int(train_X.shape[0]*0.05)]\n# context_y = train_y[:, tmp_index, :][:, :int(train_X.shape[0]*0.05), :]\n# target_X = train_X[tmp_index][int(train_X.shape[0]*0.05):]\n# target_y = train_y[:, tmp_index, :][:, int(train_X.shape[0]*0.05):, :]\n \n# # print(context_X.shape, context_y.shape, target_X.shape, target_y.shape)\n# # print(loss_fn(params, context_X, context_y, target_X, target_y).shape)\n \n# loss, grads = value_and_grad_fn(params, context_X, context_y, target_X, target_y)\n# updates, state = optimizer.update(grads, state)\n# params = optax.apply_updates(params, updates)\n# losses.append(loss.item())\n\ndef one_step(params_and_state, key):\n params, state = params_and_state\n tmp_index = jax.random.permutation(key, train_X.shape[0])\n context_X = train_X[tmp_index][:int(train_X.shape[0]*0.05)]\n context_y = train_y[:, tmp_index, :][:, :int(train_X.shape[0]*0.05), :]\n target_X = train_X[tmp_index][int(train_X.shape[0]*0.05):]\n target_y = train_y[:, tmp_index, :][:, int(train_X.shape[0]*0.05):, :]\n loss, grads = value_and_grad_fn(params, context_X, context_y, target_X, target_y)\n updates, state = optimizer.update(grads, state)\n params = optax.apply_updates(params, updates)\n return (params, state), loss\n\n(params, state), loss_history = jax.lax.scan(one_step, (params, state), jax.random.split(jax.random.PRNGKey(0), iterations))\n\n\nplt.plot(loss_history[10:]);\n\n\n\n\n\ntest_key = jax.random.PRNGKey(0)\ntmp_index = jax.random.permutation(test_key, train_X.shape[0])\ncontext_X = train_X[tmp_index][:int(train_X.shape[0]*0.5)]\ncontext_y = train_y[:, tmp_index, :][:, :int(train_X.shape[0]*0.5), :]\ntarget_X = train_X#[tmp_index][int(train_X.shape[0]*0.5):]\ntarget_y = train_y#[:, tmp_index, :][:, int(train_X.shape[0]*0.5):, :]\n\nid = 91\nplt.imshow(train_y[id].reshape(28, 28), cmap=\"gray\", interpolation=None);\n\nlocs, scales = jax.vmap(model.apply, in_axes=(None, None, 0, None))(params, context_X, context_y, target_X)\n# full_preds = jnp.concatenate([context_y, locs], axis=1)\n# full_preds = full_preds.at[:, tmp_index, 
:].set(full_preds).__array__()\n\nplt.figure()\nplt.imshow(locs[id].reshape(28, 28), cmap=\"gray\", interpolation=None);" + "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#get-all-contributors-to-a-repo", + "href": "posts/2022-05-17-contributors_sorted_by_prs.html#get-all-contributors-to-a-repo", + "title": "Get a list of contributors from a repo", + "section": "Get all contributors to a repo", + "text": "Get all contributors to a repo\n\ncontributors = pd.read_json(f\"https://api.github.com/repos/{owner}/{repo}/contributors?per_page=100\")\ncontributors = contributors.set_index(\"login\")\nprint(f\"Number of contributors: {len(contributors.index.unique())}\")\ncontributors.head(2)\n\nNumber of contributors: 47\n\n\n\n \n \n \n\n\n\n\n\n\nid\nnode_id\navatar_url\ngravatar_id\nurl\nhtml_url\nfollowers_url\nfollowing_url\ngists_url\nstarred_url\nsubscriptions_url\norganizations_url\nrepos_url\nevents_url\nreceived_events_url\ntype\nsite_admin\ncontributions\n\n\nlogin\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nmurphyk\n4632336\nMDQ6VXNlcjQ2MzIzMzY=\nhttps://avatars.githubusercontent.com/u/463233...\n\nhttps://api.github.com/users/murphyk\nhttps://github.com/murphyk\nhttps://api.github.com/users/murphyk/followers\nhttps://api.github.com/users/murphyk/following...\nhttps://api.github.com/users/murphyk/gists{/gi...\nhttps://api.github.com/users/murphyk/starred{/...\nhttps://api.github.com/users/murphyk/subscript...\nhttps://api.github.com/users/murphyk/orgs\nhttps://api.github.com/users/murphyk/repos\nhttps://api.github.com/users/murphyk/events{/p...\nhttps://api.github.com/users/murphyk/received_...\nUser\nFalse\n1777\n\n\nNeoanarika\n5188337\nMDQ6VXNlcjUxODgzMzc=\nhttps://avatars.githubusercontent.com/u/518833...\n\nhttps://api.github.com/users/Neoanarika\nhttps://github.com/Neoanarika\nhttps://api.github.com/users/Neoanarika/followers\nhttps://api.github.com/users/Neoanarika/follow...\nhttps://api.github.com/users/Neoanarika/gists{...\nhttps://api.github.com/users/Neoanarika/starre...\nhttps://api.github.com/users/Neoanarika/subscr...\nhttps://api.github.com/users/Neoanarika/orgs\nhttps://api.github.com/users/Neoanarika/repos\nhttps://api.github.com/users/Neoanarika/events...\nhttps://api.github.com/users/Neoanarika/receiv...\nUser\nFalse\n184" }, { - "objectID": "posts/2022-03-08-torch-essentials.html", - "href": "posts/2022-03-08-torch-essentials.html", - "title": "Torch essentials", - "section": "", - "text": "import torch\nimport numpy as np\n\n\ntensor1 = torch.tensor([1,2,3.], dtype=torch.float32)\ntensor2 = torch.tensor([5,6,7.], dtype=torch.float64)\ndisplay(tensor1, tensor2)\n\ntensor([1., 2., 3.])\n\n\ntensor([5., 6., 7.], dtype=torch.float64)\n\n\n\ndisplay(type(tensor1), type(tensor2))\n\ntorch.Tensor\n\n\ntorch.Tensor\n\n\n\ndisplay(tensor1.dtype, tensor2.dtype)\n\ntorch.float32\n\n\ntorch.float64\n\n\n\nlong_tensor = tensor1.to(torch.int32) # device, dtype, tensor\ndisplay(long_tensor)\n\ntensor([1, 2, 3], dtype=torch.int32)\n\n\n\nlong_tensor.device\n\ndevice(type='cpu')\n\n\n\ndevice = 'cuda' if torch.cuda.is_available() else 'cpu'\nlong_tensor_gpu = long_tensor.to(device)\nlong_tensor_gpu\n\ntensor([1, 2, 3], device='cuda:0', dtype=torch.int32)\n\n\n\nlong_tensor_born_on_gpu = torch.zeros(2,10, device=device).to(torch.float64)\nlong_tensor_born_on_gpu\n\ntensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0',\n dtype=torch.float64)\n\n\n\ninspired_tensor = 
torch.tensor([1.,2.]).to(long_tensor_born_on_gpu)\ninspired_tensor\n\ntensor([1., 2.], device='cuda:0', dtype=torch.float64)\n\n\n\nnp_array = np.array([1,2,3.])\nnp_array.log()\n\nAttributeError: 'numpy.ndarray' object has no attribute 'log'\n\n\n\npt_array = torch.tensor([1,2,3.])\npt_array.log() # sin(), cos(), tan(), exp()\n\ntensor([0.0000, 0.6931, 1.0986])" + "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#fetch-all-prs-from-a-repo", + "href": "posts/2022-05-17-contributors_sorted_by_prs.html#fetch-all-prs-from-a-repo", + "title": "Get a list of contributors from a repo", + "section": "Fetch all PRs from a repo", + "text": "Fetch all PRs from a repo\n\npage_range = range(1, 6)\nget_pr_df = lambda page: pd.read_json(f\"https://api.github.com/repos/probml/pyprobml/pulls?state=all&per_page=100&page={page}\")\npull_requests = pd.concat(map(get_pr_df, page_range))\nprint(f\"Number of PRs: {len(pull_requests)}\")\npull_requests.head(2)\n\nNumber of PRs: 497\n\n\n\n \n \n \n\n\n\n\n\n\nurl\nid\nnode_id\nhtml_url\ndiff_url\npatch_url\nissue_url\nnumber\nstate\nlocked\n...\nreview_comments_url\nreview_comment_url\ncomments_url\nstatuses_url\nhead\nbase\n_links\nauthor_association\nauto_merge\nactive_lock_reason\n\n\n\n\n0\nhttps://api.github.com/repos/probml/pyprobml/p...\n938329819\nPR_kwDOA-3vB8437cbb\nhttps://github.com/probml/pyprobml/pull/841\nhttps://github.com/probml/pyprobml/pull/841.diff\nhttps://github.com/probml/pyprobml/pull/841.patch\nhttps://api.github.com/repos/probml/pyprobml/i...\n841\nclosed\nFalse\n...\nhttps://api.github.com/repos/probml/pyprobml/p...\nhttps://api.github.com/repos/probml/pyprobml/p...\nhttps://api.github.com/repos/probml/pyprobml/i...\nhttps://api.github.com/repos/probml/pyprobml/s...\n{'label': 'karm-patel:posrprocessing', 'ref': ...\n{'label': 'probml:master', 'ref': 'master', 's...\n{'self': {'href': 'https://api.github.com/repo...\nCONTRIBUTOR\nNaN\nNaN\n\n\n1\nhttps://api.github.com/repos/probml/pyprobml/p...\n938317389\nPR_kwDOA-3vB8437ZZN\nhttps://github.com/probml/pyprobml/pull/840\nhttps://github.com/probml/pyprobml/pull/840.diff\nhttps://github.com/probml/pyprobml/pull/840.patch\nhttps://api.github.com/repos/probml/pyprobml/i...\n840\nclosed\nFalse\n...\nhttps://api.github.com/repos/probml/pyprobml/p...\nhttps://api.github.com/repos/probml/pyprobml/p...\nhttps://api.github.com/repos/probml/pyprobml/i...\nhttps://api.github.com/repos/probml/pyprobml/s...\n{'label': 'karm-patel:master', 'ref': 'master'...\n{'label': 'probml:master', 'ref': 'master', 's...\n{'self': {'href': 'https://api.github.com/repo...\nCONTRIBUTOR\nNaN\nNaN\n\n\n\n\n\n2 rows × 36 columns" }, { - "objectID": "posts/2022-03-08-torch-essentials.html#lets-go-hands-on", - "href": "posts/2022-03-08-torch-essentials.html#lets-go-hands-on", - "title": "Torch essentials", - "section": "", - "text": "import torch\nimport numpy as np\n\n\ntensor1 = torch.tensor([1,2,3.], dtype=torch.float32)\ntensor2 = torch.tensor([5,6,7.], dtype=torch.float64)\ndisplay(tensor1, tensor2)\n\ntensor([1., 2., 3.])\n\n\ntensor([5., 6., 7.], dtype=torch.float64)\n\n\n\ndisplay(type(tensor1), type(tensor2))\n\ntorch.Tensor\n\n\ntorch.Tensor\n\n\n\ndisplay(tensor1.dtype, tensor2.dtype)\n\ntorch.float32\n\n\ntorch.float64\n\n\n\nlong_tensor = tensor1.to(torch.int32) # device, dtype, tensor\ndisplay(long_tensor)\n\ntensor([1, 2, 3], dtype=torch.int32)\n\n\n\nlong_tensor.device\n\ndevice(type='cpu')\n\n\n\ndevice = 'cuda' if torch.cuda.is_available() else 'cpu'\nlong_tensor_gpu = 
long_tensor.to(device)\nlong_tensor_gpu\n\ntensor([1, 2, 3], device='cuda:0', dtype=torch.int32)\n\n\n\nlong_tensor_born_on_gpu = torch.zeros(2,10, device=device).to(torch.float64)\nlong_tensor_born_on_gpu\n\ntensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0',\n dtype=torch.float64)\n\n\n\ninspired_tensor = torch.tensor([1.,2.]).to(long_tensor_born_on_gpu)\ninspired_tensor\n\ntensor([1., 2.], device='cuda:0', dtype=torch.float64)\n\n\n\nnp_array = np.array([1,2,3.])\nnp_array.log()\n\nAttributeError: 'numpy.ndarray' object has no attribute 'log'\n\n\n\npt_array = torch.tensor([1,2,3.])\npt_array.log() # sin(), cos(), tan(), exp()\n\ntensor([0.0000, 0.6931, 1.0986])" + "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#get-a-list-of-contributors-sorted-by-count-of-prs", + "href": "posts/2022-05-17-contributors_sorted_by_prs.html#get-a-list-of-contributors-sorted-by-count-of-prs", + "title": "Get a list of contributors from a repo", + "section": "Get a list of contributors sorted by count of PRs", + "text": "Get a list of contributors sorted by count of PRs\n\npull_requests['login'] = pull_requests['user'].apply(lambda x: x[\"login\"])\nsorted_by_pr_count = pull_requests.groupby(\"login\").agg({'url': len}).sort_values(by='url', ascending=False)\nsorted_by_pr_count.rename(columns={'url': 'Number of PRs'}, inplace=True)\nsorted_by_pr_count.head(5)\n\n\n \n \n \n\n\n\n\n\n\nNumber of PRs\n\n\nlogin\n\n\n\n\n\nDrishttii\n79\n\n\ngerdm\n55\n\n\nkaralleyna\n43\n\n\nalways-newbie161\n29\n\n\nkarm-patel\n29" }, { - "objectID": "posts/2022-03-08-torch-essentials.html#gradient-is-all-you-need", - "href": "posts/2022-03-08-torch-essentials.html#gradient-is-all-you-need", - "title": "Torch essentials", - "section": "Gradient is all you need", - "text": "Gradient is all you need\n\nimport matplotlib.pyplot as plt\n\n\nx = torch.rand(5,1)\ny = 3 * x + 2 + torch.randn_like(x)*0.1\n\nplt.scatter(x, y);\n\n\n\n\n\nx_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)\nx_plus_ones.shape\n\ntorch.Size([5, 2])\n\n\n\ntheta = torch.zeros(2,1, requires_grad=True)\ntheta\n\ntensor([[0.],\n [0.]], requires_grad=True)\n\n\n\ntheta.grad\n\n\ntheta.grad_fn\n\n\nlr = 0.1\n\ny_pred = x_plus_ones@theta\nloss = ((y_pred - y)**2).mean()\nloss.backward()\n# y_pred = torch.matmul(x_plus_ones, theta)\n# y_pred = torch.mm(x_plus_ones, theta)\n\n\ntheta.grad # dloss/dtheta\n\ntensor([[-6.3681],\n [-2.8128]])\n\n\n\ntheta.grad_fn\n\n\ntheta.data -= lr * theta.grad.data\n\n\ntheta\n\ntensor([[0.6368],\n [0.2813]], requires_grad=True)\n\n\n\ntheta.grad_fn\n\n\nwith torch.no_grad():\n plt.scatter(x, y)\n plt.plot(x, x_plus_ones@theta)\n\n\n\n\n\nfor i in range(10):\n theta.grad.data.zero_()\n y_pred = x_plus_ones@theta\n loss = ((y_pred - y)**2).mean()\n loss.backward()\n theta.data -= lr * theta.grad\n\n\nwith torch.no_grad():\n plt.scatter(x, y)\n plt.plot(x, x_plus_ones@theta)" + "objectID": "posts/2022-05-17-contributors_sorted_by_prs.html#create-a-dashboard", + "href": "posts/2022-05-17-contributors_sorted_by_prs.html#create-a-dashboard", + "title": "Get a list of contributors from a repo", + "section": "Create a dashboard", + "text": "Create a dashboard\n\ndef get_href_user(user):\n username, profile_link = user.split(\"|\")\n return f\"[{username}]({profile_link})\"\n\ndashboard = pd.DataFrame(index=sorted_by_pr_count.index)\ndashboard[\"Avatar\"] = contributors.avatar_url.apply(lambda url: f'<img width=\"25\" alt=\"image\" 
src=\"{url}\">')\ndashboard[\"Contributor\"] = (contributors.index +\"|\"+ contributors['html_url']).apply(get_href_user)\ndashboard[\"Number of PRs\"] = sorted_by_pr_count[\"Number of PRs\"]\nprint(dashboard.dropna().T.to_markdown())\n\n| | Drishttii | gerdm | karalleyna | always-newbie161 | karm-patel | Duane321 | Nirzu97 | patel-zeel | animesh-007 | ashishpapanai | shivaditya-meduri | Neoanarika | andrewnc | nappaillav | Abdelrahman350 | mjsML | jdf22 | kzymgch | nalzok | nitish1295 | Garvit9000c | AnkitaKumariJain14 | rohit-khoiwal-30 | shobro | raymondyeh07 | khanshehjad | alenm10 | firatoncel | AnandShegde | Aadesh-1404 | nealmcb | nipunbatra | petercerno | posgnu | mvervuurt | hieuza | Prahitha | TripleTop | UmarJ | Vishal987595 | a-fakhri | adamnemecek | galv | jlh2018 | krasserm | yuanx749 |\n|:--------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:-------------------------------
-------------------------------------------------------|:-------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------|\n| Avatar | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/35187749?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/4108759?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/36455180?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/66471669?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/59387624?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/19956442?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/28842790?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/59758528?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/53366877?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/52123364?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/77324692?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/5188337?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/7716402?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/43855961?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/47902062?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/7131192?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/1637094?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/10054419?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/13443062?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/21181046?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/68856476?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/62535006?v=4\"> | <img width=\"25\" alt=\"image\" 
src=\"https://avatars.githubusercontent.com/u/87682045?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/54628243?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/5696982?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/31896767?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/42214173?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/9141211?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/79975787?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/68186100?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/119472?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/60985?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/1649209?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/30136201?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/6399881?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/1021144?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/44160152?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/48208522?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/34779641?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/97757583?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/65111198?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/182415?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/4767568?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/40842099?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/202907?v=4\"> | <img width=\"25\" alt=\"image\" src=\"https://avatars.githubusercontent.com/u/47032563?v=4\"> |\n| Contributor | [Drishttii](https://github.com/Drishttii) | [gerdm](https://github.com/gerdm) | [karalleyna](https://github.com/karalleyna) | [always-newbie161](https://github.com/always-newbie161) | [karm-patel](https://github.com/karm-patel) | [Duane321](https://github.com/Duane321) | [Nirzu97](https://github.com/Nirzu97) | [patel-zeel](https://github.com/patel-zeel) | [animesh-007](https://github.com/animesh-007) | [ashishpapanai](https://github.com/ashishpapanai) | [shivaditya-meduri](https://github.com/shivaditya-meduri) | [Neoanarika](https://github.com/Neoanarika) | [andrewnc](https://github.com/andrewnc) | [nappaillav](https://github.com/nappaillav) | [Abdelrahman350](https://github.com/Abdelrahman350) | [mjsML](https://github.com/mjsML) | [jdf22](https://github.com/jdf22) | [kzymgch](https://github.com/kzymgch) | [nalzok](https://github.com/nalzok) | [nitish1295](https://github.com/nitish1295) | [Garvit9000c](https://github.com/Garvit9000c) | [AnkitaKumariJain14](https://github.com/AnkitaKumariJain14) | [rohit-khoiwal-30](https://github.com/rohit-khoiwal-30) | [shobro](https://github.com/shobro) | [raymondyeh07](https://github.com/raymondyeh07) | [khanshehjad](https://github.com/khanshehjad) | [alenm10](https://github.com/alenm10) | 
[firatoncel](https://github.com/firatoncel) | [AnandShegde](https://github.com/AnandShegde) | [Aadesh-1404](https://github.com/Aadesh-1404) | [nealmcb](https://github.com/nealmcb) | [nipunbatra](https://github.com/nipunbatra) | [petercerno](https://github.com/petercerno) | [posgnu](https://github.com/posgnu) | [mvervuurt](https://github.com/mvervuurt) | [hieuza](https://github.com/hieuza) | [Prahitha](https://github.com/Prahitha) | [TripleTop](https://github.com/TripleTop) | [UmarJ](https://github.com/UmarJ) | [Vishal987595](https://github.com/Vishal987595) | [a-fakhri](https://github.com/a-fakhri) | [adamnemecek](https://github.com/adamnemecek) | [galv](https://github.com/galv) | [jlh2018](https://github.com/jlh2018) | [krasserm](https://github.com/krasserm) | [yuanx749](https://github.com/yuanx749) |\n| Number of PRs | 79 | 55 | 43 | 29 | 29 | 29 | 25 | 23 | 18 | 17 | 16 | 10 | 10 | 10 | 8 | 7 | 7 | 6 | 6 | 5 | 4 | 4 | 3 | 3 | 2 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |" }, { - "objectID": "posts/2022-03-08-torch-essentials.html#advanced", - "href": "posts/2022-03-08-torch-essentials.html#advanced", - "title": "Torch essentials", - "section": "Advanced", - "text": "Advanced\n\nclass LinearRegression(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.theta = torch.nn.Parameter(torch.zeros(2,1))\n# self.register_parameter(theta, torch.zeros(2,1))\n \n def forward(self, x): # Don't call directly. it is called by __call__ method\n x_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)\n y_pred = x_plus_ones@self.theta\n return y_pred\n\n\nmodel = LinearRegression()\nmodel\n\nLinearRegression()\n\n\n\nfor name, value in model.named_parameters():\n print(name, value)\n\ntheta Parameter containing:\ntensor([[0.],\n [0.]], requires_grad=True)\n\n\n\noptimizer = torch.optim.Adam(model.parameters(), lr=0.1)\nloss_fn = torch.nn.MSELoss() # torch.nn.CrossEntropyLoss()\n\nfor i in range(10):\n optimizer.zero_grad()\n \n y_pred = model(x)\n loss = loss_fn(y_pred, y)\n loss.backward()\n \n optimizer.step()\n\n\nmodel.state_dict()\n\nOrderedDict([('theta',\n tensor([[0.9799],\n [0.9808]]))])" + "objectID": "posts/2023-07-03-Brick_Kilns_identification.html", + "href": "posts/2023-07-03-Brick_Kilns_identification.html", + "title": "blog", + "section": "", + "text": "# %pip install segment-geospatial groundingdino-py leafmap localtileserver\nimport leafmap\nfrom samgeo import tms_to_geotiff\nfrom samgeo.text_sam import LangSAM\n\n\nm = leafmap.Map(center=[28.6139, 77.2090], zoom=10, height=\"600px\")\nm.add_basemap(\"SATELLITE\")\nm\n\n\n\n\nSATELLITE has been already added before.\nSATELLITE has been already added before.\n\n\nTypeError: can only concatenate list (not \"NoneType\") to list\n\n\n\nm.user_roi_bounds()\n\n[77.0217, 28.5583, 77.3015, 28.6635]" }, { - "objectID": "posts/2022-03-08-torch-essentials.html#wanna-run-on-gpu", - "href": "posts/2022-03-08-torch-essentials.html#wanna-run-on-gpu", - "title": "Torch essentials", - "section": "Wanna run on GPU?", - "text": "Wanna run on GPU?\n\nx_gpu = x.to(device)\ny_gpu = y.to(device)\n\n\nprint(model.theta)\nmodel.to(device)\nprint(model.theta)\n\nParameter containing:\ntensor([[0.9799],\n [0.9808]], requires_grad=True)\nParameter containing:\ntensor([[0.9799],\n [0.9808]], device='cuda:0', requires_grad=True)\n\n\n\noptimizer = torch.optim.Adam(model.parameters(), lr=0.1)\nloss_fn = torch.nn.MSELoss() # torch.nn.CrossEntropyLoss()\n\nfor i in range(10):\n optimizer.zero_grad()\n \n y_pred 
= model(x_gpu)\n loss = loss_fn(y_pred, y_gpu)\n loss.backward()\n \n optimizer.step()" + "objectID": "posts/2022-01-20-kl-divergence.html", + "href": "posts/2022-01-20-kl-divergence.html", + "title": "KL divergence v/s cross-entropy", + "section": "", + "text": "In a classification problem, for a data-point \\(\\mathbf{x}_i\\), we have the true label \\(y_i\\) associated with it.\nLet us assume that we have three possible outcomes \\(\\{L1, L2, L3\\}\\) and for current \\(\\mathbf{x}_i\\), corresponding \\(y_i\\) is \\(L2\\). Then Ground truth probability distribution is the following:\n\\[\np_G(y = L1) = 0\\\\\np_G(y = L2) = 1\\\\\np_G(y=L3) = 0\n\\]\nLet us assume that our classifier model Predicted the following distribution:\n\\[\np_P(y = L1) = 0.1\\\\\np_P(y = L2) = 0.8\\\\\np_P(y=L3) = 0.1\n\\]" }, { - "objectID": "posts/2022-03-08-torch-essentials.html#state-dictionary", - "href": "posts/2022-03-08-torch-essentials.html#state-dictionary", - "title": "Torch essentials", - "section": "State dictionary", - "text": "State dictionary\n\n# torch.save(model.state_dict(), path)\n# model.load_state_dict(torch.load(path))" + "objectID": "posts/2022-01-20-kl-divergence.html#ground", + "href": "posts/2022-01-20-kl-divergence.html#ground", + "title": "KL divergence v/s cross-entropy", + "section": "", + "text": "In a classification problem, for a data-point \\(\\mathbf{x}_i\\), we have the true label \\(y_i\\) associated with it.\nLet us assume that we have three possible outcomes \\(\\{L1, L2, L3\\}\\) and for current \\(\\mathbf{x}_i\\), corresponding \\(y_i\\) is \\(L2\\). Then Ground truth probability distribution is the following:\n\\[\np_G(y = L1) = 0\\\\\np_G(y = L2) = 1\\\\\np_G(y=L3) = 0\n\\]\nLet us assume that our classifier model Predicted the following distribution:\n\\[\np_P(y = L1) = 0.1\\\\\np_P(y = L2) = 0.8\\\\\np_P(y=L3) = 0.1\n\\]" }, { - "objectID": "posts/2022-03-08-torch-essentials.html#nn-way", - "href": "posts/2022-03-08-torch-essentials.html#nn-way", - "title": "Torch essentials", - "section": "NN way", - "text": "NN way\n\nclass LinearRegression(torch.nn.Module):\n def __init__(self):\n super().__init__()\n self.layer = torch.nn.Linear(2, 1) # torch.nn.Linear(128, 64)\n # What else? \n# self.activation = torch.nn.ReLU()\n# torch.nn.LSTM()\n# torch.nn.Conv2d()\n \n def forward(self, x): # Don't call directly. it is called by __call__ method\n x_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)\n y_pred = self.layer(x_plus_ones)\n return y_pred" + "objectID": "posts/2022-01-20-kl-divergence.html#kl-divergence", + "href": "posts/2022-01-20-kl-divergence.html#kl-divergence", + "title": "KL divergence v/s cross-entropy", + "section": "KL divergence", + "text": "KL divergence\nWe can use KL divergence to check how good is our model. The formula is:\n\\[\nD_{KL}(p_G\\;\\rVert\\;p_P) = \\sum_{y_i \\in \\{L1, L2, L3\\}} p_G(y_i)\\log\\frac{p_G(y_i)}{p_P(y_i)}\n\\]\nFor our example,\n\\[\nD_{KL}(p_G\\;\\rVert\\;p_P) = \\log\\frac{1}{0.8}\n\\]\nIt is evident that if \\(p_P(y = L2)\\) decreses from \\(0.8\\), \\(D_{KL}(p_G\\;\\rVert\\;p_P)\\) will increase and vice versa. Note that KL divergence is not symmetric which means \\(D_{KL}(p_G\\;\\rVert\\;p_P) \\ne D_{KL}(p_P\\;\\rVert\\;p_G)\\)." 
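The "Wanna run on GPU?" section of the Torch essentials post indexed above moves the data and the model to the GPU before rerunning the training loop; the following is a minimal, self-contained sketch of that device-agnostic pattern. The toy data and variable names are illustrative and not taken verbatim from the post.

import torch

# pick a device once and reuse it everywhere
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# toy data, created on CPU and then moved to the chosen device
x = torch.rand(50, 1)
y = 3 * x + 2 + 0.1 * torch.randn_like(x)
x_dev, y_dev = x.to(device), y.to(device)

class LinearRegression(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # the post's pattern: prepend a column of ones and use a 2-in, 1-out linear layer
        self.layer = torch.nn.Linear(2, 1)

    def forward(self, x):
        x_plus_ones = torch.cat([torch.ones_like(x), x], dim=1)
        return self.layer(x_plus_ones)

model = LinearRegression().to(device)              # parameters move together with the module
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
loss_fn = torch.nn.MSELoss()

for _ in range(100):
    optimizer.zero_grad()
    loss = loss_fn(model(x_dev), y_dev)            # forward pass on the same device as the data
    loss.backward()
    optimizer.step()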
}, { - "objectID": "posts/2022-01-29-presentation_tips.html", - "href": "posts/2022-01-29-presentation_tips.html", - "title": "Conference Presentation Tips", - "section": "", - "text": "General\n\nFirst page goes like this:\n\nTitle\nAuthors (Underline presenting author, no need to put * in case of equal contribution)\nAffiliations\nConference name\n\nIf importing figures from paper, avoid including the captions.\nInclude lot of images and less maths\nTalk should end with summary and not the future work or thank you slide or something.\nCite the references on the same slide in bottom.\n\nRefer to “Giving talks” section of this blog.\n\n\nDos and Don’ts\n\nNever put too detailed information difficult to grasp: a table with many numbers, a complex derivation all in one go, very complicated diagram." + "objectID": "posts/2022-01-20-kl-divergence.html#cross-entory", + "href": "posts/2022-01-20-kl-divergence.html#cross-entory", + "title": "KL divergence v/s cross-entropy", + "section": "Cross-entory", + "text": "Cross-entory\nCross-entropy is another measure for distribution similarity. The formula is:\n\\[\nH(p_G, p_P) = \\sum_{y_i \\in \\{L1, L2, L3\\}} - p_G(y_i)\\log p_P(y_i)\n\\]\nFor our example:\n\\[\nH(p_G, p_P) = -\\log 0.8 = \\log \\frac{1}{0.8}\n\\]" }, { - "objectID": "posts/2023-11-26-Torch-DataLoaders.html", - "href": "posts/2023-11-26-Torch-DataLoaders.html", - "title": "Data Handling for Large Scale ML", - "section": "", - "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n\nimport torch\nimport torch.nn as nn\nfrom numcodecs import GZip, Zstd, Blosc\n\nfrom time import time, sleep\nfrom tqdm import tqdm\nfrom glob import glob\nfrom os.path import join\nfrom torch.utils.data import DataLoader, Dataset\nfrom joblib import Parallel, delayed\nimport xarray as xr\nimport numpy as np\n\nfrom torchvision.models import vit_b_16\nfrom astra.torch.models import ViTClassifier\nfrom astra.torch.utils import train_fn" + "objectID": "posts/2022-01-20-kl-divergence.html#kl-divergence-vs-cross-entropy", + "href": "posts/2022-01-20-kl-divergence.html#kl-divergence-vs-cross-entropy", + "title": "KL divergence v/s cross-entropy", + "section": "KL divergence v/s cross-entropy", + "text": "KL divergence v/s cross-entropy\nThis shows that KL divergence and cross-entropy will return the same values for a simple classification problem. Then why do we use cross-entropy as a loss function and not KL divergence?\nThat’s because KL divergence will compute additional constant terms (zero here) that are not adding any value in minimization." 
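The "State dictionary" section of the Torch essentials post indexed above leaves the save/load calls commented out; this is a small sketch of the usual round trip, assuming nothing beyond stock PyTorch. The module and the file path here are illustrative.

import torch

# a tiny module just to have something to save
model = torch.nn.Linear(2, 1)
path = "linear_regression.pt"                # illustrative path

torch.save(model.state_dict(), path)         # persist only the parameters, not the class definition

restored = torch.nn.Linear(2, 1)             # rebuild the same architecture first
restored.load_state_dict(torch.load(path))   # then load the saved parameters back in
restored.eval()                              # switch to inference mode before predicting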
}, { - "objectID": "posts/2023-11-26-Torch-DataLoaders.html#imports", - "href": "posts/2023-11-26-Torch-DataLoaders.html#imports", - "title": "Data Handling for Large Scale ML", + "objectID": "posts/2023-05-14-ssh-macos.html", + "href": "posts/2023-05-14-ssh-macos.html", + "title": "Passwordless SSH setup for MacOS Hosts", "section": "", - "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n\nimport torch\nimport torch.nn as nn\nfrom numcodecs import GZip, Zstd, Blosc\n\nfrom time import time, sleep\nfrom tqdm import tqdm\nfrom glob import glob\nfrom os.path import join\nfrom torch.utils.data import DataLoader, Dataset\nfrom joblib import Parallel, delayed\nimport xarray as xr\nimport numpy as np\n\nfrom torchvision.models import vit_b_16\nfrom astra.torch.models import ViTClassifier\nfrom astra.torch.utils import train_fn" + "text": "HOST: The computer physically present with you.\nREMOTE: The remote computer that you’d like to access via ssh.\nREMOTE-IP: Ip address of the REMOTE.\nPORT: The port on which the ssh server is running on REMOTE." }, { - "objectID": "posts/2023-11-26-Torch-DataLoaders.html#is-.nc-better-than-zarr", - "href": "posts/2023-11-26-Torch-DataLoaders.html#is-.nc-better-than-zarr", - "title": "Data Handling for Large Scale ML", - "section": "Is .nc better than zarr?", - "text": "Is .nc better than zarr?\n\nos.system(f\"du -sh {base_path}\")\n\n1.8G /home/patel_zeel/bkdb/bangladesh_pnas_pred/team1\n\n\n0\n\n\n\nsave_path = \"/tmp/nc_check_uncompressed\"\nos.makedirs(save_path, exist_ok=True)\nfiles = []\ndef zarr_to_nc(file):\n with xr.open_zarr(file, consolidated=False) as ds:\n ds.to_netcdf(join(save_path, file.split(\"/\")[-1].replace(\".zarr\", \".nc\")))\n\n_ = Parallel(n_jobs=32)(delayed(zarr_to_nc)(file) for file in tqdm(glob(join(base_path, \"*.zarr\"))))\n\nos.system(f\"du -sh {save_path}\")\n\n 0%| | 0/1501 [00:00<?, ?it/s]100%|██████████| 1501/1501 [00:24<00:00, 62.47it/s] \n\n\n5.3G /tmp/nc_check_uncompressed\n\n\n0\n\n\n\nsave_path = \"/tmp/nc_check_compressed\"\nos.system(f\"rm -rf {save_path}\")\nos.makedirs(save_path, exist_ok=True)\n\nencoding = {var: {\"zlib\": True, \"complevel\": 1} for var in [\"data\"]}\n\nfiles = []\ndef zarr_to_nc(file):\n with xr.open_zarr(file, consolidated=False) as ds:\n ds.to_netcdf(join(save_path, file.split(\"/\")[-1].replace(\".zarr\", \".nc\")), encoding=encoding)\n\n_ = Parallel(n_jobs=32)(delayed(zarr_to_nc)(file) for file in tqdm(glob(join(base_path, \"*.zarr\"))))\n\nos.system(f\"du -sh {save_path}\")\n\n100%|██████████| 1501/1501 [00:04<00:00, 311.18it/s]\n\n\n1.8G /tmp/nc_check_compressed\n\n\n0\n\n\n\nclass XarrayDatasetWithNC(Dataset):\n def __init__(self, path, max_files):\n self.base_path = path\n self.all_files = glob(join(path, \"*.nc\"))[:max_files]\n self.all_files.sort()\n self.all_ds = [xr.open_dataset(file) for file in tqdm(self.all_files)]\n self.lat_lags = [-2, -1, 0, 1, 2]\n self.lon_lags = [-2, -1, 0, 1, 2]\n \n def __len__(self):\n return len(self.all_files) * 25\n \n def __getitem__(self, idx):\n file_idx = idx // 25\n local_idx = idx % 25\n lat_lag = self.lat_lags[local_idx // 5]\n lon_lag = self.lon_lags[local_idx % 5]\n \n ds = self.all_ds[file_idx]\n img = ds.isel(lat_lag=lat_lag, lon_lag=lon_lag)['data'].values\n return torch.tensor(np.einsum(\"hwc->chw\", img).astype(np.float32) / 255)\n\n\nnc_path = \"/tmp/nc_check_compressed\"\n\n\nbatch_size = 128\nnum_workers = 32\n\ndataset = XarrayDatasetWithNC(nc_path, max_files=max_files)\nprocess_it(dataset, batch_size, 
num_workers)\n\n100%|██████████| 500/500 [00:02<00:00, 246.27it/s]\nTime: 0.7414: 100%|██████████| 98/98 [01:25<00:00, 1.15it/s]\n\n\nAverage Iteration Processing Time: 0.8260 +- 0.0530\nTotal time for all iterations: 80.9527\nTotal Wall Time per iteration: 0.8725\nTotal Wall Time: 85.5034" + "objectID": "posts/2023-05-14-ssh-macos.html#terminology", + "href": "posts/2023-05-14-ssh-macos.html#terminology", + "title": "Passwordless SSH setup for MacOS Hosts", + "section": "", + "text": "HOST: The computer physically present with you.\nREMOTE: The remote computer that you’d like to access via ssh.\nREMOTE-IP: Ip address of the REMOTE.\nPORT: The port on which the ssh server is running on REMOTE." }, { - "objectID": "posts/2023-11-26-Torch-DataLoaders.html#additional-experiments", - "href": "posts/2023-11-26-Torch-DataLoaders.html#additional-experiments", - "title": "Data Handling for Large Scale ML", - "section": "Additional experiments", - "text": "Additional experiments\n\nn_images = 60000\nt = 84.9131/500/25 * n_images\nprint(f\"Time to process {n_images} images: \", t/60, \"minutes\")\n\nTime to process 60000 images: 6.793048000000001 minutes\n\n\n\nfiles = glob(join(base_path, \"*.zarr\"))\ndata_tensors = []\nfor file in tqdm(files):\n with xr.open_zarr(file, consolidated=False) as ds:\n # print(ds['data'].values.reshape(-1, 224, 224, 3))\n data_tensors.append(torch.tensor(np.einsum(\"nhwc->nchw\", ds['data'].values.reshape(-1, 224, 224, 3)).astype(np.float16) / 255))\n\n100%|██████████| 1501/1501 [02:44<00:00, 9.13it/s]\n\n\n\nall_in_one = torch.concat(data_tensors, dim=0)\nall_in_one.shape\n\ntorch.Size([37525, 3, 224, 224])\n\n\n\nall_in_one = all_in_one.to('cuda')" + "objectID": "posts/2023-05-14-ssh-macos.html#what-is-the-problem", + "href": "posts/2023-05-14-ssh-macos.html#what-is-the-problem", + "title": "Passwordless SSH setup for MacOS Hosts", + "section": "What is the problem?", + "text": "What is the problem?\nSimilar to Windows machines, one can run the following commands on a macOS HOST for setting up the passwordless ssh:\nssh-keygen\nssh-copy-id -i ~/.ssh/id_rsa.pub -p PORT USERANAME@REMOTE-IP\nBut this does not work out of the box without the following command which lets your HOST know about the private key.\nssh-add ~/.ssh/id_rsa\nAfter this, connection works fine from macOS CLI. However, if you are trying to connect to REMOTE from VS code, make sure you restart VS code before attempting to connect (quit from the Dock as well).\nSo far so good. But this setup fails when you reboot your HOST since ssh-add is not perstistently adding the pirvate key to HOST.\nSo, what to do now?" }, { - "objectID": "posts/2023-11-26-Torch-DataLoaders.html#insights", - "href": "posts/2023-11-26-Torch-DataLoaders.html#insights", - "title": "Data Handling for Large Scale ML", - "section": "Insights", - "text": "Insights\n\nGPU Memory consumption is 17776MiB / 81920MiB for batch size 128 for ViT model\nUploading torch.Size([37525, 3, 224, 224]) of float32 data to GPU takes 22054MiB / 81920MiB of GPU Memory. Same data with float16 takes 11202MiB / 81920MiB of GPU Memory.\nIt seems .nc or .zarr are not making much difference in terms of time and/or memory." + "objectID": "posts/2023-05-14-ssh-macos.html#permenant-solution", + "href": "posts/2023-05-14-ssh-macos.html#permenant-solution", + "title": "Passwordless SSH setup for MacOS Hosts", + "section": "Permenant solution", + "text": "Permenant solution\nI found a permenant and full-proof solution here. 
For each REMOTE you add in your HOST’s ~/.ssh/config, after generating a key pair and copying it to REMOTE with ssh-copy-id command, modify its entry in ~/.ssh/config like the following and the issue should be permenently resolved.\nHost REMOTE\n UseKeychain yes\n AddKeysToAgent yes\n IdentityFile ~/.ssh/id_rsa\n HostName REMOTE-IP\n Port PORT\n User USERNAME" }, { - "objectID": "posts/2022-03-06-probabilistic-machine-learning.html", - "href": "posts/2022-03-06-probabilistic-machine-learning.html", - "title": "Probabilistic Machine Learning", + "objectID": "posts/2021-10-12-sparsegps.html", + "href": "posts/2021-10-12-sparsegps.html", + "title": "SparseGPs in Stheno", "section": "", - "text": "An inference problem requires statements about the value of an unobserved (latent) variable x based on observations y which are related to x, but may not be sufficient to fully determine x. This requires a notion of uncertainty.\n\nWe can define the following rules because \\(p(E) = 1\\) for any event \\(E\\).\n\nSum rule: \\(p(E) = p(E|A) + p(E|\\neg A)\\)\n\nProduct rule: \\(p(E, A) = p(E|A)p(A) = p(A|E)p(E)\\)\n\nBayes’ theorem: \\(p(E|A) = \\frac{p(A|E)p(E)}{p(A)}\\)" + "text": "# !pip install -U regdata\n\n\nimport regdata as rd\nimport torch\nimport matplotlib.pyplot as plt\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\nimport wbml.out as out\nfrom wbml.plot import tweak\n\nfrom stheno import B, GP, EQ, PseudoObsVFE, PseudoObsFITC\nfrom varz.torch import Vars, minimise_l_bfgs_b, parametrised, Positive\nimport lab.torch" }, { - "objectID": "posts/2022-03-06-probabilistic-machine-learning.html#introduction", - "href": "posts/2022-03-06-probabilistic-machine-learning.html#introduction", - "title": "Probabilistic Machine Learning", + "objectID": "posts/2021-10-12-sparsegps.html#imports", + "href": "posts/2021-10-12-sparsegps.html#imports", + "title": "SparseGPs in Stheno", "section": "", - "text": "An inference problem requires statements about the value of an unobserved (latent) variable x based on observations y which are related to x, but may not be sufficient to fully determine x. 
This requires a notion of uncertainty.\n\nWe can define the following rules because \\(p(E) = 1\\) for any event \\(E\\).\n\nSum rule: \\(p(E) = p(E|A) + p(E|\\neg A)\\)\n\nProduct rule: \\(p(E, A) = p(E|A)p(A) = p(A|E)p(E)\\)\n\nBayes’ theorem: \\(p(E|A) = \\frac{p(A|E)p(E)}{p(A)}\\)" + "text": "# !pip install -U regdata\n\n\nimport regdata as rd\nimport torch\nimport matplotlib.pyplot as plt\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\nimport wbml.out as out\nfrom wbml.plot import tweak\n\nfrom stheno import B, GP, EQ, PseudoObsVFE, PseudoObsFITC\nfrom varz.torch import Vars, minimise_l_bfgs_b, parametrised, Positive\nimport lab.torch" }, { - "objectID": "posts/2022-01-24-query_by_committee.html", - "href": "posts/2022-01-24-query_by_committee.html", - "title": "Query by Committee", - "section": "", - "text": "# Common imports\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.animation import FuncAnimation\nfrom matplotlib import rc\n\nplt.style.use('fivethirtyeight')\nrc('animation', html='jshtml')\n\n# Copy the models\nfrom copy import deepcopy\n\n# Sklearn imports\nfrom sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\nfrom sklearn.datasets import make_classification\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score, f1_score\n\n# Entropy function\nfrom scipy.stats import entropy\n\n# Progress helper\nfrom IPython.display import clear_output" + "objectID": "posts/2021-10-12-sparsegps.html#data-preperation", + "href": "posts/2021-10-12-sparsegps.html#data-preperation", + "title": "SparseGPs in Stheno", + "section": "Data preperation", + "text": "Data preperation\n\n# Define points to predict at.\nx = B.linspace(0, 10, 100)\nx_obs = B.linspace(0, 7, 50_000)\nx_ind = B.linspace(0, 10, 20)\n\n# Construct a prior.\nf = GP(EQ().periodic(2 * B.pi))\n\n# Sample a true, underlying function and observations.\nf_true = B.sin(x)\ny_obs = B.sin(x_obs) + B.sqrt(0.5) * B.randn(*x_obs.shape)" }, { - "objectID": "posts/2022-01-24-query_by_committee.html#qbc-by-posterior-sampling", - "href": "posts/2022-01-24-query_by_committee.html#qbc-by-posterior-sampling", - "title": "Query by Committee", - "section": "QBC by posterior sampling", - "text": "QBC by posterior sampling\n\nInteresting fact: For probabilistic models, QBC is similar to uncertainty sampling. How?\n\nDraw \\(k\\) parameter sets from the posterior distribution representing \\(k\\) different models.\nQuery a point which shows maximum disagreement among the points." 
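The "QBC by posterior sampling" note in the Query by Committee post describes the idea in two bullet points: draw k parameter sets from the posterior and query the point with maximum disagreement. The following is an illustrative sketch of exactly that for 1-D linear regression; the Gaussian "posterior" over intercept and slope mirrors the toy values assumed later in that post and is not computed from data.

import numpy as np

rng = np.random.default_rng(0)

X_pool = np.linspace(-1, 1, 100)            # candidate inputs we could label next

# draw k parameter sets from the assumed posterior, i.e. k committee members
k = 50
intercepts = rng.normal(3.0, 0.1, size=k)
slopes = rng.normal(2.0, 1.0, size=k)

# each committee member predicts on the pool: shape (k, n_pool)
preds = intercepts[:, None] + slopes[:, None] * X_pool[None, :]

# disagreement = spread of the committee's predictions at each candidate input
disagreement = preds.std(axis=0)

query_idx = disagreement.argmax()           # query the most contested input
print(X_pool[query_idx])                    # for this posterior the spread grows with |x|, so an endpoint wins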
+ "objectID": "posts/2021-10-12-sparsegps.html#plotting-function", + "href": "posts/2021-10-12-sparsegps.html#plotting-function", + "title": "SparseGPs in Stheno", + "section": "Plotting function", + "text": "Plotting function\n\ndef plot(method):\n if method == 'VFE':\n # Plot result.\n plt.plot(x, f_true, label=\"True\", style=\"test\")\n plt.scatter(\n x_obs,\n y_obs,\n label=\"Observations\",\n style=\"train\",\n c=\"tab:green\",\n alpha=0.35,\n )\n plt.scatter(\n x_ind,\n obs.mu(f.measure)[:, 0],\n label=\"Inducing Points\",\n style=\"train\",\n s=20,\n )\n plt.plot(x, mean, label=\"Prediction\", style=\"pred\")\n plt.fill_between(x, lower, upper, style=\"pred\")\n tweak()\n\n plt.show()\n else:\n # Plot result.\n plt.plot(x, f_true, label=\"True\", style=\"test\")\n plt.scatter(\n x_obs,\n y_obs,\n label=\"Observations\",\n style=\"train\",\n c=\"tab:green\",\n alpha=0.35,\n )\n plt.scatter(\n x_ind,\n B.dense(f_post(x_ind).mean),\n label=\"Inducing Points\",\n style=\"train\",\n s=20,\n )\n plt.plot(x, mean, label=\"Prediction\", style=\"pred\")\n plt.fill_between(x, lower, upper, style=\"pred\")\n tweak()\n\n plt.show()" }, { - "objectID": "posts/2022-01-24-query_by_committee.html#an-example-bayesian-linear-regression", - "href": "posts/2022-01-24-query_by_committee.html#an-example-bayesian-linear-regression", - "title": "Query by Committee", - "section": "An example: Bayesian linear regression", - "text": "An example: Bayesian linear regression\n\nnp.random.seed(0)\nN = 10\nX = np.linspace(-1,1,N).reshape(-1,1)\n\nt0 = 3\nt1 = 2\n\ny = X * t1 + t0 + np.random.rand(N,1)\n\nplt.scatter(X, y);\n\n\n\n\n\nAssume a posterior\n\nn_samples = 50\n\nt0_dist_samples = np.random.normal(t0, 0.1, size=n_samples)\nt1_dist_samples = np.random.normal(t1, 1, size=n_samples)\n\n\n\nPlot the models\n\nplt.scatter(X, y)\n\nfor i in range(len(t0_dist_samples)):\n sample_t0 = t0_dist_samples[i]\n sample_t1 = t1_dist_samples[i]\n \n plt.plot(X, X * sample_t1 + sample_t0,alpha=0.1)" + "objectID": "posts/2021-10-12-sparsegps.html#sparse-regression-with-variational-free-energy-vfe-method", + "href": "posts/2021-10-12-sparsegps.html#sparse-regression-with-variational-free-energy-vfe-method", + "title": "SparseGPs in Stheno", + "section": "Sparse regression with Variational Free Energy (VFE) method", + "text": "Sparse regression with Variational Free Energy (VFE) method\n\n# Compute a pseudo-point approximation of the posterior.\nobs = PseudoObsVFE(f(x_ind), (f(x_obs, 0.5), y_obs))\n\n# Compute the ELBO.\nout.kv(\"ELBO\", obs.elbo(f.measure))\n\n# Compute the approximate posterior.\nf_post = f | obs\n\n# Make predictions with the approximate posterior.\nmean, lower, upper = f_post(x, 0.5).marginal_credible_bounds()\nplot('VFE')\n\nELBO: -5.345e+04" }, { - "objectID": "posts/2022-01-24-query_by_committee.html#qbc-by-bootstrapping", - "href": "posts/2022-01-24-query_by_committee.html#qbc-by-bootstrapping", - "title": "Query by Committee", - "section": "QBC by bootstrapping", - "text": "QBC by bootstrapping\n\n2 class dataset\n\nX, y = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0, random_state=3, shuffle=True)\n\nplt.figure()\nplt.scatter(X[:,0], X[:,1], c=y);\n\n\n\n\n\n\nFull data fit with RF\n\nmodel = RandomForestClassifier(random_state=0)\nmodel.fit(X, y);\n\nRandomForestClassifier(random_state=0)\n\n\n\n\nVisualize decision boundary\n\ngrid_X1, grid_X2 = np.meshgrid(np.linspace(X[:,0].min()-0.1, X[:,0].max()+0.1, 100), \n np.linspace(X[:,1].min()-0.1, X[:,1].max()+0.1, 
100))\n\ngrid_X = [(x1, x2) for x1, x2 in zip(grid_X1.ravel(), grid_X2.ravel())]\n\ngrid_pred = model.predict(grid_X)\n\nplt.figure(figsize=(6,5))\nplt.scatter(X[:,0], X[:,1], c=y);\nplt.contourf(grid_X1, grid_X2, grid_pred.reshape(*grid_X1.shape), alpha=0.2);\n\n\n\n\n\n\nTrain, pool, test split\n\nX_train_pool, X_test, y_train_pool, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)\nX_train, X_pool, y_train, y_pool = train_test_split(X_train_pool, y_train_pool, train_size=20, random_state=0)\n\nX_list = [X_train, X_pool, X_test]\ny_list = [y_train, y_pool, y_test]\nt_list = ['Train', 'Pool', 'Test']\n\nfig, ax = plt.subplots(1,3,figsize=(15,4), sharex=True, sharey=True)\nfor i in range(3):\n ax[i].scatter(X_list[i][:,0], X_list[i][:,1], c=y_list[i])\n ax[i].set_title(t_list[i])\n \n\n\n\n\n\n\nFitting a model on initial train data\n\nAL_model = RandomForestClassifier(n_jobs=28, random_state=0)\n\nAL_model.fit(X_train, y_train);\n\nRandomForestClassifier(n_jobs=28, random_state=0)\n\n\n\n\nGet the votes from trees on pool dataset\n\nvotes = np.zeros(shape=(X_pool.shape[0], len(AL_model.estimators_)))\n\nfor learner_idx, learner in enumerate(AL_model.estimators_):\n votes[:, learner_idx] = learner.predict(X_pool)\n\n\nvotes.shape\n\n(780, 100)\n\n\n\nvotes\n\narray([[0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [1., 1., 1., ..., 0., 1., 1.],\n ...,\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.],\n [0., 0., 0., ..., 0., 0., 0.]])\n\n\n\n\nConvert to probabilities\n\np_vote = np.zeros(shape=(X_pool.shape[0], X_pool.shape[1]))\n\nfor vote_idx, vote in enumerate(votes):\n vote_counter = {0 : (1-vote).sum(), 1 : vote.sum()}\n\n for class_idx, class_label in enumerate(range(X.shape[1])):\n p_vote[vote_idx, class_idx] = vote_counter[class_label]/len(AL_model.estimators_)\n\n\np_vote\n\narray([[1. , 0. ],\n [0.89, 0.11],\n [0.06, 0.94],\n ...,\n [0.93, 0.07],\n [1. , 0. ],\n [1. , 0. 
]])\n\n\n\n\nCalculate dissimilarity (entropy)\n\nexample_id = 2\n\n\nans = 0\nfor category in range(X_pool.shape[1]):\n ans += (-p_vote[example_id][category] * np.log(p_vote[example_id][category]))\n\nans\n\n0.22696752250060448\n\n\n\nentr = entropy(p_vote, axis=1)\n\n\nentr[example_id]\n\n0.22696752250060448\n\n\n\n\nActive Learning Flow\n\ndef get_query_idx():\n # Gather the votes\n votes = np.zeros(shape=(X_pool.shape[0], len(AL_model.estimators_)))\n for learner_idx, learner in enumerate(AL_model.estimators_):\n votes[:, learner_idx] = learner.predict(X_pool)\n \n # Calcuate probability of votes\n p_vote = np.zeros(shape=(X_pool.shape[0], X_pool.shape[1]))\n for vote_idx, vote in enumerate(votes):\n vote_counter = {0 : (1-vote).sum(), \n 1 : vote.sum()}\n\n for class_idx, class_label in enumerate(range(X.shape[1])):\n p_vote[vote_idx, class_idx] = vote_counter[class_label]/len(AL_model.estimators_)\n \n # Calculate entropy for each example\n entr = entropy(p_vote, axis=1)\n \n # Choose example with highest entropy (disagreement)\n return entr.argmax()\n\n\n\nPrepare data for random sampling\n\nX_train_rand = X_train.copy()\ny_train_rand = y_train.copy()\nX_pool_rand = X_pool.copy()\ny_pool_rand = y_pool.copy()\n\nrandom_model = RandomForestClassifier(n_jobs=28, random_state=0)\n\n\n\nRun active learning\n\nAL_iters = 100\nnp.random.seed(0)\n\nAL_inds = []\nAL_models = []\nrandom_inds = []\nrandom_models = []\n\nfor iteration in range(AL_iters):\n clear_output(wait=True)\n print(\"iteration\", iteration)\n ######## Active Learning ############\n # Fit the model\n AL_model.fit(X_train, y_train)\n AL_models.append(deepcopy(AL_model))\n \n # Query a point\n query_idx = get_query_idx()\n AL_inds.append(query_idx)\n \n # Add it to the train data\n X_train = np.concatenate([X_train, X_pool[query_idx:query_idx+1, :]], axis=0)\n y_train = np.concatenate([y_train, y_pool[query_idx:query_idx+1]], axis=0)\n \n # Remove it from the pool data\n X_pool = np.delete(X_pool, query_idx, axis=0)\n y_pool = np.delete(y_pool, query_idx, axis=0)\n \n ######## Random Sampling ############\n # Fit the model\n random_model.fit(X_train_rand, y_train_rand)\n random_models.append(deepcopy(random_model))\n \n # Query a point\n query_idx = np.random.choice(len(X_pool))\n random_inds.append(query_idx)\n # Add it to the train data\n X_train_rand = np.concatenate([X_train_rand, X_pool_rand[query_idx:query_idx+1, :]], axis=0)\n y_train_rand = np.concatenate([y_train_rand, y_pool_rand[query_idx:query_idx+1]], axis=0)\n \n # Remove it from the pool data\n X_pool_rand = np.delete(X_pool_rand, query_idx, axis=0)\n y_pool_rand = np.delete(y_pool_rand, query_idx, axis=0)\n\niteration 99\n\n\n\n\nPlot accuracy\n\nrandom_scores = []\nAL_scores = []\nfor iteration in range(AL_iters):\n clear_output(wait=True)\n print(\"iteration\", iteration)\n AL_scores.append(accuracy_score(y_test, AL_models[iteration].predict(X_test)))\n random_scores.append(accuracy_score(y_test, random_models[iteration].predict(X_test)))\n \nplt.plot(AL_scores, label='Active Learning');\nplt.plot(random_scores, label='Random Sampling');\nplt.legend();\nplt.xlabel('Iterations');\nplt.ylabel('Accuracy\\n(Higher is better)');\n\niteration 99\n\n\n\n\n\n\n\nPlot decision boundary\n\ndef update(i):\n for each in ax:\n each.cla()\n \n AL_grid_preds = AL_models[i].predict(grid_X)\n random_grid_preds = random_models[i].predict(grid_X)\n \n # Active learning\n ax[0].scatter(X_train[:n_train,0], X_train[:n_train,1], c=y_train[:n_train], label='initial_train', 
alpha=0.2)\n ax[0].scatter(X_train[n_train:n_train+i, 0], X_train[n_train:n_train+i, 1], \n c=y_train[n_train:n_train+i], label='new_points')\n ax[0].contourf(grid_X1, grid_X2, AL_grid_preds.reshape(*grid_X1.shape), alpha=0.2);\n ax[0].set_title('New points')\n \n ax[1].scatter(X_test[:, 0], X_test[:, 1], c=y_test, label='test_set')\n ax[1].contourf(grid_X1, grid_X2, AL_grid_preds.reshape(*grid_X1.shape), alpha=0.2);\n ax[1].set_title('Test points');\n ax[0].text(locs[0],locs[1],'Active Learning')\n \n # Random sampling\n ax[2].scatter(X_train_rand[:n_train,0], X_train_rand[:n_train,1], c=y_train_rand[:n_train], label='initial_train', alpha=0.2)\n ax[2].scatter(X_train_rand[n_train:n_train+i, 0], X_train_rand[n_train:n_train+i, 1], \n c=y_train_rand[n_train:n_train+i], label='new_points')\n ax[2].contourf(grid_X1, grid_X2, random_grid_preds.reshape(*grid_X1.shape), alpha=0.2);\n ax[2].set_title('New points')\n \n ax[3].scatter(X_test[:, 0], X_test[:, 1], c=y_test, label='test_set')\n ax[3].contourf(grid_X1, grid_X2, random_grid_preds.reshape(*grid_X1.shape), alpha=0.2);\n ax[3].set_title('Test points');\n ax[2].text(locs[0],locs[1],'Random Sampling');\n\n\nlocs = (2.7, 4)\nfig, ax = plt.subplots(2,2,figsize=(12,6), sharex=True, sharey=True)\nax = ax.ravel()\nn_train = X_train.shape[0]-AL_iters\n\nanim = FuncAnimation(fig, func=update, frames=range(100))\nplt.close()\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect" + "objectID": "posts/2021-10-12-sparsegps.html#sparse-regression-with-fully-independent-training-conditional-fitc-mehod", + "href": "posts/2021-10-12-sparsegps.html#sparse-regression-with-fully-independent-training-conditional-fitc-mehod", + "title": "SparseGPs in Stheno", + "section": "Sparse Regression with Fully Independent Training Conditional (FITC) mehod", + "text": "Sparse Regression with Fully Independent Training Conditional (FITC) mehod\n\n# Compute a pseudo-point approximation of the posterior.\nobs = PseudoObsFITC(f(x_ind), (f(x_obs, 0.5), y_obs))\n\n# Compute the ELBO.\nout.kv(\"ELBO\", obs.elbo(f.measure))\n\n# Compute the approximate posterior.\nf_post = f | obs\n\n# Make predictions with the approximate posterior.\nmean, lower, upper = f_post(x, 0.5).marginal_credible_bounds()\nplot('FITC')\n\nELBO: -5.345e+04" }, { - "objectID": "posts/2023-07-01-climate-modeling-with-SpecialGP.html", - "href": "posts/2023-07-01-climate-modeling-with-SpecialGP.html", - "title": "Climate Modeling with GPs", - "section": "", - "text": "import os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\nimport pyproj\nimport numpy as np\nimport xarray as xr\n\nfrom skgpytorch.models import GPRegression\n\nimport matplotlib.pyplot as plt\n\n\n# def haversine(lon1, lat1, lon2, lat2):\n# \"\"\"\n# Calculate the great circle distance in kilometers between two points \n# on the earth (specified in decimal degrees)\n# \"\"\"\n# # convert decimal degrees to radians \n# lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])\n\n# # haversine formula \n# dlon = lon2 - lon1 \n# dlat = lat2 - lat1 \n# a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2\n# c = 2 * np.arcsin(np.sqrt(a)) \n# r = 6371 # Radius of earth in kilometers. Use 3956 for miles. 
Determines return value units.\n# return c * r\n\n# def new_coords(lat1, long1):\n# new_lat1 = haversine(0, 0, 0, lat1)\n# new_long1 = haversine(0, 0, long1, 0)\n# return new_lat1, new_long1\n\ndef lat_long_to_cartesian(latitude, longitude):\n # Convert latitude and longitude to radians\n phi = np.radians(latitude)\n lam = np.radians(longitude)\n\n # Constants for WGS 84 ellipsoid\n a = 6378137.0 # equatorial radius in meters\n e = 0.0818191908426 # eccentricity\n\n # Calculate Earth's radius at the given latitude\n R = a / np.sqrt(1 - (e ** 2) * (np.sin(phi) ** 2))\n\n # Convert to Cartesian coordinates\n X = R * np.sin(lam)\n Y = R * np.tan(phi)\n\n return X, Y\n\ndef wgs84_coords(lat, lon): \n # Define coordinate systems\n wgs84 = pyproj.CRS.from_epsg(4326) # WGS 84 lat-long system\n utm_zone_32n = pyproj.CRS.from_string(\"+proj=utm +zone=32 +ellps=WGS84 +datum=WGS84 +units=m +no_defs\")\n\n # Create a transformer object\n transformer = pyproj.Transformer.from_crs(wgs84, utm_zone_32n)\n\n # Convert lat-long coordinates to UTM coordinates\n utm_easting, utm_northing = transformer.transform(lon, lat)\n\n return utm_northing, utm_easting\n\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n\n# This source code is licensed under the license found in the\n# LICENSE file in the root directory of this source tree.\n# --------------------------------------------------------\n# Position embedding utils\n# --------------------------------------------------------\n\n\n# --------------------------------------------------------\n# 2D sine-cosine position embedding\n# References:\n# Transformer: https://github.com/tensorflow/models/blob/master/official/nlp/transformer/model_utils.py\n# MoCo v3: https://github.com/facebookresearch/moco-v3\n# --------------------------------------------------------\ndef get_2d_sincos_pos_embed(embed_dim, grid_size_h, grid_size_w, cls_token=False):\n \"\"\"\n grid_size: int of the grid height and width\n return:\n pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)\n \"\"\"\n grid_h = np.arange(grid_size_h, dtype=np.float32)\n grid_w = np.arange(grid_size_w, dtype=np.float32)\n grid = np.meshgrid(grid_w, grid_h) # here w goes first\n grid = np.stack(grid, axis=0)\n\n grid = grid.reshape([2, 1, grid_size_h, grid_size_w])\n pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)\n if cls_token:\n pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)\n return pos_embed\n\n\ndef get_2d_sincos_pos_embed_from_grid(embed_dim, grid):\n assert embed_dim % 2 == 0\n\n # use half of dimensions to encode grid_h\n emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2)\n emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2)\n\n emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D)\n return emb\n\n\ndef get_1d_sincos_pos_embed_from_grid(embed_dim, pos):\n \"\"\"\n embed_dim: output dimension for each position\n pos: a list of positions to be encoded: size (M,)\n out: (M, D)\n \"\"\"\n assert embed_dim % 2 == 0\n omega = np.arange(embed_dim // 2, dtype=np.float)\n omega /= embed_dim / 2.0\n omega = 1.0 / 10000**omega # (D/2,)\n\n pos = pos.reshape(-1) # (M,)\n out = np.einsum(\"m,d->md\", pos, omega) # (M, D/2), outer product\n\n emb_sin = np.sin(out) # (M, D/2)\n emb_cos = np.cos(out) # (M, D/2)\n\n emb = np.concatenate([emb_sin, emb_cos], axis=1) # (M, D)\n return emb\n\n\n# 
--------------------------------------------------------\n# Interpolate position embeddings for high-resolution\n# References:\n# DeiT: https://github.com/facebookresearch/deit\n# --------------------------------------------------------\ndef interpolate_pos_embed(model, checkpoint_model, new_size=(64, 128)):\n if \"net.pos_embed\" in checkpoint_model:\n pos_embed_checkpoint = checkpoint_model[\"net.pos_embed\"]\n embedding_size = pos_embed_checkpoint.shape[-1]\n orig_num_patches = pos_embed_checkpoint.shape[-2]\n patch_size = model.patch_size\n w_h_ratio = 2\n orig_h = int((orig_num_patches // w_h_ratio) ** 0.5)\n orig_w = w_h_ratio * orig_h\n orig_size = (orig_h, orig_w)\n new_size = (new_size[0] // patch_size, new_size[1] // patch_size)\n # print (orig_size)\n # print (new_size)\n if orig_size[0] != new_size[0]:\n print(\"Interpolate PEs from %dx%d to %dx%d\" % (orig_size[0], orig_size[1], new_size[0], new_size[1]))\n pos_tokens = pos_embed_checkpoint.reshape(-1, orig_size[0], orig_size[1], embedding_size).permute(\n 0, 3, 1, 2\n )\n new_pos_tokens = torch.nn.functional.interpolate(\n pos_tokens, size=(new_size[0], new_size[1]), mode=\"bicubic\", align_corners=False\n )\n new_pos_tokens = new_pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)\n checkpoint_model[\"net.pos_embed\"] = new_pos_tokens\n\n\ndef interpolate_channel_embed(checkpoint_model, new_len):\n if \"net.channel_embed\" in checkpoint_model:\n channel_embed_checkpoint = checkpoint_model[\"net.channel_embed\"]\n old_len = channel_embed_checkpoint.shape[1]\n if new_len <= old_len:\n checkpoint_model[\"net.channel_embed\"] = channel_embed_checkpoint[:, :new_len]\n\n\ndef SIREN(input_dim, output_dim, features, activation_scale, dropout):\n model = tf.keras.Sequential()\n model.add(layers.Dense(features[0], input_shape=(input_dim,), kernel_initializer=initializers.RandomUniform(-1 / input_dim, 1 / input_dim), activation=tf.sin))\n for i in range(1, len(features)):\n model.add(layers.Dense(features[i], kernel_initializer=initializers.RandomUniform(-np.sqrt(6 / features[i-1]) / activation_scale, np.sqrt(6 / features[i-1]) / activation_scale), activation=tf.sin))\n model.add(layers.Dropout(dropout))\n model.add(layers.Dense(output_dim, kernel_initializer=initializers.RandomUniform(-np.sqrt(6 / features[-1]) / activation_scale, np.sqrt(6 / features[-1]) / activation_scale), activation='linear'))\n return model\n\ndef MLP(input_dim, output_dim, features, activation_scale, dropout):\n model = tf.keras.Sequential()\n model.add(layers.Dense(features[0], input_shape=(input_dim,), activation=activations.relu))\n for i in range(1, len(features)):\n model.add(layers.Dense(features[i], activation=activations.relu))\n model.add(layers.Dropout(dropout))\n model.add(layers.Dense(output_dim, activation='linear'))\n return model\n \ndef ResNet():\n resnet = ResNet50(include_top=False, weights=None, input_shape=(64, 32, 1), pooling='avg')\n model = tf.keras.Sequential()\n model.add(resnet)\n model.add(layers.Dense(2048, activation='relu'))\n model.add(layers.Dense(32768, activation='linear'))\n return model\n\n\ndata5 = xr.open_dataset(\"../data/2m_temperature_2018_5.625deg_Jan.nc\").to_dataframe().reset_index()\ndata1 = xr.open_dataset(\"../data/2m_temperature_2018_1.40625deg_Jan.nc\").to_dataframe().reset_index()\n\n\ndata5.head()\n\n\n\n\n\n\n\n\nlon\nlat\ntime\nt2m\n\n\n\n\n0\n0.0\n-87.1875\n2018-01-01 00:00:00\n250.728180\n\n\n1\n0.0\n-87.1875\n2018-01-01 01:00:00\n250.468552\n\n\n2\n0.0\n-87.1875\n2018-01-01 
02:00:00\n250.250931\n\n\n3\n0.0\n-87.1875\n2018-01-01 03:00:00\n250.040314\n\n\n4\n0.0\n-87.1875\n2018-01-01 04:00:00\n249.993790\n\n\n\n\n\n\n\n\ntime_stamp = \"2018-01-01 01:00:00\"\ntrain_df = data5[data5.time == time_stamp]\ntest_df = data1[data1.time == time_stamp]\n\nX = np.stack([train_df.lat.values, train_df.lon.values], axis=1)\ny = train_df[[\"t2m\"]].values\nprint(f\"{X.shape=}, {y.shape=}\")\n\nX_test = np.stack([test_df.lat.values, test_df.lon.values], axis=1)\ny_test = test_df[[\"t2m\"]].values\nprint(f\"{X_test.shape=}, {y_test.shape=}\")\n\nrff = np.random.normal(size=(2, 16)) * 0.01\n# X = np.concatenate([np.sin(X @ rff), np.cos(X @ rff)], axis=1)\n# print(f\"{sin_cos.shape=}\")\n# X = X @ sin_cos\n# X_test = np.concatenate([np.sin(X_test @ rff), np.cos(X_test @ rff)], axis=1)\n\nprint(f\"{X.shape=}, {X_test.shape=}\")\n\nX.shape=(2048, 2), y.shape=(2048, 1)\nX_test.shape=(32768, 2), y_test.shape=(32768, 1)\nX.shape=(2048, 2), X_test.shape=(32768, 2)\n\n\n\nX_max = np.max(X, axis=0, keepdims=True)\nX_min = np.min(X, axis=0, keepdims=True)\n\nX_scaled = (X - X_min) / (X_max - X_min)\nX_test_scaled = (X_test - X_min) / (X_max - X_min)\n\ny_min = np.min(y, axis=0, keepdims=True)\ny_max = np.max(y, axis=0, keepdims=True)\n\ny_scaled = (y - y_min) / (y_max - y_min)\n\n# y_mean = np.mean(y, axis=0, keepdims=True)\n# y_std = np.std(y, axis=0, keepdims=True)\n\n# y_scaled = (y - y_mean) / y_std\n\n\nmodel = MLP(2, 1, [256]*4, 30.0, 0.0)\n# model = ResNet()\nmodel.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss='mse')\n\n\nhistory = model.fit(X_scaled, y_scaled, epochs=5000, batch_size=X_scaled.shape[0], verbose=0)\n\n\nplt.plot(history.history['loss']);\n\n\n\n\n\ny_pred = model.predict(X_test_scaled) * (y_max - y_min) + y_min\nplt.imshow(y_pred.reshape(256, 128), origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\n\n1024/1024 [==============================] - 1s 1ms/step\n\n\n\n\n\n\nplt.imshow(y.reshape(64, 32), origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\n\n\n\n\n\ndiff = y_pred.reshape(256, 128) - y_test.reshape(256, 128)\nplt.imshow(diff, origin='lower', extent=[-180, 180, -90, 90], cmap='coolwarm', interpolation=\"none\");\nplt.colorbar();\nplt.title(\"Diff\")\n\nText(0.5, 1.0, 'Diff')\n\n\n\n\n\n\n# rmse = np.sqrt(np.mean(np.abs(X_test[:, 0:1])*(y_pred.ravel() - y_test.ravel())**2))/np.mean(y_test.ravel() * np.abs(X_test[:, 0:1]))\nrmse = np.sqrt(np.mean((y_pred.ravel() - y_test.ravel())**2))\nprint(f\"{rmse=}\")\n\nrmse=2.7606046\n\n\n\nmean_bias = np.mean(y_pred.ravel() - y_test.ravel())\nprint(f\"{mean_bias=}\")\n\nmean_bias=0.10866926" + "objectID": "posts/2021-10-12-sparsegps.html#hyperparameter-tuning-noisy-sine-data", + "href": "posts/2021-10-12-sparsegps.html#hyperparameter-tuning-noisy-sine-data", + "title": "SparseGPs in Stheno", + "section": "Hyperparameter tuning (Noisy Sine data)", + "text": "Hyperparameter tuning (Noisy Sine data)\n\ndef model(vs):\n \"\"\"Constuct a model with learnable parameters.\"\"\"\n return vs['variance']*GP(EQ().stretch(vs['length_scale']))\n\n\ntorch.manual_seed(123)\n\ndataObj = rd.SineNoisy(scale_X=False, scale_y=False, return_test=True, backend='torch')\nx_obs, y_obs, x = dataObj.get_data()\n\n\nplt.scatter(x_obs, y_obs, s=2);\n\n\n\n\n\nVFE\n\nvs = Vars(torch.float64)\nvs.positive(name=\"noise\")\nvs.positive(name=\"length_scale\");\nvs.positive(name=\"variance\");\nvs.positive(init=torch.linspace(0.4,0.6,10), shape=(10,), 
name='x_ind')\nvs.requires_grad(True)\n\noptimizer = torch.optim.Adam(vs.get_latent_vars(), lr=0.1)\nfig, ax = plt.subplots(1,2,figsize=(15,5))\nlosses = []\n\ndef update(i):\n optimizer.zero_grad()\n gp = model(vs)\n obs = PseudoObsVFE(gp(vs['x_ind']), (gp(x_obs, vs['noise']), y_obs))\n loss = -obs.elbo(gp.measure)\n losses.append(loss.item())\n loss.backward()\n optimizer.step()\n \n gp_post = gp | obs\n mean, lower, upper = gp_post(x, vs['noise']).marginal_credible_bounds()\n ind_mean = B.dense(gp_post(vs['x_ind']).mean)\n \n ax[0].cla();ax[1].cla();\n ax[0].scatter(x_obs, y_obs, s=2)\n with torch.no_grad():\n ax[0].plot()\n ax[0].plot(x, B.dense(mean), label='Prediction')\n ax[0].fill_between(x.ravel(), lower, upper, alpha=0.2, label='Uncertainty')\n ax[0].plot(x, dataObj.f(x), label='True')\n ax[0].scatter(vs['x_ind'], ind_mean, label='Inducing points')\n ax[0].set_xlabel('X')\n ax[0].legend()\n \n ax[1].plot(losses, label='loss')\n ax[1].set_xlabel('Iterations')\n ax[1].legend()\n \nanim = FuncAnimation(fig, update, range(50))\nrc('animation', html='jshtml')\nplt.close()\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect\n \n \n\n\n\n\n\n\n\n\nFITC\n\nvs = Vars(torch.float64)\nvs.positive(name=\"noise\")\nvs.positive(name=\"length_scale\");\nvs.positive(name=\"variance\");\nvs.positive(init=torch.linspace(0.4,0.6,10), shape=(10,), name='x_ind')\nvs.requires_grad(True)\n\noptimizer = torch.optim.Adam(vs.get_latent_vars(), lr=0.1)\nfig, ax = plt.subplots(1,2,figsize=(15,5))\nlosses = []\n\ndef update(i):\n optimizer.zero_grad()\n gp = model(vs)\n obs = PseudoObsFITC(gp(vs['x_ind']), (gp(x_obs, vs['noise']), y_obs))\n loss = -obs.elbo(gp.measure)\n losses.append(loss.item())\n loss.backward()\n optimizer.step()\n \n gp_post = gp | obs\n mean, lower, upper = gp_post(x, vs['noise']).marginal_credible_bounds()\n ind_mean = B.dense(gp_post(vs['x_ind']).mean)\n \n ax[0].cla();ax[1].cla();\n ax[0].scatter(x_obs, y_obs, s=2)\n with torch.no_grad():\n ax[0].plot()\n ax[0].plot(x, B.dense(mean), label='Prediction')\n ax[0].fill_between(x.ravel(), lower, upper, alpha=0.2, label='Uncertainty')\n ax[0].plot(x, dataObj.f(x), label='True')\n ax[0].scatter(vs['x_ind'], ind_mean, label='Inducing points')\n ax[0].set_xlabel('X')\n ax[0].legend()\n \n ax[1].plot(losses, label='loss')\n ax[1].set_xlabel('Iterations')\n ax[1].legend()\n \nanim = FuncAnimation(fig, update, range(50))\nrc('animation', html='jshtml')\nplt.close()\nanim\n\n\n\n\n\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Once\n \n Loop\n \n Reflect" }, { "objectID": "posts/2023-07-26-PurpleAir.html", diff --git a/sitemap.xml b/sitemap.xml index 086e13b..f2d2973 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,174 +2,178 @@ https://github.com/patel-zeel/blog/posts/2022-04-06-github_faqs.html - 2023-11-26T15:02:56.002Z + 2023-11-28T09:41:01.044Z https://github.com/patel-zeel/blog/posts/2021-03-22-gp_kernels.html - 2023-11-26T15:02:55.206Z + 2023-11-28T09:41:00.260Z https://github.com/patel-zeel/blog/posts/2022-04-09-gcloud.html - 2023-11-26T15:02:51.986Z + 2023-11-28T09:40:57.056Z https://github.com/patel-zeel/blog/posts/2023-06-23-Basis_functions.html - 2023-11-26T15:02:51.446Z + 2023-11-28T09:40:56.472Z https://github.com/patel-zeel/blog/posts/2022-05-14-iteratively_reweighted_least_squares.html - 2023-11-26T15:02:50.910Z + 2023-11-28T09:40:55.936Z https://github.com/patel-zeel/blog/posts/2022-01-15-py_over_ipynb.html - 
2023-11-26T15:02:50.058Z + 2023-11-28T09:40:55.088Z https://github.com/patel-zeel/blog/posts/2023-03-28-nngp.html - 2023-11-26T15:02:49.358Z + 2023-11-28T09:40:54.372Z https://github.com/patel-zeel/blog/posts/2023-08-31-bayesian-gaussian-basis-regression.html - 2023-11-26T15:02:48.806Z + 2023-11-28T09:40:53.800Z https://github.com/patel-zeel/blog/posts/2022-06-10-jaxoptimizers.html - 2023-11-26T15:02:48.190Z + 2023-11-28T09:40:53.176Z https://github.com/patel-zeel/blog/posts/2021-10-26-anonymization-tips.html - 2023-11-26T15:02:47.530Z + 2023-11-28T09:40:52.508Z https://github.com/patel-zeel/blog/posts/2022-01-25-gp_frameworks_comparison.html - 2023-11-26T15:02:46.726Z + 2023-11-28T09:40:51.704Z https://github.com/patel-zeel/blog/posts/2022-08-01-conditional_neural_processes.html - 2023-11-26T15:02:46.006Z + 2023-11-28T09:40:50.984Z - https://github.com/patel-zeel/blog/posts/2021-10-12-sparsegps.html - 2023-11-26T15:02:45.082Z + https://github.com/patel-zeel/blog/posts/2023-11-28-learnings_from_brick_kiln_project.html + 2023-11-28T09:40:50.080Z - https://github.com/patel-zeel/blog/posts/2023-05-14-ssh-macos.html - 2023-11-26T15:02:42.898Z + https://github.com/patel-zeel/blog/posts/2023-07-01-climate-modeling-with-SpecialGP.html + 2023-11-28T09:40:48.048Z - https://github.com/patel-zeel/blog/posts/2022-01-20-kl-divergence.html - 2023-11-26T15:02:39.995Z + https://github.com/patel-zeel/blog/posts/2022-01-24-query_by_committee.html + 2023-11-28T09:40:47.376Z - https://github.com/patel-zeel/blog/posts/2023-07-03-Brick_Kilns_identification.html - 2023-11-26T15:02:39.479Z + https://github.com/patel-zeel/blog/posts/2022-03-06-probabilistic-machine-learning.html + 2023-11-28T09:40:44.500Z - https://github.com/patel-zeel/blog/posts/2022-05-17-contributors_sorted_by_prs.html - 2023-11-26T15:02:38.815Z + https://github.com/patel-zeel/blog/posts/2023-11-26-Torch-DataLoaders.html + 2023-11-28T09:40:44.040Z - https://github.com/patel-zeel/blog/posts/2023-06-23-GNNs_and_GPs.html - 2023-11-26T15:02:38.139Z + https://github.com/patel-zeel/blog/posts/2022-01-29-presentation_tips.html + 2023-11-28T09:40:43.140Z - https://github.com/patel-zeel/blog/posts/2022-10-31-stochastic-variational-gp.html - 2023-11-26T15:02:37.475Z + https://github.com/patel-zeel/blog/posts/2022-03-08-torch-essentials.html + 2023-11-28T09:40:42.600Z - https://github.com/patel-zeel/blog/posts/2023-04-29-sine-combination-netowrks.html - 2023-11-26T15:02:36.651Z + https://github.com/patel-zeel/blog/posts/2023-05-31-CNPs_for_Images.html + 2023-11-28T09:40:41.800Z - https://github.com/patel-zeel/blog/posts/2023-07-01-climate-modeling-with-siren.html - 2023-11-26T15:02:36.023Z + https://github.com/patel-zeel/blog/posts/2023-06-12-GNN_for_regression.html + 2023-11-28T09:40:41.176Z + + + https://github.com/patel-zeel/blog/about.html + 2023-11-28T09:40:40.444Z https://github.com/patel-zeel/blog/index.html - 2023-11-26T15:02:35.147Z + 2023-11-28T09:40:39.928Z - https://github.com/patel-zeel/blog/about.html - 2023-11-26T15:02:35.647Z + https://github.com/patel-zeel/blog/posts/2023-07-01-climate-modeling-with-siren.html + 2023-11-28T09:40:40.824Z - https://github.com/patel-zeel/blog/posts/2023-06-12-GNN_for_regression.html - 2023-11-26T15:02:36.367Z + https://github.com/patel-zeel/blog/posts/2023-04-29-sine-combination-netowrks.html + 2023-11-28T09:40:41.460Z - https://github.com/patel-zeel/blog/posts/2023-05-31-CNPs_for_Images.html - 2023-11-26T15:02:36.959Z + https://github.com/patel-zeel/blog/posts/2022-10-31-stochastic-variational-gp.html + 
2023-11-28T09:40:42.272Z - https://github.com/patel-zeel/blog/posts/2022-03-08-torch-essentials.html - 2023-11-26T15:02:37.819Z + https://github.com/patel-zeel/blog/posts/2023-06-23-GNNs_and_GPs.html + 2023-11-28T09:40:42.888Z - https://github.com/patel-zeel/blog/posts/2022-01-29-presentation_tips.html - 2023-11-26T15:02:38.391Z + https://github.com/patel-zeel/blog/posts/2022-05-17-contributors_sorted_by_prs.html + 2023-11-28T09:40:43.604Z - https://github.com/patel-zeel/blog/posts/2023-11-26-Torch-DataLoaders.html - 2023-11-26T15:02:39.247Z + https://github.com/patel-zeel/blog/posts/2023-07-03-Brick_Kilns_identification.html + 2023-11-28T09:40:44.260Z - https://github.com/patel-zeel/blog/posts/2022-03-06-probabilistic-machine-learning.html - 2023-11-26T15:02:39.719Z + https://github.com/patel-zeel/blog/posts/2022-01-20-kl-divergence.html + 2023-11-28T09:40:44.756Z - https://github.com/patel-zeel/blog/posts/2022-01-24-query_by_committee.html - 2023-11-26T15:02:42.622Z + https://github.com/patel-zeel/blog/posts/2023-05-14-ssh-macos.html + 2023-11-28T09:40:47.648Z - https://github.com/patel-zeel/blog/posts/2023-07-01-climate-modeling-with-SpecialGP.html - 2023-11-26T15:02:43.294Z + https://github.com/patel-zeel/blog/posts/2021-10-12-sparsegps.html + 2023-11-28T09:40:49.796Z https://github.com/patel-zeel/blog/posts/2023-07-26-PurpleAir.html - 2023-11-26T15:02:45.714Z + 2023-11-28T09:40:50.692Z https://github.com/patel-zeel/blog/posts/2022-03-05-uncertainty-in-deep-learning.html - 2023-11-26T15:02:46.274Z + 2023-11-28T09:40:51.252Z https://github.com/patel-zeel/blog/posts/2023-07-05-Multiclass_GP_classification.html - 2023-11-26T15:02:47.098Z + 2023-11-28T09:40:52.076Z https://github.com/patel-zeel/blog/posts/2022-10-21-gaussian-processes.html - 2023-11-26T15:02:47.898Z + 2023-11-28T09:40:52.884Z https://github.com/patel-zeel/blog/posts/2022-02-25-torch-tips.html - 2023-11-26T15:02:48.466Z + 2023-11-28T09:40:53.460Z https://github.com/patel-zeel/blog/posts/2021-09-27-constraints.html - 2023-11-26T15:02:49.062Z + 2023-11-28T09:40:54.064Z https://github.com/patel-zeel/blog/posts/2022-10-18-kfac-laplace.html - 2023-11-26T15:02:49.790Z + 2023-11-28T09:40:54.804Z https://github.com/patel-zeel/blog/posts/2022-10-27-mogp.html - 2023-11-26T15:02:50.370Z + 2023-11-28T09:40:55.412Z https://github.com/patel-zeel/blog/posts/2021-09-28-docker_cheatsheet.html - 2023-11-26T15:02:51.210Z + 2023-11-28T09:40:56.236Z https://github.com/patel-zeel/blog/posts/2020-09-21-programatically_download_openaq_data.html - 2023-11-26T15:02:51.690Z + 2023-11-28T09:40:56.732Z https://github.com/patel-zeel/blog/posts/2020-03-28-active_learning_with_bayesian_linear_regression.html - 2023-11-26T15:02:53.654Z + 2023-11-28T09:40:58.712Z https://github.com/patel-zeel/blog/posts/2021-10-23-warped-gp.html - 2023-11-26T15:02:55.562Z + 2023-11-28T09:41:00.616Z