diff --git a/.tether/vignettes-src/distribution.Rmd b/.tether/vignettes-src/distribution.Rmd
index d1fb7deeb..28f46bfa7 100644
--- a/.tether/vignettes-src/distribution.Rmd
+++ b/.tether/vignettes-src/distribution.Rmd
@@ -188,9 +188,7 @@ layout_map["d1/bias"] = ("model",)
 # You can also set the layout for the layer output like
 layout_map["d2/output"] = ("data", None)
 
-model_parallel = keras.distribution.ModelParallel(
-    mesh_2d, layout_map, batch_dim_name="data"
-)
+model_parallel = keras.distribution.ModelParallel(layout_map, batch_dim_name="data")
 
 keras.distribution.set_distribution(model_parallel)
 
diff --git a/.tether/vignettes-src/parked/_custom_train_step_in_torch.Rmd b/.tether/vignettes-src/parked/_custom_train_step_in_torch.Rmd
index 505a4422f..e1b2edb55 100644
--- a/.tether/vignettes-src/parked/_custom_train_step_in_torch.Rmd
+++ b/.tether/vignettes-src/parked/_custom_train_step_in_torch.Rmd
@@ -2,7 +2,7 @@
 title: Customizing what happens in `fit()` with PyTorch
 author: '[fchollet](https://twitter.com/fchollet)'
 date-created: 2023/06/27
-last-modified: 2023/06/27
+last-modified: 2024/08/01
 description: Overriding the training step of the Model class with PyTorch.
 accelerator: GPU
 output: rmarkdown::html_vignette
@@ -390,7 +390,7 @@ class GAN(keras.Model):
 
     def train_step(self, real_images):
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        if isinstance(real_images, tuple):
+        if isinstance(real_images, tuple) or isinstance(real_images, list):
             real_images = real_images[0]
         # Sample random points in the latent space
         batch_size = real_images.shape[0]
diff --git a/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd b/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd
index 48d789d6f..89ad7e599 100644
--- a/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd
+++ b/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd
@@ -153,7 +153,7 @@ def compute_loss_and_updates(trainable_variables, non_trainable_variables, x, y)
 ```
 
 Once you have such a function, you can get the gradient function by
-specifying `hax_aux` in `value_and_grad`: it tells JAX that the loss
+specifying `has_aux` in `value_and_grad`: it tells JAX that the loss
 computation function returns more outputs than just the loss. Note that the
 loss should always be the first output.
 
diff --git a/.tether/vignettes-src/writing_your_own_callbacks.Rmd b/.tether/vignettes-src/writing_your_own_callbacks.Rmd
index cee15b9c3..1803477d4 100644
--- a/.tether/vignettes-src/writing_your_own_callbacks.Rmd
+++ b/.tether/vignettes-src/writing_your_own_callbacks.Rmd
@@ -293,7 +293,7 @@ class EarlyStoppingAtMinLoss(keras.callbacks.Callback):
         # The epoch the training stops at.
         self.stopped_epoch = 0
         # Initialize the best as infinity.
-        self.best = np.Inf
+        self.best = np.inf
 
     def on_epoch_end(self, epoch, logs=None):
         current = logs.get("loss")
diff --git a/vignettes-src/distribution.Rmd b/vignettes-src/distribution.Rmd
index c4b3b343c..a70cca771 100644
--- a/vignettes-src/distribution.Rmd
+++ b/vignettes-src/distribution.Rmd
@@ -93,7 +93,7 @@ mesh <- keras$distribution$DeviceMesh(
 # "data" as columns, and it is a [4, 2] grid when it mapped to the physical
 # devices on the mesh.
 layout_2d <- keras$distribution$TensorLayout(
-  axes = c("model", "data"), 
+  axes = c("model", "data"),
   device_mesh = mesh
 )
 
@@ -131,8 +131,8 @@ data_parallel <- keras$distribution$DataParallel(devices = devices)
 
 # Or you can choose to create DataParallel with a 1D `DeviceMesh`.
 mesh_1d <- keras$distribution$DeviceMesh(
-  shape = shape(8), 
-  axis_names = list("data"), 
+  shape = shape(8),
+  axis_names = list("data"),
   devices = devices
 )
 data_parallel <- keras$distribution$DataParallel(device_mesh = mesh_1d)
@@ -213,8 +213,7 @@ layout_map["d1/bias"] <- tuple("model")
 layout_map["d2/output"] <- tuple("data", NULL)
 
 model_parallel <- keras$distribution$ModelParallel(
-  layout_map = layout_map,
-  batch_dim_name = "data"
+  layout_map, batch_dim_name = "data"
 )
 
 keras$distribution$set_distribution(model_parallel)
diff --git a/vignettes-src/parked/_custom_train_step_in_torch.Rmd b/vignettes-src/parked/_custom_train_step_in_torch.Rmd
index f1d3106b3..078ae4d7e 100644
--- a/vignettes-src/parked/_custom_train_step_in_torch.Rmd
+++ b/vignettes-src/parked/_custom_train_step_in_torch.Rmd
@@ -2,7 +2,7 @@
 title: Customizing what happens in `fit()` with PyTorch
 author: '[fchollet](https://twitter.com/fchollet)'
 date-created: 2023/06/27
-last-modified: 2023/06/27
+last-modified: 2024/08/01
 description: Overriding the training step of the Model class with PyTorch.
 accelerator: GPU
 output: rmarkdown::html_vignette
@@ -390,7 +390,7 @@ class GAN(keras.Model):
 
     def train_step(self, real_images):
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        if isinstance(real_images, tuple):
+        if isinstance(real_images, tuple) or isinstance(real_images, list):
             real_images = real_images[0]
         # Sample random points in the latent space
         batch_size = real_images.shape[0]
diff --git a/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd b/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd
index 95c7a916f..0b483d53d 100644
--- a/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd
+++ b/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd
@@ -153,7 +153,7 @@ def compute_loss_and_updates(trainable_variables, non_trainable_variables, x, y)
 ```
 
 Once you have such a function, you can get the gradient function by
-specifying `hax_aux` in `value_and_grad`: it tells JAX that the loss
+specifying `has_aux` in `value_and_grad`: it tells JAX that the loss
 computation function returns more outputs than just the loss. Note that the
 loss should always be the first output.