diff --git a/action_files/test_models/src/multivariate_models.py b/action_files/test_models/src/multivariate_models.py
index 2b343c2aa..1b1d9593b 100644
--- a/action_files/test_models/src/multivariate_models.py
+++ b/action_files/test_models/src/multivariate_models.py
@@ -10,7 +10,7 @@
 from neuralforecast.models.tsmixer import TSMixer
 from neuralforecast.models.tsmixerx import TSMixerx
 from neuralforecast.models.itransformer import iTransformer
-from neuralforecast.models.stemgnn import StemGNN
+# from neuralforecast.models.stemgnn import StemGNN
 from neuralforecast.models.mlpmultivariate import MLPMultivariate
 from neuralforecast.models.timemixer import TimeMixer
@@ -30,7 +30,7 @@ def main(dataset: str = 'multivariate', group: str = 'ETTm2') -> None:
         TSMixer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
         TSMixerx(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
         iTransformer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
-        StemGNN(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout_rate=0.0, max_steps=1000, val_check_steps=500),
+        # StemGNN(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout_rate=0.0, max_steps=1000, val_check_steps=500),
         MLPMultivariate(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), max_steps=1000, val_check_steps=500),
         TimeMixer(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500)
     ]
diff --git a/nbs/common.base_model.ipynb b/nbs/common.base_model.ipynb
index 718034d66..b408f6d19 100644
--- a/nbs/common.base_model.ipynb
+++ b/nbs/common.base_model.ipynb
@@ -441,7 +441,7 @@
     "        if self.val_size == 0:\n",
     "            return\n",
     "        losses = torch.stack(self.validation_step_outputs)\n",
-    "        avg_loss = losses.mean().item()\n",
+    "        avg_loss = losses.mean().detach().item()\n",
     "        self.log(\n",
     "            \"ptl/val_loss\",\n",
     "            avg_loss,\n",
diff --git a/nbs/common.base_multivariate.ipynb b/nbs/common.base_multivariate.ipynb
index 913c0fd23..81c933527 100644
--- a/nbs/common.base_multivariate.ipynb
+++ b/nbs/common.base_multivariate.ipynb
@@ -131,7 +131,7 @@
     "        self.h = h\n",
     "        self.input_size = input_size\n",
     "        self.n_series = n_series\n",
-    "        self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0)\n",
+    "        self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
     "\n",
     "        # Multivariate models do not support these loss functions yet.\n",
     "        unsupported_losses = (\n",
@@ -390,12 +390,12 @@
     "\n",
     "        self.log(\n",
     "            'train_loss',\n",
-    "            loss.item(),\n",
+    "            loss.detach().item(),\n",
     "            batch_size=outsample_y.size(0),\n",
     "            prog_bar=True,\n",
     "            on_epoch=True,\n",
     "        )\n",
-    "        self.train_trajectories.append((self.global_step, loss.item()))\n",
+    "        self.train_trajectories.append((self.global_step, loss.detach().item()))\n",
     "        return loss\n",
     "\n",
     "    def validation_step(self, batch, batch_idx):\n",
@@ -440,7 +440,7 @@
     "\n",
     "        self.log(\n",
     "            'valid_loss',\n",
-    "            valid_loss.item(),\n",
+    "            valid_loss.detach().item(),\n",
     "            batch_size=outsample_y.size(0),\n",
     "            prog_bar=True,\n",
     "            on_epoch=True,\n",
diff --git a/nbs/common.base_recurrent.ipynb b/nbs/common.base_recurrent.ipynb
index 694322891..0311141c6 100644
--- a/nbs/common.base_recurrent.ipynb
+++ b/nbs/common.base_recurrent.ipynb
@@ -137,7 +137,7 @@
     "        self.h = h\n",
     "        self.input_size = input_size\n",
     "        self.inference_input_size = inference_input_size\n",
-    "        self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0)\n",
+    "        self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
     "\n",
     "        unsupported_distributions = ['Bernoulli', 'ISQF']\n",
     "        if isinstance(self.loss, losses.DistributionLoss) and\\\n",
@@ -254,7 +254,7 @@
     "\n",
     "        # Test size covers all data, pad left one timestep with zeros\n",
     "        if temporal.shape[-1] == self.test_size:\n",
-    "            padder_left = nn.ConstantPad1d(padding=(1, 0), value=0)\n",
+    "            padder_left = nn.ConstantPad1d(padding=(1, 0), value=0.0)\n",
     "            temporal = padder_left(temporal)\n",
     "\n",
     "        # Parse batch\n",
@@ -365,12 +365,12 @@
     "\n",
     "        self.log(\n",
     "            'train_loss',\n",
-    "            loss.item(),\n",
+    "            loss.detach().item(),\n",
     "            batch_size=outsample_y.size(0),\n",
     "            prog_bar=True,\n",
     "            on_epoch=True,\n",
     "        )\n",
-    "        self.train_trajectories.append((self.global_step, loss.item()))\n",
+    "        self.train_trajectories.append((self.global_step, loss.detach().item()))\n",
     "        return loss\n",
     "\n",
     "    def validation_step(self, batch, batch_idx):\n",
@@ -438,7 +438,7 @@
     "\n",
     "        self.log(\n",
     "            'valid_loss',\n",
-    "            valid_loss.item(),\n",
+    "            valid_loss.detach().item(),\n",
     "            batch_size=outsample_y.size(0),\n",
     "            prog_bar=True,\n",
     "            on_epoch=True,\n",
diff --git a/nbs/common.base_windows.ipynb b/nbs/common.base_windows.ipynb
index 90635d391..e48ea4123 100644
--- a/nbs/common.base_windows.ipynb
+++ b/nbs/common.base_windows.ipynb
@@ -143,9 +143,9 @@
     "        self.windows_batch_size = windows_batch_size\n",
     "        self.start_padding_enabled = start_padding_enabled\n",
     "        if start_padding_enabled:\n",
-    "            self.padder_train = nn.ConstantPad1d(padding=(self.input_size-1, self.h), value=0)\n",
+    "            self.padder_train = nn.ConstantPad1d(padding=(self.input_size-1, self.h), value=0.0)\n",
     "        else:\n",
-    "            self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0)\n",
+    "            self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
     "\n",
     "        # Batch sizes\n",
     "        self.batch_size = batch_size\n",
@@ -265,7 +265,7 @@
     "        if step == 'predict':\n",
     "            initial_input = temporal.shape[-1] - self.test_size\n",
     "            if initial_input <= self.input_size: # There is not enough data to predict first timestamp\n",
-    "                padder_left = nn.ConstantPad1d(padding=(self.input_size-initial_input, 0), value=0)\n",
+    "                padder_left = nn.ConstantPad1d(padding=(self.input_size-initial_input, 0), value=0.0)\n",
     "                temporal = padder_left(temporal)\n",
     "            predict_step_size = self.predict_step_size\n",
     "            cutoff = - self.input_size - self.test_size\n",
@@ -280,11 +280,11 @@
     "            temporal = batch['temporal'][:, :, cutoff:]\n",
     "            if temporal.shape[-1] < window_size:\n",
     "                initial_input = temporal.shape[-1] - self.val_size\n",
-    "                padder_left = nn.ConstantPad1d(padding=(self.input_size-initial_input, 0), value=0)\n",
+    "                padder_left = nn.ConstantPad1d(padding=(self.input_size-initial_input, 0), value=0.0)\n",
     "                temporal = padder_left(temporal)\n",
     "\n",
     "            if (step=='predict') and (self.test_size==0) and (len(self.futr_exog_list)==0):\n",
-    "                padder_right = nn.ConstantPad1d(padding=(0, self.h), value=0)\n",
+    "                padder_right = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
     "                temporal = padder_right(temporal)\n",
     "\n",
     "            windows = temporal.unfold(dimension=-1,\n",
@@ -447,12 +447,12 @@
     "\n",
     "        self.log(\n",
     "            'train_loss',\n",
-    "            loss.item(),\n",
+    "            loss.detach().item(),\n",
     "            batch_size=outsample_y.size(0),\n",
     "            prog_bar=True,\n",
     "            on_epoch=True,\n",
     "        )\n",
-    "        self.train_trajectories.append((self.global_step, loss.item()))\n",
+    "        self.train_trajectories.append((self.global_step, loss.detach().item()))\n",
     "        return loss\n",
     "\n",
     "    def _compute_valid_loss(self, outsample_y, output, outsample_mask, temporal_cols, y_idx):\n",
@@ -533,7 +533,7 @@
     "\n",
     "        self.log(\n",
     "            'valid_loss',\n",
-    "            valid_loss.item(),\n",
+    "            valid_loss.detach().item(),\n",
     "            batch_size=batch_size,\n",
     "            prog_bar=True,\n",
     "            on_epoch=True,\n",
diff --git a/nbs/losses.pytorch.ipynb b/nbs/losses.pytorch.ipynb
index 45bb4a93d..efcff01a1 100644
--- a/nbs/losses.pytorch.ipynb
+++ b/nbs/losses.pytorch.ipynb
@@ -105,9 +105,7 @@
     "    Auxiliary funtion to handle divide by 0\n",
     "    \"\"\"\n",
     "    div = a / b\n",
-    "    div[div != div] = 0.0\n",
-    "    div[div == float('inf')] = 0.0\n",
-    "    return div"
+    "    return torch.nan_to_num(div, nan=0.0, posinf=0.0, neginf=0.0)"
    ]
   },
   {
@@ -1412,7 +1410,7 @@
     "    if (loc is not None) and (scale is not None):\n",
     "        mean = (mean * scale) + loc\n",
     "        tscale = (tscale + eps) * scale\n",
-    "    df = 2.0 + F.softplus(df)\n",
+    "    df = 3.0 + F.softplus(df)\n",
     "    return (df, mean, tscale)\n",
     "\n",
     "def normal_domain_map(input: torch.Tensor):\n",
diff --git a/neuralforecast/common/_base_model.py b/neuralforecast/common/_base_model.py
index 9423dcdba..ab20ca193 100644
--- a/neuralforecast/common/_base_model.py
+++ b/neuralforecast/common/_base_model.py
@@ -421,7 +421,7 @@ def on_validation_epoch_end(self):
         if self.val_size == 0:
             return
         losses = torch.stack(self.validation_step_outputs)
-        avg_loss = losses.mean().item()
+        avg_loss = losses.mean().detach().item()
         self.log(
             "ptl/val_loss",
             avg_loss,
diff --git a/neuralforecast/common/_base_multivariate.py b/neuralforecast/common/_base_multivariate.py
index 460702bb6..8a31a2637 100644
--- a/neuralforecast/common/_base_multivariate.py
+++ b/neuralforecast/common/_base_multivariate.py
@@ -77,7 +77,7 @@ def __init__(
         self.h = h
         self.input_size = input_size
         self.n_series = n_series
-        self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0)
+        self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)

         # Multivariate models do not support these loss functions yet.
         unsupported_losses = (
@@ -389,12 +389,12 @@ def training_step(self, batch, batch_idx):

         self.log(
             "train_loss",
-            loss.item(),
+            loss.detach().item(),
             batch_size=outsample_y.size(0),
             prog_bar=True,
             on_epoch=True,
         )
-        self.train_trajectories.append((self.global_step, loss.item()))
+        self.train_trajectories.append((self.global_step, loss.detach().item()))
         return loss

     def validation_step(self, batch, batch_idx):
@@ -456,7 +456,7 @@ def validation_step(self, batch, batch_idx):

         self.log(
             "valid_loss",
-            valid_loss.item(),
+            valid_loss.detach().item(),
             batch_size=outsample_y.size(0),
             prog_bar=True,
             on_epoch=True,
diff --git a/neuralforecast/common/_base_recurrent.py b/neuralforecast/common/_base_recurrent.py
index a427d15ae..3502c9379 100644
--- a/neuralforecast/common/_base_recurrent.py
+++ b/neuralforecast/common/_base_recurrent.py
@@ -77,7 +77,7 @@ def __init__(
         self.h = h
         self.input_size = input_size
         self.inference_input_size = inference_input_size
-        self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0)
+        self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)

         unsupported_distributions = ["Bernoulli", "ISQF"]
         if (
@@ -210,7 +210,7 @@ def _create_windows(self, batch, step):

         # Test size covers all data, pad left one timestep with zeros
         if temporal.shape[-1] == self.test_size:
-            padder_left = nn.ConstantPad1d(padding=(1, 0), value=0)
+            padder_left = nn.ConstantPad1d(padding=(1, 0), value=0.0)
             temporal = padder_left(temporal)

         # Parse batch
@@ -349,12 +349,12 @@ def training_step(self, batch, batch_idx):

         self.log(
             "train_loss",
-            loss.item(),
+            loss.detach().item(),
             batch_size=outsample_y.size(0),
             prog_bar=True,
             on_epoch=True,
         )
-        self.train_trajectories.append((self.global_step, loss.item()))
+        self.train_trajectories.append((self.global_step, loss.detach().item()))
         return loss

     def validation_step(self, batch, batch_idx):
@@ -447,7 +447,7 @@ def validation_step(self, batch, batch_idx):

         self.log(
             "valid_loss",
-            valid_loss.item(),
+            valid_loss.detach().item(),
             batch_size=outsample_y.size(0),
             prog_bar=True,
             on_epoch=True,
diff --git a/neuralforecast/common/_base_windows.py b/neuralforecast/common/_base_windows.py
index 416535c2e..cee5417ac 100644
--- a/neuralforecast/common/_base_windows.py
+++ b/neuralforecast/common/_base_windows.py
@@ -83,10 +83,10 @@ def __init__(
         self.start_padding_enabled = start_padding_enabled
         if start_padding_enabled:
             self.padder_train = nn.ConstantPad1d(
-                padding=(self.input_size - 1, self.h), value=0
+                padding=(self.input_size - 1, self.h), value=0.0
             )
         else:
-            self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0)
+            self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0.0)

         # Batch sizes
         self.batch_size = batch_size
@@ -216,7 +216,7 @@ def _create_windows(self, batch, step, w_idxs=None):
                 initial_input <= self.input_size
             ): # There is not enough data to predict first timestamp
                 padder_left = nn.ConstantPad1d(
-                    padding=(self.input_size - initial_input, 0), value=0
+                    padding=(self.input_size - initial_input, 0), value=0.0
                 )
                 temporal = padder_left(temporal)
             predict_step_size = self.predict_step_size
@@ -233,7 +233,7 @@ def _create_windows(self, batch, step, w_idxs=None):
             if temporal.shape[-1] < window_size:
                 initial_input = temporal.shape[-1] - self.val_size
                 padder_left = nn.ConstantPad1d(
-                    padding=(self.input_size - initial_input, 0), value=0
+                    padding=(self.input_size - initial_input, 0), value=0.0
                 )
                 temporal = padder_left(temporal)

@@ -242,7 +242,7 @@ def _create_windows(self, batch, step, w_idxs=None):
                 and (self.test_size == 0)
                 and (len(self.futr_exog_list) == 0)
             ):
-                padder_right = nn.ConstantPad1d(padding=(0, self.h), value=0)
+                padder_right = nn.ConstantPad1d(padding=(0, self.h), value=0.0)
                 temporal = padder_right(temporal)

             windows = temporal.unfold(
@@ -440,12 +440,12 @@ def training_step(self, batch, batch_idx):

         self.log(
             "train_loss",
-            loss.item(),
+            loss.detach().item(),
             batch_size=outsample_y.size(0),
             prog_bar=True,
             on_epoch=True,
         )
-        self.train_trajectories.append((self.global_step, loss.item()))
+        self.train_trajectories.append((self.global_step, loss.detach().item()))
         return loss

     def _compute_valid_loss(
@@ -551,7 +551,7 @@ def validation_step(self, batch, batch_idx):

         self.log(
             "valid_loss",
-            valid_loss.item(),
+            valid_loss.detach().item(),
             batch_size=batch_size,
             prog_bar=True,
             on_epoch=True,
diff --git a/neuralforecast/losses/pytorch.py b/neuralforecast/losses/pytorch.py
index 98184c055..a65b1c532 100644
--- a/neuralforecast/losses/pytorch.py
+++ b/neuralforecast/losses/pytorch.py
@@ -35,9 +35,7 @@ def _divide_no_nan(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
     Auxiliary funtion to handle divide by 0
     """
     div = a / b
-    div[div != div] = 0.0
-    div[div == float("inf")] = 0.0
-    return div
+    return torch.nan_to_num(div, nan=0.0, posinf=0.0, neginf=0.0)

 # %% ../../nbs/losses.pytorch.ipynb 7
 def _weighted_mean(losses, weights):
@@ -825,7 +823,7 @@ def student_scale_decouple(output, loc=None, scale=None, eps: float = 0.1):
     if (loc is not None) and (scale is not None):
         mean = (mean * scale) + loc
         tscale = (tscale + eps) * scale
-    df = 2.0 + F.softplus(df)
+    df = 3.0 + F.softplus(df)
     return (df, mean, tscale)
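Note (editorial, not part of the patch): a minimal sketch of what the rewritten _divide_no_nan helper does, assuming only PyTorch with torch.nan_to_num available. Elementwise division by zero yields nan (0/0) or +/-inf (nonzero/0); torch.nan_to_num(div, nan=0.0, posinf=0.0, neginf=0.0) maps all three cases to 0.0 in one call, whereas the two deleted in-place masks only covered nan and +inf. The function name below is hypothetical, for illustration only.

import torch

def divide_no_nan(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # Safe elementwise division: 0/0 -> nan, x/0 -> inf, -x/0 -> -inf;
    # all three are replaced with 0.0 in a single call.
    return torch.nan_to_num(a / b, nan=0.0, posinf=0.0, neginf=0.0)

a = torch.tensor([1.0, -1.0, 0.0, 2.0])
b = torch.tensor([0.0, 0.0, 0.0, 4.0])
print(divide_no_nan(a, b))  # tensor([0.0000, 0.0000, 0.0000, 0.5000])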