diff --git a/nbs/core.ipynb b/nbs/core.ipynb
index 737a12420..a4f164ed5 100644
--- a/nbs/core.ipynb
+++ b/nbs/core.ipynb
@@ -87,7 +87,7 @@
" TFT, VanillaTransformer,\n",
" Informer, Autoformer, FEDformer,\n",
" StemGNN, PatchTST, TimesNet, TimeLLM, TSMixer, TSMixerx,\n",
- " MLPMultivariate\n",
+ " MLPMultivariate, iTransformer\n",
")"
]
},
@@ -228,6 +228,7 @@
" 'tsmixer': TSMixer, 'autotsmixer': TSMixer,\n",
" 'tsmixerx': TSMixerx, 'autotsmixerx': TSMixerx,\n",
" 'mlpmultivariate': MLPMultivariate, 'automlpmultivariate': MLPMultivariate,\n",
+ " 'itransformer': iTransformer, 'autoitransformer': iTransformer\n",
"}"
]
},
diff --git a/nbs/imgs_models/iTransformer.png b/nbs/imgs_models/iTransformer.png
new file mode 100644
index 000000000..01605df9a
Binary files /dev/null and b/nbs/imgs_models/iTransformer.png differ
diff --git a/nbs/models.ipynb b/nbs/models.ipynb
index 0dafa2598..82331c3b2 100644
--- a/nbs/models.ipynb
+++ b/nbs/models.ipynb
@@ -60,6 +60,7 @@
"from neuralforecast.models.fedformer import FEDformer\n",
"from neuralforecast.models.patchtst import PatchTST\n",
"from neuralforecast.models.timesnet import TimesNet\n",
+ "from neuralforecast.models.itransformer import iTransformer\n",
"\n",
"from neuralforecast.models.stemgnn import StemGNN\n",
"from neuralforecast.models.hint import HINT\n",
@@ -2706,6 +2707,157 @@
"model.fit(dataset=dataset)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a61c3be9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "class AutoiTransformer(BaseAuto):\n",
+ "\n",
+ " default_config = {\n",
+ " \"input_size_multiplier\": [1, 2, 3, 4, 5],\n",
+ " \"h\": None,\n",
+ " \"n_series\": None,\n",
+ " \"hidden_size\": tune.choice([64, 128, 256]),\n",
+ " \"n_heads\": tune.choice([4, 8]),\n",
+ " \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
+ " \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
+ " \"max_steps\": tune.choice([500, 1000, 2000]),\n",
+ " \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
+ " \"loss\": None,\n",
+ " \"random_seed\": tune.randint(1, 20),\n",
+ " }\n",
+ "\n",
+ " def __init__(self,\n",
+ " h,\n",
+ " n_series,\n",
+ " loss=MAE(),\n",
+ " valid_loss=None,\n",
+ " config=None, \n",
+ " search_alg=BasicVariantGenerator(random_state=1),\n",
+ " num_samples=10,\n",
+ " refit_with_val=False,\n",
+ " cpus=cpu_count(),\n",
+ " gpus=torch.cuda.device_count(),\n",
+ " verbose=False,\n",
+ " alias=None,\n",
+ " backend='ray',\n",
+ " callbacks=None):\n",
+ " \n",
+ " # Define search space, input/output sizes\n",
+ " if config is None:\n",
+ " config = self.get_default_config(h=h, backend=backend, n_series=n_series) \n",
+ "\n",
+ " # Always use n_series from parameters, raise exception with Optuna because we can't enforce it\n",
+ " if backend == 'ray':\n",
+ " config['n_series'] = n_series\n",
+ " elif backend == 'optuna':\n",
+ " mock_trial = MockTrial()\n",
+ " if ('n_series' in config(mock_trial) and config(mock_trial)['n_series'] != n_series) or ('n_series' not in config(mock_trial)):\n",
+ " raise Exception(f\"config needs 'n_series': {n_series}\") \n",
+ "\n",
+ " super(AutoiTransformer, self).__init__(\n",
+ " cls_model=iTransformer, \n",
+ " h=h,\n",
+ " loss=loss,\n",
+ " valid_loss=valid_loss,\n",
+ " config=config,\n",
+ " search_alg=search_alg,\n",
+ " num_samples=num_samples, \n",
+ " refit_with_val=refit_with_val,\n",
+ " cpus=cpus,\n",
+ " gpus=gpus,\n",
+ " verbose=verbose,\n",
+ " alias=alias,\n",
+ " backend=backend,\n",
+ " callbacks=callbacks, \n",
+ " )\n",
+ "\n",
+ " @classmethod\n",
+ " def get_default_config(cls, h, backend, n_series):\n",
+ " config = cls.default_config.copy() \n",
+ " config['input_size'] = tune.choice([h * x \\\n",
+ " for x in config[\"input_size_multiplier\"]])\n",
+ "\n",
+ " # Rolling windows with step_size=1 or step_size=h\n",
+ " # See `BaseWindows` and `BaseRNN`'s create_windows\n",
+ " config['step_size'] = tune.choice([1, h])\n",
+ " del config[\"input_size_multiplier\"]\n",
+ " if backend == 'optuna':\n",
+ " # Always use n_series from parameters\n",
+ " config['n_series'] = n_series\n",
+ " config = cls._ray_config_to_optuna(config) \n",
+ "\n",
+ " return config "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8f416fa0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "show_doc(AutoiTransformer, title_level=3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ffd40db",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "# Use your own config or AutoiTransformer.default_config\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12, hidden_size=16)\n",
+ "model = AutoiTransformer(h=12, n_series=1, config=config, num_samples=1, cpus=1)\n",
+ "\n",
+ "# Fit and predict\n",
+ "model.fit(dataset=dataset)\n",
+ "y_hat = model.predict(dataset=dataset)\n",
+ "\n",
+ "# Optuna\n",
+ "model = AutoiTransformer(h=12, n_series=1, config=None, backend='optuna')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7a2052de",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Check Optuna\n",
+ "assert model.config(MockTrial())['h'] == 12\n",
+ "\n",
+ "# Unit test to test that Auto* model contains all required arguments from BaseAuto\n",
+ "test_args(AutoiTransformer, exclude_args=['cls_model']) \n",
+ "\n",
+ "# Unit test for situation: Optuna with updated default config\n",
+ "my_config = AutoiTransformer.get_default_config(h=12, n_series=1, backend='optuna')\n",
+ "def my_config_new(trial):\n",
+ " config = {**my_config(trial)}\n",
+ " config.update({'max_steps': 1, 'val_check_steps': 1, 'input_size': 12, 'hidden_size': 16})\n",
+ " return config\n",
+ "\n",
+ "model = AutoiTransformer(h=12, n_series=1, config=my_config_new, backend='optuna', num_samples=1, cpus=1)\n",
+ "model.fit(dataset=dataset)\n",
+ "\n",
+ "# Unit test for situation: Ray with updated default config\n",
+ "my_config = AutoiTransformer.get_default_config(h=12, n_series=1, backend='ray')\n",
+ "my_config['max_steps'] = 1\n",
+ "my_config['val_check_steps'] = 1\n",
+ "my_config['input_size'] = 12\n",
+ "my_config['hidden_size'] = 16\n",
+ "model = AutoiTransformer(h=12, n_series=1, config=my_config, backend='ray', num_samples=1, cpus=1)\n",
+ "model.fit(dataset=dataset)"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
diff --git a/nbs/models.itransformer.ipynb b/nbs/models.itransformer.ipynb
new file mode 100644
index 000000000..16f223d3f
--- /dev/null
+++ b/nbs/models.itransformer.ipynb
@@ -0,0 +1,1170 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp models.itransformer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "from fastcore.test import test_eq\n",
+ "from nbdev.showdoc import show_doc"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# iTransformer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The iTransformer model reuses the standard Transformer architecture, but applies the attention and feed-forward networks on the inverted dimensions: the time points of each individual series are embedded into a single token. That way, the attention mechanism learns multivariate correlations between series, and the feed-forward network learns non-linear relationships.\n",
+ "\n",
+ "**References**\n",
+ "- [Yong Liu, Tengge Hu, Haoran Zhang, Haixu Wu, Shiyu Wang, Lintao Ma, Mingsheng Long. \"iTransformer: Inverted Transformers Are Effective for Time Series Forecasting\"](https://arxiv.org/abs/2310.06625)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![Figure 1. Architecture of iTransformer.](imgs_models/iTransformer.png)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.nn.functional as F\n",
+ "\n",
+ "import numpy as np\n",
+ "\n",
+ "from typing import Optional\n",
+ "from math import sqrt\n",
+ "\n",
+ "from neuralforecast.losses.pytorch import MAE\n",
+ "from neuralforecast.common._base_multivariate import BaseMultivariate\n",
+ "\n",
+ "from neuralforecast.common._modules import TransEncoder, TransEncoderLayer, AttentionLayer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 1. Auxiliary functions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.1 Attention"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exporti\n",
+ "\n",
+ "class TriangularCausalMask():\n",
+ " def __init__(self, B, L, device=\"cpu\"):\n",
+ " mask_shape = [B, 1, L, L]\n",
+ " with torch.no_grad():\n",
+ " self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)\n",
+ "\n",
+ " @property\n",
+ " def mask(self):\n",
+ " return self._mask\n",
+ "\n",
+ "class FullAttention(nn.Module):\n",
+ " def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):\n",
+ " super(FullAttention, self).__init__()\n",
+ " self.scale = scale\n",
+ " self.mask_flag = mask_flag\n",
+ " self.output_attention = output_attention\n",
+ " self.dropout = nn.Dropout(attention_dropout)\n",
+ "\n",
+ " def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):\n",
+ " B, L, H, E = queries.shape\n",
+ " _, S, _, D = values.shape\n",
+ " scale = self.scale or 1. / sqrt(E)\n",
+ "\n",
+ " scores = torch.einsum(\"blhe,bshe->bhls\", queries, keys)\n",
+ "\n",
+ " if self.mask_flag:\n",
+ " if attn_mask is None:\n",
+ " attn_mask = TriangularCausalMask(B, L, device=queries.device)\n",
+ "\n",
+ " scores.masked_fill_(attn_mask.mask, -np.inf)\n",
+ "\n",
+ " A = self.dropout(torch.softmax(scale * scores, dim=-1))\n",
+ " V = torch.einsum(\"bhls,bshd->blhd\", A, values)\n",
+ "\n",
+ " if self.output_attention:\n",
+ " return (V.contiguous(), A)\n",
+ " else:\n",
+ " return (V.contiguous(), None) "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.2 Inverted embedding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| exporti\n",
+ "\n",
+ "class DataEmbedding_inverted(nn.Module):\n",
+ " def __init__(self, c_in, hidden_size, dropout=0.1):\n",
+ " super(DataEmbedding_inverted, self).__init__()\n",
+ " self.value_embedding = nn.Linear(c_in, hidden_size)\n",
+ " self.dropout = nn.Dropout(p=dropout)\n",
+ "\n",
+ " def forward(self, x, x_mark):\n",
+ " x = x.permute(0, 2, 1)\n",
+ " # x: [Batch Variate Time]\n",
+ " if x_mark is None:\n",
+ " x = self.value_embedding(x)\n",
+ " else:\n",
+ " # the potential to take covariates (e.g. timestamps) as tokens\n",
+ " x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1)) \n",
+ " # x: [Batch Variate hidden_size]\n",
+ " return self.dropout(x)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 2. Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "\n",
+ "class iTransformer(BaseMultivariate):\n",
+ "\n",
+ " \"\"\" iTransformer\n",
+ "\n",
+ " **Parameters:**
\n",
+ " `h`: int, Forecast horizon.
\n",
+ " `input_size`: int, autoregressive input size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n",
+ " `n_series`: int, number of time-series.
\n",
+ " `futr_exog_list`: str list, future exogenous columns.
\n",
+ " `hist_exog_list`: str list, historic exogenous columns.
\n",
+ " `stat_exog_list`: str list, static exogenous columns.
\n",
+ " `hidden_size`: int, dimension of the model.
\n",
+ " `n_heads`: int, number of heads.
\n",
+ " `e_layers`: int, number of encoder layers.
\n",
+ " `d_layers`: int, number of decoder layers.
\n",
+ " `d_ff`: int, dimension of fully-connected layer.
\n",
+ " `factor`: int, attention factor.
\n",
+ " `dropout`: float, dropout rate.
\n",
+ " `use_norm`: bool, whether to normalize or not.
\n",
+ " `loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ " `valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ " `max_steps`: int=1000, maximum number of training steps.
\n",
+ " `learning_rate`: float=1e-3, Learning rate between (0, 1).
\n",
+ " `num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n",
+ " `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n",
+ " `val_check_steps`: int=100, Number of training steps between every validation loss check.
\n",
+ " `batch_size`: int=32, number of different series in each batch.
\n",
+ " `step_size`: int=1, step size between each window of temporal data.
\n",
+ " `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
+ " `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
+ " `num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n",
+ " `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n",
+ " `alias`: str, optional, Custom name of the model.
\n",
+ " `optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n",
+ " `optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n",
+ " `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n",
+ " \n",
+ " **References**
\n",
+ " - [Yong Liu, Tengge Hu, Haoran Zhang, Haixu Wu, Shiyu Wang, Lintao Ma, Mingsheng Long. \"iTransformer: Inverted Transformers Are Effective for Time Series Forecasting\"](https://arxiv.org/abs/2310.06625)\n",
+ " \"\"\"\n",
+ "\n",
+ " # Class attributes\n",
+ " SAMPLING_TYPE = 'multivariate'\n",
+ "\n",
+ " def __init__(self,\n",
+ " h,\n",
+ " input_size,\n",
+ " n_series,\n",
+ " futr_exog_list = None,\n",
+ " hist_exog_list = None,\n",
+ " stat_exog_list = None,\n",
+ " hidden_size: int = 512,\n",
+ " n_heads: int = 8,\n",
+ " e_layers: int = 2,\n",
+ " d_layers: int = 1,\n",
+ " d_ff: int = 2048,\n",
+ " factor: int = 1,\n",
+ " dropout: float = 0.1,\n",
+ " use_norm: bool = True,\n",
+ " loss = MAE(),\n",
+ " valid_loss = None,\n",
+ " max_steps: int = 1000,\n",
+ " learning_rate: float = 1e-3,\n",
+ " num_lr_decays: int = -1,\n",
+ " early_stop_patience_steps: int =-1,\n",
+ " val_check_steps: int = 100,\n",
+ " batch_size: int = 32,\n",
+ " step_size: int = 1,\n",
+ " scaler_type: str = 'identity',\n",
+ " random_seed: int = 1,\n",
+ " num_workers_loader: int = 0,\n",
+ " drop_last_loader: bool = False,\n",
+ " optimizer = None,\n",
+ " optimizer_kwargs = None,\n",
+ " **trainer_kwargs):\n",
+ " \n",
+ " super(iTransformer, self).__init__(h=h,\n",
+ " input_size=input_size,\n",
+ " n_series=n_series,\n",
+ " stat_exog_list = None,\n",
+ " futr_exog_list = None,\n",
+ " hist_exog_list = None,\n",
+ " loss=loss,\n",
+ " valid_loss=valid_loss,\n",
+ " max_steps=max_steps,\n",
+ " learning_rate=learning_rate,\n",
+ " num_lr_decays=num_lr_decays,\n",
+ " early_stop_patience_steps=early_stop_patience_steps,\n",
+ " val_check_steps=val_check_steps,\n",
+ " batch_size=batch_size,\n",
+ " step_size=step_size,\n",
+ " scaler_type=scaler_type,\n",
+ " random_seed=random_seed,\n",
+ " num_workers_loader=num_workers_loader,\n",
+ " drop_last_loader=drop_last_loader,\n",
+ " optimizer=optimizer,\n",
+ " optimizer_kwargs=optimizer_kwargs,\n",
+ " **trainer_kwargs)\n",
+ " \n",
+ " # Asserts\n",
+ " if stat_exog_list is not None:\n",
+ " raise Exception(\"iTransformer does not support static exogenous variables\")\n",
+ " if futr_exog_list is not None:\n",
+ " raise Exception(\"iTransformer does not support future exogenous variables\")\n",
+ " if hist_exog_list is not None:\n",
+ " raise Exception(\"iTransformer does not support historical exogenous variables\")\n",
+ " \n",
+ " self.enc_in = n_series\n",
+ " self.dec_in = n_series\n",
+ " self.c_out = n_series\n",
+ " self.hidden_size = hidden_size\n",
+ " self.n_heads = n_heads\n",
+ " self.e_layers = e_layers\n",
+ " self.d_layers = d_layers\n",
+ " self.d_ff = d_ff\n",
+ " self.factor = factor\n",
+ " self.dropout = dropout\n",
+ " self.use_norm = use_norm\n",
+ "\n",
+ " # Architecture\n",
+ " self.enc_embedding = DataEmbedding_inverted(input_size, self.hidden_size, self.dropout)\n",
+ "\n",
+ " self.encoder = TransEncoder(\n",
+ " [\n",
+ " TransEncoderLayer(\n",
+ " AttentionLayer(\n",
+ " FullAttention(False, self.factor, attention_dropout=self.dropout), self.hidden_size, self.n_heads),\n",
+ " self.hidden_size,\n",
+ " self.d_ff,\n",
+ " dropout=self.dropout,\n",
+ " activation=F.gelu\n",
+ " ) for l in range(self.e_layers)\n",
+ " ],\n",
+ " norm_layer=torch.nn.LayerNorm(self.hidden_size)\n",
+ " )\n",
+ "\n",
+ " self.projector = nn.Linear(self.hidden_size, h, bias=True)\n",
+ " \n",
+ " def forecast(self, x_enc):\n",
+ " if self.use_norm:\n",
+ " # Normalization from Non-stationary Transformer\n",
+ " means = x_enc.mean(1, keepdim=True).detach()\n",
+ " x_enc = x_enc - means\n",
+ " stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)\n",
+ " x_enc /= stdev\n",
+ "\n",
+ " _, _, N = x_enc.shape # B L N\n",
+ " # B: batch_size; E: hidden_size; \n",
+ " # L: input_size; S: horizon(h);\n",
+ " # N: number of variates (tokens); can also include covariates\n",
+ "\n",
+ " # Embedding\n",
+ " # B L N -> B N E (B L N -> B L E in the vanilla Transformer)\n",
+ " enc_out = self.enc_embedding(x_enc, None) # covariates (e.g timestamp) can be also embedded as tokens\n",
+ " \n",
+ " # B N E -> B N E (B L E -> B L E in the vanilla Transformer)\n",
+ " # the dimensions of the embedded time series have been inverted, and are then processed by the native attn, layernorm and ffn modules\n",
+ " enc_out, attns = self.encoder(enc_out, attn_mask=None)\n",
+ "\n",
+ " # B N E -> B N S -> B S N \n",
+ " dec_out = self.projector(enc_out).permute(0, 2, 1)[:, :, :N] # filter the covariates\n",
+ "\n",
+ " if self.use_norm:\n",
+ " # De-Normalization from Non-stationary Transformer\n",
+ " dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))\n",
+ " dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))\n",
+ "\n",
+ " return dec_out\n",
+ " \n",
+ " def forward(self, windows_batch):\n",
+ " insample_y = windows_batch['insample_y']\n",
+ "\n",
+ " y_pred = self.forecast(insample_y)\n",
+ " y_pred = y_pred[:, -self.h:, :]\n",
+ " y_pred = self.loss.domain_map(y_pred)\n",
+ "\n",
+ " # domain_map might have squeezed the last dimension in case n_series == 1\n",
+ " if y_pred.ndim == 2:\n",
+ " return y_pred.unsqueeze(-1)\n",
+ " else:\n",
+ " return y_pred\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "---\n",
+ "\n",
+ "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/itransformer.py#L94){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+ "\n",
+ "### iTransformer\n",
+ "\n",
+ "> iTransformer (h, input_size, n_series, futr_exog_list=None,\n",
+ "> hist_exog_list=None, stat_exog_list=None,\n",
+ "> hidden_size:int=512, n_heads:int=8, e_layers:int=2,\n",
+ "> d_layers:int=1, d_ff:int=2048, factor:int=1,\n",
+ "> dropout:float=0.1, use_norm:bool=True, loss=MAE(),\n",
+ "> valid_loss=None, max_steps:int=1000,\n",
+ "> learning_rate:float=0.001, num_lr_decays:int=-1,\n",
+ "> early_stop_patience_steps:int=-1, val_check_steps:int=100,\n",
+ "> batch_size:int=32, step_size:int=1,\n",
+ "> scaler_type:str='identity', random_seed:int=1,\n",
+ "> num_workers_loader:int=0, drop_last_loader:bool=False,\n",
+ "> optimizer=None, optimizer_kwargs=None, **trainer_kwargs)\n",
+ "\n",
+ "iTransformer\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`h`: int, Forecast horizon.
\n",
+ "`input_size`: int, autoregressive input size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n",
+ "`n_series`: int, number of time-series.
\n",
+ "`futr_exog_list`: str list, future exogenous columns.
\n",
+ "`hist_exog_list`: str list, historic exogenous columns.
\n",
+ "`stat_exog_list`: str list, static exogenous columns.
\n",
+ "`hidden_size`: int, dimension of the model.
\n",
+ "`n_heads`: int, number of heads.
\n",
+ "`e_layers`: int, number of encoder layers.
\n",
+ "`d_layers`: int, number of decoder layers.
\n",
+ "`d_ff`: int, dimension of fully-connected layer.
\n",
+ "`factor`: int, attention factor.
\n",
+ "`dropout`: float, dropout rate.
\n",
+ "`use_norm`: bool, whether to normalize or not.
\n",
+ "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ "`max_steps`: int=1000, maximum number of training steps.
\n",
+ "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n",
+ "`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n",
+ "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n",
+ "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n",
+ "`batch_size`: int=32, number of different series in each batch.
\n",
+ "`step_size`: int=1, step size between each window of temporal data.
\n",
+ "`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
+ "`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
+ "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n",
+ "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n",
+ "`alias`: str, optional, Custom name of the model.
\n",
+ "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n",
+ "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n",
+ "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n",
+ "\n",
+ "**References**
\n",
+ "- [Yong Liu, Tengge Hu, Haoran Zhang, Haixu Wu, Shiyu Wang, Lintao Ma, Mingsheng Long. \"iTransformer: Inverted Transformers Are Effective for Time Series Forecasting\"](https://arxiv.org/abs/2310.06625)"
+ ],
+ "text/plain": [
+ "---\n",
+ "\n",
+ "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/itransformer.py#L94){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+ "\n",
+ "### iTransformer\n",
+ "\n",
+ "> iTransformer (h, input_size, n_series, futr_exog_list=None,\n",
+ "> hist_exog_list=None, stat_exog_list=None,\n",
+ "> hidden_size:int=512, n_heads:int=8, e_layers:int=2,\n",
+ "> d_layers:int=1, d_ff:int=2048, factor:int=1,\n",
+ "> dropout:float=0.1, use_norm:bool=True, loss=MAE(),\n",
+ "> valid_loss=None, max_steps:int=1000,\n",
+ "> learning_rate:float=0.001, num_lr_decays:int=-1,\n",
+ "> early_stop_patience_steps:int=-1, val_check_steps:int=100,\n",
+ "> batch_size:int=32, step_size:int=1,\n",
+ "> scaler_type:str='identity', random_seed:int=1,\n",
+ "> num_workers_loader:int=0, drop_last_loader:bool=False,\n",
+ "> optimizer=None, optimizer_kwargs=None, **trainer_kwargs)\n",
+ "\n",
+ "iTransformer\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`h`: int, Forecast horizon.
\n",
+ "`input_size`: int, autoregressive input size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n",
+ "`n_series`: int, number of time-series.
\n",
+ "`futr_exog_list`: str list, future exogenous columns.
\n",
+ "`hist_exog_list`: str list, historic exogenous columns.
\n",
+ "`stat_exog_list`: str list, static exogenous columns.
\n",
+ "`hidden_size`: int, dimension of the model.
\n",
+ "`n_heads`: int, number of heads.
\n",
+ "`e_layers`: int, number of encoder layers.
\n",
+ "`d_layers`: int, number of decoder layers.
\n",
+ "`d_ff`: int, dimension of fully-connected layer.
\n",
+ "`factor`: int, attention factor.
\n",
+ "`dropout`: float, dropout rate.
\n",
+ "`use_norm`: bool, whether to normalize or not.
\n",
+ "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ "`max_steps`: int=1000, maximum number of training steps.
\n",
+ "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n",
+ "`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n",
+ "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n",
+ "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n",
+ "`batch_size`: int=32, number of different series in each batch.
\n",
+ "`step_size`: int=1, step size between each window of temporal data.
\n",
+ "`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
+ "`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
+ "`num_workers_loader`: int=0, workers to be used by `TimeSeriesDataLoader`.
\n",
+ "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n",
+ "`alias`: str, optional, Custom name of the model.
\n",
+ "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n",
+ "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n",
+ "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n",
+ "\n",
+ "**References**
\n",
+ "- [Yong Liu, Tengge Hu, Haoran Zhang, Haixu Wu, Shiyu Wang, Lintao Ma, Mingsheng Long. \"iTransformer: Inverted Transformers Are Effective for Time Series Forecasting\"](https://arxiv.org/abs/2310.06625)"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "show_doc(iTransformer)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "---\n",
+ "\n",
+ "### iTransformer.fit\n",
+ "\n",
+ "> iTransformer.fit (dataset, val_size=0, test_size=0, random_seed=None)\n",
+ "\n",
+ "Fit.\n",
+ "\n",
+ "The `fit` method, optimizes the neural network's weights using the\n",
+ "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
+ "and the `loss` function as defined during the initialization.\n",
+ "Within `fit` we use a PyTorch Lightning `Trainer` that\n",
+ "inherits the initialization's `self.trainer_kwargs`, to customize\n",
+ "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
+ "\n",
+ "The method is designed to be compatible with SKLearn-like classes\n",
+ "and in particular to be compatible with the StatsForecast library.\n",
+ "\n",
+ "By default the `model` is not saving training checkpoints to protect\n",
+ "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
+ "`val_size`: int, validation size for temporal cross-validation.
\n",
+ "`test_size`: int, test size for temporal cross-validation.
"
+ ],
+ "text/plain": [
+ "---\n",
+ "\n",
+ "### iTransformer.fit\n",
+ "\n",
+ "> iTransformer.fit (dataset, val_size=0, test_size=0, random_seed=None)\n",
+ "\n",
+ "Fit.\n",
+ "\n",
+ "The `fit` method, optimizes the neural network's weights using the\n",
+ "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
+ "and the `loss` function as defined during the initialization.\n",
+ "Within `fit` we use a PyTorch Lightning `Trainer` that\n",
+ "inherits the initialization's `self.trainer_kwargs`, to customize\n",
+ "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
+ "\n",
+ "The method is designed to be compatible with SKLearn-like classes\n",
+ "and in particular to be compatible with the StatsForecast library.\n",
+ "\n",
+ "By default the `model` is not saving training checkpoints to protect\n",
+ "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
+ "`val_size`: int, validation size for temporal cross-validation.
\n",
+ "`test_size`: int, test size for temporal cross-validation.
"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "show_doc(iTransformer.fit, name='iTransformer.fit')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "---\n",
+ "\n",
+ "### iTransformer.predict\n",
+ "\n",
+ "> iTransformer.predict (dataset, test_size=None, step_size=1,\n",
+ "> random_seed=None, **data_module_kwargs)\n",
+ "\n",
+ "Predict.\n",
+ "\n",
+ "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
+ "`test_size`: int=None, test size for temporal cross-validation.
\n",
+ "`step_size`: int=1, Step size between each window.
\n",
+ "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)."
+ ],
+ "text/plain": [
+ "---\n",
+ "\n",
+ "### iTransformer.predict\n",
+ "\n",
+ "> iTransformer.predict (dataset, test_size=None, step_size=1,\n",
+ "> random_seed=None, **data_module_kwargs)\n",
+ "\n",
+ "Predict.\n",
+ "\n",
+ "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
+ "`test_size`: int=None, test size for temporal cross-validation.
\n",
+ "`step_size`: int=1, Step size between each window.
\n",
+ "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)."
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "show_doc(iTransformer.predict, name='iTransformer.predict')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 3. Usage example"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import pytorch_lightning as pl\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "from neuralforecast import NeuralForecast\n",
+ "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
+ "from neuralforecast.losses.pytorch import MSE"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/marcopeix/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:198: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.\n",
+ "/Users/marcopeix/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:198: Attribute 'valid_loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['valid_loss'])`.\n",
+ "Seed set to 1\n",
+ "GPU available: True (mps), used: True\n",
+ "TPU available: False, using: 0 TPU cores\n",
+ "IPU available: False, using: 0 IPUs\n",
+ "HPU available: False, using: 0 HPUs\n",
+ "\n",
+ " | Name | Type | Params\n",
+ "---------------------------------------------------------\n",
+ "0 | padder | ConstantPad1d | 0 \n",
+ "1 | loss | MSE | 0 \n",
+ "2 | valid_loss | MAE | 0 \n",
+ "3 | scaler | TemporalNorm | 0 \n",
+ "4 | enc_embedding | DataEmbedding_inverted | 3.2 K \n",
+ "5 | encoder | TransEncoder | 135 K \n",
+ "6 | projector | Linear | 1.5 K \n",
+ "---------------------------------------------------------\n",
+ "140 K Trainable params\n",
+ "0 Non-trainable params\n",
+ "140 K Total params\n",
+ "0.562 Total estimated model params size (MB)\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "db2340a0a0ea4ab79a8f3c3fbc5e8962",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Sanity Checking: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/marcopeix/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.\n",
+ "/Users/marcopeix/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.\n",
+ "/Users/marcopeix/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.\n",
+ "/Users/marcopeix/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "499599d15ddc4ef69a653340540ac9c8",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Training: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0a234ef80f03437a920e86dcf52dbb60",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Validation: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "dfc4495951c5426684adc42701431ccf",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Validation: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0c2adbd4d1f44c469fd5bd51e395a11e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Validation: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a2e0cad621744cdeba37bc9896ec3b1a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Validation: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ad6c156845724c5fa62e40857bdce80e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Validation: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9e0c8c67be5b4e63ac1937a643b644b4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Validation: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ea1d7416742f487aa2e1672f2dc957ec",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Validation: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/marcopeix/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/utilsforecast/processing.py:352: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n",
+ " freq = pd.tseries.frequencies.to_offset(freq)\n",
+ "/Users/marcopeix/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/utilsforecast/processing.py:404: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n",
+ " freq = pd.tseries.frequencies.to_offset(freq)\n",
+ "/Users/marcopeix/dev/neuralforecast/neuralforecast/tsdataset.py:91: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+ " self.temporal = torch.tensor(temporal, dtype=torch.float)\n",
+ "/Users/marcopeix/dev/neuralforecast/neuralforecast/tsdataset.py:95: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
+ " self.static = torch.tensor(static, dtype=torch.float)\n",
+ "Trainer already configured with model summary callbacks: []. Skipping setting a default `ModelSummary` callback.\n",
+ "GPU available: True (mps), used: True\n",
+ "TPU available: False, using: 0 TPU cores\n",
+ "IPU available: False, using: 0 IPUs\n",
+ "HPU available: False, using: 0 HPUs\n",
+ "/Users/marcopeix/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b61339b3642d44bfb953a7b2becf4cc4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Predicting: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/marcopeix/dev/neuralforecast/neuralforecast/core.py:179: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
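+ "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 132 train\n",
+ "Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",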
+ "\n",
+ "model = iTransformer(h=12,\n",
+ " input_size=24,\n",
+ " n_series=2,\n",
+ " hidden_size=128,\n",
+ " n_heads=2,\n",
+ " e_layers=2,\n",
+ " d_layers=1,\n",
+ " d_ff=4,\n",
+ " factor=1,\n",
+ " dropout=0.1,\n",
+ " use_norm=True,\n",
+ " loss=MSE(),\n",
+ " valid_loss=MAE(),\n",
+ " early_stop_patience_steps=3,\n",
+ " batch_size=32)\n",
+ "\n",
+ "fcst = NeuralForecast(models=[model], freq='M')\n",
+ "fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
+ "forecasts = fcst.predict(futr_df=Y_test_df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "