bitcn_first_push (#958)
elephaint and cchallu authored Apr 8, 2024
1 parent 3679715 commit e0aa338
Showing 10 changed files with 1,977 additions and 46 deletions.
6 changes: 4 additions & 2 deletions nbs/core.ipynb
@@ -87,7 +87,8 @@
" TFT, VanillaTransformer,\n",
" Informer, Autoformer, FEDformer,\n",
" StemGNN, PatchTST, TimesNet, TimeLLM, TSMixer, TSMixerx,\n",
" MLPMultivariate, iTransformer\n",
" MLPMultivariate, iTransformer,\n",
" BiTCN,\n",
")"
]
},
@@ -228,7 +229,8 @@
" 'tsmixer': TSMixer, 'autotsmixer': TSMixer,\n",
" 'tsmixerx': TSMixerx, 'autotsmixerx': TSMixerx,\n",
" 'mlpmultivariate': MLPMultivariate, 'automlpmultivariate': MLPMultivariate,\n",
" 'itransformer': iTransformer, 'autoitransformer': iTransformer\n",
" 'itransformer': iTransformer, 'autoitransformer': iTransformer,\n",
" 'bitcn': BiTCN, 'autobitcn': BiTCN,\n",
"}"
]
},
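With BiTCN registered in the model dictionary above, it can be used and reloaded by name like any other model in the library. As a hedged usage sketch (not part of this diff; the toy dataframe and hyperparameter values are illustrative assumptions):

import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.models import BiTCN

# Toy long-format dataframe with the library's (unique_id, ds, y) columns
df = pd.DataFrame({
    'unique_id': 'series_1',
    'ds': pd.date_range('2020-01-01', periods=36, freq='M'),
    'y': range(36),
})

# Horizon and input_size mirror the values exercised later in this diff
nf = NeuralForecast(models=[BiTCN(h=12, input_size=24, max_steps=100)], freq='M')
nf.fit(df=df)
forecasts = nf.predict()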
109 changes: 89 additions & 20 deletions nbs/examples/Exogenous_Variables.ipynb

Large diffs are not rendered by default.

Binary file added nbs/imgs_models/bitcn.png
1,146 changes: 1,146 additions & 0 deletions nbs/models.bitcn.ipynb

Large diffs are not rendered by default.
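
The model notebook itself is too large to render here. In brief, BiTCN (Bidirectional Temporal Convolutional Network) combines two temporal convolutional networks, one encoding past observations and covariates and one encoding future covariates, through the small linear modules visible in the parameter table later in this diff (lin_hist, net_bwd, feature_lin, temporal_lin). As a hedged illustration of the core primitive, a dilated causal convolution in the spirit of the CustomConv1d registered in _modidx.py below (an assumption-based reconstruction, not the file's actual code):

import torch
import torch.nn as nn
import torch.nn.functional as F

class CausalConv1d(nn.Module):
    # Left-pads by (kernel_size - 1) * dilation so each output step
    # only sees current and past inputs, never the future.
    def __init__(self, in_channels, out_channels, kernel_size, dilation=1):
        super().__init__()
        self.padding = (kernel_size - 1) * dilation
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size,
                              dilation=dilation)

    def forward(self, x):                 # x: [batch, channels, time]
        x = F.pad(x, (self.padding, 0))   # pad only the past side
        return self.conv(x)

x = torch.randn(4, 8, 24)
y = CausalConv1d(8, 16, kernel_size=3, dilation=2)(x)
assert y.shape == (4, 16, 24)   # output length equals input length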

281 changes: 281 additions & 0 deletions nbs/models.ipynb
@@ -45,6 +45,7 @@
"from neuralforecast.models.lstm import LSTM\n",
"from neuralforecast.models.deepar import DeepAR\n",
"from neuralforecast.models.dilated_rnn import DilatedRNN\n",
"from neuralforecast.models.bitcn import BiTCN\n",
"\n",
"from neuralforecast.models.mlp import MLP\n",
"from neuralforecast.models.nbeats import NBEATS\n",
@@ -1044,6 +1045,286 @@
"model.fit(dataset=dataset)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a0616ae",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class AutoBiTCN(BaseAuto):\n",
"\n",
" default_config = {\n",
" \"input_size_multiplier\": [1, 2, 3, 4, 5],\n",
" \"h\": None,\n",
" \"hidden_size\": tune.choice([16, 32]),\n",
" \"dropout\": tune.uniform(0.0, 0.99), \n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"scaler_type\": tune.choice([None, 'robust', 'standard']),\n",
" \"max_steps\": tune.choice([500, 1000, 2000]),\n",
" \"batch_size\": tune.choice([32, 64, 128, 256]),\n",
" \"windows_batch_size\": tune.choice([128, 256, 512, 1024]),\n",
" \"loss\": None,\n",
" \"random_seed\": tune.randint(1, 20),\n",
" }\n",
"\n",
" def __init__(self,\n",
" h,\n",
" loss=MAE(),\n",
" valid_loss=None,\n",
" config=None, \n",
" search_alg=BasicVariantGenerator(random_state=1),\n",
" num_samples=10,\n",
" refit_with_val=False,\n",
" cpus=cpu_count(),\n",
" gpus=torch.cuda.device_count(),\n",
" verbose=False,\n",
" alias=None,\n",
" backend='ray',\n",
" callbacks=None):\n",
" \n",
" # Define search space, input/output sizes\n",
" if config is None:\n",
" config = self.get_default_config(h=h, backend=backend) \n",
"\n",
" super(AutoBiTCN, self).__init__(\n",
" cls_model=BiTCN, \n",
" h=h,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" config=config,\n",
" search_alg=search_alg,\n",
" num_samples=num_samples, \n",
" refit_with_val=refit_with_val,\n",
" cpus=cpus,\n",
" gpus=gpus,\n",
" verbose=verbose,\n",
" alias=alias,\n",
" backend=backend,\n",
" callbacks=callbacks, \n",
" )\n",
"\n",
" @classmethod\n",
" def get_default_config(cls, h, backend, n_series=None):\n",
" config = cls.default_config.copy()\n",
" config['input_size'] = tune.choice([h*x \\\n",
" for x in config['input_size_multiplier']])\n",
" config['step_size'] = tune.choice([1, h]) \n",
" del config['input_size_multiplier']\n",
" if backend == 'optuna':\n",
" config = cls._ray_config_to_optuna(config) \n",
"\n",
" return config "
]
},
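A quick, illustrative check of how get_default_config expands the search space for h=12 (the exact reprs depend on the Ray version):

# The multipliers [1, 2, 3, 4, 5] become an input_size choice over
# [12, 24, 36, 48, 60]; a step_size choice over [1, 12] is added and
# input_size_multiplier is removed from the returned config.
config = AutoBiTCN.get_default_config(h=12, backend='ray')
print(config['input_size'])   # tune.choice([12, 24, 36, 48, 60])
print(config['step_size'])    # tune.choice([1, 12])
assert 'input_size_multiplier' not in config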
{
"cell_type": "code",
"execution_count": null,
"id": "433d2ef6",
"metadata": {},
"outputs": [],
"source": [
"show_doc(AutoBiTCN, title_level=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95850f3c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-04-06 10:40:24,017\tINFO worker.py:1724 -- Started a local Ray instance.\n",
"2024-04-06 10:40:25,556\tINFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.\n",
"2024-04-06 10:40:25,559\tINFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949\n",
"Seed set to 1\n"
]
}
],
"source": [
"%%capture\n",
"# Use your own config or AutoNHITS.default_config\n",
"config = dict(max_steps=1, val_check_steps=1, input_size=12, hidden_size=8)\n",
"model = AutoBiTCN(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
"model.fit(dataset=dataset)\n",
"y_hat = model.predict(dataset=dataset)\n",
"\n",
"# Optuna\n",
"model = AutoBiTCN(h=12, config=None, backend='optuna')"
]
},
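A hedged follow-up for inspecting the tuning run: with the 'ray' backend, the fitted Auto model keeps the Tune results, which the library's tuning tutorials read as a dataframe (the results attribute and get_dataframe() are assumptions based on those tutorials, not on this diff):

# Refit with the ray backend, then flatten the Ray Tune ResultGrid
ray_model = AutoBiTCN(h=12, config=config, num_samples=1, cpus=1)
ray_model.fit(dataset=dataset)
print(ray_model.results.get_dataframe().head())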
{
"cell_type": "code",
"execution_count": null,
"id": "7c905530",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\ray\\tune\\integration\\pytorch_lightning.py:194: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\utilities\\parsing.py:199: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\utilities\\parsing.py:199: Attribute 'valid_loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['valid_loss'])`.\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m Seed set to 11\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m GPU available: True (cuda), used: True\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m TPU available: False, using: 0 TPU cores\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m IPU available: False, using: 0 IPUs\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m HPU available: False, using: 0 HPUs\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m `Trainer(val_check_interval=1)` was configured so validation will run after every batch.\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m Missing logger folder: C:\\Users\\ospra\\ray_results\\_train_tune_2024-04-06_10-40-40\\_train_tune_4d1da_00000\\lightning_logs\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m \n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m | Name | Type | Params\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m -----------------------------------------------\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 0 | padder_train | ConstantPad1d | 0 \n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 1 | loss | MAE | 0 \n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 2 | scaler | TemporalNorm | 0 \n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 3 | lin_hist | Linear | 16 \n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 4 | drop_hist | Dropout | 0 \n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 5 | net_bwd | Sequential | 944 \n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 6 | feature_lin | Linear | 9 \n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 7 | temporal_lin | Linear | 156 \n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m -----------------------------------------------\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 1.1 K Trainable params\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 0 Non-trainable params\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 1.1 K Total params\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m 0.004 Total estimated model params size (MB)\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\trainer\\connectors\\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sanity Checking: | | 0/? [00:00<?, ?it/s]\n",
"Sanity Checking DataLoader 0: 0%| | 0/1 [00:00<?, ?it/s]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\utilities\\data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\trainer\\connectors\\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n",
"\u001b[36m(_train_tune pid=27632)\u001b[0m c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\pytorch_lightning\\loops\\fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Seed set to 11\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0: 0%| | 0/1 [00:00<?, ?it/s] \n",
"Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 4.57it/s, v_num=0, train_loss_step=1.310]\n",
"Validation: | | 0/? [00:00<?, ?it/s]\u001b[A\n",
"Validation: 0%| | 0/1 [00:00<?, ?it/s]\u001b[A\n",
"Validation DataLoader 0: 0%| | 0/1 [00:00<?, ?it/s]\u001b[A\n",
"Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 333.28it/s]\u001b[A\n",
"Epoch 0: 100%|██████████| 1/1 [00:00<00:00, 4.31it/s, v_num=0, train_loss_step=1.310, valid_loss=63.80, train_loss_epoch=1.310]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[36m(_train_tune pid=27632)\u001b[0m `Trainer.fit` stopped: `max_steps=1` reached.\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d505eecf77894263876049515e8f1b2e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Sanity Checking: | | 0/? [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f580d33f09444a7894e3efdd24438950",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Training: | | 0/? [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7b56670511af494bb13a2b6c4bff77bd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Validation: | | 0/? [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#| hide\n",
"# Check Optuna\n",
"assert model.config(MockTrial())['h'] == 12\n",
"\n",
"# Unit test to test that Auto* model contains all required arguments from BaseAuto\n",
"test_args(AutoBiTCN, exclude_args=['cls_model']) \n",
"\n",
"# Unit test for situation: Optuna with updated default config\n",
"my_config = AutoBiTCN.get_default_config(h=12, backend='optuna')\n",
"def my_config_new(trial):\n",
" config = {**my_config(trial)}\n",
" config.update({'max_steps': 1, 'val_check_steps': 1, 'input_size': 12, 'hidden_size': 8})\n",
" return config\n",
"\n",
"model = AutoBiTCN(h=12, config=my_config_new, backend='optuna', num_samples=1, cpus=1)\n",
"model.fit(dataset=dataset)\n",
"\n",
"# Unit test for situation: Ray with updated default config\n",
"my_config = AutoBiTCN.get_default_config(h=12, backend='ray')\n",
"my_config['max_steps'] = 1\n",
"my_config['val_check_steps'] = 1\n",
"my_config['input_size'] = 12\n",
"my_config['hidden_size'] = 8\n",
"model = AutoBiTCN(h=12, config=my_config, backend='ray', num_samples=1, cpus=1)\n",
"model.fit(dataset=dataset)"
]
},
{
"attachments": {},
"cell_type": "markdown",
22 changes: 22 additions & 0 deletions neuralforecast/_modidx.py
@@ -10,6 +10,10 @@
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoAutoformer.get_default_config': ( 'models.html#autoautoformer.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoBiTCN': ('models.html#autobitcn', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoBiTCN.__init__': ('models.html#autobitcn.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoBiTCN.get_default_config': ( 'models.html#autobitcn.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDLinear': ('models.html#autodlinear', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDLinear.__init__': ( 'models.html#autodlinear.__init__',
'neuralforecast/auto.py'),
@@ -463,6 +467,24 @@
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.SeriesDecomp.forward': ( 'models.autoformer.html#seriesdecomp.forward',
'neuralforecast/models/autoformer.py')},
'neuralforecast.models.bitcn': { 'neuralforecast.models.bitcn.BiTCN': ( 'models.bitcn.html#bitcn',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.BiTCN.__init__': ( 'models.bitcn.html#bitcn.__init__',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.BiTCN.forward': ( 'models.bitcn.html#bitcn.forward',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.CustomConv1d': ( 'models.bitcn.html#customconv1d',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.CustomConv1d.__init__': ( 'models.bitcn.html#customconv1d.__init__',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.CustomConv1d.forward': ( 'models.bitcn.html#customconv1d.forward',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.TCNCell': ( 'models.bitcn.html#tcncell',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.TCNCell.__init__': ( 'models.bitcn.html#tcncell.__init__',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.TCNCell.forward': ( 'models.bitcn.html#tcncell.forward',
'neuralforecast/models/bitcn.py')},
'neuralforecast.models.deepar': { 'neuralforecast.models.deepar.Decoder': ( 'models.deepar.html#decoder',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.Decoder.__init__': ( 'models.deepar.html#decoder.__init__',