diff --git a/build_tools/changelog.py b/build_tools/changelog.py index c81e94489..d4470104e 100644 --- a/build_tools/changelog.py +++ b/build_tools/changelog.py @@ -62,7 +62,9 @@ def fetch_latest_release(): # noqa: D103 """ import httpx - response = httpx.get(f"{GITHUB_REPOS}/{OWNER}/{REPO}/releases/latest", headers=HEADERS) + response = httpx.get( + f"{GITHUB_REPOS}/{OWNER}/{REPO}/releases/latest", headers=HEADERS + ) if response.status_code == 200: return response.json() @@ -91,7 +93,9 @@ def fetch_pull_requests_since_last_release() -> list[dict]: all_pulls = [] while not is_exhausted: pulls = fetch_merged_pull_requests(page=page) - all_pulls.extend([p for p in pulls if parser.parse(p["merged_at"]) > published_at]) + all_pulls.extend( + [p for p in pulls if parser.parse(p["merged_at"]) > published_at] + ) is_exhausted = any(parser.parse(p["updated_at"]) < published_at for p in pulls) page += 1 return all_pulls @@ -101,7 +105,9 @@ def github_compare_tags(tag_left: str, tag_right: str = "HEAD"): """Compare commit between two tags.""" import httpx - response = httpx.get(f"{GITHUB_REPOS}/{OWNER}/{REPO}/compare/{tag_left}...{tag_right}") + response = httpx.get( + f"{GITHUB_REPOS}/{OWNER}/{REPO}/compare/{tag_left}...{tag_right}" + ) if response.status_code == 200: return response.json() else: @@ -135,7 +141,9 @@ def assign_prs(prs, categs: list[dict[str, list[str]]]): # if any(l.startswith("module") for l in pr_labels): # print(i, pr_labels) - assigned["Other"] = list(set(range(len(prs))) - {i for _, j in assigned.items() for i in j}) + assigned["Other"] = list( + set(range(len(prs))) - {i for _, j in assigned.items() for i in j} + ) return assigned diff --git a/docs/source/conf.py b/docs/source/conf.py index 4c6b40ffb..57fa832fb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -145,7 +145,9 @@ def setup(app: Sphinx): "navbar_end": ["navbar-icon-links.html", "search-field.html"], "show_nav_level": 2, "header_links_before_dropdown": 10, - "external_links": [{"name": "GitHub", "url": "https://github.com/sktime/pytorch-forecasting"}], + "external_links": [ + {"name": "GitHub", "url": "https://github.com/sktime/pytorch-forecasting"} + ], } html_sidebars = { diff --git a/examples/ar.py b/examples/ar.py index c8ced62b0..4422302af 100644 --- a/examples/ar.py +++ b/examples/ar.py @@ -51,14 +51,20 @@ stop_randomization=True, ) batch_size = 64 -train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0) -val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0) +train_dataloader = training.to_dataloader( + train=True, batch_size=batch_size, num_workers=0 +) +val_dataloader = validation.to_dataloader( + train=False, batch_size=batch_size, num_workers=0 +) # save datasets training.save("training.pkl") validation.save("validation.pkl") -early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=5, verbose=False, mode="min") +early_stop_callback = EarlyStopping( + monitor="val_loss", min_delta=1e-4, patience=5, verbose=False, mode="min" +) lr_logger = LearningRateMonitor() trainer = pl.Trainer( diff --git a/examples/nbeats.py b/examples/nbeats.py index 283c6cda5..ce2f636c6 100644 --- a/examples/nbeats.py +++ b/examples/nbeats.py @@ -42,13 +42,21 @@ add_target_scales=False, ) -validation = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx=training_cutoff) +validation = TimeSeriesDataSet.from_dataset( + training, data, min_prediction_idx=training_cutoff +) batch_size = 128 -train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=2) -val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=2) +train_dataloader = training.to_dataloader( + train=True, batch_size=batch_size, num_workers=2 +) +val_dataloader = validation.to_dataloader( + train=False, batch_size=batch_size, num_workers=2 +) -early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min") +early_stop_callback = EarlyStopping( + monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min" +) trainer = pl.Trainer( max_epochs=100, accelerator="auto", @@ -63,7 +71,12 @@ net = NBeats.from_dataset( - training, learning_rate=3e-2, log_interval=10, log_val_interval=1, log_gradient_flow=False, weight_decay=1e-2 + training, + learning_rate=3e-2, + log_interval=10, + log_val_interval=1, + log_gradient_flow=False, + weight_decay=1e-2, ) print(f"Number of parameters in network: {net.size() / 1e3:.1f}k") diff --git a/examples/stallion.py b/examples/stallion.py index 1b9be12c1..0066c3e22 100644 --- a/examples/stallion.py +++ b/examples/stallion.py @@ -7,10 +7,16 @@ import numpy as np from pandas.core.common import SettingWithCopyWarning -from pytorch_forecasting import GroupNormalizer, TemporalFusionTransformer, TimeSeriesDataSet +from pytorch_forecasting import ( + GroupNormalizer, + TemporalFusionTransformer, + TimeSeriesDataSet, +) from pytorch_forecasting.data.examples import get_stallion_data from pytorch_forecasting.metrics import QuantileLoss -from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters +from pytorch_forecasting.models.temporal_fusion_transformer.tuning import ( + optimize_hyperparameters, +) warnings.simplefilter("error", category=SettingWithCopyWarning) @@ -22,8 +28,12 @@ data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month data["time_idx"] -= data["time_idx"].min() -data["avg_volume_by_sku"] = data.groupby(["time_idx", "sku"], observed=True).volume.transform("mean") -data["avg_volume_by_agency"] = data.groupby(["time_idx", "agency"], observed=True).volume.transform("mean") +data["avg_volume_by_sku"] = data.groupby( + ["time_idx", "sku"], observed=True +).volume.transform("mean") +data["avg_volume_by_agency"] = data.groupby( + ["time_idx", "agency"], observed=True +).volume.transform("mean") # data = data[lambda x: (x.sku == data.iloc[0]["sku"]) & (x.agency == data.iloc[0]["agency"])] special_days = [ "easter_day", @@ -39,7 +49,9 @@ "beer_capital", "music_fest", ] -data[special_days] = data[special_days].apply(lambda x: x.map({0: "", 1: x.name})).astype("category") +data[special_days] = ( + data[special_days].apply(lambda x: x.map({0: "", 1: x.name})).astype("category") +) training_cutoff = data["time_idx"].max() - 6 max_encoder_length = 36 @@ -50,14 +62,17 @@ time_idx="time_idx", target="volume", group_ids=["agency", "sku"], - min_encoder_length=max_encoder_length // 2, # allow encoder lengths from 0 to max_prediction_length + min_encoder_length=max_encoder_length + // 2, # allow encoder lengths from 0 to max_prediction_length max_encoder_length=max_encoder_length, min_prediction_length=1, max_prediction_length=max_prediction_length, static_categoricals=["agency", "sku"], static_reals=["avg_population_2017", "avg_yearly_household_income_2017"], time_varying_known_categoricals=["special_days", "month"], - variable_groups={"special_days": special_days}, # group of categorical variables can be treated as one variable + variable_groups={ + "special_days": special_days + }, # group of categorical variables can be treated as one variable time_varying_known_reals=["time_idx", "price_regular", "discount_in_percent"], time_varying_unknown_categoricals=[], time_varying_unknown_reals=[ @@ -78,17 +93,25 @@ ) -validation = TimeSeriesDataSet.from_dataset(training, data, predict=True, stop_randomization=True) +validation = TimeSeriesDataSet.from_dataset( + training, data, predict=True, stop_randomization=True +) batch_size = 64 -train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0) -val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0) +train_dataloader = training.to_dataloader( + train=True, batch_size=batch_size, num_workers=0 +) +val_dataloader = validation.to_dataloader( + train=False, batch_size=batch_size, num_workers=0 +) # save datasets training.save("t raining.pkl") validation.save("validation.pkl") -early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min") +early_stop_callback = EarlyStopping( + monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min" +) lr_logger = LearningRateMonitor() logger = TensorBoardLogger(log_graph=True) diff --git a/pyproject.toml b/pyproject.toml index edf9dd82f..0f5ef235a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,63 +1,3 @@ -[tool.ruff] -line-length = 120 -exclude = [ - "docs/build/", - "node_modules/", - ".eggs/", - "versioneer.py", - "venv/", - ".venv/", - ".git/", - ".history/", -] - -[tool.ruff.lint] -select = ["E", "F", "W", "C4", "S"] -extend-ignore = [ - "E203", # space before : (needed for how black formats slicing) - "E402", # module level import not at top of file - "E731", # do not assign a lambda expression, use a def - "E741", # ignore not easy to read variables like i l I etc. - "C406", # Unnecessary list literal - rewrite as a dict literal. - "C408", # Unnecessary dict call - rewrite as a literal. - "C409", # Unnecessary list passed to tuple() - rewrite as a tuple literal. - "F401", # unused imports - "S101", # use of assert -] - -[tool.ruff.lint.isort] -known-first-party = ["pytorch_forecasting"] -combine-as-imports = true -force-sort-within-sections = true - -[tool.black] -line-length = 120 -include = '\.pyi?$' -exclude = ''' -( - /( - \.eggs # exclude a few common directories in the - | \.git # root of the project - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | buck-out - | build - | dist - )/ - | docs/build/ - | node_modules/ - | venve/ - | .venv/ -) -''' - -[tool.nbqa.mutate] -ruff = 1 -black = 1 - [project] name = "pytorch-forecasting" readme = "README.md" # Markdown files are supported @@ -184,3 +124,67 @@ build-backend = "setuptools.build_meta" requires = [ "setuptools>=70.0.0", ] + +[tool.ruff] +line-length = 88 +exclude = [ + "docs/build/", + "node_modules/", + ".eggs/", + "versioneer.py", + "venv/", + ".venv/", + ".git/", + ".history/", +] + +[tool.ruff.lint] +select = ["E", "F", "W", "C4", "S"] +extend-select = [ + "I", # isort + "C4", # https://pypi.org/project/flake8-comprehensions +] +extend-ignore = [ + "E203", # space before : (needed for how black formats slicing) + "E402", # module level import not at top of file + "E731", # do not assign a lambda expression, use a def + "E741", # ignore not easy to read variables like i l I etc. + "C406", # Unnecessary list literal - rewrite as a dict literal. + "C408", # Unnecessary dict call - rewrite as a literal. + "C409", # Unnecessary list passed to tuple() - rewrite as a tuple literal. + "F401", # unused imports + "S101", # use of assert +] + +[tool.ruff.lint.isort] +known-first-party = ["pytorch_forecasting"] +combine-as-imports = true +force-sort-within-sections = true + +[tool.black] +line-length = 88 +include = '\.pyi?$' +exclude = ''' +( + /( + \.eggs # exclude a few common directories in the + | \.git # root of the project + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + )/ + | docs/build/ + | node_modules/ + | venve/ + | .venv/ +) +''' + +[tool.nbqa.mutate] +ruff = 1 +black = 1 diff --git a/pytorch_forecasting/data/encoders.py b/pytorch_forecasting/data/encoders.py index 2bcf521ca..562ad66ed 100644 --- a/pytorch_forecasting/data/encoders.py +++ b/pytorch_forecasting/data/encoders.py @@ -2,16 +2,22 @@ Encoders for encoding categorical variables and scaling continuous data. """ -from typing import Any, Callable, Dict, Iterable, List, Tuple, Union, Optional +from copy import deepcopy +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import warnings import numpy as np import pandas as pd -from copy import deepcopy from sklearn.base import BaseEstimator, TransformerMixin import torch from torch.distributions import constraints -from torch.distributions.transforms import ExpTransform, PowerTransform, SigmoidTransform, Transform, _clipped_sigmoid +from torch.distributions.transforms import ( + ExpTransform, + PowerTransform, + SigmoidTransform, + Transform, + _clipped_sigmoid, +) import torch.nn.functional as F from torch.nn.utils import rnn @@ -133,17 +139,46 @@ class TransformMixIn: # dict of PyTorch functions that transforms and inversely transforms values. # inverse entry required if "reverse" is not the "inverse" of "forward". TRANSFORMATIONS = { - "log": dict(forward=_clipped_log, reverse=torch.exp, inverse_torch=ExpTransform()), - "log1p": dict(forward=torch.log1p, reverse=torch.exp, inverse=torch.expm1, inverse_torch=Expm1Transform()), - "logit": dict(forward=_clipped_logit, reverse=_clipped_sigmoid, inverse_torch=SigmoidTransform()), - "count": dict(forward=_plus_one, reverse=F.softplus, inverse=_minus_one, inverse_torch=MinusOneTransform()), - "softplus": dict(forward=softplus_inv, reverse=F.softplus, inverse_torch=SoftplusTransform()), - "relu": dict(forward=_identity, reverse=F.relu, inverse=_identity, inverse_torch=ReLuTransform()), - "sqrt": dict(forward=torch.sqrt, reverse=_square, inverse_torch=PowerTransform(exponent=2.0)), + "log": dict( + forward=_clipped_log, reverse=torch.exp, inverse_torch=ExpTransform() + ), + "log1p": dict( + forward=torch.log1p, + reverse=torch.exp, + inverse=torch.expm1, + inverse_torch=Expm1Transform(), + ), + "logit": dict( + forward=_clipped_logit, + reverse=_clipped_sigmoid, + inverse_torch=SigmoidTransform(), + ), + "count": dict( + forward=_plus_one, + reverse=F.softplus, + inverse=_minus_one, + inverse_torch=MinusOneTransform(), + ), + "softplus": dict( + forward=softplus_inv, reverse=F.softplus, inverse_torch=SoftplusTransform() + ), + "relu": dict( + forward=_identity, + reverse=F.relu, + inverse=_identity, + inverse_torch=ReLuTransform(), + ), + "sqrt": dict( + forward=torch.sqrt, + reverse=_square, + inverse_torch=PowerTransform(exponent=2.0), + ), } @classmethod - def get_transform(cls, transformation: Union[str, Dict[str, Callable]]) -> Dict[str, Callable]: + def get_transform( + cls, transformation: Union[str, Dict[str, Callable]] + ) -> Dict[str, Callable]: """Return transformation functions. Args: @@ -186,7 +221,9 @@ def preprocess( y = np.asarray(y) return y - def inverse_preprocess(self, y: Union[pd.Series, np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]: + def inverse_preprocess( + self, y: Union[pd.Series, np.ndarray, torch.Tensor] + ) -> Union[np.ndarray, torch.Tensor]: """ Inverse preprocess re-scaled data (e.g. take exp). @@ -207,7 +244,9 @@ def inverse_preprocess(self, y: Union[pd.Series, np.ndarray, torch.Tensor]) -> U return y -class NaNLabelEncoder(InitialParameterRepresenterMixIn, BaseEstimator, TransformerMixin, TransformMixIn): +class NaNLabelEncoder( + InitialParameterRepresenterMixIn, BaseEstimator, TransformerMixin, TransformMixIn +): """ Labelencoder that can optionally always encode nan and unknown classes (in transform) as class ``0`` """ @@ -251,7 +290,8 @@ def is_numeric(y: pd.Series) -> bool: bool: True if series is numeric """ return y.dtype.kind in "bcif" or ( - isinstance(y.dtype, pd.CategoricalDtype) and y.cat.categories.dtype.kind in "bcif" + isinstance(y.dtype, pd.CategoricalDtype) + and y.cat.categories.dtype.kind in "bcif" ) def fit(self, y: pd.Series, overwrite: bool = False): @@ -292,7 +332,11 @@ def fit(self, y: pd.Series, overwrite: bool = False): return self def transform( - self, y: Iterable, return_norm: bool = False, target_scale=None, ignore_na: bool = False + self, + y: Iterable, + return_norm: bool = False, + target_scale=None, + ignore_na: bool = False, ) -> Union[torch.Tensor, np.ndarray]: """ Encode iterable with integers. @@ -387,7 +431,9 @@ def get_parameters(self, groups=None, group_names=None) -> np.ndarray: return np.zeros(2, dtype=np.float64) -class TorchNormalizer(InitialParameterRepresenterMixIn, BaseEstimator, TransformerMixin, TransformMixIn): +class TorchNormalizer( + InitialParameterRepresenterMixIn, BaseEstimator, TransformerMixin, TransformMixIn +): """ Basic target transformer that can be fit also on torch tensors. """ @@ -425,11 +471,17 @@ def __init__( can be defined to provide a torch distribution transform for inverse transformations. """ self.method = method - assert method in ["standard", "robust", "identity"], f"method has invalid value {method}" + assert method in [ + "standard", + "robust", + "identity", + ], f"method has invalid value {method}" self.center = center self.transformation = transformation self.method_kwargs = method_kwargs - self._method_kwargs = deepcopy(method_kwargs) if method_kwargs is not None else {} + self._method_kwargs = ( + deepcopy(method_kwargs) if method_kwargs is not None else {} + ) def get_parameters(self, *args, **kwargs) -> torch.Tensor: """ @@ -438,7 +490,9 @@ def get_parameters(self, *args, **kwargs) -> torch.Tensor: Returns: torch.Tensor: First element is center of data and second is scale """ - return torch.stack([torch.as_tensor(self.center_), torch.as_tensor(self.scale_)], dim=-1) + return torch.stack( + [torch.as_tensor(self.center_), torch.as_tensor(self.scale_)], dim=-1 + ) def fit(self, y: Union[pd.Series, np.ndarray, torch.Tensor]): """ @@ -455,7 +509,9 @@ def fit(self, y: Union[pd.Series, np.ndarray, torch.Tensor]): return self def _set_parameters( - self, y_center: Union[pd.Series, np.ndarray, torch.Tensor], y_scale: Union[pd.Series, np.ndarray, torch.Tensor] + self, + y_center: Union[pd.Series, np.ndarray, torch.Tensor], + y_scale: Union[pd.Series, np.ndarray, torch.Tensor], ): """ Calculate parameters for scale and center based on input timeseries @@ -498,17 +554,31 @@ def _set_parameters( elif self.method == "robust": if isinstance(y_center, torch.Tensor): - self.center_ = y_center.quantile(self._method_kwargs.get("center", 0.5), dim=-1) + self.center_ = y_center.quantile( + self._method_kwargs.get("center", 0.5), dim=-1 + ) q_75 = y_scale.quantile(self._method_kwargs.get("upper", 0.75), dim=-1) q_25 = y_scale.quantile(self._method_kwargs.get("lower", 0.25), dim=-1) elif isinstance(y_center, np.ndarray): - self.center_ = np.percentile(y_center, self._method_kwargs.get("center", 0.5) * 100, axis=-1) - q_75 = np.percentile(y_scale, self._method_kwargs.get("upper", 0.75) * 100, axis=-1) - q_25 = np.percentile(y_scale, self._method_kwargs.get("lower", 0.25) * 100, axis=-1) + self.center_ = np.percentile( + y_center, self._method_kwargs.get("center", 0.5) * 100, axis=-1 + ) + q_75 = np.percentile( + y_scale, self._method_kwargs.get("upper", 0.75) * 100, axis=-1 + ) + q_25 = np.percentile( + y_scale, self._method_kwargs.get("lower", 0.25) * 100, axis=-1 + ) else: - self.center_ = np.percentile(y_center, self._method_kwargs.get("center", 0.5) * 100, axis=-1) - q_75 = np.percentile(y_scale, self._method_kwargs.get("upper", 0.75) * 100) - q_25 = np.percentile(y_scale, self._method_kwargs.get("lower", 0.25) * 100) + self.center_ = np.percentile( + y_center, self._method_kwargs.get("center", 0.5) * 100, axis=-1 + ) + q_75 = np.percentile( + y_scale, self._method_kwargs.get("upper", 0.75) * 100 + ) + q_25 = np.percentile( + y_scale, self._method_kwargs.get("lower", 0.25) * 100 + ) self.scale_ = (q_75 - q_25) / 2.0 + eps if not self.center and self.method != "identity": self.scale_ = self.center_ @@ -529,7 +599,10 @@ def transform( y: Union[pd.Series, np.ndarray, torch.Tensor], return_norm: bool = False, target_scale: torch.Tensor = None, - ) -> Union[Tuple[Union[np.ndarray, torch.Tensor], np.ndarray], Union[np.ndarray, torch.Tensor]]: + ) -> Union[ + Tuple[Union[np.ndarray, torch.Tensor], np.ndarray], + Union[np.ndarray, torch.Tensor], + ]: """ Rescale data. @@ -658,7 +731,12 @@ def __init__( can be defined to provide a torch distribution transform for inverse transformations. """ method_kwargs = deepcopy(method_kwargs) if method_kwargs is not None else {} - super().__init__(method=method, center=center, transformation=transformation, method_kwargs=method_kwargs) + super().__init__( + method=method, + center=center, + transformation=transformation, + method_kwargs=method_kwargs, + ) self.max_length = max_length def fit(self, y: Union[pd.Series, np.ndarray, torch.Tensor]): @@ -675,7 +753,9 @@ def fit(self, y: Union[pd.Series, np.ndarray, torch.Tensor]): if self.max_length is None: y_center = y_scale = self.preprocess(y) elif isinstance(self.max_length, int): - y_center = y_scale = self.preprocess(self._slice(y, slice(-self.max_length, None))) + y_center = y_scale = self.preprocess( + self._slice(y, slice(-self.max_length, None)) + ) else: y = self.preprocess(self._slice(y, slice(-max(self.max_length), None))) if np.argmax(self.max_length) == 0: @@ -771,7 +851,12 @@ def __init__( self._groups = list(groups) if groups is not None else [] self.scale_by_group = scale_by_group method_kwargs = deepcopy(method_kwargs) if method_kwargs is not None else {} - super().__init__(method=method, center=center, transformation=transformation, method_kwargs=method_kwargs) + super().__init__( + method=method, + center=center, + transformation=transformation, + method_kwargs=method_kwargs, + ) def fit(self, y: pd.Series, X: pd.DataFrame): """ @@ -787,9 +872,14 @@ def fit(self, y: pd.Series, X: pd.DataFrame): y = self.preprocess(y) eps = np.finfo(np.float16).eps if len(self._groups) == 0: - assert not self.scale_by_group, "No groups are defined, i.e. `scale_by_group=[]`" + assert ( + not self.scale_by_group + ), "No groups are defined, i.e. `scale_by_group=[]`" if self.method == "standard": - self.norm_ = {"center": np.mean(y), "scale": np.std(y) + eps} # center and scale + self.norm_ = { + "center": np.mean(y), + "scale": np.std(y) + eps, + } # center and scale else: quantiles = np.quantile( y, @@ -833,7 +923,8 @@ def fit(self, y: pd.Series, X: pd.DataFrame): .assign( center=lambda x: x[self._method_kwargs.get("center", 0.5)], scale=lambda x: ( - x[self._method_kwargs.get("upper", 0.75)] - x[self._method_kwargs.get("lower", 0.25)] + x[self._method_kwargs.get("upper", 0.75)] + - x[self._method_kwargs.get("lower", 0.25)] ) / 2.0 + eps, @@ -848,8 +939,12 @@ def swap_parameters(norm): norm["center"] = 0.0 return norm - self.norm_ = {g: swap_parameters(norm) for g, norm in self.norm_.items()} - self.missing_ = {group: scales.median().to_dict() for group, scales in self.norm_.items()} + self.norm_ = { + g: swap_parameters(norm) for g, norm in self.norm_.items() + } + self.missing_ = { + group: scales.median().to_dict() for group, scales in self.norm_.items() + } else: if self.method == "standard": @@ -876,7 +971,8 @@ def swap_parameters(norm): .assign( center=lambda x: x[self._method_kwargs.get("center", 0.5)], scale=lambda x: ( - x[self._method_kwargs.get("upper", 0.75)] - x[self._method_kwargs.get("lower", 0.25)] + x[self._method_kwargs.get("upper", 0.75)] + - x[self._method_kwargs.get("lower", 0.25)] ) / 2.0 + eps, @@ -888,8 +984,17 @@ def swap_parameters(norm): self.missing_ = self.norm_.median().to_dict() if ( - (self.scale_by_group and any((self.norm_[group]["scale"] < 1e-7).any() for group in self._groups)) - or (not self.scale_by_group and isinstance(self.norm_["scale"], float) and self.norm_["scale"] < 1e-7) + ( + self.scale_by_group + and any( + (self.norm_[group]["scale"] < 1e-7).any() for group in self._groups + ) + ) + or ( + not self.scale_by_group + and isinstance(self.norm_["scale"], float) + and self.norm_["scale"] < 1e-7 + ) or ( not self.scale_by_group and not isinstance(self.norm_["scale"], float) @@ -938,7 +1043,11 @@ def inverse_transform(self, y: pd.Series, X: pd.DataFrame): raise NotImplementedError() def transform( - self, y: pd.Series, X: pd.DataFrame = None, return_norm: bool = False, target_scale: torch.Tensor = None + self, + y: pd.Series, + X: pd.DataFrame = None, + return_norm: bool = False, + target_scale: torch.Tensor = None, ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: """ Scale input data. @@ -961,7 +1070,9 @@ def transform( target_scale = self.get_norm(X) return super().transform(y, return_norm=return_norm, target_scale=target_scale) - def get_parameters(self, groups: Union[torch.Tensor, list, tuple], group_names: List[str] = None) -> np.ndarray: + def get_parameters( + self, groups: Union[torch.Tensor, list, tuple], group_names: List[str] = None + ) -> np.ndarray: """ Get fitted scaling parameters for a given group. @@ -982,7 +1093,9 @@ def get_parameters(self, groups: Union[torch.Tensor, list, tuple], group_names: else: # filter group names group_names = [name for name in group_names if name in self._groups] - assert len(group_names) == len(self._groups), "Passed groups and fitted do not match" + assert len(group_names) == len( + self._groups + ), "Passed groups and fitted do not match" if len(self._groups) == 0: params = np.array([self.norm_["center"], self.norm_["scale"]]) @@ -992,7 +1105,9 @@ def get_parameters(self, groups: Union[torch.Tensor, list, tuple], group_names: try: norm = norm * self.norm_[group_name].loc[group].to_numpy() except KeyError: - norm = norm * np.asarray([self.missing_[group_name][name] for name in self.names]) + norm = norm * np.asarray( + [self.missing_[group_name][name] for name in self.names] + ) norm = np.power(norm, 1.0 / len(self._groups)) params = norm else: @@ -1013,7 +1128,9 @@ def get_norm(self, X: pd.DataFrame) -> pd.DataFrame: pd.DataFrame: dataframe with scaling parameterswhere each row corresponds to the input dataframe """ if len(self._groups) == 0: - norm = np.asarray([self.norm_["center"], self.norm_["scale"]]).reshape(1, -1) + norm = np.asarray([self.norm_["center"], self.norm_["scale"]]).reshape( + 1, -1 + ) elif self.scale_by_group: norm = [ np.prod( @@ -1030,7 +1147,13 @@ def get_norm(self, X: pd.DataFrame) -> pd.DataFrame: ] norm = np.power(np.stack(norm, axis=1), 1.0 / len(self._groups)) else: - norm = X[self._groups].set_index(self._groups).join(self.norm_).fillna(self.missing_).to_numpy() + norm = ( + X[self._groups] + .set_index(self._groups) + .join(self.norm_) + .fillna(self.missing_) + .to_numpy() + ) return norm @@ -1048,7 +1171,9 @@ def __init__(self, normalizers: List[TorchNormalizer]): """ self.normalizers = normalizers - def fit(self, y: Union[pd.DataFrame, np.ndarray, torch.Tensor], X: pd.DataFrame = None): + def fit( + self, y: Union[pd.DataFrame, np.ndarray, torch.Tensor], X: pd.DataFrame = None + ): """ Fit transformer, i.e. determine center and scale of data @@ -1097,7 +1222,10 @@ def transform( X: pd.DataFrame = None, return_norm: bool = False, target_scale: List[torch.Tensor] = None, - ) -> Union[List[Tuple[Union[np.ndarray, torch.Tensor], np.ndarray]], List[Union[np.ndarray, torch.Tensor]]]: + ) -> Union[ + List[Tuple[Union[np.ndarray, torch.Tensor], np.ndarray]], + List[Union[np.ndarray, torch.Tensor]], + ]: """ Scale input data. @@ -1122,9 +1250,13 @@ def transform( else: scale = None if isinstance(normalizer, GroupNormalizer): - r = normalizer.transform(y[idx], X, return_norm=return_norm, target_scale=scale) + r = normalizer.transform( + y[idx], X, return_norm=return_norm, target_scale=scale + ) else: - r = normalizer.transform(y[idx], return_norm=return_norm, target_scale=scale) + r = normalizer.transform( + y[idx], return_norm=return_norm, target_scale=scale + ) res.append(r) if return_norm: @@ -1132,7 +1264,9 @@ def transform( else: return res - def __call__(self, data: Dict[str, Union[List[torch.Tensor], torch.Tensor]]) -> List[torch.Tensor]: + def __call__( + self, data: Dict[str, Union[List[torch.Tensor], torch.Tensor]] + ) -> List[torch.Tensor]: """ Inverse transformation but with network output as input. @@ -1145,7 +1279,12 @@ def __call__(self, data: Dict[str, Union[List[torch.Tensor], torch.Tensor]]) -> List[torch.Tensor]: list of de-scaled data """ denormalized = [ - normalizer(dict(prediction=data["prediction"][idx], target_scale=data["target_scale"][idx])) + normalizer( + dict( + prediction=data["prediction"][idx], + target_scale=data["target_scale"][idx], + ) + ) for idx, normalizer in enumerate(self.normalizers) ] return denormalized @@ -1157,7 +1296,10 @@ def get_parameters(self, *args, **kwargs) -> List[torch.Tensor]: Returns: List[torch.Tensor]: First element is center of data and second is scale """ - return [normalizer.get_parameters(*args, **kwargs) for normalizer in self.normalizers] + return [ + normalizer.get_parameters(*args, **kwargs) + for normalizer in self.normalizers + ] def __getattr__(self, name: str): """ diff --git a/pytorch_forecasting/data/examples.py b/pytorch_forecasting/data/examples.py index 79b7cb52d..0a9a01e51 100644 --- a/pytorch_forecasting/data/examples.py +++ b/pytorch_forecasting/data/examples.py @@ -92,9 +92,9 @@ def generate_ar_data( # generate series x = np.arange(timesteps)[None, :] - series = (x * linear_trends + x**2 * quadratic_trends) * trend + seasonalities * np.sin( - 2 * np.pi * seasonality * x / timesteps - ) + series = ( + x * linear_trends + x**2 * quadratic_trends + ) * trend + seasonalities * np.sin(2 * np.pi * seasonality * x / timesteps) # add noise series = levels * series * (1 + noise * np.random.normal(size=series.shape)) if exp: diff --git a/pytorch_forecasting/data/samplers.py b/pytorch_forecasting/data/samplers.py index 8312d5921..99961b54e 100644 --- a/pytorch_forecasting/data/samplers.py +++ b/pytorch_forecasting/data/samplers.py @@ -40,12 +40,20 @@ def __init__( # Since collections.abc.Iterable does not check for `__getitem__`, which # is one way for an object to be an iterable, we don't do an `isinstance` # check here. - if not isinstance(batch_size, int) or isinstance(batch_size, bool) or batch_size <= 0: + if ( + not isinstance(batch_size, int) + or isinstance(batch_size, bool) + or batch_size <= 0 + ): raise ValueError( - "batch_size should be a positive integer value, " "but got batch_size={}".format(batch_size) + "batch_size should be a positive integer value, " + "but got batch_size={}".format(batch_size) ) if not isinstance(drop_last, bool): - raise ValueError("drop_last should be a boolean value, but got " "drop_last={}".format(drop_last)) + raise ValueError( + "drop_last should be a boolean value, but got " + "drop_last={}".format(drop_last) + ) self.sampler = sampler self.batch_size = batch_size self.drop_last = drop_last @@ -78,7 +86,9 @@ def construct_batch_groups(self, groups): if self.drop_last: self._group_sizes[name] = len(group) // self.batch_size else: - self._group_sizes[name] = (len(group) + self.batch_size - 1) // self.batch_size + self._group_sizes[name] = ( + len(group) + self.batch_size - 1 + ) // self.batch_size if self._group_sizes[name] == 0: self._group_sizes[name] = 1 warns.append(name) @@ -90,9 +100,13 @@ def construct_batch_groups(self, groups): ) # create index from which can be sampled: index is equal to number of batches # associate index with prediction time - self._group_index = np.repeat(list(self._group_sizes.keys()), list(self._group_sizes.values())) + self._group_index = np.repeat( + list(self._group_sizes.keys()), list(self._group_sizes.values()) + ) # associate index with batch within prediction time group - self._sub_group_index = np.concatenate([np.arange(size) for size in self._group_sizes.values()]) + self._sub_group_index = np.concatenate( + [np.arange(size) for size in self._group_sizes.values()] + ) def __iter__(self): if self.shuffle: # shuffle samples @@ -127,7 +141,9 @@ def get_groups(self, sampler: Sampler): index = data_source.index # get groups, i.e. group all samples by first predict time last_time = data_source.data["time"][index["index_end"].to_numpy()].numpy() - decoder_lengths = data_source.calculate_decoder_length(last_time, index.sequence_length) + decoder_lengths = data_source.calculate_decoder_length( + last_time, index.sequence_length + ) first_prediction_time = index.time + index.sequence_length - decoder_lengths + 1 groups = pd.RangeIndex(0, len(index.index)).groupby(first_prediction_time) return groups diff --git a/pytorch_forecasting/data/timeseries.py b/pytorch_forecasting/data/timeseries.py index 566c16a6a..c13fb0acf 100644 --- a/pytorch_forecasting/data/timeseries.py +++ b/pytorch_forecasting/data/timeseries.py @@ -8,7 +8,7 @@ from copy import copy as _copy, deepcopy from functools import lru_cache import inspect -from typing import Any, Callable, Dict, List, Tuple, Union, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import warnings import numpy as np @@ -34,7 +34,9 @@ from pytorch_forecasting.utils._dependencies import _check_matplotlib -def _find_end_indices(diffs: np.ndarray, max_lengths: np.ndarray, min_length: int) -> Tuple[np.ndarray, np.ndarray]: +def _find_end_indices( + diffs: np.ndarray, max_lengths: np.ndarray, min_length: int +) -> Tuple[np.ndarray, np.ndarray]: """ Identify end indices in series even if some values are missing. @@ -82,7 +84,9 @@ def _find_end_indices(diffs: np.ndarray, max_lengths: np.ndarray, min_length: in pass -def check_for_nonfinite(tensor: torch.Tensor, names: Union[str, List[str]]) -> torch.Tensor: +def check_for_nonfinite( + tensor: torch.Tensor, names: Union[str, List[str]] +) -> torch.Tensor: """ Check if 2D tensor contains NAs or inifinite values. @@ -192,15 +196,24 @@ def __init__( time_varying_unknown_categoricals: Optional[List[str]] = None, time_varying_unknown_reals: Optional[List[str]] = None, variable_groups: Optional[Dict[str, List[int]]] = None, - constant_fill_strategy: Optional[Dict[str, Union[str, float, int, bool]]] = None, + constant_fill_strategy: Optional[ + Dict[str, Union[str, float, int, bool]] + ] = None, allow_missing_timesteps: bool = False, lags: Optional[Dict[str, List[int]]] = None, add_relative_time_idx: bool = False, add_target_scales: bool = False, add_encoder_length: Union[bool, str] = "auto", - target_normalizer: Union[NORMALIZER, str, List[NORMALIZER], Tuple[NORMALIZER], None] = "auto", + target_normalizer: Union[ + NORMALIZER, str, List[NORMALIZER], Tuple[NORMALIZER], None + ] = "auto", categorical_encoders: Optional[Dict[str, NaNLabelEncoder]] = None, - scalers: Optional[Dict[str, Union[StandardScaler, RobustScaler, TorchNormalizer, EncoderNormalizer]]] = None, + scalers: Optional[ + Dict[ + str, + Union[StandardScaler, RobustScaler, TorchNormalizer, EncoderNormalizer], + ] + ] = None, randomize_length: Union[None, Tuple[float, float], bool] = False, predict_mode: bool = False, ): @@ -334,46 +347,68 @@ def __init__( """ super().__init__() self.max_encoder_length = max_encoder_length - assert isinstance(self.max_encoder_length, int), "max encoder length must be integer" + assert isinstance( + self.max_encoder_length, int + ), "max encoder length must be integer" if min_encoder_length is None: min_encoder_length = max_encoder_length self.min_encoder_length = min_encoder_length assert ( self.min_encoder_length <= self.max_encoder_length ), "max encoder length has to be larger equals min encoder length" - assert isinstance(self.min_encoder_length, int), "min encoder length must be integer" + assert isinstance( + self.min_encoder_length, int + ), "min encoder length must be integer" self.max_prediction_length = max_prediction_length - assert isinstance(self.max_prediction_length, int), "max prediction length must be integer" + assert isinstance( + self.max_prediction_length, int + ), "max prediction length must be integer" if min_prediction_length is None: min_prediction_length = max_prediction_length self.min_prediction_length = min_prediction_length assert ( self.min_prediction_length <= self.max_prediction_length ), "max prediction length has to be larger equals min prediction length" - assert self.min_prediction_length > 0, "min prediction length must be larger than 0" - assert isinstance(self.min_prediction_length, int), "min prediction length must be integer" - assert data[time_idx].dtype.kind == "i", "Timeseries index should be of type integer" + assert ( + self.min_prediction_length > 0 + ), "min prediction length must be larger than 0" + assert isinstance( + self.min_prediction_length, int + ), "min prediction length must be integer" + assert ( + data[time_idx].dtype.kind == "i" + ), "Timeseries index should be of type integer" self.target = target self.weight = weight self.time_idx = time_idx self.group_ids = [] if group_ids is None else list(group_ids) self.static_categoricals = static_categoricals - self._static_categoricals = [] if static_categoricals is None else list(static_categoricals) + self._static_categoricals = ( + [] if static_categoricals is None else list(static_categoricals) + ) self.static_reals = static_reals self._static_reals = [] if static_reals is None else list(static_reals) self.time_varying_known_categoricals = time_varying_known_categoricals self._time_varying_known_categoricals = ( - [] if time_varying_known_categoricals is None else list(time_varying_known_categoricals) + [] + if time_varying_known_categoricals is None + else list(time_varying_known_categoricals) ) self.time_varying_known_reals = time_varying_known_reals - self._time_varying_known_reals = [] if time_varying_known_reals is None else list(time_varying_known_reals) + self._time_varying_known_reals = ( + [] if time_varying_known_reals is None else list(time_varying_known_reals) + ) self.time_varying_unknown_categoricals = time_varying_unknown_categoricals self._time_varying_unknown_categoricals = ( - [] if time_varying_unknown_categoricals is None else list(time_varying_unknown_categoricals) + [] + if time_varying_unknown_categoricals is None + else list(time_varying_unknown_categoricals) ) self.time_varying_unknown_reals = time_varying_unknown_reals self._time_varying_unknown_reals = ( - [] if time_varying_unknown_reals is None else list(time_varying_unknown_reals) + [] + if time_varying_unknown_reals is None + else list(time_varying_unknown_reals) ) self.add_relative_time_idx = add_relative_time_idx @@ -388,17 +423,23 @@ def __init__( min_prediction_idx = data[self.time_idx].min() self.min_prediction_idx = min_prediction_idx self.constant_fill_strategy = constant_fill_strategy - self._constant_fill_strategy = {} if constant_fill_strategy is None else deepcopy(constant_fill_strategy) + self._constant_fill_strategy = ( + {} if constant_fill_strategy is None else deepcopy(constant_fill_strategy) + ) self.predict_mode = predict_mode self.allow_missing_timesteps = allow_missing_timesteps self.target_normalizer = target_normalizer self.categorical_encoders = categorical_encoders - self._categorical_encoders = {} if categorical_encoders is None else deepcopy(categorical_encoders) + self._categorical_encoders = ( + {} if categorical_encoders is None else deepcopy(categorical_encoders) + ) self.scalers = scalers self._scalers = {} if scalers is None else deepcopy(scalers) self.add_target_scales = add_target_scales self.variable_groups = variable_groups - self._variable_groups = {} if variable_groups is None else deepcopy(variable_groups) + self._variable_groups = ( + {} if variable_groups is None else deepcopy(variable_groups) + ) self.lags = lags self._lags = {} if lags is None else deepcopy(lags) @@ -431,18 +472,28 @@ def __init__( assert ( "relative_time_idx" not in data.columns ), "relative_time_idx is a protected column and must not be present in data" - if "relative_time_idx" not in self._time_varying_known_reals and "relative_time_idx" not in self.reals: + if ( + "relative_time_idx" not in self._time_varying_known_reals + and "relative_time_idx" not in self.reals + ): self._time_varying_known_reals.append("relative_time_idx") - data.loc[:, "relative_time_idx"] = 0.0 # dummy - real value will be set dynamically in __getitem__() + data.loc[:, "relative_time_idx"] = ( + 0.0 # dummy - real value will be set dynamically in __getitem__() + ) # add decoder length to static real variables if self.add_encoder_length: assert ( "encoder_length" not in data.columns ), "encoder_length is a protected column and must not be present in data" - if "encoder_length" not in self._time_varying_known_reals and "encoder_length" not in self.reals: + if ( + "encoder_length" not in self._time_varying_known_reals + and "encoder_length" not in self.reals + ): self._static_reals.append("encoder_length") - data.loc[:, "encoder_length"] = 0 # dummy - real value will be set dynamically in __getitem__() + data.loc[:, "encoder_length"] = ( + 0 # dummy - real value will be set dynamically in __getitem__() + ) # validate self._validate_data(data) @@ -469,7 +520,9 @@ def __init__( self._time_varying_known_categoricals.append(lagged_name) elif name in self._time_varying_unknown_reals: for lagged_name, lag in lagged_names.items(): - if lag < self.max_prediction_length: # keep in unknown as if lag is too small + if ( + lag < self.max_prediction_length + ): # keep in unknown as if lag is too small if lagged_name not in self._time_varying_unknown_reals: self._time_varying_unknown_reals.append(lagged_name) else: @@ -478,26 +531,40 @@ def __init__( self._time_varying_known_reals.append(lagged_name) elif name in self._time_varying_unknown_categoricals: for lagged_name, lag in lagged_names.items(): - if lag < self.max_prediction_length: # keep in unknown as if lag is too small - if lagged_name not in self._time_varying_unknown_categoricals: - self._time_varying_unknown_categoricals.append(lagged_name) + if ( + lag < self.max_prediction_length + ): # keep in unknown as if lag is too small + if ( + lagged_name + not in self._time_varying_unknown_categoricals + ): + self._time_varying_unknown_categoricals.append( + lagged_name + ) if lagged_name not in self._time_varying_known_categoricals: # switch to known so that lag can be used in decoder directly self._time_varying_known_categoricals.append(lagged_name) else: - raise KeyError(f"lagged variable {name} is not a known nor unknown time-varying variable") + raise KeyError( + f"lagged variable {name} is not a known nor unknown time-varying variable" + ) # filter data if min_prediction_idx is not None: # filtering for min_prediction_idx will be done on subsequence level ensuring # minimal decoder index is always >= min_prediction_idx - data = data[lambda x: x[self.time_idx] >= self.min_prediction_idx - self.max_encoder_length - self.max_lag] + data = data[ + lambda x: x[self.time_idx] + >= self.min_prediction_idx - self.max_encoder_length - self.max_lag + ] data = data.sort_values(self.group_ids + [self.time_idx]) # preprocess data data = self._preprocess_data(data) for target in self.target_names: - assert target not in self._scalers, "Target normalizer is separate and not in scalers." + assert ( + target not in self._scalers + ), "Target normalizer is separate and not in scalers." # create index self.index = self._construct_index(data, predict_mode=self.predict_mode) @@ -511,7 +578,11 @@ def dropout_categoricals(self) -> List[str]: list of categorical variables that are unknown when making a forecast without observed history """ - return [name for name, encoder in self._categorical_encoders.items() if encoder.add_nan] + return [ + name + for name, encoder in self._categorical_encoders.items() + if encoder.add_nan + ] def _get_lagged_names(self, name: str) -> Dict[str, int]: """ @@ -546,7 +617,13 @@ def lagged_targets(self) -> Dict[str, str]: """Subset of `lagged_variables` but only includes variables that are lagged targets.""" vars = {} for name in self._lags: - vars.update({lag_name: name for lag_name in self._get_lagged_names(name) if name in self.target_names}) + vars.update( + { + lag_name: name + for lag_name in self._get_lagged_names(name) + if name in self.target_names + } + ) return vars @property @@ -590,7 +667,10 @@ def _set_target_normalizer(self, data: pd.DataFrame): if data[target].dtype.kind != "f": # category normalizers.append(NaNLabelEncoder()) if self.add_target_scales: - warnings.warn("Target scales will be only added for continous targets", UserWarning) + warnings.warn( + "Target scales will be only added for continous targets", + UserWarning, + ) else: data_positive = (data[target] > 0).all() if data_positive: @@ -601,7 +681,9 @@ def _set_target_normalizer(self, data: pd.DataFrame): else: transformer = None if self.max_encoder_length > 20 and self.min_encoder_length > 1: - normalizers.append(EncoderNormalizer(transformation=transformer)) + normalizers.append( + EncoderNormalizer(transformation=transformer) + ) else: normalizers.append(GroupNormalizer(transformation=transformer)) if self.multi_target: @@ -619,7 +701,9 @@ def _set_target_normalizer(self, data: pd.DataFrame): assert isinstance( self.target_normalizer, (TorchNormalizer, NaNLabelEncoder) ), f"target_normalizer has to be either None or of class TorchNormalizer but found {self.target_normalizer}" - assert not self.multi_target or isinstance(self.target_normalizer, MultiNormalizer), ( + assert not self.multi_target or isinstance( + self.target_normalizer, MultiNormalizer + ), ( "multiple targets / list of targets requires MultiNormalizer as target_normalizer " f"but found {self.target_normalizer}" ) @@ -656,7 +740,10 @@ def _validate_data(self, data: pd.DataFrame): raise KeyError(f"variable {name} specified but not found in data") if not ( name in object_columns - or (name in category_columns and data[name].cat.categories.dtype.kind not in "bifc") + or ( + name in category_columns + and data[name].cat.categories.dtype.kind not in "bifc" + ) ): raise ValueError( f"Data type of category {name} was found to be numeric - use a string type / categorified string" @@ -709,14 +796,22 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame: name not in self._variable_groups ), f"lagged variables that are in {self._variable_groups} are not supported yet" for lagged_name, lag in self._get_lagged_names(name).items(): - data[lagged_name] = data.groupby(self.group_ids, observed=True)[name].shift(lag) + data[lagged_name] = data.groupby(self.group_ids, observed=True)[ + name + ].shift(lag) # encode group ids - this encoding for name, group_name in self._group_ids_mapping.items(): # use existing encoder - but a copy of it not too loose current encodings - encoder = deepcopy(self._categorical_encoders.get(group_name, NaNLabelEncoder())) - self._categorical_encoders[group_name] = encoder.fit(data[name].to_numpy().reshape(-1), overwrite=False) - data[group_name] = self.transform_values(name, data[name], inverse=False, group_id=True) + encoder = deepcopy( + self._categorical_encoders.get(group_name, NaNLabelEncoder()) + ) + self._categorical_encoders[group_name] = encoder.fit( + data[name].to_numpy().reshape(-1), overwrite=False + ) + data[group_name] = self.transform_values( + name, data[name], inverse=False, group_id=True + ) # encode categoricals first to ensure that group normalizer for relies on encoded categories if isinstance( @@ -731,33 +826,45 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame: if name in self._variable_groups: # fit groups columns = self._variable_groups[name] if name not in self._categorical_encoders: - self._categorical_encoders[name] = NaNLabelEncoder().fit(data[columns].to_numpy().reshape(-1)) + self._categorical_encoders[name] = NaNLabelEncoder().fit( + data[columns].to_numpy().reshape(-1) + ) elif self._categorical_encoders[name] is not None: try: check_is_fitted(self._categorical_encoders[name]) except NotFittedError: - self._categorical_encoders[name] = self._categorical_encoders[name].fit( - data[columns].to_numpy().reshape(-1) - ) + self._categorical_encoders[name] = self._categorical_encoders[ + name + ].fit(data[columns].to_numpy().reshape(-1)) else: if name not in self._categorical_encoders: self._categorical_encoders[name] = NaNLabelEncoder().fit(data[name]) - elif self._categorical_encoders[name] is not None and name not in self.target_names: + elif ( + self._categorical_encoders[name] is not None + and name not in self.target_names + ): try: check_is_fitted(self._categorical_encoders[name]) except NotFittedError: - self._categorical_encoders[name] = self._categorical_encoders[name].fit(data[name]) + self._categorical_encoders[name] = self._categorical_encoders[ + name + ].fit(data[name]) # encode them for name in dict.fromkeys(group_ids_to_encode + self.flat_categoricals): # targets and its lagged versions are handled separetely if name not in self.target_names and name not in self.lagged_targets: data[name] = self.transform_values( - name, data[name], inverse=False, ignore_na=name in self.lagged_variables + name, + data[name], + inverse=False, + ignore_na=name in self.lagged_variables, ) # save special variables - assert "__time_idx__" not in data.columns, "__time_idx__ is a protected column and must not be present in data" + assert ( + "__time_idx__" not in data.columns + ), "__time_idx__ is a protected column and must not be present in data" data["__time_idx__"] = data[self.time_idx] # save unscaled for target in self.target_names: assert ( @@ -775,7 +882,9 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame: except NotFittedError: if isinstance(self.target_normalizer, EncoderNormalizer): self.target_normalizer.fit(data[self.target]) - elif isinstance(self.target_normalizer, (GroupNormalizer, MultiNormalizer)): + elif isinstance( + self.target_normalizer, (GroupNormalizer, MultiNormalizer) + ): self.target_normalizer.fit(data[self.target], data) else: self.target_normalizer.fit(data[self.target]) @@ -786,19 +895,31 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame: # transformation over the entire time range but by each group common_init_args = [ name - for name in inspect.signature(GroupNormalizer.__init__).parameters.keys() - if name in inspect.signature(EncoderNormalizer.__init__).parameters.keys() + for name in inspect.signature( + GroupNormalizer.__init__ + ).parameters.keys() + if name + in inspect.signature(EncoderNormalizer.__init__).parameters.keys() and name not in ["data", "self"] ] - copy_kwargs = {name: getattr(self.target_normalizer, name) for name in common_init_args} + copy_kwargs = { + name: getattr(self.target_normalizer, name) + for name in common_init_args + } normalizer = GroupNormalizer(groups=self.group_ids, **copy_kwargs) - data[self.target], scales = normalizer.fit_transform(data[self.target], data, return_norm=True) + data[self.target], scales = normalizer.fit_transform( + data[self.target], data, return_norm=True + ) elif isinstance(self.target_normalizer, GroupNormalizer): - data[self.target], scales = self.target_normalizer.transform(data[self.target], data, return_norm=True) + data[self.target], scales = self.target_normalizer.transform( + data[self.target], data, return_norm=True + ) elif isinstance(self.target_normalizer, MultiNormalizer): - transformed, scales = self.target_normalizer.transform(data[self.target], data, return_norm=True) + transformed, scales = self.target_normalizer.transform( + data[self.target], data, return_norm=True + ) for idx, target in enumerate(self.target_names): data[target] = transformed[idx] @@ -814,20 +935,26 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame: scales = None else: - data[self.target], scales = self.target_normalizer.transform(data[self.target], return_norm=True) + data[self.target], scales = self.target_normalizer.transform( + data[self.target], return_norm=True + ) # add target scales if self.add_target_scales: if not isinstance(self.target_normalizer, MultiNormalizer): scales = [scales] for target_idx, target in enumerate(self.target_names): - if not isinstance(self.target_normalizers[target_idx], NaNLabelEncoder): + if not isinstance( + self.target_normalizers[target_idx], NaNLabelEncoder + ): for scale_idx, name in enumerate(["center", "scale"]): feature_name = f"{target}_{name}" assert ( feature_name not in data.columns ), f"{feature_name} is a protected column and must not be present in data" - data[feature_name] = scales[target_idx][:, scale_idx].squeeze() + data[feature_name] = scales[target_idx][ + :, scale_idx + ].squeeze() if feature_name not in self.reals: self._static_reals.append(feature_name) @@ -856,13 +983,17 @@ def _preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame: and transformer is not None and not isinstance(transformer, EncoderNormalizer) ): - data[name] = self.transform_values(name, data[name], data=data, inverse=False) + data[name] = self.transform_values( + name, data[name], data=data, inverse=False + ) # encode lagged categorical targets for name in self.lagged_targets: # normalizer only now available if name in self.flat_categoricals: - data[name] = self.transform_values(name, data[name], inverse=False, ignore_na=True) + data[name] = self.transform_values( + name, data[name], inverse=False, ignore_na=True + ) # encode constant values self.encoded_constant_fill_strategy = {} @@ -894,7 +1025,9 @@ def get_transformer(self, name: str, group_id: bool = False): """ if group_id: name = self._group_ids_mapping[name] - elif name in self.lagged_variables: # recover transformer fitted on non-lagged variable + elif ( + name in self.lagged_variables + ): # recover transformer fitted on non-lagged variable name = self.lagged_variables[name] if name in self.flat_categoricals + self.group_ids + self._group_ids: @@ -985,15 +1118,20 @@ def _data_to_tensors(self, data: pd.DataFrame) -> Dict[str, torch.Tensor]: """ index = check_for_nonfinite( - torch.tensor(data[self._group_ids].to_numpy(np.int64), dtype=torch.int64), self.group_ids + torch.tensor(data[self._group_ids].to_numpy(np.int64), dtype=torch.int64), + self.group_ids, ) time = check_for_nonfinite( - torch.tensor(data["__time_idx__"].to_numpy(np.int64), dtype=torch.int64), self.time_idx + torch.tensor(data["__time_idx__"].to_numpy(np.int64), dtype=torch.int64), + self.time_idx, ) # categorical covariates categorical = check_for_nonfinite( - torch.tensor(data[self.flat_categoricals].to_numpy(np.int64), dtype=torch.int64), self.flat_categoricals + torch.tensor( + data[self.flat_categoricals].to_numpy(np.int64), dtype=torch.int64 + ), + self.flat_categoricals, ) # get weight @@ -1012,7 +1150,10 @@ def _data_to_tensors(self, data: pd.DataFrame) -> Dict[str, torch.Tensor]: if isinstance(self.target_normalizer, NaNLabelEncoder): target = [ check_for_nonfinite( - torch.tensor(data[f"__target__{self.target}"].to_numpy(dtype=np.int64), dtype=torch.long), + torch.tensor( + data[f"__target__{self.target}"].to_numpy(dtype=np.int64), + dtype=torch.long, + ), self.target, ) ] @@ -1022,9 +1163,13 @@ def _data_to_tensors(self, data: pd.DataFrame) -> Dict[str, torch.Tensor]: check_for_nonfinite( torch.tensor( data[f"__target__{name}"].to_numpy( - dtype=[np.float64, np.int64][data[name].dtype.kind in "bi"] + dtype=[np.float64, np.int64][ + data[name].dtype.kind in "bi" + ] ), - dtype=[torch.float, torch.long][data[name].dtype.kind in "bi"], + dtype=[torch.float, torch.long][ + data[name].dtype.kind in "bi" + ], ), name, ) @@ -1033,18 +1178,29 @@ def _data_to_tensors(self, data: pd.DataFrame) -> Dict[str, torch.Tensor]: else: target = [ check_for_nonfinite( - torch.tensor(data[f"__target__{self.target}"].to_numpy(dtype=np.float64), dtype=torch.float), + torch.tensor( + data[f"__target__{self.target}"].to_numpy(dtype=np.float64), + dtype=torch.float, + ), self.target, ) ] # continuous covariates continuous = check_for_nonfinite( - torch.tensor(data[self.reals].to_numpy(dtype=np.float64), dtype=torch.float), self.reals + torch.tensor( + data[self.reals].to_numpy(dtype=np.float64), dtype=torch.float + ), + self.reals, ) tensors = dict( - reals=continuous, categoricals=categorical, groups=index, target=target, weight=weight, time=time + reals=continuous, + categoricals=categorical, + groups=index, + target=target, + weight=weight, + time=time, ) return tensors @@ -1058,7 +1214,9 @@ def categoricals(self) -> List[str]: List[str]: list of variables """ return ( - self._static_categoricals + self._time_varying_known_categoricals + self._time_varying_unknown_categoricals + self._static_categoricals + + self._time_varying_known_categoricals + + self._time_varying_unknown_categoricals ) @property @@ -1098,7 +1256,11 @@ def reals(self) -> List[str]: Returns: List[str]: list of variables """ - return self._static_reals + self._time_varying_known_reals + self._time_varying_unknown_reals + return ( + self._static_reals + + self._time_varying_known_reals + + self._time_varying_unknown_reals + ) @property @lru_cache(None) @@ -1156,7 +1318,12 @@ def get_parameters(self) -> Dict[str, Any]: @classmethod def from_dataset( - cls, dataset, data: pd.DataFrame, stop_randomization: bool = False, predict: bool = False, **update_kwargs + cls, + dataset, + data: pd.DataFrame, + stop_randomization: bool = False, + predict: bool = False, + **update_kwargs, ): """ Generate dataset with different underlying data but same variable encoders and scalers, etc. @@ -1176,7 +1343,11 @@ def from_dataset( TimeSeriesDataSet: new dataset """ return cls.from_parameters( - dataset.get_parameters(), data, stop_randomization=stop_randomization, predict=predict, **update_kwargs + dataset.get_parameters(), + data, + stop_randomization=stop_randomization, + predict=predict, + **update_kwargs, ) @classmethod @@ -1209,7 +1380,8 @@ def from_parameters( stop_randomization = True elif not stop_randomization: warnings.warn( - "If predicting, no randomization should be possible - setting stop_randomization=True", UserWarning + "If predicting, no randomization should be possible - setting stop_randomization=True", + UserWarning, ) stop_randomization = True parameters["min_prediction_length"] = parameters["max_prediction_length"] @@ -1240,11 +1412,21 @@ def _construct_index(self, data: pd.DataFrame, predict_mode: bool) -> pd.DataFra df_index_first = g["__time_idx__"].transform("first").to_frame("time_first") df_index_last = g["__time_idx__"].transform("last").to_frame("time_last") - df_index_diff_to_next = -g["__time_idx__"].diff(-1).fillna(-1).astype(int).to_frame("time_diff_to_next") - df_index = pd.concat([df_index_first, df_index_last, df_index_diff_to_next], axis=1) + df_index_diff_to_next = ( + -g["__time_idx__"] + .diff(-1) + .fillna(-1) + .astype(int) + .to_frame("time_diff_to_next") + ) + df_index = pd.concat( + [df_index_first, df_index_last, df_index_diff_to_next], axis=1 + ) df_index["index_start"] = np.arange(len(df_index)) df_index["time"] = data["__time_idx__"] - df_index["count"] = (df_index["time_last"] - df_index["time_first"]).astype(int) + 1 + df_index["count"] = (df_index["time_last"] - df_index["time_first"]).astype( + int + ) + 1 sequence_ids = g.ngroup() df_index["sequence_id"] = sequence_ids @@ -1252,7 +1434,9 @@ def _construct_index(self, data: pd.DataFrame, predict_mode: bool) -> pd.DataFra max_sequence_length = self.max_prediction_length + self.max_encoder_length # calculate maximum index to include from current index_start - max_time = (df_index["time"] + max_sequence_length - 1).clip(upper=df_index["count"] + df_index.time_first - 1) + max_time = (df_index["time"] + max_sequence_length - 1).clip( + upper=df_index["count"] + df_index.time_first - 1 + ) # if there are missing timesteps, we cannot say directly what is the last timestep to include # therefore we iterate until it is found @@ -1270,13 +1454,21 @@ def _construct_index(self, data: pd.DataFrame, predict_mode: bool) -> pd.DataFra # while the previous steps have ensured that we start a sequence on every time step, the missing_sequences # ensure that there is a sequence that finishes on every timestep if len(missing_sequences) > 0: - shortened_sequences = df_index.iloc[missing_sequences[:, 0]].assign(index_end=missing_sequences[:, 1]) + shortened_sequences = df_index.iloc[missing_sequences[:, 0]].assign( + index_end=missing_sequences[:, 1] + ) # concatenate shortened sequences - df_index = pd.concat([df_index, shortened_sequences], axis=0, ignore_index=True) + df_index = pd.concat( + [df_index, shortened_sequences], axis=0, ignore_index=True + ) # filter out where encode and decode length are not satisfied - df_index["sequence_length"] = df_index["time"].iloc[df_index["index_end"]].to_numpy() - df_index["time"] + 1 + df_index["sequence_length"] = ( + df_index["time"].iloc[df_index["index_end"]].to_numpy() + - df_index["time"] + + 1 + ) # filter too short sequences df_index = df_index[ @@ -1284,24 +1476,35 @@ def _construct_index(self, data: pd.DataFrame, predict_mode: bool) -> pd.DataFra lambda x: (x.sequence_length >= min_sequence_length) & # prediction must be for after minimal prediction index + length of prediction - (x["sequence_length"] + x["time"] >= self.min_prediction_idx + self.min_prediction_length) + ( + x["sequence_length"] + x["time"] + >= self.min_prediction_idx + self.min_prediction_length + ) ] - if predict_mode: # keep longest element per series (i.e. the first element that spans to the end of the series) + if ( + predict_mode + ): # keep longest element per series (i.e. the first element that spans to the end of the series) # filter all elements that are longer than the allowed maximum sequence length df_index = df_index[ lambda x: (x["time_last"] - x["time"] + 1 <= max_sequence_length) & (x["sequence_length"] >= min_sequence_length) ] # choose longest sequence - df_index = df_index.loc[df_index.groupby("sequence_id").sequence_length.idxmax()] + df_index = df_index.loc[ + df_index.groupby("sequence_id").sequence_length.idxmax() + ] # check that all groups/series have at least one entry in the index if not sequence_ids.isin(df_index.sequence_id).all(): - missing_groups = data.loc[~sequence_ids.isin(df_index.sequence_id), self._group_ids].drop_duplicates() + missing_groups = data.loc[ + ~sequence_ids.isin(df_index.sequence_id), self._group_ids + ].drop_duplicates() # decode values for name, id in self._group_ids_mapping.items(): - missing_groups[id] = self.transform_values(name, missing_groups[id], inverse=True, group_id=True) + missing_groups[id] = self.transform_values( + name, missing_groups[id], inverse=True, group_id=True + ) warnings.warn( "Min encoder length and/or min_prediction_idx and/or min prediction length and/or lags are " "too large for " @@ -1363,9 +1566,15 @@ def decoded_index(self) -> pd.DataFrame: index_last = self.index["index_end"].to_numpy() index = ( # get group ids in order of index - pd.DataFrame(self.data["groups"][index_start].numpy(), columns=self.group_ids) + pd.DataFrame( + self.data["groups"][index_start].numpy(), columns=self.group_ids + ) # to original values - .apply(lambda x: self.transform_values(name=x.name, values=x, group_id=True, inverse=True)) + .apply( + lambda x: self.transform_values( + name=x.name, values=x, group_id=True, inverse=True + ) + ) # add time index .assign( time_idx_first=self.data["time"][index_start].numpy(), @@ -1373,14 +1582,20 @@ def decoded_index(self) -> pd.DataFrame: # prediction index is last time index - decoder length + 1 time_idx_first_prediction=lambda x: x.time_idx_last - self.calculate_decoder_length( - time_last=x.time_idx_last, sequence_length=x.time_idx_last - x.time_idx_first + 1 + time_last=x.time_idx_last, + sequence_length=x.time_idx_last - x.time_idx_first + 1, ) + 1, ) ) return index - def plot_randomization(self, betas: Tuple[float, float] = None, length: int = None, min_length: int = None): + def plot_randomization( + self, + betas: Tuple[float, float] = None, + length: int = None, + min_length: int = None, + ): """ Plot expected randomized length distribution. @@ -1421,7 +1636,10 @@ def __len__(self) -> int: return self.index.shape[0] def set_overwrite_values( - self, values: Union[float, torch.Tensor], variable: str, target: Union[str, slice] = "decoder" + self, + values: Union[float, torch.Tensor], + variable: str, + target: Union[str, slice] = "decoder", ) -> None: """ Convenience method to quickly overwrite values in decoder or encoder (or both) for a specific variable. @@ -1433,26 +1651,40 @@ def set_overwrite_values( a slice object which is directly used to overwrite indices, e.g. ``slice(-5, None)`` will overwrite the last 5 values. Defaults to "decoder". """ - values = torch.tensor(self.transform_values(variable, np.asarray(values).reshape(-1), inverse=False)).squeeze() + values = torch.tensor( + self.transform_values( + variable, np.asarray(values).reshape(-1), inverse=False + ) + ).squeeze() assert target in [ "all", "decoder", "encoder", ], f"target has be one of 'all', 'decoder' or 'encoder' but target={target} instead" - if variable in self._static_categoricals or variable in self._static_categoricals: + if ( + variable in self._static_categoricals + or variable in self._static_categoricals + ): target = "all" if variable in self.target_names: raise NotImplementedError("Target variable is not supported") if self.weight is not None and self.weight == variable: raise NotImplementedError("Weight variable is not supported") - if isinstance(self._scalers.get(variable, self._categorical_encoders.get(variable)), TorchNormalizer): - raise NotImplementedError("TorchNormalizer (e.g. GroupNormalizer) is not supported") + if isinstance( + self._scalers.get(variable, self._categorical_encoders.get(variable)), + TorchNormalizer, + ): + raise NotImplementedError( + "TorchNormalizer (e.g. GroupNormalizer) is not supported" + ) if self._overwrite_values is None: self._overwrite_values = {} - self._overwrite_values.update(dict(values=values, variable=variable, target=target)) + self._overwrite_values.update( + dict(values=values, variable=variable, target=target) + ) def reset_overwrite_values(self) -> None: """ @@ -1477,9 +1709,11 @@ def calculate_decoder_length( """ if isinstance(time_last, int): decoder_length = min( - time_last - (self.min_prediction_idx - 1), # not going beyond min prediction idx + time_last + - (self.min_prediction_idx - 1), # not going beyond min prediction idx self.max_prediction_length, # maximum prediction length - sequence_length - self.min_encoder_length, # sequence length - min decoder length + sequence_length + - self.min_encoder_length, # sequence length - min decoder length ) else: decoder_length = np.min( @@ -1504,14 +1738,21 @@ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: index = self.index.iloc[idx] # get index data data_cont = self.data["reals"][index.index_start : index.index_end + 1].clone() - data_cat = self.data["categoricals"][index.index_start : index.index_end + 1].clone() + data_cat = self.data["categoricals"][ + index.index_start : index.index_end + 1 + ].clone() time = self.data["time"][index.index_start : index.index_end + 1].clone() - target = [d[index.index_start : index.index_end + 1].clone() for d in self.data["target"]] + target = [ + d[index.index_start : index.index_end + 1].clone() + for d in self.data["target"] + ] groups = self.data["groups"][index.index_start].clone() if self.data["weight"] is None: weight = None else: - weight = self.data["weight"][index.index_start : index.index_end + 1].clone() + weight = self.data["weight"][ + index.index_start : index.index_end + 1 + ].clone() # get target scale in the form of a list target_scale = self.target_normalizer.get_parameters(groups, self.group_ids) if not isinstance(self.target_normalizer, MultiNormalizer): @@ -1520,10 +1761,16 @@ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: # fill in missing values (if not all time indices are specified sequence_length = len(time) if sequence_length < index.sequence_length: - assert self.allow_missing_timesteps, "allow_missing_timesteps should be True if sequences have gaps" - repetitions = torch.cat([time[1:] - time[:-1], torch.ones(1, dtype=time.dtype)]) + assert ( + self.allow_missing_timesteps + ), "allow_missing_timesteps should be True if sequences have gaps" + repetitions = torch.cat( + [time[1:] - time[:-1], torch.ones(1, dtype=time.dtype)] + ) indices = torch.repeat_interleave(torch.arange(len(time)), repetitions) - repetition_indices = torch.cat([torch.tensor([False], dtype=torch.bool), indices[1:] == indices[:-1]]) + repetition_indices = torch.cat( + [torch.tensor([False], dtype=torch.bool), indices[1:] == indices[:-1]] + ) # select data data_cat = data_cat[indices] @@ -1536,22 +1783,31 @@ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: if self.time_idx in self.reals: time_idx = self.reals.index(self.time_idx) data_cont[:, time_idx] = torch.linspace( - data_cont[0, time_idx], data_cont[-1, time_idx], len(target[0]), dtype=data_cont.dtype + data_cont[0, time_idx], + data_cont[-1, time_idx], + len(target[0]), + dtype=data_cont.dtype, ) # make replacements to fill in categories for name, value in self.encoded_constant_fill_strategy.items(): if name in self.reals: data_cont[repetition_indices, self.reals.index(name)] = value - elif name in [f"__target__{target_name}" for target_name in self.target_names]: + elif name in [ + f"__target__{target_name}" for target_name in self.target_names + ]: target_pos = self.target_names.index(name[len("__target__") :]) target[target_pos][repetition_indices] = value elif name in self.flat_categoricals: - data_cat[repetition_indices, self.flat_categoricals.index(name)] = value + data_cat[repetition_indices, self.flat_categoricals.index(name)] = ( + value + ) elif name in self.target_names: # target is just not an input value pass else: - raise KeyError(f"Variable {name} is not known and thus cannot be filled in") + raise KeyError( + f"Variable {name} is not known and thus cannot be filled in" + ) sequence_length = len(target[0]) @@ -1565,12 +1821,16 @@ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: assert ( decoder_length >= self.min_prediction_length ), "Decoder length should be at least minimum prediction length" - assert encoder_length >= self.min_encoder_length, "Encoder length should be at least minimum encoder length" + assert ( + encoder_length >= self.min_encoder_length + ), "Encoder length should be at least minimum encoder length" if self.randomize_length is not None: # randomization improves generalization # modify encode and decode lengths modifiable_encoder_length = encoder_length - self.min_encoder_length - encoder_length_probability = Beta(self.randomize_length[0], self.randomize_length[1]).sample() + encoder_length_probability = Beta( + self.randomize_length[0], self.randomize_length[1] + ).sample() # subsample a new/smaller encode length new_encoder_length = self.min_encoder_length + int( @@ -1578,35 +1838,64 @@ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: ) # extend decode length if possible - new_decoder_length = min(decoder_length + (encoder_length - new_encoder_length), self.max_prediction_length) + new_decoder_length = min( + decoder_length + (encoder_length - new_encoder_length), + self.max_prediction_length, + ) # select subset of sequence of new sequence if new_encoder_length + new_decoder_length < len(target[0]): - data_cat = data_cat[encoder_length - new_encoder_length : encoder_length + new_decoder_length] - data_cont = data_cont[encoder_length - new_encoder_length : encoder_length + new_decoder_length] - target = [t[encoder_length - new_encoder_length : encoder_length + new_decoder_length] for t in target] + data_cat = data_cat[ + encoder_length + - new_encoder_length : encoder_length + + new_decoder_length + ] + data_cont = data_cont[ + encoder_length + - new_encoder_length : encoder_length + + new_decoder_length + ] + target = [ + t[ + encoder_length + - new_encoder_length : encoder_length + + new_decoder_length + ] + for t in target + ] if weight is not None: - weight = weight[encoder_length - new_encoder_length : encoder_length + new_decoder_length] + weight = weight[ + encoder_length + - new_encoder_length : encoder_length + + new_decoder_length + ] encoder_length = new_encoder_length decoder_length = new_decoder_length # switch some variables to nan if encode length is 0 if encoder_length == 0 and len(self.dropout_categoricals) > 0: - data_cat[:, [self.flat_categoricals.index(c) for c in self.dropout_categoricals]] = ( - 0 # zero is encoded nan - ) + data_cat[ + :, + [ + self.flat_categoricals.index(c) + for c in self.dropout_categoricals + ], + ] = 0 # zero is encoded nan assert decoder_length > 0, "Decoder length should be greater than 0" assert encoder_length >= 0, "Encoder length should be at least 0" if self.add_relative_time_idx: data_cont[:, self.reals.index("relative_time_idx")] = ( - torch.arange(-encoder_length, decoder_length, dtype=data_cont.dtype) / self.max_encoder_length + torch.arange(-encoder_length, decoder_length, dtype=data_cont.dtype) + / self.max_encoder_length ) if self.add_encoder_length: data_cont[:, self.reals.index("encoder_length")] = ( - (encoder_length - 0.5 * self.max_encoder_length) / self.max_encoder_length * 2.0 + (encoder_length - 0.5 * self.max_encoder_length) + / self.max_encoder_length + * 2.0 ) # rescale target @@ -1619,14 +1908,20 @@ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: single_target_scale = target_normalizer.get_parameters() # modify input data if target_name in self.reals: - data_cont[:, self.reals.index(target_name)] = target_normalizer.transform(target[idx]) + data_cont[:, self.reals.index(target_name)] = ( + target_normalizer.transform(target[idx]) + ) if self.add_target_scales: - data_cont[:, self.reals.index(f"{target_name}_center")] = self.transform_values( - f"{target_name}_center", single_target_scale[0] - )[0] - data_cont[:, self.reals.index(f"{target_name}_scale")] = self.transform_values( - f"{target_name}_scale", single_target_scale[1] - )[0] + data_cont[:, self.reals.index(f"{target_name}_center")] = ( + self.transform_values( + f"{target_name}_center", single_target_scale[0] + )[0] + ) + data_cont[:, self.reals.index(f"{target_name}_scale")] = ( + self.transform_values( + f"{target_name}_scale", single_target_scale[1] + )[0] + ) # scale needs to be numpy to be consistent with GroupNormalizer target_scale[idx] = single_target_scale.numpy() @@ -1713,29 +2008,55 @@ def _collate_fn( """ # collate function for dataloader # lengths - encoder_lengths = torch.tensor([batch[0]["encoder_length"] for batch in batches], dtype=torch.long) - decoder_lengths = torch.tensor([batch[0]["decoder_length"] for batch in batches], dtype=torch.long) + encoder_lengths = torch.tensor( + [batch[0]["encoder_length"] for batch in batches], dtype=torch.long + ) + decoder_lengths = torch.tensor( + [batch[0]["decoder_length"] for batch in batches], dtype=torch.long + ) # ids decoder_time_idx_start = ( - torch.tensor([batch[0]["encoder_time_idx_start"] for batch in batches], dtype=torch.long) + encoder_lengths + torch.tensor( + [batch[0]["encoder_time_idx_start"] for batch in batches], + dtype=torch.long, + ) + + encoder_lengths ) - decoder_time_idx = decoder_time_idx_start.unsqueeze(1) + torch.arange(decoder_lengths.max()).unsqueeze(0) + decoder_time_idx = decoder_time_idx_start.unsqueeze(1) + torch.arange( + decoder_lengths.max() + ).unsqueeze(0) groups = torch.stack([batch[0]["groups"] for batch in batches]) # features encoder_cont = rnn.pad_sequence( - [batch[0]["x_cont"][:length] for length, batch in zip(encoder_lengths, batches)], batch_first=True + [ + batch[0]["x_cont"][:length] + for length, batch in zip(encoder_lengths, batches) + ], + batch_first=True, ) encoder_cat = rnn.pad_sequence( - [batch[0]["x_cat"][:length] for length, batch in zip(encoder_lengths, batches)], batch_first=True + [ + batch[0]["x_cat"][:length] + for length, batch in zip(encoder_lengths, batches) + ], + batch_first=True, ) decoder_cont = rnn.pad_sequence( - [batch[0]["x_cont"][length:] for length, batch in zip(encoder_lengths, batches)], batch_first=True + [ + batch[0]["x_cont"][length:] + for length, batch in zip(encoder_lengths, batches) + ], + batch_first=True, ) decoder_cat = rnn.pad_sequence( - [batch[0]["x_cat"][length:] for length, batch in zip(encoder_lengths, batches)], batch_first=True + [ + batch[0]["x_cat"][length:] + for length, batch in zip(encoder_lengths, batches) + ], + batch_first=True, ) # target scale @@ -1744,34 +2065,54 @@ def _collate_fn( elif isinstance(batches[0][0]["target_scale"], (list, tuple)): target_scale = [] for idx in range(len(batches[0][0]["target_scale"])): - if isinstance(batches[0][0]["target_scale"][idx], torch.Tensor): # stack tensor - scale = torch.stack([batch[0]["target_scale"][idx] for batch in batches]) + if isinstance( + batches[0][0]["target_scale"][idx], torch.Tensor + ): # stack tensor + scale = torch.stack( + [batch[0]["target_scale"][idx] for batch in batches] + ) else: scale = torch.from_numpy( - np.array([batch[0]["target_scale"][idx] for batch in batches], dtype=np.float32), + np.array( + [batch[0]["target_scale"][idx] for batch in batches], + dtype=np.float32, + ), ) target_scale.append(scale) else: # convert to tensor target_scale = torch.from_numpy( - np.array([batch[0]["target_scale"] for batch in batches], dtype=np.float32), + np.array( + [batch[0]["target_scale"] for batch in batches], dtype=np.float32 + ), ) # target and weight if isinstance(batches[0][1][0], (tuple, list)): target = [ - rnn.pad_sequence([batch[1][0][idx] for batch in batches], batch_first=True) + rnn.pad_sequence( + [batch[1][0][idx] for batch in batches], batch_first=True + ) for idx in range(len(batches[0][1][0])) ] encoder_target = [ - rnn.pad_sequence([batch[0]["encoder_target"][idx] for batch in batches], batch_first=True) + rnn.pad_sequence( + [batch[0]["encoder_target"][idx] for batch in batches], + batch_first=True, + ) for idx in range(len(batches[0][1][0])) ] else: - target = rnn.pad_sequence([batch[1][0] for batch in batches], batch_first=True) - encoder_target = rnn.pad_sequence([batch[0]["encoder_target"] for batch in batches], batch_first=True) + target = rnn.pad_sequence( + [batch[1][0] for batch in batches], batch_first=True + ) + encoder_target = rnn.pad_sequence( + [batch[0]["encoder_target"] for batch in batches], batch_first=True + ) if batches[0][1][1] is not None: - weight = rnn.pad_sequence([batch[1][1] for batch in batches], batch_first=True) + weight = rnn.pad_sequence( + [batch[1][1] for batch in batches], batch_first=True + ) else: weight = None @@ -1793,7 +2134,11 @@ def _collate_fn( ) def to_dataloader( - self, train: bool = True, batch_size: int = 64, batch_sampler: Union[Sampler, str] = None, **kwargs + self, + train: bool = True, + batch_size: int = 64, + batch_sampler: Union[Sampler, str] = None, + **kwargs, ) -> DataLoader: """ Get dataloader from dataset. @@ -1883,7 +2228,9 @@ def to_dataloader( drop_last=kwargs["drop_last"], ) else: - raise ValueError(f"batch_sampler {sampler} unknown - see docstring for valid batch_sampler") + raise ValueError( + f"batch_sampler {sampler} unknown - see docstring for valid batch_sampler" + ) del kwargs["batch_size"] del kwargs["shuffle"] del kwargs["drop_last"] @@ -1904,9 +2251,15 @@ def x_to_index(self, x: Dict[str, torch.Tensor]) -> pd.DataFrame: for id in self.group_ids: index_data[id] = x["groups"][:, self.group_ids.index(id)].cpu() # decode if possible - index_data[id] = self.transform_values(id, index_data[id], inverse=True, group_id=True) + index_data[id] = self.transform_values( + id, index_data[id], inverse=True, group_id=True + ) index = pd.DataFrame(index_data) return index def __repr__(self) -> str: - return repr_class(self, attributes=self.get_parameters(), extra_attributes=dict(length=len(self))) + return repr_class( + self, + attributes=self.get_parameters(), + extra_attributes=dict(length=len(self)), + ) diff --git a/pytorch_forecasting/metrics/__init__.py b/pytorch_forecasting/metrics/__init__.py index 8f7c29f9a..d688def1a 100644 --- a/pytorch_forecasting/metrics/__init__.py +++ b/pytorch_forecasting/metrics/__init__.py @@ -19,7 +19,16 @@ NegativeBinomialDistributionLoss, NormalDistributionLoss, ) -from pytorch_forecasting.metrics.point import MAE, MAPE, MASE, RMSE, SMAPE, CrossEntropy, PoissonLoss, TweedieLoss +from pytorch_forecasting.metrics.point import ( + MAE, + MAPE, + MASE, + RMSE, + SMAPE, + CrossEntropy, + PoissonLoss, + TweedieLoss, +) from pytorch_forecasting.metrics.quantile import QuantileLoss __all__ = [ diff --git a/pytorch_forecasting/metrics/_mqf2_utils.py b/pytorch_forecasting/metrics/_mqf2_utils.py index 5dffbab45..9627b8a81 100644 --- a/pytorch_forecasting/metrics/_mqf2_utils.py +++ b/pytorch_forecasting/metrics/_mqf2_utils.py @@ -4,7 +4,12 @@ from cpflows.flows import DeepConvexFlow, SequentialFlow import torch -from torch.distributions import AffineTransform, Distribution, Normal, TransformedDistribution +from torch.distributions import ( + AffineTransform, + Distribution, + Normal, + TransformedDistribution, +) import torch.nn.functional as F @@ -67,14 +72,19 @@ def __init__( self.is_energy_score = is_energy_score self.estimate_logdet = estimate_logdet - def get_potential(self, x: torch.Tensor, context: Optional[torch.Tensor] = None) -> torch.Tensor: + def get_potential( + self, x: torch.Tensor, context: Optional[torch.Tensor] = None + ) -> torch.Tensor: n = x.size(0) output = self.picnn(x, context) if self.is_energy_score: return output else: - return F.softplus(self.w1) * output + F.softplus(self.w0) * (x.view(n, -1) ** 2).sum(1, keepdim=True) / 2 + return ( + F.softplus(self.w1) * output + + F.softplus(self.w0) * (x.view(n, -1) ** 2).sum(1, keepdim=True) / 2 + ) def forward_transform( self, @@ -84,7 +94,9 @@ def forward_transform( extra: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: if self.estimate_logdet: - return self.forward_transform_stochastic(x, logdet, context=context, extra=extra) + return self.forward_transform_stochastic( + x, logdet, context=context, extra=extra + ) else: return self.forward_transform_bruteforce(x, logdet, context=context) @@ -106,7 +118,9 @@ def __init__(self, networks: List[torch.nn.Module]) -> None: super().__init__(networks) self.networks = self.flows - def forward(self, x: torch.Tensor, context: Optional[torch.Tensor] = None) -> torch.Tensor: + def forward( + self, x: torch.Tensor, context: Optional[torch.Tensor] = None + ) -> torch.Tensor: for network in self.networks: if isinstance(network, DeepConvexNet): x = network.forward(x, context=context) @@ -139,7 +153,9 @@ def es_sample(self, hidden_state: torch.Tensor, dimension: int) -> torch.Tensor: standard_normal = Normal(zero, one) samples = self.forward( - standard_normal.sample([num_samples * dimension]).view(num_samples, dimension), + standard_normal.sample([num_samples * dimension]).view( + num_samples, dimension + ), context=hidden_state, ) @@ -184,7 +200,9 @@ def energy_score( # (numel_batch * dimension * es_num_samples x hidden_size) - hidden_state_repeat = hidden_state.repeat_interleave(repeats=es_num_samples, dim=0) + hidden_state_repeat = hidden_state.repeat_interleave( + repeats=es_num_samples, dim=0 + ) w = self.es_sample(hidden_state_repeat, dimension) w_prime = self.es_sample(hidden_state_repeat, dimension) @@ -280,7 +298,9 @@ def __init__( super().__init__(batch_shape=self.batch_shape, validate_args=validate_args) - self.context_length = self.hidden_state.shape[-2] if len(self.hidden_state.shape) > 2 else 1 + self.context_length = ( + self.hidden_state.shape[-2] if len(self.hidden_state.shape) > 2 else 1 + ) self.numel_batch = self.get_numel(self.batch_shape) # mean zero and std one @@ -338,7 +358,9 @@ def log_prob(self, z: torch.Tensor) -> torch.Tensor: z = torch.clamp(z, min=-self.threshold_input, max=self.threshold_input) z = self.stack_sliding_view(z) - loss = self.picnn.logp(z, self.hidden_state.reshape(-1, self.hidden_state.shape[-1])) + loss = self.picnn.logp( + z, self.hidden_state.reshape(-1, self.hidden_state.shape[-1]) + ) return loss @@ -369,9 +391,13 @@ def energy_score(self, z: torch.Tensor) -> torch.Tensor: beta = self.beta z = self.stack_sliding_view(z) - reshaped_hidden_state = self.hidden_state.reshape(-1, self.hidden_state.shape[-1]) + reshaped_hidden_state = self.hidden_state.reshape( + -1, self.hidden_state.shape[-1] + ) - loss = self.picnn.energy_score(z, reshaped_hidden_state, es_num_samples=es_num_samples, beta=beta) + loss = self.picnn.energy_score( + z, reshaped_hidden_state, es_num_samples=es_num_samples, beta=beta + ) return loss @@ -395,7 +421,9 @@ def rsample(self, sample_shape: torch.Size = torch.Size()) -> torch.Tensor: num_samples_per_batch = MQF2Distribution.get_numel(sample_shape) num_samples = num_samples_per_batch * numel_batch - hidden_state_repeat = self.hidden_state.repeat_interleave(repeats=num_samples_per_batch, dim=0) + hidden_state_repeat = self.hidden_state.repeat_interleave( + repeats=num_samples_per_batch, dim=0 + ) alpha = torch.rand( (num_samples, prediction_length), @@ -413,7 +441,9 @@ def rsample(self, sample_shape: torch.Size = torch.Size()) -> torch.Tensor: ) return samples - def quantile(self, alpha: torch.Tensor, hidden_state: Optional[torch.Tensor] = None) -> torch.Tensor: + def quantile( + self, alpha: torch.Tensor, hidden_state: Optional[torch.Tensor] = None + ) -> torch.Tensor: """ Generates the predicted paths associated with the quantile levels alpha @@ -514,11 +544,13 @@ def energy_score(self, y: torch.Tensor) -> torch.Tensor: return loss * (repeated_scale**beta) - def quantile(self, alpha: torch.Tensor, hidden_state: Optional[torch.Tensor] = None) -> torch.Tensor: + def quantile( + self, alpha: torch.Tensor, hidden_state: Optional[torch.Tensor] = None + ) -> torch.Tensor: result = self.base_dist.quantile(alpha, hidden_state=hidden_state) - result = result.reshape(self.base_dist.hidden_state.size(0), -1, self.base_dist.prediction_length).transpose( - 0, 1 - ) + result = result.reshape( + self.base_dist.hidden_state.size(0), -1, self.base_dist.prediction_length + ).transpose(0, 1) for transform in self.transforms: # transform separate for each prediction horizon result = transform(result) diff --git a/pytorch_forecasting/metrics/base_metrics.py b/pytorch_forecasting/metrics/base_metrics.py index 43c84998e..97d0a1278 100644 --- a/pytorch_forecasting/metrics/base_metrics.py +++ b/pytorch_forecasting/metrics/base_metrics.py @@ -28,7 +28,13 @@ class Metric(LightningMetric): higher_is_better = False is_differentiable = True - def __init__(self, name: str = None, quantiles: List[float] = None, reduction="mean", **kwargs): + def __init__( + self, + name: str = None, + quantiles: List[float] = None, + reduction="mean", + **kwargs, + ): """ Initialize metric @@ -63,7 +69,10 @@ def compute(self) -> torch.Tensor: raise NotImplementedError() def rescale_parameters( - self, parameters: torch.Tensor, target_scale: torch.Tensor, encoder: BaseEstimator + self, + parameters: torch.Tensor, + target_scale: torch.Tensor, + encoder: BaseEstimator, ) -> torch.Tensor: """ Rescale normalized parameters into the scale required for the output. @@ -90,13 +99,17 @@ def to_prediction(self, y_pred: torch.Tensor) -> torch.Tensor: """ if y_pred.ndim == 3: if self.quantiles is None: - assert y_pred.size(-1) == 1, "Prediction should only have one extra dimension" + assert ( + y_pred.size(-1) == 1 + ), "Prediction should only have one extra dimension" y_pred = y_pred[..., 0] else: y_pred = y_pred.mean(-1) return y_pred - def to_quantiles(self, y_pred: torch.Tensor, quantiles: List[float] = None) -> torch.Tensor: + def to_quantiles( + self, y_pred: torch.Tensor, quantiles: List[float] = None + ) -> torch.Tensor: """ Convert network prediction into a quantile prediction. @@ -116,10 +129,14 @@ def to_quantiles(self, y_pred: torch.Tensor, quantiles: List[float] = None) -> t elif y_pred.ndim == 3: if y_pred.size(2) > 1: # single dimension means all quantiles are the same assert quantiles is not None, "quantiles are not defined" - y_pred = torch.quantile(y_pred, torch.tensor(quantiles, device=y_pred.device), dim=2).permute(1, 2, 0) + y_pred = torch.quantile( + y_pred, torch.tensor(quantiles, device=y_pred.device), dim=2 + ).permute(1, 2, 0) return y_pred else: - raise ValueError(f"prediction has 1 or more than 3 dimensions: {y_pred.ndim}") + raise ValueError( + f"prediction has 1 or more than 3 dimensions: {y_pred.ndim}" + ) def __add__(self, metric: LightningMetric): composite_metric = CompositeMetric(metrics=[self]) @@ -175,9 +192,13 @@ def reset(self) -> None: def persistent(self, mode: bool = False) -> None: self.torchmetric.persistent(mode=mode) - def _convert(self, y_pred: torch.Tensor, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + def _convert( + self, y_pred: torch.Tensor, target: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: # unpack target into target and weights - if isinstance(target, (list, tuple)) and not isinstance(target, rnn.PackedSequence): + if isinstance(target, (list, tuple)) and not isinstance( + target, rnn.PackedSequence + ): target, weight = target if weight is not None: raise NotImplementedError( @@ -200,7 +221,9 @@ def _convert(self, y_pred: torch.Tensor, target: torch.Tensor) -> Tuple[torch.Te target = target.flatten() return y_pred, target - def update(self, y_pred: torch.Tensor, target: torch.Tensor, **kwargs) -> torch.Tensor: + def update( + self, y_pred: torch.Tensor, target: torch.Tensor, **kwargs + ) -> torch.Tensor: # flatten target and prediction y_pred_flattened, target_flattened = self._convert(y_pred, target) @@ -220,7 +243,9 @@ def __repr__(self): return f"WrappedTorchmetric({repr(self.torchmetric)})" -def convert_torchmetric_to_pytorch_forecasting_metric(metric: LightningMetric) -> Metric: +def convert_torchmetric_to_pytorch_forecasting_metric( + metric: LightningMetric, +) -> Metric: """ If necessary, convert a torchmetric to a PyTorch Forecasting metric that works with PyTorch Forecasting models. @@ -255,9 +280,13 @@ def __init__(self, metrics: List[LightningMetric], weights: List[float] = None): assert len(metrics) > 0, "at least one metric has to be specified" if weights is None: weights = [1.0 for _ in metrics] - assert len(weights) == len(metrics), "Number of weights has to match number of metrics" + assert len(weights) == len( + metrics + ), "Number of weights has to match number of metrics" - self.metrics = [convert_torchmetric_to_pytorch_forecasting_metric(m) for m in metrics] + self.metrics = [ + convert_torchmetric_to_pytorch_forecasting_metric(m) for m in metrics + ] self.weights = weights super().__init__() @@ -265,7 +294,12 @@ def __init__(self, metrics: List[LightningMetric], weights: List[float] = None): def __repr__(self): name = ( f"{self.__class__.__name__}(" - + ", ".join([f"{w:.3g} * {repr(m)}" if w != 1.0 else repr(m) for w, m in zip(self.weights, self.metrics)]) + + ", ".join( + [ + f"{w:.3g} * {repr(m)}" if w != 1.0 else repr(m) + for w, m in zip(self.weights, self.metrics) + ] + ) + ")" ) return name @@ -361,7 +395,11 @@ def forward(self, y_pred: torch.Tensor, y_actual: torch.Tensor, **kwargs): def _wrap_compute(self, compute: Callable) -> Callable: return compute - def _sync_dist(self, dist_sync_fn: Optional[Callable] = None, process_group: Optional[Any] = None) -> None: + def _sync_dist( + self, + dist_sync_fn: Optional[Callable] = None, + process_group: Optional[Any] = None, + ) -> None: # No syncing required here. syncing will be done in metrics pass @@ -503,7 +541,11 @@ class CompositeMetric(LightningMetric): higher_is_better = False is_differentiable = True - def __init__(self, metrics: Optional[List[LightningMetric]] = None, weights: Optional[List[float]] = None): + def __init__( + self, + metrics: Optional[List[LightningMetric]] = None, + weights: Optional[List[float]] = None, + ): """ Args: metrics (List[LightningMetric], optional): list of metrics to combine. Defaults to None. @@ -516,7 +558,9 @@ def __init__(self, metrics: Optional[List[LightningMetric]] = None, weights: Opt metrics = [] if weights is None: weights = [1.0 for _ in metrics] - assert len(weights) == len(metrics), "Number of weights has to match number of metrics" + assert len(weights) == len( + metrics + ), "Number of weights has to match number of metrics" self._metrics = list(metrics) self._weights = list(weights) @@ -525,7 +569,10 @@ def __init__(self, metrics: Optional[List[LightningMetric]] = None, weights: Opt def __repr__(self): name = " + ".join( - [f"{w:.3g} * {repr(m)}" if w != 1.0 else repr(m) for w, m in zip(self._weights, self._metrics)] + [ + f"{w:.3g} * {repr(m)}" if w != 1.0 else repr(m) + for w, m in zip(self._weights, self._metrics) + ] ) return name @@ -592,7 +639,11 @@ def forward(self, y_pred: torch.Tensor, y_actual: torch.Tensor, **kwargs): def _wrap_compute(self, compute: Callable) -> Callable: return compute - def _sync_dist(self, dist_sync_fn: Optional[Callable] = None, process_group: Optional[Any] = None) -> None: + def _sync_dist( + self, + dist_sync_fn: Optional[Callable] = None, + process_group: Optional[Any] = None, + ) -> None: # No syncing required here. syncing will be done in metrics pass @@ -664,7 +715,9 @@ def __init__(self, metric: Metric, **kwargs): super().__init__(**kwargs) self.metric = metric - def update(self, y_pred: torch.Tensor, y_actual: torch.Tensor, **kwargs) -> torch.Tensor: + def update( + self, y_pred: torch.Tensor, y_actual: torch.Tensor, **kwargs + ) -> torch.Tensor: """ Calculate composite metric @@ -680,9 +733,13 @@ def update(self, y_pred: torch.Tensor, y_actual: torch.Tensor, **kwargs) -> torc self.metric.update(y_pred_mean, y_mean, **kwargs) @staticmethod - def _calculate_mean(y_pred: torch.Tensor, y_actual: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + def _calculate_mean( + y_pred: torch.Tensor, y_actual: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: # extract target and weight - if isinstance(y_actual, (tuple, list)) and not isinstance(y_actual, rnn.PackedSequence): + if isinstance(y_actual, (tuple, list)) and not isinstance( + y_actual, rnn.PackedSequence + ): target, weight = y_actual else: target = y_actual @@ -736,7 +793,11 @@ def forward(self, y_pred: torch.Tensor, y_actual: torch.Tensor, **kwargs): def _wrap_compute(self, compute: Callable) -> Callable: return compute - def _sync_dist(self, dist_sync_fn: Optional[Callable] = None, process_group: Optional[Any] = None) -> None: + def _sync_dist( + self, + dist_sync_fn: Optional[Callable] = None, + process_group: Optional[Any] = None, + ) -> None: # No syncing required here. syncing will be done in metrics pass @@ -754,10 +815,20 @@ class MultiHorizonMetric(Metric): def __init__(self, reduction: str = "mean", **kwargs) -> None: super().__init__(reduction=reduction, **kwargs) - self.add_state("losses", default=torch.tensor(0.0), dist_reduce_fx="sum" if reduction != "none" else "cat") - self.add_state("lengths", default=torch.tensor(0), dist_reduce_fx="sum" if reduction != "none" else "mean") + self.add_state( + "losses", + default=torch.tensor(0.0), + dist_reduce_fx="sum" if reduction != "none" else "cat", + ) + self.add_state( + "lengths", + default=torch.tensor(0), + dist_reduce_fx="sum" if reduction != "none" else "mean", + ) - def loss(self, y_pred: Dict[str, torch.Tensor], target: torch.Tensor) -> torch.Tensor: + def loss( + self, y_pred: Dict[str, torch.Tensor], target: torch.Tensor + ) -> torch.Tensor: """ Calculate loss without reduction. Override in derived classes @@ -784,7 +855,9 @@ def update(self, y_pred, target): torch.Tensor: loss as a single number for backpropagation """ # unpack weight - if isinstance(target, (list, tuple)) and not isinstance(target, rnn.PackedSequence): + if isinstance(target, (list, tuple)) and not isinstance( + target, rnn.PackedSequence + ): target, weight = target else: weight = None @@ -793,7 +866,12 @@ def update(self, y_pred, target): if isinstance(target, rnn.PackedSequence): target, lengths = unpack_sequence(target) else: - lengths = torch.full((target.size(0),), fill_value=target.size(1), dtype=torch.long, device=target.device) + lengths = torch.full( + (target.size(0),), + fill_value=target.size(1), + dtype=torch.long, + device=target.device, + ) losses = self.loss(y_pred, target) # weight samples @@ -822,7 +900,9 @@ def compute(self): loss = self.reduce_loss(self.losses, lengths=self.lengths) return loss - def mask_losses(self, losses: torch.Tensor, lengths: torch.Tensor, reduction: str = None) -> torch.Tensor: + def mask_losses( + self, losses: torch.Tensor, lengths: torch.Tensor, reduction: str = None + ) -> torch.Tensor: """ Mask losses. @@ -838,7 +918,9 @@ def mask_losses(self, losses: torch.Tensor, lengths: torch.Tensor, reduction: st reduction = self.reduction if losses.ndim > 0: # mask loss - mask = torch.arange(losses.size(1), device=losses.device).unsqueeze(0) >= lengths.unsqueeze(-1) + mask = torch.arange(losses.size(1), device=losses.device).unsqueeze( + 0 + ) >= lengths.unsqueeze(-1) if losses.ndim > 2: mask = mask.unsqueeze(-1) dim_normalizer = losses.size(-1) @@ -851,7 +933,9 @@ def mask_losses(self, losses: torch.Tensor, lengths: torch.Tensor, reduction: st losses = losses.masked_fill(mask, 0.0) / dim_normalizer return losses - def reduce_loss(self, losses: torch.Tensor, lengths: torch.Tensor, reduction: str = None) -> torch.Tensor: + def reduce_loss( + self, losses: torch.Tensor, lengths: torch.Tensor, reduction: str = None + ) -> torch.Tensor: """ Reduce loss. @@ -904,7 +988,12 @@ class DistributionLoss(MultiHorizonMetric): distribution_class: distributions.Distribution distribution_arguments: List[str] - def __init__(self, name: str = None, quantiles: Optional[List[float]] = None, reduction="mean"): + def __init__( + self, + name: str = None, + quantiles: Optional[List[float]] = None, + reduction="mean", + ): """ Initialize metric @@ -981,7 +1070,9 @@ def sample(self, y_pred, n_samples: int) -> torch.Tensor: samples = samples.transpose(0, 1) return samples - def to_quantiles(self, y_pred: torch.Tensor, quantiles: List[float] = None, n_samples: int = 100) -> torch.Tensor: + def to_quantiles( + self, y_pred: torch.Tensor, quantiles: List[float] = None, n_samples: int = 100 + ) -> torch.Tensor: """ Convert network prediction into a quantile prediction. @@ -998,10 +1089,14 @@ def to_quantiles(self, y_pred: torch.Tensor, quantiles: List[float] = None, n_sa quantiles = self.quantiles try: distribution = self.map_x_to_distribution(y_pred) - quantiles = distribution.icdf(torch.tensor(quantiles, device=y_pred.device)[:, None, None]).permute(1, 2, 0) + quantiles = distribution.icdf( + torch.tensor(quantiles, device=y_pred.device)[:, None, None] + ).permute(1, 2, 0) except NotImplementedError: # resort to derive quantiles empirically samples = torch.sort(self.sample(y_pred, n_samples), -1).values - quantiles = torch.quantile(samples, torch.tensor(quantiles, device=samples.device), dim=2).permute(1, 2, 0) + quantiles = torch.quantile( + samples, torch.tensor(quantiles, device=samples.device), dim=2 + ).permute(1, 2, 0) return quantiles diff --git a/pytorch_forecasting/metrics/distributions.py b/pytorch_forecasting/metrics/distributions.py index 30c3db558..fc93f56e9 100644 --- a/pytorch_forecasting/metrics/distributions.py +++ b/pytorch_forecasting/metrics/distributions.py @@ -9,7 +9,10 @@ import torch.nn.functional as F from pytorch_forecasting.data.encoders import TorchNormalizer, softplus_inv -from pytorch_forecasting.metrics.base_metrics import DistributionLoss, MultivariateDistributionLoss +from pytorch_forecasting.metrics.base_metrics import ( + DistributionLoss, + MultivariateDistributionLoss, +) class NormalDistributionLoss(DistributionLoss): @@ -27,17 +30,31 @@ def map_x_to_distribution(self, x: torch.Tensor) -> distributions.Normal: return distributions.TransformedDistribution(distr, [scaler]) else: return distributions.TransformedDistribution( - distr, [scaler, TorchNormalizer.get_transform(self._transformation)["inverse_torch"]] + distr, + [ + scaler, + TorchNormalizer.get_transform(self._transformation)[ + "inverse_torch" + ], + ], ) def rescale_parameters( - self, parameters: torch.Tensor, target_scale: torch.Tensor, encoder: BaseEstimator + self, + parameters: torch.Tensor, + target_scale: torch.Tensor, + encoder: BaseEstimator, ) -> torch.Tensor: self._transformation = encoder.transformation loc = parameters[..., 0] scale = F.softplus(parameters[..., 1]) return torch.concat( - [target_scale.unsqueeze(1).expand(-1, loc.size(1), -1), loc.unsqueeze(-1), scale.unsqueeze(-1)], dim=-1 + [ + target_scale.unsqueeze(1).expand(-1, loc.size(1), -1), + loc.unsqueeze(-1), + scale.unsqueeze(-1), + ], + dim=-1, ) @@ -81,7 +98,9 @@ def __init__( # determine bias self._diag_bias: float = ( - softplus_inv(torch.tensor(self.sigma_init) ** 2).item() if self.sigma_init > 0.0 else 0.0 + softplus_inv(torch.tensor(self.sigma_init) ** 2).item() + if self.sigma_init > 0.0 + else 0.0 ) # determine normalizer to bring unscaled diagonal close to 1.0 self._cov_factor_scale: float = np.sqrt(self.rank) @@ -96,25 +115,47 @@ def map_x_to_distribution(self, x: torch.Tensor) -> distributions.Normal: cov_factor=x[..., 4:], cov_diag=x[..., 3], ) - scaler = distributions.AffineTransform(loc=x[0, :, 0], scale=x[0, :, 1], event_dim=1) + scaler = distributions.AffineTransform( + loc=x[0, :, 0], scale=x[0, :, 1], event_dim=1 + ) if self._transformation is None: return distributions.TransformedDistribution(distr, [scaler]) else: return distributions.TransformedDistribution( - distr, [scaler, TorchNormalizer.get_transform(self._transformation)["inverse_torch"]] + distr, + [ + scaler, + TorchNormalizer.get_transform(self._transformation)[ + "inverse_torch" + ], + ], ) def rescale_parameters( - self, parameters: torch.Tensor, target_scale: torch.Tensor, encoder: BaseEstimator + self, + parameters: torch.Tensor, + target_scale: torch.Tensor, + encoder: BaseEstimator, ) -> torch.Tensor: self._transformation = encoder.transformation # scale loc = parameters[..., 0].unsqueeze(-1) - scale = F.softplus(parameters[..., 1].unsqueeze(-1) + self._diag_bias) + self.sigma_minimum**2 + scale = ( + F.softplus(parameters[..., 1].unsqueeze(-1) + self._diag_bias) + + self.sigma_minimum**2 + ) cov_factor = parameters[..., 2:] / self._cov_factor_scale - return torch.concat([target_scale.unsqueeze(1).expand(-1, loc.size(1), -1), loc, scale, cov_factor], dim=-1) + return torch.concat( + [ + target_scale.unsqueeze(1).expand(-1, loc.size(1), -1), + loc, + scale, + cov_factor, + ], + dim=-1, + ) class NegativeBinomialDistributionLoss(DistributionLoss): @@ -136,19 +177,32 @@ def map_x_to_distribution(self, x: torch.Tensor) -> distributions.NegativeBinomi return self.distribution_class(total_count=r, probs=p) def rescale_parameters( - self, parameters: torch.Tensor, target_scale: torch.Tensor, encoder: BaseEstimator + self, + parameters: torch.Tensor, + target_scale: torch.Tensor, + encoder: BaseEstimator, ) -> torch.Tensor: - assert not encoder.center, "NegativeBinomialDistributionLoss is not compatible with `center=True` normalization" - assert encoder.transformation not in ["logit", "log"], "Cannot use bound transformation such as 'logit'" + assert ( + not encoder.center + ), "NegativeBinomialDistributionLoss is not compatible with `center=True` normalization" + assert encoder.transformation not in [ + "logit", + "log", + ], "Cannot use bound transformation such as 'logit'" if encoder.transformation in ["log1p"]: mean = torch.exp(parameters[..., 0] * target_scale[..., 1].unsqueeze(-1)) shape = ( F.softplus(torch.exp(parameters[..., 1])) - / torch.exp(target_scale[..., 1].unsqueeze(-1)).sqrt() # todo: is this correct? + / torch.exp( + target_scale[..., 1].unsqueeze(-1) + ).sqrt() # todo: is this correct? ) else: mean = F.softplus(parameters[..., 0]) * target_scale[..., 1].unsqueeze(-1) - shape = F.softplus(parameters[..., 1]) / target_scale[..., 1].unsqueeze(-1).sqrt() + shape = ( + F.softplus(parameters[..., 1]) + / target_scale[..., 1].unsqueeze(-1).sqrt() + ) return torch.stack([mean, shape], dim=-1) def to_prediction(self, y_pred: torch.Tensor) -> torch.Tensor: @@ -181,17 +235,24 @@ def map_x_to_distribution(self, x: torch.Tensor) -> distributions.LogNormal: return self.distribution_class(loc=x[..., 0], scale=x[..., 1]) def rescale_parameters( - self, parameters: torch.Tensor, target_scale: torch.Tensor, encoder: BaseEstimator + self, + parameters: torch.Tensor, + target_scale: torch.Tensor, + encoder: BaseEstimator, ) -> torch.Tensor: assert isinstance(encoder.transformation, str) and encoder.transformation in [ "log", "log1p", ], f"Log distribution requires log scaling but found `transformation={encoder.transform}`" - assert encoder.transformation not in ["logit"], "Cannot use bound transformation such as 'logit'" + assert encoder.transformation not in [ + "logit" + ], "Cannot use bound transformation such as 'logit'" scale = F.softplus(parameters[..., 1]) * target_scale[..., 1].unsqueeze(-1) - loc = parameters[..., 0] * target_scale[..., 1].unsqueeze(-1) + target_scale[..., 0].unsqueeze(-1) + loc = parameters[..., 0] * target_scale[..., 1].unsqueeze(-1) + target_scale[ + ..., 0 + ].unsqueeze(-1) return torch.stack([loc, scale], dim=-1) @@ -211,7 +272,9 @@ class BetaDistributionLoss(DistributionLoss): def map_x_to_distribution(self, x: torch.Tensor) -> distributions.Beta: mean = x[..., 0] shape = x[..., 1] - return self.distribution_class(concentration0=(1 - mean) * shape, concentration1=mean * shape) + return self.distribution_class( + concentration0=(1 - mean) * shape, concentration1=mean * shape + ) def loss(self, y_pred: torch.Tensor, y_actual: torch.Tensor) -> torch.Tensor: """ @@ -230,23 +293,38 @@ def loss(self, y_pred: torch.Tensor, y_actual: torch.Tensor) -> torch.Tensor: return loss def rescale_parameters( - self, parameters: torch.Tensor, target_scale: torch.Tensor, encoder: BaseEstimator + self, + parameters: torch.Tensor, + target_scale: torch.Tensor, + encoder: BaseEstimator, ) -> torch.Tensor: - assert encoder.transformation in ["logit"], "Beta distribution is only compatible with logit transformation" + assert encoder.transformation in [ + "logit" + ], "Beta distribution is only compatible with logit transformation" assert encoder.center, "Beta distribution requires normalizer to center data" - scaled_mean = encoder(dict(prediction=parameters[..., 0], target_scale=target_scale)) + scaled_mean = encoder( + dict(prediction=parameters[..., 0], target_scale=target_scale) + ) # need to first transform target scale standard deviation in logit space to real space # we assume a normal distribution in logit space (we used a logit transform and a standard scaler) # and know that the variance of the beta distribution is limited by `scaled_mean * (1 - scaled_mean)` - scaled_mean = scaled_mean * (1 - 2 * self.eps) + self.eps # ensure that mean is not exactly 0 or 1 + scaled_mean = ( + scaled_mean * (1 - 2 * self.eps) + self.eps + ) # ensure that mean is not exactly 0 or 1 mean_derivative = scaled_mean * (1 - scaled_mean) # we can approximate variance as # torch.pow(torch.tanh(target_scale[..., 1].unsqueeze(1) * torch.sqrt(mean_derivative)), 2) * mean_derivative # shape is (positive) parameter * mean_derivative / var shape_scaler = ( - torch.pow(torch.tanh(target_scale[..., 1].unsqueeze(1) * torch.sqrt(mean_derivative)), 2) + self.eps + torch.pow( + torch.tanh( + target_scale[..., 1].unsqueeze(1) * torch.sqrt(mean_derivative) + ), + 2, + ) + + self.eps ) scaled_shape = F.softplus(parameters[..., 1]) / shape_scaler return torch.stack([scaled_mean, scaled_shape], dim=-1) @@ -357,7 +435,12 @@ def map_x_to_distribution(self, x: torch.Tensor) -> distributions.Distribution: else: return self.transformed_distribution_class( distr, - [scaler, TorchNormalizer.get_transform(self._transformation)["inverse_torch"]], + [ + scaler, + TorchNormalizer.get_transform(self._transformation)[ + "inverse_torch" + ], + ], ) def loss(self, y_pred: torch.Tensor, y_actual: torch.Tensor) -> torch.Tensor: @@ -379,12 +462,19 @@ def loss(self, y_pred: torch.Tensor, y_actual: torch.Tensor) -> torch.Tensor: return loss.reshape(-1, 1) def rescale_parameters( - self, parameters: torch.Tensor, target_scale: torch.Tensor, encoder: BaseEstimator + self, + parameters: torch.Tensor, + target_scale: torch.Tensor, + encoder: BaseEstimator, ) -> torch.Tensor: self._transformation = encoder.transformation - return torch.concat([parameters.reshape(parameters.size(0), -1), target_scale], dim=-1) + return torch.concat( + [parameters.reshape(parameters.size(0), -1), target_scale], dim=-1 + ) - def to_quantiles(self, y_pred: torch.Tensor, quantiles: List[float] = None) -> torch.Tensor: + def to_quantiles( + self, y_pred: torch.Tensor, quantiles: List[float] = None + ) -> torch.Tensor: """ Convert network prediction into a quantile prediction. @@ -404,8 +494,12 @@ def to_quantiles(self, y_pred: torch.Tensor, quantiles: List[float] = None) -> t .repeat(y_pred.size(0), 1) .expand(-1, self.prediction_length) ) - hidden_state = distribution.base_dist.hidden_state.repeat_interleave(len(quantiles), dim=0) - result = distribution.quantile(alpha, hidden_state=hidden_state) # (batch_size * quantiles x prediction_length) + hidden_state = distribution.base_dist.hidden_state.repeat_interleave( + len(quantiles), dim=0 + ) + result = distribution.quantile( + alpha, hidden_state=hidden_state + ) # (batch_size * quantiles x prediction_length) # reshape result = result.reshape(-1, len(quantiles), self.prediction_length).transpose( @@ -419,7 +513,9 @@ class ImplicitQuantileNetwork(nn.Module): def __init__(self, input_size: int, hidden_size: int): super().__init__() self.quantile_layer = nn.Sequential( - nn.Linear(hidden_size, hidden_size), nn.PReLU(), nn.Linear(hidden_size, input_size) + nn.Linear(hidden_size, hidden_size), + nn.PReLU(), + nn.Linear(hidden_size, input_size), ) self.output_layer = nn.Sequential( nn.Linear(input_size, input_size), @@ -430,11 +526,15 @@ def __init__(self, input_size: int, hidden_size: int): def forward(self, x: torch.Tensor, quantiles: torch.Tensor) -> torch.Tensor: # embed quantiles - cos_emb_tau = torch.cos(quantiles[:, None] * self.cos_multipliers[None]) # n_quantiles x hidden_size + cos_emb_tau = torch.cos( + quantiles[:, None] * self.cos_multipliers[None] + ) # n_quantiles x hidden_size # modulates input depending on quantile cos_emb_tau = self.quantile_layer(cos_emb_tau) # n_quantiles x input_size - emb_inputs = x.unsqueeze(-2) * (1.0 + cos_emb_tau) # ... x n_quantiles x input_size + emb_inputs = x.unsqueeze(-2) * ( + 1.0 + cos_emb_tau + ) # ... x n_quantiles x input_size emb_outputs = self.output_layer(emb_inputs).squeeze(-1) # ... x n_quantiles return emb_outputs @@ -466,14 +566,18 @@ def __init__( if quantiles is None: quantiles = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98] super().__init__(quantiles=quantiles) - self.quantile_network = ImplicitQuantileNetwork(input_size=input_size, hidden_size=hidden_size) + self.quantile_network = ImplicitQuantileNetwork( + input_size=input_size, hidden_size=hidden_size + ) self.distribution_arguments = list(range(int(input_size))) self.n_loss_samples = n_loss_samples def sample(self, y_pred, n_samples: int) -> torch.Tensor: eps = 1e-3 # for a couple of random quantiles (excl. 0 and 1 as they would lead to infinities) - quantiles = torch.rand(size=(n_samples,), device=y_pred.device).clamp(eps, 1 - eps) + quantiles = torch.rand(size=(n_samples,), device=y_pred.device).clamp( + eps, 1 - eps + ) # make prediction samples = self.to_quantiles(y_pred, quantiles=quantiles) return samples @@ -491,19 +595,29 @@ def loss(self, y_pred: torch.Tensor, y_actual: torch.Tensor) -> torch.Tensor: """ eps = 1e-3 # for a couple of random quantiles (excl. 0 and 1 as they would lead to infinities) - quantiles = torch.rand(size=(self.n_loss_samples,), device=y_pred.device).clamp(eps, 1 - eps) + quantiles = torch.rand(size=(self.n_loss_samples,), device=y_pred.device).clamp( + eps, 1 - eps + ) # make prediction pred_quantiles = self.to_quantiles(y_pred, quantiles=quantiles) # and calculate quantile loss errors = y_actual[..., None] - pred_quantiles - loss = 2 * torch.fmax(quantiles[None] * errors, (quantiles[None] - 1) * errors).mean(dim=-1) + loss = 2 * torch.fmax( + quantiles[None] * errors, (quantiles[None] - 1) * errors + ).mean(dim=-1) return loss def rescale_parameters( - self, parameters: torch.Tensor, target_scale: torch.Tensor, encoder: BaseEstimator + self, + parameters: torch.Tensor, + target_scale: torch.Tensor, + encoder: BaseEstimator, ) -> torch.Tensor: self._transformation = encoder.transformation - return torch.concat([parameters, target_scale.unsqueeze(1).expand(-1, parameters.size(1), -1)], dim=-1) + return torch.concat( + [parameters, target_scale.unsqueeze(1).expand(-1, parameters.size(1), -1)], + dim=-1, + ) def to_prediction(self, y_pred: torch.Tensor, n_samples: int = 100) -> torch.Tensor: if n_samples is None: @@ -512,7 +626,9 @@ def to_prediction(self, y_pred: torch.Tensor, n_samples: int = 100) -> torch.Ten # for a couple of random quantiles (excl. 0 and 1 as they would lead to infinities) make prediction return self.sample(y_pred, n_samples=n_samples).mean(-1) - def to_quantiles(self, y_pred: torch.Tensor, quantiles: List[float] = None) -> torch.Tensor: + def to_quantiles( + self, y_pred: torch.Tensor, quantiles: List[float] = None + ) -> torch.Tensor: """ Convert network prediction into a quantile prediction. diff --git a/pytorch_forecasting/metrics/point.py b/pytorch_forecasting/metrics/point.py index 753d309e5..a2d930fc0 100644 --- a/pytorch_forecasting/metrics/point.py +++ b/pytorch_forecasting/metrics/point.py @@ -32,9 +32,16 @@ class PoissonLoss(MultiHorizonMetric): The result is the model prediction. """ - def loss(self, y_pred: Dict[str, torch.Tensor], target: torch.Tensor) -> torch.Tensor: + def loss( + self, y_pred: Dict[str, torch.Tensor], target: torch.Tensor + ) -> torch.Tensor: return F.poisson_nll_loss( - super().to_prediction(y_pred), target, log_input=True, full=False, eps=1e-6, reduction="none" + super().to_prediction(y_pred), + target, + log_input=True, + full=False, + eps=1e-6, + reduction="none", ) def to_prediction(self, out: Dict[str, torch.Tensor]): @@ -50,7 +57,12 @@ def to_quantiles(self, out: Dict[str, torch.Tensor], quantiles=None): predictions = self.to_prediction(out) return ( torch.stack( - [torch.tensor(scipy.stats.poisson(predictions.detach().cpu().numpy()).ppf(q)) for q in quantiles], + [ + torch.tensor( + scipy.stats.poisson(predictions.detach().cpu().numpy()).ppf(q) + ) + for q in quantiles + ], dim=-1, ) .type(predictions.dtype) @@ -101,9 +113,9 @@ class CrossEntropy(MultiHorizonMetric): """ def loss(self, y_pred, target): - loss = F.cross_entropy(y_pred.view(-1, y_pred.size(-1)), target.view(-1), reduction="none").view( - -1, target.size(-1) - ) + loss = F.cross_entropy( + y_pred.view(-1, y_pred.size(-1)), target.view(-1), reduction="none" + ).view(-1, target.size(-1)) return loss def to_prediction(self, y_pred: torch.Tensor) -> torch.Tensor: @@ -120,7 +132,9 @@ def to_prediction(self, y_pred: torch.Tensor) -> torch.Tensor: """ return y_pred.argmax(dim=-1) - def to_quantiles(self, y_pred: torch.Tensor, quantiles: List[float] = None) -> torch.Tensor: + def to_quantiles( + self, y_pred: torch.Tensor, quantiles: List[float] = None + ) -> torch.Tensor: """ Convert network prediction into a quantile prediction. @@ -189,7 +203,12 @@ def update( if isinstance(target, rnn.PackedSequence): target, lengths = unpack_sequence(target) else: - lengths = torch.full((target.size(0),), fill_value=target.size(1), dtype=torch.long, device=target.device) + lengths = torch.full( + (target.size(0),), + fill_value=target.size(1), + dtype=torch.long, + device=target.device, + ) # determine lengths for encoder if encoder_lengths is None: @@ -199,7 +218,9 @@ def update( assert not target.requires_grad # calculate loss with "none" reduction - scaling = self.calculate_scaling(target, lengths, encoder_target, encoder_lengths) + scaling = self.calculate_scaling( + target, lengths, encoder_target, encoder_lengths + ) losses = self.loss(y_pred, target, scaling) # weight samples @@ -217,19 +238,28 @@ def calculate_scaling(target, lengths, encoder_target, encoder_lengths): eps = 1e-6 batch_size = target.size(0) total_lengths = lengths + encoder_lengths - assert (total_lengths > 1).all(), "Need at least 2 target values to be able to calculate MASE" + assert ( + total_lengths > 1 + ).all(), "Need at least 2 target values to be able to calculate MASE" max_length = target.size(1) + encoder_target.size(1) - if (total_lengths != max_length).any(): # if decoder or encoder targets have sequences of different lengths + if ( + total_lengths != max_length + ).any(): # if decoder or encoder targets have sequences of different lengths targets = torch.cat( [ encoder_target, - torch.zeros(batch_size, target.size(1), device=target.device, dtype=encoder_target.dtype), + torch.zeros( + batch_size, + target.size(1), + device=target.device, + dtype=encoder_target.dtype, + ), ], dim=1, ) - target_index = torch.arange(target.size(1), device=target.device, dtype=torch.long).unsqueeze(0).expand( - batch_size, -1 - ) + encoder_lengths.unsqueeze(-1) + target_index = torch.arange( + target.size(1), device=target.device, dtype=torch.long + ).unsqueeze(0).expand(batch_size, -1) + encoder_lengths.unsqueeze(-1) targets.scatter_(dim=1, src=target, index=target_index) else: targets = torch.cat([encoder_target, target], dim=1) @@ -242,7 +272,9 @@ def calculate_scaling(target, lengths, encoder_target, encoder_lengths): zero_correction_indices = total_lengths[not_maximum_length] - 1 if len(zero_correction_indices) > 0: diffs[ - torch.arange(batch_size, dtype=torch.long, device=diffs.device)[not_maximum_length], + torch.arange(batch_size, dtype=torch.long, device=diffs.device)[ + not_maximum_length + ], zero_correction_indices, ] = 0.0 diff --git a/pytorch_forecasting/models/__init__.py b/pytorch_forecasting/models/__init__.py index ec143f2f8..d4173f620 100644 --- a/pytorch_forecasting/models/__init__.py +++ b/pytorch_forecasting/models/__init__.py @@ -15,7 +15,9 @@ from pytorch_forecasting.models.nhits import NHiTS from pytorch_forecasting.models.nn import GRU, LSTM, MultiEmbedding, get_rnn from pytorch_forecasting.models.rnn import RecurrentNetwork -from pytorch_forecasting.models.temporal_fusion_transformer import TemporalFusionTransformer +from pytorch_forecasting.models.temporal_fusion_transformer import ( + TemporalFusionTransformer, +) __all__ = [ "NBeats", diff --git a/pytorch_forecasting/models/base_model.py b/pytorch_forecasting/models/base_model.py index 3068f558a..238c87816 100644 --- a/pytorch_forecasting/models/base_model.py +++ b/pytorch_forecasting/models/base_model.py @@ -28,7 +28,12 @@ import yaml from pytorch_forecasting.data import TimeSeriesDataSet -from pytorch_forecasting.data.encoders import EncoderNormalizer, GroupNormalizer, MultiNormalizer, NaNLabelEncoder +from pytorch_forecasting.data.encoders import ( + EncoderNormalizer, + GroupNormalizer, + MultiNormalizer, + NaNLabelEncoder, +) from pytorch_forecasting.metrics import ( MAE, MASE, @@ -52,7 +57,10 @@ groupby_apply, to_list, ) -from pytorch_forecasting.utils._dependencies import _check_matplotlib, _get_installed_packages +from pytorch_forecasting.utils._dependencies import ( + _check_matplotlib, + _get_installed_packages, +) # todo: compile models @@ -82,7 +90,11 @@ def _torch_cat_na(x: List[torch.Tensor]) -> torch.Tensor: [ xi, torch.full( - (xi.shape[0], max_first_len - xi.shape[1], *xi.shape[2:]), + ( + xi.shape[0], + max_first_len - xi.shape[1], + *xi.shape[2:], + ), float("nan"), device=xi.device, ), @@ -95,7 +107,9 @@ def _torch_cat_na(x: List[torch.Tensor]) -> torch.Tensor: # check if remaining dimensions are all equal if x[0].ndim > 2: - remaining_dimensions_equal = all(all(xi.size(i) == x[0].size(i) for xi in x) for i in range(2, x[0].ndim)) + remaining_dimensions_equal = all( + all(xi.size(i) == x[0].size(i) for xi in x) for i in range(2, x[0].ndim) + ) else: remaining_dimensions_equal = True @@ -113,8 +127,15 @@ def _torch_cat_na(x: List[torch.Tensor]) -> torch.Tensor: def _concatenate_output( - output: List[Dict[str, List[Union[List[torch.Tensor], torch.Tensor, bool, int, str, np.ndarray]]]] -) -> Dict[str, Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, int, bool, str]]]]: + output: List[ + Dict[ + str, + List[Union[List[torch.Tensor], torch.Tensor, bool, int, str, np.ndarray]], + ] + ] +) -> Dict[ + str, Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, int, bool, str]]] +]: """ Concatenate multiple batches of output dictionary. @@ -137,12 +158,20 @@ def _concatenate_output( output_cat[name] = [] for target_id in range(len(v0)): if isinstance(v0[target_id], torch.Tensor): - output_cat[name].append(_torch_cat_na([out[name][target_id] for out in output])) + output_cat[name].append( + _torch_cat_na([out[name][target_id] for out in output]) + ) else: try: - output_cat[name].append(np.concatenate([out[name][target_id] for out in output], axis=0)) + output_cat[name].append( + np.concatenate( + [out[name][target_id] for out in output], axis=0 + ) + ) except ValueError: - output_cat[name] = [item for out in output for item in out[name][target_id]] + output_cat[name] = [ + item for out in output for item in out[name][target_id] + ] # flatten list for everything else else: try: @@ -168,7 +197,9 @@ def _concatenate_output( # return type of predict function PredictTuple = namedtuple( - "prediction", ["output", "x", "index", "decoder_lengths", "y"], defaults=(None, None, None, None, None) + "prediction", + ["output", "x", "index", "decoder_lengths", "y"], + defaults=(None, None, None, None, None), ) @@ -241,7 +272,12 @@ def on_predict_batch_end( # mask non-predictions if isinstance(out, (list, tuple)): out = [ - o.masked_fill(nan_mask, torch.tensor(float("nan"))) if o.dtype == torch.float else o for o in out + ( + o.masked_fill(nan_mask, torch.tensor(float("nan"))) + if o.dtype == torch.float + else o + ) + for o in out ] elif out.dtype == torch.float: # only floats can be filled with nans out = out.masked_fill(nan_mask, torch.tensor(float("nan"))) @@ -250,11 +286,19 @@ def on_predict_batch_end( # mask non-predictions if isinstance(out, (list, tuple)): out = [ - o.masked_fill(nan_mask.unsqueeze(-1), torch.tensor(float("nan"))) if o.dtype == torch.float else o + ( + o.masked_fill( + nan_mask.unsqueeze(-1), torch.tensor(float("nan")) + ) + if o.dtype == torch.float + else o + ) for o in out ] elif out.dtype == torch.float: - out = out.masked_fill(nan_mask.unsqueeze(-1), torch.tensor(float("nan"))) + out = out.masked_fill( + nan_mask.unsqueeze(-1), torch.tensor(float("nan")) + ) elif self.mode == "raw": pass else: @@ -279,17 +323,32 @@ def on_predict_batch_end( out = Prediction(**out) # write to disk if self.output_dir is not None: - super().on_predict_batch_end(trainer, pl_module, out, batch, batch_idx, dataloader_idx) + super().on_predict_batch_end( + trainer, pl_module, out, batch, batch_idx, dataloader_idx + ) - def write_on_batch_end(self, trainer, pl_module, prediction, batch_indices, batch, batch_idx, dataloader_idx): - torch.save(prediction, os.path.join(self.output_dir, f"predictions_{batch_idx}.pt")) + def write_on_batch_end( + self, + trainer, + pl_module, + prediction, + batch_indices, + batch, + batch_idx, + dataloader_idx, + ): + torch.save( + prediction, os.path.join(self.output_dir, f"predictions_{batch_idx}.pt") + ) self._reset_data() def write_on_epoch_end(self, trainer, pl_module, predictions, batch_indices): torch.save(predictions, os.path.join(self.output_dir, "predictions.pt")) self._reset_data() - def on_predict_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + def on_predict_epoch_end( + self, trainer: "pl.Trainer", pl_module: "pl.LightningModule" + ) -> None: output = self._output if len(output) > 0: # concatenate output (of different batches) @@ -299,7 +358,10 @@ def on_predict_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningMo and len(output[0]) > 0 and isinstance(output[0][0], torch.Tensor) ): - output = [_torch_cat_na([out[idx] for out in output]) for idx in range(len(output[0]))] + output = [ + _torch_cat_na([out[idx] for out in output]) + for idx in range(len(output[0])) + ] else: output = _torch_cat_na(output) elif self.mode == "raw": @@ -307,7 +369,12 @@ def on_predict_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningMo # if len(output) > 0: # generate output - if self.return_x or self.return_index or self.return_decoder_lengths or self.return_y: + if ( + self.return_x + or self.return_index + or self.return_decoder_lengths + or self.return_y + ): output = dict(output=output) if self.return_x: output["x"] = _concatenate_output(self._x_list) @@ -329,7 +396,12 @@ def on_predict_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningMo # write to disk if self.interval.on_epoch: - self.write_on_epoch_end(trainer, pl_module, self._output, trainer.predict_loop.epoch_batch_indices) + self.write_on_epoch_end( + trainer, + pl_module, + self._output, + trainer.predict_loop.epoch_batch_indices, + ) self._reset_data(result=False) @property @@ -484,7 +556,11 @@ def __init__( ) self.save_hyperparameters( - {name: val for name, val in init_args.items() if name not in self.hparams and name not in ["self"]} + { + name: val + for name, val in init_args.items() + if name not in self.hparams and name not in ["self"] + } ) # update log interval if not defined @@ -493,16 +569,26 @@ def __init__( if not hasattr(self, "loss"): if isinstance(loss, (tuple, list)): - self.loss = MultiLoss(metrics=[convert_torchmetric_to_pytorch_forecasting_metric(l) for l in loss]) + self.loss = MultiLoss( + metrics=[ + convert_torchmetric_to_pytorch_forecasting_metric(l) + for l in loss + ] + ) else: self.loss = convert_torchmetric_to_pytorch_forecasting_metric(loss) if not hasattr(self, "logging_metrics"): self.logging_metrics = nn.ModuleList( - [convert_torchmetric_to_pytorch_forecasting_metric(l) for l in logging_metrics] + [ + convert_torchmetric_to_pytorch_forecasting_metric(l) + for l in logging_metrics + ] ) if not hasattr(self, "output_transformer"): self.output_transformer = output_transformer - if not hasattr(self, "optimizer"): # callables are removed from hyperparameters, so better to save them + if not hasattr( + self, "optimizer" + ): # callables are removed from hyperparameters, so better to save them self.optimizer = self.hparams.optimizer if not hasattr(self, "dataset_parameters"): self.dataset_parameters = dataset_parameters @@ -587,12 +673,16 @@ def transform_output( encoder=self.output_transformer.normalizers, # need to use normalizer per encoder ) else: - out = loss.rescale_parameters(prediction, target_scale=target_scale, encoder=self.output_transformer) + out = loss.rescale_parameters( + prediction, target_scale=target_scale, encoder=self.output_transformer + ) return out @staticmethod def deduce_default_output_parameters( - dataset: TimeSeriesDataSet, kwargs: Dict[str, Any], default_loss: MultiHorizonMetric = None + dataset: TimeSeriesDataSet, + kwargs: Dict[str, Any], + default_loss: MultiHorizonMetric = None, ) -> Dict[str, Any]: """ Deduce default parameters for output for `from_dataset()` method. @@ -633,7 +723,9 @@ def get_output_size(normalizer, loss): if isinstance(loss, MultiLoss) and "output_size" not in kwargs: new_kwargs["output_size"] = [ get_output_size(normalizer, l) - for normalizer, l in zip(dataset.target_normalizer.normalizers, loss.metrics) + for normalizer, l in zip( + dataset.target_normalizer.normalizers, loss.metrics + ) ] elif "output_size" not in kwargs: new_kwargs["output_size"] = get_output_size(dataset.target_normalizer, loss) @@ -659,7 +751,9 @@ def on_train_epoch_end(self): self.training_step_outputs.clear() def predict_step(self, batch, batch_idx): - predict_callback = [c for c in self.trainer.callbacks if isinstance(c, PredictCallback)][0] + predict_callback = [ + c for c in self.trainer.callbacks if isinstance(c, PredictCallback) + ][0] x, y = batch _, out = self.step(x, y, batch_idx, **predict_callback.predict_kwargs) return out # need to return output to be able to use predict callback @@ -712,8 +806,12 @@ def create_log( Dict[str, Any]: log dictionary to be returned by training and validation steps """ - prediction_kwargs = {} if prediction_kwargs is None else deepcopy(prediction_kwargs) - quantiles_kwargs = {} if quantiles_kwargs is None else deepcopy(quantiles_kwargs) + prediction_kwargs = ( + {} if prediction_kwargs is None else deepcopy(prediction_kwargs) + ) + quantiles_kwargs = ( + {} if quantiles_kwargs is None else deepcopy(quantiles_kwargs) + ) # log if isinstance(self.loss, DistributionLoss): prediction_kwargs.setdefault("n_samples", 20) @@ -724,12 +822,20 @@ def create_log( self.log_metrics(x, y, out, prediction_kwargs=prediction_kwargs) if self.log_interval > 0: self.log_prediction( - x, out, batch_idx, prediction_kwargs=prediction_kwargs, quantiles_kwargs=quantiles_kwargs + x, + out, + batch_idx, + prediction_kwargs=prediction_kwargs, + quantiles_kwargs=quantiles_kwargs, ) return {} def step( - self, x: Dict[str, torch.Tensor], y: Tuple[torch.Tensor, torch.Tensor], batch_idx: int, **kwargs + self, + x: Dict[str, torch.Tensor], + y: Tuple[torch.Tensor, torch.Tensor], + batch_idx: int, + **kwargs, ) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: """ Run for each train/val step. @@ -751,7 +857,10 @@ def step( y = ( [ rnn.pack_padded_sequence( - y_part, lengths=x["decoder_lengths"].cpu(), batch_first=True, enforce_sorted=False + y_part, + lengths=x["decoder_lengths"].cpu(), + batch_first=True, + enforce_sorted=False, ) for y_part in y[0] ], @@ -760,7 +869,10 @@ def step( else: y = ( rnn.pack_padded_sequence( - y[0], lengths=x["decoder_lengths"].cpu(), batch_first=True, enforce_sorted=False + y[0], + lengths=x["decoder_lengths"].cpu(), + batch_first=True, + enforce_sorted=False, ), y[1], ) @@ -793,10 +905,15 @@ def step( # select relevant features indices = torch.tensor( - [self.hparams.x_reals.index(name) for name in self.hparams.monotone_constaints.keys()] + [ + self.hparams.x_reals.index(name) + for name in self.hparams.monotone_constaints.keys() + ] ) monotonicity = torch.tensor( - list(self.hparams.monotone_constaints.values()), dtype=gradient.dtype, device=gradient.device + list(self.hparams.monotone_constaints.values()), + dtype=gradient.dtype, + device=gradient.device, ) # add additionl loss if gradient points in wrong direction gradient = gradient[..., indices] * monotonicity[None, None] @@ -807,7 +924,10 @@ def step( if not self.predicting: if isinstance(self.loss, (MASE, MultiLoss)): loss = self.loss( - prediction, y, encoder_target=x["encoder_target"], encoder_lengths=x["encoder_lengths"] + prediction, + y, + encoder_target=x["encoder_target"], + encoder_lengths=x["encoder_lengths"], ) else: loss = self.loss(prediction, y) @@ -822,7 +942,10 @@ def step( prediction = out["prediction"] if not self.predicting: if isinstance(self.loss, (MASE, MultiLoss)): - mase_kwargs = dict(encoder_target=x["encoder_target"], encoder_lengths=x["encoder_lengths"]) + mase_kwargs = dict( + encoder_target=x["encoder_target"], + encoder_lengths=x["encoder_lengths"], + ) loss = self.loss(prediction, y, **mase_kwargs) else: loss = self.loss(prediction, y) @@ -874,7 +997,10 @@ def log_metrics( y_true = (y_part, y[1]) if isinstance(metric, MASE): loss_value = metric( - y_point, y_true, encoder_target=encoder_target, encoder_lengths=x["encoder_lengths"] + y_point, + y_true, + encoder_target=encoder_target, + encoder_lengths=x["encoder_lengths"], ) else: loss_value = metric(y_point, y_true) @@ -968,7 +1094,11 @@ def _logger_supports(self, method: str) -> bool: return hasattr(self.logger.experiment, method) def log_prediction( - self, x: Dict[str, torch.Tensor], out: Dict[str, torch.Tensor], batch_idx: int, **kwargs + self, + x: Dict[str, torch.Tensor], + out: Dict[str, torch.Tensor], + batch_idx: int, + **kwargs, ) -> None: """ Log metrics every training/validation step. @@ -980,10 +1110,14 @@ def log_prediction( **kwargs: paramters to pass to ``plot_prediction`` """ # log single prediction figure - if (batch_idx % self.log_interval == 0 or self.log_interval < 1.0) and self.log_interval > 0: + if ( + batch_idx % self.log_interval == 0 or self.log_interval < 1.0 + ) and self.log_interval > 0: if self.log_interval < 1.0: # log multiple steps log_indices = torch.arange( - 0, len(x["encoder_lengths"]), max(1, round(self.log_interval * len(x["encoder_lengths"]))) + 0, + len(x["encoder_lengths"]), + max(1, round(self.log_interval * len(x["encoder_lengths"]))), ) else: log_indices = [0] @@ -998,7 +1132,9 @@ def log_prediction( return None for idx in log_indices: - fig = self.plot_prediction(x, out, idx=idx, add_loss_to_title=True, **kwargs) + fig = self.plot_prediction( + x, out, idx=idx, add_loss_to_title=True, **kwargs + ) tag = f"{self.current_stage} prediction" if self.training: tag += f" of item {idx} in global batch {self.global_step}" @@ -1060,7 +1196,9 @@ def plot_prediction( encoder_targets = to_list(x["encoder_target"]) decoder_targets = to_list(x["decoder_target"]) - y_raws = to_list(out["prediction"]) # raw predictions - used for calculating loss + y_raws = to_list( + out["prediction"] + ) # raw predictions - used for calculating loss y_hats = to_list(self.to_prediction(out, **prediction_kwargs)) y_quantiles = to_list(self.to_quantiles(out, **quantiles_kwargs)) @@ -1074,7 +1212,11 @@ def plot_prediction( y = torch.cat( ( y_all[: x["encoder_lengths"][idx]], - y_all[max_encoder_length : (max_encoder_length + x["decoder_lengths"][idx])], + y_all[ + max_encoder_length : ( + max_encoder_length + x["decoder_lengths"][idx] + ) + ], ), ) # move predictions to cpu @@ -1115,12 +1257,25 @@ def plot_prediction( plotter(x_pred, y_hat, label="predicted", c=pred_color) # plot predicted quantiles - plotter(x_pred, y_quantile[:, y_quantile.shape[1] // 2], c=pred_color, alpha=0.15) + plotter( + x_pred, + y_quantile[:, y_quantile.shape[1] // 2], + c=pred_color, + alpha=0.15, + ) for i in range(y_quantile.shape[1] // 2): if len(x_pred) > 1: - ax.fill_between(x_pred, y_quantile[:, i], y_quantile[:, -i - 1], alpha=0.15, fc=pred_color) + ax.fill_between( + x_pred, + y_quantile[:, i], + y_quantile[:, -i - 1], + alpha=0.15, + fc=pred_color, + ) else: - quantiles = torch.tensor([[y_quantile[0, i]], [y_quantile[0, -i - 1]]]) + quantiles = torch.tensor( + [[y_quantile[0, i]], [y_quantile[0, -i - 1]]] + ) ax.errorbar( x_pred, y[[-n_pred]], @@ -1137,9 +1292,13 @@ def plot_prediction( elif isinstance(add_loss_to_title, Metric): loss = add_loss_to_title else: - raise ValueError(f"add_loss_to_title '{add_loss_to_title}'' is unkown") + raise ValueError( + f"add_loss_to_title '{add_loss_to_title}'' is unkown" + ) if isinstance(loss, MASE): - loss_value = loss(y_raw[None], (y[-n_pred:][None], None), y[:n_pred][None]) + loss_value = loss( + y_raw[None], (y[-n_pred:][None], None), y[:n_pred][None] + ) elif isinstance(loss, Metric): try: loss_value = loss(y_raw[None], (y[-n_pred:][None], None)) @@ -1168,7 +1327,9 @@ def log_gradient_flow(self, named_parameters: Dict[str, torch.Tensor]) -> None: if p.grad is not None and p.requires_grad and "bias" not in name: layers.append(name) ave_grads.append(p.grad.abs().cpu().mean()) - self.logger.experiment.add_histogram(tag=name, values=p.grad, global_step=self.global_step) + self.logger.experiment.add_histogram( + tag=name, values=p.grad, global_step=self.global_step + ) mpl_available = _check_matplotlib("log_gradient_flow", raise_error=False) @@ -1184,7 +1345,9 @@ def log_gradient_flow(self, named_parameters: Dict[str, torch.Tensor]) -> None: ax.set_ylabel("Average gradient") ax.set_yscale("log") ax.set_title("Gradient flow") - self.logger.experiment.add_figure("Gradient flow", fig, global_step=self.global_step) + self.logger.experiment.add_figure( + "Gradient flow", fig, global_step=self.global_step + ) def on_after_backward(self): """ @@ -1222,17 +1385,26 @@ def configure_optimizers(self): if callable(self.optimizer): try: optimizer = self.optimizer( - self.parameters(), lr=lr, weight_decay=self.hparams.weight_decay, **optimizer_params + self.parameters(), + lr=lr, + weight_decay=self.hparams.weight_decay, + **optimizer_params, ) except TypeError: # in case there is no weight decay optimizer = self.optimizer(self.parameters(), lr=lr, **optimizer_params) elif self.hparams.optimizer == "adam": optimizer = torch.optim.Adam( - self.parameters(), lr=lr, weight_decay=self.hparams.weight_decay, **optimizer_params + self.parameters(), + lr=lr, + weight_decay=self.hparams.weight_decay, + **optimizer_params, ) elif self.hparams.optimizer == "adamw": optimizer = torch.optim.AdamW( - self.parameters(), lr=lr, weight_decay=self.hparams.weight_decay, **optimizer_params + self.parameters(), + lr=lr, + weight_decay=self.hparams.weight_decay, + **optimizer_params, ) elif self.hparams.optimizer == "ranger": if not ptopt_in_env: @@ -1251,46 +1423,74 @@ def configure_optimizers(self): elif self.trainer.limit_train_batches is not None: # if finding limiting train batches, set iterations to it optimizer_params.setdefault( - "num_iterations", min(self.trainer.num_training_batches, self.trainer.limit_train_batches) + "num_iterations", + min( + self.trainer.num_training_batches, + self.trainer.limit_train_batches, + ), ) else: # if finding not limiting train batches, set iterations to dataloader length - optimizer_params.setdefault("num_iterations", self.trainer.num_training_batches) - optimizer = Ranger21(self.parameters(), lr=lr, weight_decay=self.hparams.weight_decay, **optimizer_params) + optimizer_params.setdefault( + "num_iterations", self.trainer.num_training_batches + ) + optimizer = Ranger21( + self.parameters(), + lr=lr, + weight_decay=self.hparams.weight_decay, + **optimizer_params, + ) elif self.hparams.optimizer == "sgd": optimizer = torch.optim.SGD( - self.parameters(), lr=lr, weight_decay=self.hparams.weight_decay, **optimizer_params + self.parameters(), + lr=lr, + weight_decay=self.hparams.weight_decay, + **optimizer_params, ) elif hasattr(torch.optim, self.hparams.optimizer): try: optimizer = getattr(torch.optim, self.hparams.optimizer)( - self.parameters(), lr=lr, weight_decay=self.hparams.weight_decay, **optimizer_params + self.parameters(), + lr=lr, + weight_decay=self.hparams.weight_decay, + **optimizer_params, ) except TypeError: # in case there is no weight decay - optimizer = getattr(torch.optim, self.hparams.optimizer)(self.parameters(), lr=lr, **optimizer_params) + optimizer = getattr(torch.optim, self.hparams.optimizer)( + self.parameters(), lr=lr, **optimizer_params + ) elif ptopt_in_env: import pytorch_optimizer if hasattr(pytorch_optimizer, self.hparams.optimizer): try: optimizer = getattr(pytorch_optimizer, self.hparams.optimizer)( - self.parameters(), lr=lr, weight_decay=self.hparams.weight_decay, **optimizer_params + self.parameters(), + lr=lr, + weight_decay=self.hparams.weight_decay, + **optimizer_params, ) except TypeError: # in case there is no weight decay optimizer = getattr(pytorch_optimizer, self.hparams.optimizer)( self.parameters(), lr=lr, **optimizer_params ) else: - raise ValueError(f"Optimizer of self.hparams.optimizer={self.hparams.optimizer} unknown") + raise ValueError( + f"Optimizer of self.hparams.optimizer={self.hparams.optimizer} unknown" + ) else: - raise ValueError(f"Optimizer of self.hparams.optimizer={self.hparams.optimizer} unknown") + raise ValueError( + f"Optimizer of self.hparams.optimizer={self.hparams.optimizer} unknown" + ) # set scheduler if isinstance(lrs, (list, tuple)): # change for each epoch # normalize lrs lrs = np.array(lrs) / lrs[0] scheduler_config = { - "scheduler": LambdaLR(optimizer, lambda epoch: lrs[min(epoch, len(lrs) - 1)]), + "scheduler": LambdaLR( + optimizer, lambda epoch: lrs[min(epoch, len(lrs) - 1)] + ), "interval": "epoch", "frequency": 1, "strict": False, @@ -1338,7 +1538,9 @@ def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs) -> LightningModule: net.loss, MultiLoss ), f"multiple targets require loss to be MultiLoss but found {net.loss}" else: - assert not isinstance(net.loss, MultiLoss), "MultiLoss not compatible with single target" + assert not isinstance( + net.loss, MultiLoss + ), "MultiLoss not compatible with single target" return net @@ -1349,9 +1551,13 @@ def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None: # hyper parameters are passed as arguments directly and not as single dictionary checkpoint["hparams_name"] = "kwargs" # save specials - checkpoint[self.CHECKPOINT_HYPER_PARAMS_SPECIAL_KEY] = {k: getattr(self, k) for k in self.hparams_special} + checkpoint[self.CHECKPOINT_HYPER_PARAMS_SPECIAL_KEY] = { + k: getattr(self, k) for k in self.hparams_special + } # add special hparams them back to save the hparams correctly for checkpoint - checkpoint[self.CHECKPOINT_HYPER_PARAMS_KEY].update(checkpoint[self.CHECKPOINT_HYPER_PARAMS_SPECIAL_KEY]) + checkpoint[self.CHECKPOINT_HYPER_PARAMS_KEY].update( + checkpoint[self.CHECKPOINT_HYPER_PARAMS_SPECIAL_KEY] + ) @property def target_names(self) -> List[str]: @@ -1390,7 +1596,10 @@ def to_prediction(self, out: Dict[str, Any], use_metric: bool = True, **kwargs): # if samples were already drawn directly take mean # todo: support classification if isinstance(self.loss, MultiLoss): - out = [Metric.to_prediction(loss, out["prediction"][idx]) for idx, loss in enumerate(self.loss)] + out = [ + Metric.to_prediction(loss, out["prediction"][idx]) + for idx, loss in enumerate(self.loss) + ] else: out = Metric.to_prediction(self.loss, out["prediction"]) else: @@ -1419,12 +1628,18 @@ def to_quantiles(self, out: Dict[str, Any], use_metric: bool = True, **kwargs): # todo: support classification if isinstance(self.loss, MultiLoss): out = [ - Metric.to_quantiles(loss, out["prediction"][idx], quantiles=kwargs.get("quantiles", loss.quantiles)) + Metric.to_quantiles( + loss, + out["prediction"][idx], + quantiles=kwargs.get("quantiles", loss.quantiles), + ) for idx, loss in enumerate(self.loss) ] else: out = Metric.to_quantiles( - self.loss, out["prediction"], quantiles=kwargs.get("quantiles", self.loss.quantiles) + self.loss, + out["prediction"], + quantiles=kwargs.get("quantiles", self.loss.quantiles), ) else: try: @@ -1479,9 +1694,13 @@ def predict( """ # convert to dataloader if isinstance(data, pd.DataFrame): - data = TimeSeriesDataSet.from_parameters(self.dataset_parameters, data, predict=True) + data = TimeSeriesDataSet.from_parameters( + self.dataset_parameters, data, predict=True + ) if isinstance(data, TimeSeriesDataSet): - dataloader = data.to_dataloader(batch_size=batch_size, train=False, num_workers=num_workers) + dataloader = data.to_dataloader( + batch_size=batch_size, train=False, num_workers=num_workers + ) else: dataloader = data @@ -1490,7 +1709,9 @@ def predict( mode_kwargs = {} # ensure passed dataloader is correct - assert isinstance(dataloader.dataset, TimeSeriesDataSet), "dataset behind dataloader mut be TimeSeriesDataSet" + assert isinstance( + dataloader.dataset, TimeSeriesDataSet + ), "dataset behind dataloader mut be TimeSeriesDataSet" predict_callback = PredictCallback( mode=mode, @@ -1505,14 +1726,18 @@ def predict( ) if trainer_kwargs is None: trainer_kwargs = {} - trainer_kwargs.setdefault("callbacks", trainer_kwargs.get("callbacks", []) + [predict_callback]) + trainer_kwargs.setdefault( + "callbacks", trainer_kwargs.get("callbacks", []) + [predict_callback] + ) trainer_kwargs.setdefault("enable_progress_bar", False) trainer_kwargs.setdefault("inference_mode", False) assert ( "fast_dev_run" not in trainer_kwargs ), "fast_dev_run should be passed as argument to predict and not in trainer_kwargs" log_level_lighting = logging.getLogger("lightning").getEffectiveLevel() - log_level_pytorch_lightning = logging.getLogger("pytorch_lightning").getEffectiveLevel() + log_level_pytorch_lightning = logging.getLogger( + "pytorch_lightning" + ).getEffectiveLevel() logging.getLogger("lightning").setLevel(logging.WARNING) logging.getLogger("pytorch_lightning").setLevel(logging.WARNING) trainer = Trainer(fast_dev_run=fast_dev_run, **trainer_kwargs) @@ -1559,12 +1784,19 @@ def predict_dependency( """ values = np.asarray(values) if isinstance(data, pd.DataFrame): # convert to dataframe - data = TimeSeriesDataSet.from_parameters(self.dataset_parameters, data, predict=True) + data = TimeSeriesDataSet.from_parameters( + self.dataset_parameters, data, predict=True + ) elif isinstance(data, DataLoader): data = data.dataset results = [] - progress_bar = tqdm(desc="Predict", unit=" batches", total=len(values), disable=not show_progress_bar) + progress_bar = tqdm( + desc="Predict", + unit=" batches", + total=len(values), + disable=not show_progress_bar, + ) for idx, value in enumerate(values): # set values data.set_overwrite_values(variable=variable, values=value, target=target) @@ -1572,7 +1804,9 @@ def predict_dependency( pred_kwargs = deepcopy(kwargs) pred_kwargs.setdefault("mode", "prediction") - if idx == 0 and mode == "dataframe": # need index for returning as dataframe + if ( + idx == 0 and mode == "dataframe" + ): # need index for returning as dataframe res = self.predict(data, return_index=True, **pred_kwargs) results.append(res.output) else: @@ -1587,7 +1821,9 @@ def predict_dependency( # convert results to requested output format if mode == "series": - results = results[:, ~torch.isnan(results[0])].mean(1) # average samples and prediction horizon + results = results[:, ~torch.isnan(results[0])].mean( + 1 + ) # average samples and prediction horizon results = pd.Series(results.cpu().numpy(), index=values) elif mode == "dataframe": @@ -1603,16 +1839,24 @@ def predict_dependency( .assign(prediction=results.flatten().cpu().numpy()) ) dependencies[variable] = values.repeat(len(data)) - first_prediction = dependencies.groupby(data.group_ids, observed=True).prediction.transform("first") - dependencies["normalized_prediction"] = dependencies["prediction"] / first_prediction - dependencies["id"] = dependencies.groupby(data.group_ids, observed=True).ngroup() + first_prediction = dependencies.groupby( + data.group_ids, observed=True + ).prediction.transform("first") + dependencies["normalized_prediction"] = ( + dependencies["prediction"] / first_prediction + ) + dependencies["id"] = dependencies.groupby( + data.group_ids, observed=True + ).ngroup() results = dependencies elif mode == "raw": pass else: - raise ValueError(f"mode {mode} is unknown - see documentation for available modes") + raise ValueError( + f"mode {mode} is unknown - see documentation for available modes" + ) return results @@ -1691,12 +1935,18 @@ def static_variables(self) -> List[str]: @property def encoder_variables(self) -> List[str]: """List of all encoder variables in model (excluding static variables)""" - return self.hparams.time_varying_categoricals_encoder + self.hparams.time_varying_reals_encoder + return ( + self.hparams.time_varying_categoricals_encoder + + self.hparams.time_varying_reals_encoder + ) @property def decoder_variables(self) -> List[str]: """List of all decoder variables in model (excluding static variables)""" - return self.hparams.time_varying_categoricals_decoder + self.hparams.time_varying_reals_decoder + return ( + self.hparams.time_varying_categoricals_decoder + + self.hparams.time_varying_reals_decoder + ) @property def categorical_groups_mapping(self) -> Dict[str, str]: @@ -1727,7 +1977,8 @@ def from_dataset( # assert fixed encoder and decoder length for the moment if allowed_encoder_known_variable_names is None: allowed_encoder_known_variable_names = ( - dataset._time_varying_known_categoricals + dataset._time_varying_known_reals + dataset._time_varying_known_categoricals + + dataset._time_varying_known_reals ) # embeddings @@ -1757,7 +2008,9 @@ def from_dataset( time_varying_categoricals_decoder=dataset._time_varying_known_categoricals, static_reals=dataset._static_reals, time_varying_reals_encoder=[ - name for name in dataset._time_varying_known_reals if name in allowed_encoder_known_variable_names + name + for name in dataset._time_varying_known_reals + if name in allowed_encoder_known_variable_names ] + dataset._time_varying_unknown_reals, time_varying_reals_decoder=dataset._time_varying_known_reals, @@ -1795,8 +2048,12 @@ def extract_features( x_cat = x["decoder_cat"] x_cont = x["decoder_cont"] elif period == "all": - x_cat = torch.cat([x["encoder_cat"], x["decoder_cat"]], dim=1) # concatenate in time dimension - x_cont = torch.cat([x["encoder_cont"], x["decoder_cont"]], dim=1) # concatenate in time dimension + x_cat = torch.cat( + [x["encoder_cat"], x["decoder_cat"]], dim=1 + ) # concatenate in time dimension + x_cont = torch.cat( + [x["encoder_cont"], x["decoder_cont"]], dim=1 + ) # concatenate in time dimension else: raise ValueError(f"Unknown type: {type}") @@ -1868,7 +2125,10 @@ def calculate_prediction_actual_by_variable( reals = x["decoder_cont"] for idx, name in enumerate(self.hparams.x_reals): averages_actual[name], support[name] = groupby_apply( - (reals[..., idx][mask] * positive_bins / std).round().clamp(-positive_bins, positive_bins).long() + (reals[..., idx][mask] * positive_bins / std) + .round() + .clamp(-positive_bins, positive_bins) + .long() + positive_bins, y_flat, bins=bins, @@ -1876,7 +2136,10 @@ def calculate_prediction_actual_by_variable( return_histogram=True, ) averages_prediction[name], _ = groupby_apply( - (reals[..., idx][mask] * positive_bins / std).round().clamp(-positive_bins, positive_bins).long() + (reals[..., idx][mask] * positive_bins / std) + .round() + .clamp(-positive_bins, positive_bins) + .long() + positive_bins, y_pred_flat, bins=bins, @@ -1886,7 +2149,9 @@ def calculate_prediction_actual_by_variable( # categorical_variables cats = x["decoder_cat"] - for idx, name in enumerate(self.hparams.x_categoricals): # todo: make it work for grouped categoricals + for idx, name in enumerate( + self.hparams.x_categoricals + ): # todo: make it work for grouped categoricals reduction = "sum" name = self.categorical_groups_mapping.get(name, name) averages_actual_cat, support_cat = groupby_apply( @@ -1931,7 +2196,11 @@ def calculate_prediction_actual_by_variable( } def plot_prediction_actual_by_variable( - self, data: Dict[str, Dict[str, torch.Tensor]], name: str = None, ax=None, log_scale: bool = None + self, + data: Dict[str, Dict[str, torch.Tensor]], + name: str = None, + ax=None, + log_scale: bool = None, ): """ Plot predicions and actual averages by variables @@ -1955,7 +2224,10 @@ def plot_prediction_actual_by_variable( from matplotlib import pyplot as plt if name is None: # run recursion for figures - figs = {name: self.plot_prediction_actual_by_variable(data, name) for name in data["support"].keys()} + figs = { + name: self.plot_prediction_actual_by_variable(data, name) + for name in data["support"].keys() + } return figs else: # create figure @@ -1998,7 +2270,9 @@ def plot_prediction_actual_by_variable( # create x if name in to_list(self.dataset_parameters["target"]): if isinstance(self.output_transformer, MultiNormalizer): - scaler = self.output_transformer.normalizers[self.dataset_parameters["target"].index(name)] + scaler = self.output_transformer.normalizers[ + self.dataset_parameters["target"].index(name) + ] else: scaler = self.output_transformer else: @@ -2020,19 +2294,30 @@ def plot_prediction_actual_by_variable( elif name in self.hparams.embedding_labels: # sort values from lowest to highest sorting = values_actual.argsort() - labels = np.asarray(list(self.hparams.embedding_labels[name].keys()))[support_non_zero][sorting] + labels = np.asarray(list(self.hparams.embedding_labels[name].keys()))[ + support_non_zero + ][sorting] values_actual = values_actual[sorting] values_prediction = values_prediction[sorting] support = support[sorting] # cut entries if there are too many categories to fit nicely on the plot maxsize = 50 if values_actual.size > maxsize: - values_actual = np.concatenate([values_actual[: maxsize // 2], values_actual[-maxsize // 2 :]]) + values_actual = np.concatenate( + [values_actual[: maxsize // 2], values_actual[-maxsize // 2 :]] + ) values_prediction = np.concatenate( - [values_prediction[: maxsize // 2], values_prediction[-maxsize // 2 :]] + [ + values_prediction[: maxsize // 2], + values_prediction[-maxsize // 2 :], + ] + ) + labels = np.concatenate( + [labels[: maxsize // 2], labels[-maxsize // 2 :]] + ) + support = np.concatenate( + [support[: maxsize // 2], support[-maxsize // 2 :]] ) - labels = np.concatenate([labels[: maxsize // 2], labels[-maxsize // 2 :]]) - support = np.concatenate([support[: maxsize // 2], support[-maxsize // 2 :]]) # plot for each category x = np.arange(values_actual.size) x_step = 1 @@ -2088,13 +2373,21 @@ def from_dataset( """ kwargs.setdefault("target", dataset.target) # check that lags for targets are the same - lags = {name: lag for name, lag in dataset._lags.items() if name in dataset.target_names} # filter for targets + lags = { + name: lag + for name, lag in dataset._lags.items() + if name in dataset.target_names + } # filter for targets target0 = dataset.target_names[0] lag = set(lags.get(target0, [])) for target in dataset.target_names: - assert lag == set(lags.get(target, [])), f"all target lags in dataset must be the same but found {lags}" + assert lag == set( + lags.get(target, []) + ), f"all target lags in dataset must be the same but found {lags}" - kwargs.setdefault("target_lags", {name: dataset._get_lagged_names(name) for name in lags}) + kwargs.setdefault( + "target_lags", {name: dataset._get_lagged_names(name) for name in lags} + ) return super().from_dataset(dataset, **kwargs) def output_to_prediction( @@ -2121,34 +2414,46 @@ def output_to_prediction( """ single_prediction = to_list(normalized_prediction_parameters)[0].ndim == 2 if single_prediction: # add time dimension as it is expected - normalized_prediction_parameters = apply_to_list(normalized_prediction_parameters, lambda x: x.unsqueeze(1)) + normalized_prediction_parameters = apply_to_list( + normalized_prediction_parameters, lambda x: x.unsqueeze(1) + ) # transform into real space prediction_parameters = self.transform_output( - prediction=normalized_prediction_parameters, target_scale=target_scale, **kwargs + prediction=normalized_prediction_parameters, + target_scale=target_scale, + **kwargs, ) # todo: handle classification # sample value(s) from distribution and select first sample if isinstance(self.loss, DistributionLoss) or ( - isinstance(self.loss, MultiLoss) and isinstance(self.loss[0], DistributionLoss) + isinstance(self.loss, MultiLoss) + and isinstance(self.loss[0], DistributionLoss) ): # todo: handle mixed losses if n_samples > 1: prediction_parameters = apply_to_list( - prediction_parameters, lambda x: x.reshape(int(x.size(0) / n_samples), n_samples, -1) + prediction_parameters, + lambda x: x.reshape(int(x.size(0) / n_samples), n_samples, -1), ) prediction = self.loss.sample(prediction_parameters, 1) - prediction = apply_to_list(prediction, lambda x: x.reshape(x.size(0) * n_samples, 1, -1)) + prediction = apply_to_list( + prediction, lambda x: x.reshape(x.size(0) * n_samples, 1, -1) + ) else: prediction = self.loss.sample(normalized_prediction_parameters, 1) else: prediction = prediction_parameters # normalize prediction prediction - normalized_prediction = self.output_transformer.transform(prediction, target_scale=target_scale) + normalized_prediction = self.output_transformer.transform( + prediction, target_scale=target_scale + ) if isinstance(normalized_prediction, list): input_target = torch.cat(normalized_prediction, dim=-1) else: - input_target = normalized_prediction # set next input target to normalized prediction + input_target = ( + normalized_prediction # set next input target to normalized prediction + ) # remove time dimension if single_prediction: @@ -2280,7 +2585,10 @@ def decode_one(idx, lagged_targets, hidden_state): for idx in range(n_decoder_steps): # get lagged targets current_target, current_hidden_state = decode_one( - idx, lagged_targets=normalized_output, hidden_state=current_hidden_state, **kwargs + idx, + lagged_targets=normalized_output, + hidden_state=current_hidden_state, + **kwargs, ) # get prediction and its normalized version for the next step @@ -2296,7 +2604,10 @@ def decode_one(idx, lagged_targets, hidden_state): output = torch.stack(output, dim=1) else: # for multi-targets - output = [torch.stack([out[idx] for out in output], dim=1) for idx in range(len(self.target_positions))] + output = [ + torch.stack([out[idx] for out in output], dim=1) + for idx in range(len(self.target_positions)) + ] return output @property @@ -2344,8 +2655,12 @@ def plot_prediction( matplotlib figure """ - prediction_kwargs = {} if prediction_kwargs is None else deepcopy(prediction_kwargs) - quantiles_kwargs = {} if quantiles_kwargs is None else deepcopy(quantiles_kwargs) + prediction_kwargs = ( + {} if prediction_kwargs is None else deepcopy(prediction_kwargs) + ) + quantiles_kwargs = ( + {} if quantiles_kwargs is None else deepcopy(quantiles_kwargs) + ) # get predictions if isinstance(self.loss, DistributionLoss): @@ -2377,7 +2692,9 @@ def lagged_target_positions(self) -> Dict[int, torch.LongTensor]: ) -class AutoRegressiveBaseModelWithCovariates(BaseModelWithCovariates, AutoRegressiveBaseModel): +class AutoRegressiveBaseModelWithCovariates( + BaseModelWithCovariates, AutoRegressiveBaseModel +): """ Model with additional methods for autoregressive models with covariates. diff --git a/pytorch_forecasting/models/baseline.py b/pytorch_forecasting/models/baseline.py index 142ff3cda..2a071daed 100644 --- a/pytorch_forecasting/models/baseline.py +++ b/pytorch_forecasting/models/baseline.py @@ -59,11 +59,18 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: return self.to_network_output(prediction=prediction) def forward_one_target( - self, encoder_lengths: torch.Tensor, decoder_lengths: torch.Tensor, encoder_target: torch.Tensor + self, + encoder_lengths: torch.Tensor, + decoder_lengths: torch.Tensor, + encoder_target: torch.Tensor, ): max_prediction_length = decoder_lengths.max() - assert encoder_lengths.min() > 0, "Encoder lengths of at least 1 required to obtain last value" - last_values = encoder_target[torch.arange(encoder_target.size(0)), encoder_lengths - 1] + assert ( + encoder_lengths.min() > 0 + ), "Encoder lengths of at least 1 required to obtain last value" + last_values = encoder_target[ + torch.arange(encoder_target.size(0)), encoder_lengths - 1 + ] prediction = last_values[:, None].expand(-1, max_prediction_length) return prediction diff --git a/pytorch_forecasting/models/deepar/_deepar.py b/pytorch_forecasting/models/deepar/_deepar.py index f9bcb186b..c8346367b 100644 --- a/pytorch_forecasting/models/deepar/_deepar.py +++ b/pytorch_forecasting/models/deepar/_deepar.py @@ -26,7 +26,10 @@ MultivariateDistributionLoss, NormalDistributionLoss, ) -from pytorch_forecasting.models.base_model import AutoRegressiveBaseModelWithCovariates, Prediction +from pytorch_forecasting.models.base_model import ( + AutoRegressiveBaseModelWithCovariates, + Prediction, +) from pytorch_forecasting.models.nn import HiddenState, MultiEmbedding, get_rnn from pytorch_forecasting.utils import apply_to_list, to_list @@ -153,15 +156,19 @@ def __init__( ) lagged_target_names = [l for lags in target_lags.values() for l in lags] - assert set(self.encoder_variables) - set(to_list(target)) - set(lagged_target_names) == set( - self.decoder_variables - ) - set(lagged_target_names), "Encoder and decoder variables have to be the same apart from target variable" + assert set(self.encoder_variables) - set(to_list(target)) - set( + lagged_target_names + ) == set(self.decoder_variables) - set( + lagged_target_names + ), "Encoder and decoder variables have to be the same apart from target variable" for targeti in to_list(target): assert ( targeti in time_varying_reals_encoder ), f"target {targeti} has to be real" # todo: remove this restriction assert (isinstance(target, str) and isinstance(loss, DistributionLoss)) or ( - isinstance(target, (list, tuple)) and isinstance(loss, MultiLoss) and len(loss) == len(target) + isinstance(target, (list, tuple)) + and isinstance(loss, MultiLoss) + and len(loss) == len(target) ), "number of targets should be equivalent to number of loss metrics" rnn_class = get_rnn(cell_type) @@ -178,10 +185,15 @@ def __init__( # add linear layers for argument projects if isinstance(target, str): # single target - self.distribution_projector = nn.Linear(self.hparams.hidden_size, len(self.loss.distribution_arguments)) + self.distribution_projector = nn.Linear( + self.hparams.hidden_size, len(self.loss.distribution_arguments) + ) else: # multi target self.distribution_projector = nn.ModuleList( - [nn.Linear(self.hparams.hidden_size, len(args)) for args in self.loss.distribution_arguments] + [ + nn.Linear(self.hparams.hidden_size, len(args)) + for args in self.loss.distribution_arguments + ] ) @classmethod @@ -204,22 +216,33 @@ def from_dataset( """ new_kwargs = {} if dataset.multi_target: - new_kwargs.setdefault("loss", MultiLoss([NormalDistributionLoss()] * len(dataset.target_names))) + new_kwargs.setdefault( + "loss", + MultiLoss([NormalDistributionLoss()] * len(dataset.target_names)), + ) new_kwargs.update(kwargs) assert not isinstance(dataset.target_normalizer, NaNLabelEncoder) and ( not isinstance(dataset.target_normalizer, MultiNormalizer) - or all(not isinstance(normalizer, NaNLabelEncoder) for normalizer in dataset.target_normalizer) + or all( + not isinstance(normalizer, NaNLabelEncoder) + for normalizer in dataset.target_normalizer + ) ), "target(s) should be continuous - categorical targets are not supported" # todo: remove this restriction if isinstance(new_kwargs.get("loss", None), MultivariateDistributionLoss): assert ( dataset.min_prediction_length == dataset.max_prediction_length ), "Multivariate models require constant prediction lenghts" return super().from_dataset( - dataset, allowed_encoder_known_variable_names=allowed_encoder_known_variable_names, **new_kwargs + dataset, + allowed_encoder_known_variable_names=allowed_encoder_known_variable_names, + **new_kwargs, ) def construct_input_vector( - self, x_cat: torch.Tensor, x_cont: torch.Tensor, one_off_target: torch.Tensor = None + self, + x_cat: torch.Tensor, + x_cont: torch.Tensor, + one_off_target: torch.Tensor = None, ) -> torch.Tensor: """ Create input vector into RNN network @@ -271,11 +294,15 @@ def decode_all( hidden_state: HiddenState, lengths: torch.Tensor = None, ): - decoder_output, hidden_state = self.rnn(x, hidden_state, lengths=lengths, enforce_sorted=False) + decoder_output, hidden_state = self.rnn( + x, hidden_state, lengths=lengths, enforce_sorted=False + ) if isinstance(self.hparams.target, str): # single target output = self.distribution_projector(decoder_output) else: - output = [projector(decoder_output) for projector in self.distribution_projector] + output = [ + projector(decoder_output) for projector in self.distribution_projector + ] return output, hidden_state def decode( @@ -292,7 +319,9 @@ def decode( sampling new targets from past predictions iteratively """ if n_samples is None: - output, _ = self.decode_all(input_vector, hidden_state, lengths=decoder_lengths) + output, _ = self.decode_all( + input_vector, hidden_state, lengths=decoder_lengths + ) output = self.transform_output(output, target_scale=target_scale) else: # run in eval, i.e. simulation mode @@ -301,7 +330,9 @@ def decode( # repeat for n_samples input_vector = input_vector.repeat_interleave(n_samples, 0) hidden_state = self.rnn.repeat_interleave(hidden_state, n_samples) - target_scale = apply_to_list(target_scale, lambda x: x.repeat_interleave(n_samples, 0)) + target_scale = apply_to_list( + target_scale, lambda x: x.repeat_interleave(n_samples, 0) + ) # define function to run at every decoding step def decode_one( @@ -315,7 +346,9 @@ def decode_one( if idx > lag: x[:, 0, lag_positions] = lagged_targets[-lag] prediction, hidden_state = self.decode_all(x, hidden_state) - prediction = apply_to_list(prediction, lambda x: x[:, 0]) # select first time step + prediction = apply_to_list( + prediction, lambda x: x[:, 0] + ) # select first time step return prediction, hidden_state # make predictions which are fed into next step @@ -329,10 +362,17 @@ def decode_one( ) # reshape predictions for n_samples: # from n_samples * batch_size x time steps to batch_size x time steps x n_samples - output = apply_to_list(output, lambda x: x.reshape(-1, n_samples, input_vector.size(1)).permute(0, 2, 1)) + output = apply_to_list( + output, + lambda x: x.reshape(-1, n_samples, input_vector.size(1)).permute( + 0, 2, 1 + ), + ) return output - def forward(self, x: Dict[str, torch.Tensor], n_samples: int = None) -> Dict[str, torch.Tensor]: + def forward( + self, x: Dict[str, torch.Tensor], n_samples: int = None + ) -> Dict[str, torch.Tensor]: """ Forward network """ @@ -342,7 +382,9 @@ def forward(self, x: Dict[str, torch.Tensor], n_samples: int = None) -> Dict[str x["decoder_cat"], x["decoder_cont"], one_off_target=x["encoder_cont"][ - torch.arange(x["encoder_cont"].size(0), device=x["encoder_cont"].device), + torch.arange( + x["encoder_cont"].size(0), device=x["encoder_cont"].device + ), x["encoder_lengths"] - 1, self.target_positions.unsqueeze(-1), ].T.contiguous(), @@ -361,7 +403,10 @@ def forward(self, x: Dict[str, torch.Tensor], n_samples: int = None) -> Dict[str return self.to_network_output(prediction=output) def create_log(self, x, y, out, batch_idx): - n_samples = [self.hparams.n_validation_samples, self.hparams.n_plotting_samples][self.training] + n_samples = [ + self.hparams.n_validation_samples, + self.hparams.n_plotting_samples, + ][self.training] log = super().create_log( x, y, diff --git a/pytorch_forecasting/models/mlp/_decodermlp.py b/pytorch_forecasting/models/mlp/_decodermlp.py index 24b72c0f3..4f9dfa5d3 100644 --- a/pytorch_forecasting/models/mlp/_decodermlp.py +++ b/pytorch_forecasting/models/mlp/_decodermlp.py @@ -2,14 +2,22 @@ Simple models based on fully connected networks """ -from typing import Dict, List, Tuple, Union, Optional +from typing import Dict, List, Optional, Tuple, Union import numpy as np import torch from torch import nn from pytorch_forecasting.data import TimeSeriesDataSet -from pytorch_forecasting.metrics import MAE, MAPE, MASE, RMSE, SMAPE, MultiHorizonMetric, QuantileLoss +from pytorch_forecasting.metrics import ( + MAE, + MAPE, + MASE, + RMSE, + SMAPE, + MultiHorizonMetric, + QuantileLoss, +) from pytorch_forecasting.models.base_model import BaseModelWithCovariates from pytorch_forecasting.models.mlp.submodules import FullyConnectedModule from pytorch_forecasting.models.nn.embeddings import MultiEmbedding @@ -148,15 +156,20 @@ def decoder_reals_positions(self) -> List[int]: if name in self.decoder_variables + self.static_variables ] - def forward(self, x: Dict[str, torch.Tensor], n_samples: int = None) -> Dict[str, torch.Tensor]: + def forward( + self, x: Dict[str, torch.Tensor], n_samples: int = None + ) -> Dict[str, torch.Tensor]: """ Forward network """ # x is a batch generated based on the TimeSeriesDataset batch_size = x["decoder_lengths"].size(0) - embeddings = self.input_embeddings(x["decoder_cat"]) # returns dictionary with embedding tensors + embeddings = self.input_embeddings( + x["decoder_cat"] + ) # returns dictionary with embedding tensors network_input = torch.cat( - [x["decoder_cont"][..., self.decoder_reals_positions]] + list(embeddings.values()), + [x["decoder_cont"][..., self.decoder_reals_positions]] + + list(embeddings.values()), dim=-1, ) prediction = self.mlp(network_input.view(-1, self.mlp.input_size)).view( @@ -174,6 +187,8 @@ def forward(self, x: Dict[str, torch.Tensor], n_samples: int = None) -> Dict[str @classmethod def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs): - new_kwargs = cls.deduce_default_output_parameters(dataset, kwargs, QuantileLoss()) + new_kwargs = cls.deduce_default_output_parameters( + dataset, kwargs, QuantileLoss() + ) kwargs.update(new_kwargs) return super().from_dataset(dataset, **kwargs) diff --git a/pytorch_forecasting/models/mlp/submodules.py b/pytorch_forecasting/models/mlp/submodules.py index 7166eb8a5..b0f32ec8b 100644 --- a/pytorch_forecasting/models/mlp/submodules.py +++ b/pytorch_forecasting/models/mlp/submodules.py @@ -34,7 +34,9 @@ def __init__( module_list.append(nn.LayerNorm(hidden_size)) # hidden layers for _ in range(n_hidden_layers): - module_list.extend([nn.Linear(hidden_size, hidden_size), activation_class()]) + module_list.extend( + [nn.Linear(hidden_size, hidden_size), activation_class()] + ) if dropout is not None: module_list.append(nn.Dropout(dropout)) if norm: diff --git a/pytorch_forecasting/models/nbeats/__init__.py b/pytorch_forecasting/models/nbeats/__init__.py index dcf4e1b34..b3264272d 100644 --- a/pytorch_forecasting/models/nbeats/__init__.py +++ b/pytorch_forecasting/models/nbeats/__init__.py @@ -1,6 +1,10 @@ """N-Beats model for timeseries forecasting without covariates.""" from pytorch_forecasting.models.nbeats._nbeats import NBeats -from pytorch_forecasting.models.nbeats.sub_modules import NBEATSGenericBlock, NBEATSSeasonalBlock, NBEATSTrendBlock +from pytorch_forecasting.models.nbeats.sub_modules import ( + NBEATSGenericBlock, + NBEATSSeasonalBlock, + NBEATSTrendBlock, +) __all__ = ["NBeats", "NBEATSGenericBlock", "NBEATSSeasonalBlock", "NBEATSTrendBlock"] diff --git a/pytorch_forecasting/models/nbeats/_nbeats.py b/pytorch_forecasting/models/nbeats/_nbeats.py index 8d00392cc..0c0d172ba 100644 --- a/pytorch_forecasting/models/nbeats/_nbeats.py +++ b/pytorch_forecasting/models/nbeats/_nbeats.py @@ -11,7 +11,11 @@ from pytorch_forecasting.data.encoders import NaNLabelEncoder from pytorch_forecasting.metrics import MAE, MAPE, MASE, RMSE, SMAPE, MultiHorizonMetric from pytorch_forecasting.models.base_model import BaseModel -from pytorch_forecasting.models.nbeats.sub_modules import NBEATSGenericBlock, NBEATSSeasonalBlock, NBEATSTrendBlock +from pytorch_forecasting.models.nbeats.sub_modules import ( + NBEATSGenericBlock, + NBEATSSeasonalBlock, + NBEATSTrendBlock, +) from pytorch_forecasting.utils._dependencies import _check_matplotlib @@ -156,11 +160,25 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: target = x["encoder_cont"][..., 0] timesteps = self.hparams.context_length + self.hparams.prediction_length - generic_forecast = [torch.zeros((target.size(0), timesteps), dtype=torch.float32, device=self.device)] - trend_forecast = [torch.zeros((target.size(0), timesteps), dtype=torch.float32, device=self.device)] - seasonal_forecast = [torch.zeros((target.size(0), timesteps), dtype=torch.float32, device=self.device)] + generic_forecast = [ + torch.zeros( + (target.size(0), timesteps), dtype=torch.float32, device=self.device + ) + ] + trend_forecast = [ + torch.zeros( + (target.size(0), timesteps), dtype=torch.float32, device=self.device + ) + ] + seasonal_forecast = [ + torch.zeros( + (target.size(0), timesteps), dtype=torch.float32, device=self.device + ) + ] forecast = torch.zeros( - (target.size(0), self.hparams.prediction_length), dtype=torch.float32, device=self.device + (target.size(0), self.hparams.prediction_length), + dtype=torch.float32, + device=self.device, ) backcast = target # initialize backcast @@ -185,12 +203,21 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: return self.to_network_output( prediction=self.transform_output(forecast, target_scale=x["target_scale"]), - backcast=self.transform_output(prediction=target - backcast, target_scale=x["target_scale"]), - trend=self.transform_output(torch.stack(trend_forecast, dim=0).sum(0), target_scale=x["target_scale"]), + backcast=self.transform_output( + prediction=target - backcast, target_scale=x["target_scale"] + ), + trend=self.transform_output( + torch.stack(trend_forecast, dim=0).sum(0), + target_scale=x["target_scale"], + ), seasonality=self.transform_output( - torch.stack(seasonal_forecast, dim=0).sum(0), target_scale=x["target_scale"] + torch.stack(seasonal_forecast, dim=0).sum(0), + target_scale=x["target_scale"], + ), + generic=self.transform_output( + torch.stack(generic_forecast, dim=0).sum(0), + target_scale=x["target_scale"], ), - generic=self.transform_output(torch.stack(generic_forecast, dim=0).sum(0), target_scale=x["target_scale"]), ) @classmethod @@ -205,11 +232,16 @@ def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs): Returns: NBeats """ - new_kwargs = {"prediction_length": dataset.max_prediction_length, "context_length": dataset.max_encoder_length} + new_kwargs = { + "prediction_length": dataset.max_prediction_length, + "context_length": dataset.max_encoder_length, + } new_kwargs.update(kwargs) # validate arguments - assert isinstance(dataset.target, str), "only one target is allowed (passed as string to dataset)" + assert isinstance( + dataset.target, str + ), "only one target is allowed (passed as string to dataset)" assert not isinstance( dataset.target_normalizer, NaNLabelEncoder ), "only regression tasks are supported - target must not be categorical" @@ -221,8 +253,12 @@ def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs): dataset.max_prediction_length == dataset.min_prediction_length ), "only fixed prediction length is allowed, but max_prediction_length != min_prediction_length" - assert dataset.randomize_length is None, "length has to be fixed, but randomize_length is not None" - assert not dataset.add_relative_time_idx, "add_relative_time_idx has to be False" + assert ( + dataset.randomize_length is None + ), "length has to be fixed, but randomize_length is not None" + assert ( + not dataset.add_relative_time_idx + ), "add_relative_time_idx has to be False" assert ( len(dataset.flat_categoricals) == 0 @@ -240,17 +276,26 @@ def step(self, x, y, batch_idx) -> Dict[str, torch.Tensor]: """ log, out = super().step(x, y, batch_idx=batch_idx) - if self.hparams.backcast_loss_ratio > 0 and not self.predicting: # add loss from backcast + if ( + self.hparams.backcast_loss_ratio > 0 and not self.predicting + ): # add loss from backcast backcast = out["backcast"] backcast_weight = ( - self.hparams.backcast_loss_ratio * self.hparams.prediction_length / self.hparams.context_length + self.hparams.backcast_loss_ratio + * self.hparams.prediction_length + / self.hparams.context_length ) backcast_weight = backcast_weight / (backcast_weight + 1) # normalize forecast_weight = 1 - backcast_weight if isinstance(self.loss, MASE): - backcast_loss = self.loss(backcast, x["encoder_target"], x["decoder_target"]) * backcast_weight + backcast_loss = ( + self.loss(backcast, x["encoder_target"], x["decoder_target"]) + * backcast_weight + ) else: - backcast_loss = self.loss(backcast, x["encoder_target"]) * backcast_weight + backcast_loss = ( + self.loss(backcast, x["encoder_target"]) * backcast_weight + ) label = ["val", "train"][self.training] self.log( f"{label}_backcast_loss", @@ -326,10 +371,18 @@ def plot_interpretation( else: fig = ax[0].get_figure() - time = torch.arange(-self.hparams.context_length, self.hparams.prediction_length) + time = torch.arange( + -self.hparams.context_length, self.hparams.prediction_length + ) # plot target vs prediction - ax[0].plot(time, torch.cat([x["encoder_target"][idx], x["decoder_target"][idx]]).detach().cpu(), label="target") + ax[0].plot( + time, + torch.cat([x["encoder_target"][idx], x["decoder_target"][idx]]) + .detach() + .cpu(), + label="target", + ) ax[0].plot( time, torch.cat( diff --git a/pytorch_forecasting/models/nbeats/sub_modules.py b/pytorch_forecasting/models/nbeats/sub_modules.py index e300d452f..b5cff4341 100644 --- a/pytorch_forecasting/models/nbeats/sub_modules.py +++ b/pytorch_forecasting/models/nbeats/sub_modules.py @@ -18,7 +18,9 @@ def linear(input_size, output_size, bias=True, dropout: int = None): return lin -def linspace(backcast_length: int, forecast_length: int, centered: bool = False) -> Tuple[np.ndarray, np.ndarray]: +def linspace( + backcast_length: int, forecast_length: int, centered: bool = False +) -> Tuple[np.ndarray, np.ndarray]: if centered: norm = max(backcast_length, forecast_length) start = -backcast_length @@ -27,7 +29,9 @@ def linspace(backcast_length: int, forecast_length: int, centered: bool = False) norm = backcast_length + forecast_length start = 0 stop = backcast_length + forecast_length - 1 - lin_space = np.linspace(start / norm, stop / norm, backcast_length + forecast_length, dtype=np.float32) + lin_space = np.linspace( + start / norm, stop / norm, backcast_length + forecast_length, dtype=np.float32 + ) b_ls = lin_space[:backcast_length] f_ls = lin_space[backcast_length:] return b_ls, f_ls @@ -97,22 +101,44 @@ def __init__( dropout=dropout, ) - backcast_linspace, forecast_linspace = linspace(backcast_length, forecast_length, centered=False) + backcast_linspace, forecast_linspace = linspace( + backcast_length, forecast_length, centered=False + ) - p1, p2 = (thetas_dim // 2, thetas_dim // 2) if thetas_dim % 2 == 0 else (thetas_dim // 2, thetas_dim // 2 + 1) + p1, p2 = ( + (thetas_dim // 2, thetas_dim // 2) + if thetas_dim % 2 == 0 + else (thetas_dim // 2, thetas_dim // 2 + 1) + ) s1_b = torch.tensor( - [np.cos(2 * np.pi * i * backcast_linspace) for i in self.get_frequencies(p1)], dtype=torch.float32 + [ + np.cos(2 * np.pi * i * backcast_linspace) + for i in self.get_frequencies(p1) + ], + dtype=torch.float32, ) # H/2-1 s2_b = torch.tensor( - [np.sin(2 * np.pi * i * backcast_linspace) for i in self.get_frequencies(p2)], dtype=torch.float32 + [ + np.sin(2 * np.pi * i * backcast_linspace) + for i in self.get_frequencies(p2) + ], + dtype=torch.float32, ) self.register_buffer("S_backcast", torch.cat([s1_b, s2_b])) s1_f = torch.tensor( - [np.cos(2 * np.pi * i * forecast_linspace) for i in self.get_frequencies(p1)], dtype=torch.float32 + [ + np.cos(2 * np.pi * i * forecast_linspace) + for i in self.get_frequencies(p1) + ], + dtype=torch.float32, ) # H/2-1 s2_f = torch.tensor( - [np.sin(2 * np.pi * i * forecast_linspace) for i in self.get_frequencies(p2)], dtype=torch.float32 + [ + np.sin(2 * np.pi * i * forecast_linspace) + for i in self.get_frequencies(p2) + ], + dtype=torch.float32, ) self.register_buffer("S_forecast", torch.cat([s1_f, s2_f])) @@ -126,7 +152,9 @@ def forward(self, x) -> Tuple[torch.Tensor, torch.Tensor]: return backcast, forecast def get_frequencies(self, n): - return np.linspace(0, (self.backcast_length + self.forecast_length) / self.min_period, n) + return np.linspace( + 0, (self.backcast_length + self.forecast_length) / self.min_period, n + ) class NBEATSTrendBlock(NBEATSBlock): @@ -149,13 +177,21 @@ def __init__( dropout=dropout, ) - backcast_linspace, forecast_linspace = linspace(backcast_length, forecast_length, centered=True) - norm = np.sqrt(forecast_length / thetas_dim) # ensure range of predictions is comparable to input + backcast_linspace, forecast_linspace = linspace( + backcast_length, forecast_length, centered=True + ) + norm = np.sqrt( + forecast_length / thetas_dim + ) # ensure range of predictions is comparable to input - coefficients = torch.tensor([backcast_linspace**i for i in range(thetas_dim)], dtype=torch.float32) + coefficients = torch.tensor( + [backcast_linspace**i for i in range(thetas_dim)], dtype=torch.float32 + ) self.register_buffer("T_backcast", coefficients * norm) - coefficients = torch.tensor([forecast_linspace**i for i in range(thetas_dim)], dtype=torch.float32) + coefficients = torch.tensor( + [forecast_linspace**i for i in range(thetas_dim)], dtype=torch.float32 + ) self.register_buffer("T_forecast", coefficients * norm) def forward(self, x) -> Tuple[torch.Tensor, torch.Tensor]: diff --git a/pytorch_forecasting/models/nhits/_nhits.py b/pytorch_forecasting/models/nhits/_nhits.py index 6d7902135..641447c3f 100644 --- a/pytorch_forecasting/models/nhits/_nhits.py +++ b/pytorch_forecasting/models/nhits/_nhits.py @@ -11,7 +11,15 @@ from pytorch_forecasting.data import TimeSeriesDataSet from pytorch_forecasting.data.encoders import NaNLabelEncoder -from pytorch_forecasting.metrics import MAE, MAPE, MASE, RMSE, SMAPE, MultiHorizonMetric, MultiLoss +from pytorch_forecasting.metrics import ( + MAE, + MAPE, + MASE, + RMSE, + SMAPE, + MultiHorizonMetric, + MultiLoss, +) from pytorch_forecasting.models.base_model import BaseModelWithCovariates from pytorch_forecasting.models.nhits.sub_modules import NHiTS as NHiTSModule from pytorch_forecasting.models.nn.embeddings import MultiEmbedding @@ -178,14 +186,20 @@ def __init__( # provide default downsampling sizes n_stacks = len(n_blocks) if pooling_sizes is None: - pooling_sizes = np.exp2(np.round(np.linspace(0.49, np.log2(prediction_length / 2), n_stacks))) + pooling_sizes = np.exp2( + np.round(np.linspace(0.49, np.log2(prediction_length / 2), n_stacks)) + ) pooling_sizes = [int(x) for x in pooling_sizes[::-1]] # remove zero from pooling_sizes pooling_sizes = max(pooling_sizes, [1] * len(pooling_sizes)) if downsample_frequencies is None: - downsample_frequencies = [min(prediction_length, int(np.power(x, 1.5))) for x in pooling_sizes] + downsample_frequencies = [ + min(prediction_length, int(np.power(x, 1.5))) for x in pooling_sizes + ] # remove zero from downsample_frequencies - downsample_frequencies = max(downsample_frequencies, [1] * len(downsample_frequencies)) + downsample_frequencies = max( + downsample_frequencies, [1] * len(downsample_frequencies) + ) # set static hidden size if static_hidden_size is None: @@ -235,8 +249,11 @@ def decoder_covariate_size(self) -> int: Returns: int: size of time-dependent covariates used by the decoder """ - return len(set(self.hparams.time_varying_reals_decoder) - set(self.target_names)) + sum( - self.embeddings.output_size[name] for name in self.hparams.time_varying_categoricals_decoder + return len( + set(self.hparams.time_varying_reals_decoder) - set(self.target_names) + ) + sum( + self.embeddings.output_size[name] + for name in self.hparams.time_varying_categoricals_decoder ) @property @@ -246,8 +263,11 @@ def encoder_covariate_size(self) -> int: Returns: int: size of time-dependent covariates used by the encoder """ - return len(set(self.hparams.time_varying_reals_encoder) - set(self.target_names)) + sum( - self.embeddings.output_size[name] for name in self.hparams.time_varying_categoricals_encoder + return len( + set(self.hparams.time_varying_reals_encoder) - set(self.target_names) + ) + sum( + self.embeddings.output_size[name] + for name in self.hparams.time_varying_categoricals_encoder ) @property @@ -258,7 +278,8 @@ def static_size(self) -> int: int: size of static covariates """ return len(self.hparams.static_reals) + sum( - self.embeddings.output_size[name] for name in self.hparams.static_categoricals + self.embeddings.output_size[name] + for name in self.hparams.static_categoricals ) @property @@ -283,29 +304,43 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: """ # covariates if self.encoder_covariate_size > 0: - encoder_features = self.extract_features(x, self.embeddings, period="encoder") + encoder_features = self.extract_features( + x, self.embeddings, period="encoder" + ) encoder_x_t = torch.concat( - [encoder_features[name] for name in self.encoder_variables if name not in self.target_names], + [ + encoder_features[name] + for name in self.encoder_variables + if name not in self.target_names + ], dim=2, ) else: encoder_x_t = None if self.decoder_covariate_size > 0: - decoder_features = self.extract_features(x, self.embeddings, period="decoder") - decoder_x_t = torch.concat([decoder_features[name] for name in self.decoder_variables], dim=2) + decoder_features = self.extract_features( + x, self.embeddings, period="decoder" + ) + decoder_x_t = torch.concat( + [decoder_features[name] for name in self.decoder_variables], dim=2 + ) else: decoder_x_t = None # statics if self.static_size > 0: - x_s = torch.concat([encoder_features[name][:, 0] for name in self.static_variables], dim=1) + x_s = torch.concat( + [encoder_features[name][:, 0] for name in self.static_variables], dim=1 + ) else: x_s = None # target encoder_y = x["encoder_cont"][..., self.target_positions] - encoder_mask = create_mask(x["encoder_lengths"].max(), x["encoder_lengths"], inverse=True) + encoder_mask = create_mask( + x["encoder_lengths"].max(), x["encoder_lengths"], inverse=True + ) # run model forecast, backcast, block_forecasts, block_backcasts = self.model( @@ -321,18 +356,25 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: forecast = forecast.split(self.hparams.output_size, dim=2) backcast = backcast.split(1, dim=2) block_backcasts = tuple( - self.transform_output(block.squeeze(3).split(1, dim=2), target_scale=x["target_scale"]) + self.transform_output( + block.squeeze(3).split(1, dim=2), target_scale=x["target_scale"] + ) for block in block_backcasts.split(1, dim=3) ) block_forecasts = tuple( self.transform_output( - block.squeeze(3).split(self.hparams.output_size, dim=2), target_scale=x["target_scale"] + block.squeeze(3).split(self.hparams.output_size, dim=2), + target_scale=x["target_scale"], ) for block in block_forecasts.split(1, dim=3) ) else: block_backcasts = tuple( - self.transform_output(block.squeeze(3), target_scale=x["target_scale"], loss=MultiHorizonMetric()) + self.transform_output( + block.squeeze(3), + target_scale=x["target_scale"], + loss=MultiHorizonMetric(), + ) for block in block_backcasts.split(1, dim=3) ) block_forecasts = tuple( @@ -375,17 +417,25 @@ def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs): dataset.max_prediction_length == dataset.min_prediction_length ), "only fixed prediction length is allowed, but max_prediction_length != min_prediction_length" - assert dataset.randomize_length is None, "length has to be fixed, but randomize_length is not None" - assert not dataset.add_relative_time_idx, "add_relative_time_idx has to be False" + assert ( + dataset.randomize_length is None + ), "length has to be fixed, but randomize_length is not None" + assert ( + not dataset.add_relative_time_idx + ), "add_relative_time_idx has to be False" new_kwargs = copy(kwargs) new_kwargs.update( - {"prediction_length": dataset.max_prediction_length, "context_length": dataset.max_encoder_length} + { + "prediction_length": dataset.max_prediction_length, + "context_length": dataset.max_encoder_length, + } ) new_kwargs.update(cls.deduce_default_output_parameters(dataset, kwargs, MASE())) assert (new_kwargs.get("backcast_loss_ratio", 0) == 0) | ( - isinstance(new_kwargs["output_size"], int) and new_kwargs["output_size"] == 1 + isinstance(new_kwargs["output_size"], int) + and new_kwargs["output_size"] == 1 ) or all( o == 1 for o in new_kwargs["output_size"] ), "output sizes can only be of size 1, i.e. point forecasts if backcast_loss_ratio > 0" @@ -399,10 +449,14 @@ def step(self, x, y, batch_idx) -> Dict[str, torch.Tensor]: """ log, out = super().step(x, y, batch_idx=batch_idx) - if self.hparams.backcast_loss_ratio > 0 and not self.predicting: # add loss from backcast + if ( + self.hparams.backcast_loss_ratio > 0 and not self.predicting + ): # add loss from backcast backcast = out["backcast"] backcast_weight = ( - self.hparams.backcast_loss_ratio * self.hparams.prediction_length / self.hparams.context_length + self.hparams.backcast_loss_ratio + * self.hparams.prediction_length + / self.hparams.context_length ) backcast_weight = backcast_weight / (backcast_weight + 1) # normalize forecast_weight = 1 - backcast_weight @@ -417,7 +471,9 @@ def step(self, x, y, batch_idx) -> Dict[str, torch.Tensor]: * backcast_weight ) else: - backcast_loss = self.loss(backcast, x["encoder_target"]) * backcast_weight + backcast_loss = ( + self.loss(backcast, x["encoder_target"]) * backcast_weight + ) label = ["val", "train"][self.training] self.log( f"{label}_backcast_loss", @@ -467,7 +523,9 @@ def plot_interpretation( from matplotlib import pyplot as plt if not isinstance(self.loss, MultiLoss): # not multi-target - prediction = self.to_prediction(dict(prediction=output["prediction"][[idx]].detach()))[0].cpu() + prediction = self.to_prediction( + dict(prediction=output["prediction"][[idx]].detach()) + )[0].cpu() block_forecasts = [ self.to_prediction(dict(prediction=block[[idx]].detach()))[0].cpu() for block in output["block_forecasts"] @@ -475,8 +533,12 @@ def plot_interpretation( elif isinstance(output["prediction"], (tuple, list)): # multi-target figs = [] # predictions and block forecasts need to be converted - prediction = [p[[idx]].detach() for p in output["prediction"]] # select index - prediction = self.to_prediction(dict(prediction=prediction)) # transform to prediction + prediction = [ + p[[idx]].detach() for p in output["prediction"] + ] # select index + prediction = self.to_prediction( + dict(prediction=prediction) + ) # transform to prediction prediction = [p[0].cpu() for p in prediction] # select first and only index block_forecasts = [ @@ -493,11 +555,16 @@ def plot_interpretation( figs.append( self.plot_interpretation( - dict(encoder_target=x["encoder_target"][i], decoder_target=x["decoder_target"][i]), + dict( + encoder_target=x["encoder_target"][i], + decoder_target=x["decoder_target"][i], + ), dict( backcast=output["backcast"][i], prediction=prediction[i], - block_backcasts=[block[i] for block in output["block_backcasts"]], + block_backcasts=[ + block[i] for block in output["block_backcasts"] + ], block_forecasts=[block[i] for block in block_forecasts], ), idx=idx, @@ -506,7 +573,9 @@ def plot_interpretation( ) return figs else: - prediction = output["prediction"] # multi target that has already been transformed + prediction = output[ + "prediction" + ] # multi target that has already been transformed block_forecasts = output["block_forecasts"] if ax is None: @@ -518,7 +587,11 @@ def plot_interpretation( # target prop_cycle = iter(plt.rcParams["axes.prop_cycle"]) color = next(prop_cycle)["color"] - ax[0].plot(torch.arange(-self.hparams.context_length, 0), x["encoder_target"][idx].detach().cpu(), c=color) + ax[0].plot( + torch.arange(-self.hparams.context_length, 0), + x["encoder_target"][idx].detach().cpu(), + c=color, + ) ax[0].plot( torch.arange(self.hparams.prediction_length), x["decoder_target"][idx].detach().cpu(), diff --git a/pytorch_forecasting/models/nhits/sub_modules.py b/pytorch_forecasting/models/nhits/sub_modules.py index a9ec43389..bc882c459 100644 --- a/pytorch_forecasting/models/nhits/sub_modules.py +++ b/pytorch_forecasting/models/nhits/sub_modules.py @@ -10,7 +10,11 @@ class StaticFeaturesEncoder(nn.Module): def __init__(self, in_features, out_features): super().__init__() - layers = [nn.Dropout(p=0.5), nn.Linear(in_features=in_features, out_features=out_features), nn.ReLU()] + layers = [ + nn.Dropout(p=0.5), + nn.Linear(in_features=in_features, out_features=out_features), + nn.ReLU(), + ] self.encoder = nn.Sequential(*layers) def forward(self, x): @@ -21,7 +25,9 @@ def forward(self, x): class IdentityBasis(nn.Module): def __init__(self, backcast_size: int, forecast_size: int, interpolation_mode: str): super().__init__() - assert (interpolation_mode in ["linear", "nearest"]) or ("cubic" in interpolation_mode) + assert (interpolation_mode in ["linear", "nearest"]) or ( + "cubic" in interpolation_mode + ) self.forecast_size = forecast_size self.backcast_size = backcast_size self.interpolation_mode = interpolation_mode @@ -38,7 +44,9 @@ def forward( if self.interpolation_mode == "nearest": knots = knots[:, None, :] - forecast = F.interpolate(knots, size=self.forecast_size, mode=self.interpolation_mode) + forecast = F.interpolate( + knots, size=self.forecast_size, mode=self.interpolation_mode + ) forecast = forecast[:, 0, :] elif self.interpolation_mode == "linear": knots = knots[:, None, :] @@ -53,9 +61,13 @@ def forward( n_batches = int(np.ceil(len(knots) / batch_size)) for i in range(n_batches): forecast_i = F.interpolate( - knots[i * batch_size : (i + 1) * batch_size], size=self.forecast_size, mode="bicubic" + knots[i * batch_size : (i + 1) * batch_size], + size=self.forecast_size, + mode="bicubic", ) # , align_corners=True) - forecast[i * batch_size : (i + 1) * batch_size] += forecast_i[:, 0, 0, :] + forecast[i * batch_size : (i + 1) * batch_size] += forecast_i[ + :, 0, 0, : + ] return backcast, forecast @@ -137,17 +149,32 @@ def __init__( activ = getattr(nn, activation)() if pooling_mode == "max": - self.pooling_layer = nn.MaxPool1d(kernel_size=self.pooling_sizes, stride=self.pooling_sizes, ceil_mode=True) + self.pooling_layer = nn.MaxPool1d( + kernel_size=self.pooling_sizes, + stride=self.pooling_sizes, + ceil_mode=True, + ) elif pooling_mode == "average": - self.pooling_layer = nn.AvgPool1d(kernel_size=self.pooling_sizes, stride=self.pooling_sizes, ceil_mode=True) + self.pooling_layer = nn.AvgPool1d( + kernel_size=self.pooling_sizes, + stride=self.pooling_sizes, + ceil_mode=True, + ) hidden_layers = [] for i in range(n_layers): - hidden_layers.append(nn.Linear(in_features=self.hidden_size[i], out_features=self.hidden_size[i + 1])) + hidden_layers.append( + nn.Linear( + in_features=self.hidden_size[i], + out_features=self.hidden_size[i + 1], + ) + ) hidden_layers.append(activ) if self.batch_normalization: - hidden_layers.append(nn.BatchNorm1d(num_features=self.hidden_size[i + 1])) + hidden_layers.append( + nn.BatchNorm1d(num_features=self.hidden_size[i + 1]) + ) if self.dropout > 0: hidden_layers.append(nn.Dropout(p=self.dropout)) @@ -155,19 +182,26 @@ def __init__( output_layer = [ nn.Linear( in_features=self.hidden_size[-1], - out_features=context_length * len(output_size) + n_theta * sum(output_size), + out_features=context_length * len(output_size) + + n_theta * sum(output_size), ) ] layers = hidden_layers + output_layer # static_size is computed with data, static_hidden_size is provided by user, if 0 no statics are used if (self.static_size > 0) and (self.static_hidden_size > 0): - self.static_encoder = StaticFeaturesEncoder(in_features=static_size, out_features=static_hidden_size) + self.static_encoder = StaticFeaturesEncoder( + in_features=static_size, out_features=static_hidden_size + ) self.layers = nn.Sequential(*layers) self.basis = basis def forward( - self, encoder_y: torch.Tensor, encoder_x_t: torch.Tensor, decoder_x_t: torch.Tensor, x_s: torch.Tensor + self, + encoder_y: torch.Tensor, + encoder_x_t: torch.Tensor, + decoder_x_t: torch.Tensor, + x_s: torch.Tensor, ) -> Tuple[torch.Tensor, torch.Tensor]: batch_size = len(encoder_y) @@ -201,11 +235,21 @@ def forward( # Compute local projection weights and projection theta = self.layers(encoder_y) - backcast_theta = theta[:, : self.context_length * len(self.output_size)].reshape(-1, self.context_length) - forecast_theta = theta[:, self.context_length * len(self.output_size) :].reshape(-1, self.n_theta) - backcast, forecast = self.basis(backcast_theta, forecast_theta, encoder_x_t, decoder_x_t) - backcast = backcast.reshape(-1, len(self.output_size), self.context_length).transpose(1, 2) - forecast = forecast.reshape(-1, sum(self.output_size), self.prediction_length).transpose(1, 2) + backcast_theta = theta[ + :, : self.context_length * len(self.output_size) + ].reshape(-1, self.context_length) + forecast_theta = theta[ + :, self.context_length * len(self.output_size) : + ].reshape(-1, self.n_theta) + backcast, forecast = self.basis( + backcast_theta, forecast_theta, encoder_x_t, decoder_x_t + ) + backcast = backcast.reshape( + -1, len(self.output_size), self.context_length + ).transpose(1, 2) + forecast = forecast.reshape( + -1, sum(self.output_size), self.prediction_length + ).transpose(1, 2) return backcast, forecast @@ -343,15 +387,17 @@ def forward( decoder_x_t, x_s, ): - residuals = ( - encoder_y # .flip(dims=(1,)) # todo: check if flip is required or should be rather replaced by scatter - ) + residuals = encoder_y # .flip(dims=(1,)) # todo: check if flip is required or should be rather replaced by scatter # encoder_x_t = encoder_x_t.flip(dims=(-1,)) # encoder_mask = encoder_mask.flip(dims=(-1,)) encoder_mask = encoder_mask.unsqueeze(-1) - level = encoder_y[:, -1:].repeat(1, self.prediction_length, 1) # Level with Naive1 - forecast_level = level.repeat_interleave(torch.tensor(self.output_size, device=level.device), dim=2) + level = encoder_y[:, -1:].repeat( + 1, self.prediction_length, 1 + ) # Level with Naive1 + forecast_level = level.repeat_interleave( + torch.tensor(self.output_size, device=level.device), dim=2 + ) # level with last available observation if self.naive_level: @@ -367,7 +413,10 @@ def forward( # forecast by block for block in self.blocks: block_backcast, block_forecast = block( - encoder_y=residuals, encoder_x_t=encoder_x_t, decoder_x_t=decoder_x_t, x_s=x_s + encoder_y=residuals, + encoder_x_t=encoder_x_t, + decoder_x_t=decoder_x_t, + x_s=x_s, ) residuals = (residuals - block_backcast) * encoder_mask diff --git a/pytorch_forecasting/models/nn/__init__.py b/pytorch_forecasting/models/nn/__init__.py index 1d5121021..e5adfaa76 100644 --- a/pytorch_forecasting/models/nn/__init__.py +++ b/pytorch_forecasting/models/nn/__init__.py @@ -2,4 +2,11 @@ from pytorch_forecasting.models.nn.rnn import GRU, LSTM, HiddenState, get_rnn from pytorch_forecasting.utils import TupleOutputMixIn -__all__ = ["MultiEmbedding", "get_rnn", "LSTM", "GRU", "HiddenState", "TupleOutputMixIn"] +__all__ = [ + "MultiEmbedding", + "get_rnn", + "LSTM", + "GRU", + "HiddenState", + "TupleOutputMixIn", +] diff --git a/pytorch_forecasting/models/nn/embeddings.py b/pytorch_forecasting/models/nn/embeddings.py index c9af1740f..a5ecbf93d 100644 --- a/pytorch_forecasting/models/nn/embeddings.py +++ b/pytorch_forecasting/models/nn/embeddings.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Tuple, Union, Optional +from typing import Dict, List, Optional, Tuple, Union import torch import torch.nn as nn @@ -16,13 +16,17 @@ def forward(self, x): return super().forward(x) # Squash samples and timesteps into a single axis - x_reshape = x.contiguous().view(-1, x.size(-1)) # (samples * timesteps, input_size) + x_reshape = x.contiguous().view( + -1, x.size(-1) + ) # (samples * timesteps, input_size) y = super().forward(x_reshape) # We have to reshape Y if self.batch_first: - y = y.contiguous().view(x.size(0), -1, y.size(-1)) # (samples, timesteps, output_size) + y = y.contiguous().view( + x.size(0), -1, y.size(-1) + ) # (samples, timesteps, output_size) else: y = y.view(-1, x.size(1), y.size(-1)) # (timesteps, samples, output_size) return y @@ -33,7 +37,9 @@ class MultiEmbedding(nn.Module): def __init__( self, - embedding_sizes: Union[Dict[str, Tuple[int, int]], Dict[str, int], List[int], List[Tuple[int, int]]], + embedding_sizes: Union[ + Dict[str, Tuple[int, int]], Dict[str, int], List[int], List[Tuple[int, int]] + ], x_categoricals: List[str] = None, categorical_groups: Optional[Dict[str, List[str]]] = None, embedding_paddings: Optional[List[str]] = None, @@ -79,7 +85,9 @@ def __init__( self.concat_output = False # return dictionary of embeddings # conduct input data checks assert x_categoricals is not None, "x_categoricals must be provided." - categorical_group_variables = [name for names in categorical_groups.values() for name in names] + categorical_group_variables = [ + name for names in categorical_groups.values() for name in names + ] if len(categorical_groups) > 0: assert all( name in embedding_sizes for name in categorical_groups @@ -91,7 +99,9 @@ def __init__( name in x_categoricals for name in categorical_group_variables ), "group variables in categorical_groups must be in x_categoricals." assert all( - name in embedding_sizes for name in embedding_sizes if name not in categorical_group_variables + name in embedding_sizes + for name in embedding_sizes + if name not in categorical_group_variables ), ( "all variables in embedding_sizes must be in x_categoricals - but only if" "not already in categorical_groups." @@ -101,7 +111,9 @@ def __init__( x_categoricals is None and len(categorical_groups) == 0 ), "If embedding_sizes is not a dictionary, categorical_groups and x_categoricals must be empty." # number embeddings based on order - embedding_sizes = {str(name): size for name, size in enumerate(embedding_sizes)} + embedding_sizes = { + str(name): size for name, size in enumerate(embedding_sizes) + } x_categoricals = list(embedding_sizes.keys()) self.concat_output = True @@ -128,7 +140,10 @@ def init_embeddings(self): self.embedding_sizes[name][1] = embedding_size if name in self.categorical_groups: # embedding bag if related embeddings self.embeddings[name] = TimeDistributedEmbeddingBag( - self.embedding_sizes[name][0], embedding_size, mode="sum", batch_first=True + self.embedding_sizes[name][0], + embedding_size, + mode="sum", + batch_first=True, ) else: if name in self.embedding_paddings: @@ -185,7 +200,10 @@ def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]: input_vectors[name] = emb( x[ ..., - [self.x_categoricals.index(cat_name) for cat_name in self.categorical_groups[name]], + [ + self.x_categoricals.index(cat_name) + for cat_name in self.categorical_groups[name] + ], ] ) else: diff --git a/pytorch_forecasting/models/nn/rnn.py b/pytorch_forecasting/models/nn/rnn.py index 0853b7d13..c96725d5f 100644 --- a/pytorch_forecasting/models/nn/rnn.py +++ b/pytorch_forecasting/models/nn/rnn.py @@ -21,7 +21,10 @@ class RNN(ABC, nn.RNNBase): @abstractmethod def handle_no_encoding( - self, hidden_state: HiddenState, no_encoding: torch.BoolTensor, initial_hidden_state: HiddenState + self, + hidden_state: HiddenState, + no_encoding: torch.BoolTensor, + initial_hidden_state: HiddenState, ) -> HiddenState: """ Mask the hidden_state where there is no encoding. @@ -50,7 +53,9 @@ def init_hidden_state(self, x: torch.Tensor) -> HiddenState: pass @abstractmethod - def repeat_interleave(self, hidden_state: HiddenState, n_samples: int) -> HiddenState: + def repeat_interleave( + self, hidden_state: HiddenState, n_samples: int + ) -> HiddenState: """ Duplicate the hidden_state n_samples times. @@ -89,7 +94,9 @@ def forward( Output is packed sequence if input has been a packed sequence. """ if isinstance(x, rnn.PackedSequence) or lengths is None: - assert lengths is None, "cannot combine x of type PackedSequence with lengths argument" + assert ( + lengths is None + ), "cannot combine x of type PackedSequence with lengths argument" return super().forward(x, hx=hx) else: min_length = lengths.min() @@ -99,15 +106,30 @@ def forward( if max_length == 0: hidden_state = self.init_hidden_state(x) if self.batch_first: - out = torch.zeros(lengths.size(0), x.size(1), self.hidden_size, dtype=x.dtype, device=x.device) + out = torch.zeros( + lengths.size(0), + x.size(1), + self.hidden_size, + dtype=x.dtype, + device=x.device, + ) else: - out = torch.zeros(x.size(0), lengths.size(0), self.hidden_size, dtype=x.dtype, device=x.device) + out = torch.zeros( + x.size(0), + lengths.size(0), + self.hidden_size, + dtype=x.dtype, + device=x.device, + ) return out, hidden_state else: pack_lengths = lengths.where(lengths > 0, torch.ones_like(lengths)) packed_out, hidden_state = super().forward( rnn.pack_padded_sequence( - x, pack_lengths.cpu(), enforce_sorted=enforce_sorted, batch_first=self.batch_first + x, + pack_lengths.cpu(), + enforce_sorted=enforce_sorted, + batch_first=self.batch_first, ), hx=hx, ) @@ -121,10 +143,14 @@ def forward( else: initial_hidden_state = hx # propagate initial hidden state when sequence length was 0 - hidden_state = self.handle_no_encoding(hidden_state, no_encoding, initial_hidden_state) + hidden_state = self.handle_no_encoding( + hidden_state, no_encoding, initial_hidden_state + ) # return unpacked sequence - out, _ = rnn.pad_packed_sequence(packed_out, batch_first=self.batch_first) + out, _ = rnn.pad_packed_sequence( + packed_out, batch_first=self.batch_first + ) return out, hidden_state @@ -132,7 +158,10 @@ class LSTM(RNN, nn.LSTM): """LSTM that can handle zero-length sequences""" def handle_no_encoding( - self, hidden_state: HiddenState, no_encoding: torch.BoolTensor, initial_hidden_state: HiddenState + self, + hidden_state: HiddenState, + no_encoding: torch.BoolTensor, + initial_hidden_state: HiddenState, ) -> HiddenState: hidden, cell = hidden_state hidden = hidden.masked_scatter(no_encoding, initial_hidden_state[0]) @@ -157,7 +186,9 @@ def init_hidden_state(self, x: torch.Tensor) -> HiddenState: ) return hidden, cell - def repeat_interleave(self, hidden_state: HiddenState, n_samples: int) -> HiddenState: + def repeat_interleave( + self, hidden_state: HiddenState, n_samples: int + ) -> HiddenState: hidden, cell = hidden_state hidden = hidden.repeat_interleave(n_samples, 1) cell = cell.repeat_interleave(n_samples, 1) @@ -168,7 +199,10 @@ class GRU(RNN, nn.GRU): """GRU that can handle zero-length sequences""" def handle_no_encoding( - self, hidden_state: HiddenState, no_encoding: torch.BoolTensor, initial_hidden_state: HiddenState + self, + hidden_state: HiddenState, + no_encoding: torch.BoolTensor, + initial_hidden_state: HiddenState, ) -> HiddenState: return hidden_state.masked_scatter(no_encoding, initial_hidden_state) @@ -185,7 +219,9 @@ def init_hidden_state(self, x: torch.Tensor) -> HiddenState: ) return hidden - def repeat_interleave(self, hidden_state: HiddenState, n_samples: int) -> HiddenState: + def repeat_interleave( + self, hidden_state: HiddenState, n_samples: int + ) -> HiddenState: return hidden_state.repeat_interleave(n_samples, 1) @@ -206,5 +242,7 @@ def get_rnn(cell_type: Union[Type[RNN], str]) -> Type[RNN]: elif cell_type == "GRU": rnn = GRU else: - raise ValueError(f"RNN type {cell_type} is not supported. supported: [LSTM, GRU]") + raise ValueError( + f"RNN type {cell_type} is not supported. supported: [LSTM, GRU]" + ) return rnn diff --git a/pytorch_forecasting/models/rnn/_rnn.py b/pytorch_forecasting/models/rnn/_rnn.py index 142892dcb..a3f026292 100644 --- a/pytorch_forecasting/models/rnn/_rnn.py +++ b/pytorch_forecasting/models/rnn/_rnn.py @@ -3,7 +3,7 @@ """ from copy import copy -from typing import Dict, List, Tuple, Union, Optional +from typing import Dict, List, Optional, Tuple, Union import numpy as np import torch @@ -11,7 +11,16 @@ from pytorch_forecasting.data.encoders import MultiNormalizer, NaNLabelEncoder from pytorch_forecasting.data.timeseries import TimeSeriesDataSet -from pytorch_forecasting.metrics import MAE, MAPE, MASE, RMSE, SMAPE, MultiHorizonMetric, MultiLoss, QuantileLoss +from pytorch_forecasting.metrics import ( + MAE, + MAPE, + MASE, + RMSE, + SMAPE, + MultiHorizonMetric, + MultiLoss, + QuantileLoss, +) from pytorch_forecasting.models.base_model import AutoRegressiveBaseModelWithCovariates from pytorch_forecasting.models.nn import HiddenState, MultiEmbedding, get_rnn from pytorch_forecasting.utils import apply_to_list, to_list @@ -123,15 +132,19 @@ def __init__( ) lagged_target_names = [l for lags in target_lags.values() for l in lags] - assert set(self.encoder_variables) - set(to_list(target)) - set(lagged_target_names) == set( - self.decoder_variables - ) - set(lagged_target_names), "Encoder and decoder variables have to be the same apart from target variable" + assert set(self.encoder_variables) - set(to_list(target)) - set( + lagged_target_names + ) == set(self.decoder_variables) - set( + lagged_target_names + ), "Encoder and decoder variables have to be the same apart from target variable" for targeti in to_list(target): assert ( targeti in time_varying_reals_encoder ), f"target {targeti} has to be real" # todo: remove this restriction assert (isinstance(target, str) and isinstance(loss, MultiHorizonMetric)) or ( - isinstance(target, (list, tuple)) and isinstance(loss, MultiLoss) and len(loss) == len(target) + isinstance(target, (list, tuple)) + and isinstance(loss, MultiLoss) + and len(loss) == len(target) ), "number of targets should be equivalent to number of loss metrics" rnn_class = get_rnn(cell_type) @@ -148,14 +161,23 @@ def __init__( # add linear layers for argument projects if isinstance(target, str): # single target - self.output_projector = nn.Linear(self.hparams.hidden_size, self.hparams.output_size) - assert not isinstance(self.loss, QuantileLoss), "QuantileLoss does not work with recurrent network" + self.output_projector = nn.Linear( + self.hparams.hidden_size, self.hparams.output_size + ) + assert not isinstance( + self.loss, QuantileLoss + ), "QuantileLoss does not work with recurrent network" else: # multi target self.output_projector = nn.ModuleList( - [nn.Linear(self.hparams.hidden_size, size) for size in self.hparams.output_size] + [ + nn.Linear(self.hparams.hidden_size, size) + for size in self.hparams.output_size + ] ) for l in self.loss: - assert not isinstance(l, QuantileLoss), "QuantileLoss does not work with recurrent network" + assert not isinstance( + l, QuantileLoss + ), "QuantileLoss does not work with recurrent network" @classmethod def from_dataset( @@ -176,17 +198,29 @@ def from_dataset( Recurrent network """ new_kwargs = copy(kwargs) - new_kwargs.update(cls.deduce_default_output_parameters(dataset=dataset, kwargs=kwargs, default_loss=MAE())) + new_kwargs.update( + cls.deduce_default_output_parameters( + dataset=dataset, kwargs=kwargs, default_loss=MAE() + ) + ) assert not isinstance(dataset.target_normalizer, NaNLabelEncoder) and ( not isinstance(dataset.target_normalizer, MultiNormalizer) - or all(not isinstance(normalizer, NaNLabelEncoder) for normalizer in dataset.target_normalizer) + or all( + not isinstance(normalizer, NaNLabelEncoder) + for normalizer in dataset.target_normalizer + ) ), "target(s) should be continuous - categorical targets are not supported" # todo: remove this restriction return super().from_dataset( - dataset, allowed_encoder_known_variable_names=allowed_encoder_known_variable_names, **new_kwargs + dataset, + allowed_encoder_known_variable_names=allowed_encoder_known_variable_names, + **new_kwargs, ) def construct_input_vector( - self, x_cat: torch.Tensor, x_cont: torch.Tensor, one_off_target: torch.Tensor = None + self, + x_cat: torch.Tensor, + x_cont: torch.Tensor, + one_off_target: torch.Tensor = None, ) -> torch.Tensor: """ Create input vector into RNN network @@ -238,7 +272,9 @@ def decode_all( hidden_state: HiddenState, lengths: torch.Tensor = None, ): - decoder_output, hidden_state = self.rnn(x, hidden_state, lengths=lengths, enforce_sorted=False) + decoder_output, hidden_state = self.rnn( + x, hidden_state, lengths=lengths, enforce_sorted=False + ) if isinstance(self.hparams.target, str): # single target output = self.output_projector(decoder_output) else: @@ -259,7 +295,9 @@ def decode( sampling new targets from past predictions iteratively """ if self.training: - output, _ = self.decode_all(input_vector, hidden_state, lengths=decoder_lengths) + output, _ = self.decode_all( + input_vector, hidden_state, lengths=decoder_lengths + ) output = self.transform_output(output, target_scale=target_scale) else: # run in eval, i.e. simulation mode @@ -278,7 +316,9 @@ def decode_one( if idx > lag: x[:, 0, lag_positions] = lagged_targets[-lag] prediction, hidden_state = self.decode_all(x, hidden_state) - prediction = apply_to_list(prediction, lambda x: x[:, 0]) # select first time step + prediction = apply_to_list( + prediction, lambda x: x[:, 0] + ) # select first time step return prediction, hidden_state # make predictions which are fed into next step @@ -291,7 +331,9 @@ def decode_one( ) return output - def forward(self, x: Dict[str, torch.Tensor], n_samples: int = None) -> Dict[str, torch.Tensor]: + def forward( + self, x: Dict[str, torch.Tensor], n_samples: int = None + ) -> Dict[str, torch.Tensor]: """ Forward network """ @@ -301,7 +343,9 @@ def forward(self, x: Dict[str, torch.Tensor], n_samples: int = None) -> Dict[str x["decoder_cat"], x["decoder_cont"], one_off_target=x["encoder_cont"][ - torch.arange(x["encoder_cont"].size(0), device=x["encoder_cont"].device), + torch.arange( + x["encoder_cont"].size(0), device=x["encoder_cont"].device + ), x["encoder_lengths"] - 1, self.target_positions.unsqueeze(-1), ].T.contiguous(), diff --git a/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py b/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py index 90a73ff15..c823d6229 100644 --- a/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py +++ b/pytorch_forecasting/models/temporal_fusion_transformer/__init__.py @@ -1,6 +1,8 @@ """Temporal fusion transformer for forecasting timeseries.""" -from pytorch_forecasting.models.temporal_fusion_transformer._tft import TemporalFusionTransformer +from pytorch_forecasting.models.temporal_fusion_transformer._tft import ( + TemporalFusionTransformer, +) from pytorch_forecasting.models.temporal_fusion_transformer.sub_modules import ( AddNorm, GateAddNorm, diff --git a/pytorch_forecasting/models/temporal_fusion_transformer/_tft.py b/pytorch_forecasting/models/temporal_fusion_transformer/_tft.py index 5a5ebeac5..75f4d552b 100644 --- a/pytorch_forecasting/models/temporal_fusion_transformer/_tft.py +++ b/pytorch_forecasting/models/temporal_fusion_transformer/_tft.py @@ -3,7 +3,7 @@ """ from copy import copy -from typing import Dict, List, Tuple, Union, Optional +from typing import Dict, List, Optional, Tuple, Union import numpy as np import torch @@ -11,7 +11,14 @@ from torchmetrics import Metric as LightningMetric from pytorch_forecasting.data import TimeSeriesDataSet -from pytorch_forecasting.metrics import MAE, MAPE, RMSE, SMAPE, MultiHorizonMetric, QuantileLoss +from pytorch_forecasting.metrics import ( + MAE, + MAPE, + RMSE, + SMAPE, + MultiHorizonMetric, + QuantileLoss, +) from pytorch_forecasting.models.base_model import BaseModelWithCovariates from pytorch_forecasting.models.nn import LSTM, MultiEmbedding from pytorch_forecasting.models.temporal_fusion_transformer.sub_modules import ( @@ -22,7 +29,14 @@ InterpretableMultiHeadAttention, VariableSelectionNetwork, ) -from pytorch_forecasting.utils import create_mask, detach, integer_histogram, masked_op, padded_stack, to_list +from pytorch_forecasting.utils import ( + create_mask, + detach, + integer_histogram, + masked_op, + padded_stack, + to_list, +) from pytorch_forecasting.utils._dependencies import _check_matplotlib @@ -167,7 +181,9 @@ def __init__( loss = QuantileLoss() self.save_hyperparameters() # store loss function separately as it is a module - assert isinstance(loss, LightningMetric), "Loss has to be a PyTorch Lightning `Metric`" + assert isinstance( + loss, LightningMetric + ), "Loss has to be a PyTorch Lightning `Metric`" super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs) # processing inputs @@ -183,7 +199,12 @@ def __init__( # continuous variable processing self.prescalers = nn.ModuleDict( { - name: nn.Linear(1, self.hparams.hidden_continuous_sizes.get(name, self.hparams.hidden_continuous_size)) + name: nn.Linear( + 1, + self.hparams.hidden_continuous_sizes.get( + name, self.hparams.hidden_continuous_size + ), + ) for name in self.reals } ) @@ -191,39 +212,50 @@ def __init__( # variable selection # variable selection for static variables static_input_sizes = { - name: self.input_embeddings.output_size[name] for name in self.hparams.static_categoricals + name: self.input_embeddings.output_size[name] + for name in self.hparams.static_categoricals } static_input_sizes.update( { - name: self.hparams.hidden_continuous_sizes.get(name, self.hparams.hidden_continuous_size) + name: self.hparams.hidden_continuous_sizes.get( + name, self.hparams.hidden_continuous_size + ) for name in self.hparams.static_reals } ) self.static_variable_selection = VariableSelectionNetwork( input_sizes=static_input_sizes, hidden_size=self.hparams.hidden_size, - input_embedding_flags={name: True for name in self.hparams.static_categoricals}, + input_embedding_flags={ + name: True for name in self.hparams.static_categoricals + }, dropout=self.hparams.dropout, prescalers=self.prescalers, ) # variable selection for encoder and decoder encoder_input_sizes = { - name: self.input_embeddings.output_size[name] for name in self.hparams.time_varying_categoricals_encoder + name: self.input_embeddings.output_size[name] + for name in self.hparams.time_varying_categoricals_encoder } encoder_input_sizes.update( { - name: self.hparams.hidden_continuous_sizes.get(name, self.hparams.hidden_continuous_size) + name: self.hparams.hidden_continuous_sizes.get( + name, self.hparams.hidden_continuous_size + ) for name in self.hparams.time_varying_reals_encoder } ) decoder_input_sizes = { - name: self.input_embeddings.output_size[name] for name in self.hparams.time_varying_categoricals_decoder + name: self.input_embeddings.output_size[name] + for name in self.hparams.time_varying_categoricals_decoder } decoder_input_sizes.update( { - name: self.hparams.hidden_continuous_sizes.get(name, self.hparams.hidden_continuous_size) + name: self.hparams.hidden_continuous_sizes.get( + name, self.hparams.hidden_continuous_size + ) for name in self.hparams.time_varying_reals_decoder } ) @@ -250,24 +282,32 @@ def __init__( self.encoder_variable_selection = VariableSelectionNetwork( input_sizes=encoder_input_sizes, hidden_size=self.hparams.hidden_size, - input_embedding_flags={name: True for name in self.hparams.time_varying_categoricals_encoder}, + input_embedding_flags={ + name: True for name in self.hparams.time_varying_categoricals_encoder + }, dropout=self.hparams.dropout, context_size=self.hparams.hidden_size, prescalers=self.prescalers, single_variable_grns=( - {} if not self.hparams.share_single_variable_networks else self.shared_single_variable_grns + {} + if not self.hparams.share_single_variable_networks + else self.shared_single_variable_grns ), ) self.decoder_variable_selection = VariableSelectionNetwork( input_sizes=decoder_input_sizes, hidden_size=self.hparams.hidden_size, - input_embedding_flags={name: True for name in self.hparams.time_varying_categoricals_decoder}, + input_embedding_flags={ + name: True for name in self.hparams.time_varying_categoricals_decoder + }, dropout=self.hparams.dropout, context_size=self.hparams.hidden_size, prescalers=self.prescalers, single_variable_grns=( - {} if not self.hparams.share_single_variable_networks else self.shared_single_variable_grns + {} + if not self.hparams.share_single_variable_networks + else self.shared_single_variable_grns ), ) @@ -298,7 +338,10 @@ def __init__( # for post lstm static enrichment self.static_context_enrichment = GatedResidualNetwork( - self.hparams.hidden_size, self.hparams.hidden_size, self.hparams.hidden_size, self.hparams.dropout + self.hparams.hidden_size, + self.hparams.hidden_size, + self.hparams.hidden_size, + self.hparams.dropout, ) # lstm encoder (history) and decoder (future) for local processing @@ -319,10 +362,14 @@ def __init__( ) # skip connection for lstm - self.post_lstm_gate_encoder = GatedLinearUnit(self.hparams.hidden_size, dropout=self.hparams.dropout) + self.post_lstm_gate_encoder = GatedLinearUnit( + self.hparams.hidden_size, dropout=self.hparams.dropout + ) self.post_lstm_gate_decoder = self.post_lstm_gate_encoder # self.post_lstm_gate_decoder = GatedLinearUnit(self.hparams.hidden_size, dropout=self.hparams.dropout) - self.post_lstm_add_norm_encoder = AddNorm(self.hparams.hidden_size, trainable_add=False) + self.post_lstm_add_norm_encoder = AddNorm( + self.hparams.hidden_size, trainable_add=False + ) # self.post_lstm_add_norm_decoder = AddNorm(self.hparams.hidden_size, trainable_add=True) self.post_lstm_add_norm_decoder = self.post_lstm_add_norm_encoder @@ -337,24 +384,36 @@ def __init__( # attention for long-range processing self.multihead_attn = InterpretableMultiHeadAttention( - d_model=self.hparams.hidden_size, n_head=self.hparams.attention_head_size, dropout=self.hparams.dropout + d_model=self.hparams.hidden_size, + n_head=self.hparams.attention_head_size, + dropout=self.hparams.dropout, ) self.post_attn_gate_norm = GateAddNorm( self.hparams.hidden_size, dropout=self.hparams.dropout, trainable_add=False ) self.pos_wise_ff = GatedResidualNetwork( - self.hparams.hidden_size, self.hparams.hidden_size, self.hparams.hidden_size, dropout=self.hparams.dropout + self.hparams.hidden_size, + self.hparams.hidden_size, + self.hparams.hidden_size, + dropout=self.hparams.dropout, ) # output processing -> no dropout at this late stage - self.pre_output_gate_norm = GateAddNorm(self.hparams.hidden_size, dropout=None, trainable_add=False) + self.pre_output_gate_norm = GateAddNorm( + self.hparams.hidden_size, dropout=None, trainable_add=False + ) if self.n_targets > 1: # if to run with multiple targets self.output_layer = nn.ModuleList( - [nn.Linear(self.hparams.hidden_size, output_size) for output_size in self.hparams.output_size] + [ + nn.Linear(self.hparams.hidden_size, output_size) + for output_size in self.hparams.output_size + ] ) else: - self.output_layer = nn.Linear(self.hparams.hidden_size, self.hparams.output_size) + self.output_layer = nn.Linear( + self.hparams.hidden_size, self.hparams.output_size + ) @classmethod def from_dataset( @@ -378,11 +437,15 @@ def from_dataset( # update defaults new_kwargs = copy(kwargs) new_kwargs["max_encoder_length"] = dataset.max_encoder_length - new_kwargs.update(cls.deduce_default_output_parameters(dataset, kwargs, QuantileLoss())) + new_kwargs.update( + cls.deduce_default_output_parameters(dataset, kwargs, QuantileLoss()) + ) # create class and return return super().from_dataset( - dataset, allowed_encoder_known_variable_names=allowed_encoder_known_variable_names, **new_kwargs + dataset, + allowed_encoder_known_variable_names=allowed_encoder_known_variable_names, + **new_kwargs, ) def expand_static_context(self, context, timesteps): @@ -391,7 +454,9 @@ def expand_static_context(self, context, timesteps): """ return context[:, None].expand(-1, timesteps, -1) - def get_attention_mask(self, encoder_lengths: torch.LongTensor, decoder_lengths: torch.LongTensor): + def get_attention_mask( + self, encoder_lengths: torch.LongTensor, decoder_lengths: torch.LongTensor + ): """ Returns causal mask to apply for self-attention layer. """ @@ -402,7 +467,11 @@ def get_attention_mask(self, encoder_lengths: torch.LongTensor, decoder_lengths: # indices for which is predicted predict_step = torch.arange(0, decoder_length, device=self.device)[:, None] # do not attend to steps to self or after prediction - decoder_mask = (attend_step >= predict_step).unsqueeze(0).expand(encoder_lengths.size(0), -1, -1) + decoder_mask = ( + (attend_step >= predict_step) + .unsqueeze(0) + .expand(encoder_lengths.size(0), -1, -1) + ) else: # there is value in attending to future forecasts if they are made with knowledge currently # available @@ -410,9 +479,17 @@ def get_attention_mask(self, encoder_lengths: torch.LongTensor, decoder_lengths: # matter in the future than the past) # or alternatively using the same layer but allowing forward attention - i.e. only # masking out non-available data and self - decoder_mask = create_mask(decoder_length, decoder_lengths).unsqueeze(1).expand(-1, decoder_length, -1) + decoder_mask = ( + create_mask(decoder_length, decoder_lengths) + .unsqueeze(1) + .expand(-1, decoder_length, -1) + ) # do not attend to steps where data is padded - encoder_mask = create_mask(encoder_lengths.max(), encoder_lengths).unsqueeze(1).expand(-1, decoder_length, -1) + encoder_mask = ( + create_mask(encoder_lengths.max(), encoder_lengths) + .unsqueeze(1) + .expand(-1, decoder_length, -1) + ) # combine masks along attended time - first encoder and then decoder mask = torch.cat( ( @@ -429,8 +506,12 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: """ encoder_lengths = x["encoder_lengths"] decoder_lengths = x["decoder_lengths"] - x_cat = torch.cat([x["encoder_cat"], x["decoder_cat"]], dim=1) # concatenate in time dimension - x_cont = torch.cat([x["encoder_cont"], x["decoder_cont"]], dim=1) # concatenate in time dimension + x_cat = torch.cat( + [x["encoder_cat"], x["decoder_cat"]], dim=1 + ) # concatenate in time dimension + x_cont = torch.cat( + [x["encoder_cont"], x["decoder_cont"]], dim=1 + ) # concatenate in time dimension timesteps = x_cont.size(1) # encode + decode length max_encoder_length = int(encoder_lengths.max()) input_vectors = self.input_embeddings(x_cat) @@ -445,32 +526,46 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: # Embedding and variable selection if len(self.static_variables) > 0: # static embeddings will be constant over entire batch - static_embedding = {name: input_vectors[name][:, 0] for name in self.static_variables} - static_embedding, static_variable_selection = self.static_variable_selection(static_embedding) + static_embedding = { + name: input_vectors[name][:, 0] for name in self.static_variables + } + static_embedding, static_variable_selection = ( + self.static_variable_selection(static_embedding) + ) else: static_embedding = torch.zeros( - (x_cont.size(0), self.hparams.hidden_size), dtype=self.dtype, device=self.device + (x_cont.size(0), self.hparams.hidden_size), + dtype=self.dtype, + device=self.device, + ) + static_variable_selection = torch.zeros( + (x_cont.size(0), 0), dtype=self.dtype, device=self.device ) - static_variable_selection = torch.zeros((x_cont.size(0), 0), dtype=self.dtype, device=self.device) static_context_variable_selection = self.expand_static_context( self.static_context_variable_selection(static_embedding), timesteps ) embeddings_varying_encoder = { - name: input_vectors[name][:, :max_encoder_length] for name in self.encoder_variables + name: input_vectors[name][:, :max_encoder_length] + for name in self.encoder_variables } - embeddings_varying_encoder, encoder_sparse_weights = self.encoder_variable_selection( - embeddings_varying_encoder, - static_context_variable_selection[:, :max_encoder_length], + embeddings_varying_encoder, encoder_sparse_weights = ( + self.encoder_variable_selection( + embeddings_varying_encoder, + static_context_variable_selection[:, :max_encoder_length], + ) ) embeddings_varying_decoder = { - name: input_vectors[name][:, max_encoder_length:] for name in self.decoder_variables # select decoder + name: input_vectors[name][:, max_encoder_length:] + for name in self.decoder_variables # select decoder } - embeddings_varying_decoder, decoder_sparse_weights = self.decoder_variable_selection( - embeddings_varying_decoder, - static_context_variable_selection[:, max_encoder_length:], + embeddings_varying_decoder, decoder_sparse_weights = ( + self.decoder_variable_selection( + embeddings_varying_decoder, + static_context_variable_selection[:, max_encoder_length:], + ) ) # LSTM @@ -478,11 +573,16 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: input_hidden = self.static_context_initial_hidden_lstm(static_embedding).expand( self.hparams.lstm_layers, -1, -1 ) - input_cell = self.static_context_initial_cell_lstm(static_embedding).expand(self.hparams.lstm_layers, -1, -1) + input_cell = self.static_context_initial_cell_lstm(static_embedding).expand( + self.hparams.lstm_layers, -1, -1 + ) # run local encoder encoder_output, (hidden, cell) = self.lstm_encoder( - embeddings_varying_encoder, (input_hidden, input_cell), lengths=encoder_lengths, enforce_sorted=False + embeddings_varying_encoder, + (input_hidden, input_cell), + lengths=encoder_lengths, + enforce_sorted=False, ) # run local decoder @@ -495,17 +595,22 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: # skip connection over lstm lstm_output_encoder = self.post_lstm_gate_encoder(encoder_output) - lstm_output_encoder = self.post_lstm_add_norm_encoder(lstm_output_encoder, embeddings_varying_encoder) + lstm_output_encoder = self.post_lstm_add_norm_encoder( + lstm_output_encoder, embeddings_varying_encoder + ) lstm_output_decoder = self.post_lstm_gate_decoder(decoder_output) - lstm_output_decoder = self.post_lstm_add_norm_decoder(lstm_output_decoder, embeddings_varying_decoder) + lstm_output_decoder = self.post_lstm_add_norm_decoder( + lstm_output_decoder, embeddings_varying_decoder + ) lstm_output = torch.cat([lstm_output_encoder, lstm_output_decoder], dim=1) # static enrichment static_context_enrichment = self.static_context_enrichment(static_embedding) attn_input = self.static_enrichment( - lstm_output, self.expand_static_context(static_context_enrichment, timesteps) + lstm_output, + self.expand_static_context(static_context_enrichment, timesteps), ) # Attention @@ -513,11 +618,15 @@ def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: q=attn_input[:, max_encoder_length:], # query only for predictions k=attn_input, v=attn_input, - mask=self.get_attention_mask(encoder_lengths=encoder_lengths, decoder_lengths=decoder_lengths), + mask=self.get_attention_mask( + encoder_lengths=encoder_lengths, decoder_lengths=decoder_lengths + ), ) # skip connection over attention - attn_output = self.post_attn_gate_norm(attn_output, attn_input[:, max_encoder_length:]) + attn_output = self.post_attn_gate_norm( + attn_output, attn_input[:, max_encoder_length:] + ) output = self.pos_wise_ff(attn_output) @@ -604,8 +713,12 @@ def interpret_output( decoder_attention[idx, :, :, :decoder_length] = x[..., :decoder_length] else: decoder_attention = out["decoder_attention"].clone() - decoder_mask = create_mask(out["decoder_attention"].size(1), out["decoder_lengths"]) - decoder_attention[decoder_mask[..., None, None].expand_as(decoder_attention)] = float("nan") + decoder_mask = create_mask( + out["decoder_attention"].size(1), out["decoder_lengths"] + ) + decoder_attention[ + decoder_mask[..., None, None].expand_as(decoder_attention) + ] = float("nan") if isinstance(out["encoder_attention"], (list, tuple)): # same game for encoder attention @@ -620,17 +733,17 @@ def interpret_output( # scatter into tensor for idx, x in enumerate(out["encoder_attention"]): encoder_length = out["encoder_lengths"][idx] - encoder_attention[idx, :, :, self.hparams.max_encoder_length - encoder_length :] = x[ - ..., :encoder_length - ] + encoder_attention[ + idx, :, :, self.hparams.max_encoder_length - encoder_length : + ] = x[..., :encoder_length] else: # roll encoder attention (so start last encoder value is on the right) encoder_attention = out["encoder_attention"].clone() shifts = encoder_attention.size(3) - out["encoder_lengths"] new_index = ( - torch.arange(encoder_attention.size(3), device=encoder_attention.device)[None, None, None].expand_as( - encoder_attention - ) + torch.arange( + encoder_attention.size(3), device=encoder_attention.device + )[None, None, None].expand_as(encoder_attention) - shifts[:, None, None, None] ) % encoder_attention.size(3) encoder_attention = torch.gather(encoder_attention, dim=3, index=new_index) @@ -641,7 +754,8 @@ def interpret_output( torch.full( ( *encoder_attention.shape[:-1], - self.hparams.max_encoder_length - out["encoder_lengths"].max(), + self.hparams.max_encoder_length + - out["encoder_lengths"].max(), ), float("nan"), dtype=encoder_attention.dtype, @@ -657,7 +771,9 @@ def interpret_output( attention[attention < 1e-5] = float("nan") # histogram of decode and encode lengths - encoder_length_histogram = integer_histogram(out["encoder_lengths"], min=0, max=self.hparams.max_encoder_length) + encoder_length_histogram = integer_histogram( + out["encoder_lengths"], min=0, max=self.hparams.max_encoder_length + ) decoder_length_histogram = integer_histogram( out["decoder_lengths"], min=1, max=out["decoder_variables"].size(1) ) @@ -665,7 +781,9 @@ def interpret_output( # mask where decoder and encoder where not applied when averaging variable selection weights encoder_variables = out["encoder_variables"].squeeze(-2).clone() encode_mask = create_mask(encoder_variables.size(1), out["encoder_lengths"]) - encoder_variables = encoder_variables.masked_fill(encode_mask.unsqueeze(-1), 0.0).sum(dim=1) + encoder_variables = encoder_variables.masked_fill( + encode_mask.unsqueeze(-1), 0.0 + ).sum(dim=1) encoder_variables /= ( out["encoder_lengths"] .where(out["encoder_lengths"] > 0, torch.ones_like(out["encoder_lengths"])) @@ -674,7 +792,9 @@ def interpret_output( decoder_variables = out["decoder_variables"].squeeze(-2).clone() decode_mask = create_mask(decoder_variables.size(1), out["decoder_lengths"]) - decoder_variables = decoder_variables.masked_fill(decode_mask.unsqueeze(-1), 0.0).sum(dim=1) + decoder_variables = decoder_variables.masked_fill( + decode_mask.unsqueeze(-1), 0.0 + ).sum(dim=1) decoder_variables /= out["decoder_lengths"].unsqueeze(-1) # static variables need no masking @@ -683,7 +803,10 @@ def interpret_output( # average over heads + only keep prediction attention and attention on observed timesteps attention = masked_op( attention[ - :, attention_prediction_horizon, :, : self.hparams.max_encoder_length + attention_prediction_horizon + :, + attention_prediction_horizon, + :, + : self.hparams.max_encoder_length + attention_prediction_horizon, ], op="mean", dim=1, @@ -696,7 +819,9 @@ def interpret_output( attention = masked_op(attention, dim=0, op=reduction) else: - attention = attention / masked_op(attention, dim=1, op="sum").unsqueeze(-1) # renormalize + attention = attention / masked_op(attention, dim=1, op="sum").unsqueeze( + -1 + ) # renormalize interpretation = dict( attention=attention.masked_fill(torch.isnan(attention), 0.0), @@ -786,7 +911,11 @@ def plot_interpretation(self, interpretation: Dict[str, torch.Tensor]): attention = interpretation["attention"].detach().cpu() attention = attention / attention.sum(-1).unsqueeze(-1) ax.plot( - np.arange(-self.hparams.max_encoder_length, attention.size(0) - self.hparams.max_encoder_length), attention + np.arange( + -self.hparams.max_encoder_length, + attention.size(0) - self.hparams.max_encoder_length, + ), + attention, ) ax.set_xlabel("Time index") ax.set_ylabel("Attention") @@ -798,20 +927,30 @@ def make_selection_plot(title, values, labels): fig, ax = plt.subplots(figsize=(7, len(values) * 0.25 + 2)) order = np.argsort(values) values = values / values.sum(-1).unsqueeze(-1) - ax.barh(np.arange(len(values)), values[order] * 100, tick_label=np.asarray(labels)[order]) + ax.barh( + np.arange(len(values)), + values[order] * 100, + tick_label=np.asarray(labels)[order], + ) ax.set_title(title) ax.set_xlabel("Importance in %") plt.tight_layout() return fig figs["static_variables"] = make_selection_plot( - "Static variables importance", interpretation["static_variables"].detach().cpu(), self.static_variables + "Static variables importance", + interpretation["static_variables"].detach().cpu(), + self.static_variables, ) figs["encoder_variables"] = make_selection_plot( - "Encoder variables importance", interpretation["encoder_variables"].detach().cpu(), self.encoder_variables + "Encoder variables importance", + interpretation["encoder_variables"].detach().cpu(), + self.encoder_variables, ) figs["decoder_variables"] = make_selection_plot( - "Decoder variables importance", interpretation["decoder_variables"].detach().cpu(), self.decoder_variables + "Decoder variables importance", + interpretation["decoder_variables"].detach().cpu(), + self.decoder_variables, ) return figs @@ -823,12 +962,18 @@ def log_interpretation(self, outputs): # extract interpretations interpretation = { # use padded_stack because decoder length histogram can be of different length - name: padded_stack([x["interpretation"][name].detach() for x in outputs], side="right", value=0).sum(0) + name: padded_stack( + [x["interpretation"][name].detach() for x in outputs], + side="right", + value=0, + ).sum(0) for name in outputs[0]["interpretation"].keys() } # normalize attention with length histogram squared to account for: 1. zeros in attention and # 2. higher attention due to less values - attention_occurances = interpretation["encoder_length_histogram"][1:].flip(0).float().cumsum(0) + attention_occurances = ( + interpretation["encoder_length_histogram"][1:].flip(0).float().cumsum(0) + ) attention_occurances = attention_occurances / attention_occurances.max() attention_occurances = torch.cat( [ @@ -841,8 +986,12 @@ def log_interpretation(self, outputs): ], dim=0, ) - interpretation["attention"] = interpretation["attention"] / attention_occurances.pow(2).clamp(1.0) - interpretation["attention"] = interpretation["attention"] / interpretation["attention"].sum() + interpretation["attention"] = interpretation[ + "attention" + ] / attention_occurances.pow(2).clamp(1.0) + interpretation["attention"] = ( + interpretation["attention"] / interpretation["attention"].sum() + ) mpl_available = _check_matplotlib("log_interpretation", raise_error=False) @@ -857,14 +1006,21 @@ def log_interpretation(self, outputs): # log to tensorboard for name, fig in figs.items(): self.logger.experiment.add_figure( - f"{label.capitalize()} {name} importance", fig, global_step=self.global_step + f"{label.capitalize()} {name} importance", + fig, + global_step=self.global_step, ) # log lengths of encoder/decoder for type in ["encoder", "decoder"]: fig, ax = plt.subplots() lengths = ( - padded_stack([out["interpretation"][f"{type}_length_histogram"] for out in outputs]) + padded_stack( + [ + out["interpretation"][f"{type}_length_histogram"] + for out in outputs + ] + ) .sum(0) .detach() .cpu() @@ -879,7 +1035,9 @@ def log_interpretation(self, outputs): ax.set_title(f"{type.capitalize()} length distribution in {label} epoch") self.logger.experiment.add_figure( - f"{label.capitalize()} {type} length distribution", fig, global_step=self.global_step + f"{label.capitalize()} {type} length distribution", + fig, + global_step=self.global_step, ) def log_embeddings(self): @@ -894,5 +1052,8 @@ def log_embeddings(self): for name, emb in self.input_embeddings.items(): labels = self.hparams.embedding_labels[name] self.logger.experiment.add_embedding( - emb.weight.data.detach().cpu(), metadata=labels, tag=name, global_step=self.global_step + emb.weight.data.detach().cpu(), + metadata=labels, + tag=name, + global_step=self.global_step, ) diff --git a/pytorch_forecasting/models/temporal_fusion_transformer/sub_modules.py b/pytorch_forecasting/models/temporal_fusion_transformer/sub_modules.py index db9a0a884..7ad55b11d 100644 --- a/pytorch_forecasting/models/temporal_fusion_transformer/sub_modules.py +++ b/pytorch_forecasting/models/temporal_fusion_transformer/sub_modules.py @@ -2,9 +2,9 @@ Implementation of ``nn.Modules`` for temporal fusion transformer. """ +from copy import deepcopy import math from typing import Dict, Tuple -from copy import deepcopy import torch import torch.nn as nn @@ -22,20 +22,26 @@ def forward(self, x): return self.module(x) # Squash samples and timesteps into a single axis - x_reshape = x.contiguous().view(-1, x.size(-1)) # (samples * timesteps, input_size) + x_reshape = x.contiguous().view( + -1, x.size(-1) + ) # (samples * timesteps, input_size) y = self.module(x_reshape) # We have to reshape Y if self.batch_first: - y = y.contiguous().view(x.size(0), -1, y.size(-1)) # (samples, timesteps, output_size) + y = y.contiguous().view( + x.size(0), -1, y.size(-1) + ) # (samples, timesteps, output_size) else: y = y.view(-1, x.size(1), y.size(-1)) # (timesteps, samples, output_size) return y class TimeDistributedInterpolation(nn.Module): - def __init__(self, output_size: int, batch_first: bool = False, trainable: bool = False): + def __init__( + self, output_size: int, batch_first: bool = False, trainable: bool = False + ): super().__init__() self.output_size = output_size self.batch_first = batch_first @@ -45,7 +51,9 @@ def __init__(self, output_size: int, batch_first: bool = False, trainable: bool self.gate = nn.Sigmoid() def interpolate(self, x): - upsampled = F.interpolate(x.unsqueeze(1), self.output_size, mode="linear", align_corners=True).squeeze(1) + upsampled = F.interpolate( + x.unsqueeze(1), self.output_size, mode="linear", align_corners=True + ).squeeze(1) if self.trainable: upsampled = upsampled * self.gate(self.mask.unsqueeze(0)) * 2.0 return upsampled @@ -55,13 +63,17 @@ def forward(self, x): return self.interpolate(x) # Squash samples and timesteps into a single axis - x_reshape = x.contiguous().view(-1, x.size(-1)) # (samples * timesteps, input_size) + x_reshape = x.contiguous().view( + -1, x.size(-1) + ) # (samples * timesteps, input_size) y = self.interpolate(x_reshape) # We have to reshape Y if self.batch_first: - y = y.contiguous().view(x.size(0), -1, y.size(-1)) # (samples, timesteps, output_size) + y = y.contiguous().view( + x.size(0), -1, y.size(-1) + ) # (samples, timesteps, output_size) else: y = y.view(-1, x.size(1), y.size(-1)) # (timesteps, samples, output_size) @@ -99,7 +111,9 @@ def forward(self, x): class ResampleNorm(nn.Module): - def __init__(self, input_size: int, output_size: int = None, trainable_add: bool = True): + def __init__( + self, input_size: int, output_size: int = None, trainable_add: bool = True + ): super().__init__() self.input_size = input_size @@ -107,7 +121,9 @@ def __init__(self, input_size: int, output_size: int = None, trainable_add: bool self.output_size = output_size or input_size if self.input_size != self.output_size: - self.resample = TimeDistributedInterpolation(self.output_size, batch_first=True, trainable=False) + self.resample = TimeDistributedInterpolation( + self.output_size, batch_first=True, trainable=False + ) if self.trainable_add: self.mask = nn.Parameter(torch.zeros(self.output_size, dtype=torch.float)) @@ -126,7 +142,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class AddNorm(nn.Module): - def __init__(self, input_size: int, skip_size: int = None, trainable_add: bool = True): + def __init__( + self, input_size: int, skip_size: int = None, trainable_add: bool = True + ): super().__init__() self.input_size = input_size @@ -134,7 +152,9 @@ def __init__(self, input_size: int, skip_size: int = None, trainable_add: bool = self.skip_size = skip_size or input_size if self.input_size != self.skip_size: - self.resample = TimeDistributedInterpolation(self.input_size, batch_first=True, trainable=False) + self.resample = TimeDistributedInterpolation( + self.input_size, batch_first=True, trainable=False + ) if self.trainable_add: self.mask = nn.Parameter(torch.zeros(self.input_size, dtype=torch.float)) @@ -168,8 +188,12 @@ def __init__( self.skip_size = skip_size or self.hidden_size self.dropout = dropout - self.glu = GatedLinearUnit(self.input_size, hidden_size=self.hidden_size, dropout=self.dropout) - self.add_norm = AddNorm(self.hidden_size, skip_size=self.skip_size, trainable_add=trainable_add) + self.glu = GatedLinearUnit( + self.input_size, hidden_size=self.hidden_size, dropout=self.dropout + ) + self.add_norm = AddNorm( + self.hidden_size, skip_size=self.skip_size, trainable_add=trainable_add + ) def forward(self, x, skip): output = self.glu(x) @@ -225,7 +249,9 @@ def init_weights(self): if "bias" in name: torch.nn.init.zeros_(p) elif "fc1" in name or "fc2" in name: - torch.nn.init.kaiming_normal_(p, a=0, mode="fan_in", nonlinearity="leaky_relu") + torch.nn.init.kaiming_normal_( + p, a=0, mode="fan_in", nonlinearity="leaky_relu" + ) elif "context" in name: torch.nn.init.xavier_uniform_(p) @@ -265,7 +291,9 @@ def __init__( self.hidden_size = hidden_size self.input_sizes = input_sizes self.input_embedding_flags = input_embedding_flags - self._input_embedding_flags = {} if input_embedding_flags is None else deepcopy(input_embedding_flags) + self._input_embedding_flags = ( + {} if input_embedding_flags is None else deepcopy(input_embedding_flags) + ) self.dropout = dropout self.context_size = context_size @@ -295,7 +323,9 @@ def __init__( if name in single_variable_grns: self.single_variable_grns[name] = single_variable_grns[name] elif self._input_embedding_flags.get(name, False): - self.single_variable_grns[name] = ResampleNorm(input_size, self.hidden_size) + self.single_variable_grns[name] = ResampleNorm( + input_size, self.hidden_size + ) else: self.single_variable_grns[name] = GatedResidualNetwork( input_size, @@ -314,7 +344,10 @@ def __init__( @property def input_size_total(self): - return sum(size if name in self._input_embedding_flags else size for name, size in self.input_sizes.items()) + return sum( + size if name in self._input_embedding_flags else size + for name, size in self.input_sizes.items() + ) @property def num_inputs(self): @@ -346,18 +379,26 @@ def forward(self, x: Dict[str, torch.Tensor], context: torch.Tensor = None): variable_embedding = x[name] if name in self.prescalers: variable_embedding = self.prescalers[name](variable_embedding) - outputs = self.single_variable_grns[name](variable_embedding) # fast forward if only one variable + outputs = self.single_variable_grns[name]( + variable_embedding + ) # fast forward if only one variable if outputs.ndim == 3: # -> batch size, time, hidden size, n_variables - sparse_weights = torch.ones(outputs.size(0), outputs.size(1), 1, 1, device=outputs.device) # + sparse_weights = torch.ones( + outputs.size(0), outputs.size(1), 1, 1, device=outputs.device + ) # else: # ndim == 2 -> batch size, hidden size, n_variables - sparse_weights = torch.ones(outputs.size(0), 1, 1, device=outputs.device) + sparse_weights = torch.ones( + outputs.size(0), 1, 1, device=outputs.device + ) return outputs, sparse_weights class PositionalEncoder(torch.nn.Module): def __init__(self, d_model, max_seq_len=160): super().__init__() - assert d_model % 2 == 0, "model dimension has to be multiple of 2 (encode sin(pos) and cos(pos))" + assert ( + d_model % 2 == 0 + ), "model dimension has to be multiple of 2 (encode sin(pos) and cos(pos))" self.d_model = d_model pe = torch.zeros(max_seq_len, d_model) for pos in range(max_seq_len): @@ -390,7 +431,9 @@ def forward(self, q, k, v, mask=None): attn = torch.bmm(q, k.permute(0, 2, 1)) # query-key overlap if self.scale: - dimension = torch.as_tensor(k.size(-1), dtype=attn.dtype, device=attn.device).sqrt() + dimension = torch.as_tensor( + k.size(-1), dtype=attn.dtype, device=attn.device + ).sqrt() attn = attn / dimension if mask is not None: @@ -413,8 +456,12 @@ def __init__(self, n_head: int, d_model: int, dropout: float = 0.0): self.dropout = nn.Dropout(p=dropout) self.v_layer = nn.Linear(self.d_model, self.d_v) - self.q_layers = nn.ModuleList([nn.Linear(self.d_model, self.d_q) for _ in range(self.n_head)]) - self.k_layers = nn.ModuleList([nn.Linear(self.d_model, self.d_k) for _ in range(self.n_head)]) + self.q_layers = nn.ModuleList( + [nn.Linear(self.d_model, self.d_q) for _ in range(self.n_head)] + ) + self.k_layers = nn.ModuleList( + [nn.Linear(self.d_model, self.d_k) for _ in range(self.n_head)] + ) self.attention = ScaledDotProductAttention() self.w_h = nn.Linear(self.d_v, self.d_model, bias=False) diff --git a/pytorch_forecasting/models/temporal_fusion_transformer/tuning.py b/pytorch_forecasting/models/temporal_fusion_transformer/tuning.py index 7f2dfab8b..ddd7c94f7 100644 --- a/pytorch_forecasting/models/temporal_fusion_transformer/tuning.py +++ b/pytorch_forecasting/models/temporal_fusion_transformer/tuning.py @@ -102,7 +102,9 @@ def optimize_hyperparameters( import statsmodels.api as sm # need to inherit from callback for this to work - class PyTorchLightningPruningCallbackAdjusted(PyTorchLightningPruningCallback, pl.Callback): # noqa: E501 + class PyTorchLightningPruningCallbackAdjusted( + PyTorchLightningPruningCallback, pl.Callback + ): # noqa: E501 pass if pruner is None: @@ -129,12 +131,16 @@ class PyTorchLightningPruningCallbackAdjusted(PyTorchLightningPruningCallback, p def objective(trial: optuna.Trial) -> float: # Filenames for each trial must be made unique in order to access each checkpoint. checkpoint_callback = ModelCheckpoint( - dirpath=os.path.join(model_path, "trial_{}".format(trial.number)), filename="{epoch}", monitor="val_loss" + dirpath=os.path.join(model_path, "trial_{}".format(trial.number)), + filename="{epoch}", + monitor="val_loss", ) learning_rate_callback = LearningRateMonitor() logger = TensorBoardLogger(log_dir, name="optuna", version=trial.number) - gradient_clip_val = trial.suggest_loguniform("gradient_clip_val", *gradient_clip_val_range) + gradient_clip_val = trial.suggest_loguniform( + "gradient_clip_val", *gradient_clip_val_range + ) default_trainer_kwargs = dict( accelerator="auto", max_epochs=max_epochs, @@ -166,7 +172,9 @@ def objective(trial: optuna.Trial) -> float: min(hidden_continuous_size_range[1], hidden_size), log=True, ), - attention_head_size=trial.suggest_int("attention_head_size", *attention_head_size_range), + attention_head_size=trial.suggest_int( + "attention_head_size", *attention_head_size_range + ), log_interval=-1, **kwargs, ) @@ -191,7 +199,9 @@ def objective(trial: optuna.Trial) -> float: ) loss_finite = np.isfinite(res.results["loss"]) - if loss_finite.sum() > 3: # at least 3 valid values required for learning rate finder + if ( + loss_finite.sum() > 3 + ): # at least 3 valid values required for learning rate finder lr_smoothed, loss_smoothed = sm.nonparametric.lowess( np.asarray(res.results["loss"])[loss_finite], np.asarray(res.results["lr"])[loss_finite], @@ -204,12 +214,18 @@ def objective(trial: optuna.Trial) -> float: optimal_lr = res.results["lr"][optimal_idx] optuna_logger.info(f"Using learning rate of {optimal_lr:.3g}") # add learning rate artificially - model.hparams.learning_rate = trial.suggest_uniform("learning_rate", optimal_lr, optimal_lr) + model.hparams.learning_rate = trial.suggest_uniform( + "learning_rate", optimal_lr, optimal_lr + ) else: - model.hparams.learning_rate = trial.suggest_loguniform("learning_rate", *learning_rate_range) + model.hparams.learning_rate = trial.suggest_loguniform( + "learning_rate", *learning_rate_range + ) # fit - trainer.fit(model, train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders) + trainer.fit( + model, train_dataloaders=train_dataloaders, val_dataloaders=val_dataloaders + ) # report result return trainer.callback_metrics["val_loss"].item() diff --git a/pytorch_forecasting/utils/_dependencies.py b/pytorch_forecasting/utils/_dependencies.py index 6ac4e6722..e8db215b4 100644 --- a/pytorch_forecasting/utils/_dependencies.py +++ b/pytorch_forecasting/utils/_dependencies.py @@ -57,6 +57,8 @@ def _check_matplotlib(ref="This feature", raise_error=True): pkgs = _get_installed_packages() if raise_error and "matplotlib" not in pkgs: - raise ImportError(f"{ref} requires matplotlib. Please install matplotlib with `pip install matplotlib`.") + raise ImportError( + f"{ref} requires matplotlib. Please install matplotlib with `pip install matplotlib`." + ) return "matplotlib" in pkgs diff --git a/pytorch_forecasting/utils/_utils.py b/pytorch_forecasting/utils/_utils.py index a814c3cb5..27044372d 100644 --- a/pytorch_forecasting/utils/_utils.py +++ b/pytorch_forecasting/utils/_utils.py @@ -42,7 +42,11 @@ def integer_histogram( def groupby_apply( - keys: torch.Tensor, values: torch.Tensor, bins: int = 95, reduction: str = "mean", return_histogram: bool = False + keys: torch.Tensor, + values: torch.Tensor, + bins: int = 95, + reduction: str = "mean", + return_histogram: bool = False, ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: """ Groupby apply for torch tensors @@ -64,16 +68,24 @@ def groupby_apply( else: raise ValueError(f"Unknown reduction '{reduction}'") uniques, counts = keys.unique(return_counts=True) - groups = torch.stack([reduce(item) for item in torch.split_with_sizes(values, tuple(counts))]) - reduced = torch.zeros(bins, dtype=values.dtype, device=values.device).scatter(dim=0, index=uniques, src=groups) + groups = torch.stack( + [reduce(item) for item in torch.split_with_sizes(values, tuple(counts))] + ) + reduced = torch.zeros(bins, dtype=values.dtype, device=values.device).scatter( + dim=0, index=uniques, src=groups + ) if return_histogram: - hist = torch.zeros(bins, dtype=torch.long, device=values.device).scatter(dim=0, index=uniques, src=counts) + hist = torch.zeros(bins, dtype=torch.long, device=values.device).scatter( + dim=0, index=uniques, src=counts + ) return reduced, hist else: return reduced -def profile(function: Callable, profile_fname: str, filter: str = "", period=0.0001, **kwargs): +def profile( + function: Callable, profile_fname: str, filter: str = "", period=0.0001, **kwargs +): """ Profile a given function with ``vmprof``. @@ -119,7 +131,9 @@ def get_embedding_size(n: int, max_size: int = 100) -> int: return 1 -def create_mask(size: int, lengths: torch.LongTensor, inverse: bool = False) -> torch.BoolTensor: +def create_mask( + size: int, lengths: torch.LongTensor, inverse: bool = False +) -> torch.BoolTensor: """ Create boolean masks of shape len(lenghts) x size. @@ -135,9 +149,13 @@ def create_mask(size: int, lengths: torch.LongTensor, inverse: bool = False) -> """ if inverse: # return where values are - return torch.arange(size, device=lengths.device).unsqueeze(0) < lengths.unsqueeze(-1) + return torch.arange(size, device=lengths.device).unsqueeze( + 0 + ) < lengths.unsqueeze(-1) else: # return where no values are - return torch.arange(size, device=lengths.device).unsqueeze(0) >= lengths.unsqueeze(-1) + return torch.arange(size, device=lengths.device).unsqueeze( + 0 + ) >= lengths.unsqueeze(-1) _NEXT_FAST_LEN = {} @@ -206,12 +224,16 @@ def autocorrelation(input, dim=0): # truncate and normalize the result, then transpose back to original shape autocorr = autocorr[..., :N] - autocorr = autocorr / torch.tensor(range(N, 0, -1), dtype=input.dtype, device=input.device) + autocorr = autocorr / torch.tensor( + range(N, 0, -1), dtype=input.dtype, device=input.device + ) autocorr = autocorr / autocorr[..., :1] return autocorr.transpose(dim, -1) -def unpack_sequence(sequence: Union[torch.Tensor, rnn.PackedSequence]) -> Tuple[torch.Tensor, torch.Tensor]: +def unpack_sequence( + sequence: Union[torch.Tensor, rnn.PackedSequence] +) -> Tuple[torch.Tensor, torch.Tensor]: """ Unpack RNN sequence. @@ -227,7 +249,9 @@ def unpack_sequence(sequence: Union[torch.Tensor, rnn.PackedSequence]) -> Tuple[ # batch sizes reside on the CPU by default -> we need to bring them to GPU lengths = lengths.to(sequence.device) else: - lengths = torch.ones(sequence.size(0), device=sequence.device, dtype=torch.long) * sequence.size(1) + lengths = torch.ones( + sequence.size(0), device=sequence.device, dtype=torch.long + ) * sequence.size(1) return sequence, lengths @@ -250,14 +274,18 @@ def concat_sequences( return torch.cat(sequences, dim=1) elif isinstance(sequences[0], (tuple, list)): return tuple( - concat_sequences([sequences[ii][i] for ii in range(len(sequences))]) for i in range(len(sequences[0])) + concat_sequences([sequences[ii][i] for ii in range(len(sequences))]) + for i in range(len(sequences[0])) ) else: raise ValueError("Unsupported sequence type") def padded_stack( - tensors: List[torch.Tensor], side: str = "right", mode: str = "constant", value: Union[int, float] = 0 + tensors: List[torch.Tensor], + side: str = "right", + mode: str = "constant", + value: Union[int, float] = 0, ) -> torch.Tensor: """ Stack tensors along first dimension and pad them along last dimension to ensure their size is equal. @@ -283,7 +311,11 @@ def make_padding(pad): out = torch.stack( [ - F.pad(x, make_padding(full_size - x.size(-1)), mode=mode, value=value) if full_size - x.size(-1) > 0 else x + ( + F.pad(x, make_padding(full_size - x.size(-1)), mode=mode, value=value) + if full_size - x.size(-1) > 0 + else x + ) for x in tensors ], dim=0, @@ -485,7 +517,9 @@ def detach( return x -def masked_op(tensor: torch.Tensor, op: str = "mean", dim: int = 0, mask: torch.Tensor = None) -> torch.Tensor: +def masked_op( + tensor: torch.Tensor, op: str = "mean", dim: int = 0, mask: torch.Tensor = None +) -> torch.Tensor: """Calculate operation on masked tensor. Args: @@ -531,14 +565,21 @@ def repr_class( extra_attributes = {} # get attributes if isinstance(attributes, (tuple, list)): - attributes = {name: getattr(obj, name) for name in attributes if hasattr(obj, name)} + attributes = { + name: getattr(obj, name) for name in attributes if hasattr(obj, name) + } attributes_strings = [f"{name}={repr(value)}" for name, value in attributes.items()] # get header header_name = obj.__class__.__name__ # add extra attributes if len(extra_attributes) > 0: - extra_attributes_strings = [f"{name}={repr(value)}" for name, value in extra_attributes.items()] - if len(header_name) + 2 + len(", ".join(extra_attributes_strings)) > max_characters_before_break: + extra_attributes_strings = [ + f"{name}={repr(value)}" for name, value in extra_attributes.items() + ] + if ( + len(header_name) + 2 + len(", ".join(extra_attributes_strings)) + > max_characters_before_break + ): header = f"{header_name}[\n\t" + ",\n\t".join(attributes_strings) + "\n](" else: header = f"{header_name}[{', '.join(extra_attributes_strings)}](" @@ -547,7 +588,10 @@ def repr_class( # create final representation attributes_string = ", ".join(attributes_strings) - if len(attributes_string) + len(header.split("\n")[-1]) + 1 > max_characters_before_break: + if ( + len(attributes_string) + len(header.split("\n")[-1]) + 1 + > max_characters_before_break + ): attributes_string = "\n\t" + ",\n\t".join(attributes_strings) + "\n" return f"{header}{attributes_string})" @@ -565,4 +609,10 @@ def extra_repr(self) -> str: return "\t" + repr(self.hparams).replace("\n", "\n\t") else: attributes = list(inspect.signature(self.__class__).parameters.keys()) - return ", ".join([f"{name}={repr(getattr(self, name))}" for name in attributes if hasattr(self, name)]) + return ", ".join( + [ + f"{name}={repr(getattr(self, name))}" + for name in attributes + if hasattr(self, name) + ] + ) diff --git a/tests/conftest.py b/tests/conftest.py index 58dd0189e..608f06550 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -47,7 +47,9 @@ def test_data(): "beer_capital", "music_fest", ] - data[special_days] = data[special_days].apply(lambda x: x.map({0: "", 1: x.name})).astype("category") + data[special_days] = ( + data[special_days].apply(lambda x: x.map({0: "", 1: x.name})).astype("category") + ) data = data[lambda x: x.time_idx < 10] # downsample return data diff --git a/tests/test_data/test_encoders.py b/tests/test_data/test_encoders.py index 78967e467..b88204113 100644 --- a/tests/test_data/test_encoders.py +++ b/tests/test_data/test_encoders.py @@ -37,9 +37,15 @@ def test_NaNLabelEncoder(data, allow_nan): with pytest.raises(KeyError): encoder.transform(transform_data) else: - assert encoder.transform(transform_data)[0] == 0, "First value should be translated to 0 if nan" - assert encoder.transform(transform_data)[-1] == 0, "Last value should be translated to 0 if nan" - assert encoder.transform(fit_data)[0] > 0, "First value should not be 0 if not nan" + assert ( + encoder.transform(transform_data)[0] == 0 + ), "First value should be translated to 0 if nan" + assert ( + encoder.transform(transform_data)[-1] == 0 + ), "Last value should be translated to 0 if nan" + assert ( + encoder.transform(fit_data)[0] > 0 + ), "First value should not be 0 if not nan" def test_NaNLabelEncoder_add(): @@ -80,11 +86,16 @@ def test_EncoderNormalizer(kwargs): if kwargs.get("transformation") in ["relu", "softplus", "log1p"]: assert ( - normalizer.inverse_transform(torch.as_tensor(normalizer.fit_transform(data))) >= 0 + normalizer.inverse_transform( + torch.as_tensor(normalizer.fit_transform(data)) + ) + >= 0 ).all(), "Inverse transform should yield only positive values" else: assert torch.isclose( - normalizer.inverse_transform(torch.as_tensor(normalizer.fit_transform(data))), + normalizer.inverse_transform( + torch.as_tensor(normalizer.fit_transform(data)) + ), torch.as_tensor(data), atol=1e-5, ).all(), "Inverse transform should reverse transform" @@ -107,7 +118,9 @@ def test_EncoderNormalizer(kwargs): ) def test_GroupNormalizer(kwargs, groups): data = pd.DataFrame(dict(a=[1, 1, 2, 2, 3], b=[1.1, 1.1, 1.0, 0.0, 1.1])) - defaults = dict(method="standard", transformation=None, center=True, scale_by_group=False) + defaults = dict( + method="standard", transformation=None, center=True, scale_by_group=False + ) defaults.update(kwargs) kwargs = defaults kwargs["groups"] = groups @@ -122,7 +135,9 @@ def test_GroupNormalizer(kwargs, groups): ) if kwargs.get("transformation") in ["relu", "softplus", "log1p", "log"]: - assert (normalizer(test_data) >= 0).all(), "Inverse transform should yield only positive values" + assert ( + normalizer(test_data) >= 0 + ).all(), "Inverse transform should yield only positive values" else: assert torch.isclose( normalizer(test_data), torch.tensor(data.b.iloc[0]), atol=1e-5 @@ -136,7 +151,11 @@ def test_EncoderNormalizer_with_limited_history(): def test_MultiNormalizer_fitted(): - data = pd.DataFrame(dict(a=[1, 1, 2, 2, 3], b=[1.1, 1.1, 1.0, 5.0, 1.1], c=[1.1, 1.1, 1.0, 5.0, 1.1])) + data = pd.DataFrame( + dict( + a=[1, 1, 2, 2, 3], b=[1.1, 1.1, 1.0, 5.0, 1.1], c=[1.1, 1.1, 1.0, 5.0, 1.1] + ) + ) normalizer = MultiNormalizer([GroupNormalizer(groups=["a"]), TorchNormalizer()]) @@ -160,8 +179,17 @@ def test_TorchNormalizer_dtype_consistency(): """ parameters = torch.tensor([[[366.4587]]]) target_scale = torch.tensor([[427875.7500, 80367.4766]], dtype=torch.float64) - assert TorchNormalizer()(dict(prediction=parameters, target_scale=target_scale)).dtype == torch.float32 - assert TorchNormalizer().transform(parameters, target_scale=target_scale).dtype == torch.float32 + assert ( + TorchNormalizer()(dict(prediction=parameters, target_scale=target_scale)).dtype + == torch.float32 + ) + assert ( + TorchNormalizer().transform(parameters, target_scale=target_scale).dtype + == torch.float32 + ) y = np.array([1, 2, 3], dtype=np.float32) - assert TorchNormalizer(method="identity").fit(y).get_parameters().dtype == torch.float32 + assert ( + TorchNormalizer(method="identity").fit(y).get_parameters().dtype + == torch.float32 + ) diff --git a/tests/test_data/test_samplers.py b/tests/test_data/test_samplers.py index d7bdf2004..cb8edbdbe 100644 --- a/tests/test_data/test_samplers.py +++ b/tests/test_data/test_samplers.py @@ -13,14 +13,22 @@ (True, False, False, 1000), ], ) -def test_TimeSynchronizedBatchSampler(test_dataset, shuffle, drop_last, as_string, batch_size): +def test_TimeSynchronizedBatchSampler( + test_dataset, shuffle, drop_last, as_string, batch_size +): if as_string: dataloader = test_dataset.to_dataloader( - batch_sampler="synchronized", shuffle=shuffle, drop_last=drop_last, batch_size=batch_size + batch_sampler="synchronized", + shuffle=shuffle, + drop_last=drop_last, + batch_size=batch_size, ) else: sampler = TimeSynchronizedBatchSampler( - SequentialSampler(test_dataset), shuffle=shuffle, drop_last=drop_last, batch_size=batch_size + SequentialSampler(test_dataset), + shuffle=shuffle, + drop_last=drop_last, + batch_size=batch_size, ) dataloader = test_dataset.to_dataloader(batch_sampler=sampler) diff --git a/tests/test_data/test_timeseries.py b/tests/test_data/test_timeseries.py index fcc556141..ff9d3e25d 100644 --- a/tests/test_data/test_timeseries.py +++ b/tests/test_data/test_timeseries.py @@ -9,7 +9,12 @@ import torch from torch.utils.data.sampler import SequentialSampler -from pytorch_forecasting.data import EncoderNormalizer, GroupNormalizer, NaNLabelEncoder, TimeSeriesDataSet +from pytorch_forecasting.data import ( + EncoderNormalizer, + GroupNormalizer, + NaNLabelEncoder, + TimeSeriesDataSet, +) from pytorch_forecasting.data.encoders import MultiNormalizer, TorchNormalizer from pytorch_forecasting.data.timeseries import _find_end_indices from pytorch_forecasting.utils import to_list @@ -17,10 +22,37 @@ def test_find_end_indices(): diffs = np.array([1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1]) - max_lengths = np.array([4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1]) + max_lengths = np.array( + [4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1] + ) ends, missings = _find_end_indices(diffs, max_lengths, min_length=3) - ends_test = np.array([3, 4, 4, 5, 6, 8, 9, 10, 10, 10, 10, 14, 15, 15, 16, 17, 19, 20, 21, 21, 21, 21]) + ends_test = np.array( + [ + 3, + 4, + 4, + 5, + 6, + 8, + 9, + 10, + 10, + 10, + 10, + 14, + 15, + 15, + 16, + 17, + 19, + 20, + 21, + 21, + 21, + 21, + ] + ) missings_test = np.array([[0, 2], [5, 7], [11, 13], [16, 18]]) np.testing.assert_array_equal(ends, ends_test) np.testing.assert_array_equal(missings, missings_test) @@ -28,7 +60,11 @@ def test_find_end_indices(): def test_raise_short_encoder_length(test_data): with pytest.warns(UserWarning): - test_data = test_data[lambda x: ~((x.agency == "Agency_22") & (x.sku == "SKU_01") & (x.time_idx > 3))] + test_data = test_data[ + lambda x: ~( + (x.agency == "Agency_22") & (x.sku == "SKU_01") & (x.time_idx > 3) + ) + ] TimeSeriesDataSet( test_data, time_idx="time_idx", @@ -73,7 +109,9 @@ def check_dataloader_output(dataset: TimeSeriesDataSet, out: Dict[str, torch.Ten assert not torch.isnan(vi).any(), f"Values for {k} should not be nan" # check weight - assert y[1] is None or isinstance(y[1], torch.Tensor), "weights should be none or tensor" + assert y[1] is None or isinstance( + y[1], torch.Tensor + ), "weights should be none or tensor" if isinstance(y[1], torch.Tensor): assert torch.isfinite(y[1]).all(), "Values for weight should be finite" assert not torch.isnan(y[1]).any(), "Values for weight should not be nan" @@ -114,8 +152,22 @@ def check_dataloader_output(dataset: TimeSeriesDataSet, out: Dict[str, torch.Ten ] ), ), - dict(time_varying_known_reals=["time_idx", "price_regular", "discount_in_percent"]), - dict(time_varying_unknown_reals=["volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp"]), + dict( + time_varying_known_reals=[ + "time_idx", + "price_regular", + "discount_in_percent", + ] + ), + dict( + time_varying_unknown_reals=[ + "volume", + "log_volume", + "industry_volume", + "soda_volume", + "avg_max_temp", + ] + ), dict( target_normalizer=GroupNormalizer( groups=["agency", "sku"], @@ -137,7 +189,10 @@ def check_dataloader_output(dataset: TimeSeriesDataSet, out: Dict[str, torch.Ten time_varying_known_categoricals=["month"], time_varying_known_reals=["time_idx", "price_regular"], ), - dict(categorical_encoders={"month": NaNLabelEncoder(add_nan=True)}, time_varying_known_categoricals=["month"]), + dict( + categorical_encoders={"month": NaNLabelEncoder(add_nan=True)}, + time_varying_known_categoricals=["month"], + ), dict(constant_fill_strategy=dict(volume=0.0), allow_missing_timesteps=True), dict(target_normalizer=None), ], @@ -194,7 +249,10 @@ def test_from_dataset_equivalence(test_data): predict=True, ) # ensure validation1 and validation2 datasets are exactly the same despite different data inputs - for v1, v2 in zip(iter(validation1.to_dataloader(train=False)), iter(validation2.to_dataloader(train=False))): + for v1, v2 in zip( + iter(validation1.to_dataloader(train=False)), + iter(validation2.to_dataloader(train=False)), + ): for k in v1[0].keys(): if isinstance(v1[0][k], (tuple, list)): assert len(v1[0][k]) == len(v2[0][k]) @@ -216,7 +274,11 @@ def test_dataset_index(test_dataset): @pytest.mark.parametrize("min_prediction_idx", [0, 1, 3, 7]) def test_min_prediction_idx(test_dataset, test_data, min_prediction_idx): dataset = TimeSeriesDataSet.from_dataset( - test_dataset, test_data, min_prediction_idx=min_prediction_idx, min_encoder_length=1, max_prediction_length=10 + test_dataset, + test_data, + min_prediction_idx=min_prediction_idx, + min_encoder_length=1, + max_prediction_length=10, ) for x, _ in iter(dataset.to_dataloader(num_workers=0, batch_size=1000)): @@ -250,7 +312,10 @@ def test_overwrite_values(test_dataset, value, variable, target): output_name_suffix = "cat" if target == "all": - output_names = [f"encoder_{output_name_suffix}", f"decoder_{output_name_suffix}"] + output_names = [ + f"encoder_{output_name_suffix}", + f"decoder_{output_name_suffix}", + ] else: output_names = [f"{target}_{output_name_suffix}"] @@ -269,7 +334,9 @@ def test_overwrite_values(test_dataset, value, variable, target): for name in outputs[0].keys(): changed = torch.isclose(outputs[0][name], control_outputs[0][name]).all() assert changed, f"Output {name} should be reset" - assert torch.isclose(outputs[1][0], control_outputs[1][0]).all(), "Target should be reset" + assert torch.isclose( + outputs[1][0], control_outputs[1][0] + ).all(), "Target should be reset" @pytest.mark.parametrize( @@ -277,7 +344,9 @@ def test_overwrite_values(test_dataset, value, variable, target): [ {}, dict( - target_normalizer=GroupNormalizer(groups=["agency", "sku"], transformation="log1p", scale_by_group=True), + target_normalizer=GroupNormalizer( + groups=["agency", "sku"], transformation="log1p", scale_by_group=True + ), ), ], ) @@ -293,7 +362,9 @@ def test_new_group_ids(test_data, kwargs): max_prediction_length=2, min_prediction_length=1, min_encoder_length=1, - categorical_encoders=dict(agency=NaNLabelEncoder(add_nan=True), sku=NaNLabelEncoder(add_nan=True)), + categorical_encoders=dict( + agency=NaNLabelEncoder(add_nan=True), sku=NaNLabelEncoder(add_nan=True) + ), **kwargs, ) @@ -343,7 +414,9 @@ def test_encoder_normalizer_for_covariates(test_data): [ {}, dict( - target_normalizer=MultiNormalizer(normalizers=[TorchNormalizer(), EncoderNormalizer()]), + target_normalizer=MultiNormalizer( + normalizers=[TorchNormalizer(), EncoderNormalizer()] + ), ), dict(add_target_scales=True), dict(weight="volume"), @@ -421,15 +494,24 @@ def test_lagged_variables(test_data, kwargs): lag_idx = vars.index(f"{name}_lagged_by_{lag}") target = x[..., target_idx][:, 0] lagged_target = torch.roll(x[..., lag_idx], -lag, dims=1)[:, 0] - assert torch.isclose(target, lagged_target).all(), "lagged target must be the same as non-lagged target" + assert torch.isclose( + target, lagged_target + ).all(), "lagged target must be the same as non-lagged target" @pytest.mark.parametrize( "agency,first_prediction_idx,should_raise", - [("Agency_01", 0, False), ("xxxxx", 0, True), ("Agency_01", 100, True), ("Agency_01", 4, False)], + [ + ("Agency_01", 0, False), + ("xxxxx", 0, True), + ("Agency_01", 100, True), + ("Agency_01", 4, False), + ], ) def test_filter_data(test_dataset, agency, first_prediction_idx, should_raise): - func = lambda x: (x.agency == agency) & (x.time_idx_first_prediction >= first_prediction_idx) + func = lambda x: (x.agency == agency) & ( + x.time_idx_first_prediction >= first_prediction_idx + ) if should_raise: with pytest.raises(ValueError): test_dataset.filter(func) @@ -441,7 +523,9 @@ def test_filter_data(test_dataset, agency, first_prediction_idx, should_raise): for x, _ in iter(filtered_dataset.to_dataloader()): index = test_dataset.x_to_index(x) assert (index["agency"] == agency).all(), "Agency filter has failed" - assert index["time_idx"].min() == first_prediction_idx, "First prediction filter has failed" + assert ( + index["time_idx"].min() == first_prediction_idx + ), "First prediction filter has failed" def test_graph_sampler(test_dataset): @@ -475,7 +559,10 @@ def distance_to_weights(dist): indices = self.sampler.data_source.index.iloc[self._groups[name]] selected_pos = indices["index_start"].iloc[sub_group_idx] # remove selected sample - indices = indices[lambda x: x["sequence_id"] != indices["sequence_id"].iloc[sub_group_idx]] + indices = indices[ + lambda x: x["sequence_id"] + != indices["sequence_id"].iloc[sub_group_idx] + ] # filter duplicate timeseries # indices = indices.sort_values("sequence_length").drop_duplicates("sequence_id", keep="last") @@ -495,12 +582,17 @@ def distance_to_weights(dist): # sample random subset of neighborhood batch_size = min(len(relevant_indices), self.batch_size - 1) batch_indices = [selected_index] + np.random.choice( - relevant_indices, p=sample_weights / sample_weights.sum(), replace=False, size=batch_size + relevant_indices, + p=sample_weights / sample_weights.sum(), + replace=False, + size=batch_size, ).tolist() yield batch_indices dl = test_dataset.to_dataloader( - batch_sampler=NeighborhoodSampler(SequentialSampler(test_dataset), batch_size=200, shuffle=True) + batch_sampler=NeighborhoodSampler( + SequentialSampler(test_dataset), batch_size=200, shuffle=True + ) ) for idx, a in enumerate(dl): print(a[0]["groups"].shape) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 9b5d72b29..3c7031d31 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -22,7 +22,9 @@ def test_composite_metric(): metric1 = SMAPE() metric2 = MAE() combined_metric = 1.0 * (0.3 * metric1 + 2.0 * metric2 + metric1) - assert isinstance(combined_metric, CompositeMetric), "combined metric should be composite metric" + assert isinstance( + combined_metric, CompositeMetric + ), "combined metric should be composite metric" # test repr() repr(combined_metric) @@ -45,15 +47,23 @@ def test_composite_metric(): @pytest.mark.parametrize( "decoder_lengths,y", [ - (torch.tensor([1, 2], dtype=torch.long), torch.tensor([[0.0, 1.0], [5.0, 1.0]])), + ( + torch.tensor([1, 2], dtype=torch.long), + torch.tensor([[0.0, 1.0], [5.0, 1.0]]), + ), (2 * torch.ones(2, dtype=torch.long), torch.tensor([[0.0, 1.0], [5.0, 1.0]])), - (2 * torch.ones(2, dtype=torch.long), torch.tensor([[[0.0, 1.0], [1.0, 1.0]], [[5.0, 1.0], [1.0, 2.0]]])), + ( + 2 * torch.ones(2, dtype=torch.long), + torch.tensor([[[0.0, 1.0], [1.0, 1.0]], [[5.0, 1.0], [1.0, 2.0]]]), + ), ], ) def test_aggregation_metric(decoder_lengths, y): y_pred = torch.tensor([[0.0, 2.0], [4.0, 3.0]]) if (decoder_lengths != y_pred.size(-1)).any(): - y_packed = rnn.pack_padded_sequence(y, lengths=decoder_lengths, batch_first=True, enforce_sorted=False) + y_packed = rnn.pack_padded_sequence( + y, lengths=decoder_lengths, batch_first=True, enforce_sorted=False + ) else: y_packed = y @@ -75,7 +85,9 @@ def test_none_reduction(): @pytest.mark.parametrize( ["center", "transformation"], - itertools.product([True, False], ["log", "log1p", "softplus", "relu", "logit", None]), + itertools.product( + [True, False], ["log", "log1p", "softplus", "relu", "logit", None] + ), ) def test_NormalDistributionLoss(center, transformation): mean = 1.0 @@ -95,7 +107,9 @@ def test_NormalDistributionLoss(center, transformation): dim=-1, ) loss = NormalDistributionLoss() - rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer) + rescaled_parameters = loss.rescale_parameters( + parameters, target_scale=target_scale, encoder=normalizer + ) samples = loss.sample(rescaled_parameters, 1) assert torch.isclose(target.mean(), samples.mean(), atol=0.1, rtol=0.5) if center: # if not centered, softplus distorts std too much for testing @@ -104,13 +118,17 @@ def test_NormalDistributionLoss(center, transformation): @pytest.mark.parametrize( ["center", "transformation"], - itertools.product([True, False], ["log", "log1p", "softplus", "relu", "logit", None]), + itertools.product( + [True, False], ["log", "log1p", "softplus", "relu", "logit", None] + ), ) def test_LogNormalDistributionLoss(center, transformation): mean = 2.0 std = 0.2 n = 100000 - target = LogNormalDistributionLoss.distribution_class(loc=mean, scale=std).sample((n,)) + target = LogNormalDistributionLoss.distribution_class(loc=mean, scale=std).sample( + (n,) + ) normalizer = TorchNormalizer(center=center, transformation=transformation) normalized_target = normalizer.fit_transform(target).view(1, -1) target_scale = normalizer.get_parameters().unsqueeze(0) @@ -123,35 +141,55 @@ def test_LogNormalDistributionLoss(center, transformation): if transformation not in ["log", "log1p"]: with pytest.raises(AssertionError): - rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer) + rescaled_parameters = loss.rescale_parameters( + parameters, target_scale=target_scale, encoder=normalizer + ) else: - rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer) + rescaled_parameters = loss.rescale_parameters( + parameters, target_scale=target_scale, encoder=normalizer + ) samples = loss.sample(rescaled_parameters, 1) - assert torch.isclose(torch.as_tensor(mean), samples.log().mean(), atol=0.1, rtol=0.2) + assert torch.isclose( + torch.as_tensor(mean), samples.log().mean(), atol=0.1, rtol=0.2 + ) if center: # if not centered, softplus distorts std too much for testing - assert torch.isclose(torch.as_tensor(std), samples.log().std(), atol=0.1, rtol=0.7) + assert torch.isclose( + torch.as_tensor(std), samples.log().std(), atol=0.1, rtol=0.7 + ) @pytest.mark.parametrize( ["center", "transformation"], - itertools.product([True, False], ["log", "log1p", "softplus", "relu", "logit", None]), + itertools.product( + [True, False], ["log", "log1p", "softplus", "relu", "logit", None] + ), ) def test_NegativeBinomialDistributionLoss(center, transformation): mean = 100.0 shape = 1.0 n = 100000 - target = NegativeBinomialDistributionLoss().map_x_to_distribution(torch.tensor([mean, shape])).sample((n,)) + target = ( + NegativeBinomialDistributionLoss() + .map_x_to_distribution(torch.tensor([mean, shape])) + .sample((n,)) + ) normalizer = TorchNormalizer(center=center, transformation=transformation) normalized_target = normalizer.fit_transform(target).view(1, -1) target_scale = normalizer.get_parameters().unsqueeze(0) - parameters = torch.stack([normalized_target, 1.0 * torch.ones_like(normalized_target)], dim=-1) + parameters = torch.stack( + [normalized_target, 1.0 * torch.ones_like(normalized_target)], dim=-1 + ) loss = NegativeBinomialDistributionLoss() if center or transformation in ["logit", "log"]: with pytest.raises(AssertionError): - rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer) + rescaled_parameters = loss.rescale_parameters( + parameters, target_scale=target_scale, encoder=normalizer + ) else: - rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer) + rescaled_parameters = loss.rescale_parameters( + parameters, target_scale=target_scale, encoder=normalizer + ) samples = loss.sample(rescaled_parameters, 1) assert torch.isclose(target.mean(), samples.mean(), atol=0.1, rtol=0.5) assert torch.isclose(target.std(), samples.std(), atol=0.1, rtol=0.5) @@ -159,32 +197,50 @@ def test_NegativeBinomialDistributionLoss(center, transformation): @pytest.mark.parametrize( ["center", "transformation"], - itertools.product([True, False], ["log", "log1p", "softplus", "relu", "logit", None]), + itertools.product( + [True, False], ["log", "log1p", "softplus", "relu", "logit", None] + ), ) def test_BetaDistributionLoss(center, transformation): initial_mean = 0.1 initial_shape = 10 n = 100000 - target = BetaDistributionLoss().map_x_to_distribution(torch.tensor([initial_mean, initial_shape])).sample((n,)) + target = ( + BetaDistributionLoss() + .map_x_to_distribution(torch.tensor([initial_mean, initial_shape])) + .sample((n,)) + ) normalizer = TorchNormalizer(center=center, transformation=transformation) normalized_target = normalizer.fit_transform(target).view(1, -1) target_scale = normalizer.get_parameters().unsqueeze(0) - parameters = torch.stack([normalized_target, 1.0 * torch.ones_like(normalized_target)], dim=-1) + parameters = torch.stack( + [normalized_target, 1.0 * torch.ones_like(normalized_target)], dim=-1 + ) loss = BetaDistributionLoss() if transformation not in ["logit"] or not center: with pytest.raises(AssertionError): - loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer) + loss.rescale_parameters( + parameters, target_scale=target_scale, encoder=normalizer + ) else: - rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer) + rescaled_parameters = loss.rescale_parameters( + parameters, target_scale=target_scale, encoder=normalizer + ) samples = loss.sample(rescaled_parameters, 1) - assert torch.isclose(torch.as_tensor(initial_mean), samples.mean(), atol=0.01, rtol=0.01) # mean=0.1 - assert torch.isclose(target.std(), samples.std(), atol=0.02, rtol=0.3) # std=0.09 + assert torch.isclose( + torch.as_tensor(initial_mean), samples.mean(), atol=0.01, rtol=0.01 + ) # mean=0.1 + assert torch.isclose( + target.std(), samples.std(), atol=0.02, rtol=0.3 + ) # std=0.09 @pytest.mark.parametrize( ["center", "transformation"], - itertools.product([True, False], ["log", "log1p", "softplus", "relu", "logit", None]), + itertools.product( + [True, False], ["log", "log1p", "softplus", "relu", "logit", None] + ), ) def test_MultivariateNormalDistributionLoss(center, transformation): normalizer = TorchNormalizer(center=center, transformation=transformation) @@ -195,18 +251,26 @@ def test_MultivariateNormalDistributionLoss(center, transformation): n = 1000000 loss = MultivariateNormalDistributionLoss() - target = loss.distribution_class(loc=mean, cov_diag=std**2, cov_factor=cov_factor).sample((n,)) + target = loss.distribution_class( + loc=mean, cov_diag=std**2, cov_factor=cov_factor + ).sample((n,)) target = normalizer.inverse_preprocess(target) target = target[:, 0] normalized_target = normalizer.fit_transform(target).view(1, -1) target_scale = normalizer.get_parameters().unsqueeze(0) scale = torch.ones_like(normalized_target) * normalized_target.std() parameters = torch.concat( - [normalized_target[..., None], scale[..., None], torch.zeros((1, normalized_target.size(1), loss.rank))], + [ + normalized_target[..., None], + scale[..., None], + torch.zeros((1, normalized_target.size(1), loss.rank)), + ], dim=-1, ) - rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer) + rescaled_parameters = loss.rescale_parameters( + parameters, target_scale=target_scale, encoder=normalizer + ) samples = loss.sample(rescaled_parameters, 1) assert torch.isclose(target.mean(), samples.mean(), atol=3.0, rtol=0.5) if center: # if not centered, softplus distorts std too much for testing diff --git a/tests/test_models/conftest.py b/tests/test_models/conftest.py index 97e94dd0d..e276446a6 100644 --- a/tests/test_models/conftest.py +++ b/tests/test_models/conftest.py @@ -42,11 +42,15 @@ def data_with_covariates(): "beer_capital", "music_fest", ] - data[special_days] = data[special_days].apply(lambda x: x.map({0: "", 1: x.name})).astype("category") + data[special_days] = ( + data[special_days].apply(lambda x: x.map({0: "", 1: x.name})).astype("category") + ) data = data.astype(dict(industry_volume=float)) # select data subset - data = data[lambda x: x.sku.isin(data.sku.unique()[:2])][lambda x: x.agency.isin(data.agency.unique()[:2])] + data = data[lambda x: x.sku.isin(data.sku.unique()[:2])][ + lambda x: x.agency.isin(data.agency.unique()[:2]) + ] # default target data["target"] = data["volume"].clip(1e-3, 1.0) @@ -73,7 +77,9 @@ def make_dataloaders(data_with_covariates, **kwargs): ) validation = TimeSeriesDataSet.from_dataset( - training, data_with_covariates.copy(), min_prediction_idx=training.index.time.max() + 1 + training, + data_with_covariates.copy(), + min_prediction_idx=training.index.time.max() + 1, ) train_dataloader = training.to_dataloader(train=True, batch_size=2, num_workers=0) val_dataloader = validation.to_dataloader(train=False, batch_size=2, num_workers=0) @@ -105,9 +111,21 @@ def make_dataloaders(data_with_covariates, **kwargs): "music_fest", ] ), - time_varying_known_reals=["time_idx", "price_regular", "price_actual", "discount", "discount_in_percent"], + time_varying_known_reals=[ + "time_idx", + "price_regular", + "price_actual", + "discount", + "discount_in_percent", + ], time_varying_unknown_categoricals=[], - time_varying_unknown_reals=["volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp"], + time_varying_unknown_reals=[ + "volume", + "log_volume", + "industry_volume", + "soda_volume", + "avg_max_temp", + ], constant_fill_strategy={"volume": 0}, categorical_encoders={"sku": NaNLabelEncoder(add_nan=True)}, ), @@ -115,12 +133,18 @@ def make_dataloaders(data_with_covariates, **kwargs): dict(randomize_length=True, min_encoder_length=2), dict(target_normalizer=EncoderNormalizer(), min_encoder_length=2), dict(target_normalizer=GroupNormalizer(transformation="log1p")), - dict(target_normalizer=GroupNormalizer(groups=["agency", "sku"], transformation="softplus", center=False)), + dict( + target_normalizer=GroupNormalizer( + groups=["agency", "sku"], transformation="softplus", center=False + ) + ), dict(target="agency"), # test multiple targets dict(target=["industry_volume", "volume"]), dict(target=["agency", "volume"]), - dict(target=["agency", "volume"], min_encoder_length=1, min_prediction_length=1), + dict( + target=["agency", "volume"], min_encoder_length=1, min_prediction_length=1 + ), dict(target=["agency", "volume"], weight="volume"), # test weights dict(target="volume", weight="volume"), @@ -153,9 +177,22 @@ def dataloaders_with_different_encoder_decoder_length(data_with_covariates): "music_fest", ] ), - time_varying_known_reals=["time_idx", "price_regular", "price_actual", "discount", "discount_in_percent"], + time_varying_known_reals=[ + "time_idx", + "price_regular", + "price_actual", + "discount", + "discount_in_percent", + ], time_varying_unknown_categoricals=[], - time_varying_unknown_reals=["target", "volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp"], + time_varying_unknown_reals=[ + "target", + "volume", + "log_volume", + "industry_volume", + "soda_volume", + "avg_max_temp", + ], static_categoricals=["agency"], add_relative_time_idx=False, target_normalizer=GroupNormalizer(groups=["agency", "sku"], center=False), @@ -212,8 +249,14 @@ def dataloaders_fixed_window_without_covariates(): stop_randomization=True, ) batch_size = 2 - train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0) - val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0) - test_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0) + train_dataloader = training.to_dataloader( + train=True, batch_size=batch_size, num_workers=0 + ) + val_dataloader = validation.to_dataloader( + train=False, batch_size=batch_size, num_workers=0 + ) + test_dataloader = validation.to_dataloader( + train=False, batch_size=batch_size, num_workers=0 + ) return dict(train=train_dataloader, val=val_dataloader, test=test_dataloader) diff --git a/tests/test_models/test_deepar.py b/tests/test_models/test_deepar.py index c9af3c2f1..2204bb59b 100644 --- a/tests/test_models/test_deepar.py +++ b/tests/test_models/test_deepar.py @@ -42,13 +42,17 @@ def _integration( add_relative_time_idx=True, ) data_loader_default_kwargs.update(data_loader_kwargs) - dataloaders_with_covariates = make_dataloaders(data_with_covariates, **data_loader_default_kwargs) + dataloaders_with_covariates = make_dataloaders( + data_with_covariates, **data_loader_default_kwargs + ) train_dataloader = dataloaders_with_covariates["train"] val_dataloader = dataloaders_with_covariates["val"] test_dataloader = dataloaders_with_covariates["test"] - early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min") + early_stop_callback = EarlyStopping( + monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min" + ) logger = TensorBoardLogger(tmp_path) if trainer_kwargs is None: @@ -100,7 +104,11 @@ def _integration( shutil.rmtree(tmp_path, ignore_errors=True) net.predict( - val_dataloader, fast_dev_run=True, return_index=True, return_decoder_lengths=True, trainer_kwargs=trainer_kwargs + val_dataloader, + fast_dev_run=True, + return_index=True, + return_decoder_lengths=True, + trainer_kwargs=trainer_kwargs, ) @@ -112,23 +120,36 @@ def _integration( dict( loss=LogNormalDistributionLoss(), clip_target=True, - data_loader_kwargs=dict(target_normalizer=GroupNormalizer(groups=["agency", "sku"], transformation="log")), + data_loader_kwargs=dict( + target_normalizer=GroupNormalizer( + groups=["agency", "sku"], transformation="log" + ) + ), ), dict( loss=NegativeBinomialDistributionLoss(), clip_target=False, - data_loader_kwargs=dict(target_normalizer=GroupNormalizer(groups=["agency", "sku"], center=False)), + data_loader_kwargs=dict( + target_normalizer=GroupNormalizer( + groups=["agency", "sku"], center=False + ) + ), ), dict( loss=BetaDistributionLoss(), clip_target=True, data_loader_kwargs=dict( - target_normalizer=GroupNormalizer(groups=["agency", "sku"], transformation="logit") + target_normalizer=GroupNormalizer( + groups=["agency", "sku"], transformation="logit" + ) ), ), dict( data_loader_kwargs=dict( - lags={"volume": [2, 5]}, target="volume", time_varying_unknown_reals=["volume"], min_encoder_length=2 + lags={"volume": [2, 5]}, + target="volume", + time_varying_unknown_reals=["volume"], + min_encoder_length=2, ) ), dict( @@ -141,19 +162,28 @@ def _integration( dict( loss=ImplicitQuantileNetworkDistributionLoss(hidden_size=8), ), - dict(loss=MultivariateNormalDistributionLoss(), trainer_kwargs=dict(accelerator="cpu")), + dict( + loss=MultivariateNormalDistributionLoss(), + trainer_kwargs=dict(accelerator="cpu"), + ), dict( loss=MultivariateNormalDistributionLoss(), data_loader_kwargs=dict( - target_normalizer=GroupNormalizer(groups=["agency", "sku"], transformation="log1p") + target_normalizer=GroupNormalizer( + groups=["agency", "sku"], transformation="log1p" + ) ), trainer_kwargs=dict(accelerator="cpu"), ), ], ) def test_integration(data_with_covariates, tmp_path, kwargs): - if "loss" in kwargs and isinstance(kwargs["loss"], NegativeBinomialDistributionLoss): - data_with_covariates = data_with_covariates.assign(volume=lambda x: x.volume.round()) + if "loss" in kwargs and isinstance( + kwargs["loss"], NegativeBinomialDistributionLoss + ): + data_with_covariates = data_with_covariates.assign( + volume=lambda x: x.volume.round() + ) _integration(data_with_covariates, tmp_path, **kwargs) @@ -171,20 +201,37 @@ def model(dataloaders_with_covariates): def test_predict_average(model, dataloaders_with_covariates): - prediction = model.predict(dataloaders_with_covariates["val"], fast_dev_run=True, mode="prediction", n_samples=100) + prediction = model.predict( + dataloaders_with_covariates["val"], + fast_dev_run=True, + mode="prediction", + n_samples=100, + ) assert prediction.ndim == 2, "expected averaging of samples" def test_predict_samples(model, dataloaders_with_covariates): - prediction = model.predict(dataloaders_with_covariates["val"], fast_dev_run=True, mode="samples", n_samples=100) + prediction = model.predict( + dataloaders_with_covariates["val"], + fast_dev_run=True, + mode="samples", + n_samples=100, + ) assert prediction.size()[-1] == 100, "expected raw samples" -@pytest.mark.parametrize("loss", [NormalDistributionLoss(), MultivariateNormalDistributionLoss()]) +@pytest.mark.parametrize( + "loss", [NormalDistributionLoss(), MultivariateNormalDistributionLoss()] +) def test_pickle(dataloaders_with_covariates, loss): dataset = dataloaders_with_covariates["train"].dataset model = DeepAR.from_dataset( - dataset, hidden_size=5, learning_rate=0.15, log_gradient_flow=True, log_interval=1000, loss=loss + dataset, + hidden_size=5, + learning_rate=0.15, + log_gradient_flow=True, + log_interval=1000, + loss=loss, ) pkl = pickle.dumps(model) pickle.loads(pkl) diff --git a/tests/test_models/test_mlp.py b/tests/test_models/test_mlp.py index f1a58c935..71f62632d 100644 --- a/tests/test_models/test_mlp.py +++ b/tests/test_models/test_mlp.py @@ -12,7 +12,9 @@ from pytorch_forecasting.models import DecoderMLP -def _integration(data_with_covariates, tmp_path, data_loader_kwargs={}, train_only=False, **kwargs): +def _integration( + data_with_covariates, tmp_path, data_loader_kwargs={}, train_only=False, **kwargs +): data_loader_default_kwargs = dict( target="target", time_varying_known_reals=["price_actual"], @@ -21,12 +23,19 @@ def _integration(data_with_covariates, tmp_path, data_loader_kwargs={}, train_on add_relative_time_idx=True, ) data_loader_default_kwargs.update(data_loader_kwargs) - dataloaders_with_covariates = make_dataloaders(data_with_covariates, **data_loader_default_kwargs) + dataloaders_with_covariates = make_dataloaders( + data_with_covariates, **data_loader_default_kwargs + ) train_dataloader = dataloaders_with_covariates["train"] val_dataloader = dataloaders_with_covariates["val"] test_dataloader = dataloaders_with_covariates["test"] early_stop_callback = EarlyStopping( - monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min", strict=False + monitor="val_loss", + min_delta=1e-4, + patience=1, + verbose=False, + mode="min", + strict=False, ) logger = TensorBoardLogger(tmp_path) @@ -61,17 +70,29 @@ def _integration(data_with_covariates, tmp_path, data_loader_kwargs={}, train_on val_dataloaders=val_dataloader, ) # check loading - net = DecoderMLP.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) + net = DecoderMLP.load_from_checkpoint( + trainer.checkpoint_callback.best_model_path + ) # check prediction - net.predict(val_dataloader, fast_dev_run=True, return_index=True, return_decoder_lengths=True) + net.predict( + val_dataloader, + fast_dev_run=True, + return_index=True, + return_decoder_lengths=True, + ) # check test dataloader test_outputs = trainer.test(net, dataloaders=test_dataloader) assert len(test_outputs) > 0 finally: shutil.rmtree(tmp_path, ignore_errors=True) - net.predict(val_dataloader, fast_dev_run=True, return_index=True, return_decoder_lengths=True) + net.predict( + val_dataloader, + fast_dev_run=True, + return_index=True, + return_decoder_lengths=True, + ) @pytest.mark.parametrize( @@ -100,7 +121,9 @@ def _integration(data_with_covariates, tmp_path, data_loader_kwargs={}, train_on ], ) def test_integration(data_with_covariates, tmp_path, kwargs): - _integration(data_with_covariates.assign(target=lambda x: x.volume), tmp_path, **kwargs) + _integration( + data_with_covariates.assign(target=lambda x: x.volume), tmp_path, **kwargs + ) @pytest.fixture diff --git a/tests/test_models/test_nbeats.py b/tests/test_models/test_nbeats.py index 6fb969f3c..604891394 100644 --- a/tests/test_models/test_nbeats.py +++ b/tests/test_models/test_nbeats.py @@ -15,7 +15,9 @@ def test_integration(dataloaders_fixed_window_without_covariates, tmp_path): val_dataloader = dataloaders_fixed_window_without_covariates["val"] test_dataloader = dataloaders_fixed_window_without_covariates["test"] - early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min") + early_stop_callback = EarlyStopping( + monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min" + ) logger = TensorBoardLogger(tmp_path) trainer = pl.Trainer( @@ -51,11 +53,21 @@ def test_integration(dataloaders_fixed_window_without_covariates, tmp_path): net = NBeats.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) # check prediction - net.predict(val_dataloader, fast_dev_run=True, return_index=True, return_decoder_lengths=True) + net.predict( + val_dataloader, + fast_dev_run=True, + return_index=True, + return_decoder_lengths=True, + ) finally: shutil.rmtree(tmp_path, ignore_errors=True) - net.predict(val_dataloader, fast_dev_run=True, return_index=True, return_decoder_lengths=True) + net.predict( + val_dataloader, + fast_dev_run=True, + return_index=True, + return_decoder_lengths=True, + ) @pytest.fixture(scope="session") @@ -83,6 +95,9 @@ def test_pickle(model): ) def test_interpretation(model, dataloaders_fixed_window_without_covariates): raw_predictions = model.predict( - dataloaders_fixed_window_without_covariates["val"], mode="raw", return_x=True, fast_dev_run=True + dataloaders_fixed_window_without_covariates["val"], + mode="raw", + return_x=True, + fast_dev_run=True, ) model.plot_interpretation(raw_predictions.x, raw_predictions.output, idx=0) diff --git a/tests/test_models/test_nhits.py b/tests/test_models/test_nhits.py index 42fffc982..959026f51 100644 --- a/tests/test_models/test_nhits.py +++ b/tests/test_models/test_nhits.py @@ -10,7 +10,9 @@ from pytorch_forecasting.data.timeseries import TimeSeriesDataSet from pytorch_forecasting.metrics import MQF2DistributionLoss, QuantileLoss -from pytorch_forecasting.metrics.distributions import ImplicitQuantileNetworkDistributionLoss +from pytorch_forecasting.metrics.distributions import ( + ImplicitQuantileNetworkDistributionLoss, +) from pytorch_forecasting.models import NHiTS from pytorch_forecasting.utils._dependencies import _get_installed_packages @@ -20,7 +22,9 @@ def _integration(dataloader, tmp_path, trainer_kwargs=None, **kwargs): val_dataloader = dataloader["val"] test_dataloader = dataloader["test"] - early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min") + early_stop_callback = EarlyStopping( + monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min" + ) logger = TensorBoardLogger(tmp_path) if trainer_kwargs is None: @@ -123,7 +127,9 @@ def test_integration( kwargs["loss"] = ImplicitQuantileNetworkDistributionLoss() elif dataloader == "multivariate-quantiles": dataloader = dataloaders_with_covariates - kwargs["loss"] = MQF2DistributionLoss(prediction_length=dataloader["train"].dataset.max_prediction_length) + kwargs["loss"] = MQF2DistributionLoss( + prediction_length=dataloader["train"].dataset.max_prediction_length + ) kwargs["learning_rate"] = 1e-9 kwargs["trainer_kwargs"] = dict(accelerator="cpu") else: @@ -155,8 +161,12 @@ def test_pickle(model): reason="skip test if required package matplotlib not installed", ) def test_interpretation(model, dataloaders_with_covariates): - raw_predictions = model.predict(dataloaders_with_covariates["val"], mode="raw", return_x=True, fast_dev_run=True) - model.plot_prediction(raw_predictions.x, raw_predictions.output, idx=0, add_loss_to_title=True) + raw_predictions = model.predict( + dataloaders_with_covariates["val"], mode="raw", return_x=True, fast_dev_run=True + ) + model.plot_prediction( + raw_predictions.x, raw_predictions.output, idx=0, add_loss_to_title=True + ) model.plot_interpretation(raw_predictions.x, raw_predictions.output, idx=0) @@ -195,7 +205,9 @@ def test_prediction_length(max_prediction_length: int): forecaster, train_dataloaders=training_data_loader, ) - validation_dataset = TimeSeriesDataSet.from_dataset(training_dataset, data, stop_randomization=True, predict=True) + validation_dataset = TimeSeriesDataSet.from_dataset( + training_dataset, data, stop_randomization=True, predict=True + ) validation_data_loader = validation_dataset.to_dataloader(train=False) forecaster.predict( validation_data_loader, diff --git a/tests/test_models/test_nn/test_embeddings.py b/tests/test_models/test_nn/test_embeddings.py index a2318e6ce..844a46c4a 100644 --- a/tests/test_models/test_nn/test_embeddings.py +++ b/tests/test_models/test_nn/test_embeddings.py @@ -20,7 +20,9 @@ def test_MultiEmbedding(kwargs): x = torch.randint(0, 10, size=(4, 3)) embedding = MultiEmbedding(**kwargs) - assert embedding.input_size == x.size(1), "Input size should be equal to number of features" + assert embedding.input_size == x.size( + 1 + ), "Input size should be equal to number of features" out = embedding(x) if isinstance(out, dict): assert isinstance(kwargs["embedding_sizes"], dict) diff --git a/tests/test_models/test_nn/test_rnn.py b/tests/test_models/test_nn/test_rnn.py index dcc97d7aa..d951332bf 100644 --- a/tests/test_models/test_nn/test_rnn.py +++ b/tests/test_models/test_nn/test_rnn.py @@ -45,7 +45,9 @@ def test_zero_length_sequence(klass, rnn_kwargs): init_hidden_state = [init_hidden_state] for idx in range(len(hidden_state)): - assert hidden_state[idx].size() == init_hidden_state[idx].size(), "Hidden state sizes should be equal" + assert ( + hidden_state[idx].size() == init_hidden_state[idx].size() + ), "Hidden state sizes should be equal" assert (hidden_state[idx][:, lengths == 0] == 0).all() and ( hidden_state[idx][:, lengths > 0] != 0 ).all(), "Hidden state should be zero for zero-length sequences" diff --git a/tests/test_models/test_rnn_model.py b/tests/test_models/test_rnn_model.py index aec8eabc2..bae7fee20 100644 --- a/tests/test_models/test_rnn_model.py +++ b/tests/test_models/test_rnn_model.py @@ -12,7 +12,12 @@ def _integration( - data_with_covariates, tmp_path, cell_type="LSTM", data_loader_kwargs={}, clip_target: bool = False, **kwargs + data_with_covariates, + tmp_path, + cell_type="LSTM", + data_loader_kwargs={}, + clip_target: bool = False, + **kwargs, ): data_with_covariates = data_with_covariates.copy() if clip_target: @@ -27,12 +32,16 @@ def _integration( add_relative_time_idx=True, ) data_loader_default_kwargs.update(data_loader_kwargs) - dataloaders_with_covariates = make_dataloaders(data_with_covariates, **data_loader_default_kwargs) + dataloaders_with_covariates = make_dataloaders( + data_with_covariates, **data_loader_default_kwargs + ) train_dataloader = dataloaders_with_covariates["train"] val_dataloader = dataloaders_with_covariates["val"] test_dataloader = dataloaders_with_covariates["test"] - early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min") + early_stop_callback = EarlyStopping( + monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min" + ) logger = TensorBoardLogger(tmp_path) trainer = pl.Trainer( @@ -66,14 +75,26 @@ def _integration( test_outputs = trainer.test(net, dataloaders=test_dataloader) assert len(test_outputs) > 0 # check loading - net = RecurrentNetwork.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) + net = RecurrentNetwork.load_from_checkpoint( + trainer.checkpoint_callback.best_model_path + ) # check prediction - net.predict(val_dataloader, fast_dev_run=True, return_index=True, return_decoder_lengths=True) + net.predict( + val_dataloader, + fast_dev_run=True, + return_index=True, + return_decoder_lengths=True, + ) finally: shutil.rmtree(tmp_path, ignore_errors=True) - net.predict(val_dataloader, fast_dev_run=True, return_index=True, return_decoder_lengths=True) + net.predict( + val_dataloader, + fast_dev_run=True, + return_index=True, + return_decoder_lengths=True, + ) @pytest.mark.parametrize( @@ -82,11 +103,18 @@ def _integration( {}, {"cell_type": "GRU"}, dict( - data_loader_kwargs=dict(target_normalizer=GroupNormalizer(groups=["agency", "sku"], center=False)), + data_loader_kwargs=dict( + target_normalizer=GroupNormalizer( + groups=["agency", "sku"], center=False + ) + ), ), dict( data_loader_kwargs=dict( - lags={"volume": [2, 5]}, target="volume", time_varying_unknown_reals=["volume"], min_encoder_length=2 + lags={"volume": [2, 5]}, + target="volume", + time_varying_unknown_reals=["volume"], + min_encoder_length=2, ) ), dict( diff --git a/tests/test_models/test_temporal_fusion_transformer.py b/tests/test_models/test_temporal_fusion_transformer.py index 9fe784458..ae250d6d8 100644 --- a/tests/test_models/test_temporal_fusion_transformer.py +++ b/tests/test_models/test_temporal_fusion_transformer.py @@ -22,7 +22,9 @@ TweedieLoss, ) from pytorch_forecasting.models import TemporalFusionTransformer -from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters +from pytorch_forecasting.models.temporal_fusion_transformer.tuning import ( + optimize_hyperparameters, +) from pytorch_forecasting.utils._dependencies import _get_installed_packages if sys.version.startswith("3.6"): # python 3.6 does not have nullcontext @@ -39,7 +41,11 @@ def nullcontext(enter_result=None): def test_integration(multiple_dataloaders_with_covariates, tmp_path): - _integration(multiple_dataloaders_with_covariates, tmp_path, trainer_kwargs=dict(accelerator="cpu")) + _integration( + multiple_dataloaders_with_covariates, + tmp_path, + trainer_kwargs=dict(accelerator="cpu"), + ) def test_non_causal_attention(dataloaders_with_covariates, tmp_path): @@ -53,7 +59,9 @@ def test_non_causal_attention(dataloaders_with_covariates, tmp_path): def test_distribution_loss(data_with_covariates, tmp_path): - data_with_covariates = data_with_covariates.assign(volume=lambda x: x.volume.round()) + data_with_covariates = data_with_covariates.assign( + volume=lambda x: x.volume.round() + ) dataloaders_with_covariates = make_dataloaders( data_with_covariates, target="volume", @@ -75,7 +83,9 @@ def test_distribution_loss(data_with_covariates, tmp_path): reason="Test skipped if required package cpflows not available", ) def test_mqf2_loss(data_with_covariates, tmp_path): - data_with_covariates = data_with_covariates.assign(volume=lambda x: x.volume.round()) + data_with_covariates = data_with_covariates.assign( + volume=lambda x: x.volume.round() + ) dataloaders_with_covariates = make_dataloaders( data_with_covariates, target="volume", @@ -83,10 +93,14 @@ def test_mqf2_loss(data_with_covariates, tmp_path): time_varying_unknown_reals=["volume"], static_categoricals=["agency"], add_relative_time_idx=True, - target_normalizer=GroupNormalizer(groups=["agency", "sku"], center=False, transformation="log1p"), + target_normalizer=GroupNormalizer( + groups=["agency", "sku"], center=False, transformation="log1p" + ), ) - prediction_length = dataloaders_with_covariates["train"].dataset.min_prediction_length + prediction_length = dataloaders_with_covariates[ + "train" + ].dataset.min_prediction_length _integration( dataloaders_with_covariates, @@ -102,7 +116,9 @@ def _integration(dataloader, tmp_path, loss=None, trainer_kwargs=None, **kwargs) val_dataloader = dataloader["val"] test_dataloader = dataloader["test"] - early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min") + early_stop_callback = EarlyStopping( + monitor="val_loss", min_delta=1e-4, patience=1, verbose=False, mode="min" + ) # check training logger = TensorBoardLogger(tmp_path) @@ -138,7 +154,11 @@ def _integration(dataloader, tmp_path, loss=None, trainer_kwargs=None, **kwargs) elif isinstance(train_dataloader.dataset.target_normalizer, MultiNormalizer): loss = MultiLoss( [ - CrossEntropy() if isinstance(normalizer, NaNLabelEncoder) else QuantileLoss() + ( + CrossEntropy() + if isinstance(normalizer, NaNLabelEncoder) + else QuantileLoss() + ) for normalizer in train_dataloader.dataset.target_normalizer.normalizers ] ) @@ -171,7 +191,9 @@ def _integration(dataloader, tmp_path, loss=None, trainer_kwargs=None, **kwargs) assert len(test_outputs) > 0 # check loading - net = TemporalFusionTransformer.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) + net = TemporalFusionTransformer.load_from_checkpoint( + trainer.checkpoint_callback.best_model_path + ) # check prediction predictions = net.predict( @@ -193,11 +215,15 @@ def check(x): for xi in x.values(): check(xi) else: - assert pred_len == x.shape[0], "first dimension should be prediction length" + assert ( + pred_len == x.shape[0] + ), "first dimension should be prediction length" check(predictions.output) if isinstance(predictions.output, torch.Tensor): - assert predictions.output.ndim == 2, "shape of predictions should be batch_size x timesteps" + assert ( + predictions.output.ndim == 2 + ), "shape of predictions should be batch_size x timesteps" else: assert all( p.ndim == 2 for p in predictions.output @@ -246,7 +272,9 @@ def test_tensorboard_graph_log(dataloaders_with_covariates, model, tmp_path): def test_init_shared_network(dataloaders_with_covariates): dataset = dataloaders_with_covariates["train"].dataset - net = TemporalFusionTransformer.from_dataset(dataset, share_single_variable_networks=True) + net = TemporalFusionTransformer.from_dataset( + dataset, share_single_variable_networks=True + ) net.predict(dataset, fast_dev_run=True) @@ -287,15 +315,26 @@ def test_pickle(model): pickle.loads(pkl) -@pytest.mark.parametrize("kwargs", [dict(mode="dataframe"), dict(mode="series"), dict(mode="raw")]) -def test_predict_dependency(model, dataloaders_with_covariates, data_with_covariates, kwargs): +@pytest.mark.parametrize( + "kwargs", [dict(mode="dataframe"), dict(mode="series"), dict(mode="raw")] +) +def test_predict_dependency( + model, dataloaders_with_covariates, data_with_covariates, kwargs +): train_dataset = dataloaders_with_covariates["train"].dataset data_with_covariates = data_with_covariates.copy() dataset = TimeSeriesDataSet.from_dataset( - train_dataset, data_with_covariates[lambda x: x.agency == data_with_covariates.agency.iloc[0]], predict=True + train_dataset, + data_with_covariates[lambda x: x.agency == data_with_covariates.agency.iloc[0]], + predict=True, ) model.predict_dependency(dataset, variable="discount", values=[0.1, 0.0], **kwargs) - model.predict_dependency(dataset, variable="agency", values=data_with_covariates.agency.unique()[:2], **kwargs) + model.predict_dependency( + dataset, + variable="agency", + values=data_with_covariates.agency.unique()[:2], + **kwargs, + ) @pytest.mark.skipif( @@ -304,7 +343,9 @@ def test_predict_dependency(model, dataloaders_with_covariates, data_with_covari ) def test_actual_vs_predicted_plot(model, dataloaders_with_covariates): prediction = model.predict(dataloaders_with_covariates["val"], return_x=True) - averages = model.calculate_prediction_actual_by_variable(prediction.x, prediction.output) + averages = model.calculate_prediction_actual_by_variable( + prediction.x, prediction.output + ) model.plot_prediction_actual_by_variable(averages) @@ -359,7 +400,9 @@ def test_prediction_with_dataloder_raw(data_with_covariates, tmp_path): ) logger = TensorBoardLogger(tmp_path) trainer = pl.Trainer(max_epochs=1, gradient_clip_val=1e-6, logger=logger) - trainer.fit(net, train_dataloaders=dataset.to_dataloader(batch_size=4, num_workers=0)) + trainer.fit( + net, train_dataloaders=dataset.to_dataloader(batch_size=4, num_workers=0) + ) # choose small batch size to provoke issue res = net.predict(dataset.to_dataloader(batch_size=2, num_workers=0), mode="raw") @@ -399,7 +442,9 @@ def test_prediction_with_dataframe(model, data_with_covariates): reason="Test skipped on Win due to bug #1632, or if missing required packages", ) @pytest.mark.parametrize("use_learning_rate_finder", [True, False]) -def test_hyperparameter_optimization_integration(dataloaders_with_covariates, tmp_path, use_learning_rate_finder): +def test_hyperparameter_optimization_integration( + dataloaders_with_covariates, tmp_path, use_learning_rate_finder +): train_dataloader = dataloaders_with_covariates["train"] val_dataloader = dataloaders_with_covariates["val"] try: diff --git a/tests/test_utils/test_show_versions.py b/tests/test_utils/test_show_versions.py index 7fa3626f7..8ace16359 100644 --- a/tests/test_utils/test_show_versions.py +++ b/tests/test_utils/test_show_versions.py @@ -3,7 +3,11 @@ import pathlib import uuid -from pytorch_forecasting.utils._maint._show_versions import DEFAULT_DEPS_TO_SHOW, _get_deps_info, show_versions +from pytorch_forecasting.utils._maint._show_versions import ( + DEFAULT_DEPS_TO_SHOW, + _get_deps_info, + show_versions, +) def test_show_versions_runs():