From 9903da15d7db9aa4622bdd91138857cc95f1d73c Mon Sep 17 00:00:00 2001 From: Grigory Zlotin <122524141+yellowssnake@users.noreply.github.com> Date: Thu, 21 Mar 2024 17:52:53 +0300 Subject: [PATCH] BinaryOperatorTransform basic logic (#260) * BinaryOperatorTransform basic logic * Change CHANGELOG * add all operations to test_binary_operator * fix codestyle * change test, correct logic in binary transform in inplace case * change tests * fix import * change inverse_operation logic * add new tests, add description * minor changes * change docstrings, tests, error descriptions * . * new changes * . * correct rendering * add doctest * add doctest * fix doctest * fix changelog * . --- CHANGELOG.md | 2 +- docs/source/api_reference/transforms.rst | 4 +- etna/transforms/__init__.py | 2 + etna/transforms/math/__init__.py | 2 + etna/transforms/math/binary_operator.py | 240 +++++++++++++++ .../test_inference/test_inverse_transform.py | 81 ++++++ .../test_inference/test_transform.py | 81 ++++++ .../test_binary_operator_transform.py | 275 ++++++++++++++++++ 8 files changed, 685 insertions(+), 2 deletions(-) create mode 100644 etna/transforms/math/binary_operator.py create mode 100644 tests/test_transforms/test_math/test_binary_operator_transform.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 17c0df998..043f7a7e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added -- +- Add `BinaryOperationTransform` to transforms ([#260](https://github.com/etna-team/etna/pull/260)) - - - diff --git a/docs/source/api_reference/transforms.rst b/docs/source/api_reference/transforms.rst index 4dc377fe2..75a487c7d 100644 --- a/docs/source/api_reference/transforms.rst +++ b/docs/source/api_reference/transforms.rst @@ -135,7 +135,7 @@ Scaling transforms: MinMaxScalerTransform MaxAbsScalerTransform -Functional transforms: +Functional transforms and their utilities: .. 
class BinaryOperator(str, Enum):
    """Binary operators that can be applied to two columns of a dataframe.

    The *name* of every member must be the name of the ``pandas.DataFrame`` method that
    evaluates the operator (``add`` -> ``DataFrame.add`` and so on), because
    :py:meth:`perform` looks the method up by that name. The *value* is the operator symbol.
    """

    #: Addition operation, value: "+"
    add = "+"
    #: Subtraction operation, value: "-"
    sub = "-"
    #: Multiplication operation, value: "*"
    mul = "*"
    #: Division operation, value: "/"
    div = "/"
    #: Floor division operation, value: "//"
    floordiv = "//"
    #: Modulo operation, value: "%"
    mod = "%"
    #: Power operation, value: "**"
    pow = "**"
    #: Equal operation, value: "=="
    eq = "=="
    #: Not equal operation, value: "!="
    ne = "!="
    #: Less or equal operation, value: "<="
    le = "<="
    #: Less operation, value: "<"
    lt = "<"
    #: Greater or equal operation, value: ">="
    ge = ">="
    #: Greater operation, value: ">"
    gt = ">"

    @classmethod
    def _missing_(cls, value):
        """Reject a value that matches no member.

        Raises
        ------
        ValueError:
            Always; the message names the rejected value and lists the supported operators
        """
        supported = ", ".join(repr(member.value) for member in cls)
        # The "Supported operands: ..." part is kept verbatim so existing message matching still works.
        raise ValueError(f"{value!r} is not a valid {cls.__name__}. Supported operands: {supported}.")

    def perform(self, df: pd.DataFrame, left_operand: str, right_operand: str, out_column: str) -> pd.DataFrame:
        """Perform binary operation on passed dataframe.

        - If during the operation a division by zero of a positive number occurs, writes +inf to this cell of the column, if negative - -inf, if 0/0 - nan.
        - In the case of raising a negative number to a non-integer power, writes nan to this cell of the column.

        The passed dataframe is not modified.

        Parameters
        ----------
        df:
            Source dataframe with two-level columns; the second level must be named ``feature``
        left_operand:
            Name of the left column
        right_operand:
            Name of the right column
        out_column:
            Resulting column name, which contains the result of the operation operator(left, right)

        Returns
        -------
        :
            Dataframe with a single ``out_column`` per first-level key, holding the result of the operation
        """
        pandas_operator = getattr(pd.DataFrame, self.name)

        def select(column: str) -> pd.DataFrame:
            # Both sides are relabelled to ``out_column`` so that pandas aligns them
            # pairwise on identical column labels and the result already has its final name.
            selected = df.loc[:, pd.IndexSlice[:, column]]
            return selected.rename(columns={column: out_column}, level="feature")

        return pandas_operator(select(left_operand), select(right_operand))
class BinaryOperationTransform(ReversibleTransform):
    """Perform binary operation on the columns of dataset.

    - Inverse_transform functionality is only supported for operations +, -, * , /.
    - If during the operation a division by zero of a positive number occurs, writes +inf to this cell of the column, if negative - -inf, if 0/0 - nan.
    - In the case of raising a negative number to a non-integer power, writes nan to this cell of the column.

    Examples
    --------
    >>> import numpy as np
    >>> from etna.datasets import generate_ar_df
    >>> df = generate_ar_df(start_time="2020-01-01", periods=30, freq="D", n_segments=1)
    >>> df["feature"] = np.full(30, 10)
    >>> df["target"] = np.full(30, 1)
    >>> df_ts_format = TSDataset.to_dataset(df)
    >>> ts = TSDataset(df_ts_format, "D")
    >>> ts["2020-01-01":"2020-01-06", "segment_0", ["feature", "target"]]
    segment    segment_0
    feature      feature target
    timestamp
    2020-01-01        10      1
    2020-01-02        10      1
    2020-01-03        10      1
    2020-01-04        10      1
    2020-01-05        10      1
    2020-01-06        10      1
    >>> transformer = BinaryOperationTransform(left_column="feature", right_column="target", operator="+", out_column="target")
    >>> new_ts = transformer.fit_transform(ts=ts)
    >>> new_ts["2020-01-01":"2020-01-06", "segment_0", ["feature", "target"]]
    segment    segment_0
    feature      feature target
    timestamp
    2020-01-01        10     11
    2020-01-02        10     11
    2020-01-03        10     11
    2020-01-04        10     11
    2020-01-05        10     11
    2020-01-06        10     11
    """

    def __init__(self, left_column: str, right_column: str, operator: str, out_column: Optional[str] = None):
        """Create instance of BinaryOperationTransform.

        Parameters
        ----------
        left_column:
            Name of the left column
        right_column:
            Name of the right column
        operator:
            Operation to perform on the columns, see :py:class:`~etna.transforms.math.binary_operator.BinaryOperator`
        out_column:
            - Resulting column name, if don't set, name will be `left_column operator right_column`.
            - If out_column is left_column or right_column, apply changes to the existing column out_column, else create new column.

        Raises
        ------
        ValueError:
            If ``left_column`` and ``right_column`` are the same column
        ValueError:
            If ``operator`` is not a supported operator
        """
        if left_column == right_column:
            raise ValueError("You should use LambdaTransform, when you perform operation only with one column")

        super().__init__(required_features=[left_column, right_column])
        self.left_column = left_column
        self.right_column = right_column
        self.operator = BinaryOperator(operator)

        # The transform is in-place when the result overwrites one of its own inputs.
        self._inplace_flag = out_column == left_column or out_column == right_column
        if out_column is None:
            # ``.value`` is used explicitly so the generated name never depends on how an enum member is stringified.
            out_column = f"{left_column}{self.operator.value}{right_column}"
        self.out_column = out_column

        self._in_column_regressor: Optional[bool] = None

        # Only these four operations can be undone; every other operator has no inverse.
        inverse_logic = {"+": "-", "-": "+", "*": "/", "/": "*"}
        inverse_symbol = inverse_logic.get(self.operator.value)
        self.inverse_operator = BinaryOperator(inverse_symbol) if inverse_symbol is not None else None

    def fit(self, ts: TSDataset) -> "BinaryOperationTransform":
        """Fit the transform.

        Remembers whether both input columns are regressors of ``ts``; this is what
        :py:meth:`get_regressors_info` reports on afterwards.

        Parameters
        ----------
        ts:
            Dataset to fit the transform on

        Returns
        -------
        :
            Fitted transform
        """
        self._in_column_regressor = self.left_column in ts.regressors and self.right_column in ts.regressors
        super().fit(ts)
        return self

    def _fit(self, df: pd.DataFrame) -> "BinaryOperationTransform":
        """Fit preprocess method, does nothing in ``BinaryOperationTransform`` case.

        Parameters
        ----------
        df:
            dataframe with data.

        Returns
        -------
        :
            result
        """
        return self

    def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Perform operation on passed dataframe.

        Parameters
        ----------
        df:
            dataframe with data to transform.

        Returns
        -------
        :
            transformed dataframe; in the in-place case the passed dataframe is modified and returned,
            otherwise a new dataframe with the extra ``out_column`` is returned
        """
        result = self.operator.perform(
            df=df,
            left_operand=self.left_column,
            right_operand=self.right_column,
            out_column=self.out_column,
        )
        if self._inplace_flag:
            df.loc[:, pd.IndexSlice[:, self.out_column]] = result
        else:
            df = pd.concat((df, result), axis=1)
        return df

    def _inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Perform reverse operation on passed dataframe.

        If ``out_column`` is a new column (not ``left_column`` or ``right_column``), there is nothing
        to restore and the dataframe is returned unchanged, whatever the operator is.

        Parameters
        ----------
        df:
            dataframe with data to transform.

        Returns
        -------
        :
            transformed dataframe

        Raises
        ------
        ValueError:
            If the transform is in-place and initial operation is not '+', '-', '*' or '/'
        """
        if not self._inplace_flag:
            return df

        if self.inverse_operator is None:
            raise ValueError("We only support inverse transform if the original operation is .+, .-, .*, ./")

        out_slice = pd.IndexSlice[:, self.out_column]
        # The input column that was NOT overwritten still holds its original values.
        support_column = self.left_column if self.left_column != self.out_column else self.right_column

        overwrote_right = self.right_column == self.out_column
        if overwrote_right and self.operator == BinaryOperator.sub:
            # out = left - right  =>  right = (-out) + left
            df.loc[:, out_slice] = -df.loc[:, out_slice]
        elif overwrote_right and self.operator == BinaryOperator.div:
            # out = left / right  =>  right = (1 / out) * left
            df.loc[:, out_slice] = 1 / df.loc[:, out_slice]
        # In every other case the original is simply ``out <inverse operator> support``.

        df.loc[:, out_slice] = self.inverse_operator.perform(
            df=df, left_operand=self.out_column, right_operand=support_column, out_column=self.out_column
        )
        return df

    def get_regressors_info(self) -> List[str]:
        """Return the list with regressors created by the transform.

        Returns
        -------
        :
            ``[out_column]`` if a new column is created and both input columns are regressors, else an empty list

        Raises
        ------
        ValueError:
            If the transform is not fitted
        """
        if self._in_column_regressor is None:
            raise ValueError("Transform is not fitted!")
        return [self.out_column] if self._in_column_regressor and not self._inplace_flag else []


__all__ = ["BinaryOperationTransform", "BinaryOperator"]
etna.transforms import BinaryOperationTransform from etna.transforms import BoxCoxTransform from etna.transforms import ChangePointsLevelTransform from etna.transforms import ChangePointsSegmentationTransform @@ -147,6 +148,18 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments) # math (AddConstTransform(in_column="target", value=1, inplace=False), "regular_ts"), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts"), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="positive" + ), + "ts_with_exog", + ), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="new_col" + ), + "ts_with_exog", + ), (LagTransform(in_column="target", lags=[1, 2, 3]), "regular_ts"), ( LambdaTransform(in_column="target", transform_func=lambda x: x + 1, inplace=False), @@ -345,6 +358,18 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments (AddConstTransform(in_column="target", value=1, inplace=False), "regular_ts"), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts"), (AddConstTransform(in_column="positive", value=1, inplace=True), "ts_with_exog"), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="positive" + ), + "ts_with_exog", + ), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="new_col" + ), + "ts_with_exog", + ), (LagTransform(in_column="target", lags=[1, 2, 3]), "regular_ts"), ( LambdaTransform(in_column="target", transform_func=lambda x: x + 1, inplace=False), @@ -578,6 +603,20 @@ def _test_inverse_transform_train_new_segments(self, ts, transform, train_segmen {}, ), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts", {"change": {"target"}}), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", 
operator="+", out_column="positive" + ), + "ts_with_exog", + {"change": {"positive"}}, + ), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="new_col" + ), + "ts_with_exog", + {}, + ), ( LagTransform(in_column="target", lags=[1, 2, 3], out_column="res"), "regular_ts", @@ -876,6 +915,20 @@ def _test_inverse_transform_future_new_segments(self, ts, transform, train_segme ), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts", {}), (AddConstTransform(in_column="positive", value=1, inplace=True), "ts_with_exog", {"change": {"positive"}}), + ( + BinaryOperationTransform( + left_column="positive", right_column="weekday", operator="+", out_column="positive" + ), + "ts_with_exog", + {"change": {"positive"}}, + ), + ( + BinaryOperationTransform( + left_column="positive", right_column="weekday", operator="+", out_column="new_col" + ), + "ts_with_exog", + {}, + ), ( LagTransform(in_column="target", lags=[1, 2, 3], out_column="res"), "regular_ts", @@ -1337,6 +1390,20 @@ def _test_inverse_transform_future_with_target( {}, ), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts", {"change": {"target"}}), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="positive" + ), + "ts_with_exog", + {"change": {"positive"}}, + ), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="new_col" + ), + "ts_with_exog", + {}, + ), ( LagTransform(in_column="target", lags=[1, 2, 3], out_column="res"), "regular_ts", @@ -1718,6 +1785,20 @@ def _test_inverse_transform_future_without_target( ), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts", {}), (AddConstTransform(in_column="positive", value=1, inplace=True), "ts_with_exog", {"change": {"positive"}}), + ( + BinaryOperationTransform( + left_column="positive", right_column="weekday", operator="+", 
out_column="positive" + ), + "ts_with_exog", + {"change": {"positive"}}, + ), + ( + BinaryOperationTransform( + left_column="positive", right_column="weekday", operator="+", out_column="new_col" + ), + "ts_with_exog", + {}, + ), ( LagTransform(in_column="target", lags=[1, 2, 3], out_column="res"), "regular_ts", diff --git a/tests/test_transforms/test_inference/test_transform.py b/tests/test_transforms/test_inference/test_transform.py index 66e93b778..e7b8a6f8b 100644 --- a/tests/test_transforms/test_inference/test_transform.py +++ b/tests/test_transforms/test_inference/test_transform.py @@ -9,6 +9,7 @@ from etna.analysis import StatisticsRelevanceTable from etna.models import ProphetModel from etna.transforms import AddConstTransform +from etna.transforms import BinaryOperationTransform from etna.transforms import BoxCoxTransform from etna.transforms import ChangePointsLevelTransform from etna.transforms import ChangePointsSegmentationTransform @@ -139,6 +140,18 @@ def _test_transform_train_subset_segments(self, ts, transform, segments): # math (AddConstTransform(in_column="target", value=1, inplace=False), "regular_ts"), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts"), + ( + BinaryOperationTransform( + left_column="weekday", right_column="positive", operator="+", out_column="positive" + ), + "ts_with_exog", + ), + ( + BinaryOperationTransform( + left_column="weekday", right_column="positive", operator="+", out_column="new_col" + ), + "ts_with_exog", + ), (LagTransform(in_column="target", lags=[1, 2, 3]), "regular_ts"), ( LambdaTransform(in_column="target", transform_func=lambda x: x + 1, inplace=False), @@ -327,6 +340,18 @@ def _test_transform_future_subset_segments(self, ts, transform, segments, horizo (AddConstTransform(in_column="target", value=1, inplace=False), "regular_ts"), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts"), (AddConstTransform(in_column="positive", value=1, inplace=True), 
"ts_with_exog"), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="target" + ), + "ts_with_exog", + ), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="new_col" + ), + "ts_with_exog", + ), (LagTransform(in_column="target", lags=[1, 2, 3]), "regular_ts"), ( LambdaTransform(in_column="target", transform_func=lambda x: x + 1, inplace=False), @@ -522,6 +547,20 @@ def _test_transform_train_new_segments(self, ts, transform, train_segments, expe {"create": {"res"}}, ), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts", {"change": {"target"}}), + ( + BinaryOperationTransform( + left_column="weekday", right_column="positive", operator="+", out_column="positive" + ), + "ts_with_exog", + {"change": {"positive"}}, + ), + ( + BinaryOperationTransform( + left_column="weekday", right_column="positive", operator="+", out_column="new_col" + ), + "ts_with_exog", + {"create": {"new_col"}}, + ), ( LagTransform(in_column="target", lags=[1, 2, 3], out_column="res"), "regular_ts", @@ -815,6 +854,20 @@ def _test_transform_future_new_segments(self, ts, transform, train_segments, exp ), (AddConstTransform(in_column="target", value=1, inplace=True), "regular_ts", {}), (AddConstTransform(in_column="positive", value=1, inplace=True), "ts_with_exog", {"change": {"positive"}}), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="positive" + ), + "ts_with_exog", + {"change": {"positive"}}, + ), + ( + BinaryOperationTransform( + left_column="positive", right_column="target", operator="+", out_column="new_col" + ), + "ts_with_exog", + {"create": {"new_col"}}, + ), ( LagTransform(in_column="target", lags=[1, 2, 3], out_column="res"), "regular_ts", @@ -1190,6 +1243,20 @@ def _test_transform_future_with_target(self, ts, transform, expected_changes, ga {"create": {"res"}}, ), 
import operator
from copy import deepcopy

import numpy as np
import numpy.testing
import pytest

from etna.datasets import TSDataset
from etna.datasets import generate_ar_df
from etna.transforms.math import binary_operator

# Reference implementation of every supported operator, applied to plain numpy arrays.
ops = {
    "+": operator.add,
    "-": operator.sub,
    "*": operator.mul,
    "/": operator.truediv,
    "//": operator.floordiv,
    "%": operator.mod,
    "**": operator.pow,
    "==": operator.eq,
    "!=": operator.ne,
    ">=": operator.ge,
    "<=": operator.le,
    ">": operator.gt,
    "<": operator.lt,
}

# Operators for which the transform implements ``inverse_transform``.
INVERTIBLE_OPERATORS = ["+", "-", "*", "/"]
NON_INVERTIBLE_OPERATORS = [symbol for symbol in ops if symbol not in INVERTIBLE_OPERATORS]


def _make_ts(n_segments: int, periods: int = 100) -> TSDataset:
    """Build a dataset whose ``feature`` and ``target`` columns are uniform random values between 0 and 10."""
    df = generate_ar_df(start_time="2020-01-01", periods=periods, freq="D", n_segments=n_segments)
    size = periods * n_segments
    # ``low`` must not exceed ``high``: numpy documents the result as undefined otherwise.
    df["feature"] = np.random.uniform(0, 10, size=size)
    df["target"] = np.random.uniform(0, 10, size=size)
    return TSDataset(TSDataset.to_dataset(df), freq="D")


def _segment_values(ts: TSDataset, segment: str, column: str) -> np.ndarray:
    """Return an independent copy of one column of one segment, safe to keep across in-place transforms."""
    return deepcopy(ts.df[segment][column].values)


def _make_transform(operand, left_column, right_column, out_column):
    return binary_operator.BinaryOperationTransform(
        left_column=left_column, right_column=right_column, operator=operand, out_column=out_column
    )


def _check_transform(ts, segments, operand, left_column, right_column, out_column):
    """Check that the transform output matches the reference operator in every given segment."""
    # Expected values are computed up front because an in-place transform overwrites its input.
    expected = {
        segment: ops[operand](
            _segment_values(ts, segment, left_column), _segment_values(ts, segment, right_column)
        )
        for segment in segments
    }
    transformer = _make_transform(operand, left_column, right_column, out_column)
    new_ts = transformer.fit_transform(ts=ts)
    for segment, expected_vals in expected.items():
        actual_vals = new_ts.df[segment][transformer.out_column].to_numpy()
        numpy.testing.assert_array_almost_equal(actual_vals, expected_vals)
    if out_column is None:
        assert transformer.out_column == left_column + operand + right_column


def _check_inverse(ts, segments, operand, left_column, right_column, out_column):
    """Check that ``inverse_transform`` restores the original values of ``out_column`` in every given segment."""
    original = {segment: _segment_values(ts, segment, out_column) for segment in segments}
    transformer = _make_transform(operand, left_column, right_column, out_column)
    new_ts = transformer.fit_transform(ts=ts)
    new_ts = transformer.inverse_transform(ts=new_ts)
    for segment, original_vals in original.items():
        restored_vals = new_ts.df[segment][out_column].to_numpy()
        numpy.testing.assert_array_almost_equal(restored_vals, original_vals)


@pytest.fixture
def ts_one_segment(random_seed) -> TSDataset:
    """Generate one-segment dataset with positive random ``feature`` and ``target``."""
    return _make_ts(n_segments=1)


@pytest.fixture
def ts_two_segments(random_seed) -> TSDataset:
    """Generate two-segment dataset with positive random ``feature`` and ``target``."""
    return _make_ts(n_segments=2)


@pytest.mark.parametrize("out_column", ["target", "new_col", None])
@pytest.mark.parametrize("left_column, right_column", [("feature", "target")])
@pytest.mark.parametrize("operand", list(ops))
def test_simple_one_segment(ts_one_segment: TSDataset, operand, left_column, right_column, out_column):
    _check_transform(ts_one_segment, ["segment_0"], operand, left_column, right_column, out_column)


@pytest.mark.parametrize("out_column", ["target", "new_col", None])
@pytest.mark.parametrize("left_column, right_column", [("feature", "target")])
@pytest.mark.parametrize("operand", list(ops))
def test_simple_two_segments(ts_two_segments: TSDataset, operand, left_column, right_column, out_column):
    _check_transform(ts_two_segments, ["segment_0", "segment_1"], operand, left_column, right_column, out_column)


@pytest.mark.parametrize("out_column", ["target", "feature"])
@pytest.mark.parametrize("left_column, right_column", [("feature", "target"), ("target", "feature")])
@pytest.mark.parametrize("operand", INVERTIBLE_OPERATORS)
def test_inverse_one_segment(ts_one_segment, operand, left_column, right_column, out_column):
    _check_inverse(ts_one_segment, ["segment_0"], operand, left_column, right_column, out_column)


@pytest.mark.parametrize("out_column", ["target", "feature"])
@pytest.mark.parametrize("left_column, right_column", [("feature", "target"), ("target", "feature")])
@pytest.mark.parametrize("operand", INVERTIBLE_OPERATORS)
def test_inverse_two_segments(ts_two_segments, operand, left_column, right_column, out_column):
    _check_inverse(ts_two_segments, ["segment_0", "segment_1"], operand, left_column, right_column, out_column)


@pytest.mark.parametrize("operand", NON_INVERTIBLE_OPERATORS)
def test_inverse_failed_unsupported_operator(ts_one_segment, operand):
    transformer = _make_transform(operand, left_column="feature", right_column="target", out_column="target")
    with pytest.raises(
        ValueError,
        match="We only support inverse transform if the original operation is .+, .-, .*, ./",
    ):
        _ = transformer.inverse_transform(ts=ts_one_segment)


@pytest.mark.parametrize("left_column, right_column, out_column", [("feature", "target", "new_col")])
@pytest.mark.parametrize("operand", INVERTIBLE_OPERATORS)
def test_inverse_failed_not_inplace(ts_one_segment, operand, left_column, right_column, out_column):
    """Inverse transform must leave a newly created ``out_column`` untouched (it is a no-op, not an error)."""
    left_vals = _segment_values(ts_one_segment, "segment_0", left_column)
    right_vals = _segment_values(ts_one_segment, "segment_0", right_column)
    checker_vals = ops[operand](left_vals, right_vals)
    transformer = _make_transform(operand, left_column, right_column, out_column)
    new_ts = transformer.fit_transform(ts=ts_one_segment)
    new_ts = transformer.inverse_transform(ts=new_ts)
    new_ts_vals = new_ts.df["segment_0"][out_column].to_numpy()
    numpy.testing.assert_array_almost_equal(new_ts_vals, checker_vals)