Skip to content

Commit

Permalink
remove non trailing null rows in fit_transform and check in test_imputer of test_linear_quantile
Browse files Browse the repository at this point in the history

Signed-off-by: lschilders <[email protected]>
  • Loading branch information
lschilders committed Oct 2, 2024
1 parent ee53800 commit 52017ee
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
6 changes: 6 additions & 0 deletions openstef/feature_engineering/missing_values_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ def __init__(
).set_output(transform="pandas")
self.imputer_._validate_params()

def remove_trailing_null_rows(self, df: pd.DataFrame) -> pd.DataFrame:
    """Drop the rows of ``df`` that contain trailing null values.

    A null is "trailing" when no later row holds a non-null value in the
    same column. Nulls that are followed by a valid value further down the
    column are left in place and do not cause their row to be dropped.

    As a side effect, the boolean row mask of the rows that are kept is
    stored on ``self.non_trailing_null_rows`` (same index as ``df``), so
    the same selection can be re-applied to other objects afterwards.

    Args:
        df: Input frame to inspect.

    Returns:
        The subset of ``df`` whose rows have no trailing null in any column.

    """
    # After a backward fill, every null that has a later non-null value in
    # its column is filled; whatever is still null is therefore trailing.
    has_trailing_null = df.bfill().isnull().any(axis="columns")

    # Keep the inverse mask on the instance; fit_transform reads this
    # attribute to subset its own data with the same rows.
    self.non_trailing_null_rows = ~has_trailing_null
    return df.loc[self.non_trailing_null_rows]

def fit(self, x, y=None):
"""Fit the imputer on the input data."""
_ = check_array(x, force_all_finite="allow-nan")
Expand Down Expand Up @@ -106,6 +111,7 @@ def fit_transform(self, x, y=None) -> tuple[pd.DataFrame, Optional[pd.Series]]:
y = y.loc[self.non_trailing_null_rows]

x = self.transform(x)
x = x.loc[self.non_trailing_null_rows]
return x, y

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion test/unit/model/regressors/test_linear_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ def test_imputer(self):
self.assertTrue((model2.predict(X_) == model2.predict(X)).all())

# check if last row is removed because of trailing null values
self.assertEqual(X_.shape[0], n_sample - 1)
X_transformed, _ = model2.imputer_.fit_transform(X)
self.assertEqual(X_transformed.shape[0], n_sample - 1)

def test_value_error_raised(self):
# Check if Value Error is raised when 0.5 is not in the requested quantiles list
Expand Down

0 comments on commit 52017ee

Please sign in to comment.