Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added fix and unit test for detecting flatliners on prediction data w… #482

Merged
merged 5 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
# Test
- name: Unit test with pytest
run: |
pip install pytest pytest-cov
pip install -r test-requirements.txt
pytest --cov-report=xml --cov=openstef/ test/ --junitxml=pytest-report.xml
# Fix relative paths in coverage file
# Known bug: https://community.sonarsource.com/t/sonar-on-github-actions-with-python-coverage-source-issue/36057
Expand Down
4 changes: 3 additions & 1 deletion openstef/validation/validation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <[email protected]> # noqa E501>
#
# SPDX-License-Identifier: MPL-2.0
from datetime import timedelta
from datetime import datetime, timedelta
from typing import Union

import math
Expand Down Expand Up @@ -222,6 +222,8 @@ def detect_ongoing_zero_flatliner(
bool: Indicating whether or not there is a zero flatliner ongoing for the given load.

"""
# remove all timestamps in the future
load = load[load.index.tz_localize(None) <= datetime.utcnow()]
latest_measurement_time = load.index.max()
latest_measurements = load[
latest_measurement_time - timedelta(minutes=duration_threshold_minutes) :
Expand Down
1 change: 1 addition & 0 deletions test-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ autoflake==1.7.5
bandit==1.7.4
black==23.9.1
docformatter==1.5.0
freezegun~=1.2.2
isort==5.10.1
pydocstyle==6.1.1
pylint==2.15.4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,31 @@
# SPDX-License-Identifier: MPL-2.0

from datetime import datetime, timedelta
from freezegun import freeze_time
from test.unit.utils.base import BaseTestCase
import numpy as np
import pandas as pd

from openstef.validation.validation import detect_ongoing_zero_flatliner

now = datetime.utcnow()
three_hour_range = pd.date_range(
start=now - timedelta(minutes=180), end=now, freq="0.25H"
)


@freeze_time("2023-10-30 12:01:02")
class TestDetectOngoingZeroFlatliners(BaseTestCase):
def setUp(self) -> None:
    """Build the time indexes shared by the tests in this class.

    Both ranges are anchored on the current UTC time as returned by
    ``datetime.utcnow()`` at the moment the fixture runs, and use a
    15-minute step.
    """
    super().setUp()
    current_time = datetime.utcnow()
    window_start = current_time - timedelta(minutes=180)
    quarter_hour = "0.25H"

    # 180 minutes of timestamps ending at the current time.
    self.three_hour_range = pd.date_range(
        start=window_start, end=current_time, freq=quarter_hour
    )
    # Same start, but extended 60 minutes past the current time.
    self.four_hour_range_predict_setting = pd.date_range(
        start=window_start,
        end=current_time + timedelta(minutes=60),
        freq=quarter_hour,
    )

def test_all_zero(self):
# Arrange
load = pd.Series(index=three_hour_range, data=[0 for i in range(13)])
load = pd.Series(index=self.three_hour_range, data=[0 for i in range(13)])
duration_threshold = 120

# Act
Expand All @@ -29,7 +38,7 @@ def test_all_zero(self):

def test_all_nonzero(self):
# Arrange
load = pd.Series(index=three_hour_range, data=[i for i in range(1, 14)])
load = pd.Series(index=self.three_hour_range, data=[i for i in range(1, 14)])
duration_threshold = 120

# Act
Expand All @@ -43,7 +52,9 @@ def test_only_last_nonzero(self):
# now the pattern has ended since the last measurement is not zero anymore.

# Arrange
load = pd.Series(index=three_hour_range, data=[0 for i in range(1, 13)] + [1])
load = pd.Series(
index=self.three_hour_range, data=[0 for i in range(1, 13)] + [1]
)
duration_threshold = 120

# Act
Expand All @@ -58,7 +69,7 @@ def test_zero_flatliner_pattern_below_threshold(self):

# Arrange
load = pd.Series(
index=three_hour_range, data=[i for i in range(1, 10)] + [0, 0, 0, 0]
index=self.three_hour_range, data=[i for i in range(1, 10)] + [0, 0, 0, 0]
)
duration_threshold = 120

Expand All @@ -71,7 +82,7 @@ def test_zero_flatliner_pattern_below_threshold(self):
def test_zero_flatliner_pattern_just_above_threshold(self):
# Arrange
load = pd.Series(
index=three_hour_range, data=[1, 2, 3, 4] + [0 for i in range(9)]
index=self.three_hour_range, data=[1, 2, 3, 4] + [0 for i in range(9)]
)
duration_threshold = 120

Expand All @@ -84,7 +95,7 @@ def test_zero_flatliner_pattern_just_above_threshold(self):
def test_zero_flatliner_and_missing_values(self):
# Arrange
load = pd.Series(
index=three_hour_range,
index=self.three_hour_range,
data=[1, 2, 3, 4] + [0, 0, 0, 0, np.nan, np.nan, np.nan, np.nan, 0],
)
duration_threshold = 120
Expand All @@ -97,11 +108,31 @@ def test_zero_flatliner_and_missing_values(self):

def test_all_missing_values(self):
# Arrange
load = pd.Series(index=three_hour_range, data=[np.nan for i in range(13)])
load = pd.Series(index=self.three_hour_range, data=[np.nan for i in range(13)])
duration_threshold = 120

# Act
zero_flatliner_ongoing = detect_ongoing_zero_flatliner(load, duration_threshold)

# Assert
assert zero_flatliner_ongoing == False

def test_zero_flatliner_predict_future(self):
    """Check flatliner detection on a series that extends into the future.

    Scenario: a forecast is made on a zero flatliner, so the load contains
    timestamps in the future with NaN values that need to be predicted.
    In this case: time in future > duration_threshold.
    """
    # Arrange
    leading_nonzero = [1, 2, 3]
    zero_run = [0] * 9
    trailing_nan = [np.nan] * 5
    load = pd.Series(
        index=self.four_hour_range_predict_setting,
        data=leading_nonzero + zero_run + trailing_nan,
    )
    duration_threshold = 60

    # Act
    flatliner_detected = detect_ongoing_zero_flatliner(load, duration_threshold)

    # Assert
    assert flatliner_detected == True
Loading