Skip to content

Commit

Permalink
Updated test datasets by adding cyclical time of the day features. Renamed time of day features for easier readability in feature importance plots.
Browse files Browse the repository at this point in the history

Signed-off-by: Egor Dmitriev <[email protected]>
  • Loading branch information
egordm committed Mar 12, 2024
1 parent e02da4e commit c463a49
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 266 deletions.
4 changes: 2 additions & 2 deletions openstef/feature_engineering/temporal_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def add_time_of_the_day_cyclic_features(
)
period_of_the_day = 2 * np.pi * second_of_the_day / NUM_SECONDS_IN_A_DAY

data["sin_time_of_day"] = np.sin(period_of_the_day)
data["cos_time_of_day"] = np.cos(period_of_the_day)
data["time_of_day_sine"] = np.sin(period_of_the_day)
data["time_of_day_cosine"] = np.cos(period_of_the_day)

return data
242 changes: 121 additions & 121 deletions test/unit/data/input_data_multi_horizon_features.csv

Large diffs are not rendered by default.

242 changes: 121 additions & 121 deletions test/unit/data/input_data_with_features.csv

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions test/unit/data/input_data_with_holiday_features.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
,load,temp,humidity,pressure,T-1d,T-2d,T-3d,T-4d,T-5d,T-6d,T-7d,T-8d,T-9d,T-10d,T-11d,T-12d,T-13d,T-14d,IsWeekendDay,IsWeekDay,IsSunday,Month,Quarter,is_national_holiday,is_nieuwjaarsdag,is_goede_vrijdag,is_eerste_paasdag,is_tweede_paasdag,is_koningsdag,is_bevrijdingsdag,is_hemelvaart,is_eerste_pinksterdag,is_tweede_pinksterdag,is_eerste_kerstdag,is_tweede_kerstdag,is_bridgeday,is_schoolholiday,is_bouwvaknoord,is_voorjaarsvakantiemidden,is_voorjaarsvakantienoord,is_herfstvakantiezuid,is_kerstvakantie,is_zomervakantiemidden,is_herfstvakantiemidden,is_herfstvakantienoord,is_bouwvakmidden,is_zomervakantienoord,is_bouwvakzuid,is_zomervakantiezuid,is_meivakantie,is_voorjaarsvakantiezuid,saturation_pressure,vapour_pressure,dewpoint,air_density
2020-02-01 10:00:00,10,9,1.0,,,,,,,,,,,,,,,,True,False,False,2,1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,11.49201518556511,11.49201518556511,8.999999999999998,
2020-02-01 10:10:00,15,9,0.02,,,,,,,,,,,,,,,,True,False,False,2,1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,11.49201518556511,0.2298403037113022,-38.006190493385226,
2022-12-26 10:00:00,20,9,0.03,,,,,,,,,,,,,,,,False,True,False,12,4,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,11.49201518556511,0.3447604555669533,-33.97264888737701,
2020-04-27 11:00:00,15,9,0.04,,,,,,,,,,,,,,,,False,True,False,4,2,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,11.49201518556511,0.4596806074226044,-31.01205505051214,
,load,temp,humidity,pressure,T-1d,T-2d,T-3d,T-4d,T-5d,T-6d,T-7d,T-8d,T-9d,T-10d,T-11d,T-12d,T-13d,T-14d,IsWeekendDay,IsWeekDay,IsSunday,Month,Quarter,is_national_holiday,is_nieuwjaarsdag,is_goede_vrijdag,is_eerste_paasdag,is_tweede_paasdag,is_koningsdag,is_bevrijdingsdag,is_hemelvaart,is_eerste_pinksterdag,is_tweede_pinksterdag,is_eerste_kerstdag,is_tweede_kerstdag,is_bridgeday,is_schoolholiday,is_bouwvaknoord,is_herfstvakantiemidden,is_meivakantie,is_bouwvakmidden,is_zomervakantienoord,is_zomervakantiemidden,is_voorjaarsvakantiezuid,is_bouwvakzuid,is_herfstvakantiezuid,is_voorjaarsvakantiemidden,is_herfstvakantienoord,is_kerstvakantie,is_zomervakantiezuid,is_voorjaarsvakantienoord,time_of_day_sine,time_of_day_cosine,saturation_pressure,vapour_pressure,dewpoint,air_density
2020-02-01 10:00:00,10,9,1.0,,,,,,,,,,,,,,,,True,False,False,2,1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.49999999999999994,-0.8660254037844387,11.49201518556511,11.49201518556511,8.999999999999998,
2020-02-01 10:10:00,15,9,0.02,,,,,,,,,,,,,,,,True,False,False,2,1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.4617486132350339,-0.8870108331782217,11.49201518556511,0.2298403037113022,-38.006190493385226,
2022-12-26 10:00:00,20,9,0.03,,,,,,,,,,,,,,,,False,True,False,12,4,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,0.49999999999999994,-0.8660254037844387,11.49201518556511,0.3447604555669533,-33.97264888737701,
2020-04-27 11:00:00,15,9,0.04,,,,,,,,,,,,,,,,False,True,False,4,2,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,0.258819045102521,-0.9659258262890682,11.49201518556511,0.4596806074226044,-31.01205505051214,
15 changes: 0 additions & 15 deletions test/unit/feature_engineering/test_apply_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,6 @@ def test_apply_features(self):
pj={"model": "xgb", "lat": 52.132633, "lon": 5.291266},
)

# Time of the day cyclic features are not in the expected output dataset
input_data_with_features = input_data_with_features.drop(
columns=time_of_the_day_cyclic_features_columns
)

expected_output = TestData.load("input_data_with_features.csv")

self.assertDataframeEqual(
Expand Down Expand Up @@ -131,11 +126,6 @@ def test_train_feature_applicator(self):
pj={"model": "proleaf", "lat": 52.132633, "lon": 5.291266},
)

# Time of the day cyclic features are not in the expected output dataset
input_data_with_features = input_data_with_features.drop(
columns=time_of_the_day_cyclic_features_columns
)

expected_output = TestData.load("input_data_multi_horizon_features.csv")

self.assertDataframeEqual(
Expand Down Expand Up @@ -205,11 +195,6 @@ def test_apply_holiday_features(self):
data=input_data, horizon=24
)

# Time of the day cyclic features are not in the expected output dataset
input_data_with_features = input_data_with_features.drop(
columns=time_of_the_day_cyclic_features_columns
)

expected = TestData.load("../data/input_data_with_holiday_features.csv")

self.assertDataframeEqual(
Expand Down
4 changes: 2 additions & 2 deletions test/unit/feature_engineering/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ def test_add_time_of_the_day_cyclic_features(self):
* np.pi
)

assert np.allclose(output_data["sin_time_of_day"], np.sin(periods))
assert np.allclose(output_data["cos_time_of_day"], np.cos(periods))
assert np.allclose(output_data["time_of_day_sine"], np.sin(periods))
assert np.allclose(output_data["time_of_day_cosine"], np.cos(periods))

0 comments on commit c463a49

Please sign in to comment.