From 9830f63a297adad391c8e20e7ca21387a7fc9ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Tue, 2 Feb 2021 15:11:50 +0100 Subject: [PATCH] Move to datetime64 indexed dataframes --- src/fmu/ensemble/observations.py | 30 ++++++++++++++------------- src/fmu/ensemble/realization.py | 2 +- tests/test_ecl2df.py | 10 +++++++-- tests/test_ensemble_eclfail.py | 7 ++++--- tests/test_observations.py | 4 ++-- tests/test_realization.py | 35 +++++++++++++++++++++----------- 6 files changed, 54 insertions(+), 34 deletions(-) diff --git a/src/fmu/ensemble/observations.py b/src/fmu/ensemble/observations.py index 4344cf67..455b9316 100644 --- a/src/fmu/ensemble/observations.py +++ b/src/fmu/ensemble/observations.py @@ -7,6 +7,7 @@ import logging import yaml +import numpy as np import pandas as pd import dateutil @@ -311,7 +312,8 @@ def _realization_mismatch(self, real): column_keys=[obsunit["key"], obsunit["histvec"]], ) elif isinstance( - obsunit["time_index"], (datetime.datetime, datetime.date) + obsunit["time_index"], + (datetime.datetime, datetime.date, np.datetime64), ): # real.get_smry only allows strings or # list of datetimes as time_index. @@ -479,18 +481,16 @@ def _clean_observations(self): # If time_index is not a supported mnemonic, # parse it to a date object if "time_index" in unit: - if ( - unit["time_index"] - not in [ - "raw", - "report", - "yearly", - "daily", - "first", - "last", - "monthly", - ] - and not isinstance(unit["time_index"], datetime.datetime) + if unit["time_index"] not in [ + "raw", + "report", + "yearly", + "daily", + "first", + "last", + "monthly", + ] and not isinstance( + unit["time_index"], (datetime.datetime, np.datetime64) ): try: unit["time_index"] = dateutil.parser.isoparse( @@ -535,7 +535,9 @@ def _clean_observations(self): observation["date"] = dateutil.parser.isoparse( observation["date"] ).date() - if not isinstance(observation["date"], datetime.date): + if not isinstance( + observation["date"], (datetime.date, np.datetime64) + ): logger.error("Date not understood %s", str(observation["date"])) continue # If everything is deleted from 'smry', delete it diff --git a/src/fmu/ensemble/realization.py b/src/fmu/ensemble/realization.py index 16346b43..d748df06 100644 --- a/src/fmu/ensemble/realization.py +++ b/src/fmu/ensemble/realization.py @@ -998,7 +998,7 @@ def get_smry( start_date=None, end_date=None, include_restart=True, - datetimeindex=False, + datetimeindex=True, ): """Wrapper for ecl2df.summary diff --git a/tests/test_ecl2df.py b/tests/test_ecl2df.py index 673f3803..0c30d13d 100644 --- a/tests/test_ecl2df.py +++ b/tests/test_ecl2df.py @@ -61,14 +61,20 @@ def extract_compdat(kwargs): def test_smry_via_ecl2df(): """Test that we could use ecl2df for smry extraction instead - of the native code inside fmu-ensemble""" + of the native code inside fmu-ensemble. + + (This test code was made before fmu-ensemble used ecl2df by default) + """ def get_smry(kwargs): """Callback function to extract smry data using ecl2df on a ScratchRealization""" eclfiles = kwargs["realization"].get_eclfiles() return ecl2df.summary.df( - eclfiles, time_index=kwargs["time_index"], column_keys=kwargs["column_keys"] + eclfiles, + time_index=kwargs["time_index"], + column_keys=kwargs["column_keys"], + datetime=True, ) if "__file__" in globals(): diff --git a/tests/test_ensemble_eclfail.py b/tests/test_ensemble_eclfail.py index 16eb1371..9645ff0e 100644 --- a/tests/test_ensemble_eclfail.py +++ b/tests/test_ensemble_eclfail.py @@ -10,7 +10,6 @@ import os import logging import shutil -import datetime import numpy as np import pandas as pd @@ -134,7 +133,8 @@ def test_ens_premature_ecl(tmpdir): # The FOPR rate vector should be all zero after the stop assert ( fail_foprs[ - (fail_foprs["REAL"] == 1) & (fail_foprs["DATE"] > datetime.date(2000, 8, 1)) + (fail_foprs["REAL"] == 1) + & (fail_foprs["DATE"] > np.datetime64("2000-08-01")) ]["FOPR"] .abs() .sum() @@ -142,7 +142,8 @@ def test_ens_premature_ecl(tmpdir): ) assert ( fail_foprs[ - (fail_foprs["REAL"] == 0) & (fail_foprs["DATE"] > datetime.date(2000, 8, 1)) + (fail_foprs["REAL"] == 0) + & (fail_foprs["DATE"] > np.datetime64("2000-08-01")) ]["FOPR"] .abs() .sum() diff --git a/tests/test_observations.py b/tests/test_observations.py index 8c64fab0..cacc0b02 100644 --- a/tests/test_observations.py +++ b/tests/test_observations.py @@ -297,10 +297,10 @@ def test_smryh(): {"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "2003-02-01"}]} ) obs_future = Observations( - {"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "3003-02-01"}]} + {"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "2203-02-01"}]} ) obs_past = Observations( - {"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "1003-02-01"}]} + {"smryh": [{"key": "FOPT", "histvec": "FOPTH", "time_index": "1678-02-01"}]} ) assert obs_isodatestr diff --git a/tests/test_realization.py b/tests/test_realization.py index fee80a83..dce317b7 100644 --- a/tests/test_realization.py +++ b/tests/test_realization.py @@ -337,7 +337,8 @@ def test_volumetric_rates(): # We are probably neither at the start or at the end of the production # interval. cumulative_error = ddcum["FOPR"].sum() - ( - dcum["FOPT"].loc[subset_dates[-1]] - dcum["FOPT"].loc[subset_dates[0]] + dcum["FOPT"].loc[np.datetime64(subset_dates[-1])] + - dcum["FOPT"].loc[np.datetime64(subset_dates[0])] ) # Give some slack, we might have done a lot of interpolation @@ -428,19 +429,27 @@ def test_datenormalization(): raw = real.get_smry(column_keys="FOPT", time_index="raw") assert str(raw.index[-1]) == "2003-01-02 00:00:00" daily = real.get_smry(column_keys="FOPT", time_index="daily") - assert str(daily.index[-1]) == "2003-01-02" + assert daily.index[-1] == np.datetime64("2003-01-02") monthly = real.get_smry(column_keys="FOPT", time_index="monthly") - assert str(monthly.index[-1]) == "2003-02-01" + assert monthly.index[-1] == np.datetime64("2003-02-01") yearly = real.get_smry(column_keys="FOPT", time_index="yearly") - assert str(yearly.index[-1]) == "2004-01-01" + assert yearly.index[-1] == np.datetime64("2004-01-01") + + # First Monday after 2003-01-02: weekly = real.get_smry(column_keys="FOPT", time_index="weekly") - assert str(weekly.index[-1]) == "2003-01-06" # First Monday after 2003-01-02 + assert weekly.index[-1] == np.datetime64("2003-01-06") + + # First Monday after 2003-01-02 weekly = real.get_smry(column_keys="FOPT", time_index="W-MON") - assert str(weekly.index[-1]) == "2003-01-06" # First Monday after 2003-01-02 + assert weekly.index[-1] == np.datetime64("2003-01-06") + + # First Tuesday after 2003-01-02 weekly = real.get_smry(column_keys="FOPT", time_index="W-TUE") - assert str(weekly.index[-1]) == "2003-01-07" # First Tuesday after 2003-01-02 + assert weekly.index[-1] == np.datetime64("2003-01-07") + + # First Thursday after 2003-01-02 weekly = real.get_smry(column_keys="FOPT", time_index="W-THU") - assert str(weekly.index[-1]) == "2003-01-02" # First Thursday after 2003-01-02 + assert weekly.index[-1] == np.datetime64("2003-01-02") # Check that time_index=None and time_index="raw" behaves like default raw = real.load_smry(column_keys="FOPT", time_index="raw") @@ -456,13 +465,15 @@ def test_datenormalization(): real.load_smry(column_keys="FOPT", time_index="raw") assert str(real.get_df("unsmry--raw")["DATE"].iloc[-1]) == "2003-01-02 00:00:00" real.load_smry(column_keys="FOPT", time_index="daily") - assert str(real.get_df("unsmry--daily")["DATE"].iloc[-1]) == "2003-01-02" + assert real.get_df("unsmry--daily")["DATE"].iloc[-1] == np.datetime64("2003-01-02") real.load_smry(column_keys="FOPT", time_index="monthly") - assert str(real.get_df("unsmry--monthly")["DATE"].iloc[-1]) == "2003-02-01" + assert real.get_df("unsmry--monthly")["DATE"].iloc[-1] == np.datetime64( + "2003-02-01" + ) real.load_smry(column_keys="FOPT", time_index="yearly") - assert str(real.get_df("unsmry--yearly")["DATE"].iloc[-1]) == "2004-01-01" + assert real.get_df("unsmry--yearly")["DATE"].iloc[-1] == np.datetime64("2004-01-01") real.load_smry(column_keys="FOPT", time_index="weekly") - assert str(real.get_df("unsmry--weekly")["DATE"].iloc[-1]) == "2003-01-06" + assert real.get_df("unsmry--weekly")["DATE"].iloc[-1] == np.datetime64("2003-01-06") def test_singlereal_ecl(tmp="TMP"):