Skip to content

Commit

Permalink
WIP: Change realization.get_smry() to always return dummy index
Browse files Browse the repository at this point in the history
  • Loading branch information
berland committed Feb 26, 2021
1 parent 1aca83b commit a014abf
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 27 deletions.
9 changes: 5 additions & 4 deletions src/fmu/ensemble/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,8 +689,7 @@ def get_df(self, localpath, merge=None):
# the realization index, and end up in a MultiIndex
dframe = pd.concat(dflist, sort=False).reset_index()
dframe.rename(columns={"level_0": "REAL"}, inplace=True)
del dframe["level_1"] # This is the indices from each real
return dframe
return dframe.drop("level_1", axis="columns", errors="ignore")
raise KeyError("No data found for " + localpath)

def load_smry(
Expand Down Expand Up @@ -1329,6 +1328,9 @@ def get_smry(
Wraps around Realization.get_smry() which wraps around
ecl.summary.EclSum.pandas_frame()
The returned dataframe will always have a dummy index, and
DATE and REAL as columns.
Args:
time_index: list of DateTime if interpolation is wanted
default is None, which returns the raw Eclipse report times
Expand Down Expand Up @@ -1374,10 +1376,9 @@ def get_smry(
include_restart=include_restart,
)
dframe.insert(0, "REAL", index)
dframe.index.name = "DATE"
dflist.append(dframe)
if dflist:
return pd.concat(dflist, sort=False).reset_index()
return pd.concat(dflist, sort=False)
return pd.DataFrame()

def get_eclgrid(self, props, report=0, agg="mean", active_only=False):
Expand Down
9 changes: 5 additions & 4 deletions src/fmu/ensemble/realization.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,13 +979,16 @@ def get_smry(
start_date=None,
end_date=None,
include_restart=True,
datetimeindex=False,
):
"""Wrapper for ecl2df.summary
This gives access to the underlying data on disk without
touching internalized dataframes.
The returned dataframe will have a dummy index, and the dates in
the column DATE. The DATE column will contain either datetime.datetime
or pandas.Timestamp objects.
Arguments:
time_index: string indicating a resampling frequency,
'yearly', 'monthly', 'daily', 'first', 'last' or 'raw', the
Expand All @@ -1003,7 +1006,6 @@ def get_smry(
end_date will always be included. Overridden if time_index
is 'first' or 'last'.
include_restart (bool): Whether to traverse restart files.
datetimeindex (bool): Set to True if a datetime64 index is wanted.
Returns empty dataframe if there is no summary file, or if the
column_keys are not existing.
Expand All @@ -1020,8 +1022,7 @@ def get_smry(
include_restart=include_restart,
params=False,
paramfile=None,
datetime=datetimeindex,
)
).reset_index()
except OSError:
# Missing or bogus UNSMRY file
return pd.DataFrame()
Expand Down
4 changes: 3 additions & 1 deletion src/fmu/ensemble/util/rates.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ def compute_volumetric_rates(realization, column_keys, time_index, time_unit):
return pd.DataFrame()

cum_df = realization.get_smry(column_keys=column_keys, time_index=time_index)
# get_smry() for realizations returns a dataframe indexed by 'DATE'

if not cum_df.empty:
cum_df.set_index("DATE", inplace=True)

# Compute row-wise difference, shift back one row
# to get the NaN to the end, and then drop the NaN.
Expand Down
4 changes: 1 addition & 3 deletions tests/test_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def test_reek001(tmpdir):
paramsdf = reekensemble.parameters # also test as property
paramsdf = reekensemble.get_df("parameters.txt")
assert len(paramsdf) == 5
print(paramsdf.head())
assert len(paramsdf.columns) == 26 # 25 parameters, + REAL column
paramsdf.to_csv("params.csv", index=False)

Expand Down Expand Up @@ -498,9 +499,6 @@ def test_ensemble_ecl():
assert not reekensemble.get_wellnames("")
assert len(reekensemble.get_wellnames(["OP*", "WI*"])) == 8

# eclipse well groups list
assert len(reekensemble.get_groupnames()) == 3

# delta between two ensembles
diff = reekensemble - reekensemble
assert len(diff.get_smry(column_keys=["FOPR", "FGPR", "FWCT"]).columns) == 5
Expand Down
41 changes: 26 additions & 15 deletions tests/test_realization.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def test_volumetric_rates():
daily_dates = real.get_smry_dates(freq="daily", normalize=False)
subset_dates = list(np.random.choice(daily_dates, size=10, replace=False))
subset_dates.sort()
dcum = real.get_smry(column_keys="FOPT", time_index=subset_dates)
dcum = real.get_smry(column_keys="FOPT", time_index=subset_dates).set_index("DATE")
ddcum = real.get_volumetric_rates(column_keys="FOPT", time_index=subset_dates)
assert ddcum["FOPR"].iloc[-1] == 0

Expand Down Expand Up @@ -426,21 +426,29 @@ def test_datenormalization():
realdir = os.path.join(testdir, "data/testensemble-reek001", "realization-0/iter-0")
real = ensemble.ScratchRealization(realdir)
raw = real.get_smry(column_keys="FOPT", time_index="raw")
assert str(raw.index[-1]) == "2003-01-02 00:00:00"
assert str(raw["DATE"].values[-1]) == "2003-01-02T00:00:00.000000000"
daily = real.get_smry(column_keys="FOPT", time_index="daily")
assert str(daily.index[-1]) == "2003-01-02"
assert str(daily["DATE"].values[-1]) == "2003-01-02"
monthly = real.get_smry(column_keys="FOPT", time_index="monthly")
assert str(monthly.index[-1]) == "2003-02-01"
assert str(monthly["DATE"].values[-1]) == "2003-02-01"
yearly = real.get_smry(column_keys="FOPT", time_index="yearly")
assert str(yearly.index[-1]) == "2004-01-01"
assert str(yearly["DATE"].values[-1]) == "2004-01-01"
weekly = real.get_smry(column_keys="FOPT", time_index="weekly")
assert str(weekly.index[-1]) == "2003-01-06" # First Monday after 2003-01-02
assert (
str(weekly["DATE"].values[-1]) == "2003-01-06"
) # First Monday after 2003-01-02
weekly = real.get_smry(column_keys="FOPT", time_index="W-MON")
assert str(weekly.index[-1]) == "2003-01-06" # First Monday after 2003-01-02
assert (
str(weekly["DATE"].values[-1]) == "2003-01-06"
) # First Monday after 2003-01-02
weekly = real.get_smry(column_keys="FOPT", time_index="W-TUE")
assert str(weekly.index[-1]) == "2003-01-07" # First Tuesday after 2003-01-02
assert (
str(weekly["DATE"].values[-1]) == "2003-01-07"
) # First Tuesday after 2003-01-02
weekly = real.get_smry(column_keys="FOPT", time_index="W-THU")
assert str(weekly.index[-1]) == "2003-01-02" # First Thursday after 2003-01-02
assert (
str(weekly["DATE"].values[-1]) == "2003-01-02"
) # First Thursday after 2003-01-02

# Check that time_index=None and time_index="raw" behaves like default
raw = real.load_smry(column_keys="FOPT", time_index="raw")
Expand All @@ -454,15 +462,18 @@ def test_datenormalization():
# Check that we get the same correct normalization
# with load_smry()
real.load_smry(column_keys="FOPT", time_index="raw")
assert str(real.get_df("unsmry--raw")["DATE"].iloc[-1]) == "2003-01-02 00:00:00"
assert (
str(real.get_df("unsmry--raw")["DATE"].values[-1])
== "2003-01-02T00:00:00.000000000"
)
real.load_smry(column_keys="FOPT", time_index="daily")
assert str(real.get_df("unsmry--daily")["DATE"].iloc[-1]) == "2003-01-02"
assert str(real.get_df("unsmry--daily")["DATE"].values[-1]) == "2003-01-02"
real.load_smry(column_keys="FOPT", time_index="monthly")
assert str(real.get_df("unsmry--monthly")["DATE"].iloc[-1]) == "2003-02-01"
assert str(real.get_df("unsmry--monthly")["DATE"].values[-1]) == "2003-02-01"
real.load_smry(column_keys="FOPT", time_index="yearly")
assert str(real.get_df("unsmry--yearly")["DATE"].iloc[-1]) == "2004-01-01"
assert str(real.get_df("unsmry--yearly")["DATE"].values[-1]) == "2004-01-01"
real.load_smry(column_keys="FOPT", time_index="weekly")
assert str(real.get_df("unsmry--weekly")["DATE"].iloc[-1]) == "2003-01-06"
assert str(real.get_df("unsmry--weekly")["DATE"].values[-1]) == "2003-01-06"


def test_singlereal_ecl(tmp="TMP"):
Expand Down Expand Up @@ -512,7 +523,7 @@ def test_singlereal_ecl(tmp="TMP"):
# Try ISO-date for time_index:
singledata = real.get_smry(time_index="2000-05-05", column_keys="FOPT")
assert "FOPT" in singledata
assert "2000-05-05" in singledata.index
assert str(singledata["DATE"].values[0]).startswith("2000-05-05")

# start and end should be included:
assert (
Expand Down

0 comments on commit a014abf

Please sign in to comment.