Skip to content

Commit

Permalink
API change: Refactor to use ecl2df for summary file extraction
Browse files Browse the repository at this point in the history
* realization.get_smry() has changed to always return a dummy index
* cache_eclsum is pruned from fmu-ensemble.
  • Loading branch information
berland committed Mar 9, 2021
1 parent 868bc9d commit fc4395f
Show file tree
Hide file tree
Showing 14 changed files with 227 additions and 462 deletions.
3 changes: 0 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ exclude = docs,
[aliases]
test = pytest

[tool:pytest]
addopts = --verbose -x

[build_sphinx]
all-files = 1
warning-is-error = 1
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

REQUIREMENTS = [
"ecl>=2.9",
"ecl2df",
"numpy",
"pandas",
"pyyaml>=5.1",
Expand Down
85 changes: 7 additions & 78 deletions src/fmu/ensemble/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,16 +689,13 @@ def get_df(self, localpath, merge=None):
# the realization index, and end up in a MultiIndex
dframe = pd.concat(dflist, sort=False).reset_index()
dframe.rename(columns={"level_0": "REAL"}, inplace=True)
del dframe["level_1"] # This is the indices from each real
return dframe
return dframe.drop("level_1", axis="columns", errors="ignore")
raise KeyError("No data found for " + localpath)

def load_smry(
self,
time_index="raw",
column_keys=None,
stacked=None,
cache_eclsum=None,
start_date=None,
end_date=None,
include_restart=True,
Expand Down Expand Up @@ -743,9 +740,6 @@ def load_smry(
by vector name, and with realization index as columns.
This only works when time_index is the same for all
realizations. Not implemented yet!
cache_eclsum (boolean): Boolean for whether we should cache the EclSum
objects. Set to False if you cannot keep all EclSum files in
memory simultaneously
start_date (str or date): First date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included. Overridden if time_index
Expand All @@ -761,28 +755,6 @@ def load_smry(
pd.DataFrame: Summary vectors for the ensemble, or
a dict of dataframes if stacked=False.
"""
if stacked is not None:
warnings.warn(
(
"stacked option to load_smry() is deprecated and "
"will be removed in fmu-ensemble v2.0.0"
),
FutureWarning,
)
else:
stacked = True
if not stacked:
raise NotImplementedError

if cache_eclsum is not None:
warnings.warn(
(
"cache_eclsum option to load_smry() is deprecated and "
"will be removed in fmu-ensemble v2.0.0"
),
FutureWarning,
)

# Future: Multithread this!
for realidx, realization in self.realizations.items():
# We do not store the returned DataFrames here,
Expand All @@ -793,7 +765,6 @@ def load_smry(
realization.load_smry(
time_index=time_index,
column_keys=column_keys,
cache_eclsum=cache_eclsum,
start_date=start_date,
end_date=end_date,
include_restart=include_restart,
Expand Down Expand Up @@ -984,7 +955,6 @@ def get_smry_dates(
normalize=True,
start_date=None,
end_date=None,
cache_eclsum=None,
include_restart=True,
):
"""Return list of datetimes for an ensemble according to frequency
Expand Down Expand Up @@ -1016,28 +986,12 @@ def get_smry_dates(
Returns:
list of datetimes. Empty list if no data found.
"""

if cache_eclsum is not None:
warnings.warn(
(
"cache_eclsum option to get_smry_dates() is deprecated and "
"will be removed in fmu-ensemble v2.0.0"
),
FutureWarning,
)
else:
cache_eclsum = True

# Build list of list of eclsum dates
eclsumsdates = []
for _, realization in self.realizations.items():
if realization.get_eclsum(
cache=cache_eclsum, include_restart=include_restart
):
if realization.get_eclsum(include_restart=include_restart):
eclsumsdates.append(
realization.get_eclsum(
cache=cache_eclsum, include_restart=include_restart
).dates
realization.get_eclsum(include_restart=include_restart).dates
)
return unionize_smry_dates(eclsumsdates, freq, normalize, start_date, end_date)

Expand All @@ -1046,7 +1000,6 @@ def get_smry_stats(
column_keys=None,
time_index="monthly",
quantiles=None,
cache_eclsum=None,
start_date=None,
end_date=None,
):
Expand All @@ -1069,8 +1022,6 @@ def get_smry_stats(
to compute. Quantiles refer to scientific standard, which
is opposite to the oil industry convention.
Ask for p10 if you need the oil industry p90.
cache_eclsum: boolean for whether to keep the loaded EclSum
object in memory after data has been loaded.
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included. Overridden if time_index
Expand All @@ -1088,15 +1039,6 @@ def get_smry_stats(
strings in the outer index are changed accordingly. If no
data is found, return empty DataFrame.
"""
if cache_eclsum is not None:
warnings.warn(
(
"cache_eclsum option to get_smry_stats() is deprecated and "
"will be removed in fmu-ensemble v2.0.0"
),
FutureWarning,
)

if quantiles is None:
quantiles = [10, 90]

Expand All @@ -1111,7 +1053,6 @@ def get_smry_stats(
dframe = self.get_smry(
time_index=time_index,
column_keys=column_keys,
cache_eclsum=cache_eclsum,
start_date=start_date,
end_date=end_date,
)
Expand Down Expand Up @@ -1377,7 +1318,6 @@ def get_smry(
self,
time_index=None,
column_keys=None,
cache_eclsum=None,
start_date=None,
end_date=None,
include_restart=True,
Expand All @@ -1388,6 +1328,9 @@ def get_smry(
Wraps around Realization.get_smry() which wraps around
ecl.summary.EclSum.pandas_frame()
The returned dataframe will always have a dummy index, and
DATE and REAL as columns.
Args:
time_index: list of DateTime if interpolation is wanted
default is None, which returns the raw Eclipse report times
Expand All @@ -1396,9 +1339,6 @@ def get_smry(
a wanted frequency for dates (daily, weekly, monthly, yearly)
that will be sent to get_smry_dates()
column_keys: list of column key wildcards
cache_eclsum: boolean for whether to cache the EclSum
objects. Defaults to True. Set to False if
not enough memory to keep all summary files in memory.
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included. Overridden if time_index
Expand All @@ -1415,15 +1355,6 @@ def get_smry(
REAL with integers is added to distinguish realizations. If
no realizations, empty DataFrame is returned.
"""
if cache_eclsum is not None:
warnings.warn(
(
"cache_eclsum option to get_smry() is deprecated and "
"will be removed in fmu-ensemble v2.0.0"
),
FutureWarning,
)

if isinstance(time_index, str):
# Try interpreting as ISO-date:
try:
Expand All @@ -1442,14 +1373,12 @@ def get_smry(
dframe = realization.get_smry(
time_index=time_index,
column_keys=column_keys,
cache_eclsum=cache_eclsum,
include_restart=include_restart,
)
dframe.insert(0, "REAL", index)
dframe.index.name = "DATE"
dflist.append(dframe)
if dflist:
return pd.concat(dflist, sort=False).reset_index()
return pd.concat(dflist, sort=False)
return pd.DataFrame()

def get_eclgrid(self, props, report=0, agg="mean", active_only=False):
Expand Down
52 changes: 2 additions & 50 deletions src/fmu/ensemble/ensembleset.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,6 @@ def load_smry(
self,
time_index="raw",
column_keys=None,
cache_eclsum=None,
start_date=None,
end_date=None,
):
Expand All @@ -596,9 +595,6 @@ def load_smry(
If a string is supplied, an attempt is made to pass that string
to get_smry_dates() in order to obtain a time index.
column_keys: list of column key wildcards
cache_eclsum: Boolean for whether we should cache the EclSum
objects. Set to False if you cannot keep all EclSum files in
memory simultaneously
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included. Overridden if time_index
Expand All @@ -612,21 +608,11 @@ def load_smry(
A DataFrame of summary vectors for the ensembleset.
The column 'ENSEMBLE' will denote each ensemble's name
"""
if cache_eclsum is not None:
warnings.warn(
(
"cache_eclsum option to load_smry() is deprecated and "
"will be removed in fmu-ensemble v2.0.0"
),
FutureWarning,
)

# Future: Multithread this:
for _, ensemble in self._ensembles.items():
ensemble.load_smry(
time_index=time_index,
column_keys=column_keys,
cache_eclsum=cache_eclsum,
start_date=start_date,
end_date=end_date,
)
Expand All @@ -640,7 +626,6 @@ def get_smry(
self,
time_index=None,
column_keys=None,
cache_eclsum=None,
start_date=None,
end_date=None,
):
Expand All @@ -656,11 +641,6 @@ def get_smry(
If a string is supplied, an attempt is made to pass that string
to get_smry_dates() in order to obtain a time index.
column_keys: list of column key wildcards
cache_eclsum: boolean for whether to cache the EclSum
objects. Defaults to False. Set to True if
there is enough memory to keep all realizations summary
files in memory at once. This will speed up subsequent
operations
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included. Overridden if time_index
Expand All @@ -674,30 +654,16 @@ def get_smry(
ENSEMBLE will distinguish the different ensembles by their
respective names.
"""

if cache_eclsum is not None:
warnings.warn(
(
"cache_eclsum option to get_smry() is deprecated and "
"will be removed in fmu-ensemble v2.0.0"
),
FutureWarning,
)

smrylist = []
for _, ensemble in self._ensembles.items():
smry = ensemble.get_smry(
time_index, column_keys, cache_eclsum, start_date, end_date
)
smry = ensemble.get_smry(time_index, column_keys, start_date, end_date)
smry.insert(0, "ENSEMBLE", ensemble.name)
smrylist.append(smry)
if smrylist:
return pd.concat(smrylist, sort=False)
return pd.DataFrame()

def get_smry_dates(
self, freq="monthly", cache_eclsum=None, start_date=None, end_date=None
):
def get_smry_dates(self, freq="monthly", start_date=None, end_date=None):
"""Return list of datetimes from an ensembleset
Datetimes from each realization in each ensemble can
Expand All @@ -709,9 +675,6 @@ def get_smry_dates(
yield the sorted union of all valid timesteps for
all realizations. Other valid options are
'daily', 'monthly' and 'yearly'.
cache_eclsum: Boolean for whether we should cache the EclSum
objects. Set to False if you cannot keep all EclSum files in
memory simultaneously
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included. Overridden if time_index
Expand All @@ -723,22 +686,11 @@ def get_smry_dates(
Returns:
list of datetime.date.
"""

if cache_eclsum is not None:
warnings.warn(
(
"cache_eclsum option to get_smry_dates() is deprecated and "
"will be removed in fmu-ensemble v2.0.0"
),
FutureWarning,
)

rawdates = set()
for _, ensemble in self._ensembles.items():
rawdates = rawdates.union(
ensemble.get_smry_dates(
freq="report",
cache_eclsum=cache_eclsum,
start_date=start_date,
end_date=end_date,
)
Expand Down
6 changes: 2 additions & 4 deletions src/fmu/ensemble/observations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
"""
Observations support and related calculations
"""
"""Observations support and related calculations"""

import os
import math
Expand Down Expand Up @@ -174,7 +172,7 @@ def load_smry(self, realization, smryvector, time_index="yearly", smryerror=None
"""
dataseries = realization.get_smry(
column_keys=[smryvector], time_index=time_index
)[smryvector]
)[["DATE", smryvector]].set_index("DATE")[smryvector]

# In the context of this function, datetimes are not supported. Ensure dates:
if isinstance(dataseries.index, pd.DatetimeIndex):
Expand Down
Loading

0 comments on commit fc4395f

Please sign in to comment.