API change: Refactor to use ecl2df for summary file extraction

* realization.get_smry() has changed to always return a dummy index
* cache_eclsum is pruned from fmu-ensemble.
equinor · Mar 9, 2021 · fc4395f · fc4395f
1 parent 868bc9d
commit fc4395f
Show file tree

Hide file tree

Showing 14 changed files with 227 additions and 462 deletions.
diff --git a/setup.cfg b/setup.cfg
@@ -9,9 +9,6 @@ exclude = docs,
 [aliases]
 test = pytest
 
-[tool:pytest]
-addopts = --verbose -x
-
 [build_sphinx]
 all-files = 1
 warning-is-error = 1

diff --git a/setup.py b/setup.py
@@ -22,6 +22,7 @@
 
 REQUIREMENTS = [
     "ecl>=2.9",
+    "ecl2df",
     "numpy",
     "pandas",
     "pyyaml>=5.1",

diff --git a/src/fmu/ensemble/ensemble.py b/src/fmu/ensemble/ensemble.py
@@ -689,16 +689,13 @@ def get_df(self, localpath, merge=None):
             # the realization index, and end up in a MultiIndex
             dframe = pd.concat(dflist, sort=False).reset_index()
             dframe.rename(columns={"level_0": "REAL"}, inplace=True)
-            del dframe["level_1"]  # This is the indices from each real
-            return dframe
+            return dframe.drop("level_1", axis="columns", errors="ignore")
         raise KeyError("No data found for " + localpath)
 
     def load_smry(
         self,
         time_index="raw",
         column_keys=None,
-        stacked=None,
-        cache_eclsum=None,
         start_date=None,
         end_date=None,
         include_restart=True,
@@ -743,9 +740,6 @@ def load_smry(
                 by vector name, and with realization index as columns.
                 This only works when time_index is the same for all
                 realizations. Not implemented yet!
-            cache_eclsum (boolean): Boolean for whether we should cache the EclSum
-                objects. Set to False if you cannot keep all EclSum files in
-                memory simultaneously
             start_date (str or date): First date to include.
                 Dates prior to this date will be dropped, supplied
                 start_date will always be included. Overridden if time_index
@@ -761,28 +755,6 @@ def load_smry(
             pd.DataFame: Summary vectors for the ensemble, or
             a dict of dataframes if stacked=False.
         """
-        if stacked is not None:
-            warnings.warn(
-                (
-                    "stacked option to load_smry() is deprecated and "
-                    "will be removed in fmu-ensemble v2.0.0"
-                ),
-                FutureWarning,
-            )
-        else:
-            stacked = True
-        if not stacked:
-            raise NotImplementedError
-
-        if cache_eclsum is not None:
-            warnings.warn(
-                (
-                    "cache_eclsum option to load_smry() is deprecated and "
-                    "will be removed in fmu-ensemble v2.0.0"
-                ),
-                FutureWarning,
-            )
-
         # Future: Multithread this!
         for realidx, realization in self.realizations.items():
             # We do not store the returned DataFrames here,
@@ -793,7 +765,6 @@ def load_smry(
             realization.load_smry(
                 time_index=time_index,
                 column_keys=column_keys,
-                cache_eclsum=cache_eclsum,
                 start_date=start_date,
                 end_date=end_date,
                 include_restart=include_restart,
@@ -984,7 +955,6 @@ def get_smry_dates(
         normalize=True,
         start_date=None,
         end_date=None,
-        cache_eclsum=None,
         include_restart=True,
     ):
         """Return list of datetimes for an ensemble according to frequency
@@ -1016,28 +986,12 @@ def get_smry_dates(
         Returns:
             list of datetimes. Empty list if no data found.
         """
-
-        if cache_eclsum is not None:
-            warnings.warn(
-                (
-                    "cache_eclsum option to get_smry_dates() is deprecated and "
-                    "will be removed in fmu-ensemble v2.0.0"
-                ),
-                FutureWarning,
-            )
-        else:
-            cache_eclsum = True
-
         # Build list of list of eclsum dates
         eclsumsdates = []
         for _, realization in self.realizations.items():
-            if realization.get_eclsum(
-                cache=cache_eclsum, include_restart=include_restart
-            ):
+            if realization.get_eclsum(include_restart=include_restart):
                 eclsumsdates.append(
-                    realization.get_eclsum(
-                        cache=cache_eclsum, include_restart=include_restart
-                    ).dates
+                    realization.get_eclsum(include_restart=include_restart).dates
                 )
         return unionize_smry_dates(eclsumsdates, freq, normalize, start_date, end_date)
 
@@ -1046,7 +1000,6 @@ def get_smry_stats(
         column_keys=None,
         time_index="monthly",
         quantiles=None,
-        cache_eclsum=None,
         start_date=None,
         end_date=None,
     ):
@@ -1069,8 +1022,6 @@ def get_smry_stats(
                to compute. Quantiles refer to scientific standard, which
                is opposite to the oil industry convention.
                Ask for p10 if you need the oil industry p90.
-            cache_eclsum: boolean for whether to keep the loaded EclSum
-                object in memory after data has been loaded.
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
                 start_date will always be included. Overridden if time_index
@@ -1088,15 +1039,6 @@ def get_smry_stats(
             strings in the outer index are changed accordingly. If no
             data is found, return empty DataFrame.
         """
-        if cache_eclsum is not None:
-            warnings.warn(
-                (
-                    "cache_eclsum option to get_smry_stats() is deprecated and "
-                    "will be removed in fmu-ensemble v2.0.0"
-                ),
-                FutureWarning,
-            )
-
         if quantiles is None:
             quantiles = [10, 90]
 
@@ -1111,7 +1053,6 @@ def get_smry_stats(
         dframe = self.get_smry(
             time_index=time_index,
             column_keys=column_keys,
-            cache_eclsum=cache_eclsum,
             start_date=start_date,
             end_date=end_date,
         )
@@ -1377,7 +1318,6 @@ def get_smry(
         self,
         time_index=None,
         column_keys=None,
-        cache_eclsum=None,
         start_date=None,
         end_date=None,
         include_restart=True,
@@ -1388,6 +1328,9 @@ def get_smry(
         Wraps around Realization.get_smry() which wraps around
         ecl.summary.EclSum.pandas_frame()
 
+        The returned dataframe will always have a dummy index, and
+        DATE and REAL as columns.
+
         Args:
             time_index: list of DateTime if interpolation is wanted
                default is None, which returns the raw Eclipse report times
@@ -1396,9 +1339,6 @@ def get_smry(
                a wanted frequencey for dates, daily, weekly, monthly, yearly,
                that will be send to get_smry_dates()
             column_keys: list of column key wildcards
-            cache_eclsum: boolean for whether to cache the EclSum
-                objects. Defaults to True. Set to False if
-                not enough memory to keep all summary files in memory.
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
                 start_date will always be included. Overridden if time_index
@@ -1415,15 +1355,6 @@ def get_smry(
             REAL with integers is added to distinguish realizations. If
             no realizations, empty DataFrame is returned.
         """
-        if cache_eclsum is not None:
-            warnings.warn(
-                (
-                    "cache_eclsum option to get_smry() is deprecated and "
-                    "will be removed in fmu-ensemble v2.0.0"
-                ),
-                FutureWarning,
-            )
-
         if isinstance(time_index, str):
             # Try interpreting as ISO-date:
             try:
@@ -1442,14 +1373,12 @@ def get_smry(
             dframe = realization.get_smry(
                 time_index=time_index,
                 column_keys=column_keys,
-                cache_eclsum=cache_eclsum,
                 include_restart=include_restart,
             )
             dframe.insert(0, "REAL", index)
-            dframe.index.name = "DATE"
             dflist.append(dframe)
         if dflist:
-            return pd.concat(dflist, sort=False).reset_index()
+            return pd.concat(dflist, sort=False)
         return pd.DataFrame()
 
     def get_eclgrid(self, props, report=0, agg="mean", active_only=False):

diff --git a/src/fmu/ensemble/ensembleset.py b/src/fmu/ensemble/ensembleset.py
@@ -572,7 +572,6 @@ def load_smry(
         self,
         time_index="raw",
         column_keys=None,
-        cache_eclsum=None,
         start_date=None,
         end_date=None,
     ):
@@ -596,9 +595,6 @@ def load_smry(
                If a string is supplied, that string is attempted used
                via get_smry_dates() in order to obtain a time index.
             column_keys: list of column key wildcards
-            cache_eclsum: Boolean for whether we should cache the EclSum
-                objects. Set to False if you cannot keep all EclSum files in
-                memory simultaneously
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
                 start_date will always be included. Overridden if time_index
@@ -612,21 +608,11 @@ def load_smry(
             A DataFame of summary vectors for the ensembleset.
             The column 'ENSEMBLE' will denote each ensemble's name
         """
-        if cache_eclsum is not None:
-            warnings.warn(
-                (
-                    "cache_eclsum option to load_smry() is deprecated and "
-                    "will be removed in fmu-ensemble v2.0.0"
-                ),
-                FutureWarning,
-            )
-
         # Future: Multithread this:
         for _, ensemble in self._ensembles.items():
             ensemble.load_smry(
                 time_index=time_index,
                 column_keys=column_keys,
-                cache_eclsum=cache_eclsum,
                 start_date=start_date,
                 end_date=end_date,
             )
@@ -640,7 +626,6 @@ def get_smry(
         self,
         time_index=None,
         column_keys=None,
-        cache_eclsum=None,
         start_date=None,
         end_date=None,
     ):
@@ -656,11 +641,6 @@ def get_smry(
                If a string is supplied, that string is attempted used
                via get_smry_dates() in order to obtain a time index.
             column_keys: list of column key wildcards
-            cache_eclsum: boolean for whether to cache the EclSum
-                objects. Defaults to False. Set to True if
-                there is enough memory to keep all realizations summary
-                files in memory at once. This will speed up subsequent
-                operations
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
                 start_date will always be included. Overridden if time_index
@@ -674,30 +654,16 @@ def get_smry(
             ENSEMBLE will distinguish the different ensembles by their
             respective names.
         """
-
-        if cache_eclsum is not None:
-            warnings.warn(
-                (
-                    "cache_eclsum option to get_smry() is deprecated and "
-                    "will be removed in fmu-ensemble v2.0.0"
-                ),
-                FutureWarning,
-            )
-
         smrylist = []
         for _, ensemble in self._ensembles.items():
-            smry = ensemble.get_smry(
-                time_index, column_keys, cache_eclsum, start_date, end_date
-            )
+            smry = ensemble.get_smry(time_index, column_keys, start_date, end_date)
             smry.insert(0, "ENSEMBLE", ensemble.name)
             smrylist.append(smry)
         if smrylist:
             return pd.concat(smrylist, sort=False)
         return pd.DataFrame()
 
-    def get_smry_dates(
-        self, freq="monthly", cache_eclsum=None, start_date=None, end_date=None
-    ):
+    def get_smry_dates(self, freq="monthly", start_date=None, end_date=None):
         """Return list of datetimes from an ensembleset
 
         Datetimes from each realization in each ensemble can
@@ -709,9 +675,6 @@ def get_smry_dates(
                yield the sorted union of all valid timesteps for
                all realizations. Other valid options are
                'daily', 'monthly' and 'yearly'.
-            cache_eclsum: Boolean for whether we should cache the EclSum
-                objects. Set to False if you cannot keep all EclSum files in
-                memory simultaneously
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
                 start_date will always be included. Overridden if time_index
@@ -723,22 +686,11 @@ def get_smry_dates(
         Returns:
             list of datetime.date.
         """
-
-        if cache_eclsum is not None:
-            warnings.warn(
-                (
-                    "cache_eclsum option to get_smry_dates() is deprecated and "
-                    "will be removed in fmu-ensemble v2.0.0"
-                ),
-                FutureWarning,
-            )
-
         rawdates = set()
         for _, ensemble in self._ensembles.items():
             rawdates = rawdates.union(
                 ensemble.get_smry_dates(
                     freq="report",
-                    cache_eclsum=cache_eclsum,
                     start_date=start_date,
                     end_date=end_date,
                 )

diff --git a/src/fmu/ensemble/observations.py b/src/fmu/ensemble/observations.py
@@ -1,6 +1,4 @@
-"""
-Observations support and related calculations
-"""
+"""Observations support and related calculations"""
 
 import os
 import math
@@ -174,7 +172,7 @@ def load_smry(self, realization, smryvector, time_index="yearly", smryerror=None
         """
         dataseries = realization.get_smry(
             column_keys=[smryvector], time_index=time_index
-        )[smryvector]
+        )[["DATE", smryvector]].set_index("DATE")[smryvector]
 
         # In the context of this function, datetimes are not supported. Ensure dates:
         if isinstance(dataseries.index, pd.DatetimeIndex):