diff --git a/setup.cfg b/setup.cfg index 598af0d7..86b20dee 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,9 +9,6 @@ exclude = docs, [aliases] test = pytest -[tool:pytest] -addopts = --verbose -x - [build_sphinx] all-files = 1 warning-is-error = 1 diff --git a/setup.py b/setup.py index 0e69fd7b..fc245422 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ REQUIREMENTS = [ "ecl>=2.9", + "ecl2df", "numpy", "pandas", "pyyaml>=5.1", diff --git a/src/fmu/ensemble/ensemble.py b/src/fmu/ensemble/ensemble.py index fc778604..80434078 100644 --- a/src/fmu/ensemble/ensemble.py +++ b/src/fmu/ensemble/ensemble.py @@ -6,7 +6,6 @@ import logging import warnings -import dateutil import pandas as pd import numpy as np import yaml @@ -345,7 +344,7 @@ def to_virtual(self, name=None): ] smrycolumns = {smrykey for sublist in smrycolumns for smrykey in sublist} # flatten - meta = self.get_smry_meta(smrycolumns) + meta = self.get_smry_meta() if meta: meta_df = pd.DataFrame.from_dict(meta, orient="index") meta_df.index.name = "SMRYCOLUMN" @@ -604,7 +603,7 @@ def get_smrykeys(self, vector_match=None): logger.warning("No EclSum available for realization %d", index) return list(result) - def get_smry_meta(self, column_keys=None): + def get_smry_meta(self): """ Provide metadata for summary data vectors. @@ -618,31 +617,12 @@ def get_smry_meta(self, column_keys=None): * keyword (str) * wgname (str or None) - The requested columns are asked for over the entire ensemble, and if necessary - all realizations will be checked to obtain the metadata for a specific key. - If metadata differ between realization, behaviour is *undefined*. - - Args: - column_keys (list or str): Column key wildcards. - Returns: dict of dict with metadata information """ - ensemble_smry_keys = self.get_smrykeys(vector_match=column_keys) meta = {} - needed_reals = 0 - # Loop over realizations until all requested keys are accounted for for _, realization in self.realizations.items(): - needed_reals += 1 - real_meta = realization.get_smry_meta(column_keys=ensemble_smry_keys) - meta.update(real_meta) - missing_keys = set(ensemble_smry_keys) - set(meta.keys()) - if not missing_keys: - break - if needed_reals: - logger.info( - "Searched %s realization(s) to get summary metadata", str(needed_reals) - ) + meta.update(realization.get_smry_meta()) return meta def get_df(self, localpath, merge=None): @@ -669,6 +649,7 @@ def get_df(self, localpath, merge=None): KeyError if no data is found in no realizations. 
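+        Example (an illustrative sketch; "ens" is assumed to be a
+        ScratchEnsemble on which load_smry(time_index="yearly") has been
+        run, so that "unsmry--yearly" is internalized):
+
+            dframe = ens.get_df("unsmry--yearly")
+            # Realizations are stacked into one frame and distinguished
+            # by the REAL column added during aggregation:
+            assert "REAL" in dframe.columns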
""" dflist = {} + meta = {} for index, realization in self.realizations.items(): try: data = realization.get_df(localpath, merge=merge) @@ -677,6 +658,8 @@ def get_df(self, localpath, merge=None): elif isinstance(data, (str, int, float, np.number)): data = pd.DataFrame(index=[1], columns=[localpath], data=data) if isinstance(data, pd.DataFrame): + if "meta" in data.attrs: + meta.update(data.attrs["meta"]) dflist[index] = data else: raise ValueError("Unkown datatype returned " + "from realization") @@ -689,16 +672,17 @@ def get_df(self, localpath, merge=None): # the realization index, and end up in a MultiIndex dframe = pd.concat(dflist, sort=False).reset_index() dframe.rename(columns={"level_0": "REAL"}, inplace=True) - del dframe["level_1"] # This is the indices from each real - return dframe + + # Merge metadata from each frame: + if meta: + dframe.attrs["meta"] = meta + return dframe.drop("level_1", axis="columns", errors="ignore") raise KeyError("No data found for " + localpath) def load_smry( self, time_index="raw", column_keys=None, - stacked=None, - cache_eclsum=None, start_date=None, end_date=None, include_restart=True, @@ -743,9 +727,6 @@ def load_smry( by vector name, and with realization index as columns. This only works when time_index is the same for all realizations. Not implemented yet! - cache_eclsum (boolean): Boolean for whether we should cache the EclSum - objects. Set to False if you cannot keep all EclSum files in - memory simultaneously start_date (str or date): First date to include. Dates prior to this date will be dropped, supplied start_date will always be included. Overridden if time_index @@ -761,28 +742,6 @@ def load_smry( pd.DataFame: Summary vectors for the ensemble, or a dict of dataframes if stacked=False. """ - if stacked is not None: - warnings.warn( - ( - "stacked option to load_smry() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - else: - stacked = True - if not stacked: - raise NotImplementedError - - if cache_eclsum is not None: - warnings.warn( - ( - "cache_eclsum option to load_smry() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - # Future: Multithread this! for realidx, realization in self.realizations.items(): # We do not store the returned DataFrames here, @@ -793,7 +752,6 @@ def load_smry( realization.load_smry( time_index=time_index, column_keys=column_keys, - cache_eclsum=cache_eclsum, start_date=start_date, end_date=end_date, include_restart=include_restart, @@ -984,7 +942,6 @@ def get_smry_dates( normalize=True, start_date=None, end_date=None, - cache_eclsum=None, include_restart=True, ): """Return list of datetimes for an ensemble according to frequency @@ -1016,28 +973,12 @@ def get_smry_dates( Returns: list of datetimes. Empty list if no data found. 
""" - - if cache_eclsum is not None: - warnings.warn( - ( - "cache_eclsum option to get_smry_dates() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - else: - cache_eclsum = True - # Build list of list of eclsum dates eclsumsdates = [] for _, realization in self.realizations.items(): - if realization.get_eclsum( - cache=cache_eclsum, include_restart=include_restart - ): + if realization.get_eclsum(include_restart=include_restart): eclsumsdates.append( - realization.get_eclsum( - cache=cache_eclsum, include_restart=include_restart - ).dates + realization.get_eclsum(include_restart=include_restart).dates ) return unionize_smry_dates(eclsumsdates, freq, normalize, start_date, end_date) @@ -1046,7 +987,6 @@ def get_smry_stats( column_keys=None, time_index="monthly", quantiles=None, - cache_eclsum=None, start_date=None, end_date=None, ): @@ -1059,6 +999,10 @@ def get_smry_stats( independent of what is internalized. It accesses the summary files directly and can thus obtain data at any time frequency. + Quantiles refer to the scientific standard, opposite to the oil + industry convention. If quantiles are explicitly supplied, the 'pXX' + strings in the outer index are changed accordingly. + Args: column_keys: list of column key wildcards time_index: list of DateTime if interpolation is wanted @@ -1069,8 +1013,6 @@ def get_smry_stats( to compute. Quantiles refer to scientific standard, which is opposite to the oil industry convention. Ask for p10 if you need the oil industry p90. - cache_eclsum: boolean for whether to keep the loaded EclSum - object in memory after data has been loaded. start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied start_date will always be included. Overridden if time_index @@ -1081,22 +1023,9 @@ def get_smry_stats( is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD. Returns: A MultiIndex dataframe. Outer index is 'minimum', 'maximum', - 'mean', 'p10', 'p90', inner index are the dates. Column names - are the different vectors. Quantiles refer to the scientific - standard, opposite to the oil industry convention. - If quantiles are explicitly supplied, the 'pXX' - strings in the outer index are changed accordingly. If no - data is found, return empty DataFrame. + 'mean', 'p10', 'p90', inner index is DATE. Column names are summary + vectors. If no data is found, an empty dataframe is returned. 
""" - if cache_eclsum is not None: - warnings.warn( - ( - "cache_eclsum option to get_smry_stats() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - if quantiles is None: quantiles = [10, 90] @@ -1111,25 +1040,23 @@ def get_smry_stats( dframe = self.get_smry( time_index=time_index, column_keys=column_keys, - cache_eclsum=cache_eclsum, start_date=start_date, end_date=end_date, ) if "REAL" in dframe: - dframe = dframe.drop(columns="REAL").groupby("DATE") + dframe_grouped = dframe.drop(columns="REAL").groupby("DATE") else: - logger.warning("No data found for get_smry_stats") + logger.warning("No data found for get_smry_stats()") return pd.DataFrame() # Build a dictionary of dataframes to be concatenated dframes = {} - dframes["mean"] = dframe.mean() + dframes["mean"] = dframe_grouped.mean() for quantile in quantiles: quantile_str = "p" + str(quantile) - dframes[quantile_str] = dframe.quantile(q=quantile / 100.0) - dframes["maximum"] = dframe.max() - dframes["minimum"] = dframe.min() - + dframes[quantile_str] = dframe_grouped.quantile(q=quantile / 100.0) + dframes["maximum"] = dframe_grouped.max() + dframes["minimum"] = dframe_grouped.min() return pd.concat(dframes, names=["STATISTIC"], sort=False) def get_wellnames(self, well_match=None): @@ -1251,6 +1178,12 @@ def agg(self, aggregation, keylist=None, excludekeys=None): key = shortcut2path(self.keys(), key) data = self.get_df(key) + # Preserve metadata in dataframes: + if "meta" in data.attrs: + meta = data.attrs["meta"] + else: + meta = {} + # This column should never appear in aggregated data del data["REAL"] @@ -1310,6 +1243,10 @@ def agg(self, aggregation, keylist=None, excludekeys=None): # We have to recognize scalars. if len(aggregated) == 1 and aggregated.index.values[0] == key: aggregated = parse_number(aggregated.values[0]) + + # Preserve metadata: + if meta: + aggregated.attrs["meta"] = meta vreal.append(key, aggregated) return vreal @@ -1377,7 +1314,6 @@ def get_smry( self, time_index=None, column_keys=None, - cache_eclsum=None, start_date=None, end_date=None, include_restart=True, @@ -1386,8 +1322,14 @@ def get_smry( Aggregates summary data from all realizations. Wraps around Realization.get_smry() which wraps around + ecl2df.summary.df() which wraps around ecl.summary.EclSum.pandas_frame() + The returned dataframe will always have a dummy index, and + DATE and REAL as columns. The DATE datatype will be datetime64[ns] + if dates are prior to year 2262, if not it will be datetime.datetime + objects. + Args: time_index: list of DateTime if interpolation is wanted default is None, which returns the raw Eclipse report times @@ -1396,9 +1338,6 @@ def get_smry( a wanted frequencey for dates, daily, weekly, monthly, yearly, that will be send to get_smry_dates() column_keys: list of column key wildcards - cache_eclsum: boolean for whether to cache the EclSum - objects. Defaults to True. Set to False if - not enough memory to keep all summary files in memory. start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied start_date will always be included. Overridden if time_index @@ -1415,41 +1354,24 @@ def get_smry( REAL with integers is added to distinguish realizations. If no realizations, empty DataFrame is returned. 
""" - if cache_eclsum is not None: - warnings.warn( - ( - "cache_eclsum option to get_smry() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - - if isinstance(time_index, str): - # Try interpreting as ISO-date: - try: - parseddate = dateutil.parser.isoparse(time_index) - time_index = [parseddate] - # But this should fail when a frequency string is supplied: - except ValueError: - time_index = self.get_smry_dates( - time_index, - start_date=start_date, - end_date=end_date, - include_restart=include_restart, - ) dflist = [] + meta = {} for index, realization in self.realizations.items(): dframe = realization.get_smry( time_index=time_index, column_keys=column_keys, - cache_eclsum=cache_eclsum, + start_date=start_date, + end_date=end_date, include_restart=include_restart, ) + if "meta" in dframe.attrs: + meta.update(dframe.attrs["meta"]) dframe.insert(0, "REAL", index) - dframe.index.name = "DATE" dflist.append(dframe) if dflist: - return pd.concat(dflist, sort=False).reset_index() + dframes = pd.concat(dflist, sort=False) + dframes.attrs["meta"] = meta + return dframes return pd.DataFrame() def get_eclgrid(self, props, report=0, agg="mean", active_only=False): diff --git a/src/fmu/ensemble/ensemblecombination.py b/src/fmu/ensemble/ensemblecombination.py index 1a51cd82..7a68416a 100644 --- a/src/fmu/ensemble/ensemblecombination.py +++ b/src/fmu/ensemble/ensemblecombination.py @@ -98,20 +98,30 @@ def get_df(self, localpath, merge=None): refdf = self.ref.get_df(localpath, merge=merge).set_index(indexlist) refdf = refdf.select_dtypes(include="number") result = refdf.mul(self.scale) + meta = {} + if "meta" in refdf.attrs: + meta.update(refdf.attrs["meta"]) if self.add: otherdf = self.add.get_df(localpath, merge=merge).set_index(indexlist) otherdf = otherdf.select_dtypes(include="number") result = result.add(otherdf) + if "meta" in otherdf.attrs: + meta.update(otherdf.attrs["meta"]) if self.sub: otherdf = self.sub.get_df(localpath, merge=merge).set_index(indexlist) otherdf = otherdf.select_dtypes(include="number") result = result.sub(otherdf) + if "meta" in otherdf.attrs: + meta.update(otherdf.attrs["meta"]) # Delete rows where everything is NaN, which will be case when # realization (multi-)indices does not match up in both ensembles. result.dropna(axis="index", how="all", inplace=True) # Also delete columns where everything is NaN, happens when # column data are not similar result.dropna(axis="columns", how="all", inplace=True) + # Add metadata: + if meta: + result.attrs["meta"] = meta return result.reset_index() def to_virtual(self, keyfilter=None): @@ -204,6 +214,7 @@ def get_smry(self, column_keys=None, time_index=None): time_index=time_index, column_keys=column_keys ).set_index(indexlist) result = result.sub(otherdf) + result.attrs["meta"] = self.get_smry_meta() return result.reset_index() def get_smry_stats(self, column_keys=None, time_index="monthly"): @@ -251,7 +262,7 @@ def get_smry_stats(self, column_keys=None, time_index="monthly"): sort=False, ) - def get_smry_meta(self, column_keys=None): + def get_smry_meta(self): """ Provide metadata for summary data vectors. 
@@ -264,15 +275,12 @@ def get_smry_meta(self, column_keys=None): * get_num (int) (only provided if not None) * keyword (str) * wgname (str or None) - - Args: - column_keys: List or str of column key wildcards """ - meta = self.ref.get_smry_meta(column_keys=column_keys) + meta = self.ref.get_smry_meta() if self.add: - meta.update(self.add.get_smry_meta(column_keys=column_keys)) + meta.update(self.add.get_smry_meta()) if self.sub: - meta.update(self.sub.get_smry_meta(column_keys=column_keys)) + meta.update(self.sub.get_smry_meta()) return meta def agg(self, aggregation, keylist=None, excludekeys=None): diff --git a/src/fmu/ensemble/ensembleset.py b/src/fmu/ensemble/ensembleset.py index e825b0d4..911c3eb6 100644 --- a/src/fmu/ensemble/ensembleset.py +++ b/src/fmu/ensemble/ensembleset.py @@ -572,7 +572,6 @@ def load_smry( self, time_index="raw", column_keys=None, - cache_eclsum=None, start_date=None, end_date=None, ): @@ -596,9 +595,6 @@ def load_smry( If a string is supplied, that string is attempted used via get_smry_dates() in order to obtain a time index. column_keys: list of column key wildcards - cache_eclsum: Boolean for whether we should cache the EclSum - objects. Set to False if you cannot keep all EclSum files in - memory simultaneously start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied start_date will always be included. Overridden if time_index @@ -612,21 +608,11 @@ def load_smry( A DataFame of summary vectors for the ensembleset. The column 'ENSEMBLE' will denote each ensemble's name """ - if cache_eclsum is not None: - warnings.warn( - ( - "cache_eclsum option to load_smry() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - # Future: Multithread this: for _, ensemble in self._ensembles.items(): ensemble.load_smry( time_index=time_index, column_keys=column_keys, - cache_eclsum=cache_eclsum, start_date=start_date, end_date=end_date, ) @@ -640,7 +626,6 @@ def get_smry( self, time_index=None, column_keys=None, - cache_eclsum=None, start_date=None, end_date=None, ): @@ -656,11 +641,6 @@ def get_smry( If a string is supplied, that string is attempted used via get_smry_dates() in order to obtain a time index. column_keys: list of column key wildcards - cache_eclsum: boolean for whether to cache the EclSum - objects. Defaults to False. Set to True if - there is enough memory to keep all realizations summary - files in memory at once. This will speed up subsequent - operations start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied start_date will always be included. Overridden if time_index @@ -674,30 +654,16 @@ def get_smry( ENSEMBLE will distinguish the different ensembles by their respective names. 
""" - - if cache_eclsum is not None: - warnings.warn( - ( - "cache_eclsum option to get_smry() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - smrylist = [] for _, ensemble in self._ensembles.items(): - smry = ensemble.get_smry( - time_index, column_keys, cache_eclsum, start_date, end_date - ) + smry = ensemble.get_smry(time_index, column_keys, start_date, end_date) smry.insert(0, "ENSEMBLE", ensemble.name) smrylist.append(smry) if smrylist: return pd.concat(smrylist, sort=False) return pd.DataFrame() - def get_smry_dates( - self, freq="monthly", cache_eclsum=None, start_date=None, end_date=None - ): + def get_smry_dates(self, freq="monthly", start_date=None, end_date=None): """Return list of datetimes from an ensembleset Datetimes from each realization in each ensemble can @@ -709,9 +675,6 @@ def get_smry_dates( yield the sorted union of all valid timesteps for all realizations. Other valid options are 'daily', 'monthly' and 'yearly'. - cache_eclsum: Boolean for whether we should cache the EclSum - objects. Set to False if you cannot keep all EclSum files in - memory simultaneously start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied start_date will always be included. Overridden if time_index @@ -723,22 +686,11 @@ def get_smry_dates( Returns: list of datetime.date. """ - - if cache_eclsum is not None: - warnings.warn( - ( - "cache_eclsum option to get_smry_dates() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - rawdates = set() for _, ensemble in self._ensembles.items(): rawdates = rawdates.union( ensemble.get_smry_dates( freq="report", - cache_eclsum=cache_eclsum, start_date=start_date, end_date=end_date, ) diff --git a/src/fmu/ensemble/observations.py b/src/fmu/ensemble/observations.py index 0492310a..af453c49 100644 --- a/src/fmu/ensemble/observations.py +++ b/src/fmu/ensemble/observations.py @@ -1,6 +1,4 @@ -""" -Observations support and related calculations -""" +"""Observations support and related calculations""" import os import math @@ -174,7 +172,7 @@ def load_smry(self, realization, smryvector, time_index="yearly", smryerror=None """ dataseries = realization.get_smry( column_keys=[smryvector], time_index=time_index - )[smryvector] + )[["DATE", smryvector]].set_index("DATE")[smryvector] # In the context of this function, datetimes are not supported. Ensure dates: if isinstance(dataseries.index, pd.DatetimeIndex): diff --git a/src/fmu/ensemble/realization.py b/src/fmu/ensemble/realization.py index 561494da..e21b63e3 100644 --- a/src/fmu/ensemble/realization.py +++ b/src/fmu/ensemble/realization.py @@ -26,20 +26,14 @@ from ecl.grid import EclGrid from ecl import EclFileFlagEnum +import ecl2df + from .virtualrealization import VirtualRealization from .realizationcombination import RealizationCombination from .util import parse_number, flatten, shortcut2path from .util.rates import compute_volumetric_rates from .util.dates import unionize_smry_dates -HAVE_ECL2DF = False -try: - import ecl2df - - HAVE_ECL2DF = True -except ImportError: - HAVE_ECL2DF = False - logger = logging.getLogger(__name__) @@ -105,8 +99,7 @@ def __init__( self.files = pd.DataFrame( columns=["FULLPATH", "FILETYPE", "LOCALPATH", "BASENAME"] ) - self._eclsum = None # Placeholder for caching - self._eclsum_include_restart = None # Flag for cached object + self.eclfiles = None # ecl2df.EclFiles object # The datastore for internalized data. 
Dictionary # indexed by filenames (local to the realization). @@ -244,17 +237,6 @@ def to_virtual(self, name=None, deepcopy=True): else: vreal = VirtualRealization(name, self.data) - # Conserve metadata for smry vectors. Build metadata dict for all - # loaded summary vectors. - smrycolumns = [ - self.get_df(key).columns for key in self.keys() if "unsmry" in key - ] - smrycolumns = {smrykey for sublist in smrycolumns for smrykey in sublist} - meta = self.get_smry_meta(list(smrycolumns)) - if meta: - meta_df = pd.DataFrame.from_dict(meta, orient="index") - meta_df.index.name = "SMRYCOLUMN" - vreal.append("__smry_metadata", meta_df.reset_index()) return vreal def load_file(self, localpath, fformat, convert_numeric=True, force_reread=False): @@ -852,18 +834,21 @@ def get_eclfiles(self): Returns: ecl2df.EclFiles. None if nothing found """ - if not HAVE_ECL2DF: - logger.warning("ecl2df not installed. Skipping") - return None - data_file_row = self.files[self.files["FILETYPE"] == "DATA"] + data_file_rows = self.files[self.files["FILETYPE"] == "DATA"] data_filename = None - if len(data_file_row) == 1: - data_filename = data_file_row["FULLPATH"].values[0] + unsmry_file_rows = self.files[self.files["FILETYPE"] == "UNSMRY"] + unsmry_filename = None + if len(data_file_rows) == 1: + data_filename = data_file_rows["FULLPATH"].values[0] + elif len(unsmry_file_rows) == 1: + unsmry_filename = unsmry_file_rows["FULLPATH"].values[0] + # We construct the DATA file, even though it might not exist: + data_filename = unsmry_filename.replace(".UNSMRY", ".DATA") elif self._autodiscovery: data_fileguess = os.path.join(self._origpath, "eclipse/model", "*.DATA") data_filenamelist = glob.glob(data_fileguess) if not data_filenamelist: - return None # No filename matches *DATA + return None # No filename matches *DATA or *UNSMRY if len(data_filenamelist) > 1: logger.warning( ( @@ -871,17 +856,32 @@ def get_eclfiles(self): "consider turning off auto-discovery" ) ) - data_filename = data_filenamelist[0] - self.find_files(data_filename) + if data_filenamelist: + data_filename = data_filenamelist[0] + self.find_files(data_filename) + + unsmry_fileguess = os.path.join(self._origpath, "eclipse/model", "*.UNSMRY") + unsmry_filenamelist = glob.glob(unsmry_fileguess) + if not unsmry_filenamelist: + return None # No filename matches + if len(unsmry_filenamelist) > 1: + logger.warning( + "Multiple UNSMRY files found, consider turning off auto-discovery" + ) + unsmry_filename = unsmry_filenamelist[0] + self.find_files(unsmry_filename) + else: - # There is no DATA file to be found. - logger.warning("No DATA file found!") + logger.warning("No DATA and/or UNSMRY file found!") return None if not os.path.exists(data_filename): - return None + if unsmry_filename is not None: + return ecl2df.EclFiles(unsmry_filename.replace(".UNSMRY", ".DATA")) + else: + return None return ecl2df.EclFiles(data_filename) - def get_eclsum(self, cache=True, include_restart=True): + def get_eclsum(self, include_restart=True): """ Fetch the Eclipse Summary file from the realization and return as a libecl EclSum object @@ -895,9 +895,6 @@ def get_eclsum(self, cache=True, include_restart=True): turning off autodiscovery is strongly recommended. Arguments: - cache: boolean indicating whether we should keep an - object reference to the EclSum object. Set to - false if you need to conserve memory. include_restart: boolean sent to libecl for whether restart files should be traversed. 
@@ -905,10 +902,6 @@ def get_eclsum(self, cache=True, include_restart=True): EclSum: object representing the summary file. None if nothing was found. """ - if cache and self._eclsum: # Return cached object if available - if self._eclsum_include_restart == include_restart: - return self._eclsum - unsmry_file_row = self.files[self.files.FILETYPE == "UNSMRY"] unsmry_filename = None if len(unsmry_file_row) == 1: @@ -939,136 +932,51 @@ def get_eclsum(self, cache=True, include_restart=True): # or if SMSPEC is missing. logger.warning("Failed to create summary instance from %s", unsmry_filename) return None - - if cache: - self._eclsum = eclsum - self._eclsum_include_restart = include_restart - return eclsum - def load_smry( - self, - time_index="raw", - column_keys=None, - cache_eclsum=None, - start_date=None, - end_date=None, - include_restart=True, - ): - """Produce dataframe from Summary data from the realization - - When this function is called, the dataframe will be - internalized. Internalization of summary data in a - realization object supports different time_index, but there is - no handling of multiple sets of column_keys. The cached data - will be called - - 'share/results/tables/unsmry--.csv' - - where is among 'yearly', 'monthly', 'daily', 'first', - 'last' or 'raw' (meaning the raw dates in the SMRY file), depending - on the chosen time_index. If a custom time_index (list - of datetime) was supplied, will be called 'custom'. - - Wraps ecl.summary.EclSum.pandas_frame() - - See also get_smry() - - Args: - time_index: string indicating a resampling frequency, - 'yearly', 'monthly', 'daily', 'first', 'last' or 'raw', the - latter will return the simulated report steps (also default). - If a list of DateTime is supplied, data will be resampled - to these. - column_keys: list of column key wildcards. None means everything. - cache_eclsum: boolean for whether to keep the loaded EclSum - object in memory after data has been loaded. - start_date: str or date with first date to include. - Dates prior to this date will be dropped, supplied - start_date will always be included. Overridden if time_index - is 'first' or 'last'. - end_date: str or date with last date to be included. - Dates past this date will be dropped, supplied - end_date will always be included. Overridden if time_index - is 'first' or 'last'. - include_restart: boolean sent to libecl for whether restart - files should be traversed. - - Returns: - DataFrame with summary keys as columns and dates as indices. - Empty dataframe if no summary is available or column - keys do not exist. - DataFrame: with summary keys as columns and dates as indices. - Empty dataframe if no summary is available. - """ - if cache_eclsum is not None: - warnings.warn( - ( - "cache_eclsum option to load_smry() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - else: - cache_eclsum = True - - if not self.get_eclsum(cache=cache_eclsum): - # Return empty, but do not store the empty dataframe in self.data - return pd.DataFrame() - time_index_path = time_index - if time_index == "raw": - time_index_arg = None - elif isinstance(time_index, str): - # Note: This call will recache the smry object. 
- time_index_arg = self.get_smry_dates( - freq=time_index, - start_date=start_date, - end_date=end_date, - include_restart=include_restart, - ) - elif isinstance(time_index, (list, np.ndarray)): - time_index_arg = time_index - time_index_path = "custom" - elif time_index is None: - time_index_path = "raw" - time_index_arg = time_index - else: - raise TypeError("'time_index' has to be a string, a list or None") - - if not isinstance(column_keys, list): - column_keys = [column_keys] - - # Do the actual work: - dframe = self.get_eclsum( - cache=cache_eclsum, include_restart=include_restart - ).pandas_frame(time_index_arg, column_keys) - dframe = dframe.reset_index() - dframe.rename(columns={"index": "DATE"}, inplace=True) - - # Cache the result: - localpath = "share/results/tables/unsmry--" + time_index_path + ".csv" - self.data[localpath] = dframe - - # Do this to ensure that we cut the rope to the EclSum object - # Can be critical for garbage collection - if not cache_eclsum: - self._eclsum = None + def load_smry(self, **kwargs): + """Wrap around get_smry(), but also cache the result""" + dframe = self.get_smry(**kwargs) + cachename = None + # Cache the result for supported time indices: + if "time_index" not in kwargs or kwargs["time_index"] is None: + cachename = "raw" + elif isinstance(kwargs["time_index"], list): + cachename = "custom" + elif str(kwargs["time_index"]) in [ + "raw", + "first", + "last", + "report", + "daily", + "weekly", + "monthly", + "yearly", + ]: + cachename = kwargs["time_index"] + + if cachename: + localpath = "share/results/tables/unsmry--" + cachename + ".csv" + self.data[localpath] = dframe return dframe def get_smry( self, time_index=None, column_keys=None, - cache_eclsum=None, start_date=None, end_date=None, include_restart=True, ): - """Wrapper for EclSum.pandas_frame + """Wrapper for ecl2df.summary This gives access to the underlying data on disk without touching internalized dataframes. + The returned dataframe will have a dummy index, and the dates in + the column DATE. The DATE column will contain either datetime.datetime + or pandas.Timestamp objects. + Arguments: time_index: string indicating a resampling frequency, 'yearly', 'monthly', 'daily', 'first', 'last' or 'raw', the @@ -1077,8 +985,6 @@ def get_smry( to these. If a date in ISO-8601 format is supplied, that is used as a single date. column_keys: list of column key wildcards. None means everything. - cache_eclsum: boolean for whether to keep the loaded EclSum - object in memory after data has been loaded. start_date: str or date with first date to include. Dates prior to this date will be dropped, supplied start_date will always be included. Overridden if time_index @@ -1087,59 +993,37 @@ def get_smry( Dates past this date will be dropped, supplied end_date will always be included. Overridden if time_index is 'first' or 'last'. + include_restart (bool): Whether to traverse restart files. Returns empty dataframe if there is no summary file, or if the column_keys are not existing. 
""" + if self.get_eclfiles() is None: + return pd.DataFrame() + try: + return ecl2df.summary.df( + self.get_eclfiles(), + time_index=time_index, + column_keys=column_keys, + start_date=start_date, + end_date=end_date, + include_restart=include_restart, + params=False, + paramfile=None, + ).reset_index() + except OSError: + # Missing or bogus UNSMRY file + return pd.DataFrame() + except ValueError: + # From libecl when requested columns keys are not found, + # or from pd.tseries.frequencies.to_offset() if frequency + # specifier is not known. + return pd.DataFrame() - if cache_eclsum is not None: - warnings.warn( - ( - "cache_eclsum option to get_smry() is deprecated and " - "will be removed in fmu-ensemble v2.0.0" - ), - FutureWarning, - ) - else: - cache_eclsum = True - - if not isinstance(column_keys, list): - column_keys = [column_keys] - if isinstance(time_index, str) and time_index == "raw": - time_index_arg = None - elif isinstance(time_index, str): - try: - parseddate = dateutil.parser.isoparse(time_index) - time_index_arg = [parseddate] - except ValueError: - - time_index_arg = self.get_smry_dates( - freq=time_index, - start_date=start_date, - end_date=end_date, - include_restart=include_restart, - ) - elif time_index is None or isinstance(time_index, (list, np.ndarray)): - time_index_arg = time_index - else: - raise TypeError("'time_index' has to be a string, a list or None") - if self.get_eclsum(cache=cache_eclsum, include_restart=include_restart): - try: - dataframe = self.get_eclsum( - cache=cache_eclsum, include_restart=include_restart - ).pandas_frame(time_index_arg, column_keys) - except ValueError: - # We get here if we have requested non-existing column keys - return pd.DataFrame() - if not cache_eclsum: - # Ensure EclSum object can be garbage collected - self._eclsum = None - return dataframe - return pd.DataFrame() - - def get_smry_meta(self, column_keys=None): + def get_smry_meta(self): """ - Provide metadata for summary data vectors. + Provide metadata for summary data vectors. Only works + for summary data that has been loaded into this object. 
A dictionary indexed by summary vector names is returned, and each value is another dictionary with potentially the metadata types: @@ -1150,24 +1034,10 @@ def get_smry_meta(self, column_keys=None): * get_num (int) (only provided if not None) * keyword (str) * wgname (str or None) - - Args: - column_keys: List or str of column key wildcards """ - column_keys = self._glob_smry_keys(column_keys) meta = {} - eclsum = self.get_eclsum() - for col in column_keys: - meta[col] = {} - meta[col]["unit"] = eclsum.unit(col) - meta[col]["is_total"] = eclsum.is_total(col) - meta[col]["is_rate"] = eclsum.is_rate(col) - meta[col]["is_historical"] = eclsum.smspec_node(col).is_historical() - meta[col]["keyword"] = eclsum.smspec_node(col).keyword - meta[col]["wgname"] = eclsum.smspec_node(col).wgname - num = eclsum.smspec_node(col).get_num() - if num is not None: - meta[col]["get_num"] = num + for dframe in [self.get_df(key) for key in self.keys() if "unsmry" in key]: + meta.update(dframe.attrs["meta"]) return meta def _glob_smry_keys(self, column_keys): @@ -1195,7 +1065,7 @@ def _glob_smry_keys(self, column_keys): keys = set() for key in column_keys: if isinstance(key, str): - keys = keys.union(set(self._eclsum.keys(key))) + keys = keys.union(set(self.get_eclsum().keys(key))) return list(keys) def get_volumetric_rates(self, column_keys=None, time_index=None, time_unit=None): @@ -1224,25 +1094,19 @@ def get_smryvalues(self, props_wildcard=None): ), FutureWarning, ) - - if not self._eclsum: # check if it is cached - self.get_eclsum() - - if not self._eclsum: - return pd.DataFrame() - props = self._glob_smry_keys(props_wildcard) - if "numpy_vector" in dir(self._eclsum): + if "numpy_vector" in dir(self.get_eclsum()): data = { - prop: self._eclsum.numpy_vector(prop, report_only=False) + prop: self.get_eclsum().numpy_vector(prop, report_only=False) for prop in props } else: # get_values() is deprecated in newer libecl data = { - prop: self._eclsum.get_values(prop, report_only=False) for prop in props + prop: self.get_eclsum().get_values(prop, report_only=False) + for prop in props } - dates = self._eclsum.get_dates(report_only=False) + dates = self.get_eclsum().get_dates(report_only=False) return pd.DataFrame(data=data, index=dates) def get_smry_dates( diff --git a/src/fmu/ensemble/realizationcombination.py b/src/fmu/ensemble/realizationcombination.py index 4d45298b..4f3e43d4 100644 --- a/src/fmu/ensemble/realizationcombination.py +++ b/src/fmu/ensemble/realizationcombination.py @@ -149,6 +149,8 @@ def get_df(self, localpath, merge=None): # Also delete columns where everything is NaN, happens when # column data are not similar result.dropna(axis="columns", how="all", inplace=True) + # Add metadata + result.attrs["meta"] = self.get_smry_meta() return result.reset_index() if isinstance(result, pd.Series): return result.dropna().to_dict() @@ -240,7 +242,7 @@ def get_smry(self, column_keys=None, time_index=None): result = result.sub(otherdf) return result.reset_index() - def get_smry_meta(self, column_keys=None): + def get_smry_meta(self): """ Provide metadata for summary data vectors. 
@@ -253,15 +255,12 @@ def get_smry_meta(self, column_keys=None): * get_num (int) (only provided if not None) * keyword (str) * wgname (str og None) - - Args: - column_keys: List or str of column key wildcards """ - meta = self.ref.get_smry_meta(column_keys=column_keys) + meta = self.ref.get_smry_meta() if self.add: - meta.update(self.add.get_smry_meta(column_keys=column_keys)) + meta.update(self.add.get_smry_meta()) if self.sub: - meta.update(self.sub.get_smry_meta(column_keys=column_keys)) + meta.update(self.sub.get_smry_meta()) return meta @property diff --git a/src/fmu/ensemble/util/rates.py b/src/fmu/ensemble/util/rates.py index d192b552..faed7742 100644 --- a/src/fmu/ensemble/util/rates.py +++ b/src/fmu/ensemble/util/rates.py @@ -65,7 +65,9 @@ def compute_volumetric_rates(realization, column_keys, time_index, time_unit): return pd.DataFrame() cum_df = realization.get_smry(column_keys=column_keys, time_index=time_index) - # get_smry() for realizations return a dataframe indexed by 'DATE' + + if not cum_df.empty: + cum_df.set_index("DATE", inplace=True) # Compute row-wise difference, shift back one row # to get the NaN to the end, and then drop the NaN. diff --git a/src/fmu/ensemble/virtualensemble.py b/src/fmu/ensemble/virtualensemble.py index 35ba07a7..99efdd76 100644 --- a/src/fmu/ensemble/virtualensemble.py +++ b/src/fmu/ensemble/virtualensemble.py @@ -4,7 +4,6 @@ import os import re import shutil -import fnmatch import datetime import warnings import logging @@ -545,6 +544,11 @@ def prepare_vens_directory(filesystempath, delete=False): if self._manifest: with open(os.path.join(filesystempath, "_manifest.yml"), "w") as fhandle: fhandle.write(yaml.dump(self._manifest)) + smry_meta = self.get_smry_meta() + if smry_meta: + smry_meta_df = pd.DataFrame.from_dict(smry_meta, orient="index") + smry_meta_df.index.name = "SMRYCOLUMN" + smry_meta_df.to_csv(os.path.join(filesystempath, "__smry_metadata")) # The README dumped here is just for convenience. Do not assume # anything about its content. @@ -664,7 +668,7 @@ def from_disk(self, filesystempath, fmt="parquet", lazy_load=False): # with data coming from disk. self._data = {} self._name = None - + smry_meta = {} for root, _, filenames in os.walk(filesystempath): if "__discoveredfiles" in root: # Never traverse the collections of dumped @@ -683,6 +687,14 @@ def from_disk(self, filesystempath, fmt="parquet", lazy_load=False): if filename == "_manifest.yml": self.manifest = os.path.join(root, "_manifest.yml") + if filename == "__smry_metadata": + smry_meta_df = pd.read_csv(os.path.join(root, filename)) + smry_meta = ( + smry_meta_df.set_index("SMRYCOLUMN") + .replace({np.nan: None}) + .to_dict(orient="index") + ) + # We will loop through the directory structure, and # data will be duplicated as they can be both in csv # and parquet files. We will only load one of them if so. 
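# A self-contained sketch of the "__smry_metadata" round-trip performed
# in the surrounding hunks: the metadata dict-of-dicts is written as a
# CSV indexed by SMRYCOLUMN, and on reload NaN is mapped back to None.
# The io.StringIO stand-in for a file on disk and the values are
# illustrative only:

import io

import numpy as np
import pandas as pd

meta = {"FOPT": {"unit": "SM3", "wgname": None}}
meta_df = pd.DataFrame.from_dict(meta, orient="index")
meta_df.index.name = "SMRYCOLUMN"
roundtripped = (
    pd.read_csv(io.StringIO(meta_df.to_csv()))
    .set_index("SMRYCOLUMN")
    .replace({np.nan: None})
    .to_dict(orient="index")
)
assert roundtripped == meta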
@@ -725,6 +737,11 @@ def from_disk(self, filesystempath, fmt="parquet", lazy_load=False): for internalizedkey, filename in self.lazy_frames.items(): logger.info("Loading file %s", filename) self._load_frame_fromdisk(internalizedkey, filename) + + # Attach any found metadata to all smry frames: + if smry_meta and "unsmry" in internalizedkey: + self.data[internalizedkey].attrs["meta"] = smry_meta + # (meta will not be included if lazy-load) self.lazy_frames = {} # This function must be called whenever we have done @@ -872,11 +889,6 @@ def get_smry(self, column_keys=None, time_index="monthly"): # Now ask the VirtualRealization to do interpolation interp = vreal.get_smry(column_keys=column_keys, time_index=time_index) - # Assume we get back a dataframe indexed by the dates from vreal - # We must reset that index, and ensure the index column - # gets a correct name - interp.index = interp.index.set_names(["DATE"]) - interp = interp.reset_index() interp["REAL"] = realidx smry_interpolated.append(interp) return pd.concat(smry_interpolated, ignore_index=True, sort=False) @@ -992,7 +1004,7 @@ def get_volumetric_rates( vol_rates_dfs.append(vol_rate_df) return pd.concat(vol_rates_dfs, ignore_index=True, sort=False) - def get_smry_meta(self, column_keys=None): + def get_smry_meta(self): """ Provide metadata for summary data vectors. @@ -1006,35 +1018,13 @@ def get_smry_meta(self, column_keys=None): * keyword (str) * wgname (str or None) - This data is produced from loaded summary dataframes upon ensemble - virtualization. - - Args: - column_keys (list or str): Column key wildcards. - Returns: dict of dict with metadata. """ - if column_keys is None: - column_keys = ["*"] - if not isinstance(column_keys, list): - column_keys = [column_keys] - - available_smrynames = self.get_df("__smry_metadata")["SMRYCOLUMN"].values - matches = set() - for key in column_keys: - matches = matches.union( - [name for name in available_smrynames if fnmatch.fnmatch(name, key)] - ) - # The .replace() in the chain below is to convert NaN's to None, to - # mimic the dataframes before they are exported to disk. - return ( - self.get_df("__smry_metadata") - .set_index("SMRYCOLUMN") - .loc[matches, :] - .replace({np.nan: None}) - .to_dict(orient="index") - ) + meta = {} + for dframe in [self.get_df(key) for key in self.keys() if "unsmry" in key]: + meta.update(dframe.attrs["meta"]) + return meta def __sub__(self, other): """Substract another ensemble from this""" diff --git a/src/fmu/ensemble/virtualrealization.py b/src/fmu/ensemble/virtualrealization.py index 113d346b..1255649f 100644 --- a/src/fmu/ensemble/virtualrealization.py +++ b/src/fmu/ensemble/virtualrealization.py @@ -126,6 +126,11 @@ def to_disk(self, filesystempath, delete=False): logger.warning( "Don't know how to dump %s of type %s to disk", key, type(key) ) + smry_meta = self.get_smry_meta() + if smry_meta: + smry_meta_df = pd.DataFrame.from_dict(smry_meta, orient="index") + smry_meta_df.index.name = "SMRYCOLUMN" + smry_meta_df.to_csv(os.path.join(dirname, "__smry_metadata")) def load_disk(self, filesystempath): """Load data for a virtual realization from disk. @@ -153,6 +158,7 @@ def load_disk(self, filesystempath): FutureWarning, ) logger.info("Loading virtual realization from %s", filesystempath) + meta = {} for root, _, filenames in os.walk(filesystempath): for filename in filenames: if filename == "_description": @@ -166,6 +172,13 @@ def load_disk(self, filesystempath): elif filename == "__repr__": # Not implemented.. 
                     continue
+                elif filename == "__smry_metadata":
+                    meta_df = pd.read_csv(os.path.join(root, filename))
+                    meta = (
+                        meta_df.set_index("SMRYCOLUMN")
+                        .replace({np.nan: None})
+                        .to_dict(orient="index")
+                    )
                 else:
                     # GUESS scalar, key-value txt or CSV from the first
                     # two lines. SHAKY!
@@ -204,6 +217,12 @@
                     self.append(filename, pd.read_csv(os.path.join(root, filename)))
                     logger.info("Read csv file %s", filename)

+        # Attach any found metadata to all smry frames:
+        if meta:
+            for key in self.keys():
+                if "unsmry" in key and isinstance(self.data[key], pd.DataFrame):
+                    self.data[key].attrs["meta"] = meta
+
     def to_json(self):
         """
         Dump realization data to json.
@@ -287,6 +307,10 @@ def get_smry(self, column_keys=None, time_index="monthly"):
         Returns data for those columns that are known, unknown
         columns will be issued a warning for.

+        The returned dataframe will have a dummy index, and the dates in
+        the column DATE. The DATE column will contain either datetime.datetime
+        or pandas.Timestamp objects.
+
         BUG: If some columns are available only in certain dataframes,
         we might miss them (e.g. we ask for yearly FOPT, and we have
         yearly smry with only WOPT data, and FOPT is only in daily
@@ -360,8 +384,16 @@
         smry = self.get_df("unsmry--" + chosen_smry)[["DATE"] + column_keys]

-        # Add the extra datetimes to interpolate at.
+        # Preserve meta through the dataframe operations:
+        if "meta" in smry.attrs:
+            meta = smry.attrs["meta"]
+        else:
+            meta = {}
+
+        # index is dummy, the date is in the DATE column
         smry.set_index("DATE", inplace=True)
+
+        # Add the extra datetimes to interpolate at.
         smry.index = pd.to_datetime(smry.index)
         smry = smry.append(
             pd.DataFrame(index=pd.to_datetime(time_index_dt)), sort=False
@@ -390,8 +422,11 @@
             smry[noncum_columns].fillna(method="bfill").fillna(value=0)
         )

-        smry.index = smry.index.set_names(["DATE"])
-        return smry.loc[pd.to_datetime(time_index_dt)]
+        smry = smry.loc[pd.to_datetime(time_index_dt)]
+        smry.index.name = "DATE"
+        if meta:
+            smry.attrs["meta"] = meta
+        return smry.reset_index()

     def get_smry_dates(self, freq="monthly", normalize=False):
         """Return list of datetimes available in the realization
@@ -444,7 +479,7 @@
         # Convert from Pandas' datetime64 to datetime.date:
         return [x.date() for x in datetimes]

-    def get_smry_meta(self, column_keys=None):
+    def get_smry_meta(self):
         """
         Provide metadata for summary data vectors.

         A dictionary indexed by summary vector names is returned, and
         each value is another dictionary with potentially the metadata types:

         * unit (str)
         * is_total (bool)
         * is_rate (bool)
         * is_historical (bool)
         * get_num (int) (only provided if not None)
         * keyword (str)
         * wgname (str or None)
-
-        Args:
-            column_keys (list or str): Column key wildcards.
- Returns: dict of dict with metadata information """ - # Warning: Code is identical the same function in virtualensemble.py - if column_keys is None: - column_keys = ["*"] - if not isinstance(column_keys, list): - column_keys = [column_keys] - - available_smrynames = self.get_df("__smry_metadata")["SMRYCOLUMN"].values - matches = set() - for key in column_keys: - matches = matches.union( - [name for name in available_smrynames if fnmatch.fnmatch(name, key)] - ) - return ( - self.get_df("__smry_metadata") - .set_index("SMRYCOLUMN") - .loc[matches, :] - .to_dict(orient="index") - ) + meta = {} + for dframe in [self.get_df(key) for key in self.keys() if "unsmry" in key]: + meta.update(dframe.attrs["meta"]) + return meta def _glob_smry_keys(self, column_keys): """Glob a list of column keys diff --git a/tests/test_ecl2df.py b/tests/test_ecl2df.py index 3edcbede..673f3803 100644 --- a/tests/test_ecl2df.py +++ b/tests/test_ecl2df.py @@ -3,25 +3,16 @@ import os import logging -import pytest +import ecl2df from fmu.ensemble import ScratchEnsemble, ScratchRealization -HAVE_ECL2DF = True -try: - import ecl2df -except ImportError: - HAVE_ECL2DF = False - logger = logging.getLogger(__name__) def test_ecl2df_real(): """Check that we can utilize ecl2df on single realizations""" - if not HAVE_ECL2DF: - pytest.skip() - if "__file__" in globals(): # Easen up copying test code into interactive sessions testdir = os.path.dirname(os.path.abspath(__file__)) @@ -49,8 +40,6 @@ def test_reek(): reekens = ScratchEnsemble( "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0" ) - if not HAVE_ECL2DF: - pytest.skip() def extract_compdat(kwargs): """Callback fnction to extract compdata data using ecl2df @@ -90,8 +79,6 @@ def get_smry(kwargs): reekens = ScratchEnsemble( "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0" ) - if not HAVE_ECL2DF: - pytest.skip() callback_smry = reekens.apply(get_smry, column_keys="FOPT", time_index="yearly") direct_smry = reekens.get_smry(column_keys="FOPT", time_index="yearly") diff --git a/tests/test_ensemble.py b/tests/test_ensemble.py index 3eb1930a..4b8ab19e 100644 --- a/tests/test_ensemble.py +++ b/tests/test_ensemble.py @@ -152,7 +152,6 @@ def test_reek001(tmpdir): ] ) assert len(reekensemble) == 5 - print(reekensemble.files) assert len(reekensemble.files) == 24 # File discovery must be repeated for the newly added realizations @@ -240,18 +239,6 @@ def test_emptyens(): assert isinstance(emptymeta, dict) assert not emptymeta - emptymeta = ens.get_smry_meta("*") - assert isinstance(emptymeta, dict) - assert not emptymeta - - emptymeta = ens.get_smry_meta("FOPT") - assert isinstance(emptymeta, dict) - assert not emptymeta - - emptymeta = ens.get_smry_meta(["FOPT"]) - assert isinstance(emptymeta, dict) - assert not emptymeta - # Add a realization manually: ens.add_realizations( testdir + "/data/testensemble-reek001/" + "realization-0/iter-0" @@ -473,24 +460,19 @@ def test_ensemble_ecl(): ) # Summary metadata: + reekensemble.load_smry(time_index="yearly", column_keys="*") meta = reekensemble.get_smry_meta() assert len(meta) == len(reekensemble.get_smrykeys()) assert "FOPT" in meta assert not meta["FOPT"]["is_rate"] assert meta["FOPT"]["is_total"] - meta = reekensemble.get_smry_meta("FOPT") - assert meta["FOPT"]["is_total"] + # Meta should also be returned via dataframe's "attrs" + yearly_df_load = reekensemble.load_smry(time_index="yearly", column_keys="FOPT") + assert set(yearly_df_load.attrs["meta"].keys()) == set(["FOPT"]) - meta = 
reekensemble.get_smry_meta("*") - assert meta["FOPT"]["is_total"] - - meta = reekensemble.get_smry_meta(["*"]) - assert meta["FOPT"]["is_total"] - - meta = reekensemble.get_smry_meta(["FOPT", "BOGUS"]) - assert meta["FOPT"]["is_total"] - assert "BOGUS" not in meta + yearly_df_get = reekensemble.get_smry(time_index="yearly", column_keys="FOPT") + assert set(yearly_df_get.attrs["meta"].keys()) == set(["FOPT"]) # Eclipse well names list assert len(reekensemble.get_wellnames("OP*")) == 5 @@ -499,9 +481,6 @@ def test_ensemble_ecl(): assert not reekensemble.get_wellnames("") assert len(reekensemble.get_wellnames(["OP*", "WI*"])) == 8 - # eclipse well groups list - assert len(reekensemble.get_groupnames()) == 3 - # delta between two ensembles diff = reekensemble - reekensemble assert len(diff.get_smry(column_keys=["FOPR", "FGPR", "FWCT"]).columns) == 5 @@ -829,57 +808,6 @@ def test_nonexisting(): assert not nopermission -def test_eclsumcaching(): - """Test caching of eclsum""" - - if "__file__" in globals(): - # Easen up copying test code into interactive sessions - testdir = os.path.dirname(os.path.abspath(__file__)) - else: - testdir = os.path.abspath(".") - - dirs = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0" - ens = ScratchEnsemble("reektest", dirs) - - # The problem here is if you load in a lot of UNSMRY files - # and the Python process keeps them in memory. Not sure - # how to check in code that an object has been garbage collected - # but for garbage collection to work, at least the realization - # _eclsum variable must be None. - - ens.load_smry() - # Default is to do caching, so these will not be None: - assert all([x._eclsum for (idx, x) in ens.realizations.items()]) - - # If we redo this operation, the same objects should all - # be None afterwards: - ens.load_smry(cache_eclsum=False) - # cache_eclsum==None is from v1.1.5 no longer equivalent to False - assert not any([x._eclsum for (idx, x) in ens.realizations.items()]) - - ens.get_smry() - assert all([x._eclsum for (idx, x) in ens.realizations.items()]) - - ens.get_smry(cache_eclsum=False) - assert not any([x._eclsum for (idx, x) in ens.realizations.items()]) - - ens.get_smry_stats() - assert all([x._eclsum for (idx, x) in ens.realizations.items()]) - - ens.get_smry_stats(cache_eclsum=False) - assert not any([x._eclsum for (idx, x) in ens.realizations.items()]) - - ens.get_smry_dates() - assert all([x._eclsum for (idx, x) in ens.realizations.items()]) - - # Clear the cached objects because the statement above has cached it.. - for _, realization in ens.realizations.items(): - realization._eclsum = None - - ens.get_smry_dates(cache_eclsum=False) - assert not any([x._eclsum for (idx, x) in ens.realizations.items()]) - - def test_filedescriptors(): """Test how filedescriptors are used. 
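# A small, runnable illustration of the dict.update() merge semantics that
# get_smry_meta() now uses across realizations and ensembles: entries are
# unioned, and on key collisions the last realization wins (behaviour when
# metadata differ between realizations is undefined). Keys and units are
# illustrative only:

meta = {}
meta.update({"FOPT": {"unit": "SM3", "is_total": True}})
meta.update({"FOPR": {"unit": "SM3/DAY", "is_rate": True}})
assert set(meta) == {"FOPT", "FOPR"}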
diff --git a/tests/test_ensemble_agg.py b/tests/test_ensemble_agg.py
index 5b675fe6..e65e762b 100644
--- a/tests/test_ensemble_agg.py
+++ b/tests/test_ensemble_agg.py
@@ -39,6 +39,12 @@
     }
     tmpdir.chdir()

+    assert "FOPT" in stats["min"].get_df("unsmry--yearly").attrs["meta"]
+    assert (
+        "FOPT"
+        in stats["min"].get_smry(column_keys="FOPT", time_index="yearly").attrs["meta"]
+    )
+
     stats["min"].to_disk("virtreal_min", delete=True)
     stats["max"].to_disk("virtreal_max", delete=True)
     stats["mean"].to_disk("virtreal_mean", delete=True)
diff --git a/tests/test_ensemble_eclfail.py b/tests/test_ensemble_eclfail.py
index 16eb1371..a29f59a7 100644
--- a/tests/test_ensemble_eclfail.py
+++ b/tests/test_ensemble_eclfail.py
@@ -73,8 +73,8 @@ def test_ens_premature_ecl(tmpdir):
     # Check also get_smry():
     assert len(failensemble.get_smry().groupby("REAL").max()["DATE"].unique()) == 2

-    # With time_index set to something, then all realization will get
-    # interpolated onto the same date range
+    # With time_index set to something, each realization is still time-interpolated
+    # individually and we still have two different max-dates:
     assert (
         len(
             failensemble.get_smry(time_index="monthly")
@@ -82,9 +82,10 @@
             .max()["DATE"]
             .unique()
         )
-        == 1
+        == 2
     )
-    # This is in fact *different* from what you would get from load_smry (issue #97)
+    # load_smry and get_smry behave the same
+    # (they were different in fmu-ensemble 1.x)
     assert (
         len(
             failensemble.load_smry(time_index="monthly")
@@ -94,8 +95,6 @@
         )
         == 2
     )
-    # (this behaviour might change, get_smry() is allowed in
-    # the future to mimic load_smry())

     # Check that FOPT is very much lower in real 1 in failed ensemble:
     assert (
@@ -156,34 +155,74 @@
     filtered_stats = filtered_fail_ensemble.get_smry_stats(time_index="monthly")
     # Original stats
     orig_stats = origensemble.get_smry_stats(time_index="monthly")
+    orig_smry = origensemble.get_smry(time_index="monthly").set_index(["REAL", "DATE"])
+
+    # fmu-ensemble 1.x extrapolated the failed realization with zero rates to the
+    # common end-date for the ensemble, making zero the ensemble minimum.
+    # fmu-ensemble 2.x has NaNs for rates after the failure date, and these do
+    # not enter the statistics.
+
+    # Thus the minimum rates at the latest dates (post failure in real 1) are nonzero:
+    assert fail_stats.loc["minimum"]["FOPR"].iloc[-30:].abs().sum() > 0
+
+    # The final date is present in the statistics frames:
+    assert "2003-02-01" in fail_stats.loc["minimum"].index.astype(str).values

-    # The 30 last rows are the rows from 2000-09-01 to 2003-02-01:
-    assert fail_stats.loc["minimum"]["FOPR"].iloc[-30:].abs().sum() == 0
-    assert fail_stats.loc["minimum"]["FOPT"].iloc[-30:].unique()[0] == 1431247.125

     # Oh no, in filtered stats, the last date 2003-02-01 is
     # not included, probably a minor bug!
+    assert "2003-02-01" not in filtered_stats.loc["minimum"].index.astype(str).values
     # But that means that the indexing of the last 30 is a little bit rogue.
    # (this test should work even that bug is fixed)
     assert filtered_stats.loc["minimum"]["FOPR"].iloc[-29:].abs().sum() > 0
     assert len(filtered_stats.loc["minimum"]["FOPT"].iloc[-29:].unique()) == 29

-    # Mean FOPR and FOPT should be affected by the zero-padded rates:
-    assert (
-        fail_stats.loc["mean"].iloc[-10]["FOPR"]
-        < filtered_stats.loc["mean"].iloc[-10]["FOPR"]
-    )
-    assert (
-        fail_stats.loc["mean"].iloc[-10]["FOPR"]
-        < orig_stats.loc["mean"].iloc[-10]["FOPR"]
-    )
-    assert (
-        fail_stats.loc["mean"].iloc[-10]["FOPT"]
-        < filtered_stats.loc["mean"].iloc[-10]["FOPT"]
-    )
-    assert (
-        fail_stats.loc["mean"].iloc[-10]["FOPT"]
-        < orig_stats.loc["mean"].iloc[-10]["FOPT"]
-    )
+    # Mean FOPR and FOPT are affected by how the failed realization is handled:
+    # in fail_stats, realization 1 is truncated, and in filtered_stats
+    # realization 1 does not exist.
+
+    # Some manually computed means from orig summary:
+    fopr_mean_all = (
+        orig_smry.loc[0, datetime.datetime(2002, 1, 1)]["FOPR"]
+        # Pandas allows index lookup using both strings and datetimes (not date),
+        # because we have done a set_index() on the frame.
+        + orig_smry.loc[1, "2002-01-01"]["FOPR"]
+        + orig_smry.loc[2, "2002-01-01"]["FOPR"]
+        + orig_smry.loc[3, "2002-01-01"]["FOPR"]
+        + orig_smry.loc[4, "2002-01-01"]["FOPR"]
+    ) / 5
+    fopr_mean_not1 = (
+        orig_smry.loc[0, "2002-01-01"]["FOPR"]
+        + orig_smry.loc[2, "2002-01-01"]["FOPR"]
+        + orig_smry.loc[3, "2002-01-01"]["FOPR"]
+        + orig_smry.loc[4, "2002-01-01"]["FOPR"]
+    ) / 4  # == 5627.0299072265625
+
+    # The last alternative was how fmu.ensemble v1.x worked:
+    fopr_mean_zero1 = (  # noqa
+        orig_smry.loc[0, "2002-01-01"]["FOPR"]
+        + 0
+        + orig_smry.loc[2, "2002-01-01"]["FOPR"]
+        + orig_smry.loc[3, "2002-01-01"]["FOPR"]
+        + orig_smry.loc[4, "2002-01-01"]["FOPR"]
+    ) / 5  # == 4501.62392578125
+
+    # Pandas 1.2.3 at least provides different time objects between the two frames:
+    # failensemble.get_smry_stats(time_index="monthly").loc["mean"].index.values
+    # filtered_fail_ensemble.get_smry_stats(time_index="monthly").loc["mean"].index.values
+    # with datetime.date() in the first and datetime64[ns] in the latter.
+ # We don't want to expose this test code to that detail, so convert to strings: + fail_stats_mean = fail_stats.loc["mean"] + fail_stats_mean.index = fail_stats_mean.index.astype(str) + assert fail_stats_mean.loc["2002-01-01"]["FOPR"] == fopr_mean_not1 + filtered_stats_mean = filtered_stats.loc["mean"] + filtered_stats_mean.index = filtered_stats_mean.index.astype(str) + assert filtered_stats_mean.loc["2002-01-01"]["FOPR"] == fopr_mean_not1 + orig_stats_mean = orig_stats.loc["mean"] + orig_stats_mean.index = orig_stats_mean.index.astype(str) + assert orig_stats_mean.loc["2002-01-01"]["FOPR"] == fopr_mean_all + # FOPT is handled identical to FOPR, as there is no extrapolation + # by default of summary vectors in fmu.ensemble v2.x (in v1.x rates and totals + # were extrapolated individually) # Delta profiles: delta_fail = origensemble - failensemble diff --git a/tests/test_ensemblecombination.py b/tests/test_ensemblecombination.py index abda4134..87986fce 100644 --- a/tests/test_ensemblecombination.py +++ b/tests/test_ensemblecombination.py @@ -52,7 +52,12 @@ def test_ensemblecombination_basic(): ].sum() ) - smrymeta = diff.get_smry_meta(["FO*"]) + # Test presence of summary metadata: + assert "FOPT" in half.get_df("unsmry--yearly").attrs["meta"] + assert ( + "FOPT" in half.get_smry(column_keys="FOPT", time_index="yearly").attrs["meta"] + ) + smrymeta = diff.get_smry_meta() assert "FOPT" in smrymeta # This is only true since we only juggle one ensemble here: @@ -96,7 +101,7 @@ def test_ensemblecombination_basic(): # We can test something cheaper: zero = reekensemble + reekensemble - 2 * reekensemble assert zero["parameters"]["KRW1"].sum() == 0 - smrymeta = zero.get_smry_meta(["FO*"]) + smrymeta = zero.get_smry_meta() assert "FOPT" in smrymeta vzero = ( @@ -107,6 +112,7 @@ def test_ensemblecombination_basic(): assert vzero["parameters"]["KRW1"].sum() == 0 assert len(diff.get_smry(column_keys=["FOPR", "FGPR", "FWCT"]).columns) == 5 + assert "FOPR" in diff.get_smry(column_keys="FOPR").attrs["meta"] # eclipse summary vector statistics for a given ensemble df_stats = diff.get_smry_stats(column_keys=["FOPR", "FGPR"], time_index="monthly") diff --git a/tests/test_observations.py b/tests/test_observations.py index 8c64fab0..520fc9f9 100644 --- a/tests/test_observations.py +++ b/tests/test_observations.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Testing observations in fmu-ensemble.""" import os @@ -66,24 +65,26 @@ def test_real_mismatch(): ) realmis = obs.mismatch(real) - # Check layout of returned data - assert isinstance(realmis, pd.DataFrame) - assert len(realmis) == 1 + pd.testing.assert_frame_equal( + realmis, + pd.DataFrame( + [ + { + "OBSTYPE": "txt", + "OBSKEY": "parameters.txt/FWL", + "MISMATCH": -2.0, + "L1": 2.0, + "L2": 4.0, + "SIMVALUE": 1700, + "OBSVALUE": 1702, + "MEASERROR": 1, + "SIGN": -1, + } + ] + ), + ) assert "REAL" not in realmis.columns # should only be there for ensembles. 
- assert "OBSTYPE" in realmis.columns - assert "OBSKEY" in realmis.columns assert "DATE" not in realmis.columns # date is not relevant - assert "MISMATCH" in realmis.columns - assert "L1" in realmis.columns - assert "L2" in realmis.columns - - # Check actually computed values, there should only be one row with data: - assert realmis.loc[0, "OBSTYPE"] == "txt" - assert realmis.loc[0, "OBSKEY"] == "parameters.txt/FWL" - assert realmis.loc[0, "MISMATCH"] == -2 - assert realmis.loc[0, "SIGN"] == -1 - assert realmis.loc[0, "L1"] == 2 - assert realmis.loc[0, "L2"] == 4 # Another observation set: obs2 = Observations( @@ -96,12 +97,46 @@ def test_real_mismatch(): } ) realmis2 = obs2.mismatch(real) - assert len(realmis2) == 3 - assert "parameters.txt/RMS_SEED" in realmis2["OBSKEY"].values - assert "outputs.txt/top_structure" in realmis2["OBSKEY"].values - assert "npv.txt" in realmis2["OBSKEY"].values - - # assert much more! + pd.testing.assert_frame_equal( + realmis2, + pd.DataFrame( + [ + { + "OBSTYPE": "txt", + "OBSKEY": "parameters.txt/RMS_SEED", + "MISMATCH": -177148215.0, + "L1": 177148215.0, + "L2": 3.1381490077686224e16, + "SIMVALUE": 422851785, + "OBSVALUE": 600000000, + "MEASERROR": 1, + "SIGN": -1, + }, + { + "OBSTYPE": "txt", + "OBSKEY": "outputs.txt/top_structure", + "MISMATCH": 24.0, + "L1": 24.0, + "L2": 576.0, + "SIMVALUE": 3224, + "OBSVALUE": 3200, + "MEASERROR": 1, + "SIGN": 1, + }, + { + "OBSTYPE": "scalar", + "OBSKEY": "npv.txt", + "MISMATCH": 44.0, + "L1": 44.0, + "L2": 1936.0, + "SIMVALUE": 3444, + "OBSVALUE": 3400, + "MEASERROR": 1, + "SIGN": 1, + }, + ] + ), + ) # Test that we can write the observations to yaml # and verify that the exported yaml can be reimported @@ -215,6 +250,26 @@ def test_smry(): # loaded realization. mismatch = obs.mismatch(real) + # Assert the first row exactly: + pd.testing.assert_frame_equal( + mismatch.head(1), + pd.DataFrame( + [ + { + "OBSTYPE": "smry", + "OBSKEY": "WBP4:OP_1", + "DATE": datetime.date(2001, 1, 1), + "MEASERROR": 4.0, + "MISMATCH": -2.159454345703125, + "OBSVALUE": 251.0, + "SIMVALUE": 248.84054565429688, + "L1": 2.159454345703125, + "L2": 4.663243071176112, + "SIGN": -1, + } + ] + ), + ) assert len(mismatch) == 21 # later: implement counting in the obs object assert mismatch.L1.sum() > 0 assert mismatch.L2.sum() > 0 @@ -537,7 +592,6 @@ def test_ensset_mismatch(): == mismatch[mismatch.ENSEMBLE == "iter-1"].L1.sum() ) - # This is quite hard to input in dict-format. Better via YAML.. 
diff --git a/tests/test_realization.py b/tests/test_realization.py
index 70e1e216..fc7bf685 100644
--- a/tests/test_realization.py
+++ b/tests/test_realization.py
@@ -320,9 +320,7 @@ def test_volumetric_rates():
     assert real.get_volumetric_rates(column_keys="FOOBAR").empty
     assert real.get_volumetric_rates(column_keys=["FOOBAR"]).empty
     assert real.get_volumetric_rates(column_keys={}).empty
-
-    with pytest.raises(ValueError):
-        real.get_volumetric_rates(column_keys="FOPT", time_index="bogus")
+    assert real.get_volumetric_rates(column_keys="FOPT", time_index="bogus").empty

     mcum = real.get_smry(column_keys="FOPT", time_index="monthly")
     dmcum = real.get_volumetric_rates(column_keys="FOPT", time_index="monthly")
@@ -330,9 +328,9 @@

     # Pick 10 **random** dates to get the volumetric rates between:
     daily_dates = real.get_smry_dates(freq="daily", normalize=False)
-    subset_dates = np.random.choice(daily_dates, size=10, replace=False)
+    subset_dates = list(np.random.choice(daily_dates, size=10, replace=False))
     subset_dates.sort()
-    dcum = real.get_smry(column_keys="FOPT", time_index=subset_dates)
+    dcum = real.get_smry(column_keys="FOPT", time_index=subset_dates).set_index("DATE")
     ddcum = real.get_volumetric_rates(column_keys="FOPT", time_index=subset_dates)
     assert ddcum["FOPR"].iloc[-1] == 0

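# A note on the string forms asserted in the next hunk (the exact DATE column
# dtypes are an assumption): a numpy datetime64[ns] value stringifies with
# nanosecond precision, while a datetime.date gives a plain ISO date:
#
#   import datetime
#   import numpy as np
#
#   str(np.datetime64("2003-01-02", "ns"))  # "2003-01-02T00:00:00.000000000"
#   str(datetime.date(2003, 1, 2))          # "2003-01-02"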
"2003-01-02 00:00:00" + assert ( + str(real.get_df("unsmry--raw")["DATE"].values[-1]) + == "2003-01-02T00:00:00.000000000" + ) real.load_smry(column_keys="FOPT", time_index="daily") - assert str(real.get_df("unsmry--daily")["DATE"].iloc[-1]) == "2003-01-02" + assert str(real.get_df("unsmry--daily")["DATE"].values[-1]) == "2003-01-02" real.load_smry(column_keys="FOPT", time_index="monthly") - assert str(real.get_df("unsmry--monthly")["DATE"].iloc[-1]) == "2003-02-01" + assert str(real.get_df("unsmry--monthly")["DATE"].values[-1]) == "2003-02-01" real.load_smry(column_keys="FOPT", time_index="yearly") - assert str(real.get_df("unsmry--yearly")["DATE"].iloc[-1]) == "2004-01-01" + assert str(real.get_df("unsmry--yearly")["DATE"].values[-1]) == "2004-01-01" real.load_smry(column_keys="FOPT", time_index="weekly") - assert str(real.get_df("unsmry--weekly")["DATE"].iloc[-1]) == "2003-01-06" + assert str(real.get_df("unsmry--weekly")["DATE"].values[-1]) == "2003-01-06" def test_singlereal_ecl(tmp="TMP"): @@ -514,7 +523,7 @@ def test_singlereal_ecl(tmp="TMP"): # Try ISO-date for time_index: singledata = real.get_smry(time_index="2000-05-05", column_keys="FOPT") assert "FOPT" in singledata - assert "2000-05-05" in singledata.index + assert str(singledata["DATE"].values[0]).startswith("2000-05-05") # start and end should be included: assert ( @@ -1061,15 +1070,33 @@ def test_find_files_yml(): def test_get_smry_meta(): - """ - Test getting eclsum metadata for single realization. + """Test getting eclsum metadata for single realization. + + Only works for loaded summary data """ testdir = os.path.dirname(os.path.abspath(__file__)) realdir = os.path.join(testdir, "data/testensemble-reek001", "realization-0/iter-0") real = ensemble.ScratchRealization(realdir) - meta = real.get_smry_meta(column_keys=["*"]) - assert isinstance(meta, dict) + dframe = real.load_smry(column_keys=["FOPT"]) + # The metadata dictionary is attached to the dataframe using + # the "attrs" dataframe attribute: + assert "FOPT" in dframe.attrs["meta"] + + # The same metadata is also available through get_smry_meta() + assert set(real.get_smry_meta().keys()) == set(["FOPT"]) + + real.load_smry(column_keys=["FOPR"]) + # This is not cumulative, since we are overwriting unsmry--raw + assert set(real.get_smry_meta().keys()) == set(["FOPR"]) + + real.load_smry(time_index="yearly", column_keys=["FOPT"]) + # Meta-data will accumulate over internalized summary frames: + assert set(real.get_smry_meta().keys()) == set(["FOPR", "FOPT"]) + + # Load all vectors: + real.load_smry() + meta = real.get_smry_meta() assert "FOPT" in meta assert "FOPTH" in meta assert meta["FOPT"]["unit"] == "SM3" diff --git a/tests/test_realizationcombination.py b/tests/test_realizationcombination.py index 33399c10..bd867030 100644 --- a/tests/test_realizationcombination.py +++ b/tests/test_realizationcombination.py @@ -65,7 +65,9 @@ def test_realizationcombination_basic(): assert "parameters.txt" not in vdiff_filtered2.keys() assert "FWPR" in vdiff_filtered2.get_df("unsmry--yearly") - smrymeta = realdiff.get_smry_meta(["FO*"]) + # Summary metadata: + assert "FWPR" in vdiff_filtered2.get_df("unsmry--yearly").attrs["meta"] + smrymeta = realdiff.get_smry_meta() assert "FOPT" in smrymeta smry_params = realdiff.get_df("unsmry--yearly", merge="parameters.txt") @@ -93,16 +95,18 @@ def test_realizationcomb_virt_meta(): real1 = ensemble.ScratchRealization(real1dir) real1.load_smry(time_index="yearly", column_keys=["FOPT", "WOPT*"]) - # Virtualized based on the loades summary 
diff --git a/tests/test_realizationcombination.py b/tests/test_realizationcombination.py
index 33399c10..bd867030 100644
--- a/tests/test_realizationcombination.py
+++ b/tests/test_realizationcombination.py
@@ -65,7 +65,9 @@ def test_realizationcombination_basic():
     assert "parameters.txt" not in vdiff_filtered2.keys()
     assert "FWPR" in vdiff_filtered2.get_df("unsmry--yearly")

-    smrymeta = realdiff.get_smry_meta(["FO*"])
+    # Summary metadata:
+    assert "FWPR" in vdiff_filtered2.get_df("unsmry--yearly").attrs["meta"]
+    smrymeta = realdiff.get_smry_meta()
     assert "FOPT" in smrymeta

     smry_params = realdiff.get_df("unsmry--yearly", merge="parameters.txt")
@@ -93,16 +95,18 @@ def test_realizationcomb_virt_meta():
     real1 = ensemble.ScratchRealization(real1dir)
     real1.load_smry(time_index="yearly", column_keys=["FOPT", "WOPT*"])

-    # Virtualized based on the loades summary vectors, which
+    # Virtualized based on the loaded summary vectors, which
     # differ between the two realizations.
     vreal0 = real0.to_virtual()
     vreal1 = real1.to_virtual()

-    assert "WOPT" not in vreal0.get_smry_meta(column_keys="*")
-    assert "FOPT" in vreal0.get_smry_meta(column_keys="*")
-    assert "WOPT:OP_3" in vreal1.get_smry_meta(column_keys="*")
-    assert "WOPT:OP_3" not in vreal0.get_smry_meta(column_keys="*")
-    assert "FOPT" in vreal1.get_smry_meta(column_keys="*")
+    assert "FOPT" in vreal0.get_smry(column_keys="FOPT").attrs["meta"]
+
+    assert "WOPT" not in vreal0.get_smry_meta()
+    assert "FOPT" in vreal0.get_smry_meta()
+    assert "WOPT:OP_3" in vreal1.get_smry_meta()
+    assert "WOPT:OP_3" not in vreal0.get_smry_meta()
+    assert "FOPT" in vreal1.get_smry_meta()


 def test_manual_aggregation():
diff --git a/tests/test_virtualensemble.py b/tests/test_virtualensemble.py
index 450a9365..ca2db0ff 100644
--- a/tests/test_virtualensemble.py
+++ b/tests/test_virtualensemble.py
@@ -407,10 +407,7 @@ def test_get_smry_meta(tmpdir):
     assert origmeta["FOPT"] == metadict["FOPT"]
     assert origmeta["FWPTH"] == metadict["FWPTH"]

-    assert not vens.get_smry_meta([])
-    assert vens.get_smry_meta(column_keys="FOPT")["FOPT"] == origmeta["FOPT"]
-
-    assert not vens.get_smry_meta(column_keys="WOPT:NOTEXISTING")
+    assert vens.get_smry_meta()["FOPT"] == origmeta["FOPT"]

     # Test that it is retrievable after dumping to disk:
     vens_disk_path = str(tmpdir.join("vens_dumped"))
diff --git a/tests/test_virtualrealization.py b/tests/test_virtualrealization.py
index f98c2d9b..80f500e9 100644
--- a/tests/test_virtualrealization.py
+++ b/tests/test_virtualrealization.py
@@ -129,6 +129,9 @@ def test_virtual_fromdisk(tmpdir):
     )
     assert real.get_df("npv.txt") == 3444

+    # Check that the summary metadata has been conserved:
+    assert "FOPT" in vreal.get_df("unsmry--yearly").attrs["meta"]
+

 def test_get_smry():
     """Check that we can to get_smry() on virtual realizations"""
@@ -155,10 +158,10 @@ def test_get_smry():
     assert all(vfopt == fopt)
     # But note that the dtype of the index in each dataframe differs
     # vfopt.index.dtype == datetime, while fopt.index.dtype == object
-    assert len(fopt.columns) == 1  # DATE is index (unlabeled)
+    assert len(fopt.columns) == 2  # DATE is the first column

     dvfopt = vreal.get_smry(column_keys="FOPT", time_index="daily")
-    assert all(dvfopt.diff() >= 0)
+    assert all(dvfopt["FOPT"].diff().dropna() >= 0)
     # Linear interpolation should give many unique values:
     assert len(dvfopt["FOPT"].unique()) == 1462
     # Length is here 1462 while daily smry for the scratchrealization
@@ -256,7 +259,7 @@ def test_get_smry2():

     alldefaults = vreal.get_smry()
     assert len(alldefaults) == monthly_length
-    assert len(alldefaults.columns) == 49
+    assert len(alldefaults.columns) == 50


 def test_get_smry_cumulative():
@@ -378,6 +381,11 @@ def test_get_smry_meta():

     assert meta["FOPT"]["wgname"] is None

+    assert "FOPT" in vreal.get_df("unsmry--yearly").attrs["meta"]
+    assert (
+        "FOPT" in vreal.get_smry(column_keys="FOPT", time_index="monthly").attrs["meta"]
+    )
+

 def test_get_df_merge():
     """Test the merge support in get_df. Could be tricky for virtualrealizations