From b4f5e871cb008f092391d11e67d075c2c17c278c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5vard=20Berland?= <havb@equinor.com>
Date: Tue, 9 Mar 2021 12:53:40 +0100
Subject: [PATCH] Resolve outstanding issues for tests to pass.

Mostly related to realization.get_smry() returning a dataframe with a
dummy index and the dates in the DATE column.
---
 src/fmu/ensemble/observations.py       |   2 +-
 src/fmu/ensemble/virtualensemble.py    |   5 --
 src/fmu/ensemble/virtualrealization.py |  12 ++-
 tests/test_observations.py             | 102 +++++++++++++++++++------
 tests/test_virtualrealization.py       |   6 +-
 5 files changed, 91 insertions(+), 36 deletions(-)

diff --git a/src/fmu/ensemble/observations.py b/src/fmu/ensemble/observations.py
index 4344cf67..af453c49 100644
--- a/src/fmu/ensemble/observations.py
+++ b/src/fmu/ensemble/observations.py
@@ -172,7 +172,7 @@ def load_smry(self, realization, smryvector, time_index="yearly", smryerror=None
         """
         dataseries = realization.get_smry(
             column_keys=[smryvector], time_index=time_index
-        )[smryvector]
+        )[["DATE", smryvector]].set_index("DATE")[smryvector]
 
         # In the context of this function, datetimes are not supported. Ensure dates:
         if isinstance(dataseries.index, pd.DatetimeIndex):
diff --git a/src/fmu/ensemble/virtualensemble.py b/src/fmu/ensemble/virtualensemble.py
index 35ba07a7..80957324 100644
--- a/src/fmu/ensemble/virtualensemble.py
+++ b/src/fmu/ensemble/virtualensemble.py
@@ -872,11 +872,6 @@ def get_smry(self, column_keys=None, time_index="monthly"):
 
             # Now ask the VirtualRealization to do interpolation
             interp = vreal.get_smry(column_keys=column_keys, time_index=time_index)
-            # Assume we get back a dataframe indexed by the dates from vreal
-            # We must reset that index, and ensure the index column
-            # gets a correct name
-            interp.index = interp.index.set_names(["DATE"])
-            interp = interp.reset_index()
             interp["REAL"] = realidx
             smry_interpolated.append(interp)
         return pd.concat(smry_interpolated, ignore_index=True, sort=False)
diff --git a/src/fmu/ensemble/virtualrealization.py b/src/fmu/ensemble/virtualrealization.py
index 113d346b..7f97d252 100644
--- a/src/fmu/ensemble/virtualrealization.py
+++ b/src/fmu/ensemble/virtualrealization.py
@@ -287,6 +287,10 @@ def get_smry(self, column_keys=None, time_index="monthly"):
         Returns data for those columns that are known, unknown
         columns will be issued a warning for.
 
+        The returned dataframe will have a dummy index, and the dates in
+        the column DATE. The DATE column will contain either datetime.datetime
+        or pandas.Timestamp objects.
+
         BUG: If some columns are available only in certain dataframes,
         we might miss them (e.g. we ask for yearly FOPT, and we have
         yearly smry with only WOPT data, and FOPT is only in daily
@@ -359,9 +363,10 @@ def get_smry(self, column_keys=None, time_index="monthly"):
         )
 
         smry = self.get_df("unsmry--" + chosen_smry)[["DATE"] + column_keys]
+        # Incoming index is a dummy; move DATE into the index for interpolation
+        smry.set_index("DATE", inplace=True)
 
         # Add the extra datetimes to interpolate at.
-        smry.set_index("DATE", inplace=True)
         smry.index = pd.to_datetime(smry.index)
         smry = smry.append(
             pd.DataFrame(index=pd.to_datetime(time_index_dt)), sort=False
@@ -390,8 +395,9 @@ def get_smry(self, column_keys=None, time_index="monthly"):
                 smry[noncum_columns].fillna(method="bfill").fillna(value=0)
             )
 
-        smry.index = smry.index.set_names(["DATE"])
-        return smry.loc[pd.to_datetime(time_index_dt)]
+        smry = smry.loc[pd.to_datetime(time_index_dt)]
+        smry.index.name = "DATE"
+        return smry.reset_index()
 
     def get_smry_dates(self, freq="monthly", normalize=False):
         """Return list of datetimes available in the realization
diff --git a/tests/test_observations.py b/tests/test_observations.py
index 8c64fab0..520fc9f9 100644
--- a/tests/test_observations.py
+++ b/tests/test_observations.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Testing observations in fmu-ensemble."""
 
 import os
@@ -66,24 +65,26 @@ def test_real_mismatch():
     )
     realmis = obs.mismatch(real)
 
-    # Check layout of returned data
-    assert isinstance(realmis, pd.DataFrame)
-    assert len(realmis) == 1
+    pd.testing.assert_frame_equal(
+        realmis,
+        pd.DataFrame(
+            [
+                {
+                    "OBSTYPE": "txt",
+                    "OBSKEY": "parameters.txt/FWL",
+                    "MISMATCH": -2.0,
+                    "L1": 2.0,
+                    "L2": 4.0,
+                    "SIMVALUE": 1700,
+                    "OBSVALUE": 1702,
+                    "MEASERROR": 1,
+                    "SIGN": -1,
+                }
+            ]
+        ),
+    )
     assert "REAL" not in realmis.columns  # should only be there for ensembles.
-    assert "OBSTYPE" in realmis.columns
-    assert "OBSKEY" in realmis.columns
     assert "DATE" not in realmis.columns  # date is not relevant
-    assert "MISMATCH" in realmis.columns
-    assert "L1" in realmis.columns
-    assert "L2" in realmis.columns
-
-    # Check actually computed values, there should only be one row with data:
-    assert realmis.loc[0, "OBSTYPE"] == "txt"
-    assert realmis.loc[0, "OBSKEY"] == "parameters.txt/FWL"
-    assert realmis.loc[0, "MISMATCH"] == -2
-    assert realmis.loc[0, "SIGN"] == -1
-    assert realmis.loc[0, "L1"] == 2
-    assert realmis.loc[0, "L2"] == 4
 
     # Another observation set:
     obs2 = Observations(
@@ -96,12 +97,46 @@ def test_real_mismatch():
         }
     )
     realmis2 = obs2.mismatch(real)
-    assert len(realmis2) == 3
-    assert "parameters.txt/RMS_SEED" in realmis2["OBSKEY"].values
-    assert "outputs.txt/top_structure" in realmis2["OBSKEY"].values
-    assert "npv.txt" in realmis2["OBSKEY"].values
-
-    # assert much more!
+    pd.testing.assert_frame_equal(
+        realmis2,
+        pd.DataFrame(
+            [
+                {
+                    "OBSTYPE": "txt",
+                    "OBSKEY": "parameters.txt/RMS_SEED",
+                    "MISMATCH": -177148215.0,
+                    "L1": 177148215.0,
+                    "L2": 3.1381490077686224e16,
+                    "SIMVALUE": 422851785,
+                    "OBSVALUE": 600000000,
+                    "MEASERROR": 1,
+                    "SIGN": -1,
+                },
+                {
+                    "OBSTYPE": "txt",
+                    "OBSKEY": "outputs.txt/top_structure",
+                    "MISMATCH": 24.0,
+                    "L1": 24.0,
+                    "L2": 576.0,
+                    "SIMVALUE": 3224,
+                    "OBSVALUE": 3200,
+                    "MEASERROR": 1,
+                    "SIGN": 1,
+                },
+                {
+                    "OBSTYPE": "scalar",
+                    "OBSKEY": "npv.txt",
+                    "MISMATCH": 44.0,
+                    "L1": 44.0,
+                    "L2": 1936.0,
+                    "SIMVALUE": 3444,
+                    "OBSVALUE": 3400,
+                    "MEASERROR": 1,
+                    "SIGN": 1,
+                },
+            ]
+        ),
+    )
 
     # Test that we can write the observations to yaml
     # and verify that the exported yaml can be reimported
@@ -215,6 +250,26 @@ def test_smry():
     # loaded realization.
     mismatch = obs.mismatch(real)
 
+    # Assert the first row exactly:
+    pd.testing.assert_frame_equal(
+        mismatch.head(1),
+        pd.DataFrame(
+            [
+                {
+                    "OBSTYPE": "smry",
+                    "OBSKEY": "WBP4:OP_1",
+                    "DATE": datetime.date(2001, 1, 1),
+                    "MEASERROR": 4.0,
+                    "MISMATCH": -2.159454345703125,
+                    "OBSVALUE": 251.0,
+                    "SIMVALUE": 248.84054565429688,
+                    "L1": 2.159454345703125,
+                    "L2": 4.663243071176112,
+                    "SIGN": -1,
+                }
+            ]
+        ),
+    )
     assert len(mismatch) == 21  # later: implement counting in the obs object
     assert mismatch.L1.sum() > 0
     assert mismatch.L2.sum() > 0
@@ -537,7 +592,6 @@ def test_ensset_mismatch():
         == mismatch[mismatch.ENSEMBLE == "iter-1"].L1.sum()
     )
 
-    # This is quite hard to input in dict-format. Better via YAML..
     obs_pr = Observations(
         {
             "smry": [
diff --git a/tests/test_virtualrealization.py b/tests/test_virtualrealization.py
index f98c2d9b..bed01c12 100644
--- a/tests/test_virtualrealization.py
+++ b/tests/test_virtualrealization.py
@@ -155,10 +155,10 @@ def test_get_smry():
     assert all(vfopt == fopt)
     # But note that the dtype of the index in each dataframe differs
     # vfopt.index.dtype == datetime, while fopt.index.dtype == object
-    assert len(fopt.columns) == 1  # DATE is index (unlabeled)
+    assert len(fopt.columns) == 2  # DATE is the first column
 
     dvfopt = vreal.get_smry(column_keys="FOPT", time_index="daily")
-    assert all(dvfopt.diff() >= 0)
+    assert all(dvfopt["FOPT"].diff().dropna() >= 0)
     # Linear interpolation should give many unique values:
     assert len(dvfopt["FOPT"].unique()) == 1462
     # Length is here 1462 while daily smry for the scratchrealization
@@ -256,7 +256,7 @@ def test_get_smry2():
 
     alldefaults = vreal.get_smry()
     assert len(alldefaults) == monthly_length
-    assert len(alldefaults.columns) == 49
+    assert len(alldefaults.columns) == 50
 
 
 def test_get_smry_cumulative():