From 211b8bb521edb4b61d245048ce0c87132cdd8868 Mon Sep 17 00:00:00 2001
From: Luca Fiorito <lucafiorito.11@gmail.com>
Date: Thu, 29 Aug 2024 18:40:39 +0200
Subject: [PATCH] Read FY samples from excel (#342)

* update

* update

---------

Co-authored-by: Luca Fiorito <lfiorito@sckcen.be>
---
 sandy/core/samples.py | 128 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 104 insertions(+), 24 deletions(-)

diff --git a/sandy/core/samples.py b/sandy/core/samples.py
index 23d1d1ef..699f7c31 100644
--- a/sandy/core/samples.py
+++ b/sandy/core/samples.py
@@ -1,14 +1,85 @@
 import numpy as np
 import pandas as pd
 
-import sandy
+from .xs import Xs, redundant_xs
 
 __author__ = "Luca Fiorito"
 __all__ = [
         "Samples",
+        "read_fy_samples",
         ]
 
 
+def read_fy_samples(file='PERT_MF8_MT454.xlsx'):
+    """
+    Read relative perturbations for fission yields from excel file produced by
+    :obj:`~sandy.core.endf6.Endf6.get_perturbations_fy`.
+
+    Parameters
+    ----------
+    file : `str`, optional
+        The name of the file containing the perturbations for MT454.
+        The default is `'PERT_MF8_MT454.xlsx'`.
+        The default of the tabulated excel file is:
+            
+            - 1st column: energy in eV
+            - 2nd column: ZAP
+            - 3rd - nth columns: sample ID
+        
+        The name of each sheet is a ZAM number.
+
+    Returns
+    -------
+    smp : `pd.DataFrame`
+        Dataframe with perturbation coefficients given per:
+            
+            - ZAM: fissioning nuclide
+            - E: neutron energy
+            - ZAP: fission product
+            - SMP: sample ID
+
+    Notes
+    -----
+    .. note:: This does not use the object :obj:`~sandy.core.samples.Samples`.
+
+
+    Examples
+    --------
+    
+    Default use case.
+    Produce an excel file of samples.
+
+    >>> import sandy
+    >>> tape = sandy.get_endf6_file("jeff_33", "nfpy", [922350, 922380])
+    >>> smps = tape.get_perturbations(2)
+
+    Read it.
+    
+    >>> smps2 = sandy.read_fy_samples()
+
+    Test that it was read correctly.
+
+    >>> smps = smps.astype({'ZAM': 'int32', 'E': 'float64', 'ZAP': 'int32', 'SMP': 'int32', 'VALS': 'float64'})
+    >>> smps2 = smps2.astype({'ZAM': 'int32', 'E': 'float64', 'ZAP': 'int32', 'SMP': 'int32', 'VALS': 'float64'})
+    >>> assert smps2[["ZAM", "E", "ZAP", "SMP"]].equals(smps[["ZAM", "E", "ZAP", "SMP"]])
+    >>> np.testing.assert_array_almost_equal(smps2.VALS, smps.VALS)
+    """
+    all_sheets = pd.read_excel(file, sheet_name=None)
+    
+    smp = []
+    for k, v in all_sheets.items():
+        s = v.ffill().assign(ZAM=int(k)).set_index(["ZAM", "E", "ZAP"]).rename_axis("SMP", axis=1).stack().rename("VALS").reset_index()
+        smp.append(s)
+    
+    # same sorting structure as when it was produced in get_perturbations_fy
+    smp = pd.concat(smp, ignore_index=True).sort_values(by=["ZAM", "E", "ZAP", "SMP"])
+    
+    # for some reasons get_perturbations_fy gives me this
+
+    return smp
+
+
+
 class Samples():
     """
     Container for samples.
@@ -33,7 +104,7 @@ class Samples():
     get_rstd
         Return relative standard deviation vector of samples.   
     iterate_xs_samples
-        Generator that iterates over each sample (in the form of :func:`sandy.Xs`).
+        Generator that iterates over each sample (in the form of :obj:`~sandy.core.xs.Xs`).
     """
 
     _columnsname = "SMP"
@@ -51,16 +122,16 @@ def data(self):
 
         Attributes
         ----------
-        index : `pandas.Index` or `pandas.MultiIndex`
+        index : `pd.Index` or `pandas.MultiIndex`
             indices
-        columns : `pandas.Index`
+        columns : `pd.Index`
             samples numbering
-        values : `numpy.array`
+        values : `np.array`
             sample values as `float`
 
         Returns
         -------
-        `pandas.DataFrame`
+        `pd.DataFrame`
             tabulated samples
         """
         return self._data
@@ -76,7 +147,7 @@ def get_condition_number(self):
 
         Notes
         -----
-        ..note:: the condition number can help assess multicollinearity.
+        ..note:: The condition number can help assess multicollinearity.
         """
         # The first step is to normalize the independent variables to have
         # unit length
@@ -108,15 +179,15 @@ def get_rstd(self):
 
     def iterate_xs_samples(self):
         """
-        Iterate samples one by one and shape them as a :func:`sandy.Xs`
+        Iterate samples one by one and shape them as a :obj:`~sandy.core.xs.Xs`
         dataframe, but with mutligroup structure.
-        This output should be passed to :func:`sandy.Xs._perturb`.
-        The function is called by :func:`sandy.Endf6.apply_perturbations`
+        This output should be passed to :obj:`~sandy.core.xs.Xs._perturb`.
+        The function is called by :obj:`~sandy.core.endf6..Endf6.apply_perturbations`.
 
         Yields
         ------
         n : `int`
-            .
+            sample ID.
         s : `pd.DataFrame`
             dataframe of perturbation coefficients with:
                 
@@ -128,21 +199,25 @@ def iterate_xs_samples(self):
         If samples refer to redundant MT number, the same identical samples
         are passed one level down to the partial MT components.
         For instance:
+
             - MT=4 samples will be assigned to MT=50-91
             - MT=1 samples will be assigned to MT=2 and MT=3
             - MT=18 samples will be assigned to MT=19-21 and MT=38
         
-        ..important:: Assigning samples from redundant MT number to partial
+        .. important:: Assigning samples from redundant MT number to partial
                       components only applies if the partial components do not
                       have their own samples, and it only goes one level deep.
 
         Examples
         --------
-        Get samples fot MT=1
+
+        Get samples fot MT=1.
+        >>> import sandy
         >>> endf6 = sandy.get_endf6_file('jeff_33', 'xs', 10010)
         >>> smps2 = endf6.get_perturbations(1, njoy_kws=dict(err=1, chi=False, mubar=False, nubar=False, errorr33_kws=dict(mt=2)))[33]
 
-        Copy samples each time to a redundant or partial MT
+        Copy samples each time to a redundant or partial MT.
+
         >>> smps1 = sandy.Samples(smps2.data.reset_index().assign(MT=1).set_index(["MAT", "MT", "E"]))
         >>> smps3 = sandy.Samples(smps1.data.reset_index().assign(MT=3).set_index(["MAT", "MT", "E"]))
         >>> smps18 = sandy.Samples(smps1.data.reset_index().assign(MT=18).set_index(["MAT", "MT", "E"]))
@@ -153,7 +228,8 @@ def iterate_xs_samples(self):
         >>> smps101 = sandy.Samples(smps1.data.reset_index().assign(MT=101).set_index(["MAT", "MT", "E"]))
         >>> smps452 = sandy.Samples(smps1.data.reset_index().assign(MT=452).set_index(["MAT", "MT", "E"]))
 
-        Check that samples are passed correctly to daughter MTs (only one level deep)
+        Check that samples are passed correctly to daughter MTs (only one level deep).
+
         >>> expected = pd.MultiIndex.from_product([[125], [51]], names=["MAT", "MT"])
         >>> assert next(smps51.iterate_xs_samples())[1].columns.equals(expected)
 
@@ -183,23 +259,27 @@ def iterate_xs_samples(self):
 
 
         In this example the original covariance contains data for MT=1 and MT=51.
+
         >>> endf6 = sandy.get_endf6_file('jeff_33', 'xs', 942400)
         >>> smps = endf6.get_perturbations(1, njoy_kws=dict(err=1, chi=False, mubar=False, nubar=False, errorr33_kws=dict(mt=[1, 51])))[33]
 
         Then, since MT=1 is redundant, samples are passed to its partial components (MT=2 and MT=3).
+
         >>> expected = pd.MultiIndex.from_product([[9440], [1, 51] + list(sandy.redundant_xs[1])], names=["MAT", "MT"])
         >>> assert next(smps.iterate_xs_samples())[1].columns.equals(expected)
         
         If case one of the partial components already has samples, i.e., MT=2...
+
         >>> endf6 = sandy.get_endf6_file('jeff_33', 'xs', 942400)
         >>> smps = endf6.get_perturbations(1, njoy_kws=dict(err=1, chi=False, mubar=False, nubar=False, errorr33_kws=dict(mt=[1, 2, 51])))[33]
 
         Then the MT=1 samples are not passed to the partial components, which 
         in this case it means that MT=2 is not changed and MT=3 is not created.
+
         >>> expected = pd.MultiIndex.from_product([[9440], [1, 2, 51]], names=["MAT", "MT"])
         >>> assert next(smps.iterate_xs_samples())[1].columns.equals(expected)
         """
-        levels = sandy.Xs._columnsnames
+        levels = Xs._columnsnames
         df = self.data.unstack(level=levels)
         
         # -- Iterate over samples
@@ -209,7 +289,7 @@ def iterate_xs_samples(self):
             for mat in s.columns.get_level_values("MAT").unique():
                 
                 # -- Iterate redundant xs (from MT107 to MT1)
-                for k, v in sandy.redundant_xs.items():
+                for k, v in redundant_xs.items():
                     if not (mat, k) in s.columns:
                         continue
                     daughters = pd.MultiIndex.from_product([[mat], v], names=["MAT", "MT"])
@@ -242,24 +322,24 @@ def _mean_convergence(self):
         foo = lambda x: smp.loc[:x].mean()
         return pd.DataFrame(map(foo, rng), index=rng)
 
-    def from_excel(file, beg=None, end=None):
+    def from_excel(file, beg=None, end=None):   # this should be corrected as a class method
         """
         Read perturbation coefficients (for nubar and xs) from excel file.
         The file format is compatible with what written in
-        :obj: `~sandy.cov.CategoryCov.sampling`
+        :obj:`~sandy.cov.CategoryCov.sampling`
         
         Parameters
         ----------
         file : `str`
-            excel file name.
+            Excel file name.
         beg : `int`
-            first sample to consider. Default is first in dataframe.
+            First sample to consider. Default is first in dataframe.
         end : `int`
-            last samples to conisder. Default is last in dataframe.
+            Last samples to conisder. Default is last in dataframe.
 
         Returns
         -------
-        smp : :obj: `Samples`
+        smp : :obj:`~sandy.core.samples.Samples`
             Samples dataframe.
         """
         df = pd.read_excel(file, sheet_name="SMP")
@@ -275,5 +355,5 @@ def from_excel(file, beg=None, end=None):
 
         df = df.iloc[:, loc+2:].loc[:, beg:end].reset_index(drop=True)
         df.index = idx
-        smp = sandy.Samples(df)
+        smp = Samples(df)
         return smp