From 1b6b20cb6d4a1e8fb5f114dc6766512cd655fa89 Mon Sep 17 00:00:00 2001
From: "wouteredeling@gmail.com" <wouteredeling@gmail.com>
Date: Thu, 1 Oct 2020 13:00:12 +0200
Subject: [PATCH 1/7] more efficient eval confidence intervals

---
 easyvvuq/analysis/qmc_analysis.py | 31 ++++++++++++-------------------
 easyvvuq/sampling/mc_sampler.py   |  2 +-
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py
index 8edb3214a..67b22a949 100644
--- a/easyvvuq/analysis/qmc_analysis.py
+++ b/easyvvuq/analysis/qmc_analysis.py
@@ -5,6 +5,7 @@
 """
 import logging
 import numpy as np
+from scipy.stats import norm
 from easyvvuq import OutputType
 from .base import BaseAnalysisElement
 from easyvvuq.sampling import QMCSampler
@@ -157,33 +158,25 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
         sobols_total_dict = {}
         conf_total_dict = {}
 
+        # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
+        # see reference above.
+        f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
+        r = np.random.randint(n_mc, size=(n_mc, n_samples))
+
         for j, param_name in enumerate(self.sampler.vary.get_keys()):
-            # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
-            # see reference above.
-            f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
             # our point estimate for the 1st and total order Sobol indices
             value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
             value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
-            # array for resampled estimates
-            sobols_first = np.zeros([n_samples, n_qoi])
-            sobols_total = np.zeros([n_samples, n_qoi])
-            for i in range(n_samples):
-                # resample, must be done on already seperated output due to
-                # the specific order in samples
-                idx = np.random.randint(0, n_mc - 1, n_mc)
-                f_M2_resample = f_M2[idx]
-                f_M1_resample = f_M1[idx]
-                f_Ni_resample = f_Ni[idx]
-                # recompute Sobol indices
-                sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample,
-                                                    f_Ni_resample[:, j])
-                sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample,
-                                                    f_Ni_resample[:, j])
-            # compute confidence intervals
+            #sobols computed from resampled data points
+            sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
+            sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
+
+            # compute confidence intervals based on percentiles
             _, low_first, high_first = confidence_interval(sobols_first, value_first,
                                                            alpha, pivotal=True)
             _, low_total, high_total = confidence_interval(sobols_total, value_total,
                                                            alpha, pivotal=True)
+
             # store results
             sobols_first_dict[param_name] = value_first
             conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
diff --git a/easyvvuq/sampling/mc_sampler.py b/easyvvuq/sampling/mc_sampler.py
index de9ee2f12..a6bfa2929 100644
--- a/easyvvuq/sampling/mc_sampler.py
+++ b/easyvvuq/sampling/mc_sampler.py
@@ -113,7 +113,7 @@ def saltelli(self, n_mc):
         self.xi_mc[(step - 1):self.max_num:step] = M_1
         # store N_i entries between M2 and M1
         for i in range(self.n_params):
-            N_i = np.array(M_2)
+            N_i = np.copy(M_2)
             # N_i = M2 with i-th colum from M1
             N_i[:, i] = M_1[:, i]
             self.xi_mc[(i + 1):self.max_num:step] = N_i

From fcff1c3fdf2b1c0e74c2f93acadd7a3e61a9a4a8 Mon Sep 17 00:00:00 2001
From: "wouteredeling@gmail.com" <wouteredeling@gmail.com>
Date: Thu, 1 Oct 2020 13:01:05 +0200
Subject: [PATCH 2/7] fix pepe8

---
 easyvvuq/analysis/qmc_analysis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py
index 67b22a949..ea603ba21 100644
--- a/easyvvuq/analysis/qmc_analysis.py
+++ b/easyvvuq/analysis/qmc_analysis.py
@@ -167,7 +167,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
             # our point estimate for the 1st and total order Sobol indices
             value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
             value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
-            #sobols computed from resampled data points
+            # sobols computed from resampled data points
             sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
             sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
 

From 8fa541a9cd01867582466268353bd90d8d029a48 Mon Sep 17 00:00:00 2001
From: "wouteredeling@gmail.com" <wouteredeling@gmail.com>
Date: Thu, 1 Oct 2020 15:27:22 +0200
Subject: [PATCH 3/7] mod test

---
 tests/test_mc_analysis.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/test_mc_analysis.py b/tests/test_mc_analysis.py
index 546724e03..7405c965d 100644
--- a/tests/test_mc_analysis.py
+++ b/tests/test_mc_analysis.py
@@ -75,14 +75,14 @@ def test_sobol_bootstrap(data):
     assert(s1['x2'] == pytest.approx(0.20727553481694053, 0.01))
     assert(st['x1'] == pytest.approx(0.8132793654841785, 0.01))
     assert(st['x2'] == pytest.approx(0.3804962894947435, 0.01))
-    assert(s1_conf['x1']['low'][0] == pytest.approx(0.14387035, 0.01))
-    assert(s1_conf['x1']['high'][0] == pytest.approx(0.89428774, 0.01))
-    assert(s1_conf['x2']['low'][0] == pytest.approx(-0.11063341, 0.01))
-    assert(s1_conf['x2']['high'][0] == pytest.approx(0.46752829, 0.01))
-    assert(st_conf['x1']['low'][0] == pytest.approx(0.61368887, 0.01))
-    assert(st_conf['x1']['high'][0] == pytest.approx(1.01858671, 0.01))
-    assert(st_conf['x2']['low'][0] == pytest.approx(0.24361207, 0.01))
-    assert(st_conf['x2']['high'][0] == pytest.approx(0.49214117, 0.01))
+    assert(s1_conf['x1']['low'] == pytest.approx(0.14387035, 0.01))
+    assert(s1_conf['x1']['high'] == pytest.approx(0.89428774, 0.01))
+    assert(s1_conf['x2']['low'] == pytest.approx(-0.11063341, 0.01))
+    assert(s1_conf['x2']['high'] == pytest.approx(0.46752829, 0.01))
+    assert(st_conf['x1']['low'] == pytest.approx(0.61368887, 0.01))
+    assert(st_conf['x1']['high'] == pytest.approx(1.01858671, 0.01))
+    assert(st_conf['x2']['low'] == pytest.approx(0.24361207, 0.01))
+    assert(st_conf['x2']['high'] == pytest.approx(0.49214117, 0.01))
 
 
 def test_separate_output_values(data):

From d6a7f9ee47458a53b8579f45b3b19e31d2b7d145 Mon Sep 17 00:00:00 2001
From: "wouteredeling@gmail.com" <wouteredeling@gmail.com>
Date: Wed, 28 Oct 2020 08:29:57 +0100
Subject: [PATCH 4/7] added HDF5 collator

---
 easyvvuq/analysis/qmc_analysis.py  |  34 +++++--
 easyvvuq/collate/__init__.py       |   1 +
 easyvvuq/collate/aggregate_hdf5.py | 157 +++++++++++++++++++++++++++++
 3 files changed, 182 insertions(+), 10 deletions(-)
 create mode 100644 easyvvuq/collate/aggregate_hdf5.py

diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py
index ea603ba21..ee82b3dba 100644
--- a/easyvvuq/analysis/qmc_analysis.py
+++ b/easyvvuq/analysis/qmc_analysis.py
@@ -5,6 +5,7 @@
 """
 import logging
 import numpy as np
+import pandas as pd
 from scipy.stats import norm
 from easyvvuq import OutputType
 from .base import BaseAnalysisElement
@@ -64,7 +65,7 @@ def analyse(self, data_frame):
             ['statistical_moments', 'percentiles', 'sobol_indices',
              'correlation_matrices', 'output_distributions']
         """
-        if data_frame.empty:
+        if type(data_frame) == pd.DataFrame and data_frame.empty:
             raise RuntimeError(
                 "No data in data frame passed to analyse element")
 
@@ -101,7 +102,7 @@ def get_samples(self, data_frame):
 
         Parameters
         ----------
-        data_frame : the EasyVVUQ Pandas dataframe.
+        data_frame : the EasyVVUQ Pandas or HDF5 dataframe.
 
         Returns
         -------
@@ -110,13 +111,20 @@ def get_samples(self, data_frame):
 
         """
         samples = {k: [] for k in self.qoi_cols}
-        for run_id in data_frame.run_id.unique():
+        if type(data_frame) == pd.DataFrame:
+            for run_id in data_frame.run_id.unique():
+                for k in self.qoi_cols:
+                    values = data_frame.loc[data_frame['run_id'] == run_id][k].values
+                    samples[k].append(values)
+        elif type(data_frame) == dict:
+            #dict is not sorted, make sure the runs are processed in ascending order
             for k in self.qoi_cols:
-                data = data_frame.loc[data_frame['run_id'] == run_id][k]
-                samples[k].append(data.values)
+                run_id_int = [int(run_id.split('Run_')[-1]) for run_id in data_frame[k].keys()]
+                for run_id in range(1, np.max(run_id_int) + 1):
+                        samples[k].append(data_frame[k]['Run_' + str(run_id)])
         return samples
 
-    def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
+    def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
         """
         Computes the first order and total order Sobol indices using Saltelli's
         method. To assess the sampling inaccuracy, bootstrap confidence intervals
@@ -144,7 +152,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
         assert len(samples) > 0
         assert alpha > 0.0
         assert alpha < 1.0
-        assert n_samples > 0
+        assert n_bootstrap > 0
 
         # convert to array
         samples = np.array(samples)
@@ -152,7 +160,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
         # and the size of the QoI
         n_params = self.sampler.n_params
         n_mc = self.sampler.n_mc_samples
-        n_qoi = samples[0].size
+        # n_qoi = samples[0].size
         sobols_first_dict = {}
         conf_first_dict = {}
         sobols_total_dict = {}
@@ -161,12 +169,14 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
         # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params
         # see reference above.
         f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc)
-        r = np.random.randint(n_mc, size=(n_mc, n_samples))
+        r = np.random.randint(n_mc, size=(n_mc, n_bootstrap))
 
         for j, param_name in enumerate(self.sampler.vary.get_keys()):
+
             # our point estimate for the 1st and total order Sobol indices
             value_first = self._first_order(f_M2, f_M1, f_Ni[:, j])
             value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
+
             # sobols computed from resampled data points
             sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
             sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
@@ -176,7 +186,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000):
                                                            alpha, pivotal=True)
             _, low_total, high_total = confidence_interval(sobols_total, value_total,
                                                            alpha, pivotal=True)
-
+            print('e')
             # store results
             sobols_first_dict[param_name] = value_first
             conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
@@ -228,6 +238,7 @@ def _separate_output_values(samples, n_params, n_mc_samples):
 
         return f_M2, f_M1, f_Ni
 
+    # Adapted from SALib
     @staticmethod
     def _first_order(f_M2, f_M1, f_Ni):
         """Calculate first order sensitivity indices.
@@ -245,9 +256,12 @@ def _first_order(f_M2, f_M1, f_Ni):
         -------
         A NumPy array of the n_params first-order Sobol indices.
         """
+        print('a')
         V = np.var(np.r_[f_M2, f_M1], axis=0)
+        print('b')
         return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0)
 
+    # Adapted from SALib
     @staticmethod
     def _total_order(f_M2, f_M1, f_Ni):
         """Calculate total order sensitivity indices. See also:
diff --git a/easyvvuq/collate/__init__.py b/easyvvuq/collate/__init__.py
index 833b81632..42411d31e 100644
--- a/easyvvuq/collate/__init__.py
+++ b/easyvvuq/collate/__init__.py
@@ -1,3 +1,4 @@
 from .base import BaseCollationElement
 from .aggregate_samples import AggregateSamples
 from .aggregate_by_variables import AggregateByVariables
+from .aggregate_hdf5 import AggregateHDF5
diff --git a/easyvvuq/collate/aggregate_hdf5.py b/easyvvuq/collate/aggregate_hdf5.py
new file mode 100644
index 000000000..a3f3dc1b2
--- /dev/null
+++ b/easyvvuq/collate/aggregate_hdf5.py
@@ -0,0 +1,157 @@
+"""Provides an element for aggregation of results from all complete runs.
+"""
+
+import os
+from .base import BaseCollationElement
+from easyvvuq import OutputType, constants
+import h5py
+
+__copyright__ = """
+
+    Copyright 2018 Robin A. Richardson, David W. Wright
+
+    This file is part of EasyVVUQ
+
+    EasyVVUQ is free software: you can redistribute it and/or modify
+    it under the terms of the Lesser GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    EasyVVUQ is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    Lesser GNU General Public License for more details.
+
+    You should have received a copy of the Lesser GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+"""
+__license__ = "LGPL"
+
+
+class AggregateHDF5(BaseCollationElement, collater_name="aggregate_hdf5"):
+    """
+    Aggregate the results of all completed simulations described by the
+    Campaign. Will simply concatenate all simulation data into one big
+    HDF5 DataFrame. This dataframe is stored in the campaign directory.
+
+    """
+
+    def collate(self, campaign, app_id):
+        """
+        Collected the decoded run results for all completed runs with ENCODED status
+
+        Parameters
+        ----------
+        campaign : :obj:`easyvvuq.campaign.Campaign`
+            EasyVVUQ coordination object from which to get information on runs
+            to be collated.
+
+        Returns
+        -------
+        `int`:
+            The number of new data rows added during collation
+        """
+
+        print('Collating data...')
+        decoder = campaign._active_app_decoder
+
+        if decoder.output_type != OutputType.SAMPLE:
+            raise RuntimeError('Can only aggregate sample type data')
+
+        # Aggregate any uncollated runs into a dataframe (for appending to existing full df)
+        new_data = {}
+        qoi_cols = campaign._active_app_decoder.output_columns
+
+        # Loop through all runs with status ENCODED (and therefore not yet COLLATED)
+        processed_run_IDs = []
+        for run_id, run_info in campaign.campaign_db.runs(
+                status=constants.Status.ENCODED, app_id=app_id):
+
+            # Use decoder to check if run has completed (in general application-specific)
+            if decoder.sim_complete(run_info=run_info):
+
+                run_data = decoder.parse_sim_output(run_info=run_info)
+                new_data[run_id] = {}
+                for qoi in qoi_cols:
+                    new_data[run_id][qoi] = run_data[qoi].values
+                processed_run_IDs.append(run_id)
+
+        self.append_data(campaign, new_data)
+        campaign.campaign_db.set_run_statuses(processed_run_IDs, constants.Status.COLLATED)
+        print('done.')
+
+        return len(processed_run_IDs)
+
+    def append_data(self, campaign, new_data):
+        """
+
+        Parameters
+        ----------
+        campaign : EasyVVUQ campaign object
+        new_data : a dictionary containing the new simulation results for
+        each quantity of interest. The results are appended to the HDF5 file
+        samples.hdf5, located in the Campaign work directory
+
+        Returns
+        -------
+        None.
+
+        """
+        hdf5_file = os.path.join(campaign.campaign_dir, 'samples.hdf5')
+        #retrieve the names of the quantities of interest
+        qoi_cols = campaign._active_app_decoder.output_columns
+
+        h5f = h5py.File(hdf5_file, 'a')
+        for qoi in qoi_cols:
+            #Each QoI is stored in a separate group
+            if not qoi in h5f.keys():
+                h5f.create_group(qoi)
+            #Each QoI sample is stored in a dataset qoi/run_id
+            for run_id in new_data.keys():
+                h5f.create_dataset(qoi + '/' + run_id, data=new_data[run_id][qoi])
+        h5f.close()
+
+    def get_collated_dataframe(self, campaign, app_id):
+        """
+        Read the samples from the HDF5 file samples.hdf5, and store them in
+        a dictionary to be used in the analysis classes.
+
+        Parameters
+        ----------
+        campaign : EasyVVUQ Campaign object
+        app_id : the EasyVVUQ app id.
+
+        Returns
+        -------
+        data : the simulation samples stored in a dictionary. Format of the
+        dict is data[qoi][run_id], e.g. data['velocity']['Run_1']
+
+        """
+
+        hdf5_file = os.path.join(campaign.campaign_dir, 'samples.hdf5')
+        qoi_cols = campaign._active_app_decoder.output_columns
+        h5f = h5py.File(hdf5_file, 'r')
+        data = {}
+        for qoi in qoi_cols:
+            data[qoi] = {}
+            for run_id in h5f[qoi].keys():
+                data[qoi][run_id] = h5f[qoi + '/' + run_id][()]
+        h5f.close()
+        return data
+
+    def element_version(self):
+        return "0.1"
+
+    def is_restartable(self):
+        return True
+
+    def get_restart_dict(self):
+        """Return dict required for restart from serlialized form.
+
+        Returns
+        -------
+        dict:
+            No restart parameters required
+        """
+        return {}

From 0c6e52316f4711ad101df04bb8ead6cda257f05f Mon Sep 17 00:00:00 2001
From: "wouteredeling@gmail.com" <wouteredeling@gmail.com>
Date: Wed, 28 Oct 2020 08:33:02 +0100
Subject: [PATCH 5/7] lower n_bootstrap

---
 easyvvuq/analysis/qmc_analysis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py
index ee82b3dba..f9391e298 100644
--- a/easyvvuq/analysis/qmc_analysis.py
+++ b/easyvvuq/analysis/qmc_analysis.py
@@ -124,7 +124,7 @@ def get_samples(self, data_frame):
                         samples[k].append(data_frame[k]['Run_' + str(run_id)])
         return samples
 
-    def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
+    def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=100):
         """
         Computes the first order and total order Sobol indices using Saltelli's
         method. To assess the sampling inaccuracy, bootstrap confidence intervals

From a60dae1930b23fe8453214206941d13b53a0ccd5 Mon Sep 17 00:00:00 2001
From: "wouteredeling@gmail.com" <wouteredeling@gmail.com>
Date: Thu, 29 Oct 2020 15:35:58 +0100
Subject: [PATCH 6/7] added a sequantial implementation of the bootstrap conf
 intervals

---
 easyvvuq/analysis/qmc_analysis.py | 43 ++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py
index f9391e298..fc406e207 100644
--- a/easyvvuq/analysis/qmc_analysis.py
+++ b/easyvvuq/analysis/qmc_analysis.py
@@ -50,7 +50,7 @@ def element_version(self):
         """Version of this element"""
         return "0.2"
 
-    def analyse(self, data_frame):
+    def analyse(self, data_frame, **kwargs):
         """Perform QMC analysis on a given pandas DataFrame.
 
         Parameters
@@ -80,7 +80,7 @@ def analyse(self, data_frame):
         }
 
         # Extract output values for each quantity of interest from Dataframe
-        samples = self.get_samples(data_frame)
+        samples = self.get_samples(data_frame, **kwargs)
 
         # Compute descriptive statistics for each quantity of interest
         for k in qoi_cols:
@@ -96,13 +96,13 @@ def analyse(self, data_frame):
 
         return results
 
-    def get_samples(self, data_frame):
+    def get_samples(self, data_frame, **kwargs):
         """
         Converts the Pandas dataframe into a dictionary.
 
         Parameters
         ----------
-        data_frame : the EasyVVUQ Pandas or HDF5 dataframe.
+        data_frame : the EasyVVUQ Pandas or dictonary dataframe.
 
         Returns
         -------
@@ -114,17 +114,23 @@ def get_samples(self, data_frame):
         if type(data_frame) == pd.DataFrame:
             for run_id in data_frame.run_id.unique():
                 for k in self.qoi_cols:
-                    values = data_frame.loc[data_frame['run_id'] == run_id][k].values
+                    if 'output_index' in kwargs:
+                        values = data_frame.loc[data_frame['run_id'] == run_id][k].values[kwargs['output_index']]
+                    else:
+                        values = data_frame.loc[data_frame['run_id'] == run_id][k].values
                     samples[k].append(values)
         elif type(data_frame) == dict:
             #dict is not sorted, make sure the runs are processed in ascending order
             for k in self.qoi_cols:
                 run_id_int = [int(run_id.split('Run_')[-1]) for run_id in data_frame[k].keys()]
                 for run_id in range(1, np.max(run_id_int) + 1):
+                    if 'output_index' in kwargs:
+                        samples[k].append(data_frame[k]['Run_' + str(run_id)][kwargs['output_index']])                       
+                    else:
                         samples[k].append(data_frame[k]['Run_' + str(run_id)])
         return samples
 
-    def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=100):
+    def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
         """
         Computes the first order and total order Sobol indices using Saltelli's
         method. To assess the sampling inaccuracy, bootstrap confidence intervals
@@ -160,7 +166,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=100):
         # and the size of the QoI
         n_params = self.sampler.n_params
         n_mc = self.sampler.n_mc_samples
-        # n_qoi = samples[0].size
+        n_qoi = samples[0].size
         sobols_first_dict = {}
         conf_first_dict = {}
         sobols_total_dict = {}
@@ -178,15 +184,30 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=100):
             value_total = self._total_order(f_M2, f_M1, f_Ni[:, j])
 
             # sobols computed from resampled data points
-            sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
-            sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
+            if n_mc * n_bootstrap * n_qoi <= 10**7:
+                #this is a vectorized computation, Is fast, but f_M2[r] will be of size
+                #(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash, 
+                #especially when dealing with large QoI (n_qoi >> 1). So this is only done
+                #when n_mc * n_bootstrap * n_qoi <= 10**7
+                print("Vectorized bootstrapping")
+                sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j])
+                sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j])
+            else:
+                #array for resampled estimates
+                sobols_first = np.zeros([n_bootstrap, n_qoi])
+                sobols_total = np.zeros([n_bootstrap, n_qoi])
+                print("Sequential bootstrapping")
+                #non-vectorized implementation
+                for i in range(n_bootstrap):
+                    #resampled sample matrices of size (n_mc, n_qoi)
+                    sobols_first[i] = self._first_order(f_M2[r[i]], f_M1[r[i]], f_Ni[r[i], j])
+                    sobols_total[i] = self._total_order(f_M2[r[i]], f_M1[r[i]], f_Ni[r[i], j])
 
             # compute confidence intervals based on percentiles
             _, low_first, high_first = confidence_interval(sobols_first, value_first,
                                                            alpha, pivotal=True)
             _, low_total, high_total = confidence_interval(sobols_total, value_total,
                                                            alpha, pivotal=True)
-            print('e')
             # store results
             sobols_first_dict[param_name] = value_first
             conf_first_dict[param_name] = {'low': low_first, 'high': high_first}
@@ -256,9 +277,7 @@ def _first_order(f_M2, f_M1, f_Ni):
         -------
         A NumPy array of the n_params first-order Sobol indices.
         """
-        print('a')
         V = np.var(np.r_[f_M2, f_M1], axis=0)
-        print('b')
         return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0)
 
     # Adapted from SALib

From 89aafa00ae43f7ed435d4478bc4e444cf192be03 Mon Sep 17 00:00:00 2001
From: "wouteredeling@gmail.com" <wouteredeling@gmail.com>
Date: Fri, 30 Oct 2020 15:37:53 +0100
Subject: [PATCH 7/7] added merge_campaign subroutine

---
 easyvvuq/analysis/qmc_analysis.py | 44 ++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py
index fc406e207..8db62f2e3 100644
--- a/easyvvuq/analysis/qmc_analysis.py
+++ b/easyvvuq/analysis/qmc_analysis.py
@@ -69,6 +69,16 @@ def analyse(self, data_frame, **kwargs):
             raise RuntimeError(
                 "No data in data frame passed to analyse element")
 
+        # Extract output values for each quantity of interest from Dataframe
+        samples = self.get_samples(data_frame, **kwargs)
+        
+        return self.compute_results(samples)
+
+    def compute_results(self, samples):
+        """
+        
+        """
+        
         qoi_cols = self.qoi_cols
 
         results = {
@@ -79,9 +89,6 @@ def analyse(self, data_frame, **kwargs):
             'conf_sobols_total': {k: {} for k in qoi_cols}
         }
 
-        # Extract output values for each quantity of interest from Dataframe
-        samples = self.get_samples(data_frame, **kwargs)
-
         # Compute descriptive statistics for each quantity of interest
         for k in qoi_cols:
             results['statistical_moments'][k] = {'mean': np.mean(samples[k], axis=0),
@@ -129,7 +136,35 @@ def get_samples(self, data_frame, **kwargs):
                     else:
                         samples[k].append(data_frame[k]['Run_' + str(run_id)])
         return samples
+    
+    def merge_campaigns(self, data_frames):
+        """
+        Merge the dataframes of multiple (Q)MC campaigns, and then compute the results.
+        Only use this if the same vary dict was used in the campaigns.
+
+        Parameters
+        ----------
+        data_frames : list of EasyVVUQ data frames
+
+        Returns
+        -------
+        results : the results dictionary computed from the merged sample sets
 
+        """
+
+        assert type(data_frames) is list
+
+        samples = {k: [] for k in self.qoi_cols}
+        #loop over all data frames
+        for data_frame in data_frames:
+            #convert each data frame into a dict
+            sample_dict = self.get_samples(data_frame)
+            #for each QoI in the dict, merge the lists of samples
+            for k in self.qoi_cols:
+                samples[k] += sample_dict[k]
+        #compute the results using the merged sample set
+        return self.compute_results(samples)
+                
     def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
         """
         Computes the first order and total order Sobol indices using Saltelli's
@@ -165,7 +200,8 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000):
         # the number of parameter and the number of MC samples in n_mc * (n_params + 2)
         # and the size of the QoI
         n_params = self.sampler.n_params
-        n_mc = self.sampler.n_mc_samples
+        # n_mc = self.sampler.n_mc_samples
+        n_mc = int(samples.shape[0]/(n_params + 2))
         n_qoi = samples[0].size
         sobols_first_dict = {}
         conf_first_dict = {}