From 1b6b20cb6d4a1e8fb5f114dc6766512cd655fa89 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Thu, 1 Oct 2020 13:00:12 +0200 Subject: [PATCH 1/7] more efficient eval confidence intervals --- easyvvuq/analysis/qmc_analysis.py | 31 ++++++++++++------------------- easyvvuq/sampling/mc_sampler.py | 2 +- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py index 8edb3214a..67b22a949 100644 --- a/easyvvuq/analysis/qmc_analysis.py +++ b/easyvvuq/analysis/qmc_analysis.py @@ -5,6 +5,7 @@ """ import logging import numpy as np +from scipy.stats import norm from easyvvuq import OutputType from .base import BaseAnalysisElement from easyvvuq.sampling import QMCSampler @@ -157,33 +158,25 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000): sobols_total_dict = {} conf_total_dict = {} + # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params + # see reference above. + f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc) + r = np.random.randint(n_mc, size=(n_mc, n_samples)) + for j, param_name in enumerate(self.sampler.vary.get_keys()): - # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params - # see reference above. - f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc) # our point estimate for the 1st and total order Sobol indices value_first = self._first_order(f_M2, f_M1, f_Ni[:, j]) value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) - # array for resampled estimates - sobols_first = np.zeros([n_samples, n_qoi]) - sobols_total = np.zeros([n_samples, n_qoi]) - for i in range(n_samples): - # resample, must be done on already seperated output due to - # the specific order in samples - idx = np.random.randint(0, n_mc - 1, n_mc) - f_M2_resample = f_M2[idx] - f_M1_resample = f_M1[idx] - f_Ni_resample = f_Ni[idx] - # recompute Sobol indices - sobols_first[i] = self._first_order(f_M2_resample, f_M1_resample, - f_Ni_resample[:, j]) - sobols_total[i] = self._total_order(f_M2_resample, f_M1_resample, - f_Ni_resample[:, j]) - # compute confidence intervals + #sobols computed from resampled data points + sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j]) + sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j]) + + # compute confidence intervals based on percentiles _, low_first, high_first = confidence_interval(sobols_first, value_first, alpha, pivotal=True) _, low_total, high_total = confidence_interval(sobols_total, value_total, alpha, pivotal=True) + # store results sobols_first_dict[param_name] = value_first conf_first_dict[param_name] = {'low': low_first, 'high': high_first} diff --git a/easyvvuq/sampling/mc_sampler.py b/easyvvuq/sampling/mc_sampler.py index de9ee2f12..a6bfa2929 100644 --- a/easyvvuq/sampling/mc_sampler.py +++ b/easyvvuq/sampling/mc_sampler.py @@ -113,7 +113,7 @@ def saltelli(self, n_mc): self.xi_mc[(step - 1):self.max_num:step] = M_1 # store N_i entries between M2 and M1 for i in range(self.n_params): - N_i = np.array(M_2) + N_i = np.copy(M_2) # N_i = M2 with i-th colum from M1 N_i[:, i] = M_1[:, i] self.xi_mc[(i + 1):self.max_num:step] = N_i From fcff1c3fdf2b1c0e74c2f93acadd7a3e61a9a4a8 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Thu, 1 Oct 2020 13:01:05 +0200 Subject: [PATCH 2/7] fix pepe8 --- easyvvuq/analysis/qmc_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py index 67b22a949..ea603ba21 100644 --- a/easyvvuq/analysis/qmc_analysis.py +++ b/easyvvuq/analysis/qmc_analysis.py @@ -167,7 +167,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000): # our point estimate for the 1st and total order Sobol indices value_first = self._first_order(f_M2, f_M1, f_Ni[:, j]) value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) - #sobols computed from resampled data points + # sobols computed from resampled data points sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j]) sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j]) From 8fa541a9cd01867582466268353bd90d8d029a48 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Thu, 1 Oct 2020 15:27:22 +0200 Subject: [PATCH 3/7] mod test --- tests/test_mc_analysis.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_mc_analysis.py b/tests/test_mc_analysis.py index 546724e03..7405c965d 100644 --- a/tests/test_mc_analysis.py +++ b/tests/test_mc_analysis.py @@ -75,14 +75,14 @@ def test_sobol_bootstrap(data): assert(s1['x2'] == pytest.approx(0.20727553481694053, 0.01)) assert(st['x1'] == pytest.approx(0.8132793654841785, 0.01)) assert(st['x2'] == pytest.approx(0.3804962894947435, 0.01)) - assert(s1_conf['x1']['low'][0] == pytest.approx(0.14387035, 0.01)) - assert(s1_conf['x1']['high'][0] == pytest.approx(0.89428774, 0.01)) - assert(s1_conf['x2']['low'][0] == pytest.approx(-0.11063341, 0.01)) - assert(s1_conf['x2']['high'][0] == pytest.approx(0.46752829, 0.01)) - assert(st_conf['x1']['low'][0] == pytest.approx(0.61368887, 0.01)) - assert(st_conf['x1']['high'][0] == pytest.approx(1.01858671, 0.01)) - assert(st_conf['x2']['low'][0] == pytest.approx(0.24361207, 0.01)) - assert(st_conf['x2']['high'][0] == pytest.approx(0.49214117, 0.01)) + assert(s1_conf['x1']['low'] == pytest.approx(0.14387035, 0.01)) + assert(s1_conf['x1']['high'] == pytest.approx(0.89428774, 0.01)) + assert(s1_conf['x2']['low'] == pytest.approx(-0.11063341, 0.01)) + assert(s1_conf['x2']['high'] == pytest.approx(0.46752829, 0.01)) + assert(st_conf['x1']['low'] == pytest.approx(0.61368887, 0.01)) + assert(st_conf['x1']['high'] == pytest.approx(1.01858671, 0.01)) + assert(st_conf['x2']['low'] == pytest.approx(0.24361207, 0.01)) + assert(st_conf['x2']['high'] == pytest.approx(0.49214117, 0.01)) def test_separate_output_values(data): From d6a7f9ee47458a53b8579f45b3b19e31d2b7d145 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Wed, 28 Oct 2020 08:29:57 +0100 Subject: [PATCH 4/7] added HDF5 collator --- easyvvuq/analysis/qmc_analysis.py | 34 +++++-- easyvvuq/collate/__init__.py | 1 + easyvvuq/collate/aggregate_hdf5.py | 157 +++++++++++++++++++++++++++++ 3 files changed, 182 insertions(+), 10 deletions(-) create mode 100644 easyvvuq/collate/aggregate_hdf5.py diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py index ea603ba21..ee82b3dba 100644 --- a/easyvvuq/analysis/qmc_analysis.py +++ b/easyvvuq/analysis/qmc_analysis.py @@ -5,6 +5,7 @@ """ import logging import numpy as np +import pandas as pd from scipy.stats import norm from easyvvuq import OutputType from .base import BaseAnalysisElement @@ -64,7 +65,7 @@ def analyse(self, data_frame): ['statistical_moments', 'percentiles', 'sobol_indices', 'correlation_matrices', 'output_distributions'] """ - if data_frame.empty: + if type(data_frame) == pd.DataFrame and data_frame.empty: raise RuntimeError( "No data in data frame passed to analyse element") @@ -101,7 +102,7 @@ def get_samples(self, data_frame): Parameters ---------- - data_frame : the EasyVVUQ Pandas dataframe. + data_frame : the EasyVVUQ Pandas or HDF5 dataframe. Returns ------- @@ -110,13 +111,20 @@ def get_samples(self, data_frame): """ samples = {k: [] for k in self.qoi_cols} - for run_id in data_frame.run_id.unique(): + if type(data_frame) == pd.DataFrame: + for run_id in data_frame.run_id.unique(): + for k in self.qoi_cols: + values = data_frame.loc[data_frame['run_id'] == run_id][k].values + samples[k].append(values) + elif type(data_frame) == dict: + #dict is not sorted, make sure the runs are processed in ascending order for k in self.qoi_cols: - data = data_frame.loc[data_frame['run_id'] == run_id][k] - samples[k].append(data.values) + run_id_int = [int(run_id.split('Run_')[-1]) for run_id in data_frame[k].keys()] + for run_id in range(1, np.max(run_id_int) + 1): + samples[k].append(data_frame[k]['Run_' + str(run_id)]) return samples - def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000): + def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000): """ Computes the first order and total order Sobol indices using Saltelli's method. To assess the sampling inaccuracy, bootstrap confidence intervals @@ -144,7 +152,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000): assert len(samples) > 0 assert alpha > 0.0 assert alpha < 1.0 - assert n_samples > 0 + assert n_bootstrap > 0 # convert to array samples = np.array(samples) @@ -152,7 +160,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000): # and the size of the QoI n_params = self.sampler.n_params n_mc = self.sampler.n_mc_samples - n_qoi = samples[0].size + # n_qoi = samples[0].size sobols_first_dict = {} conf_first_dict = {} sobols_total_dict = {} @@ -161,12 +169,14 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000): # code evaluations of input matrices M1, M2 and Ni, i = 1,...,n_params # see reference above. f_M2, f_M1, f_Ni = self._separate_output_values(samples, n_params, n_mc) - r = np.random.randint(n_mc, size=(n_mc, n_samples)) + r = np.random.randint(n_mc, size=(n_mc, n_bootstrap)) for j, param_name in enumerate(self.sampler.vary.get_keys()): + # our point estimate for the 1st and total order Sobol indices value_first = self._first_order(f_M2, f_M1, f_Ni[:, j]) value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) + # sobols computed from resampled data points sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j]) sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j]) @@ -176,7 +186,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_samples=1000): alpha, pivotal=True) _, low_total, high_total = confidence_interval(sobols_total, value_total, alpha, pivotal=True) - + print('e') # store results sobols_first_dict[param_name] = value_first conf_first_dict[param_name] = {'low': low_first, 'high': high_first} @@ -228,6 +238,7 @@ def _separate_output_values(samples, n_params, n_mc_samples): return f_M2, f_M1, f_Ni + # Adapted from SALib @staticmethod def _first_order(f_M2, f_M1, f_Ni): """Calculate first order sensitivity indices. @@ -245,9 +256,12 @@ def _first_order(f_M2, f_M1, f_Ni): ------- A NumPy array of the n_params first-order Sobol indices. """ + print('a') V = np.var(np.r_[f_M2, f_M1], axis=0) + print('b') return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0) + # Adapted from SALib @staticmethod def _total_order(f_M2, f_M1, f_Ni): """Calculate total order sensitivity indices. See also: diff --git a/easyvvuq/collate/__init__.py b/easyvvuq/collate/__init__.py index 833b81632..42411d31e 100644 --- a/easyvvuq/collate/__init__.py +++ b/easyvvuq/collate/__init__.py @@ -1,3 +1,4 @@ from .base import BaseCollationElement from .aggregate_samples import AggregateSamples from .aggregate_by_variables import AggregateByVariables +from .aggregate_hdf5 import AggregateHDF5 diff --git a/easyvvuq/collate/aggregate_hdf5.py b/easyvvuq/collate/aggregate_hdf5.py new file mode 100644 index 000000000..a3f3dc1b2 --- /dev/null +++ b/easyvvuq/collate/aggregate_hdf5.py @@ -0,0 +1,157 @@ +"""Provides an element for aggregation of results from all complete runs. +""" + +import os +from .base import BaseCollationElement +from easyvvuq import OutputType, constants +import h5py + +__copyright__ = """ + + Copyright 2018 Robin A. Richardson, David W. Wright + + This file is part of EasyVVUQ + + EasyVVUQ is free software: you can redistribute it and/or modify + it under the terms of the Lesser GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EasyVVUQ is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + Lesser GNU General Public License for more details. + + You should have received a copy of the Lesser GNU General Public License + along with this program. If not, see . + +""" +__license__ = "LGPL" + + +class AggregateHDF5(BaseCollationElement, collater_name="aggregate_hdf5"): + """ + Aggregate the results of all completed simulations described by the + Campaign. Will simply concatenate all simulation data into one big + HDF5 DataFrame. This dataframe is stored in the campaign directory. + + """ + + def collate(self, campaign, app_id): + """ + Collected the decoded run results for all completed runs with ENCODED status + + Parameters + ---------- + campaign : :obj:`easyvvuq.campaign.Campaign` + EasyVVUQ coordination object from which to get information on runs + to be collated. + + Returns + ------- + `int`: + The number of new data rows added during collation + """ + + print('Collating data...') + decoder = campaign._active_app_decoder + + if decoder.output_type != OutputType.SAMPLE: + raise RuntimeError('Can only aggregate sample type data') + + # Aggregate any uncollated runs into a dataframe (for appending to existing full df) + new_data = {} + qoi_cols = campaign._active_app_decoder.output_columns + + # Loop through all runs with status ENCODED (and therefore not yet COLLATED) + processed_run_IDs = [] + for run_id, run_info in campaign.campaign_db.runs( + status=constants.Status.ENCODED, app_id=app_id): + + # Use decoder to check if run has completed (in general application-specific) + if decoder.sim_complete(run_info=run_info): + + run_data = decoder.parse_sim_output(run_info=run_info) + new_data[run_id] = {} + for qoi in qoi_cols: + new_data[run_id][qoi] = run_data[qoi].values + processed_run_IDs.append(run_id) + + self.append_data(campaign, new_data) + campaign.campaign_db.set_run_statuses(processed_run_IDs, constants.Status.COLLATED) + print('done.') + + return len(processed_run_IDs) + + def append_data(self, campaign, new_data): + """ + + Parameters + ---------- + campaign : EasyVVUQ campaign object + new_data : a dictionary containing the new simulation results for + each quantity of interest. The results are appended to the HDF5 file + samples.hdf5, located in the Campaign work directory + + Returns + ------- + None. + + """ + hdf5_file = os.path.join(campaign.campaign_dir, 'samples.hdf5') + #retrieve the names of the quantities of interest + qoi_cols = campaign._active_app_decoder.output_columns + + h5f = h5py.File(hdf5_file, 'a') + for qoi in qoi_cols: + #Each QoI is stored in a separate group + if not qoi in h5f.keys(): + h5f.create_group(qoi) + #Each QoI sample is stored in a dataset qoi/run_id + for run_id in new_data.keys(): + h5f.create_dataset(qoi + '/' + run_id, data=new_data[run_id][qoi]) + h5f.close() + + def get_collated_dataframe(self, campaign, app_id): + """ + Read the samples from the HDF5 file samples.hdf5, and store them in + a dictionary to be used in the analysis classes. + + Parameters + ---------- + campaign : EasyVVUQ Campaign object + app_id : the EasyVVUQ app id. + + Returns + ------- + data : the simulation samples stored in a dictionary. Format of the + dict is data[qoi][run_id], e.g. data['velocity']['Run_1'] + + """ + + hdf5_file = os.path.join(campaign.campaign_dir, 'samples.hdf5') + qoi_cols = campaign._active_app_decoder.output_columns + h5f = h5py.File(hdf5_file, 'r') + data = {} + for qoi in qoi_cols: + data[qoi] = {} + for run_id in h5f[qoi].keys(): + data[qoi][run_id] = h5f[qoi + '/' + run_id][()] + h5f.close() + return data + + def element_version(self): + return "0.1" + + def is_restartable(self): + return True + + def get_restart_dict(self): + """Return dict required for restart from serlialized form. + + Returns + ------- + dict: + No restart parameters required + """ + return {} From 0c6e52316f4711ad101df04bb8ead6cda257f05f Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Wed, 28 Oct 2020 08:33:02 +0100 Subject: [PATCH 5/7] lower n_bootstrap --- easyvvuq/analysis/qmc_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py index ee82b3dba..f9391e298 100644 --- a/easyvvuq/analysis/qmc_analysis.py +++ b/easyvvuq/analysis/qmc_analysis.py @@ -124,7 +124,7 @@ def get_samples(self, data_frame): samples[k].append(data_frame[k]['Run_' + str(run_id)]) return samples - def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000): + def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=100): """ Computes the first order and total order Sobol indices using Saltelli's method. To assess the sampling inaccuracy, bootstrap confidence intervals From a60dae1930b23fe8453214206941d13b53a0ccd5 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Thu, 29 Oct 2020 15:35:58 +0100 Subject: [PATCH 6/7] added a sequantial implementation of the bootstrap conf intervals --- easyvvuq/analysis/qmc_analysis.py | 43 ++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py index f9391e298..fc406e207 100644 --- a/easyvvuq/analysis/qmc_analysis.py +++ b/easyvvuq/analysis/qmc_analysis.py @@ -50,7 +50,7 @@ def element_version(self): """Version of this element""" return "0.2" - def analyse(self, data_frame): + def analyse(self, data_frame, **kwargs): """Perform QMC analysis on a given pandas DataFrame. Parameters @@ -80,7 +80,7 @@ def analyse(self, data_frame): } # Extract output values for each quantity of interest from Dataframe - samples = self.get_samples(data_frame) + samples = self.get_samples(data_frame, **kwargs) # Compute descriptive statistics for each quantity of interest for k in qoi_cols: @@ -96,13 +96,13 @@ def analyse(self, data_frame): return results - def get_samples(self, data_frame): + def get_samples(self, data_frame, **kwargs): """ Converts the Pandas dataframe into a dictionary. Parameters ---------- - data_frame : the EasyVVUQ Pandas or HDF5 dataframe. + data_frame : the EasyVVUQ Pandas or dictonary dataframe. Returns ------- @@ -114,17 +114,23 @@ def get_samples(self, data_frame): if type(data_frame) == pd.DataFrame: for run_id in data_frame.run_id.unique(): for k in self.qoi_cols: - values = data_frame.loc[data_frame['run_id'] == run_id][k].values + if 'output_index' in kwargs: + values = data_frame.loc[data_frame['run_id'] == run_id][k].values[kwargs['output_index']] + else: + values = data_frame.loc[data_frame['run_id'] == run_id][k].values samples[k].append(values) elif type(data_frame) == dict: #dict is not sorted, make sure the runs are processed in ascending order for k in self.qoi_cols: run_id_int = [int(run_id.split('Run_')[-1]) for run_id in data_frame[k].keys()] for run_id in range(1, np.max(run_id_int) + 1): + if 'output_index' in kwargs: + samples[k].append(data_frame[k]['Run_' + str(run_id)][kwargs['output_index']]) + else: samples[k].append(data_frame[k]['Run_' + str(run_id)]) return samples - def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=100): + def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000): """ Computes the first order and total order Sobol indices using Saltelli's method. To assess the sampling inaccuracy, bootstrap confidence intervals @@ -160,7 +166,7 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=100): # and the size of the QoI n_params = self.sampler.n_params n_mc = self.sampler.n_mc_samples - # n_qoi = samples[0].size + n_qoi = samples[0].size sobols_first_dict = {} conf_first_dict = {} sobols_total_dict = {} @@ -178,15 +184,30 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=100): value_total = self._total_order(f_M2, f_M1, f_Ni[:, j]) # sobols computed from resampled data points - sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j]) - sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j]) + if n_mc * n_bootstrap * n_qoi <= 10**7: + #this is a vectorized computation, Is fast, but f_M2[r] will be of size + #(n_mc, n_bootstrap, n_qoi), this can become too large and cause a crash, + #especially when dealing with large QoI (n_qoi >> 1). So this is only done + #when n_mc * n_bootstrap * n_qoi <= 10**7 + print("Vectorized bootstrapping") + sobols_first = self._first_order(f_M2[r], f_M1[r], f_Ni[r, j]) + sobols_total = self._total_order(f_M2[r], f_M1[r], f_Ni[r, j]) + else: + #array for resampled estimates + sobols_first = np.zeros([n_bootstrap, n_qoi]) + sobols_total = np.zeros([n_bootstrap, n_qoi]) + print("Sequential bootstrapping") + #non-vectorized implementation + for i in range(n_bootstrap): + #resampled sample matrices of size (n_mc, n_qoi) + sobols_first[i] = self._first_order(f_M2[r[i]], f_M1[r[i]], f_Ni[r[i], j]) + sobols_total[i] = self._total_order(f_M2[r[i]], f_M1[r[i]], f_Ni[r[i], j]) # compute confidence intervals based on percentiles _, low_first, high_first = confidence_interval(sobols_first, value_first, alpha, pivotal=True) _, low_total, high_total = confidence_interval(sobols_total, value_total, alpha, pivotal=True) - print('e') # store results sobols_first_dict[param_name] = value_first conf_first_dict[param_name] = {'low': low_first, 'high': high_first} @@ -256,9 +277,7 @@ def _first_order(f_M2, f_M1, f_Ni): ------- A NumPy array of the n_params first-order Sobol indices. """ - print('a') V = np.var(np.r_[f_M2, f_M1], axis=0) - print('b') return np.mean(f_M1 * (f_Ni - f_M2), axis=0) / (V + (V == 0)) * (V != 0) # Adapted from SALib From 89aafa00ae43f7ed435d4478bc4e444cf192be03 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Fri, 30 Oct 2020 15:37:53 +0100 Subject: [PATCH 7/7] added merge_campaign subroutine --- easyvvuq/analysis/qmc_analysis.py | 44 ++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/easyvvuq/analysis/qmc_analysis.py b/easyvvuq/analysis/qmc_analysis.py index fc406e207..8db62f2e3 100644 --- a/easyvvuq/analysis/qmc_analysis.py +++ b/easyvvuq/analysis/qmc_analysis.py @@ -69,6 +69,16 @@ def analyse(self, data_frame, **kwargs): raise RuntimeError( "No data in data frame passed to analyse element") + # Extract output values for each quantity of interest from Dataframe + samples = self.get_samples(data_frame, **kwargs) + + return self.compute_results(samples) + + def compute_results(self, samples): + """ + + """ + qoi_cols = self.qoi_cols results = { @@ -79,9 +89,6 @@ def analyse(self, data_frame, **kwargs): 'conf_sobols_total': {k: {} for k in qoi_cols} } - # Extract output values for each quantity of interest from Dataframe - samples = self.get_samples(data_frame, **kwargs) - # Compute descriptive statistics for each quantity of interest for k in qoi_cols: results['statistical_moments'][k] = {'mean': np.mean(samples[k], axis=0), @@ -129,7 +136,35 @@ def get_samples(self, data_frame, **kwargs): else: samples[k].append(data_frame[k]['Run_' + str(run_id)]) return samples + + def merge_campaigns(self, data_frames): + """ + Merge the dataframes of multiple (Q)MC campaigns, and then compute the results. + Only use this if the same vary dict was used in the campaigns. + + Parameters + ---------- + data_frames : list of EasyVVUQ data frames + + Returns + ------- + results : the results dictionary computed from the merged sample sets + """ + + assert type(data_frames) is list + + samples = {k: [] for k in self.qoi_cols} + #loop over all data frames + for data_frame in data_frames: + #convert each data frame into a dict + sample_dict = self.get_samples(data_frame) + #for each QoI in the dict, merge the lists of samples + for k in self.qoi_cols: + samples[k] += sample_dict[k] + #compute the results using the merged sample set + return self.compute_results(samples) + def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000): """ Computes the first order and total order Sobol indices using Saltelli's @@ -165,7 +200,8 @@ def sobol_bootstrap(self, samples, alpha=0.05, n_bootstrap=1000): # the number of parameter and the number of MC samples in n_mc * (n_params + 2) # and the size of the QoI n_params = self.sampler.n_params - n_mc = self.sampler.n_mc_samples + # n_mc = self.sampler.n_mc_samples + n_mc = int(samples.shape[0]/(n_params + 2)) n_qoi = samples[0].size sobols_first_dict = {} conf_first_dict = {}