Michaelgemmell/bayes workflow (#262)
* initial commit

* magnetic recon tests

* use error attribute if it is given

* Added poisson noise and adjusted calibration factor

* changed name magnetic_recon -> equilibrium_reconstruction

* diagnostic data with channel dim are treated as individual measurements

* added function for example LOS

* refactoring

* refactoring

* refactoring

* initial commit

* removed obsolete attributes

* Setting up testflow

* more tests

* more test stubs

* small fix

* cosmetic changes

* changed marker style

* small fix

* adjusting alpha

* black reformatting

* fixing formatting

* flake8 fix

* flake8 fix

* boolean indexing -> xarray.where in call

* fixing tests

* removing import

* precommit fixes

* stashing for quick result

* added calc_impurity, for generating profiles from concentrations, to the update_profiles method

* added conditional handling of diagnostics in data pipeline

* moved calc_impurity to plasma method

* tweaking calibration factor

* tweaking calibration factor

* tweaking calibration factor

* changed burn_in to burn_in_fraction

* added violinplots

* removed comment

* included dictionary logic for fitting impurities scaled by concentrations and electron density

* changed name cxff_pi.ti_0d to cxff_pi.ti0

* new option for initialising at maximum-likelihood estimate

* added xlim for violin plot

* tweaking calibration

* fixed neutral density units

* added set plot functions

* added center-of-mass sampling and Ti/Te y0_ref options

* plot formatting changed

* black formatting

* deleted example bayes_opt

* moved sample with autocorr function to bayes_dev_workflow

* renamed bayesworkflow

* stashing

* multiplot formatting

* generalised writing kinetic profiles to blobs

* function autocorr plot added

* ion density added to kinetic profiles saved

* initial commit of abstract workflow and example of usage

* doc strings

* renaming workflow

* Ti_ref as default

* abstract methods added

* Changed to new MDSPlus structure

* Adding default methods

* moved sampling to its own function in abstract class

* moved sampling to its own function in abstract class

* renamed abstract class to AbstractBayesWorkflow

* comments

* added high density sampling as method

* added attributes to __init__

* error handling __init__

* error handling __init__

* moved read data to abstract class

* initial commit

* kwargs added

* xrcs wavelength units corrected

* background to int

* stash

* renamed impurities

* stashing

* stashing

* stashing

* fixing key names

* updating and testing example

* added background as attribute of helike model

* fixed phantom methods

* rearranged methods

* moved background to call kwarg

* params and kwargs are now given as model_{var} and the model prefix is removed before the model call (see the sketch before the file diffs below)

* adding _phantom_data and _exp_data abstract methods

* renaming to BayesWorkflowExample

* initial commit

* renamed phantom_params to profile_params

* renamed bayesopt -> bayesmodel

* renamed bayesopt -> bayesmodel

* moved start point sampling to its own method _sample_start_points

* fixed variable name

* fixed bug with start_points being overwritten

* removed workflow_dev

* moving equilibrium to workflow object

* when reading raw data, save transforms to their own attribute

* transforms saved to workflow class

* fixed name

* renamed example transform

* made example los function

* minor name fix

* made read_test_data function

* made example los function

* minor typo

* added reader to read_test_data method

* fixed copying workflow object states

* updated example to work with assign_profiles

* can now run with pulse = None and synthetic transforms/equilibrium

* formatting

* refactoring names

* deleted old bayes_models tests

* reformatted _build_bckc for readability

* moved percentage error to class attribute

* fixing init percent error

* black formatting

* renamed kin_prof -> plasma_profiles

* refactored window handling

* replaced doppler_broaden with physics.ev_doppler

* removed methods from __init__

* stashing

* setup_plasma now takes kwargs

* adding plasma to models now happens in setup_opt_data

* workflow broken up into methods

* removed redundant kwargs

* fixed non_plasma call option

* fixed plasma_initialisation

* fixing violin plotting

* units for xrcs.spectra.wavelength fixed

* nchannels added to example_los

* adding print message for fake data reading

* adjusting example los

* bckc method no longer prints "spectra/fit not available" every time

* adjusting priors

* moved kwargs to sample function

* nsamples kwarg added to sample_from_high_density_region

* mocked bda_tree module

* fixed import

* fixed import

* black formatting

* precommit
michael-gemmell authored Aug 17, 2023
1 parent a8651c0 commit 3317b28
Showing 25 changed files with 2,141 additions and 772 deletions.
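
One convention from the commit list above deserves a concrete illustration: optimiser parameters are passed as `model_{var}` (e.g. `xrcs.background`) and the model-name prefix is stripped before each model call. Below is a minimal sketch of that filtering, with arbitrary values and a hypothetical second key; the real logic is in `_build_bckc` in `indica/bayesmodels.py`, shown in the first diff below.

```python
# Minimal sketch of the model_{var} convention (values arbitrary, second key
# hypothetical); the real filtering lives in _build_bckc in indica/bayesmodels.py.
params = {"xrcs.background": 200.0, "other_model.some_param": 1.5}
model_name = "xrcs"

# Keep only this model's entries and strip the "xrcs." prefix before the call.
model_kwargs = {
    name.replace(model_name + ".", ""): value
    for name, value in params.items()
    if model_name in name
}

print(model_kwargs)  # {'background': 200.0}
```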
148 changes: 108 additions & 40 deletions indica/bayesmodels.py
@@ -1,15 +1,25 @@
from copy import deepcopy
import warnings

import flatdict
import numpy as np
from scipy.stats import uniform

np.seterr(all="ignore")

warnings.simplefilter("ignore", category=FutureWarning)


PROFILES = [
"electron_temperature",
"electron_density",
"ion_temperature",
"ion_density",
"impurity_density",
"fast_density",
"neutral_density",
"zeff",
]


def gaussian(x, mean, sigma):
return 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-1 / 2 * ((x - mean) / sigma) ** 2)

@@ -44,46 +54,85 @@ def __init__(
quant_to_optimise: list = [],
priors: dict = {},
diagnostic_models: list = [],
percent_error: float = 0.10,
):
self.plasma = plasma
self.data = data
self.quant_to_optimise = quant_to_optimise
self.diagnostic_models = diagnostic_models
self.priors = priors
self.percent_error = percent_error

for diag_model in self.diagnostic_models:
diag_model.plasma = self.plasma

missing_data = list(set(quant_to_optimise).difference(data.keys()))
if missing_data: # list of keys in quant_to_optimise but not data
if missing_data: # gives list of keys in quant_to_optimise but not data
raise ValueError(f"{missing_data} not found in data given")

def _build_bckc(self, params: dict, **kwargs):
# TODO: consider how to handle if models have overlapping kwargs
# Params is a dictionary which is updated by optimiser,
# kwargs is constant i.e. settings for models
def _build_bckc(self, params, **kwargs):
"""
Parameters
----------
params - dictionary which is updated by optimiser
kwargs - passed to model i.e. settings
Returns
-------
bckc of results
"""
self.bckc: dict = {}
for model in self.diagnostic_models:
self.bckc = dict(
self.bckc, **{model.name: {**model(**{**params, **kwargs})}}
)
self.bckc = flatdict.FlatDict(self.bckc, delimiter=".")
# removes "model.name." from params and kwargs then passes them to model
# e.g. xrcs.background -> background
_nuisance_params = {
param_name.replace(model.name + ".", ""): param_value
for param_name, param_value in params.items()
if model.name in param_name
}
_model_settings = {
kwarg_name.replace(model.name + ".", ""): kwarg_value
for kwarg_name, kwarg_value in kwargs.items()
if model.name in kwarg_name
}

_model_kwargs = {
**_nuisance_params,
**_model_settings,
} # combine dictionaries
_bckc = model(**_model_kwargs)
_model_bckc = {
f"{model.name}.{value_name}": value
for value_name, value in _bckc.items()
} # prepend model name to bckc
self.bckc = dict(self.bckc, **_model_bckc)
return

def _ln_likelihood(self):
ln_likelihood = 0
for key in self.quant_to_optimise:
# TODO: What to use as error? Assume percentage error if none given...
# Float128 is used since rounding of small numbers causes
# problems when initial results are bad fits
model_data = self.bckc[key].values.astype("float64")
# Float128 since rounding of small numbers causes problems
# when initial results are bad fits
model_data = self.bckc[key].astype("float128")
exp_data = (
self.data[key]
.sel(t=self.plasma.time_to_calculate)
.values.astype("float64")
self.data[key].sel(t=self.plasma.time_to_calculate).astype("float128")
)
_ln_likelihood = np.log(gaussian(model_data, exp_data, exp_data * 0.10))
ln_likelihood += np.nanmean(_ln_likelihood)
exp_error = (
exp_data * self.percent_error
) # Assume percentage error if none given.
if hasattr(self.data[key], "error"):
if (
self.data[key].error != 0
).any(): # TODO: Some models have an error of 0 given
exp_error = self.data[key].error.sel(
t=self.plasma.time_to_calculate
)

_ln_likelihood = np.log(gaussian(model_data, exp_data, exp_error))
# treat channel as key dim which isn't averaged like other dims
if "channel" in _ln_likelihood.dims:
_ln_likelihood = _ln_likelihood.sum(dim="channel", skipna=True)
ln_likelihood += _ln_likelihood.mean(skipna=True).values
return ln_likelihood

def _ln_prior(self, parameters: dict):
@@ -144,6 +193,34 @@ def sample_from_priors(self, param_names, size=10):
samples = samples[:, 0:size]
return samples.transpose()

def sample_from_high_density_region(
self, param_names: list, sampler, nwalkers: int, nsamples=100
):
start_points = self.sample_from_priors(param_names, size=nsamples)

ln_prob, _ = sampler.compute_log_prob(start_points)
num_best_points = int(nsamples * 0.05)
index_best_start = np.argsort(ln_prob)[-num_best_points:]
best_start_points = start_points[index_best_start, :]
best_points_std = np.std(best_start_points, axis=0)

# Passing samples through ln_prior and redrawing if they fail
samples = np.empty((param_names.__len__(), 0))
while samples.size < param_names.__len__() * nwalkers:
sample = np.random.normal(
np.mean(best_start_points, axis=0),
best_points_std * 2,
size=(nwalkers * 5, len(param_names)),
)
start = {name: sample[:, idx] for idx, name in enumerate(param_names)}
ln_prior = self._ln_prior(start)
# Convert from dictionary of arrays -> array,
# then filtering out where ln_prior is -infinity
accepted_samples = np.array(list(start.values()))[:, ln_prior != -np.inf]
samples = np.append(samples, accepted_samples, axis=1)
start_points = samples[:, 0:nwalkers].transpose()
return start_points

def ln_posterior(self, parameters: dict, **kwargs):
"""
Posterior probability given to optimisers
@@ -164,31 +241,22 @@ def ln_posterior(self, parameters: dict, **kwargs):
"""

ln_prior = self._ln_prior(parameters)
if ln_prior == -np.inf: # Don't call model if outside priors
if ln_prior == -np.inf: # Don't call models if outside priors
return -np.inf, {}

self.plasma.update_profiles(parameters)
self._build_bckc(parameters, **kwargs) # model calls
ln_likelihood = self._ln_likelihood() # compare results to data
ln_posterior = ln_likelihood + ln_prior

kin_profs = {
"electron_density": self.plasma.electron_density.sel(
t=self.plasma.time_to_calculate
),
"electron_temperature": self.plasma.electron_temperature.sel(
t=self.plasma.time_to_calculate
),
"ion_temperature": self.plasma.ion_temperature.sel(
t=self.plasma.time_to_calculate
),
"impurity_density": self.plasma.impurity_density.sel(
t=self.plasma.time_to_calculate
),
"zeff": self.plasma.zeff.sum("element").sel(
t=self.plasma.time_to_calculate
),
# TODO: add Nh
}
blob = deepcopy({**self.bckc, **kin_profs})
plasma_profiles = {}
for profile_key in PROFILES:
if hasattr(self.plasma, profile_key):
plasma_profiles[profile_key] = getattr(self.plasma, profile_key).sel(
t=self.plasma.time_to_calculate
)
else:
raise ValueError(f"plasma does not have attribute {profile_key}")

blob = deepcopy({**self.bckc, **plasma_profiles})
return ln_posterior, blob
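
The `sample_from_high_density_region` method added above seeds the sampler walkers near the best-scoring prior samples: draw candidates from the priors, keep roughly the top 5% by log-probability, then redraw around their mean with twice their standard deviation, rejecting anything the prior excludes. A self-contained sketch of the same idea against a toy log-probability (the prior bounds and dimensions here are illustrative, not the Indica API):

```python
import numpy as np

rng = np.random.default_rng(0)


def log_prob(x):
    # Toy log-probability: standard normal in every dimension.
    return -0.5 * np.sum(x**2, axis=-1)


nwalkers, nsamples, ndim = 20, 100, 2

# 1. Draw candidate start points from the prior (here uniform on [-5, 5]).
candidates = rng.uniform(-5, 5, size=(nsamples, ndim))

# 2. Keep the best ~5% of candidates by log-probability.
n_best = int(0.05 * nsamples)
best = candidates[np.argsort(log_prob(candidates))[-n_best:]]

# 3. Redraw around their mean with twice their spread, dropping draws that
#    fall outside the prior support, until there are enough walkers.
start_points = np.empty((0, ndim))
while len(start_points) < nwalkers:
    draws = rng.normal(best.mean(axis=0), 2 * best.std(axis=0),
                       size=(5 * nwalkers, ndim))
    inside_prior = np.all(np.abs(draws) <= 5, axis=1)
    start_points = np.vstack([start_points, draws[inside_prior]])
start_points = start_points[:nwalkers]  # shape (nwalkers, ndim)
```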
36 changes: 22 additions & 14 deletions indica/models/charge_exchange.py
@@ -22,7 +22,6 @@ def __init__(
element: str = "c",
instrument_method="get_charge_exchange",
):

self.name = name
self.element = element
self.instrument_method = instrument_method
Expand All @@ -43,6 +42,12 @@ def _build_bckc_dictionary(self):
self.bckc[quantity] = self.Ti_at_channels
long_name = "Ion temperature"
units = "eV"
elif quant == "spectra":
# Placeholder
continue
elif quant == "fit":
# Placeholder
continue
else:
print(f"{quant} not available in model for {self.instrument_method}")
continue
@@ -115,31 +120,34 @@ def __call__(
return self.bckc


def pi_transform_example(nchannels: int):
x_positions = np.linspace(0.2, 0.8, nchannels)
y_positions = np.linspace(0.0, 0.0, nchannels)
z_positions = np.linspace(0.0, 0.0, nchannels)

transect_transform = TransectCoordinates(
x_positions,
y_positions,
z_positions,
"pi",
machine_dimensions=((0.15, 0.95), (-0.7, 0.7)),
)
return transect_transform


def example_run(
pulse: int = None,
diagnostic_name: str = "cxrs",
plasma=None,
plot=False,
):

# TODO: LOS sometimes crossing bad EFIT reconstruction

if plasma is None:
plasma = example_plasma(pulse=pulse)

# Create new interferometers diagnostics
nchannels = 5
x_positions = np.linspace(0.2, 0.8, nchannels)
y_positions = np.linspace(0.0, 0.0, nchannels)
z_positions = np.linspace(0.0, 0.0, nchannels)

transect_transform = TransectCoordinates(
x_positions,
y_positions,
z_positions,
diagnostic_name,
machine_dimensions=plasma.machine_dimensions,
)
transect_transform = pi_transform_example(5)
transect_transform.set_equilibrium(plasma.equilibrium)
model = ChargeExchange(
diagnostic_name,
(diff truncated; remaining changed files not shown)
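
The charge-exchange changes above factor the example line-of-sight geometry into a reusable `pi_transform_example` helper. A brief usage sketch, assuming the names are importable from `indica.models.charge_exchange`; the return value of `example_run` is not visible in the truncated diff, so it is left unexamined:

```python
# Sketch only: import path assumed from the file location in the diff.
from indica.models.charge_exchange import example_run, pi_transform_example

# Stand-alone 5-channel transect geometry for the "pi" line of sight.
transect = pi_transform_example(5)

# Phantom end-to-end example, as wired up in example_run above.
result = example_run(pulse=None, plot=False)
```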
