
Commit

Merge pull request #150 from stefanradev93/Development
Development
marvinschmitt authored Mar 14, 2024
2 parents 01e6588 + 3d90e91 commit b38f23e
Showing 6 changed files with 680 additions and 13 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -105,10 +105,10 @@ the model-amortizer combination on unseen simulations:
# Generate 500 new simulated data sets
new_sims = trainer.configurator(generative_model(500))

# Obtain 100 posteriors draws per data set instantly
# Obtain 100 posterior draws per data set instantly
posterior_draws = amortized_posterior.sample(new_sims, n_samples=100)

# Diagnoze calibration
# Diagnose calibration
fig = bf.diagnostics.plot_sbc_histograms(posterior_draws, new_sims['parameters'])
```
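As a quick orientation for what those draws can be used for beyond the SBC histograms — assuming `posterior_draws` comes back as a NumPy array of shape `(num_data_sets, n_samples, num_params)`, which the snippet itself does not state — a plain-NumPy summary could look like this:

```python
import numpy as np

# Hedged sketch: per-data-set posterior summaries from the 100 draws obtained above
posterior_means = posterior_draws.mean(axis=1)               # (num_data_sets, num_params)
posterior_sds = posterior_draws.std(axis=1, ddof=1)          # (num_data_sets, num_params)
ci_lower, ci_upper = np.quantile(posterior_draws, [0.025, 0.975], axis=1)  # central 95% intervals
```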

@@ -302,7 +302,7 @@ This project is currently managed by researchers from Rensselaer Polytechnic Ins
You can cite BayesFlow along the lines of:

- We approximated the posterior with neural posterior estimation and learned summary statistics (NPE; Radev et al., 2020), as implemented in the BayesFlow software for amortized Bayesian workflows (Radev et al., 2023a).
- We approximated the likelihood with neural likelihood estimation (NLE; Papamakarios et al., 2019) without hand-cafted summary statistics, as implemented in the BayesFlow software for amortized Bayesian workflows (Radev et al., 2023b).
- We approximated the likelihood with neural likelihood estimation (NLE; Papamakarios et al., 2019) without hand-crafted summary statistics, as implemented in the BayesFlow software for amortized Bayesian workflows (Radev et al., 2023b).
- We performed simultaneous posterior and likelihood estimation with jointly amortized neural approximation (JANA; Radev et al., 2023a), as implemented in the BayesFlow software for amortized Bayesian workflows (Radev et al., 2023b).

1. Radev, S. T., Schmitt, M., Schumacher, L., Elsemüller, L., Pratz, V., Schälte, Y., Köthe, U., & Bürkner, P.-C. (2023a). BayesFlow: Amortized Bayesian workflows with neural networks. *The Journal of Open Source Software, 8(89)*, 5702. ([arXiv](https://arxiv.org/abs/2306.16015)) ([JOSS](https://joss.theoj.org/papers/10.21105/joss.05702))
8 changes: 4 additions & 4 deletions bayesflow/amortizers.py
@@ -31,7 +31,7 @@
from bayesflow.default_settings import DEFAULT_KEYS
from bayesflow.exceptions import ConfigurationError, SummaryStatsError
from bayesflow.helper_functions import check_tensor_sanity
from bayesflow.losses import log_loss, mmd_summary_space
from bayesflow.losses import log_loss, mmd_summary_space, norm_diff
from bayesflow.networks import EvidentialNetwork


@@ -412,7 +412,7 @@ def _compute_summary_condition(self, summary_conditions, direct_conditions, **kw
elif direct_conditions is not None:
full_cond = direct_conditions
else:
raise SummaryStatsError("Could not concatenarte or determine conditioning inputs...")
raise SummaryStatsError("Could not concatenate or determine conditioning inputs...")
return sum_condition, full_cond

def _determine_latent_dist(self, latent_dist):
@@ -1337,7 +1337,7 @@ def compute_loss(self, input_dict, **kwargs):
"""

net_out = self(input_dict, **kwargs)
loss = tf.reduce_mean(self.loss_fn(net_out - input_dict[DEFAULT_KEYS["parameters"]]))
loss = tf.reduce_mean(self.loss_fn(net_out, input_dict[DEFAULT_KEYS["parameters"]]))
return loss

def _compute_summary_condition(self, summary_conditions, direct_conditions, **kwargs):
@@ -1366,4 +1366,4 @@ def _determine_loss(self, loss_fun, norm_ord):
# In case of user-provided loss, override norm order
if loss_fun is not None:
return loss_fun
return partial(tf.norm, ord=norm_ord, axis=-1)
return partial(norm_diff, ord=norm_ord, axis=-1)
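The last two hunks are two sides of the same fix: the old default loss `partial(tf.norm, ord=norm_ord, axis=-1)` accepts a single tensor, so `compute_loss` had to pass a pre-computed difference, whereas a user-supplied `loss_fun` is called with `(estimates, targets)`. Wrapping the norm as `norm_diff` lets the default behave like a two-argument loss. A minimal, self-contained sketch of the equivalence (illustrative only, with hypothetical shapes; the local `norm_diff` mirrors the helper added to `bayesflow/losses.py` further below):

```python
from functools import partial
import tensorflow as tf

def norm_diff(tensor_a, tensor_b, axis=None, ord="euclidean"):
    # Mirrors the helper added to bayesflow/losses.py: norm of the element-wise difference
    return tf.norm(tensor_a - tensor_b, ord=ord, axis=axis)

estimates = tf.random.normal((8, 4))   # hypothetical point estimates (batch, num_params)
targets = tf.random.normal((8, 4))     # hypothetical true parameters

old_default = partial(tf.norm, ord=2, axis=-1)    # single-argument: caller must form the difference
new_default = partial(norm_diff, ord=2, axis=-1)  # two-argument: called like a user-supplied loss

tf.debugging.assert_near(old_default(estimates - targets), new_default(estimates, targets))
```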
17 changes: 14 additions & 3 deletions bayesflow/helper_networks.py
@@ -332,7 +332,7 @@ def __init__(self, latent_dim, act_norm_init, **kwargs):

super().__init__(**kwargs)

# Initialize scale and bias with zeros and ones if no batch for initalization was provided.
# Initialize scale and bias with zeros and ones if no batch for initialization was provided.
if act_norm_init is None:
self.scale = tf.Variable(tf.ones((latent_dim,)), trainable=True, name="act_norm_scale")

@@ -594,7 +594,15 @@ class ConfigurableMLP(tf.keras.Model):
"""Implements a simple configurable MLP with optional residual connections and dropout."""

def __init__(
self, input_dim, hidden_dim=512, num_hidden=2, activation="relu", residual=True, dropout_rate=0.05, **kwargs
self,
input_dim,
hidden_dim=512,
output_dim=None,
num_hidden=2,
activation="relu",
residual=True,
dropout_rate=0.05,
**kwargs,
):
"""
Creates an instance of a flexible and simple MLP with optional residual connections and dropout.
@@ -605,6 +613,8 @@ def __init__(
The input dimensionality
hidden_dim : int, optional, default: 512
The dimensionality of the hidden layers
output_dim : int, optional, default: None
The output dimensionality. If None is passed, `output_dim` is set to `input_dim`
num_hidden : int, optional, default: 2
The number of hidden layers (minimum: 1)
activation : string, optional, default: 'relu'
@@ -618,6 +628,7 @@
super().__init__(**kwargs)

self.input_dim = input_dim
self.output_dim = input_dim if output_dim is None else output_dim
self.model = tf.keras.Sequential(
[tf.keras.layers.Dense(hidden_dim, activation=activation), tf.keras.layers.Dropout(dropout_rate)]
)
@@ -630,7 +641,7 @@
dropout_rate=dropout_rate,
)
)
self.model.add(tf.keras.layers.Dense(input_dim))
self.model.add(tf.keras.layers.Dense(self.output_dim))

def call(self, inputs, **kwargs):
return self.model(inputs, **kwargs)
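The practical effect of the new `output_dim` argument is that the MLP can now map into a space of different dimensionality than its input; previously the final `Dense` layer was hard-wired to `input_dim`. A hedged usage sketch, assuming the class is importable from `bayesflow.helper_networks` at this commit:

```python
import tensorflow as tf
from bayesflow.helper_networks import ConfigurableMLP

# Map 6-dimensional inputs to 2-dimensional outputs; omitting output_dim falls back to input_dim
mlp = ConfigurableMLP(input_dim=6, hidden_dim=128, output_dim=2, num_hidden=2)
dummy_batch = tf.random.normal((32, 6))
print(mlp(dummy_batch).shape)  # expected: (32, 2)
```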
17 changes: 17 additions & 0 deletions bayesflow/losses.py
@@ -184,3 +184,20 @@ def log_loss(model_indices, preds, evidential=False, label_smoothing=0.01):
# Actual loss + regularization (if given)
loss = -tf.reduce_mean(tf.reduce_sum(model_indices * tf.math.log(preds), axis=1))
return loss


def norm_diff(tensor_a, tensor_b, axis=None, ord='euclidean'):
    """
    Wrapper around tf.norm that computes the norm of the difference between two tensors along the specified axis.

    Parameters
    ----------
    tensor_a : tf.Tensor
        The first tensor.
    tensor_b : tf.Tensor
        The second tensor; must have the same shape as `tensor_a`.
    axis     : int or None, optional, default: None
        The axis along which to compute the norm of the difference.
    ord      : int or str, optional, default: 'euclidean'
        The order of the norm. Supports 'euclidean' and the other orders accepted by tf.norm.
    """

    difference = tensor_a - tensor_b
    return tf.norm(difference, ord=ord, axis=axis)
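For concreteness, here is how the helper defined above behaves for the two most common orders (an illustration, not part of the diff):

```python
import tensorflow as tf

a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.constant([[1.0, 0.0], [0.0, 0.0]])

norm_diff(a, b, axis=-1, ord="euclidean")  # per-row L2 distances -> [2.0, 5.0]
norm_diff(a, b, axis=-1, ord=1)            # per-row L1 distances -> [2.0, 7.0]
```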
639 changes: 639 additions & 0 deletions examples/Amortized_Point_Estimation.ipynb

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions setup.cfg
@@ -42,9 +42,9 @@ install_requires =
seaborn >= 0.11
tqdm >= 4.65
matplotlib >= 3.5
tensorflow-macos >= 2.10; sys_platform == 'darwin' and platform_machine == 'arm64'
tensorflow >= 2.10.1; sys_platform != 'darwin' or platform_machine != 'arm64'
tensorflow_probability >= 0.17
tensorflow-macos >= 2.10, < 2.16; sys_platform == 'darwin' and platform_machine == 'arm64'
tensorflow >= 2.10.1, < 2.16; sys_platform != 'darwin' or platform_machine != 'arm64'
tensorflow_probability >= 0.17, < 0.24

[options.extras_require]
testing =
