add support for bernoulli distribution

pymc-devs · May 19, 2020 · ccb29d8 · ccb29d8
1 parent 70d7e80
commit ccb29d8
Show file tree

Hide file tree

Showing 6 changed files with 204 additions and 9 deletions.
diff --git a/Untitled1.ipynb b/Untitled1.ipynb
@@ -0,0 +1,157 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pymc3 as pm\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "data = pickle.load(open(\"issue_247.pkl\", \"rb\"))\n",
+    "\n",
+    "pi0 = 0.051366009925488\n",
+    "mu = 0.783230896500752\n",
+    "sigma = 0.816999481742865\n",
+    "lower = -2.94\n",
+    "upper = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "def sigmoid(x):\n",
+    "    return 1 / (1 + math.exp(-x))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Elemwise{mul,no_inplace}.0\n"
+     ]
+    },
+    {
+     "ename": "ValueError",
+     "evalue": "TypeError: object of type 'TensorVariable' has no len()\n",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m----------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0mTraceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-29-8b1218cbb925>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      6\u001b[0m         \u001b[0malpha\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDeterministic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"alpha\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlower\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0malpha_offset\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mupper\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlower\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m         \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxi\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mbeta\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m         \u001b[0mp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"X\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m@\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconstant\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxi\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mbeta\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      9\u001b[0m         \u001b[0my_obs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mBernoulli\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'y_obs'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msigmoid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobserved\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     10\u001b[0m         \u001b[0mpm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msample\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/miniconda3/envs/py3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py\u001b[0m in \u001b[0;36mconstant\u001b[0;34m(value, dtype, shape, name)\u001b[0m\n\u001b[1;32m    260\u001b[0m   \"\"\"\n\u001b[1;32m    261\u001b[0m   return _constant_impl(value, dtype, shape, name, verify_shape=False,\n\u001b[0;32m--> 262\u001b[0;31m                         allow_broadcast=True)\n\u001b[0m\u001b[1;32m    263\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    264\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/miniconda3/envs/py3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py\u001b[0m in \u001b[0;36m_constant_impl\u001b[0;34m(value, dtype, shape, name, verify_shape, allow_broadcast)\u001b[0m\n\u001b[1;32m    268\u001b[0m   \u001b[0mctx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcontext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    269\u001b[0m   \u001b[0;32mif\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecuting_eagerly\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 270\u001b[0;31m     \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert_to_eager_tensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    271\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    272\u001b[0m       \u001b[0;32mreturn\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/miniconda3/envs/py3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py\u001b[0m in \u001b[0;36mconvert_to_eager_tensor\u001b[0;34m(value, ctx, dtype)\u001b[0m\n\u001b[1;32m     94\u001b[0m       \u001b[0mdtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdtypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_datatype_enum\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     95\u001b[0m   \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 96\u001b[0;31m   \u001b[0;32mreturn\u001b[0m \u001b[0mops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEagerTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     97\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mValueError\u001b[0m: TypeError: object of type 'TensorVariable' has no len()\n"
+     ]
+    }
+   ],
+   "source": [
+    "with pm.Model() as model:\n",
+    "        xi = pm.Bernoulli('xi', pi0, shape=data['X'].shape[1])\n",
+    "        beta_offset = pm.Normal('beta_offset', 0, 1, shape=data['X'].shape[1])\n",
+    "        beta = pm.Deterministic(\"beta\", mu + beta_offset * sigma)\n",
+    "        alpha_offset = pm.Uniform(\"alpha_offset\", -1, 1)\n",
+    "        alpha = pm.Deterministic(\"alpha\", lower + (alpha_offset + 1) / 2 * (upper - lower))\n",
+    "        p = data[\"X\"] @ (xi * beta)\n",
+    "        y_obs = pm.Bernoulli('y_obs', sigmoid(p), observed = y)\n",
+    "        pm.sample()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "@pm.model\n",
+    "def get_model(y, X, pi0=0.5, mu=0, sigma=1, lower=-1, upper=1):\n",
+    "    xi = yield pm.Bernoulli('xi', pi0 * tf.ones(X.shape[1], 'float32'))\n",
+    "    beta_offset = yield pm.Normal('beta_offset', tf.zeros(X.shape[1], 'float32'), tf.ones(X.shape[1], 'float32'))\n",
+    "    beta = yield pm.Deterministic(\"beta\", mu + beta_offset * sigma)\n",
+    "    alpha_offset = yield pm.Uniform(\"alpha_offset\", -1, 1)\n",
+    "    alpha = yield pm.Deterministic(\"alpha\", lower + (alpha_offset + 1) / 2 * (upper - lower))\n",
+    "    \n",
+    "    p = tf.linalg.matvec(X, tf.cast(xi, tf.float32) * beta)\n",
+    "    yield pm.Bernoulli('y_obs', tf.math.sigmoid(p), observed = y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "module 'numpy' has no attribute 'sigmoid'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m----------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0mTraceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-13-144d8cff0ae1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msigmoid\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m: module 'numpy' has no attribute 'sigmoid'"
+     ]
+    }
+   ],
+   "source": [
+    "np.sigmoid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/issue_247.pkl b/issue_247.pkl
diff --git a/issue_247.tar b/issue_247.tar
diff --git a/pymc4/distributions/discrete.py b/pymc4/distributions/discrete.py
@@ -5,7 +5,7 @@
     PositiveDiscreteDistribution,
     BoundedDiscreteDistribution,
 )
-from pymc4.distributions.state_functions import categorical_uniform_fn
+from pymc4.distributions.state_functions import categorical_uniform_fn, bernoulli_uniform_fn
 
 __all__ = [
     "Bernoulli",
@@ -58,13 +58,16 @@ class Bernoulli(BoundedDiscreteDistribution):
         Probability of success (0 < probs < 1).
     """
 
+    _grad_support = False
+
     def __init__(self, name, probs, **kwargs):
         super().__init__(name, probs=probs, **kwargs)
+        self._default_new_state_part = bernoulli_uniform_fn
 
     @staticmethod
     def _init_distribution(conditions):
         probs = conditions["probs"]
-        return tfd.Bernoulli(probs=probs)
+        return tfd.Bernoulli(probs=probs, dtype=tf.float32)
 
     def lower_limit(self):
         return 0

diff --git a/pymc4/distributions/state_functions.py b/pymc4/distributions/state_functions.py
@@ -32,13 +32,48 @@ def _fn(state_parts, seed):
             if len(state_parts) != len(scales):
                 raise ValueError("`scale` must broadcast with `state_parts`")
             probs = tf.ones_like(state_parts)
+
+            deltas = [
+                tfd.Categorical(
+                    probs=probs / tf.math.reduce_sum(probs, -1), dtype=tf.float32
+                ).sample(seed=seed, sample_shape=5)
+                for scale_part, state_part in zip(scales, state_parts)
+            ]
+            return deltas
+
+    return _fn
+
+
+def bernoulli_uniform_fn(scale=1.0, name=None):
+    """Returns a callable that samples new proposal from Bernoulli distribution with uniform probabilites
+    Args:
+       scale: a `Tensor` or Python `list` of `Tensor`s of any shapes and `dtypes`
+         controlling the upper and lower bound of the uniform proposal
+         distribution.
+       name: Python `str` name prefixed to Ops created by this function.
+           Default value: 'random_walk_uniform_fn'.
+
+    Returns:
+        bernoulli_uniform_fn: A callable accepting a Python `list` of `Tensor`s
+         representing the state parts of the `current_state` and an `int`
+         representing the random seed used to generate the proposal. The callable
+         returns the same-type `list` of `Tensor`s as the input and represents the
+         proposal for the RWM algorithm.
+    """
+
+    def _fn(state_parts, seed):
+        with tf.name_scope(name or "bernoulli_uniform_fn"):
+            scales = scale if mcmc_util.is_list_like(scale) else [scale]
+            if len(scales) == 1:
+                scales *= len(state_parts)
+            if len(state_parts) != len(scales):
+                raise ValueError("`scale` must broadcast with `state_parts`")
+            probs = tf.ones_like(state_parts)
+
             deltas = [
-                tf.squeeze(
-                    tfd.Categorical(
-                        probs=probs / tf.math.reduce_sum(probs, -1), dtype=tf.float32
-                    ).sample(seed=seed, sample_shape=(tf.shape(state_part))),
-                    -1,
-                )
+                tfd.Bernoulli(
+                    0.5, dtype=tf.float32
+                ).sample(seed=seed, sample_shape=(tf.shape(state_part)))
                 for scale_part, state_part in zip(scales, state_parts)
             ]
             return deltas

diff --git a/pymc4/mcmc/samplers.py b/pymc4/mcmc/samplers.py
@@ -297,7 +297,7 @@ def __init__(self, *args, **kwargs):
         self._stat_names = ["target_log_prob"]
 
     def _trace_fn(self, current_state, pkr):
-        return pkr.target_log_prob
+        return (pkr.target_log_prob, ) + tuple(self.deterministics_callback(*current_state))
 
     @staticmethod
     def _convert_sampler_methods(sampler_methods):