From af24c5118e8bfa1c119608e1b1784b54eb0a7050 Mon Sep 17 00:00:00 2001 From: "Douglas G. Moore" Date: Mon, 12 Aug 2019 00:22:28 -0700 Subject: [PATCH] Enable doc testing --- docs/source/conf.py | 4 +- docs/source/dist.rst | 68 +++++++++++++++++++++++++--------- docs/source/shannon.rst | 65 +++++++++++++++++++++++--------- docs/source/timeseries.rst | 29 +++++++++++++++ docs/source/utils.rst | 24 +++++++----- pyinform/activeinfo.py | 26 ++++++++----- pyinform/blockentropy.py | 33 ++++++++++------- pyinform/conditionalentropy.py | 18 ++++----- pyinform/dist.py | 40 ++++++++++---------- pyinform/entropyrate.py | 22 ++++++----- pyinform/mutualinfo.py | 4 +- pyinform/relativeentropy.py | 4 +- pyinform/shannon.py | 46 +++++++++++------------ pyinform/transferentropy.py | 64 +++++++++++++++++--------------- pyinform/utils/binning.py | 47 ++++++++++++++--------- pyinform/utils/coalesce.py | 8 +++- pyinform/utils/encoding.py | 52 ++++++++++++++++---------- 17 files changed, 352 insertions(+), 202 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 805d2e1..1783735 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,8 +13,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os +import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -31,9 +31,9 @@ # ones. extensions = [ 'sphinx.ext.autodoc', - 'sphinx.ext.todo', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', + 'sphinx.ext.doctest', ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/dist.rst b/docs/source/dist.rst index 5a974c0..88344f4 100644 --- a/docs/source/dist.rst +++ b/docs/source/dist.rst @@ -1,5 +1,10 @@ .. _dist: +.. testsetup:: Dist + + from pyinform import Dist + + Empirical Distributions ======================= @@ -15,7 +20,9 @@ Example 1: Construction ^^^^^^^^^^^^^^^^^^^^^^^ You can construct a distribution with a specified number of unique observables. This construction method results in an *invalid* distribution as no -observations have been made thus far. :: +observations have been made thus far. + +.. doctest:: Dist >>> d = Dist(5) >>> d.valid() @@ -26,7 +33,9 @@ observations have been made thus far. :: 5 Alternatively you can construct a distribution given a list (or NumPy array) -of observation counts: :: +of observation counts: + +.. doctest:: Dist >>> d = Dist([0,0,1,2,1,0,0]) >>> d.valid() @@ -41,16 +50,20 @@ Example 2: Making Observations Once a distribution has been constructed, we can begin making observations. There are two methods for doing so. The first uses the standard indexing -operations, treating the distribution similarly to a list: :: +operations, treating the distribution similarly to a list: + +.. doctest:: Dist >>> d = Dist(5) >>> for i in range(len(d)): ... d[i] = i*i >>> list(d) - [0, 1, 4, 9, 25] + [0, 1, 4, 9, 16] The second method is to make *incremental* changes to the distribution. This -is useful when making observations of timeseries: :: +is useful when making observations of timeseries: + +.. doctest:: Dist >>> obs = [1,0,1,2,2,1,2,3,2,2] >>> d = Dist(max(obs) + 1) @@ -61,7 +74,9 @@ is useful when making observations of timeseries: :: [1, 3, 5, 1] It is important to remember that :py:class:`~.dist.Dist` keeps track of your -events as you provide them. For example: :: +events as you provide them. For example: + +.. 
doctest:: Dist >>> obs = [1, 1, 3, 5, 1, 3, 7, 9] >>> d = Dist(max(obs) + 1) >>> for event in obs: ... assert(d[event] == d.tick(event) - 1) ... >>> list(d) [0, 3, 0, 2, 0, 1, 0, 1, 0, 1] >>> d[0] 0 >>> d[1] 3 If you know there are "gaps" in your time series, e.g. no even numbers, then you can use the utility function :py:func:`~.utils.coalesce.coalesce_series` to get -rid of them: :: +rid of them: + +.. doctest:: Dist >>> from pyinform import utils >>> obs = [1, 1, 3, 5, 1, 3, 7, 9] >>> coal, b = utils.coalesce_series(obs) - (array([0, 0, 1, 2, 0, 1, 3, 4], dtype=int32), 5) >>> d = Dist(b) >>> for event in coal: ... assert(d[event] == d.tick(event) - 1) ... >>> list(d) [3, 2, 1, 1, 1] >>> d[1] 2 >>> d[3] - 7 + 1 This can significantly improve memory usage in situations where the range of possible states is large, but is sparsely sampled in the time series. Example 3: Probabilities ^^^^^^^^^^^^^^^^^^^^^^^^ Once some observations have been made, we can start asking for probabilities. As in the previous examples, there are multiple ways of doing this. The first -is to just ask for the probability of a given event. :: +is to just ask for the probability of a given event. + +.. doctest:: Dist >>> d = Dist([3,0,1,2]) >>> d.probability(0) 0.5 >>> d.probability(1) 0.0 >>> d.probability(2) 0.16666666666666666 >>> d.probability(3) 0.3333333333333333 -Sometimes it is nice to just dump the probabilities out to an array: :: +Sometimes it is nice to just dump the probabilities out to an array: + +.. doctest:: Dist >>> d = Dist([3,0,1,2]) >>> d.dump() - array([ 0.5 , 0. , 0.16666667, 0.33333333]) + array([0.5 , 0. , 0.16666667, 0.33333333]) Example 4: Shannon Entropy ^^^^^^^^^^^^^^^^^^^^^^^^^^ Once you have a distribution you can do lots of fun things with it. In this example, we will compute the Shannon entropy of a timeseries of -observed values. :: +observed values. + +.. testcode:: Dist from math import log2 from pyinform.dist import Dist obs = [1,0,1,2,2,1,2,3,2,2] d = Dist(max(obs) + 1) for event in obs: d.tick(event) - + h = 0. for p in d.dump(): h -= p * log2(p) - print(h) # 1.68547529723 + print(h) + +.. testoutput:: Dist + + 1.6854752972273344 Of course **PyInform** provides a function for this: -:py:func:`pyinform.shannon.entropy`. :: +:py:func:`pyinform.shannon.entropy`. + +.. testcode:: Dist from pyinform.dist import Dist from pyinform.shannon import entropy obs = [1,0,1,2,2,1,2,3,2,2] d = Dist(max(obs) + 1) for event in obs: d.tick(event) - - print(entropy(dist)) # 1.6854752972273344 + + print(entropy(d)) + +.. testoutput:: Dist + + 1.6854752972273344 API Documentation diff --git a/docs/source/shannon.rst index 101bab5..7cdc70a 100644 --- a/docs/source/shannon.rst +++ b/docs/source/shannon.rst @@ -1,5 +1,10 @@ .. _shannon: +.. testsetup:: shannon + + from pyinform import Dist + from pyinform import shannon + Shannon Information Measures ============================ @@ -16,16 +21,26 @@ Example 1: Entropy and Random Numbers The :py:func:`pyinform.shannon.entropy` function allows us to calculate the Shannon entropy of a distribution. Let's try generating a random distribution -and see what the entropy looks like? :: +and see what its entropy looks like. + +.. 
testcode:: import numpy as np + from pyinform.dist import Dist + from pyinform.shannon import entropy + np.random.seed(2019) xs = np.random.randint(0,10,10000) d = Dist(10) for x in xs: d.tick(x) - print(entropy(d)) # 3.32137023165359 - print(entropy(d, b=10)) # 0.9998320664331565 + print(entropy(d)) + print(entropy(d, b=10)) + +.. testoutput:: + + 3.3216276921709724 + 0.9999095697715877 This is exactly what you should expect; the pseudo-random number generator does a decent job of producing integers in a uniform fashion. Example 2: Mutual Information ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ How correlated are consecutive integers? Let's find out using -:py:func:`mutual_info`. :: +:py:func:`mutual_info`. + +.. testcode:: + import numpy as np from pyinform.dist import Dist from pyinform.shannon import mutual_info - import numpy as np - obs = np.random.randint(0, 10, 10000) - + np.random.seed(2019) + obs = np.random.randint(0, 10, 100) + p_xy = Dist(100) p_x = Dist(10) p_y = Dist(10) for x, y in zip(obs[:-1], obs[1:]): p_x.tick(x) p_y.tick(y) p_xy.tick(10*x + y) - print(mutual_info(p_xy, p_x, p_y)) # -1.7763568394002505e-15 - print(mutual_info(p_xy, p_x, p_y, b=10)) # -6.661338147750939e-16 + print(mutual_info(p_xy, p_x, p_y)) + print(mutual_info(p_xy, p_x, p_y, b=10)) + +.. testoutput:: + + 1.3322676295501878e-15 + 4.440892098500626e-16 Due to the subtleties of floating-point computation we don't get exactly zero. Really, though, the mutual information is zero. Example 3: Relative Entropy and Biased Random Numbers ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Okay. Now let's generate some binary sequences. The first will be roughly -uniform, but the second will be biased toward 0. :: +uniform, but the second will be biased toward 0. +.. testcode:: + + import numpy as np from pyinform.dist import Dist from pyinform.shannon import relative_entropy - import numpy as np - + p = Dist(2) q = Dist(2) - ys = np.random.randint(0, 2, 10000) + np.random.seed(2019) + ys = np.random.randint(0, 2, 100) for y in ys: p.tick(y) - xs = np.random.randint(0, 6, 10000) + xs = np.random.randint(0, 6, 100) for i, _ in enumerate(xs): - xs[i] = (((xs[i] % 5) % 4) % 3) % 2 + xs[i] = (((xs[i] % 5) % 4) % 3) % 2 q.tick(xs[i]) - print(relative_entropy(q,p)) # 0.3338542254583825 - print(relative_entropy(p,q)) # 0.40107198925821924 + print(relative_entropy(q,p)) + print(relative_entropy(p,q)) + +.. testoutput:: + + 0.3810306585586593 + 0.4924878808808457 API Documentation ----------------- diff --git a/docs/source/timeseries.rst index 8b2d901..9a50035 100644 --- a/docs/source/timeseries.rst +++ b/docs/source/timeseries.rst @@ -1,5 +1,34 @@ .. _timeseries: +.. testsetup:: active_info + + from pyinform import active_info + +.. testsetup:: block_entropy + + from pyinform import block_entropy + +.. testsetup:: conditional_entropy + + from pyinform import conditional_entropy + +.. testsetup:: entropy_rate + + from pyinform import entropy_rate + +.. testsetup:: mutual_info + + from pyinform import mutual_info + +.. testsetup:: relative_entropy + + from pyinform import relative_entropy + +.. testsetup:: transfer_entropy + + from pyinform import transfer_entropy + + Time Series Measures ==================== diff --git a/docs/source/utils.rst index 013f4df..1b916a0 100644 --- a/docs/source/utils.rst +++ b/docs/source/utils.rst @@ -1,29 +1,33 @@ Utilities ========= +.. testsetup:: utils + + from pyinform import utils + .. 
automodule:: pyinform.utils State Binning ------------- - + .. automodule:: pyinform.utils.binning - + .. autofunction:: series_range - + .. autofunction:: bin_series - + State Coalescing ---------------- - + .. automodule:: pyinform.utils.coalesce - + .. autofunction:: coalesce_series - + State Encoding -------------- - + .. automodule:: pyinform.utils.encoding - + .. autofunction:: encode - + .. autofunction:: decode diff --git a/pyinform/activeinfo.py index fffbbda..56c1b6e 100644 --- a/pyinform/activeinfo.py +++ b/pyinform/activeinfo.py @@ -36,10 +36,12 @@ The typical usage is to provide the time series as a sequence (or ``numpy.ndarray``) and the history length as an integer and let the -:py:func:`active_info` sort out the rest: :: +:py:func:`active_info` sort out the rest: + +.. doctest:: active_info >>> active_info([0,0,1,1,1,1,0,0,0], k=2) - 0.3059584928680419 + 0.3059584928680418 >>> active_info([0,0,1,1,1,1,0,0,0], k=2, local=True) array([[-0.19264508, 0.80735492, 0.22239242, 0.22239242, -0.36257008, 1.22239242, 0.22239242]]) Multiple Initial Conditions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -What about multiple initial conditions? We've got that covered! :: +What about multiple initial conditions? We've got that covered! + +.. doctest:: active_info >>> active_info([[0,0,1,1,1,1,0,0,0], [1,0,0,1,0,0,1,0,0]], k=2) - 0.35987902873686073 + 0.35987902873686084 >>> active_info([[0,0,1,1,1,1,0,0,0], [1,0,0,1,0,0,1,0,0]], k=2, local=True) array([[ 0.80735492, -0.36257008, 0.63742992, 0.63742992, -0.77760758, 0.80735492, -1.19264508], As mentioned in :ref:`subtle-details`, averaging the AI over the initial conditions does not give the same result as constructing the distributions using -all of the initial conditions together. :: +all of the initial conditions together. + +.. doctest:: active_info >>> import numpy as np >>> series = np.asarray([[0,0,1,1,1,1,0,0,0], [1,0,0,1,0,0,1,0,0]]) >>> np.apply_along_axis(active_info, 1, series, 2).mean() - 0.58453953071733644 + 0.5845395307173363 + +Or if you are feeling verbose: -Or if you are feeling verbose: :: +.. doctest:: active_info >>> ai = np.empty(len(series)) >>> for i, xs in enumerate(series): ... ai[i] = active_info(xs, k=2) ... >>> ai - array([ 0.30595849, 0.86312057]) + array([0.30595849, 0.86312057]) >>> ai.mean() - 0.58453953071733644 + 0.5845395307173363 """ import numpy as np diff --git a/pyinform/blockentropy.py index 7cc5fbe..8b1c1eb 100644 --- a/pyinform/blockentropy.py +++ b/pyinform/blockentropy.py @@ -22,40 +22,47 @@ The typical usage is to provide the time series as a sequence (or ``numpy.ndarray``) and the block size as an integer and let the -:py:func:`block_entropy` sort out the rest: :: +:py:func:`block_entropy` sort out the rest: + +.. doctest:: block_entropy >>> block_entropy([0,0,1,1,1,1,0,0,0], k=1) 0.9910760598382222 >>> block_entropy([0,0,1,1,1,1,0,0,0], k=1, local=True) - array([[ 0.84799691, 0.84799691, 1.169925 , 1.169925 , 1.169925 , - 1.169925 , 0.84799691, 0.84799691, 0.84799691]]) + array([[0.84799691, 0.84799691, 1.169925 , 1.169925 , 1.169925 , + 1.169925 , 0.84799691, 0.84799691, 0.84799691]]) -:: +.. doctest:: block_entropy >>> block_entropy([0,0,1,1,1,1,0,0,0], k=2) 1.811278124459133 >>> block_entropy([0,0,1,1,1,1,0,0,0], k=2, local=True) - array([[ 1.4150375, 3. , 1.4150375, 1.4150375, 1.4150375, - 3. , 1.4150375, 1.4150375]]) + array([[1.4150375, 3. , 1.4150375, 1.4150375, 1.4150375, 3. 
, + 1.4150375, 1.4150375]]) Multiple Initial Conditions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Do we support multiple initial conditions? Of course we do! :: +Do we support multiple initial conditions? Of course we do! + +.. doctest:: block_entropy >>> series = [[0,0,1,1,1,1,0,0,0], [1,0,0,1,0,0,1,0,0]] >>> block_entropy(series, k=2) 1.936278124459133 >>> block_entropy(series, k=2, local=True) - array([[ 1.4150375, 2.4150375, 2.4150375, 2.4150375, 2.4150375, - 2. , 1.4150375, 1.4150375], - [ 2. , 1.4150375, 2.4150375, 2. , 1.4150375, - 2.4150375, 2. , 1.4150375]]) + array([[1.4150375, 2.4150375, 2.4150375, 2.4150375, 2.4150375, 2. , + 1.4150375, 1.4150375], + [2. , 1.4150375, 2.4150375, 2. , 1.4150375, 2.4150375, + 2. , 1.4150375]]) + +Or you can compute the block entropy on each initial condition and average: -Or you can compute the block entropy on each initial condition and average: :: +.. doctest:: block_entropy + >>> import numpy as np >>> np.apply_along_axis(block_entropy, 1, series, 2).mean() - 1.6862781244591329 + 1.686278124459133 """ import numpy as np diff --git a/pyinform/conditionalentropy.py b/pyinform/conditionalentropy.py index 308330c..25f3901 100644 --- a/pyinform/conditionalentropy.py +++ b/pyinform/conditionalentropy.py @@ -26,7 +26,7 @@ Examples -------- -:: +.. doctest:: conditional_entropy >>> xs = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1] >>> ys = [0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1] @@ -35,15 +35,15 @@ >>> conditional_entropy(ys,xs) # H(X|Y) 0.5077571498797332 >>> conditional_entropy(xs, ys, local=True) - array([ 3. , 3. , 0.19264508, 0.19264508, 0.19264508, - 0.19264508, 0.19264508, 0.19264508, 0.19264508, 0.19264508, - 0.19264508, 0.19264508, 0.19264508, 0.19264508, 0.19264508, - 0.19264508, 0.4150375 , 0.4150375 , 0.4150375 , 2. ]) + array([3. , 3. , 0.19264508, 0.19264508, 0.19264508, + 0.19264508, 0.19264508, 0.19264508, 0.19264508, 0.19264508, + 0.19264508, 0.19264508, 0.19264508, 0.19264508, 0.19264508, + 0.19264508, 0.4150375 , 0.4150375 , 0.4150375 , 2. ]) >>> conditional_entropy(ys, xs, local=True) - array([ 1.32192809, 1.32192809, 0.09953567, 0.09953567, 0.09953567, - 0.09953567, 0.09953567, 0.09953567, 0.09953567, 0.09953567, - 0.09953567, 0.09953567, 0.09953567, 0.09953567, 0.09953567, - 0.09953567, 0.73696559, 0.73696559, 0.73696559, 3.9068906 ]) + array([1.32192809, 1.32192809, 0.09953567, 0.09953567, 0.09953567, + 0.09953567, 0.09953567, 0.09953567, 0.09953567, 0.09953567, + 0.09953567, 0.09953567, 0.09953567, 0.09953567, 0.09953567, + 0.09953567, 0.73696559, 0.73696559, 0.73696559, 3.9068906 ]) """ import numpy as np diff --git a/pyinform/dist.py b/pyinform/dist.py index ed78323..044b590 100644 --- a/pyinform/dist.py +++ b/pyinform/dist.py @@ -28,7 +28,7 @@ def __init__(self, n): .. rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist(5) >>> d = Dist([0,0,1,2]) @@ -67,11 +67,11 @@ def __len__(self): .. rubric:: Examples: - :: + .. doctest:: Dist >>> len(Dist(5)) 5 - >>> len(Dist[0,1,5]) + >>> len(Dist([0,1,5])) 3 See also :py:meth:`.counts`. @@ -93,7 +93,7 @@ def resize(self, n): .. rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist(5) >>> d.resize(3) @@ -103,7 +103,7 @@ def resize(self, n): >>> len(d) 8 - :: + .. doctest:: Dist >>> d = Dist([1,2,3,4]) >>> d.resize(2) @@ -129,7 +129,7 @@ def copy(self): .. rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist([1,2,3]) >>> e = d @@ -139,7 +139,7 @@ def copy(self): >>> list(d) [3, 2, 3] - :: + .. doctest:: Dist >>> f = d.copy() >>> f[0] = 1 @@ -161,13 +161,13 @@ def counts(self): .. 
rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist(5) >>> d.counts() 0 - :: + .. doctest:: Dist >>> d = Dist([1,0,3,2]) >>> d.counts() @@ -187,13 +187,13 @@ def valid(self): .. rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist(5) >>> d.valid() False - :: + .. doctest:: Dist >>> d = Dist([0,0,0,1]) >>> d.valid() @@ -212,13 +212,13 @@ def __getitem__(self, event): .. rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist(2) >>> (d[0], d[1]) (0, 0) - :: + .. doctest:: Dist >>> d = Dist([0,1]) >>> (d[0], d[1]) @@ -243,7 +243,7 @@ def __setitem__(self, event, value): .. rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist(2) >>> for i, _ in enumerate(d): @@ -252,7 +252,7 @@ def __setitem__(self, event, value): >>> list(d) [0, 1] - :: + .. doctest:: Dist >>> d = Dist([0,1,2,3]) >>> for i, n in enumerate(d): @@ -280,7 +280,7 @@ def tick(self, event): .. rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist(5) >>> for i, _ in enumerate(d): @@ -289,7 +289,7 @@ def tick(self, event): >>> list(d) [1, 1, 1, 1, 1] - :: + .. doctest:: Dist >>> d = Dist([0,1,2,3]) >>> for i, _ in enumerate(d): @@ -315,7 +315,7 @@ def probability(self, event): .. rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist([1,1,1,1]) >>> for i, _ in enumerate(d): @@ -343,11 +343,11 @@ def dump(self): .. rubric:: Examples: - :: + .. doctest:: Dist >>> d = Dist([1,2,2,1]) >>> d.dump() - array([ 0.16666667, 0.33333333, 0.33333333, 0.16666667]) + array([0.16666667, 0.33333333, 0.33333333, 0.16666667]) See also :py:meth:`.probability`. diff --git a/pyinform/entropyrate.py b/pyinform/entropyrate.py index 4e7c91f..3722239 100644 --- a/pyinform/entropyrate.py +++ b/pyinform/entropyrate.py @@ -41,29 +41,33 @@ Let's apply the entropy rate to a single initial condition. Typically, you will just provide the time series and the history length, and let -:py:func:`.entropy_rate` take care of the rest: :: +:py:func:`.entropy_rate` take care of the rest: + +.. doctest:: entropy_rate >>> entropy_rate([0,0,1,1,1,1,0,0,0], k=2) 0.6792696431662095 >>> entropy_rate([0,0,1,1,1,1,0,0,0], k=2, local=True) - array([[ 1. , 0. , 0.5849625, 0.5849625, 1.5849625, - 0. , 1. ]]) + array([[1. , 0. , 0.5849625, 0.5849625, 1.5849625, 0. , + 1. ]]) >>> entropy_rate([0,0,1,1,1,1,2,2,2], k=2) - 0.24830578469386944 + 0.39355535745192416 Multiple Initial Conditions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Of course multiple initial conditions are handled. :: +Of course multiple initial conditions are handled. + +.. doctest:: entropy_rate >>> series = [[0,0,1,1,1,1,0,0,0], [1,0,0,1,0,0,1,0,0]] >>> entropy_rate(series, k=2) 0.6253491072973907 >>> entropy_rate(series, k=2, local=True) - array([[ 0.4150375, 1.5849625, 0.5849625, 0.5849625, 1.5849625, - 0. , 2. ], - [ 0. , 0.4150375, 0.5849625, 0. , 0.4150375, - 0.5849625, 0. ]]) + array([[0.4150375, 1.5849625, 0.5849625, 0.5849625, 1.5849625, 0. , + 2. ], + [0. , 0.4150375, 0.5849625, 0. , 0.4150375, 0.5849625, + 0. ]]) """ import numpy as np diff --git a/pyinform/mutualinfo.py b/pyinform/mutualinfo.py index 7b773bb..19738cb 100644 --- a/pyinform/mutualinfo.py +++ b/pyinform/mutualinfo.py @@ -25,12 +25,12 @@ Examples -------- -:: +.. doctest:: mutual_info >>> xs = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1] >>> ys = [0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1] >>> mutual_info(xs, ys) - 0.214170945007629 + 0.21417094500762912 >>> mutual_info(xs, ys, local=True) array([-1. , -1. 
, 0.22239242, 0.22239242, 0.22239242, 0.22239242, 0.22239242, 0.22239242, 0.22239242, 0.22239242, diff --git a/pyinform/relativeentropy.py b/pyinform/relativeentropy.py index f4bfd13..2fffe9a 100644 --- a/pyinform/relativeentropy.py +++ b/pyinform/relativeentropy.py @@ -30,7 +30,7 @@ Examples -------- -:: +.. doctest:: relative_entropy >>> xs = [0,1,0,0,0,0,0,0,0,1] >>> ys = [0,1,1,1,1,0,0,1,0,0] @@ -39,7 +39,7 @@ >>> relative_entropy(ys, xs) 0.3219280948873624 -:: +.. doctest:: relative_entropy >>> xs = [0,0,0,0] >>> ys = [0,1,1,0] diff --git a/pyinform/shannon.py b/pyinform/shannon.py index 6fbb83d..b11ad11 100644 --- a/pyinform/shannon.py +++ b/pyinform/shannon.py @@ -19,20 +19,20 @@ def entropy(p, b=2.0): .. rubric:: Examples: - :: + .. doctest:: shannon >>> d = Dist([1,1,1,1]) - >>> entropy(d) + >>> shannon.entropy(d) 2.0 - >>> entropy(d, 4) + >>> shannon.entropy(d, 4) 1.0 - :: + .. doctest:: shannon >>> d = Dist([2,1]) - >>> entropy(d) + >>> shannon.entropy(d) 0.9182958340544896 - >>> entropy(d, b=3) + >>> shannon.entropy(d, b=3) 0.579380164285695 See [Shannon1948a]_ for more details. @@ -72,13 +72,13 @@ def mutual_info(p_xy, p_x, p_y, b=2.0): .. rubric:: Examples: - :: + .. doctest:: shannon >>> xy = Dist([10,70,15,5]) >>> x = Dist([80,20]) >>> y = Dist([25,75]) - >>> mutual_info(xy, x, y) - 0.214170945007629 + >>> shannon.mutual_info(xy, x, y) + 0.21417094500762912 :param p_xy: the joint distribution :type p_xy: :py:class:`pyinform.dist.Dist` @@ -115,14 +115,14 @@ def conditional_entropy(p_xy, p_y, b=2.0): .. rubric:: Examples: - :: + .. doctest:: shannon >>> xy = Dist([10,70,15,5]) >>> x = Dist([80,20]) >>> y = Dist([25,75]) - >>> conditional_entropy(xy, x) + >>> shannon.conditional_entropy(xy, x) 0.5971071794515037 - >>> conditional_entropy(xy, y) + >>> shannon.conditional_entropy(xy, y) 0.5077571498797332 :param p_xy: the joint distribution @@ -157,18 +157,18 @@ def conditional_mutual_info(p_xyz, p_xz, p_yz, p_z, b=2.0): &= H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z) - .. _Conditional mutual information: https://en.wikipedia.org/wiki/Conditional_entropy + .. _Conditional mutual information: https://en.wikipedia.org/wiki/Conditional_mutual_information .. rubric:: Examples: - :: + .. doctest:: shannon >>> xyz = Dist([24,24,9,6,25,15,10,5]) >>> xz = Dist([15,9,5,10]) >>> yz = Dist([9,15,10,15]) >>> z = Dist([3,5]) - >>> conditional_mutual_info(xyz, xz, yz, z) - 0.12594942727460323 + >>> shannon.conditional_mutual_info(xyz, xz, yz, z) + 0.12594942727460334 :param p_xyz: the joint distribution :type p_xyz: :py:class:`pyinform.dist.Dist` @@ -207,22 +207,22 @@ def relative_entropy(p, q, b=2.0): .. rubric:: Examples: - :: + .. doctest:: shannon >>> p = Dist([4,1]) >>> q = Dist([1,1]) - >>> relative_entropy(p,q) + >>> shannon.relative_entropy(p,q) 0.27807190511263774 - >>> relative_entropy(q,p) - 0.32192809488736235 + >>> shannon.relative_entropy(q,p) + 0.3219280948873624 - :: + .. doctest:: shannon >>> p = Dist([1,0]) >>> q = Dist([1,1]) - >>> relative_entropy(p,q) + >>> shannon.relative_entropy(p,q) 1.0 - >>> relative_entropy(q,p) + >>> shannon.relative_entropy(q,p) nan :param p: the *posterior* distribution diff --git a/pyinform/transferentropy.py b/pyinform/transferentropy.py index d1d3338..af99d40 100644 --- a/pyinform/transferentropy.py +++ b/pyinform/transferentropy.py @@ -48,74 +48,78 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^ Just give us a couple of time series and tell us the history length and we'll -give you a number :: +give you a number + +.. 
doctest:: transfer_entropy >>> xs = [0,0,1,1,1,1,0,0,0] >>> ys = [0,1,1,1,1,0,0,0,1] >>> transfer_entropy(ys, xs, k=1) - 0.8112781244591329 + 0.8112781244591327 >>> transfer_entropy(ys, xs, k=2) - 0.6792696431662095 + 0.6792696431662097 >>> transfer_entropy(xs, ys, k=1) - 0.21691718668869964 + 0.21691718668869922 >>> transfer_entropy(xs, ys, k=2) # pesky floating-point math - -2.220446049250313e-16 + 0.0 + +or an array if you ask for it -or an array if you ask for it :: +.. doctest:: transfer_entropy >>> transfer_entropy(ys, xs, k=1, local=True) - array([[ 0.4150375, 2. , 0.4150375, 0.4150375, 0.4150375, - 2. , 0.4150375, 0.4150375]]) + array([[0.4150375, 2. , 0.4150375, 0.4150375, 0.4150375, 2. , + 0.4150375, 0.4150375]]) >>> transfer_entropy(ys, xs, k=2, local=True) - array([[ 1. , 0. , 0.5849625, 0.5849625, 1.5849625, - 0. , 1. ]]) + array([[1. , 0. , 0.5849625, 0.5849625, 1.5849625, 0. , + 1. ]]) >>> transfer_entropy(xs, ys, k=1, local=True) array([[ 0.4150375, 0.4150375, -0.169925 , -0.169925 , 0.4150375, 1. , -0.5849625, 0.4150375]]) >>> transfer_entropy(xs, ys, k=2, local=True) - array([[ 0., 0., 0., 0., 0., 0., 0.]]) + array([[0., 0., 0., 0., 0., 0., 0.]]) Multiple Initial Conditions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Uhm, yes we can! (Did you really expect anything less?) :: +Uhm, yes we can! (Did you really expect anything less?) + +.. doctest:: transfer_entropy >>> xs = [[0,0,1,1,1,1,0,0,0], [1,0,0,0,0,1,1,1,0]] >>> ys = [[1,0,0,0,0,1,1,1,1], [1,1,1,1,0,0,0,1,1]] >>> transfer_entropy(ys, xs, k=1) - 0.8828560636920486 + 0.8828560636920488 >>> transfer_entropy(ys, xs, k=2) - 0.6935361388961918 + 0.693536138896192 >>> transfer_entropy(xs, ys, k=1) - 0.15969728512148262 + 0.15969728512148243 >>> transfer_entropy(xs, ys, k=2) 0.0 -And local too:: +And local too + +.. doctest:: transfer_entropy >>> transfer_entropy(ys, xs, k=1, local=True) - array([[ 0.4150375 , 2. , 0.67807191, 0.67807191, 0.67807191, - 1.4150375 , 0.4150375 , 0.4150375 ], - [ 1.4150375 , 0.4150375 , 0.4150375 , 0.4150375 , 2. , - 0.67807191, 0.67807191, 1.4150375 ]]) + array([[0.4150375 , 2. , 0.67807191, 0.67807191, 0.67807191, + 1.4150375 , 0.4150375 , 0.4150375 ], + [1.4150375 , 0.4150375 , 0.4150375 , 0.4150375 , 2. , + 0.67807191, 0.67807191, 1.4150375 ]]) >>> transfer_entropy(ys, xs, k=2, local=True) - array([[ 1.32192809, 0. , 0.73696559, 0.73696559, 1.32192809, - 0. , 0.73696559], - [ 0. , 0.73696559, 0.73696559, 1.32192809, 0. , - 0.73696559, 1.32192809]]) + array([[1.32192809, 0. , 0.73696559, 0.73696559, 1.32192809, + 0. , 0.73696559], + [0. , 0.73696559, 0.73696559, 1.32192809, 0. 
, + 0.73696559, 1.32192809]]) >>> transfer_entropy(xs, ys, k=1, local=True) array([[ 0.5849625 , 0.48542683, -0.25153877, -0.25153877, 0.48542683, 0.36257008, -0.22239242, -0.22239242], [ 0.36257008, -0.22239242, -0.22239242, 0.5849625 , 0.48542683, -0.25153877, 0.48542683, 0.36257008]]) >>> transfer_entropy(xs, ys, k=2, local=True) - array([[ 0.00000000e+00, -2.22044605e-16, -2.22044605e-16, - -2.22044605e-16, 0.00000000e+00, -2.22044605e-16, - -2.22044605e-16], - [ -2.22044605e-16, -2.22044605e-16, -2.22044605e-16, - 0.00000000e+00, -2.22044605e-16, -2.22044605e-16, - 0.00000000e+00]]) + array([[0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0.]]) """ import numpy as np diff --git a/pyinform/utils/binning.py index 3f1ac3a..5d8291e 100644 --- a/pyinform/utils/binning.py +++ b/pyinform/utils/binning.py @@ -32,11 +32,12 @@ def series_range(series): """ Compute the range of a continuously-valued time series. - Examples: :: + Examples: + + .. doctest:: utils - >>> from pyinform import utils >>> utils.series_range([0,1,2,3,4,5]) - (5, 0, 5) + (5.0, 0.0, 5.0) >>> utils.series_range([-0.1, 8.5, 0.02, -6.3]) (14.8, -6.3, 8.5) @@ -67,20 +68,24 @@ def bin_series(series, b=None, step=None, bounds=None): .. rubric:: 1. Specified Number of Bins The first is binning the time series into *b* uniform bins (with *b* an - integer). :: + integer). + + .. doctest:: utils - >>> from pyinform import utils >>> import numpy as np + >>> np.random.seed(2019) >>> xs = 10 * np.random.rand(20) >>> xs - array([ 6.62004974, 7.24471972, 0.76670198, 2.66306833, 4.32200795, - 8.84902227, 6.83491844, 7.05008074, 3.79287646, 6.50844032, - 8.68804879, 6.79543773, 0.3222078 , 7.39576325, 7.54150189, - 1.06422897, 1.91958431, 2.34760945, 3.90139184, 3.08885353]) + array([9.03482214, 3.93080507, 6.23969961, 6.37877401, 8.80499069, + 2.99172019, 7.0219827 , 9.03206161, 8.81381926, 4.05749798, + 4.52446621, 2.67070324, 1.6286487 , 8.89214695, 1.48476226, + 9.84723485, 0.32361219, 5.15350754, 2.01129047, 8.86010874]) >>> utils.bin_series(xs, b=2) - (array([1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0], dtype=int32), 2, 4.263407236635026) + (array([1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1], + dtype=int32), 2, 4.761811327822174) >>> utils.bin_series(xs, b=3) - (array([2, 2, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 0, 0, 0, 1, 0], dtype=int32), 3, 2.8422714910900173) + (array([2, 1, 1, 1, 2, 0, 2, 2, 2, 1, 1, 0, 0, 2, 0, 2, 0, 1, 0, 2], + dtype=int32), 3, 3.1745408852147823) With this approach the binned sequence (as a ``numpy.ndarray``), the number @@ -91,12 +96,16 @@ def bin_series(series, b=None, step=None, bounds=None): .. rubric:: 2. Fixed Size Bins - The second type of binning produces bins of a specific size *step*.:: + The second type of binning produces bins of a specific size *step*. + + .. doctest:: utils >>> utils.bin_series(xs, step=4.0) - (array([1, 1, 0, 0, 0, 2, 1, 1, 0, 1, 2, 1, 0, 1, 1, 0, 0, 0, 0, 0], dtype=int32), 3, 4.0) + (array([2, 0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 0, 0, 2, 0, 2, 0, 1, 0, 2], + dtype=int32), 3, 4.0) >>> utils.bin_series(xs, step=2.0) - (array([3, 3, 0, 1, 1, 4, 3, 3, 1, 3, 4, 3, 0, 3, 3, 0, 0, 1, 1, 1], dtype=int32), 5, 2.0) + (array([4, 1, 2, 3, 4, 1, 3, 4, 4, 1, 2, 1, 0, 4, 0, 4, 0, 2, 0, 4], + dtype=int32), 5, 2.0) As in the previous case, the binned sequence, the number of bins, and the size of each bin are returned. 
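For intuition, the fixed-size strategy is easy to emulate by hand. The sketch below is not :py:func:`bin_series` itself (the helper name and return convention are illustrative only); it simply mirrors the semantics described above, assigning each value to bin ``floor((x - min) / step)``::

    import numpy as np

    def fixed_size_bins(xs, step):
        # Illustrative only: mirrors the documented behavior of
        # utils.bin_series(xs, step=...). The smallest value lands in
        # bin 0, and each bin spans `step` units of the real line.
        xs = np.asarray(xs, dtype=float)
        binned = np.floor((xs - xs.min()) / step).astype(np.int32)
        return binned, int(binned.max()) + 1, step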
@@ -110,12 +119,16 @@ def bin_series(series, b=None, step=None, bounds=None): The third type of binning breaks the real number line into segments with specified boundaries or thresholds, and the time series is binned according to this partitioning. The bounds are expected to be provided in ascending - order.:: + order. + + .. doctest:: utils >>> utils.bin_series(xs, bounds=[2.0, 7.5]) - (array([1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 1], dtype=int32), 3, [2.0, 7.5]) + (array([2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 0, 2, 0, 2, 0, 1, 1, 2], + dtype=int32), 3, [2.0, 7.5]) >>> utils.bin_series(xs, bounds=[2.0]) - (array([1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1], dtype=int32), 2, [2.0]) + (array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1], + dtype=int32), 2, [2.0]) Unlike the previous two types of binning, this approach returns the specific bounds rather than the bin sizes. The other two returns, the binned diff --git a/pyinform/utils/coalesce.py index 07ca8b8..cf0926f 100644 --- a/pyinform/utils/coalesce.py +++ b/pyinform/utils/coalesce.py @@ -19,7 +19,9 @@ def coalesce_series(series): relative number of states. This function thus provides a way of "compressing" a time series into as - small a base as possible. For example :: + small a base as possible. For example + + .. doctest:: utils >>> utils.coalesce_series([2,9,2,9,9]) (array([0, 1, 0, 1, 1], dtype=int32), 2) @@ -32,11 +34,15 @@ def coalesce_series(series): The two standard use cases for this function are to reduce the base of a time series + .. doctest:: utils + >>> utils.coalesce_series([0,2,0,2,0,2]) (array([0, 1, 0, 1, 0, 1], dtype=int32), 2) or ensure that the states are non-negative + .. doctest:: utils + >>> utils.coalesce_series([-8,2,6,-2,4]) (array([0, 2, 4, 1, 3], dtype=int32), 5) diff --git a/pyinform/utils/encoding.py index 3273b9a..8ced4f1 100644 --- a/pyinform/utils/encoding.py +++ b/pyinform/utils/encoding.py @@ -39,25 +39,27 @@ def encode(state, b=None): significant bits of the encoded integer are determined by the left-most end of the unencoded state. - >>> from pyinform.utils import * - >>> encode([0,0,1], b=2) + .. doctest:: utils + + >>> utils.encode([0,0,1], b=2) 1 - >>> encode([0,1,0], b=3) + >>> utils.encode([0,1,0], b=3) 3 - >>> encode([1,0,0], b=4) + >>> utils.encode([1,0,0], b=4) 16 - >>> encode([1,0,4], b=5) + >>> utils.encode([1,0,4], b=5) 29 If *b* is not provided (or is None), the base is inferred from the state with a minimum value of 2. - >>> from pyinform.utils import * - >>> encode([0,0,2]) + .. doctest:: utils + + >>> utils.encode([0,0,2]) 2 - >>> encode([0,2,0]) + >>> utils.encode([0,2,0]) 6 - >>> encode([1,2,1]) + >>> utils.encode([1,2,1]) 16 See also :py:func:`.decode`. @@ -94,24 +96,30 @@ def decode(encoding, b, n=None): The provided encoded state is decoded using the `big-endian`__ encoding scheme. - >>> decode(2, b=2, n=2) + .. doctest:: utils + + >>> utils.decode(2, b=2, n=2) array([1, 0], dtype=int32) - >>> decode(6, b=2, n=3) + >>> utils.decode(6, b=2, n=3) array([1, 1, 0], dtype=int32) - >>> decode(6, b=3, n=2) + >>> utils.decode(6, b=3, n=2) array([2, 0], dtype=int32) Note that the base *b* must be provided, but the number of digits *n* is optional. If it is provided then the decoded state will have exactly that many elements. - >>> decode(2, b=2, n=4) + .. 
doctest:: utils + + >>> utils.decode(2, b=2, n=4) array([0, 0, 1, 0], dtype=int32) However, if *n* is too small to contain a full representation of the state, an error will be raised. - >>> decode(6, b=2, n=2) + .. doctest:: utils + + >>> utils.decode(6, b=2, n=2) Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/home/ubuntu/workspace/pyinform/utils/encoding.py", line 126, in decode If *n* is not provided, the length of the decoded state is as small as possible: - >>> decode(1, b=2) + .. doctest:: utils + + >>> utils.decode(1, b=2) array([1], dtype=int32) - >>> decode(1, b=3) + >>> utils.decode(1, b=3) array([1], dtype=int32) - >>> decode(3, b=2) + >>> utils.decode(3, b=2) array([1, 1], dtype=int32) - >>> decode(3, b=3) + >>> utils.decode(3, b=3) array([1, 0], dtype=int32) - >>> decode(3, b=4) + >>> utils.decode(3, b=4) array([3], dtype=int32) Of course :py:func:`.encode` and :py:func:`.decode` play well together. + .. doctest:: utils + >>> for i in range(100): - ... assert(encode(decode(i, b=2)) == i) + ... assert(utils.encode(utils.decode(i, b=2)) == i) ... >>>
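With ``sphinx.ext.doctest`` enabled in ``conf.py``, every example above can be checked mechanically. A minimal sketch of a runner, assuming Sphinx is installed and the documentation sources live in ``docs/source`` as in this patch (the output directory is an arbitrary choice)::

    import subprocess
    import sys

    # Run the doctest builder provided by sphinx.ext.doctest over the
    # documentation sources; a non-zero exit code means a doctest failed.
    result = subprocess.run([
        sys.executable, "-m", "sphinx",
        "-b", "doctest",
        "docs/source",
        "docs/build/doctest",
    ])
    sys.exit(result.returncode)

The same check can be run directly as ``sphinx-build -b doctest docs/source docs/build/doctest``, or as ``make doctest`` if the project uses the standard Sphinx-generated Makefile.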