Merge pull request #446 from mj-will/auto-generate-doc-images

DOC: auto generate doc images and code outputs
mj-will · Jan 3, 2025 · a54c870 · a54c870
2 parents 1ff98a9 + d844216
commit a54c870
Show file tree

Hide file tree

Showing 14 changed files with 197 additions and 96 deletions.
diff --git a/docs/.gitignore b/docs/.gitignore
@@ -0,0 +1 @@
+example_run
diff --git a/docs/assets/insertion_indices.png b/docs/assets/insertion_indices.png
diff --git a/docs/assets/logXlogL.png b/docs/assets/logXlogL.png
diff --git a/docs/assets/posterior_distribution.png b/docs/assets/posterior_distribution.png
diff --git a/docs/assets/state.png b/docs/assets/state.png
diff --git a/docs/assets/trace.png b/docs/assets/trace.png
diff --git a/docs/conf.py b/docs/conf.py
@@ -1,8 +1,21 @@
 import os
+import subprocess
 import sys
 
 import nessai
 
+# -- Run pre-build steps script ----------------------------------------------
+
+
+def run_external_script(script_path):
+    """Run a script to generate plots."""
+    script_path = os.path.abspath(script_path)
+    subprocess.run(["python", script_path], check=True)
+
+
+# Generate plots from an example run
+run_external_script("scripts/example_run.py")
+
 # -- Path setup --------------------------------------------------------------
 
 sys.path.insert(0, os.path.abspath("../src/nessai/"))
@@ -26,6 +39,8 @@
     "sphinx.ext.inheritance_diagram",
     "sphinx_tabs.tabs",
     "autoapi.extension",
+    "IPython.sphinxext.ipython_console_highlighting",
+    "IPython.sphinxext.ipython_directive",
 ]
 templates_path = ["_templates"]
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

diff --git a/docs/further-details.rst b/docs/further-details.rst
@@ -5,54 +5,33 @@ Further details
 Using live points
 =================
 
-Live points in ``nessai`` are stored in numpy structured arrays. These are array have fields which can be accessed like the values of dictionaries but they can also be indexed. ``nessai`` include various functions for constructing live point arrays and also converting these arrays to other common python formats, see mod:`nessai.livepoint`
+Live points in ``nessai`` are stored in numpy structured arrays. These arrays have fields which can be accessed like the values of dictionaries but they can also be indexed.
+``nessai`` includes various functions for constructing live point arrays and also converting these arrays to other common python formats, see :py:mod:`nessai.livepoint`
 
 Here's an example of constructing a live point:
 
-.. code-block:: python
+.. ipython:: python
 
-    >>> from nessai.livepoint import parameters_to_live_point
-    >>> x = parameters_to_live_point([1, 2], ['a', 'b'])
-    >>> print(x)    # the live point
-    (1., 2., 0., 0.)
-    >>> print(x.dtype.names)
-    ('a', 'b', 'logP', 'logL')
-    >>> print(x['a'])    # the value of parameter 'a'
-    1.0
+    from nessai.livepoint import parameters_to_live_point
+    x = parameters_to_live_point([1, 2], ['a', 'b'])
+    print(x)    # the live point
+    print(x.dtype.names)
+    print(x['a'])    # the value of parameter 'a'
 
 
 Importantly the structured arrays used for live points can also contain multiple live points:
 
-.. code-block:: python
+.. ipython:: python
 
-    >>> from nessai.livepoint import numpy_array_to_live_points
-    >>> import numpy as np
-    >>> rng = np.random.default_rng()
-    >>> x = rng.random((10, 2))    # 10 live points with 2 parameters
-    >>> print(x)
-    [[0.72451217 0.1788154 ]
-     [0.31549832 0.55898106]
-     [0.74000576 0.73103116]
-     [0.37362176 0.25791271]
-     [0.61056168 0.05940721]
-     [0.33988486 0.54106604]
-     [0.82653691 0.14523437]
-     [0.62390321 0.32606928]
-     [0.21743918 0.23915047]
-     [0.45478996 0.09699358]]
-    >>> y = numpy_array_to_live_points(x, ['a', 'b'])    # call the parameters a and b
-    >>> print(y)
-    array([(0.72451217, 0.1788154 , 0., 0.), (0.31549832, 0.55898106, 0., 0.),
-           (0.74000576, 0.73103116, 0., 0.), (0.37362176, 0.25791271, 0., 0.),
-           (0.61056168, 0.05940721, 0., 0.), (0.33988486, 0.54106604, 0., 0.),
-           (0.82653691, 0.14523437, 0., 0.), (0.62390321, 0.32606928, 0., 0.),
-           (0.21743918, 0.23915047, 0., 0.), (0.45478996, 0.09699358, 0., 0.)],
-          dtype=[('a', '<f8'), ('b', '<f8'), ('logP', '<f8'), ('logL', '<f8')])
-    >>> y['a']    # all the values of a
-    array([0.72451217, 0.31549832, 0.74000576, 0.37362176, 0.61056168,
-           0.33988486, 0.82653691, 0.62390321, 0.21743918, 0.45478996])
-    >>> y[0]      # the first live point
-    (0.72451217, 0.1788154, 0., 0.)
+    from nessai.livepoint import numpy_array_to_live_points
+    import numpy as np
+    rng = np.random.default_rng(42)
+    x = rng.random((10, 2))    # 10 live points with 2 parameters
+    print(x)
+    y = numpy_array_to_live_points(x, ['a', 'b'])    # call the parameters a and b
+    print(y)
+    y['a']    # all the values of a
+    y[0]      # the first live point
 
 
 Using analytic priors
@@ -151,10 +130,3 @@ via :py:class:`~nessai.flowsampler.FlowSampler`.
 .. warning::
     The checkpoint callback is not included in the output of :python:`__getstate__`
     and must be specified when resuming the sampler via :py:class:`~nessai.flowsampler.FlowSampler`.
-
-
-Detailed explanation of outputs
-===============================
-
-.. note::
-    This section has not been completed yet
diff --git a/docs/gaussian-example.rst b/docs/gaussian-example.rst
@@ -13,51 +13,5 @@ Code
 Output
 ======
 
-In this examples the sampler with save the outputs to ``outdir/2d_examples/``. The following is a explanation of the files in that directory.
-
-Posterior distribution
-----------------------
-
-The posterior distribution is plotted in ``posterior_distribution.png``, this includes the distributions for the parameters that were sampled and the distribution of the log-prior and log-likelihood.
-
-.. image:: assets/posterior_distribution.png
-    :alt: posterior_distribution.png
-
-
-Trace
------
-
-The trace plot shows the nested samples for each parameter as a function of the log-prior volume.
-
-.. image:: assets/trace.png
-    :alt: trace.png
-
-
-State
------
-
-The *state* plot shows all the statistics which are tracked during sampling as a function of iteration. From top to bottom these are
-
-* The minimum and maximum log-likelihood of the current set of live points
-* The cumulative number of likelihood evaluations
-* The current log-evidence :math:`\log Z` and fraction change in evidence :math:`\text{d}Z`
-* The acceptance of the population and proposal stages alongside the radius use for each population stage.
-* The :math:`p`-value of the insertion indices every ``nlive`` live points
-
-The iterations at which the normalising flow has been trained are indicated with vertical lines and total sampling-time is shown at the top of the plot.
-
-.. image:: assets/state.png
-    :alt: state.png
-
-
-Insertion indices
------------------
-
-The distribution of the insertion indices for all of the nested samples is shown on the left along with the expect uniform distribution and the 1-sigma bounds determined by the total number of live points. The cumulative mass function is shown on the right where the uniform function is shown with a dashed line, the overall distribution shown in blue and the distribution every ``nlive`` live point shown in light grey.
-
-
-.. image:: assets/insertion_indices.png
-    :alt: insertion_indices.png
-
-
-This plot is useful when checking if the sampler is correctly converged, a non-uniform distribution indicates the sampler is either under or over-constrained.
+In this example the sampler will save the outputs to ``outdir/2d_examples/``.
+For an explanation of the outputs see :ref:`understanding the outputs`.
diff --git a/docs/index.rst b/docs/index.rst
@@ -15,6 +15,7 @@ For questions or other support, please either use our `gitter room <https://app.
 
    installation
    running-the-sampler
+   outputs
    sampler-configuration
    importance-nested-sampling
    reparameterisations
@@ -72,6 +73,10 @@ If you find ``nessai`` useful in your work please cite the DOI for this code and
       archivePrefix = "arXiv",
       primaryClass = "astro-ph.IM",
       reportNumber = "LIGO-P2200283",
-      month = "2",
+      doi = "10.1088/2632-2153/acd5aa",
+      journal = "Mach. Learn. Sci. Tech.",
+      volume = "4",
+      number = "3",
+      pages = "035011",
       year = "2023"
    }
diff --git a/docs/outputs.rst b/docs/outputs.rst
@@ -0,0 +1,135 @@
+Understanding the outputs
+=========================
+
+This page describes the outputs of the standard sampler and how to interpret them.
+
+Logging output
+--------------
+
+If the logger has been configured, the sampler will output various information
+to the terminal and/or log file. By default, the logging level is set to
+``INFO`` which will output the progress of the sampler and any warnings or
+errors.
+
+By default, the sampler will log every :code:`nlive` iterations and the log will
+look something like this:
+
+.. code-block:: console
+
+        12-20 12:26 nessai INFO    : it:  6000: Rolling KS test: D=0.0325, p-value=0.0143
+        12-20 12:26 nessai INFO    : it:  6000: n eval: 23744 H: 3.10 dlogZ: 4.172 logZ: -8.996 +/- 0.039 logLmax: -1.84
+
+The first line summarises the results of the Kolmogorov-Smirnov test for
+the insertion indices. The second line shows the following:
+
+- :code:`n eval` is the total number of likelihood evaluations
+- :code:`H` is the current information
+- :code:`dlogZ` is the change in log-evidence, this is used as the stopping criterion
+- :code:`logZ` is the current log-evidence
+- :code:`logLmax` is the maximum log-likelihood
+
+
+Configuring logging
+^^^^^^^^^^^^^^^^^^^
+
+The logger is configured via the :py:func:`nessai.utils.logging.configure_logger` function.
+This allows the user to change logging level, output file and format as well as
+some other options. For more information see documentation for the function.
+
+The logging output from the sampler can also be configured to change its verbosity
+and frequency. This is done via the following options:
+
+- :code:`log_on_iteration` sets whether the sampler logs on iteration or time.
+- :code:`logging_interval` sets the interval at which the sampler logs information. If :code:`log_on_iteration` is set to :code:`True`, the sampler will log every :code:`logging_interval` iterations. If :code:`log_on_iteration` is set to :code:`False`, the sampler will log every :code:`logging_interval` seconds.
+
+These options can be passed when configuring the sampler. The standard sampler
+and importance nested sampler have different defaults for these options.
+
+Output files
+------------
+
+The sampler will output various files to the output directory. These include
+files describing the sampler configuration, a result file, files for resuming
+the sampler and various plots (see :ref:`plots`). The specific files are:
+
+- :code:`config.json` - A JSON file containing the configuration of the sampler.
+- :code:`result.hdf5` or :code:`result.json` - A file containing the results of the sampler. The default format is :code:`hdf5` but this can be changed to :code:`json` by setting :code:`result_extension='json'` in the sampler configuration.
+- :code:`nested_sampler_resume.pkl` - A pickle file containing the state of the sampler which can be used to resume the sampler.
+
+There are also various subdirectories which are created by the sampler:
+
+- :code:`proposal` - Contains the proposal config (:code:`flow_config.json`), file for resuming (:code:`model.pt`) and any plots that are enabled.
+- :code:`diagnostics` - Contains additional diagnostic plots.
+
+
+Plots
+-----
+
+If :code:`plot=True`, the sampler will automatically generate various plots
+which are saved in the output directory. These plots are useful for checking
+the convergence of the sampler and the quality of the samples.
+
+Posterior distribution
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The posterior distribution is plotted in ``posterior_distribution.png``, this
+includes the distributions for the parameters that were sampled and the
+distribution of the log-prior, log-likelihood and the iteration at which the
+sample was drawn.
+
+.. image:: example_run/posterior_distribution.png
+    :alt: posterior_distribution.png
+
+
+Trace
+^^^^^
+
+The trace plot shows the nested samples for each parameter as a function of the log-prior volume.
+Whilst the sampler is running, the current live points will be shown in red.
+
+.. image:: example_run/trace.png
+    :alt: trace.png
+
+
+State
+^^^^^
+
+The *state* plot shows all the statistics which are tracked during sampling as a function of iteration. From top to bottom these are:
+
+* The minimum and maximum log-likelihood of the current set of live points
+* The cumulative number of likelihood evaluations
+* The current log-evidence :math:`\log Z` and fraction change in evidence :math:`\text{d}Z`
+* The acceptance of the population and proposal stages alongside the radius used for each population stage.
+* The :math:`p`-value of the insertion indices every ``nlive`` live points
+
+The iterations at which the normalising flow has been trained are indicated with vertical lines and total sampling-time is shown at the top of the plot.
+
+.. image:: example_run/state.png
+    :alt: state.png
+
+
+Insertion indices
+^^^^^^^^^^^^^^^^^^
+
+The distribution of the insertion indices for all of the nested samples is shown
+on the left along with the expected uniform distribution and the 1-sigma bounds
+determined by the total number of live points.
+
+The middle and right-hand plots show the difference between the analytic
+and estimated cumulative mass functions. The middle plot shows the difference
+between the CMFs for the entire run and the right-hand plot shows the difference
+for 8 equally sized sections of the run, lighter colours indicate later sections.
+
+
+.. image:: example_run/insertion_indices.png
+    :alt: insertion_indices.png
+
+
+This plot is useful when checking if the sampler is correctly converged; a non-uniform distribution indicates the sampler is either under or over-constrained.
+
+
+Diagnostic plots
+^^^^^^^^^^^^^^^^
+
+Additional diagnostic plots are saved in the :code:`diagnostics` directory. These show
+the distribution of the insertion indices every :code:`nlive` iterations.
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -3,3 +3,6 @@ numpydoc
 sphinx-autoapi
 sphinx_tabs
 sphinx-book-theme
+nessai
+nessai-models
+ipython
diff --git a/docs/running-the-sampler.rst b/docs/running-the-sampler.rst
@@ -63,7 +63,7 @@ Once the sampler has converged the results and other automatically generated plo
 * two resume files (``.pkl``) used for resuming the sampler.
 * ``config.json``: the exact configuration used for the sampler.
 
-For a more detail explanation of outputs and examples, see :ref:`here<Detailed explanation of outputs>`
+For a more detailed explanation of outputs and examples, see :ref:`here<understanding the outputs>`
 
 
 Complete examples

diff --git a/docs/scripts/example_run.py b/docs/scripts/example_run.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+"""
+Script to generate example plots for a run with nessai
+"""
+
+from nessai_models import Gaussian
+
+from nessai.flowsampler import FlowSampler
+
+output_dir = "example_run"
+model = Gaussian()
+
+fs = FlowSampler(
+    model, nlive=500, output=output_dir, checkpointing=False, resume=False
+)
+fs.run()