From c318484bf5c26ea54602b861a69d9edcd36ab909 Mon Sep 17 00:00:00 2001
From: Srinivas Vasudevan <srvasude@google.com>
Date: Thu, 6 Jun 2024 11:37:07 -0700
Subject: [PATCH] Separate out header and source files for distributions

---
 cxx/distributions/BUILD                    |  12 ++-
 cxx/distributions/adapter.hh               |   4 +-
 cxx/distributions/beta_bernoulli.cc        |  63 +++++++++++
 cxx/distributions/beta_bernoulli.hh        |  57 ++--------
 cxx/distributions/bigram.cc                | 120 +++++++++++++++++++++
 cxx/distributions/bigram.hh                | 118 +++-----------------
 cxx/distributions/dirichlet_categorical.cc |  65 +++++++++++
 cxx/distributions/dirichlet_categorical.hh |  66 +++---------
 cxx/distributions/normal.hh                |   2 +-
 9 files changed, 293 insertions(+), 214 deletions(-)
 create mode 100644 cxx/distributions/beta_bernoulli.cc
 create mode 100644 cxx/distributions/bigram.cc
 create mode 100644 cxx/distributions/dirichlet_categorical.cc
diff --git a/cxx/distributions/BUILD b/cxx/distributions/BUILD
index df6b4da..61f12b5 100644
--- a/cxx/distributions/BUILD
+++ b/cxx/distributions/BUILD
@@ -1,14 +1,13 @@
 licenses(["notice"])
 
-
 cc_library(
     name = "distributions",
     visibility = ["//:__subpackages__"],
     deps = [
         ":adapter",
         ":base",
-        ":bigram",
         ":beta_bernoulli",
+        ":bigram",
         ":dirichlet_categorical",
         ":normal",
     ],
@@ -31,6 +30,7 @@ cc_library(
 
 cc_library(
     name = "beta_bernoulli",
+    srcs = ["beta_bernoulli.cc"],
     hdrs = ["beta_bernoulli.hh"],
     visibility = ["//:__subpackages__"],
     deps = [
@@ -42,6 +42,7 @@ cc_library(
 
 cc_library(
     name = "bigram",
+    srcs = ["bigram.cc"],
     hdrs = ["bigram.hh"],
     visibility = ["//:__subpackages__"],
     deps = [
@@ -54,7 +55,8 @@ cc_library(
 
 cc_library(
     name = "dirichlet_categorical",
-    srcs = ["dirichlet_categorical.hh"],
+    srcs = ["dirichlet_categorical.cc"],
+    hdrs = ["dirichlet_categorical.hh"],
     visibility = ["//:__subpackages__"],
     deps = [
         ":base",
@@ -64,8 +66,8 @@ cc_library(
 
 cc_library(
     name = "normal",
-    hdrs = ["normal.hh"],
     srcs = ["normal.cc"],
+    hdrs = ["normal.hh"],
     visibility = ["//:__subpackages__"],
     deps = [
         ":base",
@@ -109,8 +111,8 @@ cc_test(
     name = "dirichlet_categorical_test",
     srcs = ["dirichlet_categorical_test.cc"],
     deps = [
-        ":dirichlet_categorical",
         ":beta_bernoulli",
+        ":dirichlet_categorical",
         "@boost//:algorithm",
         "@boost//:test",
     ],
diff --git a/cxx/distributions/adapter.hh b/cxx/distributions/adapter.hh
index aef5a8a..b679990 100644
--- a/cxx/distributions/adapter.hh
+++ b/cxx/distributions/adapter.hh
@@ -56,9 +56,7 @@ class DistributionAdapter : public Distribution<std::string> {
     return to_string(s);
   }
 
-  void transition_hyperparameters() {
-    d->transition_hyperparameters();
-  }
+  void transition_hyperparameters() { d->transition_hyperparameters(); }
 
   ~DistributionAdapter() { delete d; }
 };
diff --git a/cxx/distributions/beta_bernoulli.cc b/cxx/distributions/beta_bernoulli.cc
new file mode 100644
index 0000000..032e09c
--- /dev/null
+++ b/cxx/distributions/beta_bernoulli.cc
@@ -0,0 +1,63 @@
+// Copyright 2024
+// See LICENSE.txt
+
+#include "distributions/beta_bernoulli.hh"
+
+#include <cassert>
+
+#include "util_math.hh"
+
+void BetaBernoulli::incorporate(const double& x) {
+  assert(x == 0 || x == 1);
+  ++N;
+  s += x;
+}
+
+void BetaBernoulli::unincorporate(const double& x) {
+  assert(x == 0 || x == 1);
+  --N;
+  s -= x;
+  assert(0 <= s);
+  assert(0 <= N);
+}
+
+double BetaBernoulli::logp(const double& x) const {
+  assert(x == 0 || x == 1);
+  double log_denom = log(N + alpha + beta);
+  double log_numer = x ? log(s + alpha) : log(N - s + beta);
+  return log_numer - log_denom;
+}
+
+double BetaBernoulli::logp_score() const {
+  double v1 = lbeta(s + alpha, N - s + beta);
+  double v2 = lbeta(alpha, beta);
+  return v1 - v2;
+}
+
+double BetaBernoulli::sample() {
+  double p = exp(logp(1));
+  std::vector<int> items{0, 1};
+  std::vector<double> weights{1 - p, p};
+  int idx = choice(weights, prng);
+  return items[idx];
+}
+
+void BetaBernoulli::transition_hyperparameters() {
+  std::vector<double> logps;
+  std::vector<std::pair<double, double>> hypers;
+  // C++ doesn't yet allow range for-loops over existing variables.  Sigh.
+  for (double alphat : alpha_grid) {
+    for (double betat : beta_grid) {
+      alpha = alphat;
+      beta = betat;
+      double lp = logp_score();
+      if (!std::isnan(lp)) {
+        logps.push_back(logp_score());
+        hypers.push_back(std::make_pair(alpha, beta));
+      }
+    }
+  }
+  int i = sample_from_logps(logps, prng);
+  alpha = hypers[i].first;
+  beta = hypers[i].second;
+}
diff --git a/cxx/distributions/beta_bernoulli.hh b/cxx/distributions/beta_bernoulli.hh
index 6dcea49..d0b0ce7 100644
--- a/cxx/distributions/beta_bernoulli.hh
+++ b/cxx/distributions/beta_bernoulli.hh
@@ -23,57 +23,16 @@ class BetaBernoulli : public Distribution<double> {
     alpha_grid = log_linspace(1e-4, 1e4, 10, true);
     beta_grid = log_linspace(1e-4, 1e4, 10, true);
   }
-  void incorporate(const double& x) {
-    assert(x == 0 || x == 1);
-    ++N;
-    s += x;
-  }
-  void unincorporate(const double& x) {
-    assert(x == 0 || x == 1);
-    --N;
-    s -= x;
-    assert(0 <= s);
-    assert(0 <= N);
-  }
 
-  double logp(const double& x) const {
-    assert(x == 0 || x == 1);
-    double log_denom = log(N + alpha + beta);
-    double log_numer = x ? log(s + alpha) : log(N - s + beta);
-    return log_numer - log_denom;
-  }
+  void incorporate(const double& x);
 
-  double logp_score() const {
-    double v1 = lbeta(s + alpha, N - s + beta);
-    double v2 = lbeta(alpha, beta);
-    return v1 - v2;
-  }
+  void unincorporate(const double& x);
 
-  double sample() {
-    double p = exp(logp(1));
-    std::vector<int> items{0, 1};
-    std::vector<double> weights{1 - p, p};
-    int idx = choice(weights, prng);
-    return items[idx];
-  }
+  double logp(const double& x) const;
 
-  void transition_hyperparameters() {
-    std::vector<double> logps;
-    std::vector<std::pair<double, double>> hypers;
-    // C++ doesn't yet allow range for-loops over existing variables.  Sigh.
-    for (double alphat : alpha_grid) {
-      for (double betat : beta_grid) {
-        alpha = alphat;
-        beta = betat;
-        double lp = logp_score();
-        if (!std::isnan(lp)) {
-          logps.push_back(logp_score());
-          hypers.push_back(std::make_pair(alpha, beta));
-        }
-      }
-    }
-    int i = sample_from_logps(logps, prng);
-    alpha = hypers[i].first;
-    beta = hypers[i].second;
-  }
+  double logp_score() const;
+
+  double sample();
+
+  void transition_hyperparameters();
 };
diff --git a/cxx/distributions/bigram.cc b/cxx/distributions/bigram.cc
new file mode 100644
index 0000000..147b729
--- /dev/null
+++ b/cxx/distributions/bigram.cc
@@ -0,0 +1,120 @@
+// Copyright 2024
+// See LICENSE.txt
+
+#include "distributions/bigram.hh"
+
+#include <cassert>
+
+#include "distributions/base.hh"
+
+void Bigram::assert_valid_char(const char c) const {
+  assert(c >= ' ' && c <= '~');
+}
+
+size_t Bigram::char_to_index(const char c) const {
+  assert_valid_char(c);
+  return c - ' ';
+}
+
+char Bigram::index_to_char(const size_t i) const {
+  const char c = i + ' ';
+  assert_valid_char(c);
+  return c;
+}
+
+std::vector<size_t> Bigram::string_to_indices(const std::string& str) const {
+  // Convert the string to a vector of indices between 0 and `num_chars`,
+  // with a start/stop symbol at the beginning/end.
+  std::vector<size_t> inds = {num_chars};
+  for (const char& c : str) {
+    inds.push_back(char_to_index(c));
+  }
+  inds.push_back(num_chars);
+  return inds;
+}
+
+void Bigram::incorporate(const std::string& x) {
+  const std::vector<size_t> indices = string_to_indices(x);
+  for (size_t i = 0; i != indices.size() - 1; ++i) {
+    transition_dists[indices[i]].incorporate(indices[i + 1]);
+  }
+  ++N;
+}
+
+void Bigram::unincorporate(const std::string& s) {
+  const std::vector<size_t> indices = string_to_indices(s);
+  for (size_t i = 0; i != indices.size() - 1; ++i) {
+    transition_dists[indices[i]].unincorporate(indices[i + 1]);
+  }
+  --N;
+}
+
+double Bigram::logp(const std::string& s) const {
+  const std::vector<size_t> indices = string_to_indices(s);
+  double total_logp = 0.0;
+  for (size_t i = 0; i != indices.size() - 1; ++i) {
+    total_logp += transition_dists[indices[i]].logp(indices[i + 1]);
+    // Incorporate each value so that subsequent probabilities are
+    // conditioned on it.
+    transition_dists[indices[i]].incorporate(indices[i + 1]);
+  }
+  for (size_t i = 0; i != indices.size() - 1; ++i) {
+    transition_dists[indices[i]].unincorporate(indices[i + 1]);
+  }
+  return total_logp;
+}
+
+double Bigram::logp_score() const {
+  double logp = 0;
+  for (const auto& d : transition_dists) {
+    logp += d.logp_score();
+  }
+  return logp;
+}
+
+std::string Bigram::sample() {
+  std::string sampled_string;
+  // TODO(emilyaf): Reconsider the reserved length and maybe enforce a
+  // max length.
+  sampled_string.reserve(30);
+  // Sample the first character conditioned on the stop/start symbol.
+  size_t current_ind = num_chars;
+  size_t next_ind = transition_dists[current_ind].sample();
+  transition_dists[current_ind].incorporate(next_ind);
+  current_ind = next_ind;
+
+  // Sample additional characters until the stop/start symbol is sampled.
+  // Incorporate the sampled character at each loop iteration so that
+  // subsequent samples are conditioned on its observation.
+  while (current_ind != num_chars) {
+    sampled_string += index_to_char(current_ind);
+    next_ind = transition_dists[current_ind].sample();
+    transition_dists[current_ind].incorporate(next_ind);
+    current_ind = next_ind;
+  }
+  unincorporate(sampled_string);
+  return sampled_string;
+}
+
+void Bigram::set_alpha(double alphat) {
+  alpha = alphat;
+  for (auto& trans_dist : transition_dists) {
+    trans_dist.alpha = alpha;
+  }
+}
+
+void Bigram::transition_hyperparameters() {
+  std::vector<double> logps;
+  std::vector<double> alphas;
+  // C++ doesn't yet allow range for-loops over existing variables.  Sigh.
+  for (double alphat : ALPHA_GRID) {
+    set_alpha(alphat);
+    double lp = logp_score();
+    if (!std::isnan(lp)) {
+      logps.push_back(logp_score());
+      alphas.push_back(alphat);
+    }
+  }
+  int i = sample_from_logps(logps, prng);
+  set_alpha(alphas[i]);
+}
diff --git a/cxx/distributions/bigram.hh b/cxx/distributions/bigram.hh
index 07f633f..6fc664a 100644
--- a/cxx/distributions/bigram.hh
+++ b/cxx/distributions/bigram.hh
@@ -2,33 +2,19 @@
 // See LICENSE.txt
 
 #pragma once
-#include <cassert>
 
-#include "base.hh"
-#include "dirichlet_categorical.hh"
+#include "distributions/base.hh"
+#include "distributions/dirichlet_categorical.hh"
 
 class Bigram : public Distribution<std::string> {
  private:
-  void assert_valid_char(const char c) const { assert(c >= ' ' && c <= '~'); }
-  size_t char_to_index(const char c) const {
-    assert_valid_char(c);
-    return c - ' ';
-  }
-  char index_to_char(const size_t i) const {
-    const char c = i + ' ';
-    assert_valid_char(c);
-    return c;
-  }
-  std::vector<size_t> string_to_indices(const std::string& str) const {
-    // Convert the string to a vector of indices between 0 and `num_chars`,
-    // with a start/stop symbol at the beginning/end.
-    std::vector<size_t> inds = {num_chars};
-    for (const char& c : str) {
-      inds.push_back(char_to_index(c));
-    }
-    inds.push_back(num_chars);
-    return inds;
-  }
+  void assert_valid_char(const char c) const;
+
+  size_t char_to_index(const char c) const;
+
+  char index_to_char(const size_t i) const;
+
+  std::vector<size_t> string_to_indices(const std::string& str) const;
 
  public:
   double alpha = 1;  // hyperparameter for all transition distributions.
@@ -48,89 +34,17 @@ class Bigram : public Distribution<std::string> {
     }
   }
 
-  void incorporate(const std::string& x) {
-    const std::vector<size_t> indices = string_to_indices(x);
-    for (size_t i = 0; i != indices.size() - 1; ++i) {
-      transition_dists[indices[i]].incorporate(indices[i + 1]);
-    }
-    ++N;
-  }
+  void incorporate(const std::string& x);
 
-  void unincorporate(const std::string& s) {
-    const std::vector<size_t> indices = string_to_indices(s);
-    for (size_t i = 0; i != indices.size() - 1; ++i) {
-      transition_dists[indices[i]].unincorporate(indices[i + 1]);
-    }
-    --N;
-  }
+  void unincorporate(const std::string& s);
 
-  double logp(const std::string& s) const {
-    const std::vector<size_t> indices = string_to_indices(s);
-    double total_logp = 0.0;
-    for (size_t i = 0; i != indices.size() - 1; ++i) {
-      total_logp += transition_dists[indices[i]].logp(indices[i + 1]);
-      // Incorporate each value so that subsequent probabilities are
-      // conditioned on it.
-      transition_dists[indices[i]].incorporate(indices[i + 1]);
-    }
-    for (size_t i = 0; i != indices.size() - 1; ++i) {
-      transition_dists[indices[i]].unincorporate(indices[i + 1]);
-    }
-    return total_logp;
-  }
+  double logp(const std::string& s) const;
 
-  double logp_score() const {
-    double logp = 0;
-    for (const auto& d : transition_dists) {
-      logp += d.logp_score();
-    }
-    return logp;
-  }
+  double logp_score() const;
 
-  std::string sample() {
-    std::string sampled_string;
-    // TODO(emilyaf): Reconsider the reserved length and maybe enforce a
-    // max length.
-    sampled_string.reserve(30);
-    // Sample the first character conditioned on the stop/start symbol.
-    size_t current_ind = num_chars;
-    size_t next_ind = transition_dists[current_ind].sample();
-    transition_dists[current_ind].incorporate(next_ind);
-    current_ind = next_ind;
-
-    // Sample additional characters until the stop/start symbol is sampled.
-    // Incorporate the sampled character at each loop iteration so that
-    // subsequent samples are conditioned on its observation.
-    while (current_ind != num_chars) {
-      sampled_string += index_to_char(current_ind);
-      next_ind = transition_dists[current_ind].sample();
-      transition_dists[current_ind].incorporate(next_ind);
-      current_ind = next_ind;
-    }
-    unincorporate(sampled_string);
-    return sampled_string;
-  }
+  std::string sample();
 
-  void set_alpha(double alphat) {
-    alpha = alphat;
-    for (auto& trans_dist : transition_dists) {
-      trans_dist.alpha = alpha;
-    }
-  }
+  void set_alpha(double alphat);
 
-  void transition_hyperparameters() {
-    std::vector<double> logps;
-    std::vector<double> alphas;
-    // C++ doesn't yet allow range for-loops over existing variables.  Sigh.
-    for (double alphat : ALPHA_GRID) {
-      set_alpha(alphat);
-      double lp = logp_score();
-      if (!std::isnan(lp)) {
-        logps.push_back(logp_score());
-        alphas.push_back(alphat);
-      }
-    }
-    int i = sample_from_logps(logps, prng);
-    set_alpha(alphas[i]);
-  }
+  void transition_hyperparameters();
 };
diff --git a/cxx/distributions/dirichlet_categorical.cc b/cxx/distributions/dirichlet_categorical.cc
new file mode 100644
index 0000000..c153ec9
--- /dev/null
+++ b/cxx/distributions/dirichlet_categorical.cc
@@ -0,0 +1,65 @@
+// Copyright 2024
+// See LICENSE.txt
+
+#include "distributions/dirichlet_categorical.hh"
+
+#include <algorithm>
+#include <cassert>
+#include <random>
+
+#include "util_math.hh"
+
+void DirichletCategorical::incorporate(const double& x) {
+  assert(x >= 0 && x < counts.size());
+  counts[size_t(x)] += 1;
+  ++N;
+}
+
+void DirichletCategorical::unincorporate(const double& x) {
+  const size_t y = x;
+  assert(y < counts.size());
+  counts[y] -= 1;
+  --N;
+  assert(0 <= counts[y]);
+  assert(0 <= N);
+}
+
+double DirichletCategorical::logp(const double& x) const {
+  assert(x >= 0 && x < counts.size());
+  const double numer = log(alpha + counts[size_t(x)]);
+  const double denom = log(N + alpha * counts.size());
+  return numer - denom;
+}
+
+double DirichletCategorical::logp_score() const {
+  const size_t k = counts.size();
+  const double a = alpha * k;
+  double lg = 0;
+  for (size_t x : counts) {
+    lg += lgamma(static_cast<double>(x) + alpha);
+  }
+  return lgamma(a) - lgamma(a + N) + lg - k * lgamma(alpha);
+}
+double DirichletCategorical::sample() {
+  std::vector<double> weights(counts.size());
+  std::transform(counts.begin(), counts.end(), weights.begin(),
+                 [&](size_t y) -> double { return y + alpha; });
+  int idx = choice(weights, prng);
+  return double(idx);
+}
+
+void DirichletCategorical::transition_hyperparameters() {
+  std::vector<double> logps;
+  std::vector<double> alphas;
+  // C++ doesn't yet allow range for-loops over existing variables.  Sigh.
+  for (double alphat : ALPHA_GRID) {
+    alpha = alphat;
+    double lp = logp_score();
+    if (!std::isnan(lp)) {
+      logps.push_back(logp_score());
+      alphas.push_back(alpha);
+    }
+  }
+  int i = sample_from_logps(logps, prng);
+  alpha = alphas[i];
+}
diff --git a/cxx/distributions/dirichlet_categorical.hh b/cxx/distributions/dirichlet_categorical.hh
index b17d59d..0fe7150 100644
--- a/cxx/distributions/dirichlet_categorical.hh
+++ b/cxx/distributions/dirichlet_categorical.hh
@@ -2,12 +2,9 @@
 // See LICENSE.txt
 
 #pragma once
-#include <algorithm>
-#include <cassert>
-#include <iostream>
 #include <random>
 
-#include "base.hh"
+#include "distributions/base.hh"
 #include "util_math.hh"
 
 #define ALPHA_GRID \
@@ -25,54 +22,15 @@ class DirichletCategorical : public Distribution<double> {
     this->prng = prng;
     counts = std::vector<int>(k, 0);
   }
-  void incorporate(const double& x) {
-    assert(x >= 0 && x < counts.size());
-    counts[size_t(x)] += 1;
-    ++N;
-  }
-  void unincorporate(const double& x) {
-    const size_t y = x;
-    assert(y < counts.size());
-    counts[y] -= 1;
-    --N;
-    assert(0 <= counts[y]);
-    assert(0 <= N);
-  }
-  double logp(const double& x) const {
-    assert(x >= 0 && x < counts.size());
-    const double numer = log(alpha + counts[size_t(x)]);
-    const double denom = log(N + alpha * counts.size());
-    return numer - denom;
-  }
-  double logp_score() const {
-    const size_t k = counts.size();
-    const double a = alpha * k;
-    double lg = 0;
-    for (size_t x : counts) {
-      lg += lgamma(static_cast<double>(x) + alpha);
-    }
-    return lgamma(a) - lgamma(a + N) + lg - k * lgamma(alpha);
-  }
-  double sample() {
-    std::vector<double> weights(counts.size());
-    std::transform(counts.begin(), counts.end(), weights.begin(),
-                   [&](size_t y) -> double { return y + alpha; });
-    int idx = choice(weights, prng);
-    return double(idx);
-  }
-  void transition_hyperparameters() {
-    std::vector<double> logps;
-    std::vector<double> alphas;
-    // C++ doesn't yet allow range for-loops over existing variables.  Sigh.
-    for (double alphat : ALPHA_GRID) {
-      alpha = alphat;
-      double lp = logp_score();
-      if (!std::isnan(lp)) {
-        logps.push_back(logp_score());
-        alphas.push_back(alpha);
-      }
-    }
-    int i = sample_from_logps(logps, prng);
-    alpha = alphas[i];
-  }
+  void incorporate(const double& x);
+
+  void unincorporate(const double& x);
+
+  double logp(const double& x) const;
+
+  double logp_score() const;
+
+  double sample();
+
+  void transition_hyperparameters();
 };
diff --git a/cxx/distributions/normal.hh b/cxx/distributions/normal.hh
index e230f79..aed4552 100644
--- a/cxx/distributions/normal.hh
+++ b/cxx/distributions/normal.hh
@@ -50,7 +50,7 @@ class Normal : public Distribution<double> {
 
   void unincorporate(const double& x);
 
-  void posterior_hypers(double *mprime, double *sprime) const;
+  void posterior_hypers(double* mprime, double* sprime) const;
 
   double logp(const double& x) const;