Skip to content

Commit

Permalink
Fix some more SimpleStringEmission errors caught by the test
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasColthurst committed Jun 13, 2024
1 parent f72023c commit fedb22a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
7 changes: 7 additions & 0 deletions cxx/emissions/simple_string.hh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <cstdio>
#include <unordered_map>
#include "emissions/base.hh"
#include "distributions/beta_bernoulli.hh"
Expand Down Expand Up @@ -156,6 +157,11 @@ class SimpleStringEmission : public Emission<std::string> {
const std::vector<std::string>& corrupted,
std::mt19937* unused_prng) {
std::string clean;
// This implementation does simple voting per absolute string position.
// A better version would first average the corrupted string lengths to
// get a target length for clean, and then for each position i in
// clean, find the mode among the characters
// corrupted[j][i * corrupted[j].length() / clean_length] over all j.
size_t i = 0;
while (true) {
std::unordered_map<char, int> counts;
Expand All @@ -178,6 +184,7 @@ class SimpleStringEmission : public Emission<std::string> {
return clean;
}
clean = clean + mode;
++i;
}
}

Expand Down
5 changes: 3 additions & 2 deletions cxx/emissions/simple_string_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ BOOST_AUTO_TEST_CASE(test_simple) {
BOOST_TEST(ss.N == 0);
ss.incorporate(std::make_pair<std::string, std::string>("hello", "hello"));
BOOST_TEST(ss.N == 1);
ss.incorporate(std::make_pair<std::string, std::string>("hello", "hello"));
ss.unincorporate(std::make_pair<std::string, std::string>("hello", "hello"));
BOOST_TEST(ss.N == 0);
BOOST_TEST(orig_lp == ss.logp_score());

Expand All @@ -27,7 +27,8 @@ BOOST_AUTO_TEST_CASE(test_simple) {

std::mt19937 prng;
std::string corrupted = ss.sample_corrupted("clean", &prng);
BOOST_TEST(corrupted == "clean");
BOOST_TEST(corrupted.length() > 3);
BOOST_TEST(corrupted.length() < 7);

BOOST_TEST(ss.propose_clean({"clean", "clean!", "cl5an", "lean"}, &prng)
== "clean");
Expand Down

0 comments on commit fedb22a

Please sign in to comment.