Skip to content

Commit

Permalink
Changed the evaluator's default topN to 1 and added more typo options
Browse files (browse the repository at this point in the history)
  • Loading branch information
bab2min committed Apr 14, 2024
1 parent e91047b commit c6f2427
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 15 deletions.
4 changes: 2 additions & 2 deletions tools/Evaluator.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ class Evaluator
std::vector<TestResult> testsets, errors;
const kiwi::Kiwi* kw = nullptr;
kiwi::Match matchOption;
size_t topN = 3;
size_t topN = 1;
public:
Evaluator(const std::string& testSetFile, const kiwi::Kiwi* _kw, kiwi::Match _matchOption = kiwi::Match::all, size_t topN = 3);
Evaluator(const std::string& testSetFile, const kiwi::Kiwi* _kw, kiwi::Match _matchOption = kiwi::Match::all, size_t topN = 1);
void run();
Score evaluate();
const std::vector<TestResult>& getErrors() const { return errors; }
Expand Down
51 changes: 38 additions & 13 deletions tools/evaluator_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,28 @@ using namespace std;
using namespace kiwi;

int doEvaluate(const string& modelPath, const string& output, const vector<string>& input,
bool normCoda, bool zCoda, bool useSBG, float typoCostWeight, bool cTypo)
bool normCoda, bool zCoda, bool multiDict, bool useSBG,
float typoCostWeight, bool bTypo, bool cTypo,
int repeat)
{
try
{
if (typoCostWeight > 0 && !bTypo && !cTypo)
{
bTypo = true;
}
else if (typoCostWeight == 0)
{
bTypo = false;
cTypo = false;
}

DefaultTypoSet typos[] = { DefaultTypoSet::withoutTypo, DefaultTypoSet::basicTypoSet, DefaultTypoSet::continualTypoSet, DefaultTypoSet::basicTypoSetWithContinual};

tutils::Timer timer;
Kiwi kw = KiwiBuilder{ modelPath, 1, BuildOption::default_, useSBG }.build(
typoCostWeight > 0 ? (cTypo ? DefaultTypoSet::basicTypoSetWithContinual : DefaultTypoSet::basicTypoSet) : DefaultTypoSet::withoutTypo
auto option = (BuildOption::default_ & ~BuildOption::loadMultiDict) | (multiDict ? BuildOption::loadMultiDict : BuildOption::none);
Kiwi kw = KiwiBuilder{ modelPath, 1, option, useSBG }.build(
typos[(bTypo ? 1 : 0) + (cTypo ? 2 : 0)]
);
if (typoCostWeight > 0) kw.setTypoCostWeight(typoCostWeight);

Expand All @@ -34,10 +49,13 @@ int doEvaluate(const string& modelPath, const string& output, const vector<strin
cout << "Test file: " << tf << endl;
try
{
Evaluator test{ tf, &kw, (normCoda ? Match::allWithNormalizing : Match::all) & ~(zCoda ? Match::none : Match::zCoda)};
Evaluator test{ tf, &kw, (normCoda ? Match::allWithNormalizing : Match::all) & ~(zCoda ? Match::none : Match::zCoda) };
tutils::Timer total;
test.run();
double tm = total.getElapsed();
for (int i = 0; i < repeat; ++i)
{
test.run();
}
double tm = total.getElapsed() / repeat;
auto result = test.evaluate();

cout << result.micro << ", " << result.macro << endl;
Expand Down Expand Up @@ -93,21 +111,27 @@ int main(int argc, const char* argv[])

ValueArg<string> model{ "m", "model", "Kiwi model path", false, "ModelGenerator", "string" };
ValueArg<string> output{ "o", "output", "output dir for evaluation errors", false, "", "string" };
SwitchArg withoutNormCoda{ "", "wcoda", "without normalizing coda", false };
SwitchArg withoutZCoda{ "", "wzcoda", "without z-coda", false };
SwitchArg noNormCoda{ "", "no-normcoda", "without normalizing coda", false };
SwitchArg noZCoda{ "", "no-zcoda", "without z-coda", false };
SwitchArg noMulti{ "", "no-multi", "turn off multi dict", false };
SwitchArg useSBG{ "", "sbg", "use SkipBigram", false };
ValueArg<float> typoTolerant{ "", "typo", "make typo-tolerant model", false, 0.f, "float"};
ValueArg<float> typoWeight{ "", "typo", "typo weight", false, 0.f, "float"};
SwitchArg bTypo{ "", "btypo", "make basic-typo-tolerant model", false };
SwitchArg cTypo{ "", "ctypo", "make continual-typo-tolerant model", false };
ValueArg<int> repeat{ "", "repeat", "repeat evaluation for benchmark", false, 1, "int" };
UnlabeledMultiArg<string> files{ "files", "evaluation set files", true, "string" };

cmd.add(model);
cmd.add(output);
cmd.add(files);
cmd.add(withoutNormCoda);
cmd.add(withoutZCoda);
cmd.add(noNormCoda);
cmd.add(noZCoda);
cmd.add(noMulti);
cmd.add(useSBG);
cmd.add(typoTolerant);
cmd.add(typoWeight);
cmd.add(bTypo);
cmd.add(cTypo);
cmd.add(repeat);

try
{
Expand All @@ -118,6 +142,7 @@ int main(int argc, const char* argv[])
cerr << "error: " << e.error() << " for arg " << e.argId() << endl;
return -1;
}
return doEvaluate(model, output, files.getValue(), !withoutNormCoda, !withoutZCoda, useSBG, typoTolerant, cTypo);
return doEvaluate(model, output, files.getValue(),
!noNormCoda, !noZCoda, !noMulti, useSBG, typoWeight, bTypo, cTypo, repeat);
}

0 comments on commit c6f2427

Please sign in to comment.