Skip to content

Commit

Permalink
Fixed a bug where AutoJoiner failed when given input with an unknown (unk) tag
Browse files Browse the repository at this point in the history
  • Loading branch information
bab2min committed Dec 25, 2023
1 parent 3a7ab76 commit 5f44f86
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions src/Joiner.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "Joiner.hpp"
#include "Joiner.hpp"
#include "FrozenTrie.hpp"

using namespace std;
Expand Down Expand Up @@ -300,24 +300,31 @@ namespace kiwi
if (!node) break;
}

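// Guard against an unknown or partial tag: substitute nnp so the tag comparisons and default-morpheme lookups below use fixedTag instead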
POSTag fixedTag = tag;
if (tag == POSTag::unknown || tag == POSTag::p)
{
fixedTag = POSTag::nnp;
}

if (node && kiwi->formTrie.hasMatch(formHead = node->val(kiwi->formTrie)))
{
Vector<const Morpheme*> cands;
foreachMorpheme(formHead, [&](const Morpheme* m)
{
if (inferRegularity && clearIrregular(m->tag) == clearIrregular(tag))
if (inferRegularity && clearIrregular(m->tag) == clearIrregular(fixedTag))
{
cands.emplace_back(m);
}
else if (!inferRegularity && m->tag == tag)
else if (!inferRegularity && m->tag == fixedTag)
{
cands.emplace_back(m);
}
});

if (cands.size() <= 1)
{
auto lmId = cands.empty() ? getDefaultMorphemeId(clearIrregular(tag)) : cands[0]->lmMorphemeId;
auto lmId = cands.empty() ? getDefaultMorphemeId(clearIrregular(fixedTag)) : cands[0]->lmMorphemeId;
if (!cands.empty()) tag = cands[0]->tag;
for (auto& cand : candidates)
{
Expand Down Expand Up @@ -373,7 +380,7 @@ namespace kiwi
}
else
{
auto lmId = getDefaultMorphemeId(clearIrregular(tag));
auto lmId = getDefaultMorphemeId(clearIrregular(fixedTag));
for (auto& cand : candidates)
{
cand.score += cand.lmState.next(kiwi->langMdl, lmId);
Expand Down

0 comments on commit 5f44f86

Please sign in to comment.