Skip to content

Commit

Permalink
added loadMultiDict options
Browse files: browse the repository at this point in the history.
  • Loading branch information
bab2min committed Feb 4, 2024
1 parent 71f24e4 commit f3dfb4c
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 5 deletions.
4 changes: 3 additions & 1 deletion include/kiwi/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,9 @@ namespace kiwi

loadTypoDict = 1 << 2, /**< 오타 사전(typo.dict)의 로딩 여부를 설정한다.*/

default_ = integrateAllomorph | loadDefaultDict | loadTypoDict,
loadMultiDict = 1 << 3, /**< 복합명사 사전(multi.dict)의 로딩 여부를 설정한다. 복합명사 사전은 복합명사의 구성 형태소를 저장하고 있다. */

default_ = integrateAllomorph | loadDefaultDict | loadTypoDict | loadMultiDict,
};

struct Morpheme;
Expand Down
3 changes: 2 additions & 1 deletion include/kiwi/capi.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ enum
KIWI_BUILD_INTEGRATE_ALLOMORPH = 1,
KIWI_BUILD_LOAD_DEFAULT_DICT = 2,
KIWI_BUILD_LOAD_TYPO_DICT = 4,
KIWI_BUILD_DEFAULT = 7,
KIWI_BUILD_LOAD_MULTI_DICT = 8,
KIWI_BUILD_DEFAULT = 15,
KIWI_BUILD_MODEL_TYPE_KNLM = 0x0000,
KIWI_BUILD_MODEL_TYPE_SBG = 0x0100,
};
Expand Down
21 changes: 18 additions & 3 deletions src/KiwiBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,11 @@ KiwiBuilder::KiwiBuilder(const string& modelPath, size_t _numThreads, BuildOptio
loadDictionary(modelPath + "/typo.dict");
}

if (!!(options & BuildOption::loadMultiDict))
{
loadDictionary(modelPath + "/multi.dict");
}

{
ifstream ifs;
combiningRule = make_shared<cmb::CompiledRule>(cmb::RuleSet{ openFile(ifs, modelPath + string{ "/combiningRule.txt" }) }.compile());
Expand Down Expand Up @@ -1807,10 +1812,20 @@ Kiwi KiwiBuilder::build(const TypoTransformer& typos, float typoCostThreshold) c
auto ptypos = typos.prepare();
for (auto f : sortedForms)
{
for (auto t : ptypos._generate(f->form, typoCostThreshold))
// 현재는 공백이 없는 단일 단어에 대해서만 오타 교정을 수행
// 공백이 포함된 복합 명사류의 경우 오타 후보가 지나치게 많아져
// 메모리 요구량이 급격히 증가하기 때문.
if (f->numSpaces == 0)
{
for (auto t : ptypos._generate(f->form, typoCostThreshold))
{
if (t.leftCond != CondVowel::none && f->vowel != CondVowel::none && t.leftCond != f->vowel) continue;
typoGroup[removeSpace(t.str)].emplace_back(f - ret.forms.data(), t.cost, t.leftCond);
}
}
else
{
if (t.leftCond != CondVowel::none && f->vowel != CondVowel::none && t.leftCond != f->vowel) continue;
typoGroup[removeSpace(t.str)].emplace_back(f - ret.forms.data(), t.cost, t.leftCond);
typoGroup[removeSpace(f->form)].emplace_back(f - ret.forms.data(), 0, CondVowel::none);
}
}

Expand Down

0 comments on commit f3dfb4c

Please sign in to comment.