Skip to content

Commit

Permalink
added TypoTransformer to js bindings
Browse files Browse the repository at this point in the history
  • Loading branch information
RicBent committed Jun 25, 2024
1 parent 05f2cfa commit d76603c
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 9 deletions.
42 changes: 35 additions & 7 deletions bindings/wasm/kiwi_wasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,12 @@ json build(const json& args) {

const auto typos = buildArgs.value("typos", json(nullptr));

DefaultTypoSet typoSet = DefaultTypoSet::withoutTypo;
const float typoCostThreshold = buildArgs.value("typoCostThreshold", 2.5f);

if (typos.is_string()) {
if (typos.is_null()) {
instances.emplace(id, builder.build(DefaultTypoSet::withoutTypo, typoCostThreshold));
} else if (typos.is_string()) {
DefaultTypoSet typoSet = DefaultTypoSet::withoutTypo;
const std::string typosStr = typos.get<std::string>();

if (typosStr == "basic") {
Expand All @@ -242,14 +245,39 @@ json build(const json& args) {
typoSet = DefaultTypoSet::continualTypoSet;
} else if (typosStr == "basicWithContinual") {
typoSet = DefaultTypoSet::basicTypoSetWithContinual;
} else {
throw std::runtime_error("Invalid typo set: " + typosStr);
}
}

const float typoCostThreshold = buildArgs.value("typoCostThreshold", 2.5f);
instances.emplace(id, builder.build(typoSet, typoCostThreshold));
} else {
TypoTransformer typoTransformer;

for (const auto& def : typos.value("defs", json::array())) {
const float cost = def.value("cost", 1.0f);

CondVowel condVowel = CondVowel::none;
const std::string condVowelStr = def.value("condVowel", "none");

if (condVowelStr == "any") {
condVowel = CondVowel::any;
} else if (condVowelStr == "vowel") {
condVowel = CondVowel::vowel;
} else if (condVowelStr == "applosive") {
condVowel = CondVowel::applosive;
}

instances.emplace(id, builder.build(typoSet, typoCostThreshold));
for (const auto& orig8 : def["orig"]) {
const auto orig16 = utf8To16(orig8);
for (const auto& error8 : def["error"]) {
typoTransformer.addTypo(orig16, utf8To16(error8), cost, condVowel);
}
}
}

const float continualTypoCost = typos.value("continualTypoCost", 1.0f);
typoTransformer.setContinualTypoCost(continualTypoCost);

instances.emplace(id, builder.build(typoTransformer, typoCostThreshold));
}

return id;
}
Expand Down
1 change: 1 addition & 0 deletions bindings/wasm/package-demo/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 36 additions & 2 deletions bindings/wasm/package/src/build-args.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,38 @@ export interface UserWord {
origWord?: string;
};

/**
 * One custom typo rule: a set of correct strings, the misspelled forms they
 * may appear as, and the cost/condition attached to that substitution.
 *
 * The native `build` binding registers the full cross product: every entry of
 * `orig` is paired with every entry of `error`, all sharing the same cost and
 * condition.
 */
export interface TypoDefinition {
    /**
     * Source (correct) strings.
     */
    orig: string[];
    /**
     * The typo forms that should be treated as each of the `orig` strings.
     */
    error: string[];
    /**
     * Replacement cost. Defaults to 1.
     */
    cost?: number;
    /**
     * Conditions under which typos can be replaced.
     * One of `none`, `any` (after any letter), `vowel` (after a vowel), or `applosive` (after an applosive).
     * Defaults to `none` when omitted.
     *
     * This is the key the native `build` binding reads. Values other than
     * the four listed are not rejected there; they fall back to `none`.
     */
    condVowel?: "none" | "any" | "vowel" | "applosive";
    /**
     * @deprecated Use {@link TypoDefinition.condVowel}. The native `build`
     * binding looks up the key `condVowel`, so a value supplied only under
     * `condition` is not picked up and the rule behaves as `none`.
     * Kept so existing object literals continue to type-check.
     */
    condition?: "none" | "any" | "vowel" | "applosive";
}

/**
 * A user-defined typo model, accepted in place of one of the built-in typo
 * set names when building an instance.
 */
export interface TypoTransformer {
    /**
     * Cost applied to continual typos. When left out, the native binding
     * uses 1.
     */
    continualTypoCost?: number;
    /**
     * The typo generation rules, each described by a {@link TypoDefinition}.
     */
    defs: Array<TypoDefinition>;
}

export interface BuildArgs {
/**
* The model files to load. Required.
Expand Down Expand Up @@ -74,9 +106,11 @@ export interface BuildArgs {
*/
modelType?: 'knlm' | 'sbg';
/**
* The typo information to use for correction. Defaults to none, which disables typo correction.
* The typo information to use for correction.
* Can be one of the built in `none`, `basic`, `continual`, `basicWithContinual` typo sets, or a custom {@link TypoTransformer}.
* Defaults to `none`, which disables typo correction.
*/
typos?: 'basic' | 'continual' | 'basic_with_continual';
typos?: 'none' | 'basic' | 'continual' | 'basicWithContinual' | TypoTransformer;
/**
* The maximum typo cost to consider when correcting typos. Typos beyond this cost will not be explored. Defaults to 2.5.
*/
Expand Down

0 comments on commit d76603c

Please sign in to comment.