Skip to content

Commit

Permalink
Use unicode-normalization module
Browse files Browse the repository at this point in the history
  • Loading branch information
neet committed Aug 25, 2024
1 parent 201f599 commit 3f39355
Showing 5 changed files with 20 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@

A collection of utilities for working with the Ainu language

## Rleases
## Releases

ainu-utils is distributed as a Rust crate, but you can also use its binding for Python and Node.js.

2 changes: 1 addition & 1 deletion ainu-utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -6,6 +6,6 @@ description = "A collection of utilities for the Ainu language"
license = "MIT"

[dependencies]
diacritics = "0.2.2"
once_cell = "1.19.0"
regex = "1.10.2"
unicode-normalization = "0.1.23"
9 changes: 7 additions & 2 deletions ainu-utils/src/kana/kana.rs
Original file line number Diff line number Diff line change
@@ -2,11 +2,16 @@ use super::constants::{CONSONANTS, SPECIAL_CONSONANTS, VOWELS};
use super::linking::link;
use super::maps::{TABLE_1, TABLE_2};
use super::symbols::symbols;
use diacritics::remove_diacritics;
use unicode_normalization::char::is_combining_mark;
use unicode_normalization::UnicodeNormalization;

/// Returns a copy of `input` with combining marks removed.
///
/// The text is first decomposed with NFKD normalization; every resulting
/// code point that `is_combining_mark` reports as a combining mark is
/// dropped, and the remaining code points are collected in order.
fn strip_accents(input: &str) -> String {
    let mut stripped = String::new();
    for ch in input.nfkd() {
        if is_combining_mark(ch) {
            // Skip the detached accent; the base character was already kept.
            continue;
        }
        stripped.push(ch);
    }
    stripped
}

/// Normalizes `input` by lowercasing it and then removing diacritics.
///
/// Takes ownership of `input` and returns the normalized `String`.
fn normalize(mut input: String) -> String {
// Lowercase the whole string.
input = input.to_lowercase();
// NOTE(review): this listing comes from a commit diff view; this `remove_diacritics` call looks like
// the line being replaced by the `strip_accents` call below — confirm against the checked-out file.
input = remove_diacritics(&input).to_string();
// Decompose the text (NFKD) and drop combining marks via `strip_accents`.
input = strip_accents(input.as_str()).to_string();
input
}

5 changes: 5 additions & 0 deletions ainu-utils/src/kana/kana_test.rs
Original file line number Diff line number Diff line change
@@ -163,3 +163,8 @@ fn test_k_prefix() {
"イランカラㇷ゚テ。 カニ アナㇰ イモ ケ エアㇱカイ クㇽ クネ。"
)
}

/// Checks that `to_kana` converts the accented input "kamúy" to "カムイ".
#[test]
fn test_diacritics() {
    let kana = to_kana("kamúy");
    assert_eq!(kana, "カムイ");
}
7 changes: 6 additions & 1 deletion cspell.json
Original file line number Diff line number Diff line change
@@ -3,7 +3,12 @@
"ignorePaths": [],
"dictionaryDefinitions": [],
"dictionaries": [],
"words": ["bindgen", "pyfunction", "pymodule"],
"words": [
"bindgen",
"nfkd",
"pyfunction",
"pymodule"
],
"ignoreWords": [],
"import": []
}

0 comments on commit 3f39355

Please sign in to comment.