Skip to content

Commit

Permalink
fix: chewing issues
Browse files Browse the repository at this point in the history
  • Loading branch information
JuniorSuperTux committed Aug 6, 2024
1 parent 7353bc1 commit 74cad83
Showing 1 changed file with 30 additions and 20 deletions.
50 changes: 30 additions & 20 deletions commands/tocfl/chewing.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,12 @@
"yuan": "ㄩㄢ",
"yun": "ㄩㄣ",
"yong": "ㄩㄥ",
# v is used to replace ü in typing
"nü": "ㄋㄩ",
"lü": "ㄌㄩ",
"nv": "ㄋㄩ",
"lv": "ㄌㄩ",
}

PINYIN_CENTER = {
"i": "ㄧ",
"u": "ㄨ",
"u": "ㄨ", # also ㄩ
"ü": "ㄩ",
"v": "ㄩ",
}

# The designer of Hanyu Pinyin used e to represent both 「ㄜ」 and 「ㄝ」.
Expand Down Expand Up @@ -170,24 +164,36 @@ def match_chewing(string: str, index: int, target: dict[str, str]):
# Resolve duplicates
if target == PINYIN_COMBINED:
if target_str == "uan" and string[index - 1] in [
"y",
"j",
"q",
"x",
]:
result = "ㄩㄢ"
elif target_str == "un" and string[index - 1] in [
"y",
"j",
"q",
"x",
]:
result = "ㄩㄣ"
elif target == PINYIN_CENTER:
if target_str == "u" and string[index - 1] in ["j", "q", "x"]:
result = "ㄩ"
elif target == PINYIN_FINALS:
if target_str == "e" and string[index - 1] == "y":
if target_str == "e" and string[index - 1] in "iü":
result = "ㄝ"
# TODO separate those which can have j, q, x as the initial consonant
# FIXME ugly bad code
elif target_str == "en" and (
string[index - 1] in ["j", "q", "x"]
or string[index - 2] in ["j", "q", "x"]
):
continue

return (index + i, result)
if target == PINYIN_COMBINED:
if forms_new_word(string, index + i):
return (index + i, result)
else:
return (index + i, result)
return (index + 1, None)


Expand All @@ -203,7 +209,9 @@ def forms_new_word(pinyin: str, index: int):

def to_chewing(pinyin: str) -> str:
# Remove leading and trailing spaces
pinyin = pinyin.strip().lower()
pinyin = pinyin.strip()
# Normalize to lower case so upper-case and mixed-case input are handled alike
pinyin = pinyin.lower()

# Temporarily store the chewing tones and original index
tones = []
Expand All @@ -229,33 +237,35 @@ def to_chewing(pinyin: str) -> str:
chewing += "ㄦ¯"
break

# Check matches for independent words
# Check matches for independent words
res = match_chewing(pinyin, index, PINYIN_ALONE)
if res[1] and forms_new_word(pinyin, res[0]):
chewing += res[1] # ㄧㄚ
chewing += res[1] # ㄧㄚ
index = res[0]

else:
initial = match_chewing(pinyin, index, PINYIN_INITIALS)
assert initial[1], f"Failed to match initial in '{pinyin}' at index {index - 1}"
assert initial[
1
], f"Failed to match initial in '{pinyin}' at index {index - 1}"
index = initial[0]
chewing += initial[1] # ㄍ
chewing += initial[1] # ㄍ
combined = match_chewing(pinyin, index, PINYIN_COMBINED)
if combined[1]:
index = combined[0]
chewing += combined[1] # ㄨㄤ
chewing += combined[1] # ㄨㄤ
else:
center = match_chewing(pinyin, index, PINYIN_CENTER)
if center[1]:
chewing += center[1] # ㄍㄨ
chewing += center[1] # ㄍㄨ
index = center[0]
final = match_chewing(pinyin, index, PINYIN_FINALS)
if final[1]:
chewing += final[1] # ㄍㄨㄛ
chewing += final[1] # ㄍㄨㄛ
index = final[0]

if len(tones) and tones[0][0] < index:
chewing += tones.pop(0)[1] # ㄍㄨㄛˊ
chewing += tones.pop(0)[1] # ㄍㄨㄛˊ
else:
chewing += "˙"

Expand Down

0 comments on commit 74cad83

Please sign in to comment.