Skip to content

Commit

Permalink
Merge branch 'main' into add-black
Browse files Browse the repository at this point in the history
  • Loading branch information
chanomkaimuk committed Aug 10, 2024
2 parents 2eace61 + fb4f045 commit ff1c04f
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 13 deletions.
34 changes: 22 additions & 12 deletions commands/tocfl/chewing.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,12 @@
"yuan": "ㄩㄢ",
"yun": "ㄩㄣ",
"yong": "ㄩㄥ",
# v is used to replace ü in typing
"nü": "ㄋㄩ",
"lü": "ㄌㄩ",
"nv": "ㄋㄩ",
"lv": "ㄌㄩ",
}

PINYIN_CENTER = {
"i": "ㄧ",
"u": "ㄨ",
"u": "ㄨ", # also ㄩ
"ü": "ㄩ",
"v": "ㄩ",
}

# The designer of Hanyu Pinyin used e to represent both 「ㄜ」 and 「ㄝ」.
Expand Down Expand Up @@ -170,24 +164,36 @@ def match_chewing(string: str, index: int, target: dict[str, str]):
# Resolve duplicates
if target == PINYIN_COMBINED:
if target_str == "uan" and string[index - 1] in [
"y",
"j",
"q",
"x",
]:
result = "ㄩㄢ"
elif target_str == "un" and string[index - 1] in [
"y",
"j",
"q",
"x",
]:
result = "ㄩㄣ"
elif target == PINYIN_CENTER:
if target_str == "u" and string[index - 1] in ["j", "q", "x"]:
result = "ㄩ"
elif target == PINYIN_FINALS:
if target_str == "e" and string[index - 1] == "y":
if target_str == "e" and string[index - 1] in "iü":
result = "ㄝ"
# TODO separate those which can have j, q, x as the initial consonant
# FIXME ugly bad code
elif target_str == "en" and (
string[index - 1] in ["j", "q", "x"]
or string[index - 2] in ["j", "q", "x"]
):
continue

return (index + i, result)
if target == PINYIN_COMBINED:
if forms_new_word(string, index + i):
return (index + i, result)
else:
return (index + i, result)
return (index + 1, None)


Expand All @@ -204,6 +210,8 @@ def forms_new_word(pinyin: str, index: int):
def to_chewing(pinyin: str) -> str:
# Remove leading and trailing spaces
pinyin = pinyin.strip()
# Handle all capital letters and lower-case letters
pinyin = pinyin.lower()

# Temporarily store the chewing tones and original index
tones = []
Expand Down Expand Up @@ -237,7 +245,9 @@ def to_chewing(pinyin: str) -> str:

else:
initial = match_chewing(pinyin, index, PINYIN_INITIALS)
assert initial[1], f"Failed to match initial in '{pinyin}' at index {index - 1}"
assert initial[
1
], f"Failed to match initial in '{pinyin}' at index {index - 1}"
index = initial[0]
chewing += initial[1] # ㄍ
combined = match_chewing(pinyin, index, PINYIN_COMBINED)
Expand Down
17 changes: 17 additions & 0 deletions commands/tocfl/db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from modules.supabase import supabaseClient

TABLE = "tocfl"

def get_random_tocfl_choices_from_db(num_choices=5):
    """Fetch random TOCFL entries via the ``get_random_unique_pinyin`` RPC.

    Args:
        num_choices: Sent to the RPC as its ``number_of_entries`` argument.

    Returns:
        ``None`` when the second value unpacked from the executed response
        equals ``0``; otherwise the element at index 1 of the first value.
    """
    rpc_arguments = {"number_of_entries": num_choices}
    response = supabaseClient.rpc("get_random_unique_pinyin", rpc_arguments).execute()
    # NOTE(review): presumably the response unpacks into (field, value) pairs,
    # which would make data[1] the list of rows -- confirm against the
    # installed supabase client version, and whether `c == 0` can ever hold.
    data, c = response
    return None if c == 0 else data[1]

if __name__ == "__main__":
    # Manual smoke test: fetch the default number of entries and show them.
    choices = get_random_tocfl_choices_from_db()
    print(choices)
115 changes: 115 additions & 0 deletions commands/tocfl/quiz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from .db import get_random_tocfl_choices_from_db
import discord
from typing import List, Tuple
import pandas as pd
from random import shuffle
from .chewing import to_chewing

# sys.path.append(str(Path(__file__).parent.parent.parent)) # I hate python imports
from modules.quiz.multiple_choice import MultipleChoiceView, QuizChoice
from discord.app_commands import Choice
# Multiple choice options are from 2 to 5. Choice names are the text shown to
# the user and must be strings, so the integer is converted for the name while
# the value stays an int.
NUM_ROWS_CHOICES = [Choice(name=str(i), value=i) for i in range(2, 5 + 1)]


def _db_results_to_df(results: List[dict]) -> pd.DataFrame:
    """Turn raw database rows into a quiz-ready DataFrame indexed by ``id``.

    The ``vocab`` and ``pinyin`` columns are trimmed to the part before the
    first ``/``. A ``pronunciation`` column is then built as
    ``"<pinyin> / <zhuyin>"``; if that step raises, ``pronunciation`` falls
    back to the pinyin alone. The intermediate ``pinyin`` (and ``zhuyin``)
    columns are removed before returning.

    Args:
        results: Row dictionaries; the code reads the ``vocab``, ``pinyin``
            and ``id`` columns.

    Returns:
        The DataFrame with ``id`` as its index.
    """

    def first_variant(text):
        # Only keep the first part of a "/"-separated string.
        return text.split("/")[0]

    frame = pd.DataFrame(results)
    for column in ("vocab", "pinyin"):
        frame[column] = frame[column].apply(first_variant)

    try:
        # Replace full-width space with half-width space for now.
        frame["zhuyin"] = (
            frame["pinyin"].apply(to_chewing).str.replace("\u3000", " ")
        )
        frame["pronunciation"] = frame["pinyin"] + " / " + frame["zhuyin"]
        consumed_columns = ["pinyin", "zhuyin"]
    except Exception:
        # Best-effort fallback: show the pinyin alone when conversion fails.
        frame["pronunciation"] = frame["pinyin"]
        consumed_columns = ["pinyin"]

    frame.drop(columns=consumed_columns, inplace=True)
    return frame.set_index("id")

def df_results_to_choices(df: pd.DataFrame,
                          num_choices: int,
                          is_ask_pronunciation: bool = True
                          ) -> Tuple[List[QuizChoice], str]:
    """Pick one correct row and random distractors, returned in shuffled order.

    Args:
        df: Frame with ``vocab`` and ``pronunciation`` columns.
        num_choices: Desired total number of choices, including the correct
            one. Fewer are returned when the frame does not hold enough rows
            whose displayed value differs from the correct answer.
        is_ask_pronunciation: When true the choices show ``pronunciation`` and
            the prompt is the ``vocab``; when false the roles are swapped.

    Returns:
        A tuple of the shuffled choices and the prompt value to show.

    Raises:
        ValueError: If ``df`` has no rows.
    """
    if is_ask_pronunciation:
        col_to_display, col_to_ask = "pronunciation", "vocab"
    else:
        col_to_display, col_to_ask = "vocab", "pronunciation"

    if df.empty:
        raise ValueError("Cannot build quiz choices from an empty DataFrame")

    # Select one row as the correct answer.
    correct_row_index = df.sample().index[0]
    vocab_to_display = df.loc[correct_row_index, col_to_ask]
    correct_choice = QuizChoice(
        df.loc[correct_row_index, col_to_display], is_correct=True
    )

    # Distractors are rows whose displayed value differs from the correct one.
    distractor_pool = df[df[col_to_display] != correct_choice.label]
    # Clamp the request: sampling more rows than exist raises ValueError.
    num_incorrect = max(0, min(num_choices - 1, len(distractor_pool)))
    incorrect_rows = distractor_pool.sample(num_incorrect)
    incorrect_choices = [
        QuizChoice(row[col_to_display], is_correct=False)
        for _, row in incorrect_rows.iterrows()
    ]

    choices = [correct_choice] + incorrect_choices
    shuffle(choices)
    return choices, vocab_to_display

def register_quiz_subcommand(
    tocfl_group: discord.app_commands.Group,
):
    """Register the ``quiz-pronunciation`` and ``quiz-vocab`` subcommands.

    Both commands fetch random rows, build a multiple-choice view from them
    and send it as the interaction response.

    Args:
        tocfl_group: Command group the two quiz subcommands are attached to.
    """

    async def _send_quiz(interaction, num_rows, is_private, *,
                         is_ask_pronunciation, prompt):
        # Shared body of both quiz commands.
        # A parameter annotated Choice[int] receives a Choice object when the
        # user picks an option, while the default is a plain int; normalise
        # both to an int before using it as a count.
        num_choices = num_rows.value if isinstance(num_rows, Choice) else num_rows

        # Get random choices from the database.
        rows = get_random_tocfl_choices_from_db(num_choices=num_choices)
        if not rows:
            # Nothing to quiz on; tell the user instead of failing later.
            await interaction.response.send_message(
                "Could not load any TOCFL words for the quiz. Please try again later.",
                ephemeral=True,
            )
            return

        df = _db_results_to_df(rows)
        choices, vocab_to_ask = df_results_to_choices(
            df, num_choices, is_ask_pronunciation=is_ask_pronunciation
        )
        view = MultipleChoiceView(choices=choices)
        await interaction.response.send_message(
            f"{prompt}: {vocab_to_ask}",
            view=view,
            ephemeral=is_private,
        )

    @tocfl_group.command(
        name="quiz-pronunciation",
        description="Guess the pronunciation of the given TOCFL word",
    )
    @discord.app_commands.describe(is_private="Whether the quiz should be private")
    @discord.app_commands.choices(num_rows=NUM_ROWS_CHOICES)
    async def tocfl_quiz_pronunciation(
        interaction: discord.Interaction,
        num_rows: Choice[int] = 4,
        is_private: bool = False,
    ):
        await _send_quiz(
            interaction,
            num_rows,
            is_private,
            is_ask_pronunciation=True,
            prompt="Choose the correct pronunciation for",
        )

    @tocfl_group.command(
        name="quiz-vocab",
        description="Guess the character of the given TOCFL pronunciation",
    )
    @discord.app_commands.describe(is_private="Whether the quiz should be private")
    @discord.app_commands.choices(num_rows=NUM_ROWS_CHOICES)
    async def tocfl_quiz_vocab(
        interaction: discord.Interaction,
        num_rows: Choice[int] = 4,
        is_private: bool = False,
    ):
        await _send_quiz(
            interaction,
            num_rows,
            is_private,
            is_ask_pronunciation=False,
            prompt="Choose the correct answer for",
        )




if __name__ == "__main__":
    # Manual smoke test: build one vocab quiz from the database and print it.
    num_choices = 4
    df = _db_results_to_df(
        get_random_tocfl_choices_from_db(num_choices=num_choices)
    )
    choices, vocab_to_ask = df_results_to_choices(
        df, num_choices, is_ask_pronunciation=False
    )
    print(f"Choose the correct answer for: {vocab_to_ask}")
    print("Choices:")
    bullet_lines = ['* ' + choice.label for choice in choices]
    print('\n'.join(bullet_lines))
4 changes: 3 additions & 1 deletion commands/tocfl/tocfl.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from random import randint
from .consts import TOCFL_LEVELS_CHOICES, TOCFL_LEVELS
from .chewing import to_chewing

from .quiz import register_quiz_subcommand

def register_commands(
tree: discord.app_commands.CommandTree,
Expand All @@ -16,6 +16,8 @@ def register_commands(
):
tocfl_group = app_commands.Group(name="tocfl", description="TOCFL commands")

register_quiz_subcommand(tocfl_group)

@tocfl_group.command(
name="random",
description="Get a random TOCFL word",
Expand Down

0 comments on commit ff1c04f

Please sign in to comment.