From 43456df9fd1cbb2d152a974cdd2ce066336eb138 Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Mon, 5 Aug 2024 22:54:35 +0200 Subject: [PATCH 1/8] hotfix: .lower() pinyin --- commands/tocfl/chewing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commands/tocfl/chewing.py b/commands/tocfl/chewing.py index 96ea3ae..8ac3c20 100644 --- a/commands/tocfl/chewing.py +++ b/commands/tocfl/chewing.py @@ -203,7 +203,7 @@ def forms_new_word(pinyin: str, index: int): def to_chewing(pinyin: str) -> str: # Remove leading and trailing spaces - pinyin = pinyin.strip() + pinyin = pinyin.strip().lower() # Temporarily store the chewing tones and original index tones = [] From f79d74f0d58d29418f28caa5ef9251d7e259e073 Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Mon, 5 Aug 2024 22:54:56 +0200 Subject: [PATCH 2/8] call supabase function to get choices --- commands/tocfl/db.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 commands/tocfl/db.py diff --git a/commands/tocfl/db.py b/commands/tocfl/db.py new file mode 100644 index 0000000..996a845 --- /dev/null +++ b/commands/tocfl/db.py @@ -0,0 +1,17 @@ +from modules.supabase import supabaseClient + +TABLE = "tocfl" + +def get_random_tocfl_choices_from_db(num_choices=5): + command_name = "get_random_unique_pinyin" + data, c = supabaseClient.rpc( + command_name, + {"number_of_entries": num_choices}, + ).execute() + if c == 0: + return None + return data[1] + +if __name__ == "__main__": + choices = get_random_tocfl_choices_from_db() + print(choices) \ No newline at end of file From 336334935f2be030d08315482fc0d9874e54b7da Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Mon, 5 Aug 2024 22:56:02 +0200 Subject: [PATCH 3/8] fetch and create quiz choices --- commands/tocfl/quiz.py | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 commands/tocfl/quiz.py diff --git a/commands/tocfl/quiz.py b/commands/tocfl/quiz.py new file mode 100644 index 0000000..9042812 --- /dev/null +++ b/commands/tocfl/quiz.py @@ -0,0 +1,50 @@ +from .db import get_random_tocfl_choices_from_db +import discord +from typing import List, Tuple +import pandas as pd +from random import shuffle +from .chewing import to_chewing + +# sys.path.append(str(Path(__file__).parent.parent.parent)) # I hate python imports +from modules.quiz.multiple_choice import MultipleChoiceView, QuizChoice + + +def _db_results_to_df(results: List[dict]) -> pd.DataFrame: + df = pd.DataFrame(results) + # for "vocab" and "pinyin" columns: only keep the first part of the string + df["vocab"] = df["vocab"].apply(lambda x: x.split("/")[0]) + df["pinyin"] = df["pinyin"].apply(lambda x: x.split("/")[0]) + # add zhuyin column + df["zhuyin"] = df["pinyin"].apply(to_chewing).str.replace("\u3000", " ") # replace full-width space with half-width space for now + # add "pronunciation" column as a combination of pinyin and zhuyin + df["pronunciation"] = df["pinyin"] + " / " + df["zhuyin"] + # drop the "pinyin" and "zhuyin" columns + df.drop(columns=["pinyin", "zhuyin"], inplace=True) + return df.set_index("id") + +def df_results_to_choices(df: pd.DataFrame, + num_choices:int, + is_ask_pronunciation:bool=True +) -> Tuple[List[QuizChoice], str]: + col_to_display, col_to_ask = ("pronunciation", "vocab") if is_ask_pronunciation else ("vocab", "pronunciation") + # select one row as the correct answer + correct_row_index = df.sample().index[0] + vocab_to_display = df.loc[correct_row_index, col_to_ask] + # convert to QuizChoice object + correct_choice = QuizChoice(df.loc[correct_row_index, col_to_display], is_correct=True) + # select incorrect choices as rows that do not have the same pronunciation as the correct answer + incorrect_rows = df[df[col_to_display] != correct_choice.label].sample(num_choices-1) + incorrect_choices = [QuizChoice(row[col_to_display], is_correct=False) for _, row in incorrect_rows.iterrows()] + # shuffle the choices + choices = [correct_choice] + incorrect_choices + shuffle(choices) + return choices, vocab_to_display + + +if __name__ == "__main__": + num_choices = 4 + choices = get_random_tocfl_choices_from_db(num_choices=num_choices) + df = _db_results_to_df(choices) + choices, vocab_to_ask = df_results_to_choices(df, num_choices, is_ask_pronunciation=False) + print(f"Choose the correct answer for: {vocab_to_ask}") + print(f"Choices:\n{'\n'.join(['* ' + choice.label for choice in choices])}") \ No newline at end of file From 762c99a5c7368318e7dc402de136c7b8e61f5586 Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Mon, 5 Aug 2024 23:15:48 +0200 Subject: [PATCH 4/8] add code skeleton for command registration --- commands/tocfl/quiz.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/commands/tocfl/quiz.py b/commands/tocfl/quiz.py index 9042812..1f2fac8 100644 --- a/commands/tocfl/quiz.py +++ b/commands/tocfl/quiz.py @@ -7,6 +7,8 @@ # sys.path.append(str(Path(__file__).parent.parent.parent)) # I hate python imports from modules.quiz.multiple_choice import MultipleChoiceView, QuizChoice +from discord.app_commands import Choice +NUM_ROWS_CHOICES = [Choice(name=i, value=i) for i in range(2, 5+1)] # multiple choice options are from 2 to 5 def _db_results_to_df(results: List[dict]) -> pd.DataFrame: @@ -40,6 +42,37 @@ def df_results_to_choices(df: pd.DataFrame, shuffle(choices) return choices, vocab_to_display +def register_quiz_subcommand( + tocfl_group: discord.app_commands.Group, +): + @tocfl_group.command( + name="quiz-pronunciation", + description="Guess the pronunciation of the given TOCFL word", + ) + @discord.app_commands.describe(is_private="Whether the quiz should be private") + @discord.app_commands.choices(num_rows=NUM_ROWS_CHOICES) + async def tocfl_quiz_pronunciation( + interaction: discord.Interaction, + num_rows: Choice[int] = None, + is_private: bool = False, + ): + pass + + @tocfl_group.command( + name="quiz-vocab", + description="Guess the character of the given TOCFL pronunciation", + ) + @discord.app_commands.describe(is_private="Whether the quiz should be private") + @discord.app_commands.choices(num_rows=NUM_ROWS_CHOICES) + async def tocfl_quiz_pronunciation( + interaction: discord.Interaction, + num_rows: Choice[int] = None, + is_private: bool = False, + ): + pass + + + if __name__ == "__main__": num_choices = 4 From c59a562ca29270440c90145fb1bc0c6a7c4754ba Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Tue, 6 Aug 2024 04:20:59 +0200 Subject: [PATCH 5/8] quiz embed and command --- commands/tocfl/quiz.py | 37 ++++++++++++++++++++++++++++++++----- commands/tocfl/tocfl.py | 4 +++- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/commands/tocfl/quiz.py b/commands/tocfl/quiz.py index 1f2fac8..c930cdd 100644 --- a/commands/tocfl/quiz.py +++ b/commands/tocfl/quiz.py @@ -53,10 +53,24 @@ def register_quiz_subcommand( @discord.app_commands.choices(num_rows=NUM_ROWS_CHOICES) async def tocfl_quiz_pronunciation( interaction: discord.Interaction, - num_rows: Choice[int] = None, + num_rows: Choice[int] = 4, is_private: bool = False, ): - pass + # get random choices from the database + choices = get_random_tocfl_choices_from_db(num_choices=num_rows) + # convert to DataFrame + df = _db_results_to_df(choices) + # convert to QuizChoice objects + choices, vocab_to_ask = df_results_to_choices(df, num_rows, is_ask_pronunciation=True) + # create the view + view = MultipleChoiceView(choices=choices) + # send the message + await interaction.response.send_message( + f"Choose the correct pronunciation for: {vocab_to_ask}", + view=view, + ephemeral=is_private, + ) + @tocfl_group.command( name="quiz-vocab", @@ -64,12 +78,25 @@ async def tocfl_quiz_pronunciation( ) @discord.app_commands.describe(is_private="Whether the quiz should be private") @discord.app_commands.choices(num_rows=NUM_ROWS_CHOICES) - async def tocfl_quiz_pronunciation( + async def tocfl_quiz_vocab( interaction: discord.Interaction, - num_rows: Choice[int] = None, + num_rows: Choice[int] = 4, is_private: bool = False, ): - pass + # get random choices from the database + choices = get_random_tocfl_choices_from_db(num_choices=num_rows) + # convert to DataFrame + df = _db_results_to_df(choices) + # convert to QuizChoice objects + choices, vocab_to_ask = df_results_to_choices(df, num_rows, is_ask_pronunciation=False) + # create the view + view = MultipleChoiceView(choices=choices) + # send the message + await interaction.response.send_message( + f"Choose the correct answer for: {vocab_to_ask}", + view=view, + ephemeral=is_private, + ) diff --git a/commands/tocfl/tocfl.py b/commands/tocfl/tocfl.py index 716273e..fede176 100644 --- a/commands/tocfl/tocfl.py +++ b/commands/tocfl/tocfl.py @@ -7,7 +7,7 @@ from random import randint from .consts import TOCFL_LEVELS_CHOICES, TOCFL_LEVELS from .chewing import to_chewing - +from .quiz import register_quiz_subcommand def register_commands( tree: discord.app_commands.CommandTree, @@ -18,6 +18,8 @@ def register_commands( name="tocfl", description="TOCFL commands" ) + register_quiz_subcommand(tocfl_group) + @tocfl_group.command( name="random", description="Get a random TOCFL word", From 7353bc140409f35e8b3d072f042690273b47bfff Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Tue, 6 Aug 2024 04:26:20 +0200 Subject: [PATCH 6/8] python3.12 syntax booboo --- commands/tocfl/quiz.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commands/tocfl/quiz.py b/commands/tocfl/quiz.py index c930cdd..3c2ca72 100644 --- a/commands/tocfl/quiz.py +++ b/commands/tocfl/quiz.py @@ -107,4 +107,5 @@ async def tocfl_quiz_vocab( df = _db_results_to_df(choices) choices, vocab_to_ask = df_results_to_choices(df, num_choices, is_ask_pronunciation=False) print(f"Choose the correct answer for: {vocab_to_ask}") - print(f"Choices:\n{'\n'.join(['* ' + choice.label for choice in choices])}") \ No newline at end of file + print("Choices:") + print('\n'.join(['* ' + choice.label for choice in choices])) \ No newline at end of file From 74cad83407bc9bdf2f6faa29c713271334baeee5 Mon Sep 17 00:00:00 2001 From: JuniorTux Date: Tue, 6 Aug 2024 10:50:53 +0800 Subject: [PATCH 7/8] fix: chewing issues --- commands/tocfl/chewing.py | 50 +++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/commands/tocfl/chewing.py b/commands/tocfl/chewing.py index 8ac3c20..8d3c9f5 100644 --- a/commands/tocfl/chewing.py +++ b/commands/tocfl/chewing.py @@ -72,18 +72,12 @@ "yuan": "ㄩㄢ", "yun": "ㄩㄣ", "yong": "ㄩㄥ", - # v is used to replace ü in typing - "nü": "ㄋㄩ", - "lü": "ㄌㄩ", - "nv": "ㄋㄩ", - "lv": "ㄌㄩ", } PINYIN_CENTER = { "i": "ㄧ", - "u": "ㄨ", + "u": "ㄨ", # also ㄩ "ü": "ㄩ", - "v": "ㄩ", } # The designer of Hanyu Pinyin used e to represent both 「ㄜ」 and 「ㄝ」. @@ -170,24 +164,36 @@ def match_chewing(string: str, index: int, target: dict[str, str]): # Resolve duplicates if target == PINYIN_COMBINED: if target_str == "uan" and string[index - 1] in [ - "y", "j", "q", "x", ]: result = "ㄩㄢ" elif target_str == "un" and string[index - 1] in [ - "y", "j", "q", "x", ]: result = "ㄩㄣ" + elif target == PINYIN_CENTER: + if target_str == "u" and string[index - 1] in ["j", "q", "x"]: + result = "ㄩ" elif target == PINYIN_FINALS: - if target_str == "e" and string[index - 1] == "y": + if target_str == "e" and string[index - 1] in "iü": result = "ㄝ" + # TODO separate those which can have j, q, x as the initial constant + # FIXME ugly bad code + elif target_str == "en" and ( + string[index - 1] in ["j", "q", "x"] + or string[index - 2] in ["j", "q", "x"] + ): + continue - return (index + i, result) + if target == PINYIN_COMBINED: + if forms_new_word(string, index + i): + return (index + i, result) + else: + return (index + i, result) return (index + 1, None) @@ -203,7 +209,9 @@ def forms_new_word(pinyin: str, index: int): def to_chewing(pinyin: str) -> str: # Remove leading and trailing spaces - pinyin = pinyin.strip().lower() + pinyin = pinyin.strip() + # Handle all capital letters and lower-case letters + pinyin = pinyin.lower() # Temporarily store the chewing tones and original index tones = [] @@ -229,33 +237,35 @@ def to_chewing(pinyin: str) -> str: chewing += "ㄦ¯" break - # Check matches for independent words + # Check matches for independent words res = match_chewing(pinyin, index, PINYIN_ALONE) if res[1] and forms_new_word(pinyin, res[0]): - chewing += res[1] # ㄧㄚ + chewing += res[1] # ㄧㄚ index = res[0] else: initial = match_chewing(pinyin, index, PINYIN_INITIALS) - assert initial[1], f"Failed to match initial in '{pinyin}' at index {index - 1}" + assert initial[ + 1 + ], f"Failed to match initial in '{pinyin}' at index {index - 1}" index = initial[0] - chewing += initial[1] # ㄍ + chewing += initial[1] # ㄍ combined = match_chewing(pinyin, index, PINYIN_COMBINED) if combined[1]: index = combined[0] - chewing += combined[1] # ㄨㄤ + chewing += combined[1] # ㄨㄤ else: center = match_chewing(pinyin, index, PINYIN_CENTER) if center[1]: - chewing += center[1] # ㄍㄨ + chewing += center[1] # ㄍㄨ index = center[0] final = match_chewing(pinyin, index, PINYIN_FINALS) if final[1]: - chewing += final[1] # ㄍㄨㄛ + chewing += final[1] # ㄍㄨㄛ index = final[0] if len(tones) and tones[0][0] < index: - chewing += tones.pop(0)[1] # ㄍㄨㄛˊ + chewing += tones.pop(0)[1] # ㄍㄨㄛˊ else: chewing += "˙" From 674f71b6fc28c9bb7b62bb2f8908ef8674b92f97 Mon Sep 17 00:00:00 2001 From: chanomkaimuk <22185824+chanomkaimuk@users.noreply.github.com> Date: Tue, 6 Aug 2024 19:20:17 +0200 Subject: [PATCH 8/8] dont use zhuyin if it fails --- commands/tocfl/quiz.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/commands/tocfl/quiz.py b/commands/tocfl/quiz.py index 3c2ca72..0a365d0 100644 --- a/commands/tocfl/quiz.py +++ b/commands/tocfl/quiz.py @@ -17,11 +17,15 @@ def _db_results_to_df(results: List[dict]) -> pd.DataFrame: df["vocab"] = df["vocab"].apply(lambda x: x.split("/")[0]) df["pinyin"] = df["pinyin"].apply(lambda x: x.split("/")[0]) # add zhuyin column - df["zhuyin"] = df["pinyin"].apply(to_chewing).str.replace("\u3000", " ") # replace full-width space with half-width space for now - # add "pronunciation" column as a combination of pinyin and zhuyin - df["pronunciation"] = df["pinyin"] + " / " + df["zhuyin"] - # drop the "pinyin" and "zhuyin" columns - df.drop(columns=["pinyin", "zhuyin"], inplace=True) + try: + df["zhuyin"] = df["pinyin"].apply(to_chewing).str.replace("\u3000", " ") # replace full-width space with half-width space for now + # add "pronunciation" column as a combination of pinyin and zhuyin + df["pronunciation"] = df["pinyin"] + " / " + df["zhuyin"] + # drop the "pinyin" and "zhuyin" columns + df.drop(columns=["pinyin", "zhuyin"], inplace=True) + except Exception as e: + df["pronunciation"] = df["pinyin"] + df.drop(columns=["pinyin"], inplace=True) return df.set_index("id") def df_results_to_choices(df: pd.DataFrame,