Merge branch 'main' into add-black

placeTW · Aug 10, 2024 · ff1c04f · ff1c04f
2 parents 2eace61 + fb4f045
commit ff1c04f
Show file tree

Hide file tree

Showing 4 changed files with 157 additions and 13 deletions.
diff --git a/commands/tocfl/chewing.py b/commands/tocfl/chewing.py
@@ -72,18 +72,12 @@
     "yuan": "ㄩㄢ",
     "yun": "ㄩㄣ",
     "yong": "ㄩㄥ",
-    # v is used to replace ü in typing
-    "nü": "ㄋㄩ",
-    "lü": "ㄌㄩ",
-    "nv": "ㄋㄩ",
-    "lv": "ㄌㄩ",
 }
 
 PINYIN_CENTER = {
     "i": "ㄧ",
-    "u": "ㄨ",
+    "u": "ㄨ",  # also ㄩ
     "ü": "ㄩ",
-    "v": "ㄩ",
 }
 
 # The designer of Hanyu Pinyin used e to represent both 「ㄜ」 and 「ㄝ」.
@@ -170,24 +164,36 @@ def match_chewing(string: str, index: int, target: dict[str, str]):
             # Resolve duplicates
             if target == PINYIN_COMBINED:
                 if target_str == "uan" and string[index - 1] in [
-                    "y",
                     "j",
                     "q",
                     "x",
                 ]:
                     result = "ㄩㄢ"
                 elif target_str == "un" and string[index - 1] in [
-                    "y",
                     "j",
                     "q",
                     "x",
                 ]:
                     result = "ㄩㄣ"
+            elif target == PINYIN_CENTER:
+                if target_str == "u" and string[index - 1] in ["j", "q", "x"]:
+                    result = "ㄩ"
             elif target == PINYIN_FINALS:
-                if target_str == "e" and string[index - 1] == "y":
+                if target_str == "e" and string[index - 1] in "iü":
                     result = "ㄝ"
+                # TODO separate those which can have j, q, x as the initial consonant
+                # FIXME ugly code: ad-hoc look-behind at the previous two characters
+                elif target_str == "en" and (
+                    string[index - 1] in ["j", "q", "x"]
+                    or string[index - 2] in ["j", "q", "x"]
+                ):
+                    continue
 
-            return (index + i, result)
+            if target == PINYIN_COMBINED:
+                if forms_new_word(string, index + i):
+                    return (index + i, result)
+            else:
+                return (index + i, result)
     return (index + 1, None)
 
 
@@ -204,6 +210,8 @@ def forms_new_word(pinyin: str, index: int):
 def to_chewing(pinyin: str) -> str:
     # Remove leading and trailing spaces
     pinyin = pinyin.strip()
+    # Normalize to lower case so upper-case and mixed-case input is handled too
+    pinyin = pinyin.lower()
 
     # Temporarily store the chewing tones and original index
     tones = []
@@ -237,7 +245,9 @@ def to_chewing(pinyin: str) -> str:
 
         else:
             initial = match_chewing(pinyin, index, PINYIN_INITIALS)
-            assert initial[1], f"Failed to match initial in '{pinyin}' at index {index - 1}"
+            assert initial[
+                1
+            ], f"Failed to match initial in '{pinyin}' at index {index - 1}"
             index = initial[0]
             chewing += initial[1]  # ㄍ
             combined = match_chewing(pinyin, index, PINYIN_COMBINED)

diff --git a/commands/tocfl/db.py b/commands/tocfl/db.py
@@ -0,0 +1,17 @@
+from modules.supabase import supabaseClient
+
+TABLE = "tocfl"
+
+def get_random_tocfl_choices_from_db(num_choices=5):
+    command_name = "get_random_unique_pinyin"
+    data, c = supabaseClient.rpc(
+        command_name,
+        {"number_of_entries": num_choices},
+    ).execute()
+    if c == 0:
+        return None
+    return data[1]
+
+if __name__ == "__main__":
+    choices = get_random_tocfl_choices_from_db()
+    print(choices)
diff --git a/commands/tocfl/quiz.py b/commands/tocfl/quiz.py
@@ -0,0 +1,115 @@
+from .db import get_random_tocfl_choices_from_db
+import discord
+from typing import List, Tuple
+import pandas as pd
+from random import shuffle
+from .chewing import to_chewing
+
+# sys.path.append(str(Path(__file__).parent.parent.parent)) # I hate python imports
+from modules.quiz.multiple_choice import MultipleChoiceView, QuizChoice
+from discord.app_commands import Choice
+NUM_ROWS_CHOICES = [Choice(name=i, value=i) for i in range(2, 5+1)] # multiple choice options are from 2 to 5
+
+
+def _db_results_to_df(results: List[dict]) -> pd.DataFrame:
+    df = pd.DataFrame(results)
+    # for "vocab" and "pinyin" columns: only keep the first part of the string
+    df["vocab"] = df["vocab"].apply(lambda x: x.split("/")[0])
+    df["pinyin"] = df["pinyin"].apply(lambda x: x.split("/")[0])
+    # add zhuyin column
+    try:
+        df["zhuyin"] = df["pinyin"].apply(to_chewing).str.replace("\u3000", " ") # replace full-width space with half-width space for now
+        # add "pronunciation" column as a combination of pinyin and zhuyin
+        df["pronunciation"] = df["pinyin"] + " / " + df["zhuyin"]
+        # drop the "pinyin" and "zhuyin" columns
+        df.drop(columns=["pinyin", "zhuyin"], inplace=True)
+    except Exception as e:
+        df["pronunciation"] = df["pinyin"]
+        df.drop(columns=["pinyin"], inplace=True)
+    return df.set_index("id")
+
+def df_results_to_choices(df: pd.DataFrame,
+    num_choices:int,
+    is_ask_pronunciation:bool=True
+) -> Tuple[List[QuizChoice], str]:
+    col_to_display, col_to_ask = ("pronunciation", "vocab") if is_ask_pronunciation else ("vocab", "pronunciation")
+    # select one row as the correct answer
+    correct_row_index = df.sample().index[0]
+    vocab_to_display = df.loc[correct_row_index, col_to_ask]
+    # convert to QuizChoice object
+    correct_choice = QuizChoice(df.loc[correct_row_index, col_to_display], is_correct=True)
+    # select incorrect choices as rows whose displayed value differs from the correct answer's
+    incorrect_rows = df[df[col_to_display] != correct_choice.label].sample(num_choices-1)
+    incorrect_choices = [QuizChoice(row[col_to_display], is_correct=False) for _, row in incorrect_rows.iterrows()]
+    # shuffle the choices
+    choices = [correct_choice] + incorrect_choices
+    shuffle(choices)
+    return choices, vocab_to_display
+
+def register_quiz_subcommand(
+    tocfl_group: discord.app_commands.Group,
+):
+    @tocfl_group.command(
+        name="quiz-pronunciation",
+        description="Guess the pronunciation of the given TOCFL word",
+    )
+    @discord.app_commands.describe(is_private="Whether the quiz should be private")
+    @discord.app_commands.choices(num_rows=NUM_ROWS_CHOICES)
+    async def tocfl_quiz_pronunciation(
+        interaction: discord.Interaction,
+        num_rows: Choice[int] = 4,
+        is_private: bool = False,
+    ):
+        # get random choices from the database
+        choices = get_random_tocfl_choices_from_db(num_choices=num_rows)
+        # convert to DataFrame
+        df = _db_results_to_df(choices)
+        # convert to QuizChoice objects
+        choices, vocab_to_ask = df_results_to_choices(df, num_rows, is_ask_pronunciation=True)
+        # create the view
+        view = MultipleChoiceView(choices=choices)
+        # send the message
+        await interaction.response.send_message(
+            f"Choose the correct pronunciation for: {vocab_to_ask}",
+            view=view,
+            ephemeral=is_private,
+        )
+
+
+    @tocfl_group.command(
+        name="quiz-vocab",
+        description="Guess the character of the given TOCFL pronunciation",
+    )
+    @discord.app_commands.describe(is_private="Whether the quiz should be private")
+    @discord.app_commands.choices(num_rows=NUM_ROWS_CHOICES)
+    async def tocfl_quiz_vocab(
+        interaction: discord.Interaction,
+        num_rows: Choice[int] = 4,
+        is_private: bool = False,
+    ):
+        # get random choices from the database
+        choices = get_random_tocfl_choices_from_db(num_choices=num_rows)
+        # convert to DataFrame
+        df = _db_results_to_df(choices)
+        # convert to QuizChoice objects
+        choices, vocab_to_ask = df_results_to_choices(df, num_rows, is_ask_pronunciation=False)
+        # create the view
+        view = MultipleChoiceView(choices=choices)
+        # send the message
+        await interaction.response.send_message(
+            f"Choose the correct answer for: {vocab_to_ask}",
+            view=view,
+            ephemeral=is_private,
+        )
+
+
+
+
+if __name__ == "__main__":
+    num_choices = 4
+    choices = get_random_tocfl_choices_from_db(num_choices=num_choices)
+    df = _db_results_to_df(choices)
+    choices, vocab_to_ask = df_results_to_choices(df, num_choices, is_ask_pronunciation=False)
+    print(f"Choose the correct answer for: {vocab_to_ask}")
+    print("Choices:")
+    print('\n'.join(['* ' + choice.label for choice in choices]))
diff --git a/commands/tocfl/tocfl.py b/commands/tocfl/tocfl.py
@@ -7,7 +7,7 @@
 from random import randint
 from .consts import TOCFL_LEVELS_CHOICES, TOCFL_LEVELS
 from .chewing import to_chewing
-
+from .quiz import register_quiz_subcommand
 
 def register_commands(
     tree: discord.app_commands.CommandTree,
@@ -16,6 +16,8 @@ def register_commands(
 ):
     tocfl_group = app_commands.Group(name="tocfl", description="TOCFL commands")
 
+    register_quiz_subcommand(tocfl_group)
+
     @tocfl_group.command(
         name="random",
         description="Get a random TOCFL word",