Skip to content

Commit

Permalink
Refactor Leaderboard Submission Eval Code. (#63)
Browse files Browse the repository at this point in the history
* Refactor control of eval and warmup to our end

* fix: nvidia workflow naming error

* fix: single quotes needed for bash

* fix: single quotes needed for bash

* fix: wrong unit of time

* fix: siros suggestions, remove dim and assert on implementation match

* Change GitHub message
  • Loading branch information
alexzhang13 authored Dec 21, 2024
1 parent 764edb0 commit ede66fa
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/nvidia_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ jobs:
shell: bash
run: |
if [[ -n "${{ github.event.inputs.eval_content }}" ]]; then
if [[ "${{ github.event.inputs.eval_content }}" == *.cu ]]; then
if [[ "${{ github.event.inputs.eval_filename }}" == *.cu ]]; then
echo "Compiling and running CUDA file..."
nvcc "${{ github.event.inputs.eval_filename }}" -o cuda_program
./cuda_program > training.log 2>&1
Expand Down
43 changes: 15 additions & 28 deletions src/discord-cluster-manager/cogs/leaderboard_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ async def submit(
gpu_type="Choose the GPU type for Modal",
)
@app_commands.choices(
gpu_type=[
app_commands.Choice(name=gpu.value, value=gpu.value) for gpu in ModalGPU
]
gpu_type=[app_commands.Choice(name=gpu.value, value=gpu.value) for gpu in ModalGPU]
)
async def submit_modal(
self,
Expand Down Expand Up @@ -79,7 +77,7 @@ async def submit_modal(
f"Ran on Modal. Leaderboard '{leaderboard_name}'.\n"
+ f"Submission title: {script.filename}.\n"
+ f"Submission user: {interaction.user.id}.\n"
+ f"Runtime: {score} ms",
+ f"Runtime: {score:.9f} seconds.",
ephemeral=True,
)
except ValueError:
Expand All @@ -90,16 +88,12 @@ async def submit_modal(
)

### GITHUB SUBCOMMAND
@app_commands.command(
name="github", description="Submit leaderboard data for GitHub"
)
@app_commands.command(name="github", description="Submit leaderboard data for GitHub")
@app_commands.describe(
gpu_type="Choose the GPU type for Github Runners",
)
@app_commands.choices(
gpu_type=[
app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in GitHubGPU
]
gpu_type=[app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in GitHubGPU]
)
async def submit_github(
self,
Expand Down Expand Up @@ -157,9 +151,7 @@ async def submit_github(
print(f"Webhook not found: {e}")
await send_discord_message(interaction, "❌ The webhook was not found.")

message_contents = [
msg.content async for msg in github_thread.history(limit=None)
]
message_contents = [msg.content async for msg in github_thread.history(limit=None)]

# Compute eval or submission score, call runner here.
# TODO: Make this more robust later
Expand All @@ -180,14 +172,17 @@ async def submit_github(
if interaction.user.nick is None
else interaction.user.nick
)

await send_discord_message(
interaction,
"Successfully ran on GitHub runners!\n"
+ f"Leaderboard '{leaderboard_name}'.\n"
+ f"Submission title: {script.filename}.\n"
+ f"Submission user: {user_id}\n"
+ f"Runtime: {score} ms\n",
+ f"Submission user: {user_id}.\n"
+ f"Runtime: {score:.9f} seconds.",
ephemeral=True,
)

except ValueError:
await send_discord_message(
interaction,
Expand Down Expand Up @@ -225,9 +220,7 @@ async def select_callback(self, interaction: Interaction):
class LeaderboardCog(commands.Cog):
def __init__(self, bot):
self.bot: commands.Bot = bot
self.get_leaderboards = bot.leaderboard_group.command(name="list")(
self.get_leaderboards
)
self.get_leaderboards = bot.leaderboard_group.command(name="list")(self.get_leaderboards)
self.leaderboard_create = bot.leaderboard_group.command(
name="create", description="Create a new leaderboard"
)(self.leaderboard_create)
Expand All @@ -246,9 +239,7 @@ async def get_leaderboards(self, interaction: discord.Interaction):
leaderboards = db.get_leaderboards()

if not leaderboards:
await send_discord_message(
interaction, "No leaderboards found.", ephemeral=True
)
await send_discord_message(interaction, "No leaderboards found.", ephemeral=True)
return

# Create embed
Expand All @@ -257,9 +248,7 @@ async def get_leaderboards(self, interaction: discord.Interaction):
# Add fields for each leaderboard
for lb in leaderboards:
deadline_str = lb["deadline"].strftime("%Y-%m-%d %H:%M")
embed.add_field(
name=lb["name"], value=f"Deadline: {deadline_str}", inline=False
)
embed.add_field(name=lb["name"], value=f"Deadline: {deadline_str}", inline=False)

await interaction.followup.send(interaction, embed=embed)

Expand Down Expand Up @@ -318,7 +307,7 @@ async def leaderboard_create(
if "duplicate key" in err:
await send_discord_message(
interaction,
'Error: Tried to create a leaderboard '
"Error: Tried to create a leaderboard "
f'"{leaderboard_name}" that already exists.',
ephemeral=True,
)
Expand Down Expand Up @@ -383,9 +372,7 @@ async def get_leaderboard_submissions(
)

for submission in submissions:
user_id = await get_user_from_id(
submission["user_id"], interaction, self.bot
)
user_id = await get_user_from_id(submission["user_id"], interaction, self.bot)

embed.add_field(
name=f"{user_id}: {submission['submission_name']}",
Expand Down
49 changes: 48 additions & 1 deletion src/discord-cluster-manager/leaderboard_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,57 @@
########

py_eval = """
from reference import metric
import torch
import time
from reference import ref_kernel, generate_input
from train import custom_kernel
def check_implementation() -> bool:
    """Verify the submitted kernel against the reference implementation.

    Runs several rounds of freshly generated inputs and compares the outputs
    element-wise; returns True only if every output matches within tolerance.
    """
    trials = 10  # repeat with new random inputs to reduce false positives
    for _ in range(trials):
        for sample in generate_input():
            if not torch.allclose(custom_kernel(sample), ref_kernel(sample), atol=1e-5):
                print('mismatch found! custom implementation doesnt match reference.')
                return False
    print('custom implementation matches the reference implementation.')
    return True
def metric():
    """Time the submitted kernel and return its mean runtime in seconds.

    Warms up both the custom and reference kernels for ``warmup_runs``
    passes over freshly generated inputs, then times ``timed_runs`` passes
    of the custom kernel over one freshly generated batch.

    Returns:
        float: average wall-clock seconds per timed pass of the custom kernel.
    """
    warmup_runs = 10
    timed_runs = 100

    # warmup: exercise both kernels so one-time costs (JIT, caches) are paid
    print('warming up...')
    for _ in range(warmup_runs):
        input_tensors = generate_input()
        for input in input_tensors:
            _ = custom_kernel(input)
            _ = ref_kernel(input)

    # timing
    print('timing custom implementation...')
    # Fix: was `input_tensor = generate_input()` (singular) — the timed loop
    # then iterated the stale `input_tensors` left over from the warmup loop,
    # and the freshly generated inputs were never used.
    input_tensors = generate_input()
    # NOTE(review): if the kernels launch asynchronous CUDA work, wall-clock
    # timing without torch.cuda.synchronize() may under-measure — confirm.
    start_time = time.time()
    for _ in range(timed_runs):
        for input in input_tensors:
            _ = custom_kernel(input)
    custom_duration = (time.time() - start_time) / timed_runs

    print(f'submitted kernel runtime: {custom_duration:.4f} seconds')
    return custom_duration
def main():
    """Gate the submission on correctness, then compute and print its score."""
    # Explicit check instead of `assert`: asserts are stripped under `-O`,
    # which would silently skip the correctness gate.
    if not check_implementation():
        raise RuntimeError('custom implementation does not match the reference')
    s = metric()
    print(f'score:{s}')
if __name__ == '__main__':
Expand Down

0 comments on commit ede66fa

Please sign in to comment.