new verifiers

gpu-mode · Jan 14, 2025 · 990fdce
1 parent fb446aa
commit 990fdce
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 21 deletions.
diff --git a/scripts/ci_test_cuda.py b/scripts/ci_test_cuda.py
@@ -82,7 +82,8 @@ def test_cuda_validation_fail():
         """
     comp, run = run_cuda_script(cu_eval, ref.read_text(), sub, arch=None)
     assert comp.success is True
-    assert run.success is False
+    assert run.success is True
+    assert run.passed is False
     assert run.command == "./eval.out"
     # we never reach the benchmark part, because the test fails
     assert "warming up..." not in run.stdout

diff --git a/scripts/ci_test_python.py b/scripts/ci_test_python.py
@@ -34,7 +34,8 @@ def custom_kernel(input):
     return [torch.zeros_like(i) for i in input]
         """
     run = run_pytorch_script(py_eval, ref.read_text(), sub, arch=None)
-    assert run.success is False
+    assert run.success is True
+    assert run.passed is False
     assert run.command == "python eval.py"
     # we never reach the benchmark part, because the test fails
     assert "warming up..." not in run.stdout

diff --git a/src/discord-cluster-manager/cogs/verify_run_cog.py b/src/discord-cluster-manager/cogs/verify_run_cog.py
@@ -41,14 +41,22 @@ async def verify_github_run(
         github_cog: GitHubCog,
         choice: app_commands.Choice,
         interaction: discord.Interaction,
+        lang: str,
     ) -> bool:
         github_command = github_cog.run_github
-        cuda_file = create_mock_attachment(
-            "test.cu", Path("examples/identity_cuda/submission.cuh").read_text()
-        )
-        reference_code = Path("examples/identity_cuda/reference.cuh").read_text()
+        if lang == "py":
+            sub_code = create_mock_attachment(
+                "submission.py", Path("examples/identity_py/submission.py").read_text()
+            )
+            ref_code = Path("examples/identity_py/reference.py").read_text()
+        else:
+            sub_code = create_mock_attachment(
+                "test.cu", Path("examples/identity_cuda/submission.cuh").read_text()
+            )
+            ref_code = Path("examples/identity_cuda/reference.cuh").read_text()
+
         github_thread = await github_command.callback(
-            github_cog, interaction, cuda_file, choice, reference_code=reference_code
+            github_cog, interaction, sub_code, choice, reference_code=ref_code
         )
 
         message_contents = [msg.content async for msg in github_thread.history(limit=None)]
@@ -57,7 +65,7 @@ async def verify_github_run(
             "Processing `.*` with",
             "GitHub Action triggered! Run ID:",
             "Training completed with status: success",
-            ".*```\nLogs.*:",
+            "'check': 'pass'",
             "View the full run at:",
         ]
 
@@ -69,7 +77,7 @@ async def verify_github_run(
         if all_patterns_found:
             await send_discord_message(
                 interaction,
-                f"✅ GitHub run ({choice.name}) completed successfully - "
+                f"✅ GitHub run ({choice.name}) for {lang} completed successfully - "
                 "all expected messages found!",
             )
             return True
@@ -81,26 +89,35 @@ async def verify_github_run(
             ]
             await send_discord_message(
                 interaction,
-                f"❌ GitHub run ({choice.name}) verification failed. Missing expected messages:\n"
+                f"❌ GitHub run ({choice.name}) for {lang} verification failed. Missing expected messages:\n"
                 + "\n".join(f"- {pattern}" for pattern in missing_patterns),
             )
             return False
 
-    async def verify_modal_run(self, modal_cog: ModalCog, interaction: discord.Interaction) -> bool:
+    async def verify_modal_run(
+        self, modal_cog: ModalCog, interaction: discord.Interaction, lang: str
+    ) -> bool:
         t4 = app_commands.Choice(name="T4", value="t4")
         modal_command = modal_cog.run_modal
 
-        sub_code = create_mock_attachment(
-            "submission.py", Path("examples/identity_py/submission.py").read_text()
-        )
-        ref_code = Path("examples/identity_py/reference.py").read_text()
+        if lang == "py":
+            sub_code = create_mock_attachment(
+                "submission.py", Path("examples/identity_py/submission.py").read_text()
+            )
+            ref_code = Path("examples/identity_py/reference.py").read_text()
+        else:
+            sub_code = create_mock_attachment(
+                "test.cu", Path("examples/identity_cuda/submission.cuh").read_text()
+            )
+            ref_code = Path("examples/identity_cuda/reference.cuh").read_text()
+
         modal_thread = await modal_command.callback(
             modal_cog, interaction, sub_code, t4, reference_code=ref_code
         )
 
         message_contents = [msg.content async for msg in modal_thread.history(limit=None)]
 
-        required_patterns = ["Running on Modal...", "Job completed!"]
+        required_patterns = ["Running on Modal...", "Success!"]
 
         all_patterns_found = all(
             any(re.search(pattern, content, re.DOTALL) is not None for content in message_contents)
@@ -110,7 +127,7 @@ async def verify_modal_run(self, modal_cog: ModalCog, interaction: discord.Inter
         if all_patterns_found:
             await send_discord_message(
                 interaction,
-                "✅ Modal run completed successfully - all expected messages found!",
+                f"✅ Modal run for {lang} completed successfully - all expected messages found!",
             )
             return True
         else:
@@ -121,7 +138,7 @@ async def verify_modal_run(self, modal_cog: ModalCog, interaction: discord.Inter
             ]
             await send_discord_message(
                 interaction,
-                "❌ Modal run verification failed. Missing expected messages:\n"
+                f"❌ Modal run verification for {lang} failed. Missing expected messages:\n"
                 + "\n".join(f"- {pattern}" for pattern in missing_patterns),
             )
             return False
@@ -145,9 +162,11 @@ async def verify_runs(self, interaction: discord.Interaction):
             amd = app_commands.Choice(name="AMD", value="amd")
 
             results = await asyncio.gather(
-                self.verify_github_run(github_cog, nvidia, interaction),
-                self.verify_github_run(github_cog, amd, interaction),
-                self.verify_modal_run(modal_cog, interaction),
+                self.verify_github_run(github_cog, nvidia, interaction, "py"),
+                self.verify_github_run(github_cog, nvidia, interaction, "cu"),
+                self.verify_modal_run(modal_cog, interaction, "py"),
+                self.verify_github_run(github_cog, amd, interaction, "py"),
+                self.verify_modal_run(modal_cog, interaction, "cu"),
             )
 
             if all(results):