From 476eb7e094d5d3955795f233bcb3dc02dbcb840f Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 7 Jan 2025 18:19:42 -0500
Subject: [PATCH] reward model doesn't work well with batched

---
 src/axolotl/prompt_strategies/bradley_terry/chat_template.py | 4 ++++
 src/axolotl/prompt_strategies/chat_template.py               | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/axolotl/prompt_strategies/bradley_terry/chat_template.py b/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
index 627466287..c6b0fe2cf 100644
--- a/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
+++ b/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
@@ -21,6 +21,10 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
     Bradley-Terry reward model pairwise chat template prompt strategy.
     """

+    @property
+    def supports_batched(self) -> bool:
+        return False
+
     def _tokenize_single_prompt(self, prompt):
         """
diff --git a/src/axolotl/prompt_strategies/chat_template.py b/src/axolotl/prompt_strategies/chat_template.py
index 126c9d2f3..bb87ee45b 100644
--- a/src/axolotl/prompt_strategies/chat_template.py
+++ b/src/axolotl/prompt_strategies/chat_template.py
@@ -239,7 +239,7 @@ def tokenize_prompt(self, prompt: dict[str, Any]):
         Public method that can handle either a single prompt or a batch of prompts.
         """
-        if not self.is_prompt_batched(prompt):
+        if not self.is_prompt_batched(prompt) or not self.supports_batched:
             return self._tokenize_single_prompt(prompt)
         res = defaultdict(lambda: [])
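
For readers outside the axolotl codebase, the sketch below illustrates the dispatch pattern this patch relies on: tokenize_prompt routes to the single-prompt path whenever the input isn't batch-shaped OR the strategy opts out via the new supports_batched property. Only the method names (supports_batched, is_prompt_batched, _tokenize_single_prompt, tokenize_prompt) come from the patch itself; the class scaffolding, the is_prompt_batched heuristic, the tokenizer stub, and the batched re-assembly loop are simplified assumptions, not the library's actual implementation.

    from collections import defaultdict
    from typing import Any


    class ChatTemplateStrategy:
        """Base strategy: batched tokenization is the default fast path."""

        @property
        def supports_batched(self) -> bool:
            # Strategies whose rows can't be processed as a batch
            # override this to return False (the patch's mechanism).
            return True

        def is_prompt_batched(self, prompt: dict[str, Any]) -> bool:
            # Assumed heuristic: a batched row maps every column to a list.
            return bool(prompt) and all(isinstance(v, list) for v in prompt.values())

        def _tokenize_single_prompt(self, prompt: dict[str, Any]) -> dict[str, list]:
            # Stub standing in for the real chat-template tokenization.
            return {"input_ids": [0], "attention_mask": [1]}

        def tokenize_prompt(self, prompt: dict[str, Any]) -> dict[str, list]:
            # The patch's fix: also take the single-prompt path when the
            # strategy itself does not support batched inputs.
            if not self.is_prompt_batched(prompt) or not self.supports_batched:
                return self._tokenize_single_prompt(prompt)
            # Assumed batched path: split the batch dict into rows, tokenize
            # each row, then re-assemble the outputs column by column.
            res = defaultdict(list)
            n_rows = len(next(iter(prompt.values())))
            for i in range(n_rows):
                row = {key: values[i] for key, values in prompt.items()}
                for key, val in self._tokenize_single_prompt(row).items():
                    res[key].append(val)
            return dict(res)


    class BTChatTemplateStrategy(ChatTemplateStrategy):
        """Bradley-Terry pairwise strategy: a single row yields both a chosen
        and a rejected sequence, which the generic column-wise re-assembly
        above cannot represent, hence the opt-out."""

        @property
        def supports_batched(self) -> bool:
            return False


    # With the fix, the BT strategy always goes through the single-prompt
    # path, even when a datasets-style map(..., batched=True) hands it a
    # batch-shaped dict (hypothetical column names for illustration):
    strategy = BTChatTemplateStrategy()
    batch = {"chosen": [["..."]], "rejected": [["..."]]}
    print(strategy.tokenize_prompt(batch))  # routed to _tokenize_single_prompt

Exposing the opt-out as a property on the base class keeps the batched/unbatched decision in one place (tokenize_prompt) instead of forcing every incompatible subclass to re-implement the dispatch.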