reward model doesn't work well with batched prompts

axolotl-ai-cloud · Jan 7, 2025 · 476eb7e
1 parent 86de7fb
commit 476eb7e
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 1 deletion.
diff --git a/src/axolotl/prompt_strategies/bradley_terry/chat_template.py b/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
@@ -21,6 +21,10 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
     Bradley-Terry reward model pairwise chat template prompt strategy.
     """
 
+    @property
+    def supports_batched(self) -> bool:
+        return False
+
     def _tokenize_single_prompt(self, prompt):
         """
 

diff --git a/src/axolotl/prompt_strategies/chat_template.py b/src/axolotl/prompt_strategies/chat_template.py
@@ -239,7 +239,7 @@ def tokenize_prompt(self, prompt: dict[str, Any]):
         Public method that can handle either a single prompt or a batch of prompts.
         """
 
-        if not self.is_prompt_batched(prompt):
+        if not self.is_prompt_batched(prompt) or not self.supports_batched:
             return self._tokenize_single_prompt(prompt)
 
         res = defaultdict(lambda: [])