From 476eb7e094d5d3955795f233bcb3dc02dbcb840f Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 7 Jan 2025 18:19:42 -0500
Subject: [PATCH] reward model doesn't work well with batched

---
 src/axolotl/prompt_strategies/bradley_terry/chat_template.py | 4 ++++
 src/axolotl/prompt_strategies/chat_template.py               | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/axolotl/prompt_strategies/bradley_terry/chat_template.py b/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
index 627466287..c6b0fe2cf 100644
--- a/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
+++ b/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
@@ -21,6 +21,10 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
     Bradley-Terry reward model pairwise chat template prompt strategy.
     """

+    @property
+    def supports_batched(self) -> bool:
+        return False
+
     def _tokenize_single_prompt(self, prompt):
         """
diff --git a/src/axolotl/prompt_strategies/chat_template.py b/src/axolotl/prompt_strategies/chat_template.py
index 126c9d2f3..bb87ee45b 100644
--- a/src/axolotl/prompt_strategies/chat_template.py
+++ b/src/axolotl/prompt_strategies/chat_template.py
@@ -239,7 +239,7 @@ def tokenize_prompt(self, prompt: dict[str, Any]):
         Public method that can handle either a single prompt or a batch of prompts.
         """
-        if not self.is_prompt_batched(prompt):
+        if not self.is_prompt_batched(prompt) or not self.supports_batched:
             return self._tokenize_single_prompt(prompt)
         res = defaultdict(lambda: [])
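
For readers outside the axolotl codebase, the sketch below illustrates the dispatch pattern this patch relies on: tokenize_prompt routes to the single-prompt path whenever the input isn't batch-shaped OR the strategy opts out via the new supports_batched property. Only the method names (supports_batched, is_prompt_batched, _tokenize_single_prompt, tokenize_prompt) come from the patch itself; the class scaffolding, the is_prompt_batched heuristic, the tokenizer stub, and the batched re-assembly loop are simplified assumptions, not the library's actual implementation.

    from collections import defaultdict
    from typing import Any


    class ChatTemplateStrategy:
        """Base strategy: batched tokenization is the default fast path."""

        @property
        def supports_batched(self) -> bool:
            # Strategies whose rows can't be processed as a batch
            # override this to return False (the patch's mechanism).
            return True

        def is_prompt_batched(self, prompt: dict[str, Any]) -> bool:
            # Assumed heuristic: a batched row maps every column to a list.
            return bool(prompt) and all(isinstance(v, list) for v in prompt.values())

        def _tokenize_single_prompt(self, prompt: dict[str, Any]) -> dict[str, list]:
            # Stub standing in for the real chat-template tokenization.
            return {"input_ids": [0], "attention_mask": [1]}

        def tokenize_prompt(self, prompt: dict[str, Any]) -> dict[str, list]:
            # The patch's fix: also take the single-prompt path when the
            # strategy itself does not support batched inputs.
            if not self.is_prompt_batched(prompt) or not self.supports_batched:
                return self._tokenize_single_prompt(prompt)
            # Assumed batched path: split the batch dict into rows, tokenize
            # each row, then re-assemble the outputs column by column.
            res = defaultdict(list)
            n_rows = len(next(iter(prompt.values())))
            for i in range(n_rows):
                row = {key: values[i] for key, values in prompt.items()}
                for key, val in self._tokenize_single_prompt(row).items():
                    res[key].append(val)
            return dict(res)


    class BTChatTemplateStrategy(ChatTemplateStrategy):
        """Bradley-Terry pairwise strategy: a single row yields both a chosen
        and a rejected sequence, which the generic column-wise re-assembly
        above cannot represent, hence the opt-out."""

        @property
        def supports_batched(self) -> bool:
            return False


    # With the fix, the BT strategy always goes through the single-prompt
    # path, even when a datasets-style map(..., batched=True) hands it a
    # batch-shaped dict (hypothetical column names for illustration):
    strategy = BTChatTemplateStrategy()
    batch = {"chosen": [["..."]], "rejected": [["..."]]}
    print(strategy.tokenize_prompt(batch))  # routed to _tokenize_single_prompt

Exposing the opt-out as a property on the base class keeps the batched/unbatched decision in one place (tokenize_prompt) instead of forcing every incompatible subclass to re-implement the dispatch.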