Skip to content

Commit

Permalink
fix for inference keep end
Browse files (browse the repository at this point in the history)
  • Loading branch information
lmeribal committed Nov 13, 2024
1 parent 1b86dd8 commit d565f1b
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion turbo_alignment/dataset/chat/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,20 @@ def __keep_start(
inference: bool,
max_tokens: int | None = None,
) -> tuple[int, int]:
print(max_tokens, conversation.messages)
continuation = False
for i, (message, end_index) in enumerate(zip(conversation.messages[::-1], replicas_cum_len[::-1])):
print(i, message, end_index)
if self.settings.only_answer_loss:
if inference and message.role == ChatMessageRole.BOT:
if inference and message.role == ChatMessageRole.BOT and not continuation:
continuation = True
continue
if not inference and message.role != ChatMessageRole.BOT:
continue
continuation = False

if max_tokens is None or end_index < max_tokens:
print('RETURN', len(replicas_cum_len) - i)
return 0, len(replicas_cum_len) - i

raise ValueError('Can\'t trim dialogue to fit all requirements')
Expand Down Expand Up @@ -197,6 +203,7 @@ def _truncate_and_merge(
labels = np.array([])

truncated_conversation_messages = conversation.messages[left_bound:right_bound]
print("TRUNCATED", truncated_conversation_messages)
truncated_tokenized_replicas = tokenized_replicas[left_bound:right_bound]

if self.source.system_prompt is not None and self.settings.keep_end and left_bound != 0:
Expand Down

0 comments on commit d565f1b

Please sign in to comment.