From 7a9b6bb1e5e03a38545ee4507d70d39098960430 Mon Sep 17 00:00:00 2001
From: aciddelgado
Date: Mon, 9 Dec 2024 10:17:43 -0800
Subject: [PATCH] copy to avoid bug

---
 src/generators.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/generators.cpp b/src/generators.cpp
index eff98f5ca..248fc4ec6 100644
--- a/src/generators.cpp
+++ b/src/generators.cpp
@@ -297,6 +297,7 @@ void Generator::ComputeLogits(DeviceSpan next_tokens) {
   if (computed_logits_)
     throw std::runtime_error("ComputeLogits called again without calling AppendTokens or GenerateNextToken first");
 
+  next_tokens.CopyDeviceToCpu();
   auto logits = state_->Run(search_->GetSequenceLength(), next_tokens, search_->GetNextIndices());
   if (g_log.enabled && g_log.model_logits) {
     auto& stream = Log("model_logits");