Skip to content

Commit

Permalink
chore(sync): prepare for 0.5.2 (#219)
Browse files Browse the repository at this point in the history
  • Loading branch information
nbsp authored Dec 19, 2024
2 parents 428b62e + e20d82a commit b4b9214
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 14 deletions.
5 changes: 5 additions & 0 deletions .changeset/eight-mugs-glow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/agents-plugin-openai": patch
---

fix(tts): add missing crypto import to OpenAI TTS
5 changes: 5 additions & 0 deletions .changeset/khaki-ties-design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/agents": patch
---

fix(pipeline): add transcription for AGENT_SPEECH_COMMITTED
5 changes: 5 additions & 0 deletions .changeset/moody-poems-juggle.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/agents-plugin-openai": patch
---

groq: add support for Llama 3.3 70B
21 changes: 14 additions & 7 deletions agents/src/pipeline/agent_output.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export class SynthesisHandle {
static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');

#speechId: string;
text?: string;
ttsSource: SpeechSource;
#agentPlayout: AgentPlayout;
tts: TTS;
Expand Down Expand Up @@ -97,7 +98,7 @@ export class AgentOutput {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
return new CancellablePromise(async (resolve, _, onCancel) => {
const ttsSource = await handle.ttsSource;
let task: CancellablePromise<void>;
let task: CancellablePromise<string>;
if (typeof ttsSource === 'string') {
task = stringSynthesisTask(ttsSource, handle);
} else {
Expand All @@ -113,6 +114,10 @@ export class AgentOutput {
} finally {
if (handle.intFut.done) {
gracefullyCancel(task);
} else {
task.then((text) => {
handle.text = text;
});
}
}

Expand All @@ -121,9 +126,9 @@ export class AgentOutput {
}
}

const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<void> => {
const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
return new CancellablePromise<void>(async (resolve, _, onCancel) => {
return new CancellablePromise(async (resolve, _, onCancel) => {
let cancelled = false;
onCancel(() => {
cancelled = true;
Expand All @@ -141,16 +146,17 @@ const stringSynthesisTask = (text: string, handle: SynthesisHandle): Cancellable
}
handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);

resolve();
resolve(text);
});
};

const streamSynthesisTask = (
stream: AsyncIterable<string>,
handle: SynthesisHandle,
): CancellablePromise<void> => {
): CancellablePromise<string> => {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
return new CancellablePromise<void>(async (resolve, _, onCancel) => {
return new CancellablePromise(async (resolve, _, onCancel) => {
let fullText = '';
let cancelled = false;
onCancel(() => {
cancelled = true;
Expand All @@ -170,12 +176,13 @@ const streamSynthesisTask = (
readGeneratedAudio();

for await (const text of stream) {
fullText += text;
if (cancelled) break;
ttsStream.pushText(text);
}
ttsStream.flush();
ttsStream.endInput();

resolve();
resolve(fullText);
});
};
8 changes: 2 additions & 6 deletions agents/src/pipeline/pipeline_agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,7 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
// add it to the chat context for this new reply synthesis
copiedCtx.messages.push(
ChatMessage.create({
// TODO(nbsp): uhhh unsure where to get the played text here
// text: playingSpeech.synthesisHandle.(theres no ttsForwarder here)
text: playingSpeech.synthesisHandle.text,
role: ChatRole.ASSISTANT,
}),
);
Expand Down Expand Up @@ -620,8 +619,7 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
}
commitUserQuestionIfNeeded();

// TODO(nbsp): what goes here
let collectedText = '';
const collectedText = handle.synthesisHandle.text;
const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length;
const extraToolsMessages = []; // additional messages from the functions to add to the context
let interrupted = handle.interrupted;
Expand Down Expand Up @@ -685,8 +683,6 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
const playHandle = answerSynthesis.play();
await playHandle.join().await;

// TODO(nbsp): what text goes here
collectedText = '';
interrupted = answerSynthesis.interrupted;
newFunctionCalls = answerLLMStream.functionCalls;

Expand Down
1 change: 1 addition & 0 deletions plugins/openai/src/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ export type GroqChatModels =
| 'llama-3.1-405b-reasoning'
| 'llama-3.1-70b-versatile'
| 'llama-3.1-8b-instant'
| 'llama-3.3-70b-versatile'
| 'llama3-groq-70b-8192-tool-use-preview'
| 'llama3-groq-8b-8192-tool-use-preview'
| 'llama-guard-3-8b'
Expand Down
3 changes: 2 additions & 1 deletion plugins/openai/src/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//
// SPDX-License-Identifier: Apache-2.0
import { AudioByteStream, tts } from '@livekit/agents';
import { randomUUID } from 'crypto';
import { OpenAI } from 'openai';
import type { TTSModels, TTSVoices } from './models.js';

Expand Down Expand Up @@ -81,7 +82,7 @@ export class ChunkedStream extends tts.ChunkedStream {

async #run(stream: Promise<Response>) {
const buffer = await stream.then((r) => r.arrayBuffer());
const requestId = crypto.randomUUID();
const requestId = randomUUID();
const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS);
const frames = audioByteStream.write(buffer);

Expand Down

0 comments on commit b4b9214

Please sign in to comment.