feat: toggle looping with soundfile mixer

pipecat-ai · Nov 6, 2024 · 309943d · 309943d
1 parent bd50201
commit 309943d
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 15 deletions.
diff --git a/examples/foundational/23-bot-background-sound.py b/examples/foundational/23-bot-background-sound.py
@@ -6,13 +6,17 @@
 
 import argparse
 import asyncio
-import aiohttp
 import os
 import sys
 
+import aiohttp
+from dotenv import load_dotenv
+from loguru import logger
+from runner import configure_with_args
+
 from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import LLMMessagesFrame, MixerUpdateSettingsFrame, MixerEnableFrame
+from pipecat.frames.frames import LLMMessagesFrame, MixerEnableFrame, MixerUpdateSettingsFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -21,12 +25,6 @@
 from pipecat.services.openai import OpenAILLMService
 from pipecat.transports.services.daily import DailyParams, DailyTransport
 
-from runner import configure_with_args
-
-from loguru import logger
-
-from dotenv import load_dotenv
-
 load_dotenv(override=True)
 
 logger.remove(0)
@@ -40,8 +38,11 @@ async def main():
 
         (room_url, token, args) = await configure_with_args(session, parser)
 
+        script_dir = os.path.dirname(__file__)
+        ding_path = os.path.join(script_dir, "assets", "ding2.wav")
+
         soundfile_mixer = SoundfileMixer(
-            sound_files={"office": args.input},
+            sound_files={"office": args.input, "ding": ding_path},
             default_sound="office",
             volume=2.0,
         )
@@ -102,11 +103,19 @@ async def on_first_participant_joined(transport, participant):
             await transport.capture_participant_transcription(participant["id"])
             # Show how to use mixer control frames.
             await asyncio.sleep(10.0)
+            logger.info("10 Sending mixer control frames.")
             await task.queue_frame(MixerUpdateSettingsFrame({"volume": 0.5}))
             await asyncio.sleep(5.0)
-            await task.queue_frame(MixerEnableFrame(False))
+            logger.info("15 Sending mixer control frames.")
+            await task.queue_frame(
+                MixerUpdateSettingsFrame({"volume": 0.75, "sound": "ding", "loop": False})
+            )
+            # await task.queue_frame(MixerEnableFrame(False))
             await asyncio.sleep(5.0)
+            logger.info("20 Sending mixer control frames.")
             await task.queue_frame(MixerEnableFrame(True))
+            await task.queue_frame(MixerUpdateSettingsFrame({"sound": "ding"}))
+            await task.queue_frame(MixerUpdateSettingsFrame({"sound": "office", "loop": True}))
             await asyncio.sleep(5.0)
             # Kick off the conversation.
             messages.append({"role": "system", "content": "Please introduce yourself to the user."})

diff --git a/src/pipecat/audio/mixers/soundfile_mixer.py b/src/pipecat/audio/mixers/soundfile_mixer.py
@@ -5,16 +5,14 @@
 #
 
 import asyncio
-
 from typing import Any, Dict, Mapping
 
 import numpy as np
+from loguru import logger
 
 from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
 from pipecat.audio.utils import resample_audio
-from pipecat.frames.frames import MixerControlFrame, MixerUpdateSettingsFrame, MixerEnableFrame
-
-from loguru import logger
+from pipecat.frames.frames import MixerControlFrame, MixerEnableFrame, MixerUpdateSettingsFrame
 
 try:
     import soundfile as sf
@@ -45,6 +43,7 @@ def __init__(
         sound_files: Mapping[str, str],
         default_sound: str,
         volume: float = 0.4,
+        loop: bool = True,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -56,6 +55,7 @@ def __init__(
         self._sounds: Dict[str, Any] = {}
         self._current_sound = default_sound
         self._mixing = True
+        self._loop = loop
 
     async def start(self, sample_rate: int):
         self._sample_rate = sample_rate
@@ -85,6 +85,8 @@ async def _update_settings(self, frame: MixerUpdateSettingsFrame):
                     await self._change_sound(value)
                 case "volume":
                     await self._update_volume(value)
+                case "loop":
+                    await self._update_loop(value)
 
     async def _change_sound(self, sound: str):
         if sound in self._sound_files:
@@ -96,6 +98,9 @@ async def _change_sound(self, sound: str):
     async def _update_volume(self, volume: float):
         self._volume = volume
 
+    async def _update_loop(self, loop: bool):
+        self._loop = loop
+
     def _load_sound_file(self, sound_name: str, file_name: str):
         try:
             logger.debug(f"Loading background sound from {file_name}")
@@ -108,7 +113,7 @@ def _load_sound_file(self, sound_name: str, file_name: str):
 
             # Convert from bytes to np again.
             self._sounds[sound_name] = np.frombuffer(audio, dtype=np.int16)
-        except Exception as ex:
+        except Exception:
             logger.error(f"Unable to open file {file_name}")
 
     def _mix_with_sound(self, audio: bytes):
@@ -127,6 +132,8 @@ def _mix_with_sound(self, audio: bytes):
 
         # Not enough data left: return the audio unmixed if not looping, else go back to the beginning.
         if self._sound_pos + chunk_size > len(sound):
+            if not self._loop:
+                return audio
             self._sound_pos = 0
 
         start_pos = self._sound_pos