Skip to content

Commit

Permalink
Merge pull request #611 from pipecat-ai/aleix/audio-filters
Browse files Browse the repository at this point in the history
introduce audio filters
  • Loading branch information
aconchillo authored Nov 5, 2024
2 parents 126324c + 358c458 commit 6082da2
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 8 deletions.
12 changes: 9 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Introduce output transport audio mixers. Output transport audio mixers can be
used, for example, to add background sounds or any other audio mixing
functionality before the output audio is actually written to the transport.
- Added audio filter `NoisereduceFilter`.

- Introduce input transport audio filters (`BaseAudioFilter`). Audio filters can
be used to remove background noises before audio is sent to VAD.

- Introduce output transport audio mixers (`BaseAudioMixer`). Output transport
audio mixers can be used, for example, to add background sounds or any other
audio mixing functionality before the output audio is actually written to the
transport.

- Added `GatedOpenAILLMContextAggregator`. This aggregator keeps the last
received OpenAI LLM context frame and it doesn't let it through until the
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ livekit = [ "livekit~=0.17.5", "livekit-api~=0.7.1", "tenacity~=8.5.0" ]
lmnt = [ "lmnt~=1.1.4" ]
local = [ "pyaudio~=0.2.14" ]
moondream = [ "einops~=0.8.0", "timm~=1.0.8", "transformers~=4.44.0" ]
noisereduce = [ "noisereduce~=3.0.3" ]
openai = [ "openai~=1.50.2", "websockets~=13.1", "python-deepcompare~=1.0.1" ]
openpipe = [ "openpipe~=4.24.0" ]
playht = [ "pyht~=0.1.4", "websockets~=13.1" ]
Expand Down
Empty file.
47 changes: 47 additions & 0 deletions src/pipecat/audio/filters/base_audio_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from abc import ABC, abstractmethod

from pipecat.frames.frames import FilterControlFrame


class BaseAudioFilter(ABC):
"""This is a base class for input transport audio filters. If an audio
filter is provided to the input transport it will be used to process audio
before VAD and before pushing it downstream. There are control frames to
update filter settings or to enable or disable the filter at runtime.
"""

@abstractmethod
async def start(self, sample_rate: int):
"""This will be called from the input transport when the transport is
started. It can be used to initialize the filter. The input transport
sample rate is provided so the filter can adjust to that sample rate.
"""
pass

@abstractmethod
async def stop(self):
"""This will be called from the input transport when the transport is
stopping.
"""
pass

@abstractmethod
async def process_frame(self, frame: FilterControlFrame):
"""This will be called when the input transport receives a
FilterControlFrame.
"""
pass

@abstractmethod
async def filter(self, audio: bytes) -> bytes:
pass
54 changes: 54 additions & 0 deletions src/pipecat/audio/filters/noisereduce_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import numpy as np

from pipecat.audio.filters.base_audio_filter import BaseAudioFilter

from loguru import logger

from pipecat.frames.frames import FilterControlFrame, FilterEnableFrame

try:
import noisereduce as nr
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
logger.error(
"In order to use the noisereduce filter, you need to `pip install pipecat-ai[noisereduce]`."
)
raise Exception(f"Missing module: {e}")


class NoisereduceFilter(BaseAudioFilter):
def __init__(self) -> None:
self._filtering = True
self._sample_rate = 0

async def start(self, sample_rate: int):
self._sample_rate = sample_rate

async def stop(self):
pass

async def process_frame(self, frame: FilterControlFrame):
if isinstance(frame, FilterEnableFrame):
self._filtering = frame.enable

async def filter(self, audio: bytes) -> bytes:
if not self._filtering:
return audio

data = np.frombuffer(audio, dtype=np.int16)

# Add a small epsilon to avoid division by zero.
epsilon = 1e-10
data = data.astype(np.float32) + epsilon

# Noise reduction
reduced_noise = nr.reduce_noise(y=data, sr=self._sample_rate)
audio = np.clip(reduced_noise, -32768, 32767).astype(np.int16).tobytes()

return audio
4 changes: 2 additions & 2 deletions src/pipecat/audio/mixers/base_audio_mixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from abc import ABC, abstractmethod

from pipecat.frames.frames import Frame
from pipecat.frames.frames import MixerControlFrame


class BaseAudioMixer(ABC):
Expand Down Expand Up @@ -36,7 +36,7 @@ async def stop(self):
pass

@abstractmethod
async def process_frame(self, frame: Frame):
async def process_frame(self, frame: MixerControlFrame):
"""This will be called when the output transport receives a
MixerControlFrame.
Expand Down
4 changes: 2 additions & 2 deletions src/pipecat/audio/mixers/soundfile_mixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
from pipecat.audio.utils import resample_audio
from pipecat.frames.frames import Frame, MixerUpdateSettingsFrame, MixerEnableFrame
from pipecat.frames.frames import MixerControlFrame, MixerUpdateSettingsFrame, MixerEnableFrame

from loguru import logger

Expand Down Expand Up @@ -65,7 +65,7 @@ async def start(self, sample_rate: int):
async def stop(self):
pass

async def process_frame(self, frame: Frame):
async def process_frame(self, frame: MixerControlFrame):
if isinstance(frame, MixerUpdateSettingsFrame):
await self._update_settings(frame)
elif isinstance(frame, MixerEnableFrame):
Expand Down
23 changes: 22 additions & 1 deletion src/pipecat/frames/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,9 +584,30 @@ class VADParamsUpdateFrame(ControlFrame):
params: VADParams


@dataclass
class FilterControlFrame(ControlFrame):
"""Base control frame for other audio filter frames."""

pass


@dataclass
class FilterUpdateSettingsFrame(FilterControlFrame):
"""Control frame to update filter settings."""

settings: Mapping[str, Any]


@dataclass
class FilterEnableFrame(FilterControlFrame):
"""Control frame to enable or disable the filter at runtime."""

enable: bool


@dataclass
class MixerControlFrame(ControlFrame):
"""Base control frame for other mixer frames."""
"""Base control frame for other audio mixer frames."""

pass

Expand Down
13 changes: 13 additions & 0 deletions src/pipecat/transports/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
BotInterruptionFrame,
CancelFrame,
EndFrame,
FilterUpdateSettingsFrame,
Frame,
InputAudioRawFrame,
StartFrame,
Expand Down Expand Up @@ -41,6 +42,9 @@ def __init__(self, params: TransportParams, **kwargs):
self._audio_task = None

async def start(self, frame: StartFrame):
# Start audio filter.
if self._params.audio_in_filter:
await self._params.audio_in_filter.start(self._params.audio_in_sample_rate)
# Create audio input queue and task if needed.
if self._params.audio_in_enabled or self._params.vad_enabled:
self._audio_in_queue = asyncio.Queue()
Expand All @@ -52,6 +56,9 @@ async def stop(self, frame: EndFrame):
self._audio_task.cancel()
await self._audio_task
self._audio_task = None
# Stop audio filter.
if self._params.audio_in_filter:
await self._params.audio_in_filter.stop()

async def cancel(self, frame: CancelFrame):
# Cancel and wait for the audio input task to finish.
Expand Down Expand Up @@ -100,6 +107,8 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
vad_analyzer = self.vad_analyzer()
if vad_analyzer:
vad_analyzer.set_params(frame.params)
elif isinstance(frame, FilterUpdateSettingsFrame) and self._params.audio_in_filter:
await self._params.audio_in_filter.process_frame(frame)
# Other frames
else:
await self.push_frame(frame, direction)
Expand Down Expand Up @@ -165,6 +174,10 @@ async def _audio_task_handler(self):

audio_passthrough = True

# If an audio filter is available, run it before VAD.
if self._params.audio_in_filter:
frame.audio = await self._params.audio_in_filter.filter(frame.audio)

# Check VAD and push event if necessary. We just care about
# changes from QUIET to SPEAKING and vice versa.
if self._params.vad_enabled:
Expand Down
2 changes: 2 additions & 0 deletions src/pipecat/transports/base_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,15 @@ def __init__(self, params: TransportParams, **kwargs):
self._bot_speaking = False

async def start(self, frame: StartFrame):
# Start audio mixer.
if self._params.audio_out_mixer:
await self._params.audio_out_mixer.start(self._params.audio_out_sample_rate)
self._create_output_tasks()
self._create_sink_tasks()

async def stop(self, frame: EndFrame):
await self._cancel_output_tasks()
# Stop audio mixer.
if self._params.audio_out_mixer:
await self._params.audio_out_mixer.stop()

Expand Down
2 changes: 2 additions & 0 deletions src/pipecat/transports/base_transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pydantic import ConfigDict
from pydantic.main import BaseModel

from pipecat.audio.filters.base_audio_filter import BaseAudioFilter
from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
from pipecat.audio.vad.vad_analyzer import VADAnalyzer
from pipecat.processors.frame_processor import FrameProcessor
Expand All @@ -39,6 +40,7 @@ class TransportParams(BaseModel):
audio_in_enabled: bool = False
audio_in_sample_rate: int = 16000
audio_in_channels: int = 1
audio_in_filter: Optional[BaseAudioFilter] = None
vad_enabled: bool = False
vad_audio_passthrough: bool = False
vad_analyzer: VADAnalyzer | None = None
Expand Down

0 comments on commit 6082da2

Please sign in to comment.