-
Notifications
You must be signed in to change notification settings - Fork 322
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #611 from pipecat-ai/aleix/audio-filters
introduce audio filters
- Loading branch information
Showing
11 changed files
with
154 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# | ||
# Copyright (c) 2024, Daily | ||
# | ||
# SPDX-License-Identifier: BSD 2-Clause License | ||
# | ||
|
||
from abc import ABC, abstractmethod | ||
|
||
from pipecat.frames.frames import FilterControlFrame | ||
|
||
|
||
class BaseAudioFilter(ABC):
    """Base class for input transport audio filters.

    If an audio filter is provided to the input transport it will be used to
    process audio before VAD and before pushing it downstream. There are
    control frames to update filter settings or to enable or disable the
    filter at runtime.
    """

    @abstractmethod
    async def start(self, sample_rate: int) -> None:
        """Initialize the filter when the input transport starts.

        Args:
            sample_rate: The input transport sample rate, provided so the
                filter can adjust to it.
        """

    @abstractmethod
    async def stop(self) -> None:
        """Called from the input transport when the transport is stopping."""

    @abstractmethod
    async def process_frame(self, frame: FilterControlFrame) -> None:
        """Handle a control frame received by the input transport.

        Args:
            frame: The FilterControlFrame to react to, e.g. to update filter
                settings or to enable or disable the filter.
        """

    @abstractmethod
    async def filter(self, audio: bytes) -> bytes:
        """Process a chunk of input audio and return the filtered audio.

        Args:
            audio: Raw audio bytes handed over by the input transport. The
                sample encoding is not fixed by this interface; presumably it
                matches the transport's input format (confirm with the
                transport implementation).

        Returns:
            The filtered audio bytes. Implementations that are currently
            disabled are expected to return ``audio`` unchanged.
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# | ||
# Copyright (c) 2024, Daily | ||
# | ||
# SPDX-License-Identifier: BSD 2-Clause License | ||
# | ||
|
||
import numpy as np | ||
|
||
from pipecat.audio.filters.base_audio_filter import BaseAudioFilter | ||
|
||
from loguru import logger | ||
|
||
from pipecat.frames.frames import FilterControlFrame, FilterEnableFrame | ||
|
||
# noisereduce is an optional dependency: fail at import time with an
# actionable message instead of a bare ModuleNotFoundError.
try:
    import noisereduce as nr
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use the noisereduce filter, you need to `pip install pipecat-ai[noisereduce]`."
    )
    # Chain explicitly so the original import failure is kept as __cause__.
    raise Exception(f"Missing module: {e}") from e
|
||
|
||
class NoisereduceFilter(BaseAudioFilter):
    """Audio filter that denoises input audio with the ``noisereduce`` package.

    Filtering is enabled when the filter is constructed and can be switched
    on or off at runtime with a FilterEnableFrame.
    """

    def __init__(self) -> None:
        self._filtering = True
        # Placeholder; the real value is supplied by start().
        self._sample_rate = 0

    async def start(self, sample_rate: int):
        """Remember the sample rate to pass to the noise reduction call."""
        self._sample_rate = sample_rate

    async def stop(self):
        """Nothing to do: the filter holds no resources."""

    async def process_frame(self, frame: FilterControlFrame):
        """Enable or disable filtering on FilterEnableFrame; ignore other frames."""
        if isinstance(frame, FilterEnableFrame):
            self._filtering = frame.enable

    async def filter(self, audio: bytes) -> bytes:
        """Return a noise-reduced copy of ``audio``.

        Args:
            audio: Audio bytes, interpreted as 16-bit integer samples.

        Returns:
            The denoised samples as 16-bit integer bytes, or ``audio``
            unchanged when filtering is disabled or the buffer is empty.
        """
        # An empty buffer has no samples to denoise; return it as-is rather
        # than handing a zero-length array to noisereduce.
        if not self._filtering or not audio:
            return audio

        samples = np.frombuffer(audio, dtype=np.int16)

        # Add a small epsilon to avoid division by zero.
        epsilon = 1e-10
        samples = samples.astype(np.float32) + epsilon

        # Noise reduction
        reduced_noise = nr.reduce_noise(y=samples, sr=self._sample_rate)

        # Clamp to the int16 range before converting back so out-of-range
        # values saturate instead of wrapping around.
        return np.clip(reduced_noise, -32768, 32767).astype(np.int16).tobytes()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters