diff --git a/README.md b/README.md index ea99fc6..64dd4ef 100644 --- a/README.md +++ b/README.md @@ -118,9 +118,8 @@ y : np.ndarray [shape=(# frames,) or (# channels, # frames)], real-valued length of the windowed signal after padding with zeros. The number of rows in the STFT matrix ``D`` is ``(1 + n_fft/2)``. The default value, ``n_fft=2048`` samples, corresponds to a physical - duration of 93 milliseconds at a sample rate of 22050 Hz, i.e. the - default sample rate in librosa. This value is well adapted for music - signals. However, in speech processing, the recommended value is 512, + duration of 93 milliseconds at a sample rate of 22050 Hz. + This value is well adapted for music signals. However, in speech processing, the recommended value is 512, corresponding to 23 milliseconds at a sample rate of 22050 Hz. In any case, we recommend setting ``n_fft`` to a power of two for optimizing the speed of the fast Fourier transform (FFT) algorithm., by default 1024 diff --git a/environment.yml b/environment.yml index ff81418..61ed9c6 100755 --- a/environment.yml +++ b/environment.yml @@ -7,7 +7,6 @@ dependencies: - notebook>5.2 - scipy - matplotlib - - librosa - numpy - tqdm - pip: diff --git a/noisereduce/noisereduce.py b/noisereduce/noisereduce.py index d779c17..1b06465 100755 --- a/noisereduce/noisereduce.py +++ b/noisereduce/noisereduce.py @@ -77,9 +77,8 @@ def reduce_noise( length of the windowed signal after padding with zeros. The number of rows in the STFT matrix ``D`` is ``(1 + n_fft/2)``. The default value, ``n_fft=2048`` samples, corresponds to a physical - duration of 93 milliseconds at a sample rate of 22050 Hz, i.e. the - default sample rate in librosa. This value is well adapted for music - signals. However, in speech processing, the recommended value is 512, + duration of 93 milliseconds at a sample rate of 22050 Hz. + This value is well adapted for music signals. However, in speech processing, the recommended value is 512, corresponding to 23 milliseconds at a sample rate of 22050 Hz. In any case, we recommend setting ``n_fft`` to a power of two for optimizing the speed of the fast Fourier transform (FFT) algorithm., by default 1024 diff --git a/noisereduce/spectralgate/nonstationary.py b/noisereduce/spectralgate/nonstationary.py index c6199f2..f570a6c 100644 --- a/noisereduce/spectralgate/nonstationary.py +++ b/noisereduce/spectralgate/nonstationary.py @@ -1,30 +1,28 @@ from noisereduce.spectralgate.base import SpectralGate import numpy as np -from librosa import stft, istft -from scipy.signal import filtfilt, fftconvolve -import tempfile +from scipy.signal import filtfilt, fftconvolve, stft, istft from .utils import sigmoid class SpectralGateNonStationary(SpectralGate): def __init__( - self, - y, - sr, - chunk_size, - padding, - n_fft, - win_length, - hop_length, - time_constant_s, - freq_mask_smooth_hz, - time_mask_smooth_ms, - thresh_n_mult_nonstationary, - sigmoid_slope_nonstationary, - tmp_folder, - prop_decrease, - use_tqdm, - n_jobs, + self, + y, + sr, + chunk_size, + padding, + n_fft, + win_length, + hop_length, + time_constant_s, + freq_mask_smooth_hz, + time_mask_smooth_ms, + thresh_n_mult_nonstationary, + sigmoid_slope_nonstationary, + tmp_folder, + prop_decrease, + use_tqdm, + n_jobs, ): self._thresh_n_mult_nonstationary = thresh_n_mult_nonstationary self._sigmoid_slope_nonstationary = sigmoid_slope_nonstationary @@ -50,11 +48,12 @@ def spectral_gating_nonstationary(self, chunk): """non-stationary version of spectral gating""" denoised_channels = np.zeros(chunk.shape, chunk.dtype) for ci, channel in enumerate(chunk): - sig_stft = stft( - (channel), - n_fft=self._n_fft, - hop_length=self._hop_length, - win_length=self._win_length, + _, _, sig_stft = stft( + channel, + nfft=self._n_fft, + noverlap=self._win_length - self._hop_length, + nperseg=self._win_length, + padded=False ) # get abs of signal stft abs_sig_stft = np.abs(sig_stft) @@ -81,17 +80,18 @@ def spectral_gating_nonstationary(self, chunk): sig_mask = fftconvolve(sig_mask, self._smoothing_filter, mode="same") sig_mask = sig_mask * self._prop_decrease + np.ones(np.shape(sig_mask)) * ( - 1.0 - self._prop_decrease + 1.0 - self._prop_decrease ) # multiply signal with mask sig_stft_denoised = sig_stft * sig_mask # invert/recover the signal - denoised_signal = istft( + _, denoised_signal = istft( sig_stft_denoised, - hop_length=self._hop_length, - win_length=self._win_length, + nfft=self._n_fft, + noverlap=self._win_length - self._hop_length, + nperseg=self._win_length ) denoised_channels[ci, : len(denoised_signal)] = denoised_signal return denoised_channels @@ -104,12 +104,12 @@ def _do_filter(self, chunk): def get_time_smoothed_representation( - spectral, samplerate, hop_length, time_constant_s=0.001 + spectral, samplerate, hop_length, time_constant_s=0.001 ): t_frames = time_constant_s * samplerate / float(hop_length) # By default, this solves the equation for b: # b**2 + (1 - b) / t_frames - 2 = 0 # which approximates the full-width half-max of the # squared frequency response of the IIR low-pass filt - b = (np.sqrt(1 + 4 * t_frames**2) - 1) / (2 * t_frames**2) + b = (np.sqrt(1 + 4 * t_frames ** 2) - 1) / (2 * t_frames ** 2) return filtfilt([b], [1, b - 1], spectral, axis=-1, padtype=None) diff --git a/noisereduce/spectralgate/stationary.py b/noisereduce/spectralgate/stationary.py index 86b82b1..61c1e98 100644 --- a/noisereduce/spectralgate/stationary.py +++ b/noisereduce/spectralgate/stationary.py @@ -1,30 +1,29 @@ from noisereduce.spectralgate.base import SpectralGate import numpy as np -from librosa import stft, istft -from scipy.signal import fftconvolve +from scipy.signal import fftconvolve, stft, istft from .utils import _amp_to_db class SpectralGateStationary(SpectralGate): def __init__( - self, - y, - sr, - y_noise, - n_std_thresh_stationary, - chunk_size, - clip_noise_stationary, - padding, - n_fft, - win_length, - hop_length, - time_constant_s, - freq_mask_smooth_hz, - time_mask_smooth_ms, - tmp_folder, - prop_decrease, - use_tqdm, - n_jobs, + self, + y, + sr, + y_noise, + n_std_thresh_stationary, + chunk_size, + clip_noise_stationary, + padding, + n_fft, + win_length, + hop_length, + time_constant_s, + freq_mask_smooth_hz, + time_mask_smooth_ms, + tmp_folder, + prop_decrease, + use_tqdm, + n_jobs, ): super().__init__( y=y, @@ -65,35 +64,36 @@ def __init__( self.y_noise = self.y_noise[:chunk_size] # calculate statistics over y_noise - abs_noise_stft = np.abs( - stft( - (self.y_noise), - n_fft=self._n_fft, - hop_length=self._hop_length, - win_length=self._win_length, - ) + _, _, noise_stft = stft( + self.y_noise, + nfft=self._n_fft, + noverlap=self._win_length - self._hop_length, + nperseg=self._win_length, + padded=False ) - noise_stft_db = _amp_to_db(abs_noise_stft) + + noise_stft_db = _amp_to_db(noise_stft) self.mean_freq_noise = np.mean(noise_stft_db, axis=1) self.std_freq_noise = np.std(noise_stft_db, axis=1) self.noise_thresh = ( - self.mean_freq_noise + self.std_freq_noise * self.n_std_thresh_stationary + self.mean_freq_noise + self.std_freq_noise * self.n_std_thresh_stationary ) def spectral_gating_stationary(self, chunk): """non-stationary version of spectral gating""" denoised_channels = np.zeros(chunk.shape, chunk.dtype) for ci, channel in enumerate(chunk): - sig_stft = stft( - (channel), - n_fft=self._n_fft, - hop_length=self._hop_length, - win_length=self._win_length, + _, _, sig_stft = stft( + channel, + nfft=self._n_fft, + noverlap=self._win_length - self._hop_length, + nperseg=self._win_length, + padded=False ) # spectrogram of signal in dB - sig_stft_db = _amp_to_db(np.abs(sig_stft)) + sig_stft_db = _amp_to_db(sig_stft) # calculate the threshold for each frequency/time bin db_thresh = np.repeat( @@ -106,7 +106,7 @@ def spectral_gating_stationary(self, chunk): sig_mask = sig_stft_db > db_thresh sig_mask = sig_mask * self._prop_decrease + np.ones(np.shape(sig_mask)) * ( - 1.0 - self._prop_decrease + 1.0 - self._prop_decrease ) if self.smooth_mask: @@ -117,10 +117,11 @@ def spectral_gating_stationary(self, chunk): sig_stft_denoised = sig_stft * sig_mask # invert/recover the signal - denoised_signal = istft( + _, denoised_signal = istft( sig_stft_denoised, - hop_length=self._hop_length, - win_length=self._win_length, + nfft=self._n_fft, + noverlap=self._win_length - self._hop_length, + nperseg=self._win_length ) denoised_channels[ci, : len(denoised_signal)] = denoised_signal return denoised_channels diff --git a/noisereduce/spectralgate/utils.py b/noisereduce/spectralgate/utils.py index 7904ab6..fff68f8 100644 --- a/noisereduce/spectralgate/utils.py +++ b/noisereduce/spectralgate/utils.py @@ -1,5 +1,4 @@ import numpy as np -from librosa.core import amplitude_to_db, db_to_amplitude def sigmoid(x, shift, mult): @@ -9,15 +8,9 @@ def sigmoid(x, shift, mult): return 1 / (1 + np.exp(-(x + shift) * mult)) -def _amp_to_db(x): +def _amp_to_db(x, top_db=80.0, eps=np.finfo(np.float64).eps): """ Convert the input tensor from amplitude to decibel scale. """ - return amplitude_to_db(x, ref=1.0, amin=1e-20, top_db=80.0) - - -def _db_to_amp(x, ): - """ - Convert the input tensor from decibel scale to amplitude. - """ - return db_to_amplitude(x, ref=1.0) + x_db = 20 * np.log10(np.abs(x) + eps) + return np.maximum(x_db, np.max(x_db, axis=-1, keepdims=True) - top_db) diff --git a/requirements.txt b/requirements.txt index 5047237..84bcb8f 100755 --- a/requirements.txt +++ b/requirements.txt @@ -2,11 +2,11 @@ -e . matplotlib -librosa numpy scipy tqdm torch +joblib # for testing diff --git a/setup.py b/setup.py index fed50fc..2bc7ddc 100755 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="noisereduce", packages=find_packages(), - version="3.0.2", + version="3.0.3", description="Noise reduction using Spectral Gating in Python", author="Tim Sainburg", license="MIT", @@ -21,7 +21,7 @@ "Topic :: Education", "Topic :: Scientific/Engineering", ], - install_requires=["scipy", "matplotlib", "librosa", "numpy", "tqdm"], + install_requires=["scipy", "matplotlib", "numpy", "tqdm", "joblib"], extras_require={ "PyTorch": ["torch>=1.9.0"], },