def lazyprop(fn):
    """
    Decorator which turns a method into a lazily evaluated, cached property.

    The wrapped method is called (with only `self`) the first time the
    property is read; the result is stored on the instance under the
    attribute name ``'_lazy_' + fn.__name__`` and returned from there on
    every later access.

    Based on:
    https://stackoverflow.com/questions/3012421/python-memoising-deferred-lookup-property-decorator#3013910

    Parameters
    ----------
    fn : callable
        Method computing the value; must be callable as ``fn(self)``.

    Returns
    -------
    property
        Read-only property which carries the docstring of `fn`.

    """
    attr_name = '_lazy_' + fn.__name__

    def _lazyprop(self):
        # compute the value only once and cache it on the instance
        if not hasattr(self, attr_name):
            setattr(self, attr_name, fn(self))
        return getattr(self, attr_name)

    # keep the name of the wrapped method for introspection
    _lazyprop.__name__ = fn.__name__
    # pass the docstring on explicitly, otherwise the property has none
    return property(_lazyprop, doc=fn.__doc__)
@staticmethod
def calc_deltas(data, delta_filter):
    """
    Apply the given filter to the data along the first (frame) axis.

    The data is padded by replicating the first and the last frame
    ``ceil(len(delta_filter) / 2)`` times, each coefficient track is then
    passed to ``np.convolve(..., mode="same")``. Afterwards the padding is
    removed again, so the result has the same shape as the input.

    Parameters
    ----------
    data : numpy array
        Data to process, frames along the first axis.
    delta_filter : numpy array
        Filter used for the convolution.

    Returns
    -------
    deltas : numpy array
        The filtered data, has the same shape as `data`.

    Raises
    ------
    ValueError
        If `data` does not contain at least one frame.

    """
    data = np.asarray(data)
    if data.ndim == 0 or data.shape[0] == 0:
        raise ValueError('data must contain at least one frame')
    # number of frames to replicate on each side, i.e. ceil(len / 2)
    k = (len(delta_filter) + 1) // 2
    # pad only the frame axis by repeating the edge frames
    pad_width = [(k, k)] + [(0, 0)] * (data.ndim - 1)
    padded = np.pad(data, pad_width, mode='edge')
    # convolve each coefficient track (i.e. along the frame axis)
    deltas = np.apply_along_axis(np.convolve, 0, padded, delta_filter,
                                 mode='same')
    # remove the padding again
    return deltas[k:-k]
@lazyprop
def deltas(self, delta_filter=MFCC_DELTA_FILTER):
    """
    Return the derivative of this MFCC's coefficients by convolving with
    a filter. Accessing this property corresponds to the function call
    ``MFCC.calc_deltas(self, delta_filter)``. However, using this property,
    the result is calculated only once and cached for later access.
    See ``@lazyprop`` for further details.

    Parameters
    ----------
    delta_filter : numpy array, optional
        The filter used for convolution, defaults to MFCC_DELTA_FILTER.
        Since ``@lazyprop`` invokes this method with `self` only, the
        default is always used when accessing the property; call
        ``MFCC.calc_deltas`` directly to use another filter.

    Returns
    -------
    deltas : numpy array
        The deltas, has the same shape as self.

    """
    return MFCC.calc_deltas(self, delta_filter)


@lazyprop
def deltadeltas(self, deltadelta_filter=MFCC_DELTADELTA_FILTER):
    """
    Return the second order derivative of this MFCC's coefficients by
    convolving the (cached) ``deltas`` with a filter. Accessing this
    property corresponds to the function call
    ``MFCC.calc_deltas(self.deltas, deltadelta_filter)``. However, using
    this property, the result is calculated only once and cached for later
    access. See ``@lazyprop`` for further details.

    Parameters
    ----------
    deltadelta_filter : numpy array, optional
        The filter used for convolution, defaults to
        MFCC_DELTADELTA_FILTER. Since ``@lazyprop`` invokes this method
        with `self` only, the default is always used when accessing the
        property.

    Returns
    -------
    deltadeltas : numpy array
        The deltadeltas, has the same shape as self.

    """
    return MFCC.calc_deltas(self.deltas, deltadelta_filter)


def calc_voicebox_deltas(self, delta_filter=MFCC_DELTA_FILTER,
                         ddelta_filter=MFCC_DELTADELTA_FILTER):
    """
    Method to calculate deltas and deltadeltas the way it is done in the
    voicebox MATLAB toolbox.

    see http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html

    Parameters
    ----------
    delta_filter : numpy array
        filter to calculate the derivative of this MFCC's data
    ddelta_filter : numpy array
        filter to calculate the derivative of the derivative

    Returns
    -------
    [self, deltas, deltadeltas] : numpy array, shape (|frames|, |bands|*3)
        a horizontally stacked np array consisting of the MFCC coefficients
        its derivative and the derivative of second order

    Notes
    -----
    NOTE(review): the padding and trimming amounts (5, 4 and 1 frames) are
    hard-coded; they appear to be tailored to the default filter lengths
    (9 and 3 taps), other filter lengths are presumably not supported --
    confirm before passing custom filters.

    """
    # replicate the first and the last frame 5 times each
    padded_input = np.vstack(
        (np.array([self[0], ] * 5), self, np.array([self[-1], ] * 5)))
    # shape of the transposed data, needed to undo the flattening below
    deltashape = tuple(reversed(padded_input.shape))
    # transpose before flattening so that the frames of each coefficient
    # are contiguous in the flat vector
    flat_input = padded_input.transpose().flatten()

    # filter the flat vector in one go, restore the 2D layout and drop
    # 4 frames on each side (one padded frame per side is kept, it is
    # consumed by the second convolution)
    deltas = np.convolve(flat_input, delta_filter, mode="same") \
        .reshape(deltashape).T[4:-4, ]
    deltadeltashape = tuple(reversed(deltas.shape))
    # flatten the still padded deltas before trimming them
    flat_deltas = deltas.transpose().flatten()
    # remove the remaining padded frame on each side
    deltas = deltas[1:-1, ]

    # same procedure for the second derivative, trimming 1 frame per side
    deltadeltas = np.convolve(flat_deltas, ddelta_filter, mode="same") \
        .reshape(deltadeltashape).T[1:-1, ]

    # stack coefficients, deltas and deltadeltas side by side
    return np.hstack((self, deltas, deltadeltas))