Add deltas and deltadeltas to MFCCs

CPJKU · May 19, 2017 · e9240e3 · e9240e3
1 parent 844c76d
commit e9240e3
Showing 1 changed file with 136 additions and 2 deletions.
diff --git a/madmom/audio/cepstrogram.py b/madmom/audio/cepstrogram.py
@@ -10,12 +10,15 @@
 from __future__ import absolute_import, division, print_function
 
 import inspect
+import math
+from functools import partial
+
 import numpy as np
 from scipy.fftpack import dct
 
-from ..processors import Processor
 from .filters import MelFilterbank
 from .spectrogram import Spectrogram
+from ..processors import Processor
 
 
 class Cepstrogram(np.ndarray):
@@ -34,6 +37,7 @@ class applies some transformation (usually a DCT) on a spectrogram.
         one is instantiated with these additional keyword arguments.
 
     """
+
     # pylint: disable=super-on-old-class
     # pylint: disable=super-init-not-called
     # pylint: disable=attribute-defined-outside-init
@@ -122,6 +126,21 @@ def process(self, data, **kwargs):
 MFCC_MUL = 1.
 MFCC_ADD = np.spacing(1)
 MFCC_DCT_NORM = "ortho"
+MFCC_DELTA_FILTER = np.linspace(4, -4, 9) / 60
+MFCC_DELTADELTA_FILTER = np.linspace(1, -1, 3) / 2
+
+
+# https://stackoverflow.com/questions/3012421/python-memoising-deferred-lookup-property-decorator#3013910
+def lazyprop(fn):
+    attr_name = '_lazy_' + fn.__name__
+
+    @property
+    def _lazyprop(self):
+        if not hasattr(self, attr_name):
+            setattr(self, attr_name, fn(self))
+        return getattr(self, attr_name)
+
+    return _lazyprop
 
 
 class MFCC(Cepstrogram):
@@ -151,7 +170,7 @@ class MFCC(Cepstrogram):
         logarithm.
     add : float, optional
         Add this value before taking the logarithm of the magnitudes.
-    dct_norm : {None, 'ortho'}, optional
+    dct_norm : {'ortho', None}, optional
         Normalization mode (see scipy.fftpack.dct). Default is 'ortho'.
     kwargs : dict
         If no :class:`.audio.spectrogram.Spectrogram` instance was given, one
@@ -225,6 +244,121 @@ def __new__(cls, spectrogram, filterbank=MelFilterbank,
         # return the object
         return obj
 
+    @staticmethod
+    def calc_deltas(data, delta_filter):
+        """
+        Applies the given filter to the data after automatically padding by
+        replicating the first and last frame. The length of the padding is
+        calculated via ceil(len(delta_filter) / 2).
+
+        Applying a filter means passing the matrix column after column to
+        ``np.convolve()``. Afterwards the array is truncated to the same
+        shape as the input array.
+
+        Parameters
+        ----------
+        data: numpy array
+            containing the data to process
+        delta_filter: numpy array
+            the filter used for convolution
+
+        Returns
+        -------
+        deltas: numpy array
+             containing the deltas, has the same shape as data
+        """
+        # prepare vectorized convolve function
+        # (requires transposed matrices in our use case)
+        vconv = np.vectorize(partial(np.convolve, mode="same"),
+                             signature='(n),(m)->(k)')
+        # pad data by replicating the first and the last frame
+        k = int(math.ceil(len(delta_filter) / 2))
+        padded = np.vstack((np.array([data[0], ] * k),
+                            data,
+                            np.array([data[-1], ] * k)))
+        # calculate the deltas for each coefficient
+        deltas = vconv(padded.transpose(), delta_filter)
+        return deltas.transpose()[k:-k]
+
+    @lazyprop
+    def deltas(self, delta_filter=MFCC_DELTA_FILTER):
+        """
+        Return the derivative of this MFCC's coefficients by convolving with
+        a filter. Accessing this property corresponds to the function call
+        ``MFCC.calc_deltas(self, delta_filter)``. However, using this property,
+        the result is calculated only once and cached for later access.
+        See ``@lazyprop`` for further details.
+
+        Parameters
+        ----------
+        delta_filter: numpy array, optional
+            convolution filter; property access always uses MFCC_DELTA_FILTER
+
+        Returns
+        -------
+        deltas: numpy array
+             containing the deltas, has the same shape as self
+        """
+        return MFCC.calc_deltas(self, delta_filter)
+
+    @lazyprop
+    def deltadeltas(self, deltadelta_filter=MFCC_DELTADELTA_FILTER):
+        """
+        Return the second order derivative of this MFCC's coefficients by
+        convolving its deltas with a filter. Accessing this property equals
+        ``MFCC.calc_deltas(self.deltas, deltadelta_filter)``. However, using
+        this property, the result is calculated only once and cached for
+        later access. See ``@lazyprop`` for further details.
+
+        Parameters
+        ----------
+        deltadelta_filter: numpy array, optional
+            filter used for convolution, defaults to MFCC_DELTADELTA_FILTER
+
+        Returns
+        -------
+        deltadeltas: numpy array
+             containing the deltadeltas, has the same shape as self
+        """
+        return MFCC.calc_deltas(self.deltas, deltadelta_filter)
+
+    def calc_voicebox_deltas(self, delta_filter=MFCC_DELTA_FILTER,
+                             ddelta_filter=MFCC_DELTADELTA_FILTER):
+        """
+        Method to calculate deltas and deltadeltas the way it is done in the
+        voicebox MatLab toolbox.
+
+        see http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
+
+        Parameters
+        ----------
+        delta_filter : numpy array
+            filter to calculate the derivative of this MFCC's data
+        ddelta_filter : numpy array
+            filter to calculate the derivative of the derivative
+
+        Returns
+        -------
+        [self, deltas, deltadeltas] : numpy array, shape (|frames|, |bands|*3)
+            a horizontally stacked np array consisting of the MFCC
+            coefficients, their derivative and their second-order derivative
+        """
+        padded_input = np.vstack(
+            (np.array([self[0], ] * 5), self, np.array([self[-1], ] * 5)))
+        deltashape = tuple(reversed(padded_input.shape))
+        flat_input = padded_input.transpose().flatten()
+
+        deltas = np.convolve(flat_input, delta_filter, mode="same") \
+                   .reshape(deltashape).T[4:-4, ]
+        deltadeltashape = tuple(reversed(deltas.shape))
+        flat_deltas = deltas.transpose().flatten()
+        deltas = deltas[1:-1, ]
+
+        deltadeltas = np.convolve(flat_deltas, ddelta_filter, mode="same") \
+                        .reshape(deltadeltashape).T[1:-1, ]
+
+        return np.hstack((self, deltas, deltadeltas))
+
     def __array_finalize__(self, obj):
         if obj is None:
             return