Skip to content

Commit

Permalink
Add deltas and deltadeltas to MFCCs
Browse files Browse the repository at this point in the history
  • Loading branch information
scriptator authored and Sebastian Böck committed May 19, 2017
1 parent 844c76d commit e9240e3
Showing 1 changed file with 136 additions and 2 deletions.
138 changes: 136 additions & 2 deletions madmom/audio/cepstrogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@
from __future__ import absolute_import, division, print_function

import inspect
import math
from functools import partial

import numpy as np
from scipy.fftpack import dct

from ..processors import Processor
from .filters import MelFilterbank
from .spectrogram import Spectrogram
from ..processors import Processor


class Cepstrogram(np.ndarray):
Expand All @@ -34,6 +37,7 @@ class applies some transformation (usually a DCT) on a spectrogram.
one is instantiated with these additional keyword arguments.
"""

# pylint: disable=super-on-old-class
# pylint: disable=super-init-not-called
# pylint: disable=attribute-defined-outside-init
Expand Down Expand Up @@ -122,6 +126,21 @@ def process(self, data, **kwargs):
# module-level defaults used by the MFCC class
MFCC_MUL = 1.
# np.spacing(1) is the distance between 1.0 and the next larger
# representable float, i.e. a tiny positive offset
MFCC_ADD = np.spacing(1)
MFCC_DCT_NORM = "ortho"
# 9 taps falling linearly from 4 to -4, scaled by 1/60
# (60 == 2 * (1**2 + 2**2 + 3**2 + 4**2)); default filter of `MFCC.deltas`
MFCC_DELTA_FILTER = np.linspace(4, -4, 9) / 60
# 3 taps [0.5, 0, -0.5]; default filter of `MFCC.deltadeltas`
MFCC_DELTADELTA_FILTER = np.linspace(1, -1, 3) / 2


# https://stackoverflow.com/questions/3012421/python-memoising-deferred-lookup-property-decorator#3013910
def lazyprop(fn):
    """
    Decorator turning a method into a lazily evaluated, cached property.

    On first access the decorated function is called with the instance as
    its only argument. The result is stored on the instance under the
    attribute name ``'_lazy_' + fn.__name__`` and returned from there on
    every later access, i.e. the function is executed at most once per
    instance.

    Parameters
    ----------
    fn : callable
        Function accepting the instance and returning the value to cache.

    Returns
    -------
    property
        Read-only property computing its value on first access. The
        docstring and name of `fn` are carried over to the property.

    """
    # imported locally so the decorator does not depend on further
    # module-level imports
    from functools import wraps

    attr_name = '_lazy_' + fn.__name__

    # copy __doc__/__name__ of `fn` onto the getter; `property` takes its
    # docstring from the getter, so the documentation of the decorated
    # method stays visible
    @wraps(fn)
    def _lazyprop(self):
        if not hasattr(self, attr_name):
            setattr(self, attr_name, fn(self))
        return getattr(self, attr_name)

    return property(_lazyprop)


class MFCC(Cepstrogram):
Expand Down Expand Up @@ -151,7 +170,7 @@ class MFCC(Cepstrogram):
logarithm.
add : float, optional
Add this value before taking the logarithm of the magnitudes.
dct_norm : {None, 'ortho'}, optional
dct_norm : {'ortho', None}, optional
Normalization mode (see scipy.fftpack.dct). Default is 'ortho'.
kwargs : dict
If no :class:`.audio.spectrogram.Spectrogram` instance was given, one
Expand Down Expand Up @@ -225,6 +244,121 @@ def __new__(cls, spectrogram, filterbank=MelFilterbank,
# return the object
return obj

@staticmethod
def calc_deltas(data, delta_filter):
    """
    Convolve every coefficient track of `data` with `delta_filter`.

    The data is extended at both ends by repeating its first and its last
    frame ``ceil(len(delta_filter) / 2)`` times. Each column of the
    extended matrix is then passed to ``np.convolve(..., mode="same")``
    and the padding frames are cut off again afterwards, so the result
    has the same shape as the input.

    Parameters
    ----------
    data : numpy array
        Data to process; frames are stacked along the first axis.
    delta_filter : numpy array
        Filter used for the convolution.

    Returns
    -------
    deltas : numpy array
        Filtered data, has the same shape as `data`.

    """
    # number of frames replicated at each end
    pad = int(math.ceil(len(delta_filter) / 2))
    head = np.array([data[0]] * pad)
    tail = np.array([data[-1]] * pad)
    padded = np.vstack((head, data, tail))
    # np.convolve works on 1D input only, thus vectorize it over the rows
    # of the transposed (coefficients x frames) matrix
    convolve_same = partial(np.convolve, mode="same")
    convolve_rows = np.vectorize(convolve_same, signature='(n),(m)->(k)')
    filtered = convolve_rows(padded.transpose(), delta_filter)
    # back to (frames x coefficients) and strip the padding frames
    return filtered.transpose()[pad:-pad]

@lazyprop
def deltas(self, delta_filter=MFCC_DELTA_FILTER):
    """
    First order derivative of this MFCC's coefficients.

    The value equals ``MFCC.calc_deltas(self, delta_filter)``. Because of
    the ``@lazyprop`` decorator it is computed on first access only and
    served from the cached result afterwards.

    Parameters
    ----------
    delta_filter : numpy array, optional
        Filter used for the convolution, defaults to MFCC_DELTA_FILTER.
        Since this is accessed as a property (``@lazyprop`` calls the
        function with the instance only), the default is always used.

    Returns
    -------
    deltas : numpy array
        The deltas, as returned by ``MFCC.calc_deltas``.

    """
    first_order = MFCC.calc_deltas(self, delta_filter)
    return first_order

@lazyprop
def deltadeltas(self, deltadelta_filter=MFCC_DELTADELTA_FILTER):
    """
    Second order derivative of this MFCC's coefficients.

    The value equals ``MFCC.calc_deltas(self.deltas, deltadelta_filter)``,
    i.e. the filter is applied to the (cached) first order deltas.
    Because of the ``@lazyprop`` decorator it is computed on first access
    only and served from the cached result afterwards.

    Parameters
    ----------
    deltadelta_filter : numpy array, optional
        Filter used for the convolution, defaults to
        MFCC_DELTADELTA_FILTER. Since this is accessed as a property
        (``@lazyprop`` calls the function with the instance only), the
        default is always used.

    Returns
    -------
    deltadeltas : numpy array
        The deltadeltas, as returned by ``MFCC.calc_deltas``.

    """
    first_order = self.deltas
    second_order = MFCC.calc_deltas(first_order, deltadelta_filter)
    return second_order

def calc_voicebox_deltas(self, delta_filter=MFCC_DELTA_FILTER,
                         ddelta_filter=MFCC_DELTADELTA_FILTER):
    """
    Calculate deltas and deltadeltas the way the voicebox MatLab toolbox
    does it.

    See http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html

    Parameters
    ----------
    delta_filter : numpy array
        Filter to calculate the derivative of this MFCC's data.
    ddelta_filter : numpy array
        Filter to calculate the derivative of the derivative.

    Returns
    -------
    [self, deltas, deltadeltas] : numpy array, shape (|frames|, |bands|*3)
        Horizontally stacked array consisting of the MFCC coefficients,
        their derivative and the derivative of second order.

    Notes
    -----
    NOTE(review): the fixed padding / trimming widths (5, 4 and 1 frames)
    look tailored to the default 9-tap and 3-tap filters -- confirm before
    passing filters of other lengths.

    """
    # replicate the first and the last frame 5 times each
    leading = np.array([self[0]] * 5)
    trailing = np.array([self[-1]] * 5)
    padded_input = np.vstack((leading, self, trailing))

    # first order: all bands are chained into one long vector, filtered
    # in a single pass and folded back to (frames x bands); 4 frames are
    # dropped at each end, leaving one extra frame per side
    band_major = padded_input.transpose()
    filtered = np.convolve(band_major.flatten(), delta_filter, mode="same")
    wide_deltas = filtered.reshape(band_major.shape).T[4:-4, ]

    # second order: same procedure applied to the (still padded) deltas,
    # dropping the remaining extra frame at each end
    delta_major = wide_deltas.transpose()
    filtered = np.convolve(delta_major.flatten(), ddelta_filter,
                           mode="same")
    deltadeltas = filtered.reshape(delta_major.shape).T[1:-1, ]

    # trim the deltas to the length of the input
    deltas = wide_deltas[1:-1, ]

    return np.hstack((self, deltas, deltadeltas))

def __array_finalize__(self, obj):
if obj is None:
return
Expand Down

0 comments on commit e9240e3

Please sign in to comment.