shithub: aubio

Download patch

ref: fa713bddc04f8732b89abba20b48b8699dafca07
parent: 2886984b75b4a146642769acb43335362a06e0c1
author: Paul Brossier <piem@piem.org>
date: Fri Nov 16 22:15:07 EST 2018

[filterbank] add set_mel_coeffs

--- a/src/spectral/filterbank_mel.c
+++ b/src/spectral/filterbank_mel.c
@@ -206,3 +206,91 @@
 
   return retval;
 }
+
+/* Build triangle bands from n_bands + 2 frequencies evenly spaced on the
+ * aubio_hztomel() scale, from freq_min to freq_max (0: samplerate / 2). */
+uint_t
+aubio_filterbank_set_mel_coeffs (aubio_filterbank_t * fb, smpl_t samplerate,
+    smpl_t freq_min, smpl_t freq_max)
+{
+  uint_t m, retval;
+  smpl_t start, end, step;
+  fvec_t *freqs;
+  fmat_t *coeffs = aubio_filterbank_get_coeffs(fb);
+  uint_t n_bands = coeffs->height;
+
+  /* samplerate / 2 is used as the upper limit of every band edge below */
+  if (samplerate <= 0) {
+    AUBIO_ERR("filterbank: set_mel_coeffs samplerate should be > 0\n");
+    return AUBIO_FAIL;
+  }
+  if (freq_max < 0) {
+    AUBIO_ERR("filterbank: set_mel_coeffs freq_max should be >= 0\n");
+    return AUBIO_FAIL;
+  }
+  if (freq_min < 0) {
+    AUBIO_ERR("filterbank: set_mel_coeffs freq_min should be >= 0\n");
+    return AUBIO_FAIL;
+  }
+  if (n_bands <= 0) {
+    AUBIO_ERR("filterbank: set_mel_coeffs n_bands should be > 0\n");
+    return AUBIO_FAIL;
+  }
+
+  start = aubio_hztomel(freq_min);
+  end = aubio_hztomel(freq_max == 0 ? samplerate / 2. : freq_max);
+  freqs = new_fvec(n_bands + 2);
+  step = (end - start) / (n_bands + 1);
+  for (m = 0; m < n_bands + 2; m++) {
+    /* band edges converted back to Hz, capped at samplerate / 2 */
+    freqs->data[m] = MIN(aubio_meltohz(start + step * m), samplerate/2.);
+  }
+  retval = aubio_filterbank_set_triangle_bands (fb, freqs, samplerate);
+  /* destroy vector used to store frequency limits */
+  del_fvec (freqs);
+  return retval;
+}
+
+/* Build triangle bands from n_bands + 2 frequencies evenly spaced on the
+ * aubio_hztomel_htk() scale, from freq_min to freq_max (0: samplerate / 2). */
+uint_t
+aubio_filterbank_set_mel_coeffs_htk (aubio_filterbank_t * fb, smpl_t samplerate,
+    smpl_t freq_min, smpl_t freq_max)
+{
+  uint_t m, retval;
+  smpl_t start, end, step;
+  fvec_t *freqs;
+  fmat_t *coeffs = aubio_filterbank_get_coeffs(fb);
+  uint_t n_bands = coeffs->height;
+
+  /* samplerate / 2 is used as the upper limit of every band edge below */
+  if (samplerate <= 0) {
+    AUBIO_ERR("filterbank: set_mel_coeffs_htk samplerate should be > 0\n");
+    return AUBIO_FAIL;
+  }
+  if (freq_max < 0) {
+    AUBIO_ERR("filterbank: set_mel_coeffs_htk freq_max should be >= 0\n");
+    return AUBIO_FAIL;
+  }
+  if (freq_min < 0) {
+    AUBIO_ERR("filterbank: set_mel_coeffs_htk freq_min should be >= 0\n");
+    return AUBIO_FAIL;
+  }
+  if (n_bands <= 0) {
+    AUBIO_ERR("filterbank: set_mel_coeffs_htk n_bands should be > 0\n");
+    return AUBIO_FAIL;
+  }
+
+  start = aubio_hztomel_htk(freq_min);
+  end = aubio_hztomel_htk(freq_max == 0 ? samplerate / 2. : freq_max);
+  freqs = new_fvec (n_bands + 2);
+  step = (end - start) / (n_bands + 1);
+  for (m = 0; m < n_bands + 2; m++) {
+    /* offset by start so the edges begin at freq_min, not at 0 Hz */
+    freqs->data[m] = MIN(aubio_meltohz_htk(start + step * m), samplerate/2.);
+  }
+  retval = aubio_filterbank_set_triangle_bands (fb, freqs, samplerate);
+  /* destroy vector used to store frequency limits */
+  del_fvec (freqs);
+  return retval;
+}
--- a/src/spectral/filterbank_mel.h
+++ b/src/spectral/filterbank_mel.h
@@ -57,14 +57,60 @@
   \param fb filterbank object
   \param samplerate audio sampling rate
 
-  The filter coefficients are built according to Malcolm Slaney's Auditory
-  Toolbox, available online at the following address (see file mfcc.m):
+  The filter coefficients are built to match exactly Malcolm Slaney's Auditory
+  Toolbox implementation (see file mfcc.m). The number of filters should be 40.
 
+  References
+  ----------
+
+  Malcolm Slaney, *Auditory Toolbox Version 2, Technical Report #1998-010*
   https://engineering.purdue.edu/~malcolm/interval/1998-010/
 
 */
 uint_t aubio_filterbank_set_mel_coeffs_slaney (aubio_filterbank_t * fb,
     smpl_t samplerate);
+
+/** Mel filterbank initialization
+
+  \param fb filterbank object
+  \param samplerate audio sampling rate
+  \param freq_min start frequency, in Hz
+  \param freq_max end frequency, in Hz, or 0 to use half the sampling rate
+
+  The filterbank will be initialized with bands linearly spaced in the mel
+  scale, from `freq_min` to `freq_max`.
+
+  References
+  ----------
+
+  Malcolm Slaney, *Auditory Toolbox Version 2, Technical Report #1998-010*
+  https://engineering.purdue.edu/~malcolm/interval/1998-010/
+
+*/
+uint_t aubio_filterbank_set_mel_coeffs(aubio_filterbank_t * fb,
+    smpl_t samplerate, smpl_t freq_min, smpl_t freq_max);
+
+/** Mel filterbank initialization
+
+  \param fb filterbank object
+  \param samplerate audio sampling rate
+  \param freq_min start frequency, in Hz
+  \param freq_max end frequency, in Hz, or 0 to use half the sampling rate
+
+  The bank of filters will be initialized to cover linearly spaced bands in
+  the HTK mel scale, from `freq_min` to `freq_max`.
+
+  References
+  ----------
+
+  Douglas O'Shaughnessy (1987). *Speech communication: human and machine*.
+  Addison-Wesley. p. 150. ISBN 978-0-201-16520-3.
+
+  HTK Speech Recognition Toolkit: http://htk.eng.cam.ac.uk/
+
+*/
+uint_t aubio_filterbank_set_mel_coeffs_htk(aubio_filterbank_t * fb,
+    smpl_t samplerate, smpl_t freq_min, smpl_t freq_max);
 
 #ifdef __cplusplus
 }