shithub: aubio

Download patch

ref: 81b3910a9829de70e3756af22adcd70f96691f8d
parent: 16c12a1a7f2d7d85808587c4fbc201b0ddbe9007
parent: faeec7c5fa5496bc76342a802daec485a27103a0
author: Paul Brossier <piem@piem.org>
date: Sun Oct 1 08:50:15 EDT 2017

Merge branch 'intel_ipp_pull' of https://github.com/emuell/aubio into emuell-intel_ipp_pull

--- a/examples/utils.c
+++ b/examples/utils.c
@@ -86,6 +86,8 @@
 
 void examples_common_init (int argc, char **argv)
 {
+  /* initialize statics */
+  aubio_init();
 
   /* parse command line arguments */
   parse_args (argc, argv);
--- a/src/aubio_priv.h
+++ b/src/aubio_priv.h
@@ -93,6 +93,7 @@
 #define aubio_vDSP_minv       vDSP_minv
 #define aubio_vDSP_minvi      vDSP_minvi
 #define aubio_vDSP_dotpr      vDSP_dotpr
+#define aubio_vDSP_vclr       vDSP_vclr
 #else /* HAVE_AUBIO_DOUBLE */
 #define aubio_vDSP_mmov       vDSP_mmovD
 #define aubio_vDSP_vmul       vDSP_vmulD
@@ -104,6 +105,7 @@
 #define aubio_vDSP_minv       vDSP_minvD
 #define aubio_vDSP_minvi      vDSP_minviD
 #define aubio_vDSP_dotpr      vDSP_dotprD
+#define aubio_vDSP_vclr       vDSP_vclrD
 #endif /* HAVE_AUBIO_DOUBLE */
 #endif /* HAVE_ACCELERATE */
 
--- a/src/cvec.c
+++ b/src/cvec.c
@@ -21,6 +21,12 @@
 #include "aubio_priv.h"
 #include "cvec.h"
 
+#if defined HAVE_INTEL_IPP
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+#endif
+
 cvec_t * new_cvec(uint_t length) {
   cvec_t * s;
   if ((sint_t)length <= 0) {
@@ -85,31 +91,53 @@
         s->length, t->length);
     return;
   }
-#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsCopy_64f(s->phas, t->phas, (int)s->length);
+    ippsCopy_64f(s->norm, t->norm, (int)s->length);
+  #else
+    ippsCopy_32f(s->phas, t->phas, (int)s->length);
+    ippsCopy_32f(s->norm, t->norm, (int)s->length);
+  #endif
+#elif defined(HAVE_MEMCPY_HACKS)
   memcpy(t->norm, s->norm, t->length * sizeof(smpl_t));
   memcpy(t->phas, s->phas, t->length * sizeof(smpl_t));
-#else /* HAVE_MEMCPY_HACKS */
+#else
   uint_t j;
   for (j=0; j< t->length; j++) {
     t->norm[j] = s->norm[j];
     t->phas[j] = s->phas[j];
   }
-#endif /* HAVE_MEMCPY_HACKS */
+#endif
 }
 
-void cvec_norm_set_all (cvec_t *s, smpl_t val) {
+void cvec_norm_set_all(cvec_t *s, smpl_t val) {
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsSet_64f(val, s->norm, (int)s->length);
+  #else
+    ippsSet_32f(val, s->norm, (int)s->length);
+  #endif
+#else
   uint_t j;
   for (j=0; j< s->length; j++) {
     s->norm[j] = val;
   }
+#endif
 }
 
 void cvec_norm_zeros(cvec_t *s) {
-#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsZero_64f(s->norm, (int)s->length);
+  #else
+    ippsZero_32f(s->norm, (int)s->length);
+  #endif
+#elif defined(HAVE_MEMCPY_HACKS)
   memset(s->norm, 0, s->length * sizeof(smpl_t));
-#else /* HAVE_MEMCPY_HACKS */
+#else 
   cvec_norm_set_all (s, 0.);
-#endif /* HAVE_MEMCPY_HACKS */
+#endif
 }
 
 void cvec_norm_ones(cvec_t *s) {
@@ -117,14 +145,28 @@
 }
 
 void cvec_phas_set_all (cvec_t *s, smpl_t val) {
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsSet_64f(val, s->phas, (int)s->length);
+  #else
+    ippsSet_32f(val, s->phas, (int)s->length);
+  #endif
+#else
   uint_t j;
   for (j=0; j< s->length; j++) {
     s->phas[j] = val;
   }
+#endif
 }
 
 void cvec_phas_zeros(cvec_t *s) {
-#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsZero_64f(s->phas, (int)s->length);
+  #else
+    ippsZero_32f(s->phas, (int)s->length);
+  #endif
+#elif defined(HAVE_MEMCPY_HACKS)
   memset(s->phas, 0, s->length * sizeof(smpl_t));
 #else
   cvec_phas_set_all (s, 0.);
@@ -141,8 +183,20 @@
 }
 
 void cvec_logmag(cvec_t *s, smpl_t lambda) {
-  uint_t j;
-  for (j=0; j< s->length; j++) {
-    s->norm[j] = LOG(lambda * s->norm[j] + 1);
-  }
+  #if defined(HAVE_INTEL_IPP)
+    #if HAVE_AUBIO_DOUBLE
+      ippsMulC_64f(s->norm, lambda, s->norm, (int)s->length);
+      ippsAddC_64f(s->norm, 1.0, s->norm, (int)s->length);
+      ippsLn_64f_A26(s->norm, s->norm, (int)s->length);
+    #else
+      ippsMulC_32f(s->norm, lambda, s->norm, (int)s->length);
+      ippsAddC_32f(s->norm, 1.0, s->norm, (int)s->length);
+      ippsLn_32f_A21(s->norm, s->norm, (int)s->length);
+    #endif
+  #else
+    uint_t j;
+    for (j=0; j< s->length; j++) {
+      s->norm[j] = LOG(lambda * s->norm[j] + 1);
+    }
+  #endif
 }
--- a/src/fvec.c
+++ b/src/fvec.c
@@ -21,6 +21,12 @@
 #include "aubio_priv.h"
 #include "fvec.h"
 
+#if defined HAVE_INTEL_IPP
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+#endif
+
 fvec_t * new_fvec(uint_t length) {
   fvec_t * s;
   if ((sint_t)length <= 0) {
@@ -60,28 +66,39 @@
 }
 
 void fvec_set_all (fvec_t *s, smpl_t val) {
-#if !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS)
-  uint_t j;
-  for (j=0; j< s->length; j++) {
-    s->data[j] = val;
-  }
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsSet_64f(val, s->data, (int)s->length);
+  #else
+    ippsSet_32f(val, s->data, (int)s->length);
+  #endif
 #elif defined(HAVE_ATLAS)
   aubio_catlas_set(s->length, val, s->data, 1);
 #elif defined(HAVE_ACCELERATE)
   aubio_vDSP_vfill(&val, s->data, 1, s->length);
+#else
+  uint_t j;
+  for ( j = 0; j< s->length; j++ )
+  {
+    s->data[j] = val;
+  }
 #endif
 }
 
 void fvec_zeros(fvec_t *s) {
-#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE)
-  fvec_set_all (s, 0.);
-#else
-#if defined(HAVE_MEMCPY_HACKS)
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsZero_64f(s->data, (int)s->length);
+  #else
+    ippsZero_32f(s->data, (int)s->length);
+  #endif
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_vclr(s->data, 1, s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
   memset(s->data, 0, s->length * sizeof(smpl_t));
 #else
-  aubio_vDSP_vclr(s->data, 1, s->length);
+  fvec_set_all(s, 0.);
 #endif
-#endif
 }
 
 void fvec_ones(fvec_t *s) {
@@ -96,27 +113,39 @@
 }
 
 void fvec_weight(fvec_t *s, const fvec_t *weight) {
-#ifndef HAVE_ACCELERATE
-  uint_t j;
   uint_t length = MIN(s->length, weight->length);
-  for (j=0; j< length; j++) {
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsMul_64f(s->data, weight->data, s->data, (int)length);
+  #else
+    ippsMul_32f(s->data, weight->data, s->data, (int)length);
+  #endif
+#elif defined(HAVE_ACCELERATE) 
+  aubio_vDSP_vmul( s->data, 1, weight->data, 1, s->data, 1, length );
+#else
+  uint_t j;
+  for (j = 0; j < length; j++) {
     s->data[j] *= weight->data[j];
   }
-#else
-  aubio_vDSP_vmul(s->data, 1, weight->data, 1, s->data, 1, s->length);
 #endif /* HAVE_ACCELERATE */
 }
 
 void fvec_weighted_copy(const fvec_t *in, const fvec_t *weight, fvec_t *out) {
-#ifndef HAVE_ACCELERATE
+  uint_t length = MIN(in->length, MIN(out->length, weight->length));
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsMul_64f(in->data, weight->data, out->data, (int)length);
+  #else
+    ippsMul_32f(in->data, weight->data, out->data, (int)length);
+  #endif
+#elif defined(HAVE_ACCELERATE) 
+  aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, length);
+#else
   uint_t j;
-  uint_t length = MIN(out->length, weight->length);
-  for (j=0; j< length; j++) {
+  for (j = 0; j < length; j++) {
     out->data[j] = in->data[j] * weight->data[j];
   }
-#else
-  aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, out->length);
-#endif /* HAVE_ACCELERATE */
+#endif
 }
 
 void fvec_copy(const fvec_t *s, fvec_t *t) {
@@ -125,16 +154,22 @@
         s->length, t->length);
     return;
   }
-#ifdef HAVE_NOOPT
-  uint_t j;
-  for (j=0; j< t->length; j++) {
-    t->data[j] = s->data[j];
-  }
-#elif defined(HAVE_MEMCPY_HACKS)
-  memcpy(t->data, s->data, t->length * sizeof(smpl_t));
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsCopy_64f(s->data, t->data, (int)s->length);
+  #else
+    ippsCopy_32f(s->data, t->data, (int)s->length);
+  #endif
 #elif defined(HAVE_ATLAS)
   aubio_cblas_copy(s->length, s->data, 1, t->data, 1);
 #elif defined(HAVE_ACCELERATE)
   aubio_vDSP_mmov(s->data, t->data, 1, s->length, 1, 1);
+#elif defined(HAVE_MEMCPY_HACKS)
+  memcpy(t->data, s->data, t->length * sizeof(smpl_t));
+#else
+  uint_t j;
+  for (j = 0; j < t->length; j++) {
+    t->data[j] = s->data[j];
+  }
 #endif
 }
--- a/src/mathutils.c
+++ b/src/mathutils.c
@@ -25,6 +25,12 @@
 #include "mathutils.h"
 #include "musicutils.h"
 
+#if defined HAVE_INTEL_IPP
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+#endif
+
 /** Window types */
 typedef enum
 {
@@ -159,16 +165,23 @@
 fvec_mean (fvec_t * s)
 {
   smpl_t tmp = 0.0;
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsMean_64f(s->data, (int)s->length, &tmp);
+  #else
+    ippsMean_32f(s->data, (int)s->length, &tmp, ippAlgHintFast);
+  #endif
+    return tmp;
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_meanv(s->data, 1, &tmp, s->length);
+  return tmp;
+#else
   uint_t j;
   for (j = 0; j < s->length; j++) {
     tmp += s->data[j];
   }
-  return tmp / (smpl_t) (s->length);
-#else
-  aubio_vDSP_meanv(s->data, 1, &tmp, s->length);
-  return tmp;
-#endif /* HAVE_ACCELERATE */
+  return tmp / (smpl_t)(s->length);
+#endif
 }
 
 smpl_t
@@ -175,14 +188,20 @@
 fvec_sum (fvec_t * s)
 {
   smpl_t tmp = 0.0;
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsSum_64f(s->data, (int)s->length, &tmp);
+  #else
+    ippsSum_32f(s->data, (int)s->length, &tmp, ippAlgHintFast);
+  #endif
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_sve(s->data, 1, &tmp, s->length);
+#else
   uint_t j;
   for (j = 0; j < s->length; j++) {
     tmp += s->data[j];
   }
-#else
-  aubio_vDSP_sve(s->data, 1, &tmp, s->length);
-#endif /* HAVE_ACCELERATE */
+#endif
   return tmp;
 }
 
@@ -189,15 +208,22 @@
 smpl_t
 fvec_max (fvec_t * s)
 {
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  smpl_t tmp = 0.;
+  #if HAVE_AUBIO_DOUBLE
+    ippsMax_64f( s->data, (int)s->length, &tmp);
+  #else
+    ippsMax_32f( s->data, (int)s->length, &tmp);
+#endif
+#elif defined(HAVE_ACCELERATE)
+  smpl_t tmp = 0.;
+  aubio_vDSP_maxv( s->data, 1, &tmp, s->length );
+#else
   uint_t j;
-  smpl_t tmp = 0.0;
-  for (j = 0; j < s->length; j++) {
+  smpl_t tmp = s->data[0];
+  for (j = 1; j < s->length; j++) {
     tmp = (tmp > s->data[j]) ? tmp : s->data[j];
   }
-#else
-  smpl_t tmp = 0.;
-  aubio_vDSP_maxv(s->data, 1, &tmp, s->length);
 #endif
   return tmp;
 }
@@ -205,15 +231,22 @@
 smpl_t
 fvec_min (fvec_t * s)
 {
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  smpl_t tmp = 0.;
+  #if HAVE_AUBIO_DOUBLE
+    ippsMin_64f(s->data, (int)s->length, &tmp);
+  #else
+    ippsMin_32f(s->data, (int)s->length, &tmp);
+#endif
+#elif defined(HAVE_ACCELERATE)
+  smpl_t tmp = 0.;
+  aubio_vDSP_minv(s->data, 1, &tmp, s->length);
+#else
   uint_t j;
   smpl_t tmp = s->data[0];
-  for (j = 0; j < s->length; j++) {
+  for (j = 1; j < s->length; j++) {
     tmp = (tmp < s->data[j]) ? tmp : s->data[j];
   }
-#else
-  smpl_t tmp = 0.;
-  aubio_vDSP_minv(s->data, 1, &tmp, s->length);
 #endif
   return tmp;
 }
@@ -574,6 +607,17 @@
   return i;
 }
 
+uint_t
+aubio_power_of_two_order (uint_t a)
+{
+  int order = 0; 
+  int temp = aubio_next_power_of_two(a);
+  while (temp >>= 1) {
+    ++order;
+  }
+  return order;
+}
+
 smpl_t
 aubio_db_spl (const fvec_t * o)
 {
@@ -635,6 +679,18 @@
     }
     acf[i] = tmp / (smpl_t) (length - i);
   }
+}
+
+void
+aubio_init (void)
+{
+/* initialize intel IPP */
+#ifdef HAVE_INTEL_IPP
+  IppStatus status = ippInit();
+  if (status != ippStsNoErr) {
+    fprintf (stderr, "Error: failed to initialize Intel IPP - status %d\n", status);
+  }
+#endif
 }
 
 void
--- a/src/mathutils.h
+++ b/src/mathutils.h
@@ -312,6 +312,9 @@
 /** return the next power of power of 2 greater than a */
 uint_t aubio_next_power_of_two(uint_t a);
 
+/** return the base-2 order (log2) of aubio_next_power_of_two(a) */
+uint_t aubio_power_of_two_order(uint_t a);
+
 /** compute normalised autocorrelation function
 
   \param input vector to compute autocorrelation from
--- a/src/musicutils.h
+++ b/src/musicutils.h
@@ -92,6 +92,13 @@
 /** convert midi value (0-128) to frequency (Hz) */
 smpl_t aubio_miditofreq (smpl_t midi);
 
+/** initialize global state at the beginning of the program
+
+  This function should be called before using anything else in aubio.
+  So far it only initializes the Intel IPP library, when aubio is built with it.
+*/
+void aubio_init (void);
+
 /** clean up cached memory at the end of program
 
   This function should be used at the end of programs to purge all cached
--- a/src/spectral/fft.c
+++ b/src/spectral/fft.c
@@ -77,8 +77,7 @@
 // a global mutex for FFTW thread safety
 pthread_mutex_t aubio_fftw_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-#else
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+#elif defined HAVE_ACCELERATE        // using ACCELERATE
 // https://developer.apple.com/library/mac/#documentation/Accelerate/Reference/vDSPRef/Reference/reference.html
 #include <Accelerate/Accelerate.h>
 
@@ -112,32 +111,53 @@
 #define aubio_vvsqrt                   vvsqrt
 #endif /* HAVE_AUBIO_DOUBLE */
 
-#else                         // using OOURA
+#elif defined HAVE_INTEL_IPP // using INTEL IPP
+
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+
+#else // using OOURA
 // let's use ooura instead
 extern void aubio_ooura_rdft(int, int, smpl_t *, int *, smpl_t *);
 
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif
 
 struct _aubio_fft_t {
   uint_t winsize;
   uint_t fft_size;
+
 #ifdef HAVE_FFTW3             // using FFTW3
   real_t *in, *out;
   fftw_plan pfw, pbw;
-  fft_data_t * specdata;      /* complex spectral data */
-#else
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+  fft_data_t * specdata; /* complex spectral data */
+
+#elif defined HAVE_ACCELERATE  // using ACCELERATE
   int log2fftsize;
   aubio_FFTSetup fftSetup;
   aubio_DSPSplitComplex spec;
   smpl_t *in, *out;
+  
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  // mark FFT impl as Intel IPP
+  #define INTEL_IPP_FFT 1
+  smpl_t *in, *out;
+  Ipp8u* memSpec;
+  Ipp8u* memInit;
+  Ipp8u* memBuffer;
+  #if HAVE_AUBIO_DOUBLE
+    struct FFTSpec_R_64f* fftSpec;
+    Ipp64fc* complexOut;
+  #else
+    struct FFTSpec_R_32f* fftSpec;
+    Ipp32fc* complexOut;
+  #endif
 #else                         // using OOURA
   smpl_t *in, *out;
   smpl_t *w;
   int *ip;
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
+
   fvec_t * compspec;
 };
 
@@ -147,6 +167,7 @@
     AUBIO_ERR("fft: got winsize %d, but can not be < 2\n", winsize);
     goto beach;
   }
+
 #ifdef HAVE_FFTW3
   uint_t i;
   s->winsize  = winsize;
@@ -175,17 +196,66 @@
   for (i = 0; i < s->fft_size; i++) {
     s->specdata[i] = 0.;
   }
-#else
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE  // using ACCELERATE
   s->winsize = winsize;
   s->fft_size = winsize;
   s->compspec = new_fvec(winsize);
-  s->log2fftsize = (uint_t)log2f(s->fft_size);
+  s->log2fftsize = aubio_power_of_two_order(s->fft_size);
   s->in = AUBIO_ARRAY(smpl_t, s->fft_size);
   s->out = AUBIO_ARRAY(smpl_t, s->fft_size);
   s->spec.realp = AUBIO_ARRAY(smpl_t, s->fft_size/2);
   s->spec.imagp = AUBIO_ARRAY(smpl_t, s->fft_size/2);
   s->fftSetup = aubio_vDSP_create_fftsetup(s->log2fftsize, FFT_RADIX2);
+
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  const IppHintAlgorithm qualityHint = ippAlgHintAccurate; // OR ippAlgHintFast;
+  const int flags = IPP_FFT_NODIV_BY_ANY; // we're scaling manually afterwards 
+  int order = aubio_power_of_two_order(winsize);
+  int sizeSpec, sizeInit, sizeBuffer;
+  IppStatus status;
+
+  if (winsize <= 4 || aubio_is_power_of_two(winsize) != 1)
+  {
+    AUBIO_ERR("intel IPP fft: can only create with sizes > 4 and power of two, requested %d,"
+      " try recompiling aubio with --enable-fftw3\n", winsize);
+    goto beach;
+  }
+
+#if HAVE_AUBIO_DOUBLE
+  status = ippsFFTGetSize_R_64f(order, flags, qualityHint,
+      &sizeSpec, &sizeInit, &sizeBuffer);
+#else
+  status = ippsFFTGetSize_R_32f(order, flags, qualityHint,
+    &sizeSpec, &sizeInit, &sizeBuffer);
+#endif
+  if (status != ippStsNoErr) {
+    AUBIO_ERR("fft: failed to initialize fft. IPP error: %d\n", status);
+    goto beach;
+  }
+  s->fft_size = s->winsize = winsize;
+  s->compspec = new_fvec(winsize);
+  s->in = AUBIO_ARRAY(smpl_t, s->winsize);
+  s->out = AUBIO_ARRAY(smpl_t, s->winsize);
+  s->memSpec = ippsMalloc_8u(sizeSpec);
+  s->memBuffer = ippsMalloc_8u(sizeBuffer);
+  if (sizeInit > 0 ) {
+    s->memInit = ippsMalloc_8u(sizeInit);
+  }
+#if HAVE_AUBIO_DOUBLE
+  s->complexOut = ippsMalloc_64fc(s->fft_size / 2 + 1);
+  status = ippsFFTInit_R_64f(
+    &s->fftSpec, order, flags, qualityHint, s->memSpec, s->memInit);
+#else
+  s->complexOut = ippsMalloc_32fc(s->fft_size / 2 + 1);
+  status = ippsFFTInit_R_32f(
+    &s->fftSpec, order, flags, qualityHint, s->memSpec, s->memInit);
+#endif
+  if (status != ippStsNoErr) {
+    AUBIO_ERR("fft: failed to initialize. IPP error: %d\n", status);
+    goto beach;
+  }
+
 #else                         // using OOURA
   if (aubio_is_power_of_two(winsize) != 1) {
     AUBIO_ERR("fft: can only create with sizes power of two, requested %d,"
@@ -200,9 +270,10 @@
   s->ip    = AUBIO_ARRAY(int   , s->fft_size);
   s->w     = AUBIO_ARRAY(smpl_t, s->fft_size);
   s->ip[0] = 0;
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
+
   return s;
+
 beach:
   AUBIO_FREE(s);
   return NULL;
@@ -210,7 +281,6 @@
 
 void del_aubio_fft(aubio_fft_t * s) {
   /* destroy data */
-  del_fvec(s->compspec);
 #ifdef HAVE_FFTW3             // using FFTW3
   pthread_mutex_lock(&aubio_fftw_mutex);
   fftw_destroy_plan(s->pfw);
@@ -217,28 +287,36 @@
   fftw_destroy_plan(s->pbw);
   fftw_free(s->specdata);
   pthread_mutex_unlock(&aubio_fftw_mutex);
-#else /* HAVE_FFTW3 */
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
   AUBIO_FREE(s->spec.realp);
   AUBIO_FREE(s->spec.imagp);
   aubio_vDSP_destroy_fftsetup(s->fftSetup);
+
+#elif defined HAVE_INTEL_IPP  // using Intel IPP (buffers come from ippsMalloc_*, release with ippsFree)
+  ippsFree(s->memSpec);
+  ippsFree(s->memInit);
+  ippsFree(s->memBuffer);
+  ippsFree(s->complexOut);
+
 #else                         // using OOURA
   AUBIO_FREE(s->w);
   AUBIO_FREE(s->ip);
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
-  AUBIO_FREE(s->out);
+#endif
+
+  del_fvec(s->compspec);
   AUBIO_FREE(s->in);
+  AUBIO_FREE(s->out);
   AUBIO_FREE(s);
 }
 
 void aubio_fft_do(aubio_fft_t * s, const fvec_t * input, cvec_t * spectrum) {
   aubio_fft_do_complex(s, input, s->compspec);
-  aubio_fft_get_spectrum(s->compspec, spectrum);
+  aubio_fft_get_spectrum(s, s->compspec, spectrum);
 }
 
 void aubio_fft_rdo(aubio_fft_t * s, const cvec_t * spectrum, fvec_t * output) {
-  aubio_fft_get_realimag(spectrum, s->compspec);
+  aubio_fft_get_realimag(s, spectrum, s->compspec);
   aubio_fft_rdo_complex(s, s->compspec, output);
 }
 
@@ -251,6 +329,7 @@
 #else
   memcpy(s->in, input->data, s->winsize * sizeof(smpl_t));
 #endif /* HAVE_MEMCPY_HACKS */
+
 #ifdef HAVE_FFTW3             // using FFTW3
   fftw_execute(s->pfw);
 #ifdef HAVE_COMPLEX_H
@@ -265,8 +344,8 @@
     compspec->data[i] = s->specdata[i];
   }
 #endif /* HAVE_COMPLEX_H */
-#else /* HAVE_FFTW3 */
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
   // convert real data to even/odd format used in vDSP
   aubio_vDSP_ctoz((aubio_DSPComplex*)s->in, 2, &s->spec, 1, s->fft_size/2);
   // compute the FFT
@@ -281,6 +360,29 @@
   // apply scaling
   smpl_t scale = 1./2.;
   aubio_vDSP_vsmul(compspec->data, 1, &scale, compspec->data, 1, s->fft_size);
+
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+
+  // apply fft
+#if HAVE_AUBIO_DOUBLE
+  ippsFFTFwd_RToCCS_64f(s->in, (Ipp64f*)s->complexOut, s->fftSpec, s->memBuffer);
+#else
+  ippsFFTFwd_RToCCS_32f(s->in, (Ipp32f*)s->complexOut, s->fftSpec, s->memBuffer);
+#endif
+  // convert complex buffer to [ r0, r1, ..., rN, iN-1, .., i2, i1]
+  compspec->data[0] = s->complexOut[0].re;
+  compspec->data[s->fft_size / 2] = s->complexOut[s->fft_size / 2].re;
+  for (i = 1; i < s->fft_size / 2; i++) {
+    compspec->data[i] = s->complexOut[i].re;
+    compspec->data[s->fft_size - i] = s->complexOut[i].im;
+  }
+  // apply scaling
+#if HAVE_AUBIO_DOUBLE
+  ippsMulC_64f(compspec->data, 1.0 / 2.0, compspec->data, s->fft_size);
+#else
+  ippsMulC_32f(compspec->data, 1.0 / 2.0, compspec->data, s->fft_size);
+#endif
+
 #else                         // using OOURA
   aubio_ooura_rdft(s->winsize, 1, s->in, s->ip, s->w);
   compspec->data[0] = s->in[0];
@@ -289,8 +391,7 @@
     compspec->data[i] = s->in[2 * i];
     compspec->data[s->winsize - i] = - s->in[2 * i + 1];
   }
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
 }
 
 void aubio_fft_rdo_complex(aubio_fft_t * s, const fvec_t * compspec, fvec_t * output) {
@@ -313,8 +414,8 @@
   for (i = 0; i < output->length; i++) {
     output->data[i] = s->out[i]*renorm;
   }
-#else /* HAVE_FFTW3 */
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
   // convert from real imag  [ r0, r1, ..., rN, iN-1, .., i2, i1]
   // to vDSP packed format   [ r0, rN, r1, i1, ..., rN-1, iN-1 ]
   s->out[0] = compspec->data[0];
@@ -332,6 +433,30 @@
   // apply scaling
   smpl_t scale = 1.0 / s->winsize;
   aubio_vDSP_vsmul(output->data, 1, &scale, output->data, 1, s->fft_size);
+
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+
+  // convert from real imag  [ r0, r1, ..., rN, iN-1, .., i2, i1] to complex format
+  s->complexOut[0].re = compspec->data[0];
+  s->complexOut[0].im = 0;
+  s->complexOut[s->fft_size / 2].re = compspec->data[s->fft_size / 2];
+  s->complexOut[s->fft_size / 2].im = 0.0;
+  for (i = 1; i < s->fft_size / 2; i++) {
+    s->complexOut[i].re = compspec->data[i];
+    s->complexOut[i].im = compspec->data[s->fft_size - i];
+  }
+#if HAVE_AUBIO_DOUBLE
+  // apply fft
+  ippsFFTInv_CCSToR_64f((const Ipp64f *)s->complexOut, output->data, s->fftSpec, s->memBuffer);
+  // apply scaling
+  ippsMulC_64f(output->data, 2.0 / s->winsize, output->data, s->fft_size);
+#else
+  // apply fft
+  ippsFFTInv_CCSToR_32f((const Ipp32f *)s->complexOut, output->data, s->fftSpec, s->memBuffer);
+  // apply scaling
+  ippsMulC_32f(output->data, 2.0f / s->winsize, output->data, s->fft_size);
+#endif /* HAVE_AUBIO_DOUBLE */
+
 #else                         // using OOURA
   smpl_t scale = 2.0 / s->winsize;
   s->out[0] = compspec->data[0];
@@ -344,22 +469,45 @@
   for (i=0; i < s->winsize; i++) {
     output->data[i] = s->out[i] * scale;
   }
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif
 }
 
-void aubio_fft_get_spectrum(const fvec_t * compspec, cvec_t * spectrum) {
-  aubio_fft_get_phas(compspec, spectrum);
-  aubio_fft_get_norm(compspec, spectrum);
+void aubio_fft_get_spectrum(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum) {
+  aubio_fft_get_phas(s, compspec, spectrum);
+  aubio_fft_get_norm(s, compspec, spectrum);
 }
 
-void aubio_fft_get_realimag(const cvec_t * spectrum, fvec_t * compspec) {
-  aubio_fft_get_imag(spectrum, compspec);
-  aubio_fft_get_real(spectrum, compspec);
+void aubio_fft_get_realimag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec) {
+  aubio_fft_get_imag(s, spectrum, compspec);
+  aubio_fft_get_real(s, spectrum, compspec);
 }
 
-void aubio_fft_get_phas(const fvec_t * compspec, cvec_t * spectrum) {
+void aubio_fft_get_phas(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum) {
+
+#ifdef INTEL_IPP_FFT // using Intel IPP FFT
   uint_t i;
+  
+  // convert from real imag  [ r0, r1, ..., rN, iN-1, .., i2, i1] to complex format
+  s->complexOut[0].re = compspec->data[0];
+  s->complexOut[0].im = 0;
+  s->complexOut[s->fft_size / 2].re = compspec->data[s->fft_size / 2];
+  s->complexOut[s->fft_size / 2].im = 0.0;
+  for (i = 1; i < spectrum->length - 1; i++) {
+    s->complexOut[i].re = compspec->data[i];
+    s->complexOut[i].im = compspec->data[compspec->length - i];
+  }
+  
+#if HAVE_AUBIO_DOUBLE
+  IppStatus status = ippsPhase_64fc(s->complexOut, spectrum->phas, spectrum->length);
+#else
+  IppStatus status = ippsPhase_32fc(s->complexOut, spectrum->phas, spectrum->length);
+#endif
+  if (status != ippStsNoErr) {
+    AUBIO_ERR("fft: failed to extract phase from fft. IPP error: %d\n", status);
+  }
+
+#else                 // NOT using Intel IPP
+  uint_t i;
   if (compspec->data[0] < 0) {
     spectrum->phas[0] = PI;
   } else {
@@ -374,9 +522,10 @@
   } else {
     spectrum->phas[spectrum->length - 1] = 0.;
   }
+#endif
 }
 
-void aubio_fft_get_norm(const fvec_t * compspec, cvec_t * spectrum) {
+void aubio_fft_get_norm(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum) {
   uint_t i = 0;
   spectrum->norm[0] = ABS(compspec->data[0]);
   for (i=1; i < spectrum->length - 1; i++) {
@@ -387,7 +536,7 @@
     ABS(compspec->data[compspec->length/2]);
 }
 
-void aubio_fft_get_imag(const cvec_t * spectrum, fvec_t * compspec) {
+void aubio_fft_get_imag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec) {
   uint_t i;
   for (i = 1; i < ( compspec->length + 1 ) / 2 /*- 1 + 1*/; i++) {
     compspec->data[compspec->length - i] =
@@ -395,7 +544,7 @@
   }
 }
 
-void aubio_fft_get_real(const cvec_t * spectrum, fvec_t * compspec) {
+void aubio_fft_get_real(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec) {
   uint_t i;
   for (i = 0; i < compspec->length / 2 + 1; i++) {
     compspec->data[i] =
--- a/src/spectral/fft.h
+++ b/src/spectral/fft.h
@@ -98,7 +98,7 @@
   \param spectrum cvec norm/phas output array
 
 */
-void aubio_fft_get_spectrum(const fvec_t * compspec, cvec_t * spectrum);
+void aubio_fft_get_spectrum(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum);
 /** convert real/imag spectrum to norm/phas spectrum
 
   \param compspec real/imag input fft array
@@ -105,7 +105,7 @@
   \param spectrum cvec norm/phas output array
 
 */
-void aubio_fft_get_realimag(const cvec_t * spectrum, fvec_t * compspec);
+void aubio_fft_get_realimag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec);
 
 /** compute phas spectrum from real/imag parts
 
@@ -113,7 +113,7 @@
   \param spectrum cvec norm/phas output array
 
 */
-void aubio_fft_get_phas(const fvec_t * compspec, cvec_t * spectrum);
+void aubio_fft_get_phas(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum);
 /** compute imaginary part from the norm/phas cvec
 
   \param spectrum norm/phas input array
@@ -120,7 +120,7 @@
   \param compspec real/imag output fft array
 
 */
-void aubio_fft_get_imag(const cvec_t * spectrum, fvec_t * compspec);
+void aubio_fft_get_imag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec);
 
 /** compute norm component from real/imag parts
 
@@ -128,7 +128,7 @@
   \param spectrum cvec norm/phas output array
 
 */
-void aubio_fft_get_norm(const fvec_t * compspec, cvec_t * spectrum);
+void aubio_fft_get_norm(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum);
 /** compute real part from norm/phas components
 
   \param spectrum norm/phas input array
@@ -135,7 +135,7 @@
   \param compspec real/imag output fft array
 
 */
-void aubio_fft_get_real(const cvec_t * spectrum, fvec_t * compspec);
+void aubio_fft_get_real(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec);
 
 #ifdef __cplusplus
 }
--- a/tests/src/io/test-sink-multi.c
+++ b/tests/src/io/test-sink-multi.c
@@ -6,6 +6,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
 
   if (argc < 3) {
@@ -68,5 +70,8 @@
 beach_fmat:
   del_aubio_source(i);
 beach_source:
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-sink.c
+++ b/tests/src/io/test-sink.c
@@ -3,6 +3,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
 
   if (argc < 3) {
@@ -54,5 +56,8 @@
 beach_source:
   del_fvec(vec);
 beach_fvec:
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-sink_apple_audio-multi.c
+++ b/tests/src/io/test-sink_apple_audio-multi.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
 
   if (argc < 3) {
@@ -74,5 +76,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_sink_apple_audio\n");
 #endif /* HAVE_SINK_APPLE_AUDIO */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-sink_apple_audio.c
+++ b/tests/src/io/test-sink_apple_audio.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
 
   if (argc < 3) {
@@ -63,5 +65,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_source_apple_audio\n");
 #endif /* HAVE_SINK_APPLE_AUDIO */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-sink_sndfile-multi.c
+++ b/tests/src/io/test-sink_sndfile-multi.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
 
   if (argc < 3) {
@@ -74,5 +76,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_sink_sndfile\n");
 #endif /* HAVE_SNDFILE */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-sink_sndfile.c
+++ b/tests/src/io/test-sink_sndfile.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
 
   if (argc < 3) {
@@ -63,5 +65,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_source_sndfile\n");
 #endif /* HAVE_SNDFILE */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-sink_wavwrite-multi.c
+++ b/tests/src/io/test-sink_wavwrite-multi.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
 
   if (argc < 3) {
@@ -74,5 +76,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_sink_wavwrite\n");
 #endif /* HAVE_WAVWRITE */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-sink_wavwrite.c
+++ b/tests/src/io/test-sink_wavwrite.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
 
   if (argc < 3) {
@@ -63,5 +65,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_sink_wavwrite\n");
 #endif /* HAVE_WAVWRITE */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-source.c
+++ b/tests/src/io/test-source.c
@@ -3,6 +3,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   uint_t err = 0;
   if (argc < 2) {
     err = 2;
@@ -55,5 +57,6 @@
   del_fvec (vec);
   del_aubio_source (s);
 beach:
+  aubio_cleanup();
   return err;
 }
--- a/tests/src/io/test-source_apple_audio.c
+++ b/tests/src/io/test-source_apple_audio.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   uint_t err = 0;
   if (argc < 2) {
     err = 2;
@@ -59,5 +61,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_source_apple_audio\n");
 #endif /* HAVE_SOURCE_APPLE_AUDIO */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-source_avcodec.c
+++ b/tests/src/io/test-source_avcodec.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   uint_t err = 0;
   if (argc < 2) {
     err = 2;
@@ -59,5 +61,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_source_avcodec\n");
 #endif /* HAVE_LIBAV */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-source_multi.c
+++ b/tests/src/io/test-source_multi.c
@@ -3,6 +3,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
   if (argc < 2) {
     err = -2;
@@ -53,5 +55,7 @@
   del_aubio_source (s);
 beach:
 
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-source_seek.c
+++ b/tests/src/io/test-source_seek.c
@@ -3,6 +3,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   uint_t err = 0;
   if (argc < 2) {
     err = 2;
@@ -88,5 +90,8 @@
   assert ( old_n_frames_2 == old_n_frames_1 );
   // check that we got about half the frames, with 3 decimals
   assert ( roundf(1.e3 * old_n_frames_1 / old_n_frames_3) / 1.e3 == 2.);
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-source_sndfile.c
+++ b/tests/src/io/test-source_sndfile.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   uint_t err = 0;
   if (argc < 2) {
     err = 2;
@@ -59,5 +61,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_source_sndfile\n");
 #endif /* HAVE_SNDFILE */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/io/test-source_wavread.c
+++ b/tests/src/io/test-source_wavread.c
@@ -7,6 +7,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   uint_t err = 0;
   if (argc < 2) {
     err = 2;
@@ -60,5 +62,8 @@
   err = 3;
   PRINT_ERR("aubio was not compiled with aubio_source_wavread\n");
 #endif /* HAVE_WAVREAD */
+
+  aubio_cleanup();
+  
   return err;
 }
--- a/tests/src/onset/test-onset.c
+++ b/tests/src/onset/test-onset.c
@@ -3,6 +3,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+  
   uint_t err = 0;
   if (argc < 2) {
     err = 2;
--- a/tests/src/pitch/test-pitch.c
+++ b/tests/src/pitch/test-pitch.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   // 1. allocate some memory
   uint_t n = 0; // frame counter
   uint_t win_s = 1024; // window size
--- a/tests/src/pitch/test-pitchfcomb.c
+++ b/tests/src/pitch/test-pitchfcomb.c
@@ -7,6 +7,7 @@
 
 int main (void)
 {
+  aubio_init();
   uint_t i = 0;
   uint_t win_s = 1024; // window size
   uint_t hop_s = win_s/4; // hop size
--- a/tests/src/pitch/test-pitchmcomb.c
+++ b/tests/src/pitch/test-pitchmcomb.c
@@ -7,6 +7,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t n = 10; // compute n times
   uint_t win_s = 1024; // window size
   uint_t hop_s = win_s/4; // hop size
--- a/tests/src/pitch/test-pitchschmitt.c
+++ b/tests/src/pitch/test-pitchschmitt.c
@@ -7,6 +7,7 @@
 
 int main (void)
 {
+  aubio_init();
   uint_t n = 10; // compute n times
   uint_t win_s = 1024; // window size
   // create some vectors
--- a/tests/src/pitch/test-pitchspecacf.c
+++ b/tests/src/pitch/test-pitchspecacf.c
@@ -7,6 +7,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t n = 10; // compute n times
   uint_t win_s = 1024; // window size
   // create some vectors
--- a/tests/src/pitch/test-pitchyin.c
+++ b/tests/src/pitch/test-pitchyin.c
@@ -7,6 +7,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t n = 10; // compute n times
   uint_t win_s = 1024; // window size
   // create some vectors
--- a/tests/src/pitch/test-pitchyinfft.c
+++ b/tests/src/pitch/test-pitchyinfft.c
@@ -7,6 +7,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t n = 10; // compute n times
   uint_t win_s = 1024; // window size
   // create some vectors
--- a/tests/src/spectral/test-awhitening.c
+++ b/tests/src/spectral/test-awhitening.c
@@ -3,6 +3,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+
   sint_t err = 0;
 
   if (argc < 3) {
@@ -79,6 +81,7 @@
 beach_source:
   del_fvec(vec);
 beach_fvec:
+  aubio_cleanup();
   return err;
 }
 
--- a/tests/src/spectral/test-fft.c
+++ b/tests/src/spectral/test-fft.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   int return_code = 0;
   uint_t i, n_iters = 100; // number of iterations
   uint_t win_s = 512; // window size
@@ -43,6 +45,8 @@
   del_fvec(in);
   del_cvec(fftgrain);
   del_fvec(out);
+
   aubio_cleanup();
+  
   return return_code;
 }
--- a/tests/src/spectral/test-filterbank.c
+++ b/tests/src/spectral/test-filterbank.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t win_s = 1024; // window size
   uint_t n_filters = 13; // number of filters
 
@@ -33,6 +35,7 @@
   del_aubio_filterbank (o);
   del_cvec (in_spec);
   del_fvec (out_filters);
+  
   aubio_cleanup ();
 
   return 0;
--- a/tests/src/spectral/test-filterbank_mel.c
+++ b/tests/src/spectral/test-filterbank_mel.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t samplerate = 16000; // samplerate of signal to filter
   uint_t win_s = 512; // fft size
   uint_t n_filters = 40; // number of filters
@@ -32,6 +34,7 @@
   del_aubio_filterbank (o);
   del_cvec (in_spec);
   del_fvec (out_filters);
+  
   aubio_cleanup ();
 
   return 0;
--- a/tests/src/spectral/test-mfcc.c
+++ b/tests/src/spectral/test-mfcc.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t win_s = 512; // fft size
   uint_t n_filters = 40; // number of filters
   uint_t n_coefs = 13; // number of coefficients
@@ -24,6 +26,7 @@
   del_aubio_mfcc (o);
   del_cvec (in);
   del_fvec (out);
+  
   aubio_cleanup ();
 
   return 0;
--- a/tests/src/spectral/test-phasevoc.c
+++ b/tests/src/spectral/test-phasevoc.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t n = 6; // compute n times
   uint_t win_s = 32; // window size
   uint_t hop_s = win_s / 4; // hop size
@@ -41,6 +43,7 @@
   del_cvec(fftgrain);
   del_fvec(out);
   del_aubio_pvoc(pv);
+  
   aubio_cleanup();
 
   return 0;
--- a/tests/src/spectral/test-specdesc.c
+++ b/tests/src/spectral/test-specdesc.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t win_s = 1024; // window size
   cvec_t *in = new_cvec (win_s); // input buffer
   fvec_t *out = new_fvec (1); // output spectral descriptor
@@ -38,6 +40,7 @@
 
   del_cvec (in);
   del_fvec (out);
+  
   aubio_cleanup ();
 
   return 0;
--- a/tests/src/spectral/test-tss.c
+++ b/tests/src/spectral/test-tss.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t n = 10; // compute n times
   uint_t win_s = 1024; // window size
   uint_t hop_s = 256;  // hop size
--- a/tests/src/synth/test-sampler.c
+++ b/tests/src/synth/test-sampler.c
@@ -3,6 +3,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+  
   sint_t err = 0;
 
   if (argc < 4) {
@@ -53,6 +55,7 @@
   del_aubio_source(source);
   del_aubio_sink(sink);
   del_fvec(vec);
+  
   aubio_cleanup();
 
   return 0;
--- a/tests/src/synth/test-wavetable.c
+++ b/tests/src/synth/test-wavetable.c
@@ -3,6 +3,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+  
   sint_t err = 0;
 
   if (argc < 2) {
@@ -62,6 +64,7 @@
   del_aubio_wavetable (wavetable);
   del_aubio_sink(sink);
   del_fvec(vec);
+  
   aubio_cleanup();
 
   return 0;
--- a/tests/src/tempo/test-beattracking.c
+++ b/tests/src/tempo/test-beattracking.c
@@ -5,6 +5,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t i = 0;
   uint_t win_s = 1024; // window size
   fvec_t * in = new_fvec (win_s); // input buffer
@@ -33,6 +35,7 @@
   del_aubio_beattracking(tempo);
   del_fvec(in);
   del_fvec(out);
+  
   aubio_cleanup();
 
   return 0;
--- a/tests/src/tempo/test-tempo.c
+++ b/tests/src/tempo/test-tempo.c
@@ -3,6 +3,8 @@
 
 int main (int argc, char **argv)
 {
+  aubio_init();
+  
   uint_t err = 0;
   if (argc < 2) {
     err = 2;
@@ -56,6 +58,7 @@
   del_fvec(in);
   del_fvec(out);
   del_aubio_source(source);
+  
 beach:
   aubio_cleanup();
 
--- a/tests/src/temporal/test-a_weighting.c
+++ b/tests/src/temporal/test-a_weighting.c
@@ -2,7 +2,8 @@
 
 int main (void)
 {
-  
+  aubio_init();
+
   aubio_filter_t * f;
 
   uint_t rates[] = { 8000, 16000, 22050, 44100, 96000, 192000};
@@ -38,6 +39,8 @@
   }
   del_aubio_filter (f);
 
+  aubio_cleanup();
+  
   return 0;
 }
 
--- a/tests/src/temporal/test-biquad.c
+++ b/tests/src/temporal/test-biquad.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t win_s = 64; // window size
 
   // create biquad filter with `b0`, `b1`, `b2`, `a1`, `a2`
@@ -28,5 +30,7 @@
   del_fvec(tmp_vec);
   del_fvec(out_vec);
 
+  aubio_cleanup();
+  
   return 0;
 }
--- a/tests/src/temporal/test-c_weighting.c
+++ b/tests/src/temporal/test-c_weighting.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   aubio_filter_t * f;
 
   uint_t rates[] = { 8000, 16000, 22050, 44100, 96000, 192000};
@@ -37,6 +39,8 @@
   }
   del_aubio_filter (f);
 
+  aubio_cleanup();
+  
   return 0;
 }
 
--- a/tests/src/temporal/test-filter.c
+++ b/tests/src/temporal/test-filter.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+  
   uint_t win_s = 16; // window size
   uint_t impulse_at = win_s / 2;
   fvec_t *in = new_fvec (win_s); // input buffer
@@ -29,6 +31,7 @@
   del_fvec (in);
   del_fvec (out);
   del_aubio_filter (o);
+  
   aubio_cleanup ();
 
   return 0;
--- a/tests/src/temporal/test-resampler.c
+++ b/tests/src/temporal/test-resampler.c
@@ -2,6 +2,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t win_s = 1024; // window size
   smpl_t ratio = 0.5;
   fvec_t *in = new_fvec (win_s); // input buffer
@@ -17,6 +19,8 @@
   del_aubio_resampler (o);
   del_fvec (in);
   del_fvec (out);
+
+  aubio_cleanup();
 
   return 0;
 }
--- a/tests/src/test-cvec.c
+++ b/tests/src/test-cvec.c
@@ -3,6 +3,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t i, window_size = 16; // window size
   cvec_t * complex_vector = new_cvec (window_size); // input buffer
   uint_t rand_times = 4;
@@ -44,5 +46,8 @@
 
   // destroy it
   del_cvec(complex_vector);
+
+  aubio_cleanup();
+
   return 0;
 }
--- a/tests/src/test-delnull.c
+++ b/tests/src/test-delnull.c
@@ -6,6 +6,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t return_code = 0;
   fvec_t *f = new_fvec(-12);
   cvec_t *c = new_cvec(-12);
@@ -20,5 +22,8 @@
   } else if (fft != NULL) {
     return_code = 3;
   }
+
+  aubio_cleanup();
+  
   return return_code;
 }
--- a/tests/src/test-fmat.c
+++ b/tests/src/test-fmat.c
@@ -6,6 +6,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t height = 3, length = 9, i, j;
   // create fmat_t object
   fmat_t * mat = new_fmat (height, length);
@@ -25,6 +27,9 @@
   fmat_print(mat);
   // destroy it
   del_fmat(mat);
+
+  aubio_cleanup();
+  
   return 0;
 }
 
--- a/tests/src/test-fvec.c
+++ b/tests/src/test-fvec.c
@@ -3,6 +3,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t vec_size = 10, i;
   fvec_t * vec = new_fvec (vec_size);
 
@@ -37,6 +39,8 @@
 
   // now destroys the vector
   del_fvec(vec);
+
+  aubio_cleanup();
 
   return 0;
 }
--- a/tests/src/test-lvec.c
+++ b/tests/src/test-lvec.c
@@ -3,6 +3,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t win_s = 32; // window size
   lvec_t * sp = new_lvec (win_s); // input buffer
   lvec_set_sample (sp, 2./3., 0);
@@ -13,6 +15,9 @@
   lvec_set_all (sp, 3./5.);
   lvec_print (sp);
   del_lvec(sp);
+
+  aubio_cleanup();
+  
   return 0;
 }
 
--- a/tests/src/test-mathutils-window.c
+++ b/tests/src/test-mathutils-window.c
@@ -3,6 +3,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t length = 0;
   uint_t n_length = 4, n_types = 10, i, t;
   uint_t lengths[4] = { 8, 10, 15, 16 };
@@ -26,6 +28,9 @@
       del_fvec(window);
     }
   }
+
+  aubio_cleanup();
+  
   return 0;
 }
 
--- a/tests/src/test-mathutils.c
+++ b/tests/src/test-mathutils.c
@@ -113,8 +113,13 @@
 
 int main (void)
 {
+  aubio_init();
+
   test_next_power_of_two();
   test_miditofreq();
   test_freqtomidi();
+
+  aubio_cleanup();
+  
   return 0;
 }
--- a/tests/src/utils/test-hist.c
+++ b/tests/src/utils/test-hist.c
@@ -4,6 +4,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t length;
   for (length = 1; length < 10; length ++ ) {
     aubio_hist_t *o = new_aubio_hist(0, 1, length);
@@ -25,6 +27,9 @@
     del_aubio_hist(o);
     del_fvec(t);
   }
+
+  aubio_cleanup();
+  
   return 0;
 }
 
--- a/tests/src/utils/test-log.c
+++ b/tests/src/utils/test-log.c
@@ -23,6 +23,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   fprintf(stdout, "### testing normal logging\n");
   AUBIO_ERR("testing normal AUBIO_LOG_ERR\n");
   AUBIO_INF("testing normal AUBIO_LOG_INF\n");
@@ -56,5 +58,7 @@
   AUBIO_MSG("testing custom set_level_function AUBIO_LOG_MSG\n");
   AUBIO_DBG("testing again normal AUBIO_LOG_DBG\n");
 
+  aubio_cleanup();
+  
   return 0;
 }
--- a/tests/src/utils/test-parameter.c
+++ b/tests/src/utils/test-parameter.c
@@ -24,6 +24,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   smpl_t max_value = 100.;
   smpl_t min_value = 0.;
   uint_t steps = 10;
@@ -66,5 +68,7 @@
 
   del_aubio_parameter (param);
 
+  aubio_cleanup();
+  
   return 0;
 }
--- a/tests/src/utils/test-scale.c
+++ b/tests/src/utils/test-scale.c
@@ -4,6 +4,8 @@
 
 int main (void)
 {
+  aubio_init();
+
   uint_t n = 0;
   uint_t win_s = 1024; // window size
   fvec_t * in = new_fvec (win_s); // input buffer
@@ -18,5 +20,7 @@
   del_aubio_scale(o);
   del_fvec(in);
 
+  aubio_cleanup();
+  
   return 0;
 }
--- a/wscript
+++ b/wscript
@@ -50,6 +50,9 @@
     add_option_enable_disable(ctx, 'fftw3', default = False,
             help_str = 'compile with fftw3 instead of ooura',
             help_disable_str = 'do not compile with fftw3')
+    add_option_enable_disable(ctx, 'intelipp', default = None,
+            help_str = 'use Intel IPP libraries (auto)',
+            help_disable_str = 'do not use Intel IPP libraries')
     add_option_enable_disable(ctx, 'complex', default = False,
             help_str ='compile with C99 complex',
             help_disable_str = 'do not use C99 complex (default)' )
@@ -155,6 +158,10 @@
         ctx.env.LINKFLAGS += ['/DEBUG', '/INCREMENTAL:NO']
         # configure warnings
         ctx.env.CFLAGS += ['/W4', '/D_CRT_SECURE_NO_WARNINGS']
+        # ignore "possible loss of data" warnings
+        ctx.env.CFLAGS += ['/wd4305', '/wd4244', '/wd4245', '/wd4267']
+        # ignore "unreferenced formal parameter" warnings
+        ctx.env.CFLAGS += ['/wd4100']
         # set optimization level and runtime libs
         if (ctx.options.build_type == "release"):
             ctx.env.CFLAGS += ['/Ox']
@@ -282,7 +289,19 @@
         ctx.check(header_name='complex.h')
     else:
         ctx.msg('Checking if complex.h is enabled', 'no')
-
+    
+    # check for Intel IPP
+    if (ctx.options.enable_intelipp != False):
+        if (ctx.check(header_name=['ippcore.h', 'ippvm.h', 'ipps.h'], mandatory = False) and
+            ctx.check(lib=['ippcore', 'ippvm', 'ipps'], uselib_store='INTEL_IPP', mandatory = False)):
+            ctx.msg('Checking if Intel IPP is available', 'yes')
+            ctx.define('HAVE_INTEL_IPP', 1)
+            if ctx.env.CC_NAME == 'msvc':
+                # force linking the sequential (single-threaded) static IPP libraries on Windows with msvc
+                ctx.define('_IPP_SEQUENTIAL_STATIC', 1)
+        else:
+            ctx.msg('Checking if Intel IPP is available', 'no')
+    
     # check for fftw3
     if (ctx.options.enable_fftw3 != False or ctx.options.enable_fftw3f != False):
         # one of fftwf or fftw3f
@@ -306,7 +325,7 @@
                         mandatory = ctx.options.enable_fftw3)
         ctx.define('HAVE_FFTW3', 1)
 
-    # fftw not enabled, use vDSP or ooura
+    # fftw not enabled, use vDSP, Intel IPP or ooura
     if 'HAVE_FFTW3F' in ctx.env.define_key:
         ctx.msg('Checking for FFT implementation', 'fftw3f')
     elif 'HAVE_FFTW3' in ctx.env.define_key:
@@ -313,6 +332,8 @@
         ctx.msg('Checking for FFT implementation', 'fftw3')
     elif 'HAVE_ACCELERATE' in ctx.env.define_key:
         ctx.msg('Checking for FFT implementation', 'vDSP')
+    elif 'HAVE_INTEL_IPP' in ctx.env.define_key:
+        ctx.msg('Checking for FFT implementation', 'Intel IPP')
     else:
         ctx.msg('Checking for FFT implementation', 'ooura')