shithub: aubio

--- a/src/aubio_priv.h

+++ b/src/aubio_priv.h

@@ -123,7 +123,38 @@

 #endif /* HAVE_AUBIO_DOUBLE */

 #endif /* HAVE_ATLAS */

-#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS)

+#if defined HAVE_INTEL_IPP

+#include <ippcore.h>

+#include <ippvm.h>

+#include <ipps.h>

+#ifndef HAVE_AUBIO_DOUBLE

+#define aubio_ippsSet         ippsSet_32f

+#define aubio_ippsZero        ippsZero_32f

+#define aubio_ippsCopy        ippsCopy_32f

+#define aubio_ippsMul         ippsMul_32f

+#define aubio_ippsMulC        ippsMulC_32f

+#define aubio_ippsAddC        ippsAddC_32f

+#define aubio_ippsLn          ippsLn_32f_A21

+#define aubio_ippsMean(a,b,c) ippsMean_32f(a, b, c, ippAlgHintFast)

+#define aubio_ippsSum(a,b,c)  ippsSum_32f(a, b, c, ippAlgHintFast)

+#define aubio_ippsMax         ippsMax_32f

+#define aubio_ippsMin         ippsMin_32f

+#else /* HAVE_AUBIO_DOUBLE */

+#define aubio_ippsSet         ippsSet_64f

+#define aubio_ippsZero        ippsZero_64f

+#define aubio_ippsCopy        ippsCopy_64f

+#define aubio_ippsMul         ippsMul_64f

+#define aubio_ippsMulC        ippsMulC_64f

+#define aubio_ippsAddC        ippsAddC_64f

+#define aubio_ippsLn          ippsLn_64f_A26

+#define aubio_ippsMean        ippsMean_64f

+#define aubio_ippsSum         ippsSum_64f

+#define aubio_ippsMax         ippsMax_64f

+#define aubio_ippsMin         ippsMin_64f

+#endif /* HAVE_AUBIO_DOUBLE */

+#endif

+#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS) && !defined(HAVE_INTEL_IPP)

 #define HAVE_NOOPT 1

 #else

 #undef HAVE_NOOPT

--- a/src/cvec.c

+++ b/src/cvec.c

@@ -85,31 +85,40 @@

         s->length, t->length);

     return;

-#ifdef HAVE_MEMCPY_HACKS

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsCopy(s->phas, t->phas, (int)s->length);

+  aubio_ippsCopy(s->norm, t->norm, (int)s->length);

+#elif defined(HAVE_MEMCPY_HACKS)

   memcpy(t->norm, s->norm, t->length * sizeof(smpl_t));

   memcpy(t->phas, s->phas, t->length * sizeof(smpl_t));

-#else /* HAVE_MEMCPY_HACKS */

+#else

   uint_t j;

   for (j=0; j< t->length; j++) {

     t->norm[j] = s->norm[j];

     t->phas[j] = s->phas[j];

-#endif /* HAVE_MEMCPY_HACKS */

+#endif

-void cvec_norm_set_all (cvec_t *s, smpl_t val) {

+void cvec_norm_set_all(cvec_t *s, smpl_t val) {

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsSet(val, s->norm, (int)s->length);

+#else

   uint_t j;

   for (j=0; j< s->length; j++) {

     s->norm[j] = val;

+#endif

 void cvec_norm_zeros(cvec_t *s) {

-#ifdef HAVE_MEMCPY_HACKS

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsZero(s->norm, (int)s->length);

+#elif defined(HAVE_MEMCPY_HACKS)

   memset(s->norm, 0, s->length * sizeof(smpl_t));

-#else /* HAVE_MEMCPY_HACKS */

+#else

   cvec_norm_set_all (s, 0.);

-#endif /* HAVE_MEMCPY_HACKS */

+#endif

 void cvec_norm_ones(cvec_t *s) {

@@ -117,14 +126,20 @@

 void cvec_phas_set_all (cvec_t *s, smpl_t val) {

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsSet(val, s->phas, (int)s->length);

+#else

   uint_t j;

   for (j=0; j< s->length; j++) {

     s->phas[j] = val;

+#endif

 void cvec_phas_zeros(cvec_t *s) {

-#ifdef HAVE_MEMCPY_HACKS

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsZero(s->phas, (int)s->length);

+#elif defined(HAVE_MEMCPY_HACKS)

   memset(s->phas, 0, s->length * sizeof(smpl_t));

 #else

   cvec_phas_set_all (s, 0.);

@@ -141,8 +156,14 @@

 void cvec_logmag(cvec_t *s, smpl_t lambda) {

-  uint_t j;

-  for (j=0; j< s->length; j++) {

-    s->norm[j] = LOG(lambda * s->norm[j] + 1);

-  }

+  #if defined(HAVE_INTEL_IPP)

+    aubio_ippsMulC(s->norm, lambda, s->norm, (int)s->length);

+    aubio_ippsAddC(s->norm, 1.0, s->norm, (int)s->length);

+    aubio_ippsLn(s->norm, s->norm, (int)s->length);

+  #else

+    uint_t j;

+    for (j=0; j< s->length; j++) {

+      s->norm[j] = LOG(lambda * s->norm[j] + 1);

+    }

+  #endif

--- a/src/fvec.c

+++ b/src/fvec.c

@@ -60,28 +60,31 @@

 void fvec_set_all (fvec_t *s, smpl_t val) {

-#if !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS)

-  uint_t j;

-  for (j=0; j< s->length; j++) {

-    s->data[j] = val;

-  }

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsSet(val, s->data, (int)s->length);

 #elif defined(HAVE_ATLAS)

   aubio_catlas_set(s->length, val, s->data, 1);

 #elif defined(HAVE_ACCELERATE)

   aubio_vDSP_vfill(&val, s->data, 1, s->length);

+#else

+  uint_t j;

+  for ( j = 0; j< s->length; j++ )

+  {

+    s->data[j] = val;

+  }

 #endif

 void fvec_zeros(fvec_t *s) {

-#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE)

-  fvec_set_all (s, 0.);

-#else

-#if defined(HAVE_MEMCPY_HACKS)

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsZero(s->data, (int)s->length);

+#elif defined(HAVE_ACCELERATE)

+  aubio_vDSP_vclr(s->data, 1, s->length);

+#elif defined(HAVE_MEMCPY_HACKS)

   memset(s->data, 0, s->length * sizeof(smpl_t));

 #else

-  aubio_vDSP_vclr(s->data, 1, s->length);

+  fvec_set_all(s, 0.);

 #endif

-#endif

 void fvec_ones(fvec_t *s) {

@@ -96,27 +99,31 @@

 void fvec_weight(fvec_t *s, const fvec_t *weight) {

-#ifndef HAVE_ACCELERATE

-  uint_t j;

   uint_t length = MIN(s->length, weight->length);

-  for (j=0; j< length; j++) {

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsMul(s->data, weight->data, s->data, (int)length);

+#elif defined(HAVE_ACCELERATE)

+  aubio_vDSP_vmul( s->data, 1, weight->data, 1, s->data, 1, length );

+#else

+  uint_t j;

+  for (j = 0; j < length; j++) {

     s->data[j] *= weight->data[j];

-#else

-  aubio_vDSP_vmul(s->data, 1, weight->data, 1, s->data, 1, s->length);

 #endif /* HAVE_ACCELERATE */

 void fvec_weighted_copy(const fvec_t *in, const fvec_t *weight, fvec_t *out) {

-#ifndef HAVE_ACCELERATE

+  uint_t length = MIN(in->length, MIN(out->length, weight->length));

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsMul(in->data, weight->data, out->data, (int)length);

+#elif defined(HAVE_ACCELERATE)

+  aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, length);

+#else

   uint_t j;

-  uint_t length = MIN(out->length, weight->length);

-  for (j=0; j< length; j++) {

+  for (j = 0; j < length; j++) {

     out->data[j] = in->data[j] * weight->data[j];

-#else

-  aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, out->length);

-#endif /* HAVE_ACCELERATE */

+#endif

 void fvec_copy(const fvec_t *s, fvec_t *t) {

@@ -125,16 +132,18 @@

         s->length, t->length);

     return;

-#ifdef HAVE_NOOPT

-  uint_t j;

-  for (j=0; j< t->length; j++) {

-    t->data[j] = s->data[j];

-  }

-#elif defined(HAVE_MEMCPY_HACKS)

-  memcpy(t->data, s->data, t->length * sizeof(smpl_t));

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsCopy(s->data, t->data, (int)s->length);

 #elif defined(HAVE_ATLAS)

   aubio_cblas_copy(s->length, s->data, 1, t->data, 1);

 #elif defined(HAVE_ACCELERATE)

   aubio_vDSP_mmov(s->data, t->data, 1, s->length, 1, 1);

+#elif defined(HAVE_MEMCPY_HACKS)

+  memcpy(t->data, s->data, t->length * sizeof(smpl_t));

+#else

+  uint_t j;

+  for (j = 0; j < t->length; j++) {

+    t->data[j] = s->data[j];

+  }

 #endif

--- a/src/mathutils.c

+++ b/src/mathutils.c

@@ -159,16 +159,19 @@

 fvec_mean (fvec_t * s)

   smpl_t tmp = 0.0;

-#ifndef HAVE_ACCELERATE

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsMean(s->data, (int)s->length, &tmp);

+  return tmp;

+#elif defined(HAVE_ACCELERATE)

+  aubio_vDSP_meanv(s->data, 1, &tmp, s->length);

+  return tmp;

+#else

   uint_t j;

   for (j = 0; j < s->length; j++) {

     tmp += s->data[j];

-  return tmp / (smpl_t) (s->length);

-#else

-  aubio_vDSP_meanv(s->data, 1, &tmp, s->length);

-  return tmp;

-#endif /* HAVE_ACCELERATE */

+  return tmp / (smpl_t)(s->length);

+#endif

 smpl_t

@@ -175,14 +178,16 @@

 fvec_sum (fvec_t * s)

   smpl_t tmp = 0.0;

-#ifndef HAVE_ACCELERATE

+#if defined(HAVE_INTEL_IPP)

+  aubio_ippsSum(s->data, (int)s->length, &tmp);

+#elif defined(HAVE_ACCELERATE)

+  aubio_vDSP_sve(s->data, 1, &tmp, s->length);

+#else

   uint_t j;

   for (j = 0; j < s->length; j++) {

     tmp += s->data[j];

-#else

-  aubio_vDSP_sve(s->data, 1, &tmp, s->length);

-#endif /* HAVE_ACCELERATE */

+#endif

   return tmp;

@@ -189,15 +194,18 @@

 smpl_t

 fvec_max (fvec_t * s)

-#ifndef HAVE_ACCELERATE

+#if defined(HAVE_INTEL_IPP)

+  smpl_t tmp = 0.;

+  aubio_ippsMax( s->data, (int)s->length, &tmp);

+#elif defined(HAVE_ACCELERATE)

+  smpl_t tmp = 0.;

+  aubio_vDSP_maxv( s->data, 1, &tmp, s->length );

+#else

   uint_t j;

-  smpl_t tmp = 0.0;

-  for (j = 0; j < s->length; j++) {

+  smpl_t tmp = s->data[0];

+  for (j = 1; j < s->length; j++) {

     tmp = (tmp > s->data[j]) ? tmp : s->data[j];

-#else

-  smpl_t tmp = 0.;

-  aubio_vDSP_maxv(s->data, 1, &tmp, s->length);

 #endif

   return tmp;

@@ -205,15 +213,18 @@

 smpl_t

 fvec_min (fvec_t * s)

-#ifndef HAVE_ACCELERATE

+#if defined(HAVE_INTEL_IPP)

+  smpl_t tmp = 0.;

+  aubio_ippsMin(s->data, (int)s->length, &tmp);

+#elif defined(HAVE_ACCELERATE)

+  smpl_t tmp = 0.;

+  aubio_vDSP_minv(s->data, 1, &tmp, s->length);

+#else

   uint_t j;

   smpl_t tmp = s->data[0];

-  for (j = 0; j < s->length; j++) {

+  for (j = 1; j < s->length; j++) {

     tmp = (tmp < s->data[j]) ? tmp : s->data[j];

-#else

-  smpl_t tmp = 0.;

-  aubio_vDSP_minv(s->data, 1, &tmp, s->length);

 #endif

   return tmp;

@@ -572,6 +583,17 @@

   uint_t i = 1;

   while (i < a) i <<= 1;

   return i;

+}

+uint_t

+aubio_power_of_two_order (uint_t a)

+{

+  int order = 0;

+  int temp = aubio_next_power_of_two(a);

+  while (temp >>= 1) {

+    ++order;

+  }

+  return order;

 smpl_t

--- a/src/mathutils.h

+++ b/src/mathutils.h

@@ -312,6 +312,9 @@

 /** return the next power of power of 2 greater than a */

 uint_t aubio_next_power_of_two(uint_t a);

+/** return the log2 factor of the given power of 2 value a */

+uint_t aubio_power_of_two_order(uint_t a);

 /** compute normalised autocorrelation function

   \param input vector to compute autocorrelation from

--- a/src/spectral/fft.c

+++ b/src/spectral/fft.c

@@ -77,8 +77,7 @@

 // a global mutex for FFTW thread safety

 pthread_mutex_t aubio_fftw_mutex = PTHREAD_MUTEX_INITIALIZER;

-#else

-#ifdef HAVE_ACCELERATE        // using ACCELERATE

+#elif defined HAVE_ACCELERATE        // using ACCELERATE

 // https://developer.apple.com/library/mac/#documentation/Accelerate/Reference/vDSPRef/Reference/reference.html

 #include <Accelerate/Accelerate.h>

@@ -112,32 +111,65 @@

 #define aubio_vvsqrt                   vvsqrt

 #endif /* HAVE_AUBIO_DOUBLE */

-#else                         // using OOURA

+#elif defined HAVE_INTEL_IPP // using INTEL IPP

+#if !HAVE_AUBIO_DOUBLE

+#define aubio_IppFloat                 Ipp32f

+#define aubio_IppComplex               Ipp32fc

+#define aubio_FFTSpec                  FFTSpec_R_32f

+#define aubio_ippsMalloc_complex       ippsMalloc_32fc

+#define aubio_ippsFFTInit_R            ippsFFTInit_R_32f

+#define aubio_ippsFFTGetSize_R         ippsFFTGetSize_R_32f

+#define aubio_ippsFFTInv_CCSToR        ippsFFTInv_CCSToR_32f

+#define aubio_ippsFFTFwd_RToCCS        ippsFFTFwd_RToCCS_32f

+#define aubio_ippsAtan2                ippsAtan2_32f_A21

+#else /* HAVE_AUBIO_DOUBLE */

+#define aubio_IppFloat                 Ipp64f

+#define aubio_IppComplex               Ipp64fc

+#define aubio_FFTSpec                  FFTSpec_R_64f

+#define aubio_ippsMalloc_complex       ippsMalloc_64fc

+#define aubio_ippsFFTInit_R            ippsFFTInit_R_64f

+#define aubio_ippsFFTGetSize_R         ippsFFTGetSize_R_64f

+#define aubio_ippsFFTInv_CCSToR        ippsFFTInv_CCSToR_64f

+#define aubio_ippsFFTFwd_RToCCS        ippsFFTFwd_RToCCS_64f

+#define aubio_ippsAtan2                ippsAtan2_64f_A50

+#endif

+#else // using OOURA

 // let's use ooura instead

 extern void aubio_ooura_rdft(int, int, smpl_t *, int *, smpl_t *);

-#endif /* HAVE_ACCELERATE */

-#endif /* HAVE_FFTW3 */

+#endif

 struct _aubio_fft_t {

   uint_t winsize;

   uint_t fft_size;

 #ifdef HAVE_FFTW3             // using FFTW3

   real_t *in, *out;

   fftw_plan pfw, pbw;

-  fft_data_t * specdata;      /* complex spectral data */

-#else

-#ifdef HAVE_ACCELERATE        // using ACCELERATE

+  fft_data_t * specdata; /* complex spectral data */

+#elif defined HAVE_ACCELERATE  // using ACCELERATE

   int log2fftsize;

   aubio_FFTSetup fftSetup;

   aubio_DSPSplitComplex spec;

   smpl_t *in, *out;

+#elif defined HAVE_INTEL_IPP  // using Intel IPP

+  smpl_t *in, *out;

+  Ipp8u* memSpec;

+  Ipp8u* memInit;

+  Ipp8u* memBuffer;

+  struct aubio_FFTSpec* fftSpec;

+  aubio_IppComplex* complexOut;

 #else                         // using OOURA

   smpl_t *in, *out;

   smpl_t *w;

   int *ip;

-#endif /* HAVE_ACCELERATE */

-#endif /* HAVE_FFTW3 */

+#endif /* using OOURA */

   fvec_t * compspec;

};

@@ -147,6 +179,7 @@

     AUBIO_ERR("fft: got winsize %d, but can not be < 2\n", winsize);

     goto beach;

 #ifdef HAVE_FFTW3

   uint_t i;

   s->winsize  = winsize;

@@ -175,17 +208,55 @@

   for (i = 0; i < s->fft_size; i++) {

     s->specdata[i] = 0.;

-#else

-#ifdef HAVE_ACCELERATE        // using ACCELERATE

+#elif defined HAVE_ACCELERATE  // using ACCELERATE

   s->winsize = winsize;

   s->fft_size = winsize;

   s->compspec = new_fvec(winsize);

-  s->log2fftsize = (uint_t)log2f(s->fft_size);

+  s->log2fftsize = aubio_power_of_two_order(s->fft_size);

   s->in = AUBIO_ARRAY(smpl_t, s->fft_size);

   s->out = AUBIO_ARRAY(smpl_t, s->fft_size);

   s->spec.realp = AUBIO_ARRAY(smpl_t, s->fft_size/2);

   s->spec.imagp = AUBIO_ARRAY(smpl_t, s->fft_size/2);

   s->fftSetup = aubio_vDSP_create_fftsetup(s->log2fftsize, FFT_RADIX2);

+#elif defined HAVE_INTEL_IPP  // using Intel IPP

+  const IppHintAlgorithm qualityHint = ippAlgHintAccurate; // OR ippAlgHintFast;

+  const int flags = IPP_FFT_NODIV_BY_ANY; // we're scaling manually afterwards

+  int order = aubio_power_of_two_order(winsize);

+  int sizeSpec, sizeInit, sizeBuffer;

+  IppStatus status;

+  if (winsize <= 4 || aubio_is_power_of_two(winsize) != 1)

+  {

+    AUBIO_ERR("intel IPP fft: can only create with sizes > 4 and power of two, requested %d,"

+      " try recompiling aubio with --enable-fftw3\n", winsize);

+    goto beach;

+  }

+  status = aubio_ippsFFTGetSize_R(order, flags, qualityHint,

+      &sizeSpec, &sizeInit, &sizeBuffer);

+  if (status != ippStsNoErr) {

+    AUBIO_ERR("fft: failed to initialize fft. IPP error: %d\n", status);

+    goto beach;

+  }

+  s->fft_size = s->winsize = winsize;

+  s->compspec = new_fvec(winsize);

+  s->in = AUBIO_ARRAY(smpl_t, s->winsize);

+  s->out = AUBIO_ARRAY(smpl_t, s->winsize);

+  s->memSpec = ippsMalloc_8u(sizeSpec);

+  s->memBuffer = ippsMalloc_8u(sizeBuffer);

+  if (sizeInit > 0 ) {

+    s->memInit = ippsMalloc_8u(sizeInit);

+  }

+  s->complexOut = aubio_ippsMalloc_complex(s->fft_size / 2 + 1);

+  status = aubio_ippsFFTInit_R(

+    &s->fftSpec, order, flags, qualityHint, s->memSpec, s->memInit);

+  if (status != ippStsNoErr) {

+    AUBIO_ERR("fft: failed to initialize. IPP error: %d\n", status);

+    goto beach;

+  }

 #else                         // using OOURA

   if (aubio_is_power_of_two(winsize) != 1) {

     AUBIO_ERR("fft: can only create with sizes power of two, requested %d,"

@@ -200,9 +271,10 @@

   s->ip    = AUBIO_ARRAY(int   , s->fft_size);

   s->w     = AUBIO_ARRAY(smpl_t, s->fft_size);

   s->ip[0] = 0;

-#endif /* HAVE_ACCELERATE */

-#endif /* HAVE_FFTW3 */

+#endif /* using OOURA */

   return s;

 beach:

   AUBIO_FREE(s);

   return NULL;

@@ -210,7 +282,6 @@

 void del_aubio_fft(aubio_fft_t * s) {

   /* destroy data */

-  del_fvec(s->compspec);

 #ifdef HAVE_FFTW3             // using FFTW3

   pthread_mutex_lock(&aubio_fftw_mutex);

   fftw_destroy_plan(s->pfw);

@@ -217,18 +288,26 @@

   fftw_destroy_plan(s->pbw);

   fftw_free(s->specdata);

   pthread_mutex_unlock(&aubio_fftw_mutex);

-#else /* HAVE_FFTW3 */

-#ifdef HAVE_ACCELERATE        // using ACCELERATE

+#elif defined HAVE_ACCELERATE // using ACCELERATE

   AUBIO_FREE(s->spec.realp);

   AUBIO_FREE(s->spec.imagp);

   aubio_vDSP_destroy_fftsetup(s->fftSetup);

+#elif defined HAVE_INTEL_IPP  // using Intel IPP

+  ippFree(s->memSpec);

+  ippFree(s->memInit);

+  ippFree(s->memBuffer);

+  ippFree(s->complexOut);

 #else                         // using OOURA

   AUBIO_FREE(s->w);

   AUBIO_FREE(s->ip);

-#endif /* HAVE_ACCELERATE */

-#endif /* HAVE_FFTW3 */

-  AUBIO_FREE(s->out);

+#endif

+  del_fvec(s->compspec);

   AUBIO_FREE(s->in);

+  AUBIO_FREE(s->out);

   AUBIO_FREE(s);

@@ -251,6 +330,7 @@

 #else

   memcpy(s->in, input->data, s->winsize * sizeof(smpl_t));

 #endif /* HAVE_MEMCPY_HACKS */

 #ifdef HAVE_FFTW3             // using FFTW3

   fftw_execute(s->pfw);

 #ifdef HAVE_COMPLEX_H

@@ -265,8 +345,8 @@

     compspec->data[i] = s->specdata[i];

 #endif /* HAVE_COMPLEX_H */

-#else /* HAVE_FFTW3 */

-#ifdef HAVE_ACCELERATE        // using ACCELERATE

+#elif defined HAVE_ACCELERATE // using ACCELERATE

   // convert real data to even/odd format used in vDSP

   aubio_vDSP_ctoz((aubio_DSPComplex*)s->in, 2, &s->spec, 1, s->fft_size/2);

   // compute the FFT

@@ -281,6 +361,19 @@

   // apply scaling

   smpl_t scale = 1./2.;

   aubio_vDSP_vsmul(compspec->data, 1, &scale, compspec->data, 1, s->fft_size);

+#elif defined HAVE_INTEL_IPP  // using Intel IPP

+  // apply fft

+  aubio_ippsFFTFwd_RToCCS(s->in, (aubio_IppFloat*)s->complexOut, s->fftSpec, s->memBuffer);

+  // convert complex buffer to [ r0, r1, ..., rN, iN-1, .., i2, i1]

+  compspec->data[0] = s->complexOut[0].re;

+  compspec->data[s->fft_size / 2] = s->complexOut[s->fft_size / 2].re;

+  for (i = 1; i < s->fft_size / 2; i++) {

+    compspec->data[i] = s->complexOut[i].re;

+    compspec->data[s->fft_size - i] = s->complexOut[i].im;

+  }

 #else                         // using OOURA

   aubio_ooura_rdft(s->winsize, 1, s->in, s->ip, s->w);

   compspec->data[0] = s->in[0];

@@ -289,8 +382,7 @@

     compspec->data[i] = s->in[2 * i];

     compspec->data[s->winsize - i] = - s->in[2 * i + 1];

-#endif /* HAVE_ACCELERATE */

-#endif /* HAVE_FFTW3 */

+#endif /* using OOURA */

 void aubio_fft_rdo_complex(aubio_fft_t * s, const fvec_t * compspec, fvec_t * output) {

@@ -313,8 +405,8 @@

   for (i = 0; i < output->length; i++) {

     output->data[i] = s->out[i]*renorm;

-#else /* HAVE_FFTW3 */

-#ifdef HAVE_ACCELERATE        // using ACCELERATE

+#elif defined HAVE_ACCELERATE // using ACCELERATE

   // convert from real imag  [ r0, r1, ..., rN, iN-1, .., i2, i1]

   // to vDSP packed format   [ r0, rN, r1, i1, ..., rN-1, iN-1 ]

   s->out[0] = compspec->data[0];

@@ -332,6 +424,23 @@

   // apply scaling

   smpl_t scale = 1.0 / s->winsize;

   aubio_vDSP_vsmul(output->data, 1, &scale, output->data, 1, s->fft_size);

+#elif defined HAVE_INTEL_IPP  // using Intel IPP

+  // convert from real imag  [ r0, 0, ..., rN, iN-1, .., i2, i1, iN-1] to complex format

+  s->complexOut[0].re = compspec->data[0];

+  s->complexOut[0].im = 0;

+  s->complexOut[s->fft_size / 2].re = compspec->data[s->fft_size / 2];

+  s->complexOut[s->fft_size / 2].im = 0.0;

+  for (i = 1; i < s->fft_size / 2; i++) {

+    s->complexOut[i].re = compspec->data[i];

+    s->complexOut[i].im = compspec->data[s->fft_size - i];

+  }

+  // apply fft

+  aubio_ippsFFTInv_CCSToR((const aubio_IppFloat *)s->complexOut, output->data, s->fftSpec, s->memBuffer);

+  // apply scaling

+  aubio_ippsMulC(output->data, 1.0 / s->winsize, output->data, s->fft_size);

 #else                         // using OOURA

   smpl_t scale = 2.0 / s->winsize;

   s->out[0] = compspec->data[0];

@@ -344,8 +453,7 @@

   for (i=0; i < s->winsize; i++) {

     output->data[i] = s->out[i] * scale;

-#endif /* HAVE_ACCELERATE */

-#endif /* HAVE_FFTW3 */

+#endif

 void aubio_fft_get_spectrum(const fvec_t * compspec, cvec_t * spectrum) {

@@ -365,10 +473,26 @@

   } else {

     spectrum->phas[0] = 0.;

+#if defined(HAVE_INTEL_IPP)

+  // convert from real imag  [ r0, r1, ..., rN, iN-1, ..., i2, i1, i0]

+  //                     to  [ r0, r1, ..., rN, i0, i1, i2, ..., iN-1]

+  for (i = 1; i < spectrum->length / 2; i++) {

+    ELEM_SWAP(compspec->data[compspec->length - i],

+        compspec->data[spectrum->length + i - 1]);

+  }

+  aubio_ippsAtan2(compspec->data + spectrum->length,

+      compspec->data + 1, spectrum->phas + 1, spectrum->length - 1);

+  // revert the imaginary part back again

+  for (i = 1; i < spectrum->length / 2; i++) {

+    ELEM_SWAP(compspec->data[spectrum->length + i - 1],

+        compspec->data[compspec->length - i]);

+  }

+#else

   for (i=1; i < spectrum->length - 1; i++) {

     spectrum->phas[i] = ATAN2(compspec->data[compspec->length-i],

         compspec->data[i]);

+#endif

   if (compspec->data[compspec->length/2] < 0) {

     spectrum->phas[spectrum->length - 1] = PI;

   } else {

--- a/src/wscript_build

+++ b/src/wscript_build

@@ -3,6 +3,7 @@

 uselib = []

 uselib += ['M']

 uselib += ['FFTW3', 'FFTW3F']

+uselib += ['INTEL_IPP']

 uselib += ['SAMPLERATE']

 uselib += ['SNDFILE']

 uselib += ['AVCODEC']

--- a/wscript

+++ b/wscript

@@ -50,6 +50,9 @@

     add_option_enable_disable(ctx, 'fftw3', default = False,

             help_str = 'compile with fftw3 instead of ooura',

             help_disable_str = 'do not compile with fftw3')

+    add_option_enable_disable(ctx, 'intelipp', default = False,

+            help_str = 'use Intel IPP libraries (auto)',

+            help_disable_str = 'do not use Intel IPP libraries')

     add_option_enable_disable(ctx, 'complex', default = False,

             help_str ='compile with C99 complex',

             help_disable_str = 'do not use C99 complex (default)' )

@@ -155,6 +158,10 @@

         ctx.env.LINKFLAGS += ['/DEBUG', '/INCREMENTAL:NO']

         # configure warnings

         ctx.env.CFLAGS += ['/W4', '/D_CRT_SECURE_NO_WARNINGS']

+        # ignore "possible loss of data" warnings

+        ctx.env.CFLAGS += ['/wd4305', '/wd4244', '/wd4245', '/wd4267']

+        # ignore "unreferenced formal parameter" warnings

+        ctx.env.CFLAGS += ['/wd4100']

         # set optimization level and runtime libs

         if (ctx.options.build_type == "release"):

             ctx.env.CFLAGS += ['/Ox']

@@ -244,7 +251,9 @@

         ctx.env.cstlib_PATTERN = '%s.a'

         # tell emscripten functions we want to expose

-        from python.lib.gen_external import get_c_declarations, get_cpp_objects_from_c_declarations, get_all_func_names_from_lib, generate_lib_from_c_declarations

+        from python.lib.gen_external import get_c_declarations, \

+                get_cpp_objects_from_c_declarations, get_all_func_names_from_lib, \

+                generate_lib_from_c_declarations

         c_decls = get_c_declarations(usedouble=False)  # emscripten can't use double

         objects = list(get_cpp_objects_from_c_declarations(c_decls))

         # ensure that aubio structs are exported

@@ -283,6 +292,21 @@

     else:

         ctx.msg('Checking if complex.h is enabled', 'no')

+    # check for Intel IPP

+    if (ctx.options.enable_intelipp != False):

+        has_ipp_headers = ctx.check(header_name=['ippcore.h', 'ippvm.h', 'ipps.h'],

+                mandatory = False)

+        has_ipp_libs = ctx.check(lib=['ippcore', 'ippvm', 'ipps'],

+                uselib_store='INTEL_IPP', mandatory = False)

+        if (has_ipp_headers and has_ipp_libs):

+            ctx.msg('Checking if Intel IPP is available', 'yes')

+            ctx.define('HAVE_INTEL_IPP', 1)

+            if ctx.env.CC_NAME == 'msvc':

+                # force linking multi-threaded static IPP libraries on Windows with msvc

+                ctx.define('_IPP_SEQUENTIAL_STATIC', 1)

+        else:

+            ctx.msg('Checking if Intel IPP is available', 'no')

     # check for fftw3

     if (ctx.options.enable_fftw3 != False or ctx.options.enable_fftw3f != False):

         # one of fftwf or fftw3f

@@ -306,7 +330,7 @@

                         mandatory = ctx.options.enable_fftw3)

         ctx.define('HAVE_FFTW3', 1)

-    # fftw not enabled, use vDSP or ooura

+    # fftw not enabled, use vDSP, intelIPP or ooura

     if 'HAVE_FFTW3F' in ctx.env.define_key:

         ctx.msg('Checking for FFT implementation', 'fftw3f')

     elif 'HAVE_FFTW3' in ctx.env.define_key:

@@ -313,6 +337,8 @@

         ctx.msg('Checking for FFT implementation', 'fftw3')

     elif 'HAVE_ACCELERATE' in ctx.env.define_key:

         ctx.msg('Checking for FFT implementation', 'vDSP')

+    elif 'HAVE_INTEL_IPP' in ctx.env.define_key:

+        ctx.msg('Checking for FFT implementation', 'Intel IPP')

     else:

         ctx.msg('Checking for FFT implementation', 'ooura')

--

⑨