ref: 20c876939927652944f45b2f3967bc9d59024e48
parent: 25cf9577c0f942143a95908d25d727cb43b4f819
parent: 799d992f0e2edac2a508919e6eb3b535fb8590c3
author: Paul Brossier <piem@piem.org>
date: Sun Oct 1 16:04:30 EDT 2017
Merge branch 'intel_ipp'
--- a/src/aubio_priv.h
+++ b/src/aubio_priv.h
@@ -123,7 +123,38 @@
#endif /* HAVE_AUBIO_DOUBLE */
#endif /* HAVE_ATLAS */
-#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS)
+#if defined HAVE_INTEL_IPP
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+#ifndef HAVE_AUBIO_DOUBLE
+#define aubio_ippsSet ippsSet_32f
+#define aubio_ippsZero ippsZero_32f
+#define aubio_ippsCopy ippsCopy_32f
+#define aubio_ippsMul ippsMul_32f
+#define aubio_ippsMulC ippsMulC_32f
+#define aubio_ippsAddC ippsAddC_32f
+#define aubio_ippsLn ippsLn_32f_A21
+#define aubio_ippsMean(a,b,c) ippsMean_32f(a, b, c, ippAlgHintFast)
+#define aubio_ippsSum(a,b,c) ippsSum_32f(a, b, c, ippAlgHintFast)
+#define aubio_ippsMax ippsMax_32f
+#define aubio_ippsMin ippsMin_32f
+#else /* HAVE_AUBIO_DOUBLE */
+#define aubio_ippsSet ippsSet_64f
+#define aubio_ippsZero ippsZero_64f
+#define aubio_ippsCopy ippsCopy_64f
+#define aubio_ippsMul ippsMul_64f
+#define aubio_ippsMulC ippsMulC_64f
+#define aubio_ippsAddC ippsAddC_64f
+#define aubio_ippsLn ippsLn_64f_A26
+#define aubio_ippsMean ippsMean_64f
+#define aubio_ippsSum ippsSum_64f
+#define aubio_ippsMax ippsMax_64f
+#define aubio_ippsMin ippsMin_64f
+#endif /* HAVE_AUBIO_DOUBLE */
+#endif
+
+#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS) && !defined(HAVE_INTEL_IPP)
#define HAVE_NOOPT 1
#else
#undef HAVE_NOOPT
--- a/src/cvec.c
+++ b/src/cvec.c
@@ -85,31 +85,40 @@
s->length, t->length);
return;
}
-#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsCopy(s->phas, t->phas, (int)s->length);
+ aubio_ippsCopy(s->norm, t->norm, (int)s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
memcpy(t->norm, s->norm, t->length * sizeof(smpl_t));
memcpy(t->phas, s->phas, t->length * sizeof(smpl_t));
-#else /* HAVE_MEMCPY_HACKS */
+#else
uint_t j;
for (j=0; j< t->length; j++) {
t->norm[j] = s->norm[j];
t->phas[j] = s->phas[j];
}
-#endif /* HAVE_MEMCPY_HACKS */
+#endif
}
-void cvec_norm_set_all (cvec_t *s, smpl_t val) {
+void cvec_norm_set_all(cvec_t *s, smpl_t val) {
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsSet(val, s->norm, (int)s->length);
+#else
uint_t j;
for (j=0; j< s->length; j++) {
s->norm[j] = val;
}
+#endif
}
void cvec_norm_zeros(cvec_t *s) {
-#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsZero(s->norm, (int)s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
memset(s->norm, 0, s->length * sizeof(smpl_t));
-#else /* HAVE_MEMCPY_HACKS */
+#else
cvec_norm_set_all (s, 0.);
-#endif /* HAVE_MEMCPY_HACKS */
+#endif
}
void cvec_norm_ones(cvec_t *s) {
@@ -117,14 +126,20 @@
}
void cvec_phas_set_all (cvec_t *s, smpl_t val) {
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsSet(val, s->phas, (int)s->length);
+#else
uint_t j;
for (j=0; j< s->length; j++) {
s->phas[j] = val;
}
+#endif
}
void cvec_phas_zeros(cvec_t *s) {
-#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsZero(s->phas, (int)s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
memset(s->phas, 0, s->length * sizeof(smpl_t));
#else
cvec_phas_set_all (s, 0.);
@@ -141,8 +156,14 @@
}
void cvec_logmag(cvec_t *s, smpl_t lambda) {
- uint_t j;
- for (j=0; j< s->length; j++) {
- s->norm[j] = LOG(lambda * s->norm[j] + 1);
- }
+ #if defined(HAVE_INTEL_IPP)
+ aubio_ippsMulC(s->norm, lambda, s->norm, (int)s->length);
+ aubio_ippsAddC(s->norm, 1.0, s->norm, (int)s->length);
+ aubio_ippsLn(s->norm, s->norm, (int)s->length);
+ #else
+ uint_t j;
+ for (j=0; j< s->length; j++) {
+ s->norm[j] = LOG(lambda * s->norm[j] + 1);
+ }
+ #endif
}
--- a/src/fvec.c
+++ b/src/fvec.c
@@ -60,28 +60,31 @@
}
void fvec_set_all (fvec_t *s, smpl_t val) {
-#if !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS)
- uint_t j;
- for (j=0; j< s->length; j++) {
- s->data[j] = val;
- }
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsSet(val, s->data, (int)s->length);
#elif defined(HAVE_ATLAS)
aubio_catlas_set(s->length, val, s->data, 1);
#elif defined(HAVE_ACCELERATE)
aubio_vDSP_vfill(&val, s->data, 1, s->length);
+#else
+ uint_t j;
+ for ( j = 0; j< s->length; j++ )
+ {
+ s->data[j] = val;
+ }
#endif
}
void fvec_zeros(fvec_t *s) {
-#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE)
- fvec_set_all (s, 0.);
-#else
-#if defined(HAVE_MEMCPY_HACKS)
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsZero(s->data, (int)s->length);
+#elif defined(HAVE_ACCELERATE)
+ aubio_vDSP_vclr(s->data, 1, s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
memset(s->data, 0, s->length * sizeof(smpl_t));
#else
- aubio_vDSP_vclr(s->data, 1, s->length);
+ fvec_set_all(s, 0.);
#endif
-#endif
}
void fvec_ones(fvec_t *s) {
@@ -96,27 +99,31 @@
}
void fvec_weight(fvec_t *s, const fvec_t *weight) {
-#ifndef HAVE_ACCELERATE
- uint_t j;
uint_t length = MIN(s->length, weight->length);
- for (j=0; j< length; j++) {
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsMul(s->data, weight->data, s->data, (int)length);
+#elif defined(HAVE_ACCELERATE)
+ aubio_vDSP_vmul( s->data, 1, weight->data, 1, s->data, 1, length );
+#else
+ uint_t j;
+ for (j = 0; j < length; j++) {
s->data[j] *= weight->data[j];
}
-#else
- aubio_vDSP_vmul(s->data, 1, weight->data, 1, s->data, 1, s->length);
#endif /* HAVE_ACCELERATE */
}
void fvec_weighted_copy(const fvec_t *in, const fvec_t *weight, fvec_t *out) {
-#ifndef HAVE_ACCELERATE
+ uint_t length = MIN(in->length, MIN(out->length, weight->length));
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsMul(in->data, weight->data, out->data, (int)length);
+#elif defined(HAVE_ACCELERATE)
+ aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, length);
+#else
uint_t j;
- uint_t length = MIN(out->length, weight->length);
- for (j=0; j< length; j++) {
+ for (j = 0; j < length; j++) {
out->data[j] = in->data[j] * weight->data[j];
}
-#else
- aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, out->length);
-#endif /* HAVE_ACCELERATE */
+#endif
}
void fvec_copy(const fvec_t *s, fvec_t *t) {
@@ -125,16 +132,18 @@
s->length, t->length);
return;
}
-#ifdef HAVE_NOOPT
- uint_t j;
- for (j=0; j< t->length; j++) {
- t->data[j] = s->data[j];
- }
-#elif defined(HAVE_MEMCPY_HACKS)
- memcpy(t->data, s->data, t->length * sizeof(smpl_t));
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsCopy(s->data, t->data, (int)s->length);
#elif defined(HAVE_ATLAS)
aubio_cblas_copy(s->length, s->data, 1, t->data, 1);
#elif defined(HAVE_ACCELERATE)
aubio_vDSP_mmov(s->data, t->data, 1, s->length, 1, 1);
+#elif defined(HAVE_MEMCPY_HACKS)
+ memcpy(t->data, s->data, t->length * sizeof(smpl_t));
+#else
+ uint_t j;
+ for (j = 0; j < t->length; j++) {
+ t->data[j] = s->data[j];
+ }
#endif
}
--- a/src/mathutils.c
+++ b/src/mathutils.c
@@ -159,16 +159,19 @@
fvec_mean (fvec_t * s)
{
smpl_t tmp = 0.0;
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsMean(s->data, (int)s->length, &tmp);
+ return tmp;
+#elif defined(HAVE_ACCELERATE)
+ aubio_vDSP_meanv(s->data, 1, &tmp, s->length);
+ return tmp;
+#else
uint_t j;
for (j = 0; j < s->length; j++) {
tmp += s->data[j];
}
- return tmp / (smpl_t) (s->length);
-#else
- aubio_vDSP_meanv(s->data, 1, &tmp, s->length);
- return tmp;
-#endif /* HAVE_ACCELERATE */
+ return tmp / (smpl_t)(s->length);
+#endif
}
smpl_t
@@ -175,14 +178,16 @@
fvec_sum (fvec_t * s)
{
smpl_t tmp = 0.0;
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+ aubio_ippsSum(s->data, (int)s->length, &tmp);
+#elif defined(HAVE_ACCELERATE)
+ aubio_vDSP_sve(s->data, 1, &tmp, s->length);
+#else
uint_t j;
for (j = 0; j < s->length; j++) {
tmp += s->data[j];
}
-#else
- aubio_vDSP_sve(s->data, 1, &tmp, s->length);
-#endif /* HAVE_ACCELERATE */
+#endif
return tmp;
}
@@ -189,15 +194,18 @@
smpl_t
fvec_max (fvec_t * s)
{
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+ smpl_t tmp = 0.;
+ aubio_ippsMax( s->data, (int)s->length, &tmp);
+#elif defined(HAVE_ACCELERATE)
+ smpl_t tmp = 0.;
+ aubio_vDSP_maxv( s->data, 1, &tmp, s->length );
+#else
uint_t j;
- smpl_t tmp = 0.0;
- for (j = 0; j < s->length; j++) {
+ smpl_t tmp = s->data[0];
+ for (j = 1; j < s->length; j++) {
tmp = (tmp > s->data[j]) ? tmp : s->data[j];
}
-#else
- smpl_t tmp = 0.;
- aubio_vDSP_maxv(s->data, 1, &tmp, s->length);
#endif
return tmp;
}
@@ -205,15 +213,18 @@
smpl_t
fvec_min (fvec_t * s)
{
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+ smpl_t tmp = 0.;
+ aubio_ippsMin(s->data, (int)s->length, &tmp);
+#elif defined(HAVE_ACCELERATE)
+ smpl_t tmp = 0.;
+ aubio_vDSP_minv(s->data, 1, &tmp, s->length);
+#else
uint_t j;
smpl_t tmp = s->data[0];
- for (j = 0; j < s->length; j++) {
+ for (j = 1; j < s->length; j++) {
tmp = (tmp < s->data[j]) ? tmp : s->data[j];
}
-#else
- smpl_t tmp = 0.;
- aubio_vDSP_minv(s->data, 1, &tmp, s->length);
#endif
return tmp;
}
@@ -572,6 +583,17 @@
uint_t i = 1;
while (i < a) i <<= 1;
return i;
+}
+
+uint_t
+aubio_power_of_two_order (uint_t a)
+{
+ int order = 0;
+ int temp = aubio_next_power_of_two(a);
+ while (temp >>= 1) {
+ ++order;
+ }
+ return order;
}
smpl_t
--- a/src/mathutils.h
+++ b/src/mathutils.h
@@ -312,6 +312,9 @@
/** return the next power of power of 2 greater than a */
uint_t aubio_next_power_of_two(uint_t a);
+/** return the base-2 logarithm (order) of the next power of two greater than or equal to a */
+uint_t aubio_power_of_two_order(uint_t a);
+
/** compute normalised autocorrelation function
\param input vector to compute autocorrelation from
--- a/src/spectral/fft.c
+++ b/src/spectral/fft.c
@@ -77,8 +77,7 @@
// a global mutex for FFTW thread safety
pthread_mutex_t aubio_fftw_mutex = PTHREAD_MUTEX_INITIALIZER;
-#else
-#ifdef HAVE_ACCELERATE // using ACCELERATE
+#elif defined HAVE_ACCELERATE // using ACCELERATE
// https://developer.apple.com/library/mac/#documentation/Accelerate/Reference/vDSPRef/Reference/reference.html
#include <Accelerate/Accelerate.h>
@@ -112,32 +111,65 @@
#define aubio_vvsqrt vvsqrt
#endif /* HAVE_AUBIO_DOUBLE */
-#else // using OOURA
+#elif defined HAVE_INTEL_IPP // using INTEL IPP
+
+#if !HAVE_AUBIO_DOUBLE
+#define aubio_IppFloat Ipp32f
+#define aubio_IppComplex Ipp32fc
+#define aubio_FFTSpec FFTSpec_R_32f
+#define aubio_ippsMalloc_complex ippsMalloc_32fc
+#define aubio_ippsFFTInit_R ippsFFTInit_R_32f
+#define aubio_ippsFFTGetSize_R ippsFFTGetSize_R_32f
+#define aubio_ippsFFTInv_CCSToR ippsFFTInv_CCSToR_32f
+#define aubio_ippsFFTFwd_RToCCS ippsFFTFwd_RToCCS_32f
+#define aubio_ippsAtan2 ippsAtan2_32f_A21
+#else /* HAVE_AUBIO_DOUBLE */
+#define aubio_IppFloat Ipp64f
+#define aubio_IppComplex Ipp64fc
+#define aubio_FFTSpec FFTSpec_R_64f
+#define aubio_ippsMalloc_complex ippsMalloc_64fc
+#define aubio_ippsFFTInit_R ippsFFTInit_R_64f
+#define aubio_ippsFFTGetSize_R ippsFFTGetSize_R_64f
+#define aubio_ippsFFTInv_CCSToR ippsFFTInv_CCSToR_64f
+#define aubio_ippsFFTFwd_RToCCS ippsFFTFwd_RToCCS_64f
+#define aubio_ippsAtan2 ippsAtan2_64f_A50
+#endif
+
+
+#else // using OOURA
// let's use ooura instead
extern void aubio_ooura_rdft(int, int, smpl_t *, int *, smpl_t *);
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif
struct _aubio_fft_t {
uint_t winsize;
uint_t fft_size;
+
#ifdef HAVE_FFTW3 // using FFTW3
real_t *in, *out;
fftw_plan pfw, pbw;
- fft_data_t * specdata; /* complex spectral data */
-#else
-#ifdef HAVE_ACCELERATE // using ACCELERATE
+ fft_data_t * specdata; /* complex spectral data */
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
int log2fftsize;
aubio_FFTSetup fftSetup;
aubio_DSPSplitComplex spec;
smpl_t *in, *out;
+
+#elif defined HAVE_INTEL_IPP // using Intel IPP
+ smpl_t *in, *out;
+ Ipp8u* memSpec;
+ Ipp8u* memInit;
+ Ipp8u* memBuffer;
+ struct aubio_FFTSpec* fftSpec;
+ aubio_IppComplex* complexOut;
#else // using OOURA
smpl_t *in, *out;
smpl_t *w;
int *ip;
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
+
fvec_t * compspec;
};
@@ -147,6 +179,7 @@
AUBIO_ERR("fft: got winsize %d, but can not be < 2\n", winsize);
goto beach;
}
+
#ifdef HAVE_FFTW3
uint_t i;
s->winsize = winsize;
@@ -175,17 +208,55 @@
for (i = 0; i < s->fft_size; i++) {
s->specdata[i] = 0.;
}
-#else
-#ifdef HAVE_ACCELERATE // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
s->winsize = winsize;
s->fft_size = winsize;
s->compspec = new_fvec(winsize);
- s->log2fftsize = (uint_t)log2f(s->fft_size);
+ s->log2fftsize = aubio_power_of_two_order(s->fft_size);
s->in = AUBIO_ARRAY(smpl_t, s->fft_size);
s->out = AUBIO_ARRAY(smpl_t, s->fft_size);
s->spec.realp = AUBIO_ARRAY(smpl_t, s->fft_size/2);
s->spec.imagp = AUBIO_ARRAY(smpl_t, s->fft_size/2);
s->fftSetup = aubio_vDSP_create_fftsetup(s->log2fftsize, FFT_RADIX2);
+
+#elif defined HAVE_INTEL_IPP // using Intel IPP
+ const IppHintAlgorithm qualityHint = ippAlgHintAccurate; // OR ippAlgHintFast;
+ const int flags = IPP_FFT_NODIV_BY_ANY; // we're scaling manually afterwards
+ int order = aubio_power_of_two_order(winsize);
+ int sizeSpec, sizeInit, sizeBuffer;
+ IppStatus status;
+
+ if (winsize <= 4 || aubio_is_power_of_two(winsize) != 1)
+ {
+ AUBIO_ERR("intel IPP fft: can only create with sizes > 4 and power of two, requested %d,"
+ " try recompiling aubio with --enable-fftw3\n", winsize);
+ goto beach;
+ }
+
+ status = aubio_ippsFFTGetSize_R(order, flags, qualityHint,
+ &sizeSpec, &sizeInit, &sizeBuffer);
+ if (status != ippStsNoErr) {
+ AUBIO_ERR("fft: failed to initialize fft. IPP error: %d\n", status);
+ goto beach;
+ }
+ s->fft_size = s->winsize = winsize;
+ s->compspec = new_fvec(winsize);
+ s->in = AUBIO_ARRAY(smpl_t, s->winsize);
+ s->out = AUBIO_ARRAY(smpl_t, s->winsize);
+ s->memSpec = ippsMalloc_8u(sizeSpec);
+ s->memBuffer = ippsMalloc_8u(sizeBuffer);
+ if (sizeInit > 0 ) {
+ s->memInit = ippsMalloc_8u(sizeInit);
+ }
+ s->complexOut = aubio_ippsMalloc_complex(s->fft_size / 2 + 1);
+ status = aubio_ippsFFTInit_R(
+ &s->fftSpec, order, flags, qualityHint, s->memSpec, s->memInit);
+ if (status != ippStsNoErr) {
+ AUBIO_ERR("fft: failed to initialize. IPP error: %d\n", status);
+ goto beach;
+ }
+
#else // using OOURA
if (aubio_is_power_of_two(winsize) != 1) {
AUBIO_ERR("fft: can only create with sizes power of two, requested %d,"
@@ -200,9 +271,10 @@
s->ip = AUBIO_ARRAY(int , s->fft_size);
s->w = AUBIO_ARRAY(smpl_t, s->fft_size);
s->ip[0] = 0;
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
+
return s;
+
beach:
AUBIO_FREE(s);
return NULL;
@@ -210,7 +282,6 @@
void del_aubio_fft(aubio_fft_t * s) {
/* destroy data */
- del_fvec(s->compspec);
#ifdef HAVE_FFTW3 // using FFTW3
pthread_mutex_lock(&aubio_fftw_mutex);
fftw_destroy_plan(s->pfw);
@@ -217,18 +288,26 @@
fftw_destroy_plan(s->pbw);
fftw_free(s->specdata);
pthread_mutex_unlock(&aubio_fftw_mutex);
-#else /* HAVE_FFTW3 */
-#ifdef HAVE_ACCELERATE // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
AUBIO_FREE(s->spec.realp);
AUBIO_FREE(s->spec.imagp);
aubio_vDSP_destroy_fftsetup(s->fftSetup);
+
+#elif defined HAVE_INTEL_IPP // using Intel IPP
+ ippFree(s->memSpec);
+ ippFree(s->memInit);
+ ippFree(s->memBuffer);
+ ippFree(s->complexOut);
+
#else // using OOURA
AUBIO_FREE(s->w);
AUBIO_FREE(s->ip);
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
- AUBIO_FREE(s->out);
+#endif
+
+ del_fvec(s->compspec);
AUBIO_FREE(s->in);
+ AUBIO_FREE(s->out);
AUBIO_FREE(s);
}
@@ -251,6 +330,7 @@
#else
memcpy(s->in, input->data, s->winsize * sizeof(smpl_t));
#endif /* HAVE_MEMCPY_HACKS */
+
#ifdef HAVE_FFTW3 // using FFTW3
fftw_execute(s->pfw);
#ifdef HAVE_COMPLEX_H
@@ -265,8 +345,8 @@
compspec->data[i] = s->specdata[i];
}
#endif /* HAVE_COMPLEX_H */
-#else /* HAVE_FFTW3 */
-#ifdef HAVE_ACCELERATE // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
// convert real data to even/odd format used in vDSP
aubio_vDSP_ctoz((aubio_DSPComplex*)s->in, 2, &s->spec, 1, s->fft_size/2);
// compute the FFT
@@ -281,6 +361,19 @@
// apply scaling
smpl_t scale = 1./2.;
aubio_vDSP_vsmul(compspec->data, 1, &scale, compspec->data, 1, s->fft_size);
+
+#elif defined HAVE_INTEL_IPP // using Intel IPP
+
+ // apply fft
+ aubio_ippsFFTFwd_RToCCS(s->in, (aubio_IppFloat*)s->complexOut, s->fftSpec, s->memBuffer);
+ // convert complex buffer to [ r0, r1, ..., rN, iN-1, .., i2, i1]
+ compspec->data[0] = s->complexOut[0].re;
+ compspec->data[s->fft_size / 2] = s->complexOut[s->fft_size / 2].re;
+ for (i = 1; i < s->fft_size / 2; i++) {
+ compspec->data[i] = s->complexOut[i].re;
+ compspec->data[s->fft_size - i] = s->complexOut[i].im;
+ }
+
#else // using OOURA
aubio_ooura_rdft(s->winsize, 1, s->in, s->ip, s->w);
compspec->data[0] = s->in[0];
@@ -289,8 +382,7 @@
compspec->data[i] = s->in[2 * i];
compspec->data[s->winsize - i] = - s->in[2 * i + 1];
}
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
}
void aubio_fft_rdo_complex(aubio_fft_t * s, const fvec_t * compspec, fvec_t * output) {
@@ -313,8 +405,8 @@
for (i = 0; i < output->length; i++) {
output->data[i] = s->out[i]*renorm;
}
-#else /* HAVE_FFTW3 */
-#ifdef HAVE_ACCELERATE // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
// convert from real imag [ r0, r1, ..., rN, iN-1, .., i2, i1]
// to vDSP packed format [ r0, rN, r1, i1, ..., rN-1, iN-1 ]
s->out[0] = compspec->data[0];
@@ -332,6 +424,23 @@
// apply scaling
smpl_t scale = 1.0 / s->winsize;
aubio_vDSP_vsmul(output->data, 1, &scale, output->data, 1, s->fft_size);
+
+#elif defined HAVE_INTEL_IPP // using Intel IPP
+
+ // convert from real imag [ r0, r1, ..., rN, iN-1, .., i2, i1 ] to complex format
+ s->complexOut[0].re = compspec->data[0];
+ s->complexOut[0].im = 0;
+ s->complexOut[s->fft_size / 2].re = compspec->data[s->fft_size / 2];
+ s->complexOut[s->fft_size / 2].im = 0.0;
+ for (i = 1; i < s->fft_size / 2; i++) {
+ s->complexOut[i].re = compspec->data[i];
+ s->complexOut[i].im = compspec->data[s->fft_size - i];
+ }
+ // apply fft
+ aubio_ippsFFTInv_CCSToR((const aubio_IppFloat *)s->complexOut, output->data, s->fftSpec, s->memBuffer);
+ // apply scaling
+ aubio_ippsMulC(output->data, 1.0 / s->winsize, output->data, s->fft_size);
+
#else // using OOURA
smpl_t scale = 2.0 / s->winsize;
s->out[0] = compspec->data[0];
@@ -344,8 +453,7 @@
for (i=0; i < s->winsize; i++) {
output->data[i] = s->out[i] * scale;
}
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif
}
void aubio_fft_get_spectrum(const fvec_t * compspec, cvec_t * spectrum) {
@@ -365,10 +473,26 @@
} else {
spectrum->phas[0] = 0.;
}
+#if defined(HAVE_INTEL_IPP)
+ // convert from real imag [ r0, r1, ..., rN, iN-1, ..., i2, i1]
+ // to [ r0, r1, ..., rN, i1, i2, ..., iN-1]
+ for (i = 1; i < spectrum->length / 2; i++) {
+ ELEM_SWAP(compspec->data[compspec->length - i],
+ compspec->data[spectrum->length + i - 1]);
+ }
+ aubio_ippsAtan2(compspec->data + spectrum->length,
+ compspec->data + 1, spectrum->phas + 1, spectrum->length - 1);
+ // revert the imaginary part back again
+ for (i = 1; i < spectrum->length / 2; i++) {
+ ELEM_SWAP(compspec->data[spectrum->length + i - 1],
+ compspec->data[compspec->length - i]);
+ }
+#else
for (i=1; i < spectrum->length - 1; i++) {
spectrum->phas[i] = ATAN2(compspec->data[compspec->length-i],
compspec->data[i]);
}
+#endif
if (compspec->data[compspec->length/2] < 0) {
spectrum->phas[spectrum->length - 1] = PI;
} else {
--- a/src/wscript_build
+++ b/src/wscript_build
@@ -3,6 +3,7 @@
uselib = []
uselib += ['M']
uselib += ['FFTW3', 'FFTW3F']
+uselib += ['INTEL_IPP']
uselib += ['SAMPLERATE']
uselib += ['SNDFILE']
uselib += ['AVCODEC']
--- a/wscript
+++ b/wscript
@@ -50,6 +50,9 @@
add_option_enable_disable(ctx, 'fftw3', default = False,
help_str = 'compile with fftw3 instead of ooura',
help_disable_str = 'do not compile with fftw3')
+ add_option_enable_disable(ctx, 'intelipp', default = False,
+ help_str = 'use Intel IPP libraries (auto)',
+ help_disable_str = 'do not use Intel IPP libraries')
add_option_enable_disable(ctx, 'complex', default = False,
help_str ='compile with C99 complex',
help_disable_str = 'do not use C99 complex (default)' )
@@ -155,6 +158,10 @@
ctx.env.LINKFLAGS += ['/DEBUG', '/INCREMENTAL:NO']
# configure warnings
ctx.env.CFLAGS += ['/W4', '/D_CRT_SECURE_NO_WARNINGS']
+ # ignore "possible loss of data" warnings
+ ctx.env.CFLAGS += ['/wd4305', '/wd4244', '/wd4245', '/wd4267']
+ # ignore "unreferenced formal parameter" warnings
+ ctx.env.CFLAGS += ['/wd4100']
# set optimization level and runtime libs
if (ctx.options.build_type == "release"):
ctx.env.CFLAGS += ['/Ox']
@@ -244,7 +251,9 @@
ctx.env.cstlib_PATTERN = '%s.a'
# tell emscripten functions we want to expose
- from python.lib.gen_external import get_c_declarations, get_cpp_objects_from_c_declarations, get_all_func_names_from_lib, generate_lib_from_c_declarations
+ from python.lib.gen_external import get_c_declarations, \
+ get_cpp_objects_from_c_declarations, get_all_func_names_from_lib, \
+ generate_lib_from_c_declarations
c_decls = get_c_declarations(usedouble=False) # emscripten can't use double
objects = list(get_cpp_objects_from_c_declarations(c_decls))
# ensure that aubio structs are exported
@@ -283,6 +292,21 @@
else:
ctx.msg('Checking if complex.h is enabled', 'no')
+ # check for Intel IPP
+ if (ctx.options.enable_intelipp != False):
+ has_ipp_headers = ctx.check(header_name=['ippcore.h', 'ippvm.h', 'ipps.h'],
+ mandatory = False)
+ has_ipp_libs = ctx.check(lib=['ippcore', 'ippvm', 'ipps'],
+ uselib_store='INTEL_IPP', mandatory = False)
+ if (has_ipp_headers and has_ipp_libs):
+ ctx.msg('Checking if Intel IPP is available', 'yes')
+ ctx.define('HAVE_INTEL_IPP', 1)
+ if ctx.env.CC_NAME == 'msvc':
+ # force linking sequential (single-threaded) static IPP libraries on Windows with msvc
+ ctx.define('_IPP_SEQUENTIAL_STATIC', 1)
+ else:
+ ctx.msg('Checking if Intel IPP is available', 'no')
+
# check for fftw3
if (ctx.options.enable_fftw3 != False or ctx.options.enable_fftw3f != False):
# one of fftwf or fftw3f
@@ -306,7 +330,7 @@
mandatory = ctx.options.enable_fftw3)
ctx.define('HAVE_FFTW3', 1)
- # fftw not enabled, use vDSP or ooura
+ # fftw not enabled, use vDSP, Intel IPP or ooura
if 'HAVE_FFTW3F' in ctx.env.define_key:
ctx.msg('Checking for FFT implementation', 'fftw3f')
elif 'HAVE_FFTW3' in ctx.env.define_key:
@@ -313,6 +337,8 @@
ctx.msg('Checking for FFT implementation', 'fftw3')
elif 'HAVE_ACCELERATE' in ctx.env.define_key:
ctx.msg('Checking for FFT implementation', 'vDSP')
+ elif 'HAVE_INTEL_IPP' in ctx.env.define_key:
+ ctx.msg('Checking for FFT implementation', 'Intel IPP')
else:
ctx.msg('Checking for FFT implementation', 'ooura')