shithub: opus

--- a/Makefile.am

+++ b/Makefile.am

@@ -39,7 +39,10 @@

 if HAVE_SSE4_1

 SILK_SOURCES += $(SILK_SOURCES_SSE4_1)

 endif

+if HAVE_AVX2

+SILK_SOURCES += $(SILK_SOURCES_FLOAT_AVX2)

 endif

+endif

 if DISABLE_FLOAT_API

 else

@@ -427,6 +430,7 @@

 if HAVE_AVX2

 AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo) \

            $(SILK_SOURCES_AVX2:.c=.lo) \

+           $(SILK_SOURCES_FLOAT_AVX2:.c=.lo) \

            $(DNN_SOURCES_AVX2:.c=.lo)

 $(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)

 endif

--- a/silk/float/SigProc_FLP.h

+++ b/silk/float/SigProc_FLP.h

@@ -30,6 +30,7 @@

 #include "SigProc_FIX.h"

 #include "float_cast.h"

+#include "main.h"

 #include <math.h>

 #ifdef  __cplusplus

@@ -124,11 +125,16 @@

);

 /* inner product of two silk_float arrays, with result as double */

-double silk_inner_product_FLP(

+double silk_inner_product_FLP_c(

     const silk_float    *data1,

     const silk_float    *data2,

     opus_int            dataSize

);

+#ifndef OVERRIDE_inner_product_FLP

+#define silk_inner_product_FLP(data1, data2, dataSize) silk_inner_product_FLP_c(data1, data2, dataSize)

+#endif

 /* sum of squares of a silk_float array, with result as double */

 double silk_energy_FLP(

--- a/silk/float/inner_product_FLP.c

+++ b/silk/float/inner_product_FLP.c

@@ -32,7 +32,7 @@

 #include "SigProc_FLP.h"

 /* inner product of two silk_float arrays, with result as double */

-double silk_inner_product_FLP(

+double silk_inner_product_FLP_c(

     const silk_float    *data1,

     const silk_float    *data2,

     opus_int            dataSize

--- a/silk/float/x86/inner_product_FLP_avx2.c

+++ b/silk/float/x86/inner_product_FLP_avx2.c

@@ -35,7 +35,7 @@

 /* inner product of two silk_float arrays, with result as double */

-double silk_inner_product_FLP(

+double silk_inner_product_FLP_avx2(

     const silk_float    *data1,

     const silk_float    *data2,

     opus_int            dataSize

--- a/silk/x86/main_sse.h

+++ b/silk/x86/main_sse.h

@@ -269,5 +269,22 @@

 #  endif

+double silk_inner_product_FLP_avx2(

+    const silk_float    *data1,

+    const silk_float    *data2,

+    opus_int            dataSize

+);

+#if defined (OPUS_X86_PRESUME_AVX2)

+#define OVERRIDE_inner_product_FLP

+#define silk_inner_product_FLP(data1, data2, dataSize) silk_inner_product_FLP_avx2(data1, data2, dataSize)

+#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2)

+/* Runtime dispatch to the AVX2 version is not hooked up yet, so the override stays disabled here: #define OVERRIDE_inner_product_FLP */

+#endif

 # endif

 #endif

--- a/silk_sources.mk

+++ b/silk_sources.mk

@@ -159,3 +159,6 @@

 silk/float/scale_vector_FLP.c \

 silk/float/schur_FLP.c \

 silk/float/sort_FLP.c

+SILK_SOURCES_FLOAT_AVX2 = \

+silk/float/x86/inner_product_FLP_avx2.c

--

⑨