ref: 71fb707875b95672f0cd1cb153c890eff4219720
parent: affb551e47052d5b9a0e37c681c816a6cf4159a7
author: Timothy B. Terriberry <territim@amazon.com>
date: Wed Jul 6 11:21:16 EDT 2022
Don't compile x86 cpu detection without RTCD. Also #error if RTCD is enabled without a detection method, like Arm. A number of SILK functions also still used the lookup tables, even when RTCD was disabled. Fix those, too.
--- a/celt/cpu_support.h
+++ b/celt/cpu_support.h
@@ -43,10 +43,11 @@
*/
#define OPUS_ARCHMASK 3
-#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+#elif defined(OPUS_HAVE_RTCD) && \
+ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
- (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
+ (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:
--- a/celt/x86/x86cpu.c
+++ b/celt/x86/x86cpu.c
@@ -35,12 +35,12 @@
#include "pitch.h"
#include "x86cpu.h"
-#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+#if defined(OPUS_HAVE_RTCD) && \
+ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
- (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
+ (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
-
#if defined(_MSC_VER)
#include <intrin.h>
@@ -91,6 +91,9 @@
what we want on CPUs that don't support CPUID. */
CPUInfo[3] = CPUInfo[2] = CPUInfo[1] = CPUInfo[0] = 0;
}
+#else
+# error "Configured to use x86 RTCD, but no CPU detection method available. " \
+ "Reconfigure with --disable-rtcd (or send patches)."
#endif
}
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -609,10 +609,12 @@
/* the following seems faster on x86 */
#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
-#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#if !defined(OVERRIDE_silk_burg_modified)
#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
+#endif
+#if !defined(OVERRIDE_silk_inner_prod16)
#define silk_inner_prod16(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len))
#endif
--- a/silk/x86/SigProc_FIX_sse.h
+++ b/silk/x86/SigProc_FIX_sse.h
@@ -46,10 +46,12 @@
);
# if defined(OPUS_X86_PRESUME_SSE4_1)
+
+# define OVERRIDE_silk_burg_modified
# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
opus_int32 *res_nrg, /* O Residual energy */
@@ -62,6 +64,7 @@
const opus_int D, /* I Order */
int arch /* I Run-time architecture */);
+# define OVERRIDE_silk_burg_modified
# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
@@ -76,10 +79,11 @@
# if defined(OPUS_X86_PRESUME_SSE4_1)
+# define OVERRIDE_silk_inner_prod16
# define silk_inner_prod16(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_sse4_1(inVec1, inVec2, len))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern opus_int64 (*const SILK_INNER_PROD16_IMPL[OPUS_ARCHMASK + 1])(
const opus_int16 *inVec1,
@@ -86,6 +90,7 @@
const opus_int16 *inVec2,
const opus_int len);
+# define OVERRIDE_silk_inner_prod16
# define silk_inner_prod16(inVec1, inVec2, len, arch) \
((*SILK_INNER_PROD16_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len))
--- a/silk/x86/main_sse.h
+++ b/silk/x86/main_sse.h
@@ -34,8 +34,6 @@
# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
-# define OVERRIDE_silk_VQ_WMat_EC
-
void silk_VQ_WMat_EC_sse4_1(
opus_int8 *ind, /* O index of best codebook vector */
opus_int32 *res_nrg_Q15, /* O best residual energy */
@@ -53,12 +51,13 @@
# if defined OPUS_X86_PRESUME_SSE4_1
+# define OVERRIDE_silk_VQ_WMat_EC
# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L, arch) \
((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
opus_int8 *ind, /* O index of best codebook vector */
@@ -75,6 +74,7 @@
const opus_int L /* I number of vectors in codebook */
);
+# define OVERRIDE_silk_VQ_WMat_EC
# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L, arch) \
((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
@@ -82,8 +82,6 @@
# endif
-# define OVERRIDE_silk_NSQ
-
void silk_NSQ_sse4_1(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@@ -104,12 +102,13 @@
# if defined OPUS_X86_PRESUME_SSE4_1
+# define OVERRIDE_silk_NSQ
# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
@@ -129,6 +128,7 @@
const opus_int LTP_scale_Q14 /* I LTP state scaling */
);
+# define OVERRIDE_silk_NSQ
# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
@@ -136,8 +136,6 @@
# endif
-# define OVERRIDE_silk_NSQ_del_dec
-
void silk_NSQ_del_dec_sse4_1(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@@ -158,12 +156,13 @@
# if defined OPUS_X86_PRESUME_SSE4_1
+# define OVERRIDE_silk_NSQ_del_dec
# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
-# else
+# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
@@ -183,6 +182,7 @@
const opus_int LTP_scale_Q14 /* I LTP state scaling */
);
+# define OVERRIDE_silk_NSQ_del_dec
# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
@@ -221,8 +221,6 @@
silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
);
-# define OVERRIDE_silk_VAD_GetSA_Q8
-
opus_int silk_VAD_GetSA_Q8_sse4_1(
silk_encoder_state *psEnC,
const opus_int16 pIn[]
@@ -229,16 +227,19 @@
);
# if defined(OPUS_X86_PRESUME_SSE4_1)
+
+# define OVERRIDE_silk_VAD_GetSA_Q8
# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))
-# else
+# elif defined(OPUS_HAVE_RTCD)
-# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
- ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
-
extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
silk_encoder_state *psEnC,
const opus_int16 pIn[]);
+
+# define OVERRIDE_silk_VAD_GetSA_Q8
+# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
+ ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
# endif
--- a/silk/x86/x86_silk_map.c
+++ b/silk/x86/x86_silk_map.c
@@ -35,7 +35,7 @@
#include "pitch.h"
#include "main.h"
-#if !defined(OPUS_X86_PRESUME_SSE4_1)
+#if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_SSE4_1)
#if defined(FIXED_POINT)