ref: 9a2c0e34cad4d6f81103a8b6560fef69e8cd4047
parent: 31a8028e9786fd1f463e797f979feb7df3a96947
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Wed Jun 14 10:26:20 EDT 2023
Detect AVX/AVX2/FMA instead of just AVX
--- a/Makefile.am
+++ b/Makefile.am
@@ -52,8 +52,8 @@
if HAVE_SSE4_1
CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
endif
-if HAVE_AVX
-CELT_SOURCES += $(CELT_SOURCES_AVX)
+if HAVE_AVX2
+CELT_SOURCES += $(CELT_SOURCES_AVX2)
endif
endif
@@ -395,9 +395,9 @@
$(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
endif
-if HAVE_AVX
-AVX_OBJ = $(CELT_SOURCES_AVX:.c=.lo)
-$(AVX_OBJ): CFLAGS += $(OPUS_X86_AVX_CFLAGS)
+if HAVE_AVX2
+AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo)
+$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
endif
if HAVE_ARM_NEON_INTR
--- a/celt/cpu_support.h
+++ b/celt/cpu_support.h
@@ -47,7 +47,7 @@
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
- (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
+ (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:
--- a/celt/x86/x86cpu.c
+++ b/celt/x86/x86cpu.c
@@ -39,7 +39,7 @@
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
- (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
+ (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
#if defined(_MSC_VER)
@@ -105,7 +105,7 @@
int HW_SSE2;
int HW_SSE41;
/* SIMD: 256-bit */
- int HW_AVX;
+ int HW_AVX2;
} CPU_Feature;
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
@@ -121,13 +121,19 @@
cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
- cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
+ cpu_feature->HW_AVX2 = (info[2] & (1 << 28)) != 0 && (info[2] & (1 << 12)) != 0;
+ if (cpu_feature->HW_AVX2 && nIds >= 7) {
+ cpuid(info, 7);
+ cpu_feature->HW_AVX2 = cpu_feature->HW_AVX2 && (info[1] & (1 << 5)) != 0;
+ } else {
+ cpu_feature->HW_AVX2 = 0;
+ }
}
else {
cpu_feature->HW_SSE = 0;
cpu_feature->HW_SSE2 = 0;
cpu_feature->HW_SSE41 = 0;
- cpu_feature->HW_AVX = 0;
+ cpu_feature->HW_AVX2 = 0;
}
}
@@ -157,7 +163,7 @@
}
arch++;
- if (!cpu_feature.HW_AVX)
+ if (!cpu_feature.HW_AVX2)
{
return arch;
}
--- a/celt/x86/x86cpu.h
+++ b/celt/x86/x86cpu.h
@@ -46,10 +46,10 @@
# define MAY_HAVE_SSE4_1(name) name ## _c
# endif
-# if defined(OPUS_X86_MAY_HAVE_AVX)
-# define MAY_HAVE_AVX(name) name ## _avx
+# if defined(OPUS_X86_MAY_HAVE_AVX2)
+# define MAY_HAVE_AVX2(name) name ## _avx
# else
-# define MAY_HAVE_AVX(name) name ## _c
+# define MAY_HAVE_AVX2(name) name ## _c
# endif
# if defined(OPUS_HAVE_RTCD)
--- a/celt_sources.mk
+++ b/celt_sources.mk
@@ -33,7 +33,7 @@
celt/x86/celt_lpc_sse4_1.c \
celt/x86/pitch_sse4_1.c
-CELT_SOURCES_AVX = \
+CELT_SOURCES_AVX2 = \
celt/x86/pitch_avx.c
CELT_SOURCES_ARM_RTCD = \
--- a/configure.ac
+++ b/configure.ac
@@ -368,12 +368,12 @@
AM_CONDITIONAL([HAVE_SSE], [false])
AM_CONDITIONAL([HAVE_SSE2], [false])
AM_CONDITIONAL([HAVE_SSE4_1], [false])
-AM_CONDITIONAL([HAVE_AVX], [false])
+AM_CONDITIONAL([HAVE_AVX2], [false])
m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse])
m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
-m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx])
+m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -avx2])
m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
# With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
# -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
@@ -390,13 +390,13 @@
AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
-AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@])
+AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@])
AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
-AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")])
+AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")])
AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
AC_DEFUN([OPUS_PATH_NE10],
@@ -617,10 +617,10 @@
]
)
OPUS_CHECK_INTRINSICS(
- [AVX],
- [$X86_AVX_CFLAGS],
- [OPUS_X86_MAY_HAVE_AVX],
- [OPUS_X86_PRESUME_AVX],
+ [AVX2],
+ [$X86_AVX2_CFLAGS],
+ [OPUS_X86_MAY_HAVE_AVX2],
+ [OPUS_X86_PRESUME_AVX2],
[[#include <immintrin.h>
#include <time.h>
]],
@@ -631,10 +631,10 @@
return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
]]
)
- AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
+ AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1" && test x"$OPUS_X86_PRESUME_AVX2" != x"1"],
[
- OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS"
- AC_SUBST([OPUS_X86_AVX_CFLAGS])
+ OPUS_X86_AVX2_CFLAGS="$X86_AVX2_CFLAGS"
+ AC_SUBST([OPUS_X86_AVX2_CFLAGS])
]
)
AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""])
@@ -676,17 +676,17 @@
[
AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics])
])
- AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"],
+ AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"],
[
- AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics])
- intrinsics_support="$intrinsics_support AVX"
+ AC_DEFINE([OPUS_X86_MAY_HAVE_AVX2], 1, [Compiler supports X86 AVX2 Intrinsics])
+ intrinsics_support="$intrinsics_support AVX2"
- AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"],
- [AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])],
- [rtcd_support="$rtcd_support AVX"])
+ AS_IF([test x"$OPUS_X86_PRESUME_AVX2" = x"1"],
+ [AC_DEFINE([OPUS_X86_PRESUME_AVX2], 1, [Define if binary requires AVX2 intrinsics support])],
+ [rtcd_support="$rtcd_support AVX2"])
],
[
- AC_MSG_WARN([Compiler does not support AVX intrinsics])
+ AC_MSG_WARN([Compiler does not support AVX2 intrinsics])
])
AS_IF([test x"$intrinsics_support" = x""],
@@ -769,8 +769,8 @@
[test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"])
AM_CONDITIONAL([HAVE_SSE4_1],
[test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"])
-AM_CONDITIONAL([HAVE_AVX],
- [test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"])
+AM_CONDITIONAL([HAVE_AVX2],
+ [test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"])
AM_CONDITIONAL([HAVE_RTCD],
[test x"$enable_rtcd" = x"yes" -a x"$rtcd_support" != x"no"])
--
⑨