shithub: opus

Download patch

ref: 9a2c0e34cad4d6f81103a8b6560fef69e8cd4047
parent: 31a8028e9786fd1f463e797f979feb7df3a96947
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Wed Jun 14 10:26:20 EDT 2023

Detect AVX/AVX2/FMA instead of just AVX

--- a/Makefile.am
+++ b/Makefile.am
@@ -52,8 +52,8 @@
 if HAVE_SSE4_1
 CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
 endif
-if HAVE_AVX
-CELT_SOURCES += $(CELT_SOURCES_AVX)
+if HAVE_AVX2
+CELT_SOURCES += $(CELT_SOURCES_AVX2)
 endif
 endif
 
@@ -395,9 +395,9 @@
 $(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
 endif
 
-if HAVE_AVX
-AVX_OBJ = $(CELT_SOURCES_AVX:.c=.lo)
-$(AVX_OBJ): CFLAGS += $(OPUS_X86_AVX_CFLAGS)
+if HAVE_AVX2
+AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo)
+$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
 endif
 
 if HAVE_ARM_NEON_INTR
--- a/celt/cpu_support.h
+++ b/celt/cpu_support.h
@@ -47,7 +47,7 @@
   ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
   (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
   (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
-  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
+  (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
 
 #include "x86/x86cpu.h"
 /* We currently support 5 x86 variants:
--- a/celt/x86/x86cpu.c
+++ b/celt/x86/x86cpu.c
@@ -39,7 +39,7 @@
   ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
   (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
   (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
-  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
+  (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
 
 #if defined(_MSC_VER)
 
@@ -105,7 +105,7 @@
     int HW_SSE2;
     int HW_SSE41;
     /*  SIMD: 256-bit */
-    int HW_AVX;
+    int HW_AVX2;
 } CPU_Feature;
 
 static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
@@ -121,13 +121,19 @@
         cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
         cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
         cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
-        cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
+        cpu_feature->HW_AVX2 = (info[2] & (1 << 28)) != 0 && (info[2] & (1 << 12)) != 0;
+        if (cpu_feature->HW_AVX2 && nIds >= 7) {
+            cpuid(info, 7);
+            cpu_feature->HW_AVX2 = cpu_feature->HW_AVX2 && (info[1] & (1 << 5)) != 0;
+        } else {
+            cpu_feature->HW_AVX2 = 0;
+        }
     }
     else {
         cpu_feature->HW_SSE = 0;
         cpu_feature->HW_SSE2 = 0;
         cpu_feature->HW_SSE41 = 0;
-        cpu_feature->HW_AVX = 0;
+        cpu_feature->HW_AVX2 = 0;
     }
 }
 
@@ -157,7 +163,7 @@
     }
     arch++;
 
-    if (!cpu_feature.HW_AVX)
+    if (!cpu_feature.HW_AVX2)
     {
         return arch;
     }
--- a/celt/x86/x86cpu.h
+++ b/celt/x86/x86cpu.h
@@ -46,10 +46,10 @@
 #  define MAY_HAVE_SSE4_1(name) name ## _c
 # endif
 
-# if defined(OPUS_X86_MAY_HAVE_AVX)
-#  define MAY_HAVE_AVX(name) name ## _avx
+# if defined(OPUS_X86_MAY_HAVE_AVX2)
+#  define MAY_HAVE_AVX2(name) name ## _avx
 # else
-#  define MAY_HAVE_AVX(name) name ## _c
+#  define MAY_HAVE_AVX2(name) name ## _c
 # endif
 
 # if defined(OPUS_HAVE_RTCD)
--- a/celt_sources.mk
+++ b/celt_sources.mk
@@ -33,7 +33,7 @@
 celt/x86/celt_lpc_sse4_1.c \
 celt/x86/pitch_sse4_1.c
 
-CELT_SOURCES_AVX = \
+CELT_SOURCES_AVX2 = \
 celt/x86/pitch_avx.c
 
 CELT_SOURCES_ARM_RTCD = \
--- a/configure.ac
+++ b/configure.ac
@@ -368,12 +368,12 @@
 AM_CONDITIONAL([HAVE_SSE], [false])
 AM_CONDITIONAL([HAVE_SSE2], [false])
 AM_CONDITIONAL([HAVE_SSE4_1], [false])
-AM_CONDITIONAL([HAVE_AVX], [false])
+AM_CONDITIONAL([HAVE_AVX2], [false])
 
 m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse])
 m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
 m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
-m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx])
+m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -mavx2])
 m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
 # With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
 # -mfloat-abi=softfp for -mfpu=neon to work.  However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
@@ -390,13 +390,13 @@
 AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@])
 AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@])
 AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
-AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@])
+AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@])
 AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
 
 AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
 AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
 AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
-AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")])
+AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")])
 AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
 
 AC_DEFUN([OPUS_PATH_NE10],
@@ -617,10 +617,10 @@
           ]
       )
       OPUS_CHECK_INTRINSICS(
-         [AVX],
-         [$X86_AVX_CFLAGS],
-         [OPUS_X86_MAY_HAVE_AVX],
-         [OPUS_X86_PRESUME_AVX],
+         [AVX2],
+         [$X86_AVX2_CFLAGS],
+         [OPUS_X86_MAY_HAVE_AVX2],
+         [OPUS_X86_PRESUME_AVX2],
          [[#include <immintrin.h>
            #include <time.h>
          ]],
@@ -631,10 +631,10 @@
              return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
          ]]
       )
-      AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
+      AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1" && test x"$OPUS_X86_PRESUME_AVX2" != x"1"],
           [
-             OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS"
-             AC_SUBST([OPUS_X86_AVX_CFLAGS])
+             OPUS_X86_AVX2_CFLAGS="$X86_AVX2_CFLAGS"
+             AC_SUBST([OPUS_X86_AVX2_CFLAGS])
           ]
       )
          AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""])
@@ -676,17 +676,17 @@
          [
             AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics])
          ])
-         AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"],
+         AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"],
          [
-            AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics])
-            intrinsics_support="$intrinsics_support AVX"
+            AC_DEFINE([OPUS_X86_MAY_HAVE_AVX2], 1, [Compiler supports X86 AVX2 Intrinsics])
+            intrinsics_support="$intrinsics_support AVX2"
 
-            AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"],
-               [AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])],
-               [rtcd_support="$rtcd_support AVX"])
+            AS_IF([test x"$OPUS_X86_PRESUME_AVX2" = x"1"],
+               [AC_DEFINE([OPUS_X86_PRESUME_AVX2], 1, [Define if binary requires AVX2 intrinsics support])],
+               [rtcd_support="$rtcd_support AVX2"])
          ],
          [
-            AC_MSG_WARN([Compiler does not support AVX intrinsics])
+            AC_MSG_WARN([Compiler does not support AVX2 intrinsics])
          ])
 
          AS_IF([test x"$intrinsics_support" = x""],
@@ -769,8 +769,8 @@
     [test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"])
 AM_CONDITIONAL([HAVE_SSE4_1],
     [test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"])
-AM_CONDITIONAL([HAVE_AVX],
-    [test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"])
+AM_CONDITIONAL([HAVE_AVX2],
+    [test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"])
 
 AM_CONDITIONAL([HAVE_RTCD],
  [test x"$enable_rtcd" = x"yes" -a x"$rtcd_support" != x"no"])
--