shithub: opus

Download patch

ref: 12a356e431d1b2d3531d3d73de330bf9ee9be48b
parent: ec64b3c5b7abd621dfddee6b4cc115298e5d6803
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Sat Dec 18 13:07:59 EST 2021

Disable dangerous SSE 4.1 intrinsic optimizations

These could result in 16-byte-aligned loads on unaligned data, causing
a segfault.

--- a/celt/x86/x86cpu.h
+++ b/celt/x86/x86cpu.h
@@ -56,40 +56,10 @@
 int opus_select_arch(void);
 # endif
 
-/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
-  or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
-  actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
-  reference, these require 16-byte alignment and load a full 16 bytes (instead
-  of 4 or 8), possibly reading out of bounds.
-
-  We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
-  _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
-  reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
-  optimize this out when optimizations ARE enabled.
-
-  Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
-  (which is fair, since technically the compiler is always allowed to do the
-  dereference before invoking the function implementing the intrinsic).
-  However, it is smart enough to eliminate the extra MOVD instruction.
-  For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
-  the extra MOVQ if it's specified explicitly */
-
-# if defined(__clang__) || !defined(__OPTIMIZE__)
-#  define OP_CVTEPI8_EPI32_M32(x) \
+#define OP_CVTEPI8_EPI32_M32(x) \
  (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
-# else
-#  define OP_CVTEPI8_EPI32_M32(x) \
- (_mm_cvtepi8_epi32(*(__m128i *)(x)))
-#endif
 
-/* similar reasoning about the instruction sequence as in the 32-bit macro above,
- */
-# if defined(__clang__) || !defined(__OPTIMIZE__)
-#  define OP_CVTEPI16_EPI32_M64(x) \
+#define OP_CVTEPI16_EPI32_M64(x) \
  (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
-# else
-#  define OP_CVTEPI16_EPI32_M64(x) \
- (_mm_cvtepi16_epi32(*(__m128i *)(x)))
-# endif
 
 #endif