ref: fcecf9970b242dfa73213e6d00ce9d57979e5fb0
parent: 48a9da08807277e3e78cdbffff0da0a2fc7384da
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Fri Mar 8 18:57:12 EST 2024
Remove the use of __m128i_u entirely. It's just an internal gcc/clang type.
--- a/configure.ac
+++ b/configure.ac
@@ -699,7 +699,7 @@
mtest = _mm256_fmadd_ps(mtest, mtest, mtest);
mtest1 = _mm256_set_m128i(_mm_loadu_si64(utest), _mm_loadu_si64(utest));
mtest2 =
- _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u *)utest));
+ _mm256_cvtepi16_epi32(_mm_loadu_si128(utest));
return _mm256_extract_epi16(_mm256_xor_si256(
_mm256_xor_si256(mtest1, mtest2), _mm256_cvttps_epi32(mtest)), 0);
]]
--- a/silk/x86/NSQ_del_dec_avx2.c
+++ b/silk/x86/NSQ_del_dec_avx2.c
@@ -73,7 +73,6 @@
/* Intrinsics not defined on MSVC */
#ifdef _MSC_VER
#include <Intsafe.h>
-#define __m128i_u __m128i
static inline int __builtin_sadd_overflow(opus_int32 a, opus_int32 b, opus_int32* res)
{*res = a+b;
@@ -959,7 +958,7 @@
{__m256i x = _mm256_cvtepi16_epi64(_mm_loadu_si64(&x16[i]));
x = _mm256_slli_epi64(_mm256_mul_epi32(x, _mm256_set1_epi32(inv_gain_Q26)), 16);
- _mm_storeu_si128((__m128i_u*)&x_sc_Q10[i], silk_cvtepi64_epi32_high(x));
+ _mm_storeu_si128((__m128i*)&x_sc_Q10[i], silk_cvtepi64_epi32_high(x));
}
/* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
@@ -985,8 +984,8 @@
/* Scale long-term shaping state */
for (i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i+=4)
{- __m128i_u* p = (__m128i_u*)&NSQ->sLTP_shp_Q14[i];
- _mm_storeu_si128(p, silk_mm_smulww_epi32(_mm_loadu_si128(p), gain_adj_Q16));
+ opus_int32 *p = &NSQ->sLTP_shp_Q14[i];
+ _mm_storeu_si128((__m128i*)p, silk_mm_smulww_epi32(_mm_loadu_si128((__m128i*)p), gain_adj_Q16));
}
/* Scale long-term prediction state */
@@ -1041,13 +1040,13 @@
/* Allowing wrap around so that two wraps can cancel each other. The rare
cases where the result wraps around can only be triggered by invalid streams*/
- __m256i in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)&in_ptr[-8]));
- __m256i B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)& B[0]));
+ __m256i in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)&in_ptr[-8]));
+ __m256i B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)& B[0]));
__m256i sum = _mm256_mullo_epi32(in_v, silk_mm256_reverse_epi32(B_v));
if (order > 10)
{- in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)&in_ptr[-16]));
- B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)&B [8]));
+ in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)&in_ptr[-16]));
+ B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*)&B [8]));
B_v = silk_mm256_reverse_epi32(B_v);
}
else
--
⑨