shithub: opus

--- a/celt/arch.h

+++ b/celt/arch.h

@@ -330,6 +330,8 @@

 #define SUB32(a,b) ((a)-(b))

 #define ADD32_ovflw(a,b) ((a)+(b))

 #define SUB32_ovflw(a,b) ((a)-(b))

+#define PSHR32_ovflw(a,shift) (a) /* shift is ignored here: expands to a unchanged */

 #define MULT16_16_16(a,b)     ((a)*(b))

 #define MULT16_16(a,b)     ((opus_val32)(a)*(opus_val32)(b))

 #define MAC16_16(c,a,b)     ((c)+(opus_val32)(a)*(opus_val32)(b))

--- a/celt/fixed_debug.h

+++ b/celt/fixed_debug.h

@@ -69,6 +69,8 @@

 /* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */

 /** Negate 32-bit value, ignore any overflows */

 #define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(0-(opus_uint32)(a)))

+/** 32-bit arithmetic shift right by shift bits with rounding-to-nearest: the bias (1<<shift)>>1 is added with ADD32_ovflw (overflow in the add is ignored), then SHR32 is applied */

+#define PSHR32_ovflw(a,shift) (SHR32(ADD32_ovflw(a, (EXTEND32(1)<<(shift)>>1)),shift))

 static OPUS_INLINE short NEG16(int x)

--- a/celt/fixed_generic.h

+++ b/celt/fixed_generic.h

@@ -147,6 +147,8 @@

 /* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */

 /** Negate 32-bit value, ignore any overflows */

 #define NEG32_ovflw(a) ((opus_val32)(0-(opus_uint32)(a)))

+/** 32-bit arithmetic shift right by shift bits with rounding-to-nearest: the bias (1<<shift)>>1 is added with ADD32_ovflw (overflow in the add is ignored), then SHR32 is applied */

+#define PSHR32_ovflw(a,shift) (SHR32(ADD32_ovflw(a, (EXTEND32(1)<<(shift)>>1)),shift))

 /** 16x16 multiplication where the result fits in 16 bits */

 #define MULT16_16_16(a,b)     ((((opus_val16)(a))*((opus_val16)(b))))

--- a/celt/mdct.c

+++ b/celt/mdct.c

@@ -330,8 +330,8 @@

          t0 = t[i];

          t1 = t[N4+i];

          /* We'd scale up by 2 here, but instead it's done when mixing the windows */

-         yr = PSHR32(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);

-         yi = PSHR32(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);

+         yr = PSHR32_ovflw(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);

+         yi = PSHR32_ovflw(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);

          /* We swap real and imag because we're using an FFT instead of an IFFT. */

          re = yp1[1];

          im = yp1[0];

@@ -341,8 +341,8 @@

          t0 = t[(N4-i-1)];

          t1 = t[(N2-i-1)];

          /* We'd scale up by 2 here, but instead it's done when mixing the windows */

-         yr = PSHR32(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);

-         yi = PSHR32(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);

+         yr = PSHR32_ovflw(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);

+         yi = PSHR32_ovflw(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);

          yp1[0] = yr;

          yp0[1] = yi;

          yp0 += 2;

--

⑨