shithub: opus

--- a/celt/arm/arm_celt_map.c

+++ b/celt/arm/arm_celt_map.c

@@ -1,5 +1,6 @@

 /* Copyright (c) 2010 Xiph.Org Foundation

- * Copyright (c) 2013 Parrot */

+ * Copyright (c) 2013 Parrot

+ * Copyright (c) 2024 Arm Limited */

/*

    Redistribution and use in source and binary forms, with or without

    modification, are permitted provided that the following conditions

@@ -29,11 +30,24 @@

 #include "config.h"

 #endif

-#include "pitch.h"

 #include "kiss_fft.h"

+#include "mathops.h"

 #include "mdct.h"

+#include "pitch.h"

 #if defined(OPUS_HAVE_RTCD)

+# if !defined(DISABLE_FLOAT_API)

+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)

+void (*const CELT_FLOAT2INT16_IMPL[OPUS_ARCHMASK+1])(const float * OPUS_RESTRICT in, short * OPUS_RESTRICT out, int cnt) = { /* RTCD dispatch table: the celt_float2int16() macro indexes it with (arch & OPUS_ARCHMASK) */

+  celt_float2int16_c,   /* ARMv4 */

+  celt_float2int16_c,   /* EDSP */

+  celt_float2int16_c,   /* Media */

+  celt_float2int16_neon,/* NEON */

+  celt_float2int16_neon /* DOTPROD */

+};

+#  endif

+# endif

 # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)

 opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N) = {

--- a/celt/arm/celt_neon_intr.c

+++ b/celt/arm/celt_neon_intr.c

@@ -1,4 +1,5 @@

 /* Copyright (c) 2014-2015 Xiph.Org Foundation

+   Copyright (c) 2024 Arm Limited

    Written by Viswanath Puttagunta */

/**

    @file celt_neon_intr.c

@@ -35,7 +36,57 @@

 #endif

 #include <arm_neon.h>

+#include "../float_cast.h"

+#include "../mathops.h"

 #include "../pitch.h"

+#if defined(OPUS_CHECK_ASM)

+#include <stdlib.h>

+#endif

+#if !defined(DISABLE_FLOAT_API) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)

+/* Converts cnt float samples from in[] to 16-bit integers in out[].

+   When __ARM_NEON is defined, samples are handled 16 per iteration: each

+   vector of four floats is multiplied by CELT_SIG_SCALE, rounded to 32-bit

+   integers by vroundf() and narrowed with saturation by vqmovn_s32(). The

+   remaining samples (all of them when __ARM_NEON is not defined) go through

+   the scalar FLOAT2INT16().

+   in and out are OPUS_RESTRICT-qualified, so they are expected not to overlap. */

+void celt_float2int16_neon(const float * OPUS_RESTRICT in, short * OPUS_RESTRICT out, int cnt)

+{

+   int i = 0;

+#if defined(__ARM_NEON)

+   const int BLOCK_SIZE = 16;

+   /* Largest multiple of BLOCK_SIZE that does not exceed cnt. */

+   const int blockedSize = cnt / BLOCK_SIZE * BLOCK_SIZE;

+   for (; i < blockedSize; i += BLOCK_SIZE)

+   {

+      float32x4_t orig_a = vld1q_f32(&in[i +  0]);

+      float32x4_t orig_b = vld1q_f32(&in[i +  4]);

+      float32x4_t orig_c = vld1q_f32(&in[i +  8]);

+      float32x4_t orig_d = vld1q_f32(&in[i + 12]);

+      int16x4_t asShort_a = vqmovn_s32(vroundf(vmulq_n_f32(orig_a, CELT_SIG_SCALE)));

+      int16x4_t asShort_b = vqmovn_s32(vroundf(vmulq_n_f32(orig_b, CELT_SIG_SCALE)));

+      int16x4_t asShort_c = vqmovn_s32(vroundf(vmulq_n_f32(orig_c, CELT_SIG_SCALE)));

+      int16x4_t asShort_d = vqmovn_s32(vroundf(vmulq_n_f32(orig_d, CELT_SIG_SCALE)));

+      vst1_s16(&out[i +  0], asShort_a);

+      vst1_s16(&out[i +  4], asShort_b);

+      vst1_s16(&out[i +  8], asShort_c);

+      vst1_s16(&out[i + 12], asShort_d);

+# if defined(OPUS_CHECK_ASM)

+      {

+         /* Cross-check this block against the scalar conversion, tolerating a

+            difference of at most 1. The check lives in its own scope so that j

+            is declared at the top of a block, and it needs no scratch array

+            (the previous one was sized by a const int, i.e. a VLA). */

+         int j;

+         for (j = 0; j < BLOCK_SIZE; j++)

+         {

+            celt_assert(abs(FLOAT2INT16(in[i + j]) - out[i + j]) <= 1);

+         }

+      }

+# endif

+   }

+#endif

+   /* Scalar tail: whatever the vector loop did not cover. */

+   for (; i < cnt; i++)

+   {

+      out[i] = FLOAT2INT16(in[i]);

+   }

+}

+#endif

 #if defined(FIXED_POINT)

 #include <string.h>

--- /dev/null

+++ b/celt/arm/mathops_arm.h

@@ -1,0 +1,65 @@

+/* Copyright (c) 2024 Arm Limited */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#if !defined(MATHOPS_ARM_H)

+# define MATHOPS_ARM_H

+#include "armcpu.h"

+#include "cpu_support.h"

+#include "opus_defines.h"

+# if !defined(DISABLE_FLOAT_API) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)

+#include <arm_neon.h>

+/* Converts four floats to four signed 32-bit integers, rounding each lane to

+   the nearest integer. Declared with OPUS_INLINE (from opus_defines.h) rather

+   than the bare C99 keyword, matching the other inline helpers in the tree. */

+static OPUS_INLINE int32x4_t vroundf(float32x4_t x)

+{

+#  if defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH >= 8)

+    /* Single conversion: round to nearest, ties away from zero. */

+    return vcvtaq_s32_f32(x);

+#  else

+    /* Otherwise emulate it: add 0.5 carrying the sign of x, then let

+       vcvtq_s32_f32() truncate toward zero. */

+    uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), vdupq_n_u32(0x80000000));

+    uint32x4_t bias = vdupq_n_u32(0x3F000000); /* bit pattern of 0.5f */

+    return vcvtq_s32_f32(vaddq_f32(x, vreinterpretq_f32_u32(vorrq_u32(bias, sign))));

+#  endif

+}

+void celt_float2int16_neon(const float * OPUS_RESTRICT in, short * OPUS_RESTRICT out, int cnt); /* defined in celt_neon_intr.c */

+#  if defined(OPUS_HAVE_RTCD) && \

+    (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) /* RTCD build where NEON is not presumed: dispatch through the table */

+extern void

+(*const CELT_FLOAT2INT16_IMPL[OPUS_ARCHMASK+1])(const float * OPUS_RESTRICT in, short * OPUS_RESTRICT out, int cnt); /* defined in arm_celt_map.c */

+#   define OVERRIDE_FLOAT2INT16 (1) /* stops mathops.h from defining its C-only celt_float2int16() */

+#   define celt_float2int16(in, out, cnt, arch) \

+      ((*CELT_FLOAT2INT16_IMPL[(arch)&OPUS_ARCHMASK])(in, out, cnt))

+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR) /* NEON presumed present: call the NEON version directly */

+#   define OVERRIDE_FLOAT2INT16 (1) /* stops mathops.h from defining its C-only celt_float2int16() */

+#   define celt_float2int16(in, out, cnt, arch) ((void)(arch), celt_float2int16_neon(in, out, cnt))

+#  endif /* RTCD table / presumed NEON */

+# endif

+#endif /* MATHOPS_ARM_H */

--- a/celt/float_cast.h

+++ b/celt/float_cast.h

@@ -98,6 +98,13 @@

                 return intgr ;

+#elif defined(__aarch64__)

+        #include <arm_neon.h>

+        /* AArch64 variant: converts flt to a signed 32-bit integer with the

+           scalar vcvtns_s32_f32() intrinsic (round to nearest, ties to even). */

+        static OPUS_INLINE opus_int32 float2int(float flt)

+        {

+                const opus_int32 rounded = vcvtns_s32_f32(flt);

+                return rounded;

+        }

 #elif defined(HAVE_LRINTF) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L

--- a/celt/mathops.c

+++ b/celt/mathops.c

@@ -1,6 +1,7 @@

 /* Copyright (c) 2002-2008 Jean-Marc Valin

    Copyright (c) 2007-2008 CSIRO

    Copyright (c) 2007-2009 Xiph.Org Foundation

+   Copyright (c) 2024 Arm Limited

    Written by Jean-Marc Valin */

/**

    @file mathops.h

@@ -35,6 +36,7 @@

 #include "config.h"

 #endif

+#include "float_cast.h"

 #include "mathops.h"

 /*Compute floor(sqrt(_val)) with exact arithmetic.

@@ -215,3 +217,16 @@

 #endif

+#ifndef DISABLE_FLOAT_API

+/* Portable C version: applies FLOAT2INT16() to each of the cnt samples of

+   in[], in order, storing the results in out[]. */

+void celt_float2int16_c(const float * OPUS_RESTRICT in, short * OPUS_RESTRICT out, int cnt)

+{

+   int idx = 0;

+   while (idx < cnt)

+   {

+      out[idx] = FLOAT2INT16(in[idx]);

+      idx++;

+   }

+}

+#endif /* DISABLE_FLOAT_API */

--- a/celt/mathops.h

+++ b/celt/mathops.h

@@ -1,6 +1,7 @@

 /* Copyright (c) 2002-2008 Jean-Marc Valin

    Copyright (c) 2007-2008 CSIRO

    Copyright (c) 2007-2009 Xiph.Org Foundation

+   Copyright (c) 2024 Arm Limited

    Written by Jean-Marc Valin, and Yunho Huh */

/**

    @file mathops.h

@@ -38,6 +39,10 @@

 #include "entcode.h"

 #include "os_support.h"

+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)

+#include "arm/mathops_arm.h"

+#endif

 #define PI 3.141592653f

 /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */

@@ -476,4 +481,15 @@

 #endif /* FIXED_POINT */

+#ifndef DISABLE_FLOAT_API

+void celt_float2int16_c(const float * OPUS_RESTRICT in, short * OPUS_RESTRICT out, int cnt); /* portable C implementation, defined in mathops.c */

+#ifndef OVERRIDE_FLOAT2INT16 /* no platform header supplied its own celt_float2int16() */

+#define celt_float2int16(in, out, cnt, arch) ((void)(arch), celt_float2int16_c(in, out, cnt))

+#endif /* OVERRIDE_FLOAT2INT16 */

+#endif /* DISABLE_FLOAT_API */

 #endif /* MATHOPS_H */

--- a/celt/tests/test_unit_mathops.c

+++ b/celt/tests/test_unit_mathops.c

@@ -1,5 +1,6 @@

 /* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,

                            Gregory Maxwell

+   Copyright (c) 2024 Arm Limited

    Written by Jean-Marc Valin, Gregory Maxwell, Timothy B. Terriberry,

    and Yunho Huh */

/*

@@ -37,8 +38,10 @@

 #include <stdio.h>

 #include <math.h>

-#include "mathops.h"

 #include "bands.h"

+#include "cpu_support.h"

+#include "float_cast.h"

+#include "mathops.h"

 #ifdef FIXED_POINT

 #define WORD "%d"

@@ -351,8 +354,94 @@

 #endif

+#ifndef DISABLE_FLOAT_API

+/* Checks a float -> int16 conversion routine against FLOAT2INT16().

+   use_ref_impl: non-zero calls celt_float2int16_c() directly; zero goes

+                 through the celt_float2int16() macro with opus_select_arch().

+   buffer_size:  number of samples to convert; must not exceed MAX_BUFFER_SIZE.

+   On any mismatch, or if an element past the converted range is modified,

+   a message is printed to stderr and the file-level ret is set to 1. */

+void testcelt_float2int16(int use_ref_impl, int buffer_size)

+{

+#define MAX_BUFFER_SIZE 2080

+   int i, cnt;

+   float floatsToConvert[MAX_BUFFER_SIZE];

+   short results[MAX_BUFFER_SIZE] = { 0 };

+   float scaleInt16RangeTo01;

+   celt_assert(buffer_size <= MAX_BUFFER_SIZE);

+   scaleInt16RangeTo01 = 1.f / 32768.f;

+   cnt = 0;

+   /* First part of the buffer: groups of 15 hand-picked values around zero and

+      around / beyond the int16 limits. */

+   while (cnt + 15 < buffer_size && cnt < buffer_size / 2)

+   {

+      floatsToConvert[cnt++] = 77777.0f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = 33000.0f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = 32768.0f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = 32767.4f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = 32766.6f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = .501f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = .499f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = .0f;

+      floatsToConvert[cnt++] = -.499f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = -.501f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = -32767.6f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = -32768.4f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = -32769.0f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = -33000.0f * scaleInt16RangeTo01;

+      floatsToConvert[cnt++] = -77777.0f * scaleInt16RangeTo01;

+      celt_assert(cnt < buffer_size);

+   }

+   /* Rest of the buffer: values just above or below a half-integer, with the

+      sign chosen from bit 1 of the index. */

+   while (cnt < buffer_size)

+   {

+      float inInt16Range = cnt * 7 + .5;

+      inInt16Range += (cnt & 0x01) ? .1 : -.1;

+      inInt16Range *= (cnt & 0x02) ? 1 : -1;

+      floatsToConvert[cnt++] = inInt16Range * scaleInt16RangeTo01;

+   }

+   /* Sentinel fill so that writes past cnt can be detected afterwards. */

+   for (i = 0; i < MAX_BUFFER_SIZE; ++i)

+   {

+      results[i] = 42;

+   }

+   if (use_ref_impl)

+   {

+      celt_float2int16_c(floatsToConvert, results, cnt);

+   } else {

+      celt_float2int16(floatsToConvert, results, cnt, opus_select_arch());

+   }

+   for (i = 0; i < cnt; ++i)

+   {

+      /* Same integer type as results[], so the comparison is integer to integer. */

+      const short expected = FLOAT2INT16(floatsToConvert[i]);

+      if (results[i] != expected)

+      {

+         fprintf (stderr, "testcelt_float2int16 failed: celt_float2int16 converted %f (index: %d) to %d (x*32768=%f, expected: %d, cnt: %d, ref: %d)\n",

+               floatsToConvert[i], i, (int)results[i], floatsToConvert[i] * 32768.0f, (int)expected, buffer_size, use_ref_impl);

+         ret = 1;

+      }

+   }

+   /* Everything past the converted range must still hold the sentinel. */

+   for (i = cnt; i < MAX_BUFFER_SIZE; ++i)

+   {

+      if (results[i] != 42)

+      {

+         fprintf (stderr, "testcelt_float2int16 failed: buffer overflow (cnt: %d, ref: %d)\n", buffer_size, use_ref_impl);

+         ret = 1;

+         break;

+      }

+   }

+#undef MAX_BUFFER_SIZE

+}

+#endif

 int main(void)

+   int i;

+   int use_ref_impl[2] = { 0, 1 };

    testbitexactcos();

    testbitexactlog2tan();

    testdiv();

@@ -364,6 +453,15 @@

    testilog2();

    testlog2_db();

    testexp2_db();

+#endif

+#ifndef DISABLE_FLOAT_API

+   for (i = 0; i <= 1; ++i)

+   {

+      testcelt_float2int16(use_ref_impl[i], 1);

+      testcelt_float2int16(use_ref_impl[i], 32);

+      testcelt_float2int16(use_ref_impl[i], 127);

+      testcelt_float2int16(use_ref_impl[i], 1031);

+   }

 #endif

    return ret;

--- a/celt_headers.mk

+++ b/celt_headers.mk

@@ -39,6 +39,7 @@

 celt/arm/fixed_arm64.h \

 celt/arm/kiss_fft_armv4.h \

 celt/arm/kiss_fft_armv5e.h \

+celt/arm/mathops_arm.h \

 celt/arm/pitch_arm.h \

 celt/arm/fft_arm.h \

 celt/arm/mdct_arm.h \

--- a/src/opus_decoder.c

+++ b/src/opus_decoder.c

@@ -1,4 +1,5 @@

 /* Copyright (c) 2010 Xiph.Org Foundation, Skype Limited

+   Copyright (c) 2024 Arm Limited

    Written by Jean-Marc Valin and Koen Vos */

/*

    Redistribution and use in source and binary forms, with or without

@@ -835,7 +836,7 @@

       opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)

        VARDECL(opus_res, out);

-       int ret, i;

+       int ret;

        int nb_samples;

        ALLOC_STACK;

@@ -858,8 +859,13 @@

        ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, OPTIONAL_CLIP, NULL, 0);

        if (ret > 0)

+# if defined(FIXED_POINT)

+          int i;

           for (i=0;i<ret*st->channels;i++)

              pcm[i] = RES2INT16(out[i]);

+# else

+          celt_float2int16(out, pcm, ret*st->channels, st->arch);

+# endif

        RESTORE_STACK;

        return ret;

--

⑨