shithub: opus

ref: 61c6391c210b4f17a1415e545676e56c50aab4e4
parent: c41afe41f0fb13b80ca79e987a21ea411e1545a7
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Sat Jun 23 22:41:36 EDT 2018

Importing DSP code from RNNoise
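
For orientation, a minimal sketch of how the DenoiseState API defined in dnn/denoise.c below (rnnoise_create, rnnoise_process_frame, rnnoise_destroy) is typically driven, assuming the prototypes exported by dnn/rnnoise.h (not shown in this patch) match those definitions. FRAME_SIZE is 160 samples here (40<<FRAME_SIZE_SHIFT). Note that the RNN inference is not part of this import, so the per-band gains are not yet computed and the output is not a usable denoiser; the sketch only shows how the API is wired.

    /* Illustrative sketch, not part of the patch: raw float32 samples on stdin,
       processed one FRAME_SIZE chunk at a time. Prototypes assumed from dnn/rnnoise.h. */
    #include <stdio.h>
    #include "rnnoise.h"

    #define FRAME_SIZE 160   /* 40<<FRAME_SIZE_SHIFT in dnn/denoise.c */

    int main(void) {
      float in[FRAME_SIZE], out[FRAME_SIZE];
      DenoiseState *st = rnnoise_create();
      while (fread(in, sizeof(float), FRAME_SIZE, stdin) == FRAME_SIZE) {
        rnnoise_process_frame(st, out, in);
        fwrite(out, sizeof(float), FRAME_SIZE, stdout);
      }
      rnnoise_destroy(st);
      return 0;
    }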

--- /dev/null
+++ b/dnn/_kiss_fft_guts.h
@@ -1,0 +1,182 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_GUTS_H
+#define KISS_FFT_GUTS_H
+
+#define MIN(a,b) ((a)<(b) ? (a):(b))
+#define MAX(a,b) ((a)>(b) ? (a):(b))
+
+/* kiss_fft.h
+   defines kiss_fft_scalar as either short or a float type
+   and defines
+   typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+#include "kiss_fft.h"
+
+/*
+  Explanation of macros dealing with complex math:
+
+   C_MUL(m,a,b)         : m = a*b
+   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
+   C_SUB( res, a,b)     : res = a - b
+   C_SUBFROM( res , a)  : res -= a
+   C_ADDTO( res , a)    : res += a
+ * */
+#ifdef FIXED_POINT
+#include "arch.h"
+
+
+#define SAMP_MAX 2147483647
+#define TWID_MAX 32767
+#define TRIG_UPSCALE 1
+
+#define SAMP_MIN -SAMP_MAX
+
+
+#   define S_MUL(a,b) MULT16_32_Q15(b, a)
+
+#   define C_MUL(m,a,b) \
+      do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
+
+#   define C_MULC(m,a,b) \
+      do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
+
+#   define C_MULBYSCALAR( c, s ) \
+      do{ (c).r =  S_MUL( (c).r , s ) ;\
+          (c).i =  S_MUL( (c).i , s ) ; }while(0)
+
+#   define DIVSCALAR(x,k) \
+        (x) = S_MUL(  x, (TWID_MAX-((k)>>1))/(k)+1 )
+
+#   define C_FIXDIV(c,div) \
+        do {    DIVSCALAR( (c).r , div);  \
+                DIVSCALAR( (c).i  , div); }while (0)
+
+#define  C_ADD( res, a,b)\
+    do {(res).r=ADD32_ovflw((a).r,(b).r);  (res).i=ADD32_ovflw((a).i,(b).i); \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do {(res).r=SUB32_ovflw((a).r,(b).r);  (res).i=SUB32_ovflw((a).i,(b).i); \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do {(res).r = ADD32_ovflw((res).r, (a).r);  (res).i = ADD32_ovflw((res).i,(a).i);\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {(res).r = SUB32_ovflw((res).r,(a).r);  (res).i = SUB32_ovflw((res).i,(a).i); \
+    }while(0)
+
+#if defined(OPUS_ARM_INLINE_ASM)
+#include "arm/kiss_fft_armv4.h"
+#endif
+
+#if defined(OPUS_ARM_INLINE_EDSP)
+#include "arm/kiss_fft_armv5e.h"
+#endif
+#if defined(MIPSr1_ASM)
+#include "mips/kiss_fft_mipsr1.h"
+#endif
+
+#else  /* not FIXED_POINT*/
+
+#   define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+#define C_MULC(m,a,b) \
+    do{ (m).r = (a).r*(b).r + (a).i*(b).i;\
+        (m).i = (a).i*(b).r - (a).r*(b).i; }while(0)
+
+#define C_MUL4(m,a,b) C_MUL(m,a,b)
+
+#   define C_FIXDIV(c,div) /* NOOP */
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r *= (s);\
+        (c).i *= (s); }while(0)
+#endif
+
+#ifndef CHECK_OVERFLOW_OP
+#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+#ifndef C_ADD
+#define  C_ADD( res, a,b)\
+    do { \
+            CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+            CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+            (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do { \
+            CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+            CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+            (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do { \
+            CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+            CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+            (res).r += (a).r;  (res).i += (a).i;\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {\
+            CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+            CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+            (res).r -= (a).r;  (res).i -= (a).i; \
+    }while(0)
+#endif /* C_ADD defined */
+
+#ifdef FIXED_POINT
+/*#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
+#  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
+#  define KISS_FFT_COS(phase)  floor(.5+TWID_MAX*cos (phase))
+#  define KISS_FFT_SIN(phase)  floor(.5+TWID_MAX*sin (phase))
+#  define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
+#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
+#  define HALF_OF(x) ((x)*_mm_set1_ps(.5f))
+#else
+#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+#  define HALF_OF(x) ((x)*.5f)
+#endif
+
+#define  kf_cexp(x,phase) \
+        do{ \
+                (x)->r = KISS_FFT_COS(phase);\
+                (x)->i = KISS_FFT_SIN(phase);\
+        }while(0)
+
+#define  kf_cexp2(x,phase) \
+   do{ \
+      (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
+      (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
+}while(0)
+
+#endif /* KISS_FFT_GUTS_H */
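
A small illustration of the complex-math macros above in the float build, where kiss_fft_scalar is float and C_FIXDIV is a no-op (a sketch, not part of the patch):

    /* (1+2i)*(3+4i) = -5+10i, then accumulate into acc. */
    #include <stdio.h>
    #include "_kiss_fft_guts.h"

    int main(void) {
      kiss_fft_cpx a = {1, 2}, b = {3, 4}, m, acc = {0, 0};
      C_MUL(m, a, b);      /* m.r = 1*3 - 2*4 = -5;  m.i = 1*4 + 2*3 = 10 */
      C_ADDTO(acc, m);     /* acc += m */
      C_FIXDIV(acc, 4);    /* no-op in the float build */
      printf("%g %g\n", acc.r, acc.i);
      return 0;
    }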
--- /dev/null
+++ b/dnn/arch.h
@@ -1,0 +1,261 @@
+/* Copyright (c) 2003-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file arch.h
+   @brief Various architecture definitions for CELT
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ARCH_H
+#define ARCH_H
+
+#include "opus_types.h"
+#include "common.h"
+
+# if !defined(__GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define __GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define __GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+#define CELT_SIG_SCALE 32768.f
+
+#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
+#ifdef ENABLE_ASSERTIONS
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
+{
+   fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
+   abort();
+}
+#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}}
+#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}}
+#else
+#define celt_assert(cond)
+#define celt_assert2(cond, message)
+#endif
+
+#define IMUL32(a,b) ((a)*(b))
+
+#define MIN16(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 16-bit value.   */
+#define MAX16(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 16-bit value.   */
+#define MIN32(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 32-bit value.   */
+#define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
+#define IMIN(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum int value.   */
+#define IMAX(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum int value.   */
+#define UADD32(a,b) ((a)+(b))
+#define USUB32(a,b) ((a)-(b))
+
+/* Set this if opus_int64 is a native type of the CPU. */
+/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
+   (which can be ILP32 for x32) and Win64 (which is LLP64). */
+#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
+#define OPUS_FAST_INT64 1
+#else
+#define OPUS_FAST_INT64 0
+#endif
+
+#define PRINT_MIPS(file)
+
+#ifdef FIXED_POINT
+
+typedef opus_int16 opus_val16;
+typedef opus_int32 opus_val32;
+typedef opus_int64 opus_val64;
+
+typedef opus_val32 celt_sig;
+typedef opus_val16 celt_norm;
+typedef opus_val32 celt_ener;
+
+#define Q15ONE 32767
+
+#define SIG_SHIFT 12
+/* Safe saturation value for 32-bit signals. Should be less than
+   2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
+#define SIG_SAT (300000000)
+
+#define NORM_SCALING 16384
+
+#define DB_SHIFT 10
+
+#define EPSILON 1
+#define VERY_SMALL 0
+#define VERY_LARGE16 ((opus_val16)32767)
+#define Q15_ONE ((opus_val16)32767)
+
+#define SCALEIN(a)      (a)
+#define SCALEOUT(a)     (a)
+
+#define ABS16(x) ((x) < 0 ? (-(x)) : (x))
+#define ABS32(x) ((x) < 0 ? (-(x)) : (x))
+
+static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
+   return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
+}
+
+#ifdef FIXED_DEBUG
+#include "fixed_debug.h"
+#else
+
+#include "fixed_generic.h"
+
+#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
+#include "arm/fixed_arm64.h"
+#elif OPUS_ARM_INLINE_EDSP
+#include "arm/fixed_armv5e.h"
+#elif defined (OPUS_ARM_INLINE_ASM)
+#include "arm/fixed_armv4.h"
+#elif defined (BFIN_ASM)
+#include "fixed_bfin.h"
+#elif defined (TI_C5X_ASM)
+#include "fixed_c5x.h"
+#elif defined (TI_C6X_ASM)
+#include "fixed_c6x.h"
+#endif
+
+#endif
+
+#else /* FIXED_POINT */
+
+typedef float opus_val16;
+typedef float opus_val32;
+typedef float opus_val64;
+
+typedef float celt_sig;
+typedef float celt_norm;
+typedef float celt_ener;
+
+#ifdef FLOAT_APPROX
+/* This code should reliably detect NaN/inf even when -ffast-math is used.
+   Assumes IEEE 754 format. */
+static OPUS_INLINE int celt_isnan(float x)
+{
+   union {float f; opus_uint32 i;} in;
+   in.f = x;
+   return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0;
+}
+#else
+#ifdef __FAST_MATH__
+#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input
+#endif
+#define celt_isnan(x) ((x)!=(x))
+#endif
+
+#define Q15ONE 1.0f
+
+#define NORM_SCALING 1.f
+
+#define EPSILON 1e-15f
+#define VERY_SMALL 1e-30f
+#define VERY_LARGE16 1e15f
+#define Q15_ONE ((opus_val16)1.f)
+
+/* This appears to be the same speed as C99's fabsf() but it's more portable. */
+#define ABS16(x) ((float)fabs(x))
+#define ABS32(x) ((float)fabs(x))
+
+#define QCONST16(x,bits) (x)
+#define QCONST32(x,bits) (x)
+
+#define NEG16(x) (-(x))
+#define NEG32(x) (-(x))
+#define NEG32_ovflw(x) (-(x))
+#define EXTRACT16(x) (x)
+#define EXTEND32(x) (x)
+#define SHR16(a,shift) (a)
+#define SHL16(a,shift) (a)
+#define SHR32(a,shift) (a)
+#define SHL32(a,shift) (a)
+#define PSHR32(a,shift) (a)
+#define VSHR32(a,shift) (a)
+
+#define PSHR(a,shift)   (a)
+#define SHR(a,shift)    (a)
+#define SHL(a,shift)    (a)
+#define SATURATE(x,a)   (x)
+#define SATURATE16(x)   (x)
+
+#define ROUND16(a,shift)  (a)
+#define SROUND16(a,shift) (a)
+#define HALF16(x)       (.5f*(x))
+#define HALF32(x)       (.5f*(x))
+
+#define ADD16(a,b) ((a)+(b))
+#define SUB16(a,b) ((a)-(b))
+#define ADD32(a,b) ((a)+(b))
+#define SUB32(a,b) ((a)-(b))
+#define ADD32_ovflw(a,b) ((a)+(b))
+#define SUB32_ovflw(a,b) ((a)-(b))
+#define MULT16_16_16(a,b)     ((a)*(b))
+#define MULT16_16(a,b)     ((opus_val32)(a)*(opus_val32)(b))
+#define MAC16_16(c,a,b)     ((c)+(opus_val32)(a)*(opus_val32)(b))
+
+#define MULT16_32_Q15(a,b)     ((a)*(b))
+#define MULT16_32_Q16(a,b)     ((a)*(b))
+
+#define MULT32_32_Q31(a,b)     ((a)*(b))
+
+#define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
+#define MAC16_32_Q16(c,a,b)     ((c)+(a)*(b))
+
+#define MULT16_16_Q11_32(a,b)     ((a)*(b))
+#define MULT16_16_Q11(a,b)     ((a)*(b))
+#define MULT16_16_Q13(a,b)     ((a)*(b))
+#define MULT16_16_Q14(a,b)     ((a)*(b))
+#define MULT16_16_Q15(a,b)     ((a)*(b))
+#define MULT16_16_P15(a,b)     ((a)*(b))
+#define MULT16_16_P13(a,b)     ((a)*(b))
+#define MULT16_16_P14(a,b)     ((a)*(b))
+#define MULT16_32_P16(a,b)     ((a)*(b))
+
+#define DIV32_16(a,b)     (((opus_val32)(a))/(opus_val16)(b))
+#define DIV32(a,b)     (((opus_val32)(a))/(opus_val32)(b))
+
+#define SCALEIN(a)      ((a)*CELT_SIG_SCALE)
+#define SCALEOUT(a)     ((a)*(1/CELT_SIG_SCALE))
+
+#define SIG2WORD16(x) (x)
+
+#endif /* !FIXED_POINT */
+
+#ifndef GLOBAL_STACK_SIZE
+#ifdef FIXED_POINT
+#define GLOBAL_STACK_SIZE 120000
+#else
+#define GLOBAL_STACK_SIZE 120000
+#endif
+#endif
+
+#endif /* ARCH_H */
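
In the float build all of the MULT16_*/MAC16_* macros above reduce to plain products; in the fixed-point build they come from fixed_generic.h (not part of this hunk) and implement Q15/Q31 arithmetic. A sketch of what a Q15 multiply means, written with plain integers rather than the actual macros:

    /* Q15 multiply: a is a Q15 fraction, the result keeps b's scale.
       This is the semantics of MULT16_32_Q15 in the fixed-point build
       (the real macro lives in fixed_generic.h). */
    #include <stdint.h>
    #include <stdio.h>

    static int32_t q15_mult16_32(int16_t a, int32_t b) {
      return (int32_t)(((int64_t)a * b) >> 15);
    }

    int main(void) {
      int16_t half = 16384;                        /* 0.5 in Q15 */
      printf("%d\n", q15_mult16_32(half, 1000));   /* prints 500 */
      return 0;
    }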
--- /dev/null
+++ b/dnn/celt_lpc.c
@@ -1,0 +1,279 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "celt_lpc.h"
+#include "arch.h"
+#include "common.h"
+#include "pitch.h"
+
+void _celt_lpc(
+      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
+const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
+int          p
+)
+{
+   int i, j;
+   opus_val32 r;
+   opus_val32 error = ac[0];
+#ifdef FIXED_POINT
+   opus_val32 lpc[LPC_ORDER];
+#else
+   float *lpc = _lpc;
+#endif
+
+   RNN_CLEAR(lpc, p);
+   if (ac[0] != 0)
+   {
+      for (i = 0; i < p; i++) {
+         /* Sum up this iteration's reflection coefficient */
+         opus_val32 rr = 0;
+         for (j = 0; j < i; j++)
+            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
+         rr += SHR32(ac[i + 1],3);
+         r = -SHL32(rr,3)/error;
+         /*  Update LPC coefficients and total error */
+         lpc[i] = SHR32(r,3);
+         for (j = 0; j < (i+1)>>1; j++)
+         {
+            opus_val32 tmp1, tmp2;
+            tmp1 = lpc[j];
+            tmp2 = lpc[i-1-j];
+            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
+            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
+         }
+
+         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
+         /* Bail out once we get 30 dB gain */
+#ifdef FIXED_POINT
+         if (error<SHR32(ac[0],10))
+            break;
+#else
+         if (error<.001f*ac[0])
+            break;
+#endif
+      }
+   }
+#ifdef FIXED_POINT
+   for (i=0;i<p;i++)
+      _lpc[i] = ROUND16(lpc[i],16);
+#endif
+}
+
+
+void celt_fir(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord)
+{
+   int i,j;
+   opus_val16 rnum[ord];
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sum[4];
+      sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
+      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
+      sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
+      sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
+      xcorr_kernel(rnum, x+i-ord, sum, ord);
+      y[i  ] = ROUND16(sum[0], SIG_SHIFT);
+      y[i+1] = ROUND16(sum[1], SIG_SHIFT);
+      y[i+2] = ROUND16(sum[2], SIG_SHIFT);
+      y[i+3] = ROUND16(sum[3], SIG_SHIFT);
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
+      y[i] = ROUND16(sum, SIG_SHIFT);
+   }
+}
+
+void celt_iir(const opus_val32 *_x,
+         const opus_val16 *den,
+         opus_val32 *_y,
+         int N,
+         int ord,
+         opus_val16 *mem)
+{
+#ifdef SMALL_FOOTPRINT
+   int i,j;
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+      {
+         sum -= MULT16_16(den[j],mem[j]);
+      }
+      for (j=ord-1;j>=1;j--)
+      {
+         mem[j]=mem[j-1];
+      }
+      mem[0] = SROUND16(sum, SIG_SHIFT);
+      _y[i] = sum;
+   }
+#else
+   int i,j;
+   celt_assert((ord&3)==0);
+   opus_val16 rden[ord];
+   opus_val16 y[N+ord];
+   for(i=0;i<ord;i++)
+      rden[i] = den[ord-i-1];
+   for(i=0;i<ord;i++)
+      y[i] = -mem[ord-i-1];
+   for(;i<N+ord;i++)
+      y[i]=0;
+   for (i=0;i<N-3;i+=4)
+   {
+      /* Unroll by 4 as if it were an FIR filter */
+      opus_val32 sum[4];
+      sum[0]=_x[i];
+      sum[1]=_x[i+1];
+      sum[2]=_x[i+2];
+      sum[3]=_x[i+3];
+      xcorr_kernel(rden, y+i, sum, ord);
+
+      /* Patch up the result to compensate for the fact that this is an IIR */
+      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
+      _y[i  ] = sum[0];
+      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
+      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
+      _y[i+1] = sum[1];
+      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
+      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
+      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
+      _y[i+2] = sum[2];
+
+      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
+      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
+      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
+      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
+      _y[i+3] = sum[3];
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+         sum -= MULT16_16(rden[j],y[i+j]);
+      y[i+ord] = SROUND16(sum,SIG_SHIFT);
+      _y[i] = sum;
+   }
+   for(i=0;i<ord;i++)
+      mem[i] = _y[N-i-1];
+#endif
+}
+
+int _celt_autocorr(
+                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
+                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
+                   const opus_val16       *window,
+                   int          overlap,
+                   int          lag,
+                   int          n)
+{
+   opus_val32 d;
+   int i, k;
+   int fastN=n-lag;
+   int shift;
+   const opus_val16 *xptr;
+   opus_val16 xx[n];
+   celt_assert(n>0);
+   celt_assert(overlap>=0);
+   if (overlap == 0)
+   {
+      xptr = x;
+   } else {
+      for (i=0;i<n;i++)
+         xx[i] = x[i];
+      for (i=0;i<overlap;i++)
+      {
+         xx[i] = MULT16_16_Q15(x[i],window[i]);
+         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+      }
+      xptr = xx;
+   }
+   shift=0;
+#ifdef FIXED_POINT
+   {
+      opus_val32 ac0;
+      ac0 = 1+(n<<7);
+      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
+      for(i=(n&1);i<n;i+=2)
+      {
+         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
+         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
+      }
+
+      shift = celt_ilog2(ac0)-30+10;
+      shift = (shift)/2;
+      if (shift>0)
+      {
+         for(i=0;i<n;i++)
+            xx[i] = PSHR32(xptr[i], shift);
+         xptr = xx;
+      } else
+         shift = 0;
+   }
+#endif
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
+   for (k=0;k<=lag;k++)
+   {
+      for (i = k+fastN, d = 0; i < n; i++)
+         d = MAC16_16(d, xptr[i], xptr[i-k]);
+      ac[k] += d;
+   }
+#ifdef FIXED_POINT
+   shift = 2*shift;
+   if (shift<=0)
+      ac[0] += SHL32((opus_int32)1, -shift);
+   if (ac[0] < 268435456)
+   {
+      int shift2 = 29 - EC_ILOG(ac[0]);
+      for (i=0;i<=lag;i++)
+         ac[i] = SHL32(ac[i], shift2);
+      shift -= shift2;
+   } else if (ac[0] >= 536870912)
+   {
+      int shift2=1;
+      if (ac[0] >= 1073741824)
+         shift2++;
+      for (i=0;i<=lag;i++)
+         ac[i] = SHR32(ac[i], shift2);
+      shift += shift2;
+   }
+#endif
+
+   return shift;
+}
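
The two entry points above are designed to be used together in the usual Levinson-Durbin flow: compute lag+1 autocorrelation values with _celt_autocorr(), then convert them into LPC_ORDER prediction coefficients with _celt_lpc(). A sketch for the float build (the frame length and toy signal are illustrative choices, not values from the patch; linking also needs celt_pitch_xcorr(), declared in pitch.h):

    /* Autocorrelation -> LPC coefficients, float build. */
    #include <stdio.h>
    #include "celt_lpc.h"

    #define N 320   /* illustrative analysis length */

    int main(void) {
      float x[N], ac[LPC_ORDER+1], lpc[LPC_ORDER];
      int i;
      for (i=0;i<N;i++) x[i] = (float)((i%32) - 16);   /* toy periodic signal */
      _celt_autocorr(x, ac, NULL, 0, LPC_ORDER, N);    /* overlap==0: window unused */
      _celt_lpc(lpc, ac, LPC_ORDER);
      for (i=0;i<LPC_ORDER;i++) printf("%f ", lpc[i]);
      printf("\n");
      return 0;
    }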
--- /dev/null
+++ b/dnn/celt_lpc.h
@@ -1,0 +1,59 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PLC_H
+#define PLC_H
+
+#include "arch.h"
+#include "common.h"
+
+#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#include "x86/celt_lpc_sse.h"
+#endif
+
+#define LPC_ORDER 24
+
+void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
+
+void celt_fir(
+         const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord);
+
+void celt_iir(const opus_val32 *x,
+         const opus_val16 *den,
+         opus_val32 *y,
+         int N,
+         int ord,
+         opus_val16 *mem);
+
+int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
+         const opus_val16 *window, int overlap, int lag, int n);
+
+#endif /* PLC_H */
--- /dev/null
+++ b/dnn/common.h
@@ -1,0 +1,48 @@
+
+
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "stdlib.h"
+#include "string.h"
+
+#define RNN_INLINE inline
+#define OPUS_INLINE inline
+
+
+/** RNNoise wrapper for malloc(). To do your own dynamic allocation, all you
+    need to do is replace this function and rnnoise_free */
+#ifndef OVERRIDE_RNNOISE_ALLOC
+static RNN_INLINE void *rnnoise_alloc (size_t size)
+{
+   return malloc(size);
+}
+#endif
+
+/** RNNoise wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and rnnoise_alloc */
+#ifndef OVERRIDE_RNNOISE_FREE
+static RNN_INLINE void rnnoise_free (void *ptr)
+{
+   free(ptr);
+}
+#endif
+
+/** Copy n elements from src to dst. The 0* term provides compile-time type checking  */
+#ifndef OVERRIDE_RNN_COPY
+#define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Copy n elements from src to dst, allowing overlapping regions. The 0* term
+    provides compile-time type checking */
+#ifndef OVERRIDE_RNN_MOVE
+#define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Set n elements of dst to zero */
+#ifndef OVERRIDE_RNN_CLEAR
+#define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
+#endif
+
+
+
+#endif
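
The 0*((dst)-(src)) term in RNN_COPY and RNN_MOVE is a cheap compile-time type check: pointer subtraction is only defined between pointers to compatible types, so a mismatched source and destination fail to compile instead of silently copying the wrong number of bytes. A sketch:

    /* Compiles: both pointers are float*. */
    #include "common.h"
    void copy_ok(float *dst, const float *src) { RNN_COPY(dst, src, 16); }

    /* Does not compile: (dst)-(src) subtracts incompatible pointer types.
    void copy_bad(float *dst, const short *src) { RNN_COPY(dst, src, 16); }
    */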
--- /dev/null
+++ b/dnn/denoise.c
@@ -1,0 +1,676 @@
+/* Copyright (c) 2017 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "kiss_fft.h"
+#include "common.h"
+#include <math.h>
+#include "rnnoise.h"
+#include "pitch.h"
+#include "arch.h"
+
+#define FRAME_SIZE_SHIFT 2
+#define FRAME_SIZE (40<<FRAME_SIZE_SHIFT)
+#define WINDOW_SIZE (2*FRAME_SIZE)
+#define FREQ_SIZE (FRAME_SIZE + 1)
+
+#define PITCH_MIN_PERIOD 20
+#define PITCH_MAX_PERIOD 256
+#define PITCH_FRAME_SIZE 320
+#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
+
+#define SQUARE(x) ((x)*(x))
+
+#define SMOOTH_BANDS 1
+
+#if SMOOTH_BANDS
+#define NB_BANDS 18
+#else
+#define NB_BANDS 17
+#endif
+
+#define CEPS_MEM 8
+#define NB_DELTA_CEPS 6
+
+#define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
+
+
+#ifndef TRAINING
+#define TRAINING 0
+#endif
+
+static const opus_int16 eband5ms[] = {
+/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k*/
+  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40
+};
+
+
+typedef struct {
+  int init;
+  kiss_fft_state *kfft;
+  float half_window[FRAME_SIZE];
+  float dct_table[NB_BANDS*NB_BANDS];
+} CommonState;
+
+struct DenoiseState {
+  float analysis_mem[FRAME_SIZE];
+  float cepstral_mem[CEPS_MEM][NB_BANDS];
+  int memid;
+  float synthesis_mem[FRAME_SIZE];
+  float pitch_buf[PITCH_BUF_SIZE];
+  float pitch_enh_buf[PITCH_BUF_SIZE];
+  float last_gain;
+  int last_period;
+  float mem_hp_x[2];
+  float lastg[NB_BANDS];
+};
+
+#if SMOOTH_BANDS
+void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
+  int i;
+  float sum[NB_BANDS] = {0};
+  for (i=0;i<NB_BANDS-1;i++)
+  {
+    int j;
+    int band_size;
+    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
+    for (j=0;j<band_size;j++) {
+      float tmp;
+      float frac = (float)j/band_size;
+      tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
+      tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
+      sum[i] += (1-frac)*tmp;
+      sum[i+1] += frac*tmp;
+    }
+  }
+  sum[0] *= 2;
+  sum[NB_BANDS-1] *= 2;
+  for (i=0;i<NB_BANDS;i++)
+  {
+    bandE[i] = sum[i];
+  }
+}
+
+void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
+  int i;
+  float sum[NB_BANDS] = {0};
+  for (i=0;i<NB_BANDS-1;i++)
+  {
+    int j;
+    int band_size;
+    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
+    for (j=0;j<band_size;j++) {
+      float tmp;
+      float frac = (float)j/band_size;
+      tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r;
+      tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i;
+      sum[i] += (1-frac)*tmp;
+      sum[i+1] += frac*tmp;
+    }
+  }
+  sum[0] *= 2;
+  sum[NB_BANDS-1] *= 2;
+  for (i=0;i<NB_BANDS;i++)
+  {
+    bandE[i] = sum[i];
+  }
+}
+
+void interp_band_gain(float *g, const float *bandE) {
+  int i;
+  memset(g, 0, FREQ_SIZE*sizeof(float));
+  for (i=0;i<NB_BANDS-1;i++)
+  {
+    int j;
+    int band_size;
+    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
+    for (j=0;j<band_size;j++) {
+      float frac = (float)j/band_size;
+      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
+    }
+  }
+}
+#else
+void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
+  int i;
+  for (i=0;i<NB_BANDS;i++)
+  {
+    int j;
+    opus_val32 sum = 0;
+    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
+      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
+      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
+    }
+    bandE[i] = sum;
+  }
+}
+
+void interp_band_gain(float *g, const float *bandE) {
+  int i;
+  memset(g, 0, FREQ_SIZE*sizeof(float));
+  for (i=0;i<NB_BANDS;i++)
+  {
+    int j;
+    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
+      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
+  }
+}
+#endif
+
+
+CommonState common;
+
+static void check_init() {
+  int i;
+  if (common.init) return;
+  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
+  for (i=0;i<FRAME_SIZE;i++)
+    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
+  for (i=0;i<NB_BANDS;i++) {
+    int j;
+    for (j=0;j<NB_BANDS;j++) {
+      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
+      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
+    }
+  }
+  common.init = 1;
+}
+
+static void dct(float *out, const float *in) {
+  int i;
+  check_init();
+  for (i=0;i<NB_BANDS;i++) {
+    int j;
+    float sum = 0;
+    for (j=0;j<NB_BANDS;j++) {
+      sum += in[j] * common.dct_table[j*NB_BANDS + i];
+    }
+    out[i] = sum*sqrt(2./22);
+  }
+}
+
+#if 0
+static void idct(float *out, const float *in) {
+  int i;
+  check_init();
+  for (i=0;i<NB_BANDS;i++) {
+    int j;
+    float sum = 0;
+    for (j=0;j<NB_BANDS;j++) {
+      sum += in[j] * common.dct_table[i*NB_BANDS + j];
+    }
+    out[i] = sum*sqrt(2./22);
+  }
+}
+#endif
+
+static void forward_transform(kiss_fft_cpx *out, const float *in) {
+  int i;
+  kiss_fft_cpx x[WINDOW_SIZE];
+  kiss_fft_cpx y[WINDOW_SIZE];
+  check_init();
+  for (i=0;i<WINDOW_SIZE;i++) {
+    x[i].r = in[i];
+    x[i].i = 0;
+  }
+  opus_fft(common.kfft, x, y, 0);
+  for (i=0;i<FREQ_SIZE;i++) {
+    out[i] = y[i];
+  }
+}
+
+static void inverse_transform(float *out, const kiss_fft_cpx *in) {
+  int i;
+  kiss_fft_cpx x[WINDOW_SIZE];
+  kiss_fft_cpx y[WINDOW_SIZE];
+  check_init();
+  for (i=0;i<FREQ_SIZE;i++) {
+    x[i] = in[i];
+  }
+  for (;i<WINDOW_SIZE;i++) {
+    x[i].r = x[WINDOW_SIZE - i].r;
+    x[i].i = -x[WINDOW_SIZE - i].i;
+  }
+  opus_fft(common.kfft, x, y, 0);
+  /* output in reverse order for IFFT. */
+  out[0] = WINDOW_SIZE*y[0].r;
+  for (i=1;i<WINDOW_SIZE;i++) {
+    out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
+  }
+}
+
+static void apply_window(float *x) {
+  int i;
+  check_init();
+  for (i=0;i<FRAME_SIZE;i++) {
+    x[i] *= common.half_window[i];
+    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
+  }
+}
+
+int rnnoise_get_size() {
+  return sizeof(DenoiseState);
+}
+
+int rnnoise_init(DenoiseState *st) {
+  memset(st, 0, sizeof(*st));
+  return 0;
+}
+
+DenoiseState *rnnoise_create() {
+  DenoiseState *st;
+  st = malloc(rnnoise_get_size());
+  rnnoise_init(st);
+  return st;
+}
+
+void rnnoise_destroy(DenoiseState *st) {
+  free(st);
+}
+
+#if TRAINING
+int lowpass = FREQ_SIZE;
+int band_lp = NB_BANDS;
+#endif
+
+static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
+  int i;
+  float x[WINDOW_SIZE];
+  RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
+  for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
+  RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
+  apply_window(x);
+  forward_transform(X, x);
+#if TRAINING
+  for (i=lowpass;i<FREQ_SIZE;i++)
+    X[i].r = X[i].i = 0;
+#endif
+  compute_band_energy(Ex, X);
+}
+
+static int compute_frame_features(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cpx *P,
+                                  float *Ex, float *Ep, float *Exp, float *features, const float *in) {
+  int i;
+  float E = 0;
+  float *ceps_0, *ceps_1, *ceps_2;
+  float spec_variability = 0;
+  float Ly[NB_BANDS];
+  float p[WINDOW_SIZE];
+  float pitch_buf[PITCH_BUF_SIZE>>1];
+  int pitch_index;
+  float gain;
+  float *(pre[1]);
+  float tmp[NB_BANDS];
+  float follow, logMax;
+  frame_analysis(st, X, Ex, in);
+  RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
+  RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
+  pre[0] = &st->pitch_buf[0];
+  pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
+  pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
+               PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
+  pitch_index = PITCH_MAX_PERIOD-pitch_index;
+
+  gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
+          PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
+  st->last_period = pitch_index;
+  st->last_gain = gain;
+  for (i=0;i<WINDOW_SIZE;i++)
+    p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i];
+  apply_window(p);
+  forward_transform(P, p);
+  compute_band_energy(Ep, P);
+  compute_band_corr(Exp, X, P);
+  for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ex[i]*Ep[i]);
+  dct(tmp, Exp);
+  for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
+  features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
+  features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
+  features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
+  logMax = -2;
+  follow = -2;
+  for (i=0;i<NB_BANDS;i++) {
+    Ly[i] = log10(1e-2+Ex[i]);
+    Ly[i] = MAX16(logMax-7, MAX16(follow-1.5, Ly[i]));
+    logMax = MAX16(logMax, Ly[i]);
+    follow = MAX16(follow-1.5, Ly[i]);
+    E += Ex[i];
+  }
+  if (!TRAINING && E < 0.04) {
+    /* If there's no audio, avoid messing up the state. */
+    RNN_CLEAR(features, NB_FEATURES);
+    return 1;
+  }
+  dct(features, Ly);
+  features[0] -= 12;
+  features[1] -= 4;
+  ceps_0 = st->cepstral_mem[st->memid];
+  ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
+  ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
+  for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
+  st->memid++;
+  for (i=0;i<NB_DELTA_CEPS;i++) {
+    features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
+    features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
+    features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
+  }
+  /* Spectral variability features. */
+  if (st->memid == CEPS_MEM) st->memid = 0;
+  for (i=0;i<CEPS_MEM;i++)
+  {
+    int j;
+    float mindist = 1e15f;
+    for (j=0;j<CEPS_MEM;j++)
+    {
+      int k;
+      float dist=0;
+      for (k=0;k<NB_BANDS;k++)
+      {
+        float tmp;
+        tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
+        dist += tmp*tmp;
+      }
+      if (j!=i)
+        mindist = MIN32(mindist, dist);
+    }
+    spec_variability += mindist;
+  }
+  features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
+  return TRAINING && E < 0.1;
+}
+
+static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
+  float x[WINDOW_SIZE];
+  int i;
+  inverse_transform(x, y);
+  apply_window(x);
+  for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
+  RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
+}
+
+static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
+  int i;
+  for (i=0;i<N;i++) {
+    float xi, yi;
+    xi = x[i];
+    yi = x[i] + mem[0];
+    mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
+    mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
+    y[i] = yi;
+  }
+}
+
+void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
+                  const float *Exp, const float *g) {
+  int i;
+  float r[NB_BANDS];
+  float rf[FREQ_SIZE] = {0};
+  for (i=0;i<NB_BANDS;i++) {
+#if 0
+    if (Exp[i]>g[i]) r[i] = 1;
+    else r[i] = Exp[i]*(1-g[i])/(.001 + g[i]*(1-Exp[i]));
+    r[i] = MIN16(1, MAX16(0, r[i]));
+#else
+    if (Exp[i]>g[i]) r[i] = 1;
+    else r[i] = SQUARE(Exp[i])*(1-SQUARE(g[i]))/(.001 + SQUARE(g[i])*(1-SQUARE(Exp[i])));
+    r[i] = sqrt(MIN16(1, MAX16(0, r[i])));
+#endif
+    r[i] *= sqrt(Ex[i]/(1e-8+Ep[i]));
+  }
+  interp_band_gain(rf, r);
+  for (i=0;i<FREQ_SIZE;i++) {
+    X[i].r += rf[i]*P[i].r;
+    X[i].i += rf[i]*P[i].i;
+  }
+  float newE[NB_BANDS];
+  compute_band_energy(newE, X);
+  float norm[NB_BANDS];
+  float normf[FREQ_SIZE]={0};
+  for (i=0;i<NB_BANDS;i++) {
+    norm[i] = sqrt(Ex[i]/(1e-8+newE[i]));
+  }
+  interp_band_gain(normf, norm);
+  for (i=0;i<FREQ_SIZE;i++) {
+    X[i].r *= normf[i];
+    X[i].i *= normf[i];
+  }
+}
+
+float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
+  int i;
+  kiss_fft_cpx X[FREQ_SIZE];
+  kiss_fft_cpx P[WINDOW_SIZE];
+  float x[FRAME_SIZE];
+  float Ex[NB_BANDS], Ep[NB_BANDS];
+  float Exp[NB_BANDS];
+  float features[NB_FEATURES];
+  float g[NB_BANDS];
+  float gf[FREQ_SIZE]={1};
+  float vad_prob = 0;
+  int silence;
+  static const float a_hp[2] = {-1.99599, 0.99600};
+  static const float b_hp[2] = {-2, 1};
+  biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
+  silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, x);
+
+  if (!silence) {
+    pitch_filter(X, P, Ex, Ep, Exp, g);
+    for (i=0;i<NB_BANDS;i++) {
+      float alpha = .6f;
+      g[i] = MAX16(g[i], alpha*st->lastg[i]);
+      st->lastg[i] = g[i];
+    }
+    interp_band_gain(gf, g);
+#if 1
+    for (i=0;i<FREQ_SIZE;i++) {
+      X[i].r *= gf[i];
+      X[i].i *= gf[i];
+    }
+#endif
+  }
+
+  frame_synthesis(st, out, X);
+  return vad_prob;
+}
+
+#if TRAINING
+
+static float uni_rand() {
+  return rand()/(double)RAND_MAX-.5;
+}
+
+static void rand_resp(float *a, float *b) {
+  a[0] = .75*uni_rand();
+  a[1] = .75*uni_rand();
+  b[0] = .75*uni_rand();
+  b[1] = .75*uni_rand();
+}
+
+int main(int argc, char **argv) {
+  int i;
+  int count=0;
+  static const float a_hp[2] = {-1.99599, 0.99600};
+  static const float b_hp[2] = {-2, 1};
+  float a_noise[2] = {0};
+  float b_noise[2] = {0};
+  float a_sig[2] = {0};
+  float b_sig[2] = {0};
+  float mem_hp_x[2]={0};
+  float mem_hp_n[2]={0};
+  float mem_resp_x[2]={0};
+  float mem_resp_n[2]={0};
+  float x[FRAME_SIZE];
+  float n[FRAME_SIZE];
+  float xn[FRAME_SIZE];
+  int vad_cnt=0;
+  int gain_change_count=0;
+  float speech_gain = 1, noise_gain = 1;
+  FILE *f1, *f2, *fout;
+  DenoiseState *st;
+  DenoiseState *noise_state;
+  DenoiseState *noisy;
+  st = rnnoise_create();
+  noise_state = rnnoise_create();
+  noisy = rnnoise_create();
+  if (argc!=4) {
+    fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
+    return 1;
+  }
+  f1 = fopen(argv[1], "r");
+  f2 = fopen(argv[2], "r");
+  fout = fopen(argv[3], "w");
+  for(i=0;i<150;i++) {
+    short tmp[FRAME_SIZE];
+    fread(tmp, sizeof(short), FRAME_SIZE, f2);
+  }
+  while (1) {
+    kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE];
+    float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS];
+    float Exp[NB_BANDS];
+    float Ln[NB_BANDS];
+    float features[NB_FEATURES];
+    float g[NB_BANDS];
+    float gf[FREQ_SIZE]={1};
+    short tmp[FRAME_SIZE];
+    float vad=0;
+    float vad_prob;
+    float E=0;
+    if (count==50000000) break;
+    if (++gain_change_count > 2821) {
+      speech_gain = pow(10., (-40+(rand()%60))/20.);
+      noise_gain = pow(10., (-30+(rand()%50))/20.);
+      if (rand()%10==0) noise_gain = 0;
+      noise_gain *= speech_gain;
+      if (rand()%10==0) speech_gain = 0;
+      gain_change_count = 0;
+      rand_resp(a_noise, b_noise);
+      rand_resp(a_sig, b_sig);
+      lowpass = FREQ_SIZE * 3000./24000. * pow(50., rand()/(double)RAND_MAX);
+      for (i=0;i<NB_BANDS;i++) {
+        if (eband5ms[i]<<FRAME_SIZE_SHIFT > lowpass) {
+          band_lp = i;
+          break;
+        }
+      }
+    }
+    if (speech_gain != 0) {
+      fread(tmp, sizeof(short), FRAME_SIZE, f1);
+      if (feof(f1)) {
+        rewind(f1);
+        fread(tmp, sizeof(short), FRAME_SIZE, f1);
+      }
+      for (i=0;i<FRAME_SIZE;i++) x[i] = speech_gain*tmp[i];
+      for (i=0;i<FRAME_SIZE;i++) E += tmp[i]*(float)tmp[i];
+    } else {
+      for (i=0;i<FRAME_SIZE;i++) x[i] = 0;
+      E = 0;
+    }
+    if (noise_gain!=0) {
+      fread(tmp, sizeof(short), FRAME_SIZE, f2);
+      if (feof(f2)) {
+        rewind(f2);
+        fread(tmp, sizeof(short), FRAME_SIZE, f2);
+      }
+      for (i=0;i<FRAME_SIZE;i++) n[i] = noise_gain*tmp[i];
+    } else {
+      for (i=0;i<FRAME_SIZE;i++) n[i] = 0;
+    }
+    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
+    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
+    biquad(n, mem_hp_n, n, b_hp, a_hp, FRAME_SIZE);
+    biquad(n, mem_resp_n, n, b_noise, a_noise, FRAME_SIZE);
+    for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
+    if (E > 1e9f) {
+      vad_cnt=0;
+    } else if (E > 1e8f) {
+      vad_cnt -= 5;
+    } else if (E > 1e7f) {
+      vad_cnt++;
+    } else {
+      vad_cnt+=2;
+    }
+    if (vad_cnt < 0) vad_cnt = 0;
+    if (vad_cnt > 15) vad_cnt = 15;
+
+    if (vad_cnt >= 10) vad = 0;
+    else if (vad_cnt > 0) vad = 0.5f;
+    else vad = 1.f;
+
+    frame_analysis(st, Y, Ey, x);
+    frame_analysis(noise_state, N, En, n);
+    for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-2+En[i]);
+    int silence = compute_frame_features(noisy, X, P, Ex, Ep, Exp, features, xn);
+    pitch_filter(X, P, Ex, Ep, Exp, g);
+    //printf("%f %d\n", noisy->last_gain, noisy->last_period);
+    for (i=0;i<NB_BANDS;i++) {
+      g[i] = sqrt((Ey[i]+1e-3)/(Ex[i]+1e-3));
+      if (g[i] > 1) g[i] = 1;
+      if (silence || i > band_lp) g[i] = -1;
+      if (Ey[i] < 5e-2 && Ex[i] < 5e-2) g[i] = -1;
+      if (vad==0 && noise_gain==0) g[i] = -1;
+    }
+    count++;
+#if 0
+    for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
+    for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
+    for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
+    printf("%f\n", vad);
+#endif
+#if 1
+    fwrite(features, sizeof(float), NB_FEATURES, stdout);
+    fwrite(g, sizeof(float), NB_BANDS, stdout);
+    fwrite(Ln, sizeof(float), NB_BANDS, stdout);
+    fwrite(&vad, sizeof(float), 1, stdout);
+#endif
+#if 0
+    compute_rnn(&noisy->rnn, g, &vad_prob, features);
+    interp_band_gain(gf, g);
+#if 1
+    for (i=0;i<FREQ_SIZE;i++) {
+      X[i].r *= gf[i];
+      X[i].i *= gf[i];
+    }
+#endif
+    frame_synthesis(noisy, xn, X);
+
+    for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
+    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
+#endif
+  }
+  fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
+  fclose(f1);
+  fclose(f2);
+  fclose(fout);
+  return 0;
+}
+
+#endif
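
One property worth noting in denoise.c above: the analysis/synthesis window built in check_init() is the Vorbis power-complementary window, so the squares of half_window[i] and half_window[FRAME_SIZE-1-i] sum to 1. That is what lets the windowed 50%-overlap-add in frame_synthesis() reconstruct the input (up to the FFT scaling) when all gains are 1. A standalone sketch that checks the identity numerically:

    /* Verify the power-complementary property of the window from check_init(). */
    #include <math.h>
    #include <stdio.h>

    #define FRAME_SIZE 160

    int main(void) {
      double maxerr = 0;
      int i;
      for (i=0;i<FRAME_SIZE;i++) {
        double s  = sin(.5*M_PI*(i+.5)/FRAME_SIZE);
        double w  = sin(.5*M_PI*s*s);                    /* half_window[i] */
        double s2 = sin(.5*M_PI*(FRAME_SIZE-i-.5)/FRAME_SIZE);
        double w2 = sin(.5*M_PI*s2*s2);                  /* half_window[FRAME_SIZE-1-i] */
        double err = fabs(w*w + w2*w2 - 1.0);
        if (err > maxerr) maxerr = err;
      }
      printf("max |w[i]^2 + w[F-1-i]^2 - 1| = %g\n", maxerr);  /* on the order of 1e-16 */
      return 0;
    }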
--- /dev/null
+++ b/dnn/kiss_fft.c
@@ -1,0 +1,601 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+  Lots of modifications by Jean-Marc Valin
+  Copyright (c) 2005-2007, Xiph.Org Foundation
+  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+/* This code is originally from Mark Borgerding's KISS-FFT but has been
+   heavily modified to better suit Opus */
+
+#ifndef SKIP_CONFIG_H
+#  ifdef HAVE_CONFIG_H
+#    include "config.h"
+#  endif
+#endif
+
+#include "_kiss_fft_guts.h"
+#define CUSTOM_MODES
+
+/* The guts header contains all the multiplication and addition macros that are defined for
+   complex numbers.  It also declares the kf_ internal functions.
+*/
+
+static void kf_bfly2(
+                     kiss_fft_cpx * Fout,
+                     int m,
+                     int N
+                    )
+{
+   kiss_fft_cpx * Fout2;
+   int i;
+   (void)m;
+#ifdef CUSTOM_MODES
+   if (m==1)
+   {
+      celt_assert(m==1);
+      for (i=0;i<N;i++)
+      {
+         kiss_fft_cpx t;
+         Fout2 = Fout + 1;
+         t = *Fout2;
+         C_SUB( *Fout2 ,  *Fout , t );
+         C_ADDTO( *Fout ,  t );
+         Fout += 2;
+      }
+   } else
+#endif
+   {
+      opus_val16 tw;
+      tw = QCONST16(0.7071067812f, 15);
+      /* We know that m==4 here because the radix-2 is just after a radix-4 */
+      celt_assert(m==4);
+      for (i=0;i<N;i++)
+      {
+         kiss_fft_cpx t;
+         Fout2 = Fout + 4;
+         t = Fout2[0];
+         C_SUB( Fout2[0] ,  Fout[0] , t );
+         C_ADDTO( Fout[0] ,  t );
+
+         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
+         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
+         C_SUB( Fout2[1] ,  Fout[1] , t );
+         C_ADDTO( Fout[1] ,  t );
+
+         t.r = Fout2[2].i;
+         t.i = -Fout2[2].r;
+         C_SUB( Fout2[2] ,  Fout[2] , t );
+         C_ADDTO( Fout[2] ,  t );
+
+         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
+         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
+         C_SUB( Fout2[3] ,  Fout[3] , t );
+         C_ADDTO( Fout[3] ,  t );
+         Fout += 8;
+      }
+   }
+}
+
+static void kf_bfly4(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   int i;
+
+   if (m==1)
+   {
+      /* Degenerate case where all the twiddles are 1. */
+      for (i=0;i<N;i++)
+      {
+         kiss_fft_cpx scratch0, scratch1;
+
+         C_SUB( scratch0 , *Fout, Fout[2] );
+         C_ADDTO(*Fout, Fout[2]);
+         C_ADD( scratch1 , Fout[1] , Fout[3] );
+         C_SUB( Fout[2], *Fout, scratch1 );
+         C_ADDTO( *Fout , scratch1 );
+         C_SUB( scratch1 , Fout[1] , Fout[3] );
+
+         Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
+         Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
+         Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
+         Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
+         Fout+=4;
+      }
+   } else {
+      int j;
+      kiss_fft_cpx scratch[6];
+      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+      const int m2=2*m;
+      const int m3=3*m;
+      kiss_fft_cpx * Fout_beg = Fout;
+      for (i=0;i<N;i++)
+      {
+         Fout = Fout_beg + i*mm;
+         tw3 = tw2 = tw1 = st->twiddles;
+         /* m is guaranteed to be a multiple of 4. */
+         for (j=0;j<m;j++)
+         {
+            C_MUL(scratch[0],Fout[m] , *tw1 );
+            C_MUL(scratch[1],Fout[m2] , *tw2 );
+            C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+            C_SUB( scratch[5] , *Fout, scratch[1] );
+            C_ADDTO(*Fout, scratch[1]);
+            C_ADD( scratch[3] , scratch[0] , scratch[2] );
+            C_SUB( scratch[4] , scratch[0] , scratch[2] );
+            C_SUB( Fout[m2], *Fout, scratch[3] );
+            tw1 += fstride;
+            tw2 += fstride*2;
+            tw3 += fstride*3;
+            C_ADDTO( *Fout , scratch[3] );
+
+            Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
+            Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
+            Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
+            Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
+            ++Fout;
+         }
+      }
+   }
+}
+
+
+#ifndef RADIX_TWO_ONLY
+
+static void kf_bfly3(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   int i;
+   size_t k;
+   const size_t m2 = 2*m;
+   const kiss_twiddle_cpx *tw1,*tw2;
+   kiss_fft_cpx scratch[5];
+   kiss_twiddle_cpx epi3;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+#ifdef FIXED_POINT
+   /*epi3.r = -16384;*/ /* Unused */
+   epi3.i = -28378;
+#else
+   epi3 = st->twiddles[fstride*m];
+#endif
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw1=tw2=st->twiddles;
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
+      k=m;
+      do {
+
+         C_MUL(scratch[1],Fout[m] , *tw1);
+         C_MUL(scratch[2],Fout[m2] , *tw2);
+
+         C_ADD(scratch[3],scratch[1],scratch[2]);
+         C_SUB(scratch[0],scratch[1],scratch[2]);
+         tw1 += fstride;
+         tw2 += fstride*2;
+
+         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
+         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
+
+         C_MULBYSCALAR( scratch[0] , epi3.i );
+
+         C_ADDTO(*Fout,scratch[3]);
+
+         Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
+         Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
+
+         Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
+         Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
+
+         ++Fout;
+      } while(--k);
+   }
+}
+
+
+#ifndef OVERRIDE_kf_bfly5
+static void kf_bfly5(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+   int i, u;
+   kiss_fft_cpx scratch[13];
+   const kiss_twiddle_cpx *tw;
+   kiss_twiddle_cpx ya,yb;
+   kiss_fft_cpx * Fout_beg = Fout;
+
+#ifdef FIXED_POINT
+   ya.r = 10126;
+   ya.i = -31164;
+   yb.r = -26510;
+   yb.i = -19261;
+#else
+   ya = st->twiddles[fstride*m];
+   yb = st->twiddles[fstride*2*m];
+#endif
+   tw=st->twiddles;
+
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
+      for ( u=0; u<m; ++u ) {
+         scratch[0] = *Fout0;
+
+         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+         C_ADD( scratch[7],scratch[1],scratch[4]);
+         C_SUB( scratch[10],scratch[1],scratch[4]);
+         C_ADD( scratch[8],scratch[2],scratch[3]);
+         C_SUB( scratch[9],scratch[2],scratch[3]);
+
+         Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
+         Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
+
+         scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
+         scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
+
+         scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
+         scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
+
+         C_SUB(*Fout1,scratch[5],scratch[6]);
+         C_ADD(*Fout4,scratch[5],scratch[6]);
+
+         scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
+         scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
+         scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
+         scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
+
+         C_ADD(*Fout2,scratch[11],scratch[12]);
+         C_SUB(*Fout3,scratch[11],scratch[12]);
+
+         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+   }
+}
+#endif /* OVERRIDE_kf_bfly5 */
+
+
+#endif
+
+
+#ifdef CUSTOM_MODES
+
+static
+void compute_bitrev_table(
+         int Fout,
+         opus_int16 *f,
+         const size_t fstride,
+         int in_stride,
+         opus_int16 * factors,
+         const kiss_fft_state *st
+            )
+{
+   const int p=*factors++; /* the radix  */
+   const int m=*factors++; /* stage's fft length/p */
+
+    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
+   if (m==1)
+   {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         *f = Fout+j;
+         f += fstride*in_stride;
+      }
+   } else {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
+         f += fstride*in_stride;
+         Fout += m;
+      }
+   }
+}
+
+/*  facbuf is populated by p1,m1,p2,m2, ...
+    where
+    p[i] * m[i] = m[i-1]
+    m0 = n                  */
+static
+int kf_factor(int n,opus_int16 * facbuf)
+{
+    int p=4;
+    int i;
+    int stages=0;
+    int nbak = n;
+
+    /*factor out powers of 4, powers of 2, then any remaining primes */
+    do {
+        while (n % p) {
+            switch (p) {
+                case 4: p = 2; break;
+                case 2: p = 3; break;
+                default: p += 2; break;
+            }
+            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
+                p = n;          /* no more factors, skip to end */
+        }
+        n /= p;
+#ifdef RADIX_TWO_ONLY
+        if (p!=2 && p != 4)
+#else
+        if (p>5)
+#endif
+        {
+           return 0;
+        }
+        facbuf[2*stages] = p;
+        if (p==2 && stages > 1)
+        {
+           facbuf[2*stages] = 4;
+           facbuf[2] = 2;
+        }
+        stages++;
+    } while (n > 1);
+    n = nbak;
+    /* Reverse the order to get the radix 4 at the end, so we can use the
+       fast degenerate case. It turns out that reversing the order also
+       improves the noise behaviour. */
+    for (i=0;i<stages/2;i++)
+    {
+       int tmp;
+       tmp = facbuf[2*i];
+       facbuf[2*i] = facbuf[2*(stages-i-1)];
+       facbuf[2*(stages-i-1)] = tmp;
+    }
+    for (i=0;i<stages;i++)
+    {
+        n /= facbuf[2*i];
+        facbuf[2*i+1] = n;
+    }
+    return 1;
+}
+
+static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
+{
+   int i;
+#ifdef FIXED_POINT
+   for (i=0;i<nfft;++i) {
+      opus_val32 phase = -i;
+      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
+   }
+#else
+   for (i=0;i<nfft;++i) {
+      const double pi=3.14159265358979323846264338327;
+      double phase = ( -2*pi /nfft ) * i;
+      kf_cexp(twiddles+i, phase );
+   }
+#endif
+}
+
+int opus_fft_alloc_arch_c(kiss_fft_state *st) {
+   (void)st;
+   return 0;
+}
+
+/*
+ * Allocates all necessary storage space for the FFT and IFFT.
+ * Note that the twiddles and the bit-reversal table are allocated
+ * separately below, so the state should be freed with opus_fft_free()
+ * rather than a plain free().
+ */
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
+                                        const kiss_fft_state *base, int arch)
+{
+    kiss_fft_state *st=NULL;
+    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
+
+    if ( lenmem==NULL ) {
+        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
+    }else{
+        if (mem != NULL && *lenmem >= memneeded)
+            st = (kiss_fft_state*)mem;
+        *lenmem = memneeded;
+    }
+    if (st) {
+        opus_int16 *bitrev;
+        kiss_twiddle_cpx *twiddles;
+
+        st->nfft=nfft;
+#ifdef FIXED_POINT
+        st->scale_shift = celt_ilog2(st->nfft);
+        if (st->nfft == 1<<st->scale_shift)
+           st->scale = Q15ONE;
+        else
+           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
+#else
+        st->scale = 1.f/nfft;
+#endif
+        if (base != NULL)
+        {
+           st->twiddles = base->twiddles;
+           st->shift = 0;
+           while (st->shift < 32 && nfft<<st->shift != base->nfft)
+              st->shift++;
+           if (st->shift>=32)
+              goto fail;
+        } else {
+           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
+           compute_twiddles(twiddles, nfft);
+           st->shift = -1;
+        }
+        if (!kf_factor(nfft,st->factors))
+        {
+           goto fail;
+        }
+
+        /* bitrev */
+        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
+        if (st->bitrev==NULL)
+            goto fail;
+        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
+
+        /* Initialize architecture specific fft parameters */
+        if (opus_fft_alloc_arch(st, arch))
+            goto fail;
+    }
+    return st;
+fail:
+    opus_fft_free(st, arch);
+    return NULL;
+}
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
+{
+   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
+}
+
+void opus_fft_free_arch_c(kiss_fft_state *st) {
+   (void)st;
+}
+
+void opus_fft_free(const kiss_fft_state *cfg, int arch)
+{
+   if (cfg)
+   {
+      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
+      opus_free((opus_int16*)cfg->bitrev);
+      if (cfg->shift < 0)
+         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
+      opus_free((kiss_fft_state*)cfg);
+   }
+}
+
+#endif /* CUSTOM_MODES */
+
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
+{
+    int m2, m;
+    int p;
+    int L;
+    int fstride[MAXFACTORS];
+    int i;
+    int shift;
+
+    /* st->shift can be -1 */
+    shift = st->shift>0 ? st->shift : 0;
+
+    fstride[0] = 1;
+    L=0;
+    do {
+       p = st->factors[2*L];
+       m = st->factors[2*L+1];
+       fstride[L+1] = fstride[L]*p;
+       L++;
+    } while(m!=1);
+    m = st->factors[2*L-1];
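+    /* Apply the butterfly stages in reverse factor order: the first pass runs
+       with m==1 (the smallest sub-transforms) and each later pass combines
+       them into longer sub-FFTs; m2 is the m of the next stage, i.e. p*m of
+       the current one. */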
+    for (i=L-1;i>=0;i--)
+    {
+       if (i!=0)
+          m2 = st->factors[2*i-1];
+       else
+          m2 = 1;
+       switch (st->factors[2*i])
+       {
+       case 2:
+          kf_bfly2(fout, m, fstride[i]);
+          break;
+       case 4:
+          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+ #ifndef RADIX_TWO_ONLY
+       case 3:
+          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+       case 5:
+          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+ #endif
+       }
+       m = m2;
+    }
+}
+
+void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+   int i;
+   opus_val16 scale;
+#ifdef FIXED_POINT
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   int scale_shift = st->scale_shift-1;
+#endif
+   scale = st->scale;
+
+   celt_assert2 (fin != fout, "In-place FFT not supported");
+   /* Bit-reverse the input */
+   for (i=0;i<st->nfft;i++)
+   {
+      kiss_fft_cpx x = fin[i];
+      fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
+      fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
+   }
+   opus_fft_impl(st, fout);
+}
+
+
+void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+   int i;
+   celt_assert2 (fin != fout, "In-place FFT not supported");
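+   /* The inverse transform is computed as conj(FFT(conj(x))) by negating the
+      imaginary parts before and after the forward pass. Unlike opus_fft_c(),
+      no 1/nfft scaling is applied here. */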
+   /* Bit-reverse the input */
+   for (i=0;i<st->nfft;i++)
+      fout[st->bitrev[i]] = fin[i];
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
+   opus_fft_impl(st, fout);
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
+}
--- /dev/null
+++ b/dnn/kiss_fft.h
@@ -1,0 +1,203 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+  Lots of modifications by Jean-Marc Valin
+  Copyright (c) 2005-2007, Xiph.Org Foundation
+  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <math.h>
+#include "arch.h"
+
+#include <stdlib.h>
+#define opus_alloc(x) malloc(x)
+#define opus_free(x) free(x)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef USE_SIMD
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
+#else
+#define KISS_FFT_MALLOC opus_alloc
+#endif
+
+#ifdef FIXED_POINT
+#include "arch.h"
+
+#  define kiss_fft_scalar opus_int32
+#  define kiss_twiddle_scalar opus_int16
+
+
+#else
+# ifndef kiss_fft_scalar
+/*  default is float */
+#   define kiss_fft_scalar float
+#   define kiss_twiddle_scalar float
+#   define KF_SUFFIX _celt_single
+# endif
+#endif
+
+typedef struct {
+    kiss_fft_scalar r;
+    kiss_fft_scalar i;
+}kiss_fft_cpx;
+
+typedef struct {
+   kiss_twiddle_scalar r;
+   kiss_twiddle_scalar i;
+}kiss_twiddle_cpx;
+
+#define MAXFACTORS 8
+/* e.g. an FFT of length 128 has 4 factors as far as
+   kissfft is concerned: 4*4*4*2 */
+
+typedef struct arch_fft_state{
+   int is_supported;
+   void *priv;
+} arch_fft_state;
+
+typedef struct kiss_fft_state{
+    int nfft;
+    opus_val16 scale;
+#ifdef FIXED_POINT
+    int scale_shift;
+#endif
+    int shift;
+    opus_int16 factors[2*MAXFACTORS];
+    const opus_int16 *bitrev;
+    const kiss_twiddle_cpx *twiddles;
+    arch_fft_state *arch_fft;
+} kiss_fft_state;
+
+#if defined(HAVE_ARM_NE10)
+#include "arm/fft_arm.h"
+#endif
+
+/*typedef struct kiss_fft_state* kiss_fft_cfg;*/
+
+/**
+ *  opus_fft_alloc
+ *
+ *  Initialize an FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ *  typical usage:      kiss_fft_state *mycfg = opus_fft_alloc(1024, NULL, NULL, 0);
+ *
+ *  The return value from opus_fft_alloc is a cfg buffer used internally
+ *  by the FFT routine, or NULL on failure.
+ *
+ *  If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc.
+ *  The returned value should be freed with opus_fft_free() when done to
+ *  avoid memory leaks.
+ *
+ *  The state can be placed in a user supplied buffer 'mem':
+ *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ *      then the function places the cfg in mem and the size used in *lenmem
+ *      and returns mem.
+ *
+ *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ *      then the function returns NULL and places the minimum cfg
+ *      buffer size in *lenmem.
+ * */
+
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
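+
+/* Illustrative usage sketch (assumes the CUSTOM_MODES allocation path is
+   compiled in and that arch 0 selects the plain C implementation):
+
+      kiss_fft_state *st = opus_fft_alloc(480, NULL, NULL, 0);
+      ...
+      opus_fft_free(st, 0);
+
+   Passing a non-NULL lenmem with mem==NULL (or a *lenmem that is too small)
+   performs a size query: the call returns NULL and writes the required
+   buffer size to *lenmem. */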
+
+/**
+ * opus_fft(cfg, fin, fout)
+ *
+ * Perform an FFT on a complex input buffer (out-of-place: fin must not
+ * alias fout).
+ * For a forward FFT,
+ * fin should be  f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be   F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+ *    f[k].r and f[k].i
+ * */
+void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
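+
+/* Illustrative round-trip sketch (assumes a state st obtained as described
+   above; in the float build the forward transform scales by 1/nfft and the
+   inverse applies no scaling, so the round trip recovers the input up to
+   rounding):
+
+      kiss_fft_cpx time[480], freq[480], back[480];
+      opus_fft_c(st, time, freq);    forward transform, fin != fout required
+      opus_ifft_c(st, freq, back);   inverse transform, back ~= time
+*/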
+
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
+void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
+
+void opus_fft_free(const kiss_fft_state *cfg, int arch);
+
+
+void opus_fft_free_arch_c(kiss_fft_state *st);
+int opus_fft_alloc_arch_c(kiss_fft_state *st);
+
+#if !defined(OVERRIDE_OPUS_FFT)
+/* Is run-time CPU detection enabled on this platform? */
+#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
+
+extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(
+ kiss_fft_state *st);
+
+#define opus_fft_alloc_arch(_st, arch) \
+         ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
+
+extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(
+ kiss_fft_state *st);
+#define opus_fft_free_arch(_st, arch) \
+         ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
+
+extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+ const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
+#define opus_fft(_cfg, _fin, _fout, arch) \
+   ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
+
+extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+ const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
+#define opus_ifft(_cfg, _fin, _fout, arch) \
+   ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
+
+#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+
+#define opus_fft_alloc_arch(_st, arch) \
+         ((void)(arch), opus_fft_alloc_arch_c(_st))
+
+#define opus_fft_free_arch(_st, arch) \
+         ((void)(arch), opus_fft_free_arch_c(_st))
+
+#define opus_fft(_cfg, _fin, _fout, arch) \
+         ((void)(arch), opus_fft_c(_cfg, _fin, _fout))
+
+#define opus_ifft(_cfg, _fin, _fout, arch) \
+         ((void)(arch), opus_ifft_c(_cfg, _fin, _fout))
+
+#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
+#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null
+++ b/dnn/opus_types.h
@@ -1,0 +1,159 @@
+/* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */
+/* Modified by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* opus_types.h based on ogg_types.h from libogg */
+
+/**
+   @file opus_types.h
+   @brief Opus reference implementation types
+*/
+#ifndef OPUS_TYPES_H
+#define OPUS_TYPES_H
+
+/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
+#if (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
+#include <stdint.h>
+
+   typedef int16_t opus_int16;
+   typedef uint16_t opus_uint16;
+   typedef int32_t opus_int32;
+   typedef uint32_t opus_uint32;
+#elif defined(_WIN32)
+
+#  if defined(__CYGWIN__)
+#    include <_G_config.h>
+     typedef _G_int32_t opus_int32;
+     typedef _G_uint32_t opus_uint32;
+     typedef _G_int16 opus_int16;
+     typedef _G_uint16 opus_uint16;
+#  elif defined(__MINGW32__)
+     typedef short opus_int16;
+     typedef unsigned short opus_uint16;
+     typedef int opus_int32;
+     typedef unsigned int opus_uint32;
+#  elif defined(__MWERKS__)
+     typedef int opus_int32;
+     typedef unsigned int opus_uint32;
+     typedef short opus_int16;
+     typedef unsigned short opus_uint16;
+#  else
+     /* MSVC/Borland */
+     typedef __int32 opus_int32;
+     typedef unsigned __int32 opus_uint32;
+     typedef __int16 opus_int16;
+     typedef unsigned __int16 opus_uint16;
+#  endif
+
+#elif defined(__MACOS__)
+
+#  include <sys/types.h>
+   typedef SInt16 opus_int16;
+   typedef UInt16 opus_uint16;
+   typedef SInt32 opus_int32;
+   typedef UInt32 opus_uint32;
+
+#elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
+
+#  include <sys/types.h>
+   typedef int16_t opus_int16;
+   typedef u_int16_t opus_uint16;
+   typedef int32_t opus_int32;
+   typedef u_int32_t opus_uint32;
+
+#elif defined(__BEOS__)
+
+   /* Be */
+#  include <inttypes.h>
+   typedef int16 opus_int16;
+   typedef u_int16 opus_uint16;
+   typedef int32_t opus_int32;
+   typedef u_int32_t opus_uint32;
+
+#elif defined (__EMX__)
+
+   /* OS/2 GCC */
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#elif defined (DJGPP)
+
+   /* DJGPP */
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#elif defined(R5900)
+
+   /* PS2 EE */
+   typedef int opus_int32;
+   typedef unsigned opus_uint32;
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+
+#elif defined(__SYMBIAN32__)
+
+   /* Symbian GCC */
+   typedef signed short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef signed int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
+
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef long opus_int32;
+   typedef unsigned long opus_uint32;
+
+#elif defined(CONFIG_TI_C6X)
+
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#else
+
+   /* Give up, take a reasonable guess */
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#endif
+
+#define opus_int         int                     /* used for counters etc; at least 16 bits */
+#define opus_int64       long long
+#define opus_int8        signed char
+
+#define opus_uint        unsigned int            /* used for counters etc; at least 16 bits */
+#define opus_uint64      unsigned long long
+#define opus_uint8       unsigned char
+
+#endif  /* OPUS_TYPES_H */
--- /dev/null
+++ b/dnn/pitch.c
@@ -1,0 +1,526 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.c
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pitch.h"
+#include "common.h"
+//#include "modes.h"
+//#include "stack_alloc.h"
+//#include "mathops.h"
+#include "celt_lpc.h"
+#include <math.h>
+
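+/* Keeps track of the two lags in [0,max_pitch) that maximize the normalized
+   correlation xcorr[i]^2/Syy, where Syy is the energy of y over the window
+   y[i..i+len-1]; the comparison is done with cross-multiplications to avoid
+   divisions. */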
+static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
+                            int max_pitch, int *best_pitch
+#ifdef FIXED_POINT
+                            , int yshift, opus_val32 maxcorr
+#endif
+                            )
+{
+   int i, j;
+   opus_val32 Syy=1;
+   opus_val16 best_num[2];
+   opus_val32 best_den[2];
+#ifdef FIXED_POINT
+   int xshift;
+
+   xshift = celt_ilog2(maxcorr)-14;
+#endif
+
+   best_num[0] = -1;
+   best_num[1] = -1;
+   best_den[0] = 0;
+   best_den[1] = 0;
+   best_pitch[0] = 0;
+   best_pitch[1] = 1;
+   for (j=0;j<len;j++)
+      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
+   for (i=0;i<max_pitch;i++)
+   {
+      if (xcorr[i]>0)
+      {
+         opus_val16 num;
+         opus_val32 xcorr16;
+         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
+#ifndef FIXED_POINT
+         /* Considering the range of xcorr16, this should avoid both underflows
+            and overflows (inf) when squaring xcorr16 */
+         xcorr16 *= 1e-12f;
+#endif
+         num = MULT16_16_Q15(xcorr16,xcorr16);
+         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
+         {
+            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
+            {
+               best_num[1] = best_num[0];
+               best_den[1] = best_den[0];
+               best_pitch[1] = best_pitch[0];
+               best_num[0] = num;
+               best_den[0] = Syy;
+               best_pitch[0] = i;
+            } else {
+               best_num[1] = num;
+               best_den[1] = Syy;
+               best_pitch[1] = i;
+            }
+         }
+      }
+      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
+      Syy = MAX32(1, Syy);
+   }
+}
+
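+/* Direct-form FIR with five extra taps: in the float build this computes
+   y[i] = x[i] + sum_{k=0..4} num[k]*x[i-1-k], with the initial history taken
+   from mem[] (the fixed-point build adds the usual SIG_SHIFT rounding). */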
+static void celt_fir5(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         opus_val16 *mem)
+{
+   int i;
+   opus_val16 num0, num1, num2, num3, num4;
+   opus_val32 mem0, mem1, mem2, mem3, mem4;
+   num0=num[0];
+   num1=num[1];
+   num2=num[2];
+   num3=num[3];
+   num4=num[4];
+   mem0=mem[0];
+   mem1=mem[1];
+   mem2=mem[2];
+   mem3=mem[3];
+   mem4=mem[4];
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      sum = MAC16_16(sum,num0,mem0);
+      sum = MAC16_16(sum,num1,mem1);
+      sum = MAC16_16(sum,num2,mem2);
+      sum = MAC16_16(sum,num3,mem3);
+      sum = MAC16_16(sum,num4,mem4);
+      mem4 = mem3;
+      mem3 = mem2;
+      mem2 = mem1;
+      mem1 = mem0;
+      mem0 = x[i];
+      y[i] = ROUND16(sum, SIG_SHIFT);
+   }
+   mem[0]=mem0;
+   mem[1]=mem1;
+   mem[2]=mem2;
+   mem[3]=mem3;
+   mem[4]=mem4;
+}
+
+
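+/* Decimates the input by 2 (summing the two channels when C==2), then applies
+   a 5-tap whitening filter derived from a lag-windowed 4th-order LPC analysis
+   (with an extra zero added), so that the subsequent pitch search operates on
+   a spectrally flattened signal. */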
+void pitch_downsample(celt_sig *x[], opus_val16 *x_lp,
+      int len, int C)
+{
+   int i;
+   opus_val32 ac[5];
+   opus_val16 tmp=Q15ONE;
+   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
+   opus_val16 lpc2[5];
+   opus_val16 c1 = QCONST16(.8f,15);
+#ifdef FIXED_POINT
+   int shift;
+   opus_val32 maxabs = celt_maxabs32(x[0], len);
+   if (C==2)
+   {
+      opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
+      maxabs = MAX32(maxabs, maxabs_1);
+   }
+   if (maxabs<1)
+      maxabs=1;
+   shift = celt_ilog2(maxabs)-10;
+   if (shift<0)
+      shift=0;
+   if (C==2)
+      shift++;
+#endif
+   for (i=1;i<len>>1;i++)
+      x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
+   x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
+   if (C==2)
+   {
+      for (i=1;i<len>>1;i++)
+         x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
+      x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
+   }
+
+   _celt_autocorr(x_lp, ac, NULL, 0,
+                  4, len>>1);
+
+   /* Noise floor -40 dB */
+#ifdef FIXED_POINT
+   ac[0] += SHR32(ac[0],13);
+#else
+   ac[0] *= 1.0001f;
+#endif
+   /* Lag windowing */
+   for (i=1;i<=4;i++)
+   {
+      /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef FIXED_POINT
+      ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+      ac[i] -= ac[i]*(.008f*i)*(.008f*i);
+#endif
+   }
+
+   _celt_lpc(lpc, ac, 4);
+   for (i=0;i<4;i++)
+   {
+      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
+      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
+   }
+   /* Add a zero */
+   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
+   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
+   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
+   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
+   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
+   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
+}
+
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch)
+{
+
+#if 0 /* This is a simple version of the pitch correlation that should work
+         well on DSPs like Blackfin and TI C5x/C6x */
+   int i, j;
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, _x[j], _y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
+
+#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
+   int i;
+   /*The EDSP version requires that max_pitch is at least 1, and that _x is
+      32-bit aligned.
+     Since it's hard to put asserts in assembly, put them here.*/
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   celt_assert(max_pitch>0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+   for (i=0;i<max_pitch-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(_x, _y+i, sum, len);
+      xcorr[i]=sum[0];
+      xcorr[i+1]=sum[1];
+      xcorr[i+2]=sum[2];
+      xcorr[i+3]=sum[3];
+#ifdef FIXED_POINT
+      sum[0] = MAX32(sum[0], sum[1]);
+      sum[2] = MAX32(sum[2], sum[3]);
+      sum[0] = MAX32(sum[0], sum[2]);
+      maxcorr = MAX32(maxcorr, sum[0]);
+#endif
+   }
+   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   for (;i<max_pitch;i++)
+   {
+      opus_val32 sum;
+      sum = celt_inner_prod(_x, _y+i, len);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
+#endif
+}
+
+void pitch_search(const opus_val16 *x_lp, opus_val16 *y,
+                  int len, int max_pitch, int *pitch)
+{
+   int i, j;
+   int lag;
+   int best_pitch[2]={0,0};
+#ifdef FIXED_POINT
+   opus_val32 maxcorr;
+   opus_val32 xmax, ymax;
+   int shift=0;
+#endif
+   int offset;
+
+   celt_assert(len>0);
+   celt_assert(max_pitch>0);
+   lag = len+max_pitch;
+
+   opus_val16 x_lp4[len>>2];
+   opus_val16 y_lp4[lag>>2];
+   opus_val32 xcorr[max_pitch>>1];
+
+   /* Downsample by 2 again */
+   for (j=0;j<len>>2;j++)
+      x_lp4[j] = x_lp[2*j];
+   for (j=0;j<lag>>2;j++)
+      y_lp4[j] = y[2*j];
+
+#ifdef FIXED_POINT
+   xmax = celt_maxabs16(x_lp4, len>>2);
+   ymax = celt_maxabs16(y_lp4, lag>>2);
+   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
+   if (shift>0)
+   {
+      for (j=0;j<len>>2;j++)
+         x_lp4[j] = SHR16(x_lp4[j], shift);
+      for (j=0;j<lag>>2;j++)
+         y_lp4[j] = SHR16(y_lp4[j], shift);
+      /* Use double the shift for a MAC */
+      shift *= 2;
+   } else {
+      shift = 0;
+   }
+#endif
+
+   /* Coarse search with 4x decimation */
+
+#ifdef FIXED_POINT
+   maxcorr =
+#endif
+   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
+
+   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
+#ifdef FIXED_POINT
+                   , 0, maxcorr
+#endif
+                   );
+
+   /* Finer search with 2x decimation */
+#ifdef FIXED_POINT
+   maxcorr=1;
+#endif
+   for (i=0;i<max_pitch>>1;i++)
+   {
+      opus_val32 sum;
+      xcorr[i] = 0;
+      if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
+         continue;
+#ifdef FIXED_POINT
+      sum = 0;
+      for (j=0;j<len>>1;j++)
+         sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
+#else
+      sum = celt_inner_prod(x_lp, y+i, len>>1);
+#endif
+      xcorr[i] = MAX32(-1, sum);
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+   find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
+#ifdef FIXED_POINT
+                   , shift+1, maxcorr
+#endif
+                   );
+
+   /* Refine by pseudo-interpolation */
+   if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
+   {
+      opus_val32 a, b, c;
+      a = xcorr[best_pitch[0]-1];
+      b = xcorr[best_pitch[0]];
+      c = xcorr[best_pitch[0]+1];
+      if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
+         offset = 1;
+      else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
+         offset = -1;
+      else
+         offset = 0;
+   } else {
+      offset = 0;
+   }
+   *pitch = 2*best_pitch[0]-offset;
+}
+
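+/* Pitch gain, i.e. the normalized correlation xy/sqrt(xx*yy): the fixed-point
+   version clamps the result to Q15ONE, while the float version adds 1 under
+   the square root to avoid a division by zero. */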
+#ifdef FIXED_POINT
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   opus_val32 x2y2;
+   int sx, sy, shift;
+   opus_val32 g;
+   opus_val16 den;
+   if (xy == 0 || xx == 0 || yy == 0)
+      return 0;
+   sx = celt_ilog2(xx)-14;
+   sy = celt_ilog2(yy)-14;
+   shift = sx + sy;
+   x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14);
+   if (shift & 1) {
+      if (x2y2 < 32768)
+      {
+         x2y2 <<= 1;
+         shift--;
+      } else {
+         x2y2 >>= 1;
+         shift++;
+      }
+   }
+   den = celt_rsqrt_norm(x2y2);
+   g = MULT16_32_Q15(den, xy);
+   g = VSHR32(g, (shift>>1)-1);
+   return EXTRACT16(MIN32(g, Q15ONE));
+}
+#else
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   return xy/sqrt(1+xx*yy);
+}
+#endif
+
+static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+      int N, int *T0_, int prev_period, opus_val16 prev_gain)
+{
+   int k, i, T, T0;
+   opus_val16 g, g0;
+   opus_val16 pg;
+   opus_val32 xy,xx,yy,xy2;
+   opus_val32 xcorr[3];
+   opus_val32 best_xy, best_yy;
+   int offset;
+   int minperiod0;
+
+   minperiod0 = minperiod;
+   maxperiod /= 2;
+   minperiod /= 2;
+   *T0_ /= 2;
+   prev_period /= 2;
+   N /= 2;
+   x += maxperiod;
+   if (*T0_>=maxperiod)
+      *T0_=maxperiod-1;
+
+   T = T0 = *T0_;
+   opus_val32 yy_lookup[maxperiod+1];
+   dual_inner_prod(x, x, x-T0, N, &xx, &xy);
+   yy_lookup[0] = xx;
+   yy=xx;
+   for (i=1;i<=maxperiod;i++)
+   {
+      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
+      yy_lookup[i] = MAX32(0, yy);
+   }
+   yy = yy_lookup[T0];
+   best_xy = xy;
+   best_yy = yy;
+   g = g0 = compute_pitch_gain(xy, xx, yy);
+   /* Look for any pitch at T/k */
+   for (k=2;k<=15;k++)
+   {
+      int T1, T1b;
+      opus_val16 g1;
+      opus_val16 cont=0;
+      opus_val16 thresh;
+      T1 = (2*T0+k)/(2*k);
+      if (T1 < minperiod)
+         break;
+      /* Look for another strong correlation at T1b */
+      if (k==2)
+      {
+         if (T1+T0>maxperiod)
+            T1b = T0;
+         else
+            T1b = T0+T1;
+      } else
+      {
+         T1b = (2*second_check[k]*T0+k)/(2*k);
+      }
+      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
+      xy = HALF32(xy + xy2);
+      yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
+      g1 = compute_pitch_gain(xy, xx, yy);
+      if (abs(T1-prev_period)<=1)
+         cont = prev_gain;
+      else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
+         cont = HALF16(prev_gain);
+      else
+         cont = 0;
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation */
+      if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
+      else if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
+      if (g1 > thresh)
+      {
+         best_xy = xy;
+         best_yy = yy;
+         T = T1;
+         g = g1;
+      }
+   }
+   best_xy = MAX32(0, best_xy);
+   if (best_yy <= best_xy)
+      pg = Q15ONE;
+   else
+      pg = best_xy/(best_yy+1);
+
+   for (k=0;k<3;k++)
+      xcorr[k] = celt_inner_prod(x, x-(T+k-1), N);
+   if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
+      offset = 1;
+   else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
+      offset = -1;
+   else
+      offset = 0;
+   if (pg > g)
+      pg = g;
+   *T0_ = 2*T+offset;
+
+   if (*T0_<minperiod0)
+      *T0_=minperiod0;
+   return pg;
+}
--- /dev/null
+++ b/dnn/pitch.h
@@ -1,0 +1,149 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.h
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_H
+#define PITCH_H
+
+//#include "modes.h"
+//#include "cpu_support.h"
+#include "arch.h"
+
+void pitch_downsample(celt_sig *x[], opus_val16 *x_lp,
+      int len, int C);
+
+void pitch_search(const opus_val16 *x_lp, opus_val16 *y,
+                  int len, int max_pitch, int *pitch);
+
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+      int N, int *T0, int prev_period, opus_val16 prev_gain);
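+
+/* How these calls fit together (high-level sketch): pitch_downsample()
+   produces a 2x-decimated, whitened copy of the signal; pitch_search()
+   estimates the best lag on that copy; remove_doubling() then tests
+   sub-multiples of the period against the previous frame's pitch to avoid
+   octave errors, refining *T0 and returning the pitch gain. */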
+
+
+/* OPT: This is the kernel you really want to optimize. It gets used a lot
+   by the prefilter and by the PLC. */
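+/* Computes sum[k] += x[j]*y[j+k] for k = 0..3 over j = 0..len-1, i.e. four
+   consecutive correlation lags per call; the caller must initialize sum[]
+   and len must be at least 3. */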
+static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   opus_val16 y_0, y_1, y_2, y_3;
+   celt_assert(len>=3);
+   y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+   y_0=*y++;
+   y_1=*y++;
+   y_2=*y++;
+   for (j=0;j<len-3;j+=4)
+   {
+      opus_val16 tmp;
+      tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+      tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+      tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+      tmp=*x++;
+      y_2=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_3);
+      sum[1] = MAC16_16(sum[1],tmp,y_0);
+      sum[2] = MAC16_16(sum[2],tmp,y_1);
+      sum[3] = MAC16_16(sum[3],tmp,y_2);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+   }
+   if (j<len)
+   {
+      opus_val16 tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+   }
+}
+
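+/* Computes the two inner products <x,y01> and <x,y02> in a single pass over x. */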
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   opus_val32 xy01=0;
+   opus_val32 xy02=0;
+   for (i=0;i<N;i++)
+   {
+      xy01 = MAC16_16(xy01, x[i], y01[i]);
+      xy02 = MAC16_16(xy02, x[i], y02[i]);
+   }
+   *xy1 = xy01;
+   *xy2 = xy02;
+}
+
+/*We make sure a C version is always available for cases where the overhead of
+  vectorization and passing around an arch flag aren't worth it.*/
+static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x,
+      const opus_val16 *y, int N)
+{
+   int i;
+   opus_val32 xy=0;
+   for (i=0;i<N;i++)
+      xy = MAC16_16(xy, x[i], y[i]);
+   return xy;
+}
+
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch);
+
+#endif
--- /dev/null
+++ b/dnn/rnnoise.h
@@ -1,0 +1,52 @@
+/* Copyright (c) 2017 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef RNNOISE_EXPORT
+# if defined(WIN32)
+#  if defined(RNNOISE_BUILD) && defined(DLL_EXPORT)
+#   define RNNOISE_EXPORT __declspec(dllexport)
+#  else
+#   define RNNOISE_EXPORT
+#  endif
+# elif defined(__GNUC__) && defined(RNNOISE_BUILD)
+#  define RNNOISE_EXPORT __attribute__ ((visibility ("default")))
+# else
+#  define RNNOISE_EXPORT
+# endif
+#endif
+
+
+typedef struct DenoiseState DenoiseState;
+
+RNNOISE_EXPORT int rnnoise_get_size(void);
+
+RNNOISE_EXPORT int rnnoise_init(DenoiseState *st);
+
+RNNOISE_EXPORT DenoiseState *rnnoise_create(void);
+
+RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st);
+
+RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in);
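+
+/* Minimal usage sketch. FRAME_SIZE, read_frame() and write_frame() are
+   placeholders (the actual frame size is defined by the library, not by this
+   header), and the float returned by rnnoise_process_frame() is assumed to be
+   a voice-activity estimate:
+
+      DenoiseState *st = rnnoise_create();
+      float in[FRAME_SIZE], out[FRAME_SIZE];
+      while (read_frame(in)) {
+         float vad = rnnoise_process_frame(st, out, in);
+         write_frame(out);
+      }
+      rnnoise_destroy(st);
+*/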
--- a/dnn/train_lpcnet.py
+++ b/dnn/train_lpcnet.py
@@ -4,13 +4,20 @@
 import sys
 import numpy as np
 from keras.optimizers import Adam
+from keras.callbacks import ModelCheckpoint
 from ulaw import ulaw2lin, lin2ulaw
 
-nb_epochs = 10
-batch_size = 32
+import tensorflow as tf
+from keras.backend.tensorflow_backend import set_session
+config = tf.ConfigProto()
+config.gpu_options.per_process_gpu_memory_fraction = 0.44
+set_session(tf.Session(config=config))
 
+nb_epochs = 40
+batch_size = 64
+
 model = lpcnet.new_wavernn_model()
-model.compile(optimizer=Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+model.compile(optimizer=Adam(0.0008), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
 model.summary()
 
 pcmfile = sys.argv[1]
@@ -28,4 +35,8 @@
 in_data = np.reshape(in_data, (nb_frames, chunk_size, 1))
 out_data = np.reshape(out_data, (nb_frames, chunk_size, 1))
 
-model.fit(in_data, out_data, batch_size=batch_size, epochs=nb_epochs, validation_split=0.2)
+checkpoint = ModelCheckpoint('wavernn1f_{epoch:02d}.h5')
+
+#model.load_weights('wavernn1c_01.h5')
+model.compile(optimizer=Adam(0.002, amsgrad=True, decay=1e-4), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+model.fit(in_data, out_data, batch_size=batch_size, epochs=30, validation_split=0.2, callbacks=[checkpoint])
--