shithub: opus

Download patch

ref: b93dbfc0bc04cf793ed87a1b4538ad3c014e911a
parent: f3bc6bacd25a4cd5b69e680348727997b3f177d1
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Wed Dec 1 11:09:09 EST 2021

Adding Burg spectral estimation code

--- a/dnn/Makefile.am
+++ b/dnn/Makefile.am
@@ -8,6 +8,7 @@
 
 lib_LTLIBRARIES = liblpcnet.la
 noinst_HEADERS = arch.h  \
+		 burg.h \
 		 common.h  \
 		 freq.h  \
 		 _kiss_fft_guts.h  \
@@ -25,6 +26,7 @@
 		 vec_neon.h
 
 liblpcnet_la_SOURCES = \
+	burg.c \
 	common.c \
 	kiss99.c \
 	lpcnet.c \
@@ -54,7 +56,7 @@
 #dump_data_SOURCES = dump_data.c
 #dump_data_LDADD = $(DUMP_OBJ) $(LIBM)
 
-dump_data_SOURCES = common.c dump_data.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c ceps_codebooks.c
+dump_data_SOURCES = common.c dump_data.c burg.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c ceps_codebooks.c
 dump_data_LDADD = $(LIBM)
 dump_data_CFLAGS = $(AM_CFLAGS)
 
--- /dev/null
+++ b/dnn/burg.c
@@ -1,0 +1,245 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+
+#include "burg.h"
+
+#define MAX_FRAME_SIZE              384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
+#define SILK_MAX_ORDER_LPC          16
+#define FIND_LPC_COND_FAC           1e-5f
+
+/* sum of squares of a silk_float array, with result as double */
+static double silk_energy_FLP(
+    const float    *data,
+    int            dataSize
+)
+{
+    int i;
+    double   result;
+
+    /* 4x unrolled loop */
+    result = 0.0;
+    for( i = 0; i < dataSize - 3; i += 4 ) {
+        result += data[ i + 0 ] * (double)data[ i + 0 ] +
+                  data[ i + 1 ] * (double)data[ i + 1 ] +
+                  data[ i + 2 ] * (double)data[ i + 2 ] +
+                  data[ i + 3 ] * (double)data[ i + 3 ];
+    }
+
+    /* add any remaining products */
+    for( ; i < dataSize; i++ ) {
+        result += data[ i ] * (double)data[ i ];
+    }
+
+    assert( result >= 0.0 );
+    return result;
+}
+
+/* inner product of two silk_float arrays, with result as double */
+static double silk_inner_product_FLP(
+    const float    *data1,
+    const float    *data2,
+    int            dataSize
+)
+{
+    int i;
+    double   result;
+
+    /* 4x unrolled loop */
+    result = 0.0;
+    for( i = 0; i < dataSize - 3; i += 4 ) {
+        result += data1[ i + 0 ] * (double)data2[ i + 0 ] +
+                  data1[ i + 1 ] * (double)data2[ i + 1 ] +
+                  data1[ i + 2 ] * (double)data2[ i + 2 ] +
+                  data1[ i + 3 ] * (double)data2[ i + 3 ];
+    }
+
+    /* add any remaining products */
+    for( ; i < dataSize; i++ ) {
+        result += data1[ i ] * (double)data2[ i ];
+    }
+
+    return result;
+}
+
+
+/* Compute reflection coefficients from input signal */
+float silk_burg_analysis(              /* O    returns residual energy                                     */
+    float          A[],                /* O    prediction coefficients (length order)                      */
+    const float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
+    const float    minInvGain,         /* I    minimum inverse prediction gain                             */
+    const int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
+    const int      nb_subfr,           /* I    number of subframes stacked in x                            */
+    const int      D                   /* I    order                                                       */
+)
+{
+    int         k, n, s, reached_max_gain;
+    double           C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
+    const float *x_ptr;
+    double           C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
+    double           CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
+    double           Af[ SILK_MAX_ORDER_LPC ];
+
+    assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+    /* Compute autocorrelations, added over subframes */
+    C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
+    memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
+    for( s = 0; s < nb_subfr; s++ ) {
+        x_ptr = x + s * subfr_length;
+        for( n = 1; n < D + 1; n++ ) {
+            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
+        }
+    }
+    memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
+
+    /* Initialize */
+    CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
+    invGain = 1.0f;
+    reached_max_gain = 0;
+    for( n = 0; n < D; n++ ) {
+        /* Update first row of correlation matrix (without first element) */
+        /* Update last row of correlation matrix (without last element, stored in reversed order) */
+        /* Update C * Af */
+        /* Update C * flipud(Af) (stored in reversed order) */
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            tmp1 = x_ptr[ n ];
+            tmp2 = x_ptr[ subfr_length - n - 1 ];
+            for( k = 0; k < n; k++ ) {
+                C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
+                C_last_row[ k ]  -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
+                Atmp = Af[ k ];
+                tmp1 += x_ptr[ n - k - 1 ] * Atmp;
+                tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
+            }
+            for( k = 0; k <= n; k++ ) {
+                CAf[ k ] -= tmp1 * x_ptr[ n - k ];
+                CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
+            }
+        }
+        tmp1 = C_first_row[ n ];
+        tmp2 = C_last_row[ n ];
+        for( k = 0; k < n; k++ ) {
+            Atmp = Af[ k ];
+            tmp1 += C_last_row[  n - k - 1 ] * Atmp;
+            tmp2 += C_first_row[ n - k - 1 ] * Atmp;
+        }
+        CAf[ n + 1 ] = tmp1;
+        CAb[ n + 1 ] = tmp2;
+
+        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
+        num = CAb[ n + 1 ];
+        nrg_b = CAb[ 0 ];
+        nrg_f = CAf[ 0 ];
+        for( k = 0; k < n; k++ ) {
+            Atmp = Af[ k ];
+            num   += CAb[ n - k ] * Atmp;
+            nrg_b += CAb[ k + 1 ] * Atmp;
+            nrg_f += CAf[ k + 1 ] * Atmp;
+        }
+        assert( nrg_f > 0.0 );
+        assert( nrg_b > 0.0 );
+
+        /* Calculate the next order reflection (parcor) coefficient */
+        rc = -2.0 * num / ( nrg_f + nrg_b );
+        assert( rc > -1.0 && rc < 1.0 );
+
+        /* Update inverse prediction gain */
+        tmp1 = invGain * ( 1.0 - rc * rc );
+        if( tmp1 <= minInvGain ) {
+            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+            rc = sqrt( 1.0 - minInvGain / invGain );
+            if( num > 0 ) {
+                /* Ensure adjusted reflection coefficients has the original sign */
+                rc = -rc;
+            }
+            invGain = minInvGain;
+            reached_max_gain = 1;
+        } else {
+            invGain = tmp1;
+        }
+
+        /* Update the AR coefficients */
+        for( k = 0; k < (n + 1) >> 1; k++ ) {
+            tmp1 = Af[ k ];
+            tmp2 = Af[ n - k - 1 ];
+            Af[ k ]         = tmp1 + rc * tmp2;
+            Af[ n - k - 1 ] = tmp2 + rc * tmp1;
+        }
+        Af[ n ] = rc;
+
+        if( reached_max_gain ) {
+            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+            for( k = n + 1; k < D; k++ ) {
+                Af[ k ] = 0.0;
+            }
+            break;
+        }
+
+        /* Update C * Af and C * Ab */
+        for( k = 0; k <= n + 1; k++ ) {
+            tmp1 = CAf[ k ];
+            CAf[ k ]          += rc * CAb[ n - k + 1 ];
+            CAb[ n - k + 1  ] += rc * tmp1;
+        }
+    }
+
+    if( reached_max_gain ) {
+        /* Convert to float */
+        for( k = 0; k < D; k++ ) {
+            A[ k ] = (float)( -Af[ k ] );
+        }
+        /* Subtract energy of preceding samples from C0 */
+        for( s = 0; s < nb_subfr; s++ ) {
+            C0 -= silk_energy_FLP( x + s * subfr_length, D );
+        }
+        /* Approximate residual energy */
+        nrg_f = C0 * invGain;
+    } else {
+        /* Compute residual energy and store coefficients as float */
+        nrg_f = CAf[ 0 ];
+        tmp1 = 1.0;
+        for( k = 0; k < D; k++ ) {
+            Atmp = Af[ k ];
+            nrg_f += CAf[ k + 1 ] * Atmp;
+            tmp1  += Atmp * Atmp;
+            A[ k ] = (float)(-Atmp);
+        }
+        nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
+    }
+
+    /* Return residual energy */
+    return (float)nrg_f;
+}
--- /dev/null
+++ b/dnn/burg.h
@@ -1,0 +1,36 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+
+float silk_burg_analysis(              /* O    returns residual energy                                     */
+    float          A[],                /* O    prediction coefficients (length order)                      */
+    const float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
+    const float    minInvGain,         /* I    minimum inverse prediction gain                             */
+    const int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
+    const int      nb_subfr,           /* I    number of subframes stacked in x                            */
+    const int      D                   /* I    order                                                       */
+);
--- a/dnn/freq.c
+++ b/dnn/freq.c
@@ -37,6 +37,7 @@
 #include "freq.h"
 #include "pitch.h"
 #include "arch.h"
+#include "burg.h"
 #include <assert.h>
 
 #define SQUARE(x) ((x)*(x))
@@ -58,6 +59,32 @@
 } CommonState;
 
 
+void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
+  int i;
+  float sum[NB_BANDS] = {0};
+  for (i=0;i<NB_BANDS-1;i++)
+  {
+    int j;
+    int band_size;
+    band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
+    for (j=0;j<band_size;j++) {
+      float tmp;
+      float frac = (float)j/band_size;
+      tmp = SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].r);
+      tmp += SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].i);
+      tmp = 1.f/(tmp + 1e-9);
+      sum[i] += (1-frac)*tmp;
+      sum[i+1] += frac*tmp;
+    }
+  }
+  sum[0] *= 2;
+  sum[NB_BANDS-1] *= 2;
+  for (i=0;i<NB_BANDS;i++)
+  {
+    bandE[i] = sum[i];
+  }
+}
+
 float _lpcnet_lpc(
       opus_val16 *lpc, /* out: [0...p-1] LPC coefficients      */
       opus_val16 *rc,
@@ -126,6 +153,41 @@
   {
     bandE[i] = sum[i];
   }
+}
+
+void compute_burg_cepstrum(const short *pcm, float *burg_cepstrum, int len, int order) {
+  int i;
+  float burg_in[FRAME_SIZE];
+  float burg_lpc[LPC_ORDER];
+  float x[WINDOW_SIZE];
+  float Eburg[NB_BANDS];
+  float g;
+  float E;
+  kiss_fft_cpx LPC[FREQ_SIZE];
+  float Ly[NB_BANDS];
+  assert(order <= LPC_ORDER);
+  assert(len <= FRAME_SIZE);
+  for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i];
+  g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
+  g /= len - 2*(order-1);
+  //printf("%g\n", g);
+  RNN_CLEAR(x, WINDOW_SIZE);
+  x[0] = 1;
+  for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
+  forward_transform(LPC, x);
+  compute_band_energy_inverse(Eburg, LPC);
+  for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
+  float logMax = -2;
+  float follow = -2;
+  for (i=0;i<NB_BANDS;i++) {
+    Ly[i] = log10(1e-2+Eburg[i]);
+    Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
+    logMax = MAX16(logMax, Ly[i]);
+    follow = MAX16(follow-2.5, Ly[i]);
+    E += Eburg[i];
+  }
+  dct(burg_cepstrum, Ly);
+  burg_cepstrum[0] += - 4;
 }
 
 void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
--- a/dnn/freq.h
+++ b/dnn/freq.h
@@ -47,6 +47,7 @@
 
 void compute_band_energy(float *bandE, const kiss_fft_cpx *X);
 void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P);
+void compute_burg_cepstrum(const short *pcm, float *burg_cepstrum, int len, int order);
 
 void apply_window(float *x);
 void dct(float *out, const float *in);
--