shithub: opus

--- a/.github/workflows/autotools.yml

+++ b/.github/workflows/autotools.yml

@@ -29,6 +29,12 @@

             compiler: gcc,

             buildconfig: --enable-assertions --enable-custom-modes

+        - {

+            name: "Linux/GCC/EnableDNN",

+            os: ubuntu-latest,

+            compiler: gcc,

+            buildconfig: --enable-assertions --enable-custom-modes --enable-dred --enable-osce

+          }

     steps:

       - uses: actions/checkout@v3

         # No AutoMake on Mac so let's install it

@@ -42,4 +48,4 @@

       - name: Build

         run: make -j 2

       - name: Test

-        run: make check -j 2

\ No newline at end of file

+        run: make check -j 2

--- a/.github/workflows/dred.yml

+++ b/.github/workflows/dred.yml

@@ -74,7 +74,7 @@

         run: mkdir build

       - name: Configure

         working-directory: ./build

-        run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON

+        run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON

       - name: Build

         working-directory: ./build

         run: cmake --build . -j 2 --config ${{ matrix.config.config }} --target package

--- a/.gitlab-ci.yml

+++ b/.gitlab-ci.yml

@@ -64,9 +64,9 @@

     - !reference [.snippets, git_prep]

   script:

     - ./autogen.sh

-    - CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx

+    - CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce

     - make -j16

-    - DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16

+    - DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx --enable-dred --enable-osce CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16

   cache:

     paths:

       - "src/*.o"

@@ -87,7 +87,7 @@

   script:

     - ./autogen.sh

     - mkdir build

-    - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_X86_PRESUME_AVX2=ON

+    - cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON -DOPUS_X86_PRESUME_AVX2=ON

     - cmake --build build

     - cd build && ctest --output-on-failure -j 16

@@ -101,7 +101,7 @@

   script:

     - ./autogen.sh

     - mkdir builddir

-    - meson setup -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir

+    - meson setup -Denable-deep-plc=true -Denable-osce=true -Denable-dred=true -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir

     - meson compile -C builddir

     - meson test -C builddir

     #- meson dist --no-tests -C builddir

--- a/CMakeLists.txt

+++ b/CMakeLists.txt

@@ -87,6 +87,10 @@

 option(OPUS_DRED ${OPUS_DRED_HELP_STR} OFF)

 add_feature_info(OPUS_DRED OPUS_DRED ${OPUS_DRED_HELP_STR})

+set(OPUS_OSCE_HELP_STR "enable OSCE.")

+option(OPUS_OSCE ${OPUS_OSCE_HELP_STR} OFF)

+add_feature_info(OPUS_OSCE OPUS_OSCE ${OPUS_OSCE_HELP_STR})

 if(APPLE)

   set(OPUS_BUILD_FRAMEWORK_HELP_STR "build Framework bundle for Apple systems.")

   option(OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR} OFF)

@@ -364,8 +368,6 @@

 add_sources_group(opus silk ${silk_headers} ${silk_sources})

 add_sources_group(opus celt ${celt_headers} ${celt_sources})

-add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})

-add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})

 if(OPUS_FIXED_POINT)

   add_sources_group(opus silk ${silk_sources_fixed})

@@ -380,13 +382,28 @@

   target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)

 endif()

+# The deep PLC sources and ENABLE_DEEP_PLC are needed whenever deep PLC, DRED or OSCE is enabled.

+if (OPUS_DEEP_PLC OR OPUS_DRED OR OPUS_OSCE)

+  set(OPUS_DNN TRUE)

+else()

+  set(OPUS_DNN FALSE)

+endif()

+if (OPUS_DNN)

+  add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})

+  target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)

+endif()

 if (OPUS_DRED)

+  add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})

   target_compile_definitions(opus PRIVATE ENABLE_DRED)

-  if(NOT OPUS_DEEP_PLC)

-	  target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)

-  endif()

 endif()

+if (OPUS_OSCE)

+  add_sources_group(opus lpcnet ${osce_headers} ${osce_sources})

+  target_compile_definitions(opus PRIVATE ENABLE_OSCE)

+endif()

 if(NOT OPUS_DISABLE_INTRINSICS)

   if(((OPUS_X86_MAY_HAVE_SSE AND NOT OPUS_X86_PRESUME_SSE) OR

      (OPUS_X86_MAY_HAVE_SSE2 AND NOT OPUS_X86_PRESUME_SSE2) OR

@@ -405,7 +422,9 @@

     endif()

     add_sources_group(opus celt ${celt_sources_x86_rtcd})

     add_sources_group(opus silk ${silk_sources_x86_rtcd})

-    add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})

+    if (OPUS_DNN)

+      add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})

+    endif()

   endif()

   if(SSE1_SUPPORTED)

@@ -427,7 +446,9 @@

   if(SSE2_SUPPORTED)

     if(OPUS_X86_MAY_HAVE_SSE2)

       add_sources_group(opus celt ${celt_sources_sse2})

-      add_sources_group(opus lpcnet ${dnn_sources_sse2})

+      if (OPUS_DNN)

+        add_sources_group(opus lpcnet ${dnn_sources_sse2})

+      endif()

       target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)

       if(NOT MSVC)

         set_source_files_properties(${celt_sources_sse2} ${dnn_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)

@@ -445,7 +466,9 @@

     if(OPUS_X86_MAY_HAVE_SSE4_1)

       add_sources_group(opus celt ${celt_sources_sse4_1})

       add_sources_group(opus silk ${silk_sources_sse4_1})

-      add_sources_group(opus lpcnet ${dnn_sources_sse4_1})

+      if (OPUS_DNN)

+        add_sources_group(opus lpcnet ${dnn_sources_sse4_1})

+      endif()

       target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)

       if(NOT MSVC)

         set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} ${dnn_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)

@@ -471,7 +494,9 @@

       add_sources_group(opus celt ${celt_sources_avx2})

       add_sources_group(opus silk ${silk_sources_avx2})

       add_sources_group(opus silk ${silk_sources_float_avx2})

-      add_sources_group(opus lpcnet ${dnn_sources_avx2})

+      if (OPUS_DNN)

+        add_sources_group(opus lpcnet ${dnn_sources_avx2})

+      endif()

       target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2)

       if(MSVC)

         set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2")

@@ -524,7 +549,9 @@

     add_sources_group(opus celt ${celt_sources_arm_neon_intr})

     add_sources_group(opus silk ${silk_sources_arm_neon_intr})

-    add_sources_group(opus lpcnet ${dnn_sources_arm_neon})

+    if (OPUS_DNN)

+      add_sources_group(opus lpcnet ${dnn_sources_arm_neon})

+    endif()

     # silk arm neon depends on main_Fix.h

     target_include_directories(opus PRIVATE silk/fixed)

--- a/Makefile.am

+++ b/Makefile.am

@@ -25,6 +25,9 @@

 if ENABLE_DRED

 LPCNET_SOURCES += $(DRED_SOURCES)

 endif

+if ENABLE_OSCE

+LPCNET_SOURCES += $(OSCE_SOURCES)

+endif

 if FIXED_POINT

 SILK_SOURCES += $(SILK_SOURCES_FIXED)

@@ -131,6 +134,9 @@

 endif

 if ENABLE_DRED

 LPCNET_HEAD += $(DRED_HEAD)

+endif

+if ENABLE_OSCE

+LPCNET_HEAD += $(OSCE_HEAD)

 endif

 libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(LPCNET_SOURCES) $(OPUS_SOURCES)

--- a/autogen.sh

+++ b/autogen.sh

@@ -9,7 +9,7 @@

 srcdir=`dirname $0`

 test -n "$srcdir" && cd "$srcdir"

-dnn/download_model.sh df63771

+dnn/download_model.sh 591c8ba

 echo "Updating build configuration files, please wait...."

--- a/cmake/OpusSources.cmake

+++ b/cmake/OpusSources.cmake

@@ -42,8 +42,10 @@

 get_opus_sources(DEEP_PLC_HEAD lpcnet_headers.mk deep_plc_headers)

 get_opus_sources(DRED_HEAD lpcnet_headers.mk dred_headers)

+get_opus_sources(OSCE_HEAD lpcnet_headers.mk osce_headers)

 get_opus_sources(DEEP_PLC_SOURCES lpcnet_sources.mk deep_plc_sources)

 get_opus_sources(DRED_SOURCES lpcnet_sources.mk dred_sources)

+get_opus_sources(OSCE_SOURCES lpcnet_sources.mk osce_sources)

 get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd)

 get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2)

 get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)

--- a/configure.ac

+++ b/configure.ac

@@ -175,10 +175,10 @@

     [AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],,

     [enable_deep_plc=no])

-AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"],[

+AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[

   AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC])

])

-AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"])

+AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])

 has_float_approx=no

 case "$host_cpu" in

@@ -903,6 +903,31 @@

 AS_IF([test "$enable_dnn_debug_float" = "no"], [

        AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float])

])

+AC_ARG_ENABLE([osce-training-data],

+  AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,

+  [enable_osce_training_data=no]

+) dnl when the option is not given, the variable tested below defaults to no

+AS_IF([test "$enable_osce_training_data" = "yes"], [

+       AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])

+])

+AC_MSG_CHECKING([argument osce training data])

+AS_IF([test "$enable_osce_training_data" = "yes"], [

+       AC_MSG_RESULT([yes])

+], [AC_MSG_RESULT([no])])

+AC_ARG_ENABLE([osce],

+  AS_HELP_STRING([--enable-osce], [enables Opus Speech Coding Enhancement]),,

+  [enable_osce=no]

+) dnl when the option is not given, enable_osce defaults to no

+AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [

+       AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])

+])

+AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])

 AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])

--- /dev/null

+++ b/dnn/adaconvtest.c

@@ -1,0 +1,449 @@

+#include "lace_data.h"

+#include "nolace_data.h"

+#include "osce.h"

+#include "nndsp.h"

+#include <stdlib.h>

+#include <stdio.h>

+#include <math.h>

+extern const WeightArray lacelayers_arrays[];

+extern const WeightArray nolacelayers_arrays[];

+/* Checks adaconv_process_frame() against stored reference vectors.

+ *

+ * Opens <prefix>_features.f32, <prefix>_x_in.f32 and <prefix>_x_out.f32, feeds

+ * num_frames frames through the layer and prints the per-frame RMSE between

+ * the computed and the reference output. The state in hAdaConv is reset first.

+ * Exits with status 1 if the configuration does not fit the local buffers, a

+ * file cannot be opened or a frame cannot be read completely. */

+void adaconv_compare(

+    const char * prefix,

+    int num_frames,

+    AdaConvState* hAdaConv,

+    LinearLayer *kernel_layer,

+    LinearLayer *gain_layer,

+    int feature_dim,

+    int frame_size,

+    int overlap_size,

+    int in_channels,

+    int out_channels,

+    int kernel_size,

+    int left_padding,

+    float filter_gain_a,

+    float filter_gain_b,

+    float shape_gain

+)

+{

+    /* index 0 = features, 1 = input signal, 2 = reference output */

+    static const char * const suffixes[3] = {"_features.f32", "_x_in.f32", "_x_out.f32"};

+    char paths[3][256];

+    FILE *files[3];

+    char message[512];

+    int i_file, i_frame, i_sample;

+    int path_len;

+    size_t in_len, out_len;

+    float mse;

+    float features[512];

+    float x_in[512];

+    float x_out_ref[512];

+    float x_out[512];

+    float window[40];

+    /* every buffer above is fixed-size, so reject anything that would overflow it;

+       the window is indexed up to overlap_size by adaconv_process_frame() */

+    if (feature_dim < 0 || feature_dim > 512

+        || frame_size <= 0 || in_channels <= 0 || out_channels <= 0

+        || in_channels > 512 / frame_size || out_channels > 512 / frame_size

+        || overlap_size < 0 || overlap_size > 40)

+    {

+        fprintf(stderr, "unsupported dimensions for test %s\n", prefix);

+        exit(1);

+    }

+    init_adaconv_state(hAdaConv);

+    compute_overlap_window(window, 40);

+    for (i_file = 0; i_file < 3; i_file ++)

+    {

+        /* bounded formatting: an over-long prefix is reported instead of overflowing the path */

+        path_len = snprintf(paths[i_file], sizeof(paths[i_file]), "%s%s", prefix, suffixes[i_file]);

+        if (path_len < 0 || (size_t) path_len >= sizeof(paths[i_file]))

+        {

+            fprintf(stderr, "file name too long for prefix %s\n", prefix);

+            exit(1);

+        }

+        /* the vectors are raw binary data, hence "rb" */

+        files[i_file] = fopen(paths[i_file], "rb");

+        if (files[i_file] == NULL)

+        {

+            snprintf(message, sizeof(message), "could not open file %s", paths[i_file]);

+            perror(message);

+            exit(1);

+        }

+    }

+    in_len = (size_t) frame_size * (size_t) in_channels;

+    out_len = (size_t) frame_size * (size_t) out_channels;

+    for (i_frame = 0; i_frame < num_frames; i_frame ++)

+    {

+        if (fread(features, sizeof(float), (size_t) feature_dim, files[0]) != (size_t) feature_dim)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[0]);

+            exit(1);

+        }

+        if (fread(x_in, sizeof(float), in_len, files[1]) != in_len)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[1]);

+            exit(1);

+        }

+        if (fread(x_out_ref, sizeof(float), out_len, files[2]) != out_len)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[2]);

+            exit(1);

+        }

+        adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim,

+            frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding,

+            filter_gain_a, filter_gain_b, shape_gain, window, 0);

+        /* root-mean-square error over all output channels of this frame */

+        mse = 0;

+        for (i_sample = 0; i_sample < frame_size * out_channels; i_sample ++)

+        {

+            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);

+        }

+        mse = sqrt(mse / (frame_size * out_channels));

+        printf("rmse[%d] %f\n", i_frame, mse);

+    }

+    for (i_file = 0; i_file < 3; i_file ++)

+    {

+        fclose(files[i_file]);

+    }

+}

+/* Checks adacomb_process_frame() against stored reference vectors.

+ *

+ * Opens <prefix>_features.f32, <prefix>_x_in.f32, <prefix>_p_in.s32 and

+ * <prefix>_x_out.f32, feeds num_frames frames (one pitch lag per frame) through

+ * the layer and prints the per-frame RMSE between the computed and the

+ * reference output. The state in hAdaComb is reset first. Exits with status 1

+ * if the configuration does not fit the local buffers, a file cannot be opened

+ * or a frame cannot be read completely. */

+void adacomb_compare(

+    const char * prefix,

+    int num_frames,

+    AdaCombState* hAdaComb,

+    LinearLayer *kernel_layer,

+    LinearLayer *gain_layer,

+    LinearLayer *global_gain_layer,

+    int feature_dim,

+    int frame_size,

+    int overlap_size,

+    int kernel_size,

+    int left_padding,

+    float filter_gain_a,

+    float filter_gain_b,

+    float log_gain_limit

+)

+{

+    /* index 0 = features, 1 = input signal, 2 = pitch lags, 3 = reference output */

+    static const char * const suffixes[4] = {"_features.f32", "_x_in.f32", "_p_in.s32", "_x_out.f32"};

+    char paths[4][256];

+    FILE *files[4];

+    char message[512];

+    int i_file, i_frame, i_sample;

+    int path_len;

+    float mse;

+    float features[512];

+    float x_in[512];

+    float x_out_ref[512];

+    float x_out[512];

+    int pitch_lag;

+    /* NOTE(review): the window always has 40 taps regardless of overlap_size - confirm they match */

+    float window[40];

+    /* the frame buffers are fixed-size, so reject anything that would overflow them */

+    if (feature_dim < 0 || feature_dim > 512 || frame_size <= 0 || frame_size > 512)

+    {

+        fprintf(stderr, "unsupported dimensions for test %s\n", prefix);

+        exit(1);

+    }

+    init_adacomb_state(hAdaComb);

+    compute_overlap_window(window, 40);

+    for (i_file = 0; i_file < 4; i_file ++)

+    {

+        /* bounded formatting: an over-long prefix is reported instead of overflowing the path */

+        path_len = snprintf(paths[i_file], sizeof(paths[i_file]), "%s%s", prefix, suffixes[i_file]);

+        if (path_len < 0 || (size_t) path_len >= sizeof(paths[i_file]))

+        {

+            fprintf(stderr, "file name too long for prefix %s\n", prefix);

+            exit(1);

+        }

+        /* the vectors are raw binary data, hence "rb" */

+        files[i_file] = fopen(paths[i_file], "rb");

+        if (files[i_file] == NULL)

+        {

+            snprintf(message, sizeof(message), "could not open file %s", paths[i_file]);

+            perror(message);

+            exit(1);

+        }

+    }

+    for (i_frame = 0; i_frame < num_frames; i_frame ++)

+    {

+        if (fread(features, sizeof(float), (size_t) feature_dim, files[0]) != (size_t) feature_dim)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[0]);

+            exit(1);

+        }

+        if (fread(x_in, sizeof(float), (size_t) frame_size, files[1]) != (size_t) frame_size)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[1]);

+            exit(1);

+        }

+        if (fread(&pitch_lag, sizeof(int), 1, files[2]) != 1)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[2]);

+            exit(1);

+        }

+        if (fread(x_out_ref, sizeof(float), (size_t) frame_size, files[3]) != (size_t) frame_size)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[3]);

+            exit(1);

+        }

+        adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer,

+            pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0);

+        /* root-mean-square error of this frame */

+        mse = 0;

+        for (i_sample = 0; i_sample < frame_size; i_sample ++)

+        {

+            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);

+        }

+        mse = sqrt(mse / (frame_size));

+        printf("rmse[%d] %f\n", i_frame, mse);

+    }

+    for (i_file = 0; i_file < 4; i_file ++)

+    {

+        fclose(files[i_file]);

+    }

+}

+/* Checks adashape_process_frame() against stored reference vectors.

+ *

+ * Opens <prefix>_features.f32, <prefix>_x_in.f32 and <prefix>_x_out.f32, feeds

+ * num_frames frames through the layer and prints the per-frame RMSE between

+ * the computed and the reference output. The state in hAdaShape is reset

+ * first. Exits with status 1 if the configuration does not fit the local

+ * buffers, a file cannot be opened or a frame cannot be read completely. */

+void adashape_compare(

+    const char * prefix,

+    int num_frames,

+    AdaShapeState* hAdaShape,

+    LinearLayer *alpha1,

+    LinearLayer *alpha2,

+    int feature_dim,

+    int frame_size,

+    int avg_pool_k

+)

+{

+    /* index 0 = features, 1 = input signal, 2 = reference output */

+    static const char * const suffixes[3] = {"_features.f32", "_x_in.f32", "_x_out.f32"};

+    char paths[3][256];

+    FILE *files[3];

+    char message[512];

+    int i_file, i_frame, i_sample;

+    int path_len;

+    float mse;

+    float features[512];

+    float x_in[512];

+    float x_out_ref[512];

+    float x_out[512];

+    /* the frame buffers are fixed-size, so reject anything that would overflow them */

+    if (feature_dim < 0 || feature_dim > 512 || frame_size <= 0 || frame_size > 512)

+    {

+        fprintf(stderr, "unsupported dimensions for test %s\n", prefix);

+        exit(1);

+    }

+    init_adashape_state(hAdaShape);

+    for (i_file = 0; i_file < 3; i_file ++)

+    {

+        /* bounded formatting: an over-long prefix is reported instead of overflowing the path */

+        path_len = snprintf(paths[i_file], sizeof(paths[i_file]), "%s%s", prefix, suffixes[i_file]);

+        if (path_len < 0 || (size_t) path_len >= sizeof(paths[i_file]))

+        {

+            fprintf(stderr, "file name too long for prefix %s\n", prefix);

+            exit(1);

+        }

+        /* the vectors are raw binary data, hence "rb" */

+        files[i_file] = fopen(paths[i_file], "rb");

+        if (files[i_file] == NULL)

+        {

+            snprintf(message, sizeof(message), "could not open file %s", paths[i_file]);

+            perror(message);

+            exit(1);

+        }

+    }

+    for (i_frame = 0; i_frame < num_frames; i_frame ++)

+    {

+        if (fread(features, sizeof(float), (size_t) feature_dim, files[0]) != (size_t) feature_dim)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[0]);

+            exit(1);

+        }

+        if (fread(x_in, sizeof(float), (size_t) frame_size, files[1]) != (size_t) frame_size)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[1]);

+            exit(1);

+        }

+        if (fread(x_out_ref, sizeof(float), (size_t) frame_size, files[2]) != (size_t) frame_size)

+        {

+            fprintf(stderr, "could not read frame %d from %s\n", i_frame, paths[2]);

+            exit(1);

+        }

+        adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim,

+            frame_size, avg_pool_k, 0);

+        /* root-mean-square error of this frame */

+        mse = 0;

+        for (i_sample = 0; i_sample < frame_size; i_sample ++)

+        {

+            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);

+        }

+        mse = sqrt(mse / (frame_size));

+        printf("rmse[%d] %f\n", i_frame, mse);

+    }

+    for (i_file = 0; i_file < 3; i_file ++)

+    {

+        fclose(files[i_file]);

+    }

+}

+/* Loads the LACE and NoLACE layer weights and runs each adaptive-layer

+ * comparison on five frames of the vectors under testvectors/. The comparison

+ * helpers exit with status 1 on I/O errors; otherwise returns 0. */

+int main(void)

+{

+    LACELayers hLACE;

+    NOLACELayers hNoLACE;

+    AdaConvState hAdaConv;

+    AdaCombState hAdaComb;

+    AdaShapeState hAdaShape;

+    /* no explicit state reset here: each *_compare() resets the state it is handed */

+    init_lacelayers(&hLACE, lacelayers_arrays);

+    init_nolacelayers(&hNoLACE, nolacelayers_arrays);

+    printf("\ntesting lace.af1 (1 in, 1 out)...\n");

+    adaconv_compare(

+        "testvectors/lace_af1",

+        5,

+        &hAdaConv,

+        &hLACE.lace_af1_kernel,

+        &hLACE.lace_af1_gain,

+        LACE_AF1_FEATURE_DIM,

+        LACE_AF1_FRAME_SIZE,

+        LACE_AF1_OVERLAP_SIZE,

+        LACE_AF1_IN_CHANNELS,

+        LACE_AF1_OUT_CHANNELS,

+        LACE_AF1_KERNEL_SIZE,

+        LACE_AF1_LEFT_PADDING,

+        LACE_AF1_FILTER_GAIN_A,

+        LACE_AF1_FILTER_GAIN_B,

+        LACE_AF1_SHAPE_GAIN

+    );

+    printf("\ntesting nolace.af1 (1 in, 2 out)...\n");

+    adaconv_compare(

+        "testvectors/nolace_af1",

+        5,

+        &hAdaConv,

+        &hNoLACE.nolace_af1_kernel,

+        &hNoLACE.nolace_af1_gain,

+        NOLACE_AF1_FEATURE_DIM,

+        NOLACE_AF1_FRAME_SIZE,

+        NOLACE_AF1_OVERLAP_SIZE,

+        NOLACE_AF1_IN_CHANNELS,

+        NOLACE_AF1_OUT_CHANNELS,

+        NOLACE_AF1_KERNEL_SIZE,

+        NOLACE_AF1_LEFT_PADDING,

+        NOLACE_AF1_FILTER_GAIN_A,

+        NOLACE_AF1_FILTER_GAIN_B,

+        NOLACE_AF1_SHAPE_GAIN

+    );

+    /* leading newline added so this banner is separated like all the others */

+    printf("\ntesting nolace.af4 (2 in, 1 out)...\n");

+    adaconv_compare(

+        "testvectors/nolace_af4",

+        5,

+        &hAdaConv,

+        &hNoLACE.nolace_af4_kernel,

+        &hNoLACE.nolace_af4_gain,

+        NOLACE_AF4_FEATURE_DIM,

+        NOLACE_AF4_FRAME_SIZE,

+        NOLACE_AF4_OVERLAP_SIZE,

+        NOLACE_AF4_IN_CHANNELS,

+        NOLACE_AF4_OUT_CHANNELS,

+        NOLACE_AF4_KERNEL_SIZE,

+        NOLACE_AF4_LEFT_PADDING,

+        NOLACE_AF4_FILTER_GAIN_A,

+        NOLACE_AF4_FILTER_GAIN_B,

+        NOLACE_AF4_SHAPE_GAIN

+    );

+    printf("\ntesting nolace.af2 (2 in, 2 out)...\n");

+    adaconv_compare(

+        "testvectors/nolace_af2",

+        5,

+        &hAdaConv,

+        &hNoLACE.nolace_af2_kernel,

+        &hNoLACE.nolace_af2_gain,

+        NOLACE_AF2_FEATURE_DIM,

+        NOLACE_AF2_FRAME_SIZE,

+        NOLACE_AF2_OVERLAP_SIZE,

+        NOLACE_AF2_IN_CHANNELS,

+        NOLACE_AF2_OUT_CHANNELS,

+        NOLACE_AF2_KERNEL_SIZE,

+        NOLACE_AF2_LEFT_PADDING,

+        NOLACE_AF2_FILTER_GAIN_A,

+        NOLACE_AF2_FILTER_GAIN_B,

+        NOLACE_AF2_SHAPE_GAIN

+    );

+    printf("\ntesting lace.cf1...\n");

+    adacomb_compare(

+        "testvectors/lace_cf1",

+        5,

+        &hAdaComb,

+        &hLACE.lace_cf1_kernel,

+        &hLACE.lace_cf1_gain,

+        &hLACE.lace_cf1_global_gain,

+        LACE_CF1_FEATURE_DIM,

+        LACE_CF1_FRAME_SIZE,

+        LACE_CF1_OVERLAP_SIZE,

+        LACE_CF1_KERNEL_SIZE,

+        LACE_CF1_LEFT_PADDING,

+        LACE_CF1_FILTER_GAIN_A,

+        LACE_CF1_FILTER_GAIN_B,

+        LACE_CF1_LOG_GAIN_LIMIT

+    );

+    printf("\ntesting nolace.tdshape1...\n");

+    adashape_compare(

+        "testvectors/nolace_tdshape1",

+        5,

+        &hAdaShape,

+        &hNoLACE.nolace_tdshape1_alpha1,

+        &hNoLACE.nolace_tdshape1_alpha2,

+        NOLACE_TDSHAPE1_FEATURE_DIM,

+        NOLACE_TDSHAPE1_FRAME_SIZE,

+        NOLACE_TDSHAPE1_AVG_POOL_K

+    );

+    return 0;

+}

+/* gcc -DVAR_ARRAYS -DENABLE_OSCE  -I ../include -I ../silk -I . -I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */

\ No newline at end of file

--- a/dnn/meson.build

+++ b/dnn/meson.build

@@ -5,6 +5,11 @@

   dnn_sources += dred_sources

 endif

+osce_sources = sources['OSCE_SOURCES']

+if opt_enable_osce

+  dnn_sources += osce_sources

+endif

 dnn_sources_sse2 = sources['DNN_SOURCES_SSE2']

 dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1']

 dnn_sources_avx2 = sources['DNN_SOURCES_AVX2']

--- /dev/null

+++ b/dnn/nndsp.c

@@ -1,0 +1,412 @@

+/* Copyright (c) 2023 Amazon

+   Written by Jan Buethe */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#ifdef HAVE_CONFIG_H

+#include "config.h"

+#endif

+#include "nndsp.h"

+#include "arch.h"

+#include "nnet.h"

+#include "os_support.h"

+#include "pitch.h"

+#include <math.h>

+#ifndef M_PI

+#define M_PI 3.141592653589793f

+#endif

+#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))

+/* Resets the single AdaConvState object pointed to by hAdaConv via OPUS_CLEAR. */

+void init_adaconv_state(AdaConvState *hAdaConv)

+{

+    OPUS_CLEAR(hAdaConv, 1);

+}

+/* Resets the single AdaCombState object pointed to by hAdaComb via OPUS_CLEAR. */

+void init_adacomb_state(AdaCombState *hAdaComb)

+{

+    OPUS_CLEAR(hAdaComb, 1);

+}

+/* Resets the single AdaShapeState object pointed to by hAdaShape via OPUS_CLEAR. */

+void init_adashape_state(AdaShapeState *hAdaShape)

+{

+    OPUS_CLEAR(hAdaShape, 1);

+}

+/* Fills window[0..overlap_size) with the falling raised-cosine ramp

+ * 0.5 + 0.5 * cos(pi * (n + 0.5) / overlap_size), i.e. values going from

+ * close to 1 down to close to 0. */

+void compute_overlap_window(float *window, int overlap_size)

+{

+    int n = 0;

+    while (n < overlap_size)

+    {

+        window[n] = 0.5f + 0.5f * cos(M_PI * (n + 0.5f) / overlap_size);

+        n++;

+    }

+}

+#ifdef DEBUG_NNDSP

+/* Debug helper: prints the first `length` entries of vec, one per line, as "name[i]: value". */

+void print_float_vector(const char* name, const float *vec, int length)

+{

+    int i; /* declared up front like the other functions here, instead of a C99 loop-scoped declaration */

+    for (i = 0; i < length; i ++)

+    {

+        printf("%s[%d]: %f\n", name, i, vec[i]);

+    }

+}

+#endif

+/* Normalizes the kernel of every output channel to unit Euclidean norm (taken

+ * over all input channels and taps, with 1e-6 added to the norm to avoid a

+ * division by zero) and multiplies it by that output channel's gain. The

+ * kernel is modified in place. */

+static void scale_kernel(

+    float *kernel,

+    int in_channels,

+    int out_channels,

+    int kernel_size,

+    float *gain

+)

+{

+    int oc, ic, tap;

+    float *taps;

+    float norm;

+    for (oc = 0; oc < out_channels; oc++)

+    {

+        /* pass 1: sum of squares over this output channel's kernel */

+        norm = 0;

+        for (ic = 0; ic < in_channels; ic++)

+        {

+            taps = kernel + KERNEL_INDEX(oc, ic, 0);

+            for (tap = 0; tap < kernel_size; tap++)

+            {

+                norm += taps[tap] * taps[tap];

+            }

+        }

+#ifdef DEBUG_NNDSP

+        printf("kernel norm: %f, %f\n", norm, sqrt(norm));

+#endif

+        /* from here on norm holds the reciprocal of the (regularized) norm */

+        norm = 1.f / (1e-6f + sqrt(norm));

+        /* pass 2: apply normalization and gain */

+        for (ic = 0; ic < in_channels; ic++)

+        {

+            taps = kernel + KERNEL_INDEX(oc, ic, 0);

+            for (tap = 0; tap < kernel_size; tap++)

+            {

+                taps[tap] *= norm * gain[oc];

+            }

+        }

+    }

+}

+/* Replaces each of the num_gains entries g of gains, in place, with

+ * exp(filter_gain_a * g + filter_gain_b). */

+static void transform_gains(

+    float *gains,

+    int num_gains,

+    float filter_gain_a,

+    float filter_gain_b

+)

+{

+    int remaining = num_gains;

+    while (remaining-- > 0)

+    {

+        *gains = exp(filter_gain_a * *gains + filter_gain_b);

+        gains++;

+    }

+}

+/* Processes one frame with an adaptive convolution.

+ *

+ * A new kernel and new per-output-channel gains are computed from `features`

+ * with the two dense layers; the gains are mapped through

+ * exp(filter_gain_a * g + filter_gain_b) and the kernel is normalized and

+ * scaled by them. Each input channel (x_in holds frame_size samples per

+ * channel, channel after channel) is prefixed with kernel_size history samples

+ * and filtered; over the first overlap_size samples the output cross-fades

+ * from the previous frame's kernel (weight window[i]) to the new one

+ * (weight 1 - window[i]). x_out receives frame_size samples per output

+ * channel. The input history and the kernel are stored in hAdaConv for the

+ * next call.

+ *

+ * Only shape_gain == 1 and the causal case left_padding == kernel_size - 1 are

+ * supported, and all sizes must fit the ADACONV_MAX_* limits (asserted). */

+void adaconv_process_frame(

+    AdaConvState* hAdaConv,

+    float *x_out,

+    const float *x_in,

+    const float *features,

+    const LinearLayer *kernel_layer,

+    const LinearLayer *gain_layer,

+    int feature_dim,

+    int frame_size,

+    int overlap_size,

+    int in_channels,

+    int out_channels,

+    int kernel_size,

+    int left_padding,

+    float filter_gain_a,

+    float filter_gain_b,

+    float shape_gain,

+    float *window,

+    int arch

+)

+{

+    float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];

+    float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];

+    float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];

+    float kernel0[ADACONV_MAX_KERNEL_SIZE];

+    float kernel1[ADACONV_MAX_KERNEL_SIZE];

+    float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];

+    float channel_buffer1[ADACONV_MAX_FRAME_SIZE];

+    float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];

+    float *p_input;

+    int i_in_channels, i_out_channels, i_sample;

+    (void) feature_dim; /* ToDo: figure out whether we might need this information */

+    celt_assert(shape_gain == 1);

+    celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */

+    celt_assert(kernel_size < frame_size);

+    /* the stack buffers above are sized by the ADACONV_MAX_* limits */

+    celt_assert(frame_size <= ADACONV_MAX_FRAME_SIZE);

+    celt_assert(kernel_size <= ADACONV_MAX_KERNEL_SIZE);

+    celt_assert(in_channels <= ADACONV_MAX_INPUT_CHANNELS);

+    celt_assert(out_channels <= ADACONV_MAX_OUTPUT_CHANNELS);

+    /* the cross-fade reads overlap_size entries of channel_buffer0 and channel_buffer1 */

+    celt_assert(overlap_size <= ADACONV_MAX_OVERLAP_SIZE);

+    celt_assert(overlap_size <= frame_size);

+    OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);

+    OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);

+    OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));

+#ifdef DEBUG_NNDSP

+    print_float_vector("x_in", x_in, in_channels * frame_size);

+#endif

+    /* prepare input: per channel, kernel_size history samples followed by the new frame */

+    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)

+    {

+        OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);

+        OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);

+    }

+    /* p_input points at the first new sample of channel 0 */

+    p_input = input_buffer + kernel_size;

+    /* calculate new kernel and new gain */

+    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);

+    compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);

+#ifdef DEBUG_NNDSP

+    print_float_vector("features", features, feature_dim);

+    print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);

+    print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);

+#endif

+    transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);

+    scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);

+#ifdef DEBUG_NNDSP

+    print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);

+    print_float_vector("adaconv_gain", gain_buffer, out_channels);

+#endif

+    /* calculate overlapping part using kernel from last frame */

+    for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)

+    {

+        for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)

+        {

+            /* kernels are zero-padded to ADACONV_MAX_KERNEL_SIZE so the correlation length is fixed */

+            OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);

+            OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);

+            OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);

+            OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);

+            celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);

+            celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);

+            /* cross-fade from the old kernel's output to the new kernel's output */

+            for (i_sample = 0; i_sample < overlap_size; i_sample++)

+            {

+                output_buffer[i_sample + i_out_channels * frame_size] +=  window[i_sample] * channel_buffer0[i_sample];

+                output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];

+            }

+            /* remainder of the frame uses the new kernel only */

+            for (i_sample = overlap_size; i_sample < frame_size; i_sample++)

+            {

+                output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];

+            }

+        }

+    }

+    OPUS_COPY(x_out, output_buffer, out_channels * frame_size);

+#ifdef DEBUG_NNDSP

+    print_float_vector("x_out", x_out, out_channels * frame_size);

+#endif

+    /* buffer update: keep the last kernel_size input samples of each channel and the new kernel */

+    for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)

+    {

+        OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);

+    }

+    OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);

+}

+/* Adaptive comb filtering of one mono frame.
+   A filter kernel, a kernel gain and a global output gain are predicted from
+   `features` by three dense layers. The kernel is run over the input delayed
+   by pitch_lag + left_padding samples (via celt_pitch_xcorr()), the input
+   itself is added, and the sum is scaled by the global gain. Over the first
+   overlap_size samples the output is crossfaded, weighted by `window`, from
+   the result obtained with the previous frame's kernel, pitch lag and global
+   gain. x_in is copied into a local buffer before x_out is written, so x_out
+   may alias x_in.
+     hAdaComb           filter state (input history, last kernel, lag and gain); updated
+     x_out              receives frame_size output samples
+     x_in               frame_size input samples
+     features           conditioning vector fed to the three layers
+     kernel_layer       dense layer producing the raw kernel (linear activation)
+     gain_layer         dense layer producing the raw kernel gain (ReLU activation)
+     global_gain_layer  dense layer producing the raw global gain (tanh activation)
+     pitch_lag          comb delay in samples
+     feature_dim        only used for debug output
+     frame_size         samples per frame, at most ADACOMB_MAX_FRAME_SIZE
+     overlap_size       crossfade length, at most frame_size
+     kernel_size        number of kernel taps, at most ADACOMB_MAX_KERNEL_SIZE
+     left_padding       samples the kernel span starts ahead of the lagged position
+     filter_gain_a/b    global gain is exp(filter_gain_a * tanh_out + filter_gain_b)
+     log_gain_limit     kernel gain is exp(log_gain_limit - relu_out)
+     window             overlap_size weights applied to the previous-frame branch
+     arch               forwarded to compute_generic_dense() and celt_pitch_xcorr()
+*/
+void adacomb_process_frame(
+    AdaCombState* hAdaComb,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const LinearLayer *kernel_layer,
+    const LinearLayer *gain_layer,
+    const LinearLayer *global_gain_layer,
+    int pitch_lag,
+    int feature_dim,
+    int frame_size,
+    int overlap_size,
+    int kernel_size,
+    int left_padding,
+    float filter_gain_a,
+    float filter_gain_b,
+    float log_gain_limit,
+    float *window,
+    int arch
+)
+{
+    float output_buffer[ADACOMB_MAX_FRAME_SIZE];
+    float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
+    float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
+    float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
+    float gain, global_gain;
+    float *p_input;
+    int i_sample;
+    /* zero-padded copies handed to celt_pitch_xcorr(); sized by the same
+       constant that the OPUS_CLEAR and xcorr calls below use */
+    float kernel[ADACOMB_MAX_KERNEL_SIZE];
+    float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
+    (void) feature_dim; /* ToDo: figure out whether we might need this information */
+    /* the scratch buffers above are fixed-size: reject sizes they cannot hold */
+    celt_assert(frame_size <= ADACOMB_MAX_FRAME_SIZE);
+    celt_assert(overlap_size <= frame_size);
+    celt_assert(kernel_size <= ADACOMB_MAX_KERNEL_SIZE);
+    /* the lagged read must not start before the beginning of input_buffer */
+    celt_assert(left_padding + pitch_lag <= kernel_size + ADACOMB_MAX_LAG);
+    OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
+    OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);
+    /* input_buffer = [ history (kernel_size + ADACOMB_MAX_LAG) | current frame ] */
+    OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
+    OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
+    p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;
+    /* calculate new kernel and new gain */
+    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
+    compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
+    compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
+#ifdef DEBUG_NNDSP
+    print_float_vector("features", features, feature_dim);
+    print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
+    print_float_vector("adacomb_gain_raw", &gain, 1);
+    print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
+#endif
+    /* the ReLU output is non-negative, so the kernel gain never exceeds exp(log_gain_limit) */
+    gain = exp(log_gain_limit - gain);
+    global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
+    scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);
+#ifdef DEBUG_NNDSP
+    print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
+    print_float_vector("adacomb_gain", &gain, 1);
+#endif
+    OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
+    OPUS_COPY(kernel, kernel_buffer, kernel_size);
+    OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);
+    /* previous kernel and lag are only needed for the overlap region */
+    celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
+    celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
+    /* crossfade the comb branch from the previous frame's filter to the new one */
+    for (i_sample = 0; i_sample < overlap_size; i_sample++)
+    {
+      output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
+    }
+    /* add the direct signal, with the global gain crossfaded the same way */
+    for (i_sample = 0; i_sample < overlap_size; i_sample++)
+    {
+      output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
+    }
+    /* remainder of the frame uses the new filter only */
+    for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
+    {
+      output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
+    }
+    OPUS_COPY(x_out, output_buffer, frame_size);
+#ifdef DEBUG_NNDSP
+    print_float_vector("x_out", x_out, frame_size);
+#endif
+    /* buffer update */
+    OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
+    OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
+    hAdaComb->last_pitch_lag = pitch_lag;
+    hAdaComb->last_global_gain = global_gain;
+}

+/* Adaptive temporal shaping of one frame.
+   A log-domain envelope of x_in is formed from the mean magnitude of
+   consecutive blocks of avg_pool_k samples; the envelope is mean-removed and
+   the mean is appended as one extra value. Features and envelope pass through
+   the alpha1 convolution, a leaky ReLU (slope 0.2) and the alpha2 convolution;
+   the exponential of the result is applied to x_in as a per-sample gain.
+     hAdaShape    convolution states for alpha1 and alpha2
+     x_out        receives frame_size shaped samples
+     x_in         frame_size input samples
+     features     feature_dim conditioning values
+     alpha1       first convolution layer
+     alpha2       second convolution layer
+     feature_dim  number of values in features
+     frame_size   samples per frame; must be a multiple of avg_pool_k
+     avg_pool_k   block length used for the envelope
+     arch         forwarded to compute_generic_conv1d()
+*/
+void adashape_process_frame(
+    AdaShapeState *hAdaShape,
+    float *x_out,
+    const float *x_in,
+    const float *features,
+    const LinearLayer *alpha1,
+    const LinearLayer *alpha2,
+    int feature_dim,
+    int frame_size,
+    int avg_pool_k,
+    int arch
+)
+{
+    float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
+    float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
+    float *envelope;
+    float envelope_mean;
+    int num_blocks;
+    int alpha1_input_dim;
+    int n, j;
+    celt_assert(frame_size % avg_pool_k == 0);
+    celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);
+    num_blocks = frame_size / avg_pool_k;
+    alpha1_input_dim = feature_dim + num_blocks + 1;
+    /* alpha1 input layout: [ features | envelope | envelope mean ] */
+    envelope = in_buffer + feature_dim;
+    OPUS_COPY(in_buffer, features, feature_dim);
+    OPUS_CLEAR(envelope, num_blocks + 1);
+    /* log of the block-wise mean magnitude; the 2^-16 offset keeps log() finite for silent blocks */
+    envelope_mean = 0;
+    for (n = 0; n < num_blocks; n++)
+    {
+        const float *block = x_in + n * avg_pool_k;
+        for (j = 0; j < avg_pool_k; j++)
+        {
+            envelope[n] += fabs(block[j]);
+        }
+        envelope[n] = log(envelope[n] / avg_pool_k + 1.52587890625e-05f);
+        envelope_mean += envelope[n];
+    }
+    envelope_mean /= num_blocks;
+    for (n = 0; n < num_blocks; n++)
+    {
+        envelope[n] -= envelope_mean;
+    }
+    envelope[num_blocks] = envelope_mean;
+#ifdef DEBUG_NNDSP
+    print_float_vector("tenv", envelope, num_blocks + 1);
+    print_float_vector("alpha1_in", in_buffer, alpha1_input_dim);
+#endif
+    /* temporal weights: alpha1 -> leaky ReLU -> alpha2 */
+    compute_generic_conv1d(alpha1, out_buffer, hAdaShape->conv_alpha1_state, in_buffer, alpha1_input_dim, ACTIVATION_LINEAR, arch);
+#ifdef DEBUG_NNDSP
+    print_float_vector("alpha1_out", out_buffer, frame_size);
+#endif
+    /* leaky ReLU done by hand (ToDo: try tanh activation); in_buffer is reused as the alpha2 input */
+    for (n = 0; n < frame_size; n++)
+    {
+        if (out_buffer[n] >= 0)
+        {
+            in_buffer[n] = out_buffer[n];
+        }
+        else
+        {
+            in_buffer[n] = 0.2f * out_buffer[n];
+        }
+    }
+#ifdef DEBUG_NNDSP
+    print_float_vector("post_alpha1", in_buffer, frame_size);
+#endif
+    compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);
+    /* apply the predicted log-gains to the signal */
+    for (n = 0; n < frame_size; n++)
+    {
+        x_out[n] = exp(out_buffer[n]) * x_in[n];
+    }
+}

--- /dev/null

+++ b/dnn/nndsp.h

@@ -1,0 +1,141 @@

+/* Copyright (c) 2023 Amazon

+   Written by Jan Buethe */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#ifndef NNDSP_H

+#define NNDSP_H

+#include "opus_types.h"

+#include "nnet.h"

+#include <string.h>

+#define ADACONV_MAX_KERNEL_SIZE 16 /* the ADACONV_MAX_* values size the fixed arrays in AdaConvState and the kernel scratch buffers in adaconv_process_frame() */
+#define ADACONV_MAX_INPUT_CHANNELS 2
+#define ADACONV_MAX_OUTPUT_CHANNELS 2
+#define ADACONV_MAX_FRAME_SIZE 80
+#define ADACONV_MAX_OVERLAP_SIZE 40
+#define ADACOMB_MAX_LAG 300 /* history samples kept in addition to the kernel span (see AdaCombState.history) */
+#define ADACOMB_MAX_KERNEL_SIZE 16 /* capacity of the kernel arrays in AdaCombState and adacomb_process_frame() */
+#define ADACOMB_MAX_FRAME_SIZE 80 /* capacity of the per-frame scratch buffers in adacomb_process_frame() */
+#define ADACOMB_MAX_OVERLAP_SIZE 40
+#define ADASHAPE_MAX_INPUT_DIM 512 /* sizes AdaShapeState.conv_alpha1_state; adashape_process_frame() asserts feature_dim + frame_size / avg_pool_k + 1 stays below it */
+#define ADASHAPE_MAX_FRAME_SIZE 160 /* sizes AdaShapeState.conv_alpha2_state and the output scratch buffer of adashape_process_frame() */
+/*#define DEBUG_NNDSP*/ /* uncomment to compile in the print_float_vector() dumps guarded by DEBUG_NNDSP */
+#ifdef DEBUG_NNDSP
+#include <stdio.h>
+#endif
+void print_float_vector(const char* name, const float *vec, int length); /* debug helper; the call sites visible in nndsp.c all sit inside #ifdef DEBUG_NNDSP. Presumably prints length values of vec labelled with name (definition not shown here) */

+typedef struct { /* Persistent state of adaconv_process_frame(), carried from one frame to the next. */
+    float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS]; /* last kernel_size input samples of each input channel, stored channel after channel with stride kernel_size */
+    float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS]; /* gain-scaled kernel of the previous frame; drives the windowed overlap at the start of the next frame */
+    float last_gain; /* NOTE(review): not touched by the part of adaconv_process_frame() visible in this patch chunk — confirm whether it is still used */
+} AdaConvState;

+typedef struct { /* Persistent state of adacomb_process_frame(), carried from one frame to the next. */
+    float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG]; /* most recent kernel_size + ADACOMB_MAX_LAG input samples; prepended to the next frame so lagged reads stay in bounds */
+    float last_kernel[ADACOMB_MAX_KERNEL_SIZE]; /* gain-scaled kernel of the previous frame (first kernel_size entries are written) */
+    float last_global_gain; /* global gain applied to the previous frame; crossfaded with the new one over the overlap */
+    int last_pitch_lag; /* pitch_lag passed for the previous frame; used for the previous-kernel branch of the overlap */
+} AdaCombState;

+typedef struct { /* Persistent state of adashape_process_frame(): one state buffer per convolution layer. */
+    float conv_alpha1_state[ADASHAPE_MAX_INPUT_DIM]; /* handed to compute_generic_conv1d() as the state of the alpha1 layer */
+    float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE]; /* handed to compute_generic_conv1d() as the state of the alpha2 layer */
+} AdaShapeState;

+void init_adaconv_state(AdaConvState *hAdaConv); /* state initializers for the three module types; osce.c calls them from its reset_*_state() helpers */
+void init_adacomb_state(AdaCombState *hAdaComb);
+void init_adashape_state(AdaShapeState *hAdaShape);
+void compute_overlap_window(float *window, int overlap_size); /* fills window, presumably with overlap_size crossfade weights (definition not shown here — TODO confirm); used by init_lace()/init_nolace() */

+void adaconv_process_frame( /* Adaptive convolution of one frame with in_channels inputs and out_channels outputs; the kernel is predicted from features for every frame. */
+    AdaConvState* hAdaConv, /* history and previous kernel; updated at the end of the call */
+    float *x_out, /* out_channels * frame_size samples, one channel after the other */
+    const float *x_in, /* in_channels * frame_size samples, one channel after the other */
+    const float *features, /* conditioning vector fed to kernel_layer and gain_layer */
+    const LinearLayer *kernel_layer, /* dense layer (linear activation) producing in_channels * out_channels * kernel_size kernel values */
+    const LinearLayer *gain_layer, /* dense layer (tanh activation) producing one raw gain per output channel */
+    int feature_dim, /* not strictly necessary */
+    int frame_size, /* samples per channel and frame */
+    int overlap_size, /* leading samples blended between previous and new kernel */
+    int in_channels,
+    int out_channels,
+    int kernel_size, /* taps per (output, input) channel pair */
+    int left_padding, /* samples the kernel span starts ahead of the current sample */
+    float filter_gain_a, /* passed with filter_gain_b to transform_gains() to map the raw gains */
+    float filter_gain_b,
+    float shape_gain, /* NOTE(review): no use visible in the part of the definition shown in this patch chunk — confirm */
+    float *window, /* overlap_size weights applied to the previous-kernel output; the new-kernel output gets 1 - window */
+    int arch /* forwarded to compute_generic_dense() and celt_pitch_xcorr() */
+);

+void adacomb_process_frame( /* Adaptive comb filtering of one mono frame; kernel and gains are predicted from features for every frame. x_out may alias x_in. */
+    AdaCombState* hAdaComb, /* history, previous kernel, lag and global gain; updated at the end of the call */
+    float *x_out, /* receives frame_size output samples */
+    const float *x_in, /* frame_size input samples */
+    const float *features, /* conditioning vector fed to the three layers */
+    const LinearLayer *kernel_layer, /* dense layer, linear activation: raw kernel */
+    const LinearLayer *gain_layer, /* dense layer, ReLU activation: raw kernel gain */
+    const LinearLayer *global_gain_layer, /* dense layer, tanh activation: raw global gain */
+    int pitch_lag, /* comb delay in samples */
+    int feature_dim, /* only used for debug output */
+    int frame_size, /* must fit ADACOMB_MAX_FRAME_SIZE, the size of the local scratch buffers */
+    int overlap_size, /* leading samples crossfaded from the previous frame's filter */
+    int kernel_size, /* number of kernel taps; must fit ADACOMB_MAX_KERNEL_SIZE */
+    int left_padding, /* samples the kernel span starts ahead of the lagged position */
+    float filter_gain_a, /* global gain = exp(filter_gain_a * tanh_out + filter_gain_b) */
+    float filter_gain_b,
+    float log_gain_limit, /* kernel gain = exp(log_gain_limit - relu_out) */
+    float *window, /* overlap_size weights applied to the previous-frame branch */
+    int arch /* forwarded to compute_generic_dense() and celt_pitch_xcorr() */
+);

+void adashape_process_frame( /* Adaptive temporal shaping: multiplies x_in sample by sample with exp() of weights predicted from features and a log envelope of x_in. */
+    AdaShapeState *hAdaShape, /* convolution states of alpha1 and alpha2 */
+    float *x_out, /* receives frame_size shaped samples */
+    const float *x_in, /* frame_size input samples */
+    const float *features, /* feature_dim conditioning values */
+    const LinearLayer *alpha1, /* first convolution; input is features followed by the envelope and its mean */
+    const LinearLayer *alpha2, /* second convolution; input is the leaky-ReLU output of alpha1 */
+    int feature_dim,
+    int frame_size, /* asserted to be a multiple of avg_pool_k */
+    int avg_pool_k, /* number of samples averaged per envelope value */
+    int arch /* forwarded to compute_generic_conv1d() */
+);

+#endif

--- a/dnn/nnet.c

+++ b/dnn/nnet.c

@@ -41,6 +41,10 @@

 #include "os_support.h"

 #include "vec.h"

+#ifdef ENABLE_OSCE

+#include "osce_config.h"

+#endif

 #ifdef NO_OPTIMIZATIONS

 #if defined(_MSC_VER)

 #pragma message ("Compiling without any vectorization. This code will be very slow")

@@ -59,8 +63,11 @@

    compute_activation(output, output, layer->nb_outputs, activation, arch);

+#ifdef ENABLE_OSCE

+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)

+#else

 #define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS)

+#endif

 void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)

--- a/dnn/nnet_arch.h

+++ b/dnn/nnet_arch.h

@@ -64,13 +64,29 @@

    return x < 0 ? 0 : x;

+/*#define HIGH_ACCURACY */

 void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation)

    int i;

    if (activation == ACTIVATION_SIGMOID) {

+#ifdef HIGH_ACCURACY

+      for (int n=0; n<N; n++)

+      {

+         output[n] = 1.f  / (1 + exp(-input[n]));

+      }

+#else

       vec_sigmoid(output, input, N);

+#endif

    } else if (activation == ACTIVATION_TANH) {

+#ifdef HIGH_ACCURACY

+      for (int n=0; n<N; n++)

+      {

+         output[n] = tanh(input[n]);

+      }

+#else

       vec_tanh(output, input, N);

+#endif

    } else if (activation == ACTIVATION_SWISH) {

       vec_swish(output, input, N);

    } else if (activation == ACTIVATION_RELU) {

--- /dev/null

+++ b/dnn/osce.c

@@ -1,0 +1,1411 @@

+/* Copyright (c) 2023 Amazon

+   Written by Jan Buethe */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#ifdef HAVE_CONFIG_H

+#include "config.h"

+#endif

+#include <math.h>

+#include "osce.h"

+#include "osce_features.h"

+#include "os_support.h"

+#include "nndsp.h"

+#include "float_cast.h"

+#include "arch.h"

+#ifdef OSCE_DEBUG

+#include <stdio.h>

+/*#define WRITE_FEATURES*/

+/*#define DEBUG_LACE*/

+/*#define DEBUG_NOLACE*/

+#define FINIT(fid, name, mode) do{if (fid == NULL) {fid = fopen(name, mode);}} while(0) /* open the debug file on first use only; fid must start out NULL. The fopen() result is not checked. */

+#endif

+#ifdef ENABLE_OSCE_TRAINING_DATA

+#include <stdio.h>

+#endif

+/* Clamp a to the closed interval [min, max]. Both bounds are enforced, so an
+   input such as log(0) = -inf is mapped to min. Arguments may be evaluated
+   more than once: do not pass expressions with side effects. */
+#define CLIP(a, min, max) ((a) < (min) ? (min) : ((a) > (max) ? (max) : (a)))

+extern const WeightArray lacelayers_arrays[];

+extern const WeightArray nolacelayers_arrays[];

+/* LACE */

+#ifndef DISABLE_LACE

+/* Writes the 8-value sinusoidal embedding of a bit count to emb[0..7].
+   numbits is optionally taken to the log domain, passed through
+   CLIP(numbits, min_val, max_val), centred on the middle of that range and
+   evaluated as sin(x * LACE_NUMBITS_SCALE_k - 0.5) for k = 0..7.
+   dim is unused; exactly 8 values are written. */
+static void compute_lace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale)
+{
+    float centered;
+    (void) dim;
+    if (logscale)
+    {
+        numbits = log(numbits);
+    }
+    centered = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2;
+    /* one output per scale constant */
+    emb[0] = sin(centered * LACE_NUMBITS_SCALE_0 - 0.5f);
+    emb[1] = sin(centered * LACE_NUMBITS_SCALE_1 - 0.5f);
+    emb[2] = sin(centered * LACE_NUMBITS_SCALE_2 - 0.5f);
+    emb[3] = sin(centered * LACE_NUMBITS_SCALE_3 - 0.5f);
+    emb[4] = sin(centered * LACE_NUMBITS_SCALE_4 - 0.5f);
+    emb[5] = sin(centered * LACE_NUMBITS_SCALE_5 - 0.5f);
+    emb[6] = sin(centered * LACE_NUMBITS_SCALE_6 - 0.5f);
+    emb[7] = sin(centered * LACE_NUMBITS_SCALE_7 - 0.5f);
+}

+/* Sets up a LACE model instance: zeroes *hLACE, loads the layers from
+   weights and precomputes the overlap window of LACE_OVERLAP_SIZE entries.
+   weights must not be NULL. Returns the value of init_lacelayers(). */
+static int init_lace(LACE *hLACE, const WeightArray *weights)
+{
+    int status;
+    OPUS_CLEAR(hLACE, 1);
+    celt_assert(weights != NULL);
+    status = init_lacelayers(&hLACE->layers, weights);
+    compute_overlap_window(hLACE->window, LACE_OVERLAP_SIZE);
+    return status;
+}

+static void reset_lace_state(LACEState *state) /* Returns the LACE signal-processing state to its initial condition. */
+{
+    OPUS_CLEAR(state, 1); /* zero everything first: pre-/de-emphasis memories, feature-net states, filter states */
+    init_adacomb_state(&state->cf1_state); /* then run the module initializers on the two comb filters ... */
+    init_adacomb_state(&state->cf2_state);
+    init_adaconv_state(&state->af1_state); /* ... and on the adaptive convolution */
+}

+static void lace_feature_net( /* LACE feature encoder: maps the features, pitch periods and bit counts of one frame
+                                 (4 subframes) to 4 conditioning vectors of LACE_COND_DIM floats each. */
+    LACE *hLACE, /* model; only hLACE->layers is read */
+    LACEState *state, /* feature_net_conv2_state and feature_net_gru_state are updated */
+    float *output, /* receives 4 * LACE_COND_DIM floats, one vector per subframe */
+    const float *features, /* 4 * LACE_NUM_FEATURES floats, one vector per subframe */
+    const float *numbits, /* 2 bit counts; each is embedded on a log scale */
+    const int *periods, /* 4 pitch periods, used as row index into the pitch embedding table */
+    int arch
+)
+{
+    float input_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)]; /* input and output buffers are used ping-pong style: each stage's output is copied back as the next stage's input */
+    float output_buffer[4 * IMAX(LACE_COND_DIM, LACE_HIDDEN_FEATURE_DIM)];
+    float numbits_embedded[2 * LACE_NUMBITS_EMBEDDING_DIM];
+    int i_subframe;
+    compute_lace_numbits_embedding(numbits_embedded, numbits[0], LACE_NUMBITS_EMBEDDING_DIM,
+        log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1);
+    compute_lace_numbits_embedding(numbits_embedded + LACE_NUMBITS_EMBEDDING_DIM, numbits[1], LACE_NUMBITS_EMBEDDING_DIM,
+        log(LACE_NUMBITS_RANGE_LOW), log(LACE_NUMBITS_RANGE_HIGH), 1);
+    /* scaling and dimensionality reduction: per subframe, concatenate
+       [ features | pitch embedding row | both numbits embeddings ] and run it
+       through conv1 (no state, tanh). NOTE(review): assumes the concatenation
+       fits into input_buffer; the constants are defined elsewhere. */
+    for (i_subframe = 0; i_subframe < 4; i_subframe ++)
+    {
+        OPUS_COPY(input_buffer, features + i_subframe * LACE_NUM_FEATURES, LACE_NUM_FEATURES);
+        OPUS_COPY(input_buffer + LACE_NUM_FEATURES, hLACE->layers.lace_pitch_embedding.float_weights + periods[i_subframe] * LACE_PITCH_EMBEDDING_DIM, LACE_PITCH_EMBEDDING_DIM);
+        OPUS_COPY(input_buffer + LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM, numbits_embedded, 2 * LACE_NUMBITS_EMBEDDING_DIM);
+        compute_generic_conv1d(
+            &hLACE->layers.lace_fnet_conv1,
+            output_buffer + i_subframe * LACE_HIDDEN_FEATURE_DIM,
+            NULL,
+            input_buffer,
+            LACE_NUM_FEATURES + LACE_PITCH_EMBEDDING_DIM + 2 * LACE_NUMBITS_EMBEDDING_DIM,
+            ACTIVATION_TANH,
+            arch);
+    }
+    /* subframe accumulation: the 4 hidden vectors form one conv2 input; conv2 keeps state across frames */
+    OPUS_COPY(input_buffer, output_buffer, 4 * LACE_HIDDEN_FEATURE_DIM);
+    compute_generic_conv1d(
+        &hLACE->layers.lace_fnet_conv2,
+        output_buffer,
+        state->feature_net_conv2_state,
+        input_buffer,
+        4 * LACE_HIDDEN_FEATURE_DIM,
+        ACTIVATION_TANH,
+        arch
+    );
+    /* tconv upsampling, implemented as a dense layer with linear activation */
+    OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM);
+    compute_generic_dense(
+        &hLACE->layers.lace_fnet_tconv,
+        output_buffer,
+        input_buffer,
+        ACTIVATION_LINEAR,
+        arch
+    );
+    /* GRU: one step per subframe; the GRU state after each step is that subframe's output vector */
+    OPUS_COPY(input_buffer, output_buffer, 4 * LACE_COND_DIM);
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        compute_generic_gru(
+            &hLACE->layers.lace_fnet_gru_input,
+            &hLACE->layers.lace_fnet_gru_recurrent,
+            state->feature_net_gru_state,
+            input_buffer + i_subframe * LACE_COND_DIM,
+            arch
+        );
+        OPUS_COPY(output + i_subframe * LACE_COND_DIM, state->feature_net_gru_state, LACE_COND_DIM);
+    }
+}

+static void lace_process_20ms_frame( /* Runs the LACE enhancement chain on one frame of 4 * LACE_FRAME_SIZE samples:
+                                        pre-emphasis, feature encoder, two adaptive comb filters, one adaptive
+                                        convolution and de-emphasis. */
+    LACE* hLACE, /* model: layers and overlap window */
+    LACEState *state, /* filter memories and feature-net state; updated */
+    float *x_out, /* receives 4 * LACE_FRAME_SIZE enhanced samples */
+    const float *x_in, /* 4 * LACE_FRAME_SIZE input samples */
+    const float *features, /* 4 * LACE_NUM_FEATURES floats, one vector per subframe */
+    const float *numbits, /* 2 bit counts, forwarded to the feature encoder */
+    const int *periods, /* 4 pitch periods, one per subframe */
+    int arch
+)
+{
+    float feature_buffer[4 * LACE_COND_DIM];
+    float output_buffer[4 * LACE_FRAME_SIZE]; /* working signal; every filter stage below processes it in place */
+    int i_subframe, i_sample;
+#ifdef DEBUG_LACE
+    static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL;
+    static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_xdeemph, *f_numbits, *f_periods; /* the last three have no initializer; as statics they still start out NULL, which FINIT relies on */
+    FINIT(f_features, "debug/c_features.f32", "wb");
+    FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb");
+    FINIT(f_xin, "debug/c_x_in.f32", "wb");
+    FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb");
+    FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb");
+    FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb");
+    FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb");
+    FINIT(f_postaf1, "debug/c_post_af1.f32", "wb");
+    FINIT(f_numbits, "debug/c_numbits.f32", "wb");
+    FINIT(f_periods, "debug/c_periods.s32", "wb");
+    fwrite(x_in, sizeof(*x_in), 4 * LACE_FRAME_SIZE, f_xin);
+    fwrite(numbits, sizeof(*numbits), 2, f_numbits);
+    fwrite(periods, sizeof(*periods), 4, f_periods);
+#endif
+    /* pre-emphasis: y[n] = x[n] - LACE_PREEMPH * x[n-1], with x[-1] taken from state->preemph_mem */
+    for (i_sample = 0; i_sample < 4 * LACE_FRAME_SIZE; i_sample ++)
+    {
+        output_buffer[i_sample] = x_in[i_sample] - LACE_PREEMPH * state->preemph_mem;
+        state->preemph_mem = x_in[i_sample];
+    }
+    /* run feature encoder: yields one LACE_COND_DIM conditioning vector per subframe */
+    lace_feature_net(hLACE, state, feature_buffer, features, numbits, periods, arch);
+#ifdef DEBUG_LACE
+    fwrite(features, sizeof(*features), 4 * LACE_NUM_FEATURES, f_features);
+    fwrite(feature_buffer, sizeof(*feature_buffer), 4 * LACE_COND_DIM, f_encfeatures);
+    fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_xpreemph);
+#endif
+    /* 1st comb filtering stage (in place: x_out and x_in arguments are the same subframe) */
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        adacomb_process_frame(
+            &state->cf1_state,
+            output_buffer + i_subframe * LACE_FRAME_SIZE,
+            output_buffer + i_subframe * LACE_FRAME_SIZE,
+            feature_buffer + i_subframe * LACE_COND_DIM,
+            &hLACE->layers.lace_cf1_kernel,
+            &hLACE->layers.lace_cf1_gain,
+            &hLACE->layers.lace_cf1_global_gain,
+            periods[i_subframe],
+            LACE_COND_DIM,
+            LACE_FRAME_SIZE,
+            LACE_OVERLAP_SIZE,
+            LACE_CF1_KERNEL_SIZE,
+            LACE_CF1_LEFT_PADDING,
+            LACE_CF1_FILTER_GAIN_A,
+            LACE_CF1_FILTER_GAIN_B,
+            LACE_CF1_LOG_GAIN_LIMIT,
+            hLACE->window,
+            arch);
+    }
+#ifdef DEBUG_LACE
+    fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf1);
+#endif
+    /* 2nd comb filtering stage: same conditioning vectors and pitch periods, separate layers and state */
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        adacomb_process_frame(
+            &state->cf2_state,
+            output_buffer + i_subframe * LACE_FRAME_SIZE,
+            output_buffer + i_subframe * LACE_FRAME_SIZE,
+            feature_buffer + i_subframe * LACE_COND_DIM,
+            &hLACE->layers.lace_cf2_kernel,
+            &hLACE->layers.lace_cf2_gain,
+            &hLACE->layers.lace_cf2_global_gain,
+            periods[i_subframe],
+            LACE_COND_DIM,
+            LACE_FRAME_SIZE,
+            LACE_OVERLAP_SIZE,
+            LACE_CF2_KERNEL_SIZE,
+            LACE_CF2_LEFT_PADDING,
+            LACE_CF2_FILTER_GAIN_A,
+            LACE_CF2_FILTER_GAIN_B,
+            LACE_CF2_LOG_GAIN_LIMIT,
+            hLACE->window,
+            arch);
+    }
+#ifdef DEBUG_LACE
+    fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postcf2);
+#endif
+    /* final adaptive filtering stage (adaptive convolution, again in place per subframe) */
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        adaconv_process_frame(
+            &state->af1_state,
+            output_buffer + i_subframe * LACE_FRAME_SIZE,
+            output_buffer + i_subframe * LACE_FRAME_SIZE,
+            feature_buffer + i_subframe * LACE_COND_DIM,
+            &hLACE->layers.lace_af1_kernel,
+            &hLACE->layers.lace_af1_gain,
+            LACE_COND_DIM,
+            LACE_FRAME_SIZE,
+            LACE_OVERLAP_SIZE,
+            LACE_AF1_IN_CHANNELS,
+            LACE_AF1_OUT_CHANNELS,
+            LACE_AF1_KERNEL_SIZE,
+            LACE_AF1_LEFT_PADDING,
+            LACE_AF1_FILTER_GAIN_A,
+            LACE_AF1_FILTER_GAIN_B,
+            LACE_AF1_SHAPE_GAIN,
+            hLACE->window,
+            arch);
+    }
+#ifdef DEBUG_LACE
+    fwrite(output_buffer, sizeof(float), 4 * LACE_FRAME_SIZE, f_postaf1);
+#endif
+    /* de-emphasis: y[n] = x[n] + LACE_PREEMPH * y[n-1], with y[-1] taken from state->deemph_mem */
+    for (i_sample = 0; i_sample < 4 * LACE_FRAME_SIZE; i_sample ++)
+    {
+        x_out[i_sample] = output_buffer[i_sample] + LACE_PREEMPH * state->deemph_mem;
+        state->deemph_mem = x_out[i_sample];
+    }
+#ifdef DEBUG_LACE
+    fwrite(x_out, sizeof(float), 4 * LACE_FRAME_SIZE, f_xdeemph);
+#endif
+}

+#endif /* #ifndef DISABLE_LACE */

+/* NoLACE */

+#ifndef DISABLE_NOLACE

+/* Writes the 8-value sinusoidal embedding of a bit count to emb[0..7].
+   numbits is optionally taken to the log domain, passed through
+   CLIP(numbits, min_val, max_val), centred on the middle of that range and
+   evaluated as sin(x * NOLACE_NUMBITS_SCALE_k - 0.5) for k = 0..7.
+   dim is unused; exactly 8 values are written. */
+static void compute_nolace_numbits_embedding(float *emb, float numbits, int dim, float min_val, float max_val, int logscale)
+{
+    float centered;
+    (void) dim;
+    if (logscale)
+    {
+        numbits = log(numbits);
+    }
+    centered = CLIP(numbits, min_val, max_val) - (max_val + min_val) / 2;
+    /* one output per scale constant */
+    emb[0] = sin(centered * NOLACE_NUMBITS_SCALE_0 - 0.5f);
+    emb[1] = sin(centered * NOLACE_NUMBITS_SCALE_1 - 0.5f);
+    emb[2] = sin(centered * NOLACE_NUMBITS_SCALE_2 - 0.5f);
+    emb[3] = sin(centered * NOLACE_NUMBITS_SCALE_3 - 0.5f);
+    emb[4] = sin(centered * NOLACE_NUMBITS_SCALE_4 - 0.5f);
+    emb[5] = sin(centered * NOLACE_NUMBITS_SCALE_5 - 0.5f);
+    emb[6] = sin(centered * NOLACE_NUMBITS_SCALE_6 - 0.5f);
+    emb[7] = sin(centered * NOLACE_NUMBITS_SCALE_7 - 0.5f);
+}

+/* Sets up a NoLACE model instance: zeroes *hNoLACE, loads the layers from
+   weights and precomputes the overlap window of NOLACE_OVERLAP_SIZE entries.
+   weights must not be NULL. Returns the value of init_nolacelayers(). */
+static int init_nolace(NoLACE *hNoLACE, const WeightArray *weights)
+{
+    int status;
+    OPUS_CLEAR(hNoLACE, 1);
+    celt_assert(weights != NULL);
+    status = init_nolacelayers(&hNoLACE->layers, weights);
+    compute_overlap_window(hNoLACE->window, NOLACE_OVERLAP_SIZE);
+    return status;
+}

+static void reset_nolace_state(NoLACEState *state) /* Returns the NoLACE signal-processing state to its initial condition. */
+{
+    OPUS_CLEAR(state, 1); /* zero the whole state first, then run the initializer of every filter module */
+    init_adacomb_state(&state->cf1_state); /* two adaptive comb filters */
+    init_adacomb_state(&state->cf2_state);
+    init_adaconv_state(&state->af1_state); /* four adaptive convolutions */
+    init_adaconv_state(&state->af2_state);
+    init_adaconv_state(&state->af3_state);
+    init_adaconv_state(&state->af4_state);
+    init_adashape_state(&state->tdshape1_state); /* three temporal shaping modules */
+    init_adashape_state(&state->tdshape2_state);
+    init_adashape_state(&state->tdshape3_state);
+}

+static void nolace_feature_net( /* NoLACE feature encoder: maps the features, pitch periods and bit counts of one frame
+                                   (4 subframes) to 4 conditioning vectors of NOLACE_COND_DIM floats each. */
+    NoLACE *hNoLACE, /* model; only hNoLACE->layers is read */
+    NoLACEState *state, /* feature_net_conv2_state and feature_net_gru_state are updated */
+    float *output, /* receives 4 * NOLACE_COND_DIM floats, one vector per subframe */
+    const float *features, /* 4 * NOLACE_NUM_FEATURES floats, one vector per subframe */
+    const float *numbits, /* 2 bit counts; each is embedded on a log scale */
+    const int *periods, /* 4 pitch periods, used as row index into the pitch embedding table */
+    int arch
+)
+{
+    float input_buffer[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)]; /* input and output buffers are used ping-pong style: each stage's output is copied back as the next stage's input */
+    float output_buffer[4 * IMAX(NOLACE_COND_DIM, NOLACE_HIDDEN_FEATURE_DIM)];
+    float numbits_embedded[2 * NOLACE_NUMBITS_EMBEDDING_DIM];
+    int i_subframe;
+    compute_nolace_numbits_embedding(numbits_embedded, numbits[0], NOLACE_NUMBITS_EMBEDDING_DIM,
+        log(NOLACE_NUMBITS_RANGE_LOW), log(NOLACE_NUMBITS_RANGE_HIGH), 1);
+    compute_nolace_numbits_embedding(numbits_embedded + NOLACE_NUMBITS_EMBEDDING_DIM, numbits[1], NOLACE_NUMBITS_EMBEDDING_DIM,
+        log(NOLACE_NUMBITS_RANGE_LOW), log(NOLACE_NUMBITS_RANGE_HIGH), 1);
+    /* scaling and dimensionality reduction: per subframe, concatenate
+       [ features | pitch embedding row | both numbits embeddings ] and run it
+       through conv1 (no state, tanh). NOTE(review): assumes the concatenation
+       fits into input_buffer; the constants are defined elsewhere. */
+    for (i_subframe = 0; i_subframe < 4; i_subframe ++)
+    {
+        OPUS_COPY(input_buffer, features + i_subframe * NOLACE_NUM_FEATURES, NOLACE_NUM_FEATURES);
+        OPUS_COPY(input_buffer + NOLACE_NUM_FEATURES, hNoLACE->layers.nolace_pitch_embedding.float_weights + periods[i_subframe] * NOLACE_PITCH_EMBEDDING_DIM, NOLACE_PITCH_EMBEDDING_DIM);
+        OPUS_COPY(input_buffer + NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM, numbits_embedded, 2 * NOLACE_NUMBITS_EMBEDDING_DIM);
+        compute_generic_conv1d(
+            &hNoLACE->layers.nolace_fnet_conv1,
+            output_buffer + i_subframe * NOLACE_HIDDEN_FEATURE_DIM,
+            NULL,
+            input_buffer,
+            NOLACE_NUM_FEATURES + NOLACE_PITCH_EMBEDDING_DIM + 2 * NOLACE_NUMBITS_EMBEDDING_DIM,
+            ACTIVATION_TANH,
+            arch);
+    }
+    /* subframe accumulation: the 4 hidden vectors form one conv2 input; conv2 keeps state across frames */
+    OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_HIDDEN_FEATURE_DIM);
+    compute_generic_conv1d(
+        &hNoLACE->layers.nolace_fnet_conv2,
+        output_buffer,
+        state->feature_net_conv2_state,
+        input_buffer,
+        4 * NOLACE_HIDDEN_FEATURE_DIM,
+        ACTIVATION_TANH,
+        arch
+    );
+    /* tconv upsampling, implemented as a dense layer with linear activation */
+    OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_COND_DIM);
+    compute_generic_dense(
+        &hNoLACE->layers.nolace_fnet_tconv,
+        output_buffer,
+        input_buffer,
+        ACTIVATION_LINEAR,
+        arch
+    );
+    /* GRU: one step per subframe; the GRU state after each step is that subframe's output vector */
+    OPUS_COPY(input_buffer, output_buffer, 4 * NOLACE_COND_DIM);
+    for (i_subframe = 0; i_subframe < 4; i_subframe++)
+    {
+        compute_generic_gru(
+            &hNoLACE->layers.nolace_fnet_gru_input,
+            &hNoLACE->layers.nolace_fnet_gru_recurrent,
+            state->feature_net_gru_state,
+            input_buffer + i_subframe * NOLACE_COND_DIM,
+            arch
+        );
+        OPUS_COPY(output + i_subframe * NOLACE_COND_DIM, state->feature_net_gru_state, NOLACE_COND_DIM);
+    }
+}

+static void nolace_process_20ms_frame(

+    NoLACE* hNoLACE,

+    NoLACEState *state,

+    float *x_out,

+    const float *x_in,

+    const float *features,

+    const float *numbits,

+    const int *periods,

+    int arch

+)

+{

+    float feature_buffer[4 * NOLACE_COND_DIM];

+    float feature_transform_buffer[4 * NOLACE_COND_DIM];

+    float x_buffer1[8 * NOLACE_FRAME_SIZE];

+    float x_buffer2[8 * NOLACE_FRAME_SIZE];

+    int i_subframe, i_sample;

+    NOLACELayers *layers = &hNoLACE->layers;

+#ifdef DEBUG_NOLACE

+    static FILE *f_features=NULL, *f_encfeatures=NULL, *f_xin=NULL, *f_xpreemph=NULL, *f_postcf1=NULL;

+    static FILE *f_postcf2=NULL, *f_postaf1=NULL, *f_xdeemph, *f_numbits, *f_periods;

+    static FILE *f_ffpostcf1, *f_fpostcf2, *f_fpostaf1;

+    FINIT(f_features, "debug/c_features.f32", "wb");

+    FINIT(f_encfeatures, "debug/c_encoded_features.f32", "wb");

+    FINIT(f_xin, "debug/c_x_in.f32", "wb");

+    FINIT(f_xpreemph, "debug/c_xpreemph.f32", "wb");

+    FINIT(f_xdeemph, "debug/c_xdeemph.f32", "wb");

+    FINIT(f_postcf1, "debug/c_post_cf1.f32", "wb");

+    FINIT(f_postcf2, "debug/c_post_cf2.f32", "wb");

+    FINIT(f_postaf1, "debug/c_post_af1.f32", "wb");

+    FINIT(f_numbits, "debug/c_numbits.f32", "wb");

+    FINIT(f_periods, "debug/c_periods.s32", "wb");

+    fwrite(x_in, sizeof(*x_in), 4 * NOLACE_FRAME_SIZE, f_xin);

+    fwrite(numbits, sizeof(*numbits), 2, f_numbits);

+    fwrite(periods, sizeof(*periods), 4, f_periods);

+#endif

+    /* pre-emphasis */

+    for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++)

+    {

+        x_buffer1[i_sample] = x_in[i_sample] - NOLACE_PREEMPH * state->preemph_mem;

+        state->preemph_mem = x_in[i_sample];

+    }

+    /* run feature encoder */

+    nolace_feature_net(hNoLACE, state, feature_buffer, features, numbits, periods, arch);

+#ifdef DEBUG_NOLACE

+    fwrite(features, sizeof(*features), 4 * NOLACE_NUM_FEATURES, f_features);

+    fwrite(feature_buffer, sizeof(*feature_buffer), 4 * NOLACE_COND_DIM, f_encfeatures);

+    fwrite(output_buffer, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xpreemph);

+#endif

+    /* 1st comb filtering stage */

+    for (i_subframe = 0; i_subframe < 4; i_subframe++)

+    {

+        /* modifies signal in place */

+        adacomb_process_frame(

+            &state->cf1_state,

+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,

+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            &hNoLACE->layers.nolace_cf1_kernel,

+            &hNoLACE->layers.nolace_cf1_gain,

+            &hNoLACE->layers.nolace_cf1_global_gain,

+            periods[i_subframe],

+            NOLACE_COND_DIM,

+            NOLACE_FRAME_SIZE,

+            NOLACE_OVERLAP_SIZE,

+            NOLACE_CF1_KERNEL_SIZE,

+            NOLACE_CF1_LEFT_PADDING,

+            NOLACE_CF1_FILTER_GAIN_A,

+            NOLACE_CF1_FILTER_GAIN_B,

+            NOLACE_CF1_LOG_GAIN_LIMIT,

+            hNoLACE->window,

+            arch);

+        compute_generic_conv1d(

+            &layers->nolace_post_cf1,

+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,

+            state->post_cf1_state,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            NOLACE_COND_DIM,

+            ACTIVATION_TANH,

+            arch);

+    }

+    /* update feature buffer */

+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

+#ifdef DEBUG_NOLACE

+    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf1);

+#endif

+    /* 2nd comb filtering stage */

+    for (i_subframe = 0; i_subframe < 4; i_subframe++)

+    {

+        /* modifies signal in place */

+        adacomb_process_frame(

+            &state->cf2_state,

+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,

+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            &hNoLACE->layers.nolace_cf2_kernel,

+            &hNoLACE->layers.nolace_cf2_gain,

+            &hNoLACE->layers.nolace_cf2_global_gain,

+            periods[i_subframe],

+            NOLACE_COND_DIM,

+            NOLACE_FRAME_SIZE,

+            NOLACE_OVERLAP_SIZE,

+            NOLACE_CF2_KERNEL_SIZE,

+            NOLACE_CF2_LEFT_PADDING,

+            NOLACE_CF2_FILTER_GAIN_A,

+            NOLACE_CF2_FILTER_GAIN_B,

+            NOLACE_CF2_LOG_GAIN_LIMIT,

+            hNoLACE->window,

+            arch);

+        compute_generic_conv1d(

+            &layers->nolace_post_cf2,

+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,

+            state->post_cf2_state,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            NOLACE_COND_DIM,

+            ACTIVATION_TANH,

+            arch);

+    }

+    /* update feature buffer */

+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

+#ifdef DEBUG_NOLACE

+    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_postcf2);

+#endif

+    /* final adaptive filtering stage */

+    for (i_subframe = 0; i_subframe < 4; i_subframe++)

+    {

+        adaconv_process_frame(

+            &state->af1_state,

+            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS,

+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            &hNoLACE->layers.nolace_af1_kernel,

+            &hNoLACE->layers.nolace_af1_gain,

+            NOLACE_COND_DIM,

+            NOLACE_FRAME_SIZE,

+            NOLACE_OVERLAP_SIZE,

+            NOLACE_AF1_IN_CHANNELS,

+            NOLACE_AF1_OUT_CHANNELS,

+            NOLACE_AF1_KERNEL_SIZE,

+            NOLACE_AF1_LEFT_PADDING,

+            NOLACE_AF1_FILTER_GAIN_A,

+            NOLACE_AF1_FILTER_GAIN_B,

+            NOLACE_AF1_SHAPE_GAIN,

+            hNoLACE->window,

+            arch);

+        compute_generic_conv1d(

+            &layers->nolace_post_af1,

+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,

+            state->post_af1_state,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            NOLACE_COND_DIM,

+            ACTIVATION_TANH,

+            arch);

+    }

+    /* update feature buffer */

+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

+#ifdef DEBUG_NOLACE

+    fwrite(x_buffer2, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF1_OUT_CHANNELS, f_postaf1);

+#endif

+    /* first shape-mix round */

+    for (i_subframe = 0; i_subframe < 4; i_subframe++)

+    {

+        celt_assert(NOLACE_AF1_OUT_CHANNELS == 2);

+        /* modifies second channel in place */

+        adashape_process_frame(

+            &state->tdshape1_state,

+            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,

+            x_buffer2 + i_subframe * NOLACE_AF1_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            &layers->nolace_tdshape1_alpha1,

+            &layers->nolace_tdshape1_alpha2,

+            NOLACE_TDSHAPE1_FEATURE_DIM,

+            NOLACE_TDSHAPE1_FRAME_SIZE,

+            NOLACE_TDSHAPE1_AVG_POOL_K,

+            arch

+        );

+        adaconv_process_frame(

+            &state->af2_state,

+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS,

+            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF2_IN_CHANNELS,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            &hNoLACE->layers.nolace_af2_kernel,

+            &hNoLACE->layers.nolace_af2_gain,

+            NOLACE_COND_DIM,

+            NOLACE_FRAME_SIZE,

+            NOLACE_OVERLAP_SIZE,

+            NOLACE_AF2_IN_CHANNELS,

+            NOLACE_AF2_OUT_CHANNELS,

+            NOLACE_AF2_KERNEL_SIZE,

+            NOLACE_AF2_LEFT_PADDING,

+            NOLACE_AF2_FILTER_GAIN_A,

+            NOLACE_AF2_FILTER_GAIN_B,

+            NOLACE_AF2_SHAPE_GAIN,

+            hNoLACE->window,

+            arch);

+        compute_generic_conv1d(

+            &layers->nolace_post_af2,

+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,

+            state->post_af2_state,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            NOLACE_COND_DIM,

+            ACTIVATION_TANH,

+            arch);

+    }

+    /* update feature buffer */

+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

+#ifdef DEBUG_NOLACE

+    fwrite(x_buffer1, sizeof(float), 4 * NOLACE_FRAME_SIZE * NOLACE_AF2_OUT_CHANNELS, f_postaf2);

+#endif

+    /* second shape-mix round */

+    for (i_subframe = 0; i_subframe < 4; i_subframe++)

+    {

+        celt_assert(NOLACE_AF2_OUT_CHANNELS == 2);

+        /* modifies second channel in place */

+        adashape_process_frame(

+            &state->tdshape2_state,

+            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,

+            x_buffer1 + i_subframe * NOLACE_AF2_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            &layers->nolace_tdshape2_alpha1,

+            &layers->nolace_tdshape2_alpha2,

+            NOLACE_TDSHAPE2_FEATURE_DIM,

+            NOLACE_TDSHAPE2_FRAME_SIZE,

+            NOLACE_TDSHAPE2_AVG_POOL_K,

+            arch

+        );

+        adaconv_process_frame(

+            &state->af3_state,

+            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_OUT_CHANNELS,

+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF3_IN_CHANNELS,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            &hNoLACE->layers.nolace_af3_kernel,

+            &hNoLACE->layers.nolace_af3_gain,

+            NOLACE_COND_DIM,

+            NOLACE_FRAME_SIZE,

+            NOLACE_OVERLAP_SIZE,

+            NOLACE_AF3_IN_CHANNELS,

+            NOLACE_AF3_OUT_CHANNELS,

+            NOLACE_AF3_KERNEL_SIZE,

+            NOLACE_AF3_LEFT_PADDING,

+            NOLACE_AF3_FILTER_GAIN_A,

+            NOLACE_AF3_FILTER_GAIN_B,

+            NOLACE_AF3_SHAPE_GAIN,

+            hNoLACE->window,

+            arch);

+        compute_generic_conv1d(

+            &layers->nolace_post_af3,

+            feature_transform_buffer + i_subframe * NOLACE_COND_DIM,

+            state->post_af3_state,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            NOLACE_COND_DIM,

+            ACTIVATION_TANH,

+            arch);

+    }

+    /* update feature buffer */

+    OPUS_COPY(feature_buffer, feature_transform_buffer, 4 * NOLACE_COND_DIM);

+    /* third shape-mix round */

+    for (i_subframe = 0; i_subframe < 4; i_subframe++)

+    {

+        celt_assert(NOLACE_AF3_OUT_CHANNELS == 2);

+        /* modifies second channel in place */

+        adashape_process_frame(

+            &state->tdshape3_state,

+            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,

+            x_buffer2 + i_subframe * NOLACE_AF3_OUT_CHANNELS * NOLACE_FRAME_SIZE + NOLACE_FRAME_SIZE,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            &layers->nolace_tdshape3_alpha1,

+            &layers->nolace_tdshape3_alpha2,

+            NOLACE_TDSHAPE3_FEATURE_DIM,

+            NOLACE_TDSHAPE3_FRAME_SIZE,

+            NOLACE_TDSHAPE3_AVG_POOL_K,

+            arch

+        );

+        adaconv_process_frame(

+            &state->af4_state,

+            x_buffer1 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_OUT_CHANNELS,

+            x_buffer2 + i_subframe * NOLACE_FRAME_SIZE * NOLACE_AF4_IN_CHANNELS,

+            feature_buffer + i_subframe * NOLACE_COND_DIM,

+            &hNoLACE->layers.nolace_af4_kernel,

+            &hNoLACE->layers.nolace_af4_gain,

+            NOLACE_COND_DIM,

+            NOLACE_FRAME_SIZE,

+            NOLACE_OVERLAP_SIZE,

+            NOLACE_AF4_IN_CHANNELS,

+            NOLACE_AF4_OUT_CHANNELS,

+            NOLACE_AF4_KERNEL_SIZE,

+            NOLACE_AF4_LEFT_PADDING,

+            NOLACE_AF4_FILTER_GAIN_A,

+            NOLACE_AF4_FILTER_GAIN_B,

+            NOLACE_AF4_SHAPE_GAIN,

+            hNoLACE->window,

+            arch);

+    }

+    /* de-emphasis */

+    for (i_sample = 0; i_sample < 4 * NOLACE_FRAME_SIZE; i_sample ++)

+    {

+        x_out[i_sample] = x_buffer1[i_sample] + NOLACE_PREEMPH * state->deemph_mem;

+        state->deemph_mem = x_out[i_sample];

+    }

+#ifdef DEBUG_NOLACE

+    fwrite(x_out, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_xdeemph);

+#endif

+}

+#endif /* #ifndef DISABLE_NOLACE */

+/* API */

+/* Re-initializes the OSCE state of a decoder for the given enhancement method.
+ *
+ * The feature state is zeroed, the state of the selected model (if it has
+ * one) is reset, the method is stored, and features.reset is set to 2, which
+ * osce_enhance_frame() counts down before normal operation resumes.
+ * A method that is not compiled in triggers the assertion below.
+ */
+void osce_reset(silk_OSCE_struct *hOSCE, int method)
+{
+    /* start from an all-zero feature state */
+    OPUS_CLEAR(&hOSCE->features, 1);
+    if (method == OSCE_METHOD_NONE)
+    {
+        /* nothing to reset: no model state is associated with this method */
+    }
+#ifndef DISABLE_LACE
+    else if (method == OSCE_METHOD_LACE)
+    {
+        reset_lace_state(&hOSCE->state.lace);
+    }
+#endif
+#ifndef DISABLE_NOLACE
+    else if (method == OSCE_METHOD_NOLACE)
+    {
+        reset_nolace_state(&hOSCE->state.nolace);
+    }
+#endif
+    else
+    {
+        celt_assert(0 && "method not defined"); /* Question: return error code? */
+    }
+    /* the method is recorded and the reset countdown armed in every case */
+    hOSCE->method = method;
+    hOSCE->features.reset = 2;
+}

+#if 0
+/* Debugging aids (compiled out): dump layer parameters in text form. */
+#include <stdio.h>
+/* Writes n floats to fid, one "name[index]: value" line per element. */
+static void print_float_array(FILE *fid, const char  *name, const float *array, int n)
+{
+    int idx = 0;
+    while (idx < n)
+    {
+        fprintf(fid, "%s[%d]: %f\n", name, idx, array[idx]);
+        idx++;
+    }
+}
+/* Writes n ints to fid, one "name[index]: value" line per element. */
+static void print_int_array(FILE *fid, const char  *name, const int *array, int n)
+{
+    int idx = 0;
+    while (idx < n)
+    {
+        fprintf(fid, "%s[%d]: %d\n", name, idx, array[idx]);
+        idx++;
+    }
+}
+/* Writes n 8-bit integers to fid, one "name[index]: value" line per element. */
+static void print_int8_array(FILE *fid, const char  *name, const opus_int8 *array, int n)
+{
+    int idx = 0;
+    while (idx < n)
+    {
+        fprintf(fid, "%s[%d]: %d\n", name, idx, array[idx]);
+        idx++;
+    }
+}
+/* Prints the input/output dimensions of a linear layer, followed by each of
+ * its parameter arrays that is non-NULL. Every array is labelled
+ * "<name>.<field>". */
+static void print_linear_layer(FILE *fid, const char *name, LinearLayer *layer)
+{
+    char label[256];
+    int num_in = layer->nb_inputs;
+    int num_out = layer->nb_outputs;
+    int num_weights = num_in * num_out;
+    fprintf(fid, "\nprinting layer %s...\n", name);
+    fprintf(fid, "%s.nb_inputs: %d\n%s.nb_outputs: %d\n", name, num_in, name, num_out);
+    if (layer->bias != NULL)
+    {
+        sprintf(label, "%s.bias", name);
+        print_float_array(fid, label, layer->bias, num_out);
+    }
+    if (layer->subias != NULL)
+    {
+        sprintf(label, "%s.subias", name);
+        print_float_array(fid, label, layer->subias, num_out);
+    }
+    if (layer->weights != NULL)
+    {
+        sprintf(label, "%s.weights", name);
+        print_int8_array(fid, label, layer->weights, num_weights);
+    }
+    if (layer->float_weights != NULL)
+    {
+        sprintf(label, "%s.float_weights", name);
+        print_float_array(fid, label, layer->float_weights, num_weights);
+    }
+    /* weights_idx is intentionally not printed:
+       if (layer->weights_idx != NULL) {sprintf(tmp, "%s.weights_idx", name); print_float_array(fid, tmp, layer->weights_idx, n_total);} */
+    if (layer->diag != NULL)
+    {
+        sprintf(label, "%s.diag", name);
+        print_float_array(fid, label, layer->diag, num_in);
+    }
+    if (layer->scale != NULL)
+    {
+        sprintf(label, "%s.scale", name);
+        print_float_array(fid, label, layer->scale, num_out);
+    }
+}
+#endif

+/* Initializes the OSCE models in *model.
+ *
+ * If data is non-NULL and len is non-zero, the weights are parsed from that
+ * buffer. Otherwise the weight tables compiled into the library are used;
+ * when USE_WEIGHTS_FILE is defined there are none and -1 is returned.
+ * Each model is only initialized if it is compiled in (LACE unless
+ * DISABLE_LACE is defined, NoLACE unless DISABLE_NOLACE is defined), and
+ * initialization stops at the first model that fails.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int osce_load_models(OSCEModel *model, const unsigned char *data, int len)
+{
+    int ret = 0;
+    WeightArray *list;
+    if (data != NULL  && len)
+    {
+        /* init from buffer */
+        /* NOTE(review): the result of parse_weights() is not checked here;
+           confirm what it leaves in list when parsing fails. */
+        parse_weights(&list, data, len);
+#ifndef DISABLE_LACE
+        if (ret == 0) {ret = init_lace(&model->lace, list);}
+#endif
+        /* NoLACE must be guarded by its own switch, not by DISABLE_LACE */
+#ifndef DISABLE_NOLACE
+        if (ret == 0) {ret = init_nolace(&model->nolace, list);}
+#endif
+        free(list);
+    } else
+    {
+#ifdef USE_WEIGHTS_FILE
+        return -1;
+#else
+#ifndef DISABLE_LACE
+        if (ret == 0) {ret = init_lace(&model->lace, lacelayers_arrays);}
+#endif
+        /* NoLACE must be guarded by its own switch, not by DISABLE_LACE */
+#ifndef DISABLE_NOLACE
+        if (ret == 0) {ret = init_nolace(&model->nolace, nolacelayers_arrays);}
+#endif
+#endif /* USE_WEIGHTS_FILE */
+    }
+    /* normalize any non-zero init result to -1 */
+    ret = ret ? -1 : 0;
+    return ret;
+}

+/* Enhances one decoded SILK frame in place with the configured OSCE method.
+ *
+ * Only frames with fs_kHz == 16 and 4 subframes (320 samples) are processed;
+ * for any other configuration the OSCE state is reset and xq is left as is.
+ * The frame is converted to float, run through the selected model and
+ * converted back to 16 bit with saturation. While features.reset is larger
+ * than 1 the unprocessed signal is passed through; when it equals 1 the
+ * frame is handed to osce_cross_fade_10ms() together with the unprocessed
+ * signal and the counter is cleared.
+ *
+ * With ENABLE_OSCE_TRAINING_DATA defined, features and the unprocessed
+ * signal are additionally appended to a set of files in the working
+ * directory.
+ */
+void osce_enhance_frame(
+    OSCEModel                   *model,                         /* I    OSCE model struct                           */
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
+    opus_int16                  xq[],                           /* I/O  Decoded speech                              */
+    opus_int32                  num_bits,                       /* I    Size of SILK payload in bits                */
+    int                         arch                            /* I    Run-time architecture                       */
+)
+{
+    float in_buffer[320];
+    float out_buffer[320];
+    float features[4 * OSCE_FEATURE_DIM];
+    float numbits[2];
+    int periods[4];
+    int i;
+    /* enhancement only implemented for 20 ms frame at 16kHz */
+    if (psDec->fs_kHz != 16 || psDec->nb_subfr != 4)
+    {
+        osce_reset(&psDec->osce, psDec->osce.method);
+        return;
+    }
+    osce_calculate_features(psDec, psDecCtrl, features, numbits, periods, xq, num_bits);
+    /* scale input */
+    for (i = 0; i < 320; i++)
+    {
+        in_buffer[i] = ((float) xq[i]) * (1.f/32768.f);
+    }
+    switch(psDec->osce.method)
+    {
+        case OSCE_METHOD_NONE:
+            OPUS_COPY(out_buffer, in_buffer, 320);
+            break;
+#ifndef DISABLE_LACE
+        case OSCE_METHOD_LACE:
+            lace_process_20ms_frame(&model->lace, &psDec->osce.state.lace, out_buffer, in_buffer, features, numbits, periods, arch);
+            break;
+#endif
+#ifndef DISABLE_NOLACE
+        case OSCE_METHOD_NOLACE:
+            nolace_process_20ms_frame(&model->nolace, &psDec->osce.state.nolace, out_buffer, in_buffer, features, numbits, periods, arch);
+            break;
+#endif
+        default:
+            celt_assert(0 && "method not defined");
+            /* if the assertion does not abort, pass the signal through so
+               that out_buffer is never read uninitialized below */
+            OPUS_COPY(out_buffer, in_buffer, 320);
+            break;
+    }
+#ifdef ENABLE_OSCE_TRAINING_DATA
+    /* own block scope so that the declarations below do not follow statements */
+    {
+        int  k;
+        static FILE *flpc = NULL;
+        static FILE *fgain = NULL;
+        static FILE *fltp = NULL;
+        static FILE *fperiod = NULL;
+        static FILE *fnoisy16k = NULL;
+        static FILE* f_numbits = NULL;
+        static FILE* f_numbits_smooth = NULL;
+        /* files are opened on first use and kept open for later calls */
+        /* NOTE(review): fopen() results are not checked before writing */
+        if (flpc == NULL) {flpc = fopen("features_lpc.f32", "wb");}
+        if (fgain == NULL) {fgain = fopen("features_gain.f32", "wb");}
+        if (fltp == NULL) {fltp = fopen("features_ltp.f32", "wb");}
+        if (fperiod == NULL) {fperiod = fopen("features_period.s16", "wb");}
+        if (fnoisy16k == NULL) {fnoisy16k = fopen("noisy_16k.s16", "wb");}
+        if(f_numbits == NULL) {f_numbits = fopen("features_num_bits.s32", "wb");}
+        if (f_numbits_smooth == NULL) {f_numbits_smooth = fopen("features_num_bits_smooth.f32", "wb");}
+        fwrite(&num_bits, sizeof(num_bits), 1, f_numbits);
+        fwrite(&(psDec->osce.features.numbits_smooth), sizeof(psDec->osce.features.numbits_smooth), 1, f_numbits_smooth);
+        for (k = 0; k < psDec->nb_subfr; k++)
+        {
+            float tmp;
+            int16_t itmp;
+            float lpc_buffer[16] = {0};
+            opus_int16 *A_Q12, *B_Q14;
+            (void) num_bits;
+            (void) arch;
+            /* gain */
+            tmp = (float) psDecCtrl->Gains_Q16[k] / (1UL << 16);
+            fwrite(&tmp, sizeof(tmp), 1, fgain);
+            /* LPC */
+            /* one coefficient set per pair of subframes; always 16 values are
+               written, entries beyond LPC_order stay zero */
+            A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ];
+            for (i = 0; i < psDec->LPC_order; i++)
+            {
+                lpc_buffer[i] = (float) A_Q12[i] / (1U << 12);
+            }
+            fwrite(lpc_buffer, sizeof(lpc_buffer[0]), 16, flpc);
+            /* LTP */
+            B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
+            for (i = 0; i < 5; i++)
+            {
+                tmp = (float) B_Q14[i] / (1U << 14);
+                fwrite(&tmp, sizeof(tmp), 1, fltp);
+            }
+            /* periods */
+            /* 0 is written for frames that are not voiced */
+            itmp = psDec->indices.signalType == TYPE_VOICED ? psDecCtrl->pitchL[ k ] : 0;
+            fwrite(&itmp, sizeof(itmp), 1, fperiod);
+        }
+        /* xq still holds the unprocessed decoder output at this point */
+        fwrite(xq, sizeof(xq[0]), psDec->nb_subfr * psDec->subfr_length, fnoisy16k);
+    }
+#endif
+    if (psDec->osce.features.reset > 1)
+    {
+        /* still counting down after a reset: output the unprocessed signal */
+        OPUS_COPY(out_buffer, in_buffer, 320);
+        psDec->osce.features.reset --;
+    }
+    else if (psDec->osce.features.reset)
+    {
+        osce_cross_fade_10ms(out_buffer, in_buffer, 320);
+        psDec->osce.features.reset = 0;
+    }
+    /* scale output */
+    for (i = 0; i < 320; i++)
+    {
+        float tmp = 32768.f * out_buffer[i];
+        /* saturate symmetrically before converting back to 16 bit */
+        if (tmp > 32767.f) tmp = 32767.f;
+        if (tmp < -32767.f) tmp = -32767.f;
+        xq[i] = float2int(tmp);
+    }
+}

+#if 0

+#include <stdio.h>

+/* Offline check of the LACE feature net against reference data (compiled out).
+ *
+ * Opens <prefix>_in_features.f32, <prefix>_out_features.f32,
+ * <prefix>_periods.s32 and <prefix>_numbits.f32, reads num_frames frames
+ * from them, runs lace_feature_net() on each frame and prints the RMSE
+ * between the reference output features and the computed ones.
+ * Terminates the process with exit(1) if a file cannot be opened or a frame
+ * cannot be read.
+ *
+ * NOTE(review): init_lace() is called here with a different argument list
+ * than in osce_load_models(); this harness presumably predates the current
+ * API and needs updating before it can be re-enabled.
+ */
+void lace_feature_net_compare(
+    const char * prefix,
+    int num_frames,
+    LACE* hLACE
+)
+{
+    char in_feature_file[256];
+    char out_feature_file[256];
+    char numbits_file[256];
+    char periods_file[256];
+    char message[512];
+    int i_frame, i_feature;
+    /* shadowed by the per-frame mse declared inside the loop below */
+    float mse;
+    float in_features[4 * LACE_NUM_FEATURES];
+    float out_features[4 * LACE_COND_DIM];
+    float out_features2[4 * LACE_COND_DIM];
+    float numbits[2];
+    int periods[4];
+    init_lace(hLACE);
+    FILE *f_in_features, *f_out_features, *f_numbits, *f_periods;
+    /* input features */
+    strcpy(in_feature_file, prefix);
+    strcat(in_feature_file, "_in_features.f32");
+    f_in_features = fopen(in_feature_file, "rb");
+    if (f_in_features == NULL)
+    {
+        sprintf(message, "could not open file %s", in_feature_file);
+        perror(message);
+        exit(1);
+    }
+    /* reference output features */
+    strcpy(out_feature_file, prefix);
+    strcat(out_feature_file, "_out_features.f32");
+    f_out_features = fopen(out_feature_file, "rb");
+    if (f_out_features == NULL)
+    {
+        sprintf(message, "could not open file %s", out_feature_file);
+        perror(message);
+        exit(1);
+    }
+    /* periods */
+    strcpy(periods_file, prefix);
+    strcat(periods_file, "_periods.s32");
+    f_periods = fopen(periods_file, "rb");
+    if (f_periods == NULL)
+    {
+        sprintf(message, "could not open file %s", periods_file);
+        perror(message);
+        exit(1);
+    }
+    /* numbits */
+    strcpy(numbits_file, prefix);
+    strcat(numbits_file, "_numbits.f32");
+    f_numbits = fopen(numbits_file, "rb");
+    if (f_numbits == NULL)
+    {
+        sprintf(message, "could not open file %s", numbits_file);
+        perror(message);
+        exit(1);
+    }
+    for (i_frame = 0; i_frame < num_frames; i_frame ++)
+    {
+        /* every frame must be fully present in all four files */
+        if(fread(in_features, sizeof(float), 4 * LACE_NUM_FEATURES, f_in_features) != 4 * LACE_NUM_FEATURES)
+        {
+            fprintf(stderr, "could not read frame %d from in_features\n", i_frame);
+            exit(1);
+        }
+        if(fread(out_features, sizeof(float), 4 * LACE_COND_DIM, f_out_features) != 4 * LACE_COND_DIM)
+        {
+            fprintf(stderr, "could not read frame %d from out_features\n", i_frame);
+            exit(1);
+        }
+        if(fread(periods, sizeof(int), 4, f_periods) != 4)
+        {
+            fprintf(stderr, "could not read frame %d from periods\n", i_frame);
+            exit(1);
+        }
+        if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
+        {
+            fprintf(stderr, "could not read frame %d from numbits\n", i_frame);
+            exit(1);
+        }
+        lace_feature_net(hLACE, out_features2, in_features, numbits, periods);
+        /* mean squared error between reference and computed features */
+        float mse = 0;
+        for (int i = 0; i < 4 * LACE_COND_DIM; i ++)
+        {
+            mse += pow(out_features[i] - out_features2[i], 2);
+        }
+        mse /= (4 * LACE_COND_DIM);
+        printf("rmse: %f\n", sqrt(mse));
+    }
+    fclose(f_in_features);
+    fclose(f_out_features);
+    fclose(f_numbits);
+    fclose(f_periods);
+}

+/* Offline LACE test driver (compiled out).
+ *
+ * Reads 20 ms frames from <prefix>_x_in.f32 together with the matching
+ * entries of <prefix>_features.f32, <prefix>_periods.s32 and
+ * <prefix>_numbits.f32, runs lace_process_20ms_frame() on each frame and
+ * writes the result as 16-bit samples to the file named by output.
+ * Processing stops at the first incomplete frame in the x_in file. The
+ * process is terminated with exit(1) if a file cannot be opened or a side
+ * information frame cannot be read.
+ *
+ * NOTE(review): init_lace() and lace_process_20ms_frame() are called with
+ * different argument lists than elsewhere in this file; this harness
+ * presumably predates the current API and needs updating before it can be
+ * re-enabled.
+ */
+void lace_demo(
+    char *prefix,
+    char *output
+)
+{
+    char feature_file[256];
+    char numbits_file[256];
+    char periods_file[256];
+    char x_in_file[256];
+    char message[512];
+    float features[4 * LACE_NUM_FEATURES];
+    float numbits[2];
+    int periods[4];
+    float x_in[4 * LACE_FRAME_SIZE];
+    int16_t x_out[4 * LACE_FRAME_SIZE];
+    float buffer[4 * LACE_FRAME_SIZE];
+    LACE hLACE;
+    int frame_counter = 0;
+    FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out;
+    init_lace(&hLACE);
+    /* features */
+    strcpy(feature_file, prefix);
+    strcat(feature_file, "_features.f32");
+    f_features = fopen(feature_file, "rb");
+    if (f_features == NULL)
+    {
+        sprintf(message, "could not open file %s", feature_file);
+        perror(message);
+        exit(1);
+    }
+    /* input signal */
+    strcpy(x_in_file, prefix);
+    strcat(x_in_file, "_x_in.f32");
+    f_x_in = fopen(x_in_file, "rb");
+    if (f_x_in == NULL)
+    {
+        sprintf(message, "could not open file %s", x_in_file);
+        perror(message);
+        exit(1);
+    }
+    /* output signal */
+    f_x_out = fopen(output, "wb");
+    if (f_x_out == NULL)
+    {
+        sprintf(message, "could not open file %s", output);
+        perror(message);
+        exit(1);
+    }
+    /* periods */
+    strcpy(periods_file, prefix);
+    strcat(periods_file, "_periods.s32");
+    f_periods = fopen(periods_file, "rb");
+    if (f_periods == NULL)
+    {
+        sprintf(message, "could not open file %s", periods_file);
+        perror(message);
+        exit(1);
+    }
+    /* numbits */
+    strcpy(numbits_file, prefix);
+    strcat(numbits_file, "_numbits.f32");
+    f_numbits = fopen(numbits_file, "rb");
+    if (f_numbits == NULL)
+    {
+        sprintf(message, "could not open file %s", numbits_file);
+        perror(message);
+        exit(1);
+    }
+    printf("processing %s\n", prefix);
+    while (fread(x_in, sizeof(float), 4 * LACE_FRAME_SIZE, f_x_in) == 4 * LACE_FRAME_SIZE)
+    {
+        printf("\rframe: %d", frame_counter++);
+        /* frame_counter has already been advanced, so the frame being read
+           is frame_counter - 1 */
+        if(fread(features, sizeof(float), 4 * LACE_NUM_FEATURES, f_features) != 4 * LACE_NUM_FEATURES)
+        {
+            fprintf(stderr, "could not read frame %d from features\n", frame_counter - 1);
+            exit(1);
+        }
+        if(fread(periods, sizeof(int), 4, f_periods) != 4)
+        {
+            fprintf(stderr, "could not read frame %d from periods\n", frame_counter - 1);
+            exit(1);
+        }
+        if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
+        {
+            fprintf(stderr, "could not read frame %d from numbits\n", frame_counter - 1);
+            exit(1);
+        }
+        lace_process_20ms_frame(
+            &hLACE,
+            buffer,
+            x_in,
+            features,
+            numbits,
+            periods
+        );
+        /* scale to the 16-bit range, clip and round */
+        for (int n=0; n < 4 * LACE_FRAME_SIZE; n ++)
+        {
+            float tmp = (1UL<<15) * buffer[n];
+            tmp = CLIP(tmp, -32768, 32767);
+            x_out[n] = (int16_t) round(tmp);
+        }
+        fwrite(x_out, sizeof(int16_t), 4 * LACE_FRAME_SIZE, f_x_out);
+    }
+    printf("\ndone!\n");
+    fclose(f_features);
+    fclose(f_numbits);
+    fclose(f_periods);
+    fclose(f_x_in);
+    fclose(f_x_out);
+}

+/* Offline NoLACE test driver (compiled out).
+ *
+ * Reads 20 ms frames from <prefix>_x_in.f32 together with the matching
+ * entries of <prefix>_features.f32, <prefix>_periods.s32 and
+ * <prefix>_numbits.f32, runs nolace_process_20ms_frame() on each frame and
+ * writes the result as 16-bit samples to the file named by output.
+ * Processing stops at the first incomplete frame in the x_in file. The
+ * process is terminated with exit(1) if a file cannot be opened or a side
+ * information frame cannot be read.
+ *
+ * Buffers are sized with the NOLACE_* constants, matching the sizes
+ * nolace_process_20ms_frame() works with.
+ *
+ * NOTE(review): init_nolace() and nolace_process_20ms_frame() are called
+ * with different argument lists than elsewhere in this file; this harness
+ * presumably predates the current API and needs updating before it can be
+ * re-enabled.
+ */
+void nolace_demo(
+    char *prefix,
+    char *output
+)
+{
+    char feature_file[256];
+    char numbits_file[256];
+    char periods_file[256];
+    char x_in_file[256];
+    char message[512];
+    float features[4 * NOLACE_NUM_FEATURES];
+    float numbits[2];
+    int periods[4];
+    float x_in[4 * NOLACE_FRAME_SIZE];
+    int16_t x_out[4 * NOLACE_FRAME_SIZE];
+    float buffer[4 * NOLACE_FRAME_SIZE];
+    NoLACE hNoLACE;
+    int frame_counter = 0;
+    FILE *f_features, *f_numbits, *f_periods, *f_x_in, *f_x_out;
+    init_nolace(&hNoLACE);
+    /* features */
+    strcpy(feature_file, prefix);
+    strcat(feature_file, "_features.f32");
+    f_features = fopen(feature_file, "rb");
+    if (f_features == NULL)
+    {
+        sprintf(message, "could not open file %s", feature_file);
+        perror(message);
+        exit(1);
+    }
+    /* input signal */
+    strcpy(x_in_file, prefix);
+    strcat(x_in_file, "_x_in.f32");
+    f_x_in = fopen(x_in_file, "rb");
+    if (f_x_in == NULL)
+    {
+        sprintf(message, "could not open file %s", x_in_file);
+        perror(message);
+        exit(1);
+    }
+    /* output signal */
+    f_x_out = fopen(output, "wb");
+    if (f_x_out == NULL)
+    {
+        sprintf(message, "could not open file %s", output);
+        perror(message);
+        exit(1);
+    }
+    /* periods */
+    strcpy(periods_file, prefix);
+    strcat(periods_file, "_periods.s32");
+    f_periods = fopen(periods_file, "rb");
+    if (f_periods == NULL)
+    {
+        sprintf(message, "could not open file %s", periods_file);
+        perror(message);
+        exit(1);
+    }
+    /* numbits */
+    strcpy(numbits_file, prefix);
+    strcat(numbits_file, "_numbits.f32");
+    f_numbits = fopen(numbits_file, "rb");
+    if (f_numbits == NULL)
+    {
+        sprintf(message, "could not open file %s", numbits_file);
+        perror(message);
+        exit(1);
+    }
+    printf("processing %s\n", prefix);
+    while (fread(x_in, sizeof(float), 4 * NOLACE_FRAME_SIZE, f_x_in) == 4 * NOLACE_FRAME_SIZE)
+    {
+        printf("\rframe: %d", frame_counter++);
+        /* frame_counter has already been advanced, so the frame being read
+           is frame_counter - 1 */
+        if(fread(features, sizeof(float), 4 * NOLACE_NUM_FEATURES, f_features) != 4 * NOLACE_NUM_FEATURES)
+        {
+            fprintf(stderr, "could not read frame %d from features\n", frame_counter - 1);
+            exit(1);
+        }
+        if(fread(periods, sizeof(int), 4, f_periods) != 4)
+        {
+            fprintf(stderr, "could not read frame %d from periods\n", frame_counter - 1);
+            exit(1);
+        }
+        if(fread(numbits, sizeof(float), 2, f_numbits) != 2)
+        {
+            fprintf(stderr, "could not read frame %d from numbits\n", frame_counter - 1);
+            exit(1);
+        }
+        nolace_process_20ms_frame(
+            &hNoLACE,
+            buffer,
+            x_in,
+            features,
+            numbits,
+            periods
+        );
+        /* scale to the 16-bit range, clip and round */
+        for (int n=0; n < 4 * NOLACE_FRAME_SIZE; n ++)
+        {
+            float tmp = (1UL<<15) * buffer[n];
+            tmp = CLIP(tmp, -32768, 32767);
+            x_out[n] = (int16_t) round(tmp);
+        }
+        fwrite(x_out, sizeof(int16_t), 4 * NOLACE_FRAME_SIZE, f_x_out);
+    }
+    printf("\ndone!\n");
+    fclose(f_features);
+    fclose(f_numbits);
+    fclose(f_periods);
+    fclose(f_x_in);
+    fclose(f_x_out);
+}

+/* Entry point of the stand-alone test program (compiled out together with
+ * the demo functions above). Only the NoLACE demo is active; the LACE
+ * comparison and demo runs are disabled by the nested #if 0. */
+int main()
+{
+#if 0
+    LACE hLACE;
+    lace_feature_net_compare("testvec2/lace", 5, &hLACE);
+    lace_demo("testdata/test9", "out_lace_c_9kbps.pcm");
+    lace_demo("testdata/test6", "out_lace_c_6kbps.pcm");
+#endif
+    nolace_demo("testdata/test9", "out_nolace_c_9kbps.pcm");
+}

+#endif

+/*gcc  -I ../include -I . -I ../silk -I ../celt osce.c nndsp.c lace_data.c nolace_data.c nnet.c parse_lpcnet_weights.c -lm -o lacetest*/

--- /dev/null

+++ b/dnn/osce.h

@@ -1,0 +1,81 @@

+/* Copyright (c) 2023 Amazon

+   Written by Jan Buethe */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#ifndef OSCE_H
+#define OSCE_H
+#include "opus_types.h"
+/*#include "osce_config.h"*/
+/* model data headers are only pulled in for models that are compiled in */
+#ifndef DISABLE_LACE
+#include "lace_data.h"
+#endif
+#ifndef DISABLE_NOLACE
+#include "nolace_data.h"
+#endif
+#include "nndsp.h"
+#include "nnet.h"
+#include "osce_structs.h"
+#include "structs.h"
+/* Enhancement method identifiers. The identifier of a model is only defined
+   when that model is compiled in. */
+#define OSCE_METHOD_NONE 0
+#ifndef DISABLE_LACE
+#define OSCE_METHOD_LACE 1
+#endif
+#ifndef DISABLE_NOLACE
+#define OSCE_METHOD_NOLACE 2
+#endif
+/* Default method: NoLACE if available, otherwise LACE, otherwise none. */
+#if !defined(DISABLE_NOLACE)
+#define OSCE_DEFAULT_METHOD OSCE_METHOD_NOLACE
+#elif !defined(DISABLE_LACE)
+#define OSCE_DEFAULT_METHOD OSCE_METHOD_LACE
+#else
+#define OSCE_DEFAULT_METHOD OSCE_METHOD_NONE
+#endif
+/* API */
+/* Enhances one decoded frame in xq in place, using the method stored in
+   psDec->osce. */
+void osce_enhance_frame(
+    OSCEModel                   *model,                         /* I    OSCE model struct                           */
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
+    opus_int16                  xq[],                           /* I/O  Decoded speech                              */
+    opus_int32                  num_bits,                       /* I    Size of SILK payload in bits                */
+    int                         arch                            /* I    Run-time architecture                       */
+);
+/* Initializes the models in hModel, from the weight buffer data of length
+   len if one is given. Returns 0 on success and -1 on failure. */
+int osce_load_models(OSCEModel *hModel, const unsigned char *data, int len);
+/* Resets the OSCE state in hOSCE and selects the enhancement method
+   (one of the OSCE_METHOD_* values). */
+void osce_reset(silk_OSCE_struct *hOSCE, int method);
+#endif

--- /dev/null

+++ b/dnn/osce_config.h

@@ -1,0 +1,62 @@

+/* Copyright (c) 2023 Amazon

+   Written by Jan Buethe */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#ifndef OSCE_CONFIG

+#define OSCE_CONFIG

+#define OSCE_MAX_RNN_NEURONS 256
+/* number of past signal samples kept between frames for feature extraction */
+#define OSCE_FEATURES_MAX_HISTORY 350
+/* size of one feature vector: 64 + 18 + 5 + 5 + 1, see the layout below */
+#define OSCE_FEATURE_DIM 93
+/* maximum number of feature vectors (80-sample subframes) per frame */
+#define OSCE_MAX_FEATURE_FRAMES 4
+#define OSCE_CLEAN_SPEC_NUM_BANDS 64
+#define OSCE_NOISY_SPEC_NUM_BANDS 18
+/* pitch lag reported for frames that are not voiced */
+#define OSCE_NO_PITCH_VALUE 7
+#define OSCE_PREEMPH 0.85f
+/* length of the pitch hangover counter cycle (hangover is only active with OSCE_HANGOVER_BUGFIX) */
+#define OSCE_PITCH_HANGOVER 8
+/* Layout of one feature vector as offset/length pairs. The sections are
+   contiguous and together cover OSCE_FEATURE_DIM entries. */
+#define OSCE_CLEAN_SPEC_START 0
+#define OSCE_CLEAN_SPEC_LENGTH 64
+#define OSCE_NOISY_CEPSTRUM_START 64
+#define OSCE_NOISY_CEPSTRUM_LENGTH 18
+#define OSCE_ACORR_START 82
+#define OSCE_ACORR_LENGTH 5
+#define OSCE_LTP_START 87
+#define OSCE_LTP_LENGTH 5
+#define OSCE_LOG_GAIN_START 92
+#define OSCE_LOG_GAIN_LENGTH 1

+#endif

\ No newline at end of file

--- /dev/null

+++ b/dnn/osce_features.c

@@ -1,0 +1,454 @@

+/* Copyright (c) 2023 Amazon

+   Written by Jan Buethe */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#ifdef HAVE_CONFIG_H

+#include "config.h"

+#endif

+/* analysis window length in samples and number of bins of the one-sided spectrum (320 / 2 + 1) */
+#define OSCE_SPEC_WINDOW_SIZE 320
+#define OSCE_SPEC_NUM_FREQS 161
+/*DEBUG*/
+/* WRITE_FEATURES dumps the assembled feature vectors to "assembled_features.f32",
+   DEBUG_PRINT prints intermediate spectra to stdout. */
+/*#define WRITE_FEATURES*/
+/*#define DEBUG_PRINT*/
+/*******/
+#include "stack_alloc.h"
+#include "osce_features.h"
+#include "kiss_fft.h"
+#include "os_support.h"
+#include "osce.h"
+#include "freq.h"
+/* the debug code below tests DEBUG_PRINT, so the stdio include must be keyed on the same name */
+#if defined(WRITE_FEATURES) || defined(DEBUG_PRINT)
+#include <stdio.h>
+#include <stdlib.h>
+#endif

+/* Centre bins of the 64 triangular bands used for the clean (LPC based)
+   spectrum. The values index the one-sided spectrum, whose last bin is 160. */
+static const int center_bins_clean[64] = {
+      0,      2,      5,      8,     10,     12,     15,     18,
+     20,     22,     25,     28,     30,     33,     35,     38,
+     40,     42,     45,     48,     50,     52,     55,     58,
+     60,     62,     65,     68,     70,     73,     75,     78,
+     80,     82,     85,     88,     90,     92,     95,     98,
+    100,    102,    105,    108,    110,    112,    115,    118,
+    120,    122,    125,    128,    130,    132,    135,    138,
+    140,    142,    145,    148,    150,    152,    155,    160
+};
+/* Centre bins of the 18 triangular bands used for the noisy-signal cepstrum. */
+static const int center_bins_noisy[18] = {
+      0,      4,      8,     12,     16,     20,     24,     28,
+     32,     40,     48,     56,     64,     80,     96,    112,
+    136,    160
+};

+/* Per-band scale factors applied by apply_filterbank() to the clean-spectrum
+   bands. The values appear to be the reciprocal of the summed triangular
+   filter coefficients of each band (e.g. 1/1.5 for band 0, 1/2.5 for band 1). */
+static const float band_weights_clean[64] = {
+     0.666666666667f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.400000000000f,     0.400000000000f,     0.400000000000f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.400000000000f,     0.400000000000f,     0.400000000000f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.333333333333f,     0.400000000000f,
+     0.500000000000f,     0.400000000000f,     0.250000000000f,     0.333333333333f
+};
+/* Per-band scale factors for the 18 noisy-signal bands (same construction as
+   band_weights_clean, as far as the values show). */
+static const float band_weights_noisy[18] = {
+     0.400000000000f,     0.250000000000f,     0.250000000000f,     0.250000000000f,
+     0.250000000000f,     0.250000000000f,     0.250000000000f,     0.250000000000f,
+     0.166666666667f,     0.125000000000f,     0.125000000000f,     0.125000000000f,
+     0.083333333333f,     0.062500000000f,     0.062500000000f,     0.050000000000f,
+     0.041666666667f,     0.080000000000f
+};

+/* Symmetric 320-sample analysis window; the tabulated values are consistent
+   with sin(pi * (n + 0.5) / 320). The rising half (first 160 entries) also
+   serves as cross-fade ramp in osce_cross_fade_10ms(). The table is only ever
+   read, hence const. */
+static const float osce_window[OSCE_SPEC_WINDOW_SIZE] = {
+     0.004908718808f,     0.014725683311f,     0.024541228523f,     0.034354408400f,     0.044164277127f,
+     0.053969889210f,     0.063770299562f,     0.073564563600f,     0.083351737332f,     0.093130877450f,
+     0.102901041421f,     0.112661287575f,     0.122410675199f,     0.132148264628f,     0.141873117332f,
+     0.151584296010f,     0.161280864678f,     0.170961888760f,     0.180626435180f,     0.190273572448f,
+     0.199902370753f,     0.209511902052f,     0.219101240157f,     0.228669460829f,     0.238215641862f,
+     0.247738863176f,     0.257238206902f,     0.266712757475f,     0.276161601717f,     0.285583828929f,
+     0.294978530977f,     0.304344802381f,     0.313681740399f,     0.322988445118f,     0.332264019538f,
+     0.341507569661f,     0.350718204573f,     0.359895036535f,     0.369037181064f,     0.378143757022f,
+     0.387213886697f,     0.396246695891f,     0.405241314005f,     0.414196874117f,     0.423112513073f,
+     0.431987371563f,     0.440820594212f,     0.449611329655f,     0.458358730621f,     0.467061954019f,
+     0.475720161014f,     0.484332517110f,     0.492898192230f,     0.501416360796f,     0.509886201809f,
+     0.518306898929f,     0.526677640552f,     0.534997619887f,     0.543266035038f,     0.551482089078f,
+     0.559644990127f,     0.567753951426f,     0.575808191418f,     0.583806933818f,     0.591749407690f,
+     0.599634847523f,     0.607462493302f,     0.615231590581f,     0.622941390558f,     0.630591150148f,
+     0.638180132051f,     0.645707604824f,     0.653172842954f,     0.660575126926f,     0.667913743292f,
+     0.675187984742f,     0.682397150168f,     0.689540544737f,     0.696617479953f,     0.703627273726f,
+     0.710569250438f,     0.717442741007f,     0.724247082951f,     0.730981620454f,     0.737645704427f,
+     0.744238692572f,     0.750759949443f,     0.757208846506f,     0.763584762206f,     0.769887082016f,
+     0.776115198508f,     0.782268511401f,     0.788346427627f,     0.794348361383f,     0.800273734191f,
+     0.806121974951f,     0.811892519997f,     0.817584813152f,     0.823198305781f,     0.828732456844f,
+     0.834186732948f,     0.839560608398f,     0.844853565250f,     0.850065093356f,     0.855194690420f,
+     0.860241862039f,     0.865206121757f,     0.870086991109f,     0.874883999665f,     0.879596685080f,
+     0.884224593137f,     0.888767277786f,     0.893224301196f,     0.897595233788f,     0.901879654283f,
+     0.906077149740f,     0.910187315596f,     0.914209755704f,     0.918144082372f,     0.921989916403f,
+     0.925746887127f,     0.929414632439f,     0.932992798835f,     0.936481041442f,     0.939879024058f,
+     0.943186419177f,     0.946402908026f,     0.949528180593f,     0.952561935658f,     0.955503880820f,
+     0.958353732530f,     0.961111216112f,     0.963776065795f,     0.966348024735f,     0.968826845041f,
+     0.971212287799f,     0.973504123096f,     0.975702130039f,     0.977806096779f,     0.979815820533f,
+     0.981731107599f,     0.983551773378f,     0.985277642389f,     0.986908548290f,     0.988444333892f,
+     0.989884851171f,     0.991229961288f,     0.992479534599f,     0.993633450666f,     0.994691598273f,
+     0.995653875433f,     0.996520189401f,     0.997290456679f,     0.997964603026f,     0.998542563469f,
+     0.999024282300f,     0.999409713092f,     0.999698818696f,     0.999891571247f,     0.999987952167f,
+     0.999987952167f,     0.999891571247f,     0.999698818696f,     0.999409713092f,     0.999024282300f,
+     0.998542563469f,     0.997964603026f,     0.997290456679f,     0.996520189401f,     0.995653875433f,
+     0.994691598273f,     0.993633450666f,     0.992479534599f,     0.991229961288f,     0.989884851171f,
+     0.988444333892f,     0.986908548290f,     0.985277642389f,     0.983551773378f,     0.981731107599f,
+     0.979815820533f,     0.977806096779f,     0.975702130039f,     0.973504123096f,     0.971212287799f,
+     0.968826845041f,     0.966348024735f,     0.963776065795f,     0.961111216112f,     0.958353732530f,
+     0.955503880820f,     0.952561935658f,     0.949528180593f,     0.946402908026f,     0.943186419177f,
+     0.939879024058f,     0.936481041442f,     0.932992798835f,     0.929414632439f,     0.925746887127f,
+     0.921989916403f,     0.918144082372f,     0.914209755704f,     0.910187315596f,     0.906077149740f,
+     0.901879654283f,     0.897595233788f,     0.893224301196f,     0.888767277786f,     0.884224593137f,
+     0.879596685080f,     0.874883999665f,     0.870086991109f,     0.865206121757f,     0.860241862039f,
+     0.855194690420f,     0.850065093356f,     0.844853565250f,     0.839560608398f,     0.834186732948f,
+     0.828732456844f,     0.823198305781f,     0.817584813152f,     0.811892519997f,     0.806121974951f,
+     0.800273734191f,     0.794348361383f,     0.788346427627f,     0.782268511401f,     0.776115198508f,
+     0.769887082016f,     0.763584762206f,     0.757208846506f,     0.750759949443f,     0.744238692572f,
+     0.737645704427f,     0.730981620454f,     0.724247082951f,     0.717442741007f,     0.710569250438f,
+     0.703627273726f,     0.696617479953f,     0.689540544737f,     0.682397150168f,     0.675187984742f,
+     0.667913743292f,     0.660575126926f,     0.653172842954f,     0.645707604824f,     0.638180132051f,
+     0.630591150148f,     0.622941390558f,     0.615231590581f,     0.607462493302f,     0.599634847523f,
+     0.591749407690f,     0.583806933818f,     0.575808191418f,     0.567753951426f,     0.559644990127f,
+     0.551482089078f,     0.543266035038f,     0.534997619887f,     0.526677640552f,     0.518306898929f,
+     0.509886201809f,     0.501416360796f,     0.492898192230f,     0.484332517110f,     0.475720161014f,
+     0.467061954019f,     0.458358730621f,     0.449611329655f,     0.440820594212f,     0.431987371563f,
+     0.423112513073f,     0.414196874117f,     0.405241314005f,     0.396246695891f,     0.387213886697f,
+     0.378143757022f,     0.369037181064f,     0.359895036535f,     0.350718204573f,     0.341507569661f,
+     0.332264019538f,     0.322988445118f,     0.313681740399f,     0.304344802381f,     0.294978530977f,
+     0.285583828929f,     0.276161601717f,     0.266712757475f,     0.257238206902f,     0.247738863176f,
+     0.238215641862f,     0.228669460829f,     0.219101240157f,     0.209511902052f,     0.199902370753f,
+     0.190273572448f,     0.180626435180f,     0.170961888760f,     0.161280864678f,     0.151584296010f,
+     0.141873117332f,     0.132148264628f,     0.122410675199f,     0.112661287575f,     0.102901041421f,
+     0.093130877450f,     0.083351737332f,     0.073564563600f,     0.063770299562f,     0.053969889210f,
+     0.044164277127f,     0.034354408400f,     0.024541228523f,     0.014725683311f,     0.004908718808f
+};

+/* Accumulates a spectrum into num_bands overlapping triangular bands.
+   x_out        O  num_bands band values
+   x_in         I  spectrum; bins 0 .. center_bins[num_bands - 1] are read
+   center_bins  I  ascending centre bin of each band
+   band_weights I  per-band scale factor
+   Every bin between two neighbouring centres is split linearly between the
+   two bands. x_out must not be the same buffer as x_in. */
+static void apply_filterbank(float *x_out, const float *x_in, const int *center_bins, const float* band_weights, int num_bands)
+{
+    int b, i;
+    float frac;
+    celt_assert(x_in != x_out);
+    x_out[0] = 0;
+    for (b = 0; b < num_bands - 1; b++)
+    {
+        /* band b was already cleared in the previous iteration */
+        x_out[b+1] = 0;
+        for (i = center_bins[b]; i < center_bins[b+1]; i++)
+        {
+            /* frac is 1 at the centre of band b and falls towards 0 at the centre of band b+1 */
+            frac = (float) (center_bins[b+1] - i) / (center_bins[b+1] - center_bins[b]);
+            x_out[b]   += band_weights[b] * frac * x_in[i];
+            x_out[b+1] += band_weights[b+1] * (1 - frac) * x_in[i];
+        }
+    }
+    /* the last centre bin is not reached by the loop above and belongs entirely to the last band */
+    x_out[num_bands - 1] += band_weights[num_bands - 1] * x_in[center_bins[num_bands - 1]];
+#ifdef DEBUG_PRINT
+    for (b = 0; b < num_bands; b++)
+    {
+        printf("band[%d]: %f\n", b, x_out[b]);
+    }
+#endif
+}

+/* Computes the one-sided magnitude spectrum (OSCE_SPEC_NUM_FREQS bins) of a
+   320-sample real input via forward_transform().
+   out may be the same buffer as in: the transform result is staged in a local
+   array and out is only written afterwards.
+   NOTE(review): the factor OSCE_SPEC_WINDOW_SIZE presumably compensates a 1/N
+   scaling inside forward_transform() -- confirm against its definition. */
+static void mag_spec_320_onesided(float *out, float *in)
+{
+    /* declarations first: the assertion is a statement and C89 does not allow
+       declarations after it */
+    kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE];
+    int k;
+    celt_assert(OSCE_SPEC_WINDOW_SIZE == 320);
+    forward_transform(buffer, in);
+    for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++)
+    {
+        out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i);
+#ifdef DEBUG_PRINT
+        printf("magspec[%d]: %f\n", k, out[k]);
+#endif
+    }
+}

+/* Derives a smoothed log spectrum from a set of LPC coefficients.
+   spec       O  OSCE_CLEAN_SPEC_NUM_BANDS values, 0.3 * log of the band energies
+   a_q12      I  prediction coefficients in Q12
+   lpc_order  I  number of coefficients in a_q12
+   The magnitude response of 1 - sum_k a_k z^-k is computed on 320 points,
+   inverted bin by bin and then accumulated into bands. */
+static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order)
+{
+    float work[OSCE_SPEC_WINDOW_SIZE] = {0};
+    int idx;
+
+    /* coefficients of the prediction error filter, zero padded to the transform size */
+    work[0] = 1;
+    for (idx = 1; idx <= lpc_order; idx++)
+    {
+        work[idx] = - (float)a_q12[idx - 1] / (1U << 12);
+    }
+
+    /* magnitude response (computed in place), then its reciprocal */
+    mag_spec_320_onesided(work, work);
+    for (idx = 0; idx < OSCE_SPEC_NUM_FREQS; idx++)
+    {
+        work[idx] = 1.f / (work[idx] + 1e-9f);
+    }
+
+    /* band accumulation */
+    apply_filterbank(spec, work, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS);
+
+    /* scaled log; the small offset guards against log(0) */
+    for (idx = 0; idx < OSCE_CLEAN_SPEC_NUM_BANDS; idx++)
+    {
+        spec[idx] = 0.3f * log(spec[idx] + 1e-9f);
+    }
+}

+/* Computes the cepstrum of one windowed signal segment.
+   cepstrum  O  output of dct() applied to the log band energies
+   signal    I  OSCE_SPEC_WINDOW_SIZE input samples
+   cepstrum must not be the same buffer as signal. */
+static void calculate_cepstrum(float *cepstrum, float *signal)
+{
+    float work[OSCE_SPEC_WINDOW_SIZE];
+    /* The band energies live in the unused tail of work: the one-sided
+       spectrum only occupies the first OSCE_SPEC_NUM_FREQS entries. */
+    float *bands = &work[OSCE_SPEC_NUM_FREQS + 3];
+    int j;
+    celt_assert(cepstrum != signal);
+
+    /* windowing */
+    for (j = 0; j < OSCE_SPEC_WINDOW_SIZE; j++)
+    {
+        work[j] = osce_window[j] * signal[j];
+    }
+
+    /* magnitude spectrum, in place */
+    mag_spec_320_onesided(work, work);
+
+    /* band accumulation */
+    apply_filterbank(bands, work, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS);
+
+    /* log domain; the small offset guards against log(0) */
+    for (j = 0; j < OSCE_NOISY_SPEC_NUM_BANDS; j++)
+    {
+        bands[j] = log(bands[j] + 1e-9f);
+#ifdef DEBUG_PRINT
+        printf("logspec[%d]: %f\n", j, bands[j]);
+#endif
+    }
+
+    /* DCT-II (orthonormal); dct() operates on NB_BANDS values */
+    celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS);
+    dct(cepstrum, bands);
+}

+/* Normalized cross-correlation between the current 80-sample subframe and the
+   signal lag + 2, ..., lag - 2 samples earlier.
+   acorr   O  5 correlation values; acorr[k + 2] belongs to delay lag - k
+   signal  I  start of the subframe; samples signal[-lag - 2] .. signal[79]
+              must be readable
+   lag     I  pitch lag in samples
+   acorr must not be the same buffer as signal. */
+static void calculate_acorr(float *acorr, float *signal, int lag)
+{
+    int n, k;
+    float xx, xy, yy;
+    celt_assert(acorr != signal);
+    /* the subframe energy is the same for every delay, so it is computed once */
+    xx = 0;
+    for (n = 0; n < 80; n++)
+    {
+        xx += signal[n] * signal[n];
+    }
+    for (k = -2; k <= 2; k++)
+    {
+        xy = 0;
+        yy = 0;
+        for (n = 0; n < 80; n++)
+        {
+            yy += signal[n - lag + k] * signal[n - lag + k];
+            xy += signal[n] * signal[n - lag + k];
+        }
+        /* the small offset keeps the division defined for silent input */
+        acorr[k+2] = xy / sqrt(xx * yy + 1e-9f);
+    }
+}

+/* Maps the decoded pitch lag of a subframe to the lag used by the features.
+   Voiced frames keep their lag and store it in psFeatures->last_lag. Other
+   frames get OSCE_NO_PITCH_VALUE, unless hangover is compiled in via
+   OSCE_HANGOVER_BUGFIX, in which case the last voiced lag is repeated while
+   the hangover counter runs. psFeatures->last_type and
+   psFeatures->pitch_hangover_count are updated on every call. */
+static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type)
+{
+    int new_lag;
+#ifdef OSCE_HANGOVER_BUGFIX
+#define TESTBIT 1
+#else
+#define TESTBIT 0
+#endif
+    /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */
+    if (type == TYPE_VOICED)
+    {
+        /* voiced frame: update last_lag */
+        new_lag = lag;
+        psFeatures->last_lag = lag;
+        psFeatures->pitch_hangover_count = 0;
+    }
+    else if (TESTBIT && psFeatures->last_type == TYPE_VOICED)
+    {
+        /* enter hangover */
+        if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER)
+        {
+            new_lag = psFeatures->last_lag;
+            psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
+        }
+        else
+        {
+            new_lag = OSCE_NO_PITCH_VALUE;
+        }
+    }
+    else if (TESTBIT && psFeatures->pitch_hangover_count)
+    {
+        /* continue hangover */
+        new_lag = psFeatures->last_lag;
+        psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
+    }
+    else
+    {
+        /* unvoiced frame after hangover */
+        new_lag = OSCE_NO_PITCH_VALUE;
+        psFeatures->pitch_hangover_count = 0;
+    }
+    /* buffer update */
+    psFeatures->last_type = type;
+    /* with the current setup this should never happen (but who knows...) */
+    celt_assert(new_lag);
+    return new_lag;
+}

+/* Computes the OSCE input features for all subframes of one decoded frame.
+   For each of the psDec->nb_subfr subframes (80 samples each) one vector of
+   OSCE_FEATURE_DIM floats is written to features, laid out according to the
+   OSCE_*_START / OSCE_*_LENGTH constants: clean log spectrum from the LPCs,
+   cepstrum of the decoded signal, correlation around the pitch lag, LTP
+   coefficients and log gain. Spectrum and cepstrum are only recomputed on
+   even subframes; odd subframes copy them from their predecessor.
+   numbits[0] receives num_bits and numbits[1] its smoothed version (or
+   num_bits again when OSCE_NUMBITS_BUGFIX is not defined). periods[k]
+   receives the post-processed pitch lag of subframe k. The signal history
+   and pitch state in psDec->osce.features are updated for the next call. */
+void osce_calculate_features(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
+    float                       *features,                      /* O    input features                              */
+    float                       *numbits,                       /* O    numbits and smoothed numbits                */
+    int                         *periods,                       /* O    pitch lags on subframe basis                */
+    const opus_int16            xq[],                           /* I    Decoded speech                              */
+    opus_int32                  num_bits                        /* I    Size of SILK payload in bits                */
+)
+{
+    int num_subframes, num_samples;
+    /* signal history followed by the samples of the current frame */
+    float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80];
+    float *frame, *pfeatures;
+    OSCEFeatureState *psFeatures;
+    int i, n, k;
+#ifdef WRITE_FEATURES
+    static FILE *f_feat = NULL;
+    if (f_feat == NULL)
+    {
+        f_feat = fopen("assembled_features.f32", "wb");
+    }
+#endif
+    memset(buffer, 0, sizeof(buffer));
+    num_subframes = psDec->nb_subfr;
+    num_samples = num_subframes * 80;
+    psFeatures = &psDec->osce.features;
+    /* smooth bit count */
+    psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits;
+    numbits[0] = num_bits;
+#ifdef OSCE_NUMBITS_BUGFIX
+    numbits[1] = psFeatures->numbits_smooth;
+#else
+    numbits[1] = num_bits;
+#endif
+    /* assemble history + current frame, scaled from 16-bit integers to floats */
+    for (n = 0; n < num_samples; n++)
+    {
+        buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15);
+    }
+    OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY);
+    for (k = 0; k < num_subframes; k++)
+    {
+        pfeatures = features + k * OSCE_FEATURE_DIM;
+        frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80];
+        /* precaution; memset counts bytes, so the element size is required to clear the whole vector */
+        memset(pfeatures, 0, OSCE_FEATURE_DIM * sizeof(*pfeatures));
+        if (k % 2 == 0)
+        {
+            /* clean spectrum from lpcs (update every other frame) */
+            calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order);
+            /* noisy cepstrum from signal (update every other frame); the
+               320-sample window starts 160 samples before the subframe */
+            calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160);
+        }
+        else
+        {
+            /* odd subframes reuse spectrum and cepstrum of the preceding feature vector */
+            OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH);
+            OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH);
+        }
+        /* pitch hangover and zero value replacement */
+        periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType);
+        /* auto-correlation around pitch lag */
+        calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]);
+        /* ltp */
+        celt_assert(OSCE_LTP_LENGTH == LTP_ORDER);
+        for (i = 0; i < OSCE_LTP_LENGTH; i++)
+        {
+            pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14);
+        }
+        /* frame gain */
+        pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f);
+#ifdef WRITE_FEATURES
+        fwrite(pfeatures, sizeof(*pfeatures), OSCE_FEATURE_DIM, f_feat);
+#endif
+    }
+    /* buffer update: keep the most recent OSCE_FEATURES_MAX_HISTORY samples */
+    OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
+}

+/* Fades from x_in to x_enhanced over the first 160 samples, using the rising
+   half of osce_window as ramp. The result is written to x_enhanced; samples
+   from index 160 on are left untouched. length must be at least 160. */
+void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length)
+{
+    int n;
+    celt_assert(length >= 160);
+    for (n = 0; n < 160; n++)
+    {
+        const float ramp = osce_window[n];
+        x_enhanced[n] = ramp * x_enhanced[n] + (1.f - ramp) * x_in[n];
+    }
+}

--- /dev/null

+++ b/dnn/osce_features.h

@@ -1,0 +1,50 @@

+/* Copyright (c) 2023 Amazon

+   Written by Jan Buethe */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#ifndef OSCE_FEATURES_H

+#define OSCE_FEATURES_H

+#include "structs.h"

+#include "opus_types.h"

+/* When defined, osce_calculate_features() reports the smoothed bit count in
+   numbits[1]; otherwise numbits[1] repeats the raw bit count. */
+#define OSCE_NUMBITS_BUGFIX
+/* Computes one feature vector per subframe of the decoded frame together
+   with the bit count features and the per-subframe pitch lags. */
+void osce_calculate_features(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
+    float                       *features,                      /* O    input features                              */
+    float                       *numbits,                       /* O    numbits and smoothed numbits                */
+    int                         *periods,                       /* O    pitch lags on subframe basis                */
+    const opus_int16            xq[],                           /* I    Decoded speech                              */
+    opus_int32                  num_bits                        /* I    Size of SILK payload in bits                */
+);
+/* Cross-fades from x_in to x_enhanced over the first 160 samples and writes
+   the result to x_enhanced; length must be at least 160. */
+void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length);

+#endif

\ No newline at end of file

--- /dev/null

+++ b/dnn/osce_structs.h

@@ -1,0 +1,124 @@

+/* Copyright (c) 2023 Amazon

+   Written by Jan Buethe */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#ifndef OSCE_STRUCTS_H

+#define OSCE_STRUCTS_H

+#include "opus_types.h"

+#include "osce_config.h"

+#ifndef DISABLE_LACE

+#include "lace_data.h"

+#endif

+#ifndef DISABLE_NOLACE

+#include "nolace_data.h"

+#endif

+#include "nndsp.h"

+#include "nnet.h"

+/* feature calculation */
+/* State carried from frame to frame by the feature extraction. */
+typedef struct {
+    float               numbits_smooth;                             /* recursively smoothed bit count (0.9 * old + 0.1 * new) */
+    int                 pitch_hangover_count;                       /* hangover counter of the pitch post-processing */
+    int                 last_lag;                                   /* pitch lag of the most recent voiced subframe */
+    int                 last_type;                                  /* signal type passed to the previous pitch post-processing call */
+    float               signal_history[OSCE_FEATURES_MAX_HISTORY];  /* most recent decoded samples, stored as floats */
+    int                 reset;                                      /* NOTE(review): not used by the feature code; presumably a reset flag -- confirm */
+} OSCEFeatureState;

+#ifndef DISABLE_LACE
+/* LACE */
+/* Run-time state of the LACE model: the buffers and filter states that have
+   to persist between calls (the array sizes come from the LACE_* constants). */
+typedef struct {
+    float feature_net_conv2_state[LACE_FNET_CONV2_STATE_SIZE];
+    float feature_net_gru_state[LACE_COND_DIM];
+    AdaCombState cf1_state;
+    AdaCombState cf2_state;
+    AdaConvState af1_state;
+    float preemph_mem;
+    float deemph_mem;
+} LACEState;
+/* LACE model: its layers plus a window of LACE_OVERLAP_SIZE samples. */
+typedef struct
+{
+    LACELayers layers;
+    float window[LACE_OVERLAP_SIZE];
+} LACE;
+#endif /* #ifndef DISABLE_LACE */

+#ifndef DISABLE_NOLACE
+/* NoLACE */
+/* Run-time state of the NoLACE model: the buffers and filter states that have
+   to persist between calls (the array sizes come from the NOLACE_* constants). */
+typedef struct {
+    float feature_net_conv2_state[NOLACE_FNET_CONV2_STATE_SIZE];
+    float feature_net_gru_state[NOLACE_COND_DIM];
+    float post_cf1_state[NOLACE_COND_DIM];
+    float post_cf2_state[NOLACE_COND_DIM];
+    float post_af1_state[NOLACE_COND_DIM];
+    float post_af2_state[NOLACE_COND_DIM];
+    float post_af3_state[NOLACE_COND_DIM];
+    AdaCombState cf1_state;
+    AdaCombState cf2_state;
+    AdaConvState af1_state;
+    AdaConvState af2_state;
+    AdaConvState af3_state;
+    AdaConvState af4_state;
+    AdaShapeState tdshape1_state;
+    AdaShapeState tdshape2_state;
+    AdaShapeState tdshape3_state;
+    float preemph_mem;
+    float deemph_mem;
+} NoLACEState;
+/* NoLACE model: its layers plus an overlap window.
+   NOTE(review): the window is sized with LACE_OVERLAP_SIZE rather than a
+   NOLACE_ prefixed constant. If that macro is only provided by lace_data.h,
+   a build with DISABLE_LACE but without DISABLE_NOLACE will not compile --
+   confirm. */
+typedef struct {
+    NOLACELayers layers;
+    float window[LACE_OVERLAP_SIZE];
+} NoLACE;
+#endif /* #ifndef DISABLE_NOLACE */

+/* OSCEModel */
+/* Container for every enhancement model that is compiled in. */
+typedef struct {
+#ifndef DISABLE_LACE
+    LACE lace;
+#endif
+#ifndef DISABLE_NOLACE
+    NoLACE nolace;
+#endif
+#if defined(DISABLE_LACE) && defined(DISABLE_NOLACE)
+    int unused; /* ISO C does not allow a struct without members */
+#endif
+} OSCEModel;
+/* Run-time state of the active model. Only one method is in use at a time,
+   so the per-model states share storage. */
+typedef union {
+#ifndef DISABLE_LACE
+    LACEState lace;
+#endif
+#ifndef DISABLE_NOLACE
+    NoLACEState nolace;
+#endif
+#if defined(DISABLE_LACE) && defined(DISABLE_NOLACE)
+    int unused; /* ISO C does not allow a union without members */
+#endif
+} OSCEState;

+#endif

\ No newline at end of file

--- /dev/null

+++ b/dnn/torch/osce/create_testvectors.py

@@ -1,0 +1,165 @@

+"""

+/* Copyright (c) 2023 Amazon

+   Written by Jan Buethe */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+"""

+import os

+import argparse

+import torch

+import numpy as np

+from models import model_dict

+from utils import endoscopy

+parser = argparse.ArgumentParser()

+parser.add_argument('checkpoint_path', type=str, help='path to folder containing checkpoints "lace_checkpoint.pth" and "nolace_checkpoint.pth"')

+parser.add_argument('output_folder', type=str, help='output folder for testvectors')

+parser.add_argument('--debug', action='store_true', help='add debug output to output folder')

+def create_adaconv_testvector(prefix, adaconv, num_frames, debug=False):

+    feature_dim = adaconv.feature_dim

+    in_channels = adaconv.in_channels

+    out_channels = adaconv.out_channels

+    frame_size = adaconv.frame_size

+    features = torch.randn((1, num_frames, feature_dim))

+    x_in = torch.randn((1, in_channels, num_frames * frame_size))

+    x_out = adaconv(x_in, features, debug=debug)

+    features = features[0].detach().numpy()

+    x_in = x_in[0].reshape(in_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy()

+    x_out = x_out[0].reshape(out_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy()

+    features.tofile(prefix + '_features.f32')

+    x_in.tofile(prefix + '_x_in.f32')

+    x_out.tofile(prefix + '_x_out.f32')

+def create_adacomb_testvector(prefix, adacomb, num_frames, debug=False):

+    feature_dim = adacomb.feature_dim

+    in_channels = 1

+    frame_size = adacomb.frame_size

+    features = torch.randn((1, num_frames, feature_dim))

+    x_in = torch.randn((1, in_channels, num_frames * frame_size))

+    p_in = torch.randint(adacomb.kernel_size, 250, (1, num_frames))

+    x_out = adacomb(x_in, features, p_in, debug=debug)

+    features = features[0].detach().numpy()

+    x_in = x_in[0].permute(1, 0).detach().numpy()

+    p_in = p_in[0].detach().numpy().astype(np.int32)

+    x_out = x_out[0].permute(1, 0).detach().numpy()

+    features.tofile(prefix + '_features.f32')

+    x_in.tofile(prefix + '_x_in.f32')

+    p_in.tofile(prefix + '_p_in.s32')

+    x_out.tofile(prefix + '_x_out.f32')

+def create_adashape_testvector(prefix, adashape, num_frames):

+    feature_dim = adashape.feature_dim

+    frame_size = adashape.frame_size

+    features = torch.randn((1, num_frames, feature_dim))

+    x_in = torch.randn((1, 1, num_frames * frame_size))

+    x_out = adashape(x_in, features)

+    features = features[0].detach().numpy()

+    x_in = x_in.flatten().detach().numpy()

+    x_out = x_out.flatten().detach().numpy()

+    features.tofile(prefix + '_features.f32')

+    x_in.tofile(prefix + '_x_in.f32')

+    x_out.tofile(prefix + '_x_out.f32')

+def create_feature_net_testvector(prefix, model, num_frames):

+    num_features = model.num_features

+    num_subframes = 4 * num_frames

+    input_features = torch.randn((1, num_subframes, num_features))

+    periods = torch.randint(32, 300, (1, num_subframes))

+    numbits = model.numbits_range[0] + torch.rand((1, num_frames, 2)) * (model.numbits_range[1] - model.numbits_range[0])

+    pembed = model.pitch_embedding(periods)

+    nembed = torch.repeat_interleave(model.numbits_embedding(numbits).flatten(2), 4, dim=1)

+    full_features = torch.cat((input_features, pembed, nembed), dim=-1)

+    cf = model.feature_net(full_features)

+    input_features.float().numpy().tofile(prefix + "_in_features.f32")

+    periods.numpy().astype(np.int32).tofile(prefix + "_periods.s32")

+    numbits.float().numpy().tofile(prefix + "_numbits.f32")

+    full_features.detach().numpy().tofile(prefix + "_full_features.f32")

+    cf.detach().numpy().tofile(prefix + "_out_features.f32")

+if __name__ == "__main__":

+    args = parser.parse_args()

+    os.makedirs(args.output_folder, exist_ok=True)

+    lace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "lace_checkpoint.pth"), map_location='cpu')

+    nolace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "nolace_checkpoint.pth"), map_location='cpu')

+    lace = model_dict['lace'](**lace_checkpoint['setup']['model']['kwargs'])

+    nolace = model_dict['nolace'](**nolace_checkpoint['setup']['model']['kwargs'])

+    lace.load_state_dict(lace_checkpoint['state_dict'])

+    nolace.load_state_dict(nolace_checkpoint['state_dict'])

+    if args.debug:

+        endoscopy.init(args.output_folder)

+    # lace af1, 1 input channel, 1 output channel

+    create_adaconv_testvector(os.path.join(args.output_folder, "lace_af1"), lace.af1, 5, debug=args.debug)

+    # nolace af1, 1 input channel, 2 output channels

+    create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af1"), nolace.af1, 5, debug=args.debug)

+    # nolace af4, 2 input channels, 1 output channel

+    create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af4"), nolace.af4, 5, debug=args.debug)

+    # nolace af2, 2 input channels, 2 output channels

+    create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af2"), nolace.af2, 5, debug=args.debug)

+    # lace cf1

+    create_adacomb_testvector(os.path.join(args.output_folder, "lace_cf1"), lace.cf1, 5, debug=args.debug)

+    # nolace tdshape1

+    create_adashape_testvector(os.path.join(args.output_folder, "nolace_tdshape1"), nolace.tdshape1, 5)

+    # lace feature net

+    create_feature_net_testvector(os.path.join(args.output_folder, 'lace'), lace, 5)

+    if args.debug:

+        endoscopy.close()

--- a/dnn/torch/osce/data/silk_enhancement_set.py

+++ b/dnn/torch/osce/data/silk_enhancement_set.py

@@ -49,7 +49,6 @@

                  num_bands_noisy_spec=18,

                  noisy_spec_scale='opus',

                  noisy_apply_dct=True,

-                 add_offset=False,

                  add_double_lag_acorr=False,

):

@@ -73,7 +72,6 @@

         self.gains = np.fromfile(os.path.join(path, 'features_gain.f32'), dtype=np.float32)

         self.num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32)

         self.num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32)

-        self.offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32)

         self.clean_signal_hp = np.fromfile(os.path.join(path, 'clean_hp.s16'), dtype=np.int16)

         self.clean_signal    = np.fromfile(os.path.join(path, 'clean.s16'), dtype=np.int16)

@@ -86,7 +84,6 @@

                                                     num_bands_noisy_spec,

                                                     noisy_spec_scale,

                                                     noisy_apply_dct,

-                                                    add_offset,

                                                     add_double_lag_acorr)

         self.history_len = 700 if add_double_lag_acorr else 350

@@ -120,8 +117,7 @@

               self.lpcs[frame_start : frame_stop],

               self.gains[frame_start : frame_stop],

               self.ltps[frame_start : frame_stop],

-              self.periods[frame_start : frame_stop],

-              self.offsets[frame_start : frame_stop]

+              self.periods[frame_start : frame_stop]

         if self.preemph > 0:

--- a/dnn/torch/osce/export_model_weights.py

+++ b/dnn/torch/osce/export_model_weights.py

@@ -40,12 +40,55 @@

 from wexchange.torch import dump_torch_weights

 from models import model_dict

+from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d

+from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d

+from utils.layers.td_shaper import TDShaper

+from wexchange.torch import dump_torch_weights

 parser = argparse.ArgumentParser()

 parser.add_argument('checkpoint', type=str, help='LACE or NoLACE model checkpoint')

 parser.add_argument('output_dir', type=str, help='output folder')

+parser.add_argument('--quantize', action="store_true", help='quantization according to schedule')

+schedules = {

+    'nolace': [

+        ('pitch_embedding', dict()),

+        ('feature_net.conv1', dict()),

+        ('feature_net.conv2', dict(quantize=True, scale=None)),

+        ('feature_net.tconv', dict(quantize=True, scale=None)),

+        ('feature_net.gru', dict()),

+        ('cf1', dict(quantize=True, scale=None)),

+        ('cf2', dict(quantize=True, scale=None)),

+        ('af1', dict(quantize=True, scale=None)),

+        ('tdshape1', dict()),

+        ('tdshape2', dict()),

+        ('tdshape3', dict()),

+        ('af2', dict(quantize=True, scale=None)),

+        ('af3', dict(quantize=True, scale=None)),

+        ('af4', dict(quantize=True, scale=None)),

+        ('post_cf1', dict(quantize=True, scale=None)),

+        ('post_cf2', dict(quantize=True, scale=None)),

+        ('post_af1', dict(quantize=True, scale=None)),

+        ('post_af2', dict(quantize=True, scale=None)),

+        ('post_af3', dict(quantize=True, scale=None))

+    ],

+    'lace' : [

+        ('pitch_embedding', dict()),

+        ('feature_net.conv1', dict()),

+        ('feature_net.conv2', dict(quantize=True, scale=None)),

+        ('feature_net.tconv', dict(quantize=True, scale=None)),

+        ('feature_net.gru', dict()),

+        ('cf1', dict(quantize=True, scale=None)),

+        ('cf2', dict(quantize=True, scale=None)),

+        ('af1', dict(quantize=True, scale=None))

+    ]

+}

 # auxiliary functions

 def sha1(filename):

     BUF_SIZE = 65536

@@ -60,9 +103,29 @@

     return sha1.hexdigest()

+def osce_dump_generic(writer, name, module):

+    if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \

+            or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding) \

+                or isinstance(module, LimitedAdaptiveConv1d) or isinstance(module, LimitedAdaptiveComb1d) \

+                    or isinstance(module, TDShaper) or isinstance(module, torch.nn.GRU):

+                        dump_torch_weights(writer, module, name=name, verbose=True)

+    else:

+        for child_name, child in module.named_children():

+            osce_dump_generic(writer, (name + "_" + child_name).replace("feature_net", "fnet"), child)

 def export_name(name):

-    return name.replace('.', '_')

+    name = name.replace('.', '_')

+    name = name.replace('feature_net', 'fnet')

+    return name

+def osce_scheduled_dump(writer, prefix, model, schedule):

+    if not prefix.endswith('_'):

+        prefix += '_'

+    for name, kwargs in schedule:

+        dump_torch_weights(writer, model.get_submodule(name), prefix + export_name(name), **kwargs, verbose=True)

 if __name__ == "__main__":

     args = parser.parse_args()

@@ -76,22 +139,34 @@

     # create model and load weights

     checkpoint = torch.load(checkpoint_path, map_location='cpu')

     model = model_dict[checkpoint['setup']['model']['name']](*checkpoint['setup']['model']['args'], **checkpoint['setup']['model']['kwargs'])

+    model.load_state_dict(checkpoint['state_dict'])

     # CWriter

     model_name = checkpoint['setup']['model']['name']

-    cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper())

+    cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper() + 'Layers', add_typedef=True)

-    # dump numbits_embedding parameters by hand

-    numbits_embedding = model.get_submodule('numbits_embedding')

-    weights = next(iter(numbits_embedding.parameters()))

-    for i, c in enumerate(weights):

-        cwriter.header.write(f"\nNUMBITS_COEF_{i} {float(c.detach())}f")

-    cwriter.header.write("\n\n")

+    # Add custom includes and global parameters

+    cwriter.header.write(f'''

+#define {model_name.upper()}_PREEMPH {model.preemph}f

+#define {model_name.upper()}_FRAME_SIZE {model.FRAME_SIZE}

+#define {model_name.upper()}_OVERLAP_SIZE 40

+#define {model_name.upper()}_NUM_FEATURES {model.num_features}

+#define {model_name.upper()}_PITCH_MAX {model.pitch_max}

+#define {model_name.upper()}_PITCH_EMBEDDING_DIM {model.pitch_embedding_dim}

+#define {model_name.upper()}_NUMBITS_RANGE_LOW {model.numbits_range[0]}

+#define {model_name.upper()}_NUMBITS_RANGE_HIGH {model.numbits_range[1]}

+#define {model_name.upper()}_NUMBITS_EMBEDDING_DIM {model.numbits_embedding_dim}

+#define {model_name.upper()}_COND_DIM {model.cond_dim}

+#define {model_name.upper()}_HIDDEN_FEATURE_DIM {model.hidden_feature_dim}

+''')

+    for i, s in enumerate(model.numbits_embedding.scale_factors):

+        cwriter.header.write(f"#define {model_name.upper()}_NUMBITS_SCALE_{i} {float(s.detach().cpu())}f\n")

     # dump layers

-    for name, module in model.named_modules():

-        if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \

-            or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding):

-                dump_torch_weights(cwriter, module, name=export_name(name), verbose=True)

+    if model_name in schedules and args.quantize:

+        osce_scheduled_dump(cwriter, model_name, model, schedules[model_name])

+    else:

+        osce_dump_generic(cwriter, model_name, model)

     cwriter.close()

--- a/dnn/torch/osce/models/lace.py

+++ b/dnn/torch/osce/models/lace.py

@@ -96,7 +96,7 @@

         self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)

         # spectral shaping

-        self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

+        self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

     def flop_count(self, rate=16000, verbose=False):

--- a/dnn/torch/osce/models/no_lace.py

+++ b/dnn/torch/osce/models/no_lace.py

@@ -96,8 +96,8 @@

         # comb filters

         left_pad = self.kernel_size // 2

         right_pad = self.kernel_size - 1 - left_pad

-        self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)

-        self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)

+        self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)

+        self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)

         # spectral shaping

         self.af1 = LimitedAdaptiveConv1d(1, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

--- a/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py

+++ b/dnn/torch/osce/utils/layers/limited_adaptive_comb1d.py

@@ -41,13 +41,13 @@

                  feature_dim,

                  frame_size=160,

                  overlap_size=40,

-                 use_bias=True,

                  padding=None,

                  max_lag=256,

                  name=None,

                  gain_limit_db=10,

                  global_gain_limits_db=[-6, 6],

-                 norm_p=2):

+                 norm_p=2,

+                 **kwargs):

"""

         Parameters:

@@ -87,7 +87,6 @@

         self.kernel_size   = kernel_size

         self.frame_size    = frame_size

         self.overlap_size  = overlap_size

-        self.use_bias      = use_bias

         self.max_lag       = max_lag

         self.limit_db      = gain_limit_db

         self.norm_p        = norm_p

@@ -101,8 +100,6 @@

         # network for generating convolution weights

         self.conv_kernel = nn.Linear(feature_dim, kernel_size)

-        if self.use_bias:

-            self.conv_bias = nn.Linear(feature_dim,1)

         # comb filter gain

         self.filter_gain = nn.Linear(feature_dim, 1)

@@ -154,9 +151,6 @@

         conv_kernels = self.conv_kernel(features).reshape((batch_size, num_frames, self.out_channels, self.in_channels, self.kernel_size))

         conv_kernels = conv_kernels / (1e-6 + torch.norm(conv_kernels, p=self.norm_p, dim=-1, keepdim=True))

-        if self.use_bias:

-            conv_biases  = self.conv_bias(features).permute(0, 2, 1)

         conv_gains   = torch.exp(- torch.relu(self.filter_gain(features).permute(0, 2, 1)) + self.log_gain_limit)

         # calculate gains

         global_conv_gains   = torch.exp(self.filter_gain_a * torch.tanh(self.global_filter_gain(features).permute(0, 2, 1)) + self.filter_gain_b)

@@ -190,10 +184,6 @@

             new_chunk = torch.conv1d(xx, conv_kernels[:, i, ...].reshape((batch_size * self.out_channels, self.in_channels, self.kernel_size)), groups=batch_size).reshape(batch_size, self.out_channels, -1)

-            if self.use_bias:

-                new_chunk = new_chunk + conv_biases[:, :, i : i + 1]

             offset = self.max_lag + self.padding[0]

             new_chunk = global_conv_gains[:, :, i : i + 1] * (new_chunk * conv_gains[:, :, i : i + 1] + x[..., offset + i * frame_size : offset + (i + 1) * frame_size + overlap_size])

@@ -222,10 +212,6 @@

         count += 2 * (frame_rate * self.feature_dim * self.kernel_size)

         count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)

         count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels

-        # bias computation

-        if self.use_bias:

-            count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead)

         # a0 computation

         count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels

--- a/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py

+++ b/dnn/torch/osce/utils/layers/limited_adaptive_conv1d.py

@@ -46,12 +46,12 @@

                  feature_dim,

                  frame_size=160,

                  overlap_size=40,

-                 use_bias=True,

                  padding=None,

                  name=None,

                  gain_limits_db=[-6, 6],

                  shape_gain_db=0,

-                 norm_p=2):

+                 norm_p=2,

+                 **kwargs):

"""

         Parameters:

@@ -90,7 +90,6 @@

         self.kernel_size    = kernel_size

         self.frame_size     = frame_size

         self.overlap_size   = overlap_size

-        self.use_bias       = use_bias

         self.gain_limits_db = gain_limits_db

         self.shape_gain_db  = shape_gain_db

         self.norm_p         = norm_p

@@ -104,9 +103,6 @@

         # network for generating convolution weights

         self.conv_kernel = nn.Linear(feature_dim, in_channels * out_channels * kernel_size)

-        if self.use_bias:

-            self.conv_bias = nn.Linear(feature_dim, out_channels)

         self.shape_gain = min(1, 10**(shape_gain_db / 20))

         self.filter_gain = nn.Linear(feature_dim, out_channels)

@@ -133,10 +129,6 @@

         count += 2 * (frame_rate * self.feature_dim * self.kernel_size)

         count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)

-        # bias computation

-        if self.use_bias:

-            count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead)

         # gain computation

         count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels

@@ -182,9 +174,6 @@

         id_kernels[..., self.padding[1]] = 1

         conv_kernels = self.shape_gain * conv_kernels + (1 - self.shape_gain) * id_kernels

-        if self.use_bias:

-            conv_biases  = self.conv_bias(features).permute(0, 2, 1)

         # calculate gains

         conv_gains   = torch.exp(self.filter_gain_a * torch.tanh(self.filter_gain(features)) + self.filter_gain_b)

--- a/dnn/torch/osce/utils/silk_features.py

+++ b/dnn/torch/osce/utils/silk_features.py

@@ -33,6 +33,7 @@

 import torch

 import scipy

+import scipy.signal

 from utils.pitch import hangover, calculate_acorr_window

 from utils.spec import create_filter_bank, cepstrum, log_spectrum, log_spectrum_from_lpc

@@ -59,7 +60,6 @@

                          num_bands_noisy_spec=18,

                          noisy_spec_scale='opus',

                          noisy_apply_dct=True,

-                         add_offset=False,

                          add_double_lag_acorr=False

):

@@ -67,7 +67,7 @@

     fb_clean_spec = create_filter_bank(num_bands_clean_spec, 320, scale='erb', round_center_bins=True, normalize=True)

     fb_noisy_spec = create_filter_bank(num_bands_noisy_spec, 320, scale=noisy_spec_scale, round_center_bins=True, normalize=True)

-    def create_features(noisy, noisy_history, lpcs, gains, ltps, periods, offsets):

+    def create_features(noisy, noisy_history, lpcs, gains, ltps, periods):

         periods = periods.copy()

@@ -89,10 +89,7 @@

         acorr, _ = calculate_acorr_window(noisy, 80, periods, noisy_history, radius=acorr_radius, add_double_lag_acorr=add_double_lag_acorr)

-        if add_offset:

-            features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains, offsets.reshape(-1, 1)), axis=-1, dtype=np.float32)

-        else:

-            features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32)

+        features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32)

         return features, periods.astype(np.int64)

@@ -110,7 +107,6 @@

                         num_bands_noisy_spec=18,

                         noisy_spec_scale='opus',

                         noisy_apply_dct=True,

-                        add_offset=False,

                         add_double_lag_acorr=False,

                         **kwargs):

@@ -122,13 +118,12 @@

     periods = np.fromfile(os.path.join(path, 'features_period.s16'), dtype=np.int16)

     num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32).astype(np.float32).reshape(-1, 1)

     num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32).reshape(-1, 1)

-    offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32)

     # load signal, add back delay and pre-emphasize

     signal  = np.fromfile(os.path.join(path, 'noisy.s16'), dtype=np.int16).astype(np.float32) / (2 ** 15)

     signal = np.concatenate((np.zeros(skip, dtype=np.float32), signal), dtype=np.float32)

-    create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_offset, add_double_lag_acorr)

+    create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_double_lag_acorr)

     num_frames = min((len(signal) // 320) * 4, len(lpcs))

     signal = signal[: num_frames * 80]

@@ -138,11 +133,10 @@

     periods = periods[: num_frames]

     num_bits = num_bits[: num_frames // 4]

     num_bits_smooth = num_bits[: num_frames // 4]

-    offsets = offsets[: num_frames]

     numbits = np.repeat(np.concatenate((num_bits, num_bits_smooth), axis=-1, dtype=np.float32), 4, axis=0)

-    features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods, offsets)

+    features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods)

     if preemph > 0:

         signal[1:] -= preemph * signal[:-1]

--- a/dnn/torch/osce/utils/spec.py

+++ b/dnn/torch/osce/utils/spec.py

@@ -30,6 +30,7 @@

 import math as m

 import numpy as np

 import scipy

+import scipy.fftpack

 import torch

 def erb(f):

--- a/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py

+++ b/dnn/torch/weight-exchange/wexchange/c_export/c_writer.py

@@ -38,7 +38,8 @@

                  create_state_struct=False,

                  enable_binary_blob=True,

                  model_struct_name="Model",

-                 nnet_header="nnet.h"):

+                 nnet_header="nnet.h",

+                 add_typedef=False):

"""

         Writer class for creating souce and header files for weight exports to C

@@ -73,6 +74,7 @@

         self.enable_binary_blob = enable_binary_blob

         self.create_state_struct = create_state_struct

         self.model_struct_name = model_struct_name

+        self.add_typedef = add_typedef

         # for binary blob format, format is key=<layer name>, value=(<layer type>, <init call>)

         self.layer_dict = OrderedDict()

@@ -119,11 +121,17 @@

         # create model type

         if self.enable_binary_blob:

-            self.header.write(f"\nstruct {self.model_struct_name} {{")

+            if self.add_typedef:

+                self.header.write(f"\ntypedef struct {{")

+            else:

+                self.header.write(f"\nstruct {self.model_struct_name} {{")

             for name, data in self.layer_dict.items():

                 layer_type = data[0]

                 self.header.write(f"\n    {layer_type} {name};")

-            self.header.write(f"\n}};\n")

+            if self.add_typedef:

+                self.header.write(f"\n}} {self.model_struct_name};\n")

+            else:

+                self.header.write(f"\n}};\n")

             init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)"

             self.header.write(f"\n{init_prototype};\n")

--- a/dnn/torch/weight-exchange/wexchange/torch/__init__.py

+++ b/dnn/torch/weight-exchange/wexchange/torch/__init__.py

@@ -34,3 +34,4 @@

 from .torch import dump_torch_grucell_weights

 from .torch import dump_torch_embedding_weights, load_torch_embedding_weights

 from .torch import dump_torch_weights, load_torch_weights

+from .torch import dump_torch_adaptive_conv1d_weights

\ No newline at end of file

--- a/dnn/torch/weight-exchange/wexchange/torch/torch.py

+++ b/dnn/torch/weight-exchange/wexchange/torch/torch.py

@@ -28,12 +28,154 @@

"""

 import os

+import sys

 import torch

 import numpy as np

+sys.path.append(os.path.join(os.path.dirname(__file__), '../osce'))  # single append; the nested call also pushed None onto sys.path

+try:

+    import utils.layers as osce_layers

+    from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d

+    from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d

+    from utils.layers.td_shaper import TDShaper

+    has_osce=True

+except ImportError:  # OSCE layers are optional; only a failed import disables them

+    has_osce=False

 from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer

+def dump_torch_adaptive_conv1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):

+    w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()

+    b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()

+    w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()

+    b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()

+    if isinstance(where, CWriter):

+        # pad kernel for quantization

+        left_padding = adaconv.padding[0]

+        kernel_size = adaconv.kernel_size

+        in_channels = adaconv.in_channels

+        out_channels = adaconv.out_channels

+        feature_dim = adaconv.feature_dim

+        if quantize and kernel_size % 8:

+            kernel_padding = 8 - (kernel_size % 8)

+            w_kernel = np.concatenate(

+                (np.zeros((out_channels, in_channels, kernel_padding, feature_dim)), w_kernel.reshape(out_channels, in_channels, kernel_size, feature_dim)),

+                dtype=w_kernel.dtype,

+                axis=2).reshape(-1, feature_dim)

+            b_kernel = np.concatenate(

+                (np.zeros((out_channels, in_channels, kernel_padding)), b_kernel.reshape(out_channels, in_channels, kernel_size)),

+                dtype=b_kernel.dtype,

+                axis=2).reshape(-1)

+            left_padding += kernel_padding

+            kernel_size += kernel_padding

+        # write relevant scalar parameters to header file

+        where.header.write(f"""

+#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f

+#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f

+#define {name.upper()}_SHAPE_GAIN {adaconv.shape_gain:f}f

+#define {name.upper()}_KERNEL_SIZE {kernel_size}

+#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}

+#define {name.upper()}_LEFT_PADDING {left_padding}

+#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}

+#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}

+#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}

+#define {name.upper()}_NORM_P {adaconv.norm_p}

+#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}

+"""

+        )

+        print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)

+        print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)

+    else:

+        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)  # np.save(file, arr): `where` is treated as an output directory

+        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)

+        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)

+        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)

+def dump_torch_adaptive_comb1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):

+    w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()

+    b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()

+    w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()

+    b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()

+    w_global_gain = adaconv.global_filter_gain.weight.detach().cpu().numpy().copy()

+    b_global_gain = adaconv.global_filter_gain.bias.detach().cpu().numpy().copy()

+    if isinstance(where, CWriter):

+        # pad kernel for quantization

+        left_padding = adaconv.padding[0]

+        kernel_size = adaconv.kernel_size

+        if quantize and w_kernel.shape[0] % 8:

+            kernel_padding = 8 - (w_kernel.shape[0] % 8)

+            w_kernel = np.concatenate((np.zeros((kernel_padding, w_kernel.shape[1])), w_kernel), dtype=w_kernel.dtype)

+            b_kernel = np.concatenate((np.zeros((kernel_padding)), b_kernel), dtype=b_kernel.dtype)

+            left_padding += kernel_padding

+            kernel_size += kernel_padding

+        # write relevant scalar parameters to header file

+        where.header.write(f"""

+#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f

+#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f

+#define {name.upper()}_LOG_GAIN_LIMIT {adaconv.log_gain_limit:f}f

+#define {name.upper()}_KERNEL_SIZE {kernel_size}

+#define {name.upper()}_LEFT_PADDING {left_padding}

+#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}

+#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}

+#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}

+#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}

+#define {name.upper()}_NORM_P {adaconv.norm_p}

+#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}

+#define {name.upper()}_MAX_LAG {adaconv.max_lag}

+"""

+        )

+        print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)

+        print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)

+        print_dense_layer(where, name + "_global_gain", w_global_gain, b_global_gain, format='torch', sparse=False, diagonal=False, quantize=False)

+    else:

+        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)  # np.save(file, arr): `where` is treated as an output directory

+        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)

+        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)

+        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)

+        np.save(os.path.join(where, 'weight_global_gain.npy'), w_global_gain)

+        np.save(os.path.join(where, 'bias_global_gain.npy'), b_global_gain)

+def dump_torch_tdshaper(where, shaper, name='tdshaper'):

+    if isinstance(where, CWriter):

+        where.header.write(f"""

+#define {name.upper()}_FEATURE_DIM {shaper.feature_dim}

+#define {name.upper()}_FRAME_SIZE {shaper.frame_size}

+#define {name.upper()}_AVG_POOL_K {shaper.avg_pool_k}

+#define {name.upper()}_INNOVATE {1 if shaper.innovate else 0}

+#define {name.upper()}_POOL_AFTER {1 if shaper.pool_after else 0}

+"""

+        )

+    dump_torch_conv1d_weights(where, shaper.feature_alpha1, name + "_alpha1")

+    dump_torch_conv1d_weights(where, shaper.feature_alpha2, name + "_alpha2")

+    if shaper.innovate:

+        dump_torch_conv1d_weights(where, shaper.feature_alpha1b, name + "_alpha1b")

+        dump_torch_conv1d_weights(where, shaper.feature_alpha1c, name + "_alpha1c")

+        dump_torch_conv1d_weights(where, shaper.feature_alpha2b, name + "_alpha2b")

+        dump_torch_conv1d_weights(where, shaper.feature_alpha2c, name + "_alpha2c")

 def dump_torch_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):

     assert gru.num_layers == 1

@@ -221,7 +363,6 @@

 def dump_torch_embedding_weights(where, embed, name='embed', scale=1/128, sparse=False, diagonal=False, quantize=False):

-    print("quantize = ", quantize)

     w = embed.weight.detach().cpu().numpy().copy().transpose()

     b = np.zeros(w.shape[0], dtype=w.dtype)

@@ -257,11 +398,21 @@

     elif isinstance(module, torch.nn.Conv2d):

         return dump_torch_conv2d_weights(where, module, name, **kwargs)

     elif isinstance(module, torch.nn.Embedding):

-        return dump_torch_embedding_weights(where, module)

+        return dump_torch_embedding_weights(where, module, name, **kwargs)

     elif isinstance(module, torch.nn.ConvTranspose1d):

         return dump_torch_tconv1d_weights(where, module, name, **kwargs)

     else:

-        raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')

+        if has_osce:

+            if isinstance(module, LimitedAdaptiveConv1d):

+                dump_torch_adaptive_conv1d_weights(where, module, name, **kwargs)

+            elif isinstance(module, LimitedAdaptiveComb1d):

+                dump_torch_adaptive_comb1d_weights(where, module, name, **kwargs)

+            elif isinstance(module, TDShaper):

+                dump_torch_tdshaper(where, module, name, **kwargs)

+            else:

+                raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')

+        else:

+            raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')

 def load_torch_weights(where, module):

     """ generic function for loading weights of some torch.nn.Module """

--- a/dnn/write_lpcnet_weights.c

+++ b/dnn/write_lpcnet_weights.c

@@ -46,6 +46,10 @@

 #include "plc_data.c"

 #include "dred_rdovae_enc_data.c"

 #include "dred_rdovae_dec_data.c"

+#ifdef ENABLE_OSCE

+#include "lace_data.c"

+#include "nolace_data.c"

+#endif

 void write_weights(const WeightArray *list, FILE *fout)

@@ -53,6 +57,9 @@

   unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};

   while (list[i].name != NULL) {

     WeightHead h;

+    if (strlen(list[i].name) >= sizeof(h.name) - 1) {

+      printf("[write_weights] warning: name %s too long\n", list[i].name);

+    }

     memcpy(h.head, "DNNw", 4);

     h.version = WEIGHT_BLOB_VERSION;

     h.type = list[i].type;

@@ -77,6 +84,14 @@

   write_weights(lpcnet_plc_arrays, fout);

   write_weights(rdovaeenc_arrays, fout);

   write_weights(rdovaedec_arrays, fout);

+#ifdef ENABLE_OSCE

+#ifndef DISABLE_LACE

+  write_weights(lacelayers_arrays, fout);

+#endif

+#ifndef DISABLE_NOLACE

+  write_weights(nolacelayers_arrays, fout);

+#endif

+#endif

   fclose(fout);

   return 0;

--- a/lpcnet_headers.mk

+++ b/lpcnet_headers.mk

@@ -29,3 +29,12 @@

 dnn/dred_rdovae_dec.h \

 dnn/dred_rdovae_dec_data.h \

 dnn/dred_rdovae_stats_data.h

+OSCE_HEAD= \

+dnn/osce.h \

+dnn/osce_config.h \

+dnn/osce_structs.h \

+dnn/osce_features.h \

+dnn/nndsp.h \

+dnn/lace_data.h \

+dnn/nolace_data.h

--- a/lpcnet_sources.mk

+++ b/lpcnet_sources.mk

@@ -23,6 +23,13 @@

 silk/dred_coding.c \

 silk/dred_decoder.c

+OSCE_SOURCES = \

+dnn/osce.c \

+dnn/osce_features.c \

+dnn/nndsp.c \

+dnn/lace_data.c \

+dnn/nolace_data.c

 DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c

 DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c

 DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c

--- a/meson.build

+++ b/meson.build

@@ -148,6 +148,7 @@

   [ 'float-approx', 'FLOAT_APPROX' ],

   [ 'enable-deep-plc', 'ENABLE_DEEP_PLC' ],

   [ 'enable-dred', 'ENABLE_DRED' ],

+  [ 'enable-osce', 'ENABLE_OSCE' ],

   [ 'assertions', 'ENABLE_ASSERTIONS' ],

   [ 'hardening', 'ENABLE_HARDENING' ],

   [ 'fuzzing', 'FUZZING' ],

--- a/meson_options.txt

+++ b/meson_options.txt

@@ -9,6 +9,7 @@

 option('enable-deep-plc', type : 'boolean', value : false, description : 'Enable Deep Packet Loss Concealment (PLC)')

 option('enable-dred', type : 'boolean', value : false, description : 'Enable Deep Redundancy (DRED)')

+option('enable-osce', type : 'boolean', value : false, description : 'Enable Opus Speech Coding Enhancement (OSCE)')

 option('enable-dnn-debug-float', type : 'boolean', value : false, description : 'Compute DNN using float weights')

 option('custom-modes', type : 'boolean', value : false, description : 'Enable non-Opus modes, e.g. 44.1 kHz & 2^n frames')

--- a/silk/API.h

+++ b/silk/API.h

@@ -92,7 +92,17 @@

 /* Decoder functions                    */

 /****************************************/

 /***********************************************/

+/* Load OSCE models from external data pointer */

+/***********************************************/

+opus_int silk_LoadOSCEModels(                           /* O    Returns error code                              */

+    void *decState,                                     /* I/O  State                                           */

+    const unsigned char *data,                          /* I    pointer to binary blob                          */

+    int len                                             /* I    length of binary blob data                      */

+);

+/***********************************************/

 /* Get size in bytes of the Silk decoder state */

 /***********************************************/

 opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */

@@ -100,8 +110,12 @@

);

 /*************************/

-/* Init or Reset decoder */

+/* Init and Reset decoder */

 /*************************/

+opus_int silk_ResetDecoder(                              /* O    Returns error code                              */

+    void                            *decState            /* I/O  State                                           */

+);

 opus_int silk_InitDecoder(                              /* O    Returns error code                              */

     void                            *decState           /* I/O  State                                           */

);

--- a/silk/control.h

+++ b/silk/control.h

@@ -147,6 +147,11 @@

     /* I:   Enable Deep PLC                                                                 */

     opus_int enable_deep_plc;

+#ifdef ENABLE_OSCE

+    /* I: OSCE method */

+    opus_int osce_method;

+#endif

 } silk_DecControlStruct;

 #ifdef __cplusplus

--- a/silk/dec_API.c

+++ b/silk/dec_API.c

@@ -33,6 +33,11 @@

 #include "stack_alloc.h"

 #include "os_support.h"

+#ifdef ENABLE_OSCE

+#include "osce.h"

+#include "osce_structs.h"

+#endif

 /************************/

 /* Decoder Super Struct */

 /************************/

@@ -42,6 +47,9 @@

     opus_int                         nChannelsAPI;

     opus_int                         nChannelsInternal;

     opus_int                         prev_decode_only_middle;

+#ifdef ENABLE_OSCE

+    OSCEModel                        osce_model;

+#endif

 } silk_decoder;

 /*********************/

@@ -48,6 +56,24 @@

 /* Decoder functions */

 /*********************/

+opus_int silk_LoadOSCEModels(void *decState, const unsigned char *data, int len)

+{

+#ifdef ENABLE_OSCE

+    silk_decoder *psDec = (silk_decoder *)decState;

+    /* Hand the blob to the OSCE loader; its status is returned unchanged */

+    return osce_load_models(&psDec->osce_model, data, len);

+#else

+    /* Built without OSCE: nothing to load, arguments are deliberately unused */

+    (void) decState;

+    (void) data;

+    (void) len;

+    return SILK_NO_ERROR;

+#endif

+}

 opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */

     opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */

@@ -60,6 +86,24 @@

 /* Reset decoder state */

+opus_int silk_ResetDecoder(                              /* O    Returns error code                              */

+    void                            *decState           /* I/O  State                                           */

+)

+{

+    silk_decoder *psDec = (silk_decoder *)decState;

+    opus_int ch;

+    opus_int status = SILK_NO_ERROR;

+    /* Reset every channel; the value returned is the status of the last channel reset */

+    for( ch = 0; ch < DECODER_NUM_CHANNELS; ch++ ) {

+        status = silk_reset_decoder( &psDec->channel_state[ ch ] );

+    }

+    /* Clear the stereo state that lives in the super struct */

+    silk_memset( &psDec->sStereo, 0, sizeof( psDec->sStereo ) );

+    /* Not strictly needed, but it's cleaner that way */

+    psDec->prev_decode_only_middle = 0;

+    return status;

+}

 opus_int silk_InitDecoder(                              /* O    Returns error code                              */

     void                            *decState           /* I/O  State                                           */

@@ -67,6 +111,11 @@

     opus_int n, ret = SILK_NO_ERROR;

     silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;

+#ifndef USE_WEIGHTS_FILE

+    /* load osce models */

+    silk_LoadOSCEModels(decState, NULL, 0);

+#endif

     for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {

         ret  = silk_init_decoder( &channel_state[ n ] );

@@ -301,9 +350,17 @@

             } else {

                 condCoding = CODE_CONDITIONALLY;

+#ifdef ENABLE_OSCE

+            if ( channel_state[n].osce.method != decControl->osce_method ) {

+                osce_reset( &channel_state[n].osce, decControl->osce_method );

+            }

+#endif

             ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding,

 #ifdef ENABLE_DEEP_PLC

                 n == 0 ? lpcnet : NULL,

+#endif

+#ifdef ENABLE_OSCE

+                &psDec->osce_model,

 #endif

                 arch);

         } else {

--- a/silk/decode_frame.c

+++ b/silk/decode_frame.c

@@ -33,6 +33,10 @@

 #include "stack_alloc.h"

 #include "PLC.h"

+#ifdef ENABLE_OSCE

+#include "osce.h"

+#endif

 /****************/

 /* Decode frame */

 /****************/

@@ -46,16 +50,25 @@

 #ifdef ENABLE_DEEP_PLC

     LPCNetPLCState              *lpcnet,

 #endif

+#ifdef ENABLE_OSCE

+    OSCEModel                   *osce_model,

+#endif

     int                         arch                            /* I    Run-time architecture                       */

     VARDECL( silk_decoder_control, psDecCtrl );

     opus_int         L, mv_len, ret = 0;

+#ifdef ENABLE_OSCE

+    opus_int32  ec_start;

+#endif

     SAVE_STACK;

     L = psDec->frame_length;

     ALLOC( psDecCtrl, 1, silk_decoder_control );

     psDecCtrl->LTP_scale_Q14 = 0;

+#ifdef ENABLE_OSCE

+    ec_start = ec_tell(psRangeDec);

+#endif

     /* Safety checks */

     celt_assert( L > 0 && L <= MAX_FRAME_LENGTH );

@@ -87,7 +100,22 @@

         /********************************************************/

         silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch );

+        /*************************/

+        /* Update output buffer. */

+        /*************************/

+        celt_assert( psDec->ltp_mem_length >= psDec->frame_length );

+        mv_len = psDec->ltp_mem_length - psDec->frame_length;

+        silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );

+        silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );

+#ifdef ENABLE_OSCE

         /********************************************************/

+        /* Run SILK enhancer                                    */

+        /********************************************************/

+        osce_enhance_frame( osce_model, psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch );

+#endif

+        /********************************************************/

         /* Update PLC state                                     */

         /********************************************************/

         silk_PLC( psDec, psDecCtrl, pOut, 0,

@@ -109,15 +137,18 @@

             lpcnet,

 #endif

             arch );

-    }

-    /*************************/

-    /* Update output buffer. */

-    /*************************/

-    celt_assert( psDec->ltp_mem_length >= psDec->frame_length );

-    mv_len = psDec->ltp_mem_length - psDec->frame_length;

-    silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );

-    silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );

+#ifdef ENABLE_OSCE

+        osce_reset( &psDec->osce, psDec->osce.method );

+#endif

+        /*************************/

+        /* Update output buffer. */

+        /*************************/

+        celt_assert( psDec->ltp_mem_length >= psDec->frame_length );

+        mv_len = psDec->ltp_mem_length - psDec->frame_length;

+        silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );

+        silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );

+    }

     /************************************************/

     /* Comfort noise generation / estimation        */

--- a/silk/init_decoder.c

+++ b/silk/init_decoder.c

@@ -31,15 +31,21 @@

 #include "main.h"

+#ifdef ENABLE_OSCE

+#include "osce.h"

+#endif

+#include "structs.h"

 /************************/

-/* Init Decoder State   */

+/* Reset Decoder State  */

 /************************/

-opus_int silk_init_decoder(

+opus_int silk_reset_decoder(

     silk_decoder_state          *psDec                          /* I/O  Decoder state pointer                       */

     /* Clear the entire encoder state, except anything copied */

-    silk_memset( psDec, 0, sizeof( silk_decoder_state ) );

+    silk_memset( &psDec->SILK_DECODER_STATE_RESET_START, 0, sizeof( silk_decoder_state ) - ((char*) &psDec->SILK_DECODER_STATE_RESET_START - (char*)psDec) );

     /* Used to deactivate LSF interpolation */

     psDec->first_frame_after_reset = 1;

@@ -51,6 +57,27 @@

     /* Reset PLC state */

     silk_PLC_Reset( psDec );

+#ifdef ENABLE_OSCE

+    /* Reset OSCE state and method */

+    osce_reset(&psDec->osce, OSCE_DEFAULT_METHOD);

+#endif

+    return 0;

+}

+/************************/

+/* Init Decoder State   */

+/************************/

+opus_int silk_init_decoder(

+    silk_decoder_state          *psDec                          /* I/O  Decoder state pointer                       */

+)

+{

+    /* Clear the entire encoder state, except anything copied */

+    silk_memset( psDec, 0, sizeof( silk_decoder_state ) );

+    silk_reset_decoder( psDec );

     return(0);

--- a/silk/main.h

+++ b/silk/main.h

@@ -389,6 +389,10 @@

 /****************************************************/

 /* Decoder Functions                                */

 /****************************************************/

+opus_int silk_reset_decoder(

+    silk_decoder_state          *psDec                          /* I/O  Decoder state pointer                       */

+);

 opus_int silk_init_decoder(

     silk_decoder_state          *psDec                          /* I/O  Decoder state pointer                       */

);

@@ -412,6 +416,9 @@

     opus_int                    condCoding,                     /* I    The type of conditional coding to use       */

 #ifdef ENABLE_DEEP_PLC

     LPCNetPLCState              *lpcnet,

+#endif

+#ifdef ENABLE_OSCE

+    OSCEModel                   *osce_model,

 #endif

     int                         arch                            /* I    Run-time architecture                       */

);

--- a/silk/structs.h

+++ b/silk/structs.h

@@ -44,6 +44,11 @@

 #include "dred_decoder.h"

 #endif

+#ifdef ENABLE_OSCE

+#include "osce_config.h"

+#include "osce_structs.h"

+#endif

 #ifdef __cplusplus

 extern "C"

@@ -238,6 +243,14 @@

 } silk_encoder_state;

+#ifdef ENABLE_OSCE

+/* OSCE data kept per decoder channel. It is the first member of silk_decoder_state,     */

+/* ahead of SILK_DECODER_STATE_RESET_START, so the memset in silk_reset_decoder() skips  */

+/* it; silk_reset_decoder() re-initializes it through osce_reset() instead.              */

+typedef struct {

+    /* OSCEFeatureState is declared outside this file - presumably feature extraction state; confirm in osce_structs.h */

+    OSCEFeatureState features;

+    /* OSCEState is declared outside this file - presumably the enhancer's own state; confirm in osce_structs.h */

+    OSCEState state;

+    /* Active OSCE method; silk_Decode() calls osce_reset() when it differs from decControl->osce_method */

+    int method;

+} silk_OSCE_struct;

+#endif

 /* Struct for Packet Loss Concealment */

 typedef struct {

     opus_int32                  pitchL_Q8;                          /* Pitch lag to use for voiced concealment                          */

@@ -270,6 +283,10 @@

 /* Decoder state                */

 /********************************/

 typedef struct {

+#ifdef ENABLE_OSCE

+    silk_OSCE_struct            osce;

+#endif

+#define SILK_DECODER_STATE_RESET_START prev_gain_Q16

     opus_int32                  prev_gain_Q16;

     opus_int32                  exc_Q14[ MAX_FRAME_LENGTH ];

     opus_int32                  sLPC_Q14_buf[ MAX_LPC_ORDER ];

--- a/silk_sources.mk

+++ b/silk_sources.mk

@@ -161,4 +161,4 @@

 silk/float/sort_FLP.c

 SILK_SOURCES_FLOAT_AVX2 = \

-silk/float/x86/inner_product_FLP_avx2.c

+silk/float/x86/inner_product_FLP_avx2.c

\ No newline at end of file

--- a/src/opus_decoder.c

+++ b/src/opus_decoder.c

@@ -57,6 +57,10 @@

 #include "dred_rdovae_dec.h"

 #endif

+#ifdef ENABLE_OSCE

+#include "osce.h"

+#endif

 struct OpusDecoder {

    int          celt_dec_offset;

    int          silk_dec_offset;

@@ -383,7 +387,7 @@

          pcm_ptr = pcm_silk;

       if (st->prev_mode==MODE_CELT_ONLY)

-         silk_InitDecoder( silk_dec );

+         silk_ResetDecoder( silk_dec );

       /* The SILK PLC cannot produce frames of less than 10 ms */

       st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs);

@@ -408,6 +412,15 @@

      st->DecControl.enable_deep_plc = st->complexity >= 5;

+#ifdef ENABLE_OSCE

+     st->DecControl.osce_method = OSCE_METHOD_NONE;

+#ifndef DISABLE_LACE

+     if (st->complexity >= 6) {st->DecControl.osce_method = OSCE_METHOD_LACE;}

+#endif

+#ifndef DISABLE_NOLACE

+     if (st->complexity >= 7) {st->DecControl.osce_method = OSCE_METHOD_NOLACE;}

+#endif

+#endif

      lost_flag = data == NULL ? 1 : 2 * !!decode_fec;

      decoded_samples = 0;

@@ -953,7 +966,7 @@

             ((char*)&st->OPUS_DECODER_RESET_START - (char*)st));

       celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);

-      silk_InitDecoder( silk_dec );

+      silk_ResetDecoder( silk_dec );

       st->stream_channels = st->channels;

       st->frame_size = st->Fs/400;

 #ifdef ENABLE_DEEP_PLC

@@ -1044,6 +1057,7 @@

           goto bad_arg;

        ret = lpcnet_plc_load_model(&st->lpcnet, data, len);

+       ret = silk_LoadOSCEModels(silk_dec, data, len) || ret;

    break;

 #endif

--- a/src/opus_demo.c

+++ b/src/opus_demo.c

@@ -70,6 +70,10 @@

   FILE *file;

   unsigned char *data;

   file = fopen(filename, "r");

+  if (file == NULL)

+  {

+    /* perror() appends ": <reason>" and a newline on its own, so the message has no "\n" */

+    perror("could not open blob file");

+    /* Bail out here: continuing would hand a NULL stream to fseek()/ftell() below. */

+    /* NOTE(review): assumes the enclosing function returns the data pointer - confirm. */

+    *len = 0;

+    return NULL;

+  }

   fseek(file, 0L, SEEK_END);

   *len = ftell(file);

   fseek(file, 0L, SEEK_SET);

@@ -254,6 +258,68 @@

 #endif

+#ifdef ENABLE_OSCE_TRAINING_DATA

+#define COMPLEXITY_MIN 0

+#define COMPLEXITY_MAX 10

+#define PACKET_LOSS_PERC_MIN 0

+#define PACKET_LOSS_PERC_MAX 50

+#define PACKET_LOSS_PERC_STEP 5

+#define CBR_BITRATE_LIMIT 8000

+#define NUM_BITRATES 102

+/* Bitrates in bit/s from which new_random_setting() draws one at random. */

+/* The table must hold exactly NUM_BITRATES entries.                      */

+static int bitrates[NUM_BITRATES] = {

+        6000,  6060,  6120,  6180,  6240,  6300,  6360,  6420,  6480,

+        6525,  6561,  6598,  6634,  6670,  6707,  6743,  6780,  6816,

+        6853,  6889,  6926,  6962,  6999,  7042,  7085,  7128,  7171,

+        7215,  7258,  7301,  7344,  7388,  7431,  7474,  7512,  7541,

+        7570,  7599,  7628,  7657,  7686,  7715,  7744,  7773,  7802,

+        7831,  7860,  7889,  7918,  7947,  7976,  8013,  8096,  8179,

+        8262,  8344,  8427,  8511,  8605,  8699,  8792,  8886,  8980,

+        9100,  9227,  9354,  9480,  9561,  9634,  9706,  9779,  9851,

+        9924,  9996, 10161, 10330, 10499, 10698, 10898, 11124, 11378,

+       11575, 11719, 11862, 12014, 12345, 12751, 13195, 13561, 13795,

+       14069, 14671, 15403, 15790, 16371, 17399, 17968, 19382, 20468,

+       22000, 32000, 64000

+};

+/* Returns a pseudo-random value from { min, min + step, min + 2*step, ... } that does */

+/* not exceed max. Every admissible value is equally likely, up to the granularity of  */

+/* rand(). Requires step > 0 and max >= min.                                           */

+static int randint(int min, int max, int step)

+{

+    /* r is uniform in [0, 1): dividing by RAND_MAX + 1. keeps 1.0 out of reach */

+    double r = ((double) rand())/ (RAND_MAX + 1.);

+    /* Number of admissible values. Scaling r by this count, rather than by            */

+    /* (max + 1 - min) / step, keeps the largest value from being under-sampled when   */

+    /* step > 1; for step == 1 both expressions are identical.                         */

+    int num_values = (max - min) / step + 1;

+    return ((int) (num_values * r)) * step + min;

+}

+/* Draws a fresh random encoder configuration (bitrate, complexity, expected packet */

+/* loss, VBR on/off), prints it to stdout and applies it to enc.                    */

+static void new_random_setting(OpusEncoder *enc)

+{

+    /* The draws happen in this fixed order: bitrate, complexity, loss, then VBR */

+    const int rate_bps  = bitrates[randint(0, NUM_BITRATES - 1, 1)];

+    const int cplx      = randint(COMPLEXITY_MIN, COMPLEXITY_MAX, 1);

+    const int loss_perc = randint(PACKET_LOSS_PERC_MIN, PACKET_LOSS_PERC_MAX, PACKET_LOSS_PERC_STEP);

+    int vbr_on;

+    /* Below CBR_BITRATE_LIMIT VBR is always on; otherwise it is a coin flip */

+    if (rate_bps < CBR_BITRATE_LIMIT) {

+        vbr_on = 1;

+    } else {

+        vbr_on = randint(0, 1, 1);

+    }

+    printf("changing settings to %d\t%d\t%d\t%d\n", rate_bps, cplx, loss_perc, vbr_on);

+    opus_encoder_ctl(enc, OPUS_SET_BITRATE(rate_bps));

+    opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(cplx));

+    opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(loss_perc));

+    opus_encoder_ctl(enc, OPUS_SET_VBR(vbr_on));

+}

+#endif

 int main(int argc, char *argv[])

     int err;

@@ -316,6 +382,10 @@

     int lost_count=0;

     FILE *packet_loss_file=NULL;

     int dred_duration=0;

+#ifdef ENABLE_OSCE_TRAINING_DATA

+    int silk_random_switching = 0;

+    int silk_frame_counter = 0;

+#endif

 #ifdef USE_WEIGHTS_FILE

     int blob_len;

     unsigned char *blob_data;

@@ -546,6 +616,12 @@

             mode_list = celt_hq_test;

             nb_modes_in_list = 4;

             args++;

+#ifdef ENABLE_OSCE_TRAINING_DATA

+        } else if( strcmp( argv[ args ], "-silk_random_switching" ) == 0 ){

+            silk_random_switching = atoi( argv[ args + 1 ] );

+            printf("switching encoding parameters every %dth frame\n", silk_random_switching);

+            args += 2;

+#endif

         } else {

             printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );

             print_usage( argv );

@@ -759,6 +835,15 @@

                 opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));

                 frame_size = mode_list[curr_mode][2];

+#ifdef ENABLE_OSCE_TRAINING_DATA

+            if (silk_random_switching)

+            {

+                silk_frame_counter += 1;

+                if (silk_frame_counter % silk_random_switching == 0) {

+                    new_random_setting(enc);

+                }

+            }

+#endif

             num_read = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);

             curr_read = (int)num_read;

             tot_in += curr_read;

--- a/src/opus_encoder.c

+++ b/src/opus_encoder.c

@@ -50,6 +50,9 @@

 #else

 #include "float/structs_FLP.h"

 #endif

+#ifdef ENABLE_OSCE_TRAINING_DATA

+#include <stdio.h>

+#endif

 #define MAX_ENCODER_BUFFER 480

@@ -1693,6 +1696,25 @@

     if (st->application == OPUS_APPLICATION_VOIP)

        hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);

+#ifdef ENABLE_OSCE_TRAINING_DATA

+       /* write out high pass filtered clean signal*/

+       static FILE *fout =NULL;

+       if (fout == NULL)

+       {

+         fout = fopen("clean_hp.s16", "wb");

+       }

+       {

+         int idx;

+         opus_int16 tmp;

+         for (idx = 0; idx < frame_size; idx++)

+         {

+            tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f);

+            fwrite(&tmp, sizeof(tmp), 1, fout);

+         }

+       }

+#endif

     } else {

        dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);

@@ -2909,7 +2931,9 @@

                goto bad_arg;

+#ifdef ENABLE_DRED

             ret = dred_encoder_load_model(&st->dred_encoder, data, len);

+#endif

         break;

 #endif

--- a/tests/test_opus_api.c

+++ b/tests/test_opus_api.c

@@ -103,7 +103,7 @@

    for(c=0;c<4;c++)

       i=opus_decoder_get_size(c);

-      if(((c==1||c==2)&&(i<=2048||i>1<<17))||((c!=1&&c!=2)&&i!=0))test_failed();

+      if(((c==1||c==2)&&(i<=2048||i>1<<18))||((c!=1&&c!=2)&&i!=0))test_failed();

       fprintf(stdout,"    opus_decoder_get_size(%d)=%d ...............%s OK.\n",c,i,i>0?"":"....");

       cfgs++;

@@ -367,7 +367,7 @@

       for(b=-1;b<4;b++)

          i=opus_multistream_decoder_get_size(a,b);

-         if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<17)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed();

+         if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<18)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed();

          fprintf(stdout,"    opus_multistream_decoder_get_size(%2d,%2d)=%d %sOK.\n",a,b,i,i>0?"":"... ");

          cfgs++;

--

⑨