ref: 93bc94ba793810e075b403f88c17536cca309414
parent: b6ac1c78bb1c6376a818c7a68ab034a68624b19a
parent: 886d647bb18cec8fb11955a27a9b48d5e7aba84a
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Tue May 23 20:57:20 EDT 2023
Merge branch 'exp_rdovae6'
--- a/dnn/Makefile.am
+++ b/dnn/Makefile.am
@@ -32,11 +32,13 @@
lpcnet.c \
lpcnet_dec.c \
lpcnet_enc.c \
+ lpcnet_tables.c \
nnet.c \
nnet_data.c \
plc_data.c \
ceps_codebooks.c \
pitch.c \
+ parse_lpcnet_weights.c \
freq.c \
kiss_fft.c \
lpcnet_plc.c
@@ -45,7 +47,7 @@
liblpcnet_la_LDFLAGS = -no-undefined \
-version-info @OP_LT_CURRENT@:@OP_LT_REVISION@:@OP_LT_AGE@
-noinst_PROGRAMS = lpcnet_demo dump_data
+noinst_PROGRAMS = lpcnet_demo dump_data dump_weights_blob
lpcnet_demo_SOURCES = lpcnet_demo.c
lpcnet_demo_LDADD = liblpcnet.la
@@ -56,9 +58,13 @@
#dump_data_SOURCES = dump_data.c
#dump_data_LDADD = $(DUMP_OBJ) $(LIBM)
-dump_data_SOURCES = common.c dump_data.c burg.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c ceps_codebooks.c
+dump_data_SOURCES = common.c dump_data.c burg.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c lpcnet_tables.c ceps_codebooks.c
dump_data_LDADD = $(LIBM)
dump_data_CFLAGS = $(AM_CFLAGS)
+
+dump_weights_blob_SOURCES = nnet_data.c plc_data.c write_lpcnet_weights.c
+dump_weights_blob_LDADD = $(LIBM)
+dump_weights_blob_CFLAGS = $(AM_CFLAGS) -DDUMP_BINARY_WEIGHTS
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = lpcnet.pc
--- a/dnn/README.md
+++ b/dnn/README.md
@@ -11,6 +11,7 @@
- J.-M. Valin, J. Skoglund, [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://jmvalin.ca/papers/lpcnet_codec.pdf), *Proc. INTERSPEECH*, arxiv:1903.12087, 2019.
- J. Skoglund, J.-M. Valin, [Improving Opus Low Bit Rate Quality with Neural Speech Synthesis](https://jmvalin.ca/papers/opusnet.pdf), *Proc. INTERSPEECH*, arxiv:1905.04628, 2020.
- J.-M. Valin, A. Mustafa, C. Montgomery, T.B. Terriberry, M. Klingbeil, P. Smaragdis, A. Krishnaswamy, [Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model](https://jmvalin.ca/papers/lpcnet_plc.pdf), *Proc. INTERSPEECH*, arxiv:2205.05785, 2022.
+- J.-M. Valin, J. Büthe, A. Mustafa, [Low-Bitrate Redundancy Coding of Speech Using a Rate-Distortion-Optimized Variational Autoencoder](https://jmvalin.ca/papers/valin_dred.pdf), *Proc. ICASSP*, arXiv:2212.04453, 2023. ([blog post](https://www.amazon.science/blog/neural-encoding-enables-more-efficient-recovery-of-lost-audio-packets))
# Introduction
--- a/dnn/autogen.sh
+++ b/dnn/autogen.sh
@@ -6,7 +6,7 @@
test -n "$srcdir" && cd "$srcdir"
#SHA1 of the first commit compatible with the current model
-commit=97e64b3
+commit=399be7c
./download_model.sh $commit
echo "Updating build configuration files for lpcnet, please wait...."
--- a/dnn/common.c
+++ b/dnn/common.c
@@ -40,7 +40,7 @@
float ref[NB_BANDS];
float pred[3*NB_BANDS];
RNN_COPY(ref, x, NB_BANDS);
- for (i=0;i<NB_BANDS;i++) pred[i] = .5*(left[i] + right[i]);
+ for (i=0;i<NB_BANDS;i++) pred[i] = .5f*(left[i] + right[i]);
for (i=0;i<NB_BANDS;i++) pred[NB_BANDS+i] = left[i];
for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = right[i];
for (i=0;i<NB_BANDS;i++) {
--- a/dnn/common.h
+++ b/dnn/common.h
@@ -36,8 +36,8 @@
{
float s;
float scale_1 = 32768.f/255.f;
- u = u - 128;
- s = u >= 0 ? 1 : -1;
+ u = u - 128.f;
+ s = u >= 0.f ? 1.f : -1.f;
u = fabs(u);
return s*scale_1*(exp(u/128.*LOG256)-1);
}
--- /dev/null
+++ b/dnn/download_model.bat
@@ -1,0 +1,10 @@
+@echo off
+set model=lpcnet_data-%1.tar.gz
+
+if not exist %model% (
+ echo Downloading latest model
+ powershell -Command "(New-Object System.Net.WebClient).DownloadFile('https://media.xiph.org/lpcnet/data/%model%', '%model%')"
+)
+
+tar -xvzf %model%
+
--- a/dnn/download_model.sh
+++ b/dnn/download_model.sh
@@ -7,7 +7,7 @@
echo "Downloading latest model"
wget https://media.xiph.org/lpcnet/data/$model
fi
-tar xvf $model
+tar xvof $model
touch src/nnet_data.[ch]
touch src/plc_data.[ch]
mv src/*.[ch] .
--- /dev/null
+++ b/dnn/dred_rdovae.c
@@ -1,0 +1,135 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "dred_rdovae.h"
+#include "dred_rdovae_enc.h"
+#include "dred_rdovae_dec.h"
+#include "dred_rdovae_stats_data.h"
+
+void DRED_rdovae_decode_all(float *features, const float *state, const float *latents, int nb_latents)
+{
+ int i;
+ RDOVAEDec dec;
+ memset(&dec, 0, sizeof(dec));
+ DRED_rdovae_dec_init_states(&dec, state);
+ for (i = 0; i < 2*nb_latents; i += 2)
+ {
+ DRED_rdovae_decode_qframe(
+ &dec,
+ &features[2*i*DRED_NUM_FEATURES],
+ &latents[(i/2)*DRED_LATENT_DIM]);
+ }
+}
+
+size_t DRED_rdovae_get_enc_size()
+{
+ return sizeof(RDOVAEEnc);
+}
+
+size_t DRED_rdovae_get_dec_size()
+{
+ return sizeof(RDOVAEDec);
+}
+
+void DRED_rdovae_init_encoder(RDOVAEEnc *enc_state)
+{
+ memset(enc_state, 0, sizeof(*enc_state));
+
+}
+
+void DRED_rdovae_init_decoder(RDOVAEDec *dec_state)
+{
+ memset(dec_state, 0, sizeof(*dec_state));
+}
+
+
+RDOVAEEnc * DRED_rdovae_create_encoder()
+{
+ RDOVAEEnc *enc;
+ enc = (RDOVAEEnc*) calloc(sizeof(*enc), 1);
+ DRED_rdovae_init_encoder(enc);
+ return enc;
+}
+
+RDOVAEDec * DRED_rdovae_create_decoder()
+{
+ RDOVAEDec *dec;
+ dec = (RDOVAEDec*) calloc(sizeof(*dec), 1);
+ DRED_rdovae_init_decoder(dec);
+ return dec;
+}
+
+void DRED_rdovae_destroy_decoder(RDOVAEDec* dec)
+{
+ free(dec);
+}
+
+void DRED_rdovae_destroy_encoder(RDOVAEEnc* enc)
+{
+ free(enc);
+}
+
+void DRED_rdovae_encode_dframe(RDOVAEEnc *enc_state, float *latents, float *initial_state, const float *input)
+{
+ dred_rdovae_encode_dframe(enc_state, latents, initial_state, input);
+}
+
+void DRED_rdovae_dec_init_states(RDOVAEDec *h, const float * initial_state)
+{
+ dred_rdovae_dec_init_states(h, initial_state);
+}
+
+void DRED_rdovae_decode_qframe(RDOVAEDec *h, float *qframe, const float *z)
+{
+ dred_rdovae_decode_qframe(h, qframe, z);
+}
+
+
+const opus_uint16 * DRED_rdovae_get_p0_pointer(void)
+{
+ return &dred_p0_q15[0];
+}
+
+const opus_uint16 * DRED_rdovae_get_dead_zone_pointer(void)
+{
+ return &dred_dead_zone_q10[0];
+}
+
+const opus_uint16 * DRED_rdovae_get_r_pointer(void)
+{
+ return &dred_r_q15[0];
+}
+
+const opus_uint16 * DRED_rdovae_get_quant_scales_pointer(void)
+{
+ return &dred_quant_scales_q8[0];
+}
--- /dev/null
+++ b/dnn/dred_rdovae_dec.c
@@ -1,0 +1,96 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "dred_rdovae_dec.h"
+#include "dred_rdovae_constants.h"
+
+
+void dred_rdovae_dec_init_states(
+ RDOVAEDec *h, /* io: state buffer handle */
+ const float *initial_state /* i: initial state */
+ )
+{
+ /* initialize GRU states from initial state */
+ _lpcnet_compute_dense(&state1, h->dense2_state, initial_state);
+ _lpcnet_compute_dense(&state2, h->dense4_state, initial_state);
+ _lpcnet_compute_dense(&state3, h->dense6_state, initial_state);
+}
+
+
+void dred_rdovae_decode_qframe(
+ RDOVAEDec *dec_state, /* io: state buffer handle */
+ float *qframe, /* o: quadruple feature frame (four concatenated frames in reverse order) */
+ const float *input /* i: latent vector */
+ )
+{
+ float buffer[DEC_DENSE1_OUT_SIZE + DEC_DENSE2_OUT_SIZE + DEC_DENSE3_OUT_SIZE + DEC_DENSE4_OUT_SIZE + DEC_DENSE5_OUT_SIZE + DEC_DENSE6_OUT_SIZE + DEC_DENSE7_OUT_SIZE + DEC_DENSE8_OUT_SIZE];
+ int output_index = 0;
+ int input_index = 0;
+ float zero_vector[1024] = {0};
+
+ /* run encoder stack and concatenate output in buffer*/
+ _lpcnet_compute_dense(&dec_dense1, &buffer[output_index], input);
+ input_index = output_index;
+ output_index += DEC_DENSE1_OUT_SIZE;
+
+ compute_gruB(&dec_dense2, zero_vector, dec_state->dense2_state, &buffer[input_index]);
+ memcpy(&buffer[output_index], dec_state->dense2_state, DEC_DENSE2_OUT_SIZE * sizeof(float));
+ input_index = output_index;
+ output_index += DEC_DENSE2_OUT_SIZE;
+
+ _lpcnet_compute_dense(&dec_dense3, &buffer[output_index], &buffer[input_index]);
+ input_index = output_index;
+ output_index += DEC_DENSE3_OUT_SIZE;
+
+ compute_gruB(&dec_dense4, zero_vector, dec_state->dense4_state, &buffer[input_index]);
+ memcpy(&buffer[output_index], dec_state->dense4_state, DEC_DENSE4_OUT_SIZE * sizeof(float));
+ input_index = output_index;
+ output_index += DEC_DENSE4_OUT_SIZE;
+
+ _lpcnet_compute_dense(&dec_dense5, &buffer[output_index], &buffer[input_index]);
+ input_index = output_index;
+ output_index += DEC_DENSE5_OUT_SIZE;
+
+ compute_gruB(&dec_dense6, zero_vector, dec_state->dense6_state, &buffer[input_index]);
+ memcpy(&buffer[output_index], dec_state->dense6_state, DEC_DENSE6_OUT_SIZE * sizeof(float));
+ input_index = output_index;
+ output_index += DEC_DENSE6_OUT_SIZE;
+
+ _lpcnet_compute_dense(&dec_dense7, &buffer[output_index], &buffer[input_index]);
+ input_index = output_index;
+ output_index += DEC_DENSE7_OUT_SIZE;
+
+ _lpcnet_compute_dense(&dec_dense8, &buffer[output_index], &buffer[input_index]);
+ output_index += DEC_DENSE8_OUT_SIZE;
+
+ _lpcnet_compute_dense(&dec_final, qframe, buffer);
+}
\ No newline at end of file
--- /dev/null
+++ b/dnn/dred_rdovae_dec.h
@@ -1,0 +1,44 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _DRED_RDOVAE_DEC_H
+#define _DRED_RDOVAE_DEC_H
+
+#include "dred_rdovae.h"
+#include "dred_rdovae_dec_data.h"
+#include "dred_rdovae_stats_data.h"
+
+struct RDOVAEDecStruct {
+ float dense2_state[DEC_DENSE2_STATE_SIZE];
+ float dense4_state[DEC_DENSE2_STATE_SIZE];
+ float dense6_state[DEC_DENSE2_STATE_SIZE];
+};
+
+void dred_rdovae_dec_init_states(RDOVAEDec *h, const float * initial_state);
+void dred_rdovae_decode_qframe(RDOVAEDec *h, float *qframe, const float * z);
+
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/dred_rdovae_enc.c
@@ -1,0 +1,94 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <math.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "dred_rdovae_enc.h"
+
+
+void dred_rdovae_encode_dframe(
+ RDOVAEEnc *enc_state, /* io: encoder state */
+ float *latents, /* o: latent vector */
+ float *initial_state, /* o: initial state */
+ const float *input /* i: double feature frame (concatenated) */
+ )
+{
+ float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE];
+ int output_index = 0;
+ int input_index = 0;
+ float zero_vector[1024] = {0};
+
+ /* run encoder stack and concatenate output in buffer*/
+ _lpcnet_compute_dense(&enc_dense1, &buffer[output_index], input);
+ input_index = output_index;
+ output_index += ENC_DENSE1_OUT_SIZE;
+
+ compute_gruB(&enc_dense2, zero_vector, enc_state->dense2_state, &buffer[input_index]);
+ memcpy(&buffer[output_index], enc_state->dense2_state, ENC_DENSE2_OUT_SIZE * sizeof(float));
+ input_index = output_index;
+ output_index += ENC_DENSE2_OUT_SIZE;
+
+ _lpcnet_compute_dense(&enc_dense3, &buffer[output_index], &buffer[input_index]);
+ input_index = output_index;
+ output_index += ENC_DENSE3_OUT_SIZE;
+
+ compute_gruB(&enc_dense4, zero_vector, enc_state->dense4_state, &buffer[input_index]);
+ memcpy(&buffer[output_index], enc_state->dense4_state, ENC_DENSE4_OUT_SIZE * sizeof(float));
+ input_index = output_index;
+ output_index += ENC_DENSE4_OUT_SIZE;
+
+ _lpcnet_compute_dense(&enc_dense5, &buffer[output_index], &buffer[input_index]);
+ input_index = output_index;
+ output_index += ENC_DENSE5_OUT_SIZE;
+
+ compute_gruB(&enc_dense6, zero_vector, enc_state->dense6_state, &buffer[input_index]);
+ memcpy(&buffer[output_index], enc_state->dense6_state, ENC_DENSE6_OUT_SIZE * sizeof(float));
+ input_index = output_index;
+ output_index += ENC_DENSE6_OUT_SIZE;
+
+ _lpcnet_compute_dense(&enc_dense7, &buffer[output_index], &buffer[input_index]);
+ input_index = output_index;
+ output_index += ENC_DENSE7_OUT_SIZE;
+
+ _lpcnet_compute_dense(&enc_dense8, &buffer[output_index], &buffer[input_index]);
+ output_index += ENC_DENSE8_OUT_SIZE;
+
+ /* compute latents from concatenated input buffer */
+ compute_conv1d(&bits_dense, latents, enc_state->bits_dense_state, buffer);
+
+
+ /* next, calculate initial state */
+ _lpcnet_compute_dense(&gdense1, &buffer[output_index], buffer);
+ input_index = output_index;
+ _lpcnet_compute_dense(&gdense2, initial_state, &buffer[input_index]);
+
+}
--- /dev/null
+++ b/dnn/dred_rdovae_enc.h
@@ -1,0 +1,45 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _DRED_RDOVAE_ENC_H
+#define _DRED_RDOVAE_ENC_H
+
+#include "dred_rdovae.h"
+
+#include "dred_rdovae_enc_data.h"
+
+struct RDOVAEEncStruct {
+ float dense2_state[3 * ENC_DENSE2_STATE_SIZE];
+ float dense4_state[3 * ENC_DENSE4_STATE_SIZE];
+ float dense6_state[3 * ENC_DENSE6_STATE_SIZE];
+ float bits_dense_state[BITS_DENSE_STATE_SIZE];
+};
+
+void dred_rdovae_encode_dframe(RDOVAEEnc *enc_state, float *latents, float *initial_state, const float *input);
+
+
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/dump_lpcnet_tables.c
@@ -1,0 +1,104 @@
+/* Copyright (c) 2017-2018 Mozilla
+ Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include "freq.h"
+#include "kiss_fft.h"
+
+
+int main(void) {
+ int i;
+ FILE *file;
+ kiss_fft_state *kfft;
+ float half_window[OVERLAP_SIZE];
+ float dct_table[NB_BANDS*NB_BANDS];
+
+ file=fopen("lpcnet_tables.c", "wb");
+ fprintf(file, "/* The contents of this file was automatically generated by dump_lpcnet_tables.c*/\n\n");
+ fprintf(file, "#ifdef HAVE_CONFIG_H\n");
+ fprintf(file, "#include \"config.h\"\n");
+ fprintf(file, "#endif\n");
+
+ fprintf(file, "#include \"kiss_fft.h\"\n\n");
+
+ kfft = opus_fft_alloc_twiddles(WINDOW_SIZE, NULL, NULL, NULL, 0);
+
+ fprintf(file, "static const arch_fft_state arch_fft = {0, NULL};\n\n");
+
+ fprintf (file, "static const opus_int16 fft_bitrev[%d] = {\n", kfft->nfft);
+ for (i=0;i<kfft->nfft;i++)
+ fprintf (file, "%d,%c", kfft->bitrev[i],(i+16)%15==0?'\n':' ');
+ fprintf (file, "};\n\n");
+
+ fprintf (file, "static const kiss_twiddle_cpx fft_twiddles[%d] = {\n", kfft->nfft);
+ for (i=0;i<kfft->nfft;i++)
+ fprintf (file, "{%#0.9gf, %#0.9gf},%c", kfft->twiddles[i].r, kfft->twiddles[i].i,(i+3)%2==0?'\n':' ');
+ fprintf (file, "};\n\n");
+
+
+ fprintf(file, "const kiss_fft_state kfft = {\n");
+ fprintf(file, "%d, /* nfft */\n", kfft->nfft);
+ fprintf(file, "%#0.8gf, /* scale */\n", kfft->scale);
+ fprintf(file, "%d, /* shift */\n", kfft->shift);
+ fprintf(file, "{");
+ for (i=0;i<2*MAXFACTORS;i++) {
+ fprintf(file, "%d, ", kfft->factors[i]);
+ }
+ fprintf(file, "}, /* factors */\n");
+ fprintf(file, "fft_bitrev, /* bitrev*/\n");
+ fprintf(file, "fft_twiddles, /* twiddles*/\n");
+ fprintf(file, "(arch_fft_state *)&arch_fft, /* arch_fft*/\n");
+
+ fprintf(file, "};\n\n");
+
+ for (i=0;i<OVERLAP_SIZE;i++)
+ half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/OVERLAP_SIZE) * sin(.5*M_PI*(i+.5)/OVERLAP_SIZE));
+ fprintf(file, "const float half_window[] = {\n");
+ for (i=0;i<OVERLAP_SIZE;i++)
+ fprintf (file, "%#0.9gf,%c", half_window[i],(i+6)%5==0?'\n':' ');
+ fprintf(file, "};\n\n");
+
+ for (i=0;i<NB_BANDS;i++) {
+ int j;
+ for (j=0;j<NB_BANDS;j++) {
+ dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
+ if (j==0) dct_table[i*NB_BANDS + j] *= sqrt(.5);
+ }
+ }
+ fprintf(file, "const float dct_table[] = {\n");
+ for (i=0;i<NB_BANDS*NB_BANDS;i++)
+ fprintf (file, "%#0.9gf,%c", dct_table[i],(i+6)%5==0?'\n':' ');
+ fprintf(file, "};\n");
+
+ fclose(file);
+ return 0;
+}
--- a/dnn/freq.c
+++ b/dnn/freq.c
@@ -51,14 +51,12 @@
0.8f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.666667f, 0.5f, 0.5f, 0.5f, 0.333333f, 0.25f, 0.25f, 0.2f, 0.166667f, 0.173913f
};
-typedef struct {
- int init;
- kiss_fft_state *kfft;
- float half_window[OVERLAP_SIZE];
- float dct_table[NB_BANDS*NB_BANDS];
-} CommonState;
+extern const kiss_fft_state kfft;
+extern const float half_window[OVERLAP_SIZE];
+extern const float dct_table[NB_BANDS*NB_BANDS];
+
void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
int i;
float sum[NB_BANDS] = {0};
@@ -162,15 +160,15 @@
float x[WINDOW_SIZE];
float Eburg[NB_BANDS];
float g;
- float E;
kiss_fft_cpx LPC[FREQ_SIZE];
float Ly[NB_BANDS];
+ float logMax = -2;
+ float follow = -2;
assert(order <= LPC_ORDER);
assert(len <= FRAME_SIZE);
for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i];
g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
g /= len - 2*(order-1);
- //printf("%g\n", g);
RNN_CLEAR(x, WINDOW_SIZE);
x[0] = 1;
for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
@@ -177,14 +175,11 @@
forward_transform(LPC, x);
compute_band_energy_inverse(Eburg, LPC);
for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
- float logMax = -2;
- float follow = -2;
for (i=0;i<NB_BANDS;i++) {
Ly[i] = log10(1e-2+Eburg[i]);
Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
logMax = MAX16(logMax, Ly[i]);
follow = MAX16(follow-2.5, Ly[i]);
- E += Eburg[i];
}
dct(burg_cepstrum, Ly);
burg_cepstrum[0] += - 4;
@@ -243,32 +238,14 @@
}
}
-CommonState common;
-static void check_init(void) {
- int i;
- if (common.init) return;
- common.kfft = opus_fft_alloc_twiddles(WINDOW_SIZE, NULL, NULL, NULL, 0);
- for (i=0;i<OVERLAP_SIZE;i++)
- common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/OVERLAP_SIZE) * sin(.5*M_PI*(i+.5)/OVERLAP_SIZE));
- for (i=0;i<NB_BANDS;i++) {
- int j;
- for (j=0;j<NB_BANDS;j++) {
- common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
- if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
- }
- }
- common.init = 1;
-}
-
void dct(float *out, const float *in) {
int i;
- check_init();
for (i=0;i<NB_BANDS;i++) {
int j;
float sum = 0;
for (j=0;j<NB_BANDS;j++) {
- sum += in[j] * common.dct_table[j*NB_BANDS + i];
+ sum += in[j] * dct_table[j*NB_BANDS + i];
}
out[i] = sum*sqrt(2./NB_BANDS);
}
@@ -276,12 +253,11 @@
void idct(float *out, const float *in) {
int i;
- check_init();
for (i=0;i<NB_BANDS;i++) {
int j;
float sum = 0;
for (j=0;j<NB_BANDS;j++) {
- sum += in[j] * common.dct_table[i*NB_BANDS + j];
+ sum += in[j] * dct_table[i*NB_BANDS + j];
}
out[i] = sum*sqrt(2./NB_BANDS);
}
@@ -291,12 +267,11 @@
int i;
kiss_fft_cpx x[WINDOW_SIZE];
kiss_fft_cpx y[WINDOW_SIZE];
- check_init();
for (i=0;i<WINDOW_SIZE;i++) {
x[i].r = in[i];
x[i].i = 0;
}
- opus_fft(common.kfft, x, y, 0);
+ opus_fft(&kfft, x, y, 0);
for (i=0;i<FREQ_SIZE;i++) {
out[i] = y[i];
}
@@ -306,7 +281,6 @@
int i;
kiss_fft_cpx x[WINDOW_SIZE];
kiss_fft_cpx y[WINDOW_SIZE];
- check_init();
for (i=0;i<FREQ_SIZE;i++) {
x[i] = in[i];
}
@@ -314,7 +288,7 @@
x[i].r = x[WINDOW_SIZE - i].r;
x[i].i = -x[WINDOW_SIZE - i].i;
}
- opus_fft(common.kfft, x, y, 0);
+ opus_fft(&kfft, x, y, 0);
/* output in reverse order for IFFT. */
out[0] = WINDOW_SIZE*y[0].r;
for (i=1;i<WINDOW_SIZE;i++) {
@@ -371,10 +345,9 @@
void apply_window(float *x) {
int i;
- check_init();
for (i=0;i<OVERLAP_SIZE;i++) {
- x[i] *= common.half_window[i];
- x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
+ x[i] *= half_window[i];
+ x[WINDOW_SIZE - 1 - i] *= half_window[i];
}
}
--- /dev/null
+++ b/dnn/include/dred_rdovae.h
@@ -1,0 +1,60 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include <stdlib.h>
+
+#include "opus_types.h"
+
+typedef struct RDOVAEDecStruct RDOVAEDec;
+typedef struct RDOVAEEncStruct RDOVAEEnc;
+
+void DRED_rdovae_decode_all(float *features, const float *state, const float *latents, int nb_latents);
+
+
+size_t DRED_rdovae_get_enc_size(void);
+
+size_t DRED_rdovae_get_dec_size(void);
+
+RDOVAEDec * DRED_rdovae_create_decoder(void);
+RDOVAEEnc * DRED_rdovae_create_encoder(void);
+void DRED_rdovae_destroy_decoder(RDOVAEDec* h);
+void DRED_rdovae_destroy_encoder(RDOVAEEnc* h);
+
+
+void DRED_rdovae_init_encoder(RDOVAEEnc *enc_state);
+
+void DRED_rdovae_encode_dframe(RDOVAEEnc *enc_state, float *latents, float *initial_state, const float *input);
+
+void DRED_rdovae_dec_init_states(RDOVAEDec *h, const float * initial_state);
+
+void DRED_rdovae_decode_qframe(RDOVAEDec *h, float *qframe, const float * z);
+
+const opus_uint16 * DRED_rdovae_get_p0_pointer(void);
+const opus_uint16 * DRED_rdovae_get_dead_zone_pointer(void);
+const opus_uint16 * DRED_rdovae_get_r_pointer(void);
+const opus_uint16 * DRED_rdovae_get_quant_scales_pointer(void);
--- a/dnn/include/lpcnet.h
+++ b/dnn/include/lpcnet.h
@@ -197,4 +197,6 @@
LPCNET_EXPORT void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features);
+LPCNET_EXPORT void lpcnet_plc_fec_clear(LPCNetPLCState *st);
+
#endif
--- a/dnn/kiss_fft.c
+++ b/dnn/kiss_fft.c
@@ -506,10 +506,10 @@
if (cfg)
{
opus_fft_free_arch((kiss_fft_state *)cfg, arch);
- opus_free((opus_int16*)cfg->bitrev);
+ free((opus_int16*)cfg->bitrev);
if (cfg->shift < 0)
- opus_free((kiss_twiddle_cpx*)cfg->twiddles);
- opus_free((kiss_fft_state*)cfg);
+ free((kiss_twiddle_cpx*)cfg->twiddles);
+ free((kiss_fft_state*)cfg);
}
}
--- a/dnn/kiss_fft.h
+++ b/dnn/kiss_fft.h
@@ -34,8 +34,7 @@
#include "arch.h"
#include <stdlib.h>
-#define opus_alloc(x) malloc(x)
-#define opus_free(x) free(x)
+#define lpcnet_alloc(x) malloc(x)
#ifdef __cplusplus
extern "C" {
@@ -46,7 +45,7 @@
# define kiss_fft_scalar __m128
#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
#else
-#define KISS_FFT_MALLOC opus_alloc
+#define KISS_FFT_MALLOC lpcnet_alloc
#endif
#ifdef FIXED_POINT
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -89,23 +89,21 @@
float dense1_out[FEATURE_DENSE1_OUT_SIZE];
int pitch;
float rc[LPC_ORDER];
- //static float features[NB_FEATURES];
- //RNN_COPY(features, lpcnet->last_features, NB_FEATURES);
/* Matches the Python code -- the 0.1 avoids rounding issues. */
pitch = (int)floor(.1 + 50*features[NB_BANDS]+100);
pitch = IMIN(255, IMAX(33, pitch));
net = &lpcnet->nnet;
RNN_COPY(in, features, NB_FEATURES);
- compute_embedding(&embed_pitch, &in[NB_FEATURES], pitch);
- compute_conv1d(&feature_conv1, conv1_out, net->feature_conv1_state, in);
+ compute_embedding(&lpcnet->model.embed_pitch, &in[NB_FEATURES], pitch);
+ compute_conv1d(&lpcnet->model.feature_conv1, conv1_out, net->feature_conv1_state, in);
if (lpcnet->frame_count < FEATURE_CONV1_DELAY) RNN_CLEAR(conv1_out, FEATURE_CONV1_OUT_SIZE);
- compute_conv1d(&feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
+ compute_conv1d(&lpcnet->model.feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
if (lpcnet->frame_count < FEATURES_DELAY) RNN_CLEAR(conv2_out, FEATURE_CONV2_OUT_SIZE);
- _lpcnet_compute_dense(&feature_dense1, dense1_out, conv2_out);
- _lpcnet_compute_dense(&feature_dense2, condition, dense1_out);
+ _lpcnet_compute_dense(&lpcnet->model.feature_dense1, dense1_out, conv2_out);
+ _lpcnet_compute_dense(&lpcnet->model.feature_dense2, condition, dense1_out);
RNN_COPY(rc, condition, LPC_ORDER);
- _lpcnet_compute_dense(&gru_a_dense_feature, gru_a_condition, condition);
- _lpcnet_compute_dense(&gru_b_dense_feature, gru_b_condition, condition);
+ _lpcnet_compute_dense(&lpcnet->model.gru_a_dense_feature, gru_a_condition, condition);
+ _lpcnet_compute_dense(&lpcnet->model.gru_b_dense_feature, gru_b_condition, condition);
#ifdef END2END
rc2lpc(lpc, rc);
#elif FEATURES_DELAY>0
@@ -118,29 +116,54 @@
#ifdef LPC_GAMMA
lpc_weighting(lpc, LPC_GAMMA);
#endif
- //RNN_COPY(lpcnet->last_features, _features, NB_FEATURES);
if (lpcnet->frame_count < 1000) lpcnet->frame_count++;
}
-int run_sample_network(NNetState *net, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table, kiss99_ctx *rng)
+void run_frame_network_deferred(LPCNetState *lpcnet, const float *features)
+{
+ int max_buffer_size = lpcnet->model.feature_conv1.kernel_size + lpcnet->model.feature_conv2.kernel_size - 2;
+ celt_assert(max_buffer_size <= MAX_FEATURE_BUFFER_SIZE);
+ if (lpcnet->feature_buffer_fill == max_buffer_size) {
+ RNN_MOVE(lpcnet->feature_buffer, &lpcnet->feature_buffer[NB_FEATURES], (max_buffer_size-1)*NB_FEATURES);
+ } else {
+ lpcnet->feature_buffer_fill++;
+ }
+ RNN_COPY(&lpcnet->feature_buffer[(lpcnet->feature_buffer_fill-1)*NB_FEATURES], features, NB_FEATURES);
+}
+
+void run_frame_network_flush(LPCNetState *lpcnet)
+{
+ int i;
+ for (i=0;i<lpcnet->feature_buffer_fill;i++) {
+ float lpc[LPC_ORDER];
+ float gru_a_condition[3*GRU_A_STATE_SIZE];
+ float gru_b_condition[3*GRU_B_STATE_SIZE];
+ run_frame_network(lpcnet, gru_a_condition, gru_b_condition, lpc, &lpcnet->feature_buffer[i*NB_FEATURES]);
+ }
+ lpcnet->feature_buffer_fill = 0;
+}
+
+int run_sample_network(LPCNetState *lpcnet, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table, kiss99_ctx *rng)
+{
+ NNetState *net;
float gru_a_input[3*GRU_A_STATE_SIZE];
float in_b[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE];
float gru_b_input[3*GRU_B_STATE_SIZE];
+ net = &lpcnet->nnet;
#if 1
- compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &gru_a_embed_sig, last_sig, &gru_a_embed_pred, pred, &gru_a_embed_exc, last_exc);
+ compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &lpcnet->model.gru_a_embed_sig, last_sig, &lpcnet->model.gru_a_embed_pred, pred, &lpcnet->model.gru_a_embed_exc, last_exc);
#else
RNN_COPY(gru_a_input, gru_a_condition, 3*GRU_A_STATE_SIZE);
- accum_embedding(&gru_a_embed_sig, gru_a_input, last_sig);
- accum_embedding(&gru_a_embed_pred, gru_a_input, pred);
- accum_embedding(&gru_a_embed_exc, gru_a_input, last_exc);
+ accum_embedding(&lpcnet->model.gru_a_embed_sig, gru_a_input, last_sig);
+ accum_embedding(&lpcnet->model.gru_a_embed_pred, gru_a_input, pred);
+ accum_embedding(&lpcnet->model.gru_a_embed_exc, gru_a_input, last_exc);
#endif
/*compute_gru3(&gru_a, net->gru_a_state, gru_a_input);*/
- compute_sparse_gru(&sparse_gru_a, net->gru_a_state, gru_a_input);
+ compute_sparse_gru(&lpcnet->model.sparse_gru_a, net->gru_a_state, gru_a_input);
RNN_COPY(in_b, net->gru_a_state, GRU_A_STATE_SIZE);
RNN_COPY(gru_b_input, gru_b_condition, 3*GRU_B_STATE_SIZE);
- compute_gruB(&gru_b, gru_b_input, net->gru_b_state, in_b);
- return sample_mdense(&dual_fc, net->gru_b_state, sampling_logit_table, rng);
+ compute_gruB(&lpcnet->model.gru_b, gru_b_input, net->gru_b_state, in_b);
+ return sample_mdense(&lpcnet->model.dual_fc, net->gru_b_state, sampling_logit_table, rng);
}
LPCNET_EXPORT int lpcnet_get_size()
@@ -151,15 +174,18 @@
LPCNET_EXPORT int lpcnet_init(LPCNetState *lpcnet)
{
int i;
+ int ret;
const char* rng_string="LPCNet";
memset(lpcnet, 0, lpcnet_get_size());
lpcnet->last_exc = lin2ulaw(0.f);
for (i=0;i<256;i++) {
- float prob = .025+.95*i/255.;
+ float prob = .025f+.95f*i/255.f;
lpcnet->sampling_logit_table[i] = -log((1-prob)/prob);
}
kiss99_srand(&lpcnet->rng, (const unsigned char *)rng_string, strlen(rng_string));
- return 0;
+ ret = init_lpcnet_model(&lpcnet->model, lpcnet_arrays);
+ celt_assert(ret == 0);
+ return ret;
}
@@ -176,6 +202,14 @@
free(lpcnet);
}
+void lpcnet_reset_signal(LPCNetState *lpcnet)
+{
+ lpcnet->deemph_mem = 0;
+ lpcnet->last_exc = lin2ulaw(0.f);
+ RNN_CLEAR(lpcnet->last_sig, LPC_ORDER);
+ RNN_CLEAR(lpcnet->nnet.gru_a_state, GRU_A_STATE_SIZE);
+ RNN_CLEAR(lpcnet->nnet.gru_b_state, GRU_B_STATE_SIZE);
+}
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload)
{
@@ -197,7 +231,7 @@
for (j=0;j<LPC_ORDER;j++) pred -= lpcnet->last_sig[j]*lpcnet->lpc[j];
last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
pred_ulaw = lin2ulaw(pred);
- exc = run_sample_network(&lpcnet->nnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
+ exc = run_sample_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
if (i < preload) {
exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred);
pcm = output[i]-PREEMPH*lpcnet->deemph_mem;
--- a/dnn/lpcnet_dec.c
+++ b/dnn/lpcnet_dec.c
@@ -121,13 +121,13 @@
}
for (sub=0;sub<4;sub++) {
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
- p *= 1 + modulation/16./7.*(2*sub-3);
+ p *= 1.f + modulation/16.f/7.f*(2*sub-3);
p = MIN16(255, MAX16(33, p));
- features[sub][NB_BANDS] = .02*(p-100);
- features[sub][NB_BANDS + 1] = frame_corr-.5;
+ features[sub][NB_BANDS] = .02f*(p-100.f);
+ features[sub][NB_BANDS + 1] = frame_corr-.5f;
}
- features[3][0] = (c0_id-64)/4.;
+ features[3][0] = (c0_id-64)/4.f;
for (i=0;i<NB_BANDS_1;i++) {
features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
}
@@ -141,7 +141,7 @@
features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
}
if ((vq_mid&MULTI_MASK) < 2) {
- for (i=0;i<NB_BANDS;i++) features[1][i] += .5*(vq_mem[i] + features[3][i]);
+ for (i=0;i<NB_BANDS;i++) features[1][i] += .5f*(vq_mem[i] + features[3][i]);
} else if ((vq_mid&MULTI_MASK) == 2) {
for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
} else {
--- a/dnn/lpcnet_demo.c
+++ b/dnn/lpcnet_demo.c
@@ -39,6 +39,7 @@
#define MODE_FEATURES 2
#define MODE_SYNTHESIS 3
#define MODE_PLC 4
+#define MODE_ADDLPC 5
void usage(void) {
fprintf(stderr, "usage: lpcnet_demo -encode <input.pcm> <compressed.lpcnet>\n");
@@ -46,7 +47,8 @@
fprintf(stderr, " lpcnet_demo -features <input.pcm> <features.f32>\n");
fprintf(stderr, " lpcnet_demo -synthesis <features.f32> <output.pcm>\n");
fprintf(stderr, " lpcnet_demo -plc <plc_options> <percent> <input.pcm> <output.pcm>\n");
- fprintf(stderr, " lpcnet_demo -plc_file <plc_options> <percent> <input.pcm> <output.pcm>\n\n");
+ fprintf(stderr, " lpcnet_demo -plc_file <plc_options> <percent> <input.pcm> <output.pcm>\n");
+ fprintf(stderr, " lpcnet_demo -addlpc <features_without_lpc.f32> <features_with_lpc.lpc>\n\n");
fprintf(stderr, " plc_options:\n");
fprintf(stderr, " causal: normal (causal) PLC\n");
fprintf(stderr, " causal_dc: normal (causal) PLC with DC offset compensation\n");
@@ -83,6 +85,8 @@
}
argv+=2;
argc-=2;
+ } else if (strcmp(argv[1], "-addlpc") == 0){
+ mode=MODE_ADDLPC;
} else {
usage();
}
@@ -165,8 +169,8 @@
int count=0;
int loss=0;
int skip=0, extra=0;
- if ((plc_flags&0x3) == LPCNET_PLC_NONCAUSAL) skip=extra=80;
LPCNetPLCState *net;
+ if ((plc_flags&0x3) == LPCNET_PLC_NONCAUSAL) skip=extra=80;
net = lpcnet_plc_create(plc_flags);
while (1) {
size_t ret;
@@ -187,6 +191,17 @@
fwrite(pcm, sizeof(pcm[0]), extra, fout);
}
lpcnet_plc_destroy(net);
+ } else if (mode == MODE_ADDLPC) {
+ float features[36];
+ size_t ret;
+
+ while (1) {
+ ret = fread(features, sizeof(features[0]), 36, fin);
+ if (ret != 36 || feof(fin)) break;
+ lpc_from_cepstrum(&features[20], &features[0]);
+ fwrite(features, sizeof(features[0]), 36, fout);
+ }
+
} else {
fprintf(stderr, "unknown action\n");
}
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -52,7 +52,7 @@
void vq_quantize_mbest(const float *codebook, int nb_entries, const float *x, int ndim, int mbest, float *dist, int *index)
{
int i, j;
- for (i=0;i<mbest;i++) dist[i] = 1e15;
+ for (i=0;i<mbest;i++) dist[i] = 1e15f;
for (i=0;i<nb_entries;i++)
{
@@ -80,7 +80,7 @@
int vq_quantize(const float *codebook, int nb_entries, const float *x, int ndim, float *dist_out)
{
int i, j;
- float min_dist = 1e15;
+ float min_dist = 1e15f;
int nearest = 0;
for (i=0;i<nb_entries;i++)
@@ -242,7 +242,7 @@
static int find_nearest_multi(const float *codebook, int nb_entries, const float *x, int ndim, float *dist_out, int sign)
{
int i, j;
- float min_dist = 1e15;
+ float min_dist = 1e15f;
int nearest = 0;
for (i=0;i<nb_entries;i++)
@@ -290,7 +290,7 @@
float s = 1;
nb_entries = 1<<bits;
RNN_COPY(ref, x, NB_BANDS);
- for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5*(left[i] + right[i]);
+ for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5f*(left[i] + right[i]);
for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i];
for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i];
for (i=0;i<4*NB_BANDS;i++) target[i] = x[i%NB_BANDS] - pred[i];
@@ -319,10 +319,10 @@
int interp_search(const float *x, const float *left, const float *right, float *dist_out)
{
int i, k;
- float min_dist = 1e15;
+ float min_dist = 1e15f;
int best_pred = 0;
float pred[4*NB_BANDS];
- for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5*(left[i] + right[i]);
+ for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5f*(left[i] + right[i]);
for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i];
for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i];
@@ -342,7 +342,7 @@
void interp_diff(float *x, float *left, float *right, float *codebook, int bits, int sign)
{
int i, k;
- float min_dist = 1e15;
+ float min_dist = 1e15f;
int best_pred = 0;
float ref[NB_BANDS];
float pred[4*NB_BANDS];
@@ -350,7 +350,7 @@
(void)codebook;
(void)bits;
RNN_COPY(ref, x, NB_BANDS);
- for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5*(left[i] + right[i]);
+ for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5f*(left[i] + right[i]);
for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i];
for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i];
@@ -378,7 +378,7 @@
int double_interp_search(float features[4][NB_TOTAL_FEATURES], const float *mem) {
int i, j;
int best_id=0;
- float min_dist = 1e15;
+ float min_dist = 1e15f;
float dist[2][3];
interp_search(features[0], mem, features[1], dist[0]);
interp_search(features[2], features[1], features[3], dist[1]);
@@ -410,12 +410,12 @@
id1 = best_id % 3;
count = 1;
if (id0 != 1) {
- float t = (id0==0) ? .5 : 1.;
+ float t = (id0==0) ? .5f : 1.f;
for (i=0;i<NB_BANDS;i++) features[1][i] += t*features[0][i];
count += t;
}
if (id1 != 2) {
- float t = (id1==0) ? .5 : 1.;
+ float t = (id1==0) ? .5f : 1.f;
for (i=0;i<NB_BANDS;i++) features[1][i] += t*features[2][i];
count += t;
}
@@ -511,9 +511,9 @@
follow = -2;
for (i=0;i<NB_BANDS;i++) {
Ly[i] = log10(1e-2+Ex[i]);
- Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
+ Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
logMax = MAX16(logMax, Ly[i]);
- follow = MAX16(follow-2.5, Ly[i]);
+ follow = MAX16(follow-2.5f, Ly[i]);
E += Ex[i];
}
dct(st->features[st->pcount], Ly);
@@ -529,7 +529,7 @@
sum += st->lpc[j]*st->pitch_mem[j];
RNN_MOVE(st->pitch_mem+1, st->pitch_mem, LPC_ORDER-1);
st->pitch_mem[0] = aligned_in[i];
- st->exc_buf[PITCH_MAX_PERIOD+i] = sum + .7*st->pitch_filt;
+ st->exc_buf[PITCH_MAX_PERIOD+i] = sum + .7f*st->pitch_filt;
st->pitch_filt = sum;
/*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
}
@@ -548,7 +548,7 @@
/* Upsample correlation by 3x and keep the max. */
float interpolated[PITCH_MAX_PERIOD]={0};
/* interp=sinc([-3:3]+1/3).*(.5+.5*cos(pi*[-3:3]/4.5)); interp=interp/sum(interp); */
- static const float interp[7] = {0.026184, -0.098339, 0.369938, 0.837891, -0.184969, 0.070242, -0.020947};
+ static const float interp[7] = {0.026184f, -0.098339f, 0.369938f, 0.837891f, -0.184969f, 0.070242f, -0.020947f};
for (i=4;i<PITCH_MAX_PERIOD-4;i++) {
float val1=0, val2=0;
int j;
@@ -582,7 +582,7 @@
float sx=0, sxx=0, sxy=0, sy=0, sw=0;
float frame_corr;
int voiced;
- float frame_weight_sum = 1e-15;
+ float frame_weight_sum = 1e-15f;
float center_pitch;
int main_pitch;
int modulation;
@@ -594,11 +594,11 @@
for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
for(sub=0;sub<8;sub++) {
- float max_path_all = -1e15;
+ float max_path_all = -1e15f;
best_i = 0;
for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
- if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8;
+ if (st->xc[2+sub][i] < xc_half*1.1f) st->xc[2+sub][i] *= .8f;
}
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
int j;
@@ -666,7 +666,7 @@
/*best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);*/
best_b = (sy - best_a*sx)/sw;
/* Quantizing the pitch as "main" pitch + slope. */
- center_pitch = best_b+5.5*best_a;
+ center_pitch = best_b+5.5f*best_a;
main_pitch = (int)floor(.5 + 21.*log2(center_pitch/PITCH_MIN_PERIOD));
main_pitch = IMAX(0, IMIN(63, main_pitch));
modulation = (int)floor(.5 + 16*7*best_a/center_pitch);
@@ -677,13 +677,13 @@
for (sub=0;sub<4;sub++) {
if (quantize) {
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
- p *= 1 + modulation/16./7.*(2*sub-3);
+ p *= 1.f + modulation/16.f/7.f*(2*sub-3);
p = MIN16(255, MAX16(33, p));
- st->features[sub][NB_BANDS] = .02*(p-100);
- st->features[sub][NB_BANDS + 1] = frame_corr-.5;
+ st->features[sub][NB_BANDS] = .02f*(p-100);
+ st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
} else {
- st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
- st->features[sub][NB_BANDS + 1] = frame_corr-.5;
+ st->features[sub][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
+ st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
}
/*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/
}
@@ -694,7 +694,7 @@
/*printf("%f\n", st->features[3][0]);*/
c0_id = (int)floor(.5 + st->features[3][0]*4);
c0_id = IMAX(-64, IMIN(63, c0_id));
- st->features[3][0] = c0_id/4.;
+ st->features[3][0] = c0_id/4.f;
quantize_3stage_mbest(&st->features[3][1], vq_end);
/*perform_interp_relaxation(st->features, st->vq_mem);*/
quantize_diff(&st->features[1][0], st->vq_mem, &st->features[3][0], ceps_codebook_diff4, 12, 1, &vq_mid);
@@ -736,15 +736,15 @@
int best[10];
int pitch_prev[8][PITCH_MAX_PERIOD];
float frame_corr;
- float frame_weight_sum = 1e-15;
+ float frame_weight_sum = 1e-15f;
for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
for(sub=0;sub<8;sub++) {
- float max_path_all = -1e15;
+ float max_path_all = -1e15f;
best_i = 0;
for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
- if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8;
+ if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8f;
}
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
int j;
@@ -781,8 +781,8 @@
}
frame_corr /= 8;
for (sub=0;sub<4;sub++) {
- st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
- st->features[sub][NB_BANDS + 1] = frame_corr-.5;
+ st->features[sub][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
+ st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
/*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/
}
/*printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);*/
@@ -804,15 +804,15 @@
int best[4];
int pitch_prev[2][PITCH_MAX_PERIOD];
float frame_corr;
- float frame_weight_sum = 1e-15;
+ float frame_weight_sum = 1e-15f;
for(sub=0;sub<2;sub++) frame_weight_sum += st->frame_weight[2+2*st->pcount+sub];
for(sub=0;sub<2;sub++) st->frame_weight[2+2*st->pcount+sub] *= (2.f/frame_weight_sum);
for(sub=0;sub<2;sub++) {
- float max_path_all = -1e15;
+ float max_path_all = -1e15f;
best_i = 0;
for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
float xc_half = MAX16(MAX16(st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i-1)/2]);
- if (st->xc[2+2*st->pcount+sub][i] < xc_half*1.1) st->xc[2+2*st->pcount+sub][i] *= .8;
+ if (st->xc[2+2*st->pcount+sub][i] < xc_half*1.1f) st->xc[2+2*st->pcount+sub][i] *= .8f;
}
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
int j;
@@ -848,8 +848,8 @@
best_i = pitch_prev[sub][best_i];
}
frame_corr /= 2;
- st->features[st->pcount][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2]+best[3]))-200);
- st->features[st->pcount][NB_BANDS + 1] = frame_corr-.5;
+ st->features[st->pcount][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2]+best[3]))-200);
+ st->features[st->pcount][NB_BANDS + 1] = frame_corr-.5f;
if (ffeat) {
fwrite(st->features[st->pcount], sizeof(float), NB_TOTAL_FEATURES, ffeat);
}
--- a/dnn/lpcnet_plc.c
+++ b/dnn/lpcnet_plc.c
@@ -32,11 +32,19 @@
#include "lpcnet.h"
#include "plc_data.h"
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+/* Comment this out to have LPCNet update its state on every good packet (slow). */
+#define PLC_SKIP_UPDATES
+
LPCNET_EXPORT int lpcnet_plc_get_size() {
return sizeof(LPCNetPLCState);
}
LPCNET_EXPORT int lpcnet_plc_init(LPCNetPLCState *st, int options) {
+ int ret;
RNN_CLEAR(st, 1);
lpcnet_init(&st->lpcnet);
lpcnet_encoder_init(&st->enc);
@@ -60,7 +68,9 @@
return -1;
}
st->remove_dc = !!(options&LPCNET_PLC_DC_FILTER);
- return 0;
+ ret = init_plc_model(&st->model, lpcnet_plc_arrays);
+ celt_assert(ret == 0);
+ return ret;
}
LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create(int options) {
@@ -75,6 +85,10 @@
}
void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features) {
+ if (features == NULL) {
+ st->fec_skip++;
+ return;
+ }
if (st->fec_fill_pos == PLC_MAX_FEC) {
if (st->fec_keep_pos == 0) {
fprintf(stderr, "FEC buffer full\n");
@@ -89,28 +103,40 @@
st->fec_fill_pos++;
}
-static void compute_plc_pred(PLCNetState *net, float *out, const float *in) {
+void lpcnet_plc_fec_clear(LPCNetPLCState *st) {
+ st->fec_keep_pos = st->fec_read_pos = st->fec_fill_pos = st->fec_skip = 0;
+}
+
+
+static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in) {
float zeros[3*PLC_MAX_RNN_NEURONS] = {0};
float dense_out[PLC_DENSE1_OUT_SIZE];
- _lpcnet_compute_dense(&plc_dense1, dense_out, in);
- compute_gruB(&plc_gru1, zeros, net->plc_gru1_state, dense_out);
- compute_gruB(&plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state);
- _lpcnet_compute_dense(&plc_out, out, net->plc_gru2_state);
+ PLCNetState *net = &st->plc_net;
+ _lpcnet_compute_dense(&st->model.plc_dense1, dense_out, in);
+ compute_gruB(&st->model.plc_gru1, zeros, net->plc_gru1_state, dense_out);
+ compute_gruB(&st->model.plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state);
+ _lpcnet_compute_dense(&st->model.plc_out, out, net->plc_gru2_state);
/* Artificially boost the correlation to make harmonics cleaner. */
out[19] = MIN16(.5f, out[19]+.1f);
}
static int get_fec_or_pred(LPCNetPLCState *st, float *out) {
- if (st->fec_read_pos != st->fec_fill_pos) {
+ if (st->fec_read_pos != st->fec_fill_pos && st->fec_skip==0) {
+ float plc_features[2*NB_BANDS+NB_FEATURES+1] = {0};
+ float discard[NB_FEATURES];
RNN_COPY(out, &st->fec[st->fec_read_pos][0], NB_FEATURES);
st->fec_read_pos++;
/* Make sure we can rewind a few frames back at resync time. */
st->fec_keep_pos = IMAX(0, IMAX(st->fec_keep_pos, st->fec_read_pos-FEATURES_DELAY-1));
- /* FIXME: Figure out how to update compute_plc_pred() without Burg features. */
+ /* Update PLC state using FEC, so without Burg features. */
+ RNN_COPY(&plc_features[2*NB_BANDS], out, NB_FEATURES);
+ plc_features[2*NB_BANDS+NB_FEATURES] = -1;
+ compute_plc_pred(st, discard, plc_features);
return 1;
} else {
float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
- compute_plc_pred(&st->plc_net, out, zeros);
+ compute_plc_pred(st, out, zeros);
+ if (st->fec_skip > 0) st->fec_skip--;
return 0;
}
}
@@ -119,13 +145,12 @@
st->fec_read_pos -= offset;
if (st->fec_read_pos < st->fec_keep_pos) {
st->fec_read_pos = st->fec_keep_pos;
- fprintf(stderr, "cannot rewind\n");
}
}
void clear_state(LPCNetPLCState *st) {
RNN_CLEAR(st->lpcnet.last_sig, LPC_ORDER);
- st->lpcnet.last_exc = lin2ulaw(0.f);;
+ st->lpcnet.last_exc = lin2ulaw(0.f);
st->lpcnet.deemph_mem = 0;
RNN_CLEAR(st->lpcnet.nnet.gru_a_state, GRU_A_STATE_SIZE);
RNN_CLEAR(st->lpcnet.nnet.gru_b_state, GRU_B_STATE_SIZE);
@@ -163,22 +188,14 @@
float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
RNN_COPY(zeros, plc_features, 2*NB_BANDS);
zeros[2*NB_BANDS+NB_FEATURES] = 1;
- if (st->fec_active) {
- if (FEATURES_DELAY > 0) st->plc_net = st->plc_copy[FEATURES_DELAY-1];
- fec_rewind(st, FEATURES_DELAY);
- } else {
+ if (st->enable_blending) {
+ LPCNetState copy;
st->plc_net = st->plc_copy[FEATURES_DELAY];
- compute_plc_pred(&st->plc_net, st->features, zeros);
+ compute_plc_pred(st, st->features, zeros);
for (i=0;i<FEATURES_DELAY;i++) {
- float lpc[LPC_ORDER];
- float gru_a_condition[3*GRU_A_STATE_SIZE];
- float gru_b_condition[3*GRU_B_STATE_SIZE];
/* FIXME: backtrack state, replace features. */
- run_frame_network(&st->lpcnet, gru_a_condition, gru_b_condition, lpc, st->features);
+ run_frame_network_deferred(&st->lpcnet, st->features);
}
- }
- if (st->enable_blending) {
- LPCNetState copy;
copy = st->lpcnet;
lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], tmp, FRAME_SIZE-TRAINING_OFFSET, 0);
for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) {
@@ -189,8 +206,14 @@
st->lpcnet = copy;
lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
} else {+ if (FEATURES_DELAY > 0) st->plc_net = st->plc_copy[FEATURES_DELAY-1];
+ fec_rewind(st, FEATURES_DELAY);
+#ifdef PLC_SKIP_UPDATES
+ lpcnet_reset_signal(&st->lpcnet);
+#else
RNN_COPY(tmp, pcm, FRAME_SIZE-TRAINING_OFFSET);
lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
+#endif
}
RNN_COPY(st->pcm, &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
st->pcm_fill = TRAINING_OFFSET;
@@ -208,24 +231,28 @@
if (!st->blend) {
RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES);
plc_features[2*NB_BANDS+NB_FEATURES] = 1;
- compute_plc_pred(&st->plc_net, st->features, plc_features);
+ compute_plc_pred(st, st->features, plc_features);
/* Discard an FEC frame that we know we will no longer need. */
- if (st->fec_read_pos < st->fec_fill_pos) st->fec_read_pos++;
+ if (st->fec_skip) st->fec_skip--;
+ else if (st->fec_read_pos < st->fec_fill_pos) st->fec_read_pos++;
st->fec_keep_pos = IMAX(0, IMAX(st->fec_keep_pos, st->fec_read_pos-FEATURES_DELAY-1));
}
if (st->skip_analysis) {
- if (!st->fec_active) {
- float lpc[LPC_ORDER];
- float gru_a_condition[3*GRU_A_STATE_SIZE];
- float gru_b_condition[3*GRU_B_STATE_SIZE];
+ if (st->enable_blending) {
/* FIXME: backtrack state, replace features. */
- run_frame_network(&st->lpcnet, gru_a_condition, gru_b_condition, lpc, st->enc.features[0]);
+ run_frame_network_deferred(&st->lpcnet, st->enc.features[0]);
}
st->skip_analysis--;
} else {
for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE+i] = pcm[i];
RNN_COPY(output, &st->pcm[0], FRAME_SIZE);
+#ifdef PLC_SKIP_UPDATES
+ {
+ run_frame_network_deferred(&st->lpcnet, st->enc.features[0]);
+ }
+#else
lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], output, FRAME_SIZE, FRAME_SIZE);
+#endif
RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
}
st->loss_count = 0;
@@ -235,7 +262,6 @@
}
}
st->blend = 0;
- st->fec_active = 0;
return 0;
}
@@ -243,6 +269,7 @@
static int lpcnet_plc_conceal_causal(LPCNetPLCState *st, short *pcm) {
int i;
short output[FRAME_SIZE];
+ run_frame_network_flush(&st->lpcnet);
st->enc.pcount = 0;
/* If we concealed the previous frame, finish synthesizing the rest of the samples. */
/* FIXME: Copy/predict features. */
@@ -253,7 +280,7 @@
RNN_COPY(output, &st->pcm[0], update_count);
RNN_MOVE(&st->plc_copy[1], &st->plc_copy[0], FEATURES_DELAY);
st->plc_copy[0] = st->plc_net;
- st->fec_active = get_fec_or_pred(st, st->features);
+ get_fec_or_pred(st, st->features);
lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], output, update_count, update_count);
RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
st->pcm_fill -= update_count;
@@ -262,10 +289,10 @@
RNN_MOVE(&st->plc_copy[1], &st->plc_copy[0], FEATURES_DELAY);
st->plc_copy[0] = st->plc_net;
lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0);
- st->fec_active = get_fec_or_pred(st, st->features);
+ if (get_fec_or_pred(st, st->features)) st->loss_count = 0;
+ else st->loss_count++;
if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
- //if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));
lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0);
{
float x[FRAME_SIZE];
@@ -275,7 +302,6 @@
compute_frame_features(&st->enc, x);
process_single_frame(&st->enc, NULL);
}
- if (!st->fec_active) st->loss_count++;
st->blend = 1;
if (st->remove_dc) {
for (i=0;i<FRAME_SIZE;i++) {
@@ -330,7 +356,7 @@
float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
RNN_COPY(zeros, plc_features, 2*NB_BANDS);
zeros[2*NB_BANDS+NB_FEATURES] = 1;
- compute_plc_pred(&st->plc_net, st->features, zeros);
+ compute_plc_pred(st, st->features, zeros);
copy = st->lpcnet;
lpcnet_synthesize_impl(&st->lpcnet, st->features, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0);
/* Undo initial DC offset removal so that we can take into account the last 5ms of synthesis. */
@@ -383,7 +409,7 @@
if (st->loss_count == 0) {
RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES);
plc_features[2*NB_BANDS+NB_FEATURES] = 1;
- compute_plc_pred(&st->plc_net, st->features, plc_features);
+ compute_plc_pred(st, st->features, plc_features);
lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, TRAINING_OFFSET);
lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
}
@@ -406,10 +432,9 @@
process_queued_update(st);
st->enc.pcount = 0;
- compute_plc_pred(&st->plc_net, st->features, zeros);
+ compute_plc_pred(st, st->features, zeros);
if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
- //if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));
if (st->loss_count == 0) {
RNN_COPY(pcm, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -23,11 +23,14 @@
#define FORBIDDEN_INTERP 7
#define PLC_MAX_FEC 100
+#define MAX_FEATURE_BUFFER_SIZE 4
struct LPCNetState {
NNetState nnet;
int last_exc;
float last_sig[LPC_ORDER];
+ float feature_buffer[NB_FEATURES*MAX_FEATURE_BUFFER_SIZE];
+ int feature_buffer_fill;
float last_features[NB_FEATURES];
#if FEATURES_DELAY>0
float old_lpc[FEATURES_DELAY][LPC_ORDER];
@@ -39,6 +42,7 @@
float deemph_mem;
float lpc[LPC_ORDER];
kiss99_ctx rng;
+ LPCNetModel model;
};
struct LPCNetDecState {
@@ -76,7 +80,7 @@
int fec_keep_pos;
int fec_read_pos;
int fec_fill_pos;
- int fec_active;
+ int fec_skip;
short pcm[PLC_BUF_SIZE+FRAME_SIZE];
int pcm_fill;
int skip_analysis;
@@ -94,6 +98,7 @@
short dc_buf[TRAINING_OFFSET];
int queued_update;
short queued_samples[FRAME_SIZE];
+ PLCModel model;
};
extern float ceps_codebook1[];
@@ -111,7 +116,12 @@
void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]);
+void lpcnet_reset_signal(LPCNetState *lpcnet);
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
+void run_frame_network_deferred(LPCNetState *lpcnet, const float *features);
+void run_frame_network_flush(LPCNetState *lpcnet);
+
+
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload);
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *output, int N, int preload);
void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const short *pcm_in, short *output, int N);
--- /dev/null
+++ b/dnn/lpcnet_tables.c
@@ -1,0 +1,307 @@
+/* The contents of this file was automatically generated by dump_lpcnet_tables.c*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "kiss_fft.h"
+
+static const arch_fft_state arch_fft = {0, NULL};
+
+static const opus_int16 fft_bitrev[320] = {
+0, 64, 128, 192, 256, 16, 80, 144, 208, 272, 32, 96, 160, 224, 288,
+48, 112, 176, 240, 304, 4, 68, 132, 196, 260, 20, 84, 148, 212, 276,
+36, 100, 164, 228, 292, 52, 116, 180, 244, 308, 8, 72, 136, 200, 264,
+24, 88, 152, 216, 280, 40, 104, 168, 232, 296, 56, 120, 184, 248, 312,
+12, 76, 140, 204, 268, 28, 92, 156, 220, 284, 44, 108, 172, 236, 300,
+60, 124, 188, 252, 316, 1, 65, 129, 193, 257, 17, 81, 145, 209, 273,
+33, 97, 161, 225, 289, 49, 113, 177, 241, 305, 5, 69, 133, 197, 261,
+21, 85, 149, 213, 277, 37, 101, 165, 229, 293, 53, 117, 181, 245, 309,
+9, 73, 137, 201, 265, 25, 89, 153, 217, 281, 41, 105, 169, 233, 297,
+57, 121, 185, 249, 313, 13, 77, 141, 205, 269, 29, 93, 157, 221, 285,
+45, 109, 173, 237, 301, 61, 125, 189, 253, 317, 2, 66, 130, 194, 258,
+18, 82, 146, 210, 274, 34, 98, 162, 226, 290, 50, 114, 178, 242, 306,
+6, 70, 134, 198, 262, 22, 86, 150, 214, 278, 38, 102, 166, 230, 294,
+54, 118, 182, 246, 310, 10, 74, 138, 202, 266, 26, 90, 154, 218, 282,
+42, 106, 170, 234, 298, 58, 122, 186, 250, 314, 14, 78, 142, 206, 270,
+30, 94, 158, 222, 286, 46, 110, 174, 238, 302, 62, 126, 190, 254, 318,
+3, 67, 131, 195, 259, 19, 83, 147, 211, 275, 35, 99, 163, 227, 291,
+51, 115, 179, 243, 307, 7, 71, 135, 199, 263, 23, 87, 151, 215, 279,
+39, 103, 167, 231, 295, 55, 119, 183, 247, 311, 11, 75, 139, 203, 267,
+27, 91, 155, 219, 283, 43, 107, 171, 235, 299, 59, 123, 187, 251, 315,
+15, 79, 143, 207, 271, 31, 95, 159, 223, 287, 47, 111, 175, 239, 303,
+63, 127, 191, 255, 319, };
+
+static const kiss_twiddle_cpx fft_twiddles[320] = {+{1.00000000f, -0.00000000f}, {0.999807239f, -0.0196336918f},+{0.999229014f, -0.0392598175f}, {0.998265624f, -0.0588708036f},+{0.996917307f, -0.0784590989f}, {0.995184720f, -0.0980171412f},+{0.993068457f, -0.117537394f}, {0.990569353f, -0.137012348f},+{0.987688363f, -0.156434461f}, {0.984426558f, -0.175796285f},+{0.980785251f, -0.195090324f}, {0.976765871f, -0.214309156f},+{0.972369909f, -0.233445361f}, {0.967599094f, -0.252491564f},+{0.962455213f, -0.271440446f}, {0.956940353f, -0.290284663f},+{0.951056540f, -0.309017003f}, {0.944806039f, -0.327630192f},+{0.938191354f, -0.346117049f}, {0.931214929f, -0.364470512f},+{0.923879504f, -0.382683426f}, {0.916187942f, -0.400748819f},+{0.908143163f, -0.418659747f}, {0.899748266f, -0.436409235f},+{0.891006529f, -0.453990489f}, {0.881921291f, -0.471396744f},+{0.872496009f, -0.488621235f}, {0.862734377f, -0.505657375f},+{0.852640152f, -0.522498548f}, {0.842217207f, -0.539138317f},+{0.831469595f, -0.555570245f}, {0.820401430f, -0.571787953f},+{0.809017003f, -0.587785244f}, {0.797320664f, -0.603555918f},+{0.785316944f, -0.619093955f}, {0.773010433f, -0.634393275f},+{0.760405958f, -0.649448037f}, {0.747508347f, -0.664252460f},+{0.734322488f, -0.678800762f}, {0.720853567f, -0.693087339f},+{0.707106769f, -0.707106769f}, {0.693087339f, -0.720853567f},+{0.678800762f, -0.734322488f}, {0.664252460f, -0.747508347f},+{0.649448037f, -0.760405958f}, {0.634393275f, -0.773010433f},+{0.619093955f, -0.785316944f}, {0.603555918f, -0.797320664f},+{0.587785244f, -0.809017003f}, {0.571787953f, -0.820401430f},+{0.555570245f, -0.831469595f}, {0.539138317f, -0.842217207f},+{0.522498548f, -0.852640152f}, {0.505657375f, -0.862734377f},+{0.488621235f, -0.872496009f}, {0.471396744f, -0.881921291f},+{0.453990489f, -0.891006529f}, {0.436409235f, -0.899748266f},+{0.418659747f, -0.908143163f}, {0.400748819f, -0.916187942f},+{0.382683426f, -0.923879504f}, {0.364470512f, -0.931214929f},+{0.346117049f, -0.938191354f}, {0.327630192f, -0.944806039f},+{0.309017003f, -0.951056540f}, {0.290284663f, -0.956940353f},+{0.271440446f, -0.962455213f}, {0.252491564f, -0.967599094f},+{0.233445361f, -0.972369909f}, {0.214309156f, -0.976765871f},+{0.195090324f, -0.980785251f}, {0.175796285f, -0.984426558f},+{0.156434461f, -0.987688363f}, {0.137012348f, -0.990569353f},+{0.117537394f, -0.993068457f}, {0.0980171412f, -0.995184720f},+{0.0784590989f, -0.996917307f}, {0.0588708036f, -0.998265624f},+{0.0392598175f, -0.999229014f}, {0.0196336918f, -0.999807239f},+{6.12323426e-17f, -1.00000000f}, {-0.0196336918f, -0.999807239f},+{-0.0392598175f, -0.999229014f}, {-0.0588708036f, -0.998265624f},+{-0.0784590989f, -0.996917307f}, {-0.0980171412f, -0.995184720f},+{-0.117537394f, -0.993068457f}, {-0.137012348f, -0.990569353f},+{-0.156434461f, -0.987688363f}, {-0.175796285f, -0.984426558f},+{-0.195090324f, -0.980785251f}, {-0.214309156f, -0.976765871f},+{-0.233445361f, -0.972369909f}, {-0.252491564f, -0.967599094f},+{-0.271440446f, -0.962455213f}, {-0.290284663f, -0.956940353f},+{-0.309017003f, -0.951056540f}, {-0.327630192f, -0.944806039f},+{-0.346117049f, -0.938191354f}, {-0.364470512f, -0.931214929f},+{-0.382683426f, -0.923879504f}, {-0.400748819f, -0.916187942f},+{-0.418659747f, -0.908143163f}, {-0.436409235f, -0.899748266f},+{-0.453990489f, -0.891006529f}, {-0.471396744f, -0.881921291f},+{-0.488621235f, -0.872496009f}, {-0.505657375f, -0.862734377f},+{-0.522498548f, -0.852640152f}, {-0.539138317f, -0.842217207f},+{-0.555570245f, -0.831469595f}, {-0.571787953f, 
-0.820401430f},+{-0.587785244f, -0.809017003f}, {-0.603555918f, -0.797320664f},+{-0.619093955f, -0.785316944f}, {-0.634393275f, -0.773010433f},+{-0.649448037f, -0.760405958f}, {-0.664252460f, -0.747508347f},+{-0.678800762f, -0.734322488f}, {-0.693087339f, -0.720853567f},+{-0.707106769f, -0.707106769f}, {-0.720853567f, -0.693087339f},+{-0.734322488f, -0.678800762f}, {-0.747508347f, -0.664252460f},+{-0.760405958f, -0.649448037f}, {-0.773010433f, -0.634393275f},+{-0.785316944f, -0.619093955f}, {-0.797320664f, -0.603555918f},+{-0.809017003f, -0.587785244f}, {-0.820401430f, -0.571787953f},+{-0.831469595f, -0.555570245f}, {-0.842217207f, -0.539138317f},+{-0.852640152f, -0.522498548f}, {-0.862734377f, -0.505657375f},+{-0.872496009f, -0.488621235f}, {-0.881921291f, -0.471396744f},+{-0.891006529f, -0.453990489f}, {-0.899748266f, -0.436409235f},+{-0.908143163f, -0.418659747f}, {-0.916187942f, -0.400748819f},+{-0.923879504f, -0.382683426f}, {-0.931214929f, -0.364470512f},+{-0.938191354f, -0.346117049f}, {-0.944806039f, -0.327630192f},+{-0.951056540f, -0.309017003f}, {-0.956940353f, -0.290284663f},+{-0.962455213f, -0.271440446f}, {-0.967599094f, -0.252491564f},+{-0.972369909f, -0.233445361f}, {-0.976765871f, -0.214309156f},+{-0.980785251f, -0.195090324f}, {-0.984426558f, -0.175796285f},+{-0.987688363f, -0.156434461f}, {-0.990569353f, -0.137012348f},+{-0.993068457f, -0.117537394f}, {-0.995184720f, -0.0980171412f},+{-0.996917307f, -0.0784590989f}, {-0.998265624f, -0.0588708036f},+{-0.999229014f, -0.0392598175f}, {-0.999807239f, -0.0196336918f},+{-1.00000000f, -1.22464685e-16f}, {-0.999807239f, 0.0196336918f},+{-0.999229014f, 0.0392598175f}, {-0.998265624f, 0.0588708036f},+{-0.996917307f, 0.0784590989f}, {-0.995184720f, 0.0980171412f},+{-0.993068457f, 0.117537394f}, {-0.990569353f, 0.137012348f},+{-0.987688363f, 0.156434461f}, {-0.984426558f, 0.175796285f},+{-0.980785251f, 0.195090324f}, {-0.976765871f, 0.214309156f},+{-0.972369909f, 0.233445361f}, {-0.967599094f, 0.252491564f},+{-0.962455213f, 0.271440446f}, {-0.956940353f, 0.290284663f},+{-0.951056540f, 0.309017003f}, {-0.944806039f, 0.327630192f},+{-0.938191354f, 0.346117049f}, {-0.931214929f, 0.364470512f},+{-0.923879504f, 0.382683426f}, {-0.916187942f, 0.400748819f},+{-0.908143163f, 0.418659747f}, {-0.899748266f, 0.436409235f},+{-0.891006529f, 0.453990489f}, {-0.881921291f, 0.471396744f},+{-0.872496009f, 0.488621235f}, {-0.862734377f, 0.505657375f},+{-0.852640152f, 0.522498548f}, {-0.842217207f, 0.539138317f},+{-0.831469595f, 0.555570245f}, {-0.820401430f, 0.571787953f},+{-0.809017003f, 0.587785244f}, {-0.797320664f, 0.603555918f},+{-0.785316944f, 0.619093955f}, {-0.773010433f, 0.634393275f},+{-0.760405958f, 0.649448037f}, {-0.747508347f, 0.664252460f},+{-0.734322488f, 0.678800762f}, {-0.720853567f, 0.693087339f},+{-0.707106769f, 0.707106769f}, {-0.693087339f, 0.720853567f},+{-0.678800762f, 0.734322488f}, {-0.664252460f, 0.747508347f},+{-0.649448037f, 0.760405958f}, {-0.634393275f, 0.773010433f},+{-0.619093955f, 0.785316944f}, {-0.603555918f, 0.797320664f},+{-0.587785244f, 0.809017003f}, {-0.571787953f, 0.820401430f},+{-0.555570245f, 0.831469595f}, {-0.539138317f, 0.842217207f},+{-0.522498548f, 0.852640152f}, {-0.505657375f, 0.862734377f},+{-0.488621235f, 0.872496009f}, {-0.471396744f, 0.881921291f},+{-0.453990489f, 0.891006529f}, {-0.436409235f, 0.899748266f},+{-0.418659747f, 0.908143163f}, {-0.400748819f, 0.916187942f},+{-0.382683426f, 0.923879504f}, {-0.364470512f, 0.931214929f},+{-0.346117049f, 0.938191354f}, {-0.327630192f, 
0.944806039f},+{-0.309017003f, 0.951056540f}, {-0.290284663f, 0.956940353f},+{-0.271440446f, 0.962455213f}, {-0.252491564f, 0.967599094f},+{-0.233445361f, 0.972369909f}, {-0.214309156f, 0.976765871f},+{-0.195090324f, 0.980785251f}, {-0.175796285f, 0.984426558f},+{-0.156434461f, 0.987688363f}, {-0.137012348f, 0.990569353f},+{-0.117537394f, 0.993068457f}, {-0.0980171412f, 0.995184720f},+{-0.0784590989f, 0.996917307f}, {-0.0588708036f, 0.998265624f},+{-0.0392598175f, 0.999229014f}, {-0.0196336918f, 0.999807239f},+{-1.83697015e-16f, 1.00000000f}, {0.0196336918f, 0.999807239f},+{0.0392598175f, 0.999229014f}, {0.0588708036f, 0.998265624f},+{0.0784590989f, 0.996917307f}, {0.0980171412f, 0.995184720f},+{0.117537394f, 0.993068457f}, {0.137012348f, 0.990569353f},+{0.156434461f, 0.987688363f}, {0.175796285f, 0.984426558f},+{0.195090324f, 0.980785251f}, {0.214309156f, 0.976765871f},+{0.233445361f, 0.972369909f}, {0.252491564f, 0.967599094f},+{0.271440446f, 0.962455213f}, {0.290284663f, 0.956940353f},+{0.309017003f, 0.951056540f}, {0.327630192f, 0.944806039f},+{0.346117049f, 0.938191354f}, {0.364470512f, 0.931214929f},+{0.382683426f, 0.923879504f}, {0.400748819f, 0.916187942f},+{0.418659747f, 0.908143163f}, {0.436409235f, 0.899748266f},+{0.453990489f, 0.891006529f}, {0.471396744f, 0.881921291f},+{0.488621235f, 0.872496009f}, {0.505657375f, 0.862734377f},+{0.522498548f, 0.852640152f}, {0.539138317f, 0.842217207f},+{0.555570245f, 0.831469595f}, {0.571787953f, 0.820401430f},+{0.587785244f, 0.809017003f}, {0.603555918f, 0.797320664f},+{0.619093955f, 0.785316944f}, {0.634393275f, 0.773010433f},+{0.649448037f, 0.760405958f}, {0.664252460f, 0.747508347f},+{0.678800762f, 0.734322488f}, {0.693087339f, 0.720853567f},+{0.707106769f, 0.707106769f}, {0.720853567f, 0.693087339f},+{0.734322488f, 0.678800762f}, {0.747508347f, 0.664252460f},+{0.760405958f, 0.649448037f}, {0.773010433f, 0.634393275f},+{0.785316944f, 0.619093955f}, {0.797320664f, 0.603555918f},+{0.809017003f, 0.587785244f}, {0.820401430f, 0.571787953f},+{0.831469595f, 0.555570245f}, {0.842217207f, 0.539138317f},+{0.852640152f, 0.522498548f}, {0.862734377f, 0.505657375f},+{0.872496009f, 0.488621235f}, {0.881921291f, 0.471396744f},+{0.891006529f, 0.453990489f}, {0.899748266f, 0.436409235f},+{0.908143163f, 0.418659747f}, {0.916187942f, 0.400748819f},+{0.923879504f, 0.382683426f}, {0.931214929f, 0.364470512f},+{0.938191354f, 0.346117049f}, {0.944806039f, 0.327630192f},+{0.951056540f, 0.309017003f}, {0.956940353f, 0.290284663f},+{0.962455213f, 0.271440446f}, {0.967599094f, 0.252491564f},+{0.972369909f, 0.233445361f}, {0.976765871f, 0.214309156f},+{0.980785251f, 0.195090324f}, {0.984426558f, 0.175796285f},+{0.987688363f, 0.156434461f}, {0.990569353f, 0.137012348f},+{0.993068457f, 0.117537394f}, {0.995184720f, 0.0980171412f},+{0.996917307f, 0.0784590989f}, {0.998265624f, 0.0588708036f},+{0.999229014f, 0.0392598175f}, {0.999807239f, 0.0196336918f},+};
+
+const kiss_fft_state kfft = {
+320, /* nfft */
+0.0031250000f, /* scale */
+-1, /* shift */
+{5, 64, 4, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev, /* bitrev*/
+fft_twiddles, /* twiddles*/
+(arch_fft_state *)&arch_fft, /* arch_fft*/
+};
+
+const float half_window[] = {
+3.78491532e-05f, 0.000340620492f, 0.000946046319f, 0.00185389258f, 0.00306380726f,
+0.00457531959f, 0.00638783723f, 0.00850064680f, 0.0109129101f, 0.0136236614f,
+0.0166318044f, 0.0199361145f, 0.0235352255f, 0.0274276342f, 0.0316116922f,
+0.0360856056f, 0.0408474281f, 0.0458950549f, 0.0512262285f, 0.0568385124f,
+0.0627293140f, 0.0688958541f, 0.0753351897f, 0.0820441842f, 0.0890194997f,
+0.0962576419f, 0.103754878f, 0.111507311f, 0.119510807f, 0.127761051f,
+0.136253506f, 0.144983411f, 0.153945804f, 0.163135484f, 0.172547072f,
+0.182174906f, 0.192013159f, 0.202055752f, 0.212296382f, 0.222728521f,
+0.233345464f, 0.244140238f, 0.255105674f, 0.266234398f, 0.277518868f,
+0.288951218f, 0.300523549f, 0.312227666f, 0.324055225f, 0.335997701f,
+0.348046392f, 0.360192508f, 0.372427016f, 0.384740859f, 0.397124738f,
+0.409569323f, 0.422065198f, 0.434602767f, 0.447172493f, 0.459764689f,
+0.472369671f, 0.484977663f, 0.497579008f, 0.510163903f, 0.522722721f,
+0.535245717f, 0.547723293f, 0.560145974f, 0.572504222f, 0.584788740f,
+0.596990347f, 0.609099925f, 0.621108532f, 0.633007407f, 0.644788086f,
+0.656442165f, 0.667961538f, 0.679338276f, 0.690564752f, 0.701633692f,
+0.712537885f, 0.723270535f, 0.733825266f, 0.744195819f, 0.754376352f,
+0.764361382f, 0.774145722f, 0.783724606f, 0.793093503f, 0.802248418f,
+0.811185598f, 0.819901764f, 0.828393936f, 0.836659551f, 0.844696403f,
+0.852502763f, 0.860077202f, 0.867418647f, 0.874526560f, 0.881400526f,
+0.888040781f, 0.894447744f, 0.900622249f, 0.906565487f, 0.912279010f,
+0.917764664f, 0.923024654f, 0.928061485f, 0.932878017f, 0.937477291f,
+0.941862822f, 0.946038187f, 0.950007319f, 0.953774393f, 0.957343817f,
+0.960720181f, 0.963908315f, 0.966913164f, 0.969739914f, 0.972393870f,
+0.974880517f, 0.977205336f, 0.979374051f, 0.981392324f, 0.983266115f,
+0.985001266f, 0.986603677f, 0.988079309f, 0.989434063f, 0.990674019f,
+0.991804957f, 0.992832899f, 0.993763626f, 0.994602919f, 0.995356441f,
+0.996029854f, 0.996628702f, 0.997158289f, 0.997623861f, 0.998030603f,
+0.998383403f, 0.998687088f, 0.998946249f, 0.999165416f, 0.999348700f,
+0.999500215f, 0.999623775f, 0.999723017f, 0.999801278f, 0.999861658f,
+0.999907196f, 0.999940455f, 0.999963880f, 0.999979615f, 0.999989510f,
+0.999995291f, 0.999998271f, 0.999999523f, 0.999999940f, 1.00000000f,
+};
+
+const float dct_table[] = {
+0.707106769f, 0.996194720f, 0.984807730f, 0.965925813f, 0.939692616f,
+0.906307817f, 0.866025388f, 0.819152057f, 0.766044438f, 0.707106769f,
+0.642787635f, 0.573576450f, 0.500000000f, 0.422618270f, 0.342020154f,
+0.258819044f, 0.173648179f, 0.0871557444f, 0.707106769f, 0.965925813f,
+0.866025388f, 0.707106769f, 0.500000000f, 0.258819044f, 6.12323426e-17f,
+-0.258819044f, -0.500000000f, -0.707106769f, -0.866025388f, -0.965925813f,
+-1.00000000f, -0.965925813f, -0.866025388f, -0.707106769f, -0.500000000f,
+-0.258819044f, 0.707106769f, 0.906307817f, 0.642787635f, 0.258819044f,
+-0.173648179f, -0.573576450f, -0.866025388f, -0.996194720f, -0.939692616f,
+-0.707106769f, -0.342020154f, 0.0871557444f, 0.500000000f, 0.819152057f,
+0.984807730f, 0.965925813f, 0.766044438f, 0.422618270f, 0.707106769f,
+0.819152057f, 0.342020154f, -0.258819044f, -0.766044438f, -0.996194720f,
+-0.866025388f, -0.422618270f, 0.173648179f, 0.707106769f, 0.984807730f,
+0.906307817f, 0.500000000f, -0.0871557444f, -0.642787635f, -0.965925813f,
+-0.939692616f, -0.573576450f, 0.707106769f, 0.707106769f, 6.12323426e-17f,
+-0.707106769f, -1.00000000f, -0.707106769f, -1.83697015e-16f, 0.707106769f,
+1.00000000f, 0.707106769f, 3.06161700e-16f, -0.707106769f, -1.00000000f,
+-0.707106769f, -4.28626385e-16f, 0.707106769f, 1.00000000f, 0.707106769f,
+0.707106769f, 0.573576450f, -0.342020154f, -0.965925813f, -0.766044438f,
+0.0871557444f, 0.866025388f, 0.906307817f, 0.173648179f, -0.707106769f,
+-0.984807730f, -0.422618270f, 0.500000000f, 0.996194720f, 0.642787635f,
+-0.258819044f, -0.939692616f, -0.819152057f, 0.707106769f, 0.422618270f,
+-0.642787635f, -0.965925813f, -0.173648179f, 0.819152057f, 0.866025388f,
+-0.0871557444f, -0.939692616f, -0.707106769f, 0.342020154f, 0.996194720f,
+0.500000000f, -0.573576450f, -0.984807730f, -0.258819044f, 0.766044438f,
+0.906307817f, 0.707106769f, 0.258819044f, -0.866025388f, -0.707106769f,
+0.500000000f, 0.965925813f, 3.06161700e-16f, -0.965925813f, -0.500000000f,
+0.707106769f, 0.866025388f, -0.258819044f, -1.00000000f, -0.258819044f,
+0.866025388f, 0.707106769f, -0.500000000f, -0.965925813f, 0.707106769f,
+0.0871557444f, -0.984807730f, -0.258819044f, 0.939692616f, 0.422618270f,
+-0.866025388f, -0.573576450f, 0.766044438f, 0.707106769f, -0.642787635f,
+-0.819152057f, 0.500000000f, 0.906307817f, -0.342020154f, -0.965925813f,
+0.173648179f, 0.996194720f, 0.707106769f, -0.0871557444f, -0.984807730f,
+0.258819044f, 0.939692616f, -0.422618270f, -0.866025388f, 0.573576450f,
+0.766044438f, -0.707106769f, -0.642787635f, 0.819152057f, 0.500000000f,
+-0.906307817f, -0.342020154f, 0.965925813f, 0.173648179f, -0.996194720f,
+0.707106769f, -0.258819044f, -0.866025388f, 0.707106769f, 0.500000000f,
+-0.965925813f, -4.28626385e-16f, 0.965925813f, -0.500000000f, -0.707106769f,
+0.866025388f, 0.258819044f, -1.00000000f, 0.258819044f, 0.866025388f,
+-0.707106769f, -0.500000000f, 0.965925813f, 0.707106769f, -0.422618270f,
+-0.642787635f, 0.965925813f, -0.173648179f, -0.819152057f, 0.866025388f,
+0.0871557444f, -0.939692616f, 0.707106769f, 0.342020154f, -0.996194720f,
+0.500000000f, 0.573576450f, -0.984807730f, 0.258819044f, 0.766044438f,
+-0.906307817f, 0.707106769f, -0.573576450f, -0.342020154f, 0.965925813f,
+-0.766044438f, -0.0871557444f, 0.866025388f, -0.906307817f, 0.173648179f,
+0.707106769f, -0.984807730f, 0.422618270f, 0.500000000f, -0.996194720f,
+0.642787635f, 0.258819044f, -0.939692616f, 0.819152057f, 0.707106769f,
+-0.707106769f, -1.83697015e-16f, 0.707106769f, -1.00000000f, 0.707106769f,
+5.51091070e-16f, -0.707106769f, 1.00000000f, -0.707106769f, -2.69484189e-15f,
+0.707106769f, -1.00000000f, 0.707106769f, -4.90477710e-16f, -0.707106769f,
+1.00000000f, -0.707106769f, 0.707106769f, -0.819152057f, 0.342020154f,
+0.258819044f, -0.766044438f, 0.996194720f, -0.866025388f, 0.422618270f,
+0.173648179f, -0.707106769f, 0.984807730f, -0.906307817f, 0.500000000f,
+0.0871557444f, -0.642787635f, 0.965925813f, -0.939692616f, 0.573576450f,
+0.707106769f, -0.906307817f, 0.642787635f, -0.258819044f, -0.173648179f,
+0.573576450f, -0.866025388f, 0.996194720f, -0.939692616f, 0.707106769f,
+-0.342020154f, -0.0871557444f, 0.500000000f, -0.819152057f, 0.984807730f,
+-0.965925813f, 0.766044438f, -0.422618270f, 0.707106769f, -0.965925813f,
+0.866025388f, -0.707106769f, 0.500000000f, -0.258819044f, 1.10280111e-15f,
+0.258819044f, -0.500000000f, 0.707106769f, -0.866025388f, 0.965925813f,
+-1.00000000f, 0.965925813f, -0.866025388f, 0.707106769f, -0.500000000f,
+0.258819044f, 0.707106769f, -0.996194720f, 0.984807730f, -0.965925813f,
+0.939692616f, -0.906307817f, 0.866025388f, -0.819152057f, 0.766044438f,
+-0.707106769f, 0.642787635f, -0.573576450f, 0.500000000f, -0.422618270f,
+0.342020154f, -0.258819044f, 0.173648179f, -0.0871557444f, };
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -38,11 +38,16 @@
#include "tansig_table.h"
#include "nnet.h"
#include "nnet_data.h"
+#include "dred_rdovae_constants.h"
#include "plc_data.h"
#ifdef NO_OPTIMIZATIONS
+#if defined(_MSC_VER)
+#pragma message ("Compiling without any vectorization. This code will be very slow")
+#else
#warning Compiling without any vectorization. This code will be very slow
#endif
+#endif
#define SOFTMAX_HACK
@@ -316,7 +321,7 @@
state[i] = h[i];
}
-#define MAX_RNN_NEURONS_ALL IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS)
+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS)
void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input)
{
@@ -372,8 +377,8 @@
int i;
int N;
int stride;
- float zrh[3*MAX_RNN_NEURONS];
- float recur[3*MAX_RNN_NEURONS];
+ float zrh[3*MAX_RNN_NEURONS_ALL];
+ float recur[3*MAX_RNN_NEURONS_ALL];
float *z;
float *r;
float *h;
@@ -381,7 +386,7 @@
z = zrh;
r = &zrh[N];
h = &zrh[2*N];
- celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS);
+ celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS_ALL);
celt_assert(input != state);
celt_assert(gru->reset_after);
stride = 3*N;
@@ -406,7 +411,7 @@
{
int i, k;
int N;
- float recur[3*MAX_RNN_NEURONS];
+ float recur[3*MAX_RNN_NEURONS_ALL];
float *z;
float *r;
float *h;
@@ -415,7 +420,7 @@
z = recur;
r = &recur[N];
h = &recur[2*N];
- celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS);
+ celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS_ALL);
celt_assert(input != state);
celt_assert(gru->reset_after);
#ifdef USE_SU_BIAS
@@ -442,14 +447,16 @@
state[i] = z[i]*state[i] + (1-z[i])*h[i];
}
+#define MAX_CONV_INPUTS_ALL IMAX(MAX_CONV_INPUTS, DRED_MAX_CONV_INPUTS)
+
void compute_conv1d(const Conv1DLayer *layer, float *output, float *mem, const float *input)
{
int i;
int N, M;
int stride;
- float tmp[MAX_CONV_INPUTS];
+ float tmp[MAX_CONV_INPUTS_ALL];
celt_assert(input != output);
- celt_assert(layer->nb_inputs*layer->kernel_size <= MAX_CONV_INPUTS);
+ celt_assert(layer->nb_inputs*layer->kernel_size <= MAX_CONV_INPUTS_ALL);
RNN_COPY(tmp, mem, layer->nb_inputs*(layer->kernel_size-1));
RNN_COPY(&tmp[layer->nb_inputs*(layer->kernel_size-1)], input, layer->nb_inputs);
M = layer->nb_inputs*layer->kernel_size;
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -38,7 +38,30 @@
#define ACTIVATION_SOFTMAX 4
#define ACTIVATION_SWISH 5
+#define WEIGHT_BLOB_VERSION 0
+#define WEIGHT_BLOCK_SIZE 64
+typedef struct {
+ const char *name;
+ int type;
+ int size;
+ const void *data;
+} WeightArray;
+
+#define WEIGHT_TYPE_float 0
+#define WEIGHT_TYPE_int 1
+#define WEIGHT_TYPE_qweight 2
+
+typedef struct {
+ char head[4];
+ int version;
+ int type;
+ int size;
+ int block_size;
+ char name[44];
+} WeightHead;
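+/* Layout note: each record in a weight blob is one WEIGHT_BLOCK_SIZE-byte
+   WeightHead followed by block_size payload bytes (block_size >= size);
+   see parse_record() in parse_lpcnet_weights.c. */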
+
+
typedef struct {
const float *bias;
const float *input_weights;
int nb_inputs;
@@ -121,5 +144,60 @@
void compute_gru_a_input(float *output, const float *input, int N, const EmbeddingLayer *layer1, int val1, const EmbeddingLayer *layer2, int val2, const EmbeddingLayer *layer3, int val3);
int sample_from_pdf(const float *pdf, int N, float exp_boost, float pdf_floor);
+
+
+extern const WeightArray lpcnet_arrays[];
+extern const WeightArray lpcnet_plc_arrays[];
+
+int mdense_init(MDenseLayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *input_weights,
+ const char *factor,
+ int nb_inputs,
+ int nb_neurons,
+ int nb_channels,
+ int activation);
+
+int dense_init(DenseLayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *input_weights,
+ int nb_inputs,
+ int nb_neurons,
+ int activation);
+
+int gru_init(GRULayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *subias,
+ const char *input_weights,
+ const char *input_weights_idx,
+ const char *recurrent_weights,
+ int nb_inputs,
+ int nb_neurons,
+ int activation,
+ int reset_after);
+
+int sparse_gru_init(SparseGRULayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *subias,
+ const char *diag_weights,
+ const char *recurrent_weights,
+ const char *idx,
+ int nb_neurons,
+ int activation,
+ int reset_after);
+
+int conv1d_init(Conv1DLayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *input_weights,
+ int nb_inputs,
+ int kernel_size,
+ int nb_neurons,
+ int activation);
+
+int embedding_init(EmbeddingLayer *layer, const WeightArray *arrays,
+ const char *embedding_weights,
+ int nb_inputs,
+ int dim);
+
#endif /* _MLP_H_ */
--- /dev/null
+++ b/dnn/parse_lpcnet_weights.c
@@ -1,0 +1,254 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "nnet.h"
+
+#define SPARSE_BLOCK_SIZE 32
+
+extern const WeightArray lpcnet_arrays[];
+
+int parse_record(const unsigned char **data, int *len, WeightArray *array) {
+ WeightHead *h = (WeightHead *)*data;
+ if (*len < WEIGHT_BLOCK_SIZE) return -1;
+ if (h->block_size < h->size) return -1;
+ if (h->block_size > *len-WEIGHT_BLOCK_SIZE) return -1;
+ if (h->name[sizeof(h->name)-1] != 0) return -1;
+ if (h->size < 0) return -1;
+ array->name = h->name;
+ array->type = h->type;
+ array->size = h->size;
+ array->data = (*data)+WEIGHT_BLOCK_SIZE;
+
+ *data += h->block_size+WEIGHT_BLOCK_SIZE;
+ *len -= h->block_size+WEIGHT_BLOCK_SIZE;
+ return array->size;
+}
+
+int parse_weights(WeightArray **list, const unsigned char *data, int len)
+{
+ int nb_arrays=0;
+ int capacity=20;
+ *list = malloc(capacity*sizeof(WeightArray));
+ while (len > 0) {
+ int ret;
+ WeightArray array = {NULL, 0, 0, 0};
+ ret = parse_record(&data, &len, &array);
+ if (ret > 0) {
+ if (nb_arrays+1 >= capacity) {
+ /* Make sure there's room for the ending NULL element too. */
+ capacity = capacity*3/2;
+ *list = realloc(*list, capacity*sizeof(WeightArray));
+ }
+ (*list)[nb_arrays++] = array;
+ }
+ }
+ (*list)[nb_arrays].name=NULL;
+ return nb_arrays;
+}
+
+static const void *find_array_entry(const WeightArray *arrays, const char *name) {
+ while (arrays->name && strcmp(arrays->name, name) != 0) arrays++;
+ return arrays;
+}
+
+static const void *find_array_check(const WeightArray *arrays, const char *name, int size) {
+ const WeightArray *a = find_array_entry(arrays, name);
+ if (a && a->size == size) return a->data;
+ else return NULL;
+}
+
+static const void *find_idx_check(const WeightArray *arrays, const char *name, int nb_in, int nb_out, int *total_blocks) {
+ int remain;
+ const int *idx;
+ const WeightArray *a = find_array_entry(arrays, name);
+ *total_blocks = 0;
+ if (a == NULL) return NULL;
+ idx = a->data;
+ remain = a->size/sizeof(int);
+ while (remain > 0) {
+ int nb_blocks;
+ int i;
+ nb_blocks = *idx++;
+ if (remain < nb_blocks+1) return NULL;
+ for (i=0;i<nb_blocks;i++) {
+ int pos = *idx++;
+ if (pos+3 >= nb_in || (pos&0x3)) return NULL;
+ }
+ nb_out -= 8;
+ remain -= nb_blocks+1;
+ *total_blocks += nb_blocks;
+ }
+ if (nb_out != 0) return NULL;
+ return a->data;
+}
+
+int mdense_init(MDenseLayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *input_weights,
+ const char *factor,
+ int nb_inputs,
+ int nb_neurons,
+ int nb_channels,
+ int activation)
+{
+ if ((layer->bias = find_array_check(arrays, bias, nb_neurons*nb_channels*sizeof(layer->bias[0]))) == NULL) return 1;
+ if ((layer->input_weights = find_array_check(arrays, input_weights, nb_inputs*nb_channels*nb_neurons*sizeof(layer->input_weights[0]))) == NULL) return 1;
+ if ((layer->factor = find_array_check(arrays, factor, nb_channels*nb_neurons*sizeof(layer->factor[0]))) == NULL) return 1;
+ layer->nb_inputs = nb_inputs;
+ layer->nb_neurons = nb_neurons;
+ layer->nb_channels = nb_channels;
+ layer->activation = activation;
+ return 0;
+}
+
+int dense_init(DenseLayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *input_weights,
+ int nb_inputs,
+ int nb_neurons,
+ int activation)
+{
+ if ((layer->bias = find_array_check(arrays, bias, nb_neurons*sizeof(layer->bias[0]))) == NULL) return 1;
+ if ((layer->input_weights = find_array_check(arrays, input_weights, nb_inputs*nb_neurons*sizeof(layer->input_weights[0]))) == NULL) return 1;
+ layer->nb_inputs = nb_inputs;
+ layer->nb_neurons = nb_neurons;
+ layer->activation = activation;
+ return 0;
+}
+
+int gru_init(GRULayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *subias,
+ const char *input_weights,
+ const char *input_weights_idx,
+ const char *recurrent_weights,
+ int nb_inputs,
+ int nb_neurons,
+ int activation,
+ int reset_after)
+{
+ int total_blocks;
+ if ((layer->bias = find_array_check(arrays, bias, 6*nb_neurons*sizeof(layer->bias[0]))) == NULL) return 1;
+ if ((layer->subias = find_array_check(arrays, subias, 6*nb_neurons*sizeof(layer->subias[0]))) == NULL) return 1;
+ if ((layer->input_weights_idx = find_idx_check(arrays, input_weights_idx, nb_inputs, 3*nb_neurons, &total_blocks)) == NULL) return 1;
+ if ((layer->input_weights = find_array_check(arrays, input_weights, SPARSE_BLOCK_SIZE*total_blocks*sizeof(layer->input_weights[0]))) == NULL) return 1;
+ if ((layer->recurrent_weights = find_array_check(arrays, recurrent_weights, 3*nb_neurons*nb_neurons*sizeof(layer->recurrent_weights[0]))) == NULL) return 1;
+ layer->nb_inputs = nb_inputs;
+ layer->nb_neurons = nb_neurons;
+ layer->activation = activation;
+ layer->reset_after = reset_after;
+ return 0;
+}
+
+int sparse_gru_init(SparseGRULayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *subias,
+ const char *diag_weights,
+ const char *recurrent_weights,
+ const char *idx,
+ int nb_neurons,
+ int activation,
+ int reset_after)
+{
+ int total_blocks;
+ if ((layer->bias = find_array_check(arrays, bias, 6*nb_neurons*sizeof(layer->bias[0]))) == NULL) return 1;
+ if ((layer->subias = find_array_check(arrays, subias, 6*nb_neurons*sizeof(layer->subias[0]))) == NULL) return 1;
+ if ((layer->diag_weights = find_array_check(arrays, diag_weights, 3*nb_neurons*sizeof(layer->diag_weights[0]))) == NULL) return 1;
+ if ((layer->idx = find_idx_check(arrays, idx, nb_neurons, 3*nb_neurons, &total_blocks)) == NULL) return 1;
+ if ((layer->recurrent_weights = find_array_check(arrays, recurrent_weights, SPARSE_BLOCK_SIZE*total_blocks*sizeof(layer->recurrent_weights[0]))) == NULL) return 1;
+ layer->nb_neurons = nb_neurons;
+ layer->activation = activation;
+ layer->reset_after = reset_after;
+ return 0;
+}
+
+int conv1d_init(Conv1DLayer *layer, const WeightArray *arrays,
+ const char *bias,
+ const char *input_weights,
+ int nb_inputs,
+ int kernel_size,
+ int nb_neurons,
+ int activation)
+{
+ if ((layer->bias = find_array_check(arrays, bias, nb_neurons*sizeof(layer->bias[0]))) == NULL) return 1;
+ if ((layer->input_weights = find_array_check(arrays, input_weights, kernel_size*nb_inputs*nb_neurons*sizeof(layer->input_weights[0]))) == NULL) return 1;
+ layer->nb_inputs = nb_inputs;
+ layer->kernel_size = kernel_size;
+ layer->nb_neurons = nb_neurons;
+ layer->activation = activation;
+ return 0;
+}
+
+int embedding_init(EmbeddingLayer *layer, const WeightArray *arrays,
+ const char *embedding_weights,
+ int nb_inputs,
+ int dim)
+{
+ if ((layer->embedding_weights = find_array_check(arrays, embedding_weights, nb_inputs*dim*sizeof(layer->embedding_weights[0]))) == NULL) return 1;
+ layer->nb_inputs = nb_inputs;
+ layer->dim = dim;
+ return 0;
+}
+
+
+
+#if 0
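+/* Standalone sanity check: mmap "weights_blob.bin" and print the name and
+   size of every array that parse_weights() finds in it. */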
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <stdio.h>
+
+int main()
+{
+ int fd;
+ unsigned char *data;
+ int len;
+ int nb_arrays;
+ int i;
+ WeightArray *list;
+ struct stat st;
+ const char *filename = "weights_blob.bin";
+ stat(filename, &st);
+ len = st.st_size;
+ fd = open(filename, O_RDONLY);
+ data = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+ printf("size is %d\n", len);+ nb_arrays = parse_weights(&list, data, len);
+ for (i=0;i<nb_arrays;i++) {
+ printf("found %s: size %d\n", list[i].name, list[i].size);
+ }
+ printf("%p\n", list[i].name);+ free(list);
+ munmap(data, len);
+ close(fd);
+ return 0;
+}
+#endif
--- a/dnn/test_vec.c
+++ b/dnn/test_vec.c
@@ -10,7 +10,7 @@
// we need to call two versions of each functions that have the same
// name, so use #defines to temp rename them
-#define celt_exp2 celt_exp2_fast
+#define lpcnet_exp2 lpcnet_exp2_fast
#define tansig_approx tansig_approx_fast
#define sigmoid_approx sigmoid_approx_fast
#define softmax softmax_fast
@@ -34,7 +34,7 @@
#endif
-#undef celt_exp2
+#undef lpcnet_exp2
#undef tansig_approx
#undef sigmoid_approx
#undef softmax
--- /dev/null
+++ b/dnn/torch/rdovae/README.md
@@ -1,0 +1,24 @@
+# Rate-Distortion-Optimized Variational Auto-Encoder
+
+## Setup
+The Python code requires Python >= 3.6 and has been tested with Python 3.6 and Python 3.10. To install the requirements, run
+```
+python -m pip install -r requirements.txt
+```
+
+## Training
+To generate training data, use `dump_data` from the main LPCNet repo:
+```
+./dump_data -train 16khz_speech_input.s16 features.f32 data.s16
+```
+
+To train the model, simply run
+```
+python train_rdovae.py features.f32 output_folder
+```
+
+To train on a CUDA device, add `--cuda-visible-devices idx`.
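+
+For example, to train on the first GPU (the device index `0` below is only an illustrative value):
+```
+python train_rdovae.py features.f32 output_folder --cuda-visible-devices 0
+```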
+
+
+## ToDo
+- Upload checkpoints and add URLs
--- /dev/null
+++ b/dnn/torch/rdovae/export_rdovae_weights.py
@@ -1,0 +1,258 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('checkpoint', type=str, help='rdovae model checkpoint')
+parser.add_argument('output_dir', type=str, help='output folder')
+parser.add_argument('--format', choices=['C', 'numpy'], help='output format, default: C', default='C')
+
+args = parser.parse_args()
+
+import torch
+import numpy as np
+
+from rdovae import RDOVAE
+from wexchange.torch import dump_torch_weights
+from wexchange.c_export import CWriter, print_vector
+
+
+def dump_statistical_model(writer, qembedding):
+ w = qembedding.weight.detach()
+ levels, dim = w.shape
+ N = dim // 6
+
+ print("printing statistical model")+ quant_scales = torch.nn.functional.softplus(w[:, : N]).numpy()
+ dead_zone = 0.05 * torch.nn.functional.softplus(w[:, N : 2 * N]).numpy()
+ r = torch.sigmoid(w[:, 5 * N : 6 * N]).numpy()
+ p0 = torch.sigmoid(w[:, 4 * N : 5 * N]).numpy()
+ p0 = 1 - r ** (0.5 + 0.5 * p0)
+
+ quant_scales_q8 = np.round(quant_scales * 2**8).astype(np.uint16)
+ dead_zone_q10 = np.round(dead_zone * 2**10).astype(np.uint16)
+ r_q15 = np.round(r * 2**15).astype(np.uint16)
+ p0_q15 = np.round(p0 * 2**15).astype(np.uint16)
+
+ print_vector(writer.source, quant_scales_q8, 'dred_quant_scales_q8', dtype='opus_uint16', static=False)
+ print_vector(writer.source, dead_zone_q10, 'dred_dead_zone_q10', dtype='opus_uint16', static=False)
+ print_vector(writer.source, r_q15, 'dred_r_q15', dtype='opus_uint16', static=False)
+ print_vector(writer.source, p0_q15, 'dred_p0_q15', dtype='opus_uint16', static=False)
+
+ writer.header.write(
+f"""
+extern const opus_uint16 dred_quant_scales_q8[{levels * N}];
+extern const opus_uint16 dred_dead_zone_q10[{levels * N}];
+extern const opus_uint16 dred_r_q15[{levels * N}];
+extern const opus_uint16 dred_p0_q15[{levels * N}];
+
+"""
+ )
+
+
+def c_export(args, model):
+
+ message = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}"+
+ enc_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_enc_data"), message=message)
+ dec_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_dec_data"), message=message)
+ stats_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_stats_data"), message=message)
+ constants_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_constants"), message=message, header_only=True)
+
+ # some custom includes
+ for writer in [enc_writer, dec_writer, stats_writer]:
+ writer.header.write(
+f"""
+#include "opus_types.h"
+
+#include "dred_rdovae_constants.h"
+
+#include "nnet.h"
+"""
+ )
+
+ # encoder
+ encoder_dense_layers = [
+ ('core_encoder.module.dense_1' , 'enc_dense1', 'TANH'),
+ ('core_encoder.module.dense_2' , 'enc_dense3', 'TANH'),
+ ('core_encoder.module.dense_3' , 'enc_dense5', 'TANH'),
+ ('core_encoder.module.dense_4' , 'enc_dense7', 'TANH'),
+ ('core_encoder.module.dense_5' , 'enc_dense8', 'TANH'),
+ ('core_encoder.module.state_dense_1' , 'gdense1' , 'TANH'),
+ ('core_encoder.module.state_dense_2' , 'gdense2' , 'TANH')
+ ]
+
+ for name, export_name, activation in encoder_dense_layers:
+ layer = model.get_submodule(name)
+ dump_torch_weights(enc_writer, layer, name=export_name, activation=activation, verbose=True)
+
+
+ encoder_gru_layers = [
+ ('core_encoder.module.gru_1' , 'enc_dense2', 'TANH'),
+ ('core_encoder.module.gru_2' , 'enc_dense4', 'TANH'),
+ ('core_encoder.module.gru_3' , 'enc_dense6', 'TANH')
+ ]
+
+ enc_max_rnn_units = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True)
+ for name, export_name, activation in encoder_gru_layers])
+
+
+ encoder_conv_layers = [
+ ('core_encoder.module.conv1' , 'bits_dense' , 'LINEAR')
+ ]
+
+ enc_max_conv_inputs = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True) for name, export_name, activation in encoder_conv_layers])
+
+
+ del enc_writer
+
+ # decoder
+ decoder_dense_layers = [
+ ('core_decoder.module.gru_1_init' , 'state1', 'TANH'),
+ ('core_decoder.module.gru_2_init' , 'state2', 'TANH'),
+ ('core_decoder.module.gru_3_init' , 'state3', 'TANH'),
+ ('core_decoder.module.dense_1' , 'dec_dense1', 'TANH'),
+ ('core_decoder.module.dense_2' , 'dec_dense3', 'TANH'),
+ ('core_decoder.module.dense_3' , 'dec_dense5', 'TANH'),
+ ('core_decoder.module.dense_4' , 'dec_dense7', 'TANH'),
+ ('core_decoder.module.dense_5' , 'dec_dense8', 'TANH'),
+ ('core_decoder.module.output' , 'dec_final', 'LINEAR')
+ ]
+
+ for name, export_name, activation in decoder_dense_layers:
+ layer = model.get_submodule(name)
+ dump_torch_weights(dec_writer, layer, name=export_name, activation=activation, verbose=True)
+
+
+ decoder_gru_layers = [
+ ('core_decoder.module.gru_1' , 'dec_dense2', 'TANH'),
+ ('core_decoder.module.gru_2' , 'dec_dense4', 'TANH'),
+ ('core_decoder.module.gru_3' , 'dec_dense6', 'TANH')
+ ]
+
+ dec_max_rnn_units = max([dump_torch_weights(dec_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True)
+ for name, export_name, activation in decoder_gru_layers])
+
+ del dec_writer
+
+ # statistical model
+ qembedding = model.statistical_model.quant_embedding
+ dump_statistical_model(stats_writer, qembedding)
+
+ del stats_writer
+
+ # constants
+ constants_writer.header.write(
+f"""
+#define DRED_NUM_FEATURES {model.feature_dim}
+
+#define DRED_LATENT_DIM {model.latent_dim}
+
+#define DRED_STATE_DIME {model.state_dim}
+
+#define DRED_NUM_QUANTIZATION_LEVELS {model.quant_levels}
+
+#define DRED_MAX_RNN_NEURONS {max(enc_max_rnn_units, dec_max_rnn_units)}
+
+#define DRED_MAX_CONV_INPUTS {enc_max_conv_inputs}
+
+#define DRED_ENC_MAX_RNN_NEURONS {enc_max_conv_inputs}
+
+#define DRED_ENC_MAX_CONV_INPUTS {enc_max_conv_inputs}
+
+#define DRED_DEC_MAX_RNN_NEURONS {dec_max_rnn_units}
+
+"""
+ )
+
+ del constants_writer
+
+
+def numpy_export(args, model):
+
+ exchange_name_to_name = {
+ 'encoder_stack_layer1_dense' : 'core_encoder.module.dense_1',
+ 'encoder_stack_layer3_dense' : 'core_encoder.module.dense_2',
+ 'encoder_stack_layer5_dense' : 'core_encoder.module.dense_3',
+ 'encoder_stack_layer7_dense' : 'core_encoder.module.dense_4',
+ 'encoder_stack_layer8_dense' : 'core_encoder.module.dense_5',
+ 'encoder_state_layer1_dense' : 'core_encoder.module.state_dense_1',
+ 'encoder_state_layer2_dense' : 'core_encoder.module.state_dense_2',
+ 'encoder_stack_layer2_gru' : 'core_encoder.module.gru_1',
+ 'encoder_stack_layer4_gru' : 'core_encoder.module.gru_2',
+ 'encoder_stack_layer6_gru' : 'core_encoder.module.gru_3',
+ 'encoder_stack_layer9_conv' : 'core_encoder.module.conv1',
+ 'statistical_model_embedding' : 'statistical_model.quant_embedding',
+ 'decoder_state1_dense' : 'core_decoder.module.gru_1_init',
+ 'decoder_state2_dense' : 'core_decoder.module.gru_2_init',
+ 'decoder_state3_dense' : 'core_decoder.module.gru_3_init',
+ 'decoder_stack_layer1_dense' : 'core_decoder.module.dense_1',
+ 'decoder_stack_layer3_dense' : 'core_decoder.module.dense_2',
+ 'decoder_stack_layer5_dense' : 'core_decoder.module.dense_3',
+ 'decoder_stack_layer7_dense' : 'core_decoder.module.dense_4',
+ 'decoder_stack_layer8_dense' : 'core_decoder.module.dense_5',
+ 'decoder_stack_layer9_dense' : 'core_decoder.module.output',
+ 'decoder_stack_layer2_gru' : 'core_decoder.module.gru_1',
+ 'decoder_stack_layer4_gru' : 'core_decoder.module.gru_2',
+ 'decoder_stack_layer6_gru' : 'core_decoder.module.gru_3'
+ }
+
+ name_to_exchange_name = {value : key for key, value in exchange_name_to_name.items()}
+
+ for name, exchange_name in name_to_exchange_name.items():
+ print(f"printing layer {name}...")+ dump_torch_weights(os.path.join(args.output_dir, exchange_name), model.get_submodule(name))
+
+
+if __name__ == "__main__":
+
+
+ os.makedirs(args.output_dir, exist_ok=True)
+
+
+ # load model from checkpoint
+ checkpoint = torch.load(args.checkpoint, map_location='cpu')
+ model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+ missing_keys, unmatched_keys = model.load_state_dict(checkpoint['state_dict'], strict=False)
+
+ if len(missing_keys) > 0:
+ raise ValueError(f"error: missing keys in state dict")
+
+ if len(unmatched_keys) > 0:
+ print(f"warning: the following keys were unmatched {unmatched_keys}")+
+ if args.format == 'C':
+ c_export(args, model)
+ elif args.format == 'numpy':
+ numpy_export(args, model)
+ else:
+ raise ValueError(f'error: unknown export format {args.format}')
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/fec_encoder.py
@@ -1,0 +1,213 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe and Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import subprocess
+import argparse
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser(description='Encode redundancy for Opus neural FEC. Designed for use with voip application and 20ms frames')
+
+parser.add_argument('input', metavar='<input signal>', help='audio input (.wav or .raw or .pcm as int16)')
+parser.add_argument('checkpoint', metavar='<weights>', help='model checkpoint')
+parser.add_argument('q0', metavar='<quant level 0>', type=int, help='quantization level for most recent frame')
+parser.add_argument('q1', metavar='<quant level 1>', type=int, help='quantization level for oldest frame')
+parser.add_argument('output', type=str, help='output file (will be extended with .fec)')
+
+parser.add_argument('--dump-data', type=str, default='./dump_data', help='path to dump data executable (default ./dump_data)')
+parser.add_argument('--num-redundancy-frames', default=52, type=int, help='number of redundancy frames per packet (default 52)')
+parser.add_argument('--extra-delay', default=0, type=int, help="last features in packet are calculated with the decoder aligned samples, use this option to add extra delay (in samples at 16kHz)")
+parser.add_argument('--lossfile', type=str, help='file containing loss trace (0 for frame received, 1 for lost)')
+parser.add_argument('--debug-output', action='store_true', help='if set, differently assembled features are written to disk')
+
+args = parser.parse_args()
+
+import numpy as np
+from scipy.io import wavfile
+import torch
+
+from rdovae import RDOVAE
+from packets import write_fec_packets
+
+torch.set_num_threads(4)
+
+checkpoint = torch.load(args.checkpoint, map_location="cpu")
+model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+model.load_state_dict(checkpoint['state_dict'], strict=False)
+model.to("cpu")+
+lpc_order = 16
+
+## prepare input signal
+# SILK frame size is 20ms and LPCNet subframes are 10ms
+subframe_size = 160
+frame_size = 2 * subframe_size
+
+# 91 samples delay to align with SILK decoded frames
+silk_delay = 91
+
+# prepend zeros to have enough history to produce the first package
+zero_history = (args.num_redundancy_frames - 1) * frame_size
+
+# dump data has a (feature) delay of 10ms
+dump_data_delay = 160
+
+total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay
+
+# load signal
+if args.input.endswith('.raw') or args.input.endswith('.pcm'):
+ signal = np.fromfile(args.input, dtype='int16')
+
+elif args.input.endswith('.wav'):
+ fs, signal = wavfile.read(args.input)
+else:
+ raise ValueError(f'unknown input signal format: {args.input}')
+
+# fill up last frame with zeros
+padded_signal_length = len(signal) + total_delay
+tail = padded_signal_length % frame_size
+right_padding = (frame_size - tail) % frame_size
+
+signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))
+
+padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw'
+signal.tofile(padded_signal_file)
+
+# write signal and call dump_data to create features
+
+feature_file = os.path.splitext(args.input)[0] + '_features.f32'
+command = f"{args.dump_data} -test {padded_signal_file} {feature_file}"+r = subprocess.run(command, shell=True)
+if r.returncode != 0:
+ raise RuntimeError(f"command '{command}' failed with exit code {r.returncode}")
+
+# load features
+nb_features = model.feature_dim + lpc_order
+nb_used_features = model.feature_dim
+
+# load features
+features = np.fromfile(feature_file, dtype='float32')
+num_subframes = len(features) // nb_features
+num_subframes = 2 * (num_subframes // 2)
+num_frames = num_subframes // 2
+
+features = np.reshape(features, (1, -1, nb_features))
+features = features[:, :, :nb_used_features]
+features = features[:, :num_subframes, :]
+
+# quant_ids in reverse decoding order
+quant_ids = torch.round((args.q1 + (args.q0 - args.q1) * torch.arange(args.num_redundancy_frames // 2) / (args.num_redundancy_frames // 2 - 1))).long()
+
+print(f"using quantization levels {quant_ids}...")+
+# convert input to torch tensors
+features = torch.from_numpy(features)
+
+
+# run encoder
+print("running fec encoder...")+with torch.no_grad():
+
+ # encoding
+ z, states, state_size = model.encode(features)
+
+
+ # decoder on packet chunks
+ input_length = args.num_redundancy_frames // 2
+ offset = args.num_redundancy_frames - 1
+
+ packets = []
+ packet_sizes = []
+
+ for i in range(offset, num_frames):
+ print(f"processing frame {i - offset}...")+ # quantize / unquantize latent vectors
+ zi = torch.clone(z[:, i - 2 * input_length + 2: i + 1 : 2, :])
+ zi, rates = model.quantize(zi, quant_ids)
+ zi = model.unquantize(zi, quant_ids)
+
+ features = model.decode(zi, states[:, i : i + 1, :])
+ packets.append(features.squeeze(0).numpy())
+ packet_size = 8 * int((torch.sum(rates) + 7 + state_size) / 8)
+ packet_sizes.append(packet_size)
+
+
+# write packets
+packet_file = args.output + '.fec' if not args.output.endswith('.fec') else args.output
+write_fec_packets(packet_file, packets, packet_sizes)
+
+
+print(f"average redundancy rate: {int(round(sum(packet_sizes) / len(packet_sizes) * 50 / 1000))} kbps")+
+# assemble features according to loss file
+if args.lossfile != None:
+ num_packets = len(packets)
+ loss = np.loadtxt(args.lossfile, dtype='int16')
+ fec_out = np.zeros((num_packets * 2, packets[0].shape[-1]), dtype='float32')
+ foffset = -2
+ ptr = 0
+ count = 2
+ for i in range(num_packets):
+ if (loss[i] == 0) or (i == num_packets - 1):
+
+ fec_out[ptr:ptr+count,:] = packets[i][foffset:, :]
+
+ ptr += count
+ foffset = -2
+ count = 2
+ else:
+ count += 2
+ foffset -= 2
+
+ fec_out_full = np.zeros((fec_out.shape[0], 36), dtype=np.float32)
+ fec_out_full[:, : fec_out.shape[-1]] = fec_out
+
+ fec_out_full.tofile(packet_file[:-4] + f'_fec.f32')
+
+
+if args.debug_output:
+ import itertools
+
+ batches = [4]
+ offsets = [0, 2 * args.num_redundancy_frames - 4]
+
+ # sanity checks
+ # 1. concatenate features at offset 0
+ for batch, offset in itertools.product(batches, offsets):
+
+ stop = packets[0].shape[1] - offset
+ test_features = np.concatenate([packet[stop - batch: stop, :] for packet in packets[::batch//2]], axis=0)
+
+ test_features_full = np.zeros((test_features.shape[0], nb_features), dtype=np.float32)
+ test_features_full[:, :nb_used_features] = test_features[:, :]
+
+ print(f"writing debug output {packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32'}")+ test_features_full.tofile(packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32')+
--- /dev/null
+++ b/dnn/torch/rdovae/import_rdovae_weights.py
@@ -1,0 +1,143 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+import argparse
+
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('exchange_folder', type=str, help='exchange folder path')
+parser.add_argument('output', type=str, help='path to output model checkpoint')
+
+model_group = parser.add_argument_group(title="model parameters")
+model_group.add_argument('--num-features', type=int, help="number of features, default: 20", default=20)
+model_group.add_argument('--latent-dim', type=int, help="number of symbols produced by encoder, default: 80", default=80)
+model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 256", default=256)
+model_group.add_argument('--cond-size2', type=int, help="second conditioning size, default: 256", default=256)
+model_group.add_argument('--state-dim', type=int, help="dimensionality of transferred state, default: 24", default=24)
+model_group.add_argument('--quant-levels', type=int, help="number of quantization levels, default: 40", default=40)
+
+args = parser.parse_args()
+
+import torch
+from rdovae import RDOVAE
+from wexchange.torch import load_torch_weights
+
+exchange_name_to_name = {
+    'encoder_stack_layer1_dense' : 'core_encoder.module.dense_1',
+ 'encoder_stack_layer3_dense' : 'core_encoder.module.dense_2',
+ 'encoder_stack_layer5_dense' : 'core_encoder.module.dense_3',
+ 'encoder_stack_layer7_dense' : 'core_encoder.module.dense_4',
+ 'encoder_stack_layer8_dense' : 'core_encoder.module.dense_5',
+ 'encoder_state_layer1_dense' : 'core_encoder.module.state_dense_1',
+ 'encoder_state_layer2_dense' : 'core_encoder.module.state_dense_2',
+ 'encoder_stack_layer2_gru' : 'core_encoder.module.gru_1',
+ 'encoder_stack_layer4_gru' : 'core_encoder.module.gru_2',
+ 'encoder_stack_layer6_gru' : 'core_encoder.module.gru_3',
+ 'encoder_stack_layer9_conv' : 'core_encoder.module.conv1',
+ 'statistical_model_embedding' : 'statistical_model.quant_embedding',
+ 'decoder_state1_dense' : 'core_decoder.module.gru_1_init',
+ 'decoder_state2_dense' : 'core_decoder.module.gru_2_init',
+ 'decoder_state3_dense' : 'core_decoder.module.gru_3_init',
+ 'decoder_stack_layer1_dense' : 'core_decoder.module.dense_1',
+ 'decoder_stack_layer3_dense' : 'core_decoder.module.dense_2',
+ 'decoder_stack_layer5_dense' : 'core_decoder.module.dense_3',
+ 'decoder_stack_layer7_dense' : 'core_decoder.module.dense_4',
+ 'decoder_stack_layer8_dense' : 'core_decoder.module.dense_5',
+ 'decoder_stack_layer9_dense' : 'core_decoder.module.output',
+ 'decoder_stack_layer2_gru' : 'core_decoder.module.gru_1',
+ 'decoder_stack_layer4_gru' : 'core_decoder.module.gru_2',
+ 'decoder_stack_layer6_gru' : 'core_decoder.module.gru_3'
+}
+
+if __name__ == "__main__":
+ checkpoint = dict()
+
+ # parameters
+ num_features = args.num_features
+ latent_dim = args.latent_dim
+ quant_levels = args.quant_levels
+ cond_size = args.cond_size
+ cond_size2 = args.cond_size2
+ state_dim = args.state_dim
+
+
+ # model
+ checkpoint['model_args'] = (num_features, latent_dim, quant_levels, cond_size, cond_size2)
+    checkpoint['model_kwargs'] = {'state_dim': state_dim}
+    model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+
+ dense_layer_names = [
+ 'encoder_stack_layer1_dense',
+ 'encoder_stack_layer3_dense',
+ 'encoder_stack_layer5_dense',
+ 'encoder_stack_layer7_dense',
+ 'encoder_stack_layer8_dense',
+ 'encoder_state_layer1_dense',
+ 'encoder_state_layer2_dense',
+ 'decoder_state1_dense',
+ 'decoder_state2_dense',
+ 'decoder_state3_dense',
+ 'decoder_stack_layer1_dense',
+ 'decoder_stack_layer3_dense',
+ 'decoder_stack_layer5_dense',
+ 'decoder_stack_layer7_dense',
+ 'decoder_stack_layer8_dense',
+ 'decoder_stack_layer9_dense'
+ ]
+
+ gru_layer_names = [
+ 'encoder_stack_layer2_gru',
+ 'encoder_stack_layer4_gru',
+ 'encoder_stack_layer6_gru',
+ 'decoder_stack_layer2_gru',
+ 'decoder_stack_layer4_gru',
+ 'decoder_stack_layer6_gru'
+ ]
+
+ conv1d_layer_names = [
+ 'encoder_stack_layer9_conv'
+ ]
+
+ embedding_layer_names = [
+ 'statistical_model_embedding'
+ ]
+
+ for name in dense_layer_names + gru_layer_names + conv1d_layer_names + embedding_layer_names:
+        print(f"loading weights for layer {exchange_name_to_name[name]}")
+        layer = model.get_submodule(exchange_name_to_name[name])
+ load_torch_weights(os.path.join(args.exchange_folder, name), layer)
+
+ checkpoint['state_dict'] = model.state_dict()
+
+ torch.save(checkpoint, args.output)
\ No newline at end of file
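The checkpoint written by this script follows the same layout that train_rdovae.py produces ('model_args', 'model_kwargs', 'state_dict'), so reloading it is straightforward. A brief sketch (the path is a hypothetical example):

    import torch
    from rdovae import RDOVAE

    ckpt = torch.load('imported_rdovae.pth', map_location='cpu')   # hypothetical output path
    model = RDOVAE(*ckpt['model_args'], **ckpt['model_kwargs'])
    model.load_state_dict(ckpt['state_dict'])
    model.eval()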
Binary files /dev/null and b/dnn/torch/rdovae/libs/wexchange-1.0-py3-none-any.whl differ
Binary files /dev/null and b/dnn/torch/rdovae/libs/wexchange-1.2-py3-none-any.whl differ
--- /dev/null
+++ b/dnn/torch/rdovae/packets/__init__.py
@@ -1,0 +1,1 @@
+from .fec_packets import write_fec_packets, read_fec_packets
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/packets/fec_packets.c
@@ -1,0 +1,142 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "fec_packets.h"
+
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index)
+{
+
+ int16_t version;
+ int16_t header_size;
+ int16_t num_packets;
+ int16_t packet_size;
+ int16_t subframe_size;
+ int16_t subframes_per_packet;
+ int16_t num_features;
+ long offset;
+
+ FILE *fid = fopen(filename, "rb");
+
+ /* read header */
+ if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+ if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+ if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+ if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+ if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+ if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+ if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+ /* check if indices are valid */
+ if (packet_index >= num_packets || subframe_index >= subframes_per_packet)
+    {
+        fprintf(stderr, "get_fec_frame: index out of bounds\n");
+ goto error;
+ }
+
+ /* calculate offset in file (+ 2 is for rate) */
+ offset = header_size + packet_index * packet_size + 2 + subframe_index * subframe_size;
+ fseek(fid, offset, SEEK_SET);
+
+ /* read features */
+ if (fread(features, sizeof(*features), num_features, fid) != num_features) goto error;
+
+ fclose(fid);
+ return 0;
+
+error:
+ fclose(fid);
+ return 1;
+}
+
+int get_fec_rate(const char * const filename, int packet_index)
+{
+    int16_t version;
+ int16_t header_size;
+ int16_t num_packets;
+ int16_t packet_size;
+ int16_t subframe_size;
+ int16_t subframes_per_packet;
+ int16_t num_features;
+ long offset;
+ int16_t rate;
+
+ FILE *fid = fopen(filename, "rb");
+
+ /* read header */
+ if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+ if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+ if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+ if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+ if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+ if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+ if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+ /* check if indices are valid */
+ if (packet_index >= num_packets)
+    {
+        fprintf(stderr, "get_fec_rate: index out of bounds\n");
+ goto error;
+ }
+
+ /* calculate offset in file (+ 2 is for rate) */
+ offset = header_size + packet_index * packet_size;
+ fseek(fid, offset, SEEK_SET);
+
+ /* read rate */
+ if (fread(&rate, sizeof(rate), 1, fid) != 1) goto error;
+
+ fclose(fid);
+ return (int) rate;
+
+error:
+ fclose(fid);
+ return -1;
+}
+
+#if 0
+int main()
+{
+    float features[20];
+ int i;
+
+    if (get_fec_frame("../test.fec", &features[0], 0, 127))
+    {
+        return 1;
+ }
+
+ for (i = 0; i < 20; i ++)
+    {
+        printf("%d %f\n", i, features[i]);
+    }
+
+    printf("rate: %d\n", get_fec_rate("../test.fec", 0));
+
+}
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/packets/fec_packets.h
@@ -1,0 +1,34 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FEC_PACKETS_H
+#define _FEC_PACKETS_H
+
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index);
+int get_fec_rate(const char * const filename, int packet_index);
+
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/packets/fec_packets.py
@@ -1,0 +1,108 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+
+
+
+def write_fec_packets(filename, packets, rates=None):
+ """ writes packets in binary format """
+
+ assert np.dtype(np.float32).itemsize == 4
+ assert np.dtype(np.int16).itemsize == 2
+
+ # derive some sizes
+ num_packets = len(packets)
+ subframes_per_packet = packets[0].shape[-2]
+ num_features = packets[0].shape[-1]
+
+ # size of float is 4
+ subframe_size = num_features * 4
+ packet_size = subframe_size * subframes_per_packet + 2 # two bytes for rate
+
+ version = 1
+    # header size (version, header_size, num_packets, packet_size, subframe_size, subframes_per_packet, num_features)
+ header_size = 14
+
+ with open(filename, 'wb') as f:
+
+ # header
+ f.write(np.int16(version).tobytes())
+ f.write(np.int16(header_size).tobytes())
+ f.write(np.int16(num_packets).tobytes())
+ f.write(np.int16(packet_size).tobytes())
+ f.write(np.int16(subframe_size).tobytes())
+ f.write(np.int16(subframes_per_packet).tobytes())
+ f.write(np.int16(num_features).tobytes())
+
+ # packets
+ for i, packet in enumerate(packets):
+            if rates is None:
+ rate = 0
+ else:
+ rate = rates[i]
+
+ f.write(np.int16(rate).tobytes())
+
+ features = np.flip(packet, axis=-2)
+ f.write(features.astype(np.float32).tobytes())
+
+
+def read_fec_packets(filename):
+ """ reads packets from binary format """
+
+ assert np.dtype(np.float32).itemsize == 4
+ assert np.dtype(np.int16).itemsize == 2
+
+ with open(filename, 'rb') as f:
+
+ # header
+ version = np.frombuffer(f.read(2), dtype=np.int16).item()
+ header_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ num_packets = np.frombuffer(f.read(2), dtype=np.int16).item()
+ packet_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item()
+ num_features = np.frombuffer(f.read(2), dtype=np.int16).item()
+
+ dummy_features = np.zeros((subframes_per_packet, num_features), dtype=np.float32)
+
+ # packets
+ rates = []
+ packets = []
+ for i in range(num_packets):
+
+            rate = np.frombuffer(f.read(2), dtype=np.int16).item()
+ rates.append(rate)
+
+ features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
+ packet = np.flip(features, axis=-2)
+ packets.append(packet)
+
+ return packets
\ No newline at end of file
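The on-disk layout above is a 14-byte header (version, header_size, num_packets, packet_size, subframe_size, subframes_per_packet, num_features, all int16) followed, per packet, by one int16 rate and the time-reversed float32 features. A hedged round-trip sketch (shapes, rates and file name are arbitrary):

    import numpy as np
    from fec_packets import write_fec_packets, read_fec_packets

    # three packets, each carrying 4 subframes of 20 features
    packets = [np.random.randn(4, 20).astype(np.float32) for _ in range(3)]
    write_fec_packets('test.fec', packets, rates=[64, 64, 32])

    decoded = read_fec_packets('test.fec')
    assert all(np.allclose(p, d) for p, d in zip(packets, decoded))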
--- /dev/null
+++ b/dnn/torch/rdovae/rdovae/__init__.py
@@ -1,0 +1,2 @@
+from .rdovae import RDOVAE, distortion_loss, hard_rate_estimate, soft_rate_estimate
+from .dataset import RDOVAEDataset
--- /dev/null
+++ b/dnn/torch/rdovae/rdovae/dataset.py
@@ -1,0 +1,68 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+import numpy as np
+
+class RDOVAEDataset(torch.utils.data.Dataset):
+ def __init__(self,
+ feature_file,
+ sequence_length,
+ num_used_features=20,
+ num_features=36,
+ lambda_min=0.0002,
+ lambda_max=0.0135,
+ quant_levels=16,
+ enc_stride=2):
+
+ self.sequence_length = sequence_length
+ self.lambda_min = lambda_min
+ self.lambda_max = lambda_max
+ self.enc_stride = enc_stride
+ self.quant_levels = quant_levels
+ self.denominator = (quant_levels - 1) / np.log(lambda_max / lambda_min)
+
+ if sequence_length % enc_stride:
+            raise ValueError(f"RDOVAEDataset.__init__: enc_stride {enc_stride} does not divide sequence length {sequence_length}")
+
+ self.features = np.reshape(np.fromfile(feature_file, dtype=np.float32), (-1, num_features))
+ self.features = self.features[:, :num_used_features]
+ self.num_sequences = self.features.shape[0] // sequence_length
+
+ def __len__(self):
+ return self.num_sequences
+
+ def __getitem__(self, index):
+ features = self.features[index * self.sequence_length: (index + 1) * self.sequence_length, :]
+ q_ids = np.random.randint(0, self.quant_levels, (1)).astype(np.int64)
+ q_ids = np.repeat(q_ids, self.sequence_length // self.enc_stride, axis=0)
+ rate_lambda = self.lambda_min * np.exp(q_ids.astype(np.float32) / self.denominator).astype(np.float32)
+
+ return features, rate_lambda, q_ids
+
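A minimal usage sketch for the dataset above (the feature file path is a placeholder): each item pairs a 256-frame feature sequence with one randomly drawn quantizer id, repeated once per encoder step, and the matching rate lambda.

    import torch
    from rdovae import RDOVAEDataset

    dataset = RDOVAEDataset('features.f32', sequence_length=256)       # hypothetical feature dump
    loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True, drop_last=True)

    features, rate_lambda, q_ids = next(iter(loader))
    print(features.shape, rate_lambda.shape, q_ids.shape)              # (32, 256, 20) (32, 128) (32, 128)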
--- /dev/null
+++ b/dnn/torch/rdovae/rdovae/rdovae.py
@@ -1,0 +1,614 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" Pytorch implementations of rate distortion optimized variational autoencoder """
+
+import math as m
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+# Quantization and rate related utility functions
+
+def soft_pvq(x, k):
+ """ soft pyramid vector quantizer """
+
+ # L2 normalization
+ x_norm2 = x / (1e-15 + torch.norm(x, dim=-1, keepdim=True))
+
+
+ with torch.no_grad():
+ # quantization loop, no need to track gradients here
+ x_norm1 = x / torch.sum(torch.abs(x), dim=-1, keepdim=True)
+
+ # set initial scaling factor to k
+ scale_factor = k
+ x_scaled = scale_factor * x_norm1
+ x_quant = torch.round(x_scaled)
+
+ # we aim for ||x_quant||_L1 = k
+ for _ in range(10):
+ # remove signs and calculate L1 norm
+ abs_x_quant = torch.abs(x_quant)
+ abs_x_scaled = torch.abs(x_scaled)
+ l1_x_quant = torch.sum(abs_x_quant, axis=-1)
+
+            # increase the scale where the quantized L1 norm is too small and decrease it where it is too large
+ plus = 1.0001 * torch.min((abs_x_quant + 0.5) / (abs_x_scaled + 1e-15), dim=-1).values
+ minus = 0.9999 * torch.max((abs_x_quant - 0.5) / (abs_x_scaled + 1e-15), dim=-1).values
+ factor = torch.where(l1_x_quant > k, minus, plus)
+ factor = torch.where(l1_x_quant == k, torch.ones_like(factor), factor)
+ scale_factor = scale_factor * factor.unsqueeze(-1)
+
+ # update x
+ x_scaled = scale_factor * x_norm1
+            x_quant = torch.round(x_scaled)
+
+ # L2 normalization of quantized x
+ x_quant_norm2 = x_quant / (1e-15 + torch.norm(x_quant, dim=-1, keepdim=True))
+ quantization_error = x_quant_norm2 - x_norm2
+
+ return x_norm2 + quantization_error.detach()
+
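A quick way to see what soft_pvq returns (sketch only, assuming the function above is in scope): the forward value is the L2-normalized integer pulse vector, while the straight-through trick keeps gradients flowing to x.

    import torch

    x = torch.randn(3, 24, requires_grad=True)
    y = soft_pvq(x, 82)

    print(torch.norm(y, dim=-1))        # ~1: unit-norm quantized codewords
    y.sum().backward()
    print(x.grad is not None)           # True: gradient passes through the quantizer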
+def cache_parameters(func):
+ cache = dict()
+ def cached_func(*args):
+ if args in cache:
+ return cache[args]
+ else:
+ cache[args] = func(*args)
+
+ return cache[args]
+ return cached_func
+
+@cache_parameters
+def pvq_codebook_size(n, k):
+
+ if k == 0:
+ return 1
+
+ if n == 0:
+ return 0
+
+ return pvq_codebook_size(n - 1, k) + pvq_codebook_size(n, k - 1) + pvq_codebook_size(n - 1, k - 1)
+
+
+def soft_rate_estimate(z, r, reduce=True):
+ """ rate approximation with dependent theta Eq. (7)"""
+
+ rate = torch.sum(
+ - torch.log2((1 - r)/(1 + r) * r ** torch.abs(z) + 1e-6),
+ dim=-1
+ )
+
+ if reduce:
+ rate = torch.mean(rate)
+
+ return rate
+
+
+def hard_rate_estimate(z, r, theta, reduce=True):
+ """ hard rate approximation """
+
+ z_q = torch.round(z)
+ p0 = 1 - r ** (0.5 + 0.5 * theta)
+ alpha = torch.relu(1 - torch.abs(z_q)) ** 2
+ rate = - torch.sum(
+ (alpha * torch.log2(p0 * r ** torch.abs(z_q) + 1e-6)
+ + (1 - alpha) * torch.log2(0.5 * (1 - p0) * (1 - r) * r ** (torch.abs(z_q) - 1) + 1e-6)),
+ dim=-1
+ )
+
+ if reduce:
+ rate = torch.mean(rate)
+
+ return rate
+
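To relate the two estimates above, a toy comparison (sketch, assuming both functions are in scope): on integer-valued latents the hard estimate approximates the bits actually spent, while the soft estimate is the smooth surrogate used for backpropagation.

    import torch

    z = torch.round(3 * torch.randn(1, 100))        # pretend these are quantized latents
    r = 0.8 * torch.ones_like(z)
    theta = 0.5 * torch.ones_like(z)

    print(float(soft_rate_estimate(z, r)))          # differentiable surrogate, in bits
    print(float(hard_rate_estimate(z, r, theta)))   # piecewise estimate used for the rate metric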
+
+
+def soft_dead_zone(x, dead_zone):
+ """ approximates application of a dead zone to x """
+ d = dead_zone * 0.05
+ return x - d * torch.tanh(x / (0.1 + d))
+
+
+def hard_quantize(x):
+ """ round with copy gradient trick """
+ return x + (torch.round(x) - x).detach()
+
+
+def noise_quantize(x):
+ """ simulates quantization with addition of random uniform noise """
+ return x + (torch.rand_like(x) - 0.5)
+
+
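A brief illustration (not generated by this code, values made up) of the helpers above: soft_dead_zone pulls small values toward zero, hard_quantize rounds in the forward pass while passing gradients straight through, and noise_quantize replaces rounding with uniform noise during training.

    import torch

    x = torch.tensor([0.04, 0.4, 1.6], requires_grad=True)

    print(soft_dead_zone(x.detach(), torch.ones_like(x)))   # small entries shrink toward 0
    xq = hard_quantize(x)                                    # forward value: torch.round(x)
    xq.sum().backward()
    print(xq.detach(), x.grad)                               # gradients are all ones despite rounding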
+# loss functions
+
+
+def distortion_loss(y_true, y_pred, rate_lambda=None):
+ """ custom distortion loss for LPCNet features """
+
+ if y_true.size(-1) != 20:
+        raise ValueError('distortion loss is designed to work with 20 features')
+
+ ceps_error = y_pred[..., :18] - y_true[..., :18]
+ pitch_error = 2 * (y_pred[..., 18:19] - y_true[..., 18:19]) / (2 + y_true[..., 18:19])
+ corr_error = y_pred[..., 19:] - y_true[..., 19:]
+ pitch_weight = torch.relu(y_true[..., 19:] + 0.5) ** 2
+
+ loss = torch.mean(ceps_error ** 2 + (10/18) * torch.abs(pitch_error) * pitch_weight + (1/18) * corr_error ** 2, dim=-1)
+
+    if rate_lambda is not None:
+ loss = loss / torch.sqrt(rate_lambda)
+
+ loss = torch.mean(loss)
+
+ return loss
+
+
+# sampling functions
+
+import random
+
+
+def random_split(start, stop, num_splits=3, min_len=3):
+ get_min_len = lambda x : min([x[i+1] - x[i] for i in range(len(x) - 1)])
+ candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop]
+
+ while get_min_len(candidate) < min_len:
+ candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop]
+
+ return candidate
+
+
+
+# weight initialization and clipping
+def init_weights(module):
+
+ if isinstance(module, nn.GRU):
+ for p in module.named_parameters():
+            if p[0].startswith('weight_hh_'):
+                nn.init.orthogonal_(p[1])
+
+
+def weight_clip_factory(max_value):
+    """ weight clipping function concerning sum of abs values of adjacent weights """
+ def clip_weight_(w):
+ stop = w.size(1)
+ # omit last column if stop is odd
+ if stop % 2:
+ stop -= 1
+ max_values = max_value * torch.ones_like(w[:, :stop])
+ factor = max_value / torch.maximum(max_values,
+ torch.repeat_interleave(
+ torch.abs(w[:, :stop:2]) + torch.abs(w[:, 1:stop:2]),
+ 2,
+ 1))
+ with torch.no_grad():
+ w[:, :stop] *= factor
+
+ def clip_weights(module):
+ if isinstance(module, nn.GRU) or isinstance(module, nn.Linear):
+ for name, w in module.named_parameters():
+                if name.startswith('weight'):
+                    clip_weight_(w)
+
+ return clip_weights
+
+# RDOVAE module and submodules
+
+
+class CoreEncoder(nn.Module):
+ STATE_HIDDEN = 128
+ FRAMES_PER_STEP = 2
+ CONV_KERNEL_SIZE = 4
+
+ def __init__(self, feature_dim, output_dim, cond_size, cond_size2, state_size=24):
+ """ core encoder for RDOVAE
+
+ Computes latents, initial states, and rate estimates from features and lambda parameter
+
+ """
+
+ super(CoreEncoder, self).__init__()
+
+ # hyper parameters
+ self.feature_dim = feature_dim
+ self.output_dim = output_dim
+ self.cond_size = cond_size
+ self.cond_size2 = cond_size2
+ self.state_size = state_size
+
+ # derived parameters
+ self.input_dim = self.FRAMES_PER_STEP * self.feature_dim
+ self.conv_input_channels = 5 * cond_size + 3 * cond_size2
+
+ # layers
+ self.dense_1 = nn.Linear(self.input_dim, self.cond_size2)
+ self.gru_1 = nn.GRU(self.cond_size2, self.cond_size, batch_first=True)
+ self.dense_2 = nn.Linear(self.cond_size, self.cond_size2)
+ self.gru_2 = nn.GRU(self.cond_size2, self.cond_size, batch_first=True)
+ self.dense_3 = nn.Linear(self.cond_size, self.cond_size2)
+ self.gru_3 = nn.GRU(self.cond_size2, self.cond_size, batch_first=True)
+ self.dense_4 = nn.Linear(self.cond_size, self.cond_size)
+ self.dense_5 = nn.Linear(self.cond_size, self.cond_size)
+ self.conv1 = nn.Conv1d(self.conv_input_channels, self.output_dim, kernel_size=self.CONV_KERNEL_SIZE, padding='valid')
+
+ self.state_dense_1 = nn.Linear(self.conv_input_channels, self.STATE_HIDDEN)
+
+ self.state_dense_2 = nn.Linear(self.STATE_HIDDEN, self.state_size)
+
+ # initialize weights
+ self.apply(init_weights)
+
+
+ def forward(self, features):
+
+ # reshape features
+ x = torch.reshape(features, (features.size(0), features.size(1) // self.FRAMES_PER_STEP, self.FRAMES_PER_STEP * features.size(2)))
+
+ batch = x.size(0)
+ device = x.device
+
+ # run encoding layer stack
+ x1 = torch.tanh(self.dense_1(x))
+ x2, _ = self.gru_1(x1, torch.zeros((1, batch, self.cond_size)).to(device))
+ x3 = torch.tanh(self.dense_2(x2))
+ x4, _ = self.gru_2(x3, torch.zeros((1, batch, self.cond_size)).to(device))
+ x5 = torch.tanh(self.dense_3(x4))
+ x6, _ = self.gru_3(x5, torch.zeros((1, batch, self.cond_size)).to(device))
+ x7 = torch.tanh(self.dense_4(x6))
+ x8 = torch.tanh(self.dense_5(x7))
+
+ # concatenation of all hidden layer outputs
+ x9 = torch.cat((x1, x2, x3, x4, x5, x6, x7, x8), dim=-1)
+
+ # init state for decoder
+ states = torch.tanh(self.state_dense_1(x9))
+ states = torch.tanh(self.state_dense_2(states))
+
+ # latent representation via convolution
+ x9 = F.pad(x9.permute(0, 2, 1), [self.CONV_KERNEL_SIZE - 1, 0])
+ z = self.conv1(x9).permute(0, 2, 1)
+
+ return z, states
+
+
+
+
+class CoreDecoder(nn.Module):
+
+ FRAMES_PER_STEP = 4
+
+ def __init__(self, input_dim, output_dim, cond_size, cond_size2, state_size=24):
+ """ core decoder for RDOVAE
+
+ Computes features from latents, initial state, and quantization index
+
+ """
+
+ super(CoreDecoder, self).__init__()
+
+ # hyper parameters
+ self.input_dim = input_dim
+ self.output_dim = output_dim
+ self.cond_size = cond_size
+ self.cond_size2 = cond_size2
+ self.state_size = state_size
+
+ self.input_size = self.input_dim
+
+ self.concat_size = 4 * self.cond_size + 4 * self.cond_size2
+
+ # layers
+ self.dense_1 = nn.Linear(self.input_size, cond_size2)
+ self.gru_1 = nn.GRU(cond_size2, cond_size, batch_first=True)
+ self.dense_2 = nn.Linear(cond_size, cond_size2)
+ self.gru_2 = nn.GRU(cond_size2, cond_size, batch_first=True)
+ self.dense_3 = nn.Linear(cond_size, cond_size2)
+ self.gru_3 = nn.GRU(cond_size2, cond_size, batch_first=True)
+ self.dense_4 = nn.Linear(cond_size, cond_size2)
+ self.dense_5 = nn.Linear(cond_size2, cond_size2)
+
+ self.output = nn.Linear(self.concat_size, self.FRAMES_PER_STEP * self.output_dim)
+
+
+ self.gru_1_init = nn.Linear(self.state_size, self.cond_size)
+ self.gru_2_init = nn.Linear(self.state_size, self.cond_size)
+ self.gru_3_init = nn.Linear(self.state_size, self.cond_size)
+
+ # initialize weights
+ self.apply(init_weights)
+
+ def forward(self, z, initial_state):
+
+ gru_1_state = torch.tanh(self.gru_1_init(initial_state).permute(1, 0, 2))
+ gru_2_state = torch.tanh(self.gru_2_init(initial_state).permute(1, 0, 2))
+ gru_3_state = torch.tanh(self.gru_3_init(initial_state).permute(1, 0, 2))
+
+ # run decoding layer stack
+ x1 = torch.tanh(self.dense_1(z))
+ x2, _ = self.gru_1(x1, gru_1_state)
+ x3 = torch.tanh(self.dense_2(x2))
+ x4, _ = self.gru_2(x3, gru_2_state)
+ x5 = torch.tanh(self.dense_3(x4))
+ x6, _ = self.gru_3(x5, gru_3_state)
+ x7 = torch.tanh(self.dense_4(x6))
+ x8 = torch.tanh(self.dense_5(x7))
+ x9 = torch.cat((x1, x2, x3, x4, x5, x6, x7, x8), dim=-1)
+
+ # output layer and reshaping
+ x10 = self.output(x9)
+ features = torch.reshape(x10, (x10.size(0), x10.size(1) * self.FRAMES_PER_STEP, x10.size(2) // self.FRAMES_PER_STEP))
+
+ return features
+
+
+class StatisticalModel(nn.Module):
+ def __init__(self, quant_levels, latent_dim):
+ """ Statistical model for latent space
+
+ Computes scaling, deadzone, r, and theta
+
+ """
+
+ super(StatisticalModel, self).__init__()
+
+ # copy parameters
+ self.latent_dim = latent_dim
+ self.quant_levels = quant_levels
+ self.embedding_dim = 6 * latent_dim
+
+ # quantization embedding
+ self.quant_embedding = nn.Embedding(quant_levels, self.embedding_dim)
+
+ # initialize embedding to 0
+ with torch.no_grad():
+ self.quant_embedding.weight[:] = 0
+
+
+ def forward(self, quant_ids):
+ """ takes quant_ids and returns statistical model parameters"""
+
+ x = self.quant_embedding(quant_ids)
+
+ # CAVE: theta_soft is not used anymore. Kick it out?
+ quant_scale = F.softplus(x[..., 0 * self.latent_dim : 1 * self.latent_dim])
+ dead_zone = F.softplus(x[..., 1 * self.latent_dim : 2 * self.latent_dim])
+ theta_soft = torch.sigmoid(x[..., 2 * self.latent_dim : 3 * self.latent_dim])
+ r_soft = torch.sigmoid(x[..., 3 * self.latent_dim : 4 * self.latent_dim])
+ theta_hard = torch.sigmoid(x[..., 4 * self.latent_dim : 5 * self.latent_dim])
+ r_hard = torch.sigmoid(x[..., 5 * self.latent_dim : 6 * self.latent_dim])
+
+
+        return {
+            'quant_embedding' : x,
+ 'quant_scale' : quant_scale,
+ 'dead_zone' : dead_zone,
+ 'r_hard' : r_hard,
+ 'theta_hard' : theta_hard,
+ 'r_soft' : r_soft,
+ 'theta_soft' : theta_soft
+ }
+
+
+class RDOVAE(nn.Module):
+ def __init__(self,
+ feature_dim,
+ latent_dim,
+ quant_levels,
+ cond_size,
+ cond_size2,
+ state_dim=24,
+ split_mode='split',
+ clip_weights=True,
+ pvq_num_pulses=82,
+ state_dropout_rate=0):
+
+ super(RDOVAE, self).__init__()
+
+ self.feature_dim = feature_dim
+ self.latent_dim = latent_dim
+ self.quant_levels = quant_levels
+ self.cond_size = cond_size
+ self.cond_size2 = cond_size2
+ self.split_mode = split_mode
+ self.state_dim = state_dim
+ self.pvq_num_pulses = pvq_num_pulses
+ self.state_dropout_rate = state_dropout_rate
+
+ # submodules encoder and decoder share the statistical model
+ self.statistical_model = StatisticalModel(quant_levels, latent_dim)
+ self.core_encoder = nn.DataParallel(CoreEncoder(feature_dim, latent_dim, cond_size, cond_size2, state_size=state_dim))
+ self.core_decoder = nn.DataParallel(CoreDecoder(latent_dim, feature_dim, cond_size, cond_size2, state_size=state_dim))
+
+ self.enc_stride = CoreEncoder.FRAMES_PER_STEP
+ self.dec_stride = CoreDecoder.FRAMES_PER_STEP
+
+ if clip_weights:
+ self.weight_clip_fn = weight_clip_factory(0.496)
+ else:
+ self.weight_clip_fn = None
+
+ if self.dec_stride % self.enc_stride != 0:
+ raise ValueError(f"get_decoder_chunks_generic: encoder stride does not divide decoder stride")
+
+ def clip_weights(self):
+        if self.weight_clip_fn is not None:
+ self.apply(self.weight_clip_fn)
+
+ def get_decoder_chunks(self, z_frames, mode='split', chunks_per_offset = 4):
+
+ enc_stride = self.enc_stride
+ dec_stride = self.dec_stride
+
+ stride = dec_stride // enc_stride
+
+ chunks = []
+
+ for offset in range(stride):
+            # start is the smallest number congruent to offset mod stride that decodes to a valid range
+ start = offset
+ while enc_stride * (start + 1) - dec_stride < 0:
+ start += stride
+
+ # check if start is a valid index
+ if start >= z_frames:
+                raise ValueError("get_decoder_chunks_generic: range too small")
+
+ # stop is the smallest number outside [0, num_enc_frames] that's congruent to offset mod stride
+ stop = z_frames - (z_frames % stride) + offset
+ while stop < z_frames:
+ stop += stride
+
+ # calculate split points
+ length = (stop - start)
+ if mode == 'split':
+ split_points = [start + stride * int(i * length / chunks_per_offset / stride) for i in range(chunks_per_offset)] + [stop]
+ elif mode == 'random_split':
+ split_points = [stride * x + start for x in random_split(0, (stop - start)//stride - 1, chunks_per_offset - 1, 1)]
+ else:
+                raise ValueError(f"get_decoder_chunks_generic: unknown mode {mode}")
+
+
+ for i in range(chunks_per_offset):
+ # (enc_frame_start, enc_frame_stop, enc_frame_stride, stride, feature_frame_start, feature_frame_stop)
+ # encoder range(i, j, stride) maps to feature range(enc_stride * (i + 1) - dec_stride, enc_stride * j)
+ # provided that i - j = 1 mod stride
+                chunks.append({
+                    'z_start' : split_points[i],
+ 'z_stop' : split_points[i + 1] - stride + 1,
+ 'z_stride' : stride,
+ 'features_start' : enc_stride * (split_points[i] + 1) - dec_stride,
+ 'features_stop' : enc_stride * (split_points[i + 1] - stride + 1)
+ })
+
+ return chunks
+
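For intuition, a sketch of what get_decoder_chunks produces with the model sizes used by train_rdovae.py (defaults assumed): each chunk maps a strided range of latent frames to the contiguous run of feature frames it decodes back to.

    import torch
    from rdovae import RDOVAE

    model = RDOVAE(20, 80, 16, 256, 256)            # feature_dim, latent_dim, quant_levels, cond sizes
    for chunk in model.get_decoder_chunks(16):      # 16 latent frames, default 'split' mode
        print(chunk)                                # z_start/z_stop/z_stride and the feature range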
+
+ def forward(self, features, q_id):
+
+ # calculate statistical model from quantization ID
+ statistical_model = self.statistical_model(q_id)
+
+ # run encoder
+ z, states = self.core_encoder(features)
+
+ # scaling, dead-zone and quantization
+ z = z * statistical_model['quant_scale']
+ z = soft_dead_zone(z, statistical_model['dead_zone'])
+
+ # quantization
+ z_q = hard_quantize(z) / statistical_model['quant_scale']
+ z_n = noise_quantize(z) / statistical_model['quant_scale']
+ states_q = soft_pvq(states, self.pvq_num_pulses)
+
+ if self.state_dropout_rate > 0:
+ drop = torch.rand(states_q.size(0)) < self.state_dropout_rate
+ mask = torch.ones_like(states_q)
+ mask[drop] = 0
+ states_q = states_q * mask
+
+ # decoder
+ chunks = self.get_decoder_chunks(z.size(1), mode=self.split_mode)
+
+ outputs_hq = []
+ outputs_sq = []
+ for chunk in chunks:
+ # decoder with hard quantized input
+ z_dec_reverse = torch.flip(z_q[..., chunk['z_start'] : chunk['z_stop'] : chunk['z_stride'], :], [1])
+ dec_initial_state = states_q[..., chunk['z_stop'] - 1 : chunk['z_stop'], :]
+ features_reverse = self.core_decoder(z_dec_reverse, dec_initial_state)
+ outputs_hq.append((torch.flip(features_reverse, [1]), chunk['features_start'], chunk['features_stop']))
+
+
+ # decoder with soft quantized input
+ z_dec_reverse = torch.flip(z_n[..., chunk['z_start'] : chunk['z_stop'] : chunk['z_stride'], :], [1])
+ features_reverse = self.core_decoder(z_dec_reverse, dec_initial_state)
+ outputs_sq.append((torch.flip(features_reverse, [1]), chunk['features_start'], chunk['features_stop']))
+
+        return {
+            'outputs_hard_quant' : outputs_hq,
+ 'outputs_soft_quant' : outputs_sq,
+ 'z' : z,
+ 'statistical_model' : statistical_model
+ }
+
+ def encode(self, features):
+ """ encoder with quantization and rate estimation """
+
+ z, states = self.core_encoder(features)
+
+ # quantization of initial states
+ states = soft_pvq(states, self.pvq_num_pulses)
+ state_size = m.log2(pvq_codebook_size(self.state_dim, self.pvq_num_pulses))
+
+ return z, states, state_size
+
+ def decode(self, z, initial_state):
+ """ decoder (flips sequences by itself) """
+
+ z_reverse = torch.flip(z, [1])
+ features_reverse = self.core_decoder(z_reverse, initial_state)
+ features = torch.flip(features_reverse, [1])
+
+ return features
+
+ def quantize(self, z, q_ids):
+ """ quantization of latent vectors """
+
+ stats = self.statistical_model(q_ids)
+
+ zq = z * stats['quant_scale']
+ zq = soft_dead_zone(zq, stats['dead_zone'])
+ zq = torch.round(zq)
+
+ sizes = hard_rate_estimate(zq, stats['r_hard'], stats['theta_hard'], reduce=False)
+
+ return zq, sizes
+
+ def unquantize(self, zq, q_ids):
+ """ re-scaling of latent vector """
+
+ stats = self.statistical_model(q_ids)
+
+ z = zq / stats['quant_scale']
+
+ return z
+
+ def freeze_model(self):
+
+ # freeze all parameters
+ for p in self.parameters():
+ p.requires_grad = False
+
+ for p in self.statistical_model.parameters():
+ p.requires_grad = True
+
--- /dev/null
+++ b/dnn/torch/rdovae/requirements.txt
@@ -1,0 +1,5 @@
+numpy
+scipy
+torch
+tqdm
+libs/wexchange-1.2-py3-none-any.whl
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/train_rdovae.py
@@ -1,0 +1,270 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+import torch
+import tqdm
+
+from rdovae import RDOVAE, RDOVAEDataset, distortion_loss, hard_rate_estimate, soft_rate_estimate
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('features', type=str, help='path to feature file in .f32 format')
+parser.add_argument('output', type=str, help='path to output folder')
+
+parser.add_argument('--cuda-visible-devices', type=str, help="comma separated list of cuda visible device indices, default: ''", default="")
+
+
+model_group = parser.add_argument_group(title="model parameters")
+model_group.add_argument('--latent-dim', type=int, help="number of symbols produced by encoder, default: 80", default=80)
+model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 256", default=256)
+model_group.add_argument('--cond-size2', type=int, help="second conditioning size, default: 256", default=256)
+model_group.add_argument('--state-dim', type=int, help="dimensionality of transferred state, default: 24", default=24)
+model_group.add_argument('--quant-levels', type=int, help="number of quantization levels, default: 16", default=16)
+model_group.add_argument('--lambda-min', type=float, help="minimal value for rate lambda, default: 0.0002", default=2e-4)
+model_group.add_argument('--lambda-max', type=float, help="maximal value for rate lambda, default: 0.0104", default=0.0104)
+model_group.add_argument('--pvq-num-pulses', type=int, help="number of pulses for PVQ, default: 82", default=82)
+model_group.add_argument('--state-dropout-rate', type=float, help="state dropout rate, default: 0", default=0.0)
+
+training_group = parser.add_argument_group(title="training parameters")
+training_group.add_argument('--batch-size', type=int, help="batch size, default: 32", default=32)
+training_group.add_argument('--lr', type=float, help='learning rate, default: 3e-4', default=3e-4)
+training_group.add_argument('--epochs', type=int, help='number of training epochs, default: 100', default=100)
+training_group.add_argument('--sequence-length', type=int, help='sequence length, needs to be divisible by 4, default: 256', default=256)
+training_group.add_argument('--lr-decay-factor', type=float, help='learning rate decay factor, default: 2.5e-5', default=2.5e-5)
+training_group.add_argument('--split-mode', type=str, choices=['split', 'random_split'], help='splitting mode for decoder input, default: split', default='split')
+training_group.add_argument('--enable-first-frame-loss', action='store_true', default=False, help='enables dedicated distortion loss on first 4 decoder frames')
+training_group.add_argument('--initial-checkpoint', type=str, help='initial checkpoint to start training from, default: None', default=None)
+training_group.add_argument('--train-decoder-only', action='store_true', help='freeze encoder and statistical model and train decoder only')
+
+args = parser.parse_args()
+
+# set visible devices
+os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_visible_devices
+
+# checkpoints
+checkpoint_dir = os.path.join(args.output, 'checkpoints')
+checkpoint = dict()
+os.makedirs(checkpoint_dir, exist_ok=True)
+
+# training parameters
+batch_size = args.batch_size
+lr = args.lr
+epochs = args.epochs
+sequence_length = args.sequence_length
+lr_decay_factor = args.lr_decay_factor
+split_mode = args.split_mode
+# not exposed
+adam_betas = [0.9, 0.99]
+adam_eps = 1e-8
+
+checkpoint['batch_size'] = batch_size
+checkpoint['lr'] = lr
+checkpoint['lr_decay_factor'] = lr_decay_factor
+checkpoint['split_mode'] = split_mode
+checkpoint['epochs'] = epochs
+checkpoint['sequence_length'] = sequence_length
+checkpoint['adam_betas'] = adam_betas
+
+# logging
+log_interval = 10
+
+# device
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+# model parameters
+cond_size = args.cond_size
+cond_size2 = args.cond_size2
+latent_dim = args.latent_dim
+quant_levels = args.quant_levels
+lambda_min = args.lambda_min
+lambda_max = args.lambda_max
+state_dim = args.state_dim
+# not exposed
+num_features = 20
+
+
+# training data
+feature_file = args.features
+
+# model
+checkpoint['model_args'] = (num_features, latent_dim, quant_levels, cond_size, cond_size2)
+checkpoint['model_kwargs'] = {'state_dim': state_dim, 'split_mode' : split_mode, 'pvq_num_pulses': args.pvq_num_pulses, 'state_dropout_rate': args.state_dropout_rate}
+model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+
+if args.initial_checkpoint is not None:
+ checkpoint = torch.load(args.initial_checkpoint, map_location='cpu')
+ model.load_state_dict(checkpoint['state_dict'], strict=False)
+
+checkpoint['state_dict'] = model.state_dict()
+
+if args.train_decoder_only:
+ if args.initial_checkpoint is None:
+        print("warning: training decoder only without providing initial checkpoint")
+
+ for p in model.core_encoder.module.parameters():
+ p.requires_grad = False
+
+ for p in model.statistical_model.parameters():
+ p.requires_grad = False
+
+# dataloader
+checkpoint['dataset_args'] = (feature_file, sequence_length, num_features, 36)
+checkpoint['dataset_kwargs'] = {'lambda_min': lambda_min, 'lambda_max': lambda_max, 'enc_stride': model.enc_stride, 'quant_levels': quant_levels}
+dataset = RDOVAEDataset(*checkpoint['dataset_args'], **checkpoint['dataset_kwargs'])
+dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4)
+
+
+
+# optimizer
+params = [p for p in model.parameters() if p.requires_grad]
+optimizer = torch.optim.Adam(params, lr=lr, betas=adam_betas, eps=adam_eps)
+
+
+# learning rate scheduler
+scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay_factor * x))
+
+if __name__ == '__main__':
+
+ # push model to device
+ model.to(device)
+
+ # training loop
+
+ for epoch in range(1, epochs + 1):
+
+        print(f"training epoch {epoch}...")
+
+ # running stats
+ running_rate_loss = 0
+ running_soft_dist_loss = 0
+ running_hard_dist_loss = 0
+ running_hard_rate_loss = 0
+ running_soft_rate_loss = 0
+ running_total_loss = 0
+ running_rate_metric = 0
+ previous_total_loss = 0
+ running_first_frame_loss = 0
+
+ with tqdm.tqdm(dataloader, unit='batch') as tepoch:
+ for i, (features, rate_lambda, q_ids) in enumerate(tepoch):
+
+ # zero out gradients
+ optimizer.zero_grad()
+
+ # push inputs to device
+ features = features.to(device)
+ q_ids = q_ids.to(device)
+ rate_lambda = rate_lambda.to(device)
+
+
+ rate_lambda_upsamp = torch.repeat_interleave(rate_lambda, 2, 1)
+
+ # run model
+ model_output = model(features, q_ids)
+
+ # collect outputs
+ z = model_output['z']
+ outputs_hard_quant = model_output['outputs_hard_quant']
+ outputs_soft_quant = model_output['outputs_soft_quant']
+ statistical_model = model_output['statistical_model']
+
+ # rate loss
+ hard_rate = hard_rate_estimate(z, statistical_model['r_hard'], statistical_model['theta_hard'], reduce=False)
+ soft_rate = soft_rate_estimate(z, statistical_model['r_soft'], reduce=False)
+ soft_rate_loss = torch.mean(torch.sqrt(rate_lambda) * soft_rate)
+ hard_rate_loss = torch.mean(torch.sqrt(rate_lambda) * hard_rate)
+ rate_loss = (soft_rate_loss + 0.1 * hard_rate_loss)
+ hard_rate_metric = torch.mean(hard_rate)
+
+ ## distortion losses
+
+ # hard quantized decoder input
+ distortion_loss_hard_quant = torch.zeros_like(rate_loss)
+ for dec_features, start, stop in outputs_hard_quant:
+ distortion_loss_hard_quant += distortion_loss(features[..., start : stop, :], dec_features, rate_lambda_upsamp[..., start : stop]) / len(outputs_hard_quant)
+
+ first_frame_loss = torch.zeros_like(rate_loss)
+ for dec_features, start, stop in outputs_hard_quant:
+ first_frame_loss += distortion_loss(features[..., stop-4 : stop, :], dec_features[..., -4:, :], rate_lambda_upsamp[..., stop - 4 : stop]) / len(outputs_hard_quant)
+
+ # soft quantized decoder input
+ distortion_loss_soft_quant = torch.zeros_like(rate_loss)
+ for dec_features, start, stop in outputs_soft_quant:
+ distortion_loss_soft_quant += distortion_loss(features[..., start : stop, :], dec_features, rate_lambda_upsamp[..., start : stop]) / len(outputs_soft_quant)
+
+ # total loss
+ total_loss = rate_loss + (distortion_loss_hard_quant + distortion_loss_soft_quant) / 2
+
+ if args.enable_first_frame_loss:
+ total_loss = total_loss + 0.5 * torch.relu(first_frame_loss - distortion_loss_hard_quant)
+
+
+ total_loss.backward()
+
+ optimizer.step()
+
+ model.clip_weights()
+
+ scheduler.step()
+
+ # collect running stats
+ running_hard_dist_loss += float(distortion_loss_hard_quant.detach().cpu())
+ running_soft_dist_loss += float(distortion_loss_soft_quant.detach().cpu())
+ running_rate_loss += float(rate_loss.detach().cpu())
+ running_rate_metric += float(hard_rate_metric.detach().cpu())
+ running_total_loss += float(total_loss.detach().cpu())
+ running_first_frame_loss += float(first_frame_loss.detach().cpu())
+ running_soft_rate_loss += float(soft_rate_loss.detach().cpu())
+ running_hard_rate_loss += float(hard_rate_loss.detach().cpu())
+
+ if (i + 1) % log_interval == 0:
+ current_loss = (running_total_loss - previous_total_loss) / log_interval
+ tepoch.set_postfix(
+ current_loss=current_loss,
+ total_loss=running_total_loss / (i + 1),
+ dist_hq=running_hard_dist_loss / (i + 1),
+ dist_sq=running_soft_dist_loss / (i + 1),
+ rate_loss=running_rate_loss / (i + 1),
+ rate=running_rate_metric / (i + 1),
+ ffloss=running_first_frame_loss / (i + 1),
+ rateloss_hard=running_hard_rate_loss / (i + 1),
+ rateloss_soft=running_soft_rate_loss / (i + 1)
+ )
+ previous_total_loss = running_total_loss
+
+ # save checkpoint
+        checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch}.pth')
+        checkpoint['state_dict'] = model.state_dict()
+ checkpoint['loss'] = running_total_loss / len(dataloader)
+ checkpoint['epoch'] = epoch
+ torch.save(checkpoint, checkpoint_path)
--- /dev/null
+++ b/dnn/training_tf2/decode_rdovae.py
@@ -1,0 +1,111 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2018-2019 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train an LPCNet model
+
+import argparse
+#from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Train a PLC model')
+
+parser.add_argument('bits', metavar='<bits file>', help='binary features file (int16)')
+parser.add_argument('output', metavar='<output>', help='output features')
+parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--weights', metavar='<input weights>', help='model weights')
+parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 1)')
+parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
+
+
+args = parser.parse_args()
+
+import importlib
+rdovae = importlib.import_module(args.model)
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+from rdovae import pvq_quantize
+from rdovae import apply_dead_zone
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
+model.load_weights(args.weights)
+
+lpc_order = 16
+nbits=80
+
+
+bits_file = args.bits
+sequence_size = args.seq_length
+
+# u for unquantised, load 16 bit PCM samples and convert to mu-law
+
+
+bits = np.memmap(bits_file + "-syms.f32", dtype='float32', mode='r')
+nb_sequences = len(bits)//(40*sequence_size)//batch_size*batch_size
+bits = bits[:nb_sequences*sequence_size*40]
+
+bits = np.reshape(bits, (nb_sequences, sequence_size//2, 20*4))
+print(bits.shape)
+
+lambda_val = 0.001 * np.ones((nb_sequences, sequence_size//2, 1))
+quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')
+quant_id = quant_id[:,:,0]
+quant_embed = qembedding(quant_id)
+quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])
+dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])
+
+bits = bits*quant_scale
+bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
+bits = bits/quant_scale
+
+
+state = np.memmap(bits_file + "-state.f32", dtype='float32', mode='r')
+
+state = np.reshape(state, (nb_sequences, sequence_size//2, 24))
+state = state[:,-1,:]
+state = pvq_quantize(state, 82)
+#state = state/(1e-15+tf.norm(state, axis=-1,keepdims=True))
+
+print("shapes are:")
+print(bits.shape)
+print(state.shape)
+
+bits = bits[:,1::2,:]
+features = decoder.predict([bits, state], batch_size=batch_size)
+
+features.astype('float32').tofile(args.output)
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -26,6 +26,7 @@
'''
import os
+import io
import lpcnet
import sys
import numpy as np
@@ -52,11 +53,17 @@
max_mdense_tmp = 1
def printVector(f, vector, name, dtype='float', dotp=False):
+ global array_list
if dotp:
vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
vector = vector.transpose((2, 0, 3, 1))
v = np.reshape(vector, (-1));
#print('static const float ', name, '[', len(v), '] = \n', file=f)
+    if name not in array_list:
+ array_list.append(name)
+        f.write('#ifndef USE_WEIGHTS_FILE\n')
+        f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))
+        f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
    f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))
    for i in range(0, len(v)):
        f.write('{}'.format(v[i]))
@@ -69,7 +76,8 @@
else:
f.write(" ")#print(v, file=f)
- f.write('\n};\n\n')+ f.write('\n};\n')+ f.write('#endif\n\n')return;
def printSparseVector(f, A, name, have_diag=True):
@@ -133,11 +141,11 @@
reset_after = 1
neurons = weights[0].shape[1]//3
max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const SparseGRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_recurrent_weights_diag,\n   {}_recurrent_weights,\n   {}_recurrent_weights_idx,\n   {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
-    hf.write('extern const SparseGRULayer {};\n\n'.format(name));
+    model_struct.write('  SparseGRULayer {};\n'.format(name));
+    model_init.write('  if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx", {}, ACTIVATION_{}, {})) return 1;\n'
+            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
return True
def dump_grub(self, f, hf, gru_a_size):
@@ -169,9 +177,9 @@
reset_after = 1
neurons = weights[0].shape[1]//3
max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   {}_weights_idx,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+    model_struct.write('  GRULayer {};\n'.format(name));
+    model_init.write('  if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
            .format(name, name, name, name, name, name, gru_a_size, weights[0].shape[1]//3, activation, reset_after))
-    hf.write('extern const GRULayer {};\n\n'.format(name));
return True
def dump_gru_layer_dummy(self, f, hf):
@@ -186,10 +194,10 @@
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
printVector(f, weights, name + '_weights')
printVector(f, bias, name + '_bias')
-    f.write('const DenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
-    hf.write('extern const DenseLayer {};\n\n'.format(name));
+    model_struct.write('  DenseLayer {};\n'.format(name));
+    model_init.write('  if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
+            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
def dump_dense_layer(self, f, hf):
name = self.name
@@ -211,10 +219,10 @@
printVector(f, np.transpose(weights[2], (1, 0)), name + '_factor')
activation = self.activation.__name__.upper()
max_mdense_tmp = max(max_mdense_tmp, weights[0].shape[0]*weights[0].shape[2])
-    f.write('const MDenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}_factor,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[0]))
-    hf.write('extern const MDenseLayer {};\n\n'.format(name));
+    model_struct.write('  MDenseLayer {};\n'.format(name));
+    model_init.write('  if (mdense_init(&model->{}, arrays, "{}_bias", "{}_weights", "{}_factor", {}, {}, {}, ACTIVATION_{})) return 1;\n'
+            .format(name, name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
return False
MDense.dump_layer = dump_mdense_layer
@@ -227,12 +235,12 @@
printVector(f, weights[-1], name + '_bias')
activation = self.activation.__name__.upper()
max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
-    f.write('const Conv1DLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
-    hf.write('extern const Conv1DLayer {};\n\n'.format(name));
+    model_struct.write('  Conv1DLayer {};\n'.format(name));
+    model_init.write('  if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
+            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
return True
Conv1D.dump_layer = dump_conv1d_layer
@@ -239,10 +247,10 @@
def dump_embedding_layer_impl(name, weights, f, hf):
printVector(f, weights, name + '_weights')
- f.write('const EmbeddingLayer {} = {{\n {}_weights,\n {}, {}\n}};\n\n'
- .format(name, name, weights.shape[0], weights.shape[1]))
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
- hf.write('extern const EmbeddingLayer {};\n\n'.format(name));
+ model_struct.write(' EmbeddingLayer {};\n'.format(name));
+ model_init.write(' if (embedding_init(&model->{}, arrays, "{}_weights", {}, {})) return 1;\n'
+ .format(name, name, weights.shape[0], weights.shape[1]))
def dump_embedding_layer(self, f, hf):
name = self.name
@@ -281,6 +289,12 @@
f = open(cfile, 'w')
hf = open(hfile, 'w')
+ model_struct = io.StringIO()
+ model_init = io.StringIO()
+ model_struct.write('typedef struct {\n')
+ model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
+ model_init.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays) {\n')
+ array_list = []
f.write('/*This file is automatically generated from a Keras model*/\n')
f.write('/*based on model {}*/\n\n'.format(sys.argv[1]))
@@ -326,13 +340,13 @@
W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
#FIXME: dump only half the biases
b = model.get_layer('gru_a').get_weights()[2]
- dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf)
+ dump_dense_layer_impl('gru_a_dense_feature', W, b[:len(b)//2], 'LINEAR', f, hf)
W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:]
b = model.get_layer('gru_b').get_weights()[2]
# Set biases to zero because they'll be included in the GRU input part
# (we need regular and SU biases)
- dump_dense_layer_impl('gru_b_dense_feature', W, 0*b, 'LINEAR', f, hf)
+ dump_dense_layer_impl('gru_b_dense_feature', W, 0*b[:len(b)//2], 'LINEAR', f, hf)
dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1)
layer_list = []
@@ -342,6 +356,19 @@
dump_sparse_gru(model.get_layer('gru_a'), f, hf)
+ f.write('#ifndef USE_WEIGHTS_FILE\n')
+ f.write('const WeightArray lpcnet_arrays[] = {\n')
+ for name in array_list:
+ f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
+ f.write(' {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
+ f.write('#endif\n')
+ f.write(' {NULL, 0, 0, NULL}\n};\n')
+ f.write('#endif\n')
+
+ model_init.write(' return 0;\n}\n')
+ model_init.write('#endif\n')
+ f.write(model_init.getvalue())
+
hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))
@@ -350,8 +377,11 @@
hf.write('typedef struct {\n')
for i, name in enumerate(layer_list):
hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
- hf.write('} NNetState;\n')
+ hf.write('} NNetState;\n\n')
+ model_struct.write('} LPCNetModel;\n\n')
+ hf.write(model_struct.getvalue())
+ hf.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays);\n\n')
hf.write('\n\n#endif\n')
f.close()
--- a/dnn/training_tf2/dump_plc.py
+++ b/dnn/training_tf2/dump_plc.py
@@ -27,6 +27,7 @@
'''
import lpcnet_plc
+import io
import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
@@ -41,11 +42,17 @@
max_conv_inputs = 1
def printVector(f, vector, name, dtype='float', dotp=False):
+ global array_list
if dotp:
vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
vector = vector.transpose((2, 0, 3, 1))
v = np.reshape(vector, (-1));
#print('static const float ', name, '[', len(v), '] = \n', file=f)
+ if name not in array_list:
+ array_list.append(name)
+ f.write('#ifndef USE_WEIGHTS_FILE\n')
+ f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))
+ f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
for i in range(0, len(v)):
f.write('{}'.format(v[i]))
@@ -58,7 +65,8 @@
else:
f.write(" ")
#print(v, file=f)
- f.write('\n};\n\n')
+ f.write('\n};\n')
+ f.write('#endif\n\n')
return;
def printSparseVector(f, A, name, have_diag=True):
@@ -122,11 +130,11 @@
reset_after = 1
neurons = weights[0].shape[1]//3
max_rnn_neurons = max(max_rnn_neurons, neurons)
- f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
- .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
- hf.write('extern const SparseGRULayer {};\n\n'.format(name));
+ model_struct.write(' SparseGRULayer {};\n'.format(name));
+ model_init.write(' if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx", {}, ACTIVATION_{}, {})) return 1;\n'
+ .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
return True
def dump_gru_layer(self, f, hf):
@@ -158,11 +166,11 @@
reset_after = 1
neurons = weights[0].shape[1]//3
max_rnn_neurons = max(max_rnn_neurons, neurons)
- f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {}_weights_idx,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
- .format(name, name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
- hf.write('extern const GRULayer {};\n\n'.format(name));
+ model_struct.write(' GRULayer {};\n'.format(name));
+ model_init.write(' if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
+ .format(name, name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
return True
GRU.dump_layer = dump_gru_layer
@@ -178,10 +186,10 @@
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
printVector(f, weights, name + '_weights')
printVector(f, bias, name + '_bias')
- f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
- .format(name, name, name, weights.shape[0], weights.shape[1], activation))
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
- hf.write('extern const DenseLayer {};\n\n'.format(name));
+ model_struct.write(' DenseLayer {};\n'.format(name));
+ model_init.write(' if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
+ .format(name, name, name, weights.shape[0], weights.shape[1], activation))
def dump_dense_layer(self, f, hf):
name = self.name
@@ -202,12 +210,12 @@
printVector(f, weights[-1], name + '_bias')
activation = self.activation.__name__.upper()
max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
- f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
- .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
- hf.write('extern const Conv1DLayer {};\n\n'.format(name));
+ model_struct.write(' Conv1DLayer {};\n'.format(name));
+ model_init.write(' if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
+ .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
return True
Conv1D.dump_layer = dump_conv1d_layer
@@ -235,6 +243,12 @@
f = open(cfile, 'w')
hf = open(hfile, 'w')
+model_struct = io.StringIO()
+model_init = io.StringIO()
+model_struct.write('typedef struct {\n')
+model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
+model_init.write('int init_plc_model(PLCModel *model, const WeightArray *arrays) {\n')
+array_list = []
f.write('/*This file is automatically generated from a Keras model*/\n')
@@ -250,7 +264,20 @@
layer_list.append(layer.name)
#dump_sparse_gru(model.get_layer('gru_a'), f, hf)
+f.write('#ifndef USE_WEIGHTS_FILE\n')
+f.write('const WeightArray lpcnet_plc_arrays[] = {\n')
+for name in array_list:
+ f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
+ f.write(' {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
+ f.write('#endif\n')
+f.write(' {NULL, 0, 0, NULL}\n};\n')
+f.write('#endif\n')
+model_init.write(' return 0;\n}\n')
+model_init.write('#endif\n')
+f.write(model_init.getvalue())
+
+
hf.write('#define PLC_MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
#hf.write('#define PLC_MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
@@ -257,7 +284,11 @@
hf.write('typedef struct {\n')
for i, name in enumerate(layer_list):
hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
-hf.write('} PLCNetState;\n')
+hf.write('} PLCNetState;\n\n')
+
+model_struct.write('} PLCModel;\n\n')
+hf.write(model_struct.getvalue())
+hf.write('int init_plc_model(PLCModel *model, const WeightArray *arrays);\n\n')
hf.write('\n\n#endif\n')
--- /dev/null
+++ b/dnn/training_tf2/dump_rdovae.py
@@ -1,0 +1,306 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import argparse
+from ftplib import parse150
+import os
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)+parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)+
+args = parser.parse_args()
+
+# now import the heavy stuff
+import tensorflow as tf
+import numpy as np
+from keraslayerdump import dump_conv1d_layer, dump_dense_layer, dump_gru_layer, printVector
+from rdovae import new_rdovae_model
+
+def start_header(header_fid, header_name):
+ header_guard = os.path.basename(header_name)[:-2].upper() + "_H"
+ header_fid.write(
+f"""
+#ifndef {header_guard}
+#define {header_guard}
+
+"""
+ )
+
+def finish_header(header_fid):
+ header_fid.write(
+"""
+#endif
+
+"""
+ )
+
+def start_source(source_fid, header_name, weight_file):
+ source_fid.write(
+f"""
+/* this source file was automatically generated from weight file {weight_file} */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "{header_name}"+
+"""
+ )
+
+def finish_source(source_fid):
+ pass
+
+
+def dump_statistical_model(qembedding, f, fh):
+ w = qembedding.weights[0].numpy()
+ levels, dim = w.shape
+ N = dim // 6
+
+ print("dumping statistical model")+ quant_scales = tf.math.softplus(w[:, : N]).numpy()
+ dead_zone = 0.05 * tf.math.softplus(w[:, N : 2 * N]).numpy()
+ r = tf.math.sigmoid(w[:, 5 * N : 6 * N]).numpy()
+ p0 = tf.math.sigmoid(w[:, 4 * N : 5 * N]).numpy()
+ p0 = 1 - r ** (0.5 + 0.5 * p0)
+
+ quant_scales_q8 = np.round(quant_scales * 2**8).astype(np.uint16)
+ dead_zone_q10 = np.round(dead_zone * 2**10).astype(np.uint16)
+ r_q15 = np.round(r * 2**15).astype(np.uint16)
+ p0_q15 = np.round(p0 * 2**15).astype(np.uint16)
+
+ printVector(f, quant_scales_q8, 'dred_quant_scales_q8', dtype='opus_uint16', static=False)
+ printVector(f, dead_zone_q10, 'dred_dead_zone_q10', dtype='opus_uint16', static=False)
+ printVector(f, r_q15, 'dred_r_q15', dtype='opus_uint16', static=False)
+ printVector(f, p0_q15, 'dred_p0_q15', dtype='opus_uint16', static=False)
+
+ fh.write(
+f"""
+extern const opus_uint16 dred_quant_scales_q8[{levels * N}];
+extern const opus_uint16 dred_dead_zone_q10[{levels * N}];
+extern const opus_uint16 dred_r_q15[{levels * N}];
+extern const opus_uint16 dred_p0_q15[{levels * N}];
+
+"""
+ )
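
For reference, the Q8/Q10/Q15 arrays written above are plain fixed-point versions of the softplus/sigmoid outputs. A minimal sketch of the inverse mapping (an illustrative helper, not part of this patch):

import numpy as np

def dequantize_dred_tables(quant_scales_q8, dead_zone_q10, r_q15, p0_q15):
    # Undo the fixed-point scaling used by dump_statistical_model() above.
    quant_scales = quant_scales_q8.astype(np.float32) / 2**8
    dead_zone = dead_zone_q10.astype(np.float32) / 2**10
    r = r_q15.astype(np.float32) / 2**15
    p0 = p0_q15.astype(np.float32) / 2**15
    return quant_scales, dead_zone, r, p0
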
+
+if __name__ == "__main__":
+
+ model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
+ model.load_weights(args.weights)
+
+
+
+
+ # encoder
+ encoder_dense_names = [
+ 'enc_dense1',
+ 'enc_dense3',
+ 'enc_dense5',
+ 'enc_dense7',
+ 'enc_dense8',
+ 'gdense1',
+ 'gdense2'
+ ]
+
+ encoder_gru_names = [
+ 'enc_dense2',
+ 'enc_dense4',
+ 'enc_dense6'
+ ]
+
+ encoder_conv1d_names = [
+ 'bits_dense'
+ ]
+
+ source_fid = open("dred_rdovae_enc_data.c", 'w')+ header_fid = open("dred_rdovae_enc_data.h", 'w')+
+ start_header(header_fid, "dred_rdovae_enc_data.h")
+ start_source(source_fid, "dred_rdovae_enc_data.h", os.path.basename(args.weights))
+
+ header_fid.write(
+f"""
+#include "dred_rdovae_constants.h"
+
+#include "nnet.h"
+"""
+ )
+
+ # dump GRUs
+ max_rnn_neurons_enc = max(
+ [
+ dump_gru_layer(encoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
+ for name in encoder_gru_names
+ ]
+ )
+
+ # dump conv layers
+ max_conv_inputs = max(
+ [
+ dump_conv1d_layer(encoder.get_layer(name), source_fid, header_fid)
+ for name in encoder_conv1d_names
+ ]
+ )
+
+ # dump Dense layers
+ for name in encoder_dense_names:
+ layer = encoder.get_layer(name)
+ dump_dense_layer(layer, source_fid, header_fid)
+
+ # some global constants
+ header_fid.write(
+f"""
+
+#define DRED_ENC_MAX_RNN_NEURONS {max_rnn_neurons_enc}
+
+#define DRED_ENC_MAX_CONV_INPUTS {max_conv_inputs}
+
+"""
+ )
+
+ finish_header(header_fid)
+ finish_source(source_fid)
+
+ header_fid.close()
+ source_fid.close()
+
+ # statistical model
+ source_fid = open("dred_rdovae_stats_data.c", 'w')+ header_fid = open("dred_rdovae_stats_data.h", 'w')+
+ start_header(header_fid, "dred_rdovae_stats_data.h")
+ start_source(source_fid, "dred_rdovae_stats_data.h", os.path.basename(args.weights))
+
+ header_fid.write(
+"""
+
+#include "opus_types.h"
+
+"""
+ )
+
+ dump_statistical_model(qembedding, source_fid, header_fid)
+
+ finish_header(header_fid)
+ finish_source(source_fid)
+
+ header_fid.close()
+ source_fid.close()
+
+ # decoder
+ decoder_dense_names = [
+ 'state1',
+ 'state2',
+ 'state3',
+ 'dec_dense1',
+ 'dec_dense3',
+ 'dec_dense5',
+ 'dec_dense7',
+ 'dec_dense8',
+ 'dec_final'
+ ]
+
+ decoder_gru_names = [
+ 'dec_dense2',
+ 'dec_dense4',
+ 'dec_dense6'
+ ]
+
+ source_fid = open("dred_rdovae_dec_data.c", 'w')+ header_fid = open("dred_rdovae_dec_data.h", 'w')+
+ start_header(header_fid, "dred_rdovae_dec_data.h")
+ start_source(source_fid, "dred_rdovae_dec_data.h", os.path.basename(args.weights))
+
+ header_fid.write(
+f"""
+#include "dred_rdovae_constants.h"
+
+#include "nnet.h"
+"""
+ )
+
+
+ # dump GRUs
+ max_rnn_neurons_dec = max(
+ [
+ dump_gru_layer(decoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
+ for name in decoder_gru_names
+ ]
+ )
+
+ # dump Dense layers
+ for name in decoder_dense_names:
+ layer = decoder.get_layer(name)
+ dump_dense_layer(layer, source_fid, header_fid)
+
+ # some global constants
+ header_fid.write(
+f"""
+
+#define DRED_DEC_MAX_RNN_NEURONS {max_rnn_neurons_dec}
+
+"""
+ )
+
+ finish_header(header_fid)
+ finish_source(source_fid)
+
+ header_fid.close()
+ source_fid.close()
+
+ # common constants
+ header_fid = open("dred_rdovae_constants.h", 'w')+ start_header(header_fid, "dred_rdovae_constants.h")
+
+ header_fid.write(
+f"""
+#define DRED_NUM_FEATURES 20
+
+#define DRED_LATENT_DIM {args.latent_dim}
+
+#define DRED_STATE_DIM {24}
+
+#define DRED_NUM_QUANTIZATION_LEVELS {qembedding.weights[0].shape[0]}
+
+#define DRED_MAX_RNN_NEURONS {max(max_rnn_neurons_enc, max_rnn_neurons_dec)}
+
+#define DRED_MAX_CONV_INPUTS {max_conv_inputs}
+"""
+ )
+
+ finish_header(header_fid)
\ No newline at end of file
--- /dev/null
+++ b/dnn/training_tf2/encode_rdovae.py
@@ -1,0 +1,125 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2018-2019 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train an LPCNet model
+
+import argparse
+#from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Train a PLC model')
+
+parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
+parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
+parser.add_argument('--model', metavar='<model>', default='rdovae', help='PLC model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--weights', metavar='<input weights>', help='model weights')
+parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 128)')
+parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
+
+
+args = parser.parse_args()
+
+import importlib
+rdovae = importlib.import_module(args.model)
+
+from rdovae import apply_dead_zone
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+from rdovae import pvq_quantize
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
+model.load_weights(args.weights)
+
+lpc_order = 16
+
+feature_file = args.features
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+sequence_size = args.seq_length
+
+# u for unquantised, load 16 bit PCM samples and convert to mu-law
+
+
+features = np.memmap(feature_file, dtype='float32', mode='r')
+nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
+features = features[:nb_sequences*sequence_size*nb_features]
+
+features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
+print(features.shape)
+features = features[:, :, :nb_used_features]
+#features = np.random.randn(73600, 1000, 17)
+
+
+bits, gru_state_dec = encoder.predict([features], batch_size=batch_size)
+(gru_state_dec).astype('float32').tofile(args.output + "-state.f32")
+
+
+#dist = rdovae.feat_dist_loss(features, quant_out)
+#rate = rdovae.sq1_rate_loss(features, model_bits)
+#rate2 = rdovae.sq_rate_metric(features, model_bits)
+#print(dist, rate, rate2)
+
+print("shapes are:")+print(bits.shape)
+print(gru_state_dec.shape)
+
+features.astype('float32').tofile(args.output + "-input.f32")+#quant_out.astype('float32').tofile(args.output + "-enc_dec.f32")+nbits=80
+bits.astype('float32').tofile(args.output + "-syms.f32")+
+lambda_val = 0.0002 * np.ones((nb_sequences, sequence_size//2, 1))
+quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')+quant_id = quant_id[:,:,0]
+quant_embed = qembedding(quant_id)
+quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])
+dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])
+
+bits = bits*quant_scale
+bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
+bits = bits/quant_scale
+
+gru_state_dec = pvq_quantize(gru_state_dec, 82)
+#gru_state_dec = gru_state_dec/(1e-15+tf.norm(gru_state_dec, axis=-1,keepdims=True))
+gru_state_dec = gru_state_dec[:,-1,:]
+dec_out = decoder([bits[:,1::2,:], gru_state_dec])
+
+print(dec_out.shape)
+
+dec_out.numpy().astype('float32').tofile(args.output + "-quant_out.f32")
--- /dev/null
+++ b/dnn/training_tf2/fec_encoder.py
@@ -1,0 +1,257 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe and Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+import os
+import subprocess
+import argparse
+
+
+import numpy as np
+from scipy.io import wavfile
+import tensorflow as tf
+
+from rdovae import new_rdovae_model, pvq_quantize, apply_dead_zone, sq_rate_metric
+from fec_packets import write_fec_packets, read_fec_packets
+
+
+debug = False
+
+if debug:
+ args = type('dummy', (object,),
+ {
+ 'input' : 'item1.wav',
+ 'weights' : 'testout/rdovae_alignment_fix_1024_120.h5',
+ 'enc_lambda' : 0.0007,
+ 'output' : "test_0007.fec",
+ 'cond_size' : 1024,
+ 'num_redundancy_frames' : 64,
+ 'extra_delay' : 0,
+ 'dump_data' : './dump_data'
+ })()
+ os.environ['CUDA_VISIBLE_DEVICES']=""
+else:
+ parser = argparse.ArgumentParser(description='Encode redundancy for Opus neural FEC. Designed for use with voip application and 20ms frames')
+
+ parser.add_argument('input', metavar='<input signal>', help='audio input (.wav or .raw or .pcm as int16)')
+ parser.add_argument('weights', metavar='<weights>', help='trained model file (.h5)')
+# parser.add_argument('enc_lambda', metavar='<lambda>', type=float, help='lambda for controlling encoder rate')
+ parser.add_argument('output', type=str, help='output file (will be extended with .fec)')
+
+ parser.add_argument('--dump-data', type=str, default='./dump_data', help='path to dump data executable (default ./dump_data)')
+ parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+ parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 40)", default=40)
+ parser.add_argument('--num-redundancy-frames', default=64, type=int, help='number of redundancy frames (20ms) per packet (default 64)')
+ parser.add_argument('--extra-delay', default=0, type=int, help="last features in packet are calculated with the decoder aligned samples, use this option to add extra delay (in samples at 16kHz)")
+ parser.add_argument('--lossfile', type=str, help='file containing loss trace (0 for frame received, 1 for lost)')
+
+ parser.add_argument('--debug-output', action='store_true', help='if set, differently assembled features are written to disk')
+
+ args = parser.parse_args()
+
+model, encoder, decoder, qembedding = new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=1, nb_quant=args.quant_levels, cond_size=args.cond_size)
+model.load_weights(args.weights)
+
+lpc_order = 16
+
+## prepare input signal
+# SILK frame size is 20ms and LPCNet subframes are 10ms
+subframe_size = 160
+frame_size = 2 * subframe_size
+
+# 91 samples delay to align with SILK decoded frames
+silk_delay = 91
+
+# prepend zeros to have enough history to produce the first package
+zero_history = (args.num_redundancy_frames - 1) * frame_size
+
+# dump data has a (feature) delay of 10ms
+dump_data_delay = 160
+
+total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay
+
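
With the default settings (num_redundancy_frames=64, extra_delay=0), the delay bookkeeping above works out as follows (worked example only):

subframe_size = 160
frame_size = 2 * subframe_size              # 320 samples = 20 ms at 16 kHz
zero_history = (64 - 1) * frame_size        # 20160 samples of prepended zeros
total_delay = 91 + zero_history + 0 - 160   # silk_delay + zero_history + extra_delay - dump_data_delay = 20091
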
+# load signal
+if args.input.endswith('.raw') or args.input.endswith('.pcm') or args.input.endswith('.sw'):
+ signal = np.fromfile(args.input, dtype='int16')
+
+elif args.input.endswith('.wav'):
+ fs, signal = wavfile.read(args.input)
+else:
+ raise ValueError(f'unknown input signal format: {args.input}')
+
+# fill up last frame with zeros
+padded_signal_length = len(signal) + total_delay
+tail = padded_signal_length % frame_size
+right_padding = (frame_size - tail) % frame_size
+
+signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))
+
+padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw'
+signal.tofile(padded_signal_file)
+
+# write signal and call dump_data to create features
+
+feature_file = os.path.splitext(args.input)[0] + '_features.f32'
+command = f"{args.dump_data} -test {padded_signal_file} {feature_file}"+r = subprocess.run(command, shell=True)
+if r.returncode != 0:
+ raise RuntimeError(f"command '{command}' failed with exit code {r.returncode}")+
+# load features
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+
+# load features
+features = np.fromfile(feature_file, dtype='float32')
+num_subframes = len(features) // nb_features
+num_subframes = 2 * (num_subframes // 2)
+num_frames = num_subframes // 2
+
+features = np.reshape(features, (1, -1, nb_features))
+features = features[:, :, :nb_used_features]
+features = features[:, :num_subframes, :]
+
+#variable quantizer depending on the delay
+q0 = 3
+q1 = 15
+quant_id = np.round(q1 + (q0-q1)*np.arange(args.num_redundancy_frames//2)/args.num_redundancy_frames).astype('int16')
+#print(quant_id)
+
+quant_embed = qembedding(quant_id)
+
+# run encoder
+print("running fec encoder...")
+symbols, gru_state_dec = encoder.predict(features)
+
+# apply quantization
+nsymbols = 80
+quant_scale = tf.math.softplus(quant_embed[:, :nsymbols]).numpy()
+dead_zone = tf.math.softplus(quant_embed[:, nsymbols : 2 * nsymbols]).numpy()
+#symbols = apply_dead_zone([symbols, dead_zone]).numpy()
+#qsymbols = np.round(symbols)
+quant_gru_state_dec = pvq_quantize(gru_state_dec, 82)
+
+# rate estimate
+hard_distr_embed = tf.math.sigmoid(quant_embed[:, 4 * nsymbols : ]).numpy()
+#rate_input = np.concatenate((qsymbols, hard_distr_embed, enc_lambda), axis=-1)
+#rates = sq_rate_metric(None, rate_input, reduce=False).numpy()
+
+# run decoder
+input_length = args.num_redundancy_frames // 2
+offset = args.num_redundancy_frames - 1
+
+packets = []
+packet_sizes = []
+
+sym_batch = np.zeros((num_frames-offset, args.num_redundancy_frames//2, nsymbols), dtype='float32')
+quant_state = quant_gru_state_dec[0, offset:num_frames, :]
+#pack symbols for batch processing
+for i in range(offset, num_frames):
+ sym_batch[i-offset, :, :] = symbols[0, i - 2 * input_length + 2 : i + 1 : 2, :]
+
+#quantize symbols
+sym_batch = sym_batch * quant_scale
+sym_batch = apply_dead_zone([sym_batch, dead_zone]).numpy()
+sym_batch = np.round(sym_batch)
+
+hard_distr_embed = np.broadcast_to(hard_distr_embed, (sym_batch.shape[0], sym_batch.shape[1], 2*sym_batch.shape[2]))
+fake_lambda = np.ones((sym_batch.shape[0], sym_batch.shape[1], 1), dtype='float32')
+rate_input = np.concatenate((sym_batch, hard_distr_embed, fake_lambda), axis=-1)
+rates = sq_rate_metric(None, rate_input, reduce=False).numpy()
+#print(rates.shape)
+print("average rate = ", np.mean(rates[args.num_redundancy_frames:,:]))+
+#sym_batch.tofile('qsyms.f32')+
+sym_batch = sym_batch / quant_scale
+#print(sym_batch.shape, quant_state.shape)
+#features = decoder.predict([sym_batch, quant_state])
+features = decoder([sym_batch, quant_state])
+
+#for i in range(offset, num_frames):
+# print(f"processing frame {i - offset}...")+# features = decoder.predict([qsymbols[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_embed_dec[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_gru_state_dec[:, i, :]])
+# packets.append(features)
+# packet_size = 8 * int((np.sum(rates[:, i - 2 * input_length + 2 : i + 1 : 2]) + 7) / 8) + 64
+# packet_sizes.append(packet_size)
+
+
+# write packets
+packet_file = args.output + '.fec' if not args.output.endswith('.fec') else args.output
+#write_fec_packets(packet_file, packets, packet_sizes)
+
+
+#print(f"average redundancy rate: {int(round(sum(packet_sizes) / len(packet_sizes) * 50 / 1000))} kbps")
+
+if args.lossfile != None:
+ loss = np.loadtxt(args.lossfile, dtype='int16')
+ fec_out = np.zeros((features.shape[0]*2, features.shape[-1]), dtype='float32')
+ foffset = -2
+ ptr = 0;
+ count = 2;
+ for i in range(features.shape[0]):
+ if (loss[i] == 0) or (i == features.shape[0]-1):
+ fec_out[ptr:ptr+count,:] = features[i, foffset:, :]
+ #print("filled ", count)+ foffset = -2
+ ptr = ptr+count
+ count = 2
+ else:
+ count = count + 2
+ foffset = foffset - 2
+
+ fec_out_full = np.zeros((fec_out.shape[0], nb_features), dtype=np.float32)
+ fec_out_full[:, :nb_used_features] = fec_out
+
+ fec_out_full.tofile(packet_file[:-4] + f'_fec.f32')
+
+
+#create packets array like in the original version for debugging purposes
+for i in range(offset, num_frames):
+ packets.append(features[i-offset:i-offset+1, :, :])
+
+if args.debug_output:
+ import itertools
+
+ #batches = [2, 4]
+ batches = [4]
+ #offsets = [0, 4, 20]
+ offsets = [0, (args.num_redundancy_frames - 2)*2]
+ # sanity checks
+ # 1. concatenate features at offset 0
+ for batch, offset in itertools.product(batches, offsets):
+
+ stop = packets[0].shape[1] - offset
+ print(batch, offset, stop)
+ test_features = np.concatenate([packet[:,stop - batch: stop, :] for packet in packets[::batch//2]], axis=1)
+
+ test_features_full = np.zeros((test_features.shape[1], nb_features), dtype=np.float32)
+ test_features_full[:, :nb_used_features] = test_features[0, :, :]
+
+ print(f"writing debug output {packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32'}")+ test_features_full.tofile(packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32')+
--- /dev/null
+++ b/dnn/training_tf2/fec_packets.c
@@ -1,0 +1,142 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "fec_packets.h"
+
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index)
+{
+
+ int16_t version;
+ int16_t header_size;
+ int16_t num_packets;
+ int16_t packet_size;
+ int16_t subframe_size;
+ int16_t subframes_per_packet;
+ int16_t num_features;
+ long offset;
+
+ FILE *fid = fopen(filename, "rb");
+
+ /* read header */
+ if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+ if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+ if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+ if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+ if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+ if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+ if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+ /* check if indices are valid */
+ if (packet_index >= num_packets || subframe_index >= subframes_per_packet)
+ {+ fprintf(stderr, "get_fec_frame: index out of bounds\n");
+ goto error;
+ }
+
+ /* calculate offset in file (+ 2 is for rate) */
+ offset = header_size + packet_index * packet_size + 2 + subframe_index * subframe_size;
+ fseek(fid, offset, SEEK_SET);
+
+ /* read features */
+ if (fread(features, sizeof(*features), num_features, fid) != num_features) goto error;
+
+ fclose(fid);
+ return 0;
+
+error:
+ fclose(fid);
+ return 1;
+}
+
+int get_fec_rate(const char * const filename, int packet_index)
+{
+ int16_t version;
+ int16_t header_size;
+ int16_t num_packets;
+ int16_t packet_size;
+ int16_t subframe_size;
+ int16_t subframes_per_packet;
+ int16_t num_features;
+ long offset;
+ int16_t rate;
+
+ FILE *fid = fopen(filename, "rb");
+
+ /* read header */
+ if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+ if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+ if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+ if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+ if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+ if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+ if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+ /* check if indices are valid */
+ if (packet_index >= num_packets)
+ {+ fprintf(stderr, "get_fec_rate: index out of bounds\n");
+ goto error;
+ }
+
+ /* calculate offset in file (+ 2 is for rate) */
+ offset = header_size + packet_index * packet_size;
+ fseek(fid, offset, SEEK_SET);
+
+ /* read rate */
+ if (fread(&rate, sizeof(rate), 1, fid) != 1) goto error;
+
+ fclose(fid);
+ return (int) rate;
+
+error:
+ fclose(fid);
+ return -1;
+}
+
+#if 0
+int main()
+{
+ float features[20];
+ int i;
+
+ if (get_fec_frame("../test.fec", &features[0], 0, 127))+ {+ return 1;
+ }
+
+ for (i = 0; i < 20; i ++)
+ {+ printf("%d %f\n", i, features[i]);+ }
+
+ printf("rate: %d\n", get_fec_rate("../test.fec", 0));+
+}
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/training_tf2/fec_packets.h
@@ -1,0 +1,34 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FEC_PACKETS_H
+#define _FEC_PACKETS_H
+
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index);
+int get_fec_rate(const char * const filename, int packet_index);
+
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/training_tf2/fec_packets.py
@@ -1,0 +1,108 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+
+
+
+def write_fec_packets(filename, packets, rates=None):
+ """ writes packets in binary format """
+
+ assert np.dtype(np.float32).itemsize == 4
+ assert np.dtype(np.int16).itemsize == 2
+
+ # derive some sizes
+ num_packets = len(packets)
+ subframes_per_packet = packets[0].shape[-2]
+ num_features = packets[0].shape[-1]
+
+ # size of float is 4
+ subframe_size = num_features * 4
+ packet_size = subframe_size * subframes_per_packet + 2 # two bytes for rate
+
+ version = 1
+ # header size (version, header_size, num_packets, packet_size, subframe_size, subframes_per_packet, num_features)
+ header_size = 14
+
+ with open(filename, 'wb') as f:
+
+ # header
+ f.write(np.int16(version).tobytes())
+ f.write(np.int16(header_size).tobytes())
+ f.write(np.int16(num_packets).tobytes())
+ f.write(np.int16(packet_size).tobytes())
+ f.write(np.int16(subframe_size).tobytes())
+ f.write(np.int16(subframes_per_packet).tobytes())
+ f.write(np.int16(num_features).tobytes())
+
+ # packets
+ for i, packet in enumerate(packets):
+ if type(rates) == type(None):
+ rate = 0
+ else:
+ rate = rates[i]
+
+ f.write(np.int16(rate).tobytes())
+
+ features = np.flip(packet, axis=-2)
+ f.write(features.astype(np.float32).tobytes())
+
+
+def read_fec_packets(filename):
+ """ reads packets from binary format """
+
+ assert np.dtype(np.float32).itemsize == 4
+ assert np.dtype(np.int16).itemsize == 2
+
+ with open(filename, 'rb') as f:
+
+ # header
+ version = np.frombuffer(f.read(2), dtype=np.int16).item()
+ header_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ num_packets = np.frombuffer(f.read(2), dtype=np.int16).item()
+ packet_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item()
+ subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item()
+ num_features = np.frombuffer(f.read(2), dtype=np.int16).item()
+
+ dummy_features = np.zeros((1, subframes_per_packet, num_features), dtype=np.float32)
+
+ # packets
+ rates = []
+ packets = []
+ for i in range(num_packets):
+
+ rate = np.frombuffer(f.read(2), dtype=np.int16).item()
+ rates.append(rate)
+
+ features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
+ packet = np.flip(features, axis=-2)
+ packets.append(packet)
+
+ return packets
\ No newline at end of file
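
A minimal round-trip sketch for the packet format above (the shapes and rates are made up for illustration; they are not values used elsewhere in this patch):

import numpy as np
from fec_packets import write_fec_packets, read_fec_packets

packets = [np.random.randn(1, 4, 20).astype(np.float32) for _ in range(3)]
rates = [120, 140, 160]
write_fec_packets('example.fec', packets, rates)
decoded = read_fec_packets('example.fec')
assert len(decoded) == len(packets) and decoded[0].shape == packets[0].shape
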
--- /dev/null
+++ b/dnn/training_tf2/keraslayerdump.py
@@ -1,0 +1,189 @@
+'''Copyright (c) 2017-2018 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+""" helper functions for dumping some Keras layers to C files """
+
+import numpy as np
+
+
+def printVector(f, vector, name, dtype='float', dotp=False, static=True):
+ """ prints vector as one-dimensional C array """
+ if dotp:
+ vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
+ vector = vector.transpose((2, 0, 3, 1))
+ v = np.reshape(vector, (-1))
+ if static:
+ f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
+ else:
+ f.write('const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
+ for i in range(0, len(v)):
+ f.write('{}'.format(v[i]))
+ if (i!=len(v)-1):
+ f.write(',')
+ else:
+ break;
+ if (i%8==7):
+ f.write("\n ")
+ else:
+ f.write(" ")
+ f.write('\n};\n\n')
+ return vector
+
+def printSparseVector(f, A, name, have_diag=True):
+ N = A.shape[0]
+ M = A.shape[1]
+ W = np.zeros((0,), dtype='int')
+ W0 = np.zeros((0,))
+ if have_diag:
+ diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
+ A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
+ A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
+ A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+ printVector(f, diag, name + '_diag')
+ AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
+ idx = np.zeros((0,), dtype='int')
+ for i in range(M//8):
+ pos = idx.shape[0]
+ idx = np.append(idx, -1)
+ nb_nonzero = 0
+ for j in range(N//4):
+ block = A[j*4:(j+1)*4, i*8:(i+1)*8]
+ qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
+ if np.sum(np.abs(block)) > 1e-10:
+ nb_nonzero = nb_nonzero + 1
+ idx = np.append(idx, j*4)
+ vblock = qblock.transpose((1,0)).reshape((-1,))
+ W0 = np.concatenate([W0, block.reshape((-1,))])
+ W = np.concatenate([W, vblock])
+ idx[pos] = nb_nonzero
+ f.write('#ifdef DOT_PROD\n')
+ printVector(f, W, name, dtype='qweight')
+ f.write('#else /*DOT_PROD*/\n')
+ printVector(f, W0, name, dtype='qweight')
+ f.write('#endif /*DOT_PROD*/\n')
+ printVector(f, idx, name + '_idx', dtype='int')
+ return AQ
+
+def dump_sparse_gru(self, f, hf):
+ name = 'sparse_' + self.name
+ print("printing layer " + name + " of type sparse " + self.__class__.__name__)
+ weights = self.get_weights()
+ qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
+ printVector(f, weights[-1], name + '_bias')
+ subias = weights[-1].copy()
+ subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
+ printVector(f, subias, name + '_subias')
+ if hasattr(self, 'activation'):
+ activation = self.activation.__name__.upper()
+ else:
+ activation = 'TANH'
+ if hasattr(self, 'reset_after') and not self.reset_after:
+ reset_after = 0
+ else:
+ reset_after = 1
+ neurons = weights[0].shape[1]//3
+ max_rnn_neurons = neurons
+ f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
+ .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('extern const SparseGRULayer {};\n\n'.format(name));
+ return max_rnn_neurons
+
+def dump_gru_layer(self, f, hf, dotp=False, sparse=False):
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ if sparse:
+ qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)
+ else:
+ qweight = printVector(f, weights[0], name + '_weights')
+
+ if dotp:
+ f.write('#ifdef DOT_PROD\n')
+ qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
+ printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
+ f.write('#else /*DOT_PROD*/\n')
+ else:
+ qweight2 = weights[1]
+
+ printVector(f, weights[1], name + '_recurrent_weights')
+ if dotp:
+ f.write('#endif /*DOT_PROD*/\n')
+
+ printVector(f, weights[-1], name + '_bias')
+ subias = weights[-1].copy()
+ subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
+ subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
+ printVector(f, subias, name + '_subias')
+ if hasattr(self, 'activation'):
+ activation = self.activation.__name__.upper()
+ else:
+ activation = 'TANH'
+ if hasattr(self, 'reset_after') and not self.reset_after:
+ reset_after = 0
+ else:
+ reset_after = 1
+ neurons = weights[0].shape[1]//3
+ max_rnn_neurons = neurons
+ f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {},\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+ .format(name, name, name, name, name + "_weights_idx" if sparse else "NULL", name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+ hf.write('extern const GRULayer {};\n\n'.format(name));
+ return max_rnn_neurons
+
+def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
+ printVector(f, weights, name + '_weights')
+ printVector(f, bias, name + '_bias')
+ f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
+ .format(name, name, name, weights.shape[0], weights.shape[1], activation))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
+ hf.write('extern const DenseLayer {};\n\n'.format(name));
+
+def dump_dense_layer(self, f, hf):
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)
+ weights = self.get_weights()
+ activation = self.activation.__name__.upper()
+ dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
+ return False
+
+def dump_conv1d_layer(self, f, hf):
+ name = self.name
+ print("printing layer " + name + " of type " + self.__class__.__name__)+ weights = self.get_weights()
+ printVector(f, weights[0], name + '_weights')
+ printVector(f, weights[-1], name + '_bias')
+ activation = self.activation.__name__.upper()
+ max_conv_inputs = weights[0].shape[1]*weights[0].shape[0]
+ f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'+ .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
+ hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))+ hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))+ hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))+ hf.write('extern const Conv1DLayer {};\n\n'.format(name));+ return max_conv_inputs
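
A minimal usage sketch for these helpers, with a made-up layer name and in-memory files (illustration only):

import io
import numpy as np
from keraslayerdump import dump_dense_layer_impl

f, hf = io.StringIO(), io.StringIO()
weights = np.zeros((16, 8), dtype='float32')
bias = np.zeros(8, dtype='float32')
dump_dense_layer_impl('example_dense', weights, bias, 'TANH', f, hf)
print(hf.getvalue())   # '#define EXAMPLE_DENSE_OUT_SIZE 8' plus the extern declaration
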
--- a/dnn/training_tf2/plc_loader.py
+++ b/dnn/training_tf2/plc_loader.py
@@ -47,19 +47,25 @@
def __getitem__(self, index):
features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
- #lost = (np.random.rand(features.shape[0], features.shape[1]) > .2).astype('float')
+ burg_lost = (np.random.rand(features.shape[0], features.shape[1]) > .1).astype('float')
+ burg_lost = np.reshape(burg_lost, (features.shape[0], features.shape[1], 1))
+ burg_mask = np.tile(burg_lost, (1,1,self.nb_burg_features))
+
lost = self.lost_offset[self.lost_indices[index*self.batch_size:(index+1)*self.batch_size], :]
lost = np.reshape(lost, (features.shape[0], features.shape[1], 1))
lost_mask = np.tile(lost, (1,1,features.shape[2]))
in_features = features*lost_mask
+ in_features[:,:,:self.nb_burg_features] = in_features[:,:,:self.nb_burg_features]*burg_mask
#For the first frame after a loss, we don't have valid features, but the Burg estimate is valid.
- in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:]
+ #in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:]
out_lost = np.copy(lost)
- out_lost[:,1:,:] = out_lost[:,1:,:]*out_lost[:,:-1,:]
+ #out_lost[:,1:,:] = out_lost[:,1:,:]*out_lost[:,:-1,:]
out_features = np.concatenate([features[:,:,self.nb_burg_features:], 1.-out_lost], axis=-1)
- inputs = [in_features*lost_mask, lost]
+ burg_sign = 2*burg_lost - 1
+ # last dim is 1 for received packet, 0 for lost packet, and -1 when just the Burg info is missing
+ inputs = [in_features*lost_mask, lost*burg_sign]
outputs = [out_features]
return (inputs, outputs)
--- /dev/null
+++ b/dnn/training_tf2/rdovae.py
@@ -1,0 +1,373 @@
+#!/usr/bin/python3
+'''Copyright (c) 2022 Amazon
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import math
+import tensorflow as tf
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise, AveragePooling1D, RepeatVector
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
+from tensorflow.keras import backend as K
+from tensorflow.keras.constraints import Constraint
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.callbacks import Callback
+from tensorflow.keras.regularizers import l1
+import numpy as np
+import h5py
+from uniform_noise import UniformNoise
+
+class WeightClip(Constraint):
+ '''Clips the weights incident to each hidden unit to be inside a range
+ '''
+ def __init__(self, c=2):
+ self.c = c
+
+ def __call__(self, p):
+ # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
+ # saturation when implementing dot products with SSSE3 or AVX2.
+ return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
+ #return K.clip(p, -self.c, self.c)
+
+ def get_config(self):
+ return {'name': self.__class__.__name__,
+ 'c': self.c}
+
+constraint = WeightClip(0.496)
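
The constraint above rescales each adjacent weight pair so that |w[2i]| + |w[2i+1]| never exceeds c, which keeps the int8 dot-product accumulators used by the DOT_PROD kernels away from saturation. A quick numerical check (illustration only):

import numpy as np
import tensorflow as tf

clip = WeightClip(0.496)
w = tf.constant(np.random.uniform(-2, 2, (8, 16)).astype('float32'))
wc = clip(w)
pair_sums = tf.abs(wc[:, 0::2]) + tf.abs(wc[:, 1::2])
print(float(tf.reduce_max(pair_sums)))   # no larger than ~0.496 for any pair
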
+
+def soft_quantize(x):
+ #x = 4*x
+ #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
+ #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
+ #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
+ return x
+
+def noise_quantize(x):
+ return soft_quantize(x + (K.random_uniform((128, 16, 80))-.5) )
+
+def hard_quantize(x):
+ x = soft_quantize(x)
+ quantized = tf.round(x)
+ return x + tf.stop_gradient(quantized - x)
+
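
hard_quantize() is a straight-through estimator: the forward pass rounds, while gradients flow through as if no rounding had happened. A small check (illustration only):

import tensorflow as tf

x = tf.Variable([0.2, 1.7, -0.6])
with tf.GradientTape() as tape:
    y = hard_quantize(x)               # forward value: [0., 2., -1.]
    loss = tf.reduce_sum(y * y)
print(tape.gradient(loss, x).numpy())  # 2*y, i.e. the rounding is transparent to the gradient
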
+def apply_dead_zone(x):
+ d = x[1]*.05
+ x = x[0]
+ y = x - d*tf.math.tanh(x/(.1+d))
+ return y
+
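
apply_dead_zone() shrinks small values toward zero before rounding (here d is 0.05 times the dead_zone input), which widens the quantizer's zero bin. A NumPy illustration of the core curve y = x - d*tanh(x/(.1+d)):

import numpy as np

def dead_zone_np(x, d):
    # same curve as apply_dead_zone() above, with d given directly
    return x - d * np.tanh(x / (.1 + d))

x = np.array([0.2, 0.6, 1.4])
print(np.round(dead_zone_np(x, d=0.5)))   # [0. 0. 1.] -- small inputs collapse into the zero bin
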
+def rate_loss(y_true,y_pred):
+ log2_e = 1.4427
+ n = y_pred.shape[-1]
+ C = n - log2_e*np.math.log(np.math.gamma(n))
+ k = K.sum(K.abs(y_pred), axis=-1)
+ p = 1.5
+ #rate = C + (n-1)*log2_e*tf.math.log((k**p + (n/5)**p)**(1/p))
+ rate = C + (n-1)*log2_e*tf.math.log(k + .112*n**2/(n/1.8+k) )
+ return K.mean(rate)
+
+eps=1e-6
+def safelog2(x):
+ log2_e = 1.4427
+ return log2_e*tf.math.log(eps+x)
+
+def feat_dist_loss(y_true,y_pred):
+ lambda_1 = 1./K.sqrt(y_pred[:,:,:,-1])
+ y_pred = y_pred[:,:,:,:-1]
+ ceps = y_pred[:,:,:,:18] - y_true[:,:,:18]
+ pitch = 2*(y_pred[:,:,:,18:19] - y_true[:,:,18:19])/(y_true[:,:,18:19] + 2)
+ corr = y_pred[:,:,:,19:] - y_true[:,:,19:]
+ pitch_weight = K.square(K.maximum(0., y_true[:,:,19:]+.5))
+ return K.mean(lambda_1*K.mean(K.square(ceps) + 10*(1/18.)*K.abs(pitch)*pitch_weight + (1/18.)*K.square(corr), axis=-1))
+
+def sq1_rate_loss(y_true,y_pred):
+ lambda_val = K.sqrt(y_pred[:,:,-1])
+ y_pred = y_pred[:,:,:-1]
+ log2_e = 1.4427
+ n = y_pred.shape[-1]//3
+ r = (y_pred[:,:,2*n:])
+ p0 = (y_pred[:,:,n:2*n])
+ p0 = 1-r**(.5+.5*p0)
+ y_pred = y_pred[:,:,:n]
+ y_pred = soft_quantize(y_pred)
+
+ y0 = K.maximum(0., 1. - K.abs(y_pred))**2
+ rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
+ rate = -safelog2(-.5*tf.math.log(r)*r**K.abs(y_pred))
+ rate = -safelog2((1-r)/(1+r)*r**K.abs(y_pred))
+ #rate = -safelog2(- tf.math.sinh(.5*tf.math.log(r))* r**K.abs(y_pred) - tf.math.cosh(K.maximum(0., .5 - K.abs(y_pred))*tf.math.log(r)) + 1)
+ rate = lambda_val*K.sum(rate, axis=-1)
+ return K.mean(rate)
+
+def sq2_rate_loss(y_true,y_pred):
+ lambda_val = K.sqrt(y_pred[:,:,-1])
+ y_pred = y_pred[:,:,:-1]
+ log2_e = 1.4427
+ n = y_pred.shape[-1]//3
+ r = y_pred[:,:,2*n:]
+ p0 = y_pred[:,:,n:2*n]
+ p0 = 1-r**(.5+.5*p0)
+ #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(r))
+ #p0 = 1-r**theta
+ y_pred = tf.round(y_pred[:,:,:n])
+ y0 = K.maximum(0., 1. - K.abs(y_pred))**2
+ rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
+ rate = lambda_val*K.sum(rate, axis=-1)
+ return K.mean(rate)
+
+def sq_rate_metric(y_true,y_pred, reduce=True):
+ y_pred = y_pred[:,:,:-1]
+ log2_e = 1.4427
+ n = y_pred.shape[-1]//3
+ r = y_pred[:,:,2*n:]
+ p0 = y_pred[:,:,n:2*n]
+ p0 = 1-r**(.5+.5*p0)
+ #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(r))
+ #p0 = 1-r**theta
+ y_pred = tf.round(y_pred[:,:,:n])
+ y0 = K.maximum(0., 1. - K.abs(y_pred))**2
+ rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
+ rate = K.sum(rate, axis=-1)
+ if reduce:
+ rate = K.mean(rate)
+ return rate
+
+def pvq_quant_search(x, k):
+ x = x/tf.reduce_sum(tf.abs(x), axis=-1, keepdims=True)
+ kx = k*x
+ y = tf.round(kx)
+ newk = k
+
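+ # Iteratively rescale the input so that rounding produces a vector whose absolute
+ # values sum to (approximately) k, i.e. a valid PVQ codeword with k unit pulses.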
+ for j in range(10):
+ #print("y = ", y)+ #print("iteration ", j)+ abs_y = tf.abs(y)
+ abs_kx = tf.abs(kx)
+ kk=tf.reduce_sum(abs_y, axis=-1)
+ #print("sums = ", kk)+ plus = 1.000001*tf.reduce_min((abs_y+.5)/(abs_kx+1e-15), axis=-1)
+ minus = .999999*tf.reduce_max((abs_y-.5)/(abs_kx+1e-15), axis=-1)
+ #print("plus = ", plus)+ #print("minus = ", minus)+ factor = tf.where(kk>k, minus, plus)
+ factor = tf.where(kk==k, tf.ones_like(factor), factor)
+ #print("scale = ", factor)+ factor = tf.expand_dims(factor, axis=-1)
+ #newk = newk * (k/kk)**.2
+ newk = newk*factor
+ kx = newk*x
+ #print("newk = ", newk)+ #print("unquantized = ", newk*x)+ y = tf.round(kx)
+
+ #print(y)
+ #print(K.mean(K.sum(K.abs(y), axis=-1)))
+ return y
+
+def pvq_quantize(x, k):
+ x = x/(1e-15+tf.norm(x, axis=-1,keepdims=True))
+ quantized = pvq_quant_search(x, k)
+ quantized = quantized/(1e-15+tf.norm(quantized, axis=-1,keepdims=True))
+ return x + tf.stop_gradient(quantized - x)
+
+
+def var_repeat(x):
+ return tf.repeat(tf.expand_dims(x[0], 1), K.shape(x[1])[1], axis=1)
+
+nb_state_dim = 24
+
+def new_rdovae_encoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
+ feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
+
+ gru = CuDNNGRU if training else GRU
+ enc_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense1')
+ enc_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense2')
+ enc_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense3')
+ enc_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense4')
+ enc_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense5')
+ enc_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense6')
+ enc_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense7')
+ enc_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense8')
+
+ #bits_dense = Dense(nb_bits, activation='linear', name='bits_dense')
+ bits_dense = Conv1D(nb_bits, 4, padding='causal', activation='linear', name='bits_dense')
+
+ zero_out = Lambda(lambda x: 0*x)
+ inputs = Reshape((-1, 2*nb_used_features))(feat)
+ d1 = enc_dense1(inputs)
+ d2 = enc_dense2(d1)
+ d3 = enc_dense3(d2)
+ d4 = enc_dense4(d3)
+ d5 = enc_dense5(d4)
+ d6 = enc_dense6(d5)
+ d7 = enc_dense7(d6)
+ d8 = enc_dense8(d7)
+ pre_out = Concatenate()([d1, d2, d3, d4, d5, d6, d7, d8])
+ enc_out = bits_dense(pre_out)
+ global_dense1 = Dense(128, activation='tanh', name='gdense1')
+ global_dense2 = Dense(nb_state_dim, activation='tanh', name='gdense2')
+ global_bits = global_dense2(global_dense1(pre_out))
+
+ encoder = Model([feat], [enc_out, global_bits], name='encoder')
+ return encoder
+
+def new_rdovae_decoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
+ bits_input = Input(shape=(None, nb_bits), batch_size=batch_size, name="dec_bits")
+ gru_state_input = Input(shape=(nb_state_dim,), batch_size=batch_size, name="dec_state")
+
+
+ gru = CuDNNGRU if training else GRU
+ dec_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense1')
+ dec_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense2')
+ dec_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense3')
+ dec_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense4')
+ dec_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense5')
+ dec_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense6')
+ dec_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense7')
+ dec_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense8')
+
+ dec_final = Dense(bunch*nb_used_features, activation='linear', name='dec_final')
+
+ time_reverse = Lambda(lambda x: K.reverse(x, 1))
+ #time_reverse = Lambda(lambda x: x)
+ #gru_state_rep = RepeatVector(64//bunch)(gru_state_input)
+
+ #gru_state_rep = Lambda(var_repeat, output_shape=(None, nb_state_dim)) ([gru_state_input, bits_input])
+ gru_state1 = Dense(cond_size, name="state1", activation='tanh')(gru_state_input)
+ gru_state2 = Dense(cond_size, name="state2", activation='tanh')(gru_state_input)
+ gru_state3 = Dense(cond_size, name="state3", activation='tanh')(gru_state_input)
+
+ dec1 = dec_dense1(time_reverse(bits_input))
+ dec2 = dec_dense2(dec1, initial_state=gru_state1)
+ dec3 = dec_dense3(dec2)
+ dec4 = dec_dense4(dec3, initial_state=gru_state2)
+ dec5 = dec_dense5(dec4)
+ dec6 = dec_dense6(dec5, initial_state=gru_state3)
+ dec7 = dec_dense7(dec6)
+ dec8 = dec_dense8(dec7)
+ output = Reshape((-1, nb_used_features))(dec_final(Concatenate()([dec1, dec2, dec3, dec4, dec5, dec6, dec7, dec8])))
+ decoder = Model([bits_input, gru_state_input], time_reverse(output), name='decoder')
+ decoder.nb_bits = nb_bits
+ decoder.bunch = bunch
+ return decoder
+
+def new_split_decoder(decoder):
+ nb_bits = decoder.nb_bits
+ bunch = decoder.bunch
+ bits_input = Input(shape=(None, nb_bits), name="split_bits")
+ gru_state_input = Input(shape=(None,nb_state_dim), name="split_state")
+
+ range_select = Lambda(lambda x: x[0][:,x[1]:x[2],:])
+ elem_select = Lambda(lambda x: x[0][:,x[1],:])
+ points = [0, 100, 200, 300, 400]
+ outputs = []
+ for i in range(len(points)-1):
+ begin = points[i]//bunch
+ end = points[i+1]//bunch
+ state = elem_select([gru_state_input, end-1])
+ bits = range_select([bits_input, begin, end])
+ outputs.append(decoder([bits, state]))
+ output = Concatenate(axis=1)(outputs)
+ split = Model([bits_input, gru_state_input], output, name="split")
+ return split
+
+def tensor_concat(x):
+ #n = x[1]//2
+ #x = x[0]
+ n=2
+ y = []
+ for i in range(n-1):
+ offset = 2 * (n-1-i)
+ tmp = K.concatenate([x[i][:, offset:, :], x[-1][:, -offset:, :]], axis=-2)
+ y.append(tf.expand_dims(tmp, axis=0))
+ y.append(tf.expand_dims(x[-1], axis=0))
+ return Concatenate(axis=0)(y)
+
+
+def new_rdovae_model(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
+
+ feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
+ quant_id = Input(shape=(None,), batch_size=batch_size)
+ lambda_val = Input(shape=(None, 1), batch_size=batch_size)
+ lambda_bunched = AveragePooling1D(pool_size=bunch//2, strides=bunch//2, padding="valid")(lambda_val)
+ lambda_up = Lambda(lambda x: K.repeat_elements(x, 2, axis=-2))(lambda_val)
+
+ qembedding = Embedding(nb_quant, 6*nb_bits, name='quant_embed', embeddings_initializer='zeros')
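+ # For each quantizer id, the embedding packs 6*nb_bits values: quantization scale
+ # (nb_bits), dead zone width (nb_bits), soft-quantization distribution parameters
+ # (2*nb_bits) and hard-quantization distribution parameters (2*nb_bits).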
+ quant_embed_dec = qembedding(quant_id)
+ quant_scale = Activation('softplus')(Lambda(lambda x: x[:,:,:nb_bits], name='quant_scale_embed')(quant_embed_dec))
+
+ encoder = new_rdovae_encoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
+ ze, gru_state_dec = encoder([feat])
+ ze = Multiply()([ze, quant_scale])
+
+ decoder = new_rdovae_decoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
+ split_decoder = new_split_decoder(decoder)
+
+ dead_zone = Activation('softplus')(Lambda(lambda x: x[:,:,nb_bits:2*nb_bits], name='dead_zone_embed')(quant_embed_dec))
+ soft_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,2*nb_bits:4*nb_bits], name='soft_distr_embed')(quant_embed_dec))
+ hard_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,4*nb_bits:], name='hard_distr_embed')(quant_embed_dec))
+
+ noisequant = UniformNoise()
+ hardquant = Lambda(hard_quantize)
+ dzone = Lambda(apply_dead_zone)
+ dze = dzone([ze,dead_zone])
+ ndze = noisequant(dze)
+ dze_quant = hardquant(dze)
+
+ div = Lambda(lambda x: x[0]/x[1])
+ dze_quant = div([dze_quant,quant_scale])
+ ndze_unquant = div([ndze,quant_scale])
+
+ mod_select = Lambda(lambda x: x[0][:,x[1]::bunch//2,:])
+ gru_state_dec = Lambda(lambda x: pvq_quantize(x, 82))(gru_state_dec)
+ combined_output = []
+ unquantized_output = []
+ cat = Concatenate(name="out_cat")
+ for i in range(bunch//2):
+ dze_select = mod_select([dze_quant, i])
+ ndze_select = mod_select([ndze_unquant, i])
+ state_select = mod_select([gru_state_dec, i])
+
+ tmp = split_decoder([dze_select, state_select])
+ tmp = cat([tmp, lambda_up])
+ combined_output.append(tmp)
+
+ tmp = split_decoder([ndze_select, state_select])
+ tmp = cat([tmp, lambda_up])
+ unquantized_output.append(tmp)
+
+ concat = Lambda(tensor_concat, name="output")
+ combined_output = concat(combined_output)
+ unquantized_output = concat(unquantized_output)
+
+ e2 = Concatenate(name="hard_bits")([dze, hard_distr_embed, lambda_val])
+ e = Concatenate(name="soft_bits")([dze, soft_distr_embed, lambda_val])
+
+
+ model = Model([feat, quant_id, lambda_val], [combined_output, unquantized_output, e, e2], name="end2end")
+ model.nb_used_features = nb_used_features
+
+ return model, encoder, decoder, qembedding
+
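The WeightClip constraint defined at the top of this file rescales each pair of adjacent columns so that their absolute values never sum to more than the clip constant c; that is the property the saturation comment in __call__ relies on once the weights are quantized to 8 bits. A minimal NumPy sketch of this (illustrative only; the random matrix p stands in for a real weight matrix):

import numpy as np

c = 0.496
rng = np.random.default_rng(0)
p = rng.uniform(-1.0, 1.0, size=(4, 8)).astype(np.float32)  # stand-in weight matrix

# Same operation as WeightClip.__call__, written with NumPy.
pair_sum = np.abs(p[:, 0::2]) + np.abs(p[:, 1::2])
clipped = c * p / np.maximum(c, np.repeat(pair_sum, 2, axis=1))

# Every adjacent (even, odd) pair now satisfies |w0| + |w1| <= c, so the int8
# dot products in the C inference code cannot saturate their accumulators.
assert np.all(np.abs(clipped[:, 0::2]) + np.abs(clipped[:, 1::2]) <= c + 1e-6)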
--- /dev/null
+++ b/dnn/training_tf2/rdovae_exchange.py
@@ -1,0 +1,138 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import argparse
+import os
+import sys
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
+parser.add_argument('output', metavar="<output folder>", type=str, help='output exchange folder')
+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
+parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
+
+args = parser.parse_args()
+
+# now import the heavy stuff
+from rdovae import new_rdovae_model
+from wexchange.tf import dump_tf_weights, load_tf_weights
+
+
+exchange_name = {
+ 'enc_dense1' : 'encoder_stack_layer1_dense',
+ 'enc_dense3' : 'encoder_stack_layer3_dense',
+ 'enc_dense5' : 'encoder_stack_layer5_dense',
+ 'enc_dense7' : 'encoder_stack_layer7_dense',
+ 'enc_dense8' : 'encoder_stack_layer8_dense',
+ 'gdense1' : 'encoder_state_layer1_dense',
+ 'gdense2' : 'encoder_state_layer2_dense',
+ 'enc_dense2' : 'encoder_stack_layer2_gru',
+ 'enc_dense4' : 'encoder_stack_layer4_gru',
+ 'enc_dense6' : 'encoder_stack_layer6_gru',
+ 'bits_dense' : 'encoder_stack_layer9_conv',
+ 'qembedding' : 'statistical_model_embedding',
+ 'state1' : 'decoder_state1_dense',
+ 'state2' : 'decoder_state2_dense',
+ 'state3' : 'decoder_state3_dense',
+ 'dec_dense1' : 'decoder_stack_layer1_dense',
+ 'dec_dense3' : 'decoder_stack_layer3_dense',
+ 'dec_dense5' : 'decoder_stack_layer5_dense',
+ 'dec_dense7' : 'decoder_stack_layer7_dense',
+ 'dec_dense8' : 'decoder_stack_layer8_dense',
+ 'dec_final' : 'decoder_stack_layer9_dense',
+ 'dec_dense2' : 'decoder_stack_layer2_gru',
+ 'dec_dense4' : 'decoder_stack_layer4_gru',
+ 'dec_dense6' : 'decoder_stack_layer6_gru'
+}
+
+
+if __name__ == "__main__":
+
+ model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
+ model.load_weights(args.weights)
+
+ os.makedirs(args.output, exist_ok=True)
+
+ # encoder
+ encoder_dense_names = [
+ 'enc_dense1',
+ 'enc_dense3',
+ 'enc_dense5',
+ 'enc_dense7',
+ 'enc_dense8',
+ 'gdense1',
+ 'gdense2'
+ ]
+
+ encoder_gru_names = [
+ 'enc_dense2',
+ 'enc_dense4',
+ 'enc_dense6'
+ ]
+
+ encoder_conv1d_names = [
+ 'bits_dense'
+ ]
+
+
+ for name in encoder_dense_names + encoder_gru_names + encoder_conv1d_names:
+ print(f"writing layer {exchange_name[name]}...")+ dump_tf_weights(os.path.join(args.output, exchange_name[name]), encoder.get_layer(name))
+
+ # qembedding
+ print(f"writing layer {exchange_name['qembedding']}...")+ dump_tf_weights(os.path.join(args.output, exchange_name['qembedding']), qembedding)
+
+ # decoder
+ decoder_dense_names = [
+ 'state1',
+ 'state2',
+ 'state3',
+ 'dec_dense1',
+ 'dec_dense3',
+ 'dec_dense5',
+ 'dec_dense7',
+ 'dec_dense8',
+ 'dec_final'
+ ]
+
+ decoder_gru_names = [
+ 'dec_dense2',
+ 'dec_dense4',
+ 'dec_dense6'
+ ]
+
+ for name in decoder_dense_names + decoder_gru_names:
+ print(f"writing layer {exchange_name[name]}...")+ dump_tf_weights(os.path.join(args.output, exchange_name[name]), decoder.get_layer(name))
--- /dev/null
+++ b/dnn/training_tf2/rdovae_import.py
@@ -1,0 +1,123 @@
+"""
+/* Copyright (c) 2022 Amazon
+ Written by Jan Buethe */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import argparse
+import os
+import sys
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('input', metavar="<input folder>", type=str, help='input exchange folder')
+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
+parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
+
+args = parser.parse_args()
+
+# now import the heavy stuff
+from rdovae import new_rdovae_model
+from wexchange.tf import load_tf_weights
+
+
+exchange_name = {
+ 'enc_dense1' : 'encoder_stack_layer1_dense',
+ 'enc_dense3' : 'encoder_stack_layer3_dense',
+ 'enc_dense5' : 'encoder_stack_layer5_dense',
+ 'enc_dense7' : 'encoder_stack_layer7_dense',
+ 'enc_dense8' : 'encoder_stack_layer8_dense',
+ 'gdense1' : 'encoder_state_layer1_dense',
+ 'gdense2' : 'encoder_state_layer2_dense',
+ 'enc_dense2' : 'encoder_stack_layer2_gru',
+ 'enc_dense4' : 'encoder_stack_layer4_gru',
+ 'enc_dense6' : 'encoder_stack_layer6_gru',
+ 'bits_dense' : 'encoder_stack_layer9_conv',
+ 'qembedding' : 'statistical_model_embedding',
+ 'state1' : 'decoder_state1_dense',
+ 'state2' : 'decoder_state2_dense',
+ 'state3' : 'decoder_state3_dense',
+ 'dec_dense1' : 'decoder_stack_layer1_dense',
+ 'dec_dense3' : 'decoder_stack_layer3_dense',
+ 'dec_dense5' : 'decoder_stack_layer5_dense',
+ 'dec_dense7' : 'decoder_stack_layer7_dense',
+ 'dec_dense8' : 'decoder_stack_layer8_dense',
+ 'dec_final' : 'decoder_stack_layer9_dense',
+ 'dec_dense2' : 'decoder_stack_layer2_gru',
+ 'dec_dense4' : 'decoder_stack_layer4_gru',
+ 'dec_dense6' : 'decoder_stack_layer6_gru'
+}
+
+if __name__ == "__main__":
+
+ model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
+
+ encoder_layers = [
+ 'enc_dense1',
+ 'enc_dense3',
+ 'enc_dense5',
+ 'enc_dense7',
+ 'enc_dense8',
+ 'gdense1',
+ 'gdense2',
+ 'enc_dense2',
+ 'enc_dense4',
+ 'enc_dense6',
+ 'bits_dense'
+ ]
+
+ decoder_layers = [
+ 'state1',
+ 'state2',
+ 'state3',
+ 'dec_dense1',
+ 'dec_dense3',
+ 'dec_dense5',
+ 'dec_dense7',
+ 'dec_dense8',
+ 'dec_final',
+ 'dec_dense2',
+ 'dec_dense4',
+ 'dec_dense6'
+ ]
+
+ for name in encoder_layers:
+ print(f"loading weight for layer {name}...")+ load_tf_weights(os.path.join(args.input, exchange_name[name]), encoder.get_layer(name))
+
+ print(f"loading weight for layer qembedding...")
+ load_tf_weights(os.path.join(args.input, exchange_name['qembedding']), qembedding)
+
+ for name in decoder_layers:
+ print(f"loading weight for layer {name}...")+ load_tf_weights(os.path.join(args.input, exchange_name[name]), decoder.get_layer(name))
+
+ model.save(args.weights)
\ No newline at end of file
--- /dev/null
+++ b/dnn/training_tf2/train_rdovae.py
@@ -1,0 +1,151 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+ Copyright (c) 2018-2019 Mozilla
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train an RDO-VAE model
+import tensorflow as tf
+strategy = tf.distribute.MultiWorkerMirroredStrategy()
+
+
+import argparse
+#from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Train a quantization model')
+
+parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
+parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
+parser.add_argument('--model', metavar='<model>', default='rdovae', help='model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
+group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
+parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
+parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
+parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
+parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
+parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
+
+
+args = parser.parse_args()
+
+import importlib
+rdovae = importlib.import_module(args.model)
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+#gpus = tf.config.experimental.list_physical_devices('GPU')
+#if gpus:
+# try:
+# tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
+# except RuntimeError as e:
+# print(e)
+
+nb_epochs = args.epochs
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+quantize = args.quantize is not None
+retrain = args.retrain is not None
+
+if quantize:
+ lr = 0.00003
+ decay = 0
+ input_model = args.quantize
+else:
+ lr = 0.001
+ decay = 2.5e-5
+
+if args.lr is not None:
+ lr = args.lr
+
+if args.decay is not None:
+ decay = args.decay
+
+if retrain:
+ input_model = args.retrain
+
+
+opt = Adam(lr, decay=decay, beta_2=0.99)
+
+with strategy.scope():
+ model, encoder, decoder, _ = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size, nb_quant=16)
+ model.compile(optimizer=opt, loss=[rdovae.feat_dist_loss, rdovae.feat_dist_loss, rdovae.sq1_rate_loss, rdovae.sq2_rate_loss], loss_weights=[.5, .5, 1., .1], metrics={'hard_bits':rdovae.sq_rate_metric})
+ model.summary()
+
+lpc_order = 16
+
+feature_file = args.features
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+sequence_size = args.seq_length
+
+# load the binary float32 feature file
+
+
+features = np.memmap(feature_file, dtype='float32', mode='r')
+nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
+features = features[:nb_sequences*sequence_size*nb_features]
+
+features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
+print(features.shape)
+features = features[:, :, :nb_used_features]
+
+#lambda_val = np.repeat(np.random.uniform(.0007, .002, (features.shape[0], 1, 1)), features.shape[1]//2, axis=1)
+#quant_id = np.round(10*np.log(lambda_val/.0007)).astype('int16')
+#quant_id = quant_id[:,:,0]
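+# Each sequence gets a random quantizer id in [0, 16); the rate-distortion weight
+# lambda grows exponentially with it, from 2e-4 at id 0 to roughly 1e-2 at id 15.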
+quant_id = np.repeat(np.random.randint(16, size=(features.shape[0], 1, 1), dtype='int16'), features.shape[1]//2, axis=1)
+lambda_val = .0002*np.exp(quant_id/3.8)
+quant_id = quant_id[:,:,0]
+
+# dump models to disk as we go
+checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.cond_size, '{epoch:02d}'))
+
+if args.retrain is not None:
+ model.load_weights(args.retrain)
+
+if quantize or retrain:
+ #Adapting from an existing model
+ model.load_weights(input_model)
+
+model.save_weights('{}_{}_initial.h5'.format(args.output, args.cond_size))
+
+callbacks = [checkpoint]
+#callbacks = []
+
+if args.logdir is not None:
+ logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.cond_size)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
+ callbacks.append(tensorboard_callback)
+
+model.fit([features, quant_id, lambda_val], [features, features, features, features], batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
--- /dev/null
+++ b/dnn/training_tf2/uniform_noise.py
@@ -1,0 +1,78 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Contains the UniformNoise layer."""
+
+
+import tensorflow.compat.v2 as tf
+
+from tensorflow.keras import backend
+
+from tensorflow.keras.layers import Layer
+
+class UniformNoise(Layer):
+ """Apply additive zero-centered uniform noise.
+
+ This is useful to mitigate overfitting
+ (you could see it as a form of random data augmentation).
+ Uniform noise is used here (rather than Gaussian) so that training
+ matches the rounding error of the hard quantizer used at inference time.
+
+ As it is a regularization layer, it is only active at training time.
+
+ Args:
+ stddev: Float, half-width of the noise interval; samples are drawn
+ uniformly from [-stddev, stddev).
+ seed: Integer, optional random seed to enable deterministic behavior.
+
+ Call arguments:
+ inputs: Input tensor (of any rank).
+ training: Python boolean indicating whether the layer should behave in
+ training mode (adding noise) or in inference mode (doing nothing).
+
+ Input shape:
+ Arbitrary. Use the keyword argument `input_shape`
+ (tuple of integers, does not include the samples axis)
+ when using this layer as the first layer in a model.
+
+ Output shape:
+ Same shape as input.
+ """
+
+
+
+
+ def __init__(self, stddev=0.5, seed=None, **kwargs):
+ super().__init__(**kwargs)
+ self.supports_masking = True
+ self.stddev = stddev
+ self.seed = seed
+
+
+ def call(self, inputs, training=None):
+ def noised():
+ return inputs + backend.random_uniform(
+ shape=tf.shape(inputs),
+ minval=-self.stddev,
+ maxval=self.stddev,
+ dtype=inputs.dtype,
+ seed=self.seed,
+ )
+
+ return backend.in_train_phase(noised, inputs, training=training)
+
+ def get_config(self):
+ config = {"stddev": self.stddev}+ base_config = super().get_config()
+ return dict(list(base_config.items()) + list(config.items()))
+
+ def compute_output_shape(self, input_shape):
+ return input_shape
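A minimal sketch of the layer's behaviour (assuming TF 2.x eager execution; not part of the patch): noise is added only when the training flag is set.

import tensorflow as tf
from uniform_noise import UniformNoise

layer = UniformNoise(stddev=0.5)  # same half-width as the +/-0.5 rounding noise
x = tf.zeros((2, 3))

print(layer(x, training=False).numpy())  # unchanged: all zeros
print(layer(x, training=True).numpy())   # zeros plus noise drawn from [-0.5, 0.5)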
--- a/dnn/vec.h
+++ b/dnn/vec.h
@@ -37,7 +37,7 @@
#if defined(__AVX__) || defined(__SSE2__)
#include "vec_avx.h"
-#elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && !defined(DISABLE_NEON)
#include "vec_neon.h"
#else
@@ -59,7 +59,7 @@
/* No AVX2/FMA support */
#ifndef LPCNET_TEST
-static inline float celt_exp2(float x)
+static inline float lpcnet_exp2(float x)
 {
 int integer;
float frac;
@@ -77,7 +77,7 @@
res.i = (res.i + (integer<<23)) & 0x7fffffff;
return res.f;
}
-#define celt_exp(x) celt_exp2((x)*1.44269504f)
+#define lpcnet_exp(x) lpcnet_exp2((x)*1.44269504f)
static inline float tanh_approx(float x)
 {
@@ -107,7 +107,7 @@
 {
 int i;
for (i=0;i<N;i++)
- y[i] = celt_exp(x[i]);
+ y[i] = lpcnet_exp(x[i]);
}
static inline void vec_tanh(float *y, const float *x, int N)
--- a/dnn/vec_avx.h
+++ b/dnn/vec_avx.h
@@ -33,6 +33,7 @@
#define VEC_AVX_H
#include <immintrin.h>
+#include <math.h>
/* Use 8-bit dot products unless disabled or if stuck with SSE2. */
#if (defined(__AVX2__) || defined(__SSSE3__)) && !defined(DISABLE_DOT_PROD)
@@ -41,7 +42,11 @@
#else
+#if defined(_MSC_VER)
+#pragma message ("Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 to get better performance")
+#else
#warning "Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 using -march= to get better performance"
+#endif
#endif
@@ -81,7 +86,7 @@
#define _mm256_storeu_ps(dst, src) mm256_storeu_ps(dst, src)
-static inline mm256_emu mm256_setzero_ps() {
+static inline mm256_emu mm256_setzero_ps(void) {
 mm256_emu ret;
ret.lo = _mm_setzero_ps();
ret.hi = ret.lo;
@@ -297,7 +302,7 @@
const __m256 K1 = _mm256_set1_ps(0.69583354f);
const __m256 K2 = _mm256_set1_ps(0.22606716f);
const __m256 K3 = _mm256_set1_ps(0.078024523f);
- const __m256 log2_E = _mm256_set1_ps(1.44269504);
+ const __m256 log2_E = _mm256_set1_ps(1.44269504f);
const __m256 max_in = _mm256_set1_ps(50.f);
const __m256 min_in = _mm256_set1_ps(-50.f);
__m256 XF, Y;
@@ -519,7 +524,7 @@
#endif
-static inline float celt_exp(float x)
+static inline float lpcnet_exp(float x)
 {
 float out[8];
__m256 X, Y;
@@ -540,7 +545,7 @@
_mm256_storeu_ps(&y[i], Y);
}
for (;i<N;i++)
- y[i] = celt_exp(x[i]);
+ y[i] = lpcnet_exp(x[i]);
}
#ifdef __AVX__
--- a/dnn/vec_neon.h
+++ b/dnn/vec_neon.h
@@ -33,7 +33,12 @@
#ifndef DISABLE_DOT_PROD
#define DOT_PROD
#endif
+
+#ifdef DOT_PROD
typedef signed char qweight;
+#else
+typedef float qweight;
+#endif
#ifndef LPCNET_TEST
@@ -105,7 +110,7 @@
return vmaxq_f32(min_out, vminq_f32(max_out, num));
}
-static inline float celt_exp(float x)
+static inline float lpcnet_exp(float x)
 {
 float out[4];
float32x4_t X, Y;
@@ -146,7 +151,7 @@
vst1q_f32(&y[i], Y);
}
for (;i<N;i++)
- y[i] = celt_exp(x[i]);
+ y[i] = lpcnet_exp(x[i]);
}
static inline void vec_tanh(float *y, const float *x, int N)
@@ -162,7 +167,7 @@
for (;i<N;i++)
 {
 float ex2;
- ex2 = celt_exp(2*x[i]);
+ ex2 = lpcnet_exp(2*x[i]);
y[i] = (ex2-1)/(ex2+1);
}
}
@@ -180,7 +185,7 @@
for (;i<N;i++)
 {
 float ex;
- ex = celt_exp(x[i]);
+ ex = lpcnet_exp(x[i]);
y[i] = (ex)/(ex+1);
}
}
--- /dev/null
+++ b/dnn/write_lpcnet_weights.c
@@ -1,0 +1,66 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include "nnet.h"
+
+extern const WeightArray lpcnet_arrays[];
+extern const WeightArray lpcnet_plc_arrays[];
+
+void write_weights(const WeightArray *list, FILE *fout)
+{
+ int i=0;
+ unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};
+ while (list[i].name != NULL) {
+ WeightHead h;
+ memcpy(h.head, "DNNw", 4);
+ h.version = WEIGHT_BLOB_VERSION;
+ h.type = list[i].type;
+ h.size = list[i].size;
+ h.block_size = (h.size+WEIGHT_BLOCK_SIZE-1)/WEIGHT_BLOCK_SIZE*WEIGHT_BLOCK_SIZE;
+ RNN_CLEAR(h.name, sizeof(h.name));
+ strncpy(h.name, list[i].name, sizeof(h.name));
+ h.name[sizeof(h.name)-1] = 0;
+ celt_assert(sizeof(h) == WEIGHT_BLOCK_SIZE);
+ fwrite(&h, 1, WEIGHT_BLOCK_SIZE, fout);
+ fwrite(list[i].data, 1, h.size, fout);
+ fwrite(zeros, 1, h.block_size-h.size, fout);
+ i++;
+ }
+}
+
+int main(void)
+{
+ FILE *fout = fopen("weights_blob.bin", "wb");
+ write_weights(lpcnet_arrays, fout);
+ write_weights(lpcnet_plc_arrays, fout);
+ fclose(fout);
+ return 0;
+}
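Every block header written by write_weights() begins with the four ASCII bytes "DNNw", so the emitted weights_blob.bin can be sanity-checked without knowing the full header layout. A rough sketch (approximate only, since data bytes could in principle also contain the pattern):

with open("weights_blob.bin", "rb") as f:
    blob = f.read()

assert blob.startswith(b"DNNw"), "not a blob produced by dump_weights_blob"
print("approximate number of weight arrays:", blob.count(b"DNNw"))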
--