shithub: opus

ref: 93bc94ba793810e075b403f88c17536cca309414
parent: b6ac1c78bb1c6376a818c7a68ab034a68624b19a
parent: 886d647bb18cec8fb11955a27a9b48d5e7aba84a
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Tue May 23 20:57:20 EDT 2023

Merge branch 'exp_rdovae6'

--- a/dnn/Makefile.am
+++ b/dnn/Makefile.am
@@ -32,11 +32,13 @@
 	lpcnet.c \
 	lpcnet_dec.c \
 	lpcnet_enc.c \
+	lpcnet_tables.c \
 	nnet.c \
 	nnet_data.c \
 	plc_data.c \
 	ceps_codebooks.c \
 	pitch.c \
+	parse_lpcnet_weights.c \
 	freq.c \
 	kiss_fft.c \
 	lpcnet_plc.c
@@ -45,7 +47,7 @@
 liblpcnet_la_LDFLAGS = -no-undefined \
  -version-info @OP_LT_CURRENT@:@OP_LT_REVISION@:@OP_LT_AGE@
 
-noinst_PROGRAMS = lpcnet_demo dump_data
+noinst_PROGRAMS = lpcnet_demo dump_data dump_weights_blob
 
 lpcnet_demo_SOURCES = lpcnet_demo.c
 lpcnet_demo_LDADD = liblpcnet.la
@@ -56,9 +58,13 @@
 #dump_data_SOURCES = dump_data.c
 #dump_data_LDADD = $(DUMP_OBJ) $(LIBM)
 
-dump_data_SOURCES = common.c dump_data.c burg.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c ceps_codebooks.c
+dump_data_SOURCES = common.c dump_data.c burg.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c lpcnet_tables.c ceps_codebooks.c
 dump_data_LDADD = $(LIBM)
 dump_data_CFLAGS = $(AM_CFLAGS)
+
+dump_weights_blob_SOURCES = nnet_data.c plc_data.c write_lpcnet_weights.c
+dump_weights_blob_LDADD = $(LIBM)
+dump_weights_blob_CFLAGS = $(AM_CFLAGS) -DDUMP_BINARY_WEIGHTS
 
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = lpcnet.pc
--- a/dnn/README.md
+++ b/dnn/README.md
@@ -11,6 +11,7 @@
 - J.-M. Valin, J. Skoglund, [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://jmvalin.ca/papers/lpcnet_codec.pdf), *Proc. INTERSPEECH*, arxiv:1903.12087, 2019.
 - J. Skoglund, J.-M. Valin, [Improving Opus Low Bit Rate Quality with Neural Speech Synthesis](https://jmvalin.ca/papers/opusnet.pdf), *Proc. INTERSPEECH*, arxiv:1905.04628, 2020.
 - J.-M. Valin, A. Mustafa, C. Montgomery, T.B. Terriberry, M. Klingbeil, P. Smaragdis, A. Krishnaswamy, [Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model](https://jmvalin.ca/papers/lpcnet_plc.pdf), *Proc. INTERSPEECH*, arxiv:2205.05785, 2022.
+- J.-M. Valin, J. Büthe, A. Mustafa, [Low-Bitrate Redundancy Coding of Speech Using a Rate-Distortion-Optimized Variational Autoencoder](https://jmvalin.ca/papers/valin_dred.pdf), *Proc. ICASSP*, arXiv:2212.04453, 2023. ([blog post](https://www.amazon.science/blog/neural-encoding-enables-more-efficient-recovery-of-lost-audio-packets))
 
 # Introduction
 
--- a/dnn/autogen.sh
+++ b/dnn/autogen.sh
@@ -6,7 +6,7 @@
 test -n "$srcdir" && cd "$srcdir"
 
 #SHA1 of the first commit compatible with the current model
-commit=97e64b3
+commit=399be7c
 ./download_model.sh $commit
 
 echo "Updating build configuration files for lpcnet, please wait...."
--- a/dnn/common.c
+++ b/dnn/common.c
@@ -40,7 +40,7 @@
     float ref[NB_BANDS];
     float pred[3*NB_BANDS];
     RNN_COPY(ref, x, NB_BANDS);
-    for (i=0;i<NB_BANDS;i++) pred[i] = .5*(left[i] + right[i]);
+    for (i=0;i<NB_BANDS;i++) pred[i] = .5f*(left[i] + right[i]);
     for (i=0;i<NB_BANDS;i++) pred[NB_BANDS+i] = left[i];
     for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = right[i];
     for (i=0;i<NB_BANDS;i++) {
--- a/dnn/common.h
+++ b/dnn/common.h
@@ -36,8 +36,8 @@
 {
     float s;
     float scale_1 = 32768.f/255.f;
-    u = u - 128;
-    s = u >= 0 ? 1 : -1;
+    u = u - 128.f;
+    s = u >= 0.f ? 1.f : -1.f;
     u = fabs(u);
     return s*scale_1*(exp(u/128.*LOG256)-1);
 }
--- /dev/null
+++ b/dnn/download_model.bat
@@ -1,0 +1,10 @@
+@echo off
+set model=lpcnet_data-%1.tar.gz
+
+if not exist %model% (
+    echo Downloading latest model
+    powershell -Command "(New-Object System.Net.WebClient).DownloadFile('https://media.xiph.org/lpcnet/data/%model%', '%model%')"
+)
+
+tar -xvzf %model%
+
--- a/dnn/download_model.sh
+++ b/dnn/download_model.sh
@@ -7,7 +7,7 @@
         echo "Downloading latest model"
         wget https://media.xiph.org/lpcnet/data/$model
 fi
-tar xvf $model
+tar xvof $model
 touch src/nnet_data.[ch]
 touch src/plc_data.[ch]
 mv src/*.[ch] .
--- /dev/null
+++ b/dnn/dred_rdovae.c
@@ -1,0 +1,135 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "dred_rdovae.h"
+#include "dred_rdovae_enc.h"
+#include "dred_rdovae_dec.h"
+#include "dred_rdovae_stats_data.h"
+
+void DRED_rdovae_decode_all(float *features, const float *state, const float *latents, int nb_latents)
+{
+    int i;
+    RDOVAEDec dec;
+    memset(&dec, 0, sizeof(dec));
+    DRED_rdovae_dec_init_states(&dec, state);
+    for (i = 0; i < 2*nb_latents; i += 2)
+    {
+        DRED_rdovae_decode_qframe(
+            &dec,
+            &features[2*i*DRED_NUM_FEATURES],
+            &latents[(i/2)*DRED_LATENT_DIM]);
+    }
+}
+
+size_t DRED_rdovae_get_enc_size()
+{
+    return sizeof(RDOVAEEnc);
+}
+
+size_t DRED_rdovae_get_dec_size()
+{
+    return sizeof(RDOVAEDec);
+}
+
+void DRED_rdovae_init_encoder(RDOVAEEnc *enc_state)
+{
+    memset(enc_state, 0, sizeof(*enc_state));
+
+}
+
+void DRED_rdovae_init_decoder(RDOVAEDec *dec_state)
+{
+    memset(dec_state, 0, sizeof(*dec_state));
+}
+
+
+RDOVAEEnc * DRED_rdovae_create_encoder()
+{
+    RDOVAEEnc *enc;
+    enc = (RDOVAEEnc*) calloc(sizeof(*enc), 1);
+    DRED_rdovae_init_encoder(enc);
+    return enc;
+}
+
+RDOVAEDec * DRED_rdovae_create_decoder()
+{
+    RDOVAEDec *dec;
+    dec = (RDOVAEDec*) calloc(sizeof(*dec), 1);
+    DRED_rdovae_init_decoder(dec);
+    return dec;
+}
+
+void DRED_rdovae_destroy_decoder(RDOVAEDec* dec)
+{
+    free(dec);
+}
+
+void DRED_rdovae_destroy_encoder(RDOVAEEnc* enc)
+{
+    free(enc);
+}
+
+void DRED_rdovae_encode_dframe(RDOVAEEnc *enc_state, float *latents, float *initial_state, const float *input)
+{
+    dred_rdovae_encode_dframe(enc_state, latents, initial_state, input);
+}
+
+void DRED_rdovae_dec_init_states(RDOVAEDec *h, const float * initial_state)
+{
+    dred_rdovae_dec_init_states(h, initial_state);
+}
+
+void DRED_rdovae_decode_qframe(RDOVAEDec *h, float *qframe, const float *z)
+{
+    dred_rdovae_decode_qframe(h, qframe, z);
+}
+
+
+const opus_uint16 * DRED_rdovae_get_p0_pointer(void)
+{
+    return &dred_p0_q15[0];
+}
+
+const opus_uint16 * DRED_rdovae_get_dead_zone_pointer(void)
+{
+    return &dred_dead_zone_q10[0];
+}
+
+const opus_uint16 * DRED_rdovae_get_r_pointer(void)
+{
+    return &dred_r_q15[0];
+}
+
+const opus_uint16 * DRED_rdovae_get_quant_scales_pointer(void)
+{
+    return &dred_quant_scales_q8[0];
+}
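
A minimal caller sketch for DRED_rdovae_decode_all() (not part of the patch): following the indexing above, each latent vector expands to one qframe of four feature frames, so nb_latents latents yield 4*nb_latents frames. DRED_NUM_FEATURES and DRED_LATENT_DIM are assumed to come from dred_rdovae_constants.h; MY_NB_LATENTS is purely illustrative.

    #include "dred_rdovae.h"
    #include "dred_rdovae_constants.h"   /* assumed to define the sizes used below */

    #define MY_NB_LATENTS 10             /* illustrative count, not a real constant */

    static void decode_sketch(const float *state, const float *latents)
    {
        /* latents: MY_NB_LATENTS vectors of DRED_LATENT_DIM floats;
           output: 4 frames of DRED_NUM_FEATURES floats per latent vector */
        float features[4*MY_NB_LATENTS*DRED_NUM_FEATURES];
        DRED_rdovae_decode_all(features, state, latents, MY_NB_LATENTS);
    }
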
--- /dev/null
+++ b/dnn/dred_rdovae_dec.c
@@ -1,0 +1,96 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "dred_rdovae_dec.h"
+#include "dred_rdovae_constants.h"
+
+
+void dred_rdovae_dec_init_states(
+    RDOVAEDec *h,            /* io: state buffer handle */
+    const float *initial_state  /* i: initial state */
+    )
+{
+    /* initialize GRU states from initial state */
+    _lpcnet_compute_dense(&state1, h->dense2_state, initial_state);
+    _lpcnet_compute_dense(&state2, h->dense4_state, initial_state);
+    _lpcnet_compute_dense(&state3, h->dense6_state, initial_state);
+}
+
+
+void dred_rdovae_decode_qframe(
+    RDOVAEDec *dec_state,       /* io: state buffer handle */
+    float *qframe,              /* o: quadruple feature frame (four concatenated frames in reverse order) */
+    const float *input          /* i: latent vector */
+    )
+{
+    float buffer[DEC_DENSE1_OUT_SIZE + DEC_DENSE2_OUT_SIZE + DEC_DENSE3_OUT_SIZE + DEC_DENSE4_OUT_SIZE + DEC_DENSE5_OUT_SIZE + DEC_DENSE6_OUT_SIZE + DEC_DENSE7_OUT_SIZE + DEC_DENSE8_OUT_SIZE];
+    int output_index = 0;
+    int input_index = 0;
+    float zero_vector[1024] = {0};
+
+    /* run decoder stack and concatenate output in buffer */
+    _lpcnet_compute_dense(&dec_dense1, &buffer[output_index], input);
+    input_index = output_index;
+    output_index += DEC_DENSE1_OUT_SIZE;
+
+    compute_gruB(&dec_dense2, zero_vector, dec_state->dense2_state, &buffer[input_index]);
+    memcpy(&buffer[output_index], dec_state->dense2_state, DEC_DENSE2_OUT_SIZE * sizeof(float));
+    input_index = output_index;
+    output_index += DEC_DENSE2_OUT_SIZE;
+
+    _lpcnet_compute_dense(&dec_dense3, &buffer[output_index], &buffer[input_index]);
+    input_index = output_index;
+    output_index += DEC_DENSE3_OUT_SIZE;
+
+    compute_gruB(&dec_dense4, zero_vector, dec_state->dense4_state, &buffer[input_index]);
+    memcpy(&buffer[output_index], dec_state->dense4_state, DEC_DENSE4_OUT_SIZE * sizeof(float));
+    input_index = output_index;
+    output_index += DEC_DENSE4_OUT_SIZE;
+
+    _lpcnet_compute_dense(&dec_dense5, &buffer[output_index], &buffer[input_index]);
+    input_index = output_index;
+    output_index += DEC_DENSE5_OUT_SIZE;
+
+    compute_gruB(&dec_dense6, zero_vector, dec_state->dense6_state, &buffer[input_index]);
+    memcpy(&buffer[output_index], dec_state->dense6_state, DEC_DENSE6_OUT_SIZE * sizeof(float));
+    input_index = output_index;
+    output_index += DEC_DENSE6_OUT_SIZE;
+
+    _lpcnet_compute_dense(&dec_dense7, &buffer[output_index], &buffer[input_index]);
+    input_index = output_index;
+    output_index += DEC_DENSE7_OUT_SIZE;
+
+    _lpcnet_compute_dense(&dec_dense8, &buffer[output_index], &buffer[input_index]);
+    output_index += DEC_DENSE8_OUT_SIZE;
+
+    _lpcnet_compute_dense(&dec_final, qframe, buffer);
+}
\ No newline at end of file
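
Per the comment on dred_rdovae_decode_qframe() above, a qframe packs four feature frames in reverse temporal order. A hypothetical unpacking sketch, assuming DRED_NUM_FEATURES comes from dred_rdovae_constants.h and that offset 0 holds the newest frame:

    #include "dred_rdovae_constants.h"

    /* qframe is newest-first; frames[] comes out oldest-first */
    static void unpack_qframe(const float *qframe,
                              float frames[4][DRED_NUM_FEATURES])
    {
        int k, i;
        for (k = 0; k < 4; k++)
            for (i = 0; i < DRED_NUM_FEATURES; i++)
                frames[3-k][i] = qframe[k*DRED_NUM_FEATURES + i];
    }
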
--- /dev/null
+++ b/dnn/dred_rdovae_dec.h
@@ -1,0 +1,44 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _DRED_RDOVAE_DEC_H
+#define _DRED_RDOVAE_DEC_H
+
+#include "dred_rdovae.h"
+#include "dred_rdovae_dec_data.h"
+#include "dred_rdovae_stats_data.h"
+
+struct RDOVAEDecStruct {
+    float dense2_state[DEC_DENSE2_STATE_SIZE];
+    float dense4_state[DEC_DENSE2_STATE_SIZE];
+    float dense6_state[DEC_DENSE2_STATE_SIZE];
+};
+
+void dred_rdovae_dec_init_states(RDOVAEDec *h, const float * initial_state);
+void dred_rdovae_decode_qframe(RDOVAEDec *h, float *qframe, const float * z);
+
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/dred_rdovae_enc.c
@@ -1,0 +1,94 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <math.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "dred_rdovae_enc.h"
+
+
+void dred_rdovae_encode_dframe(
+    RDOVAEEnc *enc_state,           /* io: encoder state */
+    float *latents,                 /* o: latent vector */
+    float *initial_state,           /* o: initial state */
+    const float *input              /* i: double feature frame (concatenated) */
+    )
+{
+    float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE];
+    int output_index = 0;
+    int input_index = 0;
+    float zero_vector[1024] = {0};
+
+    /* run encoder stack and concatenate output in buffer */
+    _lpcnet_compute_dense(&enc_dense1, &buffer[output_index], input);
+    input_index = output_index;
+    output_index += ENC_DENSE1_OUT_SIZE;
+
+    compute_gruB(&enc_dense2, zero_vector, enc_state->dense2_state, &buffer[input_index]);
+    memcpy(&buffer[output_index], enc_state->dense2_state, ENC_DENSE2_OUT_SIZE * sizeof(float));
+    input_index = output_index;
+    output_index += ENC_DENSE2_OUT_SIZE;
+
+    _lpcnet_compute_dense(&enc_dense3, &buffer[output_index], &buffer[input_index]);
+    input_index = output_index;
+    output_index += ENC_DENSE3_OUT_SIZE;
+
+    compute_gruB(&enc_dense4, zero_vector, enc_state->dense4_state, &buffer[input_index]);
+    memcpy(&buffer[output_index], enc_state->dense4_state, ENC_DENSE4_OUT_SIZE * sizeof(float));
+    input_index = output_index;
+    output_index += ENC_DENSE4_OUT_SIZE;
+
+    _lpcnet_compute_dense(&enc_dense5, &buffer[output_index], &buffer[input_index]);
+    input_index = output_index;
+    output_index += ENC_DENSE5_OUT_SIZE;
+
+    compute_gruB(&enc_dense6, zero_vector, enc_state->dense6_state, &buffer[input_index]);
+    memcpy(&buffer[output_index], enc_state->dense6_state, ENC_DENSE6_OUT_SIZE * sizeof(float));
+    input_index = output_index;
+    output_index += ENC_DENSE6_OUT_SIZE;
+
+    _lpcnet_compute_dense(&enc_dense7, &buffer[output_index], &buffer[input_index]);
+    input_index = output_index;
+    output_index += ENC_DENSE7_OUT_SIZE;
+
+    _lpcnet_compute_dense(&enc_dense8, &buffer[output_index], &buffer[input_index]);
+    output_index += ENC_DENSE8_OUT_SIZE;
+
+    /* compute latents from concatenated input buffer */
+    compute_conv1d(&bits_dense, latents, enc_state->bits_dense_state, buffer);
+
+
+    /* next, calculate initial state */
+    _lpcnet_compute_dense(&gdense1, &buffer[output_index], buffer);
+    input_index = output_index;
+    _lpcnet_compute_dense(&gdense2, initial_state, &buffer[input_index]);
+
+}
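
The matching encoder call (again a sketch, not part of the patch): dred_rdovae_encode_dframe() consumes two concatenated feature frames and produces one latent vector plus a candidate initial state for the decoder. The 2*DRED_NUM_FEATURES input size, DRED_LATENT_DIM latent size, and GDENSE2_OUT_SIZE state size are assumptions read off the code above.

    #include "dred_rdovae_enc.h"
    #include "dred_rdovae_constants.h"   /* assumed to define DRED_NUM_FEATURES etc. */

    static void encode_sketch(RDOVAEEnc *enc,
                              const float *dframe /* 2*DRED_NUM_FEATURES floats */)
    {
        float latents[DRED_LATENT_DIM];        /* assumed output size of bits_dense */
        float initial_state[GDENSE2_OUT_SIZE]; /* assumed output size of gdense2 */
        dred_rdovae_encode_dframe(enc, latents, initial_state, dframe);
    }
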
--- /dev/null
+++ b/dnn/dred_rdovae_enc.h
@@ -1,0 +1,45 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _DRED_RDOVAE_ENC_H
+#define _DRED_RDOVAE_ENC_H
+
+#include "dred_rdovae.h"
+
+#include "dred_rdovae_enc_data.h"
+
+struct RDOVAEEncStruct {
+    float dense2_state[3 * ENC_DENSE2_STATE_SIZE];
+    float dense4_state[3 * ENC_DENSE4_STATE_SIZE];
+    float dense6_state[3 * ENC_DENSE6_STATE_SIZE];
+    float bits_dense_state[BITS_DENSE_STATE_SIZE];
+};
+
+void dred_rdovae_encode_dframe(RDOVAEEnc *enc_state, float *latents, float *initial_state, const float *input);
+
+
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/dump_lpcnet_tables.c
@@ -1,0 +1,104 @@
+/* Copyright (c) 2017-2018 Mozilla
+   Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include "freq.h"
+#include "kiss_fft.h"
+
+
+int main(void) {
+  int i;
+  FILE *file;
+  kiss_fft_state *kfft;
+  float half_window[OVERLAP_SIZE];
+  float dct_table[NB_BANDS*NB_BANDS];
+
+  file=fopen("lpcnet_tables.c", "wb");
+  fprintf(file, "/* The contents of this file were automatically generated by dump_lpcnet_tables.c */\n\n");
+  fprintf(file, "#ifdef HAVE_CONFIG_H\n");
+  fprintf(file, "#include \"config.h\"\n");
+  fprintf(file, "#endif\n");
+
+  fprintf(file, "#include \"kiss_fft.h\"\n\n");
+
+  kfft = opus_fft_alloc_twiddles(WINDOW_SIZE, NULL, NULL, NULL, 0);
+
+  fprintf(file, "static const arch_fft_state arch_fft = {0, NULL};\n\n");
+
+  fprintf (file, "static const opus_int16 fft_bitrev[%d] = {\n", kfft->nfft);
+  for (i=0;i<kfft->nfft;i++)
+    fprintf (file, "%d,%c", kfft->bitrev[i],(i+16)%15==0?'\n':' ');
+  fprintf (file, "};\n\n");
+
+  fprintf (file, "static const kiss_twiddle_cpx fft_twiddles[%d] = {\n", kfft->nfft);
+  for (i=0;i<kfft->nfft;i++)
+    fprintf (file, "{%#0.9gf, %#0.9gf},%c", kfft->twiddles[i].r, kfft->twiddles[i].i,(i+3)%2==0?'\n':' ');
+  fprintf (file, "};\n\n");
+
+
+  fprintf(file, "const kiss_fft_state kfft = {\n");
+  fprintf(file, "%d, /* nfft */\n", kfft->nfft);
+  fprintf(file, "%#0.8gf, /* scale */\n", kfft->scale);
+  fprintf(file, "%d, /* shift */\n", kfft->shift);
+  fprintf(file, "{");
+  for (i=0;i<2*MAXFACTORS;i++) {
+    fprintf(file, "%d, ", kfft->factors[i]);
+  }
+  fprintf(file, "}, /* factors */\n");
+  fprintf(file, "fft_bitrev, /* bitrev*/\n");
+  fprintf(file, "fft_twiddles, /* twiddles*/\n");
+  fprintf(file, "(arch_fft_state *)&arch_fft, /* arch_fft*/\n");
+
+  fprintf(file, "};\n\n");
+
+  for (i=0;i<OVERLAP_SIZE;i++)
+    half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/OVERLAP_SIZE) * sin(.5*M_PI*(i+.5)/OVERLAP_SIZE));
+  fprintf(file, "const float half_window[] = {\n");
+  for (i=0;i<OVERLAP_SIZE;i++)
+    fprintf (file, "%#0.9gf,%c", half_window[i],(i+6)%5==0?'\n':' ');
+  fprintf(file, "};\n\n");
+
+  for (i=0;i<NB_BANDS;i++) {
+    int j;
+    for (j=0;j<NB_BANDS;j++) {
+      dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
+      if (j==0) dct_table[i*NB_BANDS + j] *= sqrt(.5);
+    }
+  }
+  fprintf(file, "const float dct_table[] = {\n");
+  for (i=0;i<NB_BANDS*NB_BANDS;i++)
+    fprintf (file, "%#0.9gf,%c", dct_table[i],(i+6)%5==0?'\n':' ');
+  fprintf(file, "};\n");
+
+  fclose(file);
+  return 0;
+}
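
The tables written above pair with the sqrt(2/NB_BANDS) scaling in dct()/idct() (see the freq.c hunk below) to form an orthonormal DCT-II/DCT-III pair; the sqrt(.5) factor on the j==0 column normalizes the DC basis vector. A standalone check, assuming NB_BANDS is 18 as in this codebase:

    #include <math.h>
    #include <stdio.h>

    #ifndef M_PI
    #define M_PI 3.14159265358979323846
    #endif

    #define NB 18   /* assumed value of NB_BANDS */

    int main(void) {
      int i, j, k;
      double T[NB][NB], max_err = 0;
      for (i=0;i<NB;i++) for (j=0;j<NB;j++) {
        T[i][j] = cos((i+.5)*j*M_PI/NB);
        if (j==0) T[i][j] *= sqrt(.5);
      }
      /* with the 2/NB scaling, T'T should be the identity matrix */
      for (i=0;i<NB;i++) for (j=0;j<NB;j++) {
        double sum = 0;
        for (k=0;k<NB;k++) sum += T[k][i]*T[k][j];
        max_err = fmax(max_err, fabs(2./NB*sum - (i==j)));
      }
      printf("max deviation from identity: %g\n", max_err);
      return 0;
    }
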
--- a/dnn/freq.c
+++ b/dnn/freq.c
@@ -51,14 +51,12 @@
     0.8f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.666667f, 0.5f, 0.5f, 0.5f, 0.333333f, 0.25f, 0.25f, 0.2f, 0.166667f, 0.173913f
 };
 
-typedef struct {
-  int init;
-  kiss_fft_state *kfft;
-  float half_window[OVERLAP_SIZE];
-  float dct_table[NB_BANDS*NB_BANDS];
-} CommonState;
 
+extern const kiss_fft_state kfft;
+extern const float half_window[OVERLAP_SIZE];
+extern const float dct_table[NB_BANDS*NB_BANDS];
 
+
 void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
   int i;
   float sum[NB_BANDS] = {0};
@@ -162,15 +160,15 @@
   float x[WINDOW_SIZE];
   float Eburg[NB_BANDS];
   float g;
-  float E;
   kiss_fft_cpx LPC[FREQ_SIZE];
   float Ly[NB_BANDS];
+  float logMax = -2;
+  float follow = -2;
   assert(order <= LPC_ORDER);
   assert(len <= FRAME_SIZE);
   for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i];
   g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
   g /= len - 2*(order-1);
-  //printf("%g\n", g);
   RNN_CLEAR(x, WINDOW_SIZE);
   x[0] = 1;
   for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
@@ -177,14 +175,11 @@
   forward_transform(LPC, x);
   compute_band_energy_inverse(Eburg, LPC);
   for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
-  float logMax = -2;
-  float follow = -2;
   for (i=0;i<NB_BANDS;i++) {
     Ly[i] = log10(1e-2+Eburg[i]);
     Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
     logMax = MAX16(logMax, Ly[i]);
     follow = MAX16(follow-2.5, Ly[i]);
-    E += Eburg[i];
   }
   dct(burg_cepstrum, Ly);
   burg_cepstrum[0] += - 4;
@@ -243,32 +238,14 @@
   }
 }
 
-CommonState common;
 
-static void check_init(void) {
-  int i;
-  if (common.init) return;
-  common.kfft = opus_fft_alloc_twiddles(WINDOW_SIZE, NULL, NULL, NULL, 0);
-  for (i=0;i<OVERLAP_SIZE;i++)
-    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/OVERLAP_SIZE) * sin(.5*M_PI*(i+.5)/OVERLAP_SIZE));
-  for (i=0;i<NB_BANDS;i++) {
-    int j;
-    for (j=0;j<NB_BANDS;j++) {
-      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
-      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
-    }
-  }
-  common.init = 1;
-}
-
 void dct(float *out, const float *in) {
   int i;
-  check_init();
   for (i=0;i<NB_BANDS;i++) {
     int j;
     float sum = 0;
     for (j=0;j<NB_BANDS;j++) {
-      sum += in[j] * common.dct_table[j*NB_BANDS + i];
+      sum += in[j] * dct_table[j*NB_BANDS + i];
     }
     out[i] = sum*sqrt(2./NB_BANDS);
   }
@@ -276,12 +253,11 @@
 
 void idct(float *out, const float *in) {
   int i;
-  check_init();
   for (i=0;i<NB_BANDS;i++) {
     int j;
     float sum = 0;
     for (j=0;j<NB_BANDS;j++) {
-      sum += in[j] * common.dct_table[i*NB_BANDS + j];
+      sum += in[j] * dct_table[i*NB_BANDS + j];
     }
     out[i] = sum*sqrt(2./NB_BANDS);
   }
@@ -291,12 +267,11 @@
   int i;
   kiss_fft_cpx x[WINDOW_SIZE];
   kiss_fft_cpx y[WINDOW_SIZE];
-  check_init();
   for (i=0;i<WINDOW_SIZE;i++) {
     x[i].r = in[i];
     x[i].i = 0;
   }
-  opus_fft(common.kfft, x, y, 0);
+  opus_fft(&kfft, x, y, 0);
   for (i=0;i<FREQ_SIZE;i++) {
     out[i] = y[i];
   }
@@ -306,7 +281,6 @@
   int i;
   kiss_fft_cpx x[WINDOW_SIZE];
   kiss_fft_cpx y[WINDOW_SIZE];
-  check_init();
   for (i=0;i<FREQ_SIZE;i++) {
     x[i] = in[i];
   }
@@ -314,7 +288,7 @@
     x[i].r = x[WINDOW_SIZE - i].r;
     x[i].i = -x[WINDOW_SIZE - i].i;
   }
-  opus_fft(common.kfft, x, y, 0);
+  opus_fft(&kfft, x, y, 0);
   /* output in reverse order for IFFT. */
   out[0] = WINDOW_SIZE*y[0].r;
   for (i=1;i<WINDOW_SIZE;i++) {
@@ -371,10 +345,9 @@
 
 void apply_window(float *x) {
   int i;
-  check_init();
   for (i=0;i<OVERLAP_SIZE;i++) {
-    x[i] *= common.half_window[i];
-    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
+    x[i] *= half_window[i];
+    x[WINDOW_SIZE - 1 - i] *= half_window[i];
   }
 }
 
--- /dev/null
+++ b/dnn/include/dred_rdovae.h
@@ -1,0 +1,60 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include <stdlib.h>
+
+#include "opus_types.h"
+
+typedef struct RDOVAEDecStruct RDOVAEDec;
+typedef struct RDOVAEEncStruct RDOVAEEnc;
+
+void DRED_rdovae_decode_all(float *features, const float *state, const float *latents, int nb_latents);
+
+
+size_t DRED_rdovae_get_enc_size(void);
+
+size_t DRED_rdovae_get_dec_size(void);
+
+RDOVAEDec * DRED_rdovae_create_decoder(void);
+RDOVAEEnc * DRED_rdovae_create_encoder(void);
+void DRED_rdovae_destroy_decoder(RDOVAEDec* h);
+void DRED_rdovae_destroy_encoder(RDOVAEEnc* h);
+
+
+void DRED_rdovae_init_encoder(RDOVAEEnc *enc_state);
+
+void DRED_rdovae_encode_dframe(RDOVAEEnc *enc_state, float *latents, float *initial_state, const float *input);
+
+void DRED_rdovae_dec_init_states(RDOVAEDec *h, const float * initial_state);
+
+void DRED_rdovae_decode_qframe(RDOVAEDec *h, float *qframe, const float * z);
+
+const opus_uint16 * DRED_rdovae_get_p0_pointer(void);
+const opus_uint16 * DRED_rdovae_get_dead_zone_pointer(void);
+const opus_uint16 * DRED_rdovae_get_r_pointer(void);
+const opus_uint16 * DRED_rdovae_get_quant_scales_pointer(void);
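
Putting the public API together, a hypothetical encode/decode round trip (sizes are the same assumptions as in the sketches above, with dred_rdovae_constants.h assumed to provide them):

    #include <stdlib.h>
    #include "dred_rdovae.h"
    #include "dred_rdovae_constants.h"

    static void dred_roundtrip_sketch(const float *features_in, int nb_dframes,
                                      float *features_out)
    {
        int i;
        float state[GDENSE2_OUT_SIZE];   /* assumed initial-state size */
        float *latents = malloc(nb_dframes*DRED_LATENT_DIM*sizeof(float));
        RDOVAEEnc *enc = DRED_rdovae_create_encoder();
        /* each call consumes two feature frames and emits one latent vector;
           the last initial state is the one handed to the decoder */
        for (i = 0; i < nb_dframes; i++)
            DRED_rdovae_encode_dframe(enc, &latents[i*DRED_LATENT_DIM], state,
                                      &features_in[2*i*DRED_NUM_FEATURES]);
        DRED_rdovae_decode_all(features_out, state, latents, nb_dframes);
        DRED_rdovae_destroy_encoder(enc);
        free(latents);
    }
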
--- a/dnn/include/lpcnet.h
+++ b/dnn/include/lpcnet.h
@@ -197,4 +197,6 @@
 
 LPCNET_EXPORT void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features);
 
+LPCNET_EXPORT void lpcnet_plc_fec_clear(LPCNetPLCState *st);
+
 #endif
--- a/dnn/kiss_fft.c
+++ b/dnn/kiss_fft.c
@@ -506,10 +506,10 @@
    if (cfg)
    {
       opus_fft_free_arch((kiss_fft_state *)cfg, arch);
-      opus_free((opus_int16*)cfg->bitrev);
+      free((opus_int16*)cfg->bitrev);
       if (cfg->shift < 0)
-         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
-      opus_free((kiss_fft_state*)cfg);
+         free((kiss_twiddle_cpx*)cfg->twiddles);
+      free((kiss_fft_state*)cfg);
    }
 }
 
--- a/dnn/kiss_fft.h
+++ b/dnn/kiss_fft.h
@@ -34,8 +34,7 @@
 #include "arch.h"
 
 #include <stdlib.h>
-#define opus_alloc(x) malloc(x)
-#define opus_free(x) free(x)
+#define lpcnet_alloc(x) malloc(x)
 
 #ifdef __cplusplus
 extern "C" {
@@ -46,7 +45,7 @@
 # define kiss_fft_scalar __m128
 #define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
 #else
-#define KISS_FFT_MALLOC opus_alloc
+#define KISS_FFT_MALLOC lpcnet_alloc
 #endif
 
 #ifdef FIXED_POINT
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -89,23 +89,21 @@
     float dense1_out[FEATURE_DENSE1_OUT_SIZE];
     int pitch;
     float rc[LPC_ORDER];
-    //static float features[NB_FEATURES];
-    //RNN_COPY(features, lpcnet->last_features, NB_FEATURES);
     /* Matches the Python code -- the 0.1 avoids rounding issues. */
     pitch = (int)floor(.1 + 50*features[NB_BANDS]+100);
     pitch = IMIN(255, IMAX(33, pitch));
     net = &lpcnet->nnet;
     RNN_COPY(in, features, NB_FEATURES);
-    compute_embedding(&embed_pitch, &in[NB_FEATURES], pitch);
-    compute_conv1d(&feature_conv1, conv1_out, net->feature_conv1_state, in);
+    compute_embedding(&lpcnet->model.embed_pitch, &in[NB_FEATURES], pitch);
+    compute_conv1d(&lpcnet->model.feature_conv1, conv1_out, net->feature_conv1_state, in);
     if (lpcnet->frame_count < FEATURE_CONV1_DELAY) RNN_CLEAR(conv1_out, FEATURE_CONV1_OUT_SIZE);
-    compute_conv1d(&feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
+    compute_conv1d(&lpcnet->model.feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
     if (lpcnet->frame_count < FEATURES_DELAY) RNN_CLEAR(conv2_out, FEATURE_CONV2_OUT_SIZE);
-    _lpcnet_compute_dense(&feature_dense1, dense1_out, conv2_out);
-    _lpcnet_compute_dense(&feature_dense2, condition, dense1_out);
+    _lpcnet_compute_dense(&lpcnet->model.feature_dense1, dense1_out, conv2_out);
+    _lpcnet_compute_dense(&lpcnet->model.feature_dense2, condition, dense1_out);
     RNN_COPY(rc, condition, LPC_ORDER);
-    _lpcnet_compute_dense(&gru_a_dense_feature, gru_a_condition, condition);
-    _lpcnet_compute_dense(&gru_b_dense_feature, gru_b_condition, condition);
+    _lpcnet_compute_dense(&lpcnet->model.gru_a_dense_feature, gru_a_condition, condition);
+    _lpcnet_compute_dense(&lpcnet->model.gru_b_dense_feature, gru_b_condition, condition);
 #ifdef END2END
     rc2lpc(lpc, rc);
 #elif FEATURES_DELAY>0    
@@ -118,29 +116,54 @@
 #ifdef LPC_GAMMA
     lpc_weighting(lpc, LPC_GAMMA);
 #endif
-    //RNN_COPY(lpcnet->last_features, _features, NB_FEATURES);
     if (lpcnet->frame_count < 1000) lpcnet->frame_count++;
 }
 
-int run_sample_network(NNetState *net, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table, kiss99_ctx *rng)
+void run_frame_network_deferred(LPCNetState *lpcnet, const float *features)
 {
+    int max_buffer_size = lpcnet->model.feature_conv1.kernel_size + lpcnet->model.feature_conv2.kernel_size - 2;
+    celt_assert(max_buffer_size <= MAX_FEATURE_BUFFER_SIZE);
+    if (lpcnet->feature_buffer_fill == max_buffer_size) {
+        RNN_MOVE(lpcnet->feature_buffer, &lpcnet->feature_buffer[NB_FEATURES], (max_buffer_size-1)*NB_FEATURES);
+    } else {
+        lpcnet->feature_buffer_fill++;
+    }
+    RNN_COPY(&lpcnet->feature_buffer[(lpcnet->feature_buffer_fill-1)*NB_FEATURES], features, NB_FEATURES);
+}
+
+void run_frame_network_flush(LPCNetState *lpcnet)
+{
+    int i;
+    for (i=0;i<lpcnet->feature_buffer_fill;i++) {
+        float lpc[LPC_ORDER];
+        float gru_a_condition[3*GRU_A_STATE_SIZE];
+        float gru_b_condition[3*GRU_B_STATE_SIZE];
+        run_frame_network(lpcnet, gru_a_condition, gru_b_condition, lpc, &lpcnet->feature_buffer[i*NB_FEATURES]);
+    }
+    lpcnet->feature_buffer_fill = 0;
+}
+
+int run_sample_network(LPCNetState *lpcnet, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table, kiss99_ctx *rng)
+{
+    NNetState *net;
     float gru_a_input[3*GRU_A_STATE_SIZE];
     float in_b[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE];
     float gru_b_input[3*GRU_B_STATE_SIZE];
+    net = &lpcnet->nnet;
 #if 1
-    compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &gru_a_embed_sig, last_sig, &gru_a_embed_pred, pred, &gru_a_embed_exc, last_exc);
+    compute_gru_a_input(gru_a_input, gru_a_condition, GRU_A_STATE_SIZE, &lpcnet->model.gru_a_embed_sig, last_sig, &lpcnet->model.gru_a_embed_pred, pred, &lpcnet->model.gru_a_embed_exc, last_exc);
 #else
     RNN_COPY(gru_a_input, gru_a_condition, 3*GRU_A_STATE_SIZE);
-    accum_embedding(&gru_a_embed_sig, gru_a_input, last_sig);
-    accum_embedding(&gru_a_embed_pred, gru_a_input, pred);
-    accum_embedding(&gru_a_embed_exc, gru_a_input, last_exc);
+    accum_embedding(&lpcnet->model.gru_a_embed_sig, gru_a_input, last_sig);
+    accum_embedding(&lpcnet->model.gru_a_embed_pred, gru_a_input, pred);
+    accum_embedding(&lpcnet->model.gru_a_embed_exc, gru_a_input, last_exc);
 #endif
     /*compute_gru3(&gru_a, net->gru_a_state, gru_a_input);*/
-    compute_sparse_gru(&sparse_gru_a, net->gru_a_state, gru_a_input);
+    compute_sparse_gru(&lpcnet->model.sparse_gru_a, net->gru_a_state, gru_a_input);
     RNN_COPY(in_b, net->gru_a_state, GRU_A_STATE_SIZE);
     RNN_COPY(gru_b_input, gru_b_condition, 3*GRU_B_STATE_SIZE);
-    compute_gruB(&gru_b, gru_b_input, net->gru_b_state, in_b);
-    return sample_mdense(&dual_fc, net->gru_b_state, sampling_logit_table, rng);
+    compute_gruB(&lpcnet->model.gru_b, gru_b_input, net->gru_b_state, in_b);
+    return sample_mdense(&lpcnet->model.dual_fc, net->gru_b_state, sampling_logit_table, rng);
 }
 
 LPCNET_EXPORT int lpcnet_get_size()
@@ -151,15 +174,18 @@
 LPCNET_EXPORT int lpcnet_init(LPCNetState *lpcnet)
 {
     int i;
+    int ret;
     const char* rng_string="LPCNet";
     memset(lpcnet, 0, lpcnet_get_size());
     lpcnet->last_exc = lin2ulaw(0.f);
     for (i=0;i<256;i++) {
-        float prob = .025+.95*i/255.;
+        float prob = .025f+.95f*i/255.f;
         lpcnet->sampling_logit_table[i] = -log((1-prob)/prob);
     }
     kiss99_srand(&lpcnet->rng, (const unsigned char *)rng_string, strlen(rng_string));
-    return 0;
+    ret = init_lpcnet_model(&lpcnet->model, lpcnet_arrays);
+    celt_assert(ret == 0);
+    return ret;
 }
 
 
@@ -176,6 +202,14 @@
     free(lpcnet);
 }
 
+void lpcnet_reset_signal(LPCNetState *lpcnet)
+{
+    lpcnet->deemph_mem = 0;
+    lpcnet->last_exc = lin2ulaw(0.f);
+    RNN_CLEAR(lpcnet->last_sig, LPC_ORDER);
+    RNN_CLEAR(lpcnet->nnet.gru_a_state, GRU_A_STATE_SIZE);
+    RNN_CLEAR(lpcnet->nnet.gru_b_state, GRU_B_STATE_SIZE);
+}
 
 void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload)
 {
@@ -197,7 +231,7 @@
         for (j=0;j<LPC_ORDER;j++) pred -= lpcnet->last_sig[j]*lpcnet->lpc[j];
         last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
         pred_ulaw = lin2ulaw(pred);
-        exc = run_sample_network(&lpcnet->nnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
+        exc = run_sample_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
         if (i < preload) {
           exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred);
           pcm = output[i]-PREEMPH*lpcnet->deemph_mem;
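
The deferred path added above queues feature frames instead of running the conditioning network immediately; run_frame_network_flush() then catches the conv/GRU state up in one pass (this is how the lpcnet_plc.c changes below use it). A sketch, assuming lpcnet_private.h declares LPCNetState, NB_FEATURES and these helpers:

    #include "lpcnet_private.h"

    /* queue nb frames while synthesis is skipped, then flush once so the
       conditioning state is caught up before the next synthesis call */
    static void catch_up_sketch(LPCNetState *lpcnet, const float *features, int nb)
    {
        int i;
        for (i = 0; i < nb; i++)
            run_frame_network_deferred(lpcnet, &features[i*NB_FEATURES]);
        run_frame_network_flush(lpcnet);
    }
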
--- a/dnn/lpcnet_dec.c
+++ b/dnn/lpcnet_dec.c
@@ -121,13 +121,13 @@
   }
   for (sub=0;sub<4;sub++) {
     float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
-    p *= 1 + modulation/16./7.*(2*sub-3);
+    p *= 1.f + modulation/16.f/7.f*(2*sub-3);
     p = MIN16(255, MAX16(33, p));
-    features[sub][NB_BANDS] = .02*(p-100);
-    features[sub][NB_BANDS + 1] = frame_corr-.5;
+    features[sub][NB_BANDS] = .02f*(p-100.f);
+    features[sub][NB_BANDS + 1] = frame_corr-.5f;
   }
   
-  features[3][0] = (c0_id-64)/4.;
+  features[3][0] = (c0_id-64)/4.f;
   for (i=0;i<NB_BANDS_1;i++) {
     features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
   }
@@ -141,7 +141,7 @@
     features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
   }
   if ((vq_mid&MULTI_MASK) < 2) {
-    for (i=0;i<NB_BANDS;i++) features[1][i] += .5*(vq_mem[i] + features[3][i]);
+    for (i=0;i<NB_BANDS;i++) features[1][i] += .5f*(vq_mem[i] + features[3][i]);
   } else if ((vq_mid&MULTI_MASK) == 2) {
     for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
   } else {
--- a/dnn/lpcnet_demo.c
+++ b/dnn/lpcnet_demo.c
@@ -39,6 +39,7 @@
 #define MODE_FEATURES 2
 #define MODE_SYNTHESIS 3
 #define MODE_PLC 4
+#define MODE_ADDLPC 5
 
 void usage(void) {
     fprintf(stderr, "usage: lpcnet_demo -encode <input.pcm> <compressed.lpcnet>\n");
@@ -46,7 +47,8 @@
     fprintf(stderr, "       lpcnet_demo -features <input.pcm> <features.f32>\n");
     fprintf(stderr, "       lpcnet_demo -synthesis <features.f32> <output.pcm>\n");
     fprintf(stderr, "       lpcnet_demo -plc <plc_options> <percent> <input.pcm> <output.pcm>\n");
-    fprintf(stderr, "       lpcnet_demo -plc_file <plc_options> <percent> <input.pcm> <output.pcm>\n\n");
+    fprintf(stderr, "       lpcnet_demo -plc_file <plc_options> <percent> <input.pcm> <output.pcm>\n");
+    fprintf(stderr, "       lpcnet_demo -addlpc <features_without_lpc.f32> <features_with_lpc.lpc>\n\n");
     fprintf(stderr, "  plc_options:\n");
     fprintf(stderr, "       causal:       normal (causal) PLC\n");
     fprintf(stderr, "       causal_dc:    normal (causal) PLC with DC offset compensation\n");
@@ -83,6 +85,8 @@
         }
         argv+=2;
         argc-=2;
+    } else if (strcmp(argv[1], "-addlpc") == 0){
+        mode=MODE_ADDLPC;
     } else {
         usage();
     }
@@ -165,8 +169,8 @@
         int count=0;
         int loss=0;
         int skip=0, extra=0;
-        if ((plc_flags&0x3) == LPCNET_PLC_NONCAUSAL) skip=extra=80;
         LPCNetPLCState *net;
+        if ((plc_flags&0x3) == LPCNET_PLC_NONCAUSAL) skip=extra=80;
         net = lpcnet_plc_create(plc_flags);
         while (1) {
             size_t ret;
@@ -187,6 +191,17 @@
           fwrite(pcm, sizeof(pcm[0]), extra, fout);
         }
         lpcnet_plc_destroy(net);
+    } else if (mode == MODE_ADDLPC) {
+        float features[36];
+        size_t ret;
+
+        while (1) {
+            ret = fread(features, sizeof(features[0]), 36, fin);
+            if (ret != 36 || feof(fin)) break;
+            lpc_from_cepstrum(&features[20], &features[0]);
+            fwrite(features, sizeof(features[0]), 36, fout);
+        }
+
     } else {
         fprintf(stderr, "unknown action\n");
     }
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -52,7 +52,7 @@
 void vq_quantize_mbest(const float *codebook, int nb_entries, const float *x, int ndim, int mbest, float *dist, int *index)
 {
   int i, j;
-  for (i=0;i<mbest;i++) dist[i] = 1e15;
+  for (i=0;i<mbest;i++) dist[i] = 1e15f;
   
   for (i=0;i<nb_entries;i++)
   {
@@ -80,7 +80,7 @@
 int vq_quantize(const float *codebook, int nb_entries, const float *x, int ndim, float *dist_out)
 {
   int i, j;
-  float min_dist = 1e15;
+  float min_dist = 1e15f;
   int nearest = 0;
   
   for (i=0;i<nb_entries;i++)
@@ -242,7 +242,7 @@
 static int find_nearest_multi(const float *codebook, int nb_entries, const float *x, int ndim, float *dist_out, int sign)
 {
   int i, j;
-  float min_dist = 1e15;
+  float min_dist = 1e15f;
   int nearest = 0;
 
   for (i=0;i<nb_entries;i++)
@@ -290,7 +290,7 @@
     float s = 1;
     nb_entries = 1<<bits;
     RNN_COPY(ref, x, NB_BANDS);
-    for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5*(left[i] + right[i]);
+    for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5f*(left[i] + right[i]);
     for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i];
     for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i];
     for (i=0;i<4*NB_BANDS;i++) target[i] = x[i%NB_BANDS] - pred[i];
@@ -319,10 +319,10 @@
 int interp_search(const float *x, const float *left, const float *right, float *dist_out)
 {
     int i, k;
-    float min_dist = 1e15;
+    float min_dist = 1e15f;
     int best_pred = 0;
     float pred[4*NB_BANDS];
-    for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5*(left[i] + right[i]);
+    for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5f*(left[i] + right[i]);
     for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i];
     for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i];
 
@@ -342,7 +342,7 @@
 void interp_diff(float *x, float *left, float *right, float *codebook, int bits, int sign)
 {
     int i, k;
-    float min_dist = 1e15;
+    float min_dist = 1e15f;
     int best_pred = 0;
     float ref[NB_BANDS];
     float pred[4*NB_BANDS];
@@ -350,7 +350,7 @@
     (void)codebook;
     (void)bits;
     RNN_COPY(ref, x, NB_BANDS);
-    for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5*(left[i] + right[i]);
+    for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5f*(left[i] + right[i]);
     for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i];
     for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i];
 
@@ -378,7 +378,7 @@
 int double_interp_search(float features[4][NB_TOTAL_FEATURES], const float *mem) {
     int i, j;
     int best_id=0;
-    float min_dist = 1e15;
+    float min_dist = 1e15f;
     float dist[2][3];
     interp_search(features[0], mem, features[1], dist[0]);
     interp_search(features[2], features[1], features[3], dist[1]);
@@ -410,12 +410,12 @@
     id1 = best_id % 3;
     count = 1;
     if (id0 != 1) {
-        float t = (id0==0) ? .5 : 1.;
+        float t = (id0==0) ? .5f : 1.f;
         for (i=0;i<NB_BANDS;i++) features[1][i] += t*features[0][i];
         count += t;
     }
     if (id1 != 2) {
-        float t = (id1==0) ? .5 : 1.;
+        float t = (id1==0) ? .5f : 1.f;
         for (i=0;i<NB_BANDS;i++) features[1][i] += t*features[2][i];
         count += t;
     }
@@ -511,9 +511,9 @@
   follow = -2;
   for (i=0;i<NB_BANDS;i++) {
     Ly[i] = log10(1e-2+Ex[i]);
-    Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
+    Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
     logMax = MAX16(logMax, Ly[i]);
-    follow = MAX16(follow-2.5, Ly[i]);
+    follow = MAX16(follow-2.5f, Ly[i]);
     E += Ex[i];
   }
   dct(st->features[st->pcount], Ly);
@@ -529,7 +529,7 @@
       sum += st->lpc[j]*st->pitch_mem[j];
     RNN_MOVE(st->pitch_mem+1, st->pitch_mem, LPC_ORDER-1);
     st->pitch_mem[0] = aligned_in[i];
-    st->exc_buf[PITCH_MAX_PERIOD+i] = sum + .7*st->pitch_filt;
+    st->exc_buf[PITCH_MAX_PERIOD+i] = sum + .7f*st->pitch_filt;
     st->pitch_filt = sum;
     /*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
   }
@@ -548,7 +548,7 @@
       /* Upsample correlation by 3x and keep the max. */
       float interpolated[PITCH_MAX_PERIOD]={0};
       /* interp=sinc([-3:3]+1/3).*(.5+.5*cos(pi*[-3:3]/4.5)); interp=interp/sum(interp); */
-      static const float interp[7] = {0.026184, -0.098339, 0.369938, 0.837891, -0.184969, 0.070242, -0.020947};
+      static const float interp[7] = {0.026184f, -0.098339f, 0.369938f, 0.837891f, -0.184969f, 0.070242f, -0.020947f};
       for (i=4;i<PITCH_MAX_PERIOD-4;i++) {
         float val1=0, val2=0;
         int j;
@@ -582,7 +582,7 @@
   float sx=0, sxx=0, sxy=0, sy=0, sw=0;
   float frame_corr;
   int voiced;
-  float frame_weight_sum = 1e-15;
+  float frame_weight_sum = 1e-15f;
   float center_pitch;
   int main_pitch;
   int modulation;
@@ -594,11 +594,11 @@
   for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
   for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
   for(sub=0;sub<8;sub++) {
-    float max_path_all = -1e15;
+    float max_path_all = -1e15f;
     best_i = 0;
     for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
       float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
-      if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8;
+      if (st->xc[2+sub][i] < xc_half*1.1f) st->xc[2+sub][i] *= .8f;
     }
     for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
       int j;
@@ -666,7 +666,7 @@
   /*best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);*/
   best_b = (sy - best_a*sx)/sw;
   /* Quantizing the pitch as "main" pitch + slope. */
-  center_pitch = best_b+5.5*best_a;
+  center_pitch = best_b+5.5f*best_a;
   main_pitch = (int)floor(.5 + 21.*log2(center_pitch/PITCH_MIN_PERIOD));
   main_pitch = IMAX(0, IMIN(63, main_pitch));
   modulation = (int)floor(.5 + 16*7*best_a/center_pitch);
@@ -677,13 +677,13 @@
   for (sub=0;sub<4;sub++) {
     if (quantize) {
       float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
-      p *= 1 + modulation/16./7.*(2*sub-3);
+      p *= 1.f + modulation/16.f/7.f*(2*sub-3);
       p = MIN16(255, MAX16(33, p));
-      st->features[sub][NB_BANDS] = .02*(p-100);
-      st->features[sub][NB_BANDS + 1] = frame_corr-.5;
+      st->features[sub][NB_BANDS] = .02f*(p-100);
+      st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
     } else {
-      st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
-      st->features[sub][NB_BANDS + 1] = frame_corr-.5;
+      st->features[sub][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
+      st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
     }
     /*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/
   }
@@ -694,7 +694,7 @@
     /*printf("%f\n", st->features[3][0]);*/
     c0_id = (int)floor(.5 + st->features[3][0]*4);
     c0_id = IMAX(-64, IMIN(63, c0_id));
-    st->features[3][0] = c0_id/4.;
+    st->features[3][0] = c0_id/4.f;
     quantize_3stage_mbest(&st->features[3][1], vq_end);
     /*perform_interp_relaxation(st->features, st->vq_mem);*/
     quantize_diff(&st->features[1][0], st->vq_mem, &st->features[3][0], ceps_codebook_diff4, 12, 1, &vq_mid);
@@ -736,15 +736,15 @@
   int best[10];
   int pitch_prev[8][PITCH_MAX_PERIOD];
   float frame_corr;
-  float frame_weight_sum = 1e-15;
+  float frame_weight_sum = 1e-15f;
   for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
   for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
   for(sub=0;sub<8;sub++) {
-    float max_path_all = -1e15;
+    float max_path_all = -1e15f;
     best_i = 0;
     for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
       float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
-      if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8;
+      if (st->xc[2+sub][i] < xc_half*1.1f) st->xc[2+sub][i] *= .8f;
     }
     for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
       int j;
@@ -781,8 +781,8 @@
   }
   frame_corr /= 8;
   for (sub=0;sub<4;sub++) {
-    st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
-    st->features[sub][NB_BANDS + 1] = frame_corr-.5;
+    st->features[sub][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
+    st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
     /*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/
   }
   /*printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);*/
@@ -804,15 +804,15 @@
   int best[4];
   int pitch_prev[2][PITCH_MAX_PERIOD];
   float frame_corr;
-  float frame_weight_sum = 1e-15;
+  float frame_weight_sum = 1e-15f;
   for(sub=0;sub<2;sub++) frame_weight_sum += st->frame_weight[2+2*st->pcount+sub];
   for(sub=0;sub<2;sub++) st->frame_weight[2+2*st->pcount+sub] *= (2.f/frame_weight_sum);
   for(sub=0;sub<2;sub++) {
-    float max_path_all = -1e15;
+    float max_path_all = -1e15f;
     best_i = 0;
     for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
       float xc_half = MAX16(MAX16(st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i-1)/2]);
-      if (st->xc[2+2*st->pcount+sub][i] < xc_half*1.1) st->xc[2+2*st->pcount+sub][i] *= .8;
+      if (st->xc[2+2*st->pcount+sub][i] < xc_half*1.1f) st->xc[2+2*st->pcount+sub][i] *= .8f;
     }
     for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
       int j;
@@ -848,8 +848,8 @@
     best_i = pitch_prev[sub][best_i];
   }
   frame_corr /= 2;
-  st->features[st->pcount][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2]+best[3]))-200);
-  st->features[st->pcount][NB_BANDS + 1] = frame_corr-.5;
+  st->features[st->pcount][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2]+best[3]))-200);
+  st->features[st->pcount][NB_BANDS + 1] = frame_corr-.5f;
   if (ffeat) {
     fwrite(st->features[st->pcount], sizeof(float), NB_TOTAL_FEATURES, ffeat);
   }
--- a/dnn/lpcnet_plc.c
+++ b/dnn/lpcnet_plc.c
@@ -32,11 +32,19 @@
 #include "lpcnet.h"
 #include "plc_data.h"
 
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+/* Comment this out to have LPCNet update its state on every good packet (slow). */
+#define PLC_SKIP_UPDATES
+
 LPCNET_EXPORT int lpcnet_plc_get_size() {
   return sizeof(LPCNetPLCState);
 }
 
 LPCNET_EXPORT int lpcnet_plc_init(LPCNetPLCState *st, int options) {
+  int ret;
   RNN_CLEAR(st, 1);
   lpcnet_init(&st->lpcnet);
   lpcnet_encoder_init(&st->enc);
@@ -60,7 +68,9 @@
     return -1;
   }
   st->remove_dc = !!(options&LPCNET_PLC_DC_FILTER);
-  return 0;
+  ret = init_plc_model(&st->model, lpcnet_plc_arrays);
+  celt_assert(ret == 0);
+  return ret;
 }
 
 LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create(int options) {
@@ -75,6 +85,10 @@
 }
 
 void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features) {
+  if (features == NULL) {
+    st->fec_skip++;
+    return;
+  }
   if (st->fec_fill_pos == PLC_MAX_FEC) {
     if (st->fec_keep_pos == 0) {
       fprintf(stderr, "FEC buffer full\n");
@@ -89,28 +103,40 @@
   st->fec_fill_pos++;
 }
 
-static void compute_plc_pred(PLCNetState *net, float *out, const float *in) {
+void lpcnet_plc_fec_clear(LPCNetPLCState *st) {
+  st->fec_keep_pos = st->fec_read_pos = st->fec_fill_pos = st->fec_skip = 0;
+}
+
+
+static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in) {
   float zeros[3*PLC_MAX_RNN_NEURONS] = {0};
   float dense_out[PLC_DENSE1_OUT_SIZE];
-  _lpcnet_compute_dense(&plc_dense1, dense_out, in);
-  compute_gruB(&plc_gru1, zeros, net->plc_gru1_state, dense_out);
-  compute_gruB(&plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state);
-  _lpcnet_compute_dense(&plc_out, out, net->plc_gru2_state);
+  PLCNetState *net = &st->plc_net;
+  _lpcnet_compute_dense(&st->model.plc_dense1, dense_out, in);
+  compute_gruB(&st->model.plc_gru1, zeros, net->plc_gru1_state, dense_out);
+  compute_gruB(&st->model.plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state);
+  _lpcnet_compute_dense(&st->model.plc_out, out, net->plc_gru2_state);
   /* Artificially boost the correlation to make harmonics cleaner. */
   out[19] = MIN16(.5f, out[19]+.1f);
 }
 
 static int get_fec_or_pred(LPCNetPLCState *st, float *out) {
-  if (st->fec_read_pos != st->fec_fill_pos) {
+  if (st->fec_read_pos != st->fec_fill_pos && st->fec_skip==0) {
+    float plc_features[2*NB_BANDS+NB_FEATURES+1] = {0};
+    float discard[NB_FEATURES];
     RNN_COPY(out, &st->fec[st->fec_read_pos][0], NB_FEATURES);
     st->fec_read_pos++;
     /* Make sure we can rewind a few frames back at resync time. */
     st->fec_keep_pos = IMAX(0, IMAX(st->fec_keep_pos, st->fec_read_pos-FEATURES_DELAY-1));
-    /* FIXME: Figure out how to update compute_plc_pred() without Burg features. */
+    /* Update the PLC state from the FEC features, i.e. without Burg features. */
+    RNN_COPY(&plc_features[2*NB_BANDS], out, NB_FEATURES);
+    plc_features[2*NB_BANDS+NB_FEATURES] = -1;
+    compute_plc_pred(st, discard, plc_features);
     return 1;
   } else {
     float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
-    compute_plc_pred(&st->plc_net, out, zeros);
+    compute_plc_pred(st, out, zeros);
+    if (st->fec_skip > 0) st->fec_skip--;
     return 0;
   }
 }
@@ -119,13 +145,12 @@
   st->fec_read_pos -= offset;
   if (st->fec_read_pos < st->fec_keep_pos) {
     st->fec_read_pos = st->fec_keep_pos;
-    fprintf(stderr, "cannot rewind\n");
   }
 }
 
 void clear_state(LPCNetPLCState *st) {
   RNN_CLEAR(st->lpcnet.last_sig, LPC_ORDER);
-  st->lpcnet.last_exc = lin2ulaw(0.f);;
+  st->lpcnet.last_exc = lin2ulaw(0.f);
   st->lpcnet.deemph_mem = 0;
   RNN_CLEAR(st->lpcnet.nnet.gru_a_state, GRU_A_STATE_SIZE);
   RNN_CLEAR(st->lpcnet.nnet.gru_b_state, GRU_B_STATE_SIZE);
@@ -163,22 +188,14 @@
       float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
       RNN_COPY(zeros, plc_features, 2*NB_BANDS);
       zeros[2*NB_BANDS+NB_FEATURES] = 1;
-      if (st->fec_active) {
-        if (FEATURES_DELAY > 0) st->plc_net = st->plc_copy[FEATURES_DELAY-1];
-        fec_rewind(st, FEATURES_DELAY);
-      } else {
+      if (st->enable_blending) {
+        LPCNetState copy;
         st->plc_net = st->plc_copy[FEATURES_DELAY];
-        compute_plc_pred(&st->plc_net, st->features, zeros);
+        compute_plc_pred(st, st->features, zeros);
         for (i=0;i<FEATURES_DELAY;i++) {
-          float lpc[LPC_ORDER];
-          float gru_a_condition[3*GRU_A_STATE_SIZE];
-          float gru_b_condition[3*GRU_B_STATE_SIZE];
           /* FIXME: backtrack state, replace features. */
-          run_frame_network(&st->lpcnet, gru_a_condition, gru_b_condition, lpc, st->features);
+          run_frame_network_deferred(&st->lpcnet, st->features);
         }
-      }
-      if (st->enable_blending) {
-        LPCNetState copy;
         copy = st->lpcnet;
         lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], tmp, FRAME_SIZE-TRAINING_OFFSET, 0);
         for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) {
@@ -189,8 +206,14 @@
         st->lpcnet = copy;
         lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
       } else {
+        if (FEATURES_DELAY > 0) st->plc_net = st->plc_copy[FEATURES_DELAY-1];
+        fec_rewind(st, FEATURES_DELAY);
+#ifdef PLC_SKIP_UPDATES
+        lpcnet_reset_signal(&st->lpcnet);
+#else
         RNN_COPY(tmp, pcm, FRAME_SIZE-TRAINING_OFFSET);
         lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
+#endif
       }
       RNN_COPY(st->pcm, &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
       st->pcm_fill = TRAINING_OFFSET;
@@ -208,24 +231,28 @@
   if (!st->blend) {
     RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES);
     plc_features[2*NB_BANDS+NB_FEATURES] = 1;
-    compute_plc_pred(&st->plc_net, st->features, plc_features);
+    compute_plc_pred(st, st->features, plc_features);
     /* Discard an FEC frame that we know we will no longer need. */
-    if (st->fec_read_pos < st->fec_fill_pos) st->fec_read_pos++;
+    if (st->fec_skip) st->fec_skip--;
+    else if (st->fec_read_pos < st->fec_fill_pos) st->fec_read_pos++;
     st->fec_keep_pos = IMAX(0, IMAX(st->fec_keep_pos, st->fec_read_pos-FEATURES_DELAY-1));
   }
   if (st->skip_analysis) {
-    if (!st->fec_active) {
-      float lpc[LPC_ORDER];
-      float gru_a_condition[3*GRU_A_STATE_SIZE];
-      float gru_b_condition[3*GRU_B_STATE_SIZE];
+    if (st->enable_blending) {
       /* FIXME: backtrack state, replace features. */
-      run_frame_network(&st->lpcnet, gru_a_condition, gru_b_condition, lpc, st->enc.features[0]);
+      run_frame_network_deferred(&st->lpcnet, st->enc.features[0]);
     }
     st->skip_analysis--;
   } else {
     for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE+i] = pcm[i];
     RNN_COPY(output, &st->pcm[0], FRAME_SIZE);
+#ifdef PLC_SKIP_UPDATES
+    {
+      run_frame_network_deferred(&st->lpcnet, st->enc.features[0]);
+    }
+#else
     lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], output, FRAME_SIZE, FRAME_SIZE);
+#endif
     RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
   }
   st->loss_count = 0;
@@ -235,7 +262,6 @@
     }
   }
   st->blend = 0;
-  st->fec_active = 0;
   return 0;
 }
 
@@ -243,6 +269,7 @@
 static int lpcnet_plc_conceal_causal(LPCNetPLCState *st, short *pcm) {
   int i;
   short output[FRAME_SIZE];
+  run_frame_network_flush(&st->lpcnet);
   st->enc.pcount = 0;
   /* If we concealed the previous frame, finish synthesizing the rest of the samples. */
   /* FIXME: Copy/predict features. */
@@ -253,7 +280,7 @@
     RNN_COPY(output, &st->pcm[0], update_count);
     RNN_MOVE(&st->plc_copy[1], &st->plc_copy[0], FEATURES_DELAY);
     st->plc_copy[0] = st->plc_net;
-    st->fec_active = get_fec_or_pred(st, st->features);
+    get_fec_or_pred(st, st->features);
     lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], output, update_count, update_count);
     RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
     st->pcm_fill -= update_count;
@@ -262,10 +289,10 @@
   RNN_MOVE(&st->plc_copy[1], &st->plc_copy[0], FEATURES_DELAY);
   st->plc_copy[0] = st->plc_net;
   lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0);
-  st->fec_active = get_fec_or_pred(st, st->features);
+  if (get_fec_or_pred(st, st->features)) st->loss_count = 0;
+  else st->loss_count++;
   if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
   else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
-  //if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));
   lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0);
   {
     float x[FRAME_SIZE];
@@ -275,7 +302,6 @@
     compute_frame_features(&st->enc, x);
     process_single_frame(&st->enc, NULL);
   }
-  if (!st->fec_active) st->loss_count++;
   st->blend = 1;
   if (st->remove_dc) {
     for (i=0;i<FRAME_SIZE;i++) {
@@ -330,7 +356,7 @@
     float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
     RNN_COPY(zeros, plc_features, 2*NB_BANDS);
     zeros[2*NB_BANDS+NB_FEATURES] = 1;
-    compute_plc_pred(&st->plc_net, st->features, zeros);
+    compute_plc_pred(st, st->features, zeros);
     copy = st->lpcnet;
     lpcnet_synthesize_impl(&st->lpcnet, st->features, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0);
     /* Undo initial DC offset removal so that we can take into account the last 5ms of synthesis. */
@@ -383,7 +409,7 @@
   if (st->loss_count == 0) {
     RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES);
     plc_features[2*NB_BANDS+NB_FEATURES] = 1;
-    compute_plc_pred(&st->plc_net, st->features, plc_features);
+    compute_plc_pred(st, st->features, plc_features);
     lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, TRAINING_OFFSET);
     lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
   }
@@ -406,10 +432,9 @@
   process_queued_update(st);
   st->enc.pcount = 0;
 
-  compute_plc_pred(&st->plc_net, st->features, zeros);
+  compute_plc_pred(st, st->features, zeros);
   if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
   else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
-  //if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));
 
   if (st->loss_count == 0) {
     RNN_COPY(pcm, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -23,11 +23,14 @@
 #define FORBIDDEN_INTERP 7
 
 #define PLC_MAX_FEC 100
+#define MAX_FEATURE_BUFFER_SIZE 4
 
 struct LPCNetState {
     NNetState nnet;
     int last_exc;
     float last_sig[LPC_ORDER];
+    float feature_buffer[NB_FEATURES*MAX_FEATURE_BUFFER_SIZE];
+    int feature_buffer_fill;
     float last_features[NB_FEATURES];
 #if FEATURES_DELAY>0
     float old_lpc[FEATURES_DELAY][LPC_ORDER];
@@ -39,6 +42,7 @@
     float deemph_mem;
     float lpc[LPC_ORDER];
     kiss99_ctx rng;
+    LPCNetModel model;
 };
 
 struct LPCNetDecState {
@@ -76,7 +80,7 @@
   int fec_keep_pos;
   int fec_read_pos;
   int fec_fill_pos;
-  int fec_active;
+  int fec_skip;
   short pcm[PLC_BUF_SIZE+FRAME_SIZE];
   int pcm_fill;
   int skip_analysis;
@@ -94,6 +98,7 @@
   short dc_buf[TRAINING_OFFSET];
   int queued_update;
   short queued_samples[FRAME_SIZE];
+  PLCModel model;
 };
 
 extern float ceps_codebook1[];
@@ -111,7 +116,12 @@
 
 void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]);
 
+void lpcnet_reset_signal(LPCNetState *lpcnet);
 void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
+void run_frame_network_deferred(LPCNetState *lpcnet, const float *features);
+void run_frame_network_flush(LPCNetState *lpcnet);
+
+
 void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload);
 void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *output, int N, int preload);
 void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const short *pcm_in, short *output, int N);
--- /dev/null
+++ b/dnn/lpcnet_tables.c
@@ -1,0 +1,307 @@
+/* The contents of this file were automatically generated by dump_lpcnet_tables.c */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "kiss_fft.h"
+
+static const arch_fft_state arch_fft = {0, NULL};
+
+static const opus_int16 fft_bitrev[320] = {
+0, 64, 128, 192, 256, 16, 80, 144, 208, 272, 32, 96, 160, 224, 288,
+48, 112, 176, 240, 304, 4, 68, 132, 196, 260, 20, 84, 148, 212, 276,
+36, 100, 164, 228, 292, 52, 116, 180, 244, 308, 8, 72, 136, 200, 264,
+24, 88, 152, 216, 280, 40, 104, 168, 232, 296, 56, 120, 184, 248, 312,
+12, 76, 140, 204, 268, 28, 92, 156, 220, 284, 44, 108, 172, 236, 300,
+60, 124, 188, 252, 316, 1, 65, 129, 193, 257, 17, 81, 145, 209, 273,
+33, 97, 161, 225, 289, 49, 113, 177, 241, 305, 5, 69, 133, 197, 261,
+21, 85, 149, 213, 277, 37, 101, 165, 229, 293, 53, 117, 181, 245, 309,
+9, 73, 137, 201, 265, 25, 89, 153, 217, 281, 41, 105, 169, 233, 297,
+57, 121, 185, 249, 313, 13, 77, 141, 205, 269, 29, 93, 157, 221, 285,
+45, 109, 173, 237, 301, 61, 125, 189, 253, 317, 2, 66, 130, 194, 258,
+18, 82, 146, 210, 274, 34, 98, 162, 226, 290, 50, 114, 178, 242, 306,
+6, 70, 134, 198, 262, 22, 86, 150, 214, 278, 38, 102, 166, 230, 294,
+54, 118, 182, 246, 310, 10, 74, 138, 202, 266, 26, 90, 154, 218, 282,
+42, 106, 170, 234, 298, 58, 122, 186, 250, 314, 14, 78, 142, 206, 270,
+30, 94, 158, 222, 286, 46, 110, 174, 238, 302, 62, 126, 190, 254, 318,
+3, 67, 131, 195, 259, 19, 83, 147, 211, 275, 35, 99, 163, 227, 291,
+51, 115, 179, 243, 307, 7, 71, 135, 199, 263, 23, 87, 151, 215, 279,
+39, 103, 167, 231, 295, 55, 119, 183, 247, 311, 11, 75, 139, 203, 267,
+27, 91, 155, 219, 283, 43, 107, 171, 235, 299, 59, 123, 187, 251, 315,
+15, 79, 143, 207, 271, 31, 95, 159, 223, 287, 47, 111, 175, 239, 303,
+63, 127, 191, 255, 319, };
+
+static const kiss_twiddle_cpx fft_twiddles[320] = {
+{1.00000000f, -0.00000000f}, {0.999807239f, -0.0196336918f},
+{0.999229014f, -0.0392598175f}, {0.998265624f, -0.0588708036f},
+{0.996917307f, -0.0784590989f}, {0.995184720f, -0.0980171412f},
+{0.993068457f, -0.117537394f}, {0.990569353f, -0.137012348f},
+{0.987688363f, -0.156434461f}, {0.984426558f, -0.175796285f},
+{0.980785251f, -0.195090324f}, {0.976765871f, -0.214309156f},
+{0.972369909f, -0.233445361f}, {0.967599094f, -0.252491564f},
+{0.962455213f, -0.271440446f}, {0.956940353f, -0.290284663f},
+{0.951056540f, -0.309017003f}, {0.944806039f, -0.327630192f},
+{0.938191354f, -0.346117049f}, {0.931214929f, -0.364470512f},
+{0.923879504f, -0.382683426f}, {0.916187942f, -0.400748819f},
+{0.908143163f, -0.418659747f}, {0.899748266f, -0.436409235f},
+{0.891006529f, -0.453990489f}, {0.881921291f, -0.471396744f},
+{0.872496009f, -0.488621235f}, {0.862734377f, -0.505657375f},
+{0.852640152f, -0.522498548f}, {0.842217207f, -0.539138317f},
+{0.831469595f, -0.555570245f}, {0.820401430f, -0.571787953f},
+{0.809017003f, -0.587785244f}, {0.797320664f, -0.603555918f},
+{0.785316944f, -0.619093955f}, {0.773010433f, -0.634393275f},
+{0.760405958f, -0.649448037f}, {0.747508347f, -0.664252460f},
+{0.734322488f, -0.678800762f}, {0.720853567f, -0.693087339f},
+{0.707106769f, -0.707106769f}, {0.693087339f, -0.720853567f},
+{0.678800762f, -0.734322488f}, {0.664252460f, -0.747508347f},
+{0.649448037f, -0.760405958f}, {0.634393275f, -0.773010433f},
+{0.619093955f, -0.785316944f}, {0.603555918f, -0.797320664f},
+{0.587785244f, -0.809017003f}, {0.571787953f, -0.820401430f},
+{0.555570245f, -0.831469595f}, {0.539138317f, -0.842217207f},
+{0.522498548f, -0.852640152f}, {0.505657375f, -0.862734377f},
+{0.488621235f, -0.872496009f}, {0.471396744f, -0.881921291f},
+{0.453990489f, -0.891006529f}, {0.436409235f, -0.899748266f},
+{0.418659747f, -0.908143163f}, {0.400748819f, -0.916187942f},
+{0.382683426f, -0.923879504f}, {0.364470512f, -0.931214929f},
+{0.346117049f, -0.938191354f}, {0.327630192f, -0.944806039f},
+{0.309017003f, -0.951056540f}, {0.290284663f, -0.956940353f},
+{0.271440446f, -0.962455213f}, {0.252491564f, -0.967599094f},
+{0.233445361f, -0.972369909f}, {0.214309156f, -0.976765871f},
+{0.195090324f, -0.980785251f}, {0.175796285f, -0.984426558f},
+{0.156434461f, -0.987688363f}, {0.137012348f, -0.990569353f},
+{0.117537394f, -0.993068457f}, {0.0980171412f, -0.995184720f},
+{0.0784590989f, -0.996917307f}, {0.0588708036f, -0.998265624f},
+{0.0392598175f, -0.999229014f}, {0.0196336918f, -0.999807239f},
+{6.12323426e-17f, -1.00000000f}, {-0.0196336918f, -0.999807239f},
+{-0.0392598175f, -0.999229014f}, {-0.0588708036f, -0.998265624f},
+{-0.0784590989f, -0.996917307f}, {-0.0980171412f, -0.995184720f},
+{-0.117537394f, -0.993068457f}, {-0.137012348f, -0.990569353f},
+{-0.156434461f, -0.987688363f}, {-0.175796285f, -0.984426558f},
+{-0.195090324f, -0.980785251f}, {-0.214309156f, -0.976765871f},
+{-0.233445361f, -0.972369909f}, {-0.252491564f, -0.967599094f},
+{-0.271440446f, -0.962455213f}, {-0.290284663f, -0.956940353f},
+{-0.309017003f, -0.951056540f}, {-0.327630192f, -0.944806039f},
+{-0.346117049f, -0.938191354f}, {-0.364470512f, -0.931214929f},
+{-0.382683426f, -0.923879504f}, {-0.400748819f, -0.916187942f},
+{-0.418659747f, -0.908143163f}, {-0.436409235f, -0.899748266f},
+{-0.453990489f, -0.891006529f}, {-0.471396744f, -0.881921291f},
+{-0.488621235f, -0.872496009f}, {-0.505657375f, -0.862734377f},
+{-0.522498548f, -0.852640152f}, {-0.539138317f, -0.842217207f},
+{-0.555570245f, -0.831469595f}, {-0.571787953f, -0.820401430f},
+{-0.587785244f, -0.809017003f}, {-0.603555918f, -0.797320664f},
+{-0.619093955f, -0.785316944f}, {-0.634393275f, -0.773010433f},
+{-0.649448037f, -0.760405958f}, {-0.664252460f, -0.747508347f},
+{-0.678800762f, -0.734322488f}, {-0.693087339f, -0.720853567f},
+{-0.707106769f, -0.707106769f}, {-0.720853567f, -0.693087339f},
+{-0.734322488f, -0.678800762f}, {-0.747508347f, -0.664252460f},
+{-0.760405958f, -0.649448037f}, {-0.773010433f, -0.634393275f},
+{-0.785316944f, -0.619093955f}, {-0.797320664f, -0.603555918f},
+{-0.809017003f, -0.587785244f}, {-0.820401430f, -0.571787953f},
+{-0.831469595f, -0.555570245f}, {-0.842217207f, -0.539138317f},
+{-0.852640152f, -0.522498548f}, {-0.862734377f, -0.505657375f},
+{-0.872496009f, -0.488621235f}, {-0.881921291f, -0.471396744f},
+{-0.891006529f, -0.453990489f}, {-0.899748266f, -0.436409235f},
+{-0.908143163f, -0.418659747f}, {-0.916187942f, -0.400748819f},
+{-0.923879504f, -0.382683426f}, {-0.931214929f, -0.364470512f},
+{-0.938191354f, -0.346117049f}, {-0.944806039f, -0.327630192f},
+{-0.951056540f, -0.309017003f}, {-0.956940353f, -0.290284663f},
+{-0.962455213f, -0.271440446f}, {-0.967599094f, -0.252491564f},
+{-0.972369909f, -0.233445361f}, {-0.976765871f, -0.214309156f},
+{-0.980785251f, -0.195090324f}, {-0.984426558f, -0.175796285f},
+{-0.987688363f, -0.156434461f}, {-0.990569353f, -0.137012348f},
+{-0.993068457f, -0.117537394f}, {-0.995184720f, -0.0980171412f},
+{-0.996917307f, -0.0784590989f}, {-0.998265624f, -0.0588708036f},
+{-0.999229014f, -0.0392598175f}, {-0.999807239f, -0.0196336918f},
+{-1.00000000f, -1.22464685e-16f}, {-0.999807239f, 0.0196336918f},
+{-0.999229014f, 0.0392598175f}, {-0.998265624f, 0.0588708036f},
+{-0.996917307f, 0.0784590989f}, {-0.995184720f, 0.0980171412f},
+{-0.993068457f, 0.117537394f}, {-0.990569353f, 0.137012348f},
+{-0.987688363f, 0.156434461f}, {-0.984426558f, 0.175796285f},
+{-0.980785251f, 0.195090324f}, {-0.976765871f, 0.214309156f},
+{-0.972369909f, 0.233445361f}, {-0.967599094f, 0.252491564f},
+{-0.962455213f, 0.271440446f}, {-0.956940353f, 0.290284663f},
+{-0.951056540f, 0.309017003f}, {-0.944806039f, 0.327630192f},
+{-0.938191354f, 0.346117049f}, {-0.931214929f, 0.364470512f},
+{-0.923879504f, 0.382683426f}, {-0.916187942f, 0.400748819f},
+{-0.908143163f, 0.418659747f}, {-0.899748266f, 0.436409235f},
+{-0.891006529f, 0.453990489f}, {-0.881921291f, 0.471396744f},
+{-0.872496009f, 0.488621235f}, {-0.862734377f, 0.505657375f},
+{-0.852640152f, 0.522498548f}, {-0.842217207f, 0.539138317f},
+{-0.831469595f, 0.555570245f}, {-0.820401430f, 0.571787953f},
+{-0.809017003f, 0.587785244f}, {-0.797320664f, 0.603555918f},
+{-0.785316944f, 0.619093955f}, {-0.773010433f, 0.634393275f},
+{-0.760405958f, 0.649448037f}, {-0.747508347f, 0.664252460f},
+{-0.734322488f, 0.678800762f}, {-0.720853567f, 0.693087339f},
+{-0.707106769f, 0.707106769f}, {-0.693087339f, 0.720853567f},
+{-0.678800762f, 0.734322488f}, {-0.664252460f, 0.747508347f},
+{-0.649448037f, 0.760405958f}, {-0.634393275f, 0.773010433f},
+{-0.619093955f, 0.785316944f}, {-0.603555918f, 0.797320664f},
+{-0.587785244f, 0.809017003f}, {-0.571787953f, 0.820401430f},
+{-0.555570245f, 0.831469595f}, {-0.539138317f, 0.842217207f},
+{-0.522498548f, 0.852640152f}, {-0.505657375f, 0.862734377f},
+{-0.488621235f, 0.872496009f}, {-0.471396744f, 0.881921291f},
+{-0.453990489f, 0.891006529f}, {-0.436409235f, 0.899748266f},
+{-0.418659747f, 0.908143163f}, {-0.400748819f, 0.916187942f},
+{-0.382683426f, 0.923879504f}, {-0.364470512f, 0.931214929f},
+{-0.346117049f, 0.938191354f}, {-0.327630192f, 0.944806039f},
+{-0.309017003f, 0.951056540f}, {-0.290284663f, 0.956940353f},
+{-0.271440446f, 0.962455213f}, {-0.252491564f, 0.967599094f},
+{-0.233445361f, 0.972369909f}, {-0.214309156f, 0.976765871f},
+{-0.195090324f, 0.980785251f}, {-0.175796285f, 0.984426558f},
+{-0.156434461f, 0.987688363f}, {-0.137012348f, 0.990569353f},
+{-0.117537394f, 0.993068457f}, {-0.0980171412f, 0.995184720f},
+{-0.0784590989f, 0.996917307f}, {-0.0588708036f, 0.998265624f},
+{-0.0392598175f, 0.999229014f}, {-0.0196336918f, 0.999807239f},
+{-1.83697015e-16f, 1.00000000f}, {0.0196336918f, 0.999807239f},
+{0.0392598175f, 0.999229014f}, {0.0588708036f, 0.998265624f},
+{0.0784590989f, 0.996917307f}, {0.0980171412f, 0.995184720f},
+{0.117537394f, 0.993068457f}, {0.137012348f, 0.990569353f},
+{0.156434461f, 0.987688363f}, {0.175796285f, 0.984426558f},
+{0.195090324f, 0.980785251f}, {0.214309156f, 0.976765871f},
+{0.233445361f, 0.972369909f}, {0.252491564f, 0.967599094f},
+{0.271440446f, 0.962455213f}, {0.290284663f, 0.956940353f},
+{0.309017003f, 0.951056540f}, {0.327630192f, 0.944806039f},
+{0.346117049f, 0.938191354f}, {0.364470512f, 0.931214929f},
+{0.382683426f, 0.923879504f}, {0.400748819f, 0.916187942f},
+{0.418659747f, 0.908143163f}, {0.436409235f, 0.899748266f},
+{0.453990489f, 0.891006529f}, {0.471396744f, 0.881921291f},
+{0.488621235f, 0.872496009f}, {0.505657375f, 0.862734377f},
+{0.522498548f, 0.852640152f}, {0.539138317f, 0.842217207f},
+{0.555570245f, 0.831469595f}, {0.571787953f, 0.820401430f},
+{0.587785244f, 0.809017003f}, {0.603555918f, 0.797320664f},
+{0.619093955f, 0.785316944f}, {0.634393275f, 0.773010433f},
+{0.649448037f, 0.760405958f}, {0.664252460f, 0.747508347f},
+{0.678800762f, 0.734322488f}, {0.693087339f, 0.720853567f},
+{0.707106769f, 0.707106769f}, {0.720853567f, 0.693087339f},
+{0.734322488f, 0.678800762f}, {0.747508347f, 0.664252460f},
+{0.760405958f, 0.649448037f}, {0.773010433f, 0.634393275f},
+{0.785316944f, 0.619093955f}, {0.797320664f, 0.603555918f},
+{0.809017003f, 0.587785244f}, {0.820401430f, 0.571787953f},
+{0.831469595f, 0.555570245f}, {0.842217207f, 0.539138317f},
+{0.852640152f, 0.522498548f}, {0.862734377f, 0.505657375f},
+{0.872496009f, 0.488621235f}, {0.881921291f, 0.471396744f},
+{0.891006529f, 0.453990489f}, {0.899748266f, 0.436409235f},
+{0.908143163f, 0.418659747f}, {0.916187942f, 0.400748819f},
+{0.923879504f, 0.382683426f}, {0.931214929f, 0.364470512f},
+{0.938191354f, 0.346117049f}, {0.944806039f, 0.327630192f},
+{0.951056540f, 0.309017003f}, {0.956940353f, 0.290284663f},
+{0.962455213f, 0.271440446f}, {0.967599094f, 0.252491564f},
+{0.972369909f, 0.233445361f}, {0.976765871f, 0.214309156f},
+{0.980785251f, 0.195090324f}, {0.984426558f, 0.175796285f},
+{0.987688363f, 0.156434461f}, {0.990569353f, 0.137012348f},
+{0.993068457f, 0.117537394f}, {0.995184720f, 0.0980171412f},
+{0.996917307f, 0.0784590989f}, {0.998265624f, 0.0588708036f},
+{0.999229014f, 0.0392598175f}, {0.999807239f, 0.0196336918f},
+};
+
+const kiss_fft_state kfft = {
+320, /* nfft */
+0.0031250000f, /* scale */
+-1, /* shift */
+{5, 64, 4, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev, /* bitrev*/
+fft_twiddles, /* twiddles*/
+(arch_fft_state *)&arch_fft, /* arch_fft*/
+};
+
+const float half_window[] = {
+3.78491532e-05f, 0.000340620492f, 0.000946046319f, 0.00185389258f, 0.00306380726f,
+0.00457531959f, 0.00638783723f, 0.00850064680f, 0.0109129101f, 0.0136236614f,
+0.0166318044f, 0.0199361145f, 0.0235352255f, 0.0274276342f, 0.0316116922f,
+0.0360856056f, 0.0408474281f, 0.0458950549f, 0.0512262285f, 0.0568385124f,
+0.0627293140f, 0.0688958541f, 0.0753351897f, 0.0820441842f, 0.0890194997f,
+0.0962576419f, 0.103754878f, 0.111507311f, 0.119510807f, 0.127761051f,
+0.136253506f, 0.144983411f, 0.153945804f, 0.163135484f, 0.172547072f,
+0.182174906f, 0.192013159f, 0.202055752f, 0.212296382f, 0.222728521f,
+0.233345464f, 0.244140238f, 0.255105674f, 0.266234398f, 0.277518868f,
+0.288951218f, 0.300523549f, 0.312227666f, 0.324055225f, 0.335997701f,
+0.348046392f, 0.360192508f, 0.372427016f, 0.384740859f, 0.397124738f,
+0.409569323f, 0.422065198f, 0.434602767f, 0.447172493f, 0.459764689f,
+0.472369671f, 0.484977663f, 0.497579008f, 0.510163903f, 0.522722721f,
+0.535245717f, 0.547723293f, 0.560145974f, 0.572504222f, 0.584788740f,
+0.596990347f, 0.609099925f, 0.621108532f, 0.633007407f, 0.644788086f,
+0.656442165f, 0.667961538f, 0.679338276f, 0.690564752f, 0.701633692f,
+0.712537885f, 0.723270535f, 0.733825266f, 0.744195819f, 0.754376352f,
+0.764361382f, 0.774145722f, 0.783724606f, 0.793093503f, 0.802248418f,
+0.811185598f, 0.819901764f, 0.828393936f, 0.836659551f, 0.844696403f,
+0.852502763f, 0.860077202f, 0.867418647f, 0.874526560f, 0.881400526f,
+0.888040781f, 0.894447744f, 0.900622249f, 0.906565487f, 0.912279010f,
+0.917764664f, 0.923024654f, 0.928061485f, 0.932878017f, 0.937477291f,
+0.941862822f, 0.946038187f, 0.950007319f, 0.953774393f, 0.957343817f,
+0.960720181f, 0.963908315f, 0.966913164f, 0.969739914f, 0.972393870f,
+0.974880517f, 0.977205336f, 0.979374051f, 0.981392324f, 0.983266115f,
+0.985001266f, 0.986603677f, 0.988079309f, 0.989434063f, 0.990674019f,
+0.991804957f, 0.992832899f, 0.993763626f, 0.994602919f, 0.995356441f,
+0.996029854f, 0.996628702f, 0.997158289f, 0.997623861f, 0.998030603f,
+0.998383403f, 0.998687088f, 0.998946249f, 0.999165416f, 0.999348700f,
+0.999500215f, 0.999623775f, 0.999723017f, 0.999801278f, 0.999861658f,
+0.999907196f, 0.999940455f, 0.999963880f, 0.999979615f, 0.999989510f,
+0.999995291f, 0.999998271f, 0.999999523f, 0.999999940f, 1.00000000f,
+};
+
+const float dct_table[] = {
+0.707106769f, 0.996194720f, 0.984807730f, 0.965925813f, 0.939692616f,
+0.906307817f, 0.866025388f, 0.819152057f, 0.766044438f, 0.707106769f,
+0.642787635f, 0.573576450f, 0.500000000f, 0.422618270f, 0.342020154f,
+0.258819044f, 0.173648179f, 0.0871557444f, 0.707106769f, 0.965925813f,
+0.866025388f, 0.707106769f, 0.500000000f, 0.258819044f, 6.12323426e-17f,
+-0.258819044f, -0.500000000f, -0.707106769f, -0.866025388f, -0.965925813f,
+-1.00000000f, -0.965925813f, -0.866025388f, -0.707106769f, -0.500000000f,
+-0.258819044f, 0.707106769f, 0.906307817f, 0.642787635f, 0.258819044f,
+-0.173648179f, -0.573576450f, -0.866025388f, -0.996194720f, -0.939692616f,
+-0.707106769f, -0.342020154f, 0.0871557444f, 0.500000000f, 0.819152057f,
+0.984807730f, 0.965925813f, 0.766044438f, 0.422618270f, 0.707106769f,
+0.819152057f, 0.342020154f, -0.258819044f, -0.766044438f, -0.996194720f,
+-0.866025388f, -0.422618270f, 0.173648179f, 0.707106769f, 0.984807730f,
+0.906307817f, 0.500000000f, -0.0871557444f, -0.642787635f, -0.965925813f,
+-0.939692616f, -0.573576450f, 0.707106769f, 0.707106769f, 6.12323426e-17f,
+-0.707106769f, -1.00000000f, -0.707106769f, -1.83697015e-16f, 0.707106769f,
+1.00000000f, 0.707106769f, 3.06161700e-16f, -0.707106769f, -1.00000000f,
+-0.707106769f, -4.28626385e-16f, 0.707106769f, 1.00000000f, 0.707106769f,
+0.707106769f, 0.573576450f, -0.342020154f, -0.965925813f, -0.766044438f,
+0.0871557444f, 0.866025388f, 0.906307817f, 0.173648179f, -0.707106769f,
+-0.984807730f, -0.422618270f, 0.500000000f, 0.996194720f, 0.642787635f,
+-0.258819044f, -0.939692616f, -0.819152057f, 0.707106769f, 0.422618270f,
+-0.642787635f, -0.965925813f, -0.173648179f, 0.819152057f, 0.866025388f,
+-0.0871557444f, -0.939692616f, -0.707106769f, 0.342020154f, 0.996194720f,
+0.500000000f, -0.573576450f, -0.984807730f, -0.258819044f, 0.766044438f,
+0.906307817f, 0.707106769f, 0.258819044f, -0.866025388f, -0.707106769f,
+0.500000000f, 0.965925813f, 3.06161700e-16f, -0.965925813f, -0.500000000f,
+0.707106769f, 0.866025388f, -0.258819044f, -1.00000000f, -0.258819044f,
+0.866025388f, 0.707106769f, -0.500000000f, -0.965925813f, 0.707106769f,
+0.0871557444f, -0.984807730f, -0.258819044f, 0.939692616f, 0.422618270f,
+-0.866025388f, -0.573576450f, 0.766044438f, 0.707106769f, -0.642787635f,
+-0.819152057f, 0.500000000f, 0.906307817f, -0.342020154f, -0.965925813f,
+0.173648179f, 0.996194720f, 0.707106769f, -0.0871557444f, -0.984807730f,
+0.258819044f, 0.939692616f, -0.422618270f, -0.866025388f, 0.573576450f,
+0.766044438f, -0.707106769f, -0.642787635f, 0.819152057f, 0.500000000f,
+-0.906307817f, -0.342020154f, 0.965925813f, 0.173648179f, -0.996194720f,
+0.707106769f, -0.258819044f, -0.866025388f, 0.707106769f, 0.500000000f,
+-0.965925813f, -4.28626385e-16f, 0.965925813f, -0.500000000f, -0.707106769f,
+0.866025388f, 0.258819044f, -1.00000000f, 0.258819044f, 0.866025388f,
+-0.707106769f, -0.500000000f, 0.965925813f, 0.707106769f, -0.422618270f,
+-0.642787635f, 0.965925813f, -0.173648179f, -0.819152057f, 0.866025388f,
+0.0871557444f, -0.939692616f, 0.707106769f, 0.342020154f, -0.996194720f,
+0.500000000f, 0.573576450f, -0.984807730f, 0.258819044f, 0.766044438f,
+-0.906307817f, 0.707106769f, -0.573576450f, -0.342020154f, 0.965925813f,
+-0.766044438f, -0.0871557444f, 0.866025388f, -0.906307817f, 0.173648179f,
+0.707106769f, -0.984807730f, 0.422618270f, 0.500000000f, -0.996194720f,
+0.642787635f, 0.258819044f, -0.939692616f, 0.819152057f, 0.707106769f,
+-0.707106769f, -1.83697015e-16f, 0.707106769f, -1.00000000f, 0.707106769f,
+5.51091070e-16f, -0.707106769f, 1.00000000f, -0.707106769f, -2.69484189e-15f,
+0.707106769f, -1.00000000f, 0.707106769f, -4.90477710e-16f, -0.707106769f,
+1.00000000f, -0.707106769f, 0.707106769f, -0.819152057f, 0.342020154f,
+0.258819044f, -0.766044438f, 0.996194720f, -0.866025388f, 0.422618270f,
+0.173648179f, -0.707106769f, 0.984807730f, -0.906307817f, 0.500000000f,
+0.0871557444f, -0.642787635f, 0.965925813f, -0.939692616f, 0.573576450f,
+0.707106769f, -0.906307817f, 0.642787635f, -0.258819044f, -0.173648179f,
+0.573576450f, -0.866025388f, 0.996194720f, -0.939692616f, 0.707106769f,
+-0.342020154f, -0.0871557444f, 0.500000000f, -0.819152057f, 0.984807730f,
+-0.965925813f, 0.766044438f, -0.422618270f, 0.707106769f, -0.965925813f,
+0.866025388f, -0.707106769f, 0.500000000f, -0.258819044f, 1.10280111e-15f,
+0.258819044f, -0.500000000f, 0.707106769f, -0.866025388f, 0.965925813f,
+-1.00000000f, 0.965925813f, -0.866025388f, 0.707106769f, -0.500000000f,
+0.258819044f, 0.707106769f, -0.996194720f, 0.984807730f, -0.965925813f,
+0.939692616f, -0.906307817f, 0.866025388f, -0.819152057f, 0.766044438f,
+-0.707106769f, 0.642787635f, -0.573576450f, 0.500000000f, -0.422618270f,
+0.342020154f, -0.258819044f, 0.173648179f, -0.0871557444f, };
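For reference, the `half_window[]` and `dct_table[]` constants above follow simple closed forms. A minimal sketch that regenerates them, assuming `OVERLAP_SIZE == 160` and `NB_BANDS == 18` as in `freq.h` (the printed values match the tables above):

```c
/* Sketch: regenerate half_window[] and dct_table[].  OVERLAP_SIZE and
 * NB_BANDS are assumptions taken from freq.h. */
#include <math.h>
#include <stdio.h>

#define OVERLAP_SIZE 160
#define NB_BANDS 18

int main(void) {
  int i, j;
  for (i = 0; i < OVERLAP_SIZE; i++) {
    /* First half of a Vorbis (power-complementary) window. */
    double s = sin(.5*M_PI*(i+.5)/OVERLAP_SIZE);
    printf("%.9gf,%c", sin(.5*M_PI*s*s), i%5 == 4 ? '\n' : ' ');
  }
  for (i = 0; i < NB_BANDS; i++) {
    for (j = 0; j < NB_BANDS; j++) {
      /* DCT-II basis, with the first column scaled by sqrt(1/2). */
      double v = cos((i+.5)*j*M_PI/NB_BANDS)*(j ? 1. : sqrt(.5));
      printf("%.9gf,%c", v, (i*NB_BANDS+j)%5 == 4 ? '\n' : ' ');
    }
  }
  return 0;
}
```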
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -38,11 +38,16 @@
 #include "tansig_table.h"
 #include "nnet.h"
 #include "nnet_data.h"
+#include "dred_rdovae_constants.h"
 #include "plc_data.h"
 
 #ifdef NO_OPTIMIZATIONS
+#if defined(_MSC_VER)
+#pragma message ("Compiling without any vectorization. This code will be very slow")
+#else
 #warning Compiling without any vectorization. This code will be very slow
 #endif
+#endif
 
 
 #define SOFTMAX_HACK
@@ -316,7 +321,7 @@
       state[i] = h[i];
 }
 
-#define MAX_RNN_NEURONS_ALL IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS)
+#define MAX_RNN_NEURONS_ALL IMAX(IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS)
 
 void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input)
 {
@@ -372,8 +377,8 @@
    int i;
    int N;
    int stride;
-   float zrh[3*MAX_RNN_NEURONS];
-   float recur[3*MAX_RNN_NEURONS];
+   float zrh[3*MAX_RNN_NEURONS_ALL];
+   float recur[3*MAX_RNN_NEURONS_ALL];
    float *z;
    float *r;
    float *h;
@@ -381,7 +386,7 @@
    z = zrh;
    r = &zrh[N];
    h = &zrh[2*N];
-   celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS);
+   celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS_ALL);
    celt_assert(input != state);
    celt_assert(gru->reset_after);
    stride = 3*N;
@@ -406,7 +411,7 @@
 {
    int i, k;
    int N;
-   float recur[3*MAX_RNN_NEURONS];
+   float recur[3*MAX_RNN_NEURONS_ALL];
    float *z;
    float *r;
    float *h;
@@ -415,7 +420,7 @@
    z = recur;
    r = &recur[N];
    h = &recur[2*N];
-   celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS);
+   celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS_ALL);
    celt_assert(input != state);
    celt_assert(gru->reset_after);
 #ifdef USE_SU_BIAS
@@ -442,14 +447,16 @@
       state[i] = z[i]*state[i] + (1-z[i])*h[i];
 }
 
+#define MAX_CONV_INPUTS_ALL IMAX(MAX_CONV_INPUTS, DRED_MAX_CONV_INPUTS)
+
 void compute_conv1d(const Conv1DLayer *layer, float *output, float *mem, const float *input)
 {
    int i;
    int N, M;
    int stride;
-   float tmp[MAX_CONV_INPUTS];
+   float tmp[MAX_CONV_INPUTS_ALL];
    celt_assert(input != output);
-   celt_assert(layer->nb_inputs*layer->kernel_size <= MAX_CONV_INPUTS);
+   celt_assert(layer->nb_inputs*layer->kernel_size <= MAX_CONV_INPUTS_ALL);
    RNN_COPY(tmp, mem, layer->nb_inputs*(layer->kernel_size-1));
    RNN_COPY(&tmp[layer->nb_inputs*(layer->kernel_size-1)], input, layer->nb_inputs);
    M = layer->nb_inputs*layer->kernel_size;
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -38,7 +38,30 @@
 #define ACTIVATION_SOFTMAX 4
 #define ACTIVATION_SWISH   5
 
+#define WEIGHT_BLOB_VERSION 0
+#define WEIGHT_BLOCK_SIZE 64
 typedef struct {
+  const char *name;
+  int type;
+  int size;
+  const void *data;
+} WeightArray;
+
+#define WEIGHT_TYPE_float 0
+#define WEIGHT_TYPE_int 1
+#define WEIGHT_TYPE_qweight 2
+
+typedef struct {
+  char head[4];
+  int version;
+  int type;
+  int size;
+  int block_size;
+  char name[44];
+} WeightHead;
+
+
+typedef struct {
   const float *bias;
   const float *input_weights;
   int nb_inputs;
@@ -121,5 +144,60 @@
 void compute_gru_a_input(float *output, const float *input, int N, const EmbeddingLayer *layer1, int val1, const EmbeddingLayer *layer2, int val2, const EmbeddingLayer *layer3, int val3);
 
 int sample_from_pdf(const float *pdf, int N, float exp_boost, float pdf_floor);
+
+
+extern const WeightArray lpcnet_arrays[];
+extern const WeightArray lpcnet_plc_arrays[];
+
+int mdense_init(MDenseLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *input_weights,
+  const char *factor,
+  int nb_inputs,
+  int nb_neurons,
+  int nb_channels,
+  int activation);
+
+int dense_init(DenseLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *input_weights,
+  int nb_inputs,
+  int nb_neurons,
+  int activation);
+
+int gru_init(GRULayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *subias,
+  const char *input_weights,
+  const char *input_weights_idx,
+  const char *recurrent_weights,
+  int nb_inputs,
+  int nb_neurons,
+  int activation,
+  int reset_after);
+
+int sparse_gru_init(SparseGRULayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *subias,
+  const char *diag_weights,
+  const char *recurrent_weights,
+  const char *idx,
+  int nb_neurons,
+  int activation,
+  int reset_after);
+
+int conv1d_init(Conv1DLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *input_weights,
+  int nb_inputs,
+  int kernel_size,
+  int nb_neurons,
+  int activation);
+
+int embedding_init(EmbeddingLayer *layer, const WeightArray *arrays,
+  const char *embedding_weights,
+  int nb_inputs,
+  int dim);
+
 
 #endif /* _MLP_H_ */
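For illustration, a minimal sketch of how these `*_init()` functions are meant to be used: each one looks up named arrays in a `WeightArray` table (e.g. `lpcnet_arrays` or a table produced by `parse_weights()`) and returns non-zero when a name is missing or a size does not match. The array names and dimensions below are hypothetical:

```c
#include <stdio.h>
#include "nnet.h"

/* Sketch: bind a DenseLayer to two named arrays from a weight table.
 * "plc_dense1_bias"/"plc_dense1_weights" and the 57/128 dimensions are
 * hypothetical; real names come from the generated weight tables. */
static int init_example(DenseLayer *dense, const WeightArray *arrays)
{
  if (dense_init(dense, arrays, "plc_dense1_bias", "plc_dense1_weights",
                 57, 128, ACTIVATION_TANH)) {
    fprintf(stderr, "missing or mis-sized weight array\n");
    return 1;
  }
  return 0;
}
```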
--- /dev/null
+++ b/dnn/parse_lpcnet_weights.c
@@ -1,0 +1,254 @@
+/* Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "nnet.h"
+
+#define SPARSE_BLOCK_SIZE 32
+
+extern const WeightArray lpcnet_arrays[];
+
+int parse_record(const unsigned char **data, int *len, WeightArray *array) {
+  const WeightHead *h = (const WeightHead *)*data;
+  if (*len < WEIGHT_BLOCK_SIZE) return -1;
+  if (h->block_size < h->size) return -1;
+  if (h->block_size > *len-WEIGHT_BLOCK_SIZE) return -1;
+  if (h->name[sizeof(h->name)-1] != 0) return -1;
+  if (h->size < 0) return -1;
+  array->name = h->name;
+  array->type = h->type;
+  array->size = h->size;
+  array->data = (*data)+WEIGHT_BLOCK_SIZE;
+
+  *data += h->block_size+WEIGHT_BLOCK_SIZE;
+  *len -= h->block_size+WEIGHT_BLOCK_SIZE;
+  return array->size;
+}
+
+int parse_weights(WeightArray **list, const unsigned char *data, int len)
+{
+  int nb_arrays=0;
+  int capacity=20;
+  *list = malloc(capacity*sizeof(WeightArray));
+  while (len > 0) {
+    int ret;
+    WeightArray array = {NULL, 0, 0, 0};
+    ret = parse_record(&data, &len, &array);
+    if (ret > 0) {
+      if (nb_arrays+1 >= capacity) {
+        /* Make sure there's room for the ending NULL element too. */
+        capacity = capacity*3/2;
+        *list = realloc(*list, capacity*sizeof(WeightArray));
+      }
+      (*list)[nb_arrays++] = array;
+    }
+  }
+  (*list)[nb_arrays].name=NULL;
+  return nb_arrays;
+}
+
+static const void *find_array_entry(const WeightArray *arrays, const char *name) {
+  while (arrays->name && strcmp(arrays->name, name) != 0) arrays++;
+  return arrays;
+}
+
+static const void *find_array_check(const WeightArray *arrays, const char *name, int size) {
+  const WeightArray *a = find_array_entry(arrays, name);
+  if (a->name && a->size == size) return a->data;
+  else return NULL;
+}
+
+static const void *find_idx_check(const WeightArray *arrays, const char *name, int nb_in, int nb_out, int *total_blocks) {
+  int remain;
+  const int *idx;
+  const WeightArray *a = find_array_entry(arrays, name);
+  *total_blocks = 0;
+  if (a->name == NULL) return NULL;
+  idx = a->data;
+  remain = a->size/sizeof(int);
+  while (remain > 0) {
+    int nb_blocks;
+    int i;
+    nb_blocks = *idx++;
+    if (remain < nb_blocks+1) return NULL;
+    for (i=0;i<nb_blocks;i++) {
+      int pos = *idx++;
+      if (pos+3 >= nb_in || (pos&0x3)) return NULL; 
+    }
+    nb_out -= 8;
+    remain -= nb_blocks+1;
+    *total_blocks += nb_blocks;
+  }
+  if (nb_out != 0) return NULL;
+  return a->data;
+}
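+
+/* For reference, the index layout validated above: the stream is a
+ * sequence of groups, one per 8 outputs, each of the form
+ * {nb_blocks, pos_0, ..., pos_{nb_blocks-1}}, where every pos is an
+ * input-column offset that is a multiple of 4.  Each pos thus names one
+ * 8x4 block of SPARSE_BLOCK_SIZE weights; e.g. the (hypothetical) index
+ * {2, 0, 8, 1, 4} covers 16 outputs with blocks at columns 0 and 8 for
+ * the first eight outputs and one block at column 4 for the next eight. */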
+
+int mdense_init(MDenseLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *input_weights,
+  const char *factor,
+  int nb_inputs,
+  int nb_neurons,
+  int nb_channels,
+  int activation)
+{
+  if ((layer->bias = find_array_check(arrays, bias, nb_neurons*nb_channels*sizeof(layer->bias[0]))) == NULL) return 1;
+  if ((layer->input_weights = find_array_check(arrays, input_weights, nb_inputs*nb_channels*nb_neurons*sizeof(layer->input_weights[0]))) == NULL) return 1;
+  if ((layer->factor = find_array_check(arrays, factor, nb_channels*nb_neurons*sizeof(layer->factor[0]))) == NULL) return 1;
+  layer->nb_inputs = nb_inputs;
+  layer->nb_neurons = nb_neurons;
+  layer->nb_channels = nb_channels;
+  layer->activation = activation;
+  return 0;
+}
+
+int dense_init(DenseLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *input_weights,
+  int nb_inputs,
+  int nb_neurons,
+  int activation)
+{
+  if ((layer->bias = find_array_check(arrays, bias, nb_neurons*sizeof(layer->bias[0]))) == NULL) return 1;
+  if ((layer->input_weights = find_array_check(arrays, input_weights, nb_inputs*nb_neurons*sizeof(layer->input_weights[0]))) == NULL) return 1;
+  layer->nb_inputs = nb_inputs;
+  layer->nb_neurons = nb_neurons;
+  layer->activation = activation;
+  return 0;
+}
+
+int gru_init(GRULayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *subias,
+  const char *input_weights,
+  const char *input_weights_idx,
+  const char *recurrent_weights,
+  int nb_inputs,
+  int nb_neurons,
+  int activation,
+  int reset_after)
+{
+  int total_blocks;
+  if ((layer->bias = find_array_check(arrays, bias, 6*nb_neurons*sizeof(layer->bias[0]))) == NULL) return 1;
+  if ((layer->subias = find_array_check(arrays, subias, 6*nb_neurons*sizeof(layer->subias[0]))) == NULL) return 1;
+  if ((layer->input_weights_idx = find_idx_check(arrays, input_weights_idx, nb_inputs, 3*nb_neurons, &total_blocks)) == NULL) return 1;
+  if ((layer->input_weights = find_array_check(arrays, input_weights, SPARSE_BLOCK_SIZE*total_blocks*sizeof(layer->input_weights[0]))) == NULL) return 1;
+  if ((layer->recurrent_weights = find_array_check(arrays, recurrent_weights, 3*nb_neurons*nb_neurons*sizeof(layer->recurrent_weights[0]))) == NULL) return 1;
+  layer->nb_inputs = nb_inputs;
+  layer->nb_neurons = nb_neurons;
+  layer->activation = activation;
+  layer->reset_after = reset_after;
+  return 0;
+}
+
+int sparse_gru_init(SparseGRULayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *subias,
+  const char *diag_weights,
+  const char *recurrent_weights,
+  const char *idx,
+  int nb_neurons,
+  int activation,
+  int reset_after)
+{
+  int total_blocks;
+  if ((layer->bias = find_array_check(arrays, bias, 6*nb_neurons*sizeof(layer->bias[0]))) == NULL) return 1;
+  if ((layer->subias = find_array_check(arrays, subias, 6*nb_neurons*sizeof(layer->subias[0]))) == NULL) return 1;
+  if ((layer->diag_weights = find_array_check(arrays, diag_weights, 3*nb_neurons*sizeof(layer->diag_weights[0]))) == NULL) return 1;
+  if ((layer->idx = find_idx_check(arrays, idx, nb_neurons, 3*nb_neurons, &total_blocks)) == NULL) return 1;
+  if ((layer->recurrent_weights = find_array_check(arrays, recurrent_weights, SPARSE_BLOCK_SIZE*total_blocks*sizeof(layer->recurrent_weights[0]))) == NULL) return 1;
+  layer->nb_neurons = nb_neurons;
+  layer->activation = activation;
+  layer->reset_after = reset_after;
+  return 0;
+}
+
+int conv1d_init(Conv1DLayer *layer, const WeightArray *arrays,
+  const char *bias,
+  const char *input_weights,
+  int nb_inputs,
+  int kernel_size,
+  int nb_neurons,
+  int activation)
+{
+  if ((layer->bias = find_array_check(arrays, bias, nb_neurons*sizeof(layer->bias[0]))) == NULL) return 1;
+  if ((layer->input_weights = find_array_check(arrays, input_weights, kernel_size*nb_inputs*nb_neurons*sizeof(layer->input_weights[0]))) == NULL) return 1;
+  layer->nb_inputs = nb_inputs;
+  layer->kernel_size = kernel_size;
+  layer->nb_neurons = nb_neurons;
+  layer->activation = activation;
+  return 0;
+}
+
+int embedding_init(EmbeddingLayer *layer, const WeightArray *arrays,
+  const char *embedding_weights,
+  int nb_inputs,
+  int dim)
+{
+  if ((layer->embedding_weights = find_array_check(arrays, embedding_weights, nb_inputs*dim*sizeof(layer->embedding_weights[0]))) == NULL) return 1;
+  layer->nb_inputs = nb_inputs;
+  layer->dim = dim;
+  return 0;
+}
+
+
+
+#if 0
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <stdio.h>
+
+int main()
+{
+  int fd;
+  unsigned char *data;
+  int len;
+  int nb_arrays;
+  int i;
+  WeightArray *list;
+  struct stat st;
+  const char *filename = "weights_blob.bin";
+  stat(filename, &st);
+  len = st.st_size;
+  fd = open(filename, O_RDONLY);
+  data = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+  printf("size is %d\n", len);
+  nb_arrays = parse_weights(&list, data, len);
+  for (i=0;i<nb_arrays;i++) {
+    printf("found %s: size %d\n", list[i].name, list[i].size);
+  }
+  printf("%p\n", list[i].name);
+  free(list);
+  munmap(data, len);
+  close(fd);
+  return 0;
+}
+#endif
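Only the reader side is shown above; for completeness, a writer-side sketch under the framing `parse_record()` expects: a 64-byte `WeightHead` followed by the payload, zero-padded to `block_size`. The `"DNNw"` magic is an assumption here; `parse_record()` never checks `head`:

```c
#include <stdio.h>
#include <string.h>
#include "nnet.h"

/* Sketch: append one record to a weight blob.  Assumes
 * sizeof(WeightHead) == WEIGHT_BLOCK_SIZE (64 bytes) and writes a
 * "DNNw" magic that parse_record() does not validate. */
static void write_weight_record(FILE *fout, const WeightArray *a)
{
  unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};
  WeightHead h;
  memset(&h, 0, sizeof(h));
  memcpy(h.head, "DNNw", 4);
  h.version = WEIGHT_BLOB_VERSION;
  h.type = a->type;
  h.size = a->size;
  /* Pad the payload to a whole number of 64-byte blocks so the next
   * header stays aligned. */
  h.block_size = (a->size + WEIGHT_BLOCK_SIZE - 1)/WEIGHT_BLOCK_SIZE*WEIGHT_BLOCK_SIZE;
  strncpy(h.name, a->name, sizeof(h.name) - 1);
  fwrite(&h, 1, WEIGHT_BLOCK_SIZE, fout);
  fwrite(a->data, 1, a->size, fout);
  fwrite(zeros, 1, h.block_size - a->size, fout);
}
```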
--- a/dnn/test_vec.c
+++ b/dnn/test_vec.c
@@ -10,7 +10,7 @@
 // we need to call two versions of each functions that have the same
 // name, so use #defines to temp rename them
 
-#define celt_exp2 celt_exp2_fast
+#define lpcnet_exp2 lpcnet_exp2_fast
 #define tansig_approx tansig_approx_fast
 #define sigmoid_approx sigmoid_approx_fast
 #define softmax softmax_fast
@@ -34,7 +34,7 @@
 
 #endif
 
-#undef celt_exp2
+#undef lpcnet_exp2
 #undef tansig_approx
 #undef sigmoid_approx
 #undef softmax
--- /dev/null
+++ b/dnn/torch/rdovae/README.md
@@ -1,0 +1,24 @@
+# Rate-Distortion-Optimized Variational Auto-Encoder
+
+## Setup
+The Python code requires Python >= 3.6 and has been tested with Python 3.6 and Python 3.10. To install the requirements, run
+```
+python -m pip install -r requirements.txt
+```
+
+## Training
+To generate training data, use `dump_data` from the main LPCNet repo:
+```
+./dump_data -train 16khz_speech_input.s16 features.f32 data.s16
+```
+
+To train the model, simply run
+```
+python train_rdovae.py features.f32 output_folder
+```
+
+To train on a CUDA device, add `--cuda-visible-devices idx`, e.g. `python train_rdovae.py --cuda-visible-devices 0 features.f32 output_folder`.
+
+
+## ToDo
+- Upload checkpoints and add URLs
--- /dev/null
+++ b/dnn/torch/rdovae/export_rdovae_weights.py
@@ -1,0 +1,258 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('checkpoint', type=str, help='rdovae model checkpoint')
+parser.add_argument('output_dir', type=str, help='output folder')
+parser.add_argument('--format', choices=['C', 'numpy'], help='output format, default: C', default='C')
+
+args = parser.parse_args()
+
+import torch
+import numpy as np
+
+from rdovae import RDOVAE
+from wexchange.torch import dump_torch_weights
+from wexchange.c_export import CWriter, print_vector
+
+
+def dump_statistical_model(writer, qembedding):
+    w = qembedding.weight.detach()
+    levels, dim = w.shape
+    N = dim // 6
+
+    print("printing statistical model")
+    quant_scales    = torch.nn.functional.softplus(w[:, : N]).numpy()
+    dead_zone       = 0.05 * torch.nn.functional.softplus(w[:, N : 2 * N]).numpy()
+    r               = torch.sigmoid(w[:, 5 * N : 6 * N]).numpy()
+    p0              = torch.sigmoid(w[:, 4 * N : 5 * N]).numpy()
+    p0              = 1 - r ** (0.5 + 0.5 * p0)
+
+    quant_scales_q8 = np.round(quant_scales * 2**8).astype(np.uint16)
+    dead_zone_q10   = np.round(dead_zone * 2**10).astype(np.uint16)
+    r_q15           = np.round(r * 2**15).astype(np.uint16)
+    p0_q15          = np.round(p0 * 2**15).astype(np.uint16)
+
+    print_vector(writer.source, quant_scales_q8, 'dred_quant_scales_q8', dtype='opus_uint16', static=False)
+    print_vector(writer.source, dead_zone_q10, 'dred_dead_zone_q10', dtype='opus_uint16', static=False)
+    print_vector(writer.source, r_q15, 'dred_r_q15', dtype='opus_uint16', static=False)
+    print_vector(writer.source, p0_q15, 'dred_p0_q15', dtype='opus_uint16', static=False)
+
+    writer.header.write(
+f"""
+extern const opus_uint16 dred_quant_scales_q8[{levels * N}];
+extern const opus_uint16 dred_dead_zone_q10[{levels * N}];
+extern const opus_uint16 dred_r_q15[{levels * N}];
+extern const opus_uint16 dred_p0_q15[{levels * N}];
+
+"""
+    )
+
+
+def c_export(args, model):
+    
+    message = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}"
+    
+    enc_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_enc_data"), message=message)
+    dec_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_dec_data"), message=message)
+    stats_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_stats_data"), message=message)
+    constants_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_constants"), message=message, header_only=True)
+    
+    # some custom includes
+    for writer in [enc_writer, dec_writer, stats_writer]:
+        writer.header.write(
+f"""
+#include "opus_types.h"
+
+#include "dred_rdovae_constants.h"
+
+#include "nnet.h"
+"""
+        )
+        
+    # encoder
+    encoder_dense_layers = [
+        ('core_encoder.module.dense_1'       , 'enc_dense1',   'TANH'), 
+        ('core_encoder.module.dense_2'       , 'enc_dense3',   'TANH'),
+        ('core_encoder.module.dense_3'       , 'enc_dense5',   'TANH'),
+        ('core_encoder.module.dense_4'       , 'enc_dense7',   'TANH'),
+        ('core_encoder.module.dense_5'       , 'enc_dense8',   'TANH'),
+        ('core_encoder.module.state_dense_1' , 'gdense1'    ,   'TANH'),
+        ('core_encoder.module.state_dense_2' , 'gdense2'    ,   'TANH')
+    ]
+    
+    for name, export_name, activation in encoder_dense_layers:
+        layer = model.get_submodule(name)
+        dump_torch_weights(enc_writer, layer, name=export_name, activation=activation, verbose=True)
+  
+  
+    encoder_gru_layers = [    
+        ('core_encoder.module.gru_1'         , 'enc_dense2',   'TANH'),
+        ('core_encoder.module.gru_2'         , 'enc_dense4',   'TANH'),
+        ('core_encoder.module.gru_3'         , 'enc_dense6',   'TANH')
+    ]
+ 
+    enc_max_rnn_units = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True)
+                             for name, export_name, activation in encoder_gru_layers])
+ 
+    
+    encoder_conv_layers = [   
+        ('core_encoder.module.conv1'         , 'bits_dense' ,   'LINEAR') 
+    ]
+    
+    enc_max_conv_inputs = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True) for name, export_name, activation in encoder_conv_layers])    
+
+    
+    del enc_writer
+    
+    # decoder
+    decoder_dense_layers = [
+        ('core_decoder.module.gru_1_init'    , 'state1',        'TANH'),
+        ('core_decoder.module.gru_2_init'    , 'state2',        'TANH'),
+        ('core_decoder.module.gru_3_init'    , 'state3',        'TANH'),
+        ('core_decoder.module.dense_1'       , 'dec_dense1',    'TANH'),
+        ('core_decoder.module.dense_2'       , 'dec_dense3',    'TANH'),
+        ('core_decoder.module.dense_3'       , 'dec_dense5',    'TANH'),
+        ('core_decoder.module.dense_4'       , 'dec_dense7',    'TANH'),
+        ('core_decoder.module.dense_5'       , 'dec_dense8',    'TANH'),
+        ('core_decoder.module.output'        , 'dec_final',     'LINEAR')
+    ]
+
+    for name, export_name, activation in decoder_dense_layers:
+        layer = model.get_submodule(name)
+        dump_torch_weights(dec_writer, layer, name=export_name, activation=activation, verbose=True)
+        
+
+    decoder_gru_layers = [
+        ('core_decoder.module.gru_1'         , 'dec_dense2',    'TANH'),
+        ('core_decoder.module.gru_2'         , 'dec_dense4',    'TANH'),
+        ('core_decoder.module.gru_3'         , 'dec_dense6',    'TANH')
+    ]
+    
+    dec_max_rnn_units = max([dump_torch_weights(dec_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True)
+                             for name, export_name, activation in decoder_gru_layers])
+        
+    del dec_writer
+    
+    # statistical model
+    qembedding = model.statistical_model.quant_embedding
+    dump_statistical_model(stats_writer, qembedding)
+    
+    del stats_writer
+    
+    # constants
+    constants_writer.header.write(
+f"""
+#define DRED_NUM_FEATURES {model.feature_dim}
+
+#define DRED_LATENT_DIM {model.latent_dim}
+
+#define DRED_STATE_DIM {model.state_dim}
+
+#define DRED_NUM_QUANTIZATION_LEVELS {model.quant_levels}
+
+#define DRED_MAX_RNN_NEURONS {max(enc_max_rnn_units, dec_max_rnn_units)}
+
+#define DRED_MAX_CONV_INPUTS {enc_max_conv_inputs}
+
+#define DRED_ENC_MAX_RNN_NEURONS {enc_max_rnn_units}
+
+#define DRED_ENC_MAX_CONV_INPUTS {enc_max_conv_inputs}
+
+#define DRED_DEC_MAX_RNN_NEURONS {dec_max_rnn_units}
+
+"""
+    )
+    
+    del constants_writer
+
+
+def numpy_export(args, model):
+    
+    exchange_name_to_name = {
+        'encoder_stack_layer1_dense'    : 'core_encoder.module.dense_1',
+        'encoder_stack_layer3_dense'    : 'core_encoder.module.dense_2',
+        'encoder_stack_layer5_dense'    : 'core_encoder.module.dense_3',
+        'encoder_stack_layer7_dense'    : 'core_encoder.module.dense_4',
+        'encoder_stack_layer8_dense'    : 'core_encoder.module.dense_5',
+        'encoder_state_layer1_dense'    : 'core_encoder.module.state_dense_1',
+        'encoder_state_layer2_dense'    : 'core_encoder.module.state_dense_2',
+        'encoder_stack_layer2_gru'      : 'core_encoder.module.gru_1',
+        'encoder_stack_layer4_gru'      : 'core_encoder.module.gru_2',
+        'encoder_stack_layer6_gru'      : 'core_encoder.module.gru_3',
+        'encoder_stack_layer9_conv'     : 'core_encoder.module.conv1',
+        'statistical_model_embedding'   : 'statistical_model.quant_embedding',
+        'decoder_state1_dense'          : 'core_decoder.module.gru_1_init',
+        'decoder_state2_dense'          : 'core_decoder.module.gru_2_init',
+        'decoder_state3_dense'          : 'core_decoder.module.gru_3_init',
+        'decoder_stack_layer1_dense'    : 'core_decoder.module.dense_1',
+        'decoder_stack_layer3_dense'    : 'core_decoder.module.dense_2',
+        'decoder_stack_layer5_dense'    : 'core_decoder.module.dense_3',
+        'decoder_stack_layer7_dense'    : 'core_decoder.module.dense_4',
+        'decoder_stack_layer8_dense'    : 'core_decoder.module.dense_5',
+        'decoder_stack_layer9_dense'    : 'core_decoder.module.output',
+        'decoder_stack_layer2_gru'      : 'core_decoder.module.gru_1',
+        'decoder_stack_layer4_gru'      : 'core_decoder.module.gru_2',
+        'decoder_stack_layer6_gru'      : 'core_decoder.module.gru_3'
+    }
+    
+    name_to_exchange_name = {value : key for key, value in exchange_name_to_name.items()}
+    
+    for name, exchange_name in name_to_exchange_name.items():
+        print(f"printing layer {name}...")
+        dump_torch_weights(os.path.join(args.output_dir, exchange_name), model.get_submodule(name))
+
+
+if __name__ == "__main__":
+    
+    
+    os.makedirs(args.output_dir, exist_ok=True)
+    
+    
+    # load model from checkpoint
+    checkpoint = torch.load(args.checkpoint, map_location='cpu')
+    model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+    missing_keys, unmatched_keys = model.load_state_dict(checkpoint['state_dict'], strict=False)
+
+    if len(missing_keys) > 0:
+        raise ValueError(f"error: missing keys in state dict")
+
+    if len(unmatched_keys) > 0:
+        print(f"warning: the following keys were unmatched {unmatched_keys}")
+    
+    if args.format == 'C':
+        c_export(args, model)
+    elif args.format == 'numpy':
+        numpy_export(args, model)
+    else:
+        raise ValueError(f'error: unknown export format {args.format}')
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/fec_encoder.py
@@ -1,0 +1,213 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe and Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import subprocess
+import argparse
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser(description='Encode redundancy for Opus neural FEC. Designed for use with a VoIP application and 20 ms frames.')
+
+parser.add_argument('input', metavar='<input signal>', help='audio input (.wav or .raw or .pcm as int16)')
+parser.add_argument('checkpoint', metavar='<weights>', help='model checkpoint')
+parser.add_argument('q0', metavar='<quant level 0>', type=int, help='quantization level for most recent frame')
+parser.add_argument('q1', metavar='<quant level 1>', type=int, help='quantization level for oldest frame')
+parser.add_argument('output', type=str, help='output file (will be extended with .fec)')
+
+parser.add_argument('--dump-data', type=str, default='./dump_data', help='path to dump data executable (default ./dump_data)')
+parser.add_argument('--num-redundancy-frames', default=52, type=int, help='number of redundancy frames per packet (default 52)')
+parser.add_argument('--extra-delay', default=0, type=int, help="the last features in a packet are computed from decoder-aligned samples; use this option to add extra delay (in samples at 16 kHz)")
+parser.add_argument('--lossfile', type=str, help='file containing loss trace (0 for frame received, 1 for lost)')
+parser.add_argument('--debug-output', action='store_true', help='if set, differently assembled features are written to disk')
+
+args = parser.parse_args()
+
+import numpy as np
+from scipy.io import wavfile
+import torch
+
+from rdovae import RDOVAE
+from packets import write_fec_packets
+
+torch.set_num_threads(4)
+
+checkpoint = torch.load(args.checkpoint, map_location="cpu")
+model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+model.load_state_dict(checkpoint['state_dict'], strict=False)
+model.to("cpu")
+
+lpc_order = 16
+
+## prepare input signal
+# SILK frame size is 20ms and LPCNet subframes are 10ms
+subframe_size = 160
+frame_size = 2 * subframe_size
+
+# 91 samples delay to align with SILK decoded frames
+silk_delay = 91
+
+# prepend zeros to have enough history to produce the first packet
+zero_history = (args.num_redundancy_frames - 1) * frame_size
+
+# dump data has a (feature) delay of 10ms
+dump_data_delay = 160
+
+total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay
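+# e.g. with the default 52 redundancy frames and no extra delay: 91 + 51*320 + 0 - 160 = 16251 samples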
+
+# load signal
+if args.input.endswith('.raw') or args.input.endswith('.pcm'):
+    signal = np.fromfile(args.input, dtype='int16')
+    
+elif args.input.endswith('.wav'):
+    fs, signal = wavfile.read(args.input)
+else:
+    raise ValueError(f'unknown input signal format: {args.input}')
+
+# fill up last frame with zeros
+padded_signal_length = len(signal) + total_delay
+tail = padded_signal_length % frame_size
+right_padding = (frame_size - tail) % frame_size
+    
+signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))
+
+padded_signal_file  = os.path.splitext(args.input)[0] + '_padded.raw'
+signal.tofile(padded_signal_file)
+
+# call dump_data to compute features from the padded signal
+
+feature_file = os.path.splitext(args.input)[0] + '_features.f32'
+command = f"{args.dump_data} -test {padded_signal_file} {feature_file}"
+r = subprocess.run(command, shell=True)
+if r.returncode != 0:
+    raise RuntimeError(f"command '{command}' failed with exit code {r.returncode}")
+
+# feature dimensions: dump_data appends lpc coefficients to the model features
+nb_features = model.feature_dim + lpc_order
+nb_used_features = model.feature_dim
+
+# load features
+features = np.fromfile(feature_file, dtype='float32')
+num_subframes = len(features) // nb_features
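+# keep an even number of 10 ms subframes (two per 20 ms frame)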
+num_subframes = 2 * (num_subframes // 2)
+num_frames = num_subframes // 2
+
+features = np.reshape(features, (1, -1, nb_features))
+features = features[:, :, :nb_used_features]
+features = features[:, :num_subframes, :]
+
+# quant_ids in reverse decoding order
+quant_ids = torch.round((args.q1 + (args.q0 - args.q1) * torch.arange(args.num_redundancy_frames // 2) / (args.num_redundancy_frames // 2 - 1))).long()
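+# index 0 corresponds to the oldest frame (q1), the last index to the most recent frame (q0)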
+
+print(f"using quantization levels {quant_ids}...")
+
+# convert input to torch tensors
+features = torch.from_numpy(features)
+
+
+# run encoder
+print("running fec encoder...")
+with torch.no_grad():
+
+    # encoding
+    z, states, state_size = model.encode(features)
+
+
+    # decoder on packet chunks
+    input_length = args.num_redundancy_frames // 2
+    offset = args.num_redundancy_frames - 1
+
+    packets = []
+    packet_sizes = []
+
+    for i in range(offset, num_frames):
+        print(f"processing frame {i - offset}...")
+        # quantize / unquantize latent vectors
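+        # keep every other latent over the redundancy window ending at frame i (one latent per 40 ms)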
+        zi = torch.clone(z[:, i - 2 * input_length + 2: i + 1 : 2, :])
+        zi, rates = model.quantize(zi, quant_ids)
+        zi = model.unquantize(zi, quant_ids)
+        
+        features = model.decode(zi, states[:, i : i + 1, :])
+        packets.append(features.squeeze(0).numpy())
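+        # total size in bits (latent rates + initial state), rounded up to whole bytes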
+        packet_size = 8 * int((torch.sum(rates) + 7 + state_size) / 8)
+        packet_sizes.append(packet_size)
+
+
+# write packets
+packet_file = args.output + '.fec' if not args.output.endswith('.fec') else args.output
+write_fec_packets(packet_file, packets, packet_sizes)
+
+
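+# packet sizes are in bits and packets are produced every 20 ms frame, i.e. 50 per second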
+print(f"average redundancy rate: {int(round(sum(packet_sizes) / len(packet_sizes) * 50 / 1000))} kbps")
+
+# assemble features according to loss file
+if args.lossfile is not None:
+    num_packets = len(packets)
+    loss = np.loadtxt(args.lossfile, dtype='int16')
+    fec_out = np.zeros((num_packets * 2, packets[0].shape[-1]), dtype='float32')
+    foffset = -2
+    ptr = 0
+    count = 2
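+    # each lost packet defers two more feature frames to the next received packet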
+    for i in range(num_packets):
+        if (loss[i] == 0) or (i == num_packets - 1):
+            
+            fec_out[ptr:ptr+count,:] = packets[i][foffset:, :]
+
+            ptr    += count
+            foffset = -2
+            count   = 2
+        else:
+            count   += 2
+            foffset -= 2
+
+    fec_out_full = np.zeros((fec_out.shape[0], 36), dtype=np.float32)
+    fec_out_full[:, : fec_out.shape[-1]] = fec_out
+
+    fec_out_full.tofile(packet_file[:-4] + '_fec.f32')
+
+
+if args.debug_output:
+    import itertools
+
+    batches = [4]
+    offsets = [0, 2 * args.num_redundancy_frames - 4]
+        
+    # sanity checks
+    # 1. concatenate features at offset 0
+    for batch, offset in itertools.product(batches, offsets):
+
+        stop = packets[0].shape[1] - offset
+        test_features = np.concatenate([packet[stop - batch: stop, :] for packet in packets[::batch//2]], axis=0)
+
+        test_features_full = np.zeros((test_features.shape[0], nb_features), dtype=np.float32)
+        test_features_full[:, :nb_used_features] = test_features[:, :]
+
+        print(f"writing debug output {packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32'}")
+        test_features_full.tofile(packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32')
+
--- /dev/null
+++ b/dnn/torch/rdovae/import_rdovae_weights.py
@@ -1,0 +1,143 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+import argparse
+
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('exchange_folder', type=str, help='exchange folder path')
+parser.add_argument('output', type=str, help='path to output model checkpoint')
+
+model_group = parser.add_argument_group(title="model parameters")
+model_group.add_argument('--num-features', type=int, help="number of features, default: 20", default=20)
+model_group.add_argument('--latent-dim', type=int, help="number of symbols produced by the encoder, default: 80", default=80)
+model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 256", default=256)
+model_group.add_argument('--cond-size2', type=int, help="second conditioning size, default: 256", default=256)
+model_group.add_argument('--state-dim', type=int, help="dimensionality of transferred state, default: 24", default=24)
+model_group.add_argument('--quant-levels', type=int, help="number of quantization levels, default: 40", default=40)
+
+args = parser.parse_args()
+
+import torch
+from rdovae import RDOVAE
+from wexchange.torch import load_torch_weights
+
+exchange_name_to_name = {
+    'encoder_stack_layer1_dense'    : 'core_encoder.module.dense_1',
+    'encoder_stack_layer3_dense'    : 'core_encoder.module.dense_2',
+    'encoder_stack_layer5_dense'    : 'core_encoder.module.dense_3',
+    'encoder_stack_layer7_dense'    : 'core_encoder.module.dense_4',
+    'encoder_stack_layer8_dense'    : 'core_encoder.module.dense_5',
+    'encoder_state_layer1_dense'    : 'core_encoder.module.state_dense_1',
+    'encoder_state_layer2_dense'    : 'core_encoder.module.state_dense_2',
+    'encoder_stack_layer2_gru'      : 'core_encoder.module.gru_1',
+    'encoder_stack_layer4_gru'      : 'core_encoder.module.gru_2',
+    'encoder_stack_layer6_gru'      : 'core_encoder.module.gru_3',
+    'encoder_stack_layer9_conv'     : 'core_encoder.module.conv1',
+    'statistical_model_embedding'   : 'statistical_model.quant_embedding',
+    'decoder_state1_dense'          : 'core_decoder.module.gru_1_init',
+    'decoder_state2_dense'          : 'core_decoder.module.gru_2_init',
+    'decoder_state3_dense'          : 'core_decoder.module.gru_3_init',
+    'decoder_stack_layer1_dense'    : 'core_decoder.module.dense_1',
+    'decoder_stack_layer3_dense'    : 'core_decoder.module.dense_2',
+    'decoder_stack_layer5_dense'    : 'core_decoder.module.dense_3',
+    'decoder_stack_layer7_dense'    : 'core_decoder.module.dense_4',
+    'decoder_stack_layer8_dense'    : 'core_decoder.module.dense_5',
+    'decoder_stack_layer9_dense'    : 'core_decoder.module.output',
+    'decoder_stack_layer2_gru'      : 'core_decoder.module.gru_1',
+    'decoder_stack_layer4_gru'      : 'core_decoder.module.gru_2',
+    'decoder_stack_layer6_gru'      : 'core_decoder.module.gru_3'
+}
+
+if __name__ == "__main__":
+    checkpoint = dict()
+
+    # parameters
+    num_features    = args.num_features
+    latent_dim      = args.latent_dim
+    quant_levels    = args.quant_levels
+    cond_size       = args.cond_size
+    cond_size2      = args.cond_size2
+    state_dim       = args.state_dim
+    
+
+    # model
+    checkpoint['model_args']    = (num_features, latent_dim, quant_levels, cond_size, cond_size2)
+    checkpoint['model_kwargs']  = {'state_dim': state_dim}
+    model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+
+    dense_layer_names = [
+        'encoder_stack_layer1_dense',
+        'encoder_stack_layer3_dense',
+        'encoder_stack_layer5_dense',
+        'encoder_stack_layer7_dense',
+        'encoder_stack_layer8_dense',
+        'encoder_state_layer1_dense',
+        'encoder_state_layer2_dense',
+        'decoder_state1_dense',      
+        'decoder_state2_dense',      
+        'decoder_state3_dense',      
+        'decoder_stack_layer1_dense',
+        'decoder_stack_layer3_dense',
+        'decoder_stack_layer5_dense',
+        'decoder_stack_layer7_dense',
+        'decoder_stack_layer8_dense',
+        'decoder_stack_layer9_dense'
+    ]
+
+    gru_layer_names = [
+        'encoder_stack_layer2_gru',
+        'encoder_stack_layer4_gru',
+        'encoder_stack_layer6_gru',
+        'decoder_stack_layer2_gru',
+        'decoder_stack_layer4_gru',
+        'decoder_stack_layer6_gru' 
+    ]
+
+    conv1d_layer_names = [
+        'encoder_stack_layer9_conv'
+    ]
+
+    embedding_layer_names = [
+        'statistical_model_embedding'
+    ]
+
+    for name in dense_layer_names + gru_layer_names + conv1d_layer_names + embedding_layer_names:
+        print(f"loading weights for layer {exchange_name_to_name[name]}")
+        layer = model.get_submodule(exchange_name_to_name[name])
+        load_torch_weights(os.path.join(args.exchange_folder, name), layer)
+
+    checkpoint['state_dict'] = model.state_dict()
+
+    torch.save(checkpoint, args.output)
\ No newline at end of file
binary files /dev/null b/dnn/torch/rdovae/libs/wexchange-1.0-py3-none-any.whl differ
binary files /dev/null b/dnn/torch/rdovae/libs/wexchange-1.2-py3-none-any.whl differ
--- /dev/null
+++ b/dnn/torch/rdovae/packets/__init__.py
@@ -1,0 +1,1 @@
+from .fec_packets import write_fec_packets, read_fec_packets
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/packets/fec_packets.c
@@ -1,0 +1,142 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "fec_packets.h"
+
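+/* .fec file layout: a header of seven int16 fields (version, header_size,
+   num_packets, packet_size, subframe_size, subframes_per_packet, num_features)
+   followed by num_packets packets, each an int16 rate plus
+   subframes_per_packet float32 feature subframes. */
+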
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index)
+{
+
+    int16_t version;
+    int16_t header_size;
+    int16_t num_packets;
+    int16_t packet_size;
+    int16_t subframe_size;
+    int16_t subframes_per_packet;
+    int16_t num_features;
+    long offset;
+
+    FILE *fid = fopen(filename, "rb");
+    if (fid == NULL) return 1;
+
+    /* read header */
+    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+    /* check if indices are valid */
+    if (packet_index >= num_packets || subframe_index >= subframes_per_packet)
+    {
+        fprintf(stderr, "get_fec_frame: index out of bounds\n");
+        goto error;
+    }
+
+    /* calculate offset in file (+ 2 is for rate) */
+    offset = header_size + packet_index * packet_size + 2 + subframe_index * subframe_size;
+    fseek(fid, offset, SEEK_SET);
+
+    /* read features */
+    if (fread(features, sizeof(*features), num_features, fid) != (size_t)num_features) goto error;
+
+    fclose(fid);
+    return 0;
+
+error:
+    fclose(fid);
+    return 1;
+}
+
+int get_fec_rate(const char * const filename, int packet_index)
+{
+    int16_t version;
+    int16_t header_size;
+    int16_t num_packets;
+    int16_t packet_size;
+    int16_t subframe_size;
+    int16_t subframes_per_packet;
+    int16_t num_features;
+    long offset;
+    int16_t rate;
+
+    FILE *fid = fopen(filename, "rb");
+    if (fid == NULL) return -1;
+
+    /* read header */
+    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+    /* check if indices are valid */
+    if (packet_index >= num_packets)
+    {
+        fprintf(stderr, "get_fec_rate: index out of bounds\n");
+        goto error;
+    }
+
+    /* calculate offset in file (+ 2 is for rate) */
+    offset = header_size + packet_index * packet_size;
+    fseek(fid, offset, SEEK_SET);
+
+    /* read rate */
+    if (fread(&rate, sizeof(rate), 1, fid) != 1) goto error;
+
+    fclose(fid);
+    return (int) rate;
+
+error:
+    fclose(fid);
+    return -1;
+}
+
+#if 0
+int main()
+{
+    float features[20];
+    int i;
+
+    if (get_fec_frame("../test.fec", &features[0], 0, 127))
+    {
+        return 1;
+    }
+
+    for (i = 0; i < 20; i ++)
+    {
+        printf("%d %f\n", i, features[i]);
+    }
+
+    printf("rate: %d\n", get_fec_rate("../test.fec", 0));
+
+}
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/packets/fec_packets.h
@@ -1,0 +1,34 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FEC_PACKETS_H
+#define _FEC_PACKETS_H
+
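+/* get_fec_frame: read one feature subframe into features; returns 0 on success, 1 on error.
+   get_fec_rate:  return the rate stored for packet packet_index, or -1 on error. */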
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index);
+int get_fec_rate(const char * const filename, int packet_index);
+
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/packets/fec_packets.py
@@ -1,0 +1,108 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+
+
+
+def write_fec_packets(filename, packets, rates=None):
+    """ writes packets in binary format """
+    
+    assert np.dtype(np.float32).itemsize == 4
+    assert np.dtype(np.int16).itemsize == 2
+    
+    # derive some sizes 
+    num_packets             = len(packets)
+    subframes_per_packet    = packets[0].shape[-2]
+    num_features            = packets[0].shape[-1]
+    
+    # size of float is 4
+    subframe_size           = num_features * 4
+    packet_size             = subframe_size * subframes_per_packet + 2 # two bytes for rate
+    
+    version = 1
+    # header size (version, header_size, num_packets, packet_size, subframe_size, subframes_per_packet, num_features)
+    header_size = 14
+    
+    with open(filename, 'wb') as f:
+        
+        # header
+        f.write(np.int16(version).tobytes())
+        f.write(np.int16(header_size).tobytes())
+        f.write(np.int16(num_packets).tobytes())
+        f.write(np.int16(packet_size).tobytes())
+        f.write(np.int16(subframe_size).tobytes())
+        f.write(np.int16(subframes_per_packet).tobytes())
+        f.write(np.int16(num_features).tobytes())
+        
+        # packets
+        for i, packet in enumerate(packets):
+            if rates is None:
+                rate = 0
+            else:
+                rate = rates[i]
+            
+            f.write(np.int16(rate).tobytes())
+            
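+            # subframes are flipped along the time axis before writing; read_fec_packets flips them back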
+            features = np.flip(packet, axis=-2)
+            f.write(features.astype(np.float32).tobytes())
+            
+        
+def read_fec_packets(filename):
+    """ reads packets from binary format """
+    
+    assert np.dtype(np.float32).itemsize == 4
+    assert np.dtype(np.int16).itemsize == 2
+    
+    with open(filename, 'rb') as f:
+        
+        # header
+        version                 = np.frombuffer(f.read(2), dtype=np.int16).item()
+        header_size             = np.frombuffer(f.read(2), dtype=np.int16).item()
+        num_packets             = np.frombuffer(f.read(2), dtype=np.int16).item()
+        packet_size             = np.frombuffer(f.read(2), dtype=np.int16).item()
+        subframe_size           = np.frombuffer(f.read(2), dtype=np.int16).item()
+        subframes_per_packet    = np.frombuffer(f.read(2), dtype=np.int16).item()
+        num_features            = np.frombuffer(f.read(2), dtype=np.int16).item()
+        
+        dummy_features          = np.zeros((subframes_per_packet, num_features), dtype=np.float32)
+        
+        # packets
+        rates = []
+        packets = []
+        for i in range(num_packets):
+                     
+            rate = np.frombuffer(f.read(2), dtype=np.int16).item()
+            rates.append(rate)
+            
+            features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
+            packet = np.flip(features, axis=-2)
+            packets.append(packet)
+            
+    return packets
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/rdovae/__init__.py
@@ -1,0 +1,2 @@
+from .rdovae import RDOVAE, distortion_loss, hard_rate_estimate, soft_rate_estimate
+from .dataset import RDOVAEDataset
--- /dev/null
+++ b/dnn/torch/rdovae/rdovae/dataset.py
@@ -1,0 +1,68 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import torch
+import numpy as np
+
+class RDOVAEDataset(torch.utils.data.Dataset):
+    def __init__(self,
+                feature_file,
+                sequence_length,
+                num_used_features=20,
+                num_features=36,
+                lambda_min=0.0002,
+                lambda_max=0.0135,
+                quant_levels=16,
+                enc_stride=2):
+        
+        self.sequence_length = sequence_length
+        self.lambda_min = lambda_min
+        self.lambda_max = lambda_max
+        self.enc_stride = enc_stride
+        self.quant_levels = quant_levels
+        self.denominator = (quant_levels - 1) / np.log(lambda_max / lambda_min)
+
+        if sequence_length % enc_stride:
+            raise ValueError(f"RDOVAEDataset.__init__: enc_stride {enc_stride} does not divide sequence length {sequence_length}")
+        
+        self.features = np.reshape(np.fromfile(feature_file, dtype=np.float32), (-1, num_features))
+        self.features = self.features[:, :num_used_features]
+        self.num_sequences = self.features.shape[0] // sequence_length
+
+    def __len__(self):
+        return self.num_sequences
+
+    def __getitem__(self, index):
+        features = self.features[index * self.sequence_length: (index + 1) * self.sequence_length, :]
+        q_ids = np.random.randint(0, self.quant_levels, (1)).astype(np.int64)
+        q_ids = np.repeat(q_ids, self.sequence_length // self.enc_stride, axis=0)
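+        # map q_id exponentially onto [lambda_min, lambda_max]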
+        rate_lambda = self.lambda_min * np.exp(q_ids.astype(np.float32) / self.denominator).astype(np.float32)
+
+        return features, rate_lambda, q_ids
+
--- /dev/null
+++ b/dnn/torch/rdovae/rdovae/rdovae.py
@@ -1,0 +1,614 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+""" Pytorch implementations of rate distortion optimized variational autoencoder """
+
+import math as m
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+# Quantization and rate related utility functions
+
+def soft_pvq(x, k):
+    """ soft pyramid vector quantizer """
+
+    # L2 normalization
+    x_norm2 = x / (1e-15 + torch.norm(x, dim=-1, keepdim=True))
+    
+
+    with torch.no_grad():
+        # quantization loop, no need to track gradients here
+        x_norm1 = x / torch.sum(torch.abs(x), dim=-1, keepdim=True)
+
+        # set initial scaling factor to k
+        scale_factor = k
+        x_scaled = scale_factor * x_norm1
+        x_quant = torch.round(x_scaled)
+
+        # we aim for ||x_quant||_L1 = k
+        for _ in range(10):
+            # remove signs and calculate L1 norm
+            abs_x_quant = torch.abs(x_quant)
+            abs_x_scaled = torch.abs(x_scaled)
+            l1_x_quant = torch.sum(abs_x_quant, axis=-1)
+
+            # increase the scale where the quantized L1 norm is too small and decrease it where it is too large
+            plus  = 1.0001 * torch.min((abs_x_quant + 0.5) / (abs_x_scaled + 1e-15), dim=-1).values
+            minus = 0.9999 * torch.max((abs_x_quant - 0.5) / (abs_x_scaled + 1e-15), dim=-1).values
+            factor = torch.where(l1_x_quant > k, minus, plus)
+            factor = torch.where(l1_x_quant == k, torch.ones_like(factor), factor)
+            scale_factor = scale_factor * factor.unsqueeze(-1)
+
+            # update x
+            x_scaled = scale_factor * x_norm1
+            x_quant = torch.round(x_scaled)
+
+    # L2 normalization of quantized x
+    x_quant_norm2 = x_quant / (1e-15 + torch.norm(x_quant, dim=-1, keepdim=True))
+    quantization_error = x_quant_norm2 - x_norm2
+
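+    # straight-through estimator: quantized value in the forward pass, gradients flow through x_norm2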
+    return x_norm2 + quantization_error.detach()
+
+def cache_parameters(func):
+    cache = dict()
+    def cached_func(*args):
+        if args in cache:
+            return cache[args]
+        else:
+            cache[args] = func(*args)
+        
+        return cache[args]
+    return cached_func
+        
+@cache_parameters
+def pvq_codebook_size(n, k):
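+    """ recursive computation of the PVQ codebook size N(n, k) """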
+    
+    if k == 0:
+        return 1
+    
+    if n == 0:
+        return 0
+    
+    return pvq_codebook_size(n - 1, k) + pvq_codebook_size(n, k - 1) + pvq_codebook_size(n - 1, k - 1)
+
+
+def soft_rate_estimate(z, r, reduce=True):
+    """ rate approximation with dependent theta Eq. (7)"""
+
+    rate = torch.sum(
+        - torch.log2((1 - r)/(1 + r) * r ** torch.abs(z) + 1e-6),
+        dim=-1
+    )
+
+    if reduce:
+        rate = torch.mean(rate)
+
+    return rate
+
+
+def hard_rate_estimate(z, r, theta, reduce=True):
+    """ hard rate approximation """
+
+    z_q = torch.round(z)
+    p0 = 1 - r ** (0.5 + 0.5 * theta)
+    alpha = torch.relu(1 - torch.abs(z_q)) ** 2
+    rate = - torch.sum(
+        (alpha * torch.log2(p0 * r ** torch.abs(z_q) + 1e-6) 
+        + (1 - alpha) * torch.log2(0.5 * (1 - p0) * (1 - r) * r ** (torch.abs(z_q) - 1) + 1e-6)),
+        dim=-1
+    )
+
+    if reduce:
+        rate = torch.mean(rate)
+
+    return rate
+
+
+
+def soft_dead_zone(x, dead_zone):
+    """ approximates application of a dead zone to x """
+    d = dead_zone * 0.05
+    return x - d * torch.tanh(x / (0.1 + d))
+
+
+def hard_quantize(x):
+    """ round with copy gradient trick """
+    return x + (torch.round(x) - x).detach()
+
+
+def noise_quantize(x):
+    """ simulates quantization with addition of random uniform noise """
+    return x + (torch.rand_like(x) - 0.5)
+
+
+# loss functions
+
+
+def distortion_loss(y_true, y_pred, rate_lambda=None):
+    """ custom distortion loss for LPCNet features """
+    
+    if y_true.size(-1) != 20:
+        raise ValueError('distortion loss is designed to work with 20 features')
+
+    ceps_error   = y_pred[..., :18] - y_true[..., :18]
+    pitch_error  = 2 * (y_pred[..., 18:19] - y_true[..., 18:19]) / (2 + y_true[..., 18:19])
+    corr_error   = y_pred[..., 19:] - y_true[..., 19:]
+    pitch_weight = torch.relu(y_true[..., 19:] + 0.5) ** 2
+
+    loss = torch.mean(ceps_error ** 2 + (10/18) * torch.abs(pitch_error) * pitch_weight + (1/18) * corr_error ** 2, dim=-1)
+
+    if rate_lambda is not None:
+        loss = loss / torch.sqrt(rate_lambda)
+
+    loss = torch.mean(loss)
+        
+    return loss
+
+
+# sampling functions
+
+import random
+
+
+def random_split(start, stop, num_splits=3, min_len=3):
+    get_min_len = lambda x : min([x[i+1] - x[i] for i in range(len(x) - 1)])
+    candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop]
+    
+    while get_min_len(candidate) < min_len: 
+        candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop]
+    
+    return candidate
+
+
+
+# weight initialization and clipping
+def init_weights(module):
+    
+    if isinstance(module, nn.GRU):
+        for p in module.named_parameters():
+            if p[0].startswith('weight_hh_'):
+                nn.init.orthogonal_(p[1])
+
+    
+def weight_clip_factory(max_value):
+    """ weight clipping function concerning sum of abs values of adjecent weights """
+    def clip_weight_(w):
+        stop = w.size(1)
+        # omit last column if stop is odd
+        if stop % 2:
+            stop -= 1
+        max_values = max_value * torch.ones_like(w[:, :stop])
+        factor = max_value / torch.maximum(max_values,
+                                 torch.repeat_interleave(
+                                     torch.abs(w[:, :stop:2]) + torch.abs(w[:, 1:stop:2]),
+                                     2,
+                                     1))
+        with torch.no_grad():
+            w[:, :stop] *= factor
+    
+    def clip_weights(module):
+        if isinstance(module, nn.GRU) or isinstance(module, nn.Linear):
+            for name, w in module.named_parameters():
+                if name.startswith('weight'):
+                    clip_weight_(w)
+    
+    return clip_weights
+
+# RDOVAE module and submodules
+
+
+class CoreEncoder(nn.Module):
+    STATE_HIDDEN = 128
+    FRAMES_PER_STEP = 2
+    CONV_KERNEL_SIZE = 4
+    
+    def __init__(self, feature_dim, output_dim, cond_size, cond_size2, state_size=24):
+        """ core encoder for RDOVAE
+        
+            Computes latents, initial states, and rate estimates from features and lambda parameter
+        
+        """
+
+        super(CoreEncoder, self).__init__()
+
+        # hyper parameters
+        self.feature_dim        = feature_dim
+        self.output_dim         = output_dim
+        self.cond_size          = cond_size
+        self.cond_size2         = cond_size2
+        self.state_size         = state_size
+
+        # derived parameters
+        self.input_dim = self.FRAMES_PER_STEP * self.feature_dim
+        self.conv_input_channels =  5 * cond_size + 3 * cond_size2
+
+        # layers
+        self.dense_1 = nn.Linear(self.input_dim, self.cond_size2)
+        self.gru_1   = nn.GRU(self.cond_size2, self.cond_size, batch_first=True)
+        self.dense_2 = nn.Linear(self.cond_size, self.cond_size2)
+        self.gru_2   = nn.GRU(self.cond_size2, self.cond_size, batch_first=True)
+        self.dense_3 = nn.Linear(self.cond_size, self.cond_size2)
+        self.gru_3   = nn.GRU(self.cond_size2, self.cond_size, batch_first=True)
+        self.dense_4 = nn.Linear(self.cond_size, self.cond_size)
+        self.dense_5 = nn.Linear(self.cond_size, self.cond_size)
+        self.conv1   = nn.Conv1d(self.conv_input_channels, self.output_dim, kernel_size=self.CONV_KERNEL_SIZE, padding='valid')
+
+        self.state_dense_1 = nn.Linear(self.conv_input_channels, self.STATE_HIDDEN)
+
+        self.state_dense_2 = nn.Linear(self.STATE_HIDDEN, self.state_size)
+
+        # initialize weights
+        self.apply(init_weights)
+
+
+    def forward(self, features):
+
+        # group FRAMES_PER_STEP consecutive feature frames into one encoder input step
+        x = torch.reshape(features, (features.size(0), features.size(1) // self.FRAMES_PER_STEP, self.FRAMES_PER_STEP * features.size(2)))
+
+        batch = x.size(0)
+        device = x.device
+
+        # run encoding layer stack
+        x1      = torch.tanh(self.dense_1(x))
+        x2, _   = self.gru_1(x1, torch.zeros((1, batch, self.cond_size)).to(device))
+        x3      = torch.tanh(self.dense_2(x2))
+        x4, _   = self.gru_2(x3, torch.zeros((1, batch, self.cond_size)).to(device))
+        x5      = torch.tanh(self.dense_3(x4))
+        x6, _   = self.gru_3(x5, torch.zeros((1, batch, self.cond_size)).to(device))
+        x7      = torch.tanh(self.dense_4(x6))
+        x8      = torch.tanh(self.dense_5(x7))
+
+        # concatenation of all hidden layer outputs
+        x9 = torch.cat((x1, x2, x3, x4, x5, x6, x7, x8), dim=-1)
+        
+        # init state for decoder
+        states = torch.tanh(self.state_dense_1(x9))
+        states = torch.tanh(self.state_dense_2(states))
+
+        # latent representation via convolution
+        x9 = F.pad(x9.permute(0, 2, 1), [self.CONV_KERNEL_SIZE - 1, 0])
+        z = self.conv1(x9).permute(0, 2, 1)
+
+        return z, states
+
+
+
+
+class CoreDecoder(nn.Module):
+
+    FRAMES_PER_STEP = 4
+
+    def __init__(self, input_dim, output_dim, cond_size, cond_size2, state_size=24):
+        """ core decoder for RDOVAE
+        
+            Computes features from latents, initial state, and quantization index
+        
+        """
+
+        super(CoreDecoder, self).__init__()
+
+        # hyper parameters
+        self.input_dim  = input_dim
+        self.output_dim = output_dim
+        self.cond_size  = cond_size
+        self.cond_size2 = cond_size2
+        self.state_size = state_size
+
+        self.input_size = self.input_dim
+        
+        self.concat_size = 4 * self.cond_size + 4 * self.cond_size2
+
+        # layers
+        self.dense_1    = nn.Linear(self.input_size, cond_size2)
+        self.gru_1      = nn.GRU(cond_size2, cond_size, batch_first=True)
+        self.dense_2    = nn.Linear(cond_size, cond_size2)
+        self.gru_2      = nn.GRU(cond_size2, cond_size, batch_first=True)
+        self.dense_3    = nn.Linear(cond_size, cond_size2)
+        self.gru_3      = nn.GRU(cond_size2, cond_size, batch_first=True)
+        self.dense_4    = nn.Linear(cond_size, cond_size2)
+        self.dense_5    = nn.Linear(cond_size2, cond_size2)
+
+        self.output  = nn.Linear(self.concat_size, self.FRAMES_PER_STEP * self.output_dim)
+
+
+        self.gru_1_init = nn.Linear(self.state_size, self.cond_size)
+        self.gru_2_init = nn.Linear(self.state_size, self.cond_size)
+        self.gru_3_init = nn.Linear(self.state_size, self.cond_size)
+
+        # initialize weights
+        self.apply(init_weights)
+
+    def forward(self, z, initial_state):
+        
+        gru_1_state = torch.tanh(self.gru_1_init(initial_state).permute(1, 0, 2))
+        gru_2_state = torch.tanh(self.gru_2_init(initial_state).permute(1, 0, 2))
+        gru_3_state = torch.tanh(self.gru_3_init(initial_state).permute(1, 0, 2))
+
+        # run decoding layer stack
+        x1  = torch.tanh(self.dense_1(z))
+        x2, _ = self.gru_1(x1, gru_1_state)
+        x3  = torch.tanh(self.dense_2(x2))
+        x4, _ = self.gru_2(x3, gru_2_state)
+        x5  = torch.tanh(self.dense_3(x4))
+        x6, _ = self.gru_3(x5, gru_3_state)
+        x7  = torch.tanh(self.dense_4(x6))
+        x8  = torch.tanh(self.dense_5(x7))
+        x9 = torch.cat((x1, x2, x3, x4, x5, x6, x7, x8), dim=-1)
+
+        # output layer and reshaping
+        x10 = self.output(x9)
+        features = torch.reshape(x10, (x10.size(0), x10.size(1) * self.FRAMES_PER_STEP, x10.size(2) // self.FRAMES_PER_STEP))
+
+        return features
+
+
+class StatisticalModel(nn.Module):
+    def __init__(self, quant_levels, latent_dim):
+        """ Statistical model for latent space
+        
+            Computes scaling, deadzone, r, and theta 
+        
+        """
+
+        super(StatisticalModel, self).__init__()
+
+        # copy parameters
+        self.latent_dim     = latent_dim
+        self.quant_levels   = quant_levels
+        self.embedding_dim  = 6 * latent_dim
+
+        # quantization embedding
+        self.quant_embedding    = nn.Embedding(quant_levels, self.embedding_dim)
+        
+        # initialize embedding to 0
+        with torch.no_grad():
+            self.quant_embedding.weight[:] = 0
+
+
+    def forward(self, quant_ids):
+        """ takes quant_ids and returns statistical model parameters"""
+
+        x = self.quant_embedding(quant_ids)
+
+        # NOTE: theta_soft is no longer used and is a candidate for removal
+        quant_scale = F.softplus(x[..., 0 * self.latent_dim : 1 * self.latent_dim])
+        dead_zone   = F.softplus(x[..., 1 * self.latent_dim : 2 * self.latent_dim])
+        theta_soft  = torch.sigmoid(x[..., 2 * self.latent_dim : 3 * self.latent_dim])
+        r_soft      = torch.sigmoid(x[..., 3 * self.latent_dim : 4 * self.latent_dim])
+        theta_hard  = torch.sigmoid(x[..., 4 * self.latent_dim : 5 * self.latent_dim])
+        r_hard      = torch.sigmoid(x[..., 5 * self.latent_dim : 6 * self.latent_dim])
+        
+
+        return {
+            'quant_embedding'   : x,
+            'quant_scale'       : quant_scale,
+            'dead_zone'         : dead_zone,
+            'r_hard'            : r_hard,
+            'theta_hard'        : theta_hard,
+            'r_soft'            : r_soft,
+            'theta_soft'        : theta_soft
+        }
+
+
+class RDOVAE(nn.Module):
+    def __init__(self,
+                 feature_dim,
+                 latent_dim,
+                 quant_levels,
+                 cond_size,
+                 cond_size2,
+                 state_dim=24,
+                 split_mode='split',
+                 clip_weights=True,
+                 pvq_num_pulses=82,
+                 state_dropout_rate=0):
+
+        super(RDOVAE, self).__init__()
+
+        self.feature_dim    = feature_dim
+        self.latent_dim     = latent_dim
+        self.quant_levels   = quant_levels
+        self.cond_size      = cond_size
+        self.cond_size2     = cond_size2
+        self.split_mode     = split_mode
+        self.state_dim      = state_dim
+        self.pvq_num_pulses = pvq_num_pulses
+        self.state_dropout_rate = state_dropout_rate
+        
+        # submodules encoder and decoder share the statistical model
+        self.statistical_model = StatisticalModel(quant_levels, latent_dim)
+        self.core_encoder = nn.DataParallel(CoreEncoder(feature_dim, latent_dim, cond_size, cond_size2, state_size=state_dim))
+        self.core_decoder = nn.DataParallel(CoreDecoder(latent_dim, feature_dim, cond_size, cond_size2, state_size=state_dim))
+        
+        self.enc_stride = CoreEncoder.FRAMES_PER_STEP
+        self.dec_stride = CoreDecoder.FRAMES_PER_STEP
+       
+        if clip_weights:
+            self.weight_clip_fn = weight_clip_factory(0.496)
+        else:
+            self.weight_clip_fn = None
+        
+        if self.dec_stride % self.enc_stride != 0:
+            raise ValueError("get_decoder_chunks_generic: encoder stride does not divide decoder stride")
+    
+    def clip_weights(self):
+        if self.weight_clip_fn is not None:
+            self.apply(self.weight_clip_fn)
+            
+    def get_decoder_chunks(self, z_frames, mode='split', chunks_per_offset = 4):
+        
+        enc_stride = self.enc_stride
+        dec_stride = self.dec_stride
+
+        stride = dec_stride // enc_stride
+        
+        chunks = []
+
+        for offset in range(stride):
+            # start is the smallest number congruent to offset mod stride that decodes to a valid range
+            start = offset
+            while enc_stride * (start + 1) - dec_stride < 0:
+                start += stride
+
+            # check if start is a valid index
+            if start >= z_frames:
+                raise ValueError("get_decoder_chunks_generic: range too small")
+
+            # stop is the smallest number outside [0, num_enc_frames] that's congruent to offset mod stride
+            stop = z_frames - (z_frames % stride) + offset
+            while stop < z_frames:
+                stop += stride
+
+            # calculate split points
+            length = (stop - start)
+            if mode == 'split':
+                split_points = [start + stride * int(i * length / chunks_per_offset / stride) for i in range(chunks_per_offset)] + [stop]
+            elif mode == 'random_split':
+                split_points = [stride * x + start for x in random_split(0, (stop - start)//stride - 1, chunks_per_offset - 1, 1)]
+            else:
+                raise ValueError(f"get_decoder_chunks_generic: unknown mode {mode}")
+
+
+            for i in range(chunks_per_offset):
+                # (enc_frame_start, enc_frame_stop, enc_frame_stride, stride, feature_frame_start, feature_frame_stop)
+                # encoder range(i, j, stride) maps to feature range(enc_stride * (i + 1) - dec_stride, enc_stride * j)
+                # provided that i - j = 1 mod stride
+                chunks.append({
+                    'z_start'         : split_points[i],
+                    'z_stop'          : split_points[i + 1] - stride + 1,
+                    'z_stride'        : stride,
+                    'features_start'  : enc_stride * (split_points[i] + 1) - dec_stride,
+                    'features_stop'   : enc_stride * (split_points[i + 1] - stride + 1)
+                })
+
+        return chunks
+
+
+    def forward(self, features, q_id):
+
+        # calculate statistical model from quantization ID
+        statistical_model = self.statistical_model(q_id)
+
+        # run encoder
+        z, states = self.core_encoder(features)
+
+        # scaling, dead-zone and quantization
+        z = z * statistical_model['quant_scale']
+        z = soft_dead_zone(z, statistical_model['dead_zone'])
+
+        # quantization
+        z_q = hard_quantize(z) / statistical_model['quant_scale']
+        z_n = noise_quantize(z) / statistical_model['quant_scale']
+        states_q = soft_pvq(states, self.pvq_num_pulses)
+        
+        if self.state_dropout_rate > 0:
+            drop = torch.rand(states_q.size(0)) < self.state_dropout_rate
+            mask = torch.ones_like(states_q)
+            mask[drop] = 0
+            states_q = states_q * mask
+
+        # decoder
+        chunks = self.get_decoder_chunks(z.size(1), mode=self.split_mode)
+
+        outputs_hq = []
+        outputs_sq = []
+        for chunk in chunks:
+            # decoder with hard quantized input
+            z_dec_reverse       = torch.flip(z_q[..., chunk['z_start'] : chunk['z_stop'] : chunk['z_stride'], :], [1])
+            dec_initial_state   = states_q[..., chunk['z_stop'] - 1 : chunk['z_stop'], :]
+            features_reverse = self.core_decoder(z_dec_reverse,  dec_initial_state)
+            outputs_hq.append((torch.flip(features_reverse, [1]), chunk['features_start'], chunk['features_stop']))
+
+
+            # decoder with soft quantized input
+            z_dec_reverse       = torch.flip(z_n[..., chunk['z_start'] : chunk['z_stop'] : chunk['z_stride'], :],  [1])
+            features_reverse    = self.core_decoder(z_dec_reverse, dec_initial_state)
+            outputs_sq.append((torch.flip(features_reverse, [1]), chunk['features_start'], chunk['features_stop']))          
+
+        return {
+            'outputs_hard_quant' : outputs_hq,
+            'outputs_soft_quant' : outputs_sq,
+            'z'                 : z,
+            'statistical_model' : statistical_model
+        }
+
+    def encode(self, features):
+        """ encoder with quantization and rate estimation """
+        
+        z, states = self.core_encoder(features)
+        
+        # quantization of initial states
+        states = soft_pvq(states, self.pvq_num_pulses)     
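+        # number of bits needed for the quantized initial state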
+        state_size = m.log2(pvq_codebook_size(self.state_dim, self.pvq_num_pulses))
+        
+        return z, states, state_size
+
+    def decode(self, z, initial_state):
+        """ decoder (flips sequences by itself) """
+        
+        z_reverse       = torch.flip(z, [1])
+        features_reverse = self.core_decoder(z_reverse, initial_state)
+        features = torch.flip(features_reverse, [1])
+        
+        return features
+        
+    def quantize(self, z, q_ids):
+        """ quantization of latent vectors """
+
+        stats = self.statistical_model(q_ids)
+
+        zq = z * stats['quant_scale']
+        zq = soft_dead_zone(zq, stats['dead_zone'])
+        zq = torch.round(zq)
+
+        sizes = hard_rate_estimate(zq, stats['r_hard'], stats['theta_hard'], reduce=False)
+
+        return zq, sizes
+
+    def unquantize(self, zq, q_ids):
+        """ re-scaling of latent vector """
+
+        stats = self.statistical_model(q_ids)
+
+        z = zq / stats['quant_scale']
+
+        return z
+    
+    def freeze_model(self):
+
+        # freeze all parameters
+        for p in self.parameters():
+            p.requires_grad = False
+        
+        for p in self.statistical_model.parameters():
+            p.requires_grad = True
+
--- /dev/null
+++ b/dnn/torch/rdovae/requirements.txt
@@ -1,0 +1,5 @@
+numpy
+scipy
+torch
+tqdm
+libs/wexchange-1.2-py3-none-any.whl
\ No newline at end of file
--- /dev/null
+++ b/dnn/torch/rdovae/train_rdovae.py
@@ -1,0 +1,270 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import os
+import argparse
+
+import torch
+import tqdm
+
+from rdovae import RDOVAE, RDOVAEDataset, distortion_loss, hard_rate_estimate, soft_rate_estimate
+
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('features', type=str, help='path to feature file in .f32 format')
+parser.add_argument('output', type=str, help='path to output folder')
+
+parser.add_argument('--cuda-visible-devices', type=str, help="comma-separated list of CUDA visible device indices, default: ''", default="")
+
+
+model_group = parser.add_argument_group(title="model parameters")
+model_group.add_argument('--latent-dim', type=int, help="number of symbols produced by the encoder, default: 80", default=80)
+model_group.add_argument('--cond-size', type=int, help="first conditioning size, default: 256", default=256)
+model_group.add_argument('--cond-size2', type=int, help="second conditioning size, default: 256", default=256)
+model_group.add_argument('--state-dim', type=int, help="dimensionality of transferred state, default: 24", default=24)
+model_group.add_argument('--quant-levels', type=int, help="number of quantization levels, default: 16", default=16)
+model_group.add_argument('--lambda-min', type=float, help="minimal value for rate lambda, default: 0.0002", default=2e-4)
+model_group.add_argument('--lambda-max', type=float, help="maximal value for rate lambda, default: 0.0104", default=0.0104)
+model_group.add_argument('--pvq-num-pulses', type=int, help="number of pulses for PVQ, default: 82", default=82)
+model_group.add_argument('--state-dropout-rate', type=float, help="state dropout rate, default: 0", default=0.0)
+
+training_group = parser.add_argument_group(title="training parameters")
+training_group.add_argument('--batch-size', type=int, help="batch size, default: 32", default=32)
+training_group.add_argument('--lr', type=float, help='learning rate, default: 3e-4', default=3e-4)
+training_group.add_argument('--epochs', type=int, help='number of training epochs, default: 100', default=100)
+training_group.add_argument('--sequence-length', type=int, help='sequence length, needs to be divisible by 4, default: 256', default=256)
+training_group.add_argument('--lr-decay-factor', type=float, help='learning rate decay factor, default: 2.5e-5', default=2.5e-5)
+training_group.add_argument('--split-mode', type=str, choices=['split', 'random_split'], help='splitting mode for decoder input, default: split', default='split')
+training_group.add_argument('--enable-first-frame-loss', action='store_true', default=False, help='enables dedicated distortion loss on first 4 decoder frames')
+training_group.add_argument('--initial-checkpoint', type=str, help='initial checkpoint to start training from, default: None', default=None)
+training_group.add_argument('--train-decoder-only', action='store_true', help='freeze encoder and statistical model and train decoder only')
+
+args = parser.parse_args()
+
+# set visible devices
+os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_visible_devices
+
+# checkpoints
+checkpoint_dir = os.path.join(args.output, 'checkpoints')
+checkpoint = dict()
+os.makedirs(checkpoint_dir, exist_ok=True)
+
+# training parameters
+batch_size = args.batch_size
+lr = args.lr
+epochs = args.epochs
+sequence_length = args.sequence_length
+lr_decay_factor = args.lr_decay_factor
+split_mode = args.split_mode
+# not exposed
+adam_betas = [0.9, 0.99]
+adam_eps = 1e-8
+
+checkpoint['batch_size'] = batch_size
+checkpoint['lr'] = lr
+checkpoint['lr_decay_factor'] = lr_decay_factor 
+checkpoint['split_mode'] = split_mode
+checkpoint['epochs'] = epochs
+checkpoint['sequence_length'] = sequence_length
+checkpoint['adam_betas'] = adam_betas
+
+# logging
+log_interval = 10
+
+# device
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+# model parameters
+cond_size  = args.cond_size
+cond_size2 = args.cond_size2
+latent_dim = args.latent_dim
+quant_levels = args.quant_levels
+lambda_min = args.lambda_min
+lambda_max = args.lambda_max
+state_dim = args.state_dim
+# not exposed
+num_features = 20
+
+
+# training data
+feature_file = args.features
+
+# model
+checkpoint['model_args']    = (num_features, latent_dim, quant_levels, cond_size, cond_size2)
+checkpoint['model_kwargs']  = {'state_dim': state_dim, 'split_mode' : split_mode, 'pvq_num_pulses': args.pvq_num_pulses, 'state_dropout_rate': args.state_dropout_rate}
+model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs'])
+
+if args.initial_checkpoint is not None:
+    checkpoint = torch.load(args.initial_checkpoint, map_location='cpu')
+    model.load_state_dict(checkpoint['state_dict'], strict=False)
+
+checkpoint['state_dict']    = model.state_dict()
+
+if args.train_decoder_only:
+    if args.initial_checkpoint is None:
+        print("warning: training decoder only without providing initial checkpoint")
+        
+    for p in model.core_encoder.module.parameters():
+        p.requires_grad = False
+        
+    for p in model.statistical_model.parameters():
+        p.requires_grad = False
+
+# dataloader
+checkpoint['dataset_args'] = (feature_file, sequence_length, num_features, 36)
+checkpoint['dataset_kwargs'] = {'lambda_min': lambda_min, 'lambda_max': lambda_max, 'enc_stride': model.enc_stride, 'quant_levels': quant_levels}
+dataset = RDOVAEDataset(*checkpoint['dataset_args'], **checkpoint['dataset_kwargs'])
+dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=4)
+
+
+
+# optimizer
+params = [p for p in model.parameters() if p.requires_grad]
+optimizer = torch.optim.Adam(params, lr=lr, betas=adam_betas, eps=adam_eps)
+
+
+# learning rate scheduler
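+# lr decays as lr / (1 + lr_decay_factor * step), stepped once per batch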
+scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda x : 1 / (1 + lr_decay_factor * x))
+
+if __name__ == '__main__':
+
+    # push model to device
+    model.to(device)
+
+    # training loop
+
+    for epoch in range(1, epochs + 1):
+
+        print(f"training epoch {epoch}...")
+
+        # running stats
+        running_rate_loss       = 0
+        running_soft_dist_loss  = 0
+        running_hard_dist_loss  = 0
+        running_hard_rate_loss  = 0
+        running_soft_rate_loss  = 0
+        running_total_loss      = 0
+        running_rate_metric     = 0
+        previous_total_loss     = 0
+        running_first_frame_loss = 0
+
+        with tqdm.tqdm(dataloader, unit='batch') as tepoch:
+            for i, (features, rate_lambda, q_ids) in enumerate(tepoch):
+
+                # zero out gradients
+                optimizer.zero_grad()
+                
+                # push inputs to device
+                features    = features.to(device)
+                q_ids       = q_ids.to(device)
+                rate_lambda = rate_lambda.to(device)
+
+                
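+                # features run at twice the latent frame rate, so repeat each
+                # per-sequence lambda along the time axis to allow indexing it
+                # with the feature-domain start/stop positions below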
+                rate_lambda_upsamp = torch.repeat_interleave(rate_lambda, 2, 1)
+                
+                # run model
+                model_output = model(features, q_ids)
+
+                # collect outputs
+                z                   = model_output['z']
+                outputs_hard_quant  = model_output['outputs_hard_quant']
+                outputs_soft_quant  = model_output['outputs_soft_quant']
+                statistical_model   = model_output['statistical_model']
+
+                # rate loss
+                hard_rate = hard_rate_estimate(z, statistical_model['r_hard'], statistical_model['theta_hard'], reduce=False)
+                soft_rate = soft_rate_estimate(z, statistical_model['r_soft'], reduce=False)
+                soft_rate_loss = torch.mean(torch.sqrt(rate_lambda) * soft_rate)
+                hard_rate_loss = torch.mean(torch.sqrt(rate_lambda) * hard_rate)
+                rate_loss = (soft_rate_loss + 0.1 * hard_rate_loss)
+                hard_rate_metric = torch.mean(hard_rate)
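+                # both rate terms are weighted by sqrt(rate_lambda) per
+                # sequence; the hard-quantization estimate only contributes
+                # with a 0.1 weight, while hard_rate_metric logs the
+                # unweighted hard rate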
+
+                ## distortion losses
+
+                # hard quantized decoder input
+                distortion_loss_hard_quant = torch.zeros_like(rate_loss)
+                for dec_features, start, stop in outputs_hard_quant:
+                    distortion_loss_hard_quant += distortion_loss(features[..., start : stop, :], dec_features, rate_lambda_upsamp[..., start : stop]) / len(outputs_hard_quant)
+
+                first_frame_loss = torch.zeros_like(rate_loss)
+                for dec_features, start, stop in outputs_hard_quant:
+                    first_frame_loss += distortion_loss(features[..., stop-4 : stop, :], dec_features[..., -4:, :], rate_lambda_upsamp[..., stop - 4 : stop]) / len(outputs_hard_quant)
+
+                # soft quantized decoder input
+                distortion_loss_soft_quant = torch.zeros_like(rate_loss)
+                for dec_features, start, stop in outputs_soft_quant:
+                    distortion_loss_soft_quant += distortion_loss(features[..., start : stop, :], dec_features, rate_lambda_upsamp[..., start : stop]) / len(outputs_soft_quant)
+
+                # total loss
+                total_loss = rate_loss + (distortion_loss_hard_quant + distortion_loss_soft_quant) / 2
+                
+                if args.enable_first_frame_loss:
+                    total_loss = total_loss + 0.5 * torch.relu(first_frame_loss - distortion_loss_hard_quant)
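+                    # the relu gates the penalty: it is nonzero only when the
+                    # last four frames are worse than the average hard-quantized
+                    # distortion, so frames that are already good are not pushed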
+                
+
+                total_loss.backward()
+                
+                optimizer.step()
+                
+                model.clip_weights()
+                
+                scheduler.step()
+
+                # collect running stats
+                running_hard_dist_loss  += float(distortion_loss_hard_quant.detach().cpu())
+                running_soft_dist_loss  += float(distortion_loss_soft_quant.detach().cpu())
+                running_rate_loss       += float(rate_loss.detach().cpu())
+                running_rate_metric     += float(hard_rate_metric.detach().cpu())
+                running_total_loss      += float(total_loss.detach().cpu())
+                running_first_frame_loss += float(first_frame_loss.detach().cpu())
+                running_soft_rate_loss += float(soft_rate_loss.detach().cpu())
+                running_hard_rate_loss += float(hard_rate_loss.detach().cpu())
+
+                if (i + 1) % log_interval == 0:
+                    current_loss = (running_total_loss - previous_total_loss) / log_interval
+                    tepoch.set_postfix(
+                        current_loss=current_loss,
+                        total_loss=running_total_loss / (i + 1),
+                        dist_hq=running_hard_dist_loss / (i + 1),
+                        dist_sq=running_soft_dist_loss / (i + 1),
+                        rate_loss=running_rate_loss / (i + 1),
+                        rate=running_rate_metric / (i + 1),
+                        ffloss=running_first_frame_loss / (i + 1),
+                        rateloss_hard=running_hard_rate_loss / (i + 1),
+                        rateloss_soft=running_soft_rate_loss / (i + 1)
+                    )
+                    previous_total_loss = running_total_loss
+
+        # save checkpoint
+        checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch}.pth')
+        checkpoint['state_dict'] = model.state_dict()
+        checkpoint['loss'] = running_total_loss / len(dataloader)
+        checkpoint['epoch'] = epoch
+        torch.save(checkpoint, checkpoint_path)
--- /dev/null
+++ b/dnn/training_tf2/decode_rdovae.py
@@ -1,0 +1,111 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+   Copyright (c) 2018-2019 Mozilla
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Decode RDOVAE symbols back into feature vectors
+
+import argparse
+#from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Decode RDOVAE symbols to features')
+
+parser.add_argument('bits', metavar='<bits file>', help='prefix of the encoder output files (<prefix>-syms.f32 and <prefix>-state.f32, float32)')
+parser.add_argument('output', metavar='<output>', help='output features file (float32)')
+parser.add_argument('--model', metavar='<model>', default='rdovae', help='RDOVAE model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--weights', metavar='<input weights>', help='model weights')
+parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 1)')
+parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
+
+
+args = parser.parse_args()
+
+import importlib
+rdovae = importlib.import_module(args.model)
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+from rdovae import pvq_quantize
+from rdovae import apply_dead_zone
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
+model.load_weights(args.weights)
+
+lpc_order = 16
+nbits = 80
+
+
+bits_file = args.bits
+sequence_size = args.seq_length
+
+# load the float32 symbols and decoder state produced by encode_rdovae.py
+
+bits = np.memmap(bits_file + "-syms.f32", dtype='float32', mode='r')
+nb_sequences = len(bits)//(40*sequence_size)//batch_size*batch_size
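+# the encoder emits 80 symbols per latent frame and one latent frame per two
+# feature frames (40 floats per feature frame); nb_sequences is rounded down
+# to a full multiple of batch_size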
+bits = bits[:nb_sequences*sequence_size*40]
+
+bits = np.reshape(bits, (nb_sequences, sequence_size//2, 20*4))
+print(bits.shape)
+
+lambda_val = 0.001 * np.ones((nb_sequences, sequence_size//2, 1))
+quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')
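+# map lambda to a quantizer id; for lambda_val = 0.001 this gives
+# round(3.8 * ln(0.001 / 0.0002)) = round(3.8 * ln(5)) = 6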
+quant_id = quant_id[:,:,0]
+quant_embed = qembedding(quant_id)
+quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])
+dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])
+
+bits = bits*quant_scale
+bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
+bits = bits/quant_scale
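+# the three lines above simulate the quantizer: scale the symbols, apply the
+# learned dead zone, round to integers, then undo the scaling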
+
+
+state = np.memmap(bits_file + "-state.f32", dtype='float32', mode='r')
+
+state = np.reshape(state, (nb_sequences, sequence_size//2, 24))
+state = state[:,-1,:]
+state = pvq_quantize(state, 82)
+#state = state/(1e-15+tf.norm(state, axis=-1,keepdims=True))
+
+print("shapes are:")
+print(bits.shape)
+print(state.shape)
+
+bits = bits[:,1::2,:]
+features = decoder.predict([bits, state], batch_size=batch_size)
+
+features.astype('float32').tofile(args.output)
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -26,6 +26,7 @@
 '''
 
 import os
+import io
 import lpcnet
 import sys
 import numpy as np
@@ -52,11 +53,17 @@
 max_mdense_tmp = 1
 
 def printVector(f, vector, name, dtype='float', dotp=False):
+    global array_list
     if dotp:
         vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
         vector = vector.transpose((2, 0, 3, 1))
     v = np.reshape(vector, (-1));
     #print('static const float ', name, '[', len(v), '] = \n', file=f)
+    if name not in array_list:
+        array_list.append(name)
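+    # register every dumped array so it can be emitted into the WeightArray
+    # table at the end of the file; the USE_WEIGHTS_FILE guard lets the same
+    # generated code build either with the weights compiled in or with the
+    # static arrays compiled out and loaded at run time instead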
+    f.write('#ifndef USE_WEIGHTS_FILE\n')
+    f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))
+    f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
     f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))
     for i in range(0, len(v)):
         f.write('{}'.format(v[i]))
@@ -69,7 +76,8 @@
         else:
             f.write(" ")
     #print(v, file=f)
-    f.write('\n};\n\n')
+    f.write('\n};\n')
+    f.write('#endif\n\n')
     return;
 
 def printSparseVector(f, A, name, have_diag=True):
@@ -133,11 +141,11 @@
         reset_after = 1
     neurons = weights[0].shape[1]//3
     max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const SparseGRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_recurrent_weights_diag,\n   {}_recurrent_weights,\n   {}_recurrent_weights_idx,\n   {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
     hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
-    hf.write('extern const SparseGRULayer {};\n\n'.format(name));
+    model_struct.write('  SparseGRULayer {};\n'.format(name));
+    model_init.write('  if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx",  {}, ACTIVATION_{}, {})) return 1;\n'
+            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
     return True
 
 def dump_grub(self, f, hf, gru_a_size):
@@ -169,9 +177,9 @@
         reset_after = 1
     neurons = weights[0].shape[1]//3
     max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   {}_weights_idx,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+    model_struct.write('  GRULayer {};\n'.format(name));
+    model_init.write('  if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
             .format(name, name, name, name, name, name, gru_a_size, weights[0].shape[1]//3, activation, reset_after))
-    hf.write('extern const GRULayer {};\n\n'.format(name));
     return True
 
 def dump_gru_layer_dummy(self, f, hf):
@@ -186,10 +194,10 @@
 def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
     printVector(f, weights, name + '_weights')
     printVector(f, bias, name + '_bias')
-    f.write('const DenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
-    hf.write('extern const DenseLayer {};\n\n'.format(name));
+    model_struct.write('  DenseLayer {};\n'.format(name));
+    model_init.write('  if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
+            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
 
 def dump_dense_layer(self, f, hf):
     name = self.name
@@ -211,10 +219,10 @@
     printVector(f, np.transpose(weights[2], (1, 0)), name + '_factor')
     activation = self.activation.__name__.upper()
     max_mdense_tmp = max(max_mdense_tmp, weights[0].shape[0]*weights[0].shape[2])
-    f.write('const MDenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}_factor,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[0]))
-    hf.write('extern const MDenseLayer {};\n\n'.format(name));
+    model_struct.write('  MDenseLayer {};\n'.format(name));
+    model_init.write('  if (mdense_init(&model->{}, arrays, "{}_bias",  "{}_weights",  "{}_factor",  {}, {}, {}, ACTIVATION_{})) return 1;\n'
+            .format(name, name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
     return False
 MDense.dump_layer = dump_mdense_layer
 
@@ -227,12 +235,12 @@
     printVector(f, weights[-1], name + '_bias')
     activation = self.activation.__name__.upper()
     max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
-    f.write('const Conv1DLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
     hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
     hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
-    hf.write('extern const Conv1DLayer {};\n\n'.format(name));
+    model_struct.write('  Conv1DLayer {};\n'.format(name));
+    model_init.write('  if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
+            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
     return True
 Conv1D.dump_layer = dump_conv1d_layer
 
@@ -239,10 +247,10 @@
 
 def dump_embedding_layer_impl(name, weights, f, hf):
     printVector(f, weights, name + '_weights')
-    f.write('const EmbeddingLayer {} = {{\n   {}_weights,\n   {}, {}\n}};\n\n'
-            .format(name, name, weights.shape[0], weights.shape[1]))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
-    hf.write('extern const EmbeddingLayer {};\n\n'.format(name));
+    model_struct.write('  EmbeddingLayer {};\n'.format(name));
+    model_init.write('  if (embedding_init(&model->{}, arrays, "{}_weights", {}, {})) return 1;\n'
+            .format(name, name, weights.shape[0], weights.shape[1]))
 
 def dump_embedding_layer(self, f, hf):
     name = self.name
@@ -281,6 +289,12 @@
 
     f = open(cfile, 'w')
     hf = open(hfile, 'w')
+    model_struct = io.StringIO()
+    model_init = io.StringIO()
+    model_struct.write('typedef struct {\n')
+    model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
+    model_init.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays) {\n')
+    array_list = []
 
     f.write('/*This file is automatically generated from a Keras model*/\n')
     f.write('/*based on model {}*/\n\n'.format(sys.argv[1]))
@@ -326,13 +340,13 @@
     W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
     #FIXME: dump only half the biases
     b = model.get_layer('gru_a').get_weights()[2]
-    dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf)
+    dump_dense_layer_impl('gru_a_dense_feature', W, b[:len(b)//2], 'LINEAR', f, hf)
 
     W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:]
     b = model.get_layer('gru_b').get_weights()[2]
     # Set biases to zero because they'll be included in the GRU input part
     # (we need regular and SU biases)
-    dump_dense_layer_impl('gru_b_dense_feature', W, 0*b, 'LINEAR', f, hf)
+    dump_dense_layer_impl('gru_b_dense_feature', W, 0*b[:len(b)//2], 'LINEAR', f, hf)
     dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1)
 
     layer_list = []
@@ -342,6 +356,19 @@
 
     dump_sparse_gru(model.get_layer('gru_a'), f, hf)
 
+    f.write('#ifndef USE_WEIGHTS_FILE\n')
+    f.write('const WeightArray lpcnet_arrays[] = {\n')
+    for name in array_list:
+        f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
+        f.write('  {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
+        f.write('#endif\n')
+    f.write('  {NULL, 0, 0, NULL}\n};\n')
+    f.write('#endif\n')
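+    # the table is only compiled when the weights are built in; the
+    # {NULL, 0, 0, NULL} sentinel terminates it so the loader can look up
+    # arrays by name without a separate count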
+
+    model_init.write('  return 0;\n}\n')
+    model_init.write('#endif\n')
+    f.write(model_init.getvalue())
+
     hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
     hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
     hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))
@@ -350,8 +377,11 @@
     hf.write('typedef struct {\n')
     for i, name in enumerate(layer_list):
         hf.write('  float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper())) 
-    hf.write('} NNetState;\n')
+    hf.write('} NNetState;\n\n')
 
+    model_struct.write('} LPCNetModel;\n\n')
+    hf.write(model_struct.getvalue())
+    hf.write('int init_lpcnet_model(LPCNetModel *model, const WeightArray *arrays);\n\n')
     hf.write('\n\n#endif\n')
 
     f.close()
--- a/dnn/training_tf2/dump_plc.py
+++ b/dnn/training_tf2/dump_plc.py
@@ -27,6 +27,7 @@
 '''
 
 import lpcnet_plc
+import io
 import sys
 import numpy as np
 from tensorflow.keras.optimizers import Adam
@@ -41,11 +42,17 @@
 max_conv_inputs = 1
 
 def printVector(f, vector, name, dtype='float', dotp=False):
+    global array_list
     if dotp:
         vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
         vector = vector.transpose((2, 0, 3, 1))
     v = np.reshape(vector, (-1));
     #print('static const float ', name, '[', len(v), '] = \n', file=f)
+    if name not in array_list:
+        array_list.append(name)
+    f.write('#ifndef USE_WEIGHTS_FILE\n')
+    f.write('#define WEIGHTS_{}_DEFINED\n'.format(name))
+    f.write('#define WEIGHTS_{}_TYPE WEIGHT_TYPE_{}\n'.format(name, dtype))
     f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))
     for i in range(0, len(v)):
         f.write('{}'.format(v[i]))
@@ -58,7 +65,8 @@
         else:
             f.write(" ")
     #print(v, file=f)
-    f.write('\n};\n\n')
+    f.write('\n};\n')
+    f.write('#endif\n\n')
     return;
 
 def printSparseVector(f, A, name, have_diag=True):
@@ -122,11 +130,11 @@
         reset_after = 1
     neurons = weights[0].shape[1]//3
     max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const SparseGRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_recurrent_weights_diag,\n   {}_recurrent_weights,\n   {}_recurrent_weights_idx,\n   {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
     hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
-    hf.write('extern const SparseGRULayer {};\n\n'.format(name));
+    model_struct.write('  SparseGRULayer {};\n'.format(name));
+    model_init.write('  if (sparse_gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_recurrent_weights_diag", "{}_recurrent_weights", "{}_recurrent_weights_idx",  {}, ACTIVATION_{}, {})) return 1;\n'
+            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
     return True
 
 def dump_gru_layer(self, f, hf):
@@ -158,11 +166,11 @@
         reset_after = 1
     neurons = weights[0].shape[1]//3
     max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   {}_weights_idx,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
     hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
-    hf.write('extern const GRULayer {};\n\n'.format(name));
+    model_struct.write('  GRULayer {};\n'.format(name));
+    model_init.write('  if (gru_init(&model->{}, arrays, "{}_bias", "{}_subias", "{}_weights", "{}_weights_idx", "{}_recurrent_weights", {}, {}, ACTIVATION_{}, {})) return 1;\n'
+             .format(name, name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
     return True
 GRU.dump_layer = dump_gru_layer
 
@@ -178,10 +186,10 @@
 def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
     printVector(f, weights, name + '_weights')
     printVector(f, bias, name + '_bias')
-    f.write('const DenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
-    hf.write('extern const DenseLayer {};\n\n'.format(name));
+    model_struct.write('  DenseLayer {};\n'.format(name));
+    model_init.write('  if (dense_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, ACTIVATION_{})) return 1;\n'
+            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
 
 def dump_dense_layer(self, f, hf):
     name = self.name
@@ -202,12 +210,12 @@
     printVector(f, weights[-1], name + '_bias')
     activation = self.activation.__name__.upper()
     max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
-    f.write('const Conv1DLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'
-            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
     hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
     hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
     hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
-    hf.write('extern const Conv1DLayer {};\n\n'.format(name));
+    model_struct.write('  Conv1DLayer {};\n'.format(name));
+    model_init.write('  if (conv1d_init(&model->{}, arrays, "{}_bias", "{}_weights", {}, {}, {}, ACTIVATION_{})) return 1;\n'
+            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
     return True
 Conv1D.dump_layer = dump_conv1d_layer
 
@@ -235,6 +243,12 @@
 
 f = open(cfile, 'w')
 hf = open(hfile, 'w')
+model_struct = io.StringIO()
+model_init = io.StringIO()
+model_struct.write('typedef struct {\n')
+model_init.write('#ifndef DUMP_BINARY_WEIGHTS\n')
+model_init.write('int init_plc_model(PLCModel *model, const WeightArray *arrays) {\n')
+array_list = []
 
 
 f.write('/*This file is automatically generated from a Keras model*/\n')
@@ -250,7 +264,20 @@
         layer_list.append(layer.name)
 
 #dump_sparse_gru(model.get_layer('gru_a'), f, hf)
+f.write('#ifndef USE_WEIGHTS_FILE\n')
+f.write('const WeightArray lpcnet_plc_arrays[] = {\n')
+for name in array_list:
+    f.write('#ifdef WEIGHTS_{}_DEFINED\n'.format(name))
+    f.write('  {{"{}", WEIGHTS_{}_TYPE, sizeof({}), {}}},\n'.format(name, name, name, name))
+    f.write('#endif\n')
+f.write('  {NULL, 0, 0, NULL}\n};\n')
+f.write('#endif\n')
 
+model_init.write('  return 0;\n}\n')
+model_init.write('#endif\n')
+f.write(model_init.getvalue())
+
+
 hf.write('#define PLC_MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
 #hf.write('#define PLC_MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
 
@@ -257,7 +284,11 @@
 hf.write('typedef struct {\n')
 for i, name in enumerate(layer_list):
     hf.write('  float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper())) 
-hf.write('} PLCNetState;\n')
+hf.write('} PLCNetState;\n\n')
+
+model_struct.write('} PLCModel;\n\n')
+hf.write(model_struct.getvalue())
+hf.write('int init_plc_model(PLCModel *model, const WeightArray *arrays);\n\n')
 
 hf.write('\n\n#endif\n')
 
--- /dev/null
+++ b/dnn/training_tf2/dump_rdovae.py
@@ -1,0 +1,306 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import argparse
+import os
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
+parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
+
+args = parser.parse_args()
+
+# now import the heavy stuff
+import tensorflow as tf
+import numpy as np
+from keraslayerdump import dump_conv1d_layer, dump_dense_layer, dump_gru_layer, printVector
+from rdovae import new_rdovae_model
+
+def start_header(header_fid, header_name):
+    header_guard = os.path.basename(header_name)[:-2].upper() + "_H"
+    header_fid.write(
+f"""
+#ifndef {header_guard}
+#define {header_guard}
+
+"""
+    )
+
+def finish_header(header_fid):
+    header_fid.write(
+"""
+#endif
+
+"""
+    )
+
+def start_source(source_fid, header_name, weight_file):
+    source_fid.write(
+f"""
+/* this source file was automatically generated from weight file {weight_file} */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "{header_name}"
+
+"""
+    )
+
+def finish_source(source_fid):
+    pass
+
+
+def dump_statistical_model(qembedding, f, fh):
+    w = qembedding.weights[0].numpy()
+    levels, dim = w.shape
+    N = dim // 6
+
+    print("dumping statistical model")
+    quant_scales    = tf.math.softplus(w[:, : N]).numpy()
+    dead_zone       = 0.05 * tf.math.softplus(w[:, N : 2 * N]).numpy()
+    r               = tf.math.sigmoid(w[:, 5 * N : 6 * N]).numpy()
+    p0              = tf.math.sigmoid(w[:, 4 * N : 5 * N]).numpy()
+    p0              = 1 - r ** (0.5 + 0.5 * p0)
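+    # r parametrizes the geometric decay of the symbol distribution and p0
+    # the probability of the zero symbol; the reparametrization above
+    # presumably ties p0 to r so the zero probability stays consistent with
+    # the tail decay across quantization levels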
+
+    quant_scales_q8 = np.round(quant_scales * 2**8).astype(np.uint16)
+    dead_zone_q10   = np.round(dead_zone * 2**10).astype(np.uint16)
+    r_q15           = np.round(r * 2**15).astype(np.uint16)
+    p0_q15          = np.round(p0 * 2**15).astype(np.uint16)
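+    # Qn fixed point stores round(x * 2**n) in a uint16, e.g. 1.25 in Q8 is
+    # 320; r and p0 are probabilities (< 1) so they fit in Q15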
+
+    printVector(f, quant_scales_q8, 'dred_quant_scales_q8', dtype='opus_uint16', static=False)
+    printVector(f, dead_zone_q10, 'dred_dead_zone_q10', dtype='opus_uint16', static=False)
+    printVector(f, r_q15, 'dred_r_q15', dtype='opus_uint16', static=False)
+    printVector(f, p0_q15, 'dred_p0_q15', dtype='opus_uint16', static=False)
+
+    fh.write(
+f"""
+extern const opus_uint16 dred_quant_scales_q8[{levels * N}];
+extern const opus_uint16 dred_dead_zone_q10[{levels * N}];
+extern const opus_uint16 dred_r_q15[{levels * N}];
+extern const opus_uint16 dred_p0_q15[{levels * N}];
+
+"""
+    )
+
+if __name__ == "__main__":
+
+    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
+    model.load_weights(args.weights)
+
+    # encoder
+    encoder_dense_names = [
+        'enc_dense1',
+        'enc_dense3',
+        'enc_dense5',
+        'enc_dense7',
+        'enc_dense8',
+        'gdense1',
+        'gdense2'
+    ]
+
+    encoder_gru_names = [
+        'enc_dense2',
+        'enc_dense4',
+        'enc_dense6'
+    ]
+
+    encoder_conv1d_names = [
+        'bits_dense'
+    ]
+
+    source_fid = open("dred_rdovae_enc_data.c", 'w')
+    header_fid = open("dred_rdovae_enc_data.h", 'w')
+
+    start_header(header_fid, "dred_rdovae_enc_data.h")
+    start_source(source_fid, "dred_rdovae_enc_data.h", os.path.basename(args.weights))
+
+    header_fid.write(
+f"""
+#include "dred_rdovae_constants.h"
+
+#include "nnet.h"
+"""
+    )
+
+    # dump GRUs
+    max_rnn_neurons_enc = max(
+        [
+            dump_gru_layer(encoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
+            for name in encoder_gru_names
+        ]
+    )
+
+    # dump conv layers
+    max_conv_inputs = max(
+        [
+            dump_conv1d_layer(encoder.get_layer(name), source_fid, header_fid)
+            for name in encoder_conv1d_names
+        ]
+    )
+
+    # dump Dense layers
+    for name in encoder_dense_names:
+        layer = encoder.get_layer(name)
+        dump_dense_layer(layer, source_fid, header_fid)
+
+    # some global constants
+    header_fid.write(
+f"""
+
+#define DRED_ENC_MAX_RNN_NEURONS {max_rnn_neurons_enc}
+
+#define DRED_ENC_MAX_CONV_INPUTS {max_conv_inputs}
+
+"""
+    )
+
+    finish_header(header_fid)
+    finish_source(source_fid)
+
+    header_fid.close()
+    source_fid.close()
+
+    # statistical model
+    source_fid = open("dred_rdovae_stats_data.c", 'w')
+    header_fid = open("dred_rdovae_stats_data.h", 'w')
+
+    start_header(header_fid, "dred_rdovae_stats_data.h")
+    start_source(source_fid, "dred_rdovae_stats_data.h", os.path.basename(args.weights))
+
+    header_fid.write(
+"""
+
+#include "opus_types.h"
+
+"""
+    )
+
+    dump_statistical_model(qembedding, source_fid, header_fid)
+
+    finish_header(header_fid)
+    finish_source(source_fid)
+
+    header_fid.close()
+    source_fid.close()
+
+    # decoder
+    decoder_dense_names = [
+        'state1',
+        'state2',
+        'state3',
+        'dec_dense1',
+        'dec_dense3',
+        'dec_dense5',
+        'dec_dense7',
+        'dec_dense8',
+        'dec_final'
+    ]
+
+    decoder_gru_names = [
+        'dec_dense2',
+        'dec_dense4',
+        'dec_dense6'
+    ]
+
+    source_fid = open("dred_rdovae_dec_data.c", 'w')
+    header_fid = open("dred_rdovae_dec_data.h", 'w')
+
+    start_header(header_fid, "dred_rdovae_dec_data.h")
+    start_source(source_fid, "dred_rdovae_dec_data.h", os.path.basename(args.weights))
+
+    header_fid.write(
+f"""
+#include "dred_rdovae_constants.h"
+
+#include "nnet.h"
+"""
+    )
+
+
+    # dump GRUs
+    max_rnn_neurons_dec = max(
+        [
+            dump_gru_layer(decoder.get_layer(name), source_fid, header_fid, dotp=True, sparse=True)
+            for name in decoder_gru_names
+        ]
+    )
+
+    # dump Dense layers
+    for name in decoder_dense_names:
+        layer = decoder.get_layer(name)
+        dump_dense_layer(layer, source_fid, header_fid)
+
+    # some global constants
+    header_fid.write(
+f"""
+
+#define DRED_DEC_MAX_RNN_NEURONS {max_rnn_neurons_dec}
+
+"""
+    )
+
+    finish_header(header_fid)
+    finish_source(source_fid)
+
+    header_fid.close()
+    source_fid.close()
+
+    # common constants
+    header_fid = open("dred_rdovae_constants.h", 'w')
+    start_header(header_fid, "dred_rdovae_constants.h")
+
+    header_fid.write(
+f"""
+#define DRED_NUM_FEATURES 20
+
+#define DRED_LATENT_DIM {args.latent_dim}
+
+#define DRED_STATE_DIM 24
+
+#define DRED_NUM_QUANTIZATION_LEVELS {qembedding.weights[0].shape[0]}
+
+#define DRED_MAX_RNN_NEURONS {max(max_rnn_neurons_enc, max_rnn_neurons_dec)}
+
+#define DRED_MAX_CONV_INPUTS {max_conv_inputs}
+"""
+    )
+
+    finish_header(header_fid)
\ No newline at end of file
--- /dev/null
+++ b/dnn/training_tf2/encode_rdovae.py
@@ -1,0 +1,125 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+   Copyright (c) 2018-2019 Mozilla
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Encode features with an RDOVAE model
+
+import argparse
+#from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Encode features with an RDOVAE model')
+
+parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
+parser.add_argument('output', metavar='<output>', help='output prefix for the float32 dumps (-syms.f32, -state.f32, ...)')
+parser.add_argument('--model', metavar='<model>', default='rdovae', help='RDOVAE model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--weights', metavar='<input weights>', help='model weights')
+parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=1, type=int, help='batch size to use (default 1)')
+parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
+
+
+args = parser.parse_args()
+
+import importlib
+rdovae = importlib.import_module(args.model)
+
+from rdovae import apply_dead_zone
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+from rdovae import pvq_quantize
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+model, encoder, decoder, qembedding = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size)
+model.load_weights(args.weights)
+
+lpc_order = 16
+
+feature_file = args.features
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+sequence_size = args.seq_length
+
+# load float32 features
+
+
+features = np.memmap(feature_file, dtype='float32', mode='r')
+nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
+features = features[:nb_sequences*sequence_size*nb_features]
+
+features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
+print(features.shape)
+features = features[:, :, :nb_used_features]
+#features = np.random.randn(73600, 1000, 17)
+
+
+bits, gru_state_dec = encoder.predict([features], batch_size=batch_size)
+gru_state_dec.astype('float32').tofile(args.output + "-state.f32")
+
+
+#dist = rdovae.feat_dist_loss(features, quant_out)
+#rate = rdovae.sq1_rate_loss(features, model_bits)
+#rate2 = rdovae.sq_rate_metric(features, model_bits)
+#print(dist, rate, rate2)
+
+print("shapes are:")
+print(bits.shape)
+print(gru_state_dec.shape)
+
+features.astype('float32').tofile(args.output + "-input.f32")
+#quant_out.astype('float32').tofile(args.output + "-enc_dec.f32")
+nbits = 80
+bits.astype('float32').tofile(args.output + "-syms.f32")
+
+lambda_val = 0.0002 * np.ones((nb_sequences, sequence_size//2, 1))
+quant_id = np.round(3.8*np.log(lambda_val/.0002)).astype('int16')
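+# with lambda_val = 0.0002 this evaluates to round(3.8 * ln(1)) = 0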
+quant_id = quant_id[:,:,0]
+quant_embed = qembedding(quant_id)
+quant_scale = tf.math.softplus(quant_embed[:,:,:nbits])
+dead_zone = tf.math.softplus(quant_embed[:, :, nbits : 2 * nbits])
+
+bits = bits*quant_scale
+bits = np.round(apply_dead_zone([bits, dead_zone]).numpy())
+bits = bits/quant_scale
+
+gru_state_dec = pvq_quantize(gru_state_dec, 82)
+#gru_state_dec = gru_state_dec/(1e-15+tf.norm(gru_state_dec, axis=-1,keepdims=True))
+gru_state_dec = gru_state_dec[:,-1,:]
+dec_out = decoder([bits[:,1::2,:], gru_state_dec])
+
+print(dec_out.shape)
+
+dec_out.numpy().astype('float32').tofile(args.output + "-quant_out.f32")
--- /dev/null
+++ b/dnn/training_tf2/fec_encoder.py
@@ -1,0 +1,257 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe and Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+import os
+import subprocess
+import argparse
+
+
+import numpy as np
+from scipy.io import wavfile
+import tensorflow as tf
+
+from rdovae import new_rdovae_model, pvq_quantize, apply_dead_zone, sq_rate_metric
+from fec_packets import write_fec_packets, read_fec_packets
+
+
+debug = False
+
+if debug:
+    args = type('dummy', (object,),
+    {
+        'input' : 'item1.wav',
+        'weights' : 'testout/rdovae_alignment_fix_1024_120.h5',
+        'enc_lambda' : 0.0007,
+        'output' : "test_0007.fec",
+        'cond_size' : 1024,
+        'num_redundancy_frames' : 64,
+        'extra_delay' : 0,
+        'dump_data' : './dump_data'
+    })()
+    os.environ['CUDA_VISIBLE_DEVICES']=""
+else:
+    parser = argparse.ArgumentParser(description='Encode redundancy for Opus neural FEC. Designed for use with VoIP applications and 20 ms frames')
+
+    parser.add_argument('input', metavar='<input signal>', help='audio input (.wav or .raw or .pcm as int16)')
+    parser.add_argument('weights', metavar='<weights>', help='trained model file (.h5)')
+#    parser.add_argument('enc_lambda', metavar='<lambda>', type=float, help='lambda for controlling encoder rate')
+    parser.add_argument('output', type=str, help='output file (will be extended with .fec)')
+
+    parser.add_argument('--dump-data', type=str, default='./dump_data', help='path to dump data executable (default ./dump_data)')
+    parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+    parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 40)", default=40)
+    parser.add_argument('--num-redundancy-frames', default=64, type=int, help='number of redundancy frames (20ms) per packet (default 64)')
+    parser.add_argument('--extra-delay', default=0, type=int, help="last features in packet are calculated with the decoder aligned samples, use this option to add extra delay (in samples at 16kHz)")
+    parser.add_argument('--lossfile', type=str, help='file containing loss trace (0 for frame received, 1 for lost)')
+
+    parser.add_argument('--debug-output', action='store_true', help='if set, differently assembled features are written to disk')
+
+    args = parser.parse_args()
+
+model, encoder, decoder, qembedding = new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=1, nb_quant=args.quant_levels, cond_size=args.cond_size)
+model.load_weights(args.weights)
+
+lpc_order = 16
+
+## prepare input signal
+# SILK frame size is 20ms and LPCNet subframes are 10ms
+subframe_size = 160
+frame_size = 2 * subframe_size
+
+# 91 samples delay to align with SILK decoded frames
+silk_delay = 91
+
+# prepend zeros to have enough history to produce the first packet
+zero_history = (args.num_redundancy_frames - 1) * frame_size
+
+# dump data has a (feature) delay of 10ms
+dump_data_delay = 160
+
+total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay
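+# with the defaults (num_redundancy_frames=64, extra_delay=0) this is
+# 91 + 63 * 320 + 0 - 160 = 20091 samples at 16 kHz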
+
+# load signal
+if args.input.endswith('.raw') or args.input.endswith('.pcm') or args.input.endswith('.sw'):
+    signal = np.fromfile(args.input, dtype='int16')
+    
+elif args.input.endswith('.wav'):
+    fs, signal = wavfile.read(args.input)
+else:
+    raise ValueError(f'unknown input signal format: {args.input}')
+
+# fill up last frame with zeros
+padded_signal_length = len(signal) + total_delay
+tail = padded_signal_length % frame_size
+right_padding = (frame_size - tail) % frame_size
+    
+signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16)))
+
+padded_signal_file  = os.path.splitext(args.input)[0] + '_padded.raw'
+signal.tofile(padded_signal_file)
+
+# write signal and call dump_data to create features
+
+feature_file = os.path.splitext(args.input)[0] + '_features.f32'
+command = f"{args.dump_data} -test {padded_signal_file} {feature_file}"
+r = subprocess.run(command, shell=True)
+if r.returncode != 0:
+    raise RuntimeError(f"command '{command}' failed with exit code {r.returncode}")
+
+# load features
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+
+# load features
+features = np.fromfile(feature_file, dtype='float32')
+num_subframes = len(features) // nb_features
+num_subframes = 2 * (num_subframes // 2)
+num_frames = num_subframes // 2
+
+features = np.reshape(features, (1, -1, nb_features))
+features = features[:, :, :nb_used_features]
+features = features[:, :num_subframes, :]
+
+#variable quantizer depending on the delay
+q0 = 3
+q1 = 15
+quant_id = np.round(q1 + (q0-q1)*np.arange(args.num_redundancy_frames//2)/args.num_redundancy_frames).astype('int16')
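+# the ids ramp from q1 = 15 down toward q0 = 3 across the half-packet; a
+# larger id corresponds to a larger lambda and hence fewer bits, so the
+# oldest redundancy frames in each packet are coded more coarsely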
+#print(quant_id)
+
+quant_embed = qembedding(quant_id)
+
+# run encoder
+print("running fec encoder...")
+symbols, gru_state_dec = encoder.predict(features)
+
+# apply quantization
+nsymbols = 80
+quant_scale = tf.math.softplus(quant_embed[:, :nsymbols]).numpy()
+dead_zone = tf.math.softplus(quant_embed[:, nsymbols : 2 * nsymbols]).numpy()
+#symbols = apply_dead_zone([symbols, dead_zone]).numpy()
+#qsymbols = np.round(symbols)
+quant_gru_state_dec = pvq_quantize(gru_state_dec, 82)
+
+# rate estimate
+hard_distr_embed = tf.math.sigmoid(quant_embed[:, 4 * nsymbols : ]).numpy()
+#rate_input = np.concatenate((qsymbols, hard_distr_embed, enc_lambda), axis=-1)
+#rates = sq_rate_metric(None, rate_input, reduce=False).numpy()
+
+# run decoder
+input_length = args.num_redundancy_frames // 2
+offset = args.num_redundancy_frames - 1
+
+packets = []
+packet_sizes = []
+
+sym_batch = np.zeros((num_frames-offset, args.num_redundancy_frames//2, nsymbols), dtype='float32')
+quant_state = quant_gru_state_dec[0, offset:num_frames, :]
+#pack symbols for batch processing
+for i in range(offset, num_frames):
+    sym_batch[i-offset, :, :] = symbols[0, i - 2 * input_length + 2 : i + 1 : 2, :]
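+# each row of sym_batch thus holds input_length latent frames at stride 2
+# (one every 20 ms), ending at frame i: the window a single packet protects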
+
+#quantize symbols
+sym_batch = sym_batch * quant_scale
+sym_batch = apply_dead_zone([sym_batch, dead_zone]).numpy()
+sym_batch = np.round(sym_batch)
+
+hard_distr_embed = np.broadcast_to(hard_distr_embed, (sym_batch.shape[0], sym_batch.shape[1], 2*sym_batch.shape[2]))
+fake_lambda = np.ones((sym_batch.shape[0], sym_batch.shape[1], 1), dtype='float32')
+rate_input = np.concatenate((sym_batch, hard_distr_embed, fake_lambda), axis=-1)
+rates = sq_rate_metric(None, rate_input, reduce=False).numpy()
+#print(rates.shape)
+print("average rate = ", np.mean(rates[args.num_redundancy_frames:,:]))
+
+#sym_batch.tofile('qsyms.f32')
+
+sym_batch = sym_batch / quant_scale
+#print(sym_batch.shape, quant_state.shape)
+#features = decoder.predict([sym_batch, quant_state])
+features = decoder([sym_batch, quant_state])
+
+#for i in range(offset, num_frames):
+#    print(f"processing frame {i - offset}...")
+#    features = decoder.predict([qsymbols[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_embed_dec[:, i - 2 * input_length + 2 : i + 1 : 2, :], quant_gru_state_dec[:, i, :]])
+#    packets.append(features)
+#    packet_size = 8 * int((np.sum(rates[:, i - 2 * input_length + 2 : i + 1 : 2]) + 7) / 8) + 64
+#    packet_sizes.append(packet_size)
+
+
+# write packets
+packet_file = args.output + '.fec' if not args.output.endswith('.fec') else args.output
+#write_fec_packets(packet_file, packets, packet_sizes)
+
+
+#print(f"average redundancy rate: {int(round(sum(packet_sizes) / len(packet_sizes) * 50 / 1000))} kbps")
+
+if args.lossfile is not None:
+    loss = np.loadtxt(args.lossfile, dtype='int16')
+    fec_out = np.zeros((features.shape[0]*2, features.shape[-1]), dtype='float32')
+    foffset = -2
+    ptr = 0
+    count = 2
+    for i in range(features.shape[0]):
+        if (loss[i] == 0) or (i == features.shape[0]-1):
+            fec_out[ptr:ptr+count,:] = features[i, foffset:, :]
+            #print("filled ", count)
+            foffset = -2
+            ptr = ptr+count
+            count = 2
+        else:
+            count = count + 2
+            foffset = foffset - 2
+
+    fec_out_full = np.zeros((fec_out.shape[0], nb_features), dtype=np.float32)
+    fec_out_full[:, :nb_used_features] = fec_out
+
+    fec_out_full.tofile(packet_file[:-4] + '_fec.f32')
+    
+
+#create packets array like in the original version for debugging purposes
+for i in range(offset, num_frames):
+    packets.append(features[i-offset:i-offset+1, :, :])
+
+if args.debug_output:
+    import itertools
+
+    #batches = [2, 4]
+    batches = [4]
+    #offsets = [0, 4, 20]
+    offsets = [0, (args.num_redundancy_frames - 2)*2]
+    # sanity checks
+    # 1. concatenate features at offset 0
+    for batch, offset in itertools.product(batches, offsets):
+
+        stop = packets[0].shape[1] - offset
+        print(batch, offset, stop)
+        test_features = np.concatenate([packet[:,stop - batch: stop, :] for packet in packets[::batch//2]], axis=1)
+
+        test_features_full = np.zeros((test_features.shape[1], nb_features), dtype=np.float32)
+        test_features_full[:, :nb_used_features] = test_features[0, :, :]
+
+        print(f"writing debug output {packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32'}")
+        test_features_full.tofile(packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32')
+
--- /dev/null
+++ b/dnn/training_tf2/fec_packets.c
@@ -1,0 +1,142 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "fec_packets.h"
+
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index)
+{
+
+    int16_t version;
+    int16_t header_size;
+    int16_t num_packets;
+    int16_t packet_size;
+    int16_t subframe_size;
+    int16_t subframes_per_packet;
+    int16_t num_features;
+    long offset;
+
+    FILE *fid = fopen(filename, "rb");
+    if (fid == NULL) return 1;
+
+    /* read header */
+    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+    /* check if indices are valid */
+    if (packet_index >= num_packets || subframe_index >= subframes_per_packet)
+    {
+        fprintf(stderr, "get_fec_frame: index out of bounds\n");
+        goto error;
+    }
+
+    /* calculate offset in file (+ 2 is for rate) */
+    offset = header_size + packet_index * packet_size + 2 + subframe_index * subframe_size;
+    fseek(fid, offset, SEEK_SET);
+
+    /* read features */
+    if (fread(features, sizeof(*features), num_features, fid) != num_features) goto error;
+
+    fclose(fid);
+    return 0;
+
+error:
+    fclose(fid);
+    return 1;
+}
+
+int get_fec_rate(const char * const filename, int packet_index)
+{
+    int16_t version;
+    int16_t header_size;
+    int16_t num_packets;
+    int16_t packet_size;
+    int16_t subframe_size;
+    int16_t subframes_per_packet;
+    int16_t num_features;
+    long offset;
+    int16_t rate;
+
+    FILE *fid = fopen(filename, "rb");
+    if (fid == NULL) return -1;
+
+    /* read header */
+    if (fread(&version, sizeof(version), 1, fid) != 1) goto error;
+    if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error;
+    if (fread(&num_packets, sizeof(num_packets), 1, fid) != 1) goto error;
+    if (fread(&packet_size, sizeof(packet_size), 1, fid) != 1) goto error;
+    if (fread(&subframe_size, sizeof(subframe_size), 1, fid) != 1) goto error;
+    if (fread(&subframes_per_packet, sizeof(subframes_per_packet), 1, fid) != 1) goto error;
+    if (fread(&num_features, sizeof(num_features), 1, fid) != 1) goto error;
+
+    /* check if indices are valid */
+    if (packet_index >= num_packets)
+    {
+        fprintf(stderr, "get_fec_rate: index out of bounds\n");
+        goto error;
+    }
+
+    /* calculate offset in file (+ 2 is for rate) */
+    offset = header_size + packet_index * packet_size;
+    fseek(fid, offset, SEEK_SET);
+
+    /* read rate */
+    if (fread(&rate, sizeof(rate), 1, fid) != 1) goto error;
+
+    fclose(fid);
+    return (int) rate;
+
+error:
+    fclose(fid);
+    return -1;
+}
+
+#if 0
+int main()
+{
+    float features[20];
+    int i;
+
+    if (get_fec_frame("../test.fec", &features[0], 0, 127))
+    {
+        return 1;
+    }
+
+    for (i = 0; i < 20; i ++)
+    {
+        printf("%d %f\n", i, features[i]);
+    }
+
+    printf("rate: %d\n", get_fec_rate("../test.fec", 0));
+
+}
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/training_tf2/fec_packets.h
@@ -1,0 +1,34 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FEC_PACKETS_H
+#define FEC_PACKETS_H
+
+int get_fec_frame(const char * const filename, float *features, int packet_index, int subframe_index);
+int get_fec_rate(const char * const filename, int packet_index);
+
+#endif
\ No newline at end of file
--- /dev/null
+++ b/dnn/training_tf2/fec_packets.py
@@ -1,0 +1,108 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+import numpy as np
+
+
+
+def write_fec_packets(filename, packets, rates=None):
+    """ writes packets in binary format """
+    
+    assert np.dtype(np.float32).itemsize == 4
+    assert np.dtype(np.int16).itemsize == 2
+    
+    # derive some sizes 
+    num_packets             = len(packets)
+    subframes_per_packet    = packets[0].shape[-2]
+    num_features            = packets[0].shape[-1]
+    
+    # size of float is 4
+    subframe_size           = num_features * 4
+    packet_size             = subframe_size * subframes_per_packet + 2 # two bytes for rate
+    
+    version = 1
+    # header size (version, header_size, num_packets, packet_size, subframe_size, subframes_per_packet, num_features)
+    header_size = 14
+    
+    with open(filename, 'wb') as f:
+        
+        # header
+        f.write(np.int16(version).tobytes())
+        f.write(np.int16(header_size).tobytes())
+        f.write(np.int16(num_packets).tobytes())
+        f.write(np.int16(packet_size).tobytes())
+        f.write(np.int16(subframe_size).tobytes())
+        f.write(np.int16(subframes_per_packet).tobytes())
+        f.write(np.int16(num_features).tobytes())
+        
+        # packets
+        for i, packet in enumerate(packets):
+            if rates is None:
+                rate = 0
+            else:
+                rate = rates[i]
+            
+            f.write(np.int16(rate).tobytes())
+            
+            features = np.flip(packet, axis=-2)
+            f.write(features.astype(np.float32).tobytes())
+            
+        
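+# Hypothetical round trip, assuming three packets of two 20-dim subframes:
+#   packets = [np.zeros((1, 2, 20), dtype=np.float32) for _ in range(3)]
+#   write_fec_packets('test.fec', packets, rates=[64, 64, 64])
+#   packets2 = read_fec_packets('test.fec')
+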
+def read_fec_packets(filename):
+    """ reads packets from binary format """
+    
+    assert np.dtype(np.float32).itemsize == 4
+    assert np.dtype(np.int16).itemsize == 2
+    
+    with open(filename, 'rb') as f:
+        
+        # header
+        version                 = np.frombuffer(f.read(2), dtype=np.int16).item()
+        header_size             = np.frombuffer(f.read(2), dtype=np.int16).item()
+        num_packets             = np.frombuffer(f.read(2), dtype=np.int16).item()
+        packet_size             = np.frombuffer(f.read(2), dtype=np.int16).item()
+        subframe_size           = np.frombuffer(f.read(2), dtype=np.int16).item()
+        subframes_per_packet    = np.frombuffer(f.read(2), dtype=np.int16).item()
+        num_features            = np.frombuffer(f.read(2), dtype=np.int16).item()
+        
+        dummy_features          = np.zeros((1, subframes_per_packet, num_features), dtype=np.float32)
+        
+        # packets
+        rates = []
+        packets = []
+        for i in range(num_packets):
+                     
+            rate = np.frombuffer(f.read(2), dtype=np.int16).item()
+            rates.append(rate)
+            
+            features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape)
+            packet = np.flip(features, axis=-2)
+            packets.append(packet)
+            
+    return packets
\ No newline at end of file
--- /dev/null
+++ b/dnn/training_tf2/keraslayerdump.py
@@ -1,0 +1,189 @@
+'''Copyright (c) 2017-2018 Mozilla
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+""" helper functions for dumping some Keras layers to C files """
+
+import numpy as np
+
+
+def printVector(f, vector, name, dtype='float', dotp=False, static=True):
+    """ prints vector as one-dimensional C array """
+    if dotp:
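+        # rearrange the weights into the 4x8 block order expected by the
+        # DOT_PROD kernels (see vec_avx.h / vec_neon.h)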
+        vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
+        vector = vector.transpose((2, 0, 3, 1))
+    v = np.reshape(vector, (-1))
+    if static:
+        f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))
+    else:
+        f.write('const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))
+    for i in range(0, len(v)):
+        f.write('{}'.format(v[i]))
+        if (i!=len(v)-1):
+            f.write(',')
+        else:
+            break
+        if (i%8==7):
+            f.write("\n   ")
+        else:
+            f.write(" ")
+    f.write('\n};\n\n')
+    return vector
+
+def printSparseVector(f, A, name, have_diag=True):
+    N = A.shape[0]
+    M = A.shape[1]
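+    # Block-sparse layout: for each band of 8 columns, idx stores the
+    # number of non-zero 4x8 blocks followed by each block's row offset.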
+    W = np.zeros((0,), dtype='int')
+    W0 = np.zeros((0,))
+    if have_diag:
+        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
+        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
+        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
+        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+        printVector(f, diag, name + '_diag')
+    AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
+    idx = np.zeros((0,), dtype='int')
+    for i in range(M//8):
+        pos = idx.shape[0]
+        idx = np.append(idx, -1)
+        nb_nonzero = 0
+        for j in range(N//4):
+            block = A[j*4:(j+1)*4, i*8:(i+1)*8]
+            qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
+            if np.sum(np.abs(block)) > 1e-10:
+                nb_nonzero = nb_nonzero + 1
+                idx = np.append(idx, j*4)
+                vblock = qblock.transpose((1,0)).reshape((-1,))
+                W0 = np.concatenate([W0, block.reshape((-1,))])
+                W = np.concatenate([W, vblock])
+        idx[pos] = nb_nonzero
+    f.write('#ifdef DOT_PROD\n')
+    printVector(f, W, name, dtype='qweight')
+    f.write('#else /*DOT_PROD*/\n')
+    printVector(f, W0, name, dtype='qweight')
+    f.write('#endif /*DOT_PROD*/\n')
+    printVector(f, idx, name + '_idx', dtype='int')
+    return AQ
+
+def dump_sparse_gru(self, f, hf):
+    name = 'sparse_' + self.name
+    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
+    weights = self.get_weights()
+    qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
+    printVector(f, weights[-1], name + '_bias')
+    subias = weights[-1].copy()
+    subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
+    printVector(f, subias, name + '_subias')
+    if hasattr(self, 'activation'):
+        activation = self.activation.__name__.upper()
+    else:
+        activation = 'TANH'
+    if hasattr(self, 'reset_after') and not self.reset_after:
+        reset_after = 0
+    else:
+        reset_after = 1
+    neurons = weights[0].shape[1]//3
+    max_rnn_neurons = neurons
+    f.write('const SparseGRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_recurrent_weights_diag,\n   {}_recurrent_weights,\n   {}_recurrent_weights_idx,\n   {}, ACTIVATION_{}, {}\n}};\n\n'
+            .format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('extern const SparseGRULayer {};\n\n'.format(name))
+    return max_rnn_neurons
+
+def dump_gru_layer(self, f, hf, dotp=False, sparse=False):
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    if sparse:
+        qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)
+    else:
+        qweight = printVector(f, weights[0], name + '_weights')
+
+    if dotp:
+        f.write('#ifdef DOT_PROD\n')
+        qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
+        printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
+        f.write('#else /*DOT_PROD*/\n')
+    else:
+        qweight2 = weights[1]
+
+    printVector(f, weights[1], name + '_recurrent_weights')
+    if dotp:
+        f.write('#endif /*DOT_PROD*/\n')
+
+    printVector(f, weights[-1], name + '_bias')
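+    # subias pre-subtracts the column sums of the quantized weights
+    # (scaled back by 1/128) to compensate for the bias of the
+    # quantized dot product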
+    subias = weights[-1].copy()
+    subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
+    subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
+    printVector(f, subias, name + '_subias')
+    if hasattr(self, 'activation'):
+        activation = self.activation.__name__.upper()
+    else:
+        activation = 'TANH'
+    if hasattr(self, 'reset_after') and not self.reset_after:
+        reset_after = 0
+    else:
+        reset_after = 1
+    neurons = weights[0].shape[1]//3
+    max_rnn_neurons = neurons
+    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   {},\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+            .format(name, name, name, name, name + "_weights_idx" if sparse else "NULL", name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('extern const GRULayer {};\n\n'.format(name))
+    return max_rnn_neurons
+
+def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
+    printVector(f, weights, name + '_weights')
+    printVector(f, bias, name + '_bias')
+    f.write('const DenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
+            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
+    hf.write('extern const DenseLayer {};\n\n'.format(name))
+
+def dump_dense_layer(self, f, hf):
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    activation = self.activation.__name__.upper()
+    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
+    return False
+
+def dump_conv1d_layer(self, f, hf):
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    printVector(f, weights[0], name + '_weights')
+    printVector(f, weights[-1], name + '_bias')
+    activation = self.activation.__name__.upper()
+    max_conv_inputs = weights[0].shape[1]*weights[0].shape[0]
+    f.write('const Conv1DLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'
+            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
+    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
+    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
+    hf.write('extern const Conv1DLayer {};\n\n'.format(name))
+    return max_conv_inputs
--- a/dnn/training_tf2/plc_loader.py
+++ b/dnn/training_tf2/plc_loader.py
@@ -47,19 +47,25 @@
 
     def __getitem__(self, index):
         features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
-        #lost = (np.random.rand(features.shape[0], features.shape[1]) > .2).astype('float')
+        burg_lost = (np.random.rand(features.shape[0], features.shape[1]) > .1).astype('float')
+        burg_lost = np.reshape(burg_lost, (features.shape[0], features.shape[1], 1))
+        burg_mask = np.tile(burg_lost, (1,1,self.nb_burg_features))
+
         lost = self.lost_offset[self.lost_indices[index*self.batch_size:(index+1)*self.batch_size], :]
         lost = np.reshape(lost, (features.shape[0], features.shape[1], 1))
         lost_mask = np.tile(lost, (1,1,features.shape[2]))
         in_features = features*lost_mask
+        in_features[:,:,:self.nb_burg_features] = in_features[:,:,:self.nb_burg_features]*burg_mask
         
         #For the first frame after a loss, we don't have valid features, but the Burg estimate is valid.
-        in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:]
+        #in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:]
         out_lost = np.copy(lost)
-        out_lost[:,1:,:] = out_lost[:,1:,:]*out_lost[:,:-1,:]
+        #out_lost[:,1:,:] = out_lost[:,1:,:]*out_lost[:,:-1,:]
 
         out_features = np.concatenate([features[:,:,self.nb_burg_features:], 1.-out_lost], axis=-1)
-        inputs = [in_features*lost_mask, lost]
+        burg_sign = 2*burg_lost - 1
+        # last dim is 1 for received packet, 0 for lost packet, and -1 when just the Burg info is missing
+        inputs = [in_features*lost_mask, lost*burg_sign]
         outputs = [out_features]
         return (inputs, outputs)
 
--- /dev/null
+++ b/dnn/training_tf2/rdovae.py
@@ -1,0 +1,373 @@
+#!/usr/bin/python3
+'''Copyright (c) 2022 Amazon
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import math
+import tensorflow as tf
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise, AveragePooling1D, RepeatVector
+from tensorflow.compat.v1.keras.layers import CuDNNGRU
+from tensorflow.keras import backend as K
+from tensorflow.keras.constraints import Constraint
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.callbacks import Callback
+from tensorflow.keras.regularizers import l1
+import numpy as np
+import h5py
+from uniform_noise import UniformNoise
+
+class WeightClip(Constraint):
+    '''Clips the weights incident to each hidden unit to be inside a range
+    '''
+    def __init__(self, c=2):
+        self.c = c
+
+    def __call__(self, p):
+        # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
+        # saturation when implementing dot products with SSSE3 or AVX2.
+        return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
+        #return K.clip(p, -self.c, self.c)
+
+    def get_config(self):
+        return {'name': self.__class__.__name__,
+            'c': self.c}
+
+constraint = WeightClip(0.496)
+
+def soft_quantize(x):
+    #x = 4*x
+    #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
+    #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)
+    #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x)    
+    return x
+
+def noise_quantize(x):
+    return soft_quantize(x + (K.random_uniform((128, 16, 80))-.5) )
+
+def hard_quantize(x):
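+    # straight-through estimator: the forward pass rounds, while
+    # tf.stop_gradient makes the backward pass treat rounding as identity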
+    x = soft_quantize(x)
+    quantized = tf.round(x)
+    return x + tf.stop_gradient(quantized - x)
+
+def apply_dead_zone(x):
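+    # soft dead zone: shrink values within ~d of zero so that more latents
+    # round to exactly zero, while keeping the mapping differentiable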
+    d = x[1]*.05
+    x = x[0]
+    y = x - d*tf.math.tanh(x/(.1+d))
+    return y
+
+def rate_loss(y_true,y_pred):
+    log2_e = 1.4427
+    n = y_pred.shape[-1]
+    C = n - log2_e*np.math.log(np.math.gamma(n))
+    k = K.sum(K.abs(y_pred), axis=-1)
+    p = 1.5
+    #rate = C + (n-1)*log2_e*tf.math.log((k**p + (n/5)**p)**(1/p))
+    rate = C + (n-1)*log2_e*tf.math.log(k + .112*n**2/(n/1.8+k) )
+    return K.mean(rate)
+
+eps=1e-6
+def safelog2(x):
+    log2_e = 1.4427
+    return log2_e*tf.math.log(eps+x)
+
+def feat_dist_loss(y_true,y_pred):
+    lambda_1 = 1./K.sqrt(y_pred[:,:,:,-1])
+    y_pred = y_pred[:,:,:,:-1]
+    ceps = y_pred[:,:,:,:18] - y_true[:,:,:18]
+    pitch = 2*(y_pred[:,:,:,18:19] - y_true[:,:,18:19])/(y_true[:,:,18:19] + 2)
+    corr = y_pred[:,:,:,19:] - y_true[:,:,19:]
+    pitch_weight = K.square(K.maximum(0., y_true[:,:,19:]+.5))
+    return K.mean(lambda_1*K.mean(K.square(ceps) + 10*(1/18.)*K.abs(pitch)*pitch_weight + (1/18.)*K.square(corr), axis=-1))
+
+def sq1_rate_loss(y_true,y_pred):
+    lambda_val = K.sqrt(y_pred[:,:,-1])
+    y_pred = y_pred[:,:,:-1]
+    log2_e = 1.4427
+    n = y_pred.shape[-1]//3
+    r = (y_pred[:,:,2*n:])
+    p0 = (y_pred[:,:,n:2*n])
+    p0 = 1-r**(.5+.5*p0)
+    y_pred = y_pred[:,:,:n]
+    y_pred = soft_quantize(y_pred)
+
+    y0 = K.maximum(0., 1. - K.abs(y_pred))**2
+    #rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
+    #rate = -safelog2(-.5*tf.math.log(r)*r**K.abs(y_pred))
+    rate = -safelog2((1-r)/(1+r)*r**K.abs(y_pred))
+    #rate = -safelog2(- tf.math.sinh(.5*tf.math.log(r))* r**K.abs(y_pred) - tf.math.cosh(K.maximum(0., .5 - K.abs(y_pred))*tf.math.log(r)) + 1)
+    rate = lambda_val*K.sum(rate, axis=-1)
+    return K.mean(rate)
+
+def sq2_rate_loss(y_true,y_pred):
+    lambda_val = K.sqrt(y_pred[:,:,-1])
+    y_pred = y_pred[:,:,:-1]
+    log2_e = 1.4427
+    n = y_pred.shape[-1]//3
+    r = y_pred[:,:,2*n:]
+    p0 = y_pred[:,:,n:2*n]
+    p0 = 1-r**(.5+.5*p0)
+    #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(r))
+    #p0 = 1-r**theta
+    y_pred = tf.round(y_pred[:,:,:n])
+    y0 = K.maximum(0., 1. - K.abs(y_pred))**2
+    rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
+    rate = lambda_val*K.sum(rate, axis=-1)
+    return K.mean(rate)
+
+def sq_rate_metric(y_true,y_pred, reduce=True):
+    y_pred = y_pred[:,:,:-1]
+    log2_e = 1.4427
+    n = y_pred.shape[-1]//3
+    r = y_pred[:,:,2*n:]
+    p0 = y_pred[:,:,n:2*n]
+    p0 = 1-r**(.5+.5*p0)
+    #theta = K.minimum(1., .5 + 0*p0 - 0.04*tf.math.log(r))
+    #p0 = 1-r**theta
+    y_pred = tf.round(y_pred[:,:,:n])
+    y0 = K.maximum(0., 1. - K.abs(y_pred))**2
+    rate = -y0*safelog2(p0*r**K.abs(y_pred)) - (1-y0)*safelog2(.5*(1-p0)*(1-r)*r**(K.abs(y_pred)-1))
+    rate = K.sum(rate, axis=-1)
+    if reduce:
+        rate = K.mean(rate)
+    return rate
+
+def pvq_quant_search(x, k):
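+    # iteratively rescale x so that rounding yields a pulse vector whose
+    # absolute values sum to k, as in PVQ quantization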
+    x = x/tf.reduce_sum(tf.abs(x), axis=-1, keepdims=True)
+    kx = k*x
+    y = tf.round(kx)
+    newk = k
+
+    for j in range(10):
+        #print("y = ", y)
+        #print("iteration ", j)
+        abs_y = tf.abs(y)
+        abs_kx = tf.abs(kx)
+        kk=tf.reduce_sum(abs_y, axis=-1)
+        #print("sums = ", kk)
+        plus = 1.000001*tf.reduce_min((abs_y+.5)/(abs_kx+1e-15), axis=-1)
+        minus = .999999*tf.reduce_max((abs_y-.5)/(abs_kx+1e-15), axis=-1)
+        #print("plus = ", plus)
+        #print("minus = ", minus)
+        factor = tf.where(kk>k, minus, plus)
+        factor = tf.where(kk==k, tf.ones_like(factor), factor)
+        #print("scale = ", factor)
+        factor = tf.expand_dims(factor, axis=-1)
+        #newk = newk * (k/kk)**.2
+        newk = newk*factor
+        kx = newk*x
+        #print("newk = ", newk)
+        #print("unquantized = ", newk*x)
+        y = tf.round(kx)
+
+    #print(y)
+    #print(K.mean(K.sum(K.abs(y), axis=-1)))
+    return y
+
+def pvq_quantize(x, k):
+    x = x/(1e-15+tf.norm(x, axis=-1,keepdims=True))
+    quantized = pvq_quant_search(x, k)
+    quantized = quantized/(1e-15+tf.norm(quantized, axis=-1,keepdims=True))
+    return x + tf.stop_gradient(quantized - x)
+
+
+def var_repeat(x):
+    return tf.repeat(tf.expand_dims(x[0], 1), K.shape(x[1])[1], axis=1)
+
+nb_state_dim = 24
+
+def new_rdovae_encoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
+    feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
+
+    gru = CuDNNGRU if training else GRU
+    enc_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense1')
+    enc_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense2')
+    enc_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense3')
+    enc_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense4')
+    enc_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='enc_dense5')
+    enc_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='enc_dense6')
+    enc_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense7')
+    enc_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='enc_dense8')
+
+    #bits_dense = Dense(nb_bits, activation='linear', name='bits_dense')
+    bits_dense = Conv1D(nb_bits, 4, padding='causal', activation='linear', name='bits_dense')
+
+    zero_out = Lambda(lambda x: 0*x)
+    inputs = Reshape((-1, 2*nb_used_features))(feat)
+    d1 = enc_dense1(inputs)
+    d2 = enc_dense2(d1)
+    d3 = enc_dense3(d2)
+    d4 = enc_dense4(d3)
+    d5 = enc_dense5(d4)
+    d6 = enc_dense6(d5)
+    d7 = enc_dense7(d6)
+    d8 = enc_dense8(d7)
+    pre_out = Concatenate()([d1, d2, d3, d4, d5, d6, d7, d8])
+    enc_out = bits_dense(pre_out)
+    global_dense1 = Dense(128, activation='tanh', name='gdense1')
+    global_dense2 = Dense(nb_state_dim, activation='tanh', name='gdense2')
+    global_bits = global_dense2(global_dense1(pre_out))
+
+    encoder = Model([feat], [enc_out, global_bits], name='encoder')
+    return encoder
+
+def new_rdovae_decoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
+    bits_input = Input(shape=(None, nb_bits), batch_size=batch_size, name="dec_bits")
+    gru_state_input = Input(shape=(nb_state_dim,), batch_size=batch_size, name="dec_state")
+
+    gru = CuDNNGRU if training else GRU
+    dec_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense1')
+    dec_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense2')
+    dec_dense3 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense3')
+    dec_dense4 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense4')
+    dec_dense5 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense5')
+    dec_dense6 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense6')
+    dec_dense7 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense7')
+    dec_dense8 = Dense(cond_size, activation='tanh', kernel_constraint=constraint, name='dec_dense8')
+
+    dec_final = Dense(bunch*nb_used_features, activation='linear', name='dec_final')
+
+    time_reverse = Lambda(lambda x: K.reverse(x, 1))
+    #time_reverse = Lambda(lambda x: x)
+    #gru_state_rep = RepeatVector(64//bunch)(gru_state_input)
+
+    #gru_state_rep = Lambda(var_repeat, output_shape=(None, nb_state_dim)) ([gru_state_input, bits_input])
+    gru_state1 = Dense(cond_size, name="state1", activation='tanh')(gru_state_input)
+    gru_state2 = Dense(cond_size, name="state2", activation='tanh')(gru_state_input)
+    gru_state3 = Dense(cond_size, name="state3", activation='tanh')(gru_state_input)
+
+    dec1 = dec_dense1(time_reverse(bits_input))
+    dec2 = dec_dense2(dec1, initial_state=gru_state1)
+    dec3 = dec_dense3(dec2)
+    dec4 = dec_dense4(dec3, initial_state=gru_state2)
+    dec5 = dec_dense5(dec4)
+    dec6 = dec_dense6(dec5, initial_state=gru_state3)
+    dec7 = dec_dense7(dec6)
+    dec8 = dec_dense8(dec7)
+    output = Reshape((-1, nb_used_features))(dec_final(Concatenate()([dec1, dec2, dec3, dec4, dec5, dec6, dec7, dec8])))
+    decoder = Model([bits_input, gru_state_input], time_reverse(output), name='decoder')
+    decoder.nb_bits = nb_bits
+    decoder.bunch = bunch
+    return decoder
+
+def new_split_decoder(decoder):
+    nb_bits = decoder.nb_bits
+    bunch = decoder.bunch
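+    # decode the latent sequence in independent 100-frame chunks; the
+    # decoder runs time-reversed, so each chunk is initialized from the
+    # (quantized) encoder state at its last frame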
+    bits_input = Input(shape=(None, nb_bits), name="split_bits")
+    gru_state_input = Input(shape=(None,nb_state_dim), name="split_state")
+
+    range_select = Lambda(lambda x: x[0][:,x[1]:x[2],:])
+    elem_select = Lambda(lambda x: x[0][:,x[1],:])
+    points = [0, 100, 200, 300, 400]
+    outputs = []
+    for i in range(len(points)-1):
+        begin = points[i]//bunch
+        end = points[i+1]//bunch
+        state = elem_select([gru_state_input, end-1])
+        bits = range_select([bits_input, begin, end])
+        outputs.append(decoder([bits, state]))
+    output = Concatenate(axis=1)(outputs)
+    split = Model([bits_input, gru_state_input], output, name="split")
+    return split
+
+def tensor_concat(x):
+    #n = x[1]//2
+    #x = x[0]
+    n=2
+    y = []
+    for i in range(n-1):
+        offset = 2 * (n-1-i)
+        tmp = K.concatenate([x[i][:, offset:, :], x[-1][:, -offset:, :]], axis=-2) 
+        y.append(tf.expand_dims(tmp, axis=0))
+    y.append(tf.expand_dims(x[-1], axis=0))
+    return Concatenate(axis=0)(y)
+
+
+def new_rdovae_model(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batch_size=128, cond_size=128, cond_size2=256, training=False):
+
+    feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
+    quant_id = Input(shape=(None,), batch_size=batch_size)
+    lambda_val = Input(shape=(None, 1), batch_size=batch_size)
+    lambda_bunched = AveragePooling1D(pool_size=bunch//2, strides=bunch//2, padding="valid")(lambda_val)
+    lambda_up = Lambda(lambda x: K.repeat_elements(x, 2, axis=-2))(lambda_val)
+
+    qembedding = Embedding(nb_quant, 6*nb_bits, name='quant_embed', embeddings_initializer='zeros')
+    quant_embed_dec = qembedding(quant_id)
+    quant_scale = Activation('softplus')(Lambda(lambda x: x[:,:,:nb_bits], name='quant_scale_embed')(quant_embed_dec))
+
+    encoder = new_rdovae_encoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
+    ze, gru_state_dec = encoder([feat])
+    ze = Multiply()([ze, quant_scale])
+
+    decoder = new_rdovae_decoder(nb_used_features, nb_bits, bunch, nb_quant, batch_size, cond_size, cond_size2, training=training)
+    split_decoder = new_split_decoder(decoder)
+
+    dead_zone = Activation('softplus')(Lambda(lambda x: x[:,:,nb_bits:2*nb_bits], name='dead_zone_embed')(quant_embed_dec))
+    soft_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,2*nb_bits:4*nb_bits], name='soft_distr_embed')(quant_embed_dec))
+    hard_distr_embed = Activation('sigmoid')(Lambda(lambda x: x[:,:,4*nb_bits:], name='hard_distr_embed')(quant_embed_dec))
+
+    noisequant = UniformNoise()
+    hardquant = Lambda(hard_quantize)
+    dzone = Lambda(apply_dead_zone)
+    dze = dzone([ze,dead_zone])
+    ndze = noisequant(dze)
+    dze_quant = hardquant(dze)
+    
+    div = Lambda(lambda x: x[0]/x[1])
+    dze_quant = div([dze_quant,quant_scale])
+    ndze_unquant = div([ndze,quant_scale])
+
+    mod_select = Lambda(lambda x: x[0][:,x[1]::bunch//2,:])
+    gru_state_dec = Lambda(lambda x: pvq_quantize(x, 82))(gru_state_dec)
+    combined_output = []
+    unquantized_output = []
+    cat = Concatenate(name="out_cat")
+    for i in range(bunch//2):
+        dze_select = mod_select([dze_quant, i])
+        ndze_select = mod_select([ndze_unquant, i])
+        state_select = mod_select([gru_state_dec, i])
+
+        tmp = split_decoder([dze_select, state_select])
+        tmp = cat([tmp, lambda_up])
+        combined_output.append(tmp)
+
+        tmp = split_decoder([ndze_select, state_select])
+        tmp = cat([tmp, lambda_up])        
+        unquantized_output.append(tmp)
+
+    concat = Lambda(tensor_concat, name="output")
+    combined_output = concat(combined_output)
+    unquantized_output = concat(unquantized_output)
+    
+    e2 = Concatenate(name="hard_bits")([dze, hard_distr_embed, lambda_val])
+    e = Concatenate(name="soft_bits")([dze, soft_distr_embed, lambda_val])
+
+
+    model = Model([feat, quant_id, lambda_val], [combined_output, unquantized_output, e, e2], name="end2end")
+    model.nb_used_features = nb_used_features
+
+    return model, encoder, decoder, qembedding
+
--- /dev/null
+++ b/dnn/training_tf2/rdovae_exchange.py
@@ -1,0 +1,138 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import argparse
+import os
+import sys
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
+parser.add_argument('output', metavar="<output folder>", type=str, help='output exchange folder')
+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
+parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
+
+args = parser.parse_args()
+
+# now import the heavy stuff
+from rdovae import new_rdovae_model
+from wexchange.tf import dump_tf_weights
+
+
+exchange_name = {
+    'enc_dense1'    : 'encoder_stack_layer1_dense',
+    'enc_dense3'    : 'encoder_stack_layer3_dense',
+    'enc_dense5'    : 'encoder_stack_layer5_dense',
+    'enc_dense7'    : 'encoder_stack_layer7_dense',
+    'enc_dense8'    : 'encoder_stack_layer8_dense',
+    'gdense1'       : 'encoder_state_layer1_dense',
+    'gdense2'       : 'encoder_state_layer2_dense',
+    'enc_dense2'    : 'encoder_stack_layer2_gru',
+    'enc_dense4'    : 'encoder_stack_layer4_gru',
+    'enc_dense6'    : 'encoder_stack_layer6_gru',
+    'bits_dense'    : 'encoder_stack_layer9_conv',
+    'qembedding'    : 'statistical_model_embedding',
+    'state1'        : 'decoder_state1_dense',
+    'state2'        : 'decoder_state2_dense',
+    'state3'        : 'decoder_state3_dense',
+    'dec_dense1'    : 'decoder_stack_layer1_dense',
+    'dec_dense3'    : 'decoder_stack_layer3_dense',
+    'dec_dense5'    : 'decoder_stack_layer5_dense',
+    'dec_dense7'    : 'decoder_stack_layer7_dense',
+    'dec_dense8'    : 'decoder_stack_layer8_dense',
+    'dec_final'     : 'decoder_stack_layer9_dense',
+    'dec_dense2'    : 'decoder_stack_layer2_gru',
+    'dec_dense4'    : 'decoder_stack_layer4_gru',
+    'dec_dense6'    : 'decoder_stack_layer6_gru'
+}
+
+
+if __name__ == "__main__":
+
+    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
+    model.load_weights(args.weights)
+
+    os.makedirs(args.output, exist_ok=True)
+
+    # encoder
+    encoder_dense_names = [
+        'enc_dense1',
+        'enc_dense3',
+        'enc_dense5',
+        'enc_dense7',
+        'enc_dense8',
+        'gdense1',
+        'gdense2'
+    ]
+
+    encoder_gru_names = [
+        'enc_dense2',
+        'enc_dense4',
+        'enc_dense6'
+    ]
+
+    encoder_conv1d_names = [
+        'bits_dense'
+    ]
+
+
+    for name in encoder_dense_names + encoder_gru_names + encoder_conv1d_names:
+        print(f"writing layer {exchange_name[name]}...")
+        dump_tf_weights(os.path.join(args.output, exchange_name[name]), encoder.get_layer(name))
+
+    # qembedding
+    print(f"writing layer {exchange_name['qembedding']}...")
+    dump_tf_weights(os.path.join(args.output, exchange_name['qembedding']), qembedding)
+   
+    # decoder
+    decoder_dense_names = [
+        'state1',
+        'state2',
+        'state3',
+        'dec_dense1',
+        'dec_dense3',
+        'dec_dense5',
+        'dec_dense7',
+        'dec_dense8',
+        'dec_final'
+    ]   
+
+    decoder_gru_names = [
+        'dec_dense2',
+        'dec_dense4',
+        'dec_dense6'
+    ]
+
+    for name in decoder_dense_names + decoder_gru_names:
+        print(f"writing layer {exchange_name[name]}...")
+        dump_tf_weights(os.path.join(args.output, exchange_name[name]), decoder.get_layer(name))
--- /dev/null
+++ b/dnn/training_tf2/rdovae_import.py
@@ -1,0 +1,123 @@
+"""
+/* Copyright (c) 2022 Amazon
+   Written by Jan Buethe */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+"""
+
+
+import argparse
+import os
+import sys
+
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('input', metavar="<input folder>", type=str, help='input exchange folder')
+parser.add_argument('weights', metavar="<weight file>", type=str, help='model weight file in hdf5 format')
+parser.add_argument('--cond-size', type=int, help="conditioning size (default: 256)", default=256)
+parser.add_argument('--latent-dim', type=int, help="dimension of latent space (default: 80)", default=80)
+parser.add_argument('--quant-levels', type=int, help="number of quantization steps (default: 16)", default=16)
+
+args = parser.parse_args()
+
+# now import the heavy stuff
+from rdovae import new_rdovae_model
+from wexchange.tf import load_tf_weights
+
+
+exchange_name = {
+    'enc_dense1'    : 'encoder_stack_layer1_dense',
+    'enc_dense3'    : 'encoder_stack_layer3_dense',
+    'enc_dense5'    : 'encoder_stack_layer5_dense',
+    'enc_dense7'    : 'encoder_stack_layer7_dense',
+    'enc_dense8'    : 'encoder_stack_layer8_dense',
+    'gdense1'       : 'encoder_state_layer1_dense',
+    'gdense2'       : 'encoder_state_layer2_dense',
+    'enc_dense2'    : 'encoder_stack_layer2_gru',
+    'enc_dense4'    : 'encoder_stack_layer4_gru',
+    'enc_dense6'    : 'encoder_stack_layer6_gru',
+    'bits_dense'    : 'encoder_stack_layer9_conv',
+    'qembedding'    : 'statistical_model_embedding',
+    'state1'        : 'decoder_state1_dense',
+    'state2'        : 'decoder_state2_dense',
+    'state3'        : 'decoder_state3_dense',
+    'dec_dense1'    : 'decoder_stack_layer1_dense',
+    'dec_dense3'    : 'decoder_stack_layer3_dense',
+    'dec_dense5'    : 'decoder_stack_layer5_dense',
+    'dec_dense7'    : 'decoder_stack_layer7_dense',
+    'dec_dense8'    : 'decoder_stack_layer8_dense',
+    'dec_final'     : 'decoder_stack_layer9_dense',
+    'dec_dense2'    : 'decoder_stack_layer2_gru',
+    'dec_dense4'    : 'decoder_stack_layer4_gru',
+    'dec_dense6'    : 'decoder_stack_layer6_gru'
+}
+
+if __name__ == "__main__":
+
+    model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels)
+    
+    encoder_layers = [
+        'enc_dense1',
+        'enc_dense3',
+        'enc_dense5',
+        'enc_dense7',
+        'enc_dense8',
+        'gdense1',
+        'gdense2',
+        'enc_dense2',
+        'enc_dense4',
+        'enc_dense6',
+        'bits_dense'
+    ]
+    
+    decoder_layers = [
+        'state1',
+        'state2',
+        'state3',
+        'dec_dense1',
+        'dec_dense3',
+        'dec_dense5',
+        'dec_dense7',
+        'dec_dense8',
+        'dec_final',
+        'dec_dense2',
+        'dec_dense4',
+        'dec_dense6'
+    ]
+    
+    for name in encoder_layers:
+        print(f"loading weight for layer {name}...")
+        load_tf_weights(os.path.join(args.input, exchange_name[name]), encoder.get_layer(name))
+    
+    print(f"loading weight for layer qembedding...")
+    load_tf_weights(os.path.join(args.input, exchange_name['qembedding']), qembedding)
+    
+    for name in decoder_layers:
+        print(f"loading weight for layer {name}...")
+        load_tf_weights(os.path.join(args.input, exchange_name[name]), decoder.get_layer(name))
+        
+    model.save(args.weights)
\ No newline at end of file
--- /dev/null
+++ b/dnn/training_tf2/train_rdovae.py
@@ -1,0 +1,151 @@
+#!/usr/bin/python3
+'''Copyright (c) 2021-2022 Amazon
+   Copyright (c) 2018-2019 Mozilla
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train an RDO-VAE quantization model
+import tensorflow as tf
+strategy = tf.distribute.MultiWorkerMirroredStrategy()
+
+
+import argparse
+#from plc_loader import PLCLoader
+
+parser = argparse.ArgumentParser(description='Train a quantization model')
+
+parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
+parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
+parser.add_argument('--model', metavar='<model>', default='rdovae', help='rdovae model python definition (without .py)')
+group1 = parser.add_mutually_exclusive_group()
+group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
+group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
+parser.add_argument('--cond-size', metavar='<units>', default=1024, type=int, help='number of units in conditioning network (default 1024)')
+parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
+parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
+parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
+parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
+parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
+parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
+
+
+args = parser.parse_args()
+
+import importlib
+rdovae = importlib.import_module(args.model)
+
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
+import tensorflow.keras.backend as K
+import h5py
+
+#gpus = tf.config.experimental.list_physical_devices('GPU')
+#if gpus:
+#  try:
+#    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
+#  except RuntimeError as e:
+#    print(e)
+
+nb_epochs = args.epochs
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = args.batch_size
+
+quantize = args.quantize is not None
+retrain = args.retrain is not None
+
+if quantize:
+    lr = 0.00003
+    decay = 0
+    input_model = args.quantize
+else:
+    lr = 0.001
+    decay = 2.5e-5
+
+if args.lr is not None:
+    lr = args.lr
+
+if args.decay is not None:
+    decay = args.decay
+
+if retrain:
+    input_model = args.retrain
+
+
+opt = Adam(lr, decay=decay, beta_2=0.99)
+
+with strategy.scope():
+    model, encoder, decoder, _ = rdovae.new_rdovae_model(nb_used_features=20, nb_bits=80, batch_size=batch_size, cond_size=args.cond_size, nb_quant=16)
+    model.compile(optimizer=opt, loss=[rdovae.feat_dist_loss, rdovae.feat_dist_loss, rdovae.sq1_rate_loss, rdovae.sq2_rate_loss], loss_weights=[.5, .5, 1., .1], metrics={'hard_bits':rdovae.sq_rate_metric})
+    model.summary()
+
+lpc_order = 16
+
+feature_file = args.features
+nb_features = model.nb_used_features + lpc_order
+nb_used_features = model.nb_used_features
+sequence_size = args.seq_length
+
+features = np.memmap(feature_file, dtype='float32', mode='r')
+nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
+features = features[:nb_sequences*sequence_size*nb_features]
+
+features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
+print(features.shape)
+features = features[:, :, :nb_used_features]
+
+#lambda_val = np.repeat(np.random.uniform(.0007, .002, (features.shape[0], 1, 1)), features.shape[1]//2, axis=1)
+#quant_id = np.round(10*np.log(lambda_val/.0007)).astype('int16')
+#quant_id = quant_id[:,:,0]
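+# draw one random quantizer id (0..15) per sequence and map it to the
+# rate-distortion trade-off lambda = 2e-4 * exp(id/3.8)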
+quant_id = np.repeat(np.random.randint(16, size=(features.shape[0], 1, 1), dtype='int16'), features.shape[1]//2, axis=1)
+lambda_val = .0002*np.exp(quant_id/3.8)
+quant_id = quant_id[:,:,0]
+
+# dump models to disk as we go
+checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.cond_size, '{epoch:02d}'))
+
+if quantize or retrain:
+    #Adapting from an existing model
+    model.load_weights(input_model)
+
+model.save_weights('{}_{}_initial.h5'.format(args.output, args.cond_size))
+
+callbacks = [checkpoint]
+#callbacks = []
+
+if args.logdir is not None:
+    logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.cond_size)
+    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
+    callbacks.append(tensorboard_callback)
+
+model.fit([features, quant_id, lambda_val], [features, features, features, features], batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
--- /dev/null
+++ b/dnn/training_tf2/uniform_noise.py
@@ -1,0 +1,78 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Contains the UniformNoise layer."""
+
+
+import tensorflow.compat.v2 as tf
+
+from tensorflow.keras import backend
+
+from tensorflow.keras.layers import Layer
+
+class UniformNoise(Layer):
+    """Apply additive zero-centered uniform noise.
+
+    This is useful to mitigate overfitting
+    (you could see it as a form of random data augmentation).
+    Gaussian Noise (GS) is a natural choice as corruption process
+    for real valued inputs.
+
+    As it is a regularization layer, it is only active at training time.
+
+    Args:
+      stddev: Float, standard deviation of the noise distribution.
+      seed: Integer, optional random seed to enable deterministic behavior.
+
+    Call arguments:
+      inputs: Input tensor (of any rank).
+      training: Python boolean indicating whether the layer should behave in
+        training mode (adding noise) or in inference mode (doing nothing).
+
+    Input shape:
+      Arbitrary. Use the keyword argument `input_shape`
+      (tuple of integers, does not include the samples axis)
+      when using this layer as the first layer in a model.
+
+    Output shape:
+      Same shape as input.
+    """
+
+    def __init__(self, stddev=0.5, seed=None, **kwargs):
+        super().__init__(**kwargs)
+        self.supports_masking = True
+        self.stddev = stddev
+
+
+    def call(self, inputs, training=None):
+        def noised():
+            return inputs + backend.random_uniform(
+                shape=tf.shape(inputs),
+                minval=-self.stddev,
+                maxval=self.stddev,
+                dtype=inputs.dtype,
+            )
+
+        return backend.in_train_phase(noised, inputs, training=training)
+
+    def get_config(self):
+        config = {"stddev": self.stddev}
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    def compute_output_shape(self, input_shape):
+        return input_shape
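+
+# Example (as used in rdovae.py): UniformNoise()(x) adds noise drawn from
+# [-0.5, 0.5) at training time only, simulating quantizer rounding.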
--- a/dnn/vec.h
+++ b/dnn/vec.h
@@ -37,7 +37,7 @@
 
 #if defined(__AVX__) || defined(__SSE2__)
 #include "vec_avx.h"
-#elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && !defined(DISABLE_NEON)
 #include "vec_neon.h"
 #else
 
@@ -59,7 +59,7 @@
 
 /* No AVX2/FMA support */
 #ifndef LPCNET_TEST
-static inline float celt_exp2(float x)
+static inline float lpcnet_exp2(float x)
 {
    int integer;
    float frac;
@@ -77,7 +77,7 @@
    res.i = (res.i + (integer<<23)) & 0x7fffffff;
    return res.f;
 }
-#define celt_exp(x) celt_exp2((x)*1.44269504f)
+#define lpcnet_exp(x) lpcnet_exp2((x)*1.44269504f)
 
 static inline float tanh_approx(float x)
 {
@@ -107,7 +107,7 @@
 {
     int i;
     for (i=0;i<N;i++)
-        y[i] = celt_exp(x[i]);
+        y[i] = lpcnet_exp(x[i]);
 }
 
 static inline void vec_tanh(float *y, const float *x, int N)
--- a/dnn/vec_avx.h
+++ b/dnn/vec_avx.h
@@ -33,6 +33,7 @@
 #define VEC_AVX_H
 
 #include <immintrin.h>
+#include <math.h>
 
 /* Use 8-bit dot products unless disabled or if stuck with SSE2. */
 #if (defined(__AVX2__) || defined(__SSSE3__)) && !defined(DISABLE_DOT_PROD)
@@ -41,7 +42,11 @@
 
 #else
 
+#if defined(_MSC_VER)
+#pragma message ("Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 to get better performance")
+#else
 #warning "Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 using -march= to get better performance"
+#endif
 
 #endif
 
@@ -81,7 +86,7 @@
 #define _mm256_storeu_ps(dst, src) mm256_storeu_ps(dst, src)
 
 
-static inline mm256_emu mm256_setzero_ps() {
+static inline mm256_emu mm256_setzero_ps(void) {
   mm256_emu ret;
   ret.lo = _mm_setzero_ps();
   ret.hi = ret.lo;
@@ -297,7 +302,7 @@
    const __m256 K1 = _mm256_set1_ps(0.69583354f);
    const __m256 K2 = _mm256_set1_ps(0.22606716f);
    const __m256 K3 = _mm256_set1_ps(0.078024523f);
-   const __m256 log2_E = _mm256_set1_ps(1.44269504);
+   const __m256 log2_E = _mm256_set1_ps(1.44269504f);
    const __m256 max_in = _mm256_set1_ps(50.f);
    const __m256 min_in = _mm256_set1_ps(-50.f);
    __m256 XF, Y;
@@ -519,7 +524,7 @@
 
 #endif
 
-static inline float celt_exp(float x)
+static inline float lpcnet_exp(float x)
 {
    float out[8];
    __m256 X, Y;
@@ -540,7 +545,7 @@
         _mm256_storeu_ps(&y[i], Y);
     }
     for (;i<N;i++)
-        y[i] = celt_exp(x[i]);
+        y[i] = lpcnet_exp(x[i]);
 }
 
 #ifdef __AVX__
--- a/dnn/vec_neon.h
+++ b/dnn/vec_neon.h
@@ -33,7 +33,12 @@
 #ifndef DISABLE_DOT_PROD
 #define DOT_PROD
 #endif
+
+#ifdef DOT_PROD
 typedef signed char qweight;
+#else
+typedef float qweight;
+#endif
 
 
 #ifndef LPCNET_TEST
@@ -105,7 +110,7 @@
   return vmaxq_f32(min_out, vminq_f32(max_out, num));
 }
 
-static inline float celt_exp(float x)
+static inline float lpcnet_exp(float x)
 {
    float out[4];
    float32x4_t X, Y;
@@ -146,7 +151,7 @@
         vst1q_f32(&y[i], Y);
     }
     for (;i<N;i++)
-        y[i] = celt_exp(x[i]);
+        y[i] = lpcnet_exp(x[i]);
 }
 
 static inline void vec_tanh(float *y, const float *x, int N)
@@ -162,7 +167,7 @@
     for (;i<N;i++)
     {
         float ex2;
-        ex2 = celt_exp(2*x[i]);
+        ex2 = lpcnet_exp(2*x[i]);
         y[i] = (ex2-1)/(ex2+1);
     }
 }
@@ -180,7 +185,7 @@
     for (;i<N;i++)
     {
         float ex;
-        ex = celt_exp(x[i]);
+        ex = lpcnet_exp(x[i]);
         y[i] = (ex)/(ex+1);
     }
 }
--- /dev/null
+++ b/dnn/write_lpcnet_weights.c
@@ -1,0 +1,66 @@
+/* Copyright (c) 2023 Amazon */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include "nnet.h"
+
+extern const WeightArray lpcnet_arrays[];
+extern const WeightArray lpcnet_plc_arrays[];
+
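+/* Each weight array is written as a WEIGHT_BLOCK_SIZE-aligned record:
+   a WeightHead block carrying the "DNNw" magic, version, type, size and
+   name, followed by the raw data zero-padded to the next block boundary. */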
+void write_weights(const WeightArray *list, FILE *fout)
+{
+  int i=0;
+  unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};
+  while (list[i].name != NULL) {
+    WeightHead h;
+    memcpy(h.head, "DNNw", 4);
+    h.version = WEIGHT_BLOB_VERSION;
+    h.type = list[i].type;
+    h.size = list[i].size;
+    h.block_size = (h.size+WEIGHT_BLOCK_SIZE-1)/WEIGHT_BLOCK_SIZE*WEIGHT_BLOCK_SIZE;
+    RNN_CLEAR(h.name, sizeof(h.name));
+    strncpy(h.name, list[i].name, sizeof(h.name));
+    h.name[sizeof(h.name)-1] = 0;
+    celt_assert(sizeof(h) == WEIGHT_BLOCK_SIZE);
+    fwrite(&h, 1, WEIGHT_BLOCK_SIZE, fout);
+    fwrite(list[i].data, 1, h.size, fout);
+    fwrite(zeros, 1, h.block_size-h.size, fout);
+    i++;
+  }
+}
+
+int main(void)
+{
+  FILE *fout = fopen("weights_blob.bin", "wb");
+  if (fout == NULL) return 1;
+  write_weights(lpcnet_arrays, fout);
+  write_weights(lpcnet_plc_arrays, fout);
+  fclose(fout);
+  return 0;
+}
--