ref: e63292bd563d02e4f1f1c03558d22b0fe35d444f
parent: 8dcccc89348f1f7916338a4b2c36574fc1e90b27
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Sun Mar 17 09:24:58 EDT 2019
Split off decoder code
--- a/dnn/Makefile
+++ b/dnn/Makefile
@@ -22,7 +22,7 @@
all: dump_data test_lpcnet test_vec
-dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/lpcnet_enc.o src/ceps_codebooks.o
+dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/lpcnet_dec.o src/lpcnet_enc.o src/ceps_codebooks.o
dump_data_deps := $(dump_data_objs:.o=.d)
dump_data: $(dump_data_objs)
gcc -o $@ $(CFLAGS) $(dump_data_objs) -lm
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -42,119 +42,7 @@
#include "lpcnet.h"
#include "lpcnet_private.h"
-typedef struct {- int byte_pos;
- int bit_pos;
- int max_bytes;
- const unsigned char *chars;
-} unpacker;
-void bits_unpacker_init(unpacker *bits, unsigned char *buf, int size) {- bits->byte_pos = 0;
- bits->bit_pos = 0;
- bits->max_bytes = size;
- bits->chars = buf;
-}
-
-unsigned int bits_unpack(unpacker *bits, int nb_bits) {- unsigned int d=0;
- while(nb_bits)
- {- if (bits->byte_pos == bits->max_bytes) {- fprintf(stderr, "something went horribly wrong\n");
- return 0;
- }
- d<<=1;
- d |= (bits->chars[bits->byte_pos]>>(BITS_PER_CHAR-1 - bits->bit_pos))&1;
- bits->bit_pos++;
- if (bits->bit_pos==BITS_PER_CHAR)
- {- bits->bit_pos=0;
- bits->byte_pos++;
- }
- nb_bits--;
- }
- return d;
-}
-
-void decode_packet(FILE *ffeat, float *vq_mem, unsigned char buf[8])
-{- int c0_id;
- int main_pitch;
- int modulation;
- int corr_id;
- int vq_end[3];
- int vq_mid;
- int interp_id;
-
- int i;
- int sub;
- int voiced = 1;
- float frame_corr;
- float features[4][NB_TOTAL_FEATURES];
- unpacker bits;
-
- bits_unpacker_init(&bits, buf, 8);
- c0_id = bits_unpack(&bits, 7);
- main_pitch = bits_unpack(&bits, 6);
- modulation = bits_unpack(&bits, 3);
- corr_id = bits_unpack(&bits, 2);
- vq_end[0] = bits_unpack(&bits, 10);
- vq_end[1] = bits_unpack(&bits, 10);
- vq_end[2] = bits_unpack(&bits, 10);
- vq_mid = bits_unpack(&bits, 13);
- interp_id = bits_unpack(&bits, 3);
- //fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);
-
-
- for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES);
-
- modulation -= 4;
- if (modulation==-4) {- voiced = 0;
- modulation = 0;
- }
- if (voiced) {- frame_corr = 0.3875f + .175f*corr_id;
- } else {- frame_corr = 0.0375f + .075f*corr_id;
- }
- for (sub=0;sub<4;sub++) {- float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
- p *= 1 + modulation/16./7.*(2*sub-3);
- features[sub][2*NB_BANDS] = .02*(p-100);
- features[sub][2*NB_BANDS + 1] = frame_corr-.5;
- }
-
- features[3][0] = (c0_id-64)/4.;
- for (i=0;i<NB_BANDS_1;i++) {- features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
- }
-
- float sign = 1;
- if (vq_mid >= 4096) {- vq_mid -= 4096;
- sign = -1;
- }
- for (i=0;i<NB_BANDS;i++) {- features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
- }
- if ((vq_mid&MULTI_MASK) < 2) {- for (i=0;i<NB_BANDS;i++) features[1][i] += .5*(vq_mem[i] + features[3][i]);
- } else if ((vq_mid&MULTI_MASK) == 2) {- for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
- } else {- for (i=0;i<NB_BANDS;i++) features[1][i] += features[3][i];
- }
-
- perform_double_interp(features, vq_mem, interp_id);
-
- RNN_COPY(vq_mem, &features[3][0], NB_BANDS);
- for (i=0;i<4;i++) {- fwrite(features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
- }
-}
-
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {int i;
for (i=0;i<N;i++) {@@ -293,11 +181,15 @@
while (1) {int ret;
unsigned char buf[8];
+ float features[4][NB_TOTAL_FEATURES];
//int c0_id, main_pitch, modulation, corr_id, vq_end[3], vq_mid, interp_id;
//ret = fscanf(f1, "%d %d %d %d %d %d %d %d %d\n", &c0_id, &main_pitch, &modulation, &corr_id, &vq_end[0], &vq_end[1], &vq_end[2], &vq_mid, &interp_id);
ret = fread(buf, 1, 8, f1);
if (ret != 8) break;
- decode_packet(ffeat, vq_mem, buf);
+ decode_packet(features, vq_mem, buf);
+ for (i=0;i<4;i++) {+ fwrite(features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
+ }
}
return 0;
}
--- /dev/null
+++ b/dnn/lpcnet_dec.c
@@ -1,0 +1,154 @@
+/* Copyright (c) 2017-2019 Mozilla */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "kiss_fft.h"
+#include "common.h"
+#include <math.h>
+#include "freq.h"
+#include "pitch.h"
+#include "arch.h"
+#include "celt_lpc.h"
+#include <assert.h>
+#include "lpcnet_private.h"
+#include "lpcnet.h"
+
+
+typedef struct {+ int byte_pos;
+ int bit_pos;
+ int max_bytes;
+ const unsigned char *chars;
+} unpacker;
+
+void bits_unpacker_init(unpacker *bits, unsigned char *buf, int size) {+ bits->byte_pos = 0;
+ bits->bit_pos = 0;
+ bits->max_bytes = size;
+ bits->chars = buf;
+}
+
+unsigned int bits_unpack(unpacker *bits, int nb_bits) {+ unsigned int d=0;
+ while(nb_bits)
+ {+ if (bits->byte_pos == bits->max_bytes) {+ fprintf(stderr, "something went horribly wrong\n");
+ return 0;
+ }
+ d<<=1;
+ d |= (bits->chars[bits->byte_pos]>>(BITS_PER_CHAR-1 - bits->bit_pos))&1;
+ bits->bit_pos++;
+ if (bits->bit_pos==BITS_PER_CHAR)
+ {+ bits->bit_pos=0;
+ bits->byte_pos++;
+ }
+ nb_bits--;
+ }
+ return d;
+}
+
+void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, unsigned char buf[8])
+{+ int c0_id;
+ int main_pitch;
+ int modulation;
+ int corr_id;
+ int vq_end[3];
+ int vq_mid;
+ int interp_id;
+
+ int i;
+ int sub;
+ int voiced = 1;
+ float frame_corr;
+ ;
+ unpacker bits;
+
+ bits_unpacker_init(&bits, buf, 8);
+ c0_id = bits_unpack(&bits, 7);
+ main_pitch = bits_unpack(&bits, 6);
+ modulation = bits_unpack(&bits, 3);
+ corr_id = bits_unpack(&bits, 2);
+ vq_end[0] = bits_unpack(&bits, 10);
+ vq_end[1] = bits_unpack(&bits, 10);
+ vq_end[2] = bits_unpack(&bits, 10);
+ vq_mid = bits_unpack(&bits, 13);
+ interp_id = bits_unpack(&bits, 3);
+ //fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);
+
+
+ for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES);
+
+ modulation -= 4;
+ if (modulation==-4) {+ voiced = 0;
+ modulation = 0;
+ }
+ if (voiced) {+ frame_corr = 0.3875f + .175f*corr_id;
+ } else {+ frame_corr = 0.0375f + .075f*corr_id;
+ }
+ for (sub=0;sub<4;sub++) {+ float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
+ p *= 1 + modulation/16./7.*(2*sub-3);
+ features[sub][2*NB_BANDS] = .02*(p-100);
+ features[sub][2*NB_BANDS + 1] = frame_corr-.5;
+ }
+
+ features[3][0] = (c0_id-64)/4.;
+ for (i=0;i<NB_BANDS_1;i++) {+ features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
+ }
+
+ float sign = 1;
+ if (vq_mid >= 4096) {+ vq_mid -= 4096;
+ sign = -1;
+ }
+ for (i=0;i<NB_BANDS;i++) {+ features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
+ }
+ if ((vq_mid&MULTI_MASK) < 2) {+ for (i=0;i<NB_BANDS;i++) features[1][i] += .5*(vq_mem[i] + features[3][i]);
+ } else if ((vq_mid&MULTI_MASK) == 2) {+ for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
+ } else {+ for (i=0;i<NB_BANDS;i++) features[1][i] += features[3][i];
+ }
+
+ perform_double_interp(features, vq_mem, interp_id);
+
+ RNN_COPY(vq_mem, &features[3][0], NB_BANDS);
+}
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2017-2018 Mozilla */
+/* Copyright (c) 2017-2019 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,8 +43,6 @@
#include "lpcnet.h"
-#define NB_DELTA_CEPS 6
-
//#define NB_FEATURES (2*NB_BANDS+3+LPC_ORDER)
@@ -317,8 +315,6 @@
return id;
}
-
-#define FORBIDDEN_INTERP 7
int interp_search(const float *x, const float *left, const float *right, float *dist_out)
{--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -16,6 +16,9 @@
#define MULTI 4
#define MULTI_MASK (MULTI-1)
+#define FORBIDDEN_INTERP 7
+
+
struct LPCNetEncState{float analysis_mem[OVERLAP_SIZE];
float mem_preemph;
@@ -51,5 +54,6 @@
void compute_frame_features(LPCNetEncState *st, const float *in);
+void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, unsigned char buf[8]);
#endif
--- a/dnn/train_lpcnet.py
+++ b/dnn/train_lpcnet.py
@@ -103,8 +103,8 @@
del in_exc
# dump models to disk as we go
-checkpoint = ModelCheckpoint('lpcnet24fq_384_10_G16_{epoch:02d}.h5')+checkpoint = ModelCheckpoint('lpcnet24g_384_10_G16_{epoch:02d}.h5')-model.load_weights('lpcnet24f_384_10_G16_31.h5')-model.compile(optimizer=Adam(0.0005, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy')
+model.load_weights('lpcnet24c_384_10_G16_120.h5')+model.compile(optimizer=Adam(0.0001, amsgrad=True), loss='sparse_categorical_crossentropy')
model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))])
--
⑨