shithub: opus

Download patch

ref: e63292bd563d02e4f1f1c03558d22b0fe35d444f
parent: 8dcccc89348f1f7916338a4b2c36574fc1e90b27
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Sun Mar 17 09:24:58 EDT 2019

Split off decoder code

--- a/dnn/Makefile
+++ b/dnn/Makefile
@@ -22,7 +22,7 @@
 
 all: dump_data test_lpcnet test_vec
 
-dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/lpcnet_enc.o src/ceps_codebooks.o
+dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/lpcnet_dec.o src/lpcnet_enc.o src/ceps_codebooks.o
 dump_data_deps := $(dump_data_objs:.o=.d)
 dump_data: $(dump_data_objs)
 	gcc -o $@ $(CFLAGS) $(dump_data_objs) -lm
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -42,119 +42,7 @@
 #include "lpcnet.h"
 #include "lpcnet_private.h"
 
-typedef struct {
-    int byte_pos;
-    int bit_pos;
-    int max_bytes;
-    const unsigned char *chars;
-} unpacker;
 
-void bits_unpacker_init(unpacker *bits, unsigned char *buf, int size) {
-  bits->byte_pos = 0;
-  bits->bit_pos = 0;
-  bits->max_bytes = size;
-  bits->chars = buf;
-}
-
-unsigned int bits_unpack(unpacker *bits, int nb_bits) {
-  unsigned int d=0;
-  while(nb_bits)
-  {
-    if (bits->byte_pos == bits->max_bytes) {
-      fprintf(stderr, "something went horribly wrong\n");
-      return 0;
-    }
-    d<<=1;
-    d |= (bits->chars[bits->byte_pos]>>(BITS_PER_CHAR-1 - bits->bit_pos))&1;
-    bits->bit_pos++;
-    if (bits->bit_pos==BITS_PER_CHAR)
-    {
-      bits->bit_pos=0;
-      bits->byte_pos++;
-    }
-    nb_bits--;
-  }
-  return d;
-}
-
-void decode_packet(FILE *ffeat, float *vq_mem, unsigned char buf[8])
-{
-  int c0_id;
-  int main_pitch;
-  int modulation;
-  int corr_id;
-  int vq_end[3];
-  int vq_mid;
-  int interp_id;
-  
-  int i;
-  int sub;
-  int voiced = 1;
-  float frame_corr;
-  float features[4][NB_TOTAL_FEATURES];
-  unpacker bits;
-  
-  bits_unpacker_init(&bits, buf, 8);
-  c0_id = bits_unpack(&bits, 7);
-  main_pitch = bits_unpack(&bits, 6);
-  modulation = bits_unpack(&bits, 3);
-  corr_id = bits_unpack(&bits, 2);
-  vq_end[0] = bits_unpack(&bits, 10);
-  vq_end[1] = bits_unpack(&bits, 10);
-  vq_end[2] = bits_unpack(&bits, 10);
-  vq_mid = bits_unpack(&bits, 13);
-  interp_id = bits_unpack(&bits, 3);
-  //fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);
-
-  
-  for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES);
-
-  modulation -= 4;
-  if (modulation==-4) {
-    voiced = 0;
-    modulation = 0;
-  }
-  if (voiced) {
-    frame_corr = 0.3875f + .175f*corr_id;
-  } else {
-    frame_corr = 0.0375f + .075f*corr_id;
-  }
-  for (sub=0;sub<4;sub++) {
-    float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
-    p *= 1 + modulation/16./7.*(2*sub-3);
-    features[sub][2*NB_BANDS] = .02*(p-100);
-    features[sub][2*NB_BANDS + 1] = frame_corr-.5;
-  }
-  
-  features[3][0] = (c0_id-64)/4.;
-  for (i=0;i<NB_BANDS_1;i++) {
-    features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
-  }
-
-  float sign = 1;
-  if (vq_mid >= 4096) {
-    vq_mid -= 4096;
-    sign = -1;
-  }
-  for (i=0;i<NB_BANDS;i++) {
-    features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
-  }
-  if ((vq_mid&MULTI_MASK) < 2) {
-    for (i=0;i<NB_BANDS;i++) features[1][i] += .5*(vq_mem[i] + features[3][i]);
-  } else if ((vq_mid&MULTI_MASK) == 2) {
-    for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
-  } else {
-    for (i=0;i<NB_BANDS;i++) features[1][i] += features[3][i];
-  }
-  
-  perform_double_interp(features, vq_mem, interp_id);
-
-  RNN_COPY(vq_mem, &features[3][0], NB_BANDS);
-  for (i=0;i<4;i++) {
-    fwrite(features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
-  }
-}
-
 static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
   int i;
   for (i=0;i<N;i++) {
@@ -293,11 +181,15 @@
     while (1) {
       int ret;
       unsigned char buf[8];
+      float features[4][NB_TOTAL_FEATURES];
       //int c0_id, main_pitch, modulation, corr_id, vq_end[3], vq_mid, interp_id;
       //ret = fscanf(f1, "%d %d %d %d %d %d %d %d %d\n", &c0_id, &main_pitch, &modulation, &corr_id, &vq_end[0], &vq_end[1], &vq_end[2], &vq_mid, &interp_id);
       ret = fread(buf, 1, 8, f1);
       if (ret != 8) break;
-      decode_packet(ffeat, vq_mem, buf);
+      decode_packet(features, vq_mem, buf);
+      for (i=0;i<4;i++) {
+        fwrite(features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
+      }
     }
     return 0;
   }
--- /dev/null
+++ b/dnn/lpcnet_dec.c
@@ -1,0 +1,154 @@
+/* Copyright (c) 2017-2019 Mozilla */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "kiss_fft.h"
+#include "common.h"
+#include <math.h>
+#include "freq.h"
+#include "pitch.h"
+#include "arch.h"
+#include "celt_lpc.h"
+#include <assert.h>
+#include "lpcnet_private.h"
+#include "lpcnet.h"
+
+/* Read cursor over a byte buffer; each byte is consumed from its highest bit down. */
+typedef struct {
+    int byte_pos;               /* index into chars of the byte currently being read */
+    int bit_pos;                /* number of bits already consumed from that byte */
+    int max_bytes;              /* buffer length; bits_unpack() stops when byte_pos reaches it */
+    const unsigned char *chars; /* buffer being read; only ever read through this pointer */
+} unpacker;
+/* Point the reader at the first bit of buf, which holds size bytes. */
+void bits_unpacker_init(unpacker *bits, unsigned char *buf, int size) {
+  bits->byte_pos = 0;
+  bits->bit_pos = 0;
+  bits->max_bytes = size;
+  bits->chars = buf;  /* the buffer is referenced, not copied */
+}
+/* Read the next nb_bits bits; the first bit read ends up most significant in the result. */
+unsigned int bits_unpack(unpacker *bits, int nb_bits) {
+  unsigned int d=0;
+  while(nb_bits)
+  {
+    if (bits->byte_pos == bits->max_bytes) {  /* buffer exhausted before nb_bits were read */
+      fprintf(stderr, "something went horribly wrong\n");
+      return 0;  /* bits gathered so far are discarded */
+    }
+    d<<=1;
+    d |= (bits->chars[bits->byte_pos]>>(BITS_PER_CHAR-1 - bits->bit_pos))&1;  /* bit_pos 0 selects bit BITS_PER_CHAR-1 */
+    bits->bit_pos++;
+    if (bits->bit_pos==BITS_PER_CHAR)  /* current byte fully consumed: move to the next one */
+    {
+      bits->bit_pos=0;
+      bits->byte_pos++;
+    }
+    nb_bits--;
+  }
+  return d;
+}
+/* Decode one 8-byte packet into four feature vectors; vq_mem is read and then overwritten. */
+void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, unsigned char buf[8])
+{
+  int c0_id;       /* 7-bit field */
+  int main_pitch;  /* 6-bit field */
+  int modulation;  /* 3-bit field */
+  int corr_id;     /* 2-bit field */
+  int vq_end[3];   /* three 10-bit fields */
+  int vq_mid;      /* 13-bit field */
+  int interp_id;   /* 3-bit field */
+  
+  int i;
+  int sub;
+  int voiced = 1;
+  float frame_corr;
+  ;  /* stray empty statement; has no effect */
+  unpacker bits;
+  /* The field widths read below add up to 64 bits, i.e. the whole 8-byte buffer. */
+  bits_unpacker_init(&bits, buf, 8);
+  c0_id = bits_unpack(&bits, 7);
+  main_pitch = bits_unpack(&bits, 6);
+  modulation = bits_unpack(&bits, 3);
+  corr_id = bits_unpack(&bits, 2);
+  vq_end[0] = bits_unpack(&bits, 10);
+  vq_end[1] = bits_unpack(&bits, 10);
+  vq_end[2] = bits_unpack(&bits, 10);
+  vq_mid = bits_unpack(&bits, 13);
+  interp_id = bits_unpack(&bits, 3);
+  //fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);
+
+  /* RNN_CLEAR is defined elsewhere; presumably it zero-fills each vector - TODO confirm. */
+  for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES);
+
+  modulation -= 4;  /* re-centre the 3-bit value from 0..7 to -4..3 */
+  if (modulation==-4) {  /* raw value 0 clears the voiced flag and means no modulation */
+    voiced = 0;
+    modulation = 0;
+  }
+  if (voiced) {  /* corr_id maps to frame_corr with a different offset and step per case */
+    frame_corr = 0.3875f + .175f*corr_id;
+  } else {
+    frame_corr = 0.0375f + .075f*corr_id;
+  }
+  for (sub=0;sub<4;sub++) {  /* two pitch-related slots are written in every vector */
+    float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;  /* 2^(main_pitch/21) * PITCH_MIN_PERIOD */
+    p *= 1 + modulation/16./7.*(2*sub-3);  /* (2*sub-3) runs -3,-1,1,3 across the four vectors */
+    features[sub][2*NB_BANDS] = .02*(p-100);
+    features[sub][2*NB_BANDS + 1] = frame_corr-.5;
+  }
+  /* Vector 3: slot 0 comes from c0_id, slots 1..NB_BANDS_1 from the sum of three codebook rows. */
+  features[3][0] = (c0_id-64)/4.;
+  for (i=0;i<NB_BANDS_1;i++) {
+    features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
+  }
+  /* vq_mid values >= 4096 select the same codebook row as vq_mid-4096, negated. */
+  float sign = 1;
+  if (vq_mid >= 4096) {
+    vq_mid -= 4096;
+    sign = -1;
+  }
+  for (i=0;i<NB_BANDS;i++) {  /* vector 1 starts as the signed ceps_codebook_diff4 row */
+    features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
+  }
+  if ((vq_mid&MULTI_MASK) < 2) {  /* low bits of vq_mid pick what is added: average of vq_mem and vector 3 */
+    for (i=0;i<NB_BANDS;i++) features[1][i] += .5*(vq_mem[i] + features[3][i]);
+  } else if ((vq_mid&MULTI_MASK) == 2) {  /* ... or vq_mem alone */
+    for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
+  } else {  /* ... or vector 3 alone */
+    for (i=0;i<NB_BANDS;i++) features[1][i] += features[3][i];
+  }
+  /* perform_double_interp is defined elsewhere; presumably it fills vectors 0 and 2 - TODO confirm. */
+  perform_double_interp(features, vq_mem, interp_id);
+  /* Store the first NB_BANDS values of vector 3 in the caller's vq_mem (RNN_COPY is defined elsewhere). */
+  RNN_COPY(vq_mem, &features[3][0], NB_BANDS);
+}
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2017-2018 Mozilla */
+/* Copyright (c) 2017-2019 Mozilla */
 /*
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
@@ -43,8 +43,6 @@
 #include "lpcnet.h"
 
 
-#define NB_DELTA_CEPS 6
-
 //#define NB_FEATURES (2*NB_BANDS+3+LPC_ORDER)
 
 
@@ -317,8 +315,6 @@
     
     return id;
 }
-
-#define FORBIDDEN_INTERP 7
 
 int interp_search(const float *x, const float *left, const float *right, float *dist_out)
 {
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -16,6 +16,9 @@
 #define MULTI 4
 #define MULTI_MASK (MULTI-1)
 
+#define FORBIDDEN_INTERP 7
+
+
 struct LPCNetEncState{
   float analysis_mem[OVERLAP_SIZE];
   float mem_preemph;
@@ -51,5 +54,6 @@
 
 void compute_frame_features(LPCNetEncState *st, const float *in);
 
+void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, unsigned char buf[8]); /* implemented in lpcnet_dec.c */
 
 #endif
--- a/dnn/train_lpcnet.py
+++ b/dnn/train_lpcnet.py
@@ -103,8 +103,8 @@
 del in_exc
 
 # dump models to disk as we go
-checkpoint = ModelCheckpoint('lpcnet24fq_384_10_G16_{epoch:02d}.h5')
+checkpoint = ModelCheckpoint('lpcnet24g_384_10_G16_{epoch:02d}.h5')
 
-model.load_weights('lpcnet24f_384_10_G16_31.h5')
-model.compile(optimizer=Adam(0.0005, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy')
+model.load_weights('lpcnet24c_384_10_G16_120.h5')
+model.compile(optimizer=Adam(0.0001, amsgrad=True), loss='sparse_categorical_crossentropy')
 model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))])
--