shithub: opus

Download patch

ref: 4e331f377f478f3a6da1ac8923946d49534fd2c3
parent: fb1d4fdec28d79abd06dfb79bacaa3fd57c5a109
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Tue Oct 16 16:23:20 EDT 2018

LPC generation from the cepstral coefficients

--- a/dnn/denoise.c
+++ b/dnn/denoise.c
@@ -325,12 +325,29 @@
   RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
   RNN_COPY(x0, x, WINDOW_SIZE);
   apply_window(x);
+  forward_transform(X, x);
+#if TRAINING
+  for (i=lowpass;i<FREQ_SIZE;i++)
+    X[i].r = X[i].i = 0;
+#endif
+  compute_band_energy(Ex, X);
   {
     float e;
     float g_1;
-    _celt_autocorr(x, ac, NULL, 0, LPC_ORDER, WINDOW_SIZE);
+    if (0) {
+      _celt_autocorr(x, ac, NULL, 0, LPC_ORDER, WINDOW_SIZE);
+    } else {
+      float Xr[FREQ_SIZE];
+      kiss_fft_cpx X_auto[FREQ_SIZE];
+      float x_auto[FRAME_SIZE];
+      interp_band_gain(Xr, Ex);
+      RNN_CLEAR(X_auto, FREQ_SIZE);
+      for (i=0;i<160;i++) X_auto[i].r = Xr[i];
+      inverse_transform(x_auto, X_auto);
+      for (i=0;i<LPC_ORDER+1;i++) ac[i] = x_auto[i];
+    }
     /* -40 dB noise floor. */
-    ac[0] += ac[0]*1e-4 + 320/12;
+    ac[0] += ac[0]*1e-4 + 320/12/38.;
     /* Lag windowing. */
     for (i=1;i<LPC_ORDER+1;i++) ac[i] *= (1 - 6e-5*i*i);
     e = _celt_lpc(lpc, rc, ac, LPC_ORDER);
@@ -341,7 +358,7 @@
     printf("\n");
 #endif
 #if 0
-    printf("%f 1 ", e);
+    printf("1 ");
     for(i=0;i<LPC_ORDER;i++) printf("%f ", lpc[i]);
     printf("\n");
 #endif
@@ -363,12 +380,6 @@
 #endif
     }
   }
-  forward_transform(X, x);
-#if TRAINING
-  for (i=lowpass;i<FREQ_SIZE;i++)
-    X[i].r = X[i].i = 0;
-#endif
-  compute_band_energy(Ex, X);
   return g;
 }
 
--- a/dnn/lpcnet.py
+++ b/dnn/lpcnet.py
@@ -11,8 +11,8 @@
 import h5py
 import sys
 
-rnn_units1=512
-rnn_units2=32
+rnn_units1=384
+rnn_units2=16
 pcm_bits = 8
 embed_size = 128
 pcm_levels = 2**pcm_bits
--- a/dnn/test_wavenet_audio.py
+++ b/dnn/test_wavenet_audio.py
@@ -42,7 +42,7 @@
 
 
 
-model.load_weights('wavenet5p0_30.h5')
+model.load_weights('wavenet5_384_10_G16np_50.h5')
 
 order = 16
 
--- a/dnn/train_wavenet_audio.py
+++ b/dnn/train_wavenet_audio.py
@@ -120,8 +120,8 @@
 in_data = np.concatenate([in_data, pred], axis=-1)
 
 # dump models to disk as we go
-checkpoint = ModelCheckpoint('lpcnet5_512_10_G32np_{epoch:02d}.h5')
+checkpoint = ModelCheckpoint('lpcnet9_384_10_G16_{epoch:02d}.h5')
 
 #model.load_weights('wavenet4f2_30.h5')
-model.compile(optimizer=Adam(0.0005, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.fit([in_data, in_exc, features, periods], out_data, batch_size=batch_size, epochs=60, validation_split=0.2, callbacks=[checkpoint, lpcnet.Sparsify(1000, 20000, 200, 0.1)])
+model.compile(optimizer=Adam(0.001, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+model.fit([in_data, in_exc, features, periods], out_data, batch_size=batch_size, epochs=120, validation_split=0.0, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, 0.1)])
--