shithub: opus

Download patch

ref: 7388486c356f36c329113a42f186f45372a0db54
parent: 6b279094aa81706474f7e397b803183c9fd28f43
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Tue Mar 12 10:41:37 EDT 2019

Generating samples using quantized cepstrum LPC

--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -721,6 +721,11 @@
     interp_id = double_interp_search(st->features, vq_mem);
     perform_double_interp(st->features, vq_mem, interp_id);
   }
+  for (sub=0;sub<4;sub++) {
+    float g = lpc_from_cepstrum(st->lpc, st->features[sub]);
+    st->features[sub][2*NB_BANDS+2] = log10(g);
+    for (i=0;i<LPC_ORDER;i++) st->features[sub][2*NB_BANDS+3+i] = st->lpc[i];
+  }
   //printf("\n");
   RNN_COPY(vq_mem, &st->features[3][0], NB_BANDS);
   if (encode) {
@@ -820,16 +825,23 @@
   b[1] = .75*uni_rand();
 }
 
-void write_audio(DenoiseState *st, const short *pcm, float noise_std, FILE *file) {
+void compute_noise(int *noise, float noise_std) {
   int i;
+  for (i=0;i<FRAME_SIZE;i++) {
+    noise[i] = (int)floor(.5 + noise_std*.707*(log_approx((float)rand()/RAND_MAX)-log_approx((float)rand()/RAND_MAX)));
+  }
+}
+
+void write_audio(DenoiseState *st, const short *pcm, const int *noise, FILE *file) {
+  int i, k;
+  for (k=0;k<4;k++) {
   unsigned char data[4*FRAME_SIZE];
   for (i=0;i<FRAME_SIZE;i++) {
-    int noise;
     float p=0;
     float e;
     int j;
-    for (j=0;j<LPC_ORDER;j++) p -= st->lpc[j]*st->sig_mem[j];
-    e = lin2ulaw(pcm[i] - p);
+    for (j=0;j<LPC_ORDER;j++) p -= st->features[k][2*NB_BANDS+3+j]*st->sig_mem[j];
+    e = lin2ulaw(pcm[k*FRAME_SIZE+i] - p);
     /* Signal. */
     data[4*i] = lin2ulaw(st->sig_mem[0]);
     /* Prediction. */
@@ -839,8 +851,7 @@
     /* Excitation out. */
     data[4*i+3] = e;
     /* Simulate error on excitation. */
-    noise = (int)floor(.5 + noise_std*.707*(log_approx((float)rand()/RAND_MAX)-log_approx((float)rand()/RAND_MAX)));
-    e += noise;
+    e += noise[k*FRAME_SIZE+i];
     e = IMIN(255, IMAX(0, e));
     
     RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
@@ -848,6 +859,7 @@
     st->exc_mem = e;
   }
   fwrite(data, 4*FRAME_SIZE, 1, file);
+  }
 }
 
 int main(int argc, char **argv) {
@@ -866,6 +878,8 @@
   FILE *ffeat;
   FILE *fpcm=NULL;
   short pcm[FRAME_SIZE]={0};
+  short pcmbuf[FRAME_SIZE*4]={0};
+  int noisebuf[FRAME_SIZE*4]={0};
   short tmp[FRAME_SIZE] = {0};
   float savedX[FRAME_SIZE] = {0};
   float speech_gain=1;
@@ -980,17 +994,21 @@
       x[i] *= g;
     }
     for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5;
+    /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
+    for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
     compute_frame_features(st, x);
+
+    RNN_COPY(&pcmbuf[st->pcount*FRAME_SIZE], pcm, FRAME_SIZE);
+    if (fpcm) {
+        compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std);
+    }
     st->pcount++;
     /* Running on groups of 4 frames. */
     if (st->pcount == 4) {
       process_superframe(st, ffeat, encode, quantize);
+      if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm);
       st->pcount = 0;
     }
-
-    /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
-    for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
-    if (fpcm) write_audio(st, pcm, noise_std, fpcm);
     //if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
     for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
     old_speech_gain = speech_gain;
--