shithub: opus

Download patch

ref: aa970f2a60ef1987cb53b764043b4895d1a6070b
parent: 54710acfe787aad78bcfa4d5150288b7fe027653
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Mon Jun 25 09:56:28 EDT 2018

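Computing features

Replace the delta-cepstral and spectral-variability features with a new
feature vector of size 2*NB_BANDS+2+LPC_ORDER: the DCT of the log band
energies, the DCT of the per-band pitch correlation, the pitch index, the
pitch gain, and the LPC coefficients. frame_analysis() now writes the LPC
coefficients into a caller-supplied buffer instead of a local array. The
early return for silent frames is removed and the debug printf() calls are
commented out.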

--- a/dnn/denoise.c
+++ b/dnn/denoise.c
@@ -64,7 +64,7 @@
 #define CEPS_MEM 8
 #define NB_DELTA_CEPS 6
 
-#define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
+#define NB_FEATURES (2*NB_BANDS+2+LPC_ORDER)
 
 
 #ifndef TRAINING
@@ -305,12 +305,11 @@
 int band_lp = NB_BANDS;
 #endif
 
-static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
+static void frame_analysis(DenoiseState *st, float *lpc, kiss_fft_cpx *X, float *Ex, const float *in) {
   int i;
   float x[WINDOW_SIZE];
   float x0[WINDOW_SIZE];
   float ac[LPC_ORDER+1];
-  float lpc[LPC_ORDER];
   float rc[LPC_ORDER];
   RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
   for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
@@ -360,17 +359,15 @@
                                   float *Ex, float *Ep, float *Exp, float *features, const float *in) {
   int i;
   float E = 0;
-  float *ceps_0, *ceps_1, *ceps_2;
-  float spec_variability = 0;
   float Ly[NB_BANDS];
+  float lpc[LPC_ORDER];
   float p[WINDOW_SIZE];
   float pitch_buf[PITCH_BUF_SIZE];
   int pitch_index;
   float gain;
-  float *(pre[1]);
   float tmp[NB_BANDS];
   float follow, logMax;
-  frame_analysis(st, X, Ex, in);
+  frame_analysis(st, lpc, X, Ex, in);
   RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
   RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
   //pre[0] = &st->pitch_buf[0];
@@ -378,14 +375,14 @@
   pitch_downsample(pitch_buf, PITCH_BUF_SIZE);
   pitch_search(pitch_buf+PITCH_MAX_PERIOD, pitch_buf, PITCH_FRAME_SIZE<<1,
                (PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD)<<1, &pitch_index);
-  printf("%d ", pitch_index);
+  //printf("%d ", pitch_index);
   pitch_index = 2*PITCH_MAX_PERIOD-pitch_index;
-  printf("%d ", pitch_index);
+  //printf("%d ", pitch_index);
   gain = remove_doubling(pitch_buf, 2*PITCH_MAX_PERIOD, 2*PITCH_MIN_PERIOD,
           2*PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
   st->last_period = pitch_index;
   st->last_gain = gain;
-  printf("%d %f\n", pitch_index, gain);
+  //printf("%d %f\n", pitch_index, gain);
   for (i=0;i<WINDOW_SIZE;i++)
     p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index/2+i];
   apply_window(p);
@@ -393,60 +390,36 @@
   compute_band_energy(Ep, P);
   compute_band_corr(Exp, X, P);
   for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ex[i]*Ep[i]);
+#if 0
+  for (i=0;i<NB_BANDS;i++) printf("%f ", Exp[i]);
+  printf("\n");
+#endif
   dct(tmp, Exp);
-  for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
-  features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
-  features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
-  features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
+  for (i=0;i<NB_BANDS;i++) features[NB_BANDS+i] = tmp[i];
+  features[NB_BANDS] -= 1.3;
+  features[NB_BANDS+1] -= 0.9;
   logMax = -2;
   follow = -2;
   for (i=0;i<NB_BANDS;i++) {
     Ly[i] = log10(1e-2+Ex[i]);
-    Ly[i] = MAX16(logMax-7, MAX16(follow-1.5, Ly[i]));
+    Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
     logMax = MAX16(logMax, Ly[i]);
-    follow = MAX16(follow-1.5, Ly[i]);
+    follow = MAX16(follow-2.5, Ly[i]);
     E += Ex[i];
   }
-  if (!TRAINING && E < 0.04) {
-    /* If there's no audio, avoid messing up the state. */
-    RNN_CLEAR(features, NB_FEATURES);
-    return 1;
-  }
   dct(features, Ly);
-  features[0] -= 12;
-  features[1] -= 4;
-  ceps_0 = st->cepstral_mem[st->memid];
-  ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
-  ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
-  for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
-  st->memid++;
-  for (i=0;i<NB_DELTA_CEPS;i++) {
-    features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
-    features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
-    features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
-  }
-  /* Spectral variability features. */
-  if (st->memid == CEPS_MEM) st->memid = 0;
-  for (i=0;i<CEPS_MEM;i++)
-  {
-    int j;
-    float mindist = 1e15f;
-    for (j=0;j<CEPS_MEM;j++)
-    {
-      int k;
-      float dist=0;
-      for (k=0;k<NB_BANDS;k++)
-      {
-        float tmp;
-        tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
-        dist += tmp*tmp;
-      }
-      if (j!=i)
-        mindist = MIN32(mindist, dist);
-    }
-    spec_variability += mindist;
-  }
-  features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
+  features[0] -= 4;
+#if 0
+  for (i=0;i<NB_BANDS;i++) printf("%f ", Ly[i]);
+  printf("\n");
+#endif
+  features[2*NB_BANDS] = .01*(pitch_index-200);
+  features[2*NB_BANDS+1] = gain;
+  for (i=0;i<LPC_ORDER;i++) features[2*NB_BANDS+2+i] = lpc[i];
+#if 0
+  for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
+  printf("\n");
+#endif
   return TRAINING && E < 0.1;
 }
 
--