shithub: opus

Download patch

ref: 0077f4b8723c81e7f094f190f78715fdda21b5ef
parent: c9b7efd274807043ab926d5067362f6b7fbf352b
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Mon Mar 11 19:56:49 EDT 2019

add decoder

--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -735,6 +735,60 @@
   }
 }
 
+void decode_packet(FILE *ffeat, int c0_id, int main_pitch, int modulation, int corr_id, int vq_end[3], int vq_mid, int interp_id)
+{ /* Turns one packet of integer ids into 4 vectors of NB_FEATURES floats and writes them to ffeat. */
+  int i;
+  int sub;
+  int voiced = 1; /* cleared below when modulation holds the -4 flag value */
+  float frame_corr;
+  float features[4][NB_FEATURES]; /* the four output vectors; rows are indexed by sub below */
+  for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_FEATURES); /* presumably zero-fills each row (RNN_CLEAR is defined elsewhere) */
+
+  if (modulation==-4) { /* -4 is a flag rather than a modulation amount: mark as not voiced ... */
+    voiced = 0;
+    modulation = 0; /* ... and use no modulation in the pitch computation */
+  }
+  if (voiced) { /* corr_id maps to frame_corr linearly, with a different offset and step per branch */
+    frame_corr = 0.3875f + .175f*corr_id;
+  } else {
+    frame_corr = 0.0375f + .075f*corr_id;
+  }
+  for (sub=0;sub<4;sub++) {
+    float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD; /* main_pitch is on a log2 scale: 21 steps double p; same value for every sub */
+    p *= 1 + modulation/16./7.*(2*sub-3); /* (2*sub-3) is -3,-1,1,3, so p varies linearly across the four rows */
+    features[sub][2*NB_BANDS] = .02*(p-100); /* p, offset by 100 and scaled, goes in slot 2*NB_BANDS */
+    features[sub][2*NB_BANDS + 1] = frame_corr-.5; /* the same frame_corr-.5 is stored in every row */
+  }
+  /* Row 3: slot 0 comes from c0_id, slots 1..NB_BANDS_1 from the sum of three codebook entries. */
+  features[3][0] = c0_id/5.;
+  for (i=0;i<NB_BANDS_1;i++) { /* NOTE(review): vq_end[] values index the codebooks with no range check here */
+    features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
+  }
+  /* Row 1: a signed ceps_codebook_diff4 entry plus a base term selected further down. */
+  float sign = 1;
+  if (vq_mid >= 2048) { /* ids >= 2048 select the negated entry; 2048 is removed before indexing */
+    vq_mid -= 2048;
+    sign = -1;
+  }
+  for (i=0;i<NB_BANDS;i++) { /* NOTE(review): vq_mid is not range-checked before indexing the codebook */
+    features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
+  }
+  if ((vq_mid&MULTI_MASK) < 2) { /* masked id (after the 2048 adjustment) picks the base: <2 -> mean of vq_mem and row 3 */
+    for (i=0;i<NB_BANDS;i++) features[1][i] += .5*(vq_mem[i] + features[3][i]);
+  } else if ((vq_mid&MULTI_MASK) == 2) { /* ==2 -> vq_mem only */
+    for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
+  } else { /* otherwise -> row 3 only */
+    for (i=0;i<NB_BANDS;i++) features[1][i] += features[3][i];
+  }
+  /* NOTE(review): perform_double_interp is defined elsewhere; presumably it fills rows 0 and 2 using interp_id - confirm. */
+  perform_double_interp(features, vq_mem, interp_id);
+
+  RNN_COPY(vq_mem, &features[3][0], NB_BANDS); /* presumably saves the first NB_BANDS values of row 3 into vq_mem (not a local) for the next call - confirm */
+  for (i=0;i<4;i++) { /* rows are written in order 0..3 as raw floats */
+    fwrite(features[i], sizeof(float), NB_FEATURES, ffeat); /* NOTE(review): fwrite's return value is not checked */
+  }
+}
+
 static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
   int i;
   for (i=0;i<N;i++) {
@@ -824,6 +878,7 @@
   float noise_std=0;
   int training = -1;
   int encode = 0;
+  int decode = 0;
   st = rnnoise_create();
   if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
   if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
@@ -831,6 +886,10 @@
       training = 0;
       encode = 1;
   }
+  if (argc == 4 && strcmp(argv[1], "-decode")==0) {
+      training = 0;
+      decode = 1;
+  }
   if (training == -1) {
     fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv[0]);
     fprintf(stderr, "  or   %s -test <speech> <features out>\n", argv[0]);
@@ -845,6 +904,16 @@
   if (ffeat == NULL) {
     fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
     exit(1);
+  }
+  if (decode) {
+    while (1) {
+      int ret;
+      int c0_id, main_pitch, modulation, corr_id, vq_end[3], vq_mid, interp_id;
+      ret = fscanf(f1, "%d %d %d %d %d %d %d %d %d\n", &c0_id, &main_pitch, &modulation, &corr_id, &vq_end[0], &vq_end[1], &vq_end[2], &vq_mid, &interp_id);
+      if (ret != 9) break;
+      decode_packet(ffeat, c0_id, main_pitch, modulation, corr_id, vq_end, vq_mid, interp_id);
+    }
+    return 0;
   }
   if (training) {
     fpcm = fopen(argv[4], "w");
--