ref: b5ff531bf0ec3576383d420d1ae6cca31f0612b0
parent: 0077f4b8723c81e7f094f190f78715fdda21b5ef
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Mon Mar 11 22:31:15 EDT 2019
Optional quantization
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -591,7 +591,7 @@
}
}
-static void process_superframe(DenoiseState *st, FILE *ffeat, int encode) {
+static void process_superframe(DenoiseState *st, FILE *ffeat, int encode, int quantize) {
int i;
int sub;
int best_i;
@@ -607,10 +607,11 @@
float center_pitch;
int main_pitch;
int modulation;
- int c0_id;
- int vq_end[3];
- int vq_mid;
+ int c0_id=0;
+ int vq_end[3]={0};
+ int vq_mid=0;
int corr_id = 0;
+ int interp_id=0;
for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
for(sub=0;sub<8;sub++) {
@@ -654,7 +655,7 @@
best_i = pitch_prev[sub][best_i];
}
frame_corr /= 8;
- if (frame_corr < 0) frame_corr = 0;
+ if (quantize && frame_corr < 0) frame_corr = 0;
for (sub=0;sub<8;sub++) {
//printf("%d %f\n", best[2+sub], frame_corr);
}
@@ -677,11 +678,11 @@
max_a = mean_pitch/32;
best_a = MIN16(max_a, MAX16(-max_a, best_a));
corr_id = (int)floor((frame_corr-.3f)/.175f);
- frame_corr = 0.3875f + .175f*corr_id;
+ if (quantize) frame_corr = 0.3875f + .175f*corr_id;
} else {
best_a = 0;
corr_id = (int)floor(frame_corr/.075f);
- frame_corr = 0.0375f + .075f*corr_id;
+ if (quantize) frame_corr = 0.0375f + .075f*corr_id;
}
//best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);
best_b = (sy - best_a*sx)/sw;
@@ -695,35 +696,30 @@
//printf("%f %f\n", best_a/center_pitch, best_corr);
//for (sub=2;sub<10;sub++) printf("%f %d %f\n", best_b + sub*best_a, best[sub], best_corr);
for (sub=0;sub<4;sub++) {
-#if 1
- float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
- p *= 1 + modulation/16./7.*(2*sub-3);
- st->features[sub][2*NB_BANDS] = .02*(p-100);
- //st->features[sub][2*NB_BANDS + 1] = voiced ? 1 : -1;
- //if (frame_corr < .2) st->features[sub][2*NB_BANDS] = -2;
- st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;
-#else
- st->features[sub][2*NB_BANDS] = .01*(best[2+2*sub]+best[2+2*sub+1]-200);
- st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;
-#endif
+ if (quantize) {
+ float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
+ p *= 1 + modulation/16./7.*(2*sub-3);
+ st->features[sub][2*NB_BANDS] = .02*(p-100);
+ st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;
+ } else {
+ st->features[sub][2*NB_BANDS] = .01*(best[2+2*sub]+best[2+2*sub+1]-200);
+ st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;
+ }
//printf("%f %d %f\n", st->features[sub][2*NB_BANDS], best[2+2*sub], frame_corr);
}
//printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
- //printf("%f\n", st->features[3][0]);
- c0_id = (int)floor(.5 + st->features[3][0]*5);
- st->features[3][0] = c0_id/5.;
- quantize_3stage_mbest(&st->features[3][1], vq_end);
- /*perform_interp_relaxation(st->features, vq_mem);*/
- quantize_diff(&st->features[1][0], vq_mem, &st->features[3][0], ceps_codebook_diff4, 11, 1, &vq_mid);
-#if 0
- interp_diff(&st->features[0][0], vq_mem, &st->features[1][0], ceps_codebook_diff2, 6, 0);
- interp_diff(&st->features[2][0], &st->features[1][0], &st->features[3][0], ceps_codebook_diff2, 6, 0);
-#else
- int interp_id = double_interp_search(st->features, vq_mem);
- perform_double_interp(st->features, vq_mem, interp_id);
-#endif
+ if (quantize) {
+ //printf("%f\n", st->features[3][0]);
+ c0_id = (int)floor(.5 + st->features[3][0]*5);
+ st->features[3][0] = c0_id/5.;
+ quantize_3stage_mbest(&st->features[3][1], vq_end);
+ /*perform_interp_relaxation(st->features, vq_mem);*/
+ quantize_diff(&st->features[1][0], vq_mem, &st->features[3][0], ceps_codebook_diff4, 11, 1, &vq_mid);
+ interp_id = double_interp_search(st->features, vq_mem);
+ perform_double_interp(st->features, vq_mem, interp_id);
+ }
//printf("\n");
RNN_COPY(vq_mem, &st->features[3][0], NB_BANDS);
if (encode) {
@@ -879,11 +875,21 @@
int training = -1;
int encode = 0;
int decode = 0;
+ int quantize = 0;
st = rnnoise_create();
if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
+ if (argc == 5 && strcmp(argv[1], "-qtrain")==0) {
+ training = 1;
+ quantize = 1;
+ }
if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
+ if (argc == 4 && strcmp(argv[1], "-qtest")==0) {
+ training = 0;
+ quantize = 1;
+ }
if (argc == 4 && strcmp(argv[1], "-encode")==0) {
training = 0;
+ quantize = 1;
encode = 1;
}
if (argc == 4 && strcmp(argv[1], "-decode")==0) {
@@ -976,7 +982,7 @@
st->pcount++;
/* Running on groups of 4 frames. */
if (st->pcount == 4) {
- process_superframe(st, ffeat, encode);
+ process_superframe(st, ffeat, encode, quantize);
st->pcount = 0;
}
--
⑨