ref: 823be85778a78aee37ed3c050c1454f5e6797b98
parent: a15cc2016553a0fd39039603d04a6dfa77aa6687
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Tue Jan 29 21:29:54 EST 2019
comments
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -155,6 +155,7 @@
static float xc[10][PITCH_MAX_PERIOD];
static float ener[10][PITCH_MAX_PERIOD];
static float frame_max_corr[PITCH_MAX_PERIOD];
+ /* Cross-correlation on half-frames. */
for (sub=0;sub<2;sub++) {int off = sub*FRAME_SIZE/2;
celt_pitch_xcorr(&st->exc_buf[PITCH_MAX_PERIOD+off], st->exc_buf+off, xcorr, FRAME_SIZE/2, PITCH_MAX_PERIOD);
@@ -170,6 +171,7 @@
#endif
}
pcount++;
+ /* Running on groups of 4 frames. */
if (pcount == 4) {int period;
float best_a=0;
@@ -181,6 +183,8 @@
int voiced;
best_corr = -100;
best_period = PITCH_MIN_PERIOD;
+ /* Search for the approximate pitch by considering the max correlation over all sub-frames
+ within a window corresponding to 25% of the pitch (4 semitones). */
for (i=PITCH_MAX_PERIOD-PITCH_MIN_PERIOD*5/4;i>=0;i--) {int j;
float corr;
@@ -235,6 +239,7 @@
}
frame_corr = sc/sw;
voiced = frame_corr > .45;
+ /* Linear regression to figure out the pitch contour. */
best_a = (sw*sxy - sx*sy)/(sw*sxx - sx*sx);
if (voiced) {float mean_pitch = sy/sw;
@@ -246,6 +251,7 @@
}
//best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);
best_b = (sy - best_a*sx)/sw;
+ /* Quantizing the pitch as "main" pitch + slope. */
float center_pitch = best_b+5.5*best_a;
int main_pitch = (int)floor(.5 + 21.*log2(center_pitch/PITCH_MIN_PERIOD));
main_pitch = IMAX(0, IMIN(63, main_pitch));
--
⑨