ref: ddd5669e79a9e581e8420d2ed397e524da864337
parent: c99054dad9053ff7dc30cf0379d8aeb240b2e171
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Oct 28 19:33:47 EDT 2023
Pitch and FARGAN model updates. Removing one of the 2D conv layers for pitch estimation reduces complexity without noticeable degradation. The FARGAN model has had more adversarial training. Also, there is no need for double precision in the low-pass filter.
--- a/autogen.sh
+++ b/autogen.sh
@@ -9,7 +9,7 @@
srcdir=`dirname $0`
test -n "$srcdir" && cd "$srcdir"
-dnn/download_model.sh 290be25
+dnn/download_model.sh c99054d
echo "Updating build configuration files, please wait...."
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -81,8 +81,8 @@
float xi, yi;
xi = x[i];
yi = x[i] + mem[0];
- mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
- mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
+ mem[0] = mem[1] + (b[0]*xi - a[0]*yi);
+ mem[1] = (b[1]*xi - a[1]*yi);
y[i] = yi;
}
}
--- a/dnn/pitchdnn.c
+++ b/dnn/pitchdnn.c
@@ -33,8 +33,7 @@
/* xcorr*/
OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
- compute_conv2d(&model->conv2d_2, &conv1_tmp1[1], st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
- compute_conv2d(&model->conv2d_3, downsampler_in, st->xcorr_mem3, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
+ compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH);
compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out);
--- a/dnn/torch/neural-pitch/export_neuralpitch_weights.py
+++ b/dnn/torch/neural-pitch/export_neuralpitch_weights.py
@@ -73,8 +73,7 @@
conv_layers = [
('conv.1', "conv2d_1"),
- ('conv.4', "conv2d_2"),
- ('conv.7', "conv2d_3")
+ ('conv.4', "conv2d_2")
]
--- a/dnn/torch/neural-pitch/models.py
+++ b/dnn/torch/neural-pitch/models.py
@@ -86,13 +86,10 @@
self.conv = torch.nn.Sequential(
torch.nn.ZeroPad2d((2,0,1,1)),
- torch.nn.Conv2d(1, 8, 3, bias=True),
+ torch.nn.Conv2d(1, 4, 3, bias=True),
self.activation,
torch.nn.ZeroPad2d((2,0,1,1)),
- torch.nn.Conv2d(8, 8, 3, bias=True),
- self.activation,
- torch.nn.ZeroPad2d((2,0,1,1)),
- torch.nn.Conv2d(8, 1, 3, bias=True),
+ torch.nn.Conv2d(4, 1, 3, bias=True),
self.activation,
)
--
⑨