shithub: opus

Download patch

ref: ddd5669e79a9e581e8420d2ed397e524da864337
parent: c99054dad9053ff7dc30cf0379d8aeb240b2e171
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Oct 28 19:33:47 EDT 2023

Pitch and fargan model updates

Removing one of the 2D conv layers for pitch estimation reduces
complexity without noticeable degradation. The FARGAN model has had more
adversarial training.
Also, there is no need for double precision in the low-pass filter.

--- a/autogen.sh
+++ b/autogen.sh
@@ -9,7 +9,7 @@
 srcdir=`dirname $0`
 test -n "$srcdir" && cd "$srcdir"
 
-dnn/download_model.sh 290be25
+dnn/download_model.sh c99054d
 
 echo "Updating build configuration files, please wait...."
 
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -81,8 +81,8 @@
     float xi, yi;
     xi = x[i];
     yi = x[i] + mem[0];
-    mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
-    mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
+    mem[0] = mem[1] + (b[0]*xi - a[0]*yi);
+    mem[1] = (b[1]*xi - a[1]*yi);
     y[i] = yi;
   }
 }
--- a/dnn/pitchdnn.c
+++ b/dnn/pitchdnn.c
@@ -33,8 +33,7 @@
   /* xcorr*/
   OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
   compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
-  compute_conv2d(&model->conv2d_2, &conv1_tmp1[1], st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
-  compute_conv2d(&model->conv2d_3, downsampler_in, st->xcorr_mem3, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
+  compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
 
   compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH);
   compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out);
--- a/dnn/torch/neural-pitch/export_neuralpitch_weights.py
+++ b/dnn/torch/neural-pitch/export_neuralpitch_weights.py
@@ -73,8 +73,7 @@
 
     conv_layers = [
         ('conv.1', "conv2d_1"),
-        ('conv.4', "conv2d_2"),
-        ('conv.7', "conv2d_3")
+        ('conv.4', "conv2d_2")
     ]
 
 
--- a/dnn/torch/neural-pitch/models.py
+++ b/dnn/torch/neural-pitch/models.py
@@ -86,13 +86,10 @@
 
         self.conv = torch.nn.Sequential(
             torch.nn.ZeroPad2d((2,0,1,1)),
-            torch.nn.Conv2d(1, 8, 3, bias=True),
+            torch.nn.Conv2d(1, 4, 3, bias=True),
             self.activation,
             torch.nn.ZeroPad2d((2,0,1,1)),
-            torch.nn.Conv2d(8, 8, 3, bias=True),
-            self.activation,
-            torch.nn.ZeroPad2d((2,0,1,1)),
-            torch.nn.Conv2d(8, 1, 3, bias=True),
+            torch.nn.Conv2d(4, 1, 3, bias=True),
             self.activation,
         )
 
--