shithub: opus

Download patch

ref: 33adba02c7ac5fe1d1f3bd4027f42b87cddc933c
parent: 966a2d22eb0999e9319d1003b4b55d9fd051d33d
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Sep 30 23:59:17 EDT 2023

First version of pitch DNN C code

Totally untested -- most likely doesn't work

--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -415,7 +415,7 @@
 
 #define MAX_CONV2D_INPUTS 2048
 
-void compute_conv2d(const Conv2DLayer *conv, float *out, float *mem, const float *in, int len2, int activation)
+void compute_conv2d(const Conv2dLayer *conv, float *out, float *mem, const float *in, int len2, int activation)
 {
    int i;
    const float *bias;
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -83,7 +83,7 @@
   int out_channels;
   int ktime;
   int kheight;
-} Conv2DLayer;
+} Conv2dLayer;
 
 typedef struct {
   const float *bias;
@@ -175,6 +175,7 @@
 extern const WeightArray rdovaeenc_arrays[];
 extern const WeightArray rdovaedec_arrays[];
 extern const WeightArray fwgan_arrays[];
+extern const WeightArray pitchdnn_arrays[];
 
 int linear_init(LinearLayer *layer, const WeightArray *arrays,
   const char *bias,
@@ -231,6 +232,8 @@
   int kernel_size,
   int nb_neurons,
   int activation);
+
+void compute_conv2d(const Conv2dLayer *conv, float *out, float *mem, const float *in, int len2, int activation); /* declared here so dnn/pitchdnn.c can call it */
 
 int embedding_init(EmbeddingLayer *layer, const WeightArray *arrays,
   const char *embedding_weights,
--- /dev/null
+++ b/dnn/pitchdnn.c
@@ -1,0 +1,61 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include "pitchdnn.h"
+#include "os_support.h"
+#include "nnet.h"
+#include "lpcnet_private.h"
+
+
+int compute_pitchdnn(                 /* Runs the pitch network once; returns the arg-max output bin mapped through (1/60)*pos - 1.5. */
+    PitchDNNState *st,                /* model plus the conv/GRU memories handed to the layer helpers below */
+    const float *if_features,         /* input of the dense_if_upsampler_1 layer */
+    const float *xcorr_features       /* NB_XCORR_FEATURES values, copied into the conv input buffer */
+    )
+{
+  float if1_out[DENSE_IF_UPSAMPLER_1_OUT_SIZE];
+  float downsampler_in[NB_XCORR_FEATURES + DENSE_IF_UPSAMPLER_2_OUT_SIZE]; /* last conv output first, IF branch output after it */
+  float downsampler_out[DENSE_DOWNSAMPLER_OUT_SIZE];
+  float conv1_tmp1[NB_XCORR_FEATURES + 2] = {0}; /* zero-initialised; the writes below start at index 1 */
+  float conv1_tmp2[NB_XCORR_FEATURES + 2] = {0}; /* same layout; the two buffers alternate as conv input and output */
+  float output[DENSE_FINAL_UPSAMPLER_OUT_SIZE];
+  int i;
+  int pos=0;          /* index of the largest output found so far */
+  float maxval=-1;    /* NOTE(review): if no output exceeds -1, pos stays at 0 -- confirm this start value is intended */
+  PitchDNN *model = &st->model;
+
+  /* IF branch: two tanh dense layers; the result is written after the first NB_XCORR_FEATURES slots of downsampler_in. */
+  compute_generic_dense(&model->dense_if_upsampler_1, if1_out, if_features, ACTIVATION_TANH);
+  compute_generic_dense(&model->dense_if_upsampler_2, &downsampler_in[NB_XCORR_FEATURES], if1_out, ACTIVATION_TANH);
+
+  /* xcorr branch: copy the features to offset 1, then three tanh conv passes; the last one writes to the start of downsampler_in. */
+  OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
+  compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, ACTIVATION_TANH);
+  compute_conv2d(&model->conv2d_1, &conv1_tmp1[1], st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, ACTIVATION_TANH);
+  compute_conv2d(&model->conv2d_1, downsampler_in, st->xcorr_mem3, conv1_tmp1, NB_XCORR_FEATURES, ACTIVATION_TANH);
+  /* NOTE(review): all three passes above use model->conv2d_1 (each with its own memory) -- confirm this is intended. */
+  compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH);
+  compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out);
+  compute_generic_dense(&model->dense_final_upsampler, output, st->gru_state, ACTIVATION_LINEAR);
+  /* Arg-max over the output layer; the strict '>' keeps the first index on ties. */
+  for (i=0;i<DENSE_FINAL_UPSAMPLER_OUT_SIZE;i++) {
+    if (output[i] > maxval) {
+      pos = i;
+      maxval = output[i];
+    }
+  }
+  return (1.f/60.f)*pos - 1.5; /* NOTE(review): the function returns int, so this value is truncated toward zero on return */
+  /*return 256.f/pow(2.f, (1.f/60.f)*i);*/
+}
+
+
+void pitchdnn_init(PitchDNNState *st) /* Clears *st, then initialises st->model from pitchdnn_arrays via init_pitchdnn(). */
+{
+  int ret;
+  OPUS_CLEAR(st, 1); /* one PitchDNNState element: the model and every memory array in it */
+  ret = init_pitchdnn(&st->model, pitchdnn_arrays);
+  celt_assert(ret == 0); /* NOTE(review): this is the only check on ret; if assertions are compiled out a failed init goes unreported */
+  /* FIXME: perform arch detection. */
+}
--- /dev/null
+++ b/dnn/pitchdnn.h
@@ -1,0 +1,30 @@
+#ifndef PITCHDNN_H
+#define PITCHDNN_H
+
+
+typedef struct PitchDNN PitchDNN;
+
+#include "pitchdnn_data.h"
+#include "lpcnet_private.h"
+
+#define NB_XCORR_FEATURES (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD)
+
+
+typedef struct { /* State used by pitchdnn_init() and compute_pitchdnn(). */
+  PitchDNN model; /* layer data, set up by init_pitchdnn() in pitchdnn_init() */
+  float gru_state[GRU_1_STATE_SIZE]; /* state argument of compute_generic_gru(); also fed to the final dense layer */
+  float xcorr_mem1[(NB_XCORR_FEATURES + 2)*2]; /* mem argument of the first compute_conv2d() call */
+  float xcorr_mem2[(NB_XCORR_FEATURES + 2)*2*8]; /* mem argument of the second compute_conv2d() call */
+  float xcorr_mem3[(NB_XCORR_FEATURES + 2)*2*8]; /* mem argument of the third call; NOTE(review): the *2 and *8 factors are presumably history depth and channel count -- confirm */
+} PitchDNNState;
+
+
+void pitchdnn_init(PitchDNNState *st); /* clears *st and initialises its model; defined in dnn/pitchdnn.c */
+
+int compute_pitchdnn(               /* Evaluates the pitch network for one set of features; defined in dnn/pitchdnn.c. */
+    PitchDNNState *st,              /* presumably must have been set up with pitchdnn_init() first -- confirm */
+    const float *if_features,       /* input of the first IF dense layer */
+    const float *xcorr_features     /* NB_XCORR_FEATURES values */
+    );
+
+#endif
--- a/dnn/torch/neural-pitch/export_neuralpitch_weights.py
+++ b/dnn/torch/neural-pitch/export_neuralpitch_weights.py
@@ -52,7 +52,7 @@
 
     message = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}"
 
-    writer = CWriter(os.path.join(args.output_dir, "neural_pitch_data"), message=message, model_struct_name='PitchDNN')
+    writer = CWriter(os.path.join(args.output_dir, "pitchdnn_data"), message=message, model_struct_name='PitchDNN')
     writer.header.write(
 f"""
 #include "opus_types.h"
--