ref: 966a2d22eb0999e9319d1003b4b55d9fd051d33d
parent: f3b86f941408b37b0c0236eb5b8b09605b8a713b
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Sep 30 19:43:51 EDT 2023
Code for 2D convolution (untested)
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -384,6 +384,58 @@
compute_generic_conv1d(&matrix, output, mem, input, layer->nb_inputs, layer->activation);
}
+/* Computes non-padded 2D convolution for input [ ktime x in_channels x (len2+kheight-1) ],
+ kernel [ out_channels x in_channels x ktime x kheight ],
+ storing the output as [ out_channels x len2 ].
+ We assume that the output dimension along the ktime (frame) axis is 1,
+ i.e. we process one frame at a time. */
+void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int len2)
+{
+ int i;
+ int in_stride;
+ in_stride = len2+kheight-1;
+ OPUS_CLEAR(out, out_channels*len2);
+ for (i=0;i<out_channels;i++) {
+ int m;
+ for (m=0;m<in_channels;m++) {
+ int t;
+ for (t=0;t<ktime;t++) {
+ int h;
+ for (h=0;h<kheight;h++) {
+ int j;
+ for (j=0;j<len2;j++) {
+ out[i*len2 + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + t*kheight + h] *
+ in[t*in_channels*in_stride + m*in_stride + j + h];
+ }
+ }
+ }
+ }
+ }
+}
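+
+/* Editor's illustrative sketch (not part of the original change): calling
+ conv2d_float() directly with a hypothetical 2-channel input, 3 output channels
+ and a 2x3 (ktime x kheight) kernel, producing len2 == 4 outputs per channel. */
+#if 0
+static void conv2d_float_example(void)
+{
+ float out[3*4]; /* [ out_channels x len2 ] */
+ float weights[3*2*2*3] = {0}; /* [ out_channels x in_channels x ktime x kheight ] */
+ float in[2*2*(4+3-1)] = {0}; /* [ ktime x in_channels x (len2+kheight-1) ] */
+ conv2d_float(out, weights, 2, 3, 2, 3, in, 4);
+}
+#endif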
+
+#define MAX_CONV2D_INPUTS 2048
+
+void compute_conv2d(const Conv2DLayer *conv, float *out, float *mem, const float *in, int len2, int activation)
+{
+ int i;
+ const float *bias;
+ float in_buf[MAX_CONV2D_INPUTS];
+ int time_stride;
+ celt_assert(in != out);
+ time_stride = conv->in_channels*(len2+conv->kheight-1);
+ celt_assert(conv->ktime*time_stride <= MAX_CONV2D_INPUTS);
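+ /* Build the [ ktime x in_channels x (len2+kheight-1) ] input: ktime-1 frames of
+ history from mem followed by the new frame, then advance mem by one frame. */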
+ OPUS_COPY(in_buf, mem, (conv->ktime-1)*time_stride);
+ OPUS_COPY(&in_buf[(conv->ktime-1)*time_stride], in, time_stride);
+ OPUS_COPY(mem, &in_buf[time_stride], (conv->ktime-1)*time_stride);
+ bias = conv->bias;
+ conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, in_buf, len2);
+ if (bias != NULL) {
+ int j;
+ /* One bias value per output channel, applied to all len2 outputs of that channel. */
+ for (i=0;i<conv->out_channels;i++) {
+ for (j=0;j<len2;j++) out[i*len2+j] += bias[i];
+ }
+ }
+ compute_activation(out, out, conv->out_channels*len2, activation);
+}
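+
+/* Editor's illustrative sketch (not part of the original change): streaming use of
+ compute_conv2d() with a hypothetical 2-in / 3-out layer, a 2x3 (ktime x kheight)
+ kernel and len2 == 4. The caller keeps `mem` across calls so the previous
+ ktime-1 input frames are reused (ACTIVATION_TANH is assumed to come from nnet.h). */
+#if 0
+static void compute_conv2d_example(const Conv2DLayer *layer, const float *frame)
+{
+ /* History: (ktime-1) * in_channels * (len2+kheight-1) = 1*2*6 floats. */
+ static float mem[1*2*6];
+ /* `frame` holds in_channels*(len2+kheight-1) = 2*6 new input values. */
+ float out[3*4]; /* [ out_channels x len2 ] */
+ compute_conv2d(layer, out, mem, frame, 4, ACTIVATION_TANH);
+}
+#endif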
+
+
void compute_embedding(const EmbeddingLayer *layer, float *output, int input)
{
int i;
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -75,6 +75,16 @@
int nb_outputs;
} LinearLayer;
+/* 2D convolutional layer. */
+typedef struct {
+ const float *bias;
+ const float *float_weights;
+ int in_channels;
+ int out_channels;
+ int ktime;
+ int kheight;
+} Conv2DLayer;
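+
+/* Editor's note (hypothetical values, not part of the original change): a layer with
+ in_channels == 2, out_channels == 3, ktime == 2 and kheight == 3 would point
+ float_weights at 3*2*2*3 == 36 coefficients laid out as
+ [ out_channels x in_channels x ktime x kheight ], and bias at 3 values
+ (one per output channel), or NULL if the layer has no bias. */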
+
typedef struct {
const float *bias;
const float *input_weights;
--