ref: 966a2d22eb0999e9319d1003b4b55d9fd051d33d
parent: f3b86f941408b37b0c0236eb5b8b09605b8a713b
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Sat Sep 30 19:43:51 EDT 2023
Code for 2D convolution (untested)
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -384,6 +384,58 @@
compute_generic_conv1d(&matrix, output, mem, input, layer->nb_inputs, layer->activation);
}
+/* Computes non-padded 2D convolution for input [ ktime x in_channels x (len2+kheight-1) ],
+ kernel [ out_channels x in_channels x ktime x kheight ],
+ storing the output as [ out_channels x len2 ].
+ We assume that the output dimension along the ktime (frame) axis is 1,
+ i.e. we process one frame at a time. */
+void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int len2)
+{
+ int i;
+ int in_stride;
+ in_stride = len2+kheight-1;
+ OPUS_CLEAR(out, out_channels*len2);
+ for (i=0;i<out_channels;i++) {
+ int m;
+ for (m=0;m<in_channels;m++) {
+ int t;
+ for (t=0;t<ktime;t++) {
+ int h;
+ for (h=0;h<kheight;h++) {
+ int j;
+ for (j=0;j<len2;j++) {
+ out[i*len2 + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + t*kheight + h] *
+ in[t*in_channels*in_stride + m*in_stride + j + h];
+ }
+ }
+ }
+ }
+ }
+}
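+
+/* Editor's illustrative sketch (not part of the original change): calling
+ conv2d_float() directly with a hypothetical 2-channel input, 3 output channels
+ and a 2x3 (ktime x kheight) kernel, producing len2 == 4 outputs per channel. */
+#if 0
+static void conv2d_float_example(void)
+{
+ float out[3*4]; /* [ out_channels x len2 ] */
+ float weights[3*2*2*3] = {0}; /* [ out_channels x in_channels x ktime x kheight ] */
+ float in[2*2*(4+3-1)] = {0}; /* [ ktime x in_channels x (len2+kheight-1) ] */
+ conv2d_float(out, weights, 2, 3, 2, 3, in, 4);
+}
+#endif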
+
+#define MAX_CONV2D_INPUTS 2048
+
+void compute_conv2d(const Conv2DLayer *conv, float *out, float *mem, const float *in, int len2, int activation)
+{
+ int i;
+ const float *bias;
+ float in_buf[MAX_CONV2D_INPUTS];
+ int time_stride;
+ celt_assert(in != out);
+ time_stride = conv->in_channels*(len2+conv->kheight-1);
+ celt_assert(conv->ktime*time_stride <= MAX_CONV2D_INPUTS);
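+ /* Build the [ ktime x in_channels x (len2+kheight-1) ] input: ktime-1 frames of
+ history from mem followed by the new frame, then advance mem by one frame. */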
+ OPUS_COPY(in_buf, mem, (conv->ktime-1)*time_stride);
+ OPUS_COPY(&in_buf[(conv->ktime-1)*time_stride], in, time_stride);
+ OPUS_COPY(mem, &in_buf[time_stride], (conv->ktime-1)*time_stride);
+ bias = conv->bias;
+ conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, in_buf, len2);
+ if (bias != NULL) {
+ int j;
+ /* One bias value per output channel, applied to all len2 outputs of that channel. */
+ for (i=0;i<conv->out_channels;i++) {
+ for (j=0;j<len2;j++) out[i*len2+j] += bias[i];
+ }
+ }
+ compute_activation(out, out, conv->out_channels*len2, activation);
+}
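+
+/* Editor's illustrative sketch (not part of the original change): streaming use of
+ compute_conv2d() with a hypothetical 2-in / 3-out layer, a 2x3 (ktime x kheight)
+ kernel and len2 == 4. The caller keeps `mem` across calls so the previous
+ ktime-1 input frames are reused (ACTIVATION_TANH is assumed to come from nnet.h). */
+#if 0
+static void compute_conv2d_example(const Conv2DLayer *layer, const float *frame)
+{
+ /* History: (ktime-1) * in_channels * (len2+kheight-1) = 1*2*6 floats. */
+ static float mem[1*2*6];
+ /* `frame` holds in_channels*(len2+kheight-1) = 2*6 new input values. */
+ float out[3*4]; /* [ out_channels x len2 ] */
+ compute_conv2d(layer, out, mem, frame, 4, ACTIVATION_TANH);
+}
+#endif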
+
+
void compute_embedding(const EmbeddingLayer *layer, float *output, int input)
{
int i;
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -75,6 +75,16 @@
int nb_outputs;
} LinearLayer;
+/* 2D convolutional layer. */
+typedef struct {
+ const float *bias;
+ const float *float_weights;
+ int in_channels;
+ int out_channels;
+ int ktime;
+ int kheight;
+} Conv2DLayer;
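+
+/* Editor's note (hypothetical values, not part of the original change): a layer with
+ in_channels == 2, out_channels == 3, ktime == 2 and kheight == 3 would point
+ float_weights at 3*2*2*3 == 36 coefficients laid out as
+ [ out_channels x in_channels x ktime x kheight ], and bias at 3 values
+ (one per output channel), or NULL if the layer has no bias. */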
+
typedef struct {
const float *bias;
const float *input_weights;
--