shithub: opus

Download patch

ref: 7cc30ec6817dec403ec98f1e0da30dfc7473f413
parent: d4506af5a9309dda4f798c70ce38dd95632e9b8d
author: Jean-Marc Valin <jmvalin@amazon.com>
date: Mon Nov 27 11:44:11 EST 2023

Force vectorization for DNN primitives

Avoids having to write intrinsics for simple loops

--- a/dnn/nnet_arch.h
+++ b/dnn/nnet_arch.h
@@ -38,7 +38,14 @@
 
 #define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
 
+/* Force-enable auto-vectorization for the DNN code, because some of its loops
+   rely on the compiler to vectorize them rather than on explicit intrinsics. */
+#ifdef __GNUC__
+#pragma GCC push_options
+#pragma GCC optimize("tree-vectorize")
+#endif
 
+
 #define MAX_ACTIVATIONS (4096)
 
 static OPUS_INLINE void vec_swish(float *y, const float *x, int N)
@@ -215,5 +222,9 @@
      RTCD_SUF(compute_activation_)(&out[i*hstride], &out[i*hstride], height, activation);
    }
 }
+
+#ifdef __GNUC__
+#pragma GCC pop_options
+#endif
 
 #endif
--