shithub: sox

Download patch

ref: 823bccee17610f859b0b98927d92290dc929b202
parent: d588a49f7fce9f72099b40a4c1eac0351a501269
author: Ulrich Klauer <ulrich@chirlu.de>
date: Sat Apr 7 15:06:03 EDT 2012

Improve (de)interleave performance

Change the implementation of interleave() and deinterleave().
The original intent was to make the functions parallelizable; it turns
out, however, that running them in parallel reduces speed slightly.
The bottleneck is probably the synchronization between the caches.

Still, this version, when run sequentially, is a few percent faster
than the previous code.

--- a/src/effects.c
+++ b/src/effects.c
@@ -624,13 +624,19 @@
 static void interleave(size_t flows, size_t length, sox_sample_t *from,
     size_t bufsiz, size_t offset, sox_sample_t *to)
 {
-  size_t i, f;
-  size_t wide_samples = length/flows;
-  size_t flow_offs = bufsiz/flows;
+  size_t i;
+  const size_t wide_samples = length/flows;
+  const size_t flow_offs = bufsiz/flows;
   from += offset/flows;
-  for (i = 0; i < wide_samples; i++)
-    for (f = 0; f < flows; f++)
-      *to++ = from[f*flow_offs + i];
+  for (i = 0; i < wide_samples; i++) {
+    sox_sample_t *inner_from = from + i;
+    sox_sample_t *inner_to = to + i * flows;
+    size_t f;
+    for (f = 0; f < flows; f++) {
+      *inner_to++ = *inner_from;
+      inner_from += flow_offs;
+    }
+  }
 }
 
 /* deinterleave() parameters:
@@ -646,11 +652,17 @@
 static void deinterleave(size_t flows, size_t length, sox_sample_t *from,
     sox_sample_t *to, size_t bufsiz, size_t offset)
 {
-  size_t i, f;
-  size_t wide_samples = length/flows;
-  size_t flow_offs = bufsiz/flows;
+  const size_t wide_samples = length/flows;
+  const size_t flow_offs = bufsiz/flows;
+  size_t f;
   to += offset/flows;
-  for (i = 0; i < wide_samples; i++)
-    for (f = 0; f < flows; f++)
-      to[f*flow_offs + i] = *from++;
+  for (f = 0; f < flows; f++) {
+    sox_sample_t *inner_to = to + f*flow_offs;
+    sox_sample_t *inner_from = from + f;
+    size_t i = wide_samples;
+    while (i--) {
+      *inner_to++ = *inner_from;
+      inner_from += flows;
+    }
+  }
 }