ref: 823bccee17610f859b0b98927d92290dc929b202
parent: d588a49f7fce9f72099b40a4c1eac0351a501269
	author: Ulrich Klauer <ulrich@chirlu.de>
	date: Sat Apr  7 15:06:03 EDT 2012
	
Improve (de)interleave performance. Changes the implementation of interleave() and deinterleave(). The original intent was to make the functions parallelizable; it turns out, however, that running them in parallel reduces speed slightly. The bottleneck is probably the synchronization between the caches. Still, this version, run sequentially, is a few percent faster than the previous code.
--- a/src/effects.c
+++ b/src/effects.c
@@ -624,13 +624,19 @@
static void interleave(size_t flows, size_t length, sox_sample_t *from,
size_t bufsiz, size_t offset, sox_sample_t *to)
 {
- size_t i, f;
- size_t wide_samples = length/flows;
- size_t flow_offs = bufsiz/flows;
+ size_t i;
+ const size_t wide_samples = length/flows;
+ const size_t flow_offs = bufsiz/flows;
from += offset/flows;
- for (i = 0; i < wide_samples; i++)
- for (f = 0; f < flows; f++)
- *to++ = from[f*flow_offs + i];
+  for (i = 0; i < wide_samples; i++) {
+ sox_sample_t *inner_from = from + i;
+ sox_sample_t *inner_to = to + i * flows;
+ size_t f;
+    for (f = 0; f < flows; f++) {
+ *inner_to++ = *inner_from;
+ inner_from += flow_offs;
+ }
+ }
}
/* deinterleave() parameters:
@@ -646,11 +652,17 @@
static void deinterleave(size_t flows, size_t length, sox_sample_t *from,
sox_sample_t *to, size_t bufsiz, size_t offset)
 {
- size_t i, f;
- size_t wide_samples = length/flows;
- size_t flow_offs = bufsiz/flows;
+ const size_t wide_samples = length/flows;
+ const size_t flow_offs = bufsiz/flows;
+ size_t f;
to += offset/flows;
- for (i = 0; i < wide_samples; i++)
- for (f = 0; f < flows; f++)
- to[f*flow_offs + i] = *from++;
+  for (f = 0; f < flows; f++) {
+ sox_sample_t *inner_to = to + f*flow_offs;
+ sox_sample_t *inner_from = from + f;
+ size_t i = wide_samples;
+    while (i--) {
+ *inner_to++ = *inner_from;
+ inner_from += flows;
+ }
+ }
}
--
⑨