ref: 823bccee17610f859b0b98927d92290dc929b202
parent: d588a49f7fce9f72099b40a4c1eac0351a501269
author: Ulrich Klauer <ulrich@chirlu.de>
date: Sat Apr 7 15:06:03 EDT 2012
Improve (de)interleave performance. Changes the implementation of interleave() and deinterleave(). The original intent was to make the functions parallelizable; it turns out, however, that running them in parallel reduces speed slightly. The bottleneck is probably the synchronization between the caches. Still, this version, run sequentially, is a few percent faster than the previous code.
--- a/src/effects.c
+++ b/src/effects.c
@@ -624,13 +624,19 @@
static void interleave(size_t flows, size_t length, sox_sample_t *from,
size_t bufsiz, size_t offset, sox_sample_t *to)
{
- size_t i, f;
- size_t wide_samples = length/flows;
- size_t flow_offs = bufsiz/flows;
+ size_t i;
+ const size_t wide_samples = length/flows;
+ const size_t flow_offs = bufsiz/flows;
from += offset/flows;
- for (i = 0; i < wide_samples; i++)
- for (f = 0; f < flows; f++)
- *to++ = from[f*flow_offs + i];
+ for (i = 0; i < wide_samples; i++) {
+ sox_sample_t *inner_from = from + i;
+ sox_sample_t *inner_to = to + i * flows;
+ size_t f;
+ for (f = 0; f < flows; f++) {
+ *inner_to++ = *inner_from;
+ inner_from += flow_offs;
+ }
+ }
}
/* deinterleave() parameters:
@@ -646,11 +652,17 @@
static void deinterleave(size_t flows, size_t length, sox_sample_t *from,
sox_sample_t *to, size_t bufsiz, size_t offset)
{
- size_t i, f;
- size_t wide_samples = length/flows;
- size_t flow_offs = bufsiz/flows;
+ const size_t wide_samples = length/flows;
+ const size_t flow_offs = bufsiz/flows;
+ size_t f;
to += offset/flows;
- for (i = 0; i < wide_samples; i++)
- for (f = 0; f < flows; f++)
- to[f*flow_offs + i] = *from++;
+ for (f = 0; f < flows; f++) {
+ sox_sample_t *inner_to = to + f*flow_offs;
+ sox_sample_t *inner_from = from + f;
+ size_t i = wide_samples;
+ while (i--) {
+ *inner_to++ = *inner_from;
+ inner_from += flows;
+ }
+ }
}