shithub: sox

Download patch

ref: ed8da1d58fd3a2cbba6a6be953713f1c18a07950
parent: 22af69d50f0ecd7fd19acdea3df9292bf720eb4c
author: Rob Sykes <robs@users.sourceforge.net>
date: Thu Mar 22 18:32:06 EDT 2012

more rate speed-ups

--- a/src/effects_i_dsp.c
+++ b/src/effects_i_dsp.c
@@ -398,8 +398,8 @@
   else {
     begin = (.997 - (2 - phase1) * .22) * *len + .5;
     end   = (.997 + (0 - phase1) * .22) * *len + .5;
-    begin = peak - begin - (begin & 1);
-    end   = peak + 1 + end + (end & 1);
+    begin = peak - (begin & ~3);
+    end   = peak + 1 + ((end + 3) & ~3);
     *len = end - begin;
     *h = lsx_realloc(*h, *len * sizeof(**h));
   }
--- a/src/rate.c
+++ b/src/rate.c
@@ -1,4 +1,4 @@
-/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+/* Effect: change sample rate  Copyright (c) 2008,12 robs@users.sourceforge.net
  *
  * This library is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published by
@@ -69,7 +69,7 @@
 
 typedef struct {    /* Data that are shared between channels and stages */
   sample_t   * poly_fir_coefs;
-  dft_filter_t half_band[2];   /* Div or mul by n */
+  dft_filter_t dft_filter[2];   /* Div or mul by n */
 } rate_shared_t;
 
 struct stage;
@@ -119,12 +119,12 @@
   p->at.parts.integer = 0;
 }
 
-static void half_sample(stage_t * p, fifo_t * output_fifo)
+static void decimate(stage_t * p, fifo_t * output_fifo)
 {
-  sample_t * output;
+  sample_t * output, tmp = 0;
   int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
   rate_shared_t const * s = p->shared;
-  dft_filter_t const * f = &s->half_band[p->which];
+  dft_filter_t const * f = &s->dft_filter[p->which];
   int const overlap = f->num_taps - 1;
 
   while (num_in >= f->dft_length) {
@@ -137,27 +137,38 @@
 
     lsx_safe_rdft(f->dft_length, 1, output);
     output[0] *= f->coefs[0];
-    output[1] *= f->coefs[1];
-    for (i = 2; i < f->dft_length; i += 2) {
-      sample_t tmp = output[i];
-      output[i  ] = f->coefs[i  ] * tmp - f->coefs[i+1] * output[i+1];
-      output[i+1] = f->coefs[i+1] * tmp + f->coefs[i  ] * output[i+1];
+    if (p->tuple) {
+      output[1] *= f->coefs[1];
+      for (i = 2; i < f->dft_length; i += 2) {
+        tmp = output[i];
+        output[i  ] = f->coefs[i  ] * tmp - f->coefs[i+1] * output[i+1];
+        output[i+1] = f->coefs[i+1] * tmp + f->coefs[i  ] * output[i+1];
+      }
+      lsx_safe_rdft(f->dft_length, -1, output);
+      for (j = 0, i = p->rem; i < f->dft_length - overlap; ++j, i += p->tuple)
+        output[j] = output[i];
+      p->rem = i - (f->dft_length - overlap);
+      fifo_trim_by(output_fifo, f->dft_length - j);
     }
-    lsx_safe_rdft(f->dft_length, -1, output);
-
-    for (j = 0, i = p->rem; i < f->dft_length - overlap; ++j, i += p->tuple)
-      output[j] = output[i];
-    p->rem = i - (f->dft_length - overlap);
-    fifo_trim_by(output_fifo, f->dft_length - j);
+    else { /* F-domain */
+      for (i = 2; i < (f->dft_length >> 1); i += 2) {
+        tmp = output[i];
+        output[i  ] = f->coefs[i  ] * tmp - f->coefs[i+1] * output[i+1];
+        output[i+1] = f->coefs[i+1] * tmp + f->coefs[i  ] * output[i+1];
+      }
+      output[1] = f->coefs[i  ] * tmp - f->coefs[i+1] * output[i+1];
+      lsx_safe_rdft(f->dft_length >> 1, -1, output);
+      fifo_trim_by(output_fifo, (f->dft_length + overlap) >> 1);
+    }
   }
 }
 
-static void double_sample(stage_t * p, fifo_t * output_fifo)
+static void interpolate(stage_t * p, fifo_t * output_fifo)
 {
   sample_t * output;
   int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
   rate_shared_t const * s = p->shared;
-  dft_filter_t const * f = &s->half_band[p->which];
+  dft_filter_t const * f = &s->dft_filter[p->which];
   int const overlap = f->num_taps - 1;
 
   while (p->rem + p->tuple * num_in >= f->dft_length) {
@@ -168,12 +179,27 @@
 
     output = fifo_reserve(output_fifo, f->dft_length);
     fifo_trim_by(output_fifo, overlap);
-    memset(output, 0, f->dft_length * sizeof(*output));
-    for (j = 0, i = p->rem; i < f->dft_length; ++j, i += p->tuple)
-      output[i] = input[j];
-    p->rem = p->tuple - 1 - divd.rem;
-
-    lsx_safe_rdft(f->dft_length, 1, output);
+    if (p->tuple == 2 || p->tuple == 4) { /* F-domain */
+      int portion = f->dft_length / p->tuple;
+      memcpy(output, input, (unsigned)portion * sizeof(*output));
+      lsx_safe_rdft(portion, 1, output);
+      for (i = portion + 2; i < (portion << 1); i += 2)
+        output[i] = output[(portion << 1) - i],
+        output[i+1] = -output[(portion << 1) - i + 1];
+      output[portion] = output[1];
+      output[portion + 1] = 0;
+      output[1] = output[0];
+      for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) {
+        memcpy(output + i, output, portion * sizeof(*output));
+        output[i + 1] = 0;
+      }
+    } else {
+      memset(output, 0, f->dft_length * sizeof(*output));
+      for (j = 0, i = p->rem; i < f->dft_length; ++j, i += p->tuple)
+        output[i] = input[j];
+      p->rem = p->tuple - 1 - divd.rem;
+      lsx_safe_rdft(f->dft_length, 1, output);
+    }
     output[0] *= f->coefs[0];
     output[1] *= f->coefs[1];
     for (i = 2; i < f->dft_length; i += 2) {
@@ -189,7 +215,7 @@
     sample_t const h[], double Fp, double Fc, double Fn, double att,
     int multiplier, double phase, sox_bool allow_aliasing)
 {
-  dft_filter_t * f = &p->half_band[which];
+  dft_filter_t * f = &p->dft_filter[which];
   int dft_length, i;
 
   if (f->num_taps)
@@ -207,7 +233,16 @@
 
     if (phase != 50)
       lsx_fir_to_phase(&h2, &num_taps, &f->post_peak, phase);
-    else f->post_peak = num_taps / 2;
+    else {
+      if (Fn == 4 && ((num_taps - 1) & 4)) { /* preserve phase */
+        double * h3 = calloc(num_taps + 4, sizeof(*h3));
+        memcpy(h3 + 2, h2, num_taps * sizeof(*h3));
+        free(h2);
+        h2 = h3;
+        num_taps += 4;
+      }
+      f->post_peak = num_taps / 2;
+    }
 
     dft_length = lsx_set_dft_length(num_taps);
     f->coefs = calloc(dft_length, sizeof(*f->coefs));
@@ -297,8 +332,8 @@
     }
   }
   mult = tuple;
-  lsx_debug("i/o=%.16g; %.12g:%i @ level %i tuple=%i", p->factor, last_stage.step.all / MULT32, divisor, p->level, tuple);
-  lsx_debug("%x.%x", last_stage.step.parts.integer, last_stage.step.parts.fraction);
+  lsx_debug("i/o=%.16g; %.12g:%i @ level %i tuple=%i last-stage:%x.%x", p->factor, last_stage.step.all / MULT32,
+      divisor, p->level, tuple, last_stage.step.parts.integer, last_stage.step.parts.fraction);
 
   p->input_stage_num = -p->upsample;
   p->output_stage_num = p->level;
@@ -351,9 +386,9 @@
     assert((size_t)(quality - Low) < array_length(filters));
     init_dft_filter(shared, p->upsample, f->len, f->h, bw, 1., (double)max(tuple, two_or_three), att, mult, phase, allow_aliasing);
     if (p->upsample) {
-      pre_stage.fn = double_sample; /* Finish off setting up pre-stage */
-      pre_stage.preload = shared->half_band[1].post_peak / tuple;
-      pre_stage.rem     = shared->half_band[1].post_peak % tuple;
+      pre_stage.fn = interpolate; /* Finish off setting up pre-stage */
+      pre_stage.preload = shared->dft_filter[1].post_peak / tuple;
+      pre_stage.rem     = shared->dft_filter[1].post_peak % tuple;
       pre_stage.tuple = tuple;
       pre_stage.which = 1;
       if (two_factors && divisor != tuple) {
@@ -360,26 +395,26 @@
         int other = divisor / tuple;
         ++p->output_stage_num;
         init_dft_filter(shared, 0, 0, NULL, 1., (double)tuple, (double)divisor, att, other, phase, allow_aliasing);
-        last_stage.fn = double_sample;
-        last_stage.preload = shared->half_band[0].post_peak / other;
-        last_stage.rem     = shared->half_band[0].post_peak % other;
+        last_stage.fn = interpolate;
+        last_stage.preload = shared->dft_filter[0].post_peak / other;
+        last_stage.rem     = shared->dft_filter[0].post_peak % other;
         last_stage.tuple = other;
         last_stage.which = 0;
       }
       else {
         /* Start setting up post-stage; TODO don't use dft for short filters */
-        if ((1 - p->factor) / (1 - bw) > 2)
+        if ((1 - p->factor) / (1 - bw) > 2 || tuple != 2)
           init_dft_filter(shared, 0, 0, NULL, max(p->factor, min), 1., 2., att, 1, phase, allow_aliasing);
-        else shared->half_band[0] = shared->half_band[1];
+        else shared->dft_filter[0] = shared->dft_filter[1];
         if (two_factors && factor == 2) {
           ++p->output_stage_num;
-          last_stage.fn = half_sample;
-          last_stage.preload = shared->half_band[0].post_peak;
-          last_stage.tuple = 2;
+          last_stage.fn = decimate;
+          last_stage.preload = shared->dft_filter[0].post_peak;
+          last_stage.tuple = (old_behaviour | allow_aliasing) << 1;
         } else {
-          post_stage.fn = half_sample;
-          post_stage.preload = shared->half_band[0].post_peak;
-          post_stage.tuple = 2;
+          post_stage.fn = decimate;
+          post_stage.preload = shared->dft_filter[0].post_peak;
+          post_stage.tuple = (old_behaviour | allow_aliasing) << 1;
         }
       }
     }
@@ -389,9 +424,9 @@
         if ((1 - pass) / (1 - bw) > 2)
           init_dft_filter(shared, 1, 0, NULL, max(pass, min), 1., 2., att, 1, phase, allow_aliasing);
       }
-      post_stage.fn = half_sample;
-      post_stage.preload = shared->half_band[0].post_peak;
-      post_stage.tuple = two_or_three;
+      post_stage.fn = decimate;
+      post_stage.preload = shared->dft_filter[0].post_peak;
+      post_stage.tuple = two_or_three == 2 && !(old_behaviour | allow_aliasing)? 0 : two_or_three;
     }
   }
   else if (quality == Low && !p->upsample) {    /* dft is slower here, so */
@@ -401,10 +436,10 @@
   }
   if (p->level > 0) {
     stage_t * s = & p->stages[p->level - 1];
-    if (shared->half_band[1].num_taps) {
-      s->fn = half_sample;
-      s->preload = shared->half_band[1].post_peak;
-      s->tuple = 2;
+    if (shared->dft_filter[1].num_taps) {
+      s->fn = decimate;
+      s->preload = shared->dft_filter[1].post_peak;
+      s->tuple = (old_behaviour | allow_aliasing) << 1;
       s->which = 1;
     }
     else *s = post_stage;
@@ -471,9 +506,9 @@
 
   for (i = p->input_stage_num; i <= p->output_stage_num; ++i)
     fifo_delete(&p->stages[i].fifo);
-  free(shared->half_band[0].coefs);
-  if (shared->half_band[1].coefs != shared->half_band[0].coefs)
-    free(shared->half_band[1].coefs);
+  free(shared->dft_filter[0].coefs);
+  if (shared->dft_filter[1].coefs != shared->dft_filter[0].coefs)
+    free(shared->dft_filter[1].coefs);
   free(shared->poly_fir_coefs);
   memset(shared, 0, sizeof(*shared));
   free(p->stages - 1);