shithub: sox

Download patch

ref: a69f6b2b234ee29f36e0eeab253acdab09fffddf
parent: 959708b982613fa3d0b0aefe278c5188b6a94d67
author: Rob Sykes <robs@users.sourceforge.net>
date: Sat Jul 28 09:14:07 EDT 2012

Downsampling speed-ups; comparative execution time examples:
  192000 -> 44100  58.6%
  100000 -> 40000  83.9%
   96000 -> 44100  56.3%
   48000 -> 44100  71.2%

--- a/src/libsox.c
+++ b/src/libsox.c
@@ -133,7 +133,8 @@
   NULL,            /* char const * subsystem */
   NULL,            /* char       * tmp_path */
   sox_false,       /* sox_bool     use_magic */
-  sox_false        /* sox_bool     use_threads */
+  sox_false,       /* sox_bool     use_threads */
+  10               /* size_t       log2_dft_min_size */
 };
 
 sox_globals_t * sox_get_globals(void)
--- a/src/loudness.c
+++ b/src/loudness.c
@@ -86,7 +86,7 @@
   lsx_safe_rdft(work_len, -1, work);
   for (i = 0; i < n; ++i)
     h[i] = work[(work_len - n / 2 + i) % work_len] * 2. / work_len;
-  lsx_apply_kaiser(h, n, lsx_kaiser_beta(40 + 2./3 * fabs(delta)));
+  lsx_apply_kaiser(h, n, lsx_kaiser_beta(40 + 2./3 * fabs(delta), .1));
 
   free(work);
   return h;
--- a/src/rate.c
+++ b/src/rate.c
@@ -23,6 +23,7 @@
 #undef NDEBUG /* Must undef above assert.h or other that might include it. */
 #endif
 
+#define _GNU_SOURCE
 #include "sox_i.h"
 #include "fft4g.h"
 #include "dft_filter.h"
@@ -29,26 +30,40 @@
 #include <assert.h>
 #include <string.h>
 
-#define  calloc     lsx_calloc
-#define  malloc     lsx_malloc
-#define  raw_coef_t double
-#define  sample_t   double
-#define  TO_SOX     SOX_FLOAT_64BIT_TO_SAMPLE
-#define  FROM_SOX   SOX_SAMPLE_TO_FLOAT_64BIT
-#define  coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) coef_p[(fir_len) * ((interp_order) + 1) * (phase_num) + ((interp_order) + 1) * (fir_coef_num) + (interp_order - coef_interp_num)]
+#define calloc     lsx_calloc
+#define malloc     lsx_malloc
+#define raw_coef_t double
 
+#if 0 /* For float32 version, as used in foobar */
+  #define sample_t   float
+  #define num_coefs4 ((num_coefs + 3) & ~3) /* align coefs for SSE */
+  #define coefs4_check(i) ((i) < num_coefs)
+#else
+  #define sample_t   double
+  #define num_coefs4 num_coefs
+  #define coefs4_check(i) 1
+#endif
+
+#if defined M_PIl
+  #define hi_prec_clock_t long double /* __float128 is also a (slow) option */
+#else
+  #define hi_prec_clock_t double
+#endif
+
+#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[(fir_len) * ((interp_order) + 1) * (phase_num) + ((interp_order) + 1) * (fir_coef_num) + ((interp_order) - (coef_interp_num))] /* Table is laid out phase-major, then tap, then interpolation terms in reverse order; all args parenthesised so expressions are safe. */
+
 static sample_t * prepare_coefs(raw_coef_t const * coefs, int num_coefs,
     int num_phases, int interp_order, int multiplier)
 {
-  int i, j, length = num_coefs * num_phases;
+  int i, j, length = num_coefs4 * num_phases;
   sample_t * result = malloc(length * (interp_order + 1) * sizeof(*result));
   double fm1 = coefs[0], f1 = 0, f2 = 0;
 
-  for (i = num_coefs - 1; i >= 0; --i)
+  for (i = num_coefs4 - 1; i >= 0; --i)
     for (j = num_phases - 1; j >= 0; --j) {
       double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */
       int pos = i * num_phases + j - 1;
-      fm1 = (pos > 0 ? coefs[pos - 1] : 0) * multiplier;
+      fm1 = coefs4_check(i) && pos > 0 ? coefs[pos - 1] * multiplier : 0;
       switch (interp_order) {
         case 1: b = f1 - f0; break;
         case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break;
@@ -56,7 +71,7 @@
         default: if (interp_order) assert(0);
       }
       #define coef_coef(x) \
-        coef(result, interp_order, num_coefs, j, x, num_coefs - 1 - i)
+        coef(result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i)
       coef_coef(0) = f0;
       if (interp_order > 0) coef_coef(1) = b;
       if (interp_order > 1) coef_coef(2) = c;
@@ -67,7 +82,7 @@
   return result;
 }
 
-typedef struct {    /* Data that are shared between channels and stages */
+typedef struct { /* So generated filter coefs may be shared between channels */
   sample_t   * poly_fir_coefs;
   dft_filter_t dft_filter[2];
 } rate_shared_t;
@@ -75,15 +90,20 @@
 struct stage;
 typedef void (* stage_fn_t)(struct stage * input, fifo_t * output);
 typedef struct stage {
-  rate_shared_t * shared;
-  fifo_t     fifo;
-  int        pre;              /* Number of past samples to store */
-  int        pre_post;         /* pre + number of future samples to store */
-  int        preload;          /* Number of zero samples to pre-load the fifo */
-  int        which;            /* Which, if any, of the 2 dft filters to use */
+  /* Common to all stage types: */
   stage_fn_t fn;
-                               /* For poly_fir & spline: */
-  union {                      /* 32bit.32bit fixed point arithmetic */
+  fifo_t     fifo;
+  int        pre;       /* Number of past samples to store */
+  int        pre_post;  /* pre + number of future samples to store */
+  int        preload;   /* Number of zero samples to pre-load the fifo */
+  double     out_in_ratio; /* For buffer management. */
+
+  /* For a stage with variable (run-time generated) filter coefs: */
+  rate_shared_t * shared;
+  int        dft_filter_num; /* Which, if any, of the 2 DFT filters to use */
+
+  /* For a stage with variable L/M: */
+  union {               /* 32bit.32bit fixed point arithmetic */
     #if defined(WORDS_BIGENDIAN)
     struct {int32_t integer; uint32_t fraction;} parts;
     #else
@@ -91,16 +111,18 @@
     #endif
     int64_t all;
     #define MULT32 (65536. * 65536.)
+
+    hi_prec_clock_t hi_prec_clock;
   } at, step;
+  sox_bool   use_hi_prec_clock;
   int        L, remL, remM;
-
-  double     out_in_ratio;
+  int        n, phase_bits;
 } stage_t;
 
 #define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
 #define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre)
 
-static void cubic_spline_fn(stage_t * p, fifo_t * output_fifo)
+static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo)
 {
   int i, num_in = stage_occupancy(p), max_num_out = 1 + num_in*p->out_in_ratio;
   sample_t const * input = stage_read_p(p);
@@ -124,7 +146,7 @@
   sample_t * output, tmp;
   int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
   rate_shared_t const * s = p->shared;
-  dft_filter_t const * f = &s->dft_filter[p->which];
+  dft_filter_t const * f = &s->dft_filter[p->dft_filter_num];
   int const overlap = f->num_taps - 1;
 
   while (p->remL + p->L * num_in >= f->dft_length) {
@@ -134,7 +156,7 @@
     num_in -= divd.quot;
 
     output = fifo_reserve(output_fifo, f->dft_length);
-    if (p->L == 2 || p->L == 4) { /* F-domain */
+    if (lsx_is_power_of_2(p->L)) { /* F-domain */
       int portion = f->dft_length / p->L;
       memcpy(output, input, (unsigned)portion * sizeof(*output));
       lsx_safe_rdft(portion, 1, output);
@@ -169,7 +191,8 @@
       }
       lsx_safe_rdft(f->dft_length, -1, output);
       if (p->step.parts.integer != 1) {
-        for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j, i += p->step.parts.integer)
+        for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
+            i += p->step.parts.integer)
           output[j] = output[i];
         p->remM = i - (f->dft_length - overlap);
         fifo_trim_by(output_fifo, f->dft_length - j);
@@ -185,44 +208,24 @@
       }
       output[1] = f->coefs[i] * output[i] - f->coefs[i+1] * output[i+1];
       lsx_safe_rdft(f->dft_length >> m, -1, output);
-      fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >> m);
+      fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m);
     }
   }
 }
 
-static void setup_dft_stage(rate_shared_t * shared, int which, stage_t * stage, int L, int M, sox_bool allow_aliasing)
+static void dft_stage_init(
+    unsigned instance, double Fp, double Fs, double Fn, double att,
+    double phase, stage_t * stage, int L, int M)
 {
-  stage->fn = dft_stage_fn;
-  stage->preload = shared->dft_filter[which].post_peak / L;
-  stage->remL    = shared->dft_filter[which].post_peak % L;
-  stage->L = L;
-  stage->step.parts.integer = abs(3-M) == 1 && !allow_aliasing? -M/2 : M;
-  stage->which = which;
-}
+  dft_filter_t * f = &stage->shared->dft_filter[instance];
+  
+  if (!f->num_taps) {
+    int num_taps = 0, dft_length, i;
+    int k = phase == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4;
+    double * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.);
 
-static void init_dft_filter(rate_shared_t * p, unsigned which, int num_taps,
-    sample_t const h[], double Fp, double Fc, double Fn, double att,
-    int multiplier, double phase, sox_bool allow_aliasing)
-{
-  dft_filter_t * f = &p->dft_filter[which];
-  int dft_length, i;
-
-  if (f->num_taps)
-    return;
-  if (h) {
-    dft_length = lsx_set_dft_length(num_taps);
-    f->coefs = calloc(dft_length, sizeof(*f->coefs));
-    for (i = 0; i < num_taps; ++i)
-      f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]
-          = h[abs(num_taps / 2 - i)] / dft_length * 2 * multiplier;
-    f->post_peak = num_taps / 2;
-  }
-  else {
-    int k = 4 << (phase == 50 && multiplier == 4 && Fn == 4);
-    double * h2 = lsx_design_lpf(Fp, Fc, Fn, allow_aliasing, att, &num_taps, -k, -1.);
-
     if (phase != 50)
-      lsx_fir_to_phase(&h2, &num_taps, &f->post_peak, phase);
+      lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase);
     else f->post_peak = num_taps / 2;
 
     dft_length = lsx_set_dft_length(num_taps);
@@ -229,15 +232,20 @@
     f->coefs = calloc(dft_length, sizeof(*f->coefs));
     for (i = 0; i < num_taps; ++i)
       f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]
-          = h2[i] / dft_length * 2 * multiplier;
-    free(h2);
+        = h[i] / dft_length * 2 * L;
+    free(h);
+    f->num_taps = num_taps;
+    f->dft_length = dft_length;
+    lsx_safe_rdft(dft_length, 1, f->coefs);
+    lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i",
+        num_taps, dft_length, Fp, Fs, Fn, att, L, M);
   }
-  assert(num_taps & 1);
-  f->num_taps = num_taps;
-  f->dft_length = dft_length;
-  lsx_debug("fir_len=%i dft_length=%i Fp=%g Fc=%g Fn=%g att=%g mult=%i",
-      num_taps, dft_length, Fp, Fc, Fn, att, multiplier);
-  lsx_safe_rdft(dft_length, 1, f->coefs);
+  stage->fn = dft_stage_fn;
+  stage->preload = f->post_peak / L;
+  stage->remL    = f->post_peak % L;
+  stage->L = L;
+  stage->step.parts.integer = abs(3-M) == 1 && Fs == 1? -M/2 : M;
+  stage->dft_filter_num = instance;
 }
 
 #include "rate_filters.h"
@@ -245,189 +253,212 @@
 typedef struct {
   double     factor;
   uint64_t   samples_in, samples_out;
-  int        input_stage_num, output_stage_num;
+  int        num_stages;
   stage_t    * stages;
 } rate_t;
 
-#define pre_stage p->stages[-1]
-#define frac_stage p->stages[level]
-#define post_stage p->stages[level + have_frac_stage]
-#define have_frac_stage (realM * fracL != 1)
+#define pre_stage       p->stages[shift]
+#define arb_stage       p->stages[shift + have_pre_stage]
+#define post_stage      p->stages[shift + have_pre_stage + have_arb_stage]
+#define have_pre_stage  (preM  * preL  != 1)
+#define have_arb_stage  (arbM  * arbL  != 1)
+#define have_post_stage (postM * postL != 1)
 
-typedef enum {Default = -1, Quick, Low, Medium, High, Very} quality_t;
+#define TO_3dB(a)       ((1.6e-6*(a)-7.5e-4)*(a)+.646) /* Quadratic in attenuation a; scales a 0dB-pt transition b/w to the 3dB-pt one. Arg parenthesised so expressions are safe. */
+#define LOW_Q_BW0_PC    (67 + 5 / 8.)
 
-static void rate_init(rate_t * p, rate_shared_t * shared, double factor,
-    quality_t quality, int interp_order, double phase, double bandwidth,
-    sox_bool allow_aliasing)
+typedef enum {
+  rolloff_none, rolloff_small /* <= 0.01 dB */, rolloff_medium /* <= 0.35 dB */
+} rolloff_t;
+
+static void rate_init(
+  /* Private work areas (to be supplied by the client):                       */
+  rate_t * p,                /* Per audio channel.                            */
+  rate_shared_t * shared,    /* Between channels (undergoing same rate change)*/
+                            
+  /* Public parameters:                                             Typically */
+  double factor,             /* Input rate divided by output rate.            */
+  double bits,               /* Required bit-accuracy (pass + stop)  16|20|28 */
+  double phase,              /* Linear/minimum etc. filter phase.       50    */
+  double bw_pc,              /* Pass-band % (0dB pt.) to preserve.   91.3|98.4*/
+  double anti_aliasing_pc,   /* % bandwidth without aliasing            100   */
+  rolloff_t rolloff,         /* Pass-band roll-off                    small   */
+  sox_bool maintain_3dB_pt,  /*                                        true   */
+                            
+  /* Primarily for test/development purposes:                                 */
+  sox_bool use_hi_prec_clock,/* Increase irrational ratio accuracy.   false   */
+  int interpolator,          /* Force a particular coef interpolator.   -1    */
+  int max_coefs_size,        /* k bytes of coefs to try to keep below.  400   */
+  sox_bool noSmallIntOpt)    /* Disable small integer optimisations.  false   */
 {
-  int i, preL = 1, preM = 1, level = 0, fracL = 1, postL = 1, postM = 1;
-  sox_bool upsample = sox_false;
-  double realM = factor;
+  double att = (bits + 1) * linear_to_dB(2.), attArb = att;    /* pass + stop */
+  double tbw0 = 1 - bw_pc / 100, Fs_a = 2 - anti_aliasing_pc / 100;
+  double arbM = factor, tbw_tighten = 1;
+  int n = 0, i, preL = 1, preM = 1, shift = 0, arbL = 1, postL = 1, postM = 1;
+  sox_bool upsample = sox_false, rational = sox_false, iOpt = !noSmallIntOpt;
+  int mode = rolloff > rolloff_small? factor > 1 || bw_pc > LOW_Q_BW0_PC :
+    ceil(2 + (bits - 17) / 4);
+  stage_t * s;
 
   assert(factor > 0);
-  p->factor = factor;
+  assert(!bits || (15 <= bits && bits <= 33));
+  assert(0 <= phase && phase <= 100);
+  assert(53 <= bw_pc && bw_pc <= 100);
+  assert(85 <= anti_aliasing_pc && anti_aliasing_pc <= 100);
 
-  if (quality < Quick || quality > Very)
-    quality = High;
-
-  if (quality != Quick) while (sox_true) {
-    const int max_divisor = 2048;      /* Keep coef table size ~< 500kb */
-    double epsilon;
-    upsample = realM < 1;
-    for (i = realM, level = 0; i >>= 1; ++level); /* log base 2 */
-    realM /= 1 << (level + !upsample);
-    epsilon = fabs((uint32_t)(realM * MULT32 + .5) / (realM * MULT32) - 1);
-    for (i = 2; i <= max_divisor && fracL == 1; ++i) {
-      double try_d = realM * i;
-      int try = try_d + .5;
-      if (fabs(try / try_d - 1) <= epsilon) { /* N.B. beware of long doubles */
+  p->factor = factor;
+  if (bits) while (!n++) {                               /* Determine stages: */
+    int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 :
+      ceil(max_coefs_size * 1000. / (U100_l * sizeof(sample_t)));
+    double d, epsilon = 0, frac;
+    upsample = arbM < 1;
+    for (i = arbM * .5, shift = 0; i >>= 1; arbM *= .5, ++shift);
+    preM = 1 - (arbM > 2);
+    postM = 1 + (arbM > 1 && arbM < 2), arbM /= postM;
+    preL = 1 + (upsample && mode), arbM *= preL;
+    if ((frac = arbM - (int)arbM))
+      epsilon = fabs((uint32_t)(frac * MULT32 + .5) / (frac * MULT32) - 1);
+    for (i = 1, rational = !frac; i <= maxL && !rational; ++i) {
+      d = frac * i, try = d + .5;
+      if ((rational = fabs(try / d - 1) <= epsilon)) {    /* No long doubles! */
         if (try == i)
-          realM = 1, fracL = 2, level += !upsample, upsample = sox_false;
-        else realM = try, fracL = i;
+          arbM = ceil(arbM), shift += arbM > 2, arbM /= 1 + (arbM > 2);
+        else arbM = i * (int)arbM + try, arbL = i;
       }
     }
-    if (upsample) {
-      if (postL == 1 && (realM != 1 || fracL > 5) && fracL / realM > 4) {
-        realM = realM * (postL = min((fracL / realM), 4)) / fracL, fracL = 1;
-        continue;
-      }
-      else if ((realM == 2 && fracL == 3) || (realM == 3 && fracL == 4))
-        preL = fracL, preM = realM, fracL = realM = 1;
-      else if (fracL < 6 && realM == 1)
-        preL = fracL, fracL = 1;
-      else if (quality > Low) {
-        preL = 2;
-        if (fracL % preL)
-          realM *= preL;
-        else fracL /= preL;
-      }
-    }
-    else {
-      if (fracL > 2) {
-        int L = fracL, M = realM;
-        for (i = level + 1; i && !(L & 1); L >>= 1, --i);
-        if (((M <<= i) < 7 && L < 3) || M == 4) {
-          preL = L, preM = M, realM = fracL = 1, level = 0, upsample = sox_true;
-          break;
-        }
-      }
-      postM = 2;
-      if (fracL == 2)
-        --fracL, postM -= !level, level -= !!level;
-    }
-    break;
+    L = preL * arbL, M = arbM * postM, x = (L|M)&1, L >>= !x, M >>= !x;
+    if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) {
+      for (postL = 4, i = d / 16; i >>= 1; postL <<= 1);
+      arbM = arbM * postL / arbL / preL, arbL = 1, n = 0;
+    } else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt))
+      preL = L, preM = M, arbM = arbL = postM = 1;
+    if (!mode && (!rational || !n))
+      ++mode, n = 0;
   }
 
-  p->stages = (stage_t *)calloc((size_t)level + 4, sizeof(*p->stages)) + 1;
-  for (i = -1; i <= level + 1; ++i)
+  p->num_stages = shift + have_pre_stage + have_arb_stage + have_post_stage;
+  p->stages = calloc(p->num_stages + 1, sizeof(*p->stages));
+  for (i = 0; i < p->num_stages; ++i)
     p->stages[i].shared = shared;
 
-  p->output_stage_num = level;
+  if ((n = p->num_stages) > 1) {                              /* Att. budget: */
+    if (have_arb_stage)
+      att += linear_to_dB(2.), attArb = att, --n; 
+    att += linear_to_dB((double)n);
+  }
 
-  frac_stage.step.all = realM * MULT32 + .5;
-  frac_stage.out_in_ratio = MULT32 * fracL / frac_stage.step.all;
+  for (n = 0; n + 1u < array_length(half_firs) && att > half_firs[n].att; ++n);
+  for (i = 0, s = p->stages; i < shift; ++i, ++s) {
+    s->fn = half_firs[n].fn;
+    s->pre_post = 4 * half_firs[n].num_coefs;
+    s->preload = s->pre = s->pre_post >> 1;
+  }
 
-  if (quality == Quick) {
-    frac_stage.fn = cubic_spline_fn;
-    frac_stage.pre_post = max(3, frac_stage.step.parts.integer);
-    frac_stage.preload = frac_stage.pre = 1;
-    ++p->output_stage_num;
+  if (have_pre_stage) {
+    if (maintain_3dB_pt && have_post_stage) {    /* Trans. bands overlapping. */
+      double tbw3 = tbw0 * TO_3dB(att);               /* TODO: consider Fs_a. */
+      double x = ((2.1429e-4 - 5.2083e-7 * att) * att - .015863) * att + 3.95;
+      x = att * pow((tbw0 - tbw3) / (postM / (factor * postL) - 1 + tbw0), x);
+      if (x > .035) {
+        tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014;
+        lsx_debug("x=%g tbw_tighten=%g", x, tbw_tighten);
+      }
+    }
+    dft_stage_init(0, 1 - tbw0 * tbw_tighten, Fs_a, preM? max(preL, preM) :
+        arbM / arbL, att, phase, &pre_stage, preL, max(preM, 1));
   }
-  else if (have_frac_stage) {
-    int n = (4 - (quality == Low)) * upsample + range_limit(quality, Medium, Very) - Medium;
-    poly_fir_t const * f = &poly_firs[n];
+
+  if (!bits) {                                  /* Quick and dirty arb stage: */
+    arb_stage.fn = cubic_stage_fn;
+    arb_stage.step.all = arbM * MULT32 + .5;
+    arb_stage.pre_post = max(3, arb_stage.step.parts.integer);
+    arb_stage.preload = arb_stage.pre = 1;
+    arb_stage.out_in_ratio = MULT32 * arbL / arb_stage.step.all;
+  }
+  else if (have_arb_stage) {                     /* Higher quality arb stage: */
+    poly_fir_t const * f = &poly_firs[6*(upsample + !!preM) + mode - !upsample];
+    int order, num_coefs = f->interp[0].scalar, phase_bits, phases, coefs_size;
+    double x = .5, at, Fp, Fs, Fn, mult = upsample? 1 : arbL / arbM;
     poly_fir1_t const * f1;
 
-    if (f->num_coefs & 1) {
-      if (fracL != 1 && (fracL & 1))
-        fracL <<= 1, realM *= 2, frac_stage.step.all <<= 1;
-      frac_stage.at.all = fracL * .5 * MULT32 + .5;
-    }
-    frac_stage.L = fracL;
+    Fn = !upsample && preM? x = arbM / arbL : 1;
+    Fp = !preM? mult : mode? .5 : 1;
+    Fs = 2 - Fp;           /* Ignore Fs_a; it would have little benefit here. */
+    Fp *= 1 - tbw0;
+    if (rolloff > rolloff_small && mode)
+      Fp = !preM? mult * .5 - .125 : mult * .05 + .1;
+    else if (rolloff == rolloff_small)
+      Fp = Fs - (Fs - .148 * x - Fp * .852) * (.00813 * bits + .973);
 
-    if (interp_order < 0)
-      interp_order = quality > High;
-    interp_order = fracL == 1? 1 + interp_order : 0;
-    f1 = &f->interp[interp_order];
+    i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1;
+    do {
+      f1 = &f->interp[++i];
+      assert(f1->fn);
+      if (i)
+        arbM /= arbL, arbL = 1, rational = sox_false;
+      phase_bits = ceil(f1->scalar + log(mult)/log(2.));
+      phases = !rational? (1 << phase_bits) : arbL;
+      if (!f->interp[0].scalar) {
+        int phases0 = max(phases, 19), n0 = 0;
+        lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta);
+        num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM;
+      }
+      if ((num_coefs & 1) && rational && (arbL & 1))
+        phases <<= 1, arbL <<= 1, arbM *= 2;
+      at = arbL * .5 * (num_coefs & 1);
+      order = i + (i && mode > 4);
+      coefs_size = num_coefs4 * phases * (order + 1) * sizeof(sample_t);
+    } while (interpolator < 0 && i < 2 && f->interp[i+1].fn &&
+        coefs_size / 1000 > max_coefs_size);
 
-    if (!frac_stage.shared->poly_fir_coefs) {
-      int phases = fracL == 1? (1 << f1->phase_bits) : fracL;
-      int num_taps = f->num_coefs * phases - 1;
+    if (!arb_stage.shared->poly_fir_coefs) {
+      int num_taps = num_coefs * phases - 1;
       raw_coef_t * coefs = lsx_design_lpf(
-          f->pass, f->stop, 1., sox_false, f->att, &num_taps, phases, -1.);
-      assert(num_taps == f->num_coefs * phases - 1);
-      frac_stage.shared->poly_fir_coefs =
-          prepare_coefs(coefs, f->num_coefs, phases, interp_order, 1);
+          Fp, Fs, Fn, attArb, &num_taps, phases, f->beta);
+      arb_stage.shared->poly_fir_coefs = prepare_coefs(
+          coefs, num_coefs, phases, order, 1);
       lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%s",
-          f->num_coefs, phases, interp_order,
-          lsx_sigfigs3((num_taps +1.) * (interp_order + 1) * sizeof(sample_t)));
+          num_coefs, phases, order, lsx_sigfigs3((double)coefs_size));
       free(coefs);
     }
-    frac_stage.fn = f1->fn;
-    frac_stage.pre_post = f->num_coefs - 1;
-    frac_stage.pre = 0;
-    frac_stage.preload = frac_stage.pre_post >> 1;
-    ++p->output_stage_num;
+    arb_stage.fn = f1->fn;
+    arb_stage.pre_post = num_coefs4 - 1;
+    arb_stage.preload = (num_coefs - 1) >> 1;
+    arb_stage.n = num_coefs4;
+    arb_stage.phase_bits = phase_bits;
+    arb_stage.L = arbL;
+    arb_stage.use_hi_prec_clock = mode > 1 && use_hi_prec_clock && !rational;
+    if (arb_stage.use_hi_prec_clock) {
+      arb_stage.at.hi_prec_clock = at;
+      arb_stage.step.hi_prec_clock = arbM;
+      arb_stage.out_in_ratio = arbL / arb_stage.step.hi_prec_clock;
+    } else {
+      arb_stage.at.all = at * MULT32 + .5;
+      arb_stage.step.all = arbM * MULT32 + .5;
+      arb_stage.out_in_ratio = MULT32 * arbL / arb_stage.step.all;
+    }
   }
-  if (quality == Low && !upsample) {  /* dft is slower here, so */
-    post_stage.fn = half_sample_low;       /* use normal convolution */
-    post_stage.pre_post = 2 * (array_length(half_fir_coefs_low) - 1);
-    post_stage.preload = post_stage.pre = post_stage.pre_post >> 1;
-    ++p->output_stage_num;
-  }
-  else if (quality != Quick) {
-    typedef struct {double bw, a;} filter_t;
-    static filter_t const filters[] = {
-      {.724, 100}, {.931, 110}, {.931, 125}, {.931, 170}};
-    filter_t const * f = &filters[quality - Low];
-    double att = allow_aliasing? (34./33)* f->a : f->a; /* negate att degrade */
-    double bw = bandwidth? 1 - (1 - bandwidth / 100) / LSX_TO_3dB : f->bw;
-    double min = 1 - (allow_aliasing? LSX_MAX_TBW0A : LSX_MAX_TBW0) / 100;
-    double pass = bw * fracL / realM / 2;
-    assert((size_t)(quality - Low) < array_length(filters));
 
-    if (preL * preM != 1) {
-      init_dft_filter(shared, 0, 0, 0, bw, 1., (double)max(preL, preM), att, preL, phase, allow_aliasing);
-      setup_dft_stage(shared, 0, &pre_stage, preL, preM, allow_aliasing);
-      --p->input_stage_num;
-    }
-    else if (level && have_frac_stage && (1 - pass) / (1 - bw) > 2)
-      init_dft_filter(shared, 0, 0, NULL, max(pass, min), 1., 2., att, 1, phase, allow_aliasing);
+  if (have_post_stage)
+    dft_stage_init(1, 1 - (1 - (1 - tbw0) *
+        (upsample? factor * postL / postM : 1)) * tbw_tighten, Fs_a,
+        (double)max(postL, postM), att, phase, &post_stage, postL, postM);
 
-    if (postL * postM != 1) {
-      init_dft_filter(shared, 1, 0, 0,
-          bw * (upsample? factor * postL / postM : 1),
-          1., (double)(upsample? postL : postM), att, postL, phase, allow_aliasing);
-      setup_dft_stage(shared, 1, &post_stage, postL, postM, allow_aliasing);
-      ++p->output_stage_num;
-    }
-  }
-  for (i = p->input_stage_num; i <= p->output_stage_num; ++i) {
-    stage_t * s = &p->stages[i];
-    if (i >= 0 && i < level - have_frac_stage) {
-      s->fn = half_sample_25;
-      s->pre_post = 4 * array_length(half_fir_coefs_25);
-      s->preload = s->pre = s->pre_post >> 1;
-    }
-    else if (level && i == level - 1) {
-      if (shared->dft_filter[0].num_taps)
-        setup_dft_stage(shared, 0, s, 1, 2, allow_aliasing);
-      else *s = post_stage;
-    }
+  for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) {
     fifo_create(&s->fifo, (int)sizeof(sample_t));
     memset(fifo_reserve(&s->fifo, s->preload), 0, sizeof(sample_t)*s->preload);
-    if (i < p->output_stage_num)
-      lsx_debug("stage=%-3ipre_post=%-3ipre=%-3ipreload=%i",
-          i, s->pre_post, s->pre, s->preload);
+    lsx_debug("%5i|%-5i preload=%i remL=%i",
+        s->pre, s->pre_post - s->pre, s->preload, s->remL);
   }
+  fifo_create(&s->fifo, (int)sizeof(sample_t));
 }
 
 static void rate_process(rate_t * p)
 {
-  stage_t * stage = p->stages + p->input_stage_num;
+  stage_t * stage = p->stages;
   int i;
 
-  for (i = p->input_stage_num; i < p->output_stage_num; ++i, ++stage)
+  for (i = 0; i < p->num_stages; ++i, ++stage)
     stage->fn(stage, &(stage+1)->fifo);
 }
 
@@ -434,12 +465,12 @@
 static sample_t * rate_input(rate_t * p, sample_t const * samples, size_t n)
 {
   p->samples_in += n;
-  return fifo_write(&p->stages[p->input_stage_num].fifo, (int)n, samples);
+  return fifo_write(&p->stages[0].fifo, (int)n, samples);
 }
 
 static sample_t const * rate_output(rate_t * p, sample_t * samples, size_t * n)
 {
-  fifo_t * fifo = &p->stages[p->output_stage_num].fifo;
+  fifo_t * fifo = &p->stages[p->num_stages].fifo;
   p->samples_out += *n = min(*n, (size_t)fifo_occupancy(fifo));
   return fifo_read(fifo, (int)*n, samples);
 }
@@ -446,7 +477,7 @@
 
 static void rate_flush(rate_t * p)
 {
-  fifo_t * fifo = &p->stages[p->output_stage_num].fifo;
+  fifo_t * fifo = &p->stages[p->num_stages].fifo;
   uint64_t samples_out = p->samples_in / p->factor + .5;
   size_t remaining = samples_out - p->samples_out;
   sample_t * buff = calloc(1024, sizeof(*buff));
@@ -467,14 +498,13 @@
   rate_shared_t * shared = p->stages[0].shared;
   int i;
 
-  for (i = p->input_stage_num; i <= p->output_stage_num; ++i)
+  for (i = 0; i <= p->num_stages; ++i)
     fifo_delete(&p->stages[i].fifo);
   free(shared->dft_filter[0].coefs);
-  if (shared->dft_filter[1].coefs != shared->dft_filter[0].coefs)
-    free(shared->dft_filter[1].coefs);
+  free(shared->dft_filter[1].coefs);
   free(shared->poly_fir_coefs);
   memset(shared, 0, sizeof(*shared));
-  free(p->stages - 1);
+  free(p->stages);
 }
 
 /*------------------------------- SoX Wrapper --------------------------------*/
@@ -481,9 +511,9 @@
 
 typedef struct {
   sox_rate_t      out_rate;
-  int             quality;
-  double          coef_interp, phase, bandwidth;
-  sox_bool        allow_aliasing;
+  int             rolloff, coef_interp, max_coefs_size;
+  double          bit_depth, phase, bw_0dB_pc, anti_aliasing_pc;
+  sox_bool        use_hi_prec_clock, noIOpt, given_0dB_pt;
   rate_t          rate;
   rate_shared_t   shared, * shared_ptr;
 } priv_t;
@@ -491,38 +521,90 @@
 static int create(sox_effect_t * effp, int argc, char **argv)
 {
   priv_t * p = (priv_t *) effp->priv;
-  int c;
-  char * dummy_p, * found_at, * opts = "+i:b:p:MILaosqlmhv", * qopts = opts +13;
+  int c, quality;
+  char * dummy_p, * found_at;
+  char const * opts = "+i:c:b:B:A:p:Q:R:d:MILafnost" "qlmghevu";
+  char const * qopts = strchr(opts, 'q');
+  double rej = 0, bw_3dB_pc = 0;
+  sox_bool allow_aliasing = sox_false;
   lsx_getopt_t optstate;
   lsx_getopt_init(argc, argv, opts, NULL, lsx_getopt_flag_none, 1, &optstate);
 
-  p->quality = -1;
+  p->coef_interp = quality = -1;
+  p->rolloff = rolloff_small;
   p->phase = 50;
+  p->max_coefs_size = 400;
   p->shared_ptr = &p->shared;
 
   while ((c = lsx_getopt(&optstate)) != -1) switch (c) {
-    GETOPT_NUMERIC(optstate, 'i', coef_interp, 1 , 3)
-    GETOPT_NUMERIC(optstate, 'p', phase,  0 , 100)
-    GETOPT_NUMERIC(optstate, 'b', bandwidth,  100 - LSX_MAX_TBW3, 99.7)
+    GETOPT_NUMERIC(optstate, 'i', coef_interp, -1, 2)
+    GETOPT_NUMERIC(optstate, 'c', max_coefs_size, 100, INT_MAX)
+    GETOPT_NUMERIC(optstate, 'p', phase, 0, 100)
+    GETOPT_NUMERIC(optstate, 'B', bw_0dB_pc, 53, 99.5)
+    GETOPT_NUMERIC(optstate, 'A', anti_aliasing_pc, 85, 100)
+    GETOPT_NUMERIC(optstate, 'd', bit_depth, 15, 33)
+    GETOPT_LOCAL_NUMERIC(optstate, 'b', bw_3dB_pc, 74, 99.7)
+    GETOPT_LOCAL_NUMERIC(optstate, 'R', rej, 90, 200)
+    GETOPT_LOCAL_NUMERIC(optstate, 'Q', quality, 0, 7)
     case 'M': p->phase =  0; break;
     case 'I': p->phase = 25; break;
     case 'L': p->phase = 50; break;
-    case 'a': p->allow_aliasing = sox_true; break;
-    case 's': p->bandwidth = 99; break;
-    default: if ((found_at = strchr(qopts, c))) p->quality = found_at - qopts;
-      else {lsx_fail("unknown option `-%c'", optstate.opt); return lsx_usage(effp);}
+    case 'a': allow_aliasing = sox_true; break;
+    case 'f': p->rolloff = rolloff_none; break;
+    case 'n': p->noIOpt = sox_true; break;
+    case 's': bw_3dB_pc = 99; break;
+    case 't': p->use_hi_prec_clock = sox_true; break;
+    default:
+      if ((found_at = strchr(qopts, c)))
+        quality = found_at - qopts;
+      else {
+        lsx_fail("unknown option `-%c'", optstate.opt);
+        return lsx_usage(effp);
+      }
   }
   argc -= optstate.ind, argv += optstate.ind;
 
-  if ((unsigned)p->quality < 2 && (p->bandwidth || p->phase != 50 || p->allow_aliasing)) {
+  if ((unsigned)quality < 2 && (p->bw_0dB_pc || bw_3dB_pc || p->phase != 50 ||
+        allow_aliasing || rej || p->bit_depth || p->anti_aliasing_pc)) {
     lsx_fail("override options not allowed with this quality level");
     return SOX_EOF;
   }
+  if (quality < 0 && rej == 0 && p->bit_depth == 0)
+    quality = 4;
+  if (rej)
+    p->bit_depth = rej / linear_to_dB(2.);
+  else {
+    if (quality >= 0) {
+      p->bit_depth = quality? 16 + 4 * max(quality - 3, 0) : 0;
+      if (quality <= 2)
+        p->rolloff = rolloff_medium;
+    }
+    rej = p->bit_depth * linear_to_dB(2.);
+  }
 
-  if (p->bandwidth && p->bandwidth < 100 - LSX_MAX_TBW3A && p->allow_aliasing) {
-    lsx_fail("minimum allowed bandwidth with aliasing is %g%%", 100 - LSX_MAX_TBW3A);
+  if (bw_3dB_pc && p->bw_0dB_pc) {
+    lsx_fail("conflicting bandwidth options");
     return SOX_EOF;
   }
+  allow_aliasing |= p->anti_aliasing_pc != 0;
+  if (!bw_3dB_pc && !p->bw_0dB_pc)      /* Neither -b nor -B given: default. */
+    p->bw_0dB_pc = quality == 1? LOW_Q_BW0_PC : 100 - 5 / TO_3dB(rej);
+  else if (bw_3dB_pc && bw_3dB_pc < 85 && allow_aliasing) {
+    lsx_fail("minimum allowed 3dB bandwidth with aliasing is %g%%", 85.); /* %g takes a double; an int argument is undefined behaviour */
+    return SOX_EOF;
+  }
+  else if (p->bw_0dB_pc && p->bw_0dB_pc < 74 && allow_aliasing) {
+    lsx_fail("minimum allowed bandwidth with aliasing is %g%%", 74.); /* %g takes a double; an int argument is undefined behaviour */
+    return SOX_EOF;
+  }
+  if (bw_3dB_pc)
+    p->bw_0dB_pc = 100 - (100 - bw_3dB_pc) / TO_3dB(rej);
+  else {
+    bw_3dB_pc = 100 - (100 - p->bw_0dB_pc) * TO_3dB(rej);
+    p->given_0dB_pt = sox_true;
+  }
+  p->anti_aliasing_pc = p->anti_aliasing_pc? p->anti_aliasing_pc :
+    allow_aliasing? bw_3dB_pc : 100;
 
   if (argc) {
     if ((p->out_rate = lsx_parse_frequency(*argv, &dummy_p)) <= 0 || *dummy_p)
@@ -546,9 +628,9 @@
 
   effp->out_signal.channels = effp->in_signal.channels;
   effp->out_signal.rate = out_rate;
-  rate_init(&p->rate, p->shared_ptr, effp->in_signal.rate / out_rate,
-      p->quality, (int)p->coef_interp - 1, p->phase, p->bandwidth,
-      p->allow_aliasing);
+  rate_init(&p->rate, p->shared_ptr, effp->in_signal.rate/out_rate,p->bit_depth,
+      p->phase, p->bw_0dB_pc, p->anti_aliasing_pc, p->rolloff, !p->given_0dB_pt,
+      p->use_hi_prec_clock, p->coef_interp, p->max_coefs_size, p->noIOpt);
   return SOX_SUCCESS;
 }
 
@@ -560,11 +642,12 @@
   SOX_SAMPLE_LOCALS;
 
   sample_t const * s = rate_output(&p->rate, NULL, &odone);
-  for (i = 0; i < odone; ++i) *obuf++ = TO_SOX(*s++, effp->clips);
+  for (i = 0; i < odone; ++i)
+    *obuf++ = SOX_FLOAT_64BIT_TO_SAMPLE(*s++, effp->clips);
 
   if (*isamp && odone < *osamp) {
     sample_t * t = rate_input(&p->rate, NULL, *isamp);
-    for (i = *isamp; i; --i) *t++ = FROM_SOX(*ibuf++,);
+    for (i = *isamp; i; --i) *t++ = SOX_SAMPLE_TO_FLOAT_64BIT(*ibuf++,);
     rate_process(&p->rate);
   }
   else *isamp = 0;
--- a/src/rate_filters.h
+++ b/src/rate_filters.h
@@ -1,4 +1,4 @@
-/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+/* Effect: change sample rate  Copyright (c) 2008,12 robs@users.sourceforge.net
  *
  * This library is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published by
@@ -15,139 +15,120 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/* Generated by m4 */
+static const sample_t half_fir_coefs_8[] = {
+  0.3115465451887802, -0.08734497241282892, 0.03681452335604365,
+  -0.01518925831569441, 0.005454118437408876, -0.001564400922162005,
+  0.0003181701445034203, -3.48001341225749e-5,
+};
+#define FUNCTION h8
+#define CONVOLVE _ _ _ _ _ _ _ _
+#define h8_l 8
+#define COEFS half_fir_coefs_8
+#include "rate_half_fir.h"
 
-static const sample_t half_fir_coefs_25[] = {
-  3.1358440327836512e-01, -9.2701477245364594e-02, 4.3647483867630447e-02,
-  -2.1545228788689186e-02, 1.0119340890565588e-02, -4.3181204279612133e-03,
-  1.6176661095102525e-03, -5.1348399782997947e-04, 1.3185858795078468e-04,
-  -2.5493512192147390e-05, 3.2461554199264636e-06, -1.9450196215470593e-07,
+static const sample_t half_fir_coefs_9[] = {
+  0.3122703613711853, -0.08922155288172305, 0.03913974805854332,
+  -0.01725059723447163, 0.006858970092378141, -0.002304518467568703,
+  0.0006096426006051062, -0.0001132393923815236, 1.119795386287666e-5,
 };
-static const sample_t half_fir_coefs_low[] = {
-  4.2759802493108773e-001, 3.0939308096100709e-001, 6.9285325719540158e-002,
-  -8.0642059355533674e-002, -6.0528749718348158e-002, 2.5228940037788555e-002,
-  4.7756850372993369e-002, 8.7463256642532057e-004, -3.3208422093026498e-002,
-  -1.3425983316344854e-002, 1.9188320662637096e-002, 1.7478840713827052e-002,
-  -7.5527851809344612e-003, -1.6145235261724403e-002, -6.3013968965413430e-004,
-  1.1965551091184719e-002, 5.1714613100614501e-003, -6.9898749683755968e-003,
-  -6.6150222806158742e-003, 2.6394681964090937e-003, 5.9365183404658526e-003,
-  3.5567920638016650e-004, -4.2031898513566123e-003, -1.8738555289555877e-003,
-  2.2991238738122328e-003, 2.2058519188488186e-003, -7.7796582498205363e-004,
-  -1.8212814627239918e-003, -1.4964619042558244e-004, 1.1706370821176716e-003,
-  5.3082071395224866e-004, -5.6771020453353900e-004, -5.4472363026668942e-004,
-  1.5914542178505357e-004, 3.8911127354338085e-004, 4.2076035174603683e-005,
-  -2.1015548483049000e-004, -9.5381290156278399e-005, 8.0903081108059553e-005,
-  7.5812875822003258e-005, -1.5004304266040688e-005, -3.9149443482028750e-005,
-  -6.0893901283459912e-006, 1.4040363940567877e-005, 4.9834316581482789e-006,
+#define FUNCTION h9
+#define CONVOLVE _ _ _ _ _ _ _ _ _
+#define h9_l 9
+#define COEFS half_fir_coefs_9
+#include "rate_half_fir.h"
+
+static const sample_t half_fir_coefs_10[] = {
+  0.3128545521327376, -0.09075671986104322, 0.04109637155154835,
+  -0.01906629512749895, 0.008184039342054333, -0.0030766775017262,
+  0.0009639607022414314, -0.0002358552746579827, 4.025184282444155e-5,
+  -3.629779111541012e-6,
 };
-#define FUNCTION half_sample_25 
-#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _
-#define COEFS half_fir_coefs_25 
-#define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j;
-static void FUNCTION(stage_t * p, fifo_t * output_fifo)
-{
-  sample_t const * input = stage_read_p(p);
-  int i, num_out = (stage_occupancy(p) + 1) / 2;
-  sample_t * output = fifo_reserve(output_fifo, num_out);
+#define FUNCTION h10
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _
+#define h10_l 10
+#define COEFS half_fir_coefs_10
+#include "rate_half_fir.h"
 
-  for (i = 0; i < num_out; ++i, input += 2) {
-    int j = 0;
-    sample_t sum = input[0] * .5;
-    CONVOLVE
-    output[i] = sum;
-  }
-  fifo_read(&p->fifo, 2 * num_out, NULL);
-}
-#undef _
-#undef COEFS
-#undef CONVOLVE
-#undef FUNCTION
-#define FUNCTION half_sample_low
-#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-#define COEFS half_fir_coefs_low
-assert_static(!((array_length(COEFS)- 1) & 1), HALF_FIR_LENGTH_low);
+static const sample_t half_fir_coefs_11[] = {
+  0.3133358837508807, -0.09203588680609488, 0.04276515428384758,
+  -0.02067356614745591, 0.00942253142371517, -0.003856330993895144,
+  0.001363470684892284, -0.0003987400965541919, 9.058629923971627e-5,
+  -1.428553070915318e-5, 1.183455238783835e-6,
+};
+#define FUNCTION h11
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _
+#define h11_l 11
+#define COEFS half_fir_coefs_11
+#include "rate_half_fir.h"
+
+static const sample_t half_fir_coefs_12[] = {
+  0.3137392991811407, -0.0931182192961332, 0.0442050575271454,
+  -0.02210391200618091, 0.01057473015666001, -0.00462766983973885,
+  0.001793630226239453, -0.0005961819959665878, 0.0001631475979359577,
+  -3.45557865639653e-5, 5.06188341942088e-6, -3.877010943315563e-7,
+};
+#define FUNCTION h12
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _
+#define h12_l 12
+#define COEFS half_fir_coefs_12
 #include "rate_half_fir.h"
-#define d100_l 16
-#define poly_fir_convolve_d100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-#define FUNCTION d100_0
-#define FIR_LENGTH d100_l
-#define CONVOLVE poly_fir_convolve_d100
+
+static const sample_t half_fir_coefs_13[] = {
+  0.3140822554324578, -0.0940458550886253, 0.04545990399121566,
+  -0.02338339450796002, 0.01164429409071052, -0.005380686021429845,
+  0.002242915773871009, -0.000822047600000082, 0.0002572510962395222,
+  -6.607320708956279e-5, 1.309926399120154e-5, -1.790719575255006e-6,
+  1.27504961098836e-7,
+};
+#define FUNCTION h13
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ _
+#define h13_l 13
+#define COEFS half_fir_coefs_13
+#include "rate_half_fir.h"
+
+static struct {int num_coefs; stage_fn_t fn; float att;} const half_firs[] = {
+  { 8, h8 , 136.51},
+  { 9, h9 , 152.32},
+  {10, h10, 168.07},
+  {11, h11, 183.78},
+  {12, h12, 199.44},
+  {13, h13, 212.75},
+};
+
+#define HI_PREC_CLOCK
+
+#define VAR_LENGTH p->n
+#define VAR_CONVOLVE while (j < FIR_LENGTH) _
+#define VAR_POLY_PHASE_BITS p->phase_bits
+
+#define FUNCTION vpoly0
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
 #include "rate_poly_fir0.h"
-#define FUNCTION d100_1
+
+#define FUNCTION vpoly1
 #define COEF_INTERP 1
-#define PHASE_BITS 9
-#define FIR_LENGTH d100_l
-#define CONVOLVE poly_fir_convolve_d100
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
 #include "rate_poly_fir.h"
-#define d100_1_b 9
-#define FUNCTION d100_2
+
+#define FUNCTION vpoly2
 #define COEF_INTERP 2
-#define PHASE_BITS 7
-#define FIR_LENGTH d100_l
-#define CONVOLVE poly_fir_convolve_d100
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
 #include "rate_poly_fir.h"
-#define d100_2_b 7
-#define FUNCTION d100_3
+
+#define FUNCTION vpoly3
 #define COEF_INTERP 3
-#define PHASE_BITS 6
-#define FIR_LENGTH d100_l
-#define CONVOLVE poly_fir_convolve_d100
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
 #include "rate_poly_fir.h"
-#define d100_3_b 6
-#define d120_l 30
-#define poly_fir_convolve_d120 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-#define FUNCTION d120_0
-#define FIR_LENGTH d120_l
-#define CONVOLVE poly_fir_convolve_d120
-#include "rate_poly_fir0.h"
-#define FUNCTION d120_1
-#define COEF_INTERP 1
-#define PHASE_BITS 10
-#define FIR_LENGTH d120_l
-#define CONVOLVE poly_fir_convolve_d120
-#include "rate_poly_fir.h"
-#define d120_1_b 10
-#define FUNCTION d120_2
-#define COEF_INTERP 2
-#define PHASE_BITS 9
-#define FIR_LENGTH d120_l
-#define CONVOLVE poly_fir_convolve_d120
-#include "rate_poly_fir.h"
-#define d120_2_b 9
-#define FUNCTION d120_3
-#define COEF_INTERP 3
-#define PHASE_BITS 7
-#define FIR_LENGTH d120_l
-#define CONVOLVE poly_fir_convolve_d120
-#include "rate_poly_fir.h"
-#define d120_3_b 7
-#define d150_l 38
-#define poly_fir_convolve_d150 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-#define FUNCTION d150_0
-#define FIR_LENGTH d150_l
-#define CONVOLVE poly_fir_convolve_d150
-#include "rate_poly_fir0.h"
-#define FUNCTION d150_1
-#define COEF_INTERP 1
-#define PHASE_BITS 12
-#define FIR_LENGTH d150_l
-#define CONVOLVE poly_fir_convolve_d150
-#include "rate_poly_fir.h"
-#define d150_1_b 12
-#define FUNCTION d150_2
-#define COEF_INTERP 2
-#define PHASE_BITS 10
-#define FIR_LENGTH d150_l
-#define CONVOLVE poly_fir_convolve_d150
-#include "rate_poly_fir.h"
-#define d150_2_b 10
-#define FUNCTION d150_3
-#define COEF_INTERP 3
-#define PHASE_BITS 8
-#define FIR_LENGTH d150_l
-#define CONVOLVE poly_fir_convolve_d150
-#include "rate_poly_fir.h"
-#define d150_3_b 8
+
+#undef HI_PREC_CLOCK
+
 #define U100_l 42
 #define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
 #define FUNCTION U100_0
@@ -154,27 +135,7 @@
 #define FIR_LENGTH U100_l
 #define CONVOLVE poly_fir_convolve_U100
 #include "rate_poly_fir0.h"
-#define FUNCTION U100_1
-#define COEF_INTERP 1
-#define PHASE_BITS 10
-#define FIR_LENGTH U100_l
-#define CONVOLVE poly_fir_convolve_U100
-#include "rate_poly_fir.h"
-#define U100_1_b 10
-#define FUNCTION U100_2
-#define COEF_INTERP 2
-#define PHASE_BITS 8
-#define FIR_LENGTH U100_l
-#define CONVOLVE poly_fir_convolve_U100
-#include "rate_poly_fir.h"
-#define U100_2_b 8
-#define FUNCTION U100_3
-#define COEF_INTERP 3
-#define PHASE_BITS 6
-#define FIR_LENGTH U100_l
-#define CONVOLVE poly_fir_convolve_U100
-#include "rate_poly_fir.h"
-#define U100_3_b 6
+
 #define u100_l 11
 #define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _
 #define FUNCTION u100_0
@@ -181,90 +142,46 @@
 #define FIR_LENGTH u100_l
 #define CONVOLVE poly_fir_convolve_u100
 #include "rate_poly_fir0.h"
+
 #define FUNCTION u100_1
 #define COEF_INTERP 1
-#define PHASE_BITS 9
+#define PHASE_BITS 8
 #define FIR_LENGTH u100_l
 #define CONVOLVE poly_fir_convolve_u100
 #include "rate_poly_fir.h"
-#define u100_1_b 9
+#define u100_1_b 8
+
 #define FUNCTION u100_2
 #define COEF_INTERP 2
-#define PHASE_BITS 7
-#define FIR_LENGTH u100_l
-#define CONVOLVE poly_fir_convolve_u100
-#include "rate_poly_fir.h"
-#define u100_2_b 7
-#define FUNCTION u100_3
-#define COEF_INTERP 3
 #define PHASE_BITS 6
 #define FIR_LENGTH u100_l
 #define CONVOLVE poly_fir_convolve_u100
 #include "rate_poly_fir.h"
-#define u100_3_b 6
-#define u120_l 15
-#define poly_fir_convolve_u120 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-#define FUNCTION u120_0
-#define FIR_LENGTH u120_l
-#define CONVOLVE poly_fir_convolve_u120
-#include "rate_poly_fir0.h"
-#define FUNCTION u120_1
-#define COEF_INTERP 1
-#define PHASE_BITS 10
-#define FIR_LENGTH u120_l
-#define CONVOLVE poly_fir_convolve_u120
-#include "rate_poly_fir.h"
-#define u120_1_b 10
-#define FUNCTION u120_2
-#define COEF_INTERP 2
-#define PHASE_BITS 8
-#define FIR_LENGTH u120_l
-#define CONVOLVE poly_fir_convolve_u120
-#include "rate_poly_fir.h"
-#define u120_2_b 8
-#define FUNCTION u120_3
-#define COEF_INTERP 3
-#define PHASE_BITS 6
-#define FIR_LENGTH u120_l
-#define CONVOLVE poly_fir_convolve_u120
-#include "rate_poly_fir.h"
-#define u120_3_b 6
-#define u150_l 21
-#define poly_fir_convolve_u150 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-#define FUNCTION u150_0
-#define FIR_LENGTH u150_l
-#define CONVOLVE poly_fir_convolve_u150
-#include "rate_poly_fir0.h"
-#define FUNCTION u150_1
-#define COEF_INTERP 1
-#define PHASE_BITS 11
-#define FIR_LENGTH u150_l
-#define CONVOLVE poly_fir_convolve_u150
-#include "rate_poly_fir.h"
-#define u150_1_b 11
-#define FUNCTION u150_2
-#define COEF_INTERP 2
-#define PHASE_BITS 9
-#define FIR_LENGTH u150_l
-#define CONVOLVE poly_fir_convolve_u150
-#include "rate_poly_fir.h"
-#define u150_2_b 9
-#define FUNCTION u150_3
-#define COEF_INTERP 3
-#define PHASE_BITS 7
-#define FIR_LENGTH u150_l
-#define CONVOLVE poly_fir_convolve_u150
-#include "rate_poly_fir.h"
-#define u150_3_b 7
-typedef struct {int phase_bits; stage_fn_t fn;} poly_fir1_t;
-typedef struct {int num_coefs; double pass, stop, att; poly_fir1_t interp[4];} poly_fir_t;
+#define u100_2_b 6
+
+typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t;
+typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t;
+
 static poly_fir_t const poly_firs[] = {
-  {d100_l, .75,1.5, 108, {{0, d100_0}, {d100_1_b, d100_1}, {d100_2_b, d100_2}, {d100_3_b, d100_3}}},
-  {d120_l,  1, 1.5, 133, {{0, d120_0}, {d120_1_b, d120_1}, {d120_2_b, d120_2}, {d120_3_b, d120_3}}},
-  {d150_l,  1, 1.5, 165, {{0, d150_0}, {d150_1_b, d150_1}, {d150_2_b, d150_2}, {d150_3_b, d150_3}}},
-  {U100_l, .724, 1, 105, {{0, U100_0}, {U100_1_b, U100_1}, {U100_2_b, U100_2}, {U100_3_b, U100_3}}},
-  {u100_l, .3, 1.5, 107, {{0, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}, {u100_3_b, u100_3}}},
-  {u120_l, .5, 1.5, 125, {{0, u120_0}, {u120_1_b, u120_1}, {u120_2_b, u120_2}, {u120_3_b, u120_3}}},
-  {u150_l, .5, 1.5, 174, {{0, u150_0}, {u150_1_b, u150_1}, {u150_2_b, u150_2}, {u150_3_b, u150_3}}},
+  {-1, {{0, vpoly0}, { 7.2, vpoly1}, {5.0, vpoly2}}}, 
+  {-1, {{0, vpoly0}, { 9.4, vpoly1}, {6.7, vpoly2}}}, 
+  {-1, {{0, vpoly0}, {12.4, vpoly1}, {7.8, vpoly2}}}, 
+  {-1, {{0, vpoly0}, {13.6, vpoly1}, {9.3, vpoly2}}}, 
+  {-1, {{0, vpoly0}, {10.5, vpoly2}, {8.4, vpoly3}}}, 
+  {-1, {{0, vpoly0}, {11.85,vpoly2}, {9.0, vpoly3}}}, 
+ 
+  {-1, {{0, vpoly0}, { 8.0, vpoly1}, {5.3, vpoly2}}}, 
+  {-1, {{0, vpoly0}, { 8.6, vpoly1}, {5.7, vpoly2}}}, 
+  {-1, {{0, vpoly0}, {10.6, vpoly1}, {6.75,vpoly2}}}, 
+  {-1, {{0, vpoly0}, {12.6, vpoly1}, {8.6, vpoly2}}}, 
+  {-1, {{0, vpoly0}, { 9.6, vpoly2}, {7.6, vpoly3}}}, 
+  {-1, {{0, vpoly0}, {11.4, vpoly2}, {8.65,vpoly3}}}, 
+               
+  {10.62, {{U100_l, U100_0}, {0, 0}, {0, 0}}}, 
+  {11.28, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}}, 
+  {-1, {{0, vpoly0}, {   9, vpoly1}, {  6, vpoly2}}}, 
+  {-1, {{0, vpoly0}, {  11, vpoly1}, {  7, vpoly2}}}, 
+  {-1, {{0, vpoly0}, {  13, vpoly1}, {  8, vpoly2}}}, 
+  {-1, {{0, vpoly0}, {  10, vpoly2}, {  8, vpoly3}}}, 
+  {-1, {{0, vpoly0}, {  12, vpoly2}, {  9, vpoly3}}}, 
 };
-
--- a/src/rate_half_fir.h
+++ b/src/rate_half_fir.h
@@ -1,4 +1,4 @@
-/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+/* Effect: change sample rate  Copyright (c) 2008,12 robs@users.sourceforge.net
  *
  * This library is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published by
@@ -18,7 +18,7 @@
 /* Down-sample by a factor of 2 using a FIR with odd length (LEN).*/
 /* Input must be preceded and followed by LEN >> 1 samples. */
 
-#define _ sum += (input[-j] + input[j]) * COEFS[j], ++j;
+#define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j;
 static void FUNCTION(stage_t * p, fifo_t * output_fifo)
 {
   sample_t const * input = stage_read_p(p);
@@ -26,10 +26,9 @@
   sample_t * output = fifo_reserve(output_fifo, num_out);
 
   for (i = 0; i < num_out; ++i, input += 2) {
-    int j = 1;
-    sample_t sum = input[0] * COEFS[0];
+    int j = 0;
+    sample_t sum = input[0] * .5;
     CONVOLVE
-    assert(j == array_length(COEFS));
     output[i] = sum;
   }
   fifo_read(&p->fifo, 2 * num_out, NULL);
--- a/src/rate_poly_fir.h
+++ b/src/rate_poly_fir.h
@@ -1,4 +1,4 @@
-/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+/* Effect: change sample rate  Copyright (c) 2008,12 robs@users.sourceforge.net
  *
  * This library is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published by
@@ -15,9 +15,8 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/* Up-sample by step in (0,1) using a poly-phase FIR with length LEN.*/
-/* Input must be preceded by LEN >> 1 samples. */
-/* Input must be followed by (LEN-1) >> 1 samples. */
+/* Resample using an interpolated poly-phase FIR with length LEN.*/
+/* Input must be followed by LEN-1 samples. */
 
 #define a (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 0,j))
 #define b (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 1,j))
@@ -24,13 +23,13 @@
 #define c (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 2,j))
 #define d (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 3,j))
 #if COEF_INTERP == 0
-  #define _ sum += a *at[j], ++j;
+  #define _ sum += a *in[j], ++j;
 #elif COEF_INTERP == 1
-  #define _ sum += (b *x + a)*at[j], ++j;
+  #define _ sum += (b *x + a)*in[j], ++j;
 #elif COEF_INTERP == 2
-  #define _ sum += ((c *x + b)*x + a)*at[j], ++j;
+  #define _ sum += ((c *x + b)*x + a)*in[j], ++j;
 #elif COEF_INTERP == 3
-  #define _ sum += (((d*x + c)*x + b)*x + a)*at[j], ++j;
+  #define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j;
 #else
   #error COEF_INTERP
 #endif
@@ -41,23 +40,43 @@
   int i, num_in = stage_occupancy(p), max_num_out = 1 + num_in*p->out_in_ratio;
   sample_t * output = fifo_reserve(output_fifo, max_num_out);
 
-  for (i = 0; p->at.parts.integer < num_in; ++i, p->at.all += p->step.all) {
-    sample_t const * at = input + p->at.parts.integer;
-    uint32_t fraction = p->at.parts.fraction;
-    int phase = fraction >> (32 - PHASE_BITS); /* high-order bits */
+#if defined HI_PREC_CLOCK
+  if (p->use_hi_prec_clock) {
+    hi_prec_clock_t at = p->at.hi_prec_clock;
+    for (i = 0; (int)at < num_in; ++i, at += p->step.hi_prec_clock) {
+      sample_t const * in = input + (int)at;
+      hi_prec_clock_t fraction = at - (int)at;
+      int phase = fraction * (1 << PHASE_BITS);
+#if COEF_INTERP > 0
+      sample_t x = fraction * (1 << PHASE_BITS) - phase;
+#endif
+      sample_t sum = 0;
+      int j = 0;
+      CONVOLVE
+      output[i] = sum;
+    }
+    fifo_read(&p->fifo, (int)at, NULL);
+    p->at.hi_prec_clock = at - (int)at;
+  } else
+#endif
+  {
+    for (i = 0; p->at.parts.integer < num_in; ++i, p->at.all += p->step.all) {
+      sample_t const * in = input + p->at.parts.integer;
+      uint32_t fraction = p->at.parts.fraction;
+      int phase = fraction >> (32 - PHASE_BITS); /* high-order bits */
 #if COEF_INTERP > 0              /* low-order bits, scaled to [0,1) */
-    sample_t x = (sample_t) (fraction << PHASE_BITS) * (1 / MULT32);
+      sample_t x = (sample_t) (fraction << PHASE_BITS) * (1 / MULT32);
 #endif
-    sample_t sum = 0;
-    int j = 0;
-    CONVOLVE
-    assert(j == FIR_LENGTH);
-    output[i] = sum;
+      sample_t sum = 0;
+      int j = 0;
+      CONVOLVE
+      output[i] = sum;
+    }
+    fifo_read(&p->fifo, p->at.parts.integer, NULL);
+    p->at.parts.integer = 0;
   }
   assert(max_num_out - i >= 0);
   fifo_trim_by(output_fifo, max_num_out - i);
-  fifo_read(&p->fifo, p->at.parts.integer, NULL);
-  p->at.parts.integer = 0;
 }
 
 #undef _
--- a/src/rate_poly_fir0.h
+++ b/src/rate_poly_fir0.h
@@ -1,4 +1,4 @@
-/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+/* Effect: change sample rate  Copyright (c) 2008,12 robs@users.sourceforge.net
  *
  * This library is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published by
@@ -15,9 +15,8 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/* Up-sample by rational step in (0,1) using a poly-phase FIR, length LEN.*/
-/* Input must be preceded by LEN >> 1 samples. */
-/* Input must be followed by (LEN-1) >> 1 samples. */
+/* Resample using a non-interpolated poly-phase FIR with length LEN.*/
+/* Input must be followed by LEN-1 samples. */
 
 #define _ sum += (coef(p->shared->poly_fir_coefs, 0, FIR_LENGTH, divided.rem, 0, j)) *at[j], ++j;
 
@@ -34,7 +33,6 @@
     sample_t sum = 0;
     int j = 0;
     CONVOLVE
-    assert(j == FIR_LENGTH);
     output[i] = sum;
   }
   assert(max_num_out - i >= 0);
@@ -41,7 +39,7 @@
   fifo_trim_by(output_fifo, max_num_out - i);
   divided2 = div(p->at.parts.integer, p->L);
   fifo_read(&p->fifo, divided2.quot, NULL);
-  p->at.parts.integer -= divided2.quot * p->L;
+  p->at.parts.integer = divided2.rem;
 }
 
 #undef _
--- a/src/sinc.c
+++ b/src/sinc.c
@@ -86,7 +86,7 @@
     return NULL;
   }
   att = att? att : 120;
-  lsx_kaiser_params(att, (tbw? tbw / Fn : .05) * .5, beta, num_taps);
+  lsx_kaiser_params(att, Fc, (tbw? tbw / Fn : .05) * .5, beta, num_taps);
   if (!n) {
     n = *num_taps;
     *num_taps = range_limit(n, 11, 32767);
@@ -94,7 +94,7 @@
       *num_taps = 1 + 2 * (int)((int)((*num_taps / 2) * Fc + .5) / Fc + .5);
     lsx_report("num taps = %i (from %i)", *num_taps, n);
   }
-  return lsx_make_lpf(*num_taps |= 1, Fc, *beta, 1., sox_false);
+  return lsx_make_lpf(*num_taps |= 1, Fc, *beta, 0., 1., sox_false);
 }
 
 static int start(sox_effect_t * effp)
--- a/src/sox.c
+++ b/src/sox.c
@@ -215,6 +215,8 @@
 
 static sox_bool stdin_is_a_tty, is_player, is_guarded, do_guarded_norm, no_dither, reported_sox_opts;
 
+static struct timeval load_timeofday; /* Time at which main() was entered; used to report start-up time */
+
 static void cleanup(void)
 {
   size_t i;
@@ -1784,6 +1786,13 @@
 
   signal(SIGTERM, sigint); /* Stop gracefully, as soon as we possibly can. */
   signal(SIGINT , sigint); /* Either skip current input or behave as SIGTERM. */
+  if (very_first_effchain) {
+    struct timeval now;
+    double d;
+    gettimeofday(&now, NULL);
+    d = now.tv_sec - load_timeofday.tv_sec + (now.tv_usec - load_timeofday.tv_usec) / TIME_FRAC;
+    lsx_debug("start-up time = %g", d);
+  }
   flow_status = sox_flow_effects(effects_chain, update_status, NULL);
 
   /* Don't return SOX_EOF if
@@ -1914,6 +1923,7 @@
 "--combine concatenate    Concatenate all input files (default for sox, rec)",
 "--combine sequence       Sequence all input files (default for play)",
 "-D, --no-dither          Don't dither automatically",
+"--dft-min NUM            Minimum size (log2) for DFT processing (default 10)",
 "--effects-file FILENAME  File containing effects and options",
 "-G, --guard              Use temporary files to guard against clipping",
 "-h, --help               Display version number and usage information",
@@ -2177,6 +2187,7 @@
   {"clobber"         , lsx_option_arg_none    , NULL, 0},
   {"no-clobber"      , lsx_option_arg_none    , NULL, 0},
   {"multi-threaded"  , lsx_option_arg_none    , NULL, 0},
+  {"dft-min"         , lsx_option_arg_required, NULL, 0},
 
   {"bits"            , lsx_option_arg_required, NULL, 'b'},
   {"channels"        , lsx_option_arg_required, NULL, 'c'},
@@ -2362,6 +2373,13 @@
       case 22: no_clobber = sox_false; break;
       case 23: no_clobber = sox_true; break;
       case 24: sox_globals.use_threads = sox_true; break;
+      case 25:
+        if (sscanf(optstate.arg, "%i %c", &i, &dummy) != 1 || i < 8 || i > 16) {
+          lsx_fail("Min DFT size must be in range 8 to 16");
+          exit(1);
+        }
+        sox_globals.log2_dft_min_size = i;
+        break;
       }
       break;
 
@@ -2826,6 +2844,7 @@
   size_t i;
   char mybase[6];
 
+  gettimeofday(&load_timeofday, NULL);
   myname = argv[0];
   sox_globals.output_message_handler = output_message;
 
--- a/src/sox.h
+++ b/src/sox.h
@@ -516,6 +516,7 @@
 Boolean type, assignment (but not necessarily binary) compatible with C++ bool.
 */
 typedef enum sox_bool {
+    sox_bool_dummy = -1, /**< Dummy enumerator = -1; ensures a signed type. */
     sox_false, /**< False = 0. */
     sox_true   /**< True = 1. */
 } sox_bool;
@@ -977,7 +978,6 @@
 @param clips Variable to increment if input sample is too large.
 */
 #define SOX_SAMPLE_TO_FLOAT_32BIT(d,clips) (LSX_USE_VAR(sox_macro_temp_double),sox_macro_temp_sample=(d),sox_macro_temp_sample>SOX_SAMPLE_MAX-64?++(clips),1:(((sox_macro_temp_sample+64)&~127)*(1./(SOX_SAMPLE_MAX+1.))))
-#define SOX_SAMPLE_TO_FLOAT_64BIT(d,clips) ((d)*(1./(SOX_SAMPLE_MAX+1.)))
 
 /**
 Client API:
@@ -1358,6 +1358,12 @@
   char       * tmp_path;         /**< Private: client-configured path to use for temporary files */
   sox_bool     use_magic;        /**< Private: true if client has requested use of 'magic' file-type detection */
   sox_bool     use_threads;      /**< Private: true if client has requested parallel effects processing */
+
+  /**
+  Log to base 2 of minimum size (in bytes) used by libSoX for DFT (filtering).
+  Plugins should use similarly-sized DFTs to get best performance.
+  */
+  size_t       log2_dft_min_size;
 } sox_globals_t;
 
 /**
--- a/src/sox_i.h
+++ b/src/sox_i.h
@@ -88,6 +88,7 @@
 int lsx_set_dft_length(int num_taps);
 void init_fft_cache(void);
 void clear_fft_cache(void);
+#define lsx_is_power_of_2(x) (!((x) < 2 || ((x) & ((x) - 1)))) /* Non-zero iff x is 2, 4, 8, ...; note: x is evaluated 3 times */
 void lsx_safe_rdft(int len, int type, double * d);
 void lsx_safe_cdft(int len, int type, double * d);
 void lsx_power_spectrum(int n, double const * in, double * out);
@@ -98,15 +99,15 @@
 void lsx_apply_bartlett(double h[], const int num_points);
 void lsx_apply_blackman(double h[], const int num_points, double alpha);
 void lsx_apply_blackman_nutall(double h[], const int num_points);
-double lsx_kaiser_beta(double att);
+double lsx_kaiser_beta(double att, double tr_bw);
 void lsx_apply_kaiser(double h[], const int num_points, double beta);
-double * lsx_make_lpf(int num_taps, double Fc, double beta, double scale, sox_bool dc_norm);
-void lsx_kaiser_params(double att, double tr_bw, double * beta, int * num_taps);
+double * lsx_make_lpf(int num_taps, double Fc, double beta, double rho,
+    double scale, sox_bool dc_norm);
+void lsx_kaiser_params(double att, double Fc, double tr_bw, double * beta, int * num_taps);
 double * lsx_design_lpf(
-    double Fp,      /* End of pass-band; ~= 0.01dB point */
+    double Fp,      /* End of pass-band */
     double Fs,      /* Start of stop-band */
-    double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI */
-    sox_bool allow_aliasing,
+    double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI; < 0: dummy run */
     double att,     /* Stop-band attenuation in dB */
     int * num_taps, /* 0: value will be estimated */
     int k,          /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */
@@ -113,12 +114,6 @@
     double beta);   /* <0: value will be estimated */
 void lsx_fir_to_phase(double * * h, int * len,
     int * post_len, double phase0);
-#define LSX_TO_6dB .5869
-#define LSX_TO_3dB ((2/3.) * (.5 + LSX_TO_6dB))
-#define LSX_MAX_TBW0 36.
-#define LSX_MAX_TBW0A (LSX_MAX_TBW0 / (1 + LSX_TO_3dB))
-#define LSX_MAX_TBW3 floor(LSX_MAX_TBW0 * LSX_TO_3dB)
-#define LSX_MAX_TBW3A floor(LSX_MAX_TBW0A * LSX_TO_3dB)
 void lsx_plot_fir(double * h, int num_points, sox_rate_t rate, sox_plot_t type, char const * title, double y1, double y2);
 
 #ifdef HAVE_BYTESWAP_H
@@ -280,7 +275,7 @@
   } \
 }
 
-#define GETOPT_NUMERIC(state, ch, name, min, max) case ch:{ \
+#define GETOPT_NUMERIC_IMPL(state, ch, lvalue, label, min, max) case ch:{ \
   char * end_ptr; \
   double d = strtod(state.arg, &end_ptr); \
   if (end_ptr == state.arg || d < min || d > max || *end_ptr != '\0') {\
@@ -287,9 +282,13 @@
-    lsx_fail("parameter `%s' must be between %g and %g", #name, (double)min, (double)max); \
+    lsx_fail("parameter `%s' must be between %g and %g", label, (double)min, (double)max); \
     return lsx_usage(effp); \
   } \
-  p->name = d; \
+  lvalue = d; \
   break; \
 }
+/* GETOPT_LOCAL_NUMERIC stores into the variable `name'; GETOPT_NUMERIC stores into p->name. */
+/* Both stringify the bare `name' so that the error message never shows a `p->' prefix. */
+#define GETOPT_LOCAL_NUMERIC(state, ch, name, min, max) GETOPT_NUMERIC_IMPL(state, ch, name, #name, min, max)
+#define GETOPT_NUMERIC(state, ch, name, min, max) GETOPT_NUMERIC_IMPL(state, ch, p->name, #name, min, max)
 
 int lsx_effect_set_imin(sox_effect_t * effp, size_t imin);
 
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -161,7 +161,7 @@
     case Window_Bartlett: lsx_apply_bartlett(w, n); break;
     case Window_Rectangular: break;
     default: lsx_apply_kaiser(w, n, lsx_kaiser_beta(
-        (p->dB_range + p->gain) * (1.1 + p->window_adjust / 50)));
+        (p->dB_range + p->gain) * (1.1 + p->window_adjust / 50), .1));
   }
   for (i = 0; i < p->dft_size; ++i) sum += p->window[i];
   for (i = 0; i < p->dft_size; ++i) p->window[i] *= 2 / sum