shithub: sox

Download patch

ref: 28eb39a890bfc5968a563480acd065c7ecac5eb0
parent: a84664a8dec90a25bc67df625051abd51225f08d
author: robs <robs>
date: Sat Jul 21 17:15:14 EDT 2007

tempo improvements

--- a/soxeffect.7
+++ b/soxeffect.7
@@ -413,17 +413,15 @@
 See also \fBfilter\fR for filters with a steeper roll-off.
 .TP
 \fBkey \fR[\fB\-q\fR] \fIshift\fR [\fIwindow\fR [\fIseek\fR [\fIoverlap\fR]]]
-Change the audio key (i.e. pitch but not tempo) using the SoundTouch [8]
-algorithm.
+Change the audio key (i.e. pitch but not tempo) using a WSOLA algorithm similar
+to that used by SoundTouch [8].
 .SP
 .I shift
-gives the key shift in `cents' (i.e. 100ths of a semitone).  See the
-.B
-tempo
+is the key shift in positive or negative `cents' (i.e. 100ths of a
+semitone).  See the
+.B tempo
 effect for a description of the other parameters.
 .SP
-Note: This effect works with only mono or stereo audio.
-.SP
 See also
 .B pitch
 for a similar effect.
@@ -1050,13 +1048,13 @@
 \fIp3\fR (trapezium): the percentage through each cycle at which `falling'
 ends; default=60.
 .TP
-\fBtempo \fR[\fB\-l\fR] \fIfactor\fR [\fIwindow\fR [\fIseek\fR [\fIoverlap\fR]]]
-Change the audio tempo (but not its pitch) using the SoundTouch [8] algorithm.
+\fBtempo \fR[\fB\-q\fR] \fIfactor\fR [\fIwindow\fR [\fIseek\fR [\fIoverlap\fR]]]
+Change the audio tempo (but not its pitch) using a WSOLA algorithm similar
+to that used by SoundTouch [8].
 .SP
 The optional
-.B \-l
-parameter selects a linear seek for a more accurate but slower version of the
-algorithm.
+.B \-q
+parameter selects a quicker but less accurate version of the algorithm.
 .SP
 .I factor
 gives the ratio of new tempo to the old tempo.
@@ -1063,13 +1061,11 @@
 .SP
 The optional
 .I window
-parameter gives the length in milliseconds (default 82) of a single
-processing sequence.  This determines to how long sequences the original
-sound is chopped in the time-stretch algorithm.  The larger this value
-is, the fewer sequences are used in processing.  In principle a bigger
-value sounds better when slowing down tempo, but worse when increasing
-tempo and vice versa.  Increasing this value reduces computational
-burden & vice versa.
+parameter gives the length in milliseconds of each chunk (or window) of
+audio processed by the algorithm.  The default value is 82\ ms and is
+typically suitable for making small changes to the tempo of music.  For
+larger tempo changes (e.g. a factor of 2), 50\ ms may be better; for
+speech, 30\ ms may be better.
 .SP
 The optional
 .I seek
@@ -1079,7 +1075,7 @@
 joining location when mixing the sound sequences back together.  The
 bigger this window setting is, the higher the possibility to find a
 better mixing position will become, but at the same time large values
-may cause a "drifting" artifact because consequent sequences will be
+may cause a `drifting' artifact because consequent sequences will be
 taken at more uneven intervals.  If there's a disturbing artifact that
 sounds as if a constant frequency was drifting around, try reducing this
 setting.  Increasing this value increases computational burden & vice
@@ -1094,8 +1090,6 @@
 be that critical parameter. If you reduce the \fIwindow\fR  setting by a
 large amount, you might wish to try a smaller value on this.  Increasing
 this value increases computational burden & vice versa.
-.SP
-Note: This effect works with only mono or stereo audio.
 .SP
 See also
 .B stretch
--- a/src/key.c
+++ b/src/key.c
@@ -31,7 +31,7 @@
 {
   double d;
   char dummy, arg[100];
-  int pos = (argc && !strcmp(*argv, "-l"))? 1 : 0;
+  int pos = (argc && !strcmp(*argv, "-q"))? 1 : 0;
 
   if (argc <= pos || sscanf(argv[pos], "%lf %c", &d, &dummy) != 1)
     return sox_usage(effp);
@@ -47,7 +47,7 @@
   static sox_effect_handler_t handler;
   handler = *sox_tempo_effect_fn();
   handler.name = "key";
-  handler.usage = "[-l] shift-in-cents [window-ms [seek-ms [overlap-ms]]]",
+  handler.usage = "[-q] shift-in-cents [window-ms [seek-ms [overlap-ms]]]",
   handler.getopts = getopts;
   return &handler;
 }
--- a/src/soxconfig.h.cmake
+++ b/src/soxconfig.h.cmake
@@ -22,7 +22,6 @@
 #cmakedefine HAVE_SAMPLERATE_H        1
 #cmakedefine HAVE_SNDFILE_1_0_12      1
 #cmakedefine HAVE_SNDFILE_H           1
-#cmakedefine HAVE_LIBSOUNDTOUCH       1
 #cmakedefine HAVE_STDINT_H            1
 #cmakedefine HAVE_STRCASECMP          1
 #cmakedefine HAVE_STRDUP              1
--- a/src/tempo.c
+++ b/src/tempo.c
@@ -18,13 +18,14 @@
  * Fifth Floor, 51 Franklin Street, Boston, MA 02111-1301, USA.
  */
 
-/* Addressible FIFO buffer */
-
 #include "sox_i.h"
 #include "xmalloc.h"
-
+#include <math.h>
 #include <string.h>
+#include <assert.h>
 
+/* Addressible FIFO buffer */
+
 typedef struct {
   char * data;
   size_t allocation;   /* Number of bytes allocated for data. */
@@ -112,502 +113,195 @@
 }
 
 /*
- * Change tempo (alter duration, maintain pitch) using a time domain
- * WSOLA-like method.  Based on TDStretch.cpp revision 1.24 from The
- * SoundTouch Library Copyright (c) Olli Parviainen 2001-2005.
+ * Change tempo (alter duration, maintain pitch) using a WSOLA algorithm.
+ * Based on ideas from Olli Parviainen's SoundTouch Library.
  */
 
-#include <string.h>
-#include <assert.h>
-#ifndef max
-#define max(a, b) ((a) >= (b) ? (a) : (b))
-#endif
-
-typedef enum {FALSE, TRUE} BOOL;
-
 typedef struct {
   size_t samples_in;
   size_t samples_out;
   double factor;
   size_t channels;
-  size_t sampleReq;
-  float * pMidBuffer;
-  float * pRefMidBuffer;
-  float * pRefMidBufferUnaligned;
-  size_t overlapLength;
-  size_t seekLength;
-  size_t seekWindowLength;
-  size_t maxOffset;
-  double nominalSkip;
-  double skipFract;
-  fifo_t outputBuffer;
-  fifo_t inputBuffer;
-  BOOL bQuickseek;
-  BOOL bMidBufferDirty;
-} TDStretch;
+  size_t process_size;
+  float * prev_win_end;
+  size_t overlap;
+  size_t seek;
+  size_t window;
+  size_t windows_total;
+  size_t skip_total;
+  fifo_t output_fifo;
+  fifo_t input_fifo;
+  sox_bool quick_seek;
+} Stretch;
 
-static void clearMidBuffer(TDStretch * p)
+/* For the Wave Similarity bit of WSOLA */
+static double difference(const float * a, const float * b, size_t length)
 {
-  if (p->bMidBufferDirty) {
-    memset((p->pMidBuffer), 0, 2 * sizeof(float) * p->overlapLength);
-    p->bMidBufferDirty = FALSE;
-  }
-}
-
-static void clearInput(TDStretch * p)
-{
-  p->samples_in = 0;
-  fifo_clear(&p->inputBuffer);
-  clearMidBuffer(p);
-}
-
-static void clear(TDStretch * p)
-{
-  fifo_clear(&p->outputBuffer);
-  fifo_clear(&p->inputBuffer);
-  clearMidBuffer(p);
-}
-
-/* Slopes the amplitude of the 'midBuffer' samples so that cross correlation */
-/* is faster to calculate */
-static void precalcCorrReferenceMono(TDStretch * p)
-{
-  int i;
-  float temp;
-
-  for (i = 0; i < (int) p->overlapLength; i++) {
-    temp = (float) i *(float) (p->overlapLength - i);
-
-    (p->pRefMidBuffer)[i] = (float) ((p->pMidBuffer)[i] * temp);
-  }
-}
-
-static void precalcCorrReferenceStereo(TDStretch * p)
-{
-  int i, cnt2;
-  float temp;
-
-  for (i = 0; i < (int) p->overlapLength; i++) {
-    temp = (float) i *(float) (p->overlapLength - i);
-
-    cnt2 = i * 2;
-    (p->pRefMidBuffer)[cnt2] = (float) ((p->pMidBuffer)[cnt2] * temp);
-    (p->pRefMidBuffer)[cnt2 + 1] = (float) ((p->pMidBuffer)[cnt2 + 1] * temp);
-  }
-}
-
-static double calcCrossCorrMono(
-    TDStretch * p, const float * mixingPos, const float * compare)
-{
-  double corr = 0;
+  double diff = 0;
   size_t i = 0;
 
-  /* Loop optimisation: */
-  #define _ corr += mixingPos[i] * compare[i], ++i;
-  do {_ _ _ _ _ _ _ _} while (i < p->overlapLength);
+  #define _ diff += sqr(a[i] - b[i]), ++i; /* Loop optimisation */
+  do {_ _ _ _ _ _ _ _} while (i < length); /* N.B. length ≡ 0 (mod 8) */
   #undef _
-  return corr;
+  return diff;
 }
 
-static double calcCrossCorrStereo(
-    TDStretch * p, const float * mixingPos, const float * compare)
+/* Find where the two windows are most alike over the overlap period. */
+static size_t stretch_best_overlap_position(Stretch * p, float const * new_win)
 {
-  double corr = 0;
-  size_t i = 0;
+  float * f = p->prev_win_end;
+  size_t j, best_pos, prev_best_pos = (p->seek + 1) >> 1, step = 64;
+  size_t i = best_pos = p->quick_seek? prev_best_pos : 0;
+  double diff, least_diff = difference(new_win + p->channels * i, f, p->channels * p->overlap);
+  int k = 0;
 
-  /* Loop optimisation: */
-  #define _ corr += mixingPos[i]*compare[i] + mixingPos[i+1]*compare[i+1], i+=2;
-  do {_ _ _ _ _ _ _ _} while (i < 2 * p->overlapLength);
-  #undef _
-  return corr;
-}
-
-/* Seeks for the optimal overlap-mixing position.  The best position is
- * determined as the position where the two overlapped sample sequences are
- * 'most alike', in terms of the highest cross-correlation value over the
- * overlapping period.  4 variants exist for mono/stereo, quick/accurate */
-
-static size_t seekBestOverlapPositionMono(
-    TDStretch * p, const float * refPos)
-{
-  size_t bestOffs;
-  double bestCorr, corr;
-  size_t tempOffset;
-  const float *compare;
-
-  /* Slopes the amplitude of the 'midBuffer' samples */
-  precalcCorrReferenceMono(p);
-  bestCorr = INT_MIN;
-  bestOffs = 0;
-
-  /* Scans for the best correlation value by testing each possible position */
-  /* over the permitted range. */
-  for (tempOffset = 0; tempOffset < p->seekLength; tempOffset++) {
-    compare = refPos + tempOffset;
-
-    /* Calculates correlation value for the mixing position corresponding */
-    /* to 'tempOffset' */
-    corr = calcCrossCorrMono(p, p->pRefMidBuffer, compare);
-
-    /* Checks for the highest correlation value */
-    if (corr > bestCorr) {
-      bestCorr = corr;
-      bestOffs = tempOffset;
-    }
-  }
-  return bestOffs;
-}
-
-static size_t seekBestOverlapPositionStereo(TDStretch * p,
-                                            const float * refPos)
-{
-  size_t bestOffs;
-  double bestCorr, corr;
-  size_t i;
-
-  precalcCorrReferenceStereo(p);
-  bestCorr = INT_MIN;
-  bestOffs = 0;
-
-  for (i = 0; i < p->seekLength; i++) {
-    corr = calcCrossCorrStereo(p, refPos + 2 * i, p->pRefMidBuffer);
-    if (corr > bestCorr) {
-      bestCorr = corr;
-      bestOffs = i;
-    }
-  }
-  return bestOffs;
-}
-
-/* Table for the quick hierarchical mixing position seeking algorithm */
-static int const scanOffsets[4][24] = {
-  { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,
-    868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},
-  {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,
-      0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
-  { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,
-      0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
-  {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,
-      0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0}};
-
-static size_t seekBestOverlapPositionMonoQuick(TDStretch * p,
-                                               const float * refPos)
-{
-  size_t j;
-  size_t bestOffs;
-  double bestCorr, corr;
-  size_t scanCount, corrOffset, tempOffset;
-
-  /* Slopes the amplitude of the 'midBuffer' samples */
-  precalcCorrReferenceMono(p);
-  bestCorr = INT_MIN;
-  bestOffs = 0;
-  corrOffset = 0;
-  tempOffset = 0;
-
-  /* Scans for the best correlation value using four-pass hierarchical
-   * search.  The look-up table 'scans' has hierarchical position adjusting
-   * steps.  In first pass the routine searhes for the highest correlation
-   * with relatively coarse steps, then rescans the neighbourhood of the
-   * highest correlation with better resolution and so on. */
-  for (scanCount = 0; scanCount < 4; scanCount++) {
-    j = 0;
-    while (scanOffsets[scanCount][j]) {
-      tempOffset = corrOffset + scanOffsets[scanCount][j];
-      if (tempOffset >= p->seekLength)
+  if (p->quick_seek) do { /* hierarchical search */
+    for (k = -1; k <= 1; k += 2) for (j = 1; j < 4 || step == 64; ++j) {
+      i = prev_best_pos + k * j * step;
+      if ((int)i < 0 || i >= p->seek)
         break;
-
-      /* Calculates correlation value for the mixing position corresponding */
-      /* to 'tempOffset' */
-      corr = calcCrossCorrMono(p, refPos + tempOffset, (p->pRefMidBuffer));
-
-      /* Checks for the highest correlation value */
-      if (corr > bestCorr) {
-        bestCorr = corr;
-        bestOffs = tempOffset;
-      }
-      j++;
+      diff = difference(new_win + p->channels * i, f, p->channels * p->overlap);
+      if (diff < least_diff)
+        least_diff = diff, best_pos = i;
     }
-    corrOffset = bestOffs;
+    prev_best_pos = best_pos;
+  } while (step >>= 2);
+  else for (i = 1; i < p->seek; i++) { /* linear search */
+    diff = difference(new_win + p->channels * i, f, p->channels * p->overlap);
+    if (diff < least_diff)
+      least_diff = diff, best_pos = i;
   }
-
-  return bestOffs;
+  return best_pos;
 }
 
-static size_t seekBestOverlapPositionStereoQuick(TDStretch * p,
-                                                 const float * refPos)
+/* For the Over-Lap bit of WSOLA */
+static void stretch_overlap(Stretch * p, const float * in1, const float * in2, float * output)
 {
-  size_t j;
-  size_t bestOffs;
-  double bestCorr, corr;
-  size_t scanCount, corrOffset, tempOffset;
+  size_t i, j, k = 0;
+  float fade_step = 1.0f / (float) p->overlap;
 
-  precalcCorrReferenceStereo(p);
-
-  bestCorr = INT_MIN;
-  bestOffs = 0;
-  corrOffset = 0;
-  tempOffset = 0;
-
-  for (scanCount = 0; scanCount < 4; scanCount++) {
-    j = 0;
-    while (scanOffsets[scanCount][j]) {
-      tempOffset = corrOffset + scanOffsets[scanCount][j];
-      if (tempOffset >= p->seekLength)
-        break;
-
-      corr =
-          calcCrossCorrStereo(p, refPos + 2 * tempOffset, p->pRefMidBuffer);
-
-      if (corr > bestCorr) {
-        bestCorr = corr;
-        bestOffs = tempOffset;
-      }
-      j++;
-    }
-    corrOffset = bestOffs;
+  for (i = 0; i < p->overlap; ++i) {
+    float fade_in  = fade_step * (float) i;
+    float fade_out = 1.0f - fade_in;
+    for (j = 0; j < p->channels; ++j, ++k)
+      output[k] = in1[k] * fade_out + in2[k] * fade_in;
   }
-  return bestOffs;
 }
 
-static size_t seekBestOverlapPosition(TDStretch * p,
-                                      const float * refPos)
+static void stretch_process_windows(Stretch * p)
 {
-  if (p->channels == 2) {
-    if (p->bQuickseek)
-      return seekBestOverlapPositionStereoQuick(p, refPos);
-    return seekBestOverlapPositionStereo(p, refPos);
-  } else if (p->bQuickseek)
-    return seekBestOverlapPositionMonoQuick(p, refPos);
-  return seekBestOverlapPositionMono(p, refPos);
-}
+  while (fifo_occupancy(&p->input_fifo) >= p->process_size) {
+    size_t skip, offset = 0;
 
-
-/* Overlaps samples in 'midBuffer' with the samples in 'input' */
-static void overlapMono(TDStretch * p, float * output,
-                        const float * input)
-{
-  int i, itemp;
-
-  for (i = 0; i < (int) p->overlapLength; i++) {
-    itemp = p->overlapLength - i;
-    output[i] = (input[i] * i + (p->pMidBuffer)[i] * itemp) / p->overlapLength;
-  }
-}
-
-static void overlapStereo(TDStretch * p, float * output,
-                          const float * input)
-{
-  int i;
-  size_t cnt2;
-  float fTemp;
-  float fScale;
-  float fi;
-
-  fScale = 1.0f / (float) p->overlapLength;
-
-  for (i = 0; i < (int) p->overlapLength; i++) {
-    fTemp = (float) (p->overlapLength - i) * fScale;
-    fi = (float) i *fScale;
-
-    cnt2 = 2 * i;
-    output[cnt2 + 0] = input[cnt2 + 0] * fi + p->pMidBuffer[cnt2 + 0] * fTemp;
-    output[cnt2 + 1] = input[cnt2 + 1] * fi + p->pMidBuffer[cnt2 + 1] * fTemp;
-  }
-}
-
-/* Overlaps samples in 'midBuffer' with the samples in 'inputBuffer' at
- * position of 'ovlPos'. */
-static inline void overlap(TDStretch * p, float * output,
-                           const float * input, size_t ovlPos)
-{
-  if (p->channels == 2)
-    overlapStereo(p, output, input + 2 * ovlPos);
-  else
-    overlapMono(p, output, input + ovlPos);
-}
-
-/* Processes as many processing frames of the samples 'inputBuffer', store */
-/* the result into 'outputBuffer' */
-static void processSamples(TDStretch * p)
-{
-  size_t ovlSkip, offset, temp;
-
-  if (p->bMidBufferDirty == FALSE) {
-    /* if midBuffer is empty, move the first samples of the input stream
-     * into it */
-    if (fifo_occupancy(&p->inputBuffer) < p->overlapLength)
-      return;   /* wait until we've got p->overlapLength samples */
-    fifo_read(&p->inputBuffer, p->overlapLength, p->pMidBuffer);
-    p->bMidBufferDirty = TRUE;
-  }
-  /* Process samples as long as there are enough samples in 'inputBuffer'
-   * to form a processing frame. */
-  while (fifo_occupancy(&p->inputBuffer) >= p->sampleReq) {
-    /* If tempo differs from the normal SCALE, scan for the best overlapping
-     * position */
-    offset = seekBestOverlapPosition(p, fifo_read_ptr(&p->inputBuffer));
-
-    /* Mix the samples in the 'inputBuffer' at position of 'offset' with the
-     * samples in 'midBuffer' using sliding overlapping ... first partially
-     * overlap with the end of the previous sequence (that's in 'midBuffer') */
-    overlap(p, fifo_reserve(&p->outputBuffer, p->overlapLength),
-            fifo_read_ptr(&p->inputBuffer), offset);
-
-    /* ... then copy sequence samples from 'inputBuffer' to output */
-    temp = (p->seekWindowLength - 2 * p->overlapLength);    /* & 0xfffffffe; */
-    if ((int)temp > 0) {
-      fifo_write(&p->outputBuffer, temp,
-                 (float *) fifo_read_ptr(&p->inputBuffer) +
-                 p->channels * (offset + p->overlapLength));
+    /* Copy or overlap the first bit to the output */
+    if (!p->windows_total)
+      fifo_write(&p->output_fifo, p->overlap, fifo_read_ptr(&p->input_fifo));
+    else {
+      offset = stretch_best_overlap_position(p, fifo_read_ptr(&p->input_fifo));
+      stretch_overlap(p, p->prev_win_end,
+          (float *) fifo_read_ptr(&p->input_fifo) + p->channels * offset,
+          fifo_write(&p->output_fifo, p->overlap, NULL));
     }
-    /* Copies the end of the current sequence from 'inputBuffer' to
-     * 'midBuffer' for being mixed with the beginning of the next
-     * processing sequence and so on */
-    assert(offset + p->seekWindowLength <= fifo_occupancy(&p->inputBuffer));
-    memcpy(p->pMidBuffer,
-           (float *) fifo_read_ptr(&p->inputBuffer) +
-           p->channels * (offset + p->seekWindowLength - p->overlapLength),
-           p->channels * sizeof(float) * p->overlapLength);
-    p->bMidBufferDirty = TRUE;
+    /* Copy the middle bit to the output */
+    if (p->window > 2 * p->overlap)
+      fifo_write(&p->output_fifo, p->window - 2 * p->overlap,
+                 (float *) fifo_read_ptr(&p->input_fifo) +
+                 p->channels * (offset + p->overlap));
 
-    /* Remove the processed samples from the input buffer. Update
-     * the difference between integer & nominal skip step to 'p->skipFract'
-     * in order to prevent the error from accumulating over time. */
-    p->skipFract += p->nominalSkip;     /* real skip size */
-    ovlSkip = (int) p->skipFract;       /* rounded to integer skip */
-    p->skipFract -= ovlSkip;    /* maintain the fraction part, i.e. real vs. integer skip */
-    fifo_read(&p->inputBuffer, ovlSkip, NULL);
-  }
-}
+    /* Copy the end bit to prev_win_end ready to be mixed with
+     * the beginning of the next window. */
+    assert(offset + p->window <= fifo_occupancy(&p->input_fifo));
+    memcpy(p->prev_win_end,
+           (float *) fifo_read_ptr(&p->input_fifo) +
+           p->channels * (offset + p->window - p->overlap),
+           p->channels * p->overlap * sizeof(*(p->prev_win_end)));
 
-/* Set new overlap length parameter & reallocate RefMidBuffer if necessary. */
-static void acceptNewOverlapLength(TDStretch * p, size_t newOverlapLength)
-{
-  size_t prevOvl;
+    /* The Advance bit of WSOLA */
+    skip = p->factor * (++p->windows_total * (p->window - p->overlap)) + 0.5;
+    skip -= (p->seek + 1) >> 1; /* So seek straddles the nominal skip point. */
+    p->skip_total += skip -= p->skip_total;
+    fifo_read(&p->input_fifo, skip, NULL);
 
-  prevOvl = p->overlapLength;
-  p->overlapLength = newOverlapLength;
-  if (p->overlapLength > prevOvl) {
-    free(p->pMidBuffer);
-    free(p->pRefMidBufferUnaligned);
-    p->pMidBuffer = xcalloc(p->overlapLength * 2, sizeof(float));
-    p->bMidBufferDirty = TRUE;
-    clearMidBuffer(p);
-    p->pRefMidBufferUnaligned = xcalloc(
-        2 * p->overlapLength + 16 / sizeof(float), sizeof(float));
-
-    /* For efficiency, align 'pRefMidBuffer' to 16 byte boundary */
-    p->pRefMidBuffer = (float *)
-      ((((unsigned long) (p->pRefMidBufferUnaligned)) + 15ul) & ~15ul);
+    sox_debug("%3u %u", offset, skip);
   }
 }
 
-/*  Sets routine control parameters. These control are certain time constants
- *  defining how the sound is stretched to the desired duration.
- *    'sampleRate' = sample rate of the sound
- *    'sequenceMS' = one processing sequence length in milliseconds
- *    'seekwindowMS' = seeking window length for scanning the best overlapping
- *       position
- *    'overlapMS' = overlapping length
- *    'tempo' = 1 for no change, < 1 for slower, > 1 for faster.
- *    'quickSeek' = whether to use a quick seek for the best overlapping
- *    position.
- */
-static void setParameters(TDStretch * p, double sampleRate, double tempo,
-    double sequenceMs, double seekWindowMs, double overlapMs, BOOL quickSeek)
+static void stretch_setup(Stretch * p,
+  double sample_rate,
+  double factor,      /* 1 for no change, < 1 for slower, > 1 for faster. */
+  double window_ms,   /* Processing window length in milliseconds. */
+  double seek_ms,     /* Milliseconds to seek for the best overlap position. */
+  double overlap_ms,  /* Overlap length in milliseconds. */
+  sox_bool quick_seek)/* Whether to quick seek for the best overlap position.*/
 {
-  size_t newOvl;
-  size_t intskip;
+  size_t max_skip;
 
-  p->factor = tempo;
-  p->bQuickseek = quickSeek;
-  p->maxOffset = p->seekLength = sampleRate * seekWindowMs / 1000 + .5;
-  p->seekWindowLength = sampleRate * sequenceMs / 1000 + .5;
-  newOvl = max(sampleRate * overlapMs / 1000 + 4.5, 16);
-  newOvl &= ~7; /* must be divisible by 8 */
-  acceptNewOverlapLength(p, newOvl);
+  p->factor = factor;
+  p->window = sample_rate * window_ms / 1000 + .5;
+  p->seek   = sample_rate * seek_ms / 1000 + .5;
+  p->overlap = max(sample_rate * overlap_ms / 1000 + 4.5, 16);
+  p->overlap &= ~7; /* must be divisible by 8 */
+  p->prev_win_end = xmalloc(p->overlap * p->channels * sizeof(*p->prev_win_end));
+  p->quick_seek = quick_seek;
 
-  /* Calculate ideal skip length (according to tempo value)  */
-  p->nominalSkip = tempo * (p->seekWindowLength - p->overlapLength);
-  p->skipFract = 0;
-  intskip = (int) (p->nominalSkip + 0.5);
-
-  /* Calculate how many samples are needed in the 'inputBuffer' to  */
-  /* process another batch of samples */
-  p->sampleReq =
-      max(intskip + p->overlapLength, p->seekWindowLength) + p->maxOffset;
+  /* # of samples needed in input fifo to process a window */
+  max_skip = ceil(factor * (p->window - p->overlap));
+  p->process_size = max(max_skip + p->overlap, p->window) + p->seek;
 }
 
-static float * putSamples(TDStretch * p, float const *samples, size_t n)
+static float * stretch_input(Stretch * p, float const *samples, size_t n)
 {
   p->samples_in += n;
-  return fifo_write(&p->inputBuffer, n, samples);
+  return fifo_write(&p->input_fifo, n, samples);
 }
 
-static float const * receiveSamples(
-    TDStretch * p, float * samples, size_t * n)
+static float const * stretch_output(
+    Stretch * p, float * samples, size_t * n)
 {
-  p->samples_out += *n = min(*n, fifo_occupancy(&p->outputBuffer));
-  return fifo_read(&p->outputBuffer, *n, samples);
+  p->samples_out += *n = min(*n, fifo_occupancy(&p->output_fifo));
+  return fifo_read(&p->output_fifo, *n, samples);
 }
 
-/* Flushes the last samples from the processing pipeline to the output.
- * Clears also the internal processing buffers.
- *
- * Note: This function is meant for extracting the last samples of a sound
- * stream. This function may introduce additional blank samples in the end
- * of the sound stream, and thus it's not recommended to call this function
- * in the middle of a sound stream. */
-static void flush(TDStretch * p)
+/* Flush samples remaining in the processing pipeline to the output. */
+static void stretch_flush(Stretch * p)
 {
   size_t samples_out = p->samples_in / p->factor + .5;
 
   if (p->samples_out < samples_out) {
     size_t remaining = p->samples_in / p->factor + .5 - p->samples_out;
-    float buff[128];
-    memset(buff, 0, sizeof(buff));
+    float * buff = xcalloc(128 * p->channels, sizeof(*buff));
 
-    while (fifo_occupancy(&p->outputBuffer) < remaining) {
-      putSamples(p, buff, sizeof(buff)/sizeof(buff[0])/p->channels);
-      processSamples(p);
+    while (fifo_occupancy(&p->output_fifo) < remaining) {
+      stretch_input(p, buff, 128);
+      stretch_process_windows(p);
     }
-    fifo_trim(&p->outputBuffer, remaining);
-    clearInput(p);
+    free(buff);
+    fifo_trim(&p->output_fifo, remaining);
+    p->samples_in = 0;
   }
 }
 
-static void deleteTDStretch(TDStretch * p)
+static void stretch_delete(Stretch * p)
 {
-  free(p->pMidBuffer);
-  free(p->pRefMidBufferUnaligned);
-  fifo_delete(&p->outputBuffer);
-  fifo_delete(&p->inputBuffer);
+  free(p->prev_win_end);
+  fifo_delete(&p->output_fifo);
+  fifo_delete(&p->input_fifo);
   free(p);
 }
 
-static TDStretch * newTDStretch(size_t channels)
+static Stretch * stretch_new(size_t channels)
 {
-  TDStretch * p = xcalloc(1, sizeof(*p));
+  Stretch * p = xcalloc(1, sizeof(*p));
   p->channels = channels;
-  fifo_create(&p->inputBuffer, p->channels * sizeof(float));
-  fifo_create(&p->outputBuffer, p->channels * sizeof(float));
+  fifo_create(&p->input_fifo, p->channels * sizeof(float));
+  fifo_create(&p->output_fifo, p->channels * sizeof(float));
   return p;
 }
 
-/*
- * libSoX tempo effect: adjust the audio tempo (but not key)
- *
- * Adjustment is given as the ratio of the new tempo to the old tempo.
- */
-
-#include "sox_i.h"
-#include <math.h>
-
 typedef struct tempo {
-  TDStretch   * tdstretch;
+  Stretch     * stretch;
   sox_bool    quick_seek;
-  double      factor, sequence_ms, seek_window_ms, overlap_ms;
+  double      factor, window_ms, seek_ms, overlap_ms;
 } priv_t;
 
 assert_static(sizeof(struct tempo) <= SOX_MAX_EFFECT_PRIVSIZE,
@@ -617,21 +311,19 @@
 {
   priv_t * p = (priv_t *) effp->priv;
 
-  p->sequence_ms    = 82; /* Set non-zero defaults: */
-  p->seek_window_ms = 14;
-  p->overlap_ms     = 12;
+  p->window_ms    = 82; /* Set non-zero defaults: */
+  p->seek_ms      = 14;
+  p->overlap_ms   = 12;
 
-  p->quick_seek = !argc || strcmp(*argv, "-l") || (--argc, ++argv, sox_false);
+  p->quick_seek = argc && !strcmp(*argv, "-q") && (--argc, ++argv, sox_true);
   do {                    /* break-able block */
-    NUMERIC_PARAMETER(factor        ,0.25, 4  )
-    NUMERIC_PARAMETER(sequence_ms   , 10 , 120)
-    NUMERIC_PARAMETER(seek_window_ms, 7  , 28 )
-    NUMERIC_PARAMETER(overlap_ms    , 6  , 24 )
+    NUMERIC_PARAMETER(factor      ,0.25, 4  )
+    NUMERIC_PARAMETER(window_ms   , 10 , 120)
+    NUMERIC_PARAMETER(seek_ms     , 3  , 28 )
+    NUMERIC_PARAMETER(overlap_ms  , 2  , 24 )
   } while (0);
-
-  sox_debug("factor:%g sequence:%g seek:%g overlap:%g quick:%i", p->factor,
-      p->sequence_ms, p->seek_window_ms, p->overlap_ms, p->quick_seek);
-  return argc || !p->factor? sox_usage(effp) : SOX_SUCCESS;
+  return argc || !p->factor || p->overlap_ms + p->seek_ms >= p->window_ms ?
+    sox_usage(effp) : SOX_SUCCESS;
 }
 
 static int start(sox_effect_t * effp)
@@ -640,14 +332,9 @@
 
   if (p->factor == 1)
     return SOX_EFF_NULL;
-
-  if (effp->ininfo.channels > 2) {
-    sox_fail("supports only mono or stereo audio");
-    return SOX_EOF;
-  }
-  p->tdstretch = newTDStretch(effp->ininfo.channels);
-  setParameters(p->tdstretch, effp->ininfo.rate, p->factor, p->sequence_ms,
-                p->seek_window_ms, p->overlap_ms, p->quick_seek);
+  p->stretch = stretch_new(effp->ininfo.channels);
+  stretch_setup(p->stretch, effp->ininfo.rate, p->factor, p->window_ms,
+                p->seek_ms, p->overlap_ms, p->quick_seek);
   return SOX_SUCCESS;
 }
 
@@ -657,16 +344,16 @@
   priv_t * p = (priv_t *) effp->priv;
   sox_size_t i;
   sox_size_t odone = *osamp /= effp->ininfo.channels;
-  float const * s = receiveSamples(p->tdstretch, NULL, &odone);
+  float const * s = stretch_output(p->stretch, NULL, &odone);
 
   for (i = 0; i < odone * effp->ininfo.channels; ++i)
     *obuf++ = SOX_FLOAT_32BIT_TO_SAMPLE(*s++, effp->clips);
 
   if (*isamp && odone < *osamp) {
-    float * t = putSamples(p->tdstretch, NULL, *isamp / effp->ininfo.channels);
+    float * t = stretch_input(p->stretch, NULL, *isamp / effp->ininfo.channels);
     for (i = *isamp; i; --i)
       *t++ = SOX_SAMPLE_TO_FLOAT_32BIT(*ibuf++, effp->clips);
-    processSamples(p->tdstretch);
+    stretch_process_windows(p->stretch);
   }
   else *isamp = 0;
 
@@ -677,13 +364,13 @@
 static int drain(sox_effect_t * effp, sox_ssample_t * obuf, sox_size_t * osamp)
 {
   static sox_size_t isamp = 0;
-  flush(((priv_t *)effp->priv)->tdstretch);
+  stretch_flush(((priv_t *)effp->priv)->stretch);
   return flow(effp, 0, obuf, &isamp, osamp);
 }
 
 static int stop(sox_effect_t * effp)
 {
-  deleteTDStretch(((priv_t *)effp->priv)->tdstretch);
+  stretch_delete(((priv_t *)effp->priv)->stretch);
   return SOX_SUCCESS;
 }
 
@@ -690,7 +377,7 @@
 sox_effect_handler_t const * sox_tempo_effect_fn(void)
 {
   static sox_effect_handler_t handler = {
-    "tempo", "[-l] factor [window-ms [seek-ms [overlap-ms]]]",
+    "tempo", "[-q] factor [window-ms [seek-ms [overlap-ms]]]",
     SOX_EFF_MCHAN | SOX_EFF_LENGTH,
     getopts, start, flow, drain, stop, NULL
   };