shithub: sox

--- a/soxeffect.7

+++ b/soxeffect.7

@@ -413,17 +413,15 @@

 See also \fBfilter\fR for filters with a steeper roll-off.

.TP

 \fBkey \fR[\fB\-q\fR] \fIshift\fR [\fIwindow\fR [\fIseek\fR [\fIoverlap\fR]]]

-Change the audio key (i.e. pitch but not tempo) using the SoundTouch [8]

-algorithm.

+Change the audio key (i.e. pitch but not tempo) using a WSOLA algorithm similar

+to that used by SoundTouch [8].

.SP

 .I shift

-gives the key shift in `cents' (i.e. 100ths of a semitone).  See the

-.B

-tempo

+is the key shift in positive or negative `cents' (i.e. 100ths of a

+semitone).  See the

+.B tempo

 effect for a description of the other parameters.

.SP

-Note: This effect works with only mono or stereo audio.

-.SP

 See also

 .B pitch

 for a similar effect.

@@ -1050,13 +1048,13 @@

 \fIp3\fR (trapezium): the percentage through each cycle at which `falling'

 ends; default=60.

.TP

-\fBtempo \fR[\fB\-l\fR] \fIfactor\fR [\fIwindow\fR [\fIseek\fR [\fIoverlap\fR]]]

-Change the audio tempo (but not its pitch) using the SoundTouch [8] algorithm.

+\fBtempo \fR[\fB\-q\fR] \fIfactor\fR [\fIwindow\fR [\fIseek\fR [\fIoverlap\fR]]]

+Change the audio tempo (but not its pitch) using a WSOLA algorithm similar

+to that used by SoundTouch [8].

.SP

 The optional

-.B \-l

-parameter selects a linear seek for a more accurate but slower version of the

-algorithm.

+.B \-q

+parameter selects a quicker but less accurate version of the algorithm.

.SP

 .I factor

 gives the ratio of new tempo to the old tempo.

@@ -1063,13 +1061,11 @@

.SP

 The optional

 .I window

-parameter gives the length in milliseconds (default 82) of a single

-processing sequence.  This determines to how long sequences the original

-sound is chopped in the time-stretch algorithm.  The larger this value

-is, the fewer sequences are used in processing.  In principle a bigger

-value sounds better when slowing down tempo, but worse when increasing

-tempo and vice versa.  Increasing this value reduces computational

-burden & vice versa.

+parameter gives the length in milliseconds of each chunk (or window) of

+audio processed by the algorithm.  The default value is 82\ ms and is

+typically suitable for making small changes to the tempo of music.  For

+larger tempo changes (e.g. a factor of 2), 50\ ms may be better; for

+speech, 30\ ms may be better.

.SP

 The optional

 .I seek

@@ -1079,7 +1075,7 @@

 joining location when mixing the sound sequences back together.  The

 bigger this window setting is, the higher the possibility to find a

 better mixing position will become, but at the same time large values

-may cause a "drifting" artifact because consequent sequences will be

+may cause a `drifting' artifact because consequent sequences will be

 taken at more uneven intervals.  If there's a disturbing artifact that

 sounds as if a constant frequency was drifting around, try reducing this

 setting.  Increasing this value increases computational burden & vice

@@ -1094,8 +1090,6 @@

 be that critical parameter. If you reduce the \fIwindow\fR  setting by a

 large amount, you might wish to try a smaller value on this.  Increasing

 this value increases computational burden & vice versa.

-.SP

-Note: This effect works with only mono or stereo audio.

.SP

 See also

 .B stretch

--- a/src/key.c

+++ b/src/key.c

@@ -31,7 +31,7 @@

   double d;

   char dummy, arg[100];

-  int pos = (argc && !strcmp(*argv, "-l"))? 1 : 0;

+  int pos = (argc && !strcmp(*argv, "-q"))? 1 : 0;

   if (argc <= pos || sscanf(argv[pos], "%lf %c", &d, &dummy) != 1)

     return sox_usage(effp);

@@ -47,7 +47,7 @@

   static sox_effect_handler_t handler;

   handler = *sox_tempo_effect_fn();

   handler.name = "key";

-  handler.usage = "[-l] shift-in-cents [window-ms [seek-ms [overlap-ms]]]",

+  handler.usage = "[-q] shift-in-cents [window-ms [seek-ms [overlap-ms]]]",

   handler.getopts = getopts;

   return &handler;

--- a/src/soxconfig.h.cmake

+++ b/src/soxconfig.h.cmake

@@ -22,7 +22,6 @@

 #cmakedefine HAVE_SAMPLERATE_H        1

 #cmakedefine HAVE_SNDFILE_1_0_12      1

 #cmakedefine HAVE_SNDFILE_H           1

-#cmakedefine HAVE_LIBSOUNDTOUCH       1

 #cmakedefine HAVE_STDINT_H            1

 #cmakedefine HAVE_STRCASECMP          1

 #cmakedefine HAVE_STRDUP              1

--- a/src/tempo.c

+++ b/src/tempo.c

@@ -18,13 +18,14 @@

  * Fifth Floor, 51 Franklin Street, Boston, MA 02111-1301, USA.

*/

-/* Addressible FIFO buffer */

 #include "sox_i.h"

 #include "xmalloc.h"

+#include <math.h>

 #include <string.h>

+#include <assert.h>

+/* Addressible FIFO buffer */

 typedef struct {

   char * data;

   size_t allocation;   /* Number of bytes allocated for data. */

@@ -112,502 +113,195 @@

/*

- * Change tempo (alter duration, maintain pitch) using a time domain

- * WSOLA-like method.  Based on TDStretch.cpp revision 1.24 from The

- * SoundTouch Library Copyright (c) Olli Parviainen 2001-2005.

+ * Change tempo (alter duration, maintain pitch) using a WSOLA algorithm.

+ * Based on ideas from Olli Parviainen's SoundTouch Library.

*/

-#include <string.h>

-#include <assert.h>

-#ifndef max

-#define max(a, b) ((a) >= (b) ? (a) : (b))

-#endif

-typedef enum {FALSE, TRUE} BOOL;

 typedef struct {

   size_t samples_in;

   size_t samples_out;

   double factor;

   size_t channels;

-  size_t sampleReq;

-  float * pMidBuffer;

-  float * pRefMidBuffer;

-  float * pRefMidBufferUnaligned;

-  size_t overlapLength;

-  size_t seekLength;

-  size_t seekWindowLength;

-  size_t maxOffset;

-  double nominalSkip;

-  double skipFract;

-  fifo_t outputBuffer;

-  fifo_t inputBuffer;

-  BOOL bQuickseek;

-  BOOL bMidBufferDirty;

-} TDStretch;

+  size_t process_size;

+  float * prev_win_end;

+  size_t overlap;

+  size_t seek;

+  size_t window;

+  size_t windows_total;

+  size_t skip_total;

+  fifo_t output_fifo;

+  fifo_t input_fifo;

+  sox_bool quick_seek;

+} Stretch;

-static void clearMidBuffer(TDStretch * p)

+/* For the Wave Similarity bit of WSOLA */

+static double difference(const float * a, const float * b, size_t length)

-  if (p->bMidBufferDirty) {

-    memset((p->pMidBuffer), 0, 2 * sizeof(float) * p->overlapLength);

-    p->bMidBufferDirty = FALSE;

-  }

-}

-static void clearInput(TDStretch * p)

-{

-  p->samples_in = 0;

-  fifo_clear(&p->inputBuffer);

-  clearMidBuffer(p);

-}

-static void clear(TDStretch * p)

-{

-  fifo_clear(&p->outputBuffer);

-  fifo_clear(&p->inputBuffer);

-  clearMidBuffer(p);

-}

-/* Slopes the amplitude of the 'midBuffer' samples so that cross correlation */

-/* is faster to calculate */

-static void precalcCorrReferenceMono(TDStretch * p)

-{

-  int i;

-  float temp;

-  for (i = 0; i < (int) p->overlapLength; i++) {

-    temp = (float) i *(float) (p->overlapLength - i);

-    (p->pRefMidBuffer)[i] = (float) ((p->pMidBuffer)[i] * temp);

-  }

-}

-static void precalcCorrReferenceStereo(TDStretch * p)

-{

-  int i, cnt2;

-  float temp;

-  for (i = 0; i < (int) p->overlapLength; i++) {

-    temp = (float) i *(float) (p->overlapLength - i);

-    cnt2 = i * 2;

-    (p->pRefMidBuffer)[cnt2] = (float) ((p->pMidBuffer)[cnt2] * temp);

-    (p->pRefMidBuffer)[cnt2 + 1] = (float) ((p->pMidBuffer)[cnt2 + 1] * temp);

-  }

-}

-static double calcCrossCorrMono(

-    TDStretch * p, const float * mixingPos, const float * compare)

-{

-  double corr = 0;

+  double diff = 0;

   size_t i = 0;

-  /* Loop optimisation: */

-  #define _ corr += mixingPos[i] * compare[i], ++i;

-  do {_ _ _ _ _ _ _ _} while (i < p->overlapLength);

+  #define _ diff += sqr(a[i] - b[i]), ++i; /* Loop optimisation */

+  do {_ _ _ _ _ _ _ _} while (i < length); /* N.B. length ≡ 0 (mod 8) */

   #undef _

-  return corr;

+  return diff;

-static double calcCrossCorrStereo(

-    TDStretch * p, const float * mixingPos, const float * compare)

+/* Find where the two windows are most alike over the overlap period. */

+static size_t stretch_best_overlap_position(Stretch * p, float const * new_win)

-  double corr = 0;

-  size_t i = 0;

+  float * f = p->prev_win_end;

+  size_t j, best_pos, prev_best_pos = (p->seek + 1) >> 1, step = 64;

+  size_t i = best_pos = p->quick_seek? prev_best_pos : 0;

+  double diff, least_diff = difference(new_win + p->channels * i, f, p->channels * p->overlap);

+  int k = 0;

-  /* Loop optimisation: */

-  #define _ corr += mixingPos[i]*compare[i] + mixingPos[i+1]*compare[i+1], i+=2;

-  do {_ _ _ _ _ _ _ _} while (i < 2 * p->overlapLength);

-  #undef _

-  return corr;

-}

-/* Seeks for the optimal overlap-mixing position.  The best position is

- * determined as the position where the two overlapped sample sequences are

- * 'most alike', in terms of the highest cross-correlation value over the

- * overlapping period.  4 variants exist for mono/stereo, quick/accurate */

-static size_t seekBestOverlapPositionMono(

-    TDStretch * p, const float * refPos)

-{

-  size_t bestOffs;

-  double bestCorr, corr;

-  size_t tempOffset;

-  const float *compare;

-  /* Slopes the amplitude of the 'midBuffer' samples */

-  precalcCorrReferenceMono(p);

-  bestCorr = INT_MIN;

-  bestOffs = 0;

-  /* Scans for the best correlation value by testing each possible position */

-  /* over the permitted range. */

-  for (tempOffset = 0; tempOffset < p->seekLength; tempOffset++) {

-    compare = refPos + tempOffset;

-    /* Calculates correlation value for the mixing position corresponding */

-    /* to 'tempOffset' */

-    corr = calcCrossCorrMono(p, p->pRefMidBuffer, compare);

-    /* Checks for the highest correlation value */

-    if (corr > bestCorr) {

-      bestCorr = corr;

-      bestOffs = tempOffset;

-    }

-  }

-  return bestOffs;

-}

-static size_t seekBestOverlapPositionStereo(TDStretch * p,

-                                            const float * refPos)

-{

-  size_t bestOffs;

-  double bestCorr, corr;

-  size_t i;

-  precalcCorrReferenceStereo(p);

-  bestCorr = INT_MIN;

-  bestOffs = 0;

-  for (i = 0; i < p->seekLength; i++) {

-    corr = calcCrossCorrStereo(p, refPos + 2 * i, p->pRefMidBuffer);

-    if (corr > bestCorr) {

-      bestCorr = corr;

-      bestOffs = i;

-    }

-  }

-  return bestOffs;

-}

-/* Table for the quick hierarchical mixing position seeking algorithm */

-static int const scanOffsets[4][24] = {

-  { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,

-    868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},

-  {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,

-      0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},

-  { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,

-      0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},

-  {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,

-      0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0}};

-static size_t seekBestOverlapPositionMonoQuick(TDStretch * p,

-                                               const float * refPos)

-{

-  size_t j;

-  size_t bestOffs;

-  double bestCorr, corr;

-  size_t scanCount, corrOffset, tempOffset;

-  /* Slopes the amplitude of the 'midBuffer' samples */

-  precalcCorrReferenceMono(p);

-  bestCorr = INT_MIN;

-  bestOffs = 0;

-  corrOffset = 0;

-  tempOffset = 0;

-  /* Scans for the best correlation value using four-pass hierarchical

-   * search.  The look-up table 'scans' has hierarchical position adjusting

-   * steps.  In first pass the routine searhes for the highest correlation

-   * with relatively coarse steps, then rescans the neighbourhood of the

-   * highest correlation with better resolution and so on. */

-  for (scanCount = 0; scanCount < 4; scanCount++) {

-    j = 0;

-    while (scanOffsets[scanCount][j]) {

-      tempOffset = corrOffset + scanOffsets[scanCount][j];

-      if (tempOffset >= p->seekLength)

+  if (p->quick_seek) do { /* hierarchical search */

+    for (k = -1; k <= 1; k += 2) for (j = 1; j < 4 || step == 64; ++j) {

+      i = prev_best_pos + k * j * step;

+      if ((int)i < 0 || i >= p->seek)

         break;

-      /* Calculates correlation value for the mixing position corresponding */

-      /* to 'tempOffset' */

-      corr = calcCrossCorrMono(p, refPos + tempOffset, (p->pRefMidBuffer));

-      /* Checks for the highest correlation value */

-      if (corr > bestCorr) {

-        bestCorr = corr;

-        bestOffs = tempOffset;

-      }

-      j++;

+      diff = difference(new_win + p->channels * i, f, p->channels * p->overlap);

+      if (diff < least_diff)

+        least_diff = diff, best_pos = i;

-    corrOffset = bestOffs;

+    prev_best_pos = best_pos;

+  } while (step >>= 2);

+  else for (i = 1; i < p->seek; i++) { /* linear search */

+    diff = difference(new_win + p->channels * i, f, p->channels * p->overlap);

+    if (diff < least_diff)

+      least_diff = diff, best_pos = i;

-  return bestOffs;

+  return best_pos;

-static size_t seekBestOverlapPositionStereoQuick(TDStretch * p,

-                                                 const float * refPos)

+/* For the Over-Lap bit of WSOLA */

+static void stretch_overlap(Stretch * p, const float * in1, const float * in2, float * output)

-  size_t j;

-  size_t bestOffs;

-  double bestCorr, corr;

-  size_t scanCount, corrOffset, tempOffset;

+  size_t i, j, k = 0;

+  float fade_step = 1.0f / (float) p->overlap;

-  precalcCorrReferenceStereo(p);

-  bestCorr = INT_MIN;

-  bestOffs = 0;

-  corrOffset = 0;

-  tempOffset = 0;

-  for (scanCount = 0; scanCount < 4; scanCount++) {

-    j = 0;

-    while (scanOffsets[scanCount][j]) {

-      tempOffset = corrOffset + scanOffsets[scanCount][j];

-      if (tempOffset >= p->seekLength)

-        break;

-      corr =

-          calcCrossCorrStereo(p, refPos + 2 * tempOffset, p->pRefMidBuffer);

-      if (corr > bestCorr) {

-        bestCorr = corr;

-        bestOffs = tempOffset;

-      }

-      j++;

-    }

-    corrOffset = bestOffs;

+  for (i = 0; i < p->overlap; ++i) {

+    float fade_in  = fade_step * (float) i;

+    float fade_out = 1.0f - fade_in;

+    for (j = 0; j < p->channels; ++j, ++k)

+      output[k] = in1[k] * fade_out + in2[k] * fade_in;

-  return bestOffs;

-static size_t seekBestOverlapPosition(TDStretch * p,

-                                      const float * refPos)

+static void stretch_process_windows(Stretch * p)

-  if (p->channels == 2) {

-    if (p->bQuickseek)

-      return seekBestOverlapPositionStereoQuick(p, refPos);

-    return seekBestOverlapPositionStereo(p, refPos);

-  } else if (p->bQuickseek)

-    return seekBestOverlapPositionMonoQuick(p, refPos);

-  return seekBestOverlapPositionMono(p, refPos);

-}

+  while (fifo_occupancy(&p->input_fifo) >= p->process_size) {

+    size_t skip, offset = 0;

-/* Overlaps samples in 'midBuffer' with the samples in 'input' */

-static void overlapMono(TDStretch * p, float * output,

-                        const float * input)

-{

-  int i, itemp;

-  for (i = 0; i < (int) p->overlapLength; i++) {

-    itemp = p->overlapLength - i;

-    output[i] = (input[i] * i + (p->pMidBuffer)[i] * itemp) / p->overlapLength;

-  }

-}

-static void overlapStereo(TDStretch * p, float * output,

-                          const float * input)

-{

-  int i;

-  size_t cnt2;

-  float fTemp;

-  float fScale;

-  float fi;

-  fScale = 1.0f / (float) p->overlapLength;

-  for (i = 0; i < (int) p->overlapLength; i++) {

-    fTemp = (float) (p->overlapLength - i) * fScale;

-    fi = (float) i *fScale;

-    cnt2 = 2 * i;

-    output[cnt2 + 0] = input[cnt2 + 0] * fi + p->pMidBuffer[cnt2 + 0] * fTemp;

-    output[cnt2 + 1] = input[cnt2 + 1] * fi + p->pMidBuffer[cnt2 + 1] * fTemp;

-  }

-}

-/* Overlaps samples in 'midBuffer' with the samples in 'inputBuffer' at

- * position of 'ovlPos'. */

-static inline void overlap(TDStretch * p, float * output,

-                           const float * input, size_t ovlPos)

-{

-  if (p->channels == 2)

-    overlapStereo(p, output, input + 2 * ovlPos);

-  else

-    overlapMono(p, output, input + ovlPos);

-}

-/* Processes as many processing frames of the samples 'inputBuffer', store */

-/* the result into 'outputBuffer' */

-static void processSamples(TDStretch * p)

-{

-  size_t ovlSkip, offset, temp;

-  if (p->bMidBufferDirty == FALSE) {

-    /* if midBuffer is empty, move the first samples of the input stream

-     * into it */

-    if (fifo_occupancy(&p->inputBuffer) < p->overlapLength)

-      return;   /* wait until we've got p->overlapLength samples */

-    fifo_read(&p->inputBuffer, p->overlapLength, p->pMidBuffer);

-    p->bMidBufferDirty = TRUE;

-  }

-  /* Process samples as long as there are enough samples in 'inputBuffer'

-   * to form a processing frame. */

-  while (fifo_occupancy(&p->inputBuffer) >= p->sampleReq) {

-    /* If tempo differs from the normal SCALE, scan for the best overlapping

-     * position */

-    offset = seekBestOverlapPosition(p, fifo_read_ptr(&p->inputBuffer));

-    /* Mix the samples in the 'inputBuffer' at position of 'offset' with the

-     * samples in 'midBuffer' using sliding overlapping ... first partially

-     * overlap with the end of the previous sequence (that's in 'midBuffer') */

-    overlap(p, fifo_reserve(&p->outputBuffer, p->overlapLength),

-            fifo_read_ptr(&p->inputBuffer), offset);

-    /* ... then copy sequence samples from 'inputBuffer' to output */

-    temp = (p->seekWindowLength - 2 * p->overlapLength);    /* & 0xfffffffe; */

-    if ((int)temp > 0) {

-      fifo_write(&p->outputBuffer, temp,

-                 (float *) fifo_read_ptr(&p->inputBuffer) +

-                 p->channels * (offset + p->overlapLength));

+    /* Copy or overlap the first bit to the output */

+    if (!p->windows_total)

+      fifo_write(&p->output_fifo, p->overlap, fifo_read_ptr(&p->input_fifo));

+    else {

+      offset = stretch_best_overlap_position(p, fifo_read_ptr(&p->input_fifo));

+      stretch_overlap(p, p->prev_win_end,

+          (float *) fifo_read_ptr(&p->input_fifo) + p->channels * offset,

+          fifo_write(&p->output_fifo, p->overlap, NULL));

-    /* Copies the end of the current sequence from 'inputBuffer' to

-     * 'midBuffer' for being mixed with the beginning of the next

-     * processing sequence and so on */

-    assert(offset + p->seekWindowLength <= fifo_occupancy(&p->inputBuffer));

-    memcpy(p->pMidBuffer,

-           (float *) fifo_read_ptr(&p->inputBuffer) +

-           p->channels * (offset + p->seekWindowLength - p->overlapLength),

-           p->channels * sizeof(float) * p->overlapLength);

-    p->bMidBufferDirty = TRUE;

+    /* Copy the middle bit to the output */

+    if (p->window > 2 * p->overlap)

+      fifo_write(&p->output_fifo, p->window - 2 * p->overlap,

+                 (float *) fifo_read_ptr(&p->input_fifo) +

+                 p->channels * (offset + p->overlap));

-    /* Remove the processed samples from the input buffer. Update

-     * the difference between integer & nominal skip step to 'p->skipFract'

-     * in order to prevent the error from accumulating over time. */

-    p->skipFract += p->nominalSkip;     /* real skip size */

-    ovlSkip = (int) p->skipFract;       /* rounded to integer skip */

-    p->skipFract -= ovlSkip;    /* maintain the fraction part, i.e. real vs. integer skip */

-    fifo_read(&p->inputBuffer, ovlSkip, NULL);

-  }

-}

+    /* Copy the end bit to prev_win_end ready to be mixed with

+     * the beginning of the next window. */

+    assert(offset + p->window <= fifo_occupancy(&p->input_fifo));

+    memcpy(p->prev_win_end,

+           (float *) fifo_read_ptr(&p->input_fifo) +

+           p->channels * (offset + p->window - p->overlap),

+           p->channels * p->overlap * sizeof(*(p->prev_win_end)));

-/* Set new overlap length parameter & reallocate RefMidBuffer if necessary. */

-static void acceptNewOverlapLength(TDStretch * p, size_t newOverlapLength)

-{

-  size_t prevOvl;

+    /* The Advance bit of WSOLA */

+    skip = p->factor * (++p->windows_total * (p->window - p->overlap)) + 0.5;

+    skip -= (p->seek + 1) >> 1; /* So seek straddles the nominal skip point. */

+    p->skip_total += skip -= p->skip_total;

+    fifo_read(&p->input_fifo, skip, NULL);

-  prevOvl = p->overlapLength;

-  p->overlapLength = newOverlapLength;

-  if (p->overlapLength > prevOvl) {

-    free(p->pMidBuffer);

-    free(p->pRefMidBufferUnaligned);

-    p->pMidBuffer = xcalloc(p->overlapLength * 2, sizeof(float));

-    p->bMidBufferDirty = TRUE;

-    clearMidBuffer(p);

-    p->pRefMidBufferUnaligned = xcalloc(

-        2 * p->overlapLength + 16 / sizeof(float), sizeof(float));

-    /* For efficiency, align 'pRefMidBuffer' to 16 byte boundary */

-    p->pRefMidBuffer = (float *)

-      ((((unsigned long) (p->pRefMidBufferUnaligned)) + 15ul) & ~15ul);

+    sox_debug("%3u %u", offset, skip);

-/*  Sets routine control parameters. These control are certain time constants

- *  defining how the sound is stretched to the desired duration.

- *    'sampleRate' = sample rate of the sound

- *    'sequenceMS' = one processing sequence length in milliseconds

- *    'seekwindowMS' = seeking window length for scanning the best overlapping

- *       position

- *    'overlapMS' = overlapping length

- *    'tempo' = 1 for no change, < 1 for slower, > 1 for faster.

- *    'quickSeek' = whether to use a quick seek for the best overlapping

- *    position.

- */

-static void setParameters(TDStretch * p, double sampleRate, double tempo,

-    double sequenceMs, double seekWindowMs, double overlapMs, BOOL quickSeek)

+static void stretch_setup(Stretch * p,

+  double sample_rate,

+  double factor,      /* 1 for no change, < 1 for slower, > 1 for faster. */

+  double window_ms,   /* Processing window length in milliseconds. */

+  double seek_ms,     /* Milliseconds to seek for the best overlap position. */

+  double overlap_ms,  /* Overlap length in milliseconds. */

+  sox_bool quick_seek)/* Whether to quick seek for the best overlap position.*/

-  size_t newOvl;

-  size_t intskip;

+  size_t max_skip;

-  p->factor = tempo;

-  p->bQuickseek = quickSeek;

-  p->maxOffset = p->seekLength = sampleRate * seekWindowMs / 1000 + .5;

-  p->seekWindowLength = sampleRate * sequenceMs / 1000 + .5;

-  newOvl = max(sampleRate * overlapMs / 1000 + 4.5, 16);

-  newOvl &= ~7; /* must be divisible by 8 */

-  acceptNewOverlapLength(p, newOvl);

+  p->factor = factor;

+  p->window = sample_rate * window_ms / 1000 + .5;

+  p->seek   = sample_rate * seek_ms / 1000 + .5;

+  p->overlap = max(sample_rate * overlap_ms / 1000 + 4.5, 16);

+  p->overlap &= ~7; /* must be divisible by 8 */

+  p->prev_win_end = xmalloc(p->overlap * p->channels * sizeof(*p->prev_win_end));

+  p->quick_seek = quick_seek;

-  /* Calculate ideal skip length (according to tempo value)  */

-  p->nominalSkip = tempo * (p->seekWindowLength - p->overlapLength);

-  p->skipFract = 0;

-  intskip = (int) (p->nominalSkip + 0.5);

-  /* Calculate how many samples are needed in the 'inputBuffer' to  */

-  /* process another batch of samples */

-  p->sampleReq =

-      max(intskip + p->overlapLength, p->seekWindowLength) + p->maxOffset;

+  /* # of samples needed in input fifo to process a window */

+  max_skip = ceil(factor * (p->window - p->overlap));

+  p->process_size = max(max_skip + p->overlap, p->window) + p->seek;

-static float * putSamples(TDStretch * p, float const *samples, size_t n)

+static float * stretch_input(Stretch * p, float const *samples, size_t n)

   p->samples_in += n;

-  return fifo_write(&p->inputBuffer, n, samples);

+  return fifo_write(&p->input_fifo, n, samples);

-static float const * receiveSamples(

-    TDStretch * p, float * samples, size_t * n)

+static float const * stretch_output(

+    Stretch * p, float * samples, size_t * n)

-  p->samples_out += *n = min(*n, fifo_occupancy(&p->outputBuffer));

-  return fifo_read(&p->outputBuffer, *n, samples);

+  p->samples_out += *n = min(*n, fifo_occupancy(&p->output_fifo));

+  return fifo_read(&p->output_fifo, *n, samples);

-/* Flushes the last samples from the processing pipeline to the output.

- * Clears also the internal processing buffers.

- *

- * Note: This function is meant for extracting the last samples of a sound

- * stream. This function may introduce additional blank samples in the end

- * of the sound stream, and thus it's not recommended to call this function

- * in the middle of a sound stream. */

-static void flush(TDStretch * p)

+/* Flush samples remaining in the processing pipeline to the output. */

+static void stretch_flush(Stretch * p)

   size_t samples_out = p->samples_in / p->factor + .5;

   if (p->samples_out < samples_out) {

     size_t remaining = p->samples_in / p->factor + .5 - p->samples_out;

-    float buff[128];

-    memset(buff, 0, sizeof(buff));

+    float * buff = xcalloc(128 * p->channels, sizeof(*buff));

-    while (fifo_occupancy(&p->outputBuffer) < remaining) {

-      putSamples(p, buff, sizeof(buff)/sizeof(buff[0])/p->channels);

-      processSamples(p);

+    while (fifo_occupancy(&p->output_fifo) < remaining) {

+      stretch_input(p, buff, 128);

+      stretch_process_windows(p);

-    fifo_trim(&p->outputBuffer, remaining);

-    clearInput(p);

+    free(buff);

+    fifo_trim(&p->output_fifo, remaining);

+    p->samples_in = 0;

-static void deleteTDStretch(TDStretch * p)

+static void stretch_delete(Stretch * p)

-  free(p->pMidBuffer);

-  free(p->pRefMidBufferUnaligned);

-  fifo_delete(&p->outputBuffer);

-  fifo_delete(&p->inputBuffer);

+  free(p->prev_win_end);

+  fifo_delete(&p->output_fifo);

+  fifo_delete(&p->input_fifo);

   free(p);

-static TDStretch * newTDStretch(size_t channels)

+static Stretch * stretch_new(size_t channels)

-  TDStretch * p = xcalloc(1, sizeof(*p));

+  Stretch * p = xcalloc(1, sizeof(*p));

   p->channels = channels;

-  fifo_create(&p->inputBuffer, p->channels * sizeof(float));

-  fifo_create(&p->outputBuffer, p->channels * sizeof(float));

+  fifo_create(&p->input_fifo, p->channels * sizeof(float));

+  fifo_create(&p->output_fifo, p->channels * sizeof(float));

   return p;

-/*

- * libSoX tempo effect: adjust the audio tempo (but not key)

- *

- * Adjustment is given as the ratio of the new tempo to the old tempo.

- */

-#include "sox_i.h"

-#include <math.h>

 typedef struct tempo {

-  TDStretch   * tdstretch;

+  Stretch     * stretch;

   sox_bool    quick_seek;

-  double      factor, sequence_ms, seek_window_ms, overlap_ms;

+  double      factor, window_ms, seek_ms, overlap_ms;

 } priv_t;

 assert_static(sizeof(struct tempo) <= SOX_MAX_EFFECT_PRIVSIZE,

@@ -617,21 +311,19 @@

   priv_t * p = (priv_t *) effp->priv;

-  p->sequence_ms    = 82; /* Set non-zero defaults: */

-  p->seek_window_ms = 14;

-  p->overlap_ms     = 12;

+  p->window_ms    = 82; /* Set non-zero defaults: */

+  p->seek_ms      = 14;

+  p->overlap_ms   = 12;

-  p->quick_seek = !argc || strcmp(*argv, "-l") || (--argc, ++argv, sox_false);

+  p->quick_seek = argc && !strcmp(*argv, "-q") && (--argc, ++argv, sox_true);

   do {                    /* break-able block */

-    NUMERIC_PARAMETER(factor        ,0.25, 4  )

-    NUMERIC_PARAMETER(sequence_ms   , 10 , 120)

-    NUMERIC_PARAMETER(seek_window_ms, 7  , 28 )

-    NUMERIC_PARAMETER(overlap_ms    , 6  , 24 )

+    NUMERIC_PARAMETER(factor      ,0.25, 4  )

+    NUMERIC_PARAMETER(window_ms   , 10 , 120)

+    NUMERIC_PARAMETER(seek_ms     , 3  , 28 )

+    NUMERIC_PARAMETER(overlap_ms  , 2  , 24 )

   } while (0);

-  sox_debug("factor:%g sequence:%g seek:%g overlap:%g quick:%i", p->factor,

-      p->sequence_ms, p->seek_window_ms, p->overlap_ms, p->quick_seek);

-  return argc || !p->factor? sox_usage(effp) : SOX_SUCCESS;

+  return argc || !p->factor || p->overlap_ms + p->seek_ms >= p->window_ms ?

+    sox_usage(effp) : SOX_SUCCESS;

 static int start(sox_effect_t * effp)

@@ -640,14 +332,9 @@

   if (p->factor == 1)

     return SOX_EFF_NULL;

-  if (effp->ininfo.channels > 2) {

-    sox_fail("supports only mono or stereo audio");

-    return SOX_EOF;

-  }

-  p->tdstretch = newTDStretch(effp->ininfo.channels);

-  setParameters(p->tdstretch, effp->ininfo.rate, p->factor, p->sequence_ms,

-                p->seek_window_ms, p->overlap_ms, p->quick_seek);

+  p->stretch = stretch_new(effp->ininfo.channels);

+  stretch_setup(p->stretch, effp->ininfo.rate, p->factor, p->window_ms,

+                p->seek_ms, p->overlap_ms, p->quick_seek);

   return SOX_SUCCESS;

@@ -657,16 +344,16 @@

   priv_t * p = (priv_t *) effp->priv;

   sox_size_t i;

   sox_size_t odone = *osamp /= effp->ininfo.channels;

-  float const * s = receiveSamples(p->tdstretch, NULL, &odone);

+  float const * s = stretch_output(p->stretch, NULL, &odone);

   for (i = 0; i < odone * effp->ininfo.channels; ++i)

     *obuf++ = SOX_FLOAT_32BIT_TO_SAMPLE(*s++, effp->clips);

   if (*isamp && odone < *osamp) {

-    float * t = putSamples(p->tdstretch, NULL, *isamp / effp->ininfo.channels);

+    float * t = stretch_input(p->stretch, NULL, *isamp / effp->ininfo.channels);

     for (i = *isamp; i; --i)

       *t++ = SOX_SAMPLE_TO_FLOAT_32BIT(*ibuf++, effp->clips);

-    processSamples(p->tdstretch);

+    stretch_process_windows(p->stretch);

   else *isamp = 0;

@@ -677,13 +364,13 @@

 static int drain(sox_effect_t * effp, sox_ssample_t * obuf, sox_size_t * osamp)

   static sox_size_t isamp = 0;

-  flush(((priv_t *)effp->priv)->tdstretch);

+  stretch_flush(((priv_t *)effp->priv)->stretch);

   return flow(effp, 0, obuf, &isamp, osamp);

 static int stop(sox_effect_t * effp)

-  deleteTDStretch(((priv_t *)effp->priv)->tdstretch);

+  stretch_delete(((priv_t *)effp->priv)->stretch);

   return SOX_SUCCESS;

@@ -690,7 +377,7 @@

 sox_effect_handler_t const * sox_tempo_effect_fn(void)

   static sox_effect_handler_t handler = {

-    "tempo", "[-l] factor [window-ms [seek-ms [overlap-ms]]]",

+    "tempo", "[-q] factor [window-ms [seek-ms [overlap-ms]]]",

     SOX_EFF_MCHAN | SOX_EFF_LENGTH,

     getopts, start, flow, drain, stop, NULL

};

--

⑨