shithub: sox

--- a/soxeffect.7

+++ b/soxeffect.7

@@ -412,6 +412,22 @@

.SP

 See also \fBfilter\fR for filters with a steeper roll-off.

.TP

+\fBkey \fR[\fB\-l\fR] \fIshift\fR [\fIwindow\fR [\fIseek\fR [\fIoverlap\fR]]]

+Change the audio key (i.e. pitch but not tempo) using the SoundTouch [8]

+algorithm.

+.SP

+.I shift

+gives the key shift in `cents' (i.e. 100ths of a semitone).  See the

+.B

+tempo

+effect for a description of the other parameters.

+.SP

+Note: This effect works with only mono or stereo audio.

+.SP

+See also

+.B pitch

+for a similar effect.

+.TP

 \fBladspa\fR \fBmodule\fR [\fBplugin\fR] [\fBargument\fR...]

 Apply a LADSPA [5] (Linux Audio Developer's Simple Plugin API) plugin.

 Despite the name, LADSPA is not Linux-specific, and a wide range of

@@ -1034,6 +1050,57 @@

 \fIp3\fR (trapezium): the percentage through each cycle at which `falling'

 ends; default=60.

.TP

+\fBtempo \fR[\fB\-l\fR] \fIfactor\fR [\fIwindow\fR [\fIseek\fR [\fIoverlap\fR]]]

+Change the audio tempo (but not its pitch) using the SoundTouch [8] algorithm.

+.SP

+The optional

+.B \-l

+parameter selects a linear seek for a more accurate but slower version of the

+algorithm.

+.SP

+.I factor

+gives the ratio of new tempo to the old tempo.

+.SP

+The optional

+.I window

+parameter gives the length in milliseconds (default 82) of a single

+processing sequence.  This determines the length of the sequences into which

+the original sound is chopped by the time-stretch algorithm.  The larger this value

+is, the fewer sequences are used in processing.  In principle a bigger

+value sounds better when slowing down tempo, but worse when increasing

+tempo and vice versa.  Increasing this value reduces computational

+burden & vice versa.

+.SP

+The optional

+.I seek

+parameter gives the seeking window length in milliseconds (default 14)

+for the algorithm to find the best possible overlapping location.  This

+determines how wide a window the algorithm may search for an optimal

+joining location when mixing the sound sequences back together.  The

+bigger this window setting is, the higher the possibility to find a

+better mixing position will become, but at the same time large values

+may cause a "drifting" artifact because consecutive sequences will be

+taken at more uneven intervals.  If there's a disturbing artifact that

+sounds as if a constant frequency was drifting around, try reducing this

+setting.  Increasing this value increases computational burden & vice

+versa.

+.SP

+The optional

+.I overlap

+parameter gives the overlap length in milliseconds (default 12).  When

+the chopped sound sequences are mixed back together, to form a

+continuous sound stream, this parameter defines how long a period the

+two consecutive sequences are allowed to overlap each other.  This is not

+a particularly critical parameter.  If you reduce the \fIwindow\fR setting by a

+large amount, you might wish to try a smaller value on this.  Increasing

+this value increases computational burden & vice versa.

+.SP

+Note: This effect works with only mono or stereo audio.

+.SP

+See also

+.B stretch

+for a similar effect.

+.TP

 \fBtreble \fIgain\fR [\fIfrequency\fR [\fIwidth\fR[\fBs\fR\^|\^\fBh\fR\^|\^\fBo\fR\^|\^\fBq\fR]]]

 Apply a treble tone-control effect.

 See the description of the \fBbass\fR effect for details.

@@ -1210,7 +1277,7 @@

[5]

 Richard Furse,

 .IR "Linux Audio Developer's Simple Plugin API" ,

-http://www.ladspa.org/

+http://www.ladspa.org

.TP

[6]

 Richard Furse,

@@ -1220,7 +1287,12 @@

[7]

 Steve Harris,

 .IR "LADSPA plugins" ,

-http://plugin.org.uk/

+http://plugin.org.uk

+.TP

+[8]

+Olli Parviainen,

+.IR "SoundTouch Audio Processing Library" ,

+http://www.surina.net/soundtouch

 .SH AUTHORS

 Chris Bagwell (cbagwell@users.sourceforge.net).

 Other authors and contributors are listed in the AUTHORS file that

--- a/src/CMakeLists.txt

+++ b/src/CMakeLists.txt

@@ -1,5 +1,4 @@

 include(CheckIncludeFiles)

-include(CheckIncludeFileCXX)

 include(CheckFunctionExists)

 include(CheckLibraryExists)

 include(TestBigEndian)

@@ -42,13 +41,6 @@

 check_include_files("sys/timeb.h"        HAVE_SYS_TIMEB_H)

 check_include_files("unistd.h"           HAVE_UNISTD_H)

-check_include_file_cxx("soundtouch/SoundTouch.h" HAVE_LIBSOUNDTOUCH)

-if (HAVE_LIBSOUNDTOUCH)

-  set(optional_srcs ${optional_srcs} tempo)

-  set(optional_libs ${optional_libs} SoundTouch)

-endif (HAVE_LIBSOUNDTOUCH)

 check_function_exists("fseeko"           HAVE_FSEEKO)

 check_function_exists("getopt_long"      HAVE_GETOPT_LONG)

 check_function_exists("gettimeofday"     HAVE_GETTIMEOFDAY)

@@ -106,103 +98,30 @@

   DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/soxstdint.h.cmake

+# Format with: !xargs echo|tr ' ' '\n'|sort|column|expand|sed 's/^/  /'

 set(effects_srcs

-  biquad

-  biquads

-  chorus

-  compand

-  compandt

-  dcshift

-  dither

-  earwax

-  echo

-  echos

-  effects

-  fade

-  FFT

-  filter

-  flanger

-  mcompand

-  mixer

-  noiseprof

-  noisered

-  pad

-  pan

-  phaser

-  pitch

-  polyphas

-  rate

-  repeat

-  resample

-  reverb

-  reverse

-  silence

-  skeleff

-  speed

-  stat

-  stretch

-  swap

-  synth

-  tremolo

-  trim

-  vibro

-  vol

+  biquad          echos           noiseprof       resample        synth

+  biquads         effects         noisered        reverb          tempo

+  chorus          fade            pad             reverse         tremolo

+  compand         FFT             pan             silence         trim

+  compandt        filter          phaser          skeleff         vibro

+  dcshift         flanger         pitch           speed           vol

+  dither          key             polyphas        stat

+  earwax          mcompand        rate            stretch

+  echo            mixer           repeat          swap

 set(formats_srcs

-  8svx

-  adpcm

-  adpcms

-  aifc-fmt

-  aiff

-  aiff-fmt

-  al-fmt

-  au

-  auto

-  avr

-  cdr

-  cvsd

-  cvsd-fmt

-  dat

-  dvms-fmt

-  formats

-  g711

-  g721

-  g723_24

-  g723_40

-  g72x

-  gsm

-  hcom

-  ima-fmt

-  ima_rw

-  la-fmt

-  lpc10.c

-  lu-fmt

-  maud

-  nulfile

-  prc

-  raw

-  raw-fmt

-  s1-fmt

-  s2-fmt

-  s3-fmt

-  s4-fmt

-  sf

-  skelform

-  smp

-  sndrtool

-  sphere

-  tx16w

-  u1-fmt

-  u2-fmt

-  u3-fmt

-  u4-fmt

-  ul-fmt

-  voc

-  vox

-  vox-fmt

-  wav

-  wve

-  xa

+  8svx            cvsd            hcom            s1-fmt          u2-fmt

+  adpcm           cvsd-fmt        ima-fmt         s2-fmt          u3-fmt

+  adpcms          dat             ima_rw          s3-fmt          u4-fmt

+  aifc-fmt        dvms-fmt        la-fmt          s4-fmt          ul-fmt

+  aiff            formats         lpc10.c         sf              voc

+  aiff-fmt        g711            lu-fmt          skelform        vox

+  al-fmt          g721            maud            smp             vox-fmt

+  au              g723_24         nulfile         sndrtool        wav

+  auto            g723_40         prc             sphere          wve

+  avr             g72x            raw             tx16w           xa

+  cdr             gsm             raw-fmt         u1-fmt

 add_library(lib${PROJECT_NAME}

   getopt

@@ -218,9 +137,6 @@

 add_executable(${PROJECT_NAME} ${PROJECT_NAME}.c)

 target_link_libraries(${PROJECT_NAME} lib${PROJECT_NAME} lpc10 ${optional_libs})

-if (HAVE_LIBSOUNDTOUCH)

-  set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE CXX)

-endif (HAVE_LIBSOUNDTOUCH)

 add_executable(sox_sample_test sox_sample_test.c)

 add_custom_target(rec ALL ln -sf sox rec DEPENDS sox)

 add_custom_target(play ALL ln -sf sox play DEPENDS sox)

--- a/src/Makefile.am

+++ b/src/Makefile.am

@@ -172,11 +172,11 @@

 libsfx_la_SOURCES = band.h biquad.c biquad.h biquads.c chorus.c compand.c	\

 	  compandt.c compandt.h dcshift.c dither.c earwax.c echo.c echos.c	\

-	  effects.c effects.h fade.c FFT.c FFT.h filter.c flanger.c ladspa.c	\

-	  mcompand.c mixer.c noiseprof.c noisered.c noisered.h pad.c	\

+	  effects.c effects.h fade.c FFT.c FFT.h filter.c flanger.c key.c \

+	  ladspa.c mcompand.c mixer.c noiseprof.c noisered.c noisered.h pad.c \

 	  pan.c phaser.c pitch.c polyphas.c rabbit.c rate.c repeat.c	\

 	  resample.c reverb.c reverse.c silence.c skeleff.c speed.c	\

-	  stat.c stretch.c swap.c synth.c tremolo.c trim.c vibro.c	\

+	  stat.c stretch.c swap.c synth.c tempo.c tremolo.c trim.c vibro.c \

 	  vol.c

 libsfx_la_CFLAGS = @SAMPLERATE_CFLAGS@

 libsfx_la_LIBADD = @SAMPLERATE_LIBS@ libsox.la

--- a/src/effects.h

+++ b/src/effects.h

@@ -20,9 +20,7 @@

   EFFECT(flanger)

   EFFECT(highpass)

   EFFECT(highp)

-#ifdef HAVE_LIBSOUNDTOUCH

   EFFECT(key)

-#endif

 #ifdef HAVE_LADSPA_H

   EFFECT(ladspa)

 #endif

@@ -54,9 +52,7 @@

   EFFECT(stretch)

   EFFECT(swap)

   EFFECT(synth)

-#ifdef HAVE_LIBSOUNDTOUCH

   EFFECT(tempo)

-#endif

   EFFECT(treble)

   EFFECT(tremolo)

   EFFECT(trim)

--- /dev/null

+++ b/src/key.c

@@ -1,0 +1,53 @@

+/*

+ * Effect: change the audio key (i.e. change pitch but not tempo)

+ *

+ * Copyright (c) 2007 robs@users.sourceforge.net

+ *

+ * This library is free software; you can redistribute it and/or modify it

+ * under the terms of the GNU Lesser General Public License as published by

+ * the Free Software Foundation; either version 2 of the License, or (at

+ * your option) any later version.

+ *

+ * This library is distributed in the hope that it will be useful, but

+ * WITHOUT ANY WARRANTY; without even the implied warranty of

+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser

+ * General Public License for more details.

+ *

+ * You should have received a copy of the GNU Lesser General Public License

+ * along with this library.  If not, write to the Free Software Foundation,

+ * Fifth Floor, 51 Franklin Street, Boston, MA 02111-1301, USA.

+ *

+ *

+ * Adjustment is given as a number of cents (100ths of a semitone) to

+ * change.  Implementation comprises a tempo change (performed by tempo)

+ * and a speed change performed by whichever resampling effect is in effect.

+ */

+#include "sox_i.h"

+#include <math.h>

+#include <string.h>

+/* Parse the `key' options: [-l] shift-in-cents [window [seek [overlap]]].
+ * The shift (in cents) is converted to a frequency ratio which is multiplied
+ * into the global speed; the reciprocal of that ratio is then passed, in
+ * place of the shift argument, to the tempo effect's option parser.
+ * Returns sox_usage() if the shift is missing or is not a plain number,
+ * otherwise whatever the tempo parser returns. */
+static int getopts(sox_effect_t * effp, int argc, char **argv)
+{
+  double d;
+  char dummy, arg[100];
+  char * shift_arg;
+  int result;
+  int pos = (argc && !strcmp(*argv, "-l"))? 1 : 0;  /* skip an optional -l */
+  if (argc <= pos || sscanf(argv[pos], "%lf %c", &d, &dummy) != 1)
+    return sox_usage(effp);
+  effp->global_info->speed *= d = pow(2., d / 1200);  /* cents --> factor */
+  sprintf(arg, "%g", 1 / d);
+  /* `arg' lives in this stack frame, so only lend it to the tempo parser and
+   * put the caller's own string back before returning; otherwise argv would
+   * be left holding a dangling pointer. */
+  shift_arg = argv[pos];
+  argv[pos] = arg;
+  result = sox_tempo_effect_fn()->getopts(effp, argc, argv);
+  argv[pos] = shift_arg;
+  return result;
+}

+/* Return the handler for the `key' effect: a copy of the tempo handler that
+ * carries its own name, usage text and option parser. */
+sox_effect_handler_t const * sox_key_effect_fn(void)
+{
+  static sox_effect_handler_t key_handler;
+  key_handler = *sox_tempo_effect_fn();   /* re-copied on every call */
+  key_handler.name = "key";
+  key_handler.usage = "[-l] shift-in-cents [window-ms [seek-ms [overlap-ms]]]";
+  key_handler.getopts = getopts;
+  return &key_handler;
+}

--- /dev/null

+++ b/src/tempo.c

@@ -1,0 +1,698 @@

+/*

+ * Effect: change the audio tempo (but not key)

+ *

+ * Copyright (c) 2007 robs@users.sourceforge.net

+ *

+ * This library is free software; you can redistribute it and/or modify it

+ * under the terms of the GNU Lesser General Public License as published by

+ * the Free Software Foundation; either version 2 of the License, or (at

+ * your option) any later version.

+ *

+ * This library is distributed in the hope that it will be useful, but

+ * WITHOUT ANY WARRANTY; without even the implied warranty of

+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser

+ * General Public License for more details.

+ *

+ * You should have received a copy of the GNU Lesser General Public License

+ * along with this library.  If not, write to the Free Software Foundation,

+ * Fifth Floor, 51 Franklin Street, Boston, MA 02111-1301, USA.

+ */

+/* Addressable FIFO buffer */

+#include "sox_i.h"

+#include "xmalloc.h"

+#include <string.h>

+typedef struct {

+  char * data;         /* Storage for the queued items. */

+  size_t allocation;   /* Number of bytes allocated for data. */

+  size_t item_size;    /* Size in bytes of each item in data. */

+  size_t begin;        /* Offset of the first byte to read. */

+  size_t end;          /* 1 + offset of the last byte to read. */

+} fifo_t;

+#define FIFO_MIN 0x4000  /* Initial allocation in bytes; also the read offset above which fifo_reserve compacts rather than grows. */

+/* Discard everything queued in the FIFO; the allocation is kept. */
+static void fifo_clear(fifo_t * f)
+{
+  f->begin = 0;
+  f->end = 0;
+}

+/* Reserve room for n more items at the tail of the FIFO and return a
+ * pointer to the start of the reserved region.  Space is found, in order of
+ * preference, by using the free tail, by sliding the queued data down to
+ * offset 0 (only once more than FIFO_MIN bytes have been read), or by
+ * growing the allocation. */
+static void * fifo_reserve(fifo_t * f, size_t n)
+{
+  size_t const bytes = n * f->item_size;
+  if (f->begin == f->end)        /* nothing queued: restart at offset 0 */
+    fifo_clear(f);
+  for (;;) {
+    if (f->end + bytes <= f->allocation) {
+      void * tail = (char *) f->data + f->end;
+      f->end += bytes;
+      return tail;
+    }
+    if (f->begin > FIFO_MIN) {   /* reclaim the space already read */
+      memmove(f->data, f->data + f->begin, f->end - f->begin);
+      f->end -= f->begin;
+      f->begin = 0;
+    }
+    else {                       /* otherwise enlarge the buffer */
+      f->allocation += bytes;
+      f->data = xrealloc(f->data, f->allocation);
+    }
+  }
+}

+/* Append n items to the FIFO.  If data is NULL the space is only reserved
+ * and the caller is expected to fill it through the returned pointer. */
+static void * fifo_write(fifo_t * f, size_t n, void const * data)
+{
+  void * dest = fifo_reserve(f, n);
+  if (data != NULL)
+    memcpy(dest, data, n * f->item_size);
+  return dest;
+}

+/* Set the FIFO's occupancy to exactly n items, counted from the read
+ * position.  No check is made that n items are actually queued. */
+static void fifo_trim(fifo_t * f, size_t n)
+{
+  f->end = f->begin + n * f->item_size;
+}

+/* Return the number of whole items currently queued in the FIFO. */
+static size_t fifo_occupancy(fifo_t * f)
+{
+  size_t const queued_bytes = f->end - f->begin;
+  return queued_bytes / f->item_size;
+}

+/* Remove n items from the head of the FIFO.  Returns NULL (and removes
+ * nothing) if fewer than n items are queued; otherwise returns a pointer to
+ * where the items sit inside the FIFO and, if data is not NULL, also copies
+ * them to data. */
+static void * fifo_read(fifo_t * f, size_t n, void * data)
+{
+  char * head = f->data + f->begin;
+  size_t const bytes = n * f->item_size;
+  if (bytes > f->end - f->begin)
+    return NULL;
+  if (data != NULL)
+    memcpy(data, head, bytes);
+  f->begin += bytes;
+  return head;
+}
+/* Peek: pointer to the next item to be read, without consuming anything. */
+#define fifo_read_ptr(f) fifo_read(f, 0, NULL)

+/* Release the FIFO's storage; the fifo_t object itself is not freed. */
+static void fifo_delete(fifo_t * f)
+{
+  free(f->data);
+}

+/* Initialise an empty FIFO of items that are item_size bytes each, with an
+ * initial allocation of FIFO_MIN bytes. */
+static void fifo_create(fifo_t * f, size_t item_size)
+{
+  f->allocation = FIFO_MIN;
+  f->data = xmalloc(f->allocation);
+  f->item_size = item_size;
+  f->begin = f->end = 0;
+}

+/*

+ * Change tempo (alter duration, maintain pitch) using a time domain

+ * WSOLA-like method.  Based on TDStretch.cpp revision 1.24 from The

+ * SoundTouch Library Copyright (c) Olli Parviainen 2001-2005.

+ */

+#include <string.h>

+#include <assert.h>

+#ifndef max

+#define max(a, b) ((a) >= (b) ? (a) : (b))

+#endif

+typedef enum {FALSE, TRUE} BOOL;

+typedef struct {

+  size_t samples_in;             /* Frames passed to putSamples since the last clearInput. */

+  size_t samples_out;            /* Frames handed out by receiveSamples. */

+  double factor;                 /* Tempo ratio given to setParameters. */

+  size_t channels;               /* Channel count given to newTDStretch. */

+  size_t sampleReq;              /* Input occupancy needed before a sequence is processed. */

+  float * pMidBuffer;            /* Tail of the previous sequence, awaiting overlap with the next. */

+  float * pRefMidBuffer;         /* 16-byte-aligned, amplitude-sloped copy of pMidBuffer. */

+  float * pRefMidBufferUnaligned;/* Allocation that pRefMidBuffer points into. */

+  size_t overlapLength;          /* Overlap length, in frames. */

+  size_t seekLength;             /* Number of candidate offsets searched. */

+  size_t seekWindowLength;       /* Length of one processing sequence, in frames. */

+  size_t maxOffset;              /* Set equal to seekLength; added into sampleReq. */

+  double nominalSkip;            /* Ideal (fractional) input advance per sequence. */

+  double skipFract;              /* Fractional advance carried over between sequences. */

+  fifo_t outputBuffer;

+  fifo_t inputBuffer;

+  BOOL bQuickseek;               /* TRUE selects the hierarchical (quick) seek. */

+  BOOL bMidBufferDirty;          /* TRUE while pMidBuffer holds sample data. */

+} TDStretch;

+/* Zero the mid buffer, but only if it has been written to since it was
+ * last cleared. */
+static void clearMidBuffer(TDStretch * p)
+{
+  if (!p->bMidBufferDirty)
+    return;
+  memset(p->pMidBuffer, 0, 2 * sizeof(float) * p->overlapLength);
+  p->bMidBufferDirty = FALSE;
+}

+/* Drop all pending input: empties the input FIFO and the mid buffer and
+ * resets the count of input samples. */
+static void clearInput(TDStretch * p)
+{
+  fifo_clear(&p->inputBuffer);
+  clearMidBuffer(p);
+  p->samples_in = 0;
+}

+/* Empty both FIFOs and the mid buffer (the sample counters are untouched). */
+static void clear(TDStretch * p)
+{
+  fifo_clear(&p->inputBuffer);
+  fifo_clear(&p->outputBuffer);
+  clearMidBuffer(p);
+}

+/* Build the mono correlation reference: each 'midBuffer' sample is scaled
+ * by i * (overlapLength - i), so that the weighting need not be repeated
+ * for every candidate position in the cross-correlation search. */
+static void precalcCorrReferenceMono(TDStretch * p)
+{
+  int i;
+  for (i = 0; i < (int) p->overlapLength; i++) {
+    float weight = (float) i * (float) (p->overlapLength - i);
+    p->pRefMidBuffer[i] = (float) (p->pMidBuffer[i] * weight);
+  }
+}

+/* Stereo variant of precalcCorrReferenceMono: both samples of frame i are
+ * scaled by the same weight i * (overlapLength - i). */
+static void precalcCorrReferenceStereo(TDStretch * p)
+{
+  int i;
+  for (i = 0; i < (int) p->overlapLength; i++) {
+    float weight = (float) i * (float) (p->overlapLength - i);
+    int left = i * 2;
+    p->pRefMidBuffer[left] = (float) (p->pMidBuffer[left] * weight);
+    p->pRefMidBuffer[left + 1] = (float) (p->pMidBuffer[left + 1] * weight);
+  }
+}

+/* Sum of products of two mono sequences over the overlap length.  The loop
+ * is unrolled eight times, so samples are always consumed in groups of 8. */
+static double calcCrossCorrMono(
+    TDStretch * p, const float * mixingPos, const float * compare)
+{
+  double sum = 0;
+  size_t k = 0;
+  #define MAC1 sum += mixingPos[k] * compare[k], ++k;
+  do {
+    MAC1 MAC1 MAC1 MAC1 MAC1 MAC1 MAC1 MAC1
+  } while (k < p->overlapLength);
+  #undef MAC1
+  return sum;
+}

+/* Sum of products of two interleaved stereo sequences over the overlap
+ * length.  The loop is unrolled eight times, so frames are always consumed
+ * in groups of 8 (16 floats). */
+static double calcCrossCorrStereo(
+    TDStretch * p, const float * mixingPos, const float * compare)
+{
+  double sum = 0;
+  size_t k = 0;
+  #define MAC2 sum += mixingPos[k]*compare[k] + mixingPos[k+1]*compare[k+1], k+=2;
+  do {
+    MAC2 MAC2 MAC2 MAC2 MAC2 MAC2 MAC2 MAC2
+  } while (k < 2 * p->overlapLength);
+  #undef MAC2
+  return sum;
+}

+/* Seek the optimal overlap-mixing position: the offset at which the input
+ * and the (sloped) end of the previous sequence are 'most alike', i.e. give
+ * the highest cross-correlation over the overlap period.  There are four
+ * variants: mono/stereo, each either exhaustive or quick.
+ *
+ * This one is the exhaustive mono search: every offset in [0, seekLength)
+ * is tried, and the first offset attaining the maximum is returned. */
+static size_t seekBestOverlapPositionMono(
+    TDStretch * p, const float * refPos)
+{
+  double best = INT_MIN;
+  size_t bestAt = 0;
+  size_t offs;
+  precalcCorrReferenceMono(p);
+  for (offs = 0; offs < p->seekLength; ++offs) {
+    double c = calcCrossCorrMono(p, p->pRefMidBuffer, refPos + offs);
+    if (c > best) {
+      best = c;
+      bestAt = offs;
+    }
+  }
+  return bestAt;
+}

+/* Exhaustive stereo search: every frame offset in [0, seekLength) is tried,
+ * and the first offset attaining the highest correlation is returned. */
+static size_t seekBestOverlapPositionStereo(TDStretch * p,
+                                            const float * refPos)
+{
+  double best = INT_MIN;
+  size_t bestAt = 0;
+  size_t offs;
+  precalcCorrReferenceStereo(p);
+  for (offs = 0; offs < p->seekLength; ++offs) {
+    double c = calcCrossCorrStereo(p, refPos + 2 * offs, p->pRefMidBuffer);
+    if (c > best) {
+      best = c;
+      bestAt = offs;
+    }
+  }
+  return bestAt;
+}

+/* Table for the quick hierarchical mixing position seeking algorithm: one
+ * row per search pass, each row a zero-terminated list of offsets (in
+ * frames) that are added to the best position found by the previous pass. */

+static int const scanOffsets[4][24] = {

+  { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,

+    868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},

+  {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,

+      0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},

+  { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,

+      0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},

+  {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,

+      0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0}};

+/* Quick mono search.  Four passes are made over 'scanOffsets': the first
+ * looks for the highest correlation using coarse steps, and each later pass
+ * rescans the neighbourhood of the best position so far with finer steps.
+ *
+ * NOTE(review): the candidate position is computed in size_t, so a negative
+ * table entry larger in magnitude than the current base wraps to a huge
+ * value, fails the seekLength test and ends that pass early (the remaining,
+ * positive, entries of the row are then not tried).  Confirm whether that
+ * is intended. */
+static size_t seekBestOverlapPositionMonoQuick(TDStretch * p,
+                                               const float * refPos)
+{
+  double best = INT_MIN;
+  size_t bestAt = 0;
+  size_t base = 0;
+  size_t pass;
+  precalcCorrReferenceMono(p);
+  for (pass = 0; pass < 4; ++pass) {
+    int const * step;
+    for (step = scanOffsets[pass]; *step; ++step) {
+      size_t const candidate = base + *step;
+      double c;
+      if (candidate >= p->seekLength)
+        break;
+      c = calcCrossCorrMono(p, refPos + candidate, p->pRefMidBuffer);
+      if (c > best) {
+        best = c;
+        bestAt = candidate;
+      }
+    }
+    base = bestAt;
+  }
+  return bestAt;
+}

+/* Quick stereo search: four passes over 'scanOffsets', each refining the
+ * best frame offset found by the previous pass with finer steps.
+ *
+ * NOTE(review): the candidate position is computed in size_t, so a negative
+ * table entry larger in magnitude than the current base wraps to a huge
+ * value, fails the seekLength test and ends that pass early.  Confirm
+ * whether that is intended. */
+static size_t seekBestOverlapPositionStereoQuick(TDStretch * p,
+                                                 const float * refPos)
+{
+  double best = INT_MIN;
+  size_t bestAt = 0;
+  size_t base = 0;
+  size_t pass;
+  precalcCorrReferenceStereo(p);
+  for (pass = 0; pass < 4; ++pass) {
+    int const * step;
+    for (step = scanOffsets[pass]; *step; ++step) {
+      size_t const candidate = base + *step;
+      double c;
+      if (candidate >= p->seekLength)
+        break;
+      c = calcCrossCorrStereo(p, refPos + 2 * candidate, p->pRefMidBuffer);
+      if (c > best) {
+        best = c;
+        bestAt = candidate;
+      }
+    }
+    base = bestAt;
+  }
+  return bestAt;
+}

+/* Dispatch to the seek variant matching the channel count (2 = stereo,
+ * anything else = mono) and the quick-seek setting. */
+static size_t seekBestOverlapPosition(TDStretch * p,
+                                      const float * refPos)
+{
+  if (p->channels == 2)
+    return p->bQuickseek ? seekBestOverlapPositionStereoQuick(p, refPos)
+                         : seekBestOverlapPositionStereo(p, refPos);
+  return p->bQuickseek ? seekBestOverlapPositionMonoQuick(p, refPos)
+                       : seekBestOverlapPositionMono(p, refPos);
+}

+/* Cross-fade (mono): over overlapLength samples, fade linearly from the
+ * samples held in 'midBuffer' to the samples in 'input'. */
+static void overlapMono(TDStretch * p, float * output,
+                        const float * input)
+{
+  int i;
+  for (i = 0; i < (int) p->overlapLength; i++) {
+    int fadeOut = p->overlapLength - i;
+    output[i] =
+        (input[i] * i + p->pMidBuffer[i] * fadeOut) / p->overlapLength;
+  }
+}

+/* Cross-fade (stereo): over overlapLength frames, fade linearly from the
+ * interleaved samples held in 'midBuffer' to those in 'input'. */
+static void overlapStereo(TDStretch * p, float * output,
+                          const float * input)
+{
+  float const scale = 1.0f / (float) p->overlapLength;
+  int i;
+  for (i = 0; i < (int) p->overlapLength; i++) {
+    float const wOld = (float) (p->overlapLength - i) * scale;
+    float const wNew = (float) i * scale;
+    size_t const l = 2 * i;
+    size_t const r = l + 1;
+    output[l] = input[l] * wNew + p->pMidBuffer[l] * wOld;
+    output[r] = input[r] * wNew + p->pMidBuffer[r] * wOld;
+  }
+}

+/* Cross-fade the samples in 'midBuffer' with the samples of 'input' that
+ * start at frame 'ovlPos', using the stereo or the mono routine according
+ * to the channel count. */
+static inline void overlap(TDStretch * p, float * output,
+                           const float * input, size_t ovlPos)
+{
+  if (p->channels != 2)
+    overlapMono(p, output, input + ovlPos);
+  else
+    overlapStereo(p, output, input + 2 * ovlPos);
+}

+/* Process as many sequences as the queued input allows, appending the
+ * results to 'outputBuffer'. */
+static void processSamples(TDStretch * p)
+{
+  fifo_t * const in = &p->inputBuffer;
+  fifo_t * const out = &p->outputBuffer;
+  if (p->bMidBufferDirty == FALSE) {
+    /* 'midBuffer' is empty: prime it with the first overlapLength frames
+     * of the input, waiting until that many have arrived. */
+    if (fifo_occupancy(in) < p->overlapLength)
+      return;
+    fifo_read(in, p->overlapLength, p->pMidBuffer);
+    p->bMidBufferDirty = TRUE;
+  }
+  while (fifo_occupancy(in) >= p->sampleReq) {
+    /* Find where in the input the next sequence joins best. */
+    size_t const offset = seekBestOverlapPosition(p, fifo_read_ptr(in));
+    /* Length of the part of the sequence that is copied unmodified. */
+    size_t const body = p->seekWindowLength - 2 * p->overlapLength;
+    size_t skip;
+    /* Cross-fade the end of the previous sequence (held in 'midBuffer')
+     * into the start of this one ... */
+    overlap(p, fifo_reserve(out, p->overlapLength), fifo_read_ptr(in), offset);
+    /* ... then copy the middle of the sequence straight to the output. */
+    if ((int) body > 0)
+      fifo_write(out, body, (float *) fifo_read_ptr(in) +
+                 p->channels * (offset + p->overlapLength));
+    /* Keep the end of this sequence in 'midBuffer' so that it can be mixed
+     * with the start of the next one. */
+    assert(offset + p->seekWindowLength <= fifo_occupancy(in));
+    memcpy(p->pMidBuffer,
+           (float *) fifo_read_ptr(in) +
+           p->channels * (offset + p->seekWindowLength - p->overlapLength),
+           p->channels * sizeof(float) * p->overlapLength);
+    p->bMidBufferDirty = TRUE;
+    /* Advance the input by the whole part of the accumulated skip and
+     * carry the fraction forward, so rounding error does not build up. */
+    p->skipFract += p->nominalSkip;
+    skip = (int) p->skipFract;
+    p->skipFract -= skip;
+    fifo_read(in, skip, NULL);
+  }
+}

+/* Set a new overlap length (in frames).  If it is larger than the previous
+ * one, the mid buffer and the correlation reference buffer are freed and
+ * reallocated (zero-filled) at the new size, each with room for two
+ * channels; otherwise the existing buffers are kept. */
+static void acceptNewOverlapLength(TDStretch * p, size_t newOverlapLength)
+{
+  size_t const prevOvl = p->overlapLength;
+  char * raw;
+  p->overlapLength = newOverlapLength;
+  if (p->overlapLength <= prevOvl)
+    return;
+  free(p->pMidBuffer);
+  free(p->pRefMidBufferUnaligned);
+  p->pMidBuffer = xcalloc(p->overlapLength * 2, sizeof(float));
+  p->bMidBufferDirty = TRUE;
+  clearMidBuffer(p);
+  /* 16 spare bytes are allocated so that the start can be moved up to the
+   * next 16-byte boundary. */
+  p->pRefMidBufferUnaligned = xcalloc(
+      2 * p->overlapLength + 16 / sizeof(float), sizeof(float));
+  /* For efficiency, align 'pRefMidBuffer' to a 16-byte boundary.  Only the
+   * low four address bits are examined and the adjustment is applied with
+   * pointer arithmetic, so the result is correct even where no integer type
+   * used here is as wide as a pointer (e.g. `unsigned long' on LLP64). */
+  raw = (char *) p->pRefMidBufferUnaligned;
+  p->pRefMidBuffer = (float *) (raw + ((16 - ((size_t) raw & 15)) & 15));
+}

+/* Set the routine's control parameters.  These are the time constants that
+ * define how the sound is stretched to the desired duration.
+ *   'sampleRate'   = sample rate of the sound
+ *   'tempo'        = 1 for no change, < 1 for slower, > 1 for faster
+ *   'sequenceMs'   = length of one processing sequence, in milliseconds
+ *   'seekWindowMs' = length of the window that is scanned for the best
+ *                    overlapping position, in milliseconds
+ *   'overlapMs'    = overlap length, in milliseconds
+ *   'quickSeek'    = whether to use the quick seek for the best overlapping
+ *                    position
+ * The overlap length is rounded to a multiple of 8 frames (minimum 16), as
+ * required by the unrolled correlation loops.
+ */
+static void setParameters(TDStretch * p, double sampleRate, double tempo,
+    double sequenceMs, double seekWindowMs, double overlapMs, BOOL quickSeek)
+{
+  size_t newOvl;
+  size_t intskip;
+  p->factor = tempo;
+  p->bQuickseek = quickSeek;
+  p->maxOffset = p->seekLength = sampleRate * seekWindowMs / 1000 + .5;
+  p->seekWindowLength = sampleRate * sequenceMs / 1000 + .5;
+  newOvl = max(sampleRate * overlapMs / 1000 + 4.5, 16);
+  newOvl &= ~7; /* must be divisible by 8 */
+  acceptNewOverlapLength(p, newOvl);
+  /* A sequence has to hold an overlap region at each end.  Without this
+   * clamp, a short sequence combined with a long overlap would make the
+   * unsigned subtraction below wrap round to a huge skip length. */
+  if (p->seekWindowLength < 2 * p->overlapLength)
+    p->seekWindowLength = 2 * p->overlapLength;
+  /* Calculate ideal skip length (according to tempo value)  */
+  p->nominalSkip = tempo * (p->seekWindowLength - p->overlapLength);
+  p->skipFract = 0;
+  intskip = (int) (p->nominalSkip + 0.5);
+  /* Calculate how many samples are needed in the 'inputBuffer' to  */
+  /* process another batch of samples */
+  p->sampleReq =
+      max(intskip + p->overlapLength, p->seekWindowLength) + p->maxOffset;
+}

+/* Queue n input frames and count them.  If samples is NULL the space is
+ * only reserved; the caller fills it through the returned pointer. */
+static float * putSamples(TDStretch * p, float const *samples, size_t n)
+{
+  float * dest = fifo_write(&p->inputBuffer, n, samples);
+  p->samples_in += n;
+  return dest;
+}

+/* Take up to *n frames from the output FIFO.  On return *n holds the number
+ * of frames actually taken; they are copied to samples if it is not NULL,
+ * and a pointer to them inside the FIFO is returned. */
+static float const * receiveSamples(
+    TDStretch * p, float * samples, size_t * n)
+{
+  size_t const available = fifo_occupancy(&p->outputBuffer);
+  if (available < *n)
+    *n = available;
+  p->samples_out += *n;
+  return fifo_read(&p->outputBuffer, *n, samples);
+}

+/* Flush the last samples from the processing pipeline to the output, and
+ * clear the input side of the pipeline.
+ *
+ * Silence is fed in until the output FIFO holds enough frames to bring the
+ * total output up to samples_in / factor (rounded); the FIFO is then
+ * trimmed to exactly that many frames.
+ *
+ * Note: this is meant for extracting the last samples of a sound stream.
+ * It may add blank samples to the end of the stream, so calling it in the
+ * middle of a stream is not recommended. */
+static void flush(TDStretch * p)
+{
+  size_t const target = p->samples_in / p->factor + .5;
+  size_t remaining;
+  float silence[128];
+  if (p->samples_out >= target)
+    return;
+  remaining = target - p->samples_out;
+  memset(silence, 0, sizeof(silence));
+  while (fifo_occupancy(&p->outputBuffer) < remaining) {
+    putSamples(p, silence, sizeof(silence)/sizeof(silence[0])/p->channels);
+    processSamples(p);
+  }
+  fifo_trim(&p->outputBuffer, remaining);
+  clearInput(p);
+}

+/* Free a TDStretch object together with its FIFOs and work buffers.
+ * p must not be NULL. */
+static void deleteTDStretch(TDStretch * p)
+{
+  fifo_delete(&p->inputBuffer);
+  fifo_delete(&p->outputBuffer);
+  free(p->pRefMidBufferUnaligned);
+  free(p->pMidBuffer);
+  free(p);
+}

+/* Allocate a zero-initialised TDStretch for the given number of channels,
+ * with empty input and output FIFOs whose items are whole frames. */
+static TDStretch * newTDStretch(size_t channels)
+{
+  TDStretch * p = xcalloc(1, sizeof(*p));
+  size_t const frame_bytes = channels * sizeof(float);
+  p->channels = channels;
+  fifo_create(&p->inputBuffer, frame_bytes);
+  fifo_create(&p->outputBuffer, frame_bytes);
+  return p;
+}

+/*

+ * libSoX tempo effect: adjust the audio tempo (but not key)

+ *

+ * Adjustment is given as the ratio of the new tempo to the old tempo.

+ */

+#include "sox_i.h"

+#include <math.h>

+typedef struct tempo {

+  TDStretch   * tdstretch;   /* Created in start(), freed in stop(). */

+  sox_bool    quick_seek;    /* Cleared when the -l option is given. */

+  double      factor, sequence_ms, seek_window_ms, overlap_ms;  /* Options as parsed by getopts(). */

+} priv_t;

+assert_static(sizeof(struct tempo) <= SOX_MAX_EFFECT_PRIVSIZE,

+              /* else */ tempo_PRIVSIZE_too_big);

+/* Parse the `tempo' options: [-l] factor [window-ms [seek-ms [overlap-ms]]].
+ * Returns sox_usage() if any argument is left unparsed or if factor is
+ * zero, otherwise SOX_SUCCESS. */
+static int getopts(sox_effect_t * effp, int argc, char **argv)
+{
+  priv_t * p = (priv_t *) effp->priv;
+  /* Defaults for the optional parameters: */
+  p->overlap_ms     = 12;
+  p->seek_window_ms = 14;
+  p->sequence_ms    = 82;
+  /* Quick seek is used unless the first argument is -l, which is consumed. */
+  p->quick_seek = !argc || strcmp(*argv, "-l") || (--argc, ++argv, sox_false);
+  do {                    /* break-able block */
+    NUMERIC_PARAMETER(factor        ,0.25, 4  )
+    NUMERIC_PARAMETER(sequence_ms   , 10 , 120)
+    NUMERIC_PARAMETER(seek_window_ms, 7  , 28 )
+    NUMERIC_PARAMETER(overlap_ms    , 6  , 24 )
+  } while (0);
+  sox_debug("factor:%g sequence:%g seek:%g overlap:%g quick:%i", p->factor,
+      p->sequence_ms, p->seek_window_ms, p->overlap_ms, p->quick_seek);
+  if (argc || !p->factor)
+    return sox_usage(effp);
+  return SOX_SUCCESS;
+}

+/* Prepare the effect for processing.  Returns SOX_EFF_NULL when the factor
+ * is exactly 1, SOX_EOF when there are more than two channels, and
+ * otherwise creates and configures the stretcher and returns SOX_SUCCESS.
+ * Note that no stretcher is created on the first two paths. */
+static int start(sox_effect_t * effp)
+{
+  priv_t * p = (priv_t *) effp->priv;
+  if (p->factor == 1)
+    return SOX_EFF_NULL;
+  if (effp->ininfo.channels > 2) {
+    sox_fail("supports only mono or stereo audio");
+    return SOX_EOF;
+  }
+  p->tdstretch = newTDStretch(effp->ininfo.channels);
+  setParameters(
+      p->tdstretch, effp->ininfo.rate, p->factor,
+      p->sequence_ms, p->seek_window_ms, p->overlap_ms, p->quick_seek);
+  return SOX_SUCCESS;
+}

+/* Move audio through the effect.  Output already waiting in the stretcher
+ * is delivered first; input is accepted (and processed) only if there is
+ * input and that output did not fill obuf, otherwise *isamp is set to 0 to
+ * report that nothing was consumed.  On return *osamp is the number of
+ * samples written to obuf.
+ * NOTE(review): *isamp is presumably always a whole number of frames; the
+ * space reserved for the input is rounded down to whole frames. */
+static int flow(sox_effect_t * effp, const sox_ssample_t * ibuf,
+                sox_ssample_t * obuf, sox_size_t * isamp, sox_size_t * osamp)
+{
+  priv_t * p = (priv_t *) effp->priv;
+  sox_size_t i, odone;
+  float const * s;
+  *osamp /= effp->ininfo.channels;     /* samples --> frames */
+  odone = *osamp;
+  s = receiveSamples(p->tdstretch, NULL, &odone);
+  for (i = 0; i < odone * effp->ininfo.channels; ++i)
+    *obuf++ = SOX_FLOAT_32BIT_TO_SAMPLE(*s++, effp->clips);
+  if (!*isamp || odone >= *osamp)
+    *isamp = 0;
+  else {
+    float * t = putSamples(p->tdstretch, NULL, *isamp / effp->ininfo.channels);
+    for (i = *isamp; i; --i)
+      *t++ = SOX_SAMPLE_TO_FLOAT_32BIT(*ibuf++, effp->clips);
+    processSamples(p->tdstretch);
+  }
+  *osamp = odone * effp->ininfo.channels;
+  return SOX_SUCCESS;
+}

+/* Deliver the audio still held inside the effect once the input has ended:
+ * the stretcher is flushed and flow() is then run with no input.  The zero
+ * input count is an ordinary local; it was needlessly function-static,
+ * i.e. mutable state shared by every instance of the effect. */
+static int drain(sox_effect_t * effp, sox_ssample_t * obuf, sox_size_t * osamp)
+{
+  sox_size_t isamp = 0;
+  flush(((priv_t *)effp->priv)->tdstretch);
+  return flow(effp, NULL, obuf, &isamp, osamp);
+}

+/* Release the stretcher created by start().
+ * NOTE(review): assumes start() succeeded; start() creates no stretcher on
+ * its SOX_EFF_NULL and SOX_EOF paths -- confirm stop() is not called then. */
+static int stop(sox_effect_t * effp)
+{
+  priv_t * p = (priv_t *) effp->priv;
+  deleteTDStretch(p->tdstretch);
+  return SOX_SUCCESS;
+}

+/* Return the handler (name, usage text, flags and callbacks) for the
+ * `tempo' effect. */
+sox_effect_handler_t const * sox_tempo_effect_fn(void)
+{
+  static sox_effect_handler_t tempo_handler = {
+    "tempo",
+    "[-l] factor [window-ms [seek-ms [overlap-ms]]]",
+    SOX_EFF_MCHAN | SOX_EFF_LENGTH,
+    getopts, start, flow, drain, stop, NULL
+  };
+  return &tempo_handler;
+}

--- a/src/tempo.c++

+++ /dev/null

@@ -1,156 +1,0 @@

-/*

- * This library is free software; you can redistribute it and/or modify it

- * under the terms of the GNU Lesser General Public License as published by

- * the Free Software Foundation; either version 2 of the License, or (at

- * your option) any later version.

- *

- * This library is distributed in the hope that it will be useful, but

- * WITHOUT ANY WARRANTY; without even the implied warranty of

- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser

- * General Public License for more details.

- *

- * You should have received a copy of the GNU Lesser General Public License

- * along with this library.  If not, write to the Free Software Foundation,

- * Fifth Floor, 51 Franklin Street, Boston, MA 02111-1301, USA.

- */

-/*

- * (c) 2007 robs@users.sourceforge.net

- *

- * libSoX tempo effect: adjust the audio tempo (but not key)

- *

- * Adjustment is given as the ratio of the new tempo to the old tempo.

- *

- * libSoX key effect: adjust the audio pitch (but not tempo)

- *

- * Adjustment is given as a number of cents (100ths of a semitone) to

- * change.  Implementation comprises a tempo change (performed by tempo)

- * and a speed change performed by whichever resampling effect is in effect.

- */

-#include <soundtouch/SoundTouch.h>

-extern "C" {

-#include "sox_i.h"

-#include <math.h>

-#include <string.h>

-typedef struct tempo

-{

-  soundtouch::SAMPLETYPE * buffer;

-  soundtouch::SoundTouch * sound_touch;

-  double factor;

-} * tempo_t;

-assert_static(sizeof(struct tempo) <= SOX_MAX_EFFECT_PRIVSIZE,

-              /* else */ tempo_PRIVSIZE_too_big);

-static int create(sox_effect_t * effp, int n, char * * argv)

-{

-  tempo_t p = (tempo_t) effp->priv;

-  char dummy;

-  if (n == 1 && sscanf(*argv, "%lf %c", &p->factor, &dummy) == 1 && p->factor >=0.05 && p->factor <= 20)

-    return SOX_SUCCESS;

-  return sox_usage(effp);

-}

-static int start(sox_effect_t * effp)

-{

-  tempo_t p = (tempo_t) effp->priv;

-  if (!p->factor)

-    return SOX_EFF_NULL;

-  p->buffer = new soundtouch::SAMPLETYPE[effp->global_info->global_info->bufsiz];

-  p->sound_touch = new soundtouch::SoundTouch;

-  p->sound_touch->setSampleRate(static_cast<uint>(effp->ininfo.rate + 0.5));

-  p->sound_touch->setTempoChange(100 / p->factor - 100);

-  p->sound_touch->setChannels(1);

-  p->sound_touch->setPitchSemiTones(0);

-  p->sound_touch->setRateChange(0);

-  p->sound_touch->setSetting(SETTING_USE_AA_FILTER, 0);

-  return SOX_SUCCESS;

-}

-static int flow(sox_effect_t * effp, const sox_ssample_t * ibuf, sox_ssample_t * obuf,

-                sox_size_t * isamp, sox_size_t * osamp)

-{

-  tempo_t p = (tempo_t) effp->priv;

-  sox_size_t i;

-  sox_size_t idone = 0;

-  sox_size_t odone = p->sound_touch->receiveSamples(p->buffer, *osamp);

-  for (i = 0; i < odone; ++i)

-    obuf[i] = SOX_FLOAT_32BIT_TO_SAMPLE(p->buffer[i], effp->clips);

-  if (odone < *osamp)

-  if (*isamp && odone < *osamp) {

-    for (i = 0; i < *isamp; ++i)

-      p->buffer[i] = SOX_SAMPLE_TO_FLOAT_32BIT(ibuf[i], effp->clips);

-    p->sound_touch->putSamples(p->buffer, idone = *isamp);

-  }

-  *isamp = idone;

-  *osamp = odone;

-  return SOX_SUCCESS;

-}

-static int drain(sox_effect_t * effp, sox_ssample_t * obuf, sox_size_t * osamp)

-{

-  static sox_size_t isamp = 0;

-  tempo_t p = (tempo_t) effp->priv;

-  p->sound_touch->flush();

-  return flow(effp, 0, obuf, &isamp, osamp);

-}

-static int stop(sox_effect_t * effp)

-{

-  tempo_t p = (tempo_t) effp->priv;

-  delete p->sound_touch;

-  delete[] p->buffer;

-  return SOX_SUCCESS;

-}

-sox_effect_handler_t const *sox_tempo_effect_fn(void)

-{

-  static sox_effect_handler_t handler = {

-    "tempo", "factor", SOX_EFF_LENGTH,

-    create, start, flow, drain, stop, 0};

-  return &handler;

-}

-static int key_create(sox_effect_t * effp, int argc, char * * argv)

-{

-  double d;

-  char dummy, arg[100];

-  char * args[10];

-  sox_size_t nargs = 0;

-  if (!argc || sscanf(*argv, "%lf %c", &d, &dummy) != 1)

-    return sox_usage(effp);

-  d = pow(2., d/1200);

-  effp->global_info->speed *= d;

-  sprintf(arg, "%g", d);

-  args[nargs++] = arg;

-  ++argv, --argc;

-  return argc ? sox_usage(effp) :

-    sox_tempo_effect_fn()->getopts(effp, nargs, args);

-}

-sox_effect_handler_t const * sox_key_effect_fn(void)

-{

-  static sox_effect_handler_t handler;

-  handler = *sox_tempo_effect_fn();

-  handler.name = "key";

-  handler.usage = "shift-in-cents";

-  handler.getopts = key_create;

-  return &handler;

-}

-} // extern "C"

--

⑨