shithub: sox

ref: 29647996bc9726866a073a2125247df99244290b
dir: /src/bend.c/

View raw version
/* libSoX effect: Pitch Bend   (c) 2008 robs@users.sourceforge.net
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
 * General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

/* Portions based on http://www.dspdimension.com/download smbPitchShift.cpp:
 *
 * COPYRIGHT 1999-2006 Stephan M. Bernsee <smb [AT] dspdimension [DOT] com>
 *
 *             The Wide Open License (WOL)
 *
 * Permission to use, copy, modify, distribute and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice and this license appear in all source copies. 
 * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF
 * ANY KIND. See http://www.dspguru.com/wol.htm for more information.
 */

#include "sox_i.h"
#include "getopt.h"

static void smbFft(float *fftBuffer, long fftFrameSize, long sign)
/* 
 * FFT routine, (C)1996 S.M.Bernsee. Sign = -1 is FFT, 1 is iFFT (inverse)
 * Fills fftBuffer[0...2*fftFrameSize-1] with the Fourier transform of the
 * time domain data in fftBuffer[0...2*fftFrameSize-1]. The FFT array takes
 * and returns the cosine and sine parts in an interleaved manner, ie.
 * fftBuffer[0] = cosPart[0], fftBuffer[1] = sinPart[0], asf. fftFrameSize
 * must be a power of 2. It expects a complex input signal (see footnote 2),
 * ie. when working with 'common' audio signals our input signal has to be
 * passed as {in[0],0.,in[1],0.,in[2],0.,...} asf. In that case, the transform
 * of the frequencies of interest is in fftBuffer[0...fftFrameSize].
 */
{
  float wr, wi, arg, *p1, *p2, temp;
  float tr, ti, ur, ui, *p1r, *p1i, *p2r, *p2i;
  long i, bitm, j, le, le2, k;

  for (i = 2; i < 2 * fftFrameSize - 2; i += 2) {
    for (bitm = 2, j = 0; bitm < 2 * fftFrameSize; bitm <<= 1) {
      if (i & bitm)
        j++;
      j <<= 1;
    }
    if (i < j) {
      p1 = fftBuffer + i;
      p2 = fftBuffer + j;
      temp = *p1;
      *(p1++) = *p2;
      *(p2++) = temp;
      temp = *p1;
      *p1 = *p2;
      *p2 = temp;
    }
  }
  for (k = 0, le = 2; k < (long) (log((double) fftFrameSize) / log(2.) + .5);
       k++) {
    le <<= 1;
    le2 = le >> 1;
    ur = 1.0;
    ui = 0.0;
    arg = M_PI / (le2 >> 1);
    wr = cos(arg);
    wi = sign * sin(arg);
    for (j = 0; j < le2; j += 2) {
      p1r = fftBuffer + j;
      p1i = p1r + 1;
      p2r = p1r + le2;
      p2i = p2r + 1;
      for (i = j; i < 2 * fftFrameSize; i += le) {
        tr = *p2r * ur - *p2i * ui;
        ti = *p2r * ui + *p2i * ur;
        *p2r = *p1r - tr;
        *p2i = *p1i - ti;
        *p1r += tr;
        *p1i += ti;
        p1r += le;
        p1i += le;
        p2r += le;
        p2i += le;
      }
      tr = ur * wr - ui * wi;
      ui = ur * wi + ui * wr;
      ur = tr;
    }
  }
}

#define MAX_FRAME_LENGTH 8192

typedef struct {
  unsigned nbends;       /* Number of bends requested */
  struct {
    char *str;           /* Command-line argument to parse for this bend */
    size_t start;        /* Start bending when in_pos equals this */
    double cents;
    size_t duration;     /* Number of samples to bend */
  } *bends;

  unsigned frame_rate;
  size_t in_pos;         /* Number of samples read from the input stream */
  unsigned bends_pos;    /* Number of bends completed so far */

  double shift;

  float gInFIFO[MAX_FRAME_LENGTH];
  float gOutFIFO[MAX_FRAME_LENGTH];
  float gFFTworksp[2 * MAX_FRAME_LENGTH];
  float gLastPhase[MAX_FRAME_LENGTH / 2 + 1];
  float gSumPhase[MAX_FRAME_LENGTH / 2 + 1];
  float gOutputAccum[2 * MAX_FRAME_LENGTH];
  float gAnaFreq[MAX_FRAME_LENGTH];
  float gAnaMagn[MAX_FRAME_LENGTH];
  float gSynFreq[MAX_FRAME_LENGTH];
  float gSynMagn[MAX_FRAME_LENGTH];
  long gRover;
  int fftFrameSize, ovsamp;
} priv_t;

static int parse(sox_effect_t * effp, char **argv, sox_rate_t rate)
{
  priv_t *p = (priv_t *) effp->priv;
  size_t i, time = 0, delay;
  char const *next;

  for (i = 0; i < p->nbends; ++i) {
    if (argv)   /* 1st parse only */
      p->bends[i].str = lsx_strdup(argv[i]);
    next = lsx_parsesamples(rate, p->bends[i].str, &delay, 't');
    if (next == NULL || *next != ',')
      break;
    p->bends[i].start = time += delay;
    p->bends[i].cents = strtod(next + 1, (char **)&next);
    if (p->bends[i].cents == 0 || *next != ',')
      break;
    next = lsx_parsesamples(rate, next + 1, &p->bends[i].duration, 't');
    if (next == NULL || *next != '\0')
      break;
    time += p->bends[i].duration;
  }
  if (i < p->nbends)
    return lsx_usage(effp);
  return SOX_SUCCESS;
}

static int create(sox_effect_t * effp, int argc, char **argv)
{
  priv_t *p = (priv_t *) effp->priv;
  char const * opts = "f:o:";
  int c;

  p->frame_rate = 25;
  p->ovsamp = 16;
  while ((c = getopt(argc, argv, opts)) != -1) switch (c) {
    GETOPT_NUMERIC('f', frame_rate, 10 , 80)
    GETOPT_NUMERIC('o', ovsamp,  4 , 32)
    default: sox_fail("unknown option `-%c'", optopt); return lsx_usage(effp);
  }
  argc -= optind, argv += optind;

  p->bends = lsx_calloc(p->nbends = argc, sizeof(*p->bends));
  return parse(effp, argv, 1e6);     /* No rate yet; parse with dummy */
}

static int start(sox_effect_t * effp)
{
  priv_t *p = (priv_t *) effp->priv;
  unsigned i;

  int n = effp->in_signal.rate / p->frame_rate + .5;
  for (p->fftFrameSize = 2; n > 2; p->fftFrameSize <<= 1, n >>= 1);
  p->shift = 1;
  parse(effp, 0, effp->in_signal.rate); /* Re-parse now rate is known */
  p->in_pos = p->bends_pos = 0;
  for (i = 0; i < p->nbends; ++i)
    if (p->bends[i].duration)
      return SOX_SUCCESS;
  return SOX_EFF_NULL;
}

static int flow(sox_effect_t * effp, const sox_sample_t * ibuf,
                sox_sample_t * obuf, size_t * isamp, size_t * osamp)
{
  priv_t *p = (priv_t *) effp->priv;
  size_t i, len = *isamp = *osamp = min(*isamp, *osamp);
  double magn, phase, tmp, window, real, imag;
  double freqPerBin, expct;
  long k, qpd, index, inFifoLatency, stepSize, fftFrameSize2;
  float pitchShift = p->shift;

  /* set up some handy variables */
  fftFrameSize2 = p->fftFrameSize / 2;
  stepSize = p->fftFrameSize / p->ovsamp;
  freqPerBin = effp->in_signal.rate / p->fftFrameSize;
  expct = 2. * M_PI * (double) stepSize / (double) p->fftFrameSize;
  inFifoLatency = p->fftFrameSize - stepSize;
  if (!p->gRover)
    p->gRover = inFifoLatency;

  /* main processing loop */
  for (i = 0; i < len; i++) {
    ++p->in_pos;

    /* As long as we have not yet collected enough data just read in */
    p->gInFIFO[p->gRover] = SOX_SAMPLE_TO_FLOAT_32BIT(ibuf[i], effp->clips);
    obuf[i] = SOX_FLOAT_32BIT_TO_SAMPLE(
        p->gOutFIFO[p->gRover - inFifoLatency], effp->clips);
    p->gRover++;

    /* now we have enough data for processing */
    if (p->gRover >= p->fftFrameSize) {
      if (p->bends_pos != p->nbends && p->in_pos >=
          p->bends[p->bends_pos].start + p->bends[p->bends_pos].duration) {
        pitchShift = p->shift *= pow(2., p->bends[p->bends_pos].cents / 1200);
        ++p->bends_pos;
      }
      if (p->bends_pos != p->nbends && p->in_pos >= p->bends[p->bends_pos].start) {
        double progress = (double)(p->in_pos - p->bends[p->bends_pos].start) /
             p->bends[p->bends_pos].duration;
        progress = 1 - cos(M_PI * progress);
        progress *= p->bends[p->bends_pos].cents * (.5 / 1200);
        pitchShift = p->shift * pow(2., progress);
      }

      p->gRover = inFifoLatency;

      /* do windowing and re,im interleave */
      for (k = 0; k < p->fftFrameSize; k++) {
        window = -.5 * cos(2 * M_PI * k / (double) p->fftFrameSize) + .5;
        p->gFFTworksp[2 * k] = p->gInFIFO[k] * window;
        p->gFFTworksp[2 * k + 1] = 0.;
      }

      /* ***************** ANALYSIS ******************* */
      smbFft(p->gFFTworksp, p->fftFrameSize, -1);

      /* this is the analysis step */
      for (k = 0; k <= fftFrameSize2; k++) {
        /* de-interlace FFT buffer */
        real = p->gFFTworksp[2 * k];
        imag = p->gFFTworksp[2 * k + 1];

        /* compute magnitude and phase */
        magn = 2. * sqrt(real * real + imag * imag);
        phase = atan2(imag, real);

        /* compute phase difference */
        tmp = phase - p->gLastPhase[k];
        p->gLastPhase[k] = phase;

        tmp -= (double) k *expct; /* subtract expected phase difference */

        /* map delta phase into +/- Pi interval */
        qpd = tmp / M_PI;
        if (qpd >= 0)
          qpd += qpd & 1;
        else qpd -= qpd & 1;
        tmp -= M_PI * (double) qpd;

        /* get deviation from bin frequency from the +/- Pi interval */
        tmp = p->ovsamp * tmp / (2. * M_PI);

        /* compute the k-th partials' true frequency */
        tmp = (double) k *freqPerBin + tmp * freqPerBin;

        /* store magnitude and true frequency in analysis arrays */
        p->gAnaMagn[k] = magn;
        p->gAnaFreq[k] = tmp;

      }

      /* this does the actual pitch shifting */
      memset(p->gSynMagn, 0, p->fftFrameSize * sizeof(float));
      memset(p->gSynFreq, 0, p->fftFrameSize * sizeof(float));
      for (k = 0; k <= fftFrameSize2; k++) {
        index = k * pitchShift;
        if (index <= fftFrameSize2) {
          p->gSynMagn[index] += p->gAnaMagn[k];
          p->gSynFreq[index] = p->gAnaFreq[k] * pitchShift;
        }
      }

      for (k = 0; k <= fftFrameSize2; k++) { /* SYNTHESIS */
        /* get magnitude and true frequency from synthesis arrays */
        magn = p->gSynMagn[k], tmp = p->gSynFreq[k];
        tmp -= (double) k *freqPerBin; /* subtract bin mid frequency */
        tmp /= freqPerBin; /* get bin deviation from freq deviation */
        tmp = 2. * M_PI * tmp / p->ovsamp; /* take p->ovsamp into account */
        tmp += (double) k *expct; /* add the overlap phase advance back in */
        p->gSumPhase[k] += tmp; /* accumulate delta phase to get bin phase */
        phase = p->gSumPhase[k];
        /* get real and imag part and re-interleave */
        p->gFFTworksp[2 * k] = magn * cos(phase);
        p->gFFTworksp[2 * k + 1] = magn * sin(phase);
      }

      for (k = p->fftFrameSize + 2; k < 2 * p->fftFrameSize; k++)
        p->gFFTworksp[k] = 0.; /* zero negative frequencies */

      smbFft(p->gFFTworksp, p->fftFrameSize, 1); /* do inverse transform */

      /* do windowing and add to output accumulator */
      for (k = 0; k < p->fftFrameSize; k++) {
        window =
            -.5 * cos(2. * M_PI * (double) k / (double) p->fftFrameSize) + .5;
        p->gOutputAccum[k] +=
            2. * window * p->gFFTworksp[2 * k] / (fftFrameSize2 * p->ovsamp);
      }
      for (k = 0; k < stepSize; k++)
        p->gOutFIFO[k] = p->gOutputAccum[k];

      memmove(p->gOutputAccum, /* shift accumulator */
          p->gOutputAccum + stepSize, p->fftFrameSize * sizeof(float));

      for (k = 0; k < inFifoLatency; k++) /* move input FIFO */
        p->gInFIFO[k] = p->gInFIFO[k + stepSize];
    }
  }
  return SOX_SUCCESS;
}

static int stop(sox_effect_t * effp)
{
  priv_t *p = (priv_t *) effp->priv;

  if (p->bends_pos != p->nbends)
    sox_warn("Input audio too short; bends not applied: %u",
        p->nbends - p->bends_pos);
  return SOX_SUCCESS;
}

static int kill(sox_effect_t * effp)
{
  priv_t *p = (priv_t *) effp->priv;
  unsigned i;

  for (i = 0; i < p->nbends; ++i)
    free(p->bends[i].str);
  free(p->bends);
  return SOX_SUCCESS;
}

sox_effect_handler_t const *sox_bend_effect_fn(void)
{
  static sox_effect_handler_t handler = {
    "bend", "{delay,cents,duration}", SOX_EFF_GETOPT,
    create, start, flow, 0, stop, kill, sizeof(priv_t)
  };
  return &handler;
}