shithub: sox

ref: 77ec7a16652b4fbb88d3706397d961ee06265b4d
dir: /src/pitch.c/

View raw version
/* (c) Fabien Coelho <fabien@coelho.net> 03/2000 for sox.
 *
 * pitch shifting.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
 * General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * I found a code on the Computer Music Journal web site
 * <http://mitpress.mit.edu/e-journals/Computer_Music_Journal/>
 * for pitch shifting the AD 1848 PC soundcards, with
 * a lot of (unclear) pointer and integer arithmetics, and
 * combine effects (feedback, delay, mixing).
 *
 * I tried to understand the code, dropped the other effects,
 * translated the stuff in float so it's easier to understand,
 * drop one of the lookup tables (I know that sin(pi/2-x) = cos(x)),
 * and added interpolation and fade options of my own.
 * cross fading is always symetric.
 *
 * Basically, the algorithm performs a resampling at the desire rate
 * to achieve the shift (interpolation function) on small overlapping windows,
 * and successive windows are faded in/out one into the other to
 * rebuild the final signal.
 *
 * I'm quite disappointed. At first thought, I looked for an FT-based
 * algorithm, something like "switch the signal to frequencies, shift
 * frequencies, and come back to time", but it does not seem to work
 * that way... at least not so easily. Or maybe my attempt was buggy.
 *
 * Here is the result. It can certainly be improved.
 * The result buzzes some time.
 * Lot of options available so than one can adjust the result.
 *
 * so as to lower the pitch, a larger window sounds better (30ms)?
 * so as to upper the pitch, a smaller window... (10ms)?
 *
 * Some speed-optimization could be added at code size expanse/expense?
 */

#include "sox_i.h"

#include <stdlib.h>
#include <string.h>

/* cross fading options for transitions
 */
#define PITCH_FADE_COS 0 /* cosine */
#define PITCH_FADE_HAM 1 /* Hamming */
#define PITCH_FADE_LIN 2 /* linear */
#define PITCH_FADE_TRA 3 /* trapezoid */

#define PITCH_FADE_DEFAULT PITCH_FADE_COS

/* interpolation options
 */
#define PITCH_INTERPOLE_CUB 0 /* cubic */
#define PITCH_INTERPOLE_LIN 1 /* linear */

#define PITCH_INTERPOLE_DEFAULT PITCH_INTERPOLE_CUB

/* default window width
 */
#define PITCH_DEFAULT_WIDTH ((double)(20.0e0)) /* 20 ms */

/* linear factors for the Hamming window
   0<=i<=n: HAM_n(i) = HAM0 + HAM1*cos(i*PI/n)
 */
#define HAM1               ((double)(0.46e0))
#define HAM0               ((double)(0.54e0))

/* state of buffer management... */
typedef enum { pi_input, pi_compute, pi_output } pitch_state_t;

/* structure hold by the effect descriptor. */
typedef struct {
    /* OPTIONS
     */
    double shift;   /* shift in cents, >0 to treble, <0 to bass */

    double width;   /* sweep size in ms */

    int interopt;        /* interpole option */

    int fadeopt;         /* fade option */
    double coef;    /* coefficient used by trapezoid */
    /* what about coef1/coef2 for hamming... */

    /* COMPUTATION
     */
    double rate;    /* sweep rate, around 1.0 */

    unsigned int step;   /* size of half a sweep, rounded to integer... */
    double * fade;  /* fading factors table lookup, ~ 1.0 -> ~ 0.0 */

    int overlap;         /* needed overlap */

    double * tmp;   /* temporary buffer */
    double * acc;   /* accumulation buffer */

    unsigned int iacc;   /* part of acc already output */

    sox_size_t size;      /* size of buffer for processing chunks. */
    unsigned int index;  /* index of next empty input item. */
    sox_sample_t *buf;    /* bufferize input */

    pitch_state_t state; /* buffer management status. */

} priv_t;

/* // debug functions

static char * fadeoptname(int opt)
{
    switch (opt)
    {
    case PITCH_FADE_COS: return "cosine";
    case PITCH_FADE_HAM: return "hamming";
    case PITCH_FADE_LIN: return "linear";
    case PITCH_FADE_TRA: return "trapezoid";
    default: return "UNEXPECTED";
    }
}

static void debug(priv_t * pitch, char * where)
{
  sox_debug("%s: ind=%d sz=%ld step=%d o=%d rate=%f ia=%d st=%d fo=%s",
  where, pitch->index, pitch->size, pitch->step, pitch->overlap,
  pitch->rate, pitch->iacc, pitch->state, fadeoptname(pitch->fadeopt));
}
*/

/* compute f(x) as a linear interpolation...
 */
static double lin(
  double f0,  /* f(0)  */
  double f1,  /* f(1)  */
  double x)   /* 0.0 <= x < 1.0 */
{
    return f0 * (1.0 - x) + f1 * x;
}

/* compute f(x) as a cubic function...
 */
static double cub(
  double fm1, /* f(-1) */
  double f0,  /* f(0)  */
  double f1,  /* f(1)  */
  double f2,  /* f(2)  */
  double x)   /* 0.0 <= x < 1.0 */
{
    /* a x^3 + b x^2 + c x + d */
    register double a, b, c, d;

    d = f0;
    b = 0.5 * (f1+fm1) - f0;
    a = (1.0/6.0) * (f2-f1+fm1-f0-4.0*b);
    c = f1 - a - b - d;

    return ((a * x + b) * x + c) * x + d;
}

/* interpolate a quarter (half a window)
 *
 * ibuf buffer of ilen length is swept at rate speed.
 * result put in output buffer obuf of size olen.
 */
static void interpolation(
  priv_t * pitch,
  const sox_sample_t *ibuf, sox_size_t ilen,
  double * out, sox_size_t olen,
  double rate) /* signed */
{
    register int i, size;
    register double index;

    size = pitch->step; /* size == olen? */

    if (rate>0) /* sweep forwards */
    {
        for (index=0.0, i=0; i<(int)olen; i++, index+=rate)
        {
            register int ifl = (int) index; /* FLOOR */
            register double frac = index - ifl;

            if (pitch->interopt==PITCH_INTERPOLE_LIN)
                out[i] = lin((double) ibuf[ifl],
                             (double) ibuf[ifl+1],
                             frac);
            else
                out[i] = cub((double) ibuf[ifl-1],
                             (double) ibuf[ifl],
                             (double) ibuf[ifl+1],
                             (double) ibuf[ifl+2],
                             frac);
        }
    }
    else /* rate < 0, sweep backwards */
    {
        for (index=ilen-1, i=olen-1; i>=0; i--, index+=rate)
        {
            register int ifl = (int) index; /* FLOOR */
            register double frac = index - ifl;

            if (pitch->interopt==PITCH_INTERPOLE_LIN)
                out[i] = lin((double) ibuf[ifl],
                             (double) ibuf[ifl+1],
                             frac);
            else
                out[i] = cub((double) ibuf[ifl-1],
                             (double) ibuf[ifl],
                             (double) ibuf[ifl+1],
                             (double) ibuf[ifl+2],
                             frac);
        }
    }
}

/* from input buffer to acc
 */
static void process_intput_buffer(priv_t * pitch)
{
    register int i, len = pitch->step;

    /* forwards sweep */
    interpolation(pitch,
                  pitch->buf+pitch->overlap, pitch->step+pitch->overlap,
                  pitch->tmp, pitch->step,
                  pitch->rate);

    for (i=0; i<len; i++)
        pitch->acc[i] = pitch->fade[i]*pitch->tmp[i];

    /* backwards sweep */
    interpolation(pitch,
                  pitch->buf, pitch->step+pitch->overlap,
                  pitch->tmp, pitch->step,
                  -pitch->rate);

    for (i=0; i<len; i++)
        pitch->acc[i] += pitch->fade[pitch->step-i-1]*pitch->tmp[i];
}

/*
 * Process options
 */
static int sox_pitch_getopts(sox_effect_t * effp, int n, char **argv)
{
    priv_t * pitch = (priv_t *) effp->priv;

    /* get pitch shift */
    pitch->shift = 0.0; /* default is no change */

    if (n && !sscanf(argv[0], "%lf", &pitch->shift))
      return lsx_usage(effp);

    /* sweep size in ms */
    pitch->width = PITCH_DEFAULT_WIDTH;
    if (n>1 && !sscanf(argv[1], "%lf", &pitch->width))
      return lsx_usage(effp);

    /* interpole option */
    pitch->interopt = PITCH_INTERPOLE_DEFAULT;
    if (n>2)
    {
        switch(argv[2][0])
        {
        case 'l':
        case 'L':
            pitch->interopt = PITCH_INTERPOLE_LIN;
            break;
        case 'c':
        case 'C':
            pitch->interopt = PITCH_INTERPOLE_CUB;
            break;
        default:
            return lsx_usage(effp);
        }
    }

    /* fade option */
    pitch->fadeopt = PITCH_FADE_DEFAULT; /* default */
    if (n>3)
    {
        switch (argv[3][0]) /* what a parser;-) */
        {
        case 'l':
        case 'L':
            pitch->fadeopt = PITCH_FADE_LIN;
            break;
        case 't':
        case 'T':
            pitch->fadeopt = PITCH_FADE_TRA;
            break;
        case 'h':
        case 'H':
            pitch->fadeopt = PITCH_FADE_HAM;
            break;
        case 'c':
        case 'C':
            pitch->fadeopt = PITCH_FADE_COS;
            break;
        default:
            return lsx_usage(effp);
        }
    }

    pitch->coef = 0.25;
    if (n>4 && (!sscanf(argv[4], "%lf", &pitch->coef) ||
                pitch->coef<0.0 || pitch->coef>0.5))
      return lsx_usage(effp);

    return SOX_SUCCESS;
}

/*
 * Start processing
 */
static int sox_pitch_start(sox_effect_t * effp)
{
    priv_t * pitch = (priv_t *) effp->priv;
    register int sample_rate = effp->out_signal.rate;
    unsigned int i;

    /* computer inner stuff... */

    pitch->state = pi_input;

    /* Should I trust pow?
     * BTW, the twelfth root of two is 1.0594630943592952645618252,
     * if we consider an equal temperament.
     */
    pitch->rate = pow(2.0, pitch->shift/1200.0);

    /* size is half of the actual target window size, because of symetry.
     */
    pitch->step = ((pitch->width*(0.0005))*sample_rate);

    /* make size odd? do we care? */
    /* if (!(size & 1)) size++; */

    /* security for safe cubic interpolation */
    if (pitch->rate > 1.0)
        pitch->overlap = (int) ((pitch->rate-1.0)*pitch->step) + 2;
    else
        pitch->overlap = 2;

    pitch->size = pitch->step + 2*pitch->overlap;

    pitch->fade = lsx_malloc(pitch->step*sizeof(double));
    pitch->tmp  = lsx_malloc(pitch->step*sizeof(double));
    pitch->acc  = lsx_malloc(pitch->step*sizeof(double));
    pitch->buf  = lsx_malloc(pitch->size*sizeof(sox_sample_t));
    pitch->index = pitch->overlap;

    /* default initial signal */
    for (i=0; i<pitch->size; i++)
        pitch->buf[i] = 0;

    if (pitch->fadeopt == PITCH_FADE_HAM)
    {
        /* does it make sense to have such an option? */
        register double pi_step = M_PI / (pitch->step-1);

        for (i=0; i<pitch->step; i++)
            pitch->fade[i] = (double) (HAM0 + HAM1*cos(pi_step*i));
    }
    else if (pitch->fadeopt == PITCH_FADE_COS)
    {
        register double pi_2_step = M_PI_2 / (pitch->step-1);

        pitch->fade[0] = 1.0; /* cos(0) == 1.0 */
        for (i=1; i<pitch->step-1; i++)
            pitch->fade[i]  = (double) cos(pi_2_step*i);
        pitch->fade[pitch->step-1] = 0.0; /* cos(PI/2) == 0.0 */
    }
    else if (pitch->fadeopt == PITCH_FADE_LIN)
    {
        register double stepth = 1.0 / (pitch->step-1);

        pitch->fade[0] = 1.0;
        for (i=1; i<pitch->step-1; i++)
            pitch->fade[i] = (pitch->step-i-1) * stepth;
        pitch->fade[pitch->step-1] = 0.0;
    }
    else if (pitch->fadeopt == PITCH_FADE_TRA)
    {
        /* 0 <= coef <= 0.5 */
        register unsigned int plat = (int) (pitch->step*pitch->coef);
        register double slope = 1.0 / (pitch->step - 2*plat);

        for (i=0; i<plat; i++)
            pitch->fade[i] = 1.0;

        for (; i<pitch->step-plat; i++)
            pitch->fade[i] = slope * (pitch->step-plat-i-1);

        for (; i<pitch->step; i++)
            pitch->fade[i] = 0.0;
    }
    else
    {
        sox_fail("unexpected PITCH_FADE parameter %d", pitch->fadeopt);
        return SOX_EOF;
    }

    if (pitch->shift == 0)
      return SOX_EFF_NULL;

    return SOX_SUCCESS;
}

/* Processes input.
 */
static int sox_pitch_flow(sox_effect_t * effp, const sox_sample_t *ibuf, sox_sample_t *obuf,
                sox_size_t *isamp, sox_size_t *osamp)
{
    priv_t * pitch = (priv_t *) effp->priv;
    int i, size;
    sox_size_t len, iindex, oindex;

    size = pitch->size;
    /* size to process */
    len = min(*isamp, *osamp);
    iindex = 0;
    oindex = 0;

    /* warning:
       because of the asynchronous nature of buffering,
       the output index can reach the buffer limits before full consumption.
       I put the input index just in case.
       If the code is correct, eithier len or iindex is redundant.
    */
    while (len>0 && iindex<*isamp && oindex<*osamp)
    {
        if (pitch->state == pi_input)
        {
            register int tocopy = min(pitch->size-pitch->index, len);

            memcpy(pitch->buf+pitch->index, ibuf+iindex, tocopy*sizeof(sox_sample_t));

            len -= tocopy;
            pitch->index += tocopy;
            iindex += tocopy;

            if (pitch->index==pitch->size)
                pitch->state = pi_compute;
        }

        if (pitch->state == pi_compute)
        {
            process_intput_buffer(pitch);
            pitch->state = pi_output;
            pitch->iacc = 0;
        }

        if (pitch->state == pi_output)
        {
            int toout = min(*osamp-oindex, pitch->step-pitch->iacc);

            for (i=0; i<toout; i++)
            {
                float f;

                f = pitch->acc[pitch->iacc++];
                SOX_SAMPLE_CLIP_COUNT(f, effp->clips);
                obuf[oindex++] = f;
            }

            if (pitch->iacc == pitch->step)
            {
                pitch->state = pi_input;

                /* shift input buffer. memmove? */
                for (i=0; i<2*pitch->overlap; i++)
                    pitch->buf[i] = pitch->buf[i+pitch->step];

                pitch->index = 2*pitch->overlap;
            }
        }
    }

    /* report consumption. */
    *isamp = iindex;
    *osamp = oindex;

    return SOX_SUCCESS;
}

/* at the end...
 */
static int sox_pitch_drain(sox_effect_t * effp, sox_sample_t *obuf, sox_size_t *osamp)
{
    priv_t * pitch = (priv_t *) effp->priv;
    sox_size_t i;

    if (pitch->state == pi_input)
    {
        /* complete input buffer content with 0. */
        for (i=pitch->index; i<pitch->size; i++)
            pitch->buf[i] = 0;

        pitch->state = pi_compute;
    }

    if (pitch->state == pi_compute)
    {
        process_intput_buffer(pitch);
        pitch->state = pi_output;
        pitch->iacc = 0;
    }

    /* (pitch->state == pi_output) */
    for (i=0; i<*osamp && i<pitch->index-pitch->overlap;)
    {
        float f;

        f = pitch->acc[pitch->iacc++];
        SOX_SAMPLE_CLIP_COUNT(f, effp->clips);
        obuf[i++] = f;
    }

    /* report... */
    *osamp = i;

    if ((pitch->index - pitch->overlap) == 0)
        return SOX_EOF;
    else
        return SOX_SUCCESS;
}

/*
 * Do anything required when you stop reading samples.
 * Don't close input file!
 */
static int sox_pitch_stop(sox_effect_t * effp)
{
    priv_t * pitch = (priv_t *) effp->priv;

    free(pitch->fade);
    free(pitch->tmp);
    free(pitch->acc);
    free(pitch->buf);

    return SOX_SUCCESS;
}

static sox_effect_handler_t sox_pitch_effect = {
  "pitch",
  "shift width interpole fade\n"
  "       (in cents, in ms, cub/lin, cos/ham/lin/trap)"
  "       (defaults: 0 20 c c)",
  SOX_EFF_LENGTH | SOX_EFF_DEPRECATED,
  sox_pitch_getopts,
  sox_pitch_start,
  sox_pitch_flow,
  sox_pitch_drain,
  sox_pitch_stop,
  NULL, sizeof(priv_t)
};

const sox_effect_handler_t *sox_pitch_effect_fn(void)
{
    return &sox_pitch_effect;
}