ref: 42f8ff432b68d90bdd981394a582a86a528b6482
dir: /src/tempo.c/
/* * Effect: change the audio tempo (but not key) * * Copyright (c) 2007 robs@users.sourceforge.net * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2 of the License, or (at * your option) any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this library. If not, write to the Free Software Foundation, * Fifth Floor, 51 Franklin Street, Boston, MA 02111-1301, USA. */ /* Addressible FIFO buffer */ #include "sox_i.h" #include "xmalloc.h" #include <string.h> typedef struct { char * data; size_t allocation; /* Number of bytes allocated for data. */ size_t item_size; /* Size of each item in data */ size_t begin; /* Offset of the first byte to read. */ size_t end; /* 1 + Offset of the last byte byte to read. */ } fifo_t; #define FIFO_MIN 0x4000 static void fifo_clear(fifo_t * f) { f->end = f->begin = 0; } static void * fifo_reserve(fifo_t * f, size_t n) { n *= f->item_size; if (f->begin == f->end) fifo_clear(f); while (1) { if (f->end + n <= f->allocation) { void *p = (char *) f->data + f->end; f->end += n; return p; } if (f->begin > FIFO_MIN) { memmove(f->data, f->data + f->begin, f->end - f->begin); f->end -= f->begin; f->begin = 0; continue; } f->allocation += n; f->data = xrealloc(f->data, f->allocation); } } static void * fifo_write(fifo_t * f, size_t n, void const * data) { void * s = fifo_reserve(f, n); if (data) memcpy(s, data, n * f->item_size); return s; } static void fifo_trim(fifo_t * f, size_t n) { n *= f->item_size; f->end = f->begin + n; } static size_t fifo_occupancy(fifo_t * f) { return (f->end - f->begin) / f->item_size; } static void * fifo_read(fifo_t * f, size_t n, void * data) { char * ret = f->data + f->begin; n *= f->item_size; if (n > f->end - f->begin) return NULL; if (data) memcpy(data, ret, n); f->begin += n; return ret; } #define fifo_read_ptr(f) fifo_read(f, 0, NULL) static void fifo_delete(fifo_t * f) { free(f->data); } static void fifo_create(fifo_t * f, size_t item_size) { f->item_size = item_size; f->allocation = FIFO_MIN; f->data = xmalloc(f->allocation); fifo_clear(f); } /* * Change tempo (alter duration, maintain pitch) using a time domain * WSOLA-like method. Based on TDStretch.cpp revision 1.24 from The * SoundTouch Library Copyright (c) Olli Parviainen 2001-2005. */ #include <string.h> #include <assert.h> #ifndef max #define max(a, b) ((a) >= (b) ? (a) : (b)) #endif typedef enum {FALSE, TRUE} BOOL; typedef struct { size_t samples_in; size_t samples_out; double factor; size_t channels; size_t sampleReq; float * pMidBuffer; float * pRefMidBuffer; float * pRefMidBufferUnaligned; size_t overlapLength; size_t seekLength; size_t seekWindowLength; size_t maxOffset; double nominalSkip; double skipFract; fifo_t outputBuffer; fifo_t inputBuffer; BOOL bQuickseek; BOOL bMidBufferDirty; } TDStretch; static void clearMidBuffer(TDStretch * p) { if (p->bMidBufferDirty) { memset((p->pMidBuffer), 0, 2 * sizeof(float) * p->overlapLength); p->bMidBufferDirty = FALSE; } } static void clearInput(TDStretch * p) { p->samples_in = 0; fifo_clear(&p->inputBuffer); clearMidBuffer(p); } static void clear(TDStretch * p) { fifo_clear(&p->outputBuffer); fifo_clear(&p->inputBuffer); clearMidBuffer(p); } /* Slopes the amplitude of the 'midBuffer' samples so that cross correlation */ /* is faster to calculate */ static void precalcCorrReferenceMono(TDStretch * p) { int i; float temp; for (i = 0; i < (int) p->overlapLength; i++) { temp = (float) i *(float) (p->overlapLength - i); (p->pRefMidBuffer)[i] = (float) ((p->pMidBuffer)[i] * temp); } } static void precalcCorrReferenceStereo(TDStretch * p) { int i, cnt2; float temp; for (i = 0; i < (int) p->overlapLength; i++) { temp = (float) i *(float) (p->overlapLength - i); cnt2 = i * 2; (p->pRefMidBuffer)[cnt2] = (float) ((p->pMidBuffer)[cnt2] * temp); (p->pRefMidBuffer)[cnt2 + 1] = (float) ((p->pMidBuffer)[cnt2 + 1] * temp); } } static double calcCrossCorrMono( TDStretch * p, const float * mixingPos, const float * compare) { double corr = 0; size_t i = 0; /* Loop optimisation: */ #define _ corr += mixingPos[i] * compare[i], ++i; do {_ _ _ _ _ _ _ _} while (i < p->overlapLength); #undef _ return corr; } static double calcCrossCorrStereo( TDStretch * p, const float * mixingPos, const float * compare) { double corr = 0; size_t i = 0; /* Loop optimisation: */ #define _ corr += mixingPos[i]*compare[i] + mixingPos[i+1]*compare[i+1], i+=2; do {_ _ _ _ _ _ _ _} while (i < 2 * p->overlapLength); #undef _ return corr; } /* Seeks for the optimal overlap-mixing position. The best position is * determined as the position where the two overlapped sample sequences are * 'most alike', in terms of the highest cross-correlation value over the * overlapping period. 4 variants exist for mono/stereo, quick/accurate */ static size_t seekBestOverlapPositionMono( TDStretch * p, const float * refPos) { size_t bestOffs; double bestCorr, corr; size_t tempOffset; const float *compare; /* Slopes the amplitude of the 'midBuffer' samples */ precalcCorrReferenceMono(p); bestCorr = INT_MIN; bestOffs = 0; /* Scans for the best correlation value by testing each possible position */ /* over the permitted range. */ for (tempOffset = 0; tempOffset < p->seekLength; tempOffset++) { compare = refPos + tempOffset; /* Calculates correlation value for the mixing position corresponding */ /* to 'tempOffset' */ corr = calcCrossCorrMono(p, p->pRefMidBuffer, compare); /* Checks for the highest correlation value */ if (corr > bestCorr) { bestCorr = corr; bestOffs = tempOffset; } } return bestOffs; } static size_t seekBestOverlapPositionStereo(TDStretch * p, const float * refPos) { size_t bestOffs; double bestCorr, corr; size_t i; precalcCorrReferenceStereo(p); bestCorr = INT_MIN; bestOffs = 0; for (i = 0; i < p->seekLength; i++) { corr = calcCrossCorrStereo(p, refPos + 2 * i, p->pRefMidBuffer); if (corr > bestCorr) { bestCorr = corr; bestOffs = i; } } return bestOffs; } /* Table for the quick hierarchical mixing position seeking algorithm */ static int const scanOffsets[4][24] = { { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806, 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0}, {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; static size_t seekBestOverlapPositionMonoQuick(TDStretch * p, const float * refPos) { size_t j; size_t bestOffs; double bestCorr, corr; size_t scanCount, corrOffset, tempOffset; /* Slopes the amplitude of the 'midBuffer' samples */ precalcCorrReferenceMono(p); bestCorr = INT_MIN; bestOffs = 0; corrOffset = 0; tempOffset = 0; /* Scans for the best correlation value using four-pass hierarchical * search. The look-up table 'scans' has hierarchical position adjusting * steps. In first pass the routine searhes for the highest correlation * with relatively coarse steps, then rescans the neighbourhood of the * highest correlation with better resolution and so on. */ for (scanCount = 0; scanCount < 4; scanCount++) { j = 0; while (scanOffsets[scanCount][j]) { tempOffset = corrOffset + scanOffsets[scanCount][j]; if (tempOffset >= p->seekLength) break; /* Calculates correlation value for the mixing position corresponding */ /* to 'tempOffset' */ corr = calcCrossCorrMono(p, refPos + tempOffset, (p->pRefMidBuffer)); /* Checks for the highest correlation value */ if (corr > bestCorr) { bestCorr = corr; bestOffs = tempOffset; } j++; } corrOffset = bestOffs; } return bestOffs; } static size_t seekBestOverlapPositionStereoQuick(TDStretch * p, const float * refPos) { size_t j; size_t bestOffs; double bestCorr, corr; size_t scanCount, corrOffset, tempOffset; precalcCorrReferenceStereo(p); bestCorr = INT_MIN; bestOffs = 0; corrOffset = 0; tempOffset = 0; for (scanCount = 0; scanCount < 4; scanCount++) { j = 0; while (scanOffsets[scanCount][j]) { tempOffset = corrOffset + scanOffsets[scanCount][j]; if (tempOffset >= p->seekLength) break; corr = calcCrossCorrStereo(p, refPos + 2 * tempOffset, p->pRefMidBuffer); if (corr > bestCorr) { bestCorr = corr; bestOffs = tempOffset; } j++; } corrOffset = bestOffs; } return bestOffs; } static size_t seekBestOverlapPosition(TDStretch * p, const float * refPos) { if (p->channels == 2) { if (p->bQuickseek) return seekBestOverlapPositionStereoQuick(p, refPos); return seekBestOverlapPositionStereo(p, refPos); } else if (p->bQuickseek) return seekBestOverlapPositionMonoQuick(p, refPos); return seekBestOverlapPositionMono(p, refPos); } /* Overlaps samples in 'midBuffer' with the samples in 'input' */ static void overlapMono(TDStretch * p, float * output, const float * input) { int i, itemp; for (i = 0; i < (int) p->overlapLength; i++) { itemp = p->overlapLength - i; output[i] = (input[i] * i + (p->pMidBuffer)[i] * itemp) / p->overlapLength; } } static void overlapStereo(TDStretch * p, float * output, const float * input) { int i; size_t cnt2; float fTemp; float fScale; float fi; fScale = 1.0f / (float) p->overlapLength; for (i = 0; i < (int) p->overlapLength; i++) { fTemp = (float) (p->overlapLength - i) * fScale; fi = (float) i *fScale; cnt2 = 2 * i; output[cnt2 + 0] = input[cnt2 + 0] * fi + p->pMidBuffer[cnt2 + 0] * fTemp; output[cnt2 + 1] = input[cnt2 + 1] * fi + p->pMidBuffer[cnt2 + 1] * fTemp; } } /* Overlaps samples in 'midBuffer' with the samples in 'inputBuffer' at * position of 'ovlPos'. */ static inline void overlap(TDStretch * p, float * output, const float * input, size_t ovlPos) { if (p->channels == 2) overlapStereo(p, output, input + 2 * ovlPos); else overlapMono(p, output, input + ovlPos); } /* Processes as many processing frames of the samples 'inputBuffer', store */ /* the result into 'outputBuffer' */ static void processSamples(TDStretch * p) { size_t ovlSkip, offset, temp; if (p->bMidBufferDirty == FALSE) { /* if midBuffer is empty, move the first samples of the input stream * into it */ if (fifo_occupancy(&p->inputBuffer) < p->overlapLength) return; /* wait until we've got p->overlapLength samples */ fifo_read(&p->inputBuffer, p->overlapLength, p->pMidBuffer); p->bMidBufferDirty = TRUE; } /* Process samples as long as there are enough samples in 'inputBuffer' * to form a processing frame. */ while (fifo_occupancy(&p->inputBuffer) >= p->sampleReq) { /* If tempo differs from the normal SCALE, scan for the best overlapping * position */ offset = seekBestOverlapPosition(p, fifo_read_ptr(&p->inputBuffer)); /* Mix the samples in the 'inputBuffer' at position of 'offset' with the * samples in 'midBuffer' using sliding overlapping ... first partially * overlap with the end of the previous sequence (that's in 'midBuffer') */ overlap(p, fifo_reserve(&p->outputBuffer, p->overlapLength), fifo_read_ptr(&p->inputBuffer), offset); /* ... then copy sequence samples from 'inputBuffer' to output */ temp = (p->seekWindowLength - 2 * p->overlapLength); /* & 0xfffffffe; */ if ((int)temp > 0) { fifo_write(&p->outputBuffer, temp, (float *) fifo_read_ptr(&p->inputBuffer) + p->channels * (offset + p->overlapLength)); } /* Copies the end of the current sequence from 'inputBuffer' to * 'midBuffer' for being mixed with the beginning of the next * processing sequence and so on */ assert(offset + p->seekWindowLength <= fifo_occupancy(&p->inputBuffer)); memcpy(p->pMidBuffer, (float *) fifo_read_ptr(&p->inputBuffer) + p->channels * (offset + p->seekWindowLength - p->overlapLength), p->channels * sizeof(float) * p->overlapLength); p->bMidBufferDirty = TRUE; /* Remove the processed samples from the input buffer. Update * the difference between integer & nominal skip step to 'p->skipFract' * in order to prevent the error from accumulating over time. */ p->skipFract += p->nominalSkip; /* real skip size */ ovlSkip = (int) p->skipFract; /* rounded to integer skip */ p->skipFract -= ovlSkip; /* maintain the fraction part, i.e. real vs. integer skip */ fifo_read(&p->inputBuffer, ovlSkip, NULL); } } /* Set new overlap length parameter & reallocate RefMidBuffer if necessary. */ static void acceptNewOverlapLength(TDStretch * p, size_t newOverlapLength) { size_t prevOvl; prevOvl = p->overlapLength; p->overlapLength = newOverlapLength; if (p->overlapLength > prevOvl) { free(p->pMidBuffer); free(p->pRefMidBufferUnaligned); p->pMidBuffer = xcalloc(p->overlapLength * 2, sizeof(float)); p->bMidBufferDirty = TRUE; clearMidBuffer(p); p->pRefMidBufferUnaligned = xcalloc( 2 * p->overlapLength + 16 / sizeof(float), sizeof(float)); /* For efficiency, align 'pRefMidBuffer' to 16 byte boundary */ p->pRefMidBuffer = (float *) ((((unsigned long) (p->pRefMidBufferUnaligned)) + 15ul) & ~15ul); } } /* Sets routine control parameters. These control are certain time constants * defining how the sound is stretched to the desired duration. * 'sampleRate' = sample rate of the sound * 'sequenceMS' = one processing sequence length in milliseconds * 'seekwindowMS' = seeking window length for scanning the best overlapping * position * 'overlapMS' = overlapping length * 'tempo' = 1 for no change, < 1 for slower, > 1 for faster. * 'quickSeek' = whether to use a quick seek for the best overlapping * position. */ static void setParameters(TDStretch * p, double sampleRate, double tempo, double sequenceMs, double seekWindowMs, double overlapMs, BOOL quickSeek) { size_t newOvl; size_t intskip; p->factor = tempo; p->bQuickseek = quickSeek; p->maxOffset = p->seekLength = sampleRate * seekWindowMs / 1000 + .5; p->seekWindowLength = sampleRate * sequenceMs / 1000 + .5; newOvl = max(sampleRate * overlapMs / 1000 + 4.5, 16); newOvl &= ~7; /* must be divisible by 8 */ acceptNewOverlapLength(p, newOvl); /* Calculate ideal skip length (according to tempo value) */ p->nominalSkip = tempo * (p->seekWindowLength - p->overlapLength); p->skipFract = 0; intskip = (int) (p->nominalSkip + 0.5); /* Calculate how many samples are needed in the 'inputBuffer' to */ /* process another batch of samples */ p->sampleReq = max(intskip + p->overlapLength, p->seekWindowLength) + p->maxOffset; } static float * putSamples(TDStretch * p, float const *samples, size_t n) { p->samples_in += n; return fifo_write(&p->inputBuffer, n, samples); } static float const * receiveSamples( TDStretch * p, float * samples, size_t * n) { p->samples_out += *n = min(*n, fifo_occupancy(&p->outputBuffer)); return fifo_read(&p->outputBuffer, *n, samples); } /* Flushes the last samples from the processing pipeline to the output. * Clears also the internal processing buffers. * * Note: This function is meant for extracting the last samples of a sound * stream. This function may introduce additional blank samples in the end * of the sound stream, and thus it's not recommended to call this function * in the middle of a sound stream. */ static void flush(TDStretch * p) { size_t samples_out = p->samples_in / p->factor + .5; if (p->samples_out < samples_out) { size_t remaining = p->samples_in / p->factor + .5 - p->samples_out; float buff[128]; memset(buff, 0, sizeof(buff)); while (fifo_occupancy(&p->outputBuffer) < remaining) { putSamples(p, buff, sizeof(buff)/sizeof(buff[0])/p->channels); processSamples(p); } fifo_trim(&p->outputBuffer, remaining); clearInput(p); } } static void deleteTDStretch(TDStretch * p) { free(p->pMidBuffer); free(p->pRefMidBufferUnaligned); fifo_delete(&p->outputBuffer); fifo_delete(&p->inputBuffer); free(p); } static TDStretch * newTDStretch(size_t channels) { TDStretch * p = xcalloc(1, sizeof(*p)); p->channels = channels; fifo_create(&p->inputBuffer, p->channels * sizeof(float)); fifo_create(&p->outputBuffer, p->channels * sizeof(float)); return p; } /* * libSoX tempo effect: adjust the audio tempo (but not key) * * Adjustment is given as the ratio of the new tempo to the old tempo. */ #include "sox_i.h" #include <math.h> typedef struct tempo { TDStretch * tdstretch; sox_bool quick_seek; double factor, sequence_ms, seek_window_ms, overlap_ms; } priv_t; assert_static(sizeof(struct tempo) <= SOX_MAX_EFFECT_PRIVSIZE, /* else */ tempo_PRIVSIZE_too_big); static int getopts(sox_effect_t * effp, int argc, char **argv) { priv_t * p = (priv_t *) effp->priv; p->sequence_ms = 82; /* Set non-zero defaults: */ p->seek_window_ms = 14; p->overlap_ms = 12; p->quick_seek = !argc || strcmp(*argv, "-l") || (--argc, ++argv, sox_false); do { /* break-able block */ NUMERIC_PARAMETER(factor ,0.25, 4 ) NUMERIC_PARAMETER(sequence_ms , 10 , 120) NUMERIC_PARAMETER(seek_window_ms, 7 , 28 ) NUMERIC_PARAMETER(overlap_ms , 6 , 24 ) } while (0); sox_debug("factor:%g sequence:%g seek:%g overlap:%g quick:%i", p->factor, p->sequence_ms, p->seek_window_ms, p->overlap_ms, p->quick_seek); return argc || !p->factor? sox_usage(effp) : SOX_SUCCESS; } static int start(sox_effect_t * effp) { priv_t * p = (priv_t *) effp->priv; if (p->factor == 1) return SOX_EFF_NULL; if (effp->ininfo.channels > 2) { sox_fail("supports only mono or stereo audio"); return SOX_EOF; } p->tdstretch = newTDStretch(effp->ininfo.channels); setParameters(p->tdstretch, effp->ininfo.rate, p->factor, p->sequence_ms, p->seek_window_ms, p->overlap_ms, p->quick_seek); return SOX_SUCCESS; } static int flow(sox_effect_t * effp, const sox_ssample_t * ibuf, sox_ssample_t * obuf, sox_size_t * isamp, sox_size_t * osamp) { priv_t * p = (priv_t *) effp->priv; sox_size_t i; sox_size_t odone = *osamp /= effp->ininfo.channels; float const * s = receiveSamples(p->tdstretch, NULL, &odone); for (i = 0; i < odone * effp->ininfo.channels; ++i) *obuf++ = SOX_FLOAT_32BIT_TO_SAMPLE(*s++, effp->clips); if (*isamp && odone < *osamp) { float * t = putSamples(p->tdstretch, NULL, *isamp / effp->ininfo.channels); for (i = *isamp; i; --i) *t++ = SOX_SAMPLE_TO_FLOAT_32BIT(*ibuf++, effp->clips); processSamples(p->tdstretch); } else *isamp = 0; *osamp = odone * effp->ininfo.channels; return SOX_SUCCESS; } static int drain(sox_effect_t * effp, sox_ssample_t * obuf, sox_size_t * osamp) { static sox_size_t isamp = 0; flush(((priv_t *)effp->priv)->tdstretch); return flow(effp, 0, obuf, &isamp, osamp); } static int stop(sox_effect_t * effp) { deleteTDStretch(((priv_t *)effp->priv)->tdstretch); return SOX_SUCCESS; } sox_effect_handler_t const * sox_tempo_effect_fn(void) { static sox_effect_handler_t handler = { "tempo", "[-l] factor [window-ms [seek-ms [overlap-ms]]]", SOX_EFF_MCHAN | SOX_EFF_LENGTH, getopts, start, flow, drain, stop, NULL }; return &handler; }