ref: 51a1a378632b453d050c9be3afcf75fd2baa198c
dir: /src/speech/rateconv.c/
/* THIS IS A MODIFIED VERSION. It was modified by David Huggins-Daines <dhd@cepstral.com> in December 2001 for use in the Flite speech synthesis systems. */ /* * * RATECONV.C * * Convert sampling rate stdin to stdout * * Copyright (c) 1992, 1995 by Markus Mummert * * Redistribution and use of this software, modifcation and inclusion * into other forms of software are permitted provided that the following * conditions are met: * * 1. Redistributions of this software must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. If this software is redistributed in a modified condition * it must reveal clearly that it has been modified. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * * history: 2.9.92 begin coding * 5.9.92 fully operational * 14.2.95 provide BIG_ENDIAN, SWAPPED_BYTES_DEFAULT * switches, Copyright note and References * 25.11.95 changed XXX_ENDIAN to I_AM_XXX_ENDIAN * default gain set to 0.8 * 3.12.95 stereo implementation * SWAPPED_BYTES_DEFAULT now HBYTE1ST_DEFAULT * changed [L/2] to (L-1)/2 for exact symmetry * * * IMPLEMENTATION NOTES * * Converting is achieved by interpolating the input samples in * order to evaluate the represented continuous input slope at * sample instances of the new rate (resampling). It is implemented * as a polyphase FIR-filtering process (see reference). The rate * conversion factor is determined by a rational factor. Its * nominator and denominator are integers of almost arbitrary * value, limited only by coefficient memory size. * * General rate conversion formula: * * out(n*Tout) = SUM in(m*Tin) * g((n*d/u-m)*Tin) * Tin * over all m * * FIR-based rate conversion formula for polyphase processing: * * L-1 * out(n*Tout) = SUM in(A(i,n)*Tin) * g(B(i,n)*Tin) * Tin * i=0 * * A(i,n) = i - (L-1)/2 + [n*d/u] * = i - (L-1)/2 + [(n%u)*d/u] + [n/u]*d * B(i,n) = n*d/u - [n*d/u] + (L-1)/2 - i * = ((n%u)*d/u)%1 + (L-1)/2 - i * Tout = Tin * d/u * * where: * n,i running integers * out(t) output signal sampled at t=n*Tout * in(t) input signal sampled in intervalls Tin * u,d up- and downsampling factor, integers * g(t) interpolating function * L FIR-length of realized g(t), integer * / float-division-operator * % float-modulo-operator * [] integer-operator * * note: * (L-1)/2 in A(i,n) can be omitted as pure time shift yielding * a causal design with a delay of ((L-1)/2)*Tin. * n%u is a cyclic modulo-u counter clocked by out-rate * [n/u]*d is a d-increment counter, advanced when n%u resets * B(i,n)*Tin can take on L*u differnt values, at which g(t) * has to be sampled as a coefficient array * * Interpolation function design: * * The interpolation function design is based on a sinc-function * windowed by a gaussian function. The former determines the * cutoff frequency, the latter limits the necessary FIR-length by * pushing the outer skirts of the resulting impulse response below * a certain threshold fast enough. The drawback is a smoothed * cutoff inducing some aliasing. Due to the symmetry of g(t) the * group delay of the filtering process is contant (linear phase). * * g(t) = 2*fgK*sinc(pi*2*fgK*t) * exp(-pi*(2*fgG*t)**2) * * where: * fgK cutoff frequency of sinc function in f-domain * fgG key frequency of gaussian window in f-domain * reflecting the 6.82dB-down point * * note: * Taking fsin=1/Tin as the input sampling frequncy, it turns out * that in conjunction with L, u and d only the ratios fgK/(fsin/2) * and fgG/(fsin/2) specify the whole proces. Requiring fsin, fgK * and fgG as input purposely keeps the notion of absolute * frequencies. * * Numerical design: * * Samples are expected to be 16bit-signed integers, alternating * left and right channel in case of stereo mode- The byte order * per sample is selectable. FIR-filtering is implemented using * 32bit-signed integer arithmetic. Coefficients are scaled to * find the output sample in the high word of accumulated FIR-sum. * * Interpolation can lead to sample magnitudes exceeding the * input maximum. Worst case is a full scale step function on the * input. In this case the sinc-function exhibits an overshoot of * 2*9=18percent (Gibb's phaenomenon). In any case sample overflow * can be avoided by a gain of 0.8. * * If u=d=1 and if the input stream contains only a single sample, * the whole length of the FIR-filter will be written to the output. * In general the resulting output signal will be (L-1)*fsout/fsin * samples longer than the input signal. The effect is that a * finite input sequence is viewed as padded with zeros before the * `beginning' and after the `end'. * * The output lags ((L-1)/2)*Tin behind to implement g(t) as a * causal system corresponding to a causal relationship of the * discrete-time sequences in(m/fsin) and out(n/fsout) with * resepect to a sequence time origin at t=n*Tin=m*Tout=0. * * * REFERENCES * * Crochiere, R. E., Rabiner, L. R.: "Multirate Digital Signal * Processing", Prentice-Hall, Englewood Cliffs, New Jersey, 1983 * * Zwicker, E., Fastl, H.: "Psychoacoustics - Facts and Models", * Springer-Verlag, Berlin, Heidelberg, New-York, Tokyo, 1990 */ #include "cst_string.h" #include "cst_math.h" #include "cst_alloc.h" #include "cst_error.h" #include "cst_wave.h" /* * adaptable defines and globals */ #define DPRINTF(l, x) #define FIXSHIFT 16 #define FIXMUL (1<<FIXSHIFT) #ifndef M_PI #define M_PI 3.1415926535 #endif #define sqr(a) ((a)*(a)) /* * FIR-routines, mono and stereo * this is where we need all the MIPS */ static void fir_mono(int *inp, int *coep, int firlen, int *outp) { register int akku = 0, *endp; int n1 = (firlen / 8) * 8, n0 = firlen % 8; endp = coep + n1; while (coep != endp) { akku += *inp++ * *coep++; akku += *inp++ * *coep++; akku += *inp++ * *coep++; akku += *inp++ * *coep++; akku += *inp++ * *coep++; akku += *inp++ * *coep++; akku += *inp++ * *coep++; akku += *inp++ * *coep++; } endp = coep + n0; while (coep != endp) { akku += *inp++ * *coep++; } *outp = akku; } static void fir_stereo(int *inp, int *coep, int firlen, int *out1p, int *out2p) { register int akku1 = 0, akku2 = 0, *endp; int n1 = (firlen / 8) * 8, n0 = firlen % 8; endp = coep + n1; while (coep != endp) { akku1 += *inp++ * *coep; akku2 += *inp++ * *coep++; akku1 += *inp++ * *coep; akku2 += *inp++ * *coep++; akku1 += *inp++ * *coep; akku2 += *inp++ * *coep++; akku1 += *inp++ * *coep; akku2 += *inp++ * *coep++; akku1 += *inp++ * *coep; akku2 += *inp++ * *coep++; akku1 += *inp++ * *coep; akku2 += *inp++ * *coep++; akku1 += *inp++ * *coep; akku2 += *inp++ * *coep++; akku1 += *inp++ * *coep; akku2 += *inp++ * *coep++; } endp = coep + n0; while (coep != endp) { akku1 += *inp++ * *coep; akku2 += *inp++ * *coep++; } *out1p = akku1; *out2p = akku2; } /* * filtering from input buffer to output buffer; * returns number of processed samples in output buffer: * if it is not equal to output buffer size, * the input buffer is expected to be refilled upon entry, so that * the last firlen numbers of the old input buffer are * the first firlen numbers of the new input buffer; * if it is equal to output buffer size, the output buffer * is full and is expected to be stowed away; * */ static int filtering_on_buffers(cst_rateconv *filt) { int insize; DPRINTF(0, ("filtering_on_buffers(%d)\n", filt->incount)); insize = filt->incount + filt->lag; if (filt->channels == 1) { while (1) { filt->inoffset = (filt->cycctr * filt->down)/filt->up; if ((filt->inbaseidx + filt->inoffset + filt->len) > insize) { filt->inbaseidx -= insize - filt->len + 1; memcpy(filt->sin, filt->sin + insize - filt->lag, filt->lag * sizeof(int)); /* Prevent people from re-filtering the same stuff. */ filt->incount = 0; return 0; } fir_mono(filt->sin + filt->inoffset + filt->inbaseidx, filt->coep + filt->cycctr * filt->len, filt->len, filt->sout + filt->outidx); DPRINTF(1, ("in(%d + %d) = %d cycctr %d out(%d) = %d\n", filt->inoffset, filt->inbaseidx, filt->sin[filt->inoffset + filt->inbaseidx], filt->cycctr, filt->outidx, filt->sout[filt->outidx] >> FIXSHIFT)); ++filt->outidx; ++filt->cycctr; if (!(filt->cycctr %= filt->up)) filt->inbaseidx += filt->down; if (!(filt->outidx %= filt->outsize)) return filt->outsize; } } else if (filt->channels == 2) { /* * rule how to convert mono routine to stereo routine: * firlen, up, down and cycctr relate to samples in general, * wether mono or stereo; inbaseidx, inoffset and outidx as * well as insize and outsize still account for mono samples. */ while (1) { filt->inoffset = 2*((filt->cycctr * filt->down)/filt->up); if ((filt->inbaseidx + filt->inoffset + 2*filt->len) > insize) { filt->inbaseidx -= insize - 2*filt->len + 2; return filt->outidx; } fir_stereo(filt->sin + filt->inoffset + filt->inbaseidx, filt->coep + filt->cycctr * filt->len, filt->len, filt->sout + filt->outidx, filt->sout + filt->outidx + 1); filt->outidx += 2; ++filt->cycctr; if (!(filt->cycctr %= filt->up)) filt->inbaseidx += 2*filt->down; if (!(filt->outidx %= filt->outsize)) return filt->outsize; } } else { cst_errmsg("filtering_on_buffers: only 1 or 2 channels supported!\n"); cst_error(); } return 0; } /* * convert buffer of n samples to ints */ static void sample_to_int(short *buff, int n) { short *s, *e; int *d; if (n < 1) return; s = buff + n; d = (int*)buff + n; e = buff; while (s != e) { *--d = (int)(*--s); } } /* * convert buffer of n ints to samples */ static void int_to_sample(short *buff, int n) { int *s; short *e, *d; if (n < 1) return; s = (int *)buff; d = buff; e = buff + n; while (d != e) *d++ = (*s++ >> FIXSHIFT); } /* * read and convert input sample format to integer */ int cst_rateconv_in(cst_rateconv *filt, const short *inptr, int max) { if (max > filt->insize - filt->lag) max = filt->insize - filt->lag; if (max > 0) { memcpy(filt->sin + filt->lag, inptr, max * sizeof(short)); sample_to_int((short *)(filt->sin + filt->lag), max); } filt->incount = max; return max; } /* * do some conversion jobs and write */ int cst_rateconv_out(cst_rateconv *filt, short *outptr, int max) { int outsize; if ((outsize = filtering_on_buffers(filt)) == 0) return 0; if (max > outsize) max = outsize; int_to_sample((short *)filt->sout, max); memcpy(outptr, filt->sout, max * sizeof(short)); return max; } int cst_rateconv_leadout(cst_rateconv *filt) { memset(filt->sin + filt->lag, 0, filt->lag * sizeof(int)); filt->incount = filt->lag; return filt->lag; } /* * evaluate sinc(x) = sin(x)/x safely */ static double sinc(double x) { return(fabs(x) < 1E-50 ? 1.0 : sin(fmod(x,2*M_PI))/x); } /* * evaluate interpolation function g(t) at t * integral of g(t) over all times is expected to be one */ static double interpol_func(double t, double fgk, double fgg) { return (2*fgk*sinc(M_PI*2*fgk*t)*exp(-M_PI*sqr(2*fgg*t))); } /* * evaluate coefficient from i, q=n%u by sampling interpolation function * and scale it for integer multiplication used by FIR-filtering */ static int coefficient(int i, int q, cst_rateconv *filt) { return (int)(FIXMUL * filt->gain * interpol_func((fmod(q* filt->down/(double)filt->up,1.0) + (filt->len-1)/2.0 - i) / filt->fsin, filt->fgk, filt->fgg) / filt->fsin); } /* * set up coefficient array */ static void make_coe(cst_rateconv *filt) { int i, q; filt->coep = cst_alloc(int, filt->len * filt->up); for (i = 0; i < filt->len; i++) { for (q = 0; q < filt->up; q++) { filt->coep[q * filt->len + i] = coefficient(i, q, filt); } } } cst_rateconv * new_rateconv(int up, int down, int channels) { cst_rateconv *filt; if (!(channels == 1 || channels == 2)) { cst_errmsg("new_rateconv: channels must be 1 or 2\n"); cst_error(); } filt = cst_alloc(cst_rateconv, 1); filt->fsin = 1.0; filt->gain = 0.8; filt->fgg = 0.0116; filt->fgk = 0.461; filt->len = 162; filt->down = down; filt->up = up; filt->channels = channels; if (down > up) { filt->fgg *= (double) up / down; filt->fgk *= (double) up / down; filt->len = filt->len * down / up; } make_coe(filt); filt->lag = (filt->len - 1) * channels; filt->insize = channels*filt->len + filt->lag; filt->outsize = channels*filt->len; filt->sin = cst_alloc(int, filt->insize); filt->sout = cst_alloc(int, filt->outsize); return filt; } void delete_rateconv(cst_rateconv *filt) { cst_free(filt->coep); cst_free(filt->sin); cst_free(filt->sout); cst_free(filt); }