shithub: sox

Download patch

ref: 6b43e9afa4681b42350ea17adcf54a015afc1731
parent: b46c1ef7c4af24ca31095f40819f33a44b151268
author: robs <robs>
date: Tue Jun 3 04:04:31 EDT 2008

new rate effect

--- a/AUTHORS
+++ b/AUTHORS
@@ -21,8 +21,9 @@
 		EFFECTS: key, tempo, pad, bass, treble, delay, new reverb, new
 		flanger, soft-knee companding, speed via resampling, filters
 		makeover inc. gnuplot & octave plotting, splice, remix, norm,
-		contrast; new effects chain with buffering and any # channels.
-                OTHERS: open input files via URL, file multiply/merge, play
+		contrast, new rate;
+                new effects chain with buffering and any # channels.
+                OTHERS: open input files via URL, file merge, play
                 multiple files with mixed format types, play with replay-gain,
                 building with cmake, much manual improvement and expansion,
 		soxi, improved displays with -S & -V including VU and clips,
--- a/ChangeLog
+++ b/ChangeLog
@@ -18,7 +18,6 @@
   13.0.0   E avg, pick             mixer
   13.0.0   E highp, lowp           highpass -1, lowpass -1
   13.0.0   E mask                  dither
-  13.0.0   E rate                  ~= resample
   13.0.0   E vibro                 ~= tremolo
   13.0.0   F auto                  Becomes internal only
 
@@ -57,6 +56,7 @@
   o New `norm' (normalise) effect.  (robs)
   o New `delay' effect; delay one or more channels.  (robs)
   o New `contrast' enhancement effect [FR 708923].  (robs)
+  o Rewrite of rate; resample, polyphase, & rabbit now deprecated.  (robs)
   o `synth' can now sweep linearly (as well as logarithmically).  (robs)
   o Fix synth max. level setting for some output encodings.  (robs)
   o Fix crash on 64-bit arch. with tempo & key effects.  (Sami Liedes)
@@ -64,6 +64,8 @@
 
 Other new features:
 
+  o Now possible to control play-back resampling quality (and hence
+    cpu-load) via the PLAY_RATE_ARG environment variable.  (robs)
   o Command line support for multiple file comments.  (robs)
   o Soxi utility to extract/display file header fields.  (robs)
   o Pkg-config support. (Pascal Giard)
@@ -108,11 +110,14 @@
   -------  ----------------------  ----------------------  -------
   14.0.0   E stretch               ~= tempo                2008-09-11
   14.0.0   E pitch                 ~= key                  2008-09-11
+  14.0.0   E resample              ~= rate                 14.1.0 + 1 year
+  14.0.0   E polyphase             ~= rate                 14.1.0 + 1 year
+  14.0.0   E rabbit                ~= rate                 14.1.0 + 1 year
   14.1.0   F wve (native)          wve (libsndfile)        14.1.0 + 1 year
   14.1.0   F flac: libFLAC 1.1.1   libFLAC > 1.1.1         14.1.0 + 6 months
   14.1.0   Behaviour whereby       soxi                    14.1.0 + 1 year
            sox -V file(s) -n
-	   doesn't read to EOF.
+           doesn't read to EOF.
 
 
 sox-14.0.1	2008-01-29
--- a/sox.1
+++ b/sox.1
@@ -90,11 +90,9 @@
 .EE
 adds a header to a raw audio file,
 .EX
-	sox slow.aiff fixed.aiff speed 1.027 rabbit
+	sox slow.aiff fixed.aiff speed 1.027
 .EE
-adjusts audio speed using the
-.B rabbit
-algorithm,
+adjusts audio speed,
 .EX
 	sox short.au long.au longer.au
 .EE
@@ -255,6 +253,27 @@
 .B set
 command may vary from system to system.)
 .SP
+When playing a file with a sample rate that is not supported by the
+audio output device, SoX will automatically invoke the \fBrate\fR effect
+to perform the necessary sample rate conversion.  For
+compatibility with old hardware, here, the
+default \fBrate\fR quality level is set to `low'; however, this
+can be changed if desired, by explicitly specifing the \fBrate\fR
+effect with a different quality level, e.g.
+.EX
+	play ... rate -m
+.EE
+or by setting the environment varible
+.B PLAY_RATE_ARG
+to the desired quality option, e.g.
+.EX
+	set PLAY_RATE_ARG=-m
+	play ...
+.EE
+(Note that the syntax of the
+.B set
+command may vary from system to system.)
+.SP
 To help with setting a suitable recording level, SoX includes a simple VU
 meter which can be invoked (before making the actual recording) as follows:
 .EX
@@ -684,7 +703,8 @@
 different rates then a sample rate change effect must be run.  Since
 SoX has
 multiple rate changing effects, the user can specify which to use as an effect.
-If no rate change effect is specified then a default one will be chosen.
+If no rate change effect is specified then the \fBrate\fR effect will
+be chosen by default.
 .TP
 \fB\-t\fR, \fB\-\-type\fR \fIfile-type\fR
 Gives the type of the audio file.  This is useful when the
--- a/soxeffect.7
+++ b/soxeffect.7
@@ -199,7 +199,7 @@
 If the list is preceded by a
 .I soft-knee-dB
 value, then the points at where adjacent line segments on the
-transfer function meet will be rounded by the amout given. 
+transfer function meet will be rounded by the amount given. 
 Typical values for the transfer function are
 .BR 6:\-70,\-60,\-20 .
 .SP
@@ -319,7 +319,7 @@
 Gain-out is the volume of the output.
 .TP
 \fBechos \fIgain-in gain-out\fR <\fIdelay decay\fR>
-Add a sequence of echos to the audio.
+Add a sequence of echoes to the audio.
 Each
 .I "delay decay"
 pair gives the delay in milliseconds
@@ -387,12 +387,22 @@
 or greater than or equal to the Nyquist frequency.
 .SP
 The \fIwindow-len\fR, if unspecified, defaults to 128.
-Longer windows give a sharper cutoff, smaller windows a more gradual cutoff.
+Longer windows give a sharper cut-off, smaller windows a more gradual cut-off.
 .SP
-The \fIbeta\fR, if unspecified, defaults to 16.  This selects a Kaiser window.
-You can select a Nuttall window by specifying anything \(<= 2 here.
-For more discussion of beta, look under the \fBresample\fR effect.
+The \fIbeta\fR parameter
+determines the type of filter window used.  Any value greater than 2 is
+the beta for a Kaiser window.  Beta \(<= 2 selects a Nuttall window.
+If unspecified, the default is a Kaiser window with beta 16.
 .SP
+In the case of Kaiser window (beta > 2), lower betas produce a
+somewhat faster transition from pass-band to stop-band, at the cost of
+noticeable artifacts. A beta of 16 is the default, beta less than 10
+is not recommended. If you want a sharper cut-off, don't use low
+beta's, use a longer sample window. A Nuttall window is selected by
+specifying any `beta' \(<= 2, and the Nuttall window has somewhat
+steeper cut-off than the default Kaiser window. You will probably not
+need to use the beta parameter at all, unless you are just curious
+about comparing the effects of Nuttall vs. Kaiser windows.
 .TP
 \fBflanger\fR [\fIdelay depth regen width speed shape phase interp\fR]
 Apply a flanging effect to the audio.
@@ -430,7 +440,7 @@
 .TP
 \fBgain \fIdB-gain\fR
 Apply an amplification or an attenuation to the audio signal.
-This is just a alias for the
+This is just an alias for the
 .B vol
 effect\*mhandy for those who prefer to work in dBs by default.
 .TP
@@ -474,7 +484,7 @@
 module can contain more than one plugin) and any other arguments are
 for the control ports of the plugin. Missing arguments are supplied by
 default values if possible. Only plugins with at most one audio input
-and one audio output port can be used.  If found, the enviornment varible
+and one audio output port can be used.  If found, the environment varible
 LADSPA_PATH will be used as search path for plugins.
 .TP
 \fBlowpass\fR [\fB-1\fR|\fB-2\fR] \fIfrequency\fR [\fRwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR]]
@@ -694,47 +704,48 @@
 (\fB\-t\fR).  The decay should be less than 0\*d5 to avoid
 feedback.  Gain-out is the volume of the output.
 .TP
-\fBpolyphase\fR [\fB\-w nut\fR\^|\^\fBham\fR] [\fB\-width \fIn\fR] [\fB\-cutoff \fIc\fR]
-Change the sampling rate using `polyphase interpolation', a DSP algorithm.
-This method is relatively slow and memory intensive.
+\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR]
+Change the audio sampling rate (i.e. resample the audio)
+using a quality level as follows:
+.TS
+center box;
+cI cI cI cI lI
+cB c c c l.
+\ 	Quality	BW %	Rej dB	Typical Use
+\-q	T{
+quick & dirty
+T}	n/a	\(~=30 @ Fs/4	T{
+.na
+playback on ancient hardware
+T}
+\-l	low	80	100	T{
+.na
+playback on old hardware
+T}
+\-m	medium	99	100	audio playback
+\-h	high	99	120	T{
+.na
+16-bit mastering (use with dither)
+T}
+\-v	very high	99\*d7	150	24-bit mastering\ 
+.TE
+.DT
 .SP
-If the \fB\-w\fR parameter is \fBnut\fR, then a Nuttall (~90 dB
-stop-band) window will be used; \fBham\fR selects a Hamming (~43
-dB stop-band) window.  The default is Nuttall.
+where
+.B BW %
+is the percentage of the audio band that is preserved (based on the 3dB point)
+during sample rate conversion, and
+.B Rej dB
+is the level of noise rejection.
+The default quality level is `high' (\fB-h\fR).
 .SP
-The \fB\-width\fR parameter specifies the (approximate) width of the filter. The default is 1024 samples, which produces reasonable results.
-.SP
-The \fB\-cutoff\fR value (\fIc\fR) specifies the filter cutoff frequency in terms of fraction of
-frequency bandwidth, also know as the Nyquist frequency.  See
-the \fBresample\fR effect for
-further information on Nyquist frequency.  If up-sampling, then this is the
-fraction of the original signal
-that should go through.  If down-sampling, this is the fraction of the
-signal left after down-sampling.  The default is 0\*d95.
-.SP
 See also
-.B rabbit
+.BR resample ,
+.B polyphase
 and
-.B resample
+.B rabbit
 for other sample-rate changing effects.
 .TP
-\fBrabbit\fR [\fB\-c0\fR\^|\^\fB\-c1\fR\^|\^\fB\-c2\fR\^|\^\fB\-c3\fR\^|\^\fB\-c4\fR]
-Change the sampling rate using libsamplerate, also known as `Secret Rabbit
-Code'.  This effect is
-optional and must have been selected at compile time of SoX.  See
-http://www.mega-nerd.com/SRC for details of the algorithms.  Algorithms
-0 through 2 are progressively faster and lower quality versions of the
-sinc algorithm; the default is \fB\-c0\fR, which is probably the best
-quality algorithm for general use currently available in SoX.
-Algorithm 3 is zero-order hold, and 4 is linear interpolation.
-.SP
-See also
-.B polyphase
-and
-.B resample
-for other sample-rate changing effects, and see
-\fBresample\fR for more discussion of resampling.
-.TP
 \fBremix\fR [\fB\-a\fR\^|\^\fB\-m\fR] <\fIout-spec\fR>
 \fIout-spec\fR	= \fIin-spec\fR{\fB,\fIin-spec\fR} | \fB0\fR
 .br
@@ -883,102 +894,6 @@
 Note that repeating once yields two copies: the original audio and the
 repeated audio.
 .TP
-\fBresample\fR [\fB\-qs\fR\^|\^\fB\-q\fR\^|\^\fB\-ql\fR] [\fIrolloff\fR [\fIbeta\fR]]
-Change the sampling rate using simulated
-analog filtration.  Other rate changing effects available are
-\fBpolyphase\fR and \fBrabbit\fR.  There is a detailed analysis of
-\fBresample\fR and \fBpolyphase\fR at
-http://leute.server.de/wilde/resample.html; see \fBrabbit\fR for a
-pointer to its own documentation.
-.SP
-By default, linear interpolation is used,
-with a window width about 45 samples at the lower of the two rates.
-This gives an accuracy of about 16 bits, but insufficient stop-band rejection
-in the case that you want to have roll-off greater than about 0\*d8 of
-the Nyquist frequency.
-.SP
-The \fB\-q*\fR options will change the default values for roll-off and beta
-as well as use quadratic interpolation of filter
-coefficients, resulting in about 24 bits precision.
-The \fB\-qs\fR, \fB\-q\fR, or \fB\-ql\fR options specify increased accuracy
-at the cost of lower execution speed.  It is optional to specify
-roll-off and beta parameters when using the \fB\-q*\fR options.
-.SP
-Following is a table of the reasonable defaults which are built-in to
-SoX:
-.SP
-.TS
-center box;
-cB cB cB cB cB
-c c n c c
-cB c n c c.
-Option	Window	Roll-off	Beta	Interpolation
-(none)	45	0\*d80	16	linear
-\-qs	45	0\*d80	16	quadratic
-\-q	75	0\*d875	16	quadratic
-\-ql	149	0\*d94	16	quadratic
-.TE
-.DT
-.SP
-\fB\-qs\fR, \fB\-q\fR, or \fB\-ql\fR use window lengths of 45, 75, or 149
-samples, respectively, at the lower sample-rate of the two files.
-This means progressively sharper stop-band rejection, at proportionally
-slower execution times.
-.SP
-\fIrolloff\fR refers to the cut-off frequency of the
-low pass filter and is given in terms of the
-Nyquist frequency for the lower sample rate.  rolloff therefore should
-be something between 0 and 1, in practise 0\*d8\-0\*d95.  The defaults are
-indicated above.
-.SP
-The \fINyquist frequency\fR is equal to half the sample rate.  Logically,
-this is because the A/D converter needs at least 2 samples to detect 1
-cycle at the Nyquist frequency.  Frequencies higher then the Nyquist
-will actually appear as lower frequencies to the A/D converter and
-is called aliasing.  Normally, A/D converts run the signal through
-a lowpass filter first to avoid these problems.
-.SP
-Similar problems will happen in software when reducing the sample rate of
-an audio file (frequencies above the new Nyquist frequency can be aliased
-to lower frequencies).  Therefore, a good resample effect
-will remove all frequency information above the new Nyquist frequency.
-.SP
-The \fIrolloff\fR refers to how close to the Nyquist frequency this cutoff
-is, with closer being better.  When increasing the sample rate of an
-audio file you would not expect to have any frequencies exist that are
-past the original Nyquist frequency.  Because of resampling properties, it
-is common to have aliasing artifacts created above the old
-Nyquist frequency.  In that case the \fIrolloff\fR refers to how close
-to the original Nyquist frequency to use a highpass filter to remove
-these artifacts, with closer also being better.
-.SP
-The \fIbeta\fR parameter
-determines the type of filter window used.  Any value greater than 2 is
-the beta for a Kaiser window.  Beta \(<= 2 selects a Nuttall window.
-If unspecified, the default is a Kaiser window with beta 16.
-.SP
-In the case of Kaiser window (beta > 2), lower betas produce a
-somewhat faster transition from pass-band to stop-band, at the cost of
-noticeable artifacts. A beta of 16 is the default, beta less than 10
-is not recommended. If you want a sharper cutoff, don't use low
-beta's, use a longer sample window. A Nuttall window is selected by
-specifying any `beta' \(<= 2, and the Nuttall window has somewhat
-steeper cutoff than the default Kaiser window. You will probably not
-need to use the beta parameter at all, unless you are just curious
-about comparing the effects of Nuttall vs. Kaiser windows.
-.SP
-This is the default effect if the two files have different sampling rates.
-Default parameters are, as indicated above, Kaiser window of length 45,
-roll-off 0\*d80, beta 16, linear interpolation.
-.SP
-Note: \fB\-qs\fR is only slightly slower, but more accurate for
-16-bit or higher precision.
-.SP
-Note: In many cases of up-sampling, no interpolation is needed,
-as exact filter coefficients can be computed in a reasonable amount of space.
-To be precise, this is done when both input-rate < output-rate, and
-output-rate \(di gcd(input-rate, output-rate) \(<= 511.
-.TP
 \fBreverb\fR [\fB-w\fR|\fB--wet-only\fR] [\fIreverberance\fR (50%) [\fIHF-damping\fR (50%)
 [\fIroom-scale\fR (100%) [\fIstereo-depth\fR (100%)
 .br
@@ -991,7 +906,7 @@
 increases both the volume and the length of the audio, so to prevent clipping
 in these domains, a typical invocation might be:
 .EX
-	play dry.au vol -3dB pad 0 3 reverb
+	play dry.au gain -3 pad 0 3 reverb
 .EE
 .TP
 \fBreverse\fR
@@ -1083,16 +998,15 @@
 which the pitch (and tempo) should be adjusted: greater than 0
 increases, less than 0 decreases.
 .SP
-By default, the speed change is performed by the \fBresample\fR
-effect with its default parameters.  For higher quality
+By default, the speed change is performed by resampling with the \fBrate\fR
+effect using its default quality/speed.  For higher quality or higher speed
 resampling, in addition to the \fBspeed\fR effect, specify
-either the \fBresample\fR or the \fBrabbit\fR effect with
-appropriate parameters.
+the \fBrate\fR effect with the desired quality options.
 .TP
-\fBsplice \fR { \fIposition\fR[\fB,\fIexcess\fR[\fB,\fIleaway\fR]] }
+\fBsplice \fR { \fIposition\fR[\fB,\fIexcess\fR[\fB,\fIleeway\fR]] }
 Splice together audio sections.  This effect provides two things over
 simple audio concatenation: a (usually short) cross-fade is applied at
-the join and a wave similarity comparison is made to help determine the
+the join, and a wave similarity comparison is made to help determine the
 best place at which to make the join.
 .SP
 To perform a splice, first use the
@@ -1106,7 +1020,7 @@
 same
 .IR excess
 (before the ideal joining point), plus an additional
-.I leaway
+.I leeway
 (default 0\*d005 seconds).  Sox should then be invoked with the two
 audio sections as input files and the
 .B splice
@@ -1127,7 +1041,7 @@
 .EX
 	sox long.au part2.au trim 1:03.422
 .EE
-(5 ms excess plus 5 ms leaway, before the second verse starts)
+(5 ms excess plus 5 ms leeway, before the second verse starts)
 .EX
 	sox part1.au part2.au just-right.au splice 30.130
 .EE
@@ -1142,7 +1056,7 @@
 # All times measured in samples.
 rate=\`soxi -r "$1"\`
 e=\`expr $rate '*' 5 / 1000\`  # Using default excess
-l=$e                         # and leaway.
+l=$e                         # and leeway.
 sox "$1" piece.au trim \`expr $2 - $e - $l\`s \\
 	\`expr $3 - $2 + $e + $l + $e\`s
 sox "$1" part1.au trim 0 \`expr $4 + $e\`s
@@ -1164,7 +1078,7 @@
 In this case,
 .I excess
 would typically be an number of seconds, and
-.I leaway
+.I leeway
 should be set to zero.
 .TP
 \fBstat\fR [\fB\-s \fIn\fR] [\fB\-rms\fR] [\fB\-freq\fR] [\fB\-v\fR] [\fB\-d\fR]
@@ -1192,7 +1106,8 @@
 option is used to scale the input data by a given factor.  The default value
 of
 .I n
-is the max value of a signed long variable (0x7fffffff).  Internal effects
+is the maximum value of a signed long integer (7fffffff in hexadecimal).
+Internal effects
 always work with signed long PCM data and so the value should relate to this
 fact.
 .SP
@@ -1461,7 +1376,7 @@
 for a dynamic-range compression/expansion/limiting effect.
 .SS Deprecated Effects
 The following effects have been renamed or have their functionality
-included in another effect.  They continue to work in this version of
+included in another effect; they continue to work in this version of
 SoX but may be removed in future.
 .TP
 \fBpitch \fIshift\fR [\fIwidth interpolate fade\fR]
@@ -1470,7 +1385,7 @@
 .B key
 effect with
 .I search
-set to zero, so its results are comparitively poor; 
+set to zero, so its results are comparatively poor; 
 it is retained for backwards compatibility only.
 .SP
 Change by cross-fading shifted samples.
@@ -1486,6 +1401,145 @@
 option, can be \fBcos\fR, \fBhamming\fR, \fBlinear\fR or
 \fBtrapezoid\fR; the default is \fBcos\fR.
 .TP
+\fBpolyphase\fR [\fB\-w nut\fR\^|\^\fBham\fR] [\fB\-width \fIn\fR] [\fB\-cut-off \fIc\fR]
+Change the sampling rate using `polyphase interpolation', a DSP algorithm.
+\fBpolyphase\fR copes with only certain rational fraction resampling ratios,
+and, compared with the \fBrate\fR effect, is generally slow, memory intensive,
+and has poorer stop-band rejection.
+.SP
+If the \fB\-w\fR parameter is \fBnut\fR, then a Nuttall (~90 dB
+stop-band) window will be used; \fBham\fR selects a Hamming (~43
+dB stop-band) window.  The default is Nuttall.
+.SP
+The \fB\-width\fR parameter specifies the (approximate) width of the filter. The default is 1024 samples, which produces reasonable results.
+.SP
+The \fB\-cut-off\fR value (\fIc\fR) specifies the filter cut-off frequency in terms of fraction of
+frequency bandwidth, also know as the Nyquist frequency.  See
+the \fBresample\fR effect for
+further information on Nyquist frequency.  If up-sampling, then this is the
+fraction of the original signal
+that should go through.  If down-sampling, this is the fraction of the
+signal left after down-sampling.  The default is 0\*d95.
+.SP
+See also
+.BR rate ,
+.B rabbit
+and
+.B resample
+for other sample-rate changing effects.
+.TP
+\fBrabbit\fR [\fB\-c0\fR\^|\^\fB\-c1\fR\^|\^\fB\-c2\fR\^|\^\fB\-c3\fR\^|\^\fB\-c4\fR]
+Change the sampling rate using libsamplerate, also known as `Secret Rabbit
+Code'.  This effect is optional and, due to licence issues,
+is not included in all versions of SoX.
+Compared with the \fBrate\fR effect, \fBrabbit\fR is very slow.
+.SP
+See http://www.mega-nerd.com/SRC for details of the algorithms.  Algorithms
+0 through 2 are progressively faster and lower quality versions of the
+sinc algorithm; the default is \fB\-c0\fR.
+Algorithm 3 is zero-order hold, and 4 is linear interpolation.
+.SP
+See also
+.BR rate ,
+.B polyphase
+and
+.B resample
+for other sample-rate changing effects, and see
+\fBresample\fR for more discussion of resampling.
+.TP
+\fBresample\fR [\fB\-qs\fR\^|\^\fB\-q\fR\^|\^\fB\-ql\fR] [\fIrolloff\fR [\fIbeta\fR]]
+Change the sampling rate using simulated analog filtration.
+Compared with the \fBrate\fR effect, \fBresample\fR is slow, and has poorer
+stop-band rejection.
+Only the low quality option works with all resampling ratios.
+.SP
+By default, linear interpolation of the filter coefficients is used,
+with a window width about 45 samples at the lower of the two rates.
+This gives an accuracy of about 16 bits, but insufficient stop-band rejection
+in the case that you want to have roll-off greater than about 0\*d8 of
+the Nyquist frequency.
+.SP
+The \fB\-q*\fR options will change the default values for roll-off and beta
+as well as use quadratic interpolation of filter
+coefficients, resulting in about 24 bits precision.
+The \fB\-qs\fR, \fB\-q\fR, or \fB\-ql\fR options specify increased accuracy
+at the cost of lower execution speed.  It is optional to specify
+roll-off and beta parameters when using the \fB\-q*\fR options.
+.SP
+Following is a table of the reasonable defaults which are built-in to
+SoX:
+.SP
+.TS
+center box;
+cB cB cB cB cB
+c c n c c
+cB c n c c.
+Option	Window	Roll-off	Beta	Interpolation
+(none)	45	0\*d80	16	linear
+\-qs	45	0\*d80	16	quadratic
+\-q	75	0\*d875	16	quadratic
+\-ql	149	0\*d94	16	quadratic
+.TE
+.DT
+.SP
+\fB\-qs\fR, \fB\-q\fR, or \fB\-ql\fR use window lengths of 45, 75, or 149
+samples, respectively, at the lower sample-rate of the two files.
+This means progressively sharper stop-band rejection, at proportionally
+slower execution times.
+.SP
+\fIrolloff\fR refers to the cut-off frequency of the
+low pass filter and is given in terms of the
+Nyquist frequency for the lower sample rate.  rolloff therefore should
+be something between 0 and 1, in practise 0\*d8\-0\*d95.  The defaults are
+indicated above.
+.SP
+The \fINyquist frequency\fR is equal to half the sample rate.  Logically,
+this is because the A/D converter needs at least 2 samples to detect 1
+cycle at the Nyquist frequency.  Frequencies higher then the Nyquist
+will actually appear as lower frequencies to the A/D converter and
+is called aliasing.  Normally, A/D converts run the signal through
+a lowpass filter first to avoid these problems.
+.SP
+Similar problems will happen in software when reducing the sample rate of
+an audio file (frequencies above the new Nyquist frequency can be aliased
+to lower frequencies).  Therefore, a good resample effect
+will remove all frequency information above the new Nyquist frequency.
+.SP
+The \fIrolloff\fR refers to how close to the Nyquist frequency this cut-off
+is, with closer being better.  When increasing the sample rate of an
+audio file you would not expect to have any frequencies exist that are
+past the original Nyquist frequency.  Because of resampling properties, it
+is common to have aliasing artifacts created above the old
+Nyquist frequency.  In that case the \fIrolloff\fR refers to how close
+to the original Nyquist frequency to use a highpass filter to remove
+these artifacts, with closer also being better.
+.SP
+The \fIbeta\fR, if unspecified, defaults to 16.  This selects a Kaiser window.
+You can select a Nuttall window by specifying anything \(<= 2 here.
+For more discussion of beta, look under the \fBfilter\fR effect.
+.SP
+Default parameters are, as indicated above, Kaiser window of length 45,
+roll-off 0\*d80, beta 16, linear interpolation.
+.SP
+Note: \fB\-qs\fR is only slightly slower, but more accurate for
+16-bit or higher precision.
+.SP
+Note: In many cases of up-sampling, no interpolation is needed,
+as exact filter coefficients can be computed in a reasonable amount of space.
+To be precise, this is done when both input-rate < output-rate, and
+output-rate \(di gcd(input-rate, output-rate) \(<= 511.
+.SP
+See also
+.BR rate ,
+.B polyphase
+and
+.B rabbit
+for other sample-rate changing effects.
+There is a detailed analysis of
+\fBresample\fR and \fBpolyphase\fR at
+http://leute.server.de/wilde/resample.html; see \fBrabbit\fR for a
+pointer to its own documentation.
+.TP
 \fBstretch \fIfactor\fR [\fIwindow fade shift fading\fR]
 Change the audio duration (but not its pitch).
 This effect is equivalent to the
@@ -1492,7 +1546,7 @@
 .B tempo
 effect with (\fIfactor\fR inverted and)
 .I search
-set to zero, so its results are comparitively poor; 
+set to zero, so its results are comparatively poor; 
 it is retained for backwards compatibility only.
 .SP
 .I factor
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -19,16 +19,16 @@
 
 # Format with: !xargs echo|tr ' ' '\n'|sort|column|expand|sed 's/^/  /'
 set(effects_srcs
-  biquad          echo            noiseprof       repeat          swap
-  biquads         echos           noisered        resample        synth
-  chorus          fade            normalise       reverb          tempo
-  compand         FFT             output          reverse         tremolo
-  compandt        filter          pad             silence         trim
-  contrast        flanger         pan             skeleff         vol
-  dcshift         input           phaser          speed
-  delay           key             pitch           splice
-  dither          mcompand        polyphas        stat
-  earwax          mixer           remix           stretch
+  biquad          echo            mixer           rate            stat
+  biquads         echos           noiseprof       remix           stretch
+  chorus          fade            noisered        repeat          swap
+  compand         FFT             normalise       resample        synth
+  compandt        fft4g           output          reverb          tempo
+  contrast        filter          pad             reverse         tremolo
+  dcshift         flanger         pan             silence         trim
+  delay           input           phaser          skeleff         vol
+  dither          key             pitch           speed
+  earwax          mcompand        polyphas        splice
 )
 set(formats_srcs
   8svx            cvsd-fmt        ima-fmt         s2-fmt          u2-fmt
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -220,14 +220,18 @@
 # libsfx - effects library #
 ############################
 
-libsfx_la_SOURCES = band.h biquad.c biquad.h biquads.c chorus.c compand.c \
-	  compandt.c compandt.h dcshift.c dither.c earwax.c echo.c echos.c \
-	  effects.c effects.h fade.c FFT.c FFT.h fifo.h filter.c flanger.c key.c \
-	  ladspa.c mcompand.c mixer.c noiseprof.c noisered.c noisered.h pad.c \
-	  pan.c phaser.c pitch.c polyphas.c rabbit.c remix.c repeat.c \
-	  resample.c reverb.c reverse.c silence.c skeleff.c speed.c	\
-	  splice.c stat.c stretch.c swap.c synth.c tempo.c tremolo.c trim.c \
-	  vol.c normalise.c delay.c contrast.c effects_i.c input.c output.c
+libsfx_la_SOURCES = \
+	band.h biquad.c biquad.h biquads.c chorus.c compand.c compandt.c \
+	compandt.h contrast.c dcshift.c delay.c dither.c earwax.c echo.c \
+	echos.c effects.c effects.h effects_i.c fade.c fft4g.c fft4g.h FFT.c \
+	FFT.h fifo.h filter.c flanger.c input.c key.c ladspa.c mcompand.c \
+	mixer.c noiseprof.c noisered.c noisered.h normalise.c output.c pad.c \
+	pan.c phaser.c pitch.c polyphas.c rabbit.c rate.c rate_coefs.h \
+	rate_filters.h rate_half_fir.h rate_poly_fir0.h rate_poly_fir.h \
+	remix.c repeat.c resample.c reverb.c reverse.c silence.c skeleff.c \
+	speed.c	splice.c stat.c stretch.c swap.c synth.c tempo.c tremolo.c \
+	trim.c vol.c
+
 libsfx_la_CFLAGS = @SAMPLERATE_CFLAGS@
 libsfx_la_LIBADD = @SAMPLERATE_LIBS@ libsox.la
 
--- a/src/effects.h
+++ b/src/effects.h
@@ -57,6 +57,7 @@
 #ifdef HAVE_SAMPLERATE_H
   EFFECT(rabbit)
 #endif
+  EFFECT(rate)
   EFFECT(remix)
   EFFECT(repeat)
   EFFECT(resample)
--- /dev/null
+++ b/src/fft4g.c
@@ -1,0 +1,1335 @@
+/* Copyright Takuya OOURA, 1996-2001.
+
+You may use, copy, modify and distribute this code for any
+purpose (include commercial use) and without fee.  Please
+refer to this package when you modify this code.
+
+Package home:  http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+
+Fast Fourier/Cosine/Sine Transform
+    dimension   :one
+    data length :power of 2
+    decimation  :frequency
+    radix       :4, 2
+    data        :inplace
+    table       :use
+functions
+    cdft: Complex Discrete Fourier Transform
+    rdft: Real Discrete Fourier Transform
+    ddct: Discrete Cosine Transform
+    ddst: Discrete Sine Transform
+    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+    void cdft(int, int, double *, int *, double *);
+    void rdft(int, int, double *, int *, double *);
+    void ddct(int, int, double *, int *, double *);
+    void ddst(int, int, double *, int *, double *);
+    void dfct(int, double *, double *, int *, double *);
+    void dfst(int, double *, double *, int *, double *);
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+    [definition]
+        <case1>
+            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+        <case2>
+            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            cdft(2*n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            cdft(2*n, -1, a, ip, w);
+    [parameters]
+        2*n            :data length (int)
+                        n >= 1, n = power of 2
+        a[0...2*n-1]   :input/output data (double *)
+                        input data
+                            a[2*j] = Re(x[j]), 
+                            a[2*j+1] = Im(x[j]), 0<=j<n
+                        output data
+                            a[2*k] = Re(X[k]), 
+                            a[2*k+1] = Im(X[k]), 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            cdft(2*n, -1, a, ip, w);
+        is 
+            cdft(2*n, 1, a, ip, w);
+            for (j = 0; j <= 2 * n - 1; j++) {
+                a[j] *= 1.0 / n;
+            }
+        .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+    [definition]
+        <case1> RDFT
+            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+        <case2> IRDFT (excluding scale)
+            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + 
+                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + 
+                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            rdft(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            rdft(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        <case1>
+                            output data
+                                a[2*k] = R[k], 0<=k<n/2
+                                a[2*k+1] = I[k], 0<k<n/2
+                                a[1] = R[n/2]
+                        <case2>
+                            input data
+                                a[2*j] = R[j], 0<=j<n/2
+                                a[2*j+1] = I[j], 0<j<n/2
+                                a[1] = R[n/2]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            rdft(n, 1, a, ip, w);
+        is 
+            rdft(n, -1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+    [definition]
+        <case1> IDCT (excluding scale)
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DCT
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddct(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddct(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        output data
+                            a[k] = C[k], 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            ddct(n, -1, a, ip, w);
+        is 
+            a[0] *= 0.5;
+            ddct(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+    [definition]
+        <case1> IDST (excluding scale)
+            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DST
+            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddst(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddst(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        <case1>
+                            input data
+                                a[j] = A[j], 0<j<n
+                                a[0] = A[n]
+                            output data
+                                a[k] = S[k], 0<=k<n
+                        <case2>
+                            output data
+                                a[k] = S[k], 0<k<n
+                                a[0] = S[n]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            ddst(n, -1, a, ip, w);
+        is 
+            a[0] *= 0.5;
+            ddst(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+    [definition]
+        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+    [usage]
+        ip[0] = 0; // first time only
+        dfct(n, a, t, ip, w);
+    [parameters]
+        n              :data length - 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n]       :input/output data (double *)
+                        output data
+                            a[k] = C[k], 0<=k<=n
+        t[0...n/2]     :work area (double *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+        is 
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+            for (j = 0; j <= n; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+    [definition]
+        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+    [usage]
+        ip[0] = 0; // first time only
+        dfst(n, a, t, ip, w);
+    [parameters]
+        n              :data length + 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        output data
+                            a[k] = S[k], 0<k<n
+                        (a[0] is used for work area)
+        t[0...n/2-1]   :work area (double *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly, 
+                        length of ip >= 
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of 
+            dfst(n, a, t, ip, w);
+        is 
+            dfst(n, a, t, ip, w);
+            for (j = 1; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+Appendix :
+    The cos/sin table is recalculated when the larger table required.
+    w[] and ip[] are compatible with all routines.
+*/
+
+
+#include <math.h>
+#include "fft4g.h"
+
+#ifdef FFT4G_FLOAT
+  #define double float
+  #define sin   sinf
+  #define cos   cosf
+  #define atan  atanf
+
+  #define cdft  cdft_f
+  #define rdft  rdft_f
+  #define ddct  ddct_f
+  #define ddst  ddst_f
+  #define dfct  dfct_f
+  #define dfst  dfst_f
+#endif
+
+static void bitrv2conj(int n, int *ip, double *a);
+static void bitrv2(int n, int *ip, double *a);
+static void cft1st(int n, double *a, double *w);
+static void cftbsub(int n, double *a, double *w);
+static void cftfsub(int n, double *a, double *w);
+static void cftmdl(int n, int l, double *a, double *w);
+static void dctsub(int n, double *a, int nc, double *c);
+static void dstsub(int n, double *a, int nc, double *c);
+static void makect(int nc, int *ip, double *c);
+static void makewt(int nw, int *ip, double *w);
+static void rftbsub(int n, double *a, int nc, double *c);
+static void rftfsub(int n, double *a, int nc, double *c);
+
+
+void cdft(int n, int isgn, double *a, int *ip, double *w)
+{
+    if (n > (ip[0] << 2)) {
+        makewt(n >> 2, ip, w);
+    }
+    if (n > 4) {
+        if (isgn >= 0) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+        } else {
+            bitrv2conj(n, ip + 2, a);
+            cftbsub(n, a, w);
+        }
+    } else if (n == 4) {
+        cftfsub(n, a, w);
+    }
+}
+
+
+void rdft(int n, int isgn, double *a, int *ip, double *w)
+{
+    int nw, nc;
+    double xi;
+    
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 2)) {
+        nc = n >> 2;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xi = a[0] - a[1];
+        a[0] += a[1];
+        a[1] = xi;
+    } else {
+        a[1] = 0.5 * (a[0] - a[1]);
+        a[0] -= a[1];
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+}
+
+
+void ddct(int n, int isgn, double *a, int *ip, double *w)
+{
+    int j, nw, nc;
+    double xr;
+    
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) {
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) {
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) {
+            a[j + 1] = a[j] - a[j - 1];
+            a[j] += a[j - 1];
+        }
+        a[1] = a[0] - xr;
+        a[0] += xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    dctsub(n, a, nc, w + nw);
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) {
+            a[j - 1] = a[j] - a[j + 1];
+            a[j] += a[j + 1];
+        }
+        a[n - 1] = xr;
+    }
+}
+
+
+void ddst(int n, int isgn, double *a, int *ip, double *w)
+{
+    int j, nw, nc;
+    double xr;
+    
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) {
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) {
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) {
+            a[j + 1] = -a[j] - a[j - 1];
+            a[j] -= a[j - 1];
+        }
+        a[1] = a[0] + xr;
+        a[0] -= xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    dstsub(n, a, nc, w + nw);
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) {
+            a[j - 1] = -a[j] - a[j + 1];
+            a[j] -= a[j + 1];
+        }
+        a[n - 1] = -xr;
+    }
+}
+
+
+void dfct(int n, double *a, double *t, int *ip, double *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    double xr, xi, yr, yi;
+    
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    m = n >> 1;
+    yi = a[m];
+    xi = a[0] + a[n];
+    a[0] -= a[n];
+    t[0] = xi - yi;
+    t[m] = xi + yi;
+    if (n > 2) {
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) {
+            k = m - j;
+            xr = a[j] - a[n - j];
+            xi = a[j] + a[n - j];
+            yr = a[k] - a[n - k];
+            yi = a[k] + a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi - yi;
+            t[k] = xi + yi;
+        }
+        t[mh] = a[mh] + a[n - mh];
+        a[mh] -= a[n - mh];
+        dctsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        a[n - 1] = a[0] - a[1];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) {
+            a[2 * j + 1] = a[j] + a[j + 1];
+            a[2 * j - 1] = a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) {
+            dctsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[0] - t[1];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = t[j] - t[j + 1];
+                a[k + l] = t[j] + t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 0; j < mh; j++) {
+                k = m - j;
+                t[j] = t[m + k] - t[m + j];
+                t[k] = t[m + k] + t[m + j];
+            }
+            t[mh] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+        a[n] = t[2] - t[1];
+        a[0] = t[2] + t[1];
+    } else {
+        a[1] = a[0];
+        a[2] = t[0];
+        a[0] = t[1];
+    }
+}
+
+
+void dfst(int n, double *a, double *t, int *ip, double *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    double xr, xi, yr, yi;
+    
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    if (n > 2) {
+        m = n >> 1;
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) {
+            k = m - j;
+            xr = a[j] + a[n - j];
+            xi = a[j] - a[n - j];
+            yr = a[k] + a[n - k];
+            yi = a[k] - a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi + yi;
+            t[k] = xi - yi;
+        }
+        t[0] = a[mh] - a[n - mh];
+        a[mh] += a[n - mh];
+        a[0] = a[m];
+        dstsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        a[n - 1] = a[1] - a[0];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) {
+            a[2 * j + 1] = a[j] - a[j + 1];
+            a[2 * j - 1] = -a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) {
+            dstsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[1] - t[0];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = -t[j] - t[j + 1];
+                a[k + l] = t[j] - t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 1; j < mh; j++) {
+                k = m - j;
+                t[j] = t[m + k] + t[m + j];
+                t[k] = t[m + k] - t[m + j];
+            }
+            t[0] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+    }
+    a[0] = 0;
+}
+
+
+/* -------- initializing routines -------- */
+
+
+static void makewt(int nw, int *ip, double *w)
+{
+    int j, nwh;
+    double delta, x, y;
+    
+    ip[0] = nw;
+    ip[1] = 1;
+    if (nw > 2) {
+        nwh = nw >> 1;
+        delta = atan(1.0) / nwh;
+        w[0] = 1;
+        w[1] = 0;
+        w[nwh] = cos(delta * nwh);
+        w[nwh + 1] = w[nwh];
+        if (nwh > 2) {
+            for (j = 2; j < nwh; j += 2) {
+                x = cos(delta * j);
+                y = sin(delta * j);
+                w[j] = x;
+                w[j + 1] = y;
+                w[nw - j] = y;
+                w[nw - j + 1] = x;
+            }
+            bitrv2(nw, ip + 2, w);
+        }
+    }
+}
+
+
+static void makect(int nc, int *ip, double *c)
+{
+    int j, nch;
+    double delta;
+    
+    ip[1] = nc;
+    if (nc > 1) {
+        nch = nc >> 1;
+        delta = atan(1.0) / nch;
+        c[0] = cos(delta * nch);
+        c[nch] = 0.5 * c[0];
+        for (j = 1; j < nch; j++) {
+            c[j] = 0.5 * cos(delta * j);
+            c[nc - j] = 0.5 * sin(delta * j);
+        }
+    }
+}
+
+
+/* -------- child routines -------- */
+
+
+static void bitrv2(int n, int *ip, double *a)
+{
+    int j, j1, k, k1, l, m, m2;
+    double xr, xi, yr, yi;
+    
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) {
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            j1 = 2 * k + m2 + ip[k];
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+        }
+    } else {
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+        }
+    }
+}
+
+
+static void bitrv2conj(int n, int *ip, double *a)
+{
+    int j, j1, k, k1, l, m, m2;
+    double xr, xi, yr, yi;
+    
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) {
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 2 * k + ip[k];
+            a[k1 + 1] = -a[k1 + 1];
+            j1 = k1 + m2;
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            k1 += m2;
+            a[k1 + 1] = -a[k1 + 1];
+        }
+    } else {
+        a[1] = -a[1];
+        a[m2 + 1] = -a[m2 + 1];
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 2 * k + ip[k];
+            a[k1 + 1] = -a[k1 + 1];
+            a[k1 + m2 + 1] = -a[k1 + m2 + 1];
+        }
+    }
+}
+
+
+static void cftfsub(int n, double *a, double *w)
+{
+    int j, j1, j2, j3, l;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+    
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i - x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i + x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i - x3r;
+        }
+    } else {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = a[j + 1] - a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] += a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+
+static void cftbsub(int n, double *a, double *w)
+{
+    int j, j1, j2, j3, l;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+    
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = -a[j + 1] - a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = -a[j + 1] + a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i - x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i + x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i - x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i + x3r;
+        }
+    } else {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = -a[j + 1] + a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] = -a[j + 1] - a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+
+static void cft1st(int n, double *a, double *w)
+{
+    int j, k1, k2;
+    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+    
+    x0r = a[0] + a[2];
+    x0i = a[1] + a[3];
+    x1r = a[0] - a[2];
+    x1i = a[1] - a[3];
+    x2r = a[4] + a[6];
+    x2i = a[5] + a[7];
+    x3r = a[4] - a[6];
+    x3i = a[5] - a[7];
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[4] = x0r - x2r;
+    a[5] = x0i - x2i;
+    a[2] = x1r - x3i;
+    a[3] = x1i + x3r;
+    a[6] = x1r + x3i;
+    a[7] = x1i - x3r;
+    wk1r = w[2];
+    x0r = a[8] + a[10];
+    x0i = a[9] + a[11];
+    x1r = a[8] - a[10];
+    x1i = a[9] - a[11];
+    x2r = a[12] + a[14];
+    x2i = a[13] + a[15];
+    x3r = a[12] - a[14];
+    x3i = a[13] - a[15];
+    a[8] = x0r + x2r;
+    a[9] = x0i + x2i;
+    a[12] = x2i - x0i;
+    a[13] = x0r - x2r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[10] = wk1r * (x0r - x0i);
+    a[11] = wk1r * (x0r + x0i);
+    x0r = x3i + x1r;
+    x0i = x3r - x1i;
+    a[14] = wk1r * (x0i - x0r);
+    a[15] = wk1r * (x0i + x0r);
+    k1 = 0;
+    for (j = 16; j < n; j += 16) {
+        k1 += 2;
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i;
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        x0r = a[j] + a[j + 2];
+        x0i = a[j + 1] + a[j + 3];
+        x1r = a[j] - a[j + 2];
+        x1i = a[j + 1] - a[j + 3];
+        x2r = a[j + 4] + a[j + 6];
+        x2i = a[j + 5] + a[j + 7];
+        x3r = a[j + 4] - a[j + 6];
+        x3i = a[j + 5] - a[j + 7];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 4] = wk2r * x0r - wk2i * x0i;
+        a[j + 5] = wk2r * x0i + wk2i * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 2] = wk1r * x0r - wk1i * x0i;
+        a[j + 3] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 6] = wk3r * x0r - wk3i * x0i;
+        a[j + 7] = wk3r * x0i + wk3i * x0r;
+        wk1r = w[k2 + 2];
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        x0r = a[j + 8] + a[j + 10];
+        x0i = a[j + 9] + a[j + 11];
+        x1r = a[j + 8] - a[j + 10];
+        x1i = a[j + 9] - a[j + 11];
+        x2r = a[j + 12] + a[j + 14];
+        x2i = a[j + 13] + a[j + 15];
+        x3r = a[j + 12] - a[j + 14];
+        x3i = a[j + 13] - a[j + 15];
+        a[j + 8] = x0r + x2r;
+        a[j + 9] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 12] = -wk2i * x0r - wk2r * x0i;
+        a[j + 13] = -wk2i * x0i + wk2r * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 10] = wk1r * x0r - wk1i * x0i;
+        a[j + 11] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 14] = wk3r * x0r - wk3i * x0i;
+        a[j + 15] = wk3r * x0i + wk3i * x0r;
+    }
+}
+
+
+static void cftmdl(int n, int l, double *a, double *w)
+{
+    int j, j1, j2, j3, k, k1, k2, m, m2;
+    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+    
+    m = l << 2;
+    for (j = 0; j < l; j += 2) {
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x0r - x2r;
+        a[j2 + 1] = x0i - x2i;
+        a[j1] = x1r - x3i;
+        a[j1 + 1] = x1i + x3r;
+        a[j3] = x1r + x3i;
+        a[j3 + 1] = x1i - x3r;
+    }
+    wk1r = w[2];
+    for (j = m; j < l + m; j += 2) {
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x2i - x0i;
+        a[j2 + 1] = x0r - x2r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j1] = wk1r * (x0r - x0i);
+        a[j1 + 1] = wk1r * (x0r + x0i);
+        x0r = x3i + x1r;
+        x0i = x3r - x1i;
+        a[j3] = wk1r * (x0i - x0r);
+        a[j3 + 1] = wk1r * (x0i + x0r);
+    }
+    k1 = 0;
+    m2 = 2 * m;
+    for (k = m2; k < n; k += m2) {
+        k1 += 2;
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i;
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        for (j = k; j < l + k; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = wk2r * x0r - wk2i * x0i;
+            a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+        wk1r = w[k2 + 2];
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        for (j = k + m; j < l + (k + m); j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = -wk2i * x0r - wk2r * x0i;
+            a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+    }
+}
+
+
+static void rftfsub(int n, double *a, int nc, double *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr, xi, yr, yi;
+    
+    m = n >> 1;
+    ks = 2 * nc / m;
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j;
+        kk += ks;
+        wkr = 0.5 - c[nc - kk];
+        wki = c[kk];
+        xr = a[j] - a[k];
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr - wki * xi;
+        yi = wkr * xi + wki * xr;
+        a[j] -= yr;
+        a[j + 1] -= yi;
+        a[k] += yr;
+        a[k + 1] -= yi;
+    }
+}
+
+
+static void rftbsub(int n, double *a, int nc, double *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr, xi, yr, yi;
+    
+    a[1] = -a[1];
+    m = n >> 1;
+    ks = 2 * nc / m;
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j;
+        kk += ks;
+        wkr = 0.5 - c[nc - kk];
+        wki = c[kk];
+        xr = a[j] - a[k];
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr + wki * xi;
+        yi = wkr * xi - wki * xr;
+        a[j] -= yr;
+        a[j + 1] = yi - a[j + 1];
+        a[k] += yr;
+        a[k + 1] = yi - a[k + 1];
+    }
+    a[m + 1] = -a[m + 1];
+}
+
+
+static void dctsub(int n, double *a, int nc, double *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr;
+    
+    m = n >> 1;
+    ks = nc / n;
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j;
+        kk += ks;
+        wkr = c[kk] - c[nc - kk];
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[j] - wkr * a[k];
+        a[j] = wkr * a[j] + wki * a[k];
+        a[k] = xr;
+    }
+    a[m] *= c[0];
+}
+
+
+static void dstsub(int n, double *a, int nc, double *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr;
+    
+    m = n >> 1;
+    ks = nc / n;
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j;
+        kk += ks;
+        wkr = c[kk] - c[nc - kk];
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[k] - wkr * a[j];
+        a[k] = wkr * a[k] + wki * a[j];
+        a[j] = xr;
+    }
+    a[m] *= c[0];
+}
+
--- /dev/null
+++ b/src/fft4g.h
@@ -1,0 +1,28 @@
+/* This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+void cdft(int, int, double *, int *, double *);
+void rdft(int, int, double *, int *, double *);
+void ddct(int, int, double *, int *, double *);
+void ddst(int, int, double *, int *, double *);
+void dfct(int, double *, double *, int *, double *);
+void dfst(int, double *, double *, int *, double *);
+
+void cdft_f(int, int, float *, int *, float *);
+void rdft_f(int, int, float *, int *, float *);
+void ddct_f(int, int, float *, int *, float *);
+void ddst_f(int, int, float *, int *, float *);
+void dfct_f(int, float *, float *, int *, float *);
+void dfst_f(int, float *, float *, int *, float *);
--- /dev/null
+++ b/src/rate.c
@@ -1,0 +1,582 @@
+/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* Based upon the techniques described in `The Quest For The Perfect Resampler'
+ * by Laurent De Soras; http://ldesoras.free.fr/doc/articles/resampler-en.pdf */
+
+#include "sox_i.h"
+#include "fft4g.h"
+#define  FIFO_SIZE_T int
+#include "fifo.h"
+#include <assert.h>
+#include <string.h>
+
+#define  calloc     lsx_calloc
+#define  malloc     lsx_malloc
+#define  raw_coef_t double
+#define  sample_t   double
+#define  TO_SOX     SOX_FLOAT_64BIT_TO_SAMPLE
+#define  FROM_SOX   SOX_SAMPLE_TO_FLOAT_64BIT
+#define  coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) coef_p[(fir_len) * ((interp_order) + 1) * (phase_num) + ((interp_order) + 1) * (fir_coef_num) + (interp_order - coef_interp_num)]
+#ifdef   NDEBUG
+#undef   NDEBUG     /* Enable assert always */
+#endif
+
+static sample_t * prepare_coefs(raw_coef_t const * coefs, int num_coefs,
+    int num_phases, int interp_order, int multiplier)
+{
+  int i, j, length = num_coefs * num_phases;
+  sample_t * result = malloc(length * (interp_order + 1) * sizeof(*result));
+  double fm1 = coefs[0], f1 = 0, f2 = 0;
+
+  for (i = num_coefs - 1; i >= 0; --i)
+    for (j = num_phases - 1; j >= 0; --j) {
+      double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */
+      int pos = i * num_phases + j - 1;
+      fm1 = (pos ? coefs[pos - 1] : 0) * multiplier;
+      switch (interp_order) {
+        case 1: b = f1 - f0; break;
+        case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break;
+        case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break;
+        default: if (interp_order) assert(0);
+      }
+      #define coef_coef(x) \
+        coef(result, interp_order, num_coefs, j, x, num_coefs - 1 - i)
+      coef_coef(0) = f0;
+      if (interp_order > 0) coef_coef(1) = b;
+      if (interp_order > 1) coef_coef(2) = c;
+      if (interp_order > 2) coef_coef(3) = d;
+      #undef coef_coef
+      f2 = f1, f1 = f0;
+    }
+  return result;
+}
+
+typedef struct {
+  int        dft_length, num_taps;
+  sample_t   * coefs;
+} half_band_t;
+
+typedef struct {    /* Data that are shared between channels and filters */
+  sample_t   * poly_fir_coefs;
+  half_band_t half_band[2];    /* [0]: halve; [1]: down/up: halve/double */ 
+  sample_t   * sin_cos_table;  /* For Ooura fft */
+  int        * bit_rev_table;  /* ditto */
+} rate_shared_t;
+
+struct stage;
+typedef void (* stage_fn_t)(struct stage * input, fifo_t * output);
+typedef struct stage {
+  fifo_t     fifo;
+  int        pre;              /* Number of past samples to store */
+  int        pre_post;         /* pre + number of future samples to store */
+  int        preload;          /* Number of zero samples to pre-load the fifo */
+  int        which;            /* Which of the 2 half-band filters to use */
+  stage_fn_t fn;
+  sample_t   out_in_ratio;
+
+  rate_shared_t * shared;
+                               /* For poly_fir & spline: */
+  int       divisor;
+  union {                      /* 32bit.32bit fixed point arithmetic */
+    #if defined(WORDS_BIGENDIAN)
+    struct {int32_t integer; uint32_t fraction;} parts;
+    #else
+    struct {uint32_t fraction; int32_t integer;} parts;
+    #endif
+    int64_t all;
+    #define MULT32 (65536. * 65536.)
+  } at, step;
+} stage_t;
+
+#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
+#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre)
+
+static void cubic_spline(stage_t * p, fifo_t * output_fifo)
+{
+  int i, num_in = stage_occupancy(p), max_num_out = 1 + num_in*p->out_in_ratio;
+  sample_t const * input = stage_read_p(p);
+  sample_t * output = fifo_reserve(output_fifo, max_num_out);
+
+  for (i = 0; p->at.parts.integer < num_in; ++i, p->at.all += p->step.all) {
+    sample_t const * s = input + p->at.parts.integer;
+    sample_t x = p->at.parts.fraction * (1 / MULT32);
+    sample_t b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b);
+    sample_t c = s[1]-*s-a-b;
+    output[i] = ((a*x + b)*x + c)*x + *s;
+  }
+  assert(max_num_out - i >= 0);
+  fifo_trim_by(output_fifo, max_num_out - i);
+  fifo_read(&p->fifo, p->at.parts.integer, NULL);
+  p->at.parts.integer = 0;
+}
+
+static void half_sample(stage_t * p, fifo_t * output_fifo)
+{
+  sample_t * output;
+  int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
+  rate_shared_t const * s = p->shared;
+  half_band_t const * f = &s->half_band[p->which];
+  int const overlap = f->num_taps - 1;
+
+  while (num_in >= f->dft_length) {
+    sample_t const * input = fifo_read_ptr(&p->fifo);
+    fifo_read(&p->fifo, f->dft_length - overlap, NULL);
+    num_in -= f->dft_length - overlap;
+
+    output = fifo_reserve(output_fifo, f->dft_length);
+    fifo_trim_by(output_fifo, (f->dft_length + overlap) >> 1);
+    memcpy(output, input, f->dft_length * sizeof(*output));
+
+    rdft(f->dft_length, 1, output, s->bit_rev_table, s->sin_cos_table);
+    output[0] *= f->coefs[0];
+    output[1] *= f->coefs[1];
+    for (i = 2; i < f->dft_length; i += 2) {
+      sample_t tmp = output[i];
+      output[i  ] = f->coefs[i  ] * tmp - f->coefs[i+1] * output[i+1];
+      output[i+1] = f->coefs[i+1] * tmp + f->coefs[i  ] * output[i+1];
+    }
+    rdft(f->dft_length, -1, output, s->bit_rev_table, s->sin_cos_table);
+
+    for (j = 1, i = 2; i < f->dft_length - overlap; ++j, i += 2)
+      output[j] = output[i];
+  }
+}
+
+static void double_sample(stage_t * p, fifo_t * output_fifo)
+{
+  sample_t * output;
+  int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
+  rate_shared_t const * s = p->shared;
+  half_band_t const * f = &s->half_band[1];
+  int const overlap = f->num_taps - 1;
+
+  while (num_in > f->dft_length >> 1) {
+    sample_t const * input = fifo_read_ptr(&p->fifo);
+    fifo_read(&p->fifo, (f->dft_length - overlap) >> 1, NULL);
+    num_in -= (f->dft_length - overlap) >> 1;
+
+    output = fifo_reserve(output_fifo, f->dft_length);
+    fifo_trim_by(output_fifo, overlap);
+    for (j = i = 0; i < f->dft_length; ++j, i += 2)
+      output[i] = input[j], output[i+1] = 0;
+
+    rdft(f->dft_length, 1, output, s->bit_rev_table, s->sin_cos_table);
+    output[0] *= f->coefs[0];
+    output[1] *= f->coefs[1];
+    for (i = 2; i < f->dft_length; i += 2) {
+      sample_t tmp = output[i];
+      output[i  ] = f->coefs[i  ] * tmp - f->coefs[i+1] * output[i+1];
+      output[i+1] = f->coefs[i+1] * tmp + f->coefs[i  ] * output[i+1];
+    }
+    rdft(f->dft_length, -1, output, s->bit_rev_table, s->sin_cos_table);
+  }
+}
+
+static double bessel_I_0(double x)
+{
+  double term = 1, sum = 1, last_sum, x2 = x / 2;
+  int i = 1;
+  do {
+    double y = x2 / i++;
+    last_sum = sum, sum += term *= y * y;
+  } while (sum != last_sum);
+  return sum;
+}
+
+static double * make_lpf(int num_taps, double Fc, double beta, double scale)
+{
+  double * h = malloc(num_taps * sizeof(*h));
+  int i, m = num_taps - 1;
+  assert(Fc >= 0 && Fc <= 1);
+  scale /= bessel_I_0(beta);
+  for (i = 0; i <= m / 2; ++i) {
+    double x = M_PI * (i - .5 * m), y = 2. * i / m - 1;
+    h[i] = x? sin(Fc * x) / x : Fc;
+    h[i] *= bessel_I_0(beta * sqrt(1 - y * y)) * scale;
+    h[m - i] = h[i];
+  }
+  return h;
+}
+
+static double * design_lpf(
+    double Fp,      /* End of pass-band; ~= 0.01dB point */
+    double Fc,      /* Start of stop-band */
+    double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI */
+    double att,     /* Stop-band attenuation in dB */
+    int * num_taps, /* (Single phase.)  0: value will be estimated */
+    int k)          /* Number of phases; 0 for single-phase */
+{
+  double tr_bw, beta;
+
+  Fp /= Fn, Fc /= Fn;        /* Normalise to Fn (nyquist) = 1 */
+  tr_bw = .5869 * (Fc - Fp); /* Transition band-width: 6dB to stop points */
+
+  if (*num_taps == 0) {
+    double n160 = (.0425* att - 1.4) /  tr_bw;    /* Half order for att = 160 */
+    int n = n160 * (16.556 / (att - 39.6) + .8625) + .5;  /* For att [80,160) */
+    *num_taps = k? 2 * n : 2 * (n + (n & 1)) + 1; /* =1 %4 (0 phase 1/2 band) */
+  }
+  assert(att >= 80);
+  beta = att < 100 ? .1102 * (att - 8.7) : .1117 * att - 1.11;
+  if (k)
+    *num_taps = *num_taps * k - 1;
+  else k = 1;
+  return make_lpf(*num_taps, (Fc - tr_bw) / k, beta, (double)k);
+}
+
+static int set_dft_length(int num_taps) /* Set to 4 x nearest power of 2 */
+{
+  int result, n = num_taps;
+  for (result = 8; n > 2; result <<= 1, n >>= 1);
+  result = range_limit(result, 4096, 131072);
+  assert(num_taps * 2 < result);
+  return result;
+}
+
+static void half_band_filter_init(rate_shared_t * p, unsigned which, int num_taps, sample_t const h[], double Fp, double atten, int multiplier)
+{
+  half_band_t * f = &p->half_band[which];
+  int dft_length, i, M = num_taps / 2;
+
+  if (f->num_taps)
+    return;
+  if (h) {
+    dft_length = set_dft_length(num_taps);
+    f->coefs = calloc(dft_length, sizeof(*f->coefs));
+    for (i = 0; i < num_taps; ++i)
+      f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]
+          = h[abs(M - i)] / dft_length * 2 * multiplier;
+  }
+  else {
+    double * h = design_lpf(Fp, 1., 2., atten, &num_taps, 0);
+    dft_length = set_dft_length(num_taps);
+    f->coefs = calloc(dft_length, sizeof(*f->coefs));
+    for (i = 0; i < num_taps; ++i)
+      f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]
+          = h[i] / dft_length * 2 * multiplier;
+    free(h);
+  }
+  assert(num_taps & 1);
+  f->num_taps = num_taps;
+  f->dft_length = dft_length;
+  sox_debug("fir_len=%i dft_length=%i Fp=%g atten=%g", num_taps, dft_length, Fp, atten);
+  if (!p->bit_rev_table) {
+    p->bit_rev_table = calloc((size_t)sqrt((double)dft_length) + 2, sizeof(*p->bit_rev_table));
+    p->sin_cos_table = calloc(dft_length / 2, sizeof(*p->sin_cos_table));
+  }
+  rdft(dft_length, 1, f->coefs, p->bit_rev_table, p->sin_cos_table);
+}
+
+#include "rate_filters.h"
+
+typedef struct {
+  double     factor;
+  size_t     samples_in, samples_out;
+  int        level, input_level, output_level;
+  sox_bool   upsample;
+  stage_t    * stages;
+} rate_t;
+
+#define pre_stage p->stages[-1]
+#define last_stage p->stages[p->level]
+#define post_stage p->stages[p->level + 1]
+
+typedef enum {Default = -1, Quick, Low, Medium, High, Very, Ultra} quality_t;
+
+static void rate_init(rate_t * p, rate_shared_t * shared, double factor, quality_t quality, int interp_order)
+{
+  int i, mult, divisor = 1;
+
+  assert(factor > 0);
+  p->factor = factor;
+  if (quality < Quick || quality > Ultra)
+    quality = High;
+  if (quality != Quick) {
+    const int max_divisor = 2048;      /* Keep coef table size ~< 500kb */
+    const double epsilon = 4 / MULT32; /* Scaled to half this at max_divisor */
+    p->upsample = p->factor < 1;
+    for (i = factor, p->level = 0; i >>= 1; ++p->level); /* log base 2 */
+    factor /= 1 << (p->level + !p->upsample);
+    for (i = 2; i <= max_divisor && divisor == 1; ++i) {
+      double try_d = factor * i;
+      int try = try_d + .5;
+      if (fabs(try - try_d) < try * epsilon * (1 - (.5 / max_divisor) * i)) {
+        if (try == i)  /* Rounded to 1:1? */
+          factor = 1, divisor = 2, p->upsample = sox_false;
+        else factor = try, divisor = i;
+      }
+    }
+  }
+  p->stages = (stage_t *)calloc((size_t)p->level + 4, sizeof(*p->stages)) + 1;
+  for (i = -1; i <= p->level + 1; ++i) p->stages[i].shared = shared;
+  last_stage.step.all = factor * MULT32 + .5;
+  last_stage.out_in_ratio = MULT32 * divisor / last_stage.step.all;
+
+  if (divisor != 1)
+    assert(!last_stage.step.parts.fraction);
+  else if (quality != Quick)
+    assert(!last_stage.step.parts.integer);
+  sox_debug("i/o=%g; %.9g:%i @ level %i", p->factor, factor, divisor, p->level);
+
+  mult = 1 + p->upsample; /* Compensate for zero-stuffing in double_sample */
+  p->input_level = -p->upsample;
+  p->output_level = p->level;
+  if (quality == Quick) {
+    ++p->output_level;
+    last_stage.fn = cubic_spline;
+    last_stage.pre_post = max(3, last_stage.step.parts.integer);
+    last_stage.preload = last_stage.pre = 1;
+  }
+  else if (last_stage.out_in_ratio != 2 || (p->upsample && quality == Low)) {
+    poly_fir_t const * f;
+    poly_fir1_t const * f1;
+    int n = 4 * p->upsample + range_limit(quality, Medium, Very) - Medium;
+    if (interp_order < 0)
+      interp_order = quality > High;
+    interp_order = divisor == 1? 1 + interp_order : 0;
+    last_stage.divisor = divisor;
+    p->output_level += 2;
+    if (p->upsample && quality == Low)
+      mult = 1, ++p->input_level, --p->output_level, --n;
+    f = &poly_firs[n];
+    f1 = &f->interp[interp_order];
+    if (!last_stage.shared->poly_fir_coefs) {
+      int num_taps = 0, phases = divisor == 1? (1 << f1->phase_bits) : divisor;
+      raw_coef_t * coefs =
+          design_lpf(f->pass, f->stop, 1., f->att, &num_taps, phases);
+      assert(num_taps == f->num_coefs * phases - 1);
+      last_stage.shared->poly_fir_coefs =
+          prepare_coefs(coefs, f->num_coefs, phases, interp_order, mult);
+      sox_debug("fir_len=%i phases=%i coef_interp=%i multiplier=%i size=%s",
+          f->num_coefs, phases, interp_order, mult,
+          sigfigs3((num_taps + 1) * (interp_order + 1) * sizeof(sample_t)));
+      free(coefs);
+    }
+    last_stage.fn = f1->fn;
+    last_stage.pre_post = f->num_coefs - 1;
+    last_stage.pre = 0;
+    last_stage.preload = last_stage.pre_post >> 1;
+    mult = 1;
+  }
+  if (quality == Low) { /* dft is slower here, so use normal convolution */
+    post_stage.fn = half_sample_low;
+    post_stage.pre_post = 2 * (array_length(half_fir_coefs_low) - 1);
+    post_stage.preload = post_stage.pre = post_stage.pre_post >> 1;
+  }
+  else if (quality != Quick) {
+    typedef struct {int len; sample_t const * h; double bw, a;} filter_t;
+    static filter_t const filters[] = {
+      {2 * array_length(half_fir_coefs_low) - 1, half_fir_coefs_low, 0,0},
+      {0, NULL, .986, 110}, {0, NULL, .986, 125},
+      {0, NULL, .996, 156}, {0, NULL, .999, 156},
+    };
+    filter_t const * f = &filters[quality - Low];
+    assert((size_t)(quality - Low) < array_length(filters));
+    half_band_filter_init(shared, p->upsample, f->len, f->h, f->bw, f->a, mult);
+    if (p->upsample) {
+      pre_stage.fn = double_sample; /* Finish off setting up pre-stage */
+      pre_stage .preload = shared->half_band[1].num_taps >> 2;
+       /* Start setting up post-stage; TODO don't use dft for short filters */
+      if ((1 - p->factor) / (1 - f->bw) > 2)
+        half_band_filter_init(shared, 0, 0, NULL, max(p->factor, .64), f->a, 1);
+      else shared->half_band[0] = shared->half_band[1];
+    }
+    else if (p->level > 0 && p->output_level > p->level) {
+      double pass = f->bw * divisor / factor / 2;
+      if ((1 - pass) / (1 - f->bw) > 2)
+        half_band_filter_init(shared, 1, 0, NULL, max(pass, .64), f->a, 1);
+    }
+    post_stage.fn = half_sample;
+    post_stage.preload = shared->half_band[0].num_taps >> 1;
+  }
+  if (p->level > 0) {
+    stage_t * s = & p->stages[p->level - 1];
+    if (shared->half_band[1].num_taps) {
+      s->fn = half_sample;
+      s->preload = shared->half_band[1].num_taps >> 1;
+      s->which = 1;
+    }
+    else *s = post_stage;
+  }
+  for (i = p->input_level; i <= p->output_level; ++i) {
+    stage_t * s = &p->stages[i];
+    if (i >= 0 && i < p->level - 1) {
+      s->fn = half_sample_25;
+      s->pre_post = 2 * (array_length(half_fir_coefs_25) - 1);
+      s->preload = s->pre = s->pre_post >> 1;
+    }
+    fifo_create(&s->fifo, sizeof(sample_t));
+    memset(fifo_reserve(&s->fifo, s->preload), 0, sizeof(sample_t)*s->preload);
+    if (i < p->output_level)
+      sox_debug("stage=%-3ipre_post=%-3ipre=%-3ipreload=%i", i, s->pre_post, s->pre, s->preload);
+  }
+}
+
+static void rate_process(rate_t * p)
+{
+  stage_t * stage = p->stages + p->input_level;
+  int i;
+
+  for (i = p->input_level; i < p->output_level; ++i, ++stage)
+    stage->fn(stage, &(stage+1)->fifo);
+}
+
+static sample_t * rate_input(rate_t * p, sample_t const * samples, size_t n)
+{
+  p->samples_in += n;
+  return fifo_write(&p->stages[p->input_level].fifo, (int)n, samples);
+}
+
+static sample_t const * rate_output(rate_t * p, sample_t * samples, size_t * n)
+{
+  fifo_t * fifo = &p->stages[p->output_level].fifo;
+  p->samples_out += *n = min(*n, (size_t)fifo_occupancy(fifo));
+  return fifo_read(fifo, (int)*n, samples);
+}
+
+static void rate_flush(rate_t * p)
+{
+  fifo_t * fifo = &p->stages[p->output_level].fifo;
+  size_t samples_out = p->samples_in / p->factor + .5;
+  size_t remaining = samples_out - p->samples_out;
+  sample_t * buff = calloc(1024, sizeof(*buff));
+
+  if ((int)remaining > 0) {
+    while ((size_t)fifo_occupancy(fifo) < remaining) {
+      rate_input(p, buff, 1024);
+      rate_process(p);
+    }
+    fifo_trim_to(fifo, (int)remaining);
+    p->samples_in = 0;
+  }
+  free(buff);
+}
+
+static void rate_close(rate_t * p)
+{
+  rate_shared_t * shared = p->stages[0].shared;
+  int i;
+
+  for (i = p->input_level; i <= p->output_level; ++i)
+    fifo_delete(&p->stages[i].fifo);
+  free(shared->bit_rev_table);
+  free(shared->sin_cos_table);
+  free(shared->half_band[0].coefs);
+  if (shared->half_band[1].coefs != shared->half_band[0].coefs)
+    free(shared->half_band[1].coefs);
+  free(shared->poly_fir_coefs);
+  memset(shared, 0, sizeof(*shared));
+  free(p->stages - 1);
+}
+
+/*------------------------------- SoX Wrapper --------------------------------*/
+
+typedef struct {
+  sox_rate_t      out_rate;
+  int             quality, coef_interp;
+  rate_t          rate;
+  rate_shared_t   shared, * shared_ptr;
+} priv_t;
+
+static int create(sox_effect_t * effp, int argc, char **argv)
+{
+  priv_t * p = (priv_t *) effp->priv;
+  char dummy, * found_at, * bws = "-qlmhvu", * nums = "01";
+
+  p->quality = p->coef_interp = -1;
+  p->shared_ptr = &p->shared;
+  while (argc && strchr(bws, *argv[0])) {
+    char c, * q = *argv[0] == '-'? *argv + 1 : *argv;
+    while ((c = *q++)) {
+      if      ((found_at = strchr(bws+1, c))) p->quality = found_at - bws -1;
+      else if ((found_at = strchr(nums, c))) p->coef_interp = found_at - nums;
+      else return lsx_usage(effp);
+    }
+    argc--; argv++;
+  }
+  if (argc) {
+    if (sscanf(*argv, "%lf %c", &p->out_rate, &dummy) != 1 || p->out_rate <= 0)
+      return lsx_usage(effp);
+    argc--; argv++;
+  }
+  return argc? lsx_usage(effp) : SOX_SUCCESS;
+}
+
+static int start(sox_effect_t * effp)
+{
+  priv_t * p = (priv_t *) effp->priv;
+  double out_rate = p->out_rate != 0 ? p->out_rate : effp->out_signal.rate;
+
+  if (effp->in_signal.rate == out_rate)
+    return SOX_EFF_NULL;
+
+  effp->out_signal.channels = effp->in_signal.channels;
+  effp->out_signal.rate = out_rate;
+  rate_init(&p->rate, p->shared_ptr, effp->in_signal.rate / out_rate, p->quality, p->coef_interp);
+  return SOX_SUCCESS;
+}
+
+static int flow(sox_effect_t * effp, const sox_sample_t * ibuf,
+                sox_sample_t * obuf, sox_size_t * isamp, sox_size_t * osamp)
+{
+  priv_t * p = (priv_t *)effp->priv;
+  sox_size_t i;
+  size_t odone = *osamp; /* See comment in tempo.c */
+
+  sample_t const * s = rate_output(&p->rate, NULL, &odone);
+  for (i = 0; i < odone; ++i) *obuf++ = TO_SOX(*s++, effp->clips);
+
+  if (*isamp && odone < *osamp) {
+    sample_t * t = rate_input(&p->rate, NULL, *isamp);
+    for (i = *isamp; i; --i) *t++ = FROM_SOX(*ibuf++, effp->clips);
+    rate_process(&p->rate);
+  }
+  else *isamp = 0;
+  *osamp = odone;
+  return SOX_SUCCESS;
+}
+
+static int drain(sox_effect_t * effp, sox_sample_t * obuf, sox_size_t * osamp)
+{
+  priv_t * p = (priv_t *)effp->priv;
+  static sox_size_t isamp = 0;
+  rate_flush(&p->rate);
+  return flow(effp, 0, obuf, &isamp, osamp);
+}
+
+static int stop(sox_effect_t * effp)
+{
+  priv_t * p = (priv_t *) effp->priv;
+  rate_close(&p->rate);
+  return SOX_SUCCESS;
+}
+
+sox_effect_handler_t const * sox_rate_effect_fn(void)
+{
+  static sox_effect_handler_t handler = {
+    "rate", "[-q|-l|-m|-s|-h]\n"
+    "     Quality        BW %    Rej dB    Typical Use\n"
+    " -q  quick & dirty  n/a   ~30 @ Fs/4  playback on ancient hardware\n"
+    " -l  low            80       100      playback on old hardware\n" 
+    " -m  medium         99       100      audio playback\n"
+    " -h  high           99       120      16-bit mastering (use with dither)\n"
+    " -v  very high      99.7     150      24-bit mastering"
+    , SOX_EFF_RATE, create, start, flow, drain, stop, NULL, sizeof(priv_t)
+  };
+  return &handler;
+}
--- /dev/null
+++ b/src/rate_filters.h
@@ -1,0 +1,207 @@
+/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* Generated by m4 */
+
+static const sample_t half_fir_coefs_25[] = {
+  4.9622034521467345e-001, 3.1227031280072459e-001, 3.5198986166898451e-003,
+  -8.9221427418232149e-002, -2.8372758530229671e-003, 3.9139593785072563e-002,
+  1.9671926007707326e-003, -1.7250462144826118e-002, -1.1596954982514773e-003,
+  6.8588796907120527e-003, 5.7031347966698574e-004, -2.3044720814877507e-003,
+  -2.2679857326247756e-004, 6.0962502224911946e-004, 6.9133289864740816e-005,
+  -1.1323492804087647e-004, -1.4554678571602767e-005, 1.1197369453748703e-005,
+  1.6171529855434563e-006,
+};
+static const sample_t half_fir_coefs_low[] = {
+  4.2759802493108773e-001, 3.0939308096100709e-001, 6.9285325719540158e-002,
+  -8.0642059355533674e-002, -6.0528749718348158e-002, 2.5228940037788555e-002,
+  4.7756850372993369e-002, 8.7463256642532057e-004, -3.3208422093026498e-002,
+  -1.3425983316344854e-002, 1.9188320662637096e-002, 1.7478840713827052e-002,
+  -7.5527851809344612e-003, -1.6145235261724403e-002, -6.3013968965413430e-004,
+  1.1965551091184719e-002, 5.1714613100614501e-003, -6.9898749683755968e-003,
+  -6.6150222806158742e-003, 2.6394681964090937e-003, 5.9365183404658526e-003,
+  3.5567920638016650e-004, -4.2031898513566123e-003, -1.8738555289555877e-003,
+  2.2991238738122328e-003, 2.2058519188488186e-003, -7.7796582498205363e-004,
+  -1.8212814627239918e-003, -1.4964619042558244e-004, 1.1706370821176716e-003,
+  5.3082071395224866e-004, -5.6771020453353900e-004, -5.4472363026668942e-004,
+  1.5914542178505357e-004, 3.8911127354338085e-004, 4.2076035174603683e-005,
+  -2.1015548483049000e-004, -9.5381290156278399e-005, 8.0903081108059553e-005,
+  7.5812875822003258e-005, -1.5004304266040688e-005, -3.9149443482028750e-005,
+  -6.0893901283459912e-006, 1.4040363940567877e-005, 4.9834316581482789e-006,
+};
+#define FUNCTION half_sample_25 
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+#define COEFS half_fir_coefs_25 
+assert_static(!((array_length(COEFS)- 1) & 1), HALF_FIR_LENGTH_25 );
+#include "rate_half_fir.h"
+#define FUNCTION half_sample_low
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+#define COEFS half_fir_coefs_low
+assert_static(!((array_length(COEFS)- 1) & 1), HALF_FIR_LENGTH_low);
+#include "rate_half_fir.h"
+#define d100_l 16
+#define poly_fir_convolve_d100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+#define FUNCTION d100_0
+#define FIR_LENGTH d100_l
+#define CONVOLVE poly_fir_convolve_d100
+#include "rate_poly_fir0.h"
+#define FUNCTION d100_1
+#define COEF_INTERP 1
+#define PHASE_BITS 8
+#define FIR_LENGTH d100_l
+#define CONVOLVE poly_fir_convolve_d100
+#include "rate_poly_fir.h"
+#define d100_1_b 8
+#define FUNCTION d100_2
+#define COEF_INTERP 2
+#define PHASE_BITS 6
+#define FIR_LENGTH d100_l
+#define CONVOLVE poly_fir_convolve_d100
+#include "rate_poly_fir.h"
+#define d100_2_b 6
+#define d120_l 30
+#define poly_fir_convolve_d120 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+#define FUNCTION d120_0
+#define FIR_LENGTH d120_l
+#define CONVOLVE poly_fir_convolve_d120
+#include "rate_poly_fir0.h"
+#define FUNCTION d120_1
+#define COEF_INTERP 1
+#define PHASE_BITS 10
+#define FIR_LENGTH d120_l
+#define CONVOLVE poly_fir_convolve_d120
+#include "rate_poly_fir.h"
+#define d120_1_b 10
+#define FUNCTION d120_2
+#define COEF_INTERP 2
+#define PHASE_BITS 7
+#define FIR_LENGTH d120_l
+#define CONVOLVE poly_fir_convolve_d120
+#include "rate_poly_fir.h"
+#define d120_2_b 7
+#define d150_l 38
+#define poly_fir_convolve_d150 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+#define FUNCTION d150_0
+#define FIR_LENGTH d150_l
+#define CONVOLVE poly_fir_convolve_d150
+#include "rate_poly_fir0.h"
+#define FUNCTION d150_1
+#define COEF_INTERP 1
+#define PHASE_BITS 11
+#define FIR_LENGTH d150_l
+#define CONVOLVE poly_fir_convolve_d150
+#include "rate_poly_fir.h"
+#define d150_1_b 11
+#define FUNCTION d150_2
+#define COEF_INTERP 2
+#define PHASE_BITS 8
+#define FIR_LENGTH d150_l
+#define CONVOLVE poly_fir_convolve_d150
+#include "rate_poly_fir.h"
+#define d150_2_b 8
+#define U100_l 42
+#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+#define FUNCTION U100_0
+#define FIR_LENGTH U100_l
+#define CONVOLVE poly_fir_convolve_U100
+#include "rate_poly_fir0.h"
+#define FUNCTION U100_1
+#define COEF_INTERP 1
+#define PHASE_BITS 9
+#define FIR_LENGTH U100_l
+#define CONVOLVE poly_fir_convolve_U100
+#include "rate_poly_fir.h"
+#define U100_1_b 9
+#define FUNCTION U100_2
+#define COEF_INTERP 2
+#define PHASE_BITS 6
+#define FIR_LENGTH U100_l
+#define CONVOLVE poly_fir_convolve_U100
+#include "rate_poly_fir.h"
+#define U100_2_b 6
+#define u100_l 10
+#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _
+#define FUNCTION u100_0
+#define FIR_LENGTH u100_l
+#define CONVOLVE poly_fir_convolve_u100
+#include "rate_poly_fir0.h"
+#define FUNCTION u100_1
+#define COEF_INTERP 1
+#define PHASE_BITS 7
+#define FIR_LENGTH u100_l
+#define CONVOLVE poly_fir_convolve_u100
+#include "rate_poly_fir.h"
+#define u100_1_b 7
+#define FUNCTION u100_2
+#define COEF_INTERP 2
+#define PHASE_BITS 5
+#define FIR_LENGTH u100_l
+#define CONVOLVE poly_fir_convolve_u100
+#include "rate_poly_fir.h"
+#define u100_2_b 5
+#define u120_l 14
+#define poly_fir_convolve_u120 _ _ _ _ _ _ _ _ _ _ _ _ _ _
+#define FUNCTION u120_0
+#define FIR_LENGTH u120_l
+#define CONVOLVE poly_fir_convolve_u120
+#include "rate_poly_fir0.h"
+#define FUNCTION u120_1
+#define COEF_INTERP 1
+#define PHASE_BITS 8
+#define FIR_LENGTH u120_l
+#define CONVOLVE poly_fir_convolve_u120
+#include "rate_poly_fir.h"
+#define u120_1_b 8
+#define FUNCTION u120_2
+#define COEF_INTERP 2
+#define PHASE_BITS 6
+#define FIR_LENGTH u120_l
+#define CONVOLVE poly_fir_convolve_u120
+#include "rate_poly_fir.h"
+#define u120_2_b 6
+#define u150_l 20
+#define poly_fir_convolve_u150 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+#define FUNCTION u150_0
+#define FIR_LENGTH u150_l
+#define CONVOLVE poly_fir_convolve_u150
+#include "rate_poly_fir0.h"
+#define FUNCTION u150_1
+#define COEF_INTERP 1
+#define PHASE_BITS 10
+#define FIR_LENGTH u150_l
+#define CONVOLVE poly_fir_convolve_u150
+#include "rate_poly_fir.h"
+#define u150_1_b 10
+#define FUNCTION u150_2
+#define COEF_INTERP 2
+#define PHASE_BITS 7
+#define FIR_LENGTH u150_l
+#define CONVOLVE poly_fir_convolve_u150
+#include "rate_poly_fir.h"
+#define u150_2_b 7
+typedef struct {int phase_bits; stage_fn_t fn;} poly_fir1_t;
+typedef struct {int num_coefs; double pass, stop, att; poly_fir1_t interp[3];} poly_fir_t;
+static poly_fir_t const poly_firs[] = {
+  {d100_l, .75,1.5, 108, {{0, d100_0}, {d100_1_b, d100_1}, {d100_2_b, d100_2}}},
+  {d120_l,  1, 1.5, 133, {{0, d120_0}, {d120_1_b, d120_1}, {d120_2_b, d120_2}}},
+  {d150_l,  1, 1.5, 165, {{0, d150_0}, {d150_1_b, d150_1}, {d150_2_b, d150_2}}},
+  {U100_l, .724, 1, 105, {{0, U100_0}, {U100_1_b, U100_1}, {U100_2_b, U100_2}}},
+  {u100_l, .3, 1.5, 107, {{0, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}},
+  {u120_l, .5, 1.5, 125, {{0, u120_0}, {u120_1_b, u120_1}, {u120_2_b, u120_2}}},
+  {u150_l, .5, 1.5, 174, {{0, u150_0}, {u150_1_b, u150_1}, {u150_2_b, u150_2}}},
+};
+
--- /dev/null
+++ b/src/rate_half_fir.h
@@ -1,0 +1,40 @@
+/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* Down-sample by a factor of 2 using a FIR with odd length (LEN).*/
+/* Input must be preceded and followed by LEN >> 1 samples. */
+
+#define _ sum += (input[-j] + input[j]) * COEFS[j], ++j;
+static void FUNCTION(stage_t * p, fifo_t * output_fifo)
+{
+  sample_t const * input = stage_read_p(p);
+  int i, num_out = (stage_occupancy(p) + 1) / 2;
+  sample_t * output = fifo_reserve(output_fifo, num_out);
+
+  for (i = 0; i < num_out; ++i, input += 2) {
+    int j = 1;
+    sample_t sum = input[0] * COEFS[0];
+    CONVOLVE
+    assert(j == array_length(COEFS));
+    output[i] = sum;
+  }
+  fifo_read(&p->fifo, 2 * num_out, NULL);
+}
+#undef _
+#undef COEFS
+#undef CONVOLVE
+#undef FUNCTION
--- /dev/null
+++ b/src/rate_poly_fir.h
@@ -1,0 +1,72 @@
+/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* Up-sample by step in (0,1) using a poly-phase FIR with length LEN.*/
+/* Input must be preceded by LEN >> 1 samples. */
+/* Input must be followed by (LEN-1) >> 1 samples. */
+
+#define a (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 0,j))
+#define b (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 1,j))
+#define c (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 2,j))
+#define d (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 3,j))
+#if COEF_INTERP == 0
+  #define _ sum += a *at[j], ++j;
+#elif COEF_INTERP == 1
+  #define _ sum += (b *x + a)*at[j], ++j;
+#elif COEF_INTERP == 2
+  #define _ sum += ((c *x + b)*x + a)*at[j], ++j;
+#elif COEF_INTERP == 3
+  #define _ sum += (((d*x + c)*x + b)*x + a)*at[j], ++j;
+#else
+  #error COEF_INTERP
+#endif
+
+static void FUNCTION(stage_t * p, fifo_t * output_fifo)
+{
+  sample_t const * input = stage_read_p(p);
+  int i, num_in = stage_occupancy(p), max_num_out = 1 + num_in*p->out_in_ratio;
+  sample_t * output = fifo_reserve(output_fifo, max_num_out);
+
+  for (i = 0; p->at.parts.integer < num_in; ++i, p->at.all += p->step.all) {
+    sample_t const * at = input + p->at.parts.integer;
+    uint32_t fraction = p->at.parts.fraction;
+    int phase = fraction >> (32 - PHASE_BITS); /* high-order bits */
+#if COEF_INTERP > 0              /* low-order bits, scaled to [0,1) */
+    sample_t x = (sample_t) (fraction << PHASE_BITS) * (1 / MULT32);
+#endif
+    sample_t sum = 0;
+    int j = 0;
+    CONVOLVE
+    assert(j == FIR_LENGTH);
+    output[i] = sum;
+  }
+  assert(max_num_out - i >= 0);
+  fifo_trim_by(output_fifo, max_num_out - i);
+  fifo_read(&p->fifo, p->at.parts.integer, NULL);
+  p->at.parts.integer = 0;
+}
+
+#undef _
+#undef a
+#undef b
+#undef c
+#undef d
+#undef COEF_INTERP
+#undef CONVOLVE
+#undef FIR_LENGTH
+#undef FUNCTION
+#undef PHASE_BITS
--- /dev/null
+++ b/src/rate_poly_fir0.h
@@ -1,0 +1,50 @@
+/* Effect: change sample rate     Copyright (c) 2008 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* Up-sample by rational step in (0,1) using a poly-phase FIR, length LEN.*/
+/* Input must be preceded by LEN >> 1 samples. */
+/* Input must be followed by (LEN-1) >> 1 samples. */
+
+#define _ sum += (coef(p->shared->poly_fir_coefs, 0, FIR_LENGTH, divided.rem, 0, j)) *at[j], ++j;
+
+static void FUNCTION(stage_t * p, fifo_t * output_fifo)
+{
+  sample_t const * input = stage_read_p(p);
+  int i, num_in = stage_occupancy(p), max_num_out = 1 + num_in*p->out_in_ratio;
+  sample_t * output = fifo_reserve(output_fifo, max_num_out);
+  div_t divided;
+
+  for (i = 0; p->at.parts.integer < num_in * p->divisor; ++i, p->at.parts.integer += p->step.parts.integer) {
+    div_t divided = div(p->at.parts.integer, p->divisor);
+    sample_t const * at = input + divided.quot;
+    sample_t sum = 0;
+    int j = 0;
+    CONVOLVE
+    assert(j == FIR_LENGTH);
+    output[i] = sum;
+  }
+  assert(max_num_out - i >= 0);
+  fifo_trim_by(output_fifo, max_num_out - i);
+  divided = div(p->at.parts.integer, p->divisor);
+  fifo_read(&p->fifo, divided.quot, NULL);
+  p->at.parts.integer -= divided.quot * p->divisor;
+}
+
+#undef _
+#undef CONVOLVE
+#undef FIR_LENGTH
+#undef FUNCTION
--- a/src/sox.c
+++ b/src/sox.c
@@ -115,12 +115,12 @@
 /* We parse effects into a temporary effects table and then place into
  * the real effects table.  This makes it easier to reorder some effects
  * as needed.  For instance, we can run a resampling effect before
- * converting a mono file to stereo.  This allows the resample to work
+ * converting a mono file to stereo.  This allows the resampling to work
  * on half the data.
  *
  * User effects table must be 4 entries smaller then the real
  * effects table.  This is because at most we will need to add
- * a resample effect, a channel mixing effect, the input, and the output.
+ * a resampling effect, a channel mixing effect, the input, and the output.
  */
 #define MAX_USER_EFF (SOX_MAX_EFFECTS - 4)
 static sox_effect_t * user_efftab[MAX_USER_EFF];
@@ -525,17 +525,18 @@
   return &handler;
 }
 
-static void add_auto_effect(sox_effects_chain_t * chain, char const * name, sox_signalinfo_t * signal)
+static void add_auto_effect(sox_effects_chain_t * chain, char const * name, char * arg, sox_signalinfo_t * signal)
 {
   sox_effect_t * effp;
+  char * * argv = & arg;
 
-  /* Auto effect should always succeed here */
-  effp = sox_create_effect(sox_find_effect(name));
-  effp->handler.getopts(effp, 0, NULL);          /* Set up with default opts */
+  effp = sox_create_effect(sox_find_effect(name)); /* Should always succeed. */
 
-  /* But could fail here */
+  if (effp->handler.getopts(effp, arg != NULL, argv) == SOX_EOF)
+    exit(1); /* The failing effect should have displayed an error message */
+  
   if (sox_add_effect(chain, effp, signal, &ofile->ft->signal) != SOX_SUCCESS)
-    exit(2);
+    exit(2); /* The effects chain should have displayed an error message */
 }
 
 /* If needed effects are not given, auto-add at (performance) optimal point. */
@@ -544,6 +545,8 @@
   sox_signalinfo_t signal = combiner_signal;
   unsigned i, min_chan = 0, min_rate = 0;
   sox_effect_t * effp;
+  char * rate_arg = sox_mode != sox_play? NULL :
+    (rate_arg = getenv("PLAY_RATE_ARG"))? rate_arg : "-l";
 
   /* Find points after which we might add effects to change rate/chans */
   for (i = 0; i < nuser_effects; i++) {
@@ -560,12 +563,12 @@
   for (i = 0; i <= nuser_effects; i++) {
     /* If reducing channels, it's faster to do so before all other effects: */
     if (signal.channels > ofile->ft->signal.channels && i >= min_chan)
-      add_auto_effect(chain, "mixer", &signal);
+      add_auto_effect(chain, "mixer", NULL, &signal);
 
     /* If reducing rate, it's faster to do so before all other effects
      * (except reducing channels): */
     if (signal.rate > ofile->ft->signal.rate && i >= min_rate)
-      add_auto_effect(chain, "resample", &signal);
+      add_auto_effect(chain, "rate", rate_arg, &signal);
 
     if (i < nuser_effects)
       if (sox_add_effect(chain, user_efftab[i], &signal, &ofile->ft->signal) != SOX_SUCCESS)
@@ -573,9 +576,9 @@
   }
   /* Add auto effects if still needed at this point */
   if (signal.rate != ofile->ft->signal.rate)
-    add_auto_effect(chain, "resample", &signal);  /* Must be up-sampling */
+    add_auto_effect(chain, "rate", rate_arg, &signal); /* Must be up-sampling */
   if (signal.channels != ofile->ft->signal.channels)
-    add_auto_effect(chain, "mixer", &signal);     /* Must be increasing channels */
+    add_auto_effect(chain, "mixer", NULL, &signal); /* Must be increasing channels */
 
   /* Last `effect' in the chain is the output file */
   effp = sox_create_effect(output_effect_fn());