shithub: sox

--- a/ChangeLog

+++ b/ChangeLog

@@ -62,6 +62,9 @@

   o New --output option to write to multiple files in one run.

     Only useful with certain effects like trim and silence. (cbagwell)

   o Display SoX build environment information with -V -V.  (robs)

+  o Display (with -V) the detected file-type if it differs from the

+    file extension.  (robs)

+  o New -t option for soxi; to display the detected file type.  (robs)

 Other bug fixes:

--- a/Makefile.am

+++ b/Makefile.am

@@ -9,8 +9,8 @@

 # man pages are not considered to be sources, so need to add "dist_"

 # prefix to ensure they are added to the distribution.

-dist_man_MANS = sox.1 soxi.1 soxeffect.7 soxformat.7 libsox.3

-EXTRA_DIST = sox.txt soxi.txt soxeffect.txt soxformat.txt libsox.txt \

+dist_man_MANS = sox.1 soxi.1 soxformat.7 libsox.3

+EXTRA_DIST = sox.txt soxi.txt soxformat.txt libsox.txt \

 	     CMakeLists.txt sox.pc.in LICENSE.GPL LICENSE.LGPL

 if HAVE_PKGCONFIG

@@ -25,7 +25,7 @@

 .1.txt .3.txt .7.txt:

 	tbl $(srcdir)/$< | nroff -man | col -b > $@

-DOCTXT = sox.txt soxi.txt soxeffect.txt soxformat.txt libsox.txt

+DOCTXT = sox.txt soxi.txt soxformat.txt libsox.txt

 txt: $(DOCTXT)

 # Rule for making PDF man pages

@@ -32,7 +32,7 @@

 .1.pdf .3.pdf .7.pdf:

 	tbl $(srcdir)/$< | groff -man -Tps | ps2pdf - $@

-DOCPDF = sox.pdf soxi.pdf soxeffect.pdf soxformat.pdf libsox.pdf

+DOCPDF = sox.pdf soxi.pdf soxformat.pdf libsox.pdf

 pdf: $(DOCPDF)

 # Rule for making HTML man pages

@@ -48,7 +48,7 @@

 .1.html .3.html .7.html:

 	man2html -r $(srcdir)/$< | $(FIXHTML) > $@

-DOCHTML = sox.html soxi.html soxeffect.html soxformat.html libsox.html

+DOCHTML = sox.html soxi.html soxformat.html libsox.html

 html: $(DOCHTML)

 DISTCLEANFILES = $(DOCHTML) $(DOCPDF) $(DOCTXT)

--- a/README

+++ b/README

@@ -24,7 +24,6 @@

   o sox(1)

   o soxi(1)

   o soxformat(7)

-  o soxeffect(7)

   o libsox(3)

 or in plain text or PDF files for those systems without man.

--- a/libsox.3

+++ b/libsox.3

@@ -333,8 +333,7 @@

 This manual page is both incomplete and out of date.

 .SH SEE ALSO

 .BR sox (1),

-.BR soxformat (7),

-.BR soxeffect (7)

+.BR soxformat (7)

.SP

 example*.c in the SoX source distribution.

 .SH LICENSE

--- a/sox.1

+++ b/sox.1

@@ -29,7 +29,7 @@

.SP

.fi

..

-.TH SoX 1 "July 27, 2008" "sox" "Sound eXchange"

+.TH SoX 1 "September 22, 2008" "sox" "Sound eXchange"

 .SH NAME

 SoX \- Sound eXchange, the Swiss Army knife of audio manipulation

 .SH SYNOPSIS

@@ -46,6 +46,7 @@

     [\fIeffect\fR [\fIeffect-options\fR]] ...

.fi

 .SH DESCRIPTION

+.SS Introduction

 SoX reads and writes audio files in most popular formats and can

 optionally apply effects to them; it can combine multiple input

 sources, synthesise audio, and, on many systems, act as a general

@@ -69,7 +70,7 @@

.TS

 center;

l.

-Input(s) \*(RA Balancing \*(RA Combiner \*(RA Effects \*(RA Output

+Input(s) \*(RA Combiner \*(RA Effects \*(RA Output(s)

.TE

.DT

.SP

@@ -120,11 +121,8 @@

.SP

 N.B.  Detailed explanations of how to use \fIall\fR SoX parameters, file

 formats, and effects can be found below in this manual, and in

-.BR soxformat (7)

-and

-.BR soxeffect (7)

-respectively.

-.SS File Formats

+.BR soxformat (7).

+.SS File Format Types

 There are two types of audio file format that SoX can work with.  The

 first is `self-describing'; these formats include a header that

 completely describes the characteristics of the audio data that follows.

@@ -502,8 +500,8 @@

 specification.  The only work-around to this is to avoid such

 filenames; however, this is generally not difficult since most audio

 filenames have a filename `extension', whilst effect-names do not.

-.SP

-The following `special' filenames may be used in certain circumstances

+.SS Special Filenames

+The following special filenames may be used in certain circumstances

 in place of a normal filename on the command line:

.TP

 \fB\-\fR

@@ -547,6 +545,11 @@

 is by default 48\ kHz, but, as with a normal

 file, this can be overridden if desired using command-line format

 options (see below).

+.SS Supported File & Audio Device Types

+See

+.BR soxformat (7)

+for a list and description of the supported file formats and audio device

+drivers.

 .SH OPTIONS

 .SS Global Options

 These options can be specified on the command line at any point

@@ -919,6 +922,2113 @@

 use this option in

 .BR soxformat (7)

 for more information.

+.SH EFFECTS

+In addition to converting and playing audio files, SoX can be used to

+invoke a number of audio `effects'.  Multiple effects may be applied

+by specifying them one after another at the end of the SoX command line.

+Note that applying multiple effects in real-time (i.e. when playing audio)

+is likely to need a high performance computer; stopping other applications

+may alleviate performance issues should they occur.

+.SP

+Some of the SoX effects are primarily intended to be applied to a single

+instrument or `voice'.  To facilitate this, the \fBremix\fR effect and

+the global SoX option \fB\-M\fR can be used to isolate then recombine

+tracks from a multi-track recording.

+.SS Common Notation And Parameters

+In the descriptions that follow,

+brackets [ ] are used to denote parameters that are optional, braces

+{ } to denote those that are both optional and repeatable,

+and angle brackets < > to denote those that are repeatable but not

+optional.

+Where applicable, default values for optional parameters are shown in parentheses ( ).

+.SP

+The following parameters are used with, and have the same meaning for,

+several effects:

+.TP

+\fIcentre\fR[\fBk\fR]

+See

+.IR frequency .

+.TP

+\fIfrequency\fR[\fBk\fR]

+A frequency in Hz, or, if appended with `k', kHz.

+.TP

+\fIgain\fR

+A power gain in dB.

+Zero gives no gain; less than zero gives an attenuation.

+.TP

+\fIwidth\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]

+Used to specify the band-width of a filter.  A number of different

+methods to specify the width are available (though not all for every effect);

+one of the characters shown may be appended to select the desired method

+as follows:

+.TS

+center box;

+cI cI lI

+cB c l.

+\ 	Method	Notes

+h	Hz	\

+k	kHz	\

+o	Octaves	\

+q	Q-factor	See [2]

+.TE

+.DT

+.SP

+For each effect that uses this parameter, the default method (i.e. if no

+character is appended) is the one that is listed first in the effect's

+first line of description.

+.PP

+To see if SoX has support for an optional effect, enter

+.B sox \-h

+and look for its name under the list: `EFFECTS'.

+.SS Supported Effects

+.TP

+\fBallpass\fR \fIfrequency\fR[\fBk\fR]\fI width\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]

+Apply a two-pole all-pass filter with central frequency (in Hz)

+\fIfrequency\fR, and filter-width \fIwidth\fR.

+An all-pass filter changes the

+audio's frequency to phase relationship without changing its frequency

+to amplitude relationship.  The filter is described in detail in [1].

+.SP

+This effect supports the \fB\-\-plot\fR global option.

+.TP

+\fBband\fR [\fB\-n\fR] \fIcenter\fR[\fBk\fR]\fR [\fIwidth\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]]

+Apply a band-pass filter.

+The frequency response drops logarithmically

+around the

+.I center

+frequency.

+The

+.I width

+parameter gives the slope of the drop.

+The frequencies at

+.I center

++

+.I width

+and

+.I center

+\-

+.I width

+will be half of their original amplitudes.

+.B band

+defaults to a mode oriented to pitched audio,

+i.e. voice, singing, or instrumental music.

+The \fB\-n\fR (for noise) option uses the alternate mode

+for un-pitched audio (e.g. percussion).

+.B Warning:

+\fB\-n\fR introduces a power-gain of about 11dB in the filter, so beware

+of output clipping.

+.B band

+introduces noise in the shape of the filter,

+i.e. peaking at the

+.I center

+frequency and settling around it.

+.SP

+This effect supports the \fB\-\-plot\fR global option.

+.SP

+See also \fBfilter\fR for a bandpass filter with steeper shoulders.

+.TP

+\fBbandpass\fR\^|\^\fBbandreject\fR [\fB\-c\fR] \fIfrequency\fR[\fBk\fR]\fI width\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]

+Apply a two-pole Butterworth band-pass or band-reject filter with

+central frequency \fIfrequency\fR, and (3dB-point) band-width

+\fIwidth\fR.  The

+.B \-c

+option applies only to

+.B bandpass

+and selects a constant skirt gain (peak gain = Q) instead of the

+default: constant 0dB peak gain.

+The filters roll off at 6dB per octave (20dB per decade)

+and are described in detail in [1].

+.SP

+These effects support the \fB\-\-plot\fR global option.

+.SP

+See also \fBfilter\fR for a bandpass filter with steeper shoulders.

+.TP

+\fBbandreject \fIfrequency\fR[\fBk\fR]\fI width\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]

+Apply a band-reject filter.

+See the description of the \fBbandpass\fR effect for details.

+.TP

+\fBbass\fR\^|\^\fBtreble \fIgain\fR [\fIfrequency\fR[\fBk\fR]\fR [\fIwidth\fR[\fBs\fR\^|\^\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]]]

+Boost or cut the bass (lower) or treble (upper) frequencies of the audio

+using a two-pole shelving filter with a response similar to that

+of a standard hi-fi's tone-controls.  This is also

+known as shelving equalisation (EQ).

+.SP

+\fIgain\fR gives the gain at 0\ Hz (for \fBbass\fR), or whichever is

+the lower of \(ap22\ kHz and the Nyquist frequency (for \fBtreble\fR).  Its

+useful range is about \-20 (for a large cut) to +20 (for a large

+boost).

+Beware of

+.B Clipping

+when using a positive \fIgain\fR.

+.SP

+If desired, the filter can be fine-tuned using the following

+optional parameters:

+.SP

+\fIfrequency\fR sets the filter's central frequency and so can be

+used to extend or reduce the frequency range to be boosted or

+cut.  The default value is 100\ Hz (for \fBbass\fR) or 3\ kHz (for

+\fBtreble\fR).

+.SP

+\fIwidth\fR

+determines how

+steep is the filter's shelf transition.  In addition to the common

+width specification methods described above,

+`slope' (the default, or if appended with `\fBs\fR') may be used.

+The useful range of `slope' is

+about 0\*d3, for a gentle slope, to 1 (the maximum), for a steep slope; the

+default value is 0\*d5.

+.SP

+The filters are described in detail in [1].

+.SP

+These effects support the \fB\-\-plot\fR global option.

+.SP

+See also \fBequalizer\fR for a peaking equalisation effect.

+.TP

+\fBchorus \fIgain-in gain-out\fR <\fIdelay decay speed depth \fB\-s\fR\^|\^\fB\-t\fR>

+Add a chorus effect to the audio.  This can make a single vocal sound

+like a chorus, but can also be applied to instrumentation.

+.SP

+Chorus resembles an echo effect with a short delay, but

+whereas with echo the delay is constant, with chorus, it

+is varied using sinusoidal or triangular modulation.  The modulation

+depth defines the range the modulated delay is played before or after the

+delay. Hence the delayed sound will sound slower or faster, that is the delayed

+sound tuned around the original one, like in a chorus where some vocals are

+slightly off key.

+See [3] for more discussion of the chorus effect.

+.SP

+Each four-tuple parameter

+delay/decay/speed/depth gives the delay in milliseconds

+and the decay (relative to gain-in) with a modulation

+speed in Hz using depth in milliseconds.

+The modulation is either sinusoidal (\fB\-s\fR) or triangular

+(\fB\-t\fR).  Gain-out is the volume of the output.

+.SP

+A typical delay is around 40ms to 60ms; the modulation speed is best

+near 0\*d25Hz and the modulation depth around 2ms.

+For example, a single delay:

+.EX

+	play guitar1.wav chorus 0.7 0.9 55 0.4 0.25 2 \-t

+.EE

+Two delays of the original samples:

+.EX

+	play guitar1.wav chorus 0.6 0.9 50 0.4 0.25 2 \-t \\

+		 60 0.32 0.4 1.3 \-s

+.EE

+A fuller sounding chorus (with three additional delays):

+.EX

+	play guitar1.wav chorus 0.5 0.9 50 0.4 0.25 2 \-t \\

+		 60 0.32 0.4 2.3 \-t 40 0.3 0.3 1.3 \-s

+.EE

+.TP

+\fBcompand \fIattack1\fB,\fIdecay1\fR{\fB,\fIattack2\fB,\fIdecay2\fR}

+[\fIsoft-knee-dB\fB:\fR]\fIin-dB1\fR[\fB,\fIout-dB1\fR]{\fB,\fIin-dB2\fB,\fIout-dB2\fR}

+.br

+[\fIgain\fR [\fIinitial-volume-dB\fR [\fIdelay\fR]]]

+.SP

+Compand (compress or expand) the dynamic range of the audio.

+.SP

+The

+.I attack

+and

+.I decay

+parameters (in seconds) determine the time over which the

+instantaneous level of the input signal is averaged to determine its

+volume; attacks refer to increases in volume and decays refer to

+decreases.

+For most situations, the attack time (response to the music getting

+louder) should be shorter than the decay time because the human ear is more

+sensitive to sudden loud music than sudden soft music.

+Where more than one pair of attack/decay parameters are

+specified, each input channel is companded separately and the number of

+pairs must agree with the number of input channels.

+Typical values are

+.B 0\*d3,0\*d8

+seconds.

+.SP

+The second parameter is a list of points on the compander's transfer

+function specified in dB relative to the maximum possible signal

+amplitude.  The input values must be in a strictly increasing order but

+the transfer function does not have to be monotonically rising.  If

+omitted, the value of

+.I out-dB1

+defaults to the same value as

+.IR in-dB1 ;

+levels below

+.I in-dB1

+are not companded (but may have gain applied to them).

+The point \fB0,0\fR is assumed but may be overridden (by

+\fB0,\fIout-dBn\fR).

+If the list is preceded by a

+.I soft-knee-dB

+value, then the points where adjacent line segments on the

+transfer function meet will be rounded by the amount given.

+Typical values for the transfer function are

+.BR 6:\-70,\-60,\-20 .

+.SP

+The third (optional) parameter is an additional gain in dB to be applied

+at all points on the transfer function and allows easy adjustment

+of the overall gain.

+.SP

+The fourth (optional) parameter is an initial level to be assumed for

+each channel when companding starts.  This permits the user to supply a

+nominal level initially, so that, for example, a very large gain is not

+applied to initial signal levels before the companding action has begun

+to operate: it is quite probable that in such an event, the output would

+be severely clipped while the compander gain properly adjusts itself.

+A typical value (for audio which is initially quiet) is

+.B \-90

+dB.

+.SP

+The fifth (optional) parameter is a delay in seconds.  The input signal

+is analysed immediately to control the compander, but it is delayed

+before being fed to the volume adjuster.  Specifying a delay

+approximately equal to the attack/decay times allows the compander to

+effectively operate in a `predictive' rather than a reactive mode.

+A typical value is

+.B 0\*d2

+seconds.

+.SP

+This effect supports the \fB\-\-plot\fR global option (for the transfer function).

+.SP

+The following example might be used to make a piece of music with both

+quiet and loud passages suitable for listening to in a noisy environment

+such as a moving vehicle:

+.EX

+	sox asz.au asz-car.au compand 0.3,1 6:-70,-60,-20 -5 -90 0.2

+.EE

+The transfer function (`6:\-70,...') says that very soft sounds (below

+\-70dB) will remain unchanged.  This will stop the compander from

+boosting the volume on `silent' passages such as between movements.

+However, sounds in the range \-60dB to 0dB (maximum

+volume) will be boosted so that the 60dB dynamic range of the

+original music will be compressed 3-to-1 into a 20dB range, which is

+wide enough to enjoy the music but narrow enough to get around the

+road noise.  The `6:' selects 6dB soft-knee companding.

+The \-5 (dB) output gain is needed to avoid clipping (the number is

+inexact, and was derived by experimentation).

+The \-90 (dB) for the initial volume will work fine for a clip that starts

+with near silence, and the delay of 0\*d2 (seconds) has the effect of causing

+the compander to react a bit more quickly to sudden volume changes.

+.SP

+See also

+.B mcompand

+for a multiple-band companding effect.

+.TP

+\fBcontrast [\fIenhancement-amount (75)\fR]

+Comparable with compression, this effect modifies an audio signal to

+make it sound louder.

+.I enhancement-amount

+controls the amount of the enhancement and is a number in the range 0\-100.

+Note that

+.I enhancement-amount

+= 0 still gives a significant contrast enhancement.

+.B contrast

+is often used in conjunction with the

+.B norm

+effect as follows:

+.EX

+	sox infile outfile norm -i contrast

+.EE

+.TP

+\fBdcshift \fIshift\fR [\fIlimitergain\fR]

+DC Shift the audio, with basic linear amplitude formula.

+This is most useful if your audio tends to not be centered around

+a value of 0.  Shifting it back will allow you to get the most volume

+adjustments without clipping.

+.SP

+The first option is the \fIdcshift\fR value.  It is a floating point number that

+indicates the amount to shift.

+.SP

+An optional

+.I limitergain

+can be specified as well.  It should have a value much less than 1

+(e.g. 0\*d05 or 0\*d02) and is used only on peaks to prevent clipping.

+.SP

+An alternative approach to removing a DC offset (albeit with a short delay)

+is to use the

+.B highpass

+filter effect at a frequency of say 10Hz, as illustrated in the following

+example:

+.EX

+	sox -n out.au synth 5 sin %0 50 highpass 10

+.EE

+.TP

+\fBdeemph\fR

+Apply IEC 60908 de-emphasis (a treble attenuation shelving filter) to

+44\*d1kHz (Compact Disc) audio.

+.SP

+Pre-emphasis was applied in the mastering of some CDs issued in the early

+1980s.  These included many classical music albums, as well as now

+sought-after issues of albums by The Beatles, Pink Floyd and others.

+Pre-emphasis should be removed at playback time by a de-emphasis

+filter in the playback device.  However, not all modern CD players have

+this filter, and very few PC CD drives have it; playing pre-emphasised

+audio without the correct de-emphasis filter results in audio that sounds harsh

+and is far from what its creators intended.

+.SP

+With the

+.B deemph

+effect, it is possible to apply the necessary de-emphasis to audio that

+has been extracted from a pre-emphasised CD, and then either burn the

+de-emphasised audio to a new CD (which will then play correctly on any

+CD player), or simply play the correctly de-emphasised audio files on the

+PC.  For example:

+.EX

+	sox track1.wav track1-deemph.wav deemph

+.EE

+and then burn track1-deemph.wav to CD, or

+.EX

+	play track1-deemph.wav

+.EE

+or simply

+.EX

+	play track1.wav deemph

+.EE

+The de-emphasis filter is implemented as a biquad; its maximum deviation

+from the ideal response is only 0\*d06dB (up to 20kHz).

+.SP

+This effect supports the \fB\-\-plot\fR global option.

+.SP

+See also the \fBbass\fR and \fBtreble\fR shelving equalisation effects.

+.TP

+\fBdelay\fR {\fIlength\fR}

+Delay one or more audio channels.

+.I length

+can specify a time or, if appended with an `s', a number of samples.

+For example,

+.B delay 1\*d5 0 0\*d5

+delays the first channel by 1\*d5 seconds, the third channel by 0\*d5

+seconds, and leaves the second channel (and any other channels that may be

+present) un-delayed.

+The following (one long) command plays a chime sound:

+.EX

+	play -n synth sin %-21.5 sin %-14.5 sin %-9.5 sin %-5.5 \\

+	  sin %-2.5 sin %2.5 gain -5.4 fade h 0.008 2 1.5 \\

+	  delay 0 .27 .54 .76 1.01 1.3 remix - fade h 0.1 2.72 2.5

+.EE

+.TP

+\fBdither\fR [\fIdepth\fR]

+Apply dithering to the audio.

+Dithering deliberately adds digital white noise to the signal

+in order to mask audible quantization effects that

+can occur if the output sample size is less than 24 bits.

+By default, the amount of noise added is \(12 bit;

+the optional \fIdepth\fR parameter is a (linear or voltage)

+multiplier of this amount.

+.SP

+This effect should not be followed by any other effect that

+affects the audio.

+.TP

+\fBearwax\fR

+Makes audio easier to listen to on headphones.

+Adds `cues' to 44\*d1kHz stereo (i.e. audio CD format) audio so that

+when listened to on headphones the stereo image is

+moved from inside

+your head (standard for headphones) to outside and in front of the

+listener (standard for speakers).  See

+http://www.geocities.com/beinges

+for a full explanation.

+.TP

+\fBecho \fIgain-in gain-out\fR <\fIdelay decay\fR>

+Add echoing to the audio.

+Echoes are reflected sound and can occur naturally amongst mountains

+(and sometimes large buildings) when talking or shouting; digital echo

+effects emulate this behaviour and are often used to help fill

+out the sound of a single instrument or vocal.  The time difference

+between the original signal and the reflection is the `delay' (time),

+and the loudness of the reflected signal is the `decay'.  Multiple echoes

+can have different delays and decays.

+.SP

+Each given

+.I "delay decay"

+pair gives the delay in milliseconds

+and the decay (relative to gain-in) of that echo.

+Gain-out is the volume of the output.

+For example:

+This will make it sound as if there are twice as many instruments as are

+actually playing:

+.EX

+	play lead.aiff echo 0.8 0.88 60 0.4

+.EE

+If the delay is very short, then it sounds like a (metallic) robot playing

+music:

+.EX

+	play lead.aiff echo 0.8 0.88 6 0.4

+.EE

+A longer delay will sound like an open air concert in the mountains:

+.EX

+	play lead.aiff echo 0.8 0.9 1000 0.3

+.EE

+One mountain more, and:

+.EX

+	play lead.aiff echo 0.8 0.9 1000 0.3 1800 0.25

+.EE

+.TP

+\fBechos \fIgain-in gain-out\fR <\fIdelay decay\fR>

+Add a sequence of echoes to the audio.

+Each

+.I "delay decay"

+pair gives the delay in milliseconds

+and the decay (relative to gain-in) of that echo.

+Gain-out is the volume of the output.

+.SP

+Like the echo effect, echos stand for `ECHO in Sequel', that is the first echos

+takes the input, the second the input and the first echos, the third the input

+and the first and the second echos, ... and so on.

+Care should be taken using many echos; a single echos

+has the same effect as a single echo.

+.SP

+The sample will be bounced twice in symmetric echos:

+.EX

+	play lead.aiff echos 0.8 0.7 700 0.25 700 0.3

+.EE

+The sample will be bounced twice in asymmetric echos:

+.EX

+	play lead.aiff echos 0.8 0.7 700 0.25 900 0.3

+.EE

+The sample will sound as if played in a garage:

+.EX

+	play lead.aiff echos 0.8 0.7 40 0.25 63 0.3

+.EE

+.TP

+\fBequalizer \fIfrequency\fR[\fBk\fR]\fI width\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR] \fIgain\fR

+Apply a two-pole peaking equalisation (EQ) filter.

+With this filter, the signal-level at and around a selected frequency

+can be increased or decreased, whilst (unlike band-pass and band-reject

+filters) that at all other frequencies is unchanged.

+.SP

+\fIfrequency\fR gives the filter's central frequency in Hz,

+\fIwidth\fR, the band-width,

+and \fIgain\fR the required gain

+or attenuation in dB.

+Beware of

+.B Clipping

+when using a positive \fIgain\fR.

+.SP

+In order to produce complex equalisation curves, this effect

+can be given several times, each with a different central frequency.

+.SP

+The filter is described in detail in [1].

+.SP

+This effect supports the \fB\-\-plot\fR global option.

+.SP

+See also \fBbass\fR and \fBtreble\fR for shelving equalisation effects.

+.TP

+\fBfade\fR [\fItype\fR] \fIfade-in-length\fR [\fIstop-time\fR [\fIfade-out-length\fR]]

+Add a fade effect to the beginning, end, or both of the audio.

+.SP

+For fade-ins, this starts from the first sample and ramps the volume of the audio from 0 to full volume over \fIfade-in-length\fR seconds.  Specify 0 seconds if no fade-in is wanted.

+.SP

+For fade-outs, the audio will be truncated at

+.I stop-time

+and

+the volume will be ramped from full volume down to 0 starting at

+\fIfade-out-length\fR seconds before the \fIstop-time\fR.  If

+.I fade-out-length

+is not specified, it defaults to the same value as

+\fIfade-in-length\fR.

+No fade-out is performed if

+.I stop-time

+is not specified.

+If the file length can be determined from the input file header and length-changing effects are not in effect, then \fB0\fR may be specified for

+.I stop-time

+to indicate the usual case of a fade-out that ends at the end of the input

+audio stream.

+.SP

+All times can be specified in either periods of time or sample counts.

+To specify time periods use the hh:mm:ss.frac format.  To specify

+using sample counts, specify the number of samples and append the letter `s'

+to the sample count (for example `8000s').

+.SP

+An optional \fItype\fR can be specified to change the type of envelope.  Choices are \fBq\fR for quarter of a sine wave, \fBh\fR for half a sine wave, \fBt\fR for linear slope, \fBl\fR for logarithmic, and \fBp\fR for inverted parabola.  The default is logarithmic.

+.TP

+\fBfilter\fR [\fIlow\fR]\fB\-\fR[\fIhigh\fR] [\fIwindow-len\fR [\fIbeta\fR]]

+Apply a sinc-windowed lowpass, highpass, or bandpass filter of given

+window length to the signal.

+\fIlow\fR refers to the frequency of the lower 6dB corner of the filter.

+\fIhigh\fR refers to the frequency of the upper 6dB corner of the filter.

+.SP

+A low-pass filter is obtained by leaving \fIlow\fR unspecified, or 0.

+A high-pass filter is obtained by leaving \fIhigh\fR unspecified, or 0,

+or greater than or equal to the Nyquist frequency.

+.SP

+The \fIwindow-len\fR, if unspecified, defaults to 128.

+Longer windows give a sharper cut-off, smaller windows a more gradual cut-off.

+.SP

+The \fIbeta\fR parameter

+determines the type of filter window used.  Any value greater than 2 is

+the beta for a Kaiser window.  Beta \(<= 2 selects a Nuttall window.

+If unspecified, the default is a Kaiser window with beta 16.

+.SP

+In the case of Kaiser window (beta > 2), lower betas produce a

+somewhat faster transition from pass-band to stop-band, at the cost of

+noticeable artifacts. A beta of 16 is the default, beta less than 10

+is not recommended. If you want a sharper cut-off, don't use low

+betas; use a longer sample window. A Nuttall window is selected by

+specifying any `beta' \(<= 2, and the Nuttall window has somewhat

+steeper cut-off than the default Kaiser window. You will probably not

+need to use the beta parameter at all, unless you are just curious

+about comparing the effects of Nuttall vs. Kaiser windows.

+.TP

+\fBflanger\fR [\fIdelay depth regen width speed shape phase interp\fR]

+Apply a flanging effect to the audio.

+See [3] for a detailed description of flanging.

+.SP

+All parameters are optional (right to left).

+.TS

+center box;

+cB cB cB lB

+cI c c l.

+\ 	Range	Default	Description

+delay	0 \- 10	0	Base delay in milliseconds.

+depth	0 \- 10	2	Added swept delay in milliseconds.

+regen	\-95 \- 95	0	T{

+.na

+Percentage regeneration (delayed signal feedback).

+T}

+width	0 \- 100	71	T{

+.na

+Percentage of delayed signal mixed with original.

+T}

+speed	0\*d1 \- 10	0\*d5	Sweeps per second (Hz).

+shape	\ 	sin	Swept wave shape: \fBsine\fR\^|\^\fBtriangle\fR.

+phase	0 \- 100	25	T{

+.na

+Swept wave percentage phase-shift for multi-channel (e.g. stereo) flange;

+0 = 100 = same phase on each channel.

+T}

+interp	\ 	lin	T{

+.na

+Digital delay-line interpolation: \fBlinear\fR\^|\^\fBquadratic\fR.

+T}

+.TE

+.DT

+.TP

+\fBgain \fIdB-gain\fR

+Apply an amplification or an attenuation to the audio signal.

+This is an alias for the

+.B vol

+effect\*mhandy for those who prefer to work in dBs by default.

+.TP

+\fBhighpass\fR\^|\^\fBlowpass\fR [\fB\-1\fR|\fB\-2\fR] \fIfrequency\fR[\fBk\fR]\fR [\fIwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR]]

+Apply a high-pass or low-pass filter with 3dB point \fIfrequency\fR.

+The filter can be either single-pole (with

+.BR \-1 ),

+or double-pole (the default, or with

+.BR \-2 ).

+.I width

+applies only to double-pole filters;

+the default is Q = 0\*d707 and gives a Butterworth response.  The filters

+roll off at 6dB per pole per octave (20dB per pole per decade).  The

+double-pole filters are described in detail in [1].

+.SP

+These effects support the \fB\-\-plot\fR global option.

+.SP

+See also \fBfilter\fR for filters with a steeper roll-off.

+.TP

+\fBladspa\fR \fBmodule\fR [\fBplugin\fR] [\fBargument\fR...]

+Apply a LADSPA [5] (Linux Audio Developer's Simple Plugin API) plugin.

+Despite the name, LADSPA is not Linux-specific, and a wide range of

+effects is available as LADSPA plugins, such as cmt [6] (the Computer

+Music Toolkit) and Steve Harris's plugin collection [7]. The first

+argument is the plugin module, the second the name of the plugin (a

+module can contain more than one plugin) and any other arguments are

+for the control ports of the plugin. Missing arguments are supplied by

+default values if possible. Only plugins with at most one audio input

+and one audio output port can be used.  If found, the environment variable

+LADSPA_PATH will be used as search path for plugins.

+.TP

+\fBloudness [\fIgain\fR [\fIreference\fR]]

+Loudness control\*msimilar to the

+.B gain

+effect, but provides equalisation for the human auditory system.  See

+http://en.wikipedia.org/wiki/Loudness for a detailed description of

+loudness.  The gain is adjusted by the given

+.I gain

+parameter (usually negative) and the signal equalised according to ISO

+226 w.r.t. a reference level of 65dB, though an alternative

+.I reference

+level may be given if the original audio has been equalised for some

+other optimal level.

+.SP

+See also the

+.B gain

+effect.

+.TP

+\fBlowpass\fR [\fB\-1\fR|\fB\-2\fR] \fIfrequency\fR[\fBk\fR]\fR [\fIwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR]]

+Apply a low-pass filter.

+See the description of the \fBhighpass\fR effect for details.

+.TP

+\fBmcompand\fR \(dq\fIattack1\fB,\fIdecay1\fR{\fB,\fIattack2\fB,\fIdecay2\fR}

+[\fIsoft-knee-dB\fB:\fR]\fIin-dB1\fR[\fB,\fIout-dB1\fR]{\fB,\fIin-dB2\fB,\fIout-dB2\fR}

+.br

+[\fIgain\fR [\fIinitial-volume-dB\fR [\fIdelay\fR]]]\(dq {\fIxover-freq\fR[\fBk\fR] \(dqattack1,...\(dq}

+.SP

+The multi-band compander is similar to the single-band compander but the

+audio is first divided into bands using Butterworth cross-over filters

+and a separately specifiable compander run on each band.  See the

+\fBcompand\fR effect for the definition of its parameters.  Compand

+parameters are specified between double quotes and the crossover

+frequency for that band is given by \fIxover-freq\fR; these can be

+repeated to create multiple bands.

+.SP

+For example, the following (one long) command shows how multi-band

+companding is typically used in FM radio:

+.EX

+	play track1.wav gain -3 filter 8000- 32 100 mcompand \\

+	\(dq0.005,0.1 -47,-40,-34,-34,-17,-33\(dq 100 \\

+	\(dq0.003,0.05 -47,-40,-34,-34,-17,-33\(dq 400 \\

+	\(dq0.000625,0.0125 -47,-40,-34,-34,-15,-33\(dq 1600 \\

+	\(dq0.0001,0.025 -47,-40,-34,-34,-31,-31,-0,-30\(dq 6400 \\

+	\(dq0,0.025 -38,-31,-28,-28,-0,-25\(dq \\

+	gain 15 highpass 22 highpass 22 filter -17500 256 \\

+	gain 9 lowpass -1 17801

+.EE

+The audio file is played with a simulated FM radio sound (or broadcast

+signal condition if the lowpass filter at the end is skipped).

+Note that the pipeline is set up with US-style 75us preemphasis.

+.SP

+See also

+.B compand

+for a single-band companding effect.

+.TP

+\fBmixer\fR [ \fB\-l\fR\^|\^\fB\-r\fR\^|\^\fB\-f\fR\^|\^\fB\-b\fR\^|\^\fB\-1\fR\^|\^\fB\-2\fR\^|\^\fB\-3\fR\^|\^\fB\-4\fR\^|\^\fIn\fR{\fB,\fIn\fR} ]

+Reduce the number of audio channels by mixing or selecting channels,

+or increase the number of channels by duplicating channels.

+Note: this effect operates on the audio

+.I channels

+within the SoX effects processing chain; it should not be confused with the

+.B \-m

+global option (where multiple

+.I files

+are mix-combined before entering the effects chain).

+.SP

+This effect is automatically used when the number of input

+channels differs from the number of output channels.  When reducing

+the number of channels it is possible to manually specify the

+.B mixer

+effect and use the \fB\-l\fR, \fB\-r\fR, \fB\-f\fR, \fB\-b\fR,

+\fB\-1\fR, \fB\-2\fR, \fB\-3\fR, \fB\-4\fR options to select only

+the left, right, front, back channel(s) or specific channel

+for the output instead of averaging the channels.

+The \fB\-l\fR and \fB\-r\fR options will do averaging

+in quad-channel files so select the exact channel to prevent this.

+.SP

+The

+.B mixer

+effect can also be invoked with up to 16

+numbers, separated by commas, which specify the proportion (0 = 0% and 1 = 100%)

+of each input channel that is to be mixed into each output channel.

+In two-channel mode, 4 numbers are given: l \*(RA l, l \*(RA r, r \*(RA l, and r \*(RA r,

+respectively.

+In four-channel mode, the first 4 numbers give the proportions for the

+left-front output channel, as follows: lf \*(RA lf, rf \*(RA lf, lb \*(RA lf, and

+rb \*(RA lf.

+The next 4 give the right-front output in the same order, then

+left-back and right-back.

+.SP

+It is also possible to use the 16 numbers to expand or reduce the

+channel count; just specify 0 for unused channels.

+.SP

+Finally, certain reduced combinations of numbers can be specified

+for certain input/output channel combinations.

+.TS

+center box ;

+cB cB cB lB

+c c c l .

+In Ch	Out Ch	Num	Mappings

+2	1	2	l \*(RA l, r \*(RA l

+2	2	1	adjust balance

+4	1	4	lf \*(RA l, rf \*(RA l, lb \*(RA l, rb \*(RA l

+4	2	2	lf \*(RA l&rf \*(RA r, lb \*(RA l&rb \*(RA r

+4	4	1	adjust balance

+4	4	2	front balance, back balance

+.TE

+.DT

+.SP

+See also

+.B remix

+for a mixing effect that handles any number of channels.

+.TP

+\fBnoiseprof\fR [\fIprofile-file\fR]

+Calculate a profile of the audio for use in noise reduction.  See the

+description of the \fBnoisered\fR effect for details.

+.TP

+\fBnoisered\fR [\fIprofile-file\fR [\fIamount\fR]]

+Reduce noise in the audio signal by profiling and filtering.  This

+effect is moderately effective at removing consistent background noise

+such as hiss or hum.  To use it, first run SoX with the \fBnoiseprof\fR

+effect on a section of audio that ideally would contain silence but in

+fact contains noise\*msuch sections are typically found at the beginning

+or the end of a recording.  \fBnoiseprof\fR will write out a noise

+profile to \fIprofile-file\fR, or to stdout if no \fIprofile-file\fR or

+if `\-' is given.  E.g.

+.EX

+	sox speech.au -n trim 0 1.5 noiseprof speech.noise-profile

+.EE

+To actually remove the noise, run SoX again, this time with the \fBnoisered\fR

+effect;

+.B noisered

+will reduce noise according to a noise profile (which was generated by

+.BR noiseprof ),

+from

+.IR profile-file ,

+or from stdin if no \fIprofile-file\fR or if `\-' is given.  E.g.

+.EX

+	sox speech.au cleaned.au noisered speech.noise-profile 0.3

+.EE

+How much noise should be removed is specified by

+.IR amount \*ma

+number between 0 and 1 with a default of 0\*d5.  Higher numbers will

+remove more noise but present a greater likelihood of removing wanted

+components of the audio signal.  Before replacing an original recording

+with a noise-reduced version, experiment with different

+.I amount

+values to find the optimal one for your audio; use headphones to check

+that you are happy with the results, paying particular attention to quieter

+sections of the audio.

+.SP

+On most systems, the two stages\*mprofiling and reduction\*mcan be combined

+using a pipe, e.g.

+.EX

+	sox noisy.au -n trim 0 1 noiseprof | play noisy.au noisered

+.EE

+.TP

+\fBnorm\fR [\fB\-i\fR\^|\^\fB\-b\fR] [\fIlevel\fR]

+Normalise audio to 0dB FSD, to a given level relative to 0dB, or normalise

+the balance of multi-channel audio.

+Requires temporary file space to store the audio to be normalised.

+.SP

+To create a normalised copy of an audio file,

+.EX

+	sox infile outfile norm

+.EE

+can be used, though note that if `infile' has a simple encoding (e.g.

+PCM), then

+.EX

+	sox infile outfile vol \`sox infile -n stat -v 2>&1\`

+.EE

+(on systems that support this construct) might be quicker to execute

+(though perhaps not to type!) as it doesn't require a temporary file.

+.SP

+For a more complex example, suppose that `effect1' performs some unknown

+or unpredictable attenuation and that `effect2' requires up to 10dB of

+headroom, then

+.EX

+	sox infile outfile effect1 norm -10 effect2 norm

+.EE

+gives both effect2 and the output file the highest possible signal

+levels.

+.SP

+Normally, audio is normalised based on the level of the channel with

+the highest peak level, which means that whilst all channels are adjusted,

+only one channel attains

+the normalised level.  If the

+.B \-i

+option is given, then each channel is treated individually and

+will attain the normalised level.

+.SP

+If the

+.B \-b

+option is given (with a multi-channel audio file), then the audio

+channels will be balanced; i.e. the RMS level of each channel will be

+normalised to that of the channel with the highest RMS level.  This can

+be used, for example, to correct stereo imbalance.  Note that

+.B \-b

+can cause clipping.

+.SP

+In most cases,

+.B norm \-3

+should be the maximum level used at the output file (to leave headroom

+for playback-resampling, etc.).  See also the discussions of

+.B Clipping

+and Replay Gain above.

+.TP

+\fBoops\fR

+Out Of Phase Stereo effect.

+Mixes stereo to twin-mono where each mono channel contains the

+difference between the left and right stereo channels.

+This is sometimes known as the `karaoke' effect as it often has the effect

+of removing most or all of the vocals from a recording.

+.TP

+\fBpad\fR { \fIlength\fR[\fB@\fIposition\fR] }

+Pad the audio with silence, at the beginning, the end, or any

+specified points through the audio.

+Both

+.I length

+and

+.I position

+can specify a time or, if appended with an `s', a number of samples.

+.I length

+is the amount of silence to insert and

+.I position

+the position in the input audio stream at which to insert it.

+Any number of lengths and positions may be specified, provided that

+a specified position is not less than the previous one.

+.I position

+is optional for the first and last lengths specified and

+if omitted correspond to the beginning and the end of the audio respectively.

+For example,

+.B pad 1\*d5 1\*d5

+adds 1\*d5 seconds of silence padding at each end of the audio, whilst

+.B pad 4000s@3:00

+inserts 4000 samples of silence 3 minutes into the audio.

+If silence is wanted only at the end of the audio, specify either the end

+position or specify a zero-length pad at the start.

+.TP

+\fBphaser \fIgain-in gain-out delay decay speed\fR [\fB\-s\fR\^|\^\fB\-t\fR]

+Add a phasing effect to the audio.

+See [3] for a detailed description of phasing.

+.SP

+delay/decay/speed gives the delay in milliseconds

+and the decay (relative to gain-in) with a modulation

+speed in Hz.

+The modulation is either sinusoidal (\fB\-s\fR) \*mpreferable for multiple

+instruments, or triangular

+(\fB\-t\fR) \*mgives single instruments a sharper phasing effect.

+The decay should be less than 0\*d5 to avoid

+feedback, and usually no less than 0\*d1.  Gain-out is the volume of the output.

+.SP

+For example:

+.EX

+	play snare.flac phaser 0.8 0.74 3 0.4 0.5 -t

+.EE

+Gentler:

+.EX

+	play snare.flac phaser 0.9 0.85 4 0.23 1.3 -s

+.EE

+A popular sound:

+.EX

+	play snare.flac phaser 0.89 0.85 1 0.24 2 -t

+.EE

+More severe:

+.EX

+	play snare.flac phaser 0.6 0.66 3 0.6 2 -t

+.EE

+.TP

+\fBpitch \fR[\fB\-q\fR] \fIshift\fR [\fIsegment\fR [\fIsearch\fR [\fIoverlap\fR]]]

+Change the audio pitch (but not tempo).

+.SP

+.I shift

+gives the pitch shift as positive or negative `cents' (i.e. 100ths of a

+semitone).  See the

+.B tempo

+effect for a description of the other parameters.

+.TP

+\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [override-options] \fIRATE\fR[\fBk\fR]

+Change the audio sampling rate (i.e. resample the audio) to any given

+.I RATE

+(even non-integer if this is supported by the output file format)

+using a quality level defined as follows:

+.TS

+center box;

+cI cI2w9 cI cI2w6 cIw6 lIw17

+cB c c c c l.

+\ 	Quality	T{

+\ Phase Response

+T}	T{

+Band-width

+T}	Rej dB	T{

+.na

+Typical Use

+T}

+\-q	T{

+.na

+quick

+T}	linear	n/a	T{

+.na

+\(~=30 @ \ Fs/4

+T}	T{

+.na

+playback on ancient hardware

+T}

+\-l	low	linear	80%	100	T{

+.na

+playback on old hardware

+T}

+\-m	medium	intermediate	95%	100	T{

+.na

+audio playback

+T}

+\-h	high	intermediate	95%	125	T{

+.na

+16-bit mastering (use with dither)

+T}

+\-v	T{

+.na

+very high

+T}	intermediate	95%	175	24-bit mastering

+.TE

+.DT

+.SP

+where

+.I Band-width

+is the percentage of the audio frequency band that is preserved and

+.I Rej dB

+is the level of noise rejection.  Increasing levels of resampling

+quality come at the expense of increasing amounts of time to process the

+audio.  If no quality option is given, the quality level used is `high'.

+.SP

+The `quick' algorithm uses cubic interpolation; all others use

+band-limited interpolation.  The `quick' and `low' quality

+algorithms have a `linear' phase response; for `medium', `high' and

+`very high', the phase response is configurable (see below), but

+defaults to `intermediate'.

+.SP

+The

+.B rate

+effect is invoked automatically if SoX's \fB\-r\fR option specifies a

+rate that is different to that of the input file(s).  Alternatively, if

+this effect is given explicitly, then SoX's

+.B \-r

+option need not be given.  For example, the following two commands are

+equivalent:

+.EX

+.ne 2

+	sox input.au -r 48k output.au bass -3

+	sox input.au        output.au bass -3 rate 48k

+.EE

+though the second command is more flexible as it allows

+.B rate

+options to be given, and allows the effects to be ordered arbitrarily.

+.TS

+center;

+c8 c8 c.

+*	*	*

+.TE

+.DT

+.SP

+The simple quality selection described above provides settings that

+satisfy the needs of the vast majority of resampling tasks.

+Occasionally, however, it may be desirable to fine-tune the resampler's

+filter response; this can be achieved using

+.IR override\ options ,

+as detailed in the following table:

+.TS

+center box;

+lB lw52.

+\-M/\-I/\-L	Phase response = minimum/intermediate/linear

+\-p\ 0\-100	T{

+.na

+Any phase response (0 = minimum, 25 = intermediate, 50 = linear, 100 = maximum)

+T}

+\-s	Steep filter (band-width = 99%)

+\-b\ 74\-99\*d7	Any band-width %

+\-a	Allow aliasing above the pass-band

+.TE

+.DT

+.SP

+N.B.  Override options can not be used with the `quick' or `low'

+quality algorithms.

+.SP

+All resamplers use filters that can sometimes create `echo' (a.k.a.

+`ringing') artefacts with transient signals such as those that occur

+with `finger snaps' or other highly percussive sounds.  Such artefacts are

+much more noticeable to the human ear if they occur before the transient

+(`pre-echo') than if they occur after it (`post-echo').  The phase

+response setting controls the distribution of any transient echo between

+`pre' and `post': with minimum phase, there is no pre-echo but the

+longest post-echo; with linear phase, pre and post echo are in equal

+amounts (in signal terms, but not audibility terms); the intermediate

+phase setting attempts to find the best compromise by selecting a small

+length (and level) of pre-echo and a medium-length post-echo.

+.SP

+Minimum, intermediate, or linear phase response is selected using the

+.BR \-M ,

+.BR \-I ,

+or

+.B \-L

+option; a custom phase response can be created with the

+.B \-p

+option.  Note that phase responses between `linear' and `maximum'

+(greater than 50) are rarely useful.

+.SP

+A resampler's band-width setting determines how much of the frequency

+content of the original signal (w.r.t. the original sample rate when

+up-sampling, or the new sample rate when down-sampling) is preserved

+during conversion.  The term `pass-band' is used to refer to all frequencies

+up to the band-width point (e.g. for 44\*d1kHz sampling rate, and a

+resampling band-width of 95%, the pass-band represents frequencies from

+0Hz (D.C.) to circa 21kHz).  Increasing the resampler's band-width

+results in a slower conversion and can increase transient echo

+artefacts (and vice versa).

+.SP

+The

+.B \-s

+`steep filter' option changes resampling band-width from the default 95%

+(based on the 3dB point), to 99%.  The

+.B \-b

+option allows the band-width to be set to any value in the range

+74\-99\*d7 %, but note that band-width values greater than 99% are not

+recommended for normal use as they can cause excessive transient echo.

+.SP

+If the

+.B \-a

+option is given, then aliasing above the pass-band is allowed.  For

+example, with 44\*d1kHz sampling rate, and a

+resampling band-width of 95%, this means that frequency content above

+21kHz can be distorted; however, since this is above the pass-band (i.e.

+above the highest frequency of interest/audibility), this may not be a

+problem.  The benefits of allowing aliasing are reduced processing time,

+and reduced (by almost half) transient echo artefacts.

+Note that if this option is given, then

+the minimum band-width allowable with

+.B \-b

+increases to 85%.

+.SP

+Examples:

+.EX

+	sox input.wav -2 output.wav rate -s -a 44100 dither

+.EE

+default (high) quality resampling; overrides: steep filter, allow

+aliasing; to 44\*d1kHz sample rate; dither output to 2-byte (16-bit) WAV

+file.

+.EX

+	sox input.wav -3 output.aiff rate -v -L -b 90 48k

+.EE

+very high quality resampling; overrides: linear phase, band-width 90%;

+to 48k sample rate; store output to 3-byte (24-bit) AIFF file.

+.TS

+center;

+c8 c8 c.

+*	*	*

+.TE

+.DT

+.SP

+The

+.BR key ,

+.B speed

+and

+.B tempo

+effects all use the

+.B rate

+effect at their core.

+.SP

+See also

+.BR resample ,

+.B polyphase

+and

+.B rabbit

+for other sample-rate changing effects.

+.TP

+\fBremix\fR [\fB\-a\fR\^|\^\fB\-m\fR\^|\^\fB\-p\fR] <\fIout-spec\fR>

+\fIout-spec\fR	= \fIin-spec\fR{\fB,\fIin-spec\fR} | \fB0\fR

+.br

+\fIin-spec\fR	= [\fIin-chan\fR]\^[\fB\-\fR[\fIin-chan2\fR]]\^[\fIvol-spec\fR]

+.br

+\fIvol-spec\fR	= \fBp\fR\^|\^\fBi\fR\^|\^\fBv\^\fR[\fIvolume\fR]

+.br

+.SP

+Select and mix input audio channels into output audio channels.  Each output

+channel is specified, in turn, by a given \fIout-spec\fR: a list of

+contributing input channels and volume specifications.

+.SP

+Note that this effect operates on the audio

+.I channels

+within the SoX effects processing chain; it should not be confused with the

+.B \-m

+global option (where multiple

+.I files

+are mix-combined before entering the effects chain).

+.SP

+An

+.I out-spec

+contains comma-separated input channel-numbers and hyphen-delimited

+channel-number ranges; alternatively,

+.B 0

+may be given to create a silent output channel.  For example,

+.EX

+	sox input.au output.au remix 6 7 8 0

+.EE

+creates an output file with four channels, where channels 1, 2, and 3 are

+copies of channels 6, 7, and 8 in the input file, and channel 4 is silent.

+Whereas

+.EX

+	sox input.au output.au remix 1-3,7 3

+.EE

+creates a stereo output file where the left channel is a mix-down of input

+channels 1, 2, 3, and 7, and the right channel is a copy of input channel 3.

+.SP

+Where a range of channels is specified, the channel numbers to the left and

+right of the hyphen are optional and default to 1 and to the number of input

+channels respectively. Thus

+.EX

+	sox input.au output.au remix -

+.EE

+performs a mix-down of all input channels to mono.

+.SP

+By default, where an output channel is mixed from multiple (n) input

+channels, each input channel will be scaled by a factor of \(S1/\s-2n\s+2.

+Custom mixing volumes can be set by following a given input channel or range

+of input channels with a \fIvol-spec\fR (volume specification).

+This is one of the letters \fBp\fR, \fBi\fR, or \fBv\fR,

+followed by a volume number, the meaning of which depends on the given

+letter and is defined as follows:

+.TS

+center;

+lI lI lI

+c l l.

+Letter	Volume number	Notes

+p	power adjust in dB	0 = no change

+i	power adjust in dB	T{

+.na

+As `p', but invert the audio

+T}

+v	voltage multiplier	T{

+.na

+1 = no change, 0\*d5 \(~= 6dB attenuation, 2 \(~= 6dB gain, \-1 = invert

+T}

+.TE

+.DT

+.SP

+If an

+.I out-spec

+includes at least one

+.I vol-spec

+then, by default, \(S1/\s-2n\s+2 scaling is not applied to any other channels in the

+same out-spec (though may be in other out-specs).

+The \-a (automatic)

+option however, can be given to retain the automatic scaling in this

+case.  For example,

+.EX

+	sox input.au output.au remix 1,2 3,4v0.8

+.EE

+results in channel level multipliers of 0\*d5,0\*d5 1,0\*d8, whereas

+.EX

+	sox input.au output.au remix -a 1,2 3,4v0.8

+.EE

+results in channel level multipliers of 0\*d5,0\*d5 0\*d5,0\*d8.

+.SP

+The \-m (manual) option disables all automatic volume adjustments, so

+.EX

+	sox input.au output.au remix -m 1,2 3,4v0.8

+.EE

+results in channel level multipliers of 1,1 1,0\*d8.

+.SP

+The volume number is optional and omitting it corresponds to no volume

+change; however, the only case in which this is useful is in conjunction

+with

+.BR i .

+For example, if

+.I input.au

+is stereo, then

+.EX

+	sox input.au output.au remix 1,2i

+.EE

+is a mono equivalent of the

+.B oops

+effect.

+.SP

+If the \fB\-p\fR option is given, then any automatic \(S1/\s-2n\s+2 scaling

+is replaced by \(S1/\s-2\(srn\s+2 (`power') scaling; this gives a louder mix

+but one that might occasionally clip.

+.TS

+center;

+c8 c8 c.

+*	*	*

+.TE

+.DT

+.SP

+One typical use of the

+.B remix

+effect is to split an audio file into a set of files, each containing

+one of the constituent channels (in order to perform subsequent

+processing on individual audio channels).  Where more than a few

+channels are involved, a script such as the following is useful:

+.EX

+#!/bin/sh                        # This is a Bourne shell script

+chans=\`soxi -c "$1"\`

+while [ $chans -ge 1 ]; do

+  chans0=\`printf %02i $chans\`   # 2 digits hence up to 99 chans

+  out=\`echo "$1"|sed "s/\\(.*\\)\\.\\(.*\\)/\\1-$chans0.\\2/"\`

+  sox "$1" "$out" remix $chans

+  chans=\`expr $chans - 1\`

+done

+.EE

+If a file

+.I input.au

+containing six audio channels were given, the script would produce six

+output files:

+.IR input-01.au ,

+\fIinput-02.au\fR, ...,

+.IR input-06.au .

+.SP

+See also

+.B mixer

+and

+.B swap

+for similar effects.

+.TP

+\fBrepeat \fIcount\fR

+Repeat the entire audio \fIcount\fR times.

+Requires temporary file space to store the audio to be repeated.

+Note that repeating once yields two copies: the original audio and the

+repeated audio.

+.TP

+\fBreverb\fR [\fB\-w\fR|\fB\-\-wet-only\fR] [\fIreverberance\fR (50%) [\fIHF-damping\fR (50%)

+[\fIroom-scale\fR (100%) [\fIstereo-depth\fR (100%)

+.br

+[\fIpre-delay\fR (0ms) [\fIwet-gain\fR (0dB)]]]]]]

+.SP

+Add reverberation to the audio using the `freeverb' algorithm.  A

+reverberation effect is sometimes desirable for concert halls that are too

+small or contain so many people that the hall's natural reverberance is

+diminished.  Applying a small amount of stereo reverb to a (dry) mono signal

+will usually make it sound more natural.  See [3] for a detailed description

+of reverberation.

+.SP

+Note that this effect

+increases both the volume and the length of the audio, so to prevent clipping

+in these domains, a typical invocation might be:

+.EX

+	play dry.au gain -3 pad 0 3 reverb

+.EE

+.TP

+\fBreverse\fR

+Reverse the audio completely.

+Requires temporary file space to store the audio to be reversed.

+.TP

+\fBriaa\fR

+Apply RIAA vinyl playback equalisation.

+The sampling rate must be one of: 44\*d1, 48, 88\*d2, 96 kHz.

+.SP

+This effect supports the \fB\-\-plot\fR global option.

+.TP

+\fBsilence \fR[\fB\-l\fR] \fIabove-periods\fR [\fIduration

+threshold\fR[\fBd\fR\^|\^\fB%\fR] [\fIbelow-periods duration

+threshold\fR[\fBd\fR\^|\^\fB%\fR]]]

+.SP

+Removes silence from the beginning, middle, or end of the audio.

+Silence is anything below a specified threshold.

+.SP

+The \fIabove-periods\fR value is used to indicate if audio should be

+trimmed at the beginning of the audio. A value of zero indicates no

+silence should be trimmed from the beginning. When specifying a

+non-zero \fIabove-periods\fR, it trims audio up until it finds

+non-silence. Normally, when trimming silence from beginning of audio

+the \fIabove-periods\fR will be 1 but it can be increased to higher

+values to trim all audio up to a specific count of non-silence

+periods. For example, if you had an audio file with two songs that

+each contained 2 seconds of silence before the song, you could specify

+an \fIabove-period\fR of 2 to strip out both silence periods and the

+first song.

+.SP

+When \fIabove-periods\fR is non-zero, you must also specify a

+\fIduration\fR and \fIthreshold\fR. \fIDuration\fR indicates the

+amount of time that non-silence must be detected before it stops

+trimming audio. By increasing the duration, bursts of noise can be

+treated as silence and trimmed off.

+.SP

+\fIThreshold\fR is used to indicate what sample value you should treat as

+silence.  For digital audio, a value of 0 may be fine but for audio

+recorded from analog, you may wish to increase the value to account

+for background noise.

+.SP

+When optionally trimming silence from the end of the audio, you specify

+a \fIbelow-periods\fR count.  In this case, \fIbelow-period\fR means

+to remove all audio after silence is detected.

+Normally, this will be a value of 1 but it can

+be increased to skip over periods of silence that are wanted.  For example,

+if you have a song with 2 seconds of silence in the middle and 2 seconds

+at the end, you could set below-period to a value of 2 to skip over the

+silence in the middle of the audio.

+.SP

+For \fIbelow-periods\fR, \fIduration\fR specifies a period of silence

+that must exist before audio is not copied any more.  By specifying

+a higher duration, silence that is wanted can be left in the audio.

+For example, if you have a song with an expected 1 second of silence

+in the middle and 2 seconds of silence at the end, a duration of 2

+seconds could be used to skip over the middle silence.

+.SP

+Unfortunately, you must know the length of the silence at the

+end of your audio file to trim off silence reliably.  A work around is

+to use the \fBsilence\fR effect in combination with the \fBreverse\fR effect.

+By first reversing the audio, you can use the \fIabove-periods\fR

+to reliably trim all audio from what looks like the front of the file.

+Then reverse the file again to get back to normal.

+.SP

+To remove silence from the middle of a file, specify a

+\fIbelow-periods\fR that is negative.  This value is then

+treated as a positive value and is also used to indicate the

+effect should restart processing as specified by the

+\fIabove-periods\fR, making it suitable for removing periods of

+silence in the middle of the audio.

+.SP

+The option

+.B \-l

+indicates that \fIbelow-periods\fR \fIduration\fR length of audio

+should be left intact at the beginning of each period of silence.

+For example, if you want to remove long pauses between words

+but do not want to remove the pauses completely.

+.SP

+The \fIperiod\fR counts are in units of samples. \fIDuration\fR counts

+may be in the format of hh:mm:ss.frac, or the exact count of samples.

+\fIThreshold\fR numbers may be suffixed with

+.B d

+to indicate the value is in decibels, or

+.B %

+to indicate a percentage of maximum value of the sample value

+(\fB0%\fR specifies pure digital silence).

+.SP

+The following example shows how this effect can be used to start a recording

+that does not contain the delay at the start which usually occurs between

+`pressing the record button' and the start of the performance:

+.EX

+	rec \fIparameters filename other-effects\fR silence 1 5 2%

+.EE

+.TP

+\fBspeed \fIfactor\fR[\fBc\fR]

+Adjust the audio speed (pitch and tempo together).  \fIfactor\fR

+is either the ratio of the new speed to the old speed: greater

+than 1 speeds up, less than 1 slows down, or, if appended with the

+letter

+`c', the number of cents (i.e. 100ths of a semitone) by

+which the pitch (and tempo) should be adjusted: greater than 0

+increases, less than 0 decreases.

+.SP

+By default, the speed change is performed by resampling with the \fBrate\fR

+effect using its default quality/speed.  For higher quality or higher speed

+resampling, in addition to the \fBspeed\fR effect, specify

+the \fBrate\fR effect with the desired quality option.

+.TP

+\fBspectrogram \fR[options]

+Create a spectrogram of the audio.  This effect is optional; type \fBsox

+\-\-help\fR and check the list of supported effects to see if it has

+been included.

+.SP

+The spectrogram is rendered in a Portable Network Graphic (PNG) file,

+and shows time in the X-axis, frequency in the Y-axis, and audio signal

+magnitude in the Z-axis.  Z-axis values are represented by the colour

+(or intensity) of the pixels in the X-Y plane.

+.SP

+This effect supports only one channel; for multi-channel input files,

+use either SoX's

+.B \-c 1

+option with the output file (to obtain a spectrogram on the mix-down),

+or the

+.B remix

+.I n

+effect to select a particular channel.  Be aware though, that both of

+these methods affect the audio in the effects chain.

+.RS

+.IP \fB\-x\ \fInum\fR

+X-axis pixels/second, default 100.  This controls the width of the

+spectrogram;

+.I num

+can be from 1 (low time resolution) to 5000 (high time resolution)

+and need not be an integer.  SoX

+may make a slight adjustment to the given number for processing

+quantisation reasons; if so, SoX will report the actual number used

+(viewable when

+.B \-\-verbose

+is in effect).

+.SP

+The maximum width of the spectrogram is 999 pixels; if the audio length

+and the given

+.B \-x

+number are such that this would be exceeded, then the spectrogram (and

+the effects chain) will be truncated.  To move the spectrogram to a

+point later in the audio stream, first invoke the

+.B trim

+effect; e.g.

+.EX

+  sox audio.ogg -n trim 1:00 spectrogram

+.EE

+starts the spectrogram at 1 minute through the audio.

+.IP \fB\-y\ \fInum\fR

+Y-axis resolution (1 \- 4), default 2.

+This controls the height of the spectrogram;

+.I num

+can be from 1 (low frequency resolution) to 4 (high frequency

+resolution).  For values greater than 2, the resulting image may be too

+tall to display on the screen; if so, a graphic manipulation package

+(such as

+.BR ImageMagick (1))

+can be used to re-size the image.

+.SP

+To increase the frequency resolution without increasing the height of

+the spectrogram, the

+.B rate

+effect may be invoked to reduce the sampling rate of the signal before

+invoking

+.BR spectrogram ;

+e.g.

+.EX

+  sox audio.ogg -r 4k -n rate spectrogram

+.EE

+allows detailed analysis of frequencies up to 2kHz (half the sampling

+rate).

+.IP \fB\-z\ \fInum\fR

+Z-axis (colour) range in dB, default 120.  This sets the dynamic-range

+of the spectrogram to be \-\fInum\fR\ dBFS to 0\ dBFS.

+.I Num

+may range from 20 to 180.  Decreasing dynamic-range effectively

+increases the `contrast' of the spectrogram display, and vice versa.

+.IP \fB\-Z\ \fInum\fR

+Sets the upper limit of the Z-axis in dBFS.

+A negative

+.I num

+effectively increases the `brightness' of the spectrogram display,

+and vice versa.

+.IP \fB\-q\ \fInum\fR

+Sets the Z-axis quantisation, i.e. the number of different colours (or

+intensities) in which to render Z-axis

+values.  A small number (e.g. 4) will give a `poster'-like effect making

+it easier to discern magnitude bands of similar level.  Small numbers

+also usually

+result in small PNG files.  The number given specifies the number of

+colours to use inside the Z-axis range; two colours are reserved to

+represent out-of-range values.

+.IP \fB\-w\ \fIname\fR

+Window: Hann (default), Hamming, Bartlett, Rectangular or Kaiser.  The

+spectrogram is produced using the Discrete Fourier Transform (DFT)

+algorithm.  A significant parameter to this algorithm is the choice of

+`window function'.  By default, SoX uses the Hann window which has good

+all-round frequency-resolution and dynamic-range properties.  For better

+frequency resolution (but lower dynamic-range), select a Hamming window;

+for higher dynamic-range (but poorer frequency-resolution), select a

+Kaiser window.  Bartlett and Rectangular windows are also available.

+Selecting a window other than Hann will usually require

+a corresponding

+.B \-z

+setting.

+.IP \fB\-s\fR

+Allow slack overlapping of DFT windows.

+This can, in some cases, increase image sharpness and give greater adherence

+to the

+.B \-x

+value, but at the expense of a little spectral loss.

+.IP \fB\-m\fR

+Creates a monochrome spectrogram (the default is colour).

+.IP \fB\-h\fR

+Selects a high-colour palette\*mless visually pleasing than the default

+colour palette, but it may make it easier to differentiate different levels.

+If this option is used in conjunction with

+.BR \-m ,

+the result will be a hybrid monochrome/colour palette.

+.IP \fB\-p\ \fInum\fR

+Permute the colours in a colour or hybrid palette.

+The

+.I num

+parameter (from 1 to 6) selects the permutation.

+.IP \fB\-l\fR

+Creates a `printer friendly' spectrogram with a light background (the

+default has a dark background).

+.IP \fB\-a\fR

+Suppress the display of the axis lines.  This is sometimes useful in

+helping to discern artefacts at the spectrogram edges.

+.IP \fB\-t\ \fItext\fR

+Set the image title\*mtext to display above the spectrogram.

+.IP \fB\-c\ \fItext\fR

+Set the image comment\*mtext to display below and to the left of the

+spectrogram.

+.IP \fB\-o\ \fItext\fR

+Name of the spectrogram output PNG file, default `spectrogram.png'.

+.RE

+.TP

+\

+For example, let's see what the spectrogram of a swept triangular wave looks

+like:

+.EX

+	sox -n -n synth 6 tri 10k:14k spectrogram -z 100 -w k

+.EE

+For the ability to perform off-line processing of spectral data, see the

+.B stat

+effect.

+.TP

+\fBsplice \fR { \fIposition\fR[\fB,\fIexcess\fR[\fB,\fIleeway\fR]] }

+Splice together audio sections.  This effect provides two things over

+simple audio concatenation: a (usually short) cross-fade is applied at

+the join, and a wave similarity comparison is made to help determine the

+best place at which to make the join.

+.SP

+To perform a splice, first use the

+.B trim

+effect to select the audio sections to be joined together.  As when

+performing a tape splice, the end of the section to be spliced onto

+should be trimmed with a small

+.I excess

+(default 0\*d005 seconds) of audio after the ideal joining point.  The

+beginning of the audio section to splice on should be trimmed with the

+same

+.IR excess

+(before the ideal joining point), plus an additional

+.I leeway

+(default 0\*d005 seconds).  SoX should then be invoked with the two

+audio sections as input files and the

+.B splice

+effect given with the position at which to perform the splice\*mthis is

+the length of the first audio section (including the excess).

+.SP

+For example, a long song begins with two verses which start (as

+determined e.g. by using the

+.B play

+command with the

+.B trim

+(\fIstart\fR) effect) at times 0:30\*d125 and 1:03\*d432.

+The following commands cut out the first verse:

+.EX

+	sox too-long.au part1.au trim 0 30.130

+.EE

+(5 ms excess, after the first verse starts)

+.EX

+	sox too-long.au part2.au trim 1:03.422

+.EE

+(5 ms excess plus 5 ms leeway, before the second verse starts)

+.EX

+	sox part1.au part2.au just-right.au splice 30.130

+.EE

+Provided your arithmetic is good enough, multiple splices can be

+performed with a single

+.B splice

+invocation.  For example:

+.EX

+#!/bin/sh

+# Audio Copy and Paste Over

+# acpo infile copy-start copy-stop paste-over-start outfile

+# All times measured in samples.

+rate=\`soxi -r "$1"\`

+e=\`expr $rate '*' 5 / 1000\`  # Using default excess

+l=$e                         # and leeway.

+sox "$1" piece.au trim \`expr $2 - $e - $l\`s \\

+	\`expr $3 - $2 + $e + $l + $e\`s

+sox "$1" part1.au trim 0 \`expr $4 + $e\`s

+sox "$1" part2.au trim \`expr $4 + $3 - $2 - $e - $l\`s

+sox part1.au piece.au part2.au "$5" splice \\

+	\`expr $4 + $e\`s \\

+	\`expr $4 + $e + $3 - $2 + $e + $l + $e\`s

+.EE

+In the above Bourne shell script,

+two splices are used to `copy and paste' audio.

+.TS

+center;

+c8 c8 c.

+*	*	*

+.TE

+.DT

+.SP

+It is also possible to use this effect to perform general cross-fades, e.g. to

+join two songs.

+In this case,

+.I excess

+would typically be a number of seconds, and

+.I leeway

+should be set to zero.

+.TP

+\fBstat\fR [\fB\-s \fIscale\fR] [\fB\-rms\fR] [\fB\-freq\fR] [\fB\-v\fR] [\fB\-d\fR]

+Display time and frequency domain statistical information about the audio.

+Audio is passed unmodified through the SoX processing chain.

+.SP

+The information is output to the `standard error' (stderr) stream and is

+calculated, where

+.I n

+is the duration of the audio in samples,

+.I c

+is the number of audio channels,

+.I r

+is the audio sample rate, and

+.I x\s-2\dk\u\s0

+represents the PCM value (in the range \-1 to +1 by default) of each successive

+sample in the audio,

+as follows:

+.TS

+center;

+lI l l.

+Samples read	\fIn\fR\^\(mu\^\fIc\fR	\

+Length (seconds)	\fIn\fR\^\(di\^\fIr\fR

+Scaled by	\ 	See \-s below.

+Maximum amplitude	max(\fIx\s-2\dk\u\s0\fR)	T{

+The maximum sample value in the audio; usually this will be a positive number.

+T}

+Minimum amplitude	min(\fIx\s-2\dk\u\s0\fR)	T{

+The minimum sample value in the audio; usually this will be a negative number.

+T}

+Midline amplitude	\(12\^min(\fIx\s-2\dk\u\s0\fR)\^+\^\(12\^max(\fIx\s-2\dk\u\s0\fR)

+Mean norm	\(S1/\s-2n\s+2\^\(*S\^\^\(br\^\fIx\s-2\dk\u\s0\fR\^\(br\^	T{

+The average of the absolute value of each sample in the audio.

+T}

+Mean amplitude	\(S1/\s-2n\s+2\^\(*S\^\fIx\s-2\dk\u\s0\fR	T{

+The average of each sample in the audio.  If this figure is non-zero, then it indicates the

+presence of a D.C. offset (which could be removed using the

+.B dcshift

+effect).

+T}

+RMS amplitude	\(sr(\(S1/\s-2n\s+2\^\(*S\^\fIx\s-2\dk\u\s0\fR\(S2)	T{

+The level of a D.C. signal that would have the same power

+as the audio's average power.

+T}

+Maximum delta	max(\^\(br\^\fIx\s-2\dk\u\s0\fR\^\-\^\fIx\s-2\dk\-1\u\s0\fR\^\(br\^)

+Minimum delta	min(\^\(br\^\fIx\s-2\dk\u\s0\fR\^\-\^\fIx\s-2\dk\-1\u\s0\fR\^\(br\^)

+Mean delta	\(S1/\s-2n\-1\s+2\^\(*S\^\^\(br\^\fIx\s-2\dk\u\s0\fR\^\-\^\fIx\s-2\dk\-1\u\s0\fR\^\(br\^

+RMS delta	\(sr(\(S1/\s-2n\-1\s+2\^\(*S\^(\fIx\s-2\dk\u\s0\fR\^\-\^\fIx\s-2\dk\-1\u\s0\fR)\(S2)

+Rough frequency	\ 	In Hz.

+Volume Adjustment	\ 	T{

+The parameter to the

+.B vol

+effect which would make the audio as loud as possible without clipping.

+Note: See the discussion on

+.B Clipping

+above for reasons why it is rarely a good idea actually to do this.

+T}

+.TE

+.DT

+.SP

+The

+.B \-s

+option can be used to scale the input data by a given factor.

+The default value of

+.I scale

+is 2147483647 (i.e. the maximum value of a 32-bit signed integer).

+Internal effects

+always work with signed long PCM data and so the value should relate to this

+fact.

+.SP

+The

+.B \-rms

+option will convert all output average values to `root mean square'

+format.

+.SP

+The

+.B \-v

+option displays only the `Volume Adjustment' value.

+.SP

+The

+.B \-freq

+option calculates the input's power spectrum (4096 point DFT) instead of the

+statistics listed above.

+.SP

+The

+.B \-d

+option

+displays a hex dump of the 32-bit signed PCM

+audio data in SoX's internal buffer.

+This is mainly used to help track down endian problems that

+sometimes occur in cross-platform versions of SoX.

+.TP

+\fBswap\fR [\fI1 2\fR | \fI1 2 3 4\fR]

+Swap channels in multi-channel audio files.  Optionally, you may

+specify the channel order you would like the output in.  This defaults

+to output channel 2 and then 1 for stereo and 2, 1, 4, 3 for quad-channels.

+An interesting

+feature is that you may duplicate a given channel by overwriting another.

+This is done by repeating an output channel on the command-line.  For example,

+.B swap 2 2

+will overwrite channel 1 with channel 2; creating a stereo

+file with both channels containing the same audio.

+.SP

+See also the

+.B remix

+effect.

+.TP

+\fBsynth\fR [\fIlen\fR] {[\fItype\fR] [\fIcombine\fR] [[\fB%\fR]\fIfreq\fR[\fBk\fR][\fB:\fR\^|\^\fB+\fR\^|\^\fB/\fR\^|\^\fB\-\fR[\fB%\fR]\fIfreq2\fR[\fBk\fR]]] [\fIoff\fR] [\fIph\fR] [\fIp1\fR] [\fIp2\fR] [\fIp3\fR]}

+This effect can be used to generate fixed or swept frequency audio tones

+with various wave shapes, or to generate wide-band noise of various

+`colours'.

+Multiple synth effects can be cascaded to produce more complex

+waveforms; at each stage it is possible to choose whether the generated

+waveform will be mixed with, or modulated onto

+the output from the previous stage.

+Audio for each channel in a multi-channel audio file can be synthesised

+independently.

+.SP

+Though this effect is used to generate audio, an input file must still

+be given, the characteristics of which will be used to set the

+synthesised audio length, the number of channels, and the sampling rate;

+however, since the input file's audio is not normally needed, a `null

+file' (with the special name \fB\-n\fR) is often given instead (and the

+length specified as a parameter to \fBsynth\fR or by another given

+effect that has an associated length).

+.SP

+For example, the following produces a 3 second, 48kHz,

+audio file containing a sine-wave swept from 300 to 3300\ Hz:

+.EX

+	sox -n output.au synth 3 sine 300-3300

+.EE

+and this produces an 8\ kHz version:

+.EX

+	sox -r 8000 -n output.au synth 3 sine 300-3300

+.EE

+Multiple channels can be synthesised by specifying the set of

+parameters shown between braces multiple times;

+the following puts the swept tone in the left channel and adds `brown'

+noise in the right:

+.EX

+	sox -n output.au synth 3 sine 300-3300 brownnoise

+.EE

+The following example shows how two synth effects can be cascaded

+to create a more complex waveform:

+.EX

+	sox -n output.au synth 0\*d5 sine 200-500 \(rs

+		synth 0\*d5 sine fmod 700-100

+.EE

+Frequencies can also be given as a number of musical semitones relative

+to `middle A' (440\ Hz) by prefixing a `%' character;  for example, the

+following could be used to help tune a guitar's `E' strings:

+.EX

+	play -n synth sine %-17

+.EE

+.B N.B.

+This effect generates audio at maximum volume (0dBFS), which means that there

+is a high chance of clipping when using the audio subsequently, so

+in most cases, you will want to follow this effect with the \fBgain\fR

+effect to prevent this from happening. (See also

+.B Clipping

+above.)

+.SP

+A detailed description of each

+.B synth

+parameter follows:

+.SP

+\fIlen\fR is the length of audio to synthesise expressed as a time

+or as a number of samples;

+0=inputlength, default=0.

+.SP

+The format for specifying lengths in time is hh:mm:ss.frac.  The format

+for specifying sample counts is the number of samples with the letter

+`s' appended to it.

+.SP

+\fItype\fR is one of sine, square, triangle, sawtooth, trapezium, exp,

+[white]noise, pinknoise, brownnoise; default=sine

+.SP

+\fIcombine\fR is one of create, mix, amod (amplitude modulation), fmod

+(frequency modulation); default=create

+.SP

+\fIfreq\fR/\fIfreq2\fR are the frequencies at the beginning/end of

+synthesis in Hz or, if preceded with `%', semitones relative to A

+(440\ Hz); for both, default=%0.  If

+.I freq2

+is given, then

+.I len

+must also have been given and the generated tone will be swept between

+the given frequencies.  The two given frequencies must be separated by

+one of the characters `:', `+', `/', or `\-'.  This character is used to

+specify the sweep function as follows:

+.RS

+.IP \fB:\fR

+Linear: the tone will change by a fixed number of hertz per second.

+.IP \fB+\fR

+Square: a second-order function is used to change the tone.

+.IP \fB/\fR

+Exponential: the tone will change by a fixed number of semitones per second.

+.IP \fB\-\fR

+Exponential: as `/', but initial phase always zero, and stepped (less

+smooth) frequency changes.

+.RE

+.TP

+\

+Not used for noise.

+.SP

+\fIoff\fR is the bias (DC-offset) of the signal in percent; default=0.

+.SP

+\fIph\fR is the phase shift in percentage of 1 cycle; default=0.  Not

+used for noise.

+.SP

+\fIp1\fR is the percentage of each cycle that is `on' (square), or

+`rising' (triangle, exp, trapezium); default=50 (square, triangle, exp),

+default=10 (trapezium).

+.SP

+\fIp2\fR (trapezium): the percentage through each cycle at which `falling'

+begins; default=50. exp: the amplitude in percent; default=100.

+.SP

+\fIp3\fR (trapezium): the percentage through each cycle at which `falling'

+ends; default=60.

+.TP

+\fBtempo \fR[\fB\-q\fR] \fIfactor\fR [\fIsegment\fR [\fIsearch\fR [\fIoverlap\fR]]]

+Change the audio tempo (but not its pitch).

+The audio is chopped up into segments which are then shifted in the time

+domain and overlapped (cross-faded) at points where their waveforms are

+most similar (as determined by measurement of `least squares').

+.SP

+By default, linear searches are used to find the best overlapping

+points; if the optional

+.B \-q

+parameter is given, tree searches are used instead, giving a quicker,

+but possibly lower quality, result.

+.SP

+.I factor

+gives the ratio of new tempo to the old tempo, so e.g. 1.1 speeds up the

+tempo by 10%, and 0.9 slows it down by 10%.

+.SP

+The optional

+.I segment

+parameter selects the algorithm's segment size in milliseconds.  The

+default value is 82 and is typically suited to making small changes to

+the tempo of music; for larger changes (e.g. a factor of 2), 50\ ms may

+give a better result.  When changing the tempo of speech, a segment size

+of around 30\ ms often works well.

+.SP

+The optional

+.I search

+parameter gives the audio length in milliseconds (default 14) over which

+the algorithm will search for overlapping points.  Larger values use

+more processing time and do not necessarily produce better results.

+.SP

+The optional

+.I overlap

+parameter gives the segment overlap length in milliseconds (default 12).

+.SP

+See also

+.B speed

+for an effect that changes tempo and pitch together, and

+.B pitch

+for an effect that changes pitch without changing tempo.

+.TP

+\fBtreble \fIgain\fR [\fIfrequency\fR[\fBk\fR]\fR [\fIwidth\fR[\fBs\fR\^|\^\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]]]

+Apply a treble tone-control effect.

+See the description of the \fBbass\fR effect for details.

+.TP

+\fBtremolo \fIspeed\fR [\fIdepth\fR]

+Apply a tremolo (low frequency amplitude modulation) effect to the audio.

+The tremolo frequency in Hz is given by

+.IR speed ,

+and the depth as a percentage by

+.I depth

+(default 40).

+.SP

+Note: This effect is a special case of the

+.B synth

+effect.

+.TP

+\fBtrim \fIstart\fR [\fIlength\fR]

+Trim can trim off unwanted audio from the beginning and end of the

+audio.  Audio is not sent to the output stream until

+the \fIstart\fR location is reached.

+.SP

+The optional \fIlength\fR parameter tells the number of samples to output

+after the \fIstart\fR sample and is used to trim off the back side of the

+audio.  Using a value of 0 for the \fIstart\fR parameter will allow

+trimming off the back side only.

+.SP

+Both options can be specified using either an amount of time or an

+exact count of samples.  The format for specifying lengths in time is

+hh:mm:ss.frac.  A start value of 1:30\*d5 will not start until 1 minute,

+thirty and \(12 seconds into the audio.  The format for specifying

+sample counts is the number of samples with the letter `s' appended to

+it.  A value of 8000s will wait until 8000 samples are read before

+starting to process audio.

+.TP

+\fBvol \fIgain\fR [\fItype\fR [\fIlimitergain\fR]]

+Apply an amplification or an attenuation to the audio signal.

+Unlike the

+.B \-v

+option (which is used for balancing multiple input files as they enter the

+SoX effects processing chain),

+.B vol

+is an effect like any other so can be applied anywhere, and several times

+if necessary, during the processing chain.

+.SP

+The amount to change the volume is given by

+.I gain

+which is interpreted, according to the given \fItype\fR, as follows: if

+.I type

+is \fBamplitude\fR (or is omitted), then

+.I gain

+is an amplitude (i.e. voltage or linear) ratio,

+if \fBpower\fR, then a power (i.e. wattage or voltage-squared) ratio,

+and if \fBdB\fR, then a power change in dB.

+.SP

+When

+.I type

+is \fBamplitude\fR or \fBpower\fR, a

+.I gain

+of 1 leaves the volume unchanged,

+less than 1 decreases it,

+and greater than 1 increases it;

+a negative

+.I gain

+inverts the audio signal in addition to adjusting its volume.

+.SP

+When

+.I type

+is \fBdB\fR, a

+.I gain

+of 0 leaves the volume unchanged,

+less than 0 decreases it,

+and greater than 0 increases it.

+.SP

+See [4]

+for a detailed discussion on electrical (and hence audio signal)

+voltage and power ratios.

+.SP

+Beware of

+.B Clipping

+when increasing the volume.

+.SP

+The

+.I gain

+and the

+.I type

+parameters can be concatenated if desired, e.g.

+.BR "vol 10dB" .

+.SP

+An optional \fIlimitergain\fR value can be specified and should be a

+value much less

+than 1 (e.g. 0\*d05 or 0\*d02) and is used only on peaks to prevent clipping.

+Not specifying this parameter will cause no limiter to be used.  In verbose

+mode, this effect will display the percentage of the audio that needed to be

+limited.

+.SP

+See also

+.B compand

+for a dynamic-range compression/expansion/limiting effect.

+.SS Deprecated Effects

+The following effects have been renamed or have their functionality

+included in another effect; they continue to work in this version of

+SoX but may be removed in future.

+.TP

+\fBkey \fR[\fB\-q\fR] \fIshift\fR [\fIsegment\fR [\fIsearch\fR [\fIoverlap\fR]]]

+Change the audio key (i.e. pitch but not tempo).

+This is just an alias for the

+.B pitch

+effect.

+.TP

+\fBpan \fIdirection\fR

+Pan the audio from one channel to another.  This is done by

+changing the volume of the input channels so that it fades out on one

+channel and fades-in on another.  If the number of input channels is

+different from the number of output channels then this effect tries to

+intelligently handle this.  For instance, if the input contains 1 channel

+and the output contains 2 channels, then it will create the missing channel

+itself.  The

+.I direction

+is a value from \-1 to 1.  \-1 represents

+far left and 1 represents far right.  Numbers in between will start the

+pan effect without totally muting the opposite channel.

+.TP

+\fBpolyphase\fR [\fB\-w nut\fR\^|\^\fBham\fR] [\fB\-width \fIn\fR] [\fB\-cut-off \fIc\fR]

+Change the sampling rate using `polyphase interpolation', a DSP algorithm.

+\fBpolyphase\fR copes with only certain rational fraction resampling ratios,

+and, compared with the \fBrate\fR effect, is generally slow, memory intensive,

+and has poorer stop-band rejection.

+.SP

+If the \fB\-w\fR parameter is \fBnut\fR, then a Nuttall (~90 dB

+stop-band) window will be used; \fBham\fR selects a Hamming (~43

+dB stop-band) window.  The default is Nuttall.

+.SP

+The \fB\-width\fR parameter specifies the (approximate) width of the filter. The default is 1024 samples, which produces reasonable results.

+.SP

+The \fB\-cut-off\fR value (\fIc\fR) specifies the filter cut-off frequency in terms of fraction of

+frequency bandwidth, also known as the Nyquist frequency.  See

+the \fBresample\fR effect for

+further information on Nyquist frequency.  If up-sampling, then this is the

+fraction of the original signal

+that should go through.  If down-sampling, this is the fraction of the

+signal left after down-sampling.  The default is 0\*d95.

+.SP

+See also

+.BR rate ,

+.B rabbit

+and

+.B resample

+for other sample-rate changing effects.

+.TP

+\fBrabbit\fR [\fB\-c0\fR\^|\^\fB\-c1\fR\^|\^\fB\-c2\fR\^|\^\fB\-c3\fR\^|\^\fB\-c4\fR]

+Change the sampling rate using libsamplerate, also known as `Secret Rabbit

+Code'.  This effect is optional and, due to licence issues,

+is not included in all versions of SoX.

+Compared with the \fBrate\fR effect, \fBrabbit\fR is very slow.

+.SP

+See http://www.mega-nerd.com/SRC for details of the algorithms.  Algorithms

+0 through 2 are progressively faster and lower quality versions of the

+sinc algorithm; the default is \fB\-c0\fR.

+Algorithm 3 is zero-order hold, and 4 is linear interpolation.

+.SP

+See also

+.BR rate ,

+.B polyphase

+and

+.B resample

+for other sample-rate changing effects, and see

+\fBresample\fR for more discussion of resampling.

+.TP

+\fBresample\fR [\fB\-qs\fR\^|\^\fB\-q\fR\^|\^\fB\-ql\fR] [\fIrolloff\fR [\fIbeta\fR]]

+Change the sampling rate using simulated analog filtration.

+Compared with the \fBrate\fR effect, \fBresample\fR is slow, and has poorer

+stop-band rejection.

+Only the low quality option works with all resampling ratios.

+.SP

+By default, linear interpolation of the filter coefficients is used,

+with a window width of about 45 samples at the lower of the two rates.

+This gives an accuracy of about 16 bits, but insufficient stop-band rejection

+in the case that you want to have roll-off greater than about 0\*d8 of

+the Nyquist frequency.

+.SP

+The \fB\-q*\fR options will change the default values for roll-off and beta

+as well as use quadratic interpolation of filter

+coefficients, resulting in about 24 bits precision.

+The \fB\-qs\fR, \fB\-q\fR, or \fB\-ql\fR options specify increased accuracy

+at the cost of lower execution speed.  It is optional to specify

+roll-off and beta parameters when using the \fB\-q*\fR options.

+.SP

+Following is a table of the reasonable defaults which are built-in to

+SoX:

+.SP

+.TS

+center box;

+cB cB cB cB cB

+c c n c c

+cB c n c c.

+Option	Window	Roll-off	Beta	Interpolation

+(none)	45	0\*d80	16	linear

+\-qs	45	0\*d80	16	quadratic

+\-q	75	0\*d875	16	quadratic

+\-ql	149	0\*d94	16	quadratic

+.TE

+.DT

+.SP

+\fB\-qs\fR, \fB\-q\fR, or \fB\-ql\fR use window lengths of 45, 75, or 149

+samples, respectively, at the lower sample-rate of the two files.

+This means progressively sharper stop-band rejection, at proportionally

+slower execution times.

+.SP

+\fIrolloff\fR refers to the cut-off frequency of the

+low pass filter and is given in terms of the

+Nyquist frequency for the lower sample rate.  rolloff therefore should

+be something between 0 and 1, in practice 0\*d8\-0\*d95.  The defaults are

+indicated above.

+.SP

+The \fINyquist frequency\fR is equal to half the sample rate.  Logically,

+this is because the A/D converter needs at least 2 samples to detect 1

+cycle at the Nyquist frequency.  Frequencies higher than the Nyquist

+will actually appear as lower frequencies to the A/D converter; this

+is called aliasing.  Normally, A/D converters run the signal through

+a lowpass filter first to avoid these problems.

+.SP

+Similar problems will happen in software when reducing the sample rate of

+an audio file (frequencies above the new Nyquist frequency can be aliased

+to lower frequencies).  Therefore, a good resample effect

+will remove all frequency information above the new Nyquist frequency.

+.SP

+The \fIrolloff\fR refers to how close to the Nyquist frequency this cut-off

+is, with closer being better.  When increasing the sample rate of an

+audio file you would not expect to have any frequencies exist that are

+past the original Nyquist frequency.  Because of resampling properties, it

+is common to have aliasing artifacts created above the old

+Nyquist frequency.  In that case the \fIrolloff\fR refers to how close

+to the original Nyquist frequency to use a highpass filter to remove

+these artifacts, with closer also being better.

+.SP

+The \fIbeta\fR, if unspecified, defaults to 16.  This selects a Kaiser window.

+You can select a Nuttall window by specifying anything \(<= 2 here.

+For more discussion of beta, look under the \fBfilter\fR effect.

+.SP

+Default parameters are, as indicated above, Kaiser window of length 45,

+roll-off 0\*d80, beta 16, linear interpolation.

+.SP

+Note: \fB\-qs\fR is only slightly slower, but more accurate for

+16-bit or higher precision.

+.SP

+See also

+.BR rate ,

+.B polyphase

+and

+.B rabbit

+for other sample-rate changing effects.

+There is a detailed analysis of

+\fBresample\fR and \fBpolyphase\fR at

+http://leute.server.de/wilde/resample.html; see \fBrabbit\fR for a

+pointer to its own documentation.

 .SH DIAGNOSTICS

 Exit status is 0 for no error, 1 if there is a problem with the

 command-line parameters, or 2 if an error occurs during file processing.

@@ -928,15 +3038,52 @@

 .SH SEE ALSO

 .BR soxi (1),

 .BR soxformat (7),

-.BR soxeffect (7),

+.BR libsox (3)

+.br

+.BR ImageMagick (1),

 .BR gnuplot (1),

 .BR octave (1),

-.BR wget (1),

-.BR libsox (3)

-.SP

+.BR wget (1)

+.br

 The SoX web site at http://sox.sourceforge.net

.br

 SoX scripting examples at http://sox.sourceforge.net/Docs/Scripts

+.SS References

+.TP

+[1]

+R. Bristow-Johnson,

+.IR "Cookbook formulae for audio EQ biquad filter coefficients" ,

+http://musicdsp.org/files/Audio-EQ-Cookbook.txt

+.TP

+[2]

+Wikipedia,

+.IR "Q-factor" ,

+http://en.wikipedia.org/wiki/Q_factor

+.TP

+[3]

+Scott Lehman,

+.IR "Effects Explained" ,

+http://harmony-central.com/Effects/effects-explained.html

+.TP

+[4]

+Wikipedia,

+.IR "Decibel" ,

+http://en.wikipedia.org/wiki/Decibel

+.TP

+[5]

+Richard Furse,

+.IR "Linux Audio Developer's Simple Plugin API" ,

+http://www.ladspa.org

+.TP

+[6]

+Richard Furse,

+.IR "Computer Music Toolkit" ,

+http://www.ladspa.org/cmt

+.TP

+[7]

+Steve Harris,

+.IR "LADSPA plugins" ,

+http://plugin.org.uk

 .SH LICENSE

 Copyright 1991 Lance Norskog and Sundry Contributors.

.br

--- a/soxeffect.7

+++ /dev/null

@@ -1,2201 +1,0 @@

-'\" t

-'\" The line above instructs most `man' programs to invoke tbl

-'\"

-'\" Separate paragraphs; not the same as PP which resets indent level.

-.de SP

-.if t .sp .5

-.if n .sp

-..

-'\"

-'\" Replacement em-dash for nroff (default is too short).

-.ie n .ds m " -

-.el .ds m \(em

-'\"

-'\" Placeholder macro for if longer nroff arrow is needed.

-.ds RA \(->

-'\"

-'\" Decimal point set slightly raised

-.if t .ds d \v'-.15m'.\v'+.15m'

-.if n .ds d .

-'\"

-'\" Enclosure macro for examples

-.de EX

-.SP

-.nf

-.ft CW

-..

-.de EE

-.ft R

-.SP

-.fi

-..

-.TH SoX 7 "September 16, 2008" "soxeffect" "Sound eXchange"

-.SH NAME

-SoX \- Sound eXchange, the Swiss Army knife of audio manipulation

-.SH DESCRIPTION

-This manual describes SoX audio effects; the SoX manual set starts with

-.BR sox (1).

-.SP

-In addition to converting and playing audio files, SoX can be used to

-invoke a number of audio `effects'.  Multiple effects may be applied

-by specifying them one after another at the end of the SoX command line.

-Note that applying multiple effects in real-time (i.e. when playing audio)

-is likely to need a high performance computer; stopping other applications

-may alleviate performance issues should they occur.

-.SP

-Some of the SoX effects are primarily intended to be applied to a single

-instrument or `voice'.  To facilitate this, the \fBremix\fR effect and

-the global SoX option \fB\-M\fR can be used to isolate then recombine

-tracks from a multi-track recording.

-.SP

-In the descriptions that follow,

-brackets [ ] are used to denote parameters that are optional, braces

-{ } to denote those that are both optional and repeatable,

-and angle brackets < > to denote those that are repeatable but not

-optional.

-Where applicable, default values for optional parameters are shown in parenthesis ( ).

-.SP

-The following parameters are used with, and have the same meaning for,

-several effects:

-.TP

-\fIcentre\fR[\fBk\fR]

-See

-.IR frequency .

-.TP

-\fIfrequency\fR[\fBk\fR]

-A frequency in Hz, or, if appended with `k', kHz.

-.TP

-\fIgain\fR

-A power gain in dB.

-Zero gives no gain; less than zero gives an attenuation.

-.TP

-\fIwidth\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]

-Used to specify the band-width of a filter.  A number of different

-methods to specify the width are available (though not all for every effect);

-one of the characters shown may be appended to select the desired method

-as follows:

-.TS

-center box;

-cI cI lI

-cB c l.

-\ 	Method	Notes

-h	Hz	\

-k	kHz	\

-o	Octaves	\

-q	Q-factor	See [2]

-.TE

-.DT

-.SP

-For each effect that uses this parameter, the default method (i.e. if no

-character is appended) is the one that it listed first in the effect's

-first line of description.

-.PP

-To see if SoX has support for an optional effect, enter

-.B sox \-h

-and look for its name under the list: `EFFECTS'.

-.SS SOX EFFECTS

-.TP

-\fBallpass\fR \fIfrequency\fR[\fBk\fR]\fI width\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]

-Apply a two-pole all-pass filter with central frequency (in Hz)

-\fIfrequency\fR, and filter-width \fIwidth\fR.

-An all-pass filter changes the

-audio's frequency to phase relationship without changing its frequency

-to amplitude relationship.  The filter is described in detail in [1].

-.SP

-This effect supports the \fB\-\-plot\fR global option.

-.TP

-\fBband\fR [\fB\-n\fR] \fIcenter\fR[\fBk\fR]\fR [\fIwidth\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]]

-Apply a band-pass filter.

-The frequency response drops logarithmically

-around the

-.I center

-frequency.

-The

-.I width

-parameter gives the slope of the drop.

-The frequencies at

-.I center

-+

-.I width

-and

-.I center

-\-

-.I width

-will be half of their original amplitudes.

-.B band

-defaults to a mode oriented to pitched audio,

-i.e. voice, singing, or instrumental music.

-The \fB\-n\fR (for noise) option uses the alternate mode

-for un-pitched audio (e.g. percussion).

-.B Warning:

-\fB\-n\fR introduces a power-gain of about 11dB in the filter, so beware

-of output clipping.

-.B band

-introduces noise in the shape of the filter,

-i.e. peaking at the

-.I center

-frequency and settling around it.

-.SP

-This effect supports the \fB\-\-plot\fR global option.

-.SP

-See also \fBfilter\fR for a bandpass filter with steeper shoulders.

-.TP

-\fBbandpass\fR\^|\^\fBbandreject\fR [\fB\-c\fR] \fIfrequency\fR[\fBk\fR]\fI width\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]

-Apply a two-pole Butterworth band-pass or band-reject filter with

-central frequency \fIfrequency\fR, and (3dB-point) band-width

-\fIwidth\fR.  The

-.B \-c

-option applies only to

-.B bandpass

-and selects a constant skirt gain (peak gain = Q) instead of the

-default: constant 0dB peak gain.

-The filters roll off at 6dB per octave (20dB per decade)

-and are described in detail in [1].

-.SP

-These effects support the \fB\-\-plot\fR global option.

-.SP

-See also \fBfilter\fR for a bandpass filter with steeper shoulders.

-.TP

-\fBbandreject \fIfrequency\fR[\fBk\fR]\fI width\fR[\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]

-Apply a band-reject filter.

-See the description of the \fBbandpass\fR effect for details.

-.TP

-\fBbass\fR\^|\^\fBtreble \fIgain\fR [\fIfrequency\fR[\fBk\fR]\fR [\fIwidth\fR[\fBs\fR\^|\^\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]]]

-Boost or cut the bass (lower) or treble (upper) frequencies of the audio

-using a two-pole shelving filter with a response similar to that

-of a standard hi-fi's tone-controls.  This is also

-known as shelving equalisation (EQ).

-.SP

-\fIgain\fR gives the gain at 0\ Hz (for \fBbass\fR), or whichever is

-the lower of \(ap22\ kHz and the Nyquist frequency (for \fBtreble\fR).  Its

-useful range is about \-20 (for a large cut) to +20 (for a large

-boost).

-Beware of

-.B Clipping

-when using a positive \fIgain\fR.

-.SP

-If desired, the filter can be fine-tuned using the following

-optional parameters:

-.SP

-\fIfrequency\fR sets the filter's central frequency and so can be

-used to extend or reduce the frequency range to be boosted or

-cut.  The default value is 100\ Hz (for \fBbass\fR) or 3\ kHz (for

-\fBtreble\fR).

-.SP

-\fIwidth\fR

-determines how

-steep is the filter's shelf transition.  In addition to the common

-width specification methods described above,

-`slope' (the default, or if appended with `\fBs\fR') may be used.

-The useful range of `slope' is

-about 0\*d3, for a gentle slope, to 1 (the maximum), for a steep slope; the

-default value is 0\*d5.

-.SP

-The filters are described in detail in [1].

-.SP

-These effects support the \fB\-\-plot\fR global option.

-.SP

-See also \fBequalizer\fR for a peaking equalisation effect.

-.TP

-\fBchorus \fIgain-in gain-out\fR <\fIdelay decay speed depth \fB\-s\fR\^|\^\fB\-t\fR>

-Add a chorus effect to the audio.  This can make a single vocal sound

-like a chorus, but can also be applied to instrumentation.

-.SP

-Chorus resembles an echo effect with a short delay, but

-whereas with echo the delay is constant, with chorus, it

-is varied using sinusoidal or triangular modulation.  The modulation

-depth defines the range the modulated delay is played before or after the

-delay. Hence the delayed sound will sound slower or faster, that is the delayed

-sound tuned around the original one, like in a chorus where some vocals are

-slightly off key.

-See [3] for more discussion of the chorus effect.

-.SP

-Each four-tuple parameter

-delay/decay/speed/depth gives the delay in milliseconds

-and the decay (relative to gain-in) with a modulation

-speed in Hz using depth in milliseconds.

-The modulation is either sinusoidal (\fB\-s\fR) or triangular

-(\fB\-t\fR).  Gain-out is the volume of the output.

-.SP

-A typical delay is around 40ms to 60ms; the modulation speed is best

-near 0\*d25Hz and the modulation depth around 2ms.

-For example, a single delay:

-.EX

-	play guitar1.wav chorus 0.7 0.9 55 0.4 0.25 2 \-t

-.EE

-Two delays of the original samples:

-.EX

-	play guitar1.wav chorus 0.6 0.9 50 0.4 0.25 2 \-t \\

-		 60 0.32 0.4 1.3 \-s

-.EE

-A fuller sounding chorus (with three additional delays):

-.EX

-	play guitar1.wav chorus 0.5 0.9 50 0.4 0.25 2 \-t \\

-		 60 0.32 0.4 2.3 \-t 40 0.3 0.3 1.3 \-s

-.EE

-.TP

-\fBcompand \fIattack1\fB,\fIdecay1\fR{\fB,\fIattack2\fB,\fIdecay2\fR}

-[\fIsoft-knee-dB\fB:\fR]\fIin-dB1\fR[\fB,\fIout-dB1\fR]{\fB,\fIin-dB2\fB,\fIout-dB2\fR}

-.br

-[\fIgain\fR [\fIinitial-volume-dB\fR [\fIdelay\fR]]]

-.SP

-Compand (compress or expand) the dynamic range of the audio.

-.SP

-The

-.I attack

-and

-.I decay

-parameters (in seconds) determine the time over which the

-instantaneous level of the input signal is averaged to determine its

-volume; attacks refer to increases in volume and decays refer to

-decreases.

-For most situations, the attack time (response to the music getting

-louder) should be shorter than the decay time because the human ear is more

-sensitive to sudden loud music than sudden soft music.

-Where more than one pair of attack/decay parameters are

-specified, each input channel is companded separately and the number of

-pairs must agree with the number of input channels.

-Typical values are

-.B 0\*d3,0\*d8

-seconds.

-.SP

-The second parameter is a list of points on the compander's transfer

-function specified in dB relative to the maximum possible signal

-amplitude.  The input values must be in a strictly increasing order but

-the transfer function does not have to be monotonically rising.  If

-omitted, the value of

-.I out-dB1

-defaults to the same value as

-.IR in-dB1 ;

-levels below

-.I in-dB1

-are not companded (but may have gain applied to them).

-The point \fB0,0\fR is assumed but may be overridden (by

-\fB0,\fIout-dBn\fR).

-If the list is preceded by a

-.I soft-knee-dB

-value, then the points at where adjacent line segments on the

-transfer function meet will be rounded by the amount given.

-Typical values for the transfer function are

-.BR 6:\-70,\-60,\-20 .

-.SP

-The third (optional) parameter is an additional gain in dB to be applied

-at all points on the transfer function and allows easy adjustment

-of the overall gain.

-.SP

-The fourth (optional) parameter is an initial level to be assumed for

-each channel when companding starts.  This permits the user to supply a

-nominal level initially, so that, for example, a very large gain is not

-applied to initial signal levels before the companding action has begun

-to operate: it is quite probable that in such an event, the output would

-be severely clipped while the compander gain properly adjusts itself.

-A typical value (for audio which is initially quiet) is

-.B \-90

-dB.

-.SP

-The fifth (optional) parameter is a delay in seconds.  The input signal

-is analysed immediately to control the compander, but it is delayed

-before being fed to the volume adjuster.  Specifying a delay

-approximately equal to the attack/decay times allows the compander to

-effectively operate in a `predictive' rather than a reactive mode.

-A typical value is

-.B 0\*d2

-seconds.

-.SP

-This effect supports the \fB\-\-plot\fR global option (for the transfer function).

-.SP

-The following example might be used to make a piece of music with both

-quiet and loud passages suitable for listening to in a noisy environment

-such as a moving vehicle:

-.EX

-	sox asz.au asz-car.au compand 0.3,1 6:-70,-60,-20 -5 -90 0.2

-.EE

-The transfer function (`6:\-70,...') says that very soft sounds (below

-\-70dB) will remain unchanged.  This will stop the compander from

-boosting the volume on `silent' passages such as between movements.

-However, sounds in the range \-60dB to 0dB (maximum

-volume) will be boosted so that the 60dB dynamic range of the

-original music will be compressed 3-to-1 into a 20dB range, which is

-wide enough to enjoy the music but narrow enough to get around the

-road noise.  The `6:' selects 6dB soft-knee companding.

-The \-5 (dB) output gain is needed to avoid clipping (the number is

-inexact, and was derived by experimentation).

-The \-90 (dB) for the initial volume will work fine for a clip that starts

-with near silence, and the delay of 0\*d2 (seconds) has the effect of causing

-the compander to react a bit more quickly to sudden volume changes.

-.SP

-See also

-.B mcompand

-for a multiple-band companding effect.

-.TP

-\fBcontrast [\fIenhancement-amount (75)\fR]

-Comparable with compression, this effect modifies an audio signal to

-make it sound louder.

-.I enhancement-amount

-controls the amount of the enhancement and is a number in the range 0\-100.

-Note that

-.I enhancement-amount

-= 0 still gives a significant contrast enhancement.

-.B contrast

-is often used in conjunction with the

-.B norm

-effect as follows:

-.EX

-	sox infile outfile norm -i contrast

-.EE

-.TP

-\fBdcshift \fIshift\fR [\fIlimitergain\fR]

-DC Shift the audio, with basic linear amplitude formula.

-This is most useful if your audio tends to not be centered around

-a value of 0.  Shifting it back will allow you to get the most volume

-adjustments without clipping.

-.SP

-The first option is the \fIdcshift\fR value.  It is a floating point number that

-indicates the amount to shift.

-.SP

-An optional

-.I limitergain

-can be specified as well.  It should have a value much less than 1

-(e.g. 0\*d05 or 0\*d02) and is used only on peaks to prevent clipping.

-.SP

-An alternative approach to removing a DC offset (albeit with a short delay)

-is to use the

-.B highpass

-filter effect at a frequency of say 10Hz, as illustrated in the following

-example:

-.EX

-	sox -n out.au synth 5 sin %0 50 highpass 10

-.EE

-.TP

-\fBdeemph\fR

-Apply ISO 908 de-emphasis (a treble attenuation shelving filter) to

-44\*d1kHz (Compact Disc) audio.

-.SP

-Pre-emphasis was applied in the mastering of some CDs issued in the early

-1980s.  These included many classical music albums, as well as now

-sought-after issues of albums by The Beatles, Pink Floyd and others.

-Pre-emphasis should be removed at playback time by a de-emphasis

-filter in the playback device.  However, not all modern CD players have

-this filter, and very few PC CD drives have it; playing pre-emphasised

-audio without the correct de-emphasis filter results in audio that sounds harsh

-and is far from what its creators intended.

-.SP

-With the

-.B deemph

-effect, it is possible to apply the necessary de-emphasis to audio that

-has been extracted from a pre-emphasised CD, and then either burn the

-de-emphasised audio to a new CD (which will then play correctly on any

-CD player), or simply play the correctly de-emphasised audio files on the

-PC.  For example:

-.EX

-	sox track1.wav track1-deemph.wav deemph

-.EE

-and then burn track1-deemph.wav to CD, or

-.EX

-	play track1-deemph.wav

-.EE

-or simply

-.EX

-	play track1.wav deemph

-.EE

-The de-emphasis filter is implemented as a biquad; its maximum deviation

-from the ideal response is only 0\*d06dB (up to 20kHz).

-.SP

-This effect supports the \fB\-\-plot\fR global option.

-.SP

-See also the \fBbass\fR and \fBtreble\fR shelving equalisation effects.

-.TP

-\fBdelay\fR {\fIlength\fR}

-Delay one or more audio channels.

-.I length

-can specify a time or, if appended with an `s', a number of samples.

-For example,

-.B delay 1\*d5 0 0\*d5

-delays the first channel by 1\*d5 seconds, the third channel by 0\*d5

-seconds, and leaves the second channel (and any other channels that may be

-present) un-delayed.

-The following (one long) command plays a chime sound:

-.EX

-	play -n synth sin %-21.5 sin %-14.5 sin %-9.5 sin %-5.5 \\

-	  sin %-2.5 sin %2.5 gain -5.4 fade h 0.008 2 1.5 \\

-	  delay 0 .27 .54 .76 1.01 1.3 remix - fade h 0.1 2.72 2.5

-.EE

-.TP

-\fBdither\fR [\fIdepth\fR]

-Apply dithering to the audio.

-Dithering deliberately adds digital white noise to the signal

-in order to mask audible quantization effects that

-can occur if the output sample size is less than 24 bits.

-By default, the amount of noise added is \(12 bit;

-the optional \fIdepth\fR parameter is a (linear or voltage)

-multiplier of this amount.

-.SP

-This effect should not be followed by any other effect that

-affects the audio.

-.TP

-\fBearwax\fR

-Makes audio easier to listen to on headphones.

-Adds `cues' to 44\*d1kHz stereo (i.e. audio CD format) audio so that

-when listened to on headphones the stereo image is

-moved from inside

-your head (standard for headphones) to outside and in front of the

-listener (standard for speakers).  See

-http://www.geocities.com/beinges

-for a full explanation.

-.TP

-\fBecho \fIgain-in gain-out\fR <\fIdelay decay\fR>

-Add echoing to the audio.

-Echoes are reflected sound and can occur naturally amongst mountains

-(and sometimes large buildings) when talking or shouting; digital echo

-effects emulate this behaviour and are often used to help fill

-out the sound of a single instrument or vocal.  The time difference

-between the original signal and the reflection is the `delay' (time),

-and the loudness of the reflected signal is the `decay'.  Multiple echoes

-can have different delays and decays.

-.SP

-Each given

-.I "delay decay"

-pair gives the delay in milliseconds

-and the decay (relative to gain-in) of that echo.

-Gain-out is the volume of the output.

-For example:

-This will make it sound as if there are twice as many instruments as are

-actually playing:

-.EX

-	play lead.aiff echo 0.8 0.88 60 0.4

-.EE

-If the delay is very short, then it sounds like a (metallic) robot playing

-music:

-.EX

-	play lead.aiff echo 0.8 0.88 6 0.4

-.EE

-A longer delay will sound like an open air concert in the mountains:

-.EX

-	play lead.aiff echo 0.8 0.9 1000 0.3

-.EE

-One mountain more, and:

-.EX

-	play lead.aiff echo 0.8 0.9 1000 0.3 1800 0.25

-.EE

-.TP

-\fBechos \fIgain-in gain-out\fR <\fIdelay decay\fR>

-Add a sequence of echoes to the audio.

-Each

-.I "delay decay"

-pair gives the delay in milliseconds

-and the decay (relative to gain-in) of that echo.

-Gain-out is the volume of the output.

-.SP

-Like the echo effect, echos stand for `ECHO in Sequel', that is the first echos

-takes the input, the second the input and the first echos, the third the input

-and the first and the second echos, ... and so on.

-Care should be taken using many echos; a single echos

-has the same effect as a single echo.

-.SP

-The sample will be bounced twice in symmetric echos:

-.EX

-	play lead.aiff echos 0.8 0.7 700 0.25 700 0.3

-.EE

-The sample will be bounced twice in asymmetric echos:

-.EX

-	play lead.aiff echos 0.8 0.7 700 0.25 900 0.3

-.EE

-The sample will sound as if played in a garage:

-.EX

-	play lead.aiff echos 0.8 0.7 40 0.25 63 0.3

-.EE

-.TP

-\fBequalizer \fIfrequency\fR[\fBk\fR]\fI width\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR] \fIgain\fR

-Apply a two-pole peaking equalisation (EQ) filter.

-With this filter, the signal-level at and around a selected frequency

-can be increased or decreased, whilst (unlike band-pass and band-reject

-filters) that at all other frequencies is unchanged.

-.SP

-\fIfrequency\fR gives the filter's central frequency in Hz,

-\fIwidth\fR, the band-width,

-and \fIgain\fR the required gain

-or attenuation in dB.

-Beware of

-.B Clipping

-when using a positive \fIgain\fR.

-.SP

-In order to produce complex equalisation curves, this effect

-can be given several times, each with a different central frequency.

-.SP

-The filter is described in detail in [1].

-.SP

-This effect supports the \fB\-\-plot\fR global option.

-.SP

-See also \fBbass\fR and \fBtreble\fR for shelving equalisation effects.

-.TP

-\fBfade\fR [\fItype\fR] \fIfade-in-length\fR [\fIstop-time\fR [\fIfade-out-length\fR]]

-Add a fade effect to the beginning, end, or both of the audio.

-.SP

-For fade-ins, this starts from the first sample and ramps the volume of the audio from 0 to full volume over \fIfade-in-length\fR seconds.  Specify 0 seconds if no fade-in is wanted.

-.SP

-For fade-outs, the audio will be truncated at

-.I stop-time

-and

-the volume will be ramped from full volume down to 0 starting at

-\fIfade-out-length\fR seconds before the \fIstop-time\fR.  If

-.I fade-out-length

-is not specified, it defaults to the same value as

-\fIfade-in-length\fR.

-No fade-out is performed if

-.I stop-time

-is not specified.

-If the file length can be determined from the input file header and length-changing effects are not in effect, then \fB0\fR may be specified for

-.I stop-time

-to indicate the usual case of a fade-out that ends at the end of the input

-audio stream.

-.SP

-All times can be specified in either periods of time or sample counts.

-To specify time periods use the hh:mm:ss.frac format.  To specify

-using sample counts, specify the number of samples and append the letter `s'

-to the sample count (for example `8000s').

-.SP

-An optional \fItype\fR can be specified to change the type of envelope.  Choices are \fBq\fR for quarter of a sine wave, \fBh\fR for half a sine wave, \fBt\fR for linear slope, \fBl\fR for logarithmic, and \fBp\fR for inverted parabola.  The default is logarithmic.

-.TP

-\fBfilter\fR [\fIlow\fR]\fB\-\fR[\fIhigh\fR] [\fIwindow-len\fR [\fIbeta\fR]]

-Apply a sinc-windowed lowpass, highpass, or bandpass filter of given

-window length to the signal.

-\fIlow\fR refers to the frequency of the lower 6dB corner of the filter.

-\fIhigh\fR refers to the frequency of the upper 6dB corner of the filter.

-.SP

-A low-pass filter is obtained by leaving \fIlow\fR unspecified, or 0.

-A high-pass filter is obtained by leaving \fIhigh\fR unspecified, or 0,

-or greater than or equal to the Nyquist frequency.

-.SP

-The \fIwindow-len\fR, if unspecified, defaults to 128.

-Longer windows give a sharper cut-off, smaller windows a more gradual cut-off.

-.SP

-The \fIbeta\fR parameter

-determines the type of filter window used.  Any value greater than 2 is

-the beta for a Kaiser window.  Beta \(<= 2 selects a Nuttall window.

-If unspecified, the default is a Kaiser window with beta 16.

-.SP

-In the case of Kaiser window (beta > 2), lower betas produce a

-somewhat faster transition from pass-band to stop-band, at the cost of

-noticeable artifacts. A beta of 16 is the default, beta less than 10

-is not recommended. If you want a sharper cut-off, don't use low

-beta's, use a longer sample window. A Nuttall window is selected by

-specifying any `beta' \(<= 2, and the Nuttall window has somewhat

-steeper cut-off than the default Kaiser window. You will probably not

-need to use the beta parameter at all, unless you are just curious

-about comparing the effects of Nuttall vs. Kaiser windows.

-.TP

-\fBflanger\fR [\fIdelay depth regen width speed shape phase interp\fR]

-Apply a flanging effect to the audio.

-See [3] for a detailed description of flanging.

-.SP

-All parameters are optional (right to left).

-.TS

-center box;

-cB cB cB lB

-cI c c l.

-\ 	Range	Default	Description

-delay	0 \- 10	0	Base delay in milliseconds.

-depth	0 \- 10	2	Added swept delay in milliseconds.

-regen	\-95 \- 95	0	T{

-.na

-Percentage regeneration (delayed signal feedback).

-T}

-width	0 \- 100	71	T{

-.na

-Percentage of delayed signal mixed with original.

-T}

-speed	0\*d1 \- 10	0\*d5	Sweeps per second (Hz).

-shape	\ 	sin	Swept wave shape: \fBsine\fR\^|\^\fBtriangle\fR.

-phase	0 \- 100	25	T{

-.na

-Swept wave percentage phase-shift for multi-channel (e.g. stereo) flange;

-0 = 100 = same phase on each channel.

-T}

-interp	\ 	lin	T{

-.na

-Digital delay-line interpolation: \fBlinear\fR\^|\^\fBquadratic\fR.

-T}

-.TE

-.DT

-.TP

-\fBgain \fIdB-gain\fR

-Apply an amplification or an attenuation to the audio signal.

-This is an alias for the

-.B vol

-effect\*mhandy for those who prefer to work in dBs by default.

-.TP

-\fBhighpass\fR\^|\^\fBlowpass\fR [\fB\-1\fR|\fB\-2\fR] \fIfrequency\fR[\fBk\fR]\fR [\fIwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR]]

-Apply a high-pass or low-pass filter with 3dB point \fIfrequency\fR.

-The filter can be either single-pole (with

-.BR \-1 ),

-or double-pole (the default, or with

-.BR \-2 ).

-.I width

-applies only to double-pole filters;

-the default is Q = 0\*d707 and gives a Butterworth response.  The filters

-roll off at 6dB per pole per octave (20dB per pole per decade).  The

-double-pole filters are described in detail in [1].

-.SP

-These effects support the \fB\-\-plot\fR global option.

-.SP

-See also \fBfilter\fR for filters with a steeper roll-off.

-.TP

-\fBladspa\fR \fBmodule\fR [\fBplugin\fR] [\fBargument\fR...]

-Apply a LADSPA [5] (Linux Audio Developer's Simple Plugin API) plugin.

-Despite the name, LADSPA is not Linux-specific, and a wide range of

-effects is available as LADSPA plugins, such as cmt [6] (the Computer

-Music Toolkit) and Steve Harris's plugin collection [7]. The first

-argument is the plugin module, the second the name of the plugin (a

-module can contain more than one plugin) and any other arguments are

-for the control ports of the plugin. Missing arguments are supplied by

-default values if possible. Only plugins with at most one audio input

-and one audio output port can be used.  If found, the environment variable

-LADSPA_PATH will be used as search path for plugins.

-.TP

-\fBloudness [\fIgain\fR [\fIreference\fR]]

-Loudness control\*msimilar to the

-.B gain

-effect, but provides equalisation for the human auditory system.  See

-http://en.wikipedia.org/wiki/Loudness for a detailed description of

-loudness.  The gain is adjusted by the given

-.I gain

-parameter (usually negative) and the signal equalised according to ISO

-226 w.r.t. a reference level of 65dB, though an alternative

-.I reference

-level may be given if the original audio has been equalised for some

-other optimal level.

-.SP

-See also the

-.B gain

-effect.

-.TP

-\fBlowpass\fR [\fB\-1\fR|\fB\-2\fR] \fIfrequency\fR[\fBk\fR]\fR [\fIwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR]]

-Apply a low-pass filter.

-See the description of the \fBhighpass\fR effect for details.

-.TP

-\fBmcompand\fR \(dq\fIattack1\fB,\fIdecay1\fR{\fB,\fIattack2\fB,\fIdecay2\fR}

-[\fIsoft-knee-dB\fB:\fR]\fIin-dB1\fR[\fB,\fIout-dB1\fR]{\fB,\fIin-dB2\fB,\fIout-dB2\fR}

-.br

-[\fIgain\fR [\fIinitial-volume-dB\fR [\fIdelay\fR]]]\(dq {\fIxover-freq\fR[\fBk\fR] \(dqattack1,...\(dq}

-.SP

-The multi-band compander is similar to the single-band compander but the

-audio is first divided into bands using Butterworth cross-over filters

-and a separately specifiable compander run on each band.  See the

-\fBcompand\fR effect for the definition of its parameters.  Compand

-parameters are specified between double quotes and the crossover

-frequency for that band is given by \fIxover-freq\fR; these can be

-repeated to create multiple bands.

-.SP

-For example, the following (one long) command shows how multi-band

-companding is typically used in FM radio:

-.EX

-	play track1.wav gain -3 filter 8000- 32 100 mcompand \\

-	\(dq0.005,0.1 -47,-40,-34,-34,-17,-33\(dq 100 \\

-	\(dq0.003,0.05 -47,-40,-34,-34,-17,-33\(dq 400 \\

-	\(dq0.000625,0.0125 -47,-40,-34,-34,-15,-33\(dq 1600 \\

-	\(dq0.0001,0.025 -47,-40,-34,-34,-31,-31,-0,-30\(dq 6400 \\

-	\(dq0,0.025 -38,-31,-28,-28,-0,-25\(dq \\

-	gain 15 highpass 22 highpass 22 filter -17500 256 \\

-	gain 9 lowpass -1 17801

-.EE

-The audio file is played with a simulated FM radio sound (or broadcast

-signal condition if the lowpass filter at the end is skipped).

-Note that the pipeline is set up with US-style 75us preemphasis.

-.SP

-See also

-.B compand

-for a single-band companding effect.

-.TP

-\fBmixer\fR [ \fB\-l\fR\^|\^\fB\-r\fR\^|\^\fB\-f\fR\^|\^\fB\-b\fR\^|\^\fB\-1\fR\^|\^\fB\-2\fR\^|\^\fB\-3\fR\^|\^\fB\-4\fR\^|\^\fIn\fR{\fB,\fIn\fR} ]

-Reduce the number of audio channels by mixing or selecting channels,

-or increase the number of channels by duplicating channels.

-Note: this effect operates on the audio

-.I channels

-within the SoX effects processing chain; it should not be confused with the

-.B \-m

-global option (where multiple

-.I files

-are mix-combined before entering the effects chain).

-.SP

-This effect is automatically used when the number of input

-channels differs from the number of output channels.  When reducing

-the number of channels it is possible to manually specify the

-.B mixer

-effect and use the \fB\-l\fR, \fB\-r\fR, \fB\-f\fR, \fB\-b\fR,

-\fB\-1\fR, \fB\-2\fR, \fB\-3\fR, \fB\-4\fR, options to select only

-the left, right, front, back channel(s) or specific channel

-for the output instead of averaging the channels.

-The \fB\-l\fR, and \fB\-r\fR options will do averaging

-in quad-channel files so select the exact channel to prevent this.

-.SP

-The

-.B mixer

-effect can also be invoked with up to 16

-numbers, separated by commas, which specify the proportion (0 = 0% and 1 = 100%)

-of each input channel that is to be mixed into each output channel.

-In two-channel mode, 4 numbers are given: l \*(RA l, l \*(RA r, r \*(RA l, and r \*(RA r,

-respectively.

-In four-channel mode, the first 4 numbers give the proportions for the

-left-front output channel, as follows: lf \*(RA lf, rf \*(RA lf, lb \*(RA lf, and

-rb \*(RA lf.

-The next 4 give the right-front output in the same order, then

-left-back and right-back.

-.SP

-It is also possible to use the 16 numbers to expand or reduce the

-channel count; just specify 0 for unused channels.

-.SP

-Finally, certain reduced combinations of numbers can be specified

-for certain input/output channel combinations.

-.TS

-center box ;

-cB cB cB lB

-c c c l .

-In Ch	Out Ch	Num	Mappings

-2	1	2	l \*(RA l, r \*(RA l

-2	2	1	adjust balance

-4	1	4	lf \*(RA l, rf \*(RA l, lb \*(RA l, rb \*(RA l

-4	2	2	lf \*(RA l&rf \*(RA r, lb \*(RA l&rb \*(RA r

-4	4	1	adjust balance

-4	4	2	front balance, back balance

-.TE

-.DT

-.SP

-See also

-.B remix

-for a mixing effect that handles any number of channels.

-.TP

-\fBnoiseprof\fR [\fIprofile-file\fR]

-Calculate a profile of the audio for use in noise reduction.  See the

-description of the \fBnoisered\fR effect for details.

-.TP

-\fBnoisered\fR [\fIprofile-file\fR [\fIamount\fR]]

-Reduce noise in the audio signal by profiling and filtering.  This

-effect is moderately effective at removing consistent background noise

-such as hiss or hum.  To use it, first run SoX with the \fBnoiseprof\fR

-effect on a section of audio that ideally would contain silence but in

-fact contains noise\*msuch sections are typically found at the beginning

-or the end of a recording.  \fBnoiseprof\fR will write out a noise

-profile to \fIprofile-file\fR, or to stdout if no \fIprofile-file\fR or

-if `\-' is given.  E.g.

-.EX

-	sox speech.au -n trim 0 1.5 noiseprof speech.noise-profile

-.EE

-To actually remove the noise, run SoX again, this time with the \fBnoisered\fR

-effect;

-.B noisered

-will reduce noise according to a noise profile (which was generated by

-.BR noiseprof ),

-from

-.IR profile-file ,

-or from stdin if no \fIprofile-file\fR or if `\-' is given.  E.g.

-.EX

-	sox speech.au cleaned.au noisered speech.noise-profile 0.3

-.EE

-How much noise should be removed is specified by

-.IR amount \*ma

-number between 0 and 1 with a default of 0\*d5.  Higher numbers will

-remove more noise but present a greater likelihood of removing wanted

-components of the audio signal.  Before replacing an original recording

-with a noise-reduced version, experiment with different

-.I amount

-values to find the optimal one for your audio; use headphones to check

-that you are happy with the results, paying particular attention to quieter

-sections of the audio.

-.SP

-On most systems, the two stages\*mprofiling and reduction\*mcan be combined

-using a pipe, e.g.

-.EX

-	sox noisy.au -n trim 0 1 noiseprof | play noisy.au noisered

-.EE

-.TP

-\fBnorm\fR [\fB\-i\fR\^|\^\fB\-b\fR] [\fIlevel\fR]

-Normalise audio to 0dB FSD, to a given level relative to 0dB, or normalise

-the balance of multi-channel audio.

-Requires temporary file space to store the audio to be normalised.

-.SP

-To create a normalised copy of an audio file,

-.EX

-	sox infile outfile norm

-.EE

-can be used, though note that if `infile' has a simple encoding (e.g.

-PCM), then

-.EX

-	sox infile outfile vol \`sox infile -n stat -v 2>&1\`

-.EE

-(on systems that support this construct) might be quicker to execute

-(though perhaps not to type!) as it doesn't require a temporary file.

-.SP

-For a more complex example, suppose that `effect1' performs some unknown

-or unpredictable attenuation and that `effect2' requires up to 10dB of

-headroom, then

-.EX

-	sox infile outfile effect1 norm -10 effect2 norm

-.EE

-gives both effect2 and the output file the highest possible signal

-levels.

-.SP

-Normally, audio is normalised based on the level of the channel with

-the highest peak level, which means that whilst all channels are adjusted,

-only one channel attains

-the normalised level.  If the

-.B \-i

-option is given, then each channel is treated individually and

-will attain the normalised level.

-.SP

-If the

-.B \-b

-option is given (with a multi-channel audio file), then the audio

-channels will be balanced; i.e. the RMS level of each channel will be

-normalised to that of the channel with the highest RMS level.  This can

-be used, for example, to correct stereo imbalance.  Note that

-.B \-b

-can cause clipping.

-.SP

-In most cases,

-.B norm \-3

-should be the maximum level used at the output file (to leave headroom

-for playback-resampling, etc.).  See also the discussions of clipping

-and Replay Gain in

-.BR sox (1).

-.TP

-\fBoops\fR

-Out Of Phase Stereo effect.

-Mixes stereo to twin-mono where each mono channel contains the

-difference between the left and right stereo channels.

-This is sometimes known as the `karaoke' effect as it often has the effect

-of removing most or all of the vocals from a recording.

-.TP

-\fBpad\fR { \fIlength\fR[\fB@\fIposition\fR] }

-Pad the audio with silence, at the beginning, the end, or any

-specified points through the audio.

-Both

-.I length

-and

-.I position

-can specify a time or, if appended with an `s', a number of samples.

-.I length

-is the amount of silence to insert and

-.I position

-the position in the input audio stream at which to insert it.

-Any number of lengths and positions may be specified, provided that

-a specified position is not less than the previous one.

-.I position

-is optional for the first and last lengths specified and

-if omitted correspond to the beginning and the end of the audio respectively.

-For example,

-.B pad 1\*d5 1\*d5

-adds 1\*d5 seconds of silence padding at each end of the audio, whilst

-.B pad 4000s@3:00

-inserts 4000 samples of silence 3 minutes into the audio.

-If silence is wanted only at the end of the audio, specify either the end

-position or specify a zero-length pad at the start.

-.TP

-\fBphaser \fIgain-in gain-out delay decay speed\fR [\fB\-s\fR\^|\^\fB\-t\fR]

-Add a phasing effect to the audio.

-See [3] for a detailed description of phasing.

-.SP

-delay/decay/speed gives the delay in milliseconds

-and the decay (relative to gain-in) with a modulation

-speed in Hz.

-The modulation is either sinusoidal (\fB\-s\fR) \*mpreferable for multiple

-instruments, or triangular

-(\fB\-t\fR) \*mgives single instruments a sharper phasing effect.

-The decay should be less than 0\*d5 to avoid

-feedback, and usually no less than 0\*d1.  Gain-out is the volume of the output.

-.SP

-For example:

-.EX

-	play snare.flac phaser 0.8 0.74 3 0.4 0.5 -t

-.EE

-Gentler:

-.EX

-	play snare.flac phaser 0.9 0.85 4 0.23 1.3 -s

-.EE

-A popular sound:

-.EX

-	play snare.flac phaser 0.89 0.85 1 0.24 2 -t

-.EE

-More severe:

-.EX

-	play snare.flac phaser 0.6 0.66 3 0.6 2 -t

-.EE

-.TP

-\fBpitch \fR[\fB\-q\fR] \fIshift\fR [\fIsegment\fR [\fIsearch\fR [\fIoverlap\fR]]]

-Change the audio pitch (but not tempo).

-.SP

-.I shift

-gives the pitch shift as positive or negative `cents' (i.e. 100ths of a

-semitone).  See the

-.B tempo

-effect for a description of the other parameters.

-.TP

-\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [override-options] \fIRATE\fR[\fBk\fR]

-Change the audio sampling rate (i.e. resample the audio) to any given

-.I RATE

-(even non-integer if this is supported by the output file format)

-using a quality level defined as follows:

-.TS

-center box;

-cI cI2w9 cI cI2w6 cIw6 lIw17

-cB c c c c l.

-\ 	Quality	T{

-\ Phase Response

-T}	T{

-Band-width

-T}	Rej dB	T{

-.na

-Typical Use

-T}

-\-q	T{

-.na

-quick

-T}	linear	n/a	T{

-.na

-\(~=30 @ \ Fs/4

-T}	T{

-.na

-playback on ancient hardware

-T}

-\-l	low	linear	80%	100	T{

-.na

-playback on old hardware

-T}

-\-m	medium	intermediate	95%	100	T{

-.na

-audio playback

-T}

-\-h	high	intermediate	95%	125	T{

-.na

-16-bit mastering (use with dither)

-T}

-\-v	T{

-.na

-very high

-T}	intermediate	95%	175	24-bit mastering

-.TE

-.DT

-.SP

-where

-.I Band-width

-is the percentage of the audio frequency band that is preserved and

-.I Rej dB

-is the level of noise rejection.  Increasing levels of resampling

-quality come at the expense of increasing amounts of time to process the

-audio.  If no quality option is given, the quality level used is `high'.

-.SP

-The `quick' algorithm uses cubic interpolation; all others use

-band-limited interpolation.  The `quick' and `low' quality

-algorithms have a `linear' phase response; for `medium', `high' and

-`very high', the phase response is configurable (see below), but

-defaults to `intermediate'.

-.SP

-The

-.B rate

-effect is invoked automatically if SoX's \fB\-r\fR option specifies a

-rate that is different to that of the input file(s).  Alternatively, if

-this effect is given explicitly, then SoX's

-.B \-r

-option need not be given.  For example, the following two commands are

-equivalent:

-.EX

-.ne 2

-	sox input.au -r 48k output.au bass -3

-	sox input.au        output.au bass -3 rate 48k

-.EE

-though the second command is more flexible as it allows

-.B rate

-options to be given, and allows the effects to be ordered arbitrarily.

-.TS

-center;

-c8 c8 c.

-*	*	*

-.TE

-.DT

-.SP

-The simple quality selection described above provides settings that

-satisfy the needs of the vast majority of resampling tasks.

-Occasionally, however, it may be desirable to fine-tune the resampler's

-filter response; this can be achieved using

-.IR override\ options ,

-as detailed in the following table:

-.TS

-center box;

-lB lw52.

-\-M/\-I/\-L	Phase response = minimum/intermediate/linear

-\-p\ 0\-100	T{

-.na

-Any phase response (0 = minimum, 25 = intermediate, 50 = linear, 100 = maximum)

-T}

-\-s	Steep filter (band-width = 99%)

-\-b\ 74\-99\*d7	Any band-width %

-\-a	Allow aliasing above the pass-band

-.TE

-.DT

-.SP

-N.B.  Override options can not be used with the `quick' or `low'

-quality algorithms.

-.SP

-All resamplers use filters that can sometimes create `echo' (a.k.a.

-`ringing') artefacts with transient signals such as those that occur

-with `finger snaps' or other highly percussive sounds.  Such artefacts are

-much more noticeable to the human ear if they occur before the transient

-(`pre-echo') than if they occur after it (`post-echo').  The phase

-response setting controls the distribution of any transient echo between

-`pre' and `post': with minimum phase, there is no pre-echo but the

-longest post-echo; with linear phase, pre and post echo are in equal

-amounts (in signal terms, but not audibility terms); the intermediate

-phase setting attempts to find the best compromise by selecting a small

-length (and level) of pre-echo and a medium-length post-echo.

-.SP

-Minimum, intermediate, or linear phase response is selected using the

-.BR \-M ,

-.BR \-I ,

-or

-.B \-L

-option; a custom phase response can be created with the

-.B \-p

-option.  Note that phase responses between `linear' and `maximum'

-(greater than 50) are rarely useful.

-.SP

-A resampler's band-width setting determines how much of the frequency

-content of the original signal (w.r.t. the original sample rate when

-up-sampling, or the new sample rate when down-sampling) is preserved

-during conversion.  The term `pass-band' is used to refer to all frequencies

-up to the band-width point (e.g. for 44\*d1kHz sampling rate, and a

-resampling band-width of 95%, the pass-band represents frequencies from

-0Hz (D.C.) to circa 21kHz).  Increasing the resampler's band-width

-results in a slower conversion and can increase transient echo

-artefacts (and vice versa).

-.SP

-The

-.B \-s

-`steep filter' option changes resampling band-width from the default 95%

-(based on the 3dB point), to 99%.  The

-.B \-b

-option allows the band-width to be set to any value in the range

-74\-99\*d7 %, but note that band-width values greater than 99% are not

-recommended for normal use as they can cause excessive transient echo.

-.SP

-If the

-.B \-a

-option is given, then aliasing above the pass-band is allowed.  For

-example, with 44\*d1kHz sampling rate, and a

-resampling band-width of 95%, this means that frequency content above

-21kHz can be distorted; however, since this is above the pass-band (i.e.

-above the highest frequency of interest/audibility), this may not be a

-problem.  The benefits of allowing aliasing are reduced processing time,

-and reduced (by almost half) transient echo artefacts.

-Note that if this option is given, then

-the minimum band-width allowable with

-.B \-b

-increases to 85%.

-.SP

-Examples:

-.EX

-	sox input.wav -2 output.wav rate -s -a 44100 dither

-.EE

-default (high) quality resampling; overrides: steep filter, allow

-aliasing; to 44\*d1kHz sample rate; dither output to 2-byte (16-bit) WAV

-file.

-.EX

-	sox input.wav -3 output.aiff rate -v -L -b 90 48k

-.EE

-very high quality resampling; overrides: linear phase, band-width 90%;

-to 48k sample rate; store output to 3-byte (24-bit) AIFF file.

-.TS

-center;

-c8 c8 c.

-*	*	*

-.TE

-.DT

-.SP

-The

-.BR key ,

-.B speed

-and

-.B tempo

-effects all use the

-.B rate

-effect at their core.

-.SP

-See also

-.BR resample ,

-.B polyphase

-and

-.B rabbit

-for other sample-rate changing effects.

-.TP

-\fBremix\fR [\fB\-a\fR\^|\^\fB\-m\fR\^|\^\fB\-p\fR] <\fIout-spec\fR>

-\fIout-spec\fR	= \fIin-spec\fR{\fB,\fIin-spec\fR} | \fB0\fR

-.br

-\fIin-spec\fR	= [\fIin-chan\fR]\^[\fB\-\fR[\fIin-chan2\fR]]\^[\fIvol-spec\fR]

-.br

-\fIvol-spec\fR	= \fBp\fR\^|\^\fBi\fR\^|\^\fBv\^\fR[\fIvolume\fR]

-.br

-.SP

-Select and mix input audio channels into output audio channels.  Each output

-channel is specified, in turn, by a given \fIout-spec\fR: a list of

-contributing input channels and volume specifications.

-.SP

-Note that this effect operates on the audio

-.I channels

-within the SoX effects processing chain; it should not be confused with the

-.B \-m

-global option (where multiple

-.I files

-are mix-combined before entering the effects chain).

-.SP

-An

-.I out-spec

-contains comma-separated input channel-numbers and hyphen-delimited

-channel-number ranges; alternatively,

-.B 0

-may be given to create a silent output channel.  For example,

-.EX

-	sox input.au output.au remix 6 7 8 0

-.EE

-creates an output file with four channels, where channels 1, 2, and 3 are

-copies of channels 6, 7, and 8 in the input file, and channel 4 is silent.

-Whereas

-.EX

-	sox input.au output.au remix 1-3,7 3

-.EE

-creates a stereo output file where the left channel is a mix-down of input

-channels 1, 2, 3, and 7, and the right channel is a copy of input channel 3.

-.SP

-Where a range of channels is specified, the channel numbers to the left and

-right of the hyphen are optional and default to 1 and to the number of input

-channels respectively. Thus

-.EX

-	sox input.au output.au remix -

-.EE

-performs a mix-down of all input channels to mono.

-.SP

-By default, where an output channel is mixed from multiple (n) input

-channels, each input channel will be scaled by a factor of \(S1/\s-2n\s+2.

-Custom mixing volumes can be set by following a given input channel or range

-of input channels with a \fIvol-spec\fR (volume specification).

-This is one of the letters \fBp\fR, \fBi\fR, or \fBv\fR,

-followed by a volume number, the meaning of which depends on the given

-letter and is defined as follows:

-.TS

-center;

-lI lI lI

-c l l.

-Letter	Volume number	Notes

-p	power adjust in dB	0 = no change

-i	power adjust in dB	T{

-.na

-As `p', but invert the audio

-T}

-v	voltage multiplier	T{

-.na

-1 = no change, 0\*d5 \(~= 6dB attenuation, 2 \(~= 6dB gain, \-1 = invert

-T}

-.TE

-.DT

-.SP

-If an

-.I out-spec

-includes at least one

-.I vol-spec

-then, by default, \(S1/\s-2n\s+2 scaling is not applied to any other channels in the

-same out-spec (though may be in other out-specs).

-The \-a (automatic)

-option however, can be given to retain the automatic scaling in this

-case.  For example,

-.EX

-	sox input.au output.au remix 1,2 3,4v0.8

-.EE

-results in channel level multipliers of 0\*d5,0\*d5 1,0\*d8, whereas

-.EX

-	sox input.au output.au remix -a 1,2 3,4v0.8

-.EE

-results in channel level multipliers of 0\*d5,0\*d5 0\*d5,0\*d8.

-.SP

-The \-m (manual) option disables all automatic volume adjustments, so

-.EX

-	sox input.au output.au remix -m 1,2 3,4v0.8

-.EE

-results in channel level multipliers of 1,1 1,0\*d8.

-.SP

-The volume number is optional and omitting it corresponds to no volume

-change; however, the only case in which this is useful is in conjunction

-with

-.BR i .

-For example, if

-.I input.au

-is stereo, then

-.EX

-	sox input.au output.au remix 1,2i

-.EE

-is a mono equivalent of the

-.B oops

-effect.

-.SP

-If the \fB\-p\fR option is given, then any automatic \(S1/\s-2n\s+2 scaling

-is replaced by \(S1/\s-2\(srn\s+2 (`power') scaling; this gives a louder mix

-but one that might occasionally clip.

-.TS

-center;

-c8 c8 c.

-*	*	*

-.TE

-.DT

-.SP

-One typical use of the

-.B remix

-effect is to split an audio file into a set of files, each containing

-one of the constituent channels (in order to perform subsequent

-processing on individual audio channels).  Where more than a few

-channels are involved, a script such as the following is useful:

-.EX

-#!/bin/sh                        # This is a Bourne shell script

-chans=\`soxi -c "$1"\`

-while [ $chans -ge 1 ]; do

-  chans0=\`printf %02i $chans\`   # 2 digits hence up to 99 chans

-  out=\`echo "$1"|sed "s/\\(.*\\)\\.\\(.*\\)/\\1-$chans0.\\2/"\`

-  sox "$1" "$out" remix $chans

-  chans=\`expr $chans - 1\`

-done

-.EE

-If a file

-.I input.au

-containing six audio channels were given, the script would produce six

-output files:

-.IR input-01.au ,

-\fIinput-02.au\fR, ...,

-.IR input-06.au .

-.SP

-See also

-.B mixer

-and

-.B swap

-for similar effects.

-.TP

-\fBrepeat \fIcount\fR

-Repeat the entire audio \fIcount\fR times.

-Requires temporary file space to store the audio to be repeated.

-Note that repeating once yields two copies: the original audio and the

-repeated audio.

-.TP

-\fBreverb\fR [\fB\-w\fR|\fB\-\-wet-only\fR] [\fIreverberance\fR (50%) [\fIHF-damping\fR (50%)

-[\fIroom-scale\fR (100%) [\fIstereo-depth\fR (100%)

-.br

-[\fIpre-delay\fR (0ms) [\fIwet-gain\fR (0dB)]]]]]]

-.SP

-Add reverberation to the audio using the `freeverb' algorithm.  A

-reverberation effect is sometimes desirable for concert halls that are too

-small or contain so many people that the hall's natural reverberance is

-diminished.  Applying a small amount of stereo reverb to a (dry) mono signal

-will usually make it sound more natural.  See [3] for a detailed description

-of reverberation.

-.SP

-Note that this effect

-increases both the volume and the length of the audio, so to prevent clipping

-in these domains, a typical invocation might be:

-.EX

-	play dry.au gain -3 pad 0 3 reverb

-.EE

-.TP

-\fBreverse\fR

-Reverse the audio completely.

-Requires temporary file space to store the audio to be reversed.

-.TP

-\fBriaa\fR

-Apply RIAA vinyl playback equalisation.

-The sampling rate must be one of: 44\*d1, 48, 88\*d2, 96 kHz.

-.SP

-This effect supports the \fB\-\-plot\fR global option.

-.TP

-\fBsilence \fR[\fB\-l\fR] \fIabove-periods\fR [\fIduration

-threshold\fR[\fBd\fR\^|\^\fB%\fR] [\fIbelow-periods duration

-threshold\fR[\fBd\fR\^|\^\fB%\fR]]

-.SP

-Removes silence from the beginning, middle, or end of the audio.

-Silence is anything below a specified threshold.

-.SP

-The \fIabove-periods\fR value is used to indicate if audio should be

-trimmed at the beginning of the audio. A value of zero indicates no

-silence should be trimmed from the beginning. When specifying a

-non-zero \fIabove-periods\fR, it trims audio up until it finds

-non-silence. Normally, when trimming silence from beginning of audio

-the \fIabove-periods\fR will be 1 but it can be increased to higher

-values to trim all audio up to a specific count of non-silence

-periods. For example, if you had an audio file with two songs that

-each contained 2 seconds of silence before the song, you could specify

-an \fIabove-period\fR of 2 to strip out both silence periods and the

-first song.

-.SP

-When \fIabove-periods\fR is non-zero, you must also specify a

-\fIduration\fR and \fIthreshold\fR. \fIDuration\fR indicates the

-amount of time that non-silence must be detected before it stops

-trimming audio. By increasing the duration, bursts of noise can be

-treated as silence and trimmed off.

-.SP

-\fIThreshold\fR is used to indicate what sample value you should treat as

-silence.  For digital audio, a value of 0 may be fine but for audio

-recorded from analog, you may wish to increase the value to account

-for background noise.

-.SP

-When optionally trimming silence from the end of the audio, you specify

-a \fIbelow-periods\fR count.  In this case, \fIbelow-period\fR means

-to remove all audio after silence is detected.

-Normally, this will be a value of 1 but it can

-be increased to skip over periods of silence that are wanted.  For example,

-if you have a song with 2 seconds of silence in the middle and 2 seconds

-at the end, you could set below-period to a value of 2 to skip over the

-silence in the middle of the audio.

-.SP

-For \fIbelow-periods\fR, \fIduration\fR specifies a period of silence

-that must exist before audio is not copied any more.  By specifying

-a higher duration, silence that is wanted can be left in the audio.

-For example, if you have a song with an expected 1 second of silence

-in the middle and 2 seconds of silence at the end, a duration of 2

-seconds could be used to skip over the middle silence.

-.SP

-Unfortunately, you must know the length of the silence at the

-end of your audio file to trim off silence reliably.  A work around is

-to use the \fBsilence\fR effect in combination with the \fBreverse\fR effect.

-By first reversing the audio, you can use the \fIabove-periods\fR

-to reliably trim all audio from what looks like the front of the file.

-Then reverse the file again to get back to normal.

-.SP

-To remove silence from the middle of a file, specify a

-\fIbelow-periods\fR that is negative.  This value is then

-treated as a positive value and is also used to indicate the

-effect should restart processing as specified by the

-\fIabove-periods\fR, making it suitable for removing periods of

-silence in the middle of the audio.

-.SP

-The option

-.B \-l

-indicates that \fIbelow-periods\fR \fIduration\fR length of audio

-should be left intact at the beginning of each period of silence.

-For example, if you want to remove long pauses between words

-but do not want to remove the pauses completely.

-.SP

-The \fIperiod\fR counts are in units of samples. \fIDuration\fR counts

-may be in the format of hh:mm:ss.frac, or the exact count of samples.

-\fIThreshold\fR numbers may be suffixed with

-.B d

-to indicate the value is in decibels, or

-.B %

-to indicate a percentage of maximum value of the sample value

-(\fB0%\fR specifies pure digital silence).

-.SP

-The following example shows how this effect can be used to start a recording

-that does not contain the delay at the start which usually occurs between

-`pressing the record button' and the start of the performance:

-.EX

-	rec \fIparameters filename other-effects\fR silence 1 5 2%

-.EE

-.TP

-\fBspeed \fIfactor\fR[\fBc\fR]

-Adjust the audio speed (pitch and tempo together).  \fIfactor\fR

-is either the ratio of the new speed to the old speed: greater

-than 1 speeds up, less than 1 slows down, or, if appended with the

-letter

-`c', the number of cents (i.e. 100ths of a semitone) by

-which the pitch (and tempo) should be adjusted: greater than 0

-increases, less than 0 decreases.

-.SP

-By default, the speed change is performed by resampling with the \fBrate\fR

-effect using its default quality/speed.  For higher quality or higher speed

-resampling, in addition to the \fBspeed\fR effect, specify

-the \fBrate\fR effect with the desired quality option.

-.TP

-\fBspectrogram \fR[options]

-Create a spectrogram of the audio.  This effect is optional; type \fBsox

-\-\-help\fR and check the list of supported effects to see if it has

-been included.

-.SP

-The spectrogram is rendered in a Portable Network Graphic (PNG) file,

-and shows time in the X-axis, frequency in the Y-axis, and audio signal

-magnitude in the Z-axis.  Z-axis values are represented by the colour

-(or intensity) of the pixels in the X-Y plane.

-.SP

-This effect supports only one channel; for multi-channel input files,

-use either SoX's

-.B \-c 1

-option with the output file (to obtain a spectrogram on the mix-down),

-or the

-.B remix

-.I n

-effect to select a particular channel.  Be aware though, that both of

-these methods affect the audio in the effects chain.

-.RS

-.IP \fB\-x\ \fInum\fR

-X-axis pixels/second, default 100.  This controls the width of the

-spectrogram;

-.I num

-can be from 1 (low time resolution) to 5000 (high time resolution)

-and need not be an integer.  SoX

-may make a slight adjustment to the given number for processing

-quantisation reasons; if so, SoX will report the actual number used

-(viewable when

-.B \-\-verbose

-is in effect).

-.SP

-The maximum width of the spectrogram is 999 pixels; if the audio length

-and the given

-.B \-x

-number are such that this would be exceeded, then the spectrogram (and

-the effects chain) will be truncated.  To move the spectrogram to a

-point later in the audio stream, first invoke the

-.B trim

-effect; e.g.

-.EX

-  sox audio.ogg -n trim 1:00 spectrogram

-.EE

-starts the spectrogram at 1 minute through the audio.

-.IP \fB\-y\ \fInum\fR

-Y-axis resolution (1 \- 4), default 2.

-This controls the height of the spectrogram;

-.I num

-can be from 1 (low frequency resolution) to 4 (high frequency

-resolution).  For values greater than 2, the resulting image may be too

-tall to display on the screen; if so, a graphic manipulation package

-(such as

-.BR ImageMagick (1))

-can be used to re-size the image.

-.SP

-To increase the frequency resolution without increasing the height of

-the spectrogram, the

-.B rate

-effect may be invoked to reduce the sampling rate of the signal before

-invoking

-.BR spectrogram ;

-e.g.

-.EX

-  sox audio.ogg -r 4k -n rate spectrogram

-.EE

-allows detailed analysis of frequencies up to 2kHz (half the sampling

-rate).

-.IP \fB\-z\ \fInum\fR

-Z-axis (colour) range in dB, default 120.  This sets the dynamic-range

-of the spectrogram to be \-\fInum\fR\ dBFS to 0\ dBFS.

-.I Num

-may range from 20 to 180.  Decreasing dynamic-range effectively

-increases the `contrast' of the spectrogram display, and vice versa.

-.IP \fB\-Z\ \fInum\fR

-Sets the upper limit of the Z-axis in dBFS.

-A negative

-.I num

-effectively increases the `brightness' of the spectrogram display,

-and vice versa.

-.IP \fB\-q\ \fInum\fR

-Sets the Z-axis quantisation, i.e. the number of different colours (or

-intensities) in which to render Z-axis

-values.  A small number (e.g. 4) will give a `poster'-like effect making

-it easier to discern magnitude bands of similar level.  Smaller numbers

-also usually

-result in smaller PNG files.  The number given specifies the number of

-colours to use inside the Z-axis range; two colours are reserved to

-represent out-of-range values.

-.IP \fB\-w\ \fIname\fR

-Window: Hann (default), Hamming, Bartlett, Rectangular or Kaiser.  The

-spectrogram is produced using the Discrete Fourier Transform (DFT)

-algorithm.  A significant parameter to this algorithm is the choice of

-`window function'.  By default, SoX uses the Hann window which has good

-all-round frequency-resolution and dynamic-range properties.  For better

-frequency resolution (but lower dynamic-range), select a Hamming window;

-for higher dynamic-range (but poorer frequency-resolution), select a

-Kaiser window.  Bartlett and Rectangular windows are also available.

-Selecting a window other than Hann will usually require

-a corresponding

-.B \-z

-setting.

-.IP \fB\-s\fR

-Allow slack overlapping of DFT windows.

-This can, in some cases, increase image sharpness and give greater adherence

-to the

-.B \-x

-value, but at the expense of a little spectral loss.

-.IP \fB\-m\fR

-Creates a monochrome spectrogram (the default is colour).

-.IP \fB\-h\fR

-Selects a high-colour palette\*mless visually pleasing than the default

-colour palette, but it may make it easier to differentiate different levels.

-If this option is used in conjunction with

-.BR \-m ,

-the result will be a hybrid monochrome/colour palette.

-.IP \fB\-p\ \fInum\fR

-Permute the colours in a colour or hybrid palette.

-The

-.I num

-parameter (from 1 to 6) selects the permutation.

-.IP \fB\-l\fR

-Creates a `printer friendly' spectrogram with a light background (the

-default has a dark background).

-.IP \fB\-a\fR

-Suppress the display of the axis lines.  This is sometimes useful in

-helping to discern artefacts at the spectrogram edges.

-.IP \fB\-t\ \fItext\fR

-Set the image title\*mtext to display above the spectrogram.

-.IP \fB\-c\ \fItext\fR

-Set the image comment\*mtext to display below and to the left of the

-spectrogram.

-.IP \fB\-o\ \fItext\fR

-Name of the spectrogram output PNG file, default `spectrogram.png'.

-.RE

-.TP

-\

-For example, let's see what the spectrogram of a swept triangular wave looks

-like:

-.EX

-	sox -n -n synth 6 tri 10k:14k spectrogram -z 100 -w k

-.EE

-For the ability to perform off-line processing of spectral data, see the

-.B stat

-effect.

-.TP

-\fBsplice \fR { \fIposition\fR[\fB,\fIexcess\fR[\fB,\fIleeway\fR]] }

-Splice together audio sections.  This effect provides two things over

-simple audio concatenation: a (usually short) cross-fade is applied at

-the join, and a wave similarity comparison is made to help determine the

-best place at which to make the join.

-.SP

-To perform a splice, first use the

-.B trim

-effect to select the audio sections to be joined together.  As when

-performing a tape splice, the end of the section to be spliced onto

-should be trimmed with a small

-.I excess

-(default 0\*d005 seconds) of audio after the ideal joining point.  The

-beginning of the audio section to splice on should be trimmed with the

-same

-.IR excess

-(before the ideal joining point), plus an additional

-.I leeway

-(default 0\*d005 seconds).  SoX should then be invoked with the two

-audio sections as input files and the

-.B splice

-effect given with the position at which to perform the splice\*mthis is the

-length of the first audio section (including the excess).

-.SP

-For example, a long song begins with two verses which start (as

-determined e.g. by using the

-.B play

-command with the

-.B trim

-(\fIstart\fR) effect) at times 0:30\*d125 and 1:03\*d432.

-The following commands cut out the first verse:

-.EX

-	sox too-long.au part1.au trim 0 30.130

-.EE

-(5 ms excess, after the first verse starts)

-.EX

-	sox too-long.au part2.au trim 1:03.422

-.EE

-(5 ms excess plus 5 ms leeway, before the second verse starts)

-.EX

-	sox part1.au part2.au just-right.au splice 30.130

-.EE

-Provided your arithmetic is good enough, multiple splices can be

-performed with a single

-.B splice

-invocation.  For example:

-.EX

-#!/bin/sh

-# Audio Copy and Paste Over

-# acpo infile copy-start copy-stop paste-over-start outfile

-# All times measured in samples.

-rate=\`soxi -r "$1"\`

-e=\`expr $rate '*' 5 / 1000\`  # Using default excess

-l=$e                         # and leeway.

-sox "$1" piece.au trim \`expr $2 - $e - $l\`s \\

-	\`expr $3 - $2 + $e + $l + $e\`s

-sox "$1" part1.au trim 0 \`expr $4 + $e\`s

-sox "$1" part2.au trim \`expr $4 + $3 - $2 - $e - $l\`s

-sox part1.au piece.au part2.au "$5" splice \\

-	\`expr $4 + $e\`s \\

-	\`expr $4 + $e + $3 - $2 + $e + $l + $e\`s

-.EE

-In the above Bourne shell script,

-two splices are used to `copy and paste' audio.

-.TS

-center;

-c8 c8 c.

-*	*	*

-.TE

-.DT

-.SP

-It is also possible to use this effect to perform general cross-fades, e.g. to

-join two songs.

-In this case,

-.I excess

-would typically be a number of seconds, and

-.I leeway

-should be set to zero.

-.TP

-\fBstat\fR [\fB\-s \fIscale\fR] [\fB\-rms\fR] [\fB\-freq\fR] [\fB\-v\fR] [\fB\-d\fR]

-Display time and frequency domain statistical information about the audio.

-Audio is passed unmodified through the SoX processing chain.

-.SP

-The information is output to the `standard error' (stderr) stream and is

-calculated, where

-.I n

-is the duration of the audio in samples,

-.I c

-is the number of audio channels,

-.I r

-is the audio sample rate, and

-.I x\s-2\dk\u\s0

-represents the PCM value (in the range \-1 to +1 by default) of each successive

-sample in the audio,

-as follows:

-.TS

-center;

-lI l l.

-Samples read	\fIn\fR\^\(mu\^\fIc\fR	\

-Length (seconds)	\fIn\fR\^\(di\^\fIr\fR

-Scaled by	\ 	See \-s below.

-Maximum amplitude	max(\fIx\s-2\dk\u\s0\fR)	T{

-The maximum sample value in the audio; usually this will be a positive number.

-T}

-Minimum amplitude	min(\fIx\s-2\dk\u\s0\fR)	T{

-The minimum sample value in the audio; usually this will be a negative number.

-T}

-Midline amplitude	\(12\^min(\fIx\s-2\dk\u\s0\fR)\^+\^\(12\^max(\fIx\s-2\dk\u\s0\fR)

-Mean norm	\(S1/\s-2n\s+2\^\(*S\^\^\(br\^\fIx\s-2\dk\u\s0\fR\^\(br\^	T{

-The average of the absolute value of each sample in the audio.

-T}

-Mean amplitude	\(S1/\s-2n\s+2\^\(*S\^\fIx\s-2\dk\u\s0\fR	T{

-The average of each sample in the audio.  If this figure is non-zero, then it indicates the

-presence of a D.C. offset (which could be removed using the

-.B dcshift

-effect).

-T}

-RMS amplitude	\(sr(\(S1/\s-2n\s+2\^\(*S\^\fIx\s-2\dk\u\s0\fR\(S2)	T{

-The level of a D.C. signal that would have the same power

-as the audio's average power.

-T}

-Maximum delta	max(\^\(br\^\fIx\s-2\dk\u\s0\fR\^\-\^\fIx\s-2\dk\-1\u\s0\fR\^\(br\^)

-Minimum delta	min(\^\(br\^\fIx\s-2\dk\u\s0\fR\^\-\^\fIx\s-2\dk\-1\u\s0\fR\^\(br\^)

-Mean delta	\(S1/\s-2n\-1\s+2\^\(*S\^\^\(br\^\fIx\s-2\dk\u\s0\fR\^\-\^\fIx\s-2\dk\-1\u\s0\fR\^\(br\^

-RMS delta	\(sr(\(S1/\s-2n\-1\s+2\^\(*S\^(\fIx\s-2\dk\u\s0\fR\^\-\^\fIx\s-2\dk\-1\u\s0\fR)\(S2)

-Rough frequency	\ 	In Hz.

-Volume Adjustment	\ 	T{

-The parameter to the

-.B vol

-effect which would make the audio as loud as possible without clipping.

-Note: See the discussion on

-.B Clipping

-in

-.BR sox (1)

-for reasons why it is rarely a good idea actually to do this.

-T}

-.TE

-.DT

-.SP

-The

-.B \-s

-option can be used to scale the input data by a given factor.

-The default value of

-.I scale

-is 2147483647 (i.e. the maximum value of a 32-bit signed integer).

-Internal effects

-always work with signed long PCM data and so the value should relate to this

-fact.

-.SP

-The

-.B \-rms

-option will convert all output average values to `root mean square'

-format.

-.SP

-The

-.B \-v

-option displays only the `Volume Adjustment' value.

-.SP

-The

-.B \-freq

-option calculates the input's power spectrum (4096 point DFT) instead of the

-statistics listed above.

-.SP

-The

-.B \-d

-option

-displays a hex dump of the 32-bit signed PCM data

-audio in SoX's internal buffer.

-This is mainly used to help track down endian problems that

-sometimes occur in cross-platform versions of SoX.

-.TP

-\fBswap\fR [\fI1 2\fR | \fI1 2 3 4\fR]

-Swap channels in multi-channel audio files.  Optionally, you may

-specify the channel order you would like the output in.  This defaults

-to output channel 2 and then 1 for stereo and 2, 1, 4, 3 for quad-channels.

-An interesting

-feature is that you may duplicate a given channel by overwriting another.

-This is done by repeating an output channel on the command-line.  For example,

-.B swap 2 2

-will overwrite channel 1 with channel 2; creating a stereo

-file with both channels containing the same audio.

-.SP

-See also the

-.B remix

-effect.

-.TP

-\fBsynth\fR [\fIlen\fR] {[\fItype\fR] [\fIcombine\fR] [[\fB%\fR]\fIfreq\fR[\fBk\fR][\fB:\fR\^|\^\fB+\fR\^|\^\fB/\fR\^|\^\fB\-\fR[\fB%\fR]\fIfreq2\fR[\fBk\fR]]] [\fIoff\fR] [\fIph\fR] [\fIp1\fR] [\fIp2\fR] [\fIp3\fR]}

-This effect can be used to generate fixed or swept frequency audio tones

-with various wave shapes, or to generate wide-band noise of various

-`colours'.

-Multiple synth effects can be cascaded to produce more complex

-waveforms; at each stage it is possible to choose whether the generated

-waveform will be mixed with, or modulated onto

-the output from the previous stage.

-Audio for each channel in a multi-channel audio file can be synthesised

-independently.

-.SP

-Though this effect is used to generate audio, an input file must still

-be given, the characteristics of which will be used to set the

-synthesised audio length, the number of channels, and the sampling rate;

-however, since the input file's audio is not normally needed, a `null

-file' (with the special name \fB\-n\fR) is often given instead (and the

-length specified as a parameter to \fBsynth\fR or by another given

-effect that has an associated length).

-.SP

-For example, the following produces a 3 second, 48kHz,

-audio file containing a sine-wave swept from 300 to 3300\ Hz:

-.EX

-	sox -n output.au synth 3 sine 300-3300

-.EE

-and this produces an 8\ kHz version:

-.EX

-	sox -r 8000 -n output.au synth 3 sine 300-3300

-.EE

-Multiple channels can be synthesised by specifying the set of

-parameters shown between braces multiple times;

-the following puts the swept tone in the left channel and adds `brown'

-noise in the right:

-.EX

-	sox -n output.au synth 3 sine 300-3300 brownnoise

-.EE

-The following example shows how two synth effects can be cascaded

-to create a more complex waveform:

-.EX

-	sox -n output.au synth 0\*d5 sine 200-500 \(rs

-		synth 0\*d5 sine fmod 700-100

-.EE

-Frequencies can also be given as a number of musical semitones relative

-to `middle A' (440\ Hz) by prefixing a `%' character;  for example, the

-following could be used to help tune a guitar's `E' strings:

-.EX

-	play -n synth sine %-17

-.EE

-.B N.B.

-This effect generates audio at maximum volume (0dBFS), which means that there

-is a high chance of clipping when using the audio subsequently, so

-in most cases, you will want to follow this effect with the \fBgain\fR

-effect to prevent this from happening. (See also

-.B Clipping

-in

-.BR sox (1).)

-.SP

-A detailed description of each

-.B synth

-parameter follows:

-.SP

-\fIlen\fR is the length of audio to synthesise expressed as a time

-or as a number of samples;

-0=inputlength, default=0.

-.SP

-The format for specifying lengths in time is hh:mm:ss.frac.  The format

-for specifying sample counts is the number of samples with the letter

-`s' appended to it.

-.SP

-\fItype\fR is one of sine, square, triangle, sawtooth, trapezium, exp,

-[white]noise, pinknoise, brownnoise; default=sine

-.SP

-\fIcombine\fR is one of create, mix, amod (amplitude modulation), fmod

-(frequency modulation); default=create

-.SP

-\fIfreq\fR/\fIfreq2\fR are the frequencies at the beginning/end of

-synthesis in Hz or, if preceded with `%', semitones relative to A

-(440\ Hz); for both, default=%0.  If

-.I freq2

-is given, then

-.I len

-must also have been given and the generated tone will be swept between

-the given frequencies.  The two given frequencies must be separated by

-one of the characters `:', `+', `/', or `\-'.  This character is used to

-specify the sweep function as follows:

-.RS

-.IP \fB:\fR

-Linear: the tone will change by a fixed number of hertz per second.

-.IP \fB+\fR

-Square: a second-order function is used to change the tone.

-.IP \fB/\fR

-Exponential: the tone will change by a fixed number of semitones per second.

-.IP \fB\-\fR

-Exponential: as `/', but initial phase always zero, and stepped (less

-smooth) frequency changes.

-.RE

-.TP

-\

-Not used for noise.

-.SP

-\fIoff\fR is the bias (DC-offset) of the signal in percent; default=0.

-.SP

-\fIph\fR is the phase shift in percentage of 1 cycle; default=0.  Not

-used for noise.

-.SP

-\fIp1\fR is the percentage of each cycle that is `on' (square), or

-`rising' (triangle, exp, trapezium); default=50 (square, triangle, exp),

-default=10 (trapezium).

-.SP

-\fIp2\fR (trapezium): the percentage through each cycle at which `falling'

-begins; default=50. exp: the amplitude in percent; default=100.

-.SP

-\fIp3\fR (trapezium): the percentage through each cycle at which `falling'

-ends; default=60.

-.TP

-\fBtempo \fR[\fB\-q\fR] \fIfactor\fR [\fIsegment\fR [\fIsearch\fR [\fIoverlap\fR]]]

-Change the audio tempo (but not its pitch).

-The audio is chopped up into segments which are then shifted in the time

-domain and overlapped (cross-faded) at points where their waveforms are

-most similar (as determined by measurement of `least squares').

-.SP

-By default, linear searches are used to find the best overlapping

-points; if the optional

-.B \-q

-parameter is given, tree searches are used instead, giving a quicker,

-but possibly lower quality, result.

-.SP

-.I factor

-gives the ratio of new tempo to the old tempo, so e.g. 1.1 speeds up the

-tempo by 10%, and 0.9 slows it down by 10%.

-.SP

-The optional

-.I segment

-parameter selects the algorithm's segment size in milliseconds.  The

-default value is 82 and is typically suited to making small changes to

-the tempo of music; for larger changes (e.g. a factor of 2), 50\ ms may

-give a better result.  When changing the tempo of speech, a segment size

-of around 30\ ms often works well.

-.SP

-The optional

-.I search

-parameter gives the audio length in milliseconds (default 14) over which

-the algorithm will search for overlapping points.  Larger values use

-more processing time and do not necessarily produce better results.

-.SP

-The optional

-.I overlap

-parameter gives the segment overlap length in milliseconds (default 12).

-.SP

-See also

-.B speed

-for an effect that changes tempo and pitch together, and

-.B pitch

-for an effect that changes pitch without changing tempo.

-.TP

-\fBtreble \fIgain\fR [\fIfrequency\fR[\fBk\fR]\fR [\fIwidth\fR[\fBs\fR\^|\^\fBh\fR\^|\^\fBk\fR\^|\^\fBo\fR\^|\^\fBq\fR]]]

-Apply a treble tone-control effect.

-See the description of the \fBbass\fR effect for details.

-.TP

-\fBtremolo \fIspeed\fR [\fIdepth\fR]

-Apply a tremolo (low frequency amplitude modulation) effect to the audio.

-The tremolo frequency in Hz is given by

-.IR speed ,

-and the depth as a percentage by

-.I depth

-(default 40).

-.SP

-Note: This effect is a special case of the

-.B synth

-effect.

-.TP

-\fBtrim \fIstart\fR [\fIlength\fR]

-Trim can trim off unwanted audio from the beginning and end of the

-audio.  Audio is not sent to the output stream until

-the \fIstart\fR location is reached.

-.SP

-The optional \fIlength\fR parameter tells the number of samples to output

-after the \fIstart\fR sample and is used to trim off the back side of the

-audio.  Using a value of 0 for the \fIstart\fR parameter will allow

-trimming off the back side only.

-.SP

-Both options can be specified using either an amount of time or an

-exact count of samples.  The format for specifying lengths in time is

-hh:mm:ss.frac.  A start value of 1:30\*d5 will not start until 1 minute,

-thirty and \(12 seconds into the audio.  The format for specifying

-sample counts is the number of samples with the letter `s' appended to

-it.  A value of 8000s will wait until 8000 samples are read before

-starting to process audio.

-.TP

-\fBvol \fIgain\fR [\fItype\fR [\fIlimitergain\fR]]

-Apply an amplification or an attenuation to the audio signal.

-Unlike the

-.B \-v

-option (which is used for balancing multiple input files as they enter the

-SoX effects processing chain),

-.B vol

-is an effect like any other so can be applied anywhere, and several times

-if necessary, during the processing chain.

-.SP

-The amount to change the volume is given by

-.I gain

-which is interpreted, according to the given \fItype\fR, as follows: if

-.I type

-is \fBamplitude\fR (or is omitted), then

-.I gain

-is an amplitude (i.e. voltage or linear) ratio,

-if \fBpower\fR, then a power (i.e. wattage or voltage-squared) ratio,

-and if \fBdB\fR, then a power change in dB.

-.SP

-When

-.I type

-is \fBamplitude\fR or \fBpower\fR, a

-.I gain

-of 1 leaves the volume unchanged,

-less than 1 decreases it,

-and greater than 1 increases it;

-a negative

-.I gain

-inverts the audio signal in addition to adjusting its volume.

-.SP

-When

-.I type

-is \fBdB\fR, a

-.I gain

-of 0 leaves the volume unchanged,

-less than 0 decreases it,

-and greater than 0 increases it.

-.SP

-See [4]

-for a detailed discussion on electrical (and hence audio signal)

-voltage and power ratios.

-.SP

-Beware of

-.B Clipping

-when the increasing the volume.

-.SP

-The

-.I gain

-and the

-.I type

-parameters can be concatenated if desired, e.g.

-.BR "vol 10dB" .

-.SP

-An optional \fIlimitergain\fR value can be specified and should be a

-value much less

-than 1 (e.g. 0\*d05 or 0\*d02) and is used only on peaks to prevent clipping.

-Not specifying this parameter will cause no limiter to be used.  In verbose

-mode, this effect will display the percentage of the audio that needed to be

-limited.

-.SP

-See also

-.B compand

-for a dynamic-range compression/expansion/limiting effect.

-.SS Deprecated Effects

-The following effects have been renamed or have their functionality

-included in another effect; they continue to work in this version of

-SoX but may be removed in future.

-.TP

-\fBkey \fR[\fB\-q\fR] \fIshift\fR [\fIsegment\fR [\fIsearch\fR [\fIoverlap\fR]]]

-Change the audio key (i.e. pitch but not tempo).

-This is just an alias for the

-.B pitch

-effect.

-.TP

-\fBpan \fIdirection\fR

-Pan the audio from one channel to another.  This is done by

-changing the volume of the input channels so that it fades out on one

-channel and fades-in on another.  If the number of input channels is

-different then the number of output channels then this effect tries to

-intelligently handle this.  For instance, if the input contains 1 channel

-and the output contains 2 channels, then it will create the missing channel

-itself.  The

-.I direction

-is a value from \-1 to 1.  \-1 represents

-far left and 1 represents far right.  Numbers in between will start the

-pan effect without totally muting the opposite channel.

-.TP

-\fBpolyphase\fR [\fB\-w nut\fR\^|\^\fBham\fR] [\fB\-width \fIn\fR] [\fB\-cut-off \fIc\fR]

-Change the sampling rate using `polyphase interpolation', a DSP algorithm.

-\fBpolyphase\fR copes with only certain rational fraction resampling ratios,

-and, compared with the \fBrate\fR effect, is generally slow, memory intensive,

-and has poorer stop-band rejection.

-.SP

-If the \fB\-w\fR parameter is \fBnut\fR, then a Nuttall (~90 dB

-stop-band) window will be used; \fBham\fR selects a Hamming (~43

-dB stop-band) window.  The default is Nuttall.

-.SP

-The \fB\-width\fR parameter specifies the (approximate) width of the filter. The default is 1024 samples, which produces reasonable results.

-.SP

-The \fB\-cut-off\fR value (\fIc\fR) specifies the filter cut-off frequency in terms of fraction of

-frequency bandwidth, also know as the Nyquist frequency.  See

-the \fBresample\fR effect for

-further information on Nyquist frequency.  If up-sampling, then this is the

-fraction of the original signal

-that should go through.  If down-sampling, this is the fraction of the

-signal left after down-sampling.  The default is 0\*d95.

-.SP

-See also

-.BR rate ,

-.B rabbit

-and

-.B resample

-for other sample-rate changing effects.

-.TP

-\fBrabbit\fR [\fB\-c0\fR\^|\^\fB\-c1\fR\^|\^\fB\-c2\fR\^|\^\fB\-c3\fR\^|\^\fB\-c4\fR]

-Change the sampling rate using libsamplerate, also known as `Secret Rabbit

-Code'.  This effect is optional and, due to licence issues,

-is not included in all versions of SoX.

-Compared with the \fBrate\fR effect, \fBrabbit\fR is very slow.

-.SP

-See http://www.mega-nerd.com/SRC for details of the algorithms.  Algorithms

-0 through 2 are progressively faster and lower quality versions of the

-sinc algorithm; the default is \fB\-c0\fR.

-Algorithm 3 is zero-order hold, and 4 is linear interpolation.

-.SP

-See also

-.BR rate ,

-.B polyphase

-and

-.B resample

-for other sample-rate changing effects, and see

-\fBresample\fR for more discussion of resampling.

-.TP

-\fBresample\fR [\fB\-qs\fR\^|\^\fB\-q\fR\^|\^\fB\-ql\fR] [\fIrolloff\fR [\fIbeta\fR]]

-Change the sampling rate using simulated analog filtration.

-Compared with the \fBrate\fR effect, \fBresample\fR is slow, and has poorer

-stop-band rejection.

-Only the low quality option works with all resampling ratios.

-.SP

-By default, linear interpolation of the filter coefficients is used,

-with a window width about 45 samples at the lower of the two rates.

-This gives an accuracy of about 16 bits, but insufficient stop-band rejection

-in the case that you want to have roll-off greater than about 0\*d8 of

-the Nyquist frequency.

-.SP

-The \fB\-q*\fR options will change the default values for roll-off and beta

-as well as use quadratic interpolation of filter

-coefficients, resulting in about 24 bits precision.

-The \fB\-qs\fR, \fB\-q\fR, or \fB\-ql\fR options specify increased accuracy

-at the cost of lower execution speed.  It is optional to specify

-roll-off and beta parameters when using the \fB\-q*\fR options.

-.SP

-Following is a table of the reasonable defaults which are built-in to

-SoX:

-.SP

-.TS

-center box;

-cB cB cB cB cB

-c c n c c

-cB c n c c.

-Option	Window	Roll-off	Beta	Interpolation

-(none)	45	0\*d80	16	linear

-\-qs	45	0\*d80	16	quadratic

-\-q	75	0\*d875	16	quadratic

-\-ql	149	0\*d94	16	quadratic

-.TE

-.DT

-.SP

-\fB\-qs\fR, \fB\-q\fR, or \fB\-ql\fR use window lengths of 45, 75, or 149

-samples, respectively, at the lower sample-rate of the two files.

-This means progressively sharper stop-band rejection, at proportionally

-slower execution times.

-.SP

-\fIrolloff\fR refers to the cut-off frequency of the

-low pass filter and is given in terms of the

-Nyquist frequency for the lower sample rate.  rolloff therefore should

-be something between 0 and 1, in practise 0\*d8\-0\*d95.  The defaults are

-indicated above.

-.SP

-The \fINyquist frequency\fR is equal to half the sample rate.  Logically,

-this is because the A/D converter needs at least 2 samples to detect 1

-cycle at the Nyquist frequency.  Frequencies higher then the Nyquist

-will actually appear as lower frequencies to the A/D converter and

-is called aliasing.  Normally, A/D converts run the signal through

-a lowpass filter first to avoid these problems.

-.SP

-Similar problems will happen in software when reducing the sample rate of

-an audio file (frequencies above the new Nyquist frequency can be aliased

-to lower frequencies).  Therefore, a good resample effect

-will remove all frequency information above the new Nyquist frequency.

-.SP

-The \fIrolloff\fR refers to how close to the Nyquist frequency this cut-off

-is, with closer being better.  When increasing the sample rate of an

-audio file you would not expect to have any frequencies exist that are

-past the original Nyquist frequency.  Because of resampling properties, it

-is common to have aliasing artifacts created above the old

-Nyquist frequency.  In that case the \fIrolloff\fR refers to how close

-to the original Nyquist frequency to use a highpass filter to remove

-these artifacts, with closer also being better.

-.SP

-The \fIbeta\fR, if unspecified, defaults to 16.  This selects a Kaiser window.

-You can select a Nuttall window by specifying anything \(<= 2 here.

-For more discussion of beta, look under the \fBfilter\fR effect.

-.SP

-Default parameters are, as indicated above, Kaiser window of length 45,

-roll-off 0\*d80, beta 16, linear interpolation.

-.SP

-Note: \fB\-qs\fR is only slightly slower, but more accurate for

-16-bit or higher precision.

-.SP

-Note: In many cases of up-sampling, no interpolation is needed,

-as exact filter coefficients can be computed in a reasonable amount of space.

-To be precise, this is done when both input-rate < output-rate, and

-output-rate \(di gcd(input-rate, output-rate) \(<= 511.

-.SP

-See also

-.BR rate ,

-.B polyphase

-and

-.B rabbit

-for other sample-rate changing effects.

-There is a detailed analysis of

-\fBresample\fR and \fBpolyphase\fR at

-http://leute.server.de/wilde/resample.html; see \fBrabbit\fR for a

-pointer to its own documentation.

-.SH SEE ALSO

-.BR sox (1),

-.BR soxi (1),

-.BR soxformat (7),

-.BR libsox (3),

-.SP

-The SoX web page at http://sox.sourceforge.net

-.br

-SoX scripting examples at http://sox.sourceforge.net/Docs/Scripts

-.SS References

-.TP

-[1]

-R. Bristow-Johnson,

-.IR "Cookbook formulae for audio EQ biquad filter coefficients" ,

-http://musicdsp.org/files/Audio-EQ-Cookbook.txt

-.TP

-[2]

-Wikipedia,

-.IR "Q-factor" ,

-http://en.wikipedia.org/wiki/Q_factor

-.TP

-[3]

-Scott Lehman,

-.IR "Effects Explained" ,

-http://harmony-central.com/Effects/effects-explained.html

-.TP

-[4]

-Wikipedia,

-.IR "Decibel" ,

-http://en.wikipedia.org/wiki/Decibel

-.TP

-[5]

-Richard Furse,

-.IR "Linux Audio Developer's Simple Plugin API" ,

-http://www.ladspa.org

-.TP

-[6]

-Richard Furse,

-.IR "Computer Music Toolkit" ,

-http://www.ladspa.org/cmt

-.TP

-[7]

-Steve Harris,

-.IR "LADSPA plugins" ,

-http://plugin.org.uk

-.SH AUTHORS

-Chris Bagwell (cbagwell@users.sourceforge.net).

-Other authors and contributors are listed in the AUTHORS file that

-is distributed with the source code.

--- a/soxformat.7

+++ b/soxformat.7

@@ -593,7 +593,6 @@

 .SH SEE ALSO

 .BR sox (1),

 .BR soxi (1),

-.BR soxeffect (7),

 .BR libsox (3),

 .BR octave (1),

 .BR wget (1)

--- a/soxi.1

+++ b/soxi.1

@@ -29,11 +29,11 @@

.SP

.fi

..

-.TH SoXI 1 "July 27, 2008" "soxi" "Sound eXchange"

+.TH SoXI 1 "September 22, 2008" "soxi" "Sound eXchange"

 .SH NAME

 SoXI \- Sound eXchange Information, display sound file metadata

 .SH SYNOPSIS

-\fBsoxi\fR [\fB\-r\fR\^|\^\fB\-c\fR\^|\^\fB\-s\fR\^|\^\fB\-d\fR\^|\^\fB\-b\fR\^|\^\fB\-e\fR\^|\^\fB\-a\fR] \fIinfile1\fR ...

+\fBsoxi\fR [\fB\-V\fR[\fIlevel\fR]] [\fB\-t\fR\^|\^\fB\-r\fR\^|\^\fB\-c\fR\^|\^\fB\-s\fR\^|\^\fB\-d\fR\^|\^\fB\-b\fR\^|\^\fB\-e\fR\^|\^\fB\-a\fR] \fIinfile1\fR ...

 .SH DESCRIPTION

 Displays information from the header of a given audio file or files.

 Supported audio file types are listed and described in

@@ -43,9 +43,17 @@

 is intended for use only with audio files with a self-describing header.

.SP

 By default, as much information as is available is shown.

-The options below may be used to select a single piece of information to show.

+An option may be given to select a single piece of information to show.

 .SH OPTIONS

.TP

+\fB\-V\fR

+Set verbosity. See

+.BR sox (1)

+for details.

+.TP

+\fB\-t\fR

+Show detected file-type.

+.TP

 \fB\-r\fR

 Show sample-rate.

.TP

@@ -73,7 +81,6 @@

 .SH SEE ALSO

 .BR sox (1),

 .BR soxformat (7),

-.BR soxeffect (7),

 .BR libsox (3)

.SP

 The SoX web site at http://sox.sourceforge.net

--- a/src/formats.c

+++ b/src/formats.c

@@ -312,21 +312,6 @@

   return SOX_SUCCESS;

-static char const * find_file_extension(char const * pathname)

-{

-  /* First, chop off any path portions of filename.  This

-   * prevents the next search from considering that part. */

-  char const * result = LAST_SLASH(pathname);

-  if (!result)

-    result = pathname;

-  /* Now look for an filename extension */

-  result = strrchr(result, '.');

-  if (result)

-    ++result;

-  return result;

-}

 static sox_bool is_uri(char const * text)

   if (!isalpha((int)*text))

--- a/src/sox.c

+++ b/src/sox.c

@@ -262,6 +262,7 @@

   static char const * const no_yes[] = {"no", "yes"};

   FILE * const output = sox_mode == sox_soxi? stdout : stderr;

+  char const * filetype = find_file_extension(ft->filename);

   if (sox_mode == sox_play && sox_globals.verbosity < 3) {

     play_file_info(ft, f, full);

@@ -270,7 +271,7 @@

   fprintf(output, "\n%s: '%s'",

     ft->mode == 'r'? "Input File     " : "Output File    ", ft->filename);

-  if (strcmp(ft->filename, "-") == 0 || (ft->handler.flags & SOX_FILE_DEVICE))

+  if (!filetype || strcasecmp(filetype, ft->filetype))

     fprintf(output, " (%s)", ft->handler.names[0]);

   fprintf(output, "\n");

@@ -563,7 +564,7 @@

     sox_fail("Failed creating effect.  Out of Memory?\n");

   if (effp->handler.flags & SOX_EFF_DEPRECATED)

-    sox_warn("effect `%s' is deprecated; see soxeffect(7) for an alternative",

+    sox_warn("effect `%s' is deprecated; see sox(1) for an alternative",

              effp->handler.name);

   if (sox_effect_options(effp, argc, argv) == SOX_EOF)

@@ -750,7 +751,7 @@

         sox_fail("Failed creating effect.  Out of Memory?\n");

       if (effp->handler.flags & SOX_EFF_DEPRECATED)

-        sox_warn("effect `%s' is deprecated; see soxeffect(7) for an alternative",

+        sox_warn("effect `%s' is deprecated; see sox(1) for an alternative",

                  effp->handler.name);

       /* The failing effect should have displayed an error message */

@@ -1967,8 +1968,8 @@

     add_file(&opts, device_name(opts.filetype));

-typedef enum {

-  full, rate, channels, samples, duration, bits, encoding, annotation} soxi_t;

+typedef enum {Full,

+  Type, Rate, Channels, Samples, Duration, Bits, Encoding, Annotation} soxi_t;

 static int soxi1(soxi_t * type, char * filename)

@@ -1979,18 +1980,19 @@

     return 1;

   ws = ft->signal.length / max(ft->signal.channels, 1);

   switch (*type) {

-    case rate: printf("%g\n", ft->signal.rate); break;

-    case channels: printf("%u\n", ft->signal.channels); break;

-    case samples: printf("%lu\n", (unsigned long)ws); break;

-    case duration: printf("%s\n", str_time((double)ws / max(ft->signal.rate, 1))); break;

-    case bits: printf("%u\n", ft->encoding.bits_per_sample); break;

-    case encoding: printf("%s\n", sox_encodings_info[ft->encoding.encoding].desc); break;

-    case annotation: if (ft->oob.comments) {

+    case Type: printf("%s\n", ft->filetype); break;

+    case Rate: printf("%g\n", ft->signal.rate); break;

+    case Channels: printf("%u\n", ft->signal.channels); break;

+    case Samples: printf("%lu\n", (unsigned long)ws); break;

+    case Duration: printf("%s\n", str_time((double)ws / max(ft->signal.rate, 1))); break;

+    case Bits: printf("%u\n", ft->encoding.bits_per_sample); break;

+    case Encoding: printf("%s\n", sox_encodings_info[ft->encoding.encoding].desc); break;

+    case Annotation: if (ft->oob.comments) {

       sox_comments_t p = ft->oob.comments;

       do printf("%s\n", *p); while (*++p);

     break;

-    case full: display_file_info(ft, NULL, sox_false); break;

+    case Full: display_file_info(ft, NULL, sox_false); break;

   return !!sox_close(ft);

@@ -1997,8 +1999,8 @@

 static int soxi(int argc, char * const * argv)

-  static char const opts[] = "rcsdbea?V::";

-  soxi_t type = full;

+  static char const opts[] = "trcsdbea?V::";

+  soxi_t type = Full;

   int opt, num_errors = 0;

   while ((opt = getopt(argc, argv, opts)) > 0) /* act only on last option */

@@ -2016,8 +2018,8 @@

         sox_globals.verbosity = (unsigned)i;

     } else type = 1 + (strchr(opts, opt) - opts);

-  if (type > annotation)

-    printf("Usage: soxi [-V] [-r|-c|-s|-d|-b|-e|-a] infile1 ...\n");

+  if (type > Annotation)

+    printf("Usage: soxi [-V[level]] [-t|-r|-c|-s|-d|-b|-e|-a] infile1 ...\n");

   else for (; optind < argc; ++optind) {

     if (sox_is_playlist(argv[optind]))

       num_errors += (sox_parse_playlist((sox_playlist_callback_t)soxi1, &type, argv[optind]) != SOX_SUCCESS);

--- a/src/util.c

+++ b/src/util.c

@@ -36,6 +36,21 @@

 #endif

+char const * find_file_extension(char const * pathname)

+{

+  /* First, chop off any path portions of filename.  This

+   * prevents the next search from considering that part. */

+  char const * result = LAST_SLASH(pathname);

+  if (!result)

+    result = pathname;

+  /* Now look for a filename extension */

+  result = strrchr(result, '.');

+  if (result)

+    ++result;

+  return result;

+}

 enum_item const * find_enum_text(char const * text, enum_item const * enum_items)

   enum_item const * result = NULL; /* Assume not found */

--- a/src/util.h

+++ b/src/util.h

@@ -99,6 +99,8 @@

 #define MACHINE_IS_LITTLEENDIAN 1

 #endif

+char const * find_file_extension(char const * pathname);

 typedef struct {char const *text; unsigned value;} enum_item;

 #define ENUM_ITEM(prefix, item) {#item, prefix##item},

--

⑨