ref: 01dddda9b91c640637d95d3ac0d0c01abc437648
parent: cc9a8199368f829a4eb666232fb60384fee3cfec
author: cbagwell <cbagwell>
date: Sun Aug 26 19:19:15 EDT 2001
Made auto effect default input handler. Merged mix.c into sox.c.
--- a/libst.txt
+++ b/libst.txt
@@ -1,9 +1,7 @@
-
-
-
ST(3) ST(3)
+
NAME
libst - Sound Tools : sound sample file and effects
libraries.
@@ -58,18 +56,6 @@
format operates from two data structures: a format struc�
ture, and a private structure.
-
-
-
- October 15 1996 1
-
-
-
-
-
-ST(3) ST(3)
-
-
The format structure contains a list of control parameters
for the sample: sampling rate, data size (bytes, words,
floats, etc.), encoding (unsigned, signed, logarithmic),
@@ -124,29 +110,23 @@
getopts is called with a character string
argument list for the effect.
-
-
-
- October 15 1996 2
-
-
-
-
-
-ST(3) ST(3)
-
-
start is called with the signal parameters
for the input and output streams.
flow is called with input and output data
buffers, and (by reference) the input
- and output data sizes. It processes
- the input buffer into the output
- buffer, and sets the size variables to
- the numbers of samples actually pro�
- cessed. It is under no obligation to
- fill the output buffer.
+ and output data buffer sizes. It pro�
+ cesses the input buffer into the out�
+ put buffer, and sets the size vari�
+ ables to the numbers of samples actu�
+ ally processed. It is under no obli�
+ gation to read from the input buffer
+ or write to the output buffer during
+ the same call. If the call returns
+ ST_EOF then this should be used as an
+ indication that this effect will no
+ longer read any data and can be used
+ to switch to drain mode sooner.
drain is called after there are no more
input data samples. If the effect
@@ -190,18 +170,6 @@
once in a program.
The program/library interface is pretty weak. There's too
-
-
-
- October 15 1996 3
-
-
-
-
-
-ST(3) ST(3)
-
-
much ad-hoc information which a program is supposed to
gather up. Sound Tools wants to be an object-oriented
dataflow architecture.
@@ -208,57 +176,4 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- October 15 1996 4
-
-
+ October 15 1996 ST(3)
--- a/sox.1
+++ b/sox.1
@@ -287,8 +287,11 @@
is mangling your sound samples.
.SH FILE TYPES
.I SoX
-uses the file extension of the input and output file to determine what
-type of file format to use. This can be overridden by specifying the
+attempts to determine the file type of input files automatically by looking
+at the header of the audio file. When it is unable to detect the file
+type or if it's an output file
+then it uses the file extension of the file to determine what type of file
+format handler to use. This can be overridden by specifying the
"-t" option on the command line.
.P
The input and output files may be read from standard in and out. This
--- a/sox.txt
+++ b/sox.txt
@@ -1,9 +1,7 @@
-
-
-
SoX(1) SoX(1)
+
NAME
sox - Sound eXchange : universal sound sample translator
@@ -26,7 +24,7 @@
[ -c channels ] [ -x ] [ -e ]
Effects:
- avg [ -l | -r ]
+ avg [ -l | -r | -f | -b | n,n,...,n ]
band [ -n ] center [ width ]
bandpass frequency bandwidth
bandreject frequency bandwidth
@@ -34,7 +32,7 @@
-s | -t [ delay decay speed depth -s | -t ]
compand attack1,decay1[,attack2,decay2...]
in-dB1,out-dB1[,in-dB2,out-dB2...]
- [ gain ] [ initial-volume ]
+ [ gain [ initial-volume [ delay ] ] ]
copy
cut
deemph
@@ -58,23 +56,11 @@
polyphase [ -w < nut / ham > ]
[ -width < long / short / # > ]
[ -cutoff # ]
-
-
-
- July 24, 2000 1
-
-
-
-
-
-SoX(1) SoX(1)
-
-
rate
resample [ -qs | -q | -ql ] [ rolloff [ beta ] ]
reverb gain-out reverb-time delay [ delay ... ]
reverse
- speed factor
+ speed [ -c ] factor
split
stat [ -s n ] [ -rms ] [ -v ] [ -d ]
stretch [ factor [ window fade shift fading ]
@@ -125,17 +111,6 @@
sox file.au file.wav
-
-
- July 24, 2000 2
-
-
-
-
-
-SoX(1) SoX(1)
-
-
translates a sound file in SUN Sparc .AU format into a
Microsoft .WAV file, while
@@ -190,18 +165,6 @@
fidelity is not as important. When uncompressed
it has roughly the precision of 16-bit PCM
audio. Popular version of ADPCM include G.726,
-
-
-
- July 24, 2000 3
-
-
-
-
-
-SoX(1) SoX(1)
-
-
MS ADPCM, and IMA ADPCM. The -a flag has dif�
ferent meanings in different file handlers. In
.wav files it represents MS ADPCM files, in all
@@ -256,18 +219,6 @@
-p Run in preview mode and run fast. This will
somewhat speed up sox when the output format has
a different number of channels and a different
-
-
-
- July 24, 2000 4
-
-
-
-
-
-SoX(1) SoX(1)
-
-
rate than the input file. Currently, this
defaults to using the rate effect instead of the
resample effect for sample rate changes.
@@ -288,184 +239,165 @@
your sound samples.
FILE TYPES
- SoX uses the file extension of the input and output file
- to determine what type of file format to use. This can be
- overridden by specifying the "-t" option on the command
- line.
+ SoX attempts to determine the file type of input files
+ automatically by looking at the header of the audio file.
+ When it is unable to detect the file type or if it's an
+ output file then it uses the file extension of the file to
+ determine what type of file format handler to use. This
+ can be overridden by specifying the "-t" option on the
+ command line.
- The input and output files may be read from standard in
- and out. This is done by specifying '-' as the filename.
+ The input and output files may be read from standard in
+ and out. This is done by specifying '-' as the filename.
- File formats which have headers are checked, if that
- header doesn't seem right, the program exits with an
+ File formats which have headers are checked, if that
+ header doesn't seem right, the program exits with an
appropriate message.
The following file formats are supported:
- .8svx Amiga 8SVX musical instrument description for�
+ .8svx Amiga 8SVX musical instrument description for�
mat.
- .aiff AIFF files used on Apple IIc/IIgs and SGI.
- Note: the AIFF format supports only one SSND
+ .aiff AIFF files used on Apple IIc/IIgs and SGI.
+ Note: the AIFF format supports only one SSND
chunk. It does not support multiple sound
- chunks, or the 8SVX musical instrument descrip�
+ chunks, or the 8SVX musical instrument descrip�
tion format. AIFF files are multimedia archives
- and can have multiple audio and picture chunks.
- You may need a separate archiver to work with
+ and can have multiple audio and picture chunks.
+ You may need a separate archiver to work with
them.
.au SUN Microsystems AU files. There are apparently
- many types of .au files; DEC has invented its
- own with a different magic number and word
+ many types of .au files; DEC has invented its
+ own with a different magic number and word
order. The .au handler can read these files but
- will not write them. Some .au files have valid
- AU headers and some do not. The latter are
- probably original SUN u-law 8000 hz samples.
-
-
-
- July 24, 2000 5
-
-
-
-
-
-SoX(1) SoX(1)
-
-
- These can be dealt with using the .ul format
+ will not write them. Some .au files have valid
+ AU headers and some do not. The latter are
+ probably original SUN u-law 8000 hz samples.
+ These can be dealt with using the .ul format
(see below).
.avr Audio Visual Research
- The AVR format is produced by a number of com�
+ The AVR format is produced by a number of com�
mercial packages on the Mac.
.cdr CD-R
- CD-R files are used in mastering music on Com�
- pact Disks. The audio data on a CD-R disk is a
- raw audio file with a format of stereo 16-bit
+ CD-R files are used in mastering music on Com�
+ pact Disks. The audio data on a CD-R disk is a
+ raw audio file with a format of stereo 16-bit
signed samples at a 44khz sample rate. There is
- a special blocking/padding oddity at the end of
- the audio file and is why it needs its own han�
+ a special blocking/padding oddity at the end of
+ the audio file and is why it needs its own han�
dler.
.cvs Continuously Variable Slope Delta modulation
- Used to compress speech audio for applications
+ Used to compress speech audio for applications
such as voice mail.
.dat Text Data files
- These files contain a textual representation of
- the sample data. There is one line at the
+ These files contain a textual representation of
+ the sample data. There is one line at the
beginning that contains the sample rate. Subse�
- quent lines contain two numeric data items: the
+ quent lines contain two numeric data items: the
time since the beginning of the first sample and
the sample value. Values are normalized so that
- the maximum and minimum are 1.00 and -1.00.
- This file format can be used to create data
- files for external programs such as FFT analyz�
- ers or graph routines. SoX can also convert a
- file in this format back into one of the other
+ the maximum and minimum are 1.00 and -1.00.
+ This file format can be used to create data
+ files for external programs such as FFT analyz�
+ ers or graph routines. SoX can also convert a
+ file in this format back into one of the other
file formats.
.gsm GSM 06.10 Lossy Speech Compression
- A standard for compressing speech which is used
- in the Global Standard for Mobil telecommunica�
- tions (GSM). Its good for its purpose, shrink�
- ing audio data size, but it will introduce lots
- of noise when a given sound sample is encoded
+ A standard for compressing speech which is used
+ in the Global Standard for Mobile telecommunica-
+ tions (GSM). It's good for its purpose, shrink-
+ ing audio data size, but it will introduce lots
+ of noise when a given sound sample is encoded
and decoded multiple times. This format is used
- by some voice mail applications. It is rather
+ by some voice mail applications. It is rather
CPU intensive.
GSM in sox is optional and requires access to an
- external GSM library. To see if there is sup�
- port for gsm run sox -h and look for it under
+ external GSM library. To see if there is sup�
+ port for gsm run sox -h and look for it under
the list of supported file formats.
- .hcom Macintosh HCOM files. These are (apparently)
+ .hcom Macintosh HCOM files. These are (apparently)
Mac FSSD files with some variant of Huffman com�
- pression. The Macintosh has wacky file formats
- and this format handler apparently doesn't han�
- dle all the ones it should. Mac users will need
- your usual arsenal of file converters to deal
+ pression. The Macintosh has wacky file formats
+ and this format handler apparently doesn't
+ handle all the ones it should. Mac users will
+ need your usual arsenal of file converters to
+ deal with an HCOM file under Unix or DOS.
-
-
- July 24, 2000 6
-
-
-
-
-
-SoX(1) SoX(1)
-
-
- with an HCOM file under Unix or DOS.
-
.maud An Amiga format
An IFF-conform sound file type, registered by MS
- MacroSystem Computer GmbH, published along with
- the "Toccata" sound-card on the Amiga. Allows
- 8bit linear, 16bit linear, A-Law, u-law in mono
+ MacroSystem Computer GmbH, published along with
+ the "Toccata" sound-card on the Amiga. Allows
+ 8bit linear, 16bit linear, A-Law, u-law in mono
and stereo.
+ .ogg Ogg Vorbis Compressed Audio.
+ Ogg Vorbis is an open, patent-free codec designed
+ for compressing music and streaming audio. It
+ is similar to MP3, VQF, AAC, and other lossy
+ formats. sox can decode all types of Ogg Vorbis
+ files, but can only encode at 128 kbps. Decod�
+ ing is somewhat CPU intensive and encoding is
+ very CPU intensive.
+ Ogg Vorbis in sox is optional and requires
+ access to external Ogg Vorbis libraries. To see
+ if there is support for Ogg Vorbis run sox -h
+ and look for it under the list of supported file
+ formats as "vorbis".
+
ossdsp OSS /dev/dsp device driver
This is a pseudo-file type and can be optionally
- compiled into Sox. Run sox -h to see if you
- have support for this file type. When this
- driver is used it allows you to open up the OSS
- /dev/dsp file and configure it to use the same
- data format as passed in to /fBSoX. It works
- for both playing and recording sound samples.
- When playing sound files it attempts to set up
- the OSS driver to use the same format as the
- input file. It is suggested to always override
- the output values to use the highest quality
+ compiled into Sox. Run sox -h to see if you
+ have support for this file type. When this
+ driver is used it allows you to open up the OSS
+ /dev/dsp file and configure it to use the same
+ data format as passed in to SoX. It works
+ for both playing and recording sound samples.
+ When playing sound files it attempts to set up
+ the OSS driver to use the same format as the
+ input file. It is suggested to always override
+ the output values to use the highest quality
samples your sound card can handle. Example: -t
ossdsp -w -s /dev/dsp
.sf IRCAM Sound Files.
- Sound Files are used by academic music software
- such as the CSound package, and the MixView
+ Sound Files are used by academic music software
+ such as the CSound package, and the MixView
sound sample editor.
.sph
- SPHERE (SPeech HEader Resources) is a file for�
+ SPHERE (SPeech HEader Resources) is a file for�
mat defined by NIST (National Institute of Stan�
- dards and Technology) and is used with speech
- audio. SoX can read these files when they con�
- tain ulaw and PCM data. It will ignore any
- header information that says the data is com�
+ dards and Technology) and is used with speech
+ audio. SoX can read these files when they con�
+ tain ulaw and PCM data. It will ignore any
+ header information that says the data is com�
pressed using shorten compression and will treat
the data as either ulaw or PCM. This will allow
- SoX and the command line shorten program to be
- ran together using pipes to uncompress the data
- and then pass the result to SoX for processing.
+ SoX and the command line shorten program to be
+ run together using pipes to uncompress the data
+ and then pass the result to SoX for processing.
.smp Turtle Beach SampleVision files.
- SMP files are for use with the PC-DOS package
- SampleVision by Turtle Beach Softworks. This
- package is for communication to several MIDI
- samplers. All sample rates are supported by the
- package, although not all are supported by the
- samplers themselves. Currently loop points are
+ SMP files are for use with the PC-DOS package
+ SampleVision by Turtle Beach Softworks. This
+ package is for communication to several MIDI
+ samplers. All sample rates are supported by the
+ package, although not all are supported by the
+ samplers themselves. Currently loop points are
ignored.
.snd
- Under DOS this file format is the same as the
- .sndt format. Under all other platforms it is
-
-
-
- July 24, 2000 7
-
-
-
-
-
-SoX(1) SoX(1)
-
-
+ Under DOS this file format is the same as the
+ .sndt format. Under all other platforms it is
the same as the .au format.
.sndt SoundTool files.
@@ -473,157 +405,156 @@
sunau Sun /dev/audio device driver
This is a pseudo-file type and can be optionally
- compiled into Sox. Run sox -h to see if you
- have support for this file type. When this
- driver is used it allows you to open up a Sun
+ compiled into Sox. Run sox -h to see if you
+ have support for this file type. When this
+ driver is used it allows you to open up a Sun
/dev/audio file and configure it to use the same
- data type as passed in to Sox. It works for
- both playing and recording sound samples. When
- playing sound files it attempts to set up the
+ data type as passed in to Sox. It works for
+ both playing and recording sound samples. When
+ playing sound files it attempts to set up the
audio driver to use the same format as the input
- file. It is suggested to always override the
+ file. It is suggested to always override the
output values to use the highest quality samples
- your hardware can handle. Example: -t sunau -w
+ your hardware can handle. Example: -t sunau -w
-s /dev/audio or -t sunau -U -c 1 /dev/audio for
older sun equipment.
.txw Yamaha TX-16W sampler.
- A file format from a Yamaha sampling keyboard
- which wrote IBM-PC format 3.5" floppies. Han�
+ A file format from a Yamaha sampling keyboard
+ which wrote IBM-PC format 3.5" floppies. Han�
dles reading of files which do not have the sam�
- ple rate field set to one of the expected by
- looking at some other bytes in the attack/loop
- length fields, and defaulting to 33kHz if the
+ ple rate field set to one of the expected by
+ looking at some other bytes in the attack/loop
+ length fields, and defaulting to 33kHz if the
sample rate is still unknown.
.vms More info to come.
- Used to compress speech audio for applications
+ Used to compress speech audio for applications
such as voice mail.
.voc Sound Blaster VOC files.
- VOC files are multi-part and contain silence
- parts, looping, and different sample rates for
- different chunks. On input, the silence parts
- are filled out, loops are rejected, and sample
- data with a new sample rate is rejected.
- Silence with a different sample rate is gener�
- ated appropriately. On output, silence is not
+ VOC files are multi-part and contain silence
+ parts, looping, and different sample rates for
+ different chunks. On input, the silence parts
+ are filled out, loops are rejected, and sample
+ data with a new sample rate is rejected.
+ Silence with a different sample rate is gener�
+ ated appropriately. On output, silence is not
detected, nor are impossible sample rates.
+ vorbis See .ogg format.
+
.wav Microsoft .WAV RIFF files.
- These appear to be very similar to IFF files,
- but not the same. They are the native sound
+ These appear to be very similar to IFF files,
+ but not the same. They are the native sound
file format of Windows. (Obviously, Windows was
- of such incredible importance to the computer
- industry that it just had to have its own sound
+ of such incredible importance to the computer
+ industry that it just had to have its own sound
file format.) Normally .wav files have all for�
- matting information in their headers, and so do
- not need any format options specified for an
- input file. If any are, they will override the
-
-
-
- July 24, 2000 8
-
-
-
-
-
-SoX(1) SoX(1)
-
-
- file header, and you will be warned to this
+ matting information in their headers, and so do
+ not need any format options specified for an
+ input file. If any are, they will override the
+ file header, and you will be warned to this
effect. You had better know what you are doing!
- Output format options will cause a format con�
- version, and the .wav will written appropri�
- ately. Sox currently can read PCM, ULAW, ALAW,
- MS ADPCM, and IMA (or DVI) ADPCM. It can write
+ Output format options will cause a format con�
+ version, and the .wav will be written appropri-
+ ately. Sox currently can read PCM, ULAW, ALAW,
+ MS ADPCM, and IMA (or DVI) ADPCM. It can write
all of these formats including (NEW!) the ADPCM
encoding.
.wve Psion 8-bit alaw
- These are 8-bit a-law 8khz sound files used on
+ These are 8-bit a-law 8khz sound files used on
the Psion palmtop portable computer.
.raw Raw files (no header).
- The sample rate, size (byte, word, etc), and
+ The sample rate, size (byte, word, etc), and
encoding (signed, unsigned, etc.) of the sample
- file must be given. The number of channels
+ file must be given. The number of channels
defaults to 1.
.ub, .sb, .uw, .sw, .ul, .al, .sl
- These are several suffices which serve as a
- shorthand for raw files with a given size and
- encoding. Thus, ub, sb, uw, sw, ul and sl cor�
- respond to "unsigned byte", "signed byte",
- "unsigned word", "signed word", "ulaw" (byte),
- "alaw" (byte), and "signed long". The sample
- rate defaults to 8000 hz if not explicitly set,
- and the number of channels (as always) defaults
- to 1. There are lots of Sparc samples floating
- around in u-law format with no header and fixed
- at a sample rate of 8000 hz. (Certain sound
+ These are several suffixes which serve as a
+ shorthand for raw files with a given size and
+ encoding. Thus, ub, sb, uw, sw, ul, al and sl cor-
+ respond to "unsigned byte", "signed byte",
+ "unsigned word", "signed word", "ulaw" (byte),
+ "alaw" (byte), and "signed long". The sample
+ rate defaults to 8000 hz if not explicitly set,
+ and the number of channels (as always) defaults
+ to 1. There are lots of Sparc samples floating
+ around in u-law format with no header and fixed
+ at a sample rate of 8000 hz. (Certain sound
management software cheerfully ignores the head�
- ers.) Similarly, most Mac sound files are in
+ ers.) Similarly, most Mac sound files are in
unsigned byte format with a sample rate of 11025
or 22050 hz.
- .auto This is a ``meta-type'': specifying this type
- for an input file triggers some code that tries
- to guess the real type by looking for magic
- words in the header. If the type can't be
- guessed, the program exits with an error mes�
- sage. The input must be a plain file, not a
+ .auto This is a ``meta-type'': specifying this type
+ for an input file triggers some code that tries
+ to guess the real type by looking for magic
+ words in the header. If the type can't be
+ guessed, the program exits with an error mes�
+ sage. The input must be a plain file, not a
pipe. This type can't be used for output files.
EFFECTS
Multiple effects may be applied to the audio data by spec�
- ifying them one after another at the end of the command
+ ifying them one after another at the end of the command
line.
- avg [ -l | -r ]
- Reduce the number of channels by averaging the
- samples, or duplicate channels to increase the
- number of channels. This effect is automati�
- cally used when the number of input channels
+ avg [ -l | -r | -f | -b | n,n,...,n ]
+ Reduce the number of channels by averaging the
+ samples, or duplicate channels to increase the
+ number of channels. This effect is automati�
+ cally used when the number of input channels
+ differs from the number of output channels. When
+ reducing the number of channels it is possible
+ to manually specify the avg effect and use the
+ -l, -r, -f, or -b options to select only the
+ left, right, front, or back channel(s) for the
+ output instead of averaging the channels. The
+ -f and -b options maintain left/right stereo
+ separation; use the avg effect twice to select a
+ single channel.
+ The avg effect can also be invoked with up to 16
+ double-precision numbers, which specify the pro�
+ portion of each input channel that is to be
+ mixed into each output channel. In two-channel
+ mode, 4 numbers are given: l->l, l->r, r->l, and
+ r->r, respectively. In four-channel mode, the
+ first 4 numbers give the proportions for the
+ left-front output channel, as follows: lf->lf,
+ rf->lf, lb->lf, and rb->lf. The next 4 give the
+ right-front output in the same order, then left-
+ back and right-back.
+ It is also possible to use the 16 numbers to
+ expand or reduce the channel count; just specify
+ 0 for unused channels. Finally, if fewer than 4
+ numbers are given, certain special abbreviations
+ may be invoked; see the source code for details.
- July 24, 2000 9
-
-
-
-
-
-SoX(1) SoX(1)
-
-
- differ from the number of output channels. When
- reducing the number of channels it is possible
- to manually specify the avg effect and use the
- -l and -r options to select only the left or
- right channel for the output instead of averag�
- ing the two channels.
-
band [ -n ] center [ width ]
- Apply a band-pass filter. The frequency
+ Apply a band-pass filter. The frequency
response drops logarithmically around the center
- frequency. The width gives the slope of the
- drop. The frequencies at center + width and
- center - width will be half of their original
+ frequency. The width gives the slope of the
+ drop. The frequencies at center + width and
+ center - width will be half of their original
amplitudes. Band defaults to a mode oriented to
pitched signals, i.e. voice, singing, or instru�
- mental music. The -n (for noise) option uses
- the alternate mode for un-pitched signals.
- Warning: -n introduces a power-gain of about
- 11dB in the filter, so beware of output clip�
+ mental music. The -n (for noise) option uses
+ the alternate mode for un-pitched signals.
+ Warning: -n introduces a power-gain of about
+ 11dB in the filter, so beware of output clip�
ping. Band introduces noise in the shape of the
filter, i.e. peaking at the center frequency and
- settling around it. See filter for a bandpass
+ settling around it. See filter for a bandpass
effect with steeper shoulders.
bandpass frequency bandwidth
- Butterworth bandpass filter. Description coming
+ Butterworth bandpass filter. Description coming
soon!
bandreject frequency bandwidth
@@ -633,10 +564,10 @@
chorus gain-in gain-out delay decay speed depth
-s | -t [ delay decay speed depth -s | -t ... ]
- Add a chorus to a sound sample. Each quadtuple
- delay/decay/speed/depth gives the delay in mil�
- liseconds and the decay (relative to gain-in)
- with a modulation speed in Hz using depth in
+ Add a chorus to a sound sample. Each quadtuple
+ delay/decay/speed/depth gives the delay in mil�
+ liseconds and the decay (relative to gain-in)
+ with a modulation speed in Hz using depth in
milliseconds. The modulation is either sinodial
(-s) or triangular (-t). Gain-out is the volume
of the output.
@@ -645,51 +576,50 @@
in-dB1,out-dB1[,in-dB2,out-dB2...]
- [gain] [initial-volume]
- Compand (compress or expand) the dynamic range
- of a sample. The attack and decay time specify
- the integration time over which the absolute
- value of the input signal is integrated to
- determine its volume. Where more than one pair
- of attack/decay parameters are specified, each
+ [gain [initial-volume [delay ] ] ]
+ Compand (compress or expand) the dynamic range
+ of a sample. The attack and decay time specify
+ the integration time over which the absolute
+ value of the input signal is integrated to
+ determine its volume; attacks refer to increases
+ in volume and decays refer to decreases. Where
+ more than one pair of attack/decay parameters
+ are specified, each channel is treated sepa�
+ rately and the number of pairs must agree with
+ the number of input channels. The second param�
+ eter is a list of points on the compander's
+ transfer function specified in dB relative to
+ the maximum possible signal amplitude. The
+ input values must be in a strictly increasing
+ order but the transfer function does not have to
+ be monotonically rising. The special value -inf
+ may be used to indicate that the input volume
+ should be associated output volume. The points
+ -inf,-inf and 0,0 are assumed; the latter may be
+ overridden, but the former may not.
+ The third (optional) parameter is a postprocess�
+ ing gain in dB which is applied after the com�
+ pression has taken place; the fourth (optional)
+ parameter is an initial volume to be assumed for
+ each channel when the effect starts. This per�
+ mits the user to supply a nominal level ini�
+ tially, so that, for example, a very large gain
+ is not applied to initial signal levels before
+ the companding action has begun to operate: it
+ is quite probable that in such an event, the
+ output would be severely clipped while the com�
+ pander gain properly adjusts itself.
+ The fifth (optional) parameter is a delay in
+ seconds. The input signal is analyzed immedi�
+ ately to control the compander, but it is
+ delayed before being fed to the volume adjuster.
+ Specifying a delay approximately equal to the
+ attack/decay times allows the compander to
+ effectively operate in a "predictive" rather
+ than a reactive mode.
- July 24, 2000 10
-
-
-
-
-
-SoX(1) SoX(1)
-
-
- channel is treated separately and the number of
- pairs must agree with the number of input chan�
- nels. The second parameter is a list of points
- on the compander's transfer function specified
- in dB relative to the maximum possible signal
- amplitude. The input values must be in a
- strictly increasing order but the transfer func�
- tion does not have to be monotonically rising.
- The special value -inf may be used to indicate
- that the input volume should be associated out�
- put volume. The points -inf,-inf and 0,0 are
- assumed; the latter may be overridden, but the
- former may not. The third (optional) parameter
- is a postprocessing gain in dB which is applied
- after the compression has taken place; the
- fourth (optional) parameter is an initial volume
- to be assumed for each channel when the effect
- starts. This permits the user to supply a nomi�
- nal level initially, so that, for example, a
- very large gain is not applied to initial signal
- levels before the companding action has begun to
- operate: it is quite probable that in such an
- event, the output would be severely clipped
- while the compander gain properly adjusts
- itself.
-
copy Copy the input file to the output file. This is
the default effect if both files have the same
sampling rate.
@@ -718,18 +648,6 @@
decay (relative to gain-in) of that echo. Gain-
out is the volume of the output.
-
-
-
- July 24, 2000 11
-
-
-
-
-
-SoX(1) SoX(1)
-
-
echos gain-in gain-out delay decay [ delay decay ... ]
Add a sequence of echos to a sound sample. Each
delay/decay part gives the delay in milliseconds
@@ -747,65 +665,54 @@
volume over fade-in-length seconds. Specify 0
seconds if no fade-in is wanted.
- For fade-outs, the audio data will be trucated
+ For fade-outs, the audio data will be truncated
at the stop-time and the volume will be ramped
from full volume down to 0 starting at fade-out-
length seconds before the stop-time. No fade-
out is performed if these options are not speci�
- fied.
+ fied. All times can be specified in seconds,
+ mm:ss.frac, or hh:mm:ss.frac format.
- An optional type can be specified to change the
- type of envelope. Choices are q for quarter of
- a sinewave, h for half a sinewave, t for linear
- slope, l for logarithmic, and p for inverted
+ An optional type can be specified to change the
+ type of envelope. Choices are q for quarter of
+ a sinewave, h for half a sinewave, t for linear
+ slope, l for logarithmic, and p for inverted
parabola. The default is a linear slope.
filter [ low ]-[ high ] [ window-len [ beta ] ]
Apply a Sinc-windowed lowpass, highpass, or
- bandpass filter of given window length to the
- signal. low refers to the frequency of the
- lower 6dB corner of the filter. high refers to
- the frequency of the upper 6dB corner of the
+ bandpass filter of given window length to the
+ signal. low refers to the frequency of the
+ lower 6dB corner of the filter. high refers to
+ the frequency of the upper 6dB corner of the
filter.
- A lowpass filter is obtained by leaving low
- unspecified, or 0. A highpass filter is
- obtained by leaving high unspecified, or 0, or
- greater than or equal to the Nyquist frequency.
+ A lowpass filter is obtained by leaving low
+ unspecified, or 0. A highpass filter is
+ obtained by leaving high unspecified, or 0, or
+ greater than or equal to the Nyquist frequency.
The window-len, if unspecified, defaults to 128.
- Longer windows give a sharper cutoff, smaller
+ Longer windows give a sharper cutoff, smaller
windows a more gradual cutoff.
- The beta, if unspecified, defaults to 16. This
- selects a Kaiser window. You can select a Nut�
- tall window by specifying anything <= 2.0 here.
- For more discussion of beta, look under the
+ The beta, if unspecified, defaults to 16. This
+ selects a Kaiser window. You can select a Nut�
+ tall window by specifying anything <= 2.0 here.
+ For more discussion of beta, look under the
resample effect.
-
-
-
- July 24, 2000 12
-
-
-
-
-
-SoX(1) SoX(1)
-
-
flanger gain-in gain-out delay decay speed < -s | -t >
- Add a flanger to a sound sample. Each triple
- delay/decay/speed gives the delay in millisec�
- onds and the decay (relative to gain-in) with a
+ Add a flanger to a sound sample. Each triple
+ delay/decay/speed gives the delay in millisec�
+ onds and the decay (relative to gain-in) with a
modulation speed in Hz. The modulation is
- either sinodial (-s) or triangular (-t). Gain-
+ either sinusoidal (-s) or triangular (-t). Gain-
out is the volume of the output.
highp frequency
- Apply a single pole recursive high-pass filter.
+ Apply a single pole recursive high-pass filter.
The frequency response drops logarithmically
with I frequency in the middle of the drop. The
slope of the filter is quite gentle. See filter
@@ -812,87 +719,75 @@
for a highpass effect with sharper cutoff.
highpass frequency
- Butterworth highpass filter. Description com�
+ Butterworth highpass filter. Description com�
ming soon!
lowp frequency
- Apply a single pole recursive low-pass filter.
+ Apply a single pole recursive low-pass filter.
The frequency response drops logarithmically
- with frequency in the middle of the drop. The
+ with frequency in the middle of the drop. The
slope of the filter is quite gentle. See filter
for a lowpass effect with sharper cutoff.
lowpass frequency
- Butterworth lowpass filter. Description coming
+ Butterworth lowpass filter. Description coming
soon!
map Display a list of loops in a sample, and miscel�
laneous loop info.
- mask Add "masking noise" to signal. This effect
- deliberately adds white noise to a sound in
- order to mask quantization effects, created by
- the process of playing a sound digitally. It
- tends to mask buzzing voices, for example. It
- adds 1/2 bit of noise to the sound file at the
+ mask Add "masking noise" to signal. This effect
+ deliberately adds white noise to a sound in
+ order to mask quantization effects, created by
+ the process of playing a sound digitally. It
+ tends to mask buzzing voices, for example. It
+ adds 1/2 bit of noise to the sound file at the
output bit depth.
pan direction
- Pan the sound of an audio file from one channel
+ Pan the sound of an audio file from one channel
to another. This is done by changing the volume
- of the input channels so that it fades out on
- one channel and fades-in on another. If the
- number of input channels is different then the
+ of the input channels so that it fades out on
+ one channel and fades-in on another. If the
+ number of input channels is different then the
number of output channels then this effect tries
- to intelligently handle this. For instance, if
+ to intelligently handle this. For instance, if
the input contains 1 channel and the output con�
- tains 2 channels, then it will create the miss�
- ing channel itself. The direction is a value
- from -1.0 to 1.0. -1.0 represents far left and
- 1.0 represents far right. Numbers in between
-
-
-
- July 24, 2000 13
-
-
-
-
-
-SoX(1) SoX(1)
-
-
+ tains 2 channels, then it will create the miss�
+ ing channel itself. The direction is a value
+ from -1.0 to 1.0. -1.0 represents far left and
+ 1.0 represents far right. Numbers in between
will start the pan effect without totally muting
the opposite channel.
phaser gain-in gain-out delay decay speed < -s | -t >
- Add a phaser to a sound sample. Each triple
- delay/decay/speed gives the delay in millisec�
- onds and the decay (relative to gain-in) with a
+ Add a phaser to a sound sample. Each triple
+ delay/decay/speed gives the delay in millisec�
+ onds and the decay (relative to gain-in) with a
modulation speed in Hz. The modulation is
- either sinodial (-s) or triangular (-t). The
+ either sinusoidal (-s) or triangular (-t). The
decay should be less than 0.5 to avoid feedback.
Gain-out is the volume of the output.
pick [ -1 | -2 | -3 | -4 | -l | -r ]
- Select the left or right channel of a stereo
- sample, or one of four channels in a quadro�
- phonic sample. The -l and -r options represent
- either the left or right channel. It is
- required that you use the -c 1 command line
+ Select the left or right channel of a stereo
+ sample, or one of four channels in a quadro�
+ phonic sample. The -l and -r options represent
+ either the left or right channel. It is
+ required that you use the -c 1 command line
option in order to force the output file to con�
tain only 1 channel.
pitch shift [ width interpole fade ]
- Change the pitch of file without affecting its
+ Change the pitch of file without affecting its
duration by cross-fading shifted samples. shift
is given in cents. Use a positive value to shift
- to treble, negative value to shift to bass.
- Default shift is 0. width of window is in ms.
- Default width is 20ms. Try 30ms to lower pitch,
- and 10ms to raise pitch. interpole option, can
+ to treble, negative value to shift to bass.
+ Default shift is 0. width of window is in ms.
+ Default width is 20ms. Try 30ms to lower pitch,
+ and 10ms to raise pitch. interpole option, can
be "cubic" or "linear". Default is "cubic". The
- fade option, can be "cos", "hamming", "linear"
+ fade option, can be "cos", "hamming", "linear"
or "trapezoid". Default is "cos".
polyphase [ -w < nut / ham > ]
@@ -901,59 +796,47 @@
[ -cutoff # ]
Translate input sampling rate to output sampling
- rate via polyphase interpolation, a DSP algo�
- rithm. This method is slow and uses lots of
+ rate via polyphase interpolation, a DSP algo�
+ rithm. This method is slow and uses lots of
RAM, but gives much better results than rate.
- -w < nut / ham > : select either a Nuttal (~90
- dB stopband) or Hamming (~43 dB stopband) win�
+ -w < nut / ham > : select either a Nuttal (~90
+ dB stopband) or Hamming (~43 dB stopband) win�
dow. Default is nut.
- -width long / short / # : specify the (approxi�
- mate) width of the filter. long is 1024 sam�
- ples; short is 128 samples. Alternatively, an
+ -width long / short / # : specify the (approxi�
+ mate) width of the filter. long is 1024 sam�
+ ples; short is 128 samples. Alternatively, an
exact number can be used. Default is long. The
- short option is not recommended, as it produces
+ short option is not recommended, as it produces
poor quality results.
-
-
-
- July 24, 2000 14
-
-
-
-
-
-SoX(1) SoX(1)
-
-
- -cutoff # : specify the filter cutoff frequency
- in terms of fraction of frequency bandwidth,
- also know as the Nyquist frequency. Please see
- the resample effect for further information on
- Nyquist frequency. If upsampling, then this is
- the fraction of the original signal that should
- go through. If downsampling, this is the frac�
- tion of the signal left after downsampling.
+ -cutoff # : specify the filter cutoff frequency
+ in terms of fraction of frequency bandwidth,
+ also known as the Nyquist frequency. Please see
+ the resample effect for further information on
+ Nyquist frequency. If upsampling, then this is
+ the fraction of the original signal that should
+ go through. If downsampling, this is the frac�
+ tion of the signal left after downsampling.
Default is 0.95. Remember that this is a float.
rate Translate input sampling rate to output sampling
- rate via linear interpolation to the Least Com�
+ rate via linear interpolation to the Least Com�
mon Multiple of the two sampling rates. This is
the default effect if the two files have differ�
- ent sampling rates and the preview options was
+ ent sampling rates and the preview option was
specified. This is fast but noisy: the spectrum
- of the original sound will be shifted upwards
- and duplicated faintly when up-translating by a
+ of the original sound will be shifted upwards
+ and duplicated faintly when up-translating by a
multiple.
- Lerp-ing is acceptable for cheap 8-bit sound
- hardware, but for CD-quality sound you should
- instead use either resample or polyphase. If
+ Lerp-ing is acceptable for cheap 8-bit sound
+ hardware, but for CD-quality sound you should
+ instead use either resample or polyphase. If
you are wondering which rate changing effects to
- use, you will want to read a detailed analysis
+ use, you will want to read a detailed analysis
of all of them at http://eakaw2.et.tu-dres�
den.de/~wilde/resample/resample.html
@@ -960,40 +843,28 @@
resample [ -qs | -q | -ql ] [ rolloff [ beta ] ]
Translate input sampling rate to output sampling
rate via simulated analog filtration. This
- method is slower than rate, but gives much bet�
+ method is slower than rate, but gives much bet�
ter results.
By default, linear interpolation is used, with a
- window width about 45 samples at the lower of
- the two rate. This gives an accuracy of about
- 16 bits, but insufficient stopband rejection in
- the case that you want to have rolloff greater
+ window width about 45 samples at the lower of
+ the two rates. This gives an accuracy of about
+ 16 bits, but insufficient stopband rejection in
+ the case that you want to have rolloff greater
than about 0.80 of the Nyquist frequency.
- The -q* options will change the default values
- for rolloff and beta as well as use quadratic
- interpolation of filter coefficients, resulting
+ The -q* options will change the default values
+ for rolloff and beta as well as use quadratic
+ interpolation of filter coefficients, resulting
in about 24 bits precision. The -qs, -q, or -ql
- options specify increased accuracy at the cost
- of lower execution speed. It is optional to
- specify rolloff and beta parameters when using
+ options specify increased accuracy at the cost
+ of lower execution speed. It is optional to
+ specify rolloff and beta parameters when using
the -q* options.
- Following is a table of the reasonable defaults
+ Following is a table of the reasonable defaults
which are built-in to sox:
-
-
-
- July 24, 2000 15
-
-
-
-
-
-SoX(1) SoX(1)
-
-
Option Window rolloff beta interpolation
------ ------ ------- ---- -------------
(none) 45 0.80 16 linear
@@ -1003,90 +874,78 @@
------ ------ ------- ---- -------------
-qs, -q, or -ql use window lengths of 45, 75, or
- 149 samples, respectively, at the lower sample-
+ 149 samples, respectively, at the lower sample-
rate of the two files. This means progressively
- sharper stop-band rejection, at proportionally
+ sharper stop-band rejection, at proportionally
slower execution times.
- rolloff refers to the cut-off frequency of the
- low pass filter and is given in terms of the
- Nyquist frequency for the lower sample rate.
- rolloff therefore should be something between
+ rolloff refers to the cut-off frequency of the
+ low pass filter and is given in terms of the
+ Nyquist frequency for the lower sample rate.
+ rolloff therefore should be something between
0.0 and 1.0, in practice 0.8-0.95. The defaults
are indicated above.
The Nyquist frequency is equal to (sample rate /
- 2). Logically, this is because the A/D con�
- verter needs at least 2 samples to detect 1
- cycle at the Nyquist frequency. Frequencies
- higher then the Nyquist will actually appear as
- lower frequencies to the A/D converter and is
+ 2). Logically, this is because the A/D con�
+ verter needs at least 2 samples to detect 1
+ cycle at the Nyquist frequency. Frequencies
+ higher than the Nyquist will actually appear as
+ lower frequencies to the A/D converter; this is
called aliasing. Normally, A/D converts run the
- signal through a highpass filter first to avoid
+ signal through a lowpass filter first to avoid
these problems.
- Similar problems will happen in software when
- reducing the sample rate of an audio file (fre�
- quencies above the new Nyquist frequency can be
- aliased to lower frequencies). Therefore, a
- good resample effect will remove all frequency
+ Similar problems will happen in software when
+ reducing the sample rate of an audio file (fre�
+ quencies above the new Nyquist frequency can be
+ aliased to lower frequencies). Therefore, a
+ good resample effect will remove all frequency
information above the new Nyquist frequency.
- The rolloff refers to how close to the Nyquist
+ The rolloff refers to how close to the Nyquist
frequency this cutoff is, with closer being bet�
- ter. When increasing the sample rate of an
+ ter. When increasing the sample rate of an
audio file you would not expect to have any fre�
- quencies exist that are past the original
- Nyquist frequency. Because of resampling prop�
- erties, it is common to have alaising data cre�
- ated that is above the old Nyquist frequency.
- In that case the rolloff refers to how close to
+ quencies exist that are past the original
+ Nyquist frequency. Because of resampling prop�
+ erties, it is common to have aliasing data cre-
+ ated that is above the old Nyquist frequency.
+ In that case the rolloff refers to how close to
the original Nyquist frequency to use a highpass
- filter to remove this false data, with closer
+ filter to remove this false data, with closer
also being better.
The beta parameter determines the type of filter
- window used. Any value greater than 2.0 is the
+ window used. Any value greater than 2.0 is the
beta for a Kaiser window. Beta <= 2.0 selects a
-
-
-
- July 24, 2000 16
-
-
-
-
-
-SoX(1) SoX(1)
-
-
- Nuttall window. If unspecified, the default is
+ Nuttall window. If unspecified, the default is
a Kaiser window with beta 16.
In the case of Kaiser window (beta > 2.0), lower
- betas produce a somewhat faster transition from
- passband to stopband, at the cost of noticeable
- artifacts. A beta of 16 is the default, beta
- less than 10 is not recommended. If you want a
- sharper cutoff, don't use low beta's, use a
+ betas produce a somewhat faster transition from
+ passband to stopband, at the cost of noticeable
+ artifacts. A beta of 16 is the default, beta
+ less than 10 is not recommended. If you want a
+ sharper cutoff, don't use low betas, use a
longer sample window. A Nuttall window is
- selected by specifying any 'beta' <= 2, and the
- Nuttall window has somewhat steeper cutoff than
- the default Kaiser window. You will probably
- not need to use the beta parameter at all,
- unless you are just curious about comparing the
+ selected by specifying any 'beta' <= 2, and the
+ Nuttall window has somewhat steeper cutoff than
+ the default Kaiser window. You will probably
+ not need to use the beta parameter at all,
+ unless you are just curious about comparing the
effects of Nuttall vs. Kaiser windows.
This is the default effect if the two files have
- different sampling rates. Default parameters
+ different sampling rates. Default parameters
are, as indicated above, Kaiser window of length
45, rolloff 0.80, beta 16, linear interpolation.
- NOTE: -qs is only slightly slower, but more
+ NOTE: -qs is only slightly slower, but more
accurate for 16-bit or higher precision.
- NOTE: In many cases of up-sampling, no interpo�
- lation is needed, as exact filter coefficients
+ NOTE: In many cases of up-sampling, no interpo�
+ lation is needed, as exact filter coefficients
can be computed in a reasonable amount of space.
To be precise, this is done when
@@ -1096,125 +955,105 @@
reverb gain-out delay [ delay ... ]
Add reverberation to a sound sample. Each delay
- is given in milliseconds and its feedback is
- depending on the reverb-time in milliseconds.
- Each delay should be in the range of half to
+ is given in milliseconds and its feedback is
+ depending on the reverb-time in milliseconds.
+ Each delay should be in the range of half to
quarter of reverb-time to get a realistic rever�
beration. Gain-out is the volume of the output.
- reverse Reverse the sound sample completely. Included
+ reverse Reverse the sound sample completely. Included
for finding Satanic subliminals.
- speed factor
- Speed up or down the sound, as a magnetic tape
+ speed [ -c ] factor
+ Speed up or down the sound, as a magnetic tape
with a speed control. It affects both pitch and
- time. A factor of 1.0 means no change, and is
+ time. A factor of 1.0 means no change, and is
the default. 2.0 doubles speed, thus time
- length is cut by a half and pitch is one octave
- higher. 0.5 halves speed thus time length dou�
- bles and pitch is one octave lower.
+ length is cut by a half and pitch is one octave
+ higher. 0.5 halves speed thus time length dou�
+ bles and pitch is one octave lower. If the
+ optional -c parameter is used then the factor is
+ specified in "cents".
-
-
-
- July 24, 2000 17
-
-
-
-
-
-SoX(1) SoX(1)
-
-
split Turn a mono sample into a stereo sample by copy�
- ing the input channel to the left and right
+ ing the input channel to the left and right
channels.
stat [ -s n ] [-rms ] [ -v ] [ -d ]
- Do a statistical check on the input file, and
+ Do a statistical check on the input file, and
print results on the standard error file. Audio
- data is passed unmodified from input to output
+ data is passed unmodified from input to output
file unless used along with the -e option.
The "Volume Adjustment:" field in the statistics
- gives you the argument to the -v number which
+ gives you the argument to the -v number which
will make the sample as loud as possible without
clipping.
The option -v will print out the "Volume Adjust�
- ment:" field's value only and return. This
- could be of use in scripts to auto convert the
+ ment:" field's value only and return. This
+ could be of use in scripts to auto convert the
volume.
- The -s n option is used to scale the input data
- by a given factor. The default value of n is
- the max value of a signed long variable
+ The -s n option is used to scale the input data
+ by a given factor. The default value of n is
+ the max value of a signed long variable
(0x7fffffff). Internal effects always work with
- signed long PCM data and so the value should
+ signed long PCM data and so the value should
relate to this fact.
- The -rms option will convert all output average
+ The -rms option will convert all output average
values to root mean square format.
There is also an optional parameter -d that will
- print out a hex dump of the sound file from the
- internal buffer that is in 32-bit signed PCM
- data. This is mainly only of use in tracking
- down endian problems that creep in to sox on
+ print out a hex dump of the sound file from the
+ internal buffer that is in 32-bit signed PCM
+ data. This is mainly only of use in tracking
+ down endian problems that creep in to sox on
cross-platform versions.
stretch factor [window fade shift fading]
- Time stretch file by a given factor. Change
+ Time stretch file by a given factor. Change
duration without affecting the pitch. factor of
- stretching: >1.0 lengthen, <1.0 shorten dura�
- tion. window size is in ms. Default is 20ms.
- The fade option, can be "lin". shift ratio, in
- [0.0 1.0]. Default depends on stretch factor.
- 1.0 to shorten, 0.8 to lengthen. The fading
- ratio, in [0.0 0.5]. The amount of a fade's
+ stretching: >1.0 lengthen, <1.0 shorten dura�
+ tion. window size is in ms. Default is 20ms.
+ The fade option, can be "lin". shift ratio, in
+ [0.0 1.0]. Default depends on stretch factor.
+ 1.0 to shorten, 0.8 to lengthen. The fading
+ ratio, in [0.0 0.5]. The amount of a fade's
default depends on factor and shift.
swap [ 1 2 | 1 2 3 4 ]
Swap channels in multi-channel sound files.
- Optionally, you may specify the channel order
- you would like the output in. This defaults to
+ Optionally, you may specify the channel order
+ you would like the output in. This defaults to
output channel 2 and then 1 for stereo and 2, 1,
-
-
-
- July 24, 2000 18
-
-
-
-
-
-SoX(1) SoX(1)
-
-
- 4, 3 for quad-channels. An interesting feature
- is that you may duplicate a given channel by
- overwriting another. This is done by repeating
- an output channel on the command line. For
- example, swap 2 2 will overwrite channel 1 with
- channel 2's data; creating a stereo file with
+ 4, 3 for quad-channels. An interesting feature
+ is that you may duplicate a given channel by
+ overwriting another. This is done by repeating
+ an output channel on the command line. For
+ example, swap 2 2 will overwrite channel 1 with
+ channel 2's data; creating a stereo file with
both channels containing the same audio data.
trim start [ length ]
- Trim can trim off unwanted audio data from the
+ Trim can trim off unwanted audio data from the
beginning and end of the audio file. Audio sam�
ples are not sent to the output stream until the
- start location is reached. start is a floating
+ start location is reached. start is a floating
point number that tells the number of seconds to
- wait before starting. If you know the sample
- number you would like to start at then the sec�
- onds can be obtained by multiply (sample # *
+ wait before starting. If you know the sample
+ number you would like to start at then the sec�
+ onds can be obtained by dividing (sample # /
sample rate).
- The optional length parameter tells the number
- of samples to output after the start sample and
- is used to trim off the back side of the audio
- data. Using a value of 0 for the start parame�
- ter will allow trimming off the back side only.
+ The optional length parameter tells the number
+ of samples to output after the start sample and
+ is used to trim off the back side of the audio
+ data. Using a value of 0 for the start parame�
+ ter will allow trimming off the back side only.
+ Both start and length can also be specified in
+ mm:ss.frac or hh:mm:ss.frac format.
vibro speed [ depth ]
Add the world-famous Fender Vibro-Champ sound
@@ -1246,18 +1085,6 @@
rithmically. 0.0 is constant while +6 doubles
the amplitude.
An optional limitergain value can be specified
-
-
-
- July 24, 2000 19
-
-
-
-
-
-SoX(1) SoX(1)
-
-
and should be a value much less then 1.0 (ie
0.05 or 0.02) and is used only on peaks to pre�
vent clipping. Not specifying this parameter
@@ -1285,36 +1112,4 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- July 24, 2000 20
-
-
+ July 24, 2000 SoX(1)
--- a/soxexam.txt
+++ b/soxexam.txt
@@ -1,9 +1,7 @@
-
-
-
SoX(1) SoX(1)
+
NAME
soxexam - SoX Examples (CHEAT SHEET)
@@ -32,7 +30,7 @@
When working with headerless files (raw files), you may
take advantage of they pseudo-file types of .ub, .uw, .sb,
.sw, .ul, and .sl. By using these extensions on your
- filenames you will not have to specify the corrisponding
+ filenames you will not have to specify the corresponding
options on the command line.
Precision
@@ -58,18 +56,6 @@
GSM 16-bit
unsigned long 32-bit
signed long 32-bit
-
-
-
- December 10, 1999 1
-
-
-
-
-
-SoX(1) SoX(1)
-
-
___________ _________
Examples
@@ -124,18 +110,6 @@
8000 Hz ADPCM input file and then end up with the final
file as 44100 Hz ADPCM.
-
-
-
- December 10, 1999 2
-
-
-
-
-
-SoX(1) SoX(1)
-
-
sox firstfile.wav -r 44100 -s -w secondfile.wav
sox secondfile.wav thirdfile.wav swap
sox thirdfile.wav -a -b finalfile.wav mask
@@ -166,8 +140,8 @@
vocals or guitars.)
Single effects will be explained and some given parameter
- settings that can be used to understand the theorie by
- listening to the sound file with the added effect.
+ settings that can be used to understand the theory by lis�
+ tening to the sound file with the added effect.
Using multiple effects in parallel or in sequel can result
either in very perfect sound or ( mostly ) in a dramatic
@@ -176,8 +150,8 @@
the first time using effects try to compose them as less
as possible. We don't regard the composition of effects in
the examples because to many combinations are possible and
- you really need a very fast maschine and a lot of memory
- to play them in real-time.
+ you really need a very fast machine and a lot of memory to
+ play them in real-time.
And real-time playing of sounds will speed up learning the
parameter setting.
@@ -184,24 +158,12 @@
Basically, we will use the "play" front-end of SOX since
it is easier to listen sounds coming out of the speaker or
- earphone instead of looking at cryptical data in sound
+ earphone instead of looking at cryptic data in sound
files.
For easy listening of file.xxx ( "xxx" is any sound format
):
-
-
-
- December 10, 1999 3
-
-
-
-
-
-SoX(1) SoX(1)
-
-
play file.xxx effect-name effect-parameters
Or more SOX-like ( for "dsp" output ):
@@ -224,14 +186,14 @@
Notes:
I played all examples in real-time on a Pentium 100 with
- 32 Mb and Linux 2.0.30 using a self-recorded sample ( 3:15
+ 32 MB and Linux 2.0.30 using a self-recorded sample ( 3:15
min long in "wav" format with 44.1 kHz sample rate and
stereo 16 bit ). The sample should not contain any of the
effects. However, if you take any recording of a sound
track from radio or tape or cd, and it sounds like a live
concert or ten people are playing the same rhythm with
- their drums or funky-groves, then take any other sample.
- (Typically, less then four different intruments and no
+ their drums or funky-grooves, then take any other sample.
+ (Typically, fewer than four different instruments and no
synthesizer in the sample is suitable. Likewise, the com�
bination vocal, drums, bass and guitar.)
@@ -240,10 +202,10 @@
Echo
An echo effect can be naturally found in the mountains,
- standing somewhere on a moutain and shouting a single word
- will result in one or more repetitions of the word ( if
- not, turn a bit around ant try next, or climb to the next
- mountain ).
+ standing somewhere on a mountain and shouting a single
+ word will result in one or more repetitions of the word (
+ if not, turn a bit around and try next, or climb to the
+ next mountain ).
However, the time difference between shouting and repeat�
ing is the delay (time), its loudness is the decay. Multi�
@@ -256,24 +218,12 @@
ple shortly after the original one.
This will sound as doubling the number of instruments
-
-
-
- December 10, 1999 4
-
-
-
-
-
-SoX(1) SoX(1)
-
-
playing the same sample:
play file.xxx echo 0.8 0.88 60.0 0.4
If the delay is very short then it sound like a (metallic)
- roboter playing music:
+ robot playing music:
play file.xxx echo 0.8 0.88 6.0 0.4
@@ -314,8 +264,8 @@
can be applied to other instrument samples too.
It works like the echo effect with a short delay, but the
- delay isn't constant. The delay is varied using a sin�
- odial or triangular modulation. The modulation depth
+ delay isn't constant. The delay is varied using a sinu�
+ soidal or triangular modulation. The modulation depth
defines the range the modulated delay is played before or
after the delay. Hence the delayed sound will sound slower
or faster, that is the delayed sound tuned around the
@@ -322,20 +272,8 @@
original one, like in a chorus where some vocal are a bit
out of tune.
-
-
-
- December 10, 1999 5
-
-
-
-
-
-SoX(1) SoX(1)
-
-
The typical delay is around 40ms to 60ms, the speed of the
- modualtion is best near 0.25Hz and the modulation depth
+ modulation is best near 0.25Hz and the modulation depth
around 2ms.
A single delay will make the sample more overloaded:
@@ -371,7 +309,7 @@
play file.xxx flanger 0.6 0.87 3.0 0.9 0.5 -s
- listen carefully between the difference of sinodial and
+ listen carefully between the difference of sinusoidal and
triangular modulation:
play file.xxx flanger 0.6 0.87 3.0 0.9 0.5 -t
@@ -381,7 +319,7 @@
play file.xxx flanger 0.8 0.88 3.0 0.4 0.5 -t
- The drunken loundspeaker system:
+ The drunken loudspeaker system:
play file.xxx flanger 0.9 0.9 4.0 0.23 1.3 -s
@@ -388,29 +326,17 @@
Reverb
The reverb effect is often used in audience hall which are
-
-
-
- December 10, 1999 6
-
-
-
-
-
-SoX(1) SoX(1)
-
-
to small or to many visitors disturb the reflection of
- sound at the walls to make the sound played more monumen�
- tal. You can try the reverb effect in your bathroom or
- garage or sport halls by shouting loud some words. You'll
- hear the words reflected from the walls.
+ sound at the walls to make the sound played more
+ monumental. You can try the reverb effect in your bathroom
+ or garage or sport halls by shouting loud some words.
+ You'll hear the words reflected from the walls.
The biggest problem in using the reverb effect is the cor�
rect setting of the (wall) delays such that the sound is
- relistic an doesn't sound like music playing in a tin or
- overloaded feedback distroys any illusion of any big hall.
- To help you for much realisitc reverb effects, you should
+ realistic and doesn't sound like music playing in a tin or
+ overloaded feedback destroys any illusion of any big hall.
+ To help you for much realistic reverb effects, you should
decide first, how long the reverb should take place until
it is not loud enough to be registered by your ears. This
is be done by the reverb time "t", in small halls 200ms in
@@ -417,13 +343,13 @@
bigger one 1000ms, if you like. Clearly, the walls of such
a hall aren't far away, so you should define its setting
be given every wall its delay time. However, if the wall
- is to far eway for the reverb time, you won't hear the
+ is too far away for the reverb time, you won't hear the
reverb, so the nearest wall will be best "t/4" delay and
- the farest "t/2". You can try other distances as well,
+ the farthest "t/2". You can try other distances as well,
but it won't sound very realistic. The walls shouldn't
- stand to close to each other and not in a multiple
- interger distance to each other ( so avoid wall like:
- 200.0 and 202.0, or something like 100.0 and 200.0 ).
+ stand too close to each other and not in a multiple integer
+ distance to each other ( so avoid wall like: 200.0 and
+ 202.0, or something like 100.0 and 200.0 ).
Since audience halls do have a lot of walls, we will start
designing one beginning with one wall:
@@ -445,9 +371,9 @@
240.0 280.0 300.0
If you run out of machine power or memory, then stop as
- much applications as possible ( every interupt will con�
- sume a lot of cpu time which for bigger halls is abso�
- lutely neccessary ).
+ many applications as possible ( every interrupt will con-
+ sume a lot of CPU time which for bigger halls is abso�
+ lutely necessary ).
Phaser
@@ -454,25 +380,13 @@
The phaser effect is like the flanger effect, but it uses
a reverb instead of an echo and does phase shifting.
You'll hear the difference in the examples comparing both
-
-
-
- December 10, 1999 7
-
-
-
-
-
-SoX(1) SoX(1)
-
-
effects ( simply change the effect name ). The delay mod�
- ulation can be done sinodial or triangular, preferable is
- the later one for multiple instruments playing. For single
- instrument sounds the sinodial phaser effect will give a
- sharper phasing effect. The decay shouln't be to close to
- 1.0 which will cause dramatic feedback. A good range is
- about 0.5 to 0.1 for the decay.
+ ulation can be done sinusoidal or triangular, preferable
+ is the latter one for multiple instruments playing. For
+ single instrument sounds the sinusoidal phaser effect will
+ give a sharper phasing effect. The decay shouldn't be too
+ close to 1.0 which will cause dramatic feedback. A good
+ range is about 0.5 to 0.1 for the decay.
We will take a parameter setting as for the flanger before
( gain-out is lower since feedback can raise the output
@@ -480,7 +394,7 @@
play file.xxx phaser 0.8 0.74 3.0 0.4 0.5 -t
- The drunken loundspeaker system ( now less alkohol ):
+ The drunken loudspeaker system ( now less alcohol ):
play file.xxx phaser 0.9 0.85 4.0 0.23 1.3 -s
@@ -492,10 +406,44 @@
play file.xxx phaser 0.6 0.66 3.0 0.6 2.0 -t
+ Compander
+
+ The compander effect allows the dynamic range of a signal
+ to be compressed or expanded. For most situations, the
+ attack time (response to the music getting louder) should
+ be shorter than the decay time because our ears are more
+ sensitive to suddenly loud music than to suddenly soft
+ music.
+
+ For example, suppose you are listening to Strauss' "Also
+ Sprach Zarathustra" in a noisy environment such as a car.
+ If you turn up the volume enough to hear the soft passages
+ over the road noise, the loud sections will be too loud.
+ You could try this:
+
+ play file.xxx compand 0.3,1
+ -90,-90,-70,-70,-60,-20,0,0 -5 0 0.2
+
+ The transfer function ("-90,...") says that very soft
+ sounds between -90 and -70 decibels (-90 is about the
+ limit of 16-bit encoding) will remain unchanged. That
+ keeps the compander from boosting the volume on "silent"
+ passages such as between movements. However, sounds in
+ the range -60 decibels to 0 decibels (maximum volume) will
+ be boosted so that the 60-dB dynamic range of the original
+ music will be compressed 3-to-1 into a 20-dB range, which
+ is wide enough to enjoy the music but narrow enough to get
+ around the road noise. The -5 dB output gain is needed to
+ avoid clipping (the number is inexact, and was derived by
+ experimentation). The 0 for the initial volume will work
+ fine for a clip that starts with a bit of silence, and the
+ delay of 0.2 has the effect of causing the compander to
+ react a bit more quickly to sudden volume changes.
+
Other effects ( copy, rate, avg, stat, vibro, lowp, highp,
band, reverb )
- The other effects are simply to use. However, an "easy to
+ The other effects are simple to use. However, an "easy to
use manual" should be given here.
More effects ( to do ! )
@@ -503,15 +451,15 @@
There are a lot of effects around like noise gates, com�
pressors, waw-waw, stereo effects and so on. They should
be implemented making SOX to be more useful in sound mix�
- ing technics coming together with a great varity of dif�
- ferent sound effects.
+ ing techniques coming together with a great variety of
+ different sound effects.
- Combining effects be using then in parallel or sequel on
+ Combining effects by using them in parallel or sequence on
different channels needs some easy mechanism which is
real-time stable.
Really missing, is the changing of the parameters, start�
- ing and stoping of effects while playing samples in real-
+ ing and stopping of effects while playing samples in real-
time!
Good luck and have fun with all the effects!
@@ -519,76 +467,9 @@
Juergen Mueller (jmueller@uia.ua.ac.be)
-
-
-
-
- December 10, 1999 8
-
-
-
-
-
-SoX(1) SoX(1)
-
-
SEE ALSO
sox(1), play(1), rec(1)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- December 10, 1999 9
-
-
+ December 10, 1999 SoX(1)
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -69,13 +69,16 @@
PLAY_0 =
PLAY_1 = play
-all: sox mix $(PLAY_$(PLAY_SUPPORT))
+all: sox soxmix $(PLAY_$(PLAY_SUPPORT))
sox: libst.a sox.o
$(CC) $(LDFLAGS) -o sox sox.o $(LIBS)
-mix: libst.a mix.o
- $(CC) $(LDFLAGS) -o mix mix.o $(LIBS)
+soxmix.o:
+ $(CC) $(CFLAGS) -DSOXMIX -c -o soxmix.o sox.c
+
+soxmix: libst.a soxmix.o
+ $(CC) $(LDFLAGS) -o soxmix soxmix.o $(LIBS)
play: play.in
$(SED) -e 's|@PREFIX@|$(BINDIR)|g' < $(srcdir)/play.in > play
--- a/src/au.c
+++ b/src/au.c
@@ -166,7 +166,7 @@
}
else
{
- st_fail_errno(ft,ST_EHDR,"Sun/NeXT/DEC header doesn't start with magic word\nTry the '.ul' file type with '-t ul -r 8000 filename'");
+ st_fail_errno(ft,ST_EHDR,"Did not detect valid Sun/NeXT/DEC magic number in header.");
return(ST_EOF);
}
--- a/src/auto.c
+++ b/src/auto.c
@@ -17,74 +17,110 @@
#include "st.h"
#include <string.h>
+#if defined(DOS) || defined(WIN32)
+#define LASTCHAR '\\'
+#else
+#define LASTCHAR '/'
+#endif
+
int st_autostartread(ft)
ft_t ft;
{
- char *type;
- char header[132];
- int rc;
- if (!ft->seekable)
+ char *type;
+ char header[132];
+ int rc;
+
+ type = 0;
+
+ /* Attempt to auto-detect filetype using magic values. Abort loop
+ * and use file extension if any errors are detected.
+ */
+ if (ft->seekable)
+ {
+ if (fread(header, 1, sizeof(header), ft->fp) == sizeof(header))
{
- st_fail_errno(ft,ST_EOF,"Type AUTO input must be a file, not a pipe");
- return(ST_EOF);
- }
- if (fread(header, 1, sizeof(header), ft->fp) != sizeof(header))
- {
- st_fail_errno(ft,ST_EOF,"Type AUTO detects short file");
- return(ST_EOF);
- }
- fseek(ft->fp, 0L - sizeof header, 1); /* Seek back */
- type = 0;
- if ((strncmp(header, ".snd", 4) == 0) ||
- (strncmp(header, "dns.", 4) == 0) ||
- ((header[0] == '\0') && (strncmp(header+1, "ds.", 3) == 0))) {
+
+ fseek(ft->fp, 0L - sizeof header, 1); /* Seek back */
+ type = 0;
+ if ((strncmp(header, ".snd", 4) == 0) ||
+ (strncmp(header, "dns.", 4) == 0) ||
+ ((header[0] == '\0') && (strncmp(header+1, "ds.", 3) == 0)))
+ {
type = "au";
- }
- else if (strncmp(header, "FORM", 4) == 0) {
+ }
+ else if (strncmp(header, "FORM", 4) == 0)
+ {
if (strncmp(header + 8, "AIFF", 4) == 0)
- type = "aiff";
+ type = "aiff";
else if (strncmp(header + 8, "8SVX", 4) == 0)
- type = "8svx";
+ type = "8svx";
else if (strncmp(header + 8, "MAUD", 4) == 0)
- type = "maud";
- }
- else if (strncmp(header, "RIFF", 4) == 0 &&
- strncmp(header + 8, "WAVE", 4) == 0) {
+ type = "maud";
+ }
+ else if (strncmp(header, "RIFF", 4) == 0 &&
+ strncmp(header + 8, "WAVE", 4) == 0)
+ {
type = "wav";
- }
- else if (strncmp(header, "Creative Voice File", 19) == 0) {
+ }
+ else if (strncmp(header, "Creative Voice File", 19) == 0)
+ {
type = "voc";
- }
- else if (strncmp(header+65, "FSSD", 4) == 0 &&
- strncmp(header+128, "HCOM", 4) == 0) {
+ }
+ else if (strncmp(header+65, "FSSD", 4) == 0 &&
+ strncmp(header+128, "HCOM", 4) == 0)
+ {
type = "hcom";
- }
- else if (strncmp(header, "SOUND", 5) == 0) {
+ }
+ else if (strncmp(header, "SOUND", 5) == 0)
+ {
type = "sndt";
- }
- else if (strncmp(header, "2BIT", 4) == 0) {
+ }
+ else if (strncmp(header, "2BIT", 4) == 0)
+ {
type = "avr";
+ }
+ else if (strncmp(header, "NIST_1A", 4) == 0)
+ {
+ type = "sph";
+ }
}
- else if (strncmp(header, "NIST_1A", 4)) {
- type = "sph";
- }
+ }
- if (type == 0) {
- st_warn("Type AUTO doesn't recognize this header\n");
- st_warn("Trying: -t raw -r 44100 -s -w\n\n");
- type = "raw";
- ft->info.rate = 44100;
- ft->info.size = ST_SIZE_WORD;
- ft->info.encoding = ST_ENCODING_SIGN2;
- }
- st_report("Type AUTO changed to %s", type);
+ if (type == 0)
+ {
+ /* Use filename extension to determine audio type. */
+
+ /* First, chop off any path portions of filename. This
+ * prevents the next search from considering that part. */
+ if ((type = strrchr(ft->filename, LASTCHAR)) == NULL)
+ type = ft->filename;
+
+ /* Now look for a filename extension */
+ if ((type = strrchr(type, '.')) != NULL)
+ type++;
+ else
+ type = NULL;
+ }
+
+ if (type == 0)
+ {
+ st_warn("Could not detect type. Assuming signed 16-bit data using rate of 44100.\n");
+ type = "raw";
+ ft->info.rate = 44100;
+ ft->info.size = ST_SIZE_WORD;
+ ft->info.encoding = ST_ENCODING_SIGN2;
+ }
ft->filetype = type;
rc = st_gettype(ft); /* Change ft->h to the new format */
- if(rc)
- return (rc);
- (* ft->h->startread)(ft);
- return(ST_SUCCESS);
-}
+ if(rc != ST_SUCCESS)
+ {
+ st_fail_errno(ft,ST_EFMT,"Do not understand format type: %s\n",type);
+ return (rc);
+ }
+
+ st_report("Detected file format type: %s\n", type);
+ return ((* ft->h->startread)(ft));
+ }
int st_autostartwrite(ft)
ft_t ft;
--- a/src/sox.c
+++ b/src/sox.c
@@ -84,8 +84,14 @@
static int flow_effect(int);
static int drain_effect(int);
+#ifdef SOXMIX
+#define MAX_INPUT_FILES 2
+#define REQUIRED_INPUT_FILES 2
+#else
#define MAX_INPUT_FILES 1
#define REQUIRED_INPUT_FILES 1
+#endif
+
static ft_t informat[MAX_INPUT_FILES] = { 0 };
static int input_count = 0;
@@ -225,7 +231,8 @@
}
/* Make sure we got at least the required # of input filename */
- if (!informat[REQUIRED_INPUT_FILES-1] ||
+ if (input_count < REQUIRED_INPUT_FILES ||
+ !informat[REQUIRED_INPUT_FILES-1] ||
!informat[REQUIRED_INPUT_FILES-1]->filename)
usage("Not enough input files not specified");
@@ -283,20 +290,12 @@
{
informat[input_count] = ft;
- /* If filetype has not been set by command line options then
- * attempt to get it from filename extension.
- */
- if (!ft->filetype)
- {
- if ((ft->filetype = strrchr(ft->filename, '.')) != NULL)
- ft->filetype++;
- else /* Default to "auto" */
- ft->filetype = "auto";
- }
+ /* Let auto effect do the work if user is not overriding. */
+ if (!ft->filetype)
+ ft->filetype = "auto";
- /* See if we understand this type of file */
- if( st_gettype(ft) )
- st_fail("Unknown input file format for '%s'. Use -t option to override",ft->filename);
+ if ( st_gettype(ft) )
+ st_fail("Unknown input file format for '%s'. Use -t option to override",ft->filename);
/* Default the input comment to the filename if not set from
* command line.
@@ -327,15 +326,29 @@
#endif
}
+#if defined(DOS) || defined(WIN32)
+#define LASTCHAR '\\'
+#else
+#define LASTCHAR '/'
+#endif
+
static void copy_output(ft_t ft)
{
outformat = ft;
if (writing && !ft->filetype) {
- if ((ft->filetype = strrchr(ft->filename, '.')) != NULL)
- ft->filetype++;
- else
- ft->filetype = ft->filename;
+ /* Use filename extension to determine audio type. */
+
+ /* First, chop off any path portions of filename. This
+ * prevents the next search from considering that part. */
+ if ((ft->filetype = strrchr(ft->filename, LASTCHAR)) == NULL)
+ ft->filetype = ft->filename;
+
+ /* Now look for a filename extension */
+ if ((ft->filetype = strrchr(ft->filetype, '.')) != NULL)
+ ft->filetype++;
+ else
+ ft->filetype = NULL;
}
if ( st_gettype(ft) )
@@ -506,6 +519,20 @@
}
}
+static int compare_input(ft_t ft1, ft_t ft2)
+{
+ if (ft1->info.rate != ft2->info.rate)
+ return ST_EOF;
+ if (ft1->info.size != ft2->info.size)
+ return ST_EOF;
+ if (ft1->info.encoding != ft2->info.encoding)
+ return ST_EOF;
+ if (ft1->info.channels != ft2->info.channels)
+ return ST_EOF;
+
+ return ST_SUCCESS;
+}
+
/*
* Process input file -> effect table -> output file
* one buffer at a time
@@ -513,37 +540,60 @@
static void process(void) {
int e, f, flowstatus;
-
-
- /* Read and write starters can change their formats. */
- if ((* informat[0]->h->startread)(informat[0]) == ST_EOF)
+#ifdef SOXMIX
+ int s;
+ ULONG ilen[MAX_INPUT_FILES];
+ LONG *ibuf[MAX_INPUT_FILES];
+#endif
+
+ for (f = 0; f < input_count; f++)
{
- st_fail(informat[0]->st_errstr);
- }
+ /* Read and write starters can change their formats. */
+ if ((* informat[f]->h->startread)(informat[f]) != ST_SUCCESS)
+ {
+ st_fail("Failed reading %s: %s",informat[f]->filename,
+ informat[f]->st_errstr);
+ }
- /* Go a head and assume 1 channel audio if nothing is detected.
- * This is because libst usually doesn't set this for mono file
- * formats (for historical reasons).
- */
- if (informat[0]->info.channels == -1)
- informat[0]->info.channels = 1;
+ /* Go ahead and assume 1 channel audio if nothing is detected.
+ * This is because libst usually doesn't set this for mono file
+ * formats (for historical reasons).
+ */
+ if (informat[f]->info.channels == -1)
+ informat[f]->info.channels = 1;
- if ( st_checkformat(informat[0]) )
- st_fail("bad input format");
-
-
- st_report("Input file: using sample rate %lu\n\tsize %s, encoding %s, %d %s",
- informat[0]->info.rate, st_sizes_str[informat[0]->info.size],
- st_encodings_str[informat[0]->info.encoding], informat[0]->info.channels,
- (informat[0]->info.channels > 1) ? "channels" : "channel");
+ if ( st_checkformat(informat[f]) )
+ st_fail("bad input format for file %s",informat[f]->filename);
- if (informat[0]->comment)
- st_report("Input file: comment \"%s\"\n", informat[0]->comment);
+ st_report("Input file %s: using sample rate %lu\n\tsize %s, encoding %s, %d %s",
+ informat[f]->filename, informat[f]->info.rate,
+ st_sizes_str[informat[f]->info.size],
+ st_encodings_str[informat[f]->info.encoding],
+ informat[f]->info.channels,
+ (informat[f]->info.channels > 1) ? "channels" : "channel");
+ if (informat[f]->comment)
+ st_report("Input file %s: comment \"%s\"\n",
+ informat[f]->filename, informat[f]->comment);
+ }
+
+#ifdef SOXMIX
+ for (f = 1; f < input_count; f++)
+ {
+ if (compare_input(informat[0], informat[f]) != ST_SUCCESS)
+ {
+ st_fail("Input files must have the same rate, channels, data size, and encoding");
+ }
+ }
+#endif
+
if (writing)
{
open_output(outformat);
+ /* Always use first input file as a reference for output
+ * file format.
+ */
st_copyformat(informat[0], outformat);
if ((*outformat->h->startwrite)(outformat) == ST_EOF)
@@ -554,9 +604,11 @@
if (st_checkformat(outformat))
st_fail("bad output format");
- st_report("Output file: using sample rate %lu\n\tsize %s, encoding %s, %d %s",
- outformat->info.rate, st_sizes_str[outformat->info.size],
- st_encodings_str[outformat->info.encoding], outformat->info.channels,
+ st_report("Output file %s: using sample rate %lu\n\tsize %s, encoding %s, %d %s",
+ outformat->filename, outformat->info.rate,
+ st_sizes_str[outformat->info.size],
+ st_encodings_str[outformat->info.encoding],
+ outformat->info.channels,
(outformat->info.channels > 1) ? "channels" : "channel");
if (outformat->comment)
@@ -591,18 +643,76 @@
}
}
+#ifdef SOXMIX
+ for (f = 0; f < MAX_INPUT_FILES; f++)
+ {
+ ibuf[f] = (LONG *)malloc(BUFSIZ * sizeof(LONG));
+ if (!ibuf[f])
+ {
+ st_fail("could not allocate memory");
+ }
+ }
+#endif
/*
* Just like errno, we must set st_errno to known values before
* calling I/O operations.
*/
- informat[0]->st_errno = 0;
+ for (f = 0; f < input_count; f++)
+ informat[f]->st_errno = 0;
outformat->st_errno = 0;
/* Run input data through effects and get more until olen == 0 */
do {
+
+#ifndef SOXMIX
efftab[0].olen = (*informat[0]->h->read)(informat[0],
efftab[0].obuf, (LONG) BUFSIZ);
+
+ if (informat[0]->st_errno)
+ {
+ st_warn("Error reading from %s: %s", informat[0]->filename,
+ informat[0]->st_errstr);
+ break;
+ }
+#else
+ for (f = 0; f < input_count; f++)
+ {
+ ilen[f] = (*informat[f]->h->read)(informat[f],
+ ibuf[f], (LONG)BUFSIZ);
+ if (informat[f]->st_errno)
+ {
+ st_warn("Error reading from %s: %s", informat[f]->filename,
+ informat[0]->st_errstr);
+ break;
+ }
+ }
+ if (f < input_count && informat[f]->st_errno)
+ break;
+
+ efftab[0].olen = 0;
+ for (f = 0; f < input_count; f++)
+ if (ilen[f] > efftab[0].olen)
+ efftab[0].olen = ilen[f];
+
+ for (s = 0; s < efftab[0].olen; s++)
+ {
+ /* Mix data together by dividing by the number
+ * of audio files and then summing up. This prevents
+ * overflows.
+ */
+ for (f = 0; f < input_count; f++)
+ {
+ if (f == 0)
+ efftab[0].obuf[s] =
+ (s<ilen[f]) ? (ibuf[f][s]/input_count) : 0;
+ else
+ if (s < ilen[f])
+ efftab[0].obuf[s] += ibuf[f][s]/input_count;
+ }
+ }
+#endif
+
efftab[0].odone = 0;
if (efftab[0].olen == 0)
@@ -620,14 +730,11 @@
flowstatus = flow_effect_out();
/* Negative flowstatus says no more output will ever be generated. */
- if (flowstatus < 0)
+ if (flowstatus < 0 || outformat->st_errno)
break;
} while (1); /* break; efftab[0].olen == 0 */
- if (informat[0]->st_errno)
- st_fail(informat[0]->st_errstr);
-
/* Drain the effects out first to last,
* pushing residue through subsequent effects */
/* oh, what a tangled web we weave */
@@ -659,14 +766,23 @@
(* efftabR[e].h->stop)(&efftabR[e]);
}
- if ((* informat[0]->h->stopread)(informat[0]) == ST_EOF)
- st_fail(informat[0]->st_errstr);
- fclose(informat[0]->fp);
+ for (f = 0; f < input_count; f++)
+ {
+ /* If problems closing input file, just warn user since
+ * we are exiting anyways.
+ */
+ if ((* informat[f]->h->stopread)(informat[f]) == ST_EOF)
+ st_warn(informat[f]->st_errstr);
+ fclose(informat[f]->fp);
+ }
if (writing)
{
+ /* problem closing output file, just warn user since we
+ * are exiting anyways.
+ */
if ((* outformat->h->stopwrite)(outformat) == ST_EOF)
- st_fail(outformat->st_errstr);
+ st_warn(outformat->st_errstr);
}
if (writing)
fclose(outformat->fp);
@@ -696,7 +812,10 @@
}
if (outformat->st_errno)
- st_fail(outformat->st_errstr);
+ {
+ st_warn("Error writing: %s",outformat->st_errstr);
+ break;
+ }
/* If any effect will never again produce data, give up. This
* works because of the pull status: the effect won't be able to