ref: af42ae773c1c656e618ce61b8fe8960c7a628f81
parent: a83636279984823b1d7e04cff5bfaa7588655f66
author: cbagwell <cbagwell>
date: Fri Jul 30 22:01:49 EDT 2004
Updated avg effect to mirror older pick effect. Added new repeat effect. Added new Dialogic/OKI ADPCM file handler.
--- a/Changelog
+++ b/Changelog
@@ -24,6 +24,8 @@
buffer overflows.
o Added patch from Redhat to allow resample to work on certain 64-bit
machines.
+ o Tony Seebregts added a file handler for headerless Dialogic/OKI ADPCM
+ files (VOX files).
sox-12.17.4
-----------
--- a/Makefile.dos
+++ b/Makefile.dos
@@ -17,13 +17,13 @@
g723_24.obj g723_40.obj g72x.obj gsm.obj \
hcom.obj ima_rw.obj maud.obj mp3.obj nulfile.obj oss.obj prc.obj \
raw.obj sf.obj smp.obj sndrtool.obj sphere.obj sunaudio.obj \
- tx16w.obj voc.obj vorbis.obj wav.obj wve.obj
+ tx16w.obj voc.obj vorbis.obj vox.obj wav.obj wve.obj
EOBJ = avg.obj band.obj bandpass.obj breject.obj btrworth.obj chorus.obj \
compand.obj copy.obj dcshift.obj deemphas.obj earwax.o \
echo.obj echos.obj fade.obj filter.obj flanger.obj highp.obj \
highpass.obj lowp.obj lowpass.obj map.obj mask.obj phaser.obj \
- pitch.obj pan.obj polyphase.obj rate.obj resample.obj \
+ pitch.obj pan.obj polyphase.obj rate.obj repeat.obj resample.obj \
reverb.obj reverse.obj silence.obj speed.obj stat.obj \
stretch.obj swap.obj synth.obj trim.obj vibro.obj vol.obj
--- a/Makefile.gcc
+++ b/Makefile.gcc
@@ -23,12 +23,12 @@
g711.o g721.o g723_16.o g723_24.o g723_40.o g72x.o gsm.o hcom.o \
ima_rw.o maud.o mp3.o nulfile.o oss.o prc.o raw.o sf.o smp.o \
sndrtool.o sphere.o sunaudio.o tx16w.o voc.o vorbis.o \
- wav.o wve.o
+ vox.o wav.o wve.o
EOBJ = avg.o band.o bandpass.o breject.o btrworth.o chorus.o compand.o \
copy.o dcshift.o deemphas.o earwax.o echo.o echos.o fade.o \
filter.o flanger.o highp.o highpass.o lowp.o lowpass.o map.o \
- mask.o pan.o phaser.o pitch.o polyphas.o rate.o \
+ mask.o pan.o phaser.o pitch.o polyphas.o rate.o repeat.o \
resample.o reverb.o reverse.o silence.o speed.o \
stat.o stretch.o swap.o synth.o trim.o vibro.o vol.o
--- a/README
+++ b/README
@@ -62,6 +62,7 @@
o Pan sound between channels
o Apply a phaser effect
o Change the pitch of a sound file without effecting its speed
+ o Repeat audio data
o Change sampling rates using several different algorithms. A
'resample' and 'polyphase' effect use high-grade signal rate
changes using real signal theory!
--- a/libst.txt
+++ b/libst.txt
@@ -1,179 +1,151 @@
-ST(3) ST(3)
+ST(3) ST(3)
NAME
- libst - Sound Tools : sound sample file and effects
- libraries.
+ libst - Sound Tools : sound sample file and effects libraries.
SYNOPSIS
cc file.c -o file libst.a
DESCRIPTION
- Sound Tools is a library of sound sample file format read�
- ers/writers and sound effects processors.
+ Sound Tools is a library of sound sample file format readers/writers
+ and sound effects processors.
- Sound Tools includes skeleton C files to assist you in
- writing new formats and effects. The full skeleton
- driver, skel.c, helps you write drivers for a new format
- which has data structures. The simple skeleton drivers
- help you write a new driver for raw (headerless) formats,
- or for formats which just have a simple header followed by
- raw data.
+ Sound Tools includes skeleton C files to assist you in writing new for-
+ mats and effects. The full skeleton driver, skel.c, helps you write
+ drivers for a new format which has data structures. The simple skele-
+ ton drivers help you write a new driver for raw (headerless) formats,
+ or for formats which just have a simple header followed by raw data.
- Most sound sample formats are fairly simple: they are just
- a string of bytes or words and are presumed to be sampled
- at a known data rate. Most of them have a short data
- structure at the beginning of the file.
+ Most sound sample formats are fairly simple: they are just a string of
+ bytes or words and are presumed to be sampled at a known data rate.
+ Most of them have a short data structure at the beginning of the file.
INTERNALS
- The Sound Tools formats and effects operate on an internal
- buffer format of signed 32-bit longs. The data processing
- routines are called with buffers of these samples, and
- buffer sizes which refer to the number of samples pro�
- cessed, not the number of bytes. File readers translate
- the input samples to signed longs and return the number of
- longs read. For example, data in linear signed byte for�
- mat is left-shifted 24 bits.
+ The Sound Tools formats and effects operate on an internal buffer for-
+ mat of signed 32-bit longs. The data processing routines are called
+ with buffers of these samples, and buffer sizes which refer to the num-
+ ber of samples processed, not the number of bytes. File readers trans-
+ late the input samples to signed longs and return the number of longs
+ read. For example, data in linear signed byte format is left-shifted
+ 24 bits.
- This does cause problems in processing the data. For
- example:
+ This does cause problems in processing the data. For example:
*obuf++ = (*ibuf++ + *ibuf++)/2;
- would not mix down left and right channels into one mono�
- phonic channel, because the resulting samples would over�
- flow 32 bits. Instead, the ``avg'' effects must use:
+ would not mix down left and right channels into one monophonic channel,
+ because the resulting samples would overflow 32 bits. Instead, the
+ ‘‘avg’’ effects must use:
*obuf++ = *ibuf++/2 + *ibuf++/2;
- Stereo data is stored with the left and right speaker data
- in successive samples. Quadraphonic data is stored in
- this order: left front, right front, left rear, right
- rear.
+ Stereo data is stored with the left and right speaker data in succes-
+ sive samples. Quadraphonic data is stored in this order: left front,
+ right front, left rear, right rear.
FORMATS
- A format is responsible for translating between sound sam�
- ple files and an internal buffer. The internal buffer is
- store in signed longs with a fixed sampling rate. The
- format operates from two data structures: a format struc�
- ture, and a private structure.
+ A format is responsible for translating between sound sample files and
+ an internal buffer. The internal buffer is stored in signed longs with
+ a fixed sampling rate. The format operates from two data structures: a
+ format structure, and a private structure.
- The format structure contains a list of control parameters
- for the sample: sampling rate, data size (bytes, words,
- floats, etc.), encoding (unsigned, signed, logarithmic),
- number of sound channels. It also contains other state
- information: whether the sample file needs to be byte-
- swapped, whether fseek() will work, its suffix, its file
- stream pointer, its format pointer, and the private struc�
- ture for the format .
+ The format structure contains a list of control parameters for the sam-
+ ple: sampling rate, data size (bytes, words, floats, etc.), encoding
+ (unsigned, signed, logarithmic), number of sound channels. It also
+ contains other state information: whether the sample file needs to be
+ byte-swapped, whether fseek() will work, its suffix, its file stream
+ pointer, its format pointer, and the private structure for the format.
- The private area is just a preallocated data array for the
- format to use however it wishes. It should have a defined
- data structure and cast the array to that structure. See
- voc.c for the use of a private data area. Voc.c has to
- track the number of samples it writes and when finishing,
- seek back to the beginning of the file and write it out.
- The private area is not very large. The ``echo'' effect
- has to malloc() a much larger area for its delay line
- buffers.
+ The private area is just a preallocated data array for the format to
+ use however it wishes. It should have a defined data structure and
+ cast the array to that structure. See voc.c for the use of a private
+ data area. Voc.c has to track the number of samples it writes and when
+ finishing, seek back to the beginning of the file and write it out.
+ The private area is not very large. The ‘‘echo’’ effect has to mal-
+ loc() a much larger area for its delay line buffers.
A format has 6 routines:
- startread Set up the format parameters, or read
- in a data header, or do what needs to
- be done.
+ startread Set up the format parameters, or read in a data
+ header, or do what needs to be done.
- read Given a buffer and a length: read up
- to that many samples, transform them
- into signed long integers, and copy
- them into the buffer. Return the num�
- ber of samples actually read.
+ read Given a buffer and a length: read up to that many
+ samples, transform them into signed long integers,
+ and copy them into the buffer. Return the number
+ of samples actually read.
stopread Do what needs to be done.
- startwrite Set up the format parameters, or write
- out a data header, or do what needs to
- be done.
+ startwrite Set up the format parameters, or write out a data
+ header, or do what needs to be done.
- write Given a buffer and a length: copy that
- many samples out of the buffer, con�
- vert them from signed longs to the
- appropriate data, and write them to
- the file. If it can't write out all
- the samples, fail.
+ write Given a buffer and a length: copy that many samples
+ out of the buffer, convert them from signed longs
+ to the appropriate data, and write them to the
+ file. If it can’t write out all the samples, fail.
- stopwrite Fix up any file header, or do what
- needs to be done.
+ stopwrite Fix up any file header, or do what needs to be
+ done.
EFFECTS
- An effects loop has one input and one output stream. It
- has 5 routines.
+ An effects loop has one input and one output stream. It has 5 rou-
+ tines.
- getopts is called with a character string
- argument list for the effect.
+ getopts is called with a character string argument list for
+ the effect.
- start is called with the signal parameters
- for the input and output streams.
+ start is called with the signal parameters for the input
+ and output streams.
- flow is called with input and output data
- buffers, and (by reference) the input
- and output data buffer sizes. It pro�
- cesses the input buffer into the out�
- put buffer, and sets the size vari�
- ables to the numbers of samples actu�
- ally processed. It is under no obli�
- gation to read from the input buffer
- or write to the output buffer during
- the same call. If the call returns
- ST_EOF then this should be used as an
- indication that this effect will no
- longer read any data and can be used
- to switch to drain mode sooner.
+ flow is called with input and output data buffers, and
+ (by reference) the input and output data buffer
+ sizes. It processes the input buffer into the out-
+ put buffer, and sets the size variables to the num-
+ bers of samples actually processed. It is under no
+ obligation to read from the input buffer or write
+ to the output buffer during the same call. If the
+ call returns ST_EOF then this should be used as an
+ indication that this effect will no longer read any
+ data and can be used to switch to drain mode
+ sooner.
- drain is called after there are no more
- input data samples. If the effect
- wishes to generate more data samples
- it copies the generated data into a
- given buffer and returns the number of
- samples generated. If it fills the
- buffer, it will be called again, etc.
- The echo effect uses this to fade
- away.
+ drain is called after there are no more input data sam-
+ ples. If the effect wishes to generate more data
+ samples it copies the generated data into a given
+ buffer and returns the number of samples generated.
+ If it fills the buffer, it will be called again,
+ etc. The echo effect uses this to fade away.
- stop is called when there are no more input
- samples to process. stop may generate
- output samples on its own. See echo.c
- for how to do this, and see that what
- it does is absolutely bogus.
+ stop is called when there are no more input samples to
+ process. stop may generate output samples on its
+ own. See echo.c for how to do this, and see that
+ what it does is absolutely bogus.
COMMENTS
- Theoretically, formats can be used to manipulate several
- files inside one program. Multi-sample files, for example
- the download for a sampling keyboard, can be handled
- cleanly with this feature.
+ Theoretically, formats can be used to manipulate several files inside
+ one program. Multi-sample files, for example the download for a sam-
+ pling keyboard, can be handled cleanly with this feature.
PORTABILITY PROBLEMS
- Many computers don't supply arithmetic shifting, so do
- multiplies and divides instead of << and >>. The compiler
- will do the right thing if the CPU supplies arithmetic
- shifting.
+ Many computers don’t supply arithmetic shifting, so do multiplies and
+ divides instead of << and >>. The compiler will do the right thing if
+ the CPU supplies arithmetic shifting.
- Do all arithmetic conversions one stage at a time. I've
- had too many problems with "obviously clean" combinations.
+ Do all arithmetic conversions one stage at a time. I’ve had too many
+ problems with "obviously clean" combinations.
- In general, don't worry about "efficiency". The sox.c
- base translator is disk-bound on any machine (other than a
- 8088 PC with an SMD disk controller). Just comment your
- code and make sure it's clean and simple. You'll find
- that DSP code is extremely painful to write as it is.
+ In general, don’t worry about "efficiency". The sox.c base translator
+ is disk-bound on any machine (other than a 8088 PC with an SMD disk
+ controller). Just comment your code and make sure it’s clean and sim-
+ ple. You’ll find that DSP code is extremely painful to write as it is.
BUGS
- The HCOM format is not re-entrant; it can only be used
- once in a program.
+ The HCOM format is not re-entrant; it can only be used once in a pro-
+ gram.
- The program/library interface is pretty weak. There's too
- much ad-hoc information which a program is supposed to
- gather up. Sound Tools wants to be an object-oriented
- dataflow architecture.
+ The program/library interface is pretty weak. There’s too much ad-hoc
+ information which a program is supposed to gather up. Sound Tools
+ wants to be an object-oriented dataflow architecture.
- October 15 1996 ST(3)
+ October 15 1996 ST(3)
--- a/sox.1
+++ b/sox.1
@@ -45,7 +45,7 @@
.P
.B Effects:
.br
- \fBavg\fR [ -l | -r | -f | -b | n,n,...,n ]
+ \fBavg\fR [ -l | -r | -f | -b | -1 | -2 | -3 | -4 | n,n,...,n ]
.br
\fBband\fR [ -n ] \fIcenter\fR [ \fIwidth\fR ]
.br
@@ -98,7 +98,7 @@
.br
\fBphaser\fR \fIgain-in gain-out delay decay speed\fR < -s | -t >
.br
- \fBpick\fR [ \fI-1\fR | \fI-2\fR | \fI-3\fR | \fI-4\fR | \fI-l\fR | \fI-r\fR ]
+ \fBpick\fR [ \fI-1\fR | \fI-2\fR | \fI-3\fR | \fI-4\fR | \fI-l\fR | \fI-r\fR | \fI-f\fR | \fI-b\fR ]
.br
\fBpitch\fR \fIshift\fR [ \fIwidth interpole fade\fR ]
.br
@@ -108,6 +108,8 @@
.br
\fBrate\fR
.br
+ \fBrepeat\fR \fIcount\fR
+.br
\fBresample\fR [ -qs | -q | -ql ] [ \fIrolloff\fR [ \fIbeta\fR ] ]
.br
\fBreverb\fR \fIgain-out reverb-time delay\fR [ \fIdelay\fR ... ]
@@ -569,6 +571,10 @@
.B .ogg
format.
.TP 10
+.B vox
+A headerless file of Dialogic/OKI ADPCM audio data commonly comes with the
+extension .vox. This ADPCM data has 12-bit precision packed into only 4-bits.
+.TP 10
.B .wav
Microsoft .WAV RIFF files.
.br
@@ -629,18 +635,18 @@
Multiple effects may be applied to the audio data by specifying them
one after another at the end of the command line.
.TP 10
-avg [ \fI-l\fR | \fI-r\fR | \fI-f\fR | \fI-b\fR | \fIn,n,...,n\fR ]
+avg [ \fI-l\fR | \fI-r\fR | \fI-f\fR | \fI-b\fR | \fI-1\fR | \fI-2\fR | \fI-3\fR | \fI-4\fR | \fIn,n,...,n\fR ]
Reduce the number of channels by averaging the samples,
or duplicate channels to increase the number of channels.
This effect is automatically used when the number of input
channels differ from the number of output channels. When reducing
the number of channels it is possible to manually specify the
-avg effect and use the \fI-l\fR, \fI-r\fR, \fI-f\fR, or \fI-b\fR
-options to select only
-the left, right, front, or back channel(s) for the output instead of
-averaging the channels.
-The \fI-f\fR and \fI-b\fR options maintain left/right stereo
-separation; use the avg effect twice to select a single channel.
+avg effect and use the \fI-l\fR, \fI-r\fR, \fI-f\fR, \fI-b\fR,
+\fI-1\fR, \fI-2\fR, \fI-3\fR, or \fI-4\fR options to select only
+the left, right, front, or back channel(s), or one specific channel,
+for the output instead of averaging the channels.
+The \fI-l\fR, \fI-r\fR, \fI-f\fR, and \fI-b\fR options will do averaging
+in quad-channel files, so select an exact channel number to prevent this.
The avg effect can also be invoked with up to 16 double-precision
numbers, which specify the proportion of each input channel that is
@@ -891,12 +897,8 @@
(-t). The decay should be less than 0.5 to avoid
feedback. Gain-out is the volume of the output.
.TP 10
-pick [ \fI-1\fR | \fI-2\fR | \fI-3\fR | \fI-4\fR | \fI-l\fR | \fI-r\fR ]
-Select the left or right channel of a stereo sample,
-or one of four channels in a quadraphonic sample. The \fI-l\fR and \fI-r\fR
-options represent either the left or right channel. It is required that
-you use the \fB-c 1\fR command line option in order to force the output file to
-contain only 1 channel.
+pick [ \fI-1\fR | \fI-2\fR | \fI-3\fR | \fI-4\fR | \fI-l\fR | \fI-r\fR | \fI-f\fR | \fI-b\fR ]
+Pick a subset of channels to be copied into the output file. This effect is just an alias of the "avg" effect but is left here for historical reasons.
.TP 10
pitch \fIshift [ width interpole fade ]\fB
Change the pitch of file without affecting its duration by cross-fading
@@ -971,6 +973,9 @@
If you are wondering which rate changing effects to use, you will want to read a
detailed analysis of all of them at http://eakaw2.et.tu-dresden.de/~wilde/resample/resample.html
.TP 10
+repeat \fIcount\fR
+Repeats the audio data \fIcount\fR times. Requires disk space to store the data to be repeated.
+.TP 10
resample [ \fI-qs\fB | \fI-q\fB | \fI-ql\fB ] [ \fIrolloff\fB [ \fIbeta\fB ] ]\fR
Translate input sampling rate to output sampling rate
via simulated analog filtration.
@@ -1035,7 +1040,7 @@
is, with closer being better. When increasing the sample rate of an
audio file you would not expect to have any frequencies exist that are
past the original Nyquist frequency. Because of resampling properties, it
-is common to have alaising data created that is above the old
+is common to have aliasing data created that is above the old
Nyquist frequency. In that case the \fIrolloff\fR refers to how close
to the original Nyquist frequency to use a highpass filter to remove
this false data, with closer also being better.
--- a/sox.txt
+++ b/sox.txt
@@ -1,4 +1,4 @@
-SoX(1) SoX(1)
+SoX(1) SoX(1)
@@ -24,13 +24,12 @@
[ -h ] [ -p ] [ -v volume ] [ -V ]
Format options:
- [ -t filetype ] [ -r rate ] [ -s/-u/-U/-A/-a/-i/-g/-f
- ]
- [ -b/-w/-l ]
+ [ -t filetype ] [ -r rate ] [ -s/-u/-U/-A/-a/-i/-g/-f ]
+ [ -b/-w/-l/-d ]
[ -c channels ] [ -x ] [ -e ]
Effects:
- avg [ -l | -r | -f | -b | n,n,...,n ]
+ avg [ -l | -r | -f | -b | -1 | -2 | -3 | -4 | n,n,...,n ]
band [ -n ] center [ width ]
bandpass frequency bandwidth
bandreject frequency bandwidth
@@ -57,12 +56,13 @@
mask
pan direction
phaser gain-in gain-out delay decay speed < -s | -t >
- pick [ -1 | -2 | -3 | -4 | -l | -r ]
+ pick [ -1 | -2 | -3 | -4 | -l | -r | -f | -b ]
pitch shift [ width interpole fade ]
polyphase [ -w < nut / ham > ]
[ -width < long / short / # > ]
[ -cutoff # ]
rate
+ repeat count
resample [ -qs | -q | -ql ] [ rolloff [ beta ] ]
reverb gain-out reverb-time delay [ delay ... ]
reverse
@@ -81,47 +81,41 @@
vol gain [ type [ limitergain ] ]
DESCRIPTION
- SoX is a command line program that can convert most popu�
- lar audio files to most other popular audio file formats.
- It can optionally change the audio sample data type and
- apply one or more sound effects to the file during this
- translation.
+ SoX is a command line program that can convert most popular audio files
+ to most other popular audio file formats. It can optionally change the
+ audio sample data type and apply one or more sound effects to the file
+ during this translation.
- soxmix is functionally the same as the command line pro�
- gram sox expect that it takes two files as input and mixes
- the audio together to produce a single file as output. It
- has a restriction that both input files must be of the
- same data type and sample rates.
+ soxmix is functionally the same as the command line program sox except
+ that it takes two files as input and mixes the audio together to pro-
+ duce a single file as output. It has a restriction that both input
+ files must be of the same data type and sample rates.
- There are two types of audio files formats that SoX can
- work with. The first are self-describing file formats.
- These contain a header that completely describe the char�
- acteristics of the audio data that follows.
+ There are two types of audio files formats that SoX can work with. The
+ first are self-describing file formats. These contain a header that
+ completely describe the characteristics of the audio data that follows.
- The second type are header-less data, or sometimes called
- raw data. A user must pass enough information to SoX on
- the command line so that it knows what type of data it
- contains.
+ The second type are header-less data, or sometimes called raw data. A
+ user must pass enough information to SoX on the command line so that it
+ knows what type of data it contains.
- Audio data can usually be totally described by four char�
- acteristics:
+ Audio data can usually be totally described by four characteristics:
- rate The sample rate is in samples per second. For
- example, CD sample rates are at 44100.
+ rate The sample rate is in samples per second. For example, CD
+ sample rates are at 44100.
- data size The precision the data is stored in. Most popu�
- lar are 8-bit bytes or 16-bit words.
+ data size The precision the data is stored in. Most popular are 8-bit
+ bytes or 16-bit words.
data encoding
- What encoding the data type uses. Examples are
- u-law, ADPCM, or signed linear data.
+ What encoding the data type uses. Examples are u-law, ADPCM,
+ or signed linear data.
- channels How many channels are contained in the audio
- data. Mono and Stereo are the two most common.
+ channels How many channels are contained in the audio data. Mono and
+ Stereo are the two most common.
- Please refer to the soxexam(1) manual page for a long
- description with examples on how to use SoX with various
- types of file formats.
+ Please refer to the soxexam(1) manual page for a long description with
+ examples on how to use SoX with various types of file formats.
OPTIONS
The option syntax is a little grotty, but in essence:
@@ -128,504 +122,432 @@
sox File.au file.wav
- translates a sound file in SUN Sparc .AU format into a
- Microsoft .WAV file, while
+ translates a sound file in SUN Sparc .AU format into a Microsoft .WAV
+ file, while
sox -v 0.5 file.au -r 12000 file.wav mask
- does the same format translation but also lowers the
- amplitude by 1/2, changes the sampling rate to 12000
- hertz, and applies the mask sound effect to the audio
- data.
+ does the same format translation but also lowers the amplitude by 1/2,
+ changes the sampling rate to 12000 hertz, and applies the mask sound
+ effect to the audio data.
- The following will mix two sound files together to to pro�
- duce a single sound file.
+ The following will mix two sound files together to produce a single
+ sound file.
soxmix music.wav voice.wav mixed.wav
Format options:
- Format options effect the audio samples that they immedi�
- ately precede. If they are placed before the input file
- name then they effect the input data. If they are placed
- before the output file name then they will effect the out�
- put data. By taking advantage of this, you can override a
- input file's corrupted header or produce an output file
- that is totally different style then the input file. It
- is also how SoX is informed about the format of raw input
- data.
+ Format options affect the audio samples that they immediately precede.
+ If they are placed before the input file name then they affect the
+ input data. If they are placed before the output file name then they
+ will affect the output data. By taking advantage of this, you can
+ override an input file’s corrupted header or produce an output file that
+ has a totally different style than the input file. It is also how SoX is
+ informed about the format of raw input data.
-t filetype
- gives the type of the sound sample file. Useful
- when file extension is not standard or for spec�
- ifying the .auto file type.
+ gives the type of the sound sample file. Useful when file
+ extension is not standard or for specifying the .auto file
+ type.
- -r rate Gives the sample rate in Hertz of the file. To
- cause the output file to have a different sample
- rate than the input file, include this option as
- a part of the output options.
- If the input and output files have different
- rates then a sample rate change effect must be
- ran. If a sample rate changing effect is not
- specified then a default one will internally be
- ran by SoX using its default parameters.
+ -r rate Gives the sample rate in Hertz of the file. To cause the
+ output file to have a different sample rate than the input
+ file, include this option as a part of the output options.
+ If the input and output files have different rates then a
+ sample rate change effect must be run. If a sample rate
+ changing effect is not specified then a default one will
+ internally be run by SoX using its default parameters.
-s/-u/-U/-A/-a/-i/-g/-f
- The sample data encoding is signed linear (2's
- complement), unsigned linear, u-law (logarith�
- mic), A-law (logarithmic), ADPCM, IMA_ADPCM,
- GSM, or Floating-point.
- U-law (actually shorthand for mu-law) and A-law
- are the U.S. and international standards for
- logarithmic telephone sound compression. When
- uncompressed u-law has roughly the precision of
- 14-byte PCM audio and A-law has roughly the pre�
- cision of 13-bit PCM audio.
- A-law and u-law data is sometimes encoded using
- a reversed bit-ordering (ie. MSB becomes LSB).
- Internally, SoX understands how to work with
- this encoding but there is currently no command
- line option to specify it. If you need this
- support then you can use the psuedo file types
- of ".la" and ".lu" to inform sox of the encod�
- ing. See supported file types for more informa�
- tion.
- ADPCM is a form of sound compression that has a
- good compromise between good sound quality and
- fast encoding/decoding time. It is used for
- telephone sound compression and places were full
- fidelity is not as important. When uncompressed
- it has roughly the precision of 16-bit PCM
- audio. Popular version of ADPCM include G.726,
- MS ADPCM, and IMA ADPCM. The -a flag has dif�
- ferent meanings in different file handlers. In
- .wav files it represents MS ADPCM files, in all
- others it means G.726 ADPCM. IMA ADPCM is a
- specific form of ADPCM compression, slightly
- simpler and slightly lower fidelity than
- Microsoft's flavor of ADPCM. IMA ADPCM is also
- called DVI ADPCM.
- GSM is a standard used for telephone sound com�
- pression in European countries and its gaining
- popularity because of its quality. It usually
- is CPU intensive to work with GSM audio data.
+ The sample data encoding is signed linear (2’s complement),
+ unsigned linear, u-law (logarithmic), A-law (logarithmic),
+ ADPCM, IMA_ADPCM, GSM, or Floating-point.
+ U-law (actually shorthand for mu-law) and A-law are the U.S.
+ and international standards for logarithmic telephone sound
+ compression. When uncompressed u-law has roughly the preci-
+ sion of 14-bit PCM audio and A-law has roughly the precision
+ of 13-bit PCM audio.
+ A-law and u-law data is sometimes encoded using a reversed
+ bit-ordering (ie. MSB becomes LSB). Internally, SoX under-
+ stands how to work with this encoding but there is currently
+ no command line option to specify it. If you need this sup-
+ port then you can use the pseudo file types of ".la" and
+ ".lu" to inform sox of the encoding. See supported file
+ types for more information.
+ ADPCM is a form of sound compression that has a good compro-
+ mise between good sound quality and fast encoding/decoding
+ time. It is used for telephone sound compression and places
+ where full fidelity is not as important. When uncompressed it
+ has roughly the precision of 16-bit PCM audio. Popular ver-
+ sions of ADPCM include G.726, MS ADPCM, and IMA ADPCM. The -a
+ flag has different meanings in different file handlers. In
+ .wav files it represents MS ADPCM files, in all others it
+ means G.726 ADPCM. IMA ADPCM is a specific form of ADPCM
+ compression, slightly simpler and slightly lower fidelity
+ than Microsoft’s flavor of ADPCM. IMA ADPCM is also called
+ DVI ADPCM.
+ GSM is a standard used for telephone sound compression in
+ European countries and it’s gaining popularity because of its
+ quality. It usually is CPU intensive to work with GSM audio
+ data.
- -b/-w/-l The sample data size is in bytes, 16-bit words,
- or 32-bit long words.
+ -b/-w/-l/-d
+ The sample data size is in bytes, 16-bit words, 32-bit long
+ words, or 64-bit double long (long long) words.
- -x The sample data is in XINU format; that is, it
- comes from a machine with the opposite word
- order than yours and must be swapped according
- to the word-size given above. Only 16-bit and
- 32-bit integer data may be swapped. Machine-
- format floating-point data is not portable.
+ -x The sample data is in XINU format; that is, it comes from a
+ machine with the opposite word order than yours and must be
+ swapped according to the word-size given above. Only 16-bit
+ and 32-bit integer data may be swapped. Machine-format
+ floating-point data is not portable.
-c channels
- The number of sound channels in the data file.
- This may be 1, 2, or 4; for mono, stereo, or
- quad sound data. To cause the output file to
- have a different number of channels than the
- input file, include this option with the output
- file options. If the input and output file have
- a different number of channels then the avg
- effect must be used. If the avg effect is not
- specified on the command line it will be invoked
- internally with default parameters.
+ The number of sound channels in the data file. This may be
+ 1, 2, or 4; for mono, stereo, or quad sound data. To cause
+ the output file to have a different number of channels than
+ the input file, include this option with the output file
+ options. If the input and output file have a different num-
+ ber of channels then the avg effect must be used. If the avg
+ effect is not specified on the command line it will be
+ invoked internally with default parameters.
- -e When used after the input filename (so that it
- applies to the output file) it allows you to
- avoid giving an output filename and will not
- produce an output file. It will apply any spec�
- ified effects to the input file. This is mainly
- useful with the stat effect but can be used with
- others.
+ -e When used after the input filename (so that it applies to the
+ output file) it allows you to avoid giving an output filename
+ and will not produce an output file. It will apply any spec-
+ ified effects to the input file. This is mainly useful with
+ the stat effect but can be used with others.
General options:
-h Print version number and usage information.
- -p Run in preview mode and run fast. This will
- somewhat speed up SoX when the output format has
- a different number of channels and a different
- rate than the input file. Currently, this
- defaults to using the rate effect instead of the
- resample effect for sample rate changes.
+ -p Run in preview mode and run fast. This will somewhat speed
+ up SoX when the output format has a different number of chan-
+ nels and a different rate than the input file. Currently,
+ this defaults to using the rate effect instead of the resam-
+ ple effect for sample rate changes.
- -v volume Change amplitude (floating point); less than 1.0
- decreases, greater than 1.0 increases. May use
- a negative number to invert the phase of the
- audio data. It is interesting to note that we
- perceive volume logarithmically but this adjusts
+ -v volume Change amplitude (floating point); less than 1.0 decreases,
+ greater than 1.0 increases. May use a negative number to
+ invert the phase of the audio data. It is interesting to
+ note that we perceive volume logarithmically but this adjusts
the amplitude linearly.
- Note: see the stat effect for information on
- finding the maximum value that can be used with
- this option without causing audio data be be
- clipped.
+ Note: see the stat effect for information on finding the max-
+ imum value that can be used with this option without causing
+ audio data to be clipped.
- -V Print a description of processing phases. Use�
- ful for figuring out exactly how SoX is mangling
- your sound samples.
+ -V Print a description of processing phases. Useful for figur-
+ ing out exactly how SoX is mangling your sound samples.
FILE TYPES
- SoX attempts to determine the file type of input files
- automatically by looking at the header of the audio file.
- When it is unable to detect the file type or if its an
- output file then it uses the file extension of the file to
- determine what type of file format handler to use. This
- can be overridden by specifying the "-t" option on the
- command line.
+ SoX attempts to determine the file type of input files automatically by
+ looking at the header of the audio file. When it is unable to detect
+ the file type or if it’s an output file then it uses the file extension
+ of the file to determine what type of file format handler to use. This
+ can be overridden by specifying the "-t" option on the command line.
- The input and output files may be read from standard in
- and out. This is done by specifying '-' as the filename.
+ The input and output files may be read from standard in and out. This
+ is done by specifying ’-’ as the filename.
- File formats which have headers are checked, if that
- header doesn't seem right, the program exits with an
- appropriate message.
+ File formats which have headers are checked, if that header doesn’t
+ seem right, the program exits with an appropriate message.
The following file formats are supported:
- .8svx Amiga 8SVX musical instrument description for�
- mat.
+ .8svx Amiga 8SVX musical instrument description format.
- .aiff AIFF files used on Apple IIc/IIgs and SGI.
- Note: the AIFF format supports only one SSND
- chunk. It does not support multiple sound
- chunks, or the 8SVX musical instrument descrip�
- tion format. AIFF files are multimedia archives
- and can have multiple audio and picture chunks.
- You may need a separate archiver to work with
- them.
+ .aiff AIFF files used on Apple IIc/IIgs and SGI. Note: the AIFF
+ format supports only one SSND chunk. It does not support
+ multiple sound chunks, or the 8SVX musical instrument
+ description format. AIFF files are multimedia archives and
+ can have multiple audio and picture chunks. You may need a
+ separate archiver to work with them.
- .au SUN Microsystems AU files. There are apparently
- many types of .au files; DEC has invented its
- own with a different magic number and word
- order. The .au handler can read these files but
- will not write them. Some .au files have valid
- AU headers and some do not. The latter are
- probably original SUN u-law 8000 hz samples.
- These can be dealt with using the .ul format
- (see below).
+ .au SUN Microsystems AU files. There are apparently many types
+ of .au files; DEC has invented its own with a different magic
+ number and word order. The .au handler can read these files
+ but will not write them. Some .au files have valid AU head-
+ ers and some do not. The latter are probably original SUN u-
+ law 8000 hz samples. These can be dealt with using the .ul
+ format (see below).
.avr Audio Visual Research
- The AVR format is produced by a number of com�
- mercial packages on the Mac.
+ The AVR format is produced by a number of commercial packages
+ on the Mac.
.cdr CD-R
- CD-R files are used in mastering music on Com�
- pact Disks. The audio data on a CD-R disk is a
- raw audio file with a format of stereo 16-bit
- signed samples at a 44khz sample rate. There is
- a special blocking/padding oddity at the end of
- the audio file and is why it needs its own han�
- dler.
+ CD-R files are used in mastering music on Compact Disks. The
+ audio data on a CD-R disk is a raw audio file with a format
+ of stereo 16-bit signed samples at a 44khz sample rate.
+ There is a special blocking/padding oddity at the end of the
+ audio file and is why it needs its own handler.
.cvs Continuously Variable Slope Delta modulation
- Used to compress speech audio for applications
- such as voice mail.
+ Used to compress speech audio for applications such as voice
+ mail.
.dat Text Data files
- These files contain a textual representation of
- the sample data. There is one line at the
- beginning that contains the sample rate.
- Subsequent lines contain two numeric data items:
- the time since the beginning of the first sample
- and the sample value. Values are normalized so
- that the maximum and minimum are 1.00 and -1.00.
- This file format can be used to create data
- files for external programs such as FFT analyz�
- ers or graph routines. SoX can also convert a
- file in this format back into one of the other
- file formats.
+ These files contain a textual representation of the sample
+ data. There is one line at the beginning that contains the
+ sample rate. Subsequent lines contain two numeric data
+ items: the time since the beginning of the first sample and
+ the sample value. Values are normalized so that the maximum
+ and minimum are 1.00 and -1.00. This file format can be used
+ to create data files for external programs such as FFT ana-
+ lyzers or graph routines. SoX can also convert a file in
+ this format back into one of the other file formats.
.gsm GSM 06.10 Lossy Speech Compression
- A standard for compressing speech which is used
- in the Global Standard for Mobil telecommunica�
- tions (GSM). Its good for its purpose, shrink�
- ing audio data size, but it will introduce lots
- of noise when a given sound sample is encoded
- and decoded multiple times. This format is used
- by some voice mail applications. It is rather
- CPU intensive.
- GSM in SoX is optional and requires access to an
- external GSM library. To see if there is sup�
- port for gsm run sox -h and look for it under
- the list of supported file formats.
+ A standard for compressing speech which is used in the Global
+ Standard for Mobile telecommunications (GSM). It’s good for
+ its purpose, shrinking audio data size, but it will introduce
+ lots of noise when a given sound sample is encoded and
+ decoded multiple times. This format is used by some voice
+ mail applications. It is rather CPU intensive.
+ GSM in SoX is optional and requires access to an external GSM
+ library. To see if there is support for gsm run sox -h and
+ look for it under the list of supported file formats.
- .hcom Macintosh HCOM files. These are (apparently)
- Mac FSSD files with some variant of Huffman com�
- pression. The Macintosh has wacky file formats
- and this format handler apparently doesn't han�
- dle all the ones it should. Mac users will need
- your usual arsenal of file converters to deal
- with an HCOM file under Unix or DOS.
+ .hcom Macintosh HCOM files. These are (apparently) Mac FSSD files
+ with some variant of Huffman compression. The Macintosh has
+ wacky file formats and this format handler apparently doesn’t
+ handle all the ones it should. Mac users will need your
+ usual arsenal of file converters to deal with an HCOM file
+ under Unix or DOS.
.maud An Amiga format
- An IFF-conform sound file type, registered by MS
- MacroSystem Computer GmbH, published along with
- the "Toccata" sound-card on the Amiga. Allows
- 8bit linear, 16bit linear, A-Law, u-law in mono
- and stereo.
+ An IFF-conform sound file type, registered by MS MacroSystem
+ Computer GmbH, published along with the "Toccata" sound-card
+ on the Amiga. Allows 8bit linear, 16bit linear, A-Law, u-law
+ in mono and stereo.
.mp3 MP3 Compressed Audio
- MP3 audio files come from the MPEG standards for
- audio and video compression. They are a lossy
- compression format that achieves good compres�
- sion rates with a minimum amount of quality
- loss. Also see Ogg Vorbis for a similar format.
- MP3 support in SoX is optional and requires
- access to either or both the external libmad and
- libmp3lame libraries. To see if there is sup�
- port for Mp3 run sox -h and look for it under
- the list of supported file formats as "mp3".
+ MP3 audio files come from the MPEG standards for audio and
+ video compression. They are a lossy compression format that
+ achieves good compression rates with a minimum amount of
+ quality loss. Also see Ogg Vorbis for a similar format. MP3
+ support in SoX is optional and requires access to either or
+ both the external libmad and libmp3lame libraries. To see if
+ there is support for Mp3 run sox -h and look for it under the
+ list of supported file formats as "mp3".
- .nul Null file handler. This is a fake file hander
- that act as if its reading a stream of 0's from
- a while or fake writing output to a file. This
- is not a very useful file handler in most cases.
- It might be useful in some scripts were you do
- not want to read or write from a real file but
- would like to specify a filename for consis�
- tency.
+ .nul Null file handler. This is a fake file handler that acts as if
+ it’s reading a stream of 0’s from a file or fake writing out-
+ put to a file. This is not a very useful file handler in
+ most cases. It might be useful in some scripts where you do
+ not want to read or write from a real file but would like to
+ specify a filename for consistency.
.ogg Ogg Vorbis Compressed Audio.
- Ogg Vorbis is a open, patent-free CODEC designed
- for compressing music and streaming audio. It
- is similar to MP3, VQF, AAC, and other lossy
- formats. SoX can decode all types of Ogg Vorbis
- files, but can only encode at 128 kbps. Decod�
- ing is somewhat CPU intensive and encoding is
- very CPU intensive.
- Ogg Vorbis in SoX is optional and requires
- access to external Ogg Vorbis libraries. To see
- if there is support for Ogg Vorbis run sox -h
- and look for it under the list of supported file
- formats as "vorbis".
+ Ogg Vorbis is a open, patent-free CODEC designed for com-
+ pressing music and streaming audio. It is similar to MP3,
+ VQF, AAC, and other lossy formats. SoX can decode all types
+ of Ogg Vorbis files, but can only encode at 128 kbps. Decod-
+ ing is somewhat CPU intensive and encoding is very CPU inten-
+ sive.
+ Ogg Vorbis in SoX is optional and requires access to external
+ Ogg Vorbis libraries. To see if there is support for Ogg
+ Vorbis run sox -h and look for it under the list of supported
+ file formats as "vorbis".
ossdsp OSS /dev/dsp device driver
- This is a pseudo-file type and can be optionally
- compiled into SoX. Run sox -h to see if you
- have support for this file type. When this
- driver is used it allows you to open up the OSS
- /dev/dsp file and configure it to use the same
- data format as passed in to SoX. It works for
- both playing and recording sound samples. When
- playing sound files it attempts to set up the
- OSS driver to use the same format as the input
- file. It is suggested to always override the
- output values to use the highest quality samples
- your sound card can handle. Example: -t ossdsp
- -w -s /dev/dsp
+ This is a pseudo-file type and can be optionally compiled
+ into SoX. Run sox -h to see if you have support for this
+ file type. When this driver is used it allows you to open up
+ the OSS /dev/dsp file and configure it to use the same data
+ format as passed in to SoX. It works for both playing and
+ recording sound samples. When playing sound files it
+ attempts to set up the OSS driver to use the same format as
+ the input file. It is suggested to always override the out-
+ put values to use the highest quality samples your sound card
+ can handle. Example: -t ossdsp -w -s /dev/dsp
+ .prc Psion record.app
+ Used in some Psion devices for System alarms. This format is
+ newer than the .wve format that is used in some Psion
+ devices.
+
.sf IRCAM Sound Files.
- Sound Files are used by academic music software
- such as the CSound package, and the MixView
- sound sample editor.
+ Sound Files are used by academic music software such as the
+ CSound package, and the MixView sound sample editor.
.sph
- SPHERE (SPeech HEader Resources) is a file for�
- mat defined by NIST (National Institute of Stan�
- dards and Technology) and is used with speech
- audio. SoX can read these files when they con�
- tain u-law and PCM data. It will ignore any
- header information that says the data is com�
- pressed using shorten compression and will treat
- the data as either u-law or PCM. This will
- allow SoX and the command line shorten program
- to be ran together using pipes to uncompress the
- data and then pass the result to SoX for pro�
- cessing.
+ SPHERE (SPeech HEader Resources) is a file format defined by
+ NIST (National Institute of Standards and Technology) and is
+ used with speech audio. SoX can read these files when they
+ contain u-law and PCM data. It will ignore any header infor-
+ mation that says the data is compressed using shorten com-
+ pression and will treat the data as either u-law or PCM.
+ This will allow SoX and the command line shorten program to
+ be run together using pipes to uncompress the data and then
+ pass the result to SoX for processing.
.smp Turtle Beach SampleVision files.
- SMP files are for use with the PC-DOS package
- SampleVision by Turtle Beach Softworks. This
- package is for communication to several MIDI
- samplers. All sample rates are supported by the
- package, although not all are supported by the
- samplers themselves. Currently loop points are
- ignored.
+ SMP files are for use with the PC-DOS package SampleVision by
+ Turtle Beach Softworks. This package is for communication to
+ several MIDI samplers. All sample rates are supported by the
+ package, although not all are supported by the samplers them-
+ selves. Currently loop points are ignored.
.snd
- Under DOS this file format is the same as the
- .sndt format. Under all other platforms it is
- the same as the .au format.
+ Under DOS this file format is the same as the .sndt format.
+ Under all other platforms it is the same as the .au format.
.sndt SoundTool files.
This is an older DOS file format.
sunau Sun /dev/audio device driver
- This is a pseudo-file type and can be optionally
- compiled into SoX. Run sox -h to see if you
- have support for this file type. When this
- driver is used it allows you to open up a Sun
- /dev/audio file and configure it to use the same
- data type as passed in to SoX. It works for
- both playing and recording sound samples. When
- playing sound files it attempts to set up the
- audio driver to use the same format as the input
- file. It is suggested to always override the
- output values to use the highest quality samples
- your hardware can handle. Example: -t sunau -w
- -s /dev/audio or -t sunau -U -c 1 /dev/audio for
- older sun equipment.
+ This is a pseudo-file type and can be optionally compiled
+ into SoX. Run sox -h to see if you have support for this
+ file type. When this driver is used it allows you to open up
+ a Sun /dev/audio file and configure it to use the same data
+ type as passed in to SoX. It works for both playing and
+ recording sound samples. When playing sound files it
+ attempts to set up the audio driver to use the same format as
+ the input file. It is suggested to always override the out-
+ put values to use the highest quality samples your hardware
+ can handle. Example: -t sunau -w -s /dev/audio or -t sunau
+ -U -c 1 /dev/audio for older sun equipment.
.txw Yamaha TX-16W sampler.
- A file format from a Yamaha sampling keyboard
- which wrote IBM-PC format 3.5" floppies. Han�
- dles reading of files which do not have the sam�
- ple rate field set to one of the expected by
- looking at some other bytes in the attack/loop
- length fields, and defaulting to 33kHz if the
- sample rate is still unknown.
+ A file format from a Yamaha sampling keyboard which wrote
+ IBM-PC format 3.5" floppies. Handles reading of files which
+ do not have the sample rate field set to one of the expected values
+ by looking at some other bytes in the attack/loop length
+ fields, and defaulting to 33kHz if the sample rate is still
+ unknown.
.vms More info to come.
- Used to compress speech audio for applications
- such as voice mail.
+ Used to compress speech audio for applications such as voice
+ mail.
.voc Sound Blaster VOC files.
- VOC files are multi-part and contain silence
- parts, looping, and different sample rates for
- different chunks. On input, the silence parts
- are filled out, loops are rejected, and sample
- data with a new sample rate is rejected.
- Silence with a different sample rate is gener�
- ated appropriately. On output, silence is not
- detected, nor are impossible sample rates.
- Note, this version now supports playing VOC
- files with multiple blocks and supports playing
- files containing u-law and A-law samples.
+ VOC files are multi-part and contain silence parts, looping,
+ and different sample rates for different chunks. On input,
+ the silence parts are filled out, loops are rejected, and
+ sample data with a new sample rate is rejected. Silence with
+ a different sample rate is generated appropriately. On out-
+ put, silence is not detected, nor are impossible sample
+ rates. Note, this version now supports playing VOC files
+ with multiple blocks and supports playing files containing u-
+ law and A-law samples.
vorbis See .ogg format.
+ vox A headerless file of Dialogic/OKI ADPCM audio data commonly
+ comes with the extension .vox. This ADPCM data has 12-bit
+ precision packed into only 4-bits.
+
.wav Microsoft .WAV RIFF files.
- These appear to be very similar to IFF files,
- but not the same. They are the native sound
- file format of Windows. (Obviously, Windows was
- of such incredible importance to the computer
- industry that it just had to have its own sound
- file format.) Normally .wav files have all for�
- matting information in their headers, and so do
- not need any format options specified for an
- input file. If any are, they will override the
- file header, and you will be warned to this
- effect. You had better know what you are doing!
- Output format options will cause a format con�
- version, and the .wav will written appropri�
- ately. SoX currently can read PCM, ULAW, ALAW,
- MS ADPCM, and IMA (or DVI) ADPCM. It can write
- all of these formats including (NEW!) the ADPCM
- encoding.
+ These appear to be very similar to IFF files, but not the
+ same. They are the native sound file format of Windows.
+ (Obviously, Windows was of such incredible importance to the
+ computer industry that it just had to have its own sound file
+ format.) Normally .wav files have all formatting information
+ in their headers, and so do not need any format options spec-
+ ified for an input file. If any are, they will override the
+ file header, and you will be warned to this effect. You had
+ better know what you are doing! Output format options will
+ cause a format conversion, and the .wav will be written appro-
+ priately. SoX currently can read PCM, ULAW, ALAW, MS ADPCM,
+ and IMA (or DVI) ADPCM. It can write all of these formats
+ including (NEW!) the ADPCM encoding.
.wve Psion 8-bit A-law
- These are 8-bit A-law 8khz sound files used on
- the Psion palmtop portable computer.
+ These are 8-bit A-law 8khz sound files used on the Psion
+ palmtop portable computer.
.raw Raw files (no header).
- The sample rate, size (byte, word, etc), and
- encoding (signed, unsigned, etc.) of the sample
- file must be given. The number of channels
- defaults to 1.
+ The sample rate, size (byte, word, etc), and encoding
+ (signed, unsigned, etc.) of the sample file must be given.
+ The number of channels defaults to 1.
.ub, .sb, .uw, .sw, .ul, .al, .lu, .la, .sl
- These are several suffices which serve as a
- shorthand for raw files with a given size and
- encoding. Thus, ub, sb, uw, sw, ul, al, lu, la
- and sl correspond to "unsigned byte", "signed
- byte", "unsigned word", "signed word", "u-law"
- (byte), "A-law" (byte), inverse bit order "u-
- law", inverse bit order "A-law", and "signed
- long". The sample rate defaults to 8000 hz if
- not explicitly set, and the number of channels
- defaults to 1. There are lots of Sparc samples
- floating around in u-law format with no header
- and fixed at a sample rate of 8000 hz. (Certain
- sound management software cheerfully ignores the
- headers.) Similarly, most Mac sound files are
- in unsigned byte format with a sample rate of
- 11025 or 22050 hz.
+ These are several suffixes which serve as a shorthand for raw
+ files with a given size and encoding. Thus, ub, sb, uw, sw,
+ ul, al, lu, la and sl correspond to "unsigned byte", "signed
+ byte", "unsigned word", "signed word", "u-law" (byte), "A-
+ law" (byte), inverse bit order "u-law", inverse bit order "A-
+ law", and "signed long". The sample rate defaults to 8000 hz
+ if not explicitly set, and the number of channels defaults to
+ 1. There are lots of Sparc samples floating around in u-law
+ format with no header and fixed at a sample rate of 8000 hz.
+ (Certain sound management software cheerfully ignores the
+ headers.) Similarly, most Mac sound files are in unsigned
+ byte format with a sample rate of 11025 or 22050 hz.
- .auto This is a ``meta-type'': specifying this type
- for an input file triggers some code that tries
- to guess the real type by looking for magic
- words in the header. If the type can't be
- guessed, the program exits with an error mes�
- sage. The input must be a plain file, not a
- pipe. This type can't be used for output files.
+ .auto This is a ‘‘meta-type’’: specifying this type for an input
+ file triggers some code that tries to guess the real type by
+ looking for magic words in the header. If the type can’t be
+ guessed, the program exits with an error message. The input
+ must be a plain file, not a pipe. This type can’t be used
+ for output files.
EFFECTS
- Multiple effects may be applied to the audio data by spec�
- ifying them one after another at the end of the command
- line.
+ Multiple effects may be applied to the audio data by specifying them
+ one after another at the end of the command line.
- avg [ -l | -r | -f | -b | n,n,...,n ]
- Reduce the number of channels by averaging the
- samples, or duplicate channels to increase the
- number of channels. This effect is automati�
- cally used when the number of input channels
- differ from the number of output channels. When
- reducing the number of channels it is possible
- to manually specify the avg effect and use the
- -l, -r, -f, or -b options to select only the
- left, right, front, or back channel(s) for the
- output instead of averaging the channels. The
- -f and -b options maintain left/right stereo
- separation; use the avg effect twice to select a
- single channel.
+ avg [ -l | -r | -f | -b | -1 | -2 | -3 | -4 | n,n,...,n ]
+ Reduce the number of channels by averaging the samples, or
+ duplicate channels to increase the number of channels. This
+ effect is automatically used when the number of input chan-
+ nels differs from the number of output channels. When reduc-
+ ing the number of channels it is possible to manually specify
+ the avg effect and use the -l, -r, -f, -b, -1, -2, -3, or -4
+ options to select only the left, right, front, back chan-
+ nel(s) or specific channel for the output instead of averag-
+ ing the channels. The -l, -r, -f, and -b options will do
+ averaging in quad-channel files so select the exact channel
+ to prevent this.
- The avg effect can also be invoked with up to 16
- double-precision numbers, which specify the pro�
- portion of each input channel that is to be
- mixed into each output channel. In two-channel
- mode, 4 numbers are given: l->l, l->r, r->l, and
- r->r, respectively. In four-channel mode, the
- first 4 numbers give the proportions for the
- left-front output channel, as follows: lf->lf,
- rf->lf, lb->lf, and rb->rf. The next 4 give the
- right-front output in the same order, then left-
- back and right-back.
+ The avg effect can also be invoked with up to 16 double-pre-
+ cision numbers, which specify the proportion of each input
+ channel that is to be mixed into each output channel. In
+ two-channel mode, 4 numbers are given: l->l, l->r, r->l, and
+ r->r, respectively. In four-channel mode, the first 4 num-
+ bers give the proportions for the left-front output channel,
+ as follows: lf->lf, rf->lf, lb->lf, and rb->lf. The next 4
+ give the right-front output in the same order, then left-back
+ and right-back.
- It is also possible to use the 16 numbers to
- expand or reduce the channel count; just specify
- 0 for unused channels. Finally, if fewer than 4
- numbers are given, certain special abbreviations
- may be invoked; see the source code for details.
+ It is also possible to use the 16 numbers to expand or reduce
+ the channel count; just specify 0 for unused channels.
+ Finally, if fewer than 4 numbers are given, certain special
+ abbreviations may be invoked; see the source code for
+ details.
band [ -n ] center [ width ]
- Apply a band-pass filter. The frequency
- response drops logarithmically around the center
- frequency. The width gives the slope of the
- drop. The frequencies at center + width and
- center - width will be half of their original
- amplitudes. Band defaults to a mode oriented to
- pitched signals, i.e. voice, singing, or instru�
- mental music. The -n (for noise) option uses
- the alternate mode for un-pitched signals.
- Warning: -n introduces a power-gain of about
- 11dB in the filter, so beware of output clip�
- ping. Band introduces noise in the shape of the
- filter, i.e. peaking at the center frequency and
- settling around it. See filter for a bandpass
- effect with steeper shoulders.
+ Apply a band-pass filter. The frequency response drops loga-
+ rithmically around the center frequency. The width gives the
+ slope of the drop. The frequencies at center + width and
+ center - width will be half of their original amplitudes.
+ Band defaults to a mode oriented to pitched signals, i.e.
+ voice, singing, or instrumental music. The -n (for noise)
+ option uses the alternate mode for un-pitched signals. Warn-
+ ing: -n introduces a power-gain of about 11dB in the filter,
+ so beware of output clipping. Band introduces noise in the
+ shape of the filter, i.e. peaking at the center frequency and
+ settling around it. See filter for a bandpass effect with
+ steeper shoulders.
bandpass frequency bandwidth
- Butterworth bandpass filter. Description coming
- soon!
+ Butterworth bandpass filter. Description coming soon!
bandreject frequency bandwidth
- Butterworth bandreject filter. Description com�
- ing soon!
+ Butterworth bandreject filter. Description coming soon!
chorus gain-in gain-out delay decay speed depth
-s | -t [ delay decay speed depth -s | -t ... ]
- Add a chorus to a sound sample. Each quadtuple
- delay/decay/speed/depth gives the delay in mil�
- liseconds and the decay (relative to gain-in)
- with a modulation speed in Hz using depth in
- milliseconds. The modulation is either sinu�
- soidal (-s) or triangular (-t). Gain-out is the
- volume of the output.
+ Add a chorus to a sound sample. Each quadruple
+ delay/decay/speed/depth gives the delay in milliseconds and
+ the decay (relative to gain-in) with a modulation speed in Hz
+ using depth in milliseconds. The modulation is either sinu-
+ soidal (-s) or triangular (-t). Gain-out is the volume of
+ the output.
compand attack1,decay1[,attack2,decay2...]
@@ -632,233 +554,193 @@
in-dB1,out-dB1[,in-dB2,out-dB2...]
[gain [initial-volume [delay ] ] ]
- Compand (compress or expand) the dynamic range
- of a sample. The attack and decay time specify
- the integration time over which the absolute
- value of the input signal is integrated to
- determine its volume; attacks refer to increases
- in volume and decays refer to decreases. Where
- more than one pair of attack/decay parameters
- are specified, each channel is treated sepa�
- rately and the number of pairs must agree with
- the number of input channels. The second param�
- eter is a list of points on the compander's
- transfer function specified in dB relative to
- the maximum possible signal amplitude. The
- input values must be in a strictly increasing
- order but the transfer function does not have to
- be monotonically rising. The special value -inf
- may be used to indicate that the input volume
- should be associated output volume. The points
- -inf,-inf and 0,0 are assumed; the latter may be
- overridden, but the former may not.
+ Compand (compress or expand) the dynamic range of a sample.
+ The attack and decay time specify the integration time over
+ which the absolute value of the input signal is integrated to
+ determine its volume; attacks refer to increases in volume
+ and decays refer to decreases. Where more than one pair of
+ attack/decay parameters are specified, each channel is
+ treated separately and the number of pairs must agree with
+ the number of input channels. The second parameter is a list
+ of points on the compander’s transfer function specified in
+ dB relative to the maximum possible signal amplitude. The
+ input values must be in a strictly increasing order but the
+ transfer function does not have to be monotonically rising.
+ The special value -inf may be used to indicate that the input
+ volume should be associated output volume. The points
+ -inf,-inf and 0,0 are assumed; the latter may be overridden,
+ but the former may not.
- The third (optional) parameter is a post-pro�
- cessing gain in dB which is applied after the
- compression has taken place; the fourth
- (optional) parameter is an initial volume to be
- assumed for each channel when the effect starts.
- This permits the user to supply a nominal level
- initially, so that, for example, a very large
- gain is not applied to initial signal levels
- before the companding action has begun to oper�
- ate: it is quite probable that in such an event,
- the output would be severely clipped while the
- compander gain properly adjusts itself.
+ The third (optional) parameter is a post-processing gain in
+ dB which is applied after the compression has taken place;
+ the fourth (optional) parameter is an initial volume to be
+ assumed for each channel when the effect starts. This per-
+ mits the user to supply a nominal level initially, so that,
+ for example, a very large gain is not applied to initial sig-
+ nal levels before the companding action has begun to operate:
+ it is quite probable that in such an event, the output would
+ be severely clipped while the compander gain properly adjusts
+ itself.
- The fifth (optional) parameter is a delay in
- seconds. The input signal is analyzed immedi�
- ately to control the compander, but it is
- delayed before being fed to the volume adjuster.
- Specifying a delay approximately equal to the
- attack/decay times allows the compander to
- effectively operate in a "predictive" rather
- than a reactive mode.
+ The fifth (optional) parameter is a delay in seconds. The
+ input signal is analyzed immediately to control the compan-
+ der, but it is delayed before being fed to the volume
+ adjuster. Specifying a delay approximately equal to the
+ attack/decay times allows the compander to effectively oper-
+ ate in a "predictive" rather than a reactive mode.
- copy Copy the input file to the output file. This is
- the default effect if both files have the same
- sampling rate.
+ copy Copy the input file to the output file. This is the default
+ effect if both files have the same sampling rate.
dcshift shift [ limitergain ]
- DC Shift the audio data, with basic linear
- amplitude formula. This is most useful if your
- audio data tends to not be centered around a
- value of 0. Shifting it back will allow you to
- get the most volume adjustments without clipping
- audio data.
- The first option is the dcshift value. It is a
- floating point number that indicates the amount
- to shift.
- An option limtergain value can be specified as
- well. It should have a value much less then 1.0
- and is used only on peaks to prevent clipping.
+ DC Shift the audio data, with basic linear amplitude formula.
+ This is most useful if your audio data tends to not be cen-
+ tered around a value of 0. Shifting it back will allow you
+ to get the most volume adjustments without clipping audio
+ data.
+ The first option is the dcshift value. It is a floating
+ point number that indicates the amount to shift.
+ An optional limitergain value can be specified as well. It
+ should have a value much less than 1.0 and is used only on
+ peaks to prevent clipping.
- deemph Apply a treble attenuation shelving filter to
- samples in audio cd format. The frequency
- response of pre-emphasized recordings is recti�
- fied. The filtering is defined in the standard
- document ISO 908.
+ deemph Apply a treble attenuation shelving filter to samples in
+ audio cd format. The frequency response of pre-emphasized
+ recordings is rectified. The filtering is defined in the
+ standard document ISO 908.
- earwax Makes sound easier to listen to on headphones.
- Adds audio-cues to samples in audio cd format so
- that when listened to on headphones the stereo
- image is moved from inside your head (standard
- for headphones) to outside and in front of the
- listener (standard for speakers). See
- www.geocities.com/beinges for a full explana�
- tion.
+ earwax Makes sound easier to listen to on headphones. Adds audio-
+ cues to samples in audio cd format so that when listened to
+ on headphones the stereo image is moved from inside your head
+ (standard for headphones) to outside and in front of the lis-
+ tener (standard for speakers). See
+ www.geocities.com/beinges for a full explanation.
echo gain-in gain-out delay decay [ delay decay ... ]
- Add echoing to a sound sample. Each delay/decay
- part gives the delay in milliseconds and the
- decay (relative to gain-in) of that echo. Gain-
- out is the volume of the output.
+ Add echoing to a sound sample. Each delay/decay part gives
+ the delay in milliseconds and the decay (relative to gain-in)
+ of that echo. Gain-out is the volume of the output.
echos gain-in gain-out delay decay [ delay decay ... ]
- Add a sequence of echos to a sound sample. Each
- delay/decay part gives the delay in milliseconds
- and the decay (relative to gain-in) of that
- echo. Gain-out is the volume of the output.
+ Add a sequence of echos to a sound sample. Each delay/decay
+ part gives the delay in milliseconds and the decay (relative
+ to gain-in) of that echo. Gain-out is the volume of the out-
+ put.
fade [ type ] fade-in-length
[ stop-time [ fade-out-length ] ]
- Add a fade effect to the beginning, end, or both
- of the audio data.
+ Add a fade effect to the beginning, end, or both of the audio
+ data.
- For fade-ins, this starts from the first sample
- and ramps the volume of the audio from 0 to full
- volume over fade-in-length seconds. Specify 0
- seconds if no fade-in is wanted.
+ For fade-ins, this starts from the first sample and ramps the
+ volume of the audio from 0 to full volume over fade-in-length
+ seconds. Specify 0 seconds if no fade-in is wanted.
- For fade-outs, the audio data will be truncated
- at the stop-time and the volume will be ramped
- from full volume down to 0 starting at fade-out-
- length seconds before the stop-time. No fade-
- out is performed if these options are not speci�
- fied.
- All times can be specified in either periods of
- time or sample counts. To specify time periods
- use the format hh:mm:ss.frac format. To specify
- using sample counts, specify the number of sam�
- ples and append the letter 's' to the sample
- count (for example 8000s).
- An optional type can be specified to change the
- type of envelope. Choices are q for quarter of
- a sinewave, h for half a sinewave, t for linear
- slope, l for logarithmic, and p for inverted
- parabola. The default is a linear slope.
+ For fade-outs, the audio data will be truncated at the stop-
+ time and the volume will be ramped from full volume down to 0
+ starting at fade-out-length seconds before the stop-time. No
+ fade-out is performed if these options are not specified.
+ All times can be specified in either periods of time or sam-
+ ple counts. To specify time periods use the format
+ hh:mm:ss.frac. To specify using sample counts, spec-
+ ify the number of samples and append the letter ’s’ to the
+ sample count (for example 8000s).
+ An optional type can be specified to change the type of enve-
+ lope. Choices are q for quarter of a sinewave, h for half a
+ sinewave, t for linear slope, l for logarithmic, and p for
+ inverted parabola. The default is a linear slope.
filter [ low ]-[ high ] [ window-len [ beta ] ]
- Apply a Sinc-windowed lowpass, highpass, or
- bandpass filter of given window length to the
- signal. low refers to the frequency of the
- lower 6dB corner of the filter. high refers to
- the frequency of the upper 6dB corner of the
- filter.
+ Apply a Sinc-windowed lowpass, highpass, or bandpass filter
+ of given window length to the signal. low refers to the fre-
+ quency of the lower 6dB corner of the filter. high refers to
+ the frequency of the upper 6dB corner of the filter.
- A lowpass filter is obtained by leaving low
- unspecified, or 0. A highpass filter is
- obtained by leaving high unspecified, or 0, or
- greater than or equal to the Nyquist frequency.
+ A lowpass filter is obtained by leaving low unspecified, or
+ 0. A highpass filter is obtained by leaving high unspeci-
+ fied, or 0, or greater than or equal to the Nyquist fre-
+ quency.
- The window-len, if unspecified, defaults to 128.
- Longer windows give a sharper cutoff, smaller
- windows a more gradual cutoff.
+ The window-len, if unspecified, defaults to 128. Longer win-
+ dows give a sharper cutoff, smaller windows a more gradual
+ cutoff.
- The beta, if unspecified, defaults to 16. This
- selects a Kaiser window. You can select a Nut�
- tall window by specifying anything <= 2.0 here.
- For more discussion of beta, look under the
- resample effect.
+ The beta, if unspecified, defaults to 16. This selects a
+ Kaiser window. You can select a Nuttall window by specifying
+ anything <= 2.0 here. For more discussion of beta, look
+ under the resample effect.
flanger gain-in gain-out delay decay speed < -s | -t >
- Add a flanger to a sound sample. Each triple
- delay/decay/speed gives the delay in millisec�
- onds and the decay (relative to gain-in) with a
- modulation speed in Hz. The modulation is
- either sinodial (-s) or triangular (-t). Gain-
- out is the volume of the output.
+ Add a flanger to a sound sample. Each triple
+ delay/decay/speed gives the delay in milliseconds and the
+ decay (relative to gain-in) with a modulation speed in Hz.
+ The modulation is either sinusoidal (-s) or triangular (-t).
+ Gain-out is the volume of the output.
highp frequency
- Apply a single pole recursive high-pass filter.
- The frequency response drops logarithmically
- with I frequency in the middle of the drop. The
- slope of the filter is quite gentle. See filter
- for a highpass effect with sharper cutoff.
+ Apply a single pole recursive high-pass filter. The fre-
+ quency response drops logarithmically with frequency in the
+ middle of the drop. The slope of the filter is quite gentle.
+ See filter for a highpass effect with sharper cutoff.
highpass frequency
- Butterworth highpass filter. Description coming
- soon!
+ Butterworth highpass filter. Description coming soon!
lowp frequency
- Apply a single pole recursive low-pass filter.
- The frequency response drops logarithmically
- with frequency in the middle of the drop. The
- slope of the filter is quite gentle. See filter
- for a lowpass effect with sharper cutoff.
+ Apply a single pole recursive low-pass filter. The frequency
+ response drops logarithmically with frequency in the middle
+ of the drop. The slope of the filter is quite gentle. See
+ filter for a lowpass effect with sharper cutoff.
lowpass frequency
- Butterworth lowpass filter. Description coming
- soon!
+ Butterworth lowpass filter. Description coming soon!
- map Display a list of loops in a sample, and miscel�
- laneous loop info.
+ map Display a list of loops in a sample, and miscellaneous loop
+ info.
- mask Add "masking noise" to signal. This effect
- deliberately adds white noise to a sound in
- order to mask quantization effects, created by
- the process of playing a sound digitally. It
- tends to mask buzzing voices, for example. It
- adds 1/2 bit of noise to the sound file at the
- output bit depth.
+ mask Add "masking noise" to signal. This effect deliberately adds
+ white noise to a sound in order to mask quantization effects,
+ created by the process of playing a sound digitally. It
+ tends to mask buzzing voices, for example. It adds 1/2 bit
+ of noise to the sound file at the output bit depth.
pan direction
- Pan the sound of an audio file from one channel
- to another. This is done by changing the volume
- of the input channels so that it fades out on
- one channel and fades-in on another. If the
- number of input channels is different then the
- number of output channels then this effect tries
- to intelligently handle this. For instance, if
- the input contains 1 channel and the output con�
- tains 2 channels, then it will create the miss�
- ing channel itself. The direction is a value
- from -1.0 to 1.0. -1.0 represents far left and
- 1.0 represents far right. Numbers in between
- will start the pan effect without totally muting
- the opposite channel.
+ Pan the sound of an audio file from one channel to another.
+ This is done by changing the volume of the input channels so
+ that it fades out on one channel and fades-in on another. If
+ the number of input channels is different than the number of
+ output channels then this effect tries to intelligently han-
+ dle this. For instance, if the input contains 1 channel and
+ the output contains 2 channels, then it will create the miss-
+ ing channel itself. The direction is a value from -1.0 to
+ 1.0. -1.0 represents far left and 1.0 represents far right.
+ Numbers in between will start the pan effect without totally
+ muting the opposite channel.
phaser gain-in gain-out delay decay speed < -s | -t >
- Add a phaser to a sound sample. Each triple
- delay/decay/speed gives the delay in millisec�
- onds and the decay (relative to gain-in) with a
- modulation speed in Hz. The modulation is
- either sinodial (-s) or triangular (-t). The
- decay should be less than 0.5 to avoid feedback.
- Gain-out is the volume of the output.
+ Add a phaser to a sound sample. Each triple
+ delay/decay/speed gives the delay in milliseconds and the
+ decay (relative to gain-in) with a modulation speed in Hz.
+ The modulation is either sinusoidal (-s) or triangular (-t).
+ The decay should be less than 0.5 to avoid feedback. Gain-
+ out is the volume of the output.
- pick [ -1 | -2 | -3 | -4 | -l | -r ]
- Select the left or right channel of a stereo
- sample, or one of four channels in a quadra�
- phonic sample. The -l and -r options represent
- either the left or right channel. It is
- required that you use the -c 1 command line
- option in order to force the output file to con�
- tain only 1 channel.
+ pick [ -1 | -2 | -3 | -4 | -l | -r | -f | -b ]
+ Pick a subset of channels to be copied into the output file.
+ This effect is just an alias of the "avg" effect but is left
+ here for historical reasons.
pitch shift [ width interpole fade ]
- Change the pitch of file without affecting its
- duration by cross-fading shifted samples. shift
- is given in cents. Use a positive value to shift
- to treble, negative value to shift to bass.
- Default shift is 0. width of window is in ms.
- Default width is 20ms. Try 30ms to lower pitch,
- and 10ms to raise pitch. interpole option, can
- be "cubic" or "linear". Default is "cubic". The
- fade option, can be "cos", "hamming", "linear"
- or "trapezoid". Default is "cos".
+ Change the pitch of file without affecting its duration by
+ cross-fading shifted samples. shift is given in cents. Use a
+ positive value to shift to treble, negative value to shift to
+ bass. Default shift is 0. width of window is in ms. Default
+ width is 20ms. Try 30ms to lower pitch, and 10ms to raise
+ pitch. interpole option, can be "cubic" or "linear". Default
+ is "cubic". The fade option, can be "cos", "hamming", "lin-
+ ear" or "trapezoid". Default is "cos".
polyphase [ -w < nut / ham > ]
@@ -865,75 +747,69 @@
[ -width < long / short / # > ]
[ -cutoff # ]
- Translate input sampling rate to output sampling
- rate via polyphase interpolation, a DSP algo�
- rithm. This method is slow and uses lots of
- RAM, but gives much better results than rate.
+ Translate input sampling rate to output sampling rate via
+ polyphase interpolation, a DSP algorithm. This method is
+ slow and uses lots of RAM, but gives much better results than
+ rate.
- -w < nut / ham > : select either a Nuttal (~90
- dB stopband) or Hamming (~43 dB stopband) win�
- dow. Default is nut.
+ -w < nut / ham > : select either a Nuttall (~90 dB stopband)
+ or Hamming (~43 dB stopband) window. Default is nut.
- -width long / short / # : specify the (approxi�
- mate) width of the filter. long is 1024 sam�
- ples; short is 128 samples. Alternatively, an
- exact number can be used. Default is long. The
- short option is not recommended, as it produces
- poor quality results.
+ -width long / short / # : specify the (approximate) width of
+ the filter. long is 1024 samples; short is 128 samples.
+ Alternatively, an exact number can be used. Default is long.
+ The short option is not recommended, as it produces poor
+ quality results.
- -cutoff # : specify the filter cutoff frequency
- in terms of fraction of frequency bandwidth,
- also know as the Nyquist frequency. Please see
- the resample effect for further information on
- Nyquist frequency. If upsampling, then this is
- the fraction of the original signal that should
- go through. If downsampling, this is the frac�
- tion of the signal left after downsampling.
- Default is 0.95. Remember that this is a float.
+ -cutoff # : specify the filter cutoff frequency in terms of
+ fraction of frequency bandwidth, also known as the Nyquist
+ frequency. Please see the resample effect for further infor-
+ mation on Nyquist frequency. If upsampling, then this is the
+ fraction of the original signal that should go through. If
+ downsampling, this is the fraction of the signal left after
+ downsampling. Default is 0.95. Remember that this is a
+ float.
- rate Translate input sampling rate to output sampling
- rate via linear interpolation to the Least Com�
- mon Multiple of the two sampling rates. This is
- the default effect if the two files have differ�
- ent sampling rates and the preview options was
- specified. This is fast but noisy: the spectrum
- of the original sound will be shifted upwards
- and duplicated faintly when up-translating by a
- multiple.
+ rate Translate input sampling rate to output sampling rate via
+ linear interpolation to the Least Common Multiple of the two
+ sampling rates. This is the default effect if the two files
+ have different sampling rates and the preview option was
+ specified. This is fast but noisy: the spectrum of the orig-
+ inal sound will be shifted upwards and duplicated faintly
+ when up-translating by a multiple.
- Lerp-ing is acceptable for cheap 8-bit sound
- hardware, but for CD-quality sound you should
- instead use either resample or polyphase. If
- you are wondering which rate changing effects to
- use, you will want to read a detailed analysis
- of all of them at http://eakaw2.et.tu-dres�
- den.de/~wilde/resample/resample.html
+ Lerp-ing is acceptable for cheap 8-bit sound hardware, but
+ for CD-quality sound you should instead use either resample
+ or polyphase. If you are wondering which rate changing
+ effects to use, you will want to read a detailed analysis of
+ all of them at http://eakaw2.et.tu-dresden.de/~wilde/resam-
+ ple/resample.html
+ repeat count
+ Repeats the audio data count times. Requires disk space to
+ store the data to be repeated.
+
resample [ -qs | -q | -ql ] [ rolloff [ beta ] ]
- Translate input sampling rate to output sampling
- rate via simulated analog filtration. This
- method is slower than rate, but gives much bet�
- ter results.
+ Translate input sampling rate to output sampling rate via
+ simulated analog filtration. This method is slower than
+ rate, but gives much better results.
- By default, linear interpolation is used, with a
- window width about 45 samples at the lower of
- the two rate. This gives an accuracy of about
- 16 bits, but insufficient stopband rejection in
- the case that you want to have rolloff greater
- than about 0.80 of the Nyquist frequency.
+ By default, linear interpolation is used, with a window width
+ about 45 samples at the lower of the two rates. This gives an
+ accuracy of about 16 bits, but insufficient stopband rejec-
+ tion in the case that you want to have rolloff greater than
+ about 0.80 of the Nyquist frequency.
- The -q* options will change the default values
- for rolloff and beta as well as use quadratic
- interpolation of filter coefficients, resulting
- in about 24 bits precision. The -qs, -q, or -ql
- options specify increased accuracy at the cost
- of lower execution speed. It is optional to
- specify rolloff and beta parameters when using
- the -q* options.
+ The -q* options will change the default values for rolloff
+ and beta as well as use quadratic interpolation of filter
+ coefficients, resulting in about 24 bits precision. The -qs,
+ -q, or -ql options specify increased accuracy at the cost of
+ lower execution speed. It is optional to specify rolloff and
+ beta parameters when using the -q* options.
- Following is a table of the reasonable defaults
- which are built-in to SoX:
+ Following is a table of the reasonable defaults which are
+ built-in to SoX:
Option Window rolloff beta interpolation
------ ------ ------- ---- -------------
@@ -943,96 +819,83 @@
-ql 149 0.94 16 quadratic
------ ------ ------- ---- -------------
- -qs, -q, or -ql use window lengths of 45, 75, or
- 149 samples, respectively, at the lower sample-
- rate of the two files. This means progressively
- sharper stop-band rejection, at proportionally
- slower execution times.
+ -qs, -q, or -ql use window lengths of 45, 75, or 149 samples,
+ respectively, at the lower sample-rate of the two files.
+ This means progressively sharper stop-band rejection, at pro-
+ portionally slower execution times.
- rolloff refers to the cut-off frequency of the
- low pass filter and is given in terms of the
- Nyquist frequency for the lower sample rate.
- rolloff therefore should be something between
- 0.0 and 1.0, in practice 0.8-0.95. The defaults
- are indicated above.
+ rolloff refers to the cut-off frequency of the low pass fil-
+ ter and is given in terms of the Nyquist frequency for the
+ lower sample rate. rolloff therefore should be something
+ between 0.0 and 1.0, in practice 0.8-0.95. The defaults are
+ indicated above.
- The Nyquist frequency is equal to (sample rate /
- 2). Logically, this is because the A/D con�
- verter needs at least 2 samples to detect 1
- cycle at the Nyquist frequency. Frequencies
- higher then the Nyquist will actually appear as
- lower frequencies to the A/D converter and is
- called aliasing. Normally, A/D converts run the
- signal through a highpass filter first to avoid
- these problems.
+ The Nyquist frequency is equal to (sample rate / 2). Logi-
+ cally, this is because the A/D converter needs at least 2
+ samples to detect 1 cycle at the Nyquist frequency. Frequen-
+ cies higher than the Nyquist will actually appear as lower
+ frequencies to the A/D converter and is called aliasing.
+ Normally, A/D converters run the signal through a lowpass fil-
+ ter first to avoid these problems.
- Similar problems will happen in software when
- reducing the sample rate of an audio file (fre�
- quencies above the new Nyquist frequency can be
- aliased to lower frequencies). Therefore, a
- good resample effect will remove all frequency
+ Similar problems will happen in software when reducing the
+ sample rate of an audio file (frequencies above the new
+ Nyquist frequency can be aliased to lower frequencies).
+ Therefore, a good resample effect will remove all frequency
information above the new Nyquist frequency.
- The rolloff refers to how close to the Nyquist
- frequency this cutoff is, with closer being bet�
- ter. When increasing the sample rate of an
- audio file you would not expect to have any fre�
- quencies exist that are past the original
- Nyquist frequency. Because of resampling prop�
- erties, it is common to have alaising data cre�
- ated that is above the old Nyquist frequency.
- In that case the rolloff refers to how close to
- the original Nyquist frequency to use a highpass
- filter to remove this false data, with closer
- also being better.
+ The rolloff refers to how close to the Nyquist frequency this
+ cutoff is, with closer being better. When increasing the
+ sample rate of an audio file you would not expect to have any
+ frequencies exist that are past the original Nyquist fre-
+ quency. Because of resampling properties, it is common to
+ have aliasing data created that is above the old Nyquist fre-
+ quency. In that case the rolloff refers to how close to the
+ original Nyquist frequency to use a highpass filter to remove
+ this false data, with closer also being better.
- The beta parameter determines the type of filter
- window used. Any value greater than 2.0 is the
- beta for a Kaiser window. Beta <= 2.0 selects a
- Nuttall window. If unspecified, the default is
- a Kaiser window with beta 16.
+ The beta parameter determines the type of filter window used.
+ Any value greater than 2.0 is the beta for a Kaiser window.
+ Beta <= 2.0 selects a Nuttall window. If unspecified, the
+ default is a Kaiser window with beta 16.
- In the case of Kaiser window (beta > 2.0), lower
- betas produce a somewhat faster transition from
- passband to stopband, at the cost of noticeable
- artifacts. A beta of 16 is the default, beta
- less than 10 is not recommended. If you want a
- sharper cutoff, don't use low beta's, use a
- longer sample window. A Nuttall window is
- selected by specifying any 'beta' <= 2, and the
- Nuttall window has somewhat steeper cutoff than
- the default Kaiser window. You will probably
- not need to use the beta parameter at all,
- unless you are just curious about comparing the
- effects of Nuttall vs. Kaiser windows.
+ In the case of Kaiser window (beta > 2.0), lower betas pro-
+ duce a somewhat faster transition from passband to stopband,
+ at the cost of noticeable artifacts. A beta of 16 is the
+ default, beta less than 10 is not recommended. If you want a
+ sharper cutoff, don’t use low beta’s, use a longer sample
+ window. A Nuttall window is selected by specifying any
+ ’beta’ <= 2, and the Nuttall window has somewhat steeper cut-
+ off than the default Kaiser window. You will probably not
+ need to use the beta parameter at all, unless you are just
+ curious about comparing the effects of Nuttall vs. Kaiser
+ windows.
- This is the default effect if the two files have
- different sampling rates. Default parameters
- are, as indicated above, Kaiser window of length
- 45, rolloff 0.80, beta 16, linear interpolation.
+ This is the default effect if the two files have different
+ sampling rates. Default parameters are, as indicated above,
+ Kaiser window of length 45, rolloff 0.80, beta 16, linear
+ interpolation.
- NOTE: -qs is only slightly slower, but more
- accurate for 16-bit or higher precision.
+ NOTE: -qs is only slightly slower, but more accurate for
+ 16-bit or higher precision.
- NOTE: In many cases of up-sampling, no interpo�
- lation is needed, as exact filter coefficients
- can be computed in a reasonable amount of space.
- To be precise, this is done when
+ NOTE: In many cases of up-sampling, no interpolation is
+ needed, as exact filter coefficients can be computed in a
+ reasonable amount of space. To be precise, this is done when
input_rate < output_rate
&&
output_rate/gcd(input_rate,output_rate) <= 511
- reverb gain-out delay [ delay ... ]
- Add reverberation to a sound sample. Each delay
- is given in milliseconds and its feedback is
- depending on the reverb-time in milliseconds.
- Each delay should be in the range of half to
- quarter of reverb-time to get a realistic rever�
- beration. Gain-out is the volume of the output.
+ reverb gain-out reverb-time delay [ delay ... ]
+ Add reverberation to a sound sample. Each delay is given in
+ milliseconds and its feedback is depending on the reverb-time
+ in milliseconds. Each delay should be in the range of half
+ to quarter of reverb-time to get a realistic reverberation.
+ Gain-out is the volume of the output.
- reverse Reverse the sound sample completely. Included
- for finding Satanic subliminals.
+ reverse Reverse the sound sample completely. Included for finding
+ Satanic subliminals.
silence above_periods [ duration threshold[ d | % ]
@@ -1039,198 +902,166 @@
[ below_periods duration
threshold[ d | % ]]
- Removes silence from the beginning or end of a
- sound file. Silence is anything below a speci�
- fied threshold.
- When trimming silence from the beginning of a
- sound file, you specify a duration of audio that
- is above a given silence threshold before audio
- data is processed. You can also specify the
- count of periods of none silence you want to
- detect before processing audio data. Specify a
- period of 0 if you do not want to trim data from
- the front of the sound file.
- When optionally trimming silence form the end of
- a sound file, you specify the duration of audio
- that must be below a given threshold before
- stopping to process audio data. A count of
- periods that occur below the threshold may also
- be specified. If this options are not specified
- then data is not trimmed from the end of the
- audio file.
- Duration counts may be in the format of time,
- hh:mm:ss.frac, or in the exact count of samples.
- Threshold may be suffixed with d, or % to indi�
- cated the value is in decibels or a percentage
- of max value of the sample value. A value of
- '0%' will look for total silence.
+ Removes silence from the beginning or end of a sound file.
+ Silence is anything below a specified threshold.
+ When trimming silence from the beginning of a sound file, you
+ specify a duration of audio that is above a given silence
+ threshold before audio data is processed. You can also spec-
+ ify the count of periods of non-silence you want to detect
+ before processing audio data. Specify a period of 0 if you
+ do not want to trim data from the front of the sound file.
+ When optionally trimming silence from the end of a sound
+ file, you specify the duration of audio that must be below a
+ given threshold before stopping to process audio data. A
+ count of periods that occur below the threshold may also be
+ specified. If these options are not specified then data is
+ not trimmed from the end of the audio file.
+ Duration counts may be in the format of time, hh:mm:ss.frac,
+ or in the exact count of samples.
+ Threshold may be suffixed with d, or % to indicate the value
+ is in decibels or a percentage of max value of the sample
+ value. A value of ’0%’ will look for total silence.
speed [ -c ] factor
- Speed up or down the sound, as a magnetic tape
- with a speed control. It affects both pitch and
- time. A factor of 1.0 means no change, and is
- the default. 2.0 doubles speed, thus time
- length is cut by a half and pitch is one octave
- higher. 0.5 halves speed thus time length dou�
- bles and pitch is one octave lower. If the
- optional -c parameter is used then the factor is
- specified in "cents".
+ Speed up or down the sound, as a magnetic tape with a speed
+ control. It affects both pitch and time. A factor of 1.0
+ means no change, and is the default. 2.0 doubles speed, thus
+ time length is cut by a half and pitch is one octave higher.
+ 0.5 halves speed thus time length doubles and pitch is one
+ octave lower. If the optional -c parameter is used then the
+ factor is specified in "cents".
- split Turn a mono sample into a stereo sample by copy�
- ing the input channel to the left and right
- channels.
+ split Turn a mono sample into a stereo sample by copying the input
+ channel to the left and right channels.
stat [ -s n ] [-rms ] [ -v ] [ -d ]
- Do a statistical check on the input file, and
- print results on the standard error file. Audio
- data is passed unmodified from input to output
- file unless used along with the -e option.
+ Do a statistical check on the input file, and print results
+ on the standard error file. Audio data is passed unmodified
+ from input to output file unless used along with the -e
+ option.
- The "Volume Adjustment:" field in the statistics
- gives you the argument to the -v number which
- will make the sample as loud as possible without
- clipping.
+ The "Volume Adjustment:" field in the statistics gives you
+ the argument to the -v number which will make the sample as
+ loud as possible without clipping.
- The option -v will print out the "Volume Adjust�
- ment:" field's value only and return. This
- could be of use in scripts to auto convert the
- volume.
+ The option -v will print out the "Volume Adjustment:" field’s
+ value only and return. This could be of use in scripts to
+ auto convert the volume.
- The -s n option is used to scale the input data
- by a given factor. The default value of n is
- the max value of a signed long variable
- (0x7fffffff). Internal effects always work with
- signed long PCM data and so the value should
- relate to this fact.
+ The -s n option is used to scale the input data by a given
+ factor. The default value of n is the max value of a signed
+ long variable (0x7fffffff). Internal effects always work
+ with signed long PCM data and so the value should relate to
+ this fact.
- The -rms option will convert all output average
- values to root mean square format.
+ The -rms option will convert all output average values to
+ root mean square format.
- There is also an optional parameter -d that will
- print out a hex dump of the sound file from the
- internal buffer that is in 32-bit signed PCM
- data. This is mainly only of use in tracking
- down endian problems that creep in to SoX on
- cross-platform versions.
+ There is also an optional parameter -d that will print out a
+ hex dump of the sound file from the internal buffer that is
+ in 32-bit signed PCM data. This is mainly only of use in
+ tracking down endian problems that creep in to SoX on cross-
+ platform versions.
stretch factor [window fade shift fading]
- Time stretch file by a given factor. Change
- duration without affecting the pitch. factor of
- stretching: >1.0 lengthen, <1.0 shorten dura�
- tion. window size is in ms. Default is 20ms.
- The fade option, can be "lin". shift ratio, in
- [0.0 1.0]. Default depends on stretch factor.
- 1.0 to shorten, 0.8 to lengthen. The fading
- ratio, in [0.0 0.5]. The amount of a fade's
- default depends on factor and shift.
+ Time stretch file by a given factor. Change duration without
+ affecting the pitch. factor of stretching: >1.0 lengthen,
+ <1.0 shorten duration. window size is in ms. Default is
+ 20ms. The fade option, can be "lin". shift ratio, in [0.0
+ 1.0]. Default depends on stretch factor. 1.0 to shorten, 0.8
+ to lengthen. The fading ratio, in [0.0 0.5]. The amount of a
+ fade’s default depends on factor and shift.
swap [ 1 2 | 1 2 3 4 ]
- Swap channels in multi-channel sound files.
- Optionally, you may specify the channel order
- you would like the output in. This defaults to
- output channel 2 and then 1 for stereo and 2, 1,
- 4, 3 for quad-channels. An interesting feature
- is that you may duplicate a given channel by
- overwriting another. This is done by repeating
- an output channel on the command line. For
- example, swap 2 2 will overwrite channel 1 with
- channel 2's data; creating a stereo file with
- both channels containing the same audio data.
+ Swap channels in multi-channel sound files. Optionally, you
+ may specify the channel order you would like the output in.
+ This defaults to output channel 2 and then 1 for stereo and
+ 2, 1, 4, 3 for quad-channels. An interesting feature is that
+ you may duplicate a given channel by overwriting another.
+ This is done by repeating an output channel on the command
+ line. For example, swap 2 2 will overwrite channel 1 with
+ channel 2’s data; creating a stereo file with both channels
+ containing the same audio data.
synth [ length ] type mix [ freq [ -freq2 ]
[ off ] [ ph ] [ p1 ] [ p2 ] [ p3 ]
- The synth effect will generate various types of
- audio data. Although this effect is used to
- generate audio data, an input file must be spec�
- ified. The length of the input audio file
- determines the length of the output audio file.
- <length> length in sec or hh:mm:ss.frac,
- 0=inputlength, default=0
- <type> is sine, square, triangle, sawtooth,
- trapetz, exp, whitenoise, pinknoise, brownnoise,
- default=sine
- <mix> is create, mix, amod, default=create
- <freq> frequency at beginning in Hz, not used
- for noise..
- <freq2> frequency at end in Hz, not used for
- noise.. <freq/2> can be given as %%n, where 'n'
- is the number of half notes in respect to A
- (440Hz)
- <off> Bias (DC-offset) of signal in percent,
+ The synth effect will generate various types of audio data.
+ Although this effect is used to generate audio data, an input
+ file must be specified. The length of the input audio file
+ determines the length of the output audio file.
+ <length> length in sec or hh:mm:ss.frac, 0=inputlength,
default=0
- <ph> phase shift 0..100 shift phase 0..2*Pi, not
- used for noise..
- <p1> square: Ton/Toff, triangle+trapetz: rising
- slope time (0..100)
+ <type> is sine, square, triangle, sawtooth, trapetz, exp,
+ whitenoise, pinknoise, brownnoise, default=sine
+ <mix> is create, mix, amod, default=create
+ <freq> frequency at beginning in Hz, not used for noise..
+ <freq2> frequency at end in Hz, not used for noise..
+ <freq/2> can be given as %%n, where ’n’ is the number of half
+ notes in respect to A (440Hz)
+ <off> Bias (DC-offset) of signal in percent, default=0
+ <ph> phase shift 0..100 shift phase 0..2*Pi, not used for
+ noise..
+ <p1> square: Ton/Toff, triangle+trapetz: rising slope time
+ (0..100)
<p2> trapetz: ON time (0..100)
<p3> trapetz: falling slope position (0..100)
trim start [ length ]
- Trim can trim off unwanted audio data from the
- beginning and end of the audio file. Audio sam�
- ples are not sent to the output stream until the
- start location is reached.
- The optional length parameter tells the number
- of samples to output after the start sample and
- is used to trim off the back side of the audio
- data. Using a value of 0 for the start parame�
- ter will allow trimming off the back side only.
- Both options can be specified using either an
- amount of time and an exact count of samples.
- The format for specifying lengths in time is
- hh:mm:ss.frac. A start value of 1:30.5 will not
- start until 1 minute, thirty and 1/2 seconds
- into the audio data. The format for specifying
- sample counts is the number of samples with the
- letter 's' appended to it. A value of 8000s
- will wait until 8000 samples are read before
+ Trim can trim off unwanted audio data from the beginning and
+ end of the audio file. Audio samples are not sent to the
+ output stream until the start location is reached.
+ The optional length parameter tells the number of samples to
+ output after the start sample and is used to trim off the
+ back side of the audio data. Using a value of 0 for the
+ start parameter will allow trimming off the back side only.
+ Both options can be specified using either an amount of time
+ and an exact count of samples. The format for specifying
+ lengths in time is hh:mm:ss.frac. A start value of 1:30.5
+ will not start until 1 minute, thirty and 1/2 seconds into
+ the audio data. The format for specifying sample counts is
+ the number of samples with the letter ’s’ appended to it. A
+ value of 8000s will wait until 8000 samples are read before
starting to process audio data.
vibro speed [ depth ]
- Add the world-famous Fender Vibro-Champ sound
- effect to a sound sample by using a sine wave as
- the volume knob. Speed gives the Hertz value of
- the wave. This must be under 30. Depth gives
- the amount the volume is cut into by the sine
- wave, ranging 0.0 to 1.0 and defaulting to 0.5.
+ Add the world-famous Fender Vibro-Champ sound effect to a
+ sound sample by using a sine wave as the volume knob. Speed
+ gives the Hertz value of the wave. This must be under 30.
+ Depth gives the amount the volume is cut into by the sine
+ wave, ranging 0.0 to 1.0 and defaulting to 0.5.
vol gain [ type [ limitergain ] ]
- The vol effect is much like the command line
- option -v. It allows you to adjust the volume
- of an input file and allows you to specify the
- adjustment in relation to amplitude, power, or
- dB. If type is not specified then it defaults
- to amplitude.
- When type is amplitude then a linear change of
- the amplitude is performed based on the gain.
- Therefore, a value of 1.0 will keep the volume
- the same, 0.0 to < 1.0 will cause the volume to
- decrease and values of > 1.0 will cause the vol�
- ume to increase. Beware of clipping audio data
- when the gain is greater then 1.0. A negative
- value performs the same adjustment while also
- changing the phase.
- When type is power then a value of 1.0 also
- means no change in volume.
- When type is dB the amplitude is changed loga�
- rithmically. 0.0 is constant while +6 doubles
- the amplitude.
- An optional limitergain value can be specified
- and should be a value much less then 1.0 (ie
- 0.05 or 0.02) and is used only on peaks to pre�
- vent clipping. Not specifying this parameter
- will cause no limiter to be used. In verbose
- mode, this effect will display the percentage of
- audio data that needed to be limited.
+ The vol effect is much like the command line option -v. It
+ allows you to adjust the volume of an input file and allows
+ you to specify the adjustment in relation to amplitude,
+ power, or dB. If type is not specified then it defaults to
+ amplitude.
+ When type is amplitude then a linear change of the amplitude
+ is performed based on the gain. Therefore, a value of 1.0
+ will keep the volume the same, 0.0 to < 1.0 will cause the
+ volume to decrease and values of > 1.0 will cause the volume
+ to increase. Beware of clipping audio data when the gain is
+ greater than 1.0. A negative value performs the same adjust-
+ ment while also changing the phase.
+ When type is power then a value of 1.0 also means no change
+ in volume.
+ When type is dB the amplitude is changed logarithmically.
+ 0.0 is constant while +6 doubles the amplitude.
+ An optional limitergain value can be specified and should be
+ a value much less than 1.0 (ie 0.05 or 0.02) and is used only
+ on peaks to prevent clipping. Not specifying this parameter
+ will cause no limiter to be used. In verbose mode, this
+ effect will display the percentage of audio data that needed
+ to be limited.
BUGS
- The syntax is horrific. Thats the breaks when trying to
- handle all things from the command line.
+ The syntax is horrific. That’s the breaks when trying to handle all
+ things from the command line.
- Please report any bugs found in this version of SoX to
- Chris Bagwell (cbagwell@sprynet.com)
+ Please report any bugs found in this version of SoX to Chris Bagwell
+ (cbagwell@users.sourceforge.net)
FILES
SEE ALSO
@@ -1237,11 +1068,10 @@
play(1), rec(1), soxexam(1)
NOTICES
- The version of SoX that accompanies this manual page is
- support by Chris Bagwell (cbagwell@users.sourceforge.net).
- Please refer any questions regarding it to this address.
- You may obtain the latest version at the the web site
- http://sox.sourceforge.net/
+ The version of SoX that accompanies this manual page is supported by
+ Chris Bagwell (cbagwell@users.sourceforge.net). Please refer any ques-
+ tions regarding it to this address. You may obtain the latest version
+ at the web site http://sox.sourceforge.net/
AUTHOR
Chris Bagwell (cbagwell@users.sourceforge.net).
@@ -1250,4 +1080,4 @@
- December 11, 2001 SoX(1)
+ December 11, 2001 SoX(1)
--- a/soxeffect
+++ b/soxeffect
@@ -25,7 +25,7 @@
echo "to stdout. This means that [ fopts ] need to be given so that"
echo "sox will know what format the audio data is in."
echo
- echo "effectname: avg/band/bandpass/bandreject/chorus/compand/copy/cut/deemph/earwax/echo/echos/fade/filter/flanger/highp/highpass/lowp/lowpass/map/mask/pan/phaser/pick/pitch/polyphase/rate/resample/reverb/reverse/speed/split/stat/stretch/swap/trim/vibro/vol"
+ echo "effectname: avg/band/bandpass/bandreject/chorus/compand/copy/cut/deemph/earwax/echo/echos/fade/filter/flanger/highp/highpass/lowp/lowpass/map/mask/pan/phaser/pick/pitch/polyphase/rate/repeat/resample/reverb/reverse/speed/split/stat/stretch/swap/trim/vibro/vol"
echo
echo "fopts: -c channels -h -r rate -t type -v volume -s/-u/-U/-A -b/-w/-l/-f/-d/-D -x"
echo ""
--- a/soxexam.txt
+++ b/soxexam.txt
@@ -1,4 +1,4 @@
-SoX(1) SoX(1)
+SoX(1) SoX(1)
@@ -8,41 +8,35 @@
CONVERSIONS
Introduction
- In general, SoX will attempt to take an input sound file
- format and convert it into a new file format using a simi�
- lar data type and sample rate. For instance, "sox mon�
- key.au monkey.wav" would try and convert the mono 8000Hz
- u-law sample .au file that comes with SoX to a 8000Hz u-
- law .wav file.
+ In general, SoX will attempt to take an input sound file format and
+ convert it into a new file format using a similar data type and sample
+ rate. For instance, "sox monkey.au monkey.wav" would try and convert
+ the mono 8000Hz u-law sample .au file that comes with SoX to a 8000Hz
+ u-law .wav file.
- If an output format doesn't support the same data type as
- the input file then SoX will generally select a default
- data type to save it in. You can override the default
- data type selection by using command line options. This
- is also useful for producing an output file with higher or
- lower precision data and/or sample rate.
+ If an output format doesn’t support the same data type as the input
+ file then SoX will generally select a default data type to save it in.
+ You can override the default data type selection by using command line
+ options. This is also useful for producing an output file with higher
+ or lower precision data and/or sample rate.
- Most file formats that contain headers can automatically
- be read in. When working with header-less file formats
- then a user must manually tell SoX the data type and sam�
- ple rate using command line options.
+ Most file formats that contain headers can automatically be read in.
+ When working with header-less file formats then a user must manually
+ tell SoX the data type and sample rate using command line options.
- When working with header-less files (raw files), you may
- take advantage of the pseudo-file types of .ub, .uw, .sb,
- .sw, .ul, and .sl. By using these extensions on your
- filenames you will not have to specify the corresponding
- options on the command line.
+ When working with header-less files (raw files), you may take advantage
+ of the pseudo-file types of .ub, .uw, .sb, .sw, .ul, and .sl. By using
+ these extensions on your filenames you will not have to specify the
+ corresponding options on the command line.
Precision
- The following data types and formats can be represented by
- their total uncompressed bit precision. When converting
- from one data type to another care must be taken to insure
- it has an equal or greater precision. If not then the
- audio quality will be degraded. This is not always a bad
- thing when your working with things such as voice audio
- and are concerned about disk space or bandwidth of the
- audio data.
+ The following data types and formats can be represented by their total
+ uncompressed bit precision. When converting from one data type to
+ another care must be taken to ensure it has an equal or greater preci-
+ sion. If not then the audio quality will be degraded. This is not
+ always a bad thing when you’re working with things such as voice audio
+ and are concerned about disk space or bandwidth of the audio data.
Data Format Precision
___________ _________
@@ -60,204 +54,179 @@
Examples
- Use the '-V' option on all your command lines. It makes
- SoX print out its idea of what is going on. '-V' is your
- friend.
+ Use the ’-V’ option on all your command lines. It makes SoX print out
+ its idea of what is going on. ’-V’ is your friend.
- To convert from unsigned bytes at 8000 Hz to signed words
- at 8000 Hz:
+ To convert from unsigned bytes at 8000 Hz to signed words at 8000 Hz:
sox -r 8000 -c 1 filename.ub newfile.sw
- To convert from Apple's AIFF format to Microsoft's WAV
- format:
+ To convert from Apple’s AIFF format to Microsoft’s WAV format:
sox filename.aiff filename.wav
- To convert from mono raw 8000 Hz 8-bit unsigned PCM data
- to a WAV file:
+ To convert from mono raw 8000 Hz 8-bit unsigned PCM data to a WAV file:
sox -r 8000 -u -b -c 1 filename.raw filename.wav
- SoX may even be used to convert sample rates. Downcon�
- verting will reduce the bandwidth of a sample, but will
- reduce storage space on your disk. All such conversions
- are lossy and will introduce some noise. You should
- really pass your sample through a low pass filter prior to
- downconverting as this will prevent alias signals (which
- would sound like additional noise). For example to con�
- vert from a sample recorded at 11025 Hz to a u-law file at
- 8000 Hz sample rate:
+ SoX may even be used to convert sample rates. Downconverting will
+ reduce the bandwidth of a sample, but will reduce storage space on your
+ disk. All such conversions are lossy and will introduce some noise.
+ You should really pass your sample through a low pass filter prior to
+ downconverting as this will prevent alias signals (which would sound
+ like additional noise). For example to convert from a sample recorded
+ at 11025 Hz to a u-law file at 8000 Hz sample rate:
sox infile.wav -t au -r 8000 -U -b -c 1 outputfile.au
- To add a low-pass filter (note use of stdout for output of
- the first stage and stdin for input on the second stage):
+ To add a low-pass filter (note use of stdout for output of the first
+ stage and stdin for input on the second stage):
sox infile.wav -t raw -s -w -c 1 - lowpass 3700 |
- sox -t raw -r 11025 -s -w -c 1 - -t au -r 8000 -U -b
- -c 1 ofile.au
+ sox -t raw -r 11025 -s -w -c 1 - -t au -r 8000 -U -b -c 1 ofile.au
- If you hear some clicks and pops when converting to u-law
- or A-law, reduce the output level slightly, for example
- this will decrease it by 20%:
+ If you hear some clicks and pops when converting to u-law or A-law,
+ reduce the output level slightly, for example this will decrease it by
+ 20%:
- sox infile.wav -t au -r 8000 -U -b -c 1 -v .8 output�
- file.au
+ sox infile.wav -t au -r 8000 -U -b -c 1 -v .8 outputfile.au
- SoX is great to use along with other command line programs
- by passing data between the programs using pipelines. The
- most common example is to use mpg123 to convert mp3 files
- in to wav files. The following command line will do this:
+ SoX is great to use along with other command line programs by passing
+ data between the programs using pipelines. The most common example is
+ to use mpg123 to convert mp3 files in to wav files. The following com-
+ mand line will do this:
- mpg123 -b 10000 -s filename.mp3 | sox -t raw -r 44100 -s
- -w -c 2 - filename.wav
+ mpg123 -b 10000 -s filename.mp3 | sox -t raw -r 44100 -s -w -c 2 -
+ filename.wav
- When working with totally unknown audio data then the
- "auto" file format may be of use. It attempts to guess
- what the file type is and then you may save it into a
- known audio format.
+ When working with totally unknown audio data then the "auto" file for-
+ mat may be of use. It attempts to guess what the file type is and then
+ you may save it into a known audio format.
sox -V -t auto filename.snd filename.wav
- It is important to understand how the internals of SoX
- work with compressed audio including u-law, A-law, ADPCM,
- or GSM. SoX takes ALL input data types and converts them
- to uncompressed 32-bit signed data. It will then convert
- this internal version into the requested output format.
- This means additional noise can be introduced from decom�
- pressing data and then recompressing. If applying multi�
- ple effects to audio data, it is best to save the interme�
- diate data as PCM data. After the final effect is
- performed, then you can specify it as a compressed output
- format. This will keep noise introduction to a minimum.
+ It is important to understand how the internals of SoX work with com-
+ pressed audio including u-law, A-law, ADPCM, or GSM. SoX takes ALL
+ input data types and converts them to uncompressed 32-bit signed data.
+ It will then convert this internal version into the requested output
+ format. This means additional noise can be introduced from decompress-
+ ing data and then recompressing. If applying multiple effects to audio
+ data, it is best to save the intermediate data as PCM data. After the
+ final effect is performed, then you can specify it as a compressed out-
+ put format. This will keep noise introduction to a minimum.
- The following example applies various effects to an 8000
- Hz ADPCM input file and then end up with the final file as
- 44100 Hz ADPCM.
+ The following example applies various effects to an 8000 Hz ADPCM input
+ file and then ends up with the final file as 44100 Hz ADPCM.
sox firstfile.wav -r 44100 -s -w secondfile.wav
sox secondfile.wav thirdfile.wav swap
sox thirdfile.wav -a -b finalfile.wav mask
- Under a DOS shell, you can convert several audio files to
- an new output format using something similar to the fol�
- lowing command line:
+ Under a DOS shell, you can convert several audio files to a new output
+ format using something similar to the following command line:
FOR %X IN (*.RAW) DO sox -r 11025 -w -s -t raw $X $X.wav
EFFECTS
- Special thanks goes to Juergen Mueller
- (jmeuller@uia.au.ac.be) for this write up on effects.
+ Special thanks goes to Juergen Mueller (jmeuller@uia.au.ac.be) for this
+ write up on effects.
Introduction:
- The core problem is that you need some experience in using
- effects in order to say "that any old sound file sounds
- with effects absolutely hip". There isn't any rule-based
- system which tell you the correct setting of all the
- parameters for every effect. But after some time you will
- become an expert in using effects.
+ The core problem is that you need some experience in using effects in
+ order to say "that any old sound file sounds with effects absolutely
+ hip". There isn’t any rule-based system which tells you the correct set-
+ ting of all the parameters for every effect. But after some time you
+ will become an expert in using effects.
- Here are some examples which can be used with any music
- sample. (For a sample where only a single instrument is
- playing, extreme parameter setting may make well-known
- "typically" or "classical" sounds. Likewise, for drums,
- vocals or guitars.)
+ Here are some examples which can be used with any music sample. (For a
+ sample where only a single instrument is playing, extreme parameter
+ setting may make well-known "typically" or "classical" sounds. Like-
+ wise, for drums, vocals or guitars.)
- Single effects will be explained and some given parameter
- settings that can be used to understand the theory by lis�
- tening to the sound file with the added effect.
+ Single effects will be explained and some given parameter settings that
+ can be used to understand the theory by listening to the sound file
+ with the added effect.
- Using multiple effects in parallel or in series can result
- either in a very nice sound or (mostly) in a dramatic
- overloading in variations of sounds such that your ear may
- follow the sound but you will feel unsatisfied. Hence, for
- the first time using effects try to compose them as mini�
- mally as possible. We don't regard the composition of
- effects in the examples because too many combinations are
- possible and you really need a very fast machine and a lot
- of memory to play them in real-time.
+ Using multiple effects in parallel or in series can result either in a
+ very nice sound or (mostly) in a dramatic overloading in variations of
+ sounds such that your ear may follow the sound but you will feel unsat-
+ isfied. Hence, for the first time using effects try to compose them as
+ minimally as possible. We don’t regard the composition of effects in
+ the examples because too many combinations are possible and you really
+ need a very fast machine and a lot of memory to play them in real-time.
- However, real-time playing of sounds will greatly speed up
- learning and/or tuning the parameter settings for your
- sounds in order to get that "perfect" effect.
+ However, real-time playing of sounds will greatly speed up learning
+ and/or tuning the parameter settings for your sounds in order to get
+ that "perfect" effect.
- Basically, we will use the "play" front-end of SoX since
- it is easier to listen sounds coming out of the speaker or
- earphone instead of looking at cryptic data in sound
- files.
+ Basically, we will use the "play" front-end of SoX since it is easier
+ to listen to sounds coming out of the speaker or earphone instead of look-
+ ing at cryptic data in sound files.
- For easy listening of file.xxx ("xxx" is any sound for�
- mat):
+ For easy listening of file.xxx ("xxx" is any sound format):
play file.xxx effect-name effect-parameters
- Or more SoX-like (for "dsp" output on a UNIX/Linux com�
- puter):
+ Or more SoX-like (for "dsp" output on a UNIX/Linux computer):
- sox file.xxx -t ossdsp -w -s /dev/dsp effect-name
- effect-parameters
+ sox file.xxx -t ossdsp -w -s /dev/dsp effect-name effect-parame-
+ ters
or (for "au" output):
- sox file.xxx -t sunau -w -s /dev/audio effect-name
- effect-parameters
+ sox file.xxx -t sunau -w -s /dev/audio effect-name effect-parame-
+ ters
And for date freaks:
sox file.xxx file.yyy effect-name effect-parameters
- Additional options can be used. However, in this case, for
- real-time playing you'll need a very fast machine.
+ Additional options can be used. However, in this case, for real-time
+ playing you’ll need a very fast machine.
Notes:
- I played all examples in real-time on a Pentium 100 with
- 32 MB and Linux 2.0.30 using a self-recorded sample ( 3:15
- min long in "wav" format with 44.1 kHz sample rate and
- stereo 16 bit ). The sample should not contain any of the
- effects. However, if you take any recording of a sound
- track from radio or tape or CD, and it sounds like a live
- concert or ten people are playing the same rhythm with
- their drums or funky-grooves, then take any other sample.
- (Typically, less then four different instruments and no
- synthesizer in the sample is suitable. Likewise, the com�
- bination vocal, drums, bass and guitar.)
+ I played all examples in real-time on a Pentium 100 with 32 MB and
+ Linux 2.0.30 using a self-recorded sample ( 3:15 min long in "wav" for-
+ mat with 44.1 kHz sample rate and stereo 16 bit ). The sample should
+ not contain any of the effects. However, if you take any recording of a
+ sound track from radio or tape or CD, and it sounds like a live concert
+ or ten people are playing the same rhythm with their drums or funky-
+ grooves, then take any other sample. (Typically, less than four dif-
+ ferent instruments and no synthesizer in the sample is suitable. Like-
+ wise, the combination vocal, drums, bass and guitar.)
Effects:
Echo
- An echo effect can be naturally found in the mountains,
- standing somewhere on a mountain and shouting a single
- word will result in one or more repetitions of the word
- (if not, turn a bit around and try again, or climb to the
- next mountain).
+ An echo effect can be naturally found in the mountains, standing some-
+ where on a mountain and shouting a single word will result in one or
+ more repetitions of the word (if not, turn a bit around and try again,
+ or climb to the next mountain).
- However, the time difference between shouting and repeat�
- ing is the delay (time), its loudness is the decay. Multi�
- ple echos can have different delays and decays.
+ However, the time difference between shouting and repeating is the
+ delay (time), its loudness is the decay. Multiple echos can have dif-
+ ferent delays and decays.
- It is very popular to use echos to play an instrument with
- itself together, like some guitar players (Brain May from
- Queen) or vocalists are doing. For music samples of more
- than one instrument, echo can be used to add a second sam�
- ple shortly after the original one.
+ It is very popular to use echos to play an instrument with itself
+ together, like some guitar players (Brian May from Queen) or vocalists
+ are doing. For music samples of more than one instrument, echo can be
+ used to add a second sample shortly after the original one.
- This will sound as if you are doubling the number of
- instruments playing in the same sample:
+ This will sound as if you are doubling the number of instruments play-
+ ing in the same sample:
play file.xxx echo 0.8 0.88 60.0 0.4
- If the delay is very short, then it sound like a (metal�
- lic) robot playing music:
+ If the delay is very short, then it sounds like a (metallic) robot play-
+ ing music:
play file.xxx echo 0.8 0.88 6.0 0.4
- Longer delay will sound like an open air concert in the
- mountains:
+ Longer delay will sound like an open air concert in the mountains:
play file.xxx echo 0.8 0.9 1000.0 0.3
@@ -267,12 +236,11 @@
Echos
- Like the echo effect, echos stand for "ECHO in Sequel",
- that is the first echos takes the input, the second the
- input and the first echos, the third the input and the
- first and the second echos, ... and so on. Care should be
- taken using many echos (see introduction); a single echos
- has the same effect as a single echo.
+ Like the echo effect, echos stand for "ECHO in Sequel", that is the
+ first echos takes the input, the second the input and the first echos,
+ the third the input and the first and the second echos, ... and so on.
+ Care should be taken using many echos (see introduction); a single
+ echos has the same effect as a single echo.
The sample will be bounced twice in symmetric echos:
@@ -288,22 +256,19 @@
Chorus
- The chorus effect has its name because it will often be
- used to make a single vocal sound like a chorus. But it
- can be applied to other instrument samples too.
+ The chorus effect has its name because it will often be used to make a
+ single vocal sound like a chorus. But it can be applied to other
+ instrument samples too.
- It works like the echo effect with a short delay, but the
- delay isn't constant. The delay is varied using a sinu-
- soidal or triangular modulation. The modulation depth
- defines the range the modulated delay is played before or
- after the delay. Hence the delayed sound will sound slower
- or faster, that is the delayed sound tuned around the
- original one, like in a chorus where some vocals are a bit
- out of tune.
+ It works like the echo effect with a short delay, but the delay isn’t
+ constant. The delay is varied using a sinusoidal or triangular modula-
+ tion. The modulation depth defines the range the modulated delay is
+ played before or after the delay. Hence the delayed sound will sound
+ slower or faster, that is the delayed sound tuned around the original
+ one, like in a chorus where some vocals are a bit out of tune.
- The typical delay is around 40ms to 60ms, the speed of the
- modulation is best near 0.25Hz and the modulation depth
- around 2ms.
+ The typical delay is around 40ms to 60ms, the speed of the modulation
+ is best near 0.25Hz and the modulation depth around 2ms.
A single delay will make the sample more overloaded:
@@ -311,39 +276,36 @@
Two delays of the original samples sound like this:
- play file.xxx chorus 0.6 0.9 50.0 0.4 0.25 2.0 -t
- 60.0 0.32 0.4 1.3 -s
+ play file.xxx chorus 0.6 0.9 50.0 0.4 0.25 2.0 -t 60.0 0.32 0.4
+ 1.3 -s
A big chorus of the sample is (three additional samples):
- play file.xxx chorus 0.5 0.9 50.0 0.4 0.25 2.0 -t
- 60.0 0.32 0.4 2.3 -t 40.0 0.3 0.3 1.3 -s
+ play file.xxx chorus 0.5 0.9 50.0 0.4 0.25 2.0 -t 60.0 0.32 0.4
+ 2.3 -t 40.0 0.3 0.3 1.3 -s
Flanger
- The flanger effect is like the chorus effect, but the
- delay varies between 0ms and maximal 5ms. It sound like
- wind blowing, sometimes faster or slower including changes
- of the speed.
+ The flanger effect is like the chorus effect, but the delay varies
+ between 0ms and maximal 5ms. It sounds like wind blowing, sometimes
+ faster or slower including changes of the speed.
- The flanger effect is widely used in funk and soul music,
- where the guitar sound varies frequently slow or a bit
- faster.
+ The flanger effect is widely used in funk and soul music, where the
+ guitar sound varies frequently slow or a bit faster.
- The typical delay is around 3ms to 5ms, the speed of the
- modulation is best near 0.5Hz.
+ The typical delay is around 3ms to 5ms, the speed of the modulation is
+ best near 0.5Hz.
- Now, let's groove the sample:
+ Now, let’s groove the sample:
play file.xxx flanger 0.6 0.87 3.0 0.9 0.5 -s
- listen carefully between the difference of sinusoidal and
- triangular modulation:
+ listen carefully between the difference of sinusoidal and triangular
+ modulation:
play file.xxx flanger 0.6 0.87 3.0 0.9 0.5 -t
- If the decay is a bit lower, than the effect sounds more
- popular:
+ If the decay is a bit lower, then the effect sounds more popular:
play file.xxx flanger 0.8 0.88 3.0 0.4 0.5 -t
@@ -353,37 +315,32 @@
Reverb
- The reverb effect is often used in audience hall which are
- to small or contain too many many visitors which disturb
- (dampen) the reflection of sound at the walls. Reverb
- will make the sound be perceived as if it were in a large
- hall. You can try the reverb effect in your bathroom or
- garage or sport halls by shouting loud some words. You'll
- hear the words reflected from the walls.
+ The reverb effect is often used in audience halls which are too small or
+ contain too many visitors which disturb (dampen) the reflection of
+ sound at the walls. Reverb will make the sound be perceived as if it
+ were in a large hall. You can try the reverb effect in your bathroom
+ or garage or sport halls by shouting loud some words. You’ll hear the
+ words reflected from the walls.
- The biggest problem in using the reverb effect is the cor-
- rect setting of the (wall) delays such that the sound is
- realistic and doesn't sound like music playing in a tin
- can or has overloaded feedback which destroys any illusion
- of playing in a big hall. To help you obtain realistic
- reverb effects, you should decide first how long the
- reverb should take place until it is not loud enough to be
- registered by your ears. This is be done by varying the
- reverb time "t". To simulate small halls, use 200ms. To
- simulate large halls, use 1000ms. Clearly, the walls of
- such a hall aren't far away, so you should define its set-
- ting be given every wall its delay time. However, if the
- wall is to far away for the reverb time, you won't hear
- the reverb, so the nearest wall will be best at "t/4"
- delay and the farthest at "t/2". You can try other dis-
- tances as well, but it won't sound very realistic. The
- walls shouldn't stand to close to each other and not in a
- multiple integer distance to each other ( so avoid wall
- like: 200.0 and 202.0, or something like 100.0 and 200.0
- ).
+ The biggest problem in using the reverb effect is the correct setting
+ of the (wall) delays such that the sound is realistic and doesn’t sound
+ like music playing in a tin can or has overloaded feedback which
+ destroys any illusion of playing in a big hall. To help you obtain
+ realistic reverb effects, you should decide first how long the reverb
+ should take place until it is not loud enough to be registered by your
+ ears. This is done by varying the reverb time "t". To simulate
+ small halls, use 200ms. To simulate large halls, use 1000ms. Clearly,
+ the walls of such a hall aren’t far away, so you should define its set-
+ ting by giving every wall its delay time. However, if the wall is too
+ far away for the reverb time, you won’t hear the reverb, so the nearest
+ wall will be best at "t/4" delay and the farthest at "t/2". You can try
+ other distances as well, but it won’t sound very realistic. The walls
+ shouldn’t stand too close to each other and not in a multiple integer
+ distance to each other ( so avoid wall like: 200.0 and 202.0, or some-
+ thing like 100.0 and 200.0 ).
- Since audience halls do have a lot of walls, we will start
- designing one beginning with one wall:
+ Since audience halls do have a lot of walls, we will start designing
+ one beginning with one wall:
play file.xxx reverb 1.0 600.0 180.0
@@ -393,35 +350,30 @@
Next two walls:
- play file.xxx reverb 1.0 600.0 180.0 200.0 220.0
- 240.0
+ play file.xxx reverb 1.0 600.0 180.0 200.0 220.0 240.0
Now, why not a futuristic hall with six walls:
- play file.xxx reverb 1.0 600.0 180.0 200.0 220.0
- 240.0 280.0 300.0
+ play file.xxx reverb 1.0 600.0 180.0 200.0 220.0 240.0 280.0
+ 300.0
- If you run out of machine power or memory, then stop as
- many applications as possible (every interrupt will con-
- sume a lot of CPU time which for bigger halls is abso-
- lutely necessary).
+ If you run out of machine power or memory, then stop as many applica-
+ tions as possible (every interrupt will consume a lot of CPU time which
+ for bigger halls is absolutely necessary).
Phaser
- The phaser effect is like the flanger effect, but it uses
- a reverb instead of an echo and does phase shifting.
- You'll hear the difference in the examples comparing both
- effects (simply change the effect name). The delay modu-
- lation can be sinusoidal or triangular, preferable is the
- later for multiple instruments. For single instrument
- sounds, the sinusoidal phaser effect will give a sharper
- phasing effect. The decay shouldn't be to close to 1.0
- which will cause dramatic feedback. A good range is about
- 0.5 to 0.1 for the decay.
+ The phaser effect is like the flanger effect, but it uses a reverb
+ instead of an echo and does phase shifting. You’ll hear the difference
+ in the examples comparing both effects (simply change the effect name).
+ The delay modulation can be sinusoidal or triangular, preferable is the
+ latter for multiple instruments. For single instrument sounds, the sinu-
+ soidal phaser effect will give a sharper phasing effect. The decay
+ shouldn’t be too close to 1.0 which will cause dramatic feedback. A
+ good range is about 0.5 to 0.1 for the decay.
- We will take a parameter setting as for the flanger before
- (gain-out is lower since feedback can raise the output
- dramatically):
+ We will take a parameter setting as for the flanger before (gain-out is
+ lower since feedback can raise the output dramatically):
play file.xxx phaser 0.8 0.74 3.0 0.4 0.5 -t
@@ -439,43 +391,37 @@
Compander
- The compander effect allows the dynamic range of a signal
- to be compressed or expanded. For most situations, the
- attack time (response to the music getting louder) should
- be shorter than the decay time because our ears are more
- sensitive to suddenly loud music than to suddenly soft
- music.
+ The compander effect allows the dynamic range of a signal to be com-
+ pressed or expanded. For most situations, the attack time (response to
+ the music getting louder) should be shorter than the decay time because
+ our ears are more sensitive to suddenly loud music than to suddenly
+ soft music.
- For example, suppose you are listening to Strauss' "Also
- Sprach Zarathustra" in a noisy environment such as a car.
- If you turn up the volume enough to hear the soft passages
- over the road noise, the loud sections will be too loud.
- You could try this:
+ For example, suppose you are listening to Strauss’ "Also Sprach
+ Zarathustra" in a noisy environment such as a car. If you turn up the
+ volume enough to hear the soft passages over the road noise, the loud
+ sections will be too loud. You could try this:
- play file.xxx compand 0.3,1
- -90,-90,-70,-70,-60,-20,0,0 -5 0 0.2
+ play file.xxx compand 0.3,1 -90,-90,-70,-70,-60,-20,0,0 -5 0 0.2
- The transfer function ("-90,...") says that very soft
- sounds between -90 and -70 decibels (-90 is about the
- limit of 16-bit encoding) will remain unchanged. That
- keeps the compander from boosting the volume on "silent"
- passages such as between movements. However, sounds in
- the range -60 decibels to 0 decibels (maximum volume) will
- be boosted so that the 60-dB dynamic range of the original
- music will be compressed 3-to-1 into a 20-dB range, which
- is wide enough to enjoy the music but narrow enough to get
- around the road noise. The -5 dB output gain is needed to
- avoid clipping (the number is inexact, and was derived by
- experimentation). The 0 for the initial volume will work
- fine for a clip that starts with a bit of silence, and the
- delay of 0.2 has the effect of causing the compander to
- react a bit more quickly to sudden volume changes.
+ The transfer function ("-90,...") says that very soft sounds between
+ -90 and -70 decibels (-90 is about the limit of 16-bit encoding) will
+ remain unchanged. That keeps the compander from boosting the volume on
+ "silent" passages such as between movements. However, sounds in the
+ range -60 decibels to 0 decibels (maximum volume) will be boosted so
+ that the 60-dB dynamic range of the original music will be compressed
+ 3-to-1 into a 20-dB range, which is wide enough to enjoy the music but
+ narrow enough to get around the road noise. The -5 dB output gain is
+ needed to avoid clipping (the number is inexact, and was derived by
+ experimentation). The 0 for the initial volume will work fine for a
+ clip that starts with a bit of silence, and the delay of 0.2 has the
+ effect of causing the compander to react a bit more quickly to sudden
+ volume changes.
Changing the Rate of Playback
- You can use stretch to change the rate of playback of an
- audio sample while preserving the pitch. For example to
- play at 1/2 the speed:
+ You can use stretch to change the rate of playback of an audio sample
+ while preserving the pitch. For example to play at 1/2 the speed:
play file.wav stretch 2
@@ -483,11 +429,10 @@
play file.wav stretch .5
- Other related options are "speed" to change the speed of
- play (and changing the pitch accordingly), and pitch, to
- alter the pitch of a sample. For example to speed a sam-
- ple so it plays in 1/2 the time (for those Mickey Mouse
- voices):
+ Other related options are "speed" to change the speed of play (and
+ changing the pitch accordingly), and pitch, to alter the pitch of a
+ sample. For example to speed a sample so it plays in 1/2 the time (for
+ those Mickey Mouse voices):
play file.wav speed 2
@@ -497,27 +442,24 @@
- Other effects (copy, rate, avg, stat, vibro, lowp, highp,
- band, reverb)
+ Other effects (copy, rate, avg, stat, vibro, lowp, highp, band, reverb)
- The other effects are simple to use. However, an "easy to
- use manual" should be given here.
+ The other effects are simple to use. However, an "easy to use manual"
+ should be given here.
More effects (to do !)
- There are a lot of effects around like noise gates, com-
- pressors, waw-waw, stereo effects and so on. They should
- be implemented, making SoX more useful in sound mixing
- techniques coming together with a great variety of differ-
- ent sound effects.
+ There are a lot of effects around like noise gates, compressors, waw-
+ waw, stereo effects and so on. They should be implemented, making SoX
+ more useful in sound mixing techniques coming together with a great
+ variety of different sound effects.
- Combining effects by using them in parallel or serially on
- different channels needs some easy mechanism which is sta-
- ble for use in real-time.
+ Combining effects by using them in parallel or serially on different
+ channels needs some easy mechanism which is stable for use in real-
+ time.
- Really missing are the the changing of the parameters and
- starting/stopping of effects while playing samples in
- real-time!
+ Really missing are the changing of the parameters and start-
+ ing/stopping of effects while playing samples in real-time!
Good luck and have fun with all the effects!
@@ -534,4 +476,4 @@
- December 11, 2001 SoX(1)
+ December 11, 2001 SoX(1)
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -29,6 +29,7 @@
GSM_SUPPORT = @GSM_SUPPORT@
CFLAGS = @CFLAGS@ -I$(srcdir) -I$(builddir)
+CPPFLAGS = @CPPFLAGS@
LDFLAGS = -L. -L./gsm @LDFLAGS@
LIBS = -lst $(GSM_LIB_$(GSM_SUPPORT)) @LIBS@
@@ -45,13 +46,13 @@
cvsd.o dat.o g711.o g721.o g723_16.o g723_24.o g723_40.o \
g72x.o gsm.o hcom.o ima_rw.o maud.o mp3.o nulfile.o prc.o \
raw.o sf.o smp.o sndrtool.o sphere.o tx16w.o voc.o vorbis.o \
- wav.o wve.o
+ vox.o wav.o wve.o
EOBJ = avg.o band.o bandpass.o breject.o btrworth.o chorus.o compand.o \
copy.o dcshift.o deemphas.o earwax.o echo.o echos.o \
- fade.o filter.o flanger.o highp.o highpass.o lowp.o lowpass.o \
- map.o mask.o pan.o phaser.o pitch.o polyphas.o \
- rate.o resample.o reverb.o reverse.o silence.o speed.o \
+ fade.o filter.o flanger.o highp.o highpass.o lowp.o \
+ lowpass.o map.o mask.o pan.o phaser.o pitch.o polyphas.o \
+ rate.o repeat.o resample.o reverb.o reverse.o silence.o speed.o \
stat.o stretch.o swap.o synth.o trim.o vibro.o vol.o
OSSOBJ_0 =
--- a/src/avg.c
+++ b/src/avg.c
@@ -15,7 +15,10 @@
* Sound Tools stereo/quad -> mono mixdown effect file.
* and mono/stereo -> stereo/quad channel duplication.
*
- * What's in a center channel?
+ * TODO: The shorthand concepts of volume change and balance
+ * are not finished. Users can still change the volume of each
+ * channel, though, by manually specifying each channel's
+ * values.
*/
#include "st_i.h"
@@ -25,24 +28,29 @@
/* Private data for SKEL file */
typedef struct avgstuff {
- /* How to generate each output channel. sources[i][j] */
- /* represents the fraction of channel i that should be passed */
- /* through to channel j on output, and so forth. Channel 0 is */
- /* left front, channel 1 is right front, and 2 and 3 are left */
- /* and right rear, respectively. (GHK) */
- double sources[4][4];
- int num_pans;
- int mix; /* How are we mixing it? */
+ /* How to generate each output channel. sources[i][j] */
+ /* represents the fraction of channel i that should be passed */
+ /* through to channel j on output, and so forth. Channel 0 is */
+ /* left front, channel 1 is right front, and 2 and 3 are left */
+ /* and right rear, respectively. (GHK) */
+ double sources[4][4];
+ int num_pans;
+ int mix; /* How are we mixing it? */
} *avg_t;
-#define MIX_CENTER 0
-#define MIX_LEFT 1
-#define MIX_RIGHT 2
-#define MIX_FRONT 3
-#define MIX_BACK 4
-#define MIX_SPECIFIED 5
+/* MIX_CENTER is shorthand to mix channels together at 50% each */
+#define MIX_CENTER 0
+#define MIX_LEFT 1
+#define MIX_RIGHT 2
+#define MIX_FRONT 3
+#define MIX_BACK 4
+#define MIX_SPECIFIED 5
+#define MIX_LEFT_FRONT 6
+#define MIX_RIGHT_FRONT 7
+#define MIX_LEFT_BACK 8
+#define MIX_RIGHT_BACK 9
-#define CLIP_LEVEL ((double)(((unsigned)1 << 31) - 1))
+#define CLIP_LEVEL ((double)(((unsigned)1 << 31) - 1))
/*
* Process options
@@ -54,7 +62,7 @@
int i;
for (i = 0; i < 16; i++)
- pans[i] = 0.0;
+ pans[i] = 0.0;
avg->mix = MIX_CENTER;
avg->num_pans = 0;
@@ -62,41 +70,47 @@
/* input and output channels, we'll record the information for */
/* later. */
if (n) {
- if(!strcmp(argv[0], "-l"))
- avg->mix = MIX_LEFT;
- else if (!strcmp(argv[0], "-r"))
- avg->mix = MIX_RIGHT;
- else if (!strcmp(argv[0], "-f"))
- avg->mix = MIX_FRONT;
- else if (!strcmp(argv[0], "-b"))
- avg->mix = MIX_BACK;
- else if (argv[0][0] == '-' && !isdigit((int)argv[0][1])
- && argv[0][1] != '.') {
- st_fail("Usage: avg [ -l | -r | -f | -b | n,n,n...,n ]");
- return (ST_EOF);
- }
- else {
- int commas;
- char *s;
- avg->mix = MIX_SPECIFIED;
- pans[0] = atof(argv[0]);
- for (s = argv[0], commas = 0; *s; ++s) {
- if (*s == ',') {
- ++commas;
- if (commas >= 16) {
- st_fail("avg can only take up to 16 pan values");
- return (ST_EOF);
- }
- pans[commas] = atof(s+1);
- }
- }
- avg->num_pans = commas + 1;
- }
- }
+ if(!strcmp(argv[0], "-l"))
+ avg->mix = MIX_LEFT;
+ else if (!strcmp(argv[0], "-r"))
+ avg->mix = MIX_RIGHT;
+ else if (!strcmp(argv[0], "-f"))
+ avg->mix = MIX_FRONT;
+ else if (!strcmp(argv[0], "-b"))
+ avg->mix = MIX_BACK;
+ /* -1 .. -4 select a single channel: left front, right front, */
+ /* left back and right back, in that order. */
+ else if (!strcmp(argv[0], "-1"))
+ avg->mix = MIX_LEFT_FRONT;
+ else if (!strcmp(argv[0], "-2"))
+ avg->mix = MIX_RIGHT_FRONT;
+ else if (!strcmp(argv[0], "-3"))
+ avg->mix = MIX_LEFT_BACK;
+ else if (!strcmp(argv[0], "-4"))
+ avg->mix = MIX_RIGHT_BACK;
+ else if (argv[0][0] == '-' && !isdigit((int)argv[0][1])
+ && argv[0][1] != '.') {
+ st_fail("Usage: avg [ -l | -r | -f | -b | -1 | -2 | -3 | -4 | n,n,n...,n ]");
+ return (ST_EOF);
+ }
+ else {
+ int commas;
+ char *s;
+ avg->mix = MIX_SPECIFIED;
+ pans[0] = atof(argv[0]);
+ for (s = argv[0], commas = 0; *s; ++s) {
+ if (*s == ',') {
+ ++commas;
+ if (commas >= 16) {
+ st_fail("avg can only take up to 16 pan values");
+ return (ST_EOF);
+ }
+ pans[commas] = atof(s+1);
+ }
+ }
+ avg->num_pans = commas + 1;
+ }
+ }
else {
- pans[0] = 0.5;
- pans[1] = 0.5;
- avg->num_pans = 2;
+ avg->mix = MIX_CENTER;
}
return (ST_SUCCESS);
}
@@ -197,45 +211,161 @@
st_fail("Output must have different number of channels to use avg effect");
return(ST_EOF);
}
- break; /* Code below will handle this case */
+ pans[0] = 0.5;
+ pans[1] = 0.5;
+ avg->num_pans = 2;
+ avg->mix = MIX_CENTER;
+ break; /* Code below will handle this case */
case MIX_LEFT:
- if (ichan < 2) {
- st_fail("Input must have at least two channels to use avg -l");
- return(ST_EOF);
+ if (ichan == 2 && ochan == 1)
+ {
+ pans[0] = 1.0;
+ pans[1] = 0.0;
+ avg->num_pans = 2;
}
- pans[0] = 1.0;
- pans[1] = 0.0;
- avg->num_pans = 2;
+ else if (ichan == 4 && ochan == 2)
+ {
+ pans[0] = 0.5;
+ pans[1] = 0.0;
+ pans[2] = 0.5;
+ pans[3] = 0.0;
+ avg->num_pans = 4;
+ }
+ else
+ {
+ st_fail("Can't average %d channels into %d channels",
+ ichan, ochan);
+ return ST_EOF;
+ }
break;
case MIX_RIGHT:
- if (ichan < 2) {
- st_fail("Input must have at least two channels to use avg -r");
- return(ST_EOF);
+ if (ichan == 2 && ochan == 1)
+ {
+ pans[0] = 0.0;
+ pans[1] = 1.0;
+ avg->num_pans = 2;
}
- pans[0] = 0.0;
- pans[1] = 1.0;
- avg->num_pans = 2;
+ else if (ichan == 4 && ochan == 2)
+ {
+ pans[0] = 0.0;
+ pans[1] = 0.5;
+ pans[2] = 0.0;
+ pans[3] = 0.5;
+ avg->num_pans = 4;
+ }
+ else
+ {
+ st_fail("Can't average %d channels into %d channels",
+ ichan, ochan);
+ return ST_EOF;
+ }
break;
case MIX_FRONT:
- if (ichan < 4) {
- st_fail("Input must have at least four channels to use avg -f");
- return(ST_EOF);
+ if (ichan == 4 && ochan == 2)
+ {
+ pans[0] = 1.0;
+ pans[1] = 1.0;
+ pans[2] = 0.0;
+ pans[3] = 0.0;
+ avg->num_pans = 4;
}
- pans[0] = 1.0;
- pans[1] = 0.0;
- avg->num_pans = 2;
+ else
+ {
+ st_fail("avg: -f option requires 4 channels input and 2 channel output");
+ return ST_EOF;
+ }
break;
case MIX_BACK:
- if (ichan < 4) {
- st_fail("Input must have at least four channels to use avg -b");
- return(ST_EOF);
+ if (ichan == 4 && ochan == 2)
+ {
+ pans[0] = 0.0;
+ pans[1] = 0.0;
+ pans[2] = 1.0;
+ pans[3] = 1.0;
+ avg->num_pans = 4;
}
- pans[0] = 0.0;
- pans[1] = 1.0;
- avg->num_pans = 2;
+ else
+ {
+ st_fail("avg: -b option requires 4 channels input and 2 channel output");
+ return ST_EOF;
+ }
break;
- default:
+ case MIX_LEFT_FRONT:
+ if (ichan == 2 && ochan == 1)
+ {
+ pans[0] = 1.0;
+ pans[1] = 0.0;
+ avg->num_pans = 2;
+ }
+ else if (ichan == 4 && ochan == 1)
+ {
+ pans[0] = 1.0;
+ pans[1] = 0.0;
+ pans[2] = 0.0;
+ pans[3] = 0.0;
+ avg->num_pans = 4;
+ }
+ else
+ {
+ st_fail("avg: -1 option requires 4 channels input and 1 channel output");
+ return ST_EOF;
+ }
break;
+ case MIX_RIGHT_FRONT:
+ if (ichan == 2 && ochan == 1)
+ {
+ pans[0] = 0.0;
+ pans[1] = 1.0;
+ avg->num_pans = 2;
+ }
+ else if (ichan == 4 && ochan == 1)
+ {
+ pans[0] = 0.0;
+ pans[1] = 1.0;
+ pans[2] = 0.0;
+ pans[3] = 0.0;
+ avg->num_pans = 4;
+ }
+ else
+ {
+ st_fail("avg: -2 option requires 4 channels input and 1 channel output");
+ return ST_EOF;
+ }
+ break;
+ case MIX_LEFT_BACK:
+ /* Keep only input channel 2 (left back) when going 4 -> 1. */
+ if (ichan == 4 && ochan == 1)
+ {
+ pans[0] = 0.0;
+ pans[1] = 0.0;
+ pans[2] = 1.0;
+ pans[3] = 0.0;
+ avg->num_pans = 4;
+ }
+ else
+ {
+ st_fail("avg: -3 option requires 4 channels input and 1 channel output");
+ return ST_EOF;
+ }
+ /* Without this break the case fell through and the pans above */
+ /* were overwritten by the MIX_RIGHT_BACK settings. */
+ break;
+ case MIX_RIGHT_BACK:
+ /* Keep only input channel 3 (right back) when going 4 -> 1. */
+ if (ichan == 4 && ochan == 1)
+ {
+ pans[0] = 0.0;
+ pans[1] = 0.0;
+ pans[2] = 0.0;
+ pans[3] = 1.0;
+ avg->num_pans = 4;
+ }
+ else
+ {
+ st_fail("avg: -4 option requires 4 channels input and 1 channel output");
+ return ST_EOF;
+ }
+ break;
+
+ case MIX_SPECIFIED:
+ /* Pan values were already parsed from the command line. */
+ break;
+ default:
+ st_fail("Unknown mix option in average effect");
+ return ST_EOF;
}
/* If the number of pans given is 4 or fewer, handle the special */
@@ -296,6 +426,11 @@
avg->sources[3][3] = avg->sources[1][1];
}
}
+ else
+ {
+ st_fail("Invalid options specified to avg while not mixing");
+ return ST_EOF;
+ }
}
else if (avg->num_pans == 2) {
if (ichan == 2 && ochan == 1) {
@@ -304,7 +439,6 @@
}
else if (ichan == 4 && ochan == 2) {
avg->sources[0][0] = pans[0];
- avg->sources[0][1] = 0.0;
avg->sources[1][1] = pans[0];
avg->sources[2][0] = pans[1];
avg->sources[3][1] = pans[1];
@@ -312,7 +446,6 @@
else if (ichan == 4 && ochan == 4) {
/* pans[0] is front -> front, pans[1] is for back */
avg->sources[0][0] = pans[0];
- avg->sources[0][1] = 0.0;
avg->sources[1][1] = pans[0];
avg->sources[2][2] = pans[1];
avg->sources[3][3] = pans[1];
@@ -326,6 +459,12 @@
avg->sources[1][0] = pans[2];
avg->sources[1][1] = pans[3];
}
+ else if (ichan == 4 && ochan == 2) {
+ avg->sources[0][0] = pans[0];
+ avg->sources[1][1] = pans[1];
+ avg->sources[2][0] = pans[2];
+ avg->sources[3][1] = pans[3];
+ }
else if (ichan == 4 && ochan == 1) {
avg->sources[0][0] = pans[0];
avg->sources[1][0] = pans[1];
@@ -333,6 +472,12 @@
avg->sources[3][0] = pans[3];
}
}
+ else
+ {
+ st_fail("Invalid options specified to avg while not mixing");
+ return ST_EOF;
+ }
+
return (ST_SUCCESS);
}
--- a/src/handlers.c
+++ b/src/handlers.c
@@ -376,6 +376,10 @@
st_vorbisstartwrite, st_vorbiswrite, st_vorbisstopwrite,
st_format_nothing_seek},
#endif
+ /* Dialogic/OKI ADPCM (vox) handler entry. */
+ /* NOTE(review): this entry is registered under vorbisnames, so it */
+ /* presumably answers to the Vorbis names instead of its own; no */
+ /* voxnames table is visible in this change - confirm and add one. */
+ {vorbisnames, 0,
+ st_voxstartread, st_voxread, st_voxstopread,
+ st_voxstartwrite, st_voxwrite, st_voxstopwrite,
+ st_format_nothing_seek},
{wavnames, ST_FILE_STEREO | ST_FILE_SEEK,
st_wavstartread, st_wavread, st_wavstopread,
st_wavstartwrite, st_wavwrite, st_wavstopwrite, st_wavseek},
@@ -465,6 +469,9 @@
{"phaser", 0,
st_phaser_getopts, st_phaser_start, st_phaser_flow,
st_phaser_drain, st_phaser_stop},
+ {"pick", ST_EFF_MCHAN | ST_EFF_CHAN,
+ st_avg_getopts, st_avg_start, st_avg_flow,
+ st_effect_nothing_drain, st_avg_stop},
{"pitch", 0,
st_pitch_getopts, st_pitch_start, st_pitch_flow,
st_pitch_drain, st_pitch_stop},
@@ -474,6 +481,9 @@
{"rate", ST_EFF_RATE,
st_rate_getopts, st_rate_start, st_rate_flow,
st_effect_nothing_drain, st_effect_nothing},
+ {"repeat", 0,
+ st_repeat_getopts, st_repeat_start, st_repeat_flow,
+ st_repeat_drain, st_repeat_stop},
{"resample", ST_EFF_RATE,
st_resample_getopts, st_resample_start, st_resample_flow,
st_resample_drain, st_resample_stop},
--- a/src/play.in
+++ b/src/play.in
@@ -61,7 +61,7 @@
EFFECTs are one or more of the following: avg, band, chorus, copy, cut,
deemph, echo, echos, flanger, highp, lowp, map, mask, phaser, pick, polyphase
-rate, resample, reverb, reverse, split, stat, vibro.
+rate, repeat, resample, reverb, reverse, split, stat, vibro.
See sox man page for detailed information on supported file types, data
formats, and effect options."
@@ -72,7 +72,7 @@
# loop over arguments
while [ $# -ne 0 ]; do
case "$1" in
- avg|band|bandpass|bandreject|chorus|compand|copy|cut|deemph|earwax|echo|echos|fade|filter|flanger|highp|highpass|lowp|lowpass|map|mask|pan|phaser|pick|pitch|polyphase|rate|resample|reverb|reverse|silence|speed|split|stat|stretch|swap|trim|vibro|vol)
+ avg|band|bandpass|bandreject|chorus|compand|copy|cut|deemph|earwax|echo|echos|fade|filter|flanger|highp|highpass|lowp|lowpass|map|mask|pan|phaser|pick|pitch|polyphase|rate|repeat|resample|reverb|reverse|silence|speed|split|stat|stretch|swap|trim|vibro|vol)
effects="$@"
break
;;
--- /dev/null
+++ b/src/repeat.c
@@ -1,0 +1,200 @@
+/*
+
+ Repeat effect file for SoX
+ Copyright (C) 2004 Jan Paul Schmidt <jps@fundament.org>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include "st_i.h"
+
+/* Private state of the repeat effect, kept in effp->priv. */
+typedef struct repeatstuff {
+ FILE *fp; /* temporary file that buffers every input sample */
+ int first_drain; /* set to 1 by start, cleared on the first drain call */
+ st_size_t total; /* number of samples stored in the temporary file */
+ st_size_t remaining; /* samples still to be emitted in the current pass */
+ int repeats; /* repeat count from the command line; counts down while draining */
+} *repeat_t;
+
+/*
+ * Parse the effect arguments: exactly one, the repeat count.
+ *
+ * The count is read with "%i" and must not be negative.
+ * Returns ST_SUCCESS, or ST_EOF after reporting the problem via st_fail().
+ */
+int st_repeat_getopts(eff_t effp, int n, char **argv)
+{
+    repeat_t repeat = (repeat_t)effp->priv;
+
+    if (n != 1) {
+        st_fail("Usage: repeat count");
+        return (ST_EOF);
+    }
+
+    /* sscanf() returns EOF (non-zero) on empty input, so demand exactly
+     * one successful conversion instead of testing for zero. */
+    if (sscanf(argv[0], "%i", &repeat->repeats) != 1) {
+        st_fail("repeat: could not parse repeat parameter");
+        return (ST_EOF);
+    }
+
+    if (repeat->repeats < 0) {
+        st_fail("repeat: repeat parameter must be positive");
+        return (ST_EOF);
+    }
+
+    return (ST_SUCCESS);
+}
+
+/*
+ * Prepare the effect: open the temporary file that will buffer the
+ * input and arm the first-drain flag.
+ * Returns ST_SUCCESS, or ST_EOF if the temporary file cannot be created.
+ */
+int st_repeat_start(eff_t effp)
+{
+    repeat_t state = (repeat_t)effp->priv;
+
+    state->fp = tmpfile();
+    if (!state->fp) {
+        st_fail("repeat: could not create temporary file");
+        return ST_EOF;
+    }
+
+    state->first_drain = 1;
+    return ST_SUCCESS;
+}
+
+/*
+ * Flow handler: append all *isamp input samples to the temporary file.
+ * Nothing is produced at this stage, so *osamp is set to 0; obuf is unused.
+ * Returns ST_SUCCESS, or ST_EOF on a short write.
+ */
+int st_repeat_flow(eff_t effp, st_sample_t *ibuf, st_sample_t *obuf,
+                   st_size_t *isamp, st_size_t *osamp)
+{
+    repeat_t state = (repeat_t)effp->priv;
+    size_t written = fwrite((char *)ibuf, sizeof(st_sample_t), *isamp,
+                            state->fp);
+
+    if (written != *isamp) {
+        st_fail("repeat: write error on temporary file\n");
+        return ST_EOF;
+    }
+
+    *osamp = 0;
+    return ST_SUCCESS;
+}
+
+/*
+ * Drain handler: play back the samples buffered in the temporary file.
+ *
+ * The first call measures the file and rewinds it.  Every call then copies
+ * up to *osamp samples into obuf; whenever the end of the file is reached
+ * and repeats are left, the counter is decremented and the file rewound.
+ * The recording is therefore emitted once, plus once more per repeat.
+ *
+ * On return *osamp holds the number of samples stored in obuf; it is 0
+ * once everything has been played.
+ * Returns ST_SUCCESS, or ST_EOF on a temporary file error.
+ */
+int st_repeat_drain(eff_t effp, st_sample_t *obuf, st_size_t *osamp)
+{
+    size_t read = 0;
+    st_size_t samp;
+    st_size_t done;
+    long size;
+
+    repeat_t repeat = (repeat_t)effp->priv;
+
+    if (repeat->first_drain == 1) {
+        repeat->first_drain = 0;
+
+        /* The byte size of the temporary file gives the recorded length. */
+        if (fseek(repeat->fp, 0L, SEEK_END) != 0 ||
+            (size = ftell(repeat->fp)) < 0) {
+            st_fail("repeat: could not determine size of temporary file");
+            return (ST_EOF);
+        }
+        repeat->total = size;
+
+        if ((repeat->total % sizeof(st_sample_t)) != 0) {
+            st_fail("repeat: corrupted temporary file\n");
+            return (ST_EOF);
+        }
+
+        repeat->total /= sizeof(st_sample_t);
+        repeat->remaining = repeat->total;
+
+        fseek(repeat->fp, 0L, SEEK_SET);
+    }
+
+    if (repeat->remaining == 0) {
+        if (repeat->repeats == 0) {
+            /* Every pass has been played: signal the end. */
+            *osamp = 0;
+            return (ST_SUCCESS);
+        }
+        else {
+            repeat->repeats--;
+            fseek(repeat->fp, 0L, SEEK_SET);
+            repeat->remaining = repeat->total;
+        }
+    }
+
+    if (repeat->remaining < 0) {
+        st_fail("repeat: unknown error");
+        return (ST_EOF);
+    }
+
+    if (*osamp > repeat->remaining) {
+        /* Finish the current pass first ... */
+        samp = repeat->remaining;
+
+        read = fread((char *)obuf, sizeof(st_sample_t), samp,
+                     repeat->fp);
+        if (read != samp) {
+            perror("repeat");
+            st_fail("repeat1: read error on temporary file\n");
+            return (ST_EOF);
+        }
+
+        done = samp;
+        repeat->remaining = 0;
+
+        /* ... then fill the rest of obuf with as many further passes
+         * as fit and as the repeat counter allows. */
+        while (repeat->repeats > 0) {
+            repeat->repeats--;
+            fseek(repeat->fp, 0L, SEEK_SET);
+
+            samp = repeat->total;
+            if (samp > *osamp - done) {
+                samp = *osamp - done;
+            }
+
+            repeat->remaining = repeat->total - samp;
+
+            /* Write behind what is already stored; the destination has
+             * to advance with every pass or passes overwrite each other. */
+            read = fread((char *)&obuf[done], sizeof(st_sample_t), samp,
+                         repeat->fp);
+            if (read != samp) {
+                perror("repeat");
+                st_fail("repeat2: read error on temporary "
+                        "file\n");
+                return (ST_EOF);
+            }
+
+            done += samp;
+            if (done == *osamp) {
+                break;
+            }
+        }
+        *osamp = done;
+    }
+    else {
+        /* The current pass alone can fill the whole output buffer. */
+        read = fread((char *)obuf, sizeof(st_sample_t), *osamp,
+                     repeat->fp);
+        if (read != *osamp) {
+            perror("repeat");
+            st_fail("repeat3: read error on temporary file\n");
+            return (ST_EOF);
+        }
+        repeat->remaining -= *osamp;
+    }
+
+    return (ST_SUCCESS);
+}
+
+/* Shut the effect down by closing its temporary file. */
+int st_repeat_stop(eff_t effp)
+{
+    repeat_t state = (repeat_t)effp->priv;
+    FILE *tmp = state->fp;
+
+    fclose(tmp);
+    return ST_SUCCESS;
+}
+
--- a/src/st.h
+++ b/src/st.h
@@ -19,8 +19,8 @@
#include <stdlib.h>
#include "ststdint.h"
-/* Release 12.17.3 of libst */
-#define ST_LIB_VERSION_CODE 0x0c1104
+/* Release 12.17.5 of libst */
+#define ST_LIB_VERSION_CODE 0x0c1105
#define ST_LIB_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
typedef int32_t st_sample_t;
@@ -114,7 +114,7 @@
* Format information for input and output files.
*/
-#define ST_MAX_FILE_PRIVSIZE 1000
+#define ST_MAX_FILE_PRIVSIZE 1000
#define ST_MAX_EFFECT_PRIVSIZE 1000
#define ST_MAX_NLOOPS 8
--- a/src/st_i.h
+++ b/src/st_i.h
@@ -343,6 +343,13 @@
int st_vorbisstopwrite(ft_t ft);
#endif
+int st_voxstartread(ft_t ft);
+st_ssize_t st_voxread(ft_t ft, st_sample_t *buf, st_ssize_t len);
+int st_voxstopread(ft_t ft);
+int st_voxstartwrite(ft_t ft);
+st_ssize_t st_voxwrite(ft_t ft, st_sample_t *buf, st_ssize_t len);
+int st_voxstopwrite(ft_t ft);
+
int st_wavstartread(ft_t ft);
st_ssize_t st_wavread(ft_t ft, st_sample_t *buf, st_ssize_t len);
int st_wavstopread(ft_t ft);
@@ -512,6 +519,13 @@
int st_rate_flow(eff_t effp, st_sample_t *ibuf, st_sample_t *obuf,
st_size_t *isamp, st_size_t *osamp);
int st_rate_stop(eff_t effp);
+
+int st_repeat_getopts(eff_t effp, int argc, char **argv);
+int st_repeat_start(eff_t effp);
+int st_repeat_flow(eff_t effp, st_sample_t *ibuf, st_sample_t *obuf,
+ st_size_t *isamp, st_size_t *osamp);
+int st_repeat_drain(eff_t effp, st_sample_t *obuf, st_size_t *osamp);
+int st_repeat_stop(eff_t effp);
int st_resample_getopts(eff_t effp, int argc, char **argv);
int st_resample_start(eff_t effp);
--- /dev/null
+++ b/src/vox.c
@@ -1,0 +1,444 @@
+/************************************************************************
+ * SOX *
+ * *
+ * AUDIO FILE PROCESSING UTILITY *
+ * *
+ * Project : SOX *
+ * File : vox.c *
+ * Version : V12.17.4 *
+ * *
+ * Version History : V12.17.4 - Tony Seebregts *
+ * 5 May 2004 *
+ * 1. Original *
+ * *
+ * Description : SOX file format handler for Dialogic/Oki ADPCM VOX *
+ * files. *
+ * *
+ * Notes : 1. Based on the vox/devox code samples at: *
+ * *
+ * http://www.cis.ksu.edu/~tim/vox *
+ * *
+ * 2. Coded from SOX skeleton code supplied with SOX source. *
+ * *
+ * 3. Tested under: *
+ * - Windows 2000 SP3/Visual C++ V6.0 *
+ * - Windows 2000 SP3/Digital Mars V7.51 *
+ * *
+ ************************************************************************/
+
+ ///////////////////////////////////////////
+ // ORIGINAL SOX COPYRIGHT AND DISCLAIMER //
+///////////////////////////////////////////
+
+/************************************************************************
+ * July 5, 1991 *
+ * *
+ * Copyright 1991 Lance Norskog And Sundry Contributors *
+ * *
+ * This source code is freely redistributable and may be used for any *
+ * purpose. This copyright notice must be maintained. *
+ * *
+ * Lance Norskog And Sundry Contributors are not responsible for the *
+ * consequences of using this software. *
+ * *
+ ************************************************************************/
+
+ ///////////////////
+ // INCLUDE FILES //
+///////////////////
+
+#include "st_i.h"
+
+ //////////////
+ // TYPEDEFS //
+//////////////
+
+// Private handler state; the handler functions obtain it by casting ft->priv.
+typedef struct voxstuff {
+    // ADPCM codec state
+    struct {
+        short last;     // last reconstructed sample (clipped to 12 bits)
+        short index;    // current position in the STEPSIZE table
+    } state;
+
+    // byte/flag pair carried across handler calls; st_voxwrite uses it
+    // to pack two nibbles per output byte
+    struct {
+        uint8_t byte;   // byte being assembled
+        uint8_t flag;   // non-zero while only one nibble has been stored
+    } store;
+} *vox_t;
+
+
+ ///////////////
+ // CONSTANTS //
+///////////////
+
+// Quantiser step sizes, selected by the codec step index (0..48).
+// Pure lookup data: declared const so it cannot be modified by accident.
+static const short STEPSIZE[49] = {
+      16,   17,   19,   21,   23,   25,   28,
+      31,   34,   37,   41,   45,   50,   55,
+      60,   66,   73,   80,   88,   97,  107,
+     118,  130,  143,  157,  173,  190,  209,
+     230,  253,  279,  307,  337,  371,  408,
+     449,  494,  544,  598,  658,  724,  796,
+     876,  963, 1060, 1166, 1282, 1411, 1552
+};
+
+// Change applied to the step index after each code, selected by the
+// three magnitude bits of the code (code & 0x07).
+static const short STEPADJUST[8] = { -1, -1, -1, -1, 2, 4, 6, 8 };
+
+
+ /////////////////////////
+ // FUNCTION PROTOTYPES //
+/////////////////////////
+
+static uint8_t envox (short sample, vox_t state);   // 12 bit PCM sample -> ADPCM nibble
+static short   devox (uint8_t code, vox_t state);   // ADPCM nibble -> 12 bit PCM sample
+
+
+ ////////////////////
+ // IMPLEMENTATION //
+////////////////////
+
+/******************************************************************************
+ * Function   : st_voxstartread
+ * Description: Prepares a VOX file for reading: allocates the internal byte
+ *              buffer, sets the sample format fields in the file info and
+ *              resets the ADPCM codec state.
+ * Parameters : ft - file info structure
+ * Returns    : int - ST_SUCCESS
+ *                    ST_EOF (the internal buffer could not be allocated)
+ * Exceptions :
+ * Notes      : 1. VOX file format is 4-bit OKI ADPCM that decodes to
+ *                 12 bit signed linear PCM.
+ *              2. Dialogic only supports 6kHz, 8kHz and 11 kHz sampling
+ *                 rates but the codec allows any user specified rate.
+ ******************************************************************************/
+
+int st_voxstartread (ft_t ft)
+{
+    vox_t vox = (vox_t) ft->priv;
+
+    // ... internal buffer holding raw bytes from the file
+
+    ft->file.buf = malloc (ST_BUFSIZ);
+
+    if (ft->file.buf == NULL) {
+        st_fail_errno (ft,ST_ENOMEM,"Unable to allocate internal buffer memory");
+        return (ST_EOF);
+    }
+
+    ft->file.eof   = 0;
+    ft->file.pos   = 0;
+    ft->file.count = 0;
+    ft->file.size  = ST_BUFSIZ;
+
+    // ... sample format: word sized, signed, single channel
+
+    ft->info.channels = 1;
+    ft->info.encoding = ST_ENCODING_SIGN2;
+    ft->info.size     = ST_SIZE_WORD;
+
+    // ... codec starts from a zero sample at the first step size
+
+    vox->store.flag  = 0;
+    vox->store.byte  = 0;
+    vox->state.index = 0;
+    vox->state.last  = 0;
+
+    return (ST_SUCCESS);
+}
+
+
+/******************************************************************************
+ * Function   : st_voxread
+ * Description: Fills an internal buffer from the VOX file, converts the
+ *              OKI ADPCM 4-bit samples to 12-bit signed PCM and then scales
+ *              the samples to full range 16 bit PCM.
+ * Parameters : ft     - file info structure
+ *              buffer - output buffer
+ *              length - size of output buffer (in samples)
+ * Returns    : st_ssize_t - number of samples returned in buffer
+ *                           (0 once the file is exhausted)
+ * Exceptions :
+ * Notes      : 1. Each file byte carries two samples, high nibble first.
+ *              2. When the request ends between the two nibbles of a byte,
+ *                 the byte is parked in the store fields of the handler
+ *                 state and its low nibble is decoded first on the next
+ *                 call, so odd lengths (including 1) are filled completely
+ *                 instead of being rounded down to an even count.
+ *                 st_voxstartread clears the store flag.
+ ******************************************************************************/
+
+st_ssize_t st_voxread (ft_t ft,st_sample_t *buffer,st_ssize_t length)
+{
+    vox_t      state = (vox_t) ft->priv;
+    st_ssize_t count = 0;
+    uint8_t    byte;
+    short      word;
+
+    // ... low nibble held over from a previous call that ended mid-byte
+
+    if (state->store.flag != 0 && length > 0) {
+        word = devox ((uint8_t) (state->store.byte & 0x0F),state);
+        *buffer++ = ST_SIGNED_WORD_TO_SAMPLE (word * 16);
+
+        state->store.flag = 0;
+        count++;
+    }
+
+    // ... loop until buffer full or EOF
+
+    while (count < length) {
+        // ... refill buffer
+
+        if (ft->file.pos >= ft->file.count) {
+            ft->file.count = st_read (ft,ft->file.buf,1,ft->file.size);
+            ft->file.pos   = 0;
+
+            if (ft->file.count == 0)
+                break;
+        }
+
+        byte = ft->file.buf[ft->file.pos++];
+
+        // ... high nibble is the earlier sample
+
+        word = devox ((uint8_t) ((byte >> 4) & 0x0F),state);
+        *buffer++ = ST_SIGNED_WORD_TO_SAMPLE (word * 16);
+        count++;
+
+        if (count < length) {
+            word = devox ((uint8_t) (byte & 0x0F),state);
+            *buffer++ = ST_SIGNED_WORD_TO_SAMPLE (word * 16);
+            count++;
+        }
+        else {
+            // ... no room left: keep the byte so the low nibble is
+            //     decoded, in order, by the next call
+
+            state->store.byte = byte;
+            state->store.flag = 1;
+        }
+    }
+
+    return count;
+}
+
+/******************************************************************************
+ * Function   : st_voxstopread
+ * Description: Releases the internal buffer that st_voxstartread allocated.
+ * Parameters : ft - file info structure
+ * Returns    : int - ST_SUCCESS (always)
+ * Exceptions :
+ * Notes      :
+ ******************************************************************************/
+
+int st_voxstopread (ft_t ft)
+{
+    free (ft->file.buf);
+    return (ST_SUCCESS);
+}
+
+
+/******************************************************************************
+ * Function   : st_voxstartwrite
+ * Description: Prepares a VOX file for writing: allocates the internal byte
+ *              buffer, sets the sample format fields in the file info and
+ *              resets the ADPCM codec state and the nibble store.
+ * Parameters : ft - file info structure
+ * Returns    : int - ST_SUCCESS
+ *                    ST_EOF (the internal buffer could not be allocated)
+ * Exceptions :
+ * Notes      : 1. VOX file format is 4-bit OKI ADPCM that decodes to
+ *                 12 bit signed linear PCM.
+ *              2. Dialogic only supports 6kHz, 8kHz and 11 kHz sampling
+ *                 rates but the codec allows any user specified rate.
+ ******************************************************************************/
+
+int st_voxstartwrite (ft_t ft)
+{
+    vox_t vox = (vox_t) ft->priv;
+
+    // ... internal buffer collecting encoded bytes before they are written
+
+    ft->file.buf = malloc (ST_BUFSIZ);
+
+    if (ft->file.buf == NULL) {
+        st_fail_errno (ft,ST_ENOMEM,"Unable to allocate internal buffer memory");
+        return (ST_EOF);
+    }
+
+    ft->file.eof   = 0;
+    ft->file.pos   = 0;
+    ft->file.count = 0;
+    ft->file.size  = ST_BUFSIZ;
+
+    // ... sample format: word sized, signed, single channel
+
+    ft->info.channels = 1;
+    ft->info.encoding = ST_ENCODING_SIGN2;
+    ft->info.size     = ST_SIZE_WORD;
+
+    // ... codec starts from a zero sample at the first step size,
+    //     with no nibble pending
+
+    vox->store.flag  = 0;
+    vox->store.byte  = 0;
+    vox->state.index = 0;
+    vox->state.last  = 0;
+
+    return (ST_SUCCESS);
+}
+
+/******************************************************************************
+ * Function   : st_voxwrite
+ * Description: Converts the supplied buffer to 12 bit linear PCM and encodes
+ *              to OKI ADPCM 4-bit samples (packed as two nibbles per byte,
+ *              earlier sample in the high nibble).
+ * Parameters : ft     - file info structure
+ *              buffer - samples to encode
+ *              length - number of samples in buffer
+ * Returns    : st_ssize_t - number of samples consumed from buffer
+ * Exceptions :
+ * Notes      : 1. An unpaired nibble is kept in the handler state between
+ *                 calls and completed by the next call or by
+ *                 st_voxstopwrite.
+ *              2. Previously this routine returned ST_SUCCESS (0), which
+ *                 reads as "no samples written" for a function declared to
+ *                 return a sample count.
+ *              3. NOTE(review): the result of st_write is not checked here.
+ ******************************************************************************/
+
+st_ssize_t st_voxwrite (ft_t ft,st_sample_t *buffer,st_ssize_t length)
+{
+    vox_t      state = (vox_t) ft->priv;
+    st_ssize_t count = 0;
+    uint8_t    byte  = state->store.byte;
+    uint8_t    flag  = state->store.flag;
+    short      word;
+
+    while (count < length) {
+        // ... scale 16 bit sample down to the 12 bit range of the codec
+
+        word  = ST_SAMPLE_TO_SIGNED_WORD (*buffer++);
+        word /= 16;
+
+        // ... shift the new nibble in below the previous one
+
+        byte <<= 4;
+        byte  |= envox (word,state) & 0x0F;
+
+        flag++;
+        flag %= 2;
+
+        // ... flag back at zero: byte now holds a complete nibble pair
+
+        if (flag == 0) {
+            ft->file.buf[ft->file.count++] = byte;
+
+            if (ft->file.count >= ft->file.size) {
+                st_write (ft,ft->file.buf,1,ft->file.count);
+
+                ft->file.count = 0;
+            }
+        }
+
+        count++;
+    }
+
+    // ... keep last byte across calls
+
+    state->store.byte = byte;
+    state->store.flag = flag;
+
+    return (count);
+}
+
+/******************************************************************************
+ * Function   : st_voxstopwrite
+ * Description: Completes a pending nibble (if any), writes out whatever is
+ *              left in the internal buffer and frees the buffer allocated
+ *              in st_voxstartwrite.
+ * Parameters : ft - file info structure
+ * Returns    : int - ST_SUCCESS
+ * Exceptions :
+ * Notes      :
+ ******************************************************************************/
+
+int st_voxstopwrite (ft_t ft)
+{
+    vox_t vox = (vox_t) ft->priv;
+
+    // ... a nibble is still waiting for its partner: pair it with an
+    //     encoded zero sample so a whole byte can be stored
+
+    if (vox->store.flag != 0) {
+        uint8_t packed = vox->store.byte;
+
+        packed <<= 4;
+        packed  |= envox (0,vox) & 0x0F;
+
+        ft->file.buf[ft->file.count++] = packed;
+    }
+
+    // ... flush remaining bytes
+
+    if (ft->file.count > 0)
+        st_write (ft,ft->file.buf,1,ft->file.count);
+
+    // ... free buffer
+
+    free (ft->file.buf);
+
+    return (ST_SUCCESS);
+}
+
+/******************************************************************************
+ * Function   : envox
+ * Description: Internal utility routine that encodes one 12 bit signed PCM
+ *              sample as an OKI ADPCM code and advances the codec state.
+ * Parameters : sample - 12 bit linear PCM sample
+ *              state  - CODEC state
+ * Returns    : uint8_t - ADPCM nibble (in low order nibble)
+ * Exceptions :
+ * Notes      : The chosen code is passed through devox, so the stored last
+ *              sample and step index are the values devox computes for it.
+ ******************************************************************************/
+
+static uint8_t envox (short sample,vox_t state)
+{
+    short   step   = STEPSIZE[state->state.index];
+    short   diff   = sample - state->state.last;
+    uint8_t nibble = 0x00;
+
+    // ... bit 3: sign of the difference
+
+    if (diff < 0) {
+        nibble = 0x08;
+        diff   = -diff;
+    }
+
+    // ... bits 2..0: magnitude in units of step, step/2 and step/4
+
+    if (diff >= step) {
+        nibble |= 0x04;
+        diff   -= step;
+    }
+
+    if (diff >= step/2) {
+        nibble |= 0x02;
+        diff   -= step/2;
+    }
+
+    if (diff >= step/4)
+        nibble |= 0x01;
+
+    // ... use decoder to set the estimate of last sample and adjust the step index
+
+    state->state.last = devox (nibble,state);
+
+    return (nibble);
+}
+
+
+/******************************************************************************
+ * Function   : devox
+ * Description: Internal utility routine that decodes one OKI ADPCM 4-bit
+ *              code to a 12-bit signed PCM sample and updates the codec
+ *              state (last sample and step index).
+ * Parameters : code  - ADPCM code (nibble)
+ *              state - CODEC state
+ * Returns    : short - 12 bit signed PCM sample
+ * Exceptions :
+ * Notes      :
+ ******************************************************************************/
+
+static short devox (uint8_t code,vox_t state)
+{
+    short step  = STEPSIZE[state->state.index];
+    short delta = step/8;
+    short pcm;
+    short idx;
+
+    // ... magnitude bits add step, step/2 and step/4 to the base step/8
+
+    if (code & 0x04)
+        delta += step;
+
+    if (code & 0x02)
+        delta += step/2;
+
+    if (code & 0x01)
+        delta += step/4;
+
+    // ... bit 3 is the sign
+
+    if (code & 0x08)
+        delta = -delta;
+
+    pcm = state->state.last + delta;
+
+    // ... clip to 12 bits
+
+    if (pcm > 2047)
+        pcm = 2047;
+    else if (pcm < -2048)
+        pcm = -2048;
+
+    state->state.last = pcm;
+
+    // ... adjust step size, keeping the index inside the STEPSIZE table
+
+    idx = state->state.index + STEPADJUST[code & 0x07];
+
+    if (idx < 0)
+        idx = 0;
+    else if (idx > 48)
+        idx = 48;
+
+    state->state.index = idx;
+
+    // ... done
+
+    return (pcm);
+}
+