shithub: sox

--- a/libst.txt

+++ b/libst.txt

@@ -1,9 +1,7 @@

 ST(3)							    ST(3)

 NAME

        libst  -	 Sound	Tools  :  sound	 sample	 file and effects

        libraries.

@@ -58,18 +56,6 @@

        format operates from two data structures: a format  struc�

        ture, and a private structure.

-			 October 15 1996			1

-ST(3)							    ST(3)

        The format structure contains a list of control parameters

        for the sample: sampling rate, data  size  (bytes,  words,

        floats,	etc.),	encoding (unsigned, signed, logarithmic),

@@ -124,29 +110,23 @@

        getopts		   is  called  with  a	character  string

 			   argument list for the effect.

-			 October 15 1996			2

-ST(3)							    ST(3)

        start		   is called with the  signal  parameters

 			   for the input and output streams.

        flow		   is  called  with input and output data

 			   buffers, and (by reference) the  input

-			   and	output	data sizes.  It processes

-			   the	input  buffer  into  the   output

-			   buffer, and sets the size variables to

-			   the numbers of samples  actually  pro�

-			   cessed.   It is under no obligation to

-			   fill the output buffer.

+			   and output data buffer sizes.  It pro-

+			   cesses the input buffer into the  out-

+			   put	buffer,	 and  sets the size vari-

+			   ables to the numbers of samples  actu-

+			   ally	 processed.  It is under no obli-

+			   gation to read from the  input  buffer

+			   or  write  to the output buffer during

+			   the same call.  If  the  call  returns

+			   ST_EOF  then this should be used as an

+			   indication that this	 effect	 will  no

+			   longer  read	 any data and can be used

+			   to switch to drain mode sooner.

        drain		   is called  after  there  are	 no  more

 			   input  data	samples.   If  the effect

@@ -190,18 +170,6 @@

        once in a program.

        The program/library interface is pretty weak.  There's too

-			 October 15 1996			3

-ST(3)							    ST(3)

        much ad-hoc information which a	program	 is  supposed  to

        gather  up.   Sound  Tools  wants to be an object-oriented

        dataflow architecture.

@@ -208,57 +176,4 @@

-			 October 15 1996			4

+			 October 15 1996		    ST(3)

--- a/sox.1

+++ b/sox.1

@@ -287,8 +287,11 @@

 is mangling your sound samples.

 .SH FILE TYPES

 .I SoX

-uses the file extension of the input and output file to determine what

-type of file format to use.  This can be overridden by specifying the

+attempts to determine the file type of input files automatically by looking

+at the header of the audio file.  When it is unable to detect the file

+type or if it's an output file

+then it uses the file extension of the file to determine what type of file

+format handler to use.  This can be overridden by specifying the

 "-t" option on the command line.

 .P

 The input and output files may be read from standard in and out.  This

--- a/sox.txt

+++ b/sox.txt

@@ -1,9 +1,7 @@

 SoX(1)							   SoX(1)

 NAME

        sox - Sound eXchange : universal sound sample translator

@@ -26,7 +24,7 @@

 	   [ -c channels ] [ -x ] [ -e ]

        Effects:

-	   avg [ -l | -r ]

+	   avg [ -l | -r | -f | -b | n,n,...,n ]

 	   band [ -n ] center [ width ]

 	   bandpass frequency bandwidth

 	   bandreject frequency bandwidth

@@ -34,7 +32,7 @@

 		  -s | -t [ delay decay speed depth -s | -t ]

 	   compand attack1,decay1[,attack2,decay2...]

 		   in-dB1,out-dB1[,in-dB2,out-dB2...]

-		   [ gain ] [ initial-volume ]

+		   [ gain [ initial-volume [ delay ] ] ]

 	   copy

 	   cut

 	   deemph

@@ -58,23 +56,11 @@

 	   polyphase [ -w < nut / ham > ]

 		     [	-width < long / short / # > ]

 		     [ -cutoff # ]

-			  July 24, 2000				1

-SoX(1)							   SoX(1)

 	   rate

 	   resample [ -qs | -q | -ql ] [ rolloff [ beta ] ]

 	   reverb gain-out reverb-time delay [ delay ... ]

 	   reverse

-	   speed factor

+	   speed [ -c ] factor

 	   split

 	   stat [ -s n ] [ -rms ] [ -v ] [ -d ]

 	   stretch [ factor [ window fade shift fading ]

@@ -125,17 +111,6 @@

 	    sox file.au file.wav

-			  July 24, 2000				2

-SoX(1)							   SoX(1)

        translates  a  sound  file  in SUN Sparc .AU format into a

        Microsoft .WAV file, while

@@ -190,18 +165,6 @@

 		 fidelity is not as important.	When uncompressed

 		 it has	 roughly  the  precision  of  16-bit  PCM

 		 audio.	  Popular version of ADPCM include G.726,

-			  July 24, 2000				3

-SoX(1)							   SoX(1)

 		 MS ADPCM, and IMA ADPCM.  The -a flag	has  dif�

 		 ferent	 meanings in different file handlers.  In

 		 .wav files it represents MS ADPCM files, in  all

@@ -256,18 +219,6 @@

        -p	 Run in preview mode and  run  fast.   This  will

 		 somewhat speed up sox when the output format has

 		 a different number of channels and  a	different

-			  July 24, 2000				4

-SoX(1)							   SoX(1)

 		 rate  than  the  input	 file.	 Currently,  this

 		 defaults to using the rate effect instead of the

 		 resample effect for sample rate changes.

@@ -288,184 +239,165 @@

 		 your sound samples.

 FILE TYPES

-       SoX  uses  the file extension of the input and output file

-       to determine what type of file format to use.  This can be

-       overridden  by  specifying  the "-t" option on the command

-       line.

+       SoX  attempts  to  determine  the file type of input files

+       automatically by looking at the header of the audio  file.

+       When  it	 is  unable  to detect the file type or if it's an

+       output file then it uses the file extension of the file to

+       determine  what	type of file format handler to use.  This

+       can be overridden by specifying the  "-t"  option  on  the

+       command line.

-       The input and output files may be read  from  standard  in

-       and  out.  This is done by specifying '-' as the filename.

+       The  input  and	output files may be read from standard in

+       and out.	 This is done by specifying '-' as the	filename.

-       File formats which  have	 headers  are  checked,	 if  that

-       header  doesn't	seem  right,  the  program  exits with an

+       File  formats  which  have  headers  are	 checked, if that

+       header doesn't seem  right,  the	 program  exits	 with  an

        appropriate message.

        The following file formats are supported:

-       .8svx	 Amiga 8SVX musical instrument	description  for�

+       .8svx	 Amiga	8SVX  musical instrument description for�

 		 mat.

-       .aiff	 AIFF  files  used  on	Apple  IIc/IIgs	 and SGI.

-		 Note: the AIFF format	supports  only	one  SSND

+       .aiff	 AIFF files  used  on  Apple  IIc/IIgs	and  SGI.

+		 Note:	the  AIFF  format  supports only one SSND

 		 chunk.	  It  does  not	 support  multiple  sound

-		 chunks, or the 8SVX musical instrument	 descrip�

+		 chunks,  or the 8SVX musical instrument descrip�

 		 tion format.  AIFF files are multimedia archives

-		 and can have multiple audio and picture  chunks.

-		 You  may  need	 a separate archiver to work with

+		 and  can have multiple audio and picture chunks.

+		 You may need a separate archiver  to  work  with

 		 them.

        .au	 SUN Microsystems AU files.  There are apparently

-		 many  types  of  .au files; DEC has invented its

-		 own with  a  different	 magic	number	and  word

+		 many types of .au files; DEC  has  invented  its

+		 own  with  a  different  magic	 number	 and word

 		 order.	 The .au handler can read these files but

-		 will not write them.  Some .au files have  valid

-		 AU  headers  and  some	 do  not.  The latter are

-		 probably original SUN	u-law  8000  hz	 samples.

-			  July 24, 2000				5

-SoX(1)							   SoX(1)

-		 These	can  be	 dealt	with using the .ul format

+		 will  not write them.	Some .au files have valid

+		 AU headers and some  do  not.	 The  latter  are

+		 probably  original  SUN  u-law	 8000 hz samples.

+		 These can be dealt with  using	 the  .ul  format

 		 (see below).

        .avr	 Audio Visual Research

-		 The AVR format is produced by a number	 of  com�

+		 The  AVR  format is produced by a number of com�

 		 mercial packages on the Mac.

        .cdr	 CD-R

-		 CD-R  files  are used in mastering music on Com�

-		 pact Disks.  The audio data on a CD-R disk is	a

-		 raw  audio  file  with a format of stereo 16-bit

+		 CD-R files are used in mastering music	 on  Com�

+		 pact  Disks.  The audio data on a CD-R disk is a

+		 raw audio file with a format  of  stereo  16-bit

 		 signed samples at a 44khz sample rate.	 There is

-		 a  special blocking/padding oddity at the end of

-		 the audio file and is why it needs its own  han�

+		 a special blocking/padding oddity at the end  of

+		 the  audio file and is why it needs its own han�

 		 dler.

        .cvs	 Continuously Variable Slope Delta modulation

-		 Used  to  compress speech audio for applications

+		 Used to compress speech audio	for  applications

 		 such as voice mail.

        .dat	 Text Data files

-		 These files contain a textual representation  of

-		 the  sample  data.   There  is	 one  line at the

+		 These	files contain a textual representation of

+		 the sample data.   There  is  one  line  at  the

 		 beginning that contains the sample rate.  Subse�

-		 quent	lines contain two numeric data items: the

+		 quent lines contain two numeric data items:  the

 		 time since the beginning of the first sample and

 		 the sample value.  Values are normalized so that

-		 the maximum and  minimum  are	1.00  and  -1.00.

-		 This  file  format  can  be  used to create data

-		 files for external programs such as FFT  analyz�

-		 ers  or  graph routines.  SoX can also convert a

-		 file in this format back into one of  the  other

+		 the  maximum  and  minimum  are  1.00 and -1.00.

+		 This file format can  be  used	 to  create  data

+		 files	for external programs such as FFT analyz�

+		 ers or graph routines.	 SoX can also  convert	a

+		 file  in  this format back into one of the other

 		 file formats.

        .gsm	 GSM 06.10 Lossy Speech Compression

-		 A  standard for compressing speech which is used

-		 in the Global Standard for Mobil  telecommunica�

-		 tions	(GSM).	Its good for its purpose, shrink�

-		 ing audio data size, but it will introduce  lots

-		 of  noise  when  a given sound sample is encoded

+		 A standard for compressing speech which is  used

+		 in  the Global Standard for Mobile telecommunica-

+		 tions (GSM).  It's good for its purpose,  shrink-

+		 ing  audio data size, but it will introduce lots

+		 of noise when a given sound  sample  is  encoded

 		 and decoded multiple times.  This format is used

-		 by  some  voice mail applications.  It is rather

+		 by some voice mail applications.  It  is  rather

 		 CPU intensive.

 		 GSM in sox is optional and requires access to an

-		 external  GSM	library.  To see if there is sup�

-		 port for gsm run sox -h and look  for	it  under

+		 external GSM library.	To see if there	 is  sup�

+		 port  for  gsm	 run sox -h and look for it under

 		 the list of supported file formats.

-       .hcom	 Macintosh  HCOM  files.   These are (apparently)

+       .hcom	 Macintosh HCOM files.	 These	are  (apparently)

 		 Mac FSSD files with some variant of Huffman com�

-		 pression.   The Macintosh has wacky file formats

-		 and this format handler apparently doesn't  han�

-		 dle all the ones it should.  Mac users will need

-		 your usual arsenal of file  converters	 to  deal

+		 pression.  The Macintosh has wacky file  formats

+		 and   this  format  handler  apparently  doesn't

+		 handle all the ones it should.	 Mac  users  will

+		 need  your  usual  arsenal of file converters to

+		 deal with an HCOM file under Unix or DOS.

-			  July 24, 2000				6

-SoX(1)							   SoX(1)

-		 with an HCOM file under Unix or DOS.

        .maud	 An Amiga format

 		 An IFF-conform sound file type, registered by MS

-		 MacroSystem Computer GmbH, published along  with

-		 the  "Toccata"	 sound-card on the Amiga.  Allows

-		 8bit linear, 16bit linear, A-Law, u-law in  mono

+		 MacroSystem  Computer GmbH, published along with

+		 the "Toccata" sound-card on the  Amiga.   Allows

+		 8bit  linear, 16bit linear, A-Law, u-law in mono

 		 and stereo.

+       .ogg	 Ogg Vorbis Compressed Audio.

+		 Ogg Vorbis is an open, patent-free codec designed

+		 for  compressing  music and streaming audio.  It

+		 is similar to MP3, VQF,  AAC,	and  other  lossy

+		 formats.  sox can decode all types of Ogg Vorbis

+		 files, but can only encode at 128 kbps.   Decod�

+		 ing  is  somewhat  CPU intensive and encoding is

+		 very CPU intensive.

+		 Ogg Vorbis  in	 sox  is  optional  and	 requires

+		 access to external Ogg Vorbis libraries.  To see

+		 if there is support for Ogg Vorbis  run  sox  -h

+		 and look for it under the list of supported file

+		 formats as "vorbis".

        ossdsp	 OSS /dev/dsp device driver

 		 This is a pseudo-file type and can be optionally

-		 compiled into Sox.  Run sox -h	 to  see  if  you

-		 have  support	for  this  file	 type.	When this

-		 driver is used it allows you to open up the  OSS

-		 /dev/dsp  file	 and configure it to use the same

-		 data format as passed in to  /fBSoX.	It  works

-		 for  both  playing  and recording sound samples.

-		 When playing sound files it attempts to  set  up

-		 the  OSS  driver  to  use the same format as the

-		 input file.  It is suggested to always	 override

-		 the  output  values  to  use the highest quality

+		 compiled  into	 Sox.	Run  sox -h to see if you

+		 have support for  this	 file  type.   When  this

+		 driver	 is used it allows you to open up the OSS

+		 /dev/dsp file and configure it to use	the  same

+		 data  format  as  passed in to SoX.	 It works

+		 for both playing and  recording  sound	 samples.

+		 When  playing	sound files it attempts to set up

+		 the OSS driver to use the  same  format  as  the

+		 input	file.  It is suggested to always override

+		 the output values to  use  the	 highest  quality

 		 samples your sound card can handle.  Example: -t

 		 ossdsp -w -s /dev/dsp

        .sf	 IRCAM Sound Files.

-		 Sound	Files are used by academic music software

-		 such as the  CSound  package,	and  the  MixView

+		 Sound Files are used by academic music	 software

+		 such  as  the	CSound	package,  and the MixView

 		 sound sample editor.

        .sph

-		 SPHERE	 (SPeech HEader Resources) is a file for�

+		 SPHERE (SPeech HEader Resources) is a file  for�

 		 mat defined by NIST (National Institute of Stan�

-		 dards	and  Technology)  and is used with speech

-		 audio.	 SoX can read these files when they  con�

-		 tain  ulaw  and  PCM  data.   It will ignore any

-		 header information that says the  data	 is  com�

+		 dards and Technology) and is  used  with  speech

+		 audio.	  SoX can read these files when they con�

+		 tain ulaw and PCM  data.   It	will  ignore  any

+		 header	 information  that  says the data is com�

 		 pressed using shorten compression and will treat

 		 the data as either ulaw or PCM.  This will allow

-		 SoX  and  the command line shorten program to be

-		 ran together using pipes to uncompress the  data

-		 and  then pass the result to SoX for processing.

+		 SoX and the command line shorten program  to  be

+		 run  together using pipes to uncompress the data

+		 and then pass the result to SoX for  processing.

        .smp	 Turtle Beach SampleVision files.

-		 SMP files are for use with  the  PC-DOS  package

-		 SampleVision  by  Turtle  Beach  Softworks. This

-		 package is for	 communication	to  several  MIDI

-		 samplers.  All sample rates are supported by the

-		 package, although not all are supported  by  the

-		 samplers  themselves.	Currently loop points are

+		 SMP  files  are  for use with the PC-DOS package

+		 SampleVision by  Turtle  Beach	 Softworks.  This

+		 package  is  for  communication  to several MIDI

+		 samplers. All sample rates are supported by  the

+		 package,  although  not all are supported by the

+		 samplers themselves. Currently loop  points  are

 		 ignored.

        .snd

-		 Under DOS this file format is the  same  as  the

-		 .sndt	format.	  Under all other platforms it is

-			  July 24, 2000				7

-SoX(1)							   SoX(1)

+		 Under	DOS  this  file format is the same as the

+		 .sndt format.	Under all other platforms  it  is

 		 the same as the .au format.

        .sndt	 SoundTool files.

@@ -473,157 +405,156 @@

        sunau	 Sun /dev/audio device driver

 		 This is a pseudo-file type and can be optionally

-		 compiled  into	 Sox.	Run  sox -h to see if you

-		 have support for  this	 file  type.   When  this

-		 driver	 is  used  it allows you to open up a Sun

+		 compiled into Sox.  Run sox -h	 to  see  if  you

+		 have  support	for  this  file	 type.	When this

+		 driver is used it allows you to open  up  a  Sun

 		 /dev/audio file and configure it to use the same

-		 data  type  as	 passed	 in to Sox.  It works for

-		 both playing and recording sound samples.   When

-		 playing  sound	 files	it attempts to set up the

+		 data type as passed in to  Sox.   It  works  for

+		 both  playing and recording sound samples.  When

+		 playing sound files it attempts to  set  up  the

 		 audio driver to use the same format as the input

-		 file.	 It  is	 suggested to always override the

+		 file.	It is suggested to  always  override  the

 		 output values to use the highest quality samples

-		 your  hardware can handle.  Example: -t sunau -w

+		 your hardware can handle.  Example: -t sunau  -w

 		 -s /dev/audio or -t sunau -U -c 1 /dev/audio for

 		 older sun equipment.

        .txw	 Yamaha TX-16W sampler.

-		 A  file  format  from a Yamaha sampling keyboard

-		 which wrote IBM-PC format 3.5"	 floppies.   Han�

+		 A file format from a  Yamaha  sampling	 keyboard

+		 which	wrote  IBM-PC format 3.5" floppies.  Han�

 		 dles reading of files which do not have the sam�

-		 ple rate field set to one  of	the  expected  by

-		 looking  at  some other bytes in the attack/loop

-		 length fields, and defaulting to  33kHz  if  the

+		 ple  rate  field  set	to one of the expected by

+		 looking at some other bytes in	 the  attack/loop

+		 length	 fields,  and  defaulting to 33kHz if the

 		 sample rate is still unknown.

        .vms	 More info to come.

-		 Used  to  compress speech audio for applications

+		 Used to compress speech audio	for  applications

 		 such as voice mail.

        .voc	 Sound Blaster VOC files.

-		 VOC files are	multi-part  and	 contain  silence

-		 parts,	 looping,  and different sample rates for

-		 different chunks.  On input, the  silence  parts

-		 are  filled  out, loops are rejected, and sample

-		 data  with  a	new  sample  rate  is	rejected.

-		 Silence  with	a different sample rate is gener�

-		 ated appropriately.  On output, silence  is  not

+		 VOC  files  are  multi-part  and contain silence

+		 parts, looping, and different sample  rates  for

+		 different  chunks.   On input, the silence parts

+		 are filled out, loops are rejected,  and  sample

+		 data	with  a	 new  sample  rate  is	rejected.

+		 Silence with a different sample rate  is  gener�

+		 ated  appropriately.	On output, silence is not

 		 detected, nor are impossible sample rates.

+       vorbis	 See .ogg format.

        .wav	 Microsoft .WAV RIFF files.

-		 These	appear	to  be very similar to IFF files,

-		 but not the same.  They  are  the  native  sound

+		 These appear to be very similar  to  IFF  files,

+		 but  not  the	same.	They are the native sound

 		 file format of Windows.  (Obviously, Windows was

-		 of such incredible importance	to  the	 computer

-		 industry  that it just had to have its own sound

+		 of  such  incredible  importance to the computer

+		 industry that it just had to have its own  sound

 		 file format.)	Normally .wav files have all for�

-		 matting  information in their headers, and so do

-		 not need any format  options  specified  for  an

-		 input	file.  If any are, they will override the

-			  July 24, 2000				8

-SoX(1)							   SoX(1)

-		 file header, and you  will  be	 warned	 to  this

+		 matting information in their headers, and so  do

+		 not  need  any	 format	 options specified for an

+		 input file. If any are, they will  override  the

+		 file  header,	and  you  will	be warned to this

 		 effect.  You had better know what you are doing!

-		 Output format options will cause a  format  con�

-		 version,  and	the  .wav  will written appropri�

-		 ately.	 Sox currently can read PCM, ULAW,  ALAW,

-		 MS  ADPCM, and IMA (or DVI) ADPCM.  It can write

+		 Output	 format	 options will cause a format con�

+		 version, and the  .wav	 will be written	appropri-

+		 ately.	  Sox currently can read PCM, ULAW, ALAW,

+		 MS ADPCM, and IMA (or DVI) ADPCM.  It can  write

 		 all of these formats including (NEW!)	the ADPCM

 		 encoding.

        .wve	 Psion 8-bit alaw

-		 These	are  8-bit a-law 8khz sound files used on

+		 These are 8-bit a-law 8khz sound files	 used  on

 		 the Psion palmtop portable computer.

        .raw	 Raw files (no header).

-		 The sample rate, size	(byte,	word,  etc),  and

+		 The  sample  rate,  size  (byte, word, etc), and

 		 encoding (signed, unsigned, etc.)  of the sample

-		 file must be  given.	The  number  of	 channels

+		 file  must  be	 given.	  The  number of channels

 		 defaults to 1.

        .ub, .sb, .uw, .sw, .ul, .al, .sl

-		 These	are  several  suffices	which  serve as a

-		 shorthand for raw files with a	 given	size  and

-		 encoding.   Thus, ub, sb, uw, sw, ul and sl cor�

-		 respond  to  "unsigned	 byte",	 "signed   byte",

-		 "unsigned  word",  "signed word", "ulaw" (byte),

-		 "alaw" (byte), and "signed  long".   The  sample

-		 rate  defaults to 8000 hz if not explicitly set,

-		 and the number of channels (as always)	 defaults

-		 to  1.	 There are lots of Sparc samples floating

-		 around in u-law format with no header and  fixed

-		 at  a	sample	rate  of 8000 hz.  (Certain sound

+		 These are several  suffixes  which  serve  as	a

+		 shorthand  for	 raw  files with a given size and

+		 encoding.  Thus, ub, sb, uw, sw, ul, al and sl  cor-

+		 respond   to  "unsigned  byte",  "signed  byte",

+		 "unsigned word", "signed word",  "ulaw"  (byte),

+		 "alaw"	 (byte),  and  "signed long".  The sample

+		 rate defaults to 8000 hz if not explicitly  set,

+		 and  the number of channels (as always) defaults

+		 to 1.	There are lots of Sparc samples	 floating

+		 around	 in u-law format with no header and fixed

+		 at a sample rate of  8000  hz.	  (Certain  sound

 		 management software cheerfully ignores the head�

-		 ers.)	 Similarly,  most  Mac sound files are in

+		 ers.)	Similarly, most Mac sound  files  are  in

 		 unsigned byte format with a sample rate of 11025

 		 or 22050 hz.

-       .auto	 This  is  a  ``meta-type'': specifying this type

-		 for an input file triggers some code that  tries

-		 to  guess  the	 real  type  by looking for magic

-		 words in the  header.	 If  the  type	can't  be

-		 guessed,  the	program	 exits with an error mes�

-		 sage.	The input must be a  plain  file,  not	a

+       .auto	 This is a ``meta-type'':  specifying  this  type

+		 for  an input file triggers some code that tries

+		 to guess the real  type  by  looking  for  magic

+		 words	in  the	 header.   If  the  type can't be

+		 guessed, the program exits with  an  error  mes�

+		 sage.	 The  input  must  be a plain file, not a

 		 pipe.	This type can't be used for output files.

 EFFECTS

        Multiple effects may be applied to the audio data by spec�

-       ifying  them  one  after another at the end of the command

+       ifying them one after another at the end	 of  the  command

        line.

-       avg [ -l | -r ]

-		 Reduce the number of channels by  averaging  the

-		 samples,  or  duplicate channels to increase the

-		 number of channels.  This  effect  is	automati�

-		 cally	used  when  the	 number of input channels

+       avg [ -l | -r | -f | -b | n,n,...,n ]

+		 Reduce	 the  number of channels by averaging the

+		 samples, or duplicate channels to  increase  the

+		 number	 of  channels.	 This effect is automati�

+		 cally used when the  number  of  input	 channels

+		 differs from the number of output channels.  When

+		 reducing the number of channels it  is	 possible

+		 to  manually  specify the avg effect and use the

+		 -l, -r, -f, or -b options  to	select	only  the

+		 left,	right,	front, or back channel(s) for the

+		 output instead of averaging the  channels.   The

+		 -f  and  -b  options  maintain left/right stereo

+		 separation; use the avg effect twice to select a

+		 single channel.

+		 The avg effect can also be invoked with up to 16

+		 double-precision numbers, which specify the pro�

+		 portion  of  each  input  channel  that is to be

+		 mixed into each output channel.  In  two-channel

+		 mode, 4 numbers are given: l->l, l->r, r->l, and

+		 r->r, respectively.  In four-channel  mode,  the

+		 first	4  numbers  give  the proportions for the

+		 left-front output channel, as	follows:  lf->lf,

+		 rf->lf, lb->lf, and rb->lf.  The next 4 give the

+		 right-front output in the same order, then left-

+		 back and right-back.

+		 It  is	 also  possible	 to use the 16 numbers to

+		 expand or reduce the channel count; just specify

+		 0 for unused channels.	 Finally, if fewer than 4

+		 numbers are given, certain special abbreviations

+		 may be invoked; see the source code for details.

-			  July 24, 2000				9

-SoX(1)							   SoX(1)

-		 differ from the number of output channels.  When

-		 reducing  the	number of channels it is possible

-		 to manually specify the avg effect and	 use  the

-		 -l  and  -r  options  to select only the left or

-		 right channel for the output instead of  averag�

-		 ing the two channels.

        band [ -n ] center [ width ]

-		 Apply	 a   band-pass	 filter.   The	frequency

+		 Apply	a  band-pass   filter.	  The	frequency

 		 response drops logarithmically around the center

-		 frequency.   The  width  gives	 the slope of the

-		 drop.	The frequencies at  center  +  width  and

-		 center	 -  width  will be half of their original

+		 frequency.  The width gives  the  slope  of  the

+		 drop.	 The  frequencies  at  center + width and

+		 center - width will be half  of  their	 original

 		 amplitudes.  Band defaults to a mode oriented to

 		 pitched signals, i.e. voice, singing, or instru�

-		 mental music.	The -n (for  noise)  option  uses

-		 the   alternate  mode	for  un-pitched	 signals.

-		 Warning: -n introduces	 a  power-gain	of  about

-		 11dB  in  the	filter, so beware of output clip�

+		 mental	 music.	  The  -n (for noise) option uses

+		 the  alternate	 mode  for  un-pitched	 signals.

+		 Warning:  -n  introduces  a  power-gain of about

+		 11dB in the filter, so beware	of  output  clip�

 		 ping.	Band introduces noise in the shape of the

 		 filter, i.e. peaking at the center frequency and

-		 settling around it.  See filter for  a	 bandpass

+		 settling  around  it.	See filter for a bandpass

 		 effect with steeper shoulders.

        bandpass frequency bandwidth

-		 Butterworth  bandpass filter. Description coming

+		 Butterworth bandpass filter. Description  coming

 		 soon!

        bandreject frequency bandwidth

@@ -633,10 +564,10 @@

        chorus gain-in gain-out delay decay speed depth

 	      -s | -t [ delay decay speed depth -s | -t ... ]

-		 Add  a chorus to a sound sample.  Each quadtuple

-		 delay/decay/speed/depth gives the delay in  mil�

-		 liseconds  and	 the  decay (relative to gain-in)

-		 with a modulation speed in  Hz	 using	depth  in

+		 Add a chorus to a sound sample.  Each	quadruple

+		 delay/decay/speed/depth  gives the delay in mil�

+		 liseconds and the decay  (relative  to	 gain-in)

+		 with  a  modulation  speed  in Hz using depth in

 		 milliseconds.	The modulation is either sinodial

 		 (-s) or triangular (-t).  Gain-out is the volume

 		 of the output.

@@ -645,51 +576,50 @@

 	       in-dB1,out-dB1[,in-dB2,out-dB2...]

-	       [gain] [initial-volume]

-		 Compand  (compress  or expand) the dynamic range

-		 of a sample.  The attack and decay time  specify

-		 the  integration  time	 over  which the absolute

-		 value of  the	input  signal  is  integrated  to

-		 determine  its volume.	 Where more than one pair

-		 of attack/decay parameters are	 specified,  each

+	       [gain [initial-volume [delay ] ] ]

+		 Compand (compress or expand) the  dynamic  range

+		 of  a sample.	The attack and decay time specify

+		 the integration time  over  which  the	 absolute

+		 value	of  the	 input	signal	is  integrated to

+		 determine its volume; attacks refer to increases

+		 in  volume and decays refer to decreases.  Where

+		 more than one pair  of	 attack/decay  parameters

+		 are  specified,  each	channel	 is treated sepa�

+		 rately and the number of pairs must  agree  with

+		 the number of input channels.	The second param�

+		 eter is a list	 of  points  on	 the  compander's

+		 transfer  function  specified	in dB relative to

+		 the  maximum  possible	 signal	 amplitude.   The

+		 input	values	must  be in a strictly increasing

+		 order but the transfer function does not have to

+		 be monotonically rising.  The special value -inf

+		 may be used to indicate that  the  input  volume

+		 should	 be associated output volume.  The points

+		 -inf,-inf and 0,0 are assumed; the latter may be

+		 overridden, but the former may not.

+		 The third (optional) parameter is a postprocess�

+		 ing gain in dB which is applied after	the  com�

+		 pression  has taken place; the fourth (optional)

+		 parameter is an initial volume to be assumed for

+		 each  channel when the effect starts.	This per�

+		 mits the user to supply  a  nominal  level  ini�

+		 tially,  so that, for example, a very large gain

+		 is not applied to initial signal  levels  before

+		 the  companding  action has begun to operate: it

+		 is quite probable that in  such  an  event,  the

+		 output	 would be severely clipped while the com�

+		 pander gain properly adjusts itself.

+		 The fifth (optional) parameter	 is  a	delay  in

+		 seconds.   The	 input signal is analyzed immedi�

+		 ately	to  control  the  compander,  but  it  is

+		 delayed before being fed to the volume adjuster.

+		 Specifying a delay approximately  equal  to  the

+		 attack/decay	times  allows  the  compander  to

+		 effectively operate  in  a  "predictive"  rather

+		 than a reactive mode.

-			  July 24, 2000			       10

-SoX(1)							   SoX(1)

-		 channel  is treated separately and the number of

-		 pairs must agree with the number of input  chan�

-		 nels.	 The second parameter is a list of points

-		 on the compander's transfer  function	specified

-		 in  dB	 relative  to the maximum possible signal

-		 amplitude.   The  input  values  must	be  in	a

-		 strictly increasing order but the transfer func�

-		 tion does not have to be  monotonically  rising.

-		 The  special  value -inf may be used to indicate

-		 that the input volume should be associated  out�

-		 put  volume.	The  points -inf,-inf and 0,0 are

-		 assumed; the latter may be overridden,	 but  the

-		 former	 may not.  The third (optional) parameter

-		 is a postprocessing gain in dB which is  applied

-		 after	the  compression  has  taken  place;  the

-		 fourth (optional) parameter is an initial volume

-		 to  be	 assumed for each channel when the effect

-		 starts.  This permits the user to supply a nomi�

-		 nal  level  initially,	 so  that, for example, a

-		 very large gain is not applied to initial signal

-		 levels before the companding action has begun to

-		 operate: it is quite probable that  in	 such  an

-		 event,	 the  output  would  be	 severely clipped

-		 while	the  compander	gain   properly	  adjusts

-		 itself.

        copy	 Copy the input file to the output file.  This is

 		 the default effect if both files have	the  same

 		 sampling rate.

@@ -718,18 +648,6 @@

 		 decay (relative to gain-in) of that echo.  Gain-

 		 out is the volume of the output.

-			  July 24, 2000			       11

-SoX(1)							   SoX(1)

        echos gain-in gain-out delay decay [ delay decay ... ]

 		 Add a sequence of echos to a sound sample.  Each

 		 delay/decay part gives the delay in milliseconds

@@ -747,65 +665,54 @@

 		 volume over fade-in-length seconds.   Specify	0

 		 seconds if no fade-in is wanted.

-		 For  fade-outs,  the audio data will be trucated

+		 For  fade-outs, the audio data will be truncated

 		 at the stop-time and the volume will  be  ramped

 		 from full volume down to 0 starting at fade-out-

 		 length seconds before the stop-time.	No  fade-

 		 out is performed if these options are not speci�

-		 fied.

+		 fied.	All times can be  specified  in	 seconds,

+		 mm:ss.frac, or hh:mm:ss.frac format.

-		 An optional type can be specified to change  the

-		 type  of envelope.  Choices are q for quarter of

-		 a sinewave, h for half a sinewave, t for  linear

-		 slope,	 l  for	 logarithmic,  and p for inverted

+		 An  optional type can be specified to change the

+		 type of envelope.  Choices are q for quarter  of

+		 a  sinewave, h for half a sinewave, t for linear

+		 slope, l for logarithmic,  and	 p  for	 inverted

 		 parabola.  The default is a linear slope.

        filter [ low ]-[ high ] [ window-len [ beta ] ]

 		 Apply	a  Sinc-windowed  lowpass,  highpass,  or

-		 bandpass  filter  of  given window length to the

-		 signal.  low refers  to  the  frequency  of  the

-		 lower	6dB corner of the filter.  high refers to

-		 the frequency of the upper  6dB  corner  of  the

+		 bandpass filter of given window  length  to  the

+		 signal.   low	refers	to  the	 frequency of the

+		 lower 6dB corner of the filter.  high refers  to

+		 the  frequency	 of  the  upper 6dB corner of the

 		 filter.

-		 A  lowpass  filter  is	 obtained  by leaving low

-		 unspecified,  or  0.	A  highpass   filter   is

-		 obtained  by  leaving high unspecified, or 0, or

-		 greater than or equal to the Nyquist  frequency.

+		 A lowpass filter  is  obtained	 by  leaving  low

+		 unspecified,	or   0.	  A  highpass  filter  is

+		 obtained by leaving high unspecified, or  0,  or

+		 greater  than or equal to the Nyquist frequency.

 		 The window-len, if unspecified, defaults to 128.

-		 Longer windows give a	sharper	 cutoff,  smaller

+		 Longer	 windows  give	a sharper cutoff, smaller

 		 windows a more gradual cutoff.

-		 The  beta, if unspecified, defaults to 16.  This

-		 selects a Kaiser window.  You can select a  Nut�

-		 tall  window by specifying anything <= 2.0 here.

-		 For more discussion  of  beta,	 look  under  the

+		 The beta, if unspecified, defaults to 16.   This

+		 selects  a Kaiser window.  You can select a Nut�

+		 tall window by specifying anything <= 2.0  here.

+		 For  more  discussion	of  beta,  look under the

 		 resample effect.

-			  July 24, 2000			       12

-SoX(1)							   SoX(1)

        flanger gain-in gain-out delay decay speed < -s | -t >

-		 Add  a	 flanger  to a sound sample.  Each triple

-		 delay/decay/speed gives the delay  in	millisec�

-		 onds  and the decay (relative to gain-in) with a

+		 Add a flanger to a sound  sample.   Each  triple

+		 delay/decay/speed  gives  the delay in millisec�

+		 onds and the decay (relative to gain-in) with	a

 		 modulation  speed  in	Hz.   The  modulation  is

-		 either	 sinodial (-s) or triangular (-t).  Gain-

+		 either sinusoidal (-s) or triangular (-t).  Gain-

 		 out is the volume of the output.

        highp frequency

-		 Apply a single pole recursive high-pass  filter.

+		 Apply	a single pole recursive high-pass filter.

 		 The  frequency	 response  drops  logarithmically

 		 with I frequency in the middle of the drop.  The

 		 slope of the filter is quite gentle.  See filter

@@ -812,87 +719,75 @@

 		 for a highpass effect with sharper cutoff.

        highpass frequency

-		 Butterworth highpass filter.	Description  com�

+		 Butterworth  highpass	filter.	 Description com�

 		 ming soon!

        lowp frequency

-		 Apply	a  single pole recursive low-pass filter.

+		 Apply a single pole recursive	low-pass  filter.

 		 The  frequency	 response  drops  logarithmically

-		 with  frequency  in the middle of the drop.  The

+		 with frequency in the middle of the  drop.   The

 		 slope of the filter is quite gentle.  See filter

 		 for a lowpass effect with sharper cutoff.

        lowpass frequency

-		 Butterworth  lowpass filter.  Description coming

+		 Butterworth lowpass filter.  Description  coming

 		 soon!

        map	 Display a list of loops in a sample, and miscel�

 		 laneous loop info.

-       mask	 Add  "masking	noise"	to  signal.   This effect

-		 deliberately adds white  noise	 to  a	sound  in

-		 order	to  mask quantization effects, created by

-		 the process of playing a  sound  digitally.   It

-		 tends	to  mask buzzing voices, for example.  It

-		 adds 1/2 bit of noise to the sound file  at  the

+       mask	 Add "masking  noise"  to  signal.   This  effect

+		 deliberately  adds  white  noise  to  a sound in

+		 order to mask quantization effects,  created  by

+		 the  process  of  playing a sound digitally.  It

+		 tends to mask buzzing voices, for  example.   It

+		 adds  1/2  bit of noise to the sound file at the

 		 output bit depth.

        pan direction

-		 Pan  the sound of an audio file from one channel

+		 Pan the sound of an audio file from one  channel

 		 to another.  This is done by changing the volume

-		 of  the  input	 channels so that it fades out on

-		 one channel and fades-in  on  another.	  If  the

-		 number	 of  input channels is different then the

+		 of the input channels so that it  fades  out  on

+		 one  channel  and  fades-in  on another.  If the

+		 number of input channels is different	then  the

 		 number of output channels then this effect tries

-		 to  intelligently handle this.	 For instance, if

+		 to intelligently handle this.	For instance,  if

 		 the input contains 1 channel and the output con�

-		 tains	2 channels, then it will create the miss�

-		 ing channel itself.  The direction  is	 a  value

-		 from  -1.0 to 1.0.  -1.0 represents far left and

-		 1.0 represents far right.   Numbers  in  between

-			  July 24, 2000			       13

-SoX(1)							   SoX(1)

+		 tains 2 channels, then it will create the  miss�

+		 ing  channel  itself.	 The direction is a value

+		 from -1.0 to 1.0.  -1.0 represents far left  and

+		 1.0  represents  far  right.  Numbers in between

 		 will start the pan effect without totally muting

 		 the opposite channel.

        phaser gain-in gain-out delay decay speed < -s | -t >

-		 Add a phaser to a  sound  sample.   Each  triple

-		 delay/decay/speed  gives  the delay in millisec�

-		 onds and the decay (relative to gain-in) with	a

+		 Add  a	 phaser	 to  a sound sample.  Each triple

+		 delay/decay/speed gives the delay  in	millisec�

+		 onds  and the decay (relative to gain-in) with a

 		 modulation  speed  in	Hz.   The  modulation  is

-		 either sinodial (-s) or  triangular  (-t).   The

+		 either sinusoidal (-s) or triangular (-t).  The

 		 decay should be less than 0.5 to avoid feedback.

 		 Gain-out is the volume of the output.

        pick [ -1 | -2 | -3 | -4 | -l | -r ]

-		 Select the left or right  channel  of	a  stereo

-		 sample,  or  one  of  four channels in a quadro�

-		 phonic sample. The -l and -r  options	represent

-		 either	  the  left  or	 right	channel.   It  is

-		 required that you use	the  -c	 1  command  line

+		 Select	 the  left  or	right channel of a stereo

+		 sample, or one of four	 channels  in  a  quadro�

+		 phonic	 sample.  The -l and -r options represent

+		 either	 the  left  or	right  channel.	  It   is

+		 required  that	 you  use  the	-c 1 command line

 		 option in order to force the output file to con�

 		 tain only 1 channel.

        pitch shift [ width interpole fade ]

-		 Change the pitch of file without  affecting  its

+		 Change	 the  pitch of file without affecting its

 		 duration by cross-fading shifted samples.  shift

 		 is given in cents. Use a positive value to shift

-		 to  treble,  negative	value  to  shift to bass.

-		 Default shift is 0.  width of window is  in  ms.

-		 Default  width is 20ms. Try 30ms to lower pitch,

-		 and 10ms to raise pitch.  interpole option,  can

+		 to treble, negative  value  to	 shift	to  bass.

+		 Default  shift	 is 0.	width of window is in ms.

+		 Default width is 20ms. Try 30ms to lower  pitch,

+		 and  10ms to raise pitch.  interpole option, can

 		 be "cubic" or "linear". Default is "cubic".  The

-		 fade option, can be "cos",  "hamming",	 "linear"

+		 fade  option,	can be "cos", "hamming", "linear"

 		 or "trapezoid".  Default is "cos".

        polyphase [ -w < nut / ham > ]

@@ -901,59 +796,47 @@

 		 [ -cutoff #  ]

 		 Translate input sampling rate to output sampling

-		 rate via polyphase interpolation,  a  DSP  algo�

-		 rithm.	  This	method	is  slow and uses lots of

+		 rate  via  polyphase  interpolation, a DSP algo�

+		 rithm.	 This method is slow  and  uses	 lots  of

 		 RAM, but gives much better results than rate.

-		 -w < nut / ham > : select either a  Nuttal  (~90

-		 dB  stopband)	or Hamming (~43 dB stopband) win�

+		 -w  <	nut / ham > : select either a Nuttall (~90

+		 dB stopband) or Hamming (~43 dB  stopband)  win�

 		 dow.  Default is nut.

-		 -width long / short / # : specify the	(approxi�

-		 mate)	width  of  the filter.	long is 1024 sam�

-		 ples; short is 128 samples.   Alternatively,  an

+		 -width	 long / short / # : specify the (approxi�

+		 mate) width of the filter.  long  is  1024  sam�

+		 ples;	short  is 128 samples.	Alternatively, an

 		 exact number can be used.  Default is long.  The

-		 short option is not recommended, as it	 produces

+		 short	option is not recommended, as it produces

 		 poor quality results.

-			  July 24, 2000			       14

-SoX(1)							   SoX(1)

-		 -cutoff  # : specify the filter cutoff frequency

-		 in terms of  fraction	of  frequency  bandwidth,

-		 also  know as the Nyquist frequency.  Please see

-		 the resample effect for further  information  on

-		 Nyquist  frequency.  If upsampling, then this is

-		 the fraction of the original signal that  should

-		 go  through.  If downsampling, this is the frac�

-		 tion of  the  signal  left  after  downsampling.

+		 -cutoff # : specify the filter cutoff	frequency

+		 in  terms  of	fraction  of frequency bandwidth,

+		 also known as the Nyquist frequency.  Please see

+		 the  resample	effect for further information on

+		 Nyquist frequency.  If upsampling, then this  is

+		 the  fraction of the original signal that should

+		 go through.  If downsampling, this is the  frac�

+		 tion  of  the	signal	left  after downsampling.

 		 Default is 0.95.  Remember that this is a float.

        rate	 Translate input sampling rate to output sampling

-		 rate  via linear interpolation to the Least Com�

+		 rate via linear interpolation to the Least  Com�

 		 mon Multiple of the two sampling rates.  This is

 		 the default effect if the two files have differ�

-		 ent sampling rates and the preview  options  was

+		 ent  sampling	rates and the preview options was

 		 specified.  This is fast but noisy: the spectrum

-		 of the original sound will  be	 shifted  upwards

-		 and  duplicated faintly when up-translating by a

+		 of  the  original  sound will be shifted upwards

+		 and duplicated faintly when up-translating by	a

 		 multiple.

-		 Lerp-ing is acceptable	 for  cheap  8-bit  sound

-		 hardware,  but	 for  CD-quality sound you should

-		 instead use either resample  or  polyphase.   If

+		 Lerp-ing  is  acceptable  for	cheap 8-bit sound

+		 hardware, but for CD-quality  sound  you  should

+		 instead  use  either  resample or polyphase.  If

 		 you are wondering which rate changing effects to

-		 use, you will want to read a  detailed	 analysis

+		 use,  you  will want to read a detailed analysis

 		 of  all  of  them  at	http://eakaw2.et.tu-dres�

 		 den.de/~wilde/resample/resample.html

@@ -960,40 +843,28 @@

        resample [ -qs | -q | -ql ] [ rolloff [ beta ] ]

 		 Translate input sampling rate to output sampling

 		 rate  via  simulated  analog  filtration.   This

-		 method is slower than rate, but gives much  bet�

+		 method	 is slower than rate, but gives much bet�

 		 ter results.

 		 By default, linear interpolation is used, with a

-		 window width about 45 samples at  the	lower  of

-		 the  two  rate.  This gives an accuracy of about

-		 16 bits, but insufficient stopband rejection  in

-		 the  case  that you want to have rolloff greater

+		 window	 width	about  45 samples at the lower of

+		 the two rates.  This gives an accuracy of  about

+		 16  bits, but insufficient stopband rejection in

+		 the case that you want to have	 rolloff  greater

 		 than about 0.80 of the Nyquist frequency.

-		 The -q* options will change the  default  values

-		 for  rolloff  and  beta as well as use quadratic

-		 interpolation of filter coefficients,	resulting

+		 The  -q*  options will change the default values

+		 for rolloff and beta as well  as  use	quadratic

+		 interpolation	of filter coefficients, resulting

 		 in about 24 bits precision.  The -qs, -q, or -ql

-		 options specify increased accuracy at	the  cost

-		 of  lower  execution  speed.	It is optional to

-		 specify rolloff and beta parameters  when  using

+		 options  specify  increased accuracy at the cost

+		 of lower execution speed.   It	 is  optional  to

+		 specify  rolloff  and beta parameters when using

 		 the -q* options.

-		 Following  is a table of the reasonable defaults

+		 Following is a table of the reasonable	 defaults

 		 which are built-in to sox:

-			  July 24, 2000			       15

-SoX(1)							   SoX(1)

 		    Option  Window rolloff beta interpolation

 		    ------  ------ ------- ---- -------------

 		    (none)    45    0.80    16	   linear

@@ -1003,90 +874,78 @@

 		    ------  ------ ------- ---- -------------

 		 -qs, -q, or -ql use window lengths of 45, 75, or

-		 149  samples, respectively, at the lower sample-

+		 149 samples, respectively, at the lower  sample-

 		 rate of the two files.	 This means progressively

-		 sharper  stop-band  rejection, at proportionally

+		 sharper stop-band rejection,  at  proportionally

 		 slower execution times.

-		 rolloff refers to the cut-off frequency  of  the

-		 low  pass  filter  and	 is given in terms of the

-		 Nyquist frequency for	the  lower  sample  rate.

-		 rolloff  therefore  should  be something between

+		 rolloff  refers  to the cut-off frequency of the

+		 low pass filter and is given  in  terms  of  the

+		 Nyquist  frequency  for  the  lower sample rate.

+		 rolloff therefore should  be  something  between

 		 0.0 and 1.0, in practice 0.8-0.95.  The defaults

 		 are indicated above.

 		 The Nyquist frequency is equal to (sample rate /

-		 2).  Logically, this is  because  the	A/D  con�

-		 verter	 needs	at  least  2  samples to detect 1

-		 cycle at  the	Nyquist	 frequency.   Frequencies

-		 higher	 then the Nyquist will actually appear as

-		 lower frequencies to the A/D  converter  and  is

+		 2).   Logically,  this	 is  because the A/D con�

+		 verter needs at least	2  samples  to	detect	1

+		 cycle	at  the	 Nyquist  frequency.  Frequencies

+		 higher than the Nyquist will actually appear  as

+		 lower	frequencies  to	 the A/D converter and is

 		 called aliasing.  Normally, A/D converts run the

-		 signal through a highpass filter first to  avoid

+		 signal	 through a lowpass filter first to  avoid

 		 these problems.

-		 Similar  problems  will  happen in software when

-		 reducing the sample rate of an audio file  (fre�

-		 quencies  above the new Nyquist frequency can be

-		 aliased to  lower  frequencies).   Therefore,	a

-		 good  resample	 effect will remove all frequency

+		 Similar problems will happen  in  software  when

+		 reducing  the sample rate of an audio file (fre�

+		 quencies above the new Nyquist frequency can  be

+		 aliased  to  lower  frequencies).   Therefore, a

+		 good resample effect will remove  all	frequency

 		 information above the new Nyquist frequency.

-		 The rolloff refers to how close to  the  Nyquist

+		 The  rolloff  refers to how close to the Nyquist

 		 frequency this cutoff is, with closer being bet�

-		 ter.  When increasing	the  sample  rate  of  an

+		 ter.	When  increasing  the  sample  rate of an

 		 audio file you would not expect to have any fre�

-		 quencies  exist  that	are  past  the	 original

-		 Nyquist  frequency.  Because of resampling prop�

-		 erties, it is common to have alaising data  cre�

-		 ated  that  is	 above the old Nyquist frequency.

-		 In that case the rolloff refers to how close  to

+		 quencies   exist  that	 are  past  the	 original

+		 Nyquist frequency.  Because of resampling  prop�

+		 erties,  it is common to have aliasing data cre-

+		 ated that is above the	 old  Nyquist  frequency.

+		 In  that case the rolloff refers to how close to

 		 the original Nyquist frequency to use a highpass

-		 filter to remove this false  data,  with  closer

+		 filter	 to  remove  this false data, with closer

 		 also being better.

 		 The beta parameter determines the type of filter

-		 window used.  Any value greater than 2.0 is  the

+		 window	 used.	Any value greater than 2.0 is the

 		 beta for a Kaiser window.  Beta <= 2.0 selects a

-			  July 24, 2000			       16

-SoX(1)							   SoX(1)

-		 Nuttall window.  If unspecified, the default  is

+		 Nuttall  window.  If unspecified, the default is

 		 a Kaiser window with beta 16.

 		 In the case of Kaiser window (beta > 2.0), lower

-		 betas produce a somewhat faster transition  from

-		 passband  to stopband, at the cost of noticeable

-		 artifacts.  A beta of 16 is  the  default,  beta

-		 less  than 10 is not recommended.  If you want a

-		 sharper cutoff, don't	use  low  beta's,  use	a

+		 betas	produce a somewhat faster transition from

+		 passband to stopband, at the cost of  noticeable

+		 artifacts.   A	 beta  of 16 is the default, beta

+		 less than 10 is not recommended.  If you want	a

+		 sharper  cutoff,  don't  use  low  beta's, use a

 		 longer	 sample	 window.   A  Nuttall  window  is

-		 selected by specifying any 'beta' <= 2, and  the

-		 Nuttall  window has somewhat steeper cutoff than

-		 the default Kaiser window.   You  will	 probably

-		 not  need  to	use  the  beta	parameter at all,

-		 unless you are just curious about comparing  the

+		 selected  by specifying any 'beta' <= 2, and the

+		 Nuttall window has somewhat steeper cutoff  than

+		 the  default  Kaiser  window.	You will probably

+		 not need to  use  the	beta  parameter	 at  all,

+		 unless	 you are just curious about comparing the

 		 effects of Nuttall vs. Kaiser windows.

 		 This is the default effect if the two files have

-		 different sampling  rates.   Default  parameters

+		 different  sampling  rates.   Default parameters

 		 are, as indicated above, Kaiser window of length

 		 45, rolloff 0.80, beta 16, linear interpolation.

-		 NOTE:	-qs  is	 only  slightly	 slower, but more

+		 NOTE: -qs is  only  slightly  slower,	but  more

 		 accurate for 16-bit or higher precision.

-		 NOTE: In many cases of up-sampling, no	 interpo�

-		 lation	 is  needed, as exact filter coefficients

+		 NOTE:	In many cases of up-sampling, no interpo�

+		 lation is needed, as exact  filter  coefficients

 		 can be computed in a reasonable amount of space.

 		 To be precise, this is done when

@@ -1096,125 +955,105 @@

        reverb gain-out delay [ delay ... ]

 		 Add reverberation to a sound sample.  Each delay

-		 is  given  in	milliseconds  and its feedback is

-		 depending on the  reverb-time	in  milliseconds.

-		 Each  delay  should  be  in the range of half to

+		 is given in milliseconds  and	its  feedback  is

+		 depending  on	the  reverb-time in milliseconds.

+		 Each delay should be in the  range  of	 half  to

 		 quarter of reverb-time to get a realistic rever�

 		 beration.  Gain-out is the volume of the output.

-       reverse	 Reverse the sound sample  completely.	 Included

+       reverse	 Reverse  the  sound sample completely.	 Included

 		 for finding Satanic subliminals.

-       speed factor

-		 Speed	up  or down the sound, as a magnetic tape

+       speed [ -c ] factor

+		 Speed up or down the sound, as a  magnetic  tape

 		 with a speed control.	It affects both pitch and

-		 time.	A  factor  of 1.0 means no change, and is

+		 time. A factor of 1.0 means no	 change,  and  is

 		 the  default.	 2.0  doubles  speed,  thus  time

-		 length	 is cut by a half and pitch is one octave

-		 higher.  0.5 halves speed thus time length  dou�

-		 bles and pitch is one octave lower.

+		 length is cut by a half and pitch is one  octave

+		 higher.   0.5 halves speed thus time length dou�

+		 bles and pitch is  one	 octave	 lower.	  If  the

+		 optional -c parameter is used then the factor is

+		 specified in "cents".

-			  July 24, 2000			       17

-SoX(1)							   SoX(1)

        split	 Turn a mono sample into a stereo sample by copy�

-		 ing the input channel	to  the	 left  and  right

+		 ing  the  input  channel  to  the left and right

 		 channels.

        stat [ -s n ] [-rms ] [ -v ] [ -d ]

-		 Do  a	statistical  check on the input file, and

+		 Do a statistical check on the	input  file,  and

 		 print results on the standard error file.  Audio

-		 data  is  passed unmodified from input to output

+		 data is passed unmodified from input  to  output

 		 file unless used along with the -e option.

 		 The "Volume Adjustment:" field in the statistics

-		 gives	you  the  argument to the -v number which

+		 gives you the argument to the	-v  number  which

 		 will make the sample as loud as possible without

 		 clipping.

 		 The option -v will print out the "Volume Adjust�

-		 ment:" field's	 value	only  and  return.   This

-		 could	be  of use in scripts to auto convert the

+		 ment:"	 field's  value	 only  and  return.  This

+		 could be of use in scripts to auto  convert  the

 		 volume.

-		 The -s n option is used to scale the input  data

-		 by  a	given  factor.	The default value of n is

-		 the  max  value  of  a	 signed	  long	 variable

+		 The  -s n option is used to scale the input data

+		 by a given factor.  The default value	of  n  is

+		 the   max   value  of	a  signed  long	 variable

 		 (0x7fffffff).	Internal effects always work with

-		 signed long PCM data and  so  the  value  should

+		 signed	 long  PCM  data  and so the value should

 		 relate to this fact.

-		 The  -rms option will convert all output average

+		 The -rms option will convert all output  average

 		 values to root mean square format.

 		 There is also an optional parameter -d that will

-		 print	out a hex dump of the sound file from the

-		 internal buffer that is  in  32-bit  signed  PCM

-		 data.	 This  is  mainly only of use in tracking

-		 down endian problems that creep  in  to  sox  on

+		 print out a hex dump of the sound file from  the

+		 internal  buffer  that	 is  in 32-bit signed PCM

+		 data.	This is mainly only of	use  in	 tracking

+		 down  endian  problems	 that  creep in to sox on

 		 cross-platform versions.

        stretch factor [window fade shift fading]

-		 Time  stretch	file  by  a  given factor. Change

+		 Time stretch file  by	a  given  factor.  Change

 		 duration without affecting the pitch.	factor of

-		 stretching:  >1.0  lengthen,  <1.0 shorten dura�

-		 tion.	window size is in ms.  Default	is  20ms.

-		 The  fade option, can be "lin".  shift ratio, in

-		 [0.0 1.0]. Default depends  on	 stretch  factor.

-		 1.0  to  shorten,  0.8	 to lengthen.  The fading

-		 ratio, in [0.0 0.5].  The  amount  of	a  fade's

+		 stretching: >1.0 lengthen,  <1.0  shorten  dura�

+		 tion.	 window	 size  is in ms. Default is 20ms.

+		 The fade option, can be "lin".	 shift ratio,  in

+		 [0.0  1.0].  Default  depends on stretch factor.

+		 1.0 to shorten, 0.8  to  lengthen.   The  fading

+		 ratio,	 in  [0.0  0.5].  The  amount of a fade's

 		 default depends on factor and shift.

        swap [ 1 2 | 1 2 3 4 ]

 		 Swap  channels	 in  multi-channel  sound  files.

-		 Optionally, you may specify  the  channel  order

-		 you  would like the output in.	 This defaults to

+		 Optionally,  you  may	specify the channel order

+		 you would like the output in.	This defaults  to

 		 output channel 2 and then 1 for stereo and 2, 1,

-			  July 24, 2000			       18

-SoX(1)							   SoX(1)

-		 4,  3 for quad-channels.  An interesting feature

-		 is that you may duplicate  a  given  channel  by

-		 overwriting  another.	This is done by repeating

-		 an output channel  on	the  command  line.   For

-		 example,  swap 2 2 will overwrite channel 1 with

-		 channel 2's data; creating a  stereo  file  with

+		 4, 3 for quad-channels.  An interesting  feature

+		 is  that  you	may  duplicate a given channel by

+		 overwriting another.  This is done by	repeating

+		 an  output  channel  on  the  command line.  For

+		 example, swap 2 2 will overwrite channel 1  with

+		 channel  2's  data;  creating a stereo file with

 		 both channels containing the same audio data.

        trim start [ length ]

-		 Trim  can  trim off unwanted audio data from the

+		 Trim can trim off unwanted audio data	from  the

 		 beginning and end of the audio file.  Audio sam�

 		 ples are not sent to the output stream until the

-		 start location is reached.  start is a	 floating

+		 start	location is reached.  start is a floating

 		 point number that tells the number of seconds to

-		 wait before starting.	If you	know  the  sample

-		 number	 you would like to start at then the sec�

-		 onds can be obtained by  multiply  (sample  #	*

+		 wait  before  starting.   If you know the sample

+		 number you would like to start at then the  sec�

+		 onds  can  be  obtained by dividing (sample # /

 		 sample rate).

-		 The  optional	length parameter tells the number

-		 of samples to output after the start sample  and

-		 is  used  to trim off the back side of the audio

-		 data.	Using a value of 0 for the start  parame�

-		 ter  will allow trimming off the back side only.

+		 The optional length parameter tells  the  number

+		 of  samples to output after the start sample and

+		 is used to trim off the back side of  the  audio

+		 data.	 Using a value of 0 for the start parame�

+		 ter will allow trimming off the back side  only.

+		 Both  start  and length can also be specified in

+		 mm:ss.frac or hh:mm:ss.frac format.

        vibro speed  [ depth ]

 		 Add the world-famous  Fender  Vibro-Champ  sound

@@ -1246,18 +1085,6 @@

 		 rithmically.	0.0  is constant while +6 doubles

 		 the amplitude.

 		 An optional limitergain value can  be	specified

-			  July 24, 2000			       19

-SoX(1)							   SoX(1)

 		 and  should  be  a  value much less then 1.0 (ie

 		 0.05 or 0.02) and is used only on peaks to  pre�

 		 vent  clipping.   Not	specifying this parameter

@@ -1285,36 +1112,4 @@

-			  July 24, 2000			       20

+			  July 24, 2000			   SoX(1)

--- a/soxexam.txt

+++ b/soxexam.txt

@@ -1,9 +1,7 @@

 SoX(1)							   SoX(1)

 NAME

        soxexam - SoX Examples (CHEAT SHEET)

@@ -32,7 +30,7 @@

        When working with headerless files (raw	files),	 you  may

        take advantage of they pseudo-file types of .ub, .uw, .sb,

        .sw, .ul, and .sl.  By  using  these  extensions	 on  your

-       filenames  you  will not have to specify the corrisponding

+       filenames  you  will not have to specify the corresponding

        options on the command line.

        Precision

@@ -58,18 +56,6 @@

 	       GSM	       16-bit

 	       unsigned long   32-bit

 	       signed long     32-bit

-			December 10, 1999			1

-SoX(1)							   SoX(1)

 	       ___________    _________

        Examples

@@ -124,18 +110,6 @@

        8000  Hz	 ADPCM	input file and then end up with the final

        file as 44100 Hz ADPCM.

-			December 10, 1999			2

-SoX(1)							   SoX(1)

 	 sox firstfile.wav -r 44100 -s -w secondfile.wav

 	 sox secondfile.wav thirdfile.wav swap

 	 sox thirdfile.wav -a -b finalfile.wav mask

@@ -166,8 +140,8 @@

        vocals or guitars.)

        Single  effects will be explained and some given parameter

-       settings that can be used to  understand	 the  theorie  by

-       listening to the sound file with the added effect.

+       settings that can be used to understand the theory by lis-

+       tening to the sound file with the added effect.

        Using multiple effects in parallel or in sequel can result

        either in very perfect sound or ( mostly ) in  a	 dramatic

@@ -176,8 +150,8 @@

        the  first  time using effects try to compose them as less

        as possible. We don't regard the composition of effects in

        the examples because to many combinations are possible and

-       you really need a very fast maschine and a lot  of  memory

-       to play them in real-time.

+       you really need a very fast machine and a lot of memory to

+       play them in real-time.

        And real-time playing of sounds will speed up learning the

        parameter setting.

@@ -184,24 +158,12 @@

        Basically, we will use the "play" front-end of  SOX  since

        it is easier to listen sounds coming out of the speaker or

-       earphone instead of looking at  cryptical  data	in  sound

+       earphone instead of  looking  at	 cryptic  data	in  sound

        files.

        For easy listening of file.xxx ( "xxx" is any sound format

):

-			December 10, 1999			3

-SoX(1)							   SoX(1)

 	     play file.xxx effect-name effect-parameters

        Or more SOX-like ( for "dsp" output ):

@@ -224,14 +186,14 @@

        Notes:

        I  played  all examples in real-time on a Pentium 100 with

-       32 Mb and Linux 2.0.30 using a self-recorded sample ( 3:15

+       32 MB and Linux 2.0.30 using a self-recorded sample ( 3:15

        min  long  in  "wav"  format with 44.1 kHz sample rate and

        stereo 16 bit ).	 The sample should not contain any of the

        effects.	 However,  if  you  take any recording of a sound

        track from radio or tape or cd, and it sounds like a  live

        concert	or  ten	 people	 are playing the same rhythm with

-       their drums or funky-groves, then take any  other  sample.

-       (Typically,  less  then	four  different intruments and no

+       their drums or funky-grooves, then take any other  sample.

+       (Typically, fewer than four different instruments  and  no

        synthesizer in the sample is suitable. Likewise, the  com�

        bination vocal, drums, bass and guitar.)

@@ -240,10 +202,10 @@

        Echo

        An  echo	 effect	 can be naturally found in the mountains,

-       standing somewhere on a moutain and shouting a single word

-       will  result  in	 one or more repetitions of the word ( if

-       not, turn a bit around ant try next, or climb to the  next

-       mountain ).

+       standing somewhere on a mountain	 and  shouting	a  single

+       word  will result in one or more repetitions of the word (

+       if not, turn a bit around and try next, or  climb  to  the

+       next mountain ).

        However,	 the time difference between shouting and repeat�

        ing is the delay (time), its loudness is the decay. Multi�

@@ -256,24 +218,12 @@

        ple shortly after the original one.

        This  will  sound  as  doubling	the number of instruments

-			December 10, 1999			4

-SoX(1)							   SoX(1)

        playing the same sample:

 	     play file.xxx echo 0.8 0.88 60.0 0.4

        If the delay is very short then it sound like a (metallic)

-       roboter playing music:

+       robot playing music:

 	     play file.xxx echo 0.8 0.88 6.0 0.4

@@ -314,8 +264,8 @@

        can be applied to other instrument samples too.

        It  works like the echo effect with a short delay, but the

-       delay isn't constant.  The delay is varied  using  a  sin�

-       odial  or  triangular  modulation.  The	modulation  depth

+       delay isn't constant.  The delay is varied using	 a  sinu�

+       soidal  or  triangular  modulation.  The	 modulation depth

        defines the range the modulated delay is played before  or

        after the delay. Hence the delayed sound will sound slower

        or faster, that is the  delayed	sound  tuned  around  the

@@ -322,20 +272,8 @@

        original	 one, like in a chorus where some vocal are a bit

        out of tune.

-			December 10, 1999			5

-SoX(1)							   SoX(1)

        The typical delay is around 40ms to 60ms, the speed of the

-       modualtion  is  best  near 0.25Hz and the modulation depth

+       modulation  is  best  near 0.25Hz and the modulation depth

        around 2ms.

        A single delay will make the sample more overloaded:

@@ -371,7 +309,7 @@

 	     play file.xxx flanger 0.6 0.87 3.0 0.9 0.5 -s

-       listen  carefully  between  the difference of sinodial and

+       listen carefully to the difference between sinusoidal and

        triangular modulation:

 	     play file.xxx flanger 0.6 0.87 3.0 0.9 0.5 -t

@@ -381,7 +319,7 @@

 	     play file.xxx flanger 0.8 0.88 3.0 0.4 0.5 -t

-       The drunken loundspeaker system:

+       The drunken loudspeaker system:

 	     play file.xxx flanger 0.9 0.9 4.0 0.23 1.3 -s

@@ -388,29 +326,17 @@

        Reverb

        The reverb effect is often used in audience hall which are

-			December 10, 1999			6

-SoX(1)							   SoX(1)

        to small or to many visitors  disturb  the  reflection  of

-       sound  at the walls to make the sound played more monumen�

-       tal. You can try the reverb effect  in  your  bathroom  or

-       garage  or sport halls by shouting loud some words. You'll

-       hear the words reflected from the walls.

+       sound   at  the	walls  to  make	 the  sound  played  more

+       monumental. You can try the reverb effect in your bathroom

+       or  garage  or  sport  halls  by shouting loud some words.

+       You'll hear the words reflected from the walls.

        The biggest problem in using the reverb effect is the cor�

        rect  setting  of the (wall) delays such that the sound is

-       relistic an doesn't sound like music playing in a  tin  or

-       overloaded feedback distroys any illusion of any big hall.

-       To help you for much realisitc reverb effects, you  should

+       realistic and doesn't sound like music playing in a tin or

+       overloaded feedback destroys any illusion of any big hall.

+       To help you for much realistic reverb effects, you  should

        decide  first, how long the reverb should take place until

        it is not loud enough to be registered by your ears.  This

        is be done by the reverb time "t", in small halls 200ms in

@@ -417,13 +343,13 @@

        bigger one 1000ms, if you like. Clearly, the walls of such

        a  hall	aren't far away, so you should define its setting

        be given every wall its delay time.  However, if the  wall

-       is  to  far  eway  for the reverb time, you won't hear the

+       is too far away for the reverb time, you won't hear the

        reverb, so the nearest wall will be best "t/4"  delay  and

-       the  farest  "t/2".   You can try other distances as well,

+       the  farthest "t/2".  You can try other distances as well,

        but it won't sound very realistic.   The	 walls	shouldn't

-       stand  to  close	 to  each  other  and  not  in a multiple

-       interger distance to each other	(  so  avoid  wall  like:

-       200.0 and 202.0, or something like 100.0 and 200.0 ).

+       stand too close to each other and not in a multiple integer

+       distance to each other ( so avoid  wall	like:  200.0  and

+       202.0, or something like 100.0 and 200.0 ).

        Since audience halls do have a lot of walls, we will start

        designing one beginning with one wall:

@@ -445,9 +371,9 @@

        240.0 280.0 300.0

        If you run out of machine power or memory,  then	 stop  as

-       much  applications  as possible ( every interupt will con�

-       sume a lot of cpu time which for	 bigger	 halls	is  abso�

-       lutely neccessary ).

+       many applications as possible ( every interrupt will con-

+       sume a lot of CPU time which for	 bigger	 halls	is  abso�

+       lutely necessary ).

        Phaser

@@ -454,25 +380,13 @@

        The  phaser effect is like the flanger effect, but it uses

        a reverb instead of  an	echo  and  does	 phase	shifting.

        You'll  hear the difference in the examples comparing both

-			December 10, 1999			7

-SoX(1)							   SoX(1)

        effects ( simply change the effect name ).  The delay mod�

-       ulation	can be done sinodial or triangular, preferable is

-       the later one for multiple instruments playing. For single

-       instrument  sounds  the sinodial phaser effect will give a

-       sharper phasing effect.	The decay shouln't be to close to

-       1.0  which  will cause dramatic feedback.  A good range is

-       about 0.5 to 0.1 for the decay.

+       ulation	can  be done sinusoidal or triangular, preferable

+       is the latter one for multiple instruments playing.  For

+       single instrument sounds the sinusoidal phaser effect will

+       give a sharper phasing effect.  The decay shouldn't be too

+       close  to  1.0 which will cause dramatic feedback.  A good

+       range is about 0.5 to 0.1 for the decay.

        We will take a parameter setting as for the flanger before

        (  gain-out  is	lower since feedback can raise the output

@@ -480,7 +394,7 @@

 	     play file.xxx phaser 0.8 0.74 3.0 0.4 0.5 -t

-       The drunken loundspeaker system ( now less alkohol ):

+       The drunken loudspeaker system ( now less alcohol ):

 	     play file.xxx phaser 0.9 0.85 4.0 0.23 1.3 -s

@@ -492,10 +406,44 @@

 	     play file.xxx phaser 0.6 0.66 3.0 0.6 2.0 -t

+       Compander

+       The compander effect allows the dynamic range of a  signal

+       to  be  compressed  or expanded.	 For most situations, the

+       attack time (response to the music getting louder)  should

+       be  shorter  than the decay time because our ears are more

+       sensitive to suddenly loud music	 than  to  suddenly  soft

+       music.

+       For  example,  suppose you are listening to Strauss' "Also

+       Sprach Zarathustra" in a noisy environment such as a  car.

+       If you turn up the volume enough to hear the soft passages

+       over the road noise, the loud sections will be  too  loud.

+       You could try this:

+		    play       file.xxx	      compand	    0.3,1

+       -90,-90,-70,-70,-60,-20,0,0 -5 0 0.2

+       The transfer function  ("-90,...")  says	 that  very  soft

+       sounds  between	-90  and  -70  decibels (-90 is about the

+       limit of 16-bit encoding)  will	remain	unchanged.   That

+       keeps  the  compander from boosting the volume on "silent"

+       passages such as between movements.   However,  sounds  in

+       the range -60 decibels to 0 decibels (maximum volume) will

+       be boosted so that the 60-dB dynamic range of the original

+       music  will be compressed 3-to-1 into a 20-dB range, which

+       is wide enough to enjoy the music but narrow enough to get

+       around the road noise.  The -5 dB output gain is needed to

+       avoid clipping (the number is inexact, and was derived  by

+       experimentation).   The 0 for the initial volume will work

+       fine for a clip that starts with a bit of silence, and the

+       delay  of  0.2  has the effect of causing the compander to

+       react a bit more quickly to sudden volume changes.

        Other effects ( copy, rate, avg, stat, vibro, lowp, highp,

        band, reverb )

-       The  other effects are simply to use. However, an "easy to

+       The  other effects are simple to use. However, an "easy to

        use manual" should be given here.

        More effects ( to do ! )

@@ -503,15 +451,15 @@

        There are a lot of effects around like noise  gates,  com�

        pressors,  waw-waw,  stereo effects and so on. They should

        be implemented making SOX to be more useful in sound  mix�

-       ing  technics  coming together with a great varity of dif�

-       ferent sound effects.

+       ing  techniques	coming	together  with a great variety of

+       different sound effects.

-       Combining effects be using then in parallel or  sequel  on

+       Combining effects by using them in parallel or sequence on

        different  channels  needs  some	 easy  mechanism which is

        real-time stable.

        Really missing, is the changing of the parameters,  start�

-       ing  and stoping of effects while playing samples in real-

+       ing and stopping of effects while playing samples in real-

        time!

        Good luck and have fun with all the effects!

@@ -519,76 +467,9 @@

 	    Juergen Mueller	     (jmueller@uia.ua.ac.be)

-			December 10, 1999			8

-SoX(1)							   SoX(1)

 SEE ALSO

        sox(1), play(1), rec(1)

-			December 10, 1999			9

+			December 10, 1999		   SoX(1)

--- a/src/Makefile.in

+++ b/src/Makefile.in

@@ -69,13 +69,16 @@

 PLAY_0    =

 PLAY_1    = play

-all: sox mix $(PLAY_$(PLAY_SUPPORT))

+all: sox soxmix $(PLAY_$(PLAY_SUPPORT))

 sox: libst.a sox.o

 	$(CC) $(LDFLAGS) -o sox sox.o $(LIBS)

-mix: libst.a mix.o

-	$(CC) $(LDFLAGS) -o mix mix.o $(LIBS)

+soxmix.o:

+	$(CC) $(CFLAGS) -DSOXMIX -c -o soxmix.o sox.c

+soxmix: libst.a soxmix.o

+	$(CC) $(LDFLAGS) -o soxmix soxmix.o $(LIBS)

 play: play.in

 	$(SED) -e 's|@PREFIX@|$(BINDIR)|g' < $(srcdir)/play.in > play

--- a/src/au.c

+++ b/src/au.c

@@ -166,7 +166,7 @@

 	else

-		st_fail_errno(ft,ST_EHDR,"Sun/NeXT/DEC header doesn't start with magic word\nTry the '.ul' file type with '-t ul -r 8000 filename'");

+		st_fail_errno(ft,ST_EHDR,"Did not detect valid Sun/NeXT/DEC magic number in header.");

 		return(ST_EOF);

--- a/src/auto.c

+++ b/src/auto.c

@@ -17,74 +17,110 @@

 #include "st.h"

 #include <string.h>

+#if defined(DOS) || defined(WIN32)

+#define LASTCHAR '\\'

+#else

+#define LASTCHAR '/'

+#endif

 int st_autostartread(ft)

 ft_t ft;

-	char *type;

-	char header[132];

-	int rc;

-	if (!ft->seekable)

+    char *type;

+    char header[132];

+    int rc;

+    type = 0;

+    /* Attempt to auto-detect filetype using magic values.  Abort loop

+     * and use file extension if any errors are detected.

+     */

+    if (ft->seekable)

+    {

+	if (fread(header, 1, sizeof(header), ft->fp) == sizeof(header))

-		st_fail_errno(ft,ST_EOF,"Type AUTO input must be a file, not a pipe");

-		return(ST_EOF);

-	}

-	if (fread(header, 1, sizeof(header), ft->fp) != sizeof(header))

-	{

-		st_fail_errno(ft,ST_EOF,"Type AUTO detects short file");

-		return(ST_EOF);

-	}

-	fseek(ft->fp, 0L - sizeof header, 1); /* Seek back */

-	type = 0;

-	if ((strncmp(header, ".snd", 4) == 0) ||

-	    (strncmp(header, "dns.", 4) == 0) ||

-	    ((header[0] == '\0') && (strncmp(header+1, "ds.", 3) == 0))) {

+	    fseek(ft->fp, 0L - sizeof header, 1); /* Seek back */

+	    type = 0;

+	    if ((strncmp(header, ".snd", 4) == 0) ||

+		    (strncmp(header, "dns.", 4) == 0) ||

+		    ((header[0] == '\0') && (strncmp(header+1, "ds.", 3) == 0)))

+	    {

 		type = "au";

-	}

-	else if (strncmp(header, "FORM", 4) == 0) {

+	    }

+	    else if (strncmp(header, "FORM", 4) == 0)

+	    {

 		if (strncmp(header + 8, "AIFF", 4) == 0)

-			type = "aiff";

+		    type = "aiff";

 		else if (strncmp(header + 8, "8SVX", 4) == 0)

-			type = "8svx";

+		    type = "8svx";

 		else if (strncmp(header + 8, "MAUD", 4) == 0)

-			type = "maud";

-	}

-	else if (strncmp(header, "RIFF", 4) == 0 &&

-		 strncmp(header + 8, "WAVE", 4) == 0) {

+		    type = "maud";

+	    }

+	    else if (strncmp(header, "RIFF", 4) == 0 &&

+		    strncmp(header + 8, "WAVE", 4) == 0)

+	    {

 		type = "wav";

-	}

-	else if (strncmp(header, "Creative Voice File", 19) == 0) {

+	    }

+	    else if (strncmp(header, "Creative Voice File", 19) == 0)

+	    {

 		type = "voc";

-	}

-	else if (strncmp(header+65, "FSSD", 4) == 0 &&

-		 strncmp(header+128, "HCOM", 4) == 0) {

+	    }

+	    else if (strncmp(header+65, "FSSD", 4) == 0 &&

+		    strncmp(header+128, "HCOM", 4) == 0)

+	    {

 		type = "hcom";

-	}

-	else if (strncmp(header, "SOUND", 5) == 0) {

+	    }

+	    else if (strncmp(header, "SOUND", 5) == 0)

+	    {

 		type = "sndt";

-	}

-	else if (strncmp(header, "2BIT", 4) == 0) {

+	    }

+	    else if (strncmp(header, "2BIT", 4) == 0)

+	    {

 		type = "avr";

+	    }

+	    else if (strncmp(header, "NIST_1A", 4) == 0)

+	    {

+		type = "sph";

+	    }

-	else if (strncmp(header, "NIST_1A", 4)) {

-	        type = "sph";

-	}

+    }

-  	if (type == 0) {

-  		st_warn("Type AUTO doesn't recognize this header\n");

-                st_warn("Trying: -t raw -r 44100 -s -w\n\n");

-                type = "raw";

-                ft->info.rate = 44100;

-                ft->info.size = ST_SIZE_WORD;

-                ft->info.encoding = ST_ENCODING_SIGN2;

-                }

-	st_report("Type AUTO changed to %s", type);

+    if (type == 0)

+    {

+	/* Use filename extension to determine audio type. */

+	/* First, chop off any path portions of filename.  This

+	 * prevents the next search from considering that part. */

+	if ((type = strrchr(ft->filename, LASTCHAR)) == NULL)

+	    type = ft->filename;

+	/* Now look for a filename extension */

+	if ((type = strrchr(type, '.')) != NULL)

+	    type++;

+	else

+	    type = NULL;

+    }

+    if (type == 0)

+    {

+	st_warn("Could not detect type.  Assuming signed 16-bit data using rate of 44100.\n");

+	type = "raw";

+	ft->info.rate = 44100;

+	ft->info.size = ST_SIZE_WORD;

+	ft->info.encoding = ST_ENCODING_SIGN2;

+    }

 	ft->filetype = type;

 	rc = st_gettype(ft); /* Change ft->h to the new format */

-	if(rc)

-		return (rc);

-	(* ft->h->startread)(ft);

-	return(ST_SUCCESS);

-}

+	if(rc != ST_SUCCESS)

+	{

+	    st_fail_errno(ft,ST_EFMT,"Do not understand format type: %s\n",type);

+	    return (rc);

+	}

+	st_report("Detected file format type: %s\n", type);

+	return ((* ft->h->startread)(ft));

+    }

 int st_autostartwrite(ft)

 ft_t ft;

--- a/src/sox.c

+++ b/src/sox.c

@@ -84,8 +84,14 @@

 static int flow_effect(int);

 static int drain_effect(int);

+#ifdef SOXMIX

+#define MAX_INPUT_FILES 2

+#define REQUIRED_INPUT_FILES 2

+#else

 #define MAX_INPUT_FILES 1

 #define REQUIRED_INPUT_FILES 1

+#endif

 static ft_t informat[MAX_INPUT_FILES] = { 0 };

 static int input_count = 0;

@@ -225,7 +231,8 @@

 	/* Make sure we got at least the required # of input filename */

-	if (!informat[REQUIRED_INPUT_FILES-1] ||

+	if (input_count < REQUIRED_INPUT_FILES ||

+	    !informat[REQUIRED_INPUT_FILES-1] ||

 	    !informat[REQUIRED_INPUT_FILES-1]->filename)

 	    usage("Not enough input files not specified");

@@ -283,20 +290,12 @@

     informat[input_count] = ft;

-    /* If filetype has not been set by command line options then

-     * attempt to get it from filename extension.

-     */

-    if (!ft->filetype)

-    {

-	if ((ft->filetype = strrchr(ft->filename, '.')) != NULL)

-	    ft->filetype++;

-	else /* Default to "auto" */

-	    ft->filetype = "auto";

-    }

+    /* Let auto effect do the work if user is not overriding. */

+    if (!ft->filetype)

+	ft->filetype = "auto";

-    /* See if we understand this type of file */

-    if( st_gettype(ft) )

-	st_fail("Unknown input file format for '%s'.  Use -t option to override",ft->filename);

+    if ( st_gettype(ft) )

+	st_fail("Unknown input file format for '%s'.  Use -t option to override",ft->filename);

     /* Default the input comment to the filename if not set from

      * command line.

@@ -327,15 +326,29 @@

 #endif

+#if defined(DOS) || defined(WIN32)

+#define LASTCHAR '\\'

+#else

+#define LASTCHAR '/'

+#endif

 static void copy_output(ft_t ft)

     outformat = ft;

     if (writing && !ft->filetype) {

-	if ((ft->filetype = strrchr(ft->filename, '.')) != NULL)

-	    ft->filetype++;

-	else

-	    ft->filetype = ft->filename;

+ 	/* Use filename extension to determine audio type. */

+        /* First, chop off any path portions of filename.  This

+         * prevents the next search from considering that part. */

+        if ((ft->filetype = strrchr(ft->filename, LASTCHAR)) == NULL)

+            ft->filetype = ft->filename;

+        /* Now look for a filename extension */

+        if ((ft->filetype = strrchr(ft->filetype, '.')) != NULL)

+            ft->filetype++;

+        else

+            ft->filetype = NULL;

     if ( st_gettype(ft) )

@@ -506,6 +519,20 @@

+static int compare_input(ft_t ft1, ft_t ft2)

+{

+    if (ft1->info.rate != ft2->info.rate)

+	return ST_EOF;

+    if (ft1->info.size != ft2->info.size)

+	return ST_EOF;

+    if (ft1->info.encoding != ft2->info.encoding)

+	return ST_EOF;

+    if (ft1->info.channels != ft2->info.channels)

+	return ST_EOF;

+    return ST_SUCCESS;

+}

/*

  * Process input file -> effect table -> output file

  *	one buffer at a time

@@ -513,37 +540,60 @@

 static void process(void) {

     int e, f, flowstatus;

-    /* Read and write starters can change their formats. */

-    if ((* informat[0]->h->startread)(informat[0]) == ST_EOF)

+#ifdef SOXMIX

+    int s;

+    ULONG ilen[MAX_INPUT_FILES];

+    LONG *ibuf[MAX_INPUT_FILES];

+#endif

+    for (f = 0; f < input_count; f++)

-        st_fail(informat[0]->st_errstr);

-    }

+	/* Read and write starters can change their formats. */

+	if ((* informat[f]->h->startread)(informat[f]) != ST_SUCCESS)

+	{

+	    st_fail("Failed reading %s: %s",informat[f]->filename,

+		    informat[f]->st_errstr);

+	}

-    /* Go a head and assume 1 channel audio if nothing is detected.

-     * This is because libst usually doesn't set this for mono file

-     * formats (for historical reasons).

-     */

-    if (informat[0]->info.channels == -1)

-	informat[0]->info.channels = 1;

+	/* Go ahead and assume 1 channel audio if nothing is detected.

+	 * This is because libst usually doesn't set this for mono file

+	 * formats (for historical reasons).

+	 */

+	if (informat[f]->info.channels == -1)

+	    informat[f]->info.channels = 1;

-    if ( st_checkformat(informat[0]) )

-		st_fail("bad input format");

-    st_report("Input file: using sample rate %lu\n\tsize %s, encoding %s, %d %s",

-	   informat[0]->info.rate, st_sizes_str[informat[0]->info.size],

-	   st_encodings_str[informat[0]->info.encoding], informat[0]->info.channels,

-	   (informat[0]->info.channels > 1) ? "channels" : "channel");

+	if ( st_checkformat(informat[f]) )

+	    st_fail("bad input format for file %s",informat[f]->filename);

-    if (informat[0]->comment)

-	st_report("Input file: comment \"%s\"\n", informat[0]->comment);

+	st_report("Input file %s: using sample rate %lu\n\tsize %s, encoding %s, %d %s",

+		  informat[f]->filename, informat[f]->info.rate,

+		  st_sizes_str[informat[f]->info.size],

+		  st_encodings_str[informat[f]->info.encoding],

+		  informat[f]->info.channels,

+		  (informat[f]->info.channels > 1) ? "channels" : "channel");

+	if (informat[f]->comment)

+	    st_report("Input file %s: comment \"%s\"\n",

+		      informat[f]->filename, informat[f]->comment);

+    }

+#ifdef SOXMIX

+    for (f = 1; f < input_count; f++)

+    {

+	if (compare_input(informat[0], informat[f]) != ST_SUCCESS)

+	{

+	    st_fail("Input files must have the same rate, channels, data size, and encoding");

+	}

+    }

+#endif

     if (writing)

 	open_output(outformat);

+	/* Always use first input file as a reference for output

+	 * file format.

+	 */

 	st_copyformat(informat[0], outformat);

 	if ((*outformat->h->startwrite)(outformat) == ST_EOF)

@@ -554,9 +604,11 @@

 	if (st_checkformat(outformat))

 		st_fail("bad output format");

-	st_report("Output file: using sample rate %lu\n\tsize %s, encoding %s, %d %s",

-	       outformat->info.rate, st_sizes_str[outformat->info.size],

-	       st_encodings_str[outformat->info.encoding], outformat->info.channels,

+	st_report("Output file %s: using sample rate %lu\n\tsize %s, encoding %s, %d %s",

+	       outformat->filename, outformat->info.rate,

+	       st_sizes_str[outformat->info.size],

+	       st_encodings_str[outformat->info.encoding],

+	       outformat->info.channels,

 	       (outformat->info.channels > 1) ? "channels" : "channel");

 	if (outformat->comment)

@@ -591,18 +643,76 @@

+#ifdef SOXMIX

+    for (f = 0; f < MAX_INPUT_FILES; f++)

+    {

+	ibuf[f] = (LONG *)malloc(BUFSIZ * sizeof(LONG));

+	if (!ibuf[f])

+	{

+	    st_fail("could not allocate memory");

+	}

+    }

+#endif

/*

      * Just like errno, we must set st_errno to known values before

      * calling I/O operations.

*/

-    informat[0]->st_errno = 0;

+    for (f = 0; f < input_count; f++)

+	informat[f]->st_errno = 0;

     outformat->st_errno = 0;

     /* Run input data through effects and get more until olen == 0 */

     do {

+#ifndef SOXMIX

         efftab[0].olen = (*informat[0]->h->read)(informat[0],

                                               efftab[0].obuf, (LONG) BUFSIZ);

+    	if (informat[0]->st_errno)

+	{

+	    st_warn("Error reading from %s: %s", informat[0]->filename,

+		    informat[0]->st_errstr);

+	    break;

+	}

+#else

+	for (f = 0; f < input_count; f++)

+	{

+	    ilen[f] = (*informat[f]->h->read)(informat[f],

+			                      ibuf[f], (LONG)BUFSIZ);

+	    if (informat[f]->st_errno)

+	    {

+    		st_warn("Error reading from %s: %s", informat[f]->filename,

+    			informat[0]->st_errstr);

+    		break;

+	    }

+	}

+	if (f < input_count && informat[f]->st_errno)

+	    break;

+	efftab[0].olen = 0;

+	for (f = 0; f < input_count; f++)

+	    if (ilen[f] > efftab[0].olen)

+		efftab[0].olen = ilen[f];

+	for (s = 0; s < efftab[0].olen; s++)

+	{

+	    /* Mix data together by dividing by the number

+	     * of audio files and then summing up.  This prevents

+	     * overflows.

+	     */

+	    for (f = 0; f < input_count; f++)

+	    {

+		if (f == 0)

+		    efftab[0].obuf[s] =

+			(s<ilen[f]) ? (ibuf[f][s]/input_count) : 0;

+		else

+		    if (s < ilen[f])

+			efftab[0].obuf[s] += ibuf[f][s]/input_count;

+	    }

+	}

+#endif

         efftab[0].odone = 0;

         if (efftab[0].olen == 0)

@@ -620,14 +730,11 @@

 	flowstatus = flow_effect_out();

 	/* Negative flowstatus says no more output will ever be generated. */

-	if (flowstatus < 0)

+	if (flowstatus < 0 || outformat->st_errno)

 	    break;

     } while (1); /* break; efftab[0].olen == 0 */

-    if (informat[0]->st_errno)

-	st_fail(informat[0]->st_errstr);

     /* Drain the effects out first to last,

      * pushing residue through subsequent effects */

     /* oh, what a tangled web we weave */

@@ -659,14 +766,23 @@

 	    (* efftabR[e].h->stop)(&efftabR[e]);

-    if ((* informat[0]->h->stopread)(informat[0]) == ST_EOF)

-	st_fail(informat[0]->st_errstr);

-    fclose(informat[0]->fp);

+    for (f = 0; f < input_count; f++)

+    {

+	/* If problems closing input file, just warn user since

+	 * we are exiting anyways.

+	 */

+	if ((* informat[f]->h->stopread)(informat[f]) == ST_EOF)

+	    st_warn(informat[f]->st_errstr);

+	fclose(informat[f]->fp);

+    }

     if (writing)

+	/* problem closing output file, just warn user since we

+	 * are exiting anyways.

+	 */

         if ((* outformat->h->stopwrite)(outformat) == ST_EOF)

-	    st_fail(outformat->st_errstr);

+	    st_warn(outformat->st_errstr);

     if (writing)

         fclose(outformat->fp);

@@ -696,7 +812,10 @@

       if (outformat->st_errno)

-          st_fail(outformat->st_errstr);

+      {

+          st_warn("Error writing: %s",outformat->st_errstr);

+	  break;

+      }

       /* If any effect will never again produce data, give up.  This

        * works because of the pull status: the effect won't be able to

--

⑨