shithub: sox

Download patch

ref: 89e9bf4a96bc03100918df8889e3987a821a2f75
parent: f1424234f9a2ad986f316a9731d31fad6047679b
author: robs <robs>
date: Wed Sep 17 03:55:17 EDT 2008

change rate default settings

--- a/soxeffect.7
+++ b/soxeffect.7
@@ -29,7 +29,7 @@
 .SP
 .fi
 ..
-.TH SoX 7 "August 5, 2008" "soxeffect" "Sound eXchange"
+.TH SoX 7 "September 16, 2008" "soxeffect" "Sound eXchange"
 .SH NAME
 SoX \- Sound eXchange, the Swiss Army knife of audio manipulation
 .SH DESCRIPTION
@@ -912,64 +912,76 @@
 .B tempo
 effect for a description of the other parameters.
 .TP
-\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [\fB\-p\fR \fIPHASE\fR\^|\^\fB\-M\fR\^|\^\fB\-I\fR\^|\^\fB\-L\fR] [\fB\-b\fR \fIBANDWIDTH\fR] [\fB\-a\fR] [\fIRATE\fR[\fBk\fR]]
-Change the audio sampling rate (i.e. resample the audio) to the given
+\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [override-options] \fIRATE\fR[\fBk\fR]
+Change the audio sampling rate (i.e. resample the audio) to any given
 .I RATE
-using a quality level as follows:
+(even non-integer if this is supported by the output file format)
+using a quality level defined as follows:
 .TS
 center box;
-cI cI cI cI cI lI
+cI cI2w9 cI cI2w6 cIw6 lIw17
 cB c c c c l.
-\ 	Quality	Phase	BW %	Rej dB	Typical Use
+\ 	Quality	T{
+\ Phase Response
+T}	T{
+Band-width
+T}	Rej dB	T{
+.na
+Typical Use
+T}
 \-q	T{
 .na
-quick & dirty
-T}	Lin.	n/a	\(~=30 @ Fs/4	T{
+quick
+T}	linear	n/a	T{
 .na
+\(~=30 @ \ Fs/4
+T}	T{
+.na
 playback on ancient hardware
 T}
-\-l	low	Lin.	80	100	T{
+\-l	low	linear	80%	100	T{
 .na
 playback on old hardware
 T}
-\-m	medium	Int.	99	100	audio playback
-\-h	high	Int.	99	125	T{
+\-m	medium	intermediate	95%	100	T{
 .na
+audio playback
+T}
+\-h	high	intermediate	95%	125	T{
+.na
 16-bit mastering (use with dither)
 T}
-\-v	very high	Int.	99	175	24-bit mastering\ 
+\-v	T{
+.na
+very high
+T}	intermediate	95%	175	24-bit mastering
 .TE
 .DT
 .SP
 where
-.B BW %
-is the percentage of the audio band that is preserved (based on the 3dB
-point) during sample rate conversion, and
-.B Rej dB
-is the level of noise rejection.  The default quality level is `high'
-(\fB\-h\fR).
+.I Band-width
+is the percentage of the audio frequency band that is preserved and
+.I Rej dB
+is the level of noise rejection.  Increasing levels of resampling
+quality come at the expense of increasing amounts of time to process the
+audio.  If no quality option is given, the quality level used is `high'.
 .SP
-The
-.B \-q
-algorithm uses cubic interpolation; the others use bandwidth-limited
-interpolation.
-The
-.B \-q
-and
-.B \-l
-algorithms have a `linear' phase response; for the others, the phase
-response is configurable, but defaults to `intermediate' (see below for
-more details).
+The `quick' algorithm uses cubic interpolation; all others use
+band-width limited interpolation.  The `quick' and `low' quality
+algorithms have a `linear' phase response; for `medium', `high' and
+`very high', the phase response is configurable (see below), but
+defaults to `intermediate'.
 .SP
-This effect is invoked automatically if SoX's \fB\-r\fR option specifies
-a rate that is different to that of the input file(s).  Alternatively,
-this effect may be invoked with the output rate parameter
-.I RATE
-and SoX's
+The
+.B rate
+effect is invoked automatically if SoX's \fB\-r\fR option specifies a
+rate that is different to that of the input file(s).  Alternatively, if
+this effect is given explicitly, then SoX's
 .B \-r
 option need not be given.  For example, the following two commands are
 equivalent:
 .EX
+.ne 2
 	sox input.au -r 48k output.au bass -3
 	sox input.au        output.au bass -3 rate 48k
 .EE
@@ -983,69 +995,104 @@
 .TE
 .DT
 .SP
-The following, advanced options apply only to the
-.BR \-m ,
-.B \-h
-and
-.B \-v
-algorithms and are used primarily to control the resampling filter's
-`ringing' (see
-http://ccrma.stanford.edu/~jos/filters/Linear_Phase_Really_Ideal.html
-for a description of this phenomenon).  Note that ringing control is a
-compromise: reducing it comes at the expense of reducing band-width
-and/or increasing aliasing.
-.SP
-The
-.B \-p
-.IR PHASE ,
-.BR \-M ,
-.B \-I
-and
-.B \-L
-options control the phase response of the filter that is used in the
-resampling process.  Any phase value from 0 to 100 may be given with
-.BR \-p ,
-though values greater than 50 are rarely useful.  The following specific
-values are noteworthy:
+The simple quality selection described above provides settings that
+satisfy the needs of the vast majority of resampling tasks.
+Occasionally, however, it may be desirable to fine-tune the resampler's
+filter response; this can be achieved using
+.IR override\ options ,
+as detailed in the following table:
 .TS
 center box;
-cB cI cI cI
-cB c cB c.
--p \fIvalue\fR	T{
-Phase response
-T}	Short form	T{
-Ratio of pre- to post- ringing
+lB lw52.
+\-M/\-I/\-L	Phase response = minimum/intermediate/linear
+\-p\ 0\-100	T{
+.na
+Any phase response (0 = minimum, 25 = intermediate, 50 = linear, 100 = maximum)
 T}
-0	minimum	\-M	0:1
-25	intermediate	\-I	0\*d2:0\*d8
-50	linear	\-L	0\*d5:0\*d5
-100	maximum	\ 	1:0
+\-s	Steep filter (band-width = 99%)
+\-b\ 74\-99.7	Any band-width %
+\-a	Allow aliasing above the pass-band
 .TE
 .DT
 .SP
+N.B.  Override options cannot be used with the `quick' or `low'
+quality algorithms.
+.SP
+All resamplers use filters that can sometimes create `echo' (a.k.a.
+`ringing') artefacts with transient signals such as those that occur
+with `finger snaps' or other highly percussive sounds.  Such artefacts are
+much more noticeable to the human ear if they occur before the transient
+(`pre-echo') than if they occur after it (`post-echo').  The phase
+response setting controls the distribution of any transient echo between
+`pre' and `post': with minimum phase, there is no pre-echo but the
+longest post-echo; with linear phase, pre- and post-echo are in equal
+amounts (in signal terms, but not audibility terms); the intermediate
+phase setting attempts to find the best compromise by selecting a small
+length (and level) of pre-echo and a medium-length post-echo.
+.SP
+Minimum, intermediate, or linear phase response is selected using the
+.BR \-M ,
+.BR \-I ,
+or
+.B \-L
+option; a custom phase response can be created with the
+.B \-p
+option.  Note that phase responses between `linear' and `maximum'
+(greater than 50) are rarely useful.
+.SP
+A resampler's band-width setting determines how much of the frequency
+content of the original signal (w.r.t. the original sample rate when
+up-sampling, or the new sample rate when down-sampling) is preserved
+during conversion.  The term `pass-band' is used to refer to all frequencies
+up to the band-width point (e.g. for 44\*d1kHz sampling rate, and a
+resampling band-width of 95%, the pass-band represents frequencies from
+0Hz (D.C.) to circa 21kHz).  Increasing the resampler's band-width
+results in a slower conversion and can increase transient echo
+artefacts (and vice versa).
+.SP
 The
+.B \-s
+`steep filter' option changes resampling band-width from the default 95%
+(based on the 3dB point), to 99%.  The
 .B \-b
-.I BANDWIDTH
-(74\-99\*d7 %) option allows the preserved audio bandwidth to be reduced
-from the default (99%) and thus also reduce ringing.  For example,
-changing the bandwidth to 95% (which, at 44100Hz sampling rate,
-still preserves frequencies up to 21kHz) reduces pre- and post- ringing
-by 80%.
+option allows the band-width to be set to any value in the range
+74\-99\*d7 %, but note that band-width values greater than 99% are not
+recommended for normal use as they can cause excessive transient echo.
 .SP
 If the
 .B \-a
-option is given aliasing above the pass-band is allowed; this reduces
-pre- and post- ringing by 42%.  Note that if this option is given, then
+option is given, then aliasing above the pass-band is allowed.  For
+example, with 44\*d1kHz sampling rate, and a
+resampling band-width of 95%, this means that frequency content above
+21kHz can be distorted; however, since this is above the pass-band (i.e.
+above the highest frequency of interest/audibility), this may not be a
+problem.  The benefits of allowing aliasing are reduced processing time,
+and reduced (by almost half) transient echo artefacts.
+Note that if this option is given, then
 the minimum band-width allowable with
 .B \-b
 increases to 85%.
 .SP
-For example, using both \fB\-b 95\fR and \fB\-a\fR reduces all ringing
-by
+Examples:
+.EX
+	sox input.wav -2 output.wav rate -s -a 44100 dither
+.EE
+default (high) quality resampling; overrides: steep filter, allow
+aliasing; to 44\*d1kHz sample rate; dither output to 2-byte (16-bit) WAV
+file.
+.EX
+	sox input.wav -3 output.aiff rate -v -L -b 90 48k
+.EE
+very high quality resampling; overrides: linear phase, band-width 90%;
+to 48k sample rate; store output to 3-byte (24-bit) AIFF file.
+.TS
+center;
+c8 c8 c.
+*	*	*
+.TE
+.DT
 .SP
-	100 \- (100 \- 80) \(mu (100 \- 42)% = 88\*d4%
-.SP
-Note that the
+The
 .BR key ,
 .B speed
 and
--- a/src/rate.c
+++ b/src/rate.c
@@ -366,7 +366,7 @@
 #define last_stage p->stages[p->level]
 #define post_stage p->stages[p->level + 1]
 
-typedef enum {Default = -1, Quick, Low, Medium, High, Very, Ultra} quality_t;
+typedef enum {Default = -1, Quick, Low, Medium, High, Very} quality_t;
 
 static void rate_init(rate_t * p, rate_shared_t * shared, double factor,
     quality_t quality, int interp_order, double phase, double bandwidth,
@@ -376,7 +376,7 @@
 
   assert(factor > 0);
   p->factor = factor;
-  if (quality < Quick || quality > Ultra)
+  if (quality < Quick || quality > Very)
     quality = High;
   if (quality != Quick) {
     const int max_divisor = 2048;      /* Keep coef table size ~< 500kb */
@@ -449,9 +449,7 @@
     typedef struct {int len; sample_t const * h; double bw, a;} filter_t;
     static filter_t const filters[] = {
       {2 * array_length(half_fir_coefs_low) - 1, half_fir_coefs_low, 0,0},
-      {0, NULL, .986, 110}, {0, NULL, .986, 125},
-      {0, NULL, .986, 170}, {0, NULL, .996, 170},
-    };
+      {0, NULL, .931, 110}, {0, NULL, .931, 125}, {0, NULL, .931, 170}};
     filter_t const * f = &filters[quality - Low];
     double att = allow_aliasing? (34./33)* f->a : f->a;
     double bw = bandwidth? 1 - (1 - bandwidth / 100) / TO_3dB : f->bw;
@@ -572,14 +570,13 @@
 static int create(sox_effect_t * effp, int argc, char **argv)
 {
   priv_t * p = (priv_t *) effp->priv;
-  int c, callers_optind = optind, callers_opterr = opterr;
-  char * dummy_p, * found_at, * opts = "+i:b:p:MILaqlmhvu", * qopts = opts +11;
+  int c;
+  char * dummy_p, * found_at, * opts = "+i:b:p:MILasqlmhv", * qopts = opts +12;
 
   p->quality = -1;
   p->phase = 25;
   p->shared_ptr = &p->shared;
 
-  --argv, ++argc, optind = 1, opterr = 0;                /* re-jig for getopt */
   while ((c = getopt(argc, argv, opts)) != -1) switch (c) {
     GETOPT_NUMERIC('i', coef_interp, 1 , 3)
     GETOPT_NUMERIC('p', phase,  0 , 100)
@@ -587,11 +584,12 @@
     case 'M': p->phase =  0; break;
     case 'I': p->phase = 25; break;
     case 'L': p->phase = 50; break;
+    case 's': p->bandwidth = 99; break;
     case 'a': p->allow_aliasing = sox_true; break;
     default: if ((found_at = strchr(qopts, c))) p->quality = found_at - qopts;
       else {sox_fail("unknown option `-%c'", optopt); return lsx_usage(effp);}
   }
-  argc-=optind, argv+=optind, optind = callers_optind, opterr = callers_opterr;
+  argc -= optind, argv += optind;
 
   if ((unsigned)p->quality < 2 && (p->bandwidth || p->phase != 25 || p->allow_aliasing)) {
     sox_fail("override options not allowed with this quality level");
@@ -664,19 +662,27 @@
 sox_effect_handler_t const * sox_rate_effect_fn(void)
 {
   static sox_effect_handler_t handler = {
-    "rate", "[-q|-l|-m|-h|-v] [-p PHASE|-M|-I|-L] [-b BANDWIDTH] [-a] [RATE[k]]"
-    "\n\n\tQuality\t\tPhase\tBW %   Rej dB\tTypical Use"
-    "\n -q\tquick & dirty\tLin.\tn/a  ~30 @ Fs/4\tplayback on ancient hardware"
-    "\n -l\tlow\t\t\"\t80\t100\tplayback on old hardware"
-    "\n -m\tmedium\t\tInt.\t99\t100\taudio playback"
-    "\n -h\thigh\t\t\"\t99\t125\t16-bit master (use with dither)"
-    "\n -v\tvery high\t\"\t99\t175\t24-bit master"
-    "\n\nOverrides (for -m, -h, -v):"
-    "\n -p 0-100\t0=minimum, 25=intermediate, 50=linear, 100=maximum"
-    "\n -M/I/L\t\tphase=min./int./lin."
-    "\n -b 74-99.7\t%"
-    "\n -a\t\tallow aliasing"
-    , SOX_EFF_RATE, create, start, flow, drain, stop, NULL, sizeof(priv_t)
+    "rate", 0, SOX_EFF_RATE | SOX_EFF_GETOPT,
+    create, start, flow, drain, stop, 0, sizeof(priv_t)
   };
+  static char const * lines[] = {
+    "[-q|-l|-m|-h|-v] [override-options] RATE[k]",
+    "                    PHASE    BAND-",
+    "     QUALITY       RESPONSE  WIDTH  REJ dB   TYPICAL USE",
+    " -q  quick          linear   n/a  ~30 @ Fs/4 playback on ancient hardware",
+    " -l  low            linear   80%     100     playback on old hardware",
+    " -m  medium         interm.  95%     100     audio playback",
+    " -h  high (default) interm.  95%     125     16-bit mastering (use with dither)",
+    " -v  very high      interm.  95%     175     24-bit mastering",
+    "              OVERRIDE OPTIONS (only with -m, -h, -v)",
+    " -M/-I/-L     Phase response = minimum/intermediate/linear",
+    " -p 0-100     Any phase response (0 = minimum, 25 = intermediate,",
+    "              50 = linear, 100 = maximum)",
+    " -s           Steep filter (band-width = 99%)",
+    " -b 74-99.7   Any band-width %",
+    " -a           Allow aliasing above the pass-band",
+  };
+  static char * usage;
+  handler.usage = lsx_usage_lines(&usage, lines, array_length(lines));
   return &handler;
 }