ref: 89e9bf4a96bc03100918df8889e3987a821a2f75
parent: f1424234f9a2ad986f316a9731d31fad6047679b
author: robs <robs>
date: Wed Sep 17 03:55:17 EDT 2008
change rate default settings
--- a/soxeffect.7
+++ b/soxeffect.7
@@ -29,7 +29,7 @@
.SP
.fi
..
-.TH SoX 7 "August 5, 2008" "soxeffect" "Sound eXchange"
+.TH SoX 7 "September 16, 2008" "soxeffect" "Sound eXchange"
.SH NAME
SoX \- Sound eXchange, the Swiss Army knife of audio manipulation
.SH DESCRIPTION
@@ -912,64 +912,76 @@
.B tempo
effect for a description of the other parameters.
.TP
-\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [\fB\-p\fR \fIPHASE\fR\^|\^\fB\-M\fR\^|\^\fB\-I\fR\^|\^\fB\-L\fR] [\fB\-b\fR \fIBANDWIDTH\fR] [\fB\-a\fR] [\fIRATE\fR[\fBk\fR]]
-Change the audio sampling rate (i.e. resample the audio) to the given
+\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [override-options] \fIRATE\fR[\fBk\fR]
+Change the audio sampling rate (i.e. resample the audio) to any given
.I RATE
-using a quality level as follows:
+(even non-integer if this is supported by the output file format)
+using a quality level defined as follows:
.TS
center box;
-cI cI cI cI cI lI
+cI cI2w9 cI cI2w6 cIw6 lIw17
cB c c c c l.
-\ Quality Phase BW % Rej dB Typical Use
+\ Quality T{
+\ Phase Response
+T} T{
+Band-width
+T} Rej dB T{
+.na
+Typical Use
+T}
\-q T{
.na
-quick & dirty
-T} Lin. n/a \(~=30 @ Fs/4 T{
+quick
+T} linear n/a T{
.na
+\(~=30 @ \ Fs/4
+T} T{
+.na
playback on ancient hardware
T}
-\-l low Lin. 80 100 T{
+\-l low linear 80% 100 T{
.na
playback on old hardware
T}
-\-m medium Int. 99 100 audio playback
-\-h high Int. 99 125 T{
+\-m medium intermediate 95% 100 T{
.na
+audio playback
+T}
+\-h high intermediate 95% 125 T{
+.na
16-bit mastering (use with dither)
T}
-\-v very high Int. 99 175 24-bit mastering\
+\-v T{
+.na
+very high
+T} intermediate 95% 175 24-bit mastering
.TE
.DT
.SP
where
-.B BW %
-is the percentage of the audio band that is preserved (based on the 3dB
-point) during sample rate conversion, and
-.B Rej dB
-is the level of noise rejection. The default quality level is `high'
-(\fB\-h\fR).
+.I Band-width
+is the percentage of the audio frequency band that is preserved and
+.I Rej dB
+is the level of noise rejection. Increasing levels of resampling
+quality come at the expense of increasing amounts of time to process the
+audio. If no quality option is given, the quality level used is `high'.
.SP
-The
-.B \-q
-algorithm uses cubic interpolation; the others use bandwidth-limited
-interpolation.
-The
-.B \-q
-and
-.B \-l
-algorithms have a `linear' phase response; for the others, the phase
-response is configurable, but defaults to `intermediate' (see below for
-more details).
+The `quick' algorithm uses cubic interpolation; all others use
+band-width limited interpolation. The `quick' and `low' quality
+algorithms have a `linear' phase response; for `medium', `high' and
+`very high', the phase response is configurable (see below), but
+defaults to `intermediate'.
.SP
-This effect is invoked automatically if SoX's \fB\-r\fR option specifies
-a rate that is different to that of the input file(s). Alternatively,
-this effect may be invoked with the output rate parameter
-.I RATE
-and SoX's
+The
+.B rate
+effect is invoked automatically if SoX's \fB\-r\fR option specifies a
+rate that is different to that of the input file(s). Alternatively, if
+this effect is given explicitly, then SoX's
.B \-r
option need not be given. For example, the following two commands are
equivalent:
.EX
+.ne 2
sox input.au -r 48k output.au bass -3
sox input.au output.au bass -3 rate 48k
.EE
@@ -983,69 +995,104 @@
.TE
.DT
.SP
-The following, advanced options apply only to the
-.BR \-m ,
-.B \-h
-and
-.B \-v
-algorithms and are used primarily to control the resampling filter's
-`ringing' (see
-http://ccrma.stanford.edu/~jos/filters/Linear_Phase_Really_Ideal.html
-for a description of this phenomenon). Note that ringing control is a
-compromise: reducing it comes at the expense of reducing band-width
-and/or increasing aliasing.
-.SP
-The
-.B \-p
-.IR PHASE ,
-.BR \-M ,
-.B \-I
-and
-.B \-L
-options control the phase response of the filter that is used in the
-resampling process. Any phase value from 0 to 100 may be given with
-.BR \-p ,
-though values greater than 50 are rarely useful. The following specific
-values are noteworthy:
+The simple quality selection described above provides settings that
+satisfy the needs of the vast majority of resampling tasks.
+Occasionally, however, it may be desirable to fine-tune the resampler's
+filter response; this can be achieved using
+.IR override\ options ,
+as detailed in the following table:
.TS
center box;
-cB cI cI cI
-cB c cB c.
--p \fIvalue\fR T{
-Phase response
-T} Short form T{
-Ratio of pre- to post- ringing
+lB lw52.
+\-M/\-I/\-L Phase response = minimum/intermediate/linear
+\-p\ 0\-100 T{
+.na
+Any phase response (0 = minimum, 25 = intermediate, 50 = linear, 100 = maximum)
T}
-0 minimum \-M 0:1
-25 intermediate \-I 0\*d2:0\*d8
-50 linear \-L 0\*d5:0\*d5
-100 maximum \ 1:0
+\-s Steep filter (band-width = 99%)
+\-b\ 74\-99.7 Any band-width %
+\-a Allow aliasing above the pass-band
.TE
.DT
.SP
+N.B. Override options cannot be used with the `quick' or `low'
+quality algorithms.
+.SP
+All resamplers use filters that can sometimes create `echo' (a.k.a.
+`ringing') artefacts with transient signals such as those that occur
+with `finger snaps' or other highly percussive sounds. Such artefacts are
+much more noticeable to the human ear if they occur before the transient
+(`pre-echo') than if they occur after it (`post-echo'). The phase
+response setting controls the distribution of any transient echo between
+`pre' and `post': with minimum phase, there is no pre-echo but the
+longest post-echo; with linear phase, pre and post echo are in equal
+amounts (in signal terms, but not audibility terms); the intermediate
+phase setting attempts to find the best compromise by selecting a small
+length (and level) of pre-echo and a medium-length post-echo.
+.SP
+Minimum, intermediate, or linear phase response is selected using the
+.BR \-M ,
+.BR \-I ,
+or
+.B \-L
+option; a custom phase response can be created with the
+.B \-p
+option. Note that phase responses between `linear' and `maximum'
+(greater than 50) are rarely useful.
+.SP
+A resampler's band-width setting determines how much of the frequency
+content of the original signal (w.r.t. the original sample rate when
+up-sampling, or the new sample rate when down-sampling) is preserved
+during conversion. The term `pass-band' is used to refer to all frequencies
+up to the band-width point (e.g. for 44\*d1kHz sampling rate, and a
+resampling band-width of 95%, the pass-band represents frequencies from
+0Hz (D.C.) to circa 21kHz). Increasing the resampler's band-width
+results in a slower conversion and can increase transient echo
+artefacts (and vice versa).
+.SP
The
+.B \-s
+`steep filter' option changes resampling band-width from the default 95%
+(based on the 3dB point), to 99%. The
.B \-b
-.I BANDWIDTH
-(74\-99\*d7 %) option allows the preserved audio bandwidth to be reduced
-from the default (99%) and thus also reduce ringing. For example,
-changing the bandwidth to 95% (which, at 44100Hz sampling rate,
-still preserves frequencies up to 21kHz) reduces pre- and post- ringing
-by 80%.
+option allows the band-width to be set to any value in the range
+74\-99\*d7 %, but note that band-width values greater than 99% are not
+recommended for normal use as they can cause excessive transient echo.
.SP
If the
.B \-a
-option is given aliasing above the pass-band is allowed; this reduces
-pre- and post- ringing by 42%. Note that if this option is given, then
+option is given, then aliasing above the pass-band is allowed. For
+example, with 44\*d1kHz sampling rate, and a
+resampling band-width of 95%, this means that frequency content above
+21kHz can be distorted; however, since this is above the pass-band (i.e.
+above the highest frequency of interest/audibility), this may not be a
+problem. The benefits of allowing aliasing are reduced processing time,
+and reduced (by almost half) transient echo artefacts.
+Note that if this option is given, then
the minimum band-width allowable with
.B \-b
increases to 85%.
.SP
-For example, using both \fB\-b 95\fR and \fB\-a\fR reduces all ringing
-by
+Examples:
+.EX
+ sox input.wav -2 output.wav rate -s -a 44100 dither
+.EE
+default (high) quality resampling; overrides: steep filter, allow
+aliasing; to 44\*d1kHz sample rate; dither output to 2-byte (16-bit) WAV
+file.
+.EX
+ sox input.wav -3 output.aiff rate -v -L -b 90 48k
+.EE
+very high quality resampling; overrides: linear phase, band-width 90%;
+to 48kHz sample rate; store output to 3-byte (24-bit) AIFF file.
+.TS
+center;
+c8 c8 c.
+* * *
+.TE
+.DT
.SP
- 100 \- (100 \- 80) \(mu (100 \- 42)% = 88\*d4%
-.SP
-Note that the
+The
.BR key ,
.B speed
and
--- a/src/rate.c
+++ b/src/rate.c
@@ -366,7 +366,7 @@
#define last_stage p->stages[p->level]
#define post_stage p->stages[p->level + 1]
-typedef enum {Default = -1, Quick, Low, Medium, High, Very, Ultra} quality_t;
+typedef enum {Default = -1, Quick, Low, Medium, High, Very} quality_t;
static void rate_init(rate_t * p, rate_shared_t * shared, double factor,
quality_t quality, int interp_order, double phase, double bandwidth,
@@ -376,7 +376,7 @@
assert(factor > 0);
p->factor = factor;
- if (quality < Quick || quality > Ultra)
+ if (quality < Quick || quality > Very)
quality = High;
if (quality != Quick) {
const int max_divisor = 2048; /* Keep coef table size ~< 500kb */
@@ -449,9 +449,7 @@
typedef struct {int len; sample_t const * h; double bw, a;} filter_t;
static filter_t const filters[] = {
{2 * array_length(half_fir_coefs_low) - 1, half_fir_coefs_low, 0,0},
- {0, NULL, .986, 110}, {0, NULL, .986, 125},
- {0, NULL, .986, 170}, {0, NULL, .996, 170},
- };
+ {0, NULL, .931, 110}, {0, NULL, .931, 125}, {0, NULL, .931, 170}};
filter_t const * f = &filters[quality - Low];
double att = allow_aliasing? (34./33)* f->a : f->a;
double bw = bandwidth? 1 - (1 - bandwidth / 100) / TO_3dB : f->bw;
@@ -572,14 +570,13 @@
static int create(sox_effect_t * effp, int argc, char **argv)
{
priv_t * p = (priv_t *) effp->priv;
- int c, callers_optind = optind, callers_opterr = opterr;
- char * dummy_p, * found_at, * opts = "+i:b:p:MILaqlmhvu", * qopts = opts +11;
+ int c;
+ char * dummy_p, * found_at, * opts = "+i:b:p:MILasqlmhv", * qopts = opts +12;
p->quality = -1;
p->phase = 25;
p->shared_ptr = &p->shared;
- --argv, ++argc, optind = 1, opterr = 0; /* re-jig for getopt */
while ((c = getopt(argc, argv, opts)) != -1) switch (c) {
GETOPT_NUMERIC('i', coef_interp, 1 , 3)
GETOPT_NUMERIC('p', phase, 0 , 100)
@@ -587,11 +584,12 @@
case 'M': p->phase = 0; break;
case 'I': p->phase = 25; break;
case 'L': p->phase = 50; break;
+ case 's': p->bandwidth = 99; break;
case 'a': p->allow_aliasing = sox_true; break;
default: if ((found_at = strchr(qopts, c))) p->quality = found_at - qopts;
else {sox_fail("unknown option `-%c'", optopt); return lsx_usage(effp);}
}
- argc-=optind, argv+=optind, optind = callers_optind, opterr = callers_opterr;
+ argc -= optind, argv += optind;
if ((unsigned)p->quality < 2 && (p->bandwidth || p->phase != 25 || p->allow_aliasing)) {
sox_fail("override options not allowed with this quality level");
@@ -664,19 +662,27 @@
sox_effect_handler_t const * sox_rate_effect_fn(void)
{
static sox_effect_handler_t handler = {
- "rate", "[-q|-l|-m|-h|-v] [-p PHASE|-M|-I|-L] [-b BANDWIDTH] [-a] [RATE[k]]"
- "\n\n\tQuality\t\tPhase\tBW % Rej dB\tTypical Use"
- "\n -q\tquick & dirty\tLin.\tn/a ~30 @ Fs/4\tplayback on ancient hardware"
- "\n -l\tlow\t\t\"\t80\t100\tplayback on old hardware"
- "\n -m\tmedium\t\tInt.\t99\t100\taudio playback"
- "\n -h\thigh\t\t\"\t99\t125\t16-bit master (use with dither)"
- "\n -v\tvery high\t\"\t99\t175\t24-bit master"
- "\n\nOverrides (for -m, -h, -v):"
- "\n -p 0-100\t0=minimum, 25=intermediate, 50=linear, 100=maximum"
- "\n -M/I/L\t\tphase=min./int./lin."
- "\n -b 74-99.7\t%"
- "\n -a\t\tallow aliasing"
- , SOX_EFF_RATE, create, start, flow, drain, stop, NULL, sizeof(priv_t)
+ "rate", 0, SOX_EFF_RATE | SOX_EFF_GETOPT,
+ create, start, flow, drain, stop, 0, sizeof(priv_t)
};
+ static char const * lines[] = {
+ "[-q|-l|-m|-h|-v] [override-options] RATE[k]",
+ " PHASE BAND-",
+ " QUALITY RESPONSE WIDTH REJ dB TYPICAL USE",
+ " -q quick linear n/a ~30 @ Fs/4 playback on ancient hardware",
+ " -l low linear 80% 100 playback on old hardware",
+ " -m medium interm. 95% 100 audio playback",
+ " -h high (default) interm. 95% 125 16-bit mastering (use with dither)",
+ " -v very high interm. 95% 175 24-bit mastering",
+ " OVERRIDE OPTIONS (only with -m, -h, -v)",
+ " -M/-I/-L Phase response = minimum/intermediate/linear",
+ " -p 0-100 Any phase response (0 = minimum, 25 = intermediate,",
+ " 50 = linear, 100 = maximum)",
+ " -s Steep filter (band-width = 99%)",
+ " -b 74-99.7 Any band-width %",
+ " -a Allow aliasing above the pass-band",
+ };
+ static char * usage;
+ handler.usage = lsx_usage_lines(&usage, lines, array_length(lines));
return &handler;
}