ref: 657353a83b907241f6151ddd7894c4a69ea042d7
parent: beca3a7dde21fe0a09542b49166bc94e954ae8e7
author: robs <robs>
date: Thu Sep 4 03:26:14 EDT 2008
add ringing control to rate effect
--- a/ChangeLog
+++ b/ChangeLog
@@ -28,7 +28,8 @@
o New --output option to write to multiple files in one run.
Only useful with certain effects like trim and silence. (cbagwell)
o Display SoX build environment information with -V -V. (robs)
- o Add minimum-phase option to `rate' effect. (robs)
+ o Change to intermediate phase for `rate' effect; phase, band-width
+ and aliasing now configurable. (robs)
Other bug fixes:
--- a/soxeffect.7
+++ b/soxeffect.7
@@ -919,52 +919,58 @@
play snare.flac phaser 0.6 0.66 3 0.6 2 -t
.EE
.TP
-\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [\fB\-p\fR] [\fIRATE\fR[\fBk\fR]]
-Change the audio sampling rate (i.e. resample the audio)
+\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [\fB\-p\fR \fIPHASE\fR\^|\^\fB\-M\fR\^|\^\fB\-I\fR\^|\^\fB\-L\fR] [\fB\-b\fR \fIBANDWIDTH\fR] [\fB\-a\fR] [\fIRATE\fR[\fBk\fR]]
+Change the audio sampling rate (i.e. resample the audio) to the given
+.I RATE
using a quality level as follows:
.TS
center box;
-cI cI cI cI lI
-cB c c c l.
-\ Quality BW % Rej dB Typical Use
+cI cI cI cI cI lI
+cB c c c c l.
+\ Quality Phase BW % Rej dB Typical Use
\-q T{
+.na
quick & dirty
-T} n/a \(~=30 @ Fs/4 T{
+T} Lin. n/a \(~=30 @ Fs/4 T{
.na
playback on ancient hardware
T}
-\-l low 80 100 T{
+\-l low Lin. 80 100 T{
.na
playback on old hardware
T}
-\-m medium 99 100 audio playback
-\-h high 99 125 T{
+\-m medium Int. 99 100 audio playback
+\-h high Int. 99 125 T{
.na
16-bit mastering (use with dither)
T}
-\-v very high 99 175 24-bit mastering\
+\-v very high Int. 99 175 24-bit mastering\
.TE
.DT
.SP
where
.B BW %
-is the percentage of the audio band that is preserved (based on the 3dB point)
-during sample rate conversion, and
+is the percentage of the audio band that is preserved (based on the 3dB
+point) during sample rate conversion, and
.B Rej dB
-is the level of noise rejection.
-The default quality level is `high' (\fB\-h\fR).
-The \-q algorithm uses cubic interpolation; the others use linear-phase
-bandwidth-limited interpolation.
+is the level of noise rejection. The default quality level is `high'
+(\fB\-h\fR).
.SP
-If the
-.B
-\-p
-option is given\*mvalid only with quality = medium or above\*mthen the algorithm
-used is minimum-phase instead of linear-phase.
+The
+.B \-q
+algorithm uses cubic interpolation; the others use bandwidth-limited
+interpolation.
+The
+.B \-q
+and
+.B \-l
+algorithms have a `linear' phase response; for the others, the phase
+response is configurable, but defaults to `intermediate' (see below for
+more details).
.SP
-This effect is invoked automatically if SoX's \fB\-r\fR option specifies a
-rate that is different to that of the input file(s). Alternatively, this
-effect may be invoked with the output rate parameter
+This effect is invoked automatically if SoX's \fB\-r\fR option specifies
+a rate that is different to that of the input file(s). Alternatively,
+this effect may be invoked with the output rate parameter
.I RATE
and SoX's
.B \-r
@@ -972,11 +978,88 @@
equivalent:
.EX
sox input.au -r 48k output.au bass -3
- sox input.au output.au bass -3 rate 48k
+ sox input.au output.au bass -3 rate 48k
.EE
-though the second command is more flexible as it allows a
+though the second command is more flexible as it allows
.B rate
-quality option to be given, and it allows the effects to be ordered arbitrarily.
+options to be given, and allows the effects to be ordered arbitrarily.
+.TS
+center;
+c8 c8 c.
+* * *
+.TE
+.DT
+.SP
+The following, advanced options apply only to the
+.BR \-m ,
+.B \-h
+and
+.B \-v
+algorithms and are used primarily to control the resampling filter's
+`ringing' (see
+http://ccrma.stanford.edu/~jos/filters/Linear_Phase_Really_Ideal.html
+for a description of this phenomenon). Note that ringing control is a
+compromise: reducing it comes at the expense of reducing band-width
+and/or increasing aliasing.
+.SP
+The
+.B \-p
+.IR PHASE ,
+.BR \-M ,
+.B \-I
+and
+.B \-L
+options control the phase response of the filter that is used in the
+resampling process. Any phase value from 0 to 100 may be given with
+.BR \-p ,
+though values greater than 50 are rarely useful. The following specific
+values are noteworthy:
+.TS
+center box;
+cB cI cI cI
+cB c cB c.
+-p \fIvalue\fR T{
+Phase response
+T} Short form T{
+Ratio of pre- to post- ringing
+T}
+0 minimum \-M 0:1
+25 intermediate \-I 0\*d2:0\*d8
+50 linear \-L 0\*d5:0\*d5
+100 maximum \ 1:0
+.TE
+.DT
+.SP
+The
+.B \-b
+.I BANDWIDTH
+(74\-99\*d7 %) option allows the preserved audio bandwidth to be reduced
+from the default (99%) and thus also reduce ringing. For example,
+changing the bandwidth to 95% (which, at 44100Hz sampling rate,
+still preserves frequencies up to 21kHz) reduces pre- and post- ringing
+by 80%.
+.SP
+If the
+.B \-a
+option is given aliasing above the pass-band is allowed; this reduces
+pre- and post- ringing by 42%. Note that if this option is given, then
+the minimum band-width allowable with
+.B \-b
+increases to 85%.
+.SP
+For example, using both \fB\-b 95\fR and \fB\-a\fR reduces all ringing
+by
+.SP
+ 100 \- (100 \- 80) \(mu (100 \- 42)% = 88\*d4%
+.SP
+Note that the
+.BR key ,
+.B speed
+and
+.B tempo
+effects all use the
+.B rate
+effect at their core.
.SP
See also
.BR resample ,
--- a/src/rate.c
+++ b/src/rate.c
@@ -24,6 +24,7 @@
#include "sox_i.h"
#include "fft4g.h"
+#include "getopt.h"
#define FIFO_SIZE_T int
#include "fifo.h"
#include <assert.h>
@@ -203,10 +204,18 @@
return h;
}
+#define TO_6dB .5869
+#define TO_3dB ((2/3.) * (.5 + TO_6dB))
+#define MAX_TBW0 36.
+#define MAX_TBW0A (MAX_TBW0 / (1 + TO_3dB))
+#define MAX_TBW3 floor(MAX_TBW0 * TO_3dB)
+#define MAX_TBW3A floor(MAX_TBW0A * TO_3dB)
+
static double * design_lpf(
double Fp, /* End of pass-band; ~= 0.01dB point */
double Fc, /* Start of stop-band */
double Fn, /* Nyquist freq; e.g. 0.5, 1, PI */
+ sox_bool allow_aliasing,
double att, /* Stop-band attenuation in dB */
int * num_taps, /* (Single phase.) 0: value will be estimated */
int k) /* Number of phases; 0 for single-phase */
@@ -213,8 +222,10 @@
{
double tr_bw, beta;
+ if (allow_aliasing)
+ Fc += (Fc - Fp) * TO_3dB;
Fp /= Fn, Fc /= Fn; /* Normalise to Fn = 1 */
- tr_bw = .5869 * (Fc - Fp); /* Transition band-width: 6dB to stop points */
+ tr_bw = TO_6dB * (Fc - Fp); /* Transition band-width: 6dB to stop points */
if (*num_taps == 0) { /* TODO this could be cleaner, esp. for k != 0 */
double n160 = (.0425* att - 1.4) / tr_bw; /* Half order for att = 160 */
@@ -229,39 +240,70 @@
return make_lpf(*num_taps, (Fc - tr_bw) / k, beta, (double)k);
}
-static void to_minimum_phase(double * h, int len, rate_shared_t * p)
+static void fir_to_phase(rate_shared_t * p, double * * h, int * len,
+ int * post_len, double phase0)
{
- double * work;
- int work_len, n = len, i;
- for (work_len = 32; n > 1; work_len <<= 1, n >>= 1);
+ double * work, phase = (phase0 > 50 ? 100 - phase0 : phase0) / 50;
+ int work_len, begin, end, peak = 0, i = *len;
+ for (work_len = 32; i > 1; work_len <<= 1, i >>= 1);
if (!p->bit_rev_table) {
p->bit_rev_table = calloc(dft_br_len(2*work_len),sizeof(*p->bit_rev_table));
p->sin_cos_table = calloc(dft_sc_len(2*work_len),sizeof(*p->sin_cos_table));
}
- work = calloc(2 * work_len, sizeof(*work));
- for (i = 0; i < len; ++i) work[2 * i] = h[i];
+ work = calloc(work_len, sizeof(*work));
+ for (i = 0; i < *len; ++i) work[i] = (*h)[i];
- cdft(2 * work_len, 1, work, p->bit_rev_table, p->sin_cos_table);
- for (i = 0; i < work_len; ++i) {
- work[2 * i] = log(sqrt(sqr(work[2 * i]) + sqr(work[2 * i + 1])));
- work[2 * i + 1] = 0;
+ rdft(work_len, 1, work, p->bit_rev_table, p->sin_cos_table); /* Cepstrum: */
+ work[0] = log(fabs(work[0])), work[1] = log(fabs(work[1]));
+ for (i = 2; i < work_len; i += 2) {
+ work[i] = log(sqrt(sqr(work[i]) + sqr(work[i + 1])));
+ work[i + 1] = 0;
}
- cdft(2 * work_len, -1, work, p->bit_rev_table, p->sin_cos_table);
- for (i = 0; i < 2 * work_len; ++i) work[i] *= 1. / work_len;
-
- for (i = 2; i < work_len; ++i)
+ rdft(work_len, -1, work, p->bit_rev_table, p->sin_cos_table);
+ for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len;
+ for (i = 1; i < work_len / 2; ++i) { /* Window to reject acausal components */
work[i] *= 2;
- memset(work + work_len + 2, 0, (work_len - 2) * sizeof(*work));
+ work[i + work_len / 2] = 0;
+ }
+ rdft(work_len, 1, work, p->bit_rev_table, p->sin_cos_table);
+ /* Some DFTs require phase unwrapping here, but rdft seems not to. */
- cdft(2 * work_len, 1, work, p->bit_rev_table, p->sin_cos_table);
- for (i = 0; i < work_len; ++i) {
- double x = exp(work[2 * i]);
- work[2 * i] = x * cos(work[2 * i + 1]);
- work[2 * i + 1] = x * sin(work[2 * i + 1]);
+ for (i = 2; i < work_len; i += 2) /* Interpolate between linear & min phase */
+ work[i + 1] = phase * M_PI * .5 * i + (1 - phase) * work[i + 1];
+
+ work[0] = exp(work[0]), work[1] = exp(work[1]);
+ for (i = 2; i < work_len; i += 2) {
+ double x = exp(work[i]);
+ work[i ] = x * cos(work[i + 1]);
+ work[i + 1] = x * sin(work[i + 1]);
}
- cdft(2 * work_len, -1, work, p->bit_rev_table, p->sin_cos_table);
- for (i = 0; i < len; ++i) h[i] = work[2 * i] * 1. / work_len;
+ rdft(work_len, -1, work, p->bit_rev_table, p->sin_cos_table);
+ for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len;
+
+ for (i = 1; i < work_len; ++i) if (work[i] > work[peak]) /* Find peak. */
+ peak = i; /* N.B. peak > 0 */
+
+ if (phase == 0)
+ begin = 0;
+ else if (phase == 1)
+ begin = 1 + (work_len - *len) / 2;
+ else {
+ if (peak < work_len / 4) { /* Low phases can wrap impulse, so unwrap: */
+ memmove(work + work_len / 4, work, work_len / 2 * sizeof(*work));
+ memmove(work, work + work_len * 3 / 4, work_len / 4 * sizeof(*work));
+ peak += work_len / 4;
+ }
+ begin = (.997 - (2 - phase) * .22) * *len + .5;
+ end = (.997 + (0 - phase) * .22) * *len + .5;
+ begin = peak - begin - (begin & 1);
+ end = peak + 1 + end + (end & 1);
+ *len = end - begin;
+ *h = realloc(*h, *len * sizeof(**h));
+ }
+ for (i = 0; i < *len; ++i)
+ (*h)[i] = work[begin + (phase0 > 50 ? *len - 1 - i : i)];
+ *post_len = begin + *len - (peak + 1);
free(work);
}
@@ -276,7 +318,7 @@
static void half_band_filter_init(rate_shared_t * p, unsigned which,
int num_taps, sample_t const h[], double Fp, double atten, int multiplier,
- sox_bool minimum_phase)
+ double phase, sox_bool allow_aliasing)
{
half_band_t * f = &p->half_band[which];
int dft_length, i;
@@ -292,15 +334,20 @@
f->post_peak = num_taps / 2;
}
else {
- double * h = design_lpf(Fp, 1., 2., atten, &num_taps, 0);
- if (minimum_phase)
- to_minimum_phase(h, num_taps, p);
+ /* Empirical adjustment to negate att degradation with intermediate phase */
+ double att = phase && phase != 50 && phase != 100? atten * (34./33) : atten;
+
+ double * h = design_lpf(Fp, 1., 2., allow_aliasing, att, &num_taps, 0);
+
+ if (phase != 50)
+ fir_to_phase(p, &h, &num_taps, &f->post_peak, phase);
+ else f->post_peak = num_taps / 2;
+
dft_length = set_dft_length(num_taps);
f->coefs = calloc(dft_length, sizeof(*f->coefs));
for (i = 0; i < num_taps; ++i)
f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]
= h[i] / dft_length * 2 * multiplier;
- f->post_peak = minimum_phase? (int)(num_taps * 0.997 + .5) : num_taps / 2;
free(h);
}
assert(num_taps & 1);
@@ -332,7 +379,8 @@
typedef enum {Default = -1, Quick, Low, Medium, High, Very, Ultra} quality_t;
static void rate_init(rate_t * p, rate_shared_t * shared, double factor,
- quality_t quality, int interp_order, sox_bool minimum_phase)
+ quality_t quality, int interp_order, double phase, double bandwidth,
+ sox_bool allow_aliasing)
{
int i, mult, divisor = 1;
@@ -392,7 +440,7 @@
if (!last_stage.shared->poly_fir_coefs) {
int num_taps = 0, phases = divisor == 1? (1 << f1->phase_bits) : divisor;
raw_coef_t * coefs =
- design_lpf(f->pass, f->stop, 1., f->att, &num_taps, phases);
+ design_lpf(f->pass, f->stop, 1., sox_false, f->att, &num_taps, phases);
assert(num_taps == f->num_coefs * phases - 1);
last_stage.shared->poly_fir_coefs =
prepare_coefs(coefs, f->num_coefs, phases, interp_order, mult);
@@ -412,23 +460,26 @@
static filter_t const filters[] = {
{2 * array_length(half_fir_coefs_low) - 1, half_fir_coefs_low, 0,0},
{0, NULL, .986, 110}, {0, NULL, .986, 125},
- {0, NULL, .986, 165}, {0, NULL, .996, 165},
+ {0, NULL, .986, 170}, {0, NULL, .996, 170},
};
filter_t const * f = &filters[quality - Low];
+ double att = allow_aliasing? (34./33)* f->a : f->a;
+ double bw = bandwidth? 1 - (1 - bandwidth / 100) / TO_3dB : f->bw;
+ double min = 1 - (allow_aliasing? MAX_TBW0A : MAX_TBW0) / 100;
assert((size_t)(quality - Low) < array_length(filters));
- half_band_filter_init(shared, p->upsample, f->len, f->h, f->bw, f->a, mult, minimum_phase);
+ half_band_filter_init(shared, p->upsample, f->len, f->h, bw, att, mult, phase, allow_aliasing);
if (p->upsample) {
pre_stage.fn = double_sample; /* Finish off setting up pre-stage */
pre_stage.preload = shared->half_band[1].post_peak >> 1;
/* Start setting up post-stage; TODO don't use dft for short filters */
- if ((1 - p->factor) / (1 - f->bw) > 2)
- half_band_filter_init(shared, 0, 0, NULL, max(p->factor, .64), f->a, 1, sox_false);
+ if ((1 - p->factor) / (1 - bw) > 2)
+ half_band_filter_init(shared, 0, 0, NULL, max(p->factor, min), att, 1, phase, allow_aliasing);
else shared->half_band[0] = shared->half_band[1];
}
else if (p->level > 0 && p->output_stage_num > p->level) {
- double pass = f->bw * divisor / factor / 2;
- if ((1 - pass) / (1 - f->bw) > 2)
- half_band_filter_init(shared, 1, 0, NULL, max(pass, .64), f->a, 1, sox_false);
+ double pass = bw * divisor / factor / 2;
+ if ((1 - pass) / (1 - bw) > 2)
+ half_band_filter_init(shared, 1, 0, NULL, max(pass, min), att, 1, phase, allow_aliasing);
}
post_stage.fn = half_sample;
post_stage.preload = shared->half_band[0].post_peak;
@@ -523,8 +574,9 @@
typedef struct {
sox_rate_t out_rate;
- int quality, coef_interp;
- sox_bool minimum_phase;
+ int quality;
+ double coef_interp, phase, bandwidth;
+ sox_bool allow_aliasing;
rate_t rate;
rate_shared_t shared, * shared_ptr;
} priv_t;
@@ -532,20 +584,37 @@
static int create(sox_effect_t * effp, int argc, char **argv)
{
priv_t * p = (priv_t *) effp->priv;
- char * dummy_p, * found_at, * bws = "-qlmhvu", * nums = "123";
+ int c, callers_optind = optind, callers_opterr = opterr;
+ char * dummy_p, * found_at, * opts = "+i:b:p:MILaqlmhvu", * qopts = opts +11;
- p->quality = p->coef_interp = -1;
+ p->quality = -1;
+ p->phase = 25;
p->shared_ptr = &p->shared;
- while (argc && strchr(bws, *argv[0])) {
- char c, * q = *argv[0] == '-'? *argv + 1 : *argv;
- while ((c = *q++)) {
- if ((found_at = strchr(bws+1, c))) p->quality = found_at - bws -1;
- else if ((found_at = strchr(nums, c))) p->coef_interp = found_at - nums;
- else if (c == 'p') p->minimum_phase = sox_true;
- else return lsx_usage(effp);
- }
- argc--; argv++;
+
+ --argv, ++argc, optind = 1, opterr = 0; /* re-jig for getopt */
+ while ((c = getopt(argc, argv, opts)) != -1) switch (c) {
+ GETOPT_NUMERIC('i', coef_interp, 1 , 3)
+ GETOPT_NUMERIC('p', phase, 0 , 100)
+ GETOPT_NUMERIC('b', bandwidth, 100 - MAX_TBW3, 99.7)
+ case 'M': p->phase = 0; break;
+ case 'I': p->phase = 25; break;
+ case 'L': p->phase = 50; break;
+ case 'a': p->allow_aliasing = sox_true; break;
+ default: if ((found_at = strchr(qopts, c))) p->quality = found_at - qopts;
+ else {sox_fail("unknown option `-%c'", optopt); return lsx_usage(effp);}
}
+ argc-=optind, argv+=optind, optind = callers_optind, opterr = callers_opterr;
+
+ if ((unsigned)p->quality < 2 && (p->bandwidth || p->phase != 25 || p->allow_aliasing)) {
+ sox_fail("override options not allowed with this quality level");
+ return SOX_EOF;
+ }
+
+ if (p->bandwidth && p->bandwidth < 100 - MAX_TBW3A && p->allow_aliasing) {
+ sox_fail("minimum allowed bandwidth with aliasing is %g%%", 100 - MAX_TBW3A);
+ return SOX_EOF;
+ }
+
if (argc) {
if ((p->out_rate = lsx_parse_frequency(*argv, &dummy_p)) <= 0 || *dummy_p)
return lsx_usage(effp);
@@ -565,7 +634,8 @@
effp->out_signal.channels = effp->in_signal.channels;
effp->out_signal.rate = out_rate;
- rate_init(&p->rate, p->shared_ptr, effp->in_signal.rate / out_rate, p->quality, p->coef_interp, p->minimum_phase);
+ rate_init(&p->rate, p->shared_ptr, effp->in_signal.rate / out_rate,
+ p->quality, (int)p->coef_interp - 1, p->phase, p->bandwidth, p->allow_aliasing);
return SOX_SUCCESS;
}
@@ -606,15 +676,18 @@
sox_effect_handler_t const * sox_rate_effect_fn(void)
{
static sox_effect_handler_t handler = {
- "rate", "[-q|-l|-m|-h|-v] [-p] [RATE[k]]"
- "\n\tQuality BW % Rej dB Typical Use"
- "\n -q\tquick & dirty n/a ~30 @ Fs/4 playback on ancient hardware"
- "\n -l\tlow 80 100 playback on old hardware"
- "\n -m\tmedium 99 100 audio playback"
- "\n -h\thigh 99 125 16-bit mastering (use with dither)"
- "\n -v\tvery high 99 175 24-bit mastering"
- /* "\n -u\tultra high 99.7 175 " */
- "\n\n -p\tminimum phase"
+ "rate", "[-q|-l|-m|-h|-v] [-p PHASE|-M|-I|-L] [-b BANDWIDTH] [-a] [RATE[k]]"
+ "\n\n\tQuality\t\tPhase\tBW % Rej dB\tTypical Use"
+ "\n -q\tquick & dirty\tLin.\tn/a ~30 @ Fs/4\tplayback on ancient hardware"
+ "\n -l\tlow\t\t\"\t80\t100\tplayback on old hardware"
+ "\n -m\tmedium\t\tInt.\t99\t100\taudio playback"
+ "\n -h\thigh\t\t\"\t99\t125\t16-bit master (use with dither)"
+ "\n -v\tvery high\t\"\t99\t175\t24-bit master"
+ "\n\nOverrides (for -m, -h, -v):"
+ "\n -p 0-100\t0=minimum, 25=intermediate, 50=linear, 100=maximum"
+ "\n -M/I/L\t\tphase=min./int./lin."
+ "\n -b 74-99.7\t%"
+ "\n -a\t\tallow aliasing"
, SOX_EFF_RATE, create, start, flow, drain, stop, NULL, sizeof(priv_t)
};
return &handler;