ref: eb205813c2efd425674e14462706aa1cd9a62190
parent: bea27204158fb70c8643450de60c5177ebcce1a0
author: Rob Sykes <robs@users.sourceforge.net>
date: Mon Mar 5 13:55:38 EST 2012
speed-ups for particular rational factors
--- a/src/rate.c
+++ b/src/rate.c
@@ -67,9 +67,9 @@
return result;
}
-typedef struct { /* Data that are shared between channels and filters */
+typedef struct { /* Data that are shared between channels and stages */
sample_t * poly_fir_coefs;
- dft_filter_t half_band[2]; /* [0]: halve; [1]: down/up: halve/double */
+ dft_filter_t half_band[2]; /* Div or mul by n */
} rate_shared_t;
struct stage;
@@ -80,7 +80,7 @@
int pre; /* Number of past samples to store */
int pre_post; /* pre + number of future samples to store */
int preload; /* Number of zero samples to pre-load the fifo */
- int which; /* Which of the 2 half-band filters to use */
+ int which; /* Which, if any, of the 2 dft filters to use */
stage_fn_t fn;
/* For poly_fir & spline: */
union { /* 32bit.32bit fixed point arithmetic */
@@ -94,6 +94,7 @@
} at, step;
int divisor; /* For step: > 1 for rational; 1 otherwise */
double out_in_ratio;
+ int rem, tuple;
} stage_t;
#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
@@ -132,7 +133,6 @@
num_in -= f->dft_length - overlap;
output = fifo_reserve(output_fifo, f->dft_length);
- fifo_trim_by(output_fifo, (f->dft_length + overlap) >> 1);
memcpy(output, input, f->dft_length * sizeof(*output));
lsx_safe_rdft(f->dft_length, 1, output);
@@ -145,8 +145,10 @@
}
lsx_safe_rdft(f->dft_length, -1, output);
- for (j = 1, i = 2; i < f->dft_length - overlap; ++j, i += 2)
+ for (j = 0, i = p->rem; i < f->dft_length - overlap; ++j, i += p->tuple)
output[j] = output[i];
+ p->rem = i - (f->dft_length - overlap);
+ fifo_trim_by(output_fifo, f->dft_length - j);
}
}
@@ -155,18 +157,21 @@
sample_t * output;
int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
rate_shared_t const * s = p->shared;
- dft_filter_t const * f = &s->half_band[1];
+ dft_filter_t const * f = &s->half_band[p->which];
int const overlap = f->num_taps - 1;
- while (num_in > f->dft_length >> 1) {
+ while (p->rem + p->tuple * num_in >= f->dft_length) {
+ div_t divd = div(f->dft_length - overlap - p->rem + p->tuple - 1, p->tuple);
sample_t const * input = fifo_read_ptr(&p->fifo);
- fifo_read(&p->fifo, (f->dft_length - overlap) >> 1, NULL);
- num_in -= (f->dft_length - overlap) >> 1;
+ fifo_read(&p->fifo, divd.quot, NULL);
+ num_in -= divd.quot;
output = fifo_reserve(output_fifo, f->dft_length);
fifo_trim_by(output_fifo, overlap);
- for (j = i = 0; i < f->dft_length; ++j, i += 2)
- output[i] = input[j], output[i+1] = 0;
+ memset(output, 0, f->dft_length * sizeof(*output));
+ for (j = 0, i = p->rem; i < f->dft_length; ++j, i += p->tuple)
+ output[i] = input[j];
+ p->rem = p->tuple - 1 - divd.rem;
lsx_safe_rdft(f->dft_length, 1, output);
output[0] *= f->coefs[0];
@@ -180,9 +185,9 @@
}
}
-static void half_band_filter_init(rate_shared_t * p, unsigned which,
- int num_taps, sample_t const h[], double Fp, double att, int multiplier,
- double phase, sox_bool allow_aliasing)
+static void init_dft_filter(rate_shared_t * p, unsigned which, int num_taps,
+ sample_t const h[], double Fp, double Fc, double Fn, double att,
+ int multiplier, double phase, sox_bool allow_aliasing)
{
dft_filter_t * f = &p->half_band[which];
int dft_length, i;
@@ -198,7 +203,7 @@
f->post_peak = num_taps / 2;
}
else {
- double * h2 = lsx_design_lpf(Fp, 1., 2., allow_aliasing, att, &num_taps, 0);
+ double * h2 = lsx_design_lpf(Fp, Fc, Fn, allow_aliasing, att, &num_taps, 0, -1.);
if (phase != 50)
lsx_fir_to_phase(&h2, &num_taps, &f->post_peak, phase);
@@ -214,8 +219,8 @@
assert(num_taps & 1);
f->num_taps = num_taps;
f->dft_length = dft_length;
- lsx_debug("fir_len=%i dft_length=%i Fp=%g att=%g mult=%i",
- num_taps, dft_length, Fp, att, multiplier);
+ lsx_debug("fir_len=%i dft_length=%i Fp=%g Fc=%g Fn=%g att=%g mult=%i",
+ num_taps, dft_length, Fp, Fc, Fn, att, multiplier);
lsx_safe_rdft(dft_length, 1, f->coefs);
}
@@ -223,7 +228,7 @@
typedef struct {
double factor;
- size_t samples_in, samples_out;
+ uint64_t samples_in, samples_out;
int level, input_stage_num, output_stage_num;
sox_bool upsample;
stage_t * stages;
@@ -237,9 +242,10 @@
static void rate_init(rate_t * p, rate_shared_t * shared, double factor,
quality_t quality, int interp_order, double phase, double bandwidth,
- sox_bool allow_aliasing)
+ sox_bool allow_aliasing, sox_bool old_behaviour)
{
- int i, mult, divisor = 1;
+ int i, tuple, mult, divisor = 1, two_or_three = 2;
+ sox_bool two_factors = sox_false;
assert(factor > 0);
p->factor = factor;
@@ -247,19 +253,26 @@
quality = High;
if (quality != Quick) {
const int max_divisor = 2048; /* Keep coef table size ~< 500kb */
- const double epsilon = 4 / MULT32; /* Scaled to half this at max_divisor */
- p->upsample = p->factor < 1;
+ double epsilon;
+ p->upsample = factor < 1;
for (i = factor, p->level = 0; i >>= 1; ++p->level); /* log base 2 */
factor /= 1 << (p->level + !p->upsample);
+ epsilon = fabs((uint32_t)(factor * MULT32 + .5) / (factor * MULT32) - 1);
for (i = 2; i <= max_divisor && divisor == 1; ++i) {
double try_d = factor * i;
int try = try_d + .5;
- if (fabs(try - try_d) < try * epsilon * (1 - (.5 / max_divisor) * i)) {
- if (try == i) /* Rounded to 1:1? */
- factor = 1, divisor = 2, p->upsample = sox_false;
+ if (fabs(try / try_d - 1) <= epsilon) { /* N.B. beware of long doubles */
+ if (try == i) {
+ factor = 1, divisor = 2;
+ if (p->upsample)
+ p->upsample = sox_false;
+ else ++p->level;
+ }
else factor = try, divisor = i;
}
}
+ if (!old_behaviour && quality != Low && factor == 3 && divisor == 4 && p->level == 1)
+ two_or_three = 3;
}
p->stages = (stage_t *)calloc((size_t)p->level + 4, sizeof(*p->stages)) + 1;
for (i = -1; i <= p->level + 1; ++i) p->stages[i].shared = shared;
@@ -270,9 +283,23 @@
assert(!last_stage.step.parts.fraction);
else if (quality != Quick)
assert(!last_stage.step.parts.integer);
- lsx_debug("i/o=%g; %.9g:%i @ level %i", p->factor, factor, divisor, p->level);
- mult = 1 + p->upsample; /* Compensate for zero-stuffing in double_sample */
+ tuple = 1 + p->upsample;
+ if (!old_behaviour && p->upsample) {
+ if (factor == 2 && divisor == 3)
+ two_factors = sox_true, tuple = divisor;
+ else if (factor == 1) {
+ if (divisor < 6)
+ two_factors = sox_true, tuple = divisor;
+ else for (i = 2; !two_factors && i < 20; ++i)
+ if (!(divisor % i))
+ two_factors = sox_true, tuple = i;
+ }
+ }
+ mult = tuple;
+ lsx_debug("i/o=%.16g; %.12g:%i @ level %i tuple=%i", p->factor, last_stage.step.all / MULT32, divisor, p->level, tuple);
+ lsx_debug("%x.%x", last_stage.step.parts.integer, last_stage.step.parts.fraction);
+
p->input_stage_num = -p->upsample;
p->output_stage_num = p->level;
if (quality == Quick) {
@@ -281,7 +308,7 @@
last_stage.pre_post = max(3, last_stage.step.parts.integer);
last_stage.preload = last_stage.pre = 1;
}
- else if (last_stage.out_in_ratio != 2 || (p->upsample && quality == Low)) {
+ else if ((two_or_three != 3 && last_stage.out_in_ratio != 2 && !two_factors) || (p->upsample && quality == Low)) {
poly_fir_t const * f;
poly_fir1_t const * f1;
int n = 4 * p->upsample + range_limit(quality, Medium, Very) - Medium;
@@ -297,7 +324,7 @@
if (!last_stage.shared->poly_fir_coefs) {
int num_taps = 0, phases = divisor == 1? (1 << f1->phase_bits) : divisor;
raw_coef_t * coefs = lsx_design_lpf(
- f->pass, f->stop, 1., sox_false, f->att, &num_taps, phases);
+ f->pass, f->stop, 1., sox_false, f->att, &num_taps, phases, -1.);
assert(num_taps == f->num_coefs * phases - 1);
last_stage.shared->poly_fir_coefs =
prepare_coefs(coefs, f->num_coefs, phases, interp_order, mult);
@@ -322,22 +349,50 @@
double bw = bandwidth? 1 - (1 - bandwidth / 100) / LSX_TO_3dB : f->bw;
double min = 1 - (allow_aliasing? LSX_MAX_TBW0A : LSX_MAX_TBW0) / 100;
assert((size_t)(quality - Low) < array_length(filters));
- half_band_filter_init(shared, p->upsample, f->len, f->h, bw, att, mult, phase, allow_aliasing);
+ init_dft_filter(shared, p->upsample, f->len, f->h, bw, 1., (double)max(tuple, two_or_three), att, mult, phase, allow_aliasing);
if (p->upsample) {
pre_stage.fn = double_sample; /* Finish off setting up pre-stage */
- pre_stage.preload = shared->half_band[1].post_peak >> 1;
- /* Start setting up post-stage; TODO don't use dft for short filters */
- if ((1 - p->factor) / (1 - bw) > 2)
- half_band_filter_init(shared, 0, 0, NULL, max(p->factor, min), att, 1, phase, allow_aliasing);
- else shared->half_band[0] = shared->half_band[1];
+ pre_stage.preload = shared->half_band[1].post_peak / tuple;
+ pre_stage.rem = shared->half_band[1].post_peak % tuple;
+ pre_stage.tuple = tuple;
+ pre_stage.which = 1;
+ if (two_factors && divisor != tuple) {
+ int other = divisor / tuple;
+ ++p->output_stage_num;
+ init_dft_filter(shared, 0, 0, NULL, 1., (double)tuple, (double)divisor, att, other, phase, allow_aliasing);
+ last_stage.fn = double_sample;
+ last_stage.preload = shared->half_band[0].post_peak / other;
+ last_stage.rem = shared->half_band[0].post_peak % other;
+ last_stage.tuple = other;
+ last_stage.which = 0;
+ }
+ else {
+ /* Start setting up post-stage; TODO don't use dft for short filters */
+ if ((1 - p->factor) / (1 - bw) > 2)
+ init_dft_filter(shared, 0, 0, NULL, max(p->factor, min), 1., 2., att, 1, phase, allow_aliasing);
+ else shared->half_band[0] = shared->half_band[1];
+ if (two_factors && factor == 2) {
+ ++p->output_stage_num;
+ last_stage.fn = half_sample;
+ last_stage.preload = shared->half_band[0].post_peak;
+ last_stage.tuple = 2;
+ } else {
+ post_stage.fn = half_sample;
+ post_stage.preload = shared->half_band[0].post_peak;
+ post_stage.tuple = 2;
+ }
+ }
}
- else if (p->level > 0 && p->output_stage_num > p->level) {
- double pass = bw * divisor / factor / 2;
- if ((1 - pass) / (1 - bw) > 2)
- half_band_filter_init(shared, 1, 0, NULL, max(pass, min), att, 1, phase, allow_aliasing);
+ else {
+ if (p->level > 0 && p->output_stage_num > p->level) {
+ double pass = bw * divisor / factor / 2;
+ if ((1 - pass) / (1 - bw) > 2)
+ init_dft_filter(shared, 1, 0, NULL, max(pass, min), 1., 2., att, 1, phase, allow_aliasing);
+ }
+ post_stage.fn = half_sample;
+ post_stage.preload = shared->half_band[0].post_peak;
+ post_stage.tuple = two_or_three;
}
- post_stage.fn = half_sample;
- post_stage.preload = shared->half_band[0].post_peak;
}
else if (quality == Low && !p->upsample) { /* dft is slower here, so */
post_stage.fn = half_sample_low; /* use normal convolution */
@@ -349,6 +404,7 @@
if (shared->half_band[1].num_taps) {
s->fn = half_sample;
s->preload = shared->half_band[1].post_peak;
+ s->tuple = 2;
s->which = 1;
}
else *s = post_stage;
@@ -393,7 +449,7 @@
static void rate_flush(rate_t * p)
{
fifo_t * fifo = &p->stages[p->output_stage_num].fifo;
- size_t samples_out = p->samples_in / p->factor + .5;
+ uint64_t samples_out = p->samples_in / p->factor + .5;
size_t remaining = samples_out - p->samples_out;
sample_t * buff = calloc(1024, sizeof(*buff));
@@ -429,7 +485,7 @@
sox_rate_t out_rate;
int quality;
double coef_interp, phase, bandwidth;
- sox_bool allow_aliasing;
+ sox_bool allow_aliasing, old_behaviour;
rate_t rate;
rate_shared_t shared, * shared_ptr;
} priv_t;
@@ -438,7 +494,7 @@
{
priv_t * p = (priv_t *) effp->priv;
int c;
- char * dummy_p, * found_at, * opts = "+i:b:p:MILasqlmhv", * qopts = opts +12;
+ char * dummy_p, * found_at, * opts = "+i:b:p:MILaosqlmhv", * qopts = opts +13;
lsx_getopt_t optstate;
lsx_getopt_init(argc, argv, opts, NULL, lsx_getopt_flag_none, 1, &optstate);
@@ -453,8 +509,9 @@
case 'M': p->phase = 0; break;
case 'I': p->phase = 25; break;
case 'L': p->phase = 50; break;
- case 's': p->bandwidth = 99; break;
case 'a': p->allow_aliasing = sox_true; break;
+ case 'o': p->old_behaviour = sox_true; break;
+ case 's': p->bandwidth = 99; break;
default: if ((found_at = strchr(qopts, c))) p->quality = found_at - qopts;
else {lsx_fail("unknown option `-%c'", optstate.opt); return lsx_usage(effp);}
}
@@ -493,7 +550,8 @@
effp->out_signal.channels = effp->in_signal.channels;
effp->out_signal.rate = out_rate;
rate_init(&p->rate, p->shared_ptr, effp->in_signal.rate / out_rate,
- p->quality, (int)p->coef_interp - 1, p->phase, p->bandwidth, p->allow_aliasing);
+ p->quality, (int)p->coef_interp - 1, p->phase, p->bandwidth,
+ p->allow_aliasing, p->old_behaviour);
return SOX_SUCCESS;
}