shithub: sox

--- a/ChangeLog

+++ b/ChangeLog

@@ -28,7 +28,8 @@

   o New --output option to write to multiple files in one run.

     Only useful with certain effects like trim and silence. (cbagwell)

   o Display SoX build environment information with -V -V.  (robs)

-  o Add minimum-phase option to `rate' effect.  (robs)

+  o Change to intermediate phase for `rate' effect; phase, band-width

+    and aliasing now configurable.  (robs)

 Other bug fixes:

--- a/soxeffect.7

+++ b/soxeffect.7

@@ -919,52 +919,58 @@

 	play snare.flac phaser 0.6 0.66 3 0.6 2 -t

.EE

.TP

-\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [\fB\-p\fR] [\fIRATE\fR[\fBk\fR]]

-Change the audio sampling rate (i.e. resample the audio)

+\fBrate\fR [\fB\-q\fR\^|\^\fB\-l\fR\^|\^\fB\-m\fR\^|\^\fB\-h\fR\^|\^\fB\-v\fR] [\fB\-p\fR \fIPHASE\fR\^|\^\fB\-M\fR\^|\^\fB\-I\fR\^|\^\fB\-L\fR] [\fB\-b\fR \fIBANDWIDTH\fR] [\fB\-a\fR] [\fIRATE\fR[\fBk\fR]]

+Change the audio sampling rate (i.e. resample the audio) to the given

+.I RATE

 using a quality level as follows:

.TS

 center box;

-cI cI cI cI lI

-cB c c c l.

-\ 	Quality	BW %	Rej dB	Typical Use

+cI cI cI cI cI lI

+cB c c c c l.

+\ 	Quality	Phase	BW %	Rej dB	Typical Use

 \-q	T{

+.na

 quick & dirty

-T}	n/a	\(~=30 @ Fs/4	T{

+T}	Lin.	n/a	\(~=30 @ Fs/4	T{

.na

 playback on ancient hardware

T}

-\-l	low	80	100	T{

+\-l	low	Lin.	80	100	T{

.na

 playback on old hardware

T}

-\-m	medium	99	100	audio playback

-\-h	high	99	125	T{

+\-m	medium	Int.	99	100	audio playback

+\-h	high	Int.	99	125	T{

.na

 16-bit mastering (use with dither)

T}

-\-v	very high	99	175	24-bit mastering\

+\-v	very high	Int.	99	175	24-bit mastering\

.TE

.DT

.SP

 where

 .B BW %

-is the percentage of the audio band that is preserved (based on the 3dB point)

-during sample rate conversion, and

+is the percentage of the audio band that is preserved (based on the 3dB

+point) during sample rate conversion, and

 .B Rej dB

-is the level of noise rejection.

-The default quality level is `high' (\fB\-h\fR).

-The \-q algorithm uses cubic interpolation; the others use linear-phase

-bandwidth-limited interpolation.

+is the level of noise rejection.  The default quality level is `high'

+(\fB\-h\fR).

.SP

-If the

-.B

-\-p

-option is given\*mvalid only with quality = medium or above\*mthen the algorithm

-used is minimum-phase instead of linear-phase.

+The

+.B \-q

+algorithm uses cubic interpolation; the others use bandwidth-limited

+interpolation.

+The

+.B \-q

+and

+.B \-l

+algorithms have a `linear' phase response; for the others, the phase

+response is configurable, but defaults to `intermediate' (see below for

+more details).

.SP

-This effect is invoked automatically if SoX's \fB\-r\fR option specifies a

-rate that is different to that of the input file(s).  Alternatively, this

-effect may be invoked with the output rate parameter

+This effect is invoked automatically if SoX's \fB\-r\fR option specifies

+a rate that is different to that of the input file(s).  Alternatively,

+this effect may be invoked with the output rate parameter

 .I RATE

 and SoX's

 .B \-r

@@ -972,11 +978,88 @@

 equivalent:

.EX

 	sox input.au -r 48k output.au bass -3

-	sox input.au output.au bass -3 rate 48k

+	sox input.au        output.au bass -3 rate 48k

.EE

-though the second command is more flexible as it allows a

+though the second command is more flexible as it allows

 .B rate

-quality option to be given, and it allows the effects to be ordered arbitrarily.

+options to be given, and allows the effects to be ordered arbitrarily.

+.TS

+center;

+c8 c8 c.

+*	*	*

+.TE

+.DT

+.SP

+The following, advanced options apply only to the

+.BR \-m ,

+.B \-h

+and

+.B \-v

+algorithms and are used primarily to control the resampling filter's

+`ringing' (see

+http://ccrma.stanford.edu/~jos/filters/Linear_Phase_Really_Ideal.html

+for a description of this phenomenon).  Note that ringing control is a

+compromise: reducing it comes at the expense of reducing band-width

+and/or increasing aliasing.

+.SP

+The

+.B \-p

+.IR PHASE ,

+.BR \-M ,

+.B \-I

+and

+.B \-L

+options control the phase response of the filter that is used in the

+resampling process.  Any phase value from 0 to 100 may be given with

+.BR \-p ,

+though values greater than 50 are rarely useful.  The following specific

+values are noteworthy:

+.TS

+center box;

+cB cI cI cI

+cB c cB c.

+-p \fIvalue\fR	T{

+Phase response

+T}	Short form	T{

+Ratio of pre- to post- ringing

+T}

+0	minimum	\-M	0:1

+25	intermediate	\-I	0\*d2:0\*d8

+50	linear	\-L	0\*d5:0\*d5

+100	maximum	\ 	1:0

+.TE

+.DT

+.SP

+The

+.B \-b

+.I BANDWIDTH

+(74\-99\*d7 %) option allows the preserved audio bandwidth to be reduced

+from the default (99%) and thus also reduce ringing.  For example,

+changing the bandwidth to 95% (which, at 44100Hz sampling rate,

+still preserves frequencies up to 21kHz) reduces pre- and post- ringing

+by 80%.

+.SP

+If the

+.B \-a

+option is given aliasing above the pass-band is allowed; this reduces

+pre- and post- ringing by 42%.  Note that if this option is given, then

+the minimum band-width allowable with

+.B \-b

+increases to 85%.

+.SP

+For example, using both \fB\-b 95\fR and \fB\-a\fR reduces all ringing

+by

+.SP

+	100 \- (100 \- 80) \(mu (100 \- 42)% = 88\*d4%

+.SP

+Note that the

+.BR key ,

+.B speed

+and

+.B tempo

+effects all use the

+.B rate

+effect at their core.

.SP

 See also

 .BR resample ,

--- a/src/rate.c

+++ b/src/rate.c

@@ -24,6 +24,7 @@

 #include "sox_i.h"

 #include "fft4g.h"

+#include "getopt.h"

 #define  FIFO_SIZE_T int

 #include "fifo.h"

 #include <assert.h>

@@ -203,10 +204,18 @@

   return h;

+#define TO_6dB .5869

+#define TO_3dB ((2/3.) * (.5 + TO_6dB))

+#define MAX_TBW0 36.

+#define MAX_TBW0A (MAX_TBW0 / (1 + TO_3dB))

+#define MAX_TBW3 floor(MAX_TBW0 * TO_3dB)

+#define MAX_TBW3A floor(MAX_TBW0A * TO_3dB)

 static double * design_lpf(

     double Fp,      /* End of pass-band; ~= 0.01dB point */

     double Fc,      /* Start of stop-band */

     double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI */

+    sox_bool allow_aliasing,

     double att,     /* Stop-band attenuation in dB */

     int * num_taps, /* (Single phase.)  0: value will be estimated */

     int k)          /* Number of phases; 0 for single-phase */

@@ -213,8 +222,10 @@

   double tr_bw, beta;

+  if (allow_aliasing)

+    Fc += (Fc - Fp) * TO_3dB;

   Fp /= Fn, Fc /= Fn;        /* Normalise to Fn = 1 */

-  tr_bw = .5869 * (Fc - Fp); /* Transition band-width: 6dB to stop points */

+  tr_bw = TO_6dB * (Fc - Fp); /* Transition band-width: 6dB to stop points */

   if (*num_taps == 0) {        /* TODO this could be cleaner, esp. for k != 0 */

     double n160 = (.0425* att - 1.4) /  tr_bw;    /* Half order for att = 160 */

@@ -229,39 +240,70 @@

   return make_lpf(*num_taps, (Fc - tr_bw) / k, beta, (double)k);

-static void to_minimum_phase(double * h, int len, rate_shared_t * p)

+static void fir_to_phase(rate_shared_t * p, double * * h, int * len,

+    int * post_len, double phase0)

-  double * work;

-  int work_len, n = len, i;

-  for (work_len = 32; n > 1; work_len <<= 1, n >>= 1);

+  double * work, phase = (phase0 > 50 ? 100 - phase0 : phase0) / 50;

+  int work_len, begin, end, peak = 0, i = *len;

+  for (work_len = 32; i > 1; work_len <<= 1, i >>= 1);

   if (!p->bit_rev_table) {

     p->bit_rev_table = calloc(dft_br_len(2*work_len),sizeof(*p->bit_rev_table));

     p->sin_cos_table = calloc(dft_sc_len(2*work_len),sizeof(*p->sin_cos_table));

-  work = calloc(2 * work_len, sizeof(*work));

-  for (i = 0; i < len; ++i) work[2 * i] = h[i];

+  work = calloc(work_len, sizeof(*work));

+  for (i = 0; i < *len; ++i) work[i] = (*h)[i];

-  cdft(2 * work_len, 1, work, p->bit_rev_table, p->sin_cos_table);

-  for (i = 0; i < work_len; ++i) {

-    work[2 * i] = log(sqrt(sqr(work[2 * i]) + sqr(work[2 * i + 1])));

-    work[2 * i + 1] = 0;

+  rdft(work_len, 1, work, p->bit_rev_table, p->sin_cos_table); /* Cepstrum: */

+  work[0] = log(fabs(work[0])), work[1] = log(fabs(work[1]));

+  for (i = 2; i < work_len; i += 2) {

+    work[i] = log(sqrt(sqr(work[i]) + sqr(work[i + 1])));

+    work[i + 1] = 0;

-  cdft(2 * work_len, -1, work, p->bit_rev_table, p->sin_cos_table);

-  for (i = 0; i < 2 * work_len; ++i) work[i] *= 1. / work_len;

-  for (i = 2; i < work_len; ++i)

+  rdft(work_len, -1, work, p->bit_rev_table, p->sin_cos_table);

+  for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len;

+  for (i = 1; i < work_len / 2; ++i) { /* Window to reject acausal components */

     work[i] *= 2;

-  memset(work + work_len + 2, 0, (work_len - 2) * sizeof(*work));

+    work[i + work_len / 2] = 0;

+  }

+  rdft(work_len, 1, work, p->bit_rev_table, p->sin_cos_table);

+  /* Some DFTs require phase unwrapping here, but rdft seems not to. */

-  cdft(2 * work_len, 1, work, p->bit_rev_table, p->sin_cos_table);

-  for (i = 0; i < work_len; ++i) {

-    double x = exp(work[2 * i]);

-    work[2 * i] = x * cos(work[2 * i + 1]);

-    work[2 * i + 1] = x * sin(work[2 * i + 1]);

+  for (i = 2; i < work_len; i += 2) /* Interpolate between linear & min phase */

+    work[i + 1] = phase * M_PI * .5 * i + (1 - phase) * work[i + 1];

+  work[0] = exp(work[0]), work[1] = exp(work[1]);

+  for (i = 2; i < work_len; i += 2) {

+    double x = exp(work[i]);

+    work[i    ] = x * cos(work[i + 1]);

+    work[i + 1] = x * sin(work[i + 1]);

-  cdft(2 * work_len, -1, work, p->bit_rev_table, p->sin_cos_table);

-  for (i = 0; i < len; ++i) h[i] = work[2 * i] * 1. / work_len;

+  rdft(work_len, -1, work, p->bit_rev_table, p->sin_cos_table);

+  for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len;

+  for (i = 1; i < work_len; ++i) if (work[i] > work[peak])   /* Find peak. */

+    peak = i;                                             /* N.B. peak > 0 */

+  if (phase == 0)

+    begin = 0;

+  else if (phase == 1)

+    begin = 1 + (work_len - *len) / 2;

+  else {

+    if (peak < work_len / 4) { /* Low phases can wrap impulse, so unwrap: */

+      memmove(work + work_len / 4, work, work_len / 2 * sizeof(*work));

+      memmove(work, work + work_len * 3 / 4, work_len / 4 * sizeof(*work));

+      peak += work_len / 4;

+    }

+    begin = (.997 - (2 - phase) * .22) * *len + .5;

+    end   = (.997 + (0 - phase) * .22) * *len + .5;

+    begin = peak - begin - (begin & 1);

+    end   = peak + 1 + end + (end & 1);

+    *len = end - begin;

+    *h = realloc(*h, *len * sizeof(**h));

+  }

+  for (i = 0; i < *len; ++i)

+    (*h)[i] = work[begin + (phase0 > 50 ? *len - 1 - i : i)];

+  *post_len = begin + *len - (peak + 1);

   free(work);

@@ -276,7 +318,7 @@

 static void half_band_filter_init(rate_shared_t * p, unsigned which,

     int num_taps, sample_t const h[], double Fp, double atten, int multiplier,

-    sox_bool minimum_phase)

+    double phase, sox_bool allow_aliasing)

   half_band_t * f = &p->half_band[which];

   int dft_length, i;

@@ -292,15 +334,20 @@

     f->post_peak = num_taps / 2;

   else {

-    double * h = design_lpf(Fp, 1., 2., atten, &num_taps, 0);

-    if (minimum_phase)

-      to_minimum_phase(h, num_taps, p);

+    /* Empirical adjustment to negate att degradation with intermediate phase */

+    double att = phase && phase != 50 && phase != 100? atten * (34./33) : atten;

+    double * h = design_lpf(Fp, 1., 2., allow_aliasing, att, &num_taps, 0);

+    if (phase != 50)

+      fir_to_phase(p, &h, &num_taps, &f->post_peak, phase);

+    else f->post_peak = num_taps / 2;

     dft_length = set_dft_length(num_taps);

     f->coefs = calloc(dft_length, sizeof(*f->coefs));

     for (i = 0; i < num_taps; ++i)

       f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]

           = h[i] / dft_length * 2 * multiplier;

-    f->post_peak = minimum_phase? (int)(num_taps * 0.997 + .5) : num_taps / 2;

     free(h);

   assert(num_taps & 1);

@@ -332,7 +379,8 @@

 typedef enum {Default = -1, Quick, Low, Medium, High, Very, Ultra} quality_t;

 static void rate_init(rate_t * p, rate_shared_t * shared, double factor,

-    quality_t quality, int interp_order, sox_bool minimum_phase)

+    quality_t quality, int interp_order, double phase, double bandwidth,

+    sox_bool allow_aliasing)

   int i, mult, divisor = 1;

@@ -392,7 +440,7 @@

     if (!last_stage.shared->poly_fir_coefs) {

       int num_taps = 0, phases = divisor == 1? (1 << f1->phase_bits) : divisor;

       raw_coef_t * coefs =

-          design_lpf(f->pass, f->stop, 1., f->att, &num_taps, phases);

+          design_lpf(f->pass, f->stop, 1., sox_false, f->att, &num_taps, phases);

       assert(num_taps == f->num_coefs * phases - 1);

       last_stage.shared->poly_fir_coefs =

           prepare_coefs(coefs, f->num_coefs, phases, interp_order, mult);

@@ -412,23 +460,26 @@

     static filter_t const filters[] = {

       {2 * array_length(half_fir_coefs_low) - 1, half_fir_coefs_low, 0,0},

       {0, NULL, .986, 110}, {0, NULL, .986, 125},

-      {0, NULL, .986, 165}, {0, NULL, .996, 165},

+      {0, NULL, .986, 170}, {0, NULL, .996, 170},

};

     filter_t const * f = &filters[quality - Low];

+    double att = allow_aliasing? (34./33)* f->a : f->a;

+    double bw = bandwidth? 1 - (1 - bandwidth / 100) / TO_3dB : f->bw;

+    double min = 1 - (allow_aliasing? MAX_TBW0A : MAX_TBW0) / 100;

     assert((size_t)(quality - Low) < array_length(filters));

-    half_band_filter_init(shared, p->upsample, f->len, f->h, f->bw, f->a, mult, minimum_phase);

+    half_band_filter_init(shared, p->upsample, f->len, f->h, bw, att, mult, phase, allow_aliasing);

     if (p->upsample) {

       pre_stage.fn = double_sample; /* Finish off setting up pre-stage */

       pre_stage.preload = shared->half_band[1].post_peak >> 1;

        /* Start setting up post-stage; TODO don't use dft for short filters */

-      if ((1 - p->factor) / (1 - f->bw) > 2)

-        half_band_filter_init(shared, 0, 0, NULL, max(p->factor, .64), f->a, 1, sox_false);

+      if ((1 - p->factor) / (1 - bw) > 2)

+        half_band_filter_init(shared, 0, 0, NULL, max(p->factor, min), att, 1, phase, allow_aliasing);

       else shared->half_band[0] = shared->half_band[1];

     else if (p->level > 0 && p->output_stage_num > p->level) {

-      double pass = f->bw * divisor / factor / 2;

-      if ((1 - pass) / (1 - f->bw) > 2)

-        half_band_filter_init(shared, 1, 0, NULL, max(pass, .64), f->a, 1, sox_false);

+      double pass = bw * divisor / factor / 2;

+      if ((1 - pass) / (1 - bw) > 2)

+        half_band_filter_init(shared, 1, 0, NULL, max(pass, min), att, 1, phase, allow_aliasing);

     post_stage.fn = half_sample;

     post_stage.preload = shared->half_band[0].post_peak;

@@ -523,8 +574,9 @@

 typedef struct {

   sox_rate_t      out_rate;

-  int             quality, coef_interp;

-  sox_bool        minimum_phase;

+  int             quality;

+  double          coef_interp, phase, bandwidth;

+  sox_bool        allow_aliasing;

   rate_t          rate;

   rate_shared_t   shared, * shared_ptr;

 } priv_t;

@@ -532,20 +584,37 @@

 static int create(sox_effect_t * effp, int argc, char **argv)

   priv_t * p = (priv_t *) effp->priv;

-  char * dummy_p, * found_at, * bws = "-qlmhvu", * nums = "123";

+  int c, callers_optind = optind, callers_opterr = opterr;

+  char * dummy_p, * found_at, * opts = "+i:b:p:MILaqlmhvu", * qopts = opts +11;

-  p->quality = p->coef_interp = -1;

+  p->quality = -1;

+  p->phase = 25;

   p->shared_ptr = &p->shared;

-  while (argc && strchr(bws, *argv[0])) {

-    char c, * q = *argv[0] == '-'? *argv + 1 : *argv;

-    while ((c = *q++)) {

-      if      ((found_at = strchr(bws+1, c))) p->quality = found_at - bws -1;

-      else if ((found_at = strchr(nums, c))) p->coef_interp = found_at - nums;

-      else if (c == 'p') p->minimum_phase = sox_true;

-      else return lsx_usage(effp);

-    }

-    argc--; argv++;

+  --argv, ++argc, optind = 1, opterr = 0;                /* re-jig for getopt */

+  while ((c = getopt(argc, argv, opts)) != -1) switch (c) {

+    GETOPT_NUMERIC('i', coef_interp, 1 , 3)

+    GETOPT_NUMERIC('p', phase,  0 , 100)

+    GETOPT_NUMERIC('b', bandwidth,  100 - MAX_TBW3, 99.7)

+    case 'M': p->phase =  0; break;

+    case 'I': p->phase = 25; break;

+    case 'L': p->phase = 50; break;

+    case 'a': p->allow_aliasing = sox_true; break;

+    default: if ((found_at = strchr(qopts, c))) p->quality = found_at - qopts;

+      else {sox_fail("unknown option `-%c'", optopt); return lsx_usage(effp);}

+  argc-=optind, argv+=optind, optind = callers_optind, opterr = callers_opterr;

+  if ((unsigned)p->quality < 2 && (p->bandwidth || p->phase != 25 || p->allow_aliasing)) {

+    sox_fail("override options not allowed with this quality level");

+    return SOX_EOF;

+  }

+  if (p->bandwidth && p->bandwidth < 100 - MAX_TBW3A && p->allow_aliasing) {

+    sox_fail("minimum allowed bandwidth with aliasing is %g%%", 100 - MAX_TBW3A);

+    return SOX_EOF;

+  }

   if (argc) {

     if ((p->out_rate = lsx_parse_frequency(*argv, &dummy_p)) <= 0 || *dummy_p)

       return lsx_usage(effp);

@@ -565,7 +634,8 @@

   effp->out_signal.channels = effp->in_signal.channels;

   effp->out_signal.rate = out_rate;

-  rate_init(&p->rate, p->shared_ptr, effp->in_signal.rate / out_rate, p->quality, p->coef_interp, p->minimum_phase);

+  rate_init(&p->rate, p->shared_ptr, effp->in_signal.rate / out_rate,

+      p->quality, (int)p->coef_interp - 1, p->phase, p->bandwidth, p->allow_aliasing);

   return SOX_SUCCESS;

@@ -606,15 +676,18 @@

 sox_effect_handler_t const * sox_rate_effect_fn(void)

   static sox_effect_handler_t handler = {

-    "rate", "[-q|-l|-m|-h|-v] [-p] [RATE[k]]"

-    "\n\tQuality        BW %    Rej dB    Typical Use"

-    "\n -q\tquick & dirty  n/a   ~30 @ Fs/4  playback on ancient hardware"

-    "\n -l\tlow            80       100      playback on old hardware"

-    "\n -m\tmedium         99       100      audio playback"

-    "\n -h\thigh           99       125      16-bit mastering (use with dither)"

-    "\n -v\tvery high      99       175      24-bit mastering"

-    /* "\n -u\tultra high     99.7     175      " */

-    "\n\n -p\tminimum phase"

+    "rate", "[-q|-l|-m|-h|-v] [-p PHASE|-M|-I|-L] [-b BANDWIDTH] [-a] [RATE[k]]"

+    "\n\n\tQuality\t\tPhase\tBW %   Rej dB\tTypical Use"

+    "\n -q\tquick & dirty\tLin.\tn/a  ~30 @ Fs/4\tplayback on ancient hardware"

+    "\n -l\tlow\t\t\"\t80\t100\tplayback on old hardware"

+    "\n -m\tmedium\t\tInt.\t99\t100\taudio playback"

+    "\n -h\thigh\t\t\"\t99\t125\t16-bit master (use with dither)"

+    "\n -v\tvery high\t\"\t99\t175\t24-bit master"

+    "\n\nOverrides (for -m, -h, -v):"

+    "\n -p 0-100\t0=minimum, 25=intermediate, 50=linear, 100=maximum"

+    "\n -M/I/L\t\tphase=min./int./lin."

+    "\n -b 74-99.7\t%"

+    "\n -a\t\tallow aliasing"

     , SOX_EFF_RATE, create, start, flow, drain, stop, NULL, sizeof(priv_t)

};

   return &handler;

--

⑨