ref: 46b32db78ce8fc0bc2e2e9fd66e9dcdda2a2e503
parent: b70926425660d226a8c5a9e4881d0ecd471126eb
author: robs <robs>
date: Sun Jun 7 15:47:22 EDT 2009
new algorithm
--- a/src/vad.c
+++ b/src/vad.c
@@ -20,19 +20,24 @@
#include <string.h>
typedef struct {
- double mean_sqr, * log_mean_sqrs, min, held_min;
- unsigned power_boot_done, trigger_done, count;
+ double last_meas; /* Previous measure() result; slopes are only updated once this is non-zero */
+ double meas, slope1, slope2; /* TC-controlled: smoothed with the *_tc_mult factors */
} chan_t;
-typedef struct { /* Configuation parameters: */
- double power_boot_mult;
- double power_tc, buffer_time, power_dt, trigger_rise, trigger_time;
+typedef struct { /* Configuration parameters: */
+ double hp_freq, lp_freq, measure_freq, search_step_time; /* Options -h, -l, -f, -q */
+ double measure_duration, search_time, pre_trigger_time, trigger_level; /* Options -m, -s, -p, -t */
+ double trigger_tc, slope_tc1, slope_tc2; /* Options -T, -S, -F */
/* Working variables: */
- double tc_mult; /* Multiplier for decay time constant */
sox_sample_t * buffer;
- unsigned buffer_len, buffer_ptr, flush_done, power_boot_len;
- unsigned trigger_len, log_mean_sqrs_len, log_mean_sqrs_ptr;
+ unsigned search_len, buffer_len, buffer_ptr, flush_done, search_step_len; /* The *_len values are scaled by the channel count in start() */
+
+ double * dft_buf, * window1, * window2; /* Sized dft_len, measure_len and end_bin - start_bin respectively */
+ unsigned dft_len, measure_period, measure_timer, measure_len; /* dft_len: power of 2, >= 16 and >= measure_len; measure_timer counts frames up to measure_period */
chan_t * channels;
+ double trigger_meas_tc_mult, trigger_slope_tc_mult1, trigger_slope_tc_mult2; /* exp(-1 / (tc * measure_freq)): decay per periodic measurement */
+ double search_slope_tc_mult1, search_slope_tc_mult2; /* exp(-1 / (tc / search_step_time)): decay per search step */
+ unsigned start_bin, end_bin; /* DFT bins derived from hp_freq and lp_freq; end_bin is capped at dft_len / 2 */
} priv_t;
static int create(sox_effect_t * effp, int argc, char * * argv)
@@ -40,20 +45,36 @@
priv_t * p = (priv_t *)effp->priv;
int c;
- p->power_tc = .01; p->trigger_rise = 20;
- p->power_boot_mult= 3; p->trigger_time = .05;
- p->power_dt = .1; p->buffer_time = .05;
+ p->hp_freq = 300; /* Defaults; the usage text quotes the same values (Hz) */
+ p->lp_freq = 12500; /* Hz */
+ p->measure_duration = .2; /* s */
+ p->measure_freq = 10; /* Hz */
+ p->trigger_tc = .2; /* s */
+ p->trigger_level = 33;
+ p->search_time = 1; /* s */
+ p->search_step_time = .05; /* s */
+ p->slope_tc1 = .35; /* s; the "slow" slope time constant */
+ p->slope_tc2 = .075; /* s; the "fast" slope time constant */
- while ((c = lsx_getopt(argc, argv, "+c:b:d:r:u:p:")) != -1) switch (c) {
- GETOPT_NUMERIC('c', power_tc ,.001 , 10)
- GETOPT_NUMERIC('b', power_boot_mult , 0 , 10)
- GETOPT_NUMERIC('d', power_dt ,.001 , 10)
- GETOPT_NUMERIC('r', trigger_rise , 1 , 100)
- GETOPT_NUMERIC('u', trigger_time , 0 , 10)
- GETOPT_NUMERIC('p', buffer_time , 0 , 10)
+ while ((c = lsx_getopt(argc, argv, "+h:l:m:f:T:t:s:q:S:F:p:")) != -1) switch (c) {
+ char * parse_ptr; /* Receives the unparsed remainder; anything left over is rejected */
+ case 'h': p->hp_freq = lsx_parse_frequency(lsx_optarg, &parse_ptr);
+ if (p->hp_freq < 10 || *parse_ptr) return lsx_usage(effp);
+ break;
+ case 'l': p->lp_freq = lsx_parse_frequency(lsx_optarg, &parse_ptr);
+ if (p->lp_freq < 1000 || *parse_ptr) return lsx_usage(effp);
+ break;
+ GETOPT_NUMERIC('m', measure_duration, .02, 2)
+ GETOPT_NUMERIC('f', measure_freq , 1 ,100)
+ GETOPT_NUMERIC('T', trigger_tc , .001, 1)
+ GETOPT_NUMERIC('t', trigger_level , 0, 100)
+ GETOPT_NUMERIC('s', search_time , 0 , 4)
+ GETOPT_NUMERIC('q', search_step_time, .002, .1) /* Upper bound must admit the .05 default set above */
+ GETOPT_NUMERIC('S', slope_tc1 , .001, 1)
+ GETOPT_NUMERIC('F', slope_tc2 , .001, 1)
+ GETOPT_NUMERIC('p', pre_trigger_time, 0 , 4)
default: lsx_fail("invalid option `-%c'", optopt); return lsx_usage(effp);
}
- p->trigger_rise *= .1 * log(10.); /* Convert to natural log */
return lsx_optind !=argc? lsx_usage(effp) : SOX_SUCCESS;
}
@@ -60,20 +81,48 @@
static int start(sox_effect_t * effp)
{
priv_t * p = (priv_t *)effp->priv;
- size_t i;
+ unsigned i;
- p->tc_mult = exp(-1 / (p->power_tc * effp->in_signal.rate));
- p->power_boot_len = (p->power_tc * p->power_boot_mult + p->power_dt) * effp->in_signal.rate + .5;
- p->trigger_len = 1 + p->trigger_time * effp->in_signal.rate + .5;
+ unsigned pre_trigger_len = p->pre_trigger_time * effp->in_signal.rate + .5; /* Seconds -> frames, rounded */
+ pre_trigger_len *= effp->in_signal.channels; /* Frames -> interleaved samples */
- p->log_mean_sqrs_len = p->power_dt * effp->in_signal.rate + .5;
- p->channels = lsx_calloc(effp->in_signal.channels, sizeof(*p->channels));
- for (i = 0; i < effp->in_signal.channels; ++i)
- lsx_Calloc(p->channels[i].log_mean_sqrs, p->log_mean_sqrs_len);
- p->buffer_len = p->trigger_len + p->buffer_time * effp->in_signal.rate + .5;
- p->buffer_len *= effp->in_signal.channels;
+ p->measure_len = effp->in_signal.rate * p->measure_duration + .5;
+ p->measure_len *= effp->in_signal.channels; /* NOTE(review): measure() also strides by channels; confirm for multi-channel input */
+ p->search_step_len = effp->in_signal.rate * p->search_step_time + .5;
+ p->search_step_len *= effp->in_signal.channels;
+
+ p->search_len = p->search_time * effp->in_signal.rate + .5;
+ p->search_len *= effp->in_signal.channels;
+ p->search_len += p->measure_len; /* Room for one measurement window beyond the search span */
+
+ p->buffer_len = pre_trigger_len + p->search_len;
p->buffer = lsx_calloc(p->buffer_len, sizeof(*p->buffer));
- p->flush_done = p->log_mean_sqrs_ptr = p->buffer_ptr = 0;
+
+ for (p->dft_len = 16; p->dft_len < p->measure_len; p->dft_len <<= 1); /* Smallest power of 2 (at least 16) holding measure_len */
+ p->dft_buf = lsx_calloc(p->dft_len, sizeof(*p->dft_buf));
+
+ p->window1 = lsx_calloc(p->measure_len, sizeof(*p->window1));
+ for (i = 0; i < p->measure_len; ++i)
+ p->window1[i] = -2. / SOX_SAMPLE_MIN / p->measure_len; /* Flat gain, Hann-shaped by the call below */
+ lsx_apply_hann(p->window1, (int)p->measure_len);
+
+ p->start_bin = p->hp_freq / effp->in_signal.rate * p->dft_len + .5;
+ p->end_bin = p->lp_freq / effp->in_signal.rate * p->dft_len + .5;
+ p->end_bin = min(p->end_bin, p->dft_len / 2); /* NOTE(review): if start_bin >= end_bin the unsigned differences below wrap; confirm option validation */
+ p->window2 = lsx_calloc(p->end_bin - p->start_bin, sizeof(*p->window2));
+ for (i = 0; i < p->end_bin - p->start_bin; ++i)
+ p->window2[i] = 2 * (p->dft_len / 2 + 1.) / (p->end_bin - p->start_bin);
+ lsx_apply_hann(p->window2, (int)(p->end_bin - p->start_bin));
+
+ p->flush_done = p->buffer_ptr = p->measure_timer = 0; /* Reset every running counter so a repeated start() begins clean */
+ p->measure_period = effp->in_signal.rate / p->measure_freq + .5; /* Frames between periodic measurements */
+ p->channels = lsx_calloc(effp->in_signal.channels, sizeof(*p->channels));
+ p->trigger_meas_tc_mult = exp(-1 / (p->trigger_tc * p->measure_freq)); /* Decay factors per periodic measurement */
+ p->trigger_slope_tc_mult1 = exp(-1 / (p->slope_tc1 * p->measure_freq));
+ p->trigger_slope_tc_mult2 = exp(-1 / (p->slope_tc2 * p->measure_freq));
+ p->search_slope_tc_mult1 = exp(-1 / (p->slope_tc1 / p->search_step_time)); /* Decay factors per search step */
+ p->search_slope_tc_mult2 = exp(-1 / (p->slope_tc2 / p->search_step_time));
+ lsx_debug("dft_len=%u measure_len=%u", p->dft_len, p->measure_len); /* Diagnostic only, so not a warning */
return SOX_SUCCESS;
}
@@ -99,53 +148,100 @@
return SOX_SUCCESS;
}
+static double measure(sox_effect_t * effp, size_t x) /* Returns a level >= 0 computed from samples of p->buffer starting at index x */
+{
+ priv_t * p = (priv_t *)effp->priv;
+ double * buf = p->dft_buf;
+ double mult, result = 0;
+ size_t i;
+
+ for (i = 0; i < p->measure_len; ++i) { /* NOTE(review): measure_len already includes the channel count (see start()), yet x advances by channels each pass; confirm for multi-channel input */
+ buf[i] = p->buffer[x] * p->window1[i]; /* Apply the time-domain window */
+ x = (x + effp->in_signal.channels) % p->buffer_len; /* Advance by the channel count, wrapping around the buffer */
+ }
+ memset(buf + i, 0, (p->dft_len - i) * sizeof(*buf)); /* Zero-pad up to dft_len */
+ lsx_safe_rdft((int)p->dft_len, 1, buf); /* First in-place transform (lsx_safe_rdft is defined elsewhere) */
+
+ memset(buf, 0, p->start_bin * sizeof(*buf)); /* Discard everything below start_bin */
+ for (i = p->start_bin; i < p->end_bin; ++i)
+ buf[i] = (sqr(buf[2*i]) + sqr(buf[2*i+1])) * p->window2[i-p->start_bin]; /* Sum of squares of the pair at 2i, 2i+1 (presumably re/im of bin i), weighted by window2 */
+ memset(buf + i, 0, ((p->dft_len >> 1) - i) * sizeof(*buf)); /* Discard everything from end_bin up to dft_len / 2 */
+ lsx_safe_rdft((int)p->dft_len >> 1, 1, buf); /* Second transform, over the half-length band-limited data */
+
+ i = max(1, (size_t)(.01 * p->dft_len + .5)); /* Skip the lowest ~1% of dft_len entries, and always entry 0 */
+ mult = (p->dft_len / 4 + 1.) / (p->dft_len / 4 - i); /* Scale factor depending on how many entries were skipped */
+ for (; i < p->dft_len >> 2; ++i)
+ result += sqr(buf[2*i]) + sqr(buf[2*i+1]);
+ result = log(mult * result); /* Natural log of the scaled sum */
+ result = max(result + 50, 0); /* Offset by 50 and floor at 0 */
+#if 0
+ fprintf(stderr, "%g\n", result);
+#endif
+ return result;
+}
+
static int flow_trigger(sox_effect_t * effp, sox_sample_t const * ibuf,
sox_sample_t * obuf, size_t * ilen, size_t * olen)
{
priv_t * p = (priv_t *)effp->priv;
sox_bool triggered = sox_false;
- size_t i, idone = 0;
+ size_t i, idone = 0, to_flush = 0; /* to_flush: largest look-back found by any channel's search */
while (idone < *ilen && !triggered) {
for (i = 0; i < effp->in_signal.channels; ++i, ++idone) {
chan_t * c = &p->channels[i];
- double tmp, d = SOX_SAMPLE_TO_FLOAT_64BIT(*ibuf,);
p->buffer[p->buffer_ptr++] = *ibuf++;
- /* Might need to add high-pass (e.g. for mains-hum or DC) and/or
- * low-pass (e.g. for noise-shaped dither) filters at this point. */
- c->mean_sqr = p->tc_mult * c->mean_sqr + (1 - p->tc_mult) * sqr(d);
+ if (p->measure_timer == p->measure_period - 1) { /* True once every measure_period frames */
+ size_t flush = p->measure_len;
+ size_t x = (p->buffer_ptr + p->buffer_len - flush) % p->buffer_len; /* Index measure_len samples behind the write pointer */
+ double slope, meas, meas0 = measure(effp, x);
+ c->meas = c->meas * p->trigger_meas_tc_mult + meas0 *(1 - p->trigger_meas_tc_mult); /* Exponentially smoothed level */
+ if (c->last_meas) { /* A slope needs a previous measurement */
+ slope = (meas0 - c->last_meas) * p->measure_freq; /* Change per second */
+ c->slope1 = c->slope1? c->slope1 * p->trigger_slope_tc_mult1 + slope
+ * (1 - p->trigger_slope_tc_mult1) : slope; /* Seeded with the first slope, smoothed afterwards */
+ c->slope2 = c->slope2? c->slope2 * p->trigger_slope_tc_mult2 + slope
+ * (1 - p->trigger_slope_tc_mult2) : slope;
+ }
+ c->last_meas = meas0;
+#if 0
+ if (c->meas)
+ fprintf(stderr, "%g\n", c->meas);
+#endif
+ if (triggered |= c->meas > p->trigger_level) { /* Smoothed level above threshold, or an earlier channel already triggered */
+ sox_bool started = sox_false;
+ do { /* Step backwards through the buffer, search_step_len at a time */
+ x = (x + p->buffer_len - p->search_step_len) % p->buffer_len;
+ flush += p->search_step_len;
+ meas = measure(effp, x);
#if 0
- if (++c->count == 48) {
- fprintf(stderr, "%g\n", 10 * log10(c->mean_sqr));
- c->count = 0;
- }
+ fprintf(stderr, "%g %g %g\n", meas, c->slope1, c->slope2);
#endif
- if (c->mean_sqr >= sqr(1. / SOX_SAMPLE_MIN)) {
- d = log(c->mean_sqr);
- if (c->power_boot_done == p->power_boot_len) {
- if (d - c->held_min < p->trigger_rise)
- c->trigger_done = 0;
- else triggered |= ++c->trigger_done == p->trigger_len;
+ slope = -(meas - c->last_meas) / p->search_step_time; /* Negated because the search walks backwards */
+ c->last_meas = meas;
+ if (slope > 0 || started) { /* Slopes are updated from the first positive slope onwards */
+ c->slope1 = c->slope1 * p->search_slope_tc_mult1 +
+ slope * (1 - p->search_slope_tc_mult1);
+ c->slope2 = c->slope2 * p->search_slope_tc_mult2 +
+ slope * (1 - p->search_slope_tc_mult2);
+ started = sox_true;
+ }
+ } while (flush < p->search_len && ( /* Stop at the search limit, or when neither condition below holds */
+ (meas > meas0 - 12 && (c->slope1 > 4 || c->slope2 > 2)) ||
+ meas > p->trigger_level));
+ to_flush = range_limit(flush, to_flush, p->search_len); /* Presumably clamps flush to [to_flush, search_len]; range_limit is defined elsewhere */
}
- else ++c->power_boot_done;
- tmp = c->log_mean_sqrs[p->log_mean_sqrs_ptr];
- c->log_mean_sqrs[p->log_mean_sqrs_ptr] = d;
- if (tmp <= c->min)
- for (c->min = i = 0; i < p->log_mean_sqrs_len; ++i)
- c->min = min(c->min, c->log_mean_sqrs[i]);
- else c->min = min(c->min, d);
- if (!c->trigger_done)
- c->held_min = c->min;
}
- else c->min = c->power_boot_done = c->trigger_done = 0;
}
if (p->buffer_ptr == p->buffer_len)
p->buffer_ptr = 0;
- if (++p->log_mean_sqrs_ptr == p->log_mean_sqrs_len)
- p->log_mean_sqrs_ptr = 0;
+ if (++p->measure_timer == p->measure_period) /* Advanced once per frame, wrapping at measure_period */
+ p->measure_timer = 0;
}
if (triggered) {
size_t ilen1 = *ilen - idone;
+ p->flush_done = p->search_len - to_flush; /* Count the part of the search span older than the found start as already flushed */
+ p->buffer_ptr = (p->buffer_ptr + p->flush_done) % p->buffer_len; /* Move the buffer position past that skipped part */
(effp->handler.flow = flow_flush)(effp, ibuf, obuf, &ilen1, olen);
idone += ilen1;
}
@@ -163,12 +259,11 @@
static int stop(sox_effect_t * effp)
{
priv_t * p = (priv_t *)effp->priv;
- size_t i;
-
- free(p->buffer);
- for (i = 0; i < effp->in_signal.channels; ++i)
- free(p->channels[i].log_mean_sqrs);
free(p->channels);
+ free(p->window2); /* The remaining buffers are released in reverse order of their allocation in start() */
+ free(p->window1);
+ free(p->dft_buf);
+ free(p->buffer);
return SOX_SUCCESS;
}
@@ -175,11 +270,17 @@
sox_effect_handler_t const * lsx_vad_effect_fn(void)
{
static sox_effect_handler_t handler = {"vad", "[options]"
- "\n\t-c power-time-constant (0.01 s)"
- "\n\t-d max. trigger-rise-time (0.1 s)"
- "\n\t-r trigger-rise (20 dB)"
- "\n\t-u trigger-up-time (0.05 s)"
- "\n\t-p pre-trigger-buffer (0.05 s)"
+ "\n\t-h high-pass-filter (300 Hz)"
+ "\n\t-l low-pass-filter (12500 Hz)"
+ "\n\t-m measure-duration (0.2 s)"
+ "\n\t-f measure-frequency (10 Hz)"
+ "\n\t-T trigger-time-constant (0.2 s)"
+ "\n\t-t trigger-level (33)"
+ "\n\t-s search-time (1 s)"
+ "\n\t-q search-step-time (0.05 s)"
+ "\n\t-S slope-slow-time-constant (0.35 s)"
+ "\n\t-F slope-fast-time-constant (0.075 s)"
+ "\n\t-p pre-trigger-buffer (0 s)"
, SOX_EFF_MCHAN | SOX_EFF_LENGTH | SOX_EFF_MODIFY | SOX_EFF_ALPHA,
create, start, flow_trigger, drain, stop, NULL, sizeof(priv_t)
};