ref: dfdca3cec4e6fb7c79e47abfee1f23d6da3f0d48
dir: /src/vad.c/
/* libSoX effect: Voice Activity Detector (c) 2009 robs@users.sourceforge.net
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or (at
* your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
* General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "sox_i.h"
#include "sgetopt.h"
#include <string.h>
/* Per-channel detector state. */
typedef struct {
  double mean_sqr;          /* Exponentially smoothed mean of the squared samples */
  double * log_mean_sqrs;   /* Circular history of log(mean_sqr), one entry per frame */
  double min;               /* Running minimum tracked over the history */
  double held_min;          /* Copy of min taken while trigger_done is zero */
  unsigned power_boot_done; /* Samples counted so far towards power_boot_len */
  unsigned trigger_done;    /* Consecutive samples satisfying the rise test */
  unsigned count;           /* Referenced only by the disabled debug print */
} chan_t;
/* Effect-instance state: user-configurable parameters followed by the
 * working variables derived from them in start(). */
typedef struct {
  /* Configuration parameters: */
  double power_boot_mult;     /* -b: scales power_tc when sizing the power boot period */
  double power_tc;            /* -c: power measurement time constant (s) */
  double buffer_time;         /* -p: pre-trigger buffer duration (s) */
  double power_dt;            /* -d: max. trigger rise time (s); sizes the log-power history */
  double trigger_rise;        /* -r: required rise; given in dB, held as a natural-log ratio */
  double trigger_time;        /* -u: time the rise must be sustained (s) */

  /* Working variables: */
  double tc_mult;             /* Multiplier for decay time constant */
  sox_sample_t * buffer;      /* Circular buffer of the most recent input samples */
  unsigned buffer_len;        /* Capacity of buffer, in samples (all channels) */
  unsigned buffer_ptr;        /* Next write (or, when flushing, read) position in buffer */
  unsigned flush_done;        /* Number of buffered samples already flushed */
  unsigned power_boot_len;    /* Samples of valid power required before triggering is allowed */
  unsigned trigger_len;       /* Consecutive above-threshold samples needed to trigger */
  unsigned log_mean_sqrs_len; /* Length of each channel's log-power history */
  unsigned log_mean_sqrs_ptr; /* Current position in the log-power histories */
  chan_t * channels;          /* Per-channel state, one entry per input channel */
} priv_t;
/* Parse the effect's command-line options into the private state.
 *
 * Every parameter is first given its default value and then optionally
 * overridden by a range-checked numeric option.  Returns SOX_SUCCESS when
 * all arguments were consumed, otherwise the result of lsx_usage(). */
static int create(sox_effect_t * effp, int argc, char * * argv)
{
  priv_t * p = (priv_t *)effp->priv;
  int c;

  /* Defaults */
  p->power_tc        = .01;
  p->power_boot_mult = 3;
  p->power_dt        = .1;
  p->trigger_rise    = 20;
  p->trigger_time    = .05;
  p->buffer_time     = .05;

  while ((c = lsx_getopt(argc, argv, "+c:b:d:r:u:p:")) != -1) switch (c) {
    GETOPT_NUMERIC('c', power_tc        ,.001 , 10)
    GETOPT_NUMERIC('b', power_boot_mult ,   0 , 10)
    GETOPT_NUMERIC('d', power_dt        ,.001 , 10)
    GETOPT_NUMERIC('r', trigger_rise    ,   1 , 100)
    GETOPT_NUMERIC('u', trigger_time    ,   0 , 10)
    GETOPT_NUMERIC('p', buffer_time     ,   0 , 10)
    default: lsx_fail("invalid option `-%c'", optopt); return lsx_usage(effp);
  }

  /* The rise is entered in dB but compared against differences of
   * log(mean_sqr), so rescale it: dB * ln(10) / 10. */
  p->trigger_rise *= .1 * log(10.);

  /* Any argument left over after the options is an error. */
  if (lsx_optind != argc)
    return lsx_usage(effp);
  return SOX_SUCCESS;
}
/* Derive the working variables from the configured parameters and the
 * input signal, and allocate the per-channel histories and the circular
 * sample buffer.  Always returns SOX_SUCCESS. */
static int start(sox_effect_t * effp)
{
  priv_t * p = (priv_t *)effp->priv;
  double const rate = effp->in_signal.rate;
  size_t ch;

  /* Per-sample decay factor of the mean-square smoother. */
  p->tc_mult = exp(-1 / (p->power_tc * rate));

  /* Durations converted to sample counts (the + .5 rounds to nearest). */
  p->power_boot_len = (p->power_tc * p->power_boot_mult + p->power_dt) * rate + .5;
  p->trigger_len = 1 + p->trigger_time * rate + .5;
  p->log_mean_sqrs_len = p->power_dt * rate + .5;

  /* Zeroed state plus a log-power history for every channel. */
  p->channels = lsx_calloc(effp->in_signal.channels, sizeof(*p->channels));
  for (ch = 0; ch < effp->in_signal.channels; ++ch) {
    lsx_Calloc(p->channels[ch].log_mean_sqrs, p->log_mean_sqrs_len);
  }

  /* The buffer spans the trigger period plus the pre-trigger time, for
   * all channels interleaved. */
  p->buffer_len = p->trigger_len + p->buffer_time * rate + .5;
  p->buffer_len *= effp->in_signal.channels;
  p->buffer = lsx_calloc(p->buffer_len, sizeof(*p->buffer));

  p->buffer_ptr = 0;
  p->log_mean_sqrs_ptr = 0;
  p->flush_done = 0;
  return SOX_SUCCESS;
}
/* Flow handler used once the detector has triggered: emits the contents
 * of the circular buffer, oldest sample first, and, as soon as the whole
 * buffer has been written out, installs lsx_flow_copy as the flow handler
 * and lets it process the pending input.
 *
 * Until the buffer has been completely flushed no input is consumed
 * (*ilen is set to 0).  On return *olen holds the number of samples
 * written to obuf. */
static int flow_flush(sox_effect_t * effp, sox_sample_t const * ibuf,
    sox_sample_t * obuf, size_t * ilen, size_t * olen)
{
  priv_t * p = (priv_t *)effp->priv;
  size_t const pending = p->buffer_len - p->flush_done;
  size_t const to_end = p->buffer_len - p->buffer_ptr;
  size_t written = pending <= *olen ? pending : *olen; /* Buffered samples emitted by this call */
  size_t const head = written <= to_end ? written : to_end; /* Up to the end of the buffer */
  size_t const tail = written - head;                  /* Remainder, after wrapping */

  memcpy(obuf, p->buffer + p->buffer_ptr, head * sizeof(*obuf));
  p->buffer_ptr += head;
  if (p->buffer_ptr == p->buffer_len) {
    /* Reached the end of the circular buffer: continue from its start. */
    memcpy(obuf + head, p->buffer, tail * sizeof(*obuf));
    p->buffer_ptr = tail;
  }

  p->flush_done += written;
  if (p->flush_done != p->buffer_len) {
    *ilen = 0;
  }
  else {
    /* Buffer exhausted: from now on input is simply copied to output. */
    size_t room = *olen - written;
    effp->handler.flow = lsx_flow_copy;
    lsx_flow_copy(effp, ibuf, obuf + written, ilen, &room);
    written += room;
  }
  *olen = written;
  return SOX_SUCCESS;
}
/* Initial flow handler: consumes input, producing no output, until a
 * sustained rise in signal power is detected on any channel.
 *
 * Every input sample is stored in the circular buffer.  For each channel a
 * smoothed mean-square power is maintained; while it is at or above
 * sqr(1. / SOX_SAMPLE_MIN), its natural log is entered into a per-channel
 * circular history whose minimum is tracked.  Once power_boot_len such
 * samples have been seen, the detector counts consecutive samples whose
 * log-power exceeds the held minimum by at least trigger_rise; reaching
 * trigger_len on any channel triggers the effect.
 *
 * On triggering, the flow handler is switched to flow_flush, which is
 * invoked immediately with whatever input remains.  Otherwise *olen is set
 * to 0.  On return *ilen holds the number of input samples consumed. */
static int flow_trigger(sox_effect_t * effp, sox_sample_t const * ibuf,
    sox_sample_t * obuf, size_t * ilen, size_t * olen)
{
  priv_t * p = (priv_t *)effp->priv;
  sox_bool triggered = sox_false;
  size_t i, j, idone = 0;

  /* One iteration per frame (one sample for each channel). */
  while (idone < *ilen && !triggered) {
    for (i = 0; i < effp->in_signal.channels; ++i, ++idone) {
      chan_t * c = &p->channels[i];
      double tmp, d = SOX_SAMPLE_TO_FLOAT_64BIT(*ibuf,);

      p->buffer[p->buffer_ptr++] = *ibuf++;

      /* Might need to add high-pass (e.g. for mains-hum or DC) and/or
       * low-pass (e.g. for noise-shaped dither) filters at this point. */
      c->mean_sqr = p->tc_mult * c->mean_sqr + (1 - p->tc_mult) * sqr(d);
#if 0
      if (++c->count == 48) {
        fprintf(stderr, "%g\n", 10 * log10(c->mean_sqr));
        c->count = 0;
      }
#endif
      if (c->mean_sqr >= sqr(1. / SOX_SAMPLE_MIN)) {
        d = log(c->mean_sqr);
        if (c->power_boot_done == p->power_boot_len) {
          /* Booted: count consecutive samples that satisfy the rise test. */
          if (d - c->held_min < p->trigger_rise)
            c->trigger_done = 0;
          else triggered |= ++c->trigger_done == p->trigger_len;
        }
        else ++c->power_boot_done;

        /* Replace the oldest history entry with the new value. */
        tmp = c->log_mean_sqrs[p->log_mean_sqrs_ptr];
        c->log_mean_sqrs[p->log_mean_sqrs_ptr] = d;
        if (tmp <= c->min) {
          /* The entry just evicted may have been the minimum, so rescan
           * the whole history.  A dedicated index is used here: reusing
           * the channel index `i' would end the channel loop early and
           * leave the remaining channels of this frame unprocessed. */
          c->min = 0;
          for (j = 0; j < p->log_mean_sqrs_len; ++j)
            c->min = min(c->min, c->log_mean_sqrs[j]);
        }
        else c->min = min(c->min, d);

        /* The reference minimum is frozen while a rise is being counted. */
        if (!c->trigger_done)
          c->held_min = c->min;
      }
      else c->min = c->power_boot_done = c->trigger_done = 0;
    }
    /* Both circular positions wrap on frame boundaries. */
    if (p->buffer_ptr == p->buffer_len)
      p->buffer_ptr = 0;
    if (++p->log_mean_sqrs_ptr == p->log_mean_sqrs_len)
      p->log_mean_sqrs_ptr = 0;
  }

  if (triggered) {
    /* Hand over to the flush stage, giving it the unconsumed input. */
    size_t ilen1 = *ilen - idone;
    (effp->handler.flow = flow_flush)(effp, ibuf, obuf, &ilen1, olen);
    idone += ilen1;
  }
  else *olen = 0;
  *ilen = idone;
  return SOX_SUCCESS;
}
/* Drain handler: runs the currently installed flow handler with no input
 * so that it can emit any samples it still has to output.  Returns that
 * handler's result; *olen is updated by it. */
static int drain(sox_effect_t * effp, sox_sample_t * obuf, size_t * olen)
{
  size_t no_input = 0;
  int const result = effp->handler.flow(effp, NULL, obuf, &no_input, olen);

  return result;
}
/* Release everything allocated by start(): each channel's log-power
 * history, the channel array itself, and the sample buffer.
 * Always returns SOX_SUCCESS. */
static int stop(sox_effect_t * effp)
{
  priv_t * p = (priv_t *)effp->priv;
  size_t ch = effp->in_signal.channels;

  while (ch-- > 0) {
    free(p->channels[ch].log_mean_sqrs);
  }
  free(p->channels);
  free(p->buffer);
  return SOX_SUCCESS;
}
/* Return the handler descriptor for the `vad' effect.
 *
 * The descriptor is a function-local static, so the same address is
 * returned on every call.  The usage text lists every option accepted by
 * create(), including -b, together with its default value. */
sox_effect_handler_t const * lsx_vad_effect_fn(void)
{
  static sox_effect_handler_t handler = {"vad", "[options]"
    "\n\t-c power-time-constant (0.01 s)"
    "\n\t-b power-boot-time-multiplier (3)"
    "\n\t-d max. trigger-rise-time (0.1 s)"
    "\n\t-r trigger-rise (20 dB)"
    "\n\t-u trigger-up-time (0.05 s)"
    "\n\t-p pre-trigger-buffer (0.05 s)"
    , SOX_EFF_MCHAN | SOX_EFF_LENGTH | SOX_EFF_MODIFY,
    create, start, flow_trigger, drain, stop, NULL, sizeof(priv_t)
  };
  return &handler;
}