shithub: sox

Download patch

ref: c468b5d2a04728c57718af316fe87f29b60950c4
parent: 4b6a44c0da56efeddf5fe12f72a0e19cc7b2d3b9
author: robs <robs>
date: Wed May 20 12:14:10 EDT 2009

vad effect

--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -36,8 +36,8 @@
   compandt        fade            noiseprof       reverse         tempo
   contrast        fft4g           noisered        silence         tremolo
   crop            filter          output          sinc            trim
-  dcshift         fir             overdrive       skeleff         vol
-  delay           firfit          pad             speed
+  dcshift         fir             overdrive       skeleff         vad
+  delay           firfit          pad             speed           vol
   dft_filter      flanger         pan             splice
 )
 set(formats_srcs
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -67,7 +67,7 @@
 	rate_filters.h rate_half_fir.h rate_poly_fir0.h rate_poly_fir.h \
 	remix.c repeat.c reverb.c reverse.c silence.c sinc.c skeleff.c speed.c \
 	splice.c stat.c stats.c stretch.c swap.c synth.c tempo.c tremolo.c \
-	trim.c vol.c
+	trim.c vad.c vol.c
 if HAVE_PNG
     libsox_la_SOURCES += spectrogram.c
 endif
--- a/src/effects.c
+++ b/src/effects.c
@@ -38,10 +38,12 @@
 }
 
 /* Pass through samples verbatim */
-static int default_flow(sox_effect_t * effp UNUSED, const sox_sample_t *ibuf UNUSED, sox_sample_t *obuf UNUSED, size_t *isamp, size_t *osamp)
+int lsx_flow_copy(sox_effect_t * effp, const sox_sample_t * ibuf,
+    sox_sample_t * obuf, size_t * isamp, size_t * osamp) /* both counts are set to the number copied */
 {
   *isamp = *osamp = min(*isamp, *osamp);
   memcpy(obuf, ibuf, *isamp * sizeof(*obuf));
+  (void)effp; /* effp is not needed to copy samples; reference it so it is not reported unused */
   return SOX_SUCCESS;
 }
 
@@ -67,7 +69,7 @@
   effp->handler = *eh;
   if (!effp->handler.getopts) effp->handler.getopts = default_getopts;
   if (!effp->handler.start  ) effp->handler.start   = default_function;
-  if (!effp->handler.flow   ) effp->handler.flow    = default_flow;
+  if (!effp->handler.flow   ) effp->handler.flow    = lsx_flow_copy;
   if (!effp->handler.drain  ) effp->handler.drain   = default_drain;
   if (!effp->handler.stop   ) effp->handler.stop    = default_function;
   if (!effp->handler.kill   ) effp->handler.kill    = default_function;
--- a/src/effects.h
+++ b/src/effects.h
@@ -89,3 +89,4 @@
   EFFECT(tremolo)
   EFFECT(trim)
+  EFFECT(vad)
   EFFECT(vol)
--- a/src/sox_i.h
+++ b/src/sox_i.h
@@ -257,6 +257,8 @@
 
 /*--------------------------------- Effects ----------------------------------*/
 
+int lsx_flow_copy(sox_effect_t * effp, const sox_sample_t * ibuf,
+    sox_sample_t * obuf, size_t * isamp, size_t * osamp); /* pass-through flow handler, defined in effects.c */
 int lsx_usage(sox_effect_t * effp);
 char * lsx_usage_lines(char * * usage, char const * const * lines, size_t n);
 #define EFFECT(f) extern sox_effect_handler_t const * lsx_##f##_effect_fn(void);
--- /dev/null
+++ b/src/vad.c
@@ -1,0 +1,165 @@
+/* libSoX effect: Voice Activity Detector  (c) 2009 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "sox_i.h"
+#include "getopt.h"
+#include <string.h>
+
+typedef struct {double mean_sqr, *log_mean_sqrs; unsigned trigger_done;} chan_t; /* per-channel detector state */
+
+typedef struct {                /* Configuration parameters: */
+  unsigned      power_boot_len; /* History-ring laps required before triggering is armed */
+  double        power_tc, buffer_time, power_dt, trigger_rise, trigger_time;
+                                /* Working variables: */
+  double        tc_mult;   /* Multiplier for decay time constant */
+  sox_sample_t  * buffer;  /* Ring buffer of the most recent input samples */
+  unsigned      buffer_len, buffer_ptr, flush_done, power_boot_done;
+  unsigned      trigger_len, log_mean_sqrs_len, log_mean_sqrs_ptr;
+  chan_t        * channels; /* One chan_t per input channel */
+} priv_t;
+
+static int create(sox_effect_t * effp, int argc, char * * argv)
+{ /* Parse command-line options into priv_t, starting from built-in defaults. */
+  priv_t * p = (priv_t *)effp->priv;
+  int c;
+
+  p->power_tc       = .025; p->trigger_rise = 20;
+  p->power_boot_len = 2;    p->trigger_time = .01;
+  p->power_dt       = .1;   p->buffer_time  = .05;
+
+  while ((c = getopt(argc, argv, "+c:b:d:r:u:p:")) != -1) switch (c) {
+    GETOPT_NUMERIC('c', power_tc        ,.001 , 10) /* presumably (option, field, min, max) - confirm against the macro */
+    GETOPT_NUMERIC('b', power_boot_len  ,   0 , 10)
+    GETOPT_NUMERIC('d', power_dt        ,.001 , 10)
+    GETOPT_NUMERIC('r', trigger_rise    ,   1 , 100)
+    GETOPT_NUMERIC('u', trigger_time    ,   0 , 10)
+    GETOPT_NUMERIC('p', buffer_time     ,   0 , 10)
+    default: lsx_fail("invalid option `-%c'", optopt); return lsx_usage(effp);
+  }
+  p->trigger_rise *= .1 * log(10.); /* Convert dB to a natural-log power ratio */
+  return optind !=argc? lsx_usage(effp) : SOX_SUCCESS; /* leftover arguments are a usage error */
+}
+
+static int start(sox_effect_t * effp)
+{ /* Convert the time-based settings to sample counts and allocate working state. */
+  priv_t * p = (priv_t *)effp->priv;
+  size_t i;
+
+  p->tc_mult = exp(-1 / (p->power_tc * effp->in_signal.rate)); /* per-sample decay of mean_sqr */
+  p->trigger_len = 1 + p->trigger_time * effp->in_signal.rate + .5;
+
+  p->log_mean_sqrs_len = p->power_dt * effp->in_signal.rate + .5; /* NOTE(review): 0 if power_dt * rate < .5, and lsx_calloc then yields NULL - confirm such rates cannot occur */
+  p->channels = lsx_calloc(effp->in_signal.channels, sizeof(*p->channels));
+  for (i = 0; i < effp->in_signal.channels; ++i)
+    lsx_Calloc(p->channels[i].log_mean_sqrs, p->log_mean_sqrs_len);
+
+  p->buffer_len = p->trigger_len + p->buffer_time * effp->in_signal.rate + .5; /* length per channel... */
+  p->buffer_len *= effp->in_signal.channels; /* ...scaled to cover all channels */
+  p->buffer = lsx_calloc(p->buffer_len, sizeof(*p->buffer));
+  p->power_boot_done = p->flush_done = p->log_mean_sqrs_ptr = p->buffer_ptr = 0;
+  return SOX_SUCCESS;
+}
+
+static int flow_flush(sox_effect_t * effp, sox_sample_t const * ibuf,
+    sox_sample_t * obuf, size_t * ilen, size_t * olen)
+{ /* Emit the buffered samples, then switch the effect to lsx_flow_copy. */
+  priv_t * p = (priv_t *)effp->priv;
+  size_t odone = min(p->buffer_len - p->flush_done, *olen); /* still to flush, capped by output space */
+  size_t odone1 = min(odone, p->buffer_len - p->buffer_ptr); /* part that lies before the ring wraps */
+
+  memcpy(obuf, p->buffer + p->buffer_ptr, odone1 * sizeof(*obuf));
+  if ((p->buffer_ptr += odone1) == p->buffer_len) { /* reached the end: continue from the start */
+    memcpy(obuf + odone1, p->buffer, (odone - odone1) * sizeof(*obuf));
+    p->buffer_ptr = odone - odone1;
+  }
+  if ((p->flush_done += odone) == p->buffer_len) { /* whole ring emitted */
+    size_t olen1 = *olen - odone;
+    (effp->handler.flow = lsx_flow_copy)(effp, ibuf, obuf +odone, ilen, &olen1); /* later calls go straight to the copy */
+    odone += olen1;
+  }
+  else *ilen = 0; /* consume no input until the flush is complete */
+  *olen = odone;
+  return SOX_SUCCESS;
+}
+
+static int flow_trigger(sox_effect_t * effp, sox_sample_t const * ibuf,
+    sox_sample_t * obuf, size_t * ilen, size_t * olen)
+{ /* Buffer input, emitting nothing, until a sustained power rise is seen in any channel. */
+  priv_t * p = (priv_t *)effp->priv;
+  sox_bool triggered = sox_false;
+  size_t i, idone = 0;
+
+  while (idone < *ilen && !triggered) { /* each pass takes one sample per channel */
+    for (i = 0; i < effp->in_signal.channels; ++i, ++idone) {
+      chan_t * c = &p->channels[i];
+      double d = SOX_SAMPLE_TO_FLOAT_64BIT(*ibuf,);
+      p->buffer[p->buffer_ptr++] = *ibuf++; /* kept so flow_flush can emit it later */
+      c->mean_sqr = p->tc_mult * c->mean_sqr + (1 - p->tc_mult) * sqr(d);
+      d = log(c->mean_sqr); /* -inf while mean_sqr is exactly 0 */
+      if (p->power_boot_done >= p->power_boot_len) { /* armed only after enough ring laps */
+        if (d - c->log_mean_sqrs[p->log_mean_sqrs_ptr] >= p->trigger_rise)
+          triggered |= ++c->trigger_done == p->trigger_len; /* rise held for trigger_len samples */
+        else c->trigger_done = 0; /* also taken for NaN (-inf minus -inf), so digital silence never triggers */
+      }
+      c->log_mean_sqrs[p->log_mean_sqrs_ptr] = d;
+    }
+    if (p->buffer_ptr == p->buffer_len)
+      p->buffer_ptr = 0;
+    if (++p->log_mean_sqrs_ptr == p->log_mean_sqrs_len)
+      ++p->power_boot_done, p->log_mean_sqrs_ptr = 0; /* completed one lap of the history ring */
+  }
+  if (triggered) {
+    size_t ilen1 = *ilen - idone;
+    (effp->handler.flow = flow_flush)(effp, ibuf, obuf, &ilen1, olen); /* flow_flush handles this and later calls */
+    idone += ilen1;
+  }
+  else *olen = 0;
+  *ilen = idone;
+  return SOX_SUCCESS;
+}
+
+static int drain(sox_effect_t * effp, sox_sample_t * obuf, size_t * olen)
+{ /* Run whichever flow handler is current with no input, so pending output can still be emitted. */
+  size_t ilen = 0;
+  return effp->handler.flow(effp, NULL, obuf, &ilen, olen); /* NOTE(review): lsx_flow_copy passes this NULL ibuf to memcpy with length 0 - confirm acceptable */
+}
+
+static int stop(sox_effect_t * effp)
+{ /* Release everything allocated by start(). */
+  priv_t * p = (priv_t *)effp->priv;
+  size_t i;
+
+  free(p->buffer);
+  for (i = 0; i < effp->in_signal.channels; ++i)
+    free(p->channels[i].log_mean_sqrs); /* per-channel history ring */
+  free(p->channels);
+  return SOX_SUCCESS;
+}
+
+sox_effect_handler_t const * lsx_vad_effect_fn(void)
+{ /* Return the static handler for the vad effect; the usage text lists every option create() accepts. */
+  static sox_effect_handler_t handler = {"vad", "[options]"
+    "\n\t-c power-time-constant (0.025 s)"
+    "\n\t-b power-boot-periods  (2)"
+    "\n\t-d trigger-rise-time   (0.1 s)"
+    "\n\t-r trigger-rise        (20 dB)"
+    "\n\t-u trigger-up-time     (0.01 s)"
+    "\n\t-p pre-trigger-buffer  (0.05 s)"
+    , SOX_EFF_MCHAN | SOX_EFF_LENGTH | SOX_EFF_MODIFY,
+    create, start, flow_trigger, drain, stop, NULL, sizeof(priv_t)};
+  return &handler;
+}
--- a/src/xmalloc.h
+++ b/src/xmalloc.h
@@ -26,6 +26,7 @@
 void * lsx_realloc(void * ptr, size_t newsize);
 #define lsx_malloc(size) lsx_realloc(NULL, (size))
 #define lsx_calloc(n,s) ((n)*(s)? memset(lsx_malloc((n)*(s)),0,(n)*(s)) : NULL)
+#define lsx_Calloc(v,n)  v = lsx_calloc(n,sizeof(*(v))) /* assign v a zeroed array of n elements of *v's type */
 #define lsx_strdup(p) ((p)? strcpy((char *)lsx_malloc(strlen(p) + 1), p) : NULL)
 #define lsx_memdup(p,s) ((p)? memcpy(lsx_malloc(s), p, s) : NULL)
 #define lsx_valloc(v,n)  v = lsx_malloc((n)*sizeof(*(v)))