shithub: sox

Download patch

ref: aa3dc68a1719f6115ccacfaab145eca19da38ece
parent: cbbb79d8f0981aa9b2585f4f1f2c64356af50afb
author: Ulrich Klauer <ulrich@chirlu.de>
date: Fri Jan 20 20:34:03 EST 2012

Rewritten trim effect

From-scratch rewrite of the trim effect. Any number of parameters may
be given, and the effect will alternate between discarding and copying
audio at each position. Time values may also be specified relative to
the end of audio, if audio length is known.

The old "trim" syntax is a special case of the extended syntax (only
one or two parameters allowed, first must not use "=", no positions
relative to end-of-audio), so backwards compatibility is maintained.

This also resolves feature request 2941349.

--- a/ChangeLog
+++ b/ChangeLog
@@ -84,6 +84,8 @@
   o Calculate output audio length for most effects. (Ulrich Klauer)
   o Fix problems with several effects when the buffer size was not evenly
     divisible by the number of channels. [3420899] (Ulrich Klauer)
+  o Complete rewrite of the trim effect with extended syntax (backwards
+    compatible) and capabilities. [FR 2941349] (Ulrich Klauer)
 
 Misc:
 
--- a/FEATURES.in
+++ b/FEATURES.in
@@ -93,7 +93,7 @@
 ** pad: Pad (usually) the ends of the audio with silence
 ** silence: Remove portions of silence from the audio
 ** splice: Perform the equivalent of a cross-faded tape splice
-** trim: Trim the ends of the audio
+** trim: Cut portions out of the audio
 ** vad: Voice activity detector
 
 * Mixing effects
--- a/sox.1
+++ b/sox.1
@@ -3945,24 +3945,39 @@
 .I depth
 (default 40).
 .TP
-\fBtrim \fIstart\fR [\fIlength\fR\^|\^\fB=\fIend\fR]
-Trim can trim off unwanted audio from the beginning and end of the
-audio.  Audio is not sent to the output stream until
-the \fIstart\fR location is reached.
+\fBtrim\fR {[\fB=\fR\^|\^\fB\-\fR]\fIposition\fR}
+Cuts portions out of the audio.  Any number of \fIposition\fRs may be
+given; audio is not sent to the output until the first \fIposition\fR
+is reached.  The effect then alternates between copying and discarding
+audio at each \fIposition\fR.
 .SP
-The optional \fIlength\fR parameter gives the length of audio to output
-after the \fIstart\fR sample and is thus used to trim off the end of the
-audio.  Alternatively, an absolute end location can be given by
-preceding it with an equals sign.  Using a value of 0 for the \fIstart\fR
-parameter will allow trimming off the end only.
+If a \fIposition\fR is preceded by an equals or minus sign, it is
+interpreted relative to the beginning or the end of the audio,
+respectively.  (The audio length must be known for end-relative
+locations to work.)  Otherwise, it is considered an offset from the
+previous \fIposition\fR, or from the start of audio for the first
+parameter.  Using a value of 0 for the first \fIposition\fR
+parameter allows copying from the beginning of the audio.
 .SP
-Both parameters can be specified using either an amount of time or an
+All parameters can be specified using either an amount of time or an
 exact count of samples.  The format for specifying lengths in time is
-hh:mm:ss.frac.  A start value of 1:30\*d5 will not start until 1 minute,
-thirty and \(12 seconds into the audio.  The format for specifying
-sample counts is the number of samples with the letter `s' appended to
-it.  A value of 8000s for the \fIstart\fR parameter will wait until
+hh:mm:ss.frac.  A value of 1:30\*d5 for the first parameter will not
+start until 1 minute, thirty and \(12 seconds into the audio.  The format
+for specifying sample counts is the number of samples with the letter `s'
+appended to it.  A value of 8000s for the first parameter will wait until
 8000 samples are read before starting to process audio.
+.SP
+For example,
+.EX
+   sox infile outfile trim 0 10
+.EE
+will copy the first ten seconds, while
+.EX
+   play infile trim 12:34 =15:00 -2:00
+.EE
+will play from 12 minutes 34 seconds into the audio up to 15 minutes into
+the audio (i.e. 2 minutes and 26 seconds long), then resume playing two
+minutes before the end of audio.
 .TP
 \fBupsample\fR [\fIfactor\fR]
 Upsample the signal by an integer factor: \fIfactor\fR\-1 zero-value
--- a/src/trim.c
+++ b/src/trim.c
@@ -1,221 +1,230 @@
-/* July 5, 1991
- * Copyright 1991 Lance Norskog And Sundry Contributors
- * This source code is freely redistributable and may be used for
- * any purpose.  This copyright notice must be maintained.
- * Lance Norskog And Sundry Contributors are not responsible for
- * the consequences of using this software.
+/* libSoX effect: trim - cut portions out of the audio
+ *
+ * First version written 01/2012 by Ulrich Klauer.
+ * Replaces an older trim effect originally written by Curt Zirzow in 2000.
+ *
+ * Copyright 2012 Chris Bagwell and SoX Contributors
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #include "sox_i.h"
-#include <string.h>
 
 typedef struct {
-    /* options here */
-    char *start_str;
-    char *end_str;
-    sox_bool end_is_absolute;
-
-    /* options converted to values */
-    uint64_t start;
-    uint64_t length;
-
-    /* internal stuff */
-    uint64_t index;
-    uint64_t trimmed;
+  /* parameters */
+  unsigned int num_pos;
+  struct {
+    uint64_t sample; /* NB: wide samples */
+    char *argstr;
+    enum {
+      a_start, a_latest, a_end
+    } anchor;
+  } *pos;
+  /* state */
+  unsigned int current_pos;
+  uint64_t samples_read; /* NB: wide samples */
+  sox_bool copying;
+  sox_bool uses_end;
 } priv_t;
 
-/*
- * Process options
- */
-static int sox_trim_getopts(sox_effect_t * effp, int argc, char **argv)
+static int parse(sox_effect_t *effp, int argc, char **argv)
 {
-    char *end;
-    priv_t * trim = (priv_t *) effp->priv;
-    uint64_t samples;
-    const char *n;
+  priv_t *p = (priv_t*) effp->priv;
+  unsigned int i;
   --argc, ++argv;
-
-    /* Do not know sample rate yet so hold off on completely parsing
-     * time related strings.
-     */
-    switch (argc) {
-        case 2:
-            end = argv[1];
-            if (*end == '=') {
-                trim->end_is_absolute = sox_true;
-                end++;
-            } else trim->end_is_absolute = sox_false;
-            trim->end_str = lsx_strdup(end);
-            /* Do a dummy parse to see if it will fail */
-            n = lsx_parsesamples(0., trim->end_str, &samples, 't');
-            if (!n || *n)
-              return lsx_usage(effp);
-            trim->length = samples;
-        case 1:
-            trim->start_str = lsx_strdup(argv[0]);
-            /* Do a dummy parse to see if it will fail */
-            n = lsx_parsesamples(0., trim->start_str, &samples, 't');
-            if (!n || *n)
-              return lsx_usage(effp);
-            trim->start = samples;
-            break;
-        default:
-            return lsx_usage(effp);
-
-    }
-    return (SOX_SUCCESS);
+  p->num_pos = argc;
+  lsx_Calloc(p->pos, p->num_pos);
+  p->uses_end = sox_false;
+  for (i = 0; i < p->num_pos; i++) {
+    uint64_t dummy;
+    const char *arg = argv[i];
+    if (arg[0] == '=') {
+      p->pos[i].anchor = a_start;
+      arg++;
+    } else if (arg[0] == '-') {
+      p->pos[i].anchor = a_end;
+      p->uses_end = sox_true;
+      arg++;
+    } else
+      p->pos[i].anchor = a_latest;
+    p->pos[i].argstr = lsx_strdup(arg);
+    /* dummy parse to check for syntax errors */
+    arg = lsx_parsesamples(0., arg, &dummy, 't');
+    if (!arg || *arg)
+      return lsx_usage(effp);
+  }
+  return SOX_SUCCESS;
 }
 
-/*
- * Start processing
- */
-static int sox_trim_start(sox_effect_t * effp)
+static int start(sox_effect_t *effp)
 {
-    priv_t * trim = (priv_t *) effp->priv;
-    uint64_t samples;
+  priv_t *p = (priv_t*) effp->priv;
+  uint64_t in_length = effp->in_signal.length != SOX_UNKNOWN_LEN ?
+    effp->in_signal.length / effp->in_signal.channels : SOX_UNKNOWN_LEN;
+  uint64_t last_seen = 0;
+  sox_bool open_end;
+  unsigned int i;
 
-    if (lsx_parsesamples(effp->in_signal.rate, trim->start_str,
-                        &samples, 't') == NULL)
-      return lsx_usage(effp);
-    trim->start = samples;
+  p->copying = sox_false;
 
-    if (trim->end_str)
-    {
-        if (lsx_parsesamples(effp->in_signal.rate, trim->end_str,
-                    &samples, 't') == NULL)
-          return lsx_usage(effp);
-        trim->length = samples;
-        if (trim->end_is_absolute) {
-            if (trim->length < trim->start) {
-                lsx_warn("end earlier than start");
-                trim->length = 0;
-                  /* with trim->end_str != NULL, this really means zero */
-            } else
-                trim->length -= trim->start;
+  /* calculate absolute positions */
+  if (in_length == SOX_UNKNOWN_LEN && p->uses_end) {
+    lsx_fail("Can't use positions relative to end: audio length is unknown.");
+    return SOX_EOF;
+  }
+  for (i = 0; i < p->num_pos; i++) {
+    uint64_t s, res = 0;
+    if (!lsx_parsesamples(effp->in_signal.rate, p->pos[i].argstr, &s, 't'))
+      return lsx_usage(effp);
+    switch (p->pos[i].anchor) {
+      case a_start: res = s; break;
+      case a_latest: res = last_seen + s; break;
+      case a_end:
+        if (s > in_length) {
+          lsx_fail("Position %u is before start of audio.", i+1);
+          return SOX_EOF;
         }
+        res = in_length - s;
+        break;
     }
-    else
-        trim->length = 0;
-          /* with trim->end_str == NULL, this means indefinite length */
+    last_seen = p->pos[i].sample = res;
+    lsx_debug_more("position %u at %" PRIu64, i+1, res);
+  }
 
-    lsx_debug("start at %" PRIu64 ", length %" PRIu64, trim->start, trim->length);
-
-    /* Account for # of channels */
-    trim->start *= effp->in_signal.channels;
-    trim->length *= effp->in_signal.channels;
-
-    trim->index = 0;
-    trim->trimmed = 0;
-
-    if (effp->in_signal.length != SOX_UNKNOWN_LEN) {
-      if (trim->start >= effp->in_signal.length) {
-        lsx_fail("start position after end of file");
-        return SOX_EOF;
-      } else if (trim->start + trim->length >= effp->in_signal.length) {
-        lsx_fail("end position after end of file");
-        return SOX_EOF;
-      }
+  /* sanity checks */
+  last_seen = 0;
+  for (i = 0; i < p->num_pos; i++) {
+    if (p->pos[i].sample < last_seen) {
+      lsx_fail("Position %u is behind the following position.", i);
+      return SOX_EOF;
     }
+    last_seen = p->pos[i].sample;
+  }
+  if (p->num_pos && in_length != SOX_UNKNOWN_LEN &&
+      p->pos[0].sample > in_length) {
+    lsx_fail("Start position after end of audio.");
+    return SOX_EOF;
+  }
+  if (p->num_pos && in_length != SOX_UNKNOWN_LEN &&
+      p->pos[p->num_pos-1].sample > in_length) {
+    lsx_fail("End position after end of audio.");
+    return SOX_EOF;
+  }
 
-    if (trim->end_str)
-      effp->out_signal.length = trim->length;
-    else if (effp->in_signal.length != SOX_UNKNOWN_LEN)
-      effp->out_signal.length = effp->in_signal.length - trim->start;
-    else
-      effp->out_signal.length = SOX_UNKNOWN_LEN;
+  if (!p->num_pos || (p->num_pos == 1 && !p->pos[0].sample))
+    return SOX_EFF_NULL;
 
-    return (SOX_SUCCESS);
+  /* calculate output length */
+  open_end = p->num_pos % 2;
+  if (open_end && in_length == SOX_UNKNOWN_LEN)
+    effp->out_signal.length = SOX_UNKNOWN_LEN;
+  else {
+    effp->out_signal.length = 0;
+    for (i = 0; i+1 < p->num_pos ; i += 2)
+      effp->out_signal.length +=
+        p->pos[i+1].sample - p->pos[i].sample;
+    if (open_end)
+      effp->out_signal.length +=
+        in_length - p->pos[p->num_pos-1].sample;
+    effp->out_signal.length *= effp->in_signal.channels;
+  }
+
+  return SOX_SUCCESS;
 }
 
-/*
- * Read up to len samples from file.
- * Convert to signed longs.
- * Place in buf[].
- * Return number of samples read.
- */
-static int sox_trim_flow(sox_effect_t * effp, const sox_sample_t *ibuf, sox_sample_t *obuf,
-                 size_t *isamp, size_t *osamp)
+static int flow(sox_effect_t *effp, const sox_sample_t *ibuf,
+    sox_sample_t *obuf, size_t *isamp, size_t *osamp)
 {
-    int result = SOX_SUCCESS;
-    int start_trim = 0;
-    int offset = 0;
-    int done;
+  priv_t *p = (priv_t*) effp->priv;
+  size_t len = min(*isamp, *osamp);
+  size_t channels = effp->in_signal.channels;
+  len /= channels;
+  *isamp = *osamp = 0;
 
-    priv_t * trim = (priv_t *) effp->priv;
+  while (len) {
+    size_t chunk;
 
-    /* Compute the most samples we can process this time */
-    done = ((*isamp < *osamp) ? *isamp : *osamp);
+    if (p->current_pos < p->num_pos &&
+        p->samples_read == p->pos[p->current_pos].sample) {
+      p->copying = !p->copying;
+      p->current_pos++;
+      if (p->current_pos >= p->num_pos && !p->copying)
+        return SOX_EOF;
+    }
 
-    /* Quick check to see if we are trimming off the back side yet.
-     * If so then we can skip trimming from the front side.
-     */
-    if (!trim->trimmed) {
-        if ((trim->index+done) <= trim->start) {
-            /* If we haven't read more than "start" samples, return that
-             * we've read all this buffer without outputing anything
-             */
-            *osamp = 0;
-            *isamp = done;
-            trim->index += done;
-            return (SOX_SUCCESS);
-        } else {
-            start_trim = 1;
-            /* We've read at least "start" samples.  Now find
-             * out where our target data begins and subtract that
-             * from the total to be copied this round.
-             */
-            offset = trim->start - trim->index;
-            done -= offset;
-        }
-    } /* !trimmed */
-
-    if (trim->trimmed || start_trim) {
-        if (trim->end_str && ((trim->trimmed+done) >= trim->length)) {
-            /* Since we know the end is in this block, we set done
-             * to the desired length less the amount already read.
-             */
-            done = trim->length - trim->trimmed;
-            result = SOX_EOF;
-        }
-
-        trim->trimmed += done;
+    chunk = p->current_pos < p->num_pos ?
+      min(len, p->pos[p->current_pos].sample - p->samples_read) : len;
+    if (p->copying) {
+      memcpy(obuf, ibuf, chunk * channels * sizeof(*obuf));
+      obuf += chunk * channels, *osamp += chunk * channels;
     }
-    memcpy(obuf, ibuf+offset, done * sizeof(*obuf));
-    *osamp = done;
-    *isamp = offset + done;
-    trim->index += done;
+    ibuf += chunk * channels; *isamp += chunk * channels;
+    p->samples_read += chunk, len -= chunk;
+  }
 
-    return result;
+  return SOX_SUCCESS;
 }
 
-static int lsx_kill(sox_effect_t * effp)
+static int drain(sox_effect_t *effp, sox_sample_t *obuf UNUSED, size_t *osamp)
 {
-    priv_t * trim = (priv_t *) effp->priv;
+  priv_t *p = (priv_t*) effp->priv;
+  *osamp = 0;
+  if (p->current_pos < p->num_pos)
+    lsx_warn("Audio shorter than expected; last %u positions not reached.",
+      p->num_pos - p->current_pos);
+  return SOX_EOF;
+}
 
-    free(trim->start_str);
-    free(trim->end_str);
-
-    return (SOX_SUCCESS);
+static int lsx_kill(sox_effect_t *effp)
+{
+  unsigned int i;
+  priv_t *p = (priv_t*) effp->priv;
+  for (i = 0; i < p->num_pos; i++)
+    free(p->pos[i].argstr);
+  free(p->pos);
+  return SOX_SUCCESS;
 }
 
-sox_uint64_t sox_trim_get_start(sox_effect_t * effp)
+sox_effect_handler_t const *lsx_trim_effect_fn(void)
 {
-    priv_t * trim = (priv_t *)effp->priv;
-    return trim->start;
+  static sox_effect_handler_t handler = {
+    "trim", "{[=|-]position}",
+    SOX_EFF_MCHAN | SOX_EFF_LENGTH | SOX_EFF_MODIFY,
+    parse, start, flow, drain, NULL, lsx_kill,
+    sizeof(priv_t)
+  };
+  return &handler;
 }
 
-void sox_trim_clear_start(sox_effect_t * effp)
+/* The following functions allow a libSoX client to do a speed
+ * optimization, by asking for the number of samples to be skipped
+ * at the beginning of the audio with sox_trim_get_start(), skipping
+ * that many samples in an efficient way such as seeking within the
+ * input file, then telling us it has been done by calling
+ * sox_trim_clear_start() (the name is historical).
+ * Note that sox_trim_get_start() returns the number of non-wide
+ * samples. */
+
+sox_uint64_t sox_trim_get_start(sox_effect_t *effp)
 {
-    priv_t * trim = (priv_t *)effp->priv;
-    trim->start = 0;
+    priv_t *p = (priv_t*) effp->priv;
+    return p->num_pos ? p->pos[0].sample * effp->in_signal.channels : 0;
 }
 
-const sox_effect_handler_t *lsx_trim_effect_fn(void)
+void sox_trim_clear_start(sox_effect_t *effp)
 {
-  static sox_effect_handler_t handler = {
-    "trim", "start [length|=end]", SOX_EFF_MCHAN | SOX_EFF_LENGTH | SOX_EFF_MODIFY,
-    sox_trim_getopts, sox_trim_start, sox_trim_flow,
-    NULL, NULL, lsx_kill, sizeof(priv_t)
-  };
-  return &handler;
+    priv_t *p = (priv_t*) effp->priv;
+    p->samples_read = p->num_pos ? p->pos[0].sample : 0;
 }