shithub: sox

Download patch

ref: 10f28719ce4e3efef7b237fa2f8a339c50a44b7b
parent: 48102ce56e922331446cc7a1c0833ecbb1e6532c
author: cbagwell <cbagwell>
date: Sun Mar 18 21:46:30 EDT 2007

Adding -l option to silence to leave in a minimum amount of silence.
Useful for removing long pauses between words without removing the
pauses altogether.

--- a/ChangeLog
+++ b/ChangeLog
@@ -20,6 +20,8 @@
   o Show (with --octave) compand transfer function.  (robs)
   o Allow e.g. "vol 6dB" (as well as "vol 6 dB").  (robs)
   o Changed deemph to be a true biquad for better accuracy.  (robs)
+  o Add option to silence effect to leave periods of silence
+    in and only strip out extra silence.   Mark Schreiber
 
   Other new features:
 
--- a/sox.1
+++ b/sox.1
@@ -1856,7 +1856,7 @@
 Reverse the audio completely.
 Requires disk space to store the data to be reversed.
 .TP
-\fBsilence \fIabove-periods\fR [\fIduration threshold\fR[\fBd\fR\^|\^\fB%\fR] [\fIbelow-periods duration threshold\fR[\fBd\fR\^|\^\fB%\fR]]
+\fBsilence\fR [\fB\-l\fR] \fIabove-periods\fR [\fIduration threshold\fR[\fBd\fR\^|\^\fB%\fR] [\fIbelow-periods duration threshold\fR[\fBd\fR\^|\^\fB%\fR]]
 .SP
 Removes silence from the beginning, middle, or end of the audio.  Silence is anything below a specified threshold.
 .SP
@@ -1909,6 +1909,13 @@
 effect should restart processing as specified by the
 \fIabove-periods\fR, making it suitable for removing periods of
 silence in the middle of the audio.
+.SP
+The option
+.B \-l
+indicates that \fIbelow-periods\fR \fIduration\fR length of audio
+should be left intact at the beginning of each period of silence.
+This is useful, for example, if you want to remove long pauses
+between words but do not want to remove the pauses completely.
 .SP
 The \fIperiod\fR counts are in units of samples.  \fIDuration\fR counts may be in the format of hh:mm:ss.frac, or the exact count of samples.  \fIThreshold\fR numbers may be suffixed with
 .B d
--- a/src/silence.c
+++ b/src/silence.c
@@ -61,6 +61,8 @@
     sox_size_t   window_size;
     double      rms_sum;
 
+    char        leave_silence;
+
     /* State Machine */
     char        mode;
 } *silence_t;
@@ -83,6 +85,16 @@
     silence_t   silence = (silence_t) effp->priv;
     int parse_count;
 
+    /* check for option switches */
+    silence->leave_silence = sox_false;
+    if (n > 0)
+    {
+        if (!strcmp("-l", *argv)) {
+            n--; argv++;
+            silence->leave_silence = sox_true;
+        }
+    }
+
     if (n < 1)
     {
         sox_fail(sox_silence_effect.usage);
@@ -457,21 +469,43 @@
             break;
 
         case SILENCE_COPY:
-            /* Attempts to copy samples into output buffer.  If not
-             * looking for silence to terminate copy then blindly
-             * copy data into output buffer.
+            /* Attempts to copy samples into output buffer.
              *
-             * If looking for silence, then see if input sample is above
-             * threshold.  If found then flush out hold off buffer
-             * and copy over to output buffer.  Tell user about
-             * input and output processing.
+             * Case B:
+             * If not looking for silence to terminate copy then
+             * blindly copy data into output buffer.
              *
-             * If not above threshold then store in hold off buffer
-             * and do not write to output buffer.  Tell user input
-             * was processed.
+             * Case A:
              *
-             * If hold off buffer is full then stop copying data and
-             * discard data in hold off buffer.
+             * Case 1a:
+             * If previous silence was detected then see if input sample is
+             * above threshold.  If found then flush out hold off buffer
+             * and copy over to output buffer.
+             *
+             * Case 1b:
+             * If no previous silence was detected then see if input sample
+             * is above threshold.  If found then copy directly
+             * to output buffer.
+             *
+             * Case 2:
+             * If not above threshold then silence is detected so
+             * store in hold off buffer and do not write to output
+             * buffer.  Even though it wasn't put in output
+             * buffer, inform user that input was consumed.
+             *
+             * If hold off buffer is full after this then stop 
+             * copying data and discard data in hold off buffer.
+             *
+             * Special leave_silence logic:
+             *
+             * During this mode, go ahead and copy input samples to
+             * the output buffer as well as the holdoff buffer.
+             * Then also short circuit any flushes that would occur
+             * when non-silence is detected since samples were already
+             * copied.  This has the effect of always leaving
+             * holdoff[] amount of silence but deleting any
+             * beyond that amount.
+             *
              */
 silence_copy:
             nrOfTicks = min((*isamp-nrOfInSamplesRead), 
@@ -479,6 +513,7 @@
                            effp->ininfo.channels;
             if (silence->stop)
             {
+                /* Case A */
                 for(i = 0; i < nrOfTicks; i++)
                 {
                     threshold = 1;
@@ -490,16 +525,24 @@
                                                     silence->stop_unit);
                     }
 
-                    /* If above threshold, check to see if we where holding
+                    /* Case 1a
+                     * If above threshold, check to see if we were holding
                      * off previously.  If so then flush this buffer.
                      * We haven't incremented any pointers yet so nothing
                      * is lost.
+                     *
+                     * If user wants to leave_silence, then we
+                     * were already copying the data and so no
+                     * need to flush the old data.  Just resume
+                     * copying as if we were not holding off.
                      */
-                    if (threshold && silence->stop_holdoff_end)
+                    if (threshold && silence->stop_holdoff_end
+                        && !silence->leave_silence)
                     {
                         silence->mode = SILENCE_COPY_FLUSH;
                         goto silence_copy_flush;
                     }
+                    /* Case 1b */
                     else if (threshold)
                     {
                         /* Not holding off so copy into output buffer */
@@ -511,6 +554,7 @@
                             nrOfOutSamplesWritten++;
                         }
                     }
+                    /* Case 2 */
                     else if (!threshold)
                     {
                         /* Add to holdoff buffer */
@@ -517,6 +561,10 @@
                         for (j = 0; j < effp->ininfo.channels; j++)
                         {
                             update_rms(effp, *ibuf);
+                            if (silence->leave_silence) {
+                                *obuf++ = *ibuf;
+                                nrOfOutSamplesWritten++;
+                            }
                             silence->stop_holdoff[
                                 silence->stop_holdoff_end++] = *ibuf++;
                             nrOfInSamplesRead++;
@@ -570,6 +618,7 @@
             } /* Trimming off backend */
             else /* !(silence->stop) */
             {
+                /* Case B */
                 memcpy(obuf, ibuf, sizeof(sox_sample_t)*nrOfTicks*
                                    effp->ininfo.channels);
                 nrOfInSamplesRead += (nrOfTicks*effp->ininfo.channels);