ref: 8dcd32b1a08c95fd84a7c7b2f8811a40b568a8cd
dir: /src/silence.c/
/* Silence effect for SoX
 * by Heikki Leinonen (heilei@iki.fi) 25.03.2001
 * Major Modifications by Chris Bagwell 06.08.2001
 * Minor addition by Donnie Smith 13.08.2003
 *
 * This effect can delete samples from the start of a sound file
 * until it sees a specified count of samples exceed a given threshold
 * (any of the channels).
 * This effect can also delete samples from the end of a sound file
 * when it sees a specified count of samples below a given threshold
 * (all channels).
 * It may also be used to delete samples anywhere in a sound file.
 * Thresholds can be given as either a percentage or in decibels.
 */
#include "sox_i.h"
#include <string.h>
/* Private data for silence effect. */
/* Modes of the silence state machine; the current one is kept in the
 * "mode" member of priv_t and dispatched on by the flow and drain
 * handlers.  An enum is used instead of macros so the names are visible
 * in a debugger and cannot be mangled by macro expansion.
 */
enum {
    SILENCE_TRIM       = 0, /* discarding input while looking for sound */
    SILENCE_TRIM_FLUSH = 1, /* writing out the start holdoff buffer */
    SILENCE_COPY       = 2, /* copying input, optionally watching for silence */
    SILENCE_COPY_FLUSH = 3, /* writing out the stop holdoff buffer */
    SILENCE_STOP       = 4  /* finished; no more samples are produced */
};
/* Per-instance state of the silence effect: the parsed command-line
 * options, the two holdoff buffers, the sliding RMS window and the
 * current state-machine mode.
 */
typedef struct {
    /* --- Trimming at the start ("above" periods) --- */
    /* Non-zero when a start period count greater than 0 was given. */
    char        start;
    /* Number of above-threshold periods that must be found. */
    int         start_periods;
    /* Copy of the duration argument; re-parsed in start() once the
     * sample rate is known. */
    char        *start_duration_str;
    /* Duration; after start() it is in samples multiplied by channels. */
    size_t   start_duration;
    double      start_threshold;
    char        start_unit; /* "d" for decibels or "%" for percent. */
    /* Set to 1 when the stop period count was given as a negative
     * number: after a stop period the effect goes back to trimming
     * instead of ending. */
    int         restart;
    /* Samples seen while an above-threshold run is being confirmed. */
    sox_sample_t *start_holdoff;
    /* Read position used while flushing start_holdoff. */
    size_t   start_holdoff_offset;
    /* Write position (number of samples stored) in start_holdoff. */
    size_t   start_holdoff_end;
    /* Above-threshold periods found so far. */
    int         start_found_periods;

    /* --- Trimming at the end ("below" periods) --- */
    /* Non-zero when stop arguments were given. */
    char        stop;
    /* Number of below-threshold periods that must be found
     * (stored as a positive value). */
    int         stop_periods;
    /* Copy of the duration argument; re-parsed in start(). */
    char        *stop_duration_str;
    /* Duration; after start() it is in samples multiplied by channels. */
    size_t   stop_duration;
    double      stop_threshold;
    /* Same meaning as start_unit: 'd' or '%'. */
    char        stop_unit;
    /* Samples seen while a below-threshold run is being confirmed. */
    sox_sample_t *stop_holdoff;
    /* Read position used while flushing stop_holdoff. */
    size_t   stop_holdoff_offset;
    /* Write position (number of samples stored) in stop_holdoff. */
    size_t   stop_holdoff_end;
    /* Below-threshold periods found so far. */
    int         stop_found_periods;

    /* --- Sliding RMS window --- */
    /* Circular buffer of squared sample values. */
    double      *window;
    /* Slot that the next sample will replace. */
    double      *window_current;
    /* One past the last slot of window. */
    double      *window_end;
    /* Number of slots in window. */
    size_t   window_size;
    /* Running sum of the values stored in window. */
    double      rms_sum;

    /* Set by the "-l" switch. */
    char        leave_silence;
    /* State Machine */
    /* One of the SILENCE_* mode constants. */
    char        mode;
} priv_t;
/* Reset the RMS window of the effect: every stored squared sample is
 * zeroed, the replacement cursor is rewound to the first slot, the end
 * marker is recomputed from window_size and the running sum is cleared.
 */
static void clear_rms(sox_effect_t * effp)
{
    priv_t * const p = (priv_t *) effp->priv;
    size_t const slots = p->window_size;

    p->rms_sum = 0;
    p->window_end = p->window + slots;
    p->window_current = p->window;
    memset(p->window, 0, slots * sizeof(*p->window));
}
/* Parse the effect's command line:
 *
 *   [ -l ] above_periods [ duration threshold[d|%] ]
 *          [ below_periods duration threshold[d|%] ]
 *
 * The first argv entry is skipped without being looked at.  Durations
 * are only syntax-checked here (with a sample rate of 0) and their text
 * is kept so that start() can parse them again with the real rate.
 *
 * Returns SOX_SUCCESS when the options are valid; otherwise SOX_EOF or
 * whatever lsx_usage() returns.
 */
static int sox_silence_getopts(sox_effect_t * effp, int argc, char **argv)
{
    priv_t * const p = (priv_t *) effp->priv;
    uint64_t samples;
    const char * rest;
    int fields;

    /* Step over the first argument. */
    argc -= 1;
    argv += 1;

    /* Optional "-l" switch. */
    p->leave_silence = sox_false;
    if (argc > 0 && strcmp("-l", argv[0]) == 0) {
        p->leave_silence = sox_true;
        argv += 1;
        argc -= 1;
    }

    if (argc < 1)
        return lsx_usage(effp);

    /* ---- Options for trimming the front ---- */
    p->start = sox_false;
    if (sscanf(argv[0], "%d", &p->start_periods) != 1)
        return lsx_usage(effp);
    if (p->start_periods < 0) {
        lsx_fail("Periods must not be negative");
        return SOX_EOF;
    }
    argv += 1;
    argc -= 1;

    if (p->start_periods > 0) {
        p->start = sox_true;
        if (argc < 2)
            return lsx_usage(effp);

        /* The sample rate is unknown at this point, so keep the text
         * and only check that it parses completely. */
        p->start_duration_str = lsx_strdup(argv[0]);
        rest = lsx_parsesamples(0., p->start_duration_str, &samples, 's');
        if (rest == NULL || *rest != '\0')
            return lsx_usage(effp);
        p->start_duration = samples;

        fields = sscanf(argv[1], "%lf%c", &p->start_threshold,
                        &p->start_unit);
        if (fields < 1)
            return lsx_usage(effp);
        if (fields < 2)
            p->start_unit = '%';   /* no unit given: percent */

        argv += 2;
        argc -= 2;
    }

    /* ---- Options for trimming the back ---- */
    p->stop = sox_false;
    if (argc > 0) {
        if (argc < 3)
            return lsx_usage(effp);
        if (sscanf(argv[0], "%d", &p->stop_periods) != 1)
            return lsx_usage(effp);

        /* A negative count selects restart mode; store its magnitude. */
        if (p->stop_periods < 0) {
            p->stop_periods = -p->stop_periods;
            p->restart = 1;
        } else {
            p->restart = 0;
        }
        p->stop = sox_true;
        argv += 1;
        argc -= 1;

        /* Same deferred-parse scheme as for the start duration. */
        p->stop_duration_str = lsx_strdup(argv[0]);
        rest = lsx_parsesamples(0., p->stop_duration_str, &samples, 's');
        if (rest == NULL || *rest != '\0')
            return lsx_usage(effp);
        p->stop_duration = samples;

        fields = sscanf(argv[1], "%lf%c", &p->stop_threshold,
                        &p->stop_unit);
        if (fields < 1)
            return lsx_usage(effp);
        if (fields < 2)
            p->stop_unit = '%';

        argv += 2;
        argc -= 2;
    }

    /* ---- Range checks on the thresholds ---- */
    if (p->start) {
        switch (p->start_unit) {
        case '%':
            if (p->start_threshold < 0.0 || p->start_threshold > 100.0) {
                lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
                return SOX_EOF;
            }
            break;
        case 'd':
            if (p->start_threshold >= 0.0) {
                lsx_fail("silence threshold should be less than 0.0 dB");
                return SOX_EOF;
            }
            break;
        default:
            lsx_fail("Invalid unit specified");
            return lsx_usage(effp);
        }
    }

    if (p->stop) {
        switch (p->stop_unit) {
        case '%':
            if (p->stop_threshold < 0.0 || p->stop_threshold > 100.0) {
                lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
                return SOX_EOF;
            }
            break;
        case 'd':
            if (p->stop_threshold >= 0.0) {
                lsx_fail("silence threshold should be less than 0.0 dB");
                return SOX_EOF;
            }
            break;
        default:
            lsx_fail("Invalid unit specified");
            return SOX_EOF;
        }
    }

    return SOX_SUCCESS;
}
/* Prepare the effect for a run now that the input signal is known:
 * allocate the RMS window and the two holdoff buffers, convert the
 * saved duration strings into sample counts and select the initial
 * state-machine mode.
 *
 * Returns SOX_SUCCESS, or the result of lsx_usage() when a duration
 * cannot be parsed with the real sample rate.
 */
static int sox_silence_start(sox_effect_t * effp)
{
    priv_t *silence = (priv_t *)effp->priv;
    size_t channels = effp->in_signal.channels;
    size_t holdoff_len;
    uint64_t temp;

    /* When you want to remove silence, small window sizes are
     * better or else RMS will look like non-silence at
     * abrupt changes from loud to silence.
     */
    silence->window_size = (effp->in_signal.rate / 50) *
        effp->in_signal.channels;
    /* For very low rates the product truncates to 0; an empty window
     * would be dereferenced and used as a divisor when computing the
     * RMS, so keep at least one slot.
     */
    if (silence->window_size == 0)
        silence->window_size = 1;

    silence->window = lsx_malloc(silence->window_size * sizeof(double));
    clear_rms(effp);

    /* Now that we know sample rate, reparse duration. */
    if (silence->start)
    {
        if (lsx_parsesamples(effp->in_signal.rate, silence->start_duration_str,
                             &temp, 's') == NULL)
        {
            /* Do not leave the window behind on the error path. */
            free(silence->window);
            silence->window = NULL;
            return lsx_usage(effp);
        }
        silence->start_duration = temp * effp->in_signal.channels;
    }
    if (silence->stop)
    {
        if (lsx_parsesamples(effp->in_signal.rate,silence->stop_duration_str,
                             &temp,'s') == NULL)
        {
            free(silence->window);
            silence->window = NULL;
            return lsx_usage(effp);
        }
        silence->stop_duration = temp * effp->in_signal.channels;
    }

    if (silence->start)
        silence->mode = SILENCE_TRIM;
    else
        silence->mode = SILENCE_COPY;

    /* The flow handler always stores one complete frame ("channels"
     * samples) in a holdoff buffer before it compares the fill level
     * with the duration.  With a duration of 0 the buffer would have
     * no room for that frame, so size each buffer for at least one
     * frame.
     */
    holdoff_len = silence->start_duration;
    if (holdoff_len < channels)
        holdoff_len = channels;
    silence->start_holdoff = lsx_malloc(sizeof(sox_sample_t) * holdoff_len);
    silence->start_holdoff_offset = 0;
    silence->start_holdoff_end = 0;
    silence->start_found_periods = 0;

    holdoff_len = silence->stop_duration;
    if (holdoff_len < channels)
        holdoff_len = channels;
    silence->stop_holdoff = lsx_malloc(sizeof(sox_sample_t) * holdoff_len);
    silence->stop_holdoff_offset = 0;
    silence->stop_holdoff_end = 0;
    silence->stop_found_periods = 0;

    effp->out_signal.length = SOX_UNKNOWN_LEN; /* depends on input data */

    return(SOX_SUCCESS);
}
/* Decide whether a (non-negative) level exceeds a threshold.
 *
 * effp      - effect; only in_signal.precision is read.
 * value     - level to test, expected to be >= 0.
 * threshold - limit, expressed in the unit given by "unit".
 * unit      - '%' to compare as a percentage of SOX_SAMPLE_MAX, 'd' to
 *             compare in decibels (via linear_to_dB); any other value
 *             compares the plain 0..1 ratio.
 *
 * Returns non-zero when the scaled level is strictly greater than the
 * threshold.
 */
static sox_bool aboveThreshold(sox_effect_t const * effp,
    sox_sample_t value /* >= 0 */, double threshold, int unit)
{
  /* When scaling low bit data, noise values got scaled way up */
  /* Only consider the original bits when looking for silence */
  unsigned precision = effp->in_signal.precision;
  /* Number of low-order bits to discard.  The mask is built in
   * unsigned arithmetic with a shift count limited to 0..31: shifting
   * a negative value left, or shifting by the full width of the type
   * (precision of 0 or more than 32), is undefined behaviour.  In
   * those out-of-range cases no bits are masked off.
   */
  unsigned drop_bits = (precision >= 1 && precision < 32) ? 32 - precision : 0;
  uint32_t mask = ~(uint32_t)0 << drop_bits;
  sox_sample_t masked_value = (sox_sample_t)((uint32_t)value & mask);
  double scaled_value = (double)masked_value / SOX_SAMPLE_MAX;

  if (unit == '%')
    scaled_value *= 100;
  else if (unit == 'd')
    scaled_value = linear_to_dB(scaled_value);

  return scaled_value > threshold;
}
/* Return the RMS level the window would have if "sample" replaced the
 * value in the slot at the window cursor.  Nothing is modified; the
 * window itself is only advanced by update_rms().
 */
static sox_sample_t compute_rms(sox_effect_t * effp, sox_sample_t sample)
{
    priv_t const * const p = (priv_t const *) effp->priv;
    double trial_sum;

    /* Running sum with the outgoing slot swapped for the new square. */
    trial_sum = p->rms_sum;
    trial_sum -= *p->window_current;
    trial_sum += (double)sample * (double)sample;

    return (sox_sample_t) sqrt(trial_sum / p->window_size);
}
/* Push "sample" into the RMS window: the square stored in the current
 * slot is taken out of the running sum, replaced by the square of the
 * new sample and added back, then the cursor moves to the next slot,
 * wrapping to the first one at the end of the window.
 */
static void update_rms(sox_effect_t * effp, sox_sample_t sample)
{
    priv_t * const p = (priv_t *) effp->priv;
    double * const slot = p->window_current;

    p->rms_sum -= *slot;
    *slot = (double)sample * (double)sample;
    p->rms_sum += *slot;

    p->window_current = (slot + 1 >= p->window_end) ? p->window : slot + 1;
}
/* Process samples from ibuf to obuf according to the current mode of
 * the silence state machine (silence->mode).
 *
 * effp  - the effect; its private data holds the state machine.
 * ibuf  - interleaved input samples.
 * obuf  - output buffer.
 * isamp - in: number of samples available in ibuf;
 *         out: number of samples consumed.
 * osamp - in: room available in obuf, in samples;
 *         out: number of samples written.
 *
 * Returns SOX_EOF once the required number of stop periods has been
 * found and restart mode is off; SOX_SUCCESS otherwise.
 *
 * The cases of the switch double as goto targets, so one call can move
 * through several modes without returning to the caller.
 */
static int sox_silence_flow(sox_effect_t * effp, const sox_sample_t *ibuf, sox_sample_t *obuf,
                    size_t *isamp, size_t *osamp)
{
    priv_t * silence = (priv_t *) effp->priv;
    int threshold;
    size_t i, j;
    size_t nrOfTicks, /* sometimes wide, sometimes non-wide samples */
      nrOfInSamplesRead, nrOfOutSamplesWritten; /* non-wide samples */
    nrOfInSamplesRead = 0;
    nrOfOutSamplesWritten = 0;
    switch (silence->mode)
    {
        case SILENCE_TRIM:
            /* Reads and discards all input data until it detects a
             * sample that is above the specified threshold.  Turns on
             * copy mode when detected.
             * Need to make sure and copy input in groups of "channels" to
             * prevent getting buffers out of sync.
             * nrOfTicks counts wide samples here.
             */
silence_trim:
            nrOfTicks = min((*isamp-nrOfInSamplesRead),
                            (*osamp-nrOfOutSamplesWritten)) /
                           effp->in_signal.channels;
            for(i = 0; i < nrOfTicks; i++)
            {
                /* A wide sample counts as sound when ANY channel is
                 * above the start threshold.  compute_rms() does not
                 * advance the window, so every channel is tested
                 * against the same window slot.
                 */
                threshold = 0;
                for (j = 0; j < effp->in_signal.channels; j++)
                {
                    threshold |= aboveThreshold(effp,
                                                compute_rms(effp, ibuf[j]),
                                                silence->start_threshold,
                                                silence->start_unit);
                }
                if (threshold)
                {
                    /* Add to holdoff buffer */
                    for (j = 0; j < effp->in_signal.channels; j++)
                    {
                        update_rms(effp, *ibuf);
                        silence->start_holdoff[
                            silence->start_holdoff_end++] = *ibuf++;
                        nrOfInSamplesRead++;
                    }
                    /* A full holdoff buffer means one complete period
                     * of sound has been seen.
                     */
                    if (silence->start_holdoff_end >=
                            silence->start_duration)
                    {
                        if (++silence->start_found_periods >=
                                silence->start_periods)
                        {
                            silence->mode = SILENCE_TRIM_FLUSH;
                            goto silence_trim_flush;
                        }
                        /* Trash holdoff buffer since its not
                         * needed.  Start looking again.
                         */
                        silence->start_holdoff_offset = 0;
                        silence->start_holdoff_end = 0;
                    }
                }
                else /* !above Threshold */
                {
                    /* Silence interrupts the run: drop what was held
                     * back, but keep feeding the RMS window.
                     */
                    silence->start_holdoff_end = 0;
                    for (j = 0; j < effp->in_signal.channels; j++)
                    {
                        update_rms(effp, ibuf[j]);
                    }
                    ibuf += effp->in_signal.channels;
                    nrOfInSamplesRead += effp->in_signal.channels;
                }
            } /* for nrOfTicks */
            break;
        case SILENCE_TRIM_FLUSH:
             /* nrOfTicks counts non-wide samples here. */
silence_trim_flush:
            nrOfTicks = min((silence->start_holdoff_end -
                             silence->start_holdoff_offset),
                             (*osamp-nrOfOutSamplesWritten));
            /* Only write whole wide samples. */
            nrOfTicks -= nrOfTicks % effp->in_signal.channels;
            for(i = 0; i < nrOfTicks; i++)
            {
                *obuf++ = silence->start_holdoff[silence->start_holdoff_offset++];
                nrOfOutSamplesWritten++;
            }
            /* If fully drained holdoff then switch to copy mode */
            if (silence->start_holdoff_offset == silence->start_holdoff_end)
            {
                silence->start_holdoff_offset = 0;
                silence->start_holdoff_end = 0;
                silence->mode = SILENCE_COPY;
                goto silence_copy;
            }
            break;
        case SILENCE_COPY:
            /* Attempts to copy samples into output buffer.
             *
             * Case B:
             * If not looking for silence to terminate copy then
             * blindly copy data into output buffer.
             *
             * Case A:
             *
             * Case 1a:
             * If previous silence was detected then see if input sample is
             * above threshold.  If found then flush out hold off buffer
             * and copy over to output buffer.
             *
             * Case 1b:
             * If no previous silence detected then see if input sample
             * is above threshold.  If found then copy directly
             * to output buffer.
             *
             * Case 2:
             * If not above threshold then silence is detected so
             * store in hold off buffer and do not write to output
             * buffer.  Even though it wasn't put in output
             * buffer, inform user that input was consumed.
             *
             * If hold off buffer is full after this then stop
             * copying data and discard data in hold off buffer.
             *
             * Special leave_silence logic:
             *
             * During this mode, go ahead and copy input
             * samples to output buffer instead of holdoff buffer
             * Then also short circuit any flushes that would occur
             * when non-silence is detected since samples were already
             * copied.  This has the effect of always leaving
             * holdoff[] amount of silence but deleting any
             * beyond that amount.
             *
             * nrOfTicks counts wide samples here.
             */
silence_copy:
            nrOfTicks = min((*isamp-nrOfInSamplesRead),
                            (*osamp-nrOfOutSamplesWritten)) /
                           effp->in_signal.channels;
            if (silence->stop)
            {
                /* Case A */
                for(i = 0; i < nrOfTicks; i++)
                {
                    /* Here a wide sample counts as sound only when ALL
                     * channels are above the stop threshold.
                     */
                    threshold = 1;
                    for (j = 0; j < effp->in_signal.channels; j++)
                    {
                        threshold &= aboveThreshold(effp,
                                                    compute_rms(effp, ibuf[j]),
                                                    silence->stop_threshold,
                                                    silence->stop_unit);
                    }
                    /* Case 1a
                     * If above threshold, check to see if we where holding
                     * off previously.  If so then flush this buffer.
                     * We haven't incremented any pointers yet so nothing
                     * is lost.
                     *
                     * If user wants to leave_silence, then we
                     * were already copying the data and so no
                     * need to flush the old data.  Just resume
                     * copying as if we were not holding off.
                     */
                    if (threshold && silence->stop_holdoff_end
                        && !silence->leave_silence)
                    {
                        silence->mode = SILENCE_COPY_FLUSH;
                        goto silence_copy_flush;
                    }
                    /* Case 1b */
                    else if (threshold)
                    {
                        /* Not holding off so copy into output buffer */
                        for (j = 0; j < effp->in_signal.channels; j++)
                        {
                            update_rms(effp, *ibuf);
                            *obuf++ = *ibuf++;
                            nrOfInSamplesRead++;
                            nrOfOutSamplesWritten++;
                        }
                    }
                    /* Case 2 */
                    else if (!threshold)
                    {
                        /* Add to holdoff buffer */
                        for (j = 0; j < effp->in_signal.channels; j++)
                        {
                            update_rms(effp, *ibuf);
                            if (silence->leave_silence) {
                                *obuf++ = *ibuf;
                                nrOfOutSamplesWritten++;
                            }
                            silence->stop_holdoff[
                                silence->stop_holdoff_end++] = *ibuf++;
                            nrOfInSamplesRead++;
                        }
                        /* Check if holdoff buffer is greater than duration
                         */
                        if (silence->stop_holdoff_end >=
                                silence->stop_duration)
                        {
                            /* Increment found counter and see if this
                             * is the last period.  If so then exit.
                             */
                            if (++silence->stop_found_periods >=
                                    silence->stop_periods)
                            {
                                silence->stop_holdoff_offset = 0;
                                silence->stop_holdoff_end = 0;
                                if (!silence->restart)
                                {
                                    *isamp = nrOfInSamplesRead;
                                    *osamp = nrOfOutSamplesWritten;
                                    silence->mode = SILENCE_STOP;
                                    /* Return SOX_EOF since no more processing */
                                    return (SOX_EOF);
                                }
                                else
                                {
                                    /* Restart mode: reset all counters
                                     * and the RMS window, then go back
                                     * to looking for sound.
                                     */
                                    silence->stop_found_periods = 0;
                                    silence->start_found_periods = 0;
                                    silence->start_holdoff_offset = 0;
                                    silence->start_holdoff_end = 0;
                                    clear_rms(effp);
                                    silence->mode = SILENCE_TRIM;
                                    goto silence_trim;
                                }
                            }
                            else
                            {
                                /* Flush this buffer and start
                                 * looking again.
                                 */
                                /* NOTE(review): with leave_silence set,
                                 * these samples were already written to
                                 * obuf in the loop above, so flushing
                                 * the holdoff buffer looks like it
                                 * emits them a second time - confirm
                                 * that this is intended.
                                 */
                                silence->mode = SILENCE_COPY_FLUSH;
                                goto silence_copy_flush;
                            }
                            /* Not reached: every path above leaves via
                             * return or goto.
                             */
                            break;
                        } /* Filled holdoff buffer */
                    } /* Detected silence */
                } /* For # of samples */
            } /* Trimming off backend */
            else /* !(silence->stop) */
            {
                /* Case B */
                memcpy(obuf, ibuf, sizeof(sox_sample_t)*nrOfTicks*
                                   effp->in_signal.channels);
                nrOfInSamplesRead += (nrOfTicks*effp->in_signal.channels);
                nrOfOutSamplesWritten += (nrOfTicks*effp->in_signal.channels);
            }
            break;
        case SILENCE_COPY_FLUSH:
             /* nrOfTicks counts non-wide samples here. */
silence_copy_flush:
            nrOfTicks = min((silence->stop_holdoff_end -
                                silence->stop_holdoff_offset),
                            (*osamp-nrOfOutSamplesWritten));
            /* Only write whole wide samples. */
            nrOfTicks -= nrOfTicks % effp->in_signal.channels;
            for(i = 0; i < nrOfTicks; i++)
            {
                *obuf++ = silence->stop_holdoff[silence->stop_holdoff_offset++];
                nrOfOutSamplesWritten++;
            }
            /* If fully drained holdoff then return to copy mode */
            if (silence->stop_holdoff_offset == silence->stop_holdoff_end)
            {
                silence->stop_holdoff_offset = 0;
                silence->stop_holdoff_end = 0;
                silence->mode = SILENCE_COPY;
                goto silence_copy;
            }
            break;
        case SILENCE_STOP:
            /* This code can't be reached. */
            /* Report all input as consumed and write nothing. */
            nrOfInSamplesRead = *isamp;
            break;
        }
        /* Tell the caller how much input was used and output produced. */
        *isamp = nrOfInSamplesRead;
        *osamp = nrOfOutSamplesWritten;
        return (SOX_SUCCESS);
}
/* Write out whatever is still held in the stop holdoff buffer once the
 * input has ended.
 *
 * obuf  - output buffer.
 * osamp - in: room available in obuf, in samples;
 *         out: number of samples written.
 *
 * Returns SOX_EOF when the effect has reached SILENCE_STOP or nothing
 * was written; SOX_SUCCESS when more may follow.
 */
static int sox_silence_drain(sox_effect_t * effp, sox_sample_t *obuf, size_t *osamp)
{
    priv_t * const p = (priv_t *) effp->priv;
    size_t written = 0; /* non-wide samples */

    /* Held-back samples can only exist in the two copy modes. */
    if (p->mode == SILENCE_COPY || p->mode == SILENCE_COPY_FLUSH)
    {
        size_t count = min((p->stop_holdoff_end -
                            p->stop_holdoff_offset), *osamp);

        /* Emit whole wide samples only. */
        count -= count % effp->in_signal.channels;

        while (written < count)
        {
            obuf[written] = p->stop_holdoff[p->stop_holdoff_offset];
            p->stop_holdoff_offset++;
            written++;
        }

        /* Buffer empty: nothing further will ever be produced. */
        if (p->stop_holdoff_offset == p->stop_holdoff_end)
        {
            p->stop_holdoff_end = 0;
            p->stop_holdoff_offset = 0;
            p->mode = SILENCE_STOP;
        }
    }

    *osamp = written;

    if (p->mode != SILENCE_STOP && written != 0)
        return SOX_SUCCESS;
    return SOX_EOF;
}
/* Release the buffers allocated by sox_silence_start().
 *
 * Each pointer is reset to NULL after it is freed, so that a repeated
 * stop without an intervening start frees nothing twice, and no stale
 * pointer into the RMS window remains in the private state.
 *
 * Always returns SOX_SUCCESS.
 */
static int sox_silence_stop(sox_effect_t * effp)
{
  priv_t * silence = (priv_t *) effp->priv;

  free(silence->window);
  silence->window = NULL;
  silence->window_current = NULL;
  silence->window_end = NULL;

  free(silence->start_holdoff);
  silence->start_holdoff = NULL;

  free(silence->stop_holdoff);
  silence->stop_holdoff = NULL;

  return(SOX_SUCCESS);
}
/* Free the duration strings duplicated by sox_silence_getopts().
 * Always returns SOX_SUCCESS.
 */
static int lsx_kill(sox_effect_t * effp)
{
  priv_t * const opts = (priv_t *) effp->priv;

  /* free() accepts NULL, so a side that was never given is harmless. */
  free(opts->stop_duration_str);
  free(opts->start_duration_str);

  return SOX_SUCCESS;
}
/* Handler table for the "silence" effect.  The initializer is
 * positional: effect name, usage text, flag bits, then the getopts,
 * start, flow, drain, stop and kill callbacks defined in this file,
 * and finally the size of the private data.
 * NOTE(review): the field order is taken from the order of the values
 * here; verify against the declaration of sox_effect_handler_t.
 */
static sox_effect_handler_t sox_silence_effect = {
  "silence",
  "[ -l ] above_periods [ duration threshold[d|%] ] [ below_periods duration threshold[d|%] ]",
  SOX_EFF_MCHAN | SOX_EFF_MODIFY | SOX_EFF_LENGTH,
  sox_silence_getopts,
  sox_silence_start,
  sox_silence_flow,
  sox_silence_drain,
  sox_silence_stop,
  lsx_kill, sizeof(priv_t)
};
/* Public accessor: returns the address of the handler table of the
 * silence effect.
 */
const sox_effect_handler_t *lsx_silence_effect_fn(void)
{
    const sox_effect_handler_t * const handler = &sox_silence_effect;

    return handler;
}