shithub: sox

ref: 5fce40be10e904ddbc195b93bccfd212c3d91e73
dir: /src/silence.c/

View raw version
/* Silence effect for SoX
 * by Heikki Leinonen (heilei@iki.fi) 25.03.2001
 * Major Modifications by Chris Bagwell 06.08.2001
 * Minor addition by Donnie Smith 13.08.2003
 *
 * This effect can delete samples from the start of a sound file
 * until it sees a specified count of samples exceed a given threshold 
 * (any of the channels).
 * This effect can also delete samples from the end of a sound file
 * when it sees a specified count of samples below a given threshold
 * (all channels).
 * It may also be used to delete samples anywhere in a sound file.
 * Theshold's can be given as either a percentage or in decibels.
 */


#include <string.h>
#include <math.h>
#include "st_i.h"

static st_effect_t st_silence_effect;

/* Private data for silence effect. */

#define SILENCE_TRIM        0
#define SILENCE_TRIM_FLUSH  1
#define SILENCE_COPY        2
#define SILENCE_COPY_FLUSH  3
#define SILENCE_STOP        4

typedef struct silencestuff
{
    char        start;
    int         start_periods;
    char        *start_duration_str;
    st_size_t   start_duration;
    double      start_threshold;
    char        start_unit; /* "d" for decibels or "%" for percent. */
    int         restart;

    st_sample_t *start_holdoff;
    st_size_t   start_holdoff_offset;
    st_size_t   start_holdoff_end;
    int         start_found_periods;

    char        stop;
    int         stop_periods;
    char        *stop_duration_str;
    st_size_t   stop_duration;
    double      stop_threshold;
    char        stop_unit;

    st_sample_t *stop_holdoff;
    st_size_t   stop_holdoff_offset;
    st_size_t   stop_holdoff_end;
    int         stop_found_periods;

    double      *window;
    double      *window_current;
    double      *window_end;
    st_size_t   window_size;
    double      rms_sum;

    /* State Machine */
    char        mode;
} *silence_t;

static void clear_rms(eff_t effp)

{
    silence_t silence = (silence_t) effp->priv;

    memset(silence->window, 0, 
           silence->window_size * sizeof(double));

    silence->window_current = silence->window;
    silence->window_end = silence->window + silence->window_size;
    silence->rms_sum = 0;
}

static int st_silence_getopts(eff_t effp, int n, char **argv)
{
    silence_t   silence = (silence_t) effp->priv;
    int parse_count;

    if (n < 1)
    {
        st_fail(st_silence_effect.usage);
        return (ST_EOF);
    }

    /* Parse data related to trimming front side */
    silence->start = false;
    if (sscanf(argv[0], "%d", &silence->start_periods) != 1)
    {
        st_fail(st_silence_effect.usage);
        return(ST_EOF);
    }
    if (silence->start_periods < 0)
    {
        st_fail("Periods must not be negative");
        return(ST_EOF);
    }
    argv++;
    n--;

    if (silence->start_periods > 0)
    {
        silence->start = true;
        if (n < 2)
        {
            st_fail(st_silence_effect.usage);
            return ST_EOF;
        }

        /* We do not know the sample rate so we can not fully
         * parse the duration info yet.  So save argument off
         * for future processing.
         */
        silence->start_duration_str = (char *)xmalloc(strlen(argv[0])+1);
        strcpy(silence->start_duration_str,argv[0]);
        /* Perform a fake parse to do error checking */
        if (st_parsesamples(0,silence->start_duration_str,
                    &silence->start_duration,'s') == NULL)
        {
            st_fail(st_silence_effect.usage);
            return(ST_EOF);
        }

        parse_count = sscanf(argv[1], "%lf%c", &silence->start_threshold, 
                &silence->start_unit);
        if (parse_count < 1)
        {
            st_fail(st_silence_effect.usage);
            return ST_EOF;
        }
        else if (parse_count < 2)
            silence->start_unit = '%';

        argv++; argv++;
        n--; n--;
    }

    silence->stop = false;
    /* Parse data needed for trimming of backside */
    if (n > 0)
    {
        if (n < 3)
        {
            st_fail(st_silence_effect.usage);
            return ST_EOF;
        }
        if (sscanf(argv[0], "%d", &silence->stop_periods) != 1)
        {
            st_fail(st_silence_effect.usage);
            return ST_EOF;
        }
        if (silence->stop_periods < 0)
        {
            silence->stop_periods = -silence->stop_periods;
            silence->restart = 1;
        }
        else
            silence->restart = 0;
        silence->stop = true;
        argv++;
        n--;

        /* We do not know the sample rate so we can not fully
         * parse the duration info yet.  So save argument off
         * for future processing.
         */
        silence->stop_duration_str = (char *)xmalloc(strlen(argv[0])+1);
        strcpy(silence->stop_duration_str,argv[0]);
        /* Perform a fake parse to do error checking */
        if (st_parsesamples(0,silence->stop_duration_str,
                    &silence->stop_duration,'s') == NULL)
        {
            st_fail(st_silence_effect.usage);
            return(ST_EOF);
        }

        parse_count = sscanf(argv[1], "%lf%c", &silence->stop_threshold, 
                             &silence->stop_unit);
        if (parse_count < 1)
        {
            st_fail(st_silence_effect.usage);
            return ST_EOF;
        }
        else if (parse_count < 2)
            silence->stop_unit = '%';

        argv++; argv++;
        n--; n--;
    }

    /* Error checking */
    if (silence->start)
    {
        if ((silence->start_unit != '%') && (silence->start_unit != 'd'))
        {
            st_fail("Invalid unit specified");
            st_fail(st_silence_effect.usage);
            return(ST_EOF);
        }
        if ((silence->start_unit == '%') && ((silence->start_threshold < 0.0)
            || (silence->start_threshold > 100.0)))
        {
            st_fail("silence threshold should be between 0.0 and 100.0 %%");
            return (ST_EOF);
        }
        if ((silence->start_unit == 'd') && (silence->start_threshold >= 0.0))
        {
            st_fail("silence threshold should be less than 0.0 dB");
            return(ST_EOF);
        }
    }

    if (silence->stop)
    {
        if ((silence->stop_unit != '%') && (silence->stop_unit != 'd'))
        {
            st_fail("Invalid unit specified");
            return(ST_EOF);
        }
        if ((silence->stop_unit == '%') && ((silence->stop_threshold < 0.0) || 
                    (silence->stop_threshold > 100.0)))
        {
            st_fail("silence threshold should be between 0.0 and 100.0 %%");
            return (ST_EOF);
        }
        if ((silence->stop_unit == 'd') && (silence->stop_threshold >= 0.0))
        {
            st_fail("silence threshold should be less than 0.0 dB");
            return(ST_EOF);
        }
    }
    return(ST_SUCCESS);
}

static int st_silence_start(eff_t effp)
{
        silence_t       silence = (silence_t) effp->priv;

        /* When you want to remove silence, small window sizes are
         * better or else RMS will look like non-silence at
         * aburpt changes from load to silence.
         */
        silence->window_size = (effp->ininfo.rate / 50) * 
                               effp->ininfo.channels;
        silence->window = (double *)xmalloc(silence->window_size *
                                           sizeof(double));

        clear_rms(effp);

        /* Now that we now sample rate, reparse duration. */
        if (silence->start)
        {
            if (st_parsesamples(effp->ininfo.rate, silence->start_duration_str,
                                &silence->start_duration, 's') == NULL)
            {
                st_fail(st_silence_effect.usage);
                return(ST_EOF);
            }
        }
        if (silence->stop)
        {
            if (st_parsesamples(effp->ininfo.rate,silence->stop_duration_str,
                                &silence->stop_duration,'s') == NULL)
            {
                st_fail(st_silence_effect.usage);
                return(ST_EOF);
            }
        }

        if (silence->start)
            silence->mode = SILENCE_TRIM;
        else
            silence->mode = SILENCE_COPY;

        silence->start_holdoff = (st_sample_t *)xmalloc(sizeof(st_sample_t)*silence->start_duration);
        silence->start_holdoff_offset = 0;
        silence->start_holdoff_end = 0;
        silence->start_found_periods = 0;

        silence->stop_holdoff = (st_sample_t *)xmalloc(sizeof(st_sample_t)*silence->stop_duration);
        silence->stop_holdoff_offset = 0;
        silence->stop_holdoff_end = 0;
        silence->stop_found_periods = 0;

        return(ST_SUCCESS);
}

static int aboveThreshold(eff_t effp, st_sample_t value, double threshold, char unit)
{
    double ratio;
    int rc;
    st_sample_t dummy_clipped_count = 0;

    /* When scaling low bit data, noise values got scaled way up */
    /* Only consider the original bits when looking for silence */
    switch(effp->ininfo.size)
    {
        case ST_SIZE_BYTE:
            value = ST_SAMPLE_TO_SIGNED_BYTE(value, dummy_clipped_count);
            ratio = (double)abs(value) / (double)ST_INT8_MAX;
            break;
        case ST_SIZE_WORD:
            value = ST_SAMPLE_TO_SIGNED_WORD(value, dummy_clipped_count);
            ratio = (double)abs(value) / (double)ST_INT16_MAX;
            break;
        case ST_SIZE_24BIT:
            value = ST_SAMPLE_TO_SIGNED_24BIT(value, dummy_clipped_count);
            ratio = (double)abs(value) / (double)ST_INT24_MAX;
            break;
        case ST_SIZE_DWORD:
            value = ST_SAMPLE_TO_SIGNED_DWORD(value,);
            ratio = (double)labs(value) / (double)ST_INT32_MAX;
            break;
        default:
            ratio = 0;
    }

    if (unit == '%')
        ratio *= 100.0;
    else if (unit == 'd')
        ratio = log10(ratio) * 20.0;
    rc = (ratio >= threshold);

    return rc;
}

static st_sample_t compute_rms(eff_t effp, st_sample_t sample)
{
    silence_t silence = (silence_t) effp->priv;
    double new_sum;
    st_sample_t rms;

    new_sum = silence->rms_sum;
    new_sum -= *silence->window_current;
    new_sum += ((double)sample * (double)sample);

    rms = sqrt(new_sum / silence->window_size);

    return (rms);
}

static void update_rms(eff_t effp, st_sample_t sample)
{
    silence_t silence = (silence_t) effp->priv;

    silence->rms_sum -= *silence->window_current;
    *silence->window_current = ((double)sample * (double)sample);
    silence->rms_sum += *silence->window_current;

    silence->window_current++;
    if (silence->window_current >= silence->window_end)
        silence->window_current = silence->window;
}

/* Process signed long samples from ibuf to obuf. */
/* Return number of samples processed in isamp and osamp. */
static int st_silence_flow(eff_t effp, const st_sample_t *ibuf, st_sample_t *obuf, 
                    st_size_t *isamp, st_size_t *osamp)
{
    silence_t silence = (silence_t) effp->priv;
    int threshold;
    st_size_t i, j;
    st_size_t nrOfTicks, nrOfInSamplesRead, nrOfOutSamplesWritten;

    nrOfInSamplesRead = 0;
    nrOfOutSamplesWritten = 0;

    switch (silence->mode)
    {
        case SILENCE_TRIM:
            /* Reads and discards all input data until it detects a
             * sample that is above the specified threshold.  Turns on
             * copy mode when detected.
             * Need to make sure and copy input in groups of "channels" to
             * prevent getting buffers out of sync.
             */
silence_trim:
            nrOfTicks = min((*isamp-nrOfInSamplesRead), 
                            (*osamp-nrOfOutSamplesWritten)) / 
                           effp->ininfo.channels;
            for(i = 0; i < nrOfTicks; i++)
            {
                threshold = 0;
                for (j = 0; j < effp->ininfo.channels; j++)
                {
                    threshold |= aboveThreshold(effp,
                                                compute_rms(effp, ibuf[j]),
                                                silence->start_threshold, 
                                                silence->start_unit);
                }

                if (threshold)
                {
                    /* Add to holdoff buffer */
                    for (j = 0; j < effp->ininfo.channels; j++)
                    {
                        update_rms(effp, *ibuf);
                        silence->start_holdoff[
                            silence->start_holdoff_end++] = *ibuf++;
                        nrOfInSamplesRead++;
                    }

                    if (silence->start_holdoff_end >=
                            silence->start_duration)
                    {
                        if (++silence->start_found_periods >=
                                silence->start_periods)
                        {
                            silence->mode = SILENCE_TRIM_FLUSH;
                            goto silence_trim_flush;
                        }
                        /* Trash holdoff buffer since its not
                         * needed.  Start looking again.
                         */
                        silence->start_holdoff_offset = 0;
                        silence->start_holdoff_end = 0;
                    }
                }
                else /* !above Threshold */
                {
                    silence->start_holdoff_end = 0;
                    for (j = 0; j < effp->ininfo.channels; j++)
                    {
                        update_rms(effp, ibuf[j]);
                    }
                    ibuf += effp->ininfo.channels; 
                    nrOfInSamplesRead += effp->ininfo.channels;
                }
            } /* for nrOfTicks */
            break;

        case SILENCE_TRIM_FLUSH:
silence_trim_flush:
            nrOfTicks = min((silence->start_holdoff_end -
                             silence->start_holdoff_offset), 
                             (*osamp-nrOfOutSamplesWritten)); 
            for(i = 0; i < nrOfTicks; i++)
            {
                *obuf++ = silence->start_holdoff[silence->start_holdoff_offset++];
                nrOfOutSamplesWritten++;
            }

            /* If fully drained holdoff then switch to copy mode */
            if (silence->start_holdoff_offset == silence->start_holdoff_end)
            {
                silence->start_holdoff_offset = 0;
                silence->start_holdoff_end = 0;
                silence->mode = SILENCE_COPY;
                goto silence_copy;
            }
            break;

        case SILENCE_COPY:
            /* Attempts to copy samples into output buffer.  If not
             * looking for silence to terminate copy then blindly
             * copy data into output buffer.
             *
             * If looking for silence, then see if input sample is above
             * threshold.  If found then flush out hold off buffer
             * and copy over to output buffer.  Tell user about
             * input and output processing.
             *
             * If not above threshold then store in hold off buffer
             * and do not write to output buffer.  Tell user input
             * was processed.
             *
             * If hold off buffer is full then stop copying data and
             * discard data in hold off buffer.
             */
silence_copy:
            nrOfTicks = min((*isamp-nrOfInSamplesRead), 
                            (*osamp-nrOfOutSamplesWritten)) / 
                           effp->ininfo.channels;
            if (silence->stop)
            {
                for(i = 0; i < nrOfTicks; i++)
                {
                    threshold = 1;
                    for (j = 0; j < effp->ininfo.channels; j++)
                    {
                        threshold &= aboveThreshold(effp, 
                                                    compute_rms(effp, ibuf[j]),
                                                    silence->stop_threshold, 
                                                    silence->stop_unit);
                    }

                    /* If above threshold, check to see if we where holding
                     * off previously.  If so then flush this buffer.
                     * We haven't incremented any pointers yet so nothing
                     * is lost.
                     */
                    if (threshold && silence->stop_holdoff_end)
                    {
                        silence->mode = SILENCE_COPY_FLUSH;
                        goto silence_copy_flush;
                    }
                    else if (threshold)
                    {
                        /* Not holding off so copy into output buffer */
                        for (j = 0; j < effp->ininfo.channels; j++)
                        {
                            update_rms(effp, *ibuf);
                            *obuf++ = *ibuf++;
                            nrOfInSamplesRead++;
                            nrOfOutSamplesWritten++;
                        }
                    }
                    else if (!threshold)
                    {
                        /* Add to holdoff buffer */
                        for (j = 0; j < effp->ininfo.channels; j++)
                        {
                            update_rms(effp, *ibuf);
                            silence->stop_holdoff[
                                silence->stop_holdoff_end++] = *ibuf++;
                            nrOfInSamplesRead++;
                        }

                        /* Check if holdoff buffer is greater than duration 
                         */
                        if (silence->stop_holdoff_end >= 
                                silence->stop_duration)
                        {
                            /* Increment found counter and see if this
                             * is the last period.  If so then exit.
                             */
                            if (++silence->stop_found_periods >= 
                                    silence->stop_periods)
                            {
                                silence->stop_holdoff_offset = 0;
                                silence->stop_holdoff_end = 0;
                                if (!silence->restart)
                                {
                                    *isamp = nrOfInSamplesRead;
                                    *osamp = nrOfOutSamplesWritten;
                                    silence->mode = SILENCE_STOP;
                                    /* Return ST_EOF since no more processing */
                                    return (ST_EOF);
                                }
                                else
                                {
                                    silence->stop_found_periods = 0;
                                    silence->start_found_periods = 0;
                                    silence->start_holdoff_offset = 0;
                                    silence->start_holdoff_end = 0;
                                    clear_rms(effp);
                                    silence->mode = SILENCE_TRIM;

                                    goto silence_trim;
                                }
                            }
                            else
                            {
                                /* Flush this buffer and start 
                                 * looking again.
                                 */
                                silence->mode = SILENCE_COPY_FLUSH;
                                goto silence_copy_flush;
                            }
                            break;
                        } /* Filled holdoff buffer */
                    } /* Detected silence */
                } /* For # of samples */
            } /* Trimming off backend */
            else /* !(silence->stop) */
            {
                memcpy(obuf, ibuf, sizeof(st_sample_t)*nrOfTicks*
                                   effp->ininfo.channels);
                nrOfInSamplesRead += (nrOfTicks*effp->ininfo.channels);
                nrOfOutSamplesWritten += (nrOfTicks*effp->ininfo.channels);
            }
            break;

        case SILENCE_COPY_FLUSH:
silence_copy_flush:
            nrOfTicks = min((silence->stop_holdoff_end -
                                silence->stop_holdoff_offset), 
                            (*osamp-nrOfOutSamplesWritten));

            for(i = 0; i < nrOfTicks; i++)
            {
                *obuf++ = silence->stop_holdoff[silence->stop_holdoff_offset++];
                nrOfOutSamplesWritten++;
            }

            /* If fully drained holdoff then return to copy mode */
            if (silence->stop_holdoff_offset == silence->stop_holdoff_end)
            {
                silence->stop_holdoff_offset = 0;
                silence->stop_holdoff_end = 0;
                silence->mode = SILENCE_COPY;
                goto silence_copy;
            }
            break;

        case SILENCE_STOP:
            nrOfInSamplesRead = *isamp;
            break;
        }

        *isamp = nrOfInSamplesRead;
        *osamp = nrOfOutSamplesWritten;

        return (ST_SUCCESS);
}

static int st_silence_drain(eff_t effp, st_sample_t *obuf, st_size_t *osamp)
{
    silence_t silence = (silence_t) effp->priv;
    st_size_t i;
    st_size_t nrOfTicks, nrOfOutSamplesWritten = 0;

    /* Only if in flush mode will there be possible samples to write
     * out during drain() call.
     */
    if (silence->mode == SILENCE_COPY_FLUSH || 
        silence->mode == SILENCE_COPY)
    {
        nrOfTicks = min((silence->stop_holdoff_end - 
                            silence->stop_holdoff_offset), *osamp);
        for(i = 0; i < nrOfTicks; i++)
        {
            *obuf++ = silence->stop_holdoff[silence->stop_holdoff_offset++];
            nrOfOutSamplesWritten++;
        }

        /* If fully drained holdoff then stop */
        if (silence->stop_holdoff_offset == silence->stop_holdoff_end)
        {
            silence->stop_holdoff_offset = 0;
            silence->stop_holdoff_end = 0;
            silence->mode = SILENCE_STOP;
        }
    }

    *osamp = nrOfOutSamplesWritten;
    if (silence->mode == SILENCE_STOP || *osamp == 0)
        return ST_EOF;
    else
        return ST_SUCCESS;
}

static int st_silence_stop(eff_t effp)
{
    silence_t silence = (silence_t) effp->priv;

    free(silence->window);
    free(silence->start_holdoff);
    free(silence->stop_holdoff);
    return(ST_SUCCESS);
}

static st_effect_t st_silence_effect = {
  "silence",
  "Usage: silence above_periods [ duration thershold[d | %% ] ] [ below_periods duration threshold[ d | %% ]]",
  ST_EFF_MCHAN,
  st_silence_getopts,
  st_silence_start,
  st_silence_flow,
  st_silence_drain,
  st_silence_stop,
  st_effect_nothing
};

const st_effect_t *st_silence_effect_fn(void)
{
    return &st_silence_effect;
}