shithub: sox

Download patch

ref: e5995fcd7b8ebe090a578d6bf00525c0e031ab78
parent: 1faf05aacdd69ad8071ad6cb0e47c1536c7479c5
author: robs <robs>
date: Tue Mar 24 18:00:39 EDT 2009

new fade type for splice

--- a/sox.1
+++ b/sox.1
@@ -2937,12 +2937,30 @@
 .B stat
 effect.
 .TP
-\fBsplice \fR { \fIposition\fR[\fB,\fIexcess\fR[\fB,\fIleeway\fR]] }
+\fBsplice \fR [\fB\-h\fR\^|\^\fB\-t\fR\^|\^\fB\-q\fR] { \fIposition\fR[\fB,\fIexcess\fR[\fB,\fIleeway\fR]] }
 Splice together audio sections.  This effect provides two things over
 simple audio concatenation: a (usually short) cross-fade is applied at
 the join, and a wave similarity comparison is made to help determine the
 best place at which to make the join.
 .SP
+One of the options
+.BR \-h ,
+.BR \-t ,
+or
+.B \-q
+may be given to select the fade envelope as half-cosine wave (the default),
+triangular (a.k.a. linear), or quarter-cosine wave respectively.
+.TS
+center;
+cI lI lI lI
+cB l l l.
+Type	Audio	Fade level	Transitions
+t	correlated	constant gain	abrupt
+h	correlated	constant gain	smooth
+q	uncorrelated	constant power	smooth
+.TE
+.DT
+.SP
 To perform a splice, first use the
 .B trim
 effect to select the audio sections to be joined together.  As when
@@ -2969,16 +2987,26 @@
 (\fIstart\fR) effect) at times 0:30\*d125 and 1:03\*d432.
 The following commands cut out the first verse:
 .EX
-	sox too-long.au part1.au trim 0 30.130
+  sox too-long.au part1.au trim 0 30.130
 .EE
 (5 ms excess, after the first verse starts)
 .EX
-	sox long.au part2.au trim 1:03.422
+  sox long.au part2.au trim 1:03.422
 .EE
 (5 ms excess plus 5 ms leeway, before the second verse starts)
 .EX
-	sox part1.au part2.au just-right.au splice 30.130
+  sox part1.au part2.au just-right.au splice 30.130
 .EE
+For another example, the SoX command
+.EX
+  play "|sox -n -p synth 1 sin %1" "|sox -n -p synth 1 sin %3"
+.EE
+generates and plays two notes, but there is a nasty click at the
+transition; the click can be removed by splicing instead of
+concatenating the audio, i.e. by appending \fBsplice 1\fR to the
+command. (Clicks at the beginning and end of the audio can be removed by
+\fIpreceding\fR the splice effect with \fBfade q .01 2 .01\fR).
+.SP
 Provided your arithmetic is good enough, multiple splices can be
 performed with a single
 .B splice
@@ -3001,15 +3029,6 @@
 .EE
 In the above Bourne shell script,
 two splices are used to `copy and paste' audio.
-.SP
-The SoX command
-.EX
-  play "|sox -n -p synth 1 sin %1" "|sox -n -p synth 1 sin %3"
-.EE
-generates and plays two notes, but there is a nasty click at the
-transition; the click can be removed by appending \fBsplice 1\fR to the
-command. (Clicks at the beginning and end of the audio can be removed by
-\fIpreceding\fR the splice effect with \fBfade q .01 2 .01\fR).
 .TS
 center;
 c8 c8 c.
@@ -3017,13 +3036,23 @@
 .TE
 .DT
 .SP
-It is also possible to use this effect to perform general cross-fades, e.g. to
-join two songs.
-In this case,
+It is also possible to use this effect to perform general cross-fades,
+e.g. to join two songs.  In this case,
 .I excess
-would typically be an number of seconds, and
+would typically be a number of seconds, the
+.B \-q
+option should be given to indicate that the audio is uncorrelated, and
 .I leeway
-should be set to zero.
+should be zero (which is the default if
+.B \-q
+is given).  For example, if f1.au and f2.au are audio files
+to be cross-faded, then
+.EX
+	sox f1.au f2.au out.au splice -q $(soxi -D f1.au),3
+.EE
+cross-fades the files where the point of equal loudness is 3 seconds
+before the end of f1.au, i.e. the total length of the cross-fade is
+2 x 3 = 6 seconds (Note: the $(...) notation is POSIX shell).
 .TP
 \fBstat\fR [\fB\-s \fIscale\fR] [\fB\-rms\fR] [\fB\-freq\fR] [\fB\-v\fR] [\fB\-d\fR]
 Display time and frequency domain statistical information about the audio.
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,5 +1,4 @@
-/* libSoX effect: splice with a WSOL method.
- * Copyright (c) 2008 robs@users.sourceforge.net
+/* libSoX effect: splice audio   Copyright (c) 2008-9 robs@users.sourceforge.net
  *
  * This library is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published by
@@ -18,7 +17,8 @@
 
 #include "sox_i.h"
 
-static double difference(const sox_sample_t * a, const sox_sample_t * b, size_t length)
+static double difference(
+    const sox_sample_t * a, const sox_sample_t * b, size_t length)
 {
   double diff = 0;
   size_t i = 0;
@@ -30,8 +30,8 @@
 }
 
 /* Find where the two segments are most alike over the overlap period. */
-static size_t best_overlap_position(sox_sample_t const * f1, sox_sample_t const * f2,
-    size_t overlap, size_t search, size_t channels)
+static size_t best_overlap_position(sox_sample_t const * f1,
+    sox_sample_t const * f2, size_t overlap, size_t search, size_t channels)
 {
   size_t i, best_pos = 0;
   double diff, least_diff = difference(f2, f1, channels * overlap);
@@ -44,19 +44,20 @@
   return best_pos;
 }
 
+
 typedef struct {
-  sox_bool uncorrelated;
+  enum {Cosine_2, Cosine_4, Triangular} fade_type;
   unsigned nsplices;     /* Number of splices requested */
   struct {
     char * str;          /* Command-line argument to parse for this splice */
-    size_t overlap;  /* Number of samples to overlap */
-    size_t search;   /* Number of samples to search */
-    size_t start;    /* Start splicing when in_pos equals this */
+    size_t overlap;      /* Number of samples to overlap */
+    size_t search;       /* Number of samples to search */
+    size_t start;        /* Start splicing when in_pos equals this */
   } * splices;
 
-  size_t in_pos;     /* Number of samples read from the input stream */
+  size_t in_pos;         /* Number of samples read from the input stream */
   unsigned splices_pos;  /* Number of splices completed so far */
-  size_t buffer_pos; /* Number of samples through the current splice */
+  size_t buffer_pos;     /* Number of samples through the current splice */
   size_t max_buffer_size;
   sox_sample_t * buffer;
   unsigned state;
@@ -68,11 +69,11 @@
   priv_t * p = (priv_t *)effp->priv;
   size_t i, j, k = 0;
 
-  if (p->uncorrelated) { /* Fade for constant RMS level (`power') */
+  if (p->fade_type == Cosine_4) {
     double fade_step = M_PI_2 / overlap;
     for (i = 0; i < overlap; ++i) {
       double fade_in  = sin(i * fade_step);
-      double fade_out = cos(i * fade_step);
+      double fade_out = cos(i * fade_step); /* constant RMS level (`power') */
       for (j = 0; j < channels; ++j, ++k) {
         double d = in1[k] * fade_out + in2[k] * fade_in;
         output[k] = SOX_ROUND_CLIP_COUNT(d, effp->clips); /* Might clip */
@@ -79,11 +80,22 @@
       }
     }
   }
-  else {                 /* Fade for constant peak level (`gain') */
+  else if (p->fade_type == Cosine_2) {
+    double fade_step = M_PI / overlap;
+    for (i = 0; i < overlap; ++i) {
+      double fade_in  = .5 - .5 * cos(i * fade_step);
+      double fade_out = 1 - fade_in;    /* constant peak level (`gain') */
+      for (j = 0; j < channels; ++j, ++k) {
+        double d = in1[k] * fade_out + in2[k] * fade_in;
+        output[k] = SOX_ROUND_CLIP_COUNT(d, effp->clips); /* Should not clip */
+      }
+    }
+  }
+  else /* Triangular */ {
     double fade_step = 1. / overlap;
     for (i = 0; i < overlap; ++i) {
       double fade_in  = fade_step * i;
-      double fade_out = 1 - fade_in;
+      double fade_out = 1 - fade_in;    /* constant peak level (`gain') */
       for (j = 0; j < channels; ++j, ++k) {
         double d = in1[k] * fade_out + in2[k] * fade_in;
         output[k] = SOX_ROUND_CLIP_COUNT(d, effp->clips); /* Should not clip */
@@ -92,7 +104,8 @@
   }
 }
 
-static size_t do_splice(sox_effect_t * effp, sox_sample_t * f, size_t overlap, size_t search, size_t channels)
+static size_t do_splice(sox_effect_t * effp,
+    sox_sample_t * f, size_t overlap, size_t search, size_t channels)
 {
   size_t offset = search? best_overlap_position(
       f, f + overlap * channels, overlap, search, channels) : 0;
@@ -112,7 +125,8 @@
     if (argv) /* 1st parse only */
       p->splices[i].str = lsx_strdup(argv[i]);
 
-    p->splices[i].overlap = p->splices[i].search = rate * 0.01 + .5;
+    p->splices[i].overlap = rate * 0.01 + .5;
+    p->splices[i].search = p->fade_type == Cosine_4? 0 : p->splices[i].overlap;
 
     next = lsx_parsesamples(rate, p->splices[i].str, &p->splices[i].start, 't');
     if (next == NULL) break;
@@ -146,8 +160,11 @@
 {
   priv_t * p = (priv_t *)effp->priv;
   --argc, ++argv;
-  if (argc && !strcmp(*argv, "-u"))
-    --argc, ++argv, p->uncorrelated = sox_true;
+  if (argc) {
+    if      (!strcmp(*argv, "-t")) p->fade_type = Triangular, --argc, ++argv;
+    else if (!strcmp(*argv, "-q")) p->fade_type = Cosine_4  , --argc, ++argv;
+    else if (!strcmp(*argv, "-h")) p->fade_type = Cosine_2  , --argc, ++argv;
+  }
   p->splices = lsx_calloc(p->nsplices = argc, sizeof(*p->splices));
   return parse(effp, argv, 1e5); /* No rate yet; parse with dummy */
 }
@@ -163,7 +180,7 @@
   p->state = p->splices_pos != p->nsplices && p->in_pos == p->splices[p->splices_pos].start;
   for (i = 0; i < p->nsplices; ++i)
     if (p->splices[i].overlap) {
-      if (p->uncorrelated && effp->in_signal.mult)
+      if (p->fade_type == Cosine_4 && effp->in_signal.mult)
         *effp->in_signal.mult *= pow(.5, .5);
       return SOX_SUCCESS;
     }
@@ -261,13 +278,14 @@
 sox_effect_handler_t const * lsx_splice_effect_fn(void)
 {
   static sox_effect_handler_t handler = {
-    "splice", "[-u] {position[,excess[,leeway]]}"
-    "\n  (default)  Correlated audio: fade for constant peak"
-    "\n  -u         Uncorrelated audio (e.g. cross-fade): fade for constant RMS"
-    "\n  position   The length of part 1 (including the excess)"
-    "\n  excess     At the end of part 1 & the start of part2 (default 0.005)"
-    "\n  leeway     Before part2 (default 0.005; set to 0 for cross-fade)",
-    SOX_EFF_MCHAN|SOX_EFF_LENGTH,
+    "splice", "[-h|-t|-q] {position[,excess[,leeway]]}"
+    "\n  -h        Half sine fade (default); constant gain (for correlated audio)"
+    "\n  -t        Triangular (linear) fade; constant gain (for correlated audio)"
+    "\n  -q        Quarter sine fade; constant power (for correlated audio e.g. x-fade)"
+    "\n  position  The length of part 1 (including the excess)"
+    "\n  excess    At the end of part 1 & the start of part2 (default 0.005)"
+    "\n  leeway    Before part2 (default 0.005; set to 0 for cross-fade)",
+    SOX_EFF_MCHAN | SOX_EFF_LENGTH,
     create, start, flow, drain, stop, kill, sizeof(priv_t)
   };
   return &handler;