shithub: sox

Download patch

ref: e62621b8f19ca1a4ae852c6bfd9a54a81e2f124a
parent: 15ba7f5fa46fe8a17d1f56a15598923ca9ea4c1a
author: robs <robs>
date: Thu May 21 11:53:36 EDT 2009

spectrogram -X

--- a/ChangeLog
+++ b/ChangeLog
@@ -85,6 +85,7 @@
   o New `biquad' filter effect using external coefficients.  (robs)
   o New `overdrive' effect.  (robs)
   o New `pluck' and `tpdf' types for `synth'.  (robs)
+  o New `-X duration' option for `spectrogram'.  (robs)
   o [2778142] just intonation for `synth'.  (robs)
   o Can now set common parameters for multiple `synth' channels.  (robs)
   o Richer gain/normalise options.  (robs)
--- a/sox.1
+++ b/sox.1
@@ -1798,18 +1798,30 @@
 effects.
 .TP
 \fBdcshift \fIshift\fR [\fIlimitergain\fR]
-DC Shift the audio, with basic linear amplitude formula.
-This is most useful if your audio tends to not be centred around
-a value of 0.  Shifting it back will allow you to get the most volume
-adjustments without clipping.
+Apply a DC shift to the audio.  This can be useful to remove a DC
+offset (caused perhaps by a hardware problem in the recording chain)
+from the audio.  The effect of a DC offset is reduced headroom and
+hence volume.
+The
+.B stat
+or
+.B stats
+effect can be used to determine if a signal has a DC offset.
 .SP
-The first option is the \fIdcshift\fR value.  It is a floating point number that
-indicates the amount to shift.
+The given \fIshift\fR value is a floating point number in the range
+of \(+-2 that indicates the amount to shift the audio (which is in the
+range of \(+-1).
 .SP
 An optional
 .I limitergain
 can be specified as well.  It should have a value much less than 1
 (e.g. 0\*d05 or 0\*d02) and is used only on peaks to prevent clipping.
+.TS
+center;
+c8 c8 c.
+*	*	*
+.TE
+.DT
 .SP
 An alternative approach to removing a DC offset (albeit with a short delay)
 is to use the
@@ -1820,10 +1832,6 @@
    sox -n dc.wav synth 5 sin %0 50
    sox dc.wav fixed.wav highpass 10
 .EE
-.SP
-See also the
-.B stats
-effect.
 .TP
 \fBdeemph\fR
 Apply ISO 908 de-emphasis (a treble attenuation shelving filter) to
@@ -3109,8 +3117,8 @@
 and need not be an integer.  SoX
 may make a slight adjustment to the given number for processing
 quantisation reasons; if so, SoX will report the actual number used
-(viewable when
-.B \-\-verbose
+(viewable when the SoX global option
+.B \-V
 is in effect).
 .SP
 The maximum width of the spectrogram is 999 pixels; if the audio length
@@ -3125,6 +3133,25 @@
    sox audio.ogg -n trim 1:00 spectrogram
 .EE
 starts the spectrogram at 1 minute through the audio.
+.SP
+See also
+.B \-X
+for an alternative way of setting the X-axis resolution.
+.IP \fB\-X\ \fIduration\fR
+Sets the X-axis resolution such that audio with the given
+.I duration
+([[HH:]MM:]SS) fits the maximum X-axis width.  For example,
+.EX
+   sox my.mp3 -n remix - spectrogram -X 03:24
+.EE
+or, with Bourne shell, PowerShell, etc.,
+.EX
+   sox my.mp3 -n remix - spectrogram -X $(soxi -D my.mp3)
+.EE
+.SP
+See also
+.B \-x
+for an alternative way of setting the X-axis resolution.
 .IP \fB\-y\ \fInum\fR
 Y-axis resolution (1 \- 4), default 2.
 This controls the height of the spectrogram;
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -79,6 +79,8 @@
 static int getopts(sox_effect_t * effp, int argc, char **argv)
 {
   priv_t * p = (priv_t *)effp->priv;
+  size_t duration_1e5;
+  char const * next;
   int c;
 
   assert(array_length(p->bit_rev_table) >= (size_t)dft_br_len(MAX_DFT_SIZE));
@@ -87,7 +89,7 @@
   p->spectrum_points = 249, p->perm = 1;
   p->out_name = "spectrogram.png", p->comment = "Created by SoX";
 
-  while ((c = getopt(argc, argv, "+x:y:z:Z:q:p:w:st:c:amlho:")) != -1) switch (c) {
+  while ((c = getopt(argc, argv, "+x:X:y:z:Z:q:p:w:st:c:amlho:")) != -1) switch (c) {
     GETOPT_NUMERIC('x', pixels_per_sec,  1 , 5000)
     GETOPT_NUMERIC('y', y_size        ,  1 , 1 + MAX_DFT_SIZE_SHIFT)
     GETOPT_NUMERIC('z', dB_range      , 20 , 180)
@@ -96,6 +98,12 @@
     GETOPT_NUMERIC('p', perm          ,  1 , 6)
     case 'w': p->win_type = lsx_enum_option(c, window_options);   break;
     case 's': p->slack_overlap = sox_true; break;
+    case 'X': 
+      next = lsx_parsesamples(1e5, optarg, &duration_1e5, 't');
+      if (next == NULL || *next != '\0')
+        return lsx_usage(effp);
+      p->pixels_per_sec = MAX_COLS * 1e5 / duration_1e5;
+      break;
     case 't': p->title    = optarg;   break;
     case 'c': p->comment  = optarg;   break;
     case 'a': p->no_axes  = sox_true; break;
@@ -497,7 +505,8 @@
     "spectrogram", 0, SOX_EFF_MODIFY, getopts, start, flow, drain, stop, 0, sizeof(priv_t)};
   static char const * lines[] = {
     "[options]",
-    "\t-x num\tX-axis pixels/second, default 100",
+    "\t-x num\tX-axis pixels/second, default 100.  -x & -X are alternatives",
+    "\t-X time\tAudio duration to fit to X-axis e.g. $(soxi -D file)",
     "\t-y num\tY-axis resolution (1 - 4), default 2",
     "\t-z num\tZ-axis range in dB, default 120",
     "\t-Z num\tZ-axis maximum in dBFS, default 0",