shithub: sox

Download patch

ref: 0daa88772dca7ce53e235e4e988e4bc739442970
parent: 8025dd7861959189fc0abaade8d5be47244034da
author: robs <robs>
date: Mon May 25 11:04:15 EDT 2009

multi-channel support for spectrogram

--- a/ChangeLog
+++ b/ChangeLog
@@ -85,7 +85,9 @@
   o New `biquad' filter effect using external coefficients.  (robs)
   o New `overdrive' effect.  (robs)
   o New `pluck' and `tpdf' types for `synth'.  (robs)
-  o New `-X duration' option for `spectrogram'.  (robs)
+  o New multi-channel support and revised sizing options for `spectrogram'.
+    N.B. revised options are not directly backwards compatible -- see the
+    man page for details of the new syntax.  (robs)
   o [2778142] just intonation for `synth'.  (robs)
   o Can now set common parameters for multiple `synth' channels.  (robs)
   o Richer gain/normalise options.  (robs)
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@
   o Automatically `dither' as needed.
   o Improvements to AIFF, WAV, FLAC, MP3 handlers
   o ALSA driver now supports 24-bit.
+  o `spectrogram' effect enhancements including multi-channel support.
   o `synth' effect enhancements including new `pluck' type.
   o More gain/normalise options.
   o Now uses CPU multi-core to speed up some effects.
@@ -22,7 +23,7 @@
   o Can now auto-detect file-type even when inputing from a pipe.
 
 For the complete list of changes, see the ChangeLog at
-  http://sox.cvs.sourceforge.net/sox/sox/ChangeLog?revision=1.225&view=markup
+  http://sox.cvs.sourceforge.net/sox/sox/ChangeLog?revision=1.228&view=markup
 
 Thanks to all who contributed to this release.
 
--- a/sox.1
+++ b/sox.1
@@ -733,7 +733,7 @@
 SoX's global options.
 For example:
 .EX
-   SOX_OPTS="--buffer 20000 --play-rate-arg -hs"
+   SOX_OPTS="--buffer 20000 --play-rate-arg -hs --temp /mnt/temp"
 .EE
 Note that setting SOX_OPTS can potentially create unwanted changes in
 the behaviour of scripts or other programs that invoke SoX.  So SOX_OPTS
@@ -2949,9 +2949,8 @@
 .SP
 This effect supports the \fB\-\-plot\fR global option.
 .TP
-\fBsilence \fR[\fB\-l\fR] \fIabove-periods\fR [\fIduration
-threshold\fR[\fBd\fR\^|\^\fB%\fR] [\fIbelow-periods duration
-threshold\fR[\fBd\fR\^|\^\fB%\fR]]
+\fBsilence \fR[\fB\-l\fR] \fIabove-periods\fR [\fIduration threshold\fR[\fBd\fR\^|\^\fB%\fR]
+[\fIbelow-periods duration threshold\fR[\fBd\fR\^|\^\fB%\fR]]
 .SP
 Removes silence from the beginning, middle, or end of the audio.
 Silence is anything below a specified threshold.
@@ -3087,31 +3086,81 @@
 .B tempo
 effects.
 .TP
-\fBspectrogram \fR[options]
-Create a spectrogram of the audio.  This effect is optional; type \fBsox
-\-\-help\fR and check the list of supported effects to see if it has
-been included.
+\fBspectrogram \fR[\fIoptions\fR]
+Create a spectrogram of the audio; the audio is passed unmodified
+through the SoX processing chain.  This effect is optional\*mtype
+\fBsox \-\-help\fR and check the list of supported effects to see if
+it has been included.
 .SP
 The spectrogram is rendered in a Portable Network Graphic (PNG) file,
-and shows time in the X-axis, frequency in the Y-axis, and audio signal
-magnitude in the Z-axis.  Z-axis values are represented by the colour
-(or intensity) of the pixels in the X-Y plane.
+and shows time in the X-axis, frequency in the Y-axis, and audio
+signal magnitude in the Z-axis.  Z-axis values are represented by the
+colour (or optionally the intensity) of the pixels in the X-Y plane.
+If the audio signal contains multiple channels then these are shown
+from top to bottom starting from channel 1 (which is the left channel
+for stereo audio).
 .SP
-This effect supports only one channel.
-For multi-channel input files, use the
-.B remix
-effect as either
-.B remix \-
-to obtain a spectrogram on the mix-down,
-or
-.B remix
-.I n
-to select a particular channel; be aware though, that both of
-these methods affect the audio in the effects chain.
+For example, if `my.wav' is a stereo file, then with
+.EX
+   sox my.wav -n spectrogram
+.EE
+a spectrogram of the entire file will be created in the file
+`spectrogram.png'.  More often though, analysis of a smaller portion
+of the audio is required; e.g. with
+.EX
+   sox my.wav -n remix 2 trim 20 30 spectrogram
+.EE
+the spectrogram shows information only from the second (right)
+channel, and of thirty seconds of audio starting from twenty seconds
+in.  To analyse a small portion of the frequency domain, the
+.B rate
+effect may be used, e.g.
+.EX
+   sox my.wav -n rate 6k spectrogram
+.EE
+allows detailed analysis of frequencies up to 3kHz (half the sampling
+rate) i.e. where the human auditory system is most sensitive.
+With
+.EX
+   sox my.wav -n trim 0 10 spectrogram -x 600 -y 200 -z 100
+.EE
+the given options control the size of the spectrogram's X, Y & Z axes
+(in this case, the spectrogram area of the produced image will be 600
+by 200 pixels in size and the Z-axis range will be 100 dB).  Note that
+the produced image includes axes legends etc. and so will be a little
+larger than the specified spectrogram size.  In this example:
+.EX
+   sox -n -n synth 6 tri 10k:14k spectrogram -z 100 -w kaiser
+.EE
+an analysis `window' with high dynamic range is selected to best
+display the spectrogram of a swept triangular wave.  For a similar
+example, append the following to the `chime' command in the
+description of the
+.B delay
+effect (above):
+.EX
+   rate 2k spectrogram -X 200 -Z -10 -w kaiser
+.EE
+Options are also available to control the appearance (colour-set,
+brightness, contrast, etc.) and filename of the spectrogram; e.g. with
+.EX
+   sox my.wav -n spectrogram -m -l -o print.png
+.EE
+a spectrogram is created suitable for printing on a `black and white'
+printer.
+.SP
+.I Options:
 .RS
 .IP \fB\-x\ \fInum\fR
-X-axis pixels/second, default 100.  This controls the width of the
-spectrogram;
+Change the (maximum) width (X-axis) of the spectrogram from its default
+value of 800 pixels to a given number between 100 and 5000.
+See also \fB\-X\fR and \fB\-d\fR.
+.IP \fB\-X\ \fInum\fR
+X-axis pixels/second; the default is auto-calculated to fit the given
+or known audio duration to the X-axis size, or 100 otherwise.  If
+given in conjunction with \fB\-d\fR, this option affects the width of
+the spectrogram; otherwise, it affects the duration of the
+spectrogram.
 .I num
 can be from 1 (low time resolution) to 5000 (high time resolution)
 and need not be an integer.  SoX
@@ -3120,61 +3169,26 @@
 (viewable when the SoX global option
 .B \-V
 is in effect).
-.SP
-The maximum width of the spectrogram is 999 pixels; if the audio length
-and the given
-.B \-x
-number are such that this would be exceeded, then the spectrogram (and
-the effects chain) will be truncated.  To move the spectrogram to a
-point later in the audio stream, first invoke the
-.B trim
-effect; e.g.
-.EX
-   sox audio.ogg -n trim 1:00 spectrogram
-.EE
-starts the spectrogram at 1 minute through the audio.
-.SP
-See also
-.B \-X
-for an alternative way of setting the X-axis resolution.
-.IP \fB\-X\ \fIduration\fR
-Sets the X-axis resolution such that audio with the given
-.I duration
-([[HH:]MM:]SS) fits the maximum X-axis width.  For example,
-.EX
-   sox my.mp3 -n remix - spectrogram -X 03:24
-.EE
-or, with Bourne shell, PowerShell, etc.,
-.EX
-   sox my.mp3 -n remix - spectrogram -X $(soxi -D my.mp3)
-.EE
-.SP
-See also
-.B \-x
-for an alternative way of setting the X-axis resolution.
+See also \fB\-x\fR and \fB\-d\fR.
 .IP \fB\-y\ \fInum\fR
-Y-axis resolution (1 \- 4), default 2.
-This controls the height of the spectrogram;
-.I num
-can be from 1 (low frequency resolution) to 4 (high frequency
-resolution).  For values greater than 2, the resulting image may be too
-tall to display on the screen; if so, a graphic manipulation package
-(such as
-.BR ImageMagick (1))
-can be used to re-size the image.
-.SP
-To increase the frequency resolution without increasing the height of
-the spectrogram, the
-.B rate
-effect may be invoked to reduce the sampling rate of the signal before
-invoking
-.BR spectrogram ;
-e.g.
-.EX
-   sox audio.ogg -n rate 4k spectrogram
-.EE
-allows detailed analysis of frequencies up to 2kHz (half the sampling
-rate).
+Sets the Y-axis size in pixels (per channel); this is the number of
+frequency `bins' used in the Fourier analysis that produces the
+spectrogram.  N.B. it can be slow to produce the spectrogram if this
+number is not one more than a power of two (e.g. 129).  By default the
+Y-axis size is chosen automatically (depending on the number of
+channels).  See
+.B \-Y
+for an alternative way of setting spectrogram height.
+.IP \fB\-Y\ \fInum\fR
+Sets the target total height of the spectrogram(s).  The default value
+is 550 pixels.  Using this option (and by default), SoX will choose a
+height for individual spectrogram channels that is one more than a
+power of two, so the actual total height may fall short of the given
+number.  However, there is also a minimum height per channel so if
+there are many channels, the number may be exceeded.
+See
+.B \-y
+for an alternative way of setting spectrogram height.
 .IP \fB\-z\ \fInum\fR
 Z-axis (colour) range in dB, default 120.  This sets the dynamic-range
 of the spectrogram to be \-\fInum\fR\ dBFS to 0\ dBFS.
@@ -3205,10 +3219,10 @@
 frequency resolution (but lower dynamic-range), select a Hamming window;
 for higher dynamic-range (but poorer frequency-resolution), select a
 Kaiser window.  Bartlett and Rectangular windows are also available.
-Selecting a window other than Hann will usually require
-a corresponding
-.B \-z
-setting.
+.IP \fB\-W\ \fInum\fR
+Window adjustment parameter.  This can be used to make small
+adjustments to the Kaiser window shape.  A positive number (up to
+ten) increases its dynamic range, a negative number decreases it.
 .IP \fB\-s\fR
 Allow slack overlapping of DFT windows.
 This can, in some cases, increase image sharpness and give greater adherence
@@ -3227,7 +3241,7 @@
 Permute the colours in a colour or hybrid palette.
 The
 .I num
-parameter (from 1 to 6) selects the permutation.
+parameter, from 1 (the default) to 6, selects the permutation.
 .IP \fB\-l\fR
 Creates a `printer friendly' spectrogram with a light background (the
 default has a dark background).
@@ -3234,26 +3248,57 @@
 .IP \fB\-a\fR
 Suppress the display of the axis lines.  This is sometimes useful in
 helping to discern artefacts at the spectrogram edges.
+.IP \fB\-A\fR
+Selects an alternative, fixed colour-set.  This is provided only for
+compatibility with spectrograms produced by another package.  It should
+not normally be used as it has some problems, not least, a lack of
+differentiation at the bottom end which results in masking of low-level
+artefacts.
 .IP \fB\-t\ \fItext\fR
 Set the image title\*mtext to display above the spectrogram.
 .IP \fB\-c\ \fItext\fR
-Set the image comment\*mtext to display below and to the left of the
-spectrogram.
+Set (or clear) the image comment\*mtext to display below and to the
+left of the spectrogram.
 .IP \fB\-o\ \fItext\fR
 Name of the spectrogram output PNG file, default `spectrogram.png'.
 .RE
 .TP
 \ 
-For example, to see the spectrogram of a swept triangular wave:
+.I Advanced Options:
+.br
+In order to process a smaller section of audio without affecting other
+effects or the output signal (unlike when the
+.B trim
+effect is used), the following options may be used.
+.RS
+.IP \fB\-d\ \fIduration\fR
+This option sets the X-axis resolution such that audio with the given
+.I duration
+([[HH:]MM:]SS) fits the selected (or default) X-axis width.  For
+example,
 .EX
-   sox -n -n synth 6 tri 10k:14k spectrogram -z 100 -w k
+   sox input.mp3 output.wav -n spectrogram -d 1:00 stats
 .EE
-Append the following to the `chime' example in the
-.B delay
-effect to see its spectrogram:
+creates a spectrogram showing the first minute of the audio,
+whilst
+the
+.B stats
+effect is applied to the entire audio signal.
+.SP
+See also
+.B \-X
+for an alternative way of setting the X-axis resolution.
+.IP \fB\-S\ \fItime\fR
+Start the spectrogram at the given point in the audio stream.  For
+example
 .EX
-   rate 2k spectrogram -x 200 -Z -15 -w k
+   sox input.aiff output.wav spectrogram -S 1:00
 .EE
+creates a spectrogram showing all but the first minute of the audio
+(the output file however, receives the entire audio stream).
+.RE
+.TP
+\ 
 For the ability to perform off-line processing of spectral data, see the
 .B stat
 effect.
@@ -3991,7 +4036,6 @@
 .BR libsox (3)
 .br
 .BR audacity (1),
-.BR ImageMagick (1),
 .BR gnuplot (1),
 .BR octave (1),
 .BR wget (1)
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -1,4 +1,4 @@
-/* libSoX effect: Spectrogram       (c) 2008 robs@users.sourceforge.net
+/* libSoX effect: Spectrogram       (c) 2008-9 robs@users.sourceforge.net
  *
  * This library is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published by
@@ -15,11 +15,6 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/* TODO
- *   o Two-channel support.
- *   o Option for a larger font (for use with image down-scaling).
- */
-
 #ifdef NDEBUG /* Enable assert always. */
 #undef NDEBUG /* Must undef above assert.h or other that might include it. */
 #endif
@@ -31,14 +26,8 @@
 #include <math.h>
 #include <png.h>
 
-#define malloc              lsx_malloc
-#define calloc              lsx_calloc
-#define realloc             lsx_realloc
-#define FROM_SOX            SOX_SAMPLE_TO_FLOAT_64BIT
-#define DFT_BASE_SIZE       512
-#define MAX_DFT_SIZE_SHIFT  3
-#define MAX_DFT_SIZE        (DFT_BASE_SIZE << MAX_DFT_SIZE_SHIFT)
-#define MAX_COLS            1000 /* Also max seconds */
+#define MAX_FFT_SIZE 4096
+#define is_p2(x) !(x & (x - 1))
 
 typedef enum {Window_Hann, Window_Hamming, Window_Bartlett, Window_Rectangular, Window_Kaiser} win_type_t;
 static lsx_enum_item const window_options[] = {
@@ -51,21 +40,24 @@
 
 typedef struct {
   /* Parameters */
-  double     pixels_per_sec;
-  int        y_size, dB_range, gain, spectrum_points, perm, max_cols;
+  double     pixels_per_sec, duration, start_time,  window_adjust;
+  int        x_size0, y_size, Y_size, dB_range, gain, spectrum_points, perm;
   sox_bool   monochrome, light_background, high_colour, slack_overlap, no_axes;
+  sox_bool   alt_palette, truncate;
   win_type_t win_type;
   char const * out_name, * title, * comment;
 
-  /* Work area */
+  /* Shared work area */
+  double     * shared, * * shared_ptr;
+
+  /* Per-channel work area */
   int        WORK;  /* Start of work area is marked by this dummy variable. */
+  size_t     skip;
   int        dft_size, step_size, block_steps, block_num, rows, cols, read;
-  int        end, end_min, last_end;
+  int        x_size, end, end_min, last_end;
   sox_bool   truncated;
-  double     buf[MAX_DFT_SIZE], dft_buf[MAX_DFT_SIZE], window[MAX_DFT_SIZE];
-  double     block_norm, max, magnitudes[(MAX_DFT_SIZE>>1) + 1];
-  int        bit_rev_table[100];                       /* For Ooura fft */
-  double     sin_cos_table[dft_sc_len(MAX_DFT_SIZE)];  /* ditto */
+  double     buf[MAX_FFT_SIZE], dft_buf[MAX_FFT_SIZE], window[MAX_FFT_SIZE];
+  double     block_norm, max, magnitudes[(MAX_FFT_SIZE>>1) + 1];
   float      * dBfs;
 } priv_t;
 
@@ -72,48 +64,80 @@
 #define secs(cols) \
   ((double)(cols) * p->step_size * p->block_steps / effp->in_signal.rate)
 
+static unsigned char const alt_palette[] =
+  "\0\0\0\0\0\3\0\1\5\0\1\10\0\1\12\0\1\13\0\1\16\1\2\20\1\2\22\1\2\25\1\2\26"
+  "\1\2\30\1\3\33\1\3\35\1\3\37\1\3\40\1\3\"\1\3$\1\3%\1\3'\1\3(\1\3*\1\3,\1"
+  "\3.\1\3/\1\3""0\1\3""2\1\3""4\2\3""6\4\3""8\5\3""9\7\3;\11\3=\13\3?\16\3"
+  "A\17\2B\21\2D\23\2F\25\2H\27\2J\30\2K\32\2M\35\2O\40\2Q$\2S(\2U+\2W0\2Z3"
+  "\2\\7\2_;\2a>\2cB\2eE\2hI\2jM\2lQ\2nU\2pZ\2r_\2tc\2uh\2vl\2xp\3zu\3|z\3}"
+  "~\3~\203\3\200\207\3\202\214\3\204\220\3\205\223\3\203\226\3\200\230\3~\233"
+  "\3|\236\3z\240\3x\243\3u\246\3s\251\3q\253\3o\256\3m\261\3j\263\3h\266\3"
+  "f\272\3b\274\3^\300\3Z\303\3V\307\3R\312\3N\315\3J\321\3F\324\3C\327\3>\333"
+  "\3:\336\3""6\342\3""2\344\3/\346\7-\350\15,\352\21+\354\27*\355\33)\356\40"
+  "(\360&'\362*&\364/$\3654#\3669#\370>!\372C\40\374I\40\374O\"\374V&\374]*"
+  "\374d,\374k0\374r3\374z7\375\201;\375\210>\375\217B\375\226E\375\236I\375"
+  "\245M\375\254P\375\261T\375\267X\375\274\\\375\301a\375\306e\375\313i\375"
+  "\320m\376\325q\376\332v\376\337z\376\344~\376\351\202\376\356\206\376\363"
+  "\213\375\365\217\374\366\223\373\367\230\372\367\234\371\370\241\370\371"
+  "\245\367\371\252\366\372\256\365\372\263\364\373\267\363\374\274\361\375"
+  "\300\360\375\305\360\376\311\357\376\314\357\376\317\360\376\321\360\376"
+  "\324\360\376\326\360\376\330\360\376\332\361\377\335\361\377\337\361\377"
+  "\341\361\377\344\361\377\346\362\377\350\362\377\353";
+#define alt_palette_len ((array_length(alt_palette) - 1) / 3)
+
 static int getopts(sox_effect_t * effp, int argc, char **argv)
 {
   priv_t * p = (priv_t *)effp->priv;
-  size_t duration_1e5;
+  size_t duration;
   char const * next;
   int c;
 
-  assert(array_length(p->bit_rev_table) >= (size_t)dft_br_len(MAX_DFT_SIZE));
-
-  p->pixels_per_sec = 100, p->y_size = 2, p->dB_range = 120;/* non-0 defaults */
-  p->spectrum_points = 249, p->perm = 1, p->max_cols = MAX_COLS;
+  p->dB_range = 120, p->spectrum_points = 249, p->perm = 1; /* Non-0 defaults */
   p->out_name = "spectrogram.png", p->comment = "Created by SoX";
 
-  while ((c = lsx_getopt(argc, argv, "+M:x:X:y:z:Z:q:p:w:st:c:amlho:")) != -1) switch (c) {
-    GETOPT_NUMERIC('M', max_cols      , 100, 2000)
-    GETOPT_NUMERIC('x', pixels_per_sec,  1 , 5000)
-    GETOPT_NUMERIC('y', y_size        ,  1 , 1 + MAX_DFT_SIZE_SHIFT)
+  while ((c = lsx_getopt(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AamlhTo:")) != -1) switch (c) {
+    GETOPT_NUMERIC('x', x_size0       , 100, 5000)
+    GETOPT_NUMERIC('X', pixels_per_sec,  1 , 5000)
+    GETOPT_NUMERIC('y', y_size        , 64 , 1200)
+    GETOPT_NUMERIC('Y', Y_size        , 130, MAX_FFT_SIZE / 2 + 2)
     GETOPT_NUMERIC('z', dB_range      , 20 , 180)
     GETOPT_NUMERIC('Z', gain          ,-100, 100)
     GETOPT_NUMERIC('q', spectrum_points, 0 , p->spectrum_points)
     GETOPT_NUMERIC('p', perm          ,  1 , 6)
+    GETOPT_NUMERIC('W', window_adjust , -10, 10)
     case 'w': p->win_type = lsx_enum_option(c, window_options);   break;
-    case 's': p->slack_overlap = sox_true; break;
-    case 'X': 
-      next = lsx_parsesamples(1e5, lsx_optarg, &duration_1e5, 't');
-      if (next == NULL || *next != '\0' || duration_1e5 > p->max_cols * 1e5
-          || duration_1e5 < 1e5 / 5000 * p->max_cols)
-        return lsx_usage(effp);
-      p->pixels_per_sec = p->max_cols * 1e5 / duration_1e5;
-      break;
-    case 't': p->title    = lsx_optarg;   break;
-    case 'c': p->comment  = lsx_optarg;   break;
-    case 'a': p->no_axes  = sox_true; break;
-    case 'm': p->monochrome = sox_true; break;
-    case 'l': p->light_background = sox_true; break;
-    case 'h': p->high_colour = sox_true; break;
-    case 'o': p->out_name = lsx_optarg;   break;
+    case 's': p->slack_overlap    = sox_true;   break;
+    case 'A': p->alt_palette      = sox_true;   break;
+    case 'a': p->no_axes          = sox_true;   break;
+    case 'm': p->monochrome       = sox_true;   break;
+    case 'l': p->light_background = sox_true;   break;
+    case 'h': p->high_colour      = sox_true;   break;
+    case 'T': p->truncate         = sox_true;   break;
+    case 't': p->title            = lsx_optarg; break;
+    case 'c': p->comment          = lsx_optarg; break;
+    case 'o': p->out_name         = lsx_optarg; break;
+    case 'S': next = lsx_parsesamples(1e5, lsx_optarg, &duration, 't');
+      if (next && !*next) {p->start_time = duration * 1e-5; break;}
+      return lsx_usage(effp);
+    case 'd': next = lsx_parsesamples(1e5, lsx_optarg, &duration, 't');
+      if (next && !*next) {p->duration = duration * 1e-5; break;}
+      return lsx_usage(effp);
     default: lsx_fail("invalid option `-%c'", optopt); return lsx_usage(effp);
   }
+  if (!!p->x_size0 + !!p->pixels_per_sec + !!p->duration > 2) {
+    lsx_fail("only two of -x, -X, -d may be given");
+    return SOX_EOF;
+  }
+  if (p->y_size && p->Y_size) {
+    lsx_fail("only one of -y, -Y may be given");
+    return SOX_EOF;
+  }
   p->gain = -p->gain;
-  --p->y_size, --p->perm;
+  --p->perm;
   p->spectrum_points += 2;
+  if (p->alt_palette)
+    p->spectrum_points = min(p->spectrum_points, (int)alt_palette_len);
+  p->shared_ptr = &p->shared;
   return lsx_optind !=argc || p->win_type == INT_MAX? lsx_usage(effp) : SOX_SUCCESS;
 }
 
@@ -129,7 +153,8 @@
     case Window_Hamming: lsx_apply_hamming(w, n); break;
     case Window_Bartlett: lsx_apply_bartlett(w, n); break;
     case Window_Rectangular: break;
-    default: lsx_apply_kaiser(w, n, lsx_kaiser_beta(p->dB_range + 20.));
+    default: lsx_apply_kaiser(w, n, lsx_kaiser_beta(
+        (p->dB_range + p->gain) * (1.1 + p->window_adjust / 50)));
   }
   for (i = 0; i < p->dft_size; ++i) sum += p->window[i];
   for (i = 0; i < p->dft_size; ++i) p->window[i] *= 2 / sum
@@ -137,27 +162,80 @@
   return sum;
 }
 
+static double * rdft_init(int n)
+{
+  double * q = lsx_malloc(2 * (n / 2 + 1) * n * sizeof(*q)), * p = q;
+  int i, j;
+  for (j = 0; j <= n / 2; ++j) for (i = 0; i < n; ++i)
+    *p++ = cos(2 * M_PI * j * i / n), *p++ = sin(2 * M_PI * j * i / n);
+  return q;
+}
+
+#define _ re += in[i] * *q++, im += in[i++] * *q++,
+static void rdft_p(double const * q, double const * in, double * out, int n)
+{
+  int i, j;
+  for (j = 0; j <= n / 2; ++j) {
+    double re = 0, im = 0;
+    for (i = 0; i < (n & ~7);) _ _ _ _ _ _ _ _ 0;
+    while (i < n) _ 0;
+    *out++ += re * re + im * im;
+  }
+}
+
 static int start(sox_effect_t * effp)
 {
   priv_t * p = (priv_t *)effp->priv;
-  double actual;
+  double actual, duration = p->duration, pixels_per_sec = p->pixels_per_sec;
 
-  if (effp->in_signal.channels != 1) {
-    lsx_fail("only 1 channel is supported");
-    return SOX_EOF;
-  }
   memset(&p->WORK, 0, sizeof(*p) - field_offset(priv_t, WORK));
-  p->end = p->dft_size = DFT_BASE_SIZE << p->y_size;
+  
+  p->skip = p->start_time * effp->in_signal.rate + .5;
+  p->x_size = p->x_size0;
+  while (sox_true) {
+    if (!pixels_per_sec && p->x_size && duration)
+      pixels_per_sec = min(5000, p->x_size / duration);
+    else if (!p->x_size && pixels_per_sec && duration)
+      p->x_size = min(5000, (int)(pixels_per_sec * duration + .5));
+    if (!duration && effp->in_signal.length) {
+      duration = effp->in_signal.length / (effp->in_signal.rate * effp->in_signal.channels);
+      duration -= p->start_time;
+      if (duration <= 0)
+        duration = 1;
+      continue;
+    } else if (!p->x_size) {
+      p->x_size = 800;
+      continue;
+    } else if (!pixels_per_sec) {
+      pixels_per_sec = 100;
+      continue;
+    }
+    break;
+  }
+
+  if (p->y_size) {
+    p->dft_size = 2 * (p->y_size - 1);
+    if (!is_p2(p->dft_size) && !effp->flow)
+      p->shared = rdft_init(p->dft_size);
+  } else {
+   int y = max(32, (p->Y_size? p->Y_size : 550) / effp->in_signal.channels - 2);
+   for (p->dft_size = 128; p->dft_size <= y; p->dft_size <<= 1);
+  }
+  if (is_p2(p->dft_size) && !effp->flow)
+    lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
+  lsx_debug("duration=%g x_size=%i pixels_per_sec=%g dft_size=%i", duration, p->x_size, pixels_per_sec, p->dft_size);
+
+  p->end = p->dft_size;
   p->rows = (p->dft_size >> 1) + 1;
   actual = make_window(p, p->last_end = 0);
   lsx_debug("window_density=%g", actual / p->dft_size);
   p->step_size = (p->slack_overlap? sqrt(actual * p->dft_size) : actual) + .5;
-  p->block_steps = effp->in_signal.rate / p->pixels_per_sec;
+  p->block_steps = effp->in_signal.rate / pixels_per_sec;
   p->step_size = p->block_steps / ceil((double)p->block_steps / p->step_size) +.5;
   p->block_steps = floor((double)p->block_steps / p->step_size +.5);
   p->block_norm = 1. / p->block_steps;
   actual = effp->in_signal.rate / p->step_size / p->block_steps;
-  if (actual != p->pixels_per_sec)
+  if (actual != pixels_per_sec)
     lsx_report("actual pixels/s = %g", actual);
   lsx_debug("step_size=%i block_steps=%i", p->step_size, p->block_steps);
   p->max = -p->dB_range;
@@ -170,13 +248,14 @@
   priv_t * p = (priv_t *)effp->priv;
   int i;
 
-  if (p->cols == p->max_cols) {
-    lsx_warn("PNG truncated at %g seconds", secs(p->cols));
+  if (p->cols == p->x_size) {
     p->truncated = sox_true;
-    return SOX_EOF;
+    if (!effp->flow)
+      lsx_report("PNG truncated at %g seconds", secs(p->cols));
+    return p->truncate? SOX_EOF : SOX_SUCCESS;
   }
   ++p->cols;
-  p->dBfs = realloc(p->dBfs, p->cols * p->rows * sizeof(*p->dBfs));
+  p->dBfs = lsx_realloc(p->dBfs, p->cols * p->rows * sizeof(*p->dBfs));
   for (i = 0; i < p->rows; ++i) {
     double dBfs = 10 * log10(p->magnitudes[i] * p->block_norm);
     p->dBfs[(p->cols - 1) * p->rows + i] = dBfs + p->gain;
@@ -192,13 +271,21 @@
     size_t * isamp, size_t * osamp)
 {
   priv_t * p = (priv_t *)effp->priv;
-  size_t len = min(*isamp, *osamp);
+  size_t len = *isamp = *osamp = min(*isamp, *osamp);
   int i;
 
   memcpy(obuf, ibuf, len * sizeof(*obuf)); /* Pass on audio unaffected */
-  *isamp = *osamp = len;
 
-  while (sox_true) {
+  if (p->skip) {
+    if (p->skip >= len) {
+      p->skip -= len;
+      return SOX_SUCCESS;
+    }
+    ibuf += p->skip;
+    len -= p->skip;
+    p->skip = 0;
+  }
+  while (!p->truncated) {
     SOX_SAMPLE_LOCALS;
     if (p->read == p->step_size) {
       memmove(p->buf, p->buf + p->step_size,
@@ -206,7 +293,8 @@
       p->read = 0;
     }
     for (; len && p->read < p->step_size; --len, ++p->read, --p->end)
-      p->buf[p->dft_size - p->step_size + p->read] = FROM_SOX(*ibuf++,);
+      p->buf[p->dft_size - p->step_size + p->read] =
+        SOX_SAMPLE_TO_FLOAT_64BIT(*ibuf++,);
     if (p->read != p->step_size)
       break;
 
@@ -213,12 +301,14 @@
     if ((p->end = max(p->end, p->end_min)) != p->last_end)
       make_window(p, p->last_end = p->end);
     for (i = 0; i < p->dft_size; ++i) p->dft_buf[i] = p->buf[i] * p->window[i];
-    lsx_rdft(p->dft_size, 1, p->dft_buf, p->bit_rev_table, p->sin_cos_table);
-    p->magnitudes[0] += sqr(p->dft_buf[0]);
-    for (i = 1; i < p->dft_size >> 1; ++i)
-      p->magnitudes[i] += sqr(p->dft_buf[2*i]) + sqr(p->dft_buf[2*i+1]);
-    p->magnitudes[p->dft_size >> 1] += sqr(p->dft_buf[1]);
-
+    if (is_p2(p->dft_size)) {
+      lsx_rdft(p->dft_size, 1, p->dft_buf, lsx_fft_br, lsx_fft_sc);
+      p->magnitudes[0] += sqr(p->dft_buf[0]);
+      for (i = 1; i < p->dft_size >> 1; ++i)
+        p->magnitudes[i] += sqr(p->dft_buf[2*i]) + sqr(p->dft_buf[2*i+1]);
+      p->magnitudes[p->dft_size >> 1] += sqr(p->dft_buf[1]);
+    }
+    else rdft_p(*p->shared_ptr, p->dft_buf, p->magnitudes, p->dft_size);
     if (++p->block_num == p->block_steps && do_column(effp) == SOX_EOF)
       return SOX_EOF;
   }
@@ -230,8 +320,8 @@
   priv_t * p = (priv_t *)effp->priv;
 
   if (!p->truncated) {
-    sox_sample_t * ibuf = calloc(p->dft_size, sizeof(*ibuf));
-    sox_sample_t * obuf = calloc(p->dft_size, sizeof(*obuf));
+    sox_sample_t * ibuf = lsx_calloc(p->dft_size, sizeof(*ibuf));
+    sox_sample_t * obuf = lsx_calloc(p->dft_size, sizeof(*obuf));
     size_t isamp = (p->dft_size - p->step_size) / 2;
     int left_over = (isamp + p->read) % p->step_size;
 
@@ -255,7 +345,7 @@
 
 static unsigned colour(priv_t const * p, double x)
 {
-  unsigned c = x < -p->dB_range? 0 : x >= 0? p->spectrum_points - 1 : 
+  unsigned c = x < -p->dB_range? 0 : x >= 0? p->spectrum_points - 1 :
       1 + (1 + x / p->dB_range) * (p->spectrum_points - 2);
   return fixed_palette + c;
 }
@@ -277,7 +367,7 @@
   }
   for (i = 0; i < p->spectrum_points; ++i) {
     double c[3], x = (double)i / (p->spectrum_points - 1);
-    int at = (p->light_background)? p->spectrum_points - 1 - i : i;
+    int at = p->light_background? p->spectrum_points - 1 - i : i;
     if (p->monochrome) {
       c[2] = c[1] = c[0] = x;
       if (p->high_colour) {
@@ -302,6 +392,11 @@
         case 4: c[j] =      7 * x - phase_num;  break;
         case 5: c[j] = 1 - (7 * x - phase_num); break;
       }
+    } else if (p->alt_palette) {
+      int n = (double)i / (p->spectrum_points - 1) * (alt_palette_len - 1) + .5;
+      c[0] = alt_palette[3 * n + 0] / 255.;
+      c[1] = alt_palette[3 * n + 1] / 255.;
+      c[2] = alt_palette[3 * n + 2] / 255.;
     } else {
       if      (x < .13) c[0] = 0;
       else if (x < .73) c[0] = 1  * sin((x - .13) / .60 * M_PI / 2);
@@ -368,7 +463,7 @@
 
 static int axis(double to, int max_steps, double * limit, char * * prefix)
 {
-  double scale = 1, step = 1;
+  double scale = 1, step = max(1, 10 * to);
   int i, prefix_num = 0;
   if (max_steps) {
     double try, log_10 = HUGE_VAL, min_step = (to *= 10) / max_steps;
@@ -393,27 +488,26 @@
   priv_t *    p        = (priv_t *) effp->priv;
   FILE *      file     = fopen(p->out_name, "wb");
   uLong       font_len = 96 * font_y;
-  int         rows     = below + p->rows + 30 + 20 * !!p->title;
+  int         chans    = effp->in_signal.channels;
+  int         c_rows   = p->rows * chans + chans - 1;
+  int         rows     = below + c_rows + 30 + 20 * !!p->title;
   int         cols     = left + p->cols + between + spectrum_width + right;
-  png_byte *  pixels   = malloc(cols * rows * sizeof(*pixels));
-  png_bytepp  png_rows = malloc(rows * sizeof(*png_rows));
+  png_byte *  pixels   = lsx_malloc(cols * rows * sizeof(*pixels));
+  png_bytepp  png_rows = lsx_malloc(rows * sizeof(*png_rows));
   png_structp png      = png_create_write_struct(PNG_LIBPNG_VER_STRING, 0, 0,0);
   png_infop   png_info = png_create_info_struct(png);
   png_color   palette[256];
-  int         i, j, step, tick_len = 3 - p->no_axes;
+  int         i, j, k, base, step, tick_len = 3 - p->no_axes;
   char        text[200], * prefix;
   double      limit;
 
+  free(p->shared);
   if (!file) {
-    lsx_fail("failed to create `%s' :(", p->out_name);
-    png_destroy_write_struct(&png, &png_info);
-    free(png_rows);
-    free(pixels);
-    free(p->dBfs);
-    return SOX_EOF;
+    lsx_fail("failed to create `%s': %s", p->out_name, strerror(errno));
+    goto error;
   }
   lsx_debug("signal-max=%g", p->max);
-  font = malloc(font_len);
+  font = lsx_malloc(font_len);
   assert(uncompress(font, &font_len, fixed, sizeof(fixed)-1) == Z_OK);
   make_palette(p, palette);
   memset(pixels, Background, cols * rows * sizeof(*pixels));
@@ -431,14 +525,18 @@
     print_at(1, font_y, Text, p->comment);
 
   /* Spectrogram */
-  for (j = 0; j < p->rows; ++j) {
-    for (i = 0; i < p->cols; ++i)
-      pixel(left + i, below + j) = colour(p, p->dBfs[i*p->rows + j]);
-    if (!p->no_axes)                                   /* Y-axis lines */
-      pixel(left - 1, below + j) = pixel(left + p->cols,below + j) = Grid;
+  for (k = 0; k < chans; ++k) {
+    priv_t * q = (priv_t *)(effp - effp->flow + k)->priv;
+    base = below + (chans - 1 - k) * (p->rows + 1);
+    for (j = 0; j < p->rows; ++j) {
+      for (i = 0; i < p->cols; ++i)
+        pixel(left + i, base + j) = colour(p, q->dBfs[i*p->rows + j]);
+      if (!p->no_axes)                                 /* Y-axis lines */
+        pixel(left - 1, base + j) = pixel(left + p->cols, base + j) = Grid;
+    }
+    if (!p->no_axes) for (i = -1; i <= p->cols; ++i)   /* X-axis lines */
+      pixel(left + i, base - 1) = pixel(left + i, base + p->rows) = Grid;
   }
-  if (!p->no_axes) for (i = -1; i <= p->cols; ++i)     /* X-axis lines */
-    pixel(left + i, below - 1) = pixel(left + i, below + p->rows) = Grid;
 
   /* X-axis */
   step = axis(secs(p->cols), p->cols / (font_X * 9 / 2), &limit, &prefix);
@@ -447,13 +545,13 @@
   for (i = 0; i <= limit; i += step) {
     int y, x = limit? (double)i / limit * p->cols + .5 : 0;
     for (y = 0; y < tick_len; ++y)                     /* Ticks */
-      pixel(left-1+x, below-1-y) = pixel(left-1+x, below+p->rows+y) = Grid;
+      pixel(left-1+x, below-1-y) = pixel(left-1+x, below+c_rows+y) = Grid;
     if (step == 5 && (i%10))
       continue;
     sprintf(text, "%g", .1 * i);                       /* Tick labels */
     x = left + x - 3 * strlen(text);
     print_at(x, below - 6, Labels, text);
-    print_at(x, below + p->rows + 14, Labels, text);
+    print_at(x, below + c_rows + 14, Labels, text);
   }
 
   /* Y-axis */
@@ -460,66 +558,78 @@
   step = axis(effp->in_signal.rate / 2,
       (p->rows - 1) / ((font_y * 3 + 1) >> 1), &limit, &prefix);
   sprintf(text, "Frequency (%.1sHz)", prefix);         /* Axis label */
-  print_up(10, below + (p->rows - font_X * (int)strlen(text)) / 2, Text, text);
-  for (i = 0; i <= limit; i += step) {
-    int x, y = limit? (double)i / limit * (p->rows - 1) + .5 : 0;
-    for (x = 0; x < tick_len; ++x)                     /* Ticks */
-      pixel(left-1-x, below+y) = pixel(left+p->cols+x, below+y) = Grid;
-    if (step == 5 && (i%10))
-      continue;
-    sprintf(text, i?"%5g":"   DC", .1 * i);            /* Tick labels */
-    print_at(left - 4 - font_X * 5, below + y + 5, Labels, text);
-    sprintf(text, i?"%g":"DC", .1 * i);
-    print_at(left + p->cols + 6, below + y + 5, Labels, text);
+  print_up(10, below + (c_rows - font_X * (int)strlen(text)) / 2, Text, text);
+  for (k = 0; k < chans; ++k) {
+    base = below + k * (p->rows + 1);
+    for (i = 0; i <= limit; i += step) {
+      int x, y = limit? (double)i / limit * (p->rows - 1) + .5 : 0;
+      for (x = 0; x < tick_len; ++x)                   /* Ticks */
+        pixel(left-1-x, base+y) = pixel(left+p->cols+x, base+y) = Grid;
+      if ((step == 5 && (i%10)) || (!i && k && chans > 1))
+        continue;
+      sprintf(text, i?"%5g":"   DC", .1 * i);          /* Tick labels */
+      print_at(left - 4 - font_X * 5, base + y + 5, Labels, text);
+      sprintf(text, i?"%g":"DC", .1 * i);
+      print_at(left + p->cols + 6, base + y + 5, Labels, text);
+    }
   }
 
   /* Z-axis */
-  print_at(cols - right - 2 - font_X, below - 13, Text, "dBFS");/* Axis label */
-  for (j = 0; j < p->rows; ++j) {                      /* Spectrum */
-    png_byte b = colour(p, p->dB_range * (j / (p->rows - 1.) - 1));
+  k = min(400, c_rows);
+  base = below + (c_rows - k) / 2;
+  print_at(cols - right - 2 - font_X, base - 13, Text, "dBFS");/* Axis label */
+  for (j = 0; j < k; ++j) {                            /* Spectrum */
+    png_byte b = colour(p, p->dB_range * (j / (k - 1.) - 1));
     for (i = 0; i < spectrum_width; ++i)
-      pixel(cols - right - 1 - i, below + j) = b;
+      pixel(cols - right - 1 - i, base + j) = b;
   }
-  for (i = 0; i <= p->dB_range; i += 10) {             /* (Tick) labels */
-    int y = (double)i / p->dB_range * (p->rows - 1) + .5;
+  step = 10 * ceil(p->dB_range / 10. * (font_y + 2) / (k - 1));
+  for (i = 0; i <= p->dB_range; i += step) {           /* (Tick) labels */
+    int y = (double)i / p->dB_range * (k - 1) + .5;
     sprintf(text, "%+i", i - p->gain - p->dB_range);
-    print_at(cols - right + 1, below + y + 5, Labels, text);
+    print_at(cols - right + 1, base + y + 5, Labels, text);
   }
 
   free(font);
   png_set_rows(png, png_info, png_rows);
   png_write_png(png, png_info, PNG_TRANSFORM_IDENTITY, NULL);
-  png_destroy_write_struct(&png, &png_info);
+  fclose(file);
+error: png_destroy_write_struct(&png, &png_info);
   free(png_rows);
   free(pixels);
-  fclose(file);
   free(p->dBfs);
   return SOX_SUCCESS;
 }
 
+static int end(sox_effect_t * effp) {return effp->flow? SOX_SUCCESS:stop(effp);} /* Handler-table replacement for stop: runs stop() only when effp->flow == 0, otherwise just reports SOX_SUCCESS; NOTE(review): presumably because stop() walks every channel's priv itself -- confirm */
+
 sox_effect_handler_t const * lsx_spectrogram_effect_fn(void)
 {
-  static sox_effect_handler_t handler = {
-    "spectrogram", 0, SOX_EFF_MODIFY, getopts, start, flow, drain, stop, 0, sizeof(priv_t)};
+  static sox_effect_handler_t handler = {"spectrogram", 0, SOX_EFF_MODIFY,
+    getopts, start, flow, drain, end, 0, sizeof(priv_t)};
   static char const * lines[] = {
     "[options]",
-    "\t-M num\tMaximum width of spectrogram in pixels, default 1000",
-    "\t-x num\tX-axis pixels/second, default 100.  -x & -X are alternatives",
-    "\t-X time\tAudio duration to fit to X-axis e.g. $(soxi -D file)",
-    "\t-y num\tY-axis resolution (1 - 4), default 2",
-    "\t-z num\tZ-axis range in dB, default 120",
-    "\t-Z num\tZ-axis maximum in dBFS, default 0",
-    "\t-q num\tZ-axis quantisation (0 - 249), default 249",
+    "\t-x num\tX-axis size in pixels; default derived or 800",
+    "\t-X num\tX-axis pixels/second; default derived or 100",
+    "\t-y num\tY-axis size in pixels (per channel); slow if not 1 + 2^n",
+    "\t-Y num\tY-height total (i.e. not per channel); default 550",
+    "\t-z num\tZ-axis range in dB; default 120",
+    "\t-Z num\tZ-axis maximum in dBFS; default 0",
+    "\t-q num\tZ-axis quantisation (0 - 249); default 249",
     "\t-w name\tWindow: Hann (default), Hamming, Bartlett, Rectangular, Kaiser",
+    "\t-W num\tWindow adjust parameter (-10 - 10); applies only to Kaiser",
     "\t-s\tSlack overlap of windows",
     "\t-a\tSuppress axis lines",
     "\t-l\tLight background",
     "\t-m\tMonochrome",
     "\t-h\tHigh colour",
-    "\t-p num\tPermute colours (1 - 6)",
+    "\t-p num\tPermute colours (1 - 6); default 1",
+    "\t-A\tAlternative, inferior, fixed colour-set (for compatibility only)",
     "\t-t text\tTitle text",
     "\t-c text\tComment text",
-    "\t-o text\tOutput file name, default `spectrogram.png'",
+    "\t-o text\tOutput file name; default `spectrogram.png'",
+    "\t-d time\tAudio duration to fit to X-axis; e.g. 1:00, 48",
+    "\t-S time\tStart the spectrogram at the given time through the input",
   };
   static char * usage;
   handler.usage = lsx_usage_lines(&usage, lines, array_length(lines));
--- a/src/trim.c
+++ b/src/trim.c
@@ -83,6 +83,7 @@
     trim->index = 0;
     trim->trimmed = 0;
 
+    effp->out_signal.length = trim->length;
     return (SOX_SUCCESS);
 }