shithub: sox

Download patch

ref: 784ea2ae8fca17ecaa52815271e7079eaa48407b
parent: d2b295b9eaa96e09e002034992e1d72451c6d6b1
author: robs <robs>
date: Sat Jun 21 16:35:29 EDT 2008

more colour control

--- a/soxeffect.7
+++ b/soxeffect.7
@@ -470,7 +470,7 @@
 .B vol
 effect\*mhandy for those who prefer to work in dBs by default.
 .TP
-\fBhighpass\fR\^|\^\fBlowpass\fR [\fB-1\fR|\fB-2\fR] \fIfrequency\fR[\fBk\fR]\fR [\fRwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR]]
+\fBhighpass\fR\^|\^\fBlowpass\fR [\fB\-1\fR|\fB\-2\fR] \fIfrequency\fR[\fBk\fR]\fR [\fRwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR]]
 Apply a high-pass or low-pass filter with 3dB point \fIfrequency\fR.
 The filter can be either single-pole (with
 .BR \-1 ),
@@ -511,7 +511,7 @@
 and one audio output port can be used.  If found, the environment varible
 LADSPA_PATH will be used as search path for plugins.
 .TP
-\fBlowpass\fR [\fB-1\fR|\fB-2\fR] \fIfrequency\fR[\fBk\fR]\fR [\fRwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR]]
+\fBlowpass\fR [\fB\-1\fR|\fB\-2\fR] \fIfrequency\fR[\fBk\fR]\fR [\fRwidth\fR[\fBq\fR\^|\^\fBo\fR\^|\^\fBh\fR\^|\^\fBk\fR]]
 Apply a low-pass filter.
 See the description of the \fBhighpass\fR effect for details.
 .TP
@@ -634,7 +634,7 @@
 	sox noisy.au -n trim 0 1 noiseprof | play noisy.au noisered
 .EE
 .TP
-\fBnorm\fR [\fB-i\fR] [\fIlevel\fR]
+\fBnorm\fR [\fB\-i\fR] [\fIlevel\fR]
 Normalise audio to 0dB FSD or to a given level relative to 0dB.
 Requires temporary file space to store the audio to be normalised.
 .SP
@@ -761,7 +761,7 @@
 during sample rate conversion, and
 .B Rej dB
 is the level of noise rejection.
-The default quality level is `high' (\fB-h\fR).
+The default quality level is `high' (\fB\-h\fR).
 .SP
 See also
 .BR resample ,
@@ -918,7 +918,7 @@
 Note that repeating once yields two copies: the original audio and the
 repeated audio.
 .TP
-\fBreverb\fR [\fB-w\fR|\fB--wet-only\fR] [\fIreverberance\fR (50%) [\fIHF-damping\fR (50%)
+\fBreverb\fR [\fB\-w\fR|\fB\-\-wet-only\fR] [\fIreverberance\fR (50%) [\fIHF-damping\fR (50%)
 [\fIroom-scale\fR (100%) [\fIstereo-depth\fR (100%)
 .br
 [\fIpre-delay\fR (0ms) [\fIwet-gain\fR (0dB)]]]]]]
@@ -1036,8 +1036,18 @@
 and shows time in the X-axis, frequency in the Y-axis, and audio signal
 magnitude in the Z-axis.  Z-axis values are represented by the colour
 (or intensity) of the pixels in the X-Y plane.
+.SP
+This effect supports only one channel; for multi-channel input files,
+use either SoX's
+.B \-c 1
+option with the output file (to obtain a spectrogram on the mix-down),
+or the
+.B remix
+.I n
+effect to select a particular channel.  Be aware though, that both of
+these methods affect the audio in the effects chain.
 .RS
-.IP \fB-x\ \fInum\fR
+.IP \fB\-x\ \fInum\fR
 X-axis pixels/second, default 100.  This controls the width of the
 spectrogram;
 .I num
@@ -1060,7 +1070,7 @@
   sox audio.ogg -n trim 1:00 spectrogram
 .EE
 starts the spectrogram at 1 minute through the audio.
-.IP \fB-y\ \fInum\fR
+.IP \fB\-y\ \fInum\fR
 Y-axis resolution (1 - 4), default 2.
 This controls the height of the spectrogram;
 .I num
@@ -1084,24 +1094,28 @@
 .EE
 allows detailed analysis of frequencies up to 2kHz (half the sampling
 rate).
-.IP \fB-z\ \fInum\fR
+.IP \fB\-z\ \fInum\fR
 Z-axis (colour) range in dB, default 120.  This sets the dynamic-range
 of the spectrogram to be \-\fInum\fR\ dBFS to 0\ dBFS.
 .I Num
-may range from 40 to 180.  Decreasing dynamic-range, effectively
+may range from 20 to 180.  Decreasing dynamic-range, effectively
 increases the `contrast' of the spectrogram display, and vice versa.
-.IP \fB-g\ \fInum\fR
-Apply
+.IP \fB\-Z\ \fInum\fR
+Sets the upper limit of the Z-axis in dBFS.
+A negative
 .I num
-dB gain (or attenuation if
-.I num
-is negative) to the audio before displaying it.  Appling gain or
-attenuation shifts the the range of the Z-axis and effectively controls
-the `brightness' of the spectrogram display.  Note that the audio in the
-SoX effects chain remains unaffected by this option (cf. the
-.B gain
-effect).
-.IP \fB-w\ \fIname\fR
+effectively increases the `brightness' of the spectrogram display,
+and vice versa.
+.IP \fB\-q\ \fInum\fR
+Sets the Z-axis quantisation, i.e. the number of different colours (or
+intensities) in which to render Z-axis
+values.  A small number (e.g. 4) will give a `poster'-like effect making
+it easier to discern magnitude bands of similar level.  Smaller numbers
+also usually
+result in smaller PNG files.  The number given specifies the number of
+colours to use inside the Z-axis range; two colours are reserved to
+represent out-of-range values.
+.IP \fB\-w\ \fIname\fR
 Window: Hann (default), Hamming, Bartlett, Rectangular or Kaiser.  The
 spectrogram is produced using the Discrete Fourier Transform (DFT)
 algorithm.  A significant parameter to this algorithm is the choice of
@@ -1109,30 +1123,42 @@
 all-round frequency-resolution and dynamic-range properties.  For better
 frequency resolution (but lower dynamic-range), select a Hamming window;
 for higher dynamic-range (but poorer frequency-resolution), select a
-Kaiser window.  Selecting a window other than Hann will usually require
+Kaiser window.  Bartlett and Rectangular windows are also available.
+Selecting a window other than Hann will usually require
 a corresponding 
 .B \-z
 setting.
-.IP \fB-s\fR
+.IP \fB\-s\fR
 Allow slack overlapping of DFT windows.
 This can, in some cases, increase image sharpness and give greater adherence
 to the
 .B \-x
 value, but at the expense of a little spectral loss.
-.IP \fB-m\fR
+.IP \fB\-m\fR
 Creates a monochrome spectrogram (the default is colour).
-.IP \fB-l\fR
+.IP \fB\-h\fR
+Selects a high-colour palette\*mless visually pleasing than the default
+colour palette, but may make it easier to differentiate different levels.
+Also works with
+.B \-m
+to create a hybrid monochrome/colour palette.
+.IP \fB\-p\ \fInum\fR
+Permute the colours in a colour or hybrid palette.
+The
+.I num
+parameter (from 1 to 6) selects the permutation.
+.IP \fB\-l\fR
 Creates a `printer friendly' spectrogram with a light background (the
 default has a dark background).
-.IP \fB-a\fR
-Suppress the display of the axes lines.  This is sometimes useful in
+.IP \fB\-a\fR
+Suppress the display of the axis lines.  This is sometimes useful in
 helping to discern artefacts at the spectrogram edges.
-.IP \fB-t\ \fItext\fR
+.IP \fB\-t\ \fItext\fR
 Set the image title\*mtext to display above the spectrogram.
-.IP \fB-c\ \fItext\fR
+.IP \fB\-c\ \fItext\fR
 Set the image comment\*mtext to display below and to the left of the
 spectrogram.
-.IP \fB-o\ \fItext\fR
+.IP \fB\-o\ \fItext\fR
 Name of the spectrogram output PNG file, default `spectrogram.png'.
 .RE
 For the ability to perform off-line processing of spectral data, see the
@@ -1291,7 +1317,7 @@
 be given, the characteristics of which will be used to set the
 synthesised audio length, the number of channels, and the sampling rate;
 however, since the input file's audio is not normally needed, a `null
-file' (with the special name \fB-n\fR) is often given instead (and the
+file' (with the special name \fB\-n\fR) is often given instead (and the
 length specified as a parameter to \fBsynth\fR or by another given
 effect that can has an associated length).
 .SP
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -89,18 +89,16 @@
 
 typedef struct {
   int        y_size, dft_size, rows, cols, step_size, block_steps, block_num;
-  double     buf[MAX_DFT_SIZE];
-  double     dft_buf[MAX_DFT_SIZE];
-  double     window[MAX_DFT_SIZE];
-  int        bit_rev_table[100];  /* For Ooura fft */
-  double     sin_cos_table[dft_sc_len(MAX_DFT_SIZE)];  /* ditto */
-  double     magnitudes[(MAX_DFT_SIZE >> 1) + 1];
-  float      * dBfs;
-  double     pixels_per_sec, block_norm, max;
   int        dB_range, gain, style, read, end, end_min, last_end;
+  int        spectrum_points, perm;
   char const * out_name, * title, * comment;
-  sox_bool   slack_overlap, no_axes;
+  sox_bool   high_colour, slack_overlap, no_axes, truncated;
   win_type_t win_type;
+  double     buf[MAX_DFT_SIZE], dft_buf[MAX_DFT_SIZE], window[MAX_DFT_SIZE];
+  double     pixels_per_sec, block_norm, max, magnitudes[(MAX_DFT_SIZE>>1) + 1];
+  int        bit_rev_table[100];  /* For Ooura fft */
+  double     sin_cos_table[dft_sc_len(MAX_DFT_SIZE)];  /* ditto */
+  float      * dBfs;
 } priv_t;
 
 #define GETOPT_NUMERIC(ch, name, min, max) case ch:{ \
@@ -142,13 +140,16 @@
 
   --argv, ++argc, optind = 1, opterr = 0;                /* re-jig for getopt */
   p->pixels_per_sec = 100, p->y_size = 2, p->dB_range = 120;/* non-0 defaults */
+  p->spectrum_points = 249, p->perm = 1;
   p->style = 1, p->out_name = "spectrogram.png", p->comment = "Created by SoX";
 
-  while ((c = getopt(argc, argv, "+x:y:z:g:w:st:c:amlo:")) != -1) switch (c) {
+  while ((c = getopt(argc, argv, "+x:y:z:Z:q:p:w:st:c:amlho:")) != -1) switch (c) {
     GETOPT_NUMERIC('x', pixels_per_sec,  1 , 5000)
     GETOPT_NUMERIC('y', y_size        ,  1 , 1 + MAX_DFT_SIZE_SHIFT)
-    GETOPT_NUMERIC('z', dB_range      , 40 , 180)
-    GETOPT_NUMERIC('g', gain          ,-100, 100)
+    GETOPT_NUMERIC('z', dB_range      , 20 , 180)
+    GETOPT_NUMERIC('Z', gain          ,-100, 100)
+    GETOPT_NUMERIC('q', spectrum_points, 0 , p->spectrum_points)
+    GETOPT_NUMERIC('p', perm          ,  1 , 6)
     case 'w': p->win_type = enum_option(c, window_options);   break;
     case 's': p->slack_overlap = sox_true; break;
     case 't': p->title    = optarg;   break;
@@ -156,11 +157,14 @@
     case 'a': p->no_axes  = sox_true; break;
     case 'm': monochrome  = sox_true; break;
     case 'l': light_background = sox_true; break;
+    case 'h': p->high_colour = sox_true; break;
     case 'o': p->out_name = optarg;   break;
     default: sox_fail("unknown option `-%c'", optopt); return lsx_usage(effp);
   }
-  p->y_size -= 1;
+  p->gain = - p->gain;
+  --p->y_size, --p->perm;
   p->style = 2 * monochrome + light_background;
+  p->spectrum_points += 2;
   argc -= optind, optind = callers_optind, opterr = callers_opterr;
   return argc || p->win_type == INT_MAX? lsx_usage(effp) : SOX_SUCCESS;
 }
@@ -219,6 +223,7 @@
 
   if (p->cols == MAX_COLS) {
     sox_warn("PNG truncated at %g seconds", (double)MAX_COLS * p->step_size * p->block_steps / effp->in_signal.rate);
+    p->truncated = sox_true;
     return SOX_EOF;
   }
   ++p->cols;
@@ -275,7 +280,7 @@
 {
   priv_t * p = (priv_t *)effp->priv;
 
-  if (p->cols != MAX_COLS) {
+  if (!p->truncated) {
     sox_sample_t * ibuf = calloc(p->dft_size, sizeof(*ibuf));
     sox_sample_t * obuf = calloc(p->dft_size, sizeof(*obuf));
     sox_size_t isamp = (p->dft_size - p->step_size) / 2;
@@ -298,52 +303,72 @@
   return SOX_SUCCESS;
 }
 
-#define spectrum_points (1 + 250)
-enum {Background = spectrum_points, Text, Labels, Grid, palette_size};
+enum {Background, Text, Labels, Grid, fixed_palette};
 
-static unsigned colour(double x, int range)
+static unsigned colour(priv_t const * p, double x)
 {
-  x = range_limit(x, -range, 0);
-  return (1 + x / range) * (spectrum_points - 1) + .5;
+  unsigned c = x < -p->dB_range? 0 : x >= 0? p->spectrum_points - 1 : 
+      1 + (1 + x / p->dB_range) * (p->spectrum_points - 2);
+  return fixed_palette + c;
 }
 
-static void make_palette(png_byte * palette, int style)
+static void make_palette(priv_t const * p, png_color * palette)
 {
-  int i, j, at;
-  for (i = j = 0; j < spectrum_points; i += 3, ++j) {
-    double r, g, b, x = (double)j / (spectrum_points - 1);
-    if (style > 1)
-      palette[i+2] = palette[i+1] = palette[i+0] = style == 2? j : 255 - j;
-    else {
-      at = style? (spectrum_points - 1) * 3 - i : i;
-      if      (x < .16) r = 0;
-      else if (x < .72) r = .99 *           sin((x - .16) / .56 * M_PI / 2);
-      else              r = .01 *               (x - .72) / .28          + .99;
-      if      (x < .52) g = 0;
-      else if (x < .86) g = .99 * .5 * (1 - cos((x - .52) / .34 * M_PI));
-      else              g = .01 *               (x - .86) / .14          + .99;
-      if      (x < .34) b = .5  *           sin((x - .00) / .34 * M_PI / 2);
-      else if (x < .63) b = .5  * .5 * (1 + cos((x - .34) / .29 * M_PI));
-      else              b =            (1 - cos((x - .63) / .37 * M_PI / 2));
-      palette[at + 0] = r * 255 + .5;
-      palette[at + 1] = g * 255 + .5;
-      palette[at + 2] = b * 255 + .5;
+  int i;
+
+  if (p->style & 1) {
+    memcpy(palette++, (p->style & 2)? "\337\337\337":"\335\330\320", 3);
+    memcpy(palette++, "\0\0\0"      , 3);
+    memcpy(palette++, "\077\077\077", 3);
+    memcpy(palette++, "\077\077\077", 3);
+  } else {
+    memcpy(palette++, "\0\0\0"      , 3);
+    memcpy(palette++, "\377\377\377", 3);
+    memcpy(palette++, "\277\277\277", 3);
+    memcpy(palette++, "\177\177\177", 3);
+  }
+  for (i = 0; i < p->spectrum_points; ++i) {
+    double c[3] , x = (double)i / (p->spectrum_points - 1);
+    int at = (p->style & 1)? p->spectrum_points - 1 - i : i;
+    if (p->style > 1) {
+      c[2] = c[1] = c[0] = x;
+      if (p->high_colour) {
+        c[(1 + p->perm) % 3] = x < .4? 0 : 5 / 3. * (x - .4);
+        if (p->perm < 3)
+          c[(2 + p->perm) % 3] = x < .4? 0 : 5 / 3. * (x - .4);
+      }
+      palette[at].red  = .5 + 255 * c[0];
+      palette[at].green= .5 + 255 * c[1];
+      palette[at].blue = .5 + 255 * c[2];
+      continue;
     }
+    if (p->high_colour) {
+      static const int states[3][7] = {
+        {4,5,0,0,2,1,1}, {0,0,2,1,1,3,2}, {4,1,1,3,0,0,2}};
+      int j, phase_num = min(6, x * 7);
+      for (j = 0; j < 3; ++j) switch (states[j][phase_num]) {
+        case 0: c[j] = 0; break;
+        case 1: c[j] = 1; break;
+        case 2: c[j] = sin((7 * x - phase_num) * M_PI / 2); break;
+        case 3: c[j] = cos((7 * x - phase_num) * M_PI / 2); break;
+        case 4: c[j] = 7 * x - phase_num; break;
+        case 5: c[j] = 1 - (7 * x - phase_num); break;
+      }
+    } else {
+      if      (x < .16) c[0] = 0;
+      else if (x < .72) c[0] = .99 *           sin((x - .16) / .56 * M_PI / 2);
+      else              c[0] = .01 *               (x - .72) / .28        + .99;
+      if      (x < .52) c[1] = 0;
+      else if (x < .86) c[1] = .99 * .5 * (1 - cos((x - .52) / .34 * M_PI));
+      else              c[1] = .01 *               (x - .86) / .14        + .99;
+      if      (x < .34) c[2] = .5  *           sin((x - .00) / .34 * M_PI / 2);
+      else if (x < .63) c[2] = .5  * .5 * (1 + cos((x - .34) / .29 * M_PI));
+      else              c[2] =            (1 - cos((x - .63) / .37 * M_PI / 2));
+    }
+    palette[at].red  = .5 + 255 * c[p->perm % 3];
+    palette[at].green= .5 + 255 * c[(1 + p->perm + (p->perm % 2)) % 3];
+    palette[at].blue = .5 + 255 * c[(2 + p->perm - (p->perm % 2)) % 3];
   }
-  i = spectrum_points * 3;
-  switch (style) {
-    case 0: case 2:
-      memcpy(palette + i, "\0\0\0"      , 3), i+= 3;
-      memcpy(palette + i, "\377\377\377", 3), i+= 3;
-      memcpy(palette + i, "\277\277\277", 3), i+= 3;
-      memcpy(palette + i, "\177\177\177", 3), i+= 3;
-      return;
-    case 1:  memcpy(palette + i, "\335\330\320", 3), i+= 3; break;
-    default: memcpy(palette + i, "\337\337\337", 3), i+= 3;
-  }
-  memcpy(palette + i, "\0\0\0"      , 3), i+= 3;
-  memcpy(palette + i, "\077\077\077", 3), i+= 3;
-  memcpy(palette + i, "\077\077\077", 3), i+= 3;
 }
 
 static const Bytef fixed[] =
@@ -405,7 +430,7 @@
     scale = pow(10., -3. * prefix_num);
   }
   *prefix = "pnum-kMGTPE" + prefix_num + (prefix_num? 4 : 11);
-  *limit = to * scale + .5;
+  *limit = to * scale;
   return step * scale + .5;
 }
 
@@ -427,7 +452,7 @@
   png_bytepp  png_rows = malloc(rows * sizeof(*png_rows));
   png_structp png      = png_create_write_struct(PNG_LIBPNG_VER_STRING, 0, 0,0);
   png_infop   png_info = png_create_info_struct(png);
-  png_byte    palette[palette_size * 3];
+  png_color   palette[256];
   int         i, j, step, tick_len = 2 + do_frame;
   char        text[200], * prefix;
   double      limit;
@@ -443,10 +468,10 @@
   sox_debug("signal-max=%g", p->max);
   font = malloc(font_len);
   assert(uncompress(font, &font_len, fixed, sizeof(fixed)-1) == Z_OK);
-  make_palette(palette, p->style);
+  make_palette(p, palette);
   memset(pixels, Background, cols * rows * sizeof(*pixels));
   png_init_io(png, file);
-  png_set_PLTE(png, png_info, (png_colorp)palette, palette_size);
+  png_set_PLTE(png, png_info, palette, fixed_palette + p->spectrum_points);
   png_set_IHDR(png, png_info, (size_t)cols, (size_t)rows, 8,
       PNG_COLOR_TYPE_PALETTE, PNG_INTERLACE_NONE,
       PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
@@ -461,7 +486,7 @@
   /* Spectrogram */
   for (j = 0; j < p->rows; ++j) {
     for (i = 0; i < p->cols; ++i)
-      pixel(left + i, below + j) = colour(p->dBfs[i*p->rows + j], p->dB_range);
+      pixel(left + i, below + j) = colour(p, p->dBfs[i*p->rows + j]);
     if (do_frame)                                      /* Frame verticals */
       pixel(left - 1, below + j) = pixel(left + p->cols,below + j) = Grid;
   }
@@ -503,7 +528,7 @@
   /* Z-axis */
   print_at(cols - right - 2 - font_X, below - 13, Text, "dBFS");/* Axis label */
   for (j = 0; j < p->rows; ++j) {                      /* Spectrum */
-    png_byte b = colour(p->dB_range * (j / (p->rows - 1.) - 1), p->dB_range);
+    png_byte b = colour(p, p->dB_range * (j / (p->rows - 1.) - 1));
     for (i = 0; i < spectrum_width; ++i)
       pixel(cols - right - 1 - i, below + j) = b;
   }
@@ -528,18 +553,21 @@
 {
   static sox_effect_handler_t handler = {
     "spectrogram", "[options]\n"
-      "  -x num    X-axis pixels/second, default 100\n"
-      "  -y num    Y-axis resolution (1 - 4), default 2\n"
-      "  -z num    Z-axis (colour) range in dB, default 120\n"
-      "  -g num    Apply num dB gain; to shift Z-axis\n"
-      "  -w name   Window: Hann (default), Hamming, Bartlett, Rectangular, Kaiser\n"
-      "  -s        Slack overlap\n"
-      "  -a        Suppress axes' lines\n"
-      "  -t text   Title text\n"
-      "  -c text   Comment text\n"
-      "  -l        Light background\n"
-      "  -m        Monochrome display\n"
-      "  -o text   output file name, default `spectrogram.png'\n",
+      "\t-x num\tX-axis pixels/second, default 100\n"
+      "\t-y num\tY-axis resolution (1 - 4), default 2\n"
+      "\t-z num\tZ-axis range in dB, default 120\n"
+      "\t-Z num\tZ-axis maximum in dBFS, default 0\n"
+      "\t-q num\tZ-axis quantisation, default 249\n"
+      "\t-w name\tWindow: Hann (default), Hamming, Bartlett, Rectangular, Kaiser\n"
+      "\t-s\tSlack overlap\n"
+      "\t-a\tSuppress axis lines\n"
+      "\t-l\tLight background\n"
+      "\t-m\tMonochrome\n"
+      "\t-h\tHigh colour\n"
+      "\t-p num\tPermute colours\n"
+      "\t-t text\tTitle text\n"
+      "\t-c text\tComment text\n"
+      "\t-o text\toutput file name, default `spectrogram.png'\n",
     0, getopts, start, flow, drain, stop, NULL, sizeof(priv_t)};
   return &handler;
 }