ref: 0daa88772dca7ce53e235e4e988e4bc739442970
parent: 8025dd7861959189fc0abaade8d5be47244034da
author: robs <robs>
date: Mon May 25 11:04:15 EDT 2009
multi-channel support for spectrogram
--- a/ChangeLog
+++ b/ChangeLog
@@ -85,7 +85,9 @@
o New `biquad' filter effect using external coefficients. (robs)
o New `overdrive' effect. (robs)
o New `pluck' and `tpdf' types for `synth'. (robs)
- o New `-X duration' option for `spectrogram'. (robs)
+ o New multi-channel support and revised sizing options for `spectrogram'.
+ N.B. revised options are not directly backwards compatible -- see the
+ man page for details of the new syntax. (robs)
o [2778142] just intonation for `synth'. (robs)
o Can now set common parameters for multiple `synth' channels. (robs)
o Richer gain/normalise options. (robs)
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@
o Automatically `dither' as needed.
o Improvements to AIFF, WAV, FLAC, MP3 handlers
o ALSA driver now supports 24-bit.
+ o `spectrogram' effect enhancements including multi-channel support.
o `synth' effect enhancements including new `pluck' type.
o More gain/normalise options.
o Now uses CPU multi-core to speed up some effects.
@@ -22,7 +23,7 @@
o Can now auto-detect file-type even when inputing from a pipe.
For the complete list of changes, see the ChangeLog at
- http://sox.cvs.sourceforge.net/sox/sox/ChangeLog?revision=1.225&view=markup
+ http://sox.cvs.sourceforge.net/sox/sox/ChangeLog?revision=1.228&view=markup
Thanks to all who contributed to this release.
--- a/sox.1
+++ b/sox.1
@@ -733,7 +733,7 @@
SoX's global options.
For example:
.EX
- SOX_OPTS="--buffer 20000 --play-rate-arg -hs"
+ SOX_OPTS="--buffer 20000 --play-rate-arg -hs --temp /mnt/temp"
.EE
Note that setting SOX_OPTS can potentially create unwanted changes in
the behaviour of scripts or other programs that invoke SoX. So SOX_OPTS
@@ -2949,9 +2949,8 @@
.SP
This effect supports the \fB\-\-plot\fR global option.
.TP
-\fBsilence \fR[\fB\-l\fR] \fIabove-periods\fR [\fIduration
-threshold\fR[\fBd\fR\^|\^\fB%\fR] [\fIbelow-periods duration
-threshold\fR[\fBd\fR\^|\^\fB%\fR]]
+\fBsilence \fR[\fB\-l\fR] \fIabove-periods\fR [\fIduration threshold\fR[\fBd\fR\^|\^\fB%\fR]
+[\fIbelow-periods duration threshold\fR[\fBd\fR\^|\^\fB%\fR]]
.SP
Removes silence from the beginning, middle, or end of the audio.
Silence is anything below a specified threshold.
@@ -3087,31 +3086,81 @@
.B tempo
effects.
.TP
-\fBspectrogram \fR[options]
-Create a spectrogram of the audio. This effect is optional; type \fBsox
-\-\-help\fR and check the list of supported effects to see if it has
-been included.
+\fBspectrogram \fR[\fIoptions\fR]
+Create a spectrogram of the audio; the audio is passed unmodified
+through the SoX processing chain. This effect is optional\*mtype
+\fBsox \-\-help\fR and check the list of supported effects to see if
+it has been included.
.SP
The spectrogram is rendered in a Portable Network Graphic (PNG) file,
-and shows time in the X-axis, frequency in the Y-axis, and audio signal
-magnitude in the Z-axis. Z-axis values are represented by the colour
-(or intensity) of the pixels in the X-Y plane.
+and shows time in the X-axis, frequency in the Y-axis, and audio
+signal magnitude in the Z-axis. Z-axis values are represented by the
+colour (or optionally the intensity) of the pixels in the X-Y plane.
+If the audio signal contains multiple channels then these are shown
+from top to bottom starting from channel 1 (which is the left channel
+for stereo audio).
.SP
-This effect supports only one channel.
-For multi-channel input files, use the
-.B remix
-effect as either
-.B remix \-
-to obtain a spectrogram on the mix-down,
-or
-.B remix
-.I n
-to select a particular channel; be aware though, that both of
-these methods affect the audio in the effects chain.
+For example, if `my.wav' is a stereo file, then with
+.EX
+ sox my.wav -n spectrogram
+.EE
+a spectrogram of the entire file will be created in the file
+`spectrogram.png'. More often though, analysis of a smaller portion
+of the audio is required; e.g. with
+.EX
+ sox my.wav -n remix 2 trim 20 30 spectrogram
+.EE
+the spectrogram shows information only from the second (right)
+channel, and of thirty seconds of audio starting from twenty seconds
+in. To analyse a small portion of the frequency domain, the
+.B rate
+effect may be used, e.g.
+.EX
+ sox my.wav -n rate 6k spectrogram
+.EE
+allows detailed analysis of frequencies up to 3kHz (half the sampling
+rate) i.e. where the human auditory system is most sensitive.
+With
+.EX
+ sox my.wav -n trim 0 10 spectrogram -x 600 -y 200 -z 100
+.EE
+the given options control the size of the spectrogram's X, Y & Z axes
+(in this case, the spectrogram area of the produced image will be 600
+by 200 pixels in size and the Z-axis range will be 100 dB). Note that
+the produced image includes axes legends etc. and so will be a little
+larger than the specified spectrogram size. In this example:
+.EX
+ sox -n -n synth 6 tri 10k:14k spectrogram -z 100 -w kaiser
+.EE
+an analysis `window' with high dynamic range is selected to best
+display the spectrogram of a swept triangular wave. For a similar
+example, append the following to the `chime' command in the
+description of the
+.B delay
+effect (above):
+.EX
+ rate 2k spectrogram -X 200 -Z -10 -w kaiser
+.EE
+Options are also available to control the appearance (colour-set,
+brightness, contrast, etc.) and filename of the spectrogram; e.g. with
+.EX
+ sox my.wav -n spectrogram -m -l -o print.png
+.EE
+a spectrogram is created suitable for printing on a `black and white'
+printer.
+.SP
+.I Options:
.RS
.IP \fB\-x\ \fInum\fR
-X-axis pixels/second, default 100. This controls the width of the
-spectrogram;
+Change the (maximum) width (X-axis) of the spectrogram from its default
+value of 800 pixels to a given number between 100 and 5000.
+See also \fB\-X\fR and \fB\-d\fR.
+.IP \fB\-X\ \fInum\fR
+X-axis pixels/second; the default is auto-calculated to fit the given
+or known audio duration to the X-axis size, or 100 otherwise. If
+given in conjunction with \fB\-d\fR, this option affects the width of
+the spectrogram; otherwise, it affects the duration of the
+spectrogram.
.I num
can be from 1 (low time resolution) to 5000 (high time resolution)
and need not be an integer. SoX
@@ -3120,61 +3169,26 @@
(viewable when the SoX global option
.B \-V
is in effect).
-.SP
-The maximum width of the spectrogram is 999 pixels; if the audio length
-and the given
-.B \-x
-number are such that this would be exceeded, then the spectrogram (and
-the effects chain) will be truncated. To move the spectrogram to a
-point later in the audio stream, first invoke the
-.B trim
-effect; e.g.
-.EX
- sox audio.ogg -n trim 1:00 spectrogram
-.EE
-starts the spectrogram at 1 minute through the audio.
-.SP
-See also
-.B \-X
-for an alternative way of setting the X-axis resolution.
-.IP \fB\-X\ \fIduration\fR
-Sets the X-axis resolution such that audio with the given
-.I duration
-([[HH:]MM:]SS) fits the maximum X-axis width. For example,
-.EX
- sox my.mp3 -n remix - spectrogram -X 03:24
-.EE
-or, with Bourne shell, PowerShell, etc.,
-.EX
- sox my.mp3 -n remix - spectrogram -X $(soxi -D my.mp3)
-.EE
-.SP
-See also
-.B \-x
-for an alternative way of setting the X-axis resolution.
+See also \fB\-x\fR and \fB\-d\fR.
.IP \fB\-y\ \fInum\fR
-Y-axis resolution (1 \- 4), default 2.
-This controls the height of the spectrogram;
-.I num
-can be from 1 (low frequency resolution) to 4 (high frequency
-resolution). For values greater than 2, the resulting image may be too
-tall to display on the screen; if so, a graphic manipulation package
-(such as
-.BR ImageMagick (1))
-can be used to re-size the image.
-.SP
-To increase the frequency resolution without increasing the height of
-the spectrogram, the
-.B rate
-effect may be invoked to reduce the sampling rate of the signal before
-invoking
-.BR spectrogram ;
-e.g.
-.EX
- sox audio.ogg -n rate 4k spectrogram
-.EE
-allows detailed analysis of frequencies up to 2kHz (half the sampling
-rate).
+Sets the Y-axis size in pixels (per channel); this is the number of
+frequency `bins' used in the Fourier analysis that produces the
+spectrogram. N.B. it can be slow to produce the spectrogram if this
+number is not one more than a power of two (e.g. 129). By default the
+Y-axis size is chosen automatically (depending on the number of
+channels). See
+.B \-Y
+for an alternative way of setting spectrogram height.
+.IP \fB\-Y\ \fInum\fR
+Sets the target total height of the spectrogram(s). The default value
+is 550 pixels. Using this option (and by default), SoX will choose a
+height for individual spectrogram channels that is one more than a
+power of two, so the actual total height may fall short of the given
+number. However, there is also a minimum height per channel so if
+there are many channels, the number may be exceeded.
+See
+.B \-y
+for an alternative way of setting spectrogram height.
.IP \fB\-z\ \fInum\fR
Z-axis (colour) range in dB, default 120. This sets the dynamic-range
of the spectrogram to be \-\fInum\fR\ dBFS to 0\ dBFS.
@@ -3205,10 +3219,10 @@
frequency resolution (but lower dynamic-range), select a Hamming window;
for higher dynamic-range (but poorer frequency-resolution), select a
Kaiser window. Bartlett and Rectangular windows are also available.
-Selecting a window other than Hann will usually require
-a corresponding
-.B \-z
-setting.
+.IP \fB\-W\ \fInum\fR
+Window adjustment parameter. This can be used to make small
+adjustments to the Kaiser window shape. A positive number (up to
+ten) increases its dynamic range, a negative number decreases it.
.IP \fB\-s\fR
Allow slack overlapping of DFT windows.
This can, in some cases, increase image sharpness and give greater adherence
@@ -3227,7 +3241,7 @@
Permute the colours in a colour or hybrid palette.
The
.I num
-parameter (from 1 to 6) selects the permutation.
+parameter, from 1 (the default) to 6, selects the permutation.
.IP \fB\-l\fR
Creates a `printer friendly' spectrogram with a light background (the
default has a dark background).
@@ -3234,26 +3248,57 @@
.IP \fB\-a\fR
Suppress the display of the axis lines. This is sometimes useful in
helping to discern artefacts at the spectrogram edges.
+.IP \fB\-A\fR
+Selects an alternative, fixed colour-set. This is provided only for
+compatibility with spectrograms produced by another package. It should
+not normally be used as it has some problems, not least, a lack of
+differentiation at the bottom end which results in masking of low-level
+artefacts.
.IP \fB\-t\ \fItext\fR
Set the image title\*mtext to display above the spectrogram.
.IP \fB\-c\ \fItext\fR
-Set the image comment\*mtext to display below and to the left of the
-spectrogram.
+Set (or clear) the image comment\*mtext to display below and to the
+left of the spectrogram.
.IP \fB\-o\ \fItext\fR
Name of the spectrogram output PNG file, default `spectrogram.png'.
.RE
.TP
\
-For example, to see the spectrogram of a swept triangular wave:
+.I Advanced Options:
+.br
+In order to process a smaller section of audio without affecting other
+effects or the output signal (unlike when the
+.B trim
+effect is used), the following options may be used.
+.RS
+.IP \fB\-d\ \fIduration\fR
+This option sets the X-axis resolution such that audio with the given
+.I duration
+([[HH:]MM:]SS) fits the selected (or default) X-axis width. For
+example,
.EX
- sox -n -n synth 6 tri 10k:14k spectrogram -z 100 -w k
+ sox input.mp3 output.wav spectrogram -d 1:00 stats
.EE
-Append the following to the `chime' example in the
-.B delay
-effect to see its spectrogram:
+creates a spectrogram showing the first minute of the audio,
+whilst
+the
+.B stats
+effect is applied to the entire audio signal.
+.SP
+See also
+.B \-X
+for an alternative way of setting the X-axis resolution.
+.IP \fB\-S\ \fItime\fR
+Start the spectrogram at the given point in the audio stream. For
+example
.EX
- rate 2k spectrogram -x 200 -Z -15 -w k
+ sox input.aiff output.wav spectrogram -S 1:00
.EE
+creates a spectrogram showing all but the first minute of the audio
+(the output file however, receives the entire audio stream).
+.RE
+.TP
+\
For the ability to perform off-line processing of spectral data, see the
.B stat
effect.
@@ -3991,7 +4036,6 @@
.BR libsox (3)
.br
.BR audacity (1),
-.BR ImageMagick (1),
.BR gnuplot (1),
.BR octave (1),
.BR wget (1)
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -1,4 +1,4 @@
-/* libSoX effect: Spectrogram (c) 2008 robs@users.sourceforge.net
+/* libSoX effect: Spectrogram (c) 2008-9 robs@users.sourceforge.net
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
@@ -15,11 +15,6 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-/* TODO
- * o Two-channel support.
- * o Option for a larger font (for use with image down-scaling).
- */
-
#ifdef NDEBUG /* Enable assert always. */
#undef NDEBUG /* Must undef above assert.h or other that might include it. */
#endif
@@ -31,14 +26,8 @@
#include <math.h>
#include <png.h>
-#define malloc lsx_malloc
-#define calloc lsx_calloc
-#define realloc lsx_realloc
-#define FROM_SOX SOX_SAMPLE_TO_FLOAT_64BIT
-#define DFT_BASE_SIZE 512
-#define MAX_DFT_SIZE_SHIFT 3
-#define MAX_DFT_SIZE (DFT_BASE_SIZE << MAX_DFT_SIZE_SHIFT)
-#define MAX_COLS 1000 /* Also max seconds */
+#define MAX_FFT_SIZE 4096
+#define is_p2(x) !(x & (x - 1))
typedef enum {Window_Hann, Window_Hamming, Window_Bartlett, Window_Rectangular, Window_Kaiser} win_type_t;
static lsx_enum_item const window_options[] = {
@@ -51,21 +40,24 @@
typedef struct {
/* Parameters */
- double pixels_per_sec;
- int y_size, dB_range, gain, spectrum_points, perm, max_cols;
+ double pixels_per_sec, duration, start_time, window_adjust;
+ int x_size0, y_size, Y_size, dB_range, gain, spectrum_points, perm;
sox_bool monochrome, light_background, high_colour, slack_overlap, no_axes;
+ sox_bool alt_palette, truncate;
win_type_t win_type;
char const * out_name, * title, * comment;
- /* Work area */
+ /* Shared work area */
+ double * shared, * * shared_ptr;
+
+ /* Per-channel work area */
int WORK; /* Start of work area is marked by this dummy variable. */
+ size_t skip;
int dft_size, step_size, block_steps, block_num, rows, cols, read;
- int end, end_min, last_end;
+ int x_size, end, end_min, last_end;
sox_bool truncated;
- double buf[MAX_DFT_SIZE], dft_buf[MAX_DFT_SIZE], window[MAX_DFT_SIZE];
- double block_norm, max, magnitudes[(MAX_DFT_SIZE>>1) + 1];
- int bit_rev_table[100]; /* For Ooura fft */
- double sin_cos_table[dft_sc_len(MAX_DFT_SIZE)]; /* ditto */
+ double buf[MAX_FFT_SIZE], dft_buf[MAX_FFT_SIZE], window[MAX_FFT_SIZE];
+ double block_norm, max, magnitudes[(MAX_FFT_SIZE>>1) + 1];
float * dBfs;
} priv_t;
@@ -72,48 +64,80 @@
#define secs(cols) \
((double)(cols) * p->step_size * p->block_steps / effp->in_signal.rate)
+static unsigned char const alt_palette[] =
+ "\0\0\0\0\0\3\0\1\5\0\1\10\0\1\12\0\1\13\0\1\16\1\2\20\1\2\22\1\2\25\1\2\26"
+ "\1\2\30\1\3\33\1\3\35\1\3\37\1\3\40\1\3\"\1\3$\1\3%\1\3'\1\3(\1\3*\1\3,\1"
+ "\3.\1\3/\1\3""0\1\3""2\1\3""4\2\3""6\4\3""8\5\3""9\7\3;\11\3=\13\3?\16\3"
+ "A\17\2B\21\2D\23\2F\25\2H\27\2J\30\2K\32\2M\35\2O\40\2Q$\2S(\2U+\2W0\2Z3"
+ "\2\\7\2_;\2a>\2cB\2eE\2hI\2jM\2lQ\2nU\2pZ\2r_\2tc\2uh\2vl\2xp\3zu\3|z\3}"
+ "~\3~\203\3\200\207\3\202\214\3\204\220\3\205\223\3\203\226\3\200\230\3~\233"
+ "\3|\236\3z\240\3x\243\3u\246\3s\251\3q\253\3o\256\3m\261\3j\263\3h\266\3"
+ "f\272\3b\274\3^\300\3Z\303\3V\307\3R\312\3N\315\3J\321\3F\324\3C\327\3>\333"
+ "\3:\336\3""6\342\3""2\344\3/\346\7-\350\15,\352\21+\354\27*\355\33)\356\40"
+ "(\360&'\362*&\364/$\3654#\3669#\370>!\372C\40\374I\40\374O\"\374V&\374]*"
+ "\374d,\374k0\374r3\374z7\375\201;\375\210>\375\217B\375\226E\375\236I\375"
+ "\245M\375\254P\375\261T\375\267X\375\274\\\375\301a\375\306e\375\313i\375"
+ "\320m\376\325q\376\332v\376\337z\376\344~\376\351\202\376\356\206\376\363"
+ "\213\375\365\217\374\366\223\373\367\230\372\367\234\371\370\241\370\371"
+ "\245\367\371\252\366\372\256\365\372\263\364\373\267\363\374\274\361\375"
+ "\300\360\375\305\360\376\311\357\376\314\357\376\317\360\376\321\360\376"
+ "\324\360\376\326\360\376\330\360\376\332\361\377\335\361\377\337\361\377"
+ "\341\361\377\344\361\377\346\362\377\350\362\377\353";
+#define alt_palette_len ((array_length(alt_palette) - 1) / 3)
+
static int getopts(sox_effect_t * effp, int argc, char **argv)
{
priv_t * p = (priv_t *)effp->priv;
- size_t duration_1e5;
+ size_t duration;
char const * next;
int c;
- assert(array_length(p->bit_rev_table) >= (size_t)dft_br_len(MAX_DFT_SIZE));
-
- p->pixels_per_sec = 100, p->y_size = 2, p->dB_range = 120;/* non-0 defaults */
- p->spectrum_points = 249, p->perm = 1, p->max_cols = MAX_COLS;
+ p->dB_range = 120, p->spectrum_points = 249, p->perm = 1; /* Non-0 defaults */
p->out_name = "spectrogram.png", p->comment = "Created by SoX";
- while ((c = lsx_getopt(argc, argv, "+M:x:X:y:z:Z:q:p:w:st:c:amlho:")) != -1) switch (c) {
- GETOPT_NUMERIC('M', max_cols , 100, 2000)
- GETOPT_NUMERIC('x', pixels_per_sec, 1 , 5000)
- GETOPT_NUMERIC('y', y_size , 1 , 1 + MAX_DFT_SIZE_SHIFT)
+ while ((c = lsx_getopt(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AamlhTo:")) != -1) switch (c) {
+ GETOPT_NUMERIC('x', x_size0 , 100, 5000)
+ GETOPT_NUMERIC('X', pixels_per_sec, 1 , 5000)
+ GETOPT_NUMERIC('y', y_size , 64 , 1200)
+ GETOPT_NUMERIC('Y', Y_size , 130, MAX_FFT_SIZE / 2 + 2)
GETOPT_NUMERIC('z', dB_range , 20 , 180)
GETOPT_NUMERIC('Z', gain ,-100, 100)
GETOPT_NUMERIC('q', spectrum_points, 0 , p->spectrum_points)
GETOPT_NUMERIC('p', perm , 1 , 6)
+ GETOPT_NUMERIC('W', window_adjust , -10, 10)
case 'w': p->win_type = lsx_enum_option(c, window_options); break;
- case 's': p->slack_overlap = sox_true; break;
- case 'X':
- next = lsx_parsesamples(1e5, lsx_optarg, &duration_1e5, 't');
- if (next == NULL || *next != '\0' || duration_1e5 > p->max_cols * 1e5
- || duration_1e5 < 1e5 / 5000 * p->max_cols)
- return lsx_usage(effp);
- p->pixels_per_sec = p->max_cols * 1e5 / duration_1e5;
- break;
- case 't': p->title = lsx_optarg; break;
- case 'c': p->comment = lsx_optarg; break;
- case 'a': p->no_axes = sox_true; break;
- case 'm': p->monochrome = sox_true; break;
- case 'l': p->light_background = sox_true; break;
- case 'h': p->high_colour = sox_true; break;
- case 'o': p->out_name = lsx_optarg; break;
+ case 's': p->slack_overlap = sox_true; break;
+ case 'A': p->alt_palette = sox_true; break;
+ case 'a': p->no_axes = sox_true; break;
+ case 'm': p->monochrome = sox_true; break;
+ case 'l': p->light_background = sox_true; break;
+ case 'h': p->high_colour = sox_true; break;
+ case 'T': p->truncate = sox_true; break;
+ case 't': p->title = lsx_optarg; break;
+ case 'c': p->comment = lsx_optarg; break;
+ case 'o': p->out_name = lsx_optarg; break;
+ case 'S': next = lsx_parsesamples(1e5, lsx_optarg, &duration, 't');
+ if (next && !*next) {p->start_time = duration * 1e-5; break;}
+ return lsx_usage(effp);
+ case 'd': next = lsx_parsesamples(1e5, lsx_optarg, &duration, 't');
+ if (next && !*next) {p->duration = duration * 1e-5; break;}
+ return lsx_usage(effp);
default: lsx_fail("invalid option `-%c'", optopt); return lsx_usage(effp);
}
+ if (!!p->x_size0 + !!p->pixels_per_sec + !!p->duration > 2) {
+ lsx_fail("only two of -x, -X, -d may be given");
+ return SOX_EOF;
+ }
+ if (p->y_size && p->Y_size) {
+ lsx_fail("only one of -y, -Y may be given");
+ return SOX_EOF;
+ }
p->gain = -p->gain;
- --p->y_size, --p->perm;
+ --p->perm;
p->spectrum_points += 2;
+ if (p->alt_palette)
+ p->spectrum_points = min(p->spectrum_points, (int)alt_palette_len);
+ p->shared_ptr = &p->shared;
return lsx_optind !=argc || p->win_type == INT_MAX? lsx_usage(effp) : SOX_SUCCESS;
}
@@ -129,7 +153,8 @@
case Window_Hamming: lsx_apply_hamming(w, n); break;
case Window_Bartlett: lsx_apply_bartlett(w, n); break;
case Window_Rectangular: break;
- default: lsx_apply_kaiser(w, n, lsx_kaiser_beta(p->dB_range + 20.));
+ default: lsx_apply_kaiser(w, n, lsx_kaiser_beta(
+ (p->dB_range + p->gain) * (1.1 + p->window_adjust / 50)));
}
for (i = 0; i < p->dft_size; ++i) sum += p->window[i];
for (i = 0; i < p->dft_size; ++i) p->window[i] *= 2 / sum
@@ -137,27 +162,80 @@
return sum;
}
+static double * rdft_init(int n)
+{
+ double * q = lsx_malloc(2 * (n / 2 + 1) * n * sizeof(*q)), * p = q;
+ int i, j;
+ for (j = 0; j <= n / 2; ++j) for (i = 0; i < n; ++i)
+ *p++ = cos(2 * M_PI * j * i / n), *p++ = sin(2 * M_PI * j * i / n);
+ return q;
+}
+
+#define _ re += in[i] * *q++, im += in[i++] * *q++,
+static void rdft_p(double const * q, double const * in, double * out, int n)
+{
+ int i, j;
+ for (j = 0; j <= n / 2; ++j) {
+ double re = 0, im = 0;
+ for (i = 0; i < (n & ~7);) _ _ _ _ _ _ _ _ 0;
+ while (i < n) _ 0;
+ *out++ += re * re + im * im;
+ }
+}
+
static int start(sox_effect_t * effp)
{
priv_t * p = (priv_t *)effp->priv;
- double actual;
+ double actual, duration = p->duration, pixels_per_sec = p->pixels_per_sec;
- if (effp->in_signal.channels != 1) {
- lsx_fail("only 1 channel is supported");
- return SOX_EOF;
- }
memset(&p->WORK, 0, sizeof(*p) - field_offset(priv_t, WORK));
- p->end = p->dft_size = DFT_BASE_SIZE << p->y_size;
+
+ p->skip = p->start_time * effp->in_signal.rate + .5;
+ p->x_size = p->x_size0;
+ while (sox_true) {
+ if (!pixels_per_sec && p->x_size && duration)
+ pixels_per_sec = min(5000, p->x_size / duration);
+ else if (!p->x_size && pixels_per_sec && duration)
+ p->x_size = min(5000, (int)(pixels_per_sec * duration + .5));
+ if (!duration && effp->in_signal.length) {
+ duration = effp->in_signal.length / (effp->in_signal.rate * effp->in_signal.channels);
+ duration -= p->start_time;
+ if (duration <= 0)
+ duration = 1;
+ continue;
+ } else if (!p->x_size) {
+ p->x_size = 800;
+ continue;
+ } else if (!pixels_per_sec) {
+ pixels_per_sec = 100;
+ continue;
+ }
+ break;
+ }
+
+ if (p->y_size) {
+ p->dft_size = 2 * (p->y_size - 1);
+ if (!is_p2(p->dft_size) && !effp->flow)
+ p->shared = rdft_init(p->dft_size);
+ } else {
+ int y = max(32, (p->Y_size? p->Y_size : 550) / effp->in_signal.channels - 2);
+ for (p->dft_size = 128; p->dft_size <= y; p->dft_size <<= 1);
+ }
+ if (is_p2(p->dft_size) && !effp->flow)
+ lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
+ lsx_debug("duration=%g x_size=%i pixels_per_sec=%g dft_size=%i", duration, p->x_size, pixels_per_sec, p->dft_size);
+
+ p->end = p->dft_size;
p->rows = (p->dft_size >> 1) + 1;
actual = make_window(p, p->last_end = 0);
lsx_debug("window_density=%g", actual / p->dft_size);
p->step_size = (p->slack_overlap? sqrt(actual * p->dft_size) : actual) + .5;
- p->block_steps = effp->in_signal.rate / p->pixels_per_sec;
+ p->block_steps = effp->in_signal.rate / pixels_per_sec;
p->step_size = p->block_steps / ceil((double)p->block_steps / p->step_size) +.5;
p->block_steps = floor((double)p->block_steps / p->step_size +.5);
p->block_norm = 1. / p->block_steps;
actual = effp->in_signal.rate / p->step_size / p->block_steps;
- if (actual != p->pixels_per_sec)
+ if (actual != pixels_per_sec)
lsx_report("actual pixels/s = %g", actual);
lsx_debug("step_size=%i block_steps=%i", p->step_size, p->block_steps);
p->max = -p->dB_range;
@@ -170,13 +248,14 @@
priv_t * p = (priv_t *)effp->priv;
int i;
- if (p->cols == p->max_cols) {
- lsx_warn("PNG truncated at %g seconds", secs(p->cols));
+ if (p->cols == p->x_size) {
p->truncated = sox_true;
- return SOX_EOF;
+ if (!effp->flow)
+ lsx_report("PNG truncated at %g seconds", secs(p->cols));
+ return p->truncate? SOX_EOF : SOX_SUCCESS;
}
++p->cols;
- p->dBfs = realloc(p->dBfs, p->cols * p->rows * sizeof(*p->dBfs));
+ p->dBfs = lsx_realloc(p->dBfs, p->cols * p->rows * sizeof(*p->dBfs));
for (i = 0; i < p->rows; ++i) {
double dBfs = 10 * log10(p->magnitudes[i] * p->block_norm);
p->dBfs[(p->cols - 1) * p->rows + i] = dBfs + p->gain;
@@ -192,13 +271,21 @@
size_t * isamp, size_t * osamp)
{
priv_t * p = (priv_t *)effp->priv;
- size_t len = min(*isamp, *osamp);
+ size_t len = *isamp = *osamp = min(*isamp, *osamp);
int i;
memcpy(obuf, ibuf, len * sizeof(*obuf)); /* Pass on audio unaffected */
- *isamp = *osamp = len;
- while (sox_true) {
+ if (p->skip) {
+ if (p->skip >= len) {
+ p->skip -= len;
+ return SOX_SUCCESS;
+ }
+ ibuf += p->skip;
+ len -= p->skip;
+ p->skip = 0;
+ }
+ while (!p->truncated) {
SOX_SAMPLE_LOCALS;
if (p->read == p->step_size) {
memmove(p->buf, p->buf + p->step_size,
@@ -206,7 +293,8 @@
p->read = 0;
}
for (; len && p->read < p->step_size; --len, ++p->read, --p->end)
- p->buf[p->dft_size - p->step_size + p->read] = FROM_SOX(*ibuf++,);
+ p->buf[p->dft_size - p->step_size + p->read] =
+ SOX_SAMPLE_TO_FLOAT_64BIT(*ibuf++,);
if (p->read != p->step_size)
break;
@@ -213,12 +301,14 @@
if ((p->end = max(p->end, p->end_min)) != p->last_end)
make_window(p, p->last_end = p->end);
for (i = 0; i < p->dft_size; ++i) p->dft_buf[i] = p->buf[i] * p->window[i];
- lsx_rdft(p->dft_size, 1, p->dft_buf, p->bit_rev_table, p->sin_cos_table);
- p->magnitudes[0] += sqr(p->dft_buf[0]);
- for (i = 1; i < p->dft_size >> 1; ++i)
- p->magnitudes[i] += sqr(p->dft_buf[2*i]) + sqr(p->dft_buf[2*i+1]);
- p->magnitudes[p->dft_size >> 1] += sqr(p->dft_buf[1]);
-
+ if (is_p2(p->dft_size)) {
+ lsx_rdft(p->dft_size, 1, p->dft_buf, lsx_fft_br, lsx_fft_sc);
+ p->magnitudes[0] += sqr(p->dft_buf[0]);
+ for (i = 1; i < p->dft_size >> 1; ++i)
+ p->magnitudes[i] += sqr(p->dft_buf[2*i]) + sqr(p->dft_buf[2*i+1]);
+ p->magnitudes[p->dft_size >> 1] += sqr(p->dft_buf[1]);
+ }
+ else rdft_p(*p->shared_ptr, p->dft_buf, p->magnitudes, p->dft_size);
if (++p->block_num == p->block_steps && do_column(effp) == SOX_EOF)
return SOX_EOF;
}
@@ -230,8 +320,8 @@
priv_t * p = (priv_t *)effp->priv;
if (!p->truncated) {
- sox_sample_t * ibuf = calloc(p->dft_size, sizeof(*ibuf));
- sox_sample_t * obuf = calloc(p->dft_size, sizeof(*obuf));
+ sox_sample_t * ibuf = lsx_calloc(p->dft_size, sizeof(*ibuf));
+ sox_sample_t * obuf = lsx_calloc(p->dft_size, sizeof(*obuf));
size_t isamp = (p->dft_size - p->step_size) / 2;
int left_over = (isamp + p->read) % p->step_size;
@@ -255,7 +345,7 @@
static unsigned colour(priv_t const * p, double x)
{
- unsigned c = x < -p->dB_range? 0 : x >= 0? p->spectrum_points - 1 :
+ unsigned c = x < -p->dB_range? 0 : x >= 0? p->spectrum_points - 1 :
1 + (1 + x / p->dB_range) * (p->spectrum_points - 2);
return fixed_palette + c;
}
@@ -277,7 +367,7 @@
}
for (i = 0; i < p->spectrum_points; ++i) {
double c[3], x = (double)i / (p->spectrum_points - 1);
- int at = (p->light_background)? p->spectrum_points - 1 - i : i;
+ int at = p->light_background? p->spectrum_points - 1 - i : i;
if (p->monochrome) {
c[2] = c[1] = c[0] = x;
if (p->high_colour) {
@@ -302,6 +392,11 @@
case 4: c[j] = 7 * x - phase_num; break;
case 5: c[j] = 1 - (7 * x - phase_num); break;
}
+ } else if (p->alt_palette) {
+ int n = (double)i / (p->spectrum_points - 1) * (alt_palette_len - 1) + .5;
+ c[0] = alt_palette[3 * n + 0] / 255.;
+ c[1] = alt_palette[3 * n + 1] / 255.;
+ c[2] = alt_palette[3 * n + 2] / 255.;
} else {
if (x < .13) c[0] = 0;
else if (x < .73) c[0] = 1 * sin((x - .13) / .60 * M_PI / 2);
@@ -368,7 +463,7 @@
static int axis(double to, int max_steps, double * limit, char * * prefix)
{
- double scale = 1, step = 1;
+ double scale = 1, step = max(1, 10 * to);
int i, prefix_num = 0;
if (max_steps) {
double try, log_10 = HUGE_VAL, min_step = (to *= 10) / max_steps;
@@ -393,27 +488,26 @@
priv_t * p = (priv_t *) effp->priv;
FILE * file = fopen(p->out_name, "wb");
uLong font_len = 96 * font_y;
- int rows = below + p->rows + 30 + 20 * !!p->title;
+ int chans = effp->in_signal.channels;
+ int c_rows = p->rows * chans + chans - 1;
+ int rows = below + c_rows + 30 + 20 * !!p->title;
int cols = left + p->cols + between + spectrum_width + right;
- png_byte * pixels = malloc(cols * rows * sizeof(*pixels));
- png_bytepp png_rows = malloc(rows * sizeof(*png_rows));
+ png_byte * pixels = lsx_malloc(cols * rows * sizeof(*pixels));
+ png_bytepp png_rows = lsx_malloc(rows * sizeof(*png_rows));
png_structp png = png_create_write_struct(PNG_LIBPNG_VER_STRING, 0, 0,0);
png_infop png_info = png_create_info_struct(png);
png_color palette[256];
- int i, j, step, tick_len = 3 - p->no_axes;
+ int i, j, k, base, step, tick_len = 3 - p->no_axes;
char text[200], * prefix;
double limit;
+ free(p->shared);
if (!file) {
- lsx_fail("failed to create `%s' :(", p->out_name);
- png_destroy_write_struct(&png, &png_info);
- free(png_rows);
- free(pixels);
- free(p->dBfs);
- return SOX_EOF;
+ lsx_fail("failed to create `%s': %s", p->out_name, strerror(errno));
+ goto error;
}
lsx_debug("signal-max=%g", p->max);
- font = malloc(font_len);
+ font = lsx_malloc(font_len);
assert(uncompress(font, &font_len, fixed, sizeof(fixed)-1) == Z_OK);
make_palette(p, palette);
memset(pixels, Background, cols * rows * sizeof(*pixels));
@@ -431,14 +525,18 @@
print_at(1, font_y, Text, p->comment);
/* Spectrogram */
- for (j = 0; j < p->rows; ++j) {
- for (i = 0; i < p->cols; ++i)
- pixel(left + i, below + j) = colour(p, p->dBfs[i*p->rows + j]);
- if (!p->no_axes) /* Y-axis lines */
- pixel(left - 1, below + j) = pixel(left + p->cols,below + j) = Grid;
+ for (k = 0; k < chans; ++k) {
+ priv_t * q = (priv_t *)(effp - effp->flow + k)->priv;
+ base = below + (chans - 1 - k) * (p->rows + 1);
+ for (j = 0; j < p->rows; ++j) {
+ for (i = 0; i < p->cols; ++i)
+ pixel(left + i, base + j) = colour(p, q->dBfs[i*p->rows + j]);
+ if (!p->no_axes) /* Y-axis lines */
+ pixel(left - 1, base + j) = pixel(left + p->cols, base + j) = Grid;
+ }
+ if (!p->no_axes) for (i = -1; i <= p->cols; ++i) /* X-axis lines */
+ pixel(left + i, base - 1) = pixel(left + i, base + p->rows) = Grid;
}
- if (!p->no_axes) for (i = -1; i <= p->cols; ++i) /* X-axis lines */
- pixel(left + i, below - 1) = pixel(left + i, below + p->rows) = Grid;
/* X-axis */
step = axis(secs(p->cols), p->cols / (font_X * 9 / 2), &limit, &prefix);
@@ -447,13 +545,13 @@
for (i = 0; i <= limit; i += step) {
int y, x = limit? (double)i / limit * p->cols + .5 : 0;
for (y = 0; y < tick_len; ++y) /* Ticks */
- pixel(left-1+x, below-1-y) = pixel(left-1+x, below+p->rows+y) = Grid;
+ pixel(left-1+x, below-1-y) = pixel(left-1+x, below+c_rows+y) = Grid;
if (step == 5 && (i%10))
continue;
sprintf(text, "%g", .1 * i); /* Tick labels */
x = left + x - 3 * strlen(text);
print_at(x, below - 6, Labels, text);
- print_at(x, below + p->rows + 14, Labels, text);
+ print_at(x, below + c_rows + 14, Labels, text);
}
/* Y-axis */
@@ -460,66 +558,78 @@
step = axis(effp->in_signal.rate / 2,
(p->rows - 1) / ((font_y * 3 + 1) >> 1), &limit, &prefix);
sprintf(text, "Frequency (%.1sHz)", prefix); /* Axis label */
- print_up(10, below + (p->rows - font_X * (int)strlen(text)) / 2, Text, text);
- for (i = 0; i <= limit; i += step) {
- int x, y = limit? (double)i / limit * (p->rows - 1) + .5 : 0;
- for (x = 0; x < tick_len; ++x) /* Ticks */
- pixel(left-1-x, below+y) = pixel(left+p->cols+x, below+y) = Grid;
- if (step == 5 && (i%10))
- continue;
- sprintf(text, i?"%5g":" DC", .1 * i); /* Tick labels */
- print_at(left - 4 - font_X * 5, below + y + 5, Labels, text);
- sprintf(text, i?"%g":"DC", .1 * i);
- print_at(left + p->cols + 6, below + y + 5, Labels, text);
+ print_up(10, below + (c_rows - font_X * (int)strlen(text)) / 2, Text, text);
+ for (k = 0; k < chans; ++k) {
+ base = below + k * (p->rows + 1);
+ for (i = 0; i <= limit; i += step) {
+ int x, y = limit? (double)i / limit * (p->rows - 1) + .5 : 0;
+ for (x = 0; x < tick_len; ++x) /* Ticks */
+ pixel(left-1-x, base+y) = pixel(left+p->cols+x, base+y) = Grid;
+ if ((step == 5 && (i%10)) || (!i && k && chans > 1))
+ continue;
+ sprintf(text, i?"%5g":" DC", .1 * i); /* Tick labels */
+ print_at(left - 4 - font_X * 5, base + y + 5, Labels, text);
+ sprintf(text, i?"%g":"DC", .1 * i);
+ print_at(left + p->cols + 6, base + y + 5, Labels, text);
+ }
}
/* Z-axis */
- print_at(cols - right - 2 - font_X, below - 13, Text, "dBFS");/* Axis label */
- for (j = 0; j < p->rows; ++j) { /* Spectrum */
- png_byte b = colour(p, p->dB_range * (j / (p->rows - 1.) - 1));
+ k = min(400, c_rows);
+ base = below + (c_rows - k) / 2;
+ print_at(cols - right - 2 - font_X, base - 13, Text, "dBFS");/* Axis label */
+ for (j = 0; j < k; ++j) { /* Spectrum */
+ png_byte b = colour(p, p->dB_range * (j / (k - 1.) - 1));
for (i = 0; i < spectrum_width; ++i)
- pixel(cols - right - 1 - i, below + j) = b;
+ pixel(cols - right - 1 - i, base + j) = b;
}
- for (i = 0; i <= p->dB_range; i += 10) { /* (Tick) labels */
- int y = (double)i / p->dB_range * (p->rows - 1) + .5;
+ step = 10 * ceil(p->dB_range / 10. * (font_y + 2) / (k - 1));
+ for (i = 0; i <= p->dB_range; i += step) { /* (Tick) labels */
+ int y = (double)i / p->dB_range * (k - 1) + .5;
sprintf(text, "%+i", i - p->gain - p->dB_range);
- print_at(cols - right + 1, below + y + 5, Labels, text);
+ print_at(cols - right + 1, base + y + 5, Labels, text);
}
free(font);
png_set_rows(png, png_info, png_rows);
png_write_png(png, png_info, PNG_TRANSFORM_IDENTITY, NULL);
- png_destroy_write_struct(&png, &png_info);
+ fclose(file);
+error: png_destroy_write_struct(&png, &png_info);
free(png_rows);
free(pixels);
- fclose(file);
free(p->dBfs);
return SOX_SUCCESS;
}
+static int end(sox_effect_t * effp) {return effp->flow? SOX_SUCCESS:stop(effp);}
+
sox_effect_handler_t const * lsx_spectrogram_effect_fn(void)
{
- static sox_effect_handler_t handler = {
- "spectrogram", 0, SOX_EFF_MODIFY, getopts, start, flow, drain, stop, 0, sizeof(priv_t)};
+ static sox_effect_handler_t handler = {"spectrogram", 0, SOX_EFF_MODIFY,
+ getopts, start, flow, drain, end, 0, sizeof(priv_t)};
static char const * lines[] = {
"[options]",
- "\t-M num\tMaximum width of spectrogram in pixels, default 1000",
- "\t-x num\tX-axis pixels/second, default 100. -x & -X are alternatives",
- "\t-X time\tAudio duration to fit to X-axis e.g. $(soxi -D file)",
- "\t-y num\tY-axis resolution (1 - 4), default 2",
- "\t-z num\tZ-axis range in dB, default 120",
- "\t-Z num\tZ-axis maximum in dBFS, default 0",
- "\t-q num\tZ-axis quantisation (0 - 249), default 249",
+ "\t-x num\tX-axis size in pixels; default derived or 800",
+ "\t-X num\tX-axis pixels/second; default derived or 100",
+ "\t-y num\tY-axis size in pixels (per channel); slow if not 1 + 2^n",
+ "\t-Y num\tY-height total (i.e. not per channel); default 550",
+ "\t-z num\tZ-axis range in dB; default 120",
+ "\t-Z num\tZ-axis maximum in dBFS; default 0",
+ "\t-q num\tZ-axis quantisation (0 - 249); default 249",
"\t-w name\tWindow: Hann (default), Hamming, Bartlett, Rectangular, Kaiser",
+ "\t-W num\tWindow adjust parameter (-10 - 10); applies only to Kaiser",
"\t-s\tSlack overlap of windows",
"\t-a\tSuppress axis lines",
"\t-l\tLight background",
"\t-m\tMonochrome",
"\t-h\tHigh colour",
- "\t-p num\tPermute colours (1 - 6)",
+ "\t-p num\tPermute colours (1 - 6); default 1",
+ "\t-A\tAlternative, inferior, fixed colour-set (for compatibility only)",
"\t-t text\tTitle text",
"\t-c text\tComment text",
- "\t-o text\tOutput file name, default `spectrogram.png'",
+ "\t-o text\tOutput file name; default `spectrogram.png'",
+ "\t-d time\tAudio duration to fit to X-axis; e.g. 1:00, 48",
+ "\t-S time\tStart the spectrogram at the given time through the input",
};
static char * usage;
handler.usage = lsx_usage_lines(&usage, lines, array_length(lines));
--- a/src/trim.c
+++ b/src/trim.c
@@ -83,6 +83,7 @@
trim->index = 0;
trim->trimmed = 0;
+ effp->out_signal.length = trim->length;
return (SOX_SUCCESS);
}