ref: fff250a95553eda37d81f3d80ef3db2adab33de0
dir: /src/mp3.c/
/* MP3 support for SoX
 *
 * Uses libmad for MP3 decoding
 * and libmp3lame for MP3 encoding
 *
 * Written by Fabrizio Gennari <fabrizio.ge@tiscali.it>
 *
 * The decoding part is based on the decoder-tutorial program madlld
 * written by Bertrand Petit <madlld@phoe.fmug.org>,
 */

#include "sox_i.h"
#include <string.h>

#ifdef HAVE_MAD_H
#include <mad.h>
#endif

#ifdef HAVE_LAME_LAME_H
#include <lame/lame.h>
#endif

#if HAVE_ID3TAG && HAVE_UNISTD_H
  #include <id3tag.h>
  #include <unistd.h>
#else
  #define ID3_TAG_FLAG_FOOTERPRESENT 0x10
#endif

#if defined HAVE_LIBLTDL
  #include <ltdl.h>
  #if defined DL_MAD
    /* Definition of the object referenced by the mad_timer_reset() call in
     * startread(); presumably needed because libmad is not linked directly
     * in this configuration -- TODO confirm against mad.h. */
    mad_timer_t const mad_timer_zero;
  #endif
#endif

#define INPUT_BUFFER_SIZE (sox_globals.bufsiz)

/* Private data
 *
 * The decoder half holds the libmad state plus one function pointer per
 * libmad entry point; startread() fills the pointers in, either with the
 * directly linked functions or with symbols looked up through libltdl.
 * The encoder half does the same for LAME and is filled in by startwrite().
 */
typedef struct {
#ifdef HAVE_MAD_H
  struct mad_stream       Stream;
  struct mad_frame        Frame;
  struct mad_synth        Synth;
  mad_timer_t             Timer;       /* accumulated duration of decoded frames */
  unsigned char           *InputBuffer; /* INPUT_BUFFER_SIZE bytes of raw MP3 data */
  ptrdiff_t               cursamp;     /* next unread sample index in Synth.pcm */
  size_t                  FrameCount;  /* number of frames decoded so far */

  void (*mad_stream_buffer)(struct mad_stream *, unsigned char const *, unsigned long);
  void (*mad_stream_skip)(struct mad_stream *, unsigned long);
  int (*mad_stream_sync)(struct mad_stream *);
  void (*mad_stream_init)(struct mad_stream *);
  void (*mad_frame_init)(struct mad_frame *);
  void (*mad_synth_init)(struct mad_synth *);
  int (*mad_frame_decode)(struct mad_frame *, struct mad_stream *);
  void (*mad_timer_add)(mad_timer_t *, mad_timer_t);
  void (*mad_synth_frame)(struct mad_synth *, struct mad_frame const *);
  char const *(*mad_stream_errorstr)(struct mad_stream const *);
  void (*mad_frame_finish)(struct mad_frame *);
  void (*mad_stream_finish)(struct mad_stream *);
  /* The remaining libmad pointers are loaded by startread() but not called
   * in this file; presumably they are used by mp3-duration.h -- TODO confirm. */
  unsigned long (*mad_bit_read)(struct mad_bitptr *, unsigned int);
  int (*mad_header_decode)(struct mad_header *, struct mad_stream *);
  void (*mad_header_init)(struct mad_header *);
  signed long (*mad_timer_count)(mad_timer_t, enum mad_units);
  void (*mad_timer_multiply)(mad_timer_t *, signed long);
#if defined HAVE_LIBLTDL && defined DL_MAD
  lt_dlhandle mad_lth;
#endif
#endif /*HAVE_MAD_H*/

#ifdef HAVE_LAME_LAME_H
  lame_global_flags       *gfp;

  lame_global_flags * (*lame_init)(void);
  int (*lame_set_num_channels)(lame_global_flags *, int);
  int (*lame_get_num_channels)(const lame_global_flags *);
  int (*lame_set_in_samplerate)(lame_global_flags *, int);
  int (*lame_set_bWriteVbrTag)(lame_global_flags *, int);
  int (*lame_init_params)(lame_global_flags *);
  int (*lame_set_errorf)(lame_global_flags *, void (*func)(const char *, va_list));
  int (*lame_set_debugf)(lame_global_flags *, void (*func)(const char *, va_list));
  int (*lame_set_msgf)(lame_global_flags *, void (*func)(const char *, va_list));
  int (*lame_encode_buffer)(lame_global_flags *, const short int[], const short int[], const int, unsigned char *, const int);
  int (*lame_encode_flush)(lame_global_flags *, unsigned char *, int);
  int (*lame_close)(lame_global_flags *);
#if defined HAVE_LIBLTDL && defined DL_LAME
  lt_dlhandle lame_lth;
#endif
#endif /*HAVE_LAME_LAME_H*/
} priv_t;

#ifdef HAVE_MAD_H

/* This function merges the functions tagtype() and id3_tag_query()
 * from MAD's libid3tag, so we don't have to link to it.
 *
 * data:   start of the bytes to examine
 * length: number of valid bytes at data
 *
 * Returns 0 if the data does not start with an ID3 tag, otherwise the tag
 * length in bytes:
 *   - 128 for a "TAG" (ID3v1) marker;
 *   - for an "ID3" (ID3v2) header: 10 header bytes, plus the size encoded
 *     as four 7-bit groups in bytes 6..9, plus 10 more bytes when the
 *     footer flag is set, plus any zero padding that follows the tag
 *     within the first `length' bytes.
 * The returned size is not clamped and may exceed `length'.
 */
static int tagtype(const unsigned char *data, size_t length)
{
  if (length >= 3 && data[0] == 'T' && data[1] == 'A' && data[2] == 'G')
  {
    return 128; /* ID3V1 */
  }

  if (length >= 10 &&
      (data[0] == 'I' && data[1] == 'D' && data[2] == '3') &&
      data[3] < 0xff && data[4] < 0xff &&
      data[6] < 0x80 && data[7] < 0x80 && data[8] < 0x80 && data[9] < 0x80)
  {     /* ID3V2 */
    unsigned char flags;
    unsigned int size;
    flags = data[5];
    size = 10 + (data[6]<<21) + (data[7]<<14) + (data[8]<<7) + data[9];
    if (flags & ID3_TAG_FLAG_FOOTERPRESENT)
      size += 10;
    for (; size < length && !data[size]; ++size);  /* Consume padding */
    return size;
  }

  return 0;
}

#include "mp3-duration.h"

/* Releases everything the decoder acquired in startread(): the libmad
 * synth/frame/stream state, the input buffer and, when libmad was loaded
 * through libltdl, the library handle.
 *
 * Shared by stopread() and by the failure paths of startread(); it may only
 * be called once the libmad function pointers have been loaded and the
 * stream, frame and synth have been initialised.
 */
static void mp3_release_decoder(priv_t *p)
{
  mad_synth_finish(&p->Synth);
  p->mad_frame_finish(&p->Frame);
  p->mad_stream_finish(&p->Stream);

  free(p->InputBuffer);
  p->InputBuffer = NULL; /* never leave a stale pointer behind */

#if defined HAVE_LIBLTDL && defined DL_MAD
  if (!lt_dlclose(p->mad_lth))
    lt_dlexit();
#endif
}

/*
 * (Re)fill the stream buffer that is to be decoded.  If any data
 * still exists in the buffer then it is first shifted to the
 * front of the stream buffer.
 *
 * Returns SOX_SUCCESS when new data was read, SOX_EOF when the read
 * returned no bytes.
 */
static int sox_mp3_input(sox_format_t * ft)
{
  priv_t *p = (priv_t *) ft->priv;
  size_t bytes_read;
  size_t remaining;

  remaining = p->Stream.bufend - p->Stream.next_frame;

  /* libmad does not consume all the buffer it's given. Some
   * data, part of a truncated frame, is left unused at the
   * end of the buffer. That data must be put back at the
   * beginning of the buffer and taken into account for
   * refilling the buffer. This means that the input buffer
   * must be large enough to hold a complete frame at the
   * highest observable bit-rate (currently 448 kb/s).
   * TODO: Is 2016 bytes the size of the largest frame?
   * (448000*(1152/32000))/8
   */
  memmove(p->InputBuffer, p->Stream.next_frame, remaining);

  bytes_read = lsx_readbuf(ft, p->InputBuffer+remaining,
                           INPUT_BUFFER_SIZE-remaining);
  if (bytes_read == 0)
  {
    return SOX_EOF;
  }

  p->mad_stream_buffer(&p->Stream, p->InputBuffer, bytes_read+remaining);
  p->Stream.error = 0;

  return SOX_SUCCESS;
}

/* Attempts to read an ID3 tag at the current location in the stream and
 * consume it all.  Returns SOX_EOF if no tag is found.  It's up to the
 * caller to recover.
 */
static int sox_mp3_inputtag(sox_format_t * ft)
{
  priv_t *p = (priv_t *) ft->priv;
  int rc = SOX_EOF;
  size_t remaining;
  size_t tagsize;

  /* FIXME: This needs some more work if we are to ever
   * look at the ID3 frame.  This is because the Stream
   * may not be able to hold the complete ID3 frame.
   * We should consume the whole frame inside tagtype()
   * instead of outside of tagframe().  That would support
   * recovering when Stream contains less than 8-bytes (header)
   * and also when ID3v2 is bigger than Stream buffer size.
   * Need to pass in stream so that buffer can be
   * consumed as well as letting additional data to be
   * read in.
   */
  /* NOTE(review): the length is measured from next_frame while the data
   * pointer handed to tagtype() is this_frame -- confirm this is intended. */
  remaining = p->Stream.bufend - p->Stream.next_frame;
  if ((tagsize = tagtype(p->Stream.this_frame, remaining)))
  {
    p->mad_stream_skip(&p->Stream, tagsize);
    rc = SOX_SUCCESS;
  }

  /* We know that a valid frame hasn't been found yet
   * so help libmad out and go back into frame seek mode.
   * This is true whether an ID3 tag was found or not.
   */
  p->mad_stream_sync(&p->Stream);

  return rc;
}

/*
 * Prepares the file for decoding: binds the libmad entry points, allocates
 * the input buffer, estimates the stream length for seekable input, and
 * decodes the first valid frame to learn the channel count and sample rate.
 *
 * Returns SOX_SUCCESS, or SOX_EOF on failure.  Once the decoder state has
 * been set up, every failure path releases it again before returning.
 * NOTE(review): this assumes the caller does not invoke stopread() after a
 * failed startread() -- confirm against the libsox open path.
 */
static int startread(sox_format_t * ft)
{
  priv_t *p = (priv_t *) ft->priv;
  size_t ReadSize;
  sox_bool ignore_length = ft->signal.length == SOX_IGNORE_LENGTH;

#if defined HAVE_LIBLTDL && defined DL_MAD
  /* NOTE(review): a failed symbol lookup returns without unloading the
   * library again. */
  #define DL_LIB_NAME "MAD decoder library (libmad"
  #define LOAD_FN_PTR(x) \
    if (!(ltptr.ptr = lt_dlsym(p->mad_lth, #x))) { \
      lsx_fail("incompatible " DL_LIB_NAME " is missing "#x")"); \
      return SOX_EOF; \
    } \
    p->x = ltptr.fn;

  union {void (* fn)(); lt_ptr ptr;} ltptr;
  if (!lt_dlinit())
    p->mad_lth = lt_dlopenext("libmad");
  if (!p->mad_lth) {
    lsx_fail("could not find " DL_LIB_NAME ")");
    return SOX_EOF;
  }
#else
  #define DL_LIB_NAME
  #define LOAD_FN_PTR(x) p->x = x;
#endif

  LOAD_FN_PTR(mad_bit_read)
  LOAD_FN_PTR(mad_frame_decode)
  LOAD_FN_PTR(mad_frame_finish)
  LOAD_FN_PTR(mad_frame_init)
  LOAD_FN_PTR(mad_header_decode)
  LOAD_FN_PTR(mad_header_init)
  LOAD_FN_PTR(mad_stream_buffer)
  LOAD_FN_PTR(mad_stream_errorstr)
  LOAD_FN_PTR(mad_stream_finish)
  LOAD_FN_PTR(mad_stream_init)
  LOAD_FN_PTR(mad_stream_skip)
  LOAD_FN_PTR(mad_stream_sync)
  LOAD_FN_PTR(mad_synth_frame)
  LOAD_FN_PTR(mad_synth_init)
  LOAD_FN_PTR(mad_timer_add)
  LOAD_FN_PTR(mad_timer_count)
  LOAD_FN_PTR(mad_timer_multiply)

#undef LOAD_FN_PTR
#undef DL_LIB_NAME

  p->InputBuffer=lsx_malloc(INPUT_BUFFER_SIZE);

  ft->signal.length = SOX_UNSPEC;
  if (ft->seekable) {
#if HAVE_ID3TAG && HAVE_UNISTD_H
    read_comments(ft);
    rewind(ft->fp);
    if (!ft->signal.length)
#endif
      if (!ignore_length)
        ft->signal.length = mp3_duration_ms(ft, p->InputBuffer);
  }

  p->mad_stream_init(&p->Stream);
  p->mad_frame_init(&p->Frame);
  p->mad_synth_init(&p->Synth);
  mad_timer_reset(&p->Timer);

  ft->encoding.encoding = SOX_ENCODING_MP3;

  /* Decode at least one valid frame to find out the input
   * format.  The decoded frame will be saved off so that it
   * can be processed later.
   */
  ReadSize = lsx_readbuf(ft, p->InputBuffer, INPUT_BUFFER_SIZE);
  if (ReadSize != INPUT_BUFFER_SIZE && ferror(ft->fp))
    goto fail;

  p->mad_stream_buffer(&p->Stream, p->InputBuffer, ReadSize);

  /* Find a valid frame before starting up.  This makes sure
   * that we have a valid MP3 and also skips past ID3v2 tags
   * at the beginning of the audio file.
   */
  p->Stream.error = 0;
  while (p->mad_frame_decode(&p->Frame,&p->Stream))
  {
    /* check whether input buffer needs a refill */
    if (p->Stream.error == MAD_ERROR_BUFLEN)
    {
      if (sox_mp3_input(ft) == SOX_EOF)
        goto fail;

      continue;
    }

    /* Consume any ID3 tags */
    sox_mp3_inputtag(ft);

    /* FIXME: We should probably detect when we've read
     * a bunch of non-ID3 data and still haven't found a
     * frame.  In that case we can abort early without
     * scanning the whole file.
     */
    p->Stream.error = 0;
  }

  if (p->Stream.error)
  {
    lsx_fail_errno(ft,SOX_EOF,"No valid MP3 frame found");
    goto fail;
  }

  switch(p->Frame.header.mode)
  {
    case MAD_MODE_SINGLE_CHANNEL:
    case MAD_MODE_DUAL_CHANNEL:
    case MAD_MODE_JOINT_STEREO:
    case MAD_MODE_STEREO:
      ft->signal.channels = MAD_NCHANNELS(&p->Frame.header);
      break;
    default:
      lsx_fail_errno(ft, SOX_EFMT, "Cannot determine number of channels");
      goto fail;
  }

  p->FrameCount=1;

  p->mad_timer_add(&p->Timer,p->Frame.header.duration);
  p->mad_synth_frame(&p->Synth,&p->Frame);
  ft->signal.rate=p->Synth.pcm.samplerate;
  if (ignore_length)
    ft->signal.length = SOX_UNSPEC;
  else {
    /* Convert the millisecond estimate into a count of samples. */
    ft->signal.length = ft->signal.length * .001 * ft->signal.rate + .5;
    ft->signal.length *= ft->signal.channels;  /* Keep separate from line above! */
  }

  p->cursamp = 0;

  return SOX_SUCCESS;

fail:
  /* Do not leak the input buffer, libmad state or library handle. */
  mp3_release_decoder(p);
  return SOX_EOF;
}

/*
 * Read up to len samples from p->Synth.
 * If needed, read some more MP3 data, decode them and synth them.
 * Place in buf[].
 * Return number of samples read.
 *
 * Samples are delivered interleaved, one value per channel for each PCM
 * position, so only whole multi-channel sample frames are written: a len
 * that is not a multiple of the channel count is rounded down first.
 */
static size_t sox_mp3read(sox_format_t * ft, sox_sample_t *buf, size_t len)
{
  priv_t *p = (priv_t *) ft->priv;
  size_t donow,i,done=0;
  mad_fixed_t sample;
  size_t chan;

  /* The copy loop below always emits `channels' samples per step; without
   * this rounding a ragged len would make it write past the end of buf. */
  if (ft->signal.channels)
    len -= len % ft->signal.channels;

  do {
    size_t x = (p->Synth.pcm.length - p->cursamp)*ft->signal.channels;
    donow=min(len, x);
    i=0;
    while(i<donow){
      for(chan=0;chan<ft->signal.channels;chan++){
        sample=p->Synth.pcm.samples[chan][p->cursamp];
        /* Clip to the [-MAD_F_ONE, MAD_F_ONE) range before scaling. */
        if (sample < -MAD_F_ONE)
          sample=-MAD_F_ONE;
        else if (sample >= MAD_F_ONE)
          sample=MAD_F_ONE-1;
        /* Scale by multiplication rather than by shifting: left-shifting
         * a negative value is undefined behaviour in C. */
        *buf++=(sox_sample_t)(sample * (1 << (32-1-MAD_F_FRACBITS)));
        i++;
      }
      p->cursamp++;
    }

    len-=donow;
    done+=donow;

    if (len==0) break;

    /* check whether input buffer needs a refill */
    if (p->Stream.error == MAD_ERROR_BUFLEN)
    {
      if (sox_mp3_input(ft) == SOX_EOF) {
        lsx_debug("sox_mp3_input EOF");
        break;
      }
    }

    if (p->mad_frame_decode(&p->Frame,&p->Stream))
    {
      if(MAD_RECOVERABLE(p->Stream.error))
      {
        /* Possibly an ID3 tag in the middle of the stream: skip and retry. */
        sox_mp3_inputtag(ft);
        continue;
      }
      if (p->Stream.error == MAD_ERROR_BUFLEN)
        continue; /* the refill happens at the top of the next pass */

      lsx_report("unrecoverable frame level error (%s).",
                 p->mad_stream_errorstr(&p->Stream));
      break;
    }
    p->FrameCount++;
    p->mad_timer_add(&p->Timer,p->Frame.header.duration);
    p->mad_synth_frame(&p->Synth,&p->Frame);
    p->cursamp=0;
  } while(1);

  return done;
}

/* Releases the decoder state set up by startread(). Always returns
 * SOX_SUCCESS. */
static int stopread(sox_format_t * ft)
{
  priv_t *p=(priv_t*) ft->priv;

  mp3_release_decoder(p);

  return SOX_SUCCESS;
}
#else /*HAVE_MAD_H*/
/* Decoder stub used when SoX is built without libmad: always fails. */
static int startread(sox_format_t * ft)
{
  lsx_fail_errno(ft,SOX_EOF,"SoX was compiled without MP3 decoding support");
  return SOX_EOF;
}
#define sox_mp3read NULL
#define stopread NULL
#endif /*HAVE_MAD_H*/

#ifdef HAVE_LAME_LAME_H
/* Message callback installed into LAME by startwrite(); discards everything. */
static void null_error_func(const char* string UNUSED, va_list va UNUSED)
{
  return;
}

/*
 * Prepares the file for encoding: binds the LAME entry points, creates the
 * encoder and configures channel count and input sample rate; all other
 * encoder settings are left at the LAME defaults.
 *
 * Returns SOX_SUCCESS, or SOX_EOF on failure.  Failures after lame_init()
 * close the encoder handle again before returning.
 * NOTE(review): this assumes the caller does not invoke stopwrite() after a
 * failed startwrite() -- confirm against the libsox open path.
 */
static int startwrite(sox_format_t * ft)
{
  priv_t *p = (priv_t *) ft->priv;

#if defined HAVE_LIBLTDL && defined DL_LAME
  /* NOTE(review): a failed symbol lookup returns without unloading the
   * library again. */
  #define DL_LIB_NAME "LAME encoder library (libmp3lame"
  #define LOAD_FN_PTR(x) \
    if (!(ltptr.ptr = lt_dlsym(p->lame_lth, #x))) { \
      lsx_fail("incompatible " DL_LIB_NAME " is missing "#x")"); \
      return SOX_EOF; \
    } \
    p->x = ltptr.fn;

  union {int (* fn)(); lt_ptr ptr;} ltptr;
  if (!lt_dlinit())
    p->lame_lth = lt_dlopenext("libmp3lame");
  if (!p->lame_lth) {
    lsx_fail("could not find " DL_LIB_NAME ")");
    return SOX_EOF;
  }
#else
  #define DL_LIB_NAME
  #define LOAD_FN_PTR(x) p->x = x;
#endif

  LOAD_FN_PTR(lame_init)
  LOAD_FN_PTR(lame_set_num_channels)
  LOAD_FN_PTR(lame_get_num_channels)
  LOAD_FN_PTR(lame_set_in_samplerate)
  LOAD_FN_PTR(lame_set_bWriteVbrTag)
  LOAD_FN_PTR(lame_init_params)
  LOAD_FN_PTR(lame_set_errorf)
  LOAD_FN_PTR(lame_set_debugf)
  LOAD_FN_PTR(lame_set_msgf)
  LOAD_FN_PTR(lame_encode_buffer)
  LOAD_FN_PTR(lame_encode_flush)
  LOAD_FN_PTR(lame_close)

#undef LOAD_FN_PTR
#undef DL_LIB_NAME

  if (ft->encoding.encoding != SOX_ENCODING_MP3) {
    if(ft->encoding.encoding != SOX_ENCODING_UNKNOWN)
      lsx_report("Encoding forced to MP3");
    ft->encoding.encoding = SOX_ENCODING_MP3;
  }

  p->gfp = p->lame_init();
  if (p->gfp == NULL){
    lsx_fail_errno(ft,SOX_EOF,"Initialization of LAME library failed");
    goto fail;
  }

  /* An unspecified channel count means "use whatever LAME defaults to". */
  if (ft->signal.channels != SOX_UNSPEC) {
    if ( (p->lame_set_num_channels(p->gfp,(int)ft->signal.channels)) < 0) {
      lsx_fail_errno(ft,SOX_EOF,"Unsupported number of channels");
      goto fail_close;
    }
  }
  else
    ft->signal.channels = p->lame_get_num_channels(p->gfp); /* LAME default */

  p->lame_set_in_samplerate(p->gfp,(int)ft->signal.rate);

  p->lame_set_bWriteVbrTag(p->gfp, 0); /* disable writing VBR tag */

  /* The bitrate, mode, quality and other settings are the default ones,
     since SoX's command line options do not allow to set them */

  /* FIXME: Someone who knows about lame could implement adjustable compression
     here.  E.g. by using the -C value as an index into a table of params or
     as a compressed bit-rate. */
  if (ft->encoding.compression != HUGE_VAL)
    lsx_warn("-C option not supported for mp3; using default compression rate");
  if (p->lame_init_params(p->gfp) < 0){
    lsx_fail_errno(ft,SOX_EOF,"LAME initialization failed");
    goto fail_close;
  }
  p->lame_set_errorf(p->gfp,null_error_func);
  p->lame_set_debugf(p->gfp,null_error_func);
  p->lame_set_msgf  (p->gfp,null_error_func);

  return(SOX_SUCCESS);

fail_close:
  /* Do not leak the encoder handle when configuration fails. */
  p->lame_close(p->gfp);
  p->gfp = NULL;
fail:
#if defined HAVE_LIBLTDL && defined DL_LAME
  if (!lt_dlclose(p->lame_lth))
    lt_dlexit();
#endif
  return(SOX_EOF);
}

/*
 * Encodes samp interleaved samples from buf and writes the resulting MP3
 * data to the file.
 *
 * Returns the number of samples consumed, or 0 when encoding or writing
 * failed (the error is recorded through lsx_fail_errno).
 */
static size_t sox_mp3write(sox_format_t * ft, const sox_sample_t *buf, size_t samp)
{
  priv_t *p = (priv_t *)ft->priv;
  unsigned char *mp3buffer;
  size_t mp3buffer_size;
  short signed int *buffer_l, *buffer_r = NULL;
  int nsamples = samp/ft->signal.channels;
  int i,j;
  ptrdiff_t done = 0;
  int written;
  SOX_SAMPLE_LOCALS;

  /* NOTE: This logic assumes that "short int" is 16-bits
   * on all platforms.  It happens to be for all that I know
   * about.
   *
   * Lame ultimately wants data scaled to 16-bit samples
   * and assumes for the majority of cases that you're passing
   * in something scaled based on passed in datatype
   * (16, 32, 64, and float).
   *
   * If we used long buffers then this means it expects
   * different scaling between 32-bit and 64-bit CPU's.
   *
   * We might as well scale it ourselves to 16-bit to allow
   * lsx_malloc()'ing a smaller buffer and call a consistent
   * interface.
   */
  buffer_l = lsx_malloc(nsamples * sizeof(short signed int));

  if (ft->signal.channels == 2)
  {
    /* lame doesn't support interleaved samples so we must break
     * them out into separate buffers.
     */
    buffer_r = lsx_malloc(nsamples* sizeof(short signed int));
    j=0;
    for (i=0; i<nsamples; i++)
    {
      buffer_l[i]=SOX_SAMPLE_TO_SIGNED_16BIT(buf[j++], ft->clips);
      buffer_r[i]=SOX_SAMPLE_TO_SIGNED_16BIT(buf[j++], ft->clips);
    }
  }
  else
  {
    j=0;
    for (i=0; i<nsamples; i++)
      buffer_l[i]=SOX_SAMPLE_TO_SIGNED_16BIT(buf[j++], ft->clips);
  }

  /* Output buffer size: 1.25 bytes per input sample plus 7200 bytes. */
  mp3buffer_size = 1.25 * nsamples + 7200;
  mp3buffer = lsx_malloc(mp3buffer_size);

  /* lame_encode_buffer() returns an int; a negative value is an error
   * code, so test for it explicitly instead of relying on the value
   * wrapping around when stored in an unsigned type. */
  written = p->lame_encode_buffer(p->gfp,buffer_l, buffer_r,
                                  nsamples, mp3buffer,
                                  (int)mp3buffer_size);
  if (written < 0 || (size_t)written > mp3buffer_size){
    lsx_fail_errno(ft,SOX_EOF,"Encoding failed");
    goto end;
  }

  if (lsx_writebuf(ft, mp3buffer, (size_t)written) < (size_t)written)
  {
    lsx_fail_errno(ft,SOX_EOF,"File write failed");
    goto end;
  }

  done = nsamples*ft->signal.channels;

end:
  free(mp3buffer);
  free(buffer_r); /* NULL unless stereo; free(NULL) is a no-op */
  free(buffer_l);

  return done;
}

/* Flushes the data still buffered inside LAME to the file, then closes the
 * encoder.  Errors are recorded through lsx_fail_errno; the return value is
 * always SOX_SUCCESS. */
static int stopwrite(sox_format_t * ft)
{
  priv_t *p = (priv_t *) ft->priv;
  unsigned char mp3buffer[7200];
  int written = p->lame_encode_flush(p->gfp, mp3buffer, (int)sizeof(mp3buffer));

  if (written < 0)
    lsx_fail_errno(ft, SOX_EOF, "Encoding failed");
  else if (lsx_writebuf(ft, mp3buffer, (size_t)written) < (size_t)written)
    lsx_fail_errno(ft, SOX_EOF, "File write failed");

  p->lame_close(p->gfp);
#if defined HAVE_LIBLTDL && defined DL_LAME
  if (!lt_dlclose(p->lame_lth))
    lt_dlexit();
#endif
  return SOX_SUCCESS;
}

#else /* HAVE_LAME_LAME_H */
/* Encoder stub used when SoX is built without LAME: always fails. */
static int startwrite(sox_format_t * ft)
{
  lsx_fail_errno(ft,SOX_EOF,"SoX was compiled without MP3 encoding support");
  return SOX_EOF;
}
#define sox_mp3write NULL
#define stopwrite NULL
#endif /* HAVE_LAME_LAME_H */

/* Format handler entry: registers the names served by this file together
 * with the read and write callbacks defined above. */
LSX_FORMAT_HANDLER(mp3)
{
  static char const * const names[] = {"mp3", "mp2", "audio/mpeg", NULL};
  static unsigned const write_encodings[] = {
    SOX_ENCODING_MP3, 0, 0};
  static sox_format_handler_t const handler = {SOX_LIB_VERSION_CODE,
    "MPEG Layer 3 lossy audio compression", names, 0,
    startread, sox_mp3read, stopread,
    startwrite, sox_mp3write, stopwrite,
    NULL, write_encodings, NULL, sizeof(priv_t)
  };
  return &handler;
}