shithub: sox

Download patch

ref: 4506c187bc03cba118baf720a81494999e18f7a1
parent: 9d30067b235c76179f7303e85c7979d9b34773be
author: robs <robs>
date: Tue Sep 15 14:37:40 EDT 2009

mp3 compression params

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -118,6 +118,9 @@
   set(HAVE_MP3 1)
 endif (HAVE_LAME_LAME_H OR HAVE_MAD_H)
 
+set(CMAKE_REQUIRED_LIBRARIES mp3lame m)
+check_function_exists("lame_set_VBR_quality" HAVE_LAME_SET_VBR_QUALITY)
+
 if (HAVE_SUN_AUDIOIO_H OR HAVE_SYS_AUDIOIO_H)
   set(HAVE_SUN_AUDIO 1)
 endif (HAVE_SUN_AUDIOIO_H OR HAVE_SYS_AUDIOIO_H)
--- a/ChangeLog
+++ b/ChangeLog
@@ -36,6 +36,7 @@
 
   o New Grandstream ring-tone (gsrt) format.  (robs)
   o CVSD encode/decode speed-ups.  (Kimberly Rockwell, P. Chaintreuil)
+  o Add ability to select MP3 compression parameters.  (Jim Harkins)
 
 Audio device drivers:
 
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,7 @@
   o New MS-Windows native audio driver.
   o New Grandstream ringtone file format.
   o New support in libSoX for memory based file IO.
+  o Now able to set MP3 compression parameters.
 
 For the complete list of changes, see the ChangeLog at
   http://sox.cvs.sourceforge.net/sox/sox/ChangeLog?revision=SOX_14_3_1&view=markup
--- a/configure.ac
+++ b/configure.ac
@@ -337,6 +337,10 @@
       AC_DEFINE(DL_LAME, 1, [Define if to dlopen() lame.])
     else
       AC_CHECK_LIB(mp3lame, lame_init, MP3_LIBS="$MP3_LIBS -lmp3lame",using_lame=no)
+      AC_CHECK_LIB(mp3lame, lame_set_VBR_quality)
+      if test "$ac_cv_lib_mp3lame_lame_set_VBR_quality" = yes; then
+        AC_DEFINE(HAVE_LAME_SET_VBR_QUALITY, 1, [Define to 1 if you have lame_set_VBR_quality.])
+      fi
       if test "$with_lame" = "yes" -a "$using_lame" = "no"; then
         AC_MSG_FAILURE([cannot find LAME])
       fi
--- a/soxformat.7
+++ b/soxformat.7
@@ -399,10 +399,43 @@
 Because MP3 is patented, SoX cannot be distributed with MP3 support without
 incurring the patent holder's fees.  Users who require SoX with MP3 support
 must currently compile and build SoX with the MP3 libraries (LAME & MAD)
-from source code.
+from source code, or, in some cases, obtain pre-built dynamically loadable
+libraries.
 .SP
-Currently, SoX does not allow any encoding options to be specified when
-writing MP3 files; files are encoded at 128k constant bit-rate.
+MP3 compression parameters can be selected using SoX's \fB\-C\fR option
+as follows
+(note that the current syntax is subject to change):
+.SP
+The primary parameter to the LAME encoder is the bit rate. If the
+value of the \fB\-C\fR value is a positive integer, it's taken as
+the bitrate in kbps (e.g. if you specify 128, it uses 128 kbps).
+.SP
+The second most important parameter is probably "quality" (really
+performance), which allows balancing encoding speed vs. quality.
+In LAME, 0 specifies highest quality but is very slow, while
+9 selects poor quality, but is fast. (5 is the default and 2 is
+recommended as a good trade-off for high quality encodes.)
+.SP
+Because the \fB\-C\fR value is a float, the fractional part is used
+to select quality. 128.2 selects 128 kbps encoding with a quality
+of 2. There is one problem with this approach. We need 128 to specify
+128 kbps encoding with default quality, so 0 means use default. Instead
+of 0 you have to use .01 (or .99) to specify the highest quality
+(128.01 or 128.99).
+.SP
+LAME uses bitrate to specify a constant bitrate, but higher quality
+can be achieved using Variable Bit Rate (VBR). VBR quality (really
+size) is selected using a number from 0 to 9. Use a value of 0 for high
+quality, larger files, and 9 for smaller files of lower quality. 4 is
+the default.
+.SP
+In order to squeeze the selection of VBR into the \fB\-C\fR value
+float we use negative numbers to select VBR. -4.2 would select default
+VBR encoding (size) with high quality (speed). One special case is 0,
+which is a valid VBR encoding parameter but not a valid bitrate.
+Compression value of 0 is always treated as a high quality VBR, as a
+result both -0.2 and 0.2 are treated as highest quality VBR (size) and
+high quality (speed).
 .SP
 See also
 .B Ogg Vorbis
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -69,7 +69,7 @@
 	rate_filters.h rate_half_fir.h rate_poly_fir0.h rate_poly_fir.h \
 	remix.c repeat.c reverb.c reverse.c silence.c sinc.c skeleff.c speed.c \
 	splice.c stat.c stats.c stretch.c swap.c synth.c tempo.c tremolo.c \
-	trim.c vad.c vol.c
+	trim.c vad.c vol.c mp3-1.h
 if HAVE_PNG
     libsox_la_SOURCES += spectrogram.c
 endif
--- /dev/null
+++ b/src/mp3-1.h
@@ -1,0 +1,30 @@
+/* This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#if defined __GNUC__
+  #pragma GCC system_header
+#elif defined __SUNPRO_CC
+  #pragma disable_warn
+#elif defined _MSC_VER
+  #pragma warning(push, 1)
+#endif
+
+      if (p->lame_set_VBR_quality(p->gfp, floor_compression) < 0)
+
+#if defined __SUNPRO_CC
+  #pragma enable_warn
+#elif defined _MSC_VER
+  #pragma warning(pop)
+#endif
--- a/src/mp3.c
+++ b/src/mp3.c
@@ -79,6 +79,7 @@
   int (*lame_get_num_channels)(const lame_global_flags *);
   int (*lame_set_in_samplerate)(lame_global_flags *, int);
   int (*lame_set_bWriteVbrTag)(lame_global_flags *, int);
+  int (*lame_get_bWriteVbrTag)(lame_global_flags const *);
   int (*lame_init_params)(lame_global_flags *);
   int (*lame_set_errorf)(lame_global_flags *, 
                          void (*func)(const char *, va_list));
@@ -92,6 +93,15 @@
   int (*lame_encode_flush)(lame_global_flags *, unsigned char *,
                            int);
   int (*lame_close)(lame_global_flags *);
+  void (*lame_mp3_tags_fid)(lame_global_flags *, FILE *);
+  int (*lame_get_brate)(const lame_global_flags *);
+  int (*lame_set_brate)(lame_global_flags *, int);
+  int (*lame_set_quality)(lame_global_flags *, int);
+  int (*lame_set_VBR)(lame_global_flags *, vbr_mode);
+  int (*lame_set_VBR_min_bitrate_kbps)(lame_global_flags *, int);
+  int (*lame_set_VBR_quality)(lame_global_flags *, float);
+  vbr_mode (*lame_get_VBR)(const lame_global_flags *);
+
   #if defined HAVE_LIBLTDL && defined DL_LAME
   lt_dlhandle lame_lth;
   #endif
@@ -448,11 +458,34 @@
 #endif /*HAVE_MAD_H*/
 
 #ifdef HAVE_LAME_LAME_H
-static void null_error_func(const char* string UNUSED, va_list va UNUSED)
+
+/* Adapters for lame message callbacks: */
+
+static void errorf(const char* fmt, va_list va)
 {
+  sox_globals.subsystem=__FILE__;
+  if (sox_globals.output_message_handler)
+    (*sox_globals.output_message_handler)(1,sox_globals.subsystem,fmt,va);
   return;
 }
 
+static void debugf(const char* fmt, va_list va)
+{
+  sox_globals.subsystem=__FILE__;
+  if (sox_globals.output_message_handler)
+    (*sox_globals.output_message_handler)(4,sox_globals.subsystem,fmt,va);
+  return;
+}
+
+static void msgf(const char* fmt, va_list va)
+{
+  sox_globals.subsystem=__FILE__;
+  if (sox_globals.output_message_handler)
+    (*sox_globals.output_message_handler)(3,sox_globals.subsystem,fmt,va);
+  return;
+}
+
+
 static int startwrite(sox_format_t * ft)
 {
   priv_t *p = (priv_t *) ft->priv;
@@ -482,6 +515,7 @@
   LOAD_FN_PTR(lame_get_num_channels)
   LOAD_FN_PTR(lame_set_in_samplerate)
   LOAD_FN_PTR(lame_set_bWriteVbrTag)
+  LOAD_FN_PTR(lame_get_bWriteVbrTag)
   LOAD_FN_PTR(lame_init_params)
   LOAD_FN_PTR(lame_set_errorf)
   LOAD_FN_PTR(lame_set_debugf)
@@ -489,6 +523,16 @@
   LOAD_FN_PTR(lame_encode_buffer)
   LOAD_FN_PTR(lame_encode_flush)
   LOAD_FN_PTR(lame_close)
+  LOAD_FN_PTR(lame_mp3_tags_fid)
+  LOAD_FN_PTR(lame_get_brate)
+  LOAD_FN_PTR(lame_set_brate)
+  LOAD_FN_PTR(lame_set_quality)
+  LOAD_FN_PTR(lame_set_VBR)
+  LOAD_FN_PTR(lame_set_VBR_min_bitrate_kbps)
+#if HAVE_LAME_SET_VBR_QUALITY
+  LOAD_FN_PTR(lame_set_VBR_quality)
+#endif
+  LOAD_FN_PTR(lame_get_VBR)
 
 #undef LOAD_FN_PTR
 #undef DL_LIB_NAME
@@ -500,11 +544,17 @@
   }
 
   p->gfp = p->lame_init();
+
   if (p->gfp == NULL){
     lsx_fail_errno(ft,SOX_EOF,"Initialization of LAME library failed");
     return(SOX_EOF);
   }
 
+  /* First set message callbacks so we don't miss any messages: */
+  p->lame_set_errorf(p->gfp,errorf);
+  p->lame_set_debugf(p->gfp,debugf);
+  p->lame_set_msgf  (p->gfp,msgf);
+
   if (ft->signal.channels != SOX_ENCODING_UNKNOWN) {
     if ( (p->lame_set_num_channels(p->gfp,(int)ft->signal.channels)) < 0) {
         lsx_fail_errno(ft,SOX_EOF,"Unsupported number of channels");
@@ -518,21 +568,111 @@
 
   p->lame_set_bWriteVbrTag(p->gfp, 0); /* disable writing VBR tag */
 
-  /* The bitrate, mode, quality and other settings are the default ones,
-     since SoX's command line options do not allow to set them */
+  /* The primary parameter to the LAME encoder is the bit rate. If the
+   * value of encoding.compression is a positive integer, it's taken as
+   * the bitrate in kbps (that is, if you specify 128, it uses 128 kbps).
+   *
+   * The second most important parameter is probably "quality" (really
+   * performance), which allows balancing encoding speed vs. quality.
+   * In LAME, 0 specifies highest quality but is very slow, while
+   * 9 selects poor quality, but is fast. (5 is the default and 2 is
+   * recommended as a good trade-off for high quality encodes.)
+   *
+   * Because encoding.compression is a float, the fractional part is used
+   * to select quality. 128.2 selects 128 kbps encoding with a quality
+   * of 2. There is one problem with this approach. We need 128 to specify
+   * 128 kbps encoding with default quality, so 0 means use default. Instead
+   * of 0 you have to use .01 (or .99) to specify the highest quality
+   * (128.01 or 128.99).
+   *
+   * LAME uses bitrate to specify a constant bitrate, but higher quality
+   * can be achieved using Variable Bit Rate (VBR). VBR quality (really
+   * size) is selected using a number from 0 to 9. Use a value of 0 for high
+   * quality, larger files, and 9 for smaller files of lower quality. 4 is
+   * the default.
+   *
+   * In order to squeeze the selection of VBR into the encoding.compression
+   * float we use negative numbers to select VBR. -4.2 would select default
+   * VBR encoding (size) with high quality (speed). One special case is 0,
+   * which is a valid VBR encoding parameter but not a valid bitrate.
+   * Compression value of 0 is always treated as a high quality VBR, as a
+   * result both -0.2 and 0.2 are treated as highest quality VBR (size) and
+   * high quality (speed).
+   *
+   * Note: It would have been nice to simply use low values, 0-9, to trigger
+   * VBR mode, but 8 kbps is a valid bit rate, so negative values were
+   * used instead.
+  */
 
-  /* FIXME: Someone who knows about lame could implement adjustable compression
-     here.  E.g. by using the -C value as an index into a table of params or
-     as a compressed bit-rate. */
-  if (ft->encoding.compression != HUGE_VAL)
-      lsx_warn("-C option not supported for mp3; using default compression rate");
+  lsx_debug("-C option is %f", ft->encoding.compression);
+
+  if (ft->encoding.compression == HUGE_VAL) {
+    /* Do nothing, use defaults: */
+    lsx_report("using MP3 encoding defaults");
+  } else {
+    double abs_compression = fabs(ft->encoding.compression);
+    double floor_compression = floor(abs_compression);
+    double fraction_compression = abs_compression - floor_compression;
+
+    if (floor(ft->encoding.compression) <= 0) {
+      if (p->lame_get_VBR(p->gfp) == vbr_off)
+        p->lame_set_VBR(p->gfp, vbr_default);
+
+      if (ft->seekable) {
+        p->lame_set_bWriteVbrTag(p->gfp, 1); /* enable writing VBR tag */
+      } else {
+        lsx_warn("unable to write VBR Tag because we can't seek");
+      }
+
+#if HAVE_LAME_SET_VBR_QUALITY
+#include "mp3-1.h"
+      {
+        lsx_fail_errno(ft, SOX_EOF,
+          "lame_set_VBR_quality(%f) failed (should be between 0 and 9)",
+          floor_compression);
+        return(SOX_EOF);
+      }
+      lsx_report("lame_set_VBR_quality(%f)", floor_compression);
+#else
+      /* TODO lsx_warn */
+#endif
+    } else {
+      if (p->lame_set_brate(p->gfp, (int)floor_compression) < 0) {
+        lsx_fail_errno(ft, SOX_EOF,
+          "lame_set_brate(%d) failed", (int)floor_compression);
+        return(SOX_EOF);
+      }
+      p->lame_set_VBR_min_bitrate_kbps(p->gfp, p->lame_get_brate(p->gfp));
+      lsx_report("lame_set_brate(%d)", (int)floor_compression);
+    }
+
+    /* Set Quality */
+
+    if (0.0 == fraction_compression) {
+      /* use default quality value */
+      lsx_report("using MP3 default quality");
+    }
+    else if (fraction_compression <= 0.01 || 0.99 <= fraction_compression) {
+      if (p->lame_set_quality(p->gfp, 0) < 0) {
+        lsx_fail_errno(ft, SOX_EOF, "lame_set_quality(0) failed");
+        return(SOX_EOF);
+      }
+      lsx_report("lame_set_quality(0)");
+    } else {
+      int quality = (int)(0.5 + fraction_compression * 10);
+      if (p->lame_set_quality(p->gfp, quality) < 0) {
+        lsx_fail_errno(ft, SOX_EOF,
+          "lame_set_quality(%d) failed", quality);
+        return(SOX_EOF);
+      }
+      lsx_report("lame_set_quality(%d)", quality);
+    }
+  }
+
   if (p->lame_init_params(p->gfp) < 0){
         lsx_fail_errno(ft,SOX_EOF,"LAME initialization failed");
         return(SOX_EOF);
   }
-  p->lame_set_errorf(p->gfp,null_error_func);
-  p->lame_set_debugf(p->gfp,null_error_func);
-  p->lame_set_msgf  (p->gfp,null_error_func);
 
   return(SOX_SUCCESS);
 }
@@ -624,7 +764,12 @@
   if (written < 0)
     lsx_fail_errno(ft, SOX_EOF, "Encoding failed");
   else if (lsx_writebuf(ft, mp3buffer, (size_t)written) < (size_t)written)
+  {
     lsx_fail_errno(ft, SOX_EOF, "File write failed");
+  }
+  else if (p->lame_get_bWriteVbrTag(p->gfp) && ft->seekable) {
+    p->lame_mp3_tags_fid(p->gfp, ft->fp);
+  }
 
   p->lame_close(p->gfp);
 #if defined HAVE_LIBLTDL && defined DL_LAME
--- a/src/soxconfig.h.cmake
+++ b/src/soxconfig.h.cmake
@@ -19,6 +19,7 @@
 #cmakedefine HAVE_INTTYPES_H          1
 #cmakedefine HAVE_IO_H                1
 #cmakedefine HAVE_LAME_LAME_H         1
+#cmakedefine HAVE_LAME_SET_VBR_QUALITY 1
 #define HAVE_LPC10                    1
 #cmakedefine HAVE_LTDL_H              1
 #cmakedefine HAVE_MACHINE_SOUNDCARD_H 1