shithub: aacenc

--- a/ChangeLog

+++ b/ChangeLog

@@ -1,3 +1,4 @@

+	* added SSE2 quantizer

 	* modified functioning of ABR mode (-b option)

 	* improved autotools support

 	* allowed even higher bitrates, including ADTS

--- a/libfaac/Makefile.am

+++ b/libfaac/Makefile.am

@@ -1,7 +1,7 @@

 common_SOURCES = aacquant.c bitstream.c fft.c frame.c midside.c blockswitch.c util.c channels.c filtbank.c huffman.c tns.c quantize.c

 common_INCLUDES = aacquant.h channels.h filtbank.h hufftab.h blockswitch.h coder.h frame.h midside.h tns.h bitstream.h fft.h huffman.h util.h quantize.h version.h

 common_LIBADD = -lm

-common_CFLAGS = -fvisibility=hidden

+common_CFLAGS = -fvisibility=hidden -msse2

 if USE_DRM

 lib_LTLIBRARIES = libfaac_drm.la

--- a/libfaac/quantize.c

+++ b/libfaac/quantize.c

@@ -121,6 +121,41 @@

       sfac = lrint(log(bandqual[sb] / rmsx) * sfstep);

       sfacfix = exp(sfac / sfstep);

+      coderInfo->scale_factor[coderInfo->sfcnt++] = sfac;

+#ifdef __GNUC__

+typedef float v4sf __attribute__ ((vector_size (16)));

+typedef int v4si __attribute__ ((vector_size (16)));

+      if (__builtin_cpu_supports("sse2"))

+      {

+          static const v4sf zero = {0, 0, 0, 0};

+          static const v4sf magic = {MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER};

+          for (cnt = start; cnt < end; cnt += 4)

+          {

+              float fin[4];

+              fin[0] = xr[cnt];

+              fin[1] = xr[cnt+1];

+              fin[2] = xr[cnt+2];

+              fin[3] = xr[cnt+3];

+              v4sf x = __builtin_ia32_loadups(fin);

+              x = __builtin_ia32_maxps(x, __builtin_ia32_subps(zero, x));

+              v4sf fix = {sfacfix, sfacfix, sfacfix, sfacfix};

+              x = __builtin_ia32_mulps(x, fix);

+              x = __builtin_ia32_mulps(x , __builtin_ia32_sqrtps(x));

+              x = __builtin_ia32_sqrtps(x);

+              x = __builtin_ia32_addps(x, magic);

+              v4si vi = __builtin_ia32_cvttps2dq(x);

+              memcpy(xi+cnt,&vi,16);

+          }

+          continue;

+      }

+#endif

       for (cnt = start; cnt < end; cnt++)

           double tmp = fabs(xr[cnt]);

@@ -130,8 +165,6 @@

           xi[cnt] = (int)(tmp + MAGIC_NUMBER);

-      coderInfo->scale_factor[coderInfo->sfcnt++] = sfac;

--

⑨