ref: 01ab3c253d2cf436b5907a3e29f85cc7a219f7df
parent: 51c7787a6371a29e3ea7ecbc54ea4457616aba35
author: Krzysztof Nikiel <knik@users.sourceforge.net>
date: Wed Oct 4 14:04:19 EDT 2017
added SSE2 quantizer (GCC vector builtins, selected at runtime via __builtin_cpu_supports)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,4 @@
+ * SSE quantizer
* modified functioning of ABR mode (-b option)
* improved autotools support
* allowed even higher bitrates, including ADTS
--- a/libfaac/Makefile.am
+++ b/libfaac/Makefile.am
@@ -1,7 +1,7 @@
common_SOURCES = aacquant.c bitstream.c fft.c frame.c midside.c blockswitch.c util.c channels.c filtbank.c huffman.c tns.c quantize.c
common_INCLUDES = aacquant.h channels.h filtbank.h hufftab.h blockswitch.h coder.h frame.h midside.h tns.h bitstream.h fft.h huffman.h util.h quantize.h version.h
common_LIBADD = -lm
-common_CFLAGS = -fvisibility=hidden
+common_CFLAGS = -fvisibility=hidden -msse2
if USE_DRM
lib_LTLIBRARIES = libfaac_drm.la
--- a/libfaac/quantize.c
+++ b/libfaac/quantize.c
@@ -121,6 +121,41 @@
sfac = lrint(log(bandqual[sb] / rmsx) * sfstep);
sfacfix = exp(sfac / sfstep);
+ coderInfo->scale_factor[coderInfo->sfcnt++] = sfac;
+
+#ifdef __GNUC__
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+ if (__builtin_cpu_supports("sse2"))
+ {
+ static const v4sf zero = {0, 0, 0, 0};
+ static const v4sf magic = {MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER};
+
+ for (cnt = start; cnt < end; cnt += 4)
+ {
+ float fin[4];
+ fin[0] = xr[cnt];
+ fin[1] = xr[cnt+1];
+ fin[2] = xr[cnt+2];
+ fin[3] = xr[cnt+3];
+
+ v4sf x = __builtin_ia32_loadups(fin);
+ x = __builtin_ia32_maxps(x, __builtin_ia32_subps(zero, x));
+
+ v4sf fix = {sfacfix, sfacfix, sfacfix, sfacfix};
+ x = __builtin_ia32_mulps(x, fix);
+ x = __builtin_ia32_mulps(x , __builtin_ia32_sqrtps(x));
+ x = __builtin_ia32_sqrtps(x);
+
+ x = __builtin_ia32_addps(x, magic);
+ v4si vi = __builtin_ia32_cvttps2dq(x);
+ memcpy(xi+cnt,&vi,16);
+ }
+
+ continue;
+ }
+#endif
+
for (cnt = start; cnt < end; cnt++)
{
double tmp = fabs(xr[cnt]);
@@ -130,8 +165,6 @@
xi[cnt] = (int)(tmp + MAGIC_NUMBER);
}
-
- coderInfo->scale_factor[coderInfo->sfcnt++] = sfac;
}
}