shithub: aacdec

Download patch

ref: 0b79b1c3c52c4ad1535bf8429ae5b7c701c4a907
parent: e82315d28bb5b1e866dad32fadc801c14c5cb1b4
author: menno <menno>
date: Wed Feb 20 08:05:57 EST 2002

- Nicer defines in common.h
- 1% speedup in MDCT

--- a/libfaad/common.h
+++ b/libfaad/common.h
@@ -16,7 +16,7 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: common.h,v 1.1 2002/02/18 10:01:05 menno Exp $
+** $Id: common.h,v 1.2 2002/02/20 13:05:57 menno Exp $
 **/
 
 #ifndef __COMMON_H__
@@ -52,9 +52,18 @@
 #endif
 
 
-//#define USE_DOUBLE_PRECISION
+/* COMPILE TIME DEFINITIONS */
 
+/* use double precision */
+/* #define USE_DOUBLE_PRECISION */
 
+/* use table lookup twiddle factors in MDCT [more memory, higher speed],
+   otherwise recurrence relations are used [no memory usage, lower speed] */
+#define USE_TWIDDLE_TABLE
+
+/* END COMPILE TIME DEFINITIONS */
+
+
 #if defined(_WIN32)
 
 
@@ -66,16 +75,7 @@
 typedef __int8  int8_t;
 typedef float float32_t;
 
-#ifndef USE_DOUBLE_PRECISION
-typedef float real_t;
-#ifdef __ICL /* only Intel C compiler has fmath ??? */
-#define USE_FMATH
-#endif
-#else
-typedef double real_t;
-#endif
 
-
 #elif defined(LINUX) || defined(DJGPP)
 
 
@@ -91,27 +91,57 @@
 typedef float float32_t;
 #endif
 
-#ifndef USE_DOUBLE_PRECISION
-typedef float real_t;
-#else
-typedef double real_t;
-#endif
 
-
 #else /* Some other OS */
 
 
 #include <inttypes.h>
 
-#ifndef USE_DOUBLE_PRECISION
-typedef float real_t;
-#else
-typedef double real_t;
 #endif
 
 
+#ifndef USE_DOUBLE_PRECISION /* single precision build: real_t is float */
+
+  typedef float real_t;
+
+  #ifdef __ICL /* Intel C compiler: take the float math functions from <mathf.h> */
+
+    #include <mathf.h>
+
+    #define sin sinf
+    #define cos cosf
+    #define pow powf
+    #define floor floorf
+    #define sqrt sqrtf
+
+  #else /* other compilers: remap only the functions whose HAVE_* macro is defined */
+
+    #include <math.h>
+
+#ifdef HAVE_SINF
+#  define sin sinf
 #endif
+#ifdef HAVE_COSF
+#  define cos cosf
+#endif
+#ifdef HAVE_POWF
+#  define pow powf
+#endif
+#ifdef HAVE_FLOORF
+#  define floor floorf
+#endif
+#ifdef HAVE_SQRTF
+#  define sqrt sqrtf
+#endif
 
+  #endif /* __ICL */
+
+#else /* USE_DOUBLE_PRECISION: real_t is double, math function names are left untouched */
+
+  typedef double real_t;
+  #include <math.h>
+
+#endif /* USE_DOUBLE_PRECISION */
 
 #ifdef __cplusplus
 }
--- a/libfaad/decoder.c
+++ b/libfaad/decoder.c
@@ -16,7 +16,7 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: decoder.c,v 1.8 2002/02/18 10:01:05 menno Exp $
+** $Id: decoder.c,v 1.9 2002/02/20 13:05:57 menno Exp $
 **/
 
 #include <stdlib.h>
@@ -250,7 +250,7 @@
 #else
     real_t *pow2_table     =  NULL;
 #endif
-    uint8_t *window_shape_prev =  hDecoder->window_shape_prev;
+    uint8_t *window_shape_prev = hDecoder->window_shape_prev;
     real_t **time_state    =  hDecoder->time_state;
     real_t **time_out      =  hDecoder->time_out;
     fb_info *fb            = &hDecoder->fb;
--- a/libfaad/drc.c
+++ b/libfaad/drc.c
@@ -16,16 +16,11 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: drc.c,v 1.2 2002/02/18 10:01:05 menno Exp $
+** $Id: drc.c,v 1.3 2002/02/20 13:05:57 menno Exp $
 **/
 
 #include "common.h"
 
-#ifdef USE_FMATH
-#include <mathf.h>
-#else
-#include <math.h>
-#endif
 #include <memory.h>
 #include "syntax.h"
 #include "drc.h"
@@ -58,17 +53,9 @@
 
         /* Decode DRC gain factor */
         if (drc->dyn_rng_sgn[bd])  /* compress */
-#ifdef USE_FMATH
-            factor = powf(2.0f, (-drc->ctrl1 * drc->dyn_rng_ctl[bd]/24.0f));
-#else
             factor = (real_t)pow(2.0, (-drc->ctrl1 * drc->dyn_rng_ctl[bd]/24.0));
-#endif
         else /* boost */
-#ifdef USE_FMATH
-            factor = powf(2.0f, (drc->ctrl2 * drc->dyn_rng_ctl[bd]/24.0f));
-#else
             factor = (real_t)pow(2.0, (drc->ctrl2 * drc->dyn_rng_ctl[bd]/24.0));
-#endif
 
         /* Level alignment between different programs (if desired) */
         /* If program reference normalization is done in the digital domain,
@@ -78,11 +65,7 @@
            modification avoids problems with reduced DAC SNR (if signal is
            attenuated) or clipping (if signal is boosted)
          */
-#ifdef USE_FMATH
-        factor *= powf(0.5f, ((DRC_REF_LEVEL - drc->prog_ref_level)/24.0f));
-#else
         factor *= (real_t)pow(0.5, ((DRC_REF_LEVEL - drc->prog_ref_level)/24.0));
-#endif
 
         /* Apply gain factor */
         for (i = bottom; i < top; i++)
--- a/libfaad/filtbank.c
+++ b/libfaad/filtbank.c
@@ -16,17 +16,12 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: filtbank.c,v 1.2 2002/02/18 10:01:05 menno Exp $
+** $Id: filtbank.c,v 1.3 2002/02/20 13:05:57 menno Exp $
 **/
 
 #include "common.h"
 
 #include <stdlib.h>
-#ifdef USE_FMATH
-#include <mathf.h>
-#else
-#include <math.h>
-#endif
 #include <assert.h>
 #include "filtbank.h"
 #include "syntax.h"
@@ -41,7 +36,8 @@
 {
     uint16_t i;
 
-    make_fft_order(fb->unscrambled64, fb->unscrambled512);
+    mdct_init(&(fb->mdct256), 256);
+    mdct_init(&(fb->mdct2048), 2048);
 
     fb->sin_long  = malloc(BLOCK_LEN_LONG*sizeof(real_t));
     fb->sin_short = malloc(BLOCK_LEN_SHORT*sizeof(real_t));
@@ -53,21 +49,16 @@
 
     /* calculate the sine windows */
     for (i = 0; i < BLOCK_LEN_LONG; i++)
-#ifdef USE_FMATH
-        fb->sin_long[i] = sinf(M_PI / (2.0f * BLOCK_LEN_LONG) * (i + 0.5));
-#else
         fb->sin_long[i] = (real_t)sin(M_PI / (2.0 * BLOCK_LEN_LONG) * (i + 0.5));
-#endif
     for (i = 0; i < BLOCK_LEN_SHORT; i++)
-#ifdef USE_FMATH
-        fb->sin_short[i] = sinf(M_PI / (2.0f * BLOCK_LEN_SHORT) * (i + 0.5));
-#else
         fb->sin_short[i] = (real_t)sin(M_PI / (2.0 * BLOCK_LEN_SHORT) * (i + 0.5));
-#endif
 }
 
 void filter_bank_end(fb_info *fb)
 {
+    mdct_end(&(fb->mdct256));  /* release the tables held by the 256-sample MDCT state */
+    mdct_end(&(fb->mdct2048)); /* release the tables held by the 2048-sample MDCT state */
+
     if (fb->sin_long) free(fb->sin_long);
     if (fb->sin_short) free(fb->sin_short);
 }
@@ -164,10 +155,10 @@
     switch (len)
     {
     case 2048:
-        IMDCT_long(in_data, out_data, fb->unscrambled512);
+        IMDCT_long(&(fb->mdct2048), in_data, out_data);
         return;
     case 256:
-        IMDCT_short(in_data, out_data, fb->unscrambled64);
+        IMDCT_short(&(fb->mdct256), in_data, out_data);
         return;
     }
 }
@@ -177,10 +168,10 @@
     switch (len)
     {
     case 2048:
-        MDCT_long(in_data, out_data, fb->unscrambled512);
+        MDCT_long(&(fb->mdct2048), in_data, out_data);
         return;
     case 256:
-        MDCT_short(in_data, out_data, fb->unscrambled64);
+        MDCT_short(&(fb->mdct256), in_data, out_data);
         return;
     }
 }
--- a/libfaad/filtbank.h
+++ b/libfaad/filtbank.h
@@ -16,7 +16,7 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: filtbank.h,v 1.2 2002/02/18 10:01:05 menno Exp $
+** $Id: filtbank.h,v 1.3 2002/02/20 13:05:57 menno Exp $
 **/
 
 #ifndef __FILTBANK_H__
@@ -26,6 +26,7 @@
 extern "C" {
 #endif
 
+#include "mdct.h"
 
 #define BLOCK_LEN_LONG  1024
 #define BLOCK_LEN_SHORT  128
@@ -33,11 +34,11 @@
 
 typedef struct
 {
-    uint16_t unscrambled64[64];
-    uint16_t unscrambled512[512];
-
     real_t *sin_long;
     real_t *sin_short;
+
+    mdct_info mdct256;  /* state handed to IMDCT_short()/MDCT_short(); set up with mdct_init(..., 256) */
+    mdct_info mdct2048; /* state handed to IMDCT_long()/MDCT_long(); set up with mdct_init(..., 2048) */
 } fb_info;
 
 void filter_bank_init(fb_info *fb);
--- a/libfaad/is.c
+++ b/libfaad/is.c
@@ -16,16 +16,11 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: is.c,v 1.2 2002/02/18 10:01:05 menno Exp $
+** $Id: is.c,v 1.3 2002/02/20 13:05:57 menno Exp $
 **/
 
 #include "common.h"
 
-#ifdef USE_FMATH
-#include <mathf.h>
-#else
-#include <math.h>
-#endif
 #include "syntax.h"
 #include "is.h"
 
@@ -55,11 +50,7 @@
 
                     scale = is_intensity(icsr, g, sfb) *
                         invert_intensity(ics, g, sfb) *
-#ifdef USE_FMATH
-                        powf(0.5f, (0.25f*icsr->scale_factors[g][sfb]));
-#else
                         (real_t)pow(0.5, (0.25*icsr->scale_factors[g][sfb]));
-#endif
 
                     /* Scale from left to right channel,
                        do not touch left channel */
--- a/libfaad/mdct.c
+++ b/libfaad/mdct.c
@@ -16,36 +16,60 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: mdct.c,v 1.2 2002/02/18 10:01:05 menno Exp $
+** $Id: mdct.c,v 1.3 2002/02/20 13:05:57 menno Exp $
 **/
 
 #include "common.h"
 
-#ifdef USE_FMATH
-#include <mathf.h>
-#else
-#include <math.h>
-#endif
+#include <stdlib.h>
 #include "mdct.h"
 
+void mdct_init(mdct_info *mdct, uint16_t len) /* fill *mdct with the tables for a length-len transform */
+{
+    uint16_t i;
 
-void MDCT_long(fftw_real *in_data, fftw_real *out_data, uint16_t *unscrambled)
+    mdct->len = len;
+    /* len/4 FFT reorder indices; make_fft_order() only fills them when len/4 is 64 or 512 */
+    mdct->unscrambled = malloc(len/4*sizeof(uint16_t)); /* NOTE(review): result is not checked for NULL */
+    make_fft_order(mdct->unscrambled, len/4);
+    /* optional lookup table: len/4 interleaved (cos, sin) pairs, i.e. len/2 real_t values */
+#ifdef USE_TWIDDLE_TABLE
+    mdct->twiddlers = malloc(len/2*sizeof(real_t)); /* NOTE(review): result is not checked for NULL */
+    for (i = 0; i < len/4; i++)
+    {
+        real_t angle = 2.0f * M_PI * (i + 1.0f/8.0f) / (real_t)len; /* 2*pi*(i + 1/8)/len */
+        mdct->twiddlers[2*i]     = cos(angle);
+        mdct->twiddlers[2*i + 1] = sin(angle);
+    }
+#endif
+}
+
+void mdct_end(mdct_info *mdct) /* free the tables that mdct_init() allocated into *mdct */
 {
+    if (mdct->unscrambled) free(mdct->unscrambled);
+#ifdef USE_TWIDDLE_TABLE
+    if (mdct->twiddlers) free(mdct->twiddlers); /* only allocated when USE_TWIDDLE_TABLE is defined */
+#endif
+}
+
+void MDCT_long(mdct_info *mdct, fftw_real *in_data, fftw_real *out_data)
+{
     fftw_complex FFTarray[512];
-    fftw_real tempr, tempi, c, s, cold, cfreq, sfreq;
-    fftw_real fac,cosfreq8,sinfreq8;
+    fftw_real tempr, tempi, fac;
+
+#ifdef USE_TWIDDLE_TABLE
+    /* use twiddle factor tables */
+    real_t *twiddlers = mdct->twiddlers;
+#else
+    /* temps for pre and post twiddle */
+    real_t cosfreq8, sinfreq8, c, s, cold, cfreq, sfreq;
+#endif
     uint16_t i;
-    uint16_t b = 2048 >> 1;
-    uint16_t N4 = 2048 >> 2;
-    uint16_t N2 = 2048 >> 1;
-    uint16_t a = 2048 - b;
-    uint16_t a2 = a >> 1;
-    uint16_t a4 = a >> 2;
-    uint16_t b4 = b >> 2;
 
 
     fac = 2.; /* 2 from MDCT inverse  to forward */
 
+#ifndef USE_TWIDDLE_TABLE
     /* prepare for recurrence relation in pre-twiddle */
     cfreq = 0.99999529123306274f;
     sfreq = 0.0030679567717015743f;
@@ -54,22 +78,27 @@
 
     c = cosfreq8;
     s = sinfreq8;
+#endif
 
-    for (i = 0; i < N4; i++)
+    for (i = 0; i < 512; i++)
     {
-        uint16_t n = 2048 / 2 - 1 - 2 * i;
-        if (i < b4)
-            tempr = in_data[a2 + n] + in_data[2048 + a2 - 1 - n];
+        uint16_t n = 1023 - (i << 1);
+        if (i < 256)
+            tempr = in_data[512 + n] + in_data[2559 - n];
         else
-            tempr = in_data[a2 + n] - in_data[a2 - 1 - n];
+            tempr = in_data[512 + n] - in_data[511 - n];
 
-        n = 2 * i;
-        if (i < a4)
-            tempi = in_data[a2 + n] - in_data[a2 - 1 - n];
+        n = (i << 1);
+        if (i < 256)
+            tempi = in_data[512 + n] - in_data[511 - n];
         else
-            tempi = in_data[a2 + n] + in_data[2048 + a2 - 1 - n];
+            tempi = in_data[512 + n] + in_data[2559 - n];
 
         /* calculate pre-twiddled FFT input */
+#ifdef USE_TWIDDLE_TABLE
+        FFTarray[i].re = tempr * twiddlers[n] + tempi * twiddlers[n + 1];
+        FFTarray[i].im = tempi * twiddlers[n] - tempr * twiddlers[n + 1];
+#else
         FFTarray[i].re = tempr * c + tempi * s;
         FFTarray[i].im = tempi * c - tempr * s;
 
@@ -77,6 +106,7 @@
         cold = c;
         c = c * cfreq - s * sfreq;
         s = s * cfreq + cold * sfreq;
+#endif
     }
 
     /* Perform in-place complex FFT of length N/4 */
@@ -83,50 +113,61 @@
     pfftw_512(FFTarray);
 
 
+#ifndef USE_TWIDDLE_TABLE
     /* prepare for recurrence relations in post-twiddle */
     c = cosfreq8;
     s = sinfreq8;
+#endif
 
     /* post-twiddle FFT output and then get output data */
-    for (i = 0; i < N4; i++)
+    for (i = 0; i < 512; i++)
     {
-        /* get post-twiddled FFT output  */
-        uint16_t unscr = unscrambled[i];
+        uint16_t n = i << 1;
+        uint16_t unscr = mdct->unscrambled[i];
 
+        /* get post-twiddled FFT output  */
+#ifdef USE_TWIDDLE_TABLE
+        tempr = fac * (FFTarray[unscr].re * twiddlers[n] + FFTarray[unscr].im * twiddlers[n + 1]);
+        tempi = fac * (FFTarray[unscr].im * twiddlers[n] - FFTarray[unscr].re * twiddlers[n + 1]);
+#else
         tempr = fac * (FFTarray[unscr].re * c + FFTarray[unscr].im * s);
         tempi = fac * (FFTarray[unscr].im * c - FFTarray[unscr].re * s);
+#endif
 
         /* fill in output values */
-        out_data[2 * i]            = -tempr;  /* first half even */
-        out_data[N2 - 1 - 2 * i]   =  tempi;  /* first half odd */
-        out_data[N2 + 2 * i]       = -tempi;  /* second half even */
-        out_data[2048 - 1 - 2 * i] =  tempr;  /* second half odd */
+        out_data[n]        = -tempr;  /* first half even */
+        out_data[1023 - n] =  tempi;  /* first half odd */
+        out_data[1024 + n] = -tempi;  /* second half even */
+        out_data[2047 - n] =  tempr;  /* second half odd */
 
+#ifndef USE_TWIDDLE_TABLE
         /* use recurrence to prepare cosine and sine for next value of i */
         cold = c;
         c = c * cfreq - s * sfreq;
         s = s * cfreq + cold * sfreq;
+#endif
     }
 }
 
-void MDCT_short(fftw_real *in_data, fftw_real *out_data, uint16_t *unscrambled)
+void MDCT_short(mdct_info *mdct, fftw_real *in_data, fftw_real *out_data)
 {
     fftw_complex FFTarray[64];    /* the array for in-place FFT */
-    fftw_real tempr, tempi, c, s, cold, cfreq, sfreq; /* temps for pre and post twiddle */
-    fftw_real fac,cosfreq8,sinfreq8;
+    fftw_real tempr, tempi, fac;
+
+#ifdef USE_TWIDDLE_TABLE
+    /* use twiddle factor tables */
+    real_t *twiddlers = mdct->twiddlers;
+#else
+    /* temps for pre and post twiddle */
+    real_t cosfreq8, sinfreq8, c, s, cold, cfreq, sfreq;
+#endif
     uint16_t i;
-    uint16_t b = 256 >> 1;
-    uint16_t N4 = 256 >> 2;
-    uint16_t N2 = 256 >> 1;
-    uint16_t a = 256 - b;
-    uint16_t a2 = a >> 1;
-    uint16_t a4 = a >> 2;
-    uint16_t b4 = b >> 2;
 
 
     /* Choosing to allocate 2/N factor to Inverse Xform! */
     fac = 2.; /* 2 from MDCT inverse  to forward */
 
+#ifndef USE_TWIDDLE_TABLE
     /* prepare for recurrence relation in pre-twiddle */
     cfreq = 0.99969881772994995f;
     sfreq = 0.024541229009628296f;
@@ -135,22 +176,27 @@
 
     c = cosfreq8;
     s = sinfreq8;
+#endif
 
-    for (i = 0; i < N4; i++)
+    for (i = 0; i < 64; i++)
     {
-        uint16_t n = 256 / 2 - 1 - 2 * i;
-        if (i < b4)
-            tempr = in_data[a2 + n] + in_data[256 + a2 - 1 - n];
+        uint16_t n = 127 - (i << 1);
+        if (i < 32)
+            tempr = in_data[64 + n] + in_data[319 - n];
         else
-            tempr = in_data[a2 + n] - in_data[a2 - 1 - n];
+            tempr = in_data[64 + n] - in_data[63 - n];
 
-        n = 2 * i;
-        if (i < a4)
-            tempi = in_data[a2 + n] - in_data[a2 - 1 - n];
+        n = i << 1;
+        if (i < 32)
+            tempi = in_data[64 + n] - in_data[63 - n];
         else
-            tempi = in_data[a2 + n] + in_data[256 + a2 - 1 - n];
+            tempi = in_data[64 + n] + in_data[319 - n];
 
         /* calculate pre-twiddled FFT input */
+#ifdef USE_TWIDDLE_TABLE
+        FFTarray[i].re = tempr * twiddlers[n] + tempi * twiddlers[n + 1];
+        FFTarray[i].im = tempi * twiddlers[n] - tempr * twiddlers[n + 1];
+#else
         FFTarray[i].re = tempr * c + tempi * s;
         FFTarray[i].im = tempi * c - tempr * s;
 
@@ -158,50 +204,65 @@
         cold = c;
         c = c * cfreq - s * sfreq;
         s = s * cfreq + cold * sfreq;
+#endif
     }
 
     /* Perform in-place complex FFT of length N/4 */
     pfftw_64(FFTarray);
 
+#ifndef USE_TWIDDLE_TABLE
     /* prepare for recurrence relations in post-twiddle */
     c = cosfreq8;
     s = sinfreq8;
+#endif
 
     /* post-twiddle FFT output and then get output data */
-    for (i = 0; i < N4; i++)
+    for (i = 0; i < 64; i++)
     {
-        uint16_t unscr = unscrambled[i];
+        uint16_t n = i << 1;
+        uint16_t unscr = mdct->unscrambled[i];
 
+#ifdef USE_TWIDDLE_TABLE
+        tempr = fac * (FFTarray[unscr].re * twiddlers[n] + FFTarray[unscr].im * twiddlers[n + 1]);
+        tempi = fac * (FFTarray[unscr].im * twiddlers[n] - FFTarray[unscr].re * twiddlers[n + 1]);
+#else
         tempr = fac * (FFTarray[unscr].re * c + FFTarray[unscr].im * s);
         tempi = fac * (FFTarray[unscr].im * c - FFTarray[unscr].re * s);
+#endif
 
         /* fill in output values */
-        out_data[2 * i]           = -tempr;  /* first half even */
-        out_data[N2 - 1 - 2 * i]  =  tempi;  /* first half odd */
-        out_data[N2 + 2 * i]      = -tempi;  /* second half even */
-        out_data[256 - 1 - 2 * i] =  tempr;  /* second half odd */
+        out_data[n]           = -tempr;  /* first half even */
+        out_data[127 - n] =  tempi;  /* first half odd */
+        out_data[128 + n]     = -tempi;  /* second half even */
+        out_data[255 - n] =  tempr;  /* second half odd */
 
+#ifndef USE_TWIDDLE_TABLE
         /* use recurrence to prepare cosine and sine for next value of i */
         cold = c;
         c = c * cfreq - s * sfreq;
         s = s * cfreq + cold * sfreq;
+#endif
     }
 }
 
-void IMDCT_long(fftw_real *in_data, fftw_real *out_data, uint16_t *unscrambled)
+void IMDCT_long(mdct_info *mdct, fftw_real *in_data, fftw_real *out_data)
 {
     fftw_complex FFTarray[512];    /* the array for in-place FFT */
-    fftw_real tempr, tempi, c, s, cold, cfreq, sfreq; /* temps for pre and post twiddle */
+    fftw_real tempr, tempi, fac;
 
-    fftw_real fac, cosfreq8, sinfreq8;
+#ifdef USE_TWIDDLE_TABLE
+    /* use twiddle factor tables */
+    real_t *twiddlers = mdct->twiddlers;
+#else
+    /* temps for pre and post twiddle */
+    real_t cosfreq8, sinfreq8, c, s, cold, cfreq, sfreq;
+#endif
     uint16_t i;
-    uint16_t Nd2 = 2048 >> 1;
-    uint16_t Nd4 = 2048 >> 2;
-    uint16_t Nd8 = 2048 >> 3;
 
     /* Choosing to allocate 2/N factor to Inverse Xform! */
     fac = 0.0009765625f;
 
+#ifndef USE_TWIDDLE_TABLE
     /* prepare for recurrence relation in pre-twiddle */
     cfreq = 0.99999529123306274f;
     sfreq = 0.0030679567717015743f;
@@ -210,15 +271,21 @@
 
     c = cosfreq8;
     s = sinfreq8;
+#endif
 
-    for (i = 0; i < Nd4; i++)
+    for (i = 0; i < 512; i++)
     {
-        uint16_t unscr = unscrambled[i];
+        uint16_t n = i << 1;
+        uint16_t unscr = mdct->unscrambled[i];
 
-        tempr = -in_data[2 * i];
-        tempi =  in_data[Nd2 - 1 - 2 * i];
+        tempr = -in_data[n];
+        tempi =  in_data[1023 - n];
 
         /* calculate pre-twiddled FFT input */
+#ifdef USE_TWIDDLE_TABLE
+        FFTarray[unscr].re = tempr * twiddlers[n] - tempi * twiddlers[n + 1];
+        FFTarray[unscr].im = tempi * twiddlers[n] + tempr * twiddlers[n + 1];
+#else
         FFTarray[unscr].re = tempr * c - tempi * s;
         FFTarray[unscr].im = tempi * c + tempr * s;
 
@@ -226,55 +293,72 @@
         cold = c;
         c = c * cfreq - s * sfreq;
         s = s * cfreq + cold * sfreq;
+#endif
     }
 
     /* Perform in-place complex IFFT of length N/4 */
     pfftwi_512(FFTarray);
 
+#ifndef USE_TWIDDLE_TABLE
     /* prepare for recurrence relations in post-twiddle */
     c = cosfreq8;
     s = sinfreq8;
+#endif
 
     /* post-twiddle FFT output and then get output data */
-    for (i = 0; i < Nd4; i++)
+    for (i = 0; i < 512; i++)
     {
+        uint16_t n = i << 1;
         /* get post-twiddled FFT output  */
+#ifdef USE_TWIDDLE_TABLE
+        tempr = fac * (FFTarray[i].re * twiddlers[n] - FFTarray[i].im * twiddlers[n + 1]);
+        tempi = fac * (FFTarray[i].im * twiddlers[n] + FFTarray[i].re * twiddlers[n + 1]);
+#else
         tempr = fac * (FFTarray[i].re * c - FFTarray[i].im * s);
         tempi = fac * (FFTarray[i].im * c + FFTarray[i].re * s);
+#endif
 
         /* fill in output values */
-        out_data [Nd2 + Nd4 - 1 - 2 * i] = tempr;
-        if (i < Nd8)
-            out_data[Nd2 + Nd4 + 2 * i] = tempr;
+        out_data [1535 - n] = tempr;
+        if (i < 256)
+            out_data[1536 + n] = tempr;
         else
-            out_data[2 * i - Nd4] = -tempr;
+            out_data[n - 512] = -tempr;
 
-        out_data [Nd4 + 2 * i] = tempi;
-        if (i < Nd8)
-            out_data[Nd4 - 1 - 2 * i] = -tempi;
+        out_data [512 + n] = tempi;
+        if (i < 256)
+            out_data[511 - n] = -tempi;
         else
-            out_data[Nd4 + 2048 - 1 - 2*i] = tempi;
+            out_data[2559 - n] = tempi;
 
+#ifndef USE_TWIDDLE_TABLE
         /* use recurrence to prepare cosine and sine for next value of i */
         cold = c;
         c = c * cfreq - s * sfreq;
         s = s * cfreq + cold * sfreq;
+#endif
     }
 }
 
-void IMDCT_short(fftw_real *in_data, fftw_real *out_data, uint16_t *unscrambled)
+void IMDCT_short(mdct_info *mdct, fftw_real *in_data, fftw_real *out_data)
 {
     fftw_complex FFTarray[64];    /* the array for in-place FFT */
-    fftw_real tempr, tempi, c, s, cold, cfreq, sfreq; /* temps for pre and post twiddle */
-    fftw_real fac, cosfreq8, sinfreq8;
+    fftw_real tempr, tempi;
+    fftw_real fac;
+
+#ifdef USE_TWIDDLE_TABLE
+    /* use twiddle factor tables */
+    real_t *twiddlers = mdct->twiddlers;
+#else
+    /* temps for pre and post twiddle */
+    real_t cosfreq8, sinfreq8, c, s, cold, cfreq, sfreq;
+#endif
     uint16_t i;
-    uint16_t Nd2 = 256 >> 1;
-    uint16_t Nd4 = 256 >> 2;
-    uint16_t Nd8 = 256 >> 3;
 
     /* Choosing to allocate 2/N factor to Inverse Xform! */
     fac = 0.0078125f; /* remaining 2/N from 4/N IFFT factor */
 
+#ifndef USE_TWIDDLE_TABLE
     /* prepare for recurrence relation in pre-twiddle */
     cfreq = 0.99969881772994995f;
     sfreq = 0.024541229009628296f;
@@ -283,15 +367,21 @@
 
     c = cosfreq8;
     s = sinfreq8;
+#endif
 
-    for (i = 0; i < Nd4; i++)
+    for (i = 0; i < 64; i++)
     {
-        uint16_t unscr = unscrambled[i];
+        uint16_t n = i << 1;
+        uint16_t unscr = mdct->unscrambled[i];
 
-        tempr = -in_data[2 * i];
-        tempi = in_data[Nd2 - 1 - 2 * i];
+        tempr = -in_data[n];
+        tempi = in_data[127 - n];
 
         /* calculate pre-twiddled FFT input */
+#ifdef USE_TWIDDLE_TABLE
+        FFTarray[unscr].re = tempr * twiddlers[n] - tempi * twiddlers[n + 1];
+        FFTarray[unscr].im = tempi * twiddlers[n] + tempr * twiddlers[n + 1];
+#else
         FFTarray[unscr].re = tempr * c - tempi * s;
         FFTarray[unscr].im = tempi * c + tempr * s;
 
@@ -299,39 +389,52 @@
         cold = c;
         c = c * cfreq - s * sfreq;
         s = s * cfreq + cold * sfreq;
+#endif
     }
 
     /* Perform in-place complex IFFT of length N/4 */
     pfftwi_64(FFTarray);
 
+#ifndef USE_TWIDDLE_TABLE
     /* prepare for recurrence relations in post-twiddle */
     c = cosfreq8;
     s = sinfreq8;
+#endif
 
     /* post-twiddle FFT output and then get output data */
-    for (i = 0; i < Nd4; i++)
+    for (i = 0; i < 64; i++)
     {
+        uint16_t n = i << 1;
+
+#ifdef USE_TWIDDLE_TABLE
         /* get post-twiddled FFT output  */
+        tempr = fac * (FFTarray[i].re * twiddlers[n] - FFTarray[i].im * twiddlers[n + 1]);
+        tempi = fac * (FFTarray[i].im * twiddlers[n] + FFTarray[i].re * twiddlers[n + 1]);
+#else
+        /* get post-twiddled FFT output  */
         tempr = fac * (FFTarray[i].re * c - FFTarray[i].im * s);
         tempi = fac * (FFTarray[i].im * c + FFTarray[i].re * s);
+#endif
 
         /* fill in output values */
-        out_data [Nd2 + Nd4 - 1 - 2 * i] = tempr;
-        if (i < Nd8)
-            out_data[Nd2 + Nd4 + 2 * i] = tempr;
+        out_data [191 - n] = tempr;
+        if (i < 32)
+            out_data[192 + n] = tempr;
         else
-            out_data[2 * i - Nd4] = -tempr;
+            out_data[n - 64] = -tempr;
 
-        out_data [Nd4 + 2 * i] = tempi;
-        if (i < Nd8)
-            out_data[Nd4 - 1 - 2 * i] = -tempi;
+        out_data [64 + n] = tempi;
+        if (i < 32)
+            out_data[63 - n] = -tempi;
         else
-            out_data[Nd4 + 256 - 1 - 2*i] = tempi;
+            out_data[319 - n] = tempi;
 
+#ifndef USE_TWIDDLE_TABLE
         /* use recurrence to prepare cosine and sine for next value of i */
         cold = c;
         c = c * cfreq - s * sfreq;
         s = s * cfreq + cold * sfreq;
+#endif
     }
 }
 
@@ -711,7 +814,7 @@
 { -0.999698818696204, 0.0245412285229123 },
 };
 
-void PFFTW(16) (fftw_complex * input) {
+static void PFFTW(16) (fftw_complex * input) {
      fftw_real tmp332;
      fftw_real tmp331;
      fftw_real tmp330;
@@ -1018,7 +1121,7 @@
      c_im(input[14]) = st8;
 }
 
-void PFFTW(32) (fftw_complex * input) {
+static void PFFTW(32) (fftw_complex * input) {
      fftw_real tmp714;
      fftw_real tmp713;
      fftw_real tmp712;
@@ -1801,7 +1904,7 @@
      c_im(input[1]) = st1;
 }
 
-void  PFFTW(64)(fftw_complex *input)
+static void  PFFTW(64)(fftw_complex *input)
 {
      PFFTW(twiddle_4)(input, PFFTW(W_64), 16);
      PFFTW(16)(input );
@@ -1810,7 +1913,7 @@
      PFFTW(16)(input + 48);
 }
 
-void PFFTW(128)(fftw_complex *input)
+static void PFFTW(128)(fftw_complex *input)
 {
      PFFTW(twiddle_4)(input, PFFTW(W_128), 32);
      PFFTW(32)(input );
@@ -1819,7 +1922,7 @@
      PFFTW(32)(input + 96);
 }
 
-void PFFTW(512)(fftw_complex *input)
+static void PFFTW(512)(fftw_complex *input)
 {
      PFFTW(twiddle_4)(input, PFFTW(W_512), 128);
      PFFTW(128)(input );
@@ -1828,7 +1931,7 @@
      PFFTW(128)(input + 384);
 }
 
-void PFFTWI(16) (fftw_complex * input) {
+static void PFFTWI(16) (fftw_complex * input) {
      fftw_real tmp333;
      fftw_real tmp332;
      fftw_real tmp331;
@@ -2137,7 +2240,7 @@
      c_im(input[2]) = st1;
 }
 
-void PFFTWI(32) (fftw_complex * input) {
+static void PFFTWI(32) (fftw_complex * input) {
      fftw_real tmp714;
      fftw_real tmp713;
      fftw_real tmp712;
@@ -2920,7 +3023,7 @@
      c_re(input[3]) = st1;
 }
 
-void PFFTWI(64)(fftw_complex *input)
+static void PFFTWI(64)(fftw_complex *input)
 {
      PFFTWI(16)(input );
      PFFTWI(16)(input + 16);
@@ -2929,7 +3032,7 @@
      PFFTWI(twiddle_4)(input, PFFTW(W_64), 16);
 }
 
-void PFFTWI(128)(fftw_complex *input)
+static void PFFTWI(128)(fftw_complex *input)
 {
      PFFTWI(32)(input );
      PFFTWI(32)(input + 32);
@@ -2938,7 +3041,7 @@
      PFFTWI(twiddle_4)(input, PFFTW(W_128), 32);
 }
 
-void PFFTWI(512)(fftw_complex *input)
+static void PFFTWI(512)(fftw_complex *input)
 {
      PFFTWI(128)(input );
      PFFTWI(128)(input + 128);
@@ -2947,7 +3050,7 @@
      PFFTWI(twiddle_4)(input, PFFTW(W_512), 128);
 }
 
-void  PFFTW(twiddle_4) (fftw_complex * A, const fftw_complex * W, uint16_t iostride) {
+static void  PFFTW(twiddle_4) (fftw_complex * A, const fftw_complex * W, uint16_t iostride) {
      uint16_t i;
      fftw_complex *inout;
      inout = A;
@@ -3060,7 +3163,7 @@
      } while (i > 0);
 }
 
-void PFFTWI(twiddle_4) (fftw_complex * A, const fftw_complex * W, uint16_t iostride) {
+static void PFFTWI(twiddle_4) (fftw_complex * A, const fftw_complex * W, uint16_t iostride) {
      uint16_t i;
      fftw_complex *inout;
      inout = A;
@@ -3177,7 +3280,7 @@
      } while (i > 0);
 }
 
-uint16_t PFFTW(permutation_64)(uint16_t i)
+static uint16_t PFFTW(permutation_64)(uint16_t i)
 {
     uint16_t i1 = i % 4;
     uint16_t i2 = i / 4;
@@ -3187,7 +3290,7 @@
        return (i1 * 16 + ((i2 + 1) % 16));
 }
 
-uint16_t PFFTW(permutation_128)(uint16_t i)
+static uint16_t PFFTW(permutation_128)(uint16_t i)
 {
     uint16_t i1 = i % 4;
     uint16_t i2 = i / 4;
@@ -3197,7 +3300,7 @@
        return (i1 * 32 + ((i2 + 1) % 32));
 }
 
-uint16_t PFFTW(permutation_512)(uint16_t i)
+static uint16_t PFFTW(permutation_512)(uint16_t i)
 {
     uint16_t i1 = i % 4;
     uint16_t i2 = i / 4;
@@ -3207,13 +3310,19 @@
        return (i1 * 128 + PFFTW(permutation_128)((i2 + 1) % 128));
 }
 
-void make_fft_order(uint16_t *unscrambled64, uint16_t *unscrambled512)
+static void make_fft_order(uint16_t *unscrambled, uint16_t len) /* fill unscrambled[0..len-1] with the FFT output order */
 {
     uint16_t i;
 
-    for (i = 0; i < 64; i++)
-        unscrambled64[i] = PFFTW(permutation_64)(i);
-
-    for (i = 0; i < 512; i++)
-        unscrambled512[i] = PFFTW(permutation_512)(i);
+    switch (len) /* only lengths 64 and 512 are handled; any other value leaves unscrambled untouched */
+    {
+    case 64:
+        for (i = 0; i < len; i++)
+            unscrambled[i] = PFFTW(permutation_64)(i);
+        break;
+    case 512:
+        for (i = 0; i < len; i++)
+            unscrambled[i] = PFFTW(permutation_512)(i);
+        break;
+    }
 }
--- a/libfaad/mdct.h
+++ b/libfaad/mdct.h
@@ -16,7 +16,7 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: mdct.h,v 1.2 2002/02/18 10:01:05 menno Exp $
+** $Id: mdct.h,v 1.3 2002/02/20 13:05:57 menno Exp $
 **/
 
 #ifndef __MDCT_H__
@@ -26,6 +26,12 @@
 extern "C" {
 #endif
 
+typedef struct { /* per-length MDCT state; filled by mdct_init(), released by mdct_end() */
+    uint16_t len; /* transform length as passed to mdct_init() */
+    real_t *twiddlers; /* interleaved cos/sin table (len/2 values); only allocated when USE_TWIDDLE_TABLE is defined */
+    uint16_t *unscrambled; /* len/4 FFT reorder indices written by make_fft_order() */
+} mdct_info;
+
 typedef real_t fftw_real;
 
 typedef struct {
@@ -46,13 +52,17 @@
 DEFINE_PFFTW(128)
 DEFINE_PFFTW(512)
 
-void make_fft_order(uint16_t *unscrambled64, uint16_t *unscrambled512);
-void IMDCT_long(fftw_real *in_data, fftw_real *out_data, uint16_t *unscrambled);
-void IMDCT_short(fftw_real *in_data, fftw_real *out_data, uint16_t *unscrambled);
+void mdct_init(mdct_info *mdct, uint16_t len);
+void mdct_end(mdct_info *mdct);
 
-void MDCT_long(fftw_real *in_data, fftw_real *out_data, uint16_t *unscrambled);
-void MDCT_short(fftw_real *in_data, fftw_real *out_data, uint16_t *unscrambled);
+void IMDCT_long(mdct_info *mdct, fftw_real *in_data, fftw_real *out_data);
+void IMDCT_short(mdct_info *mdct, fftw_real *in_data, fftw_real *out_data);
 
+void MDCT_long(mdct_info *mdct, fftw_real *in_data, fftw_real *out_data);
+void MDCT_short(mdct_info *mdct, fftw_real *in_data, fftw_real *out_data);
+/* NOTE(review): static prototype in a header - includers other than mdct.c see a declaration with no definition; confirm this is intended */
+static void make_fft_order(uint16_t *unscrambled, uint16_t len);
+
 #define PFFTW(name)  CONCAT(pfftw_, name)
 #define PFFTWI(name)  CONCAT(pfftwi_, name)
 #define CONCAT_AUX(a, b) a ## b
@@ -59,8 +69,8 @@
 #define CONCAT(a, b) CONCAT_AUX(a,b)
 #define FFTW_KONST(x) ((fftw_real) x)
 
-void PFFTW(twiddle_4)(fftw_complex *A, const fftw_complex *W, uint16_t iostride);
-void PFFTWI(twiddle_4)(fftw_complex *A, const fftw_complex *W, uint16_t iostride);
+static void PFFTW(twiddle_4)(fftw_complex *A, const fftw_complex *W, uint16_t iostride);
+static void PFFTWI(twiddle_4)(fftw_complex *A, const fftw_complex *W, uint16_t iostride);
 
 #ifdef __cplusplus
 }
--- a/libfaad/output.c
+++ b/libfaad/output.c
@@ -16,16 +16,11 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: output.c,v 1.3 2002/02/18 10:01:05 menno Exp $
+** $Id: output.c,v 1.4 2002/02/20 13:05:57 menno Exp $
 **/
 
 #include "common.h"
 
-#ifdef USE_FMATH
-#include <mathf.h>
-#else
-#include <math.h>
-#endif
 #include "output.h"
 #include "decoder.h"
 
@@ -32,11 +27,7 @@
 
 #define ftol(A,B) {tmp = *(int32_t*) & A - 0x4B7F8000; \
                    B = (int16_t)((tmp==(int16_t)tmp) ? tmp : (tmp>>31)^0x7FFF);}
-#ifdef USE_FMATH
-#define ROUND(x) ((int32_t)floorf((x) + 0.5f))
-#else
 #define ROUND(x) ((int32_t)floor((x) + 0.5))
-#endif
 
 #define HAVE_IEEE754_FLOAT
 #ifdef HAVE_IEEE754_FLOAT
--- a/libfaad/pns.c
+++ b/libfaad/pns.c
@@ -16,16 +16,11 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: pns.c,v 1.3 2002/02/18 10:01:05 menno Exp $
+** $Id: pns.c,v 1.4 2002/02/20 13:05:57 menno Exp $
 **/
 
 #include "common.h"
 
-#ifdef USE_FMATH
-#include <mathf.h>
-#else
-#include <math.h>
-#endif
 #include "pns.h"
 
 
@@ -73,13 +68,8 @@
     /* 14496-3 says:
        scale = 1.0f/(size * (real_t)sqrt(MEAN_NRG));
     */
-#ifdef USE_FMATH
-    scale = 1.0f/sqrtf(size * MEAN_NRG);
-    scale *= powf(2.0f, 0.25f*scale_factor);
-#else
     scale = 1.0f/(real_t)sqrt(size * MEAN_NRG);
     scale *= (real_t)pow(2.0, 0.25*scale_factor);
-#endif
 
     /* Scale random vector to desired target energy */
     for (i = 0; i < size; i++)
--- a/libfaad/specrec.c
+++ b/libfaad/specrec.c
@@ -16,7 +16,7 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: specrec.c,v 1.3 2002/02/18 10:01:05 menno Exp $
+** $Id: specrec.c,v 1.4 2002/02/20 13:05:57 menno Exp $
 **/
 
 /*
@@ -28,11 +28,6 @@
 
 #include "common.h"
 
-#ifdef USE_FMATH
-#include <mathf.h>
-#else
-#include <math.h>
-#endif
 #include "specrec.h"
 #include "syntax.h"
 #include "data.h"
@@ -220,21 +215,13 @@
     /* build pow() table for inverse quantization */
     for(i = 0; i < IQ_TABLE_SIZE; i++)
     {
-#ifdef USE_FMATH
-        iq_table[i] = powf(i, 4.0f/3.0f);
-#else
         iq_table[i] = (real_t)pow(i, 4.0/3.0);
-#endif
     }
 
     /* build pow(2, 0.25) table for scalefactors */
     for(i = 0; i < POW_TABLE_SIZE; i++)
     {
-#ifdef USE_FMATH
-        pow2_table[i] = powf(2.0f, 0.25f * (i-100));
-#else
         pow2_table[i] = (real_t)pow(2.0, 0.25 * (i-100));
-#endif
     }
 }
 
@@ -245,21 +232,13 @@
         if (q < IQ_TABLE_SIZE)
             return iq_table[q];
         else
-#ifdef USE_FMATH
-            return powf(q, 4.0f/3.0f);
-#else
             return (real_t)pow(q, 4.0/3.0);
-#endif
     } else if (q < 0) {
         q = -q;
         if (q < IQ_TABLE_SIZE)
             return -iq_table[q];
         else
-#ifdef USE_FMATH
-            return -powf(q, 4.0f/3.0f);
-#else
             return -(real_t)pow(q, 4.0/3.0);
-#endif
     } else {
         return 0.0f;
     }
@@ -289,11 +268,7 @@
     if (scale_factor < POW_TABLE_SIZE)
         return pow2_table[scale_factor];
     else
-#ifdef USE_FMATH
-        return powf(2.0f, 0.25f * (scale_factor - 100));
-#else
         return (real_t)pow(2.0, 0.25 * (scale_factor - 100));
-#endif
 }
 
 void apply_scalefactors(ic_stream *ics, real_t *x_invquant, real_t *pow2_table)
--- a/libfaad/tns.c
+++ b/libfaad/tns.c
@@ -16,16 +16,11 @@
 ** along with this program; if not, write to the Free Software 
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
-** $Id: tns.c,v 1.2 2002/02/18 10:01:05 menno Exp $
+** $Id: tns.c,v 1.3 2002/02/20 13:05:57 menno Exp $
 **/
 
 #include "common.h"
 
-#ifdef USE_FMATH
-#include <mathf.h>
-#else
-#include <math.h>
-#endif
 #include "syntax.h"
 #include "tns.h"
 
@@ -154,11 +149,7 @@
     iqfac_m = ((1 << (coef_res_bits-1)) + 0.5f) / (M_PI/2.0f);
 
     for (i = 0; i < order; i++)
-#ifdef USE_FMATH
-        tmp2[i] = sinf(tmp[i] / ((tmp[i] >= 0) ? iqfac : iqfac_m));
-#else
         tmp2[i] = (real_t)sin(tmp[i] / ((tmp[i] >= 0) ? iqfac : iqfac_m));
-#endif
 
     /* Conversion to LPC coefficients */
     a[0] = 1;