shithub: aacdec

--- a/ChangeLog

+++ b/ChangeLog

@@ -1,3 +1,28 @@

+22 june 2004     mbakker(at)nero.com

+    - ps_dec.c: Removed samplerate dependency in PS, likely to be updated in corrigendum

+15 june 2004     mbakker(at)nero.com

+    - sbr_dct.c,h: Removed huge ugly DCT_4_64...

+    - sbr_qmf.c: replaced DCT_IV in qmf synthesis

+13 june 2004     mbakker(at)nero.com

+    - ps_dec.c: Fixed fixed point code, no more cos() and sin() used

+7 june 2004      mbakker(at)nero.com

+    - sbr_*.c,h: Overall speedups and improvements in SBR code.

+4 june 2004      gpascutto(at)nero.com

+    - drm_dec.c: Fixed a bug in the standard (PAN mixing)

+3 june 2004      mbakker(at)nero.com

+    - ps_syntax.c: fixed a bug in one of the huffman tables

+2 june 2004      gpascutto(at)nero.com

+    - drm_dec.c: Fully working fixed point support

+27 may 2004      mbakker(at)nero.com

+    - sbr_qmf.c, sbr_dec.h: Fixed fixed point problems with downsampled QMF

 17 may 2004      mbakker(at)nero.com

     - syntax.c, decoder.c: Added some logging output

--- a/libfaad/cfft_tab.h

+++ b/libfaad/cfft_tab.h

@@ -1,19 +1,19 @@

/*

 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding

 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com

-**

+**

 ** This program is free software; you can redistribute it and/or modify

 ** it under the terms of the GNU General Public License as published by

 ** the Free Software Foundation; either version 2 of the License, or

 ** (at your option) any later version.

-**

+**

 ** This program is distributed in the hope that it will be useful,

 ** but WITHOUT ANY WARRANTY; without even the implied warranty of

 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 ** GNU General Public License for more details.

-**

+**

 ** You should have received a copy of the GNU General Public License

-** along with this program; if not, write to the Free Software

+** along with this program; if not, write to the Free Software

 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

**

 ** Any non-GPL usage of this software or parts of this software is strictly

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: cfft_tab.h,v 1.14 2004/05/17 10:18:02 menno Exp $

+** $Id: cfft_tab.h,v 1.16 2004/07/31 15:48:55 menno Exp $

**/

 #ifndef __CFFT_TAB_H__

--- a/libfaad/common.h

+++ b/libfaad/common.h

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: common.h,v 1.61 2004/05/17 10:18:02 menno Exp $

+** $Id: common.h,v 1.63 2004/07/31 15:48:55 menno Exp $

**/

 #ifndef __COMMON_H__

@@ -57,6 +57,9 @@

 /* use fixed point reals */

 //#define FIXED_POINT

 //#define BIG_IQ_TABLE

+/* Use if target platform has address generators with autoincrement */

+//#define PREFER_POINTERS

 #ifdef _WIN32_WCE

 #define FIXED_POINT

--- a/libfaad/decoder.c

+++ b/libfaad/decoder.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: decoder.c,v 1.103 2004/05/17 10:18:02 menno Exp $

+** $Id: decoder.c,v 1.105 2004/07/31 15:48:55 menno Exp $

**/

 #include "common.h"

@@ -771,8 +771,11 @@

         for (i = 0; i < ((buffer_size+3)>>2); i++)

             uint8_t *buf;

+            uint32_t temp = 0;

             buf = faad_getbitbuffer(&ld, 32);

-            printf("%d\n", getdword((void*)buf));

+            //temp = getdword((void*)buf);

+            temp = *((uint32_t*)buf);

+            printf("0x%.8X\n", temp);

             free(buf);

         faad_endbits(&ld);

--- a/libfaad/filtbank.c

+++ b/libfaad/filtbank.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: filtbank.c,v 1.37 2004/05/17 10:18:02 menno Exp $

+** $Id: filtbank.c,v 1.38 2004/06/30 12:45:56 menno Exp $

**/

 #include "common.h"

@@ -350,7 +350,8 @@

 #if 0

     for (i = 0; i < 1024; i++)

-        printf("%d\n", time_out[i]);

+        //printf("%d\n", time_out[i]);

+        printf("0x%.8X\n", time_out[i]);

 #endif

--- a/libfaad/ps_dec.c

+++ b/libfaad/ps_dec.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: ps_dec.c,v 1.7 2004/05/17 10:18:03 menno Exp $

+** $Id: ps_dec.c,v 1.10 2004/09/04 14:56:28 menno Exp $

**/

 #include "common.h"

@@ -95,10 +95,16 @@

     FRAC_CONST(0.25)

};

+#ifdef PARAM_32KHZ

 static const uint8_t delay_length_d[2][NO_ALLPASS_LINKS] = {

     { 1, 2, 3 } /* d_24kHz */,

     { 3, 4, 5 } /* d_48kHz */

};

+#else

+static const uint8_t delay_length_d[NO_ALLPASS_LINKS] = {

+    3, 4, 5 /* d_48kHz */

+};

+#endif

 static const real_t filter_a[NO_ALLPASS_LINKS] = { /* a(m) = exp(-d_48kHz(m)/7) */

     FRAC_CONST(0.65143905753106),

     FRAC_CONST(0.56471812200776),

@@ -164,9 +170,12 @@

 static void hybrid_synthesis(hyb_info *hyb, qmf_t X[32][64], qmf_t X_hybrid[32][32],

                              uint8_t use34);

 static int8_t delta_clip(int8_t i, int8_t min, int8_t max);

-static void delta_decode(uint8_t enable, int8_t *index, uint8_t *index_prev,

+static void delta_decode(uint8_t enable, int8_t *index, int8_t *index_prev,

                          uint8_t dt_flag, uint8_t nr_par, uint8_t stride,

                          int8_t min_index, int8_t max_index);

+static void delta_modulo_decode(uint8_t enable, int8_t *index, int8_t *index_prev,

+                                uint8_t dt_flag, uint8_t nr_par, uint8_t stride,

+                                int8_t log2modulo);

 static void map20indexto34(int8_t *index, uint8_t bins);

 #ifdef PS_LOW_POWER

 static void map34indexto20(int8_t *index, uint8_t bins);

@@ -598,8 +607,10 @@

         return i;

+//int iid = 0;

 /* delta decode array */

-static void delta_decode(uint8_t enable, int8_t *index, uint8_t *index_prev,

+static void delta_decode(uint8_t enable, int8_t *index, int8_t *index_prev,

                          uint8_t dt_flag, uint8_t nr_par, uint8_t stride,

                          int8_t min_index, int8_t max_index)

@@ -622,8 +633,23 @@

             /* delta coded in time direction */

             for (i = 0; i < nr_par; i++)

+                //int8_t tmp2;

+                //int8_t tmp = index[i];

+                //printf("%d %d\n", index_prev[i*stride], index[i]);

+                //printf("%d\n", index[i]);

                 index[i] = index_prev[i*stride] + index[i];

+                //tmp2 = index[i];

                 index[i] = delta_clip(index[i], min_index, max_index);

+                //if (iid)

+                //{

+                //    if (index[i] == 7)

+                //    {

+                //        printf("%d %d %d\n", index_prev[i*stride], tmp, tmp2);

+                //    }

+                //}

     } else {

@@ -644,6 +670,54 @@

+/* delta modulo decode array

+ *

+ * Undoes the delta coding of a parameter array in place and wraps every

+ * decoded value into the range [0, 2^log2modulo - 1].

+ *

+ * enable      1: decode; any other value: the first nr_par indices are set to zero

+ * index       in: delta values, out: decoded indices

+ * index_prev  previously decoded indices, read at i*stride; only read when

+ *             dt_flag != 0

+ * dt_flag     0: delta coded in frequency direction, otherwise in time direction

+ * nr_par      number of delta values in index

+ * stride      step used to read index_prev; when 2 (coarse) index[0] is cleared

+ *             and the result is expanded in place, writing up to index[2*nr_par-1]

+ * log2modulo  in: log2 value of the modulo value to allow using AND instead of MOD;

+ *             must be in the range 0..6 so that the mask fits an int8_t

+ */

+static void delta_modulo_decode(uint8_t enable, int8_t *index, int8_t *index_prev,

+                                uint8_t dt_flag, uint8_t nr_par, uint8_t stride,

+                                int8_t log2modulo)

+{

+    int8_t i;

+    /* x mod 2^k == x & (2^k - 1): the AND mask has to be derived from the log2

+     * value; using log2modulo itself as the mask (3 for modulo 8) would wrap

+     * modulo 4 and corrupt the indices 4..7 */

+    const int8_t and_modulo = (int8_t)((1 << log2modulo) - 1);

+    if (enable == 1)

+    {

+        if (dt_flag == 0)

+        {

+            /* delta coded in frequency direction */

+            index[0] = 0 + index[0];

+            index[0] &= and_modulo;

+            for (i = 1; i < nr_par; i++)

+            {

+                index[i] = index[i-1] + index[i];

+                index[i] &= and_modulo;

+            }

+        } else {

+            /* delta coded in time direction */

+            for (i = 0; i < nr_par; i++)

+            {

+                index[i] = index_prev[i*stride] + index[i];

+                index[i] &= and_modulo;

+            }

+        }

+    } else {

+        /* set indices to zero */

+        for (i = 0; i < nr_par; i++)

+        {

+            index[i] = 0;

+        }

+    }

+    /* coarse */

+    if (stride == 2)

+    {

+        index[0] = 0;

+        /* walk backwards so every source entry index[i>>1] is still unmodified when it is copied */

+        for (i = (nr_par<<1)-1; i > 0; i--)

+        {

+            index[i] = index[i>>1];

+        }

+    }

+}

 #ifdef PS_LOW_POWER

 static void map34indexto20(int8_t *index, uint8_t bins)

@@ -751,11 +825,13 @@

             opd_index_prev = ps->opd_index[env - 1];

+//        iid = 1;

         /* delta decode iid parameters */

         delta_decode(ps->enable_iid, ps->iid_index[env], iid_index_prev,

             ps->iid_dt[env], ps->nr_iid_par,

             (ps->iid_mode == 0 || ps->iid_mode == 3) ? 2 : 1,

             -num_iid_steps, num_iid_steps);

+//        iid = 0;

         /* delta decode icc parameters */

         delta_decode(ps->enable_icc, ps->icc_index[env], icc_index_prev,

@@ -763,13 +839,13 @@

             (ps->icc_mode == 0 || ps->icc_mode == 3) ? 2 : 1,

             0, 7);

-        /* delta decode ipd parameters */

-        delta_decode(ps->enable_ipdopd, ps->ipd_index[env], ipd_index_prev,

-            ps->ipd_dt[env], ps->nr_ipdopd_par, 1, -8, 8);

+        /* delta modulo decode ipd parameters */

+        delta_modulo_decode(ps->enable_ipdopd, ps->ipd_index[env], ipd_index_prev,

+            ps->ipd_dt[env], ps->nr_ipdopd_par, 1, /*log2(8)*/ 3);

-        /* delta decode opd parameters */

-        delta_decode(ps->enable_ipdopd, ps->opd_index[env], opd_index_prev,

-            ps->opd_dt[env], ps->nr_ipdopd_par, 1, -8, 8);

+        /* delta modulo decode opd parameters */

+        delta_modulo_decode(ps->enable_ipdopd, ps->opd_index[env], opd_index_prev,

+            ps->opd_dt[env], ps->nr_ipdopd_par, 1, /*log2(8)*/ 3);

     /* handle error case */

@@ -1318,6 +1394,30 @@

 #define ps_sqrt(A) sqrt(A)

 #endif

+static const real_t ipdopd_cos_tab[] = { /* entry i holds cos(i*pi/4) for i = 0..8; looked up with abs() of an ipd/opd index */

+    FRAC_CONST(1.000000000000000),

+    FRAC_CONST(0.707106781186548),

+    FRAC_CONST(0.000000000000000),

+    FRAC_CONST(-0.707106781186547),

+    FRAC_CONST(-1.000000000000000),

+    FRAC_CONST(-0.707106781186548),

+    FRAC_CONST(-0.000000000000000),

+    FRAC_CONST(0.707106781186547),

+    FRAC_CONST(1.000000000000000)

+};

+static const real_t ipdopd_sin_tab[] = { /* entry i holds sin(i*pi/4) for i = 0..8; looked up with abs() of an ipd/opd index */

+    FRAC_CONST(0.000000000000000),

+    FRAC_CONST(0.707106781186547),

+    FRAC_CONST(1.000000000000000),

+    FRAC_CONST(0.707106781186548),

+    FRAC_CONST(0.000000000000000),

+    FRAC_CONST(-0.707106781186547),

+    FRAC_CONST(-1.000000000000000),

+    FRAC_CONST(-0.707106781186548),

+    FRAC_CONST(-0.000000000000000)

+};

 static void ps_mix_phase(ps_info *ps, qmf_t X_left[38][64], qmf_t X_right[38][64],

                          qmf_t X_hybrid_left[32][32], qmf_t X_hybrid_right[32][32])

@@ -1379,6 +1479,8 @@

                 beta = alpha * ( c_1 - c_2 ) / sqrt(2.0);

*/

+                //printf("%d\n", ps->iid_index[env][bk]);

                 /* calculate the scalefactors c_1 and c_2 from the intensity differences */

                 c_1 = sf_iid[no_iid_steps + ps->iid_index[env][bk]];

                 c_2 = sf_iid[no_iid_steps - ps->iid_index[env][bk]];

@@ -1507,10 +1609,10 @@

 #endif

                 /* save current value */

-                RE(ps->ipd_prev[bk][i]) = (float)cos((float)( M_PI/4.0f ) * ps->ipd_index[env][bk]);

-                IM(ps->ipd_prev[bk][i]) = (float)sin((float)( M_PI/4.0f ) * ps->ipd_index[env][bk]);

-                RE(ps->opd_prev[bk][i]) = (float)cos((float)( M_PI/4.0f ) * ps->opd_index[env][bk]);

-                IM(ps->opd_prev[bk][i]) = (float)sin((float)( M_PI/4.0f ) * ps->opd_index[env][bk]);

+                RE(ps->ipd_prev[bk][i]) = ipdopd_cos_tab[abs(ps->ipd_index[env][bk])];

+                IM(ps->ipd_prev[bk][i]) = ipdopd_sin_tab[abs(ps->ipd_index[env][bk])];

+                RE(ps->opd_prev[bk][i]) = ipdopd_cos_tab[abs(ps->opd_index[env][bk])];

+                IM(ps->opd_prev[bk][i]) = ipdopd_sin_tab[abs(ps->opd_index[env][bk])];

                 /* add current value */

                 RE(tempLeft)  += RE(ps->ipd_prev[bk][i]);

@@ -1550,6 +1652,10 @@

                 RE(phaseRight) = (float)cos(opd);

                 IM(phaseRight) = (float)sin(opd);

 #else

+                // x = IM(tempLeft)

+                // y = RE(tempLeft)

+                // p = IM(tempRight)

+                // q = RE(tempRight)

                 // cos(atan2(x,y)) = 1/sqrt(1 + (x*x)/(y*y))

                 // sin(atan2(x,y)) = x/(y*sqrt(1 + (x*x)/(y*y)))

                 // cos(atan2(x,y)-atan2(p,q)) = (y*q+x*p)/(y*q * sqrt(1 + (x*x)/(y*y)) * sqrt(1 + (p*p)/(q*q)));

@@ -1756,6 +1862,7 @@

     for (i = 0; i < NO_ALLPASS_LINKS; i++)

         ps->delay_buf_index_ser[i] = 0;

+#ifdef PARAM_32KHZ

         if (sr_index <= 5) /* >= 32 kHz*/

             ps->num_sample_delay_ser[i] = delay_length_d[1][i];

@@ -1762,8 +1869,13 @@

         } else {

             ps->num_sample_delay_ser[i] = delay_length_d[0][i];

+#else

+        /* THESE ARE CONSTANTS NOW */

+        ps->num_sample_delay_ser[i] = delay_length_d[i];

+#endif

+#ifdef PARAM_32KHZ

     if (sr_index <= 5) /* >= 32 kHz*/

         short_delay_band = 35;

@@ -1776,7 +1888,15 @@

         ps->alpha_decay = FRAC_CONST(0.58664621951003);

         ps->alpha_smooth = FRAC_CONST(0.6);

+#else

+    /* THESE ARE CONSTANTS NOW */

+    short_delay_band = 35;

+    ps->nr_allpass_bands = 22;

+    ps->alpha_decay = FRAC_CONST(0.76592833836465);

+    ps->alpha_smooth = FRAC_CONST(0.25);

+#endif

+    /* THESE ARE CONSTANT NOW IF PS IS INDEPENDENT OF SAMPLERATE */

     for (i = 0; i < short_delay_band; i++)

         ps->delay_D[i] = 14;

--- a/libfaad/ps_syntax.c

+++ b/libfaad/ps_syntax.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: ps_syntax.c,v 1.2 2004/05/17 10:18:03 menno Exp $

+** $Id: ps_syntax.c,v 1.4 2004/07/31 15:48:56 menno Exp $

**/

 #include "common.h"

@@ -94,7 +94,7 @@

     { /*4*/ -27, 9 },             /* index 8: 9 bits: 11111111x */

     { /*-5*/ -36, 10 },           /* index 9: 10 bits: 111111111x */

     { /*5*/ -26, 11 },            /* index 10: 11 bits: 1111111111x */

-    { /*-6*/ -27, 12 },           /* index 11: 12 bits: 11111111111x */

+    { /*-6*/ -37, 12 },           /* index 11: 12 bits: 11111111111x */

     { /*6*/ -25, 13 },            /* index 12: 13 bits: 111111111111x */

     { /*7*/ -24, 14 },            /* index 13: 14 bits: 1111111111111x */

     { /*-7*/ -38, 15 },           /* index 14: 15 bits: 11111111111111x */

--- a/libfaad/sbr_dct.c

+++ b/libfaad/sbr_dct.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: sbr_dct.c,v 1.12 2004/03/10 19:45:41 menno Exp $

+** $Id: sbr_dct.c,v 1.15 2004/09/04 14:56:28 menno Exp $

**/

 #include "common.h"

@@ -1449,933 +1449,414 @@

 #else

-void DCT4_64(real_t *y, real_t *x)

+#define n 32

+#define log2n 5

+// w_array_real[i] = cos(2*M_PI*i/32)

+static const real_t w_array_real[] = {

+    FRAC_CONST(1.000000000000000), FRAC_CONST(0.980785279337272),

+    FRAC_CONST(0.923879528329380), FRAC_CONST(0.831469603195765),

+    FRAC_CONST(0.707106765732237), FRAC_CONST(0.555570210304169),

+    FRAC_CONST(0.382683402077046), FRAC_CONST(0.195090284503576),

+    FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090370246552),

+    FRAC_CONST(-0.382683482845162), FRAC_CONST(-0.555570282993553),

+    FRAC_CONST(-0.707106827549476), FRAC_CONST(-0.831469651765257),

+    FRAC_CONST(-0.923879561784627), FRAC_CONST(-0.980785296392607)

+};

+// w_array_imag[i] = sin(-2*M_PI*i/32)

+static const real_t w_array_imag[] = {

+    FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090327375064),

+    FRAC_CONST(-0.382683442461104), FRAC_CONST(-0.555570246648862),

+    FRAC_CONST(-0.707106796640858), FRAC_CONST(-0.831469627480512),

+    FRAC_CONST(-0.923879545057005), FRAC_CONST(-0.980785287864940),

+    FRAC_CONST(-1.000000000000000), FRAC_CONST(-0.980785270809601),

+    FRAC_CONST(-0.923879511601754), FRAC_CONST(-0.831469578911016),

+    FRAC_CONST(-0.707106734823616), FRAC_CONST(-0.555570173959476),

+    FRAC_CONST(-0.382683361692986), FRAC_CONST(-0.195090241632088)

+};

+// FFT decimation in frequency

+// 4*16*2+16=128+16=144 multiplications

+// 6*16*2+10*8+4*16*2=192+80+128=400 additions

+static void fft_dif(real_t * Real, real_t * Imag)

-    int16_t i0;

-    ALIGN static real_t t2[64];

+    real_t w_real, w_imag; // For faster access

+    real_t point1_real, point1_imag, point2_real, point2_imag; // For faster access

+    uint32_t j, i, i2, w_index; // Counters

-    t2[0] = x[0];

-    for (i0=0; i0<31; i0++)

+    // First 2 stages of 32 point FFT decimation in frequency

+    // 4*16*2=64*2=128 multiplications

+    // 6*16*2=96*2=192 additions

+	// Stage 1 of 32 point FFT decimation in frequency

+    for (i = 0; i < 16; i++)

-        t2[2*i0+1] = x[2*i0+1] - x[2*i0+2];

-        t2[2*i0+2] = x[2*i0+1] + x[2*i0+2];

+        point1_real = Real[i];

+        point1_imag = Imag[i];

+        i2 = i+16;

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

+        w_real = w_array_real[i];

+        w_imag = w_array_imag[i];

+        // temp1 = x[i] - x[i2]

+        point1_real -= point2_real;

+        point1_imag -= point2_imag;

+        // x[i1] = x[i] + x[i2]

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // x[i2] = (x[i] - x[i2]) * w

+        Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));

+        Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));

+     }

+    // Stage 2 of 32 point FFT decimation in frequency

+    for (j = 0, w_index = 0; j < 8; j++, w_index += 2)

+    {

+        w_real = w_array_real[w_index];

+        w_imag = w_array_imag[w_index];

+    	i = j;

+        point1_real = Real[i];

+        point1_imag = Imag[i];

+        i2 = i+8;

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

+        // temp1 = x[i] - x[i2]

+        point1_real -= point2_real;

+        point1_imag -= point2_imag;

+        // x[i1] = x[i] + x[i2]

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // x[i2] = (x[i] - x[i2]) * w

+        Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));

+        Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));

+        i = j+16;

+        point1_real = Real[i];

+        point1_imag = Imag[i];

+        i2 = i+8;

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

+        // temp1 = x[i] - x[i2]

+        point1_real -= point2_real;

+        point1_imag -= point2_imag;

+        // x[i1] = x[i] + x[i2]

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // x[i2] = (x[i] - x[i2]) * w

+        Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));

+        Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));

-    t2[63] = x[63];

-    DCT4_64_kernel(y, t2);

-}

+    // Stage 3 of 32 point FFT decimation in frequency

+    // 2*4*2=16 multiplications

+    // 4*4*2+6*4*2=10*8=80 additions

+    for (i = 0; i < n; i += 8)

+    {

+        i2 = i+4;

+        point1_real = Real[i];

+        point1_imag = Imag[i];

-void DCT4_64_kernel(real_t *y, real_t *t2)

-{

-    real_t f2, f3, f4, f5, f6, f7, f8;

-    real_t f9, f10, f11, f12, f13, f14, f15;

-    real_t f16, f17, f18, f19, f20, f21, f22;

-    real_t f23, f24, f25, f26, f27, f28, f29;

-    real_t f30, f31, f32, f33, f34, f35, f36;

-    real_t f37, f38, f39, f40, f41, f42, f43;

-    real_t f44, f45, f46, f47, f48, f49, f50;

-    real_t f51, f52, f53, f54, f55, f56, f57;

-    real_t f58, f59, f60, f61, f62, f63, f64;

-    real_t f65, f66, f67, f68, f69, f70, f71;

-    real_t f72, f73, f74, f75, f76, f77, f78;

-    real_t f79, f80, f81, f82, f83, f84, f85;

-    real_t f86, f87, f88, f89, f90, f91, f92;

-    real_t f93, f94, f95, f96, f97, f98, f99;

-    real_t f100, f101, f102, f103, f104, f105, f106;

-    real_t f107, f108, f109, f110, f111, f112, f113;

-    real_t f114, f115, f116, f117, f118, f119, f120;

-    real_t f121, f122, f123, f124, f125, f126, f127;

-    real_t f128, f129, f130, f131, f132, f133, f134;

-    real_t f135, f136, f137, f138, f139, f140, f141;

-    real_t f142, f143, f144, f145, f146, f147, f148;

-    real_t f149, f150, f151, f152, f153, f154, f155;

-    real_t f156, f157, f158, f159, f160, f161, f162;

-    real_t f163, f164, f165, f166, f167, f168, f169;

-    real_t f170, f171, f172, f173, f174, f175, f176;

-    real_t f177, f178, f179, f180, f181, f182, f183;

-    real_t f184, f185, f186, f187, f188, f189, f190;

-    real_t f191, f192, f193, f194, f195, f196, f197;

-    real_t f198, f199, f200, f201, f202, f203, f204;

-    real_t f205, f206, f207, f208, f209, f210, f211;

-    real_t f212, f213, f214, f215, f216, f217, f218;

-    real_t f219, f220, f221, f222, f223, f224, f225;

-    real_t f226, f227, f228, f229, f230, f231, f232;

-    real_t f233, f234, f235, f236, f237, f238, f239;

-    real_t f240, f241, f242, f243, f244, f245, f246;

-    real_t f247, f248, f249, f250, f251, f252, f253;

-    real_t f254, f255, f256, f257, f258, f259, f260;

-    real_t f261, f262, f263, f264, f265, f266, f267;

-    real_t f268, f269, f270, f271, f272, f273, f274;

-    real_t f275, f276, f277, f278, f279, f280, f281;

-    real_t f282, f283, f284, f285, f286, f287, f288;

-    real_t f289, f290, f291, f292, f293, f294, f295;

-    real_t f296, f297, f298, f299, f300, f301, f302;

-    real_t f303, f304, f305, f306, f307, f308, f309;

-    real_t f310, f311, f312, f313, f314, f315, f316;

-    real_t f317, f318, f319, f320, f321, f322, f323;

-    real_t f324, f325, f326, f327, f328, f329, f330;

-    real_t f331, f332, f333, f334, f335, f336, f337;

-    real_t f338, f339, f340, f341, f342, f343, f344;

-    real_t f345, f346, f347, f348, f349, f350, f351;

-    real_t f352, f353, f354, f355, f356, f357, f358;

-    real_t f359, f360, f361, f362, f363, f364, f365;

-    real_t f366, f367, f368, f369, f370, f371, f372;

-    real_t f373, f374, f375, f376, f377, f378, f379;

-    real_t f380, f381, f382, f383, f384, f385, f386;

-    real_t f387, f388, f389, f390, f391, f392, f393;

-    real_t f394, f395, f396, f397, f398, f399, f400;

-    real_t f401, f402, f403, f404, f405, f406, f407;

-    real_t f408, f409, f410, f411, f412, f413, f414;

-    real_t f415, f416, f417, f418, f419, f420, f421;

-    real_t f422, f423, f424, f425, f426, f427, f428;

-    real_t f429, f430, f431, f432, f433, f434, f435;

-    real_t f436, f437, f438, f439, f440, f441, f442;

-    real_t f443, f444, f445, f446, f447, f448, f449;

-    real_t f450, f451, f452, f453, f454, f455, f456;

-    real_t f457, f458, f459, f460, f461, f462, f463;

-    real_t f464, f465, f466, f467, f468, f469, f470;

-    real_t f471, f472, f473, f474, f475, f476, f477;

-    real_t f478, f479, f480, f481, f482, f483, f484;

-    real_t f485, f486, f487, f488, f489, f490, f491;

-    real_t f492, f493, f494, f495, f496, f497, f498;

-    real_t f499, f500, f501, f502, f503, f504, f505;

-    real_t f506, f507, f508, f509, f510, f511, f512;

-    real_t f513, f514, f515, f516, f517, f518, f519;

-    real_t f520, f521, f522, f523, f524, f525, f526;

-    real_t f527, f528, f529, f530, f531, f532, f533;

-    real_t f534, f535, f536, f537, f538, f539, f540;

-    real_t f541, f542, f543, f544, f545, f546, f547;

-    real_t f548, f549, f550, f551, f552, f553, f554;

-    real_t f555, f556, f557, f558, f559, f560, f561;

-    real_t f562, f563, f564, f565, f566, f567, f568;

-    real_t f569, f570, f571, f572, f573, f574, f575;

-    real_t f576, f577, f578, f579, f580, f581, f582;

-    real_t f583, f584, f585, f586, f587, f588, f589;

-    real_t f590, f591, f592, f593, f594, f595, f596;

-    real_t f597, f598, f599, f600, f601, f602, f603;

-    real_t f604, f605, f606, f607, f608, f609, f610;

-    real_t f611, f612, f613, f614, f615, f618, f619;

-    real_t f620, f621, f624, f625, f626, f627, f630;

-    real_t f631, f632, f633, f636, f637, f638, f639;

-    real_t f642, f643, f644, f645, f648, f649, f650;

-    real_t f651, f654, f655, f656, f657, f660, f661;

-    real_t f662, f663, f666, f667, f668, f669, f672;

-    real_t f673, f674, f675, f678, f679, f680, f681;

-    real_t f684, f685, f686, f687, f690, f691, f692;

-    real_t f693, f696, f697, f698, f699, f702, f703;

-    real_t f704, f705, f708, f709, f710, f711, f714;

-    real_t f715, f716, f717, f720, f721, f722, f723;

-    real_t f726, f727, f728, f729, f732, f733, f734;

-    real_t f735, f738, f739, f740, f741, f744, f745;

-    real_t f746, f747, f750, f751, f752, f753, f756;

-    real_t f757, f758, f759, f762, f763, f764, f765;

-    real_t f768, f769, f770, f771, f774, f775, f776;

-    real_t f777, f780, f781, f782, f783, f786, f787;

-    real_t f788, f789, f792, f793, f794, f795, f798;

-    real_t f799, f800, f801;

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

-    f2 = MUL_F(FRAC_CONST(0.7071067811865476), t2[32]);

-    f3 = t2[0] - f2;

-    f4 = t2[0] + f2;

-    f5 = t2[16] + t2[48];

-    f6 = MUL_C(COEF_CONST(1.3065629648763766), t2[16]);

-    f7 = MUL_F(FRAC_CONST(-0.9238795325112866), f5);

-    f8 = MUL_F(FRAC_CONST(-0.5411961001461967), t2[48]);

-    f9 = f6 + f7;

-    f10 = f8 - f7;

-    f11 = f4 - f10;

-    f12 = f4 + f10;

-    f13 = f3 - f9;

-    f14 = f3 + f9;

-    f15 = t2[8] + t2[56];

-    f16 = MUL_C(COEF_CONST(1.1758756024193588), t2[8]);

-    f17 = MUL_F(FRAC_CONST(-0.9807852804032304), f15);

-    f18 = MUL_F(FRAC_CONST(-0.7856949583871021), t2[56]);

-    f19 = f16 + f17;

-    f20 = f18 - f17;

-    f21 = t2[24] + t2[40];

-    f22 = MUL_C(COEF_CONST(1.3870398453221473), t2[24]);

-    f23 = MUL_F(FRAC_CONST(-0.8314696123025455), f21);

-    f24 = MUL_F(FRAC_CONST(-0.2758993792829436), t2[40]);

-    f25 = f22 + f23;

-    f26 = f24 - f23;

-    f27 = f20 - f26;

-    f28 = f20 + f26;

-    f29 = MUL_F(FRAC_CONST(0.7071067811865476), f27);

-    f30 = f19 - f25;

-    f31 = f19 + f25;

-    f32 = MUL_F(FRAC_CONST(0.7071067811865476), f31);

-    f33 = f29 - f32;

-    f34 = f29 + f32;

-    f35 = f12 - f28;

-    f36 = f12 + f28;

-    f37 = f14 - f34;

-    f38 = f14 + f34;

-    f39 = f13 - f33;

-    f40 = f13 + f33;

-    f41 = f11 - f30;

-    f42 = f11 + f30;

-    f43 = t2[4] + t2[60];

-    f44 = MUL_C(COEF_CONST(1.0932018670017569), t2[4]);

-    f45 = MUL_F(FRAC_CONST(-0.9951847266721969), f43);

-    f46 = MUL_F(FRAC_CONST(-0.8971675863426368), t2[60]);

-    f47 = f44 + f45;

-    f48 = f46 - f45;

-    f49 = t2[12] + t2[52];

-    f50 = MUL_C(COEF_CONST(1.2472250129866711), t2[12]);

-    f51 = MUL_F(FRAC_CONST(-0.9569403357322089), f49);

-    f52 = MUL_F(FRAC_CONST(-0.6666556584777469), t2[52]);

-    f53 = f50 + f51;

-    f54 = f52 - f51;

-    f55 = t2[20] + t2[44];

-    f56 = MUL_C(COEF_CONST(1.3533180011743526), t2[20]);

-    f57 = MUL_F(FRAC_CONST(-0.8819212643483551), f55);

-    f58 = MUL_F(FRAC_CONST(-0.4105245275223575), t2[44]);

-    f59 = f56 + f57;

-    f60 = f58 - f57;

-    f61 = t2[28] + t2[36];

-    f62 = MUL_C(COEF_CONST(1.4074037375263826), t2[28]);

-    f63 = MUL_F(FRAC_CONST(-0.7730104533627369), f61);

-    f64 = MUL_F(FRAC_CONST(-0.1386171691990913), t2[36]);

-    f65 = f62 + f63;

-    f66 = f64 - f63;

-    f67 = f48 - f66;

-    f68 = f48 + f66;

-    f69 = f54 - f60;

-    f70 = f54 + f60;

-    f71 = f68 - f70;

-    f72 = f68 + f70;

-    f73 = MUL_F(FRAC_CONST(0.7071067811865476), f71);

-    f74 = f67 + f69;

-    f75 = MUL_C(COEF_CONST(1.3065629648763766), f67);

-    f76 = MUL_F(FRAC_CONST(-0.9238795325112866), f74);

-    f77 = MUL_F(FRAC_CONST(-0.5411961001461967), f69);

-    f78 = f75 + f76;

-    f79 = f77 - f76;

-    f80 = f47 - f65;

-    f81 = f47 + f65;

-    f82 = f53 - f59;

-    f83 = f53 + f59;

-    f84 = f81 + f83;

-    f85 = MUL_C(COEF_CONST(1.3065629648763770), f81);

-    f86 = MUL_F(FRAC_CONST(-0.3826834323650904), f84);

-    f87 = MUL_F(FRAC_CONST(0.5411961001461961), f83);

-    f88 = f85 + f86;

-    f89 = f87 - f86;

-    f90 = f80 - f82;

-    f91 = f80 + f82;

-    f92 = MUL_F(FRAC_CONST(0.7071067811865476), f91);

-    f93 = f79 - f89;

-    f94 = f79 + f89;

-    f95 = f73 - f92;

-    f96 = f73 + f92;

-    f97 = f78 - f88;

-    f98 = f78 + f88;

-    f99 = f36 - f72;

-    f100 = f36 + f72;

-    f101 = f38 - f94;

-    f102 = f38 + f94;

-    f103 = f40 - f93;

-    f104 = f40 + f93;

-    f105 = f42 - f96;

-    f106 = f42 + f96;

-    f107 = f41 - f95;

-    f108 = f41 + f95;

-    f109 = f39 - f98;

-    f110 = f39 + f98;

-    f111 = f37 - f97;

-    f112 = f37 + f97;

-    f113 = f35 - f90;

-    f114 = f35 + f90;

-    f115 = t2[2] + t2[62];

-    f116 = MUL_C(COEF_CONST(1.0478631305325901), t2[2]);

-    f117 = MUL_F(FRAC_CONST(-0.9987954562051724), f115);

-    f118 = MUL_F(FRAC_CONST(-0.9497277818777548), t2[62]);

-    f119 = f116 + f117;

-    f120 = f118 - f117;

-    f121 = t2[10] + t2[54];

-    f122 = MUL_C(COEF_CONST(1.2130114330978077), t2[10]);

-    f123 = MUL_F(FRAC_CONST(-0.9700312531945440), f121);

-    f124 = MUL_F(FRAC_CONST(-0.7270510732912803), t2[54]);

-    f125 = f122 + f123;

-    f126 = f124 - f123;

-    f127 = t2[18] + t2[46];

-    f128 = MUL_C(COEF_CONST(1.3315443865537255), t2[18]);

-    f129 = MUL_F(FRAC_CONST(-0.9039892931234433), f127);

-    f130 = MUL_F(FRAC_CONST(-0.4764341996931612), t2[46]);

-    f131 = f128 + f129;

-    f132 = f130 - f129;

-    f133 = t2[26] + t2[38];

-    f134 = MUL_C(COEF_CONST(1.3989068359730781), t2[26]);

-    f135 = MUL_F(FRAC_CONST(-0.8032075314806453), f133);

-    f136 = MUL_F(FRAC_CONST(-0.2075082269882124), t2[38]);

-    f137 = f134 + f135;

-    f138 = f136 - f135;

-    f139 = t2[34] + t2[30];

-    f140 = MUL_C(COEF_CONST(1.4125100802019777), t2[34]);

-    f141 = MUL_F(FRAC_CONST(-0.6715589548470187), f139);

-    f142 = MUL_F(FRAC_CONST(0.0693921705079402), t2[30]);

-    f143 = f140 + f141;

-    f144 = f142 - f141;

-    f145 = t2[42] + t2[22];

-    f146 = MUL_C(COEF_CONST(1.3718313541934939), t2[42]);

-    f147 = MUL_F(FRAC_CONST(-0.5141027441932219), f145);

-    f148 = MUL_F(FRAC_CONST(0.3436258658070501), t2[22]);

-    f149 = f146 + f147;

-    f150 = f148 - f147;

-    f151 = t2[50] + t2[14];

-    f152 = MUL_C(COEF_CONST(1.2784339185752409), t2[50]);

-    f153 = MUL_F(FRAC_CONST(-0.3368898533922200), f151);

-    f154 = MUL_F(FRAC_CONST(0.6046542117908008), t2[14]);

-    f155 = f152 + f153;

-    f156 = f154 - f153;

-    f157 = t2[58] + t2[6];

-    f158 = MUL_C(COEF_CONST(1.1359069844201433), t2[58]);

-    f159 = MUL_F(FRAC_CONST(-0.1467304744553624), f157);

-    f160 = MUL_F(FRAC_CONST(0.8424460355094185), t2[6]);

-    f161 = f158 + f159;

-    f162 = f160 - f159;

-    f163 = f120 - f144;

-    f164 = f120 + f144;

-    f165 = f119 - f143;

-    f166 = f119 + f143;

-    f167 = f126 - f150;

-    f168 = f126 + f150;

-    f169 = f125 - f149;

-    f170 = f125 + f149;

-    f171 = f132 - f156;

-    f172 = f132 + f156;

-    f173 = f131 - f155;

-    f174 = f131 + f155;

-    f175 = f138 - f162;

-    f176 = f138 + f162;

-    f177 = f137 - f161;

-    f178 = f137 + f161;

-    f179 = f163 + f165;

-    f180 = MUL_C(COEF_CONST(1.1758756024193588), f163);

-    f181 = MUL_F(FRAC_CONST(-0.9807852804032304), f179);

-    f182 = MUL_F(FRAC_CONST(-0.7856949583871021), f165);

-    f183 = f180 + f181;

-    f184 = f182 - f181;

-    f185 = f167 + f169;

-    f186 = MUL_C(COEF_CONST(1.3870398453221475), f167);

-    f187 = MUL_F(FRAC_CONST(-0.5555702330196022), f185);

-    f188 = MUL_F(FRAC_CONST(0.2758993792829431), f169);

-    f189 = f186 + f187;

-    f190 = f188 - f187;

-    f191 = f171 + f173;

-    f192 = MUL_F(FRAC_CONST(0.7856949583871022), f171);

-    f193 = MUL_F(FRAC_CONST(0.1950903220161283), f191);

-    f194 = MUL_C(COEF_CONST(1.1758756024193586), f173);

-    f195 = f192 + f193;

-    f196 = f194 - f193;

-    f197 = f175 + f177;

-    f198 = MUL_F(FRAC_CONST(-0.2758993792829430), f175);

-    f199 = MUL_F(FRAC_CONST(0.8314696123025452), f197);

-    f200 = MUL_C(COEF_CONST(1.3870398453221475), f177);

-    f201 = f198 + f199;

-    f202 = f200 - f199;

-    f203 = f164 - f172;

-    f204 = f164 + f172;

-    f205 = f166 - f174;

-    f206 = f166 + f174;

-    f207 = f168 - f176;

-    f208 = f168 + f176;

-    f209 = f170 - f178;

-    f210 = f170 + f178;

-    f211 = f184 - f196;

-    f212 = f184 + f196;

-    f213 = f183 - f195;

-    f214 = f183 + f195;

-    f215 = f190 - f202;

-    f216 = f190 + f202;

-    f217 = f189 - f201;

-    f218 = f189 + f201;

-    f219 = f203 + f205;

-    f220 = MUL_C(COEF_CONST(1.3065629648763766), f203);

-    f221 = MUL_F(FRAC_CONST(-0.9238795325112866), f219);

-    f222 = MUL_F(FRAC_CONST(-0.5411961001461967), f205);

-    f223 = f220 + f221;

-    f224 = f222 - f221;

-    f225 = f207 + f209;

-    f226 = MUL_F(FRAC_CONST(0.5411961001461969), f207);

-    f227 = MUL_F(FRAC_CONST(0.3826834323650898), f225);

-    f228 = MUL_C(COEF_CONST(1.3065629648763766), f209);

-    f229 = f226 + f227;

-    f230 = f228 - f227;

-    f231 = f211 + f213;

-    f232 = MUL_C(COEF_CONST(1.3065629648763766), f211);

-    f233 = MUL_F(FRAC_CONST(-0.9238795325112866), f231);

-    f234 = MUL_F(FRAC_CONST(-0.5411961001461967), f213);

-    f235 = f232 + f233;

-    f236 = f234 - f233;

-    f237 = f215 + f217;

-    f238 = MUL_F(FRAC_CONST(0.5411961001461969), f215);

-    f239 = MUL_F(FRAC_CONST(0.3826834323650898), f237);

-    f240 = MUL_C(COEF_CONST(1.3065629648763766), f217);

-    f241 = f238 + f239;

-    f242 = f240 - f239;

-    f243 = f204 - f208;

-    f244 = f204 + f208;

-    f245 = f206 - f210;

-    f246 = f206 + f210;

-    f247 = f224 - f230;

-    f248 = f224 + f230;

-    f249 = f223 - f229;

-    f250 = f223 + f229;

-    f251 = f212 - f216;

-    f252 = f212 + f216;

-    f253 = f214 - f218;

-    f254 = f214 + f218;

-    f255 = f236 - f242;

-    f256 = f236 + f242;

-    f257 = f235 - f241;

-    f258 = f235 + f241;

-    f259 = f243 - f245;

-    f260 = f243 + f245;

-    f261 = MUL_F(FRAC_CONST(0.7071067811865474), f259);

-    f262 = MUL_F(FRAC_CONST(0.7071067811865474), f260);

-    f263 = f247 - f249;

-    f264 = f247 + f249;

-    f265 = MUL_F(FRAC_CONST(0.7071067811865474), f263);

-    f266 = MUL_F(FRAC_CONST(0.7071067811865474), f264);

-    f267 = f251 - f253;

-    f268 = f251 + f253;

-    f269 = MUL_F(FRAC_CONST(0.7071067811865474), f267);

-    f270 = MUL_F(FRAC_CONST(0.7071067811865474), f268);

-    f271 = f255 - f257;

-    f272 = f255 + f257;

-    f273 = MUL_F(FRAC_CONST(0.7071067811865474), f271);

-    f274 = MUL_F(FRAC_CONST(0.7071067811865474), f272);

-    f275 = f100 - f244;

-    f276 = f100 + f244;

-    f277 = f102 - f252;

-    f278 = f102 + f252;

-    f279 = f104 - f256;

-    f280 = f104 + f256;

-    f281 = f106 - f248;

-    f282 = f106 + f248;

-    f283 = f108 - f266;

-    f284 = f108 + f266;

-    f285 = f110 - f274;

-    f286 = f110 + f274;

-    f287 = f112 - f270;

-    f288 = f112 + f270;

-    f289 = f114 - f262;

-    f290 = f114 + f262;

-    f291 = f113 - f261;

-    f292 = f113 + f261;

-    f293 = f111 - f269;

-    f294 = f111 + f269;

-    f295 = f109 - f273;

-    f296 = f109 + f273;

-    f297 = f107 - f265;

-    f298 = f107 + f265;

-    f299 = f105 - f250;

-    f300 = f105 + f250;

-    f301 = f103 - f258;

-    f302 = f103 + f258;

-    f303 = f101 - f254;

-    f304 = f101 + f254;

-    f305 = f99 - f246;

-    f306 = f99 + f246;

-    f307 = t2[1] - t2[61];

-    f308 = MUL_C(COEF_CONST(1.0478631305325901), t2[1]);

-    f309 = MUL_F(FRAC_CONST(-0.9987954562051724), f307);

-    f310 = MUL_F(FRAC_CONST(-0.9497277818777548), t2[61]);

-    f311 = f308 + f309;

-    f312 = f309 + f310;

-    f313 = t2[9] - t2[53];

-    f314 = MUL_C(COEF_CONST(1.2130114330978077), t2[9]);

-    f315 = MUL_F(FRAC_CONST(-0.9700312531945440), f313);

-    f316 = MUL_F(FRAC_CONST(-0.7270510732912803), t2[53]);

-    f317 = f314 + f315;

-    f318 = f315 + f316;

-    f319 = t2[17] - t2[45];

-    f320 = MUL_C(COEF_CONST(1.3315443865537255), t2[17]);

-    f321 = MUL_F(FRAC_CONST(-0.9039892931234433), f319);

-    f322 = MUL_F(FRAC_CONST(-0.4764341996931612), t2[45]);

-    f323 = f320 + f321;

-    f324 = f321 + f322;

-    f325 = t2[25] - t2[37];

-    f326 = MUL_C(COEF_CONST(1.3989068359730781), t2[25]);

-    f327 = MUL_F(FRAC_CONST(-0.8032075314806453), f325);

-    f328 = MUL_F(FRAC_CONST(-0.2075082269882124), t2[37]);

-    f329 = f326 + f327;

-    f330 = f327 + f328;

-    f331 = t2[33] - t2[29];

-    f332 = MUL_C(COEF_CONST(1.4125100802019777), t2[33]);

-    f333 = MUL_F(FRAC_CONST(-0.6715589548470187), f331);

-    f334 = MUL_F(FRAC_CONST(0.0693921705079402), t2[29]);

-    f335 = f332 + f333;

-    f336 = f333 + f334;

-    f337 = t2[41] - t2[21];

-    f338 = MUL_C(COEF_CONST(1.3718313541934939), t2[41]);

-    f339 = MUL_F(FRAC_CONST(-0.5141027441932219), f337);

-    f340 = MUL_F(FRAC_CONST(0.3436258658070501), t2[21]);

-    f341 = f338 + f339;

-    f342 = f339 + f340;

-    f343 = t2[49] - t2[13];

-    f344 = MUL_C(COEF_CONST(1.2784339185752409), t2[49]);

-    f345 = MUL_F(FRAC_CONST(-0.3368898533922200), f343);

-    f346 = MUL_F(FRAC_CONST(0.6046542117908008), t2[13]);

-    f347 = f344 + f345;

-    f348 = f345 + f346;

-    f349 = t2[57] - t2[5];

-    f350 = MUL_C(COEF_CONST(1.1359069844201433), t2[57]);

-    f351 = MUL_F(FRAC_CONST(-0.1467304744553624), f349);

-    f352 = MUL_F(FRAC_CONST(0.8424460355094185), t2[5]);

-    f353 = f350 + f351;

-    f354 = f351 + f352;

-    f355 = f336 - f312;

-    f356 = f312 + f336;

-    f357 = f311 - f335;

-    f358 = f311 + f335;

-    f359 = f342 - f318;

-    f360 = f318 + f342;

-    f361 = f317 - f341;

-    f362 = f317 + f341;

-    f363 = f348 - f324;

-    f364 = f324 + f348;

-    f365 = f323 - f347;

-    f366 = f323 + f347;

-    f367 = f354 - f330;

-    f368 = f330 + f354;

-    f369 = f329 - f353;

-    f370 = f329 + f353;

-    f371 = f355 + f357;

-    f372 = MUL_C(COEF_CONST(1.1758756024193588), f355);

-    f373 = MUL_F(FRAC_CONST(-0.9807852804032304), f371);

-    f374 = MUL_F(FRAC_CONST(-0.7856949583871021), f357);

-    f375 = f372 + f373;

-    f376 = f374 - f373;

-    f377 = f359 + f361;

-    f378 = MUL_C(COEF_CONST(1.3870398453221475), f359);

-    f379 = MUL_F(FRAC_CONST(-0.5555702330196022), f377);

-    f380 = MUL_F(FRAC_CONST(0.2758993792829431), f361);

-    f381 = f378 + f379;

-    f382 = f380 - f379;

-    f383 = f363 + f365;

-    f384 = MUL_F(FRAC_CONST(0.7856949583871022), f363);

-    f385 = MUL_F(FRAC_CONST(0.1950903220161283), f383);

-    f386 = MUL_C(COEF_CONST(1.1758756024193586), f365);

-    f387 = f384 + f385;

-    f388 = f386 - f385;

-    f389 = f367 + f369;

-    f390 = MUL_F(FRAC_CONST(-0.2758993792829430), f367);

-    f391 = MUL_F(FRAC_CONST(0.8314696123025452), f389);

-    f392 = MUL_C(COEF_CONST(1.3870398453221475), f369);

-    f393 = f390 + f391;

-    f394 = f392 - f391;

-    f395 = f364 - f356;

-    f396 = f356 + f364;

-    f397 = f358 - f366;

-    f398 = f358 + f366;

-    f399 = f368 - f360;

-    f400 = f360 + f368;

-    f401 = f362 - f370;

-    f402 = f362 + f370;

-    f403 = f376 - f388;

-    f404 = f376 + f388;

-    f405 = f375 - f387;

-    f406 = f375 + f387;

-    f407 = f382 - f394;

-    f408 = f382 + f394;

-    f409 = f381 - f393;

-    f410 = f381 + f393;

-    f411 = f395 + f397;

-    f412 = MUL_C(COEF_CONST(1.3065629648763766), f395);

-    f413 = MUL_F(FRAC_CONST(-0.9238795325112866), f411);

-    f414 = MUL_F(FRAC_CONST(-0.5411961001461967), f397);

-    f415 = f412 + f413;

-    f416 = f414 - f413;

-    f417 = f399 + f401;

-    f418 = MUL_F(FRAC_CONST(0.5411961001461969), f399);

-    f419 = MUL_F(FRAC_CONST(0.3826834323650898), f417);

-    f420 = MUL_C(COEF_CONST(1.3065629648763766), f401);

-    f421 = f418 + f419;

-    f422 = f420 - f419;

-    f423 = f403 + f405;

-    f424 = MUL_C(COEF_CONST(1.3065629648763766), f403);

-    f425 = MUL_F(FRAC_CONST(-0.9238795325112866), f423);

-    f426 = MUL_F(FRAC_CONST(-0.5411961001461967), f405);

-    f427 = f424 + f425;

-    f428 = f426 - f425;

-    f429 = f407 + f409;

-    f430 = MUL_F(FRAC_CONST(0.5411961001461969), f407);

-    f431 = MUL_F(FRAC_CONST(0.3826834323650898), f429);

-    f432 = MUL_C(COEF_CONST(1.3065629648763766), f409);

-    f433 = f430 + f431;

-    f434 = f432 - f431;

-    f435 = f400 - f396;

-    f436 = f396 + f400;

-    f437 = f398 - f402;

-    f438 = f398 + f402;

-    f439 = f416 - f422;

-    f440 = f416 + f422;

-    f441 = f415 - f421;

-    f442 = f415 + f421;

-    f443 = f404 - f408;

-    f444 = f404 + f408;

-    f445 = f406 - f410;

-    f446 = f406 + f410;

-    f447 = f428 - f434;

-    f448 = f428 + f434;

-    f449 = f427 - f433;

-    f450 = f427 + f433;

-    f451 = f435 - f437;

-    f452 = f435 + f437;

-    f453 = MUL_F(FRAC_CONST(0.7071067811865474), f451);

-    f454 = MUL_F(FRAC_CONST(0.7071067811865474), f452);

-    f455 = f439 - f441;

-    f456 = f439 + f441;

-    f457 = MUL_F(FRAC_CONST(0.7071067811865474), f455);

-    f458 = MUL_F(FRAC_CONST(0.7071067811865474), f456);

-    f459 = f443 - f445;

-    f460 = f443 + f445;

-    f461 = MUL_F(FRAC_CONST(0.7071067811865474), f459);

-    f462 = MUL_F(FRAC_CONST(0.7071067811865474), f460);

-    f463 = f447 - f449;

-    f464 = f447 + f449;

-    f465 = MUL_F(FRAC_CONST(0.7071067811865474), f463);

-    f466 = MUL_F(FRAC_CONST(0.7071067811865474), f464);

-    f467 = MUL_F(FRAC_CONST(0.7071067811865476), t2[31]);

-    f468 = t2[63] - f467;

-    f469 = t2[63] + f467;

-    f470 = t2[47] + t2[15];

-    f471 = MUL_C(COEF_CONST(1.3065629648763766), t2[47]);

-    f472 = MUL_F(FRAC_CONST(-0.9238795325112866), f470);

-    f473 = MUL_F(FRAC_CONST(-0.5411961001461967), t2[15]);

-    f474 = f471 + f472;

-    f475 = f473 - f472;

-    f476 = f469 - f475;

-    f477 = f469 + f475;

-    f478 = f468 - f474;

-    f479 = f468 + f474;

-    f480 = t2[55] + t2[7];

-    f481 = MUL_C(COEF_CONST(1.1758756024193588), t2[55]);

-    f482 = MUL_F(FRAC_CONST(-0.9807852804032304), f480);

-    f483 = MUL_F(FRAC_CONST(-0.7856949583871021), t2[7]);

-    f484 = f481 + f482;

-    f485 = f483 - f482;

-    f486 = t2[39] + t2[23];

-    f487 = MUL_C(COEF_CONST(1.3870398453221473), t2[39]);

-    f488 = MUL_F(FRAC_CONST(-0.8314696123025455), f486);

-    f489 = MUL_F(FRAC_CONST(-0.2758993792829436), t2[23]);

-    f490 = f487 + f488;

-    f491 = f489 - f488;

-    f492 = f485 - f491;

-    f493 = f485 + f491;

-    f494 = MUL_F(FRAC_CONST(0.7071067811865476), f492);

-    f495 = f484 - f490;

-    f496 = f484 + f490;

-    f497 = MUL_F(FRAC_CONST(0.7071067811865476), f496);

-    f498 = f494 - f497;

-    f499 = f494 + f497;

-    f500 = f477 - f493;

-    f501 = f477 + f493;

-    f502 = f479 - f499;

-    f503 = f479 + f499;

-    f504 = f478 - f498;

-    f505 = f478 + f498;

-    f506 = f476 - f495;

-    f507 = f476 + f495;

-    f508 = t2[59] + t2[3];

-    f509 = MUL_C(COEF_CONST(1.0932018670017569), t2[59]);

-    f510 = MUL_F(FRAC_CONST(-0.9951847266721969), f508);

-    f511 = MUL_F(FRAC_CONST(-0.8971675863426368), t2[3]);

-    f512 = f509 + f510;

-    f513 = f511 - f510;

-    f514 = t2[51] + t2[11];

-    f515 = MUL_C(COEF_CONST(1.2472250129866711), t2[51]);

-    f516 = MUL_F(FRAC_CONST(-0.9569403357322089), f514);

-    f517 = MUL_F(FRAC_CONST(-0.6666556584777469), t2[11]);

-    f518 = f515 + f516;

-    f519 = f517 - f516;

-    f520 = t2[43] + t2[19];

-    f521 = MUL_C(COEF_CONST(1.3533180011743526), t2[43]);

-    f522 = MUL_F(FRAC_CONST(-0.8819212643483551), f520);

-    f523 = MUL_F(FRAC_CONST(-0.4105245275223575), t2[19]);

-    f524 = f521 + f522;

-    f525 = f523 - f522;

-    f526 = t2[35] + t2[27];

-    f527 = MUL_C(COEF_CONST(1.4074037375263826), t2[35]);

-    f528 = MUL_F(FRAC_CONST(-0.7730104533627369), f526);

-    f529 = MUL_F(FRAC_CONST(-0.1386171691990913), t2[27]);

-    f530 = f527 + f528;

-    f531 = f529 - f528;

-    f532 = f513 - f531;

-    f533 = f513 + f531;

-    f534 = f519 - f525;

-    f535 = f519 + f525;

-    f536 = f533 - f535;

-    f537 = f533 + f535;

-    f538 = MUL_F(FRAC_CONST(0.7071067811865476), f536);

-    f539 = f532 + f534;

-    f540 = MUL_C(COEF_CONST(1.3065629648763766), f532);

-    f541 = MUL_F(FRAC_CONST(-0.9238795325112866), f539);

-    f542 = MUL_F(FRAC_CONST(-0.5411961001461967), f534);

-    f543 = f540 + f541;

-    f544 = f542 - f541;

-    f545 = f512 - f530;

-    f546 = f512 + f530;

-    f547 = f518 - f524;

-    f548 = f518 + f524;

-    f549 = f546 + f548;

-    f550 = MUL_C(COEF_CONST(1.3065629648763770), f546);

-    f551 = MUL_F(FRAC_CONST(-0.3826834323650904), f549);

-    f552 = MUL_F(FRAC_CONST(0.5411961001461961), f548);

-    f553 = f550 + f551;

-    f554 = f552 - f551;

-    f555 = f545 - f547;

-    f556 = f545 + f547;

-    f557 = MUL_F(FRAC_CONST(0.7071067811865476), f556);

-    f558 = f544 - f554;

-    f559 = f544 + f554;

-    f560 = f538 - f557;

-    f561 = f538 + f557;

-    f562 = f543 - f553;

-    f563 = f543 + f553;

-    f564 = f501 - f537;

-    f565 = f501 + f537;

-    f566 = f503 - f559;

-    f567 = f503 + f559;

-    f568 = f505 - f558;

-    f569 = f505 + f558;

-    f570 = f507 - f561;

-    f571 = f507 + f561;

-    f572 = f506 - f560;

-    f573 = f506 + f560;

-    f574 = f504 - f563;

-    f575 = f504 + f563;

-    f576 = f502 - f562;

-    f577 = f502 + f562;

-    f578 = f500 - f555;

-    f579 = f500 + f555;

-    f580 = f438 - f565;

-    f581 = f438 + f565;

-    f582 = f446 + f567;

-    f583 = f446 - f567;

-    f584 = f450 - f569;

-    f585 = f450 + f569;

-    f586 = f442 + f571;

-    f587 = f442 - f571;

-    f588 = f457 - f573;

-    f589 = f457 + f573;

-    f590 = f465 + f575;

-    f591 = f465 - f575;

-    f592 = f461 - f577;

-    f593 = f461 + f577;

-    f594 = f453 + f579;

-    f595 = f453 - f579;

-    f596 = f454 - f578;

-    f597 = f454 + f578;

-    f598 = f462 + f576;

-    f599 = f462 - f576;

-    f600 = f466 - f574;

-    f601 = f466 + f574;

-    f602 = f458 + f572;

-    f603 = f458 - f572;

-    f604 = f440 - f570;

-    f605 = f440 + f570;

-    f606 = f448 + f568;

-    f607 = f448 - f568;

-    f608 = f444 - f566;

-    f609 = f444 + f566;

-    f610 = f564 - f436;

-    f611 = f436 + f564;

-    f612 = f581 + f276;

-    f613 = MUL_F(FRAC_CONST(-0.9876531635534246), f581);

-    f614 = MUL_F(FRAC_CONST(0.9999247018391445), f612);

-    f615 = MUL_C(COEF_CONST(1.0121962401248645), f276);

-    y[0] = f613 + f614;

-    y[63] = f615 - f614;

-    f618 = f583 + f278;

-    f619 = MUL_F(FRAC_CONST(-0.9625151616469906), f583);

-    f620 = MUL_F(FRAC_CONST(0.9993223845883495), f618);

-    f621 = MUL_C(COEF_CONST(1.0361296075297086), f278);

-    y[1] = f619 + f620;

-    y[62] = f621 - f620;

-    f624 = f585 + f280;

-    f625 = MUL_F(FRAC_CONST(-0.9367973765979405), f585);

-    f626 = MUL_F(FRAC_CONST(0.9981181129001492), f624);

-    f627 = MUL_C(COEF_CONST(1.0594388492023579), f280);

-    y[2] = f625 + f626;

-    y[61] = f627 - f626;

-    f630 = f587 + f282;

-    f631 = MUL_F(FRAC_CONST(-0.9105152998383381), f587);

-    f632 = MUL_F(FRAC_CONST(0.9963126121827780), f630);

-    f633 = MUL_C(COEF_CONST(1.0821099245272179), f282);

-    y[3] = f631 + f632;

-    y[60] = f633 - f632;

-    f636 = f589 + f284;

-    f637 = MUL_F(FRAC_CONST(-0.8836847627084729), f589);

-    f638 = MUL_F(FRAC_CONST(0.9939069700023561), f636);

-    f639 = MUL_C(COEF_CONST(1.1041291772962392), f284);

-    y[4] = f637 + f638;

-    y[59] = f639 - f638;

-    f642 = f591 + f286;

-    f643 = MUL_F(FRAC_CONST(-0.8563219269206538), f591);

-    f644 = MUL_F(FRAC_CONST(0.9909026354277800), f642);

-    f645 = MUL_C(COEF_CONST(1.1254833439349063), f286);

-    y[5] = f643 + f644;

-    y[58] = f645 - f644;

-    f648 = f593 + f288;

-    f649 = MUL_F(FRAC_CONST(-0.8284432748239970), f593);

-    f650 = MUL_F(FRAC_CONST(0.9873014181578584), f648);

-    f651 = MUL_C(COEF_CONST(1.1461595614917197), f288);

-    y[6] = f649 + f650;

-    y[57] = f651 - f650;

-    f654 = f595 + f290;

-    f655 = MUL_F(FRAC_CONST(-0.8000655994760753), f595);

-    f656 = MUL_F(FRAC_CONST(0.9831054874312163), f654);

-    f657 = MUL_C(COEF_CONST(1.1661453753863573), f290);

-    y[7] = f655 + f656;

-    y[56] = f657 - f656;

-    f660 = f597 + f292;

-    f661 = MUL_F(FRAC_CONST(-0.7712059945274091), f597);

-    f662 = MUL_F(FRAC_CONST(0.9783173707196277), f660);

-    f663 = MUL_C(COEF_CONST(1.1854287469118463), f292);

-    y[8] = f661 + f662;

-    y[55] = f663 - f662;

-    f666 = f599 + f294;

-    f667 = MUL_F(FRAC_CONST(-0.7418818439248888), f599);

-    f668 = MUL_F(FRAC_CONST(0.9729399522055601), f666);

-    f669 = MUL_C(COEF_CONST(1.2039980604862313), f294);

-    y[9] = f667 + f668;

-    y[54] = f669 - f668;

-    f672 = f601 + f296;

-    f673 = MUL_F(FRAC_CONST(-0.7121108114403374), f601);

-    f674 = MUL_F(FRAC_CONST(0.9669764710448521), f672);

-    f675 = MUL_C(COEF_CONST(1.2218421306493668), f296);

-    y[10] = f673 + f674;

-    y[53] = f675 - f674;

-    f678 = f603 + f298;

-    f679 = MUL_F(FRAC_CONST(-0.6819108300305128), f603);

-    f680 = MUL_F(FRAC_CONST(0.9604305194155658), f678);

-    f681 = MUL_C(COEF_CONST(1.2389502088006188), f298);

-    y[11] = f679 + f680;

-    y[52] = f681 - f680;

-    f684 = f605 + f300;

-    f685 = MUL_F(FRAC_CONST(-0.6513000910349656), f605);

-    f686 = MUL_F(FRAC_CONST(0.9533060403541938), f684);

-    f687 = MUL_C(COEF_CONST(1.2553119896734219), f300);

-    y[12] = f685 + f686;

-    y[51] = f687 - f686;

-    f690 = f607 + f302;

-    f691 = MUL_F(FRAC_CONST(-0.6202970332182582), f607);

-    f692 = MUL_F(FRAC_CONST(0.9456073253805213), f690);

-    f693 = MUL_C(COEF_CONST(1.2709176175427843), f302);

-    y[13] = f691 + f692;

-    y[50] = f693 - f692;

-    f696 = f609 + f304;

-    f697 = MUL_F(FRAC_CONST(-0.5889203316631404), f609);

-    f698 = MUL_F(FRAC_CONST(0.9373390119125750), f696);

-    f699 = MUL_C(COEF_CONST(1.2857576921620095), f304);

-    y[14] = f697 + f698;

-    y[49] = f699 - f698;

-    f702 = f306 - f611;

-    f703 = MUL_F(FRAC_CONST(-0.5571888865213779), f611);

-    f704 = MUL_F(FRAC_CONST(0.9285060804732155), f702);

-    f705 = MUL_C(COEF_CONST(1.2998232744250531), f306);

-    y[15] = f704 - f703;

-    y[48] = f705 - f704;

-    f708 = f610 + f305;

-    f709 = MUL_F(FRAC_CONST(-0.5251218116290097), f610);

-    f710 = MUL_F(FRAC_CONST(0.9191138516900578), f708);

-    f711 = MUL_C(COEF_CONST(1.3131058917511058), f305);

-    y[16] = f709 + f710;

-    y[47] = f711 - f710;

-    f714 = f608 + f303;

-    f715 = MUL_F(FRAC_CONST(-0.4927384229928850), f608);

-    f716 = MUL_F(FRAC_CONST(0.9091679830905223), f714);

-    f717 = MUL_C(COEF_CONST(1.3255975431881595), f303);

-    y[17] = f715 + f716;

-    y[46] = f717 - f716;

-    f720 = f606 + f301;

-    f721 = MUL_F(FRAC_CONST(-0.4600582271554261), f606);

-    f722 = MUL_F(FRAC_CONST(0.8986744656939538), f720);

-    f723 = MUL_C(COEF_CONST(1.3372907042324815), f301);

-    y[18] = f721 + f722;

-    y[45] = f723 - f722;

-    f726 = f604 + f299;

-    f727 = MUL_F(FRAC_CONST(-0.4271009094446139), f604);

-    f728 = MUL_F(FRAC_CONST(0.8876396204028539), f726);

-    f729 = MUL_C(COEF_CONST(1.3481783313610940), f299);

-    y[19] = f727 + f728;

-    y[44] = f729 - f728;

-    f732 = f602 + f297;

-    f733 = MUL_F(FRAC_CONST(-0.3938863221162838), f602);

-    f734 = MUL_F(FRAC_CONST(0.8760700941954066), f732);

-    f735 = MUL_C(COEF_CONST(1.3582538662745294), f297);

-    y[20] = f733 + f734;

-    y[43] = f735 - f734;

-    f738 = f600 + f295;

-    f739 = MUL_F(FRAC_CONST(-0.3604344723958691), f600);

-    f740 = MUL_F(FRAC_CONST(0.8639728561215867), f738);

-    f741 = MUL_C(COEF_CONST(1.3675112398473042), f295);

-    y[21] = f739 + f740;

-    y[42] = f741 - f740;

-    f744 = f598 + f293;

-    f745 = MUL_F(FRAC_CONST(-0.3267655104267964), f598);

-    f746 = MUL_F(FRAC_CONST(0.8513551931052652), f744);

-    f747 = MUL_C(COEF_CONST(1.3759448757837340), f293);

-    y[22] = f745 + f746;

-    y[41] = f747 - f746;

-    f750 = f596 + f291;

-    f751 = MUL_F(FRAC_CONST(-0.2928997171327915), f596);

-    f752 = MUL_F(FRAC_CONST(0.8382247055548380), f750);

-    f753 = MUL_C(COEF_CONST(1.3835496939768843), f291);

-    y[23] = f751 + f752;

-    y[40] = f753 - f752;

-    f756 = f594 + f289;

-    f757 = MUL_F(FRAC_CONST(-0.2588574920014121), f594);

-    f758 = MUL_F(FRAC_CONST(0.8245893027850253), f756);

-    f759 = MUL_C(COEF_CONST(1.3903211135686386), f289);

-    y[24] = f757 + f758;

-    y[39] = f759 - f758;

-    f762 = f592 + f287;

-    f763 = MUL_F(FRAC_CONST(-0.2246593407961559), f592);

-    f764 = MUL_F(FRAC_CONST(0.8104571982525948), f762);

-    f765 = MUL_C(COEF_CONST(1.3962550557090336), f287);

-    y[25] = f763 + f764;

-    y[38] = f765 - f764;

-    f768 = f590 + f285;

-    f769 = MUL_F(FRAC_CONST(-0.1903258632045579), f590);

-    f770 = MUL_F(FRAC_CONST(0.7958369046088835), f768);

-    f771 = MUL_C(COEF_CONST(1.4013479460132090), f285);

-    y[26] = f769 + f770;

-    y[37] = f771 - f770;

-    f774 = f588 + f283;

-    f775 = MUL_F(FRAC_CONST(-0.1558777404297079), f588);

-    f776 = MUL_F(FRAC_CONST(0.7807372285720944), f774);

-    f777 = MUL_C(COEF_CONST(1.4055967167144807), f283);

-    y[27] = f775 + f776;

-    y[36] = f777 - f776;

-    f780 = f586 + f281;

-    f781 = MUL_F(FRAC_CONST(-0.1213357227326675), f586);

-    f782 = MUL_F(FRAC_CONST(0.7651672656224590), f780);

-    f783 = MUL_C(COEF_CONST(1.4089988085122505), f281);

-    y[28] = f781 + f782;

-    y[35] = f783 - f782;

-    f786 = f584 + f279;

-    f787 = MUL_F(FRAC_CONST(-0.0867206169332875), f584);

-    f788 = MUL_F(FRAC_CONST(0.7491363945234593), f786);

-    f789 = MUL_C(COEF_CONST(1.4115521721136310), f279);

-    y[29] = f787 + f788;

-    y[34] = f789 - f788;

-    f792 = f582 + f277;

-    f793 = MUL_F(FRAC_CONST(-0.0520532738769597), f582);

-    f794 = MUL_F(FRAC_CONST(0.7326542716724128), f792);

-    f795 = MUL_C(COEF_CONST(1.4132552694678659), f277);

-    y[30] = f793 + f794;

-    y[33] = f795 - f794;

-    f798 = f580 + f275;

-    f799 = MUL_F(FRAC_CONST(-0.0173545758748457), f580);

-    f800 = MUL_F(FRAC_CONST(0.7157308252838186), f798);

-    f801 = MUL_C(COEF_CONST(1.4141070746927915), f275);

-    y[31] = f799 + f800;

-    y[32] = f801 - f800;

+        // out[i1] = point1 + point2

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // out[i2] = point1 - point2

+        Real[i2] = point1_real - point2_real;

+        Imag[i2] = point1_imag - point2_imag;

+    }

+    w_real = w_array_real[4]; // = sqrt(2)/2

+    // w_imag = -w_real; // = w_array_imag[4]; // = -sqrt(2)/2

+    for (i = 1; i < n; i += 8)

+    {

+        i2 = i+4;

+        point1_real = Real[i];

+        point1_imag = Imag[i];

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

+        // temp1 = x[i] - x[i2]

+        point1_real -= point2_real;

+        point1_imag -= point2_imag;

+        // x[i1] = x[i] + x[i2]

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // x[i2] = (x[i] - x[i2]) * w

+        Real[i2] = MUL_F(point1_real+point1_imag, w_real);

+        Imag[i2] = MUL_F(point1_imag-point1_real, w_real);

+    }

+    for (i = 2; i < n; i += 8)

+    {

+        i2 = i+4;

+        point1_real = Real[i];

+        point1_imag = Imag[i];

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

+        // x[i] = x[i] + x[i2]

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // x[i2] = (x[i] - x[i2]) * (-i)

+        Real[i2] = point1_imag - point2_imag;

+        Imag[i2] = point2_real - point1_real;

+    }

+    w_real = w_array_real[12]; // = -sqrt(2)/2

+    // w_imag = w_real; // = w_array_imag[12]; // = -sqrt(2)/2

+    for (i = 3; i < n; i += 8)

+    {

+        i2 = i+4;

+        point1_real = Real[i];

+        point1_imag = Imag[i];

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

+        // temp1 = x[i] - x[i2]

+        point1_real -= point2_real;

+        point1_imag -= point2_imag;

+        // x[i1] = x[i] + x[i2]

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // x[i2] = (x[i] - x[i2]) * w

+        Real[i2] = MUL_F(point1_real-point1_imag, w_real);

+        Imag[i2] = MUL_F(point1_real+point1_imag, w_real);

+    }

+    // Stage 4 of 32 point FFT decimation in frequency (no multiplications)

+    // 16*4=64 additions

+    for (i = 0; i < n; i += 4)

+    {

+        i2 = i+2;

+        point1_real = Real[i];

+        point1_imag = Imag[i];

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

+        // x[i1] = x[i] + x[i2]

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // x[i2] = x[i] - x[i2]

+        Real[i2] = point1_real - point2_real;

+        Imag[i2] = point1_imag - point2_imag;

+    }

+    for (i = 1; i < n; i += 4)

+    {

+        i2 = i+2;

+        point1_real = Real[i];

+        point1_imag = Imag[i];

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

+        // x[i] = x[i] + x[i2]

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // x[i2] = (x[i] - x[i2]) * (-i)

+        Real[i2] = point1_imag - point2_imag;

+        Imag[i2] = point2_real - point1_real;

+    }

+    // Stage 5 of 32 point FFT decimation in frequency (no multiplications)

+    // 16*4=64 additions

+    for (i = 0; i < n; i += 2)

+    {

+        i2 = i+1;

+        point1_real = Real[i];

+        point1_imag = Imag[i];

+        point2_real = Real[i2];

+        point2_imag = Imag[i2];

+        // out[i1] = point1 + point2

+        Real[i] += point2_real;

+        Imag[i] += point2_imag;

+        // out[i2] = point1 - point2

+        Real[i2] = point1_real - point2_real;

+        Imag[i2] = point1_imag - point2_imag;

+    }

+#ifdef REORDER_IN_FFT

+    FFTReorder(Real, Imag);

+#endif // #ifdef REORDER_IN_FFT

+}

+#undef n

+#undef log2n

+static const real_t dct4_64_tab[] = { /* six groups of 32 coefficients, indexed by dct4_kernel() */

+    COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507), /* [0..31]: step 2, multiplies (x_re + x_im) */

+    COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537),

+    COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708),

+    COEF_CONST(0.953306019306183), COEF_CONST(0.937339007854462),

+    COEF_CONST(0.919113874435425), COEF_CONST(0.898674488067627),

+    COEF_CONST(0.876070082187653), COEF_CONST(0.851355195045471),

+    COEF_CONST(0.824589252471924), COEF_CONST(0.795836925506592),

+    COEF_CONST(0.765167236328125), COEF_CONST(0.732654273509979),

+    COEF_CONST(0.698376238346100), COEF_CONST(0.662415742874146),

+    COEF_CONST(0.624859452247620), COEF_CONST(0.585797846317291),

+    COEF_CONST(0.545324981212616), COEF_CONST(0.503538429737091),

+    COEF_CONST(0.460538715124130), COEF_CONST(0.416429549455643),

+    COEF_CONST(0.371317148208618), COEF_CONST(0.325310230255127),

+    COEF_CONST(0.278519600629807), COEF_CONST(0.231058135628700),

+    COEF_CONST(0.183039888739586), COEF_CONST(0.134580686688423),

+    COEF_CONST(0.085797272622585), COEF_CONST(0.036807164549828),

+    COEF_CONST(-1.012196302413940), COEF_CONST(-1.059438824653626), /* [32..63]: step 2, multiplies x_re (result goes to in_imag) */

+    COEF_CONST(-1.104129195213318), COEF_CONST(-1.146159529685974),

+    COEF_CONST(-1.185428738594055), COEF_CONST(-1.221842169761658),

+    COEF_CONST(-1.255311965942383), COEF_CONST(-1.285757660865784),

+    COEF_CONST(-1.313105940818787), COEF_CONST(-1.337290763854981),

+    COEF_CONST(-1.358253836631775), COEF_CONST(-1.375944852828980),

+    COEF_CONST(-1.390321016311646), COEF_CONST(-1.401347875595093),

+    COEF_CONST(-1.408998727798462), COEF_CONST(-1.413255214691162),

+    COEF_CONST(-1.414107084274292), COEF_CONST(-1.411552190780640),

+    COEF_CONST(-1.405596733093262), COEF_CONST(-1.396255016326904),

+    COEF_CONST(-1.383549690246582), COEF_CONST(-1.367511272430420),

+    COEF_CONST(-1.348178386688232), COEF_CONST(-1.325597524642944),

+    COEF_CONST(-1.299823284149170), COEF_CONST(-1.270917654037476),

+    COEF_CONST(-1.238950133323669), COEF_CONST(-1.203998088836670),

+    COEF_CONST(-1.166145324707031), COEF_CONST(-1.125483393669128),

+    COEF_CONST(-1.082109928131104), COEF_CONST(-1.036129593849182),

+    COEF_CONST(-0.987653195858002), COEF_CONST(-0.936797380447388), /* [64..95]: step 2, multiplies x_im (result goes to in_real) */

+    COEF_CONST(-0.883684754371643), COEF_CONST(-0.828443288803101),

+    COEF_CONST(-0.771206021308899), COEF_CONST(-0.712110757827759),

+    COEF_CONST(-0.651300072669983), COEF_CONST(-0.588920354843140),

+    COEF_CONST(-0.525121808052063), COEF_CONST(-0.460058242082596),

+    COEF_CONST(-0.393886327743530), COEF_CONST(-0.326765477657318),

+    COEF_CONST(-0.258857429027557), COEF_CONST(-0.190325915813446),

+    COEF_CONST(-0.121335685253143), COEF_CONST(-0.052053272724152),

+    COEF_CONST(0.017354607582092), COEF_CONST(0.086720645427704),

+    COEF_CONST(0.155877828598022), COEF_CONST(0.224659323692322),

+    COEF_CONST(0.292899727821350), COEF_CONST(0.360434412956238),

+    COEF_CONST(0.427100926637650), COEF_CONST(0.492738455533981),

+    COEF_CONST(0.557188928127289), COEF_CONST(0.620297133922577),

+    COEF_CONST(0.681910991668701), COEF_CONST(0.741881847381592),

+    COEF_CONST(0.800065577030182), COEF_CONST(0.856321990489960),

+    COEF_CONST(0.910515367984772), COEF_CONST(0.962515234947205),

+    COEF_CONST(1.000000000000000), COEF_CONST(0.998795449733734), /* [96..127]: step 4, multiplies (x_re + x_im) */

+    COEF_CONST(0.995184719562531), COEF_CONST(0.989176511764526),

+    COEF_CONST(0.980785250663757), COEF_CONST(0.970031261444092),

+    COEF_CONST(0.956940352916718), COEF_CONST(0.941544055938721),

+    COEF_CONST(0.923879504203796), COEF_CONST(0.903989315032959),

+    COEF_CONST(0.881921231746674), COEF_CONST(0.857728600502014),

+    COEF_CONST(0.831469595432281), COEF_CONST(0.803207516670227),

+    COEF_CONST(0.773010432720184), COEF_CONST(0.740951120853424),

+    COEF_CONST(0.707106769084930), COEF_CONST(0.671558916568756),

+    COEF_CONST(0.634393274784088), COEF_CONST(0.595699310302734),

+    COEF_CONST(0.555570185184479), COEF_CONST(0.514102697372437),

+    COEF_CONST(0.471396654844284), COEF_CONST(0.427555114030838),

+    COEF_CONST(0.382683426141739), COEF_CONST(0.336889833211899),

+    COEF_CONST(0.290284633636475), COEF_CONST(0.242980122566223),

+    COEF_CONST(0.195090234279633), COEF_CONST(0.146730497479439),

+    COEF_CONST(0.098017133772373), COEF_CONST(0.049067649990320),

+    COEF_CONST(-1.000000000000000), COEF_CONST(-1.047863125801086), /* [128..159]: step 4, multiplies x_re (result goes to out_imag) */

+    COEF_CONST(-1.093201875686646), COEF_CONST(-1.135906934738159),

+    COEF_CONST(-1.175875544548035), COEF_CONST(-1.213011503219605),

+    COEF_CONST(-1.247225046157837), COEF_CONST(-1.278433918952942),

+    COEF_CONST(-1.306562900543213), COEF_CONST(-1.331544399261475),

+    COEF_CONST(-1.353317975997925), COEF_CONST(-1.371831417083740),

+    COEF_CONST(-1.387039899826050), COEF_CONST(-1.398906826972961),

+    COEF_CONST(-1.407403707504273), COEF_CONST(-1.412510156631470),

+    COEF_CONST(0), COEF_CONST(-1.412510156631470), /* [4*32 + 16] is stored as 0: dct4_kernel() never reads it (i == 16 is special-cased) */

+    COEF_CONST(-1.407403707504273), COEF_CONST(-1.398906826972961),

+    COEF_CONST(-1.387039899826050), COEF_CONST(-1.371831417083740),

+    COEF_CONST(-1.353317975997925), COEF_CONST(-1.331544399261475),

+    COEF_CONST(-1.306562900543213), COEF_CONST(-1.278433918952942),

+    COEF_CONST(-1.247225046157837), COEF_CONST(-1.213011384010315),

+    COEF_CONST(-1.175875544548035), COEF_CONST(-1.135907053947449),

+    COEF_CONST(-1.093201875686646), COEF_CONST(-1.047863125801086),

+    COEF_CONST(-1.000000000000000), COEF_CONST(-0.949727773666382), /* [160..191]: step 4, multiplies x_im (result goes to out_real) */

+    COEF_CONST(-0.897167563438416), COEF_CONST(-0.842446029186249),

+    COEF_CONST(-0.785694956779480), COEF_CONST(-0.727051079273224),

+    COEF_CONST(-0.666655659675598), COEF_CONST(-0.604654192924500),

+    COEF_CONST(-0.541196048259735), COEF_CONST(-0.476434230804443),

+    COEF_CONST(-0.410524487495422), COEF_CONST(-0.343625843524933),

+    COEF_CONST(-0.275899350643158), COEF_CONST(-0.207508206367493),

+    COEF_CONST(-0.138617098331451), COEF_CONST(-0.069392144680023),

+    COEF_CONST(0), COEF_CONST(0.069392263889313), /* [5*32 + 16] is 0: dct4_kernel() never reads it (i == 16 is special-cased) */

+    COEF_CONST(0.138617157936096), COEF_CONST(0.207508206367493),

+    COEF_CONST(0.275899469852448), COEF_CONST(0.343625962734222),

+    COEF_CONST(0.410524636507034), COEF_CONST(0.476434201002121),

+    COEF_CONST(0.541196107864380), COEF_CONST(0.604654192924500),

+    COEF_CONST(0.666655719280243), COEF_CONST(0.727051138877869),

+    COEF_CONST(0.785695075988770), COEF_CONST(0.842446029186249),

+    COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382)

+};

+/* size 64 only! Works on 32 complex points: modulates in_real/in_imag in place, runs fft_dif() on them, then writes the re-modulated, reordered result to out_real/out_imag[0..31]. */

+void dct4_kernel(real_t * in_real, real_t * in_imag, real_t * out_real, real_t * out_imag)

+{

+    // Table of 5-bit bit-reversed indices: entry i holds the bit reverse of i

+    const uint8_t bit_rev_tab[32] = { 0,16,8,24,4,20,12,28,2,18,10,26,6,22,14,30,1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31 };

+    uint16_t i, i_rev;

+    /* Step 2: modulate (in place, overwriting the inputs before the FFT) */

+    // 3*32=96 multiplications (three per complex point: one shared term plus one per output)

+    // 3*32=96 additions

+    for (i = 0; i < 32; i++)

+    {

+    	real_t x_re, x_im, tmp;

+    	x_re = in_real[i]; // copy first: in_real[i]/in_imag[i] are overwritten below

+    	x_im = in_imag[i];

+        tmp =        MUL_C(x_re + x_im, dct4_64_tab[i]); // shared term, reused by both outputs

+        in_real[i] = MUL_C(x_im, dct4_64_tab[i + 64]) + tmp;

+        in_imag[i] = MUL_C(x_re, dct4_64_tab[i + 32]) + tmp;

+    }

+    /* Step 3: FFT, but with output in bit reverse order */

+    fft_dif(in_real, in_imag);

+    /* Step 4: modulate + bit-reverse reordering (output i is read from FFT slot bit_rev_tab[i]) */

+    // 3*31+2=95 multiplications (i == 16 needs only two, see below)

+    // 3*31+2=95 additions

+    for (i = 0; i < 16; i++)

+    {

+    	real_t x_re, x_im, tmp;

+    	i_rev = bit_rev_tab[i];

+    	x_re = in_real[i_rev];

+    	x_im = in_imag[i_rev];

+        tmp =         MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); // shared term, reused by both outputs

+        out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp;

+        out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp;

+    }

+    // i = 16, i_rev = 1 = rev(16): special case, both outputs use the single factor dct4_64_tab[16 + 3*32]

+    out_imag[16] = MUL_C(in_imag[1] - in_real[1], dct4_64_tab[16 + 3*32]);

+    out_real[16] = MUL_C(in_real[1] + in_imag[1], dct4_64_tab[16 + 3*32]);

+    for (i = 17; i < 32; i++) // same as the i < 16 loop, resumed after the special case

+    {

+    	real_t x_re, x_im, tmp;

+    	i_rev = bit_rev_tab[i];

+    	x_re = in_real[i_rev];

+    	x_im = in_imag[i_rev];

+        tmp =         MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]);

+        out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp;

+        out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp;

+    }

 void DST4_32(real_t *y, real_t *x)

--- a/libfaad/sbr_dct.h

+++ b/libfaad/sbr_dct.h

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: sbr_dct.h,v 1.12 2004/03/10 19:45:41 menno Exp $

+** $Id: sbr_dct.h,v 1.15 2004/09/04 14:56:28 menno Exp $

**/

 #ifndef __SBR_DCT_H__

@@ -32,9 +32,9 @@

 extern "C" {

 #endif

+void dct4_kernel(real_t * in_real, real_t * in_imag, real_t * out_real, real_t * out_imag);

 void DCT3_32_unscaled(real_t *y, real_t *x);

-void DCT4_64(real_t *y, real_t *x);

-void DCT4_64_kernel(real_t *y, real_t *t2);

 void DCT4_32(real_t *y, real_t *x);

 void DST4_32(real_t *y, real_t *x);

 void DCT2_32_unscaled(real_t *y, real_t *x);

--- a/libfaad/sbr_dec.c

+++ b/libfaad/sbr_dec.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: sbr_dec.c,v 1.36 2004/05/17 10:18:03 menno Exp $

+** $Id: sbr_dec.c,v 1.37 2004/06/30 12:45:56 menno Exp $

**/

@@ -248,6 +248,19 @@

 #endif

+//#define PRE_QMF_PRINT

+#ifdef PRE_QMF_PRINT

+    {

+        int i;

+        for (i = 0; i < 1024; i++)

+        {

+            printf("%d\n", channel_buf[i]);

+        }

+    }

+#endif

     /* subband analysis */

     if (dont_process)

         sbr_qmf_analysis_32(sbr, sbr->qmfa[ch], channel_buf, sbr->Xsbr[ch], sbr->tHFGen, 32);

@@ -422,6 +435,21 @@

     sbr->frame++;

+//#define POST_QMF_PRINT

+#ifdef POST_QMF_PRINT

+    {

+        int i;

+        for (i = 0; i < 2048; i++)

+        {

+            printf("%d\n", left_chan[i]);

+        }

+        for (i = 0; i < 2048; i++)

+        {

+            printf("%d\n", right_chan[i]);

+        }

+    }

+#endif

     return 0;

@@ -477,6 +505,17 @@

     sbr_save_matrix(sbr, 0);

     sbr->frame++;

+//#define POST_QMF_PRINT

+#ifdef POST_QMF_PRINT

+    {

+        int i;

+        for (i = 0; i < 2048; i++)

+        {

+            printf("%d\n", channel[i]);

+        }

+    }

+#endif

     return 0;

--- a/libfaad/sbr_dec.h

+++ b/libfaad/sbr_dec.h

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: sbr_dec.h,v 1.32 2004/05/17 10:18:03 menno Exp $

+** $Id: sbr_dec.h,v 1.35 2004/09/04 14:56:28 menno Exp $

**/

 #ifndef __SBR_DEC_H__

@@ -58,7 +58,6 @@

     real_t *v;

     int16_t v_index;

     uint8_t channels;

-    complex_t *pre_twiddle;

 } qmfs_info;

 typedef struct

--- a/libfaad/sbr_hfadj.c

+++ b/libfaad/sbr_hfadj.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: sbr_hfadj.c,v 1.15 2004/05/17 10:18:03 menno Exp $

+** $Id: sbr_hfadj.c,v 1.18 2004/09/04 14:56:28 menno Exp $

**/

 /* High Frequency adjustment */

@@ -1559,15 +1559,17 @@

 #ifndef SBR_LOW_POWER

                 if (h_SL != 0)

+                	uint8_t ri = sbr->GQ_ringbuf_index[ch];

                     for (n = 0; n <= 4; n++)

-                        uint8_t ri = sbr->GQ_ringbuf_index[ch] + 1 + n;

+                        real_t curr_h_smooth = h_smooth[n];

+                        ri++;

                         if (ri >= 5)

                             ri -= 5;

-                        G_filt += MUL_F(sbr->G_temp_prev[ch][ri][m], h_smooth[n]);

-                        Q_filt += MUL_F(sbr->Q_temp_prev[ch][ri][m], h_smooth[n]);

+                        G_filt += MUL_F(sbr->G_temp_prev[ch][ri][m], curr_h_smooth);

+                        Q_filt += MUL_F(sbr->Q_temp_prev[ch][ri][m], curr_h_smooth);

-                } else {

+               } else {

 #endif

                     G_filt = sbr->G_temp_prev[ch][sbr->GQ_ringbuf_index[ch]][m];

                     Q_filt = sbr->Q_temp_prev[ch][sbr->GQ_ringbuf_index[ch]][m];

--- a/libfaad/sbr_hfgen.c

+++ b/libfaad/sbr_hfgen.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: sbr_hfgen.c,v 1.18 2004/05/17 10:18:03 menno Exp $

+** $Id: sbr_hfgen.c,v 1.21 2004/09/04 14:56:28 menno Exp $

**/

 /* High Frequency generation */

@@ -67,7 +67,6 @@

     uint8_t first = sbr->t_E[ch][0];

     uint8_t last = sbr->t_E[ch][sbr->L_E[ch]];

     calc_chirp_factors(sbr, ch);

 #ifdef SBR_LOW_POWER

@@ -88,7 +87,7 @@

         for (x = 0; x < sbr->patchNoSubbands[i]; x++)

-            complex_t a0, a1;

+            real_t a0_r, a0_i, a1_r, a1_i;

             real_t bw, bw2;

             uint8_t q, p, k, g;

@@ -116,39 +115,54 @@

             /* with or without filtering */

             if (bw2 > 0)

+                real_t temp1_r, temp2_r, temp3_r;

 #ifndef SBR_LOW_POWER

+                real_t temp1_i, temp2_i, temp3_i;

                 calc_prediction_coef(sbr, Xlow, alpha_0, alpha_1, p);

 #endif

-                RE(a0) = MUL_C(RE(alpha_0[p]), bw);

-                RE(a1) = MUL_C(RE(alpha_1[p]), bw2);

+                a0_r = MUL_C(RE(alpha_0[p]), bw);

+                a1_r = MUL_C(RE(alpha_1[p]), bw2);

 #ifndef SBR_LOW_POWER

-                IM(a0) = MUL_C(IM(alpha_0[p]), bw);

-                IM(a1) = MUL_C(IM(alpha_1[p]), bw2);

+                a0_i = MUL_C(IM(alpha_0[p]), bw);

+                a1_i = MUL_C(IM(alpha_1[p]), bw2);

 #endif

+            	temp2_r = QMF_RE(Xlow[first - 2 + offset][p]);

+            	temp3_r = QMF_RE(Xlow[first - 1 + offset][p]);

+#ifndef SBR_LOW_POWER

+            	temp2_i = QMF_IM(Xlow[first - 2 + offset][p]);

+            	temp3_i = QMF_IM(Xlow[first - 1 + offset][p]);

+#endif

 				for (l = first; l < last; l++)

-                    QMF_RE(Xhigh[l + offset][k]) = QMF_RE(Xlow[l + offset][p]);

+                	temp1_r = temp2_r;

+                	temp2_r = temp3_r;

+                	temp3_r = QMF_RE(Xlow[l + offset][p]);

 #ifndef SBR_LOW_POWER

-                    QMF_IM(Xhigh[l + offset][k]) = QMF_IM(Xlow[l + offset][p]);

+                	temp1_i = temp2_i;

+                	temp2_i = temp3_i;

+                    temp3_i = QMF_IM(Xlow[l + offset][p]);

 #endif

 #ifdef SBR_LOW_POWER

-                    QMF_RE(Xhigh[l + offset][k]) += (

-                        MUL_R(RE(a0), QMF_RE(Xlow[l - 1 + offset][p])) +

-                        MUL_R(RE(a1), QMF_RE(Xlow[l - 2 + offset][p])));

+                    QMF_RE(Xhigh[l + offset][k]) =

+                        temp3_r

+                      +(MUL_R(a0_r, temp2_r) +

+                        MUL_R(a1_r, temp1_r));

 #else

-                    QMF_RE(Xhigh[l + offset][k]) += (

-                        MUL_R(RE(a0), QMF_RE(Xlow[l - 1 + offset][p])) -

-                        MUL_R(IM(a0), QMF_IM(Xlow[l - 1 + offset][p])) +

-                        MUL_R(RE(a1), QMF_RE(Xlow[l - 2 + offset][p])) -

-                        MUL_R(IM(a1), QMF_IM(Xlow[l - 2 + offset][p])));

-                    QMF_IM(Xhigh[l + offset][k]) += (

-                        MUL_R(IM(a0), QMF_RE(Xlow[l - 1 + offset][p])) +

-                        MUL_R(RE(a0), QMF_IM(Xlow[l - 1 + offset][p])) +

-                        MUL_R(IM(a1), QMF_RE(Xlow[l - 2 + offset][p])) +

-                        MUL_R(RE(a1), QMF_IM(Xlow[l - 2 + offset][p])));

+                    QMF_RE(Xhigh[l + offset][k]) =

+                        temp3_r

+                      +(MUL_R(a0_r, temp2_r) -

+                        MUL_R(a0_i, temp2_i) +

+                        MUL_R(a1_r, temp1_r) -

+                        MUL_R(a1_i, temp1_i));

+                    QMF_IM(Xhigh[l + offset][k]) =

+                        temp3_i

+                      +(MUL_R(a0_i, temp2_r) +

+                        MUL_R(a0_r, temp2_i) +

+                        MUL_R(a1_i, temp1_r) +

+                        MUL_R(a1_r, temp1_i));

 #endif

             } else {

@@ -254,9 +268,11 @@

                              uint8_t bd, uint8_t len)

     real_t r01r = 0, r01i = 0, r02r = 0, r02i = 0, r11r = 0;

+    real_t temp1_r, temp1_i, temp2_r, temp2_i, temp3_r, temp3_i, temp4_r, temp4_i, temp5_r, temp5_i;

 #ifdef FIXED_POINT

     const real_t rel = FRAC_CONST(0.999999); // 1 / (1 + 1e-6f);

     uint32_t mask, exp;

+    real_t pow2_to_exp;

 #else

     const real_t rel = 1 / (1 + 1e-6f);

 #endif

@@ -280,60 +296,105 @@

     /* improves accuracy */

     if (exp > 0)

         exp -= 1;

+    pow2_to_exp = 1<<(exp-1);

+    temp2_r = (QMF_RE(buffer[offset-2][bd]) + pow2_to_exp) >> exp;

+    temp2_i = (QMF_IM(buffer[offset-2][bd]) + pow2_to_exp) >> exp;

+    temp3_r = (QMF_RE(buffer[offset-1][bd]) + pow2_to_exp) >> exp;

+    temp3_i = (QMF_IM(buffer[offset-1][bd]) + pow2_to_exp) >> exp;

+    // Save these because they are needed after loop

+    temp4_r = temp2_r;

+    temp4_i = temp2_i;

+    temp5_r = temp3_r;

+    temp5_i = temp3_i;

     for (j = offset; j < len + offset; j++)

-        real_t rbuf_j = ((QMF_RE(buffer[j][bd])+(1<<(exp-1)))>>exp);

-        real_t ibuf_j = ((QMF_IM(buffer[j][bd])+(1<<(exp-1)))>>exp);

-        real_t rbuf_j_1 = ((QMF_RE(buffer[j-1][bd])+(1<<(exp-1)))>>exp);

-        real_t ibuf_j_1 = ((QMF_IM(buffer[j-1][bd])+(1<<(exp-1)))>>exp);

-        real_t rbuf_j_2 = ((QMF_RE(buffer[j-2][bd])+(1<<(exp-1)))>>exp);

-        real_t ibuf_j_2 = ((QMF_IM(buffer[j-2][bd])+(1<<(exp-1)))>>exp);

-        r01r += MUL_R(rbuf_j, rbuf_j_1) + MUL_R(ibuf_j, ibuf_j_1);

-        r01i += MUL_R(ibuf_j, rbuf_j_1) - MUL_R(rbuf_j, ibuf_j_1);

-        r02r += MUL_R(rbuf_j, rbuf_j_2) + MUL_R(ibuf_j, ibuf_j_2);

-        r02i += MUL_R(ibuf_j, rbuf_j_2) - MUL_R(rbuf_j, ibuf_j_2);

-        r11r += MUL_R(rbuf_j_1, rbuf_j_1) + MUL_R(ibuf_j_1, ibuf_j_1);

+    	temp1_r = temp2_r; // temp1_r = (QMF_RE(buffer[j-2][bd]) + (1<<(exp-1))) >> exp;

+    	temp1_i = temp2_i; // temp1_i = (QMF_IM(buffer[j-2][bd]) + (1<<(exp-1))) >> exp;

+    	temp2_r = temp3_r; // temp2_r = (QMF_RE(buffer[j-1][bd]) + (1<<(exp-1))) >> exp;

+    	temp2_i = temp3_i; // temp2_i = (QMF_IM(buffer[j-1][bd]) + (1<<(exp-1))) >> exp;

+        temp3_r = (QMF_RE(buffer[j][bd]) + pow2_to_exp) >> exp;

+        temp3_i = (QMF_IM(buffer[j][bd]) + pow2_to_exp) >> exp;

+        r01r += MUL_R(temp3_r, temp2_r) + MUL_R(temp3_i, temp2_i);

+        r01i += MUL_R(temp3_i, temp2_r) - MUL_R(temp3_r, temp2_i);

+        r02r += MUL_R(temp3_r, temp1_r) + MUL_R(temp3_i, temp1_i);

+        r02i += MUL_R(temp3_i, temp1_r) - MUL_R(temp3_r, temp1_i);

+        r11r += MUL_R(temp2_r, temp2_r) + MUL_R(temp2_i, temp2_i);

+    // These are actual values in temporary variable at this point

+    // temp1_r = (QMF_RE(buffer[len+offset-1-2][bd]) + (1<<(exp-1))) >> exp;

+    // temp1_i = (QMF_IM(buffer[len+offset-1-2][bd]) + (1<<(exp-1))) >> exp;

+    // temp2_r = (QMF_RE(buffer[len+offset-1-1][bd]) + (1<<(exp-1))) >> exp;

+    // temp2_i = (QMF_IM(buffer[len+offset-1-1][bd]) + (1<<(exp-1))) >> exp;

+    // temp3_r = (QMF_RE(buffer[len+offset-1][bd]) + (1<<(exp-1))) >> exp;

+    // temp3_i = (QMF_IM(buffer[len+offset-1][bd]) + (1<<(exp-1))) >> exp;

+    // temp4_r = (QMF_RE(buffer[offset-2][bd]) + (1<<(exp-1))) >> exp;

+    // temp4_i = (QMF_IM(buffer[offset-2][bd]) + (1<<(exp-1))) >> exp;

+    // temp5_r = (QMF_RE(buffer[offset-1][bd]) + (1<<(exp-1))) >> exp;

+    // temp5_i = (QMF_IM(buffer[offset-1][bd]) + (1<<(exp-1))) >> exp;

     RE(ac->r12) = r01r -

-        (MUL_R(((QMF_RE(buffer[len+offset-1][bd])+(1<<(exp-1)))>>exp), ((QMF_RE(buffer[len+offset-2][bd])+(1<<(exp-1)))>>exp)) +

-        MUL_R(((QMF_IM(buffer[len+offset-1][bd])+(1<<(exp-1)))>>exp), ((QMF_IM(buffer[len+offset-2][bd])+(1<<(exp-1)))>>exp))) +

-        (MUL_R(((QMF_RE(buffer[offset-1][bd])+(1<<(exp-1)))>>exp), ((QMF_RE(buffer[offset-2][bd])+(1<<(exp-1)))>>exp)) +

-        MUL_R(((QMF_IM(buffer[offset-1][bd])+(1<<(exp-1)))>>exp), ((QMF_IM(buffer[offset-2][bd])+(1<<(exp-1)))>>exp)));

+        (MUL_R(temp3_r, temp2_r) + MUL_R(temp3_i, temp2_i)) +

+        (MUL_R(temp5_r, temp4_r) + MUL_R(temp5_i, temp4_i));

     IM(ac->r12) = r01i -

-        (MUL_R(((QMF_IM(buffer[len+offset-1][bd])+(1<<(exp-1)))>>exp), ((QMF_RE(buffer[len+offset-2][bd])+(1<<(exp-1)))>>exp)) -

-        MUL_R(((QMF_RE(buffer[len+offset-1][bd])+(1<<(exp-1)))>>exp), ((QMF_IM(buffer[len+offset-2][bd])+(1<<(exp-1)))>>exp))) +

-        (MUL_R(((QMF_IM(buffer[offset-1][bd])+(1<<(exp-1)))>>exp), ((QMF_RE(buffer[offset-2][bd])+(1<<(exp-1)))>>exp)) -

-        MUL_R(((QMF_RE(buffer[offset-1][bd])+(1<<(exp-1)))>>exp), ((QMF_IM(buffer[offset-2][bd])+(1<<(exp-1)))>>exp)));

+        (MUL_R(temp3_i, temp2_r) - MUL_R(temp3_r, temp2_i)) +

+        (MUL_R(temp5_i, temp4_r) - MUL_R(temp5_r, temp4_i));

     RE(ac->r22) = r11r -

-        (MUL_R(((QMF_RE(buffer[len+offset-2][bd])+(1<<(exp-1)))>>exp), ((QMF_RE(buffer[len+offset-2][bd])+(1<<(exp-1)))>>exp)) +

-        MUL_R(((QMF_IM(buffer[len+offset-2][bd])+(1<<(exp-1)))>>exp), ((QMF_IM(buffer[len+offset-2][bd])+(1<<(exp-1)))>>exp))) +

-        (MUL_R(((QMF_RE(buffer[offset-2][bd])+(1<<(exp-1)))>>exp), ((QMF_RE(buffer[offset-2][bd])+(1<<(exp-1)))>>exp)) +

-        MUL_R(((QMF_IM(buffer[offset-2][bd])+(1<<(exp-1)))>>exp), ((QMF_IM(buffer[offset-2][bd])+(1<<(exp-1)))>>exp)));

+        (MUL_R(temp2_r, temp2_r) + MUL_R(temp2_i, temp2_i)) +

+        (MUL_R(temp4_r, temp4_r) + MUL_R(temp4_i, temp4_i));

 #else

+    temp2_r = QMF_RE(buffer[offset-2][bd]);

+    temp2_i = QMF_IM(buffer[offset-2][bd]);

+    temp3_r = QMF_RE(buffer[offset-1][bd]);

+    temp3_i = QMF_IM(buffer[offset-1][bd]);

+    // Save these because they are needed after loop

+    temp4_r = temp2_r;

+    temp4_i = temp2_i;

+    temp5_r = temp3_r;

+    temp5_i = temp3_i;

     for (j = offset; j < len + offset; j++)

-        r01r += QMF_RE(buffer[j][bd]) * QMF_RE(buffer[j-1][bd]) +

-            QMF_IM(buffer[j][bd]) * QMF_IM(buffer[j-1][bd]);

-        r01i += QMF_IM(buffer[j][bd]) * QMF_RE(buffer[j-1][bd]) -

-            QMF_RE(buffer[j][bd]) * QMF_IM(buffer[j-1][bd]);

-        r02r += QMF_RE(buffer[j][bd]) * QMF_RE(buffer[j-2][bd]) +

-            QMF_IM(buffer[j][bd]) * QMF_IM(buffer[j-2][bd]);

-        r02i += QMF_IM(buffer[j][bd]) * QMF_RE(buffer[j-2][bd]) -

-            QMF_RE(buffer[j][bd]) * QMF_IM(buffer[j-2][bd]);

-        r11r += QMF_RE(buffer[j-1][bd]) * QMF_RE(buffer[j-1][bd]) +

-            QMF_IM(buffer[j-1][bd]) * QMF_IM(buffer[j-1][bd]);

+    	temp1_r = temp2_r; // temp1_r = QMF_RE(buffer[j-2][bd]);

+    	temp1_i = temp2_i; // temp1_i = QMF_IM(buffer[j-2][bd]);

+    	temp2_r = temp3_r; // temp2_r = QMF_RE(buffer[j-1][bd]);

+    	temp2_i = temp3_i; // temp2_i = QMF_IM(buffer[j-1][bd]);

+        temp3_r = QMF_RE(buffer[j][bd]);

+        temp3_i = QMF_IM(buffer[j][bd]);

+        r01r += temp3_r * temp2_r + temp3_i * temp2_i;

+        r01i += temp3_i * temp2_r - temp3_r * temp2_i;

+        r02r += temp3_r * temp1_r + temp3_i * temp1_i;

+        r02i += temp3_i * temp1_r - temp3_r * temp1_i;

+        r11r += temp2_r * temp2_r + temp2_i * temp2_i;

+    // These are actual values in temporary variable at this point

+    // temp1_r = QMF_RE(buffer[len+offset-1-2][bd]);

+    // temp1_i = QMF_IM(buffer[len+offset-1-2][bd]);

+    // temp2_r = QMF_RE(buffer[len+offset-1-1][bd]);

+    // temp2_i = QMF_IM(buffer[len+offset-1-1][bd]);

+    // temp3_r = QMF_RE(buffer[len+offset-1][bd]);

+    // temp3_i = QMF_IM(buffer[len+offset-1][bd]);

+    // temp4_r = QMF_RE(buffer[offset-2][bd]);

+    // temp4_i = QMF_IM(buffer[offset-2][bd]);

+    // temp5_r = QMF_RE(buffer[offset-1][bd]);

+    // temp5_i = QMF_IM(buffer[offset-1][bd]);

     RE(ac->r12) = r01r -

-        (QMF_RE(buffer[len+offset-1][bd]) * QMF_RE(buffer[len+offset-2][bd]) + QMF_IM(buffer[len+offset-1][bd]) * QMF_IM(buffer[len+offset-2][bd])) +

-        (QMF_RE(buffer[offset-1][bd]) * QMF_RE(buffer[offset-2][bd]) + QMF_IM(buffer[offset-1][bd]) * QMF_IM(buffer[offset-2][bd]));

+        (temp3_r * temp2_r + temp3_i * temp2_i) +

+        (temp5_r * temp4_r + temp5_i * temp4_i);

     IM(ac->r12) = r01i -

-        (QMF_IM(buffer[len+offset-1][bd]) * QMF_RE(buffer[len+offset-2][bd]) - QMF_RE(buffer[len+offset-1][bd]) * QMF_IM(buffer[len+offset-2][bd])) +

-        (QMF_IM(buffer[offset-1][bd]) * QMF_RE(buffer[offset-2][bd]) - QMF_RE(buffer[offset-1][bd]) * QMF_IM(buffer[offset-2][bd]));

+        (temp3_i * temp2_r - temp3_r * temp2_i) +

+        (temp5_i * temp4_r - temp5_r * temp4_i);

     RE(ac->r22) = r11r -

-        (QMF_RE(buffer[len+offset-2][bd]) * QMF_RE(buffer[len+offset-2][bd]) + QMF_IM(buffer[len+offset-2][bd]) * QMF_IM(buffer[len+offset-2][bd])) +

-        (QMF_RE(buffer[offset-2][bd]) * QMF_RE(buffer[offset-2][bd]) + QMF_IM(buffer[offset-2][bd]) * QMF_IM(buffer[offset-2][bd]));

+        (temp2_r * temp2_r + temp2_i * temp2_i) +

+        (temp4_r * temp4_r + temp4_i * temp4_i);

 #endif

     RE(ac->r01) = r01r;

--- a/libfaad/sbr_qmf.c

+++ b/libfaad/sbr_qmf.c

@@ -1,19 +1,19 @@

/*

 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding

 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com

-**

+**

 ** This program is free software; you can redistribute it and/or modify

 ** it under the terms of the GNU General Public License as published by

 ** the Free Software Foundation; either version 2 of the License, or

 ** (at your option) any later version.

-**

+**

 ** This program is distributed in the hope that it will be useful,

 ** but WITHOUT ANY WARRANTY; without even the implied warranty of

 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 ** GNU General Public License for more details.

-**

+**

 ** You should have received a copy of the GNU General Public License

-** along with this program; if not, write to the Free Software

+** along with this program; if not, write to the Free Software

 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

**

 ** Any non-GPL usage of this software or parts of this software is strictly

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: sbr_qmf.c,v 1.24 2004/05/17 10:18:03 menno Exp $

+** $Id: sbr_qmf.c,v 1.27 2004/09/04 14:56:28 menno Exp $

**/

 #include "common.h"

@@ -38,7 +38,6 @@

 #include "sbr_qmf_c.h"

 #include "sbr_syntax.h"

 qmfa_info *qmfa_init(uint8_t channels)

     qmfa_info *qmfa = (qmfa_info*)faad_malloc(sizeof(qmfa_info));

@@ -69,7 +68,7 @@

     ALIGN real_t u[64];

 #ifndef SBR_LOW_POWER

-    ALIGN real_t x[64], y[64];

+    ALIGN real_t in_real[32], in_imag[32], out_real[32], out_imag[32];

 #else

     ALIGN real_t y[32];

 #endif

@@ -134,30 +133,51 @@

 #else

-        x[0] = u[0];

-        for (n = 0; n < 31; n++)

+        // Reordering of data moved from DCT_IV to here

+        in_imag[31] = u[1];

+        in_real[0] = u[0];

+        for (n = 1; n < 31; n++)

-            x[2*n+1] = u[n+1] + u[63-n];

-            x[2*n+2] = u[n+1] - u[63-n];

+            in_imag[31 - n] = u[n+1];

+            in_real[n] = -u[64-n];

-        x[63] = u[32];

+        in_imag[0] = u[32];

+        in_real[31] = -u[33];

-        DCT4_64_kernel(y, x);

+        // dct4_kernel is DCT_IV without reordering which is done before and after FFT

+        dct4_kernel(in_real, in_imag, out_real, out_imag);

-        for (n = 0; n < 32; n++)

-        {

-            if (n < kx)

-            {

+        // Reordering of data moved from DCT_IV to here

+        for (n = 0; n < 16; n++) {

+            if (2*n+1 < kx) {

 #ifdef FIXED_POINT

-                QMF_RE(X[l + offset][n]) = y[n] /*<< 1*/;

-                QMF_IM(X[l + offset][n]) = -y[63-n] /*<< 1*/;

+                QMF_RE(X[l + offset][2*n])   = out_real[n];

+                QMF_IM(X[l + offset][2*n])   = out_imag[n];

+                QMF_RE(X[l + offset][2*n+1]) = -out_imag[31-n];

+                QMF_IM(X[l + offset][2*n+1]) = -out_real[31-n];

 #else

-                QMF_RE(X[l + offset][n]) = 2. * y[n];

-                QMF_IM(X[l + offset][n]) = -2. * y[63-n];

+                QMF_RE(X[l + offset][2*n])   = 2. * out_real[n];

+                QMF_IM(X[l + offset][2*n])   = 2. * out_imag[n];

+                QMF_RE(X[l + offset][2*n+1]) = -2. * out_imag[31-n];

+                QMF_IM(X[l + offset][2*n+1]) = -2. * out_real[31-n];

 #endif

             } else {

-                QMF_RE(X[l + offset][n]) = 0;

-                QMF_IM(X[l + offset][n]) = 0;

+                if (2*n < kx) {

+#ifdef FIXED_POINT

+                    QMF_RE(X[l + offset][2*n])   = out_real[n];

+                    QMF_IM(X[l + offset][2*n])   = out_imag[n];

+#else

+                    QMF_RE(X[l + offset][2*n])   = 2. * out_real[n];

+                    QMF_IM(X[l + offset][2*n])   = 2. * out_imag[n];

+#endif

+                }

+                else {

+                    QMF_RE(X[l + offset][2*n]) = 0;

+                    QMF_IM(X[l + offset][2*n]) = 0;

+                }

+                QMF_RE(X[l + offset][2*n+1]) = 0;

+                QMF_IM(X[l + offset][2*n+1]) = 0;

 #endif

@@ -164,6 +184,42 @@

+static const complex_t qmf32_pre_twiddle[] = /* 32 entries; entry k = { cos(-pi*(0.5*k + 0.25)/64), sin(-pi*(0.5*k + 0.25)/64) }, k = 0..31 */

+{

+    { FRAC_CONST(0.999924701839145), FRAC_CONST(-0.012271538285720) },

+    { FRAC_CONST(0.999322384588350), FRAC_CONST(-0.036807222941359) },

+    { FRAC_CONST(0.998118112900149), FRAC_CONST(-0.061320736302209) },

+    { FRAC_CONST(0.996312612182778), FRAC_CONST(-0.085797312344440) },

+    { FRAC_CONST(0.993906970002356), FRAC_CONST(-0.110222207293883) },

+    { FRAC_CONST(0.990902635427780), FRAC_CONST(-0.134580708507126) },

+    { FRAC_CONST(0.987301418157858), FRAC_CONST(-0.158858143333861) },

+    { FRAC_CONST(0.983105487431216), FRAC_CONST(-0.183039887955141) },

+    { FRAC_CONST(0.978317370719628), FRAC_CONST(-0.207111376192219) },

+    { FRAC_CONST(0.972939952205560), FRAC_CONST(-0.231058108280671) },

+    { FRAC_CONST(0.966976471044852), FRAC_CONST(-0.254865659604515) },

+    { FRAC_CONST(0.960430519415566), FRAC_CONST(-0.278519689385053) },

+    { FRAC_CONST(0.953306040354194), FRAC_CONST(-0.302005949319228) },

+    { FRAC_CONST(0.945607325380521), FRAC_CONST(-0.325310292162263) },

+    { FRAC_CONST(0.937339011912575), FRAC_CONST(-0.348418680249435) },

+    { FRAC_CONST(0.928506080473216), FRAC_CONST(-0.371317193951838) },

+    { FRAC_CONST(0.919113851690058), FRAC_CONST(-0.393992040061048) },

+    { FRAC_CONST(0.909167983090522), FRAC_CONST(-0.416429560097637) },

+    { FRAC_CONST(0.898674465693954), FRAC_CONST(-0.438616238538528) },

+    { FRAC_CONST(0.887639620402854), FRAC_CONST(-0.460538710958240) },

+    { FRAC_CONST(0.876070094195407), FRAC_CONST(-0.482183772079123) },

+    { FRAC_CONST(0.863972856121587), FRAC_CONST(-0.503538383725718) },

+    { FRAC_CONST(0.851355193105265), FRAC_CONST(-0.524589682678469) },

+    { FRAC_CONST(0.838224705554838), FRAC_CONST(-0.545324988422046) },

+    { FRAC_CONST(0.824589302785025), FRAC_CONST(-0.565731810783613) },

+    { FRAC_CONST(0.810457198252595), FRAC_CONST(-0.585797857456439) },

+    { FRAC_CONST(0.795836904608884), FRAC_CONST(-0.605511041404326) },

+    { FRAC_CONST(0.780737228572094), FRAC_CONST(-0.624859488142386) },

+    { FRAC_CONST(0.765167265622459), FRAC_CONST(-0.643831542889791) },

+    { FRAC_CONST(0.749136394523459), FRAC_CONST(-0.662415777590172) },

+    { FRAC_CONST(0.732654271672413), FRAC_CONST(-0.680600997795453) },

+    { FRAC_CONST(0.715730825283819), FRAC_CONST(-0.698376249408973) }

+};

 qmfs_info *qmfs_init(uint8_t channels)

     qmfs_info *qmfs = (qmfs_info*)faad_malloc(sizeof(qmfs_info));

@@ -172,22 +228,6 @@

     qmfs->v = (real_t*)faad_malloc(2 * channels * 20 * sizeof(real_t));

     memset(qmfs->v, 0, 2 * channels * 20 * sizeof(real_t));

-#ifndef SBR_LOW_POWER

-    if (channels == 32)

-    {

-        /* downsampled filterbank */

-        uint8_t k;

-        qmfs->pre_twiddle = (complex_t*)faad_malloc(channels * sizeof(complex_t));

-        /* calculate pre-twiddle factors */

-        for (k = 0; k < channels; k++)

-        {

-            RE(qmfs->pre_twiddle[k]) = cos(-M_PI*(0.5*k + 0.25)/64.);

-            IM(qmfs->pre_twiddle[k]) = sin(-M_PI*(0.5*k + 0.25)/64.);

-        }

-    }

-#endif

     qmfs->v_index = 0;

     qmfs->channels = channels;

@@ -199,10 +239,6 @@

     if (qmfs)

-        if (qmfs->channels == 32)

-        {

-            if (qmfs->pre_twiddle) faad_free(qmfs->pre_twiddle);

-        }

         if (qmfs->v) faad_free(qmfs->v);

         faad_free(qmfs);

@@ -371,8 +407,8 @@

         /* complex pre-twiddle */

         for (k = 0; k < 32; k++)

-            x1[k] = QMF_RE(X[l][k]) * RE(qmfs->pre_twiddle[k]) - QMF_IM(X[l][k]) * IM(qmfs->pre_twiddle[k]);

-            x2[k] = QMF_IM(X[l][k]) * RE(qmfs->pre_twiddle[k]) + QMF_RE(X[l][k]) * IM(qmfs->pre_twiddle[k]);

+            x1[k] = MUL_F(QMF_RE(X[l][k]), RE(qmf32_pre_twiddle[k])) - MUL_F(QMF_IM(X[l][k]), IM(qmf32_pre_twiddle[k]));

+            x2[k] = MUL_F(QMF_IM(X[l][k]), RE(qmf32_pre_twiddle[k])) + MUL_F(QMF_RE(X[l][k]), IM(qmf32_pre_twiddle[k]));

 #ifndef FIXED_POINT

             x1[k] *= scale;

@@ -418,7 +454,24 @@

 void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],

                           real_t *output)

-    ALIGN real_t x1[64], x2[64];

+//    ALIGN real_t x1[64], x2[64];

+#ifndef SBR_LOW_POWER

+    ALIGN real_t in_real1[32], in_imag1[32], out_real1[32], out_imag1[32];

+    ALIGN real_t in_real2[32], in_imag2[32], out_real2[32], out_imag2[32];

+#endif

+    qmf_t * pX;

+    real_t * pring_buffer_1, * pring_buffer_3;

+//    real_t * ptemp_1, * ptemp_2;

+#ifdef PREFER_POINTERS

+    // These pointers are used if target platform has autoinc address generators

+    real_t * pring_buffer_2, * pring_buffer_4;

+    real_t * pring_buffer_5, * pring_buffer_6;

+    real_t * pring_buffer_7, * pring_buffer_8;

+    real_t * pring_buffer_9, * pring_buffer_10;

+    const real_t * pqmf_c_1, * pqmf_c_2, * pqmf_c_3, * pqmf_c_4;

+    const real_t * pqmf_c_5, * pqmf_c_6, * pqmf_c_7, * pqmf_c_8;

+    const real_t * pqmf_c_9, * pqmf_c_10;

+#endif // #ifdef PREFER_POINTERS

 #ifndef FIXED_POINT

     real_t scale = 1.f/64.f;

 #endif

@@ -435,57 +488,138 @@

         /* calculate 128 samples */

 #ifndef FIXED_POINT

-        x1[0] = scale*QMF_RE(X[l][0]);

-        x2[63] = scale*QMF_IM(X[l][0]);

-        for (k = 0; k < 31; k++)

-        {

-            x1[2*k+1] = scale*(QMF_RE(X[l][2*k+1]) - QMF_RE(X[l][2*k+2]));

-            x1[2*k+2] = scale*(QMF_RE(X[l][2*k+1]) + QMF_RE(X[l][2*k+2]));

-            x2[61 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) - QMF_IM(X[l][2*k+1]));

-            x2[62 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) + QMF_IM(X[l][2*k+1]));

+        pX = X[l];

+        in_imag1[31] = scale*QMF_RE(pX[1]);

+        in_real1[0]  = scale*QMF_RE(pX[0]);

+        in_imag2[31] = scale*QMF_IM(pX[63-1]);

+        in_real2[0]  = scale*QMF_IM(pX[63-0]);

+        for (k = 1; k < 31; k++)

+        {

+            in_imag1[31 - k] = scale*QMF_RE(pX[2*k + 1]);

+            in_real1[     k] = scale*QMF_RE(pX[2*k    ]);

+            in_imag2[31 - k] = scale*QMF_IM(pX[63 - (2*k + 1)]);

+            in_real2[     k] = scale*QMF_IM(pX[63 - (2*k    )]);

-        x1[63] = scale*QMF_RE(X[l][63]);

-        x2[0] = scale*QMF_IM(X[l][63]);

+        in_imag1[0]  = scale*QMF_RE(pX[63]);

+        in_real1[31] = scale*QMF_RE(pX[62]);

+        in_imag2[0]  = scale*QMF_IM(pX[63-63]);

+        in_real2[31] = scale*QMF_IM(pX[63-62]);

 #else

-        x1[0] = QMF_RE(X[l][0])>>1;

-        x2[63] = QMF_IM(X[l][0])>>1;

-        for (k = 0; k < 31; k++)

-        {

-            x1[2*k+1] = (QMF_RE(X[l][2*k+1]) - QMF_RE(X[l][2*k+2]))>>1;

-            x1[2*k+2] = (QMF_RE(X[l][2*k+1]) + QMF_RE(X[l][2*k+2]))>>1;

-            x2[61 - 2*k] = (QMF_IM(X[l][2*k+2]) - QMF_IM(X[l][2*k+1]))>>1;

-            x2[62 - 2*k] = (QMF_IM(X[l][2*k+2]) + QMF_IM(X[l][2*k+1]))>>1;

+        pX = X[l];

+        in_imag1[31] = QMF_RE(pX[1]) >> 1;

+        in_real1[0]  = QMF_RE(pX[0]) >> 1;

+        in_imag2[31] = QMF_IM(pX[62]) >> 1;

+        in_real2[0]  = QMF_IM(pX[63]) >> 1;

+        for (k = 1; k < 31; k++)

+        {

+            in_imag1[31 - k] = QMF_RE(pX[2*k + 1]) >> 1;

+            in_real1[     k] = QMF_RE(pX[2*k    ]) >> 1;

+            in_imag2[31 - k] = QMF_IM(pX[63 - (2*k + 1)]) >> 1;

+            in_real2[     k] = QMF_IM(pX[63 - (2*k    )]) >> 1;

-        x1[63] = QMF_RE(X[l][63])>>1;

-        x2[0] = QMF_IM(X[l][63])>>1;

+        in_imag1[0]  = QMF_RE(pX[63]) >> 1;

+        in_real1[31] = QMF_RE(pX[62]) >> 1;

+        in_imag2[0]  = QMF_IM(pX[0]) >> 1;

+        in_real2[31] = QMF_IM(pX[1]) >> 1;

 #endif

-        DCT4_64_kernel(x1, x1);

-        DCT4_64_kernel(x2, x2);

+        // dct4_kernel is DCT_IV without reordering which is done before and after FFT

+        dct4_kernel(in_real1, in_imag1, out_real1, out_imag1);

+        dct4_kernel(in_real2, in_imag2, out_real2, out_imag2);

+        pring_buffer_1 = qmfs->v + qmfs->v_index;

+        pring_buffer_3 = pring_buffer_1 + 1280;

+#ifdef PREFER_POINTERS

+        pring_buffer_2 = pring_buffer_1 + 127;

+        pring_buffer_4 = pring_buffer_1 + (1280 + 127);

+#endif // #ifdef PREFER_POINTERS

+//        ptemp_1 = x1;

+//        ptemp_2 = x2;

+#ifdef PREFER_POINTERS

+        for (n = 0; n < 32; n ++)

+        {

+            //real_t x1 = *ptemp_1++;

+            //real_t x2 = *ptemp_2++;

+            // pring_buffer_3 and pring_buffer_4 are needed only for double ring buffer

+            *pring_buffer_1++ = *pring_buffer_3++ = out_real2[n] - out_real1[n];

+            *pring_buffer_2-- = *pring_buffer_4-- = out_real2[n] + out_real1[n];

+            //x1 = *ptemp_1++;

+            //x2 = *ptemp_2++;

+            *pring_buffer_1++ = *pring_buffer_3++ = out_imag2[31-n] + out_imag1[31-n];

+            *pring_buffer_2-- = *pring_buffer_4-- = out_imag2[31-n] - out_imag1[31-n];

+        }

+#else // #ifdef PREFER_POINTERS

         for (n = 0; n < 32; n++)

-            qmfs->v[qmfs->v_index + 2*n]       = qmfs->v[qmfs->v_index + 1280 + 2*n]       =  x2[2*n]   - x1[2*n];

-            qmfs->v[qmfs->v_index + 127 - 2*n] = qmfs->v[qmfs->v_index + 1280 + 127 - 2*n] =  x2[2*n]   + x1[2*n];

-            qmfs->v[qmfs->v_index + 2*n+1]     = qmfs->v[qmfs->v_index + 1280 + 2*n+1]     = -x2[2*n+1] - x1[2*n+1];

-            qmfs->v[qmfs->v_index + 126 - 2*n] = qmfs->v[qmfs->v_index + 1280 + 126 - 2*n] = -x2[2*n+1] + x1[2*n+1];

+            // pring_buffer_3 and pring_buffer_4 are needed only for double ring buffer

+            pring_buffer_1[2*n]         = pring_buffer_3[2*n]         = out_real2[n] - out_real1[n];

+            pring_buffer_1[127-2*n]     = pring_buffer_3[127-2*n]     = out_real2[n] + out_real1[n];

+            pring_buffer_1[2*n+1]       = pring_buffer_3[2*n+1]       = out_imag2[31-n] + out_imag1[31-n];

+            pring_buffer_1[127-(2*n+1)] = pring_buffer_3[127-(2*n+1)] = out_imag2[31-n] - out_imag1[31-n];

+#endif // #ifdef PREFER_POINTERS

+        pring_buffer_1 = qmfs->v + qmfs->v_index;

+#ifdef PREFER_POINTERS

+        pring_buffer_2 = pring_buffer_1 + 192;

+        pring_buffer_3 = pring_buffer_1 + 256;

+        pring_buffer_4 = pring_buffer_1 + (256 + 192);

+        pring_buffer_5 = pring_buffer_1 + 512;

+        pring_buffer_6 = pring_buffer_1 + (512 + 192);

+        pring_buffer_7 = pring_buffer_1 + 768;

+        pring_buffer_8 = pring_buffer_1 + (768 + 192);

+        pring_buffer_9 = pring_buffer_1 + 1024;

+        pring_buffer_10 = pring_buffer_1 + (1024 + 192);

+        pqmf_c_1 = qmf_c;

+        pqmf_c_2 = qmf_c + 64;

+        pqmf_c_3 = qmf_c + 128;

+        pqmf_c_4 = qmf_c + 192;

+        pqmf_c_5 = qmf_c + 256;

+        pqmf_c_6 = qmf_c + 320;

+        pqmf_c_7 = qmf_c + 384;

+        pqmf_c_8 = qmf_c + 448;

+        pqmf_c_9 = qmf_c + 512;

+        pqmf_c_10 = qmf_c + 576;

+#endif // #ifdef PREFER_POINTERS

         /* calculate 64 output samples and window */

         for (k = 0; k < 64; k++)

-            output[out++] = MUL_F(qmfs->v[qmfs->v_index + k], qmf_c[k]) +

-                MUL_F(qmfs->v[qmfs->v_index + 192 + k], qmf_c[64 + k]) +

-                MUL_F(qmfs->v[qmfs->v_index + 256 + k], qmf_c[128 + k]) +

-                MUL_F(qmfs->v[qmfs->v_index + 256 + 192 + k], qmf_c[128 + 64 + k]) +

-                MUL_F(qmfs->v[qmfs->v_index + 512 + k], qmf_c[256 + k]) +

-                MUL_F(qmfs->v[qmfs->v_index + 512 + 192 + k], qmf_c[256 + 64 + k]) +

-                MUL_F(qmfs->v[qmfs->v_index + 768 + k], qmf_c[384 + k]) +

-                MUL_F(qmfs->v[qmfs->v_index + 768 + 192 + k], qmf_c[384 + 64 + k]) +

-                MUL_F(qmfs->v[qmfs->v_index + 1024 + k], qmf_c[512 + k]) +

-                MUL_F(qmfs->v[qmfs->v_index + 1024 + 192 + k], qmf_c[512 + 64 + k]);

+#ifdef PREFER_POINTERS

+            output[out++] =

+                MUL_F(*pring_buffer_1++,  *pqmf_c_1++) +

+                MUL_F(*pring_buffer_2++,  *pqmf_c_2++) +

+                MUL_F(*pring_buffer_3++,  *pqmf_c_3++) +

+                MUL_F(*pring_buffer_4++,  *pqmf_c_4++) +

+                MUL_F(*pring_buffer_5++,  *pqmf_c_5++) +

+                MUL_F(*pring_buffer_6++,  *pqmf_c_6++) +

+                MUL_F(*pring_buffer_7++,  *pqmf_c_7++) +

+                MUL_F(*pring_buffer_8++,  *pqmf_c_8++) +

+                MUL_F(*pring_buffer_9++,  *pqmf_c_9++) +

+                MUL_F(*pring_buffer_10++, *pqmf_c_10++);

+#else // #ifdef PREFER_POINTERS

+            output[out++] =

+                MUL_F(pring_buffer_1[k+0],          qmf_c[k+0])   +

+                MUL_F(pring_buffer_1[k+192],        qmf_c[k+64])  +

+                MUL_F(pring_buffer_1[k+256],        qmf_c[k+128]) +

+                MUL_F(pring_buffer_1[k+(256+192)],  qmf_c[k+192]) +

+                MUL_F(pring_buffer_1[k+512],        qmf_c[k+256]) +

+                MUL_F(pring_buffer_1[k+(512+192)],  qmf_c[k+320]) +

+                MUL_F(pring_buffer_1[k+768],        qmf_c[k+384]) +

+                MUL_F(pring_buffer_1[k+(768+192)],  qmf_c[k+448]) +

+                MUL_F(pring_buffer_1[k+1024],       qmf_c[k+512]) +

+                MUL_F(pring_buffer_1[k+(1024+192)], qmf_c[k+576]);

+#endif // #ifdef PREFER_POINTERS

         /* update ringbuffer index */

--- a/libfaad/specrec.c

+++ b/libfaad/specrec.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: specrec.c,v 1.50 2004/05/17 10:18:03 menno Exp $

+** $Id: specrec.c,v 1.51 2004/06/30 12:45:57 menno Exp $

**/

/*

@@ -436,7 +436,14 @@

     if (q < IQ_TABLE_SIZE)

+    {

+//#define IQUANT_PRINT

+#ifdef IQUANT_PRINT

+        //printf("0x%.8X\n", sgn * tab[q]);

+        printf("%d\n", sgn * tab[q]);

+#endif

         return sgn * tab[q];

+    }

 #ifndef BIG_IQ_TABLE

     if (q >= 8192)

@@ -617,6 +624,18 @@

                     spec_data[gindex+(win*win_inc)+j+bin+1] = MUL_C(spec_data[gindex+(win*win_inc)+j+bin+1],pow2_table[frac]);

                     spec_data[gindex+(win*win_inc)+j+bin+2] = MUL_C(spec_data[gindex+(win*win_inc)+j+bin+2],pow2_table[frac]);

                     spec_data[gindex+(win*win_inc)+j+bin+3] = MUL_C(spec_data[gindex+(win*win_inc)+j+bin+3],pow2_table[frac]);

+//#define SCFS_PRINT

+#ifdef SCFS_PRINT

+                    //printf("%d\n", spec_data[gindex+(win*win_inc)+j+bin+0]);

+                    //printf("%d\n", spec_data[gindex+(win*win_inc)+j+bin+1]);

+                    //printf("%d\n", spec_data[gindex+(win*win_inc)+j+bin+2]);

+                    //printf("%d\n", spec_data[gindex+(win*win_inc)+j+bin+3]);

+                    printf("0x%.8X\n", spec_data[gindex+(win*win_inc)+j+bin+0]);

+                    printf("0x%.8X\n", spec_data[gindex+(win*win_inc)+j+bin+1]);

+                    printf("0x%.8X\n", spec_data[gindex+(win*win_inc)+j+bin+2]);

+                    printf("0x%.8X\n", spec_data[gindex+(win*win_inc)+j+bin+3]);

+#endif

 #endif

                     gincrease += 4;

--- a/libfaad/syntax.c

+++ b/libfaad/syntax.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: syntax.c,v 1.79 2004/05/17 10:18:03 menno Exp $

+** $Id: syntax.c,v 1.80 2004/06/30 12:45:57 menno Exp $

**/

/*

@@ -1979,10 +1979,17 @@

 #endif

+//#define SFBO_PRINT

+#ifdef SFBO_PRINT

+                printf("%d\n", ics->sect_sfb_offset[g][ics->sect_start[g][i]]);

+#endif

                 p += (ics->sect_sfb_offset[g][ics->sect_end[g][i]] -

                     ics->sect_sfb_offset[g][ics->sect_start[g][i]]);

                 break;

             default:

+#ifdef SFBO_PRINT

+                printf("%d\n", ics->sect_sfb_offset[g][ics->sect_start[g][i]]);

+#endif

                 for (k = ics->sect_sfb_offset[g][ics->sect_start[g][i]];

                      k < ics->sect_sfb_offset[g][ics->sect_end[g][i]]; k += inc)

--- a/libfaad/tns.c

+++ b/libfaad/tns.c

@@ -22,7 +22,7 @@

 ** Commercial non-GPL licensing of this software is possible.

 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.

**

-** $Id: tns.c,v 1.32 2004/04/12 18:17:42 menno Exp $

+** $Id: tns.c,v 1.33 2004/06/30 12:45:57 menno Exp $

**/

 #include "common.h"

@@ -259,6 +259,12 @@

         *spectrum = y;

         spectrum += inc;

+//#define TNS_PRINT

+#ifdef TNS_PRINT

+        //printf("%d\n", y);

+        printf("0x%.8X\n", y);

+#endif

--

⑨