shithub: aacenc

Download patch

ref: 30c4210d3fdf1bb1227dd8cddcc1ff3d947f8f41
parent: 562ee6e14b265790070b040d2d436011cd1c590e
author: lenox <lenox>
date: Thu Jan 6 05:39:33 EST 2000

new restructured mdct and fft

--- a/fastfft.c
+++ b/fastfft.c
@@ -1,4 +1,3 @@
-#include "transfo.h"
 #include "fastfft.h"
 
 int unscambled64[64];
@@ -5,15 +4,16 @@
 int unscambled256[256];
 int unscambled512[512];
 int unscambled2048[2048];
+fftw_complex FFTarray[2048];    /* the array for in-place FFT */
 
-static fftw_real K831469612[1] =
-{FFTW_KONST(+0.831469612302545237078788377617905756738560812)};
-static fftw_real K555570233[1] =
-{FFTW_KONST(+0.555570233019602224742830813948532874374937191)};
-static fftw_real K195090322[1] =
-{FFTW_KONST(+0.195090322016128267848284868477022240927691618)};
 static fftw_real K980785280[1] =
 {FFTW_KONST(+0.980785280403230449126182236134239036973933731)};
+static fftw_real K195090322[1] =
+{FFTW_KONST(+0.195090322016128267848284868477022240927691618)};
+static fftw_real K555570233[1] =
+{FFTW_KONST(+0.555570233019602224742830813948532874374937191)};
+static fftw_real K831469612[1] =
+{FFTW_KONST(+0.831469612302545237078788377617905756738560812)};
 static fftw_real K923879532[1] =
 {FFTW_KONST(+0.923879532511286756128183189396788286822416626)};
 static fftw_real K382683432[1] =
@@ -1529,7 +1529,9 @@
 { 0.00306795676296614, 0.999995293809576 },
 { -0.999981175282601, 0.0061358846491548 },
 };
+///////////////////////////////////////////////////////////////
 
+
 void PFFTW(16) (fftw_complex * input) {
      fftw_real tmp332;
      fftw_real tmp331;
@@ -2665,6 +2667,1145 @@
      PFFTW(512)(input + 1536);
 }
 
+///////////////////////////////////////////////////////////////
+void PFFTWI(16) (fftw_complex * input) {
+     fftw_real tmp333;
+     fftw_real tmp332;
+     fftw_real tmp331;
+     fftw_real tmp330;
+     fftw_real tmp329;
+     fftw_real tmp328;
+     fftw_real tmp327;
+     fftw_real tmp326;
+     fftw_real tmp325;
+     fftw_real tmp324;
+     fftw_real tmp323;
+     fftw_real tmp322;
+     fftw_real tmp321;
+     fftw_real tmp320;
+     fftw_real tmp319;
+     fftw_real tmp318;
+     fftw_real tmp317;
+     fftw_real tmp316;
+     fftw_real tmp315;
+     fftw_real tmp314;
+     fftw_real tmp313;
+     fftw_real tmp312;
+     fftw_real tmp311;
+     fftw_real tmp310;
+     fftw_real tmp309;
+     fftw_real tmp308;
+     fftw_real tmp307;
+     fftw_real tmp306;
+     fftw_real tmp305;
+     fftw_real tmp304;
+     fftw_real tmp303;
+     fftw_real tmp302;
+     fftw_real tmp301;
+     fftw_real st1;
+     fftw_real st2;
+     fftw_real st3;
+     fftw_real st4;
+     fftw_real st5;
+     fftw_real st6;
+     fftw_real st7;
+     fftw_real st8;
+     st8 = c_re(input[4]);
+     st8 = st8 - c_re(input[12]);
+     st7 = c_im(input[0]);
+     st7 = st7 - c_im(input[8]);
+     st6 = c_re(input[0]);
+     st6 = st6 - c_re(input[8]);
+     st5 = st8 + st7;
+     st7 = st7 - st8;
+     st4 = c_im(input[4]);
+     st4 = st4 - c_im(input[12]);
+     st3 = c_re(input[0]);
+     st3 = st3 + c_re(input[8]);
+     st2 = st6 - st4;
+     st6 = st6 + st4;
+     st1 = c_re(input[4]);
+     st1 = st1 + c_re(input[12]);
+     st8 = c_im(input[0]);
+     st8 = st8 + c_im(input[8]);
+     st4 = st3 + st1;
+     st3 = st3 - st1;
+     st1 = c_im(input[4]);
+     st1 = st1 + c_im(input[12]);
+     tmp301 = st6;
+     st6 = c_re(input[2]);
+     st6 = st6 + c_re(input[10]);
+     tmp302 = st7;
+     st7 = st8 + st1;
+     st8 = st8 - st1;
+     st1 = c_re(input[6]);
+     st1 = st1 + c_re(input[14]);
+     tmp303 = st2;
+     st2 = c_im(input[2]);
+     st2 = st2 + c_im(input[10]);
+     tmp304 = st5;
+     st5 = st6 + st1;
+     st6 = st6 - st1;
+     st1 = c_im(input[6]);
+     st1 = st1 + c_im(input[14]);
+     tmp305 = st3;
+     st3 = st4 + st5;
+     st4 = st4 - st5;
+     st5 = st2 + st1;
+     st2 = st2 - st1;
+     st1 = st6 + st8;
+     tmp306 = st1;
+     st1 = st7 - st5;
+     st7 = st7 + st5;
+     st5 = tmp305 - st2;
+     st2 = tmp305 + st2;
+     st8 = st8 - st6;
+     st6 = c_re(input[6]);
+     st6 = st6 - c_re(input[14]);
+     tmp307 = st8;
+     st8 = c_im(input[2]);
+     st8 = st8 - c_im(input[10]);
+     tmp308 = st2;
+     st2 = c_re(input[2]);
+     st2 = st2 - c_re(input[10]);
+     tmp309 = st4;
+     st4 = st6 + st8;
+     st8 = st8 - st6;
+     st6 = c_im(input[6]);
+     st6 = st6 - c_im(input[14]);
+     tmp310 = st5;
+     st5 = c_re(input[1]);
+     st5 = st5 - c_re(input[9]);
+     tmp311 = st7;
+     st7 = st2 - st6;
+     st2 = st2 + st6;
+     st6 = c_im(input[5]);
+     tmp312 = st1;
+     st1 = st4 + st7;
+     st7 = st7 - st4;
+     st4 = st2 - st8;
+     st1 = st1 * K707106781[0];
+     st8 = st8 + st2;
+     st7 = st7 * K707106781[0];
+     st6 = st6 - c_im(input[13]);
+     st4 = st4 * K707106781[0];
+     st2 = tmp304 - st1;
+     st8 = st8 * K707106781[0];
+     tmp313 = st3;
+     st3 = st5 - st6;
+     st5 = st5 + st6;
+     st6 = tmp303 + st7;
+     tmp314 = st6;
+     st6 = st3 * K923879532[0];
+     st7 = tmp303 - st7;
+     st3 = st3 * K382683432[0];
+     st1 = tmp304 + st1;
+     tmp315 = st1;
+     st1 = st5 * K382683432[0];
+     tmp316 = st7;
+     st7 = tmp302 - st4;
+     st5 = st5 * K923879532[0];
+     st4 = tmp302 + st4;
+     tmp317 = st4;
+     st4 = tmp301 - st8;
+     st8 = tmp301 + st8;
+     tmp318 = st8;
+     st8 = c_re(input[5]);
+     st8 = st8 - c_re(input[13]);
+     tmp319 = st4;
+     st4 = c_im(input[1]);
+     st4 = st4 - c_im(input[9]);
+     tmp320 = st7;
+     st7 = c_re(input[3]);
+     st7 = st7 - c_re(input[11]);
+     tmp321 = st2;
+     st2 = st8 + st4;
+     st4 = st4 - st8;
+     st8 = c_im(input[7]);
+     tmp322 = st5;
+     st5 = st2 * K382683432[0];
+     st8 = st8 - c_im(input[15]);
+     st2 = st2 * K923879532[0];
+     st6 = st6 - st5;
+     st5 = st4 * K923879532[0];
+     st2 = st2 + st3;
+     st4 = st4 * K382683432[0];
+     st1 = st1 - st5;
+     st3 = st7 - st8;
+     st4 = st4 + tmp322;
+     st7 = st7 + st8;
+     st8 = st3 * K382683432[0];
+     st5 = c_re(input[7]);
+     st3 = st3 * K923879532[0];
+     st5 = st5 - c_re(input[15]);
+     tmp323 = st4;
+     st4 = st7 * K923879532[0];
+     tmp324 = st1;
+     st1 = c_im(input[3]);
+     st7 = st7 * K382683432[0];
+     st1 = st1 - c_im(input[11]);
+     tmp325 = st2;
+     st2 = c_re(input[1]);
+     st2 = st2 + c_re(input[9]);
+     tmp326 = st6;
+     st6 = st5 + st1;
+     st1 = st1 - st5;
+     st5 = c_re(input[5]);
+     tmp327 = st7;
+     st7 = st6 * K923879532[0];
+     st5 = st5 + c_re(input[13]);
+     st6 = st6 * K382683432[0];
+     st8 = st8 - st7;
+     st7 = st1 * K382683432[0];
+     st6 = st6 + st3;
+     st1 = st1 * K923879532[0];
+     st7 = st7 - st4;
+     st3 = st2 + st5;
+     st1 = st1 + tmp327;
+     st2 = st2 - st5;
+     st4 = tmp326 - st8;
+     st8 = tmp326 + st8;
+     st5 = tmp325 - st6;
+     tmp328 = st2;
+     st2 = tmp321 - st4;
+     st4 = tmp321 + st4;
+     tmp329 = st3;
+     st3 = tmp314 - st8;
+     st8 = tmp314 + st8;
+     tmp330 = st2;
+     st2 = tmp316 - st5;
+     st5 = tmp316 + st5;
+     c_re(input[9]) = st3;
+     c_re(input[1]) = st8;
+     c_re(input[5]) = st2;
+     c_re(input[13]) = st5;
+     st6 = tmp325 + st6;
+     st3 = tmp324 - st7;
+     st8 = tmp323 - st1;
+     st2 = tmp315 - st6;
+     st6 = tmp315 + st6;
+     st5 = tmp320 - st3;
+     st3 = tmp320 + st3;
+     tmp331 = st5;
+     st5 = tmp317 - st8;
+     st8 = tmp317 + st8;
+     st7 = tmp324 + st7;
+     st1 = tmp323 + st1;
+     tmp332 = st3;
+     st3 = c_im(input[1]);
+     c_im(input[1]) = st6;
+     st3 = st3 + c_im(input[9]);
+     c_im(input[9]) = st2;
+     st2 = tmp319 - st7;
+     st7 = tmp319 + st7;
+     st6 = tmp318 - st1;
+     st1 = tmp318 + st1;
+     tmp333 = st5;
+     st5 = c_im(input[5]);
+     c_im(input[5]) = st4;
+     st5 = st5 + c_im(input[13]);
+     c_im(input[13]) = tmp330;
+     st4 = st3 - st5;
+     st3 = st3 + st5;
+     st5 = c_re(input[3]);
+     c_re(input[3]) = st7;
+     st5 = st5 + c_re(input[11]);
+     c_re(input[11]) = st2;
+     st2 = c_re(input[7]);
+     c_re(input[7]) = st6;
+     st2 = st2 + c_re(input[15]);
+     c_re(input[15]) = st1;
+     st7 = st5 + st2;
+     st5 = st5 - st2;
+     st6 = c_im(input[3]);
+     c_im(input[3]) = st8;
+     st6 = st6 + c_im(input[11]);
+     c_im(input[11]) = tmp333;
+     st8 = tmp329 + st7;
+     st7 = tmp329 - st7;
+     st1 = st5 + st4;
+     st4 = st4 - st5;
+     st2 = tmp313 - st8;
+     st8 = tmp313 + st8;
+     st5 = st7 + tmp312;
+     st7 = tmp312 - st7;
+     c_re(input[8]) = st2;
+     c_re(input[0]) = st8;
+     c_im(input[4]) = st5;
+     c_im(input[12]) = st7;
+     st2 = c_im(input[7]);
+     c_im(input[7]) = tmp332;
+     st2 = st2 + c_im(input[15]);
+     c_im(input[15]) = tmp331;
+     st8 = st6 - st2;
+     st6 = st6 + st2;
+     st5 = tmp328 - st8;
+     st8 = tmp328 + st8;
+     st7 = st3 - st6;
+     st2 = st5 - st1;
+     st1 = st1 + st5;
+     st3 = st3 + st6;
+     st2 = st2 * K707106781[0];
+     st6 = st4 + st8;
+     st1 = st1 * K707106781[0];
+     st8 = st8 - st4;
+     st6 = st6 * K707106781[0];
+     st4 = tmp311 + st3;
+     st8 = st8 * K707106781[0];
+     st3 = tmp311 - st3;
+     st5 = tmp310 - st2;
+     c_im(input[0]) = st4;
+     c_im(input[8]) = st3;
+     c_re(input[10]) = st5;
+     st2 = tmp310 + st2;
+     st4 = tmp309 + st7;
+     st7 = tmp309 - st7;
+     st3 = tmp308 - st6;
+     c_re(input[2]) = st2;
+     c_re(input[12]) = st4;
+     c_re(input[4]) = st7;
+     c_re(input[6]) = st3;
+     st6 = tmp308 + st6;
+     st5 = tmp307 - st8;
+     st8 = tmp307 + st8;
+     st2 = tmp306 - st1;
+     c_re(input[14]) = st6;
+     c_im(input[14]) = st5;
+     c_im(input[6]) = st8;
+     c_im(input[10]) = st2;
+     st1 = tmp306 + st1;
+     c_im(input[2]) = st1;
+}
+
+void PFFTWI(32) (fftw_complex * input) {
+     fftw_real tmp714;
+     fftw_real tmp713;
+     fftw_real tmp712;
+     fftw_real tmp711;
+     fftw_real tmp710;
+     fftw_real tmp709;
+     fftw_real tmp708;
+     fftw_real tmp707;
+     fftw_real tmp706;
+     fftw_real tmp705;
+     fftw_real tmp704;
+     fftw_real tmp703;
+     fftw_real tmp702;
+     fftw_real tmp701;
+     fftw_real tmp700;
+     fftw_real tmp699;
+     fftw_real tmp698;
+     fftw_real tmp697;
+     fftw_real tmp696;
+     fftw_real tmp695;
+     fftw_real tmp694;
+     fftw_real tmp693;
+     fftw_real tmp692;
+     fftw_real tmp691;
+     fftw_real tmp690;
+     fftw_real tmp689;
+     fftw_real tmp688;
+     fftw_real tmp687;
+     fftw_real tmp686;
+     fftw_real tmp685;
+     fftw_real tmp684;
+     fftw_real tmp683;
+     fftw_real tmp682;
+     fftw_real tmp681;
+     fftw_real tmp680;
+     fftw_real tmp679;
+     fftw_real tmp678;
+     fftw_real tmp677;
+     fftw_real tmp676;
+     fftw_real tmp675;
+     fftw_real tmp674;
+     fftw_real tmp673;
+     fftw_real tmp672;
+     fftw_real tmp671;
+     fftw_real tmp670;
+     fftw_real tmp669;
+     fftw_real tmp668;
+     fftw_real tmp667;
+     fftw_real tmp666;
+     fftw_real tmp665;
+     fftw_real tmp664;
+     fftw_real tmp663;
+     fftw_real tmp662;
+     fftw_real tmp661;
+     fftw_real tmp660;
+     fftw_real tmp659;
+     fftw_real tmp658;
+     fftw_real tmp657;
+     fftw_real tmp656;
+     fftw_real tmp655;
+     fftw_real tmp654;
+     fftw_real tmp653;
+     fftw_real tmp652;
+     fftw_real tmp651;
+     fftw_real tmp650;
+     fftw_real tmp649;
+     fftw_real tmp648;
+     fftw_real tmp647;
+     fftw_real tmp646;
+     fftw_real tmp645;
+     fftw_real tmp644;
+     fftw_real tmp643;
+     fftw_real tmp642;
+     fftw_real tmp641;
+     fftw_real tmp640;
+     fftw_real tmp639;
+     fftw_real tmp638;
+     fftw_real tmp637;
+     fftw_real tmp636;
+     fftw_real tmp635;
+     fftw_real tmp634;
+     fftw_real tmp633;
+     fftw_real tmp632;
+     fftw_real tmp631;
+     fftw_real tmp630;
+     fftw_real tmp629;
+     fftw_real tmp628;
+     fftw_real tmp627;
+     fftw_real tmp626;
+     fftw_real tmp625;
+     fftw_real tmp624;
+     fftw_real tmp623;
+     fftw_real tmp622;
+     fftw_real tmp621;
+     fftw_real st1;
+     fftw_real st2;
+     fftw_real st3;
+     fftw_real st4;
+     fftw_real st5;
+     fftw_real st6;
+     fftw_real st7;
+     fftw_real st8;
+     st8 = c_re(input[8]);
+     st8 = st8 - c_re(input[24]);
+     st7 = c_im(input[0]);
+     st7 = st7 - c_im(input[16]);
+     st6 = st8 + st7;
+     st7 = st7 - st8;
+     st5 = c_re(input[0]);
+     st5 = st5 + c_re(input[16]);
+     st4 = c_re(input[8]);
+     st4 = st4 + c_re(input[24]);
+     st3 = st5 + st4;
+     st5 = st5 - st4;
+     st2 = c_im(input[0]);
+     st2 = st2 + c_im(input[16]);
+     st1 = c_im(input[8]);
+     st1 = st1 + c_im(input[24]);
+     st8 = st2 + st1;
+     st2 = st2 - st1;
+     st4 = c_re(input[4]);
+     st4 = st4 + c_re(input[20]);
+     st1 = c_re(input[28]);
+     st1 = st1 + c_re(input[12]);
+     tmp621 = st6;
+     st6 = st4 + st1;
+     st4 = st4 - st1;
+     st1 = st3 + st6;
+     st3 = st3 - st6;
+     st6 = st2 - st4;
+     st4 = st4 + st2;
+     st2 = c_im(input[4]);
+     st2 = st2 + c_im(input[20]);
+     tmp622 = st4;
+     st4 = c_im(input[28]);
+     st4 = st4 + c_im(input[12]);
+     tmp623 = st6;
+     st6 = st2 + st4;
+     st4 = st4 - st2;
+     st2 = st8 + st6;
+     st8 = st8 - st6;
+     st6 = st5 - st4;
+     st5 = st5 + st4;
+     st4 = c_re(input[0]);
+     st4 = st4 - c_re(input[16]);
+     tmp624 = st5;
+     st5 = c_im(input[8]);
+     st5 = st5 - c_im(input[24]);
+     tmp625 = st6;
+     st6 = st4 - st5;
+     st4 = st4 + st5;
+     st5 = c_re(input[4]);
+     st5 = st5 - c_re(input[20]);
+     tmp626 = st3;
+     st3 = c_im(input[4]);
+     st3 = st3 - c_im(input[20]);
+     tmp627 = st2;
+     st2 = st5 + st3;
+     st5 = st5 - st3;
+     st3 = c_im(input[28]);
+     st3 = st3 - c_im(input[12]);
+     tmp628 = st8;
+     st8 = c_re(input[28]);
+     st8 = st8 - c_re(input[12]);
+     tmp629 = st1;
+     st1 = st3 - st8;
+     st8 = st8 + st3;
+     st3 = st2 + st1;
+     st3 = st3 * K707106781[0];
+     st1 = st1 - st2;
+     st1 = st1 * K707106781[0];
+     st2 = st5 + st8;
+     st2 = st2 * K707106781[0];
+     st5 = st5 - st8;
+     st5 = st5 * K707106781[0];
+     st8 = st7 - st5;
+     tmp630 = st8;
+     st8 = st4 - st1;
+     tmp631 = st8;
+     st8 = tmp621 - st3;
+     tmp632 = st8;
+     st8 = st6 - st2;
+     st3 = tmp621 + st3;
+     st6 = st6 + st2;
+     st7 = st7 + st5;
+     st4 = st4 + st1;
+     st1 = c_re(input[2]);
+     st1 = st1 + c_re(input[18]);
+     st2 = c_re(input[10]);
+     st2 = st2 + c_re(input[26]);
+     st5 = st1 + st2;
+     st1 = st1 - st2;
+     st2 = c_im(input[2]);
+     st2 = st2 + c_im(input[18]);
+     tmp633 = st4;
+     st4 = c_im(input[10]);
+     st4 = st4 + c_im(input[26]);
+     tmp634 = st7;
+     st7 = st2 + st4;
+     st2 = st2 - st4;
+     st4 = st1 + st2;
+     st1 = st1 - st2;
+     st2 = c_re(input[30]);
+     st2 = st2 + c_re(input[14]);
+     tmp635 = st6;
+     st6 = c_re(input[6]);
+     st6 = st6 + c_re(input[22]);
+     tmp636 = st3;
+     st3 = st2 + st6;
+     st2 = st2 - st6;
+     st6 = st5 + st3;
+     st5 = st5 - st3;
+     st3 = tmp629 + st6;
+     st6 = tmp629 - st6;
+     tmp637 = st6;
+     st6 = tmp628 - st5;
+     st5 = st5 + tmp628;
+     tmp638 = st5;
+     st5 = c_im(input[30]);
+     st5 = st5 + c_im(input[14]);
+     tmp639 = st6;
+     st6 = c_im(input[6]);
+     st6 = st6 + c_im(input[22]);
+     tmp640 = st3;
+     st3 = st5 + st6;
+     st5 = st5 - st6;
+     st6 = st7 + st3;
+     st3 = st3 - st7;
+     st7 = st5 - st2;
+     tmp641 = st8;
+     st8 = st7 - st4;
+     st8 = st8 * K707106781[0];
+     st4 = st4 + st7;
+     st4 = st4 * K707106781[0];
+     st2 = st2 + st5;
+     st5 = st1 - st2;
+     st5 = st5 * K707106781[0];
+     st1 = st1 + st2;
+     st1 = st1 * K707106781[0];
+     st7 = tmp627 - st6;
+     st6 = tmp627 + st6;
+     st2 = tmp626 + st3;
+     st3 = tmp626 - st3;
+     tmp642 = st3;
+     st3 = tmp623 + st5;
+     st5 = tmp623 - st5;
+     tmp643 = st5;
+     st5 = tmp625 - st8;
+     st8 = tmp625 + st8;
+     tmp644 = st8;
+     st8 = tmp622 + st4;
+     st4 = tmp622 - st4;
+     tmp645 = st4;
+     st4 = tmp624 - st1;
+     st1 = tmp624 + st1;
+     tmp646 = st1;
+     st1 = c_re(input[6]);
+     st1 = st1 - c_re(input[22]);
+     tmp647 = st4;
+     st4 = c_im(input[30]);
+     st4 = st4 - c_im(input[14]);
+     tmp648 = st8;
+     st8 = st1 + st4;
+     tmp649 = st5;
+     st5 = st8 * K382683432[0];
+     st4 = st4 - st1;
+     st8 = st8 * K923879532[0];
+     st1 = c_re(input[30]);
+     tmp650 = st3;
+     st3 = st4 * K923879532[0];
+     st1 = st1 - c_re(input[14]);
+     st4 = st4 * K382683432[0];
+     tmp651 = st2;
+     st2 = c_im(input[6]);
+     st2 = st2 - c_im(input[22]);
+     tmp652 = st6;
+     st6 = st1 - st2;
+     tmp653 = st7;
+     st7 = st6 * K923879532[0];
+     st1 = st1 + st2;
+     st6 = st6 * K382683432[0];
+     st5 = st5 + st7;
+     st2 = st1 * K382683432[0];
+     st8 = st8 - st6;
+     st1 = st1 * K923879532[0];
+     st3 = st3 + st2;
+     st4 = st4 - st1;
+     st7 = c_re(input[2]);
+     st7 = st7 - c_re(input[18]);
+     st6 = c_im(input[10]);
+     st6 = st6 - c_im(input[26]);
+     st2 = st7 - st6;
+     st1 = st2 * K923879532[0];
+     st7 = st7 + st6;
+     st2 = st2 * K382683432[0];
+     st6 = c_re(input[10]);
+     tmp654 = st8;
+     st8 = st7 * K382683432[0];
+     st6 = st6 - c_re(input[26]);
+     st7 = st7 * K923879532[0];
+     tmp655 = st5;
+     st5 = c_im(input[2]);
+     st5 = st5 - c_im(input[18]);
+     tmp656 = st4;
+     st4 = st6 + st5;
+     tmp657 = st3;
+     st3 = st4 * K382683432[0];
+     st5 = st5 - st6;
+     st4 = st4 * K923879532[0];
+     st1 = st1 - st3;
+     st6 = st5 * K923879532[0];
+     st4 = st4 + st2;
+     st5 = st5 * K382683432[0];
+     st8 = st8 - st6;
+     st5 = st5 + st7;
+     st2 = st8 - tmp657;
+     st7 = tmp630 + st2;
+     st2 = tmp630 - st2;
+     st3 = tmp656 - st5;
+     st6 = tmp631 - st3;
+     st3 = tmp631 + st3;
+     tmp658 = st3;
+     st3 = st1 - tmp655;
+     tmp659 = st2;
+     st2 = tmp632 - st3;
+     st3 = tmp632 + st3;
+     tmp660 = st3;
+     st3 = tmp654 - st4;
+     tmp661 = st2;
+     st2 = tmp641 + st3;
+     st3 = tmp641 - st3;
+     st4 = st4 + tmp654;
+     tmp662 = st3;
+     st3 = tmp636 - st4;
+     st4 = tmp636 + st4;
+     st1 = st1 + tmp655;
+     tmp663 = st4;
+     st4 = tmp635 + st1;
+     st1 = tmp635 - st1;
+     st5 = st5 + tmp656;
+     tmp664 = st1;
+     st1 = tmp634 + st5;
+     st5 = tmp634 - st5;
+     st8 = st8 + tmp657;
+     tmp665 = st5;
+     st5 = tmp633 - st8;
+     st8 = tmp633 + st8;
+     tmp666 = st8;
+     st8 = c_re(input[1]);
+     st8 = st8 + c_re(input[17]);
+     tmp667 = st5;
+     st5 = c_re(input[9]);
+     st5 = st5 + c_re(input[25]);
+     tmp668 = st1;
+     st1 = st8 + st5;
+     st8 = st8 - st5;
+     st5 = c_im(input[1]);
+     st5 = st5 + c_im(input[17]);
+     tmp669 = st4;
+     st4 = c_im(input[9]);
+     st4 = st4 + c_im(input[25]);
+     tmp670 = st3;
+     st3 = st5 + st4;
+     st5 = st5 - st4;
+     st4 = c_re(input[5]);
+     st4 = st4 + c_re(input[21]);
+     tmp671 = st2;
+     st2 = c_re(input[29]);
+     st2 = st2 + c_re(input[13]);
+     tmp672 = st6;
+     st6 = st4 + st2;
+     st4 = st4 - st2;
+     st2 = st1 + st6;
+     st1 = st1 - st6;
+     st6 = st4 + st5;
+     tmp673 = st7;
+     st7 = st6 * K923879532[0];
+     st5 = st5 - st4;
+     st6 = st6 * K382683432[0];
+     st4 = c_im(input[5]);
+     tmp674 = st2;
+     st2 = st5 * K382683432[0];
+     st4 = st4 + c_im(input[21]);
+     st5 = st5 * K923879532[0];
+     tmp675 = st7;
+     st7 = c_im(input[29]);
+     st7 = st7 + c_im(input[13]);
+     tmp676 = st5;
+     st5 = st4 + st7;
+     st7 = st7 - st4;
+     st4 = st3 + st5;
+     st3 = st3 - st5;
+     st5 = st1 - st3;
+     st1 = st1 + st3;
+     st3 = st8 + st7;
+     tmp677 = st1;
+     st1 = st3 * K382683432[0];
+     st8 = st8 - st7;
+     st3 = st3 * K923879532[0];
+     st3 = st3 - st6;
+     st6 = st8 * K923879532[0];
+     st2 = st2 + st6;
+     st8 = st8 * K382683432[0];
+     st8 = st8 - tmp676;
+     st1 = tmp675 + st1;
+     st7 = c_re(input[31]);
+     st7 = st7 + c_re(input[15]);
+     st6 = c_re(input[7]);
+     st6 = st6 + c_re(input[23]);
+     tmp678 = st1;
+     st1 = st7 + st6;
+     st7 = st7 - st6;
+     st6 = c_im(input[31]);
+     st6 = st6 + c_im(input[15]);
+     tmp679 = st3;
+     st3 = c_im(input[7]);
+     st3 = st3 + c_im(input[23]);
+     tmp680 = st2;
+     st2 = st6 + st3;
+     st6 = st6 - st3;
+     st3 = c_re(input[3]);
+     st3 = st3 + c_re(input[19]);
+     tmp681 = st8;
+     st8 = c_re(input[27]);
+     st8 = st8 + c_re(input[11]);
+     tmp682 = st5;
+     st5 = st3 + st8;
+     st3 = st3 - st8;
+     st8 = st1 + st5;
+     st1 = st1 - st5;
+     st5 = st3 + st6;
+     tmp683 = st4;
+     st4 = st5 * K923879532[0];
+     st6 = st6 - st3;
+     st5 = st5 * K382683432[0];
+     st3 = tmp674 + st8;
+     tmp684 = st4;
+     st4 = st6 * K382683432[0];
+     tmp685 = st5;
+     st5 = tmp640 - st3;
+     st6 = st6 * K923879532[0];
+     st3 = tmp640 + st3;
+     st8 = tmp674 - st8;
+     c_re(input[16]) = st5;
+     st5 = st8 + tmp653;
+     st8 = tmp653 - st8;
+     c_re(input[0]) = st3;
+     st3 = c_im(input[3]);
+     st3 = st3 + c_im(input[19]);
+     c_im(input[8]) = st5;
+     st5 = c_im(input[27]);
+     st5 = st5 + c_im(input[11]);
+     c_im(input[24]) = st8;
+     st8 = st3 + st5;
+     st5 = st5 - st3;
+     st3 = st2 + st8;
+     st2 = st2 - st8;
+     st8 = st1 + st2;
+     st2 = st2 - st1;
+     st1 = st7 + st5;
+     tmp686 = st4;
+     st4 = st1 * K382683432[0];
+     st7 = st7 - st5;
+     st1 = st1 * K923879532[0];
+     st5 = tmp683 + st3;
+     tmp687 = st4;
+     st4 = st7 * K923879532[0];
+     tmp688 = st1;
+     st1 = tmp652 - st5;
+     st7 = st7 * K382683432[0];
+     st5 = tmp652 + st5;
+     st3 = st3 - tmp683;
+     c_im(input[16]) = st1;
+     st1 = tmp637 - st3;
+     st3 = tmp637 + st3;
+     c_im(input[0]) = st5;
+     st5 = tmp682 - st8;
+     st5 = st5 * K707106781[0];
+     c_re(input[24]) = st1;
+     st1 = tmp639 - st5;
+     st5 = tmp639 + st5;
+     st8 = tmp682 + st8;
+     st8 = st8 * K707106781[0];
+     c_re(input[8]) = st3;
+     st3 = tmp651 - st8;
+     st8 = tmp651 + st8;
+     c_im(input[28]) = st1;
+     st1 = st2 - tmp677;
+     st1 = st1 * K707106781[0];
+     c_im(input[12]) = st5;
+     st5 = tmp642 - st1;
+     st1 = tmp642 + st1;
+     st2 = tmp677 + st2;
+     st2 = st2 * K707106781[0];
+     c_re(input[20]) = st3;
+     st3 = tmp638 - st2;
+     st2 = tmp638 + st2;
+     st6 = st6 + st7;
+     st7 = tmp681 - st6;
+     st6 = tmp681 + st6;
+     st4 = tmp686 - st4;
+     c_re(input[4]) = st8;
+     st8 = tmp680 + st4;
+     st4 = st4 - tmp680;
+     c_re(input[28]) = st5;
+     st5 = tmp650 - st8;
+     st8 = tmp650 + st8;
+     c_re(input[12]) = st1;
+     st1 = tmp649 - st4;
+     st4 = tmp649 + st4;
+     c_im(input[20]) = st3;
+     st3 = tmp643 - st7;
+     st7 = tmp643 + st7;
+     c_im(input[4]) = st2;
+     st2 = tmp644 - st6;
+     st6 = tmp644 + st6;
+     c_im(input[22]) = st5;
+     st5 = tmp685 + tmp688;
+     c_im(input[6]) = st8;
+     st8 = tmp679 - st5;
+     st5 = tmp679 + st5;
+     c_re(input[30]) = st1;
+     st1 = tmp684 - tmp687;
+     c_re(input[14]) = st4;
+     st4 = tmp678 + st1;
+     st1 = st1 - tmp678;
+     c_im(input[30]) = st3;
+     st3 = tmp648 - st4;
+     st4 = tmp648 + st4;
+     c_im(input[14]) = st7;
+     st7 = tmp647 - st1;
+     st1 = tmp647 + st1;
+     c_re(input[22]) = st2;
+     st2 = tmp645 - st8;
+     st8 = tmp645 + st8;
+     c_re(input[6]) = st6;
+     st6 = tmp646 - st5;
+     st5 = tmp646 + st5;
+     c_im(input[18]) = st3;
+     st3 = c_re(input[31]);
+     st3 = st3 - c_re(input[15]);
+     c_im(input[2]) = st4;
+     st4 = c_im(input[7]);
+     st4 = st4 - c_im(input[23]);
+     c_re(input[26]) = st7;
+     st7 = st3 - st4;
+     st3 = st3 + st4;
+     c_re(input[10]) = st1;
+     st1 = c_re(input[7]);
+     st1 = st1 - c_re(input[23]);
+     c_im(input[26]) = st2;
+     st2 = c_im(input[31]);
+     st2 = st2 - c_im(input[15]);
+     c_im(input[10]) = st8;
+     st8 = st1 + st2;
+     st2 = st2 - st1;
+     c_re(input[18]) = st6;
+     st6 = c_re(input[3]);
+     st6 = st6 - c_re(input[19]);
+     c_re(input[2]) = st5;
+     st5 = c_im(input[3]);
+     st5 = st5 - c_im(input[19]);
+     st4 = st6 - st5;
+     st6 = st6 + st5;
+     st1 = c_re(input[27]);
+     st1 = st1 - c_re(input[11]);
+     st5 = c_im(input[27]);
+     st5 = st5 - c_im(input[11]);
+     tmp689 = st2;
+     st2 = st1 + st5;
+     st5 = st5 - st1;
+     st1 = st4 + st2;
+     st1 = st1 * K707106781[0];
+     st4 = st4 - st2;
+     st4 = st4 * K707106781[0];
+     st2 = st6 + st5;
+     st2 = st2 * K707106781[0];
+     st5 = st5 - st6;
+     st5 = st5 * K707106781[0];
+     st6 = st7 - st1;
+     tmp690 = st4;
+     st4 = st6 * K831469612[0];
+     st7 = st7 + st1;
+     st6 = st6 * K555570233[0];
+     st1 = st3 - st5;
+     tmp691 = st6;
+     st6 = st1 * K195090322[0];
+     st3 = st3 + st5;
+     st1 = st1 * K980785280[0];
+     st5 = st8 - st2;
+     tmp692 = st3;
+     st3 = st5 * K555570233[0];
+     st8 = st8 + st2;
+     st5 = st5 * K831469612[0];
+     st2 = tmp689 - tmp690;
+     tmp693 = st5;
+     st5 = st2 * K980785280[0];
+     tmp694 = st4;
+     st4 = tmp689 + tmp690;
+     st2 = st2 * K195090322[0];
+     st5 = st5 + st6;
+     st6 = st8 * K980785280[0];
+     st2 = st2 - st1;
+     st1 = st7 * K195090322[0];
+     st3 = st3 - tmp694;
+     st7 = st7 * K980785280[0];
+     tmp695 = st3;
+     st3 = tmp691 + tmp693;
+     st8 = st8 * K195090322[0];
+     st6 = st6 - st1;
+     st1 = st4 * K555570233[0];
+     st7 = st7 + st8;
+     st8 = tmp692 * K831469612[0];
+     st1 = st1 + st8;
+     st4 = st4 * K831469612[0];
+     st8 = c_re(input[1]);
+     tmp696 = st1;
+     st1 = tmp692 * K555570233[0];
+     st8 = st8 - c_re(input[17]);
+     st4 = st4 - st1;
+     st1 = c_im(input[9]);
+     st1 = st1 - c_im(input[25]);
+     tmp697 = st4;
+     st4 = st8 - st1;
+     st8 = st8 + st1;
+     st1 = c_re(input[9]);
+     st1 = st1 - c_re(input[25]);
+     tmp698 = st7;
+     st7 = c_im(input[1]);
+     st7 = st7 - c_im(input[17]);
+     tmp699 = st6;
+     st6 = st1 + st7;
+     st7 = st7 - st1;
+     st1 = c_re(input[5]);
+     st1 = st1 - c_re(input[21]);
+     tmp700 = st3;
+     st3 = c_im(input[5]);
+     st3 = st3 - c_im(input[21]);
+     tmp701 = st2;
+     st2 = st1 - st3;
+     st1 = st1 + st3;
+     st3 = c_re(input[29]);
+     st3 = st3 - c_re(input[13]);
+     tmp702 = st5;
+     st5 = c_im(input[29]);
+     st5 = st5 - c_im(input[13]);
+     tmp703 = st7;
+     st7 = st3 + st5;
+     st5 = st5 - st3;
+     st3 = st2 + st7;
+     st3 = st3 * K707106781[0];
+     st2 = st2 - st7;
+     st2 = st2 * K707106781[0];
+     st7 = st1 + st5;
+     st7 = st7 * K707106781[0];
+     st5 = st5 - st1;
+     st5 = st5 * K707106781[0];
+     st1 = st4 - st3;
+     tmp704 = st2;
+     st2 = st1 * K831469612[0];
+     st4 = st4 + st3;
+     st1 = st1 * K555570233[0];
+     st3 = st8 - st5;
+     tmp705 = st1;
+     st1 = st3 * K195090322[0];
+     st8 = st8 + st5;
+     st3 = st3 * K980785280[0];
+     st5 = st6 - st7;
+     tmp706 = st2;
+     st2 = st5 * K555570233[0];
+     st6 = st6 + st7;
+     st5 = st5 * K831469612[0];
+     st7 = tmp703 - tmp704;
+     tmp707 = st5;
+     st5 = st7 * K980785280[0];
+     tmp708 = st2;
+     st2 = tmp703 + tmp704;
+     st7 = st7 * K195090322[0];
+     st1 = st1 - st5;
+     st5 = st4 * K195090322[0];
+     tmp709 = st5;
+     st5 = st1 - tmp702;
+     tmp710 = st5;
+     st5 = st6 * K980785280[0];
+     st1 = st1 + tmp702;
+     st4 = st4 * K980785280[0];
+     st7 = st7 + st3;
+     st6 = st6 * K195090322[0];
+     st3 = st7 + tmp701;
+     tmp711 = st4;
+     st4 = st8 * K831469612[0];
+     st7 = tmp701 - st7;
+     tmp712 = st4;
+     st4 = st2 * K555570233[0];
+     tmp713 = st4;
+     st4 = tmp673 - st3;
+     st2 = st2 * K831469612[0];
+     st8 = st8 * K555570233[0];
+     st3 = tmp673 + st3;
+     c_im(input[23]) = st4;
+     st4 = tmp672 - st7;
+     st7 = tmp672 + st7;
+     c_im(input[7]) = st3;
+     st3 = tmp659 - tmp710;
+     c_re(input[31]) = st4;
+     st4 = tmp659 + tmp710;
+     c_re(input[15]) = st7;
+     st7 = tmp658 - st1;
+     st1 = tmp658 + st1;
+     c_im(input[31]) = st3;
+     st3 = tmp706 + tmp708;
+     c_im(input[15]) = st4;
+     st4 = st3 + tmp695;
+     st3 = tmp695 - st3;
+     c_re(input[23]) = st7;
+     st7 = tmp705 - tmp707;
+     c_re(input[7]) = st1;
+     st1 = st7 - tmp700;
+     st7 = st7 + tmp700;
+     tmp714 = st2;
+     st2 = tmp661 - st1;
+     st1 = tmp661 + st1;
+     c_im(input[29]) = st2;
+     st2 = tmp671 - st7;
+     st7 = tmp671 + st7;
+     c_im(input[13]) = st1;
+     st1 = tmp660 - st4;
+     st4 = tmp660 + st4;
+     c_re(input[21]) = st2;
+     st2 = tmp662 - st3;
+     st3 = tmp662 + st3;
+     st5 = tmp709 + st5;
+     c_re(input[5]) = st7;
+     st7 = st5 + tmp699;
+     st5 = tmp699 - st5;
+     st6 = tmp711 - st6;
+     c_im(input[21]) = st1;
+     st1 = st6 - tmp698;
+     st6 = st6 + tmp698;
+     c_im(input[5]) = st4;
+     st4 = tmp670 - st1;
+     st1 = tmp670 + st1;
+     c_re(input[29]) = st2;
+     st2 = tmp669 - st6;
+     st6 = tmp669 + st6;
+     c_re(input[13]) = st3;
+     st3 = tmp663 - st7;
+     st7 = tmp663 + st7;
+     c_im(input[25]) = st4;
+     st4 = tmp664 - st5;
+     st5 = tmp664 + st5;
+     c_im(input[9]) = st1;
+     st1 = tmp712 - tmp713;
+     c_re(input[17]) = st2;
+     st2 = st1 - tmp696;
+     st1 = st1 + tmp696;
+     st8 = tmp714 + st8;
+     c_re(input[1]) = st6;
+     st6 = st8 + tmp697;
+     st8 = tmp697 - st8;
+     c_im(input[17]) = st3;
+     st3 = tmp668 - st6;
+     st6 = tmp668 + st6;
+     c_im(input[1]) = st7;
+     st7 = tmp667 - st8;
+     st8 = tmp667 + st8;
+     c_re(input[25]) = st4;
+     st4 = tmp665 - st2;
+     st2 = tmp665 + st2;
+     c_re(input[9]) = st5;
+     st5 = tmp666 - st1;
+     st1 = tmp666 + st1;
+     c_im(input[19]) = st3;
+     c_im(input[3]) = st6;
+     c_re(input[27]) = st7;
+     c_re(input[11]) = st8;
+     c_im(input[27]) = st4;
+     c_im(input[11]) = st2;
+     c_re(input[19]) = st5;
+     c_re(input[3]) = st1;
+}
+
+void PFFTWI(64)(fftw_complex *input) 
+{
+     PFFTWI(16)(input );
+     PFFTWI(16)(input + 16);
+     PFFTWI(16)(input + 32);
+     PFFTWI(16)(input + 48);
+     PFFTWI(twiddle_4)(input, PFFTW(W_64), 16);
+}
+
+void PFFTWI(128)(fftw_complex *input) 
+{
+     PFFTWI(32)(input );
+     PFFTWI(32)(input + 32);
+     PFFTWI(32)(input + 64);
+     PFFTWI(32)(input + 96);
+     PFFTWI(twiddle_4)(input, PFFTW(W_128), 32);
+}
+
+void PFFTWI(256)(fftw_complex *input)
+{
+     PFFTWI(64)(input );
+     PFFTWI(64)(input + 64);
+     PFFTWI(64)(input + 128);
+     PFFTWI(64)(input + 192);
+     PFFTWI(twiddle_4)(input, PFFTW(W_256), 64);
+}
+
+void PFFTWI(512)(fftw_complex *input) 
+{
+     PFFTWI(128)(input );
+     PFFTWI(128)(input + 128);
+     PFFTWI(128)(input + 256);
+     PFFTWI(128)(input + 384);
+     PFFTWI(twiddle_4)(input, PFFTW(W_512), 128);
+}
+
+void PFFTWI(2048)(fftw_complex *input)
+{
+     PFFTWI(512)(input );
+     PFFTWI(512)(input + 512);
+     PFFTWI(512)(input + 1024);
+     PFFTWI(512)(input + 1536);
+     PFFTWI(twiddle_4)(input, PFFTW(W_2048), 512);
+}
+
+///////////////////////////////////////////////////////////////////////////
 void  PFFTW(twiddle_4) (fftw_complex * A, const fftw_complex * W, int iostride) {
      int i;
      fftw_complex *inout;
@@ -2778,6 +3919,123 @@
      } while (i > 0);
 }
 
+void PFFTWI(twiddle_4) (fftw_complex * A, const fftw_complex * W, int iostride) {
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     {
+	  fftw_real st1;
+	  fftw_real st2;
+	  fftw_real st3;
+	  fftw_real st4;
+	  fftw_real st5;
+	  fftw_real st6;
+	  fftw_real st7;
+	  fftw_real st8;
+	  st8 = c_re(inout[0]);
+	  st8 = st8 + c_re(inout[2 * iostride]);
+	  st7 = c_re(inout[iostride]);
+	  st7 = st7 + c_re(inout[3 * iostride]);
+	  st6 = st8 - st7;
+	  st8 = st8 + st7;
+	  st5 = c_im(inout[0]);
+	  st5 = st5 + c_im(inout[2 * iostride]);
+	  st4 = c_im(inout[iostride]);
+	  st4 = st4 + c_im(inout[3 * iostride]);
+	  st3 = st5 - st4;
+	  st5 = st5 + st4;
+	  st2 = c_re(inout[iostride]);
+	  st2 = st2 - c_re(inout[3 * iostride]);
+	  st1 = c_im(inout[0]);
+	  st1 = st1 - c_im(inout[2 * iostride]);
+	  st7 = st2 + st1;
+	  st1 = st1 - st2;
+	  st4 = c_re(inout[0]);
+	  st4 = st4 - c_re(inout[2 * iostride]);
+	  c_re(inout[2 * iostride]) = st6;
+	  st6 = c_im(inout[iostride]);
+	  st6 = st6 - c_im(inout[3 * iostride]);
+	  c_re(inout[0]) = st8;
+	  st8 = st4 - st6;
+	  st4 = st4 + st6;
+	  c_im(inout[0]) = st5;
+	  c_im(inout[2 * iostride]) = st3;
+	  c_im(inout[iostride]) = st7;
+	  c_im(inout[3 * iostride]) = st1;
+	  c_re(inout[iostride]) = st8;
+	  c_re(inout[3 * iostride]) = st4;
+     }
+     inout = inout + 1;
+     i = iostride - 1;
+     do {
+	  {
+	       fftw_real st1;
+	       fftw_real st2;
+	       fftw_real st3;
+	       fftw_real st4;
+	       fftw_real st5;
+	       fftw_real st6;
+	       fftw_real st7;
+	       fftw_real st8;
+	       st8 = c_re(inout[2 * iostride]);
+	       st8 = st8 * c_re(W[1]);
+	       st7 = c_im(inout[2 * iostride]);
+	       st7 = st7 * c_im(W[1]);
+	       st8 = st8 - st7;
+	       st6 = st8 + c_re(inout[0]);
+	       st8 = c_re(inout[0]) - st8;
+	       st5 = c_re(inout[2 * iostride]);
+	       st5 = st5 * c_im(W[1]);
+	       st4 = c_im(inout[2 * iostride]);
+	       st4 = st4 * c_re(W[1]);
+	       st5 = st5 + st4;
+	       st3 = st5 + c_im(inout[0]);
+	       st5 = c_im(inout[0]) - st5;
+	       st2 = c_re(inout[iostride]);
+	       st2 = st2 * c_re(W[0]);
+	       st1 = c_im(inout[iostride]);
+	       st1 = st1 * c_im(W[0]);
+	       st2 = st2 - st1;
+	       st7 = c_re(inout[3 * iostride]);
+	       st7 = st7 * c_re(W[0]);
+	       st4 = c_im(inout[3 * iostride]);
+	       st4 = st4 * c_im(W[0]);
+	       st7 = st7 + st4;
+	       st1 = st2 + st7;
+	       st2 = st2 - st7;
+	       st4 = st6 - st1;
+	       st6 = st6 + st1;
+	       st7 = st2 + st5;
+	       st5 = st5 - st2;
+	       st1 = c_re(inout[iostride]);
+	       st1 = st1 * c_im(W[0]);
+	       st2 = c_im(inout[iostride]);
+	       st2 = st2 * c_re(W[0]);
+	       st1 = st1 + st2;
+	       c_re(inout[2 * iostride]) = st4;
+	       st4 = c_im(inout[3 * iostride]);
+	       st4 = st4 * c_re(W[0]);
+	       c_re(inout[0]) = st6;
+	       st6 = c_re(inout[3 * iostride]);
+	       st6 = st6 * c_im(W[0]);
+	       st4 = st4 - st6;
+	       c_im(inout[iostride]) = st7;
+	       st7 = st1 - st4;
+	       st1 = st1 + st4;
+	       c_im(inout[3 * iostride]) = st5;
+	       st5 = st8 - st7;
+	       st8 = st8 + st7;
+	       st2 = st1 + st3;
+	       st3 = st3 - st1;
+	       c_re(inout[iostride]) = st5;
+	       c_re(inout[3 * iostride]) = st8;
+	       c_im(inout[0]) = st2;
+	       c_im(inout[2 * iostride]) = st3;
+	  }
+	  i = i - 1, inout = inout + 1, W = W + 2;
+     } while (i > 0);
+}
+//////////////////////////////////////////////////////////////////////
 int PFFTW(permutation_64)(int i)
 {
     int i1 = i % 4;
@@ -2827,14 +4085,14 @@
     else
        return (i1 * 512 + PFFTW(permutation_512)((i2 + 1) % 512));
 }
-
+/////////////////////////////////////////////////////////////////
 void MakeFFTOrder(void)
 {
 int i;
 for (i=0 ; i < 2048 ; i++){
-     if (i < 64) unscambled64[i]=PFFTW(permutation_64)(i);
-     if (i < 256) unscambled256[i]=PFFTW(permutation_256)(i);
-     if (i < 512) unscambled512[i]=PFFTW(permutation_512)(i);
-     unscambled2048[i]=PFFTW(permutation_2048)(i);
+     if (i < 64) unscambled64[i] = PFFTW(permutation_64)(i);
+     if (i < 256) unscambled256[i] = PFFTW(permutation_256)(i);
+     if (i < 512) unscambled512[i] = PFFTW(permutation_512)(i);
+     unscambled2048[i] = PFFTW(permutation_2048)(i);
      }
 }
--- a/fastfft.h
+++ b/fastfft.h
@@ -1,11 +1,11 @@
-#define fftw_real double
-#define fftw_complex fftw_complex_d
+#include "transfo.h"
 
+#define PFFTW(name)  CONCAT(pfftw_, name)
+#define PFFTWI(name)  CONCAT(pfftwi_, name)
 #define CONCAT_AUX(a, b) a ## b
 #define CONCAT(a, b) CONCAT_AUX(a,b)
-
-#define PFFTW(name)  CONCAT(pfftw_d_, name)
-
 #define FFTW_KONST(x) ((fftw_real) x)
 
 void PFFTW(twiddle_4)(fftw_complex *A, const fftw_complex *W, int iostride);
+void PFFTWI(twiddle_4)(fftw_complex *A, const fftw_complex *W, int iostride);
+
--- a/psych.c
+++ b/psych.c
@@ -52,9 +52,9 @@
 
 Source file:
 
-$Id: psych.c,v 1.17 2000/01/06 10:15:51 menno Exp $
-$Id: psych.c,v 1.17 2000/01/06 10:15:51 menno Exp $
-$Id: psych.c,v 1.17 2000/01/06 10:15:51 menno Exp $
+$Id: psych.c,v 1.18 2000/01/06 10:39:33 lenox Exp $
+$Id: psych.c,v 1.18 2000/01/06 10:39:33 lenox Exp $
+$Id: psych.c,v 1.18 2000/01/06 10:39:33 lenox Exp $
 
 **********************************************************************/
 
@@ -669,7 +669,7 @@
 		FFTarray[i].im = 0.0;
     }
 
-    pfftw_d_2048(FFTarray);
+    pfftw_2048(FFTarray);
 
         for(w = 0; w < BLOCK_LEN_LONG; w++){
  		unscambled = unscambled2048[w];
@@ -705,7 +705,7 @@
 			FFTarray[i].im = 0.0;
 		}
 
-                pfftw_d_256(FFTarray);
+                pfftw_256(FFTarray);
 
 		for(w = 0; w < BLOCK_LEN_SHORT; w++){
          		unscambled = unscambled256[w];
--- a/transfo.c
+++ b/transfo.c
@@ -1,10 +1,7 @@
 #include <math.h>
+#include "all.h"
 #include "transfo.h"
 
-fftw_complex_d FFTarray[2048];    /* the array for in-place FFT */
-extern int unscambled64[64];    /* the permutation array for FFT64*/
-extern int unscambled512[512];  /* the permutation array for FFT512*/
-
 #ifndef M_PI
 #define M_PI        3.14159265358979323846
 #endif
@@ -14,14 +11,13 @@
 #endif
 
 /*****************************
-  Fast MDCT Code
+  Fast MDCT & IMDCT Code
 *****************************/
+void MDCT (fftw_real *data, int N) {
 
-void MDCT (double *data, int N) {
-
-    double tempr, tempi, c, s, cold, cfreq, sfreq; /* temps for pre and post twiddle */
-    double freq = 2.0 * M_PI / N;
-    double fac,cosfreq8,sinfreq8;
+    fftw_real tempr, tempi, c, s, cold, cfreq, sfreq; /* temps for pre and post twiddle */
+    fftw_real freq = 2.0 * M_PI / N;
+    fftw_real fac,cosfreq8,sinfreq8;
     int i, n;
     int isign = 1;
     int b = N >> 1;
@@ -32,7 +28,7 @@
     int a4 = a >> 2;
     int b4 = b >> 2;
     int unscambled;
-    
+
     /* Choosing to allocate 2/N factor to Inverse Xform! */
     fac = 2.; /* 2 from MDCT inverse  to forward */
 
@@ -72,9 +68,9 @@
 
     /* Perform in-place complex FFT of length N/4 */
     switch (N) {
-	case 256: pfftw_d_64(FFTarray);
+	case 256: pfftw_64(FFTarray);
 		break;
-	case 2048:pfftw_d_512(FFTarray);
+	case 2048:pfftw_512(FFTarray);
 	}
 
     /* prepare for recurrence relations in post-twiddle */
@@ -110,360 +106,87 @@
     }
 }
 
+void IMDCT(fftw_real *data, int N)
+{
+	fftw_real tempr, tempi, c, s, cold, cfreq, sfreq; /* temps for pre and post twiddle */
+    	fftw_real freq = 2.0 * M_PI / N;
+	fftw_real fac, cosfreq8, sinfreq8;
+	int i;
+	int Nd2 = N >> 1;
+	int Nd4 = N >> 2;
+	int Nd8 = N >> 3;
+	int unscambled;
 
-/*****************************
-  Fast MDCT Code
-*****************************/
+	/* Choosing to allocate 2/N factor to Inverse Xform! */
+	fac = 2. / N; /* remaining 2/N from 4/N IFFT factor */
 
-#define SWAP(a,b) tempr=a;a=b;b=tempr
-void CompFFT (double *data, int nn, int isign);
-void FFT (double *data, int nn, int isign);
+	/* prepare for recurrence relation in pre-twiddle */
+	cfreq = cos (freq);
+	sfreq = sin (freq);
+	cosfreq8 = cos (freq * 0.125);
+	sinfreq8 = sin (freq * 0.125);
+	c = cosfreq8;
+	s = sinfreq8;
 
-double *NewFloat (int N) {
-/* Allocate array of N Floats */
+	for (i = 0; i < Nd4; i++) {
 
-    double *temp;
+		/* calculate real and imaginary parts of g(n) or G(p) */
+		tempr = -data [2 * i];
+		tempi = data [Nd2 - 1 - 2 * i];
 
-    temp = (double *) malloc (N * sizeof (double));
-    if (!temp) {
-		exit (1);
-		}
-    return temp;
-}
+		/* calculate pre-twiddled FFT input */
+	switch (N) {
+	case 256:
+		unscambled = unscambled64[i];
+		break;
+	case 2048:
+		unscambled = unscambled512[i];
+	}
+		FFTarray [unscambled].re = tempr * c - tempi * s;
+		FFTarray [unscambled].im = tempi * c + tempr * s;
 
-typedef double *pFloat;
-
-double **NewFloatMatrix (int N, int M) {
-/* Allocate NxM matrix of Floats */
-
-    double **temp;
-    int i;
-
-/* allocate N pointers to double arrays */
-    temp = (pFloat *) malloc (N * sizeof (pFloat));
-    if (!temp) {
-		exit (1);
-		}
-
-/* Allocate a double array M long for each of the N array pointers */
-
-    for (i = 0; i < N; i++) {
-		temp [i] = (double *) malloc (M * sizeof (double));
-		if (! temp [i]) {
-			exit (1);
-			}
-		}
-    return temp;
-}
-
-
-void IMDCT (double *data, int N) {
-
-    static double *FFTarray = 0;    /* the array for in-place FFT */
-    static double tempr, tempi, c, s, cold, cfreq, sfreq; /* temps for pre and post twiddle */
-    double freq = 2. * M_PI / N;
-    static double fac;
-    static int i, n;
-	int isign = -1;
-	int b = N/2;
-    int a = N - b;
-
-    /* Choosing to allocate 2/N factor to Inverse Xform! */
-    if (isign == 1) {
-      fac = 2.; /* 2 from MDCT inverse  to forward */
-    } 
-    else {
-      fac = 2. / N; /* remaining 2/N from 4/N IFFT factor */
-    }
-    
-    
-    {
-      static int oldN = 0;
-      if (N > oldN) {
-	if (FFTarray) {
-	  free (FFTarray);
-	  FFTarray = 0;
+		/* use recurrence to prepare cosine and sine for next value of i */
+		cold = c;
+		c = c * cfreq - s * sfreq;
+		s = s * cfreq + cold * sfreq;
 	}
-	oldN = N;
-      }
-      
-      if (! FFTarray) 
-	FFTarray = NewFloat (N / 2);  /* holds N/4 complex values */
-    }
-    
-    /* prepare for recurrence relation in pre-twiddle */
-    cfreq = cos (freq);
-    sfreq = sin (freq);
-    c = cos (freq * 0.125);
-    s = sin (freq * 0.125);
 
-    for (i = 0; i < N / 4; i++) {
-      
-      /* calculate real and imaginary parts of g(n) or G(p) */
-      
-      if (isign == 1) { /* Forward Transform */
-	n = N / 2 - 1 - 2 * i;
-	if (i < b / 4) {
-	  tempr = data [a / 2 + n] + data [N + a / 2 - 1 - n]; /* use second form of e(n) for n = N / 2 - 1 - 2i */
-	} 
-	else {
-	  tempr = data [a / 2 + n] - data [a / 2 - 1 - n]; /* use first form of e(n) for n = N / 2 - 1 - 2i */
+	/* Perform in-place complex IFFT of length N/4 */
+    switch (N) {
+	case 256: pfftwi_64(FFTarray);
+		break;
+	case 2048:pfftwi_512(FFTarray);
 	}
-	n = 2 * i;
-	if (i < a / 4) {
-	  tempi = data [a / 2 + n] - data [a / 2 - 1 - n]; /* use first form of e(n) for n=2i */
-	} 
-	else {
-	  tempi = data [a / 2 + n] + data [N + a / 2 - 1 - n]; /* use second form of e(n) for n=2i*/
-	}
-      } 
-      else { /* Inverse Transform */
-	tempr = -data [2 * i];
-	tempi = data [N / 2 - 1 - 2 * i];
-      }
-      
-      /* calculate pre-twiddled FFT input */
-      FFTarray [2 * i] = tempr * c + isign * tempi * s;
-      FFTarray [2 * i + 1] = tempi * c - isign * tempr * s;
-      
-      /* use recurrence to prepare cosine and sine for next value of i */
-      cold = c;
-      c = c * cfreq - s * sfreq;
-      s = s * cfreq + cold * sfreq;
-    }
-    
-    /* Perform in-place complex FFT (or IFFT) of length N/4 */
-    /* Note: FFT has physics (opposite) sign convention and doesn't do 1/N factor */
-    CompFFT (FFTarray, N / 4, -isign);
-    
-    /* prepare for recurrence relations in post-twiddle */
-    c = cos (freq * 0.125);
-    s = sin (freq * 0.125);
-    
-    /* post-twiddle FFT output and then get output data */
-    for (i = 0; i < N / 4; i++) {
-      
-      /* get post-twiddled FFT output  */
-      /* Note: fac allocates 4/N factor from IFFT to forward and inverse */
-      tempr = fac * (FFTarray [2 * i] * c + isign * FFTarray [2 * i + 1] * s);
-      tempi = fac * (FFTarray [2 * i + 1] * c - isign * FFTarray [2 * i] * s);
-      
-      /* fill in output values */
-      if (isign == 1) { /* Forward Transform */
-	data [2 * i] = -tempr;   /* first half even */
-	data [N / 2 - 1 - 2 * i] = tempi;  /* first half odd */
-	data [N / 2 + 2 * i] = -tempi;  /* second half even */
-	data [N - 1 - 2 * i] = tempr;  /* second half odd */
-      } 
-      else { /* Inverse Transform */
-	data [N / 2 + a / 2 - 1 - 2 * i] = tempr;
-	if (i < b / 4) {
-	  data [N / 2 + a / 2 + 2 * i] = tempr;
-	} 
-	else {
-	  data [2 * i - b / 2] = -tempr;
-	}
-	data [a / 2 + 2 * i] = tempi;
-	if (i < a / 4) {
-	  data [a / 2 - 1 - 2 * i] = -tempi;
-	} 
-	else {
-	  data [a / 2 + N - 1 - 2*i] = tempi;
-	}
-      }
-      
-      /* use recurrence to prepare cosine and sine for next value of i */
-      cold = c;
-      c = c * cfreq - s * sfreq;
-      s = s * cfreq + cold * sfreq;
-    }
-    
-    /*    DeleteFloat (FFTarray);   */
-    
-}
 
 
-void CompFFT (double *data, int nn, int isign) {
+	/* prepare for recurrence relations in post-twiddle */
+	c = cosfreq8;
+	s = sinfreq8;
 
-    static int i, j, k, kk;
-    static int p1, q1;
-    static int m, n, logq1;
-    static double **intermed = 0;
-    static double ar, ai;
-    static double d2pn;
-    static double ca, sa, curcos, cursin, oldcos, oldsin;
-    static double ca1, sa1, curcos1, cursin1, oldcos1, oldsin1;
+	/* post-twiddle FFT output and then get output data */
+	for (i = 0; i < Nd4; i++) {
 
+		/* get post-twiddled FFT output  */
+	tempr = fac * (FFTarray[i].re * c - FFTarray[i].im * s);
+    	tempi = fac * (FFTarray[i].im * c + FFTarray[i].re * s);
 
-/* Factorize n;  n = p1*q1 where q1 is a power of 2.
-    For n = 1152, p1 = 9, q1 = 128.		*/
-
-    n = nn;
-    logq1 = 0;
-
-    for (;;) {
-		m = n >> 1;  /* shift right by one*/
-		if ((m << 1) == n) {
-		    logq1++;
-		    n = m;
-			} 
-		else {
-			break;
-			}
+		/* fill in output values */
+		data [Nd2 + Nd4 - 1 - 2 * i] = tempr;
+		if (i < Nd8) {
+			data [Nd2 + Nd4 + 2 * i] = tempr;
+		} else {
+			data [2 * i - Nd4] = -tempr;
 		}
-
-    p1 = n;
-    q1 = 1;
-    q1 <<= logq1;
-
-    d2pn = 2*M_PI / nn;
-
-{static int oldp1 = 0, oldq1 = 0;
-
-	if ((oldp1 < p1) || (oldq1 < q1)) {
-		if (intermed) {
-			free (intermed);
-			intermed = 0;
-			}
-		if (oldp1 < p1) oldp1 = p1;
-		if (oldq1 < q1) oldq1 = q1;;
+		data [Nd4 + 2 * i] = tempi;
+		if (i < Nd8) {
+			data [Nd4 - 1 - 2 * i] = -tempi;
+		} else {
+			data [Nd4 + N - 1 - 2*i] = tempi;
 		}
-		
-	if (!intermed)
-		intermed = NewFloatMatrix (oldp1, 2 * oldq1);
-}
 
-/* Sort the p1 sequences */
-
-    for	(i = 0; i < p1; i++) {
-		for (j = 0; j < q1; j++){
-		    intermed [i] [2 * j] = data [2 * (p1 * j + i)];
-			intermed [i] [2 * j + 1] = data [2 * (p1 * j + i) + 1];
-			}
-		}
-
-/* compute the power of two fft of the p1 sequences of length q1 */
-
-    for (i = 0; i < p1; i++) {
-/* Forward FFT in place for n complex items */
-		FFT (intermed [i], q1, isign);
-		}
-
-/* combine the FFT results into one seqquence of length N */
-
-    ca1 = cos (d2pn);
-    sa1 = sin (d2pn);
-    curcos1 = 1.;
-    cursin1 = 0.;
-
-    for (k = 0; k < nn; k++) {
-		data [2 * k] = 0.;
-		data [2 * k + 1] = 0.;
-		kk = k % q1;
-
-		ca = curcos1;
-		sa = cursin1;
-		curcos = 1.;
-		cursin = 0.;
-
-		for (j = 0; j < p1; j++) {
-			ar = curcos;
-			ai = isign * cursin;
-			data [2 * k] += intermed [j] [2 * kk] * ar - intermed [j] [2 * kk + 1] * ai;
-			data [2 * k + 1] += intermed [j] [2 * kk] * ai + intermed [j] [2 * kk + 1] * ar;
-
-		    oldcos = curcos;
-		    oldsin = cursin;
-			curcos = oldcos * ca - oldsin * sa;
-			cursin = oldcos * sa + oldsin * ca;
-			}
-		oldcos1 = curcos1;
-		oldsin1 = cursin1;
-		curcos1 = oldcos1 * ca1 - oldsin1 * sa1;
-		cursin1 = oldcos1 * sa1 + oldsin1 * ca1;
-		}
-
+		/* use recurrence to prepare cosine and sine for next value of i */
+		cold = c;
+		c = c * cfreq - s * sfreq;
+		s = s * cfreq + cold * sfreq;
+	}
 }
-
-
-void FFT (double *data, int nn, int isign)  {
-/* Varient of Numerical Recipes code from off the internet.  It takes nn
-interleaved complex input data samples in the array data and returns nn interleaved
-complex data samples in place where the output is the FFT of input if isign==1 and it
-is nn times the IFFT of the input if isign==-1. 
-(Note: it doesn't renormalize by 1/N when doing the inverse transform!!!)
-(Note: this follows physicists convention of +i, not EE of -j in forward
-transform!!!!)
- */
-
-/* Press, Flannery, Teukolsky, Vettering "Numerical
- * Recipes in C" tuned up ; Code works only when nn is
- * a power of 2  */
-
-    static int n, mmax, m, j, i;
-    static double wtemp, wr, wpr, wpi, wi, theta, wpin;
-    static double tempr, tempi, datar, datai;
-    static double data1r, data1i;
-
-    n = nn * 2;
-
-/* bit reversal */
-
-    j = 0;
-    for (i = 0; i < n; i += 2) {
-		if (j > i) {  /* could use j>i+1 to help compiler analysis */
-			SWAP (data [j], data [i]);
-			SWAP (data [j + 1], data [i + 1]);
-			}
-		m = nn;
-		while (m >= 2 && j >= m) {
-			j -= m;
-			m >>= 1;
-			}
-		j += m;
-		}
-
-    theta = 3.141592653589795 * .5;
-    if (isign < 0)
-		theta = -theta;
-    wpin = 0;   /* sin(+-PI) */
-    for (mmax = 2; n > mmax; mmax *= 2) {
-		wpi = wpin;
-		wpin = sin (theta);
-		wpr = 1 - wpin * wpin - wpin * wpin; /* cos(theta*2) */
-		theta *= .5;
-		wr = 1;
-		wi = 0;
-		for (m = 0; m < mmax; m += 2) {
-			j = m + mmax;
-			tempr = (double) wr * (data1r = data [j]);
-			tempi = (double) wi * (data1i = data [j + 1]);
-			for (i = m; i < n - mmax * 2; i += mmax * 2) {
-/* mixed precision not significantly more
- * accurate here; if removing double casts,
- * tempr and tempi should be double */
-				tempr -= tempi;
-				tempi = (double) wr *data1i + (double) wi *data1r;
-/* don't expect compiler to analyze j > i + 1 */
-				data1r = data [j + mmax * 2];
-				data1i = data [j + mmax * 2 + 1];
-				data [i] = (datar = data [i]) + tempr;
-				data [i + 1] = (datai = data [i + 1]) + tempi;
-				data [j] = datar - tempr;
-				data [j + 1] = datai - tempi;
-				tempr = (double) wr *data1r;
-				tempi = (double) wi *data1i;
-				j += mmax * 2;
-				}
-			tempr -= tempi;
-			tempi = (double) wr * data1i + (double) wi *data1r;
-			data [i] = (datar = data [i]) + tempr;
-			data [i + 1] = (datai = data [i + 1]) + tempi;
-			data [j] = datar - tempr;
-			data [j + 1] = datai - tempi;
-			wr = (wtemp = wr) * wpr - wi * wpi;
-			wi = wtemp * wpi + wi * wpr;
-			}
-		}
-}
-
--- a/transfo.h
+++ b/transfo.h
@@ -1,20 +1,23 @@
 #ifndef TRANSFORM_H
-#define TRANSFORM_H 
+#define TRANSFORM_H
 
-void MDCT(double* data, int N);
-void IMDCT(double* data, int N);
-void FFT(double *data, int nn, int isign);
+// Use this for decoder - single precision
+//typedef float fftw_real;
 
-#define c_re(c)  ((c).re)
-#define c_im(c)  ((c).im)
+// Use this for encoder - double precision
+typedef double fftw_real;
 
 typedef struct {
-     double re, im;
-} fftw_complex_d;
+     fftw_real re, im;
+} fftw_complex;
 
+#define c_re(c)  ((c).re)
+#define c_im(c)  ((c).im)
+
 #define DEFINE_PFFTW(size)			\
- void pfftw_d_##size(fftw_complex_d *input);	\
- int pfftw_d_permutation_##size(int i);		
+ void pfftwi_##size(fftw_complex *input);	\
+ void pfftw_##size(fftw_complex *input);	\
+ int  pfftw_permutation_##size(int i);
 
 DEFINE_PFFTW(16)
 DEFINE_PFFTW(32)
@@ -25,11 +28,13 @@
 DEFINE_PFFTW(2048)
 
 void MakeFFTOrder(void);
-void MakeFFT2Order(void);
+void IMDCT(fftw_real *data, int N);
+void MDCT(fftw_real *data, int N);
+
 extern int unscambled64[64];    /* the permutation array for FFT64*/
 extern int unscambled256[256];    /* the permutation array for FFT256*/
 extern int unscambled512[512];  /* the permutation array for FFT512*/
 extern int unscambled2048[2048];    /* the permutation array for FFT2048*/
-extern fftw_complex_d FFTarray[2048];    /* the array for in-place FFT */
+extern fftw_complex FFTarray[2048];    /* the array for in-place FFT */
 
-#endif
+#endif	  /*	TRANSFORM_H		*/