ref: 6211c90def21b649e7c30e75ea1ea07e2a4c8483
parent: d7dfb00886d88e82614418c21a582101a0517622
	author: Jean-Marc Valin <Jean-Marc.Valin@csiro.au>
	date: Fri Feb  8 09:21:20 EST 2008
	
Split the radix functions into forward and backward versions, removed the "inverse" flag from the state so it can be shared between the forward and inverse transforms.
--- a/libcelt/_kiss_fft_guts.h
+++ b/libcelt/_kiss_fft_guts.h
@@ -29,7 +29,6 @@
 struct kiss_fft_state{int nfft;
- int inverse;
int factors[2*MAXFACTORS];
int *bitrev;
kiss_fft_cpx twiddles[1];
--- a/libcelt/kiss_fft.c
+++ b/libcelt/kiss_fft.c
@@ -1,6 +1,8 @@
/*
Copyright (c) 2003-2004, Mark Borgerding
+Lots of modifications by JMV
Copyright (c) 2005-2007, Jean-Marc Valin
+Copyright (c) 2008, Jean-Marc Valin, CSIRO
All rights reserved.
@@ -27,372 +29,457 @@
*/
static void kf_bfly2(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_cfg st,
- int m,
- int N,
- int mm
- )
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int N,
+ int mm
+ )
 {- kiss_fft_cpx * Fout2;
- kiss_fft_cpx * tw1;
- kiss_fft_cpx t;
-    if (!st->inverse) {- int i,j;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
-       {- Fout = Fout_beg + i*mm;
- Fout2 = Fout + m;
- tw1 = st->twiddles;
- for(j=0;j<m;j++)
-          {+ kiss_fft_cpx * Fout2;
+ kiss_fft_cpx * tw1;
+ kiss_fft_cpx t;
+ int i,j;
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
+   {+ Fout = Fout_beg + i*mm;
+ Fout2 = Fout + m;
+ tw1 = st->twiddles;
+ for(j=0;j<m;j++)
+      {/* Almost the same as the code path below, except that we divide the input by two
- (while keeping the best accuracy possible) */
- celt_word32_t tr, ti;
- tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);
- ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);
- tw1 += fstride;
- Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
- Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
- Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
- Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
- ++Fout2;
- ++Fout;
- }
- }
-    } else {- int i,j;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
-       {- Fout = Fout_beg + i*mm;
- Fout2 = Fout + m;
- tw1 = st->twiddles;
- for(j=0;j<m;j++)
-          {- C_MULC (t, *Fout2 , *tw1);
- tw1 += fstride;
- C_SUB( *Fout2 , *Fout , t );
- C_ADDTO( *Fout , t );
- ++Fout2;
- ++Fout;
- }
- }
- }
+ (while keeping the best accuracy possible) */
+ celt_word32_t tr, ti;
+ tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);
+ ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);
+ tw1 += fstride;
+ Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
+ Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
+ Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
+ Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
+ ++Fout2;
+ ++Fout;
+ }
+ }
}
+static void ki_bfly2(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int N,
+ int mm
+ )
+{+ kiss_fft_cpx * Fout2;
+ kiss_fft_cpx * tw1;
+ kiss_fft_cpx t;
+ int i,j;
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
+   {+ Fout = Fout_beg + i*mm;
+ Fout2 = Fout + m;
+ tw1 = st->twiddles;
+ for(j=0;j<m;j++)
+      {+ C_MULC (t, *Fout2 , *tw1);
+ tw1 += fstride;
+ C_SUB( *Fout2 , *Fout , t );
+ C_ADDTO( *Fout , t );
+ ++Fout2;
+ ++Fout;
+ }
+ }
+}
+
static void kf_bfly4(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_cfg st,
- int m,
- int N,
- int mm
- )
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int N,
+ int mm
+ )
 {- kiss_fft_cpx *tw1,*tw2,*tw3;
- kiss_fft_cpx scratch[6];
- const size_t m2=2*m;
- const size_t m3=3*m;
- int i, j;
+ kiss_fft_cpx *tw1,*tw2,*tw3;
+ kiss_fft_cpx scratch[6];
+ const size_t m2=2*m;
+ const size_t m3=3*m;
+ int i, j;
- if (st->inverse)
-    {- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
-       {- Fout = Fout_beg + i*mm;
- tw3 = tw2 = tw1 = st->twiddles;
- for (j=0;j<m;j++)
-          {- C_MULC(scratch[0],Fout[m] , *tw1 );
- C_MULC(scratch[1],Fout[m2] , *tw2 );
- C_MULC(scratch[2],Fout[m3] , *tw3 );
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
+   {+ Fout = Fout_beg + i*mm;
+ tw3 = tw2 = tw1 = st->twiddles;
+ for (j=0;j<m;j++)
+      {+ C_MUL4(scratch[0],Fout[m] , *tw1 );
+ C_MUL4(scratch[1],Fout[m2] , *tw2 );
+ C_MUL4(scratch[2],Fout[m3] , *tw3 );
- C_SUB( scratch[5] , *Fout, scratch[1] );
- C_ADDTO(*Fout, scratch[1]);
- C_ADD( scratch[3] , scratch[0] , scratch[2] );
- C_SUB( scratch[4] , scratch[0] , scratch[2] );
- C_SUB( Fout[m2], *Fout, scratch[3] );
- tw1 += fstride;
- tw2 += fstride*2;
- tw3 += fstride*3;
- C_ADDTO( *Fout , scratch[3] );
+ Fout->r = PSHR16(Fout->r, 2);
+ Fout->i = PSHR16(Fout->i, 2);
+ C_SUB( scratch[5] , *Fout, scratch[1] );
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
+ Fout[m2].r = PSHR16(Fout[m2].r, 2);
+ Fout[m2].i = PSHR16(Fout[m2].i, 2);
+ C_SUB( Fout[m2], *Fout, scratch[3] );
+ tw1 += fstride;
+ tw2 += fstride*2;
+ tw3 += fstride*3;
+ C_ADDTO( *Fout , scratch[3] );
- Fout[m].r = scratch[5].r - scratch[4].i;
- Fout[m].i = scratch[5].i + scratch[4].r;
- Fout[m3].r = scratch[5].r + scratch[4].i;
- Fout[m3].i = scratch[5].i - scratch[4].r;
- ++Fout;
- }
- }
- } else
-    {- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
-       {- Fout = Fout_beg + i*mm;
- tw3 = tw2 = tw1 = st->twiddles;
- for (j=0;j<m;j++)
-          {- C_MUL4(scratch[0],Fout[m] , *tw1 );
- C_MUL4(scratch[1],Fout[m2] , *tw2 );
- C_MUL4(scratch[2],Fout[m3] , *tw3 );
+ Fout[m].r = scratch[5].r + scratch[4].i;
+ Fout[m].i = scratch[5].i - scratch[4].r;
+ Fout[m3].r = scratch[5].r - scratch[4].i;
+ Fout[m3].i = scratch[5].i + scratch[4].r;
+ ++Fout;
+ }
+ }
+}
+
+static void ki_bfly4(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int N,
+ int mm
+ )
+{+ kiss_fft_cpx *tw1,*tw2,*tw3;
+ kiss_fft_cpx scratch[6];
+ const size_t m2=2*m;
+ const size_t m3=3*m;
+ int i, j;
+
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
+   {+ Fout = Fout_beg + i*mm;
+ tw3 = tw2 = tw1 = st->twiddles;
+ for (j=0;j<m;j++)
+      {+ C_MULC(scratch[0],Fout[m] , *tw1 );
+ C_MULC(scratch[1],Fout[m2] , *tw2 );
+ C_MULC(scratch[2],Fout[m3] , *tw3 );
- Fout->r = PSHR16(Fout->r, 2);
- Fout->i = PSHR16(Fout->i, 2);
- C_SUB( scratch[5] , *Fout, scratch[1] );
- C_ADDTO(*Fout, scratch[1]);
- C_ADD( scratch[3] , scratch[0] , scratch[2] );
- C_SUB( scratch[4] , scratch[0] , scratch[2] );
- Fout[m2].r = PSHR16(Fout[m2].r, 2);
- Fout[m2].i = PSHR16(Fout[m2].i, 2);
- C_SUB( Fout[m2], *Fout, scratch[3] );
- tw1 += fstride;
- tw2 += fstride*2;
- tw3 += fstride*3;
- C_ADDTO( *Fout , scratch[3] );
+ C_SUB( scratch[5] , *Fout, scratch[1] );
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
+ C_SUB( Fout[m2], *Fout, scratch[3] );
+ tw1 += fstride;
+ tw2 += fstride*2;
+ tw3 += fstride*3;
+ C_ADDTO( *Fout , scratch[3] );
- Fout[m].r = scratch[5].r + scratch[4].i;
- Fout[m].i = scratch[5].i - scratch[4].r;
- Fout[m3].r = scratch[5].r - scratch[4].i;
- Fout[m3].i = scratch[5].i + scratch[4].r;
- ++Fout;
- }
- }
- }
+ Fout[m].r = scratch[5].r - scratch[4].i;
+ Fout[m].i = scratch[5].i + scratch[4].r;
+ Fout[m3].r = scratch[5].r + scratch[4].i;
+ Fout[m3].i = scratch[5].i - scratch[4].r;
+ ++Fout;
+ }
+ }
}
+
static void kf_bfly3(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_cfg st,
- size_t m
- )
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ size_t m
+ )
 {- size_t k=m;
- const size_t m2 = 2*m;
- kiss_fft_cpx *tw1,*tw2;
- kiss_fft_cpx scratch[5];
- kiss_fft_cpx epi3;
- epi3 = st->twiddles[fstride*m];
+ size_t k=m;
+ const size_t m2 = 2*m;
+ kiss_fft_cpx *tw1,*tw2;
+ kiss_fft_cpx scratch[5];
+ kiss_fft_cpx epi3;
+ epi3 = st->twiddles[fstride*m];
- tw1=tw2=st->twiddles;
-     if (st->inverse) {-        do{-           if (!st->inverse) {- C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
- }
+ tw1=tw2=st->twiddles;
+   do{+ C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
- C_MULC(scratch[1],Fout[m] , *tw1);
- C_MULC(scratch[2],Fout[m2] , *tw2);
+ C_MUL(scratch[1],Fout[m] , *tw1);
+ C_MUL(scratch[2],Fout[m2] , *tw2);
- C_ADD(scratch[3],scratch[1],scratch[2]);
- C_SUB(scratch[0],scratch[1],scratch[2]);
- tw1 += fstride;
- tw2 += fstride*2;
+ C_ADD(scratch[3],scratch[1],scratch[2]);
+ C_SUB(scratch[0],scratch[1],scratch[2]);
+ tw1 += fstride;
+ tw2 += fstride*2;
- Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
- Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+ Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+ Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
- C_MULBYSCALAR( scratch[0] , -epi3.i );
+ C_MULBYSCALAR( scratch[0] , epi3.i );
- C_ADDTO(*Fout,scratch[3]);
+ C_ADDTO(*Fout,scratch[3]);
- Fout[m2].r = Fout[m].r + scratch[0].i;
- Fout[m2].i = Fout[m].i - scratch[0].r;
+ Fout[m2].r = Fout[m].r + scratch[0].i;
+ Fout[m2].i = Fout[m].i - scratch[0].r;
- Fout[m].r -= scratch[0].i;
- Fout[m].i += scratch[0].r;
+ Fout[m].r -= scratch[0].i;
+ Fout[m].i += scratch[0].r;
- ++Fout;
- }while(--k);
-     } else {-        do{-           if (!st->inverse) {- C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
- }
+ ++Fout;
+ }while(--k);
+}
- C_MUL(scratch[1],Fout[m] , *tw1);
- C_MUL(scratch[2],Fout[m2] , *tw2);
+static void ki_bfly3(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ size_t m
+ )
+{+ size_t k=m;
+ const size_t m2 = 2*m;
+ kiss_fft_cpx *tw1,*tw2;
+ kiss_fft_cpx scratch[5];
+ kiss_fft_cpx epi3;
+ epi3 = st->twiddles[fstride*m];
- C_ADD(scratch[3],scratch[1],scratch[2]);
- C_SUB(scratch[0],scratch[1],scratch[2]);
- tw1 += fstride;
- tw2 += fstride*2;
+ tw1=tw2=st->twiddles;
+   do{- Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
- Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+ C_MULC(scratch[1],Fout[m] , *tw1);
+ C_MULC(scratch[2],Fout[m2] , *tw2);
- C_MULBYSCALAR( scratch[0] , epi3.i );
+ C_ADD(scratch[3],scratch[1],scratch[2]);
+ C_SUB(scratch[0],scratch[1],scratch[2]);
+ tw1 += fstride;
+ tw2 += fstride*2;
- C_ADDTO(*Fout,scratch[3]);
+ Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+ Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
- Fout[m2].r = Fout[m].r + scratch[0].i;
- Fout[m2].i = Fout[m].i - scratch[0].r;
+ C_MULBYSCALAR( scratch[0] , -epi3.i );
- Fout[m].r -= scratch[0].i;
- Fout[m].i += scratch[0].r;
+ C_ADDTO(*Fout,scratch[3]);
- ++Fout;
- }while(--k);
- }
+ Fout[m2].r = Fout[m].r + scratch[0].i;
+ Fout[m2].i = Fout[m].i - scratch[0].r;
+
+ Fout[m].r -= scratch[0].i;
+ Fout[m].i += scratch[0].r;
+
+ ++Fout;
+ }while(--k);
}
+
static void kf_bfly5(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_cfg st,
- int m
- )
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
 {- kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
- int u;
- kiss_fft_cpx scratch[13];
- kiss_fft_cpx * twiddles = st->twiddles;
- kiss_fft_cpx *tw;
- kiss_fft_cpx ya,yb;
- ya = twiddles[fstride*m];
- yb = twiddles[fstride*2*m];
+ kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+ int u;
+ kiss_fft_cpx scratch[13];
+ kiss_fft_cpx * twiddles = st->twiddles;
+ kiss_fft_cpx *tw;
+ kiss_fft_cpx ya,yb;
+ ya = twiddles[fstride*m];
+ yb = twiddles[fstride*2*m];
- Fout0=Fout;
- Fout1=Fout0+m;
- Fout2=Fout0+2*m;
- Fout3=Fout0+3*m;
- Fout4=Fout0+4*m;
+ Fout0=Fout;
+ Fout1=Fout0+m;
+ Fout2=Fout0+2*m;
+ Fout3=Fout0+3*m;
+ Fout4=Fout0+4*m;
- tw=st->twiddles;
-    if (st->inverse) {+ tw=st->twiddles;
+   for ( u=0; u<m; ++u ) {+ C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+ scratch[0] = *Fout0;
-       for ( u=0; u<m; ++u ) {-          if (!st->inverse) {- C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
- }
- scratch[0] = *Fout0;
+ C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+ C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+ C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+ C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
- C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
- C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
- C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
- C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
+ C_ADD( scratch[7],scratch[1],scratch[4]);
+ C_SUB( scratch[10],scratch[1],scratch[4]);
+ C_ADD( scratch[8],scratch[2],scratch[3]);
+ C_SUB( scratch[9],scratch[2],scratch[3]);
- C_ADD( scratch[7],scratch[1],scratch[4]);
- C_SUB( scratch[10],scratch[1],scratch[4]);
- C_ADD( scratch[8],scratch[2],scratch[3]);
- C_SUB( scratch[9],scratch[2],scratch[3]);
+ Fout0->r += scratch[7].r + scratch[8].r;
+ Fout0->i += scratch[7].i + scratch[8].i;
- Fout0->r += scratch[7].r + scratch[8].r;
- Fout0->i += scratch[7].i + scratch[8].i;
+ scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+ scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
- scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
- scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+ scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+ scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
- scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
- scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
+ C_SUB(*Fout1,scratch[5],scratch[6]);
+ C_ADD(*Fout4,scratch[5],scratch[6]);
- C_SUB(*Fout1,scratch[5],scratch[6]);
- C_ADD(*Fout4,scratch[5],scratch[6]);
+ scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+ scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+ scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+ scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
- scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
- scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
- scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
- scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
+ C_ADD(*Fout2,scratch[11],scratch[12]);
+ C_SUB(*Fout3,scratch[11],scratch[12]);
- C_ADD(*Fout2,scratch[11],scratch[12]);
- C_SUB(*Fout3,scratch[11],scratch[12]);
+ ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+ }
+}
- ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
- }
-    } else {-       for ( u=0; u<m; ++u ) {-          if (!st->inverse) {- C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
- }
- scratch[0] = *Fout0;
+static void ki_bfly5(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{+ kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+ int u;
+ kiss_fft_cpx scratch[13];
+ kiss_fft_cpx * twiddles = st->twiddles;
+ kiss_fft_cpx *tw;
+ kiss_fft_cpx ya,yb;
+ ya = twiddles[fstride*m];
+ yb = twiddles[fstride*2*m];
- C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
- C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
- C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
- C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+ Fout0=Fout;
+ Fout1=Fout0+m;
+ Fout2=Fout0+2*m;
+ Fout3=Fout0+3*m;
+ Fout4=Fout0+4*m;
- C_ADD( scratch[7],scratch[1],scratch[4]);
- C_SUB( scratch[10],scratch[1],scratch[4]);
- C_ADD( scratch[8],scratch[2],scratch[3]);
- C_SUB( scratch[9],scratch[2],scratch[3]);
+ tw=st->twiddles;
+   for ( u=0; u<m; ++u ) {+ scratch[0] = *Fout0;
- Fout0->r += scratch[7].r + scratch[8].r;
- Fout0->i += scratch[7].i + scratch[8].i;
+ C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
+ C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
+ C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
+ C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
- scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
- scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+ C_ADD( scratch[7],scratch[1],scratch[4]);
+ C_SUB( scratch[10],scratch[1],scratch[4]);
+ C_ADD( scratch[8],scratch[2],scratch[3]);
+ C_SUB( scratch[9],scratch[2],scratch[3]);
- scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
- scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+ Fout0->r += scratch[7].r + scratch[8].r;
+ Fout0->i += scratch[7].i + scratch[8].i;
- C_SUB(*Fout1,scratch[5],scratch[6]);
- C_ADD(*Fout4,scratch[5],scratch[6]);
+ scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+ scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
- scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
- scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
- scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
- scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+ scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
+ scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
- C_ADD(*Fout2,scratch[11],scratch[12]);
- C_SUB(*Fout3,scratch[11],scratch[12]);
+ C_SUB(*Fout1,scratch[5],scratch[6]);
+ C_ADD(*Fout4,scratch[5],scratch[6]);
- ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
- }
- }
+ scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+ scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+ scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
+ scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
+
+ C_ADD(*Fout2,scratch[11],scratch[12]);
+ C_SUB(*Fout3,scratch[11],scratch[12]);
+
+ ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+ }
}
/* perform the butterfly for one stage of a mixed radix FFT */
static void kf_bfly_generic(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_cfg st,
- int m,
- int p
- )
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int p
+ )
 {- int u,k,q1,q;
- kiss_fft_cpx * twiddles = st->twiddles;
- kiss_fft_cpx t;
- kiss_fft_cpx scratchbuf[17];
- int Norig = st->nfft;
+ int u,k,q1,q;
+ kiss_fft_cpx * twiddles = st->twiddles;
+ kiss_fft_cpx t;
+ kiss_fft_cpx scratchbuf[17];
+ int Norig = st->nfft;
- /*CHECKBUF(scratchbuf,nscratchbuf,p);*/
- if (p>17)
-       celt_fatal("KissFFT: max radix supported is 17");+ /*CHECKBUF(scratchbuf,nscratchbuf,p);*/
+ if (p>17)
+      celt_fatal("KissFFT: max radix supported is 17");-    for ( u=0; u<m; ++u ) {- k=u;
-        for ( q1=0 ; q1<p ; ++q1 ) {- scratchbuf[q1] = Fout[ k ];
-        if (!st->inverse) {- C_FIXDIV(scratchbuf[q1],p);
- }
- k += m;
- }
+   for ( u=0; u<m; ++u ) {+ k=u;
+      for ( q1=0 ; q1<p ; ++q1 ) {+ scratchbuf[q1] = Fout[ k ];
+ C_FIXDIV(scratchbuf[q1],p);
+ k += m;
+ }
- k=u;
-        for ( q1=0 ; q1<p ; ++q1 ) {- int twidx=0;
- Fout[ k ] = scratchbuf[0];
-            for (q=1;q<p;++q ) {- twidx += fstride * k;
- if (twidx>=Norig) twidx-=Norig;
- if (st->inverse)
- C_MULC(t,scratchbuf[q] , twiddles[twidx] );
- else
- C_MUL(t,scratchbuf[q] , twiddles[twidx] );
- C_ADDTO( Fout[ k ] ,t);
- }
- k += m;
- }
- }
+ k=u;
+      for ( q1=0 ; q1<p ; ++q1 ) {+ int twidx=0;
+ Fout[ k ] = scratchbuf[0];
+         for (q=1;q<p;++q ) {+ twidx += fstride * k;
+ if (twidx>=Norig) twidx-=Norig;
+ C_MUL(t,scratchbuf[q] , twiddles[twidx] );
+ C_ADDTO( Fout[ k ] ,t);
+ }
+ k += m;
+ }
+ }
}
+static void ki_bfly_generic(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int p
+ )
+{+ int u,k,q1,q;
+ kiss_fft_cpx * twiddles = st->twiddles;
+ kiss_fft_cpx t;
+ kiss_fft_cpx scratchbuf[17];
+ int Norig = st->nfft;
+
+ /*CHECKBUF(scratchbuf,nscratchbuf,p);*/
+ if (p>17)
+      celt_fatal("KissFFT: max radix supported is 17");+
+   for ( u=0; u<m; ++u ) {+ k=u;
+      for ( q1=0 ; q1<p ; ++q1 ) {+ scratchbuf[q1] = Fout[ k ];
+ k += m;
+ }
+
+ k=u;
+      for ( q1=0 ; q1<p ; ++q1 ) {+ int twidx=0;
+ Fout[ k ] = scratchbuf[0];
+         for (q=1;q<p;++q ) {+ twidx += fstride * k;
+ if (twidx>=Norig) twidx-=Norig;
+ C_MULC(t,scratchbuf[q] , twiddles[twidx] );
+ C_ADDTO( Fout[ k ] ,t);
+ }
+ k += m;
+ }
+ }
+}
+
static
void compute_bitrev_table(
int * Fout,
@@ -456,6 +543,36 @@
}
}
+static
+ void ki_work(
+ kiss_fft_cpx * Fout,
+ const kiss_fft_cpx * f,
+ const size_t fstride,
+ int in_stride,
+ int * factors,
+ const kiss_fft_cfg st,
+ int N,
+ int s2,
+ int m2
+ )
+{+ int i;
+ kiss_fft_cpx * Fout_beg=Fout;
+ const int p=*factors++; /* the radix */
+ const int m=*factors++; /* stage's fft length/p */
+   /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/+ if (m!=1)
+ ki_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
+
+   switch (p) {+ case 2: ki_bfly2(Fout,fstride,st,m, N, m2); break;
+      case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly3(Fout,fstride,st,m);} break; + case 4: ki_bfly4(Fout,fstride,st,m, N, m2); break;
+      case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly5(Fout,fstride,st,m);} break; +      default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly_generic(Fout,fstride,st,m,p);} break;+ }
+}
+
/* facbuf is populated by p1,m1,p2,m2, ...
where
p[i] * m[i] = m[i-1]
@@ -488,7 +605,7 @@
* The return value is a contiguous block of memory, allocated with malloc. As such,
* It can be freed with free(), rather than a kiss_fft-specific function.
* */
-kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
+kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem )
 {kiss_fft_cfg st=NULL;
size_t memneeded = sizeof(struct kiss_fft_state)
@@ -504,10 +621,9 @@
     if (st) {int i;
st->nfft=nfft;
- st->inverse = inverse_fft;
#ifdef FIXED_POINT
         for (i=0;i<nfft;++i) {- celt_word32_t phase = i;
+ celt_word32_t phase = -i;
kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
}
#else
@@ -546,5 +662,24 @@
void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
 {kiss_fft_stride(cfg,fin,fout,1);
+}
+
+void kiss_ifft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
+{+ if (fin == fout)
+   {+      celt_fatal("In-place FFT not supported");+   } else {+ /* Bit-reverse the input */
+ int i;
+ for (i=0;i<st->nfft;i++)
+ fout[i] = fin[st->bitrev[i]];
+ ki_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
+ }
+}
+
+void kiss_ifft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{+ kiss_ifft_stride(cfg,fin,fout,1);
}
--- a/libcelt/kiss_fft.h
+++ b/libcelt/kiss_fft.h
@@ -71,7 +71,7 @@
* buffer size in *lenmem.
* */
-kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem);
+kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem);
/*
* kiss_fft(cfg,in_out_buf)
@@ -84,11 +84,13 @@
f[k].r and f[k].i
* */
void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void kiss_ifft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
/*
A more generic version of the above function. It reads its input from every Nth sample.
* */
void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
+void kiss_ifft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
/* If kiss_fft_alloc allocated a buffer, it is one contiguous
buffer and can be simply free()d when no longer needed*/
--- a/libcelt/kiss_fftr.c
+++ b/libcelt/kiss_fftr.c
@@ -33,7 +33,7 @@
#endif
};
-kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
+kiss_fftr_cfg kiss_fftr_alloc(int nfft,void * mem,size_t * lenmem)
 {int i;
kiss_fftr_cfg st = NULL;
@@ -45,7 +45,7 @@
}
nfft >>= 1;
- kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
+ kiss_fft_alloc (nfft, NULL, &subsize);
memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2);
     if (lenmem == NULL) {@@ -61,7 +61,7 @@
st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
st->super_twiddles = st->tmpbuf + nfft;
- kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
+ kiss_fft_alloc(nfft, st->substate, &subsize);
#ifdef FIXED_POINT
     for (i=0;i<nfft;++i) {@@ -85,10 +85,6 @@
kiss_fft_cpx f2k,tdc;
celt_word32_t f1kr, f1ki, twr, twi;
-   if ( st->substate->inverse) {-      celt_fatal("kiss fft usage error: improper alloc\n");- }
-
ncfft = st->substate->nfft;
/*perform the parallel fft of two real signals packed in real,imag*/
@@ -142,10 +138,6 @@
/* input buffer timedata is stored row-wise */
int k, ncfft;
-   if (st->substate->inverse == 0) {-      celt_fatal ("kiss fft usage error: improper alloc\n");- }
-
ncfft = st->substate->nfft;
st->tmpbuf[0].r = freqdata[0] + freqdata[1];
@@ -169,5 +161,5 @@
st->tmpbuf[ncfft - k].i *= -1;
#endif
}
- kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
+ kiss_ifft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
}
--- a/libcelt/kiss_fftr.h
+++ b/libcelt/kiss_fftr.h
@@ -18,7 +18,7 @@
typedef struct kiss_fftr_state *kiss_fftr_cfg;
-kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
+kiss_fftr_cfg kiss_fftr_alloc(int nfft,void * mem, size_t * lenmem);
/*
nfft must be even
--- a/libcelt/mdct.c
+++ b/libcelt/mdct.c
@@ -54,8 +54,7 @@
l->n = N;
N2 = N/2;
N4 = N/4;
- l->kfft = kiss_fft_alloc(N4, 0, NULL, NULL);
- l->ikfft = kiss_fft_alloc(N4, 1, NULL, NULL);
+ l->kfft = kiss_fft_alloc(N4, NULL, NULL);
l->trig = celt_alloc(N2*sizeof(float));
/* We have enough points that sine isn't necessary */
for (i=0;i<N2;i++)
@@ -66,7 +65,6 @@
void mdct_clear(mdct_lookup *l)
 {kiss_fft_free(l->kfft);
- kiss_fft_free(l->ikfft);
celt_free(l->trig);
}
@@ -131,7 +129,7 @@
}
/* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
- kiss_fft(l->ikfft, (const kiss_fft_cpx *)out, (kiss_fft_cpx *)f);
+ kiss_ifft(l->kfft, (const kiss_fft_cpx *)out, (kiss_fft_cpx *)f);
/* Post-rotate */
for(i=0;i<N4;i++)
--- a/libcelt/mdct.h
+++ b/libcelt/mdct.h
@@ -47,7 +47,6 @@
 typedef struct {int n;
kiss_fft_cfg kfft;
- kiss_fft_cfg ikfft;
float *trig;
float scale;
} mdct_lookup;
--- a/tests/dft-test.c
+++ b/tests/dft-test.c
@@ -42,7 +42,7 @@
kiss_fft_cpx * in = (kiss_fft_cpx*)malloc(buflen);
kiss_fft_cpx * out= (kiss_fft_cpx*)malloc(buflen);
- kiss_fft_cfg cfg = kiss_fft_alloc(nfft,isinverse,0,0);
+ kiss_fft_cfg cfg = kiss_fft_alloc(nfft,0,0);
int k;
     for (k=0;k<nfft;++k) {@@ -50,7 +50,10 @@
in[k].i = (rand() % 65536) - 32768;
}
- kiss_fft(cfg,in,out);
+ if (isinverse)
+ kiss_ifft(cfg,in,out);
+ else
+ kiss_fft(cfg,in,out);
check(in,out,nfft,isinverse);
--- a/tests/real-fft-test.c
+++ b/tests/real-fft-test.c
@@ -96,8 +96,8 @@
cin[i].i = zero;
}
- kiss_fft_state = kiss_fft_alloc(NFFT,0,0,0);
- kiss_fftr_state = kiss_fftr_alloc(NFFT,0,0,0);
+ kiss_fft_state = kiss_fft_alloc(NFFT,0,0);
+ kiss_fftr_state = kiss_fftr_alloc(NFFT,0,0);
kiss_fft(kiss_fft_state,cin,cout);
kiss_fftr(kiss_fftr_state,rin,sout);
@@ -118,12 +118,6 @@
     printf("%d complex ffts took %gs, real took %gs\n",NUMFFTS,tfft,trfft);- free(kiss_fft_state);
- free(kiss_fftr_state);
-
- kiss_fft_state = kiss_fft_alloc(NFFT,1,0,0);
- kiss_fftr_state = kiss_fftr_alloc(NFFT,1,0,0);
-
memset(cin,0,sizeof(cin));
#if 1
cin[0].r = rand_scalar();
@@ -153,7 +147,7 @@
fin[2*i+1] = cin[i].i;
}
- kiss_fft(kiss_fft_state,cin,cout);
+ kiss_ifft(kiss_fft_state,cin,cout);
kiss_fftri(kiss_fftr_state,fin,rout);
/*
     printf(" results from inverse kiss_fft : (%f,%f), (%f,%f), (%f,%f), (%f,%f), (%f,%f) ...\n "--
⑨