shithub: aacenc

Download patch

ref: 528d5005f13cfefab9c73d3423b9e4cb6316662d
parent: d3bfd6c0e4da61ea0d423ef1837a098190944c1d
author: knik <knik>
date: Wed Aug 21 12:52:25 EDT 2002

new simplier and faster fft routine and correct real fft
new real fft is just a complex fft wrapper so it is slower than optimal but
by surprise it seems to be at least as fast as the old buggy function

--- a/libfaac/fft.c
+++ b/libfaac/fft.c
@@ -1,6 +1,7 @@
 /*
  * FAAC - Freeware Advanced Audio Coder
- * Copyright (C) 2001 Menno Bakker
+ * $Id: fft.c,v 1.7 2002/08/21 16:52:25 knik Exp $
+ * Copyright (C) 2002 Krzysztof Nikiel
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -16,430 +17,192 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
- * $Id: fft.c,v 1.6 2002/08/07 18:10:39 knik Exp $
  */
 
 #include <math.h>
 #include <stdlib.h>
+#include <stdio.h>
 
 #include "fft.h"
 #include "util.h"
 
-#define  MAXLOGM     11    /* max FFT length = 2^MAXLOGM */
-#define  TWOPI       6.28318530717958647692
-#define  SQHALF      0.707106781186547524401
+#define MAXLOGM 11
 
-static   double    *tabr[MAXLOGM]; /* tables of butterfly angles */
+typedef float fftfloat;
 
-static void build_table(int logm)
+static fftfloat *costbl[MAXLOGM + 1] = {NULL};	// size/2
+static fftfloat *negsintbl[MAXLOGM + 1] = {NULL};	// size/2
+static unsigned short *reordertbl[MAXLOGM + 1] = {NULL};	//size
+
+static void reorder(double *x, int logm)
 {
-   static   int      m, m2, m4, m8, nel, n;
-   static   double    *cn, *spcn, *smcn;
-   static   double    ang, c, s;
+  int i;
+  int size = 1 << logm;
+  unsigned short *r;	//size
 
-   /* Compute a few constants */
-   m = 1 << logm; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;
 
-   /* Allocate memory for tables */
-   nel = m4 - 2;
-   tabr[logm-4] = (double *)AllocMemory(3 * nel * sizeof(double));
-/*
-   if ((tab[logm-4] = (double *) AllocMemory(3 * nel * sizeof(double))) == NULL) {
-      printf("Error : RSFFT : not enough memory for cosine tables.\n");
-      error_exit();
-   }
-*/
+  if (!reordertbl[logm])
+    // create bit reversing table
+  {
+    reordertbl[logm] = AllocMemory(size * sizeof(*(reordertbl[0])));
 
-   /* Initialize pointers */
-   cn  = tabr[logm-4]; spcn  = cn + nel;  smcn  = spcn + nel;
+    for (i = 0; i < size; i++)
+    {
+      int reversed = 0;
+      int b0;
+      int tmp = i;
 
-   /* Compute tables */
-   for (n = 1; n < m4; n++) {
-      if (n == m8) continue;
-      ang = n * TWOPI / m;
-      c = cos(ang); s = sin(ang);
-      *cn++ = c; *spcn++ = - (s + c); *smcn++ = s - c;
-   }
-}
-
-/*--------------------------------------------------------------------*
- *    Recursive part of the RSFFT algorithm.  Not externally          *
- *    callable.                                                       *
- *--------------------------------------------------------------------*/
-
-static void rsrec(double *x, int logm)
-{
-   static   int      m, m2, m4, m8, nel, n;
-   static   double    *xr1, *xr2, *xi1;
-   static   double    *cn, *spcn, *smcn;
-   static   double    tmp1, tmp2;
-
-   /* Compute trivial cases */
-   if (logm < 2) {
-      if (logm == 1) {   /* length m = 2 */
-         xr2  = x + 1;
-         tmp1 = *x + *xr2;
-         *xr2 = *x - *xr2;
-         *x   = tmp1;
-         return;
+      for (b0 = 0; b0 < logm; b0++)
+      {
+	reversed = (reversed << 1) | (tmp & 1);
+	tmp >>= 1;
       }
-      else if (logm == 0) return;   /* length m = 1 */
-   }
+      reordertbl[logm][i] = reversed;
+    }
+  }
 
-   /* Compute a few constants */
-   m = 1 << logm; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;
+  r = reordertbl[logm];
 
-   /* Build tables of butterfly coefficients, if necessary */
-   if ((logm >= 4) && (tabr[logm-4] == NULL))
-      build_table(logm);
+  for (i = 0; i < size; i++)
+  {
+    int j = r[i];
+    double tmp;
 
-   /* Step 1 */
-   xr1 = x; xr2 = xr1 + m2;
-   for (n = 0; n < m2; n++) {
-      tmp1 = *xr1 + *xr2;
-      *xr2 = *xr1 - *xr2;
-      *xr1 = tmp1;
-      xr1++; xr2++;
-   }
+    if (j <= i)
+      continue;
 
-   /* Step 2 */
-   xr1 = x + m2 + m4;
-   for (n = 0; n < m4; n++) {
-      *xr1 = - *xr1;
-      xr1++;
-   }
+    tmp = x[i];
+    x[i] = x[j];
+    x[j] = tmp;
+  }
+}
 
-   /* Step 3: multiplication by W_M^n */
-   xr1 = x + m2; xi1 = xr1 + m4;
-   if (logm >= 4) {
-      nel = m4 - 2;
-      cn  = tabr[logm-4]; spcn  = cn + nel;  smcn  = spcn + nel;
-   }
-   xr1++; xi1++;
-   for (n = 1; n < m4; n++) {
-      if (n == m8) {
-         tmp1 =  SQHALF * (*xr1 + *xi1);
-         *xi1 =  SQHALF * (*xi1 - *xr1);
-         *xr1 =  tmp1;
-      } else {
-         tmp2 = *cn++ * (*xr1 + *xi1);
-         tmp1 = *spcn++ * *xr1 + tmp2;
-         *xr1 = *smcn++ * *xi1 + tmp2;
-         *xi1 = tmp1;
-      }
-      xr1++; xi1++;
-   }
+static void fft_proc(double *xr, double *xi,
+		     fftfloat *refac, fftfloat *imfac, int size)
+{
+  int step, shift, pos;
+  int exp, estep;
 
-   /* Call rsrec again with half DFT length */
-   rsrec(x, logm-1);
+  estep = size;
+  for (step = 1; step < size; step *= 2)
+  {
+    int x1;
+    int x2 = 0;
+    estep >>= 1;
+    for (pos = 0; pos < size; pos += (2 * step))
+    {
+      x1 = x2;
+      x2 += step;
+      exp = 0;
+      for (shift = 0; shift < step; shift++)
+      {
+	double v2r, v2i;
 
-   /* Step 4: Call complex DFT routine, with quarter DFT length.
-      Constants have to be recomputed, because they are static! */
-   m = 1 << logm; m2 = m / 2; m4 = 3 * (m / 4);
-   srrec(x + m2, x + m4, logm-2);
+	v2r = xr[x2] * refac[exp] - xi[x2] * imfac[exp];
+	v2i = xr[x2] * imfac[exp] + xi[x2] * refac[exp];
 
-   /* Step 5: sign change & data reordering */
-   m = 1 << logm; m2 = m / 2; m4 = m2 / 2; m8 = m4 / 2;
-   xr1 = x + m2 + m4;
-   xr2 = x + m - 1;
-   for (n = 0; n < m8; n++) {
-      tmp1   =    *xr1;
-      *xr1++ =  - *xr2;
-      *xr2-- =  - tmp1;
-   }
-   xr1 = x + m2 + 1;
-   xr2 = x + m - 2;
-   for (n = 0; n < m8; n++) {
-      tmp1   =    *xr1;
-      *xr1++ =  - *xr2;
-      *xr2-- =    tmp1;
-      xr1++;
-      xr2--;
-   }
-   if (logm == 2) x[3] = -x[3];
-}
+	xr[x2] = xr[x1] - v2r;
+	xr[x1] += v2r;
 
-/*--------------------------------------------------------------------*
- *    Direct transform for real inputs                                *
- *--------------------------------------------------------------------*/
+	xi[x2] = xi[x1] - v2i;
 
-/*--------------------------------------------------------------------*
- *    Data unshuffling according to bit-reversed indexing.            *
- *                                                                    *
- *    Bit reversal is done using Evan's algorithm (Ref: D. M. W.      *
- *    Evans, "An improved digit-reversal permutation algorithm ...",  *
- *    IEEE Trans. ASSP, Aug. 1987, pp. 1120-1125).                    *
- *--------------------------------------------------------------------*/
-static   int   brseed[256];   /* Evans' seed table */
-static   int   brsflg;        /* flag for table building */
+	xi[x1] += v2i;
 
-static void BR_permute(double *x, int logm)
-{
-   int      i, j, imax, lg2, n;
-   int      off, fj, gno, *brp;
-   double    tmp, *xp, *xq;
+	exp += estep;
 
-   lg2 = logm >> 1;
-   n = 1 << lg2;
-   if (logm & 1) lg2++;
-
-   /* Create seed table if not yet built */
-   if (brsflg != logm) {
-      brsflg = logm;
-      brseed[0] = 0;
-      brseed[1] = 1;
-      for (j = 2; j <= lg2; j++) {
-         imax = 1 << (j - 1);
-         for (i = 0; i < imax; i++) {
-            brseed[i] <<= 1;
-            brseed[i + imax] = brseed[i] + 1;
-         }
+	x1++;
+	x2++;
       }
-   }
-
-   /* Unshuffling loop */
-   for (off = 1; off < n; off++) {
-      fj = n * brseed[off]; i = off; j = fj;
-      tmp = x[i]; x[i] = x[j]; x[j] = tmp;
-      xp = &x[i];
-      brp = &brseed[1];
-      for (gno = 1; gno < brseed[off]; gno++) {
-         xp += n;
-         j = fj + *brp++;
-         xq = x + j;
-         tmp = *xp; *xp = *xq; *xq = tmp;
-      }
-   }
+    }
+  }
 }
 
-/*--------------------------------------------------------------------*
- *    Recursive part of the SRFFT algorithm.                          *
- *--------------------------------------------------------------------*/
-
-static void srrec(double *xr, double *xi, int logm)
+static void check_tables(int logm)
 {
-   static   int      m, m2, m4, m8, nel, n;
-   static   double    *xr1, *xr2, *xi1, *xi2;
-   static   double    *cn, *spcn, *smcn, *c3n, *spc3n, *smc3n;
-   static   double    tmp1, tmp2, ang, c, s;
-   static   double    *tab[MAXLOGM];
 
-   /* Compute trivial cases */
-   if (logm < 3) {
-      if (logm == 2) {  /* length m = 4 */
-         xr2  = xr + 2;
-         xi2  = xi + 2;
-         tmp1 = *xr + *xr2;
-         *xr2 = *xr - *xr2;
-         *xr  = tmp1;
-         tmp1 = *xi + *xi2;
-         *xi2 = *xi - *xi2;
-         *xi  = tmp1;
-         xr1  = xr + 1;
-         xi1  = xi + 1;
-         xr2++;
-         xi2++;
-         tmp1 = *xr1 + *xr2;
-         *xr2 = *xr1 - *xr2;
-         *xr1 = tmp1;
-         tmp1 = *xi1 + *xi2;
-         *xi2 = *xi1 - *xi2;
-         *xi1 = tmp1;
-         xr2  = xr + 1;
-         xi2  = xi + 1;
-         tmp1 = *xr + *xr2;
-         *xr2 = *xr - *xr2;
-         *xr  = tmp1;
-         tmp1 = *xi + *xi2;
-         *xi2 = *xi - *xi2;
-         *xi  = tmp1;
-         xr1  = xr + 2;
-         xi1  = xi + 2;
-         xr2  = xr + 3;
-         xi2  = xi + 3;
-         tmp1 = *xr1 + *xi2;
-         tmp2 = *xi1 + *xr2;
-         *xi1 = *xi1 - *xr2;
-         *xr2 = *xr1 - *xi2;
-         *xr1 = tmp1;
-         *xi2 = tmp2;
-         return;
-      }
-      else if (logm == 1) {   /* length m = 2 */
-         xr2  = xr + 1;
-         xi2  = xi + 1;
-         tmp1 = *xr + *xr2;
-         *xr2 = *xr - *xr2;
-         *xr  = tmp1;
-         tmp1 = *xi + *xi2;
-         *xi2 = *xi - *xi2;
-         *xi  = tmp1;
-         return;
-      }
-      else if (logm == 0) return;   /* length m = 1 */
-   }
+  if (!(costbl[logm]))
+  {
+    int i;
+    int size = 1 << logm;
 
-   /* Compute a few constants */
-   m = 1 << logm; m2 = m / 2; m4 = m2 / 2; m8 = m4 /2;
+    if (negsintbl[logm])
+      FreeMemory(negsintbl[logm]);
 
-   /* Build tables of butterfly coefficients, if necessary */
-   if ((logm >= 4) && (tab[logm-4] == NULL)) {
+    costbl[logm] = AllocMemory((size / 2) * sizeof(*(costbl[0])));
+    negsintbl[logm] = AllocMemory((size / 2) * sizeof(*(negsintbl[0])));
 
-      /* Allocate memory for tables */
-      nel = m4 - 2;
-      tab[logm-4] = (double *)AllocMemory(6 * nel * sizeof(double));
-/*
-      if ((tab[logm-4] = (double *)AllocMemory(6 * nel * sizeof(double))) == NULL) {
-         error_exit();
-      }
-*/
+    for (i = 0; i < (size >> 1); i++)
+    {
+      double theta = 2.0 * M_PI * ((double) i) / (double) size;
+      costbl[logm][i] = cos(theta);
+      negsintbl[logm][i] = -sin(theta);
+    }
+  }
+}
 
-      /* Initialize pointers */
-      cn  = tab[logm-4]; spcn  = cn + nel;  smcn  = spcn + nel;
-      c3n = smcn + nel;  spc3n = c3n + nel; smc3n = spc3n + nel;
+void fft(double *xr, double *xi, int logm)
+{
+  if (logm > MAXLOGM)
+  {
+    fprintf(stderr, "fft size too big\n");
+    exit(1);
+  }
 
-      /* Compute tables */
-      for (n = 1; n < m4; n++) {
-         if (n == m8) continue;
-         ang = n * TWOPI / m;
-         c = cos(ang); s = sin(ang);
-         *cn++ = c; *spcn++ = - (s + c); *smcn++ = s - c;
-         ang = 3 * n * TWOPI / m;
-         c = cos(ang); s = sin(ang);
-         *c3n++ = c; *spc3n++ = - (s + c); *smc3n++ = s - c;
-      }
-   }
+  if (logm < 1)
+  {
+    //printf("logm < 1\n");
+    return;
+  }
 
-   /* Step 1 */
-   xr1 = xr; xr2 = xr1 + m2;
-   xi1 = xi; xi2 = xi1 + m2;
-   for (n = 0; n < m2; n++) {
-      tmp1 = *xr1 + *xr2;
-      *xr2 = *xr1 - *xr2;
-      *xr1 = tmp1;
-      tmp2 = *xi1 + *xi2;
-      *xi2 = *xi1 - *xi2;
-      *xi1 = tmp2;
-      xr1++; xr2++; xi1++; xi2++;
-   }
+  check_tables(logm);
 
-   /* Step 2 */
-   xr1 = xr + m2; xr2 = xr1 + m4;
-   xi1 = xi + m2; xi2 = xi1 + m4;
-   for (n = 0; n < m4; n++) {
-      tmp1 = *xr1 + *xi2;
-      tmp2 = *xi1 + *xr2;
-      *xi1 = *xi1 - *xr2;
-      *xr2 = *xr1 - *xi2;
-      *xr1 = tmp1;
-      *xi2 = tmp2;
-      xr1++; xr2++; xi1++; xi2++;
-   }
+  reorder(xr, logm);
+  reorder(xi, logm);
 
-   /* Steps 3 & 4 */
-   xr1 = xr + m2; xr2 = xr1 + m4;
-   xi1 = xi + m2; xi2 = xi1 + m4;
-   if (logm >= 4) {
-      nel = m4 - 2;
-      cn  = tab[logm-4]; spcn  = cn + nel;  smcn  = spcn + nel;
-      c3n = smcn + nel;  spc3n = c3n + nel; smc3n = spc3n + nel;
-   }
-   xr1++; xr2++; xi1++; xi2++;
-   for (n = 1; n < m4; n++) {
-      if (n == m8) {
-         tmp1 =  SQHALF * (*xr1 + *xi1);
-         *xi1 =  SQHALF * (*xi1 - *xr1);
-         *xr1 =  tmp1;
-         tmp2 =  SQHALF * (*xi2 - *xr2);
-         *xi2 = -SQHALF * (*xr2 + *xi2);
-         *xr2 =  tmp2;
-      } else {
-         tmp2 = *cn++ * (*xr1 + *xi1);
-         tmp1 = *spcn++ * *xr1 + tmp2;
-         *xr1 = *smcn++ * *xi1 + tmp2;
-         *xi1 = tmp1;
-         tmp2 = *c3n++ * (*xr2 + *xi2);
-         tmp1 = *spc3n++ * *xr2 + tmp2;
-         *xr2 = *smc3n++ * *xi2 + tmp2;
-         *xi2 = tmp1;
-      }
-      xr1++; xr2++; xi1++; xi2++;
-   }
-
-   /* Call ssrec again with half DFT length */
-   srrec(xr, xi, logm-1);
-
-   /* Call ssrec again twice with one quarter DFT length.
-      Constants have to be recomputed, because they are static! */
-   m = 1 << logm; m2 = m / 2;
-   srrec(xr + m2, xi + m2, logm-2);
-   m = 1 << logm; m4 = 3 * (m / 4);
-   srrec(xr + m4, xi + m4, logm-2);
+  fft_proc(xr, xi, costbl[logm], negsintbl[logm], 1 << logm);
 }
 
-/*--------------------------------------------------------------------*
- *    Direct transform                                                *
- *--------------------------------------------------------------------*/
-void  srfft(double *xr, double *xi, int logm)
+void rfft(double *x, int logm)
 {
-   /* Call recursive routine */
-   srrec(xr, xi, logm);
+   static double xi[1 << MAXLOGM];
 
-   /* Output array unshuffling using bit-reversed indices */
-   if (logm > 1) {
-      BR_permute(xr, logm);
-      BR_permute(xi, logm);
+   if (logm > MAXLOGM)
+   {
+     fprintf(stderr, "fft size too big\n");
+     exit(1);
    }
-}
 
-/*--------------------------------------------------------------------*
- *    Inverse transform.  Uses Duhamel's trick (Ref: P. Duhamel       *
- *    et. al., "On computing the inverse DFT", IEEE Trans. ASSP,      *
- *    Feb. 1988, pp. 285-286).                                        *
- *--------------------------------------------------------------------*/
-void srifft(double *xr, double *xi, int logm)
-{
-   int      i, m;
-   double    fac, *xrp, *xip;
+   memset(xi, 0, (1 << logm) * sizeof(xi[0]));
 
-   /* Call direct FFT, swapping real & imaginary addresses */
-   srfft(xi, xr, logm);
+   fft(x, xi, logm);
 
-   /* Normalization */
-   m = 1 << logm;
-   fac = 1.0 / m;
-   xrp = xr; xip = xi;
-   for (i = 0; i < m; i++) {
-      *xrp++ *= fac;
-      *xip++ *= fac;
-   }
+   memcpy(x + (1 << (logm - 1)), xi, (1 << (logm - 1)) * sizeof(*x));
 }
 
-void rsfft(double *x, int logm)
+void ffti(double *xr, double *xi, int logm)
 {
-   int i;
-   int size;
+  int i, size;
+  double fac;
+  double *xrp, *xip;
 
-   /* Call recursive routine */
-   rsrec(x, logm);
+  fft(xi, xr, logm);
 
-   /* Output array unshuffling using bit-reversed indices */
-   if (logm > 1) {
-      BR_permute(x, logm);
-   }
+  size = 1 << logm;
+  fac = 1.0 / size;
+  xrp = xr;
+  xip = xi;
+  for (i = 0; i < size; i++)
+  {
+    *xrp++ *= fac;
+    *xip++ *= fac;
+  }
+}
 
-   size = 1 << (logm - 1);
+/*
+$Log: fft.c,v $
+Revision 1.7  2002/08/21 16:52:25  knik
+new simplier and faster fft routine and correct real fft
+new real fft is just a complex fft wrapper so it is slower than optimal but
+by surprise it seems to be at least as fast as the old buggy function
 
-   for (i = 0; i < (size / 2); i++)
-   {
-     double tmp;
-
-     tmp = 1 * x[i];
-     x[i] = 1 * x[size - 1 - i];
-     x[size - 1 - i] = tmp;
-
-     tmp = 1 * x[size + i];
-     x[size + i] = 1 * x[2*size - 1 - i];
-     x[2*size - 1 - i] = tmp;
-   }
-}
+*/