ref: 91082ce9a12c7bceaef1b806a8cffa3781699d59
dir: /libfaad/cfft.c/
/*
** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
** Copyright (C) 2003 M. Bakker, Ahead Software AG, http://www.nero.com
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**
** Any non-GPL usage of this software or parts of this software is strictly
** forbidden.
**
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
** $Id: cfft.c,v 1.17 2003/11/04 21:43:30 menno Exp $
**/
/*
* Algorithmically based on Fortran-77 FFTPACK
* by Paul N. Swarztrauber(Version 4, 1985).
*
* Does even sized fft only
*/
/* isign is +1 for backward and -1 for forward transforms */
#include "common.h"
#include "structs.h"
#include <stdlib.h>
#include "cfft.h"
#include "cfft_tab.h"
/*----------------------------------------------------------------------
passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd.
----------------------------------------------------------------------*/
static void passf2(const uint16_t ido, const uint16_t l1, const complex_t *cc,
complex_t *ch, const complex_t *wa, const int8_t isign)
{
uint16_t i, k, ah, ac;
if (ido == 1)
{
for (k = 0; k < l1; k++)
{
ah = 2*k;
ac = 4*k;
RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]);
RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]);
IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]);
IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]);
}
} else {
for (k = 0; k < l1; k++)
{
ah = k*ido;
ac = 2*k*ido;
for (i = 0; i < ido; i++)
{
complex_t t2;
RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]);
RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]);
IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]);
IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]);
RE(ch[ah+i+l1*ido]) = MUL_R_C(RE(t2),RE(wa[i])) - MUL_R_C(IM(t2),IM(wa[i]))*isign;
IM(ch[ah+i+l1*ido]) = MUL_R_C(IM(t2),RE(wa[i])) + MUL_R_C(RE(t2),IM(wa[i]))*isign;
}
}
}
}
static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc,
complex_t *ch, const complex_t *wa1, const complex_t *wa2,
const int8_t isign)
{
static real_t taur = COEF_CONST(-0.5);
static real_t taui = COEF_CONST(0.866025403784439);
uint16_t i, k, ac, ah;
complex_t c2, c3, d2, d3, t2;
if (ido == 1)
{
for (k = 0; k < l1; k++)
{
ac = 3*k+1;
ah = k;
RE(t2) = RE(cc[ac]) + RE(cc[ac+1]);
IM(t2) = IM(cc[ac]) + IM(cc[ac+1]);
RE(c2) = RE(cc[ac-1]) + MUL_R_C(RE(t2),taur);
IM(c2) = IM(cc[ac-1]) + MUL_R_C(IM(t2),taur);
RE(ch[ah]) = RE(cc[ac-1]) + RE(t2);
IM(ch[ah]) = IM(cc[ac-1]) + IM(t2);
RE(c3) = MUL_R_C((RE(cc[ac]) - RE(cc[ac+1])), taui)*isign;
IM(c3) = MUL_R_C((IM(cc[ac]) - IM(cc[ac+1])), taui)*isign;
RE(ch[ah+l1]) = RE(c2) - IM(c3);
IM(ch[ah+l1]) = IM(c2) + RE(c3);
RE(ch[ah+2*l1]) = RE(c2) + IM(c3);
IM(ch[ah+2*l1]) = IM(c2) - RE(c3);
}
} else {
for (k = 0; k < l1; k++)
{
for (i = 0; i < ido; i++)
{
ac = i + (3*k+1)*ido;
ah = i + k * ido;
RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]);
RE(c2) = RE(cc[ac-ido]) + MUL_R_C(RE(t2),taur);
IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]);
IM(c2) = IM(cc[ac-ido]) + MUL_R_C(IM(t2),taur);
RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2);
IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2);
RE(c3) = MUL_R_C((RE(cc[ac]) - RE(cc[ac+ido])), taui)*isign;
IM(c3) = MUL_R_C((IM(cc[ac]) - IM(cc[ac+ido])), taui)*isign;
RE(d2) = RE(c2) - IM(c3);
IM(d3) = IM(c2) - RE(c3);
RE(d3) = RE(c2) + IM(c3);
IM(d2) = IM(c2) + RE(c3);
RE(ch[ah+l1*ido]) = MUL_R_C(RE(d2),RE(wa1[i])) - MUL_R_C(IM(d2),IM(wa1[i]))*isign;
IM(ch[ah+l1*ido]) = MUL_R_C(IM(d2),RE(wa1[i])) + MUL_R_C(RE(d2),IM(wa1[i]))*isign;
RE(ch[ah+l1*2*ido]) = MUL_R_C(RE(d3),RE(wa2[i])) - MUL_R_C(IM(d3),IM(wa2[i]))*isign;
IM(ch[ah+l1*2*ido]) = MUL_R_C(IM(d3),RE(wa2[i])) + MUL_R_C(RE(d3),IM(wa2[i]))*isign;
}
}
}
}
#if 0
typedef real_t simd_complex_t[4];
/*
complex_add_sub(c1, c2, a1, a2);
complex_mult(a1, c1, w0);
complex_mult(a2, c2, w2);
*/
static INLINE void complex_func(simd_complex_t a1, simd_complex_t a2,
const simd_complex_t z1, const simd_complex_t z2,
const simd_complex_t w1, const simd_complex_t w2)
{
__asm {
mov eax, a1
mov ebx, a2
movups xmm0, [eax]
movups xmm2, [ebx]
movups xmm4, [eax]
addps xmm0, xmm2 ; xmm0 = c1
subps xmm4, xmm2 ; xmm4 = c2
; complex mult
mov ecx, w1
movups xmm1, [ecx]
movups xmm2, xmm0
movups xmm3, xmm1
mulps xmm0, xmm1
shufps xmm2, xmm2, 0xB1
shufps xmm0, xmm0, 0xD8
mulps xmm2, xmm3
movhlps xmm1, xmm0
shufps xmm2, xmm2, 0xD8
subps xmm0, xmm1
movhlps xmm3, xmm2
addps xmm2, xmm3
unpcklps xmm0, xmm2
movups [eax], xmm0
; complex mult
mov ecx, w2
movups xmm1, [ecx]
movups xmm2, xmm4
movups xmm3, xmm1
mulps xmm4, xmm1
shufps xmm2, xmm2, 0xB1
shufps xmm4, xmm4, 0xD8
mulps xmm2, xmm3
movhlps xmm1, xmm4
shufps xmm2, xmm2, 0xD8
subps xmm4, xmm1
movhlps xmm3, xmm2
addps xmm2, xmm3
unpcklps xmm4, xmm2
movups [ebx], xmm4
}
}
/* complex a = z1*z2 */
static INLINE void complex_mult(simd_complex_t a, const simd_complex_t z1, const simd_complex_t z2)
{
#if 0
a[0] = MUL_R_C(z1[0],z2[0]) - MUL_R_C(z1[1],z2[1]);
a[1] = MUL_R_C(z1[1],z2[0]) + MUL_R_C(z1[0],z2[1]);
a[2] = MUL_R_C(z1[2],z2[2]) - MUL_R_C(z1[3],z2[3]);
a[3] = MUL_R_C(z1[3],z2[2]) + MUL_R_C(z1[2],z2[3]);
#else
__asm {
mov eax, z1
mov ecx, z2
mov edx, a
movups xmm0, [eax]
movups xmm1, [ecx]
movaps xmm2, xmm0
movaps xmm3, xmm1
mulps xmm0, xmm1
shufps xmm2, xmm2, 0xB1
shufps xmm0, xmm0, 0xD8
mulps xmm2, xmm3
movhlps xmm1, xmm0
shufps xmm2, xmm2, 0xD8
subps xmm0, xmm1
movhlps xmm3, xmm2
addps xmm2, xmm3
unpcklps xmm0, xmm2
movups [edx], xmm0
}
#endif
}
/* complex a = z1+z2 */
static void complex_add(complex_t a, const complex_t z1, const complex_t z2)
{
RE(a) = RE(z1) + RE(z2);
IM(a) = IM(z1) + IM(z2);
}
/* complex a = z1-z2 */
static void complex_sub(complex_t a, const complex_t z1, const complex_t z2)
{
RE(a) = RE(z1) - RE(z2);
IM(a) = IM(z1) - IM(z2);
}
/* complex a1 = z1+z2; a2 = z1-z2 */
static INLINE void complex_add_sub(simd_complex_t a1, simd_complex_t a2,
const simd_complex_t z1, const simd_complex_t z2)
{
#if 0
a1[0] = z1[0] + z2[0];
a1[1] = z1[1] + z2[1];
a1[2] = z1[2] + z2[2];
a1[3] = z1[3] + z2[3];
a2[0] = z1[0] - z2[0];
a2[1] = z1[1] - z2[1];
a2[2] = z1[2] - z2[2];
a2[3] = z1[3] - z2[3];
#else
__asm {
mov eax, DWORD PTR z1
mov ebx, DWORD PTR z2
mov ecx, DWORD PTR a1
mov edx, DWORD PTR a2
movups xmm1, [eax]
movups xmm2, [ebx]
movups xmm3, [eax]
addps xmm1, xmm2
subps xmm3, xmm2
movups [ecx], xmm1
movups [edx], xmm3
}
#endif
}
static void passf4(const uint16_t ido, const uint16_t l1, const complex_t *cc,
complex_t *ch, const complex_t *wa1, const complex_t *wa2,
const complex_t *wa3, const int8_t isign)
{
uint16_t i, k, ac, ah;
if (ido == 1)
{
for (k = 0; k < l1; k++)
{
complex_t t1, t2, t3, t4;
ac = 4*k;
ah = k;
RE(t2) = RE(cc[ac]) + RE(cc[ac+2]);
RE(t1) = RE(cc[ac]) - RE(cc[ac+2]);
IM(t2) = IM(cc[ac]) + IM(cc[ac+2]);
IM(t1) = IM(cc[ac]) - IM(cc[ac+2]);
RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]);
IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]);
IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]);
RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]);
RE(ch[ah]) = RE(t2) + RE(t3);
RE(ch[ah+2*l1]) = RE(t2) - RE(t3);
IM(ch[ah]) = IM(t2) + IM(t3);
IM(ch[ah+2*l1]) = IM(t2) - IM(t3);
RE(ch[ah+l1]) = RE(t1) + RE(t4)*isign;
RE(ch[ah+3*l1]) = RE(t1) - RE(t4)*isign;
IM(ch[ah+l1]) = IM(t1) + IM(t4)*isign;
IM(ch[ah+3*l1]) = IM(t1) - IM(t4)*isign;
}
} else {
for (k = 0; k < l1; k++)
{
ac = 4*k*ido;
ah = k*ido;
for (i = 0; i < ido; i++)
{
simd_complex_t c1, c2, t1, t2;
simd_complex_t w0 = {1,0,0,0};
simd_complex_t w2;
w0[2] = wa1[i][0]*isign;
w0[3] = wa1[i][1]*isign;
w2[0] = wa2[i][0]*isign;
w2[1] = wa2[i][1]*isign;
w2[2] = wa3[i][0]*isign;
w2[3] = wa3[i][1]*isign;
t1[0] = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]);
t1[1] = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]);
t1[2] = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]);
t1[3] = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]);
t2[0] = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]);
t2[3] = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]);
t2[1] = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]);
t2[2] = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]);
t2[2] *= isign;
t2[3] *= isign;
#if 0
complex_add_sub(c1, c2, t1, t2);
complex_mult(t1, c1, w0);
complex_mult(t2, c2, w2);
#else
complex_func(t1, t2, c1, c2, w0, w2);
#endif
RE(ch[ah+i]) = t1[0];
IM(ch[ah+i]) = t1[1];
RE(ch[ah+i+l1*ido]) = t1[2];
IM(ch[ah+i+l1*ido]) = t1[3];
RE(ch[ah+i+2*l1*ido]) = t2[0];
IM(ch[ah+i+2*l1*ido]) = t2[1];
RE(ch[ah+i+3*l1*ido]) = t2[2];
IM(ch[ah+i+3*l1*ido]) = t2[3];
}
}
}
}
#else
static void passf4(const uint16_t ido, const uint16_t l1, const complex_t *cc,
complex_t *ch, const complex_t *wa1, const complex_t *wa2,
const complex_t *wa3, const int8_t isign)
{
uint16_t i, k, ac, ah;
if (ido == 1)
{
for (k = 0; k < l1; k++)
{
complex_t t1, t2, t3, t4;
ac = 4*k;
ah = k;
RE(t2) = RE(cc[ac]) + RE(cc[ac+2]);
RE(t1) = RE(cc[ac]) - RE(cc[ac+2]);
IM(t2) = IM(cc[ac]) + IM(cc[ac+2]);
IM(t1) = IM(cc[ac]) - IM(cc[ac+2]);
RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]);
IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]);
IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]);
RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]);
RE(ch[ah]) = RE(t2) + RE(t3);
RE(ch[ah+2*l1]) = RE(t2) - RE(t3);
IM(ch[ah]) = IM(t2) + IM(t3);
IM(ch[ah+2*l1]) = IM(t2) - IM(t3);
RE(ch[ah+l1]) = RE(t1) + RE(t4)*isign;
RE(ch[ah+3*l1]) = RE(t1) - RE(t4)*isign;
IM(ch[ah+l1]) = IM(t1) + IM(t4)*isign;
IM(ch[ah+3*l1]) = IM(t1) - IM(t4)*isign;
}
} else {
for (k = 0; k < l1; k++)
{
ac = 4*k*ido;
ah = k*ido;
for (i = 0; i < ido; i++)
{
complex_t c2, c3, c4, t1, t2, t3, t4;
RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]);
RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]);
IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]);
IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]);
RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]);
IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]);
IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]);
RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]);
RE(c2) = RE(t1) + RE(t4)*isign;
RE(c4) = RE(t1) - RE(t4)*isign;
IM(c2) = IM(t1) + IM(t4)*isign;
IM(c4) = IM(t1) - IM(t4)*isign;
RE(ch[ah+i]) = RE(t2) + RE(t3);
RE(c3) = RE(t2) - RE(t3);
IM(ch[ah+i]) = IM(t2) + IM(t3);
IM(c3) = IM(t2) - IM(t3);
IM(ch[ah+i+l1*ido]) = MUL_R_C(IM(c2),RE(wa1[i])) + MUL_R_C(RE(c2),IM(wa1[i]))*isign;
RE(ch[ah+i+l1*ido]) = MUL_R_C(RE(c2),RE(wa1[i])) - MUL_R_C(IM(c2),IM(wa1[i]))*isign;
IM(ch[ah+i+2*l1*ido]) = MUL_R_C(IM(c3),RE(wa2[i])) + MUL_R_C(RE(c3),IM(wa2[i]))*isign;
RE(ch[ah+i+2*l1*ido]) = MUL_R_C(RE(c3),RE(wa2[i])) - MUL_R_C(IM(c3),IM(wa2[i]))*isign;
IM(ch[ah+i+3*l1*ido]) = MUL_R_C(IM(c4),RE(wa3[i])) + MUL_R_C(RE(c4),IM(wa3[i]))*isign;
RE(ch[ah+i+3*l1*ido]) = MUL_R_C(RE(c4),RE(wa3[i])) - MUL_R_C(IM(c4),IM(wa3[i]))*isign;
}
}
}
}
#endif
static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc,
complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3,
const complex_t *wa4, const int8_t isign)
{
static real_t tr11 = COEF_CONST(0.309016994374947);
static real_t ti11 = COEF_CONST(0.951056516295154);
static real_t tr12 = COEF_CONST(-0.809016994374947);
static real_t ti12 = COEF_CONST(0.587785252292473);
uint16_t i, k, ac, ah;
complex_t c2, c3, c4, c5, d3, d4, d5, d2, t2, t3, t4, t5;
if (ido == 1)
{
for (k = 0; k < l1; k++)
{
ac = 5*k + 1;
ah = k;
RE(t2) = RE(cc[ac]) + RE(cc[ac+3]);
IM(t2) = IM(cc[ac]) + IM(cc[ac+3]);
RE(t3) = RE(cc[ac+1]) + RE(cc[ac+2]);
IM(t3) = IM(cc[ac+1]) + IM(cc[ac+2]);
RE(t4) = RE(cc[ac+1]) - RE(cc[ac+2]);
IM(t4) = IM(cc[ac+1]) - IM(cc[ac+2]);
RE(t5) = RE(cc[ac]) - RE(cc[ac+3]);
IM(t5) = IM(cc[ac]) - IM(cc[ac+3]);
RE(ch[ah]) = RE(cc[ac-1]) + RE(t2) + RE(t3);
IM(ch[ah]) = IM(cc[ac-1]) + IM(t2) + IM(t3);
RE(c2) = RE(cc[ac-1]) + MUL_R_C(RE(t2),tr11) + MUL_R_C(RE(t3),tr12);
IM(c2) = IM(cc[ac-1]) + MUL_R_C(IM(t2),tr11) + MUL_R_C(IM(t3),tr12);
RE(c3) = RE(cc[ac-1]) + MUL_R_C(RE(t2),tr12) + MUL_R_C(RE(t3),tr11);
IM(c3) = IM(cc[ac-1]) + MUL_R_C(IM(t2),tr12) + MUL_R_C(IM(t3),tr11);
RE(c4) = (MUL_R_C(RE(t5),ti12)*isign - MUL_R_C(RE(t4),ti11));
IM(c4) = (MUL_R_C(IM(t5),ti12)*isign - MUL_R_C(IM(t4),ti11));
RE(c5) = (MUL_R_C(RE(t5),ti11)*isign + MUL_R_C(RE(t4),ti12));
IM(c5) = (MUL_R_C(IM(t5),ti11)*isign + MUL_R_C(IM(t4),ti12));
RE(ch[ah+l1]) = RE(c2) - IM(c5);
IM(ch[ah+l1]) = IM(c2) + RE(c5);
RE(ch[ah+2*l1]) = RE(c3) - IM(c4);
IM(ch[ah+2*l1]) = IM(c3) + RE(c4);
RE(ch[ah+3*l1]) = RE(c3) + IM(c4);
IM(ch[ah+3*l1]) = IM(c3) - RE(c4);
RE(ch[ah+4*l1]) = RE(c2) + IM(c5);
IM(ch[ah+4*l1]) = IM(c2) - RE(c5);
}
} else {
for (k = 0; k < l1; k++)
{
for (i = 0; i < ido; i++)
{
ac = i + (k*5 + 1) * ido;
ah = i + k * ido;
RE(t2) = RE(cc[ac]) + RE(cc[ac+3*ido]);
IM(t2) = IM(cc[ac]) + IM(cc[ac+3*ido]);
RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+2*ido]);
IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+2*ido]);
RE(t4) = RE(cc[ac+ido]) - RE(cc[ac+2*ido]);
IM(t4) = IM(cc[ac+ido]) - IM(cc[ac+2*ido]);
RE(t5) = RE(cc[ac]) - RE(cc[ac+3*ido]);
IM(t5) = IM(cc[ac]) - IM(cc[ac+3*ido]);
RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2) + RE(t3);
IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2) + IM(t3);
RE(c2) = RE(cc[ac-ido]) + MUL_R_C(RE(t2),tr11) + MUL_R_C(RE(t3),tr12);
IM(c2) = IM(cc[ac-ido]) + MUL_R_C(IM(t2),tr11) + MUL_R_C(IM(t3),tr12);
RE(c3) = RE(cc[ac-ido]) + MUL_R_C(RE(t2),tr12) + MUL_R_C(RE(t3),tr11);
IM(c3) = IM(cc[ac-ido]) + MUL_R_C(IM(t2),tr12) + MUL_R_C(IM(t3),tr11);
RE(c4) = (MUL_R_C(RE(t5),ti12)*isign - MUL_R_C(RE(t4),ti11));
IM(c4) = (MUL_R_C(IM(t5),ti12)*isign - MUL_R_C(IM(t4),ti11));
RE(c5) = (MUL_R_C(RE(t5),ti11)*isign + MUL_R_C(RE(t4),ti12));
IM(c5) = (MUL_R_C(IM(t5),ti11)*isign + MUL_R_C(IM(t4),ti12));
IM(d2) = IM(c2) + RE(c5);
IM(d3) = IM(c3) + RE(c4);
RE(d4) = RE(c3) + IM(c4);
RE(d5) = RE(c2) + IM(c5);
RE(d2) = RE(c2) - IM(c5);
IM(d5) = IM(c2) - RE(c5);
RE(d3) = RE(c3) - IM(c4);
IM(d4) = IM(c3) - RE(c4);
RE(ch[ah+l1*ido]) = MUL_R_C(RE(d2),RE(wa1[i])) - MUL_R_C(IM(d2),IM(wa1[i]))*isign;
IM(ch[ah+l1*ido]) = MUL_R_C(IM(d2),RE(wa1[i])) + MUL_R_C(RE(d2),IM(wa1[i]))*isign;
RE(ch[ah+2*l1*ido]) = MUL_R_C(RE(d3),RE(wa2[i])) - MUL_R_C(IM(d3),IM(wa2[i]))*isign;
IM(ch[ah+2*l1*ido]) = MUL_R_C(IM(d3),RE(wa2[i])) + MUL_R_C(RE(d3),IM(wa2[i]))*isign;
RE(ch[ah+3*l1*ido]) = MUL_R_C(RE(d4),RE(wa3[i])) - MUL_R_C(IM(d4),IM(wa3[i]))*isign;
IM(ch[ah+3*l1*ido]) = MUL_R_C(IM(d4),RE(wa3[i])) + MUL_R_C(RE(d4),IM(wa3[i]))*isign;
RE(ch[ah+4*l1*ido]) = MUL_R_C(RE(d5),RE(wa4[i])) - MUL_R_C(IM(d5),IM(wa4[i]))*isign;
IM(ch[ah+4*l1*ido]) = MUL_R_C(IM(d5),RE(wa4[i])) + MUL_R_C(RE(d5),IM(wa4[i]))*isign;
}
}
}
}
/*----------------------------------------------------------------------
cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
----------------------------------------------------------------------*/
INLINE void cfftf1(uint16_t n, complex_t *c, complex_t *ch,
uint16_t *ifac, complex_t *wa, int8_t isign)
{
uint16_t i;
uint16_t k1, l1, l2;
uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1;
nf = ifac[1];
na = 0;
l1 = 1;
iw = 0;
for (k1 = 2; k1 <= nf+1; k1++)
{
ip = ifac[k1];
l2 = ip*l1;
ido = n / l2;
idl1 = ido*l1;
switch (ip)
{
case 4:
ix2 = iw + ido;
ix3 = ix2 + ido;
if (na == 0)
passf4(ido, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], isign);
else
passf4(ido, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], isign);
na = 1 - na;
break;
case 2:
if (na == 0)
passf2(ido, l1, c, ch, &wa[iw], isign);
else
passf2(ido, l1, ch, c, &wa[iw], isign);
na = 1 - na;
break;
case 3:
ix2 = iw + ido;
if (na == 0)
passf3(ido, l1, c, ch, &wa[iw], &wa[ix2], isign);
else
passf3(ido, l1, ch, c, &wa[iw], &wa[ix2], isign);
na = 1 - na;
break;
case 5:
ix2 = iw + ido;
ix3 = ix2 + ido;
ix4 = ix3 + ido;
if (na == 0)
passf5(ido, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
else
passf5(ido, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
na = 1 - na;
break;
}
l1 = l2;
iw += (ip-1) * ido;
}
if (na == 0)
return;
for (i = 0; i < n; i++)
{
RE(c[i]) = RE(ch[i]);
IM(c[i]) = IM(ch[i]);
}
}
void cfftf(cfft_info *cfft, complex_t *c)
{
cfftf1(cfft->n, c, cfft->work, cfft->ifac, cfft->tab, -1);
}
void cfftb(cfft_info *cfft, complex_t *c)
{
cfftf1(cfft->n, c, cfft->work, cfft->ifac, cfft->tab, +1);
}
static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac)
{
static uint16_t ntryh[4] = {3, 4, 2, 5};
#ifndef FIXED_POINT
real_t arg, argh, argld, fi;
uint16_t ido, ipm;
uint16_t i1, k1, l1, l2;
uint16_t ld, ii, ip;
#endif
uint16_t ntry, i, j;
uint16_t ib;
uint16_t nf, nl, nq, nr;
nl = n;
nf = 0;
j = 0;
startloop:
j++;
if (j <= 4)
ntry = ntryh[j-1];
else
ntry += 2;
do
{
nq = nl / ntry;
nr = nl - ntry*nq;
if (nr != 0)
goto startloop;
nf++;
ifac[nf+1] = ntry;
nl = nq;
if (ntry == 2 && nf != 1)
{
for (i = 2; i <= nf; i++)
{
ib = nf - i + 2;
ifac[ib+1] = ifac[ib];
}
ifac[2] = 2;
}
} while (nl != 1);
ifac[0] = n;
ifac[1] = nf;
#ifndef FIXED_POINT
argh = (real_t)2.0*M_PI / (real_t)n;
i = 0;
l1 = 1;
for (k1 = 1; k1 <= nf; k1++)
{
ip = ifac[k1+1];
ld = 0;
l2 = l1*ip;
ido = n / l2;
ipm = ip - 1;
for (j = 0; j < ipm; j++)
{
i1 = i;
RE(wa[i]) = 1.0;
IM(wa[i]) = 0.0;
ld += l1;
fi = 0;
argld = ld*argh;
for (ii = 0; ii < ido; ii++)
{
i++;
fi++;
arg = fi * argld;
RE(wa[i]) = (real_t)cos(arg);
IM(wa[i]) = (real_t)sin(arg);
}
if (ip > 5)
{
RE(wa[i1]) = RE(wa[i]);
IM(wa[i1]) = IM(wa[i]);
}
}
l1 = l2;
}
#endif
}
cfft_info *cffti(uint16_t n)
{
cfft_info *cfft = (cfft_info*)malloc(sizeof(cfft_info));
cfft->n = n;
cfft->work = (complex_t*)malloc(n*sizeof(complex_t));
#ifndef FIXED_POINT
cfft->tab = (complex_t*)malloc(n*sizeof(complex_t));
cffti1(n, cfft->tab, cfft->ifac);
#else
cffti1(n, NULL, cfft->ifac);
switch (n)
{
case 64: cfft->tab = cfft_tab_64; break;
case 512: cfft->tab = cfft_tab_512; break;
#ifdef LD_DEC
case 256: cfft->tab = cfft_tab_256; break;
#endif
#ifdef ALLOW_SMALL_FRAMELENGTH
case 60: cfft->tab = cfft_tab_60; break;
case 480: cfft->tab = cfft_tab_480; break;
#ifdef LD_DEC
case 240: cfft->tab = cfft_tab_240; break;
#endif
#endif
}
#endif
return cfft;
}
void cfftu(cfft_info *cfft)
{
if (cfft->work) free(cfft->work);
#ifndef FIXED_POINT
if (cfft->tab) free(cfft->tab);
#endif
if (cfft) free(cfft);
}