ref: 922513e1151f9cfa68768e4d7d0d914d5b1248a2
parent: 178d2f4e52662b325a770a58dee562f7461e9ce0
author: menno <menno>
date: Tue Dec 23 13:41:42 EST 2003
cfft optimised with SSE; output.c optimised (helps specifically for ICL); some error handling changes for DRM
--- a/libfaad/cfft.c
+++ b/libfaad/cfft.c
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: cfft.c,v 1.21 2003/12/17 14:43:16 menno Exp $
+** $Id: cfft.c,v 1.22 2003/12/23 18:41:42 menno Exp $
**/
/*
@@ -47,8 +47,9 @@
passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd.
----------------------------------------------------------------------*/
-static void passf2(const uint16_t ido, const uint16_t l1, const complex_t *cc,
- complex_t *ch, const complex_t *wa, const int8_t isign)
+#ifdef USE_SSE
+static void passf2pos_sse(const uint16_t ido, const uint16_t l1, const complex_t *cc,
+ complex_t *ch, const complex_t *wa)
{
uint16_t i, k, ah, ac;
@@ -60,51 +61,146 @@
ac = 4*k;
RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]);
- RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]);
IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]);
+
+ RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]);
IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]);
}
} else {
- if (isign == 1)
+ for (k = 0; k < l1; k++)
{
- for (k = 0; k < l1; k++)
+ ah = k*ido;
+ ac = 2*k*ido;
+
+ for (i = 0; i < ido; i+=4)
{
- ah = k*ido;
- ac = 2*k*ido;
+ __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14;
+ __m128 m15, m16, m17, m18, m19, m20, m21, m22, m23, m24;
+ __m128 w1, w2, w3, w4;
- for (i = 0; i < ido; i++)
- {
- complex_t t2;
+ m1 = _mm_load_ps(&RE(cc[ac+i]));
+ m2 = _mm_load_ps(&RE(cc[ac+ido+i]));
+ m5 = _mm_load_ps(&RE(cc[ac+i+2]));
+ m6 = _mm_load_ps(&RE(cc[ac+ido+i+2]));
+ w1 = _mm_load_ps(&RE(wa[i]));
+ w3 = _mm_load_ps(&RE(wa[i+2]));
- RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]);
- RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]);
+ m3 = _mm_add_ps(m1, m2);
+ m15 = _mm_add_ps(m5, m6);
- IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]);
- IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]);
+ m4 = _mm_sub_ps(m1, m2);
+ m16 = _mm_sub_ps(m5, m6);
- ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]),
- IM(t2), RE(t2), RE(wa[i]), IM(wa[i]));
- }
+ _mm_store_ps(&RE(ch[ah+i]), m3);
+ _mm_store_ps(&RE(ch[ah+i+2]), m15);
+
+ w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1));
+ w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1));
+
+ m7 = _mm_mul_ps(m4, w1);
+ m17 = _mm_mul_ps(m16, w3);
+ m8 = _mm_mul_ps(m4, w2);
+ m18 = _mm_mul_ps(m16, w4);
+
+ m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0));
+ m19 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(2, 0, 2, 0));
+ m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1));
+ m20 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(3, 1, 3, 1));
+
+ m11 = _mm_add_ps(m9, m10);
+ m21 = _mm_add_ps(m19, m20);
+ m12 = _mm_sub_ps(m9, m10);
+ m22 = _mm_sub_ps(m19, m20);
+
+ m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2));
+ m23 = _mm_shuffle_ps(m21, m21, _MM_SHUFFLE(0, 0, 3, 2));
+
+ m14 = _mm_unpacklo_ps(m12, m13);
+ m24 = _mm_unpacklo_ps(m22, m23);
+
+ _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14);
+ _mm_store_ps(&RE(ch[ah+i+2+l1*ido]), m24);
}
- } else {
- for (k = 0; k < l1; k++)
- {
- ah = k*ido;
- ac = 2*k*ido;
+ }
+ }
+}
+#endif
- for (i = 0; i < ido; i++)
- {
- complex_t t2;
+static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc,
+ complex_t *ch, const complex_t *wa)
+{
+ uint16_t i, k, ah, ac;
- RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]);
- RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]);
+ if (ido == 1)
+ {
+ for (k = 0; k < l1; k++)
+ {
+ ah = 2*k;
+ ac = 4*k;
- IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]);
- IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]);
+ RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]);
+ RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]);
+ IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]);
+ IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]);
+ }
+ } else {
+ for (k = 0; k < l1; k++)
+ {
+ ah = k*ido;
+ ac = 2*k*ido;
- ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]),
- RE(t2), IM(t2), RE(wa[i]), IM(wa[i]));
- }
+ for (i = 0; i < ido; i++)
+ {
+ complex_t t2;
+
+ RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]);
+ RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]);
+
+ IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]);
+ IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]);
+
+ ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]),
+ IM(t2), RE(t2), RE(wa[i]), IM(wa[i]));
+ }
+ }
+ }
+}
+
+static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc,
+ complex_t *ch, const complex_t *wa)
+{
+ uint16_t i, k, ah, ac;
+
+ if (ido == 1)
+ {
+ for (k = 0; k < l1; k++)
+ {
+ ah = 2*k;
+ ac = 4*k;
+
+ RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]);
+ RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]);
+ IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]);
+ IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]);
+ }
+ } else {
+ for (k = 0; k < l1; k++)
+ {
+ ah = k*ido;
+ ac = 2*k*ido;
+
+ for (i = 0; i < ido; i++)
+ {
+ complex_t t2;
+
+ RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]);
+ RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]);
+
+ IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]);
+ IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]);
+
+ ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]),
+ RE(t2), IM(t2), RE(wa[i]), IM(wa[i]));
}
}
}
@@ -234,153 +330,315 @@
}
}
-static void passf4(const uint16_t ido, const uint16_t l1, const complex_t *cc,
- complex_t *ch, const complex_t *wa1, const complex_t *wa2,
- const complex_t *wa3, const int8_t isign)
+#ifdef USE_SSE
+static void passf4pos_sse(const uint16_t ido, const uint16_t l1, const complex_t *cc,
+ complex_t *ch, const complex_t *wa1, const complex_t *wa2,
+ const complex_t *wa3)
{
uint16_t i, k, ac, ah;
if (ido == 1)
{
- if (isign == 1)
+ for (k = 0; k < l1; k+=2)
{
- for (k = 0; k < l1; k++)
- {
- complex_t t1, t2, t3, t4;
+ __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10;
+ __m128 n1, n2, n3, n4, n5, n6, n7, n8, n9, n10;
+ __m128 neg1 = _mm_set_ps(-1.0, 1.0, 1.0, 1.0);
- ac = 4*k;
- ah = k;
+ m1 = _mm_load_ps(&RE(cc[4*k]));
+ m2 = _mm_load_ps(&RE(cc[4*k+2]));
+ n1 = _mm_load_ps(&RE(cc[4*k+4]));
+ n2 = _mm_load_ps(&RE(cc[4*k+6]));
- RE(t2) = RE(cc[ac]) + RE(cc[ac+2]);
- RE(t1) = RE(cc[ac]) - RE(cc[ac+2]);
- IM(t2) = IM(cc[ac]) + IM(cc[ac+2]);
- IM(t1) = IM(cc[ac]) - IM(cc[ac+2]);
- RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]);
- IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]);
- IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]);
- RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]);
+ m3 = _mm_add_ps(m1, m2);
- RE(ch[ah]) = RE(t2) + RE(t3);
- RE(ch[ah+2*l1]) = RE(t2) - RE(t3);
+ n4 = _mm_mul_ps(neg1, n1);
+ n5 = _mm_mul_ps(neg1, n2);
+ m4 = _mm_mul_ps(neg1, m1);
+ m5 = _mm_mul_ps(neg1, m2);
- IM(ch[ah]) = IM(t2) + IM(t3);
- IM(ch[ah+2*l1]) = IM(t2) - IM(t3);
+ n3 = _mm_add_ps(n1, n2);
+ m6 = _mm_sub_ps(m4, m5);
- RE(ch[ah+l1]) = RE(t1) + RE(t4);
- RE(ch[ah+3*l1]) = RE(t1) - RE(t4);
+ m7 = _mm_shuffle_ps(m3, n3, _MM_SHUFFLE(1, 0, 1, 0));
+ n6 = _mm_sub_ps(n4, n5);
+ m8 = _mm_shuffle_ps(m3, n3, _MM_SHUFFLE(3, 2, 3, 2));
- IM(ch[ah+l1]) = IM(t1) + IM(t4);
- IM(ch[ah+3*l1]) = IM(t1) - IM(t4);
- }
- } else {
- for (k = 0; k < l1; k++)
+ n7 = _mm_shuffle_ps(m6, n6, _MM_SHUFFLE(1, 0, 1, 0));
+ m9 = _mm_add_ps(m7, m8);
+ n8 = _mm_shuffle_ps(m6, n6, _MM_SHUFFLE(2, 3, 2, 3));
+
+ m10 = _mm_sub_ps(m7, m8);
+ n9 = _mm_add_ps(n7, n8);
+
+ _mm_store_ps(&RE(ch[k]), m9);
+ n10 = _mm_sub_ps(n7, n8);
+ _mm_store_ps(&RE(ch[k+l1]), n9);
+ _mm_store_ps(&RE(ch[k+2*l1]), m10);
+ _mm_store_ps(&RE(ch[k+3*l1]), n10);
+ }
+ } else {
+ for (k = 0; k < l1; k++)
+ {
+ ac = 4*k*ido;
+ ah = k*ido;
+
+ for (i = 0; i < ido; i+=2)
{
- complex_t t1, t2, t3, t4;
+ __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15, m16;
+ __m128 n1, n2, n3, n4, n5, n6, n7, n8, n9, m17, m18, m19, m20, m21, m22, m23;
+ __m128 w1, w2, w3, w4, w5, w6, m24, m25, m26, m27, m28, m29, m30;
+ __m128 neg1 = _mm_set_ps(-1.0, 1.0, -1.0, 1.0);
- ac = 4*k;
- ah = k;
+ m1 = _mm_load_ps(&RE(cc[ac+i]));
+ m2 = _mm_load_ps(&RE(cc[ac+i+2*ido]));
+ m3 = _mm_add_ps(m1, m2);
+ m4 = _mm_sub_ps(m1, m2);
- RE(t2) = RE(cc[ac]) + RE(cc[ac+2]);
- RE(t1) = RE(cc[ac]) - RE(cc[ac+2]);
- IM(t2) = IM(cc[ac]) + IM(cc[ac+2]);
- IM(t1) = IM(cc[ac]) - IM(cc[ac+2]);
- RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]);
- IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]);
- IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]);
- RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]);
+ n1 = _mm_load_ps(&RE(cc[ac+i+ido]));
+ n2 = _mm_load_ps(&RE(cc[ac+i+3*ido]));
+ n3 = _mm_add_ps(n1, n2);
- RE(ch[ah]) = RE(t2) + RE(t3);
- RE(ch[ah+2*l1]) = RE(t2) - RE(t3);
+ n4 = _mm_mul_ps(neg1, n1);
+ n5 = _mm_mul_ps(neg1, n2);
+ n6 = _mm_sub_ps(n4, n5);
- IM(ch[ah]) = IM(t2) + IM(t3);
- IM(ch[ah+2*l1]) = IM(t2) - IM(t3);
+ m5 = _mm_add_ps(m3, n3);
- RE(ch[ah+l1]) = RE(t1) - RE(t4);
- RE(ch[ah+3*l1]) = RE(t1) + RE(t4);
+ n7 = _mm_shuffle_ps(n6, n6, _MM_SHUFFLE(2, 3, 0, 1));
+ n8 = _mm_add_ps(m4, n7);
- IM(ch[ah+l1]) = IM(t1) - IM(t4);
- IM(ch[ah+3*l1]) = IM(t1) + IM(t4);
+ m6 = _mm_sub_ps(m3, n3);
+ n9 = _mm_sub_ps(m4, n7);
+
+ _mm_store_ps(&RE(ch[ah+i]), m5);
+
+#if 0
+ static INLINE void ComplexMult(real_t *y1, real_t *y2,
+ real_t x1, real_t x2, real_t c1, real_t c2)
+ {
+ *y1 = MUL_F(x1, c1) + MUL_F(x2, c2);
+ *y2 = MUL_F(x2, c1) - MUL_F(x1, c2);
+ }
+
+ m7.0 = RE(c2)*RE(wa1[i])
+ m7.1 = IM(c2)*IM(wa1[i])
+ m7.2 = RE(c6)*RE(wa1[i+1])
+ m7.3 = IM(c6)*IM(wa1[i+1])
+
+ m8.0 = RE(c2)*IM(wa1[i])
+ m8.1 = IM(c2)*RE(wa1[i])
+ m8.2 = RE(c6)*IM(wa1[i+1])
+ m8.3 = IM(c6)*RE(wa1[i+1])
+
+ RE(0) = m7.0 - m7.1
+ IM(0) = m8.0 + m8.1
+ RE(1) = m7.2 - m7.3
+ IM(1) = m8.2 + m8.3
+
+////
+ RE(0) = RE(c2)*RE(wa1[i]) - IM(c2)*IM(wa1[i])
+ IM(0) = RE(c2)*IM(wa1[i]) + IM(c2)*RE(wa1[i])
+ RE(1) = RE(c6)*RE(wa1[i+1]) - IM(c6)*IM(wa1[i+1])
+ IM(1) = RE(c6)*IM(wa1[i+1]) + IM(c6)*RE(wa1[i+1])
+#endif
+
+ w1 = _mm_load_ps(&RE(wa1[i]));
+ w3 = _mm_load_ps(&RE(wa2[i]));
+ w5 = _mm_load_ps(&RE(wa3[i]));
+
+ w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1));
+ w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1));
+ w6 = _mm_shuffle_ps(w5, w5, _MM_SHUFFLE(2, 3, 0, 1));
+
+ m7 = _mm_mul_ps(n8, w1);
+ m15 = _mm_mul_ps(m6, w3);
+ m23 = _mm_mul_ps(n9, w5);
+ m8 = _mm_mul_ps(n8, w2);
+ m16 = _mm_mul_ps(m6, w4);
+ m24 = _mm_mul_ps(n9, w6);
+
+ m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0));
+ m17 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(2, 0, 2, 0));
+ m25 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(2, 0, 2, 0));
+ m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1));
+ m18 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(3, 1, 3, 1));
+ m26 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(3, 1, 3, 1));
+
+ m11 = _mm_add_ps(m9, m10);
+ m19 = _mm_add_ps(m17, m18);
+ m27 = _mm_add_ps(m25, m26);
+ m12 = _mm_sub_ps(m9, m10);
+ m20 = _mm_sub_ps(m17, m18);
+ m28 = _mm_sub_ps(m25, m26);
+
+ m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2));
+ m21 = _mm_shuffle_ps(m19, m19, _MM_SHUFFLE(0, 0, 3, 2));
+ m29 = _mm_shuffle_ps(m27, m27, _MM_SHUFFLE(0, 0, 3, 2));
+ m14 = _mm_unpacklo_ps(m12, m13);
+ m22 = _mm_unpacklo_ps(m20, m21);
+ m30 = _mm_unpacklo_ps(m28, m29);
+
+ _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14);
+ _mm_store_ps(&RE(ch[ah+i+2*l1*ido]), m22);
+ _mm_store_ps(&RE(ch[ah+i+3*l1*ido]), m30);
}
}
+ }
+}
+#endif
+
+static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc,
+ complex_t *ch, const complex_t *wa1, const complex_t *wa2,
+ const complex_t *wa3)
+{
+ uint16_t i, k, ac, ah;
+
+ if (ido == 1)
+ {
+ for (k = 0; k < l1; k++)
+ {
+ complex_t t1, t2, t3, t4;
+
+ ac = 4*k;
+ ah = k;
+
+ RE(t2) = RE(cc[ac]) + RE(cc[ac+2]);
+ RE(t1) = RE(cc[ac]) - RE(cc[ac+2]);
+ IM(t2) = IM(cc[ac]) + IM(cc[ac+2]);
+ IM(t1) = IM(cc[ac]) - IM(cc[ac+2]);
+ RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]);
+ IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]);
+ IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]);
+ RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]);
+
+ RE(ch[ah]) = RE(t2) + RE(t3);
+ RE(ch[ah+2*l1]) = RE(t2) - RE(t3);
+
+ IM(ch[ah]) = IM(t2) + IM(t3);
+ IM(ch[ah+2*l1]) = IM(t2) - IM(t3);
+
+ RE(ch[ah+l1]) = RE(t1) + RE(t4);
+ RE(ch[ah+3*l1]) = RE(t1) - RE(t4);
+
+ IM(ch[ah+l1]) = IM(t1) + IM(t4);
+ IM(ch[ah+3*l1]) = IM(t1) - IM(t4);
+ }
} else {
- if (isign == 1)
+ for (k = 0; k < l1; k++)
{
- for (k = 0; k < l1; k++)
+ ac = 4*k*ido;
+ ah = k*ido;
+
+ for (i = 0; i < ido; i++)
{
- ac = 4*k*ido;
- ah = k*ido;
+ complex_t c2, c3, c4, t1, t2, t3, t4;
- for (i = 0; i < ido; i++)
- {
- complex_t c2, c3, c4, t1, t2, t3, t4;
+ RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]);
+ RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]);
+ IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]);
+ IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]);
+ RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]);
+ IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]);
+ IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]);
+ RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]);
- RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]);
- RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]);
- IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]);
- IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]);
- RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]);
- IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]);
- IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]);
- RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]);
+ RE(c2) = RE(t1) + RE(t4);
+ RE(c4) = RE(t1) - RE(t4);
- RE(c2) = RE(t1) + RE(t4);
- RE(c4) = RE(t1) - RE(t4);
+ IM(c2) = IM(t1) + IM(t4);
+ IM(c4) = IM(t1) - IM(t4);
- IM(c2) = IM(t1) + IM(t4);
- IM(c4) = IM(t1) - IM(t4);
+ RE(ch[ah+i]) = RE(t2) + RE(t3);
+ RE(c3) = RE(t2) - RE(t3);
- RE(ch[ah+i]) = RE(t2) + RE(t3);
- RE(c3) = RE(t2) - RE(t3);
+ IM(ch[ah+i]) = IM(t2) + IM(t3);
+ IM(c3) = IM(t2) - IM(t3);
- IM(ch[ah+i]) = IM(t2) + IM(t3);
- IM(c3) = IM(t2) - IM(t3);
-
- ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]),
- IM(c2), RE(c2), RE(wa1[i]), IM(wa1[i]));
- ComplexMult(&IM(ch[ah+i+2*l1*ido]), &RE(ch[ah+i+2*l1*ido]),
- IM(c3), RE(c3), RE(wa2[i]), IM(wa2[i]));
- ComplexMult(&IM(ch[ah+i+3*l1*ido]), &RE(ch[ah+i+3*l1*ido]),
- IM(c4), RE(c4), RE(wa3[i]), IM(wa3[i]));
- }
+ ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]),
+ IM(c2), RE(c2), RE(wa1[i]), IM(wa1[i]));
+ ComplexMult(&IM(ch[ah+i+2*l1*ido]), &RE(ch[ah+i+2*l1*ido]),
+ IM(c3), RE(c3), RE(wa2[i]), IM(wa2[i]));
+ ComplexMult(&IM(ch[ah+i+3*l1*ido]), &RE(ch[ah+i+3*l1*ido]),
+ IM(c4), RE(c4), RE(wa3[i]), IM(wa3[i]));
}
- } else {
- for (k = 0; k < l1; k++)
- {
- ac = 4*k*ido;
- ah = k*ido;
+ }
+ }
+}
- for (i = 0; i < ido; i++)
- {
- complex_t c2, c3, c4, t1, t2, t3, t4;
+static void passf4neg(const uint16_t ido, const uint16_t l1, const complex_t *cc,
+ complex_t *ch, const complex_t *wa1, const complex_t *wa2,
+ const complex_t *wa3)
+{
+ uint16_t i, k, ac, ah;
- RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]);
- RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]);
- IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]);
- IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]);
- RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]);
- IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]);
- IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]);
- RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]);
+ if (ido == 1)
+ {
+ for (k = 0; k < l1; k++)
+ {
+ complex_t t1, t2, t3, t4;
- RE(c2) = RE(t1) - RE(t4);
- RE(c4) = RE(t1) + RE(t4);
+ ac = 4*k;
+ ah = k;
- IM(c2) = IM(t1) - IM(t4);
- IM(c4) = IM(t1) + IM(t4);
+ RE(t2) = RE(cc[ac]) + RE(cc[ac+2]);
+ RE(t1) = RE(cc[ac]) - RE(cc[ac+2]);
+ IM(t2) = IM(cc[ac]) + IM(cc[ac+2]);
+ IM(t1) = IM(cc[ac]) - IM(cc[ac+2]);
+ RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]);
+ IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]);
+ IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]);
+ RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]);
- RE(ch[ah+i]) = RE(t2) + RE(t3);
- RE(c3) = RE(t2) - RE(t3);
+ RE(ch[ah]) = RE(t2) + RE(t3);
+ RE(ch[ah+2*l1]) = RE(t2) - RE(t3);
- IM(ch[ah+i]) = IM(t2) + IM(t3);
- IM(c3) = IM(t2) - IM(t3);
+ IM(ch[ah]) = IM(t2) + IM(t3);
+ IM(ch[ah+2*l1]) = IM(t2) - IM(t3);
- ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]),
- RE(c2), IM(c2), RE(wa1[i]), IM(wa1[i]));
- ComplexMult(&RE(ch[ah+i+2*l1*ido]), &IM(ch[ah+i+2*l1*ido]),
- RE(c3), IM(c3), RE(wa2[i]), IM(wa2[i]));
- ComplexMult(&RE(ch[ah+i+3*l1*ido]), &IM(ch[ah+i+3*l1*ido]),
- RE(c4), IM(c4), RE(wa3[i]), IM(wa3[i]));
- }
+ RE(ch[ah+l1]) = RE(t1) - RE(t4);
+ RE(ch[ah+3*l1]) = RE(t1) + RE(t4);
+
+ IM(ch[ah+l1]) = IM(t1) - IM(t4);
+ IM(ch[ah+3*l1]) = IM(t1) + IM(t4);
+ }
+ } else {
+ for (k = 0; k < l1; k++)
+ {
+ ac = 4*k*ido;
+ ah = k*ido;
+
+ for (i = 0; i < ido; i++)
+ {
+ complex_t c2, c3, c4, t1, t2, t3, t4;
+
+ RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]);
+ RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]);
+ IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]);
+ IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]);
+ RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]);
+ IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]);
+ IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]);
+ RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]);
+
+ RE(c2) = RE(t1) - RE(t4);
+ RE(c4) = RE(t1) + RE(t4);
+
+ IM(c2) = IM(t1) - IM(t4);
+ IM(c4) = IM(t1) + IM(t4);
+
+ RE(ch[ah+i]) = RE(t2) + RE(t3);
+ RE(c3) = RE(t2) - RE(t3);
+
+ IM(ch[ah+i]) = IM(t2) + IM(t3);
+ IM(c3) = IM(t2) - IM(t3);
+
+ ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]),
+ RE(c2), IM(c2), RE(wa1[i]), IM(wa1[i]));
+ ComplexMult(&RE(ch[ah+i+2*l1*ido]), &IM(ch[ah+i+2*l1*ido]),
+ RE(c3), IM(c3), RE(wa2[i]), IM(wa2[i]));
+ ComplexMult(&RE(ch[ah+i+3*l1*ido]), &IM(ch[ah+i+3*l1*ido]),
+ RE(c4), IM(c4), RE(wa3[i]), IM(wa3[i]));
}
}
}
@@ -584,8 +842,9 @@
cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
----------------------------------------------------------------------*/
-INLINE void cfftf1(uint16_t n, complex_t *c, complex_t *ch,
- const uint16_t *ifac, const complex_t *wa, const int8_t isign)
+#ifdef USE_SSE
+INLINE void cfftf1pos_sse(uint16_t n, complex_t *c, complex_t *ch,
+ const uint16_t *ifac, const complex_t *wa, const int8_t isign)
{
uint16_t i;
uint16_t k1, l1, l2;
@@ -610,17 +869,17 @@
ix3 = ix2 + ido;
if (na == 0)
- passf4((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], isign);
+ passf4pos_sse((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]);
else
- passf4((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], isign);
+ passf4pos_sse((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]);
na = 1 - na;
break;
case 2:
if (na == 0)
- passf2((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], isign);
+ passf2pos_sse((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]);
else
- passf2((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], isign);
+ passf2pos_sse((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]);
na = 1 - na;
break;
@@ -661,16 +920,180 @@
IM(c[i]) = IM(ch[i]);
}
}
+#endif
+INLINE void cfftf1pos(uint16_t n, complex_t *c, complex_t *ch,
+ const uint16_t *ifac, const complex_t *wa, const int8_t isign)
+{
+ uint16_t i;
+ uint16_t k1, l1, l2;
+ uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1;
+
+ nf = ifac[1];
+ na = 0;
+ l1 = 1;
+ iw = 0;
+
+ for (k1 = 2; k1 <= nf+1; k1++)
+ {
+ ip = ifac[k1];
+ l2 = ip*l1;
+ ido = n / l2;
+ idl1 = ido*l1;
+
+ switch (ip)
+ {
+ case 4:
+ ix2 = iw + ido;
+ ix3 = ix2 + ido;
+
+ if (na == 0)
+ passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]);
+ else
+ passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]);
+
+ na = 1 - na;
+ break;
+ case 2:
+ if (na == 0)
+ passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]);
+ else
+ passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]);
+
+ na = 1 - na;
+ break;
+ case 3:
+ ix2 = iw + ido;
+
+ if (na == 0)
+ passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign);
+ else
+ passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign);
+
+ na = 1 - na;
+ break;
+ case 5:
+ ix2 = iw + ido;
+ ix3 = ix2 + ido;
+ ix4 = ix3 + ido;
+
+ if (na == 0)
+ passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
+ else
+ passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
+
+ na = 1 - na;
+ break;
+ }
+
+ l1 = l2;
+ iw += (ip-1) * ido;
+ }
+
+ if (na == 0)
+ return;
+
+ for (i = 0; i < n; i++)
+ {
+ RE(c[i]) = RE(ch[i]);
+ IM(c[i]) = IM(ch[i]);
+ }
+}
+
+INLINE void cfftf1neg(uint16_t n, complex_t *c, complex_t *ch,
+ const uint16_t *ifac, const complex_t *wa, const int8_t isign)
+{
+ uint16_t i;
+ uint16_t k1, l1, l2;
+ uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1;
+
+ nf = ifac[1];
+ na = 0;
+ l1 = 1;
+ iw = 0;
+
+ for (k1 = 2; k1 <= nf+1; k1++)
+ {
+ ip = ifac[k1];
+ l2 = ip*l1;
+ ido = n / l2;
+ idl1 = ido*l1;
+
+ switch (ip)
+ {
+ case 4:
+ ix2 = iw + ido;
+ ix3 = ix2 + ido;
+
+ if (na == 0)
+ passf4neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]);
+ else
+ passf4neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]);
+
+ na = 1 - na;
+ break;
+ case 2:
+ if (na == 0)
+ passf2neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]);
+ else
+ passf2neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]);
+
+ na = 1 - na;
+ break;
+ case 3:
+ ix2 = iw + ido;
+
+ if (na == 0)
+ passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign);
+ else
+ passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign);
+
+ na = 1 - na;
+ break;
+ case 5:
+ ix2 = iw + ido;
+ ix3 = ix2 + ido;
+ ix4 = ix3 + ido;
+
+ if (na == 0)
+ passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
+ else
+ passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
+
+ na = 1 - na;
+ break;
+ }
+
+ l1 = l2;
+ iw += (ip-1) * ido;
+ }
+
+ if (na == 0)
+ return;
+
+ for (i = 0; i < n; i++)
+ {
+ RE(c[i]) = RE(ch[i]);
+ IM(c[i]) = IM(ch[i]);
+ }
+}
+
void cfftf(cfft_info *cfft, complex_t *c)
{
- cfftf1(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, -1);
+ cfftf1neg(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, -1);
}
void cfftb(cfft_info *cfft, complex_t *c)
{
- cfftf1(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1);
+ cfftf1pos(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1);
}
+
+#ifdef USE_SSE
+void cfftb_sse(cfft_info *cfft, complex_t *c)
+{
+ cfftf1pos_sse(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1);
+}
+#endif
static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac)
{
--- a/libfaad/cfft.h
+++ b/libfaad/cfft.h
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: cfft.h,v 1.14 2003/12/17 14:43:16 menno Exp $
+** $Id: cfft.h,v 1.15 2003/12/23 18:41:42 menno Exp $
**/
#ifndef __CFFT_H__
@@ -47,13 +47,23 @@
void cfftu(cfft_info *cfft);
-static void passf2(const uint16_t ido, const uint16_t l1, const complex_t *cc,
- complex_t *ch, const complex_t *wa, const int8_t isign);
+#ifdef USE_SSE
+void cfftb_sse(cfft_info *cfft, complex_t *c);
+static void passf2pos_sse(const uint16_t ido, const uint16_t l1, const complex_t *cc,
+ complex_t *ch, const complex_t *wa);
+static void passf4pos_sse(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch,
+ const complex_t *wa1, const complex_t *wa2, const complex_t *wa3);
+#endif
+static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc,
+ complex_t *ch, const complex_t *wa);
+static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc,
+ complex_t *ch, const complex_t *wa);
static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc,
complex_t *ch, const complex_t *wa1, const complex_t *wa2, const int8_t isign);
-static void passf4(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch,
- const complex_t *wa1, const complex_t *wa2, const complex_t *wa3,
- const int8_t isign);
+static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch,
+ const complex_t *wa1, const complex_t *wa2, const complex_t *wa3);
+static void passf4neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch,
+ const complex_t *wa1, const complex_t *wa2, const complex_t *wa3);
static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch,
const complex_t *wa1, const complex_t *wa2, const complex_t *wa3,
const complex_t *wa4, const int8_t isign);
--- a/libfaad/common.c
+++ b/libfaad/common.c
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: common.c,v 1.13 2003/12/17 14:43:16 menno Exp $
+** $Id: common.c,v 1.14 2003/12/23 18:41:42 menno Exp $
**/
/* just some common functions that could be used anywhere */
@@ -30,37 +30,60 @@
#include "common.h"
#include "structs.h"
-#include <malloc.h>
#include <stdlib.h>
#include "syntax.h"
#ifdef USE_SSE
-uint8_t cpu_has_sse()
+__declspec(naked) static int32_t __fastcall test_cpuid()
{
- uint32_t feature;
-
- __try
+ __asm
{
- __asm
- {
- xor eax, eax
- cpuid
- }
+ pushf
+ pop eax
+ mov ecx,eax
+ xor eax,(1<<21)
+ push eax
+ popf
+ pushf
+ pop eax
+ push ecx
+ popf
+ cmp eax,ecx
+ mov eax,0
+ setne al
+ ret
}
- __except (1)
- {
- return 0;
- }
+}
+__declspec(naked) static void __fastcall run_cpuid(int32_t param, int32_t out[4])
+{
__asm
{
- mov eax, 1
+ pushad
+ push edx
+ mov eax,ecx
cpuid
- mov feature, edx
+ pop edi
+ mov [edi+0],eax
+ mov [edi+4],ebx
+ mov [edi+8],ecx
+ mov [edi+12],edx
+ popad
+ ret
}
+}
+uint8_t cpu_has_sse()
+{
+ int32_t features[4];
+
+ if (test_cpuid())
+ {
+ run_cpuid(1, features);
+ }
+
/* check for SSE */
- if (feature & 0x02000000)
+ if (features[3] & 0x02000000)
return 1;
return 0;
--- a/libfaad/common.h
+++ b/libfaad/common.h
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: common.h,v 1.40 2003/12/17 16:37:34 menno Exp $
+** $Id: common.h,v 1.41 2003/12/23 18:41:42 menno Exp $
**/
#ifndef __COMMON_H__
@@ -117,7 +117,7 @@
# endif
#endif
-#if ((defined(_WIN32) && !defined(_WIN32_WCE)) || ((__GNUC__ >= 3) && defined(i386)))
+#if ((defined(_WIN32) && !defined(_WIN32_WCE)) /* || ((__GNUC__ >= 3) && defined(__i386__)) */ )
#ifndef FIXED_POINT
/* includes <xmmintrin.h> to enable SSE intrinsics */
#define USE_SSE
@@ -300,6 +300,19 @@
fld f
fistp i
}
+ return i;
+ }
+ #elif (defined(__i386__) && defined(__GNUC__))
+ #define HAS_LRINTF
+ // from http://www.stereopsis.com/FPU.html
+ static INLINE int lrintf(float f)
+ {
+ int i;
+ __asm__ __volatile__ (
+ "flds %1 \n\t"
+ "fistpl %0 \n\t"
+ : "=m" (i)
+ : "m" (f));
return i;
}
#endif
--- a/libfaad/error.c
+++ b/libfaad/error.c
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: error.c,v 1.17 2003/12/17 14:43:16 menno Exp $
+** $Id: error.c,v 1.18 2003/12/23 18:41:42 menno Exp $
**/
#include "common.h"
@@ -45,5 +45,7 @@
"Maximum number of bitstream elements exceeded",
"Input data buffer too small",
"Array index out of range",
- "Maximum number of scalefactor bands exceeded"
+ "Maximum number of scalefactor bands exceeded",
+ "Quantised value out of range",
+ "LTP lag out of range"
};
\ No newline at end of file
--- a/libfaad/error.h
+++ b/libfaad/error.h
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: error.h,v 1.12 2003/12/17 14:43:16 menno Exp $
+** $Id: error.h,v 1.13 2003/12/23 18:41:42 menno Exp $
**/
#ifndef __ERROR_H__
@@ -32,7 +32,7 @@
extern "C" {
#endif
-#define NUM_ERROR_MESSAGES 17
+#define NUM_ERROR_MESSAGES 19
extern int8_t *err_msg[];
#ifdef __cplusplus
--- a/libfaad/mdct.c
+++ b/libfaad/mdct.c
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: mdct.c,v 1.35 2003/12/17 14:43:16 menno Exp $
+** $Id: mdct.c,v 1.36 2003/12/23 18:41:42 menno Exp $
**/
/*
@@ -192,6 +192,7 @@
#ifdef PROFILE
mdct->cycles = 0;
+ mdct->fft_cycles = 0;
#endif
return mdct;
@@ -203,6 +204,7 @@
{
#ifdef PROFILE
printf("MDCT[%.4d]: %I64d cycles\n", mdct->N, mdct->cycles);
+ printf("CFFT[%.4d]: %I64d cycles\n", mdct->N/4, mdct->fft_cycles);
#endif
cfftu(mdct->cfft);
@@ -287,6 +289,7 @@
#ifdef PROFILE
count2 = faad_get_ts() - count2;
+ mdct->fft_cycles += count1;
mdct->cycles += (count2 - count1);
#endif
}
@@ -359,7 +362,7 @@
#endif
/* complex IFFT, any non-scaling FFT can be used here */
- cfftb(mdct->cfft, Z1);
+ cfftb_sse(mdct->cfft, Z1);
#ifdef PROFILE
count1 = faad_get_ts() - count1;
@@ -445,6 +448,7 @@
#ifdef PROFILE
count2 = faad_get_ts() - count2;
+ mdct->fft_cycles += count1;
mdct->cycles += (count2 - count1);
#endif
}
--- a/libfaad/output.c
+++ b/libfaad/output.c
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: output.c,v 1.32 2003/12/17 14:43:16 menno Exp $
+** $Id: output.c,v 1.33 2003/12/23 18:41:42 menno Exp $
**/
#include "common.h"
@@ -39,10 +39,11 @@
#define DM_MUL REAL_CONST(0.4142135623730950488) // 1/(1+sqrt(2))
#define RSQRT2 REAL_CONST(0.7071067811865475244) // 1/sqrt(2)
+
static INLINE real_t get_sample(real_t **input, uint8_t channel, uint16_t sample,
- uint8_t downMatrix, uint8_t *internal_channel)
+ uint8_t down_matrix, uint8_t *internal_channel)
{
- if (!downMatrix)
+ if (!down_matrix)
return input[internal_channel[channel]][sample];
if (channel == 0)
@@ -57,127 +58,309 @@
}
}
-void* output_to_PCM(faacDecHandle hDecoder,
- real_t **input, void *sample_buffer, uint8_t channels,
- uint16_t frame_len, uint8_t format)
-{
- uint8_t ch;
- uint16_t i, j = 0;
- uint8_t internal_channel;
+/*
+ * CLIP(sample, max, min): clamp the floating point lvalue `sample` to the
+ * range [min, max] in place.
+ * When HAS_LRINTF is not defined the sample is first biased by half a unit
+ * away from zero (NOTE(review): presumably because the fallback lrintf()
+ * truncates instead of rounding -- confirm against its definition).
+ * `sample` is evaluated more than once, so pass a plain variable without
+ * side effects.
+ * The body is wrapped in do { } while (0) so that "CLIP(...);" is exactly
+ * one statement, even as the branch of an unbraced if/else.
+ */
+#ifndef HAS_LRINTF
+#define CLIP(sample, max, min) \
+do { \
+    if ((sample) >= 0.0f) \
+    { \
+        (sample) += 0.5f; \
+        if ((sample) >= (max)) \
+            (sample) = (max); \
+    } else { \
+        (sample) += -0.5f; \
+        if ((sample) <= (min)) \
+            (sample) = (min); \
+    } \
+} while (0)
+#else
+#define CLIP(sample, max, min) \
+do { \
+    if ((sample) >= 0.0f) \
+    { \
+        if ((sample) >= (max)) \
+            (sample) = (max); \
+    } else { \
+        if ((sample) <= (min)) \
+            (sample) = (min); \
+    } \
+} while (0)
+#endif
- int16_t *short_sample_buffer = (int16_t*)sample_buffer;
- int32_t *int_sample_buffer = (int32_t*)sample_buffer;
- float32_t *float_sample_buffer = (float32_t*)sample_buffer;
- double *double_sample_buffer = (double*)sample_buffer;
+#define CONV(a,b) (((a)<<1)|((b)&0x1)) /* switch key: value a in the upper bits, bit 0 = lowest bit of b; arguments parenthesized so expressions expand safely */
-#ifdef PROFILE
- int64_t count = faad_get_ts();
-#endif
+static void to_PCM_16bit(faacDecHandle hDecoder, real_t **input,
+ uint8_t channels, uint16_t frame_len,
+ int16_t **sample_buffer)
+{
+ uint8_t ch, ch1;
+ uint16_t i;
- /* Copy output to a standard PCM buffer */
- for (ch = 0; ch < channels; ch++)
+ switch (CONV(channels,hDecoder->downMatrix))
{
- internal_channel = hDecoder->internal_channel[ch];
+ case CONV(1,0):
+ case CONV(1,1):
+ for(i = 0; i < frame_len; i++)
+ {
+ real_t inp = input[hDecoder->internal_channel[0]][i];
- switch (format)
+ CLIP(inp, 32767.0f, -32768.0f);
+
+ (*sample_buffer)[i] = (int16_t)lrintf(inp);
+ }
+ break;
+ case CONV(2,0):
+ ch = hDecoder->internal_channel[0];
+ ch1 = hDecoder->internal_channel[1];
+ for(i = 0; i < frame_len; i++)
{
- case FAAD_FMT_16BIT:
+ real_t inp0 = input[ch ][i];
+ real_t inp1 = input[ch1][i];
+
+ CLIP(inp0, 32767.0f, -32768.0f);
+ CLIP(inp1, 32767.0f, -32768.0f);
+
+ (*sample_buffer)[(i*2)+0] = (int16_t)lrintf(inp0);
+ (*sample_buffer)[(i*2)+1] = (int16_t)lrintf(inp1);
+ }
+ break;
+ default:
+ for (ch = 0; ch < channels; ch++)
+ {
for(i = 0; i < frame_len; i++)
{
real_t inp = get_sample(input, ch, i, hDecoder->downMatrix, hDecoder->internal_channel);
- if (inp >= 0.0f)
- {
-#ifndef HAS_LRINTF
- inp += 0.5f;
-#endif
- if (inp >= 32767.0f)
- {
- inp = 32767.0f;
- }
- } else {
-#ifndef HAS_LRINTF
- inp += -0.5f;
-#endif
- if (inp <= -32768.0f)
- {
- inp = -32768.0f;
- }
- }
- short_sample_buffer[(i*channels)+ch] = (int16_t)lrintf(inp);
- }
- break;
- case FAAD_FMT_24BIT:
+
+ CLIP(inp, 32767.0f, -32768.0f);
+
+ (*sample_buffer)[(i*channels)+ch] = (int16_t)lrintf(inp);
+ }
+ }
+ break;
+ }
+}
+
+static void to_PCM_24bit(faacDecHandle hDecoder, real_t **input,
+ uint8_t channels, uint16_t frame_len,
+ int32_t **sample_buffer)
+{
+ uint8_t ch, ch1;
+ uint16_t i;
+
+ switch (CONV(channels,hDecoder->downMatrix))
+ {
+ case CONV(1,0):
+ case CONV(1,1):
+ for(i = 0; i < frame_len; i++)
+ {
+ real_t inp = input[hDecoder->internal_channel[0]][i];
+
+ inp *= 256.0f;
+ CLIP(inp, 8388607.0f, -8388608.0f);
+
+ (*sample_buffer)[i] = (int32_t)lrintf(inp);
+ }
+ break;
+ case CONV(2,0):
+ ch = hDecoder->internal_channel[0];
+ ch1 = hDecoder->internal_channel[1];
+ for(i = 0; i < frame_len; i++)
+ {
+ real_t inp0 = input[ch ][i];
+ real_t inp1 = input[ch1][i];
+
+ inp0 *= 256.0f;
+ inp1 *= 256.0f;
+ CLIP(inp0, 8388607.0f, -8388608.0f);
+ CLIP(inp1, 8388607.0f, -8388608.0f);
+
+ (*sample_buffer)[(i*2)+0] = (int32_t)lrintf(inp0);
+ (*sample_buffer)[(i*2)+1] = (int32_t)lrintf(inp1);
+ }
+ break;
+ default:
+ for (ch = 0; ch < channels; ch++)
+ {
for(i = 0; i < frame_len; i++)
{
real_t inp = get_sample(input, ch, i, hDecoder->downMatrix, hDecoder->internal_channel);
+
inp *= 256.0f;
- if (inp >= 0.0f)
- {
-#ifndef HAS_LRINTF
- inp += 0.5f;
-#endif
- if (inp >= 8388607.0f)
- {
- inp = 8388607.0f;
- }
- } else {
-#ifndef HAS_LRINTF
- inp += -0.5f;
-#endif
- if (inp <= -8388608.0f)
- {
- inp = -8388608.0f;
- }
- }
- int_sample_buffer[(i*channels)+ch] = lrintf(inp);
- }
- break;
- case FAAD_FMT_32BIT:
+ CLIP(inp, 8388607.0f, -8388608.0f);
+
+ (*sample_buffer)[(i*channels)+ch] = (int32_t)lrintf(inp);
+ }
+ }
+ break;
+ }
+}
+
+/*
+ * Convert frame_len samples per channel from `input` to interleaved 32-bit
+ * integer PCM in *sample_buffer. Every sample is scaled by 65536, clipped
+ * and rounded with lrintf().
+ * Mono reads internal_channel[0] directly, stereo without downmix reads
+ * internal_channel[0] and [1] directly; every other combination goes
+ * through get_sample().
+ *
+ * The upper clip bound is 2147483520.0f, the largest float below 2^31.
+ * The float constant 2147483647.0f is not representable and rounds up to
+ * 2147483648.0f, so clipping to it would hand lrintf() a value that does
+ * not fit in int32_t and positive overload would not saturate correctly.
+ */
+static void to_PCM_32bit(faacDecHandle hDecoder, real_t **input,
+ uint8_t channels, uint16_t frame_len,
+ int32_t **sample_buffer)
+{
+ uint8_t ch, ch1;
+ uint16_t i;
+
+ switch (CONV(channels,hDecoder->downMatrix))
+ {
+ case CONV(1,0):
+ case CONV(1,1):
+ for(i = 0; i < frame_len; i++)
+ {
+ real_t inp = input[hDecoder->internal_channel[0]][i];
+
+ inp *= 65536.0f;
+ CLIP(inp, 2147483520.0f, -2147483648.0f);
+
+ (*sample_buffer)[i] = (int32_t)lrintf(inp);
+ }
+ break;
+ case CONV(2,0):
+ ch = hDecoder->internal_channel[0];
+ ch1 = hDecoder->internal_channel[1];
+ for(i = 0; i < frame_len; i++)
+ {
+ real_t inp0 = input[ch ][i];
+ real_t inp1 = input[ch1][i];
+
+ inp0 *= 65536.0f;
+ inp1 *= 65536.0f;
+ CLIP(inp0, 2147483520.0f, -2147483648.0f);
+ CLIP(inp1, 2147483520.0f, -2147483648.0f);
+
+ (*sample_buffer)[(i*2)+0] = (int32_t)lrintf(inp0);
+ (*sample_buffer)[(i*2)+1] = (int32_t)lrintf(inp1);
+ }
+ break;
+ default:
+ for (ch = 0; ch < channels; ch++)
+ {
for(i = 0; i < frame_len; i++)
{
real_t inp = get_sample(input, ch, i, hDecoder->downMatrix, hDecoder->internal_channel);
+
inp *= 65536.0f;
- if (inp >= 0.0f)
- {
-#ifndef HAS_LRINTF
- inp += 0.5f;
-#endif
- if (inp >= 2147483647.0f)
- {
- inp = 2147483647.0f;
- }
- } else {
-#ifndef HAS_LRINTF
- inp += -0.5f;
-#endif
- if (inp <= -2147483648.0f)
- {
- inp = -2147483648.0f;
- }
- }
- int_sample_buffer[(i*channels)+ch] = lrintf(inp);
- }
- break;
- case FAAD_FMT_FLOAT:
+ CLIP(inp, 2147483520.0f, -2147483648.0f);
+
+ (*sample_buffer)[(i*channels)+ch] = (int32_t)lrintf(inp);
+ }
+ }
+ break;
+ }
+}
+
+static void to_PCM_float(faacDecHandle hDecoder, real_t **input,
+ uint8_t channels, uint16_t frame_len,
+ float32_t **sample_buffer)
+{
+ uint8_t ch, ch1;
+ uint16_t i;
+
+ switch (CONV(channels,hDecoder->downMatrix))
+ {
+ case CONV(1,0):
+ case CONV(1,1):
+ for(i = 0; i < frame_len; i++)
+ {
+ real_t inp = input[hDecoder->internal_channel[0]][i];
+ (*sample_buffer)[i] = inp*FLOAT_SCALE;
+ }
+ break;
+ case CONV(2,0):
+ ch = hDecoder->internal_channel[0];
+ ch1 = hDecoder->internal_channel[1];
+ for(i = 0; i < frame_len; i++)
+ {
+ real_t inp0 = input[ch ][i];
+ real_t inp1 = input[ch1][i];
+ (*sample_buffer)[(i*2)+0] = inp0*FLOAT_SCALE;
+ (*sample_buffer)[(i*2)+1] = inp1*FLOAT_SCALE;
+ }
+ break;
+ default:
+ for (ch = 0; ch < channels; ch++)
+ {
for(i = 0; i < frame_len; i++)
{
- //real_t inp = input[internal_channel][i];
real_t inp = get_sample(input, ch, i, hDecoder->downMatrix, hDecoder->internal_channel);
- float_sample_buffer[(i*channels)+ch] = inp*FLOAT_SCALE;
+ (*sample_buffer)[(i*channels)+ch] = inp*FLOAT_SCALE;
}
- break;
- case FAAD_FMT_DOUBLE:
+ }
+ break;
+ }
+}
+
+static void to_PCM_double(faacDecHandle hDecoder, real_t **input,
+ uint8_t channels, uint16_t frame_len,
+ double **sample_buffer)
+{
+ uint8_t ch, ch1;
+ uint16_t i;
+
+ switch (CONV(channels,hDecoder->downMatrix))
+ {
+ case CONV(1,0):
+ case CONV(1,1):
+ for(i = 0; i < frame_len; i++)
+ {
+ real_t inp = input[hDecoder->internal_channel[0]][i];
+ (*sample_buffer)[i] = (double)inp*FLOAT_SCALE;
+ }
+ break;
+ case CONV(2,0):
+ ch = hDecoder->internal_channel[0];
+ ch1 = hDecoder->internal_channel[1];
+ for(i = 0; i < frame_len; i++)
+ {
+ real_t inp0 = input[ch ][i];
+ real_t inp1 = input[ch1][i];
+ (*sample_buffer)[(i*2)+0] = (double)inp0*FLOAT_SCALE;
+ (*sample_buffer)[(i*2)+1] = (double)inp1*FLOAT_SCALE;
+ }
+ break;
+ default:
+ for (ch = 0; ch < channels; ch++)
+ {
for(i = 0; i < frame_len; i++)
{
- //real_t inp = input[internal_channel][i];
real_t inp = get_sample(input, ch, i, hDecoder->downMatrix, hDecoder->internal_channel);
- double_sample_buffer[(i*channels)+ch] = (double)inp*FLOAT_SCALE;
+ (*sample_buffer)[(i*channels)+ch] = (double)inp*FLOAT_SCALE;
}
- break;
}
+ break;
}
+}
+void *output_to_PCM(faacDecHandle hDecoder,
+ real_t **input, void *sample_buffer, uint8_t channels,
+ uint16_t frame_len, uint8_t format)
+{
+ int16_t *short_sample_buffer = (int16_t*)sample_buffer;
+ int32_t *int_sample_buffer = (int32_t*)sample_buffer;
+ float32_t *float_sample_buffer = (float32_t*)sample_buffer;
+ double *double_sample_buffer = (double*)sample_buffer;
+
#ifdef PROFILE
+ int64_t count = faad_get_ts();
+#endif
+
+ /* Copy output to a standard PCM buffer */
+ switch (format)
+ {
+ case FAAD_FMT_16BIT:
+ to_PCM_16bit(hDecoder, input, channels, frame_len, &short_sample_buffer);
+ break;
+ case FAAD_FMT_24BIT:
+ to_PCM_24bit(hDecoder, input, channels, frame_len, &int_sample_buffer);
+ break;
+ case FAAD_FMT_32BIT:
+ to_PCM_32bit(hDecoder, input, channels, frame_len, &int_sample_buffer);
+ break;
+ case FAAD_FMT_FLOAT:
+ to_PCM_float(hDecoder, input, channels, frame_len, &float_sample_buffer);
+ break;
+ case FAAD_FMT_DOUBLE:
+ to_PCM_double(hDecoder, input, channels, frame_len, &double_sample_buffer);
+ break;
+ }
+
+#ifdef PROFILE
count = faad_get_ts() - count;
hDecoder->output_cycles += count;
#endif
@@ -208,13 +391,13 @@
if (tmp >= 0)
{
tmp += (1 << (REAL_BITS-1));
- if (tmp >= REAL_CONST(32768))
+ if (tmp >= REAL_CONST(32767))
{
tmp = REAL_CONST(32767);
}
} else {
tmp += -(1 << (REAL_BITS-1));
- if (tmp <= REAL_CONST(-32769))
+ if (tmp <= REAL_CONST(-32768))
{
tmp = REAL_CONST(-32768);
}
@@ -231,7 +414,7 @@
{
tmp += (1 << (REAL_BITS-9));
tmp >>= (REAL_BITS-8);
- if (tmp >= 8388608)
+ if (tmp >= 8388607)
{
tmp = 8388607;
}
@@ -238,7 +421,7 @@
} else {
tmp += -(1 << (REAL_BITS-9));
tmp >>= (REAL_BITS-8);
- if (tmp <= -8388609)
+ if (tmp <= -8388608)
{
tmp = -8388608;
}
--- a/libfaad/specrec.c
+++ b/libfaad/specrec.c
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: specrec.c,v 1.35 2003/12/17 16:37:34 menno Exp $
+** $Id: specrec.c,v 1.36 2003/12/23 18:41:42 menno Exp $
**/
/*
@@ -461,7 +461,7 @@
memcpy(spec_data, tmp_spec, frame_len*sizeof(real_t));
}
-static INLINE real_t iquant(int16_t q, const real_t *tab)
+static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
{
#ifdef FIXED_POINT
static const real_t errcorr[] = {
@@ -488,35 +488,41 @@
#else
if (q < 0)
{
- if (-q >= IQ_TABLE_SIZE)
- return 0;
-
/* tab contains a value for all possible q [0,8192] */
- return -tab[-q];
- }
+ if (-q < IQ_TABLE_SIZE)
+ return -tab[-q];
- if (q >= IQ_TABLE_SIZE)
+ *error = 17;
return 0;
+ } else {
+ /* tab contains a value for all possible q [0,8192] */
+ if (q < IQ_TABLE_SIZE)
+ return tab[q];
- /* tab contains a value for all possible q [0,8192] */
- return tab[q];
+ *error = 17;
+ return 0;
+ }
#endif
}
-static void inverse_quantization(real_t *x_invquant, const int16_t *x_quant, const uint16_t frame_len)
+static uint8_t inverse_quantization(real_t *x_invquant, const int16_t *x_quant, const uint16_t frame_len)
{
int16_t i;
+ uint8_t error = 0; /* Init error flag */
const real_t *tab = iq_table;
for(i = 0; i < frame_len; i+=4)
{
- x_invquant[i] = iquant(x_quant[i], tab);
- x_invquant[i+1] = iquant(x_quant[i+1], tab);
- x_invquant[i+2] = iquant(x_quant[i+2], tab);
- x_invquant[i+3] = iquant(x_quant[i+3], tab);
+ x_invquant[i] = iquant(x_quant[i], tab, &error);
+ x_invquant[i+1] = iquant(x_quant[i+1], tab, &error);
+ x_invquant[i+2] = iquant(x_quant[i+2], tab, &error);
+ x_invquant[i+3] = iquant(x_quant[i+3], tab, &error);
}
+
+ return error;
}
+#ifndef FIXED_POINT
ALIGN static const real_t pow2sf_tab[] = {
2.9802322387695313E-008, 5.9604644775390625E-008, 1.1920928955078125E-007,
2.384185791015625E-007, 4.76837158203125E-007, 9.5367431640625E-007,
@@ -540,12 +546,15 @@
8589934592, 17179869184, 34359738368,
68719476736, 137438953472, 274877906944
};
+#endif
ALIGN static real_t pow2_table[] =
{
+#if 0
COEF_CONST(0.59460355750136053335874998528024), /* 2^-0.75 */
COEF_CONST(0.70710678118654752440084436210485), /* 2^-0.5 */
COEF_CONST(0.84089641525371454303112547623321), /* 2^-0.25 */
+#endif
COEF_CONST(1.0),
COEF_CONST(1.1892071150027210667174999705605), /* 2^0.25 */
COEF_CONST(1.4142135623730950488016887242097), /* 2^0.5 */
@@ -573,10 +582,11 @@
{
top = ics->sect_sfb_offset[g][sfb+1];
- exp = (ics->scale_factors[g][sfb] - 100) >> 2;
- frac = (ics->scale_factors[g][sfb] - 100) & 3;
+ exp = (ics->scale_factors[g][sfb] /* - 100 */) >> 2;
+ frac = (ics->scale_factors[g][sfb] /* - 100 */) & 3;
#ifdef FIXED_POINT
+ exp -= 25;
/* IMDCT pre-scaling */
if (hDecoder->object_type == LD)
{
@@ -606,16 +616,16 @@
x_invquant[k+(groups*nshort)+3] <<= exp;
}
#else
- x_invquant[k+(groups*nshort)] = x_invquant[k+(groups*nshort)] * pow2sf_tab[exp+25];
- x_invquant[k+(groups*nshort)+1] = x_invquant[k+(groups*nshort)+1] * pow2sf_tab[exp+25];
- x_invquant[k+(groups*nshort)+2] = x_invquant[k+(groups*nshort)+2] * pow2sf_tab[exp+25];
- x_invquant[k+(groups*nshort)+3] = x_invquant[k+(groups*nshort)+3] * pow2sf_tab[exp+25];
+ x_invquant[k+(groups*nshort)] = x_invquant[k+(groups*nshort)] * pow2sf_tab[exp/*+25*/];
+ x_invquant[k+(groups*nshort)+1] = x_invquant[k+(groups*nshort)+1] * pow2sf_tab[exp/*+25*/];
+ x_invquant[k+(groups*nshort)+2] = x_invquant[k+(groups*nshort)+2] * pow2sf_tab[exp/*+25*/];
+ x_invquant[k+(groups*nshort)+3] = x_invquant[k+(groups*nshort)+3] * pow2sf_tab[exp/*+25*/];
#endif
- x_invquant[k+(groups*nshort)] = MUL_C(x_invquant[k+(groups*nshort)],pow2_table[frac + 3]);
- x_invquant[k+(groups*nshort)+1] = MUL_C(x_invquant[k+(groups*nshort)+1],pow2_table[frac + 3]);
- x_invquant[k+(groups*nshort)+2] = MUL_C(x_invquant[k+(groups*nshort)+2],pow2_table[frac + 3]);
- x_invquant[k+(groups*nshort)+3] = MUL_C(x_invquant[k+(groups*nshort)+3],pow2_table[frac + 3]);
+ x_invquant[k+(groups*nshort)] = MUL_C(x_invquant[k+(groups*nshort)],pow2_table[frac /* + 3*/]);
+ x_invquant[k+(groups*nshort)+1] = MUL_C(x_invquant[k+(groups*nshort)+1],pow2_table[frac /* + 3*/]);
+ x_invquant[k+(groups*nshort)+2] = MUL_C(x_invquant[k+(groups*nshort)+2],pow2_table[frac /* + 3*/]);
+ x_invquant[k+(groups*nshort)+3] = MUL_C(x_invquant[k+(groups*nshort)+3],pow2_table[frac /* + 3*/]);
}
}
groups += ics->window_group_length[g];
@@ -644,16 +654,16 @@
{
top = ics->sect_sfb_offset[g][sfb+1];
- exp = (ics->scale_factors[g][sfb] - 100) >> 2;
- frac = (ics->scale_factors[g][sfb] - 100) & 3;
+ exp = (ics->scale_factors[g][sfb] /* - 100 */) >> 2;
+ frac = (ics->scale_factors[g][sfb] /* - 100 */) & 3;
/* minimum size of a sf band is 4 and always a multiple of 4 */
for ( ; k < top; k += 4)
{
__m128 m1 = _mm_load_ps(&x_invquant[k+(groups*nshort)]);
- __m128 m2 = _mm_load_ps1(&pow2sf_tab[exp+25]);
+ __m128 m2 = _mm_load_ps1(&pow2sf_tab[exp /*+25*/]);
+ __m128 m3 = _mm_load_ps1(&pow2_table[frac /* + 3*/]);
__m128 m4 = _mm_mul_ps(m1, m2);
- __m128 m3 = _mm_load_ps1(&pow2_table[frac + 3]);
__m128 m5 = _mm_mul_ps(m3, m4);
_mm_store_ps(&x_invquant[k+(groups*nshort)], m5);
}
@@ -663,9 +673,10 @@
}
#endif
-void reconstruct_single_channel(faacDecHandle hDecoder, ic_stream *ics,
- element *sce, int16_t *spec_data)
+uint8_t reconstruct_single_channel(faacDecHandle hDecoder, ic_stream *ics,
+ element *sce, int16_t *spec_data)
{
+ uint8_t retval;
ALIGN real_t spec_coef[1024];
#ifdef PROFILE
@@ -673,7 +684,9 @@
#endif
/* inverse quantization */
- inverse_quantization(spec_coef, spec_data, hDecoder->frameLength);
+ retval = inverse_quantization(spec_coef, spec_data, hDecoder->frameLength);
+ if (retval > 0)
+ return retval;
/* apply scalefactors */
#ifndef USE_SSE
@@ -682,16 +695,16 @@
hDecoder->apply_sf_func(hDecoder, ics, spec_coef, hDecoder->frameLength);
#endif
+ /* deinterleave short block grouping */
+ if (ics->window_sequence == EIGHT_SHORT_SEQUENCE)
+ quant_to_spec(ics, spec_coef, hDecoder->frameLength);
+
#ifdef PROFILE
count = faad_get_ts() - count;
hDecoder->requant_cycles += count;
#endif
- /* deinterleave short block grouping */
- if (ics->window_sequence == EIGHT_SHORT_SEQUENCE)
- quant_to_spec(ics, spec_coef, hDecoder->frameLength);
-
/* pns decoding */
pns_decode(ics, NULL, spec_coef, NULL, hDecoder->frameLength, 0, hDecoder->object_type);
@@ -810,11 +823,14 @@
hDecoder->time_out[sce->channel]+hDecoder->frameLength, hDecoder->frameLength, hDecoder->object_type);
}
#endif
+
+ return 0;
}
-void reconstruct_channel_pair(faacDecHandle hDecoder, ic_stream *ics1, ic_stream *ics2,
- element *cpe, int16_t *spec_data1, int16_t *spec_data2)
+uint8_t reconstruct_channel_pair(faacDecHandle hDecoder, ic_stream *ics1, ic_stream *ics2,
+ element *cpe, int16_t *spec_data1, int16_t *spec_data2)
{
+ uint8_t retval;
ALIGN real_t spec_coef1[1024];
ALIGN real_t spec_coef2[1024];
@@ -823,9 +839,14 @@
#endif
/* inverse quantization */
- inverse_quantization(spec_coef1, spec_data1, hDecoder->frameLength);
- inverse_quantization(spec_coef2, spec_data2, hDecoder->frameLength);
+ retval = inverse_quantization(spec_coef1, spec_data1, hDecoder->frameLength);
+ if (retval > 0)
+ return retval;
+ retval = inverse_quantization(spec_coef2, spec_data2, hDecoder->frameLength);
+ if (retval > 0)
+ return retval;
+
/* apply scalefactors */
#ifndef USE_SSE
apply_scalefactors(hDecoder, ics1, spec_coef1, hDecoder->frameLength);
@@ -835,11 +856,6 @@
hDecoder->apply_sf_func(hDecoder, ics2, spec_coef2, hDecoder->frameLength);
#endif
-#ifdef PROFILE
- count = faad_get_ts() - count;
- hDecoder->requant_cycles += count;
-#endif
-
/* deinterleave short block grouping */
if (ics1->window_sequence == EIGHT_SHORT_SEQUENCE)
quant_to_spec(ics1, spec_coef1, hDecoder->frameLength);
@@ -846,7 +862,12 @@
if (ics2->window_sequence == EIGHT_SHORT_SEQUENCE)
quant_to_spec(ics2, spec_coef2, hDecoder->frameLength);
+#ifdef PROFILE
+ count = faad_get_ts() - count;
+ hDecoder->requant_cycles += count;
+#endif
+
/* pns decoding */
if (ics1->ms_mask_present)
{
@@ -1036,4 +1057,6 @@
hDecoder->object_type);
}
#endif
+
+ return 0;
}
--- a/libfaad/specrec.h
+++ b/libfaad/specrec.h
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: specrec.h,v 1.21 2003/12/17 14:43:16 menno Exp $
+** $Id: specrec.h,v 1.22 2003/12/23 18:41:42 menno Exp $
**/
#ifndef __SPECREC_H__
@@ -36,7 +36,7 @@
uint8_t window_grouping_info(faacDecHandle hDecoder, ic_stream *ics);
static void quant_to_spec(ic_stream *ics, real_t *spec_data, uint16_t frame_len);
-static void inverse_quantization(real_t *x_invquant, const int16_t *x_quant, const uint16_t frame_len);
+static uint8_t inverse_quantization(real_t *x_invquant, const int16_t *x_quant, const uint16_t frame_len);
void apply_scalefactors(faacDecHandle hDecoder, ic_stream *ics, real_t *x_invquant,
uint16_t frame_len);
#ifdef USE_SSE
@@ -43,9 +43,9 @@
void apply_scalefactors_sse(faacDecHandle hDecoder, ic_stream *ics, real_t *x_invquant,
uint16_t frame_len);
#endif
-void reconstruct_channel_pair(faacDecHandle hDecoder, ic_stream *ics1, ic_stream *ics2,
- element *cpe, int16_t *spec_data1, int16_t *spec_data2);
-void reconstruct_single_channel(faacDecHandle hDecoder, ic_stream *ics, element *sce,
+uint8_t reconstruct_channel_pair(faacDecHandle hDecoder, ic_stream *ics1, ic_stream *ics2,
+ element *cpe, int16_t *spec_data1, int16_t *spec_data2);
+uint8_t reconstruct_single_channel(faacDecHandle hDecoder, ic_stream *ics, element *sce,
int16_t *spec_data);
#ifdef __cplusplus
--- a/libfaad/syntax.c
+++ b/libfaad/syntax.c
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: syntax.c,v 1.62 2003/12/17 14:43:16 menno Exp $
+** $Id: syntax.c,v 1.63 2003/12/23 18:41:42 menno Exp $
**/
/*
@@ -530,7 +530,9 @@
return retval;
/* noiseless coding is done, spectral reconstruction is done now */
- reconstruct_single_channel(hDecoder, ics, &sce, spec_data);
+ retval = reconstruct_single_channel(hDecoder, ics, &sce, spec_data);
+ if (retval > 0)
+ return retval;
return 0;
}
@@ -581,7 +583,10 @@
if ((ics1->ltp.data_present = faad_get1bit(ld
DEBUGVAR(1,50,"channel_pair_element(): ltp.data_present"))) & 1)
{
- ltp_data(hDecoder, ics1, &(ics1->ltp), ld);
+ if ((result = ltp_data(hDecoder, ics1, &(ics1->ltp), ld)) > 0)
+ {
+ return result;
+ }
}
}
#endif
@@ -604,7 +609,10 @@
if ((ics1->ltp2.data_present = faad_get1bit(ld
DEBUGVAR(1,50,"channel_pair_element(): ltp.data_present"))) & 1)
{
- ltp_data(hDecoder, ics1, &(ics1->ltp2), ld);
+ if ((result = ltp_data(hDecoder, ics1, &(ics1->ltp2), ld)) > 0)
+ {
+ return result;
+ }
}
}
#endif
@@ -616,7 +624,11 @@
}
/* noiseless coding is done, spectral reconstruction is done now */
- reconstruct_channel_pair(hDecoder, ics1, ics2, &cpe, spec_data1, spec_data2);
+ if ((result = reconstruct_channel_pair(hDecoder, ics1, ics2, &cpe,
+ spec_data1, spec_data2)) > 0)
+ {
+ return result;
+ }
return 0;
}
@@ -685,7 +697,10 @@
if ((ics->ltp.data_present = faad_get1bit(ld
DEBUGVAR(1,50,"ics_info(): ltp.data_present"))) & 1)
{
- ltp_data(hDecoder, ics, &(ics->ltp), ld);
+ if ((retval = ltp_data(hDecoder, ics, &(ics->ltp), ld)) > 0)
+ {
+ return retval;
+ }
}
if (common_window)
{
@@ -692,7 +707,10 @@
if ((ics->ltp2.data_present = faad_get1bit(ld
DEBUGVAR(1,51,"ics_info(): ltp2.data_present"))) & 1)
{
- ltp_data(hDecoder, ics, &(ics->ltp2), ld);
+ if ((retval = ltp_data(hDecoder, ics, &(ics->ltp2), ld)) > 0)
+ {
+ return retval;
+ }
}
}
}
@@ -1064,9 +1082,13 @@
if (this_layer_stereo)
{
- reconstruct_channel_pair(hDecoder, ics1, ics2, &cpe, spec_data1, spec_data2);
+ hInfo->error = reconstruct_channel_pair(hDecoder, ics1, ics2, &cpe, spec_data1, spec_data2);
+ if (hInfo->error > 0)
+ return;
} else {
- reconstruct_single_channel(hDecoder, ics1, &cpe, spec_data1);
+ hInfo->error = reconstruct_single_channel(hDecoder, ics1, &cpe, spec_data1);
+ if (hInfo->error > 0)
+ return;
}
hDecoder->element_id[hDecoder->fr_ch_ele] = cpe.ele_id;
@@ -1173,7 +1195,10 @@
if ((ics->ltp.data_present = faad_get1bit(ld
DEBUGVAR(1,310,"aac_scalable_main_header(): ltp.data_present"))) & 1)
{
- ltp_data(hDecoder, ics, &(ics->ltp), ld);
+ if ((retval = ltp_data(hDecoder, ics, &(ics->ltp), ld)) > 0)
+ {
+ return retval;
+ }
}
#if 0
}
@@ -1594,7 +1619,7 @@
#ifdef LTP_DEC
/* Table 4.4.28 */
-static void ltp_data(faacDecHandle hDecoder, ic_stream *ics, ltp_info *ltp, bitfile *ld)
+static uint8_t ltp_data(faacDecHandle hDecoder, ic_stream *ics, ltp_info *ltp, bitfile *ld)
{
uint8_t sfb, w;
@@ -1621,7 +1646,7 @@
/* Check length of lag */
if (ltp->lag > (hDecoder->frameLength << 1))
- ltp->lag = 0; // FIXME: Error handling
+ return 18;
ltp->coef = (uint8_t)faad_getbits(ld, 3
DEBUGVAR(1,82,"ltp_data(): coef"));
@@ -1651,6 +1676,8 @@
DEBUGVAR(1,86,"ltp_data(): long_used"));
}
}
+
+ return 0;
}
#endif
--- a/libfaad/syntax.h
+++ b/libfaad/syntax.h
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: syntax.h,v 1.43 2003/12/17 14:43:17 menno Exp $
+** $Id: syntax.h,v 1.44 2003/12/23 18:41:42 menno Exp $
**/
#ifndef __SYNTAX_H__
@@ -149,7 +149,7 @@
#endif
static uint8_t pulse_data(ic_stream *ics, pulse_info *pul, bitfile *ld);
static void tns_data(ic_stream *ics, tns_info *tns, bitfile *ld);
-static void ltp_data(faacDecHandle hDecoder, ic_stream *ics, ltp_info *ltp, bitfile *ld);
+static uint8_t ltp_data(faacDecHandle hDecoder, ic_stream *ics, ltp_info *ltp, bitfile *ld);
static uint8_t adts_fixed_header(adts_header *adts, bitfile *ld);
static void adts_variable_header(adts_header *adts, bitfile *ld);
static void adts_error_check(adts_header *adts, bitfile *ld);