shithub: aacdec

Download patch

ref: 877579bfaf220ce9d28f5254b4893ed136a03b3a
parent: d8578de53c9a0af9bf3cc9d2a008cc0a66eb9b1d
author: menno <menno>
date: Sat Mar 27 06:14:49 EST 2004

Using double ringbuffers where possible
mp4ff faster tag reading
changed structures to work better on 64 bit platforms
made copyright notice smaller

--- a/common/mp4ff/drms.c
+++ b/common/mp4ff/drms.c
@@ -2,7 +2,7 @@
  * drms.c : DRMS
  *****************************************************************************
  * Copyright (C) 2004 VideoLAN
- * $Id: drms.c,v 1.3 2004/01/11 15:52:18 menno Exp $
+ * $Id: drms.c,v 1.4 2004/03/27 11:14:48 menno Exp $
  *
  * Author: Jon Lech Johansen <jon-vl@nanocrew.net>
  *
@@ -23,6 +23,7 @@
 
 #include <stdlib.h>                                      /* malloc(), free() */
 
+/* #include <vlc/vlc.h> */
 #include "mp4ffint.h"
 
 #ifdef ITUNES_DRM
--- a/common/mp4ff/drms.h
+++ b/common/mp4ff/drms.h
@@ -2,7 +2,7 @@
  * drms.h : DRMS
  *****************************************************************************
  * Copyright (C) 2004 VideoLAN
- * $Id: drms.h,v 1.3 2004/01/11 15:52:18 menno Exp $
+ * $Id: drms.h,v 1.4 2004/03/27 11:14:48 menno Exp $
  *
  * Author: Jon Lech Johansen <jon-vl@nanocrew.net>
  *
@@ -20,7 +20,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
-
 
 #define DRMS_INIT_UKEY  0
 #define DRMS_INIT_IVIV  1
--- a/common/mp4ff/drmstables.h
+++ b/common/mp4ff/drmstables.h
@@ -2,7 +2,7 @@
  * drmstables.h : DRMS tables
  *****************************************************************************
  * Copyright (C) 2004 VideoLAN
- * $Id: drmstables.h,v 1.2 2004/01/11 15:52:18 menno Exp $
+ * $Id: drmstables.h,v 1.3 2004/03/27 11:14:48 menno Exp $
  *
  * Author: Jon Lech Johansen <jon-vl@nanocrew.net>
  *
@@ -20,7 +20,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
-
 
 static uint32_t p_drms_tab1[ 10 ] =
 {
--- a/common/mp4ff/mp4atom.c
+++ b/common/mp4ff/mp4atom.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: mp4atom.c,v 1.17 2004/01/11 15:52:18 menno Exp $
+** $Id: mp4atom.c,v 1.18 2004/03/27 11:14:48 menno Exp $
 **/
 
 #include <stdlib.h>
@@ -371,7 +371,7 @@
     size = mp4ff_atom_read_header(f, &atom_type, &header_size);
     if (atom_type == ATOM_SINF)
     {
-        parse_sub_atoms(f, size-header_size);
+        parse_sub_atoms(f, size-header_size,0);
     }
 
     return 0;
--- a/common/mp4ff/mp4ff.c
+++ b/common/mp4ff/mp4ff.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: mp4ff.c,v 1.15 2004/01/11 15:52:18 menno Exp $
+** $Id: mp4ff.c,v 1.16 2004/03/27 11:14:48 menno Exp $
 **/
 
 #include <stdlib.h>
@@ -39,11 +39,24 @@
 
     ff->stream = f;
 
-    parse_atoms(ff);
+    parse_atoms(ff,0);
 
     return ff;
 }
 
+/* Open f with parse_atoms() in meta-only mode; NULL when out of memory. */
+mp4ff_t *mp4ff_open_read_metaonly(mp4ff_callback_t *f)
+{
+    mp4ff_t *ff = malloc(sizeof(mp4ff_t));
+
+    if (ff == NULL) /* never memset() through a failed allocation */
+        return NULL;
+    memset(ff, 0, sizeof(mp4ff_t));
+    ff->stream = f;
+    parse_atoms(ff,1);
+    return ff;
+}
+
 void mp4ff_close(mp4ff_t *ff)
 {
     int32_t i;
@@ -96,8 +109,35 @@
     memset(f->track[f->total_tracks - 1], 0, sizeof(mp4ff_track_t));
 }
 
+/*
+ * Tell the atom parsers whether an atom still has to be parsed when only
+ * metadata is wanted.
+ * Returns 0 when the atom may be skipped (the caller seeks past it) and
+ * 1 when it must be parsed as usual.
+ */
+static int need_parse_when_meta_only(uint8_t atom_type)
+{
+    int skippable;
+
+    switch (atom_type)
+    {
+    case ATOM_EDTS: case ATOM_DRMS: case ATOM_SINF: case ATOM_SCHI:
+    case ATOM_STTS: case ATOM_STSZ: case ATOM_STZ2: case ATOM_STCO:
+    case ATOM_STSC: case ATOM_FRMA: case ATOM_IVIV: case ATOM_PRIV:
+        skippable = 1;
+        break;
+    default:
+        /* Everything else is parsed; note that MDIA, MINF, STBL, STSD
+         * and CTTS are not in the skip list. */
+        skippable = 0;
+        break;
+    }
+
+    return !skippable;
+}
+
 /* parse atoms that are sub atoms of other atoms */
-int32_t parse_sub_atoms(mp4ff_t *f, const uint64_t total_size)
+int32_t parse_sub_atoms(mp4ff_t *f, const uint64_t total_size,int meta_only)
 {
     uint64_t size;
     uint8_t atom_type = 0;
@@ -122,9 +162,12 @@
         }
 
         /* parse subatoms */
-        if (atom_type < SUBATOMIC)
+		if (meta_only && !need_parse_when_meta_only(atom_type))
+		{
+			mp4ff_set_position(f, mp4ff_position(f)+size-header_size);
+		} else if (atom_type < SUBATOMIC)
         {
-            parse_sub_atoms(f, size-header_size);
+            parse_sub_atoms(f, size-header_size,meta_only);
         } else {
             mp4ff_atom_read(f, (uint32_t)size, atom_type);
         }
@@ -134,7 +177,7 @@
 }
 
 /* parse root atoms */
-int32_t parse_atoms(mp4ff_t *f)
+int32_t parse_atoms(mp4ff_t *f,int meta_only)
 {
     uint64_t size;
     uint8_t atom_type = 0;
@@ -162,9 +205,12 @@
         }
 
         /* parse subatoms */
-        if (atom_type < SUBATOMIC)
+		if (meta_only && !need_parse_when_meta_only(atom_type))
+		{
+			mp4ff_set_position(f, mp4ff_position(f)+size-header_size);
+		} else if (atom_type < SUBATOMIC)
         {
-            parse_sub_atoms(f, size-header_size);
+            parse_sub_atoms(f, size-header_size,meta_only);
         } else {
             /* skip this atom */
             mp4ff_set_position(f, mp4ff_position(f)+size-header_size);
--- a/common/mp4ff/mp4ff.h
+++ b/common/mp4ff/mp4ff.h
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: mp4ff.h,v 1.19 2004/01/11 15:52:18 menno Exp $
+** $Id: mp4ff.h,v 1.20 2004/03/27 11:14:49 menno Exp $
 **/
 
 #ifndef MP4FF_H
@@ -51,6 +51,7 @@
 /* API */
 
 mp4ff_t *mp4ff_open_read(mp4ff_callback_t *f);
+mp4ff_t *mp4ff_open_read_metaonly(mp4ff_callback_t *f);
 void mp4ff_close(mp4ff_t *f);
 int32_t mp4ff_get_sample_duration(const mp4ff_t *f, const int32_t track, const int32_t sample);
 int32_t mp4ff_get_sample_duration_use_offsets(const mp4ff_t *f, const int32_t track, const int32_t sample);
--- a/common/mp4ff/mp4ffint.h
+++ b/common/mp4ff/mp4ffint.h
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: mp4ffint.h,v 1.15 2004/01/14 20:50:22 menno Exp $
+** $Id: mp4ffint.h,v 1.16 2004/03/27 11:14:49 menno Exp $
 **/
 
 #ifndef MP4FF_INTERNAL_H
@@ -35,7 +35,7 @@
 #include "mp4ff_int_types.h"
 
 
-#ifdef _WIN32
+#if defined(_WIN32) && !defined(_WIN32_WCE)
 #define ITUNES_DRM
 #endif
 
@@ -112,7 +112,7 @@
 #include "../../config.h"
 #endif
 
-#ifndef _WIN32
+#if !(defined(_WIN32) || defined(_WIN32_WCE))
 #define stricmp strcasecmp
 #endif
 
@@ -302,8 +302,8 @@
 #endif
 void mp4ff_close(mp4ff_t *ff);
 void mp4ff_track_add(mp4ff_t *f);
-int32_t parse_sub_atoms(mp4ff_t *f, const uint64_t total_size);
-int32_t parse_atoms(mp4ff_t *f);
+int32_t parse_sub_atoms(mp4ff_t *f, const uint64_t total_size,int meta_only);
+int32_t parse_atoms(mp4ff_t *f,int meta_only);
 
 int32_t mp4ff_get_sample_duration(const mp4ff_t *f, const int32_t track, const int32_t sample);
 int64_t mp4ff_get_sample_position(const mp4ff_t *f, const int32_t track, const int32_t sample);
--- a/common/mp4ff/mp4meta.c
+++ b/common/mp4ff/mp4meta.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: mp4meta.c,v 1.13 2004/01/11 15:52:18 menno Exp $
+** $Id: mp4meta.c,v 1.14 2004/03/27 11:14:49 menno Exp $
 **/
 
 #ifdef USE_TAGGING
@@ -31,6 +31,8 @@
 #include <stdio.h>
 #include <string.h>
 #include "mp4ffint.h"
+
+
 
 static int32_t mp4ff_tag_add_field(mp4ff_metadata_t *tags, const char *item, const char *value)
 {
--- a/common/mp4ff/mp4sample.c
+++ b/common/mp4ff/mp4sample.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: mp4sample.c,v 1.15 2004/01/11 15:52:19 menno Exp $
+** $Id: mp4sample.c,v 1.16 2004/03/27 11:14:49 menno Exp $
 **/
 
 #include <stdlib.h>
--- a/common/mp4ff/mp4tagupdate.c
+++ b/common/mp4ff/mp4tagupdate.c
@@ -606,7 +606,7 @@
     ff->stream = f;
 	mp4ff_set_position(ff,0);
 
-    parse_atoms(ff);
+    parse_atoms(ff,1);
 
 
 	if (!modify_moov(ff,data,&new_moov_data,&new_moov_size))
--- a/common/mp4ff/mp4util.c
+++ b/common/mp4ff/mp4util.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: mp4util.c,v 1.15 2004/01/11 15:52:19 menno Exp $
+** $Id: mp4util.c,v 1.16 2004/03/27 11:14:49 menno Exp $
 **/
 
 #include "mp4ffint.h"
--- a/frontend/main.c
+++ b/frontend/main.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: main.c,v 1.75 2004/03/10 19:45:40 menno Exp $
+** $Id: main.c,v 1.76 2004/03/27 11:14:49 menno Exp $
 **/
 
 #ifdef _WIN32
@@ -1025,13 +1025,7 @@
         fprintf(stderr, " Floating point version\n");
     fprintf(stderr, "\n");
     fprintf(stderr, " This program is free software; you can redistribute it and/or modify\n");
-    fprintf(stderr, " it under the terms of the GNU General Public License as published by\n");
-    fprintf(stderr, " the Free Software Foundation; either version 2 of the License, or\n");
-    fprintf(stderr, " (at your option) any later version.\n");
-    fprintf(stderr, "\n");
-    fprintf(stderr, " You should have received a copy of the GNU General Public License\n");
-    fprintf(stderr, " along with this program; if not, write to the Free Software\n");
-    fprintf(stderr, " Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n");
+    fprintf(stderr, " it under the terms of the GNU General Public License.\n");
     fprintf(stderr, "\n");
     fprintf(stderr, " **************************************************************************\n\n");
 
--- a/libfaad/common.h
+++ b/libfaad/common.h
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: common.h,v 1.56 2004/03/19 10:37:55 menno Exp $
+** $Id: common.h,v 1.58 2004/04/03 10:49:14 menno Exp $
 **/
 
 #ifndef __COMMON_H__
@@ -37,7 +37,7 @@
 #endif
 
 #define INLINE __inline
-#if defined(_WIN32) && !defined(_WIN32_WCE)
+#if 0 //defined(_WIN32) && !defined(_WIN32_WCE)
 #define ALIGN __declspec(align(16))
 #else
 #define ALIGN
--- a/libfaad/pns.c
+++ b/libfaad/pns.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: pns.c,v 1.30 2004/02/26 09:29:27 menno Exp $
+** $Id: pns.c,v 1.31 2004/03/27 11:14:49 menno Exp $
 **/
 
 #include "common.h"
@@ -70,9 +70,6 @@
 
 static real_t pow2_table[] =
 {
-    COEF_CONST(0.59460355750136),
-    COEF_CONST(0.70710678118655),
-    COEF_CONST(0.84089641525371),
     COEF_CONST(1.0),
     COEF_CONST(1.18920711500272),
     COEF_CONST(1.41421356237310),
@@ -131,8 +128,8 @@
     {
         scale = DIV(REAL_CONST(1),energy);
 
-        exp = scale_factor / 4;
-        frac = scale_factor % 4;
+        exp = scale_factor >> 2;
+        frac = scale_factor & 3;
 
         /* IMDCT pre-scaling */
         exp -= sub;
@@ -143,7 +140,7 @@
             scale <<= exp;
 
         if (frac)
-            scale = MUL_C(scale, pow2_table[frac + 3]);
+            scale = MUL_C(scale, pow2_table[frac]);
 
         for (i = 0; i < size; i++)
         {
--- a/libfaad/ps_dec.c
+++ b/libfaad/ps_dec.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: ps_dec.c,v 1.2 2004/03/19 10:37:55 menno Exp $
+** $Id: ps_dec.c,v 1.3 2004/03/27 11:14:49 menno Exp $
 **/
 
 #include "common.h"
@@ -225,41 +225,28 @@
 
     for (i = 0; i < frame_len; i++)
     {
+        real_t r0 = MUL_F(filter[0],(QMF_RE(buffer[0+i]) + QMF_RE(buffer[12+i])));
+        real_t r1 = MUL_F(filter[1],(QMF_RE(buffer[1+i]) + QMF_RE(buffer[11+i])));
+        real_t r2 = MUL_F(filter[2],(QMF_RE(buffer[2+i]) + QMF_RE(buffer[10+i])));
+        real_t r3 = MUL_F(filter[3],(QMF_RE(buffer[3+i]) + QMF_RE(buffer[9+i])));
+        real_t r4 = MUL_F(filter[4],(QMF_RE(buffer[4+i]) + QMF_RE(buffer[8+i])));
+        real_t r5 = MUL_F(filter[5],(QMF_RE(buffer[5+i]) + QMF_RE(buffer[7+i])));
+        real_t r6 = MUL_F(filter[6],QMF_RE(buffer[6+i]));
+        real_t i0 = MUL_F(filter[0],(QMF_IM(buffer[0+i]) + QMF_IM(buffer[12+i])));
+        real_t i1 = MUL_F(filter[1],(QMF_IM(buffer[1+i]) + QMF_IM(buffer[11+i])));
+        real_t i2 = MUL_F(filter[2],(QMF_IM(buffer[2+i]) + QMF_IM(buffer[10+i])));
+        real_t i3 = MUL_F(filter[3],(QMF_IM(buffer[3+i]) + QMF_IM(buffer[9+i])));
+        real_t i4 = MUL_F(filter[4],(QMF_IM(buffer[4+i]) + QMF_IM(buffer[8+i])));
+        real_t i5 = MUL_F(filter[5],(QMF_IM(buffer[5+i]) + QMF_IM(buffer[7+i])));
+        real_t i6 = MUL_F(filter[6],QMF_IM(buffer[6+i]));
+
         /* q = 0 */
-        QMF_RE(X_hybrid[i][0]) =
-            MUL_F(filter[0],(QMF_RE(buffer[0+i]) + QMF_RE(buffer[12+i]))) +
-            MUL_F(filter[1],(QMF_RE(buffer[1+i]) + QMF_RE(buffer[11+i]))) +
-            MUL_F(filter[2],(QMF_RE(buffer[2+i]) + QMF_RE(buffer[10+i]))) +
-            MUL_F(filter[3],(QMF_RE(buffer[3+i]) + QMF_RE(buffer[9+i]))) +
-            MUL_F(filter[4],(QMF_RE(buffer[4+i]) + QMF_RE(buffer[8+i]))) +
-            MUL_F(filter[5],(QMF_RE(buffer[5+i]) + QMF_RE(buffer[7+i]))) +
-            MUL_F(filter[6],QMF_RE(buffer[6+i]));
-        QMF_IM(X_hybrid[i][0]) =
-            MUL_F(filter[0],(QMF_IM(buffer[0+i]) + QMF_IM(buffer[12+i]))) +
-            MUL_F(filter[1],(QMF_IM(buffer[1+i]) + QMF_IM(buffer[11+i]))) +
-            MUL_F(filter[2],(QMF_IM(buffer[2+i]) + QMF_IM(buffer[10+i]))) +
-            MUL_F(filter[3],(QMF_IM(buffer[3+i]) + QMF_IM(buffer[9+i]))) +
-            MUL_F(filter[4],(QMF_IM(buffer[4+i]) + QMF_IM(buffer[8+i]))) +
-            MUL_F(filter[5],(QMF_IM(buffer[5+i]) + QMF_IM(buffer[7+i]))) +
-            MUL_F(filter[6],QMF_IM(buffer[6+i]));
+        QMF_RE(X_hybrid[i][0]) = r0 + r1 + r2 + r3 + r4 + r5 + r6;
+        QMF_IM(X_hybrid[i][0]) = i0 + i1 + i2 + i3 + i4 + i5 + i6;
 
         /* q = 1 */
-        QMF_RE(X_hybrid[i][1]) =
-            MUL_F(filter[0],(QMF_RE(buffer[0+i]) + QMF_RE(buffer[12+i]))) -
-            MUL_F(filter[1],(QMF_RE(buffer[1+i]) + QMF_RE(buffer[11+i]))) +
-            MUL_F(filter[2],(QMF_RE(buffer[2+i]) + QMF_RE(buffer[10+i]))) -
-            MUL_F(filter[3],(QMF_RE(buffer[3+i]) + QMF_RE(buffer[9+i]))) +
-            MUL_F(filter[4],(QMF_RE(buffer[4+i]) + QMF_RE(buffer[8+i]))) -
-            MUL_F(filter[5],(QMF_RE(buffer[5+i]) + QMF_RE(buffer[7+i]))) +
-            MUL_F(filter[6],QMF_RE(buffer[6+i]));
-        QMF_IM(X_hybrid[i][1]) =
-            MUL_F(filter[0],(QMF_IM(buffer[0+i]) + QMF_IM(buffer[12+i]))) -
-            MUL_F(filter[1],(QMF_IM(buffer[1+i]) + QMF_IM(buffer[11+i]))) +
-            MUL_F(filter[2],(QMF_IM(buffer[2+i]) + QMF_IM(buffer[10+i]))) -
-            MUL_F(filter[3],(QMF_IM(buffer[3+i]) + QMF_IM(buffer[9+i]))) +
-            MUL_F(filter[4],(QMF_IM(buffer[4+i]) + QMF_IM(buffer[8+i]))) -
-            MUL_F(filter[5],(QMF_IM(buffer[5+i]) + QMF_IM(buffer[7+i]))) +
-            MUL_F(filter[6],QMF_IM(buffer[6+i]));
+        QMF_RE(X_hybrid[i][1]) = r0 - r1 + r2 - r3 + r4 - r5 + r6;
+        QMF_IM(X_hybrid[i][1]) = i0 - i1 + i2 - i3 + i4 - i5 + i6;
     }
 }
 
--- a/libfaad/sbr_dec.c
+++ b/libfaad/sbr_dec.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: sbr_dec.c,v 1.32 2004/03/19 10:37:55 menno Exp $
+** $Id: sbr_dec.c,v 1.33 2004/03/27 11:14:49 menno Exp $
 **/
 
 
@@ -373,11 +373,7 @@
     {
         sbr_qmf_synthesis_32(sbr, sbr->qmfs[0], X, left_chan);
     } else {
-#ifndef USE_SSE
         sbr_qmf_synthesis_64(sbr, sbr->qmfs[0], X, left_chan);
-#else
-        sbr->qmfs[ch]->qmf_func(sbr, sbr->qmfs[0], X, left_chan);
-#endif
     }
 
     sbr_process_channel(sbr, right_chan, X, 1, dont_process, downSampledSBR);
@@ -386,11 +382,7 @@
     {
         sbr_qmf_synthesis_32(sbr, sbr->qmfs[1], X, right_chan);
     } else {
-#ifndef USE_SSE
         sbr_qmf_synthesis_64(sbr, sbr->qmfs[1], X, right_chan);
-#else
-        sbr->qmfs[ch]->qmf_func(sbr, sbr->qmfs[1], X, right_chan);
-#endif
     }
 
     if (sbr->bs_header_flag)
@@ -446,11 +438,7 @@
     {
         sbr_qmf_synthesis_32(sbr, sbr->qmfs[0], X, channel);
     } else {
-#ifndef USE_SSE
         sbr_qmf_synthesis_64(sbr, sbr->qmfs[0], X, channel);
-#else
-        sbr->qmfs[ch]->qmf_func(sbr, sbr->qmfs[0], X, channel);
-#endif
     }
 
     if (sbr->bs_header_flag)
@@ -538,13 +526,8 @@
         sbr_qmf_synthesis_32(sbr, sbr->qmfs[0], X_left, left_channel);
         sbr_qmf_synthesis_32(sbr, sbr->qmfs[1], X_right, right_channel);
     } else {
-#ifndef USE_SSE
         sbr_qmf_synthesis_64(sbr, sbr->qmfs[0], X_left, left_channel);
         sbr_qmf_synthesis_64(sbr, sbr->qmfs[1], X_right, right_channel);
-#else
-        sbr->qmfs[ch]->qmf_func(sbr, sbr->qmfs[0], X_left, left_channel);
-        sbr->qmfs[ch]->qmf_func(sbr, sbr->qmfs[1], X_right, right_channel);
-#endif
     }
 
     if (sbr->bs_header_flag)
--- a/libfaad/sbr_dec.h
+++ b/libfaad/sbr_dec.h
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: sbr_dec.h,v 1.28 2004/03/19 10:37:55 menno Exp $
+** $Id: sbr_dec.h,v 1.29 2004/03/27 11:14:49 menno Exp $
 **/
 
 #ifndef __SBR_DEC_H__
@@ -46,17 +46,15 @@
 
 typedef struct {
     real_t *x;
+	int16_t x_index;
     uint8_t channels;
 } qmfa_info;
 
 typedef struct {
-    real_t *v[2];
-    uint8_t v_index;
+    real_t *v;
+    int16_t v_index;
     uint8_t channels;
     complex_t *pre_twiddle;
-#ifdef USE_SSE
-    void (*qmf_func)(void *a, void *b, void *c, void *d);
-#endif
 } qmfs_info;
 
 typedef struct
--- a/libfaad/sbr_qmf.c
+++ b/libfaad/sbr_qmf.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: sbr_qmf.c,v 1.22 2004/03/10 19:45:42 menno Exp $
+** $Id: sbr_qmf.c,v 1.23 2004/03/27 11:14:49 menno Exp $
 **/
 
 #include "common.h"
@@ -42,9 +42,14 @@
 qmfa_info *qmfa_init(uint8_t channels)
 {
     qmfa_info *qmfa = (qmfa_info*)faad_malloc(sizeof(qmfa_info));
-    qmfa->x = (real_t*)faad_malloc(channels * 10 * sizeof(real_t));
-    memset(qmfa->x, 0, channels * 10 * sizeof(real_t));
 
+	/* x is implemented as double ringbuffer */
+    qmfa->x = (real_t*)faad_malloc(2 * channels * 10 * sizeof(real_t));
+    memset(qmfa->x, 0, 2 * channels * 10 * sizeof(real_t));
+
+	/* ringbuffer index */
+	qmfa->x_index = 0;
+
     qmfa->channels = channels;
 
     return qmfa;
@@ -77,15 +82,16 @@
         int16_t n;
 
         /* shift input buffer x */
-        memmove(qmfa->x + 32, qmfa->x, (320-32)*sizeof(real_t));
+		/* input buffer is not shifted anymore, x is implemented as double ringbuffer */
+        //memmove(qmfa->x + 32, qmfa->x, (320-32)*sizeof(real_t));
 
         /* add new samples to input buffer x */
         for (n = 32 - 1; n >= 0; n--)
         {
 #ifdef FIXED_POINT
-            qmfa->x[n] = (input[in++]) >> 4;
+            qmfa->x[qmfa->x_index + n] = qmfa->x[qmfa->x_index + n + 320] = (input[in++]) >> 4;
 #else
-            qmfa->x[n] = input[in++];
+            qmfa->x[qmfa->x_index + n] = qmfa->x[qmfa->x_index + n + 320] = input[in++];
 #endif
         }
 
@@ -92,13 +98,18 @@
         /* window and summation to create array u */
         for (n = 0; n < 64; n++)
         {
-            u[n] = MUL_F(qmfa->x[n], qmf_c[2*n]) +
-                MUL_F(qmfa->x[n + 64], qmf_c[2*(n + 64)]) +
-                MUL_F(qmfa->x[n + 128], qmf_c[2*(n + 128)]) +
-                MUL_F(qmfa->x[n + 192], qmf_c[2*(n + 192)]) +
-                MUL_F(qmfa->x[n + 256], qmf_c[2*(n + 256)]);
+            u[n] = MUL_F(qmfa->x[qmfa->x_index + n], qmf_c[2*n]) +
+                MUL_F(qmfa->x[qmfa->x_index + n + 64], qmf_c[2*(n + 64)]) +
+                MUL_F(qmfa->x[qmfa->x_index + n + 128], qmf_c[2*(n + 128)]) +
+                MUL_F(qmfa->x[qmfa->x_index + n + 192], qmf_c[2*(n + 192)]) +
+                MUL_F(qmfa->x[qmfa->x_index + n + 256], qmf_c[2*(n + 256)]);
         }
 
+		/* update ringbuffer index */
+		qmfa->x_index -= 32;
+		if (qmfa->x_index < 0)
+			qmfa->x_index = (320-32);
+
         /* calculate 32 subband samples by introducing X */
 #ifdef SBR_LOW_POWER
         y[0] = u[48];
@@ -157,12 +168,11 @@
 {
     qmfs_info *qmfs = (qmfs_info*)faad_malloc(sizeof(qmfs_info));
 
-#ifndef SBR_LOW_POWER
-    qmfs->v[0] = (real_t*)faad_malloc(channels * 10 * sizeof(real_t));
-    memset(qmfs->v[0], 0, channels * 10 * sizeof(real_t));
-    qmfs->v[1] = (real_t*)faad_malloc(channels * 10 * sizeof(real_t));
-    memset(qmfs->v[1], 0, channels * 10 * sizeof(real_t));
+	/* v is a double ringbuffer */
+    qmfs->v = (real_t*)faad_malloc(2 * channels * 20 * sizeof(real_t));
+    memset(qmfs->v, 0, 2 * channels * 20 * sizeof(real_t));
 
+#ifndef SBR_LOW_POWER
     if (channels == 32)
     {
         /* downsampled filterbank */
@@ -176,10 +186,6 @@
             IM(qmfs->pre_twiddle[k]) = sin(-M_PI*(0.5*k + 0.25)/64.);
         }
     }
-#else
-    qmfs->v[0] = (real_t*)faad_malloc(channels * 20 * sizeof(real_t));
-    memset(qmfs->v[0], 0, channels * 20 * sizeof(real_t));
-    qmfs->v[1] = NULL;
 #endif
 
     qmfs->v_index = 0;
@@ -186,15 +192,6 @@
 
     qmfs->channels = channels;
 
-#ifdef USE_SSE
-    if (cpu_has_sse())
-    {
-        qmfs->qmf_func = sbr_qmf_synthesis_64_sse;
-    } else {
-        qmfs->qmf_func = sbr_qmf_synthesis_64;
-    }
-#endif
-
     return qmfs;
 }
 
@@ -206,10 +203,7 @@
         {
             if (qmfs->pre_twiddle) faad_free(qmfs->pre_twiddle);
         }
-        if (qmfs->v[0]) faad_free(qmfs->v[0]);
-#ifndef SBR_LOW_POWER
-        if (qmfs->v[1]) faad_free(qmfs->v[1]);
-#endif
+        if (qmfs->v) faad_free(qmfs->v);
         faad_free(qmfs);
     }
 }
@@ -227,17 +221,10 @@
     /* qmf subsample l */
     for (l = 0; l < sbr->numTimeSlotsRate; l++)
     {
-        //real_t *v0, *v1;
-
         /* shift buffers */
-        //memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t));
-        //memmove(qmfs->v[1] + 64, qmfs->v[1], (640-64)*sizeof(real_t));
-        memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t));
+        /* we are not shifting v, it is a double ringbuffer */
+        //memmove(qmfs->v + 64, qmfs->v, (640-64)*sizeof(real_t));
 
-        //v0 = qmfs->v[qmfs->v_index];
-        //v1 = qmfs->v[(qmfs->v_index + 1) & 0x1];
-        //qmfs->v_index = (qmfs->v_index + 1) & 0x1;
-
         /* calculate 64 samples */
         for (k = 0; k < 16; k++)
         {
@@ -257,46 +244,38 @@
 
         for (n = 8; n < 24; n++)
         {
-            qmfs->v[0][n*2] = x[n-8];
-            qmfs->v[0][n*2+1] = y[n-8];
+            qmfs->v[qmfs->v_index + n*2] = qmfs->v[qmfs->v_index + 640 + n*2] = x[n-8];
+            qmfs->v[qmfs->v_index + n*2+1] = qmfs->v[qmfs->v_index + 640 + n*2+1] = y[n-8];
         }
         for (n = 0; n < 16; n++)
         {
-            qmfs->v[0][n] = qmfs->v[0][32-n];
+            qmfs->v[qmfs->v_index + n] = qmfs->v[qmfs->v_index + 640 + n] = qmfs->v[qmfs->v_index + 32-n];
         }
-        qmfs->v[0][48] = 0;
+        qmfs->v[qmfs->v_index + 48] = qmfs->v[qmfs->v_index + 640 + 48] = 0;
         for (n = 1; n < 16; n++)
         {
-            qmfs->v[0][48+n] = -qmfs->v[0][48-n];
+            qmfs->v[qmfs->v_index + 48+n] = qmfs->v[qmfs->v_index + 640 + 48+n] = -qmfs->v[qmfs->v_index + 48-n];
         }
 
         /* calculate 32 output samples and window */
         for (k = 0; k < 32; k++)
         {
-#if 1
-            output[out++] = MUL_F(qmfs->v[0][k], qmf_c[2*k]) +
-                MUL_F(qmfs->v[0][96 + k], qmf_c[64 + 2*k]) +
-                MUL_F(qmfs->v[0][128 + k], qmf_c[128 + 2*k]) +
-                MUL_F(qmfs->v[0][224 + k], qmf_c[192 + 2*k]) +
-                MUL_F(qmfs->v[0][256 + k], qmf_c[256 + 2*k]) +
-                MUL_F(qmfs->v[0][352 + k], qmf_c[320 + 2*k]) +
-                MUL_F(qmfs->v[0][384 + k], qmf_c[384 + 2*k]) +
-                MUL_F(qmfs->v[0][480 + k], qmf_c[448 + 2*k]) +
-                MUL_F(qmfs->v[0][512 + k], qmf_c[512 + 2*k]) +
-                MUL_F(qmfs->v[0][608 + k], qmf_c[576 + 2*k]);
-#else
-            output[out++] = MUL_F(v0[k], qmf_c[2*k]) +
-                MUL_F(v0[32 + k], qmf_c[64 + 2*k]) +
-                MUL_F(v0[64 + k], qmf_c[128 + 2*k]) +
-                MUL_F(v0[96 + k], qmf_c[192 + 2*k]) +
-                MUL_F(v0[128 + k], qmf_c[256 + 2*k]) +
-                MUL_F(v0[160 + k], qmf_c[320 + 2*k]) +
-                MUL_F(v0[192 + k], qmf_c[384 + 2*k]) +
-                MUL_F(v0[224 + k], qmf_c[448 + 2*k]) +
-                MUL_F(v0[256 + k], qmf_c[512 + 2*k]) +
-                MUL_F(v0[288 + k], qmf_c[576 + 2*k]);
-#endif
+            output[out++] = MUL_F(qmfs->v[qmfs->v_index + k], qmf_c[2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 96 + k], qmf_c[64 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 128 + k], qmf_c[128 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 224 + k], qmf_c[192 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 256 + k], qmf_c[256 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 352 + k], qmf_c[320 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 384 + k], qmf_c[384 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 480 + k], qmf_c[448 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 512 + k], qmf_c[512 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 608 + k], qmf_c[576 + 2*k]);
         }
+
+        /* update the ringbuffer index */
+        qmfs->v_index -= 64;
+        if (qmfs->v_index < 0)
+            qmfs->v_index = (640-64);
     }
 }
 
@@ -312,17 +291,10 @@
     /* qmf subsample l */
     for (l = 0; l < sbr->numTimeSlotsRate; l++)
     {
-        //real_t *v0, *v1;
-
         /* shift buffers */
-        //memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t));
-        //memmove(qmfs->v[1] + 64, qmfs->v[1], (640-64)*sizeof(real_t));
-        memmove(qmfs->v[0] + 128, qmfs->v[0], (1280-128)*sizeof(real_t));
+        /* we are not shifting v, it is a double ringbuffer */
+        //memmove(qmfs->v + 128, qmfs->v, (1280-128)*sizeof(real_t));
 
-        //v0 = qmfs->v[qmfs->v_index];
-        //v1 = qmfs->v[(qmfs->v_index + 1) & 0x1];
-        //qmfs->v_index = (qmfs->v_index + 1) & 0x1;
-
         /* calculate 128 samples */
         for (k = 0; k < 32; k++)
         {
@@ -342,46 +314,38 @@
 
         for (n = 16; n < 48; n++)
         {
-            qmfs->v[0][n*2] = x[n-16];
-            qmfs->v[0][n*2+1] = y[n-16];
+            qmfs->v[qmfs->v_index + n*2]   = qmfs->v[qmfs->v_index + 1280 + n*2]   = x[n-16];
+            qmfs->v[qmfs->v_index + n*2+1] = qmfs->v[qmfs->v_index + 1280 + n*2+1] = y[n-16];
         }
         for (n = 0; n < 32; n++)
         {
-            qmfs->v[0][n] = qmfs->v[0][64-n];
+            qmfs->v[qmfs->v_index + n] = qmfs->v[qmfs->v_index + 1280 + n] = qmfs->v[qmfs->v_index + 64-n];
         }
-        qmfs->v[0][96] = 0;
+        qmfs->v[qmfs->v_index + 96] = qmfs->v[qmfs->v_index + 1280 + 96] = 0;
         for (n = 1; n < 32; n++)
         {
-            qmfs->v[0][96+n] = -qmfs->v[0][96-n];
+            qmfs->v[qmfs->v_index + 96+n] = qmfs->v[qmfs->v_index + 1280 + 96+n] = -qmfs->v[qmfs->v_index + 96-n];
         }
 
         /* calculate 64 output samples and window */
         for (k = 0; k < 64; k++)
         {
-#if 1
-            output[out++] = MUL_F(qmfs->v[0][k], qmf_c[k]) +
-                MUL_F(qmfs->v[0][192 + k], qmf_c[64 + k]) +
-                MUL_F(qmfs->v[0][256 + k], qmf_c[128 + k]) +
-                MUL_F(qmfs->v[0][256 + 192 + k], qmf_c[128 + 64 + k]) +
-                MUL_F(qmfs->v[0][512 + k], qmf_c[256 + k]) +
-                MUL_F(qmfs->v[0][512 + 192 + k], qmf_c[256 + 64 + k]) +
-                MUL_F(qmfs->v[0][768 + k], qmf_c[384 + k]) +
-                MUL_F(qmfs->v[0][768 + 192 + k], qmf_c[384 + 64 + k]) +
-                MUL_F(qmfs->v[0][1024 + k], qmf_c[512 + k]) +
-                MUL_F(qmfs->v[0][1024 + 192 + k], qmf_c[512 + 64 + k]);
-#else
-            output[out++] = MUL_F(v0[k], qmf_c[k]) +
-                MUL_F(v0[64 + k], qmf_c[64 + k]) +
-                MUL_F(v0[128 + k], qmf_c[128 + k]) +
-                MUL_F(v0[192 + k], qmf_c[192 + k]) +
-                MUL_F(v0[256 + k], qmf_c[256 + k]) +
-                MUL_F(v0[320 + k], qmf_c[320 + k]) +
-                MUL_F(v0[384 + k], qmf_c[384 + k]) +
-                MUL_F(v0[448 + k], qmf_c[448 + k]) +
-                MUL_F(v0[512 + k], qmf_c[512 + k]) +
-                MUL_F(v0[576 + k], qmf_c[576 + k]);
-#endif
+            output[out++] = MUL_F(qmfs->v[qmfs->v_index + k], qmf_c[k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 192 + k], qmf_c[64 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 256 + k], qmf_c[128 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 256 + 192 + k], qmf_c[128 + 64 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 512 + k], qmf_c[256 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 512 + 192 + k], qmf_c[256 + 64 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 768 + k], qmf_c[384 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 768 + 192 + k], qmf_c[384 + 64 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 1024 + k], qmf_c[512 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 1024 + 192 + k], qmf_c[512 + 64 + k]);
         }
+
+        /* update the ringbuffer index */
+        qmfs->v_index -= 128;
+        if (qmfs->v_index < 0)
+            qmfs->v_index = (1280-128);
     }
 }
 #else
@@ -399,16 +363,10 @@
     /* qmf subsample l */
     for (l = 0; l < sbr->numTimeSlotsRate; l++)
     {
-        real_t *v0, *v1;
+        /* shift buffer v */
+        /* buffer is not shifted, we are using a ringbuffer */
+        //memmove(qmfs->v + 64, qmfs->v, (640-64)*sizeof(real_t));
 
-        /* shift buffers */
-        memmove(qmfs->v[0] + 32, qmfs->v[0], (320-32)*sizeof(real_t));
-        memmove(qmfs->v[1] + 32, qmfs->v[1], (320-32)*sizeof(real_t));
-
-        v0 = qmfs->v[qmfs->v_index];
-        v1 = qmfs->v[(qmfs->v_index + 1) & 0x1];
-        qmfs->v_index = (qmfs->v_index + 1) & 0x1;
-
         /* calculate 64 samples */
         /* complex pre-twiddle */
         for (k = 0; k < 32; k++)
@@ -431,24 +389,29 @@
 
         for (n = 0; n < 32; n++)
         {
-            v0[n]    = -x1[n] + x2[n];
-            v1[31-n] =  x1[n] + x2[n];
+            qmfs->v[qmfs->v_index + n]      = qmfs->v[qmfs->v_index + 640 + n]      = -x1[n] + x2[n];
+            qmfs->v[qmfs->v_index + 63 - n] = qmfs->v[qmfs->v_index + 640 + 63 - n] =  x1[n] + x2[n];
         }
 
         /* calculate 32 output samples and window */
         for (k = 0; k < 32; k++)
         {
-            output[out++] = MUL_F(v0[k], qmf_c[2*k]) +
-                MUL_F(v0[32 + k], qmf_c[64 + 2*k]) +
-                MUL_F(v0[64 + k], qmf_c[128 + 2*k]) +
-                MUL_F(v0[96 + k], qmf_c[192 + 2*k]) +
-                MUL_F(v0[128 + k], qmf_c[256 + 2*k]) +
-                MUL_F(v0[160 + k], qmf_c[320 + 2*k]) +
-                MUL_F(v0[192 + k], qmf_c[384 + 2*k]) +
-                MUL_F(v0[224 + k], qmf_c[448 + 2*k]) +
-                MUL_F(v0[256 + k], qmf_c[512 + 2*k]) +
-                MUL_F(v0[288 + k], qmf_c[576 + 2*k]);
+            output[out++] = MUL_F(qmfs->v[qmfs->v_index + k], qmf_c[2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 96 + k], qmf_c[64 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 128 + k], qmf_c[128 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 224 + k], qmf_c[192 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 256 + k], qmf_c[256 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 352 + k], qmf_c[320 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 384 + k], qmf_c[384 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 480 + k], qmf_c[448 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 512 + k], qmf_c[512 + 2*k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 608 + k], qmf_c[576 + 2*k]);
         }
+
+        /* step v_index back one 64-entry slot; below 0 it wraps to the last slot */
+        qmfs->v_index -= 64;
+        if (qmfs->v_index < 0)
+            qmfs->v_index = (640 - 64);
     }
 }
 
@@ -466,16 +429,10 @@
     /* qmf subsample l */
     for (l = 0; l < sbr->numTimeSlotsRate; l++)
     {
-        real_t *v0, *v1;
+        /* v is a double ringbuffer, so no per-slot shift is needed: */
+        /* each new block is stored at v_index and again 1280 entries on, */
+        /* replacing memmove(v + 128, v, (1280-128)*sizeof(real_t)) */
 
-        /* shift buffers */
-        memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t));
-        memmove(qmfs->v[1] + 64, qmfs->v[1], (640-64)*sizeof(real_t));
-
-        v0 = qmfs->v[qmfs->v_index];
-        v1 = qmfs->v[(qmfs->v_index + 1) & 0x1];
-        qmfs->v_index = (qmfs->v_index + 1) & 0x1;
-
         /* calculate 128 samples */
 #ifndef FIXED_POINT
         x1[0] = scale*QMF_RE(X[l][0]);
@@ -510,150 +467,33 @@
 
         for (n = 0; n < 32; n++)
         {
-            v0[   2*n]   =  x2[2*n]   - x1[2*n];
-            v1[63-2*n]   =  x2[2*n]   + x1[2*n];
-            v0[   2*n+1] = -x2[2*n+1] - x1[2*n+1];
-            v1[62-2*n]   = -x2[2*n+1] + x1[2*n+1];
+            qmfs->v[qmfs->v_index + 2*n]       = qmfs->v[qmfs->v_index + 1280 + 2*n]       =  x2[2*n]   - x1[2*n];
+            qmfs->v[qmfs->v_index + 127 - 2*n] = qmfs->v[qmfs->v_index + 1280 + 127 - 2*n] =  x2[2*n]   + x1[2*n];
+            qmfs->v[qmfs->v_index + 2*n+1]     = qmfs->v[qmfs->v_index + 1280 + 2*n+1]     = -x2[2*n+1] - x1[2*n+1];
+            qmfs->v[qmfs->v_index + 126 - 2*n] = qmfs->v[qmfs->v_index + 1280 + 126 - 2*n] = -x2[2*n+1] + x1[2*n+1];
         }
 
         /* calculate 64 output samples and window */
         for (k = 0; k < 64; k++)
         {
-            output[out++] = MUL_F(v0[k], qmf_c[k]) +
-                MUL_F(v0[64 + k], qmf_c[64 + k]) +
-                MUL_F(v0[128 + k], qmf_c[128 + k]) +
-                MUL_F(v0[192 + k], qmf_c[192 + k]) +
-                MUL_F(v0[256 + k], qmf_c[256 + k]) +
-                MUL_F(v0[320 + k], qmf_c[320 + k]) +
-                MUL_F(v0[384 + k], qmf_c[384 + k]) +
-                MUL_F(v0[448 + k], qmf_c[448 + k]) +
-                MUL_F(v0[512 + k], qmf_c[512 + k]) +
-                MUL_F(v0[576 + k], qmf_c[576 + k]);
+            output[out++] = MUL_F(qmfs->v[qmfs->v_index + k], qmf_c[k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 192 + k], qmf_c[64 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 256 + k], qmf_c[128 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 256 + 192 + k], qmf_c[128 + 64 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 512 + k], qmf_c[256 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 512 + 192 + k], qmf_c[256 + 64 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 768 + k], qmf_c[384 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 768 + 192 + k], qmf_c[384 + 64 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 1024 + k], qmf_c[512 + k]) +
+                MUL_F(qmfs->v[qmfs->v_index + 1024 + 192 + k], qmf_c[512 + 64 + k]);
         }
-    }
-}
 
-#ifdef USE_SSE
-void memmove_sse_576(real_t *out, const real_t *in)
-{
-    __m128 m[144];
-    uint16_t i;
-
-    for (i = 0; i < 144; i++)
-    {
-        m[i] = _mm_load_ps(&in[i*4]);
+        /* step v_index back one 128-entry slot; below 0 it wraps to the last slot */
+        qmfs->v_index -= 128;
+        if (qmfs->v_index < 0)
+            qmfs->v_index = (1280 - 128);
     }
-    for (i = 0; i < 144; i++)
-    {
-        _mm_store_ps(&out[i*4], m[i]);
-    }
 }
-
-void sbr_qmf_synthesis_64_sse(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],
-                              real_t *output)
-{
-    ALIGN real_t x1[64], x2[64];
-    real_t scale = 1.f/64.f;
-    int16_t n, k, out = 0;
-    uint8_t l;
-
-
-    /* qmf subsample l */
-    for (l = 0; l < sbr->numTimeSlotsRate; l++)
-    {
-        real_t *v0, *v1;
-
-        /* shift buffers */
-        memmove_sse_576(qmfs->v[0] + 64, qmfs->v[0]);
-        memmove_sse_576(qmfs->v[1] + 64, qmfs->v[1]);
-
-        v0 = qmfs->v[qmfs->v_index];
-        v1 = qmfs->v[(qmfs->v_index + 1) & 0x1];
-        qmfs->v_index = (qmfs->v_index + 1) & 0x1;
-
-        /* calculate 128 samples */
-        x1[0] = scale*QMF_RE(X[l][0]);
-        x2[63] = scale*QMF_IM(X[l][0]);
-        for (k = 0; k < 31; k++)
-        {
-            x1[2*k+1] = scale*(QMF_RE(X[l][2*k+1]) - QMF_RE(X[l][2*k+2]));
-            x1[2*k+2] = scale*(QMF_RE(X[l][2*k+1]) + QMF_RE(X[l][2*k+2]));
-
-            x2[61 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) - QMF_IM(X[l][2*k+1]));
-            x2[62 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) + QMF_IM(X[l][2*k+1]));
-        }
-        x1[63] = scale*QMF_RE(X[l][63]);
-        x2[0] = scale*QMF_IM(X[l][63]);
-
-        DCT4_64_kernel(x1, x1);
-        DCT4_64_kernel(x2, x2);
-
-        for (n = 0; n < 32; n++)
-        {
-            v0[    2*n   ] =  x2[2*n]   - x1[2*n];
-            v1[63- 2*n   ] =  x2[2*n]   + x1[2*n];
-            v0[    2*n+1 ] = -x2[2*n+1] - x1[2*n+1];
-            v1[63-(2*n+1)] = -x2[2*n+1] + x1[2*n+1];
-        }
-
-        /* calculate 64 output samples and window */
-        for (k = 0; k < 64; k+=4)
-        {
-            __m128 m0, m1, m2, m3, m4, m5, m6, m7, m8, m9;
-            __m128 c0, c1, c2, c3, c4, c5, c6, c7, c8, c9;
-            __m128 s1, s2, s3, s4, s5, s6, s7, s8, s9;
-
-            m0 = _mm_load_ps(&v0[k]);
-            m1 = _mm_load_ps(&v0[k + 64]);
-            m2 = _mm_load_ps(&v0[k + 128]);
-            m3 = _mm_load_ps(&v0[k + 192]);
-            m4 = _mm_load_ps(&v0[k + 256]);
-            c0 = _mm_load_ps(&qmf_c[k]);
-            c1 = _mm_load_ps(&qmf_c[k + 64]);
-            c2 = _mm_load_ps(&qmf_c[k + 128]);
-            c3 = _mm_load_ps(&qmf_c[k + 192]);
-            c4 = _mm_load_ps(&qmf_c[k + 256]);
-
-            m0 = _mm_mul_ps(m0, c0);
-            m1 = _mm_mul_ps(m1, c1);
-            m2 = _mm_mul_ps(m2, c2);
-            m3 = _mm_mul_ps(m3, c3);
-            m4 = _mm_mul_ps(m4, c4);
-
-            s1 = _mm_add_ps(m0, m1);
-            s2 = _mm_add_ps(m2, m3);
-            s6 = _mm_add_ps(s1, s2);
-
-            m5 = _mm_load_ps(&v0[k + 320]);
-            m6 = _mm_load_ps(&v0[k + 384]);
-            m7 = _mm_load_ps(&v0[k + 448]);
-            m8 = _mm_load_ps(&v0[k + 512]);
-            m9 = _mm_load_ps(&v0[k + 576]);
-            c5 = _mm_load_ps(&qmf_c[k + 320]);
-            c6 = _mm_load_ps(&qmf_c[k + 384]);
-            c7 = _mm_load_ps(&qmf_c[k + 448]);
-            c8 = _mm_load_ps(&qmf_c[k + 512]);
-            c9 = _mm_load_ps(&qmf_c[k + 576]);
-
-            m5 = _mm_mul_ps(m5, c5);
-            m6 = _mm_mul_ps(m6, c6);
-            m7 = _mm_mul_ps(m7, c7);
-            m8 = _mm_mul_ps(m8, c8);
-            m9 = _mm_mul_ps(m9, c9);
-
-            s3 = _mm_add_ps(m4, m5);
-            s4 = _mm_add_ps(m6, m7);
-            s5 = _mm_add_ps(m8, m9);
-            s7 = _mm_add_ps(s3, s4);
-            s8 = _mm_add_ps(s5, s6);
-            s9 = _mm_add_ps(s7, s8);
-
-            _mm_store_ps(&output[out], s9);
-            out += 4;
-        }
-    }
-}
-#endif
 #endif
 
 #endif
--- a/libfaad/structs.h
+++ b/libfaad/structs.h
@@ -1,19 +1,19 @@
 /*
 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com
-**  
+**
 ** This program is free software; you can redistribute it and/or modify
 ** it under the terms of the GNU General Public License as published by
 ** the Free Software Foundation; either version 2 of the License, or
 ** (at your option) any later version.
-** 
+**
 ** This program is distributed in the hope that it will be useful,
 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ** GNU General Public License for more details.
-** 
+**
 ** You should have received a copy of the GNU General Public License
-** along with this program; if not, write to the Free Software 
+** along with this program; if not, write to the Free Software
 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 **
 ** Any non-GPL usage of this software or parts of this software is strictly
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: structs.h,v 1.36 2004/03/11 11:40:13 menno Exp $
+** $Id: structs.h,v 1.37 2004/03/27 11:14:49 menno Exp $
 **/
 
 #ifndef __STRUCTS_H__
@@ -305,59 +305,59 @@
 typedef struct mp4AudioSpecificConfig
 {
     /* Audio Specific Info */
-    uint8_t objectTypeIndex;
-    uint8_t samplingFrequencyIndex;
-    uint32_t samplingFrequency;
-    uint8_t channelsConfiguration;
+    /*uint8_t*/ unsigned char objectTypeIndex;
+    /*uint8_t*/ unsigned char samplingFrequencyIndex;
+    /*uint32_t*/ unsigned long samplingFrequency;
+    /*uint8_t*/ unsigned char channelsConfiguration;
 
     /* GA Specific Info */
-    uint8_t frameLengthFlag;
-    uint8_t dependsOnCoreCoder;
-    uint16_t coreCoderDelay;
-    uint8_t extensionFlag;
-    uint8_t aacSectionDataResilienceFlag;
-    uint8_t aacScalefactorDataResilienceFlag;
-    uint8_t aacSpectralDataResilienceFlag;
-    uint8_t epConfig;
+    /*uint8_t*/ unsigned char frameLengthFlag;
+    /*uint8_t*/ unsigned char dependsOnCoreCoder;
+    /*uint16_t*/ unsigned short coreCoderDelay;
+    /*uint8_t*/ unsigned char extensionFlag;
+    /*uint8_t*/ unsigned char aacSectionDataResilienceFlag;
+    /*uint8_t*/ unsigned char aacScalefactorDataResilienceFlag;
+    /*uint8_t*/ unsigned char aacSpectralDataResilienceFlag;
+    /*uint8_t*/ unsigned char epConfig;
 
-    int8_t sbr_present_flag;
-    int8_t forceUpSampling;
-    int8_t downSampledSBR;
+    /*int8_t*/ signed char sbr_present_flag; /* signed like the int8_t it replaces; plain char may be unsigned */
+    /*int8_t*/ signed char forceUpSampling;
+    /*int8_t*/ signed char downSampledSBR;
 } mp4AudioSpecificConfig;
 
 typedef struct NeAACDecConfiguration
 {
-    uint8_t defObjectType;
-    uint32_t defSampleRate;
-    uint8_t outputFormat;
-    uint8_t downMatrix;
-    uint8_t useOldADTSFormat;
-    uint8_t dontUpSampleImplicitSBR;
+    /*uint8_t*/ unsigned char defObjectType; /* bracketed name = the field's former fixed-width type */
+    /*uint32_t*/ unsigned long defSampleRate; /* NOTE(review): unsigned long can exceed 32 bits (e.g. LP64) */
+    /*uint8_t*/ unsigned char outputFormat;
+    /*uint8_t*/ unsigned char downMatrix;
+    /*uint8_t*/ unsigned char useOldADTSFormat;
+    /*uint8_t*/ unsigned char dontUpSampleImplicitSBR;
 } NeAACDecConfiguration, *NeAACDecConfigurationPtr;
 
 typedef struct NeAACDecFrameInfo
 {
-    uint32_t bytesconsumed;
-    uint32_t samples;
-    uint8_t channels;
-    uint8_t error;
-    uint32_t samplerate;
+    /*uint32_t*/ unsigned long bytesconsumed; /* NOTE(review): unsigned long can exceed 32 bits (e.g. LP64) */
+    /*uint32_t*/ unsigned long samples;
+    /*uint8_t*/ unsigned char channels; /* bracketed name = the field's former fixed-width type */
+    /*uint8_t*/ unsigned char error;
+    /*uint32_t*/ unsigned long samplerate;
 
     /* SBR: 0: off, 1: on; normal, 2: on; downsampled */
-    uint8_t sbr;
+    /*uint8_t*/ unsigned char sbr;
 
     /* MPEG-4 ObjectType */
-    uint8_t object_type;
+    /*uint8_t*/ unsigned char object_type;
 
     /* AAC header type; MP4 will be signalled as RAW also */
-    uint8_t header_type;
+    /*uint8_t*/ unsigned char header_type;
 
     /* multichannel configuration */
-    uint8_t num_front_channels;
-    uint8_t num_side_channels;
-    uint8_t num_back_channels;
-    uint8_t num_lfe_channels;
-    uint8_t channel_position[MAX_CHANNELS];
+    /*uint8_t*/ unsigned char num_front_channels;
+    /*uint8_t*/ unsigned char num_side_channels;
+    /*uint8_t*/ unsigned char num_back_channels;
+    /*uint8_t*/ unsigned char num_lfe_channels;
+    /*uint8_t*/ unsigned char channel_position[MAX_CHANNELS];
 } NeAACDecFrameInfo;
 
 typedef struct
--- a/libfaad/tns.c
+++ b/libfaad/tns.c
@@ -22,7 +22,7 @@
 ** Commercial non-GPL licensing of this software is possible.
 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
 **
-** $Id: tns.c,v 1.30 2004/02/26 09:29:28 menno Exp $
+** $Id: tns.c,v 1.32 2004/04/12 18:17:42 menno Exp $
 **/
 
 #include "common.h"
@@ -239,22 +239,24 @@
 
     uint8_t j;
     uint16_t i;
-    real_t y, state[TNS_MAX_ORDER];
+    real_t y;
+    /* filter history as a double ringbuffer: each output is stored at i and i+order */
+    real_t state[2*TNS_MAX_ORDER] = {0};
+    int8_t state_index = 0;
 
-    for (i = 0; i < order; i++)
-        state[i] = 0;
-
     for (i = 0; i < size; i++)
     {
         y = *spectrum;
 
         for (j = 0; j < order; j++)
-            y -= MUL_C(state[j], lpc[j+1]);
+            y -= MUL_C(state[state_index+j], lpc[j+1]);
 
-        for (j = order-1; j > 0; j--)
-            state[j] = state[j-1];
+        /* step the ring index back (wrapping to order-1), then store y in both halves */
+        state_index--;
+        if (state_index < 0)
+            state_index = order-1;
+        state[state_index] = state[state_index + order] = y;
 
-        state[0] = y;
         *spectrum = y;
         spectrum += inc;
     }
@@ -274,11 +276,11 @@
 
     uint8_t j;
     uint16_t i;
-    real_t y, state[TNS_MAX_ORDER];
+    real_t y;
+    /* input history as a double ringbuffer: each input is stored at i and i+order */
+    real_t state[2*TNS_MAX_ORDER] = {0};
+    int8_t state_index = 0;
 
-    for (i = 0; i < order; i++)
-        state[i] = REAL_CONST(0.0);
-
     for (i = 0; i < size; i++)
     {
         y = *spectrum;
@@ -286,10 +288,12 @@
         for (j = 0; j < order; j++)
-            y += MUL_C(state[j], lpc[j+1]);
+            y += MUL_C(state[state_index+j], lpc[j+1]);
 
-        for (j = order-1; j > 0; j--)
-            state[j] = state[j-1];
+        /* newest input goes to state_index (mirrored order entries on); reads start there too */
+        state_index--;
+        if (state_index < 0)
+            state_index = order-1;
+        state[state_index] = state[state_index + order] = *spectrum;
 
-        state[0] = *spectrum;
         *spectrum = y;
         spectrum += inc;
     }