shithub: opus-tools

Download patch

ref: 1d5431d2d8a19e1f733799598a235c8573bb3c1d
parent: 62534c9b2303facfc227a9a1ffc80d35ed6c12f6
author: Mark Harris <mark.hsj@gmail.com>
date: Wed Feb 7 16:16:27 EST 2018

opusdec: Update resampler from speexdsp

Fix use of reserved identifiers, eliminate compiler warnings.

--- a/Makefile.am
+++ b/Makefile.am
@@ -49,7 +49,7 @@
 
 dist_man_MANS = man/opusenc.1 man/opusdec.1 man/opusinfo.1
 
-resampler_CPPFLAGS = -DSPX_RESAMPLE_EXPORT= -DRANDOM_PREFIX=opustools -DOUTSIDE_SPEEX -DFLOATING_POINT
+resampler_CPPFLAGS = -DRANDOM_PREFIX=opustools -DOUTSIDE_SPEEX -DRESAMPLE_FULL_SINC_TABLE
 
 opusenc_SOURCES = src/opus_header.c src/opusenc.c src/picture.c src/audio-in.c src/diag_range.c src/flac.c win32/unicode_support.c
 opusenc_CPPFLAGS = $(AM_CPPFLAGS)
--- a/Makefile.unix
+++ b/Makefile.unix
@@ -30,7 +30,7 @@
 $(VERSIONED_OBJS): CFLAGS += -DPACKAGE_NAME='"opus-tools"' -DPACKAGE_VERSION='$(PACKAGE_VERSION)'
 $(VERSIONED_OBJS): package_version
 
-RESAMPLER_CPPFLAGS = -DSPX_RESAMPLE_EXPORT= -DRANDOM_PREFIX=opustools -DOUTSIDE_SPEEX -DFLOATING_POINT
+RESAMPLER_CPPFLAGS = -DRANDOM_PREFIX=opustools -DOUTSIDE_SPEEX -DRESAMPLE_FULL_SINC_TABLE
 
 src/opusdec.o src/resample.o src/audio-in.o: CFLAGS += $(RESAMPLER_CPPFLAGS)
 
--- a/configure.ac
+++ b/configure.ac
@@ -304,8 +304,8 @@
 CFLAGS="$CFLAGS -W"
 
 saved_CFLAGS="$CFLAGS"
-CFLAGS="$CFLAGS -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes"
-AC_MSG_CHECKING([if ${CC} supports -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes])
+CFLAGS="$CFLAGS -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes -Wno-sign-compare"
+AC_MSG_CHECKING([if ${CC} supports -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes -Wno-sign-compare])
 AC_COMPILE_IFELSE([AC_LANG_SOURCE([[char foo;]])],
     [ AC_MSG_RESULT([yes]) ],
     [ AC_MSG_RESULT([no])
--- a/src/arch.h
+++ b/src/arch.h
@@ -7,18 +7,18 @@
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
-   
+
    - Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
-   
+
    - Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
-   
+
    - Neither the name of the Xiph.org Foundation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.
-   
+
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,47 +35,23 @@
 #ifndef ARCH_H
 #define ARCH_H
 
-#ifndef SPEEX_VERSION
-#define SPEEX_MAJOR_VERSION 1         /**< Major Speex version. */
-#define SPEEX_MINOR_VERSION 1         /**< Minor Speex version. */
-#define SPEEX_MICRO_VERSION 15        /**< Micro Speex version. */
-#define SPEEX_EXTRA_VERSION ""        /**< Extra Speex version. */
-#define SPEEX_VERSION "speex-1.2beta3"  /**< Speex version string. */
-#endif
-
 /* A couple test to catch stupid option combinations */
 #ifdef FIXED_POINT
 
-#ifdef FLOATING_POINT
-#error You cannot compile as floating point and fixed point at the same time
-#endif
-#if defined(__SSE__)
-#error SSE is only for floating-point
-#endif
 #if ((defined (ARM4_ASM)||defined (ARM4_ASM)) && defined(BFIN_ASM)) || (defined (ARM4_ASM)&&defined(ARM5E_ASM))
 #error Make up your mind. What CPU do you have?
 #endif
-#ifdef VORBIS_PSYCHO
-#error Vorbis-psy model currently not implemented in fixed-point
-#endif
 
 #else
 
-#ifndef FLOATING_POINT
-#error You now need to define either FIXED_POINT or FLOATING_POINT
-#endif
 #if defined (ARM4_ASM) || defined(ARM5E_ASM) || defined(BFIN_ASM)
 #error I suppose you can have a [ARM4/ARM5E/Blackfin] that has float instructions?
 #endif
-#ifdef FIXED_POINT_DEBUG
-#error "Don't you think enabling fixed-point is a good thing to do if you want to debug that?"
-#endif
 
-
 #endif
 
 #ifndef OUTSIDE_SPEEX
-#include "../include/speex/speex_types.h"
+#include "speex/speexdsp_types.h"
 #endif
 
 #define ABS(x) ((x) < 0 ? (-(x)) : (x))      /**< Absolute integer value. */
@@ -109,6 +85,8 @@
 #define SIG_SHIFT    14
 #define GAIN_SHIFT   6
 
+#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
+
 #define VERY_SMALL 0
 #define VERY_LARGE32 ((spx_word32_t)2147483647)
 #define VERY_LARGE16 ((spx_word16_t)32767)
@@ -171,6 +149,7 @@
 #define VSHR32(a,shift) (a)
 #define SATURATE16(x,a) (x)
 #define SATURATE32(x,a) (x)
+#define SATURATE32PSHR(x,shift,a) (x)
 
 #define PSHR(a,shift)       (a)
 #define SHR(a,shift)       (a)
@@ -210,7 +189,8 @@
 #define DIV32(a,b)     (((spx_word32_t)(a))/(spx_word32_t)(b))
 #define PDIV32(a,b)     (((spx_word32_t)(a))/(spx_word32_t)(b))
 
-
+#define WORD2INT(x) ((x) < -32767.5f ? -32768 : \
+                    ((x) > 32766.5f ? 32767 : (spx_int16_t)floor(.5 + (x))))
 #endif
 
 
@@ -217,11 +197,11 @@
 #if defined (CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
 
 /* 2 on TI C5x DSP */
-#define BYTES_PER_CHAR 2 
+#define BYTES_PER_CHAR 2
 #define BITS_PER_CHAR 16
 #define LOG2_BITS_PER_CHAR 4
 
-#else 
+#else
 
 #define BYTES_PER_CHAR 1
 #define BITS_PER_CHAR 8
--- a/src/resample.c
+++ b/src/resample.c
@@ -1,6 +1,6 @@
 /* Copyright (C) 2007-2008 Jean-Marc Valin
    Copyright (C) 2008      Thorvald Natvig
-      
+
    File: resample.c
    Arbitrary resampling code
 
@@ -38,41 +38,40 @@
       - Low memory requirement
       - Good *perceptual* quality (and not best SNR)
 
-   Warning: This resampler is relatively new. Although I think I got rid of 
+   Warning: This resampler is relatively new. Although I think I got rid of
    all the major bugs and I don't expect the API to change anymore, there
    may be something I've missed. So use with caution.
 
    This algorithm is based on this original resampling algorithm:
    Smith, Julius O. Digital Audio Resampling Home Page
-   Center for Computer Research in Music and Acoustics (CCRMA), 
+   Center for Computer Research in Music and Acoustics (CCRMA),
    Stanford University, 2007.
-   Web published at http://www-ccrma.stanford.edu/~jos/resample/.
+   Web published at https://ccrma.stanford.edu/~jos/resample/.
 
-   There is one main difference, though. This resampler uses cubic 
+   There is one main difference, though. This resampler uses cubic
    interpolation instead of linear interpolation in the above paper. This
    makes the table much smaller and makes it possible to compute that table
-   on a per-stream basis. In turn, being able to tweak the table for each 
-   stream makes it possible to both reduce complexity on simple ratios 
-   (e.g. 2/3), and get rid of the rounding operations in the inner loop. 
+   on a per-stream basis. In turn, being able to tweak the table for each
+   stream makes it possible to both reduce complexity on simple ratios
+   (e.g. 2/3), and get rid of the rounding operations in the inner loop.
    The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
 */
 
 #ifdef HAVE_CONFIG_H
-# include "config.h"
+#include "config.h"
 #endif
 
-#define RESAMPLE_HUGEMEM 1
-
 #ifdef OUTSIDE_SPEEX
 #include <stdlib.h>
 static void *speex_alloc (int size) {return calloc(size,1);}
 static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
 static void speex_free (void *ptr) {free(ptr);}
+#define EXPORT
 #include "speex_resampler.h"
 #include "arch.h"
 #else /* OUTSIDE_SPEEX */
-               
-#include "../include/speex/speex_resampler.h"
+
+#include "speex/speex_resampler.h"
 #include "arch.h"
 #include "os_support.h"
 #endif /* OUTSIDE_SPEEX */
@@ -79,17 +78,12 @@
 
 #include "stack_alloc.h"
 #include <math.h>
+#include <limits.h>
 
 #ifndef M_PI
-#define M_PI 3.14159263
+#define M_PI 3.14159265358979323846
 #endif
 
-#ifdef FIXED_POINT
-#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))  
-#else
-#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))  
-#endif
-               
 #define IMAX(a,b) ((a) > (b) ? (a) : (b))
 #define IMIN(a,b) ((a) < (b) ? (a) : (b))
 
@@ -97,10 +91,18 @@
 #define NULL 0
 #endif
 
-#if defined(FLOATING_POINT) && defined(__SSE__)
-# include "resample_sse.h"
+#ifndef UINT32_MAX
+#define UINT32_MAX 4294967295U /* 2^32 - 1: largest value of a 32-bit unsigned integer */
+#endif
 
+#if defined(__SSE__) && !defined(FIXED_POINT)
+#include "resample_sse.h"
+#endif
+
+#ifdef USE_NEON
+#include "resample_neon.h"
+#endif
+
 /* Numer of elements to allocate on the stack */
 #ifdef VAR_ARRAYS
 #define FIXED_STACK_ALLOC 8192
@@ -115,7 +117,7 @@
    spx_uint32_t out_rate;
    spx_uint32_t num_rate;
    spx_uint32_t den_rate;
-   
+
    int    quality;
    spx_uint32_t nb_channels;
    spx_uint32_t filt_len;
@@ -127,17 +129,17 @@
    spx_uint32_t oversample;
    int          initialised;
    int          started;
-   
+
    /* These are per-channel */
    spx_int32_t  *last_sample;
    spx_uint32_t *samp_frac_num;
    spx_uint32_t *magic_samples;
-   
+
    spx_word16_t *mem;
    spx_word16_t *sinc_table;
    spx_uint32_t sinc_table_length;
    resampler_basic_func resampler_ptr;
-         
+
    int    in_stride;
    int    out_stride;
 } ;
@@ -179,7 +181,7 @@
    0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
    0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
    0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000};
-   
+
 static const double kaiser6_table[36] = {
    0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
    0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
@@ -192,18 +194,16 @@
    const double *table;
    int oversample;
 };
-      
-static const struct FuncDef _KAISER12 = {kaiser12_table, 64};
-#define KAISER12 (&_KAISER12)
-/*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
-#define KAISER12 (&_KAISER12)*/
-static const struct FuncDef _KAISER10 = {kaiser10_table, 32};
-#define KAISER10 (&_KAISER10)
-static const struct FuncDef _KAISER8 = {kaiser8_table, 32};
-#define KAISER8 (&_KAISER8)
-static const struct FuncDef _KAISER6 = {kaiser6_table, 32};
-#define KAISER6 (&_KAISER6)
 
+static const struct FuncDef kaiser12_funcdef = {kaiser12_table, 64};
+#define KAISER12 (&kaiser12_funcdef)
+static const struct FuncDef kaiser10_funcdef = {kaiser10_table, 32};
+#define KAISER10 (&kaiser10_funcdef)
+static const struct FuncDef kaiser8_funcdef = {kaiser8_table, 32};
+#define KAISER8 (&kaiser8_funcdef)
+static const struct FuncDef kaiser6_funcdef = {kaiser6_table, 32};
+#define KAISER6 (&kaiser6_funcdef)
+
 struct QualityMapping {
    int base_length;
    int oversample;
@@ -214,7 +214,7 @@
 
 
 /* This table maps conversion quality to internal parameters. There are two
-   reasons that explain why the up-sampling bandwidth is larger than the 
+   reasons that explain why the up-sampling bandwidth is larger than the
    down-sampling bandwidth:
    1) When up-sampling, we can assume that the spectrum is already attenuated
       close to the Nyquist rate (from an A/D or a previous resampling filter)
@@ -240,7 +240,7 @@
 {
    float y, frac;
    double interp[4];
-   int ind; 
+   int ind;
    y = x*func->oversample;
    ind = (int)floor(y);
    frac = (y-ind);
@@ -251,7 +251,7 @@
    interp[0] = -0.3333333333*frac + 0.5*(frac*frac) - 0.1666666667*(frac*frac*frac);
    /* Just to make sure we don't have rounding problems */
    interp[1] = 1.f-interp[3]-interp[2]-interp[0];
-   
+
    /*sum = frac*accum[1] + (1-frac)*accum[2];*/
    return interp[0]*func->table[ind] + interp[1]*func->table[ind+1] + interp[2]*func->table[ind+2] + interp[3]*func->table[ind+3];
 }
@@ -362,11 +362,12 @@
       }
       sum = accum[0] + accum[1] + accum[2] + accum[3];
 */
+      sum = SATURATE32PSHR(sum, 15, 32767);
 #else
       sum = inner_product_single(sinct, iptr, N);
 #endif
 
-      out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);
+      out[out_stride * out_sample++] = sum;
       last_sample += int_advance;
       samp_frac_num += frac_advance;
       if (samp_frac_num >= den_rate)
@@ -472,12 +473,13 @@
 
       cubic_coef(frac, interp);
       sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
+      sum = SATURATE32PSHR(sum, 15, 32767);
 #else
       cubic_coef(frac, interp);
       sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
-      
-      out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767);
+
+      out[out_stride * out_sample++] = sum;
       last_sample += int_advance;
       samp_frac_num += frac_advance;
       if (samp_frac_num >= den_rate)
@@ -538,7 +540,7 @@
       cubic_coef(frac, interp);
       sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
-      
+
       out[out_stride * out_sample++] = PSHR32(sum,15);
       last_sample += int_advance;
       samp_frac_num += frac_advance;
@@ -555,21 +557,70 @@
 }
 #endif
 
-static void update_filter(SpeexResamplerState *st)
+/* This resampler is used to produce zero output in situations where memory
+   for the filter could not be allocated.  The expected numbers of input and
+   output samples are still processed so that callers failing to check error
+   codes are not surprised, possibly getting into infinite loops. */
+static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
 {
-   spx_uint32_t old_length;
-   
-   old_length = st->filt_len;
+   int out_sample = 0;
+   int last_sample = st->last_sample[channel_index];
+   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
+   const int out_stride = st->out_stride;
+   const int int_advance = st->int_advance;
+   const int frac_advance = st->frac_advance;
+   const spx_uint32_t den_rate = st->den_rate;
+
+   (void)in;
+   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+   {
+      out[out_stride * out_sample++] = 0;
+      last_sample += int_advance;
+      samp_frac_num += frac_advance;
+      if (samp_frac_num >= den_rate)
+      {
+         samp_frac_num -= den_rate;
+         last_sample++;
+      }
+   }
+
+   st->last_sample[channel_index] = last_sample;
+   st->samp_frac_num[channel_index] = samp_frac_num;
+   return out_sample;
+}
+
+static int multiply_frac(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t num, spx_uint32_t den)
+{
+   spx_uint32_t major = value / den;
+   spx_uint32_t remain = value % den;
+   /* TODO: Could use 64-bit operations to check for overflow, but 64-bit types are only guaranteed in C99+ */
+   if (remain > UINT32_MAX / num || major > UINT32_MAX / num
+       || major * num > UINT32_MAX - remain * num / den)
+      return RESAMPLER_ERR_OVERFLOW;
+   *result = remain * num / den + major * num;
+   return RESAMPLER_ERR_SUCCESS;
+}
+
+static int update_filter(SpeexResamplerState *st)
+{
+   spx_uint32_t old_length = st->filt_len;
+   spx_uint32_t old_alloc_size = st->mem_alloc_size;
+   int use_direct;
+   spx_uint32_t min_sinc_table_length;
+   spx_uint32_t min_alloc_size;
+
+   st->int_advance = st->num_rate/st->den_rate;
+   st->frac_advance = st->num_rate%st->den_rate;
    st->oversample = quality_map[st->quality].oversample;
    st->filt_len = quality_map[st->quality].base_length;
-   
+
    if (st->num_rate > st->den_rate)
    {
       /* down-sampling */
       st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate;
-      /* FIXME: divide the numerator and denominator by a certain amount if they're too large */
-      st->filt_len = st->filt_len*st->num_rate / st->den_rate;
-      /* Round up to make sure we have a multiple of 8 */
+      if (multiply_frac(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
+         goto fail;
+      /* Round up to make sure we have a multiple of 8 for SSE */
       st->filt_len = ((st->filt_len-1)&(~0x7))+8;
       if (2*st->den_rate < st->num_rate)
          st->oversample >>= 1;
@@ -586,21 +637,36 @@
       st->cutoff = quality_map[st->quality].upsample_bandwidth;
    }
 
-#ifdef RESAMPLE_HUGEMEM
-   if (st->den_rate <= 16*(st->oversample+8))
+#ifdef RESAMPLE_FULL_SINC_TABLE
+   use_direct = 1;
+   if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
+      goto fail;
 #else
    /* Choose the resampling type that requires the least amount of memory */
-   if (st->den_rate <= (st->oversample+8))
+   use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
+                && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
 #endif
+   if (use_direct)
    {
+      min_sinc_table_length = st->filt_len*st->den_rate;
+   } else {
+      if ((INT_MAX/sizeof(spx_word16_t)-8)/st->oversample < st->filt_len)
+         goto fail;
+
+      min_sinc_table_length = st->filt_len*st->oversample+8;
+   }
+   if (st->sinc_table_length < min_sinc_table_length)
+   {
+      spx_word16_t *sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,min_sinc_table_length*sizeof(spx_word16_t));
+      if (!sinc_table)
+         goto fail;
+
+      st->sinc_table = sinc_table;
+      st->sinc_table_length = min_sinc_table_length;
+   }
+   if (use_direct)
+   {
       spx_uint32_t i;
-      if (!st->sinc_table)
-         st->sinc_table = (spx_word16_t *)speex_alloc(st->filt_len*st->den_rate*sizeof(spx_word16_t));
-      else if (st->sinc_table_length < st->filt_len*st->den_rate)
-      {
-         st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,st->filt_len*st->den_rate*sizeof(spx_word16_t));
-         st->sinc_table_length = st->filt_len*st->den_rate;
-      }
       for (i=0;i<st->den_rate;i++)
       {
          spx_int32_t j;
@@ -620,13 +686,6 @@
       /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/
    } else {
       spx_int32_t i;
-      if (!st->sinc_table)
-         st->sinc_table = (spx_word16_t *)speex_alloc((st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
-      else if (st->sinc_table_length < st->filt_len*st->oversample+8)
-      {
-         st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,(st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
-         st->sinc_table_length = st->filt_len*st->oversample+8;
-      }
       for (i=-4;i<(spx_int32_t)(st->oversample*st->filt_len+4);i++)
          st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len, quality_map[st->quality].window_func);
 #ifdef FIXED_POINT
@@ -639,51 +698,47 @@
 #endif
       /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/
    }
-   st->int_advance = st->num_rate/st->den_rate;
-   st->frac_advance = st->num_rate%st->den_rate;
 
-   
    /* Here's the place where we update the filter memory to take into account
       the change in filter length. It's probably the messiest part of the code
       due to handling of lots of corner cases. */
-   if (!st->mem)
+
+   /* Adding buffer_size to filt_len won't overflow here because filt_len
+      could be multiplied by sizeof(spx_word16_t) above. */
+   min_alloc_size = st->filt_len-1 + st->buffer_size;
+   if (min_alloc_size > st->mem_alloc_size)
    {
-      spx_uint32_t i;
-      st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-      st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
-      for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
-         st->mem[i] = 0;
-      /*speex_warning("init filter");*/
-   } else if (!st->started)
+      spx_word16_t *mem;
+      if (INT_MAX/sizeof(spx_word16_t)/st->nb_channels < min_alloc_size)
+          goto fail;
+      else if (!(mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*min_alloc_size * sizeof(*mem))))
+          goto fail;
+
+      st->mem = mem;
+      st->mem_alloc_size = min_alloc_size;
+   }
+   if (!st->started)
    {
       spx_uint32_t i;
-      st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-      st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
       for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
          st->mem[i] = 0;
       /*speex_warning("reinit filter");*/
    } else if (st->filt_len > old_length)
    {
-      spx_int32_t i;
+      spx_uint32_t i;
       /* Increase the filter length */
       /*speex_warning("increase filter size");*/
-      int old_alloc_size = st->mem_alloc_size;
-      if ((st->filt_len-1 + st->buffer_size) > st->mem_alloc_size)
+      for (i=st->nb_channels;i--;)
       {
-         st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-         st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
-      }
-      for (i=st->nb_channels-1;i>=0;i--)
-      {
-         spx_int32_t j;
+         spx_uint32_t j;
          spx_uint32_t olen = old_length;
          /*if (st->magic_samples[i])*/
          {
             /* Try and remove the magic samples as if nothing had happened */
-            
+
             /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
             olen = old_length + 2*st->magic_samples[i];
-            for (j=old_length-2+st->magic_samples[i];j>=0;j--)
+            for (j=old_length-1+st->magic_samples[i];j--;)
                st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j];
             for (j=0;j<st->magic_samples[i];j++)
                st->mem[i*st->mem_alloc_size+j] = 0;
@@ -724,19 +779,28 @@
          st->magic_samples[i] += old_magic;
       }
    }
+   return RESAMPLER_ERR_SUCCESS;
 
+fail:
+   st->resampler_ptr = resampler_basic_zero;
+   /* st->mem may still contain consumed input samples for the filter.
+      Restore filt_len so that filt_len - 1 still points to the position after
+      the last of these samples. */
+   st->filt_len = old_length;
+   return RESAMPLER_ERR_ALLOC_FAILED;
 }
 
-SPX_RESAMPLE_EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
+EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
 {
    return speex_resampler_init_frac(nb_channels, in_rate, out_rate, in_rate, out_rate, quality, err);
 }
 
-SPX_RESAMPLE_EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
+EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
 {
-   spx_uint32_t i;
    SpeexResamplerState *st;
-   if (quality > 10 || quality < 0)
+   int filter_err;
+
+   if (nb_channels == 0 || ratio_num == 0 || ratio_den == 0 || quality > 10 || quality < 0)
    {
       if (err)
          *err = RESAMPLER_ERR_INVALID_ARG;
@@ -743,6 +807,12 @@
       return NULL;
    }
    st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState));
+   if (!st)
+   {
+      if (err)
+         *err = RESAMPLER_ERR_ALLOC_FAILED;
+      return NULL;
+   }
    st->initialised = 0;
    st->started = 0;
    st->in_rate = 0;
@@ -755,43 +825,46 @@
    st->filt_len = 0;
    st->mem = 0;
    st->resampler_ptr = 0;
-         
+
    st->cutoff = 1.f;
    st->nb_channels = nb_channels;
    st->in_stride = 1;
    st->out_stride = 1;
-   
-#ifdef FIXED_POINT
+
    st->buffer_size = 160;
-#else
-   st->buffer_size = 160;
-#endif
-   
+
    /* Per channel data */
-   st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t));
-   st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t));
-   st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t));
-   for (i=0;i<nb_channels;i++)
-   {
-      st->last_sample[i] = 0;
-      st->magic_samples[i] = 0;
-      st->samp_frac_num[i] = 0;
-   }
+   if (!(st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t))))
+      goto fail;
+   if (!(st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
+      goto fail;
+   if (!(st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
+      goto fail;
 
    speex_resampler_set_quality(st, quality);
    speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate);
 
-   
-   update_filter(st);
-   
-   st->initialised = 1;
+   filter_err = update_filter(st);
+   if (filter_err == RESAMPLER_ERR_SUCCESS)
+   {
+      st->initialised = 1;
+   } else {
+      speex_resampler_destroy(st);
+      st = NULL;
+   }
    if (err)
-      *err = RESAMPLER_ERR_SUCCESS;
+      *err = filter_err;
 
    return st;
+
+fail:
+   if (err)
+      *err = RESAMPLER_ERR_ALLOC_FAILED;
+   speex_resampler_destroy(st);
+   return NULL;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_destroy(SpeexResamplerState *st)
+EXPORT void speex_resampler_destroy(SpeexResamplerState *st)
 {
    speex_free(st->mem);
    speex_free(st->sinc_table);
@@ -808,17 +881,17 @@
    int out_sample = 0;
    spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
    spx_uint32_t ilen;
-   
+
    st->started = 1;
-   
+
    /* Call the right resampler through the function ptr */
    out_sample = st->resampler_ptr(st, channel_index, mem, in_len, out, out_len);
-   
+
    if (st->last_sample[channel_index] < (spx_int32_t)*in_len)
       *in_len = st->last_sample[channel_index];
    *out_len = out_sample;
    st->last_sample[channel_index] -= *in_len;
-   
+
    ilen = *in_len;
 
    for(j=0;j<N-1;++j)
@@ -831,11 +904,11 @@
    spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
    spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
    const int N = st->filt_len;
-   
+
    speex_resampler_process_native(st, channel_index, &tmp_in_len, *out, &out_len);
 
    st->magic_samples[channel_index] -= tmp_in_len;
-   
+
    /* If we couldn't process all "magic" input samples, save the rest for next time */
    if (st->magic_samples[channel_index])
    {
@@ -848,9 +921,9 @@
 }
 
 #ifdef FIXED_POINT
-SPX_RESAMPLE_EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
 #else
-SPX_RESAMPLE_EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
 #endif
 {
    int j;
@@ -861,13 +934,13 @@
    const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
    const int istride = st->in_stride;
 
-   if (st->magic_samples[channel_index]) 
+   if (st->magic_samples[channel_index])
       olen -= speex_resampler_magic(st, channel_index, &out, olen);
    if (! st->magic_samples[channel_index]) {
       while (ilen && olen) {
         spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
         spx_uint32_t ochunk = olen;
- 
+
         if (in) {
            for(j=0;j<ichunk;++j)
               x[j+filt_offs]=in[j*istride];
@@ -885,13 +958,13 @@
    }
    *in_len -= ilen;
    *out_len -= olen;
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
 
 #ifdef FIXED_POINT
-SPX_RESAMPLE_EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
 #else
-SPX_RESAMPLE_EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
 #endif
 {
    int j;
@@ -911,7 +984,7 @@
 #endif
 
    st->out_stride = 1;
-   
+
    while (ilen && olen) {
      spx_word16_t *y = ystack;
      spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
@@ -948,7 +1021,7 @@
 #else
         out[j*ostride_save] = WORD2INT(ystack[j]);
 #endif
-     
+
      ilen -= ichunk;
      olen -= ochunk;
      out += (ochunk+omagic) * ostride_save;
@@ -959,10 +1032,10 @@
    *in_len -= ilen;
    *out_len -= olen;
 
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
 {
    spx_uint32_t i;
    int istride_save, ostride_save;
@@ -982,10 +1055,10 @@
    }
    st->in_stride = istride_save;
    st->out_stride = ostride_save;
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
-               
-SPX_RESAMPLE_EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
+
+EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
 {
    spx_uint32_t i;
    int istride_save, ostride_save;
@@ -1005,66 +1078,79 @@
    }
    st->in_stride = istride_save;
    st->out_stride = ostride_save;
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate)
+EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate)
 {
    return speex_resampler_set_rate_frac(st, in_rate, out_rate, in_rate, out_rate);
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_rate, spx_uint32_t *out_rate)
+EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_rate, spx_uint32_t *out_rate)
 {
    *in_rate = st->in_rate;
    *out_rate = st->out_rate;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
+static inline spx_uint32_t compute_gcd(spx_uint32_t a, spx_uint32_t b)
 {
+   /* Euclid's algorithm: replace (a, b) by (b, a mod b) until b reaches 0. */
+   while (b != 0)
+   {
+      const spx_uint32_t rem = a % b;
+      a = b;
+      b = rem;
+   }
+   return a;
+}
+
+EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
+{
    spx_uint32_t fact;
    spx_uint32_t old_den;
    spx_uint32_t i;
+   /* Store a new rate ratio in st. A zero ratio term is rejected before any state is modified. */
+   if (ratio_num == 0 || ratio_den == 0)
+      return RESAMPLER_ERR_INVALID_ARG;
+   /* Early out when the arguments equal the values currently stored in st. */
    if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den)
       return RESAMPLER_ERR_SUCCESS;
-   
+   /* Keep the previous denominator: samp_frac_num is rescaled from it below. */
    old_den = st->den_rate;
    st->in_rate = in_rate;
    st->out_rate = out_rate;
    st->num_rate = ratio_num;
    st->den_rate = ratio_den;
-   /* FIXME: This is terribly inefficient, but who cares (at least for now)? */
-   for (fact=2;fact<=IMIN(st->num_rate, st->den_rate);fact++)
-   {
-      while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0))
-      {
-         st->num_rate /= fact;
-         st->den_rate /= fact;
-      }
-   }
-      
+   /* Reduce num_rate/den_rate to lowest terms by dividing both by their GCD. */
+   fact = compute_gcd(st->num_rate, st->den_rate);
+
+   st->num_rate /= fact;
+   st->den_rate /= fact;
+   /* Rescale each channel's samp_frac_num from old_den to the new den_rate; skipped when old_den is 0. */
    if (old_den > 0)
    {
       for (i=0;i<st->nb_channels;i++)
       {
-         st->samp_frac_num[i]=st->samp_frac_num[i]*st->den_rate/old_den;
+         if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
+            return RESAMPLER_ERR_OVERFLOW; /* NOTE(review): in_rate/out_rate/num_rate/den_rate were already overwritten above */
          /* Safety net */
          if (st->samp_frac_num[i] >= st->den_rate)
             st->samp_frac_num[i] = st->den_rate-1;
       }
    }
-   
+   /* When st->initialised is set, call update_filter() and propagate its return value. */
    if (st->initialised)
-      update_filter(st);
+      return update_filter(st);
    return RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_ratio(SpeexResamplerState *st, spx_uint32_t *ratio_num, spx_uint32_t *ratio_den)
+EXPORT void speex_resampler_get_ratio(SpeexResamplerState *st, spx_uint32_t *ratio_num, spx_uint32_t *ratio_den)
 {
    *ratio_num = st->num_rate;
    *ratio_den = st->den_rate;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality)
+EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality)
 {
    if (quality > 10 || quality < 0)
       return RESAMPLER_ERR_INVALID_ARG;
@@ -1072,46 +1158,46 @@
       return RESAMPLER_ERR_SUCCESS;
    st->quality = quality;
    if (st->initialised)
-      update_filter(st);
+      return update_filter(st);
    return RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_quality(SpeexResamplerState *st, int *quality)
+EXPORT void speex_resampler_get_quality(SpeexResamplerState *st, int *quality)
 {
    *quality = st->quality;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_set_input_stride(SpeexResamplerState *st, spx_uint32_t stride)
+EXPORT void speex_resampler_set_input_stride(SpeexResamplerState *st, spx_uint32_t stride)
 {
    st->in_stride = stride;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_input_stride(SpeexResamplerState *st, spx_uint32_t *stride)
+EXPORT void speex_resampler_get_input_stride(SpeexResamplerState *st, spx_uint32_t *stride)
 {
    *stride = st->in_stride;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_set_output_stride(SpeexResamplerState *st, spx_uint32_t stride)
+EXPORT void speex_resampler_set_output_stride(SpeexResamplerState *st, spx_uint32_t stride)
 {
    st->out_stride = stride;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_output_stride(SpeexResamplerState *st, spx_uint32_t *stride)
+EXPORT void speex_resampler_get_output_stride(SpeexResamplerState *st, spx_uint32_t *stride)
 {
    *stride = st->out_stride;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_get_input_latency(SpeexResamplerState *st)
+EXPORT int speex_resampler_get_input_latency(SpeexResamplerState *st)
 {
   return st->filt_len / 2;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_get_output_latency(SpeexResamplerState *st)
+EXPORT int speex_resampler_get_output_latency(SpeexResamplerState *st)
 {
   return ((st->filt_len / 2) * st->den_rate + (st->num_rate >> 1)) / st->num_rate;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st)
+EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st)
 {
    spx_uint32_t i;
    for (i=0;i<st->nb_channels;i++)
@@ -1119,7 +1205,7 @@
    return RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st)
+EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st)
 {
    spx_uint32_t i;
    for (i=0;i<st->nb_channels;i++)
@@ -1133,7 +1219,7 @@
    return RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT const char *speex_resampler_strerror(int err)
+EXPORT const char *speex_resampler_strerror(int err)
 {
    switch (err)
    {
--- a/src/resample_sse.h
+++ b/src/resample_sse.h
@@ -9,18 +9,18 @@
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
-   
+
    - Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
-   
+
    - Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
-   
+
    - Neither the name of the Xiph.org Foundation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.
-   
+
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -41,23 +41,15 @@
 {
    int i;
    float ret;
-   if (1)
+   __m128 sum = _mm_setzero_ps();
+   for (i=0;i<len;i+=8)
    {
-      __m128 sum = _mm_setzero_ps();
-      for (i=0;i<len;i+=8)
-      {
-         sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
-         sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
-      }
-      sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
-      sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
-      _mm_store_ss(&ret, sum);
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
    }
-   else
-   {
-      ret = 0;
-      for (i=0;i<len;i++) ret += a[i] * b[i];
-   }
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+   _mm_store_ss(&ret, sum);
    return ret;
 }
 
@@ -65,34 +57,18 @@
 static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   int i;
   float ret;
-  if (1)
+  __m128 sum = _mm_setzero_ps(); /* four partial sums, one per lane */
+  __m128 f = _mm_loadu_ps(frac); /* four weights read from frac[0..3] */
+  for(i=0;i<len;i+=2) /* reads a[i] and a[i+1] per pass; NOTE(review): an odd len would read a[len] - presumably callers pass an even len, confirm */
   {
-    __m128 sum = _mm_setzero_ps();
-    __m128 f = _mm_loadu_ps(frac);
-    for(i=0;i<len;i+=2)
-    {
-      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
-      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
-    }
-    sum = _mm_mul_ps(f, sum);
-    sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
-    sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
-    _mm_store_ss(&ret, sum);
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample))); /* lane k += a[i] * b[i*oversample+k] */
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample))); /* lane k += a[i+1] * b[(i+1)*oversample+k] */
   }
-  else
-  {
-    float accum[4] = {0,0,0,0};
-    for(i=0;i<len;i++)
-    {
-      const float curr_in=a[i];
-      accum[0] += curr_in * b[i * oversample + 0];
-      accum[1] += curr_in * b[i * oversample + 1];
-      accum[2] += curr_in * b[i * oversample + 2];
-      accum[3] += curr_in * b[i * oversample + 3];
-    }
-    ret = accum[0] * frac[0] + accum[1] * frac[1] + accum[2] * frac[2] + accum[3] * frac[3];
-  }
-  return ret;
+   sum = _mm_mul_ps(f, sum); /* weight each lane by the matching frac value */
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); /* fold lanes 2,3 onto lanes 0,1 */
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); /* add lane 1 into lane 0 */
+   _mm_store_ss(&ret, sum); /* lane 0 holds the total of all four weighted lanes */
+   return ret;
 }
 
 #ifdef __SSE2__
@@ -115,7 +91,7 @@
       sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
       sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
    }
-   sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
+   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
    _mm_store_sd(&ret, sum);
    return ret;
 }
@@ -144,7 +120,7 @@
   sum1 = _mm_mul_pd(f1, sum1);
   sum2 = _mm_mul_pd(f2, sum2);
   sum = _mm_add_pd(sum1, sum2);
-  sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
+  sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
   _mm_store_sd(&ret, sum);
   return ret;
 }
--- a/src/speex_resampler.h
+++ b/src/speex_resampler.h
@@ -1,8 +1,8 @@
 /* Copyright (C) 2007 Jean-Marc Valin
-      
+
    File: speex_resampler.h
    Resampling code
-      
+
    The design goals of this code are:
       - Very fast algorithm
       - Low memory requirement
@@ -43,7 +43,7 @@
 
 /********* WARNING: MENTAL SANITY ENDS HERE *************/
 
-/* If the resampler is defined outside of Speex, we change the symbol names so that 
+/* If the resampler is defined outside of Speex, we change the symbol names so that
    there won't be any clash if linking with Speex later on. */
 
 /* #define RANDOM_PREFIX your software name here */
@@ -53,7 +53,7 @@
 
 #define CAT_PREFIX2(a,b) a ## b
 #define CAT_PREFIX(a,b) CAT_PREFIX2(a, b)
-      
+
 #define speex_resampler_init CAT_PREFIX(RANDOM_PREFIX,_resampler_init)
 #define speex_resampler_init_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_init_frac)
 #define speex_resampler_destroy CAT_PREFIX(RANDOM_PREFIX,_resampler_destroy)
@@ -81,14 +81,12 @@
 #define spx_int32_t int
 #define spx_uint16_t unsigned short
 #define spx_uint32_t unsigned int
-      
+
+#define speex_assert(cond)
+
 #else /* OUTSIDE_SPEEX */
 
-#ifdef _BUILD_SPEEX
-# include "speex_types.h"
-#else
-# include <speex/speex_types.h>
-#endif
+#include "speexdsp_types.h"
 
 #endif /* OUTSIDE_SPEEX */
 
@@ -108,7 +106,8 @@
    RESAMPLER_ERR_BAD_STATE       = 2,
    RESAMPLER_ERR_INVALID_ARG     = 3,
    RESAMPLER_ERR_PTR_OVERLAP     = 4,
-   
+   RESAMPLER_ERR_OVERFLOW        = 5,
+
    RESAMPLER_ERR_MAX_ERROR
 };
 
@@ -124,14 +123,14 @@
  * @return Newly created resampler state
  * @retval NULL Error: not enough memory
  */
-SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, 
-                                          spx_uint32_t in_rate, 
-                                          spx_uint32_t out_rate, 
+SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels,
+                                          spx_uint32_t in_rate,
+                                          spx_uint32_t out_rate,
                                           int quality,
                                           int *err);
 
-/** Create a new resampler with fractional input/output rates. The sampling 
- * rate ratio is an arbitrary rational number with both the numerator and 
+/** Create a new resampler with fractional input/output rates. The sampling
+ * rate ratio is an arbitrary rational number with both the numerator and
  * denominator being 32-bit integers.
  * @param nb_channels Number of channels to be processed
  * @param ratio_num Numerator of the sampling rate ratio
@@ -143,11 +142,11 @@
  * @return Newly created resampler state
  * @retval NULL Error: not enough memory
  */
-SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, 
-                                               spx_uint32_t ratio_num, 
-                                               spx_uint32_t ratio_den, 
-                                               spx_uint32_t in_rate, 
-                                               spx_uint32_t out_rate, 
+SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
+                                               spx_uint32_t ratio_num,
+                                               spx_uint32_t ratio_den,
+                                               spx_uint32_t in_rate,
+                                               spx_uint32_t out_rate,
                                                int quality,
                                                int *err);
 
@@ -158,24 +157,24 @@
 
 /** Resample a float array. The input and output buffers must *not* overlap.
  * @param st Resampler state
- * @param channel_index Index of the channel to process for the multi-channel 
+ * @param channel_index Index of the channel to process for the multi-channel
  * base (0 otherwise)
  * @param in Input buffer
- * @param in_len Number of input samples in the input buffer. Returns the 
+ * @param in_len Number of input samples in the input buffer. Returns the
  * number of samples processed
  * @param out Output buffer
  * @param out_len Size of the output buffer. Returns the number of samples written
  */
-int speex_resampler_process_float(SpeexResamplerState *st, 
-                                   spx_uint32_t channel_index, 
-                                   const float *in, 
-                                   spx_uint32_t *in_len, 
-                                   float *out, 
+int speex_resampler_process_float(SpeexResamplerState *st,
+                                   spx_uint32_t channel_index,
+                                   const float *in,
+                                   spx_uint32_t *in_len,
+                                   float *out,
                                    spx_uint32_t *out_len);
 
 /** Resample an int array. The input and output buffers must *not* overlap.
  * @param st Resampler state
- * @param channel_index Index of the channel to process for the multi-channel 
+ * @param channel_index Index of the channel to process for the multi-channel
  * base (0 otherwise)
  * @param in Input buffer
  * @param in_len Number of input samples in the input buffer. Returns the number
@@ -183,11 +182,11 @@
  * @param out Output buffer
  * @param out_len Size of the output buffer. Returns the number of samples written
  */
-int speex_resampler_process_int(SpeexResamplerState *st, 
-                                 spx_uint32_t channel_index, 
-                                 const spx_int16_t *in, 
-                                 spx_uint32_t *in_len, 
-                                 spx_int16_t *out, 
+int speex_resampler_process_int(SpeexResamplerState *st,
+                                 spx_uint32_t channel_index,
+                                 const spx_int16_t *in,
+                                 spx_uint32_t *in_len,
+                                 spx_int16_t *out,
                                  spx_uint32_t *out_len);
 
 /** Resample an interleaved float array. The input and output buffers must *not* overlap.
@@ -199,10 +198,10 @@
  * @param out_len Size of the output buffer. Returns the number of samples written.
  * This is all per-channel.
  */
-int speex_resampler_process_interleaved_float(SpeexResamplerState *st, 
-                                               const float *in, 
-                                               spx_uint32_t *in_len, 
-                                               float *out, 
+int speex_resampler_process_interleaved_float(SpeexResamplerState *st,
+                                               const float *in,
+                                               spx_uint32_t *in_len,
+                                               float *out,
                                                spx_uint32_t *out_len);
 
 /** Resample an interleaved int array. The input and output buffers must *not* overlap.
@@ -214,10 +213,10 @@
  * @param out_len Size of the output buffer. Returns the number of samples written.
  * This is all per-channel.
  */
-int speex_resampler_process_interleaved_int(SpeexResamplerState *st, 
-                                             const spx_int16_t *in, 
-                                             spx_uint32_t *in_len, 
-                                             spx_int16_t *out, 
+int speex_resampler_process_interleaved_int(SpeexResamplerState *st,
+                                             const spx_int16_t *in,
+                                             spx_uint32_t *in_len,
+                                             spx_int16_t *out,
                                              spx_uint32_t *out_len);
 
 /** Set (change) the input/output sampling rates (integer value).
@@ -225,8 +224,8 @@
  * @param in_rate Input sampling rate (integer number of Hz).
  * @param out_rate Output sampling rate (integer number of Hz).
  */
-int speex_resampler_set_rate(SpeexResamplerState *st, 
-                              spx_uint32_t in_rate, 
+int speex_resampler_set_rate(SpeexResamplerState *st,
+                              spx_uint32_t in_rate,
                               spx_uint32_t out_rate);
 
 /** Get the current input/output sampling rates (integer value).
@@ -234,11 +233,11 @@
  * @param in_rate Input sampling rate (integer number of Hz) copied.
  * @param out_rate Output sampling rate (integer number of Hz) copied.
  */
-void speex_resampler_get_rate(SpeexResamplerState *st, 
-                              spx_uint32_t *in_rate, 
+void speex_resampler_get_rate(SpeexResamplerState *st,
+                              spx_uint32_t *in_rate,
                               spx_uint32_t *out_rate);
 
-/** Set (change) the input/output sampling rates and resampling ratio 
+/** Set (change) the input/output sampling rates and resampling ratio
  * (fractional values in Hz supported).
  * @param st Resampler state
  * @param ratio_num Numerator of the sampling rate ratio
@@ -246,10 +245,10 @@
  * @param in_rate Input sampling rate rounded to the nearest integer (in Hz).
  * @param out_rate Output sampling rate rounded to the nearest integer (in Hz).
  */
-int speex_resampler_set_rate_frac(SpeexResamplerState *st, 
-                                   spx_uint32_t ratio_num, 
-                                   spx_uint32_t ratio_den, 
-                                   spx_uint32_t in_rate, 
+int speex_resampler_set_rate_frac(SpeexResamplerState *st,
+                                   spx_uint32_t ratio_num,
+                                   spx_uint32_t ratio_den,
+                                   spx_uint32_t in_rate,
                                    spx_uint32_t out_rate);
 
 /** Get the current resampling ratio. This will be reduced to the least
@@ -258,24 +257,24 @@
  * @param ratio_num Numerator of the sampling rate ratio copied
  * @param ratio_den Denominator of the sampling rate ratio copied
  */
-void speex_resampler_get_ratio(SpeexResamplerState *st, 
-                               spx_uint32_t *ratio_num, 
+void speex_resampler_get_ratio(SpeexResamplerState *st,
+                               spx_uint32_t *ratio_num,
                                spx_uint32_t *ratio_den);
 
 /** Set (change) the conversion quality.
  * @param st Resampler state
- * @param quality Resampling quality between 0 and 10, where 0 has poor 
+ * @param quality Resampling quality between 0 and 10, where 0 has poor
  * quality and 10 has very high quality.
  */
-int speex_resampler_set_quality(SpeexResamplerState *st, 
+int speex_resampler_set_quality(SpeexResamplerState *st,
                                  int quality);
 
 /** Get the conversion quality.
  * @param st Resampler state
- * @param quality Resampling quality between 0 and 10, where 0 has poor 
+ * @param quality Resampling quality between 0 and 10, where 0 has poor
  * quality and 10 has very high quality.
  */
-void speex_resampler_get_quality(SpeexResamplerState *st, 
+void speex_resampler_get_quality(SpeexResamplerState *st,
                                  int *quality);
 
 /** Set (change) the input stride.
@@ -282,7 +281,7 @@
  * @param st Resampler state
  * @param stride Input stride
  */
-void speex_resampler_set_input_stride(SpeexResamplerState *st, 
+void speex_resampler_set_input_stride(SpeexResamplerState *st,
                                       spx_uint32_t stride);
 
 /** Get the input stride.
@@ -289,7 +288,7 @@
  * @param st Resampler state
  * @param stride Input stride copied
  */
-void speex_resampler_get_input_stride(SpeexResamplerState *st, 
+void speex_resampler_get_input_stride(SpeexResamplerState *st,
                                       spx_uint32_t *stride);
 
 /** Set (change) the output stride.
@@ -296,7 +295,7 @@
  * @param st Resampler state
  * @param stride Output stride
  */
-void speex_resampler_set_output_stride(SpeexResamplerState *st, 
+void speex_resampler_set_output_stride(SpeexResamplerState *st,
                                       spx_uint32_t stride);
 
 /** Get the output stride.
@@ -303,7 +302,7 @@
  * @param st Resampler state copied
  * @param stride Output stride
  */
-void speex_resampler_get_output_stride(SpeexResamplerState *st, 
+void speex_resampler_get_output_stride(SpeexResamplerState *st,
                                       spx_uint32_t *stride);
 
 /** Get the latency introduced by the resampler measured in input samples.
@@ -316,8 +315,8 @@
  */
 int speex_resampler_get_output_latency(SpeexResamplerState *st);
 
-/** Make sure that the first samples to go out of the resamplers don't have 
- * leading zeros. This is only useful before starting to use a newly created 
+/** Make sure that the first samples to go out of the resamplers don't have
+ * leading zeros. This is only useful before starting to use a newly created
  * resampler. It is recommended to use that when resampling an audio file, as
  * it will generate a file with the same length. For real-time processing,
  * it is probably easier not to use this call (so that the output duration
--- a/win32/config.h
+++ b/win32/config.h
@@ -5,6 +5,8 @@
 #define HAVE_LIBFLAC          1
 /* comment to compile with dynamic flac */
 #define FLAC__NO_DLL
+/* Comment out this define to use the slower resampler, which uses less memory */
+#define RESAMPLE_FULL_SINC_TABLE
 
 #define OUTSIDE_SPEEX         1
 #define OPUSTOOLS             1
@@ -13,7 +15,6 @@
 #define alloca _alloca
 #define getpid _getpid
 #define USE_ALLOCA            1
-#define FLOATING_POINT        1
 
 #ifdef HAVE_LIBFLAC
 #ifdef FLAC__NO_DLL
@@ -23,7 +24,6 @@
 #endif
 #endif
 
-#define SPX_RESAMPLE_EXPORT
 #define __SSE__
 
 #define RANDOM_PREFIX opustools