shithub: libopusenc

Download patch

ref: 5c91523b2d49a02b4651f6abfb10209b584db441
parent: 79c0e61a606f5ae594c29c5a3e20ff69d88ace90
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Fri May 12 15:13:08 EDT 2017

Sync resampler with libspeexdsp

--- a/Makefile.am
+++ b/Makefile.am
@@ -13,7 +13,6 @@
 noinst_HEADERS = src/arch.h \
 		 src/ogg_packer.h \
 		 src/opus_header.h \
-		 src/os_support.h \
 		 src/picture.h \
 		 src/resample_sse.h \
 		 src/speex_resampler.h \
--- a/src/arch.h
+++ b/src/arch.h
@@ -7,18 +7,18 @@
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
-   
+
    - Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
-   
+
    - Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
-   
+
    - Neither the name of the Xiph.org Foundation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.
-   
+
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,14 +35,6 @@
 #ifndef ARCH_H
 #define ARCH_H
 
-#ifndef SPEEX_VERSION
-#define SPEEX_MAJOR_VERSION 1         /**< Major Speex version. */
-#define SPEEX_MINOR_VERSION 1         /**< Minor Speex version. */
-#define SPEEX_MICRO_VERSION 15        /**< Micro Speex version. */
-#define SPEEX_EXTRA_VERSION ""        /**< Extra Speex version. */
-#define SPEEX_VERSION "speex-1.2beta3"  /**< Speex version string. */
-#endif
-
 /* A couple test to catch stupid option combinations */
 #ifdef FIXED_POINT
 
@@ -49,7 +41,7 @@
 #ifdef FLOATING_POINT
 #error You cannot compile as floating point and fixed point at the same time
 #endif
-#if defined(__SSE__)
+#ifdef _USE_SSE
 #error SSE is only for floating-point
 #endif
 #if ((defined (ARM4_ASM)||defined (ARM4_ASM)) && defined(BFIN_ASM)) || (defined (ARM4_ASM)&&defined(ARM5E_ASM))
@@ -75,7 +67,7 @@
 #endif
 
 #ifndef OUTSIDE_SPEEX
-#include "../include/speex/speex_types.h"
+#include "speex/speexdsp_types.h"
 #endif
 
 #define ABS(x) ((x) < 0 ? (-(x)) : (x))      /**< Absolute integer value. */
@@ -109,6 +101,8 @@
 #define SIG_SHIFT    14
 #define GAIN_SHIFT   6
 
+#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
+
 #define VERY_SMALL 0
 #define VERY_LARGE32 ((spx_word32_t)2147483647)
 #define VERY_LARGE16 ((spx_word16_t)32767)
@@ -171,6 +165,7 @@
 #define VSHR32(a,shift) (a)
 #define SATURATE16(x,a) (x)
 #define SATURATE32(x,a) (x)
+#define SATURATE32PSHR(x,shift,a) (x)
 
 #define PSHR(a,shift)       (a)
 #define SHR(a,shift)       (a)
@@ -210,6 +205,7 @@
 #define DIV32(a,b)     (((spx_word32_t)(a))/(spx_word32_t)(b))
 #define PDIV32(a,b)     (((spx_word32_t)(a))/(spx_word32_t)(b))
 
+#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
 
 #endif
 
@@ -217,11 +213,11 @@
 #if defined (CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
 
 /* 2 on TI C5x DSP */
-#define BYTES_PER_CHAR 2 
+#define BYTES_PER_CHAR 2
 #define BITS_PER_CHAR 16
 #define LOG2_BITS_PER_CHAR 4
 
-#else 
+#else
 
 #define BYTES_PER_CHAR 1
 #define BITS_PER_CHAR 8
--- a/src/os_support.h
+++ /dev/null
@@ -1,167 +1,0 @@
-/* Copyright (C) 2007 Jean-Marc Valin
-      
-   File: os_support.h
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-   1. Redistributions of source code must retain the above copyright notice,
-   this list of conditions and the following disclaimer.
-
-   2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-   3. The name of the author may not be used to endorse or promote products
-   derived from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
-   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
-   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef OS_SUPPORT_H
-#define OS_SUPPORT_H
-
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-#ifdef OS_SUPPORT_CUSTOM
-#include "os_support_custom.h"
-#endif
-
-/** Speex wrapper for calloc. To do your own dynamic allocation, all you need to do is replace this function, speex_realloc and speex_free 
-    NOTE: speex_alloc needs to CLEAR THE MEMORY */
-#ifndef OVERRIDE_SPEEX_ALLOC
-static inline void *speex_alloc (int size)
-{
-   /* WARNING: this is not equivalent to malloc(). If you want to use malloc() 
-      or your own allocator, YOU NEED TO CLEAR THE MEMORY ALLOCATED. Otherwise
-      you will experience strange bugs */
-   return calloc(size,1);
-}
-#endif
-
-/** Same as speex_alloc, except that the area is only needed inside a Speex call (might cause problem with wideband though) */
-#ifndef OVERRIDE_SPEEX_ALLOC_SCRATCH
-static inline void *speex_alloc_scratch (int size)
-{
-   /* Scratch space doesn't need to be cleared */
-   return calloc(size,1);
-}
-#endif
-
-/** Speex wrapper for realloc. To do your own dynamic allocation, all you need to do is replace this function, speex_alloc and speex_free */
-#ifndef OVERRIDE_SPEEX_REALLOC
-static inline void *speex_realloc (void *ptr, int size)
-{
-   return realloc(ptr, size);
-}
-#endif
-
-/** Speex wrapper for calloc. To do your own dynamic allocation, all you need to do is replace this function, speex_realloc and speex_alloc */
-#ifndef OVERRIDE_SPEEX_FREE
-static inline void speex_free (void *ptr)
-{
-   free(ptr);
-}
-#endif
-
-/** Same as speex_free, except that the area is only needed inside a Speex call (might cause problem with wideband though) */
-#ifndef OVERRIDE_SPEEX_FREE_SCRATCH
-static inline void speex_free_scratch (void *ptr)
-{
-   free(ptr);
-}
-#endif
-
-/** Copy n elements from src to dst. The 0* term provides compile-time type checking  */
-#ifndef OVERRIDE_SPEEX_COPY
-#define SPEEX_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
-#endif
-
-/** Copy n elements from src to dst, allowing overlapping regions. The 0* term
-    provides compile-time type checking */
-#ifndef OVERRIDE_SPEEX_MOVE
-#define SPEEX_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
-#endif
-
-/** For n elements worth of memory, set every byte to the value of c, starting at address dst */
-#ifndef OVERRIDE_SPEEX_MEMSET
-#define SPEEX_MEMSET(dst, c, n) (memset((dst), (c), (n)*sizeof(*(dst))))
-#endif
-
-
-#ifndef OVERRIDE_SPEEX_FATAL
-static inline void _speex_fatal(const char *str, const char *file, int line)
-{
-   fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
-   exit(1);
-}
-#endif
-
-#ifndef OVERRIDE_SPEEX_WARNING
-static inline void speex_warning(const char *str)
-{
-#ifndef DISABLE_WARNINGS
-   fprintf (stderr, "warning: %s\n", str);
-#endif
-}
-#endif
-
-#ifndef OVERRIDE_SPEEX_WARNING_INT
-static inline void speex_warning_int(const char *str, int val)
-{
-#ifndef DISABLE_WARNINGS
-   fprintf (stderr, "warning: %s %d\n", str, val);
-#endif
-}
-#endif
-
-#ifndef OVERRIDE_SPEEX_NOTIFY
-static inline void speex_notify(const char *str)
-{
-#ifndef DISABLE_NOTIFICATIONS
-   fprintf (stderr, "notification: %s\n", str);
-#endif
-}
-#endif
-
-#ifndef OVERRIDE_SPEEX_PUTC
-/** Speex wrapper for putc */
-static inline void _speex_putc(int ch, void *file)
-{
-   FILE *f = (FILE *)file;
-   fprintf(f, "%c", ch);
-}
-#endif
-
-#define speex_fatal(str) _speex_fatal(str, __FILE__, __LINE__);
-#define speex_assert(cond) {if (!(cond)) {speex_fatal("assertion failed: " #cond);}}
-
-#ifndef RELEASE
-static inline void print_vec(float *vec, int len, char *name)
-{
-   int i;
-   printf ("%s ", name);
-   for (i=0;i<len;i++)
-      printf (" %f", vec[i]);
-   printf ("\n");
-}
-#endif
-
-#endif
-
--- a/src/resample.c
+++ b/src/resample.c
@@ -1,6 +1,6 @@
 /* Copyright (C) 2007-2008 Jean-Marc Valin
    Copyright (C) 2008      Thorvald Natvig
-      
+
    File: resample.c
    Arbitrary resampling code
 
@@ -38,41 +38,41 @@
       - Low memory requirement
       - Good *perceptual* quality (and not best SNR)
 
-   Warning: This resampler is relatively new. Although I think I got rid of 
+   Warning: This resampler is relatively new. Although I think I got rid of
    all the major bugs and I don't expect the API to change anymore, there
    may be something I've missed. So use with caution.
 
    This algorithm is based on this original resampling algorithm:
    Smith, Julius O. Digital Audio Resampling Home Page
-   Center for Computer Research in Music and Acoustics (CCRMA), 
+   Center for Computer Research in Music and Acoustics (CCRMA),
    Stanford University, 2007.
    Web published at http://www-ccrma.stanford.edu/~jos/resample/.
 
-   There is one main difference, though. This resampler uses cubic 
+   There is one main difference, though. This resampler uses cubic
    interpolation instead of linear interpolation in the above paper. This
    makes the table much smaller and makes it possible to compute that table
-   on a per-stream basis. In turn, being able to tweak the table for each 
-   stream makes it possible to both reduce complexity on simple ratios 
-   (e.g. 2/3), and get rid of the rounding operations in the inner loop. 
+   on a per-stream basis. In turn, being able to tweak the table for each
+   stream makes it possible to both reduce complexity on simple ratios
+   (e.g. 2/3), and get rid of the rounding operations in the inner loop.
    The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
 */
 
 #ifdef HAVE_CONFIG_H
-# include "config.h"
+#include "config.h"
 #endif
 
-#define RESAMPLE_HUGEMEM 1
-
 #ifdef OUTSIDE_SPEEX
 #include <stdlib.h>
 static void *speex_alloc (int size) {return calloc(size,1);}
 static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
 static void speex_free (void *ptr) {free(ptr);}
+#define speex_assert(x)
+#define EXPORT
 #include "speex_resampler.h"
 #include "arch.h"
 #else /* OUTSIDE_SPEEX */
-               
-#include "../include/speex/speex_resampler.h"
+
+#include "speex/speex_resampler.h"
 #include "arch.h"
 #include "os_support.h"
 #endif /* OUTSIDE_SPEEX */
@@ -79,17 +79,12 @@
 
 #include "stack_alloc.h"
 #include <math.h>
+#include <limits.h>
 
 #ifndef M_PI
-#define M_PI 3.14159263
+#define M_PI 3.14159265358979323846
 #endif
 
-#ifdef FIXED_POINT
-#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))  
-#else
-#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))  
-#endif
-               
 #define IMAX(a,b) ((a) > (b) ? (a) : (b))
 #define IMIN(a,b) ((a) < (b) ? (a) : (b))
 
@@ -97,10 +92,18 @@
 #define NULL 0
 #endif
 
-#if defined(FLOATING_POINT) && defined(__SSE__)
-# include "resample_sse.h"
+#ifndef UINT32_MAX
+#define UINT32_MAX 4294967296U
 #endif
 
+#ifdef _USE_SSE
+#include "resample_sse.h"
+#endif
+
+#ifdef _USE_NEON
+#include "resample_neon.h"
+#endif
+
 /* Numer of elements to allocate on the stack */
 #ifdef VAR_ARRAYS
 #define FIXED_STACK_ALLOC 8192
@@ -115,7 +118,7 @@
    spx_uint32_t out_rate;
    spx_uint32_t num_rate;
    spx_uint32_t den_rate;
-   
+
    int    quality;
    spx_uint32_t nb_channels;
    spx_uint32_t filt_len;
@@ -127,17 +130,17 @@
    spx_uint32_t oversample;
    int          initialised;
    int          started;
-   
+
    /* These are per-channel */
    spx_int32_t  *last_sample;
    spx_uint32_t *samp_frac_num;
    spx_uint32_t *magic_samples;
-   
+
    spx_word16_t *mem;
    spx_word16_t *sinc_table;
    spx_uint32_t sinc_table_length;
    resampler_basic_func resampler_ptr;
-         
+
    int    in_stride;
    int    out_stride;
 } ;
@@ -179,7 +182,7 @@
    0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
    0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
    0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000};
-   
+
 static const double kaiser6_table[36] = {
    0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
    0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
@@ -192,7 +195,7 @@
    const double *table;
    int oversample;
 };
-      
+
 static const struct FuncDef _KAISER12 = {kaiser12_table, 64};
 #define KAISER12 (&_KAISER12)
 /*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
@@ -214,7 +217,7 @@
 
 
 /* This table maps conversion quality to internal parameters. There are two
-   reasons that explain why the up-sampling bandwidth is larger than the 
+   reasons that explain why the up-sampling bandwidth is larger than the
    down-sampling bandwidth:
    1) When up-sampling, we can assume that the spectrum is already attenuated
       close to the Nyquist rate (from an A/D or a previous resampling filter)
@@ -240,7 +243,7 @@
 {
    float y, frac;
    double interp[4];
-   int ind; 
+   int ind;
    y = x*func->oversample;
    ind = (int)floor(y);
    frac = (y-ind);
@@ -251,7 +254,7 @@
    interp[0] = -0.3333333333*frac + 0.5*(frac*frac) - 0.1666666667*(frac*frac*frac);
    /* Just to make sure we don't have rounding problems */
    interp[1] = 1.f-interp[3]-interp[2]-interp[0];
-   
+
    /*sum = frac*accum[1] + (1-frac)*accum[2];*/
    return interp[0]*func->table[ind] + interp[1]*func->table[ind+1] + interp[2]*func->table[ind+2] + interp[3]*func->table[ind+3];
 }
@@ -362,11 +365,12 @@
       }
       sum = accum[0] + accum[1] + accum[2] + accum[3];
 */
+      sum = SATURATE32PSHR(sum, 15, 32767);
 #else
       sum = inner_product_single(sinct, iptr, N);
 #endif
 
-      out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);
+      out[out_stride * out_sample++] = sum;
       last_sample += int_advance;
       samp_frac_num += frac_advance;
       if (samp_frac_num >= den_rate)
@@ -472,12 +476,13 @@
 
       cubic_coef(frac, interp);
       sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
+      sum = SATURATE32PSHR(sum, 15, 32767);
 #else
       cubic_coef(frac, interp);
       sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
-      
-      out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767);
+
+      out[out_stride * out_sample++] = sum;
       last_sample += int_advance;
       samp_frac_num += frac_advance;
       if (samp_frac_num >= den_rate)
@@ -538,7 +543,7 @@
       cubic_coef(frac, interp);
       sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
-      
+
       out[out_stride * out_sample++] = PSHR32(sum,15);
       last_sample += int_advance;
       samp_frac_num += frac_advance;
@@ -555,21 +560,70 @@
 }
 #endif
 
-static void update_filter(SpeexResamplerState *st)
+/* This resampler is used to produce zero output in situations where memory
+   for the filter could not be allocated.  The expected numbers of input and
+   output samples are still processed so that callers failing to check error
+   codes are not surprised, possibly getting into infinite loops. */
+static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
 {
-   spx_uint32_t old_length;
-   
-   old_length = st->filt_len;
+   int out_sample = 0;
+   int last_sample = st->last_sample[channel_index];
+   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
+   const int out_stride = st->out_stride;
+   const int int_advance = st->int_advance;
+   const int frac_advance = st->frac_advance;
+   const spx_uint32_t den_rate = st->den_rate;
+
+   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+   {
+      out[out_stride * out_sample++] = 0;
+      last_sample += int_advance;
+      samp_frac_num += frac_advance;
+      if (samp_frac_num >= den_rate)
+      {
+         samp_frac_num -= den_rate;
+         last_sample++;
+      }
+   }
+
+   st->last_sample[channel_index] = last_sample;
+   st->samp_frac_num[channel_index] = samp_frac_num;
+   return out_sample;
+}
+
+static int _muldiv(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t mul, spx_uint32_t div)
+{
+   speex_assert(result);
+   spx_uint32_t major = value / div;
+   spx_uint32_t remainder = value % div;
+   /* TODO: Could use 64 bits operation to check for overflow. But only guaranteed in C99+ */
+   if (remainder > UINT32_MAX / mul || major > UINT32_MAX / mul
+       || major * mul > UINT32_MAX - remainder * mul / div)
+      return RESAMPLER_ERR_OVERFLOW;
+   *result = remainder * mul / div + major * mul;
+   return RESAMPLER_ERR_SUCCESS;
+}
+
+static int update_filter(SpeexResamplerState *st)
+{
+   spx_uint32_t old_length = st->filt_len;
+   spx_uint32_t old_alloc_size = st->mem_alloc_size;
+   int use_direct;
+   spx_uint32_t min_sinc_table_length;
+   spx_uint32_t min_alloc_size;
+
+   st->int_advance = st->num_rate/st->den_rate;
+   st->frac_advance = st->num_rate%st->den_rate;
    st->oversample = quality_map[st->quality].oversample;
    st->filt_len = quality_map[st->quality].base_length;
-   
+
    if (st->num_rate > st->den_rate)
    {
       /* down-sampling */
       st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate;
-      /* FIXME: divide the numerator and denominator by a certain amount if they're too large */
-      st->filt_len = st->filt_len*st->num_rate / st->den_rate;
-      /* Round up to make sure we have a multiple of 8 */
+      if (_muldiv(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
+         goto fail;
+      /* Round up to make sure we have a multiple of 8 for SSE */
       st->filt_len = ((st->filt_len-1)&(~0x7))+8;
       if (2*st->den_rate < st->num_rate)
          st->oversample >>= 1;
@@ -586,21 +640,36 @@
       st->cutoff = quality_map[st->quality].upsample_bandwidth;
    }
 
-#ifdef RESAMPLE_HUGEMEM
-   if (st->den_rate <= 16*(st->oversample+8))
-#else
    /* Choose the resampling type that requires the least amount of memory */
-   if (st->den_rate <= (st->oversample+8))
+#ifdef RESAMPLE_FULL_SINC_TABLE
+   use_direct = 1;
+   if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
+      goto fail;
+#else
+   use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
+                && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
 #endif
+   if (use_direct)
    {
+      min_sinc_table_length = st->filt_len*st->den_rate;
+   } else {
+      if ((INT_MAX/sizeof(spx_word16_t)-8)/st->oversample < st->filt_len)
+         goto fail;
+
+      min_sinc_table_length = st->filt_len*st->oversample+8;
+   }
+   if (st->sinc_table_length < min_sinc_table_length)
+   {
+      spx_word16_t *sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,min_sinc_table_length*sizeof(spx_word16_t));
+      if (!sinc_table)
+         goto fail;
+
+      st->sinc_table = sinc_table;
+      st->sinc_table_length = min_sinc_table_length;
+   }
+   if (use_direct)
+   {
       spx_uint32_t i;
-      if (!st->sinc_table)
-         st->sinc_table = (spx_word16_t *)speex_alloc(st->filt_len*st->den_rate*sizeof(spx_word16_t));
-      else if (st->sinc_table_length < st->filt_len*st->den_rate)
-      {
-         st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,st->filt_len*st->den_rate*sizeof(spx_word16_t));
-         st->sinc_table_length = st->filt_len*st->den_rate;
-      }
       for (i=0;i<st->den_rate;i++)
       {
          spx_int32_t j;
@@ -620,13 +689,6 @@
       /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/
    } else {
       spx_int32_t i;
-      if (!st->sinc_table)
-         st->sinc_table = (spx_word16_t *)speex_alloc((st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
-      else if (st->sinc_table_length < st->filt_len*st->oversample+8)
-      {
-         st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,(st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
-         st->sinc_table_length = st->filt_len*st->oversample+8;
-      }
       for (i=-4;i<(spx_int32_t)(st->oversample*st->filt_len+4);i++)
          st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len, quality_map[st->quality].window_func);
 #ifdef FIXED_POINT
@@ -639,51 +701,47 @@
 #endif
       /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/
    }
-   st->int_advance = st->num_rate/st->den_rate;
-   st->frac_advance = st->num_rate%st->den_rate;
 
-   
    /* Here's the place where we update the filter memory to take into account
       the change in filter length. It's probably the messiest part of the code
       due to handling of lots of corner cases. */
-   if (!st->mem)
+
+   /* Adding buffer_size to filt_len won't overflow here because filt_len
+      could be multiplied by sizeof(spx_word16_t) above. */
+   min_alloc_size = st->filt_len-1 + st->buffer_size;
+   if (min_alloc_size > st->mem_alloc_size)
    {
-      spx_uint32_t i;
-      st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-      st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
-      for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
-         st->mem[i] = 0;
-      /*speex_warning("init filter");*/
-   } else if (!st->started)
+      spx_word16_t *mem;
+      if (INT_MAX/sizeof(spx_word16_t)/st->nb_channels < min_alloc_size)
+          goto fail;
+      else if (!(mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*min_alloc_size * sizeof(*mem))))
+          goto fail;
+
+      st->mem = mem;
+      st->mem_alloc_size = min_alloc_size;
+   }
+   if (!st->started)
    {
       spx_uint32_t i;
-      st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-      st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
       for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
          st->mem[i] = 0;
       /*speex_warning("reinit filter");*/
    } else if (st->filt_len > old_length)
    {
-      spx_int32_t i;
+      spx_uint32_t i;
       /* Increase the filter length */
       /*speex_warning("increase filter size");*/
-      int old_alloc_size = st->mem_alloc_size;
-      if ((st->filt_len-1 + st->buffer_size) > st->mem_alloc_size)
+      for (i=st->nb_channels;i--;)
       {
-         st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-         st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
-      }
-      for (i=st->nb_channels-1;i>=0;i--)
-      {
-         spx_int32_t j;
+         spx_uint32_t j;
          spx_uint32_t olen = old_length;
          /*if (st->magic_samples[i])*/
          {
             /* Try and remove the magic samples as if nothing had happened */
-            
+
             /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
             olen = old_length + 2*st->magic_samples[i];
-            for (j=old_length-2+st->magic_samples[i];j>=0;j--)
+            for (j=old_length-1+st->magic_samples[i];j--;)
                st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j];
             for (j=0;j<st->magic_samples[i];j++)
                st->mem[i*st->mem_alloc_size+j] = 0;
@@ -724,18 +782,28 @@
          st->magic_samples[i] += old_magic;
       }
    }
+   return RESAMPLER_ERR_SUCCESS;
 
+fail:
+   st->resampler_ptr = resampler_basic_zero;
+   /* st->mem may still contain consumed input samples for the filter.
+      Restore filt_len so that filt_len - 1 still points to the position after
+      the last of these samples. */
+   st->filt_len = old_length;
+   return RESAMPLER_ERR_ALLOC_FAILED;
 }
 
-SPX_RESAMPLE_EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
+EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
 {
    return speex_resampler_init_frac(nb_channels, in_rate, out_rate, in_rate, out_rate, quality, err);
 }
 
-SPX_RESAMPLE_EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
+EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
 {
    spx_uint32_t i;
    SpeexResamplerState *st;
+   int filter_err;
+
    if (quality > 10 || quality < 0)
    {
       if (err)
@@ -743,6 +811,12 @@
       return NULL;
    }
    st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState));
+   if (!st)
+   {
+      if (err)
+         *err = RESAMPLER_ERR_ALLOC_FAILED;
+      return NULL;
+   }
    st->initialised = 0;
    st->started = 0;
    st->in_rate = 0;
@@ -755,43 +829,46 @@
    st->filt_len = 0;
    st->mem = 0;
    st->resampler_ptr = 0;
-         
+
    st->cutoff = 1.f;
    st->nb_channels = nb_channels;
    st->in_stride = 1;
    st->out_stride = 1;
-   
-#ifdef FIXED_POINT
+
    st->buffer_size = 160;
-#else
-   st->buffer_size = 160;
-#endif
-   
+
    /* Per channel data */
-   st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t));
-   st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t));
-   st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t));
-   for (i=0;i<nb_channels;i++)
-   {
-      st->last_sample[i] = 0;
-      st->magic_samples[i] = 0;
-      st->samp_frac_num[i] = 0;
-   }
+   if (!(st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t))))
+      goto fail;
+   if (!(st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
+      goto fail;
+   if (!(st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
+      goto fail;
 
    speex_resampler_set_quality(st, quality);
    speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate);
 
-   
-   update_filter(st);
-   
-   st->initialised = 1;
+   filter_err = update_filter(st);
+   if (filter_err == RESAMPLER_ERR_SUCCESS)
+   {
+      st->initialised = 1;
+   } else {
+      speex_resampler_destroy(st);
+      st = NULL;
+   }
    if (err)
-      *err = RESAMPLER_ERR_SUCCESS;
+      *err = filter_err;
 
    return st;
+
+fail:
+   if (err)
+      *err = RESAMPLER_ERR_ALLOC_FAILED;
+   speex_resampler_destroy(st);
+   return NULL;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_destroy(SpeexResamplerState *st)
+EXPORT void speex_resampler_destroy(SpeexResamplerState *st)
 {
    speex_free(st->mem);
    speex_free(st->sinc_table);
@@ -808,17 +885,17 @@
    int out_sample = 0;
    spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
    spx_uint32_t ilen;
-   
+
    st->started = 1;
-   
+
    /* Call the right resampler through the function ptr */
    out_sample = st->resampler_ptr(st, channel_index, mem, in_len, out, out_len);
-   
+
    if (st->last_sample[channel_index] < (spx_int32_t)*in_len)
       *in_len = st->last_sample[channel_index];
    *out_len = out_sample;
    st->last_sample[channel_index] -= *in_len;
-   
+
    ilen = *in_len;
 
    for(j=0;j<N-1;++j)
@@ -831,11 +908,11 @@
    spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
    spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
    const int N = st->filt_len;
-   
+
    speex_resampler_process_native(st, channel_index, &tmp_in_len, *out, &out_len);
 
    st->magic_samples[channel_index] -= tmp_in_len;
-   
+
    /* If we couldn't process all "magic" input samples, save the rest for next time */
    if (st->magic_samples[channel_index])
    {
@@ -848,9 +925,9 @@
 }
 
 #ifdef FIXED_POINT
-SPX_RESAMPLE_EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
 #else
-SPX_RESAMPLE_EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
 #endif
 {
    int j;
@@ -861,13 +938,13 @@
    const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
    const int istride = st->in_stride;
 
-   if (st->magic_samples[channel_index]) 
+   if (st->magic_samples[channel_index])
       olen -= speex_resampler_magic(st, channel_index, &out, olen);
    if (! st->magic_samples[channel_index]) {
       while (ilen && olen) {
         spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
         spx_uint32_t ochunk = olen;
- 
+
         if (in) {
            for(j=0;j<ichunk;++j)
               x[j+filt_offs]=in[j*istride];
@@ -885,13 +962,13 @@
    }
    *in_len -= ilen;
    *out_len -= olen;
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
 
 #ifdef FIXED_POINT
-SPX_RESAMPLE_EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
 #else
-SPX_RESAMPLE_EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
 #endif
 {
    int j;
@@ -911,7 +988,7 @@
 #endif
 
    st->out_stride = 1;
-   
+
    while (ilen && olen) {
      spx_word16_t *y = ystack;
      spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
@@ -948,7 +1025,7 @@
 #else
         out[j*ostride_save] = WORD2INT(ystack[j]);
 #endif
-     
+
      ilen -= ichunk;
      olen -= ochunk;
      out += (ochunk+omagic) * ostride_save;
@@ -959,10 +1036,10 @@
    *in_len -= ilen;
    *out_len -= olen;
 
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
 {
    spx_uint32_t i;
    int istride_save, ostride_save;
@@ -982,10 +1059,10 @@
    }
    st->in_stride = istride_save;
    st->out_stride = ostride_save;
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
-               
-SPX_RESAMPLE_EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
+
+EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
 {
    spx_uint32_t i;
    int istride_save, ostride_save;
@@ -1005,66 +1082,75 @@
    }
    st->in_stride = istride_save;
    st->out_stride = ostride_save;
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate)
+EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate)
 {
    return speex_resampler_set_rate_frac(st, in_rate, out_rate, in_rate, out_rate);
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_rate, spx_uint32_t *out_rate)
+EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_rate, spx_uint32_t *out_rate)
 {
    *in_rate = st->in_rate;
    *out_rate = st->out_rate;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
+static inline spx_uint32_t _gcd(spx_uint32_t a, spx_uint32_t b)
 {
+   while (b != 0)
+   {
+      spx_uint32_t temp = a;
+
+      a = b;
+      b = temp % b;
+   }
+   return a;
+}
+
+EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
+{
    spx_uint32_t fact;
    spx_uint32_t old_den;
    spx_uint32_t i;
    if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den)
       return RESAMPLER_ERR_SUCCESS;
-   
+
    old_den = st->den_rate;
    st->in_rate = in_rate;
    st->out_rate = out_rate;
    st->num_rate = ratio_num;
    st->den_rate = ratio_den;
-   /* FIXME: This is terribly inefficient, but who cares (at least for now)? */
-   for (fact=2;fact<=IMIN(st->num_rate, st->den_rate);fact++)
-   {
-      while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0))
-      {
-         st->num_rate /= fact;
-         st->den_rate /= fact;
-      }
-   }
-      
+
+   fact = _gcd (st->num_rate, st->den_rate);
+
+   st->num_rate /= fact;
+   st->den_rate /= fact;
+
    if (old_den > 0)
    {
       for (i=0;i<st->nb_channels;i++)
       {
-         st->samp_frac_num[i]=st->samp_frac_num[i]*st->den_rate/old_den;
+         if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
+            return RESAMPLER_ERR_OVERFLOW;
          /* Safety net */
          if (st->samp_frac_num[i] >= st->den_rate)
             st->samp_frac_num[i] = st->den_rate-1;
       }
    }
-   
+
    if (st->initialised)
-      update_filter(st);
+      return update_filter(st);
    return RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_ratio(SpeexResamplerState *st, spx_uint32_t *ratio_num, spx_uint32_t *ratio_den)
+EXPORT void speex_resampler_get_ratio(SpeexResamplerState *st, spx_uint32_t *ratio_num, spx_uint32_t *ratio_den)
 {
    *ratio_num = st->num_rate;
    *ratio_den = st->den_rate;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality)
+EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality)
 {
    if (quality > 10 || quality < 0)
       return RESAMPLER_ERR_INVALID_ARG;
@@ -1072,46 +1158,46 @@
       return RESAMPLER_ERR_SUCCESS;
    st->quality = quality;
    if (st->initialised)
-      update_filter(st);
+      return update_filter(st);
    return RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_quality(SpeexResamplerState *st, int *quality)
+EXPORT void speex_resampler_get_quality(SpeexResamplerState *st, int *quality)
 {
    *quality = st->quality;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_set_input_stride(SpeexResamplerState *st, spx_uint32_t stride)
+EXPORT void speex_resampler_set_input_stride(SpeexResamplerState *st, spx_uint32_t stride)
 {
    st->in_stride = stride;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_input_stride(SpeexResamplerState *st, spx_uint32_t *stride)
+EXPORT void speex_resampler_get_input_stride(SpeexResamplerState *st, spx_uint32_t *stride)
 {
    *stride = st->in_stride;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_set_output_stride(SpeexResamplerState *st, spx_uint32_t stride)
+EXPORT void speex_resampler_set_output_stride(SpeexResamplerState *st, spx_uint32_t stride)
 {
    st->out_stride = stride;
 }
 
-SPX_RESAMPLE_EXPORT void speex_resampler_get_output_stride(SpeexResamplerState *st, spx_uint32_t *stride)
+EXPORT void speex_resampler_get_output_stride(SpeexResamplerState *st, spx_uint32_t *stride)
 {
    *stride = st->out_stride;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_get_input_latency(SpeexResamplerState *st)
+EXPORT int speex_resampler_get_input_latency(SpeexResamplerState *st)
 {
   return st->filt_len / 2;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_get_output_latency(SpeexResamplerState *st)
+EXPORT int speex_resampler_get_output_latency(SpeexResamplerState *st)
 {
   return ((st->filt_len / 2) * st->den_rate + (st->num_rate >> 1)) / st->num_rate;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st)
+EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st)
 {
    spx_uint32_t i;
    for (i=0;i<st->nb_channels;i++)
@@ -1119,7 +1205,7 @@
    return RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st)
+EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st)
 {
    spx_uint32_t i;
    for (i=0;i<st->nb_channels;i++)
@@ -1133,7 +1219,7 @@
    return RESAMPLER_ERR_SUCCESS;
 }
 
-SPX_RESAMPLE_EXPORT const char *speex_resampler_strerror(int err)
+EXPORT const char *speex_resampler_strerror(int err)
 {
    switch (err)
    {
--- a/src/resample_sse.h
+++ b/src/resample_sse.h
@@ -9,18 +9,18 @@
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
-   
+
    - Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
-   
+
    - Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
-   
+
    - Neither the name of the Xiph.org Foundation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.
-   
+
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -41,23 +41,15 @@
 {
    int i;
    float ret;
-   if (1)
+   __m128 sum = _mm_setzero_ps();
+   for (i=0;i<len;i+=8)
    {
-      __m128 sum = _mm_setzero_ps();
-      for (i=0;i<len;i+=8)
-      {
-         sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
-         sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
-      }
-      sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
-      sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
-      _mm_store_ss(&ret, sum);
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
    }
-   else
-   {
-      ret = 0;
-      for (i=0;i<len;i++) ret += a[i] * b[i];
-   }
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+   _mm_store_ss(&ret, sum);
    return ret;
 }
 
@@ -65,37 +57,21 @@
 static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   int i;
   float ret;
-  if (1)
+  __m128 sum = _mm_setzero_ps();
+  __m128 f = _mm_loadu_ps(frac);
+  for(i=0;i<len;i+=2)
   {
-    __m128 sum = _mm_setzero_ps();
-    __m128 f = _mm_loadu_ps(frac);
-    for(i=0;i<len;i+=2)
-    {
-      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
-      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
-    }
-    sum = _mm_mul_ps(f, sum);
-    sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
-    sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
-    _mm_store_ss(&ret, sum);
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
+    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
   }
-  else
-  {
-    float accum[4] = {0,0,0,0};
-    for(i=0;i<len;i++)
-    {
-      const float curr_in=a[i];
-      accum[0] += curr_in * b[i * oversample + 0];
-      accum[1] += curr_in * b[i * oversample + 1];
-      accum[2] += curr_in * b[i * oversample + 2];
-      accum[3] += curr_in * b[i * oversample + 3];
-    }
-    ret = accum[0] * frac[0] + accum[1] * frac[1] + accum[2] * frac[2] + accum[3] * frac[3];
-  }
-  return ret;
+   sum = _mm_mul_ps(f, sum);
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+   _mm_store_ss(&ret, sum);
+   return ret;
 }
 
-#ifdef __SSE2__
+#ifdef _USE_SSE2
 #include <emmintrin.h>
 #define OVERRIDE_INNER_PRODUCT_DOUBLE
 
@@ -115,7 +91,7 @@
       sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
       sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
    }
-   sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
+   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
    _mm_store_sd(&ret, sum);
    return ret;
 }
@@ -144,7 +120,7 @@
   sum1 = _mm_mul_pd(f1, sum1);
   sum2 = _mm_mul_pd(f2, sum2);
   sum = _mm_add_pd(sum1, sum2);
-  sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
+  sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
   _mm_store_sd(&ret, sum);
   return ret;
 }
--- a/src/speex_resampler.h
+++ b/src/speex_resampler.h
@@ -1,8 +1,8 @@
 /* Copyright (C) 2007 Jean-Marc Valin
-      
+
    File: speex_resampler.h
    Resampling code
-      
+
    The design goals of this code are:
       - Very fast algorithm
       - Low memory requirement
@@ -43,7 +43,7 @@
 
 /********* WARNING: MENTAL SANITY ENDS HERE *************/
 
-/* If the resampler is defined outside of Speex, we change the symbol names so that 
+/* If the resampler is defined outside of Speex, we change the symbol names so that
    there won't be any clash if linking with Speex later on. */
 
 /* #define RANDOM_PREFIX your software name here */
@@ -53,7 +53,7 @@
 
 #define CAT_PREFIX2(a,b) a ## b
 #define CAT_PREFIX(a,b) CAT_PREFIX2(a, b)
-      
+
 #define speex_resampler_init CAT_PREFIX(RANDOM_PREFIX,_resampler_init)
 #define speex_resampler_init_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_init_frac)
 #define speex_resampler_destroy CAT_PREFIX(RANDOM_PREFIX,_resampler_destroy)
@@ -81,14 +81,10 @@
 #define spx_int32_t int
 #define spx_uint16_t unsigned short
 #define spx_uint32_t unsigned int
-      
+
 #else /* OUTSIDE_SPEEX */
 
-#ifdef _BUILD_SPEEX
-# include "speex_types.h"
-#else
-# include <speex/speex_types.h>
-#endif
+#include "speexdsp_types.h"
 
 #endif /* OUTSIDE_SPEEX */
 
@@ -108,7 +104,8 @@
    RESAMPLER_ERR_BAD_STATE       = 2,
    RESAMPLER_ERR_INVALID_ARG     = 3,
    RESAMPLER_ERR_PTR_OVERLAP     = 4,
-   
+   RESAMPLER_ERR_OVERFLOW        = 5,
+
    RESAMPLER_ERR_MAX_ERROR
 };
 
@@ -124,14 +121,14 @@
  * @return Newly created resampler state
  * @retval NULL Error: not enough memory
  */
-SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, 
-                                          spx_uint32_t in_rate, 
-                                          spx_uint32_t out_rate, 
+SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels,
+                                          spx_uint32_t in_rate,
+                                          spx_uint32_t out_rate,
                                           int quality,
                                           int *err);
 
-/** Create a new resampler with fractional input/output rates. The sampling 
- * rate ratio is an arbitrary rational number with both the numerator and 
+/** Create a new resampler with fractional input/output rates. The sampling
+ * rate ratio is an arbitrary rational number with both the numerator and
  * denominator being 32-bit integers.
  * @param nb_channels Number of channels to be processed
  * @param ratio_num Numerator of the sampling rate ratio
@@ -143,11 +140,11 @@
  * @return Newly created resampler state
  * @retval NULL Error: not enough memory
  */
-SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, 
-                                               spx_uint32_t ratio_num, 
-                                               spx_uint32_t ratio_den, 
-                                               spx_uint32_t in_rate, 
-                                               spx_uint32_t out_rate, 
+SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
+                                               spx_uint32_t ratio_num,
+                                               spx_uint32_t ratio_den,
+                                               spx_uint32_t in_rate,
+                                               spx_uint32_t out_rate,
                                                int quality,
                                                int *err);
 
@@ -158,24 +155,24 @@
 
 /** Resample a float array. The input and output buffers must *not* overlap.
  * @param st Resampler state
- * @param channel_index Index of the channel to process for the multi-channel 
+ * @param channel_index Index of the channel to process for the multi-channel
  * base (0 otherwise)
  * @param in Input buffer
- * @param in_len Number of input samples in the input buffer. Returns the 
+ * @param in_len Number of input samples in the input buffer. Returns the
  * number of samples processed
  * @param out Output buffer
  * @param out_len Size of the output buffer. Returns the number of samples written
  */
-int speex_resampler_process_float(SpeexResamplerState *st, 
-                                   spx_uint32_t channel_index, 
-                                   const float *in, 
-                                   spx_uint32_t *in_len, 
-                                   float *out, 
+int speex_resampler_process_float(SpeexResamplerState *st,
+                                   spx_uint32_t channel_index,
+                                   const float *in,
+                                   spx_uint32_t *in_len,
+                                   float *out,
                                    spx_uint32_t *out_len);
 
 /** Resample an int array. The input and output buffers must *not* overlap.
  * @param st Resampler state
- * @param channel_index Index of the channel to process for the multi-channel 
+ * @param channel_index Index of the channel to process for the multi-channel
  * base (0 otherwise)
  * @param in Input buffer
  * @param in_len Number of input samples in the input buffer. Returns the number
@@ -183,11 +180,11 @@
  * @param out Output buffer
  * @param out_len Size of the output buffer. Returns the number of samples written
  */
-int speex_resampler_process_int(SpeexResamplerState *st, 
-                                 spx_uint32_t channel_index, 
-                                 const spx_int16_t *in, 
-                                 spx_uint32_t *in_len, 
-                                 spx_int16_t *out, 
+int speex_resampler_process_int(SpeexResamplerState *st,
+                                 spx_uint32_t channel_index,
+                                 const spx_int16_t *in,
+                                 spx_uint32_t *in_len,
+                                 spx_int16_t *out,
                                  spx_uint32_t *out_len);
 
 /** Resample an interleaved float array. The input and output buffers must *not* overlap.
@@ -199,10 +196,10 @@
  * @param out_len Size of the output buffer. Returns the number of samples written.
  * This is all per-channel.
  */
-int speex_resampler_process_interleaved_float(SpeexResamplerState *st, 
-                                               const float *in, 
-                                               spx_uint32_t *in_len, 
-                                               float *out, 
+int speex_resampler_process_interleaved_float(SpeexResamplerState *st,
+                                               const float *in,
+                                               spx_uint32_t *in_len,
+                                               float *out,
                                                spx_uint32_t *out_len);
 
 /** Resample an interleaved int array. The input and output buffers must *not* overlap.
@@ -214,10 +211,10 @@
  * @param out_len Size of the output buffer. Returns the number of samples written.
  * This is all per-channel.
  */
-int speex_resampler_process_interleaved_int(SpeexResamplerState *st, 
-                                             const spx_int16_t *in, 
-                                             spx_uint32_t *in_len, 
-                                             spx_int16_t *out, 
+int speex_resampler_process_interleaved_int(SpeexResamplerState *st,
+                                             const spx_int16_t *in,
+                                             spx_uint32_t *in_len,
+                                             spx_int16_t *out,
                                              spx_uint32_t *out_len);
 
 /** Set (change) the input/output sampling rates (integer value).
@@ -225,8 +222,8 @@
  * @param in_rate Input sampling rate (integer number of Hz).
  * @param out_rate Output sampling rate (integer number of Hz).
  */
-int speex_resampler_set_rate(SpeexResamplerState *st, 
-                              spx_uint32_t in_rate, 
+int speex_resampler_set_rate(SpeexResamplerState *st,
+                              spx_uint32_t in_rate,
                               spx_uint32_t out_rate);
 
 /** Get the current input/output sampling rates (integer value).
@@ -234,11 +231,11 @@
  * @param in_rate Input sampling rate (integer number of Hz) copied.
  * @param out_rate Output sampling rate (integer number of Hz) copied.
  */
-void speex_resampler_get_rate(SpeexResamplerState *st, 
-                              spx_uint32_t *in_rate, 
+void speex_resampler_get_rate(SpeexResamplerState *st,
+                              spx_uint32_t *in_rate,
                               spx_uint32_t *out_rate);
 
-/** Set (change) the input/output sampling rates and resampling ratio 
+/** Set (change) the input/output sampling rates and resampling ratio
  * (fractional values in Hz supported).
  * @param st Resampler state
  * @param ratio_num Numerator of the sampling rate ratio
@@ -246,10 +243,10 @@
  * @param in_rate Input sampling rate rounded to the nearest integer (in Hz).
  * @param out_rate Output sampling rate rounded to the nearest integer (in Hz).
  */
-int speex_resampler_set_rate_frac(SpeexResamplerState *st, 
-                                   spx_uint32_t ratio_num, 
-                                   spx_uint32_t ratio_den, 
-                                   spx_uint32_t in_rate, 
+int speex_resampler_set_rate_frac(SpeexResamplerState *st,
+                                   spx_uint32_t ratio_num,
+                                   spx_uint32_t ratio_den,
+                                   spx_uint32_t in_rate,
                                    spx_uint32_t out_rate);
 
 /** Get the current resampling ratio. This will be reduced to the least
@@ -258,24 +255,24 @@
  * @param ratio_num Numerator of the sampling rate ratio copied
  * @param ratio_den Denominator of the sampling rate ratio copied
  */
-void speex_resampler_get_ratio(SpeexResamplerState *st, 
-                               spx_uint32_t *ratio_num, 
+void speex_resampler_get_ratio(SpeexResamplerState *st,
+                               spx_uint32_t *ratio_num,
                                spx_uint32_t *ratio_den);
 
 /** Set (change) the conversion quality.
  * @param st Resampler state
- * @param quality Resampling quality between 0 and 10, where 0 has poor 
+ * @param quality Resampling quality between 0 and 10, where 0 has poor
  * quality and 10 has very high quality.
  */
-int speex_resampler_set_quality(SpeexResamplerState *st, 
+int speex_resampler_set_quality(SpeexResamplerState *st,
                                  int quality);
 
 /** Get the conversion quality.
  * @param st Resampler state
- * @param quality Resampling quality between 0 and 10, where 0 has poor 
+ * @param quality Resampling quality between 0 and 10, where 0 has poor
  * quality and 10 has very high quality.
  */
-void speex_resampler_get_quality(SpeexResamplerState *st, 
+void speex_resampler_get_quality(SpeexResamplerState *st,
                                  int *quality);
 
 /** Set (change) the input stride.
@@ -282,7 +279,7 @@
  * @param st Resampler state
  * @param stride Input stride
  */
-void speex_resampler_set_input_stride(SpeexResamplerState *st, 
+void speex_resampler_set_input_stride(SpeexResamplerState *st,
                                       spx_uint32_t stride);
 
 /** Get the input stride.
@@ -289,7 +286,7 @@
  * @param st Resampler state
  * @param stride Input stride copied
  */
-void speex_resampler_get_input_stride(SpeexResamplerState *st, 
+void speex_resampler_get_input_stride(SpeexResamplerState *st,
                                       spx_uint32_t *stride);
 
 /** Set (change) the output stride.
@@ -296,7 +293,7 @@
  * @param st Resampler state
  * @param stride Output stride
  */
-void speex_resampler_set_output_stride(SpeexResamplerState *st, 
+void speex_resampler_set_output_stride(SpeexResamplerState *st,
                                       spx_uint32_t stride);
 
 /** Get the output stride.
@@ -303,7 +300,7 @@
  * @param st Resampler state copied
  * @param stride Output stride
  */
-void speex_resampler_get_output_stride(SpeexResamplerState *st, 
+void speex_resampler_get_output_stride(SpeexResamplerState *st,
                                       spx_uint32_t *stride);
 
 /** Get the latency introduced by the resampler measured in input samples.
@@ -316,8 +313,8 @@
  */
 int speex_resampler_get_output_latency(SpeexResamplerState *st);
 
-/** Make sure that the first samples to go out of the resamplers don't have 
- * leading zeros. This is only useful before starting to use a newly created 
+/** Make sure that the first samples to go out of the resamplers don't have
+ * leading zeros. This is only useful before starting to use a newly created
  * resampler. It is recommended to use that when resampling an audio file, as
  * it will generate a file with the same length. For real-time processing,
  * it is probably easier not to use this call (so that the output duration
--- a/src/stack_alloc.h
+++ b/src/stack_alloc.h
@@ -35,16 +35,15 @@
 #ifndef STACK_ALLOC_H
 #define STACK_ALLOC_H
 
-#ifdef WIN32
-# include <malloc.h>
-# ifndef alloca
-#   define alloca(_x) _alloca(_x);
-# endif
-#else
-#ifdef HAVE_ALLOCA_H
-#  include <alloca.h>
+#ifdef USE_ALLOCA
+# ifdef WIN32
+#  include <malloc.h>
 # else
-#  include <stdlib.h>
+#  ifdef HAVE_ALLOCA_H
+#   include <alloca.h>
+#  else
+#   include <stdlib.h>
+#  endif
 # endif
 #endif