shithub: opus

Download patch

ref: a4854afac86a3481374e5956813e2cdd797649d4
parent: 4141c4d5293b97fb44062d36718657ac76ba7189
author: Jean-Marc Valin <jeanmarcv@google.com>
date: Sat Jun 8 17:46:22 EDT 2024

Preserving 24-bit accuracy for fixed-point decoder

Convert to 16 bits only at the very end

--- a/celt/arch.h
+++ b/celt/arch.h
@@ -106,6 +106,14 @@
 #define UADD32(a,b) ((a)+(b))
 #define USUB32(a,b) ((a)-(b))
 
+/* Throughout the code, we use the following scaling for signals:
+   FLOAT: used for float API, normalized to +/-1.
+   INT16: used for 16-bit API, normalized to +/- 32768
+   RES: internal Opus resolution: +/-1. in float builds; in fixed-point builds, 16-bit int by default or 24-bit int when ENABLE_RES24 is defined
+   SIG: internal CELT resolution: defined as +/- 32768. in float builds, or Q27 in fixed-point builds (int16 shifted by 12)
+*/
+
+
 /* Set this if opus_int64 is a native type of the CPU. */
 /* Assume that all LP64 architectures have fast 64-bit types; also x86_64
    (which can be ILP32 for x32) and Win64 (which is LLP64). */
@@ -127,6 +135,26 @@
 typedef opus_val16 celt_norm;
 typedef opus_val32 celt_ener;
 
+#ifdef ENABLE_RES24
+typedef opus_val32 opus_res;   /* 16-bit range scaled up by RES_SHIFT bits */
+#define RES_SHIFT 8
+#define SCALEIN(a)      (a)
+#define SIG2RES(a)      PSHR32(a, SIG_SHIFT-RES_SHIFT)
+#define RES2INT16(a)    SAT16(PSHR32(a, RES_SHIFT))
+#define RES2FLOAT(a)    ((1.f/32768.f/256.f)*(a))   /* 256 == 1<<RES_SHIFT; float constant keeps the math in single precision */
+#define INT16TORES(a)   SHL32(EXTEND32(a), RES_SHIFT)
+#define ADD_RES(a, b)   ADD32(a, b)
+#else /* !ENABLE_RES24 */
+typedef opus_val16 opus_res;
+#define RES_SHIFT 0
+#define SCALEIN(a)      (a)
+#define SIG2RES(a)      SIG2WORD16(a)
+#define RES2INT16(a)    (a)
+#define RES2FLOAT(a)    ((1.f/32768.f)*(a))
+#define INT16TORES(a)   (a)
+#define ADD_RES(a, b)   SAT16(ADD32((a), (b)))   /* no trailing ';' so it can be used as an expression */
+#endif /* ENABLE_RES24 */
+
 #define celt_isnan(x) 0
 
 #define Q15ONE 32767
@@ -150,8 +178,6 @@
 #define VERY_LARGE16 ((opus_val16)32767)
 #define Q15_ONE ((opus_val16)32767)
 
-#define SCALEIN(a)      (a)
-#define SCALEOUT(a)     (a)
 
 #define ABS16(x) ((x) < 0 ? (-(x)) : (x))
 #define ABS32(x) ((x) < 0 ? (-(x)) : (x))
@@ -192,6 +218,8 @@
 typedef float celt_norm;
 typedef float celt_ener;
 
+typedef float opus_res;
+
 #ifdef FLOAT_APPROX
 /* This code should reliably detect NaN/inf even when -ffast-math is used.
    Assumes IEEE 754 format. */
@@ -279,9 +307,12 @@
 #define DIV32(a,b)     (((opus_val32)(a))/(opus_val32)(b))
 
 #define SCALEIN(a)      ((a)*CELT_SIG_SCALE)
-#define SCALEOUT(a)     ((a)*(1/CELT_SIG_SCALE))
 
-#define SIG2WORD16(x) (x)
+#define SIG2RES(a)      ((1/CELT_SIG_SCALE)*(a))
+#define RES2INT16(a)    FLOAT2INT16(a)
+#define RES2FLOAT(a)    (a)
+#define INT16TORES(a)   ((a)*(1/CELT_SIG_SCALE))
+#define ADD_RES(a, b)   ADD32(a, b)
 
 #endif /* !FIXED_POINT */
 
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -154,7 +154,7 @@
 int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
 
 int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
-      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
+      int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
 #ifdef ENABLE_DEEP_PLC
       ,LPCNetPLCState *lpcnet
 #endif
@@ -161,7 +161,7 @@
       );
 
 int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
-      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
+      int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
 
 #define celt_encoder_ctl opus_custom_encoder_ctl
 #define celt_decoder_ctl opus_custom_decoder_ctl
@@ -239,7 +239,7 @@
 void init_caps(const CELTMode *m,int *cap,int LM,int C);
 
 #ifdef RESYNTH
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, int accum);
+void deemphasis(celt_sig *in[], opus_res *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, int accum);
 void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
       opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
       int LM, int downsample, int silence, int arch);
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -246,7 +246,7 @@
 /* Special case for stereo with no downsampling and no accumulation. This is
    quite common and we can make it faster by processing both channels in the
    same loop, reducing overhead due to the dependency loop in the IIR filter. */
-static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
+static void deemphasis_stereo_simple(celt_sig *in[], opus_res *pcm, int N, const opus_val16 coef0,
       celt_sig *mem)
 {
    celt_sig * OPUS_RESTRICT x0;
@@ -265,8 +265,8 @@
       tmp1 = SATURATE(x1[j] + VERY_SMALL + m1, SIG_SAT);
       m0 = MULT16_32_Q15(coef0, tmp0);
       m1 = MULT16_32_Q15(coef0, tmp1);
-      pcm[2*j  ] = SCALEOUT(SIG2WORD16(tmp0));
-      pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
+      pcm[2*j  ] = SIG2RES(tmp0);
+      pcm[2*j+1] = SIG2RES(tmp1);
    }
    mem[0] = m0;
    mem[1] = m1;
@@ -276,7 +276,7 @@
 #ifndef RESYNTH
 static
 #endif
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
+void deemphasis(celt_sig *in[], opus_res *pcm, int N, int C, int downsample, const opus_val16 *coef,
       celt_sig *mem, int accum)
 {
    int c;
@@ -293,10 +293,6 @@
       return;
    }
 #endif
-#ifndef FIXED_POINT
-   (void)accum;
-   celt_assert(accum==0);
-#endif
    ALLOC(scratch, N, celt_sig);
    coef0 = coef[0];
    Nd = N/downsample;
@@ -303,7 +299,7 @@
    c=0; do {
       int j;
       celt_sig * OPUS_RESTRICT x;
-      opus_val16  * OPUS_RESTRICT y;
+      opus_res  * OPUS_RESTRICT y;
       celt_sig m = mem[c];
       x =in[c];
       y = pcm+c;
@@ -335,7 +331,6 @@
          apply_downsampling=1;
       } else {
          /* Shortcut for the standard (non-custom modes) case */
-#ifdef FIXED_POINT
          if (accum)
          {
             for (j=0;j<N;j++)
@@ -342,16 +337,15 @@
             {
                celt_sig tmp = SATURATE(x[j] + m + VERY_SMALL, SIG_SAT);
                m = MULT16_32_Q15(coef0, tmp);
-               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
+               y[j*C] = ADD_RES(y[j*C], SIG2RES(tmp));
             }
          } else
-#endif
          {
             for (j=0;j<N;j++)
             {
                celt_sig tmp = SATURATE(x[j] + VERY_SMALL + m, SIG_SAT);
                m = MULT16_32_Q15(coef0, tmp);
-               y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+               y[j*C] = SIG2RES(tmp);
             }
          }
       }
@@ -360,16 +354,14 @@
       if (apply_downsampling)
       {
          /* Perform down-sampling */
-#ifdef FIXED_POINT
          if (accum)
          {
             for (j=0;j<Nd;j++)
-               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
+               y[j*C] = ADD_RES(y[j*C], SIG2RES(scratch[j*downsample]));
          } else
-#endif
          {
             for (j=0;j<Nd;j++)
-               y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+               y[j*C] = SIG2RES(scratch[j*downsample]);
          }
       }
    } while (++c<C);
@@ -968,7 +960,7 @@
 }
 
 int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
-      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
+      int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
 #ifdef ENABLE_DEEP_PLC
       ,LPCNetPLCState *lpcnet
 #endif
@@ -1369,7 +1361,7 @@
 }
 
 int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
-      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
+      int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
 {
    return celt_decode_with_ec_dred(st, data, len, pcm, frame_size, dec, accum
 #ifdef ENABLE_DEEP_PLC
@@ -1381,16 +1373,40 @@
 #ifdef CUSTOM_MODES
 
 #ifdef FIXED_POINT
+#ifdef ENABLE_RES24
 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
 {
+   int j, ret, C, N;
+   VARDECL(opus_res, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+   /* Decode into an opus_res scratch buffer; samples are narrowed to 16 bits below. */
+   ALLOC(out, C*N, opus_res);
+   ret = celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
+   if (ret>0)   /* ret<=0: nothing is written to pcm */
+      for (j=0;j<C*ret;j++)
+         pcm[j]=RES2INT16(out[j]);
+
+   RESTORE_STACK;
+   return ret;
+}
+#else /* !ENABLE_RES24: the decoder writes straight into the caller's 16-bit buffer */
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
    return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
 }
+#endif /* ENABLE_RES24 */
 
 #ifndef DISABLE_FLOAT_API
 int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
 {
    int j, ret, C, N;
-   VARDECL(opus_int16, out);
+   VARDECL(opus_res, out);
    ALLOC_STACK;
 
    if (pcm==NULL)
@@ -1399,11 +1415,11 @@
    C = st->channels;
    N = frame_size;
 
-   ALLOC(out, C*N, opus_int16);
+   ALLOC(out, C*N, opus_res);
    ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
    if (ret>0)
       for (j=0;j<C*ret;j++)
-         pcm[j]=out[j]*(1.f/32768.f);
+         pcm[j]=RES2FLOAT(out[j]);
 
    RESTORE_STACK;
    return ret;
@@ -1434,7 +1450,7 @@
 
    if (ret>0)
       for (j=0;j<C*ret;j++)
-         pcm[j] = FLOAT2INT16 (out[j]);
+         pcm[j] = RES2INT16 (out[j]);
 
    RESTORE_STACK;
    return ret;
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -2591,7 +2591,7 @@
    N=frame_size;
    ALLOC(in, C*N, celt_sig);
    for (j=0;j<C*N;j++) {
-     in[j] = SCALEOUT(pcm[j]);
+     in[j] = (1.0f/32768)*pcm[j];
    }
 
    ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
--- a/configure.ac
+++ b/configure.ac
@@ -151,6 +151,14 @@
   AC_DEFINE([DISABLE_FLOAT_API], [1], [Do not build the float API])
 ])
 
+AC_ARG_ENABLE([fixed-res24],
+    [AS_HELP_STRING([--enable-fixed-res24], [Use 24-bit internal resolution for fixed-point implementation])],,
+    [enable_fixed_res24=no])
+
+AS_IF([test "$enable_fixed_res24" = "yes"],[
+  AC_DEFINE([ENABLE_RES24], [1], [24-bit internal resolution for fixed-point])
+])
+
 AC_ARG_ENABLE([custom-modes],
     [AS_HELP_STRING([--enable-custom-modes], [enable non-Opus modes, e.g. 44.1 kHz & 2^n frames])],,
     [enable_custom_modes=no])
--- a/silk/API.h
+++ b/silk/API.h
@@ -129,7 +129,7 @@
     opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
     opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
     ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
-    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
+    opus_res                        *samplesOut,        /* O    Decoded output speech vector                    */
     opus_int32                      *nSamplesOut,       /* O    Number of samples decoded                       */
 #ifdef ENABLE_DEEP_PLC
     LPCNetPLCState                  *lpcnet,
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -135,7 +135,7 @@
     opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
     opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
     ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
-    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
+    opus_res                        *samplesOut,        /* O    Decoded output speech vector                    */
     opus_int32                      *nSamplesOut,       /* O    Number of samples decoded                       */
 #ifdef ENABLE_DEEP_PLC
     LPCNetPLCState                  *lpcnet,
@@ -147,7 +147,6 @@
     opus_int32 nSamplesOutDec, LBRR_symbol;
     opus_int16 *samplesOut1_tmp[ 2 ];
     VARDECL( opus_int16, samplesOut1_tmp_storage1 );
-    VARDECL( opus_int16, samplesOut1_tmp_storage2 );
     VARDECL( opus_int16, samplesOut2_tmp );
     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
     opus_int16 *resample_out_ptr;
@@ -155,7 +154,6 @@
     silk_decoder_state *channel_state = psDec->channel_state;
     opus_int has_side;
     opus_int stereo_to_mono;
-    int delay_stack_alloc;
     SAVE_STACK;
 
     celt_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@@ -312,19 +310,10 @@
     /* Check if the temp buffer fits into the output PCM buffer. If it fits,
        we can delay allocating the temp buffer until after the SILK peak stack
        usage. We need to use a < and not a <= because of the two extra samples. */
-    delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
-          < decControl->API_sampleRate*decControl->nChannelsAPI;
-    ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
-           : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
+    ALLOC( samplesOut1_tmp_storage1, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
            opus_int16 );
-    if ( delay_stack_alloc )
-    {
-       samplesOut1_tmp[ 0 ] = samplesOut;
-       samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
-    } else {
-       samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
-       samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
-    }
+    samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
+    samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
 
     if( lostFlag == FLAG_DECODE_NORMAL ) {
         has_side = !decode_only_middle;
@@ -384,23 +373,9 @@
     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
 
     /* Set up pointers to temp buffers */
-    ALLOC( samplesOut2_tmp,
-           decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
-    if( decControl->nChannelsAPI == 2 ) {
-        resample_out_ptr = samplesOut2_tmp;
-    } else {
-        resample_out_ptr = samplesOut;
-    }
+    ALLOC( samplesOut2_tmp, *nSamplesOut, opus_int16 );
+    resample_out_ptr = samplesOut2_tmp;
 
-    ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
-           ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
-           : ALLOC_NONE,
-           opus_int16 );
-    if ( delay_stack_alloc ) {
-       OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
-       samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
-       samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
-    }
     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
 
         /* Resample decoded signal to API_sampleRate */
@@ -409,8 +384,12 @@
         /* Interleave if stereo output and stereo stream */
         if( decControl->nChannelsAPI == 2 ) {
             for( i = 0; i < *nSamplesOut; i++ ) {
-                samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
+                samplesOut[ n + 2 * i ] = INT16TORES(resample_out_ptr[ i ]);
             }
+        } else {
+            for( i = 0; i < *nSamplesOut; i++ ) {
+                samplesOut[ i ] = INT16TORES(resample_out_ptr[ i ]);
+            }
         }
     }
 
@@ -422,7 +401,7 @@
             ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
 
             for( i = 0; i < *nSamplesOut; i++ ) {
-                samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
+                samplesOut[ 1 + 2 * i ] = INT16TORES(resample_out_ptr[ i ]);
             }
         } else {
             for( i = 0; i < *nSamplesOut; i++ ) {
--- a/src/mapping_matrix.c
+++ b/src/mapping_matrix.c
@@ -118,7 +118,7 @@
 
 void mapping_matrix_multiply_channel_out_float(
     const MappingMatrix *matrix,
-    const opus_val16 *input,
+    const opus_res *input,
     int input_row,
     int input_rows,
     float *output,
@@ -137,11 +137,7 @@
 
   for (i = 0; i < frame_size; i++)
   {
-#if defined(FIXED_POINT)
-    input_sample = (1/32768.f)*input[input_rows * i];
-#else
-    input_sample = input[input_rows * i];
-#endif
+    input_sample = RES2FLOAT(input[input_rows * i]);
     for (row = 0; row < output_rows; row++)
     {
       float tmp =
@@ -195,7 +191,7 @@
 
 void mapping_matrix_multiply_channel_out_short(
     const MappingMatrix *matrix,
-    const opus_val16 *input,
+    const opus_res *input,
     int input_row,
     int input_rows,
     opus_int16 *output,
@@ -213,11 +209,7 @@
 
   for (i = 0; i < frame_size; i++)
   {
-#if defined(FIXED_POINT)
-    input_sample = (opus_int32)input[input_rows * i];
-#else
-    input_sample = (opus_int32)FLOAT2INT16(input[input_rows * i]);
-#endif
+    input_sample = RES2INT16(input[input_rows * i]);
     for (row = 0; row < output_rows; row++)
     {
       opus_int32 tmp =
--- a/src/mapping_matrix.h
+++ b/src/mapping_matrix.h
@@ -74,7 +74,7 @@
 
 void mapping_matrix_multiply_channel_out_float(
     const MappingMatrix *matrix,
-    const opus_val16 *input,
+    const opus_res *input,
     int input_row,
     int input_rows,
     float *output,
@@ -95,7 +95,7 @@
 
 void mapping_matrix_multiply_channel_out_short(
     const MappingMatrix *matrix,
-    const opus_val16 *input,
+    const opus_res *input,
     int input_row,
     int input_rows,
     opus_int16 *output,
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -202,8 +202,9 @@
    return st;
 }
 
-static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2,
-      opus_val16 *out, int overlap, int channels,
+#ifdef ENABLE_RES24
+static void smooth_fade(const opus_res *in1, const opus_res *in2,
+      opus_res *out, int overlap, int channels,
       const opus_val16 *window, opus_int32 Fs)
 {
    int i, c;
@@ -213,11 +214,29 @@
       for (i=0;i<overlap;i++)
       {
          opus_val16 w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+         out[i*channels+c] = ADD32(MULT16_32_Q15(w,in2[i*channels+c]),
+                                   MULT16_32_Q15(Q15ONE-w, in1[i*channels+c]));
+      }
+   }
+}
+#else
+static void smooth_fade(const opus_res *in1, const opus_res *in2,
+      opus_res *out, int overlap, int channels,
+      const opus_val16 *window, opus_int32 Fs)
+{
+   int i, c;
+   int inc = 48000/Fs;
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<overlap;i++)
+      {
+         opus_val16 w = MULT16_16_Q15(window[i*inc], window[i*inc]);
          out[i*channels+c] = SHR32(MAC16_16(MULT16_16(w,in2[i*channels+c]),
                                    Q15ONE-w, in1[i*channels+c]), 15);
       }
    }
 }
+#endif
 
 static int opus_packet_get_mode(const unsigned char *data)
 {
@@ -235,7 +254,7 @@
 }
 
 static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
-      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+      opus_int32 len, opus_res *pcm, int frame_size, int decode_fec)
 {
    void *silk_dec;
    CELTDecoder *celt_dec;
@@ -242,15 +261,13 @@
    int i, silk_ret=0, celt_ret=0;
    ec_dec dec;
    opus_int32 silk_frame_size;
-   int pcm_silk_size;
-   VARDECL(opus_int16, pcm_silk);
    int pcm_transition_silk_size;
-   VARDECL(opus_val16, pcm_transition_silk);
+   VARDECL(opus_res, pcm_transition_silk);
    int pcm_transition_celt_size;
-   VARDECL(opus_val16, pcm_transition_celt);
-   opus_val16 *pcm_transition=NULL;
+   VARDECL(opus_res, pcm_transition_celt);
+   opus_res *pcm_transition=NULL;
    int redundant_audio_size;
-   VARDECL(opus_val16, redundant_audio);
+   VARDECL(opus_res, redundant_audio);
 
    int audiosize;
    int mode;
@@ -335,11 +352,7 @@
 
    /* In fixed-point, we can tell CELT to do the accumulation on top of the
       SILK PCM buffer. This saves some stack space. */
-#ifdef FIXED_POINT
-   celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10);
-#else
-   celt_accum = 0;
-#endif
+   celt_accum = (mode != MODE_CELT_ONLY);
 
    pcm_transition_silk_size = ALLOC_NONE;
    pcm_transition_celt_size = ALLOC_NONE;
@@ -355,7 +368,7 @@
       else
          pcm_transition_silk_size = F5*st->channels;
    }
-   ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16);
+   ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_res);
    if (transition && mode == MODE_CELT_ONLY)
    {
       pcm_transition = pcm_transition_celt;
@@ -370,21 +383,22 @@
       frame_size = audiosize;
    }
 
-   /* Don't allocate any memory when in CELT-only mode */
-   pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
-   ALLOC(pcm_silk, pcm_silk_size, opus_int16);
-
    /* SILK processing */
    if (mode != MODE_CELT_ONLY)
    {
       int lost_flag, decoded_samples;
-      opus_int16 *pcm_ptr;
-#ifdef FIXED_POINT
-      if (celt_accum)
-         pcm_ptr = pcm;
-      else
-#endif
+      opus_res *pcm_ptr;
+      int pcm_too_small;
+      int pcm_silk_size = ALLOC_NONE;
+      VARDECL(opus_res, pcm_silk);
+      pcm_too_small = (frame_size < F10);
+      if (pcm_too_small)
+         pcm_silk_size = F10*st->channels;
+      ALLOC(pcm_silk, pcm_silk_size, opus_res);
+      if (pcm_too_small)
          pcm_ptr = pcm_silk;
+      else
+         pcm_ptr = pcm;
 
       if (st->prev_mode==MODE_CELT_ONLY)
          silk_ResetDecoder( silk_dec );
@@ -447,6 +461,9 @@
         pcm_ptr += silk_frame_size * st->channels;
         decoded_samples += silk_frame_size;
       } while( decoded_samples < frame_size );
+     if (pcm_too_small) {
+        OPUS_COPY(pcm, pcm_silk, frame_size*st->channels);
+     }
    }
 
    start_band = 0;
@@ -488,7 +505,7 @@
       pcm_transition_silk_size=ALLOC_NONE;
    }
 
-   ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16);
+   ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_res);
 
    if (transition && mode != MODE_CELT_ONLY)
    {
@@ -526,7 +543,7 @@
 
    /* Only allocation memory for redundancy if/when needed */
    redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE;
-   ALLOC(redundant_audio, redundant_audio_size, opus_val16);
+   ALLOC(redundant_audio, redundant_audio_size, opus_res);
 
    /* 5 ms redundant frame for CELT->SILK*/
    if (redundancy && celt_to_silk)
@@ -574,18 +591,7 @@
       }
    }
 
-   if (mode != MODE_CELT_ONLY && !celt_accum)
    {
-#ifdef FIXED_POINT
-      for (i=0;i<frame_size*st->channels;i++)
-         pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i]));
-#else
-      for (i=0;i<frame_size*st->channels;i++)
-         pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]);
-#endif
-   }
-
-   {
       const CELTMode *celt_mode;
       MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode)));
       window = celt_mode->window;
@@ -668,7 +674,7 @@
 }
 
 int opus_decode_native(OpusDecoder *st, const unsigned char *data,
-      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec,
+      opus_int32 len, opus_res *pcm, int frame_size, int decode_fec,
       int self_delimited, opus_int32 *packet_offset, int soft_clip, const OpusDRED *dred, opus_int32 dred_offset)
 {
    int i, nb_samples;
@@ -811,7 +817,41 @@
 }
 
 #ifdef FIXED_POINT
+#ifdef ENABLE_RES24
+/* 16-bit API for ENABLE_RES24 builds: decode into a temporary opus_res buffer,
+   then convert each sample to 16 bits with RES2INT16(). */
+int opus_decode(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{
+   VARDECL(opus_res, out);
+   int ret, i, nb_samples;
+   ALLOC_STACK;
 
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   if (data != NULL && len > 0 && !decode_fec)
+   {
+      nb_samples = opus_decoder_get_nb_samples(st, data, len);
+      if (nb_samples<=0)
+      {
+         RESTORE_STACK;   /* same unwinding as the OPUS_BAD_ARG exit above */
+         return OPUS_INVALID_PACKET;
+      }
+      frame_size = IMIN(frame_size, nb_samples);
+   }
+   celt_assert(st->channels == 1 || st->channels == 2);
+   ALLOC(out, frame_size*st->channels, opus_res);
+   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
+   if (ret > 0)   /* ret<=0: pcm is left untouched */
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = RES2INT16(out[i]);
+   RESTORE_STACK;
+   return ret;
+}
+#else
 int opus_decode(OpusDecoder *st, const unsigned char *data,
       opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
 {
@@ -819,12 +859,13 @@
       return OPUS_BAD_ARG;
    return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
 }
+#endif
 
 #ifndef DISABLE_FLOAT_API
 int opus_decode_float(OpusDecoder *st, const unsigned char *data,
       opus_int32 len, float *pcm, int frame_size, int decode_fec)
 {
-   VARDECL(opus_int16, out);
+   VARDECL(opus_res, out);
    int ret, i;
    int nb_samples;
    ALLOC_STACK;
@@ -843,13 +884,13 @@
          return OPUS_INVALID_PACKET;
    }
    celt_assert(st->channels == 1 || st->channels == 2);
-   ALLOC(out, frame_size*st->channels, opus_int16);
+   ALLOC(out, frame_size*st->channels, opus_res);
 
    ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
    if (ret > 0)
    {
       for (i=0;i<ret*st->channels;i++)
-         pcm[i] = (1.f/32768.f)*(out[i]);
+         pcm[i] = RES2FLOAT(out[i]);
    }
    RESTORE_STACK;
    return ret;
@@ -887,7 +928,7 @@
    if (ret > 0)
    {
       for (i=0;i<ret*st->channels;i++)
-         pcm[i] = FLOAT2INT16(out[i]);
+         pcm[i] = RES2INT16(out[i]);
    }
    RESTORE_STACK;
    return ret;
--- a/src/opus_multistream_decoder.c
+++ b/src/opus_multistream_decoder.c
@@ -193,7 +193,7 @@
    int s, c;
    char *ptr;
    int do_plc=0;
-   VARDECL(opus_val16, buf);
+   VARDECL(opus_res, buf);
    ALLOC_STACK;
 
    VALIDATE_MS_DECODER(st);
@@ -205,7 +205,7 @@
    /* Limit frame_size to avoid excessive stack allocations. */
    MUST_SUCCEED(opus_multistream_decoder_ctl(st, OPUS_GET_SAMPLE_RATE(&Fs)));
    frame_size = IMIN(frame_size, Fs/25*3);
-   ALLOC(buf, 2*frame_size, opus_val16);
+   ALLOC(buf, 2*frame_size, opus_res);
    ptr = (char*)st + align(sizeof(OpusMSDecoder));
    coupled_size = opus_decoder_get_size(2);
    mono_size = opus_decoder_get_size(1);
@@ -311,7 +311,7 @@
   void *dst,
   int dst_stride,
   int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
   int src_stride,
   int frame_size,
   void *user_data
@@ -324,11 +324,7 @@
    if (src != NULL)
    {
       for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
-         float_dst[i*dst_stride+dst_channel] = (1/32768.f)*src[i*src_stride];
-#else
-         float_dst[i*dst_stride+dst_channel] = src[i*src_stride];
-#endif
+         float_dst[i*dst_stride+dst_channel] = RES2FLOAT(src[i*src_stride]);
    }
    else
    {
@@ -342,7 +338,7 @@
   void *dst,
   int dst_stride,
   int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
   int src_stride,
   int frame_size,
   void *user_data
@@ -355,11 +351,7 @@
    if (src != NULL)
    {
       for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
-         short_dst[i*dst_stride+dst_channel] = src[i*src_stride];
-#else
-         short_dst[i*dst_stride+dst_channel] = FLOAT2INT16(src[i*src_stride]);
-#endif
+         short_dst[i*dst_stride+dst_channel] = RES2INT16(src[i*src_stride]);
    }
    else
    {
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -138,7 +138,7 @@
   void *dst,
   int dst_stride,
   int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
   int src_stride,
   int frame_size,
   void *user_data
@@ -186,7 +186,7 @@
       int analysis_channels, downmix_func downmix, int float_api);
 
 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
-      opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
+      opus_res *pcm, int frame_size, int decode_fec, int self_delimited,
       opus_int32 *packet_offset, int soft_clip, const OpusDRED *dred, opus_int32 dred_offset);
 
 /* Make sure everything is properly aligned. */
--- a/src/opus_projection_decoder.c
+++ b/src/opus_projection_decoder.c
@@ -49,7 +49,7 @@
   void *dst,
   int dst_stride,
   int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
   int src_stride,
   int frame_size,
   void *user_data)
@@ -72,7 +72,7 @@
   void *dst,
   int dst_stride,
   int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
   int src_stride,
   int frame_size,
   void *user_data)
--- a/tests/test_opus_projection.c
+++ b/tests/test_opus_projection.c
@@ -94,13 +94,13 @@
 
   int i, ret;
   opus_int32 simple_matrix_size;
-  opus_val16 *input_val16;
+  opus_res *input_pcm;
   opus_val16 *output_val16;
   opus_int16 *output_int16;
   MappingMatrix *simple_matrix;
 
   /* Allocate input/output buffers. */
-  input_val16 = (opus_val16 *)opus_alloc(sizeof(opus_val16) * SIMPLE_MATRIX_INPUT_SIZE);
+  input_pcm = (opus_res *)opus_alloc(sizeof(opus_res) * SIMPLE_MATRIX_INPUT_SIZE);
   output_int16 = (opus_int16 *)opus_alloc(sizeof(opus_int16) * SIMPLE_MATRIX_OUTPUT_SIZE);
   output_val16 = (opus_val16 *)opus_alloc(sizeof(opus_val16) * SIMPLE_MATRIX_OUTPUT_SIZE);
 
@@ -118,11 +118,7 @@
   /* Copy inputs. */
   for (i = 0; i < SIMPLE_MATRIX_INPUT_SIZE; i++)
   {
-#ifdef FIXED_POINT
-    input_val16[i] = input_int16[i];
-#else
-    input_val16[i] = (1/32768.f)*input_int16[i];
-#endif
+    input_pcm[i] = INT16TORES(input_int16[i]);
   }
 
   /* _in_short */
@@ -144,7 +140,7 @@
   for (i = 0; i < simple_matrix->cols; i++)
   {
     mapping_matrix_multiply_channel_out_short(simple_matrix,
-      &input_val16[i], i, simple_matrix->cols, output_int16,
+      &input_pcm[i], i, simple_matrix->cols, output_int16,
       simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
   }
   ret = assert_is_equal_short(output_int16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
@@ -158,7 +154,7 @@
   for (i = 0; i < simple_matrix->rows; i++)
   {
     mapping_matrix_multiply_channel_in_float(simple_matrix,
-      input_val16, simple_matrix->cols, &output_val16[i], i,
+      input_pcm, simple_matrix->cols, &output_val16[i], i,
       simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
   }
   ret = assert_is_equal(output_val16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
@@ -171,7 +167,7 @@
   for (i = 0; i < simple_matrix->cols; i++)
   {
     mapping_matrix_multiply_channel_out_float(simple_matrix,
-      &input_val16[i], i, simple_matrix->cols, output_val16,
+      &input_pcm[i], i, simple_matrix->cols, output_val16,
       simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
   }
   ret = assert_is_equal(output_val16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
@@ -179,7 +175,7 @@
     test_failed();
 #endif
 
-  opus_free(input_val16);
+  opus_free(input_pcm);
   opus_free(output_int16);
   opus_free(output_val16);
   opus_free(simple_matrix);
--