shithub: opus

--- a/celt/arch.h

+++ b/celt/arch.h

@@ -105,8 +105,8 @@

 #define IMAX(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum int value.   */

 #define UADD32(a,b) ((a)+(b))

 #define USUB32(a,b) ((a)-(b))

-#define MAXG(a,b) MAX16(a, b)

-#define MING(a,b) MIN16(a, b)

+#define MAXG(a,b) MAX32(a, b)

+#define MING(a,b) MIN32(a, b)

 /* Throughout the code, we use the following scaling for signals:

    FLOAT: used for float API, normalized to +/-1.

@@ -136,7 +136,7 @@

 typedef opus_val32 celt_sig;

 typedef opus_val16 celt_norm;

 typedef opus_val32 celt_ener;

-typedef opus_val16 celt_glog;

+typedef opus_val32 celt_glog;

 #ifdef ENABLE_RES24

 typedef opus_val32 opus_res;

@@ -190,7 +190,7 @@

 #define NORM_SCALING 16384

-#define DB_SHIFT 10

+#define DB_SHIFT 24

 #define EPSILON 1

 #define VERY_SMALL 0

--- a/celt/bands.c

+++ b/celt/bands.c

@@ -231,16 +231,16 @@

    for (i=start;i<end;i++)

       int j, band_end;

-      opus_val16 g;

-      opus_val16 lg;

+      opus_val32 g;

+      celt_glog lg;

 #ifdef FIXED_POINT

       int shift;

 #endif

       j=M*eBands[i];

       band_end = M*eBands[i+1];

-      lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],DB_SHIFT-4)));

+      lg = ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],DB_SHIFT-4));

 #ifndef FIXED_POINT

-      g = celt_exp2(MIN32(32.f, lg));

+      g = celt_exp2_db(MIN32(32.f, lg));

 #else

       /* Handle the integer part of the log energy */

       shift = 16-(lg>>DB_SHIFT);

@@ -250,7 +250,7 @@

          g=0;

       } else {

          /* Handle the fractional part. */

-         g = celt_exp2_frac((lg&((1<<DB_SHIFT)-1))>>(DB_SHIFT-10));

+         g = celt_exp2_db_frac((lg&((1<<DB_SHIFT)-1)));

       /* Handle extreme gains with negative shift. */

       if (shift<0)

@@ -260,17 +260,17 @@

             This shouldn't trigger unless the bitstream is already corrupted. */

          if (shift <= -2)

-            g = 16384;

+            g = 16384*32768;

             shift = -2;

          do {

-            *f++ = SHL32(MULT16_16(*x++, g), -shift);

+            *f++ = SHL32(MULT16_32_Q15(*x++, g), -shift);

          } while (++j<band_end);

       } else

 #endif

          /* Be careful of the fixed-point "else" just above when changing this code */

          do {

-            *f++ = SHR32(MULT16_16(*x++, g), shift);

+            *f++ = SHR32(MULT16_32_Q15(*x++, g), shift);

          } while (++j<band_end);

    celt_assert(start <= end);

@@ -328,13 +328,13 @@

             prev1 = MAXG(prev1,prev1logE[m->nbEBands+i]);

             prev2 = MAXG(prev2,prev2logE[m->nbEBands+i]);

-         Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MING(prev1,prev2));

+         Ediff = logE[c*m->nbEBands+i]-MING(prev1,prev2);

          Ediff = MAX32(0, Ediff);

 #ifdef FIXED_POINT

          if (Ediff < GCONST(16.f))

-            opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);

+            opus_val32 r32 = SHR32(celt_exp2_db(-Ediff),1);

             r = 2*MIN16(16383,r32);

          } else {

             r = 0;

@@ -346,7 +346,7 @@

 #else

          /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because

             short blocks don't have the same energy as long */

-         r = 2.f*celt_exp2(-Ediff);

+         r = 2.f*celt_exp2_db(-Ediff);

          if (LM==3)

             r *= 1.41421356f;

          r = MIN16(thresh, r);

--- a/celt/celt_decoder.c

+++ b/celt/celt_decoder.c

@@ -1177,6 +1177,7 @@

                E1 = oldLogE[c*nbEBands+i];

                E2 = oldLogE2[c*nbEBands+i];

                slope = MAX32(E1 - E0, HALF32(E2 - E0));

+               slope = MING(slope, GCONST(2.f));

                E0 -= MAX32(0, (1+missing)*slope);

                oldBandE[c*nbEBands+i] = MAX32(-GCONST(20.f), E0);

             } else {

--- a/celt/celt_encoder.c

+++ b/celt/celt_encoder.c

@@ -457,7 +457,7 @@

-         opus_val16 x1, x2;

+         celt_glog x1, x2; /* log energies now carry DB_SHIFT fractional bits and are compared with 32-bit MAXG/SUB32; a 16-bit temporary would truncate them */

          x1 = MAXG(0, newE[i + c*nbEBands]);

          x2 = MAXG(0, spread_old[i]);

-         mean_diff = ADD32(mean_diff, EXTEND32(MAXG(0, SUB16(x1, x2))));

+         mean_diff = ADD32(mean_diff, MAXG(0, SUB32(x1, x2)));

    } while (++c<C);

    mean_diff = DIV32(mean_diff, C*(end-1-IMAX(2,start)));

@@ -864,12 +864,12 @@

    c=0; do {

       for (i=0;i<end-1;i++)

-         diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end);

+         diff += SHR32(bandLogE[i+c*m->nbEBands], 5)*(opus_int32)(2+2*i-end);

    } while (++c<C);

    diff /= C*(end-1);

    /*printf("%f\n", diff);*/

-   trim -= MAX32(-QCONST16(2.f, 8), MIN32(QCONST16(2.f, 8), SHR32(diff+GCONST(1.f),DB_SHIFT-8)/6 ));

+   trim -= MAX32(-QCONST16(2.f, 8), MIN32(QCONST16(2.f, 8), SHR32(diff+QCONST32(1.f, DB_SHIFT-5),DB_SHIFT-13)/6 ));

    trim -= SHR16(surround_trim, DB_SHIFT-8);

    trim -= 2*SHR16(tf_estimate, 14-8);

 #ifndef DISABLE_FLOAT_API

@@ -1010,9 +1010,9 @@

       /* Noise floor must take into account eMeans, the depth, the width of the bands

          and the preemphasis filter (approx. square of bark band ID) */

-      noise_floor[i] = MULT16_16(GCONST(0.0625f),logN[i])

-            +GCONST(.5f)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],DB_SHIFT-4)

-            +MULT16_16(GCONST(.0062),(i+5)*(i+5));

+      noise_floor[i] = GCONST(0.0625f)*logN[i]

+            +GCONST(.5f)+SHL32(9-lsb_depth,DB_SHIFT)-SHL32(eMeans[i],DB_SHIFT-4)

+            +GCONST(.0062f)*(i+5)*(i+5);

    c=0;do

@@ -1111,7 +1111,7 @@

             /* Consider 24 dB "cross-talk" */

             follower[nbEBands+i] = MAXG(follower[nbEBands+i], follower[         i]-GCONST(4.f));

             follower[         i] = MAXG(follower[         i], follower[nbEBands+i]-GCONST(4.f));

-            follower[i] = HALF16(MAXG(0, bandLogE[i]-follower[i]) + MAXG(0, bandLogE[nbEBands+i]-follower[nbEBands+i]));

+            follower[i] = HALF32(MAXG(0, bandLogE[i]-follower[i]) + MAXG(0, bandLogE[nbEBands+i]-follower[nbEBands+i]));

       } else {

          for (i=start;i<end;i++)

@@ -1124,9 +1124,9 @@

       for (i=start;i<end;i++)

 #ifdef FIXED_POINT

-         importance[i] = PSHR32(13*celt_exp2(MING(follower[i], GCONST(4.f))), 16);

+         importance[i] = PSHR32(13*celt_exp2_db(MING(follower[i], GCONST(4.f))), 16);

 #else

-         importance[i] = (int)floor(.5f+13*celt_exp2(MING(follower[i], GCONST(4.f))));

+         importance[i] = (int)floor(.5f+13*celt_exp2_db(MING(follower[i], GCONST(4.f))));

 #endif

       /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */

@@ -1173,16 +1173,17 @@

          follower[i] = MING(follower[i], GCONST(4));

+         follower[i] = SHR32(follower[i], 8);

          width = C*(eBands[i+1]-eBands[i])<<LM;

          if (width<6)

-            boost = (int)SHR32(EXTEND32(follower[i]),DB_SHIFT);

+            boost = (int)SHR32(follower[i],DB_SHIFT-8);

             boost_bits = boost*width<<BITRES;

          } else if (width > 48) {

-            boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT);

+            boost = (int)SHR32(follower[i]*8,DB_SHIFT-8);

             boost_bits = (boost*width<<BITRES)/8;

          } else {

-            boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);

+            boost = (int)SHR32(follower[i]*width/6,DB_SHIFT-8);

             boost_bits = boost*6<<BITRES;

          /* For CBR and non-transient CVBR frames, limit dynalloc to 2/3 of the bits */

@@ -1491,7 +1492,7 @@

       int constrained_vbr, opus_val16 stereo_saving, int tot_boost,

       opus_val16 tf_estimate, int pitch_change, celt_glog maxDepth,

       int lfe, int has_surround_mask, celt_glog surround_masking,

-      opus_val16 temporal_vbr)

+      celt_glog temporal_vbr)

    /* The target rate in 8th bits per frame */

    opus_int32 target;

@@ -1559,7 +1560,7 @@

    if (has_surround_mask&&!lfe)

-      opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT);

+      opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(SHR32(surround_masking,DB_SHIFT-10),coded_bins<<BITRES), 10);

       /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/

       target = IMAX(target/4, surround_target);

@@ -1569,7 +1570,7 @@

       int bins;

       bins = eBands[nbEBands-2]<<LM;

       /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/

-      floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);

+      floor_depth = (opus_int32)SHR32(MULT16_32_Q15((C*bins<<BITRES),maxDepth), DB_SHIFT-15);

       floor_depth = IMAX(floor_depth, target>>2);

       target = IMIN(target, floor_depth);

       /*printf("%f %d\n", maxDepth, floor_depth);*/

@@ -1587,7 +1588,7 @@

       opus_val16 amount;

       opus_val16 tvbr_factor;

       amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate)));

-      tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT);

+      tvbr_factor = SHR32(MULT16_16(SHR32(temporal_vbr, DB_SHIFT-10), amount), 10);

       target += (opus_int32)MULT16_32_Q15(tvbr_factor, target);

@@ -1662,7 +1663,7 @@

    int signalBandwidth;

    int transient_got_disabled=0;

    celt_glog surround_masking=0;

-   opus_val16 temporal_vbr=0;

+   celt_glog temporal_vbr=0;

    celt_glog surround_trim = 0;

    opus_int32 equiv_rate;

    int hybrid;

@@ -1959,19 +1960,21 @@

          for(i=0;i<mask_end;i++)

             celt_glog mask;

+            opus_val16 mask16;

             mask = MAXG(MING(st->energy_mask[nbEBands*c+i],

                    GCONST(.25f)), -GCONST(2.0f));

             if (mask > 0)

                mask = HALF32(mask);

-            mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);

+            mask16 = SHR32(mask, DB_SHIFT-10);

+            mask_avg += MULT16_16(mask16, eBands[i+1]-eBands[i]);

             count += eBands[i+1]-eBands[i];

-            diff += MULT16_16(mask, 1+2*i-mask_end);

+            diff += MULT16_16(mask16, 1+2*i-mask_end);

       celt_assert(count>0);

-      mask_avg = DIV32_16(mask_avg,count);

+      mask_avg = SHL32(DIV32_16(mask_avg,count), DB_SHIFT-10);

       mask_avg += GCONST(.2f);

-      diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);

+      diff = SHL32(diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end), DB_SHIFT-10);

       /* Again, being conservative */

       diff = HALF32(diff);

       diff = MAX32(MIN32(diff, GCONST(.031f)), -GCONST(.031f));

@@ -2021,20 +2024,20 @@

    /* Temporal VBR (but not for LFE) */

    if (!st->lfe)

-      celt_glog follow=-GCONST(10.0f);

+      celt_glog follow=-QCONST32(10.0f, DB_SHIFT-5);

       opus_val32 frame_avg=0;

-      celt_glog offset = shortBlocks?HALF32(SHL32(LM, DB_SHIFT)):0;

+      celt_glog offset = shortBlocks?HALF32(SHL32(LM, DB_SHIFT-5)):0;

       for(i=start;i<end;i++)

-         follow = MAXG(follow-GCONST(1.f), bandLogE[i]-offset);

+         follow = MAXG(follow-QCONST32(1.0f, DB_SHIFT-5), SHR32(bandLogE[i],5)-offset);

          if (C==2)

-            follow = MAXG(follow, bandLogE[i+nbEBands]-offset);

+            follow = MAXG(follow, SHR32(bandLogE[i+nbEBands],5)-offset);

          frame_avg += follow;

       frame_avg /= (end-start);

-      temporal_vbr = SUB16(frame_avg,st->spec_avg);

+      temporal_vbr = SUB32(SHL32(frame_avg, 5),st->spec_avg);

       temporal_vbr = MING(GCONST(3.f), MAXG(-GCONST(1.5f), temporal_vbr));

-      st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr);

+      st->spec_avg += MULT16_32_Q15(QCONST16(.02f, 15), temporal_vbr);

    /*for (i=0;i<21;i++)

       printf("%f ", bandLogE[i]);

@@ -2123,7 +2126,7 @@

             better than fluctuations). */

          if (ABS32(SUB32(bandLogE[i+c*nbEBands], oldBandE[i+c*nbEBands])) < GCONST(2.f))

-            bandLogE[i+c*nbEBands] -= MULT16_16_Q15(energyError[i+c*nbEBands], QCONST16(0.25f, 15));

+            bandLogE[i+c*nbEBands] -= MULT16_32_Q15(QCONST16(0.25f, 15), energyError[i+c*nbEBands]);

    } while (++c < C);

--- a/celt/mathops.h

+++ b/celt/mathops.h

@@ -190,6 +190,9 @@

 #define celt_exp2(x) ((float)exp(0.6931471805599453094*(x)))

 #endif

+#define celt_exp2_db celt_exp2

+#define celt_log2_db celt_log2

 #endif

 #ifdef FIXED_POINT

@@ -225,13 +228,13 @@

    opus_val16 n, frac;

    /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605,

        0.15530808010959576, -0.08556153059057618 */

-   static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401};

+   static const opus_val16 C[5] = {-6801+(1<<(13-10)), 15746, -5217, 2545, -1401};

    if (x==0)

       return -32767;

    i = celt_ilog2(x);

    n = VSHR32(x,i-15)-32768-16384;

    frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4]))))))));

-   return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT);

+   return SHL32(i-13,10)+SHR32(frac,14-10);

/*

@@ -270,6 +273,39 @@

    frac = celt_exp2_frac(x-SHL16(integer,10));

    return VSHR32(EXTEND32(frac), -integer-2);

+#ifdef ENABLE_QEXT

+static OPUS_INLINE opus_val32 celt_log2_db(opus_val32 x) { /* log2(x / 2^14), rounded to nearest, returned with DB_SHIFT fractional bits (1.4426950409 is 1/ln(2)) */

+   return (x <= 0) ? SHL32(EXTEND32(-32767), DB_SHIFT-10) : (int)floor(.5 + (1<<DB_SHIFT) * 1.4426950409f*log(x/(float)(1<<14))); /* log() of a non-positive x is -inf/NaN and converting that to int is undefined; return what the non-QEXT macro yields for celt_log2(0) instead */

+}

+static OPUS_INLINE opus_val32 celt_exp2_db_frac(opus_val32 x) /* Returns 2^(x / 2^DB_SHIFT), rounded to nearest and scaled by 2^29 (2^14 * 32768). */

+{

+   return (int)floor(.5f + (1<<14)*exp(0.6931471806f*x/(float)(1<<DB_SHIFT))*32768.f); /* 0.6931471806 is ln(2), so exp() evaluates 2^(x/2^DB_SHIFT); callers visible in this change pass only the fractional bits of a DB_SHIFT value */

+}

+/** Base-2 exponential (2^x). Input x has DB_SHIFT fractional bits; output is Q16. Returns 0x7f000000 when the integer part of x exceeds 14 and 0 when it is <= -14. */

+static OPUS_INLINE opus_val32 celt_exp2_db(opus_val32 x)

+{

+   int integer;

+   opus_val32 frac;

+   integer = SHR32(x,DB_SHIFT); /* integer part of x */

+   if (integer>14)

+      return 0x7f000000; /* saturate: the result would not fit the Q16 output */

+   else if (integer <= -14)

+      return 0; /* treated as too small to represent */

+   frac = celt_exp2_db_frac(x-SHL32(integer,DB_SHIFT)); /* 2^(fractional part); the celt_exp2_db_frac() defined just above returns it scaled by 2^29 */

+   return VSHR32(frac, -integer-2+15); /* net shift is 13-integer: for integer==0 this takes the 2^29 scaling down to Q16; presumably VSHR32 shifts left when the amount is negative (integer==14) - confirm against its definition */

+}

+#else

+#define celt_log2_db(x) SHL32(EXTEND32(celt_log2(x)), DB_SHIFT-10) /* celt_log2() produces 10 fractional bits; widen to 32 bits and rescale to DB_SHIFT */

+#define celt_exp2_db_frac(x) SHL32(celt_exp2_frac(PSHR32(x, DB_SHIFT-10)), 15) /* reduce x to the 10 fractional bits celt_exp2_frac() takes, then scale its result up by 2^15 */

+#define celt_exp2_db(x) celt_exp2(PSHR32(x, DB_SHIFT-10)) /* reduce x to 10 fractional bits and reuse the existing celt_exp2() */

+#endif

 opus_val32 celt_rcp(opus_val32 x);

--- a/celt/quant_bands.c

+++ b/celt/quant_bands.c

@@ -146,11 +146,11 @@

    c=0; do {

       for (i=start;i<end;i++)

-         celt_glog d = SUB32(SHR32(eBands[i+c*len], 3), SHR32(oldEBands[i+c*len], 3));

+         celt_glog d = PSHR32(SUB32(eBands[i+c*len], oldEBands[i+c*len]), DB_SHIFT-7);

          dist = MAC16_16(dist, d,d);

    } while (++c<C);

-   return MIN32(200,SHR32(dist,2*DB_SHIFT-6));

+   return MIN32(200,SHR32(dist,14));

 static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,

@@ -191,11 +191,10 @@

          x = eBands[i+c*m->nbEBands];

          oldE = MAXG(-GCONST(9.f), oldEBands[i+c*m->nbEBands]);

 #ifdef FIXED_POINT

-         f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c];

+         f = x - MULT16_32_Q15(coef,oldE) - prev[c];

          /* Rounding to nearest integer here is really important! */

-         qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7);

-         decay_bound = EXTRACT16(MAX32(-GCONST(28.f),

-               SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay)));

+         qi = (f+QCONST32(.5f,DB_SHIFT))>>DB_SHIFT;

+         decay_bound = MAXG(-GCONST(28.f), SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay));

 #else

          f = x-coef*oldE-prev[c];

          /* Rounding to nearest integer here is really important! */

@@ -243,16 +242,16 @@

          else

             qi = -1;

-         error[i+c*m->nbEBands] = PSHR32(f,7) - SHL32(qi,DB_SHIFT);

+         error[i+c*m->nbEBands] = f - SHL32(qi,DB_SHIFT);

          badness += abs(qi0-qi);

          q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);

-         tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7);

+         tmp = MULT16_32_Q15(coef,oldE) + prev[c] + q;

 #ifdef FIXED_POINT

-         tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);

+         tmp = MAX32(-GCONST(28.f), tmp);

 #endif

-         oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);

-         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));

+         oldEBands[i+c*m->nbEBands] = tmp;

+         prev[c] = prev[c] + q - MULT16_32_Q15(beta,q);

       } while (++c < C);

    return lfe ? 0 : badness;

@@ -286,7 +285,7 @@

    if (end-start>10)

 #ifdef FIXED_POINT

-      max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));

+      max_decay = SHL32(MIN32(SHR32(max_decay,DB_SHIFT-3), EXTEND32(nbAvailableBytes)),DB_SHIFT-3);

 #else

       max_decay = MIN32(max_decay, .125f*nbAvailableBytes);

 #endif

@@ -384,7 +383,7 @@

             q2 = 0;

          ec_enc_bits(enc, q2, fine_quant[i]);

 #ifdef FIXED_POINT

-         offset = SUB32(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+GCONST(.5f),fine_quant[i]),GCONST(.5f));

+         offset = SUB32(VSHR32(2*q2+1, fine_quant[i]-DB_SHIFT+1), GCONST(.5f));

 #else

          offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;

 #endif

@@ -480,12 +479,12 @@

          q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);

          oldEBands[i+c*m->nbEBands] = MAXG(-GCONST(9.f), oldEBands[i+c*m->nbEBands]);

-         tmp = PSHR32(MULT16_16(coef,oldEBands[i+c*m->nbEBands]),8) + prev[c] + SHL32(q,7);

+         tmp = MULT16_32_Q15(coef,oldEBands[i+c*m->nbEBands]) + prev[c] + q;

 #ifdef FIXED_POINT

-         tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);

+         tmp = MAX32(-GCONST(28.f), tmp);

 #endif

-         oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);

-         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));

+         oldEBands[i+c*m->nbEBands] = tmp;

+         prev[c] = prev[c] + q - MULT16_32_Q15(beta,q);

       } while (++c < C);

@@ -504,7 +503,7 @@

          celt_glog offset;

          q2 = ec_dec_bits(dec, fine_quant[i]);

 #ifdef FIXED_POINT

-         offset = SUB32(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+GCONST(.5f),fine_quant[i]),GCONST(.5f));

+         offset = SUB32(VSHR32(2*q2+1, fine_quant[i]-DB_SHIFT+1), GCONST(.5f));

 #else

          offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;

 #endif

@@ -550,7 +549,7 @@

       for (i=0;i<effEnd;i++)

          bandLogE[i+c*m->nbEBands] =

-               celt_log2(bandE[i+c*m->nbEBands])

+               celt_log2_db(bandE[i+c*m->nbEBands])

                - SHL32((celt_glog)eMeans[i],DB_SHIFT-4);

 #ifdef FIXED_POINT

          /* Compensate for bandE[] being Q12 but celt_log2() taking a Q14 input. */

--- a/src/opus_encoder.c

+++ b/src/opus_encoder.c

@@ -1981,7 +1981,7 @@

            /* Conservative rate reduction, we cut the masking in half */

            masking_depth = mask_sum / end*st->channels;

            masking_depth += GCONST(.2f);

-           rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT);

+           rate_offset = (opus_int32)PSHR32(MULT16_16(srate, SHR32(masking_depth, DB_SHIFT-10)), 10);

            rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3);

            /* Split the rate change between the SILK and CELT part for hybrid. */

            if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND)

--- a/src/opus_multistream_encoder.c

+++ b/src/opus_multistream_encoder.c

@@ -189,7 +189,7 @@

    celt_glog max;

    celt_glog diff;

    celt_glog frac;

-   static const opus_val16 diff_table[17] = {

+   static const celt_glog diff_table[17] = {

          GCONST(0.5000000f), GCONST(0.2924813f), GCONST(0.1609640f), GCONST(0.0849625f),

          GCONST(0.0437314f), GCONST(0.0221971f), GCONST(0.0111839f), GCONST(0.0056136f),

          GCONST(0.0028123f)

@@ -198,21 +198,21 @@

    if (a>b)

       max = a;

-      diff = SUB32(EXTEND32(a),EXTEND32(b));

+      diff = SUB32(a,b);

    } else {

       max = b;

-      diff = SUB32(EXTEND32(b),EXTEND32(a));

+      diff = SUB32(b,a);

    if (!(diff < GCONST(8.f)))  /* inverted to catch NaNs */

       return max;

 #ifdef FIXED_POINT

    low = SHR32(diff, DB_SHIFT-1);

-   frac = SHL32(diff - SHL32(low, DB_SHIFT-1), 16-DB_SHIFT);

+   frac = VSHR32(diff - SHL32(low, DB_SHIFT-1), DB_SHIFT-16);

 #else

    low = (int)floor(2*diff);

    frac = 2*diff - low;

 #endif

-   return max + diff_table[low] + MULT16_16_Q15(frac, SUB32(diff_table[low+1], diff_table[low]));

+   return max + diff_table[low] + MULT16_32_Q15(frac, SUB32(diff_table[low+1], diff_table[low]));

 #else

 opus_val16 logSum(opus_val16 a, opus_val16 b)

--

⑨