shithub: opus-tools

Download patch

ref: 51319268d6ccf4aca8ec0afefe2a3a41acb31497
parent: c8ea3edb24db14c9a4386d458b0823c297cae5fc
author: Gregory Maxwell <greg@xiph.org>
date: Tue Aug 23 08:51:24 EDT 2011

Add noise shaping dither to opusdec.

--- a/src/opusdec.c
+++ b/src/opusdec.c
@@ -42,6 +42,7 @@
 #include "getopt_win.h"
 #endif*/
 #include <stdlib.h>
+#include <limits.h>
 #include <string.h>
 
 #include <opus.h>
@@ -82,6 +83,10 @@
 #include "opus_header.h"
 #include "speex_resampler.h"
 
+#define MINI(_a,_b)      ((_a)<(_b)?(_a):(_b))
+#define MAXI(_a,_b)      ((_a)>(_b)?(_a):(_b))
+#define CLAMPI(_a,_b,_c) (MAXI(_a,MINI(_b,_c)))
+
 #define MAX_FRAME_SIZE (2*960*3)
 
 #define readint(buf, base) (((buf[base+3]<<24)&0xff000000)| \
@@ -89,6 +94,93 @@
                            ((buf[base+1]<<8)&0xff00)| \
   	           	    (buf[base]&0xff))
 
+typedef struct shapestate shapestate;
+struct shapestate {
+  float * b_buf;
+  float * a_buf;
+  int fs;
+  int mute;
+};
+
+static unsigned int rngseed = 22222;
+static inline unsigned int fast_rand() {
+  rngseed = (rngseed * 96314165) + 907633515;
+  return rngseed;
+}
+
+/* This implements a 16 bit quantization with full triangular dither
+   and IIR noise shaping. The noise shaping filters were designed by
+   Sebastian Gesemann based on the LAME ATH curves with flattening
+   to limit their peak gain to 20dB.
+   (Everyone elses' noise shaping filters are mildly crazy)
+   The 48kHz version of this filter is just a warped version of the
+   44.1kHz filter and probably could be improved by shifting the
+   HF shelf up in frequency a little bit since 48k has a bit more
+   room and being more conservative against bat-ears is probably
+   more important than more noise suppression.
+   This process can increase the peak level of the signal (in theory
+   by the peak error of 1.5 +20dB though this much is unobservable rare)
+   so to avoid clipping the signal is attenuated by a couple thousandths
+   of a dB. Initially the approach taken here was to only attenuate by
+   the 99.9th percentile, making clipping rare but not impossible (like
+   SoX) but the limited gain of the filter means that the worst case was
+   only two thousandths of a dB more, so this just uses the worst case.
+   The attenuation is probably also helpful to prevent clipping in the DAC
+   reconstruction filters or downstream resampling in any case.*/
+static inline void shape_dither_toshort(shapestate *_ss, short *_o, float *_i, int _n, int _CC)
+{
+  const float gains[3]={32768.f-15.f,32768.f-15.f,32768.f-3.f};
+  const float fcoef[3][8] =
+  {
+    {2.2374f, -.7339f, -.1251f, -.6033f, 0.9030f, .0116f, -.5853f, -.2571f}, /* 48.0kHz noise shaping filter sd=2.34*/
+    {2.2061f, -.4706f, -.2534f, -.6214f, 1.0587f, .0676f, -.6054f, -.2738f}, /* 44.1kHz noise shaping filter sd=2.51*/
+    {1.0000f, 0.0000f, 0.0000f, 0.0000f, 0.0000f,0.0000f, 0.0000f, 0.0000f}, /* lowpass noise shaping filter sd=0.65*/
+  };
+  int i;
+  int rate=_ss->fs==44100?1:(_ss->fs==48000?0:2);
+  float gain=gains[rate];
+  float *b_buf;
+  float *a_buf;
+  int mute=_ss->mute;
+  b_buf=_ss->b_buf;
+  a_buf=_ss->a_buf;
+  if(mute>64)
+    memset(a_buf,0,sizeof(float)*_CC*4);
+  for(i=0;i<_n;i++)
+  {
+    int c;
+    int pos = i*_CC;
+    int silent=1;
+    for(c=0;c<_CC;c++)
+    {
+      int j, si;
+      float r,s,err=0;
+      silent&=_i[pos+c]==0;
+      s=_i[pos+c]*gain;
+      for(j=0;j<4;j++)
+        err += fcoef[rate][j]*b_buf[c*4+j] - fcoef[rate][j+4]*a_buf[c*4+j];
+      memmove(&a_buf[c*4+1],&a_buf[c*4],sizeof(float)*3);
+      memmove(&b_buf[c*4+1],&b_buf[c*4],sizeof(float)*3);
+      a_buf[c*4]=err;
+      s = s - err;
+      r=(float)fast_rand()*(1/(float)UINT_MAX) - (float)fast_rand()*(1/(float)UINT_MAX);
+      if (mute>16)r=0;
+      /*Clamp in float out of paranoia that the input will be >96dBFS and wrap if the
+        integer is clamped.*/
+      _o[pos+c] = si = lrintf(fmaxf(-32768,fminf(s + r,32767)));
+      /*Including clipping in the noise shaping is generally disastrous:
+        the futile effort to restore the clipped energy results in more clipping.
+        However, small amounts-- at the level which could normally be created by
+        dither and rounding-- are harmless and can even reduce clipping somewhat
+        due to the clipping sometimes reducing the dither+rounding error.*/
+      b_buf[c*4] = (mute>16)?0:fmaxf(-1.5f,fminf(si - s,1.5f));
+    }
+    mute++;
+    if(!silent)mute=0;
+  }
+  _ss->mute=MINI(mute,960);
+}
+
 static void print_comments(char *comments, int length)
 {
    char *c=comments;
@@ -273,6 +365,7 @@
    printf (" --mono                Force decoding in mono\n");
    printf (" --stereo              Force decoding in stereo\n");
    printf (" --rate n              Force decoding at sampling rate n Hz\n");
+   printf (" --no-dither           Do not dither 16-bit output\n");
    printf (" --packet-loss n       Simulate n %% random packet loss\n");
    printf (" -V                    Verbose mode (show bit-rate)\n"); 
    printf (" -h, --help            This help\n");
@@ -282,13 +375,13 @@
 
 void version(void)
 {
-   printf ("opusenc (based on %s)\n",opus_get_version_string());
+   printf ("opusdec (based on %s)\n",opus_get_version_string());
    printf ("Copyright (C) 2008-2011 Jean-Marc Valin\n");
 }
 
 void version_short(void)
 {
-   printf ("opusenc (based on %s)\n",opus_get_version_string());
+   printf ("opusdec (based on %s)\n",opus_get_version_string());
    printf ("Copyright (C) 2008-2011 Jean-Marc Valin\n");
 }
 
@@ -327,7 +420,7 @@
       printf("Playback gain: %f (%f dB)\n", *gain, header.gain/256.);
    if (!quiet)
    {
-      fprintf (stderr, "Decoding %d Hz audio in", *rate);
+      fprintf (stderr, "Decoding %d Hz audio", *rate);
 
       if (*channels==1)
          fprintf (stderr, " (mono");
@@ -339,7 +432,7 @@
    return st;
 }
 
-void audio_write(float *pcm, int channels, int frame_size, FILE *fout, SpeexResamplerState *resampler, int *skip, int file)
+void audio_write(float *pcm, int channels, int frame_size, FILE *fout, SpeexResamplerState *resampler, int *skip, shapestate *shapemem, int file)
 {
    int i,tmp_skip;
    unsigned out_len;
@@ -370,14 +463,17 @@
      }
 
      /*Convert to short and save to output file*/
-     /*FIXME: This should dither for integer output*/
-     if (file){
+     if (shapemem){
+       shape_dither_toshort(shapemem,out,output,out_len,channels);
+     }else{
        for (i=0;i<out_len*channels;i++)
-         out[i]=le_short((short)lrintf(fmax(fmin(output[i]*32768.f+0.5f,32767),-32768)));
-     } else {
+         out[i]=(short)lrintf(fmax(-32768,fmin(output[i]*32768.f,32767)));
+     }
+     if ((le_short(1)!=1)&&file){
        for (i=0;i<out_len*channels;i++)
-         out[i]=(short)lrintf(fmax(fmin(output[i]*32768.f+0.5f,32767),-32768));
+         out[i]=le_short(out[i]);
      }
+
      fwrite(out+tmp_skip*channels, 2, (out_len-tmp_skip)*channels, fout);
    } while (frame_size != 0);
 }
@@ -405,6 +501,7 @@
       {"rate", required_argument, NULL, 0},
       {"mono", no_argument, NULL, 0},
       {"stereo", no_argument, NULL, 0},
+      {"no-dither", no_argument, NULL, 0},
       {"packet-loss", required_argument, NULL, 0},
       {0, 0, 0, 0}
    };
@@ -423,8 +520,15 @@
    int wav_format=0;
    int preskip=0;
    int opus_serialno = -1;
+   int dither=1;
+   shapestate shapemem;
    SpeexResamplerState *resampler=NULL;
    float gain=1;
+
+   shapemem.a_buf=0;
+   shapemem.b_buf=0;
+   shapemem.mute=960;
+   shapemem.fs=0;
    
    enh_enabled = 1;
 
@@ -460,6 +564,9 @@
          } else if (strcmp(long_options[option_index].name,"stereo")==0)
          {
             channels=2;
+         } else if (strcmp(long_options[option_index].name,"no-dither")==0)
+         {
+            dither=0;
          } else if (strcmp(long_options[option_index].name,"rate")==0)
          {
             rate=atoi (optarg);
@@ -560,6 +667,13 @@
             if (packet_count==0)
             {
                st = process_header(&op, &rate, &channels, &preskip, &gain, quiet);
+               if(shapemem.a_buf)
+                 free(shapemem.a_buf);
+               if(shapemem.b_buf)
+                 free(shapemem.b_buf);
+               shapemem.a_buf=calloc(channels,sizeof(float)*4);
+               shapemem.b_buf=calloc(channels,sizeof(float)*4);
+               shapemem.fs=rate;
                /* Converting preskip to output sampling rate */
                preskip = preskip*(rate/48000.);
                if (!st)
@@ -626,7 +740,7 @@
                      if (truncate > frame_size)
                         truncate = frame_size;
                      new_frame_size = frame_size - truncate;
-                     audio_write(output, channels, new_frame_size, fout, resampler, &preskip, strlen(outFile)==0);
+                     audio_write(output, channels, new_frame_size, fout, resampler, &preskip, dither?&shapemem:0, strlen(outFile)==0);
                      audio_size+=sizeof(short)*new_frame_size*channels;
                   }
                }
@@ -653,7 +767,7 @@
          int tmp = drain;
          if (tmp > 100)
             tmp = 100;
-         audio_write(zeros, channels, tmp, fout, resampler, NULL, strlen(outFile)==0);
+         audio_write(zeros, channels, tmp, fout, resampler, NULL, &shapemem, strlen(outFile)==0);
          drain -= tmp;
       } while (drain>0);
    }