shithub: opus-tools

Download patch

ref: d7545970c6a8eb2c668c8362bec84d062b14d75b
parent: d35a677985c542319c598d40b691fca751602165
author: Gregory Maxwell <greg@xiph.org>
date: Tue Dec 13 19:50:06 EST 2011

Encoder-side changes for exact durations on 44.1k round trips; these changes also reduce framing latency and support very high rate (>15 Mbit/s) multichannel.

--- a/src/audio-in.c
+++ b/src/audio-in.c
@@ -52,6 +52,7 @@
 #define N_(X) (X)
 #endif
 
+#include <ogg/ogg.h>
 #include "opusenc.h"
 #include "speex_resampler.h"
 
@@ -797,6 +798,7 @@
 typedef struct {
     audio_read_func real_reader;
     void *real_readdata;
+    ogg_int64_t *original_samples;
     int channels;
     int *extra_samples;
 } padder;
@@ -806,6 +808,8 @@
     long in_samples = d->real_reader(d->real_readdata, buffer, samples);
     int i, extra=0;
 
+    if(d->original_samples)*d->original_samples+=in_samples;
+
     if(in_samples<samples){
       extra=samples-in_samples;
       if(extra>*d->extra_samples)extra=*d->extra_samples;
@@ -815,7 +819,7 @@
     return in_samples+extra;
 }
 
-void setup_padder(oe_enc_opt *opt) {
+void setup_padder(oe_enc_opt *opt,ogg_int64_t *original_samples) {
     padder *d = calloc(1, sizeof(padder));
 
     d->real_reader = opt->read_samples;
@@ -825,6 +829,7 @@
     opt->readdata = d;
     d->channels = opt->channels;
     d->extra_samples = &opt->extraout;
+    d->original_samples=original_samples;
 }
 
 void clear_padder(oe_enc_opt *opt) {
--- a/src/opusenc.c
+++ b/src/opusenc.c
@@ -58,6 +58,8 @@
 #include <fcntl.h>
 #endif
 
+#define OLD_LIBOGG 1
+
 #ifdef VALGRIND
 #include <valgrind/memcheck.h>
 #define VG_UNDEF(x,y) VALGRIND_MAKE_MEM_UNDEFINED((x),(y))
@@ -214,7 +216,10 @@
   ogg_page           og;
   ogg_packet         op;
   ogg_int64_t        last_granulepos=0;
+  ogg_int64_t        enc_granulepos=0;
+  ogg_int64_t        original_samples=0;
   ogg_int32_t        id=-1;
+  int                last_segments=0;
   int                eos=0;
   OpusHeader         header;
   int                comments_length;
@@ -229,7 +234,7 @@
   opus_int32         nbBytes;
   opus_int32         nb_samples;
   opus_int32         peak_bytes=0;
-  opus_int32         min_bytes=MAX_FRAME_BYTES;
+  opus_int32         min_bytes;
   struct timeval     start_time;
   struct timeval     stop_time;
   time_t             last_spin=0;
@@ -236,6 +241,7 @@
   int                last_spin_len=0;
   /*Settings*/
   int                quiet=0;
+  int                max_frame_bytes;
   opus_int32         bitrate=-1;
   opus_int32         rate=48000;
   opus_int32         coding_rate=48000;
@@ -271,11 +277,6 @@
   snprintf(vendor_string, sizeof(vendor_string), "%s\n",opus_version);
   comment_init(&comments, &comments_length, vendor_string);
 
-  packet=malloc(sizeof(unsigned char)*MAX_FRAME_BYTES);
-  if(packet==NULL){
-    fprintf(stderr,"Error allocating packet buffer.\n");
-    exit(1);
-  }
 
   /*Process command-line options*/
   while(1){
@@ -486,7 +487,7 @@
   inopt.skip=0;
 
   /*In order to code the complete length we'll need to do a little padding*/
-  setup_padder(&inopt);
+  setup_padder(&inopt,&original_samples);
 
   if(rate>24000)coding_rate=48000;
   else if(rate>16000)coding_rate=24000;
@@ -527,6 +528,13 @@
   header.gain=0;
   header.input_sample_rate=rate;
 
+  min_bytes=max_frame_bytes=(1275*3+7)*header.nb_streams;
+  packet=malloc(sizeof(unsigned char)*max_frame_bytes);
+  if(packet==NULL){
+    fprintf(stderr,"Error allocating packet buffer.\n");
+    exit(1);
+  }
+
   /*Initialize OPUS encoder*/
   st=opus_multistream_encoder_create(coding_rate, chan, header.nb_streams, header.nb_coupled, mapping, OPUS_APPLICATION_AUDIO, &ret);
   if(ret!=OPUS_OK){
@@ -556,7 +564,7 @@
 
   bitrate=bitrate>0?bitrate:64000*header.nb_streams+32000*header.nb_coupled;
 
-  if(bitrate>2048000||bitrate<500){
+  if(bitrate>(1024000*chan)||bitrate<500){
     fprintf(stderr,"Error: Bitrate %d bits/sec is insane.\nDid you mistake bits for kilobits?\n",bitrate);
     fprintf(stderr,"--bitrate values from 6-256 kbit/sec per channel are meaningful.\n");
     exit(1);
@@ -721,33 +729,40 @@
     exit(1);
   }
 
-  nb_samples = inopt.read_samples(inopt.readdata,input,frame_size);
-
-  if(nb_samples==0)eos=1;
-  total_samples+=nb_samples;
-  nb_encoded=-header.preskip;
   /*Main encoding loop (one frame per iteration)*/
-  while(!eos){
+  eos=0;
+  nb_samples=-1;
+  while(!op.e_o_s){
+    int size_segments,cur_frame_size;
     id++;
-    /*Encode current frame*/
 
-    if(nb_samples<frame_size){
-    /*FIXME*/
-      //printf("X: %d %d\n",nb_samples,frame_size);
-      for(i=nb_samples*chan;i<frame_size*chan;i++)input[i]=0;
+    if(nb_samples<0){
+      nb_samples = inopt.read_samples(inopt.readdata,input,frame_size);
+      total_samples+=nb_samples;
+      if(nb_samples<frame_size)op.e_o_s=1;
+      else op.e_o_s=0;
     }
+    op.e_o_s|=eos;
 
-    VG_UNDEF(packet,MAX_FRAME_BYTES);
-    VG_CHECK(input,sizeof(float)*chan*frame_size);
-    nbBytes=opus_multistream_encode_float(st, input, frame_size, packet, MAX_FRAME_BYTES);
+    cur_frame_size=frame_size;
+
+    /*No fancy end padding, just fill with zeros for now.*/
+    if(nb_samples<cur_frame_size)for(i=nb_samples*chan;i<cur_frame_size*chan;i++)input[i]=0;
+
+    /*Encode current frame*/
+    VG_UNDEF(packet,max_frame_bytes);
+    VG_CHECK(input,sizeof(float)*chan*cur_frame_size);
+    nbBytes=opus_multistream_encode_float(st, input, cur_frame_size, packet, max_frame_bytes);
     if(nbBytes<0){
       fprintf(stderr, "Encoding failed: %s. Aborting.\n", opus_strerror(nbBytes));
       break;
     }
     VG_CHECK(packet,nbBytes);
-    VG_UNDEF(input,sizeof(float)*chan*frame_size);
-    nb_encoded+=frame_size;
+    VG_UNDEF(input,sizeof(float)*chan*cur_frame_size);
+    nb_encoded+=cur_frame_size;
+    enc_granulepos+=cur_frame_size*48000/coding_rate;
     total_bytes+=nbBytes;
+    size_segments=(nbBytes+255)/255;
     peak_bytes=IMAX(nbBytes,peak_bytes);
     min_bytes=IMIN(nbBytes,min_bytes);
 
@@ -758,50 +773,75 @@
         ret=opus_multistream_encoder_ctl(st,OPUS_MULTISTREAM_GET_ENCODER_STATE(i,&oe));
         ret=opus_encoder_ctl(oe,OPUS_GET_FINAL_RANGE(&rngs[i]));
       }
-      save_range(frange,frame_size*(48000/coding_rate),packet,nbBytes,
+      save_range(frange,cur_frame_size*(48000/coding_rate),packet,nbBytes,
                  rngs,header.nb_streams);
     }
 
-    nb_samples = inopt.read_samples(inopt.readdata,input,frame_size);
-    if(nb_samples==0)eos=1;
-    if(eos && total_samples<=nb_encoded)op.e_o_s=1;
-    else op.e_o_s=0;
+    /*Flush early if adding this packet would make us end up with a
+      continued page which we wouldn't have otherwise.*/
+    while((((size_segments<=255)&&(last_segments+size_segments>255))||
+           (enc_granulepos-last_granulepos>max_ogg_delay))&&
+#ifdef OLD_LIBOGG
+           ogg_stream_flush(&os, &og)){
+#else
+           ogg_stream_flush_fill(&os, &og,255*255)){
+#endif
+      if(ogg_page_packets(&og)!=0)last_granulepos=ogg_page_granulepos(&og);
+      last_segments-=og.header[26];
+      ret=oe_write_page(&og, fout);
+      if(ret!=og.header_len+og.body_len){
+         fprintf(stderr,"Error: failed writing data to output stream\n");
+         exit(1);
+      }
+      bytes_written+=ret;
+      pages_out++;
+    }
 
-    total_samples+=nb_samples;
+    /*The downside of early reading is if the input is an exact
+      multiple of the frame_size you'll get an extra frame that needs
+      to get cropped off. The downside of late reading is added delay.
+      If your ogg_delay is 120ms or less we'll assume you want the
+      low delay behavior.*/
+    if((!op.e_o_s)&&max_ogg_delay>5760){
+      nb_samples = inopt.read_samples(inopt.readdata,input,frame_size);
+      total_samples+=nb_samples;
+      if(nb_samples<frame_size)eos=1;
+      if(nb_samples==0)op.e_o_s=1;
+    } else nb_samples=-1;
 
     op.packet=(unsigned char *)packet;
     op.bytes=nbBytes;
     op.b_o_s=0;
-    /*Is this redundent?*/
-    if(eos && total_samples<=nb_encoded)op.e_o_s=1;
-    else op.e_o_s=0;
-    /*FIXME: this doesn't cope with the frame size changing*/
-    op.granulepos=(id+1)*frame_size*(48000/coding_rate);
-    if(op.granulepos>total_samples)op.granulepos=total_samples*(48000/coding_rate);
+    op.granulepos=enc_granulepos;
+    if(op.e_o_s){
+      /*We compute the final GP as ceil(len*48k/input_rate). When a resampling
+        decoder does the matching floor(len*input/48k) conversion the length will
+        be exactly the same as the input.*/
+      op.granulepos=((original_samples*48000+rate-1)/rate)+header.preskip;
+    }
     op.packetno=2+id;
-    /*printf("granulepos: %d %d %d\n", (int)op.granulepos, op.packetno, op.bytes);*/
     ogg_stream_packetin(&os, &op);
+    last_segments+=size_segments;
 
-    /*Write all new pages (most likely 0 or 1)
-      Flush if we've buffered >max_ogg_delay second to avoid excessive framing delay. */
-    while(eos||(op.granulepos-last_granulepos+(frame_size*(48000/coding_rate))>max_ogg_delay)?
-#if 0
+    /*If the stream is over or we're sure that the delayed flush will fire,
+      go ahead and flush now to avoid adding delay.*/
+    while((op.e_o_s||(enc_granulepos+frame_size-last_granulepos>max_ogg_delay)||
+           (last_segments>=255))?
+#ifdef OLD_LIBOGG
     /*Libogg > 1.2.2 allows us to achieve lower overhead by
       producing larger pages. For 20ms frames this is only relevant
-      above ~32kbit/sec. We still target somewhat smaller than the
-      maximum size in order to avoid continued pages.*/
-           ogg_stream_flush_fill(&os, &og,255*255-7*1276):
-           ogg_stream_pageout_fill(&os, &og,255*255-7*1276))
-#else
-#define OLD_LIBOGG
+      above ~32kbit/sec.*/
            ogg_stream_flush(&os, &og):
-           ogg_stream_pageout(&os, &og))
+           ogg_stream_pageout(&os, &og)){
+#else
+           ogg_stream_flush_fill(&os, &og,255*255):
+           ogg_stream_pageout_fill(&os, &og,255*255)){
 #endif
-    {
       if(ogg_page_packets(&og)!=0)last_granulepos=ogg_page_granulepos(&og);
+      last_segments-=og.header[26];
       ret=oe_write_page(&og, fout);
       if(ret!=og.header_len+og.body_len){
-         fprintf(stderr,"Error: failed writing header to output stream\n");
+         fprintf(stderr,"Error: failed writing data to output stream\n");
          exit(1);
       }
       bytes_written+=ret;
@@ -847,17 +887,6 @@
   for(i=0;i<last_spin_len;i++)fprintf(stderr," ");
   if(last_spin_len)fprintf(stderr,"\r");
 
-  /*Flush all pages left to be written*/
-  while(ogg_stream_flush(&os, &og)){
-    ret=oe_write_page(&og, fout);
-    if(ret!=og.header_len+og.body_len){
-      fprintf(stderr,"Error: failed writing header to output stream\n");
-      exit(1);
-    }
-    bytes_written+=ret;
-    pages_out++;
-  }
-
   if(!quiet){
     double coded_seconds=nb_encoded/(double)coding_rate;
     double wall_time=(stop_time.tv_sec-start_time.tv_sec)+
@@ -877,7 +906,7 @@
             peak_bytes*8*((double)coding_rate/frame_size/1000.),min_bytes,peak_bytes);
     fprintf(stderr,"   Overhead: %0.3g%% (container+metadata)\n",(bytes_written-total_bytes)/(double)total_bytes*100.);
 #ifdef OLD_LIBOGG
-    if(max_ogg_delay>(frame_size*(48000/coding_rate)*4))fprintf(stderr,"   (use libogg 1.2.2 or later for lower overhead)\n");
+    if(max_ogg_delay>(frame_size*(48000/coding_rate)*4))fprintf(stderr,"   (use libogg 1.3 or later for lower overhead)\n");
 #endif
     fprintf(stderr,"\n");
   }
--- a/src/opusenc.h
+++ b/src/opusenc.h
@@ -35,7 +35,7 @@
 
 void setup_scaler(oe_enc_opt *opt, float scale);
 void clear_scaler(oe_enc_opt *opt);
-void setup_padder(oe_enc_opt *opt);
+void setup_padder(oe_enc_opt *opt, ogg_int64_t *original_samples);
 void clear_padder(oe_enc_opt *opt);
 int setup_downmix(oe_enc_opt *opt, int out_channels);
 void clear_downmix(oe_enc_opt *opt);
--