shithub: opus

Download patch

ref: 977ee01670a9e5271ffeeddf5597583458002dfd
parent: 8b16ce9055b80c6092cbc66a91c778f4ccf5b92d
author: Jean-Marc Valin <jeanmarcv@google.com>
date: Wed Jun 12 08:57:48 EDT 2024

Adds 24-bit Opus encoder/decoder API

--- a/celt/arch.h
+++ b/celt/arch.h
@@ -166,6 +166,7 @@
 #define RES2VAL16(a)    RES2INT16(a)
 #define FLOAT2SIG(a)    float2int(((opus_int32)32768<<SIG_SHIFT)*(a))
 #define INT16TOSIG(a)   SHL32(EXTEND32(a), SIG_SHIFT)
+#define INT24TOSIG(a)   SHL32(a, SIG_SHIFT-8) /* 24-bit sample to sig scale: shifts left by SIG_SHIFT-8, i.e. 8 bits less than INT16TOSIG; no EXTEND32 is applied here */
 
 #define celt_isnan(x) 0
 
@@ -332,6 +333,7 @@
 #define RES2VAL16(a)    (a)
 #define FLOAT2SIG(a)    ((a)*CELT_SIG_SCALE)
 #define INT16TOSIG(a)   ((float)(a))
+#define INT24TOSIG(a)   ((float)(a)*(1.f/256.f)) /* 24-bit sample to float sig scale: divided by 256, whereas INT16TOSIG only casts to float */
 
 
 #endif /* !FIXED_POINT */
--- a/include/opus.h
+++ b/include/opus.h
@@ -268,6 +268,42 @@
     opus_int32 max_data_bytes
 ) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
 
+/** Encodes an Opus frame from 24-bit integer input.
+  * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+  * @param [in] pcm <tt>opus_int32*</tt>: Input signal (interleaved if 2 channels) representing (or slightly exceeding) 24-bit values. length is frame_size*channels*sizeof(opus_int32)
+  * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the
+  *                                      input signal.
+  *                                      This must be an Opus frame size for
+  *                                      the encoder's sampling rate.
+  *                                      For example, at 48 kHz the permitted
+  *                                      values are 120, 240, 480, 960, 1920,
+  *                                      and 2880.
+  *                                      Passing in a duration of less than
+  *                                      10 ms (480 samples at 48 kHz) will
+  *                                      prevent the encoder from using the LPC
+  *                                      or hybrid modes.
+  * @param [out] data <tt>unsigned char*</tt>: Output payload.
+  *                                            This must contain storage for at
+  *                                            least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode24(
+    OpusEncoder *st,
+    const opus_int32 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
 /** Encodes an Opus frame from floating point input.
   * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
   * @param [in] pcm <tt>float*</tt>: Input in float format (interleaved if 2 channels), with a normal range of +/-1.0.
@@ -483,6 +519,31 @@
     int decode_fec
 ) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
 
+/** Decode an Opus packet with 24-bit output.
+  * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+  * @param [in] data <tt>unsigned char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+  * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload
+  * @param [out] pcm <tt>opus_int32*</tt>: Output signal (interleaved if 2 channels) representing (or slightly exceeding) 24-bit values. length
+  *  is frame_size*channels*sizeof(opus_int32)
+  * @param [in] frame_size Number of samples per channel of available space in \a pcm.
+  *  If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will
+  *  not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1),
+  *  then frame_size needs to be exactly the duration of audio that is missing, otherwise the
+  *  decoder will not be in the optimal state to decode the next incoming packet. For the PLC and
+  *  FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms.
+  * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be
+  *  decoded. If no such data is available, the frame is decoded as if it were lost.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode24(
+    OpusDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    opus_int32 *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
 /** Decode an Opus packet with floating point output.
   * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
   * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
@@ -596,7 +657,7 @@
   */
 OPUS_EXPORT int opus_dred_process(OpusDREDDecoder *dred_dec, const OpusDRED *src, OpusDRED *dst);
 
-/** Decode audio from an Opus DRED packet with floating point output.
+/** Decode audio from an Opus DRED packet with 16-bit output.
   * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
   * @param [in] dred <tt>OpusDRED*</tt>: DRED state
   * @param [in] dred_offset <tt>opus_int32</tt>: position of the redundancy to decode (in samples before the beginning of the real audio data in the packet).
@@ -607,6 +668,18 @@
   * @returns Number of decoded samples or @ref opus_errorcodes
   */
 OPUS_EXPORT int opus_decoder_dred_decode(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int16 *pcm, opus_int32 frame_size);
+
+/** Decode audio from an Opus DRED packet with 24-bit output.
+  * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+  * @param [in] dred <tt>OpusDRED*</tt>: DRED state
+  * @param [in] dred_offset <tt>opus_int32</tt>: position of the redundancy to decode (in samples before the beginning of the real audio data in the packet).
+  * @param [out] pcm <tt>opus_int32*</tt>: Output signal (interleaved if 2 channels). length
+  *  is frame_size*channels*sizeof(opus_int32)
+  * @param [in] frame_size Number of samples per channel to decode in \a pcm.
+  *  frame_size <b>must</b> be a multiple of 2.5 ms.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_EXPORT int opus_decoder_dred_decode24(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int32 *pcm, opus_int32 frame_size);
 
 /** Decode audio from an Opus DRED packet with floating point output.
   * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -819,7 +819,7 @@
 #ifdef FIXED_POINT
 #ifdef ENABLE_RES24
 int opus_decode(OpusDecoder *st, const unsigned char *data,
-      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+      opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
 {
 	   VARDECL(opus_res, out);
 	   int ret, i;
@@ -851,14 +851,59 @@
 	   RESTORE_STACK;
 	   return ret;
 }
+
+int opus_decode24(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_int32 *pcm, int frame_size, int decode_fec)
+{ /* 24-bit decode for the ENABLE_RES24 branch: pcm is handed to opus_decode_native() with no conversion pass. */
+   if(frame_size<=0) /* a non-positive frame_size is rejected before any decoding */
+      return OPUS_BAD_ARG;
+   return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0, NULL, 0); /* the result of the native decoder is returned unchanged */
+}
+
 #else
+
 int opus_decode(OpusDecoder *st, const unsigned char *data,
-      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+      opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
 {
    if(frame_size<=0)
       return OPUS_BAD_ARG;
    return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
 }
+
+int opus_decode24(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_int32 *pcm, int frame_size, int decode_fec)
+{      /* 24-bit decode for the non-ENABLE_RES24 branch: decode into an opus_res scratch buffer, then convert. */
+       VARDECL(opus_res, out);
+       int ret, i, nb_samples;
+       ALLOC_STACK;
+       if(frame_size<=0)
+       {
+          RESTORE_STACK;
+          return OPUS_BAD_ARG;
+       }
+       if (data != NULL && len > 0 && !decode_fec)
+       {
+          nb_samples = opus_decoder_get_nb_samples(st, data, len);
+          if (nb_samples<=0)
+          {
+             RESTORE_STACK; /* pair ALLOC_STACK with RESTORE_STACK on this exit too, like every other return */
+             return OPUS_INVALID_PACKET;
+          }
+          frame_size = IMIN(frame_size, nb_samples); /* never size the scratch buffer beyond what the packet holds */
+       }
+       celt_assert(st->channels == 1 || st->channels == 2);
+       ALLOC(out, frame_size*st->channels, opus_res);
+       /* Decode into the scratch buffer; a positive ret counts samples per channel, so ret*channels values are converted. */
+       ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
+       if (ret > 0)
+       {
+          for (i=0;i<ret*st->channels;i++)
+             pcm[i] = RES2INT24(out[i]);
+       }
+       RESTORE_STACK;
+       return ret;
+}
+
 #endif
 
 #ifndef DISABLE_FLOAT_API
@@ -934,6 +979,41 @@
    return ret;
 }
 
+int opus_decode24(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_int32 *pcm, int frame_size, int decode_fec)
+{ /* 24-bit decode through a float scratch buffer: decode to float, then convert each sample with RES2INT24. */
+   VARDECL(float, out);
+   int ret, i, nb_samples;
+   ALLOC_STACK;
+   /* A non-positive frame_size is rejected before anything is allocated. */
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   if (data != NULL && len > 0 && !decode_fec)
+   {
+      nb_samples = opus_decoder_get_nb_samples(st, data, len);
+      if (nb_samples<=0)
+      {
+         RESTORE_STACK; /* pair ALLOC_STACK with RESTORE_STACK on this exit too, like every other return */
+         return OPUS_INVALID_PACKET;
+      }
+      frame_size = IMIN(frame_size, nb_samples); /* never size the scratch buffer beyond what the packet holds */
+   }
+   celt_assert(st->channels == 1 || st->channels == 2);
+   ALLOC(out, frame_size*st->channels, float);
+   /* NOTE(review): the ninth argument is 1 here but 0 in the opus_res-based variants -- confirm this is intended. */
+   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1, NULL, 0);
+   if (ret > 0)
+   {
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = RES2INT24(out[i]);
+   }
+   RESTORE_STACK;
+   return ret;
+}
+
 int opus_decode_float(OpusDecoder *st, const unsigned char *data,
       opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
 {
@@ -1498,7 +1578,41 @@
    if (ret > 0)
    {
       for (i=0;i<ret*st->channels;i++)
-         pcm[i] = FLOAT2INT16(out[i]);
+         pcm[i] = RES2INT16(out[i]);
+   }
+   RESTORE_STACK;
+   return ret;
+#else
+   (void)st;
+   (void)dred;
+   (void)dred_offset;
+   (void)pcm;
+   (void)frame_size;
+   return OPUS_UNIMPLEMENTED;
+#endif
+}
+
+int opus_decoder_dred_decode24(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int32 *pcm, opus_int32 frame_size)
+{
+#ifdef ENABLE_DRED
+   VARDECL(float, out);
+   int ret, i;
+   ALLOC_STACK;
+
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+
+   celt_assert(st->channels == 1 || st->channels == 2);
+   ALLOC(out, frame_size*st->channels, float);
+
+   ret = opus_decode_native(st, NULL, 0, out, frame_size, 0, 0, NULL, 1, dred, dred_offset);
+   if (ret > 0)
+   {
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = RES2INT24(out[i]);
    }
    RESTORE_STACK;
    return ret;
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -739,6 +739,29 @@
    }
 }
 
+void downmix_int24(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C)
+{ /* Mixes interleaved opus_int32 samples from _x into y[0..subframe-1], converting each one with INT24TOSIG. */
+   const opus_int32 *x;
+   int j;
+
+   x = (const opus_int32 *)_x; /* the untyped input is read as opus_int32 with C interleaved channels */
+   for (j=0;j<subframe;j++) /* y starts as channel c1, beginning `offset` frames into the input */
+      y[j] = INT24TOSIG(x[(j+offset)*C+c1]);
+   if (c2>-1) /* a non-negative c2 names one more channel to add on top */
+   {
+      for (j=0;j<subframe;j++)
+         y[j] += INT24TOSIG(x[(j+offset)*C+c2]);
+   } else if (c2==-2) /* c2 == -2 adds channels 1..C-1; presumably paired with c1 == 0 for a full downmix -- confirm */
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            y[j] += INT24TOSIG(x[(j+offset)*C+c]);
+      }
+   }
+}
+
 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
 {
    int new_size;
@@ -2534,6 +2557,16 @@
    RESTORE_STACK;
    return ret;
 }
+
+opus_int32 opus_encode24(OpusEncoder *st, const opus_int32 *pcm, int analysis_frame_size,
+                unsigned char *data, opus_int32 max_data_bytes)
+{ /* 24-bit encode without a conversion pass: pcm itself is handed to opus_encode_native(). */
+   int frame_size;
+   frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); /* not range-checked here, unlike the converting variants -- presumably left to opus_encode_native(); confirm */
+   return opus_encode_native(st, pcm, frame_size, data, max_data_bytes, 16, /* NOTE(review): same literal 16 that opus_encode() passes; if this is the input bit depth, 24 may be intended -- confirm */
+                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_int24, 0);
+}
+
 #else
 opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
                 unsigned char *data, opus_int32 max_data_bytes)
@@ -2543,6 +2576,30 @@
    return opus_encode_native(st, pcm, frame_size, data, max_data_bytes, 16,
                              pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
 }
+
+opus_int32 opus_encode24(OpusEncoder *st, const opus_int32 *pcm, int analysis_frame_size,
+                unsigned char *data, opus_int32 max_data_bytes)
+{ /* 24-bit encode via conversion: pcm is copied into an opus_res buffer with INT24TORES, and that copy is encoded. */
+   int i, ret;
+   int frame_size;
+   VARDECL(opus_res, in);
+   ALLOC_STACK;
+
+   frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);
+   if (frame_size <= 0) /* rejected before the buffer is allocated; the stack is still restored */
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   ALLOC(in, frame_size*st->channels, opus_res); /* one opus_res per sample across all channels */
+
+   for (i=0;i<frame_size*st->channels;i++)
+      in[i] = INT24TORES(pcm[i]);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, /* the converted copy is the encoder input */
+                            pcm, analysis_frame_size, 0, -2, st->channels, downmix_int24, 1); /* original pcm and downmix_int24 go alongside (presumably for analysis -- confirm); NOTE(review): last argument is 1 here but 0 in the other opus_encode24 variants */
+   RESTORE_STACK;
+   return ret;
+}
 #endif /* ENABLE_RES24 */
 
 #else
@@ -2563,12 +2620,37 @@
    ALLOC(in, frame_size*st->channels, float);
 
    for (i=0;i<frame_size*st->channels;i++)
-      in[i] = (1.0f/32768)*pcm[i];
+      in[i] = INT16TORES(pcm[i]);
    ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
    RESTORE_STACK;
    return ret;
 }
+
+opus_int32 opus_encode24(OpusEncoder *st, const opus_int32 *pcm, int analysis_frame_size,
+      unsigned char *data, opus_int32 max_data_bytes)
+{ /* 24-bit encode via a float buffer: pcm is converted with INT24TORES, and the float copy is encoded. */
+   int i, ret;
+   int frame_size;
+   VARDECL(float, in);
+   ALLOC_STACK;
+
+   frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);
+   if (frame_size <= 0) /* rejected before the buffer is allocated; the stack is still restored */
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   ALLOC(in, frame_size*st->channels, float); /* one float per sample across all channels */
+
+   for (i=0;i<frame_size*st->channels;i++)
+      in[i] = INT24TORES(pcm[i]);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, /* the converted copy is the encoder input */
+                            pcm, analysis_frame_size, 0, -2, st->channels, downmix_int24, 0); /* original pcm and downmix_int24 go alongside (presumably for analysis -- confirm) */
+   RESTORE_STACK;
+   return ret;
+}
+
 opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
                       unsigned char *data, opus_int32 out_data_bytes)
 {
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -174,6 +174,7 @@
 typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
 void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
 void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+void downmix_int24(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
 int is_digital_silence(const opus_res* pcm, int frame_size, int channels, int lsb_depth);
 
 int encode_size(int size, unsigned char *data);
--- a/tests/test_opus_custom.c
+++ b/tests/test_opus_custom.c
@@ -233,17 +233,32 @@
             }
          }
          else {
-            opus_int16* input = (opus_int16*)inbuf;
-            len = opus_encode(enc,
-                              &input[samp_count*num_channels],
-                              frame_size,
-                              packet,
-                              MAX_PACKET);
-            if (len <= 0) {
-                fprintf(stderr, "opus_encode() failed: %s\n", opus_strerror(len));
-                ret = -1;
-                break;
+            if (params.encoder_bit_depth == 24) {
+               opus_int32* input = (opus_int32*)inbuf;
+               len = opus_encode24(enc,
+                                   &input[samp_count*num_channels],
+                                   frame_size,
+                                   packet,
+                                   MAX_PACKET);
+               if (len <= 0) {
+                  fprintf(stderr, "opus_encode24() failed: %s\n", opus_strerror(len));
+                  ret = -1;
+                  break;
+               }
             }
+            else {
+               opus_int16* input = (opus_int16*)inbuf;
+               len = opus_encode(enc,
+                                 &input[samp_count*num_channels],
+                                 frame_size,
+                                 packet,
+                                 MAX_PACKET);
+               if (len <= 0) {
+                   fprintf(stderr, "opus_encode() failed: %s\n", opus_strerror(len));
+                   ret = -1;
+                   break;
+               }
+            }
          }
       }
 
@@ -356,18 +371,35 @@
             }
          }
          else {
-            opus_int16* output = (opus_int16*)outbuf;
-            samples_decoded = opus_decode(dec,
-                                          packet,
-                                          len,
-                                          &output[samp_count*num_channels],
-                                          frame_size,
-                                          0);
-            if (samples_decoded != frame_size) {
-                fprintf(stderr, "opus_decode() returned %d\n", samples_decoded);
-                ret = -1;
-                break;
+            if (params.decoder_bit_depth == 24) {
+               opus_int32* output = (opus_int32*)outbuf;
+               samples_decoded = opus_decode24(dec,
+                                               packet,
+                                               len,
+                                               &output[samp_count*num_channels],
+                                               frame_size,
+                                               0);
+
+               if (samples_decoded != frame_size) {
+                  fprintf(stderr, "opus_decode24() returned %d\n", samples_decoded);
+                  ret = -1;
+                  break;
+               }
             }
+            else {
+               opus_int16* output = (opus_int16*)outbuf;
+               samples_decoded = opus_decode(dec,
+                                             packet,
+                                             len,
+                                             &output[samp_count*num_channels],
+                                             frame_size,
+                                             0);
+               if (samples_decoded != frame_size) {
+                   fprintf(stderr, "opus_decode() returned %d\n", samples_decoded);
+                   ret = -1;
+                   break;
+               }
+            }
          }
       }
 
@@ -375,8 +407,13 @@
    } while (samp_count + frame_size <= input_samples);
 
 #ifdef RESYNTH
-   /* Resynth only works with OpusCustom encoder */
-   if (params.custom_encode && params.custom_decode) {
+   /* Resynth only works with OpusCustom encoder. Also, we don't enable it if there's
+      a 16-bit bottleneck in the decoder that can cause clipping. */
+   if (params.custom_encode && (params.custom_decode
+#if !defined(FIXED_POINT) || defined(ENABLE_RES24)
+         || params.decoder_bit_depth > 16
+#endif
+         )) {
       if (params.float_encode) {
          float* input = (float*)inbuf;
          float* output = (float*)outbuf;
@@ -552,18 +589,8 @@
          params.float_encode = 0;
          params.float_decode = 0;
 #endif
-         if (params.custom_encode) {
-            params.encoder_bit_depth = RAND_SAMPLE(encoder_bit_depths);
-         }
-         else {
-            params.encoder_bit_depth = 16;
-         }
-         if (params.custom_decode) {
-            params.decoder_bit_depth = RAND_SAMPLE(decoder_bit_depths);
-         }
-         else {
-            params.decoder_bit_depth = 16;
-         }
+         params.encoder_bit_depth = RAND_SAMPLE(encoder_bit_depths);
+         params.decoder_bit_depth = RAND_SAMPLE(decoder_bit_depths);
 #ifdef RESYNTH
          /* Resynth logic works best when encoder/decoder use same datatype */
          params.float_decode = params.float_encode;
--