ref: 977ee01670a9e5271ffeeddf5597583458002dfd
parent: 8b16ce9055b80c6092cbc66a91c778f4ccf5b92d
author: Jean-Marc Valin <jeanmarcv@google.com>
date: Wed Jun 12 08:57:48 EDT 2024
Adds 24-bit Opus encoder/decoder API
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -166,6 +166,7 @@
#define RES2VAL16(a) RES2INT16(a)
#define FLOAT2SIG(a) float2int(((opus_int32)32768<<SIG_SHIFT)*(a))
#define INT16TOSIG(a) SHL32(EXTEND32(a), SIG_SHIFT)
+#define INT24TOSIG(a) SHL32(a, SIG_SHIFT-8) /* 24-bit counterpart of INT16TOSIG: SHL32 with a shift count 8 smaller, no EXTEND32 */
#define celt_isnan(x) 0
@@ -332,6 +333,7 @@
#define RES2VAL16(a) (a)
#define FLOAT2SIG(a) ((a)*CELT_SIG_SCALE)
#define INT16TOSIG(a) ((float)(a))
+#define INT24TOSIG(a) ((float)(a)*(1.f/256.f)) /* float build: same cast as INT16TOSIG plus a 1/256 scale factor */
#endif /* !FIXED_POINT */
--- a/include/opus.h
+++ b/include/opus.h
@@ -268,6 +268,42 @@
opus_int32 max_data_bytes
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+/** Encodes an Opus frame from 24-bit integer input (stored in opus_int32).
+ * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+ * @param [in] pcm <tt>opus_int32*</tt>: Input signal (interleaved if 2 channels) representing (or slightly exceeding) 24-bit values. length is frame_size*channels*sizeof(opus_int32)
+ * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the
+ * input signal.
+ * This must be an Opus frame size for
+ * the encoder's sampling rate.
+ * For example, at 48 kHz the permitted
+ * values are 120, 240, 480, 960, 1920,
+ * and 2880.
+ * Passing in a duration of less than
+ * 10 ms (480 samples at 48 kHz) will
+ * prevent the encoder from using the LPC
+ * or hybrid modes.
+ * @param [out] data <tt>unsigned char*</tt>: Output payload.
+ * This must contain storage for at
+ * least \a max_data_bytes.
+ * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+ * memory for the output
+ * payload. This may be
+ * used to impose an upper limit on
+ * the instant bitrate, but should
+ * not be used as the only bitrate
+ * control. Use #OPUS_SET_BITRATE to
+ * control the bitrate.
+ * @returns The length of the encoded packet (in bytes) on success or a
+ * negative error code (see @ref opus_errorcodes) on failure.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode24(
+ OpusEncoder *st,
+ const opus_int32 *pcm,
+ int frame_size,
+ unsigned char *data,
+ opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
/** Encodes an Opus frame from floating point input.
* @param [in] st <tt>OpusEncoder*</tt>: Encoder state
* @param [in] pcm <tt>float*</tt>: Input in float format (interleaved if 2 channels), with a normal range of +/-1.0.
@@ -483,6 +519,31 @@
int decode_fec
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+/** Decode an Opus packet with 24-bit output.
+ * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+ * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+ * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload
+ * @param [out] pcm <tt>opus_int32*</tt>: Output signal (interleaved if 2 channels) representing (or slightly exceeding) 24-bit values. length
+ * is frame_size*channels*sizeof(opus_int32)
+ * @param [in] frame_size Number of samples per channel of available space in \a pcm.
+ * If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will
+ * not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1),
+ * then frame_size needs to be exactly the duration of audio that is missing, otherwise the
+ * decoder will not be in the optimal state to decode the next incoming packet. For the PLC and
+ * FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms.
+ * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be
+ * decoded. If no such data is available, the frame is decoded as if it were lost.
+ * @returns Number of decoded samples or @ref opus_errorcodes
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode24(
+ OpusDecoder *st,
+ const unsigned char *data,
+ opus_int32 len,
+ opus_int32 *pcm,
+ int frame_size,
+ int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
/** Decode an Opus packet with floating point output.
* @param [in] st <tt>OpusDecoder*</tt>: Decoder state
* @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
@@ -596,7 +657,7 @@
*/
OPUS_EXPORT int opus_dred_process(OpusDREDDecoder *dred_dec, const OpusDRED *src, OpusDRED *dst);
-/** Decode audio from an Opus DRED packet with floating point output.
+/** Decode audio from an Opus DRED packet with 16-bit output.
* @param [in] st <tt>OpusDecoder*</tt>: Decoder state
* @param [in] dred <tt>OpusDRED*</tt>: DRED state
* @param [in] dred_offset <tt>opus_int32</tt>: position of the redundancy to decode (in samples before the beginning of the real audio data in the packet).
@@ -607,6 +668,18 @@
* @returns Number of decoded samples or @ref opus_errorcodes
*/
OPUS_EXPORT int opus_decoder_dred_decode(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int16 *pcm, opus_int32 frame_size);
+
+/** Decode audio from an Opus DRED packet with 24-bit output.
+ * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+ * @param [in] dred <tt>OpusDRED*</tt>: DRED state
+ * @param [in] dred_offset <tt>opus_int32</tt>: position of the redundancy to decode (in samples before the beginning of the real audio data in the packet).
+ * @param [out] pcm <tt>opus_int32*</tt>: Output signal (interleaved if 2 channels). length
+ * is frame_size*channels*sizeof(opus_int32)
+ * @param [in] frame_size Number of samples per channel to decode in \a pcm.
+ * frame_size <b>must</b> be a multiple of 2.5 ms.
+ * @returns Number of decoded samples or @ref opus_errorcodes
+ */
+OPUS_EXPORT int opus_decoder_dred_decode24(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int32 *pcm, opus_int32 frame_size);
/** Decode audio from an Opus DRED packet with floating point output.
* @param [in] st <tt>OpusDecoder*</tt>: Decoder state
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -819,7 +819,7 @@
#ifdef FIXED_POINT
#ifdef ENABLE_RES24
int opus_decode(OpusDecoder *st, const unsigned char *data,
- opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+ opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
{
VARDECL(opus_res, out);
int ret, i;
@@ -851,14 +851,59 @@
RESTORE_STACK;
return ret;
}
+
+int opus_decode24(OpusDecoder *st, const unsigned char *data,
+ opus_int32 len, opus_int32 *pcm, int frame_size, int decode_fec)
+{ /* FIXED_POINT + ENABLE_RES24 variant: pcm is handed to opus_decode_native() as-is, with no conversion buffer. */
+ if(frame_size<=0)
+ return OPUS_BAD_ARG; /* reject a zero or negative output capacity */
+ return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
+}
+
#else
+
int opus_decode(OpusDecoder *st, const unsigned char *data,
- opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+ opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
{
if(frame_size<=0)
return OPUS_BAD_ARG;
return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
}
+
+/* Fixed-point build without ENABLE_RES24: decodes into a temporary opus_res
+   buffer, then converts each decoded sample with RES2INT24(). */
+int opus_decode24(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_int32 *pcm, int frame_size, int decode_fec)
+{
+   VARDECL(opus_res, out);
+   int ret, i, nb_samples;
+   ALLOC_STACK;
+
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   if (data != NULL && len > 0 && !decode_fec)
+   {
+      nb_samples = opus_decoder_get_nb_samples(st, data, len);
+      if (nb_samples<=0)
+      {
+         RESTORE_STACK; /* previously skipped here; the other exits all restore after ALLOC_STACK */
+         return OPUS_INVALID_PACKET;
+      }
+      frame_size = IMIN(frame_size, nb_samples);
+   }
+   celt_assert(st->channels == 1 || st->channels == 2);
+   ALLOC(out, frame_size*st->channels, opus_res);
+   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
+   if (ret > 0)
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = RES2INT24(out[i]);
+   RESTORE_STACK;
+   return ret;
+}
+
#endif
#ifndef DISABLE_FLOAT_API
@@ -934,6 +979,41 @@
return ret;
}
+/* Decodes into a temporary float buffer and converts each decoded sample with
+   RES2INT24(); every exit after ALLOC_STACK goes through RESTORE_STACK. */
+int opus_decode24(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_int32 *pcm, int frame_size, int decode_fec)
+{
+   VARDECL(float, out);
+   int ret, i, nb_samples;
+   ALLOC_STACK;
+
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   if (data != NULL && len > 0 && !decode_fec)
+   {
+      nb_samples = opus_decoder_get_nb_samples(st, data, len);
+      if (nb_samples<=0)
+      {
+         RESTORE_STACK; /* previously skipped on this error path */
+         return OPUS_INVALID_PACKET;
+      }
+      frame_size = IMIN(frame_size, nb_samples);
+   }
+   celt_assert(st->channels == 1 || st->channels == 2);
+   ALLOC(out, frame_size*st->channels, float);
+   /* NOTE(review): the ninth argument is 1 here; confirm that is intended for 24-bit output. */
+   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1, NULL, 0);
+   if (ret > 0)
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = RES2INT24(out[i]);
+   RESTORE_STACK;
+   return ret;
+}
+
int opus_decode_float(OpusDecoder *st, const unsigned char *data,
opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
{
@@ -1498,7 +1578,41 @@
if (ret > 0)
{
for (i=0;i<ret*st->channels;i++)
- pcm[i] = FLOAT2INT16(out[i]);
+ pcm[i] = RES2INT16(out[i]);
+ }
+ RESTORE_STACK;
+ return ret;
+#else
+ (void)st;
+ (void)dred;
+ (void)dred_offset;
+ (void)pcm;
+ (void)frame_size;
+ return OPUS_UNIMPLEMENTED;
+#endif
+}
+
+int opus_decoder_dred_decode24(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int32 *pcm, opus_int32 frame_size)
+{
+#ifdef ENABLE_DRED
+ VARDECL(float, out);
+ int ret, i;
+ ALLOC_STACK;
+
+ if(frame_size<=0)
+ {
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+
+ celt_assert(st->channels == 1 || st->channels == 2);
+ ALLOC(out, frame_size*st->channels, float);
+
+ ret = opus_decode_native(st, NULL, 0, out, frame_size, 0, 0, NULL, 1, dred, dred_offset);
+ if (ret > 0)
+ {
+ for (i=0;i<ret*st->channels;i++)
+ pcm[i] = RES2INT24(out[i]);
}
RESTORE_STACK;
return ret;
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -739,6 +739,29 @@
}
}
+void downmix_int24(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C)
+{ /* Fills y[0..subframe-1] from interleaved opus_int32 samples in _x (C channels per frame), starting at frame `offset`. */
+ const opus_int32 *x;
+ int j;
+
+ x = (const opus_int32 *)_x;
+ for (j=0;j<subframe;j++)
+ y[j] = INT24TOSIG(x[(j+offset)*C+c1]); /* start from channel c1 */
+ if (c2>-1)
+ { /* c2 >= 0: add that one extra channel (summed, not averaged) */
+ for (j=0;j<subframe;j++)
+ y[j] += INT24TOSIG(x[(j+offset)*C+c2]);
+ } else if (c2==-2)
+ { /* c2 == -2: add channels 1..C-1 on top of channel c1; any other negative c2 adds nothing */
+ int c;
+ for (c=1;c<C;c++)
+ {
+ for (j=0;j<subframe;j++)
+ y[j] += INT24TOSIG(x[(j+offset)*C+c]);
+ }
+ }
+}
+
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
{
int new_size;
@@ -2534,6 +2557,16 @@
RESTORE_STACK;
return ret;
}
+
+opus_int32 opus_encode24(OpusEncoder *st, const opus_int32 *pcm, int analysis_frame_size,
+ unsigned char *data, opus_int32 max_data_bytes)
+{ /* Variant that hands pcm to opus_encode_native() directly, without a conversion buffer. */
+ int frame_size;
+ frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);
+ return opus_encode_native(st, pcm, frame_size, data, max_data_bytes, 16, /* NOTE(review): same literal 16 that opus_encode() passes; confirm for 24-bit input */
+ pcm, analysis_frame_size, 0, -2, st->channels, downmix_int24, 0); /* pcm is passed a second time, paired with downmix_int24 */
+}
+
#else
opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
unsigned char *data, opus_int32 max_data_bytes)
@@ -2543,6 +2576,30 @@
return opus_encode_native(st, pcm, frame_size, data, max_data_bytes, 16,
pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
}
+
+opus_int32 opus_encode24(OpusEncoder *st, const opus_int32 *pcm, int analysis_frame_size,
+ unsigned char *data, opus_int32 max_data_bytes)
+{ /* Variant that first converts pcm into a temporary opus_res buffer with INT24TORES(). */
+ int i, ret;
+ int frame_size;
+ VARDECL(opus_res, in);
+ ALLOC_STACK;
+
+ frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);
+ if (frame_size <= 0)
+ { /* frame_size_select() gave no usable size: restore the stack before returning */
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+ ALLOC(in, frame_size*st->channels, opus_res);
+
+ for (i=0;i<frame_size*st->channels;i++)
+ in[i] = INT24TORES(pcm[i]);
+ ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, /* converted buffer is the encoder input */
+ pcm, analysis_frame_size, 0, -2, st->channels, downmix_int24, 1); /* NOTE(review): last argument is 1 here but 0 in the other opus_encode24() variants; confirm */
+ RESTORE_STACK;
+ return ret;
+}
#endif /* ENABLE_RES24 */
#else
@@ -2563,12 +2620,37 @@
ALLOC(in, frame_size*st->channels, float);
for (i=0;i<frame_size*st->channels;i++)
- in[i] = (1.0f/32768)*pcm[i];
+ in[i] = INT16TORES(pcm[i]);
ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
RESTORE_STACK;
return ret;
}
+
+opus_int32 opus_encode24(OpusEncoder *st, const opus_int32 *pcm, int analysis_frame_size,
+ unsigned char *data, opus_int32 max_data_bytes)
+{ /* Variant that first converts pcm into a temporary float buffer with INT24TORES(). */
+ int i, ret;
+ int frame_size;
+ VARDECL(float, in);
+ ALLOC_STACK;
+
+ frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);
+ if (frame_size <= 0)
+ { /* frame_size_select() gave no usable size: restore the stack before returning */
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+ ALLOC(in, frame_size*st->channels, float);
+
+ for (i=0;i<frame_size*st->channels;i++)
+ in[i] = INT24TORES(pcm[i]);
+ ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, /* converted buffer is the encoder input */
+ pcm, analysis_frame_size, 0, -2, st->channels, downmix_int24, 0); /* original pcm is passed again, paired with downmix_int24 */
+ RESTORE_STACK;
+ return ret;
+}
+
opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
unsigned char *data, opus_int32 out_data_bytes)
{
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -174,6 +174,7 @@
typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+void downmix_int24(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
int is_digital_silence(const opus_res* pcm, int frame_size, int channels, int lsb_depth);
int encode_size(int size, unsigned char *data);
--- a/tests/test_opus_custom.c
+++ b/tests/test_opus_custom.c
@@ -233,17 +233,32 @@
}
}
else {
- opus_int16* input = (opus_int16*)inbuf;
- len = opus_encode(enc,
- &input[samp_count*num_channels],
- frame_size,
- packet,
- MAX_PACKET);
- if (len <= 0) {
- fprintf(stderr, "opus_encode() failed: %s\n", opus_strerror(len));
- ret = -1;
- break;
+ if (params.encoder_bit_depth == 24) {
+ opus_int32* input = (opus_int32*)inbuf;
+ len = opus_encode24(enc,
+ &input[samp_count*num_channels],
+ frame_size,
+ packet,
+ MAX_PACKET);
+ if (len <= 0) {
+ fprintf(stderr, "opus_encode24() failed: %s\n", opus_strerror(len));
+ ret = -1;
+ break;
+ }
}
+ else {
+ opus_int16* input = (opus_int16*)inbuf;
+ len = opus_encode(enc,
+ &input[samp_count*num_channels],
+ frame_size,
+ packet,
+ MAX_PACKET);
+ if (len <= 0) {
+ fprintf(stderr, "opus_encode() failed: %s\n", opus_strerror(len));
+ ret = -1;
+ break;
+ }
+ }
}
}
@@ -356,18 +371,35 @@
}
}
else {
- opus_int16* output = (opus_int16*)outbuf;
- samples_decoded = opus_decode(dec,
- packet,
- len,
- &output[samp_count*num_channels],
- frame_size,
- 0);
- if (samples_decoded != frame_size) {
- fprintf(stderr, "opus_decode() returned %d\n", samples_decoded);
- ret = -1;
- break;
+ if (params.decoder_bit_depth == 24) {
+ opus_int32* output = (opus_int32*)outbuf;
+ samples_decoded = opus_decode24(dec,
+ packet,
+ len,
+ &output[samp_count*num_channels],
+ frame_size,
+ 0);
+
+ if (samples_decoded != frame_size) {
+ fprintf(stderr, "opus_decode24() returned %d\n", samples_decoded);
+ ret = -1;
+ break;
+ }
}
+ else {
+ opus_int16* output = (opus_int16*)outbuf;
+ samples_decoded = opus_decode(dec,
+ packet,
+ len,
+ &output[samp_count*num_channels],
+ frame_size,
+ 0);
+ if (samples_decoded != frame_size) {
+ fprintf(stderr, "opus_decode() returned %d\n", samples_decoded);
+ ret = -1;
+ break;
+ }
+ }
}
}
@@ -375,8 +407,13 @@
} while (samp_count + frame_size <= input_samples);
#ifdef RESYNTH
- /* Resynth only works with OpusCustom encoder */
- if (params.custom_encode && params.custom_decode) {
+ /* Resynth only works with OpusCustom encoder. Also, we don't enable it if there's
+ a 16-bit bottleneck in the decoder that can cause clipping. */
+ if (params.custom_encode && (params.custom_decode
+#if !defined(FIXED_POINT) || defined(ENABLE_RES24)
+ || params.decoder_bit_depth > 16
+#endif
+ )) {
if (params.float_encode) {
float* input = (float*)inbuf;
float* output = (float*)outbuf;
@@ -552,18 +589,8 @@
params.float_encode = 0;
params.float_decode = 0;
#endif
- if (params.custom_encode) {
- params.encoder_bit_depth = RAND_SAMPLE(encoder_bit_depths);
- }
- else {
- params.encoder_bit_depth = 16;
- }
- if (params.custom_decode) {
- params.decoder_bit_depth = RAND_SAMPLE(decoder_bit_depths);
- }
- else {
- params.decoder_bit_depth = 16;
- }
+ params.encoder_bit_depth = RAND_SAMPLE(encoder_bit_depths);
+ params.decoder_bit_depth = RAND_SAMPLE(decoder_bit_depths);
#ifdef RESYNTH
/* Resynth logic works best when encoder/decoder use same datatype */
params.float_decode = params.float_encode;
--
⑨