shithub: opus

--- /dev/null

+++ b/silk/dred_coding.c

@@ -1,0 +1,196 @@

+/* Copyright (c) 2022 Amazon

+   Written by Jean-Marc Valin */

+/*

+   Redistribution and use in source and binary forms, with or without

+   modification, are permitted provided that the following conditions

+   are met:

+   - Redistributions of source code must retain the above copyright

+   notice, this list of conditions and the following disclaimer.

+   - Redistributions in binary form must reproduce the above copyright

+   notice, this list of conditions and the following disclaimer in the

+   documentation and/or other materials provided with the distribution.

+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+*/

+#include <math.h>

+#include <stdio.h>

+#ifdef HAVE_CONFIG_H

+#include "config.h"

+#endif

+#include "celt/vq.h"

+#include "celt/cwrs.h"

+#include "celt/laplace.h"

+#define LATENT_DIM 80

+#define PVQ_DIM 24

+#define PVQ_K 82

+/* Write the integer pulse vector iy (N entries) to enc.
+   K is the pulse count, presumably the sum of |iy[i]| - the split below
+   relies on that. (N,K) pairs flagged as fitting are handed straight to
+   encode_pulses(); otherwise the vector is cut in half, the pulse count of
+   the first half is sent uniformly over [0,K], and both halves are coded
+   recursively.
+   NOTE(review): a half can end up with 0 pulses, so encode_pulses() may be
+   reached with K==0 - confirm that it accepts this. */
+static void encode_pvq(const int *iy, int N, int K, ec_enc *enc) {
+    int half;
+    int first_half_pulses;
+    int j;
+    celt_assert(N==24 || N==12 || N==6);
+    /* Pairs that are coded directly, without splitting. */
+    if (N==6 || (N==12 && K<=16) || (N==24 && K<=9)) {
+        encode_pulses(iy, N, K, enc);
+        return;
+    }
+    half = N/2;
+    first_half_pulses = 0;
+    for (j=0;j<half;j++) {
+        first_half_pulses += abs(iy[j]);
+    }
+    /* FIXME: Don't use uniform probability for the first-half pulse count. */
+    ec_enc_uint(enc, first_half_pulses, K+1);
+    encode_pvq(iy, half, first_half_pulses, enc);
+    encode_pvq(iy+half, half, K-first_half_pulses, enc);
+}

+/* Quantize the PVQ_DIM-entry vector x with op_pvq_search_c() using PVQ_K
+   pulses and write the resulting integer vector to enc via encode_pvq().
+   NOTE(review): x is handed to op_pvq_search_c() as a non-const pointer;
+   presumably the search may modify it - confirm before reusing x. */
+void dred_encode_state(ec_enc *enc, float *x) {
+    int iy[PVQ_DIM];
+    op_pvq_search_c(x, iy, PVQ_K, PVQ_DIM, 0);
+    encode_pvq(iy, PVQ_DIM, PVQ_K, enc);
+}

+/* Quantize LATENT_DIM values from x and write them to enc.
+   For each index i the value is scaled by scale[i]/256, reduced by
+   delta*tanh(xq/(delta+eps)) with delta = dzone[i]/1024, rounded to the
+   nearest integer and written with ec_laplace_encode_p0() using p0[i] and
+   r[i]. All five arrays must hold at least LATENT_DIM entries. */
+void dred_encode_latents(ec_enc *enc, const float *x, const opus_int16 *scale, const opus_int16 *dzone, const opus_int16 *r, const opus_int16 *p0) {
+    int i;
+    /* Keeps the tanh() argument finite when delta is 0. */
+    float eps = .1f;
+    for (i=0;i<LATENT_DIM;i++) {
+        float delta;
+        float xq;
+        int q;
+        delta = dzone[i]*(1.f/1024.f);
+        xq = x[i]*scale[i]*(1.f/256.f);
+        /* tanh()/floor() are the double-precision versions; they are kept
+           as-is so that the quantized values do not change. */
+        xq = xq - delta*tanh(xq/(delta+eps));
+        q = (int)floor(.5f+xq);
+        ec_laplace_encode_p0(enc, q, p0[i], r[i]);
+    }
+}

+/* Read an integer pulse vector of N entries and K pulses from dec into iy.
+   (N,K) pairs flagged as fitting are read with a single decode_pulses()
+   call; otherwise the pulse count of the first half is read uniformly over
+   [0,K] and the two halves are decoded recursively.
+   NOTE(review): a half can be assigned 0 pulses, so decode_pulses() may be
+   reached with K==0 - confirm that it accepts this. */
+static void decode_pvq(int *iy, int N, int K, ec_dec *dec) {
+    int half;
+    int first_half_pulses;
+    celt_assert(N==24 || N==12 || N==6);
+    /* Pairs that are decoded directly, without splitting. */
+    if (N==6 || (N==12 && K<=16) || (N==24 && K<=9)) {
+        decode_pulses(iy, N, K, dec);
+        return;
+    }
+    half = N/2;
+    /* FIXME: Don't use uniform probability for the first-half pulse count. */
+    first_half_pulses = ec_dec_uint(dec, K+1);
+    decode_pvq(iy, half, first_half_pulses, dec);
+    decode_pvq(iy+half, half, K-first_half_pulses, dec);
+}

+/* Read a PVQ_DIM-entry pulse vector (PVQ_K pulses) from dec and store it in
+   x scaled by 1/sqrt(sum of squares), i.e. with unit Euclidean norm.
+   x must have room for PVQ_DIM floats. */
+void dred_decode_state(ec_dec *dec, float *x) {
+    int k;
+    int iy[PVQ_DIM];
+    float norm = 0;
+    decode_pvq(iy, PVQ_DIM, PVQ_K, dec);
+    /* Energy of the decoded integer vector. */
+    for (k = 0; k < PVQ_DIM; k++)
+    {
+        norm += (float) iy[k] * iy[k];
+    }
+    /* NOTE(review): assumes the decoded vector is never all-zero, otherwise
+       this divides by zero - confirm decode_pvq() guarantees it. */
+    norm = 1 / sqrtf(norm);
+    for (k = 0; k < PVQ_DIM; k++)
+    {
+        x[k] = iy[k] * norm;
+    }
+}

+/* Read LATENT_DIM Laplace-coded integers from dec and store them in x.
+   Entry i is read with ec_laplace_decode_p0() using p0[i] and r[i], then
+   multiplied by 256/scale[i]; a scale of 0 is treated as 1 to avoid a
+   division by zero. All arrays must hold at least LATENT_DIM entries. */
+void dred_decode_latents(ec_dec *dec, float *x, const opus_int16 *scale, const opus_int16 *r, const opus_int16 *p0) {
+    int i;
+    for (i=0;i<LATENT_DIM;i++) {
+        int q;
+        q = ec_laplace_decode_p0(dec, p0[i], r[i]);
+        x[i] = q*256.f/(scale[i] == 0 ? 1 : scale[i]);
+    }
+}

+#if 0

+#include <stdlib.h>

+#define DATA_SIZE 10000

+/* Disabled self-test: encodes one random state vector and one random latent
+   vector into a DATA_SIZE-byte buffer, decodes both again, and prints the
+   latents before encoding and after decoding.
+   Returns 0 on success, 1 if the buffer cannot be allocated. */
+int main(void)
+{
+    ec_enc enc;
+    ec_dec dec;
+    int iter;
+    int bytes;
+    opus_int16 scale[LATENT_DIM];
+    opus_int16 dzone[LATENT_DIM];
+    opus_int16 r[LATENT_DIM];
+    opus_int16 p0[LATENT_DIM];
+    unsigned char *ptr;
+    int k;
+    /* Same quantizer parameters for every latent dimension. */
+    for (k=0;k<LATENT_DIM;k++) {
+        scale[k] = 256;
+        dzone[k] = 0;
+        r[k] = 12054;
+        p0[k] = 12893;
+    }
+    ptr = (unsigned char *)malloc(DATA_SIZE);
+    if (ptr == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return 1;
+    }
+    ec_enc_init(&enc,ptr,DATA_SIZE);
+    for (iter=0;iter<1;iter++) {
+        float x[PVQ_DIM];
+        float sum=1e-30;
+        for (k=0;k<PVQ_DIM;k++) {
+            x[k] = log(1e-15+(float)rand()/RAND_MAX)-log(1e-15+(float)rand()/RAND_MAX);
+            sum += fabs(x[k]);
+        }
+        /* Scale so that the absolute values sum to 1. */
+        for (k=0;k<PVQ_DIM;k++) x[k] *= (1.f/sum);
+        dred_encode_state(&enc, x);
+    }
+    for (iter=0;iter<1;iter++) {
+        float x[LATENT_DIM];
+        for (k=0;k<LATENT_DIM;k++) {
+            x[k] = log(1e-15+(float)rand()/RAND_MAX)-log(1e-15+(float)rand()/RAND_MAX);
+        }
+        for (k=0;k<LATENT_DIM;k++) printf("%f ", x[k]);
+        printf("\n");
+        dred_encode_latents(&enc, x, scale, dzone, r, p0);
+    }
+    /* Round the bit count up to whole bytes and finalize the stream. */
+    bytes = (ec_tell(&enc)+7)/8;
+    ec_enc_shrink(&enc, bytes);
+    ec_enc_done(&enc);
+    ec_dec_init(&dec,ec_get_buffer(&enc),bytes);
+    for (iter=0;iter<1;iter++) {
+        float x[PVQ_DIM];
+        dred_decode_state(&dec, x);
+    }
+    for (iter=0;iter<1;iter++) {
+        float x[LATENT_DIM];
+        dred_decode_latents(&dec, x, scale, r, p0);
+        for (k=0;k<LATENT_DIM;k++) printf("%f ", x[k]);
+        printf("\n");
+    }
+    /* The decoder reads from this buffer, so release it only now. */
+    free(ptr);
+    return 0;
+}

+#endif

\ No newline at end of file

--- a/silk/dred_encoder.c

+++ b/silk/dred_encoder.c

@@ -1,6 +1,14 @@

 #include <string.h>

+#include <stdio.h>

+#include <math.h>

+#ifdef HAVE_CONFIG_H

+#include "config.h"

+#endif

 #include "dred_encoder.h"

+#include "dred_coding.h"

 void init_dred_encoder(DREDEnc* enc)

@@ -12,19 +20,103 @@

 void dred_encode_silk_frame(DREDEnc *enc, const opus_int16 *silk_frame)

+    opus_int16 *dead_zone       = DRED_rdovae_get_dead_zone_pointer();

+    opus_int16 *p0              = DRED_rdovae_get_p0_pointer();

+    opus_int16 *quant_scales    = DRED_rdovae_get_quant_scales_pointer();

+    opus_int16 *r               = DRED_rdovae_get_r_pointer();

+    float input_buffer[2*DRED_NUM_FEATURES] = {0};

+    int bytes;

+    int q_level;

+    int i;

+    int offset;

     /* delay signal by 79 samples */

-    memmove(enc->input_buffer, enc->input_buffer + DRED_SILK_ENCODER_DELAY, DRED_SILK_ENCODER_DELAY * sizeof(*enc->input_buffer));

+    memmove(enc->input_buffer, enc->input_buffer + DRED_DFRAME_SIZE, DRED_SILK_ENCODER_DELAY * sizeof(*enc->input_buffer));

     memcpy(enc->input_buffer + DRED_SILK_ENCODER_DELAY, silk_frame, DRED_DFRAME_SIZE * sizeof(*silk_frame));

     /* shift latents buffer */

-    memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, DRED_LATENT_DIM * sizeof(*enc->latents_buffer));

+    memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM * sizeof(*enc->latents_buffer));

     /* calculate LPCNet features */

     lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer, enc->feature_buffer);

-    lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + DRED_NUM_FEATURES);

+    lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + 36);

+    /* prepare input buffer (discard LPC coefficients) */

+    memcpy(input_buffer, enc->feature_buffer, DRED_NUM_FEATURES * sizeof(input_buffer[0]));

+    memcpy(input_buffer + DRED_NUM_FEATURES, enc->feature_buffer + 36, DRED_NUM_FEATURES * sizeof(input_buffer[0]));

     /* run RDOVAE encoder */

-    DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, enc->feature_buffer);

+    DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, input_buffer);

     /* entropy coding of state and latents */

+    ec_enc_init(&enc->ec_encoder, enc->ec_buffer, DRED_MAX_DATA_SIZE);

+    dred_encode_state(&enc->ec_encoder, enc->state_buffer);

+    for (i = 0; i < DRED_NUM_REDUNDANCY_FRAMES; i += 2)

+    {

+        q_level = (int) round(DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2));

+        offset = q_level * DRED_LATENT_DIM;

+        dred_encode_latents(

+            &enc->ec_encoder,

+            enc->latents_buffer + i * DRED_LATENT_DIM,

+            quant_scales + offset,

+            dead_zone + offset,

+            r + offset,

+            p0 + offset

+        );

+    }

+    bytes = (ec_tell(&enc->ec_encoder)+7)/8;

+    ec_enc_shrink(&enc->ec_encoder, bytes);

+    ec_enc_done(&enc->ec_encoder);

+#if 1

+    printf("packet size: %d\n", bytes*8);

+#endif

+#if 0

+    /* trial decoding */

+    float state[24];

+    float features[4 * 20];

+    float latents[80];

+    float zeros[36 - 20] = {0};

+    static FILE *fid;

+    RDOVAEDec *rdovae_dec = DRED_rdovae_create_decoder();

+    if (fid == NULL)

+    {

+        fid = fopen("features_last.f32", "wb");

+    }

+    /* decode state */

+    ec_enc ec_dec;

+    ec_dec_init(&ec_dec, ec_get_buffer(&enc->ec_encoder), bytes);

+    dred_decode_state(&ec_dec, state);

+    dred_decode_latents(

+        &ec_dec,

+        latents,

+        quant_scales + offset,

+        r + offset,

+        p0 + offset

+        );

+    DRED_rdovae_dec_init_states(rdovae_dec, state);

+    DRED_rdovae_decode_qframe(rdovae_dec, features, latents);

+    DRED_rdovae_destroy_decoder(rdovae_dec);

+    fwrite(features + 40, sizeof(float), 20, fid);

+    fwrite(zeros, sizeof(float), 16, fid);

+    fwrite(features + 60, sizeof(float), 20, fid);

+    fwrite(zeros, sizeof(float), 16, fid);

+#endif

\ No newline at end of file

--- a/silk/dred_encoder.h

+++ b/silk/dred_encoder.h

@@ -14,12 +14,17 @@

 #define DRED_SILK_ENCODER_DELAY 79

 #define DRED_FRAME_SIZE 160

 #define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE))

+#define DRED_MAX_DATA_SIZE 10000

+#define DRED_ENC_Q0 9

+#define DRED_ENC_Q1 15

+#define DRED_NUM_REDUNDANCY_FRAMES 50

 typedef struct {

-    opus_int16 input_buffer[79 + 2 * 160];

+    opus_int16 input_buffer[DRED_DFRAME_SIZE + DRED_SILK_ENCODER_DELAY];

     float feature_buffer[2 * 36];

     float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];

     float state_buffer[24];

+    unsigned char ec_buffer[DRED_MAX_DATA_SIZE];

     ec_enc ec_encoder;

     LPCNetEncState *lpcnet_enc_state;

     RDOVAEEnc *rdovae_enc;

--

⑨