shithub: opus

Download patch

ref: fea85b89f5e68190df5aafed5ad8a2a42122a1e0
parent: 2df55d3583ce73186c91edd86e7e02c9df621522
author: Jan Buethe <jbuethe@amazon.de>
date: Tue Oct 25 08:16:39 EDT 2022

finished encoder implementation

--- /dev/null
+++ b/silk/dred_coding.c
@@ -1,0 +1,196 @@
+/* Copyright (c) 2022 Amazon
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <math.h>
+#include <stdio.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "celt/vq.h"
+#include "celt/cwrs.h"
+#include "celt/laplace.h"
+
+#define LATENT_DIM 80
+#define PVQ_DIM 24
+#define PVQ_K 82
+
+/* Write pulse vector iy (N entries; assumes the absolute values sum to K) to enc, halving it recursively when it is not coded directly. */
+static void encode_pvq(const int *iy, int N, int K, ec_enc *enc) {
+    int fits; /* nonzero when (N,K) is handed straight to encode_pulses() */
+    celt_assert(N==24 || N==12 || N==6);
+    fits = (N==24 && K<=9) || (N==12 && K<=16) || (N==6);
+    if (fits) encode_pulses(iy, N, K, enc);
+    else {
+        int N2 = N/2;
+        int K0=0; /* sum of absolute values in the first half */
+        int i;
+        for (i=0;i<N2;i++) K0 += abs(iy[i]);
+        /* FIXME: Don't use uniform probability for K0. */
+        ec_enc_uint(enc, K0, K+1);
+        /* First half is coded with K0 pulses, second half with the remaining K-K0; decode_pvq() reads them in this order. */
+        encode_pvq(iy, N2, K0, enc);
+        encode_pvq(&iy[N2], N2, K-K0, enc);
+    }
+}
+
+/* PVQ-quantize x (PVQ_DIM values, PVQ_K pulses) and write it to enc. NOTE(review): x is passed non-const to op_pvq_search_c() and may be modified - confirm. */
+void dred_encode_state(ec_enc *enc, float *x) {
+    int iy[PVQ_DIM]; /* integer pulse vector produced by the search */
+    op_pvq_search_c(x, iy, PVQ_K, PVQ_DIM, 0);
+    encode_pvq(iy, PVQ_DIM, PVQ_K, enc);
+}
+
+/* Quantize LATENT_DIM values of x and Laplace-code them into enc; scale, dzone, r and p0 are per-latent tables read at indices 0..LATENT_DIM-1. */
+void dred_encode_latents(ec_enc *enc, const float *x, const opus_int16 *scale, const opus_int16 *dzone, const opus_int16 *r, const opus_int16 *p0) {
+    int i;
+    float eps = .1f; /* keeps xq/(delta+eps) finite when delta is 0 */
+    for (i=0;i<LATENT_DIM;i++) {
+        float delta;
+        float xq;
+        int q;
+        delta = dzone[i]*(1.f/1024.f); /* dead-zone width: table entry scaled by 1/1024 */
+        xq = x[i]*scale[i]*(1.f/256.f); /* latent scaled by scale[i]/256 */
+        xq = xq - delta*tanh(xq/(delta+eps)); /* soft dead zone: pulls xq toward zero by at most delta */
+        q = (int)floor(.5f+xq); /* round to nearest integer */
+        ec_laplace_encode_p0(enc, q, p0[i], r[i]);
+    }
+}
+
+
+
+/* Read into iy a pulse vector (N entries, K pulses) as written by encode_pvq(), following the same recursive split. */
+static void decode_pvq(int *iy, int N, int K, ec_dec *dec) {
+    int fits; /* same direct-coding test as in encode_pvq() */
+    celt_assert(N==24 || N==12 || N==6);
+    fits = (N==24 && K<=9) || (N==12 && K<=16) || (N==6);
+    if (fits) decode_pulses(iy, N, K, dec);
+    else {
+        int N2 = N/2;
+        int K0; /* pulse count of the first half, read from the stream */
+        /* FIXME: Don't use uniform probability for K0. */
+        K0 = ec_dec_uint(dec, K+1);
+        /* Decode the halves in the order they were written: K0 pulses first, then the remaining K-K0. */
+        decode_pvq(iy, N2, K0, dec);
+        decode_pvq(&iy[N2], N2, K-K0, dec);
+    }
+}
+
+/* Decode the PVQ-coded state (PVQ_DIM entries, PVQ_K pulses) from dec and store it in
+   x[0..PVQ_DIM-1], scaled so that its Euclidean norm is 1. */
+void dred_decode_state(ec_dec *dec, float *x) {
+    int k;
+    int iy[PVQ_DIM];
+    float norm = 0;
+    decode_pvq(iy, PVQ_DIM, PVQ_K, dec);
+    /* Sum of squares; assumes decode_pvq() produced at least one nonzero pulse, otherwise the division below is by zero. */
+    for (k = 0; k < PVQ_DIM; k++)
+    {
+        norm += (float) iy[k] * iy[k];
+    }
+    norm = 1 / sqrtf(norm);
+    for (k = 0; k < PVQ_DIM; k++)
+    {
+        x[k] = iy[k] * norm;
+    }
+}
+
+/* Decode LATENT_DIM Laplace-coded integers from dec and store q*256/scale[i] in x[i]; a zero scale entry is treated as 1 to avoid dividing by zero. */
+void dred_decode_latents(ec_dec *dec, float *x, const opus_int16 *scale, const opus_int16 *r, const opus_int16 *p0) {
+    int i;
+    for (i=0;i<LATENT_DIM;i++) {
+        int q;
+        q = ec_laplace_decode_p0(dec, p0[i], r[i]);
+        x[i] = q*256.f/(scale[i] == 0 ? 1 : scale[i]);
+    }
+}
+
+#if 0
+#include <stdlib.h>
+
+#define DATA_SIZE 10000
+
+/* Disabled round-trip driver: encodes one random state and one random latent vector, decodes both again and prints the latents before and after. */
+int main()
+{
+    ec_enc enc;
+    ec_dec dec;
+    int iter;
+    int bytes;
+    opus_int16 scale[LATENT_DIM];
+    opus_int16 dzone[LATENT_DIM];
+    opus_int16 r[LATENT_DIM];
+    opus_int16 p0[LATENT_DIM];
+    unsigned char *ptr;
+    int k;
+    for (k=0;k<LATENT_DIM;k++) {
+        scale[k] = 256; /* x*scale/256 leaves x unchanged in dred_encode_latents() */
+        dzone[k] = 0; /* zero dead-zone width */
+        r[k] = 12054;
+        p0[k] = 12893;
+    }
+    ptr = (unsigned char *)malloc(DATA_SIZE); /* NOTE(review): result is not checked and the buffer is never freed */
+    ec_enc_init(&enc,ptr,DATA_SIZE);
+    for (iter=0;iter<1;iter++) {
+        float x[PVQ_DIM];
+        float sum=1e-30; /* tiny start value keeps 1.f/sum finite */
+        for (k=0;k<PVQ_DIM;k++) {
+            x[k] = log(1e-15+(float)rand()/RAND_MAX)-log(1e-15+(float)rand()/RAND_MAX);
+            sum += fabs(x[k]);
+        }
+        for (k=0;k<PVQ_DIM;k++) x[k] *= (1.f/sum); /* scale so the absolute values sum to (about) 1 */
+        /*for (k=0;k<PVQ_DIM;k++) printf("%f ", x[k]);
+        printf("\n");*/
+        dred_encode_state(&enc, x);
+    }
+    for (iter=0;iter<1;iter++) {
+        float x[LATENT_DIM];
+        for (k=0;k<LATENT_DIM;k++) {
+            x[k] = log(1e-15+(float)rand()/RAND_MAX)-log(1e-15+(float)rand()/RAND_MAX);
+        }
+        for (k=0;k<LATENT_DIM;k++) printf("%f ", x[k]);
+        printf("\n");
+        dred_encode_latents(&enc, x, scale, dzone, r, p0);
+    }
+    bytes = (ec_tell(&enc)+7)/8; /* bits used so far, rounded up to whole bytes */
+    ec_enc_shrink(&enc, bytes);
+    ec_enc_done(&enc);
+    /* Decode from the encoder's buffer, limited to the bytes computed above. */
+    ec_dec_init(&dec,ec_get_buffer(&enc),bytes);
+    for (iter=0;iter<1;iter++) {
+        float x[PVQ_DIM];
+        dred_decode_state(&dec, x);        
+    }
+    for (iter=0;iter<1;iter++) {
+        float x[LATENT_DIM];
+        dred_decode_latents(&dec, x, scale, r, p0);
+        for (k=0;k<LATENT_DIM;k++) printf("%f ", x[k]);
+        printf("\n");
+    }
+}
+#endif
\ No newline at end of file
--- a/silk/dred_encoder.c
+++ b/silk/dred_encoder.c
@@ -1,6 +1,14 @@
 #include <string.h>
 
+#include <stdio.h>
+#include <math.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
 #include "dred_encoder.h"
+#include "dred_coding.h"
 
 
 void init_dred_encoder(DREDEnc* enc)
@@ -12,19 +20,103 @@
 
+/* Take DRED_DFRAME_SIZE samples from silk_frame, compute LPCNet features at input offsets 0 and DRED_FRAME_SIZE,
+   run the RDOVAE encoder and range-code the state plus every second latent frame into enc->ec_buffer. */
 void dred_encode_silk_frame(DREDEnc *enc, const opus_int16 *silk_frame)
 {
+    opus_int16 *dead_zone       = DRED_rdovae_get_dead_zone_pointer();
+    opus_int16 *p0              = DRED_rdovae_get_p0_pointer();
+    opus_int16 *quant_scales    = DRED_rdovae_get_quant_scales_pointer();
+    opus_int16 *r               = DRED_rdovae_get_r_pointer();
+    /* Features of both frames packed back to back (DRED_NUM_FEATURES each) as input to the RDOVAE encoder. */
+    float input_buffer[2*DRED_NUM_FEATURES] = {0};
+
+    int bytes;
+    int q_level;
+    int i;
+    int offset;
+
     /* delay signal by 79 samples */
-    memmove(enc->input_buffer, enc->input_buffer + DRED_SILK_ENCODER_DELAY, DRED_SILK_ENCODER_DELAY * sizeof(*enc->input_buffer));
+    memmove(enc->input_buffer, enc->input_buffer + DRED_DFRAME_SIZE, DRED_SILK_ENCODER_DELAY * sizeof(*enc->input_buffer));
     memcpy(enc->input_buffer + DRED_SILK_ENCODER_DELAY, silk_frame, DRED_DFRAME_SIZE * sizeof(*silk_frame));
 
     /* shift latents buffer */
-    memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, DRED_LATENT_DIM * sizeof(*enc->latents_buffer));
+    memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM * sizeof(*enc->latents_buffer));
 
     /* calculate LPCNet features */
     lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer, enc->feature_buffer);
-    lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + DRED_NUM_FEATURES);
+    lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + 36);
 
+    /* prepare input buffer (discard LPC coefficients): keep the first DRED_NUM_FEATURES of each 36-float feature frame */
+    memcpy(input_buffer, enc->feature_buffer, DRED_NUM_FEATURES * sizeof(input_buffer[0]));
+    memcpy(input_buffer + DRED_NUM_FEATURES, enc->feature_buffer + 36, DRED_NUM_FEATURES * sizeof(input_buffer[0]));
+
     /* run RDOVAE encoder */
-    DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, enc->feature_buffer);
+    DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, input_buffer);
 
     /* entropy coding of state and latents */
+    ec_enc_init(&enc->ec_encoder, enc->ec_buffer, DRED_MAX_DATA_SIZE);
+    dred_encode_state(&enc->ec_encoder, enc->state_buffer);   
+    /* q_level ramps linearly from DRED_ENC_Q0 at i = 0 to DRED_ENC_Q1 at the last i and selects a DRED_LATENT_DIM-sized slice of each table. */
+    for (i = 0; i < DRED_NUM_REDUNDANCY_FRAMES; i += 2)
+    {
+        q_level = (int) round(DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2));
+        offset = q_level * DRED_LATENT_DIM;
+
+        dred_encode_latents(
+            &enc->ec_encoder,
+            enc->latents_buffer + i * DRED_LATENT_DIM,
+            quant_scales + offset,
+            dead_zone + offset,
+            r + offset,
+            p0 + offset
+        );
+    }
+    /* Bits used so far, rounded up to whole bytes, then finish the range coder. */
+    bytes = (ec_tell(&enc->ec_encoder)+7)/8;
+    ec_enc_shrink(&enc->ec_encoder, bytes);
+    ec_enc_done(&enc->ec_encoder);
+    /* NOTE(review): this debug print to stdout is always enabled; consider disabling it outside of experiments. */
+#if 1
+    printf("packet size: %d\n", bytes*8);
+#endif
+
+#if 0
+    /* trial decoding */
+    float state[24];
+    float features[4 * 20];
+    float latents[80];
+    float zeros[36 - 20] = {0};
+    static FILE *fid;
+    RDOVAEDec *rdovae_dec = DRED_rdovae_create_decoder();
+
+    if (fid == NULL)
+    {
+        fid = fopen("features_last.f32", "wb");
+    }
+
+    /* decode state */
+    ec_enc ec_dec;
+    ec_dec_init(&ec_dec, ec_get_buffer(&enc->ec_encoder), bytes);
+    dred_decode_state(&ec_dec, state);
+
+    dred_decode_latents(
+        &ec_dec,
+        latents,
+        quant_scales + offset,
+        r + offset,
+        p0 + offset
+        );
+
+    DRED_rdovae_dec_init_states(rdovae_dec, state);
+
+    DRED_rdovae_decode_qframe(rdovae_dec, features, latents);
+
+    DRED_rdovae_destroy_decoder(rdovae_dec);
+
+    fwrite(features + 40, sizeof(float), 20, fid);
+    fwrite(zeros, sizeof(float), 16, fid);
+    fwrite(features + 60, sizeof(float), 20, fid);
+    fwrite(zeros, sizeof(float), 16, fid);
+
+#endif
+
 }
\ No newline at end of file
--- a/silk/dred_encoder.h
+++ b/silk/dred_encoder.h
@@ -14,12 +14,17 @@
 #define DRED_SILK_ENCODER_DELAY 79
 #define DRED_FRAME_SIZE 160
 #define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE))
+#define DRED_MAX_DATA_SIZE 10000 /* size in bytes of DREDEnc.ec_buffer, the range coder output buffer */
+#define DRED_ENC_Q0 9 /* q_level used for the first encoded latent frame */
+#define DRED_ENC_Q1 15 /* q_level used for the last encoded latent frame */
+#define DRED_NUM_REDUNDANCY_FRAMES 50 /* exclusive upper bound of the latent frame index; every second frame is encoded */
 
 typedef struct {
-    opus_int16 input_buffer[79 + 2 * 160];
+    opus_int16 input_buffer[DRED_DFRAME_SIZE + DRED_SILK_ENCODER_DELAY];
     float feature_buffer[2 * 36];
     float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
     float state_buffer[24];
+    unsigned char ec_buffer[DRED_MAX_DATA_SIZE];
     ec_enc ec_encoder;
     LPCNetEncState *lpcnet_enc_state;
     RDOVAEEnc *rdovae_enc;
--