shithub: opus

--- /dev/null

+++ b/dnn/ceps_vq_train.c

@@ -1,0 +1,399 @@

+#include <valgrind/memcheck.h>

+#include <stdlib.h>

+#include <stdio.h>

+#include <math.h>

+#define MIN(a,b) ((a)<(b)?(a):(b))

+#define COEF 0.75f

+#define MAX_ENTRIES 16384

+/* Fill w[0..ndim-1] with weights derived from how close each x[i] sits to its
+   neighbours: the smaller the gap to the nearest neighbour, the larger the
+   weight. x[0] is measured against 0 on its lower side and x[ndim-1] against
+   pi on its upper side. The first two weights are then boosted by 3x and 2x.
+   Reads x[1] and x[ndim-2] and writes w[1], so ndim must be at least 2. */
+void compute_weights(const float *x, float *w, int ndim)
+{
+  int k;
+  const int last = ndim-1;
+  /* End points: one side is a fixed boundary rather than a neighbour. */
+  w[0] = MIN(x[0], x[1]-x[0]);
+  w[last] = MIN(x[last]-x[last-1], M_PI-x[last]);
+  /* Interior points: distance to the closer of the two neighbours. */
+  for (k=1;k<last;k++)
+    w[k] = MIN(x[k]-x[k-1], x[k+1]-x[k]);
+  /* Invert the gaps; the .01 offset keeps the division finite for a zero gap. */
+  for (k=0;k<ndim;k++)
+    w[k] = 1./(.01+w[k]);
+  w[0] *= 3;
+  w[1] *= 2;
+}

+/* Exhaustive nearest-neighbour search with squared Euclidean distance.
+   codebook holds nb_entries rows of ndim floats; x is the ndim-float query.
+   Returns the index of the closest row (the lowest index wins ties, and 0 is
+   returned when no row beats the 1e15 starting distance). If dist is not NULL
+   it receives the winning squared distance. */
+int find_nearest(const float *codebook, int nb_entries, const float *x, int ndim, float *dist)
+{
+  int e, k;
+  int best = 0;
+  float best_d = 1e15;
+  const float *row = codebook;
+  for (e=0;e<nb_entries;e++)
+  {
+    float acc = 0;
+    for (k=0;k<ndim;k++)
+    {
+      float diff = x[k]-row[k];
+      acc += diff*diff;
+    }
+    if (acc<best_d)
+    {
+      best_d = acc;
+      best = e;
+    }
+    row += ndim;
+  }
+  if (dist != NULL)
+    *dist = best_d;
+  return best;
+}

+/* Exhaustive nearest-neighbour search using a per-dimension weighted squared
+   error: sum over j of w[j]*(x[j]-entry[j])^2. codebook holds nb_entries rows
+   of ndim floats. Returns the index of the row with the smallest weighted
+   error (the lowest index wins ties; 0 if nothing beats the 1e15 start). */
+int find_nearest_weighted(const float *codebook, int nb_entries, float *x, const float *w, int ndim)
+{
+  int e, k;
+  int best = 0;
+  float best_d = 1e15;
+  const float *row = codebook;
+  for (e=0;e<nb_entries;e++)
+  {
+    float acc = 0;
+    for (k=0;k<ndim;k++)
+    {
+      float diff = x[k]-row[k];
+      acc += w[k]*diff*diff;
+    }
+    if (acc<best_d)
+    {
+      best_d = acc;
+      best = e;
+    }
+    row += ndim;
+  }
+  return best;
+}

+/* One step of predictive, two-stage quantization of the vector x.
+   xq holds the previous quantized vector on entry and the new one on return.
+   Stage 1 searches codebook1 (ndim-dimensional, unweighted) for the prediction
+   residual x - COEF*xq. Stage 2 splits what is left into its even- and
+   odd-indexed coefficients and searches codebook2 / codebook3 (ndim/2
+   dimensions each) with the weights from compute_weights(). Every codebook
+   holds nb_entries entries. For odd ndim the last coefficient only receives
+   the stage-1 contribution. Always returns 0. */
+int quantize_lsp(const float *x, const float *codebook1, const float *codebook2,
+		 const float *codebook3, int nb_entries, float *xq, int ndim)
+{
+  int i, n1, n2, n3;
+  /* Row length of the two stage-2 codebooks. Using the same stride for the
+     search and the reconstruction keeps them consistent when ndim is odd. */
+  const int half = ndim/2;
+  float err[ndim], err2[ndim], err3[ndim];
+  float w[ndim], w2[ndim], w3[ndim];
+  compute_weights(x, w, ndim);
+  for (i=0;i<ndim;i++)
+    err[i] = x[i]-COEF*xq[i];
+  n1 = find_nearest(codebook1, nb_entries, err, ndim, NULL);
+  for (i=0;i<ndim;i++)
+  {
+    xq[i] = COEF*xq[i] + codebook1[ndim*n1+i];
+    err[i] -= codebook1[ndim*n1+i];
+  }
+  /* De-interleave the remaining error and its weights. */
+  for (i=0;i<half;i++)
+  {
+    err2[i] = err[2*i];
+    err3[i] = err[2*i+1];
+    w2[i] = w[2*i];
+    w3[i] = w[2*i+1];
+  }
+  n2 = find_nearest_weighted(codebook2, nb_entries, err2, w2, half);
+  n3 = find_nearest_weighted(codebook3, nb_entries, err3, w3, half);
+  for (i=0;i<half;i++)
+  {
+    xq[2*i] += codebook2[half*n2+i];
+    xq[2*i+1] += codebook3[half*n3+i];
+  }
+  return 0;
+}

+/* Double a codebook in place. Each of the nb_entries*ndim existing values is
+   nudged by a random offset in [-.005, .005] drawn from rand(), and a twin
+   value with the offset taken back out is stored nb_entries rows further on.
+   codebook must have room for 2*nb_entries rows of ndim floats. */
+void split(float *codebook, int nb_entries, int ndim)
+{
+  int k;
+  const int total = nb_entries*ndim;
+  float *lower = codebook;
+  float *upper = codebook+total;
+  for (k=0;k<total;k++)
+  {
+    float jitter = .01*(rand()/(float)RAND_MAX-.5);
+    lower[k] += jitter;
+    upper[k] = lower[k] - jitter;
+  }
+}

+/* Grow a codebook by exactly one entry. Every training vector is assigned to
+   its nearest entry (unweighted squared error) and the entry that collects the
+   largest total error is split: it is nudged by a random offset in
+   [-.0005, .0005] per dimension and a twin with the offset taken back out is
+   written as row nb_entries. codebook must have room for nb_entries+1 rows.
+   Only per-entry totals are kept, so stack use does not grow with nb_vectors. */
+void split1(float *codebook, int nb_entries, const float *data, int nb_vectors, int ndim)
+{
+  int i,j;
+  float dist[nb_entries];
+  int worst;
+  for (i=0;i<nb_entries;i++)
+    dist[i] = 0;
+  for (i=0;i<nb_vectors;i++)
+  {
+    float d;
+    int n = find_nearest(codebook, nb_entries, data+i*ndim, ndim, &d);
+    dist[n] += d;
+  }
+  worst=0;
+  for (i=1;i<nb_entries;i++)
+  {
+    if (dist[i] > dist[worst])
+      worst=i;
+  }
+  for (j=0;j<ndim;j++)
+  {
+    float delta = .001*(rand()/(float)RAND_MAX-.5);
+    codebook[worst*ndim+j] += delta;
+    codebook[nb_entries*ndim+j] = codebook[worst*ndim+j] - delta;
+  }
+}

+/* One k-means iteration. Each of the nb_vectors training vectors (ndim floats
+   each) is assigned to its nearest codebook entry, the RMS quantization error
+   of that assignment is printed to stdout, and every entry that received at
+   least one vector is replaced by the mean of its vectors. Entries that
+   received no vector are left unchanged; averaging over zero vectors would
+   turn them into NaN, after which they could never be selected again.
+   Working buffers are heap-allocated so that large training sets do not
+   overflow the stack; the process exits with status 1 if allocation fails.
+   Does nothing when nb_vectors or nb_entries is not positive. */
+void update(float *data, int nb_vectors, float *codebook, int nb_entries, int ndim)
+{
+  int i,j;
+  int *count;
+  int *nearest;
+  double err=0;
+  if (nb_vectors <= 0 || nb_entries <= 0)
+    return;
+  count = calloc(nb_entries, sizeof(*count));
+  nearest = malloc(nb_vectors*sizeof(*nearest));
+  if (count == NULL || nearest == NULL)
+  {
+    fprintf(stderr, "update: out of memory\n");
+    free(count);
+    free(nearest);
+    exit(1);
+  }
+  for (i=0;i<nb_vectors;i++)
+  {
+    float dist;
+    nearest[i] = find_nearest(codebook, nb_entries, data+i*ndim, ndim, &dist);
+    count[nearest[i]]++;
+    err += dist;
+  }
+  printf("RMS error = %f\n", sqrt(err/nb_vectors/ndim));
+  /* Only the entries about to be recomputed are cleared. */
+  for (i=0;i<nb_entries;i++)
+  {
+    if (count[i] == 0)
+      continue;
+    for (j=0;j<ndim;j++)
+      codebook[i*ndim+j] = 0;
+  }
+  for (i=0;i<nb_vectors;i++)
+  {
+    int n = nearest[i];
+    for (j=0;j<ndim;j++)
+      codebook[n*ndim+j] += data[i*ndim+j];
+  }
+  for (i=0;i<nb_entries;i++)
+  {
+    if (count[i] == 0)
+      continue;
+    for (j=0;j<ndim;j++)
+      codebook[i*ndim+j] *= (1./count[i]);
+  }
+  free(nearest);
+  free(count);
+}

+/* One weighted k-means iteration. Each training vector is assigned to its
+   nearest entry under the per-dimension weights in weight (same layout as
+   data: nb_vectors rows of ndim floats). Every codebook component is then
+   replaced by the average of the assigned data values, each value weighted by
+   the square root of its weight. A component whose weight total is zero (an
+   empty cell, or all-zero weights) keeps its previous value instead of
+   becoming NaN.
+   Working buffers are sized from nb_entries and nb_vectors and live on the
+   heap; the process exits with status 1 if allocation fails. Does nothing when
+   nb_vectors, nb_entries or ndim is not positive. */
+void update_weighted(float *data, float *weight, int nb_vectors, float *codebook, int nb_entries, int ndim)
+{
+  int i,j;
+  size_t k;
+  size_t cells;
+  float *acc;    /* weighted sum of data, one slot per codebook component */
+  float *wsum;   /* matching sum of the weights */
+  int *nearest;
+  if (nb_vectors <= 0 || nb_entries <= 0 || ndim <= 0)
+    return;
+  cells = (size_t)nb_entries*ndim;
+  acc = calloc(cells, sizeof(*acc));
+  wsum = calloc(cells, sizeof(*wsum));
+  nearest = malloc(nb_vectors*sizeof(*nearest));
+  if (acc == NULL || wsum == NULL || nearest == NULL)
+  {
+    fprintf(stderr, "update_weighted: out of memory\n");
+    free(acc);
+    free(wsum);
+    free(nearest);
+    exit(1);
+  }
+  for (i=0;i<nb_vectors;i++)
+  {
+    nearest[i] = find_nearest_weighted(codebook, nb_entries, data+i*ndim, weight+i*ndim, ndim);
+  }
+  for (i=0;i<nb_vectors;i++)
+  {
+    int n = nearest[i];
+    for (j=0;j<ndim;j++)
+    {
+      float w = sqrt(weight[i*ndim+j]);
+      wsum[n*ndim+j] += w;
+      acc[n*ndim+j] += w*data[i*ndim+j];
+    }
+  }
+  for (k=0;k<cells;k++)
+  {
+    if (wsum[k] > 0)
+      codebook[k] = acc[k]*(1./wsum[k]);
+  }
+  free(nearest);
+  free(wsum);
+  free(acc);
+}

+/* Train a codebook by repeated doubling. The codebook starts as a single
+   entry equal to the mean of the nb_vectors training vectors; it is then
+   doubled with split() and refined with 4 update() passes until it holds at
+   least nb_entries entries, and finally refined with 2*ndim more passes.
+   The current size is printed to stderr after every doubling. Because the
+   size only ever doubles, codebook must have room for the first power of two
+   that is >= nb_entries. */
+void vq_train(float *data, int nb_vectors, float *codebook, int nb_entries, int ndim)
+{
+  int v, d, pass;
+  int size;
+  /* Seed entry: the mean of all training vectors. */
+  for (d=0;d<ndim;d++)
+    codebook[d] = 0;
+  for (v=0;v<nb_vectors;v++)
+  {
+    const float *vec = data+v*ndim;
+    for (d=0;d<ndim;d++)
+      codebook[d] += vec[d];
+  }
+  for (d=0;d<ndim;d++)
+    codebook[d] *= (1./nb_vectors);
+  for (size=1;size<nb_entries;)
+  {
+    split(codebook, size, ndim);
+    size *= 2;
+    fprintf(stderr, "%d\n", size);
+    for (pass=0;pass<4;pass++)
+      update(data, nb_vectors, codebook, size, ndim);
+  }
+  for (pass=0;pass<2*ndim;pass++)
+    update(data, nb_vectors, codebook, size, ndim);
+}

+/* Train a codebook one entry at a time using weighted updates. The codebook
+   starts as a single entry equal to the (unweighted) mean of the training
+   vectors. Until it holds nb_entries entries, split1() adds one entry and
+   update_weighted() is run ndim times. The current size is printed to stderr
+   after every split. */
+void vq_train_weighted(float *data, float *weight, int nb_vectors, float *codebook, int nb_entries, int ndim)
+{
+  int v, d, pass;
+  int size;
+  /* Seed entry: the mean of all training vectors. */
+  for (d=0;d<ndim;d++)
+    codebook[d] = 0;
+  for (v=0;v<nb_vectors;v++)
+  {
+    const float *vec = data+v*ndim;
+    for (d=0;d<ndim;d++)
+      codebook[d] += vec[d];
+  }
+  for (d=0;d<ndim;d++)
+    codebook[d] *= (1./nb_vectors);
+  for (size=1;size<nb_entries;)
+  {
+    split1(codebook, size, data, nb_vectors, ndim);
+    size += 1;
+    fprintf(stderr, "%d\n", size);
+    for (pass=0;pass<ndim;pass++)
+      update_weighted(data, weight, nb_vectors, codebook, size, ndim);
+  }
+}

+/* Train a two-stage vector quantizer for cepstral features and write both
+   codebooks to ceps_codebooks.c.
+
+   Usage: ceps_vq_train <ndim> <total_dim> <nb_vectors> <nb_bits> < features
+
+   stdin carries raw floats, total_dim per frame. Only the first ndim values of
+   each frame are kept and, of those, coefficient 0 is skipped, so the
+   codebooks have ndim-1 dimensions and 1<<nb_bits entries each. At most
+   nb_vectors frames are read; fewer are used if stdin ends early.
+   Stage 1 is trained on the residual of predicting each frame from the frame
+   4 steps earlier scaled by COEF (the first 4 frames get a zero residual).
+   Stage 2 is trained on the stage-1 quantization error.
+   Returns 0 on success and 1 on bad arguments, allocation failure, empty
+   input, or failure to write the output file. */
+int main(int argc, char **argv)
+{
+  int i,j;
+  int nb_vectors, nb_entries, nb_bits, ndim, ndim0, total_dim;
+  int ret = 1;
+  float *data = NULL, *pred = NULL, *codebook = NULL, *codebook2 = NULL;
+  float *delta = NULL;
+  double err;
+  FILE *fout;
+  if (argc < 5)
+  {
+    fprintf(stderr, "usage: ceps_vq_train <ndim> <total_dim> <nb_vectors> <nb_bits> < features\n");
+    return 1;
+  }
+  ndim = atoi(argv[1]);
+  total_dim = atoi(argv[2]);
+  nb_vectors = atoi(argv[3]);
+  nb_bits = atoi(argv[4]);
+  /* ndim >= 2 leaves at least one dimension after dropping coefficient 0;
+     total_dim >= ndim guarantees each stored frame is fully read;
+     the nb_bits range keeps the shift below well defined. */
+  if (ndim < 2 || total_dim < ndim || nb_vectors < 1 || nb_bits < 0 || nb_bits > 30)
+  {
+    fprintf(stderr, "ceps_vq_train: invalid arguments\n");
+    return 1;
+  }
+  ndim0 = ndim-1;
+  nb_entries = 1<<nb_bits;
+  /* Frames are stored with stride ndim but read total_dim floats at a time,
+     so the last read may run total_dim-ndim floats past the last frame. */
+  data = malloc(((size_t)nb_vectors*ndim+total_dim)*sizeof(*data));
+  pred = malloc((size_t)nb_vectors*ndim0*sizeof(*pred));
+  delta = malloc((size_t)nb_vectors*ndim0*sizeof(*delta));
+  codebook = malloc((size_t)nb_entries*ndim0*sizeof(*codebook));
+  codebook2 = malloc((size_t)nb_entries*ndim0*sizeof(*codebook2));
+  if (data == NULL || pred == NULL || delta == NULL || codebook == NULL || codebook2 == NULL)
+  {
+    fprintf(stderr, "ceps_vq_train: out of memory\n");
+    goto done;
+  }
+  for (i=0;i<nb_vectors;i++)
+  {
+    /* A short count means end of input or a read error; drop the partial frame. */
+    if (fread(&data[i*ndim], sizeof(float), total_dim, stdin) != (size_t)total_dim)
+      break;
+  }
+  nb_vectors = i;
+  if (nb_vectors == 0)
+  {
+    fprintf(stderr, "ceps_vq_train: no complete frame on stdin\n");
+    goto done;
+  }
+  /* The length argument is in bytes. */
+  VALGRIND_CHECK_MEM_IS_DEFINED(data, (size_t)nb_vectors*ndim*sizeof(*data));
+  for (i=0;i<nb_vectors && i<4;i++)
+  {
+    for (j=0;j<ndim0;j++)
+      pred[i*ndim0+j] = 0;
+  }
+  for (i=4;i<nb_vectors;i++)
+  {
+    for (j=0;j<ndim0;j++)
+      pred[i*ndim0+j] = data[i*ndim+j+1] - COEF*data[(i-4)*ndim+j+1];
+  }
+  VALGRIND_CHECK_MEM_IS_DEFINED(pred, (size_t)nb_vectors*ndim0*sizeof(*pred));
+  vq_train(pred, nb_vectors, codebook, nb_entries, ndim0);
+  err = 0;
+  for (i=0;i<nb_vectors;i++)
+  {
+    int nearest = find_nearest(codebook, nb_entries, &pred[i*ndim0], ndim0, NULL);
+    for (j=0;j<ndim0;j++)
+    {
+      delta[i*ndim0+j] = pred[i*ndim0+j] - codebook[nearest*ndim0+j];
+      err += delta[i*ndim0+j]*delta[i*ndim0+j];
+    }
+  }
+  /* err sums nb_vectors*ndim0 squared terms, so that is the RMS divisor. */
+  fprintf(stderr, "Cepstrum RMS error: %f\n", sqrt(err/nb_vectors/ndim0));
+  vq_train(delta, nb_vectors, codebook2, nb_entries, ndim0);
+  err=0;
+  for (i=0;i<nb_vectors;i++)
+  {
+    int n1;
+    n1 = find_nearest(codebook2, nb_entries, &delta[i*ndim0], ndim0, NULL);
+    for (j=0;j<ndim0;j++)
+    {
+      delta[i*ndim0+j] = delta[i*ndim0+j] - codebook2[n1*ndim0+j];
+      err += delta[i*ndim0+j]*delta[i*ndim0+j];
+    }
+  }
+  fprintf(stderr, "Cepstrum RMS error after stage 2: %f\n", sqrt(err/nb_vectors/ndim0));
+  fout = fopen("ceps_codebooks.c", "w");
+  if (fout == NULL)
+  {
+    perror("ceps_codebooks.c");
+    goto done;
+  }
+  fprintf(fout, "/* This file is automatically generated */\n\n");
+  fprintf(fout, "float ceps_codebook1[%d*%d] = {\n",nb_entries, ndim0);
+  for (i=0;i<nb_entries;i++)
+  {
+    for (j=0;j<ndim0;j++)
+      fprintf(fout, "%f, ", codebook[i*ndim0+j]);
+    fprintf(fout, "\n");
+  }
+  fprintf(fout, "};\n\n");
+  fprintf(fout, "float ceps_codebook2[%d*%d] = {\n",nb_entries, ndim0);
+  for (i=0;i<nb_entries;i++)
+  {
+    for (j=0;j<ndim0;j++)
+      fprintf(fout, "%f, ", codebook2[i*ndim0+j]);
+    fprintf(fout, "\n");
+  }
+  fprintf(fout, "};\n\n");
+  /* fclose flushes; a failure here means the file is incomplete. */
+  if (fclose(fout) != 0)
+  {
+    perror("ceps_codebooks.c");
+    goto done;
+  }
+  ret = 0;
+done:
+  free(codebook2);
+  free(codebook);
+  free(delta);
+  free(pred);
+  free(data);
+  return ret;
+}

--- a/dnn/dump_data.c

+++ b/dnn/dump_data.c

@@ -51,7 +51,67 @@

 #define NB_FEATURES (2*NB_BANDS+3+LPC_ORDER)

+#include "ceps_codebooks.c"

+/* Return the index of the codebook row closest to x in squared Euclidean
+   distance. codebook holds nb_entries rows of ndim floats. The lowest index
+   wins ties, and 0 is returned if no row beats the 1e15 starting distance.
+   When dist is not NULL the winning squared distance is stored through it. */
+int vq_quantize(const float *codebook, int nb_entries, const float *x, int ndim, float *dist)
+{
+  int entry, k;
+  int winner = 0;
+  float lowest = 1e15;
+  for (entry=0;entry<nb_entries;entry++)
+  {
+    const float *row = codebook + entry*ndim;
+    float sum = 0;
+    for (k=0;k<ndim;k++)
+    {
+      float diff = x[k]-row[k];
+      sum += diff*diff;
+    }
+    if (sum<lowest)
+    {
+      lowest = sum;
+      winner = entry;
+    }
+  }
+  if (dist != NULL)
+    *dist = lowest;
+  return winner;
+}

+#define NB_BANDS_1 (NB_BANDS - 1)

+float vq_mem[NB_BANDS_1];

+/* Replace the NB_BANDS_1 values in x by their two-stage VQ reconstruction:
+   the nearest entry of ceps_codebook1 plus the nearest entry of
+   ceps_codebook2 to what stage 1 left over. mem receives a copy of the
+   reconstructed vector. The inter-frame prediction from mem is currently
+   switched off (its coefficient is 0.0f). Returns the stage-1 index. */
+int quantize(float *x, float *mem)
+{
+    int i;
+    int id, id2;
+    /* Entry counts are taken from the generated tables themselves, so a
+       retrained codebook of a different size is never indexed out of range. */
+    const int nb_entries1 = (int)(sizeof(ceps_codebook1)/sizeof(ceps_codebook1[0]))/NB_BANDS_1;
+    const int nb_entries2 = (int)(sizeof(ceps_codebook2)/sizeof(ceps_codebook2[0]))/NB_BANDS_1;
+    float ref[NB_BANDS_1];
+    /* Unquantized copy, only used by the disabled error print below. */
+    RNN_COPY(ref, x, NB_BANDS_1);
+    for (i=0;i<NB_BANDS_1;i++) {
+        x[i] -= 0.0f*mem[i];
+    }
+    id = vq_quantize(ceps_codebook1, nb_entries1, x, NB_BANDS_1, NULL);
+    for (i=0;i<NB_BANDS_1;i++) {
+        x[i] -= ceps_codebook1[id*NB_BANDS_1 + i];
+    }
+    id2 = vq_quantize(ceps_codebook2, nb_entries2, x, NB_BANDS_1, NULL);
+    /* Rebuild x as stage 2 + stage 1 (+ the disabled prediction term). */
+    for (i=0;i<NB_BANDS_1;i++) {
+        x[i] = ceps_codebook2[id2*NB_BANDS_1 + i];
+    }
+    for (i=0;i<NB_BANDS_1;i++) {
+        x[i] += ceps_codebook1[id*NB_BANDS_1 + i];
+    }
+    for (i=0;i<NB_BANDS_1;i++) {
+        x[i] += 0.0f*mem[i];
+        mem[i] = x[i];
+    }
+    /* Debug aid: change to if (1) to print the per-frame RMS quantization error. */
+    if (0) {
+        float err = 0;
+        for (i=0;i<NB_BANDS_1;i++) {
+            err += (x[i]-ref[i])*(x[i]-ref[i]);
+        }
+        printf("%f\n", sqrt(err/NB_BANDS_1));
+    }
+    return id;
+}

 typedef struct {

   float analysis_mem[OVERLAP_SIZE];

   float cepstral_mem[CEPS_MEM][NB_BANDS];

@@ -140,6 +200,7 @@

     E += Ex[i];

   dct(st->features[st->pcount], Ly);

+  quantize(&st->features[st->pcount][1], vq_mem);

   st->features[st->pcount][0] -= 4;

   g = lpc_from_cepstrum(st->lpc, st->features[st->pcount]);

   st->features[st->pcount][2*NB_BANDS+2] = log10(g);

--- a/dnn/lpcnet.py

+++ b/dnn/lpcnet.py

@@ -153,6 +153,11 @@

     gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))

     ulaw_prob = md(gru_out2)

+    rnn.trainable=False

+    rnn2.trainable=False

+    md.trainable=False

+    embed.Trainable=False

     model = Model([pcm, feat, pitch], ulaw_prob)

     model.rnn_units1 = rnn_units1

     model.rnn_units2 = rnn_units2

--- a/dnn/train_lpcnet.py

+++ b/dnn/train_lpcnet.py

@@ -103,8 +103,8 @@

 del in_exc

 # dump models to disk as we go

-checkpoint = ModelCheckpoint('lpcnet24b_384_10_G16_{epoch:02d}.h5')

+checkpoint = ModelCheckpoint('lpcnet24fq_384_10_G16_{epoch:02d}.h5')

-#model.load_weights('lpcnet9b_384_10_G16_01.h5')

-model.compile(optimizer=Adam(0.001, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy')

-model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))])

+model.load_weights('lpcnet24f_384_10_G16_31.h5')

+model.compile(optimizer=Adam(0.0005, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy')

+model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))])

--

⑨