ref: 87cd75f6f4a5ad6c113f4c58f91acfc0e3369009
parent: 785a2b2e84f955dd3013536b35529bb9fa1afb6b
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Thu Aug 16 18:43:13 EDT 2018
Training seems to work
--- a/dnn/lpcnet.py
+++ b/dnn/lpcnet.py
@@ -41,7 +41,7 @@
}
def new_wavernn_model():
- pcm = Input(shape=(None, 1))
+ pcm = Input(shape=(None, 2))
pitch = Input(shape=(None, 1))
feat = Input(shape=(None, nb_used_features))
dec_feat = Input(shape=(None, 32))
@@ -61,7 +61,7 @@
cpitch = pitch
embed = Embedding(256, 128, embeddings_initializer=PCMInit())
- cpcm = Reshape((-1, 128))(embed(pcm))
+ cpcm = Reshape((-1, 128*2))(embed(pcm))
cfeat = fconv2(fconv1(feat))
--- a/dnn/train_wavenet_audio.py
+++ b/dnn/train_wavenet_audio.py
@@ -51,8 +51,18 @@
pred = np.fromfile(pred_file, dtype='int16')
pred = pred[:nb_frames*pcm_chunk_size]
+
+pred_in = 32768.*ulaw2lin(data)
+for i in range(2, nb_frames*feature_chunk_size):
+ pred[i*frame_size:(i+1)*frame_size] = 0
+ if i % 100000 == 0:
+ print(i)
+ for k in range(16):
+ pred[i*frame_size:(i+1)*frame_size] = pred[i*frame_size:(i+1)*frame_size] - \
+ pred_in[i*frame_size-k-1:(i+1)*frame_size-k-1]*features[i, nb_features-16+k]
+
pred = np.minimum(127, lin2ulaw(pred/32768.))
-pred = pred + np.random.randint(-1, 1, len(data))
+#pred = pred + np.random.randint(-1, 1, len(data))
pitch = 1.*data
@@ -72,7 +82,7 @@
pred = np.reshape(pred, (nb_frames, pcm_chunk_size, 1))
pred = (pred.astype('int16')+128).astype('uint8')
-#in_data = np.concatenate([in_data, pred], axis=-1)
+in_data = np.concatenate([in_data, pred], axis=-1)
#in_data = np.concatenate([in_data, in_pitch], axis=-1)
--
⑨