ref: f3eb6164551b17bc06e740d23e0821f77300e603
parent: 9756feefbd3ac2e4f9d5beb7d82e3632378d10aa
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Fri Oct 12 23:41:17 EDT 2018
Cleaning up the synthesis code

Remove all kinds of useless code. Make it use all features continuously, and fix a bug that skipped one frame out of every 15.
--- a/dnn/test_wavenet_audio.py
+++ b/dnn/test_wavenet_audio.py
@@ -25,38 +25,21 @@
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
#model.summary()
-pcmfile = sys.argv[1]
-feature_file = sys.argv[2]
+feature_file = sys.argv[1]
frame_size = 160
nb_features = 55
nb_used_features = lpcnet.nb_used_features
-feature_chunk_size = 15
-pcm_chunk_size = frame_size*feature_chunk_size
-data = np.fromfile(pcmfile, dtype='int16')
-data = lin2ulaw(data)
-nb_frames = len(data)//pcm_chunk_size
-
features = np.fromfile(feature_file, dtype='float32')
+features = np.resize(features, (-1, nb_features))
+nb_frames = 1
+feature_chunk_size = features.shape[0]
+pcm_chunk_size = frame_size*feature_chunk_size
-data = data[:nb_frames*pcm_chunk_size]
-features = features[:nb_frames*feature_chunk_size*nb_features]
-
-in_data = np.concatenate([data[0:1], data[:-1]]);
-
-features = np.reshape(features, (nb_frames*feature_chunk_size, nb_features))
-
-in_data = np.reshape(in_data, (nb_frames, pcm_chunk_size, 1))
-in_data = in_data.astype('uint8')
-out_data = np.reshape(data, (nb_frames, pcm_chunk_size, 1))
-out_data = out_data.astype('uint8')
features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
-features = features[:, :, :]
periods = (50*features[:,:,36:37]+100).astype('int16')
-in_data = np.reshape(in_data, (nb_frames*pcm_chunk_size, 1))
-out_data = np.reshape(data, (nb_frames*pcm_chunk_size, 1))
model.load_weights('wavenet5e3_60.h5')
@@ -63,24 +46,23 @@
order = 16
-pcm = 0.*out_data
+pcm = np.zeros((nb_frames*pcm_chunk_size, ))
fexc = np.zeros((1, 1, 2), dtype='float32')
iexc = np.zeros((1, 1, 1), dtype='int16')
state1 = np.zeros((1, lpcnet.rnn_units1), dtype='float32')
state2 = np.zeros((1, lpcnet.rnn_units2), dtype='float32')
-for c in range(1, nb_frames):
+
+mem = 0
+coef = 0.85
+
+skip = order + 1
+for c in range(0, nb_frames):
cfeat = enc.predict([features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]])
- for fr in range(1, feature_chunk_size):
+ for fr in range(0, feature_chunk_size):
f = c*feature_chunk_size + fr
a = features[c, fr, nb_features-order:]
-
- #print(a)
- gain = 1.;
- period = int(50*features[c, fr, 36]+100)
- period = period - 4
- for i in range(frame_size):
- #fexc[0, 0, 0] = iexc + 128
- pred = -sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1, 0])
+ for i in range(skip, frame_size):
+ pred = -sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1])
fexc[0, 0, 1] = lin2ulaw(pred)
p, state1, state2 = dec.predict([fexc, iexc, cfeat[:, fr:fr+1, :], state1, state2])
@@ -90,8 +72,10 @@
p = p/(1e-8 + np.sum(p))
iexc[0, 0, 0] = np.argmax(np.random.multinomial(1, p[0,0,:], 1))
- pcm[f*frame_size + i, 0] = pred + ulaw2lin(iexc[0, 0, 0])
- fexc[0, 0, 0] = lin2ulaw(pcm[f*frame_size + i, 0])
- print(iexc[0, 0, 0], ulaw2lin(out_data[f*frame_size + i, 0]), pcm[f*frame_size + i, 0], pred)
+ pcm[f*frame_size + i] = pred + ulaw2lin(iexc[0, 0, 0])
+ fexc[0, 0, 0] = lin2ulaw(pcm[f*frame_size + i])
+ mem = coef*mem + pcm[f*frame_size + i]
+ print(mem)
+ skip = 0
--
⑨