ref: 5d8a1313d621491a2bd6743740ef5385d9a6d89d
parent: 638252a965076cb6f63cd4b35f6676f8e2156974
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Tue Jul 10 21:30:30 EDT 2018
decodes something...
--- a/dnn/lpcnet.py
+++ b/dnn/lpcnet.py
@@ -9,7 +9,7 @@
import h5py
import sys
-rnn_units=64
+rnn_units=512
pcm_bits = 8
pcm_levels = 2**pcm_bits
nb_used_features = 37
@@ -20,6 +20,7 @@
pitch = Input(shape=(None, 1))
feat = Input(shape=(None, nb_used_features))
dec_feat = Input(shape=(None, 32))
+ dec_state = Input(shape=(rnn_units,))
conv1 = Conv1D(16, 7, padding='causal')
pconv1 = Conv1D(16, 5, padding='same')
@@ -48,8 +49,8 @@
encoder = Model(feat, cfeat)
dec_rnn_in = Concatenate()([cpcm, cpitch, dec_feat])
- dec_gru_out, state = rnn(dec_rnn_in)
+ dec_gru_out, state = rnn(dec_rnn_in, initial_state=dec_state)
dec_ulaw_prob = md(dec_gru_out)
- decoder = Model([pcm, pitch, dec_feat], [dec_ulaw_prob, state])
+ decoder = Model([pcm, pitch, dec_feat, dec_state], [dec_ulaw_prob, state])
return model, encoder, decoder
--- a/dnn/test_lpcnet.py
+++ b/dnn/test_lpcnet.py
@@ -21,7 +21,7 @@
model, enc, dec = lpcnet.new_wavernn_model()
model.compile(optimizer=Adadiff(), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
-model.summary()
+#model.summary()
pcmfile = sys.argv[1]
feature_file = sys.argv[2]
@@ -47,14 +47,15 @@
out_data = np.reshape(data, (nb_frames*pcm_chunk_size, 1))
-model.load_weights('lpcnet1h_30.h5')
+model.load_weights('lpcnet1i_30.h5')
order = 16
pcm = 0.*out_data
-exc = 0.*out_data
+exc = out_data-0
pitch = np.zeros((1, 1, 1), dtype='float32')
-iexc = np.zeros((1, 1, 1), dtype='float32')
+fexc = np.zeros((1, 1, 1), dtype='float32')
+iexc = np.zeros((1, 1, 1), dtype='int16')
state = np.zeros((1, lpcnet.rnn_units), dtype='float32')
for c in range(1, nb_frames):
cfeat = enc.predict(features[c:c+1, :, :nb_used_features])
@@ -68,7 +69,14 @@
period = period - 4
for i in range(frame_size):
pitch[0, 0, 0] = exc[f*frame_size + i - period, 0]
- #p, state = dec.predict([
- pcm[f*frame_size + i, 0] = gain*out_data[f*frame_size + i, 0] - sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1, 0])
- print(pcm[f*frame_size + i, 0])
+ fexc[0, 0, 0] = exc[f*frame_size + i - 1]
+ #print(cfeat.shape)
+ p, state = dec.predict([fexc, pitch, cfeat[:, fr:fr+1, :], state])
+ p = p/(1e-5 + np.sum(p))
+ #print(np.sum(p))
+ iexc[0, 0, 0] = np.argmax(np.random.multinomial(1, p[0,0,:], 1))-128
+ exc[f*frame_size + i] = iexc[0, 0, 0]/16.
+ #out_data[f*frame_size + i, 0] = iexc[0, 0, 0]
+ pcm[f*frame_size + i, 0] = gain*iexc[0, 0, 0] - sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1, 0])
+ print(iexc[0, 0, 0], out_data[f*frame_size + i, 0], pcm[f*frame_size + i, 0])
--
⑨