ref: d75b51b18a1b62d9a8ecd135a8fdad16f91494d4
parent: c74876bbc699f57ef921b1fdc6467343955e9b1d
author: Jean-Marc Valin <jmvalin@jmvalin.ca>
date: Sat Oct 13 10:52:30 EDT 2018
Reduce sampling temperature for voiced frames
--- a/dnn/test_wavenet_audio.py
+++ b/dnn/test_wavenet_audio.py
@@ -66,9 +66,11 @@
fexc[0, 0, 1] = lin2ulaw(pred)
p, state1, state2 = dec.predict([fexc, iexc, cfeat[:, fr:fr+1, :], state1, state2])
- #p = p*p
- #p = p/(1e-18 + np.sum(p))
- p = np.maximum(p-0.001, 0).astype('float64')
+ #Lower the temperature for voiced frames to reduce noisiness
+ p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 37] - .5))
+ p = p/(1e-18 + np.sum(p))
+ #Cut off the tail of the remaining distribution
+ p = np.maximum(p-0.0005, 0).astype('float64')
p = p/(1e-8 + np.sum(p))
iexc[0, 0, 0] = np.argmax(np.random.multinomial(1, p[0,0,:], 1))
--
⑨