ref: 6bc1cdd62c79c27c771421dbbf7b8fe19d733a42
parent: 74ee76b6cdc98826b2a31475625ad21d36989801
parent: c109948f2e47fe1a6e18dcff98f4ec1559364ce8
author: Kevin Lenzo <66268906+lenzo-duo@users.noreply.github.com>
date: Fri Aug 14 14:42:26 EDT 2020
Merge pull request #14 from earboxer/pitch * SSML: Ability to pass pitch and range prosody attributes
--- a/src/cg/cst_cg.c
+++ b/src/cg/cst_cg.c
@@ -445,15 +445,15 @@
/* Smooth F0 and mark unvoice frames as 0.0 */
cst_item *mcep;
int i;
- float mean, stddev;
+ float base_mean, base_stddev;
/* cg_smooth_F0_naive(param_track); */
cg_F0_interpolate_spline(utt,param_track);
- mean = get_param_float(utt->features,"int_f0_target_mean", cg_db->f0_mean);
- mean *= get_param_float(utt->features,"f0_shift", 1.0);
- stddev =
+ base_mean = get_param_float(utt->features,"int_f0_target_mean", cg_db->f0_mean);
+ base_mean *= get_param_float(utt->features,"f0_shift", 1.0);
+ base_stddev =
get_param_float(utt->features,"int_f0_target_stddev", cg_db->f0_stddev);
#if 0
FILE *ftt; int ii;
@@ -469,6 +469,25 @@
{
if (voiced_frame(mcep))
{
+ float mean = base_mean;
+ float stddev = base_stddev;
+ float local_f0_mean =
+ ffeature_float(mcep,
+ "R:mcep_link.parent.R:segstate.parent.R:SylStructure.parent.parent.R:Token.parent.local_f0_mean"
+ );
+ if (local_f0_mean != 0.0)
+ {
+ mean = local_f0_mean;
+ }
+ float local_f0_range =
+ ffeature_float(mcep,
+ "R:mcep_link.parent.R:segstate.parent.R:SylStructure.parent.parent.R:Token.parent.local_f0_range"
+ );
+ if (local_f0_range > 0.0)
+ {
+ /* ffeature_float returns 0 when the feature is unset, so the SSML side stores range + 1.0; undo that shift here so a range of 0 can be passed. */
+ stddev = local_f0_range - 1.0;
+ }
/* scale the F0 -- which normally wont change it at all */
param_track->frames[i][0] =
(((param_track->frames[i][0]-cg_db->f0_mean)/cg_db->f0_stddev)
--- a/src/synth/cst_ssml.c
+++ b/src/synth/cst_ssml.c
@@ -93,14 +93,22 @@
while (!cst_streq(">",name))
{
/* I want names and values to be const */
- if (i == 0)
+ fnn = "_name0";
+ vnn = "_val0";
+ // Each attribute that may share a tag with others gets its own fixed slot
+ // below (volume=1, pitch=2, range=3); any other attribute uses slot 0.
+ if (cst_streq("volume", name))
{
- fnn="_name0"; vnn="_val0";
+ fnn = "_name1"; vnn = "_val1";
}
- else
+ else if (cst_streq("pitch", name))
{
- fnn="_name1"; vnn="_val1";
+ fnn = "_name2"; vnn = "_val2";
}
+ else if (cst_streq("range", name))
+ {
+ fnn = "_name3"; vnn = "_val3";
+ }
if (cst_streq(name,"/"))
feat_set_string(a,"_type","startend");
else
@@ -200,20 +208,29 @@
if (cst_streq("rate",get_param_string(attributes,"_name0","")))
feat_set_float(word_feats,"local_duration_stretch",
1.0/feat_float(attributes,"_val0"));
- if (cst_streq("rate",get_param_string(attributes,"_name1","")))
- feat_set_float(word_feats,"local_duration_stretch",
- 1.0/feat_float(attributes,"_val1"));
- if (cst_streq("volume",get_param_string(attributes,"_name0","")))
- feat_set_float(word_feats,"local_gain",
- feat_float(attributes,"_val0")/100.0);
+ // volume is stored in _name1
if (cst_streq("volume",get_param_string(attributes,"_name1","")))
feat_set_float(word_feats,"local_gain",
feat_float(attributes,"_val1")/100.0);
+ // pitch is stored in _name2
+ if (cst_streq("pitch", get_param_string(attributes, "_name2", "")))
+ {
+ feat_set_float(word_feats, "local_f0_mean", feat_float(attributes, "_val2"));
+ }
+ // range is stored in _name3
+ if (cst_streq("range", get_param_string(attributes, "_name3", "")))
+ {
+ feat_set_float(word_feats, "local_f0_range",
+ // Stored shifted by +1.0 so that 0.0 can be passed; the reader subtracts 1.0 again.
+ feat_float(attributes, "_val3") + 1.0);
+ }
}
else if (cst_streq("end",feat_string(attributes,"_type")))
{
feat_remove(word_feats,"local_duration_stretch");
feat_remove(word_feats,"local_gain");
+ feat_remove(word_feats, "local_f0_mean");
+ feat_remove(word_feats, "local_f0_range");
}
}