ref: 8061b6e9a0021f9cbc7733b7d704edeebff0f749
dir: /src/cg/cst_cg.c/
/*************************************************************************/ /* */ /* Language Technologies Institute */ /* Carnegie Mellon University */ /* Copyright (c) 2007-2017 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and distribute */ /* this software and its documentation without restriction, including */ /* without limitation the rights to use, copy, modify, merge, publish, */ /* distribute, sublicense, and/or sell copies of this work, and to */ /* permit persons to whom this work is furnished to do so, subject to */ /* the following conditions: */ /* 1. The code must retain the above copyright notice, this list of */ /* conditions and the following disclaimer. */ /* 2. Any modifications must be clearly marked as such. */ /* 3. Original authors' names are not deleted. */ /* 4. The authors' names are not used to endorse or promote products */ /* derived from this software without specific prior written */ /* permission. */ /* */ /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ /* THIS SOFTWARE. */ /* */ /*************************************************************************/ /* Authors: Alan W Black (awb@cs.cmu.edu) */ /* Date: November 2007 */ /*************************************************************************/ /* */ /* Implementation of Clustergen, Statistical Parameter Synthesizer in */ /* Flite */ /* */ /* A statistical corpus based synthesizer. */ /* See Black, A. (2006), CLUSTERGEN: A Statistical Parametric */ /* Synthesizer using Trajectory Modeling", Interspeech 2006 - ICSLP, */ /* Pittsburgh, PA. */ /* http://www.cs.cmu.edu/~awb/papers/is2006/IS061394.PDF */ /* */ /* Uses MLSA for resynthesis and MLPG for smoothing */ /* mlsa and mlpg come from Festvox's VC code (which came in turn */ /* came from NITECH's HTS */ /* */ /*************************************************************************/ #include "cst_cg.h" #include "cst_spamf0.h" #include "cst_hrg.h" #include "cst_utt_utils.h" #include "cst_audio.h" CST_VAL_REGISTER_TYPE(cg_db,cst_cg_db) static cst_utterance *cg_make_hmmstates(cst_utterance *utt); static cst_utterance *cg_make_params(cst_utterance *utt); static cst_utterance *cg_predict_params(cst_utterance *utt); static cst_utterance *cg_resynth(cst_utterance *utt); void delete_cg_db(cst_cg_db *db) { int i,j; if (db->freeable == 0) return; /* its in the data segment, so not freeable */ /* Woo Hoo! We're gonna free this garbage with a big mallet */ /* In spite of what the const qualifiers say ... */ cst_free((void *)db->name); for (i=0; db->types && db->types[i]; i++) cst_free((void *)db->types[i]); cst_free((void *)db->types); for (j=0; j<db->num_f0_models; j++) { for (i=0; db->f0_trees[j] && db->f0_trees[j][i]; i++) delete_cart((cst_cart *)(void *)db->f0_trees[j][i]); cst_free((void *)db->f0_trees[j]); } cst_free((void *)db->f0_trees); for (j=0; j<db->num_param_models; j++) { for (i=0; db->param_trees[j] && db->param_trees[j][i]; i++) delete_cart((cst_cart *)(void *)db->param_trees[j][i]); cst_free((void *)db->param_trees[j]); } cst_free((void *)db->param_trees); if (db->spamf0) { delete_cart((cst_cart *)(void *)db->spamf0_accent_tree); delete_cart((cst_cart *)(void *)db->spamf0_phrase_tree); for (i=0; i< db->num_frames_spamf0_accent; i++) cst_free((void *)db->spamf0_accent_vectors[i]); cst_free((void *)db->spamf0_accent_vectors); } for (j=0; j<db->num_param_models; j++) { for (i=0; i<db->num_frames[j]; i++) cst_free((void *)db->model_vectors[j][i]); cst_free((void *)db->model_vectors[j]); } cst_free((void *)db->model_min); cst_free((void *)db->model_range); if (db->model_shape != CST_CG_MODEL_SHAPE_BASE_MINRANGE) { for (j = 0; j<db->num_param_models; j++) { for (i=0; i<db->num_channels[j]; i++) cst_free((void *)db->qtable[j][i]); cst_free((void *)db->qtable[j]); } } cst_free((void *)db->qtable); /* Moved to here so they can be used for the model_shape freeing */ cst_free(db->num_channels); cst_free(db->num_frames); cst_free((void *)db->model_vectors); for (j = 0; j<db->num_dur_models; j++) { for (i=0; db->dur_stats[j] && db->dur_stats[j][i]; i++) { cst_free((void *)db->dur_stats[j][i]->phone); cst_free((void *)db->dur_stats[j][i]); } cst_free((void *)db->dur_stats[j]); delete_cart((cst_cart *)(void *)db->dur_cart[j]); } cst_free((void *)db->dur_stats); cst_free((void *)db->dur_cart); for (i=0; db->phone_states && db->phone_states[i]; i++) { for (j=0; db->phone_states[i][j]; j++) cst_free((void *)db->phone_states[i][j]); cst_free((void *)db->phone_states[i]); } cst_free((void *)db->phone_states); cst_free((void *)db->dynwin); for (i=0; i<db->ME_num; i++) cst_free((void *)db->me_h[i]); cst_free((void *)db->me_h); cst_free((void *)db); } /* */ cst_utterance *cg_synth(cst_utterance *utt) { cst_cg_db *cg_db; cg_db = val_cg_db(utt_feat_val(utt,"cg_db")); cg_make_hmmstates(utt); cg_make_params(utt); cg_predict_params(utt); if (cg_db->spamf0) { cst_spamf0(utt); } cg_resynth(utt); return utt; } static float cg_state_duration(cst_item *s, cst_cg_db *cg_db) { float zdur, dur; const char *n; int i, x, dm; for (dm=0,zdur=0.0; dm < cg_db->num_dur_models; dm++) zdur += val_float(cart_interpret(s,cg_db->dur_cart[dm])); zdur /= dm; /* get average zdur prediction from all dur models */ n = item_feat_string(s,"name"); /* Note we only use the dur stats from the first model, that is */ /* correct, but wouldn't be if the dur tree was trained on different */ /* data */ for (x=i=0; cg_db->dur_stats[0][i]; i++) { if (cst_streq(cg_db->dur_stats[0][i]->phone,n)) { x=i; break; } } if (!cg_db->dur_stats[0][i]) /* unknown type name */ x = 0; dur = (zdur*cg_db->dur_stats[0][x]->stddev)+cg_db->dur_stats[0][x]->mean; /* dur = 1.2 * (float)exp((float)dur); */ return dur; } static cst_utterance *cg_make_hmmstates(cst_utterance *utt) { /* Build HMM state structure below the segment structure */ cst_cg_db *cg_db; cst_relation *hmmstate, *segstate; cst_item *seg, *s, *ss; const char *segname; int sp,p; cg_db = val_cg_db(utt_feat_val(utt,"cg_db")); hmmstate = utt_relation_create(utt,"HMMstate"); segstate = utt_relation_create(utt,"segstate"); for (seg = utt_rel_head(utt,"Segment"); seg; seg=item_next(seg)) { ss = relation_append(segstate,seg); segname = item_feat_string(seg,"name"); for (p=0; cg_db->phone_states[p]; p++) if (cst_streq(segname,cg_db->phone_states[p][0])) break; if (cg_db->phone_states[p] == NULL) p = 0; /* unknown phoneme */ for (sp=1; cg_db->phone_states[p][sp]; sp++) { s = relation_append(hmmstate,NULL); item_add_daughter(ss,s); item_set_string(s,"name",cg_db->phone_states[p][sp]); item_set_int(s,"statepos",sp); } } return utt; } static cst_utterance *cg_make_params(cst_utterance *utt) { /* puts in the frame items */ /* historically called "mcep" but can actually be any random vectors */ cst_cg_db *cg_db; cst_relation *mcep, *mcep_link; cst_item *s, *mcep_parent, *mcep_frame; int num_frames; float start, end; float dur_stretch, tok_stretch, rdur; cg_db = val_cg_db(utt_feat_val(utt,"cg_db")); mcep = utt_relation_create(utt,"mcep"); mcep_link = utt_relation_create(utt,"mcep_link"); end = 0.0; num_frames = 0; dur_stretch = get_param_float(utt->features,"duration_stretch", 1.0); for (s = utt_rel_head(utt,"HMMstate"); s; s=item_next(s)) { start = end; tok_stretch = ffeature_float(s,"R:segstate.parent.R:SylStructure.parent.parent.R:Token.parent.local_duration_stretch"); if (tok_stretch == 0) tok_stretch = 1.0; rdur = tok_stretch*dur_stretch*cg_state_duration(s,cg_db); /* Guarantee duration to be alt least one frame */ if (rdur < cg_db->frame_advance) end = start + cg_db->frame_advance; else end = start + rdur; item_set_float(s,"end",end); mcep_parent = relation_append(mcep_link, s); for ( ; (num_frames * cg_db->frame_advance) <= end; num_frames++ ) { mcep_frame = relation_append(mcep,NULL); item_add_daughter(mcep_parent,mcep_frame); item_set_int(mcep_frame,"frame_number",num_frames); item_set(mcep_frame,"name",item_feat(mcep_parent,"name")); } } /* Copy duration up onto Segment relation */ for (s = utt_rel_head(utt,"Segment"); s; s=item_next(s)) item_set(s,"end",ffeature(s,"R:segstate.daughtern.end")); utt_set_feat_int(utt,"param_track_num_frames",num_frames); return utt; } #if CG_OLD static int voiced_frame(cst_item *m) { const char *ph_vc; const char *ph_cvox; ph_vc = ffeature_string(m,"R:mcep_link.parent.R:segstate.parent.ph_vc"); ph_cvox = ffeature_string(m,"R:mcep_link.parent.R:segstate.parent.ph_cvox"); if (cst_streq("-",ph_vc) && cst_streq("-",ph_cvox)) return 0; /* unvoiced */ else return 1; /* voiced */ } #endif static int voiced_frame(cst_item *m) { const char *ph_vc; const char *ph_name; ph_vc = ffeature_string(m,"R:mcep_link.parent.R:segstate.parent.ph_vc"); ph_name = ffeature_string(m,"R:mcep_link.parent.R:segstate.parent.name"); if (cst_streq(ph_name,"pau")) return 0; /* unvoiced */ else if (cst_streq("+",ph_vc)) return 1; /* voiced */ else if (item_feat_float(m,"voicing") > 0.5) /* Even though the range is 0-10, I *do* mean 0.5 */ return 1; /* voiced */ else return 0; /* unvoiced */ } static float catmull_rom_spline(float p,float p0,float p1,float p2,float p3) /* http://www.mvps.org/directx/articles/ */ { float q; q = ( 0.5 * ( ( 2.0 * p1 ) + ( p * (-p0 + p2) ) + ( (p*p) * (((2.0 * p0) - (5.0 * p1)) + ((4.0 * p2) - p3))) + ( (p*p*p) * (-p0 + ((3.0 * p1) - (3.0 * p2)) + p3)))); /* (set! q (* 0.5 (+ (* 2 p1) (* (+ (* -1 p0) p2) p) (* (+ (- (* 2 p0) (* 5 p1)) (- (* 4 p2) p3)) (* p p)) (* (+ (* -1 p0) (- (* 3 p1) (* 3 p2)) p3) (* p p p))))) */ return q; } static void cg_F0_interpolate_spline(cst_utterance *utt, cst_track *param_track) { float start_f0, mid_f0, end_f0; int start_index, end_index, mid_index; int nsi, nei, nmi; /* next syllable indices */ float nmid_f0, pmid_f0; cst_item *syl; int i; float m; start_f0 = mid_f0 = end_f0 = -1.0; for (syl=utt_rel_head(utt,"Syllable"); syl; syl=item_next(syl)) { start_index = ffeature_int(syl,"R:SylStructure.daughter1.R:segstate.daughter1.R:mcep_link.daughter1.frame_number"); end_index = ffeature_int(syl,"R:SylStructure.daughtern.R:segstate.daughtern.R:mcep_link.daughtern.frame_number"); mid_index = (int)((start_index + end_index)/2.0); if (end_index <= start_index) continue; start_f0 = param_track->frames[start_index][0]; if (end_f0 > 0.0) start_f0 = end_f0; /* not first time through */ if (mid_f0 < 0.0) pmid_f0 = start_f0; /* first time through */ else pmid_f0 = mid_f0; mid_f0 = param_track->frames[mid_index][0]; if (item_next(syl)) /* not last syllable */ end_f0 = (param_track->frames[end_index-1][0]+ param_track->frames[end_index][0])/2.0; else end_f0 = param_track->frames[end_index-1][0]; nmid_f0=end_f0; /* in case there is no next syl */ if (item_next(syl)) { nsi = ffeature_int(syl,"n.R:SylStructure.daughter1.R:segstate.daughter1.R:mcep_link.daughter1.frame_number"); nei = ffeature_int(syl,"n.R:SylStructure.daughtern.R:segstate.daughtern.R:mcep_link.daughtern.frame_number"); nmi = (int)((nsi + nei)/2.0); nmid_f0 = param_track->frames[nmi][0]; } /* start to mid syl */ m = 1.0 / (mid_index - start_index); for (i=0; ((start_index+i)<mid_index); i++) param_track->frames[start_index+i][0] = catmull_rom_spline(i*m,pmid_f0,start_f0,mid_f0,end_f0); /* mid syl to end */ m = 1.0 / (end_index - mid_index); for (i=0; ((mid_index+i)<end_index); i++) param_track->frames[mid_index+i][0] = catmull_rom_spline(i*m,start_f0,mid_f0,end_f0,nmid_f0); } return; } #if 0 static void cg_smooth_F0_naive(cst_track *param_track) { float l,s; int i,c; l = 0.0; for (i=0; i<param_track->num_frames-1; i++) { c = 0; s = 0; if (l > 0.0) { c++; s+=l; } if (param_track->frames[i+1][0] > 0.0) { c++; s+=param_track->frames[i+1][0]; } l = param_track->frames[i][0]; if (param_track->frames[i][0] > 0.0) { c++; s+=param_track->frames[i][0]; param_track->frames[i][0] = s/c; } } return; } #endif static void cg_smooth_F0(cst_utterance *utt, cst_cg_db *cg_db, cst_track *param_track) { /* Smooth F0 and mark unvoice frames as 0.0 */ cst_item *mcep; int i; float base_mean, base_stddev; /* cg_smooth_F0_naive(param_track); */ cg_F0_interpolate_spline(utt,param_track); base_mean = get_param_float(utt->features,"int_f0_target_mean", cg_db->f0_mean); base_mean *= get_param_float(utt->features,"f0_shift", 1.0); base_stddev = get_param_float(utt->features,"int_f0_target_stddev", cg_db->f0_stddev); #if 0 FILE *ftt; int ii; ftt = cst_fopen("awb.f0",CST_OPEN_WRITE); printf("awb_debug saving F0\n"); for (ii=0; ii<param_track->num_frames; ii++) cst_fprintf(ftt,"%f %f\n",param_track->frames[ii][0], param_track->frames[ii][param_track->num_channels-2]); cst_fclose(ftt); #endif for (i=0,mcep=utt_rel_head(utt,"mcep"); mcep; i++,mcep=item_next(mcep)) { if (voiced_frame(mcep)) { float mean = base_mean; float stddev = base_stddev; float local_f0_mean = ffeature_float(mcep, "R:mcep_link.parent.R:segstate.parent.R:SylStructure.parent.parent.R:Token.parent.local_f0_mean" ); if (local_f0_mean != 0.0) { mean = local_f0_mean; } float local_f0_range = ffeature_float(mcep, "R:mcep_link.parent.R:segstate.parent.R:SylStructure.parent.parent.R:Token.parent.local_f0_range" ); if (local_f0_range > 0.0) { /* feature_float returns 0 by default, shifted to allow 0 to be passed. */ stddev = local_f0_range - 1.0; } /* scale the F0 -- which normally wont change it at all */ param_track->frames[i][0] = (((param_track->frames[i][0]-cg_db->f0_mean)/cg_db->f0_stddev) *stddev)+mean; /* Some safety checks */ if (param_track->frames[i][0] < 50) param_track->frames[i][0] = 50; if (param_track->frames[i][0] > 700) param_track->frames[i][0] = 700; } else /* Unvoice it */ param_track->frames[i][0] = 0.0; } return; } static int unpack_model_vector(cst_cg_db *cg_db,int pm,int f,float *v) { /* This unpacked the potentially compressed/quantized data from the model */ int i,j; if (cg_db->model_shape == CST_CG_MODEL_SHAPE_QUANTIZED_PARAMS) { for (i=0; i<cg_db->num_channels[pm]/2; i++) { v[i*2] = cg_db->qtable[pm][i*2][cg_db->model_vectors[pm][f][i]/256]; v[(i*2)+1] = cg_db->qtable[pm][(i*2)+1][cg_db->model_vectors[pm][f][i]%256]; } #if 0 printf("awb_debug %d\n",f); for (i=0; i<cg_db->num_channels[pm]; i++) printf("%f ",v[i]); printf("\n"); for (i=0; i<cg_db->num_channels[pm]/2; i++) printf("%d %d ",cg_db->model_vectors[pm][f][i]/256, cg_db->model_vectors[pm][f][i]%256); printf("\n"); #endif return 0; } if (cg_db->model_shape == CST_CG_MODEL_SHAPE_QUANTIZED_PARAMS_41) { j=1; /* skip F0 mean/stddev */ for (i=0; i<25; i++,j++) /* mcep static mean/stddev */ { v[j*2] = cg_db->qtable[pm][j*2][cg_db->model_vectors[pm][f][i]/256]; v[(j*2)+1] = cg_db->qtable[pm][(j*2)+1][cg_db->model_vectors[pm][f][i]%256]; } for (i=25; i<25+12; i+=1,j+=2) /* mcep deltas no mean/stddev */ { v[(j*2)+1] = cg_db->qtable[pm][(j*2)+1][cg_db->model_vectors[pm][f][i]/256]; v[(j*2)+3] = cg_db->qtable[pm][(j*2)+3][cg_db->model_vectors[pm][f][i]%256]; } /* one delta, one me */ v[(j*2)+1] = cg_db->qtable[pm][(j*2)+1][cg_db->model_vectors[pm][f][i]/256]; v[(j*2)+2] = cg_db->qtable[pm][(j*2)+2][cg_db->model_vectors[pm][f][i]%256]; i++; j+=2; /* one me, another me */ v[(j*2)] = cg_db->qtable[pm][j*2][cg_db->model_vectors[pm][f][i]/256]; v[(j*2)+2] = cg_db->qtable[pm][(j*2)+2][cg_db->model_vectors[pm][f][i]%256]; i++; j+=2; /* one me, another me */ v[(j*2)] = cg_db->qtable[pm][j*2][cg_db->model_vectors[pm][f][i]/256]; v[(j*2)+2] = cg_db->qtable[pm][(j*2)+2][cg_db->model_vectors[pm][f][i]%256]; i++; j+=2; /* one voicing and another v-stddef */ v[(j*2)] = cg_db->qtable[pm][j*2][cg_db->model_vectors[pm][f][i]/256]; v[(j*2)+1] = cg_db->qtable[pm][(j*2)+1][cg_db->model_vectors[pm][f][i]%256]; #if 0 printf("awb_debug pm %d frame %d\n",pm,f); for (i=0; i<cg_db->num_channels[pm]; i++) printf("%f ",v[i]); printf("\n"); #endif return 0; } /* if (cg_db->model_shape == CST_CG_MODEL_SHAPE_BASE_MINRANGE) */ else /* let's always do this second one in case model_shape isn't set */ { for (i=0; i<cg_db->num_channels[pm]; i++) { v[i] = cg_db->model_min[i]+ ((float)((cg_db->model_vectors[pm][f][i])/ 65535.0)*cg_db->model_range[i]); } return 0; } } static cst_utterance *cg_predict_params(cst_utterance *utt) { cst_cg_db *cg_db; cst_track *param_track; cst_track *str_track = NULL; cst_item *mcep; const cst_cart *mcep_tree, *f0_tree; int i,j,f,p,o,pm; const char *mname; float *unpacked_vector; float f0_val, f0_bit; float local_gain, voicing; int fff; int extra_feats = 0; cg_db = val_cg_db(utt_feat_val(utt,"cg_db")); param_track = new_track(); if (cg_db->do_mlpg) /* which should be the default */ fff = 1; /* copy details with stddevs */ else fff = 2; /* copy details without stddevs */ extra_feats = 1; /* voicing */ if (cg_db->mixed_excitation) { extra_feats += 5; str_track = new_track(); cst_track_resize(str_track, utt_feat_int(utt,"param_track_num_frames"), 5); } cst_track_resize(param_track, utt_feat_int(utt,"param_track_num_frames"), (cg_db->num_channels[0]/fff)- (2 * extra_feats));/* no voicing or str */ unpacked_vector = cst_alloc(float,cg_db->num_channels[0]); f = 0; for (i=0,mcep=utt_rel_head(utt,"mcep"); mcep; i++,mcep=item_next(mcep)) { mname = item_feat_string(mcep,"name"); local_gain = ffeature_float(mcep,"R:mcep_link.parent.R:segstate.parent.R:SylStructure.parent.parent.R:Token.parent.local_gain"); if (local_gain == 0.0) local_gain = 1.0; for (p=0; cg_db->types[p]; p++) if (cst_streq(mname,cg_db->types[p])) break; if (cg_db->types[p] == NULL) p=0; /* if there isn't a matching tree, use the first one */ /* Predict F0 */ for (f0_val=pm=0; pm<cg_db->num_f0_models; pm++) { f0_tree = cg_db->f0_trees[pm][p]; f0_bit = val_float(cart_interpret(mcep,f0_tree)); f0_val += f0_bit; } param_track->frames[i][0] = f0_val/cg_db->num_f0_models; if (param_track->frames[i][0] < 50.0) param_track->frames[i][0] = 0.0; /* what about stddev ? */ /* We only have multiple models now, but the default is one model */ /* Predict spectral coeffs */ voicing = 0.0; for (pm=0; pm<cg_db->num_param_models; pm++) { mcep_tree = cg_db->param_trees[pm][p]; f = val_int(cart_interpret(mcep,mcep_tree)); /* If there is one model this will be fine, if there are */ /* multiple models this will be the nth model */ item_set_int(mcep,"clustergen_param_frame",f); /* Unpack the model[pm][f] vector */ unpack_model_vector(cg_db,pm,f,unpacked_vector); /* Old code used to average in param[0] with F0 too (???) */ for (j=2; j<param_track->num_channels; j++) { if (pm == 0) param_track->frames[i][j] = 0.0; param_track->frames[i][j] += unpacked_vector[j*fff]/ (float)cg_db->num_param_models; } if (cg_db->mixed_excitation) { o = j; for (j=0; j<5; j++) { if (pm == 0) str_track->frames[i][j] = 0.0; str_track->frames[i][j] += unpacked_vector[(o+(2*j))*fff] / (float)cg_db->num_param_models; } } /* last coefficient is average voicing for cluster */ voicing /= (float)(pm+1); voicing += unpacked_vector[cg_db->num_channels[pm]-2] / (float)(pm+1); } item_set_float(mcep,"voicing",voicing); /* Apply local gain to c0 */ param_track->frames[i][2] *= local_gain; param_track->times[i] = i * cg_db->frame_advance; } cst_free(unpacked_vector); cg_smooth_F0(utt,cg_db,param_track); utt_set_feat(utt,"param_track",track_val(param_track)); if (cg_db->mixed_excitation) utt_set_feat(utt,"str_track",track_val(str_track)); return utt; } static cst_utterance *cg_resynth(cst_utterance *utt) { cst_cg_db *cg_db; cst_wave *w; cst_track *param_track; cst_track *str_track = NULL; cst_track *smoothed_track; const cst_val *streaming_info_val; cst_audio_streaming_info *asi = NULL; int mlsa_speed_param = 0; streaming_info_val=get_param_val(utt->features,"streaming_info",NULL); if (streaming_info_val) { asi = val_audio_streaming_info(streaming_info_val); asi->utt = utt; } /* Values 5-15 might be reasonably to speed things up. This number */ /* is used to reduce the number of parameters used in the mceps */ /* e.g. value 10 will speed up from 21.0 faster than real time */ /* to 26.4 times faster than real time (for builtin rms) */ mlsa_speed_param = get_param_int(utt->features,"mlsa_speed_param",0); cg_db = val_cg_db(utt_feat_val(utt,"cg_db")); param_track = val_track(utt_feat_val(utt,"param_track")); if (cg_db->mixed_excitation) str_track = val_track(utt_feat_val(utt,"str_track")); if (cg_db->do_mlpg) { smoothed_track = mlpg(param_track, cg_db); w = mlsa_resynthesis(smoothed_track,str_track,cg_db, asi,mlsa_speed_param); delete_track(smoothed_track); } else w=mlsa_resynthesis(param_track,str_track,cg_db, asi,mlsa_speed_param); if (w == NULL) { /* Synthesis Failed, probably because it was interrupted */ utt_set_feat_int(utt,"Interrupted",1); w = new_wave(); } #if 0 /* Apply local gain */ for (i=0,tok=utt_rel_head(utt,"Token"); tok; i++,tok=item_next(tok)) { if (item_feat_present(tok,"local_gain")) local_gain = item_feat_float(tokget_param_fffeature_float(tok,"R:mcep_link.parent.R:segstate.parent.R:SylStructure.parent.parent.R:Token.parent.local_gain"); } #endif utt_set_wave(utt,w); return utt; }