shithub: flite

ref: f136f4eb6b2d32aa04499aeee872d3d7586e925f
dir: /lang/usenglish/us_f0_model.c/

View raw version
/*************************************************************************/
/*                                                                       */
/*                  Language Technologies Institute                      */
/*                     Carnegie Mellon University                        */
/*                         Copyright (c) 2001                            */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission is hereby granted, free of charge, to use and distribute  */
/*  this software and its documentation without restriction, including   */
/*  without limitation the rights to use, copy, modify, merge, publish,  */
/*  distribute, sublicense, and/or sell copies of this work, and to      */
/*  permit persons to whom this work is furnished to do so, subject to   */
/*  the following conditions:                                            */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*   4. The authors' names are not used to endorse or promote products   */
/*      derived from this software without specific prior written        */
/*      permission.                                                      */
/*                                                                       */
/*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*             Author:  Alan W Black (awb@cs.cmu.edu)                    */
/*               Date:  January 2001                                     */
/*************************************************************************/
/*                                                                       */
/*  An F0 model                                                          */
/*    This is derived from the f2b model freely distributed in Festival  */
/*                                                                       */
/*************************************************************************/

#include "cst_hrg.h"
#include "cst_phoneset.h"
#include "us_f0.h"

/*
 * Evaluate the linear-regression F0 model for item s.
 *
 * f0_lr_terms[0] holds the intercepts for the three outputs.  Every
 * following entry, up to the first one whose feature name is null,
 * adds (feature value * weight) to *start, *mid and *end.  A term with
 * a non-null type is an indicator: its value is 1 when the feature's
 * string value equals that type and 0 otherwise.  A term without a type
 * uses the feature's float value directly.
 */
static void apply_lr_model(cst_item *s,
			   const us_f0_lr_term *f0_lr_terms,
			   float *start,
			   float *mid,
			   float *end)
{
    const us_f0_lr_term *term;
    const cst_val *cur = 0;
    float x;

    /* Begin from the intercepts stored in the first table entry */
    *start = f0_lr_terms[0].start;
    *mid = f0_lr_terms[0].mid;
    *end = f0_lr_terms[0].end;

    for (term = &f0_lr_terms[1]; term->feature; term++)
    {
	/* Consecutive terms naming the same feature reuse the value
	   that was fetched for the previous term */
	if (!cst_streq(term->feature,term[-1].feature))
	    cur = ffeature(s,term->feature);

	if (!term->type)
	    x = val_float(cur);
	else if (cst_streq(val_string(cur),term->type))
	    x = 1.0;
	else
	    x = 0.0;

	*start += x*term->start;
	*mid += x*term->mid;
	*end += x*term->end;
    }
}

/*
 * Append a new item to the targ relation carrying a "pos" feature and an
 * "f0" feature.  The stored f0 is limited to the range [50, 500] because
 * the model can occasionally produce implausible values.
 */
static void add_target_point(cst_relation *targ,float pos, float f0)
{
    cst_item *point = relation_append(targ,NULL);
    float limited = f0;

    if (f0 > 500.0)
	limited = 500.0;
    else if (f0 < 50.0)
	limited = 50.0;

    item_set_float(point,"pos",pos);
    item_set_float(point,"f0",limited);
}

/* Model mean and stddev, taken from f2b/kal_diphone */
#define model_mean 170.0
#define model_stddev 34
/* Map a model-space value v onto a range with mean m and stddev s:
   normalise v by the model statistics, scale by s, then offset by m.
   Each parameter is parenthesized so that compound argument expressions
   (e.g. a+b, or c ? x : y) expand with the intended precedence. */
#define map_f0(v,m,s) (((((v)-model_mean)/model_stddev)*(s))+(m))

/*
 * Returns TRUE when syl has no previous item, or when the feature path
 * "R:SylStructure.daughter.R:Segment.p.name" (the name of the segment
 * before the syllable's first daughter) is "pau"; FALSE otherwise.
 */
static int post_break(cst_item *syl)
{
    const char *before;

    if (item_prev(syl) == 0)
	return TRUE;

    before = ffeature_string(syl,
			     "R:SylStructure.daughter.R:Segment.p.name");
    return cst_streq("pau",before) ? TRUE : FALSE;
}

/*
 * Returns TRUE when syl has no next item, or when the feature path
 * "R:SylStructure.daughtern.R:Segment.n.name" (the name of the segment
 * after the syllable's daughtern segment) is "pau"; FALSE otherwise.
 */
static int pre_break(cst_item *syl)
{
    const char *after;

    if (item_next(syl) == 0)
	return TRUE;

    after = ffeature_string(syl,
			    "R:SylStructure.daughtern.R:Segment.n.name");
    return cst_streq("pau",after) ? TRUE : FALSE;
}

/*
 * Return the time point midway through the vowel of syllable syl.
 *
 * The SylStructure daughters of syl are scanned for the first segment
 * whose phone feature "vc" is "+".  The result is the average of that
 * segment's "end" and the "end" of the segment before it.  If no such
 * segment is found (shouldn't happen) the first daughter is used
 * instead, and if there are no daughters at all (shouldn't happen
 * either) the result is 0.
 */
static float vowel_mid(cst_item *syl)
{
    const cst_phoneset *phones = item_phoneset(syl);
    cst_item *first = item_daughter(item_as(syl,"SylStructure"));
    cst_item *seg;
    const char *vc;

    for (seg = first; seg; seg = item_next(seg))
    {
	vc = phone_feature_string(phones,item_feat_string(seg,"name"),"vc");
	if (cst_streq("+",vc))
	    break;
    }

    /* no vowel in syllable: fall back to the first segment */
    if (seg == 0)
	seg = first;

    /* no segments at all */
    if (seg == 0)
	return 0;

    return (item_feat_float(seg,"end")+
	    ffeature_float(seg,"R:Segment.p.end"))/2.0;
}

/*
 * F0 target model: Black and Hunt ICSLP96, three points per syl.
 *
 * Does nothing and returns u when the utterance has the feature
 * "no_f0_target_model".  Otherwise creates the "Target" relation and,
 * for each item of the "Syllable" relation that has SylStructure
 * daughters, appends a target at the syllable start, one at the vowel
 * midpoint, and, when the syllable is followed by a break, one at the
 * syllable end.
 *
 * The utterance-wide mean is "int_f0_target_mean" (default 100.0)
 * multiplied by "f0_shift" (default 1.0); the stddev is
 * "int_f0_target_stddev" (default 12.0).  A token may override them per
 * syllable through its "local_f0_shift" and "local_f0_range" features;
 * a value of zero there means "use the utterance-wide value".
 *
 * Always returns u.
 */
cst_utterance *us_f0_model(cst_utterance *u)
{
    cst_relation *targets;
    cst_item *syl, *first, *last, *lead;
    float base_mean, base_stddev;
    float syl_mean, syl_stddev;
    float start, mid, end;
    float prev_end = 0;
    float utt_end;

    if (feat_present(u->features,"no_f0_target_model"))
	return u;

    targets = utt_relation_create(u,"Target");
    base_mean = get_param_float(u->features,"int_f0_target_mean", 100.0);
    base_mean *= get_param_float(u->features,"f0_shift", 1.0);
    base_stddev = get_param_float(u->features,"int_f0_target_stddev", 12.0);

    for (syl = relation_head(utt_relation(u,"Syllable"));
	 syl;
	 syl = item_next(syl))
    {
	/* skip syllables that have no segments */
	if (!item_daughter(item_as(syl,"SylStructure")))
	    continue;

	/* a non-zero local shift scales the utterance mean */
	syl_mean = ffeature_float(syl,"R:SylStructure.parent.R:Token.parent.local_f0_shift");
	syl_mean = syl_mean ? syl_mean * base_mean : base_mean;

	/* a non-zero local range replaces the utterance stddev */
	syl_stddev = ffeature_float(syl,"R:SylStructure.parent.R:Token.parent.local_f0_range");
	syl_stddev = (syl_stddev == 0.0) ? base_stddev : syl_stddev;

	apply_lr_model(syl,f0_lr_terms,&start,&mid,&end);

	/* after a break there is no previous end value to smooth with,
	   so the start target uses this syllable's own start value */
	if (post_break(syl))
	    prev_end = map_f0(start,syl_mean,syl_stddev);

	/* start target: average of the predicted start and previous end */
	add_target_point(targets,
			 ffeature_float(syl,
				"R:SylStructure.daughter.R:Segment.p.end"),
			 map_f0((start+prev_end)/2.0,syl_mean,syl_stddev));

	/* mid target at the middle of the vowel */
	add_target_point(targets,
			 vowel_mid(syl),
			 map_f0(mid,syl_mean,syl_stddev));

	/* remember the mapped end value for the next syllable; it is
	   only emitted as a target when a break follows */
	prev_end = map_f0(end,syl_mean,syl_stddev);
	if (pre_break(syl))
	    add_target_point(targets,
			     ffeature_float(syl,"R:SylStructure.daughtern.end"),
			     prev_end);
    }

    /* Guarantee targets go from start to end of utterance */
    first = relation_head(targets);
    if (first == 0)
	add_target_point(targets,0,base_mean);
    else if (item_feat_float(first,"pos") > 0)
    {
	/* extend the first target's f0 back to time 0 */
	lead = item_prepend(first,NULL);
	item_set_float(lead,"pos",0.0);
	item_set_float(lead,"f0",item_feat_float(first,"f0"));
    }

    last = relation_tail(targets);
    utt_end = item_feat_float(relation_tail(utt_relation(u,"Segment")),"end");
    if (item_feat_float(last,"pos") < utt_end)
	add_target_point(targets,utt_end,item_feat_float(last,"f0"));

    return u;
}