ref: 99d8b205ba2bad5287eb55dc1a76b769a541cd2b
dir: /src/lexicon/cst_lts.c/
/*************************************************************************/ /* */ /* Language Technologies Institute */ /* Carnegie Mellon University */ /* Copyright (c) 1999 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and distribute */ /* this software and its documentation without restriction, including */ /* without limitation the rights to use, copy, modify, merge, publish, */ /* distribute, sublicense, and/or sell copies of this work, and to */ /* permit persons to whom this work is furnished to do so, subject to */ /* the following conditions: */ /* 1. The code must retain the above copyright notice, this list of */ /* conditions and the following disclaimer. */ /* 2. Any modifications must be clearly marked as such. */ /* 3. Original authors' names are not deleted. */ /* 4. The authors' names are not used to endorse or promote products */ /* derived from this software without specific prior written */ /* permission. */ /* */ /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ /* THIS SOFTWARE. */ /* */ /*************************************************************************/ /* Author: Alan W Black (awb@cs.cmu.edu) */ /* Date: December 1999 */ /*************************************************************************/ /* */ /* Letter to sound rule support */ /* */ /*************************************************************************/ #include "cst_features.h" #include "cst_lts.h" #include "cst_endian.h" static cst_lts_phone apply_model(cst_lts_letter *vals, cst_lts_addr start, const cst_lts_model *model); cst_lts_rules *new_lts_rules() { cst_lts_rules *lt = cst_alloc(cst_lts_rules,1); lt->name = 0; lt->letter_index = 0; lt->models = 0; lt->phone_table = 0; lt->context_window_size = 0; lt->context_extra_feats = 0; lt->letter_table = 0; return lt; } cst_val *lts_apply_val(const cst_val *wlist,const char *feats,const cst_lts_rules *r) { /* for symbol to symbol mapping */ const cst_val *v; cst_val *p; char *word; int i,j; word = cst_alloc(char,val_length(wlist)+1); for (v=wlist,i=0; v; v=val_cdr(v),i++) { for (j=0; r->letter_table[j]; j++) if (cst_streq(val_string(val_car(v)),r->letter_table[j])) { word[i] = j; break; } if (!r->letter_table[j]) { #if 0 printf("awb_debug unknown letter >%s<\n",val_string(val_car(v))); #endif i--; /* can't find this letter so skip it */ } } p = lts_apply(word,feats,r); cst_free(word); return p; } cst_val *lts_apply(const char *word,const char *feats,const cst_lts_rules *r) { int pos, index, i; cst_val *phones=0; cst_lts_letter *fval_buff; cst_lts_letter *full_buff; cst_lts_phone phone; char *left, *right; const char *p; char hash; char zeros[8]; /* For feature vals for each letter */ fval_buff = cst_alloc(cst_lts_letter, (r->context_window_size*2)+ r->context_extra_feats); /* Buffer with added contexts */ full_buff = cst_alloc(cst_lts_letter, (r->context_window_size*2)+ cst_strlen(word)+1); /* TBD assumes single POS feat */ if (r->letter_table) { for (i=0; i<8; i++) zeros[i] = 2; cst_sprintf((char *)full_buff, "%.*s%c%s%c%.*s", r->context_window_size-1, zeros, 1, word, 1, r->context_window_size-1, zeros); hash = 1; } else { /* Assumes l_letter is a char and context < 8 */ cst_sprintf((char *)full_buff, "%.*s#%s#%.*s", r->context_window_size-1, "00000000", word, r->context_window_size-1, "00000000"); hash = '#'; } /* Do the prediction backwards so we don't need to reverse the answer */ for (pos = r->context_window_size + cst_strlen(word) - 1; full_buff[pos] != hash; pos--) { /* Fill the features buffer for the predictor */ cst_sprintf((char *)fval_buff, "%.*s%.*s%s", r->context_window_size, full_buff+pos-r->context_window_size, r->context_window_size, full_buff+pos+1, feats); if ((!r->letter_table && ((full_buff[pos] < 'a') || (full_buff[pos] > 'z')))) { #ifdef EXCESSIVELY_CHATTY cst_errmsg("lts:skipping unknown char \"%c\"\n", full_buff[pos]); #endif continue; } if (r->letter_table) index = full_buff[pos] - 3; else index = (full_buff[pos]-'a')%26; phone = apply_model(fval_buff, r->letter_index[index], r->models); #if 0 /* But these are in reverse order */ printf("letter-phone(s) alignment: %c %s\n", full_buff[pos], (const char *)r->phone_table[phone]); #endif /* delete epsilons and split dual-phones */ if (cst_streq("epsilon",r->phone_table[phone])) continue; else if ((p=strchr(r->phone_table[phone],'-')) != NULL) { left = cst_substr(r->phone_table[phone],0, cst_strlen(r->phone_table[phone])-cst_strlen(p)); right = cst_substr(r->phone_table[phone], (cst_strlen(r->phone_table[phone])-cst_strlen(p))+1, (cst_strlen(p)-1)); phones = cons_val(string_val(left), cons_val(string_val(right),phones)); cst_free(left); cst_free(right); } else phones = cons_val(string_val(r->phone_table[phone]),phones); } cst_free(full_buff); cst_free(fval_buff); return phones; } static void cst_lts_get_state(cst_lts_rule *state, const cst_lts_model *model, unsigned short n, int rule_size) { /* As some OS's require a more elaborate access than a simple lookup */ memmove(state,&model[n*rule_size],rule_size); } static cst_lts_phone apply_model(cst_lts_letter *vals,cst_lts_addr start, const cst_lts_model *model) { /* because some machines (arm/mips) can't deal with addrs not on */ /* word boundaries we use a static and copy the rule values each time */ /* so we know its properly aligned */ /* Hmm this still might be wrong on some machines that align the */ /* structure cst_lts_rules differently */ cst_lts_rule state; unsigned short nstate; static const int sizeof_cst_lts_rule = 6; cst_lts_get_state(&state,model,start,sizeof_cst_lts_rule); for ( ; state.feat != CST_LTS_EOR; ) { /* printf("awb_debug %s %c %c %d\n",vals,vals[state.feat],state.val, (vals[state.feat] == state.val) ? 1 : 0); */ if (vals[state.feat] == state.val) nstate = state.qtrue; else nstate = state.qfalse; /* This should really happen at compilation time */ if (CST_BIG_ENDIAN) nstate = SWAPSHORT(nstate); cst_lts_get_state(&state,model,nstate,sizeof_cst_lts_rule); } return (cst_lts_phone)state.val; }