shithub: flite

ref: a3ff166a5a99ea97f9037a82f5c2a578348291ac
dir: /lang/cmu_indic_lex/cmu_indic_lex.c/

View raw version
/*************************************************************************/
/*                                                                       */
/*                  Language Technologies Institute                      */
/*                     Carnegie Mellon University                        */
/*                         Copyright (c) 2013                            */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission is hereby granted, free of charge, to use and distribute  */
/*  this software and its documentation without restriction, including   */
/*  without limitation the rights to use, copy, modify, merge, publish,  */
/*  distribute, sublicense, and/or sell copies of this work, and to      */
/*  permit persons to whom this work is furnished to do so, subject to   */
/*  the following conditions:                                            */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*   4. The authors' names are not used to endorse or promote products   */
/*      derived from this software without specific prior written        */
/*      permission.                                                      */
/*                                                                       */
/*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                                                                       */
/*  Indic lexical functions                                              */
/*                                                                       */
/*************************************************************************/
#include <string.h>

#include "flite.h"
#include "cst_val.h"
#include "cst_voice.h"
#include "cst_lexicon.h"
#include "cst_ffeatures.h"
#include "cmu_indic_lex.h"
#include "cst_tokenstream.h"
#include "../cmu_indic_lang/cmu_indic_lang.h"

extern cst_lexicon cmu_lex;

 int utf8_sequence_length(char c0);

static const struct cmu_indic_char cmu_indic_offset_char[128] = {
  /*000*/ {IND_IGNORE, ""},
  /*001*/ {IND_ANUSWAAR, "nX"},
  /*002*/ {IND_ANUSWAAR, "nX"},
  /*003*/ {IND_VISARGA, "h"},

  /*004*/ {IND_INDEPENDENT_VOWEL, "A"},
  /*005*/ {IND_INDEPENDENT_VOWEL, "A"},
  /*006*/ {IND_INDEPENDENT_VOWEL, "A:"},
  /*007*/ {IND_INDEPENDENT_VOWEL, "i"},
  /*008*/ {IND_INDEPENDENT_VOWEL, "i:"},
  /*009*/ {IND_INDEPENDENT_VOWEL, "u"},
  /*00A*/ {IND_INDEPENDENT_VOWEL, "u:"},
  /*00B*/ {IND_INDEPENDENT_VOWEL, "9r="},
  /*00C*/ {IND_INDEPENDENT_VOWEL, "l="},
  /*00D*/ {IND_INDEPENDENT_VOWEL, "ay"},
  /*00E*/ {IND_INDEPENDENT_VOWEL, "e"},
  /*00F*/ {IND_INDEPENDENT_VOWEL, "e"},
  /*010*/ {IND_INDEPENDENT_VOWEL, "aI"},
  /*011*/ {IND_INDEPENDENT_VOWEL, "ow"},
  /*012*/ {IND_INDEPENDENT_VOWEL, "ow"},
  /*013*/ {IND_INDEPENDENT_VOWEL, "o"},
  /*014*/ {IND_INDEPENDENT_VOWEL, "aU"},

  /*015*/ {IND_CONSONANT, "k"},
  /*016*/ {IND_CONSONANT, "kh"},
  /*017*/ {IND_CONSONANT, "g"},
  /*018*/ {IND_CONSONANT, "gh"},
  /*019*/ {IND_CONSONANT, "N"},

  /*01A*/ {IND_CONSONANT, "c"},
  /*01B*/ {IND_CONSONANT, "ch"},
  /*01C*/ {IND_CONSONANT, "J"},
  /*01D*/ {IND_CONSONANT, "Jh"},
  /*01E*/ {IND_CONSONANT, "n~"},

  /*01F*/ {IND_CONSONANT, "tr"},
  /*020*/ {IND_CONSONANT, "tR"},
  /*021*/ {IND_CONSONANT, "dr"},
  /*022*/ {IND_CONSONANT, "dR"},
  /*023*/ {IND_CONSONANT, "nr"},

  /*024*/ {IND_CONSONANT, "tB"},
  /*025*/ {IND_CONSONANT, "tBh"},
  /*026*/ {IND_CONSONANT, "dB"},
  /*027*/ {IND_CONSONANT, "dBh"},
  /*028*/ {IND_CONSONANT, "nB"},
  /*029*/ {IND_CONSONANT, "nB"},

  /*02A*/ {IND_CONSONANT, "p"},
  /*02B*/ {IND_CONSONANT, "ph"},
  /*02C*/ {IND_CONSONANT, "b"},
  /*02D*/ {IND_CONSONANT, "bh"},
  /*02E*/ {IND_CONSONANT, "m"},

  /*02F*/ {IND_CONSONANT, "j"},
  /*030*/ {IND_CONSONANT, "9r"},
  /*031*/ {IND_CONSONANT, "9r"},
  /*032*/ {IND_CONSONANT, "l"},
  /*033*/ {IND_CONSONANT, "lr"},
  /*034*/ {IND_CONSONANT, "lr"},

  /*035*/ {IND_CONSONANT, "v"},
  /*036*/ {IND_CONSONANT, "c}"},
  /*037*/ {IND_CONSONANT, "sr"},
  /*038*/ {IND_CONSONANT, "s"},
  /*039*/ {IND_CONSONANT, "hv"},

  /*03A*/ {IND_VOWEL, "e"},
  /*03B*/ {IND_VOWEL, "e"},

  /*03C*/ {IND_NUKTA, ""},
  /*03D*/ {IND_AVAGRAHA, ""},

  /*03E*/ {IND_VOWEL, "A:"},
  /*03F*/ {IND_VOWEL, "i"},
  /*040*/ {IND_VOWEL, "i:"},
  /*041*/ {IND_VOWEL, "u"},
  /*042*/ {IND_VOWEL, "u:"},
  /*043*/ {IND_VOWEL, "9r="},
  /*044*/ {IND_VOWEL, "9r="},
  /*045*/ {IND_VOWEL, "ay"},
  /*046*/ {IND_VOWEL, "e"},
  /*047*/ {IND_VOWEL, "e"},
  /*048*/ {IND_VOWEL, "aI"},
  /*049*/ {IND_VOWEL, "ow"},
  /*04A*/ {IND_VOWEL, "o"},
  /*04B*/ {IND_VOWEL, "o"},
  /*04C*/ {IND_VOWEL, "aU"},

  /*04D*/ {IND_HALANT, ""},

  /*04E*/ {IND_IGNORE, ""},

  /*04F*/ {IND_VOWEL, "ow"},

  /*050*/ {IND_INDEPENDENT_VOWEL, "A u m"},

  /*051*/ {IND_IGNORE, ""},
  /*052*/ {IND_IGNORE, ""},
  /*053*/ {IND_IGNORE, ""},
  /*054*/ {IND_IGNORE, ""},

  /*055*/ {IND_VOWEL, "e"},
  /*056*/ {IND_VOWEL, "u e"},
  /*057*/ {IND_VOWEL, "u: e"},

  /*058*/ {IND_CONSONANT, "q"},
  /*059*/ {IND_CONSONANT, "x"},
  /*05A*/ {IND_CONSONANT, "G"},
  /*05B*/ {IND_CONSONANT, "z"},
  /*05C*/ {IND_CONSONANT, "rr"},
  /*05D*/ {IND_CONSONANT, "rrh"},
  /*05E*/ {IND_CONSONANT, "f"},
  /*05F*/ {IND_CONSONANT, "j"},

  /*060*/ {IND_INDEPENDENT_VOWEL, "9r="},
  /*061*/ {IND_INDEPENDENT_VOWEL, "lr="},
  /*062*/ {IND_VOWEL, "lr="},
  /*063*/ {IND_VOWEL, "lr="},

  /*064*/ {IND_PUNC, ""},
  /*065*/ {IND_PUNC, ""},

  /*066*/ {IND_DIGIT, ""},
  /*067*/ {IND_DIGIT, ""},
  /*068*/ {IND_DIGIT, ""},
  /*069*/ {IND_DIGIT, ""},
  /*06A*/ {IND_DIGIT, ""},
  /*06B*/ {IND_DIGIT, ""},
  /*06C*/ {IND_DIGIT, ""},
  /*06D*/ {IND_DIGIT, ""},
  /*06E*/ {IND_DIGIT, ""},
  /*06F*/ {IND_DIGIT, ""},

  /*070*/ {IND_ANUSWAAR, "nX"},
  /*071*/ {IND_ADDAK, ""},

  /*072*/ {IND_INDEPENDENT_VOWEL, "ay"},
  /*073*/ {IND_INDEPENDENT_VOWEL, "u e"},
  /*074*/ {IND_INDEPENDENT_VOWEL, "u: e"},
  /*075*/ {IND_INDEPENDENT_VOWEL, "ow"},
  /*076*/ {IND_INDEPENDENT_VOWEL, "u e"},
  /*077*/ {IND_INDEPENDENT_VOWEL, "u: e"},

  /*078*/ {IND_IGNORE, ""},

  /*079*/ {IND_CONSONANT, "z"},
  /*07A*/ {IND_CONSONANT, "j"},
  /*07B*/ {IND_CONSONANT, "G"},
  /*07C*/ {IND_CONSONANT, "z"},
  /*07D*/ {IND_CONSONANT, ""},
  /*07E*/ {IND_CONSONANT, "dr"},
  /*07F*/ {IND_CONSONANT, "b"},

};

static void replace_car(const cst_val *it,const cst_val *newcar)
{
    /* Destructively replace it's car with newcar, deleting old car */
    /* This does this by casting out of consts -- something you shouldn't do */
    cst_val *tbd;

    tbd = (cst_val *)val_car(it);
    set_car((cst_val *)it,newcar);
    delete_val(tbd);
}

static void replace_cdr(const cst_val *it,const cst_val *newcdr)
{
    /* Destructively replace it's car with newcar, deleting old cdr */
    /* This does this by casting out of consts -- something you shouldn't do */
    cst_val *tbd;

    tbd = (cst_val *)val_cdr(it);
    set_cdr((cst_val *)it,newcdr);
    set_cdr((cst_val *)tbd,NULL);
    delete_val(tbd);
}

static int cmu_indic_is_vowel(const char *p)
{
  /* Returns non-zero when the phone name p starts with one of the      */
  /* letters aeiouAEIOU, which happens to identify the vowels in the    */
  /* indic phone set.  Returns 0 for every other name.                  */

  /* strchr() also matches the terminating NUL of its first argument,   */
  /* so an empty name has to be rejected explicitly; a NULL name is     */
  /* treated as "not a vowel" as well.                                  */
  if ((p == NULL) || (p[0] == '\0'))
    return 0;

  return strchr("aeiouAEIOU",p[0]) != NULL;
}

static int cmu_indic_lex_ord_to_offset(const int indic_ord) {
  /* Maps a code point to its position (0..127) inside its script block. */
  /* The supported blocks are the nine consecutive 128-entry ranges      */
  /* starting at 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, 0x0C00, */
  /* 0x0C80 and 0x0D00.  For any other code point an error message is    */
  /* printed and 0 is returned.                                          */

  /* The previous version left its block base uninitialised and then     */
  /* tested it when no range matched; the explicit range check below     */
  /* makes the out-of-range path well defined.                           */
  if ((indic_ord < 0x0900) || (indic_ord > 0x0D7F)) {
    cst_errmsg("Indic language can not process character 0x%x\n", indic_ord);
    return 0;
  }

  /* Every block base is a multiple of 0x80, so subtracting the base is  */
  /* the same as keeping the low seven bits.                             */
  return indic_ord & 0x7F;
}

static int cmu_indic_get_char_type(const cst_val *indic_char) 
{
    /* Returns the IND_* class of the code point held in indic_char.    */
    /* A null value, or a code point outside 0x0900..0x0D7F, is classed */
    /* as IND_IGNORE.                                                   */
    int ord;
    int in_range;

    if (indic_char == NULL)
        return IND_IGNORE;

    ord = val_int(indic_char);
    in_range = (ord >= 0x0900) && (ord <= 0x0D7F);
    if (!in_range)
        return IND_IGNORE;

    return cmu_indic_offset_char[cmu_indic_lex_ord_to_offset(ord)].type;
}

static int indic_text_splitable(const char *s,int i,int len1)
{
    /* Looks at the character starting at s[i] (len1 bytes long) and the  */
    /* character that follows it.  Returns true when exactly one of the   */
    /* two is a digit, i.e. they are not both digits and not both         */
    /* non-digits.  A character counts as a non-digit when                */
    /* indic_digit_to_offset() returns -1 for it.                         */

    const char *cur = &s[i];
    char *this_ch, *next_ch;   /* Copies of this and the next character */
    size_t this_len, next_len; /* Their byte lengths, clamped to the text */
    int seq_len;
    int this_is_digit, next_is_digit;

    /* Never step past the end of the text: at the last character (or on */
    /* a truncated sequence) the unclamped lengths would make the        */
    /* terminator writes below land outside the copied buffers.          */
    this_len = (len1 > 0) ? (size_t)len1 : 0;
    if (this_len > strlen(cur))
        this_len = strlen(cur);

    this_ch = cst_strdup(cur);
    next_ch = cst_strdup(cur + this_len);

    seq_len = utf8_sequence_length(next_ch[0]);
    next_len = (seq_len > 0) ? (size_t)seq_len : 0;
    if (next_len > strlen(next_ch))
        next_len = strlen(next_ch);

    /* Cut each copy down to a single character */
    this_ch[this_len] = '\0';
    next_ch[next_len] = '\0';

    this_is_digit = (indic_digit_to_offset(this_ch) != -1);
    next_is_digit = (indic_digit_to_offset(next_ch) != -1);

    cst_free(this_ch);
    cst_free(next_ch);

    return this_is_digit != next_is_digit;
}

static const char *cmu_indic_get_char_phoneme(const cst_val *indic_char) 
{
    /* Returns the phoneme string for the code point held in indic_char, */
    /* or "" for a null value.  A handful of script-specific code points */
    /* (e.g. the e/e: and o/o: length distinction) are answered directly; */
    /* everything else comes from the shared per-offset table.           */
    int ord;

    if (!indic_char)
        return "";

    ord = val_int(indic_char);

    switch (ord) {
        /* Kannada */
    case 0x0C8F: case 0x0CC7: return "e:";
    case 0x0C92:              return "o";
    case 0x0C93: case 0x0CCB: return "o:";

        /* Malayalam */
    case 0x0D0F: case 0x0D47: return "e:";
    case 0x0D12:              return "o";
    case 0x0D13: case 0x0D4B: return "o:";
    case 0x0D34:              return "zr"; /* Retroflex approximant */
    case 0x0D31:              return "rr"; /* Retroflex flap */

        /* Punjabi */
    case 0x0A33:              return "l";

        /* Tamil */
    case 0x0B8F: case 0x0BC7: return "e:";
    case 0x0B92:              return "o";
    case 0x0B93: case 0x0BCB: return "o:";
    case 0x0BA9:              return "n";  /* Tamil alveolar nasal */
    case 0x0BB1:              return "rr";
    case 0x0BB4:              return "zr";

        /* Telugu */
    case 0x0C0F: case 0x0C47: return "e:";
    case 0x0C12:              return "o";
    case 0x0C13: case 0x0C4B: return "o:";

    default:
        break;
    }

    /* Not a special case: use the table entry for this offset */
    return cmu_indic_offset_char[cmu_indic_lex_ord_to_offset(ord)].phoneme;
}

static cst_val *cmu_indic_lex_remove_ignore_chars(const cst_val *indic_ords) {
  /* Builds and returns a new list holding, in their original order, the */
  /* items of indic_ords whose character type is not IND_IGNORE.  The    */
  /* input list itself is not modified.                                  */
  const cst_val *cell = indic_ords;
  cst_val *kept = NULL;

  while (cell) {
    const cst_val *ord = val_car(cell);

    if (cmu_indic_get_char_type(ord) != IND_IGNORE)
      kept = cons_val(ord, kept);   /* prepended, so reversed for now */
    cell = val_cdr(cell);
  }
  return val_reverse(kept);
}

static cst_val *cmu_indic_lex_map_nukta_chars(const cst_val *indic_ords) {
  /* Builds and returns a new list in which every character that is     */
  /* directly followed by a nukta is merged with that nukta: the pair   */
  /* is replaced by the single precomposed code point from the table    */
  /* below, or by the base character alone when the table has no entry  */
  /* for it.  All other items are copied across unchanged.  The input   */
  /* list is not modified.                                              */
  cst_val *output = 0;
  const cst_val *v;
  int i;
  int mapped_val;

  for(v=indic_ords; v; v=val_cdr(v)) {
    i = val_int(val_car(v));
    if (val_cdr(v) && cmu_indic_get_char_type(val_car(val_cdr(v))) == IND_NUKTA) {
      switch (i) {
          /* Devanagari */
      case 0x0915: mapped_val=0x0958; break; /* KA   -> QA    */
      case 0x0916: mapped_val=0x0959; break; /* KHA  -> KHHA  */
      case 0x0917: mapped_val=0x095A; break; /* GA   -> GHHA  */
      case 0x091C: mapped_val=0x095B; break; /* JA   -> ZA    */
      case 0x0921: mapped_val=0x095C; break; /* DDA  -> DDDHA */
      case 0x0922: mapped_val=0x095D; break; /* DDHA -> RHA   */
      case 0x092B: mapped_val=0x095E; break; /* PHA  -> FA    */
      case 0x092F: mapped_val=0x095F; break; /* YA   -> YYA   */

          /* Bengali */
      case 0x09A1: mapped_val=0x09DC; break; /* DDA  -> RRA   */
          /* Was 2566 (0x0A06, a Gurmukhi vowel), which could never */
          /* match a Bengali letter; DDHA is 0x09A2 (2466).         */
      case 0x09A2: mapped_val=0x09DD; break; /* DDHA -> RHA   */
      case 0x09AF: mapped_val=0x09DF; break; /* YA   -> YYA   */

          /* Tamil */
      case 0x0B92: mapped_val=0x0B94; break;

          /* Gurmukhi */
      case 0x0A16: mapped_val=0x0A59; break; /* KHA  -> KHHA  */
      case 0x0A17: mapped_val=0x0A5A; break; /* GA   -> GHHA  */
      case 0x0A1C: mapped_val=0x0A5B; break; /* JA   -> ZA    */
      case 0x0A2B: mapped_val=0x0A5E; break; /* PHA  -> FA    */
          /* Was mapped to itself (0x0A32); the precomposed form of  */
          /* LA + nukta is LLA, 0x0A33.                              */
      case 0x0A32: mapped_val=0x0A33; break; /* LA   -> LLA   */
      case 0x0A38: mapped_val=0x0A36; break; /* SA   -> SHA   */

      default:
        mapped_val=i;
      };
      output = cons_val(int_val(mapped_val), output);
      /* Step over the nukta itself; the loop increment then moves on */
      v = val_cdr(v);
    } else {
        /* Copy current item into output */
      output = cons_val(val_car(v), output);
    }
  }

  /* Items were prepended, so put them back in input order */
  return val_reverse(output);
}

/* Converts a list of Indic code points into a list of phone names.      */
/*                                                                       */
/*   ords   list of integer code points for one word; not modified.      */
/*   cmu_indic_variant_deletes_word_final_schwa                          */
/*          when non-zero, no schwa ("A") is added after a word-final    */
/*          consonant, except in one-character words.                    */
/*   feats  the "variant" string is read from here (default "none");     */
/*          only the value "tam" changes behaviour (visarga handling).   */
/*                                                                       */
/* Returns a newly built list of phone values, or 0 when nothing is left */
/* after the IND_IGNORE characters have been filtered out.  The          */
/* intermediate lists created here are deleted before returning.         */
/*                                                                       */
/* The input is first filtered (ignored characters removed, nukta pairs  */
/* merged) and then walked with a one-character look-behind/look-ahead   */
/* window (prev_char / cur_char / next_char).                            */
cst_val *cmu_indic_lex_ord_to_phones(const cst_val *ords,
                                     int cmu_indic_variant_deletes_word_final_schwa,
                                     const cst_features *feats)
{
    cst_val *in_ords = 0;
    cst_val *t_ords = 0;
    cst_val *out_phone_strings = 0;
    cst_val *out_phones = 0;

    const cst_val *prev_char = 0;
    const cst_val *cur_char = 0;
    const cst_val *next_char = 0;
    const cst_val *remainder = 0;
    const char *indic_variant = 0;

    int cur_char_type = 0;
    int next_char_type = 0;
    int prev_char_type = 0;

    indic_variant = get_param_string(feats, "variant", "none");

    /* Ignore chars (filter) */
    in_ords = cmu_indic_lex_remove_ignore_chars(ords);

    if (!in_ords) {
        return out_phones;
    }

    /* Map Nukta Chars (filter) */
    t_ords = cmu_indic_lex_map_nukta_chars(in_ords);
    delete_val(in_ords);
    in_ords = t_ords;

    /* Prime the sliding window: cur_char is the first character,      */
    /* next_char the second (if any), remainder the list after that.   */
    prev_char = 0;
    cur_char = val_car(in_ords);
    if (val_cdr(in_ords)) {
        next_char = val_car(val_cdr(in_ords));
        remainder = val_cdr(val_cdr(in_ords));
    }
    else {
        next_char = 0;
        remainder = 0;
    }

    while (cur_char) {
        /*        printf("awb_debug out_phone_strings "); val_print(stdout,out_phone_strings); printf("\n"); */
        cur_char_type = cmu_indic_get_char_type(cur_char);
        /* prev_char_type keeps its old value when there is no previous */
        /* character; every use below is guarded by prev_char.          */
        if (prev_char)
            prev_char_type = cmu_indic_get_char_type(prev_char);
        if (next_char)
            next_char_type = cmu_indic_get_char_type(next_char);
        else
            next_char_type = IND_IGNORE;

        if (cur_char_type == IND_CONSONANT) {
            /* Add consonant to the output list */

            /* This part needs to be incorporated into Festvox code! */
            /* In Tamil, if visarga comes right before p or J, it is not */
            /* realized as "h" but instead maps p to f, or J to z.  This */
            /* takes care of that. */
            if ((prev_char) &&
                ((prev_char_type == IND_VISARGA) &&
                 (((cst_streq("J", cmu_indic_get_char_phoneme(cur_char))) ||
                   (cst_streq("p", cmu_indic_get_char_phoneme(cur_char)))) &&
                  (cst_streq(indic_variant,"tam"))))) {
                /* Don't add current character to out_phones; */
                /* the correct mapped character is already added when  */
                /* cur_char is visarga */
            } else {
                out_phone_strings =
                    cons_val(string_val(cmu_indic_get_char_phoneme(cur_char)),
                             out_phone_strings);
                /* If a consonant is followed by a combination vowel, a */
                /* halant, a punctuation then don't add a schwa after */
                /* it. Otherwise, insert a schwa. For end-of-word, check */
                /* whether we should insert schwa in this language. */
                if (!next_char) { /* We are in last char. Add schwa? */
                    if ((!prev_char) || /* Always add schwa for one-char words */
                        (!cmu_indic_variant_deletes_word_final_schwa)) {
                        out_phone_strings = cons_val(string_val("A"), out_phone_strings);
                    } else {
                        /* Schwa deletion should probably happen depending */
                        /* on whether there is a consonant cluster or not, */
                        /* at the end. But Adding that rule here seems to */
                        /* not have worked properly. Hence, we always */
                        /* delete the final schwa. */
                    }
                } else { /* Not a final char */
                    if ( (next_char_type != IND_VOWEL) &&
                         (next_char_type != IND_PUNC) &&
                         (next_char_type != IND_HALANT) &&
                         (next_char_type != IND_IGNORE)) {
                        out_phone_strings = cons_val(string_val("A"), out_phone_strings);
                    }
                }
            }
        } else if ((cur_char_type == IND_VOWEL) ||
                   (cur_char_type == IND_INDEPENDENT_VOWEL) ||
                   (cur_char_type == IND_DIGIT)) {
            /* Add whatever the pronunciation is */
            out_phone_strings =
                cons_val(string_val(cmu_indic_get_char_phoneme(cur_char)),
                                out_phone_strings);
        } else if (cur_char_type == IND_HALANT) {
            /* Ignore */
        } else if (cur_char_type == IND_AVAGRAHA) {
            /* Lengthen previous vowel */
            /* (a missing prev_char is classed as IND_IGNORE, so it is skipped) */
            if (cmu_indic_get_char_type(prev_char) == IND_VOWEL) {
                out_phone_strings =
                    cons_val(string_val(cmu_indic_get_char_phoneme(prev_char)),
                             out_phone_strings);
            }
        } else if (cur_char_type == IND_ANUSWAAR) {
            /* The realization of anuswaar is context dependent: We */
            /* only generate a placeholder symbol and let postlexical */
            /* rules take care of this. */
            out_phone_strings = cons_val(string_val("nX"),out_phone_strings);

        } else if ((cur_char_type == IND_VISARGA) &&
                   (!cst_streq(indic_variant,"tam"))) {
             
            /* If we are not in Tamil, add the glottal fricative ("h") */
            out_phone_strings = cons_val(string_val("h"),out_phone_strings);
        }
        else if ((cur_char_type == IND_VISARGA) &&
                 (cst_streq(indic_variant,"tam"))) {
            /* If there's a next character */
            if (next_char) {
                /* If next character is p or J, do mapping and add to output */
                if (cst_streq("p", cmu_indic_get_char_phoneme(next_char))) {
                    out_phone_strings = cons_val(string_val("f"),
                                                 out_phone_strings);

                } else if (cst_streq("J", cmu_indic_get_char_phoneme(next_char))) {
                    out_phone_strings = cons_val(string_val("z"),
                                                 out_phone_strings);
                    /* Otherwise it's "h" */
                } else {
                    out_phone_strings = cons_val(string_val("h"),
                                                 out_phone_strings);
                }
                /* If this is the last character, it's "h" */
            } else {
                out_phone_strings = cons_val(string_val("h"),
                                             out_phone_strings);
            }

        } else if (cur_char_type == IND_ADDAK) {
            /* In Gurmukhi, this diacritic geminates the following consonant. */
            /* (with no following character the phoneme lookup yields "") */
            out_phone_strings =
                cons_val(string_val(cmu_indic_get_char_phoneme(next_char)),
                         out_phone_strings);
        } else {
            /* IDEALLY we should warn for unhandled characters! */
            /* TODO? */
        }

        /* Slide the window one character to the right */
        prev_char = cur_char;
        cur_char = next_char;
        if (remainder) {
            next_char = val_car(remainder);
            remainder = val_cdr(remainder);
        } else {
            next_char = 0;
        }
    }

    /* There may be multiple phones in the expansion, but they are just */
    /* space separated tokens in the val, so we need to make them into  */
    /* an actual val list of phones */
    /* NOTE(review): out_phone_strings was built by prepending, so this */
    /* loop visits the last character's string first; each phone is     */
    /* prepended again and the result is reversed on return.  That      */
    /* looks as if the per-character groups would come out in reverse   */
    /* word order -- confirm against the callers and val_reverse().     */
    const cst_val *v, *vx;
    cst_val *ph;
    for(v=out_phone_strings; v; v=val_cdr(v)) {
        ph = val_readlist_string(val_string(val_car(v)));
        for(vx=ph; vx; vx=val_cdr(vx)) {
            out_phones = cons_val(val_car(vx), out_phones);
        }
        delete_val(ph);
    }
    delete_val(in_ords);
    delete_val(out_phone_strings);
    return val_reverse(out_phones);

}

static cst_val *cmu_indic_lex_kannada_spelling_postfixes(cst_val *in_phones)
{
    /* Corrects spelling-induced phone pairs in place:                 */
    /*     ( e u: ) => ( o )      and      ( e aI ) => ( aI )          */
    /* The list is modified destructively and returned.                */
    const cst_val *p;
    const char *next_ph;

    for (p=in_phones; p && val_cdr(p); p=val_cdr(p))
    {
        if (!cst_streq(val_string(val_car(p)),"e"))
            continue;

        /* Only read before the following cell is released below */
        next_ph = val_string(val_car(val_cdr(p)));

        if (cst_streq(next_ph,"u:"))
            replace_car(p,string_val("o"));
        else if (cst_streq(next_ph,"aI"))
            replace_car(p,string_val("aI"));
        else
            continue;

        /* Unlink the second phone of the pair.  replace_cdr() also    */
        /* deletes the unlinked cell, which a bare set_cdr() would     */
        /* have left orphaned.                                         */
        replace_cdr(p,val_cdr(val_cdr(p)));
    }
    return in_phones;
}

cst_val *cmu_indic_lex_nasal_postfixes(cst_val *in_phones) 
{
    /* Resolves the placeholder phone nX (contextual nasal) in place.   */
    /*  - vowel + nX at the end of the list: after a schwa ("A") the nX */
    /*    becomes "m"; after any other vowel the vowel gets a "nas"     */
    /*    suffix and the nX is removed.                                 */
    /*  - nX followed by another phone: replaced by the nasal that      */
    /*    matches the "cplace" feature of that following phone.         */
    /* The list is modified destructively and returned.                 */
    const cst_val *cell;

    for (cell = in_phones; cell && val_cdr(cell); cell = val_cdr(cell))
    {
        const cst_val *next = val_cdr(cell);
        const cst_val *after = val_cdr(next);

        if (cmu_indic_is_vowel(val_string(val_car(cell))) &&
            cst_streq("nX", val_string(val_car(next))) &&
            (!after || !val_car(after)))
        {
            if (cst_streq("A", val_string(val_car(cell))))
            {
                /* A schwa is not nasalized: the nX turns into m */
                replace_car(next,string_val("m"));
            }
            else
            {
                char *nasalized = cst_strcat(val_string(val_car(cell)),"nas");

                replace_car(cell,string_val(nasalized));
                cst_free(nasalized);
                replace_cdr(cell,after);   /* drop the nX cell */
            }
            continue;
        }

        if (!cst_streq("nX", val_string(val_car(cell))))
            continue;

        {
            /* Pick the nasal from the place of the following phone */
            const char *place =
                val_string(phone_feature(&cmu_indic_phoneset,
                                         val_string(val_car(next)),
                                         "cplace"));
            const char *nasal;

            if (place)
            {
                switch (place[0])
                {
                case 'v': nasal = "N";  break;
                case 'p': nasal = "n~"; break;
                case 'a': nasal = "nr"; break;
                case 'd': nasal = "nB"; break;
                case 'l': nasal = "m";  break;
                default:  nasal = "nB";
                }
                replace_car(cell,string_val(nasal));
            }
        }
    }

    return in_phones;
}

static cst_val *cmu_indic_lex_jnyan_replacement(cst_val *in_phones,
                                                const cst_features *feats)
{
    /* Rewrites every ( J n~ ) pair in place: to ( g j ) when the       */
    /* "variant" feature is "hin", and to ( g n~ ) otherwise.  The list */
    /* is modified destructively and returned.                          */
    const char *variant = get_param_string(feats, "variant", "none");
    const cst_val *cell = in_phones;

    while (cell && val_cdr(cell))
    {
        const cst_val *next = val_cdr(cell);

        if (cst_streq(val_string(val_car(cell)),"J") &&
            cst_streq(val_string(val_car(next)),"n~"))
        {
            replace_car(cell,string_val("g"));
            /* Only Hindi pronounces this digraph as ( g j ), AFAIK -shyam */
            if (cst_streq(variant,"hin"))
                replace_car(next,string_val("j"));
            cell = next;   /* the pair is done; resume after it */
        }
        cell = val_cdr(cell);
    }
    return in_phones;
}

static cst_val *cmu_indic_lex_punjabi_vowel_postfixes(cst_val *in_phones) 
{
    /* Punjabi vowel adjustments, applied in place; returns in_phones.  */
    /*  - A word starting ( i/u nB hv ) becomes ( e/o hv nB ) and       */
    /*    nothing else is changed.                                      */
    /*  - Otherwise, scanning left to right:                            */
    /*       ( A hv i/u X ) => ( aI/aU hv X )                           */
    /*       ( i/u hv X )   => ( e/o hv X )                             */
    /*       ( A: u X )     => ( aU X )                                 */
    /*       ( A: A X )     => ( A: X )                                 */
    /*    where X stands for at least one further phone.                */
    /* Cells taken out of the list are released with replace_cdr();     */
    /* splicing with a bare set_cdr() would leave them orphaned.        */
    const cst_val *p;
    
    p=in_phones;
    
    /* Provide better approximates for 3rd person singular pronouns */
    /* Check for orthographic variant of ihn/uhn, written inh/unh */
    /* Equivalent to the punjabi_pronoun_postfixes function */
    if (p && val_cdr(p) && val_cdr(val_cdr(p)) &&        
        (cst_streq(val_string(val_car(val_cdr(p))),"nB")) &&
        (cst_streq(val_string(val_car(val_cdr(val_cdr(p)))),"hv")) &&
        ((cst_streq(val_string(val_car(p)),"i")) ||
         (cst_streq(val_string(val_car(p)),"u"))))
    {
        if (cst_streq(val_string(val_car(p)),"i"))
            replace_car(p,string_val("e"));
        else if (cst_streq(val_string(val_car(p)),"u"))
            replace_car(p,string_val("o"));
        /* Swap nB and hv by rewriting the two existing cells */
        replace_car(val_cdr(p),string_val("hv"));
        replace_car(val_cdr(val_cdr(p)),string_val("nB"));
        return in_phones;
    }

    for ( ; p && val_cdr(p); p=val_cdr(p))
    {
        /* Change sequences ( A hv i/u ) => ( aI/aU hv ) */
        if ((cst_streq(val_string(val_car(p)),"A")) &&
            (cst_streq(val_string(val_car(val_cdr(p))),"hv")) &&
            val_cdr(val_cdr(p)) && (val_cdr(val_cdr(val_cdr(p)))) &&
            ((cst_streq(val_string(val_car(val_cdr(val_cdr(p)))),"i")) ||
             (cst_streq(val_string(val_car(val_cdr(val_cdr(p)))),"u"))))
        {
            if (cst_streq(val_string(val_car(val_cdr(val_cdr(p)))),"i"))
                replace_car(p,string_val("aI"));
            else if (cst_streq(val_string(val_car(val_cdr(val_cdr(p)))),"u"))
                replace_car(p,string_val("aU"));
            /* Keep the hv cell, drop (and free) the i/u cell after it */
            replace_cdr(val_cdr(p),val_cdr(val_cdr(val_cdr(p))));
            p = val_cdr(val_cdr(p)); /* Skip over them */
        }
        
        /* Change sequences ( i/u hv ) => ( e/o hv ) */
        else if (((cst_streq(val_string(val_car(p)),"i")) ||
                  (cst_streq(val_string(val_car(p)),"u"))) &&
                 val_cdr(val_cdr(p)) &&
                 (cst_streq(val_string(val_car(val_cdr(p))),"hv")))
        {
            if (cst_streq(val_string(val_car(p)),"i"))
                replace_car(p,string_val("e"));
            else if (cst_streq(val_string(val_car(p)),"u"))
                replace_car(p,string_val("o"));
            p = val_cdr(p); /* Skip over them */
        }
        
        /* Change sequences ( A: u/A ) => ( aU/A: ) */
        else if ((cst_streq(val_string(val_car(p)),"A:")) &&
                 val_cdr(val_cdr(p)) &&
                 ((cst_streq(val_string(val_car(val_cdr(p))),"u")) ||
                  (cst_streq(val_string(val_car(val_cdr(p))),"A")))
                 )
        {
            if (cst_streq(val_string(val_car(val_cdr(p))),"u"))
                replace_car(p,string_val("aU"));
            /* Drop (and free) the second vowel */
            replace_cdr(p,val_cdr(val_cdr(p)));
            p = val_cdr(p); /* Skip over them */
        }
    }
    return in_phones;
}


static cst_val *cmu_indic_lex_punjabi_glide_postfixes(cst_val *in_phones) 
{
    /* Inserts the glides/semivowels that correspond to i/u, in place:  */
    /*     ( i/u V )    => ( j/v V )                                    */
    /*     ( i:/u: V )  => ( i/u j/v V )                                */
    /*     ( V i )      => ( V j )                                      */
    /*     ( V i: V )   => ( V j j V )                                  */
    /* where V is any phone accepted by cmu_indic_is_vowel().  The list */
    /* is modified destructively and returned.                          */
    const cst_val *cell = in_phones;

    while (cell && val_cdr(cell))
    {
        const cst_val *next = val_cdr(cell);
        /* These names are only read before the cell they belong to is */
        /* rewritten.                                                   */
        const char *cur_ph = val_string(val_car(cell));
        const char *next_ph = val_string(val_car(next));

        if (cmu_indic_is_vowel(next_ph) &&
            (cst_streq(cur_ph,"i") || cst_streq(cur_ph,"u") ||
             cst_streq(cur_ph,"i:") || cst_streq(cur_ph,"u:")))
        {
            if (cst_streq(cur_ph,"i"))
                replace_car(cell,string_val("j"));
            else if (cst_streq(cur_ph,"u"))
                replace_car(cell,string_val("v"));
            else if (cst_streq(cur_ph,"i:"))
            {   /* Long vowel: shorten it and insert the glide after it */
                replace_car(cell,string_val("i"));
                set_cdr((cst_val *)cell,cons_val(string_val("j"),next));
            }
            else if (cst_streq(cur_ph,"u:"))
            {
                replace_car(cell,string_val("u"));
                set_cdr((cst_val *)cell,cons_val(string_val("v"),next));
            }
            cell = val_cdr(cell); /* Skip over them */
        }
        else if (cst_streq(next_ph,"i") && cmu_indic_is_vowel(cur_ph))
        {
            replace_car(next,string_val("j"));
            cell = next;
        }
        else if (cst_streq(next_ph,"i:") && cmu_indic_is_vowel(cur_ph) &&
                 val_cdr(next) &&
                 cmu_indic_is_vowel(val_string(val_car(val_cdr(next)))))
        {
            replace_car(next,string_val("j"));
            set_cdr((cst_val *)next,cons_val(string_val("j"),val_cdr(next)));
            cell = val_cdr(next); /* Skip over them */
        }

        cell = val_cdr(cell);
    }
    return in_phones;
}

static cst_val *cmu_indic_lex_tamil_tr_replacement(cst_val *in_phones) 
{
    /* Rewrites every ( rr rr ) pair in place as ( tr tr rr ).  The list */
    /* is modified destructively and returned.                           */
    const cst_val *cell = in_phones;

    while (cell && val_cdr(cell))
    {
        const cst_val *next = val_cdr(cell);

        if (cst_streq(val_string(val_car(cell)),"rr") &&
            cst_streq(val_string(val_car(next)),"rr"))
        {
            /* First rr becomes tr, and a second tr is inserted after it */
            replace_car(cell,string_val("tr"));
            set_cdr((cst_val *)cell,cons_val(string_val("tr"),next));
            cell = next;   /* continue after the remaining rr */
        }
        cell = val_cdr(cell);
    }
    return in_phones;
}

static cst_val *cmu_indic_lex_tamil_nr_replacement(cst_val *in_phones) 
{
    /* Rewrites every ( n rr ) pair in place as ( nr dr rr ).  The list */
    /* is modified destructively and returned.                          */
    const cst_val *cell = in_phones;

    while (cell && val_cdr(cell))
    {
        const cst_val *next = val_cdr(cell);

        if (cst_streq(val_string(val_car(cell)),"n") &&
            cst_streq(val_string(val_car(next)),"rr"))
        {
            /* n becomes nr, and a dr is inserted between it and the rr */
            replace_car(cell,string_val("nr"));
            set_cdr((cst_val *)cell,cons_val(string_val("dr"),next));
            cell = next;   /* continue after the rr */
        }
        cell = val_cdr(cell);
    }
    return in_phones;
}

static cst_val *cmu_indic_lex_tamil_final_u(cst_val *in_phones) 
{
    /* Tamil: if the last phone of the list is u, rename it to uy.       */
    /* Only the final cell can change; an empty list is left alone.      */
    const cst_val *last = in_phones;

    /* Walk to the final cell (the one with no cdr) */
    while (last && val_cdr(last))
        last = val_cdr(last);

    if (last && cst_streq(val_string(val_car(last)),"u"))
        replace_car(last,string_val("uy"));

    return in_phones;
}


cst_val *cmu_indic_lex_tamil_voicing_postfixes(cst_val *phones)
{
    /* Destructively adjust the voicing of Tamil stops in a phone list.  */
    /*                                                                   */
    /*  - A word-initial c that is not followed by another c becomes s.  */
    /*  - A stop (k c tr tB p) preceded by a non-vowel whose "ctype"     */
    /*    feature is "n" is replaced by its voiced form.                 */
    /*  - A stop preceded by a vowel, or by a phone whose "ctype" is     */
    /*    "r", and followed by a vowel is replaced by its lenited form.  */
    /*  - Any other stop is left as it is.                               */
    /* Returns the same list head that was passed in.                    */

    /* { stop, voiced form, lenited form } */
    static const char * const stop_map[][3] =
        {
            {"k",  "g",  "G"   },
            {"c",  "J",  "s"   },
            {"tr", "dr", "rrh" },
            {"tB", "dB", "dh"  },
            {"p",  "b",  "B"   },
            {NULL, NULL, NULL  }
        };
    const cst_val *cur = phones;
    const cst_val *nxt, *after;
    const char *here, *stop;
    const char *ctype;
    int m, here_is_vowel;

    /* word-initial c becomes s */
    if (cur && cst_streq(val_string(val_car(cur)),"c") &&
        val_cdr(cur) &&
        !cst_streq(val_string(val_car(val_cdr(cur))),"c"))
    {
        replace_car(cur,string_val("s"));
        cur = val_cdr(cur);
    }

    while (cur && val_cdr(cur))
    {
        nxt = val_cdr(cur);
        here = val_string(val_car(cur));
        stop = val_string(val_car(nxt));

        /* Is the next phone one of the stops that can be remapped? */
        for (m=0; stop_map[m][0]; m++)
        {
            if (cst_streq(stop,stop_map[m][0]))
                break;
        }

        if (stop_map[m][0])
        {
            here_is_vowel = cmu_indic_is_vowel(here) ? TRUE : FALSE;
            /* The ctype feature is only consulted for non-vowels */
            ctype = NULL;
            if (!here_is_vowel)
                ctype = val_string(phone_feature(&cmu_indic_phoneset,
                                                 here,"ctype"));

            if (!here_is_vowel && cst_streq(ctype,"n"))
            {   /* after an "n"-type consonant: voice the stop */
                replace_car(nxt,string_val(stop_map[m][1]));
                cur = nxt; /* skip the phone just rewritten */
            }
            else if (here_is_vowel || cst_streq(ctype,"r"))
            {   /* between this phone and a following vowel: lenite */
                after = val_cdr(nxt);
                if (after &&
                    cmu_indic_is_vowel(val_string(val_car(after))))
                {
                    replace_car(nxt,string_val(stop_map[m][2]));
                    cur = nxt; /* skip the phone just rewritten */
                }
            }
        }
        cur = val_cdr(cur);
    }

    return phones;
}

/* For English derived pronunciation (latin scripted tokens) we map them */
/* to (hindi) phones -- this has to be modified for other indic languages */
/* Each row is { English phone, first Indic phone, optional second Indic */
/* phone or NULL }; the all-NULL row terminates the table.  The array is */
/* declared with 99 rows, so the rows after the terminator are NULL too. */
/* map_english_to_indic_phones() reads eng_to_indic, not this table.     */
static const char * const eng_to_indic_orig[99][3] =
    {
        {"aa", "A:", NULL },
        {"ae", "A", NULL },  /* changed this to A rather than e */
        {"ah", "A", NULL },
        {"ao", "o", NULL },
        {"aw", "aU", NULL },
        {"ax", "A", NULL },
        {"axr", "A", NULL },
        {"ay", "aI", NULL },
        {"b", "b", NULL },
        {"ch", "c", NULL },
        {"d", "dB", NULL },
        {"dh", "dB", NULL },
        {"eh", "e", NULL },
        {"er", "9r", NULL },
        {"ey", "ay", NULL },
        {"f", "ph", NULL },
        {"g", "g", NULL },
        {"hh", "hv", NULL },
        {"ih", "i", NULL },
        {"iy", "i:", NULL },
        {"jh", "J", NULL },
        {"k", "k", NULL },
        {"l", "l", NULL },
        {"m", "m", NULL },
        {"n", "nB", NULL },
        {"nx", "nB", NULL },
        {"ng", "nB", NULL },
        {"ow", "o", NULL },
        {"oy", "o", "j" },
        {"p", "p", NULL },
        {"r", "9r", NULL },
        {"s", "s", NULL },
        {"sh", "c}", NULL },
        {"t", "tr", NULL },
        {"th", "tBh", NULL },
        {"uh", "u", NULL },
        {"uw", "u:", NULL },
        {"v", "v", NULL },
        {"w", "v", NULL },
        {"y", "j", NULL },
        {"z", "s", NULL },
        {"zh", "c}", NULL },
        {NULL, NULL, NULL }
    };


/* For English derived pronunciation (latin scripted tokens) we map them */
/* to (hindi) phones -- this has to be modified for other indic languages */
/* Sai Krishna */
/* 07 July 2017 */
/* Making this v1 as Shyam's mapping looks a bit different */
/* Same row layout as the other mapping tables: { English phone, first   */
/* output phone, optional second output phone or NULL }, terminated by   */
/* an all-NULL row.  Here almost every phone maps to itself; the         */
/* exceptions are nx and ng (both to n) and oy (oy followed by j).       */
/* map_english_to_indic_phones() reads eng_to_indic, not this table.     */

static const char * const eng_to_indic_v1[99][3] =
    {
        {"aa", "aa", NULL },
        {"ae", "ae", NULL },  /* identity here; the older table used A */
        {"ah", "ah", NULL },
        {"ao", "ao", NULL },
        {"aw", "aw", NULL },
        {"ax", "ax", NULL },
        {"axr", "axr", NULL },
        {"ay", "ay", NULL },
        {"b", "b", NULL },
        {"ch", "ch", NULL },
        {"d", "d", NULL },
        {"dh", "dh", NULL },
        {"eh", "eh", NULL },
        {"er", "er", NULL },
        {"ey", "ey", NULL },
        {"f", "f", NULL },
        {"g", "g", NULL },
        {"hh", "hh", NULL },
        {"ih", "ih", NULL },
        {"iy", "iy", NULL },
        {"jh", "jh", NULL },
        {"k", "k", NULL },
        {"l", "l", NULL },
        {"m", "m", NULL },
        {"n", "n", NULL },
        {"nx", "n", NULL },
        {"ng", "n", NULL },
        {"ow", "ow", NULL },
        {"oy", "oy", "j" },
        {"p", "p", NULL },
        {"r", "r", NULL },
        {"s", "s", NULL },
        {"sh", "sh", NULL },
        {"t", "t", NULL },
        {"th", "th", NULL },
        {"uh", "uh", NULL },
        {"uw", "uw", NULL },
        {"v", "v", NULL },
        {"w", "w", NULL },
        {"y", "y", NULL },
        {"z", "z", NULL },
        {"zh", "zh", NULL },
        {NULL, NULL, NULL }
    };


/* English (radio) phone to Indic phone mapping that is actually used by */
/* map_english_to_indic_phones().  Each row is { English phone, first    */
/* Indic phone, optional second Indic phone or NULL }; a non-NULL third  */
/* column means the English phone expands to two Indic phones.  The      */
/* all-NULL row terminates the table.                                    */
static const char * const eng_to_indic[99][3] =
    {
        {"aa", "A:", NULL },
        {"ae", "aI", NULL },  
        {"ah", "A", NULL },
        {"ao", "aU", NULL },
        {"aw", "A:", "u" },
        {"ax", "A", NULL },
        {"axr", "A", "9r" },
        {"ay", "A:", "i" },
        {"b", "b", NULL },
        {"ch", "c", NULL },
        {"d", "dr", NULL },
        {"dh", "dB", NULL },
        {"eh", "E", NULL },
        {"er", "A", "9r" },
        {"ey", "e", NULL },
        {"f", "ph", NULL },
        {"g", "g", NULL },
        {"hh", "hv", NULL },
        {"ih", "i", NULL },
        {"iy", "i:", NULL },
        {"jh", "J", NULL },
        {"k", "k", NULL },
        {"l", "l", NULL },
        {"m", "m", NULL },
        {"n", "nB", NULL },
        {"nx", "nB", NULL },
        {"ng", "N", NULL },
        {"ow", "o", NULL },
        {"oy", "aU", "i" },
        {"p", "p", NULL },
        {"r", "9r", NULL },
        {"s", "s", NULL },
        {"sh", "c}", NULL },
        {"t", "tr", NULL },
        {"th", "tBh", NULL },
        {"uh", "u", NULL },
        {"uw", "u:", NULL },
        {"v", "v", NULL },
        {"w", "v", NULL },
        {"y", "j", NULL },
        {"z", "z", NULL },
        {"zh", "c}", NULL },
        {NULL, NULL, NULL }
    };
    

cst_val *map_english_to_indic_phones(const char *indic_variant,
                                     const cst_val *english_phones)
{
    /* Map English (radio) phones to their Indic equivalent.             */
    /*                                                                   */
    /* indic_variant:  currently unused; the mapping table is not yet    */
    /*                 variant specific.                                 */
    /* english_phones: list of phone name strings, optionally carrying a */
    /*                 trailing stress digit '0' or '1'.  Not modified.  */
    /*                                                                   */
    /* Returns a newly built list of Indic phone strings in the same     */
    /* order.  An English phone may expand to two Indic phones; a phone  */
    /* with no entry in eng_to_indic is dropped.                         */
    cst_val *ip = NULL;
    const cst_val *v;
    char *english_phone;
    int i, len;

    for (v=english_phones; v; v=val_cdr(v))
    {
        english_phone = cst_strdup(val_string(val_car(v)));
        len = (int)cst_strlen(english_phone);
        /* Guard the empty string: indexing [len-1] would be out of bounds */
        if ((len > 0) &&
            ((english_phone[len-1] == '0') ||
             (english_phone[len-1] == '1')))
            /* It has a stress value on it */
            english_phone[len-1] = '\0';
        /* *** mapping table should be indic variant specific */
        for (i=0; eng_to_indic[i][0]; i++)
        {
            if (cst_streq(english_phone,eng_to_indic[i][0]))
            {
                /* The list is built back to front and reversed below */
                ip = cons_val(string_val(eng_to_indic[i][1]),ip);
                if (eng_to_indic[i][2])
                    ip = cons_val(string_val(eng_to_indic[i][2]),ip);
                break;  /* table keys are unique; stop at the first hit */
            }
            /* if there is no mapping, we drop the phone */
        }
        cst_free(english_phone);
    }
    ip = val_reverse(ip);

    return ip;
}

static cst_val *delete_medial_schwa(cst_val *rphones)
{
    /* Schwa deletion after the technique of Narsimhan et al (2001):     */
    /*   1. Process the input from right to left.                        */
    /*   2. Delete a schwa ("A") that sits in a VC_CV context.           */
    /* The caller passes the phone list already reversed, so walking it  */
    /* forwards visits the word right to left.  The list is edited in    */
    /* place and the same head is returned.                              */

    /* There are exceptions to this: (i) Phonotactic constraints of      */
    /* Hindi not violated, and no (ii) morpheme boundary present on the  */
    /* left. But I don't know how to handle these yet. So this will be   */
    /* wrong more often than the 11% reported in that paper. -- AUP      */
    const cst_val *v_right;                     /* window start          */
    const cst_val *c_right, *schwa, *c_left, *v_left;

    for (v_right=rphones; v_right && val_cdr(v_right);
         v_right=val_cdr(v_right))
    {
        /* Need a window of five phones: V C schwa C V */
        c_right = val_cdr(v_right);
        schwa = val_cdr(c_right);
        c_left = schwa ? val_cdr(schwa) : NULL;
        v_left = c_left ? val_cdr(c_left) : NULL;
        if (!v_left)
            continue;

        if (!cst_streq(val_string(val_car(schwa)),"A"))
            continue;   /* middle phone is not a schwa */
        if (!cmu_indic_is_vowel(val_string(val_car(v_right))))
            continue;
        if (cmu_indic_is_vowel(val_string(val_car(c_right))))
            continue;
        if (cmu_indic_is_vowel(val_string(val_car(c_left))))
            continue;
        if (!cmu_indic_is_vowel(val_string(val_car(v_left))))
            continue;

        /* Unlink the schwa cell, detach its tail, then free it alone */
        set_cdr((cst_val *)c_right,c_left);
        set_cdr((cst_val *)schwa,NULL);
        delete_val((cst_val *)schwa);
    }

    return rphones;
}

static cst_val *cmu_indic_hindi_schwa_fixes(cst_val *phones)
{
    /* Word-initial fix-up: when the list starts ( X A hv Y ... ) and Y  */
    /* is either "A" or not a vowel, delete the schwa in second place,   */
    /* giving ( X hv Y ... ).  Otherwise the list is left untouched.     */
    /* The same head is returned in both cases.                          */
    const cst_val *second, *third, *fourth;
    const char *fourth_name;
    cst_val *doomed;

    if (val_length(phones) <= 3)
        return phones;          /* too short to hold the pattern */

    second = val_cdr(phones);
    third = val_cdr(second);
    fourth = val_cdr(third);

    if (!cst_streq("A",val_string(val_car(second))))
        return phones;
    if (!cst_streq("hv",val_string(val_car(third))))
        return phones;

    fourth_name = val_string(val_car(fourth));
    if (!cst_streq("A",fourth_name) && cmu_indic_is_vowel(fourth_name))
        return phones;          /* hv is followed by a non-schwa vowel */

    /* Unlink the second cell, detach its tail, then free it alone */
    doomed = (cst_val *)(void *)second;
    set_cdr(phones,third);
    set_cdr(doomed,NULL);
    delete_val(doomed);

    return phones;
}

cst_val *cmu_indic_lex_lts_function(const struct lexicon_struct *l,
                                    const char *word, const char *pos,
                                    const cst_features *feats)
{
    /* Letter-to-sound entry point for the Indic lexicon.                */
    /*                                                                   */
    /* l:     unused here.                                               */
    /* word:  token to pronounce.                                        */
    /* pos:   passed through to the English lexicon lookup.              */
    /* feats: supplies "variant" (language code, default "none") and     */
    /*        "eng_shared" (default "0").                                */
    /*                                                                   */
    /* Words matching cst_rx_not_indic are looked up in cmu_lex; the     */
    /* English phones are returned directly when eng_shared is "1",      */
    /* otherwise they are mapped to Indic phones.  All other words are   */
    /* exploded into UTF-8 characters, converted to code points, mapped  */
    /* to phones, and then run through the variant specific postfixes.   */
    /* Returns the resulting phone list.                                 */
    const char *indic_variant;
    const char *eng_bilingual_flag;
    int drop_final_schwa;
    int is_tamil;
    cst_val *english_phones;
    cst_val *base_phones;
    cst_val *utflets;
    cst_val *ords;
    const cst_val *v;

    indic_variant = get_param_string(feats, "variant", "none");

    /* Which variants delete the word-final schwa */
    if (cst_streq(indic_variant, "hin") ||
        cst_streq(indic_variant, "mar") ||
        cst_streq(indic_variant, "ben") ||
        cst_streq(indic_variant, "raj") ||
        cst_streq(indic_variant, "guj") ||
        cst_streq(indic_variant, "asm") ||
        cst_streq(indic_variant, "pan"))
    {
        drop_final_schwa = 1;
    }
    else
    {
        drop_final_schwa = 0;
        if (!(cst_streq(indic_variant, "tel") ||
              cst_streq(indic_variant, "tam") ||
              cst_streq(indic_variant, "kan")))
            printf("Unknown indic variant: %s\n", indic_variant);
    }

    if (cst_regex_match(cst_rx_not_indic,word))
    {   /* It's English like, so use the English lexicon to find phones */
        /* printf("awb_debug cmu_indic_lex: English >%s<\n",word); */
        english_phones = lex_lookup(&cmu_lex,word,pos,feats);

        eng_bilingual_flag = get_param_string(feats, "eng_shared", "0");
        if (cst_streq(eng_bilingual_flag, "1"))
            return english_phones;  /* voice shares the English phones */

        base_phones =
            map_english_to_indic_phones(indic_variant,english_phones);
        delete_val(english_phones);
        return base_phones;
    }

    /* Indic script from here on: string to utf8 chars */
    utflets = cst_utf8_explode(word);
    /* chars to ord (consed in reverse, then put back in order) */
    ords = NULL;
    for (v=utflets; v; v=val_cdr(v))
        ords = cons_val(cst_utf8_ord(val_car(v)), ords);
    ords = val_reverse(ords);

    /* Ords to Phone List (Map) (with final schwa deletion) */
    base_phones =
        val_reverse(cmu_indic_lex_ord_to_phones(ords,
                                                drop_final_schwa,
                                                feats));

    delete_val(utflets);
    delete_val(ords);

    is_tamil = cst_streq(indic_variant,"tam");

    /* Nasal postfixes are skipped for Tamil, which instead prints a */
    /* message to stdout for every word handled on this path.        */
    if (is_tamil)
    {
        printf("Tamil doesnt have anuswara");
    }
    else
    {
        cmu_indic_lex_nasal_postfixes(base_phones);
    }
    /* Applied for every variant, Tamil included */
    base_phones = cmu_indic_lex_jnyan_replacement(base_phones,feats);

    /* Postfix Indic Nasals, Voicing, Medial Schwa deletion */
    if (is_tamil)
    {
        /* Tamil voicing rules */
        base_phones = cmu_indic_lex_tamil_voicing_postfixes(base_phones);
        /* Tamil tr replacement */
        base_phones = cmu_indic_lex_tamil_tr_replacement(base_phones);
        /* Tamil nr replacement */
        base_phones = cmu_indic_lex_tamil_nr_replacement(base_phones);
        /* final u -> uy */
        base_phones = cmu_indic_lex_tamil_final_u(base_phones);
    }

    if (cst_streq(indic_variant,"pan"))
    {
        /* Punjabi vowel and pronoun rules */
        base_phones = cmu_indic_lex_punjabi_vowel_postfixes(base_phones);
        /* Punjabi glide rules */
        base_phones = cmu_indic_lex_punjabi_glide_postfixes(base_phones);
    }

    if (cst_streq(indic_variant,"kan"))
    {
        cmu_indic_lex_kannada_spelling_postfixes(base_phones);
    }

    if (cst_streq(indic_variant,"hin") ||
        cst_streq(indic_variant,"mar") ||
        cst_streq(indic_variant,"guj") ||
        cst_streq(indic_variant,"raj") ||
        cst_streq(indic_variant,"pan"))
    {   /* Do medial schwa deletion (works on the reversed list) */
        base_phones=val_reverse(delete_medial_schwa(val_reverse(base_phones)));
        base_phones = cmu_indic_hindi_schwa_fixes(base_phones);
    }
    /* phone list to syls (done as post lexical rules) */

#if 0
    printf("cmu_indic_lex.c: indic word \"%s\" \n",word);
    val_print(stdout, base_phones);
    printf("\n");
#endif

    return base_phones;
}

static int cmu_indic_contains_vowel(const cst_val *r)
{
    /* TRUE if any phone name in the list r is a vowel, FALSE otherwise  */
    /* (including for an empty list).                                    */
    const cst_val *cell = r;

    while (cell)
    {
        if (cmu_indic_is_vowel(val_string(val_car(cell))))
            return TRUE;
        cell = val_cdr(cell);
    }

    return FALSE;
}

static int cmu_indic_has_vowel_in_syl(const cst_item *i)
{
    /* TRUE if item i, or any item reached from it by following          */
    /* item_prev, has a vowel as its "name" feature; FALSE otherwise.    */
    /* NOTE(review): presumably i sits in a relation holding only the    */
    /* current syllable's segments -- confirm against the caller.        */
    const cst_item *seg = i;

    while (seg)
    {
        if (cmu_indic_is_vowel(ffeature_string(seg,"name")))
            return TRUE;
        seg = item_prev(seg);
    }

    return FALSE;
}

int cmu_indic_syl_boundary(const cst_item *i,const cst_val *rest)
{
    /* Decide whether a syllable boundary falls after item i, given the  */
    /* phones still to come in rest.  Returns TRUE for a boundary and    */
    /* FALSE to keep extending the current syllable.                     */
    const cst_val *second, *third;
    const char *first;
    int first_is_cons;

    if (!rest)
        return TRUE;            /* end of word always closes */
    if (!cmu_indic_contains_vowel(rest))
        return FALSE;           /* no vowel left to carry another syl */
    if (!cmu_indic_has_vowel_in_syl(i))
        return FALSE;           /* no vowel seen yet on this side */

    second = val_cdr(rest);
    if (!second)
        return TRUE;            /* the remaining rules need two phones */

    first = val_string(val_car(rest));
    first_is_cons = !cmu_indic_is_vowel(first);

    /* NOTE(review): the vowel test here is applied to the same phone   */
    /* that was just compared with "n"; it may have been meant for the  */
    /* following phone -- confirm before changing.                      */
    if (cst_streq("n",first) && first_is_cons)
        return FALSE;

    if (first_is_cons)
    {
        /* vowel, then two consonants: keep the first consonant here */
        if (cmu_indic_is_vowel(ffeature_string(i,"name")) &&
            !cmu_indic_is_vowel(val_string(val_car(second))))
            return FALSE;

        /* three consonants in a row coming up */
        third = val_cdr(second);
        if (third &&
            !cmu_indic_is_vowel(val_string(val_car(second))) &&
            !cmu_indic_is_vowel(val_string(val_car(third))))
            return FALSE;
    }

    /* two identical phones coming up: do not break in front of them */
    if (cst_streq(first,val_string(val_car(second))))
        return FALSE;

    return TRUE;
}

#if 0
/* Disabled tracing wrapper for the syllable boundary decision.  It      */
/* prints the current item's name, up to three upcoming phones from      */
/* rest, and the decision, then returns that decision.  It forwards to   */
/* cmu_indic_syl_boundary_x; to enable it the real implementation would  */
/* have to be renamed to that, since this wrapper takes over the public  */
/* name.                                                                 */
int cmu_indic_syl_boundary(const cst_item *i,const cst_val *rest)
{
    /* For debugging the syl boundary code */
    int x;

    printf("syl boundary %s | ",ffeature_string(i,"name"));
    if (rest)
        printf("%s ",val_string(val_car(rest)));
    if (rest && val_cdr(rest))
        printf("%s ",val_string(val_car(val_cdr(rest))));
    if (rest && val_cdr(rest) && val_cdr(val_cdr(rest)))
        printf("%s ",val_string(val_car(val_cdr(val_cdr(rest)))));

    x = cmu_indic_syl_boundary_x(i,rest);
    printf("is %d",x);

    printf("\n");

    return x;
}
#endif

cst_utterance *cmu_indic_assign_lex_stress(cst_utterance *u)
{
    /* Assign stress to a lexical entry: from indic_lexicon.scm (aup) */
    /* The stress is placed on the syllable with the highest weight.  */
    /* If there is a tie, the last-most syllable with highest weight  */
    /* is chosen.  However, the last syllable of the word does not    */
    /* participate in tie-breaking. That is, it is stressed only when */
    /* there are no ties. (Hussein 1997)                              */
    /* not sure this code actually follows the above rules, but its   */
    /* the same as the festival code -- awb 20140606                  */
    const cst_item *syl, *w;
    const char *x1, *x2, *x3;
    int syl_weight, best_weight, sw;
    const cst_item *stress_position;

    for (w=relation_head(utt_relation(u,"SylStructure")); w; w=item_next(w))
    {
        for (syl=item_daughter(w); syl; syl=item_next(syl))
        {   /* Assign weight to each syllable */
            syl_weight = 0;
            x1 = ffeature_string(syl,"R:SylStructure.daughtern.name");
            x2 = ffeature_string(syl,"R:SylStructure.daughtern.p.name");
            x3 = ffeature_string(syl,"R:SylStructure.daughtern.p.p.name");
            if (cmu_indic_is_vowel(x1))
            {   /* If syllable is open vowel */
                if (cst_streq(x1,"A") || cst_streq(x1,"i") ||
                    cst_streq(x1,"u"))
                    syl_weight = 1;
                else
                    syl_weight = 2;
            }
            else
            {
                if (cmu_indic_is_vowel(x2))
                {
                    if (cst_streq(x2,"A") || cst_streq(x2,"i") ||
                        cst_streq(x2,"u"))
                        syl_weight = 2;
                    else
                        syl_weight = 3;
                }
                else if (cmu_indic_is_vowel(x3))
                    syl_weight = 3;
            }
            item_set_int(syl,"syl_weight",syl_weight);
        }
    }

    for (w=relation_head(utt_relation(u,"SylStructure")); w; w=item_next(w))
    {
        best_weight = 0;
        stress_position = NULL;
        for (syl=item_daughter(w); syl; syl=item_next(syl))
        {
            sw = ffeature_int(syl,"syl_weight");
            if (sw > best_weight)
            {
                best_weight = sw;
                stress_position = syl;
            }
            else if ((sw == best_weight) && item_next(syl))
                stress_position = syl;
        }
        if (stress_position)
            item_set_string(stress_position,"stress","1");
    }

    return u;
}

cst_utterance *cmu_indic_postlex(cst_utterance *u)
{
    /* Post lexical rules: for the variants listed below, assign lexical */
    /* stress to the utterance.  Other variants pass through unchanged.  */
    /* Returns u.                                                        */
    static const char * const stress_variants[] =
        { "hin", "mar", "pan", "raj", "asm", "ben", NULL };
    const char *indic_variant;
    int k;

    indic_variant = get_param_string(u->features, "variant", "none");

    for (k=0; stress_variants[k]; k++)
    {
        if (cst_streq(indic_variant,stress_variants[k]))
        {
            cmu_indic_assign_lex_stress(u);
            break;
        }
    }

#if 0
    /* Print out words as 'festival' lexical entries */
    const cst_item *word, *syl, *seg;

    for (word=relation_head(utt_relation(u,"Word"));
	 word; word=item_next(word))
    {
        printf("( \"%s\" nil (",ffeature_string(word,"name"));
        for (syl=item_daughter(item_as(word,"SylStructure"));
             syl;syl=item_next(syl))
        {
            printf("(( ");
            for (seg=item_daughter(item_as(syl,"SylStructure")); seg;
                 seg=item_next(seg))
            {
                printf("%s ",ffeature_string(seg,"name"));
            }
            printf(") %s %d) ",
                   ffeature_string(syl,"stress"),
                   ffeature_int(syl,"syl_weight"));

        }
        printf("))\n");
    }
#endif

    return u;
}

/* The single shared Indic lexicon object; as a file-scope object it     */
/* starts out zero-initialized.                                          */
cst_lexicon cmu_indic_lex;

cst_lexicon *cmu_indic_lex_init(void)
{
    /* Should it be global const or dynamic */
    /* Can make lts_rules just a cart tree like others */
    /* Fills in the shared lexicon on the first call and returns it; a   */
    /* non-NULL lts_function marks it as already set up, so later calls  */
    /* just return the same pointer.                                     */
    cst_lexicon *lex = &cmu_indic_lex;

    if (!lex->lts_function)
    {
        lex->name = "cmu_indic_lex";

        lex->lts_function = cmu_indic_lex_lts_function;
        lex->syl_boundary = cmu_indic_syl_boundary;
        lex->postlex = cmu_indic_postlex;
    }

    return lex;
}