ref: b66315e2c408ce553082f722b0a9587a5ab5460e
dir: /tools/make_lts.scm/
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; Language Technologies Institute ;;; ;;; Carnegie Mellon University ;;; ;;; Copyright (c) 1999 ;;; ;;; All Rights Reserved. ;;; ;;; ;;; ;;; Permission is hereby granted, free of charge, to use and distribute ;;; ;;; this software and its documentation without restriction, including ;;; ;;; without limitation the rights to use, copy, modify, merge, publish, ;;; ;;; distribute, sublicense, and/or sell copies of this work, and to ;;; ;;; permit persons to whom this work is furnished to do so, subject to ;;; ;;; the following conditions: ;;; ;;; 1. The code must retain the above copyright notice, this list of ;;; ;;; conditions and the following disclaimer. ;;; ;;; 2. Any modifications must be clearly marked as such. ;;; ;;; 3. Original authors' names are not deleted. ;;; ;;; 4. The authors' names are not used to endorse or promote products ;;; ;;; derived from this software without specific prior written ;;; ;;; permission. ;;; ;;; ;;; ;;; CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK ;;; ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;; ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;; ;;; SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE ;;; ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;; ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;; ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;; ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;; ;;; THIS SOFTWARE. ;;; ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Author: Alan W Black (awb@cs.cmu.edu) ;;; ;;; Date: December 1999 ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; Generate a C compilable lts rules. ;;; ;;; ;;; ;;; Two modes, from decision graphs as wfsts or from CART trees ;;; ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; These are preordained by the LTS building process (set! lts_context_window_size 4) (set! lts_context_extra_feats 1) (define (ltsregextoC name trees idir odir) "(ltsregextoC name idir odir) Converts its wfsts to a C compilation structure for flite. Assumes $idir/[a-z].tree.wfst to compile from." (let ((ofde (fopen (path-append odir (string-append name "_lts_rules.c")) "w")) (ofdm (fopen (path-append odir (string-append name "_lts_model.c")) "w")) (ofdh (fopen (path-append odir (string-append name "_lts_model.h")) "w")) (ifd) (rule_index nil)) (set! lts_pos 0) (set! phone_table (list "epsilon")) (set! letter_table (list "nothing" "#" "0")) (set! letter_table (append letter_table (mapcar car trees))) (format ofde "/*******************************************************/\n") (format ofde "/** Autogenerated lts rules (regex) for %s */\n" name) (format ofde "/** from %s */\n" idir) (format ofde "/*******************************************************/\n") (format ofde "\n") (format ofde "#include \"cst_string.h\"\n") (format ofde "#include \"cst_lts.h\"\n") (format ofde "#include \"cst_lexicon.h\"\n") (format ofdm "/*******************************************************/\n") (format ofdm "/** Autogenerated lts rules (regex) for %s */\n" name) (format ofdm "/** from %s */\n" idir) (format ofdm "/*******************************************************/\n") (format ofdm "\n") (format ofdm "#include \"cst_string.h\"\n") (format ofdm "#include \"cst_lts.h\"\n") (format ofdm "#include \"cst_lexicon.h\"\n") (format ofdm "#include \"%s_lts_model.h\"\n\n" name) (format ofdm "const cst_lts_model %s_lts_model[] = \n" name) (format ofdm "{\n") (set! ln 0) (mapcar (lambda (l) (let ((ifd (fopen (path-append idir (string-append l ".tree.wfst")) "r"))) (format t "doing: %s\n" l) (format ofdm " /** letter \"%s\" **/\n" l) (format ofdh " /** letter \"%s\" **/\n" l) (set! rule_index (cons (list l lts_pos ln) rule_index)) (set! lts_pos (dump_lts_wfst ln ifd ofdm ofdh lts_pos)) (fclose ifd) (set! ln (+ 1 ln)) )) (cdr (cddr letter_table)) ) (format ofdm " 0, 0, 0,0, 0,0\n") (format ofdm "};\n") ;; Make the letter table be the same order as the rule inde (+2) ;; The phone table (bytes to phone names) (format ofde "\n") (format ofde "const char * const %s_lts_phone_table[%d] = \n" name (+ 1 (length phone_table))) (format ofde "{\n") (mapcar (lambda (p) (format ofde " \"%s\",\n" p)) phone_table) (format ofde " NULL\n") (format ofde "};\n") ;; The letter_table (bytes to letter names) (format ofde "\n") (format ofde "const char * const %s_lts_letter_table[%d] = \n" name (+ 1 (length letter_table))) (format ofde "{\n") (mapcar (lambda (p) (format ofde " \"%s\",\n" p)) letter_table) (format ofde " NULL\n") (format ofde "};\n") ;; Which rule starts where (format ofde "\n") (format ofde "const cst_lts_addr %s_lts_letter_index[%d] = \n" name (+ 1 (length rule_index)) ) (format ofde "{\n") (mapcar (lambda (p) (format ofde " %d, /* %s */\n" (car (cdr p)) (car p))) (reverse rule_index)) (format ofde " 0\n") (format ofde "};\n") (format ofde "\n") ; (format ofde "const cst_lts_rules %s_lts_rules = {\n" name) ; (format ofde " \"%s\",\n" name) ; (format ofde " %s_lts_letter_index,\n" name) ; (format ofde " %s_lts_model,\n" name) ; (format ofde " %s_lts_phone_table,\n" name) ; (format ofde " 4, /* context_window_size */\n") ; (format ofde " 1, /* context_extra_feats */\n") ; (format ofde " %s_lts_letter_table\n" name) ; (format ofde "};\n") ; (format ofde "\n") (fclose ofde) (fclose ofdh) (fclose ofdm) )) (define (dump_lts_wfst ln ifd ofde ofdh lts_pos) "(dump_lts_wfst ifd ofde ofdh lts_pos) Dump the WFST as a byte table to ifd. Jumps are dumped as #define's to ofdh so forward references work. lts_pos is the rule position. Each state is saves as feature value true_addr false_addr Feature and value are single bytes, which addrs are double bytes." (let ((state)) ;; Skip WFST header (while (not (string-equal (set! state (readfp ifd)) "EST_Header_End")) (if (equal? state (eof-val)) (error "eof in lts regex file"))) (while (not (equal? (set! state (readfp ifd)) (eof-val))) (format ofdh "#define LTS_STATE_%d_%d %s\n" ln (car (car state)) (lts_bytify lts_pos)) (cond ((string-equal "final" (car (cdr (car state)))) (set! lts_pos (- lts_pos 1)) t) ;; do nothing ((string-matches (car (car (cdr state))) ".*_.*") (format ofde " %s, %d, %s , %s , \n" (lts_feat (car (car (cdr state)))) ; (lts_val (car (car (cdr state)))) (lts_let_num (lts_letter (car (car (cdr state)))) 0 letter_table) (format nil "LTS_STATE_%d_%d" ln (car (cdr (cdr (car (cdr (cdr state))))))) (format nil "LTS_STATE_%d_%d" ln (car (cdr (cdr (car (cdr state)))))))) (t ;; its a letter output state (format ofde " 255, %s, 0,0 , 0,0 , \n" (lts_phone (car (car (cdr state))) 0 phone_table)))) (set! lts_pos (+ 1 lts_pos))) lts_pos)) (define (lts_feat trans) "(lts_feat trans) Returns the feature number represented in this transition name." (let ((fname (substring trans 5 (- (length trans) 11)))) (if (string-matches fname ".*_i?") (set! fname (string-before fname "_"))) (cond ((string-equal fname "p.p.p.p.name") 0) ((string-equal fname "p.p.p.name") 1) ((string-equal fname "p.p.name") 2) ((string-equal fname "p.name") 3) ((string-equal fname "n.name") 4) ((string-equal fname "n.n.name") 5) ((string-equal fname "n.n.n.name") 6) ((string-equal fname "n.n.n.n.name") 7) (t (error (format nil "ltsregex2C: unknown feat %s %s\n" fname trans )))))) (define (lts_letter trans) "(lts_val trans) The letter being tested." (string-before (string-after trans "is_") "_")) (define (lts_val trans) "(lts_val trans) The letter being tested." (substring trans (- (length trans) 2) 1)) (define (lts_phone p n table) (cond ((string-equal p (car table)) n) ((not (cdr table)) ;; new p (set-cdr! table (list p)) (+ 1 n)) (t (lts_phone p (+ 1 n) (cdr table))))) (define (lts_let_num p n table) (cond ((null table) (format t "no entry %s %d\n" p n) (errrr)) ((string-equal p (car table)) n) (t (lts_let_num p (+ 1 n) (cdr table))))) (define (lts_bytify n) "(lts_bytify n) Return this short as a two byte comma separated string." (let ((xx (format nil "%04x" n))) ;; This is unfortunately byte order specific (format nil "0x%s,0x%s" (substring xx 2 2) (substring xx 0 2)))) (provide 'make_lts)