ref: 60929ada1a066c49396d48bf45cf347a50f8e363
dir: /src/to-kana.c/
/*
 *******************************************************************************
 * Author: Ethan Long
 * Licence: Public Domain
 * Email: ethandavidlong@gmail.com, u7281759@anu.edu.au
 * Description: to-kana is a program for converting streams of romaji to
 *              hiragana or katakana.
 */

#include <u.h>
#include <libc.h>
#include <stdio.h>

enum {
	Bufsz = 10,	/* capacity of the romaji and kana scratch buffers */
	Kataoff = 96	/* code point distance from a hiragana to its katakana */
};

void eval(int, int);
void kanafill(Rune, char*, int, Rune*);
void kanalook(char*, Rune*, int);
void printhelp(void);
int strappend(char*, char);

static void clearbufs(char*, Rune*);
static void glidetail(char*, char*);
static Rune smallglide(char);
static Rune smallvowel(char);
static int vowelindex(char);

/*
 * Offsets from a family's base kana (its 'a' member) to the a, i, u, e and o
 * members of that family.  -1 marks a syllable the family does not have.
 */
static int stepya[5] = { 0, -1, 2, -1, 4 };
static int stepwa[5] = { 0, 1, -1, 2, 3 };
static int stepha[5] = { 0, 3, 6, 9, 12 };	/* plain, dakuten, handakuten */
static int stepone[5] = { 0, 1, 2, 3, 4 };	/* no voiced forms */
static int steptwo[5] = { 0, 2, 4, 6, 8 };	/* plain, dakuten */
static int stepta[5] = { 0, 2, 5, 7, 9 };	/* as steptwo, plus small tsu */

/*
 * Entry point.  -s converts standard input to standard output, -h prints the
 * option summary.  Any other option prints the usage line and exits with the
 * status "usage".  Without options nothing is converted.
 */
void
main(int argc, char *argv[])
{
	int fpin, fpout;

	ARGBEGIN{
	case 's':
		fpin = fileno(stdin);
		fpout = fileno(stdout);
		eval(fpin, fpout);
		break;
	case 'h':
		printhelp();
		exits(nil);
	default:
		fprint(2, "usage: %s [-s][-h]\n", argv0);
		printhelp();
		exits("usage");
	}ARGEND;
	exits(nil);
}

/*
 * Prints the list of supported options on standard output.
 */
void
printhelp(void)
{
	print("options:\n");
	print(" [-s] - use stdin/out streams\n");
	print(" [-h] - show this help\n");
}

/*
 * Reads romaji from the file descriptor fpin one byte at a time until end of
 * file or a read error, and writes kana to the file descriptor fpout.
 *
 *  - '!' toggles between hiragana and katakana output.
 *  - Lower case letters are collected until strappend reports a complete
 *    syllable, which is then converted by kanalook.
 *  - Every other byte is copied through unchanged, preceded by whatever
 *    letters were still pending.
 *  - Letter sequences that cannot be converted are written out verbatim
 *    rather than being dropped.
 */
void
eval(int fpin, int fpout)
{
	int katakana;
	char charin;
	char buf[Bufsz];
	Rune kana[Bufsz];

	katakana = 0;
	/* strappend looks for the terminator, so buf must start out empty */
	clearbufs(buf, kana);

	/* read returns 0 at end of file and -1 on error; stop on both */
	while(read(fpin, &charin, 1) > 0){
		if(charin == '!'){
			katakana = !katakana;
			continue;
		}
		if(charin < 'a' || charin > 'z'){
			/* not romaji: flush pending letters, copy the byte */
			fprint(fpout, "%s", buf);
			write(fpout, &charin, 1);
			clearbufs(buf, kana);
			continue;
		}
		if(strlen(buf) >= Bufsz - 1){
			/* no room for another letter and its terminator */
			fprint(fpout, "%s", buf);
			clearbufs(buf, kana);
		}
		if(strappend(buf, charin)){
			kanalook(buf, kana, katakana);
			if(kana[0] != 0)
				fprint(fpout, "%S", kana);
			else
				fprint(fpout, "%s", buf);
			clearbufs(buf, kana);
		}
	}
	/* letters that never formed a syllable */
	fprint(fpout, "%s", buf);
}

/*
 * Zeroes the Bufsz-element romaji and kana buffers used by eval.
 */
static void
clearbufs(char *buf, Rune *kana)
{
	int i;

	for(i = 0; i < Bufsz; i++){
		buf[i] = 0;
		kana[i] = 0;
	}
}

/*
 * Appends the provided character to the end of the provided NUL-terminated
 * string.  The caller must guarantee room for the character and a
 * terminator.
 *
 * Returns 1 if the string can now form a complete kana, that is when the
 * character is a vowel or the second of two consecutive 'n', and 0
 * otherwise.
 */
int
strappend(char* string, char in)
{
	int end;
	int i;

	for(i = 0; string[i] != 0; i++)
		;
	end = (in == 'a' || in == 'i' || in == 'u' || in == 'e' || in == 'o');
	/* i > 0: an empty string has no previous character to inspect */
	if(in == 'n' && i > 0 && string[i-1] == 'n')
		end = 1;
	string[i] = in;
	string[i+1] = 0;
	return end;
}

/*
 * Builds in tail (3 bytes) the syllable ending that kanafill expects for the
 * palatal consonants sh, j and ch.  rest is the romaji following the
 * consonant: "i" and endings that already start with 'y' are copied as they
 * are, any other vowel gets a 'y' in front ("a" becomes "ya").
 */
static void
glidetail(char *rest, char *tail)
{
	tail[0] = 0;
	tail[1] = 0;
	tail[2] = 0;
	if(rest[0] == 0)
		return;
	if(rest[0] == 'i' || rest[0] == 'y'){
		tail[0] = rest[0];
		tail[1] = rest[1];
	}else{
		tail[0] = 'y';
		tail[1] = rest[0];
	}
}

/*
 * Returns the small hiragana that follows an i-column kana for the given
 * vowel (ya, yu, yo and the e of she/je/che), or 0 if there is none.
 */
static Rune
smallglide(char vowel)
{
	switch(vowel){
	case 'a':
		return L'ゃ';
	case 'u':
		return L'ゅ';
	case 'o':
		return L'ょ';
	case 'e':
		return L'ぇ';
	default:
		return 0;
	}
}

/*
 * Returns the small hiragana vowel written after fu in fa, fi, fe and fo,
 * or 0 for any other character.
 */
static Rune
smallvowel(char vowel)
{
	switch(vowel){
	case 'a':
		return L'ぁ';
	case 'i':
		return L'ぃ';
	case 'e':
		return L'ぇ';
	case 'o':
		return L'ぉ';
	default:
		return 0;
	}
}

/*
 * Maps a, i, u, e, o to 0..4; returns -1 for every other character.
 */
static int
vowelindex(char c)
{
	switch(c){
	case 'a':
		return 0;
	case 'i':
		return 1;
	case 'u':
		return 2;
	case 'e':
		return 3;
	case 'o':
		return 4;
	default:
		return -1;
	}
}

/*
 * kanalook converts one romaji syllable in buf to kana and stores the result
 * in str, which must be zero-filled on entry and have room for at least four
 * runes.  A non-zero katakana selects katakana, otherwise hiragana is
 * produced.  buf is not modified.
 *
 * A doubled leading consonant other than 'n' produces a small tsu before the
 * syllable; "nn" is the syllabic n.  Besides the plain consonant + vowel
 * spellings, sh-, ch-/c-, j-, f- and "tsu" are understood.  When the
 * syllable is not recognised the family part of str is left untouched.
 */
void
kanalook(char* buf, Rune* str, int katakana)
{
	int index;
	char* end;
	char tail[3];
	Rune small;

	katakana = katakana ? Kataoff : 0;
	index = 0;
	if(buf[0] != 0 && buf[0] != 'n' && buf[0] == buf[1]){
		str[index] = (Rune)((int)(L'っ') + katakana);
		index++;
	}
	/* end: the romaji after the (possibly doubled) leading consonant */
	end = buf + 1 + index;

	switch(buf[0]){
	/* あ family */
	case 'a':
		str[0] = (Rune)((int)(L'あ') + katakana);
		break;
	case 'i':
		str[0] = (Rune)((int)(L'い') + katakana);
		break;
	case 'u':
		str[0] = (Rune)((int)(L'う') + katakana);
		break;
	case 'e':
		str[0] = (Rune)((int)(L'え') + katakana);
		break;
	case 'o':
		str[0] = (Rune)((int)(L'お') + katakana);
		break;
	/* か family */
	case 'k':
		kanafill((Rune)((int)(L'か') + katakana), end, 0, str + index);
		break;
	case 'g':
		kanafill((Rune)((int)(L'か') + katakana), end, 1, str + index);
		break;
	/* さ family */
	case 's':
		if(end[0] == 'h'){
			/* shi is plain し, sha/shu/sho are し plus a small kana */
			glidetail(end + 1, tail);
			end = tail;
		}
		kanafill((Rune)((int)(L'さ') + katakana), end, 0, str + index);
		break;
	case 'z':
		kanafill((Rune)((int)(L'さ') + katakana), end, 1, str + index);
		break;
	case 'j':
		/* ji is じ, ja/ju/jo (and jya/jyu/jyo) are じ plus a small kana */
		glidetail(end, tail);
		kanafill((Rune)((int)(L'さ') + katakana), tail, 1, str + index);
		break;
	/* た family */
	case 't':
		if(end[0] == 's' && end[1] == 'u')
			end++;	/* tsu is the same kana as tu */
		kanafill((Rune)((int)(L'た') + katakana), end, 0, str + index);
		break;
	case 'c':
		if(end[0] == 'h')
			end++;	/* ch- and c- are treated alike */
		glidetail(end, tail);
		kanafill((Rune)((int)(L'た') + katakana), tail, 0, str + index);
		break;
	case 'd':
		kanafill((Rune)((int)(L'た') + katakana), end, 1, str + index);
		break;
	/* な family (and ん) */
	case 'n':
		if(buf[1] == 'n')
			str[0] = (Rune)((int)(L'ん') + katakana);
		else
			kanafill((Rune)((int)(L'な') + katakana), end, 0, str + index);
		break;
	/* は family */
	case 'h':
		kanafill((Rune)((int)(L'は') + katakana), end, 0, str + index);
		break;
	case 'f':
		/* fu is ふ; fa, fi, fe, fo are ふ followed by a small vowel */
		if(end[0] == 'u'){
			str[index] = (Rune)((int)(L'ふ') + katakana);
		}else if((small = smallvowel(end[0])) != 0){
			str[index] = (Rune)((int)(L'ふ') + katakana);
			str[index + 1] = (Rune)((int)small + katakana);
		}
		break;
	case 'b':
		kanafill((Rune)((int)(L'は') + katakana), end, 1, str + index);
		break;
	case 'p':
		kanafill((Rune)((int)(L'は') + katakana), end, 2, str + index);
		break;
	/* ま family */
	case 'm':
		kanafill((Rune)((int)(L'ま') + katakana), end, 0, str + index);
		break;
	/* や family */
	case 'y':
		kanafill((Rune)((int)(L'や') + katakana), end, 0, str + index);
		break;
	/* ら family */
	case 'r':
		kanafill((Rune)((int)(L'ら') + katakana), end, 0, str + index);
		break;
	/* わ family */
	case 'w':
		kanafill((Rune)((int)(L'わ') + katakana), end, 0, str + index);
		break;
	default:
		break;
	}
}

/*
 * The kanafill function takes in the base kana of the family to be output
 * (the hiragana or katakana 'a' member, e.g. か or カ), the romaji ending of
 * the syllable (a vowel, or 'y' followed by a vowel), an integer
 * representing the type of dakuten (0 for none, 1 for regular, 2 for maru),
 * and a pointer to the rune array that is being edited.
 *
 * out[0] receives the kana; for a 'y' ending out[0] is the i-column kana and
 * out[1] the matching small kana, in the same script as base.  dakuten is
 * ignored by the families that have no voiced forms.  Nothing is written
 * when base or the ending is not recognised.
 */
void
kanafill(Rune base, char* in, int dakuten, Rune* out)
{
	int kata;	/* Kataoff when base is katakana, otherwise 0 */
	int glide;	/* non-zero if the family accepts a 'y' ending */
	int v;
	int *step;
	Rune small;

	kata = (base >= L'ァ') ? Kataoff : 0;
	switch((Rune)(base - kata)){
	/* Special Boys */
	case L'や':
		step = stepya;
		glide = 0;
		dakuten = 0;
		break;
	case L'わ':
		step = stepwa;
		glide = 0;
		dakuten = 0;
		break;
	case L'ん':
		out[0] = base;
		return;
	/* は family has marudakuten */
	case L'は':
		step = stepha;
		glide = 1;
		break;
	/* families with no dakuten */
	case L'ら':
	case L'ま':
	case L'な':
		step = stepone;
		glide = 1;
		dakuten = 0;
		break;
	/* families with just dakuten */
	case L'か':
	case L'さ':
		step = steptwo;
		glide = 1;
		break;
	/* た family has small つ and dakuten */
	case L'た':
		step = stepta;
		glide = 1;
		break;
	default:
		return;
	}

	if(glide && in[0] == 'y'){
		/* i-column kana followed by a small kana in the same script */
		out[0] = (Rune)((int)base + step[1] + dakuten);
		small = smallglide(in[1]);
		if(small != 0)
			out[1] = (Rune)((int)small + kata);
		return;
	}

	v = vowelindex(in[0]);
	if(v < 0 || step[v] < 0)
		return;
	out[0] = (Rune)((int)base + step[v] + dakuten);
}