shithub: nime

ref: 6775c0a9a046673bb6e472bd286271725013968a
dir: /src/to-kana.c/

View raw version
#include <u.h>
#include <stdio.h>
#include <libc.h>

void eval(int, int);
void kanafill(Rune, char*, int, Rune*);
void kanalook(char*, Rune*);
void printhelp(void);
int strappend(char*, char);


/*
 * Program entry point: walks the command-line flags.
 *   -s  convert standard input, writing the result to standard output
 *   -h  print the option summary and exit successfully
 * Any other flag prints the usage line plus the summary and exits with
 * the status "usage".
 */
void
main(int argc, char *argv[])
{
	ARGBEGIN{
	case 'h':
		printhelp();
		exits(nil);
	case 's':
		/* hand the raw descriptors of the stdio streams to eval */
		eval(fileno(stdin), fileno(stdout));
		break;
	default:
		fprint(2, "usage: %s [-s][-h]\n", argv0);
		printhelp();
		exits("usage");
	}ARGEND;
	exits(nil);
}

/*
 * Prints the summary of supported command-line options.
 */
void
printhelp(void)
{
	static char *lines[] = {
		"options:\n",
		" [-s] - use stdin stream\n",
		" [-h] - show this help\n",
	};
	int i;

	for(i = 0; i < (int)(sizeof lines / sizeof lines[0]); i++)
		print("%s", lines[i]);
}

/*
 * Reads romaji one byte at a time from the file descriptor fpin until
 * end of file or a read error, and writes the result to the file
 * descriptor fpout.
 *
 * Bytes are collected in a small buffer; each time strappend reports a
 * complete syllable the buffer is converted with kanalook and a line of
 * the form "<romaji> <kana>" is printed.  Whatever is still pending when
 * input ends is printed on a line of its own.
 */
void
eval(int fpin, int fpout)
{
	char charin;
	char buf[10];
	Rune kana[10];
	int len;

	/*
	 * strappend looks for the terminating NUL and kanalook only writes
	 * the runes it needs, so both buffers must start out zeroed.
	 */
	memset(buf, 0, sizeof buf);
	memset(kana, 0, sizeof kana);
	len = 0;

	/* read returns 0 at end of file and -1 on error; stop on either */
	while(read(fpin, &charin, 1) > 0){
		if(len >= (int)sizeof buf - 1){
			/*
			 * The buffer filled up without forming a kana.  Emit the
			 * raw text and start over rather than writing past the end.
			 */
			fprint(fpout, "%s\n", buf);
			memset(buf, 0, sizeof buf);
			len = 0;
		}
		len++;
		if(!strappend(buf, charin))
			continue;

		kanalook(buf, kana);
		fprint(fpout, "%s %S\n", buf, kana);
		memset(buf, 0, sizeof buf);
		memset(kana, 0, sizeof kana);
		len = 0;
	}
	fprint(fpout, "%s\n", buf);
}

/*
 * Appends the character in to the end of the NUL-terminated string.
 * Returns 1 when the appended character completes a kana, i.e. when it
 * is one of the vowels a, i, u, e, o, or an 'n' that directly follows
 * another 'n'; returns 0 otherwise.
 *
 * No terminator is written after the new character and no bounds are
 * checked, so the caller must pass a zero-filled buffer with spare room.
 */
int
strappend(char* string, char in)
{
	int i;
	int end;

	i = 0;
	while(string[i] != 0)
		i++;

	switch(in){
	case 'a':
	case 'i':
	case 'u':
	case 'e':
	case 'o':
		end = 1;
		break;
	case 'n':
		/* guard i so an empty string never reads string[-1] */
		end = (i > 0 && string[i-1] == 'n');
		break;
	default:
		end = 0;
		break;
	}
	string[i] = in;
	return end;
}

/*
 * Maps a vowel letter to its column number (a=0, i=1, u=2, e=3, o=4).
 * Returns -1 for any other character.
 */
static int
vowelidx(char c)
{
	switch(c){
	case 'a':
		return 0;
	case 'i':
		return 1;
	case 'u':
		return 2;
	case 'e':
		return 3;
	case 'o':
		return 4;
	default:
		return -1;
	}
}

/*
 * Stores the small ゃ, ゅ or ょ matching vowel in out[1].
 * Any other vowel leaves out[1] untouched.
 */
static void
yoonfill(char vowel, Rune *out)
{
	switch(vowel){
	case 'a':
		out[1] = L'ゃ';
		break;
	case 'u':
		out[1] = L'ゅ';
		break;
	case 'o':
		out[1] = L'ょ';
		break;
	default:
		break;
	}
}

/*
 * Fills out for one kana row.  offs[v] is the code point distance from
 * the row's a-column kana (base) to its kana in vowel column v.
 * A leading 'y' in in selects the i-column kana followed by a small
 * ya/yu/yo chosen by in[1].
 */
static void
rowfill(Rune base, int *offs, int dakuten, char *in, Rune *out)
{
	int v;

	if(in[0] == 'y'){
		out[0] = (Rune)((int)base + offs[1] + dakuten);
		yoonfill(in[1], out);
		return;
	}
	v = vowelidx(in[0]);
	if(v >= 0)
		out[0] = (Rune)((int)base + offs[v] + dakuten);
}

/*
 * kanafill writes the kana selected by a row and a romaji tail into out.
 *
 *   base     the a-column kana of the row (か, さ, た, な, は, ま, や, ら,
 *            わ) or ん
 *   in       the romaji after the leading consonant: a vowel, or 'y'
 *            followed by a vowel
 *   dakuten  0 for the plain kana, 1 for dakuten, 2 for maru (は row
 *            only); it is added to the code point, and is ignored for
 *            the な, ま, ら, や, わ rows and ん
 *   out      rune array receiving the result; only the runes that are
 *            needed are written, so the caller should pass it zeroed
 *
 * Unrecognised input leaves out untouched.
 */
void
kanafill(Rune base, char* in, int dakuten, Rune* out)
{
	/* code point distance from the a-column kana to a, i, u, e, o */
	static int plainrow[] = {0, 1, 2, 3, 4};	/* な ま ら: contiguous */
	static int voicedrow[] = {0, 2, 4, 6, 8};	/* か さ: voiced form follows each kana */
	static int tarow[] = {0, 2, 5, 7, 9};		/* た: small っ sits before つ */
	static int harow[] = {0, 3, 6, 9, 12};		/* は: voiced and maru forms follow each kana */

	switch(base){
	/* や row only has a, u and o */
	case L'や':
		switch(in[0]){
		case 'a':
			out[0] = L'や';
			break;
		case 'u':
			out[0] = L'ゆ';
			break;
		case 'o':
			out[0] = L'よ';
			break;
		default:
			break;
		}
		break;

	case L'ん':
		out[0] = L'ん';
		break;

	/*
	 * The わ row is わ ゐ ゑ を in code point order (there is no "wu"),
	 * and を is directly followed by ん, so it cannot share the
	 * contiguous-row arithmetic: that would turn "wo" into ん.
	 */
	case L'わ':
		switch(in[0]){
		case 'a':
			out[0] = L'わ';
			break;
		case 'i':
			out[0] = L'ゐ';
			break;
		case 'e':
			out[0] = L'ゑ';
			break;
		case 'o':
			out[0] = L'を';
			break;
		default:
			break;
		}
		break;

	case L'は':
		rowfill(base, harow, dakuten, in, out);
		break;

	/* rows without voiced forms: dakuten is deliberately not applied */
	case L'ら':
	case L'ま':
	case L'な':
		rowfill(base, plainrow, 0, in, out);
		break;

	case L'か':
	case L'さ':
		rowfill(base, voicedrow, dakuten, in, out);
		break;

	case L'た':
		rowfill(base, tarow, dakuten, in, out);
		break;

	default:
		break;
	}
}

/*
 * Fills str for a consonant whose kana all come from the i column of a
 * row (j -> じ, ch -> ち).  A tail starting with 'y' or 'i' is passed to
 * kanafill as is; any other vowel V is looked up as "yV", so "ja" gives
 * じゃ.  The tail is copied into a local buffer so the caller's romaji
 * string is never modified.
 */
static void
palatal(Rune base, char *tail, int dakuten, Rune *str)
{
	char yoon[3];

	if(tail[0] == 'y' || tail[0] == 'i'){
		kanafill(base, tail, dakuten, str);
		return;
	}
	yoon[0] = 'y';
	yoon[1] = tail[0];
	yoon[2] = 0;
	kanafill(base, yoon, dakuten, str);
}

/*
 * kanalook converts the romaji syllable in buf to kana and stores the
 * result in the rune array str.  The first letter of buf selects the
 * kana row; the remaining letters are handed to kanafill to select the
 * kana within that row.
 *
 * Only the runes that are needed are written, so the caller should pass
 * str zeroed.  Unrecognised input leaves str untouched.  buf is not
 * modified.
 */
void
kanalook(char* buf, Rune* str)
{
	char* end;

	/* end is the part of the syllable after the leading consonant */
	end = buf + 1;
	switch(buf[0]){
	/* あ family */
	case 'a':
		str[0] = L'あ';
		break;
	case 'i':
		str[0] = L'い';
		break;
	case 'u':
		str[0] = L'う';
		break;
	case 'e':
		str[0] = L'え';
		break;
	case 'o':
		str[0] = L'お';
		break;

	/* か family */
	case 'k':
		kanafill(L'か', end, 0, str);
		break;
	case 'g':
		kanafill(L'か', end, 1, str);
		break;

	/* さ family */
	case 's':
		kanafill(L'さ', end, 0, str);
		break;
	case 'z':
		kanafill(L'さ', end, 1, str);
		break;
	case 'j':
		palatal(L'さ', end, 1, str);
		break;

	/* た family */
	case 't':
		kanafill(L'た', end, 0, str);
		break;
	case 'c':
		/* "ch" spells the ち kana; the h carries no information */
		if(buf[1] == 'h')
			end = buf + 2;
		palatal(L'た', end, 0, str);
		break;
	case 'd':
		kanafill(L'た', end, 1, str);
		break;

	/* な family (and ん) */
	case 'n':
		if(buf[1] == 'n')
			str[0] = L'ん';
		else
			kanafill(L'な', end, 0, str);
		break;

	/* は family */
	case 'h':
		kanafill(L'は', end, 0, str);
		break;
	case 'f':
		/*
		 * Only "fu" has a kana of its own (ふ); the other vowels are
		 * written as ふ followed by the small vowel.
		 */
		str[0] = L'ふ';
		switch(buf[1]){
		case 'a':
			str[1] = L'ぁ';
			break;
		case 'i':
			str[1] = L'ぃ';
			break;
		case 'e':
			str[1] = L'ぇ';
			break;
		case 'o':
			str[1] = L'ぉ';
			break;
		default:
			break;
		}
		break;
	case 'b':
		kanafill(L'は', end, 1, str);
		break;
	case 'p':
		kanafill(L'は', end, 2, str);
		break;

	/* ま family */
	case 'm':
		kanafill(L'ま', end, 0, str);
		break;

	/* や family */
	case 'y':
		kanafill(L'や', end, 0, str);
		break;

	/* ら family */
	case 'r':
		kanafill(L'ら', end, 0, str);
		break;

	/* わ family */
	case 'w':
		kanafill(L'わ', end, 0, str);
		break;

	default:
		break;
	}
}