shithub: riscv

ref: b3dc2af6d671b6311ccfc8a5f053dc85151cb7ed
dir: /sys/src/cmd/ktrans/main.c/

View raw version
/*
 *   Mostly based on the original source codes of Plan 9 release 2
 *   distribution.
 *             by Kenji Okamoto, August 4 2000
 *                   Osaka Prefecture Univ.
 *                   okamoto@granite.cias.osakafu-u.ac.jp
 */

/*
 * A glossary on some of the Japanese vocabulary used:
 * kana: syllabic lettering, either hiragana(ひらがな) or katakana(カタカナ)
 * kanji(漢字): borrowed characters, 楽 in 楽しい
 * Okurigana(送り仮名): kana tail to kanji, しい in 楽しい
 * Joshi(助詞): particle, は in 私は
 * Jisho(辞書): dictionary
 * kouho(候補): candidate
 */

#include <u.h>
#include <libc.h>
#include <bio.h>
#include "hash.h"
#include "ktrans.h"

#define	LSIZE	256		/* capacity of lbuf[] (in runes) and okurigana[] (in bytes) */

Rune	lbuf[LSIZE];		/* hiragana buffer for key input written by send() */
Hmap	*table;			/* conversion map currently in effect; switched by changelang() */
uchar	okurigana[LSIZE];	/* buffer for okurigana */
char	okuri = 0;		/* buffer/flag for capital input char */
int	in, out;		/* descriptors keys are read from and output is written to; set by kbdopen() */
int	llen, olen, joshi = 0;	/* runes held in lbuf, bytes held in okurigana, flag set by two capitals in a row */
int	natural = 1;		/* not Japanese but English mode */

/* functions defined further down in this file */
int	changelang(int);
int	dotrans(Hmap*);
int	nrune(char *);
void	send(uchar *, int);
Hmap*	opendict(Hmap *, char *);

/*
 * Splice this program into the keyboard stream and set the globals
 * in and out.
 *
 * /dev/kbd is preferred; if it cannot be opened /dev/cons is used
 * instead.  A "#|" pipe is bound on /n/temp, its data1 end is kept
 * open for writing and its data end is bound over the keyboard file
 * that was opened, so whoever opens that file afterwards reads what
 * is written to koutfd.
 *
 * With /dev/cons the two descriptors are used directly.  With
 * /dev/kbd two helper processes are forked which convert between
 * kbd messages and the plain rune stream carried on a second pipe;
 * the caller then reads and writes that pipe.  Only the original
 * process returns from this function; the helpers call exits().
 */
void
kbdopen(void)
{
	int n, kinfd, koutfd, fd[2];
	char buf[128];
	int kbd;

	/* kbd records which of the two input files was opened */
	kbd = 1;
	if((kinfd = open("/dev/kbd", OREAD)) < 0){
		kbd = 0;
		if((kinfd = open("/dev/cons", OREAD)) < 0)
			sysfatal("open kbd: %r");
	}
	if(bind("#|", "/n/temp", MREPL) < 0)
		sysfatal("bind /n/temp: %r");
	if((koutfd = open("/n/temp/data1", OWRITE)) < 0)
		sysfatal("open kbd pipe: %r");
	if(bind("/n/temp/data", kbd? "/dev/kbd": "/dev/cons", MREPL) < 0)
		sysfatal("bind kbd pipe: %r");
	/* the temporary mount point is no longer needed once data is bound */
	unmount(nil, "/n/temp");
	if(!kbd){
		/* /dev/cons: no message framing, use the descriptors as they are */
		in = kinfd;
		out = koutfd;
		return;
	}
	if(pipe(fd) < 0)
		sysfatal("pipe: %r");
	if(fork()){
		/* process that returns to the caller: talks to the helpers over fd[0] */
		in = out = fd[0];
		close(fd[1]);
		close(kinfd);
		close(koutfd);
		return;
	}
	close(fd[0]);
	if(fork()){
		Biobuf b;
		long r;

		/*
		 * translator output -> keyboard consumers: each rune read
		 * from fd[1] is written as "c<rune>" including the
		 * terminating NUL (hence the +1).
		 */
		Binit(&b, fd[1], OREAD);
		while((r = Bgetrune(&b)) >= 0){
			n = snprint(buf, sizeof(buf), "c%C", (Rune)r)+1;
			write(koutfd, buf, n); /* pass on result */
		}
	} else
		/*
		 * real keyboard -> translator: the last byte of every
		 * message is forced to NUL; messages that do not start
		 * with 'c' are forwarded unchanged, 'c' messages are sent
		 * to the translator without the leading 'c' and the final
		 * byte.
		 */
		while((n = read(kinfd, buf, sizeof(buf))) > 0){
			buf[n-1] = 0;
			if(n < 2 || buf[0] != 'c')
				write(koutfd, buf, n); /* pass on */
			else
				write(fd[1], buf+1, n-2); /* to translator */
		}
	exits(nil);
}

/*
 * Entry that initmap() stores under every proper prefix of a roma
 * string.  main() treats a looked-up entry with a nil kana or with
 * leadstomore set as a partial match and waits for more input.
 * NOTE(review): the initializer presumably fills roma, kana and
 * leadstomore in that order; confirm against the Map declaration.
 */
Map signalmore = {
	"_", nil, 1,
};

/*
 * Build the lookup map for the n entries of m and return it.
 *
 * Entries whose roma string is nil or empty are skipped.  For every
 * other entry each leading part of the roma string (at most 15
 * bytes, the room in the local key buffer) is inserted as a key:
 * proper prefixes map to signalmore, so that main() knows a longer
 * match may still follow, and the complete string maps to the entry.
 * When the complete string is already present and marked
 * leadstomore, the entry is stored with leadstomore set as well
 * because the key is both a valid input and a prefix.
 */
Hmap*
initmap(Map *m, int n)
{
	Hmap *map;
	Map ent, *val;
	char key[16], *cp;
	int idx, size;

	map = hmapalloc(n, sizeof(Map));
	for(idx = 0; idx < n; idx++){
		if(m[idx].roma == nil || m[idx].roma[0] == '\0')
			continue;

		/*
		 * size is the buffer size handed to snprint, i.e. the
		 * prefix length plus one for the terminator.
		 */
		size = 2;
		for(cp = m[idx].roma; *cp && size <= sizeof key; cp++, size++){
			snprint(key, size, "%s", m[idx].roma);
			ent = m[idx];
			if(hmapget(map, key, &ent) == 0
			&& ent.leadstomore == 1 && cp[1] == '\0'){
				/* conflict: both a partial and a valid input */
				ent = m[idx];
				ent.leadstomore = 1;
			}

			/* last byte of roma stores the entry, anything shorter a marker */
			val = cp[1] == '\0' ? &ent : &signalmore;
			hmaprepl(&map, strdup(key), val, nil, 1);
		}
	}
	return map;
}

/* Print the usage line to file descriptor 2 and exit with status "usage". */
void
usage(void)
{
	fprint(2, "usage: %s\n", argv0);
	exits("usage");
}

/*
 * Load the dictionaries ($jisho, $zidian or their defaults under
 * /lib), build the romanisation maps, hook into the keyboard with
 * kbdopen() and then translate key input forever in a forked child
 * while the parent exits.
 *
 * The loop keeps unconsumed input in buf[bp..ep).  wantmore is set
 * when the bytes at bp are not yet enough to decide (incomplete UTF
 * sequence, a lone capital consonant in hiragana mode, or a partial
 * match in the map); the next read then appends to the buffer
 * instead of replacing it.
 */
void
main(int argc, char *argv[])
{
	enum {
		Ctll	= 0x0c,	/* ^l: no translate command */
		Ctlx	= 0x18,	/* ^x: read the dictionaries once more */
		Ctlbs	= 0x1c,	/* ^\: start translation command */
	};
	uchar *bp, *ep, buf[128];
	Map lkup, last;
	int wantmore;
	int n, c;
	char *jishoname, *zidianname;
	Hmap *jisho, *zidian;

	ARGBEGIN{
	default: usage();
	}ARGEND;
	if(argc != 0)
		usage();

	if((jishoname = getenv("jisho")) == nil)
		jishoname = "/lib/kanji.jisho";
	jisho = opendict(nil, jishoname);

	if((zidianname = getenv("zidian")) == nil)
		zidianname = "/lib/hanzi.zidian";
	zidian = opendict(nil, zidianname);

	hira 	= table = initmap(mhira, nelem(mhira));
	kata 	= initmap(mkata, nelem(mkata));
	greek 	= initmap(mgreek, nelem(mgreek));
	cyril 	= initmap(mcyril, nelem(mcyril));
	hangul 	= initmap(mhangul, nelem(mhangul));
	last = (Map){nil, nil, -1};

	kbdopen();
	if(fork())
		exits(nil); /* parent process will exit */

	bp = ep = buf;
	wantmore = 0;
	for (;;) { /* key board input loop */
	getmore:
		if (bp>=ep || wantmore) {
			if (wantmore==0)
				bp = ep = buf; /* clear all */
			else if (bp > buf) {
				/* reclaim the consumed part so there is room to append */
				memmove(buf, bp, ep-bp);
				ep = buf + (ep-bp);
				bp = buf;
			}
			/* leave one byte free for the terminator written below */
			n = read(in, ep, &buf[sizeof(buf)-1]-ep);
			if (n<=0)
				exits("");
			ep += n;
			*ep = '\0';
		}
		while (bp<ep) { /* there are input data */
			/*
			 * A single capital consonant in hiragana mode: the
			 * following key decides between okurigana and joshi
			 * handling below, so wait for it.
			 */
			if (table == hira && natural != 1 && (*bp>'A' && *bp<='Z') && ep-bp<2
			   && !strchr("EIOU", *bp)) {
				wantmore = 1;
				goto getmore;
			}
			if (!fullrune((char *)bp, ep-bp)) { /* not enough length of input */
				wantmore = 1;
				goto getmore;
			}
			wantmore = 0;

			if (*bp==Ctlx) { /* ^x read ktrans-jisho once more */
				jisho = opendict(jisho, jishoname);
				zidian = opendict(zidian, zidianname);
				llen = 0;
				olen = okuri = joshi = 0;
				wantmore=0;
				bp=ep=buf;
				continue;
			}
			if (*bp==Ctlbs) { /* ^\ (start translation command) */
				if (table == hanzi)
					c = dotrans(zidian);
				else
					c = dotrans(jisho);
				if (c)
					*bp = c; /* reprocess the key dotrans() read last */
				else
					bp++;
				continue;
			}
			if (*bp==Ctll) { /* ^l (no translate command) */
				bp++;
				llen = 0;
				olen = okuri = joshi = 0;
				last.kana = nil;
				continue;
			}
			if (changelang(*bp)) { /* change language mode OK */
				bp++;
				olen = okuri = joshi = 0;
				last.kana = nil;
				continue;
			}
			if (natural || *bp<=' ' || *bp>='{') { /* English mode but not ascii */
				Rune r;
				int rlen = chartorune(&r, (char *)bp);
				send(bp, rlen); /* write bp to /dev/cons */
				bp += rlen;
				last.kana = nil;
				continue;
			}
			/*
			 * Capital letters in hiragana mode: one capital marks
			 * the start of okurigana, two in a row set joshi.
			 * Either way the letters are lowered for the lookup.
			 */
			if (table == hira && (*bp >= 'A' && *bp <= 'Z') && (*(bp+1) < 'A'
			   || *(bp+1) > 'Z')) {
				*bp = okuri = tolower(*bp);
				joshi = olen = 0;
			} else if (table == hira && (*bp >= 'A' && *bp <= 'Z') &&
			   (*(bp+1) >= 'A' && *(bp+1) <= 'Z')) {
				*bp = okuri = tolower(*bp);
				*(bp+1) = tolower(*(bp+1));
				joshi = 1;
				olen = 0;
			}
			if(hmapget(table, (char*)bp, &lkup) < 0){
				/*
				 * No entry for the pending text: emit the
				 * shorter match remembered earlier, if any,
				 * otherwise pass one byte through untouched.
				 */
				if(last.kana != nil){
					send((uchar*)last.kana, strlen(last.kana));
					bp += strlen(last.roma);
				} else
					send(bp++, 1);
				last.kana = nil;
				break;
			}
			/* concatenations; only advance a single character */
			if(lkup.kana != nil && strstr("ッっ", lkup.kana))
				lkup.roma = "_";
			/* partial match */
			if(lkup.kana == nil || lkup.leadstomore == 1){
				if(lkup.kana != nil)
					last = lkup;

				wantmore = 1;
				break;
			}
			last.kana = nil;
			send((uchar*)lkup.kana, strlen(lkup.kana));
			bp += strlen(lkup.roma);
		}
	}
}

/*
 * Write the n bytes of UTF text at p to the output descriptor and
 * record them for a later translation by dotrans().
 *
 * Nothing is recorded unless the current table is hira or hanzi, and
 * with hira only when not in natural (English) mode.  While okuri is
 * set the raw bytes are appended to okurigana[]; otherwise the text
 * is decoded and the runes are appended to lbuf[], a backspace
 * removing the last recorded rune and 0x80 being skipped.
 *
 * A failed or short write is fatal.
 */
void
send(uchar *p, int n)
{
	Rune r;
	uchar *ep;

	if (write(out, (char*)p, n) != n)
		sysfatal("write: %r");

	if (llen>LSIZE-64) {
		/* nearly full: forget the oldest 64 runes and keep the rest */
		memmove(lbuf, lbuf+64, (llen-64)*sizeof(Rune));
		llen -= 64;
	}

	if(table != hira && table != hanzi)
		return;
	if(natural && table != hanzi)
		return;

	ep = p+n;
	if(okuri)
		/* dotrans() terminates okurigana[] in place: keep one byte free */
		while (olen<LSIZE-1 && p<ep)
			okurigana[olen++] = *p++;
	else
		/* dotrans() may append the okuri rune and a terminator: keep two free */
		while (llen<LSIZE-2 && p<ep) {
			p += chartorune(&r, (char*)p);
			if (r=='\b') {
				if (llen>0)
					llen--;
				continue;
			}
			if (r==0x80) /* ignore view key */
				continue;
			lbuf[llen++] = r;
		}
}

/*
 * Switch the input language if c is one of the mode keys.
 *
 * On a mode key the globals natural and table are set for the new
 * mode, the recorded hiragana (llen) is discarded and 1 is returned.
 * Any other value leaves the state alone and returns 0.
 */
int
changelang(int c)
{
	enum {
		Ctlc	= 0x03,	/* ^c (Chinese mode) */
		Ctlk	= 0x0b,	/* ^k (Japanese katakana mode) */
		Ctln	= 0x0e,	/* ^n (Japanese hiragana mode) */
		Ctlo	= 0x0f,	/* ^o (Greek mode) */
		Ctlr	= 0x12,	/* ^r (Russian mode) */
		Ctls	= 0x13,	/* ^s (Korean mode) */
		Ctlt	= 0x14,	/* ^t (English mode) */
	};

	switch(c){
	case Ctlt:
		natural = 1;
		table = hira;
		break;
	case Ctln:
		natural = 0;
		table = hira;
		break;
	case Ctlk:
		natural = 0;
		table = kata;
		break;
	case Ctlr:
		natural = 0;
		table = cyril;
		break;
	case Ctlo:
		natural = 0;
		table = greek;
		break;
	case Ctls:
		natural = 0;
		table = hangul;
		break;
	case Ctlc:
		natural = 1;
		table = hanzi;
		break;
	default:
		return 0;	/* not a mode key */
	}
	llen = 0;
	return 1;
}

/*
 * Load the dictionary file name into h and return the map.
 *
 * h may be nil, in which case a new map is allocated; otherwise the
 * existing map is reset and refilled.  If the file cannot be opened
 * h is returned untouched, so a failed reload keeps the dictionary
 * that was already loaded (and the result is nil only when there was
 * none to begin with).
 *
 * Each line has the form "key<tab>kouho kouho ...".  Empty lines,
 * lines starting with ';' and lines without a tab or without any
 * candidate are ignored.  At most nelem(kouho)-1 candidates are kept
 * so that the stored array always ends in a nil pointer.  The
 * candidate pointers point into the line buffer, which is also the
 * key handed to hmaprepl().
 */
Hmap*
opendict(Hmap *h, char *name)
{
	Biobuf *b;
	char *p;
	char *dot, *rest;
	char *kouho[16];
	int i;

	b = Bopen(name, OREAD);
	if(b == nil)
		return h;

	if(h == nil)
		h = hmapalloc(8192, sizeof(kouho));
	else
		hmapreset(h, 1);
	while((p = Brdstr(b, '\n', 1)) != nil){
		if(p[0] == '\0' || p[0] == ';'){
			free(p);
			continue;
		}
		dot = utfrune(p, '\t');
		if(dot == nil){
			free(p);
			continue;
		}

		*dot = '\0';
		rest = dot+1;

		memset(kouho, 0, sizeof kouho);
		i = 0;
		while(i < nelem(kouho)-1 && (dot = utfrune(rest, ' ')) != nil){
			*dot = '\0';
			/* doubled spaces would otherwise yield empty candidates */
			if(*rest != '\0')
				kouho[i++] = rest;
			rest = dot+1;
		}
		if(i < nelem(kouho)-1 && *rest != '\0')
			kouho[i] = rest;
		if(kouho[0] == nil){
			/* nothing usable after the tab */
			free(p);
			continue;
		}

		/* key is the base pointer; overwrites clean up for us */
		hmaprepl(&h, p, kouho, nil, 1);
	}
	Bterm(b);
	return h;
}

/*
 * Replace the hiragana recorded in lbuf[] (plus okurigana[], if any)
 * by a kouho looked up in dic.
 *
 * The first kouho is written over the input using backspaces, then
 * one byte is read from the input: ctl-\ asks for the next kouho,
 * anything else accepts the one shown.  When ctl-\ is pressed on the
 * last kouho the original hiragana is restored.  Accepting a kouho
 * other than the first moves it to the front of the entry.
 *
 * Returns 0 when nothing was translated (no recorded input, no
 * dictionary, no entry), otherwise the last byte read, which the
 * caller processes as ordinary input.
 */
int
dotrans(Hmap *dic)
{
	enum {
		Ctlbs	= 0x1c,	/* ^\: show the next kouho */
	};
	Rune *res;
	char v[1024], hirabuf[sizeof v], *p, *shown;
	int j, lastlen, verb;
	char ch;
	int i;
	char *kouho[16];

	if (llen==0)
		return 0; /* don't use kanji transform function */

	/* make sure the terminators written below stay inside the buffers */
	if (llen > (int)nelem(lbuf)-2)
		llen = (int)nelem(lbuf)-2;
	if (olen > (int)nelem(okurigana)-1)
		olen = (int)nelem(okurigana)-1;

	/* verb mode: the key is the reading followed by the okuri letter */
	verb = okuri && joshi != 1;
	if (verb)
		lbuf[llen++] = (Rune)okuri;
	lbuf[llen] = 0;
	okurigana[olen] = 0;

	/*
	 * Convert the key to UTF.  The bound keeps room for one more rune
	 * and the terminator.
	 * NOTE(review): assumes a rune encodes to at most 4 bytes; confirm
	 * against UTFmax.
	 */
	res = lbuf;
	for (j=0; *res != L'\0' && j < (int)sizeof(v)-4; j += runetochar(v+j, res++))
		;
	v[j] = '\0';
	strcpy(hirabuf, v); /* to remember the initial hiragana input */

	if (verb && hirabuf[0] != '\0')
		hirabuf[strlen(hirabuf) - 1] = '\0'; /* without the okuri letter */

	if(dic == nil || hmapget(dic, v, kouho) < 0){
		llen = olen = okuri = joshi = 0;
		okurigana[0] = 0;
		return 0;
	}

	ch = 0;
	shown = v;	/* text whose rune count decides how much to erase */
	for(i = 0; i < nelem(kouho) && kouho[i] != nil; i++) {
		p = kouho[i];
		lastlen = nrune(shown); /* number of rune chars */
		if (verb)
			lastlen--;
		for (j=0; j<lastlen; j++)
			write(out, "\b", 1); /* clear hiragana input */

		if (okuri) {
			lastlen = nrune((char *)okurigana);
			for (j=0; j<lastlen; j++)
				write(out, "\b", 1);
		}

		write(out, p, strlen(p)); /* write kanji to stdout */
		if (okuri)
			write(out, (char *)okurigana, olen);

		if (read(in, &ch, 1)<=0) /* read from stdin */
			exits(nil);

		if (ch != Ctlbs) {
			/*
			 * Accepted: learn the previous use of the kouho.
			 * NOTE(review): hmapupd() gets the address of the
			 * local dic; if it can move the map the caller's
			 * pointer would go stale; confirm in hash.c.
			 */
			if(i != 0){
				p = kouho[0];
				kouho[0] = kouho[i];
				kouho[i] = p;
				hmapupd(&dic, v, kouho);
			}

			olen = okuri = joshi = 0;
			okurigana[0] = 0;
			break;
		}

		/* ^\ again */
		if(i+1 < nelem(kouho) && kouho[i+1] != nil) { /* have next kouho */
			shown = p;
			if (verb) {
				lastlen = nrune(shown);
				for (j=0; j<lastlen; j++)
					write(out, "\b", 1);
			}
			continue;
		}

		/* the last kouho: erase it and put the hiragana back */
		if (okuri) {
			lastlen = nrune((char *)okurigana);
			for (j=0; j<lastlen; j++)
				write(out, "\b", 1);
		}

		lastlen = nrune(p);
		for (j=0; j<lastlen; j++)
			write(out, "\b", 1);

		if(hirabuf[0])
			write(out, hirabuf, strlen(hirabuf));

		if(okurigana[0])
			write(out, (char *)okurigana, olen);

		olen = okuri = joshi = 0;
		okurigana[0] = 0;
		break;
	}
	llen = 0;
	return ch;
}

/*
 * Count the runes in the NUL-terminated UTF string p.
 * Each step advances by the byte length chartorune() reports;
 * the decoded rune itself is not used.
 */
int
nrune(char *p)
{
	Rune scratch;
	int count;

	for (count = 0; *p != '\0'; count++)
		p += chartorune(&scratch, p);
	return count;
}