shithub: neatmkfn

Download patch

ref: 50918a0b71ee777bbb3b7ba7bebb590aeb86f151
author: Ali Gholami Rudi <ali@rudi.ir>
date: Fri Sep 6 18:15:36 EDT 2013

mktrfn: otfdump and improved afm support

--- /dev/null
+++ b/Makefile
@@ -1,0 +1,11 @@
+# Build settings for mktrfn; CC, CFLAGS and LDFLAGS may be overridden
+# on the make command line.
+CC = cc
+CFLAGS = -O2 -Wall
+LDFLAGS =
+
+# "all" and "clean" name actions, not files; declaring them phony keeps
+# make from skipping them if a file with the same name ever exists.
+.PHONY: all clean
+
+all: mktrfn
+%.o: %.c
+	$(CC) -c $(CFLAGS) $<
+mktrfn: mktrfn.o trfn.o sbuf.o tab.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+clean:
+	rm -f *.o mktrfn
--- /dev/null
+++ b/mktrfn.c
@@ -1,0 +1,120 @@
+/*
+ * mktrfn - produce troff font descriptions
+ *
+ * Copyright (C) 2012-2013 Ali Gholami Rudi <ali at rudi dot ir>
+ *
+ * This program is released under the Modified BSD license.
+ */
+#include <ctype.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trfn.h"
+
+#define TOKLEN		256
+
+/*
+ * Read an otfdump-style listing from standard input and pass it to trfn.
+ *
+ * Recognised records, each introduced by a keyword:
+ *   name NAME                        -> trfn_psfont(NAME)
+ *   char GLYPH width W               -> trfn_char(GLYPH, NULL, W, -1)
+ *   kernpair G1 G2 width W           -> trfn_kern(G1, G2, W)
+ *   feature NAME substitution G1 G2  -> parsed and ignored
+ *
+ * Every token is limited to 255 characters (TOKLEN - 1) so that oversized
+ * input cannot overrun the token buffers, and a record is acted upon only
+ * when all of its fields were read.
+ */
+static void otfdump_read(void)
+{
+	char cmd[TOKLEN];
+	char name[TOKLEN];
+	char ch[TOKLEN];
+	char c1[TOKLEN], c2[TOKLEN];
+	char wid[TOKLEN];
+	while (scanf("%255s", cmd) == 1) {
+		if (!strcmp("name", cmd)) {
+			if (scanf("%255s", name) == 1)
+				trfn_psfont(name);
+		} else if (!strcmp("char", cmd)) {
+			if (scanf("%255s width %255s", ch, wid) == 2)
+				trfn_char(ch, NULL, atoi(wid), -1);
+		} else if (!strcmp("kernpair", cmd)) {
+			if (scanf("%255s %255s width %255s", c1, c2, wid) == 3)
+				trfn_kern(c1, c2, atoi(wid));
+		} else if (!strcmp("feature", cmd)) {
+			/* substitutions are consumed but not used yet */
+			if (scanf("%255s substitution %255s %255s", name, c1, c2) != 3)
+				continue;
+		}
+	}
+}
+
+/*
+ * Read AFM font metrics from standard input and pass them to trfn.
+ *
+ * The input is scanned in four passes over consecutive parts of the file:
+ *   1. header lines up to StartCharMetrics; FontName sets the ps font name
+ *   2. "C pos ; WX width ; N name" lines up to EndCharMetrics
+ *   3. lines up to StartKernPairs (skipped)
+ *   4. "KPX name1 name2 width" lines up to EndKernPairs
+ * Lines starting with '#' are ignored in every part.
+ *
+ * Tokens are limited to 255 characters (TOKLEN - 1): a line may hold up to
+ * 1023 characters, which is more than a token buffer can store.
+ */
+static void afm_read(void)
+{
+	char ch[TOKLEN], pos[TOKLEN];
+	char c1[TOKLEN], c2[TOKLEN];
+	char wid[TOKLEN];
+	char ln[1024];
+	while (fgets(ln, sizeof(ln), stdin)) {
+		if (ln[0] == '#')
+			continue;
+		/* require a separator so that longer keywords do not match */
+		if (!strncmp("FontName", ln, 8) && isspace((unsigned char) ln[8])) {
+			/* a FontName line without a value leaves ch unset; skip it */
+			if (sscanf(ln, "FontName %255s", ch) == 1)
+				trfn_psfont(ch);
+			continue;
+		}
+		if (!strncmp("StartCharMetrics", ln, 16))
+			break;
+	}
+	while (fgets(ln, sizeof(ln), stdin)) {
+		if (ln[0] == '#')
+			continue;
+		if (!strncmp("EndCharMetrics", ln, 14))
+			break;
+		if (sscanf(ln, "C %255s ; WX %255s ; N %255s", pos, wid, ch) == 3)
+			trfn_char(ch, pos, atoi(wid), -1);
+	}
+	while (fgets(ln, sizeof(ln), stdin)) {
+		if (ln[0] == '#')
+			continue;
+		if (!strncmp("StartKernPairs", ln, 14))
+			break;
+	}
+	while (fgets(ln, sizeof(ln), stdin)) {
+		if (ln[0] == '#')
+			continue;
+		if (!strncmp("EndKernPairs", ln, 12))
+			break;
+		if (sscanf(ln, "KPX %255s %255s %255s", c1, c2, wid) == 3)
+			trfn_kern(c1, c2, atoi(wid));
+	}
+}
+
+/*
+ * Parse the command-line options, read a font description from standard
+ * input and print the troff font description on standard output.
+ *
+ *   -a       the input is in AFM format
+ *   -o       the input is otfdump output (the default)
+ *   -r res   resolution passed to trfn_init() (default 720)
+ *   -t name  troff font name
+ *   -p name  postscript font name
+ *
+ * Returns 1 when an option value is missing or the resolution is out of
+ * range, and 0 otherwise (including after printing the usage message for
+ * an unknown option).
+ */
+int main(int argc, char *argv[])
+{
+	static char usage[] = "usage: mktrfn -a -o -t name -p psname\n";
+	int afm = 0;
+	int i;
+	int res = 720;
+	for (i = 1; i < argc && argv[i][0] == '-'; i++) {
+		int opt = argv[i][1];
+		char *arg = NULL;
+		if (opt == 'r' || opt == 't' || opt == 'p') {
+			/* the value is glued to the option or is the next argument */
+			arg = argv[i][2] ? argv[i] + 2 : argv[++i];
+			if (!arg) {	/* ran into argv[argc]: no value given */
+				printf("%s", usage);
+				return 1;
+			}
+		}
+		switch (opt) {
+		case 'a':
+			afm = 1;
+			break;
+		case 'o':
+			afm = 0;
+			break;
+		case 'r':
+			res = atoi(arg);
+			break;
+		case 't':
+			trfn_trfont(arg);
+			break;
+		case 'p':
+			trfn_psfont(arg);
+			break;
+		default:
+			printf("%s", usage);
+			return 0;
+		}
+	}
+	/*
+	 * trfn_init() divides 7200 by res and every width by the result;
+	 * values outside 1..7200 would end in a division by zero.
+	 */
+	if (res <= 0 || res > 7200) {
+		fprintf(stderr, "mktrfn: invalid resolution\n");
+		return 1;
+	}
+	trfn_init(res);
+	if (afm)
+		afm_read();
+	else
+		otfdump_read();
+	trfn_print();
+	trfn_done();
+	return 0;
+}
--- /dev/null
+++ b/sbuf.c
@@ -1,0 +1,84 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sbuf.h"
+
+#define MAX(a, b)	((a) < (b) ? (b) : (a))
+#define SBUF_SZ		512
+
+/*
+ * Give sbuf a fresh buffer of at least amount bytes (rounded up to a
+ * multiple of SBUF_SZ), copy the first sbuf->n bytes of the old buffer
+ * into it and release the old one.
+ *
+ * The callers use the buffer right away, so running out of memory is
+ * treated as fatal here instead of leaving a NULL buffer behind.
+ */
+static void sbuf_extend(struct sbuf *sbuf, int amount)
+{
+	char *s = sbuf->s;
+	sbuf->sz = (MAX(1, amount) + SBUF_SZ - 1) & ~(SBUF_SZ - 1);
+	sbuf->s = malloc(sbuf->sz);
+	if (!sbuf->s) {
+		fprintf(stderr, "sbuf: out of memory\n");
+		exit(1);
+	}
+	if (sbuf->n)
+		memcpy(sbuf->s, s, sbuf->n);
+	free(s);
+}
+
+/* reset every field of sbuf and allocate its initial SBUF_SZ-byte buffer */
+void sbuf_init(struct sbuf *sbuf)
+{
+	sbuf->s = NULL;
+	sbuf->sz = 0;
+	sbuf->n = 0;
+	sbuf_extend(sbuf, SBUF_SZ);
+}
+
+/* append the single character c, doubling the buffer when it is nearly full */
+void sbuf_add(struct sbuf *sbuf, int c)
+{
+	int room = sbuf->sz - sbuf->n;
+	if (room <= 2)
+		sbuf_extend(sbuf, sbuf->sz * 2);
+	sbuf->s[sbuf->n] = c;
+	sbuf->n++;
+}
+
+/* append the NUL-terminated string s (without its terminator) to sbuf */
+void sbuf_append(struct sbuf *sbuf, char *s)
+{
+	int len = strlen(s);
+	int need = sbuf->n + len + 1;	/* keeps a spare byte past the text */
+	if (need >= sbuf->sz)
+		sbuf_extend(sbuf, need);
+	memcpy(sbuf->s + sbuf->n, s, len);
+	sbuf->n += len;
+}
+
+/*
+ * Append printf-style formatted text to sbuf.
+ *
+ * The text is formatted into a stack buffer when it fits; longer output
+ * is formatted again into a temporary heap buffer of the exact size, so
+ * no output length can overrun the stack buffer.  If that temporary
+ * buffer cannot be allocated, the truncated text is appended instead.
+ * Nothing is appended when the format itself is rejected.
+ */
+void sbuf_printf(struct sbuf *sbuf, char *s, ...)
+{
+	char buf[1024];
+	char *big;
+	va_list ap;
+	int len;
+	va_start(ap, s);
+	len = vsnprintf(buf, sizeof(buf), s, ap);
+	va_end(ap);
+	if (len < 0)
+		return;
+	if (len < (int) sizeof(buf)) {
+		sbuf_append(sbuf, buf);
+		return;
+	}
+	/* len is the untruncated length; retry with enough room */
+	big = malloc(len + 1);
+	if (!big) {
+		sbuf_append(sbuf, buf);
+		return;
+	}
+	va_start(ap, s);
+	vsnprintf(big, len + 1, s, ap);
+	va_end(ap);
+	sbuf_append(sbuf, big);
+	free(big);
+}
+
+/* end the current line: add '\n' unless sbuf is empty or already ends in one */
+void sbuf_putnl(struct sbuf *sbuf)
+{
+	if (!sbuf->n)
+		return;
+	if (sbuf->s[sbuf->n - 1] == '\n')
+		return;
+	sbuf_add(sbuf, '\n');
+}
+
+/* return nonzero when no characters are stored in sbuf */
+int sbuf_empty(struct sbuf *sbuf)
+{
+	return sbuf->n == 0;
+}
+
+/* NUL-terminate the stored text and return a pointer to it */
+char *sbuf_buf(struct sbuf *sbuf)
+{
+	char *text = sbuf->s;
+	text[sbuf->n] = '\0';
+	return text;
+}
+
+/* return the number of characters stored in sbuf */
+int sbuf_len(struct sbuf *sbuf)
+{
+	int len = sbuf->n;
+	return len;
+}
+
+/* truncate sbuf to at most n characters; a shorter sbuf is left alone */
+void sbuf_cut(struct sbuf *sbuf, int n)
+{
+	if (n < sbuf->n)
+		sbuf->n = n;
+}
+
+/* release the buffer owned by sbuf */
+void sbuf_done(struct sbuf *sbuf)
+{
+	char *owned = sbuf->s;
+	free(owned);
+}
--- /dev/null
+++ b/sbuf.h
@@ -1,0 +1,11 @@
+/* variable length string buffer */
+struct sbuf {
+	char *s;		/* allocated buffer */
+	int sz;			/* buffer size */
+	int n;			/* length of the string stored in s */
+};
+
+/* clear sbuf and allocate its initial buffer */
+void sbuf_init(struct sbuf *sbuf);
+/* free the buffer owned by sbuf */
+void sbuf_done(struct sbuf *sbuf);
+/* NUL-terminate the stored text and return a pointer to it */
+char *sbuf_buf(struct sbuf *sbuf);
+/* append printf-style formatted text to sbuf */
+void sbuf_printf(struct sbuf *sbuf, char *s, ...);
--- /dev/null
+++ b/tab.c
@@ -1,0 +1,52 @@
+#include <stdlib.h>
+#include <string.h>
+#include "tab.h"
+
+/*
+ * String-keyed lookup table.  Entries are stored in insertion order and
+ * chained into one list per value of the first byte of their key.
+ */
+struct tab {
+	char **keys;		/* keys[i]: key of entry i (pointer is stored, not copied) */
+	void **vals;		/* vals[i]: value of entry i */
+	int n;			/* number of entries stored so far */
+	int *next;		/* next[i]: entry following i in its chain, or -1 */
+	int head[256];		/* head[b]: latest entry whose key starts with byte b, or -1 */
+};
+
+/*
+ * Create an empty table with room for sz entries.  All chains start out
+ * empty (-1).  The result is released with tab_free().
+ */
+struct tab *tab_alloc(int sz)
+{
+	struct tab *tab = malloc(sizeof(*tab));
+	int b = sizeof(tab->head) / sizeof(tab->head[0]);
+	tab->n = 0;
+	tab->keys = malloc(sz * sizeof(*tab->keys));
+	tab->vals = malloc(sz * sizeof(*tab->vals));
+	tab->next = malloc(sz * sizeof(*tab->next));
+	while (b-- > 0)
+		tab->head[b] = -1;
+	return tab;
+}
+
+/* release the table and its arrays; the keys and values themselves are not freed */
+void tab_free(struct tab *tab)
+{
+	free(tab->next);
+	free(tab->vals);
+	free(tab->keys);
+	free(tab);
+}
+
+/*
+ * Store value v under key k.  The new entry becomes the head of the chain
+ * selected by the first byte of k.  Only the pointers are stored.
+ */
+void tab_put(struct tab *tab, char *k, void *v)
+{
+	int *slot = &tab->head[(unsigned char) k[0]];
+	int idx = tab->n++;
+	tab->keys[idx] = k;
+	tab->vals[idx] = v;
+	tab->next[idx] = *slot;
+	*slot = idx;
+}
+
+/*
+ * Return the value stored under key k, or NULL when k is not in the table.
+ * When a key was stored more than once, the most recent value is returned.
+ *
+ * Only strcmp() is used to compare keys: peeking at the second byte first
+ * would read past the terminator of an empty key.
+ */
+void *tab_get(struct tab *tab, char *k)
+{
+	int i;
+	for (i = tab->head[(unsigned char) k[0]]; i >= 0; i = tab->next[i])
+		if (!strcmp(k, tab->keys[i]))
+			return tab->vals[i];
+	return NULL;
+}
--- /dev/null
+++ b/tab.h
@@ -1,0 +1,4 @@
+/* create an empty table with room for sz entries */
+struct tab *tab_alloc(int sz);
+/* release a table created by tab_alloc(); keys and values are not freed */
+void tab_free(struct tab *tab);
+/* store value v under key k; the pointers are kept, not copied */
+void tab_put(struct tab *tab, char *k, void *v);
+/* return the value stored under key k, or NULL if there is none */
+void *tab_get(struct tab *tab, char *k);
--- /dev/null
+++ b/trfn.c
@@ -1,0 +1,262 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "sbuf.h"
+#include "tab.h"
+#include "trfn.h"
+#include "trfn_ch.h"
+
+#define WX(w)		(((w) < 0 ? (w) - trfn_div / 2 : (w) + trfn_div / 2) / trfn_div)
+#define LEN(a)		((sizeof(a) / sizeof((a)[0])))
+#define HEXDIGS		"0123456789abcdef"
+#define GNLEN		32
+#define AGLLEN		(8 * 1024)
+
+static struct sbuf sbuf_char;	/* charset section */
+static struct sbuf sbuf_kern;	/* kernpairs section */
+static int trfn_div;		/* divisor of widths */
+static int trfn_swid;		/* space width */
+static char trfn_ligs[1024];	/* font ligatures */
+static char trfn_trname[256];	/* font troff name */
+static char trfn_psname[256];	/* font ps name */
+
+/* adobe glyphlist mapping */
+static char agl_key[AGLLEN][GNLEN];
+static char agl_val[AGLLEN][GNLEN];
+static int agl_n;
+
+/* lookup tables */
+static struct tab *tab_agl;
+static struct tab *tab_alts;
+static struct tab *tab_ctyp;
+
+/*
+ * Write the UTF-8 encoding of code point c at *d, NUL-terminate it and
+ * advance *d to the terminator.  One to four bytes are written before
+ * the terminator; values of zero or less are written as a space.
+ */
+static void pututf8(char **d, int c)
+{
+	char *p = *d;
+	int shift;	/* bit offset of the first continuation byte */
+	if (c > 0xffff) {
+		*p++ = 0xf0 | (c >> 18);
+		shift = 12;
+	} else if (c > 0x7ff) {
+		*p++ = 0xe0 | (c >> 12);
+		shift = 6;
+	} else if (c > 0x7f) {
+		*p++ = 0xc0 | (c >> 6);
+		shift = 0;
+	} else {
+		*p++ = c > 0 ? c : ' ';
+		shift = -6;	/* single byte: no continuation bytes */
+	}
+	for (; shift >= 0; shift -= 6)
+		*p++ = 0x80 | ((c >> shift) & 0x3f);
+	*p = '\0';
+	*d = p;
+}
+
+/*
+ * Return the value of the hexadecimal number at the start of s, reading
+ * at most len digits and stopping at the first character that is not a
+ * hex digit (either case).  Returns 0 when s starts with no hex digit.
+ * When len is 1 the digit's value is returned shifted left by four bits.
+ */
+static int hexval(char *s, int len)
+{
+	char *digs = HEXDIGS;
+	int n = 0;
+	int i;
+	for (i = 0; i < len; i++) {
+		/* <ctype.h> functions need an unsigned char value */
+		int c = tolower((unsigned char) s[i]);
+		char *hit = c ? strchr(digs, c) : NULL;
+		if (!hit)
+			break;
+		n = n * 16 + (hit - digs);
+	}
+	return len == 1 ? n << 4 : n;
+}
+
+/*
+ * Load the glyph list at path into agl_key/agl_val and index it in tab_agl.
+ *
+ * Each line that is used has the form "NAME;HEX [HEX ...]": NAME is the
+ * glyph name and the hex numbers are the code points it maps to, which are
+ * stored as UTF-8.  Lines starting with '#' or without ';' are skipped.
+ *
+ * Entries whose name or UTF-8 value does not fit in GNLEN bytes are
+ * skipped, and reading stops once AGLLEN entries are stored, so that the
+ * fixed-size arrays cannot be overrun.
+ *
+ * Returns 1 if path cannot be opened (tab_agl is left untouched) and 0
+ * otherwise.
+ */
+static int agl_read(char *path)
+{
+	FILE *fin = fopen(path, "r");
+	char ln[GNLEN * 8];
+	char val[GNLEN];
+	char *s, *d;
+	int fits;
+	int i;
+	if (!fin)
+		return 1;
+	while (agl_n < AGLLEN && fgets(ln, sizeof(ln), fin)) {
+		s = strchr(ln, ';');
+		if (ln[0] == '#' || !s)
+			continue;
+		*s++ = '\0';
+		if (strlen(ln) >= GNLEN)
+			continue;
+		d = val;
+		fits = 1;
+		while (s && *s) {
+			while (*s == ' ')
+				s++;
+			/* pututf8() writes up to four bytes and a terminator */
+			if (d - val > GNLEN - 5) {
+				fits = 0;
+				break;
+			}
+			pututf8(&d, hexval(s, 6));
+			s = strchr(s, ' ');
+		}
+		if (!fits)
+			continue;
+		*d = '\0';
+		strcpy(agl_key[agl_n], ln);
+		strcpy(agl_val[agl_n], val);
+		agl_n++;
+	}
+	fclose(fin);
+	tab_agl = tab_alloc(agl_n);
+	for (i = 0; i < agl_n; i++)
+		tab_put(tab_agl, agl_key[i], agl_val[i]);
+	return 0;
+}
+
+/*
+ * Return the UTF-8 string the glyph list assigns to glyph name s, or NULL
+ * if s is not listed.  tab_agl stays NULL when the glyph list could not be
+ * opened; every name is then reported as unlisted.
+ */
+static char *agl_map(char *s)
+{
+	return tab_agl ? tab_get(tab_agl, s) : NULL;
+}
+
+/*
+ * Map an arabic/farsi glyph name to a code point.  name is an entry of
+ * achars, optionally followed by "isolated", "initial", "medial" or
+ * "final" to select that shape; a shape the entry lacks falls back to the
+ * base code point.  Returns 0 when no entry matches.
+ */
+static int achar_map(char *name)
+{
+	int i;
+	for (i = 0; i < LEN(achars); i++) {
+		struct achar *a = &achars[i];
+		int len = strlen(a->name);
+		char *postfix;
+		if (strncmp(a->name, name, len))
+			continue;
+		postfix = name + len;
+		if (!*postfix)
+			return a->c;
+		if (!strcmp("isolated", postfix))
+			return a->s ? a->s : a->c;
+		if (!strcmp("initial", postfix))
+			return a->i ? a->i : a->c;
+		if (!strcmp("medial", postfix))
+			return a->m ? a->m : a->c;
+		if (!strcmp("final", postfix))
+			return a->f ? a->f : a->c;
+	}
+	return 0;
+}
+
+/*
+ * Translate the font glyph name src into the UTF-8 character name dst.
+ * dst must have room for GNLEN bytes, which both callers in this file
+ * provide.
+ *
+ * src is split at '_' into components.  Each component is looked up in the
+ * adobe glyph list (with the agl_exceptions replacements applied), then
+ * tried as "uni" followed by groups of four hex digits, as 'u' followed by
+ * up to six hex digits, and finally as an arabic/farsi character name.
+ * Names containing '.' are rejected.
+ *
+ * Returns zero on success.  Returns nonzero when src cannot be mapped,
+ * when a component or the result does not fit in GNLEN bytes, or when a
+ * "uni" component holds no complete group of four digits.  dst is always
+ * NUL-terminated, but its contents are not meaningful on failure.
+ */
+static int trfn_name(char *dst, char *src)
+{
+	char ch[GNLEN];
+	char *end = dst + GNLEN;	/* one past the caller's buffer */
+	char *val;
+	char *s;
+	int i;
+	*dst = '\0';
+	if (strchr(src, '.'))
+		return 1;	/* ignore opentype features for now */
+	while (*src) {
+		s = ch;
+		if (src[0] == '_')
+			src++;
+		while (*src && *src != '_') {
+			if (s == ch + GNLEN - 1)
+				return 1;	/* component too long for ch */
+			*s++ = *src++;
+		}
+		*s = '\0';
+		val = agl_map(ch);
+		if (val) {
+			for (i = 0; i < LEN(agl_exceptions); i++)
+				if (!strcmp(agl_exceptions[i][0], val))
+					val = agl_exceptions[i][1];
+			if (strlen(val) >= (size_t) (end - dst))
+				return 1;
+			strcpy(dst, val);
+			dst = strchr(dst, '\0');
+		} else if (ch[0] == 'u' && ch[1] == 'n' && ch[2] == 'i') {
+			if (strlen(ch + 3) < 4)
+				return 1;	/* nothing would be written */
+			for (i = 0; strlen(ch + 3 + 4 * i) >= 4; i++) {
+				/* pututf8() writes up to four bytes and a NUL */
+				if (end - dst < 5)
+					return 1;
+				pututf8(&dst, hexval(ch + 3 + 4 * i, 4));
+			}
+		} else if (ch[0] == 'u' && ch[1] &&
+				strchr(HEXDIGS, tolower((unsigned char) ch[1]))) {
+			if (end - dst < 5)
+				return 1;
+			pututf8(&dst, hexval(ch + 1, 6));
+		} else if (achar_map(ch)) {
+			if (end - dst < 5)
+				return 1;
+			pututf8(&dst, achar_map(ch));
+		} else {
+			return 1;
+		}
+	}
+	return src[0];
+}
+
+/*
+ * If c is one of the names in ligs, append it and a space to trfn_ligs.
+ * The name is dropped when trfn_ligs has no room left for it, so that
+ * repeated input cannot overrun the fixed-size list.
+ */
+static void trfn_lig(char *c)
+{
+	size_t used = strlen(trfn_ligs);
+	int i;
+	for (i = 0; i < LEN(ligs); i++) {
+		if (strcmp(ligs[i], c))
+			continue;
+		/* room for the name, the space and the terminator */
+		if (used + strlen(c) + 2 <= sizeof(trfn_ligs))
+			sprintf(trfn_ligs + used, "%s ", c);
+		break;
+	}
+}
+
+/* return the ascender/descender type listed for c in ctype, or 3 if unlisted */
+static int trfn_type(char *c)
+{
+	struct ctype *t = tab_get(tab_ctyp, c);
+	if (!t)
+		return 3;
+	return t->type;
+}
+
+/*
+ * Add a glyph to the charset section.
+ *
+ *   c    glyph name in the font
+ *   n    position in the font's encoding as a decimal string, or NULL
+ *   wid  glyph width before scaling by WX()
+ *   typ  ascender/descender type; negative to look it up with trfn_type()
+ *
+ * Glyphs whose name cannot be translated are listed as "---".  Glyphs
+ * whose translated name contains a space are not listed; the first one
+ * that is exactly a space sets the space width.  The last column is n
+ * when it is within 0..255, the character's code when n is NULL and the
+ * name is a single character in the range 32..125, and c otherwise.  The
+ * troff aliases of the character follow as extra lines.
+ *
+ * The last column is referenced through a pointer rather than copied:
+ * c and n may be longer than GNLEN bytes.
+ */
+void trfn_char(char *c, char *n, int wid, int typ)
+{
+	char uc[GNLEN];
+	char num[16];		/* decimal character code */
+	char *pos = c;
+	char **a;
+	if (trfn_name(uc, c))
+		strcpy(uc, "---");
+	if (strchr(uc, ' ')) {		/* space not allowed in character names */
+		if (!trfn_swid && !strcmp(" ", uc))
+			trfn_swid = WX(wid);
+		return;
+	}
+	trfn_lig(uc);
+	if (typ < 0)
+		typ = trfn_type(uc);
+	if (n && atoi(n) >= 0 && atoi(n) < 256)
+		pos = n;
+	if (!n && !uc[1] && uc[0] >= 32 && uc[0] <= 125) {
+		sprintf(num, "%d", uc[0]);
+		pos = num;
+	}
+	sbuf_printf(&sbuf_char, "%s\t%d\t%d\t%s\n", uc, WX(wid), typ, pos);
+	a = tab_get(tab_alts, uc);
+	while (a && *a)
+		sbuf_printf(&sbuf_char, "%s\t\"\n", *a++);
+}
+
+/*
+ * Add a kerning pair for glyphs c1 and c2 with adjustment x.  The pair is
+ * ignored when either name cannot be translated, when the scaled amount
+ * does not exceed WX(20) in magnitude, or when a translated name contains
+ * a space.
+ */
+void trfn_kern(char *c1, char *c2, int x)
+{
+	char g1[GNLEN], g2[GNLEN];
+	if (trfn_name(g1, c1) || trfn_name(g2, c2))
+		return;
+	if (abs(WX(x)) <= WX(20))
+		return;
+	if (strchr(g1, ' ') || strchr(g2, ' '))
+		return;
+	sbuf_printf(&sbuf_kern, "%s\t%s\t%d\n", g1, g2, WX(x));
+}
+
+/*
+ * Set the troff name of the font unless one was set already.  Names that
+ * do not fit in trfn_trname are truncated instead of overrunning it.
+ */
+void trfn_trfont(char *name)
+{
+	if (!trfn_trname[0])
+		snprintf(trfn_trname, sizeof(trfn_trname), "%s", name);
+}
+
+/*
+ * Set the postscript name of the font unless one was set already.  Names
+ * that do not fit in trfn_psname are truncated instead of overrunning it.
+ */
+void trfn_psfont(char *name)
+{
+	if (!trfn_psname[0])
+		snprintf(trfn_psname, sizeof(trfn_psname), "%s", name);
+}
+
+/*
+ * Print the font description on standard output: the optional name and
+ * fontname lines, the space width, the ligatures, and the collected
+ * charset and kernpairs sections.
+ */
+void trfn_print(void)
+{
+	if (*trfn_trname)
+		printf("name %s\n", trfn_trname);
+	if (*trfn_psname)
+		printf("fontname %s\n", trfn_psname);
+	printf("spacewidth %d\n", trfn_swid);
+	printf("ligatures %s 0\n", trfn_ligs);
+	fputs("charset\n", stdout);
+	fputs(sbuf_buf(&sbuf_char), stdout);
+	fputs("kernpairs\n", stdout);
+	fputs(sbuf_buf(&sbuf_kern), stdout);
+}
+
+/*
+ * Prepare the module: derive the width divisor from the resolution res,
+ * load glyphlist.txt from the current directory, and set up the output
+ * buffers and the alias and character-type lookup tables.
+ *
+ * The divisor is kept at 1 or more because WX() divides by it; this covers
+ * res <= 0 and res > 7200.  When glyphlist.txt cannot be read a warning is
+ * printed and an empty glyph table is used, so later lookups simply fail.
+ */
+void trfn_init(int res)
+{
+	int i;
+	trfn_div = res > 0 ? 7200 / res : 1;
+	if (trfn_div < 1)
+		trfn_div = 1;
+	if (agl_read("glyphlist.txt")) {
+		fprintf(stderr, "mktrfn: cannot read glyphlist.txt\n");
+		tab_agl = tab_alloc(0);
+	}
+	sbuf_init(&sbuf_char);
+	sbuf_init(&sbuf_kern);
+	tab_alts = tab_alloc(LEN(alts));
+	tab_ctyp = tab_alloc(LEN(ctype));
+	for (i = 0; i < LEN(alts); i++)
+		tab_put(tab_alts, alts[i][0], alts[i] + 1);
+	for (i = 0; i < LEN(ctype); i++)
+		tab_put(tab_ctyp, ctype[i].ch, &ctype[i]);
+}
+
+/* release the lookup tables and output buffers set up by trfn_init() */
+void trfn_done(void)
+{
+	if (tab_agl)
+		tab_free(tab_agl);
+	tab_free(tab_ctyp);
+	tab_free(tab_alts);
+	sbuf_done(&sbuf_kern);
+	sbuf_done(&sbuf_char);
+}
--- /dev/null
+++ b/trfn.h
@@ -1,0 +1,7 @@
+/* set up the module for resolution res; reads glyphlist.txt */
+void trfn_init(int res);
+/* release everything allocated by trfn_init() */
+void trfn_done(void);
+/* set the troff font name; only the first call has an effect */
+void trfn_trfont(char *name);
+/* set the postscript font name; only the first call has an effect */
+void trfn_psfont(char *fontname);
+/* print the collected font description on standard output */
+void trfn_print(void);
+/* add glyph c at encoding position n (may be NULL) with width wid; typ < 0 looks the type up */
+void trfn_char(char *c, char *n, int wid, int typ);
+/* add a kerning pair for glyphs c1 and c2 with adjustment x */
+void trfn_kern(char *c1, char *c2, int x);
--- /dev/null
+++ b/trfn_ch.h
@@ -1,0 +1,688 @@
+/* ligatures: character names that trfn_lig() adds to the ligatures line */
+static char *ligs[] = {
+	"fh", "fi", "fj", "fk", "fl", "ff", "ffi", "ffj", "ffl", "fft", "ft", "Th"
+};
+
+/* AGL exceptions; expanding ligatures */
+static char *agl_exceptions[][2] = {
+	{"ff", "ff"},
+	{"fi", "fi"},
+	{"fl", "fl"},
+	{"ffi", "ffi"},
+	{"ffl", "ffl"},
+	{"st", "st"},
+};
+
+/* troff aliases */
+static char *alts[][8] = {
+	{"\\", "bs"},
+	{"`", "ga"},
+	{"¡", "!!"},
+	{"¢", "c|", "ct"},
+	{"£", "L-", "ps"},
+	{"¤", "xo", "cr"},
+	{"¥", "Y-", "yn"},
+	{"¦", "||"},
+	{"§", "so", "sc"},
+	{"¨", "\"\"", ":a"},
+	{"©", "co"},
+	{"ª", "a_"},
+	{"«", "<<"},
+	{"»", ">>"},
+	{"¬", "-,"},
+	{"¬", "-,"},
+	{"-", "hy"},
+	{"−", "--"},
+	{"®", "ro", "rg"},
+	{"¯", "a^", "-a"},
+	{"°", "0^"},
+	{"±", "+-"},
+	{"²", "2^"},
+	{"³", "3^"},
+	{"´", "\\'", "aa"},
+	{"µ", "mu"},
+	{"¶", "P!", "pg"},
+	{"·", ".^"},
+	{"¸", ",,", ",a"},
+	{"¹", "1^"},
+	{"º", "o_"},
+	{"»", ">>"},
+	{"¼", "14"},
+	{"½", "12"},
+	{"¾", "34"},
+	{"¿", "??"},
+	{"À", "A`"},
+	{"Á", "A'"},
+	{"Â", "A^"},
+	{"Ã", "A~"},
+	{"Ä", "A:", "A\""},
+	{"Å", "A*"},
+	{"Æ", "AE"},
+	{"Ç", "C,"},
+	{"Č", "C<"},
+	{"È", "E`"},
+	{"É", "E'"},
+	{"Ê", "E^"},
+	{"Ë", "E:", "E\""},
+	{"Ì", "I`"},
+	{"Í", "I'"},
+	{"Î", "I^"},
+	{"Ï", "I:", "I\""},
+	{"Ð", "D-"},
+	{"Ď", "D<"},
+	{"Ñ", "N~"},
+	{"Ò", "O`"},
+	{"Ó", "O'"},
+	{"Ô", "O^"},
+	{"Õ", "O~"},
+	{"Ö", "O:"},
+	{"Ő", "O\""},
+	{"×", "xx"},
+	{"Ø", "O/"},
+	{"Ř", "R<"},
+	{"Š", "S<"},
+	{"Ť", "T<"},
+	{"Ù", "U`"},
+	{"Ú", "U'"},
+	{"Û", "U^"},
+	{"Ü", "U:"},
+	{"Ű", "U\""},
+	{"Ů", "U0"},
+	{"Ý", "Y'"},
+	{"Ÿ", "Y:", "Y\""},
+	{"Ž", "Z<"},
+	{"Þ", "TH"},
+	{"ß", "ss"},
+	{"à", "a`"},
+	{"á", "a'"},
+	{"â", "a^"},
+	{"ã", "a~"},
+	{"ä", "a:", "a\""},
+	{"å", "a*"},
+	{"æ", "ae"},
+	{"ç", "c,"},
+	{"č", "c<"},
+	{"ď", "d<"},
+	{"è", "e`"},
+	{"é", "e'"},
+	{"ê", "e^"},
+	{"ë", "e:", "e\""},
+	{"ě", "e<"},
+	{"ì", "i`"},
+	{"í", "i'"},
+	{"î", "i^"},
+	{"ï", "i\""},
+	{"ð", "d-"},
+	{"ñ", "n~"},
+	{"ň", "n<"},
+	{"ò", "o`"},
+	{"ó", "o'"},
+	{"ô", "o^"},
+	{"õ", "o~"},
+	{"ö", "o:"},
+	{"ő", "o\""},
+	{"÷", "-:"},
+	{"ø", "o/"},
+	{"ř", "r<"},
+	{"š", "s<"},
+	{"ť", "t<"},
+	{"ù", "u`"},
+	{"ú", "u'"},
+	{"û", "u^"},
+	{"ü", "u:"},
+	{"ű", "u\""},
+	{"ů", "u0"},
+	{"ý", "y'"},
+	{"ž", "z<"},
+	{"þ", "th"},
+	{"ÿ", "y:", "y\""},
+	{"˘", "Ua"},
+	{"˙", ".a"},
+	{"˚", "oa"},
+	{"˝", "\"a"},
+	{"˛", "Ca"},
+	{"ˇ", "va"},
+	{"∀", "fa"},
+	{"∃", "te"},
+	{"∋", "st"},
+	{"∗", "**"},
+	{"+", "pl"},
+	{"−", "mi"},
+	{"/", "sl"},
+	{"=", "eq"},
+	{"≅", "cg"},
+	{"Α", "*A"},
+	{"Β", "*B"},
+	{"Χ", "*X"},
+	{"∆", "*D"},
+	{"Ε", "*E"},
+	{"Φ", "*F"},
+	{"Γ", "*G"},
+	{"Η", "*Y"},
+	{"Ι", "*I"},
+	{"Κ", "*K"},
+	{"Λ", "*L"},
+	{"Μ", "*M"},
+	{"Ν", "*N"},
+	{"Ο", "*O"},
+	{"Π", "*P"},
+	{"Θ", "*H"},
+	{"Ρ", "*R"},
+	{"Σ", "*S"},
+	{"Τ", "*T"},
+	{"Υ", "*U"},
+	{"ς", "ts"},
+	{"Ω", "*W"},
+	{"Ξ", "*C"},
+	{"Ψ", "*Q"},
+	{"Ζ", "*Z"},
+	{"∴", "tf"},
+	{"⊥", "pp"},
+	{"", "rn"},
+	{"α", "*a"},
+	{"β", "*b"},
+	{"χ", "*x"},
+	{"δ", "*d"},
+	{"ε", "*e"},
+	{"φ", "*f"},
+	{"γ", "*g"},
+	{"η", "*y"},
+	{"ι", "*i"},
+	{"κ", "*k"},
+	{"λ", "*l"},
+	{"μ", "*m"},
+	{"ν", "*n"},
+	{"ο", "*o"},
+	{"π", "*p"},
+	{"θ", "*h"},
+	{"ρ", "*r"},
+	{"σ", "*s"},
+	{"τ", "*t"},
+	{"υ", "*u"},
+	{"ω", "*w"},
+	{"ξ", "*c"},
+	{"ψ", "*q"},
+	{"ζ", "*z"},
+	{"|", "or"},
+	{"∼", "ap"},
+	{"′", "fm"},
+	{"≤", "<="},
+	{"⁄", "fr"},
+	{"∞", "if"},
+	{"ƒ", "fn",},
+	{"↔", "ab"},
+	{"←", "<-"},
+	{"↑", "ua"},
+	{"→", "->"},
+	{"↓", "da"},
+	{"°", "de"},
+	{"±", "+-"},
+	{"≥", ">="},
+	{"×", "mu"},
+	{"∝", "pt"},
+	{"∂", "pd"},
+	{"•", "bu"},
+	{"÷", "di"},
+	{"≠", "!="},
+	{"≡", "=="},
+	{"≈", "~~"},
+	{"…", "el"},
+	{"", "av"},
+	{"", "ah"},
+	{"↵", "CR"},
+	{"ℵ", "af"},
+	{"ℑ", "If"},
+	{"ℜ", "Rf"},
+	{"ℛ", "ws"},
+	{"⊗", "Ox"},
+	{"⊕", "O+"},
+	{"∅", "es"},
+	{"∩", "ca"},
+	{"∪", "cu"},
+	{"⊃", "sp"},
+	{"⊇", "ip"},
+	{"⊄", "!b"},
+	{"⊂", "sb"},
+	{"⊆", "ib"},
+	{"∈", "mo"},
+	{"∉", "!m"},
+	{"∠", "an"},
+	{"∇", "gr"},
+	{"", "rg"},
+	{"", "co"},
+	{"", "tm"},
+	{"∏", "pr"},
+	{"√", "sr"},
+	{"⋅", "c."},
+	{"¬", "no"},
+	{"∧", "l&"},
+	{"∨", "l|"},
+	{"◊", "lz"},
+	{"〈", "b<"},
+	{"", "RG"},
+	{"", "CO"},
+	{"", "TM"},
+	{"∑", "su"},
+	{"", "LT"},
+	{"", "br", "LX"},
+	{"", "LB"},
+	{"", "lc"},
+	{"", "lx"},
+	{"", "lf"},
+	{"", "lt"},
+	{"", "lk"},
+	{"", "lb"},
+	{"", "bv", "|",},
+	{"〉", "b>"},
+	{"∫", "is"},
+	{"", "RT"},
+	{"", "RX"},
+	{"", "RB"},
+	{"", "rc"},
+	{"", "rx"},
+	{"", "rf"},
+	{"", "rt"},
+	{"", "rk"},
+	{"", "rb"},
+	{"ff", "ff"},
+	{"fi", "fi"},
+	{"fl", "fl"},
+	{"ffi", "ffi"},
+	{"ffl", "ffl"},
+	{"st", "st"},
+};
+
+/*
+ * different shapes of arabic and farsi characters
+ *
+ * Each entry maps a glyph name to its base code point (c) and to the code
+ * points of its isolated (s), initial (i), medial (m) and final (f)
+ * presentation forms; zero means the character has no such form.
+ */
+static struct achar {
+	char *name;
+	unsigned c;
+	unsigned s;
+	unsigned i;
+	unsigned m;
+	unsigned f;
+} achars[] = {
+	{"hamza", 0x0621, 0xfe80},
+	{"alefwithmaddaabove", 0x0622, 0xfe81, 0, 0, 0xfe82},
+	{"alefwithhamzaabove", 0x0623, 0xfe83, 0, 0, 0xfe84},
+	{"wawwithhamzaabove", 0x0624, 0xfe85, 0, 0, 0xfe86},
+	{"alefwithhamzabelow", 0x0625, 0xfe87, 0, 0, 0xfe88},
+	{"yehwithhamzaabove", 0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},
+	{"alef", 0x0627, 0xfe8d, 0, 0, 0xfe8e},
+	{"arabicalef", 0x0627},
+	{"beh", 0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},
+	{"tehmarbuta", 0x0629, 0xfe93, 0, 0, 0xfe94},
+	{"teh", 0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},
+	{"theh", 0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},
+	{"jeem", 0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},
+	{"hah", 0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},
+	{"khah", 0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},
+	{"dal", 0x062f, 0xfea9, 0, 0, 0xfeaa},
+	{"thal", 0x0630, 0xfeab, 0, 0, 0xfeac},
+	{"reh", 0x0631, 0xfead, 0, 0, 0xfeae},
+	{"zain", 0x0632, 0xfeaf, 0, 0, 0xfeb0},
+	{"seen", 0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},
+	{"sheen", 0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},
+	{"sad", 0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},
+	{"dad", 0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},
+	{"tah", 0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},
+	{"zah", 0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},
+	{"ain", 0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},
+	{"ghain", 0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},
+	{"tatweel", 0x0640},
+	{"feh", 0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},
+	{"qaf", 0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},
+	{"kaf", 0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},
+	{"lam", 0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},
+	{"meem", 0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},
+	{"noon", 0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},
+	{"heh", 0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},
+	{"waw", 0x0648, 0xfeed, 0, 0, 0xfeee},
+	{"alefmaksura", 0x0649, 0xfeef, 0, 0, 0xfef0},
+	{"yeh", 0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},
+	{"fathatan", 0x064b, 0xfe70},
+	{"dammatan", 0x064c, 0xfe72},
+	{"kasratan", 0x064d, 0xfe74},
+	{"fatha", 0x064e, 0xfe76, 0, 0xfe77, 0},
+	{"damma", 0x064f, 0xfe78, 0, 0xfe79, 0},
+	{"kasra", 0x0650, 0xfe7a, 0, 0xfe7b, 0},
+	/* U+FE7C is the isolated shadda and U+FE7D its medial form */
+	{"shadda", 0x0651, 0xfe7c, 0, 0xfe7d, 0},
+	{"sukun", 0x0652, 0xfe7e, 0, 0xfe7f, 0},
+	{"peh", 0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},
+	{"tcheh", 0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},
+	{"jeh", 0x0698, 0xfb8a, 0, 0, 0xfb8b},
+	{"keheh", 0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},
+	{"gaf", 0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},
+	{"farsiyeh", 0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},
+	{"lamwithalef", 0xfefb, 0xfefb, 0, 0, 0xfefc},
+};
+
+static struct ctype {
+	char *ch;	/* character name */
+	int type;	/* ascender/descender type */
+} ctype[] = {
+	{"Ï", 2},
+	{"²", 2},
+	{"ì", 2},
+	{"u", 0},
+	{"Ì", 2},
+	{"U", 2},
+	{"∞", 0},
+	{"ú", 2},
+	{"∫", 3},
+	{"Ú", 2},
+	{"a", 0},
+	{"ι", 0},
+	{"û", 2},
+	{"A", 2},
+	{"Ι", 2},
+	{"Û", 2},
+	{"á", 2},
+	{"j", 3},
+	{"ü", 2},
+	{"Á", 2},
+	{"J", 3},
+	{"Ü", 2},
+	{"â", 2},
+	{"k", 2},
+	{"ù", 2},
+	{"Â", 2},
+	{"K", 2},
+	{"Ù", 2},
+	{"´", 2},
+	{"κ", 0},
+	{"ű", 2},
+	{"ä", 2},
+	{"Κ", 2},
+	{"Ű", 2},
+	{"Ä", 2},
+	{"l", 2},
+	{"_", 1},
+	{"æ", 0},
+	{"L", 2},
+	{"υ", 0},
+	{"Æ", 2},
+	{"λ", 2},
+	{"Υ", 2},
+	{"à", 2},
+	{"Λ", 2},
+	{"ů", 2},
+	{"À", 2},
+	{"<", 0},
+	{"Ů", 2},
+	{"α", 0},
+	{"≤", 2},
+	{"v", 0},
+	{"Α", 2},
+	{"¬", 0},
+	{"V", 2},
+	{"&", 2},
+	{"◊", 2},
+	{"w", 0},
+	{"≈", 0},
+	{"m", 0},
+	{"W", 2},
+	{"å", 2},
+	{"M", 2},
+	{"x", 0},
+	{"Å", 2},
+	{"¯", 2},
+	{"X", 2},
+	{"^", 2},
+	{"−", 0},
+	{"ξ", 3},
+	{"~", 0},
+	{"µ", 1},
+	{"Ξ", 2},
+	{"*", 2},
+	{"Μ", 2},
+	{"y", 1},
+	{"@", 3},
+	{"×", 0},
+	{"Y", 2},
+	{"ã", 2},
+	{"n", 0},
+	{"ý", 3},
+	{"Ã", 2},
+	{"N", 2},
+	{"Ý", 2},
+	{"b", 2},
+	{"ň", 2},
+	{"ÿ", 3},
+	{"B", 2},
+	{"9", 2},
+	{"¥", 2},
+	{"\\", 2},
+	{"≠", 0},
+	{"z", 0},
+	{"|", 3},
+	{"ñ", 2},
+	{"Z", 2},
+	{"β", 3},
+	{"Ñ", 2},
+	{"ž", 2},
+	{"Β", 2},
+	{"ν", 0},
+	{"Ž", 2},
+	{"{", 3},
+	{"Ν", 2},
+	{"0", 2},
+	{"}", 3},
+	{"#", 2},
+	{"ζ", 3},
+	{"[", 3},
+	{"o", 0},
+	{"Ζ", 2},
+	{"]", 3},
+	{"O", 2},
+	{"", 3},
+	{"˘", 2},
+	{"ó", 2},
+	{"", 2},
+	{"¦", 3},
+	{"Ó", 2},
+	{"", 3},
+	{"•", 0},
+	{"ô", 2},
+	{"", 2},
+	{"c", 0},
+	{"Ô", 2},
+	{"", 2},
+	{"C", 2},
+	{"ö", 2},
+	{"", 2},
+	{"ˇ", 2},
+	{"Ö", 2},
+	{"", 2},
+	{"č", 2},
+	{"˛", 1},
+	{"", 2},
+	{"Č", 2},
+	{"ò", 2},
+	{"", 2},
+	{"ç", 1},
+	{"Ò", 2},
+	{"،", 0},
+	{"Ç", 3},
+	{"ő", 2},
+	{"؛", 2},
+	{"¸", 1},
+	{"Ő", 2},
+	{"؟", 2},
+	{"¢", 2},
+	{"ω", 0},
+	{"٠", 0},
+	{"χ", 1},
+	{"Ω", 2},
+	{"١", 2},
+	{"Χ", 2},
+	{"ο", 0},
+	{"٢", 2},
+	{"ˆ", 2},
+	{"Ο", 2},
+	{"٣", 2},
+	{":", 0},
+	{"1", 2},
+	{"٤", 2},
+	{",", 1},
+	{"½", 2},
+	{"٥", 2},
+	{"©", 2},
+	{"¼", 2},
+	{"٦", 2},
+	{"¤", 2},
+	{"¹", 2},
+	{"٧", 2},
+	{"d", 2},
+	{"ª", 2},
+	{"٨", 2},
+	{"D", 2},
+	{"º", 2},
+	{"٩", 2},
+	{"ď", 2},
+	{"ø", 0},
+	{"۰", 0},
+	{"Ď", 2},
+	{"Ø", 2},
+	{"۱", 2},
+	{"°", 2},
+	{"õ", 2},
+	{"۲", 2},
+	{"δ", 2},
+	{"Õ", 2},
+	{"۳", 2},
+	{"∆", 2},
+	{"p", 1},
+	{"۴", 2},
+	{"¨", 2},
+	{"P", 2},
+	{"۵", 2},
+	{"÷", 0},
+	{"¶", 3},
+	{"۶", 2},
+	{"$", 2},
+	{"(", 3},
+	{"۷", 2},
+	{"˙", 2},
+	{")", 3},
+	{"۸", 2},
+	{"e", 0},
+	{"∂", 2},
+	{"۹", 2},
+	{"E", 2},
+	{"%", 2},
+	{"٪", 2},
+	{"é", 2},
+	{".", 0},
+	{"‌", 0},
+	{"É", 2},
+	{"·", 0},
+	{"‍", 0},
+	{"ě", 2},
+	{"φ", 3},
+	{"ê", 2},
+	{"Φ", 2},
+	{"Ê", 2},
+	{"π", 0},
+	{"ë", 2},
+	{"Π", 2},
+	{"Ë", 2},
+	{"+", 0},
+	{"è", 2},
+	{"±", 2},
+	{"È", 2},
+	{"∏", 2},
+	{"8", 2},
+	{"ψ", 3},
+	{"…", 0},
+	{"Ψ", 2},
+	{"ε", 0},
+	{"q", 1},
+	{"Ε", 2},
+	{"Q", 3},
+	{"=", 0},
+	{"?", 2},
+	{"η", 1},
+	{"¿", 1},
+	{"Η", 2},
+	{"\"", 2},
+	{"ð", 2},
+	{"‘", 2},
+	{"Ð", 2},
+	{"’", 2},
+	{"!", 2},
+	{"r", 0},
+	{"¡", 1},
+	{"R", 2},
+	{"f", 2},
+	{"√", 2},
+	{"F", 2},
+	{"ř", 2},
+	{"ff", 2},
+	{"Ř", 2},
+	{"ffi", 2},
+	{"®", 2},
+	{"ffj", 3},
+	{"ρ", 1},
+	{"ffl", 2},
+	{"Ρ", 2},
+	{"fi", 2},
+	{"˚", 2},
+	{"fi", 2},
+	{"s", 0},
+	{"5", 2},
+	{"S", 2},
+	{"fj", 3},
+	{"š", 2},
+	{"fl", 2},
+	{"Š", 2},
+	{"fl", 2},
+	{"§", 2},
+	{"ƒ", 3},
+	{";", 1},
+	{"4", 2},
+	{"7", 2},
+	{"⁄", 2},
+	{"σ", 0},
+	{"g", 1},
+	{"Σ", 2},
+	{"G", 2},
+	{"6", 2},
+	{"γ", 1},
+	{"/", 2},
+	{"Γ", 2},
+	{"£", 2},
+	{"ß", 2},
+	{"∑", 2},
+	{"`", 2},
+	{"t", 2},
+	{">", 0},
+	{"T", 2},
+	{"≥", 2},
+	{"τ", 0},
+	{"«", 0},
+	{"Τ", 2},
+	{"»", 0},
+	{"ť", 2},
+	{"h", 2},
+	{"Ť", 2},
+	{"H", 2},
+	{"Th", 2},
+	{"˝", 2},
+	{"θ", 2},
+	{"-", 0},
+	{"Θ", 2},
+	{"i", 2},
+	{"þ", 3},
+	{"I", 2},
+	{"Þ", 2},
+	{"í", 2},
+	{"3", 2},
+	{"Í", 2},
+	{"¾", 2},
+	{"î", 2},
+	{"³", 2},
+	{"Î", 2},
+	{"˜", 2},
+	{"ï", 2},
+	{"2", 2},
+};