ref: 50918a0b71ee777bbb3b7ba7bebb590aeb86f151
author: Ali Gholami Rudi <ali@rudi.ir>
date: Fri Sep 6 18:15:36 EDT 2013
mktrfn: otfdump and improved afm support
--- /dev/null
+++ b/Makefile
@@ -1,0 +1,11 @@
+CC = cc
+CFLAGS = -O2 -Wall
+LDFLAGS =
+
+all: mktrfn
+%.o: %.c
+ $(CC) -c $(CFLAGS) $<
+mktrfn: mktrfn.o trfn.o sbuf.o tab.o
+ $(CC) -o $@ $^ $(LDFLAGS)
+clean:
+ rm -f *.o mktrfn
--- /dev/null
+++ b/mktrfn.c
@@ -1,0 +1,185 @@
+/*
+ * mktrfn - produce troff font descriptions
+ *
+ * Copyright (C) 2012-2013 Ali Gholami Rudi <ali at rudi dot ir>
+ *
+ * This program is released under the Modified BSD license.
+ */
+#include <ctype.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trfn.h"
+
+#define TOKLEN		256
+#define TOKFMT		"%255s"		/* scanf conversion that fits a TOKLEN buffer */
+
+/* return nonzero if string s begins with pre */
+static int startswith(char *s, char *pre)
+{
+	return !strncmp(pre, s, strlen(pre));
+}
+
+/*
+ * Read the token-based font dump selected with -o from standard input.
+ *
+ * Recognised records (tokens are separated by white space):
+ *   name NAME					passed to trfn_psfont()
+ *   char GLYPH width WIDTH			passed to trfn_char()
+ *   kernpair GLYPH1 GLYPH2 width WIDTH	passed to trfn_kern()
+ *   feature NAME substitution GLYPH1 GLYPH2	parsed and discarded
+ * Any other token is skipped.  A record whose fields cannot all be
+ * read is dropped, so no buffer is used before it has been filled.
+ */
+static void otfdump_read(void)
+{
+	char cmd[TOKLEN];
+	char name[TOKLEN];
+	char ch[TOKLEN];
+	char c1[TOKLEN], c2[TOKLEN];
+	char wid[TOKLEN];
+	while (scanf(TOKFMT, cmd) == 1) {
+		if (!strcmp("name", cmd)) {
+			if (scanf(TOKFMT, name) == 1)
+				trfn_psfont(name);
+		} else if (!strcmp("char", cmd)) {
+			if (scanf(TOKFMT " width " TOKFMT, ch, wid) == 2)
+				trfn_char(ch, NULL, atoi(wid), -1);
+		} else if (!strcmp("kernpair", cmd)) {
+			if (scanf(TOKFMT " " TOKFMT " width " TOKFMT, c1, c2, wid) == 3)
+				trfn_kern(c1, c2, atoi(wid));
+		} else if (!strcmp("feature", cmd)) {
+			/* substitutions are not used yet; just consume the record */
+			scanf(TOKFMT " substitution " TOKFMT " " TOKFMT, name, c1, c2);
+		}
+	}
+}
+
+/*
+ * Read an AFM file selected with -a from standard input.
+ *
+ * The file is consumed in four consecutive stages: the header up to
+ * StartCharMetrics (only FontName is used), the character metrics up
+ * to EndCharMetrics, everything up to StartKernPairs, and the KPX
+ * kerning pairs up to EndKernPairs.  Lines starting with '#' are
+ * ignored in every stage; character and kerning lines that do not
+ * match the expected "C .. ; WX .. ; N .." or "KPX .. .. .." layout
+ * are skipped.
+ */
+static void afm_read(void)
+{
+	char ch[TOKLEN], pos[TOKLEN];
+	char c1[TOKLEN], c2[TOKLEN];
+	char wid[TOKLEN];
+	char ln[1024];
+	while (fgets(ln, sizeof(ln), stdin)) {
+		if (ln[0] == '#')
+			continue;
+		if (startswith(ln, "FontName ")) {
+			if (sscanf(ln, "FontName " TOKFMT, ch) == 1)
+				trfn_psfont(ch);
+			continue;
+		}
+		if (startswith(ln, "StartCharMetrics"))
+			break;
+	}
+	while (fgets(ln, sizeof(ln), stdin)) {
+		if (ln[0] == '#')
+			continue;
+		if (startswith(ln, "EndCharMetrics"))
+			break;
+		if (sscanf(ln, "C " TOKFMT " ; WX " TOKFMT " ; N " TOKFMT, pos, wid, ch) == 3)
+			trfn_char(ch, pos, atoi(wid), -1);
+	}
+	while (fgets(ln, sizeof(ln), stdin)) {
+		if (ln[0] == '#')
+			continue;
+		if (startswith(ln, "StartKernPairs"))
+			break;
+	}
+	while (fgets(ln, sizeof(ln), stdin)) {
+		if (ln[0] == '#')
+			continue;
+		if (startswith(ln, "EndKernPairs"))
+			break;
+		if (sscanf(ln, "KPX " TOKFMT " " TOKFMT " " TOKFMT, c1, c2, wid) == 3)
+			trfn_kern(c1, c2, atoi(wid));
+	}
+}
+
+/*
+ * Return the argument of the option at argv[*i]: the rest of that word
+ * if there is one, otherwise the following word (advancing *i).
+ * Returns NULL if the option is the last word on the command line.
+ */
+static char *opt_arg(char *argv[], int *i)
+{
+	if (argv[*i][2])
+		return argv[*i] + 2;
+	if (argv[*i + 1])
+		return argv[++*i];
+	return NULL;
+}
+
+/*
+ * mktrfn [-a | -o] [-r res] [-t name] [-p psname]
+ *
+ *   -a	read AFM from standard input
+ *   -o	read the otfdump format from standard input (default)
+ *   -r	value passed to trfn_init(); default 720
+ *   -t	troff font name, passed to trfn_trfont()
+ *   -p	postscript font name, passed to trfn_psfont()
+ *
+ * The resulting font description is written to standard output.
+ */
+int main(int argc, char *argv[])
+{
+	int afm = 0;
+	int res = 720;
+	char *arg;
+	int i;
+	for (i = 1; i < argc && argv[i][0] == '-'; i++) {
+		int opt = argv[i][1];
+		switch (opt) {
+		case 'a':
+			afm = 1;
+			break;
+		case 'o':
+			afm = 0;
+			break;
+		case 'r':
+		case 't':
+		case 'p':
+			arg = opt_arg(argv, &i);
+			if (!arg) {
+				fprintf(stderr, "mktrfn: option -%c requires an argument\n", opt);
+				return 1;
+			}
+			if (opt == 'r')
+				res = atoi(arg);
+			else if (opt == 't')
+				trfn_trfont(arg);
+			else
+				trfn_psfont(arg);
+			break;
+		default:
+			printf("usage: mktrfn -a -o -r res -t name -p psname\n");
+			return 0;
+		}
+	}
+	/* trfn_init() derives its width divisor as 7200 / res, which must be at least 1 */
+	if (res < 1 || res > 7200) {
+		fprintf(stderr, "mktrfn: resolution should be between 1 and 7200\n");
+		return 1;
+	}
+	trfn_init(res);
+	if (afm)
+		afm_read();
+	else
+		otfdump_read();
+	trfn_print();
+	trfn_done();
+	return 0;
+}
--- /dev/null
+++ b/sbuf.c
@@ -1,0 +1,122 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sbuf.h"
+
+#define MAX(a, b)	((a) < (b) ? (b) : (a))
+#define SBUF_SZ		512		/* allocation granularity; must be a power of two */
+
+/*
+ * Resize the buffer to hold at least amount bytes, rounded up to a
+ * multiple of SBUF_SZ, keeping the bytes already stored.  Terminates
+ * the program if memory cannot be allocated.
+ */
+static void sbuf_extend(struct sbuf *sbuf, int amount)
+{
+	int sz = (MAX(1, amount) + SBUF_SZ - 1) & ~(SBUF_SZ - 1);
+	char *s = realloc(sbuf->s, sz);
+	if (!s) {
+		fprintf(stderr, "sbuf: out of memory\n");
+		exit(1);
+	}
+	sbuf->s = s;
+	sbuf->sz = sz;
+}
+
+/* initialise sbuf as an empty string with SBUF_SZ bytes allocated */
+void sbuf_init(struct sbuf *sbuf)
+{
+	memset(sbuf, 0, sizeof(*sbuf));
+	sbuf_extend(sbuf, SBUF_SZ);
+}
+
+/* append the single byte c */
+void sbuf_add(struct sbuf *sbuf, int c)
+{
+	if (sbuf->n + 2 >= sbuf->sz)
+		sbuf_extend(sbuf, sbuf->sz * 2);
+	sbuf->s[sbuf->n++] = c;
+}
+
+/* append the NUL-terminated string s (without its terminator) */
+void sbuf_append(struct sbuf *sbuf, char *s)
+{
+	int len = strlen(s);
+	/* grow at least geometrically so repeated appends do not copy quadratically */
+	if (sbuf->n + len + 1 >= sbuf->sz)
+		sbuf_extend(sbuf, MAX(sbuf->n + len + 1, sbuf->sz * 2));
+	memcpy(sbuf->s + sbuf->n, s, len);
+	sbuf->n += len;
+}
+
+/*
+ * Append printf-style formatted output.  Output shorter than the local
+ * buffer takes the fast path; longer output is measured first and then
+ * formatted directly into the (extended) sbuf, so it is never
+ * truncated and never overruns the local buffer.
+ */
+void sbuf_printf(struct sbuf *sbuf, char *s, ...)
+{
+	char buf[1024];
+	va_list ap;
+	int len;
+	va_start(ap, s);
+	len = vsnprintf(buf, sizeof(buf), s, ap);
+	va_end(ap);
+	if (len < 0)
+		return;		/* formatting error; nothing to append */
+	if (len < (int) sizeof(buf)) {
+		sbuf_append(sbuf, buf);
+		return;
+	}
+	if (sbuf->n + len + 1 >= sbuf->sz)
+		sbuf_extend(sbuf, sbuf->n + len + 1);
+	va_start(ap, s);
+	vsnprintf(sbuf->s + sbuf->n, len + 1, s, ap);
+	va_end(ap);
+	sbuf->n += len;
+}
+
+/* make sure a non-empty buffer ends with a newline */
+void sbuf_putnl(struct sbuf *sbuf)
+{
+	if (sbuf->n && sbuf->s[sbuf->n - 1] != '\n')
+		sbuf_add(sbuf, '\n');
+}
+
+/* return nonzero if nothing has been stored in sbuf */
+int sbuf_empty(struct sbuf *sbuf)
+{
+	return !sbuf->n;
+}
+
+/*
+ * NUL-terminate the contents and return a pointer to them.  The
+ * pointer refers to the sbuf's own storage, which later additions may
+ * move and sbuf_done() frees.
+ */
+char *sbuf_buf(struct sbuf *sbuf)
+{
+	sbuf->s[sbuf->n] = '\0';
+	return sbuf->s;
+}
+
+/* return the number of bytes stored in sbuf */
+int sbuf_len(struct sbuf *sbuf)
+{
+	return sbuf->n;
+}
+
+/* shorten the sbuf to at most n bytes */
+void sbuf_cut(struct sbuf *sbuf, int n)
+{
+	if (sbuf->n > n)
+		sbuf->n = n;
+}
+
+/* release the storage of sbuf */
+void sbuf_done(struct sbuf *sbuf)
+{
+	free(sbuf->s);
+}
--- /dev/null
+++ b/sbuf.h
@@ -1,0 +1,22 @@
+#ifndef SBUF_H
+#define SBUF_H
+
+/* variable length string buffer */
+struct sbuf {
+	char *s;		/* allocated buffer */
+	int sz;			/* buffer size */
+	int n;			/* length of the string stored in s */
+};
+
+void sbuf_init(struct sbuf *sbuf);
+void sbuf_done(struct sbuf *sbuf);
+char *sbuf_buf(struct sbuf *sbuf);		/* NUL-terminated contents */
+int sbuf_len(struct sbuf *sbuf);
+int sbuf_empty(struct sbuf *sbuf);
+void sbuf_add(struct sbuf *sbuf, int c);	/* append one byte */
+void sbuf_append(struct sbuf *sbuf, char *s);	/* append a string */
+void sbuf_printf(struct sbuf *sbuf, char *s, ...);
+void sbuf_putnl(struct sbuf *sbuf);		/* end a non-empty buffer with a newline */
+void sbuf_cut(struct sbuf *sbuf, int n);	/* shorten to at most n bytes */
+
+#endif
--- /dev/null
+++ b/tab.c
@@ -1,0 +1,85 @@
+#include <stdlib.h>
+#include <string.h>
+#include "tab.h"
+
+/*
+ * A fixed-capacity table mapping strings to pointers.  Entries are
+ * chained per first byte of the key: head[b] is the index of the most
+ * recently added entry whose key starts with byte b (-1 if none) and
+ * next[i] is the index of the entry added before i in the same chain.
+ * Keys and values are stored by reference, not copied.
+ */
+struct tab {
+	char **keys;
+	void **vals;
+	int n;			/* number of entries in use */
+	int sz;			/* number of entries allocated */
+	int *next;
+	int head[256];
+};
+
+/*
+ * Allocate a table with room for sz entries.  Returns NULL if memory
+ * cannot be allocated.
+ */
+struct tab *tab_alloc(int sz)
+{
+	struct tab *tab = calloc(1, sizeof(*tab));
+	int i;
+	if (!tab)
+		return NULL;
+	if (sz < 1)
+		sz = 1;		/* malloc(0) may legitimately return NULL */
+	tab->sz = sz;
+	tab->keys = malloc(sz * sizeof(tab->keys[0]));
+	tab->vals = malloc(sz * sizeof(tab->vals[0]));
+	tab->next = malloc(sz * sizeof(tab->next[0]));
+	if (!tab->keys || !tab->vals || !tab->next) {
+		tab_free(tab);
+		return NULL;
+	}
+	for (i = 0; i < 256; i++)
+		tab->head[i] = -1;
+	return tab;
+}
+
+/* free the table; the keys and values themselves are not freed */
+void tab_free(struct tab *tab)
+{
+	if (!tab)
+		return;
+	free(tab->keys);
+	free(tab->vals);
+	free(tab->next);
+	free(tab);
+}
+
+/*
+ * Add the mapping k -> v.  An entry added later hides earlier entries
+ * with the same key.  The call is ignored once the table holds as many
+ * entries as it was allocated for, instead of writing past the arrays.
+ */
+void tab_put(struct tab *tab, char *k, void *v)
+{
+	int h = (unsigned char) k[0];
+	if (tab->n >= tab->sz)
+		return;
+	tab->keys[tab->n] = k;
+	tab->vals[tab->n] = v;
+	tab->next[tab->n] = tab->head[h];
+	tab->head[h] = tab->n;
+	tab->n++;
+}
+
+/* return the value most recently stored for key k, or NULL if there is none */
+void *tab_get(struct tab *tab, char *k)
+{
+	int i = tab->head[(unsigned char) k[0]];
+	while (i >= 0) {
+		/* k[1] is a cheap pre-check; it exists only if k is not empty */
+		if ((!k[0] || k[1] == tab->keys[i][1]) && !strcmp(k, tab->keys[i]))
+			return tab->vals[i];
+		i = tab->next[i];
+	}
+	return NULL;
+}
--- /dev/null
+++ b/tab.h
@@ -1,0 +1,10 @@
+#ifndef TAB_H
+#define TAB_H
+
+/* string-to-pointer lookup table (tab.c) */
+struct tab *tab_alloc(int sz);				/* sz: number of entries to allocate */
+void tab_free(struct tab *tab);
+void tab_put(struct tab *tab, char *k, void *v);	/* k and v are stored, not copied */
+void *tab_get(struct tab *tab, char *k);		/* NULL if k is not in the table */
+
+#endif
--- /dev/null
+++ b/trfn.c
@@ -1,0 +1,367 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "sbuf.h"
+#include "tab.h"
+#include "trfn.h"
+#include "trfn_ch.h"
+
+#define WX(w)		(((w) < 0 ? (w) - trfn_div / 2 : (w) + trfn_div / 2) / trfn_div)
+#define LEN(a)		((sizeof(a) / sizeof((a)[0])))
+#define HEXDIGS		"0123456789abcdef"
+#define GNLEN		32
+#define AGLLEN		(8 * 1024)
+#define UTF8LEN		4		/* most bytes pututf8() writes before the NUL */
+
+static struct sbuf sbuf_char;	/* charset section */
+static struct sbuf sbuf_kern;	/* kernpairs section */
+static int trfn_div;		/* divisor of widths */
+static int trfn_swid;		/* space width */
+static char trfn_ligs[1024];	/* font ligatures */
+static char trfn_trname[256];	/* font troff name */
+static char trfn_psname[256];	/* font ps name */
+
+/* adobe glyphlist mapping */
+static char agl_key[AGLLEN][GNLEN];
+static char agl_val[AGLLEN][GNLEN];
+static int agl_n;
+
+/* lookup tables */
+static struct tab *tab_agl;
+static struct tab *tab_alts;
+static struct tab *tab_ctyp;
+
+/*
+ * Store the UTF-8 encoding of c at *d, advance *d past it and write a
+ * terminating NUL there.  Values of zero or less are stored as a space.
+ * At most UTF8LEN bytes are written before the NUL.
+ */
+static void pututf8(char **d, int c)
+{
+	int l;
+	if (c > 0xffff) {
+		*(*d)++ = 0xf0 | (c >> 18);
+		l = 3;
+	} else if (c > 0x7ff) {
+		*(*d)++ = 0xe0 | (c >> 12);
+		l = 2;
+	} else if (c > 0x7f) {
+		*(*d)++ = 0xc0 | (c >> 6);
+		l = 1;
+	} else {
+		*(*d)++ = c > 0 ? c : ' ';
+		l = 0;
+	}
+	while (l--)
+		*(*d)++ = 0x80 | ((c >> (l * 6)) & 0x3f);
+	**d = '\0';
+}
+
+/*
+ * Parse up to len hexadecimal digits (either case) at the start of s,
+ * stopping at the first character that is not a hex digit.  When len
+ * is 1 the digit's value is returned multiplied by 16.
+ */
+static int hexval(char *s, int len)
+{
+	char *digs = HEXDIGS;
+	int n = 0;
+	int i;
+	for (i = 0; i < len; i++) {
+		/* tolower() needs an unsigned char value */
+		char *p = s[i] ? strchr(digs, tolower((unsigned char) s[i])) : NULL;
+		if (!p)
+			break;
+		n = n * 16 + (p - digs);
+	}
+	return len == 1 ? n << 4 : n;
+}
+
+/*
+ * Read the glyph list at path into agl_key[]/agl_val[] and index it in
+ * tab_agl.  Each useful line has the form "name;XXXX XXXX ...", where
+ * the XXXX are hexadecimal code points; lines starting with '#' or
+ * lacking ';' are skipped, as are entries whose name or UTF-8 value
+ * does not fit in GNLEN bytes.  Reading stops after AGLLEN entries.
+ * Returns nonzero if the file cannot be opened.
+ */
+static int agl_read(char *path)
+{
+	FILE *fin = fopen(path, "r");
+	char ln[GNLEN * 8];
+	char val[GNLEN];
+	char *s, *d;
+	int i;
+	if (!fin)
+		return 1;
+	while (agl_n < AGLLEN && fgets(ln, sizeof(ln), fin)) {
+		s = strchr(ln, ';');
+		if (ln[0] == '#' || !s)
+			continue;
+		*s++ = '\0';
+		if (strlen(ln) >= GNLEN)
+			continue;	/* name too long for agl_key[] */
+		d = val;
+		while (s && *s && val + sizeof(val) - d > UTF8LEN) {
+			while (*s == ' ')
+				s++;
+			pututf8(&d, hexval(s, 6));
+			s = strchr(s, ' ');
+		}
+		*d = '\0';
+		if (s && *s)
+			continue;	/* value too long for val[] */
+		strcpy(agl_key[agl_n], ln);
+		strcpy(agl_val[agl_n], val);
+		agl_n++;
+	}
+	fclose(fin);
+	tab_agl = tab_alloc(agl_n);
+	for (i = 0; tab_agl && i < agl_n; i++)
+		tab_put(tab_agl, agl_key[i], agl_val[i]);
+	return 0;
+}
+
+/* return the UTF-8 string the glyph list gives for glyph name s, or NULL */
+static char *agl_map(char *s)
+{
+	/* tab_agl stays NULL when the glyph list could not be read */
+	return tab_agl ? tab_get(tab_agl, s) : NULL;
+}
+
+/*
+ * Map the name of an arabic or farsi letter, optionally followed by
+ * "isolated", "initial", "medial" or "final", to a code point from
+ * achars[].  A missing positional form falls back to the base letter.
+ * Returns 0 if the name is not recognised.
+ */
+static int achar_map(char *name)
+{
+	int i;
+	for (i = 0; i < LEN(achars); i++) {
+		struct achar *a = &achars[i];
+		if (!strncmp(a->name, name, strlen(a->name))) {
+			char *postfix = name + strlen(a->name);
+			if (!*postfix)
+				return a->c;
+			if (!strcmp("isolated", postfix))
+				return a->s ? a->s : a->c;
+			if (!strcmp("initial", postfix))
+				return a->i ? a->i : a->c;
+			if (!strcmp("medial", postfix))
+				return a->m ? a->m : a->c;
+			if (!strcmp("final", postfix))
+				return a->f ? a->f : a->c;
+		}
+	}
+	return 0;
+}
+
+/* pututf8() with a bound: fails (returns 1) unless UTF8LEN bytes fit before end */
+static int pututf8_to(char **d, char *end, int c)
+{
+	if (end - *d < UTF8LEN)
+		return 1;
+	pututf8(d, c);
+	return 0;
+}
+
+/*
+ * Convert the glyph name src to the UTF-8 character name dst, which
+ * must have room for GNLEN bytes.  The name is split at '_' and each
+ * component is translated through, in order, the glyph list, the
+ * "uniXXXX..." and "uXXXX" conventions, and achar_map().
+ *
+ * Returns nonzero if the name cannot be converted: it starts with or
+ * contains '.', has an empty, unknown or over-long component, or the
+ * result would not fit in dst.
+ */
+static int trfn_name(char *dst, char *src)
+{
+	char ch[GNLEN];
+	char *end = dst + GNLEN - 1;	/* position of the last possible NUL */
+	char *s, *val;
+	int i;
+	if (!src[0] || src[0] == '.')
+		return 1;
+	if (src[1] && strchr(src, '.'))
+		return 1;	/* ignore opentype features for now */
+	while (*src && *src != '.') {
+		s = ch;
+		if (src[0] == '_')
+			src++;
+		while (*src && *src != '_' && *src != '.') {
+			if (s == ch + sizeof(ch) - 1)
+				return 1;	/* component too long for ch[] */
+			*s++ = *src++;
+		}
+		*s = '\0';
+		if (!ch[0])
+			return 1;	/* empty component */
+		val = agl_map(ch);
+		if (val) {
+			for (i = 0; i < LEN(agl_exceptions); i++)
+				if (!strcmp(agl_exceptions[i][0], val))
+					val = agl_exceptions[i][1];
+			if (strlen(val) > (size_t) (end - dst))
+				return 1;
+			strcpy(dst, val);
+			dst = strchr(dst, '\0');
+		} else if (ch[0] == 'u' && ch[1] == 'n' && ch[2] == 'i') {
+			if (strlen(ch + 3) < 4)
+				return 1;	/* no code point; dst would stay unset */
+			for (i = 0; strlen(ch + 3 + 4 * i) >= 4; i++)
+				if (pututf8_to(&dst, end, hexval(ch + 3 + 4 * i, 4)))
+					return 1;
+		} else if (ch[0] == 'u' && ch[1] && strchr(HEXDIGS, tolower((unsigned char) ch[1]))) {
+			if (pututf8_to(&dst, end, hexval(ch + 1, 6)))
+				return 1;
+		} else if (achar_map(ch)) {
+			if (pututf8_to(&dst, end, achar_map(ch)))
+				return 1;
+		} else {
+			return 1;
+		}
+	}
+	return src[0];
+}
+
+/* record c in the font's ligature list if it is one of ligs[] */
+static void trfn_lig(char *c)
+{
+	int i;
+	for (i = 0; i < LEN(ligs); i++) {
+		if (!strcmp(ligs[i], c)) {
+			size_t len = strlen(trfn_ligs);
+			/* room for c, a space and the NUL */
+			if (len + strlen(c) + 2 <= sizeof(trfn_ligs))
+				sprintf(trfn_ligs + len, "%s ", c);
+		}
+	}
+}
+
+/* return the type listed for character c in ctype[], or 3 if it is not listed */
+static int trfn_type(char *c)
+{
+	struct ctype *t = tab_get(tab_ctyp, c);
+	return t ? t->type : 3;
+}
+
+/*
+ * Add a glyph to the charset section.
+ *
+ * c is the glyph name, n its position in the font (may be NULL), wid
+ * its width (scaled with WX()) and typ its type; a negative typ is
+ * looked up with trfn_type().  Glyphs whose name cannot be converted
+ * are listed as "---".  Names containing a space are not emitted, but
+ * a plain space glyph sets the space width if none is set yet.
+ */
+void trfn_char(char *c, char *n, int wid, int typ)
+{
+	char uc[GNLEN];
+	char num[16];
+	char *pos = c;		/* last field of the charset line */
+	char **a;
+	if (trfn_name(uc, c))
+		strcpy(uc, "---");
+	if (strchr(uc, ' ')) {	/* space not allowed in character names */
+		if (!trfn_swid && !strcmp(" ", uc))
+			trfn_swid = WX(wid);
+		return;
+	}
+	trfn_lig(uc);
+	if (typ < 0)
+		typ = trfn_type(uc);
+	/* point at the chosen string instead of copying it into a small buffer */
+	if (n && atoi(n) >= 0 && atoi(n) < 256)
+		pos = n;
+	if (!n && !uc[1] && uc[0] >= 32 && uc[0] <= 125) {
+		sprintf(num, "%d", uc[0]);
+		pos = num;
+	}
+	sbuf_printf(&sbuf_char, "%s\t%d\t%d\t%s\n", uc, WX(wid), typ, pos);
+	a = tab_get(tab_alts, uc);
+	while (a && *a)
+		sbuf_printf(&sbuf_char, "%s\t\"\n", *a++);
+}
+
+/* add a kerning pair unless a name cannot be converted or the amount is too small */
+void trfn_kern(char *c1, char *c2, int x)
+{
+	char g1[GNLEN], g2[GNLEN];
+	if (trfn_name(g1, c1) || trfn_name(g2, c2))
+		return;
+	if (abs(WX(x)) <= WX(20))
+		return;
+	if (!strchr(g1, ' ') && !strchr(g2, ' '))
+		sbuf_printf(&sbuf_kern, "%s\t%s\t%d\n", g1, g2, WX(x));
+}
+
+/* set the troff name of the font; ignored once a name has been set */
+void trfn_trfont(char *name)
+{
+	if (!trfn_trname[0])
+		snprintf(trfn_trname, sizeof(trfn_trname), "%s", name);
+}
+
+/* set the postscript name of the font; ignored once a name has been set */
+void trfn_psfont(char *name)
+{
+	if (!trfn_psname[0])
+		snprintf(trfn_psname, sizeof(trfn_psname), "%s", name);
+}
+
+/* write the font description to standard output */
+void trfn_print(void)
+{
+	if (trfn_trname[0])
+		printf("name %s\n", trfn_trname);
+	if (trfn_psname[0])
+		printf("fontname %s\n", trfn_psname);
+	printf("spacewidth %d\n", trfn_swid);
+	printf("ligatures %s 0\n", trfn_ligs);
+	printf("charset\n");
+	printf("%s", sbuf_buf(&sbuf_char));
+	printf("kernpairs\n");
+	printf("%s", sbuf_buf(&sbuf_kern));
+}
+
+/*
+ * Prepare for a new font.  Widths are later divided by 7200 / res
+ * (at least 1).  The glyph list is read from "glyphlist.txt" in the
+ * current directory; without it only the uniXXXX, uXXXX and arabic
+ * glyph names can be converted.
+ */
+void trfn_init(int res)
+{
+	int i;
+	trfn_div = res > 0 ? 7200 / res : 1;
+	if (trfn_div < 1)
+		trfn_div = 1;	/* WX() divides by trfn_div */
+	if (agl_read("glyphlist.txt"))
+		fprintf(stderr, "mktrfn: cannot read glyphlist.txt\n");
+	sbuf_init(&sbuf_char);
+	sbuf_init(&sbuf_kern);
+	tab_alts = tab_alloc(LEN(alts));
+	tab_ctyp = tab_alloc(LEN(ctype));
+	if (!tab_alts || !tab_ctyp) {
+		fprintf(stderr, "mktrfn: out of memory\n");
+		exit(1);
+	}
+	for (i = 0; i < LEN(alts); i++)
+		tab_put(tab_alts, alts[i][0], alts[i] + 1);
+	for (i = 0; i < LEN(ctype); i++)
+		tab_put(tab_ctyp, ctype[i].ch, &ctype[i]);
+}
+
+/* release everything allocated by trfn_init() */
+void trfn_done(void)
+{
+	sbuf_done(&sbuf_char);
+	sbuf_done(&sbuf_kern);
+	tab_free(tab_alts);
+	tab_free(tab_ctyp);
+	if (tab_agl)
+		tab_free(tab_agl);
+}
--- /dev/null
+++ b/trfn.h
@@ -1,0 +1,13 @@
+#ifndef TRFN_H
+#define TRFN_H
+
+/* troff font description generator (trfn.c) */
+void trfn_init(int res);		/* set up the tables used by trfn_char() and trfn_kern() */
+void trfn_done(void);			/* free what trfn_init() allocated */
+void trfn_trfont(char *name);		/* troff font name; ignored once a name is set */
+void trfn_psfont(char *fontname);	/* postscript font name; ignored once a name is set */
+void trfn_print(void);			/* write the font description to standard output */
+void trfn_char(char *c, char *n, int wid, int typ);	/* n may be NULL; typ < 0: look the type up */
+void trfn_kern(char *c1, char *c2, int x);	/* kerning of x between glyphs c1 and c2 */
+
+#endif
--- /dev/null
+++ b/trfn_ch.h
@@ -1,0 +1,688 @@
+/* ligatures */
+static char *ligs[] = {
+ "fh", "fi", "fj", "fk", "fl", "ff", "ffi", "ffj", "ffl", "fft", "ft", "Th"
+};
+
+/* AGL exceptions; expanding ligatures */
+static char *agl_exceptions[][2] = {
+ {"ff", "ff"},
+ {"fi", "fi"},
+ {"fl", "fl"},
+ {"ffi", "ffi"},
+ {"ffl", "ffl"},
+ {"st", "st"},
+};
+
+/* troff aliases */
+static char *alts[][8] = {
+ {"\\", "bs"},
+ {"`", "ga"},
+ {"¡", "!!"},
+ {"¢", "c|", "ct"},
+ {"£", "L-", "ps"},
+ {"¤", "xo", "cr"},
+ {"¥", "Y-", "yn"},
+ {"¦", "||"},
+ {"§", "so", "sc"},
+ {"¨", "\"\"", ":a"},
+ {"©", "co"},
+ {"ª", "a_"},
+ {"«", "<<"},
+ {"»", ">>"},
+ {"¬", "-,"},
+ {"¬", "-,"},
+ {"-", "hy"},
+ {"−", "--"},
+ {"®", "ro", "rg"},
+ {"¯", "a^", "-a"},
+ {"°", "0^"},
+ {"±", "+-"},
+ {"²", "2^"},
+ {"³", "3^"},
+ {"´", "\\'", "aa"},
+ {"µ", "mu"},
+ {"¶", "P!", "pg"},
+ {"·", ".^"},
+ {"¸", ",,", ",a"},
+ {"¹", "1^"},
+ {"º", "o_"},
+ {"»", ">>"},
+ {"¼", "14"},
+ {"½", "12"},
+ {"¾", "34"},
+ {"¿", "??"},
+ {"À", "A`"},
+ {"Á", "A'"},
+ {"Â", "A^"},
+ {"Ã", "A~"},
+ {"Ä", "A:", "A\""},
+ {"Å", "A*"},
+ {"Æ", "AE"},
+ {"Ç", "C,"},
+ {"Č", "C<"},
+ {"È", "E`"},
+ {"É", "E'"},
+ {"Ê", "E^"},
+ {"Ë", "E:", "E\""},
+ {"Ì", "I`"},
+ {"Í", "I'"},
+ {"Î", "I^"},
+ {"Ï", "I:", "I\""},
+ {"Ð", "D-"},
+ {"Ď", "D<"},
+ {"Ñ", "N~"},
+ {"Ò", "O`"},
+ {"Ó", "O'"},
+ {"Ô", "O^"},
+ {"Õ", "O~"},
+ {"Ö", "O:"},
+ {"Ő", "O\""},
+ {"×", "xx"},
+ {"Ø", "O/"},
+ {"Ř", "R<"},
+ {"Š", "S<"},
+ {"Ť", "T<"},
+ {"Ù", "U`"},
+ {"Ú", "U'"},
+ {"Û", "U^"},
+ {"Ü", "U:"},
+ {"Ű", "U\""},
+ {"Ů", "U0"},
+ {"Ý", "Y'"},
+ {"Ÿ", "Y:", "Y\""},
+ {"Ž", "Z<"},
+ {"Þ", "TH"},
+ {"ß", "ss"},
+ {"à", "a`"},
+ {"á", "a'"},
+ {"â", "a^"},
+ {"ã", "a~"},
+ {"ä", "a:", "a\""},
+ {"å", "a*"},
+ {"æ", "ae"},
+ {"ç", "c,"},
+ {"č", "c<"},
+ {"ď", "d<"},
+ {"è", "e`"},
+ {"é", "e'"},
+ {"ê", "e^"},
+ {"ë", "e:", "e\""},
+ {"ě", "e<"},
+ {"ì", "i`"},
+ {"í", "i'"},
+ {"î", "i^"},
+ {"ï", "i\""},
+ {"ð", "d-"},
+ {"ñ", "n~"},
+ {"ň", "n<"},
+ {"ò", "o`"},
+ {"ó", "o'"},
+ {"ô", "o^"},
+ {"õ", "o~"},
+ {"ö", "o:"},
+ {"ő", "o\""},
+ {"÷", "-:"},
+ {"ø", "o/"},
+ {"ř", "r<"},
+ {"š", "s<"},
+ {"ť", "t<"},
+ {"ù", "u`"},
+ {"ú", "u'"},
+ {"û", "u^"},
+ {"ü", "u:"},
+ {"ű", "u\""},
+ {"ů", "u0"},
+ {"ý", "y'"},
+ {"ž", "z<"},
+ {"þ", "th"},
+ {"ÿ", "y:", "y\""},
+ {"˘", "Ua"},
+ {"˙", ".a"},
+ {"˚", "oa"},
+ {"˝", "\"a"},
+ {"˛", "Ca"},
+ {"ˇ", "va"},
+ {"∀", "fa"},
+ {"∃", "te"},
+ {"∋", "st"},
+ {"∗", "**"},
+ {"+", "pl"},
+ {"−", "mi"},
+ {"/", "sl"},
+ {"=", "eq"},
+ {"≅", "cg"},
+ {"Α", "*A"},
+ {"Β", "*B"},
+ {"Χ", "*X"},
+ {"∆", "*D"},
+ {"Ε", "*E"},
+ {"Φ", "*F"},
+ {"Γ", "*G"},
+ {"Η", "*Y"},
+ {"Ι", "*I"},
+ {"Κ", "*K"},
+ {"Λ", "*L"},
+ {"Μ", "*M"},
+ {"Ν", "*N"},
+ {"Ο", "*O"},
+ {"Π", "*P"},
+ {"Θ", "*H"},
+ {"Ρ", "*R"},
+ {"Σ", "*S"},
+ {"Τ", "*T"},
+ {"Υ", "*U"},
+ {"ς", "ts"},
+ {"Ω", "*W"},
+ {"Ξ", "*C"},
+ {"Ψ", "*Q"},
+ {"Ζ", "*Z"},
+ {"∴", "tf"},
+ {"⊥", "pp"},
+ {"", "rn"},
+ {"α", "*a"},
+ {"β", "*b"},
+ {"χ", "*x"},
+ {"δ", "*d"},
+ {"ε", "*e"},
+ {"φ", "*f"},
+ {"γ", "*g"},
+ {"η", "*y"},
+ {"ι", "*i"},
+ {"κ", "*k"},
+ {"λ", "*l"},
+ {"μ", "*m"},
+ {"ν", "*n"},
+ {"ο", "*o"},
+ {"π", "*p"},
+ {"θ", "*h"},
+ {"ρ", "*r"},
+ {"σ", "*s"},
+ {"τ", "*t"},
+ {"υ", "*u"},
+ {"ω", "*w"},
+ {"ξ", "*c"},
+ {"ψ", "*q"},
+ {"ζ", "*z"},
+ {"|", "or"},
+ {"∼", "ap"},
+ {"′", "fm"},
+ {"≤", "<="},
+ {"⁄", "fr"},
+ {"∞", "if"},
+ {"ƒ", "fn",},
+ {"↔", "ab"},
+ {"←", "<-"},
+ {"↑", "ua"},
+ {"→", "->"},
+ {"↓", "da"},
+ {"°", "de"},
+ {"±", "+-"},
+ {"≥", ">="},
+ {"×", "mu"},
+ {"∝", "pt"},
+ {"∂", "pd"},
+ {"•", "bu"},
+ {"÷", "di"},
+ {"≠", "!="},
+ {"≡", "=="},
+ {"≈", "~~"},
+ {"…", "el"},
+ {"", "av"},
+ {"", "ah"},
+ {"↵", "CR"},
+ {"ℵ", "af"},
+ {"ℑ", "If"},
+ {"ℜ", "Rf"},
+ {"ℛ", "ws"},
+ {"⊗", "Ox"},
+ {"⊕", "O+"},
+ {"∅", "es"},
+ {"∩", "ca"},
+ {"∪", "cu"},
+ {"⊃", "sp"},
+ {"⊇", "ip"},
+ {"⊄", "!b"},
+ {"⊂", "sb"},
+ {"⊆", "ib"},
+ {"∈", "mo"},
+ {"∉", "!m"},
+ {"∠", "an"},
+ {"∇", "gr"},
+ {"", "rg"},
+ {"", "co"},
+ {"", "tm"},
+ {"∏", "pr"},
+ {"√", "sr"},
+ {"⋅", "c."},
+ {"¬", "no"},
+ {"∧", "l&"},
+ {"∨", "l|"},
+ {"◊", "lz"},
+ {"〈", "b<"},
+ {"", "RG"},
+ {"", "CO"},
+ {"", "TM"},
+ {"∑", "su"},
+ {"", "LT"},
+ {"", "br", "LX"},
+ {"", "LB"},
+ {"", "lc"},
+ {"", "lx"},
+ {"", "lf"},
+ {"", "lt"},
+ {"", "lk"},
+ {"", "lb"},
+ {"", "bv", "|",},
+ {"〉", "b>"},
+ {"∫", "is"},
+ {"", "RT"},
+ {"", "RX"},
+ {"", "RB"},
+ {"", "rc"},
+ {"", "rx"},
+ {"", "rf"},
+ {"", "rt"},
+ {"", "rk"},
+ {"", "rb"},
+ {"ff", "ff"},
+ {"fi", "fi"},
+ {"fl", "fl"},
+ {"ffi", "ffi"},
+ {"ffl", "ffl"},
+ {"st", "st"},
+};
+
+/* different shapes of arabic and farsi characters */
+static struct achar {
+ char *name;
+ unsigned c;
+ unsigned s;
+ unsigned i;
+ unsigned m;
+ unsigned f;
+} achars[] = {
+ {"hamza", 0x0621, 0xfe80},
+ {"alefwithmaddaabove", 0x0622, 0xfe81, 0, 0, 0xfe82},
+ {"alefwithhamzaabove", 0x0623, 0xfe83, 0, 0, 0xfe84},
+ {"wawwithhamzaabove", 0x0624, 0xfe85, 0, 0, 0xfe86},
+ {"alefwithhamzabelow", 0x0625, 0xfe87, 0, 0, 0xfe88},
+ {"yehwithhamzaabove", 0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},
+ {"alef", 0x0627, 0xfe8d, 0, 0, 0xfe8e},
+ {"arabicalef", 0x0627},
+ {"beh", 0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},
+ {"tehmarbuta", 0x0629, 0xfe93, 0, 0, 0xfe94},
+ {"teh", 0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},
+ {"theh", 0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},
+ {"jeem", 0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},
+ {"hah", 0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},
+ {"khah", 0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},
+ {"dal", 0x062f, 0xfea9, 0, 0, 0xfeaa},
+ {"thal", 0x0630, 0xfeab, 0, 0, 0xfeac},
+ {"reh", 0x0631, 0xfead, 0, 0, 0xfeae},
+ {"zain", 0x0632, 0xfeaf, 0, 0, 0xfeb0},
+ {"seen", 0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},
+ {"sheen", 0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},
+ {"sad", 0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},
+ {"dad", 0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},
+ {"tah", 0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},
+ {"zah", 0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},
+ {"ain", 0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},
+ {"ghain", 0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},
+ {"tatweel", 0x0640},
+ {"feh", 0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},
+ {"qaf", 0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},
+ {"kaf", 0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},
+ {"lam", 0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},
+ {"meem", 0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},
+ {"noon", 0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},
+ {"heh", 0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},
+ {"waw", 0x0648, 0xfeed, 0, 0, 0xfeee},
+ {"alefmaksura", 0x0649, 0xfeef, 0, 0, 0xfef0},
+ {"yeh", 0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},
+ {"fathatan", 0x064b, 0xfe70},
+ {"dammatan", 0x064c, 0xfe72},
+ {"kasratan", 0x064d, 0xfe74},
+ {"fatha", 0x064e, 0xfe76, 0, 0xfe77, 0},
+ {"damma", 0x064f, 0xfe78, 0, 0xfe79, 0},
+ {"kasra", 0x0650, 0xfe7a, 0, 0xfe7b, 0},
+ {"shadda", 0x0651, 0xfe7c, 0, 0xfe7c, 0},
+ {"sukun", 0x0652, 0xfe7e, 0, 0xfe7f, 0},
+ {"peh", 0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},
+ {"tcheh", 0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},
+ {"jeh", 0x0698, 0xfb8a, 0, 0, 0xfb8b},
+ {"keheh", 0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},
+ {"gaf", 0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},
+ {"farsiyeh", 0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},
+ {"lamwithalef", 0xfefb, 0xfefb, 0, 0, 0xfefc},
+};
+
+static struct ctype {
+ char *ch; /* character name */
+ int type; /* ascender/descender type */
+} ctype[] = {
+ {"Ï", 2},
+ {"²", 2},
+ {"ì", 2},
+ {"u", 0},
+ {"Ì", 2},
+ {"U", 2},
+ {"∞", 0},
+ {"ú", 2},
+ {"∫", 3},
+ {"Ú", 2},
+ {"a", 0},
+ {"ι", 0},
+ {"û", 2},
+ {"A", 2},
+ {"Ι", 2},
+ {"Û", 2},
+ {"á", 2},
+ {"j", 3},
+ {"ü", 2},
+ {"Á", 2},
+ {"J", 3},
+ {"Ü", 2},
+ {"â", 2},
+ {"k", 2},
+ {"ù", 2},
+ {"Â", 2},
+ {"K", 2},
+ {"Ù", 2},
+ {"´", 2},
+ {"κ", 0},
+ {"ű", 2},
+ {"ä", 2},
+ {"Κ", 2},
+ {"Ű", 2},
+ {"Ä", 2},
+ {"l", 2},
+ {"_", 1},
+ {"æ", 0},
+ {"L", 2},
+ {"υ", 0},
+ {"Æ", 2},
+ {"λ", 2},
+ {"Υ", 2},
+ {"à", 2},
+ {"Λ", 2},
+ {"ů", 2},
+ {"À", 2},
+ {"<", 0},
+ {"Ů", 2},
+ {"α", 0},
+ {"≤", 2},
+ {"v", 0},
+ {"Α", 2},
+ {"¬", 0},
+ {"V", 2},
+ {"&", 2},
+ {"◊", 2},
+ {"w", 0},
+ {"≈", 0},
+ {"m", 0},
+ {"W", 2},
+ {"å", 2},
+ {"M", 2},
+ {"x", 0},
+ {"Å", 2},
+ {"¯", 2},
+ {"X", 2},
+ {"^", 2},
+ {"−", 0},
+ {"ξ", 3},
+ {"~", 0},
+ {"µ", 1},
+ {"Ξ", 2},
+ {"*", 2},
+ {"Μ", 2},
+ {"y", 1},
+ {"@", 3},
+ {"×", 0},
+ {"Y", 2},
+ {"ã", 2},
+ {"n", 0},
+ {"ý", 3},
+ {"Ã", 2},
+ {"N", 2},
+ {"Ý", 2},
+ {"b", 2},
+ {"ň", 2},
+ {"ÿ", 3},
+ {"B", 2},
+ {"9", 2},
+ {"¥", 2},
+ {"\\", 2},
+ {"≠", 0},
+ {"z", 0},
+ {"|", 3},
+ {"ñ", 2},
+ {"Z", 2},
+ {"β", 3},
+ {"Ñ", 2},
+ {"ž", 2},
+ {"Β", 2},
+ {"ν", 0},
+ {"Ž", 2},
+ {"{", 3},
+ {"Ν", 2},
+ {"0", 2},
+ {"}", 3},
+ {"#", 2},
+ {"ζ", 3},
+ {"[", 3},
+ {"o", 0},
+ {"Ζ", 2},
+ {"]", 3},
+ {"O", 2},
+ {"", 3},
+ {"˘", 2},
+ {"ó", 2},
+ {"", 2},
+ {"¦", 3},
+ {"Ó", 2},
+ {"", 3},
+ {"•", 0},
+ {"ô", 2},
+ {"", 2},
+ {"c", 0},
+ {"Ô", 2},
+ {"", 2},
+ {"C", 2},
+ {"ö", 2},
+ {"", 2},
+ {"ˇ", 2},
+ {"Ö", 2},
+ {"", 2},
+ {"č", 2},
+ {"˛", 1},
+ {"", 2},
+ {"Č", 2},
+ {"ò", 2},
+ {"", 2},
+ {"ç", 1},
+ {"Ò", 2},
+ {"،", 0},
+ {"Ç", 3},
+ {"ő", 2},
+ {"؛", 2},
+ {"¸", 1},
+ {"Ő", 2},
+ {"؟", 2},
+ {"¢", 2},
+ {"ω", 0},
+ {"٠", 0},
+ {"χ", 1},
+ {"Ω", 2},
+ {"١", 2},
+ {"Χ", 2},
+ {"ο", 0},
+ {"٢", 2},
+ {"ˆ", 2},
+ {"Ο", 2},
+ {"٣", 2},
+ {":", 0},
+ {"1", 2},
+ {"٤", 2},
+ {",", 1},
+ {"½", 2},
+ {"٥", 2},
+ {"©", 2},
+ {"¼", 2},
+ {"٦", 2},
+ {"¤", 2},
+ {"¹", 2},
+ {"٧", 2},
+ {"d", 2},
+ {"ª", 2},
+ {"٨", 2},
+ {"D", 2},
+ {"º", 2},
+ {"٩", 2},
+ {"ď", 2},
+ {"ø", 0},
+ {"۰", 0},
+ {"Ď", 2},
+ {"Ø", 2},
+ {"۱", 2},
+ {"°", 2},
+ {"õ", 2},
+ {"۲", 2},
+ {"δ", 2},
+ {"Õ", 2},
+ {"۳", 2},
+ {"∆", 2},
+ {"p", 1},
+ {"۴", 2},
+ {"¨", 2},
+ {"P", 2},
+ {"۵", 2},
+ {"÷", 0},
+ {"¶", 3},
+ {"۶", 2},
+ {"$", 2},
+ {"(", 3},
+ {"۷", 2},
+ {"˙", 2},
+ {")", 3},
+ {"۸", 2},
+ {"e", 0},
+ {"∂", 2},
+ {"۹", 2},
+ {"E", 2},
+ {"%", 2},
+ {"٪", 2},
+ {"é", 2},
+ {".", 0},
+ {"", 0},
+ {"É", 2},
+ {"·", 0},
+ {"", 0},
+ {"ě", 2},
+ {"φ", 3},
+ {"ê", 2},
+ {"Φ", 2},
+ {"Ê", 2},
+ {"π", 0},
+ {"ë", 2},
+ {"Π", 2},
+ {"Ë", 2},
+ {"+", 0},
+ {"è", 2},
+ {"±", 2},
+ {"È", 2},
+ {"∏", 2},
+ {"8", 2},
+ {"ψ", 3},
+ {"…", 0},
+ {"Ψ", 2},
+ {"ε", 0},
+ {"q", 1},
+ {"Ε", 2},
+ {"Q", 3},
+ {"=", 0},
+ {"?", 2},
+ {"η", 1},
+ {"¿", 1},
+ {"Η", 2},
+ {"\"", 2},
+ {"ð", 2},
+ {"‘", 2},
+ {"Ð", 2},
+ {"’", 2},
+ {"!", 2},
+ {"r", 0},
+ {"¡", 1},
+ {"R", 2},
+ {"f", 2},
+ {"√", 2},
+ {"F", 2},
+ {"ř", 2},
+ {"ff", 2},
+ {"Ř", 2},
+ {"ffi", 2},
+ {"®", 2},
+ {"ffj", 3},
+ {"ρ", 1},
+ {"ffl", 2},
+ {"Ρ", 2},
+ {"fi", 2},
+ {"˚", 2},
+ {"fi", 2},
+ {"s", 0},
+ {"5", 2},
+ {"S", 2},
+ {"fj", 3},
+ {"š", 2},
+ {"fl", 2},
+ {"Š", 2},
+ {"fl", 2},
+ {"§", 2},
+ {"ƒ", 3},
+ {";", 1},
+ {"4", 2},
+ {"7", 2},
+ {"⁄", 2},
+ {"σ", 0},
+ {"g", 1},
+ {"Σ", 2},
+ {"G", 2},
+ {"6", 2},
+ {"γ", 1},
+ {"/", 2},
+ {"Γ", 2},
+ {"£", 2},
+ {"ß", 2},
+ {"∑", 2},
+ {"`", 2},
+ {"t", 2},
+ {">", 0},
+ {"T", 2},
+ {"≥", 2},
+ {"τ", 0},
+ {"«", 0},
+ {"Τ", 2},
+ {"»", 0},
+ {"ť", 2},
+ {"h", 2},
+ {"Ť", 2},
+ {"H", 2},
+ {"Th", 2},
+ {"˝", 2},
+ {"θ", 2},
+ {"-", 0},
+ {"Θ", 2},
+ {"i", 2},
+ {"þ", 3},
+ {"I", 2},
+ {"Þ", 2},
+ {"í", 2},
+ {"3", 2},
+ {"Í", 2},
+ {"¾", 2},
+ {"î", 2},
+ {"³", 2},
+ {"Î", 2},
+ {"˜", 2},
+ {"ï", 2},
+ {"2", 2},
+};