ref: 603ad1a37d08437b3a51a0f62b88834c4a7611ab
parent: 42e5c24a2b0096f408dd975fa6b1027d4ac0a730
author: Ali Gholami Rudi <ali@rudi.ir>
date: Tue Nov 25 12:02:23 EST 2014
font: use a set for finding rules that match the first glyph This improves neatroff's performance when using fonts with more than 20k rules by more than 350%.
--- a/font.c
+++ b/font.c
@@ -6,27 +6,28 @@
/* convert wid in device unitwidth size to size sz */
#define DEVWID(sz, wid) (((wid) * (sz) + (dev_uwid / 2)) / dev_uwid)
-#define GHASH(g1, g2) ((((g2) + 1) << 16) | ((g1) + 1))
+/* flags for gpat->flg */
#define GF_PAT 1 /* gsub/gpos pattern glyph */
#define GF_REP 2 /* gsub replacement glyph */
#define GF_CON 4 /* context glyph */
#define GF_GRP 8 /* glyph group */
-/* glyph pattern for gsub and gpos tables; each grule has some gpats */
-struct gpat {
- short g; /* glyph index */
- short flg; /* pattern flags; GF_* */
- short x, y, xadv, yadv; /* gpos data */
-};
+/* mapping integers to sets */
+static struct iset *iset_make(int keycnt);
+static void iset_free(struct iset *iset);
+static int *iset_get(struct iset *iset, int key);
+static void iset_put(struct iset *iset, int key, int ent);
/* glyph substitution and positioning rules */
struct grule {
- struct gpat *pats;
- short len; /* pats[] length */
- short feat; /* feature owning this rule */
- short pos; /* position of this rule in the file */
- int hash; /* hash of this rule for sorting and comparison */
+ struct gpat { /* rule description */
+ short g; /* glyph index */
+ short flg; /* pattern flags; GF_* */
+ short x, y, xadv, yadv; /* gpos data */
+ } *pats; /* rule pattern */
+ short len; /* pats[] length */
+ short feat; /* feature owning this rule */
};
struct font {
@@ -49,6 +50,8 @@
int gsub_n;
struct grule gpos[NGRULES]; /* glyph positioning rules */
int gpos_n;
+ struct iset *gsub0; /* rules matching a glyph at pos 0 */
+ struct iset *gpos0; /* rules matching a glyph at pos 0 */
int *ggrp[NGRULES]; /* glyph groups */
int ggrp_len[NGRULES];
};
@@ -119,47 +122,6 @@
return g ? g - fn->gl : -1;
}
-/* compare their hashes, then their positions to make qsort() stable */
-static int grulecmp(void *v1, void *v2)
-{
- struct grule *r1 = v1;
- struct grule *r2 = v2;
- return r1->hash == r2->hash ? r1->pos - r2->pos : r1->hash - r2->hash;
-}
-
-/* the hashing function for grule structs, based on their first two glyphs */
-static int grule_hash(struct grule *rule)
-{
- int g1 = -1, g2 = -1;
- int i = 0;
- /* finding the first glyph; -1 if FG_GRP */
- while (i < rule->len && rule->pats[i].flg & (GF_REP | GF_CON))
- i++; /* skipping replacement and context glyphs */
- if (i < rule->len && rule->pats[i].flg == GF_PAT)
- g1 = rule->pats[i].g;
- i++;
- /* finding the second glyph; -1 if FG_GRP */
- while (i < rule->len && rule->pats[i].flg & GF_REP)
- i++; /* skipping replacement glyphs */
- if (i < rule->len && rule->pats[i].flg == GF_PAT)
- g2 = rule->pats[i].g;
- return GHASH(g1, g2);
-}
-
-static int grule_find(struct grule *rules, int n, int hash)
-{
- int l = 0;
- int h = n;
- while (l < h) {
- int m = (l + h) >> 1;
- if (rules[m].hash >= hash)
- h = m;
- else
- l = m + 1;
- }
- return l;
-}
-
static int font_gpatmatch(struct font *fn, struct gpat *p, int g)
{
int i;
@@ -200,31 +162,36 @@
return 1;
}
+static int font_findrule(struct font *fn, int gsub, int pos,
+ int *fwd, int fwdlen, int *ctx, int ctxlen)
+{
+ struct grule *rules = gsub ? fn->gsub : fn->gpos;
+ int *r1 = iset_get(gsub ? fn->gsub0 : fn->gpos0, fwd[0]);
+ int i = -1;
+ while (r1 && r1[++i] >= 0) {
+ if (r1[i] >= pos && font_rulematch(fn, &rules[r1[i]], fwd,
+ fwdlen, ctx, ctxlen))
+ return r1[i];
+ }
+ return -1;
+}
+
/* perform all possible gpos rules on src */
static void font_performgpos(struct font *fn, int *src, int slen,
int *x, int *y, int *xadv, int *yadv)
{
struct grule *gpos = fn->gpos;
- int n = fn->gpos_n;
- int i, j, k;
+ int i, k;
for (i = 0; i < slen; i++) {
- /* possible hash values for matching gpos rules at src + slen */
- for (j = 0; j < 4 && j < (slen << 1); j++) {
- int hash = GHASH(j & 1 ? src[i] : -1, j & 2 ? src[i + 1] : -1);
- int idx = grule_find(gpos, n, hash);
- while (idx < n && gpos[idx].hash == hash) {
- if (font_rulematch(fn, &gpos[idx],
- src + i, slen - i, src + i, i)) {
- struct gpat *pats = gpos[idx].pats;
- /* we should accumulate the values... */
- for (k = 0; k < gpos[idx].len; k++) {
- x[i + k] = pats[k].x;
- y[i + k] = pats[k].y;
- xadv[i + k] = pats[k].xadv;
- yadv[i + k] = pats[k].yadv;
- }
- }
- idx++;
+ int r = font_findrule(fn, 0, 0, src + i, slen - i, src + i, i);
+ if (r >= 0) {
+ struct gpat *pats = gpos[r].pats;
+ /* we should accumulate the values... */
+ for (k = 0; k < gpos[r].len; k++) {
+ x[i + k] = pats[k].x;
+ y[i + k] = pats[k].y;
+ xadv[i + k] = pats[k].xadv;
+ yadv[i + k] = pats[k].yadv;
}
}
}
@@ -231,26 +198,14 @@
}
/* find the first gsub rule after pos that matches any glyph in src */
-static struct grule *font_firstgsub(struct font *fn, int pos, int *src, int slen)
+static int font_firstgsub(struct font *fn, int pos, int *src, int slen)
{
- struct grule *rules = fn->gsub;
- int n = fn->gsub_n;
- struct grule *best = NULL;
- int i, j;
+ int best = -1;
+ int i;
for (i = 0; i < slen; i++) {
- /* possible hash values for matching gsub rules at src + slen */
- for (j = 0; j < 2 && i + j < slen; j++) {
- int hash = GHASH(src[i], j ? src[i + 1] : -1);
- int idx = grule_find(rules, n, hash);
- while (idx < n && rules[idx].hash == hash &&
- (!best || rules[idx].pos < best->pos)) {
- if (rules[idx].pos >= pos)
- if (font_rulematch(fn, &rules[idx],
- src + i, slen - i, src + i, i))
- best = &rules[idx];
- idx++;
- }
- }
+ int r = font_findrule(fn, 1, pos, src + i, slen - i, src + i, i);
+ if (r >= 0 && (best < 0 || r < best))
+ best = r;
}
return best;
}
@@ -265,12 +220,9 @@
int i, j;
memset(dmap, 0, slen * sizeof(dmap[i]));
for (i = 0; i < slen; i++) {
- int hash1 = GHASH(src[i], -1);
- int hash2 = GHASH(src[i], i + 1 < slen ? src[i + 1] : -1);
- int hmatch = rule->hash == hash1 || rule->hash == hash2;
dmap[dlen] = smap[i];
- if (hmatch && font_rulematch(fn, rule, src + i,
- slen - i, dst + dlen, dlen)) {
+ if (font_rulematch(fn, rule, src + i, slen - i,
+ dst + dlen, dlen)) {
for (j = 0; j < rule->len; j++) {
if (rule->pats[j].flg & GF_REP)
dst[dlen++] = rule->pats[j].g;
@@ -290,12 +242,11 @@
/* perform all possible gsub rules on src */
static int font_performgsub(struct font *fn, int *src, int slen, int *smap)
{
- int i = 0;
- while (i >= 0) {
- struct grule *rule = font_firstgsub(fn, i, src, slen);
- if (rule)
- slen = font_gsubapply(fn, rule, src, slen, smap);
- i = rule ? rule->pos + 1 : -1;
+ int i = -1;
+ while (++i >= 0) {
+ if ((i = font_firstgsub(fn, i, src, slen)) < 0)
+ break;
+ slen = font_gsubapply(fn, &fn->gsub[i], src, slen, smap);
}
return slen;
}
@@ -544,6 +495,27 @@
} while (c != '\n' && c != EOF);
}
+static struct gpat *font_rulefirstpat(struct font *fn, struct grule *rule)
+{
+ int i;
+ for (i = 0; i < rule->len; i++)
+ if (!(rule->pats[i].flg & (GF_REP | GF_CON)))
+ return &rule->pats[i];
+ return NULL;
+}
+
+static void font_isetinsert(struct font *fn, struct iset *iset, int rule, struct gpat *p)
+{
+ int j;
+ if (p->flg & GF_GRP) {
+ for (j = 0; j < fn->ggrp_len[p->g]; j++)
+ iset_put(iset, fn->ggrp[p->g][j], rule);
+ } else {
+ if (p->g >= 0)
+ iset_put(iset, p->g, rule);
+ }
+}
+
struct font *font_open(char *path)
{
struct font *fn;
@@ -602,16 +574,14 @@
for (i = 0; i < ligs_n; i++)
font_lig(fn, ligs[i]);
fclose(fin);
+ fn->gsub0 = iset_make(fn->gl_n);
+ fn->gpos0 = iset_make(fn->gl_n);
for (i = 0; i < fn->gsub_n; i++)
- fn->gsub[i].pos = i;
+ font_isetinsert(fn, fn->gsub0, i,
+ font_rulefirstpat(fn, &fn->gsub[i]));
for (i = 0; i < fn->gpos_n; i++)
- fn->gpos[i].pos = i;
- for (i = 0; i < fn->gsub_n; i++)
- fn->gsub[i].hash = grule_hash(&fn->gsub[i]);
- for (i = 0; i < fn->gpos_n; i++)
- fn->gpos[i].hash = grule_hash(&fn->gpos[i]);
- qsort(fn->gsub, fn->gsub_n, sizeof(fn->gsub[0]), (void *) grulecmp);
- qsort(fn->gpos, fn->gpos_n, sizeof(fn->gpos[0]), (void *) grulecmp);
+ font_isetinsert(fn, fn->gpos0, i,
+ font_rulefirstpat(fn, &fn->gpos[i]));
return fn;
}
@@ -627,6 +597,8 @@
dict_done(&fn->gl_dict);
dict_done(&fn->ch_dict);
dict_done(&fn->ch_map);
+ iset_free(fn->gsub0);
+ iset_free(fn->gpos0);
free(fn);
}
@@ -696,4 +668,59 @@
if (idx >= 0)
fn->feat_set[idx] = val != 0;
return old;
+}
+
+/* iset structure to store a mapping from integers to sets */
+struct iset {
+ int **set;
+ int *sz;
+ int *len;
+ int keycnt;
+};
+
+static struct iset *iset_make(int keycnt)
+{
+ struct iset *iset = malloc(sizeof(*iset));
+ memset(iset, 0, sizeof(*iset));
+ iset->keycnt = keycnt;
+ iset->set = malloc(keycnt * sizeof(iset->set[0]));
+ memset(iset->set, 0, keycnt * sizeof(iset->set[0]));
+ iset->len = malloc(keycnt * sizeof(iset->len[0]));
+ memset(iset->len, 0, keycnt * sizeof(iset->len[0]));
+ iset->sz = malloc(keycnt * sizeof(iset->sz[0]));
+ memset(iset->sz, 0, keycnt * sizeof(iset->sz[0]));
+ return iset;
+}
+
+static void iset_free(struct iset *iset)
+{
+ int i;
+ for (i = 0; i < iset->keycnt; i++)
+ free(iset->set[i]);
+ free(iset->set);
+ free(iset->len);
+ free(iset->sz);
+}
+
+static int *iset_get(struct iset *iset, int key)
+{
+ return iset->set[key];
+}
+
+static void iset_put(struct iset *iset, int key, int ent)
+{
+ if (iset->len[key] + 1 >= iset->sz[key]) {
+ int olen = iset->sz[key];
+ int nlen = iset->sz[key] * 2 + 8;
+ void *nset = malloc(nlen * sizeof(iset->set[key][0]));
+ if (iset->set[key]) {
+ memcpy(nset, iset->set[key],
+ olen * sizeof(iset->set[key][0]));
+ free(iset->set[key]);
+ }
+ iset->sz[key] = nlen;
+ iset->set[key] = nset;
+ }
+ iset->set[key][iset->len[key]++] = ent;
+ iset->set[key][iset->len[key]] = -1;
}