shithub: neatroff

Download patch

ref: 603ad1a37d08437b3a51a0f62b88834c4a7611ab
parent: 42e5c24a2b0096f408dd975fa6b1027d4ac0a730
author: Ali Gholami Rudi <ali@rudi.ir>
date: Tue Nov 25 12:02:23 EST 2014

font: use a set for finding rules that match the first glyph

This improves neatroff's performance when using fonts with more than
20k rules by more than 350%.

--- a/font.c
+++ b/font.c
@@ -6,27 +6,28 @@
 
 /* convert wid in device unitwidth size to size sz */
 #define DEVWID(sz, wid)		(((wid) * (sz) + (dev_uwid / 2)) / dev_uwid)
-#define GHASH(g1, g2)		((((g2) + 1) << 16) | ((g1) + 1))
 
+/* flags for gpat->flg */
 #define GF_PAT		1	/* gsub/gpos pattern glyph */
 #define GF_REP		2	/* gsub replacement glyph */
 #define GF_CON		4	/* context glyph */
 #define GF_GRP		8	/* glyph group */
 
-/* glyph pattern for gsub and gpos tables; each grule has some gpats */
-struct gpat {
-	short g;		/* glyph index */
-	short flg;		/* pattern flags; GF_* */
-	short x, y, xadv, yadv;	/* gpos data */
-};
+/* mapping integers to sets */
+static struct iset *iset_make(int keycnt);
+static void iset_free(struct iset *iset);
+static int *iset_get(struct iset *iset, int key);
+static void iset_put(struct iset *iset, int key, int ent);
 
 /* glyph substitution and positioning rules */
 struct grule {
-	struct gpat *pats;
-	short len;	/* pats[] length */
-	short feat;	/* feature owning this rule */
-	short pos;	/* position of this rule in the file */
-	int hash;	/* hash of this rule for sorting and comparison */
+	struct gpat {			/* rule description */
+		short g;		/* glyph index */
+		short flg;		/* pattern flags; GF_* */
+		short x, y, xadv, yadv;	/* gpos data */
+	} *pats;			/* rule pattern */
+	short len;			/* pats[] length */
+	short feat;			/* feature owning this rule */
 };
 
 struct font {
@@ -49,6 +50,8 @@
 	int gsub_n;
 	struct grule gpos[NGRULES];	/* glyph positioning rules */
 	int gpos_n;
+	struct iset *gsub0;		/* rules matching a glyph at pos 0 */
+	struct iset *gpos0;		/* rules matching a glyph at pos 0 */
 	int *ggrp[NGRULES];		/* glyph groups */
 	int ggrp_len[NGRULES];
 };
@@ -119,47 +122,6 @@
 	return g ? g - fn->gl : -1;
 }
 
-/* compare their hashes, then their positions to make qsort() stable */
-static int grulecmp(void *v1, void *v2)
-{
-	struct grule *r1 = v1;
-	struct grule *r2 = v2;
-	return r1->hash == r2->hash ? r1->pos - r2->pos : r1->hash - r2->hash;
-}
-
-/* the hashing function for grule structs, based on their first two glyphs */
-static int grule_hash(struct grule *rule)
-{
-	int g1 = -1, g2 = -1;
-	int i = 0;
-	/* finding the first glyph; -1 if FG_GRP */
-	while (i < rule->len && rule->pats[i].flg & (GF_REP | GF_CON))
-		i++;		/* skipping replacement and context glyphs */
-	if (i < rule->len && rule->pats[i].flg == GF_PAT)
-		g1 = rule->pats[i].g;
-	i++;
-	/* finding the second glyph; -1 if FG_GRP */
-	while (i < rule->len && rule->pats[i].flg & GF_REP)
-		i++;		/* skipping replacement glyphs */
-	if (i < rule->len && rule->pats[i].flg == GF_PAT)
-		g2 = rule->pats[i].g;
-	return GHASH(g1, g2);
-}
-
-static int grule_find(struct grule *rules, int n, int hash)
-{
-	int l = 0;
-	int h = n;
-	while (l < h) {
-		int m = (l + h) >> 1;
-		if (rules[m].hash >= hash)
-			h = m;
-		else
-			l = m + 1;
-	}
-	return l;
-}
-
 static int font_gpatmatch(struct font *fn, struct gpat *p, int g)
 {
 	int i;
@@ -200,31 +162,36 @@
 	return 1;
 }
 
+static int font_findrule(struct font *fn, int gsub, int pos,
+		int *fwd, int fwdlen, int *ctx, int ctxlen)
+{
+	struct grule *rules = gsub ? fn->gsub : fn->gpos;
+	int *r1 = iset_get(gsub ? fn->gsub0 : fn->gpos0, fwd[0]);
+	int i = -1;
+	while (r1 && r1[++i] >= 0) {
+		if (r1[i] >= pos && font_rulematch(fn, &rules[r1[i]], fwd,
+							fwdlen, ctx, ctxlen))
+			return r1[i];
+	}
+	return -1;
+}
+
 /* perform all possible gpos rules on src */
 static void font_performgpos(struct font *fn, int *src, int slen,
 		int *x, int *y, int *xadv, int *yadv)
 {
 	struct grule *gpos = fn->gpos;
-	int n = fn->gpos_n;
-	int i, j, k;
+	int i, k;
 	for (i = 0; i < slen; i++) {
-		/* possible hash values for matching gpos rules at src + slen */
-		for (j = 0; j < 4 && j < (slen << 1); j++) {
-			int hash = GHASH(j & 1 ? src[i] : -1, j & 2 ? src[i + 1] : -1);
-			int idx = grule_find(gpos, n, hash);
-			while (idx < n && gpos[idx].hash == hash) {
-				if (font_rulematch(fn, &gpos[idx],
-						src + i, slen - i, src + i, i)) {
-					struct gpat *pats = gpos[idx].pats;
-					/* we should accumulate the values... */
-					for (k = 0; k < gpos[idx].len; k++) {
-						x[i + k] = pats[k].x;
-						y[i + k] = pats[k].y;
-						xadv[i + k] = pats[k].xadv;
-						yadv[i + k] = pats[k].yadv;
-					}
-				}
-				idx++;
+		int r = font_findrule(fn, 0, 0, src + i, slen - i, src + i, i);
+		if (r >= 0) {
+			struct gpat *pats = gpos[r].pats;
+			/* we should accumulate the values... */
+			for (k = 0; k < gpos[r].len; k++) {
+				x[i + k] = pats[k].x;
+				y[i + k] = pats[k].y;
+				xadv[i + k] = pats[k].xadv;
+				yadv[i + k] = pats[k].yadv;
 			}
 		}
 	}
@@ -231,26 +198,14 @@
 }
 
 /* find the first gsub rule after pos that matches any glyph in src */
-static struct grule *font_firstgsub(struct font *fn, int pos, int *src, int slen)
+static int font_firstgsub(struct font *fn, int pos, int *src, int slen)
 {
-	struct grule *rules = fn->gsub;
-	int n = fn->gsub_n;
-	struct grule *best = NULL;
-	int i, j;
+	int best = -1;
+	int i;
 	for (i = 0; i < slen; i++) {
-		/* possible hash values for matching gsub rules at src + slen */
-		for (j = 0; j < 2 && i + j < slen; j++) {
-			int hash = GHASH(src[i], j ? src[i + 1] : -1);
-			int idx = grule_find(rules, n, hash);
-			while (idx < n && rules[idx].hash == hash &&
-					(!best || rules[idx].pos < best->pos)) {
-				if (rules[idx].pos >= pos)
-					if (font_rulematch(fn, &rules[idx],
-							src + i, slen - i, src + i, i))
-						best = &rules[idx];
-				idx++;
-			}
-		}
+		int r = font_findrule(fn, 1, pos, src + i, slen - i, src + i, i);
+		if (r >= 0 && (best < 0 || r < best))
+			best = r;
 	}
 	return best;
 }
@@ -265,12 +220,9 @@
 	int i, j;
 	memset(dmap, 0, slen * sizeof(dmap[i]));
 	for (i = 0; i < slen; i++) {
-		int hash1 = GHASH(src[i], -1);
-		int hash2 = GHASH(src[i], i + 1 < slen ? src[i + 1] : -1);
-		int hmatch = rule->hash == hash1 || rule->hash == hash2;
 		dmap[dlen] = smap[i];
-		if (hmatch && font_rulematch(fn, rule, src + i,
-					slen - i, dst + dlen, dlen)) {
+		if (font_rulematch(fn, rule, src + i, slen - i,
+					dst + dlen, dlen)) {
 			for (j = 0; j < rule->len; j++) {
 				if (rule->pats[j].flg & GF_REP)
 					dst[dlen++] = rule->pats[j].g;
@@ -290,12 +242,11 @@
 /* perform all possible gsub rules on src */
 static int font_performgsub(struct font *fn, int *src, int slen, int *smap)
 {
-	int i = 0;
-	while (i >= 0) {
-		struct grule *rule = font_firstgsub(fn, i, src, slen);
-		if (rule)
-			slen = font_gsubapply(fn, rule, src, slen, smap);
-		i = rule ? rule->pos + 1 : -1;
+	int i = -1;
+	while (++i >= 0) {
+		if ((i = font_firstgsub(fn, i, src, slen)) < 0)
+			break;
+		slen = font_gsubapply(fn, &fn->gsub[i], src, slen, smap);
 	}
 	return slen;
 }
@@ -544,6 +495,27 @@
 	} while (c != '\n' && c != EOF);
 }
 
+static struct gpat *font_rulefirstpat(struct font *fn, struct grule *rule)
+{
+	int i;
+	for (i = 0; i < rule->len; i++)
+		if (!(rule->pats[i].flg & (GF_REP | GF_CON)))
+			return &rule->pats[i];
+	return NULL;
+}
+
+static void font_isetinsert(struct font *fn, struct iset *iset, int rule, struct gpat *p)
+{
+	int j;
+	if (p->flg & GF_GRP) {
+		for (j = 0; j < fn->ggrp_len[p->g]; j++)
+			iset_put(iset, fn->ggrp[p->g][j], rule);
+	} else {
+		if (p->g >= 0)
+			iset_put(iset, p->g, rule);
+	}
+}
+
 struct font *font_open(char *path)
 {
 	struct font *fn;
@@ -602,16 +574,14 @@
 	for (i = 0; i < ligs_n; i++)
 		font_lig(fn, ligs[i]);
 	fclose(fin);
+	fn->gsub0 = iset_make(fn->gl_n);
+	fn->gpos0 = iset_make(fn->gl_n);
 	for (i = 0; i < fn->gsub_n; i++)
-		fn->gsub[i].pos = i;
+		font_isetinsert(fn, fn->gsub0, i,
+			font_rulefirstpat(fn, &fn->gsub[i]));
 	for (i = 0; i < fn->gpos_n; i++)
-		fn->gpos[i].pos = i;
-	for (i = 0; i < fn->gsub_n; i++)
-		fn->gsub[i].hash = grule_hash(&fn->gsub[i]);
-	for (i = 0; i < fn->gpos_n; i++)
-		fn->gpos[i].hash = grule_hash(&fn->gpos[i]);
-	qsort(fn->gsub, fn->gsub_n, sizeof(fn->gsub[0]), (void *) grulecmp);
-	qsort(fn->gpos, fn->gpos_n, sizeof(fn->gpos[0]), (void *) grulecmp);
+		font_isetinsert(fn, fn->gpos0, i,
+			font_rulefirstpat(fn, &fn->gpos[i]));
 	return fn;
 }
 
@@ -627,6 +597,8 @@
 	dict_done(&fn->gl_dict);
 	dict_done(&fn->ch_dict);
 	dict_done(&fn->ch_map);
+	iset_free(fn->gsub0);
+	iset_free(fn->gpos0);
 	free(fn);
 }
 
@@ -696,4 +668,59 @@
 	if (idx >= 0)
 		fn->feat_set[idx] = val != 0;
 	return old;
+}
+
+/* iset structure to store a mapping from integers to sets */
+struct iset {
+	int **set;
+	int *sz;
+	int *len;
+	int keycnt;
+};
+
+static struct iset *iset_make(int keycnt)
+{
+	struct iset *iset = malloc(sizeof(*iset));
+	memset(iset, 0, sizeof(*iset));
+	iset->keycnt = keycnt;
+	iset->set = malloc(keycnt * sizeof(iset->set[0]));
+	memset(iset->set, 0, keycnt * sizeof(iset->set[0]));
+	iset->len = malloc(keycnt * sizeof(iset->len[0]));
+	memset(iset->len, 0, keycnt * sizeof(iset->len[0]));
+	iset->sz = malloc(keycnt * sizeof(iset->sz[0]));
+	memset(iset->sz, 0, keycnt * sizeof(iset->sz[0]));
+	return iset;
+}
+
+static void iset_free(struct iset *iset)
+{
+	int i;
+	for (i = 0; i < iset->keycnt; i++)
+		free(iset->set[i]);
+	free(iset->set);
+	free(iset->len);
+	free(iset->sz);
+}
+
+static int *iset_get(struct iset *iset, int key)
+{
+	return iset->set[key];
+}
+
+static void iset_put(struct iset *iset, int key, int ent)
+{
+	if (iset->len[key] + 1 >= iset->sz[key]) {
+		int olen = iset->sz[key];
+		int nlen = iset->sz[key] * 2 + 8;
+		void *nset = malloc(nlen * sizeof(iset->set[key][0]));
+		if (iset->set[key]) {
+			memcpy(nset, iset->set[key],
+				olen * sizeof(iset->set[key][0]));
+			free(iset->set[key]);
+		}
+		iset->sz[key] = nlen;
+		iset->set[key] = nset;
+	}
+	iset->set[key][iset->len[key]++] = ent;
+	iset->set[key][iset->len[key]] = -1;
 }