shithub: neatroff

Download patch

ref: 03d1fce55d597a3b06cc7c078f510791ab90a430
parent: acf6476228825e442f68bd4b36f4dcede001eab0
author: Ali Gholami Rudi <ali@rudi.ir>
date: Fri Jul 26 07:51:00 EDT 2013

wb: allow longer ligatures

Ligatures can now be up to LIGLEN characters long; the wb struct
stores the last LIGLEN characters for wb_lig() and wb_eos().
Also, after this change, kerning pairs no longer prevent
ligatures from being matched.  Note, however, that this makes
neatroff slower.

--- a/font.c
+++ b/font.c
@@ -99,13 +99,32 @@
 	return 1;
 }
 
-/* return 1 if lig is a ligature */
-int font_lig(struct font *fn, char *lig)
+/*
+ * c[] holds n characters, newest first (c[0] is the most recent).
+ * font_lig() returns how many leading entries of c[], read in text
+ * order, spell a ligature listed in fn->lig[] that font_find() can
+ * locate.  Zero means no ligature was matched.
+ */
+int font_lig(struct font *fn, char **c, int n)
 {
 	int i;
-	for (i = 0; i < fn->nlig; i++)
-		if (!strcmp(lig, fn->lig[i]))
-			return font_find(fn, lig) != NULL;
+	/* c[] concatenated in text order; NOTE(review): assumes it fits in s -- confirm */
+	char s[GNLEN * 2] = "";
+	/* b[i] is the number of characters of c[] in s + i; 0 inside a character */
+	int b[GNLEN * 2] = {0};
+	int len = 0;
+	for (i = 0; i < n; i++) {
+		char *cur = c[n - i - 1];	/* oldest character first */
+		b[len] = n - i;
+		strcpy(s + len, cur);
+		len += strlen(cur);
+	}
+	for (i = 0; i < fn->nlig; i++) {
+		int l = strlen(fn->lig[i]);	/* l > len cannot match and would index before b[] and s */
+		if (l <= len && b[len - l] && !strcmp(s + len - l, fn->lig[i]))
+			if (font_find(fn, fn->lig[i]))	/* the font must also have the glyph */
+				return b[len - l];
+	}
 	return 0;
 }
 
--- a/roff.h
+++ b/roff.h
@@ -26,6 +26,7 @@
 #define NTABS		16	/* number of tab stops */
 #define NFIELDS		32	/* number of fields */
 #define MAXFRAC		100000	/* maximum value of the fractional part */
+#define LIGLEN		4	/* number of previous characters saved for ligatures (wb->prev_c[]) */
 
 /* escape sequences */
 #define ESC_Q	"bCDhHlLNoSvwxX"	/* \X'ccc' quoted escape sequences */
@@ -125,7 +126,7 @@
 void font_close(struct font *fn);
 struct glyph *font_glyph(struct font *fn, char *id);
 struct glyph *font_find(struct font *fn, char *name);
-int font_lig(struct font *fn, char *c);
+int font_lig(struct font *fn, char **c, int n);
 int font_kern(struct font *fn, char *c1, char *c2);
 
 /* glyph handling functions */
@@ -164,7 +165,6 @@
 	char *s;		/* allocated buffer */
 	int sz;			/* buffer size */
 	int n;			/* length of the string stored in s */
-	int prev_n;		/* n before the last sbuf_append() */
 };
 
 void sbuf_init(struct sbuf *sbuf);
@@ -174,7 +174,7 @@
 void sbuf_append(struct sbuf *sbuf, char *s);
 void sbuf_printf(struct sbuf *sbuf, char *s, ...);
 void sbuf_putnl(struct sbuf *sbuf);
-void sbuf_pop(struct sbuf *sbuf);
+void sbuf_cut(struct sbuf *sbuf, int n);
 int sbuf_len(struct sbuf *sbuf);
 int sbuf_empty(struct sbuf *sbuf);
 
@@ -187,10 +187,12 @@
 	int els_neg, els_pos;	/* extra line spacing */
 	int h, v;		/* buffer vertical and horizontal positions */
 	int ct, sb, st;		/* \w registers */
-	char prev_c[GNLEN];	/* previous character added via wb_put() */
-	int prev_h;		/* wb->h after wb_put() calls */
-	int prev_l;		/* sbuf_len(&wb->sbuf) after wb_put() calls */
-	int eos;		/* nonzero if wb ends a sentence (.?!) */
+	/* saving previous characters added via wb_put() */
+	char prev_c[LIGLEN][GNLEN];
+	int prev_l[LIGLEN];	/* sbuf_len(&wb->sbuf) before wb_put() calls */
+	int prev_h[LIGLEN];	/* wb->h before wb_put() calls */
+	int prev_n;		/* number of characters in prev_c[] */
+	int prev_ll;		/* sbuf_len(&wb->sbuf) after the last wb_put() */
 };
 
 void wb_init(struct wb *wb);
--- a/sbuf.c
+++ b/sbuf.c
@@ -35,7 +35,6 @@
 	if (sbuf->n + len + 1 >= sbuf->sz)
 		sbuf_extend(sbuf, sbuf->n + len + 1);
 	memcpy(sbuf->s + sbuf->n, s, len);
-	sbuf->prev_n = sbuf->n;
 	sbuf->n += len;
 }
 
@@ -71,11 +70,11 @@
 	return sbuf->n;
 }
 
-/* undo last sbuf_append() */
-void sbuf_pop(struct sbuf *sbuf)
+/* shorten the stored string to length n; does nothing if it is already n or shorter */
+void sbuf_cut(struct sbuf *sbuf, int n)
 {
-	if (sbuf->prev_n < sbuf->n)
-		sbuf->n = sbuf->prev_n;
+	if (sbuf->n > n)
+		sbuf->n = n;
 }
 
 void sbuf_done(struct sbuf *sbuf)
--- a/wb.c
+++ b/wb.c
@@ -76,9 +76,58 @@
 	sbuf_printf(&wb->sbuf, "%cX%s", c_ec, x);
 }
 
+/* forget wb->prev_c[] if the sbuf length differs from the one recorded in wb->prev_ll */
+static void wb_prevcheck(struct wb *wb)
+{
+	if (wb->prev_ll != sbuf_len(&wb->sbuf))
+		wb->prev_n = 0;		/* something was appended after the last wb_put() */
+}
+
+/* mark wb->prev_c[] as valid: record the current sbuf length for wb_prevcheck() */
+static void wb_prevok(struct wb *wb)
+{
+	wb->prev_ll = sbuf_len(&wb->sbuf);
+}
+
+/* push c to the front of wb->prev_c[]; ll is the sbuf length before c was inserted */
+static void wb_prevput(struct wb *wb, char *c, int ll)
+{
+	if (wb->prev_n == LEN(wb->prev_c))
+		wb->prev_n--;		/* prev_c[] is full: the oldest (last) entry is dropped */
+	memmove(wb->prev_l + 1, wb->prev_l, wb->prev_n * sizeof(wb->prev_l[0]));
+	memmove(wb->prev_h + 1, wb->prev_h, wb->prev_n * sizeof(wb->prev_h[0]));
+	memmove(wb->prev_c + 1, wb->prev_c, wb->prev_n * sizeof(wb->prev_c[0]));
+	wb->prev_l[0] = ll;
+	wb->prev_h[0] = wb->h;		/* wb->h as it is when this function is called */
+	strcpy(wb->prev_c[0], c);
+	wb->prev_n++;
+	wb_prevok(wb);			/* the saved characters match the current sbuf length */
+}
+
+/* strip the last i characters from wb (0 <= i <= wb->prev_n); i == 0 strips nothing */
+static void wb_prevpop(struct wb *wb, int i)
+{
+	int n = wb->prev_n - i;		/* saved characters that remain */
+	sbuf_cut(&wb->sbuf, i > 0 ? wb->prev_l[i - 1] : sbuf_len(&wb->sbuf));
+	wb->h = i > 0 ? wb->prev_h[i - 1] : wb->h;	/* entry i - 1 does not exist for i == 0 */
+	memmove(wb->prev_l, wb->prev_l + i, n * sizeof(wb->prev_l[0]));
+	memmove(wb->prev_h, wb->prev_h + i, n * sizeof(wb->prev_h[0]));
+	memmove(wb->prev_c, wb->prev_c + i, n * sizeof(wb->prev_c[0]));
+	wb->prev_n = n;
+	wb->prev_ll = sbuf_len(&wb->sbuf);	/* remaining entries are valid for the cut sbuf */
+}
+
+/* return the i-th last character inserted via wb_put() (0 is the latest), or NULL if not saved */
+static char *wb_prev(struct wb *wb, int i)
+{
+	wb_prevcheck(wb);	/* may reset prev_n if wb changed after the last wb_put() */
+	return i < wb->prev_n ? wb->prev_c[i] : NULL;
+}
+
 void wb_put(struct wb *wb, char *c)
 {
 	struct glyph *g;
+	int ll;
 	if (c[0] == '\n') {
 		wb->part = 0;
 		return;
@@ -94,6 +143,8 @@
 	}
 	g = dev_glyph(c, R_F(wb));
 	wb_font(wb);
+	wb_prevcheck(wb);		/* make sure wb->prev_c[] is valid */
+	ll = sbuf_len(&wb->sbuf);	/* sbuf length before inserting c */
 	if (!c[1] || c[0] == c_ec || c[0] == c_ni ||
 			utf8len((unsigned char) c[0]) == strlen(c)) {
 		if (c[0] == c_ni && c[1] == c_ec)
@@ -107,15 +158,9 @@
 			sbuf_printf(&wb->sbuf, "%cC'%s'", c_ec, c);
 	}
 	if (strcmp(c_hc, c)) {
-		strcpy(wb->prev_c, c);
-		wb->prev_l = sbuf_len(&wb->sbuf);
-		wb->prev_h = wb->h;
+		wb_prevput(wb, c, ll);
 		wb->h += charwid(R_F(wb), R_S(wb), g ? g->wid : SC_DW);
 		wb->ct |= g ? g->type : 0;
-		if (c[1])
-			wb->eos = 0;
-		else if (strchr("'\")]*", c[0]) == NULL)
-			wb->eos = strchr(".?!", c[0]) != NULL;
 		wb_stsb(wb);
 	}
 }
@@ -123,13 +168,19 @@
 /* return zero if c formed a ligature with its previous character */
 int wb_lig(struct wb *wb, char *c)
 {
-	char lig[GNLEN * 2];
-	if (wb->prev_l != sbuf_len(&wb->sbuf) || !wb->prev_c[0])
-		return 1;
-	sprintf(lig, "%s%s", wb->prev_c, c);
-	if (font_lig(dev_font(R_F(wb)), lig)) {
-		wb->h = wb->prev_h;
-		sbuf_pop(&wb->sbuf);
+	char lig[GNLEN] = "";
+	char *cs[LIGLEN + 2];
+	int i = -1;
+	int ligpos;
+	cs[0] = c;
+	while (wb_prev(wb, ++i))
+		cs[i + 1] = wb_prev(wb, i);
+	ligpos = font_lig(dev_font(R_F(wb)), cs, i + 1);
+	if (ligpos > 0) {
+		for (i = 0; i < ligpos - 1; i++)
+			strcat(lig, wb_prev(wb, ligpos - i - 2));
+		strcat(lig, c);
+		wb_prevpop(wb, ligpos - 1);
 		wb_put(wb, lig);
 		return 0;
 	}
@@ -140,11 +191,12 @@
 int wb_kern(struct wb *wb, char *c)
 {
 	int val;
-	if (wb->prev_l != sbuf_len(&wb->sbuf) || !wb->prev_c[0])
+	if (!wb_prev(wb, 0))		/* no valid previous character to kern with */
 		return 1;
-	val = font_kern(dev_font(R_F(wb)), wb->prev_c, c);
+	val = font_kern(dev_font(R_F(wb)), wb_prev(wb, 0), c);
 	if (val)
 		wb_hmov(wb, charwid(R_F(wb), R_S(wb), val));
+	wb_prevok(wb);		/* kerning should not prevent ligatures: keep prev_c[] valid even if wb_hmov() changed the sbuf length */
 	return !val;
 }
 
@@ -274,9 +326,13 @@
 	return sbuf_empty(&wb->sbuf);
 }
 
+/* return 1 if wb ends a sentence (.?!), skipping trailing quotes, ), ] and * characters; multi-character glyphs never qualify */
 int wb_eos(struct wb *wb)
 {
-	return wb->eos;
+	int i = 0;	/* index into the saved characters, 0 being the latest */
+	while (wb_prev(wb, i) && !wb_prev(wb, i)[1] && strchr("'\")]*", wb_prev(wb, i)[0]))
+		i++;
+	return wb_prev(wb, i) && !wb_prev(wb, i)[1] && strchr(".?!", wb_prev(wb, i)[0]);
 }
 
 void wb_wconf(struct wb *wb, int *ct, int *st, int *sb)