shithub: neatroff

Download patch

ref: 84f76bbf689f7dcb3c62a904285722f4a20bf4b3
parent: 89196863d2d2767978d626f56646d00740bdd2bc
author: Ali Gholami Rudi <ali@rudi.ir>
date: Sun Aug 18 18:21:07 EDT 2013

char: functions for reading escape sequences and characters

Now most of the helper functions for parsing characters,
escape sequences and their arguments are moved to char.c.
Also now commands like .tr, .tc, .lc, .hc and .mc accept
any character name (multi-byte utf-8, \x, \(xy, \C'xyz'
and \[xyz]).  The same applies to characters for bounding
quoted escape sequences.

--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@
 all: roff
 %.o: %.c roff.h
 	$(CC) -c $(CFLAGS) $<
-roff: roff.o dev.o font.o in.o cp.o tr.o ren.o out.o reg.o sbuf.o adj.o eval.o draw.o wb.o hyph.o map.o clr.o
+roff: roff.o dev.o font.o in.o cp.o tr.o ren.o out.o reg.o sbuf.o adj.o eval.o draw.o wb.o hyph.o map.o clr.o char.o
 	$(CC) -o $@ $^ $(LDFLAGS)
 clean:
 	rm -f *.o roff
--- /dev/null
+++ b/char.c
@@ -1,0 +1,275 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include "roff.h"
+
+/* the number of bytes in the utf-8 sequence that starts with byte c;
+ * zero for NUL and one for anything that cannot start a sequence */
+int utf8len(int c)
+{
+	if (c > 0 && c < 0x80)		/* ascii */
+		return 1;
+	if (c < 0xc0)			/* NUL, negative values, continuation bytes */
+		return c != 0;
+	if (c < 0xe0)
+		return 2;
+	if (c < 0xf0)
+		return 3;
+	if (c < 0xf8)
+		return 4;
+	return c < 0xfc ? 5 : 6;
+}
+
+/* copy the utf-8 character at *s into d and advance *s; return its length */
+int utf8read(char **s, char *d)
+{
+	int i, l = utf8len((unsigned char) **s);
+	for (i = 0; i < l && (*s)[i]; i++)	/* stop at NUL if the sequence is truncated */
+		d[i] = (*s)[i];
+	d[i] = '\0';
+	*s += i;	/* never move past the end of the string */
+	return i;
+}
+
+int utf8next(char *s, int (*next)(void))	/* read one utf-8 character into s using next() */
+{
+	int c = next();
+	int l = utf8len(c);	/* computed before the check below; unused if c < 0 */
+	int i;
+	if (c < 0)		/* nothing to read: s is left untouched */
+		return 0;
+	s[0] = c;
+	for (i = 1; i < l; i++)
+		s[i] = next();	/* the value returned by next() is not checked here */
+	s[l] = '\0';
+	return l;		/* the number of bytes stored in s; zero if c is NUL */
+}
+
+/*
+ * read the next character or escape sequence (x, \x, \(xy, \[xyz], \C'xyz')
+ * into c using next(); returns -1 if nothing could be read, otherwise:
+ * character	returned	contents of c
+ * x		'\0'		x
+ * \4x		c_ni		\4x
+ * \\x		'\\'		\\x
+ * \\(xy	'('		xy
+ * \\[xyz]	'['		xyz
+ * \\C'xyz'	'C'		xyz
+ */
+int charnext(char *c, int (*next)(void), void (*back)(int))
+{
+	int l, n;
+	if (!utf8next(c, next))
+		return -1;
+	if (c[0] == c_ni) {
+		utf8next(c + 1, next);	/* c holds c_ni followed by the next character */
+		return c_ni;
+	}
+	if (c[0] == c_ec) {
+		utf8next(c + 1, next);
+		if (c[1] == '(') {
+			l = utf8next(c, next);	/* the two-character name replaces the escape in c */
+			l += utf8next(c + l, next);
+			return '(';
+		} else if (!n_cp && c[1] == '[') {	/* \[ is recognized only when n_cp is zero */
+			l = 0;
+			n = next();
+			while (n >= 0 && n != '\n' && n != ']' && l < GNLEN - 1) {
+				c[l++] = n;
+				n = next();
+			}
+			c[l] = '\0';
+			return '[';
+		} else if (c[1] == 'C') {
+			argnext(c, 'C', next, back);	/* the argument of \C replaces the escape in c */
+			return 'C';
+		}
+		return '\\';	/* any other escape: c holds the escape character and what follows it */
+	}
+	return '\0';
+}
+
+/* like charnext(), but return -1 if delim was read (or nothing could be read) */
+int charnext_delim(char *c, int (*next)(void), void (*back)(int), char *delim)
+{
+	int t = charnext(c, next, back);
+	return (t >= 0 && strcmp(c, delim)) ? t : -1;	/* c may be unset when t < 0 */
+}
+
+/* convert back the character read from charnext() (e.g. xy -> \\(xy); d receives the result */
+void charnext_str(char *d, char *c)
+{
+	if (c[0] == c_ec || c[0] == c_ni || !c[1] || utf8len((unsigned char) c[0]) == strlen(c)) {
+		strcpy(d, c);	/* escapes and single (possibly multi-byte) characters are kept as is */
+		return;
+	}
+	if (!c[2] && utf8len((unsigned char) c[0]) == 1)	/* cast: char may be signed */
+		sprintf(d, "%c(%s", c_ec, c);
+	else
+		sprintf(d, "%cC'%s'", c_ec, c);
+}
+
+/* like charnext() for string buffers; *s is advanced past what was read */
+int charread(char **s, char *c)
+{
+	int ret;
+	sstr_push(*s);
+	ret = charnext(c, sstr_next, sstr_back);
+	*s = sstr_pop();	/* sstr_pop() returns the position reached in the buffer */
+	return ret;
+}
+
+/* read the argument of the troff escape sequence cmd into d, using next()/back() */
+void argnext(char *d, int cmd, int (*next)(void), void (*back)(int))
+{
+	char delim[GNLEN], cs[GNLEN];
+	int c;
+	if (strchr(ESC_P, cmd)) {	/* arguments of the form x, (xy or [xyz] */
+		c = next();
+		if (cmd == 's' && (c == '-' || c == '+')) {	/* \s accepts a leading sign */
+			*d++ = c;
+			c = next();
+		}
+		if (c == '(') {
+			*d++ = next();
+			*d++ = next();
+		} else if (!n_cp && c == '[') {	/* [xyz] is recognized only when n_cp is zero */
+			c = next();
+			while (c > 0 && c != '\n' && c != ']') {
+				*d++ = c;
+				c = next();
+			}
+		} else {
+			*d++ = c;
+			if (cmd == 's' && c >= '1' && c <= '3') {	/* \s may take a second digit */
+				c = next();
+				if (isdigit(c))
+					*d++ = c;
+				else
+					back(c);
+			}
+		}
+	}
+	if (strchr(ESC_Q, cmd)) {	/* arguments enclosed in a delimiter character */
+		charnext(delim, next, back);	/* the first character read is the delimiter */
+		while (charnext_delim(cs, next, back, delim) >= 0) {
+			charnext_str(d, cs);	/* append the character in its escaped form */
+			d = strchr(d, '\0');
+		}
+	}
+	*d = '\0';
+}
+
+/* read the argument of cmd from *sp into d and advance *sp; only for internal neatroff strings */
+void argread(char **sp, char *d, int cmd)
+{
+	char *s = *sp;
+	int q;
+	if (strchr(ESC_P, cmd)) {	/* arguments of the form x, (xy or [xyz] */
+		if (cmd == 's' && (*s == '-' || *s == '+'))
+			*d++ = *s++;
+		if (*s == '(') {
+			s++;
+			*d++ = *s++;
+			*d++ = *s++;
+		} else if (!n_cp && *s == '[') {	/* [xyz] is recognized only when n_cp is zero */
+			s++;
+			while (*s && *s != ']')
+				*d++ = *s++;
+			if (*s == ']')
+				s++;
+		} else {
+			*d++ = *s++;
+			if (cmd == 's' && s[-1] >= '1' && s[-1] <= '3')	/* \s may take a second digit */
+				if (isdigit(*s))
+					*d++ = *s++;
+		}
+	}
+	if (strchr(ESC_Q, cmd)) {	/* arguments enclosed in a delimiter byte */
+		q = *s++;		/* the delimiter is taken as a single byte here */
+		while (*s && *s != q)
+			*d++ = *s++;
+		if (*s == q)
+			s++;
+	}
+	if (cmd == 'z')		/* \z: copy the single byte that follows */
+		*d++ = *s++;
+	*d = '\0';
+	*sp = s;
+}
+
+/*
+ * read a glyph or an escape sequence
+ *
+ * Reads from *s either an output troff request (only the ones
+ * emitted by wb.c) or a glyph name, and advances *s.  Returns the
+ * request name (its argument is copied into d), zero for a glyph
+ * name (the name is copied into d), or -1 at the end of the string.
+ */
+int escread(char **s, char *d)
+{
+	char *r = d;
+	if (!**s)
+		return -1;
+	utf8read(s, d);
+	if (d[0] == c_ec) {
+		utf8read(s, d + 1);
+		if (d[1] == '(') {
+			utf8read(s, d);
+			utf8read(s, d + strlen(d));
+		} else if (!n_cp && d[1] == '[') {
+			while (**s && **s != ']')
+				*r++ = *(*s)++;
+			*r = '\0';	/* the name overwrote the escape in d; terminate it */
+			if (**s == ']')
+				(*s)++;
+		} else if (strchr("CDfhmsvXx", d[1])) {
+			int c = d[1];
+			argread(s, d, d[1]);
+			return c == 'C' ? 0 : c;	/* \C'xyz' is a glyph name, not a request */
+		}
+	}
+	if (d[0] == c_ni)
+		utf8read(s, d + 1);
+	return 0;
+}
+
+/*
+ * string streams: provide next()/back() interface for string buffers
+ *
+ * Functions like charnext() require a next()/back() interface
+ * for reading input streams.  In order to provide this interface
+ * for string buffers, the following functions can be used:
+ *
+ *   sstr_push(s);
+ *   charnext(c, sstr_next, sstr_back);
+ *   sstr_pop();
+ *
+ * The calls to sstr_push()/sstr_pop() may be nested (sstr_bufs[] has NSSTR slots).
+ */
+static char *sstr_bufs[NSSTR];	/* buffer stack */
+static int sstr_n;		/* number of items in sstr_bufs[] */
+static char *sstr_s;		/* current position in the current buffer */
+
+void sstr_push(char *s)	/* make s the current buffer, saving the previous one */
+{
+	sstr_bufs[sstr_n++] = sstr_s;	/* NOTE(review): sstr_n is not checked against NSSTR */
+	sstr_s = s;
+}
+
+char *sstr_pop(void)	/* restore the previous buffer; return the position reached in this one */
+{
+	char *ret = sstr_s;
+	sstr_s = sstr_bufs[--sstr_n];
+	return ret;
+}
+
+int sstr_next(void)	/* return the next byte of the current buffer, or -1 at its end */
+{
+	return *sstr_s ? (unsigned char) *sstr_s++ : -1;
+}
+
+void sstr_back(int c)	/* step back one byte in the current buffer; c is ignored */
+{
+	sstr_s--;
+}
--- a/draw.c
+++ b/draw.c
@@ -11,19 +11,19 @@
 
 static int hchar(char *c)
 {
-	if (c[0] != c_ec)
-		return c[0] == '_';
-	if (c[1] != '(')
+	if (c[0] == c_ec)
 		return c[1] == '_' || c[1] == '-';
-	return (c[2] == 'r' && c[3] == 'u') || (c[2] == 'u' && c[3] == 'l') ||
-		(c[2] == 'r' && c[3] == 'n');
+	if (!c[1])
+		return c[0] == '_';
+	return (c[0] == 'r' && c[1] == 'u') || (c[0] == 'u' && c[1] == 'l') ||
+		(c[0] == 'r' && c[1] == 'n');
 }
 
 static int vchar(char *c)
 {
-	if (c[0] != c_ec || c[1] != '(')
+	if (!c[1])
 		return c[0] == '_';
-	return (c[2] == 'b' && c[3] == 'v') || (c[2] == 'b' && c[3] == 'r');
+	return (c[0] == 'b' && c[1] == 'v') || (c[0] == 'b' && c[1] == 'r');
 }
 
 void ren_hline(struct wb *wb, int l, char *c)
@@ -104,22 +104,26 @@
 
 void ren_hlcmd(struct wb *wb, char *arg)
 {
-	char lc[GNLEN] = {c_ec, '(', 'r', 'u'};
+	char lc[GNLEN];
 	int l = eval_up(&arg, 'm');
 	if (arg[0] == c_ec && arg[1] == '&')	/* \& can be used as a separator */
 		arg += 2;
+	if (!*arg || charread(&arg, lc) < 0)
+		strcpy(lc, "ru");
 	if (l)
-		ren_hline(wb, l, *arg ? arg : lc);
+		ren_hline(wb, l, lc);
 }
 
 void ren_vlcmd(struct wb *wb, char *arg)
 {
-	char lc[GNLEN] = {c_ec, '(', 'b', 'r'};
+	char lc[GNLEN];
 	int l = eval_up(&arg, 'v');
 	if (arg[0] == c_ec && arg[1] == '&')	/* \& can be used as a separator */
 		arg += 2;
+	if (!*arg || charread(&arg, lc) < 0)
+		strcpy(lc, "br");
 	if (l)
-		ren_vline(wb, l, *arg ? arg : lc);
+		ren_vline(wb, l, lc);
 }
 
 static int tok_num(char **s, int scale)
@@ -171,58 +175,25 @@
 	}
 }
 
-/*
- * the implementation of \b and \o
- *
- * ren_bcmd() and ren_ocmd() call ren_char(), which requires
- * next() and back() functions, similar to ren_next() and ren_back().
- * ln_*() here provide such an interface for the given string,
- * added via ln_push().  ln_*() may be called recursively to
- * handle \o'\b"ab"c'.
- */
-static char *ln_s;
-
-static int ln_next(void)
-{
-	return *ln_s ? (unsigned char) *ln_s++ : -1;
-}
-
-static void ln_back(int c)
-{
-	ln_s--;
-}
-
-static char *ln_push(char *s)
-{
-	char *old_s = ln_s;
-	ln_s = s;
-	return old_s;
-}
-
-static void ln_pop(char *s)
-{
-	ln_s = s;
-}
-
 void ren_bcmd(struct wb *wb, char *arg)
 {
 	struct wb wb2;
 	int n = 0, w = 0;
 	int c, center;
-	char *ln_prev = ln_push(arg);
+	sstr_push(arg);		/* using ren_char()'s interface */
 	wb_init(&wb2);
-	c = ln_next();
+	c = sstr_next();
 	while (c >= 0) {
-		ln_back(c);
-		ren_char(&wb2, ln_next, ln_back);
+		sstr_back(c);
+		ren_char(&wb2, sstr_next, sstr_back, NULL);
 		if (wb_wid(&wb2) > w)
 			w = wb_wid(&wb2);
 		wb_hmov(&wb2, -wb_wid(&wb2));
 		wb_vmov(&wb2, SC_HT);
 		n++;
-		c = ln_next();
+		c = sstr_next();
 	}
-	ln_pop(ln_prev);
+	sstr_pop();
 	center = -(n * SC_HT + SC_EM) / 2;
 	wb_vmov(wb, center + SC_HT);
 	wb_cat(wb, &wb2);
@@ -236,13 +207,13 @@
 	struct wb wb2, wb3;
 	int w = 0, wc;
 	int c;
-	char *ln_prev = ln_push(arg);
+	sstr_push(arg);		/* using ren_char()'s interface */
 	wb_init(&wb2);
 	wb_init(&wb3);
-	c = ln_next();
+	c = sstr_next();
 	while (c >= 0) {
-		ln_back(c);
-		ren_char(&wb3, ln_next, ln_back);
+		sstr_back(c);
+		ren_char(&wb3, sstr_next, sstr_back, NULL);
 		wc = wb_wid(&wb3);
 		if (wc > w)
 			w = wc;
@@ -249,9 +220,9 @@
 		wb_hmov(&wb2, -wc / 2);
 		wb_cat(&wb2, &wb3);
 		wb_hmov(&wb2, -wc / 2);
-		c = ln_next();
+		c = sstr_next();
 	}
-	ln_pop(ln_prev);
+	sstr_pop();
 	wb_hmov(wb, w / 2);
 	wb_cat(wb, &wb2);
 	wb_hmov(wb, w / 2);
--- a/out.c
+++ b/out.c
@@ -34,34 +34,6 @@
 	va_end(ap);
 }
 
-int utf8len(int c)
-{
-	if (c <= 0x7f)
-		return 1;
-	if (c >= 0xfc)
-		return 6;
-	if (c >= 0xf8)
-		return 5;
-	if (c >= 0xf0)
-		return 4;
-	if (c >= 0xe0)
-		return 3;
-	if (c >= 0xc0)
-		return 2;
-	return c != 0;
-}
-
-int utf8read(char **s, char *d)
-{
-	int l = utf8len((unsigned char) **s);
-	int i;
-	for (i = 0; i < l; i++)
-		d[i] = (*s)[i];
-	d[l] = '\0';
-	*s += l;
-	return l;
-}
-
 static int o_s = 10;
 static int o_f = 1;
 static int o_m = 0;
@@ -90,43 +62,6 @@
 	}
 }
 
-static void escarg(char **sp, char *d, int cmd)
-{
-	char *s = *sp;
-	int q;
-	if (strchr(ESC_P, cmd)) {
-		if (cmd == 's' && (*s == '-' || *s == '+'))
-			*d++ = *s++;
-		if (*s == '(') {
-			s++;
-			*d++ = *s++;
-			*d++ = *s++;
-		} else if (!n_cp && *s == '[') {
-			s++;
-			while (*s && *s != ']')
-				*d++ = *s++;
-			if (*s == ']')
-				s++;
-		} else {
-			*d++ = *s++;
-			if (cmd == 's' && s[-1] >= '1' && s[-1] <= '3')
-				if (isdigit(*s))
-					*d++ = *s++;
-		}
-	}
-	if (strchr(ESC_Q, cmd)) {
-		q = *s++;
-		while (*s && *s != q)
-			*d++ = *s++;
-		if (*s == q)
-			s++;
-	}
-	if (cmd == 'z')
-		*d++ = *s++;
-	*d = '\0';
-	*sp = s;
-}
-
 static int tok_num(char **s, int scale)
 {
 	char tok[ILNLEN];
@@ -174,42 +109,6 @@
 	outnn("\n");
 }
 
-/*
- * read a glyph or output troff request
- *
- * This functions reads from s either an output troff request
- * (only the ones emitted by wb.c) or a glyph name and updates
- * s.  The return value is the name of the troff request (the
- * argument is copied into d) or zero for glyph names (it is
- * copied into d).  Returns -1 when the end of s is reached.
- */
-int out_readc(char **s, char *d)
-{
-	char *r = d;
-	if (!**s)
-		return -1;
-	utf8read(s, d);
-	if (d[0] == c_ec) {
-		utf8read(s, d + 1);
-		if (d[1] == '(') {
-			utf8read(s, d);
-			utf8read(s, d + strlen(d));
-		} else if (!n_cp && d[1] == '[') {
-			while (**s && **s != ']')
-				*r++ = *(*s)++;
-			if (**s == ']')
-				(*s)++;
-		} else if (strchr("CDfhmsvXx", d[1])) {
-			int c = d[1];
-			escarg(s, d, d[1]);
-			return c == 'C' ? 0 : c;
-		}
-	}
-	if (d[0] == c_ni)
-		utf8read(s, d + 1);
-	return 0;
-}
-
 static void outg(char *c)
 {
 	if (utf8len((unsigned char) c[0]) == strlen(c))
@@ -240,7 +139,7 @@
 {
 	char c[ILNLEN + GNLEN * 4];
 	int t;
-	while ((t = out_readc(&s, c)) >= 0) {
+	while ((t = escread(&s, c)) >= 0) {
 		if (!t) {
 			if (c[0] == c_ni || (c[0] == '\\' && c[1] == '\\')) {
 				c[0] = c[1];
--- a/ren.c
+++ b/ren.c
@@ -585,61 +585,6 @@
 	ren_aborted = 1;
 }
 
-static void escarg_ren(char *d, int cmd, int (*next)(void), void (*back)(int))
-{
-	char delim[GNLEN];
-	int c;
-	if (strchr(ESC_P, cmd)) {
-		c = next();
-		if (cmd == 's' && (c == '-' || c == '+')) {
-			*d++ = c;
-			c = next();
-		}
-		if (c == '(') {
-			*d++ = next();
-			*d++ = next();
-		} else if (!n_cp && c == '[') {
-			c = next();
-			while (c > 0 && c != '\n' && c != ']') {
-				*d++ = c;
-				c = next();
-			}
-		} else {
-			*d++ = c;
-			if (cmd == 's' && c >= '1' && c <= '3') {
-				c = next();
-				if (isdigit(c))
-					*d++ = c;
-				else
-					back(c);
-			}
-		}
-	}
-	if (strchr(ESC_Q, cmd)) {
-		schar_read(delim, next);
-		while (schar_jump(delim, next, back)) {
-			if ((c = next()) < 0)
-				break;
-			*d++ = c;
-		}
-	}
-	*d = '\0';
-}
-
-static int nextchar(char *s, int (*next)(void))
-{
-	int c = next();
-	int l = utf8len(c);
-	int i;
-	if (c < 0)
-		return 0;
-	s[0] = c;
-	for (i = 1; i < l; i++)
-		s[i] = next();
-	s[l] = '\0';
-	return l;
-}
-
 static void ren_cmd(struct wb *wb, int c, char *arg)
 {
 	switch (c) {
@@ -722,49 +667,38 @@
 static void ren_field(struct wb *wb, int (*next)(void), void (*back)(int));
 static void ren_tab(struct wb *wb, char *tc, int (*next)(void), void (*back)(int));
 
-/* read one character and place it inside wb buffer */
-void ren_char(struct wb *wb, int (*next)(void), void (*back)(int))
+/* read one character and place it inside wb buffer; return 1 if read delim */
+int ren_char(struct wb *wb, int (*next)(void), void (*back)(int), char *delim)
 {
 	char c[GNLEN * 4];
 	char arg[ILNLEN];
 	struct glyph *g;
 	char *s;
-	int w, n, l;
-	nextchar(c, next);
+	int w, n;
+	if (charnext(c, next, back) < 0)
+		return -1;
+	if (delim && !strcmp(c, delim))
+		return 1;
 	if (c[0] == ' ' || c[0] == '\n') {
 		wb_put(wb, c);
-		return;
+		return 0;
 	}
 	if (c[0] == '\t' || c[0] == '') {
 		ren_tab(wb, c[0] == '\t' ? c_tc : c_lc, next, back);
-		return;
+		return 0;
 	}
 	if (c[0] == c_fa) {
 		ren_field(wb, next, back);
-		return;
+		return 0;
 	}
-	if (c[0] == c_ni)
-		nextchar(c + 1, next);
 	if (c[0] == c_ec) {
-		nextchar(c + 1, next);
-		if (c[1] == '(') {
-			l = nextchar(c + 2, next);
-			l += nextchar(c + 2 + l, next);
-			c[2 + l] = '\0';
-		} else if (!n_cp && c[1] == '[') {
-			l = 0;
-			n = next();
-			while (n >= 0 && n != '\n' && n != ']' && l < GNLEN - 1) {
-				c[l++] = n;
-				n = next();
-			}
-			c[l] = '\0';
-		} else if (c[1] == 'z') {
+		if (c[1] == 'z') {
 			w = wb_wid(wb);
-			ren_char(wb, next, back);
+			ren_char(wb, next, back, NULL);
 			wb_hmov(wb, w - wb_wid(wb));
-			return;
-		} else if (c[1] == '!') {
+			return 0;
+		}
+		if (c[1] == '!') {
 			if (ren_nl && next == ren_next) {
 				s = arg;
 				n = next();
@@ -775,21 +709,18 @@
 				*s = '\0';
 				ren_transparent(arg);
 			}
-			return;
-		} else if (strchr(" bCcDdfHhkLlmNoprSsuvXxz0^|{}&", c[1])) {
-			escarg_ren(arg, c[1], next, back);
-			if (c[1] == 'N') {
-				g = dev_glyph_byid(arg, n_f);
-				c[1] = 'C';
-				strcpy(arg, g ? g->name : "cnull");
-			}
+			return 0;
+		}
+		if (strchr(" bCcDdfHhkLlmNoprSsuvXxz0^|{}&", c[1])) {
+			argnext(arg, c[1], next, back);
 			if (c[1] == 'S' || c[1] == 'H')
-				return;			/* not implemented */
-			if (c[1] != 'C') {
+				return 0;			/* not implemented */
+			if (c[1] != 'N') {
 				ren_cmd(wb, c[1], arg);
-				return;
+				return 0;
 			}
-			strcpy(c, arg);
+			g = dev_glyph_byid(arg, n_f);
+			strcpy(c, g ? g->name : "cnull");
 		}
 	}
 	if (!n_lg || wb_lig(wb, c)) {
@@ -797,6 +728,7 @@
 			wb_kern(wb, c);
 		wb_put(wb, c);
 	}
+	return 0;
 }
 
 /* read the argument of \w and push its width */
@@ -806,14 +738,13 @@
 	int c, n;
 	struct wb wb;
 	wb_init(&wb);
-	schar_read(delim, next);
+	charnext(delim, next, back);
 	odiv_beg();
 	c = next();
 	while (c >= 0 && c != '\n') {
 		back(c);
-		if (!schar_jump(delim, next, back))
+		if (ren_char(&wb, next, back, delim))
 			break;
-		ren_char(&wb, next, back);
 		c = next();
 	}
 	odiv_end();
@@ -831,9 +762,8 @@
 	c = next();
 	while (c >= 0 && c != '\n' && c != ec) {
 		back(c);
-		if (!schar_jump(delim, next, back))
+		if (ren_char(wb, next, back, delim))
 			break;
-		ren_char(wb, next, back);
 		c = next();
 	}
 	if (c == '\n')
@@ -855,7 +785,7 @@
 	adj = adj_alloc();
 	wb_init(&wb);
 	wb_init(&wb2);
-	schar_read(delim, next);
+	charnext(delim, next, back);
 	/* the left-adjusted string */
 	ren_until(&wb2, delim, '\n', next, back);
 	wb_cpy(&wb, &wb2, 0);
@@ -917,7 +847,7 @@
 		c = next();
 		while (c >= 0 && c != '\n' && c != '\t' && c != '') {
 			back(c);
-			ren_char(&t, next, back);
+			ren_char(&t, next, back, NULL);
 			c = next();
 		}
 		back(c);
@@ -989,7 +919,7 @@
 			n_ce = MAX(0, n_ce - 1);
 		if (c != ' ') {
 			ren_back(c);
-			ren_char(wb, ren_next, ren_back);
+			ren_char(wb, ren_next, ren_back, NULL);
 			if (c != '\n' && wb_empty(wb))
 				adj_nonl(cadj);
 		}
--- a/roff.h
+++ b/roff.h
@@ -25,6 +25,7 @@
 #define NIES		128	/* number of nested .ie commands */
 #define NTABS		16	/* number of tab stops */
 #define NTR		512	/* number of character translations (.tr) */
+#define NSSTR		32	/* number of nested sstr_push() calls */
 #define NFIELDS		32	/* number of fields */
 #define MAXFRAC		100000	/* maximum value of the fractional part */
 #define LIGLEN		4	/* length of ligatures */
@@ -269,7 +270,7 @@
 
 /* rendering */
 int render(void);				/* the main loop */
-void ren_char(struct wb *wb, int (*next)(void), void (*back)(int));
+int ren_char(struct wb *wb, int (*next)(void), void (*back)(int), char *delim);
 int ren_wid(int (*next)(void), void (*back)(int));
 void ren_tl(int (*next)(void), void (*back)(int));
 void ren_hline(struct wb *wb, int l, char *c);	/* horizontal line */
@@ -281,7 +282,6 @@
 
 /* out.c */
 void out_line(char *s);				/* output rendered line */
-int out_readc(char **s, char *d);		/* read request or glyph */
 void out(char *s, ...);				/* output troff cmd */
 
 /* troff commands */
@@ -324,9 +324,20 @@
 /* helpers */
 void errmsg(char *msg, ...);
 int utf8len(int c);
+int utf8next(char *s, int (*next)(void));
 int utf8read(char **s, char *d);
-void schar_read(char *d, int (*next)(void));
-int schar_jump(char *d, int (*next)(void), void (*back)(int));
+int charnext(char *c, int (*next)(void), void (*back)(int));
+int charread(char **s, char *c);
+int charnext_delim(char *c, int (*next)(void), void (*back)(int), char *delim);
+void charnext_str(char *d, char *c);
+void argnext(char *d, int cmd, int (*next)(void), void (*back)(int));
+void argread(char **sp, char *d, int cmd);
+int escread(char **s, char *d);
+/* string streams; nested next()/back() interface for string buffers */
+void sstr_push(char *s);
+char *sstr_pop(void);
+int sstr_next(void);
+void sstr_back(int c);
 
 /* internal commands */
 #define TR_DIVBEG	"\07<"	/* diversion begins */
--- a/tr.c
+++ b/tr.c
@@ -163,43 +163,10 @@
 	macrobody(NULL, args[1] ? args[1] : ".");
 }
 
-void schar_read(char *d, int (*next)(void))
-{
-	d[0] = next();
-	d[1] = '\0';
-	if (d[0] == c_ni) {
-		d[1] = next();
-		d[2] = '\0';
-	}
-	if (d[0] == c_ec) {
-		d[1] = next();
-		d[2] = '\0';
-		if (d[1] == '(') {
-			d[2] = next();
-			d[3] = next();
-			d[4] = '\0';
-		}
-	}
-}
-
-int schar_jump(char *d, int (*next)(void), void (*back)(int))
-{
-	int c, i;
-	for (i = 0; d[i]; i++)
-		if ((c = next()) != d[i])
-			break;
-	if (d[i]) {
-		back(c);
-		while (i > 0)
-			back(d[--i]);
-		return 1;
-	}
-	return 0;
-}
-
 /* read into sbuf until stop; if stop is NULL, stop at whitespace */
 static int read_until(struct sbuf *sbuf, char *stop)
 {
+	char cs[GNLEN], cs2[GNLEN];
 	int c;
 	while ((c = cp_next()) >= 0) {
 		cp_back(c);
@@ -207,9 +174,11 @@
 			return 1;
 		if (!stop && (c == ' ' || c == '\t'))
 			return 0;
-		if (stop && !schar_jump(stop, cp_next, cp_back))
+		charnext(cs, cp_next, cp_back);
+		if (stop && !strcmp(stop, cs))
 			return 0;
-		sbuf_add(sbuf, cp_next());
+		charnext_str(cs2, cs);
+		sbuf_append(sbuf, cs2);
 	}
 	return 1;
 }
@@ -220,7 +189,7 @@
 	char delim[GNLEN];
 	struct sbuf s1, s2;
 	int ret;
-	schar_read(delim, cp_next);
+	charnext(delim, cp_next, cp_back);
 	sbuf_init(&s1);
 	sbuf_init(&s2);
 	read_until(&s1, delim);
@@ -408,7 +377,9 @@
 
 static void tr_hc(char **args)
 {
-	strcpy(c_hc, args[1] ? args[1] : "\\%");
+	char *s = args[1];
+	if (!s || charread(&s, c_hc) < 0)
+		strcpy(c_hc, "\\%");
 }
 
 static void tr_nh(char **args)
@@ -493,9 +464,9 @@
 
 static void tr_mc(char **args)
 {
-	if (args[1]) {
+	char *s = args[1];
+	if (s && charread(&s, c_mc) >= 0) {
 		n_mc = 1;
-		strcpy(c_mc, args[1]);
 		n_mcn = args[2] ? eval(args[2], 'm') : SC_EM;
 	} else {
 		n_mc = 0;
@@ -504,12 +475,16 @@
 
 static void tr_tc(char **args)
 {
-	strcpy(c_tc, args[1] ? args[1] : "");
+	char *s = args[1];
+	if (!s || charread(&s, c_tc) < 0)
+		strcpy(c_tc, "");
 }
 
 static void tr_lc(char **args)
 {
-	strcpy(c_lc, args[1] ? args[1] : "");
+	char *s = args[1];
+	if (!s || charread(&s, c_lc) < 0)
+		strcpy(c_lc, "");
 }
 
 static void tr_lf(char **args)
@@ -553,13 +528,9 @@
 {
 	char *s = args[1];
 	char c1[GNLEN], c2[GNLEN];
-	if (!s)
-		return;
-	while (*s) {
-		utf8read(&s, c1);
-		strcpy(c2, " ");
-		if (*s)
-			utf8read(&s, c2);
+	while (s && charread(&s, c1) >= 0) {
+		if (charread(&s, c2) < 0)
+			strcpy(c2, " ");
 		tr_add(c1, c2);
 	}
 }
--- a/wb.c
+++ b/wb.c
@@ -306,7 +306,7 @@
 	char *s = sbuf_buf(&src->sbuf);
 	char d[ILNLEN];
 	int c, part;
-	while ((c = out_readc(&s, d)) >= 0)
+	while ((c = escread(&s, d)) >= 0)
 		wb_putc(wb, c, d);
 	part = src->part;
 	wb->r_s = -1;
@@ -349,7 +349,7 @@
 	char *r = *s;
 	int c;
 	wb_reset(w1);
-	while ((c = out_readc(s, d)) > 0) {
+	while ((c = escread(s, d)) > 0) {
 		wb_putc(w1, c, d);
 		r = *s;
 	}
@@ -365,7 +365,7 @@
 	char *r = NULL;
 	int c;
 	skipreqs(&s, w1);
-	while ((c = out_readc(&s, d)) >= 0) {
+	while ((c = escread(&s, d)) >= 0) {
 		wb_putc(w1, c, d);
 		if (wb_wid(w1) > w && (!any || r))
 			continue;
@@ -387,7 +387,7 @@
 	char *r = NULL;
 	int c;
 	skipreqs(&s, w1);
-	while ((c = out_readc(&s, d)) >= 0) {
+	while ((c = escread(&s, d)) >= 0) {
 		wb_putc(w1, c, d);
 		if (wb_wid(w1) + wb_dashwid(w1) > w && (!(flg & HY_ANY) || r))
 			continue;
@@ -410,7 +410,7 @@
 	int beg, end;
 	int i, c;
 	skipreqs(&s, w1);
-	while ((c = out_readc(&s, d)) >= 0 && wp + strlen(d) + 1 < we) {
+	while ((c = escread(&s, d)) >= 0 && wp + strlen(d) + 1 < we) {
 		wb_putc(w1, c, d);
 		if (c == 0) {
 			strcpy(wp, d);
@@ -437,7 +437,7 @@
 	int c = -1;
 	wb_reset(w1);
 	wb_reset(w2);
-	while (s != pos && (c = out_readc(&s, d)) >= 0)
+	while (s != pos && (c = escread(&s, d)) >= 0)
 		wb_putc(w1, c, d);
 	if (dash)
 		wb_putc(w1, 0, "hy");
@@ -444,7 +444,7 @@
 	w2->r_s = w1->r_s;
 	w2->r_f = w1->r_f;
 	w2->r_m = w1->r_m;
-	while ((c = out_readc(&s, d)) >= 0)
+	while ((c = escread(&s, d)) >= 0)
 		wb_putc(w2, c, d);
 }