ref: ad5819b2b326fca58c1eacdc433186b09a51d2aa
parent: 3a912712527c7c749cdc090d0c66917ad116ddbc
author: Ali Gholami Rudi <ali@rudi.ir>
date: Wed Mar 11 12:52:59 EDT 2020
post: support Unicode bookmarks and titles

Suggested and tested by Dirk-Wilhelm Peters <peters@schwertfisch.de>.
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@
CC = cc
CFLAGS = -Wall -O2 "-DTROFFFDIR=\"$(FDIR)\""
LDFLAGS =
-OBJS = post.o ps.o font.o dev.o clr.o dict.o iset.o
+OBJS = post.o ps.o font.o dev.o clr.o dict.o iset.o sbuf.o
OBJSPDF = post.o pdf.o pdfext.o font.o dev.o clr.o dict.o iset.o sbuf.o
all: post pdf
--- a/pdf.c
+++ b/pdf.c
@@ -733,7 +733,7 @@
if (lnk[0] == '#') { /* internal links */
pdfout(" /A << /S /GoTo /D (%s) >>\n", lnk + 1);
} else { /* external links */
- pdfout(" /A << /S /URI /URI (%s) >>\n", lnk);
+ pdfout(" /A << /S /URI /URI %s >>\n", pdftext_static(lnk));
}
pdfout(">>\n");
obj_end();
@@ -786,7 +786,7 @@
cnt++;
obj_beg(objs[i]);
pdfout("<<\n");
- pdfout(" /Title (%s)\n", desc[i]);
+ pdfout(" /Title %s\n", pdftext_static(desc[i]));
/* the parent field */
for (j = i - 1; j >= 0 && level[j] >= level[i]; j--)
;
@@ -996,9 +996,9 @@
info_id = obj_beg(0);
pdfout("<<\n");
if (pdf_title[0])
- pdfout(" /Title (%s)\n", pdf_title);
+ pdfout(" /Title %s\n", pdftext_static(pdf_title));
if (pdf_author[0])
- pdfout(" /Author (%s)\n", pdf_author);
+ pdfout(" /Author %s\n", pdftext_static(pdf_author));
pdfout(" /Creator (Neatroff)\n");
pdfout(" /Producer (Neatpost)\n");
pdfout(">>\n");
--- a/post.c
+++ b/post.c
@@ -54,18 +54,16 @@
+/*
+ * Return the length in bytes of the UTF-8 sequence whose first byte is c.
+ *
+ * The added lines inspect the high bits of c from the top down: a clear
+ * bit 7 means ASCII (the result is 0 for NUL and 1 otherwise), a clear
+ * bit 6 means a continuation byte, which cannot start a sequence and is
+ * counted as a single byte, and the following clear bit selects a length
+ * of 2, 3 or 4.  Unlike the removed lines, 5- and 6-byte forms are no
+ * longer reported; such lead bytes fall through to the final return 1.
+ * The call visible in this patch passes an unsigned char value.
+ */
static int utf8len(int c)
{
- if (c <= 0x7f)
+ if (~c & 0x80) /* ASCII */
+ return c > 0;
+ if (~c & 0x40) /* invalid UTF-8 */
return 1;
- if (c >= 0xfc)
- return 6;
- if (c >= 0xf8)
- return 5;
- if (c >= 0xf0)
- return 4;
- if (c >= 0xe0)
- return 3;
- if (c >= 0xc0)
+ if (~c & 0x20)
return 2;
+ if (~c & 0x10)
+ return 3;
+ if (~c & 0x08)
+ return 4;
+ /* bits 7..3 all set: counted as one byte */
return 1;
}
@@ -546,6 +544,69 @@
memset(new + oldsz * memsz, 0, (newsz - oldsz) * memsz);
free(old);
return new;
+}
+
+/*
+ * Decode the UTF-8 sequence starting at s and return its Unicode
+ * code point.
+ *
+ * A first byte that cannot start a sequence (a stray continuation byte
+ * 0x80..0xbf, or 0xf8..0xff) is returned unchanged, matching utf8len(),
+ * which counts such bytes as one.  A sequence that is cut short by the
+ * end of the string or by a non-continuation byte is not decoded either;
+ * the first byte is returned instead, so the terminating NUL is never
+ * read past.
+ */
+static int utf8code(char *s)
+{
+	int c = (unsigned char) s[0];
+	int cp;		/* code point assembled so far */
+	int n;		/* sequence length announced by the first byte */
+	int i;
+	if (!(c & 0x80))	/* ASCII */
+		return c;
+	if (!(c & 0x40))	/* continuation byte in lead position */
+		return c;
+	if (!(c & 0x20))
+		n = 2;
+	else if (!(c & 0x10))
+		n = 3;
+	else if (!(c & 0x08))
+		n = 4;
+	else			/* 0xf8..0xff never occur in UTF-8 */
+		return c;
+	/* payload bits of the lead byte: 5, 4 or 3 bits for n = 2, 3, 4 */
+	cp = c & (0xff >> (n + 1));
+	for (i = 1; i < n; i++) {
+		/* stops at NUL too, since NUL is not of the form 10xxxxxx */
+		if ((s[i] & 0xc0) != 0x80)
+			return c;
+		cp = (cp << 6) | (s[i] & 0x3f);
+	}
+	return cp;
+}
+
+/*
+ * Return 1 if s can be written verbatim between the parentheses of a
+ * PDF literal string, 0 otherwise.
+ *
+ * Bytes with the high bit set need a Unicode encoding.  Parentheses and
+ * the backslash are special inside a literal string (delimiters and the
+ * escape character), so strings containing them are rejected as well
+ * rather than escaped.
+ */
+static int pdftext_ascii(char *s)
+{
+	for (; *s; s++) {
+		if (((unsigned char) *s) & 0x80)
+			return 0;
+		if (*s == '(' || *s == ')' || *s == '\\')
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Encode s as a pdf text string.
+ *
+ * If pdftext_ascii() accepts s, it is wrapped in parentheses as is.
+ * Otherwise s is read as UTF-8 and written as a hexadecimal string of
+ * big-endian UTF-16 code units preceded by the byte order marker FEFF;
+ * code points above U+FFFF become a surrogate pair.
+ *
+ * Malformed input (a byte that cannot start a sequence, or a sequence
+ * cut short by the end of the string or by a non-continuation byte) and
+ * code points that UTF-16 cannot represent (U+D800..U+DFFF and anything
+ * above U+10FFFF) are written as U+FFFD.  The scan never advances past
+ * the terminating NUL.
+ *
+ * Returns the string produced by sbuf_done(); the caller releases it
+ * with free().
+ */
+static char *pdftext(char *s)
+{
+	struct sbuf *sb = sbuf_make();
+	if (pdftext_ascii(s)) {
+		sbuf_chr(sb, '(');
+		sbuf_str(sb, s);
+		sbuf_chr(sb, ')');
+		return sbuf_done(sb);
+	}
+	/* read utf-8 and write utf-16 */
+	sbuf_str(sb, "<FEFF");		/* unicode byte order marker */
+	while (*s) {
+		int l = utf8len((unsigned char) *s);
+		int c = 0xfffd;		/* used unless the sequence is valid */
+		int i;
+		/* check that the announced continuation bytes are present */
+		for (i = 1; i < l; i++)
+			if ((s[i] & 0xc0) != 0x80)
+				break;
+		if (i < l)		/* truncated: skip only the bytes seen */
+			l = i;
+		else if (l > 1 || !(*s & 0x80))
+			c = utf8code(s);
+		if (c > 0x10ffff || (c >= 0xd800 && c <= 0xdfff))
+			c = 0xfffd;	/* not representable in utf-16 */
+		if (c < 0x10000) {
+			sbuf_printf(sb, "%02X%02X", c >> 8, c & 0xff);
+		} else {
+			int c1 = 0xd800 + ((c - 0x10000) >> 10);
+			int c2 = 0xdc00 + ((c - 0x10000) & 0x3ff);
+			sbuf_printf(sb, "%02X%02X", c1 >> 8, c1 & 0xff);
+			sbuf_printf(sb, "%02X%02X", c2 >> 8, c2 & 0xff);
+		}
+		s += l;
+	}
+	sbuf_chr(sb, '>');
+	return sbuf_done(sb);
+}
+
+/*
+ * Encode s as pdf text string.
+ *
+ * The returned string is owned by this function and stays valid only
+ * until the next call: callers must not free it, and must copy it if
+ * two encoded strings are needed at the same time.  The result is kept
+ * in the buffer allocated by pdftext() rather than copied into a fixed
+ * array, so long strings are not truncated and always keep their
+ * closing delimiter.
+ */
+char *pdftext_static(char *s)
+{
+	static char *buf;	/* result handed out by the previous call */
+	/* encode first: s may point into the previous result */
+	char *r = pdftext(s);
+	free(buf);
+	buf = r;
+	return buf;
}
static char *usage =
--- a/post.h
+++ b/post.h
@@ -110,6 +110,8 @@
/* memory allocation */
void *mextend(void *old, long oldsz, long newsz, int memsz);
+/* helper functions */
+char *pdftext_static(char *s);
/* string buffers */
struct sbuf *sbuf_make(void);
--- a/ps.c
+++ b/ps.c
@@ -317,9 +317,9 @@
lnk[0] == '#' ? lnk + 1 : lnk);
} else {
outf("[ /Rect [ %d %d t %d %d t ] "
- "/Action << /Subtype /URI /URI (%s) >> /Open true "
+ "/Action << /Subtype /URI /URI %s >> /Open true "
"/Subtype /Link /LNK pdfmark\n",
- o_h, o_v, o_h + hwid, o_v + vwid, lnk);
+ o_h, o_v, o_h + hwid, o_v + vwid, pdftext_static(lnk));
}
}
@@ -346,7 +346,7 @@
for (j = i + 1; j < n && level[j] > level[i]; j++)
if (level[j] == level[i] + 1)
cnt++;
- outf("[ /Title (%s)", desc[i]);
+ outf("[ /Title %s", pdftext_static(desc[i]));
if (page[i] > 0)
outf(" /Page %d", page[i]);
if (cnt > 0)
@@ -385,9 +385,9 @@
{
out("[");
if (ps_title[0])
- out(" /Title (%s)", ps_title);
+ out(" /Title %s", pdftext_static(ps_title));
if (ps_author[0])
- out(" /Author (%s)", ps_author);
+ out(" /Author %s", pdftext_static(ps_author));
out(" /Creator (Neatroff) /DOCINFO pdfmark\n");
out("%%%%Trailer\n");
out("done\n");