ref: fdd132556d3330e9557c36c7e5631844d7cddb37
parent: 68d9ac00bfee6a2667d8f981c24c9779aee5a012
author: Ali Gholami Rudi <ali@rudi.ir>
date: Sat Apr 14 20:42:53 EDT 2018
pdf: basic support for \X'pdf pic.pdf'
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@
CFLAGS = -Wall -O2 "-DTROFFFDIR=\"$(FDIR)\""
LDFLAGS =
OBJS = post.o ps.o font.o dev.o clr.o dict.o iset.o
-OBJSPDF = post.o pdf.o font.o dev.o clr.o dict.o iset.o sbuf.o
+OBJSPDF = post.o pdf.o pdfext.o font.o dev.o clr.o dict.o iset.o sbuf.o
all: post pdf
%.o: %.c post.h
--- a/pdf.c
+++ b/pdf.c
@@ -1,3 +1,4 @@
+/* PDF post processor functions */
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
@@ -25,6 +26,8 @@
static int p_f, p_s, p_m; /* output font */
static int o_queued; /* queued character type */
static char o_iset[1024]; /* fonts accesssed in this page */
+static int *xobj; /* page xobject object ids */
+static int xobj_sz, xobj_n; /* number of xobjects */
/* loaded PDF fonts */
struct pfont {
@@ -58,6 +61,13 @@
pdf_pos += strlen(s);
}
+/* write len bytes of s to the pdf output and advance pdf_pos by len */
+static void pdfmem(char *s, int len)
+{
+	pdf_pos += len;
+	fwrite(s, 1, len, stdout);
+}
+
/* allocate an object number */
static int obj_map(void)
{
@@ -164,7 +174,7 @@
for (i = 0; i < n; i++) {
sbuf_chr(d, hex[((unsigned char) s[i]) >> 4]);
sbuf_chr(d, hex[((unsigned char) s[i]) & 0x0f]);
- if (i % 80 == 79 && i + 1 < n)
+ if (i % 40 == 39 && i + 1 < n)
sbuf_chr(d, '\n');
}
sbuf_str(d, ">\n");
@@ -481,6 +491,130 @@
{
}
+/*
+ * copy the next word of src into dst, skipping the spaces and newlines
+ * before it; a word may be enclosed in double quotes, in which case two
+ * consecutive quotes stand for one.  returns the position after the word.
+ */
+static char *strcut(char *dst, char *src)
+{
+	char *d = dst;
+	char *s = src;
+	while (*s == ' ' || *s == '\n')
+		s++;
+	if (*s != '"') {
+		/* a plain word ends at the next space or newline */
+		for (; *s && *s != ' ' && *s != '\n'; s++)
+			*d++ = *s;
+		*d = '\0';
+		return s;
+	}
+	s++;			/* the opening quote */
+	while (*s) {
+		if (*s == '"') {
+			if (s[1] != '"')
+				break;	/* the closing quote */
+			s++;		/* a doubled quote; keep one */
+		}
+		*d++ = *s++;
+	}
+	if (*s == '"')
+		s++;
+	*d = '\0';
+	return s;
+}
+
+/*
+ * copy the pdf object at pos (after skipping white space) into a static
+ * NUL-terminated buffer; the copy is cut to the size of the buffer and
+ * is overwritten by the next call.
+ */
+static char *pdf_copy(char *pdf, int len, int pos)
+{
+	static char buf[256];
+	int n;
+	pos += pdf_ws(pdf, len, pos);
+	n = pdf_len(pdf, len, pos);
+	if (n > sizeof(buf) - 1)
+		n = sizeof(buf) - 1;
+	memcpy(buf, pdf + pos, n);
+	buf[n] = '\0';
+	return buf;
+}
+
+/*
+ * return the length of the stream at pos, from the white space before
+ * the "stream" keyword to the end of "endstream"; slen is the value of
+ * the stream's /Length.  the end of line before "endstream" may be LF,
+ * CR or CRLF.  the result never extends past the end of the buffer.
+ */
+static int pdf_slen(char *pdf, int len, int pos, int slen)
+{
+	int old = pos;
+	pos += pdf_ws(pdf, len, pos);
+	pos += strlen("stream");
+	if (pos < len && pdf[pos] == '\r')	/* stream\r\n */
+		pos++;
+	pos += 1 + slen;			/* the newline and the data */
+	if (pos < len && pdf[pos] == '\r')
+		pos++;
+	if (pos < len && pdf[pos] == '\n')
+		pos++;
+	pos += strlen("endstream");
+	if (pos > len)				/* truncated or corrupt file */
+		pos = len;
+	return pos > old ? pos - old : 0;
+}
+
+/*
+ * write the contents of the first page of the given pdf file as a form
+ * XObject and add it to the xobjects of the current page; returns its
+ * index in xobj[] (the number in its /FO name), or -1 if the required
+ * objects cannot be found, in which case nothing is written.
+ */
+static int pdfext(char *pdf, int len)
+{
+	char *cont_fields[] = {"/Filter", "/DecodeParms"};
+	int trailer, root, cont, pages, page1, stream;
+	int kids_val, page1_val, val;
+	int xobj_id, length, slen;
+	int bbox;
+	int i;
+	if (len <= 0)
+		return -1;
+	/* locate and validate everything before writing any output */
+	if ((trailer = pdf_trailer(pdf, len)) < 0)
+		return -1;
+	if ((root = pdf_dval_obj(pdf, len, trailer, "/Root")) < 0)
+		return -1;
+	if ((pages = pdf_dval_obj(pdf, len, root, "/Pages")) < 0)
+		return -1;
+	if ((kids_val = pdf_dval_val(pdf, len, pages, "/Kids")) < 0)
+		return -1;
+	if ((page1_val = pdf_lval(pdf, len, kids_val, 0)) < 0)
+		return -1;
+	if ((page1 = pdf_ref(pdf, len, page1_val)) < 0)
+		return -1;
+	if ((cont = pdf_dval_obj(pdf, len, page1, "/Contents")) < 0)
+		return -1;
+	if ((val = pdf_dval_val(pdf, len, cont, "/Length")) < 0)
+		return -1;
+	length = atoi(pdf + val);
+	/* the stream follows the dictionary of the contents object */
+	stream = cont + pdf_len(pdf, len, cont);
+	stream += pdf_ws(pdf, len, stream);
+	if (length < 0 || length > len - stream)
+		return -1;
+	slen = pdf_slen(pdf, len, stream, length);
+	if (slen > len - stream)	/* never read past the file */
+		slen = len - stream;
+	/* the page's own /MediaBox, or the one of the page tree */
+	bbox = pdf_dval_val(pdf, len, page1, "/MediaBox");
+	if (bbox < 0)
+		bbox = pdf_dval_val(pdf, len, pages, "/MediaBox");
+	xobj_id = obj_beg(0);
+	pdfout("<<\n");
+	pdfout(" /Type /XObject\n");
+	pdfout(" /Subtype /Form\n");
+	pdfout(" /FormType 1\n");
+	if (bbox >= 0)
+		pdfout(" /BBox %s\n", pdf_copy(pdf, len, bbox));
+	pdfout(" /Matrix [1 0 0 1 %s]\n", pdfpos(o_h, o_v));
+	pdfout(" /Resources << /ProcSet [/PDF] >>\n");
+	pdfout(" /Length %d\n", length);
+	/* the stream is copied as is, so its filters are copied too */
+	for (i = 0; i < LEN(cont_fields); i++)
+		if ((val = pdf_dval_val(pdf, len, cont, cont_fields[i])) >= 0)
+			pdfout(" %s %s\n", cont_fields[i],
+				pdf_copy(pdf, len, val));
+	pdfout(">>\n");
+	pdfmem(pdf + stream, slen);
+	pdfout("\n");
+	obj_end();
+	if (xobj_n == xobj_sz) {
+		xobj_sz += 8;
+		xobj = mextend(xobj, xobj_n, xobj_sz, sizeof(xobj[0]));
+	}
+	xobj[xobj_n++] = xobj_id;
+	return xobj_n - 1;
+}
+
+/*
+ * include a pdf file in the current page; the first word of spec is
+ * the path of the file.  the request is ignored if the file cannot be
+ * read or the page cannot be extracted from it.
+ */
+void outpdf(char *spec)
+{
+	char pdf[1 << 12];
+	char buf[1 << 12];
+	struct sbuf *sb;
+	int xobj_id;
+	int fd, nr;
+	spec = strcut(pdf, spec);
+	if (!pdf[0])
+		return;
+	/* reading the pdf file */
+	fd = open(pdf, O_RDONLY);
+	if (fd < 0)
+		return;
+	sb = sbuf_make();
+	while ((nr = read(fd, buf, sizeof(buf))) > 0)
+		sbuf_mem(sb, buf, nr);
+	close(fd);
+	/* the XObject; a read error leaves a file that fails to parse */
+	xobj_id = nr < 0 ? -1 : pdfext(sbuf_buf(sb), sbuf_len(sb));
+	sbuf_free(sb);
+	if (xobj_id < 0)
+		return;
+	o_flush();
+	out_fontup();
+	sbuf_printf(pg, "ET /FO%d Do BT\n", xobj_id);
+	p_h = -1;
+	p_v = -1;
+}
+
void outlink(char *spec)
{
}
@@ -687,10 +821,10 @@
/* page contents */
cont_id = obj_beg(0);
pdfout("<<\n");
- pdfout(" /Length %d\n", sbuf_len(pg));
+ pdfout(" /Length %d\n", sbuf_len(pg) - 1);
pdfout(">>\n");
pdfout("stream\n");
- pdfouts(sbuf_buf(pg));
+ pdfmem(sbuf_buf(pg), sbuf_len(pg));
pdfout("endstream\n");
obj_end();
/* the page object */
@@ -714,6 +848,12 @@
}
}
pdfout(" >>\n");
+ if (xobj_n) { /* XObjects */
+ pdfout(" /XObject <<");
+ for (i = 0; i < xobj_n; i++)
+ pdfout(" /FO%d %d 0 R", i, xobj[i]);
+ pdfout(" >>\n");
+ }
pdfout(" >>\n");
pdfout(" /Contents %d 0 R\n", cont_id);
pdfout(">>\n");
@@ -720,4 +860,8 @@
obj_end();
sbuf_free(pg);
memset(o_iset, 0, pfonts_n * sizeof(o_iset[0]));
+ free(xobj);
+ xobj = NULL;
+ xobj_n = 0;
+ xobj_sz = 0;
}
--- /dev/null
+++ b/pdfext.c
@@ -1,0 +1,281 @@
+/* Parse and extract PDF objects */
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "post.h"
+
+/* the number of white space characters starting at pos */
+int pdf_ws(char *pdf, int len, int pos)
+{
+	int n = 0;
+	while (pos + n < len && isspace((unsigned char) pdf[pos + n]))
+		n++;
+	return n;
+}
+
+/*
+ * the type of the pdf object at pos, after skipping white space:
+ * s: string, d: dictionary, l: list, n: number, /: name, r: reference;
+ * returns -1 for anything else or if pos is outside the buffer.
+ */
+int pdf_type(char *pdf, int len, int pos)
+{
+	int c;
+	if (pos < 0)
+		return -1;
+	pos += pdf_ws(pdf, len, pos);
+	if (pos >= len)
+		return -1;
+	c = (unsigned char) pdf[pos];
+	if (c == '/')
+		return '/';
+	if (c == '(')
+		return 's';
+	if (c == '<')
+		return pos + 1 < len && pdf[pos + 1] == '<' ? 'd' : 's';
+	if (c == '[')
+		return 'l';
+	/* strchr() also matches the terminating NUL, hence the c test */
+	if (c && strchr("0123456789+-.", c)) {
+		if (!isdigit(c))
+			return 'n';
+		/* a reference is two unsigned integers followed by R */
+		while (pos < len && isdigit((unsigned char) pdf[pos]))
+			pos++;
+		pos += pdf_ws(pdf, len, pos);
+		if (pos >= len || !isdigit((unsigned char) pdf[pos]))
+			return 'n';
+		while (pos < len && isdigit((unsigned char) pdf[pos]))
+			pos++;
+		pos += pdf_ws(pdf, len, pos);
+		return pos < len && pdf[pos] == 'R' ? 'r' : 'n';
+	}
+	return -1;
+}
+
+/*
+ * the length of the pdf object at pos, including the white space before
+ * it.  numbers, references, strings, names, lists, dictionaries and bare
+ * keywords (such as true, false and null) are recognised.  returns 0 if
+ * pos is outside the buffer; if no object starts at pos, only the white
+ * space is counted.
+ */
+int pdf_len(char *pdf, int len, int pos)
+{
+	static char delim[] = " \t\r\n\f()<>[]{}/%";
+	int c;
+	int old = pos;
+	if (pos < 0 || pos >= len)
+		return 0;
+	pos += pdf_ws(pdf, len, pos);
+	if (pos >= len)
+		return pos - old;
+	c = (unsigned char) pdf[pos];
+	/* strchr() also matches the terminating NUL, hence the c test */
+	if (c && strchr("0123456789+-.", c)) {
+		if (pdf_type(pdf, len, pos) == 'r') {
+			char *r = memchr(pdf + pos, 'R', len - pos);
+			return r - (pdf + old) + 1;
+		}
+		pos++;
+		while (pos < len && pdf[pos] &&
+				strchr("0123456789.", (unsigned char) pdf[pos]))
+			pos++;
+	} else if (c == '(') {
+		int depth = 1;
+		pos++;
+		while (pos < len && depth > 0) {
+			if (pdf[pos] == '(')
+				depth++;
+			if (pdf[pos] == ')')
+				depth--;
+			if (pdf[pos] == '\\')	/* skip the escaped character */
+				pos++;
+			pos++;
+		}
+	} else if (c == '<' && pos + 1 < len && pdf[pos + 1] == '<') {
+		pos += 2;
+		while (pos + 1 < len && (pdf[pos] != '>' || pdf[pos + 1] != '>')) {
+			int beg = pos;
+			pos += pdf_len(pdf, len, pos);	/* the key */
+			pos += pdf_len(pdf, len, pos);	/* the value */
+			pos += pdf_ws(pdf, len, pos);
+			if (pos == beg)		/* malformed; do not loop forever */
+				break;
+		}
+		if (pos + 1 < len && pdf[pos] == '>' && pdf[pos + 1] == '>')
+			pos += 2;
+	} else if (c == '<') {
+		while (pos < len && pdf[pos] != '>')
+			pos++;
+		if (pos < len)
+			pos++;
+	} else if (c == '/') {
+		pos++;
+		while (pos < len && !strchr(delim, (unsigned char) pdf[pos]))
+			pos++;
+	} else if (c == '[') {
+		pos++;
+		while (pos < len && pdf[pos] != ']') {
+			int beg = pos;
+			pos += pdf_len(pdf, len, pos);
+			pos += pdf_ws(pdf, len, pos);
+			if (pos == beg)		/* malformed; do not loop forever */
+				break;
+		}
+		if (pos < len && pdf[pos] == ']')
+			pos++;
+	} else {
+		/* bare keywords; nothing is consumed at a delimiter */
+		while (pos < len && !strchr(delim, (unsigned char) pdf[pos]))
+			pos++;
+	}
+	if (pos > len)
+		pos = len;
+	return pos - old;
+}
+
+/* compare s and t up to the end of the shorter one; nonzero if they agree */
+static int startswith(char *s, char *t)
+{
+	for (; *s && *t; s++, t++)
+		if (*s != *t)
+			return 0;
+	return 1;
+}
+
+/*
+ * read the indirect reference ("obj rev R") at pos; stores the object
+ * and revision numbers in *obj and *rev and returns 0, or returns -1 if
+ * there is no reference at pos.
+ */
+int pdf_obj(char *pdf, int len, int pos, int *obj, int *rev)
+{
+	if (pos < 0 || pdf_type(pdf, len, pos) != 'r')
+		return -1;
+	pos += pdf_ws(pdf, len, pos);
+	*obj = atoi(pdf + pos);
+	/*
+	 * pdf_len() covers the whole reference up to R, so the object
+	 * number is skipped by hand; atoi() skips the white space
+	 * before the revision number.
+	 */
+	while (pos < len && isdigit((unsigned char) pdf[pos]))
+		pos++;
+	*rev = atoi(pdf + pos);
+	return 0;
+}
+
+/*
+ * the position of the value of key in the dictionary at pos, or -1 if
+ * the dictionary has no such key.  pos is expected to be at the "<<"
+ * that opens the dictionary and key should include the leading slash.
+ */
+int pdf_dval(char *pdf, int len, int pos, char *key)
+{
+	int keylen = strlen(key);
+	if (pos < 0)
+		return -1;
+	pos += 2;
+	while (pos + 2 < len && (pdf[pos] != '>' || pdf[pos + 1] != '>')) {
+		int beg = pos;
+		int cur;
+		pos += pdf_ws(pdf, len, pos);
+		cur = pdf_len(pdf, len, pos);	/* the length of this key */
+		/* compare whole names: /Length should not match /Length1 */
+		if (cur == keylen && pos + cur <= len &&
+				!memcmp(pdf + pos, key, keylen)) {
+			pos += cur;
+			pos += pdf_ws(pdf, len, pos);
+			return pos;
+		}
+		pos += cur;
+		pos += pdf_len(pdf, len, pos);	/* the value */
+		pos += pdf_ws(pdf, len, pos);
+		if (pos == beg)			/* malformed; do not loop forever */
+			break;
+	}
+	return -1;
+}
+
+/*
+ * the position of the key-th key (counting from zero) of the dictionary
+ * at pos, or -1 if the dictionary has fewer keys.  pos is expected to
+ * be at the "<<" that opens the dictionary.
+ */
+int pdf_dkey(char *pdf, int len, int pos, int key)
+{
+	int i = 0;
+	if (pos < 0)
+		return -1;
+	pos += 2;
+	while (pos + 2 < len && (pdf[pos] != '>' || pdf[pos + 1] != '>')) {
+		int beg = pos;
+		pos += pdf_ws(pdf, len, pos);
+		/* the white space may have been followed by the closing >> */
+		if (pos + 1 < len && pdf[pos] == '>' && pdf[pos + 1] == '>')
+			break;
+		if (i++ == key)
+			return pos;
+		pos += pdf_len(pdf, len, pos);	/* the key */
+		pos += pdf_len(pdf, len, pos);	/* the value */
+		pos += pdf_ws(pdf, len, pos);
+		if (pos == beg)			/* malformed; do not loop forever */
+			break;
+	}
+	return -1;
+}
+
+/*
+ * the position of the idx-th entry (counting from zero) of the list at
+ * pos, or -1 if the list has fewer entries.  pos is expected to be at
+ * the "[" that opens the list.
+ */
+int pdf_lval(char *pdf, int len, int pos, int idx)
+{
+	int i = 0;
+	if (pos < 0)
+		return -1;
+	pos++;
+	/* so that an empty "[ ]" has no entries */
+	pos += pdf_ws(pdf, len, pos);
+	while (pos < len && pdf[pos] != ']') {
+		int beg = pos;
+		if (i++ == idx)
+			return pos;
+		pos += pdf_len(pdf, len, pos);
+		pos += pdf_ws(pdf, len, pos);
+		if (pos == beg)		/* malformed; do not loop forever */
+			break;
+	}
+	return -1;
+}
+
+/* the position of the last c in the first n bytes of pdf, or -1 */
+static int lastchr(char *pdf, int c, int n)
+{
+	while (n-- > 0)
+		if (pdf[n] == c)
+			return n;
+	return -1;
+}
+
+/*
+ * the beginning of the line that ends at the last newline before off;
+ * returns -1 if there is no such line or if no newline precedes it.
+ */
+static int prevline(char *pdf, int len, int off)
+{
+	int nl, nl2;
+	if (off > len)
+		off = len;
+	nl = lastchr(pdf, '\n', off);	/* -1 also for a negative off */
+	if (nl > 0) {
+		nl2 = lastchr(pdf, '\n', nl - 1);
+		if (nl2 >= 0)
+			return nl2 + 1;
+	}
+	return -1;
+}
+
+/* the position after the first newline at or after off, or -1 if none */
+static int nextline(char *pdf, int len, int off)
+{
+	char *nl = memchr(pdf + off, '\n', len - off);
+	return nl ? nl - pdf + 1 : -1;
+}
+
+/*
+ * the position of the trailer: the beginning of the line after the last
+ * line that starts with "trailer", or -1 if there is no such line.
+ */
+int pdf_trailer(char *pdf, int len)
+{
+	int pos = prevline(pdf, len, len);	/* %%EOF */
+	/* prevline() fails for short or newline-free files */
+	while (pos >= 0 && !startswith(pdf + pos, "trailer"))
+		pos = prevline(pdf, len, pos);
+	if (pos < 0)
+		return -1;
+	return nextline(pdf, len, pos);		/* skip trailer\n */
+}
+
+/*
+ * the position of the last xref table: the beginning of the line after
+ * the offset given on the line before %%EOF, or -1 on failure.
+ */
+static int pdf_xref(char *pdf, int len)
+{
+	int pos = prevline(pdf, len, len);	/* %%EOF */
+	/* a failed lookup should not be passed to prevline() again */
+	if (pos < 0 || (pos = prevline(pdf, len, pos)) < 0)
+		return -1;
+	/* read startxref offset */
+	if (sscanf(pdf + pos, "%d", &pos) != 1 || pos >= len || pos < 0)
+		return -1;
+	return nextline(pdf, len, pos);		/* skip xref\n */
+}
+
+/*
+ * find the object with the given number and revision in the last xref
+ * table; returns the position of its contents (after the "obj" keyword
+ * and the white space that follows it), or -1 if it is not found.
+ */
+int pdf_find(char *pdf, int len, int obj, int rev)
+{
+	int obj_beg, obj_cnt;
+	int cur_rev, cur_pos;
+	char *beg;
+	int i;
+	int pos = pdf_xref(pdf, len);
+	if (pos < 0)
+		return -1;
+	/* each subsection begins with its first object and entry count */
+	while (pos < len && sscanf(pdf + pos, "%d %d", &obj_beg, &obj_cnt) == 2) {
+		for (i = 0; i < obj_cnt; i++) {
+			if ((pos = nextline(pdf, len, pos)) < 0)
+				return -1;
+			if (sscanf(pdf + pos, "%d %d", &cur_pos, &cur_rev) != 2)
+				return -1;
+			if (obj_beg + i == obj && cur_rev == rev) {
+				if (cur_pos < 0 || cur_pos >= len)
+					return -1;
+				if (!(beg = strstr(pdf + cur_pos, "obj")))
+					return -1;
+				pos = beg - pdf + 3;
+				pos += pdf_ws(pdf, len, pos);
+				return pos;
+			}
+		}
+		/*
+		 * pos is still at the last line read; move past it, or that
+		 * line is parsed as a subsection header again and again.
+		 */
+		if ((pos = nextline(pdf, len, pos)) < 0)
+			return -1;
+	}
+	return -1;
+}
+
+/* the position of the object the reference at pos points to, or -1 */
+int pdf_ref(char *pdf, int len, int pos)
+{
+	int num, gen;
+	if (pdf_obj(pdf, len, pos, &num, &gen) != 0)
+		return -1;
+	return pdf_find(pdf, len, num, gen);
+}
+
+/*
+ * the position of the value of a dictionary key; if the value is an
+ * indirect reference, the position of the object it points to.
+ */
+int pdf_dval_val(char *pdf, int len, int pos, char *key)
+{
+	int num, gen;
+	int val = pdf_dval(pdf, len, pos, key);
+	if (val < 0)
+		return -1;
+	if (pdf_type(pdf, len, val) != 'r')
+		return val;		/* a direct value */
+	pdf_obj(pdf, len, val, &num, &gen);
+	return pdf_find(pdf, len, num, gen);
+}
+
+/* the object referenced by the value of a dictionary key, or -1 */
+int pdf_dval_obj(char *pdf, int len, int pos, char *key)
+{
+	int val = pdf_dval(pdf, len, pos, key);
+	return val < 0 ? -1 : pdf_ref(pdf, len, val);
+}
--- a/post.c
+++ b/post.c
@@ -260,6 +260,8 @@
outrotate(atoi(arg));
if (!strcmp("eps", cmd))
outeps(arg);
+ if (!strcmp("pdf", cmd))
+ outpdf(arg);
if (!strcmp("link", cmd))
outlink(arg);
if (!strcmp("BeginObject", cmd))
--- a/post.h
+++ b/post.h
@@ -57,7 +57,8 @@
void outsize(int s);
void outcolor(int c);
void outrotate(int deg);
-void outeps(char *eps);
+void outeps(char *spec);
+void outpdf(char *spec);
void outlink(char *spec);
void outpage(void);
void outmnt(int f);
@@ -119,3 +120,17 @@
void sbuf_chr(struct sbuf *sbuf, int c);
void sbuf_mem(struct sbuf *sbuf, char *s, int len);
void sbuf_cut(struct sbuf *sb, int len);
+
+/* reading PDF files */
+int pdf_ws(char *pdf, int len, int pos);	/* number of white space characters at pos */
+int pdf_len(char *pdf, int len, int pos);	/* length of the object at pos */
+int pdf_type(char *pdf, int len, int pos);	/* s, d, l, n, / or r; -1 if unknown */
+int pdf_dval(char *pdf, int len, int pos, char *key);	/* position of the value of a dictionary key */
+int pdf_dkey(char *pdf, int len, int pos, int key);	/* position of the key-th dictionary key */
+int pdf_lval(char *pdf, int len, int pos, int idx);	/* position of the idx-th list entry */
+int pdf_trailer(char *pdf, int len);	/* position after the trailer line */
+int pdf_obj(char *pdf, int len, int pos, int *obj, int *rev);	/* read an indirect reference */
+int pdf_find(char *pdf, int len, int obj, int rev);	/* position of an object, via the xref table */
+int pdf_ref(char *pdf, int len, int pos);	/* read and dereference an indirect reference */
+int pdf_dval_val(char *pdf, int len, int pos, char *key);	/* dictionary value, dereferenced if indirect */
+int pdf_dval_obj(char *pdf, int len, int pos, char *key);	/* dictionary value that is an indirect reference */
--- a/ps.c
+++ b/ps.c
@@ -330,6 +330,10 @@
outf("EPSFEND\n");
}
+void outpdf(char *spec)
+{	/* nothing is done for \X'pdf ...' in this output */
+}
+
void outlink(char *spec)
{
char lnk[1 << 12];