shithub: neatpost

Download patch

ref: 800f77dcdb74497de6a64d48069b8c701ab98dff
parent: fdd132556d3330e9557c36c7e5631844d7cddb37
author: Ali Gholami Rudi <ali@rudi.ir>
date: Sun Apr 15 14:54:26 EDT 2018

pdf: recursively import resources from an external PDF

--- a/pdf.c
+++ b/pdf.c
@@ -1,4 +1,4 @@
-/* PDF post processor functions */
+/* PDF post-processor functions */
 #include <fcntl.h>
 #include <stdarg.h>
 #include <stdio.h>
@@ -512,10 +512,10 @@
 	return src;
 }
 
-/* return a copy of a pdf object; returns a static buffer */
+/* return a copy of a PDF object; returns a static buffer */
 static char *pdf_copy(char *pdf, int len, int pos)
 {
-	static char buf[256];
+	static char buf[1 << 12];
 	int datlen;
 	pos += pdf_ws(pdf, len, pos);
 	datlen = pdf_len(pdf, len, pos);
@@ -526,61 +526,153 @@
 	return buf;
 }
 
-/* return stream length */
-static int pdf_slen(char *pdf, int len, int pos, int slen)
+static void pdf_dictcopy(char *pdf, int len, int pos, struct sbuf *sb);
+
+/* write stream to sb */
+static int pdf_strcopy(char *pdf, int len, int pos, struct sbuf *sb)
 {
-	int old = pos;
+	int slen, val;
+	int beg;
+	if ((val = pdf_dval_val(pdf, len, pos, "/Length")) < 0)
+		return -1;
+	slen = atoi(pdf + val);
+	pos = pos + pdf_len(pdf, len, pos);
 	pos += pdf_ws(pdf, len, pos);
+	if (pos + slen + 15 > len)
+		return -1;
+	beg = pos;
 	pos += strlen("stream");
 	if (pdf[pos] == '\r')
 		pos++;
 	pos += 1 + slen;
+	if (pdf[pos] == '\r' || pdf[pos] == ' ')
+		pos++;
 	if (pdf[pos] == '\n')
 		pos++;
-	pos += strlen("endstream");
-	return pos - old;
+	pos += strlen("endstream") + 1;
+	sbuf_mem(sb, pdf + beg, pos - beg);
+	return 0;
 }
 
+/* copy a PDF object and return its new identifier */
+static int pdf_objcopy(char *pdf, int len, int pos)
+{
+	int id;
+	if ((pos = pdf_ref(pdf, len, pos)) < 0)
+		return -1;
+	if (pdf_type(pdf, len, pos) == 'd') {
+		struct sbuf *sb = sbuf_make();
+		pdf_dictcopy(pdf, len, pos, sb);
+		sbuf_chr(sb, '\n');
+		if (pdf_dval(pdf, len, pos, "/Length") >= 0)
+			pdf_strcopy(pdf, len, pos, sb);
+		id = obj_beg(0);
+		pdfmem(sbuf_buf(sb), sbuf_len(sb));
+		obj_end();
+		sbuf_free(sb);
+	} else {
+		id = obj_beg(0);
+		pdfmem(pdf + pos, pdf_len(pdf, len, pos));
+		pdfout("\n");
+		obj_end();
+	}
+	return id;
+}
+
+/* copy a PDF dictionary recursively */
+static void pdf_dictcopy(char *pdf, int len, int pos, struct sbuf *sb)
+{
+	int i;
+	int key, val, id;
+	sbuf_printf(sb, "<<");
+	for (i = 0; ; i++) {
+		if ((key = pdf_dkey(pdf, len, pos, i)) < 0)
+			break;
+		sbuf_printf(sb, " %s", pdf_copy(pdf, len, key));
+		val = pdf_dval(pdf, len, pos, pdf_copy(pdf, len, key));
+		if (pdf_type(pdf, len, val) == 'r') {
+			if ((id = pdf_objcopy(pdf, len, val)) >= 0)
+				sbuf_printf(sb, " %d 0 R", id);
+		} else {
+			sbuf_printf(sb, " %s", pdf_copy(pdf, len, val));
+		}
+	}
+	sbuf_printf(sb, " >>");
+}
+
+/* copy resources dictionary */
+static void pdf_rescopy(char *pdf, int len, int pos, struct sbuf *sb)
+{
+	char *res_fields[] = {"/ProcSet", "/ExtGState", "/ColorSpace",
+		"/Pattern", "/Shading", "/Properties", "/Font", "/XObject"};
+	int res, i;
+	sbuf_printf(sb, "  /Resources <<\n");
+	for (i = 0; i < LEN(res_fields); i++) {
+		if ((res = pdf_dval_val(pdf, len, pos, res_fields[i])) >= 0) {
+			if (pdf_type(pdf, len, res) == 'd') {
+				sbuf_printf(sb, "    %s ", res_fields[i]);
+				pdf_dictcopy(pdf, len, res, sb);
+				sbuf_printf(sb, "\n");
+			} else {
+				sbuf_printf(sb, "    %s %s\n", res_fields[i],
+					pdf_copy(pdf, len, res));
+			}
+		}
+	}
+	sbuf_printf(sb, "  >>\n");
+}
+
 static int pdfext(char *pdf, int len)
 {
 	char *cont_fields[] = {"/Filter", "/DecodeParms"};
-	int trailer = pdf_trailer(pdf, len);
-	int root, cont, pages, page1, stream;
+	int trailer, root, cont, pages, page1, res;
 	int kids_val, page1_val, val;
 	int xobj_id, length;
 	int bbox;
+	struct sbuf *sb;
 	int i;
-	root = pdf_dval_obj(pdf, len, trailer, "/Root");
-	pages = pdf_dval_obj(pdf, len, root, "/Pages");
-	kids_val = pdf_dval_val(pdf, len, pages, "/Kids");
-	page1_val = pdf_lval(pdf, len, kids_val, 0);
-	page1 = pdf_ref(pdf, len, page1_val);
-	cont = pdf_dval_obj(pdf, len, page1, "/Contents");
-	val = pdf_dval_val(pdf, len, cont, "/Length");
+	if ((trailer = pdf_trailer(pdf, len)) < 0)
+		return -1;
+	if ((root = pdf_dval_obj(pdf, len, trailer, "/Root")) < 0)
+		return -1;
+	if ((pages = pdf_dval_obj(pdf, len, root, "/Pages")) < 0)
+		return -1;
+	if ((kids_val = pdf_dval_val(pdf, len, pages, "/Kids")) < 0)
+		return -1;
+	if ((page1_val = pdf_lval(pdf, len, kids_val, 0)) < 0)
+		return -1;
+	if ((page1 = pdf_ref(pdf, len, page1_val)) < 0)
+		return -1;
+	if ((cont = pdf_dval_obj(pdf, len, page1, "/Contents")) < 0)
+		return -1;
+	if ((val = pdf_dval_val(pdf, len, cont, "/Length")) < 0)
+		return -1;
+	res = pdf_dval_val(pdf, len, page1, "/Resources");
 	length = atoi(pdf + val);
 	bbox = pdf_dval_val(pdf, len, page1, "/MediaBox");
 	if (bbox < 0)
 		bbox = pdf_dval_val(pdf, len, pages, "/MediaBox");
-	xobj_id = obj_beg(0);
-	pdfout("<<\n");
-	pdfout("  /Type /XObject\n");
-	pdfout("  /Subtype /Form\n");
-	pdfout("  /FormType 1\n");
+	sb = sbuf_make();
+	sbuf_printf(sb, "<<\n");
+	sbuf_printf(sb, "  /Type /XObject\n");
+	sbuf_printf(sb, "  /Subtype /Form\n");
+	sbuf_printf(sb, "  /FormType 1\n");
 	if (bbox >= 0)
-		pdfout("  /BBox %s\n", pdf_copy(pdf, len, bbox));
-	pdfout("  /Matrix [1 0 0 1 %s]\n", pdfpos(o_h, o_v));
-	pdfout("  /Resources << /ProcSet [/PDF] >>\n");
-	pdfout("  /Length %d\n", length);
+		sbuf_printf(sb, "  /BBox %s\n", pdf_copy(pdf, len, bbox));
+	sbuf_printf(sb, "  /Matrix [1 0 0 1 %s]\n", pdfpos(o_h, o_v));
+	if (res >= 0)
+		pdf_rescopy(pdf, len, res, sb);
+	sbuf_printf(sb, "  /Length %d\n", length);
 	for (i = 0; i < LEN(cont_fields); i++)
 		if ((val = pdf_dval_val(pdf, len, cont, cont_fields[i])) >= 0)
-			pdfout("  %s %s\n", cont_fields[i],
+			sbuf_printf(sb, "  %s %s\n", cont_fields[i],
 				pdf_copy(pdf, len, val));
-	pdfout(">>\n");
-	stream = cont + pdf_len(pdf, len, cont);
-	stream += pdf_ws(pdf, len, stream);
-	pdfmem(pdf + stream, pdf_slen(pdf, len, stream, length));
-	pdfout("\n");
+	sbuf_printf(sb, ">>\n");
+	pdf_strcopy(pdf, len, cont, sb);
+	xobj_id = obj_beg(0);
+	pdfmem(sbuf_buf(sb), sbuf_len(sb));
 	obj_end();
+	sbuf_free(sb);
 	if (xobj_n == xobj_sz) {
 		xobj_sz += 8;
 		xobj = mextend(xobj, xobj_n, xobj_sz, sizeof(xobj[0]));
--- a/pdfext.c
+++ b/pdfext.c
@@ -132,7 +132,7 @@
 	pos += 2;
 	while (pos + 2 < len && (pdf[pos] != '>' || pdf[pos + 1] != '>')) {
 		pos += pdf_ws(pdf, len, pos);
-		if (startswith(key, pdf + pos)) {
+		if (pdf_len(pdf, len, pos) == strlen(key) && startswith(key, pdf + pos)) {
 			pos += pdf_len(pdf, len, pos);
 			pos += pdf_ws(pdf, len, pos);
 			return pos;