shithub: pdffs

Download patch

ref: dfa5b68d98c20fc1115efcccc437ccbda9b7b78f
parent: 7662781cd7ff279c0b8129465bbd470768cfc9c0
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Sun Jul 2 17:59:47 EDT 2023

reduce pdfobj recursion on number parsing

Since only the next object needs to be looked ahead (it has to be a number),
we should not recurse any further: otherwise parsing integer-only arrays
is going to be rather slow and will most likely produce a stack overflow.
Add an explicit "flags" argument to the pdfobj function, with Norecurse set
when looking ahead.

--- a/array.c
+++ b/array.c
@@ -24,7 +24,7 @@
 		}
 
 		Sungetc(s);
-		if((m = pdfobj(pdf, s)) == nil){
+		if((m = pdfobj(pdf, s, 0)) == nil){
 			noel = 1;
 			continue;
 		}
--- a/dict.c
+++ b/dict.c
@@ -33,7 +33,7 @@
 		}
 
 		Sungetc(s);
-		if((k = pdfobj(pdf, s)) == nil){
+		if((k = pdfobj(pdf, s, 0)) == nil){
 			nokey = 1;
 			continue;
 		}
@@ -41,7 +41,7 @@
 			werrstr("expected name as a key");
 			goto err;
 		}
-		if((v = pdfobj(pdf, s)) == nil)
+		if((v = pdfobj(pdf, s, 0)) == nil)
 			goto err;
 
 		if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil)
--- a/eval.c
+++ b/eval.c
@@ -25,7 +25,7 @@
 		werrstr("xref seek failed");
 		goto err;
 	}
-	if((ostm = pdfobj(pdf, pdf->s)) == nil)
+	if((ostm = pdfobj(pdf, pdf->s, 0)) == nil)
 		goto err;
 	first = -1;
 	if((nobj = dictint(ostm, "N")) < 1 || (first = dictint(ostm, "First")) < 0){
@@ -44,7 +44,7 @@
 				werrstr("xref obj seek failed");
 				goto err;
 			}
-			if((o = pdfobj(pdf, s)) == nil)
+			if((o = pdfobj(pdf, s, 0)) == nil)
 				goto err;
 			o = pdfeval(o);
 			break;
@@ -96,7 +96,7 @@
 		werrstr("xref seek failed");
 		return &null;
 	}
-	if((d = pdfobj(o->pdf, o->pdf->s)) == nil){
+	if((d = pdfobj(o->pdf, o->pdf->s, 0)) == nil){
 		werrstr("eval: %r [at %p]", (void*)x->off);
 		return &null;
 	}
--- a/object.c
+++ b/object.c
@@ -52,7 +52,7 @@
 
 /* General function to parse an object of any type. */
 Object *
-pdfobj(Pdf *pdf, Stream *s)
+pdfobj(Pdf *pdf, Stream *s, int flags)
 {
 	Object *o, *o2;
 	vlong off;
@@ -155,7 +155,8 @@
 		o->num.i = o->num.d;
 		off = Soffset(s); /* seek here if not an indirect object later */
 
-		if((o2 = pdfobj(pdf, s)) != nil && o2->type == Onum){ /* second object is number too */
+		if((flags & Norecurse) == 0)
+		if((o2 = pdfobj(pdf, s, Norecurse)) != nil && o2->type == Onum){ /* second object is number too */
 			do; while(isws(c = Sgetc(s)));
 			if(c < 0)
 				goto err;
@@ -173,7 +174,7 @@
 				xref.gen = o2->num.i;
 				/* FIXME put into a map */
 				pdfobjfree(o2);
-				if((o2 = pdfobj(pdf, s)) != nil){
+				if((o2 = pdfobj(pdf, s, 0)) != nil){
 					pdfobjfree(o);
 					return o2;
 				}else{
@@ -182,6 +183,7 @@
 				}
 			}
 		}
+		pdfobjfree(o2);
 
 		/* just a number, go back and return it */
 		o->type = Onum;
--- a/op.c
+++ b/op.c
@@ -1471,7 +1471,7 @@
 			s->buf.off += 1;
 		if(s->buf.off == s->buf.sz)
 			break;
-		o = pdfobj(content->pdf, s);
+		o = pdfobj(content->pdf, s, 0);
 		if(o == nil){
 			werrstr("pagerendercontent: failed to parse op: %r");
 			return 0;
--- a/pdf.c
+++ b/pdf.c
@@ -9,7 +9,7 @@
 	Object *o;
 	int prev;
 
-	if((o = pdfobj(pdf, pdf->s)) == nil)
+	if((o = pdfobj(pdf, pdf->s, 0)) == nil)
 		goto err;
 	if(o->type != Odict){
 		werrstr("isn't a dictionary");
--- a/pdf.h
+++ b/pdf.h
@@ -206,7 +206,11 @@
 /*
  * Parse an object.
  */
-Object *pdfobj(Pdf *pdf, Stream *s);
+Object *pdfobj(Pdf *pdf, Stream *s, int flags);
+
+enum {
+	Norecurse = 1<<0, /* pdfobj should not recurse (num parsing). */
+};
 
 /*
  * Deallocate the object and all its children. Refcount is
--- a/xref.c
+++ b/xref.c
@@ -117,7 +117,7 @@
 	int i, ni, nsubsec, subsec;
 
 	s = nil;
-	if((o = pdfobj(pdf, pdf->s)) == nil){
+	if((o = pdfobj(pdf, pdf->s, 0)) == nil){
 		werrstr("xref stream obj: %r");
 		goto err;
 	}