shithub: pdffs

Download patch

ref: c1d5de86be53c8600246fde4f9c1f59eb2706a15
parent: dfa5b68d98c20fc1115efcccc437ccbda9b7b78f
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Thu Aug 10 19:54:25 EDT 2023

xref reading: workaround for broken PDFs with startxref pointing to garbage (not an "xref" table)

--- a/pdf.c
+++ b/pdf.c
@@ -109,9 +109,26 @@
 		}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
 			if(Sseek(stream, xreftb, 0) != xreftb)
 				goto badxref;
-			if(xrefreadstream(pdf) != 0)
-				goto err;
-			break;
+			if(xrefreadstream(pdf) != 0){
+lastresort:
+				/* some PDFs have "startxref" pointing at garbage */
+				for(i = 0; i < 4096; i++){
+					if(Sread(stream, tmp, 4) != 4)
+						goto badxref;
+					if(memcmp(tmp, "xref", 4) == 0){
+						Sseek(stream, -4, 1);
+						if(xrefreadold(pdf) != 0)
+							goto badxref;
+						break;
+					}
+					Sseek(stream, -3, 1);
+				}
+				if(i >= 4096)
+					goto err;
+			}else
+				break;
+		}else{
+			goto lastresort;
 		}
 	}