ref: 2ff084d1629e80f99b35576f10ea87dc4d9f8941
parent: 474117ed563f8f84f11d3dcf90635c584be29ec1
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Tue Sep 1 12:19:34 EDT 2020
fix tons of bugs, use proper streaming
--- a/array.c
+++ b/array.c
@@ -1,12 +1,11 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include "pdf.h"
/* 7.3.6 Array Objects */
Object *
-pdfarray(Pdf *pdf, Biobuf *b)
+pdfarray(Pdf *pdf, Stream *s)
{
Object *o, *m;
Object **a;
@@ -15,10 +14,10 @@
o = calloc(1, sizeof(*o));
o->pdf = pdf;
o->type = Oarray;
- Bgetc(b); /* throw away '[' */
+ Sgetc(s); /* throw away '[' */
for(noel = 0;;){
- if((c = Bgetc(b)) < 0 || c == ']')
+ if((c = Sgetc(s)) < 0 || c == ']')
break;
if(noel){
werrstr("no ']'");
@@ -25,8 +24,8 @@
goto err;
}
- Bungetc(b);
- if((m = pdfobj(pdf, b)) == nil){
+ Sungetc(s);
+ if((m = pdfobj(pdf, s)) == nil){
noel = 1;
continue;
}
@@ -65,7 +64,9 @@
{
if(arraylen(o) <= i)
sysfatal("array: indexing out of range");
- return o->type == Oarray ? o->array.e[i] : o;
+ o = o->type == Oarray ? o->array.e[i] : o;
+
+ return pdfeval(&o);
}
int
--- a/buffer.c
+++ b/buffer.c
@@ -64,7 +64,7 @@
}
int
-bufreadn(Buffer *b, Biobuf *bio, int sz)
+bufreadn(Buffer *b, Stream *s, int sz)
{
int n, end;
@@ -71,7 +71,7 @@
if(bufgrow(b, sz) != 0)
return -1;
for(end = b->sz+sz; b->sz < end; b->sz += n){
- if((n = Bread(bio, b->b+b->sz, sz)) < 1)
+ if((n = Sread(s, b->b+b->sz, sz)) < 1)
return -1;
sz -= n;
}
--- a/dict.c
+++ b/dict.c
@@ -1,12 +1,11 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include "pdf.h"
/* 7.3.7 Dictionary Objects */
Object *
-pdfdict(Pdf *pdf, Biobuf *b)
+pdfdict(Pdf *pdf, Stream *s)
{
Object *o, *k, *v;
KeyValue *kv;
@@ -13,7 +12,7 @@
int c, nokey;
/* skip '<<' */
- Bseek(b, 2, 1);
+ Sseek(s, 2, 1);
k = v = nil;
o = calloc(1, sizeof(*o));
@@ -20,10 +19,10 @@
o->type = Odict;
o->pdf = pdf;
for(nokey = 0;;){
- if((c = Bgetc(b)) < 0)
+ if((c = Sgetc(s)) < 0)
goto err;
if(c == '>'){
- if(Bgetc(b) == '>')
+ if(Sgetc(s) == '>')
break;
werrstr("no '>>'");
goto err;
@@ -33,8 +32,8 @@
goto err;
}
- Bungetc(b);
- if((k = pdfobj(pdf, b)) == nil){
+ Sungetc(s);
+ if((k = pdfobj(pdf, s)) == nil){
nokey = 1;
continue;
}
@@ -42,7 +41,7 @@
werrstr("expected name as a key");
goto err;
}
- if((v = pdfobj(pdf, b)) == nil)
+ if((v = pdfobj(pdf, s)) == nil)
goto err;
if((kv = realloc(o->dict.kv, (o->dict.nkv+1)*sizeof(KeyValue))) == nil)
@@ -73,9 +72,14 @@
pdfeval(&o);
if((o->type != Ostream && o->type != Odict) || name == nil)
return &null;
- for(i = 0; i < o->dict.nkv && strcmp(name, o->dict.kv[i].key) != 0; i++);
+ for(i = 0; i < o->dict.nkv; i++){
+ if(strcmp(name, o->dict.kv[i].key) == 0){
+ o = pdfeval(i < o->dict.nkv ? &o->dict.kv[i].value : nil);
+ return o;
+ }
+ }
- return pdfeval(i < o->dict.nkv ? &o->dict.kv[i].value : nil);
+ return &null;
}
vlong
--- a/eval.c
+++ b/eval.c
@@ -1,8 +1,65 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include "pdf.h"
+/* x refers to an ObjStm: parse that stream and return object x->id from it, or &null with errstr set. */
+static Object *
+evalobjstm(Pdf *pdf, Xref *x)
+{
+	Object *ostm, *o;
+	Stream *s;
+	Xref *xstm;
+	int i, off, nobj, first, index;
+
+	ostm = nil;
+	s = nil;
+	o = &null;
+	for(i = 0; i < pdf->nxref && pdf->xref[i].id != x->objstm; i++);	/* find the ObjStm's own xref entry */
+	if(i >= pdf->nxref){
+		werrstr("no object id %d in xref", x->objstm);
+		goto err;
+	}
+	xstm = &pdf->xref[i];
+	if(Sseek(pdf->s, xstm->off, 0) != xstm->off){
+		werrstr("xref seek failed");
+		goto err;
+	}
+	if((ostm = pdfobj(pdf, pdf->s)) == nil)
+		goto err;
+	first = -1;
+	if((nobj = dictint(ostm, "N")) < 1 || (first = dictint(ostm, "First")) < 0){
+		werrstr("invalid ObjStm: nobj=%d first=%d", nobj, first);
+		goto err;
+	}
+	if((s = Sopen(ostm)) == nil)
+		goto err;
+	for(i = 0; i < nobj; i++){
+		if(Sgeti(s, &index) != 1 || Sgeti(s, &off) != 1){	/* otherwise index/off would be used unset */
+			werrstr("invalid ObjStm header");
+			goto err;
+		}
+		if(x->id != index)
+			continue;
+		off += first;	/* offsets in the header are relative to "First" */
+		if(Sseek(s, off, 0) != off){
+			werrstr("xref obj seek failed");
+			goto err;
+		}
+		if((o = pdfobj(pdf, s)) == nil)
+			goto err;
+		o = pdfeval(&o);
+		break;
+	}
+	pdfobjfree(ostm);	/* the container object was only freed on the error path before */
+	Sclose(s);
+	return o;
+
+err:
+	pdfobjfree(ostm);
+	Sclose(s);
+	return &null;
+}
+
Object *
pdfeval(Object **oo)
{
@@ -11,7 +68,7 @@
int i;
if(oo == nil)
- return &null;
+ sysfatal("nil oo");
if(*oo == nil){
*oo = &null;
return &null;
@@ -20,19 +77,30 @@
if(o->type != Oindir)
return o;
- for(i = 0; i < o->pdf->nxref && o->pdf->xref[i].id != o->indir.id; i++);
+ for(x = nil, i = 0; i < o->pdf->nxref; i++){
+ x = &o->pdf->xref[i];
+ if(x->id == o->indir.id)
+ break;
+ }
if(i >= o->pdf->nxref){
werrstr("no object id %d in xref", o->indir.id);
return &null;
}
- x = &o->pdf->xref[i];
+ if(x->objstm > 0){
+ if((o = evalobjstm(o->pdf, x)) == &null)
+ werrstr("ObjStm: %r");
+ *oo = o;
+ return o;
+ }
- if(Bseek(o->pdf->bio, x->off, 0) != x->off){
+ if(Sseek(o->pdf->s, x->off, 0) != x->off){
werrstr("xref seek failed");
return &null;
}
- if((d = pdfobj(o->pdf, o->pdf->bio)) == nil)
+ if((d = pdfobj(o->pdf, o->pdf->s)) == nil){
+ werrstr("eval: %r [at %p]", (void*)x->off);
return &null;
+ }
*oo = d;
pdfobjfree(o);
--- a/f_flate.c
+++ b/f_flate.c
@@ -1,6 +1,5 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include <flate.h>
#include "pdf.h"
--- a/filter.c
+++ b/filter.c
@@ -1,6 +1,5 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include "pdf.h"
/* 7.4 Filters */
--- a/main.c
+++ b/main.c
@@ -20,6 +20,8 @@
Pdf *pdf;
Biobuf *b;
Object *v;
+ Stream *s;
+ int i, n;
quotefmtinstall();
inflateinit();
@@ -29,28 +31,33 @@
usage();
}ARGEND
-#ifdef TEST
-#define T(x) \
- void x(void); \
- x();
-
- if(argc != 1){
- T(test_pdfstring);
- T(test_pdfname);
- threadexitsall(nil);
- }
-#endif
-
- if(argc != 1)
+ if(argc < 1)
usage();
if((b = Bopen(argv[0], OREAD)) == nil)
sysfatal("%r");
if((pdf = pdfopen(b)) == nil)
sysfatal("%s: %r", argv[0]);
+ for(v = pdf->root, i = 1; i < argc; i++){
+ if(argv[i][0] == '['){
+ n = atoi(argv[i]+1);
+ v = arrayget(v, n);
+ }else if(argv[i][0] == '@' && argv[i][1] == 0 && v->type == Ostream){
+ if((s = Sopen(v)) == nil)
+ sysfatal("%r");
+ print("%.*s\n", s->buf.sz, s->buf.b);
+ Sclose(s);
+ break;
+ }else{
+ v = dictget(v, argv[i]);
+ }
+ }
+ print("%O\n", v);
+/*
if((v = dictget(pdf->info, "Creator")) != nil)
fprint(2, "creator: %s\n", v->str);
if((v = dictget(pdf->info, "Producer")) != nil)
fprint(2, "producer: %s\n", v->str);
+*/
pdfclose(pdf);
threadexitsall(nil);
--- a/misc.c
+++ b/misc.c
@@ -1,6 +1,5 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include "pdf.h"
static char *otypes[] = {
@@ -15,17 +14,58 @@
[Oindir] = "indir",
};
-static char *xtypes[] = {
- [Xusual] = "usual",
- [Xuncompressed] = "uncompressed",
- [Xcompressed] = "compressed",
-};
-
Object null = {
.type = Onull,
};
int
+Ofmt(Fmt *f) /* format verb handler that prints an Object* in a readable, PDF-like notation */
+{
+ Object *o;
+ int i;
+
+ o = va_arg(f->args, Object*);
+ if(o == nil || o == &null) /* nil and the shared null object both print as "null" */
+ return fmtprint(f, "null");
+ switch(o->type){
+ case Obool:
+ return fmtprint(f, o->bool ? "true" : "false");
+
+ case Onum:
+ return fmtprint(f, "%g", o->num);
+
+ case Ostr:
+ if(isutf8(o->str, o->len)) /* quoted text when isutf8 accepts it, otherwise the %H form */
+ return fmtprint(f, "%q", o->str);
+ return fmtprint(f, "<%.*H>", o->len, o->str);
+
+ case Oname:
+ return fmtprint(f, "/%s", o->name);
+
+ case Oarray:
+ fmtprint(f, "[");
+ for(i = 0; i < o->array.ne; i++) /* elements are printed recursively through %O */
+ fmtprint(f, "%s%O", i > 0 ? ", " : "", o->array.e[i]);
+ return fmtprint(f, "]");
+
+ case Ostream: /* FIXME dump the stream? */
+ case Odict: /* streams share the dictionary printing and get a "+stream" suffix */
+ fmtprint(f, "<<");
+ for(i = 0; i < o->dict.nkv; i++)
+ fmtprint(f, "%s%s = %O", i > 0 ? ", " : "", o->dict.kv[i].key, o->dict.kv[i].value);
+ return fmtprint(f, ">>%s", o->type == Ostream ? "+stream" : "");
+
+ case Onull:
+ return fmtprint(f, "null");
+
+ case Oindir: /* indirect references are printed as-is, not resolved */
+ return fmtprint(f, "@%d[gen=%d]", o->indir.id, o->indir.gen);
+
+ }
+ return fmtprint(f, "???"); /* unknown object type */
+}
+
+int
Tfmt(Fmt *f)
{
Object *o;
@@ -45,15 +85,10 @@
x = va_arg(f->args, Xref);
- switch(x.type){
- case Xusual:
- return fmtprint(f, "<%s id=%d gen=%d off=%d>", xtypes[x.type], x.id, x.gen, x.off);
- case Xuncompressed:
- return fmtprint(f, "<%s gen=%d off=%d>", xtypes[x.type], x.gen, x.off);
- case Xcompressed:
- return fmtprint(f, "<%s id=%d objnum=%d>", xtypes[x.type], x.id, x.objnum);
- }
- return -1;
+ if(x.objstm > 0)
+ return fmtprint(f, "<compressed id=%d objstm=%d index=%d>", x.id, x.objstm, x.index);
+
+ return fmtprint(f, "<uncompressed id=%d off=%d gen=%d>", x.id, x.off, x.gen);
}
int
@@ -74,13 +109,15 @@
}
int
-Bgetint(Biobuf *b, int *i)
+isutf8(char *s, int len)
{
- double d;
+ int i, n;
+ Rune r;
- if(Bgetd(b, &d) != 1 || isNaN(d))
- return -1;
- *i = d;
+ for(i = 0; i < len; i += n, s += n){
+ if((n = chartorune(&r, s)) < 1 || r == Runeerror)
+ break;
+ }
- return 1;
+ return i >= len;
}
--- a/mkfile
+++ b/mkfile
@@ -1,6 +1,5 @@
</$objtype/mkfile
-CFLAGS=$CFLAGS -DTEST
TARG=pdffs
OFILES=\
--- a/name.c
+++ b/name.c
@@ -1,18 +1,17 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include "pdf.h"
/* 7.3.5 Name Objects */
Object *
-pdfname(Biobuf *b)
+pdfname(Stream *stream)
{
Object *o;
char *s, *r, hex[3];
int c, sz, maxsz;
- Bgetc(b); /* skip '/' */
+ Sgetc(stream); /* skip '/' */
maxsz = 32;
if((s = malloc(maxsz)) == nil)
@@ -19,14 +18,14 @@
goto err;
for(sz = 0;;){
- if((c = Bgetc(b)) < 0){
- if(c == Beof)
+ if((c = Sgetc(stream)) < 0){
+ if(c == -1)
break;
goto err;
}
if(isws(c) || isdelim(c)){
- Bungetc(b);
+ Sungetc(stream);
break;
}
if(c < '!' || c > '~'){
@@ -34,10 +33,10 @@
goto err;
}
if(c == '#'){
- if((c = Bgetc(b)) < 0)
+ if((c = Sgetc(stream)) < 0)
goto err;
hex[0] = c;
- if((c = Bgetc(b)) < 0)
+ if((c = Sgetc(stream)) < 0)
goto err;
hex[1] = c;
if(dec16((uchar*)hex, 1, hex, 2) != 1){
@@ -67,65 +66,3 @@
free(s);
return nil;
}
-
-#ifdef TEST
-static struct {
- char *in;
- char *out;
-}t[] = {
- {"/SimpleName", "SimpleName"},
- {"/.$()", ".$"},
- {"/#30", "0"},
- {"/#3", nil},
- {"/#G0", nil},
- {"/#", nil},
- {"/Space Between", "Space"},
- {"/Two/Names", "Two"},
- {"/\xff", nil,},
-};
-
-static char *s;
-static int off, n;
-
-static int
-rd(Biobufhdr *, void *data, long sz)
-{
- if(sz > n-off)
- sz = n-off;
- memmove(data, s+off, sz);
- off += sz;
- return sz;
-}
-
-void
-test_pdfname(void)
-{
- Object *o;
- Biobuf b;
- int i;
-
- fprint(2, "pdfname\n");
- for(i = 0; i < nelem(t); i++){
- s = t[i].in;
- n = strlen(s);
- off = 0;
- Binit(&b, -1, OREAD);
- Biofn(&b, rd);
-
- fprint(2, "\t%d: ", i);
- o = pdfname(&b);
- if(o == nil && t[i].out != nil)
- fprint(2, "ERROR: expected %q, got error: %r\n", t[i].out);
- else if(o != nil && t[i].out == nil)
- fprint(2, "ERROR: expected error, got %q\n", o->name);
- else if(o == nil && t[i].out == nil)
- fprint(2, "OK (%r)\n");
- else if(strcmp(o->name, t[i].out) != 0)
- fprint(2, "ERROR: expected %q, got %q\n", t[i].out, o->name);
- else
- fprint(2, "OK\n");
- pdfobjfree(o);
- Bterm(&b);
- }
-}
-#endif
--- a/object.c
+++ b/object.c
@@ -1,106 +1,105 @@
#include <u.h>
#include <libc.h>
#include <ctype.h>
-#include <bio.h>
#include "pdf.h"
-Object *pdfstring(Biobuf *b);
-Object *pdfname(Biobuf *b);
-Object *pdfarray(Pdf *pdf, Biobuf *b);
-Object *pdfdict(Pdf *pdf, Biobuf *b);
+Object *pdfstring(Stream *s);
+Object *pdfname(Stream *s);
+Object *pdfarray(Pdf *pdf, Stream *s);
+Object *pdfdict(Pdf *pdf, Stream *s);
/* General function to parse an object of any type. */
Object *
-pdfobj(Pdf *pdf, Biobuf *b)
+pdfobj(Pdf *pdf, Stream *s)
{
Object *o, *o2;
vlong off;
int c, tf;
Xref xref;
- char s[16];
+ char b[16];
o = o2 = nil;
- do; while(isws(c = Bgetc(b)));
+ do; while(isws(c = Sgetc(s)));
if(c < 0)
goto err;
switch(c){
case '<': /* dictionary or a string */
- c = Bgetc(b);
+ c = Sgetc(s);
if(c == '<'){
- Bseek(b, -2, 1);
- if((o = pdfdict(pdf, b)) != nil){
+ Sseek(s, -2, 1);
+ if((o = pdfdict(pdf, s)) != nil){
/* check for attached stream */
- off = Boffset(b);
- do; while(isws(Bgetc(b)));
- Bungetc(b);
- if(Bread(b, s, 7) == 7 && memcmp(s, "stream", 6) == 0 && isws(c = s[6])){
+ off = Soffset(s);
+ do; while(isws(Sgetc(s)));
+ Sungetc(s);
+ if(Sread(s, b, 7) == 7 && memcmp(b, "stream", 6) == 0 && isws(c = b[6])){
/* there IS a stream */
- if(c == '\r' && (c = Bgetc(b)) < 0)
+ if(c == '\r' && (c = Sgetc(s)) < 0)
goto err;
if(c != '\n'){
werrstr("stream has no newline after dict");
goto err;
}
- o->stream.off = Boffset(b);
+ o->stream.off = Soffset(s);
o->type = Ostream;
o->stream.len = dictint(o, "Length");
return o;
}
- Bseek(b, off, 0);
+ Sseek(s, off, 0);
return o;
}
}
- Bungetc(b);
+ Sungetc(s);
/* fall through */
case '(':
- Bungetc(b);
- if((o = pdfstring(b)) != nil)
+ Sungetc(s);
+ if((o = pdfstring(s)) != nil)
o->pdf = pdf;
return o;
case '/':
- Bungetc(b);
- if((o = pdfname(b)) != nil)
+ Sungetc(s);
+ if((o = pdfname(s)) != nil)
o->pdf = pdf;
return o;
case '[':
- Bungetc(b);
- if((o = pdfarray(pdf, b)) != nil)
+ Sungetc(s);
+ if((o = pdfarray(pdf, s)) != nil)
o->pdf = pdf;
return o;
case 'n':
- off = Boffset(b);
- if(Bgetc(b) == 'u' && Bgetc(b) == 'l' && Bgetc(b) == 'l' && (isws(c = Bgetc(b)) || isdelim(c))){
- Bungetc(b);
+ off = Soffset(s);
+ if(Sgetc(s) == 'u' && Sgetc(s) == 'l' && Sgetc(s) == 'l' && (isws(c = Sgetc(s)) || isdelim(c))){
+ Sungetc(s);
return &null;
}
- Bseek(b, off, 0);
+ Sseek(s, off, 0);
c = 'f';
goto unexpected;
case 't':
- off = Boffset(b);
+ off = Soffset(s);
tf = 1;
- if(Bgetc(b) == 'r' && Bgetc(b) == 'u' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
+ if(Sgetc(s) == 'r' && Sgetc(s) == 'u' && Sgetc(s) == 'e' && (isws(c = Sgetc(s)) || isdelim(c)))
goto bool;
- Bseek(b, off, 0);
+ Sseek(s, off, 0);
c = 't';
goto unexpected;
case 'f':
- off = Boffset(b);
+ off = Soffset(s);
tf = 0;
- if(Bgetc(b) == 'a' && Bgetc(b) == 'l' && Bgetc(b) == 's' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
+ if(Sgetc(s) == 'a' && Sgetc(s) == 'l' && Sgetc(s) == 's' && Sgetc(s) == 'e' && (isws(c = Sgetc(s)) || isdelim(c)))
goto bool;
- Bseek(b, off, 0);
+ Sseek(s, off, 0);
c = 'f';
goto unexpected;
bool:
- Bungetc(b);
+ Sungetc(s);
if((o = calloc(1, sizeof(*o))) == nil)
goto err;
o->type = Obool;
@@ -111,20 +110,20 @@
default:
if(!isdigit(c)){
unexpected:
- Bungetc(b);
+ Sungetc(s);
werrstr("unexpected char '%c'", c);
goto err;
}
/* it could be a number or an indirect object */
- Bungetc(b);
+ Sungetc(s);
if((o = calloc(1, sizeof(*o))) == nil)
goto err;
o->pdf = pdf;
- Bgetd(b, &o->num); /* get the first number */
- off = Boffset(b); /* seek here if not an indirect object later */
+ Sgetd(s, &o->num); /* get the first number */
+ off = Soffset(s); /* seek here if not an indirect object later */
- if((o2 = pdfobj(pdf, b)) != nil && o2->type == Onum){ /* second object is number too */
- do; while(isws(c = Bgetc(b)));
+ if((o2 = pdfobj(pdf, s)) != nil && o2->type == Onum){ /* second object is number too */
+ do; while(isws(c = Sgetc(s)));
if(c < 0)
goto err;
if(c == 'R'){ /* indirect object */
@@ -134,13 +133,13 @@
pdfobjfree(o2);
return o;
}
- if(c == 'o' && Bgetc(b) == 'b' && Bgetc(b) == 'j'){ /* object */
+ if(c == 'o' && Sgetc(s) == 'b' && Sgetc(s) == 'j'){ /* object */
xref.id = o->num;
xref.gen = o2->num;
/* FIXME put into a map */
pdfobjfree(o);
pdfobjfree(o2);
- if((o = pdfobj(pdf, b)) != nil)
+ if((o = pdfobj(pdf, s)) != nil)
return o;
o2 = nil;
}
@@ -148,7 +147,7 @@
/* just a number, go back and return it */
o->type = Onum;
- if(Bseek(b, off, 0) != off){
+ if(Sseek(s, off, 0) != off){
werrstr("seek failed");
goto err;
}
--- a/pdf.c
+++ b/pdf.c
@@ -1,6 +1,5 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include <ctype.h>
#include "pdf.h"
@@ -8,14 +7,16 @@
trailerread(Pdf *pdf)
{
Object *o;
+ int prev;
- if((o = pdfobj(pdf, pdf->bio)) == nil)
+ if((o = pdfobj(pdf, pdf->s)) == nil)
goto err;
-
if(o->type != Odict){
werrstr("isn't a dictionary");
goto err;
}
+ if((prev = dictint(o, "Prev")) > 0 && (Sseek(pdf->s, prev, 0) < 0 || xrefreadold(pdf) != 0))
+ goto err;
pdf->root = pdfref(dictget(o, "Root"));
pdf->info = pdfref(dictget(o, "Info"));
@@ -28,24 +29,28 @@
}
Pdf *
-pdfopen(Biobuf *b)
+pdfopen(void *bio)
{
Pdf *pdf;
Object *o;
+ Stream *stream;
char tmp[64], *s, *x;
int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
int i, n, off;
+ fmtinstall('H', encodefmt);
+ fmtinstall('O', Ofmt);
fmtinstall('T', Tfmt);
fmtinstall(L'⊗', ⊗fmt);
o = nil;
- if((pdf = calloc(1, sizeof(*pdf))) == nil)
+ pdf = nil;
+ if((stream = Sbio(bio)) == nil || (pdf = calloc(1, sizeof(*pdf))) == nil)
goto err;
- pdf->bio = b;
+ pdf->s = stream;
/* check header */
- if(Bread(b, tmp, 8) != 8 ||
+ if(Sread(stream, tmp, 8) != 8 ||
strncmp(tmp, "%PDF-", 5) != 0 || !isdigit(tmp[5]) || tmp[6] != '.' || !isdigit(tmp[7])){
werrstr("not a pdf");
goto err;
@@ -55,8 +60,8 @@
/* read a block of data */
n = sizeof(tmp)-1;
- Bseek(b, -n, 2);
- if(Bread(b, tmp, n) != n){
+ Sseek(stream, -n, 2);
+ if(Sread(stream, tmp, n) != n){
badtrailer:
werrstr("invalid trailer");
goto err;
@@ -75,25 +80,27 @@
goto badtrailer;
/* read xref */
- if(Bseek(b, xreftb, 0) != xreftb){
+ if(Sseek(stream, xreftb, 0) != xreftb){
werrstr("xref position out of range");
goto err;
}
for(;;){
- off = Boffset(b);
- if(Bread(b, tmp, sizeof(tmp)) < 8){
+ while(isspace(Sgetc(stream)));
+ Sungetc(stream);
+ off = Soffset(stream);
+ if(Sread(stream, tmp, sizeof(tmp)) < 8){
badxref:
werrstr("invalid xref: %r");
goto err;
}
if(memcmp(tmp, "xref", 4) == 0){
- if(Bseek(b, -sizeof(tmp)+5, 1) < 0 || xrefreadold(pdf) != 0)
+ if(Sseek(stream, -sizeof(tmp), 1) < 0 || xrefreadold(pdf) != 0)
goto err;
/* there could be more updates, try it */
}else if(memcmp(tmp, "trailer", 7) == 0){ /* 7.5.5 file trailer */
/* move to the trailer dictionary */
n = off + 8;
- if(Bseek(b, n, 0) != n || trailerread(pdf) != 0){
+ if(Sseek(stream, n, 0) != n || trailerread(pdf) != 0){
werrstr("invalid trailer: %r");
goto err;
}
@@ -100,7 +107,7 @@
/* trailer is supposed to be the last thing */
break;
}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
- if(Bseek(b, xreftb, 0) != xreftb)
+ if(Sseek(stream, xreftb, 0) != xreftb)
goto badxref;
if(xrefreadstream(pdf) != 0)
goto err;
@@ -110,13 +117,13 @@
/* root is required, info is optional */
if(pdf->root == &null){
- werrstr("no root");
+ werrstr("no root: %r");
goto err;
}
return pdf;
err:
- werrstr("pdfopen: %r [at %p]", (void*)Boffset(b));
+ werrstr("pdfopen: %r [at %p]", (void*)Soffset(stream));
pdfclose(pdf);
pdfobjfree(o);
return nil;
@@ -127,8 +134,8 @@
{
if(pdf == nil)
return;
- if(pdf->bio != nil)
- Bterm(pdf->bio);
+ if(pdf->s != nil)
+ Sclose(pdf->s);
free(pdf->xref);
free(pdf);
}
--- a/pdf.h
+++ b/pdf.h
@@ -8,10 +8,6 @@
Ostream, /* 7.3.8 */
Onull, /* 7.3.9 */
Oindir, /* 7.3.10 */
-
- Xusual = 0,
- Xuncompressed,
- Xcompressed,
};
typedef struct Buffer Buffer;
@@ -82,7 +78,7 @@
};
struct Pdf {
- Biobuf *bio;
+ Stream *s;
Xref *xref;
int nxref; /* 7.5.4 xref subsection number of objects */
@@ -92,28 +88,34 @@
struct Xref {
u32int id;
- union{
- u32int off;
- u32int objnum;
+ union {
+ struct { /* uncompressed */
+ u32int off;
+ u16int gen;
+ };
+
+ struct { /* compressed, objstm > 0 */
+ u16int index; /* index within ObjStm */
+ };
};
- u16int gen;
- u16int type;
+ u16int objstm; /* > 0 means it's compressed and points to the ObjStm */
};
struct Stream {
Buffer buf;
- Biobuf *bio;
+ void *bio;
+ int linelen;
};
extern Object null;
-Pdf *pdfopen(Biobuf *b);
+Pdf *pdfopen(void *bio);
void pdfclose(Pdf *pdf);
/*
* Parse an object.
*/
-Object *pdfobj(Pdf *pdf, Biobuf *bio);
+Object *pdfobj(Pdf *pdf, Stream *s);
/*
* Deallocate the object and all its children. Refcount is
@@ -146,6 +148,8 @@
*/
int isdelim(int c);
+int isutf8(char *s, int len);
+
int arraylen(Object *o);
Object *arrayget(Object *o, int i);
int arrayint(Object *o, int i);
@@ -156,9 +160,19 @@
Object *dictdict(Object *o, char *name);
int dictints(Object *o, char *name, int *el, int nel);
-Stream *streamopen(Object *o);
-int streamsize(Stream *s);
-void streamclose(Stream *s);
+Stream *Sbio(void *bio);
+Stream *Sopen(Object *o);
+int Sread(Stream *s, void *b, int sz);
+int Sgetc(Stream *s);
+int Sungetc(Stream *s);
+int Ssize(Stream *s);
+int Soffset(Stream *s);
+int Sseek(Stream *s, int off, int whence);
+void Sclose(Stream *s);
+int Sgetd(Stream *s, double *d);
+int Sgeti(Stream *s, int *i);
+char *Srdstr(Stream *s, int delim, int zero);
+int Slinelen(Stream *s);
Filter *filteropen(char *name, Object *o);
int filterrun(Filter *f, Buffer *bi, Buffer *bo);
@@ -169,16 +183,17 @@
int bufeof(Buffer *b);
int bufleft(Buffer *b);
uchar *bufdata(Buffer *b, int *sz);
-int bufreadn(Buffer *b, Biobuf *bio, int sz);
+int bufreadn(Buffer *b, Stream *s, int sz);
int bufput(Buffer *b, uchar *d, int sz);
int bufget(Buffer *b, uchar *d, int sz);
void bufdump(Buffer *b);
+#pragma varargck type "O" Object*
#pragma varargck type "T" Object*
#pragma varargck type "⊗" Xref
+int Ofmt(Fmt *f);
int Tfmt(Fmt *f);
int ⊗fmt(Fmt *f);
-int Bgetint(Biobuf *b, int *i);
int xrefreadold(Pdf *pdf);
int xrefreadstream(Pdf *pdf);
--- a/stream.c
+++ b/stream.c
@@ -3,18 +3,20 @@
#include <bio.h>
#include "pdf.h"
-static int
-bufiof(Biobufhdr *b, void *data, long n)
+Stream *
+Sbio(void *bio)
{
Stream *s;
- s = (Stream*)((char*)b - sizeof(*s));
+ if((s = calloc(1, sizeof(*s))) == nil)
+ return nil;
+ s->bio = bio;
- return bufget(&s->buf, data, n);
+ return s;
}
Stream *
-streamopen(Object *o)
+Sopen(Object *o)
{
Stream *s;
Buffer b, x;
@@ -23,13 +25,15 @@
int i, nflts;
s = nil;
- if(pdfeval(&o)->type != Ostream) /* FIXME open a string object as a stream as well? */
+ if(pdfeval(&o)->type != Ostream){ /* FIXME open a string object as a stream as well? */
+ werrstr("not a stream");
return nil;
+ }
bufinit(&b, nil, 0);
- if(Bseek(o->pdf->bio, o->stream.off, 0) != o->stream.off)
+ if(Sseek(o->pdf->s, o->stream.off, 0) != o->stream.off)
return nil;
- if(bufreadn(&b, o->pdf->bio, o->stream.len) < 0)
+ if(bufreadn(&b, o->pdf->s, o->stream.len) < 0)
goto err;
/* see if there are any filters */
@@ -64,14 +68,11 @@
}
}
- if((s = calloc(1, sizeof(*s)+sizeof(Biobuf))) == nil){
+ if((s = calloc(1, sizeof(*s))) == nil){
buffree(&b);
return nil;
}
- s->bio = (Biobuf*)(s+1);
s->buf = b;
- Binit(s->bio, Bfildes(o->pdf->bio), OREAD);
- Biofn(s->bio, bufiof);
return s;
err:
@@ -82,18 +83,164 @@
}
int
-streamsize(Stream *s)
+Sread(Stream *s, void *b, int sz)
{
+ return s->bio != nil ? Bread(s->bio, b, sz) : bufget(&s->buf, b, sz);
+}
+
+int
+Sgetc(Stream *s) /* reads one byte from the stream; negative return means no byte was obtained */
+{
+ int n;
+ uchar c;
+
+ if(s->bio != nil) /* bio-backed stream: delegate to Bgetc */
+ return Bgetc(s->bio);
+ if((n = bufget(&s->buf, &c, 1)) < 0)
+ return -2; /* bufget reported an error */
+
+ return n == 0 ? -1 : (int)c; /* -1 when bufget delivered nothing, else the byte as 0..255 */
+}
+
+int
+Sungetc(Stream *s) /* steps back one byte: Bungetc for bio streams, otherwise a relative seek of -1 */
+{
+ return s->bio != nil ? Bungetc(s->bio) : Sseek(s, -1, 1);
+}
+
+int
+Soffset(Stream *s) /* current position: Boffset for bio streams, otherwise the buffer's offset field */
+{
+ return s->bio != nil ? Boffset(s->bio) : s->buf.off;
+}
+
+int
+Ssize(Stream *s)
+{
+ assert(s->bio == nil);
return bufleft(&s->buf);
}
+struct sgetd /* state shared between Sgetd and its charstod callback Sgetdf */
+{
+ Stream *s; /* stream the characters are pulled from */
+ int eof; /* set to 1 once Sgetc has returned a negative value */
+};
+
+static int
+Sgetdf(void *vp) /* character source handed to charstod by Sgetd; vp is a struct sgetd* */
+{
+ int c;
+ struct sgetd *sg = vp;
+
+ c = Sgetc(sg->s);
+ if(c < 0)
+ sg->eof = 1; /* remember that Sgetc failed so Sgetd can report it */
+ return c;
+}
+
+int
+Sgetd(Stream *s, double *dp) /* parses a number with charstod; returns 1 and stores it, or -1 if Sgetc failed meanwhile */
+{
+ double d;
+ struct sgetd b;
+
+ b.s = s;
+ b.eof = 0;
+ d = charstod(Sgetdf, &b);
+ if(b.eof)
+ return -1; /* *dp is left untouched on this path */
+ Sungetc(s); /* presumably gives back the non-numeric character charstod consumed last - TODO confirm */
+ *dp = d;
+
+ return 1;
+}
+
+/* Skips whitespace, then reads a number into *i; returns 1 on success, negative on EOF/error (*i untouched). */
+int
+Sgeti(Stream *s, int *i)
+{
+	double d;
+	int c;
+	do; while(isws(c = Sgetc(s)));	/* keep the character itself, not the isws() result */
+	if(c < 0)
+		return c;
+	Sungetc(s);	/* the first non-whitespace character belongs to the number */
+	if(Sgetd(s, &d) != 1)
+		return -1;	/* d was never set, so do not store it */
+	*i = d;
+	return 1;
+}
+
+int
+Sseek(Stream *s, int off, int whence) /* bio streams return Bseek's result; buffer streams return the new (clamped) offset */
+{
+ if(s->bio != nil)
+ return Bseek(s->bio, off, whence);
+
+ if(whence == 1) /* relative to the current buffer offset */
+ off += s->buf.off;
+ else if(whence == 2) /* relative to the buffer size */
+ off += s->buf.sz;
+ if(off < 0){ /* out-of-range targets set errstr and are clamped rather than rejected */
+ werrstr("seek: %d < 0", off);
+ off = 0;
+ }else if(off > s->buf.sz){
+ werrstr("seek: %d > %d", off, s->buf.sz);
+ off = s->buf.sz;
+ }
+
+ s->buf.off = off;
+
+ return off; /* callers that compare against the requested offset see a mismatch after clamping */
+}
+
+/*
+ * Reads through the next delim. Returns a malloc'ed, NUL-terminated line (the
+ * delimiter is dropped when zero is set), or nil if no delimiter remains.
+ */
+char *
+Srdstr(Stream *s, int delim, int zero)
+{
+	int i, len;
+	char *line;
+
+	if(s->bio != nil){
+		line = Brdstr(s->bio, delim, zero);
+		s->linelen = Blinelen(s->bio);
+		return line;
+	}
+
+	for(i = s->buf.off; i < s->buf.sz && s->buf.b[i] != delim; i++);	/* stop ON the delimiter */
+	if(i >= s->buf.sz)	/* ran off the end: a delimiter in the very last byte no longer lands here */
+		return nil;
+	len = i + 1 - s->buf.off;	/* the line includes the delimiter */
+	if((line = malloc(len+1)) == nil)
+		return nil;
+	memmove(line, s->buf.b+s->buf.off, len);
+	s->buf.off += len;
+	if(line[len-1] == delim && zero)
+		len--;
+	line[len] = 0;
+	s->linelen = len;
+
+	return line;
+}
+
+int
+Slinelen(Stream *s) /* returns the linelen field, which Srdstr stores on each call */
+{
+ return s->linelen;
+}
+
void
-streamclose(Stream *s)
+Sclose(Stream *s)
{
if(s == nil)
return;
buffree(&s->buf);
- Bterm(s->bio);
+ if(s->bio != nil)
+ Bterm(s->bio);
free(s);
}
--- a/string.c
+++ b/string.c
@@ -1,6 +1,5 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include "pdf.h"
/* 7.3.4 String Objects */
@@ -18,15 +17,15 @@
};
static Object *
-stringhex(Biobuf *b)
+stringhex(Stream *stream)
{
char *s;
Object *o;
int len, n;
- if((s = Brdstr(b, '>', 0)) == nil)
+ if((s = Srdstr(stream, '>', 0)) == nil)
return nil;
- len = Blinelen(b) - 1;
+ len = Slinelen(stream) - 1;
if(s[len] != '>'){
werrstr("no '>'");
free(s);
@@ -50,7 +49,7 @@
}
Object *
-pdfstring(Biobuf *b)
+pdfstring(Stream *stream)
{
Object *o;
char *s, *r;
@@ -62,14 +61,14 @@
return nil;
for(paren = sz = 0;;){
- if((c = Bgetc(b)) < 0)
+ if((c = Sgetc(stream)) < 0)
break;
switch(c){
case '<':
if(sz == 0){
- Bungetc(b);
- return stringhex(b);
+ Sungetc(stream);
+ return stringhex(stream);
}
break;
@@ -86,16 +85,16 @@
continue;
case '\\':
- if((c = Bgetc(b)) <= 0)
+ if((c = Sgetc(stream)) <= 0)
break;
if(c >= '0' && c <= '7'){ /* octal */
oct[0] = c;
- for(i = 1; i < 3 && (c = Bgetc(b)) >= '0' && c <= '7'; i++)
+ for(i = 1; i < 3 && (c = Sgetc(stream)) >= '0' && c <= '7'; i++)
oct[i] = c;
if(c <= 0)
break;
if(c < '0' || c > '7')
- Bungetc(b);
+ Sungetc(stream);
oct[i] = 0;
c = strtol(oct, nil, 8);
}else if(c >= nelem(esc) || (c = esc[c]) == 0){
@@ -147,77 +146,3 @@
werrstr("string: %r");
return nil;
}
-
-#ifdef TEST
-static struct {
- char *in;
- char *out;
-}t[] = {
- {"", nil},
- {"(test, success)", "test, success"},
- {"(simple string)", "simple string"},
- {"(non-closed paren", nil},
- {"wrong first char", nil},
- {"(parens((()((())))()))", "parens"},
- {"(\\0053)", "\x053"},
- {"(\\053)", "+"},
- {"(\\53)", "+"},
- {"()", ""},
- {")", nil},
- {"(\\)\\()", ")("},
- {"(\\\\)", "\\"},
- {"a", nil},
- {"(1\\\n2)", "12"},
- {"<323130>", "210"},
- {"<32313>", "210"},
- {"<>", ""},
- {"<", nil},
- {"<zz>", nil},
- {">", nil},
-};
-
-static char *s;
-static int off, n;
-
-static int
-rd(Biobufhdr *, void *data, long sz)
-{
- if(sz > n-off)
- sz = n-off;
- memmove(data, s+off, sz);
- off += sz;
- return sz;
-}
-
-void
-test_pdfstring(void)
-{
- Object *o;
- Biobuf b;
- int i;
-
- fprint(2, "pdfstring\n");
- for(i = 0; i < nelem(t); i++){
- s = t[i].in;
- n = strlen(s);
- off = 0;
- Binit(&b, -1, OREAD);
- Biofn(&b, rd);
-
- fprint(2, "\t%d: ", i);
- o = pdfstring(&b);
- if(o == nil && t[i].out != nil)
- fprint(2, "ERROR: expected %q, got error: %r\n", t[i].out);
- else if(o != nil && t[i].out == nil)
- fprint(2, "ERROR: expected error, got %q\n", o->str);
- else if(o == nil && t[i].out == nil)
- fprint(2, "OK (%r)\n");
- else if(strcmp(o->str, t[i].out) != 0)
- fprint(2, "ERROR: expected %q, got %q\n", t[i].out, o->str);
- else
- fprint(2, "OK\n");
- pdfobjfree(o);
- Bterm(&b);
- }
-}
-#endif
--- a/xref.c
+++ b/xref.c
@@ -1,6 +1,5 @@
#include <u.h>
#include <libc.h>
-#include <bio.h>
#include <ctype.h>
#include "pdf.h"
@@ -18,18 +17,21 @@
char *s, *e;
Xref *x;
- if(Bgetint(pdf->bio, &xref0) != 1 || xref0 < 0){
+ Sseek(pdf->s, 4, 1);
+ if(Sgeti(pdf->s, &xref0) != 1 || xref0 < 0){
werrstr("invalid xref0");
return -1;
}
- if(Bgetint(pdf->bio, &nxref) != 1 || nxref < 0){
+ if(Sgeti(pdf->s, &nxref) != 1 || nxref < 0){
werrstr("invalid nxref");
return -1;
}
+ if(nxref < 1)
+ return 0;
/* skip whitespace and move to the first subsection */
- while(isspace(Bgetc(pdf->bio)));
- Bungetc(pdf->bio);
+ do; while(isspace(Sgetc(pdf->s)));
+ Sungetc(pdf->s);
s = nil;
if((x = realloc(pdf->xref, (pdf->nxref + nxref)*sizeof(Xref))) == nil)
@@ -41,12 +43,13 @@
if((s = malloc(sz)) == nil)
goto err;
for(i = 0; i < sz; i += n){
- if((n = Bread(pdf->bio, s+i, sz-i)) < 1)
+ if((n = Sread(pdf->s, s+i, sz-i)) < 1)
goto err;
}
/* store non-free objects only */
newnxref = pdf->nxref;
+ xref.objstm = 0;
for(e = s, i = 0; i < nxref; i++, e += 20){
if(!isspace(e[10]) || !isspace(e[18]) || !isspace(e[19])){
werrstr("invalid xref line (%d/%d)", i, nxref);
@@ -54,8 +57,7 @@
}
xref.id = xref0 + i;
xref.off = strtoul(e, nil, 10);
- /* FIXME xref.gen */
- xref.type = Xusual;
+ xref.gen = strtoul(e+11, nil, 10);
/* search in already existing xrefs, update if found */
for(j = 0; j < pdf->nxref; j++){
@@ -107,19 +109,24 @@
int
xrefreadstream(Pdf *pdf)
{
- Object *o;
+ Object *o, *p, *index;
Stream *s;
Xref *x;
uchar buf[32];
- int w[8], nw, i, c, n, nxref, newnxref, prev, extra;
+ int w[8], nw, c, n, nxref, newnxref, prev, extra;
+ int i, ni, nsubsec, subsec;
s = nil;
- if((o = pdfobj(pdf, pdf->bio)) == nil){
+ if((o = pdfobj(pdf, pdf->s)) == nil){
werrstr("xref stream obj: %r");
goto err;
}
- if((prev = dictint(o, "Prev")) > 0){
- if(Bseek(pdf->bio, prev, 0) != prev){
+
+ index = dictget(o, "Index"); /* 7.5.8.2 subsection indexing */
+ nsubsec = arraylen(index) / 2;
+
+ if((prev = dictint(o, "Prev")) > 0){ /* 7.5.8.2 previous xref stream */
+ if(Sseek(pdf->s, prev, 0) != prev){
werrstr("xref stream prev seek failed");
goto err;
}
@@ -128,7 +135,7 @@
return -1;
}
}
- if((s = streamopen(o)) == nil){
+ if((s = Sopen(o)) == nil){
werrstr("failed to stream xref: %r");
goto err;
}
@@ -143,11 +150,11 @@
werrstr("W is beyond imaginable: %d bytes", n);
goto err;
}
- if((nxref = streamsize(s)/n) < 1){
+ if((nxref = Ssize(s)/n) < 1){
werrstr("no xref elements in the stream");
goto err;
}
- extra = streamsize(s) % (nxref*n);
+ extra = Ssize(s) % (nxref*n);
if(extra != 0)
fprint(2, "extra %d bytes in xref stream", extra);
@@ -156,34 +163,41 @@
goto err;
pdf->xref = x;
x += pdf->nxref;
- while(Bread(s->bio, buf, n) == n){ /* stop on short read or error */
+ i = 0;
+ for(ni = subsec = 0; Sread(s, buf, n) == n; ni--, i++){ /* stop on short read or error */
+ if(ni == 0 && nsubsec > 0){
+ i = arrayint(index, subsec*2+0); /* index of the first object */
+ ni = arrayint(index, subsec*2+1); /* number of objects in the subsection */
+ subsec++;
+ }
+
c = getint(buf, w[0], 1); /* default type is 1 */
- if(c == 1){ /* not compressed */
+ if(c == 1){ /* uncompressed */
+ x->objstm = 0;
+ x->id = i;
x->off = getint(buf+w[0], w[1], 0);
x->gen = getint(buf+w[0]+w[1], w[2], 0);
- x->type = Xuncompressed;
pdf->nxref++;
- fprint(2, "xref %⊗\n", *x);
x++;
}else if(c == 2){ /* compressed */
- x->objnum = getint(buf+w[0], w[1], 0);
- x->id = getint(buf+w[0]+w[1], w[2], 0);
- x->type = Xcompressed;
+ x->id = i;
+ x->objstm = getint(buf+w[0], w[1], 0);
+ x->index = getint(buf+w[0]+w[1], w[2], 0);
pdf->nxref++;
- fprint(2, "xref %⊗\n", *x);
x++;
}
}
- streamclose(s);
- pdf->root = pdfref(dictget(o, "Root"));
- pdf->info = pdfref(dictget(o, "Info"));
+ Sclose(s);
+ if((p = dictget(o, "Root")) != &null)
+ pdf->root = pdfref(p);
+ if((p = dictget(o, "Info")) != &null)
+ pdf->info = pdfref(p);
pdfobjfree(o);
return 0;
err:
- streamclose(s);
+ Sclose(s);
pdfobjfree(o);
return -1;
}
-