ref: a9516693e7142a658f4e3ea190272f4cd73b24be
parent: ef6cdd0d40612067504d1587aa4e64206fe1ea8a
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Sat Aug 29 12:29:56 EDT 2020
add and use flate filter
--- /dev/null
+++ b/buffer.c
@@ -1,0 +1,121 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "pdf.h"
+/* Make room for sz more bytes after b->sz, doubling the zero-filled storage as needed; returns 0 on success, -1 on failure. */
+static int
+bufgrow(Buffer *b, int sz)
+{
+	uchar *r;
+	int maxsz;
+	if(sz < 0 || b->sz > 0x40000000-sz)	/* past 2^30 the doubling loop below would overflow int */
+		return werrstr("buffer: bad size %d", sz), -1;
+	if(b->maxsz < 1){
+		if((b->b = mallocz(128, 1)) == nil)
+			return -1;
+		b->maxsz = 128;
+	}
+	for(maxsz = b->maxsz; b->sz+sz > maxsz; maxsz *= 2);
+	if(maxsz != b->maxsz){
+		if((r = realloc(b->b, maxsz)) == nil)	/* b->b is left untouched on failure */
+			return -1;
+		memset(r+b->maxsz, 0, maxsz-b->maxsz);	/* zero the new tail, like the initial mallocz */
+		b->b = r;
+		b->maxsz = maxsz;
+	}
+	return 0;
+}
+/* Reset b to an empty growable buffer; when d is non-nil, wrap the caller's sz bytes read-only instead. */
+void
+bufinit(Buffer *b, uchar *d, int sz)
+{
+	memset(b, 0, sizeof(*b));
+	if(d == nil)
+		return;
+	b->ro = 1;	/* buffree will not free d */
+	b->sz = sz;
+	b->b = d;
+}
+/* Release the storage of a buffer that owns it; read-only buffers wrap caller memory and are left alone. */
+void
+buffree(Buffer *b)
+{
+	if(!b->ro)
+		free(b->b);
+}
+/* Non-zero once the read offset has reached the end of the stored data. */
+int
+bufeof(Buffer *b)
+{
+	return b->sz == b->off;
+}
+/* Store the number of bytes held by b in *sz and return a pointer to the start of its data. */
+uchar *
+bufdata(Buffer *b, int *sz)
+{
+ *sz = b->sz;
+ return b->b;
+}
+/* Append exactly sz bytes read from bio to b; returns 0, or -1 if b cannot grow or bio runs short (bytes read so far stay in b). Aborts on a read-only buffer. */
+int
+bufreadn(Buffer *b, void *bio, int sz)
+{
+	int n, end;
+
+	if(b->ro)	/* bufgrow would replace the caller-owned storage of a read-only buffer */
+		sysfatal("bufreadn on readonly buffer");
+	if(bufgrow(b, sz) != 0)
+		return -1;
+	for(end = b->sz+sz; b->sz < end; b->sz += n)
+		if((n = Bread(bio, b->b+b->sz, end-b->sz)) < 1)	/* keep asking for whatever is still missing */
+			return -1;
+	return 0;
+}
+/* Append sz bytes from d to b, growing it as needed; returns sz, or -1 if it cannot grow. Aborts on a read-only buffer. */
+int
+bufput(Buffer *b, uchar *d, int sz)
+{
+	uchar *dst;
+	if(b->ro)
+		sysfatal("bufferput on readonly buffer");
+	if(bufgrow(b, sz) != 0)
+		return -1;
+	dst = b->b + b->sz;	/* computed after bufgrow, which may move b->b */
+	b->sz += sz;
+	memmove(dst, d, sz);
+	return sz;
+}
+/* Copy up to sz unread bytes into d and advance the read offset; returns the count copied. b->eof records whether a non-empty request found nothing left. */
+int
+bufget(Buffer *b, uchar *d, int sz)
+{
+	int left;
+	if(sz == 0)
+		return 0;
+	if(b->off > b->sz)
+		sysfatal("buffer: off(%d) > sz(%d)", b->off, b->sz);
+	left = b->sz - b->off;
+	if(left < sz)
+		sz = left;	/* clamp the request to what is still unread */
+	memmove(d, b->b+b->off, sz);
+	b->off += sz;
+	b->eof = sz == 0;
+	return sz;
+}
+/* Debugging aid: hex-dump the contents of b to file descriptor 2, up to 16 bytes per row, each row prefixed with its offset. */
+void
+bufdump(Buffer *b)
+{
+ Biobuf bio;
+ int i, j;
+
+ Binit(&bio, 2, OWRITE);
+ Bprint(&bio, "%d bytes:\n", b->sz);
+ for(i = 0; i < b->sz;){
+ Bprint(&bio, "%04x\t", i);
+ for(j = 0; i < b->sz && j < 16; j++, i++)
+ Bprint(&bio, "%02x%s", b->b[i], (j+1)&7 ? " " : " "); /* NOTE(review): both separators are identical here; presumably a wider gap after every 8th byte was intended - confirm */
+ Bprint(&bio, "\n");
+ }
+ Bterm(&bio);
+}
--- a/dict.c
+++ b/dict.c
@@ -69,7 +69,7 @@
{
int i;
- if(o == nil || o->type != Odict || name == nil)
+ if(o == nil || (o->type != Ostream && o->type != Odict) || name == nil)
return nil;
for(i = 0; i < o->dict.nkv && strcmp(name, o->dict.kv[i].key) != 0; i++);
--- /dev/null
+++ b/f_flate.c
@@ -1,0 +1,38 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <flate.h>
+#include "pdf.h"
+/* Output callback handed to inflatezlib: append the n bytes at d to the Buffer passed as aux. */
+static int
+bw(void *aux, void *d, int n)
+{
+ return bufput(aux, d, n);
+}
+/* Input callback handed to inflatezlib: the next byte of the Buffer passed as aux, or -1 when none could be read. */
+static int
+bget(void *aux)
+{
+	uchar c;
+
+	return bufget(aux, &c, 1) != 1 ? -1 : c;
+}
+/* FlateDecode: inflate zlib streams from bi into bo until bi is exhausted; returns 0, or -1 with errstr set to the flate error. */
+int
+fFlate(void *aux, Buffer *bi, Buffer *bo)
+{
+	int r;
+
+	USED(aux);
+
+	for(;;){
+		r = inflatezlib(bo, bw, bi, bget);
+		if(r != FlateOk){
+			werrstr("%s", flateerr(r));
+			return -1;
+		}
+		if(bufeof(bi))
+			break;
+	}
+	return 0;
+}
--- a/filter.c
+++ b/filter.c
@@ -3,17 +3,72 @@
#include <bio.h>
#include "pdf.h"
-/*
-7.4
+/* 7.4 Filters */
-ASCIIHex
-ASCII85
-LZW
-Flate
-RunLength
-CCITTFax
-JBIG2
-DCT
-JPX
-Crypt
-*/
+struct Filter { /* one stream filter: its name plus the hooks that implement it */
+ char *name; /* filter name matched by filteropen */
+ int (*readall)(void *aux, Buffer *bi, Buffer *bo); /* called by filterrun; non-zero means failure; nil marks the filter as not implemented */
+ int (*open)(Filter *f, Object *o); /* optional; called by filteropen on the fresh copy, non-zero aborts the open */
+ void (*close)(Filter *f); /* optional; called by filterclose before the filter is freed */
+ void *aux; /* passed as first argument to readall */
+};
+
+int fFlate(void *aux, Buffer *bi, Buffer *bo);
+/* Known filter names; an entry whose readall is nil is recognized by filteropen but rejected as not implemented. */
+static Filter filters[] = {
+ {"ASCII85Decode", nil, nil, nil},
+ {"ASCIIHexDecode", nil, nil, nil},
+ {"CCITTFaxDecode", nil, nil, nil},
+ {"CryptDecode", nil, nil, nil},
+ {"DCTDecode", nil, nil, nil},
+ {"FlateDecode", fFlate},
+ {"JBIG2Decode", nil, nil, nil},
+ {"JPXDecode", nil, nil, nil},
+ {"LZWDecode", nil, nil, nil},
+ {"RunLengthDecode", nil, nil, nil},
+};
+/* Find the filter called name and return a private copy of it, opened against o; nil if it is unknown or unimplemented (errstr set), or if allocation or its open hook fails. */
+Filter *
+filteropen(char *name, Object *o)
+{
+	Filter *f, *t;
+
+	for(t = filters; t < filters+nelem(filters); t++)
+		if(strcmp(t->name, name) == 0)
+			break;
+	if(t == filters+nelem(filters)){
+		werrstr("no such filter %q", name);
+		return nil;
+	}
+	if(t->readall == nil){
+		werrstr("filter %q not implemented", name);
+		return nil;
+	}
+	if((f = malloc(sizeof(*f))) == nil)
+		return nil;
+	*f = *t;	/* each open gets its own copy of the table entry */
+	if(f->open != nil && f->open(f, o) != 0){
+		free(f);
+		return nil;
+	}
+	return f;
+}
+/* Run filter f over bi, writing to bo; on success logs the buffer sizes to fd 2 and returns 0, otherwise prefixes errstr with the filter name and returns -1. */
+int
+filterrun(Filter *f, Buffer *bi, Buffer *bo)
+{
+	if(f->readall(f->aux, bi, bo) == 0){
+		fprint(2, "filter[%s]: %d → %d %d\n", f->name, bi->sz, bo->sz, bo->off);
+		return 0;
+	}
+	werrstr("filter[%s]: %r", f->name);
+	return -1;
+}
+/* Call the filter's close hook, if it has one, then free the filter itself. */
+void
+filterclose(Filter *f)
+{
+ if(f->close != nil)
+ f->close(f);
+ free(f);
+}
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@
Object *v;
quotefmtinstall();
- deflateinit();
+ inflateinit();
ARGBEGIN{
default:
--- a/mkfile
+++ b/mkfile
@@ -5,8 +5,10 @@
OFILES=\
array.$O\
+ buffer.$O\
dict.$O\
eval.$O\
+ f_flate.$O\
filter.$O\
main.$O\
misc.$O\
@@ -14,6 +16,7 @@
object.$O\
pdf.$O\
pdfs.$O\
+ stream.$O\
string.$O\
HFILES=\
--- a/object.c
+++ b/object.c
@@ -13,6 +13,31 @@
.type = Onull,
};
+static char *otypes[] = { /* printable names for the Object type codes, indexed by type; used by Tfmt */
+ [Obool] = "bool",
+ [Onum] = "num",
+ [Ostr] = "str",
+ [Oname] = "name",
+ [Oarray] = "array",
+ [Odict] = "dict",
+ [Ostream] = "stream",
+ [Onull] = "null",
+ [Oindir] = "indir",
+};
+/* Format routine for Object*: prints the type name, "null" for nil or the null object, "????" for a type outside the otypes table. */
+int
+Tfmt(Fmt *f)
+{
+	Object *o;
+
+	o = va_arg(f->args, Object*);
+	if(o == nil || o == &null)
+		return fmtprint(f, "null");
+	if(o->type >= 0 && o->type < nelem(otypes))
+		return fmtprint(f, "%s", otypes[o->type]);
+	return fmtprint(f, "????");
+}
+
/* General function to parse an object of any type. */
Object *
pdfobj(Pdf *pdf, void *b)
@@ -49,8 +74,8 @@
goto err;
}
o->type = Ostream;
- o->stream.length = m->num;
- o->stream.offset = Boffset(b);
+ o->stream.len = m->num;
+ o->stream.off = Boffset(b);
return o;
}
Bseek(b, off, 0);
--- a/pdf.c
+++ b/pdf.c
@@ -75,9 +75,7 @@
static int
trailerread(Pdf *pdf)
{
- int i;
Object *o;
- KeyValue *kv;
if((o = pdfobj(pdf, pdf->bio)) == nil)
goto err;
@@ -87,12 +85,8 @@
goto err;
}
- for(i = 0, kv = o->dict.kv; i < o->dict.nkv; i++, kv++){
- if(strcmp(kv->key, "Root") == 0)
- pdf->root = kv->value;
- else if(strcmp(kv->key, "Info") == 0)
- pdf->info = kv->value;
- }
+ pdf->root = pdfdictget(o, "Root");
+ pdf->info = pdfdictget(o, "Info");
pdfobjfree(o);
o = nil;
@@ -101,8 +95,6 @@
werrstr("no root");
goto err;
}
- if(pdfeval(pdf, pdf->root) != 0 || pdfeval(pdf, pdf->info) != 0)
- goto err;
return 0;
err:
@@ -121,7 +113,10 @@
int nxref; /* 7.5.4 xref subsection number of objects */
int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
int i, n, off;
+ Stream *stream;
+ fmtinstall('T', Tfmt);
+
b = nil;
o = nil;
if((pdf = calloc(1, sizeof(*pdf))) == nil || (b = Bfdopen(fd, OREAD)) == nil)
@@ -196,10 +191,16 @@
}
}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
Bseek(b, xreftb, 0);
- if((o = pdfobj(pdf, b)) == nil || pdfeval(pdf, o) != 0)
- goto err;
-
+ if((o = pdfobj(pdf, b)) == nil || (stream = streamopen(pdf, o)) == nil)
+ goto badxref;
+ streamclose(stream);
+ pdf->root = pdfdictget(o, "Root");
+ pdf->info = pdfdictget(o, "Info");
}
+ if(pdfeval(pdf, pdf->root) != 0 || pdfeval(pdf, pdf->info) != 0)
+ goto err;
+ fprint(2, "root %T\n", pdf->root);
+ fprint(2, "info %T\n", pdf->info);
return pdf;
err:
--- a/pdf.h
+++ b/pdf.h
@@ -10,12 +10,24 @@
Oindir, /* 7.3.10 */
};
+typedef struct Buffer Buffer;
+typedef struct Filter Filter;
typedef struct KeyValue KeyValue;
typedef struct Object Object;
typedef struct Pdf Pdf;
-typedef struct Xref Xref;
typedef struct Stream Stream;
+typedef struct Xref Xref;
+#pragma incomplete Filter
+struct Buffer { /* byte buffer that is appended to at sz and read from at off */
+ uchar *b; /* the data */
+ int ro; /* set by bufinit when b wraps caller memory: bufput refuses it and buffree does not free it */
+ int maxsz; /* allocated size of b; 0 until bufgrow allocates */
+ int sz; /* number of bytes stored */
+ int off; /* read offset, advanced by bufget */
+ int eof; /* set by bufget when a non-empty read found nothing left */
+};
+
struct Object {
int type;
union {
@@ -45,8 +57,8 @@
struct {
KeyValue *kv;
int nkv;
- u32int length; /* packed */
- u32int offset;
+ u32int len; /* packed */
+ u32int off;
}stream;
};
};
@@ -72,9 +84,8 @@
};
struct Stream {
- Biobuf;
- Object *o;
- u32int offset;
+ Buffer buf;
+ void *bio;
};
Pdf *pdfopen(int fd);
@@ -96,3 +107,19 @@
Stream *streamopen(Pdf *pdf, Object *o);
void streamclose(Stream *s);
+
+Filter *filteropen(char *name, Object *o);
+int filterrun(Filter *f, Buffer *bi, Buffer *bo);
+void filterclose(Filter *f);
+
+void bufinit(Buffer *b, uchar *d, int sz);
+void buffree(Buffer *b);
+int bufeof(Buffer *b);
+uchar *bufdata(Buffer *b, int *sz);
+int bufreadn(Buffer *b, void *bio, int sz);
+int bufput(Buffer *b, uchar *d, int sz);
+int bufget(Buffer *b, uchar *d, int sz);
+void bufdump(Buffer *b);
+
+#pragma varargck type "T" Object *
+int Tfmt(Fmt *f);
--- /dev/null
+++ b/stream.c
@@ -1,0 +1,93 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "pdf.h"
+/* Read hook installed with Biofn: the Biobuf sits directly behind its Stream in one allocation, so step back one Stream to reach the buffer to read from. */
+static int
+bufiof(Biobufhdr *b, void *data, long n)
+{
+	Stream *s;
+
+	s = (Stream*)b - 1;
+
+	return bufget(&s->buf, data, n);
+}
+/* Open stream object o: read its raw bytes from the file, run them through the filters named by its Filter entry, and return a Stream whose bio reads the decoded data. Returns nil on failure. */
+Stream *
+streamopen(Pdf *pdf, Object *o)
+{
+	Stream *s;
+	Buffer b, x;
+	Object *of, **flts;
+	Filter *f;
+	int i, nflts, r;
+
+	s = nil;
+	if(pdfeval(pdf, o) != 0 || o == nil || o->type != Ostream) /* FIXME open a string object as a stream as well? */
+		return nil;
+
+	bufinit(&b, nil, 0);
+	if(Bseek(pdf->bio, o->stream.off, 0) != o->stream.off)
+		return nil;
+	if(bufreadn(&b, pdf->bio, o->stream.len) < 0)
+		goto err;
+	bufdump(&b);
+
+	/* see if there are any filters */
+	if((of = pdfdictget(o, "Filter")) != nil){
+		if(pdfeval(pdf, of) != 0)
+			goto err;
+		if(of->type == Oname){ /* one filter */
+			flts = &of;
+			nflts = 1;
+		}else if(of->type == Oarray){ /* array of filters */
+			flts = of->array.e;
+			nflts = of->array.ne;
+		}else{
+			werrstr("filters type invalid (%T)", of);
+			goto err;
+		}
+		/* filters are listed in the order they are applied when decoding, so run them first to last */
+		for(i = 0; i < nflts; i++){
+			if(flts[i]->type != Oname){
+				werrstr("filter type invalid (%T)", flts[i]);
+				goto err;
+			}
+			if((f = filteropen(flts[i]->name, o)) == nil)
+				goto err;
+			bufinit(&x, nil, 0);
+			r = filterrun(f, &b, &x);
+			filterclose(f);	/* only needed for this one pass; closing it here keeps it from leaking */
+			if(r != 0){
+				buffree(&x);
+				goto err;
+			}
+			buffree(&b);
+			b = x;	/* this filter's output is the next one's input */
+		}
+	}
+
+	if((s = calloc(1, sizeof(*s)+sizeof(Biobuf))) == nil){
+		buffree(&b);
+		return nil;
+	}
+	s->bio = (uchar*)(s+1);	/* the Biobuf lives right behind the Stream; bufiof relies on this layout */
+	s->buf = b;
+	Binit(s->bio, Bfildes(pdf->bio), OREAD);
+	Biofn(s->bio, bufiof);
+	bufdump(&s->buf);
+	return s;
+err:
+	werrstr("stream: %r");
+	buffree(&b);
+	free(s);
+	return nil;
+}
+/* Tear down a Stream returned by streamopen: terminate its Biobuf, release the decoded data, and free the Stream. */
+void
+streamclose(Stream *s)
+{
+	Bterm(s->bio);
+	buffree(&s->buf);
+	free(s);
+}