/*
 * shithub: pdffs
 *
 * ref: 51cd3bfceeb001872d4ff298180875c4229a3d68
 * dir: /object.c/
 *
 * View raw version
 */
#include <u.h>
#include <libc.h>
#include <ctype.h>
#include <bio.h>
#include "pdf.h"

/*
 * Parsers for the individual object kinds.  pdfobject below rewinds to
 * the introducing character(s) and then hands the input over to one of
 * these: pdfstring for '(' and a single '<', pdfname for '/',
 * pdfarray for '[' and pdfdict for "<<".
 * Only the prototypes are visible in this file.
 */
Object *pdfstring(Biobuf *b);
Object *pdfname(Biobuf *b);
Object *pdfarray(Pdf *pdf, Biobuf *b);
Object *pdfdict(Pdf *pdf, Biobuf *b);

/*
 * Shared object handed out by pdfobject for every `null` keyword.
 * freeobject returns early on Onull, so this object is never passed
 * to free().
 */
static Object null = {
	.type = Onull,
};

/*
 * General function to parse an object of any type.
 *
 * Skips leading whitespace (as judged by isws) on b and dispatches on
 * the first remaining character:
 *	"<<"	dictionary via pdfdict; if the dictionary is followed by
 *		"stream" and a newline the object becomes an Ostream with
 *		stream.length taken from /Length and stream.offset set to
 *		the position of the first byte after that newline
 *	'<' '('	string via pdfstring
 *	'/'	name via pdfname
 *	'['	array via pdfarray
 *	null	the shared static null object
 *	true, false	a freshly allocated Obool
 *	digit	an Onum, or an Oindir for "id gen R"; for "id gen obj" the
 *		object that follows the keyword is parsed and returned
 *
 * Returns the object, or nil with the error string set and prefixed
 * with "object: ".  Results are released with freeobject, which also
 * copes with the shared null object.
 *
 * NOTE(review): only a digit starts a number here; a leading '+', '-'
 * or '.' is reported as an unexpected character.
 */
Object *
pdfobject(Pdf *pdf, void *b)
{
	Object *o, *o2, *m;
	vlong off;
	int c, tf;
	char s[16];

	o = o2 = nil;
	do; while(isws(c = Bgetc(b)));
	if(c < 0)
		goto err;

	switch(c){
	case '<': /* dictionary or a string */
		c = Bgetc(b);
		if(c == '<'){
			Bseek(b, -2, 1);
			/*
			 * a broken dictionary must not be retried as a string:
			 * the read position is wherever pdfdict gave up and its
			 * error message would be lost
			 */
			if((o = pdfdict(pdf, b)) == nil)
				goto err;

			/* check for attached stream */
			off = Boffset(b);
			if(Bread(b, s, 7) == 7 && memcmp(s, "stream", 6) == 0 && isws(c = s[6])){
				/* there IS a stream */
				if((m = pdfdictget(o, "Length")) == nil || pdfeval(pdf, m) != 0 || m->type != Onum){
					werrstr("stream has no valid /Length");
					goto err;
				}
				/* the keyword is terminated by either \n or \r\n */
				if(c == '\r' && (c = Bgetc(b)) < 0)
					goto err;
				if(c != '\n'){
					werrstr("stream has no newline after dict");
					goto err;
				}
				o->type = Ostream;
				o->stream.length = m->num;
				o->stream.offset = Boffset(b);
				return o;
			}
			/* plain dictionary: undo the 7 byte lookahead */
			Bseek(b, off, 0);
			return o;
		}
		/* single '<': push back the char after it, then share the string path */
		Bungetc(b);
		/* fall through */

	case '(':
		Bungetc(b);
		return pdfstring(b);

	case '/':
		Bungetc(b);
		return pdfname(b);

	case '[':
		Bungetc(b);
		return pdfarray(pdf, b);

	case 'n':
		off = Boffset(b);
		if(Bgetc(b) == 'u' && Bgetc(b) == 'l' && Bgetc(b) == 'l' && (isws(c = Bgetc(b)) || isdelim(c))){
			Bungetc(b);
			return &null;
		}
		Bseek(b, off, 0);
		c = 'n'; /* report the character that was actually read */
		goto unexpected;

	case 't':
		off = Boffset(b);
		tf = 1;
		if(Bgetc(b) == 'r' && Bgetc(b) == 'u' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
			goto bool;
		Bseek(b, off, 0);
		c = 't';
		goto unexpected;

	case 'f':
		off = Boffset(b);
		tf = 0;
		if(Bgetc(b) == 'a' && Bgetc(b) == 'l' && Bgetc(b) == 's' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
			goto bool;
		Bseek(b, off, 0);
		c = 'f';
		goto unexpected;
bool:
		/* leave the terminating whitespace/delimiter for the caller */
		Bungetc(b);
		if((o = malloc(sizeof(*o))) == nil)
			goto err;
		o->type = Obool;
		o->bool = tf;
		return o;

	default:
		/* reject before allocating, so nothing half-built reaches err */
		if(!isdigit(c)){
unexpected:
			Bungetc(b);
			werrstr("unexpected char '%c'", c);
			goto err;
		}
		if((o = malloc(sizeof(*o))) == nil)
			goto err;
		/* malloc does not clear memory; freeobject switches on the type */
		o->type = Onum;

		/* it could be a number or an indirect object */
		Bungetc(b);
		Bgetd(b, &o->num); /* get the first number */
		off = Boffset(b); /* seek here if not an indirect object later */

		if((o2 = pdfobject(pdf, b)) != nil && o2->type == Onum){ /* second object is number too */
			do; while(isws(c = Bgetc(b)));
			if(c < 0)
				goto err;
			if(c == 'R'){ /* indirect object */
				o->type = Oindir;
				o->indir.id = o->num;
				o->indir.gen = o2->num;
				freeobject(o2);
				return o;
			}
			if(c == 'o' && Bgetc(b) == 'b' && Bgetc(b) == 'j'){ /* object */
				/* FIXME put id (o->num) and gen (o2->num) into a map */
				freeobject(o);
				freeobject(o2);
				o2 = nil; /* already released, err must not free it again */
				if((o = pdfobject(pdf, b)) == nil)
					goto err;
				return o;
			}
		}

		/*
		 * just a number, go back and return it; whatever was read
		 * as lookahead will be parsed again by the caller
		 */
		freeobject(o2);
		o2 = nil;
		if(Bseek(b, off, 0) != off){
			werrstr("seek failed");
			goto err;
		}
		return o;
	}

err:
	werrstr("object: %r");
	freeobject(o);
	freeobject(o2);
	return nil;
}

/*
 * Release o and whatever it owns.
 *
 * nil is accepted and ignored.  Onull objects are left untouched.
 * Arrays free each element and then the element vector; dictionaries
 * and streams (which are walked through the same dict fields) free
 * every key, every value and then the key/value vector.  All other
 * types only have the object itself freed.
 */
void
freeobject(Object *o)
{
	int k;

	if(o == nil || o->type == Onull)
		return;

	if(o->type == Oarray){
		for(k = 0; k < o->array.ne; k++)
			freeobject(o->array.e[k]);
		free(o->array.e);
	}else if(o->type == Odict || o->type == Ostream){
		k = 0;
		while(k < o->dict.nkv){
			free(o->dict.kv[k].key);
			freeobject(o->dict.kv[k].value);
			k++;
		}
		free(o->dict.kv);
	}

	/* Obool, Onum, Ostr, Oname and Oindir need nothing beyond this */
	free(o);
}