shithub: pdffs

ref: a9feb43f707673f7139317fac35e843d3bab7983
dir: pdffs/object.c

View raw version
#include <u.h>
#include <libc.h>
#include <ctype.h>
#include "pdf.h"

/*
 * Parsers for the individual object kinds; pdfobj hands the stream
 * over to one of these once it has recognised the leading character.
 * They are declared here because their definitions are not part of
 * the code preceding pdfobj.
 */
Object *pdfstring(Stream *s);
Object *pdfname(Stream *s);
Object *pdfarray(Pdf *pdf, Stream *s);
Object *pdfdict(Pdf *pdf, Stream *s);

/*
 * Returns 1 if str is at the current position of the stream and is
 * followed either by whitespace or, if delim is non-zero, by a
 * delimiter.  Returns 0 otherwise.
 *
 * strlen(str) must be in (0, 16): the scratch buffer has to hold str
 * plus the one lookahead byte that is inspected after it.  An empty
 * or too long str never matches.
 *
 * On match, the stream is left right after the string (the lookahead
 * byte is pushed back).  Otherwise the stream is sought back to the
 * offset it had on entry.
 */
static int
sismatch(Stream *s, char *str, int delim)
{
	char b[16];
	long len;
	vlong off;

	len = strlen(str);
	off = Soffset(s);

	/*
	 * len+1 bytes are read into b and b[len] is examined, so len
	 * must be strictly smaller than the buffer.
	 */
	if(len <= 0 || len >= (long)sizeof(b))
		return 0;

	if(Sread(s, b, len + 1) == len + 1
	&& memcmp(b, str, len) == 0
	&& (isws(b[len]) || (delim && isdelim(b[len])))){
		/* give the lookahead byte back */
		Sungetc(s);
		return 1;
	}

	Sseek(s, off, 0);
	return 0;
}

/*
 * Reads a short token from the stream and returns it as a freshly
 * strdup'ed string (whatever strdup returns is passed on unchecked).
 *
 * Reading stops at end of stream, at the first whitespace or
 * delimiter character (which is pushed back), or once 7 bytes have
 * been collected.  A longer token is therefore cut at 7 bytes and
 * the remainder stays in the stream.
 */
char *
suntilend(Stream *s)
{
	char tok[8];
	int n, ch;

	n = 0;
	while(n < (int)sizeof(tok) - 1){
		ch = Sgetc(s);
		if(ch < 0)
			break;
		if(isws(ch) || isdelim(ch)){
			/* terminator belongs to whatever comes next */
			Sungetc(s);
			break;
		}
		tok[n++] = ch;
	}
	tok[n] = 0;

	return strdup(tok);
}

/*
 * General function to parse an object of any type.
 *
 * Leading whitespace is skipped, then the first significant character
 * decides what is parsed:
 *	letter or '	"null" (returns &null), "true"/"false" (Obool),
 *			otherwise an operator (Oop) read by suntilend
 *	<<		dictionary via pdfdict; if the keyword "stream"
 *			follows, the object becomes an Ostream
 *	< or (		handed to pdfstring
 *	/		handed to pdfname
 *	[		handed to pdfarray
 *	digit - .	a number, an indirect reference "id gen R", or
 *			an object definition "id gen obj <object>"
 *
 * When Norecurse is set in flags, a number is returned without
 * looking ahead for a second number; this is how the lookahead
 * itself is kept from recursing further.
 *
 * Returns the parsed object, or nil with the error string set.
 */
Object *
pdfobj(Pdf *pdf, Stream *s, int flags)
{
	Object *o, *o2;
	vlong off;
	int c;
	Xref xref;

	o = o2 = nil;
	do; while(isws(c = Sgetc(s)));
	if(c < 0)
		goto err;

	if((isascii(c) && isalpha(c)) || c == '\''){
		Sungetc(s);
		// bool, null, or op
		if(sismatch(s, "null", 1)){
			/*
			 * NOTE(review): this writes to fd 1 every time a null
			 * object is parsed; looks like leftover debug output,
			 * confirm before removing.
			 */
			fprint(1, "NULL\n");
			return &null;
		}
		if((o = calloc(1, sizeof(*o))) == nil)
			goto err;
		o->type = Obool;
		o->pdf = pdf;
		if(sismatch(s, "true", 1)){
			o->bool = 1;
			return o;
		}
		if(sismatch(s, "false", 1)){
			o->bool = 0;
			return o;
		}
		/* neither keyword matched: treat the token as an operator */
		o->type = Oop;
		o->str = suntilend(s);
		return o;
	}

	switch(c){
	case '<': /* dictionary or a string */
		c = Sgetc(s);
		if(c == '<'){
			/* rewind over both '<' so pdfdict sees the opening "<<" */
			Sseek(s, -2, 1);
			if((o = pdfdict(pdf, s)) != nil){
				/* check for attached stream */
				off = Soffset(s);
				do; while(isws(Sgetc(s)));
				Sungetc(s);
				if(sismatch(s, "stream", 0)){
					c = Sgetc(s);
					/* there IS a stream */
					/* accept either "\n" or "\r\n" after the keyword */
					if(c == '\r' && (c = Sgetc(s)) < 0)
						goto err;
					if(c != '\n'){
						werrstr("stream has no newline after dict");
						goto err;
					}
					o->stream.off = Soffset(s);
					o->type = Ostream;
					o->stream.len = dictint(o, "Length");
					return o;
				}
				/* plain dictionary: undo the lookahead */
				Sseek(s, off, 0);
			}
			return o;
		}
		Sungetc(s);
		/* fall through */

	case '(':
		Sungetc(s);
		if((o = pdfstring(s)) != nil)
			o->pdf = pdf;
		return o;

	case '/':
		Sungetc(s);
		if((o = pdfname(s)) != nil)
			o->pdf = pdf;
		return o;

	case '[':
		Sungetc(s);
		if((o = pdfarray(pdf, s)) != nil)
			o->pdf = pdf;
		return o;

	default:
		if(!isdigit(c) && c != '-' && c != '.'){
			Sungetc(s);
			werrstr("unexpected char '%c' at %#x+%#x (%d left)", c, Sobjoffset(s), Soffset(s), Ssize(s));
			goto err;
		}
		 /* it could be a number or an indirect object */
		Sungetc(s);
		if((o = calloc(1, sizeof(*o))) == nil)
			goto err;
		o->pdf = pdf;
		// IMPORTANT: since we modify the union here, we MUST NOT
		// assume any members are zero initialized! Might be worth
		// switching to malloc to avoid giving that false impression.
		Sgetd(s, &o->num.d); /* get the first number */
		o->num.i = o->num.d;
		off = Soffset(s); /* seek here if not an indirect object later */

		if((flags & Norecurse) == 0
		&& (o2 = pdfobj(pdf, s, Norecurse)) != nil
		&& o2->type == Onum){ /* second object is number too */
			do; while(isws(c = Sgetc(s)));
			if(c < 0)
				goto err;
			if(c == 'R'){ /* indirect object */
				o->type = Oindir;
				o->indir.id = o->num.i;
				o->indir.gen = o2->num.i;
				// The union was written above; this is NOT zero initialized by calloc
				o->indir.o = nil;
				pdfobjfree(o2);
				return o;
			}
			if(c == 'o' && Sgetc(s) == 'b' && Sgetc(s) == 'j'){ /* object */
				xref.id = o->num.i;
				xref.gen = o2->num.i;
				/* FIXME put into a map */
				pdfobjfree(o2);
				if((o2 = pdfobj(pdf, s, 0)) != nil){
					pdfobjfree(o);
					return o2;
				}else{
					werrstr("obj: %r");
					goto err;
				}
			}
		}
		/*
		 * The lookahead object is no longer needed.  Clear the
		 * pointer after releasing it: the seek failure path below
		 * jumps to err, which would otherwise free it a second time.
		 */
		pdfobjfree(o2);
		o2 = nil;

		/* just a number, go back and return it */
		o->type = Onum;
		if(Sseek(s, off, 0) != off){
			werrstr("seek failed");
			goto err;
		}
		return o;
	}

err:
	/* prefix whatever error string is already set */
	werrstr("object: %r");
	pdfobjfree(o);
	pdfobjfree(o2);
	return nil;
}

/*
 * Drops one reference to o; nil is accepted and ignored.
 *
 * The count is decremented first.  While it is still non-negative
 * nothing else happens.  Once it has gone below zero, whatever the
 * object owns (string, array elements, dictionary entries) is
 * released, followed by the object itself.  Objects of type Onull
 * are never freed.
 */
void
pdfobjfree(Object *o)
{
	int k;

	if(o == nil)
		return;
	o->ref--;
	if(o->ref >= 0)
		return;
	if(o->type == Onull)
		return;

	switch(o->type){
	case Odict:
	case Ostream:
		/* a stream's entries are reached through the dict member too */
		k = 0;
		while(k < o->dict.nkv){
			free(o->dict.kv[k].key);
			pdfobjfree(o->dict.kv[k].value);
			k++;
		}
		free(o->dict.kv);
		break;

	case Oarray:
		k = 0;
		while(k < o->array.ne){
			pdfobjfree(o->array.e[k]);
			k++;
		}
		free(o->array.e);
		break;

	case Oname:
	case Oop:
	case Ostr:
		free(o->str);
		break;

	default:
		/* Obool, Onum, Oindir and anything else: only the object itself */
		break;
	}

	free(o);
}

/*
 * Takes one more reference to o and returns o, so the call can be
 * used inline in an expression.
 *
 * A nil o is passed through untouched, in line with pdfobjfree,
 * which accepts nil as well; this keeps pdfref(pdfobj(...)) from
 * dereferencing nil when parsing failed.
 */
Object *
pdfref(Object *o)
{
	if(o != nil)
		o->ref++;
	return o;
}