shithub: libmujs

Download patch

ref: c3a035d6f9d8180f330059256a086695164b922e
parent: 157e30c5b46fb90e193c2a15d7a9b6a1b7bce13c
author: Tor Andersson <tor@ccxvii.net>
date: Thu Jan 9 11:07:08 EST 2014

Use setjmp/longjmp to handle parsing errors.

--- a/js-ast.c
+++ b/js-ast.c
@@ -6,6 +6,7 @@
 	js_Ast *node = malloc(sizeof(js_Ast));
 
 	node->type = type;
+	node->line = J->yyline;
 	node->a = a;
 	node->b = b;
 	node->c = c;
@@ -13,6 +14,9 @@
 	node->n = 0;
 	node->s = NULL;
 
+	node->next = J->ast;
+	J->ast = node;
+
 	return node;
 }
 
@@ -28,6 +32,17 @@
 	js_Ast *node = jsP_newnode(J, type, 0, 0, 0, 0);
 	node->n = n;
 	return node;
+}
+
+void jsP_freeast(js_State *J) /* free every node on the J->ast allocation list */
+{
+	js_Ast *node = J->ast; /* head of the list that jsP_newnode pushes onto */
+	while (node) {
+		js_Ast *next = node->next; /* read the link before the node is freed */
+		free(node);
+		node = next;
+	}
+	J->ast = NULL; /* list is empty again */
+}
 
 static const char *strast(int type)
--- a/js-ast.h
+++ b/js-ast.h
@@ -1,15 +1,14 @@
 #ifndef js_ast_h
 #define js_ast_h
 
-typedef struct js_Ast js_Ast;
-
 struct js_Ast
 {
 	int type;
-	int op;
+	int line; /* set from J->yyline when the node is created */
 	js_Ast *a, *b, *c, *d;
 	double n;
 	const char *s;
+	js_Ast *next; /* next in alloc list (headed by J->ast, walked by jsP_freeast) */
 };
 
 enum
@@ -118,5 +117,6 @@
 js_Ast *jsP_newnode(js_State *J, int type, js_Ast *a, js_Ast *b, js_Ast *c, js_Ast *d);
 js_Ast *jsP_newsnode(js_State *J, int type, const char *s);
 js_Ast *jsP_newnnode(js_State *J, int type, double n);
+void jsP_freeast(js_State *J);
 
 #endif
--- a/js-lex.c
+++ b/js-lex.c
@@ -1,17 +1,6 @@
 #include "js.h"
 #include "js-parse.h"
 
-static int syntaxerror(js_State *J, const char *fmt, ...)
-{
-	va_list ap;
-	fprintf(stderr, "syntax error: line %d: ", J->yyline);
-	va_start(ap, fmt);
-	vfprintf(stderr, fmt, ap);
-	va_end(ap);
-	fprintf(stderr, "\n");
-	return TK_ERROR;
-}
-
 #define nelem(a) (sizeof (a) / sizeof (a)[0])
 
 static const char *keywords[] = {
@@ -54,9 +43,9 @@
 		return TK_BREAK + i; /* first keyword + i */
 
 	if (findword(s, futurewords, nelem(futurewords)) >= 0)
-		return syntaxerror(J, "'%s' is a future reserved word", s);
+		return jsP_error(J, "'%s' is a future reserved word", s);
 	if (J->strict && findword(s, strictfuturewords, nelem(strictfuturewords)) >= 0)
-		return syntaxerror(J, "'%s' is a strict mode future reserved word", s);
+		return jsP_error(J, "'%s' is a strict mode future reserved word", s);
 
 	return TK_IDENTIFIER;
 }
@@ -210,13 +199,13 @@
 	if ((*sp)[0] == '0' && ((*sp)[1] == 'x' || (*sp)[1] == 'X')) {
 		*sp += 2;
 		if (!ishex(PEEK()))
-			return syntaxerror(J, "0x not followed by hexademical digit");
+			return jsP_error(J, "0x not followed by hexademical digit");
 		J->yynumber = lexhex(sp);
 		return TK_NUMBER;
 	}
 
 	if ((*sp)[0] == '0' && isdec((*sp)[1]))
-		return syntaxerror(J, "number with leading zero");
+		return jsP_error(J, "number with leading zero");
 
 	n = lexinteger(sp);
 	if (LOOK('.'))
@@ -224,7 +213,7 @@
 	n *= pow(10, lexexponent(sp));
 
 	if (isidentifierstart(PEEK()))
-		return syntaxerror(J, "number with letter suffix");
+		return jsP_error(J, "number with letter suffix");
 
 	J->yynumber = n;
 	return TK_NUMBER;
@@ -277,10 +266,10 @@
 
 	while (c != q) {
 		if (c == 0 || isnewline(c))
-			return syntaxerror(J, "string not terminated");
+			return jsP_error(J, "string not terminated");
 		if (c == '\\') {
 			if (lexescape(J, sp))
-				return syntaxerror(J, "malformed escape sequence");
+				return jsP_error(J, "malformed escape sequence");
 		} else {
 			textpush(J, c);
 		}
@@ -321,12 +310,12 @@
 	c = GET();
 	while (c != '/') {
 		if (c == 0 || isnewline(c)) {
-			return syntaxerror(J, "regular expression not terminated");
+			return jsP_error(J, "regular expression not terminated");
 		} else if (c == '\\') {
 			textpush(J, c);
 			c = GET();
 			if (c == 0 || isnewline(c))
-				return syntaxerror(J, "regular expression not terminated");
+				return jsP_error(J, "regular expression not terminated");
 			textpush(J, c);
 			c = GET();
 		} else {
@@ -345,12 +334,12 @@
 		if (c == 'g') J->yyflags.g ++;
 		else if (c == 'i') J->yyflags.i ++;
 		else if (c == 'm') J->yyflags.m ++;
-		else return syntaxerror(J, "illegal flag in regular expression: %c", c);
+		else return jsP_error(J, "illegal flag in regular expression: %c", c);
 		c = NEXTPEEK();
 	}
 
 	if (J->yyflags.g > 1 || J->yyflags.i > 1 || J->yyflags.m > 1)
-		return syntaxerror(J, "duplicated flag in regular expression");
+		return jsP_error(J, "duplicated flag in regular expression");
 
 	return TK_REGEXP;
 }
@@ -396,7 +385,7 @@
 				continue;
 			} else if (LOOK('*')) {
 				if (lexcomment(sp))
-					return syntaxerror(J, "multi-line comment not terminated");
+					return jsP_error(J, "multi-line comment not terminated");
 				continue;
 			} else if (isregexpcontext(J->lasttoken)) {
 				return lexregexp(J, sp);
@@ -541,8 +530,8 @@
 		}
 
 		if (c >= 0x20 && c <= 0x7E)
-			return syntaxerror(J, "unexpected character: '%c'", c);
-		return syntaxerror(J, "unexpected character: \\u%04X", c);
+			return jsP_error(J, "unexpected character: '%c'", c);
+		return jsP_error(J, "unexpected character: \\u%04X", c);
 	}
 }
 
--- a/js-load.c
+++ b/js-load.c
@@ -1,7 +1,7 @@
 #include "js.h"
 #include "js-parse.h"
 
-int js_loadstring(js_State *J, const char *source)
+static int jsP_loadstring(js_State *J, const char *source)
 {
 	int t;
 
@@ -12,6 +12,13 @@
 	return 0;
 }
 
+
+int js_loadstring(js_State *J, const char *source)
+{
+	J->yyfilename = "(string)"; /* placeholder name that jsP_error prints for in-memory source */
+	return jsP_loadstring(J, source);
+}
+
 int js_loadfile(js_State *J, const char *filename)
 {
 	FILE *f;
@@ -41,7 +48,8 @@
 
 	s[n] = 0; /* zero-terminate string containing file data */
 
-	t = js_loadstring(J, s);
+	J->yyfilename = filename;
+	t = jsP_loadstring(J, s);
 
 	free(s);
 	fclose(f);
--- a/js-parse.c
+++ b/js-parse.c
@@ -41,24 +41,22 @@
 	return 0;
 }
 
-static int expect(js_State *J, int t)
+static void expect(js_State *J, int t)
 {
 	if (accept(J, t))
-		return 1;
-	fprintf(stderr, "syntax error: unexpected token %d (expected %d)\n", J->lookahead, t);
-	return 0;
+		return;
+	jsP_error(J, "unexpected token %d (expected %d)", J->lookahead, t); /* does not return: jsP_error longjmps to J->jb */
 }
 
-static int semicolon(js_State *J)
+static void semicolon(js_State *J)
 {
 	if (J->lookahead == ';') {
 		next(J);
-		return 1;
+		return;
 	}
 	if (J->newline || J->lookahead == '}' || J->lookahead == 0)
-		return 1;
-	fprintf(stderr, "syntax error: expected semicolon\n");
-	return 0;
+		return; /* no ';' consumed: a newline, '}' or lookahead 0 is accepted in its place */
+	jsP_error(J, "unexpected token %d (expected semicolon)", J->lookahead); /* does not return: jsP_error longjmps to J->jb */
 }
 
 static js_Ast *identifier(js_State *J)
@@ -68,50 +66,19 @@
 		next(J);
 		return a;
 	}
-	fprintf(stderr, "syntax error: expected identifier\n");
+	jsP_error(J, "unexpected token %d (expected identifier)", J->lookahead);
 	return NULL;
 }
 
 static js_Ast *identifiername(js_State *J)
 {
-	js_Ast *a = NULL;
-	switch (J->lookahead) {
-	case TK_IDENTIFIER: a = ID(J->yytext); break;
-	case TK_BREAK: a = ID("break"); break;
-	case TK_CASE: a = ID("case"); break;
-	case TK_CATCH: a = ID("catch"); break;
-	case TK_CONTINUE: a = ID("continue"); break;
-	case TK_DEBUGGER: a = ID("debugger"); break;
-	case TK_DEFAULT: a = ID("default"); break;
-	case TK_DELETE: a = ID("delete"); break;
-	case TK_DO: a = ID("do"); break;
-	case TK_ELSE: a = ID("else"); break;
-	case TK_FALSE: a = ID("false"); break;
-	case TK_FINALLY: a = ID("finally"); break;
-	case TK_FOR: a = ID("for"); break;
-	case TK_FUNCTION: a = ID("function"); break;
-	case TK_IF: a = ID("if"); break;
-	case TK_IN: a = ID("in"); break;
-	case TK_INSTANCEOF: a = ID("instanceof"); break;
-	case TK_NEW: a = ID("new"); break;
-	case TK_NULL: a = ID("null"); break;
-	case TK_RETURN: a = ID("a ="); break;
-	case TK_SWITCH: a = ID("switch"); break;
-	case TK_THIS: a = ID("this"); break;
-	case TK_THROW: a = ID("throw"); break;
-	case TK_TRUE: a = ID("true"); break;
-	case TK_TRY: a = ID("try"); break;
-	case TK_TYPEOF: a = ID("typeof"); break;
-	case TK_VAR: a = ID("var"); break;
-	case TK_VOID: a = ID("void"); break;
-	case TK_WHILE: a = ID("while"); break;
-	case TK_WITH: a = ID("with"); break;
-	default:
-		fprintf(stderr, "syntax error: expected identifier name\n");
-		return NULL;
+	if (J->lookahead == TK_IDENTIFIER || J->lookahead >= TK_BREAK) { /* TK_BREAK is the first keyword; NOTE(review): assumes every later token value is a keyword - confirm against the enum */
+		js_Ast *a = ID(J->yytext); /* NOTE(review): relies on yytext still holding the keyword's spelling - confirm in the lexer */
+		next(J);
+		return a;
 	}
-	next(J);
-	return a;
+	jsP_error(J, "unexpected token %d (expected identifier or keyword)", J->lookahead); /* does not return: jsP_error longjmps to J->jb */
+	return NULL; /* not reached */
 }
 
 static js_Ast *arguments(js_State *J)
@@ -240,7 +207,7 @@
 	if (accept(J, '{')) { a = EXP1(OBJECT, objectliteral(J)); expect(J, '}'); return a; }
 	if (accept(J, '[')) { a = EXP1(ARRAY, arrayliteral(J)); expect(J, ']'); return a; }
 	if (accept(J, '(')) { a = expression(J, 0); expect(J, ')'); return a; }
-	fprintf(stderr, "syntax error\n");
+	jsP_error(J, "unexpected token in primary expression: %d", J->lookahead);
 	return NULL;
 }
 
@@ -451,12 +418,37 @@
 void program(js_State *J)
 {
 	next(J);
-	while (J->lookahead != 0 && J->lookahead != TK_ERROR)
+	while (J->lookahead != 0) /* TK_ERROR is gone; errors now leave via jsP_error's longjmp */
 		statement(J);
 }
 
+int jsP_error(js_State *J, const char *fmt, ...) /* report a syntax error on stderr, then longjmp to J->jb */
+{
+	va_list ap;
+
+	fprintf(stderr, "syntax error: %s:%d: ", J->yyfilename, J->yyline); /* "file:line" prefix */
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap); /* caller's printf-style message */
+	va_end(ap);
+
+	fprintf(stderr, "\n");
+
+	longjmp(J->jb, 1); /* resumes at the setjmp in jsP_parse, which then sees a non-zero value */
+	return 0; /* not reached; the int return type lets lexer code write "return jsP_error(...)" */
+}
+
 int jsP_parse(js_State *J)
 {
+	if (setjmp(J->jb)) { /* non-zero only after jsP_error has longjmp'd back here */
+		jsP_freeast(J); /* release the nodes allocated before the error */
+		return 1; /* parse failed */
+	}
+
 	program(J);
+
+	// TODO: compile to bytecode
+
+	jsP_freeast(J); /* nodes are released on the success path as well */
 	return 0;
 }
--- a/js-parse.h
+++ b/js-parse.h
@@ -2,9 +2,7 @@
 #define js_parse_h
 
 enum {
-	TK_ERROR = 257,
-
-	TK_IDENTIFIER,
+	TK_IDENTIFIER = 257,
 	TK_NUMBER,
 	TK_STRING,
 	TK_REGEXP,
--- a/js.h
+++ b/js.h
@@ -6,6 +6,7 @@
 #include <stddef.h>
 #include <stdarg.h>
 #include <string.h>
+#include <setjmp.h>
 #include <math.h>
 
 typedef struct js_State js_State;
@@ -25,25 +26,33 @@
 
 /* private */
 
+typedef struct js_Ast js_Ast;
+
 void jsP_initlex(js_State *J, const char *source);
 int jsP_lex(js_State *J);
 int jsP_parse(js_State *J);
+int jsP_error(js_State *J, const char *fmt, ...);
 
 void js_printstringtree(js_State *J);
 
 struct js_State
 {
+	const char *yyfilename; /* source name printed in jsP_error messages */
 	const char *yysource;
+	int yyline; /* printed by jsP_error and copied into each new AST node */
+
 	char *yytext;
 	size_t yylen, yycap;
 	double yynumber;
 	struct { int g, i, m; } yyflags;
-	int yyline;
 	int lasttoken;
 	int newline;
+
 	int strict;
 
 	int lookahead;
+	jmp_buf jb; /* setjmp buffer for error handling in parser (set in jsP_parse, jumped to by jsP_error) */
+	js_Ast *ast; /* list of allocated nodes to free after parsing (see jsP_freeast) */
 
 	js_StringNode *strings;
 };