ref: c3a035d6f9d8180f330059256a086695164b922e
parent: 157e30c5b46fb90e193c2a15d7a9b6a1b7bce13c
author: Tor Andersson <tor@ccxvii.net>
date: Thu Jan 9 11:07:08 EST 2014
Use setjmp/longjmp to handle parsing errors.
--- a/js-ast.c
+++ b/js-ast.c
@@ -6,6 +6,7 @@
js_Ast *node = malloc(sizeof(js_Ast));
node->type = type;
+ node->line = J->yyline;
node->a = a;
node->b = b;
node->c = c;
@@ -13,6 +14,9 @@
node->n = 0;
node->s = NULL;
+ node->next = J->ast;
+ J->ast = node;
+
return node;
}
@@ -28,6 +32,17 @@
js_Ast *node = jsP_newnode(J, type, 0, 0, 0, 0);
node->n = n;
return node;
+}
+
+void jsP_freeast(js_State *J)
+{
+ js_Ast *node = J->ast;
+ while (node) {
+ js_Ast *next = node->next;
+ free(node);
+ node = next;
+ }
+ J->ast = NULL;
}
static const char *strast(int type)
--- a/js-ast.h
+++ b/js-ast.h
@@ -1,15 +1,14 @@
#ifndef js_ast_h
#define js_ast_h
-typedef struct js_Ast js_Ast;
-
struct js_Ast
{
int type;
- int op;
+ int line;
js_Ast *a, *b, *c, *d;
double n;
const char *s;
+ js_Ast *next; /* next in alloc list */
};
enum
@@ -118,5 +117,6 @@
js_Ast *jsP_newnode(js_State *J, int type, js_Ast *a, js_Ast *b, js_Ast *c, js_Ast *d);
js_Ast *jsP_newsnode(js_State *J, int type, const char *s);
js_Ast *jsP_newnnode(js_State *J, int type, double n);
+void jsP_freeast(js_State *J);
#endif
--- a/js-lex.c
+++ b/js-lex.c
@@ -1,17 +1,6 @@
#include "js.h"
#include "js-parse.h"
-static int syntaxerror(js_State *J, const char *fmt, ...)
-{
- va_list ap;
- fprintf(stderr, "syntax error: line %d: ", J->yyline);
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- fprintf(stderr, "\n");
- return TK_ERROR;
-}
-
#define nelem(a) (sizeof (a) / sizeof (a)[0])
static const char *keywords[] = {
@@ -54,9 +43,9 @@
return TK_BREAK + i; /* first keyword + i */
if (findword(s, futurewords, nelem(futurewords)) >= 0)
- return syntaxerror(J, "'%s' is a future reserved word", s);
+ return jsP_error(J, "'%s' is a future reserved word", s);
if (J->strict && findword(s, strictfuturewords, nelem(strictfuturewords)) >= 0)
- return syntaxerror(J, "'%s' is a strict mode future reserved word", s);
+ return jsP_error(J, "'%s' is a strict mode future reserved word", s);
return TK_IDENTIFIER;
}
@@ -210,13 +199,13 @@
if ((*sp)[0] == '0' && ((*sp)[1] == 'x' || (*sp)[1] == 'X')) {
*sp += 2;
if (!ishex(PEEK()))
- return syntaxerror(J, "0x not followed by hexademical digit");
+ return jsP_error(J, "0x not followed by hexademical digit");
J->yynumber = lexhex(sp);
return TK_NUMBER;
}
if ((*sp)[0] == '0' && isdec((*sp)[1]))
- return syntaxerror(J, "number with leading zero");
+ return jsP_error(J, "number with leading zero");
n = lexinteger(sp);
if (LOOK('.'))
@@ -224,7 +213,7 @@
n *= pow(10, lexexponent(sp));
if (isidentifierstart(PEEK()))
- return syntaxerror(J, "number with letter suffix");
+ return jsP_error(J, "number with letter suffix");
J->yynumber = n;
return TK_NUMBER;
@@ -277,10 +266,10 @@
while (c != q) {
if (c == 0 || isnewline(c))
- return syntaxerror(J, "string not terminated");
+ return jsP_error(J, "string not terminated");
if (c == '\\') {
if (lexescape(J, sp))
- return syntaxerror(J, "malformed escape sequence");
+ return jsP_error(J, "malformed escape sequence");
} else {
textpush(J, c);
}
@@ -321,12 +310,12 @@
c = GET();
while (c != '/') {
if (c == 0 || isnewline(c)) {
- return syntaxerror(J, "regular expression not terminated");
+ return jsP_error(J, "regular expression not terminated");
} else if (c == '\\') {
textpush(J, c);
c = GET();
if (c == 0 || isnewline(c))
- return syntaxerror(J, "regular expression not terminated");
+ return jsP_error(J, "regular expression not terminated");
textpush(J, c);
c = GET();
} else {
@@ -345,12 +334,12 @@
if (c == 'g') J->yyflags.g ++;
else if (c == 'i') J->yyflags.i ++;
else if (c == 'm') J->yyflags.m ++;
- else return syntaxerror(J, "illegal flag in regular expression: %c", c);
+ else return jsP_error(J, "illegal flag in regular expression: %c", c);
c = NEXTPEEK();
}
if (J->yyflags.g > 1 || J->yyflags.i > 1 || J->yyflags.m > 1)
- return syntaxerror(J, "duplicated flag in regular expression");
+ return jsP_error(J, "duplicated flag in regular expression");
return TK_REGEXP;
}
@@ -396,7 +385,7 @@
continue;
} else if (LOOK('*')) {
if (lexcomment(sp))
- return syntaxerror(J, "multi-line comment not terminated");
+ return jsP_error(J, "multi-line comment not terminated");
continue;
} else if (isregexpcontext(J->lasttoken)) {
return lexregexp(J, sp);
@@ -541,8 +530,8 @@
}
if (c >= 0x20 && c <= 0x7E)
- return syntaxerror(J, "unexpected character: '%c'", c);
- return syntaxerror(J, "unexpected character: \\u%04X", c);
+ return jsP_error(J, "unexpected character: '%c'", c);
+ return jsP_error(J, "unexpected character: \\u%04X", c);
}
}
--- a/js-load.c
+++ b/js-load.c
@@ -1,7 +1,7 @@
#include "js.h"
#include "js-parse.h"
-int js_loadstring(js_State *J, const char *source)
+static int jsP_loadstring(js_State *J, const char *source)
{
int t;
@@ -12,6 +12,13 @@
return 0;
}
+
+int js_loadstring(js_State *J, const char *source)
+{
+ J->yyfilename = "(string)";
+ return jsP_loadstring(J, source);
+}
+
int js_loadfile(js_State *J, const char *filename)
{
FILE *f;
@@ -41,7 +48,8 @@
s[n] = 0; /* zero-terminate string containing file data */
- t = js_loadstring(J, s);
+ J->yyfilename = filename;
+ t = jsP_loadstring(J, s);
free(s);
fclose(f);
--- a/js-parse.c
+++ b/js-parse.c
@@ -41,24 +41,22 @@
return 0;
}
-static int expect(js_State *J, int t)
+static void expect(js_State *J, int t)
{
if (accept(J, t))
- return 1;
- fprintf(stderr, "syntax error: unexpected token %d (expected %d)\n", J->lookahead, t);
- return 0;
+ return;
+ jsP_error(J, "unexpected token %d (expected %d)", J->lookahead, t);
}
-static int semicolon(js_State *J)
+static void semicolon(js_State *J)
{
if (J->lookahead == ';') {
next(J);
- return 1;
+ return;
}
if (J->newline || J->lookahead == '}' || J->lookahead == 0)
- return 1;
- fprintf(stderr, "syntax error: expected semicolon\n");
- return 0;
+ return;
+ jsP_error(J, "unexpected token %d (expected semicolon)", J->lookahead);
}
static js_Ast *identifier(js_State *J)
@@ -68,50 +66,19 @@
next(J);
return a;
}
- fprintf(stderr, "syntax error: expected identifier\n");
+ jsP_error(J, "unexpected token %d (expected identifier)", J->lookahead);
return NULL;
}
static js_Ast *identifiername(js_State *J)
{
- js_Ast *a = NULL;
- switch (J->lookahead) {
- case TK_IDENTIFIER: a = ID(J->yytext); break;
- case TK_BREAK: a = ID("break"); break;
- case TK_CASE: a = ID("case"); break;
- case TK_CATCH: a = ID("catch"); break;
- case TK_CONTINUE: a = ID("continue"); break;
- case TK_DEBUGGER: a = ID("debugger"); break;
- case TK_DEFAULT: a = ID("default"); break;
- case TK_DELETE: a = ID("delete"); break;
- case TK_DO: a = ID("do"); break;
- case TK_ELSE: a = ID("else"); break;
- case TK_FALSE: a = ID("false"); break;
- case TK_FINALLY: a = ID("finally"); break;
- case TK_FOR: a = ID("for"); break;
- case TK_FUNCTION: a = ID("function"); break;
- case TK_IF: a = ID("if"); break;
- case TK_IN: a = ID("in"); break;
- case TK_INSTANCEOF: a = ID("instanceof"); break;
- case TK_NEW: a = ID("new"); break;
- case TK_NULL: a = ID("null"); break;
- case TK_RETURN: a = ID("a ="); break;
- case TK_SWITCH: a = ID("switch"); break;
- case TK_THIS: a = ID("this"); break;
- case TK_THROW: a = ID("throw"); break;
- case TK_TRUE: a = ID("true"); break;
- case TK_TRY: a = ID("try"); break;
- case TK_TYPEOF: a = ID("typeof"); break;
- case TK_VAR: a = ID("var"); break;
- case TK_VOID: a = ID("void"); break;
- case TK_WHILE: a = ID("while"); break;
- case TK_WITH: a = ID("with"); break;
- default:
- fprintf(stderr, "syntax error: expected identifier name\n");
- return NULL;
+ if (J->lookahead == TK_IDENTIFIER || J->lookahead >= TK_BREAK) {
+ js_Ast *a = ID(J->yytext);
+ next(J);
+ return a;
}
- next(J);
- return a;
+ jsP_error(J, "unexpected token %d (expected identifier or keyword)", J->lookahead);
+ return NULL;
}
static js_Ast *arguments(js_State *J)
@@ -240,7 +207,7 @@
if (accept(J, '{')) { a = EXP1(OBJECT, objectliteral(J)); expect(J, '}'); return a; }
if (accept(J, '[')) { a = EXP1(ARRAY, arrayliteral(J)); expect(J, ']'); return a; }
if (accept(J, '(')) { a = expression(J, 0); expect(J, ')'); return a; }
- fprintf(stderr, "syntax error\n");
+ jsP_error(J, "unexpected token in primary expression: %d", J->lookahead);
return NULL;
}
@@ -451,12 +418,37 @@
void program(js_State *J)
{
next(J);
- while (J->lookahead != 0 && J->lookahead != TK_ERROR)
+ while (J->lookahead != 0)
statement(J);
}
+int jsP_error(js_State *J, const char *fmt, ...)
+{
+ va_list ap;
+
+ fprintf(stderr, "syntax error: %s:%d: ", J->yyfilename, J->yyline);
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "\n");
+
+ longjmp(J->jb, 1);
+ return 0;
+}
+
int jsP_parse(js_State *J)
{
+ if (setjmp(J->jb)) {
+ jsP_freeast(J);
+ return 1;
+ }
+
program(J);
+
+ // TODO: compile to bytecode
+
+ jsP_freeast(J);
return 0;
}
--- a/js-parse.h
+++ b/js-parse.h
@@ -2,9 +2,7 @@
#define js_parse_h
enum {
- TK_ERROR = 257,
-
- TK_IDENTIFIER,
+ TK_IDENTIFIER = 257,
TK_NUMBER,
TK_STRING,
TK_REGEXP,
--- a/js.h
+++ b/js.h
@@ -6,6 +6,7 @@
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
+#include <setjmp.h>
#include <math.h>
typedef struct js_State js_State;
@@ -25,25 +26,33 @@
/* private */
+typedef struct js_Ast js_Ast;
+
void jsP_initlex(js_State *J, const char *source);
int jsP_lex(js_State *J);
int jsP_parse(js_State *J);
+int jsP_error(js_State *J, const char *fmt, ...);
void js_printstringtree(js_State *J);
struct js_State
{
+ const char *yyfilename;
const char *yysource;
+ int yyline;
+
char *yytext;
size_t yylen, yycap;
double yynumber;
struct { int g, i, m; } yyflags;
- int yyline;
int lasttoken;
int newline;
+
int strict;
int lookahead;
+ jmp_buf jb; /* setjmp buffer for error handling in parser */
+ js_Ast *ast; /* list of allocated nodes to free after parsing */
js_StringNode *strings;
};