shithub: scc

Download patch

ref: 1ffd5f6ee169b5d76958348b3376d82a66a75d63
parent: c990953ff9d588de699e0a19420b5cc2d7e0db51
author: Roberto E. Vargas Caballero <k0ga@shike2.com>
date: Sun Jun 10 08:14:12 EDT 2012

Improved lexical analysis

This patch removes the complex nested switch from the function next(), which
is now much clearer.

--- a/decl.c
+++ b/decl.c
@@ -10,11 +10,20 @@
 
 char parser_out_home;
 
-#include <stdio.h>	/* TODO: remove this */
 
 static void declarator(void);
 
+static struct symbol *newiden(char *s, unsigned char key)
+{
+	register struct symbol *sym = lookup(yytext, yyhash);
 
+	if (!sym)
+		sym = install(yytext, yyhash);
+	if (sym->level == nested_level)
+		error("redeclaration of '%s'", yytext);
+	return sym;
+}
+
 static void dirdcl(void)
 {
 	if (accept('(')) {
@@ -21,9 +30,7 @@
 		declarator();
 		expect(')');
 	} else if (yytoken == IDEN) {
-		if (yyval.sym && yyval.sym->level == nested_level)
-			error("redeclaration of '%s'", yytext);
-		addsym(yytext, yyhash);
+		newiden(yytext, yyhash);
 		next();
 	} else {
 		error("expected '(' or identifier before of '%s'", yytext);
--- a/lex.c
+++ b/lex.c
@@ -111,9 +111,20 @@
 	return CONSTANT;
 }
 
-static unsigned char iden(void)
+static unsigned char keyword(const char *s, unsigned char key)
 {
 	register struct keyword *kwp;
+
+	key &= NR_KWD_HASH - 1;
+	for (kwp = khash[key]; kwp; kwp = kwp->next) {
+		if (!strcmp(kwp->str, yytext))
+			return kwp->tok;
+	}
+	return 0;
+}
+
+static unsigned char iden(void)
+{
 	register char ch;
 	register char *bp = yytext;
 
@@ -124,27 +135,21 @@
 	}
 	if (bp == yytext + TOKSIZ_MAX)
 		error("identifier too long %s", yytext);
-	ungetc(ch, yyin);
 	*bp = '\0';
-	yyhash &= NR_KWD_HASH - 1;
-	for (kwp = khash[yyhash]; kwp; kwp = kwp->next) {
-		if (!strcmp(kwp->str, yytext))
-			return kwp->tok;
-	}
-	yyval.sym = lookupsym(yytext, yyhash);
+	ungetc(ch, yyin);
+
+	if (ch = keyword(yytext, yyhash))
+		return ch;
 	return IDEN;;
 }
 
-
-
-unsigned char next(void)
+static unsigned char skip(void)
 {
-	static unsigned int c;
-	register unsigned char ch;
+	register int c;
 	extern char parser_out_home;
 
 	while (isspace(c = getc(yyin))) {
-		if ((char) c == '\n')
+		if (c == '\n')
 			++linenum, columnum = 1;
 		else
 			++columnum;
@@ -152,97 +157,85 @@
 	if (c == EOF) {
 		if (parser_out_home)
 			error("Find EOF while parsing");
-		ch = EOFTOK;
-		memcpy(yytext, "EOF", sizeof("EOF"));
-		goto return_token;
+		return 1;
 	}
-	ch = c;
-	if (isalpha(ch) || ch == '_') {
-		ungetc(ch, yyin);
-		ch = iden();
-	} else if (isdigit(ch)) {
-		ungetc(ch, yyin);
-		ch = number();
-	} else {
-		register unsigned char aux;;
-		aux = getc(yyin);
-		yytext[0] = ch;
-		yytext[1] = aux;
-		yytext[2] = '\0';
+	ungetc(c, yyin);
+	return 0;
+}
+
+static unsigned char
+follow(unsigned char op, unsigned char eq, unsigned char rep)
+{
+	register char c;
+
+	if ((c = getc(yyin)) == '=')
+		return eq;
+	else if (c == op && rep)
+		return rep;
+	ungetc(c, yyin);
+	return op;
+}
+
+static unsigned char rel_shift(unsigned char op)
+{
+	static char tokens[2][3] = {
+		{GE, LSHIFT, LSHIFT_EQ},
+		{LE, RSHIFT, RSHIFT_EQ}};
+	register char c;
+	register char *tp = tokens[op == '>'];
 
-		switch (ch) {
-		case '&':
-			switch (aux) {
-			case '&': ch = AND; break;
-			case '=': ch = AND_EQ; break;
-			default:  goto no_doble_character;
-			}
-			break;
-		case '|':
-			switch (aux) {
-			case '|': ch = OR; break;
-			case '=': ch = OR_EQ; break;
-			default: goto no_doble_character;
-			}
-			break;
-		case '<':
-			switch (aux) {
-			case '<':  ch = LSHIFT; break;
-			case '=':  ch = LSHIFT_EQ; break;
-			default: goto no_doble_character;
-			}
-			break;
-		case '>':
-			switch (aux) {
-			case '<':  ch = RSHIFT; break;
-			case '=':  ch = RSHIFT_EQ; break;
-			default: goto no_doble_character;
-			}
-			break;
-		case '-':
-			switch (aux) {
-			case '-':  ch = DEC; break;
-			case '>':  ch = PTR; break;
-			case '=':  ch = SUB_EQ; break;
-			default: goto no_doble_character;
-			}
-			break;
-		case '=':
-			if (aux == '=') ch = EQ;
-			else goto no_doble_character;
-			break;
-		case '^':
-			if (aux == '=') ch = XOR_EQ;
-			else goto no_doble_character;
-			break;
-		case '*':
-			if (aux == '=') ch = LSHIFT_EQ;
-			else goto no_doble_character;
-			break;
-		case '+':
-			if (aux == '+')  ch = INC;
-			else if (aux == '=') ch = ADD_EQ;
-			else goto no_doble_character;
-			break;
-		case '!':
-			if (aux == '=') {
-				ch = NE;
-				break;
-			}
-		no_doble_character:
-		case '/': case ';': case '{': case '}':
-		case '(': case ')': case '~': case ',':
-		case '?': case '[': case ']': case ':':
-			ungetc(aux, yyin);
-			yytext[1] = '\0';
-			break;
-		default:
-			error("Incorrect character '%02x", c);
-		}
+	if ((c = getc(yyin)) == '=') {
+		return tp[0];
+	} else if (c == op) {
+		if ((c = getc(yyin)) == '=')
+			return tp[2];
+		op = tp[1];
 	}
+	ungetc(c, yyin);
+	return c;
+}
 
-return_token:
-	return yytoken = ch;
+static unsigned char minus(void)
+{
+	register int c;
+
+	switch (c = getc(yyin)) {
+	case '-': return DEC;
+	case '>': return PTR;
+	case '=': return SUB_EQ;
+	default:
+		ungetc(c, yyin);
+		return '-';
+	}
+}
+
+unsigned char next(void)
+{
+	register unsigned char c;
+
+	if (!skip())
+		c = EOFTOK;
+	if (isalpha(c = getc(yyin)) || c == '_') {
+		ungetc(c, yyin);
+		c = iden();
+	} else if (isdigit(c)) {
+		ungetc(c, yyin);
+		c = number();
+	} else {
+		switch (c) {
+		case '=': c = follow('=', EQ, 0); break;
+		case '^': c = follow('^', XOR_EQ, 0); break;
+		case '*': c = follow('*', MUL_EQ, 0); break;
+		case '!': c = follow('!', NE, 0); break;
+		case '+': c = follow('+', ADD_EQ, INC); break;
+		case '&': c = follow('&', AND_EQ, AND); break;
+		case '|': c = follow('|', OR_EQ, OR); break;
+		case '<': c = rel_shift('<'); break;
+		case '>': c = rel_shift('>'); break;
+		case '-': c = minus(); break;
+		}
+	}
+	return yytoken = c;
 }
 
 char accept(unsigned char tok)
--- a/symbol.c
+++ b/symbol.c
@@ -21,10 +21,10 @@
 
 static void del_hash_ctx(struct symhash *htable, struct symbol *lim)
 {
-	register struct symbol *bp;
+	register struct symbol *bp, *next, *prev;
 
 	for (bp = htable->top; bp && bp != lim; bp = bp->next) {
-		register struct symbol *next = bp->h_next, *prev = bp->h_prev;
+		next = bp->h_next, prev = bp->h_prev;
 		prev->h_next = next;
 		next->h_prev = prev;
 		free(bp->str);
@@ -46,27 +46,31 @@
 	del_hash_ctx(&iden_hash, ctx_head->next->iden);
 }
 
-struct symbol *addsym(const char *s, unsigned char key)
+struct symbol *install(const char *s, unsigned char key)
 {
 	static struct symbol *head;
 	register struct symbol *sym, *next;
 
 	sym = xmalloc(sizeof(*sym));
-	sym->str = xstrdup(s);
+
 	sym->next = iden_hash.top;
 	iden_hash.top = sym;
 
-	head = &iden_hash.buf[key], next = head->h_next;
+	if (s) {
+		sym->str = xstrdup(s);
 
-	sym->h_next = next;
-	sym->h_prev = next->h_prev;
-	head->h_next = sym;
-	next->h_prev = sym;
-
+		head = &iden_hash.buf[key], next = head->h_next;
+		sym->h_next = next;
+		sym->h_prev = next->h_prev;
+		head->h_next = sym;
+		next->h_prev = sym;
+	} else {
+		sym->h_next = sym->h_prev = sym->str = NULL;
+	}
 	return sym;
 }
 
-struct symbol *lookupsym(char *s, unsigned char key)
+struct symbol *lookup(char *s, unsigned char key)
 {
 	register struct symbol *bp, *head;
 
@@ -80,7 +84,7 @@
 
 void init_symbol(void)
 {
-	struct symbol *bp;
+	register struct symbol *bp;
 
 	for (bp = iden_hash.buf; bp < &iden_hash.buf[NR_SYM_HASH]; ++bp)
 		bp->h_next = bp->h_prev = bp;
--