ref: fc8190e4863a2d300391578be850614b7b3f4b9d
parent: c1c6db84269708d87a5b8d0e78225768c36b36ed
author: Roberto E. Vargas Caballero <k0ga@shike2.com>
date: Wed May 20 14:16:40 EDT 2015
Rewrite lookup() in cc1. This new version removes some ugly cases in decl.c, where we had to look up a symbol and later on insert it. In this new version lookup() takes the name from yytext and inserts a new, undefined symbol when no matching one is found; install() then marks that symbol as defined or reports a redefinition.
--- a/cc1/cc1.h
+++ b/cc1/cc1.h
@@ -91,6 +91,7 @@
NS_TAG,
NS_LABEL,
NS_CPP,
+ NS_KEYWORD,
NS_STRUCTS
};
@@ -251,8 +252,9 @@
extern Type *mktype(Type *tp, uint8_t op, short nelem, void *data);
/* symbol.c */
-extern Symbol *lookup(char *s, unsigned char ns);
-extern Symbol *install(char *s, unsigned char ns);
+extern Symbol *lookup(uint8_t ns);
+extern Symbol *install(uint8_t ns);
+extern Symbol *newsym(uint8_t ns);
extern void pushctx(void), popctx(void);
/* stmt.c */
@@ -273,6 +275,7 @@
extern void setfline(unsigned short line);
extern bool addinput(char *fname);
extern void delinput(void);
+extern void setnamespace(uint8_t ns);
#define accept(t) ((yytoken == (t)) ? next() : 0)
/* code.c */
--- a/cc1/cpp.c
+++ b/cc1/cpp.c
@@ -132,7 +132,8 @@
static char *
define(char *s)
{
- char *t, name[IDENTSIZ+1];
+ extern char yytext[];
+ char *t;
size_t len;
Symbol *sym;
@@ -142,9 +143,15 @@
/* nothing */;
if ((len = t - s) > IDENTSIZ)
goto too_long;
- strncpy(name, s, len);
- name[len] = '\0';
- sym = install(name, NS_CPP);
+ strncpy(yytext, s, len);
+ yytext[len] = '\0';
+ sym = lookup(NS_CPP);
+ if ((sym->flags & ISDEFINED) && sym->ns == NS_CPP) {
+ warn("'%s' redefined", yytext);
+ free(sym->u.s);
+ }
+ sym->flags |= ISDEFINED;
+ sym->ns = NS_CPP;
for (s = t; isspace(*s); ++s)
/* nothing */;
--- a/cc1/decl.c
+++ b/cc1/decl.c
@@ -88,19 +88,6 @@
return queue(dp, FTN, n, tp);
}
-static Symbol *
-newiden(uint8_t ns)
-{
- Symbol *sym;
- extern uint8_t curctx;
-
- if (yylval.sym && yylval.sym->ctx == curctx && yylval.sym->ns == ns)
- error("redeclaration of '%s'", yytext);
- sym = install(yytext, ns);
- next();
- return sym;
-}
-
static struct dcldata *declarator0(struct dcldata *dp, uint8_t ns);
static struct dcldata *
@@ -112,10 +99,13 @@
dp = declarator0(dp, ns);
expect(')');
} else {
- if (yytoken == IDEN || yytoken == TYPEIDEN)
- sym = newiden(ns);
- else
- sym = install(NULL, ns);
+ if (yytoken == IDEN || yytoken == TYPEIDEN) {
+ if ((sym = install(ns)) == NULL)
+ error("redeclaration of '%s'", yytext);
+ next();
+ } else {
+ sym = newsym(ns);
+ }
dp = queue(dp, IDEN, 0, sym);
}
@@ -269,19 +259,22 @@
}
static Symbol *
-newtag(uint8_t tag)
+newtag(void)
{
Symbol *sym;
+ uint8_t tag = yylval.token;
static uint8_t ns = NS_STRUCTS;
+ setnamespace(NS_TAG);
+ next();
switch (yytoken) {
- case IDEN: case TYPEIDEN:
- if ((sym = lookup(yytext, NS_TAG)) == NULL)
- sym = install(yytext, NS_TAG);
+ case IDEN:
+ case TYPEIDEN:
+ sym = yylval.sym;
next();
break;
default:
- sym = install(NULL, NS_TAG);
+ sym = newsym(NS_TAG);
break;
}
if (!sym->type) {
@@ -290,7 +283,8 @@
sym->type = mktype(NULL, tag, 0, NULL);
sym->type->ns = ns++;
}
-
+
+ sym->flags |= ISDEFINED;
if (sym->type->op != tag)
error("'%s' defined as wrong kind of tag", yytext);
return sym;
@@ -303,12 +297,10 @@
{
Type *tagtype, *buff[NR_MAXSTRUCTS], **bp = &buff[0];
Symbol *tagsym, *sym;
- uint8_t tag, n;
+ uint8_t n;
size_t siz;
- tag = yylval.token;
- next();
- tagsym = newtag(tag);
+ tagsym = newtag();
tagtype = tagsym->type;
if (!accept('{'))
return tagtype;
@@ -368,8 +360,7 @@
Symbol *sym;
int val = 0;
- next();
- tp = newtag(ENUM)->type;
+ tp = newtag()->type;
if (yytoken == ';')
return tp;
@@ -381,7 +372,9 @@
while (yytoken != '}') {
if (yytoken != IDEN)
unexpected();
- sym = newiden(NS_IDEN);
+ if ((sym = install(NS_IDEN)) == NULL)
+ error("duplicated member '%s'", yytext);
+ next();
sym->type = inttype;
if (accept('='))
initializer(sym);
--- a/cc1/expr.c
+++ b/cc1/expr.c
@@ -333,18 +333,16 @@
static Node *
field(Node *np)
{
- extern uint8_t lex_ns;
Symbol *sym;
switch (BTYPE(np)) {
case STRUCT: case UNION:
- lex_ns = np->type->ns;
+ setnamespace(np->type->ns);
next();
if (yytoken != IDEN)
unexpected();
- if ((sym = yylval.sym) == NULL)
+ if (!((sym = yylval.sym)->flags & ISDEFINED)) /* lookup() never returns NULL now */
error("incorrect field in struct/union");
- lex_ns = NS_IDEN;
next();
return node(OFIELD, sym->type, varnode(sym), np);
default:
@@ -470,9 +468,9 @@
next();
break;
case IDEN:
- if (yylval.sym == NULL) {
- yylval.sym = install(yytext, NS_IDEN);
+ if (!(yylval.sym->flags & ISDEFINED)) {
yylval.sym->type = inttype;
+ yylval.sym->flags |= ISDEFINED;
error("'%s' undeclared", yytext);
}
np = varnode(yylval.sym);
--- a/cc1/lex.c
+++ b/cc1/lex.c
@@ -23,7 +23,7 @@
struct input *next;
};
-uint8_t lex_ns = NS_IDEN;
+static uint8_t lex_ns = NS_IDEN;
uint8_t yytoken;
struct yystype yylval;
@@ -246,7 +246,7 @@
convert:
tp = ctype(INT, sign, size);
- sym = install(NULL, NS_IDEN);
+ sym = newsym(NS_IDEN);
sym->type = tp;
v = strtol(s, NULL, base);
if (tp == inttype)
@@ -345,7 +345,7 @@
error("invalid character constant");
++input->p;
- sym = install(NULL, NS_IDEN);
+ sym = newsym(NS_IDEN);
sym->u.i = c;
sym->type = inttype;
yylval.sym = sym;
@@ -386,7 +386,7 @@
}
*bp = '\0';
- sym = install(NULL, NS_IDEN);
+ sym = newsym(NS_IDEN);
sym->u.s = xstrdup(buf);
sym->type = mktype(chartype, ARY, (bp - buf) + 1, NULL);
yylval.sym = sym;
@@ -403,8 +403,8 @@
/* nothing */;
input->p = p;
tok2str();
- sym = yylval.sym = lookup(yytext, lex_ns);
- if (!sym || sym->token == IDEN)
+ sym = yylval.sym = lookup(lex_ns);
+ if (sym->token == IDEN)
return IDEN;
yylval.token = sym->u.token;
return sym->token;
@@ -502,6 +502,13 @@
return t;
}
+/* TODO: Ensure that lex_ns is NS_IDEN after a recovery */
+void
+setnamespace(uint8_t ns)
+{
+ lex_ns = ns;
+}
+
uint8_t
next(void)
{
@@ -525,6 +532,7 @@
} else {
yytoken = operator();
}
+ lex_ns = NS_IDEN;
return yytoken;
}
--- a/cc1/stmt.c
+++ b/cc1/stmt.c
@@ -13,28 +13,6 @@
extern Node *iszero(Node *np), *eval(Node *np);
static void stmt(Symbol *lbreak, Symbol *lcont, Caselist *lswitch);
-static Symbol *
-label(char *s, char define)
-{
- Symbol *sym;
-
- if ((sym = lookup(s, NS_LABEL)) != NULL) {
- if (define) {
- if (sym->flags & ISDEFINED)
- error("label '%s' already defined", s);
- sym->flags |= ISDEFINED;
- }
- return sym;
- }
-
- sym = install(s, NS_LABEL);
- if (define)
- sym->flags |= ISDEFINED;
- else
- sym->flags &= ~ISDEFINED;
- return sym;
-}
-
static void
stmtexp(Symbol *lbreak, Symbol *lcont, Caselist *lswitch)
{
@@ -72,9 +50,9 @@
Symbol *begin, *cond, *end;
Node *np;
- begin = install(NULL, NS_LABEL);
- end = install(NULL, NS_LABEL);
- cond = install(NULL, NS_LABEL);
+ begin = newsym(NS_LABEL);
+ end = newsym(NS_LABEL);
+ cond = newsym(NS_LABEL);
expect(WHILE);
np = condition();
@@ -95,9 +73,9 @@
Symbol *begin, *cond, *end;
Node *econd, *einc, *einit;
- begin = install(NULL, NS_LABEL);
- end = install(NULL, NS_LABEL);
- cond = install(NULL, NS_LABEL);
+ begin = newsym(NS_LABEL);
+ end = newsym(NS_LABEL);
+ cond = newsym(NS_LABEL);
expect(FOR);
expect('(');
@@ -127,8 +105,8 @@
Symbol *begin, *end;
Node *np;
- begin = install(NULL, NS_LABEL);
- end = install(NULL, NS_LABEL);
+ begin = newsym(NS_LABEL);
+ end = newsym(NS_LABEL);
expect(DO);
emit(OBLOOP, NULL);
emit(OLABEL, begin);
@@ -179,9 +157,24 @@
static void
Label(Symbol *lbreak, Symbol *lcont, Caselist *lswitch)
{
+ Symbol *sym;
+
switch (yytoken) {
- case IDEN: case TYPEIDEN:
- emit(OLABEL, label(yytext, 1));
+ case IDEN:
+ case TYPEIDEN:
+ /*
+ * We cannot call install() because the call to lookup() in
+ * lex.c was done in the NS_IDEN namespace, and it is impossible
+ * to fix this point, because an identifier at the beginning
+ * of a statement may be part of an expression or part of a
+ * label. This double call to lookup() is going to generate
+ * an undefined symbol that is not going to be used ever.
+ */
+ sym = lookup(NS_LABEL);
+ if (sym->flags & ISDEFINED)
+ error("label '%s' already defined", yytext);
+ sym->flags |= ISDEFINED;
+ emit(OLABEL, sym);
next();
expect(':');
stmt(lbreak, lcont, lswitch);
@@ -204,11 +197,11 @@
static void
Goto(Symbol *lbreak, Symbol *lcont, Caselist *lswitch)
{
- expect(GOTO);
-
+ setnamespace(NS_LABEL);
+ next();
if (yytoken != IDEN)
unexpected();
- emit(OJUMP, label(yytext, 0));
+ emit(OJUMP, yylval.sym);
next();
expect(';');
}
@@ -229,8 +222,8 @@
error("incorrect type in switch statement");
expect (')');
- lbreak = install(NULL, NS_LABEL);
- lcond = install(NULL, NS_LABEL);
+ lbreak = newsym(NS_LABEL);
+ lcond = newsym(NS_LABEL);
emit(OJUMP, lcond);
stmt(lbreak, lcont, &lcase);
emit(OLABEL, lcond);
@@ -263,7 +256,7 @@
pcase = xmalloc(sizeof(*pcase));
pcase->expr = np;
pcase->next = lswitch->head;
- emit(OLABEL, pcase->label = install(NULL, NS_LABEL));
+ emit(OLABEL, pcase->label = newsym(NS_LABEL));
lswitch->head = pcase;
++lswitch->nr;
}
@@ -271,7 +264,7 @@
static void
Default(Symbol *lbreak, Symbol *lcont, Caselist *lswitch)
{
- Symbol *ldefault = install(NULL, NS_LABEL);
+ Symbol *ldefault = newsym(NS_LABEL);
expect(DEFAULT);
expect(':');
@@ -285,7 +278,7 @@
Symbol *end, *lelse;
Node *np;
- lelse = install(NULL, NS_LABEL);
+ lelse = newsym(NS_LABEL);
expect(IF);
np = condition();
emit(OBRANCH, lelse);
@@ -292,7 +285,7 @@
emit(OEXPR, negate(np));
stmt(lbreak, lcont, lswitch);
if (accept(ELSE)) {
- end = install(NULL, NS_LABEL);
+ end = newsym(NS_LABEL);
emit(OJUMP, end);
emit(OLABEL, lelse);
stmt(lbreak, lcont, lswitch);
@@ -355,7 +348,8 @@
case CASE: fun = Case; break;
case DEFAULT: fun = Default; break;
default: fun = stmtexp; break;
- case TYPEIDEN: case IDEN:
+ case TYPEIDEN:
+ case IDEN:
fun = (ahead() == ':') ? Label : stmtexp;
break;
case '@':
--- a/cc1/symbol.c
+++ b/cc1/symbol.c
@@ -69,7 +69,7 @@
sym->id = (curctx) ? ++localcnt : ++globalcnt;
sym->ctx = curctx;
sym->token = IDEN;
- sym->flags = 0;
+ sym->flags = ISDEFINED;
sym->name = NULL;
sym->type = NULL;
sym->hash = NULL;
@@ -79,32 +79,58 @@
}
Symbol *
-lookup(char *s, uint8_t ns)
+lookup(uint8_t ns)
{
- Symbol *sym;
+ Symbol *sym, **h;
+ uint8_t sns;
+ char *t, c;
- for (sym = htab[hash(s)]; sym; sym = sym->hash) {
- if (!strcmp(sym->name, s) && sym->ns == ns)
+ h = &htab[hash(yytext)];
+ c = *yytext;
+ for (sym = *h; sym; sym = sym->hash) {
+ t = sym->name;
+ if (*t != c || strcmp(t, yytext))
+ continue;
+ sns = sym->ns;
+ if (sns == NS_KEYWORD || sns == NS_CPP)
return sym;
+ if (sns != ns)
+ continue;
+ return sym;
}
- return NULL;
+ sym = newsym(ns);
+ sym->name = xstrdup(yytext);
+ sym->flags &= ~ISDEFINED;
+ sym->hash = *h;
+ *h = sym;
+ return sym;
}
Symbol *
-install(char *s, uint8_t ns)
+install(uint8_t ns)
{
- Symbol *sym, **t;
+ Symbol *sym, **h;
+ /*
+ * install() is always called after a call to lookup(), so
+ * yylval.sym always points to a symbol with yytext name.
+ * If that symbol lives in namespace ns and in the current
+ * context, it is either the undefined placeholder created by
+ * the previous lookup() call (reuse it) or an already defined
+ * symbol (redefinition, return NULL). Otherwise insert a new one.
+ */
+ if (yylval.sym->ctx == curctx && yylval.sym->ns == ns) {
+ if (yylval.sym->flags & ISDEFINED)
+ return NULL;
+ yylval.sym->flags |= ISDEFINED;
+ return yylval.sym;
+ }
+ h = &htab[hash(yytext)];
sym = newsym(ns);
- sym->flags |= ISDEFINED;
-
- if (s) {
- sym->name = xstrdup(s);
- t = &htab[hash(s)];
- sym->hash = *t;
- *t = sym;
- }
+ sym->name = xstrdup(yytext);
+ sym->hash = *h;
+ *h = sym;
return sym;
}
@@ -154,7 +180,8 @@
Symbol *sym;
for (bp = buff; bp->str; ++bp) {
- sym = install(bp->str, NS_IDEN);
+ strcpy(yytext, bp->str);
+ sym = lookup(NS_KEYWORD);
sym->token = bp->token;
sym->u.token = bp->value;
}