ref: 4eef2ebea51d526b52bd074247d821bf80c12ee5
dir: /cc2/parser.c/
/* See LICENSE file for copyright and license details. */ #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "../inc/cc.h" #include "../inc/sizes.h" #include "cc2.h" #define MAXLINE 200 #define STACKSIZ 50 extern Type int8type, int16type, int32type, int64type, uint8type, uint16type, uint32type, uint64type, float32type, float64type, float80type, booltype, ptrtype, voidtype, elipsistype; Type funtype = { .flags = FUNF }; union tokenop { void *arg; unsigned op; }; struct swtch { int nr; Node *first; Node *last; }; static struct swtch swtbl[NR_BLOCK], *swp = swtbl; static Symbol *lastfun; typedef void parsefun(char *, union tokenop); static parsefun type, symbol, getname, unary, binary, ternary, call, constant, composed, binit, einit, jump, oreturn, loop, assign, ocase, bswitch, eswitch; typedef void evalfun(void); static evalfun vardecl, beginfun, endfun, endpars, stmt, array, aggregate, flddecl, labeldcl; static struct decoc { void (*eval)(void); void (*parse)(char *token, union tokenop); union tokenop u; } optbl[] = { /* eval parse args */ ['A'] = { vardecl, symbol, .u.op = SAUTO<<8 | OAUTO}, ['R'] = { vardecl, symbol, .u.op = SREG<<8 | OREG}, ['G'] = { vardecl, symbol, .u.op = SGLOB<<8 | OMEM}, ['X'] = { vardecl, symbol, .u.op = SEXTRN<<8 | OMEM}, ['Y'] = { vardecl, symbol, .u.op = SPRIV<<8 | OMEM}, ['T'] = { vardecl, symbol, .u.op = SLOCAL<<8 | OMEM}, ['M'] = { flddecl, symbol, .u.op = SMEMB<<8 | OMEM}, ['L'] = { labeldcl, symbol, .u.op = SLABEL<<8 | OLABEL}, ['C'] = { NULL, type, .u.arg = &int8type}, ['I'] = { NULL, type, .u.arg = &int16type}, ['W'] = { NULL, type, .u.arg = &int32type}, ['Q'] = { NULL, type, .u.arg = &int64type}, ['K'] = { NULL, type, .u.arg = &uint8type}, ['N'] = { NULL, type, .u.arg = &uint16type}, ['Z'] = { NULL, type, .u.arg = &uint32type}, ['O'] = { NULL, type, .u.arg = &uint64type}, ['J'] = { NULL, type, .u.arg = &float32type}, ['D'] = { NULL, type, .u.arg = &float64type}, ['H'] = { NULL, type, .u.arg = &float80type}, ['0'] = { NULL, type, .u.arg = &voidtype}, ['B'] = { NULL, type, .u.arg = &booltype}, ['P'] = { NULL, type, .u.arg = &ptrtype}, ['E'] = { NULL, type, .u.arg = &elipsistype}, ['F'] = { NULL, type, .u.arg = &funtype}, ['V'] = { array,composed, 0}, ['U'] = {aggregate,composed, 0}, ['S'] = {aggregate,composed, 0}, ['"'] = { NULL, getname, 0}, ['{'] = { beginfun, NULL, 0}, ['}'] = { endfun, NULL, 0}, ['('] = { NULL, binit, 0}, [')'] = { NULL, einit, 0}, ['\\'] = { endpars, NULL, 0}, ['\t'] = { stmt, NULL, 0}, ['~'] = { NULL, unary, .u.op = OCPL}, ['_'] = { NULL, unary, .u.op = OSNEG}, ['\''] = { NULL, unary, .u.op = OADDR}, ['@'] = { NULL, unary, .u.op = OPTR}, ['g'] = { NULL, unary, .u.op = OCAST}, ['p'] = { NULL, unary, .u.op = OPAR}, ['n'] = { NULL, unary, .u.op = ONEG}, ['a'] = { NULL, binary, .u.op = OAND}, ['o'] = { NULL, binary, .u.op = OOR}, ['.'] = { NULL, binary, .u.op = OFIELD}, ['+'] = { NULL, binary, .u.op = OADD}, ['-'] = { NULL, binary, .u.op = OSUB}, ['*'] = { NULL, binary, .u.op = OMUL}, ['%'] = { NULL, binary, .u.op = OMOD}, ['/'] = { NULL, binary, .u.op = ODIV}, ['l'] = { NULL, binary, .u.op = OSHL}, ['r'] = { NULL, binary, .u.op = OSHR}, ['<'] = { NULL, binary, .u.op = OLT}, ['>'] = { NULL, binary, .u.op = OGT}, ['['] = { NULL, binary, .u.op = OLE}, [']'] = { NULL, binary, .u.op = OGE}, ['='] = { NULL, binary, .u.op = OEQ}, ['!'] = { NULL, binary, .u.op = ONE}, ['&'] = { NULL, binary, .u.op = OBAND}, ['|'] = { NULL, binary, .u.op = OBOR}, ['^'] = { NULL, binary, .u.op = OBXOR}, [','] = { NULL, binary, .u.op = OCOMMA}, [':'] = { NULL, assign, .u.op = OASSIG}, ['?'] = { NULL, ternary, .u.op = OASK}, ['c'] = { NULL, call, .u.op = OCALL}, ['#'] = { NULL,constant, .u.op = OCONST}, ['j'] = { NULL, jump, .u.op = OJMP}, ['y'] = { NULL, jump, .u.op = OBRANCH}, ['h'] = { NULL, oreturn, .u.op = ORET}, ['i'] = { NULL, NULL, .u.op = OINC}, ['d'] = { NULL, NULL, .u.op = ODEC}, ['b'] = { NULL, loop, .u.op = OBLOOP}, ['e'] = { NULL, loop, .u.op = OELOOP}, ['v'] = { NULL, ocase, .u.op = OCASE}, ['f'] = { NULL, ocase, .u.op = ODEFAULT}, ['t'] = { NULL, eswitch, .u.op = OESWITCH}, ['s'] = { NULL, bswitch, .u.op = OBSWITCH}, }; static int sclass, inpars, ininit, endf, lineno; static void *stack[STACKSIZ], **sp = stack; static Node * push(void *elem) { if (sp == stack[STACKSIZ]) error(ESTACKO); return *sp++ = elem; } static void * pop(void) { if (sp == stack) error(ESTACKU); return *--sp; } static int empty(void) { return sp == stack; } static void type(char *token, union tokenop u) { push(u.arg); } static void composed(char *token, union tokenop u) { Symbol *sym; sym = getsym(atoi(token+1)); push(&sym->type); } static void getname(char *t, union tokenop u) { push((*++t) ? xstrdup(t) : NULL); } static void symbol(char *token, union tokenop u) { Node *np = newnode(u.op & 0xFF); Symbol *sym = getsym(atoi(token+1)); sclass = u.op >> 8; np->u.sym = sym; np->type = sym->type; push(np); } static Type * gettype(char *token) { struct decoc *dp; dp = &optbl[*token]; if (!dp->parse) error(ESYNTAX); (*dp->parse)(token, dp->u); return pop(); } static void constant(char *token, union tokenop u) { static char letters[] = "0123456789ABCDEF"; Node *np; TUINT v; unsigned c; ++token; if (*token == '"') { ++token; np = newnode(OSTRING); np->type.flags = STRF; np->type.size = strlen(token); np->type.align = int8type.align; np->u.s = xstrdup(token); } else { np = newnode(OCONST); np->type = *gettype(token++); for (v = 0; c = *token++; v += c) { v <<= 4; c = strchr(letters, c) - letters; } np->u.i = v; } push(np); } static void assign(char *token, union tokenop u) { int subop; Node *np = newnode(u.op); switch (subop = *++token) { case '/': case '%': case '+': case '-': case 'l': case 'r': case '&': case '|': case '^': case 'i': case 'd': ++token; subop = optbl[subop].u.op; break; default: subop = 0; break; } np->u.subop = subop; np->type = *gettype(token); np->right = pop(); np->left = pop(); push(np); } static void ternary(char *token, union tokenop u) { Node *ask = newnode(OASK), *colon = newnode(OCOLON); Type *tp = gettype(token+1); colon->right = pop(); colon->left = pop(); ask->type = *tp; ask->left = pop(); ask->right = colon; push(ask); } static void eval(char *tok) { struct decoc *dp; do { dp = &optbl[*tok]; if (!dp->parse) break; (*dp->parse)(tok, dp->u); } while (tok = strtok(NULL, "\t\n")); } static int nextline(void) { char line[MAXLINE]; size_t len; int c; void (*fun)(void); repeat: ++lineno; if (!fgets(line, sizeof(line), stdin)) return 0; if ((len = strlen(line)) == 0 || line[0] == '\n') goto repeat; if (line[len-1] != '\n') error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE); line[len-1] = '\0'; c = *line; eval(strtok(line, "\t\n")); if ((fun = *optbl[c].eval) != NULL) (*fun)(); if (sp != stack) error(ESTACKA); return 1; } static void oreturn(char *token, union tokenop u) { Node *np = newnode(u.op); if (token = strtok(NULL, "\t\n")) eval(token); if (!empty()) np->left = pop(); push(np); } /* * Move np (which is a OCASE/ODEFAULT/OESWITCH) to be contigous with * the last switch table. It is a bit ugly to touch directly curstmt * here, but moving this function to node.c is worse, because we are * putting knowledge of how the text is parsed into the node * represtation module. */ static void waft(Node *np) { Node *lastcase, *next;; struct swtch *cur; extern Node *curstmt; if (swp == swtbl) error(EWTACKU); cur = swp - 1; lastcase = cur->last; next = lastcase->next; np->next = next; np->prev = lastcase; if (next) next->prev = np; lastcase->next = np; if (curstmt == cur->last) curstmt = np; cur->last = np; cur->nr++; } static void bswitch(char *token, union tokenop u) { struct swtch *cur; Node *np = newnode(u.op); if (swp == &swtbl[NR_BLOCK+1]) error(EWTACKO); cur = swp++; cur->nr = 0; eval(strtok(NULL, "\t\n")); np->left = pop(); push(cur->first = cur->last = np); } static void eswitch(char *token, union tokenop u) { struct swtch *cur; if (swp == swtbl) error(EWTACKU); jump(token, u); waft(pop()); cur = --swp; cur->first->u.i = cur->nr; } static void ocase(char *token, union tokenop u) { jump(token, u); waft(pop()); } static void jump(char *token, union tokenop u) { Node *aux, *np = newnode(u.op); eval(strtok(NULL, "\t\n")); if (u.op == OBRANCH || u.op == OCASE) np->left = pop(); aux = pop(); np->u.sym = aux->u.sym; delnode(aux); push(np); } static void loop(char *token, union tokenop u) { push(newnode(u.op)); } static void unary(char *token, union tokenop u) { Node *np = newnode(u.op); np->type = *gettype(token+1); np->left = pop(); np->right = NULL; push(np); } static void call(char *token, union tokenop u) { Node *np, *par, *fun = newnode(u.op); for (par = NULL;; par = np) { np = pop(); if (np->op != OPAR) break; np->right = par; } fun->type = *gettype(token+1); fun->left = np; fun->right = par; push(fun); } static void binary(char *token, union tokenop u) { Node *np = newnode(u.op); np->type = *gettype(token+1); np->right = pop(); np->left = pop(); push(np); } static void binit(char *token, union tokenop u) { ininit = 1; } static void einit(char *token, union tokenop u) { ininit = 0; endinit(); } static void endpars(void) { if (!curfun || !inpars) error(ESYNTAX); inpars = 0; } static void aggregate(void) { Node *align, *size; char *name; Type *tp; Symbol *sym; align = pop(); size = pop(); name = pop(); tp = pop(); tp->size = size->u.i; tp->align = align->u.i; tp->flags = AGGRF; /* * type is the first field of Symbol so we can obtain the * address of the symbol from the address of the type. * We have to do this because composed returns the pointer * to the type, but in this function we also need the * symbol to store the name. */ sym = (Symbol *) tp; sym->name = name; delnode(align); delnode(size); } static void array(void) { Type *tp, *base; Node *size; size = pop(); base = pop(); tp = pop(); tp->size = size->u.i * base->size; /* FIXME check for overflow */ tp->align = base->align; delnode(size); } static void decl(Symbol *sym) { Type *tp = &sym->type; if (tp->flags & FUNF) { lastfun = sym; } else { switch (sym->kind) { case SEXTRN: case SGLOB: case SPRIV: case SLOCAL: defglobal(sym); break; case SAUTO: case SREG: if (!curfun) error(ESYNTAX); ((inpars) ? defpar : defvar)(sym); break; default: abort(); } } } static void vardecl(void) { Type *tp, *rp; Node *np; Symbol *sym; char *name; name = pop(); tp = pop(); if (tp->flags & FUNF) rp = pop(); np = pop(); sym = np->u.sym; /* * We have to free sym->name because in tentative declarations * we can have multiple declarations of the same symbol, and in * this case our parser will allocate twice the memory */ free(sym->name); sym->name = name; sym->type = *tp; if (tp->flags & FUNF) sym->rtype = *rp; sym->kind = sclass; if (ininit) sym->type.flags |= INITF; decl(sym); delnode(np); } static void flddecl(void) { Node *off, *np; char *name; Type *tp; Symbol *sym; off = pop(); name = pop(); tp = pop(); np = pop(); sym = np->u.sym; sym->u.off = off->u.i; sym->name = name; sym->type = *tp; delnode(np); delnode(off); } static void labeldcl(void) { Node *np; Symbol *sym; np = pop(); np->op = ONOP; sym = np->u.sym; sym->kind = SLABEL; sym->u.stmt = np; np->label = sym; addstmt(np, SETCUR); } static void stmt(void) { Node *np; if (empty()) return; np = pop(); if (ininit) { data(np); deltree(np); return; } addstmt(np, SETCUR); } static void beginfun(void) { curfun = lastfun; inpars = 1; pushctx(); addstmt(newnode(OBFUN), SETCUR); } static void endfun(void) { endf = 1; addstmt(newnode(OEFUN), SETCUR); } void parse(void) { cleannodes(); /* remove code of previous function */ popctx(); /* remove context of previous function */ curfun = NULL; endf = 0; while (!endf && nextline()) /* nothing */; if (ferror(stdin)) error(EFERROR, strerror(errno)); }