ref: 1ffd5f6ee169b5d76958348b3376d82a66a75d63
dir: /lex.c/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "cc.h"
#include "symbol.h"
#include "tokens.h"
/*
 * Number of buckets in the keyword hash table.  Bucket indices are
 * computed with `& (NR_KWD_HASH - 1)`, so this must stay a power of two.
 */
#define NR_KWD_HASH 32

/*
 * Reserved-word table.  Each entry pairs a keyword spelling with the
 * token code the lexer returns for it; `next` is left NULL here and is
 * linked at run time by init_lex() to chain entries that share a hash
 * bucket.  The table is terminated by an entry whose `str` is NULL.
 *
 * NOTE(review): "_Bool" is mapped to CHAR -- presumably intentional as a
 * stop-gap; confirm.
 */
static struct keyword {
	char *str;              /* keyword spelling, NULL on the sentinel */
	unsigned char tok;      /* token code for this keyword */
	struct keyword *next;   /* next keyword in the same hash bucket */
} keywords [] = {
	{"auto", AUTO, NULL},
	{"break", BREAK, NULL},
	{"_Bool", CHAR, NULL},
	{"case", CASE, NULL},
	{"char", CHAR, NULL},
	{"const", CONST, NULL},
	{"continue", CONTINUE, NULL},
	{"default", DEFAULT, NULL},
	{"do", DO, NULL},
	{"double", DOUBLE, NULL},
	{"else", ELSE, NULL},
	{"enum", ENUM, NULL},
	{"extern", EXTERN, NULL},
	{"float", FLOAT, NULL},
	{"for", FOR, NULL},
	{"goto", GOTO, NULL},
	{"if", IF, NULL},
	{"int", INT, NULL},
	{"long", LONG, NULL},
	{"register", REGISTER, NULL},
	/* The C99 keyword is spelled "restrict", not "restricted". */
	{"restrict", RESTRICTED, NULL},
	{"return", RETURN, NULL},
	{"short", SHORT, NULL},
	{"signed", SIGNED, NULL},
	{"sizeof", SIZEOF, NULL},
	{"static", STATIC, NULL},
	{"struct", STRUCT, NULL},
	{"switch", SWITCH, NULL},
	{"typedef", TYPEDEF, NULL},
	{"union", UNION, NULL},
	{"unsigned", UNSIGNED, NULL},
	{"void", VOID, NULL},
	{"volatile", VOLATILE, NULL},
	{"while", WHILE, NULL},
	{NULL, 0, NULL}
};
/* Hash buckets of keyword chains; filled by init_lex(), searched by keyword(). */
static struct keyword *khash[NR_KWD_HASH];
/* Current input stream; set by open_file(). */
static FILE *yyin;
/* Value slot associated with the current token (not written in this file's visible code). */
union yyval yyval;
/* Last token returned by next(). */
unsigned char yytoken;
/* Byte-sum hash of the last identifier scanned by iden(). */
unsigned char yyhash;
/* NUL-terminated text of the last identifier or number scanned. */
char yytext[TOKSIZ_MAX + 1];
/* Current line number; reset by open_file(), advanced by skip() on '\n'. */
unsigned linenum;
/* Current column; reset by open_file(), updated by skip() while eating whitespace. */
unsigned columnum;
/* Name of the current input, or "(stdin)"; set by open_file(). */
const char *filename;
/*
 * Additive string hash: returns the sum of every byte of the
 * NUL-terminated string `s`, reduced modulo 256 by the unsigned char
 * accumulator.  The empty string hashes to 0.
 */
static unsigned char hashfun(register const char *s)
{
	unsigned char sum = 0;

	while (*s != '\0')
		sum += (unsigned char) *s++;
	return sum;
}
/*
 * Build the keyword hash table.
 *
 * Every entry of keywords[] (up to the NULL-string sentinel) is linked
 * into the bucket selected by its hashfun() value masked with
 * NR_KWD_HASH - 1.  Each bucket chain is kept in ascending strcmp()
 * order; an entry that compares equal to existing ones is placed after
 * them.
 */
void init_lex(void)
{
	struct keyword *kw;

	for (kw = keywords; kw->str != NULL; kw++) {
		unsigned char slot = hashfun(kw->str) & (NR_KWD_HASH - 1);
		struct keyword **link = &khash[slot];

		/* Walk past every node that does not sort after the new one. */
		while (*link != NULL && strcmp(kw->str, (*link)->str) >= 0)
			link = &(*link)->next;

		/* Splice the new node in front of the first greater node. */
		kw->next = *link;
		*link = kw;
	}
}
static char number(void)
{
register char *bp;
register char ch;
for (bp = yytext; bp < yytext + TOKSIZ_MAX; *bp++ = ch) {
if (!isdigit(ch = getc(yyin)))
break;
}
if (bp == yytext + TOKSIZ_MAX)
error("identifier too long %s", yytext);
ungetc(ch, yyin);
*bp = '\0';
return CONSTANT;
}
/*
 * Look up `s` in the keyword hash table.
 *
 * s:   NUL-terminated spelling to search for.
 * key: hash of `s` (byte sum); only the low bits selected by
 *      NR_KWD_HASH - 1 are used to pick the bucket.
 *
 * Returns the keyword's token code, or 0 when `s` is not a keyword.
 */
static unsigned char keyword(const char *s, unsigned char key)
{
	register struct keyword *kwp;

	key &= NR_KWD_HASH - 1;
	for (kwp = khash[key]; kwp; kwp = kwp->next) {
		/* Compare against the argument, not the global yytext. */
		if (!strcmp(kwp->str, s))
			return kwp->tok;
	}
	return 0;
}
/*
 * Scan an identifier or keyword from yyin into yytext.
 *
 * Characters are consumed while they are alphanumeric or '_'; their
 * byte sum is accumulated in yyhash.  The first character that does not
 * belong to the identifier is pushed back.  An error is reported when
 * TOKSIZ_MAX characters have been stored.
 *
 * Returns the keyword token if the text is a keyword, otherwise IDEN.
 */
static unsigned char iden(void)
{
	/*
	 * Must be int: getc() returns EOF out of band, and <ctype.h>
	 * functions are only defined for unsigned char values and EOF.
	 */
	register int ch;
	register char *bp = yytext;
	unsigned char tok;

	for (yyhash = 0; bp < yytext + TOKSIZ_MAX; *bp++ = ch) {
		ch = getc(yyin);
		if (!isalnum(ch) && ch != '_')
			break;
		yyhash += ch;
	}
	*bp = '\0';
	if (bp == yytext + TOKSIZ_MAX)
		error("identifier too long %s", yytext);
	/*
	 * NOTE(review): when the length limit was hit, ch was already
	 * stored; this only matters if error() returns -- confirm.
	 */
	ungetc(ch, yyin);

	/* Keep the token in an unsigned char so codes above 127 survive. */
	tok = keyword(yytext, yyhash);
	if (tok)
		return tok;
	return IDEN;
}
/*
 * Consume leading whitespace from yyin, keeping linenum/columnum up to
 * date: a newline bumps linenum and resets columnum to 1, any other
 * whitespace character bumps columnum.
 *
 * Returns 1 when end of file was reached (after reporting an error if
 * the external flag parser_out_home is set), otherwise pushes the first
 * non-blank character back and returns 0.
 */
static unsigned char skip(void)
{
	extern char parser_out_home;
	register int ch;

	for (;;) {
		ch = getc(yyin);
		if (!isspace(ch))
			break;
		if (ch != '\n') {
			++columnum;
		} else {
			++linenum;
			columnum = 1;
		}
	}

	if (ch != EOF) {
		ungetc(ch, yyin);
		return 0;
	}

	if (parser_out_home)
		error("Find EOF while parsing");
	return 1;
}
/*
 * Resolve an operator that may be followed by '=' or by itself.
 *
 * op:  the operator character already consumed.
 * eq:  token returned when the next character is '='.
 * rep: token returned when the next character repeats `op`;
 *      0 means the doubled form is not a token.
 *
 * When neither form matches, the look-ahead character is pushed back
 * and `op` itself is returned.
 */
static unsigned char
follow(unsigned char op, unsigned char eq, unsigned char rep)
{
	/* int, so that EOF is not confused with a real character. */
	register int c;

	if ((c = getc(yyin)) == '=')
		return eq;
	else if (c == op && rep)
		return rep;
	ungetc(c, yyin);
	return op;
}
/*
 * Resolve the tokens that start with '<' or '>'.
 *
 * op: the character already consumed, '<' or '>'.
 *
 * Returns, for '<' and '>' respectively:
 *   "<="  / ">="   -> LE        / GE
 *   "<<=" / ">>="  -> LSHIFT_EQ / RSHIFT_EQ
 *   "<<"  / ">>"   -> LSHIFT    / RSHIFT
 *   otherwise      -> the plain operator character
 * Any look-ahead character that is not part of the token is pushed back.
 */
static unsigned char rel_shift(unsigned char op)
{
	/* Row 0 is for '<', row 1 for '>': {op=, op op, op op =}. */
	static const unsigned char tokens[2][3] = {
		{LE, LSHIFT, LSHIFT_EQ},
		{GE, RSHIFT, RSHIFT_EQ}
	};
	/* int, so that EOF is not confused with a real character. */
	register int c;
	register const unsigned char *tp = tokens[op == '>'];

	if ((c = getc(yyin)) == '=') {
		return tp[0];
	} else if (c == op) {
		if ((c = getc(yyin)) == '=')
			return tp[2];
		op = tp[1];
	}
	ungetc(c, yyin);
	/* Return the recognised operator, not the pushed-back look-ahead. */
	return op;
}
/*
 * Resolve the tokens that start with '-': "--" gives DEC, "->" gives
 * PTR and "-=" gives SUB_EQ.  Any other following character is pushed
 * back and a plain '-' is returned.
 */
static unsigned char minus(void)
{
	register int ch = getc(yyin);

	if (ch == '-')
		return DEC;
	if (ch == '>')
		return PTR;
	if (ch == '=')
		return SUB_EQ;

	ungetc(ch, yyin);
	return '-';
}
/*
 * Read the next token from yyin.
 *
 * Leading whitespace is skipped first; if that reaches end of file the
 * token is EOFTOK.  Otherwise identifiers/keywords are scanned by
 * iden(), digit sequences by number(), and multi-character operators by
 * follow(), rel_shift() and minus().  Any other character is returned
 * as its own token.
 *
 * The token is stored in yytoken and also returned.
 */
unsigned char next(void)
{
	/* int, so the value can be handed to <ctype.h> functions safely. */
	register int c;

	/* skip() returns non-zero when it hit end of file. */
	if (skip())
		return yytoken = EOFTOK;

	c = getc(yyin);
	if (isalpha(c) || c == '_') {
		ungetc(c, yyin);
		c = iden();
	} else if (isdigit(c)) {
		ungetc(c, yyin);
		c = number();
	} else {
		switch (c) {
		case '=': c = follow('=', EQ, 0); break;
		case '^': c = follow('^', XOR_EQ, 0); break;
		case '*': c = follow('*', MUL_EQ, 0); break;
		case '!': c = follow('!', NE, 0); break;
		case '+': c = follow('+', ADD_EQ, INC); break;
		case '&': c = follow('&', AND_EQ, AND); break;
		case '|': c = follow('|', OR_EQ, OR); break;
		case '<': c = rel_shift('<'); break;
		case '>': c = rel_shift('>'); break;
		case '-': c = minus(); break;
		default:
			/* Single-character token: returned unchanged. */
			break;
		}
	}
	return yytoken = c;
}
/*
 * If the current token (yytoken) equals `tok`, advance to the next
 * token and return 1; otherwise leave the input untouched and return 0.
 */
char accept(unsigned char tok)
{
	if (yytoken != tok)
		return 0;

	next();
	return 1;
}
/*
 * Require the current token (yytoken) to be `tok`: report an
 * "unexpected" error quoting yytext when it is not, then advance to the
 * next token.
 */
void expect(unsigned char tok)
{
	const int matched = (yytoken == tok);

	if (!matched)
		error("unexpected %s", yytext);
	next();
}
/*
 * Select the lexer's input.
 *
 * file: path of the file to read, or NULL to read from standard input.
 *
 * Any previously opened file is closed first (standard input itself is
 * never closed, so it can be selected again later).  On success
 * `filename` names the new input and the line/column counters are reset
 * to 1 for both files and standard input.  If the file cannot be
 * opened, die() is called.
 */
void open_file(const char *file)
{
	if (yyin != NULL && yyin != stdin)
		fclose(yyin);

	if (file == NULL) {
		yyin = stdin;
		filename = "(stdin)";
	} else {
		if ((yyin = fopen(file, "r")) == NULL)
			die("file '%s' not found", file);
		filename = file;
	}
	columnum = linenum = 1;
}