ref: 3f1b9619b57cbd1e479f281c86f8ed3fabe2b024
dir: /cc1/lex.c/
#include <assert.h>
#include <inttypes.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "../inc/sizes.h"
#include "../inc/cc.h"
#include "cc1.h"
/* Size in bytes of the buffer readline() allocates for one input line. */
#define INPUTSIZ 120
typedef struct input Input;
/*
 * One entry of the stack of active inputs; the global `input` points at
 * the entry currently being read and `next` at the one below it.
 */
struct input {
/* name returned by filename() and used in diagnostics */
char *fname;
/* line counter, incremented by newline() */
unsigned short nline;
/* characters of the buffered line not yet handed out by nextchar() */
int cnt;
/* stream being read; addinput() stores NULL for entries created without a file */
FILE *fp;
/* line buffer (allocated lazily by readline()) and read cursor into it */
char *line, *ptr;
/* entry that becomes current again once delinput() pops this one */
struct input *next;
};
/*
 * Yield the next character of the current input: the buffered line is
 * consumed first, and readline() is called to refill it once it is
 * exhausted. Expands to an expression that modifies `input`.
 */
#define nextchar() ((--input->cnt >= 0) ? \
(unsigned char) *input->ptr++ : readline())
/* namespace that iden() passes to lookup() */
uint8_t lex_ns = NS_IDEN;
/* token most recently stored by next() or discard() */
uint8_t yytoken;
/* value attached to the current token (symbol or token attribute) */
struct yystype yylval;
/* spelling of the current token */
char yytext[IDENTSIZ + 1];
/*
 * safe: resynchronisation point consulted by discard().
 * comment, commentline: set while readline() is inside a block comment
 * or a line comment respectively.
 */
static uint8_t safe, comment, commentline;
/* top of the input stack; NULL once every input has been popped */
static Input *input;
/*
 * Push a new entry on the input stack and make it the current input.
 *
 * fname != NULL: the file is opened for reading and a private copy of
 *                the name is stored.
 * fname == NULL and no input active: standard input is used.
 * fname == NULL otherwise: the entry has no stream (fp == NULL) and
 *                inherits the name and line number of the current input.
 *
 * Returns 0 when the file cannot be opened, 1 otherwise.
 */
bool
addinput(char *fname)
{
	Input *node;
	FILE *stream = NULL;
	unsigned short lineno = 1;

	if (fname != NULL) {
		stream = fopen(fname, "r");
		if (stream == NULL)
			return 0;
		fname = xstrdup(fname);
	} else if (input == NULL) {
		stream = stdin;
		fname = "(stdin)";
	} else {
		lineno = input->nline;
		fname = input->fname;
	}

	node = xmalloc(sizeof(*node));
	node->fp = stream;
	node->fname = fname;
	node->nline = lineno;
	node->cnt = 0;		/* forces readline() on the first nextchar() */
	node->line = NULL;	/* buffer is allocated lazily by readline() */
	node->next = input;
	input = node;
	return 1;
}
/*
 * Pop the current input from the stack, closing its stream (if any) and
 * releasing its line buffer. Terminates through die() when fclose()
 * reports an error.
 *
 * The file name is released only for entries that opened a real file:
 * the stdin entry uses a string literal and stream-less entries share
 * the name of the entry below them.
 */
void
delinput(void)
{
	Input *ip = input;
	FILE *fp = ip->fp;

	if (fp != NULL) {
		/*
		 * Decide ownership of the name before closing: the value of a
		 * FILE pointer is indeterminate once the stream is closed, so
		 * it must not be compared against stdin afterwards.
		 */
		int owns_name = (fp != stdin);

		if (fclose(fp))
			die("error reading from input file '%s'", ip->fname);
		if (owns_name)
			free(ip->fname);
	}
	input = ip->next;
	free(ip->line);
	free(ip);
}
char *
filename(void)
{
return input->fname;
}
unsigned short
fileline(void)
{
return input->nline;
}
/*
 * Advance the line counter of the current input. The counter is an
 * unsigned short, so wrapping back to 0 means the file has more lines
 * than can be represented and compilation is aborted through die().
 */
static void
newline(void)
{
	input->nline++;
	if (input->nline != 0)
		return;
	die("input file '%s' too long", input->fname);
}
/*
 * Handle a preprocessor line. On entry input->ptr points just after the
 * '#' of a NUL-terminated line (readline() terminates it before calling).
 * The directive name is looked up in the NS_CPP namespace and its handler
 * is called with a pointer to the text that follows the name. The rest of
 * the line is discarded for the lexer.
 *
 * TODO: preprocessor error must not raise recover
 */
static void
preprocessor(void)
{
	char str[IDENTSIZ+1], *p, *q;
	size_t n;
	Symbol *sym;

	/*
	 * Skip blanks between '#' and the directive name. (An earlier loop
	 * that tried to trim trailing blanks advanced its pointer forwards,
	 * past the end of the line, and its result was never used; it has
	 * been removed.)
	 */
	for (p = input->ptr; isspace((unsigned char) *p); ++p)
		/* nothing */;
	/* the directive name is the longest run of letters */
	for (q = p; isalpha((unsigned char) *q); ++q)
		/* nothing */;
	n = q - p;
	if (n > IDENTSIZ)
		goto bad_directive;
	memcpy(str, p, n);
	str[n] = '\0';

	/* discard this line for the lexer */
	input->cnt = 0;

	if ((sym = lookup(str, NS_CPP)) == NULL)
		goto bad_directive;
	(*sym->u.fun)(q);
	return;

bad_directive:
	error("incorrect preprocessor directive");
}
/*
 * Handler for #include. `s` points at the text following the directive
 * name and must be NUL-terminated.
 *
 * "FILENAME" pushes the named file on the input stack with addinput().
 * <FILENAME> is recognised but not implemented yet and aborts.
 * Anything else, an unterminated name, an over-long name or a file that
 * cannot be opened is reported through error().
 */
void
include(char *s)
{
	char fname[FILENAME_MAX], delim, c, *p;
	size_t len;

	while (isspace((unsigned char) *s))
		++s;
	/* a system include opens with '<' and closes with '>' */
	if ((c = *s++) == '<')
		delim = '>';
	else if (c == '"')
		delim = '"';
	else
		goto bad_include;

	for (p = s; (c = *p) && c != delim; ++p)
		/* nothing */;
	if (c == '\0')
		goto bad_include;

	len = p - s;
	if (delim == '"') {
		if (len >= FILENAME_MAX)
			goto too_long;
		memcpy(fname, s, len);
		fname[len] = '\0';
		if (!addinput(fname))
			goto not_found;
		return;
	}
	abort();	/* <FILENAME> lookup is not implemented */
	return;

	/*
	 * Each label returns on its own so that, should error() return,
	 * only the relevant diagnostic is emitted.
	 */
not_found:
	error("included file '%s' not found", fname);
	return;
too_long:
	error("file name in include too long");
	return;
bad_include:
	error("#include expects \"FILENAME\" or <FILENAME>");
}
/* Handler for #define: currently a stub that ignores the directive text. */
void
define(char *str)
{
	(void) str;
}
/* Handler for #undef: placeholder that only writes a trace line to stderr. */
void
undef(char *str)
{
	(void) str;
	fputs("Esto en un undef\n", stderr);
}
/* Handler for #ifdef: placeholder that only writes a trace line to stderr. */
void
ifdef(char *str)
{
	(void) str;
	fputs("Esto en un ifdef\n", stderr);
}
/* Handler for #ifndef: placeholder that only writes a trace line to stderr. */
void
ifndef(char *str)
{
	(void) str;
	fputs("Esto en un ifndef\n", stderr);
}
/* Handler for #endif: placeholder that only writes a trace line to stderr. */
void
endif(char *str)
{
	(void) str;
	fputs("Esto en un endif\n", stderr);
}
/*
 * Read one character from the stream of the current input, splicing
 * lines: every backslash immediately followed by a newline is dropped.
 * A backslash followed by anything else is returned as is and the
 * following character is pushed back on the stream.
 * Returns the character read, or EOF.
 */
static int
readchar(void)
{
	FILE *stream = input->fp;
	int ch, after;

	for (;;) {
		ch = getc(stream);
		if (ch != '\\')
			return ch;
		after = getc(stream);
		if (after != '\n') {
			ungetc(after, stream);
			return '\\';
		}
		/* backslash-newline: keep reading on the next physical line */
	}
}
/*
 * Refill the line buffer of the current input and return the first
 * character of the new line, or EOF once no input is left on the stack.
 *
 * Leading white space is skipped (counting newlines), comments are
 * removed while the line is collected, and lines whose first character
 * is '#' are handed to preprocessor() and never reach the lexer.
 * Terminates through die() when a line does not fit in INPUTSIZ bytes.
 *
 * Note that `nextchar` below is a label: the nextchar() macro is only
 * expanded when the name is followed by '('.
 */
static int
readline(void)
{
char *bp, *ptr;
/* NOTE(review): `n` is never used in this function */
uint8_t n;
int c;
FILE *fp;
repeat:
if (!input)
return EOF;
fp = input->fp;
/* the buffer is allocated the first time a line is read from this input */
if (!input->line)
input->line = xmalloc(INPUTSIZ);
bp = ptr = input->ptr = input->line;
/* skip leading white space, keeping the line counter up to date */
while ((c = getc(fp)) != EOF && isspace(c)) {
if (c == '\n')
newline();
}
/* this input is exhausted: pop it and carry on with the one below */
if (c == EOF) {
delinput();
goto repeat;
}
ungetc(c, fp);
for (;;) {
c = readchar();
nextchar:
if (c == EOF)
break;
if (comment) {
/* inside a block comment: drop everything up to the closing star-slash */
if (c != '*')
continue;
if ((c = readchar()) != '/')
goto nextchar;
comment = 0;
/* the whole comment is replaced by a single blank */
c = ' ';
} else if (commentline) {
/* inside a line comment: drop everything up to the newline */
if (c != '\n')
continue;
commentline = 0;
/*
 * The newline that ends the comment becomes a blank, so the
 * buffered line continues with the next physical line.
 */
c = ' ';
}
if (c == '\n')
break;
/* one byte is kept free for the terminator stored after the loop */
if (bp == &ptr[INPUTSIZ-1])
die("line %d too big in file '%s'",
input->nline, input->fname);
if (c == '/') {
if ((c = readchar()) == '*') {
comment = 1;
continue;
} else if (c == '/') {
commentline = 1;
continue;
}
/* a plain '/': store it and reprocess the character read after it */
*bp++ = '/';
goto nextchar;
}
*bp++ = c;
}
/*
 * A blank is stored after the last character. cnt is set to the number
 * of collected characters and is not decremented for the character
 * returned below, so nextchar() delivers the remaining characters
 * followed by this blank before asking for another line.
 */
*bp = ' ';
input->cnt = bp - ptr;
/* NOTE(review): unlike nextchar(), this read is not cast to unsigned char */
if ((c = *input->ptr++) == '#') {
/* directive handlers receive a NUL-terminated line */
*bp = '\0';
preprocessor();
goto repeat;
}
return c;
}
/*
 * Push `c` back so that the following nextchar() returns it again.
 * When no input is left the only value that can be pushed back is EOF,
 * which is returned unchanged. Otherwise the character is written into
 * the line buffer just before the read cursor and the stored value is
 * returned.
 */
static int
backchar(int c)
{
	if (input == NULL) {
		assert(c == EOF);
		return c;
	}
	input->cnt++;
	input->ptr--;
	*input->ptr = c;
	return *input->ptr;
}
/*
 * Finish the scan of an integer constant whose digits are in `s`.
 *
 * The optional suffix is read from the input: 'L' may appear up to
 * twice (long, long long) and 'U' once, in either case; a repeated
 * suffix is reported through error(). The character that ends the
 * suffix is pushed back untouched.
 *
 * An anonymous symbol carrying the resulting type is installed and
 * stored in yylval.sym. Returns CONSTANT.
 *
 * NOTE(review): the numeric value is only stored when the type is plain
 * int; constants of any other type keep whatever install() left there.
 */
static uint8_t
integer(char *s, char base)
{
	static Type *tp;
	static Symbol *sym;
	static char size, sign;
	static long v;
	int c;

	size = sign = 0;
type:
	/*
	 * Keep the character exactly as read: only the comparison is
	 * case-insensitive, the pushed-back character must not be altered.
	 */
	c = nextchar();
	switch (toupper(c)) {
	case 'L':
		if (size == LLONG)
			goto wrong_type;
		size = (size == LONG) ? LLONG : LONG;
		goto type;
	case 'U':
		if (sign == UNSIGNED)
			goto wrong_type;
		sign = UNSIGNED;	/* remember it so the type becomes unsigned */
		goto type;
	default:
		backchar(c);
		tp = ctype(INT, sign, size);
		break;
	wrong_type:
		error("invalid suffix in integer constant");
	}

	sym = install("", NS_IDEN);
	sym->type = tp;
	v = strtol(s, NULL, base);
	if (tp == inttype)
		sym->u.i = v;
	yylval.sym = sym;
	return CONSTANT;
}
/*
 * Collect into yytext the longest run of digits valid in `base`
 * (8, 10 or 16) found at the current input position, NUL-terminate it
 * and push back the character that ended the run.
 * Reports an error when the run does not fit in IDENTSIZ characters.
 * Returns yytext.
 */
static char *
digits(uint8_t base)
{
	char *bp;
	int ch;		/* int, so EOF and bytes above 127 reach ctype intact */

	for (bp = yytext ; bp < &yytext[IDENTSIZ]; *bp++ = ch) {
		ch = nextchar();
		switch (base) {
		case 8:
			/* '7' is the highest octal digit and must be accepted */
			if (ch > '7')
				goto end;
			/* fallthrough */
		case 10:
			if (!isdigit(ch))
				goto end;
			break;
		case 16:
			if (!isxdigit(ch))
				goto end;
			break;
		}
	}
end:
	if (bp == &yytext[IDENTSIZ])
		error("number too long %s", yytext);
	*bp = '\0';
	backchar(ch);
	return yytext;
}
/*
 * Scan a numeric constant. The prefix selects the base: "0x"/"0X" is
 * hexadecimal, any other leading '0' is octal (the '0' itself is
 * consumed), and everything else is decimal. The digits are gathered by
 * digits() and converted by integer(), whose token is returned.
 */
static uint8_t
number(void)
{
	static char base;
	int first, second;

	first = nextchar();
	if (first != '0') {
		base = 10;
		backchar(first);
	} else {
		second = nextchar();
		if (toupper(second) == 'X') {
			base = 16;
		} else {
			base = 8;
			backchar(second);
		}
	}
	return integer(digits(base), base);
}
/*
 * Translate the escape sequence that follows a backslash already
 * consumed by the caller and store the resulting character at *s.
 *
 * Returns the position after the stored character, or `s` unchanged
 * (after a warning) when the sequence is not recognised.
 *
 * Numeric escapes (\x.., \u.. and those starting with \0) are converted
 * in their own base; values above 255 produce a warning.
 */
static char *
escape(char *s)
{
	uint8_t base;
	long v;
	int c;

repeat:
	switch (nextchar()) {
	case '\\': c = '\\'; break;
	case 'a': c = '\a'; break;
	case 'b': c = '\b'; break;
	case 'f': c = '\f'; break;
	case 'n': c = '\n'; break;
	case 'r': c = '\r'; break;
	case 't': c = '\t'; break;
	case 'v': c = '\v'; break;
	case '\'': c = '\''; break;
	case '"': c ='"'; break;
	case '?': c = '?'; break;
	case 'u': /* TODO: */
	case 'x':
		base = 16;
		goto number;
	case '0':
		base = 8;
	number:
		/* convert in the base of the escape, not as a decimal number */
		v = strtol(digits(base), NULL, base);
		if (v > 255)
			warn("character constant out of range");
		c = v;
		break;
	case '\n':
		/* escaped newline: take the character that follows it */
		newline();
		if ((c = nextchar()) == '\\')
			goto repeat;
		break;
	default:
		warn("unknown escape sequence");
		return s;
	}
	*s = c;
	return ++s;
}
/*
 * Scan a character constant. The opening quote is discarded, one
 * character (or one escape sequence) is read, and a closing quote is
 * required; anything else is reported through error().
 * An anonymous int symbol holding the character is installed and
 * stored in yylval.sym. Returns CONSTANT.
 */
static uint8_t
character(void)
{
	static char value;
	Symbol *sym;
	int closing;

	(void) nextchar();	/* opening quote */
	value = nextchar();
	if (value == '\\')
		escape(&value);

	closing = nextchar();
	if (closing != '\'')
		error("invalid character constant");

	sym = install("", NS_IDEN);
	sym->u.i = value;
	sym->type = inttype;
	yylval.sym = sym;
	return CONSTANT;
}
/*
 * Scan a string literal. The opening quote is discarded and characters
 * are copied (escape sequences translated by escape()) until the
 * closing quote, EOF, or until the STRINGSIZ-byte buffer is full.
 *
 * A copy of the text is stored in an anonymous symbol whose type is an
 * array of char sized for the text plus its terminator; the symbol is
 * left in yylval.sym. Returns CONSTANT.
 */
static uint8_t
string(void)
{
	static char buf[STRINGSIZ+1];
	static Symbol *sym;
	char *bp;
	int c;

	nextchar();	/* discard the initial " */
	for (bp = buf; bp < &buf[STRINGSIZ]; ) {
		switch (c = nextchar()) {
		case EOF:
			error("found EOF while parsing");
			/* fallthrough */
		case '"':
			goto end_string;
		case '\\':
			bp = escape(bp);
			break;
		default:
			*bp++ = c;
		}
	}
end_string:
	/*
	 * The loop only leaves with a full buffer when no closing quote
	 * was found in time, so the limit to test is the buffer size.
	 */
	if (bp == &buf[STRINGSIZ])
		error("string too long");
	*bp = '\0';
	sym = install("", NS_IDEN);
	sym->u.s = xstrdup(buf);
	sym->type = mktype(chartype, ARY, (bp - buf) + 1, NULL);
	yylval.sym = sym;
	return CONSTANT;
}
static uint8_t
iden(void)
{
char *bp;
int c;
Symbol *sym;
for (bp = yytext; bp < &yytext[IDENTSIZ]; *bp++ = c) {
if (!isalnum(c = nextchar()) && c != '_')
break;
}
if (bp == &yytext[IDENTSIZ])
error("identifier too long %s", yytext);
*bp = '\0';
backchar(c);
sym = yylval.sym = lookup(yytext, lex_ns);
if (!sym || sym->token == IDEN)
return IDEN;
yylval.token = sym->u.token;
return sym->token;
}
/*
 * Decide between a one- and a two-character operator. When the next
 * input character is `expect` it is consumed, appended to yytext and
 * `ifyes` is returned; otherwise it is pushed back and `ifno` is
 * returned.
 */
static uint8_t
follow(int expect, int ifyes, int ifno)
{
	int c = nextchar();

	if (c != expect) {
		backchar(c);
		return ifno;
	}
	yytext[1] = c;
	yytext[2] = 0;
	return ifyes;
}
/*
 * Scan the operators that begin with '-' (the '-' itself has already
 * been consumed and stored in yytext[0]): "--", "->", "-=" or a plain
 * '-'. yytext receives the spelling of the operator returned.
 */
static uint8_t
minus(void)
{
	uint8_t tok;
	int c = nextchar();

	if (c == '-') {
		tok = DEC;
	} else if (c == '>') {
		tok = INDIR;
	} else if (c == '=') {
		tok = SUB_EQ;
	} else {
		yytext[1] = yytext[2] = '\0';
		backchar(c);
		return '-';
	}
	yytext[1] = c;
	yytext[2] = '\0';
	return tok;
}
/*
 * Scan the operators that begin with '+' (the '+' itself has already
 * been consumed and stored in yytext[0]): "++", "+=" or a plain '+'.
 * yytext receives the spelling of the operator returned.
 */
static uint8_t
plus(void)
{
	uint8_t tok;
	int c = nextchar();

	if (c == '+') {
		tok = INC;
	} else if (c == '=') {
		tok = ADD_EQ;
	} else {
		yytext[1] = yytext[2] = '\0';
		backchar(c);
		return '+';
	}
	yytext[1] = c;
	yytext[2] = '\0';
	return tok;
}
/*
 * Scan the operators that begin with `op` ('<' or '>'), which has
 * already been consumed and stored in yytext[0]:
 *   op '='      -> `equal`
 *   op op '='   -> `assig`
 *   op op       -> `shift`
 *   op          -> `op` itself
 * yytext receives the full spelling of the operator returned.
 */
static uint8_t
relational(uint8_t op, uint8_t equal, uint8_t shift, uint8_t assig)
{
	uint8_t tok;
	int c = nextchar();

	yytext[1] = c;
	yytext[2] = '\0';

	if (c == '=')
		return equal;
	if (c == op) {
		tok = follow('=', assig, shift);
		if (yytext[1] == '=') {
			/*
			 * follow() writes the '=' at index 1, over the second
			 * operator character; rebuild the three-character
			 * spelling so diagnostics show "<<=" and not "<=".
			 */
			yytext[1] = op;
			yytext[2] = '=';
			yytext[3] = '\0';
		}
		return tok;
	}
	backchar(c);
	yytext[1] = '\0';
	return op;
}
/*
 * Scan the operators that begin with `op` ('&' or '|'), which has
 * already been consumed and stored in yytext[0]: op '=' yields `equal`,
 * op op yields `logic`, and anything else leaves the plain `op`.
 * yytext receives the spelling of the operator returned.
 */
static uint8_t
logic(uint8_t op, uint8_t equal, uint8_t logic)
{
	int c = nextchar();

	yytext[1] = c;
	yytext[2] = '\0';

	if (c != '=' && c != op) {
		backchar(c);
		yytext[1] = '\0';
		return op;
	}
	return (c == '=') ? equal : logic;
}
/*
 * Scan the operators that begin with '.' (already consumed and stored
 * in yytext[0]): a single '.' or the ellipsis "...". Two dots that are
 * not followed by a third are reported through error().
 */
static uint8_t
dot(void)
{
	int c;

	if ((c = nextchar()) != '.') {
		backchar(c);
		return '.';
	}
	if ((c = nextchar()) != '.') {
		error("incorrect token '%s'", yytext);
		/*
		 * Only reached if error() returns: hand the offending
		 * character back and produce a defined token instead of
		 * falling off the end of a non-void function.
		 */
		backchar(c);
		return '.';
	}
	yytext[2] = yytext[1] = '.';
	yytext[3] = '\0';
	return ELLIPSIS;
}
/*
 * Scan an operator or punctuator. The first character is stored in
 * yytext and, when it can start a longer operator, the matching helper
 * decides which token it is. Any other character is its own token.
 */
static uint8_t
operator(void)
{
	uint8_t tok;
	uint8_t c = nextchar();

	yytext[0] = c;
	yytext[1] = '\0';

	switch (c) {
	case '<':
		tok = relational('<', LE, SHL, SHL_EQ);
		break;
	case '>':
		tok = relational('>', GE, SHR, SHR_EQ);
		break;
	case '&':
		tok = logic('&', AND_EQ, AND);
		break;
	case '|':
		tok = logic('|', OR_EQ, OR);
		break;
	case '=':
		tok = follow('=', EQ, '=');
		break;
	case '^':
		tok = follow('=', XOR_EQ, '^');
		break;
	case '*':
		tok = follow('=', MUL_EQ, '*');
		break;
	case '/':
		tok = follow('=', DIV_EQ, '/');
		break;
	case '!':
		tok = follow('=', NE, '!');
		break;
	case '-':
		tok = minus();
		break;
	case '+':
		tok = plus();
		break;
	case '.':
		tok = dot();
		break;
	default:
		tok = c;
		break;
	}
	return tok;
}
/*
 * Consume white space from the input, counting every newline seen, and
 * return the first character that is not white space (possibly EOF).
 */
static int
skipspaces(void)
{
	int c;

	for (;;) {
		c = nextchar();
		if (!isspace(c))
			return c;
		if (c == '\n')
			newline();
	}
}
/*
 * Read the next token from the input. The first significant character
 * selects the scanner to run; the token is stored in yytoken and also
 * returned. At end of input yytext is set to "EOF" and the token is
 * EOFTOK.
 */
uint8_t
next(void)
{
	int c = skipspaces();

	/* every scanner re-reads its first character */
	backchar(c);

	if (c == EOF) {
		strcpy(yytext, "EOF");
		yytoken = EOFTOK;
	} else if (c == '"') {
		yytoken = string();
	} else if (c == '\'') {
		yytoken = character();
	} else if (c == '_' || isalpha(c)) {
		yytoken = iden();
	} else if (isdigit(c)) {
		yytoken = number();
	} else {
		yytoken = operator();
	}
	return yytoken;
}
/*
 * Require the current token to be `tok`. On a match the next token is
 * read; otherwise a soft error is reported and the current token is
 * kept. The expected token is named in the message only when it is a
 * printable character.
 */
void
expect(uint8_t tok)
{
	if (yytoken == tok) {
		next();
		return;
	}
	if (isgraph(tok))
		softerror("expected '%c' before '%s'", tok, yytext);
	else
		softerror("unexpected '%s'", yytext);
}
/*
 * Peek at the next significant character: white space is skipped, the
 * character found is pushed back for the next scan and returned.
 */
uint8_t
ahead(void)
{
	int c = skipspaces();

	backchar(c);
	return c;
}
/*
 * Select the resynchronisation point that discard() looks for; `type`
 * is compared there against END_COMP, END_COND, END_LDECL and END_DECL.
 */
void
setsafe(uint8_t type)
{
safe = type;
}
/*
 * Error recovery: starting with the current token and then reading raw
 * characters, skip input until the synchronisation character selected
 * with setsafe() is found, or until EOF:
 *   END_COMP   stops at '}' or ';'
 *   END_COND   stops at ')'
 *   END_LDECL  stops at ',' or ';'
 *   END_DECL   stops at ';'
 * The stopping character (or EOFTOK) becomes yytoken and control jumps
 * to `recover`; this function does not return.
 */
void
discard(void)
{
	extern jmp_buf recover;
	int c = yytoken;
	int found;

	for (;;) {
		switch (safe) {
		case END_COMP:
			found = (c == '}' || c == ';');
			break;
		case END_COND:
			found = (c == ')');
			break;
		case END_LDECL:
			found = (c == ',' || c == ';');
			break;
		case END_DECL:
			found = (c == ';');
			break;
		default:
			found = 0;
			break;
		}
		if (found)
			break;
		c = nextchar();
		if (c == EOF) {
			c = EOFTOK;
			break;
		}
	}
	yytoken = c;
	longjmp(recover, 1);
}