ref: 1c9aa7e9dfda3ae749639a390e08515a71a01842
parent: 40c421cfdf64efb9b3bbfa0cd14a847193511480
author: Roberto E. Vargas Caballero <k0ga@shike2.com>
date: Wed Jan 18 06:16:06 EST 2017
[cc1] Rewrite the input system The input system was broken for a lot of different reasons. Some of them were due to the fact that the responsibility of the different functions was not clear, and the kind of input wasn't explicit. This new version tries to split clearly the responsibility of every function (for example spaces are skipped only in skipspaces now).
--- a/cc1/cc1.h
+++ b/cc1/cc1.h
@@ -400,7 +400,6 @@
/* lex.c */
extern char ahead(void);
extern unsigned next(void);
-extern int moreinput(void);
extern void expect(unsigned tok);
extern void discard(void);
extern int addinput(char *fname, Symbol *hide, char *buffer);
@@ -451,7 +450,7 @@
extern char yytext[];
extern unsigned yytoken;
extern unsigned short yylen;
-extern int cppoff, disexpand;
+extern int disexpand;
extern unsigned cppctx;
extern Input *input;
extern int lexmode, namespace, onlycpp;
--- a/cc1/code.c
+++ b/cc1/code.c
@@ -159,7 +159,7 @@
{
extern int failure;
- if (failure)
+ if (failure || onlycpp)
return;
(*opcode[op])(op, arg);
}
--- a/cc1/cpp.c
+++ b/cc1/cpp.c
@@ -16,7 +16,7 @@
static unsigned ncmdlines;
static Symbol *symline, *symfile;
static unsigned char ifstatus[NR_COND];
-static int ninclude;
+static int ninclude, cppoff;
static char **dirinclude;
unsigned cppctx;
@@ -25,7 +25,7 @@
void
defdefine(char *macro, char *val, char *source)
{
- char *def, *fmt = "#define %s %s";
+ char *def, *fmt = "#define %s %s\n";
Symbol dummy = {.flags = SDECLARED};
if (!val)
@@ -345,7 +345,7 @@
cpperror("'#' is not followed by a macro parameter");
return 0;
}
- if (yytoken == EOFTOK)
+ if (yytoken == '\n')
break;
if ((len = strlen(yytext)) >= bufsiz) {
@@ -449,7 +449,7 @@
static void
include(void)
{
- char *file, *p, **bp;
+ char file[FILENAME_MAX], *p, **bp;
size_t filelen;
static char *sysinclude[] = {
PREFIX "/include/scc/" ARCH "/",
@@ -467,19 +467,31 @@
switch (*yytext) {
case '<':
- if ((p = strchr(input->begin, '>')) == NULL || p == yytext + 1)
+ if ((p = strchr(input->begin, '>')) == NULL || p[-1] == '<')
goto bad_include;
- *p = '\0';
- file = input->begin;
- filelen = strlen(file);
+ filelen = p - input->begin;
+ if (filelen >= FILENAME_MAX)
+ goto too_long;
+ memcpy(file, input->begin, filelen);
+ file[filelen] = '\0';
+
input->begin = input->p = p+1;
+ if (next() != '\n')
+ goto trailing_characters;
+
break;
case '"':
- if ((p = strchr(yytext + 1, '"')) == NULL || p == yytext + 1)
+ if (yylen < 3)
goto bad_include;
- *p = '\0';
- file = yytext+1;
- filelen = strlen(file);
+ filelen = yylen-2;
+ if (filelen >= FILENAME_MAX)
+ goto too_long;
+ memcpy(file, yytext+1, filelen);
+ file[filelen] = '\0';
+
+ if (next() != '\n')
+ goto trailing_characters;
+
if (includefile(NULL, file, filelen))
goto its_done;
break;
@@ -499,9 +511,16 @@
cpperror("included file '%s' not found", file);
its_done:
- next();
return;
+trailing_characters:
+ cpperror("trailing characters after preprocessor directive");
+ return;
+
+too_long:
+ cpperror("too long file name in #include");
+ return;
+
bad_include:
cpperror("#include expects \"FILENAME\" or <FILENAME>");
return;
@@ -709,11 +728,15 @@
{0, NULL}
};
int ns;
+ char *p;
- if (*input->p != '#')
- return 0;
- ++input->p;
+ for (p = input->p; isspace(*p); ++p)
+ /* nothing */;
+ if (*p != '#')
+ return cppoff;
+ input->p = p+1;
+
disexpand = 1;
lexmode = CPPMODE;
ns = namespace;
@@ -724,15 +747,23 @@
for (bp = clauses; bp->token && bp->token != yytoken; ++bp)
/* nothing */;
if (!bp->token) {
- errorp("incorrect preprocessor directive");
+ errorp("incorrect preprocessor directive '%s'", yytext);
goto error;
}
+ DBG("CPP %s", yytext);
+
pushctx(); /* create a new context to avoid polish */
(*bp->fun)(); /* the current context, and to get all */
popctx(); /* the symbols freed at the end */
- if (yytoken != EOFTOK && !cppoff)
+ /*
+ * #include changes the content of input->line, so the correctness
+ * of the line must be checked in the own include(), and we have
+ * to skip this tests. For the same reason include() is the only
+ * function which does not prepare the next token
+ */
+ if (yytoken != '\n' && !cppoff && bp->token != INCLUDE)
errorp("trailing characters after preprocessor directive");
error:
--- a/cc1/lex.c
+++ b/cc1/lex.c
@@ -1,5 +1,6 @@
/* See LICENSE file for copyright and license details. */
static char sccsid[] = "@(#) ./cc1/lex.c";
+#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
@@ -16,11 +17,10 @@
struct yystype yylval;
char yytext[STRINGSIZ+3];
unsigned short yylen;
-int cppoff;
int lexmode = CCMODE;
int namespace = NS_IDEN;
-static int safe, eof;
+static int safe;
Input *input;
void
@@ -83,7 +83,7 @@
if (hide->hide == UCHAR_MAX)
die("Too many macro expansions");
++hide->hide;
- flags = IMACRO|IEOF;
+ flags = IMACRO;
} else if (fname) {
/* a new file */
if ((fp = fopen(fname, "r")) == NULL)
@@ -126,24 +126,12 @@
if (fclose(ip->fp))
die("error: failed to read from input file '%s'",
ip->fname);
- if (!ip->next)
- eof = 1;
break;
case IMACRO:
+ assert(hide->hide == 1);
--hide->hide;
- /*
- * If the symbol is not declared then it was
- * an expansion due to a #if directive with
- * a non declared symbol (expanded to 0),
- * thus we have to kill the symbol
- * TODO: review this comment and code
- */
- if ((hide->flags & SDECLARED) == 0)
- killsym(hide);
break;
}
- if (eof)
- return;
input = ip->next;
free(ip->fname);
free(ip->line);
@@ -156,6 +144,10 @@
die("error: input file '%s' too long", input->fname);
}
+/*
+ * Read the next character from the input file, counting number of lines
+ * and joining lines escaped with \
+ */
static int
readchar(void)
{
@@ -162,13 +154,8 @@
FILE *fp = input->fp;
int c;
- if (eof || !fp)
- return 0;
repeat:
switch (c = getc(fp)) {
- case EOF:
- c = '\0';
- break;
case '\\':
if ((c = getc(fp)) == '\n') {
newline();
@@ -185,85 +172,111 @@
return c;
}
+/*
+ * discard a C comment. This function is only called from readline
+ * because it is impossible to have a comment in a macro, because
+ * comments are always discarded before processing any cpp directive
+ */
static void
comment(int type)
{
int c;
- c = -1;
repeat:
- do {
- if (!c || eof) {
- errorp("unterminated comment");
- return;
- }
- } while ((c = readchar()) != type);
+ while ((c = readchar()) != EOF && c != type)
+ /* nothing */;
+ if (c == EOF) {
+ errorp("unterminated comment");
+ return;
+ }
+
if (type == '*' && (c = readchar()) != '/')
goto repeat;
}
+/*
+ * readline is used to read a full logical line from a file.
+ * It discards comments and checks that the line fits in
+ * the input buffer
+ */
static int
readline(void)
{
char *bp, *lim;
- char c, peekc = 0;
+ int c, peekc = 0;
-repeat:
-
- if (eof)
+ if (feof(input->fp)) {
+ input->flags |= IEOF;
return 0;
- if (!input->fp) {
- delinput();
- return 1;
}
- if (feof(input->fp)) {
- delinput();
- goto repeat;
- }
*input->line = '\0';
- input->begin = input->p = input->line;
lim = &input->line[INPUTSIZ-1];
- for (bp = input->line; bp < lim; *bp++ = c) {
+ for (bp = input->line; bp < lim-1; *bp++ = c) {
c = (peekc) ? peekc : readchar();
peekc = 0;
- if (c == '\n' || c == '\0')
+ if (c == '\n' || c == EOF)
break;
- if (c != '/' || (peekc = readchar()) != '*' && peekc != '/')
+ if (c != '/')
continue;
- comment((peekc == '/') ? '\n' : peekc);
+
+ /* check for /* or // */
+ peekc = readchar();
+ if (peekc != '*' && peekc != '/')
+ continue;
+ comment((peekc == '/') ? '\n' : '/');
peekc = 0;
c = ' ';
}
- if (bp == lim)
- error("line too long");
+ input->begin = input->p = input->line;
+ if (bp == lim-1) {
+ errorp("line too long");
+ --bp;
+ }
+ *bp++ = '\n';
*bp = '\0';
+
return 1;
}
-int
+/*
+ * moreinput gets more bytes to be passed to the lexer.
+ * It can take more bytes from macro expansions or
+ * directly reading from files. When a cpp directive
+ * is processed the line is discarded because it must not
+ * be passed to the lexer
+ */
+static int
moreinput(void)
{
- static char file[FILENAME_MAX];
- static unsigned nline;
- char *s;
- int wasexpand;
+ int wasexpand = 0;
repeat:
- wasexpand = input->hide != NULL;
- if (!readline())
+ if (!input)
return 0;
- while (isspace(*input->p))
- ++input->p;
- input->begin = input->p;
- if (*input->p == '\0' || cpp() || cppoff) {
- *input->begin = '\0';
- goto repeat;
+
+ if (*input->p == '\0') {
+ if ((input->flags&ITYPE) == IMACRO) {
+ wasexpand = 1;
+ input->flags |= IEOF;
+ }
+ if (input->flags & IEOF) {
+ delinput();
+ goto repeat;
+ }
+ if (!readline() || cpp()) {
+ *input->p = '\0';
+ goto repeat;
+ }
}
if (onlycpp && !wasexpand) {
+ static char file[FILENAME_MAX];
+ static unsigned nline;
+ char *s;
+
putchar('\n');
if (strcmp(file, input->fname)) {
strcpy(file, input->fname);
@@ -276,7 +289,6 @@
nline = input->nline;
printf(s, nline, file);
}
- input->begin = input->p;
return 1;
}
@@ -483,7 +495,7 @@
c = *input->p;
++input->p;
if (*input->p != '\'')
- error("invalid character constant");
+ errorp("invalid character constant");
else
++input->p;
@@ -643,47 +655,50 @@
/* TODO: Ensure that namespace is NS_IDEN after a recovery */
-static void
+/*
+ * skip all the spaces until the next token. When we are in
+ * CPPMODE \n is not considered a whitespace
+ */
+static int
skipspaces(void)
{
-repeat:
- while (isspace(*input->p))
- ++input->p;
- input->begin = input->p;
+ int c;
- if (*input->p != '\0')
- return;
-
- if (lexmode == CPPMODE) {
- /*
- * If we are in cpp mode, we only return eof when
- * we don't have more inputs, or when the next
- * next input is from a file
- */
- if (!input || !input->next || input->next->fp)
- return;
+ for (;;) {
+ switch (c = *input->p) {
+ case '\n':
+ if (lexmode == CPPMODE)
+ goto return_byte;
+ ++input->p;
+ case '\0':
+ if (!moreinput())
+ return EOF;
+ break;
+ case ' ':
+ case '\t':
+ case '\v':
+ case '\r':
+ case '\f':
+ ++input->p;
+ break;
+ default:
+ goto return_byte;
+ }
}
- if (!moreinput())
- return;
- goto repeat;
+
+return_byte:
+ input->begin = input->p;
+ return c;
}
unsigned
next(void)
{
- char c;
+ int c;
- skipspaces();
- c = *input->begin;
- if ((eof || lexmode == CPPMODE) && c == '\0') {
- strcpy(yytext, "<EOF>");
- if (cppctx && eof)
- error("#endif expected");
+ if ((c = skipspaces()) == EOF)
yytoken = EOFTOK;
- goto exit;
- }
-
- if (isalpha(c) || c == '_')
+ else if (isalpha(c) || c == '_')
yytoken = iden();
else if (isdigit(c))
yytoken = number();
@@ -694,7 +709,12 @@
else
yytoken = operator();
-exit:
+ if (yytoken == EOF) {
+ strcpy(yytext, "<EOF>");
+ if (cppctx)
+ errorp("#endif expected");
+ }
+
DBG("TOKEN %s", yytext);
return yytoken;
}