ref: afec1bc0e0ba8999c706adf6438cd74d18715a88
parent: f826b55f340d6d1806060e40c898ec746a96da02
author: Roberto E. Vargas Caballero <k0ga@shike2.com>
date: Tue Apr 5 10:04:12 EDT 2022
cc1: Use control characters for #, ## and macro parameters. We were using the ASCII characters $, @ and # for them, and it had the drawback that the preprocessor could not work with text containing these characters in unexpected places. Using control characters removes that problem, and it enables the use of these new tokens in the lexer itself.
--- a/src/cmd/cc/cc1/cc1.h
+++ b/src/cmd/cc/cc1/cc1.h
@@ -129,8 +129,11 @@
RESTRICT = 1 << 1,
VOLATILE = 1 << 2,
INLINE = 1 << 3,
- TQUALIFIER = 1 << 7, /* this value is picked outside of ASCII range */
- TYPE,
+ TQUALIFIER = 1 << 7,
+ MACROPAR = 17,
+ CONCAT = 18,
+ STRINGIZE = 19,
+ TYPE = 129,
IDEN,
SCLASS,
CONSTANT,
--- a/src/cmd/cc/cc1/cpp.c
+++ b/src/cmd/cc/cc1/cpp.c
@@ -242,7 +242,7 @@
bufsiz -= size;
bp += size;
break;
- case '$':
+ case CONCAT:
/* token concatenation operator */
while (bp[-1] == ' ')
--bp, ++bufsiz;
@@ -249,7 +249,7 @@
while (s[1] == ' ')
++s;
break;
- case '#':
+ case STRINGIZE:
/* stringfier operator */
arg = mp->arglist[atoi(s += 2)];
s += 2;
@@ -275,7 +275,7 @@
*bp++ = '"';
break;
- case '@':
+ case MACROPAR:
/* parameter substitution */
arg = mp->arglist[atoi(++s)];
size = expandarg(arg, bp, bufsiz);
@@ -393,10 +393,11 @@
getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz)
{
Symbol **argp;
+ int siz;
size_t len;
int prevc = 0, ispar;
- if (yytoken == '$') {
+ if (yytoken == CONCAT) {
cpperror("'##' cannot appear at either ends of a macro expansion");
return 0;
}
@@ -409,11 +410,13 @@
break;
}
if (argp != &args[nargs]) {
- sprintf(yytext, "@%02d@", (int) (argp - args));
+ siz = argp - args;
+ sprintf(yytext,
+ "%c%02d%c", MACROPAR, siz, MACROPAR);
ispar = 1;
}
}
- if (prevc == '#' && !ispar) {
+ if (prevc == STRINGIZE && !ispar) {
cpperror("'#' is not followed by a macro parameter");
return 0;
}
@@ -424,9 +427,8 @@
cpperror("macro too long");
return 0;
}
- /* $ token is generated by ## */
- if (yytoken == '$') {
- *bp++ = '$';
+ if (yytoken == CONCAT || yytoken == STRINGIZE) {
+ *bp++ = yytoken;
--bufsiz;
} else {
memcpy(bp, yytext, len);
@@ -433,7 +435,7 @@
bp += len;
bufsiz -= len;
}
- if ((prevc = yytoken) != '#') {
+ if ((prevc = yytoken) != STRINGIZE) {
*bp++ = ' ';
--bufsiz;
}
--- a/src/cmd/cc/cc1/lex.c
+++ b/src/cmd/cc/cc1/lex.c
@@ -755,7 +755,7 @@
t = follow('=', NE, '!');
break;
case '#':
- t = follow('#', '$', '#');
+ t = follow('#', CONCAT, STRINGIZE);
break;
case '-':
t = minus();