shithub: rgbds

Download patch

ref: c75a9539bac624b5e524c74ffa37a065f532585a
parent: 54e5bf0f0c60b7ccc80215bfe10571267474e7f8
author: dbrotz <43593771+dbrotz@users.noreply.github.com>
date: Wed Jun 26 13:36:12 EDT 2019

Don't append invalid characters to symbol name
When a macro arg appears in a symbol name, its contents are appended to
the name. However, the contents of the macro arg were not being
validated. Any character, regardless of whether it was allowed in a
symbol name, would be appended. With this change, the contents of the
macro arg are now validated character by character. The symbol name is
considered to end just before the first invalid character. The remainder
of the macro arg, starting at that character, is treated as though it
followed the symbol name in the asm source code.

--- a/src/asm/globlex.c
+++ b/src/asm/globlex.c
@@ -124,19 +124,19 @@
 
 uint32_t ParseFixedPoint(char *s, uint32_t size)
 {
-	uint32_t i = 0, dot = 0;
+	uint32_t i;
+	uint32_t dot = 0;
 
-	while (size && dot != 2) {
-		if (s[i] == '.')
-			dot += 1;
+	for (i = 0; i < size; i++) {
+		if (s[i] == '.') {
+			dot++;
 
-		if (dot < 2) {
-			size -= 1;
-			i += 1;
+			if (dot == 2)
+				break;
 		}
 	}
 
-	yyunputbytes(size);
+	yyskipbytes(i);
 
 	yylval.nConstValue = (int32_t)(atof(s) * 65536);
 
@@ -147,56 +147,100 @@
 {
 	char dest[256];
 
+	if (size > 255)
+		fatalerror("Number token too long");
+
 	strncpy(dest, s, size);
 	dest[size] = 0;
 	yylval.nConstValue = ascii2bin(dest);
 
+	yyskipbytes(size);
+
 	return 1;
 }
 
-uint32_t ParseSymbol(char *src, uint32_t size)
+/*
+ * If the symbol name ends before the end of the macro arg, return true
+ * and point "rest" to the rest of the macro arg.
+ * Otherwise, return false.
+ */
+bool AppendMacroArg(char whichArg, char *dest, size_t *destIndex, char **rest)
 {
-	char dest[MAXSYMLEN + 1];
-	int32_t copied = 0, size_backup = size;
+	char *marg;
 
-	while (size && copied < MAXSYMLEN) {
-		if (*src == '\\') {
-			char *marg;
+	if (whichArg == '@')
+		marg = sym_FindMacroArg(-1);
+	else if (whichArg >= '0' && whichArg <= '9')
+		marg = sym_FindMacroArg(whichArg - '0');
+	else
+		fatalerror("Malformed ID");
 
-			src += 1;
-			size -= 1;
+	if (!marg)
+		fatalerror("Macro argument '\\%c' not defined", whichArg);
 
-			if (*src == '@') {
-				marg = sym_FindMacroArg(-1);
-			} else if (*src >= '0' && *src <= '9') {
-				marg = sym_FindMacroArg(*src - '0');
-			} else {
-				fatalerror("Malformed ID");
-				return 0;
-			}
+	char ch;
 
-			src += 1;
-			size -= 1;
+	while ((ch = *marg) != 0) {
+		if ((ch >= 'a' && ch <= 'z')
+		 || (ch >= 'A' && ch <= 'Z')
+		 || (ch >= '0' && ch <= '9')
+		 || ch == '_'
+		 || ch == '@'
+		 || ch == '#') {
+			if (*destIndex >= MAXSYMLEN)
+				fatalerror("Symbol too long");
 
-			if (marg) {
-				while (*marg)
-					dest[copied++] = *marg++;
-			}
+			dest[*destIndex] = ch;
+			(*destIndex)++;
 		} else {
-			dest[copied++] = *src++;
-			size -= 1;
+			*rest = marg;
+			return true;
 		}
+
+		marg++;
 	}
 
-	if (copied >= MAXSYMLEN)
-		fatalerror("Symbol too long");
+	return false;
+}
 
-	dest[copied] = 0;
+uint32_t ParseSymbol(char *src, uint32_t size)
+{
+	char dest[MAXSYMLEN + 1];
+	size_t srcIndex = 0;
+	size_t destIndex = 0;
+	char *rest = NULL;
 
+	while (srcIndex < size) {
+		char ch = src[srcIndex++];
+
+		if (ch == '\\') {
+			/*
+			 * We don't check if srcIndex is still less than size,
+			 * but that can only fail to be true when the
+			 * following char is neither '@' nor a digit.
+			 * In that case, AppendMacroArg() will catch the error.
+			 */
+			ch = src[srcIndex++];
+
+			if (AppendMacroArg(ch, dest, &destIndex, &rest))
+				break;
+		} else {
+			if (destIndex >= MAXSYMLEN)
+				fatalerror("Symbol too long");
+			dest[destIndex++] = ch;
+		}
+	}
+
+	dest[destIndex] = 0;
+
 	if (!oDontExpandStrings && sym_isString(dest)) {
 		char *s;
 
-		yyskipbytes(size_backup);
+		yyskipbytes(srcIndex);
+
+		if (rest)
+			yyunputstr(rest);
+
 		yyunputstr(s = sym_GetStringValue(dest));
 
 		while (*s) {
@@ -205,6 +249,11 @@
 		}
 		return 0;
 	}
+
+	yyskipbytes(srcIndex);
+
+	if (rest)
+		yyunputstr(rest);
 
 	strcpy(yylval.tzSym, dest);
 	return 1;
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -803,8 +803,6 @@
 				goto scanagain;
 		}
 
-		pLexBuffer += nFloatLen;
-
 		if (token->nToken == T_ID && linestart)
 			return T_LABEL;
 		else
--- /dev/null
+++ b/test/asm/label-macro-arg.asm
@@ -1,0 +1,27 @@
+m1: MACRO
+x\1
+ENDM
+
+S EQUS "y"
+S2 EQUS "yy"
+
+m2: MACRO
+S\1
+ENDM
+
+	m1 = 5
+	m2 = 6
+	m1 x = 7
+	m2 2 = 8
+
+	printv x
+	printt "\n"
+
+	printv y
+	printt "\n"
+
+	printv xx
+	printt "\n"
+
+	printv yy
+	printt "\n"
--- /dev/null
+++ b/test/asm/label-macro-arg.out
@@ -1,0 +1,4 @@
+$5
+$6
+$7
+$8
--- /dev/null
+++ b/test/asm/label-macro-arg.out.pipe
@@ -1,0 +1,4 @@
+$5
+$6
+$7
+$8