shithub: rgbds

Download patch

ref: 56071599e78bd1a437633ff967c01a4fd0919f37
parent: c637447d5db263820b45aba8af08d6eb3b241d9f
author: Rangi <remy.oukaour+rangi42@gmail.com>
date: Sun Feb 28 16:19:15 EST 2021

Allow trailing commas in bare lists

This applies to macro arguments, DB, DW, DL, DS,
PRINT, PRINTLN, EXPORT, PURGE, and OPT.

It also removes support for empty entries in DB/DW/DL.
(Deprecating it would require keeping parser support,
which is ambiguous with trailing commas.)

Fixes #753

--- a/include/asm/lexer.h
+++ b/include/asm/lexer.h
@@ -67,7 +67,6 @@
 };
 
 void lexer_SetMode(enum LexerMode mode);
-bool lexer_IsRawMode(void);
 void lexer_ToggleStringExpansion(bool enable);
 
 uint32_t lexer_GetIFDepth(void);
--- a/include/asm/macro.h
+++ b/include/asm/macro.h
@@ -21,7 +21,7 @@
 
 struct MacroArgs *macro_GetCurrentArgs(void);
 struct MacroArgs *macro_NewArgs(void);
-void macro_AppendArg(struct MacroArgs **args, char *s, bool isLastArg);
+void macro_AppendArg(struct MacroArgs **args, char *s);
 void macro_UseNewArgs(struct MacroArgs *args);
 void macro_FreeArgs(struct MacroArgs *args);
 char const *macro_GetArg(uint32_t i);
--- a/include/asm/warning.h
+++ b/include/asm/warning.h
@@ -19,7 +19,6 @@
 	WARNING_CHARMAP_REDEF,        /* Charmap entry re-definition */
 	WARNING_DIV,		      /* Division undefined behavior */
 	WARNING_EMPTY_DATA_DIRECTIVE, /* `db`, `dw` or `dl` directive without data in ROM */
-	WARNING_EMPTY_ENTRY,	      /* Empty entry in `db`, `dw` or `dl` */
 	WARNING_EMPTY_MACRO_ARG,      /* Empty macro argument */
 	WARNING_EMPTY_STRRPL,	      /* Empty second argument in `STRRPL` */
 	WARNING_LARGE_CONSTANT,	      /* Constants too large */
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -360,7 +360,6 @@
 	bool disableMacroArgs;
 	bool disableInterpolation;
 	size_t macroArgScanDistance; /* Max distance already scanned for macro args */
-	bool injectNewline; /* Whether to inject a newline at EOF */
 	bool expandStrings;
 	struct Expansion *expansions;
 	size_t expansionOfs; /* Offset into the current top-level expansion (negative = before) */
@@ -382,7 +381,6 @@
 	state->disableMacroArgs = false;
 	state->disableInterpolation = false;
 	state->macroArgScanDistance = 0;
-	state->injectNewline = false;
 	state->expandStrings = true;
 	state->expansions = NULL;
 	state->expansionOfs = 0;
@@ -640,11 +638,6 @@
 	lexerState->mode = mode;
 }
 
-bool lexer_IsRawMode(void)
-{
-	return lexerState->mode == LEXER_RAW;
-}
-
 void lexer_ToggleStringExpansion(bool enable)
 {
 	lexerState->expandStrings = enable;
@@ -2054,10 +2047,6 @@
 			return T_NEWLINE;
 
 		case EOF:
-			if (lexerState->injectNewline) {
-				lexerState->injectNewline = false;
-				return T_NEWLINE;
-			}
 			return T_EOF;
 
 		/* Handle escapes */
@@ -2129,6 +2118,7 @@
 
 	/* This is essentially a modified `appendStringLiteral` */
 	size_t i = 0;
+	int c;
 
 	/* Trim left whitespace (stops at a block comment or line continuation) */
 	while (isWhitespace(peek(0)))
@@ -2135,7 +2125,7 @@
 		shiftChars(1);
 
 	for (;;) {
-		int c = peek(0);
+		c = peek(0);
 
 		switch (c) {
 		case '"': /* String literals inside macro args */
@@ -2151,29 +2141,7 @@
 		case '\r':
 		case '\n':
 		case EOF:
-			// Returning T_COMMAs to the parser would mean that two consecutive commas
-			// (i.e. an empty argument) need to return two different tokens (T_STRING
-			// then T_COMMA) without advancing the read. To avoid this, commas in raw
-			// mode end the current macro argument but are not tokenized themselves.
-			if (c == ',')
-				shiftChars(1);
-			else
-				lexer_SetMode(LEXER_NORMAL);
-			// If a macro is invoked on the last line of a file, with no blank
-			// line afterwards, returning EOF afterwards will cause Bison to
-			// stop parsing, despite the lexer being ready to output more.
-			if (c == EOF)
-				lexerState->injectNewline = true;
-			/* Trim right whitespace */
-			while (i && isWhitespace(yylval.tzString[i - 1]))
-				i--;
-			if (i == sizeof(yylval.tzString)) {
-				i--;
-				warning(WARNING_LONG_STR, "Macro argument too long\n");
-			}
-			yylval.tzString[i] = '\0';
-			dbgPrint("Read raw string \"%s\"\n", yylval.tzString);
-			return T_STRING;
+			goto finish;
 
 		case '/': /* Block comments inside macro args */
 			shiftChars(1); /* Shift the slash */
@@ -2235,6 +2203,49 @@
 			break;
 		}
 	}
+
+finish:
+	if (i == sizeof(yylval.tzString)) {
+		i--;
+		warning(WARNING_LONG_STR, "Macro argument too long\n");
+	}
+	/* Trim right whitespace */
+	while (i && isWhitespace(yylval.tzString[i - 1]))
+		i--;
+	yylval.tzString[i] = '\0';
+
+	dbgPrint("Read raw string \"%s\"\n", yylval.tzString);
+
+	// Returning T_COMMAs to the parser would mean that two consecutive commas
+	// (i.e. an empty argument) need to return two different tokens (T_STRING
+	// then T_COMMA) without advancing the read. To avoid this, commas in raw
+	// mode end the current macro argument but are not tokenized themselves.
+	if (c == ',') {
+		shiftChars(1);
+		return T_STRING;
+	}
+
+	// The last argument may end in a trailing comma, newline, or EOF.
+	// To allow trailing commas, raw mode will continue after the last
+	// argument, immediately lexing the newline or EOF again (i.e. with
+	// an empty raw string before it). This will not be treated as a
+	// macro argument. To pass an empty last argument, use a second
+	// trailing comma.
+	if (i > 0)
+		return T_STRING;
+	lexer_SetMode(LEXER_NORMAL);
+
+	// If a macro is invoked on the last line of a file, with no blank
+	// line afterwards, returning EOF afterwards will cause Bison to
+	// stop parsing, despite the lexer being ready to output more.
+	// To avoid this, return T_NEWLINE for EOF as well.
+	if (c == '\r' || c == '\n') {
+		shiftChars(1);
+		/* Handle CRLF */
+		if (c == '\r' && peek(0) == '\n')
+			shiftChars(1);
+	}
+	return T_NEWLINE;
 }
 
 #undef append_yylval_tzString
--- a/src/asm/macro.c
+++ b/src/asm/macro.c
@@ -59,15 +59,11 @@
 	return args;
 }
 
-void macro_AppendArg(struct MacroArgs **argPtr, char *s, bool isLastArg)
+void macro_AppendArg(struct MacroArgs **argPtr, char *s)
 {
 #define macArgs (*argPtr)
-	if (s[0] == '\0') {
-		/* Zero arguments are parsed as a spurious empty argument; do not append it */
-		if (isLastArg && !macArgs->nbArgs)
-			return;
+	if (s[0] == '\0')
 		warning(WARNING_EMPTY_MACRO_ARG, "Empty macro argument\n");
-	}
 	if (macArgs->nbArgs == MAXMACROARGS)
 		error("A maximum of " EXPAND_AND_STR(MAXMACROARGS) " arguments is allowed\n");
 	if (macArgs->nbArgs >= macArgs->capacity) {
@@ -113,27 +109,26 @@
 	if (macroArgs->shift >= macroArgs->nbArgs)
 		return "";
 
-	size_t len = strlen(macroArgs->args[macroArgs->shift]);
+	size_t len = 0;
 
-	for (uint32_t i = macroArgs->shift + 1; i < macroArgs->nbArgs; i++)
-		len += 1 + strlen(macroArgs->args[i]);
+	for (uint32_t i = macroArgs->shift; i < macroArgs->nbArgs; i++)
+		len += strlen(macroArgs->args[i]) + 1; /* 1 for comma */
 
-	char *str = malloc(len + 1);
+	char *str = malloc(len + 1); /* 1 for '\0' */
+	char *ptr = str;
 
 	if (!str)
 		fatalerror("Failed to allocate memory for expanding '\\#': %s\n", strerror(errno));
 
-	char *ptr = str;
+	for (uint32_t i = macroArgs->shift; i < macroArgs->nbArgs; i++) {
+		size_t n = strlen(macroArgs->args[i]);
 
-	size_t n = strlen(macroArgs->args[macroArgs->shift]);
-
-	memcpy(ptr, macroArgs->args[macroArgs->shift], n);
-	ptr += n;
-	for (uint32_t i = macroArgs->shift + 1; i < macroArgs->nbArgs; i++) {
-		*ptr++ = ','; /* no space after comma */
-		n = strlen(macroArgs->args[i]);
 		memcpy(ptr, macroArgs->args[i], n);
 		ptr += n;
+
+		/* Commas go between args and after a last empty arg */
+		if (i < macroArgs->nbArgs - 1 || n == 0)
+			*ptr++ = ','; /* no space after comma */
 	}
 	*ptr = '\0';
 
--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -430,7 +430,6 @@
 		int32_t step;
 	} forArgs;
 	struct StrFmtArgList strfmtArgs;
-	bool hasEmpty; // Whether `db`, `dw`, `dl` argument lists contain any empty entries
 }
 
 %type	<sVal>		relocexpr
@@ -445,9 +444,6 @@
 %type	<sVal>		reloc_16bit
 %type	<sVal>		reloc_16bit_no_str
 %type	<nConstValue>	sectiontype
-%type	<hasEmpty>	constlist_8bit  constlist_8bit_entry
-%type	<hasEmpty>	constlist_16bit constlist_16bit_entry
-%type	<hasEmpty>	constlist_32bit constlist_32bit_entry
 
 %type	<tzString>	string
 %type	<tzString>	strcat_args
@@ -728,6 +724,7 @@
 ;
 
 macro		: T_ID {
+			// Parsing 'macroargs' will restore the lexer's normal mode
 			lexer_SetMode(LEXER_RAW);
 		} macroargs {
 			fstk_RunMacro($1, $3);
@@ -738,7 +735,7 @@
 			$$ = macro_NewArgs();
 		}
 		| macroargs T_STRING {
-			macro_AppendArg(&($$), strdup($2), !lexer_IsRawMode());
+			macro_AppendArg(&($$), strdup($2));
 		}
 ;
 
@@ -791,6 +788,9 @@
 		| align
 ;
 
+trailing_comma	: %empty | T_COMMA
+;
+
 align		: T_OP_ALIGN uconst {
 			if ($2 > 16)
 				error("Alignment must be between 0 and 16, not %u\n", $2);
@@ -809,14 +809,13 @@
 ;
 
 opt		: T_POP_OPT {
+			// Parsing 'opt_list' will restore the lexer's normal mode
 			lexer_SetMode(LEXER_RAW);
-		} opt_list {
-			lexer_SetMode(LEXER_NORMAL);
-		}
+		} opt_list
 ;
 
 opt_list	: opt_list_entry
-		| opt_list T_COMMA opt_list_entry
+		| opt_list opt_list_entry
 ;
 
 opt_list_entry	: T_STRING		{ opt_Parse($1); }
@@ -984,7 +983,7 @@
 ;
 
 ds		: T_POP_DS uconst	{ out_Skip($2, true); }
-		| T_POP_DS uconst T_COMMA ds_args {
+		| T_POP_DS uconst T_COMMA ds_args trailing_comma {
 			out_RelBytes($2, $4.args, $4.nbArgs);
 			freeDsArgList(&$4);
 		}
@@ -1004,34 +1003,21 @@
 		}
 ;
 
-/* Authorize empty entries if there is only one */
-db		: T_POP_DB constlist_8bit_entry T_COMMA constlist_8bit {
-			if ($2 || $4)
-				warning(WARNING_EMPTY_ENTRY,
-					"Empty entry in list of 8-bit elements (treated as padding).\n");
-		}
-		| T_POP_DB constlist_8bit_entry
+db		: T_POP_DB	{ out_Skip(1, false); }
+		| T_POP_DB constlist_8bit trailing_comma
 ;
 
-dw		: T_POP_DW constlist_16bit_entry T_COMMA constlist_16bit {
-			if ($2 || $4)
-				warning(WARNING_EMPTY_ENTRY,
-					"Empty entry in list of 16-bit elements (treated as padding).\n");
-		}
-		| T_POP_DW constlist_16bit_entry
+dw		: T_POP_DW	{ out_Skip(2, false); }
+		| T_POP_DW constlist_16bit trailing_comma
 ;
 
-dl		: T_POP_DL constlist_32bit_entry T_COMMA constlist_32bit {
-			if ($2 || $4)
-				warning(WARNING_EMPTY_ENTRY,
-					"Empty entry in list of 32-bit elements (treated as padding).\n");
-		}
-		| T_POP_DL constlist_32bit_entry
+dl		: T_POP_DL	{ out_Skip(4, false); }
+		| T_POP_DL constlist_32bit trailing_comma
 ;
 
 purge		: T_POP_PURGE {
 			lexer_ToggleStringExpansion(false);
-		} purge_list {
+		} purge_list trailing_comma {
 			lexer_ToggleStringExpansion(true);
 		}
 ;
@@ -1052,7 +1038,7 @@
 purge_list_entry : scoped_id	{ sym_Purge($1); }
 ;
 
-export		: T_POP_EXPORT export_list
+export		: T_POP_EXPORT export_list trailing_comma
 ;
 
 export_list	: export_list_entry
@@ -1113,11 +1099,11 @@
 popc		: T_POP_POPC	{ charmap_Pop(); }
 ;
 
-print		: T_POP_PRINT print_exprs
+print		: T_POP_PRINT print_exprs trailing_comma
 ;
 
 println		: T_POP_PRINTLN { putchar('\n'); }
-		| T_POP_PRINTLN print_exprs { putchar('\n'); }
+		| T_POP_PRINTLN print_exprs trailing_comma { putchar('\n'); }
 ;
 
 print_exprs	: print_expr
@@ -1165,18 +1151,11 @@
 ;
 
 constlist_8bit	: constlist_8bit_entry
-		| constlist_8bit T_COMMA constlist_8bit_entry {
-			$$ = $1 || $3;
-		}
+		| constlist_8bit T_COMMA constlist_8bit_entry
 ;
 
-constlist_8bit_entry : %empty {
-			out_Skip(1, false);
-			$$ = true;
-		}
-		| reloc_8bit_no_str	{
+constlist_8bit_entry : reloc_8bit_no_str	{
 			out_RelByte(&$1, 0);
-			$$ = false;
 		}
 		| string {
 			uint8_t *output = malloc(strlen($1)); /* Cannot be larger than that */
@@ -1184,23 +1163,15 @@
 
 			out_AbsByteGroup(output, length);
 			free(output);
-			$$ = false;
 		}
 ;
 
 constlist_16bit : constlist_16bit_entry
-		| constlist_16bit T_COMMA constlist_16bit_entry {
-			$$ = $1 || $3;
-		}
+		| constlist_16bit T_COMMA constlist_16bit_entry
 ;
 
-constlist_16bit_entry : %empty {
-			out_Skip(2, false);
-			$$ = true;
-		}
-		| reloc_16bit_no_str	{
+constlist_16bit_entry : reloc_16bit_no_str	{
 			out_RelWord(&$1, 0);
-			$$ = false;
 		}
 		| string {
 			uint8_t *output = malloc(strlen($1)); /* Cannot be larger than that */
@@ -1208,23 +1179,15 @@
 
 			out_AbsWordGroup(output, length);
 			free(output);
-			$$ = false;
 		}
 ;
 
 constlist_32bit : constlist_32bit_entry
-		| constlist_32bit T_COMMA constlist_32bit_entry {
-			$$ = $1 || $3;
-		}
+		| constlist_32bit T_COMMA constlist_32bit_entry
 ;
 
-constlist_32bit_entry : %empty {
-			out_Skip(4, false);
-			$$ = true;
-		}
-		| relocexpr_no_str	{
+constlist_32bit_entry :relocexpr_no_str	{
 			out_RelLong(&$1, 0);
-			$$ = false;
 		}
 		| string {
 			uint8_t *output = malloc(strlen($1)); /* Cannot be larger than that */
@@ -1232,7 +1195,6 @@
 
 			out_AbsLongGroup(output, length);
 			free(output);
-			$$ = false;
 		}
 ;
 
--- a/src/asm/rgbasm.1
+++ b/src/asm/rgbasm.1
@@ -179,9 +179,9 @@
 .Pp
 The following warnings are actual warning flags; with each description, the corresponding warning flag is included.
 Note that each of these flag also has a negation (for example,
-.Fl Wempty-entry
+.Fl Wcharmap-redef
 enables the warning that
-.Fl Wno-empty-entry
+.Fl Wno-charmap-redef
 disables).
 Only the non-default flag is listed here.
 Ignoring the
@@ -209,12 +209,6 @@
 .Fl Wall .
 .It Fl Wdiv
 Warn when dividing the smallest negative integer by -1, which yields itself due to integer overflow.
-.It Fl Wempty-entry
-Warn when an empty entry is encountered in a
-.Ic db , dw , dl
-list.
-This warning is enabled by
-.Fl Wextra .
 .It Fl Wempty-macro-arg
 Warn when a macro argument is empty.
 This warning is enabled by
--- a/src/asm/rgbasm.5
+++ b/src/asm/rgbasm.5
@@ -1282,7 +1282,7 @@
 .Ic DB , DW
 and
 .Ic DL
-without arguments, or leaving empty elements at any point in the list.
+without arguments.
 This works exactly like
 .Ic DS 1 , DS 2
 and
--- a/src/asm/warning.c
+++ b/src/asm/warning.c
@@ -34,7 +34,6 @@
 	[WARNING_CHARMAP_REDEF]		= WARNING_DISABLED,
 	[WARNING_DIV]			= WARNING_DISABLED,
 	[WARNING_EMPTY_DATA_DIRECTIVE]	= WARNING_DISABLED,
-	[WARNING_EMPTY_ENTRY]		= WARNING_DISABLED,
 	[WARNING_EMPTY_MACRO_ARG]	= WARNING_DISABLED,
 	[WARNING_EMPTY_STRRPL]		= WARNING_DISABLED,
 	[WARNING_LARGE_CONSTANT]	= WARNING_DISABLED,
@@ -77,7 +76,6 @@
 	"charmap-redef",
 	"div",
 	"empty-data-directive",
-	"empty-entry",
 	"empty-macro-arg",
 	"empty-strrpl",
 	"large-constant",
@@ -113,7 +111,6 @@
 
 /* Warnings that are less likely to indicate an error */
 static uint8_t const _wextraCommands[] = {
-	WARNING_EMPTY_ENTRY,
 	WARNING_EMPTY_MACRO_ARG,
 	WARNING_MACRO_SHIFT,
 	WARNING_NESTED_COMMENT,
@@ -125,7 +122,6 @@
 	WARNING_BUILTIN_ARG,
 	WARNING_DIV,
 	WARNING_EMPTY_DATA_DIRECTIVE,
-	WARNING_EMPTY_ENTRY,
 	WARNING_EMPTY_MACRO_ARG,
 	WARNING_EMPTY_STRRPL,
 	WARNING_LARGE_CONSTANT,
--- a/test/asm/macro-arguments.asm
+++ b/test/asm/macro-arguments.asm
@@ -9,15 +9,20 @@
 
 	mac /* block
 		...comment */ ; comment
-	mac /*a*/ 1 , 2 /*b*/
+	mac /*a*/ 1 , 2 /*b*/ , ; trailing comma
 	mac \
 	c, d
 	mac 1, 2 + /* another ;
 		; comment */ 2, 3
 
+	mac           a                                                                                                                                                                                                                                                               b           ; truncated
+
 	mac
+	mac ,
+	mac a,
 	mac a,,
 	mac ,,z
 	mac a,,z
 	mac ,a,b,c,
-	mac ,,x,,
+	mac ,,x,,,
+	mac E,O,F ; no newline
\ No newline at end of file
--- a/test/asm/macro-arguments.err
+++ b/test/asm/macro-arguments.err
@@ -1,22 +1,22 @@
-warning: macro-arguments.asm(19): [-Wempty-macro-arg]
+warning: macro-arguments.asm(18): [-Wlong-string]
+    Macro argument too long
+warning: macro-arguments.asm(21): [-Wempty-macro-arg]
     Empty macro argument
-warning: macro-arguments.asm(19): [-Wempty-macro-arg]
+warning: macro-arguments.asm(23): [-Wempty-macro-arg]
     Empty macro argument
-warning: macro-arguments.asm(20): [-Wempty-macro-arg]
+warning: macro-arguments.asm(24): [-Wempty-macro-arg]
     Empty macro argument
-warning: macro-arguments.asm(20): [-Wempty-macro-arg]
+warning: macro-arguments.asm(24): [-Wempty-macro-arg]
     Empty macro argument
-warning: macro-arguments.asm(21): [-Wempty-macro-arg]
+warning: macro-arguments.asm(25): [-Wempty-macro-arg]
     Empty macro argument
-warning: macro-arguments.asm(22): [-Wempty-macro-arg]
+warning: macro-arguments.asm(26): [-Wempty-macro-arg]
     Empty macro argument
-warning: macro-arguments.asm(22): [-Wempty-macro-arg]
+warning: macro-arguments.asm(27): [-Wempty-macro-arg]
     Empty macro argument
-warning: macro-arguments.asm(23): [-Wempty-macro-arg]
+warning: macro-arguments.asm(27): [-Wempty-macro-arg]
     Empty macro argument
-warning: macro-arguments.asm(23): [-Wempty-macro-arg]
+warning: macro-arguments.asm(27): [-Wempty-macro-arg]
     Empty macro argument
-warning: macro-arguments.asm(23): [-Wempty-macro-arg]
-    Empty macro argument
-warning: macro-arguments.asm(23): [-Wempty-macro-arg]
+warning: macro-arguments.asm(27): [-Wempty-macro-arg]
     Empty macro argument
--- a/test/asm/macro-arguments.out
+++ b/test/asm/macro-arguments.out
@@ -13,12 +13,20 @@
 \2: <2 +  2>
 \3: <3>
 
+'mac a':
+\1: <a>
+
 'mac ':
 
+'mac ,':
+\1: <>
+
+'mac a':
+\1: <a>
+
 'mac a,,':
 \1: <a>
 \2: <>
-\3: <>
 
 'mac ,,z':
 \1: <>
@@ -30,17 +38,21 @@
 \2: <>
 \3: <z>
 
-'mac ,a,b,c,':
+'mac ,a,b,c':
 \1: <>
 \2: <a>
 \3: <b>
 \4: <c>
-\5: <>
 
-'mac ,,x,,':
+'mac ,,x,,,':
 \1: <>
 \2: <>
 \3: <x>
 \4: <>
 \5: <>
+
+'mac E,O,F':
+\1: <E>
+\2: <O>
+\3: <F>
 
--- /dev/null
+++ b/test/asm/trailing-commas.asm
@@ -1,0 +1,25 @@
+SECTION "test", ROM0
+
+mac: MACRO
+	println "\#"
+ENDM
+
+	mac 1,2, 3 , ,5,
+
+	db 1,2,3,
+	dw 4,5,6,
+	dl 7,8,9,
+	ds 10, $a, $b, $c,
+
+	print "Hello", " ",
+	println "world", "!",
+
+spam:
+eggs:
+lobsterThermidor:
+
+	export spam, eggs,
+	purge lobsterThermidor,
+
+	opt boO, g.xX#,
+	dw %ooOOOOoo, `XX##..xx,
--- /dev/null
+++ b/test/asm/trailing-commas.err
@@ -1,0 +1,2 @@
+warning: trailing-commas.asm(7): [-Wempty-macro-arg]
+    Empty macro argument
--- /dev/null
+++ b/test/asm/trailing-commas.out
@@ -1,0 +1,2 @@
+1,2,3,,5
+Hello world!
binary files /dev/null b/test/asm/trailing-commas.out.bin differ