shithub: rgbds

Download patch

ref: 6767d11c23a15f1085ff203c94a6fa0b60aa4352
parent: 2dd9015dc60c8e0bf4fffa25039395e14827eb93
author: Jakub Kądziołka <kuba@kadziolka.net>
date: Sun Oct 11 21:06:32 EDT 2020

utf8decoder: Use byte-sized byte argument

This prevents a negative value read from a signed char from being passed in
by accident, where it would be converted to a huge uint32_t and used as a
table index. It also renders some casts in the code superfluous.

--- a/include/extern/utf8decoder.h
+++ b/include/extern/utf8decoder.h
@@ -9,6 +9,6 @@
 #ifndef EXTERN_UTF8DECODER_H
 #define EXTERN_UTF8DECODER_H
 
-uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte);
+uint32_t decode(uint32_t *state, uint32_t *codep, uint8_t byte);
 
 #endif /* EXTERN_UTF8DECODER_H */
--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -61,7 +61,7 @@
 	uint32_t codep = 0;
 
 	while (*s) {
-		switch (decode(&state, &codep, (uint8_t)*s)) {
+		switch (decode(&state, &codep, *s)) {
 		case 1:
 			fatalerror("STRLEN: Invalid UTF-8 character\n");
 			break;
@@ -95,7 +95,7 @@
 
 	/* Advance to starting position in source string. */
 	while (src[srcIndex] && curPos < pos) {
-		switch (decode(&state, &codep, (uint8_t)src[srcIndex])) {
+		switch (decode(&state, &codep, src[srcIndex])) {
 		case 1:
 			fatalerror("STRSUB: Invalid UTF-8 character\n");
 			break;
@@ -113,7 +113,7 @@
 
 	/* Copy from source to destination. */
 	while (src[srcIndex] && destIndex < MAXSTRLEN && curLen < len) {
-		switch (decode(&state, &codep, (uint8_t)src[srcIndex])) {
+		switch (decode(&state, &codep, src[srcIndex])) {
 		case 1:
 			fatalerror("STRSUB: Invalid UTF-8 character\n");
 			break;
--- a/src/asm/util.c
+++ b/src/asm/util.c
@@ -69,7 +69,7 @@
 	size_t i = 0;
 
 	for (;;) {
-		if (decode(&state, &codep, (uint8_t)src[i]) == 1)
+		if (decode(&state, &codep, src[i]) == 1)
 			fatalerror("invalid UTF-8 character\n");
 
 		dest[i] = src[i];
--- a/src/extern/utf8decoder.c
+++ b/src/extern/utf8decoder.c
@@ -38,7 +38,7 @@
 	  1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */
 };
 
-uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte)
+uint32_t decode(uint32_t *state, uint32_t *codep, uint8_t byte)
 {
 	uint32_t type = utf8d[byte];
 
--- /dev/null
+++ b/test/asm/invalid-utf-8.asm
@@ -1,0 +1,5 @@
+; This test tries to pass invalid UTF-8 through a macro argument
+; to exercise the lexer's reportGarbageChar
+m:MACRO \1
+ENDM
+	m ��
--- /dev/null
+++ b/test/asm/invalid-utf-8.err
@@ -1,0 +1,5 @@
+ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
+    Unknown character 0xCF
+ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
+    Unknown character 0xD3
+error: Assembly aborted (2 errors)!