ref: 6767d11c23a15f1085ff203c94a6fa0b60aa4352
parent: 2dd9015dc60c8e0bf4fffa25039395e14827eb93
author: Jakub Kądziołka <kuba@kadziolka.net>
date: Sun Oct 11 21:06:32 EDT 2020
utf8decoder: Use byte-sized byte argument. This prevents passing a negative value out of a signed char by accident. It also renders some casts in the code superfluous.
--- a/include/extern/utf8decoder.h
+++ b/include/extern/utf8decoder.h
@@ -9,6 +9,6 @@
#ifndef EXTERN_UTF8DECODER_H
#define EXTERN_UTF8DECODER_H
-uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte);
+uint32_t decode(uint32_t *state, uint32_t *codep, uint8_t byte);
#endif /* EXTERN_UTF8DECODER_H */
--- a/src/asm/parser.y
+++ b/src/asm/parser.y
@@ -61,7 +61,7 @@
uint32_t codep = 0;
while (*s) {
- switch (decode(&state, &codep, (uint8_t)*s)) {
+ switch (decode(&state, &codep, *s)) {
case 1:
fatalerror("STRLEN: Invalid UTF-8 character\n");
break;
@@ -95,7 +95,7 @@
/* Advance to starting position in source string. */
while (src[srcIndex] && curPos < pos) {
- switch (decode(&state, &codep, (uint8_t)src[srcIndex])) {
+ switch (decode(&state, &codep, src[srcIndex])) {
case 1:
fatalerror("STRSUB: Invalid UTF-8 character\n");
break;
@@ -113,7 +113,7 @@
/* Copy from source to destination. */
while (src[srcIndex] && destIndex < MAXSTRLEN && curLen < len) {
- switch (decode(&state, &codep, (uint8_t)src[srcIndex])) {
+ switch (decode(&state, &codep, src[srcIndex])) {
case 1:
fatalerror("STRSUB: Invalid UTF-8 character\n");
break;
--- a/src/asm/util.c
+++ b/src/asm/util.c
@@ -69,7 +69,7 @@
size_t i = 0;
for (;;) {
- if (decode(&state, &codep, (uint8_t)src[i]) == 1)
+ if (decode(&state, &codep, src[i]) == 1)
fatalerror("invalid UTF-8 character\n");
dest[i] = src[i];
--- a/src/extern/utf8decoder.c
+++ b/src/extern/utf8decoder.c
@@ -38,7 +38,7 @@
1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */
};
-uint32_t decode(uint32_t *state, uint32_t *codep, uint32_t byte)
+uint32_t decode(uint32_t *state, uint32_t *codep, uint8_t byte)
{
uint32_t type = utf8d[byte];
--- /dev/null
+++ b/test/asm/invalid-utf-8.asm
@@ -1,0 +1,5 @@
+; This test tries to pass invalid UTF-8 through a macro argument
+; to exercise the lexer's reportGarbageChar
+m:MACRO \1
+ENDM
+ m ��
--- /dev/null
+++ b/test/asm/invalid-utf-8.err
@@ -1,0 +1,5 @@
+ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
+ Unknown character 0xCF
+ERROR: invalid-utf-8.asm(4) -> invalid-utf-8.asm::m(3):
+ Unknown character 0xD3
+error: Assembly aborted (2 errors)!