ref: 9f598dfdb785d6464d18ffa84801be1c9491d474
parent: 54e5bf0f0c60b7ccc80215bfe10571267474e7f8
parent: 484d15dbb279ca2fca490e278fd81404561b08f7
author: Antonio Niño Díaz <antonio_nd@outlook.com>
date: Sat Jul 6 07:09:43 EDT 2019
Merge pull request #359 from dbrotz/fix-lexer-out-of-bounds: Fix out-of-bounds array access in lexer
--- a/src/asm/lexer.c
+++ b/src/asm/lexer.c
@@ -38,6 +38,8 @@
#define SAFETYMARGIN 1024
+#define BOM_SIZE 3
+
struct sLexFloat tLexFloat[32];
struct sLexString *tLexHash[LEXHASHSIZE];
YY_BUFFER_STATE pCurrentBuffer;
@@ -49,6 +51,9 @@
uint32_t nFloating;
enum eLexerState lexerstate = LEX_STATE_NORMAL;
+/* UTF-8 byte order mark */
+static const unsigned char bom[BOM_SIZE] = { 0xEF, 0xBB, 0xBF };
+
void upperstring(char *s)
{
while (*s) {
@@ -120,11 +125,11 @@
*/
static void yy_buffer_append(YY_BUFFER_STATE buf, uint32_t capacity, char c)
{
- assert(buf->pBuffer[buf->nBufferSize] == 0);
+ assert(buf->pBufferStart[buf->nBufferSize] == 0);
assert(buf->nBufferSize + 1 < capacity);
- buf->pBuffer[buf->nBufferSize++] = c;
- buf->pBuffer[buf->nBufferSize] = 0;
+ buf->pBufferStart[buf->nBufferSize++] = c;
+ buf->pBufferStart[buf->nBufferSize] = 0;
}
YY_BUFFER_STATE yy_scan_bytes(char *mem, uint32_t size)
@@ -221,6 +226,11 @@
*/
capacity += 3;
+ /* Skip UTF-8 byte order mark. */
+ if (pBuffer->nBufferSize >= BOM_SIZE
+ && !memcmp(pBuffer->pBuffer, bom, BOM_SIZE))
+ pBuffer->pBuffer += BOM_SIZE;
+
/* Convert all line endings to LF and spaces */
char *mem = pBuffer->pBuffer;
@@ -281,7 +291,7 @@
}
/* Add newline if file doesn't end with one */
- if (size == 0 || pBuffer->pBuffer[size - 1] != '\n')
+ if (size == 0 || pBuffer->pBufferStart[size - 1] != '\n')
yy_buffer_append(pBuffer, capacity, '\n');
/* Add newline if \ will eat the last newline */
@@ -289,10 +299,10 @@
size_t pos = pBuffer->nBufferSize - 2;
/* Skip spaces */
- while (pos > 0 && pBuffer->pBuffer[pos] == ' ')
+ while (pos > 0 && pBuffer->pBufferStart[pos] == ' ')
pos--;
- if (pBuffer->pBuffer[pos] == '\\')
+ if (pBuffer->pBufferStart[pos] == '\\')
yy_buffer_append(pBuffer, capacity, '\n');
}
@@ -471,17 +481,17 @@
char *s = pLexBuffer;
uint32_t nOldFloatMask = 0;
- uint32_t nFloatMask = tFloatingFirstChar[(int32_t)*s];
+ uint32_t nFloatMask = tFloatingFirstChar[(uint8_t)*s];
if (nFloatMask != 0) {
s++;
nOldFloatMask = nFloatMask;
- nFloatMask &= tFloatingSecondChar[(int32_t)*s];
+ nFloatMask &= tFloatingSecondChar[(uint8_t)*s];
while (nFloatMask != 0) {
s++;
nOldFloatMask = nFloatMask;
- nFloatMask &= tFloatingChars[(int32_t)*s];
+ nFloatMask &= tFloatingChars[(uint8_t)*s];
}
}
@@ -783,10 +793,22 @@
* numeric literal, string, or bracketed symbol, so just return
* the ASCII character.
*/
- if (*pLexBuffer == '\n')
+ unsigned char ch = *pLexBuffer++;
+
+ if (ch == '\n')
AtLineStart = 1;
- return *pLexBuffer++;
+ /*
+ * Check for invalid unprintable characters.
+ * They may not be readily apparent in a text editor,
+ * so this is useful for identifying encoding problems.
+ */
+ if (ch != 0
+ && ch != '\n'
+ && !(ch >= 0x20 && ch <= 0x7E))
+ fatalerror("Found garbage character: 0x%02X", ch);
+
+ return ch;
}
if (pLongestFixed == NULL || nFloatLen > pLongestFixed->nNameLength) {
--- /dev/null
+++ b/test/asm/garbage_char.asm
@@ -1,0 +1,1 @@
+x
\ No newline at end of file
--- /dev/null
+++ b/test/asm/garbage_char.out
@@ -1,0 +1,2 @@
+ERROR: garbage_char.asm(1):
+ Found garbage character: 0xFF
--- /dev/null
+++ b/test/asm/garbage_char.out.pipe
@@ -1,0 +1,2 @@
+ERROR: -(1):
+ Found garbage character: 0xFF