ref: 00d4606c3baf813b7b1c176823b2729bf51002a2
parent: 1e5479084bc9852854feb1ba9bf68b52cd127e02
author: Tor Andersson <tor.andersson@artifex.com>
date: Thu Apr 4 08:06:56 EDT 2019
Bug 700937: Limit recursion in regexp matcher. Also handle negative return code as an error in the JS bindings.
--- a/jsregexp.c
+++ b/jsregexp.c
@@ -29,6 +29,7 @@
void js_RegExp_prototype_exec(js_State *J, js_Regexp *re, const char *text)
{
+ int result;
int i;
int opts;
Resub m;
@@ -46,7 +47,10 @@
}
}
- if (!js_regexec(re->prog, text, &m, opts)) {
+ result = js_regexec(re->prog, text, &m, opts);
+ if (result < 0)
+ js_error(J, "regexec failed");
+ if (result == 0) {
js_newarray(J);
js_pushstring(J, text);
js_setproperty(J, -2, "input");
@@ -71,6 +75,7 @@
{
js_Regexp *re;
const char *text;
+ int result;
int opts;
Resub m;
@@ -90,7 +95,10 @@
}
}
- if (!js_regexec(re->prog, text, &m, opts)) {
+ result = js_regexec(re->prog, text, &m, opts);
+ if (result < 0)
+ js_error(J, "regexec failed");
+ if (result == 0) {
if (re->flags & JS_REGEXP_G)
re->last = re->last + (m.sub[0].ep - text);
js_pushboolean(J, 1);
--- a/jsstring.c
+++ b/jsstring.c
@@ -4,6 +4,14 @@
#include "utf.h"
#include "regexp.h"
+/*
+ * Wrapper around js_regexec for the String.prototype call sites in this file.
+ *
+ * Runs js_regexec(prog, string, sub, eflags) and, if it reports a negative
+ * result, raises a "regexec failed" error on J via js_error. In this change
+ * the regexp.c matcher returns -1 when its recursion depth exceeds MAXREC,
+ * and the regexec entry point passes that value through unchanged.
+ *
+ * Any non-negative result is returned untouched: with the new matcher
+ * convention 0 means "matched" and 1 means "no match", which is what the
+ * callers' `if (js_doregexec(...))` / `if (!js_doregexec(...))` tests expect.
+ *
+ * NOTE(review): presumably js_error does not return (non-local exit), so the
+ * trailing return is only reached for result >= 0 -- confirm against
+ * js_error's definition.
+ */
+static int js_doregexec(js_State *J, Reprog *prog, const char *string, Resub *sub, int eflags)
+{
+ int result = js_regexec(prog, string, sub, eflags);
+ /* negative result: the matcher gave up rather than answering match/no-match */
+ if (result < 0)
+ js_error(J, "regexec failed");
+ return result;
+}
+
static const char *checkstring(js_State *J, int idx)
{
if (!js_iscoercible(J, idx))
@@ -343,7 +351,7 @@
a = text;
e = text + strlen(text);
while (a <= e) {
- if (js_regexec(re->prog, a, &m, a > text ? REG_NOTBOL : 0))
+ if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
break;
b = m.sub[0].sp;
@@ -380,7 +388,7 @@
re = js_toregexp(J, -1);
- if (!js_regexec(re->prog, text, &m, 0))
+ if (!js_doregexec(J, re->prog, text, &m, 0))
js_pushnumber(J, js_utfptrtoidx(text, m.sub[0].sp));
else
js_pushnumber(J, -1);
@@ -397,7 +405,7 @@
source = checkstring(J, 0);
re = js_toregexp(J, 1);
- if (js_regexec(re->prog, source, &m, 0)) {
+ if (js_doregexec(J, re->prog, source, &m, 0)) {
js_copy(J, 0);
return;
}
@@ -471,7 +479,7 @@
else
goto end;
}
- if (!js_regexec(re->prog, source, &m, REG_NOTBOL))
+ if (!js_doregexec(J, re->prog, source, &m, REG_NOTBOL))
goto loop;
}
@@ -576,7 +584,7 @@
/* splitting the empty string */
if (e == text) {
- if (js_regexec(re->prog, text, &m, 0)) {
+ if (js_doregexec(J, re->prog, text, &m, 0)) {
if (len == limit) return;
js_pushliteral(J, "");
js_setindex(J, -2, 0);
@@ -586,7 +594,7 @@
p = a = text;
while (a < e) {
- if (js_regexec(re->prog, a, &m, a > text ? REG_NOTBOL : 0))
+ if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
break; /* no match */
b = m.sub[0].sp;
--- a/regexp.c
+++ b/regexp.c
@@ -16,6 +16,7 @@
#define REPINF 255
#define MAXSUB REG_MAXSUB
#define MAXPROG (32 << 10)
+#define MAXREC 1024
typedef struct Reclass Reclass;
typedef struct Renode Renode;
@@ -967,37 +968,51 @@
return 0;
}
-static int match(Reinst *pc, const char *sp, const char *bol, int flags, Resub *out)
+static int match(Reinst *pc, const char *sp, const char *bol, int flags, Resub *out, int depth)
{
Resub scratch;
+ int result;
int i;
Rune c;
+ /* stack overflow */
+ if (depth > MAXREC)
+ return -1;
+
for (;;) {
switch (pc->opcode) {
case I_END:
- return 1;
+ return 0;
case I_JUMP:
pc = pc->x;
break;
case I_SPLIT:
scratch = *out;
- if (match(pc->x, sp, bol, flags, &scratch)) {
+ result = match(pc->x, sp, bol, flags, &scratch, depth+1);
+ if (result == -1)
+ return -1;
+ if (result == 0) {
*out = scratch;
- return 1;
+ return 0;
}
pc = pc->y;
break;
case I_PLA:
- if (!match(pc->x, sp, bol, flags, out))
- return 0;
+ result = match(pc->x, sp, bol, flags, out, depth+1);
+ if (result == -1)
+ return -1;
+ if (result == 1)
+ return 1;
pc = pc->y;
break;
case I_NLA:
scratch = *out;
- if (match(pc->x, sp, bol, flags, &scratch))
- return 0;
+ result = match(pc->x, sp, bol, flags, &scratch, depth+1);
+ if (result == -1)
+ return -1;
+ if (result == 0)
+ return 1;
pc = pc->y;
break;
@@ -1004,37 +1019,37 @@
case I_ANYNL:
sp += chartorune(&c, sp);
if (c == 0)
- return 0;
+ return 1;
pc = pc + 1;
break;
case I_ANY:
sp += chartorune(&c, sp);
if (c == 0)
- return 0;
+ return 1;
if (isnewline(c))
- return 0;
+ return 1;
pc = pc + 1;
break;
case I_CHAR:
sp += chartorune(&c, sp);
if (c == 0)
- return 0;
+ return 1;
if (flags & REG_ICASE)
c = canon(c);
if (c != pc->c)
- return 0;
+ return 1;
pc = pc + 1;
break;
case I_CCLASS:
sp += chartorune(&c, sp);
if (c == 0)
- return 0;
+ return 1;
if (flags & REG_ICASE) {
if (!incclasscanon(pc->cc, canon(c)))
- return 0;
+ return 1;
} else {
if (!incclass(pc->cc, c))
- return 0;
+ return 1;
}
pc = pc + 1;
break;
@@ -1041,13 +1056,13 @@
case I_NCCLASS:
sp += chartorune(&c, sp);
if (c == 0)
- return 0;
+ return 1;
if (flags & REG_ICASE) {
if (incclasscanon(pc->cc, canon(c)))
- return 0;
+ return 1;
} else {
if (incclass(pc->cc, c))
- return 0;
+ return 1;
}
pc = pc + 1;
break;
@@ -1055,10 +1070,10 @@
i = out->sub[pc->n].ep - out->sub[pc->n].sp;
if (flags & REG_ICASE) {
if (strncmpcanon(sp, out->sub[pc->n].sp, i))
- return 0;
+ return 1;
} else {
if (strncmp(sp, out->sub[pc->n].sp, i))
- return 0;
+ return 1;
}
if (i > 0)
sp += i;
@@ -1076,7 +1091,7 @@
break;
}
}
- return 0;
+ return 1;
case I_EOL:
if (*sp == 0) {
pc = pc + 1;
@@ -1088,12 +1103,12 @@
break;
}
}
- return 0;
+ return 1;
case I_WORD:
i = sp > bol && iswordchar(sp[-1]);
i ^= iswordchar(sp[0]);
if (!i)
- return 0;
+ return 1;
pc = pc + 1;
break;
case I_NWORD:
@@ -1100,7 +1115,7 @@
i = sp > bol && iswordchar(sp[-1]);
i ^= iswordchar(sp[0]);
if (i)
- return 0;
+ return 1;
pc = pc + 1;
break;
@@ -1113,7 +1128,7 @@
pc = pc + 1;
break;
default:
- return 0;
+ return 1;
}
}
}
@@ -1130,7 +1145,7 @@
for (i = 0; i < MAXSUB; ++i)
sub->sub[i].sp = sub->sub[i].ep = NULL;
- return !match(prog->start, sp, sp, prog->flags | eflags, sub);
+ return match(prog->start, sp, sp, prog->flags | eflags, sub, 0);
}
#ifdef TEST