shithub: libmujs

Download patch

ref: 00d4606c3baf813b7b1c176823b2729bf51002a2
parent: 1e5479084bc9852854feb1ba9bf68b52cd127e02
author: Tor Andersson <tor.andersson@artifex.com>
date: Thu Apr 4 08:06:56 EDT 2019

Bug 700937: Limit recursion in regexp matcher.

Also handle negative return code as an error in the JS bindings.

--- a/jsregexp.c
+++ b/jsregexp.c
@@ -29,6 +29,7 @@
 
 void js_RegExp_prototype_exec(js_State *J, js_Regexp *re, const char *text)
 {
+	int result;
 	int i;
 	int opts;
 	Resub m;
@@ -46,7 +47,10 @@
 		}
 	}
 
-	if (!js_regexec(re->prog, text, &m, opts)) {
+	result = js_regexec(re->prog, text, &m, opts);
+	if (result < 0)
+		js_error(J, "regexec failed");
+	if (result == 0) {
 		js_newarray(J);
 		js_pushstring(J, text);
 		js_setproperty(J, -2, "input");
@@ -71,6 +75,7 @@
 {
 	js_Regexp *re;
 	const char *text;
+	int result;
 	int opts;
 	Resub m;
 
@@ -90,7 +95,10 @@
 		}
 	}
 
-	if (!js_regexec(re->prog, text, &m, opts)) {
+	result = js_regexec(re->prog, text, &m, opts);
+	if (result < 0)
+		js_error(J, "regexec failed");
+	if (result == 0) {
 		if (re->flags & JS_REGEXP_G)
 			re->last = re->last + (m.sub[0].ep - text);
 		js_pushboolean(J, 1);
--- a/jsstring.c
+++ b/jsstring.c
@@ -4,6 +4,14 @@
 #include "utf.h"
 #include "regexp.h"
 
+static int js_doregexec(js_State *J, Reprog *prog, const char *string, Resub *sub, int eflags)
+{
+	int result = js_regexec(prog, string, sub, eflags);
+	if (result < 0)
+		js_error(J, "regexec failed");
+	return result;
+}
+
 static const char *checkstring(js_State *J, int idx)
 {
 	if (!js_iscoercible(J, idx))
@@ -343,7 +351,7 @@
 	a = text;
 	e = text + strlen(text);
 	while (a <= e) {
-		if (js_regexec(re->prog, a, &m, a > text ? REG_NOTBOL : 0))
+		if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
 			break;
 
 		b = m.sub[0].sp;
@@ -380,7 +388,7 @@
 
 	re = js_toregexp(J, -1);
 
-	if (!js_regexec(re->prog, text, &m, 0))
+	if (!js_doregexec(J, re->prog, text, &m, 0))
 		js_pushnumber(J, js_utfptrtoidx(text, m.sub[0].sp));
 	else
 		js_pushnumber(J, -1);
@@ -397,7 +405,7 @@
 	source = checkstring(J, 0);
 	re = js_toregexp(J, 1);
 
-	if (js_regexec(re->prog, source, &m, 0)) {
+	if (js_doregexec(J, re->prog, source, &m, 0)) {
 		js_copy(J, 0);
 		return;
 	}
@@ -471,7 +479,7 @@
 			else
 				goto end;
 		}
-		if (!js_regexec(re->prog, source, &m, REG_NOTBOL))
+		if (!js_doregexec(J, re->prog, source, &m, REG_NOTBOL))
 			goto loop;
 	}
 
@@ -576,7 +584,7 @@
 
 	/* splitting the empty string */
 	if (e == text) {
-		if (js_regexec(re->prog, text, &m, 0)) {
+		if (js_doregexec(J, re->prog, text, &m, 0)) {
 			if (len == limit) return;
 			js_pushliteral(J, "");
 			js_setindex(J, -2, 0);
@@ -586,7 +594,7 @@
 
 	p = a = text;
 	while (a < e) {
-		if (js_regexec(re->prog, a, &m, a > text ? REG_NOTBOL : 0))
+		if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
 			break; /* no match */
 
 		b = m.sub[0].sp;
--- a/regexp.c
+++ b/regexp.c
@@ -16,6 +16,7 @@
 #define REPINF 255
 #define MAXSUB REG_MAXSUB
 #define MAXPROG (32 << 10)
+#define MAXREC 1024
 
 typedef struct Reclass Reclass;
 typedef struct Renode Renode;
@@ -967,37 +968,51 @@
 	return 0;
 }
 
-static int match(Reinst *pc, const char *sp, const char *bol, int flags, Resub *out)
+static int match(Reinst *pc, const char *sp, const char *bol, int flags, Resub *out, int depth)
 {
 	Resub scratch;
+	int result;
 	int i;
 	Rune c;
 
+	/* stack overflow */
+	if (depth > MAXREC)
+		return -1;
+
 	for (;;) {
 		switch (pc->opcode) {
 		case I_END:
-			return 1;
+			return 0;
 		case I_JUMP:
 			pc = pc->x;
 			break;
 		case I_SPLIT:
 			scratch = *out;
-			if (match(pc->x, sp, bol, flags, &scratch)) {
+			result = match(pc->x, sp, bol, flags, &scratch, depth+1);
+			if (result == -1)
+				return -1;
+			if (result == 0) {
 				*out = scratch;
-				return 1;
+				return 0;
 			}
 			pc = pc->y;
 			break;
 
 		case I_PLA:
-			if (!match(pc->x, sp, bol, flags, out))
-				return 0;
+			result = match(pc->x, sp, bol, flags, out, depth+1);
+			if (result == -1)
+				return -1;
+			if (result == 1)
+				return 1;
 			pc = pc->y;
 			break;
 		case I_NLA:
 			scratch = *out;
-			if (match(pc->x, sp, bol, flags, &scratch))
-				return 0;
+			result = match(pc->x, sp, bol, flags, &scratch, depth+1);
+			if (result == -1)
+				return -1;
+			if (result == 0)
+				return 1;
 			pc = pc->y;
 			break;
 
@@ -1004,37 +1019,37 @@
 		case I_ANYNL:
 			sp += chartorune(&c, sp);
 			if (c == 0)
-				return 0;
+				return 1;
 			pc = pc + 1;
 			break;
 		case I_ANY:
 			sp += chartorune(&c, sp);
 			if (c == 0)
-				return 0;
+				return 1;
 			if (isnewline(c))
-				return 0;
+				return 1;
 			pc = pc + 1;
 			break;
 		case I_CHAR:
 			sp += chartorune(&c, sp);
 			if (c == 0)
-				return 0;
+				return 1;
 			if (flags & REG_ICASE)
 				c = canon(c);
 			if (c != pc->c)
-				return 0;
+				return 1;
 			pc = pc + 1;
 			break;
 		case I_CCLASS:
 			sp += chartorune(&c, sp);
 			if (c == 0)
-				return 0;
+				return 1;
 			if (flags & REG_ICASE) {
 				if (!incclasscanon(pc->cc, canon(c)))
-					return 0;
+					return 1;
 			} else {
 				if (!incclass(pc->cc, c))
-					return 0;
+					return 1;
 			}
 			pc = pc + 1;
 			break;
@@ -1041,13 +1056,13 @@
 		case I_NCCLASS:
 			sp += chartorune(&c, sp);
 			if (c == 0)
-				return 0;
+				return 1;
 			if (flags & REG_ICASE) {
 				if (incclasscanon(pc->cc, canon(c)))
-					return 0;
+					return 1;
 			} else {
 				if (incclass(pc->cc, c))
-					return 0;
+					return 1;
 			}
 			pc = pc + 1;
 			break;
@@ -1055,10 +1070,10 @@
 			i = out->sub[pc->n].ep - out->sub[pc->n].sp;
 			if (flags & REG_ICASE) {
 				if (strncmpcanon(sp, out->sub[pc->n].sp, i))
-					return 0;
+					return 1;
 			} else {
 				if (strncmp(sp, out->sub[pc->n].sp, i))
-					return 0;
+					return 1;
 			}
 			if (i > 0)
 				sp += i;
@@ -1076,7 +1091,7 @@
 					break;
 				}
 			}
-			return 0;
+			return 1;
 		case I_EOL:
 			if (*sp == 0) {
 				pc = pc + 1;
@@ -1088,12 +1103,12 @@
 					break;
 				}
 			}
-			return 0;
+			return 1;
 		case I_WORD:
 			i = sp > bol && iswordchar(sp[-1]);
 			i ^= iswordchar(sp[0]);
 			if (!i)
-				return 0;
+				return 1;
 			pc = pc + 1;
 			break;
 		case I_NWORD:
@@ -1100,7 +1115,7 @@
 			i = sp > bol && iswordchar(sp[-1]);
 			i ^= iswordchar(sp[0]);
 			if (i)
-				return 0;
+				return 1;
 			pc = pc + 1;
 			break;
 
@@ -1113,7 +1128,7 @@
 			pc = pc + 1;
 			break;
 		default:
-			return 0;
+			return 1;
 		}
 	}
 }
@@ -1130,7 +1145,7 @@
 	for (i = 0; i < MAXSUB; ++i)
 		sub->sub[i].sp = sub->sub[i].ep = NULL;
 
-	return !match(prog->start, sp, sp, prog->flags | eflags, sub);
+	return match(prog->start, sp, sp, prog->flags | eflags, sub, 0);
 }
 
 #ifdef TEST