shithub: libmujs

Download patch

ref: 3f71a1c946093b8d2390cc2f738063cb75bb5a17
parent: ac03b95b184b70f3fafc8f285bc2c4a796698035
author: Tor Andersson <tor.andersson@artifex.com>
date: Wed Aug 31 10:10:14 EDT 2022

Fast path for "simple" arrays.

An array without holes and with only integer properties can be represented
with a "flat" array part that allows for O(1) property access.

If we ever add a non-integer property, create holes in the array,
the whole array is unpacked into a normal string-keyed object.

Also add fast integer indexing to be used on these arrays, before falling
back to converting the integer to a string property lookup.

Use JS_ARRAYLIMIT to restrict size of arrays to avoid integer overflows and out
of memory thrashing.

--- a/jsarray.c
+++ b/jsarray.c
@@ -17,30 +17,6 @@
 	js_setproperty(J, idx < 0 ? idx - 1 : idx, "length");
 }
 
-int js_hasindex(js_State *J, int idx, int i)
-{
-	char buf[32];
-	return js_hasproperty(J, idx, js_itoa(buf, i));
-}
-
-void js_getindex(js_State *J, int idx, int i)
-{
-	char buf[32];
-	js_getproperty(J, idx, js_itoa(buf, i));
-}
-
-void js_setindex(js_State *J, int idx, int i)
-{
-	char buf[32];
-	js_setproperty(J, idx, js_itoa(buf, i));
-}
-
-void js_delindex(js_State *J, int idx, int i)
-{
-	char buf[32];
-	js_delproperty(J, idx, js_itoa(buf, i));
-}
-
 static void jsB_new_Array(js_State *J)
 {
 	int i, top = js_gettop(J);
--- a/jsgc.c
+++ b/jsgc.c
@@ -42,6 +42,8 @@
 		js_free(J, obj->u.r.source);
 		js_regfreex(J->alloc, J->actx, obj->u.r.prog);
 	}
+	if (obj->type == JS_CARRAY && obj->u.a.simple)
+		js_free(J, obj->u.a.array);
 	if (obj->type == JS_CITERATOR)
 		jsG_freeiterator(J, obj->u.iter.head);
 	if (obj->type == JS_CUSERDATA && obj->u.user.finalize)
@@ -100,6 +102,16 @@
 		jsG_markproperty(J, mark, obj->properties);
 	if (obj->prototype && obj->prototype->gcmark != mark)
 		jsG_markobject(J, mark, obj->prototype);
+	if (obj->type == JS_CARRAY && obj->u.a.simple) {
+		int i;
+		for (i = 0; i < obj->u.a.length; ++i) {
+			js_Value *v = &obj->u.a.array[i];
+			if (v->type == JS_TMEMSTR && v->u.memstr->gcmark != mark)
+				v->u.memstr->gcmark = mark;
+			if (v->type == JS_TOBJECT && v->u.object->gcmark != mark)
+				jsG_markobject(J, mark, v->u.object);
+		}
+	}
 	if (obj->type == JS_CITERATOR && obj->u.iter.target->gcmark != mark) {
 		jsG_markobject(J, mark, obj->u.iter.target);
 	}
--- a/jsi.h
+++ b/jsi.h
@@ -86,6 +86,9 @@
 #ifndef JS_TRYLIMIT
 #define JS_TRYLIMIT 64		/* exception stack size */
 #endif
+#ifndef JS_ARRAYLIMIT
+#define JS_ARRAYLIMIT (1<<26)	/* limit arrays to 64M entries (1G of flat array data) */
+#endif
 #ifndef JS_GCFACTOR
 /*
  * GC will try to trigger when memory usage is this value times the minimum
--- a/jsobject.c
+++ b/jsobject.c
@@ -62,7 +62,24 @@
 {
 	js_Object *self = js_toobject(J, 0);
 	const char *name = js_tostring(J, 1);
-	js_Property *ref = jsV_getownproperty(J, self, name);
+	js_Property *ref;
+	int k;
+
+	if (self->type == JS_CSTRING) {
+		if (js_isarrayindex(J, name, &k) && k >= 0 && k < self->u.s.length) {
+			js_pushboolean(J, 1);
+			return;
+		}
+	}
+
+	if (self->type == JS_CARRAY && self->u.a.simple) {
+		if (js_isarrayindex(J, name, &k) && k >= 0 && k < self->u.a.length) {
+			js_pushboolean(J, 1);
+			return;
+		}
+	}
+
+	ref = jsV_getownproperty(J, self, name);
 	js_pushboolean(J, ref != NULL);
 }
 
@@ -110,9 +127,10 @@
 		js_typeerror(J, "not an object");
 	obj = js_toobject(J, 1);
 	ref = jsV_getproperty(J, obj, js_tostring(J, 2));
-	if (!ref)
+	if (!ref) {
+		// TODO: builtin properties (string and array index and length, regexp flags, etc)
 		js_pushundefined(J);
-	else {
+	} else {
 		js_newobject(J);
 		if (!ref->getter && !ref->setter) {
 			js_pushvalue(J, ref->value);
@@ -152,6 +170,7 @@
 static void O_getOwnPropertyNames(js_State *J)
 {
 	js_Object *obj;
+	char name[32];
 	int k;
 	int i;
 
@@ -169,6 +188,13 @@
 	if (obj->type == JS_CARRAY) {
 		js_pushliteral(J, "length");
 		js_setindex(J, -2, i++);
+		if (obj->u.a.simple) {
+			for (k = 0; k < obj->u.a.length; ++k) {
+				js_itoa(name, k);
+				js_pushstring(J, name);
+				js_setindex(J, -2, i++);
+			}
+		}
 	}
 
 	if (obj->type == JS_CSTRING) {
@@ -175,7 +201,8 @@
 		js_pushliteral(J, "length");
 		js_setindex(J, -2, i++);
 		for (k = 0; k < obj->u.s.length; ++k) {
-			js_pushnumber(J, k);
+			js_itoa(name, k);
+			js_pushstring(J, name);
 			js_setindex(J, -2, i++);
 		}
 	}
@@ -359,9 +386,12 @@
 
 static void O_preventExtensions(js_State *J)
 {
+	js_Object *obj;
 	if (!js_isobject(J, 1))
 		js_typeerror(J, "not an object");
-	js_toobject(J, 1)->extensible = 0;
+	obj = js_toobject(J, 1);
+	jsR_unflattenarray(J, obj);
+	obj->extensible = 0;
 	js_copy(J, 1);
 }
 
@@ -389,6 +419,7 @@
 		js_typeerror(J, "not an object");
 
 	obj = js_toobject(J, 1);
+	jsR_unflattenarray(J, obj);
 	obj->extensible = 0;
 
 	if (obj->properties->level)
@@ -446,6 +477,7 @@
 		js_typeerror(J, "not an object");
 
 	obj = js_toobject(J, 1);
+	jsR_unflattenarray(J, obj);
 	obj->extensible = 0;
 
 	if (obj->properties->level)
--- a/jsproperty.c
+++ b/jsproperty.c
@@ -1,6 +1,8 @@
 #include "jsi.h"
 #include "jsvalue.h"
 
+#include <assert.h>
+
 /*
 	Use an AA-tree to quickly look up properties in objects:
 
@@ -226,6 +228,21 @@
 
 /* Flatten hierarchy of enumerable properties into an iterator object */
 
+static js_Iterator *itnewnode(js_State *J, const char *name, js_Iterator *next) {
+	js_Iterator *node = js_malloc(J, offsetof(js_Iterator, buf));
+	node->name = name;
+	node->next = next;
+	return node;
+}
+
+static js_Iterator *itnewnodeix(js_State *J, int ix) {
+	js_Iterator *node = js_malloc(J, sizeof(js_Iterator));
+	js_itoa(node->buf, ix);
+	node->name = node->buf;
+	node->next = NULL;
+	return node;
+}
+
 static js_Iterator *itwalk(js_State *J, js_Iterator *iter, js_Property *prop, js_Object *seen)
 {
 	if (prop->right != &sentinel)
@@ -232,10 +249,7 @@
 		iter = itwalk(J, iter, prop->right, seen);
 	if (!(prop->atts & JS_DONTENUM)) {
 		if (!seen || !jsV_getenumproperty(J, seen, prop->name)) {
-			js_Iterator *head = js_malloc(J, sizeof *head);
-			head->name = prop->name;
-			head->next = iter;
-			iter = head;
+			iter = itnewnode(J, prop->name, iter);
 		}
 	}
 	if (prop->left != &sentinel)
@@ -266,6 +280,7 @@
 	} else {
 		io->u.iter.head = itflatten(J, obj);
 	}
+
 	if (obj->type == JS_CSTRING) {
 		js_Iterator *tail = io->u.iter.head;
 		if (tail)
@@ -274,9 +289,7 @@
 		for (k = 0; k < obj->u.s.length; ++k) {
 			js_itoa(buf, k);
 			if (!jsV_getenumproperty(J, obj, buf)) {
-				js_Iterator *node = js_malloc(J, sizeof *node);
-				node->name = js_intern(J, js_itoa(buf, k));
-				node->next = NULL;
+				js_Iterator *node = itnewnodeix(J, k);
 				if (!tail)
 					io->u.iter.head = tail = node;
 				else {
@@ -286,6 +299,23 @@
 			}
 		}
 	}
+
+	if (obj->type == JS_CARRAY && obj->u.a.simple) {
+		js_Iterator *tail = io->u.iter.head;
+		if (tail)
+			while (tail->next)
+				tail = tail->next;
+		for (k = 0; k < obj->u.a.length; ++k) {
+			js_Iterator *node = itnewnodeix(J, k);
+			if (!tail)
+				io->u.iter.head = tail = node;
+			else {
+				tail->next = node;
+				tail = node;
+			}
+		}
+	}
+
 	return io;
 }
 
@@ -304,6 +334,9 @@
 		if (io->u.iter.target->type == JS_CSTRING)
 			if (js_isarrayindex(J, name, &k) && k < io->u.iter.target->u.s.length)
 				return name;
+		if (io->u.iter.target->type == JS_CARRAY && io->u.iter.target->u.a.simple)
+			if (js_isarrayindex(J, name, &k) && k < io->u.iter.target->u.a.length)
+				return name;
 	}
 	return NULL;
 }
@@ -315,6 +348,7 @@
 	char buf[32];
 	const char *s;
 	int k;
+	assert(!obj->u.a.simple);
 	if (newlen < obj->u.a.length) {
 		if (obj->u.a.length > obj->count * 2) {
 			js_Object *it = jsV_newiterator(J, obj, 1);
--- a/jsrun.c
+++ b/jsrun.c
@@ -5,6 +5,8 @@
 
 #include "utf.h"
 
+#include <assert.h>
+
 static void jsR_run(js_State *J, js_Function *F);
 
 /* Push values on stack */
@@ -517,6 +519,28 @@
 	}
 }
 
+void jsR_unflattenarray(js_State *J, js_Object *obj) {
+	if (obj->type == JS_CARRAY && obj->u.a.simple) {
+		js_Property *ref;
+		int i;
+		char name[32];
+		if (js_try(J)) {
+			obj->properties = NULL;
+			js_throw(J);
+		}
+		for (i = 0; i < obj->u.a.length; ++i) {
+			js_itoa(name, i);
+			ref = jsV_setproperty(J, obj, name);
+			ref->value = obj->u.a.array[i];
+		}
+		js_free(J, obj->u.a.array);
+		obj->u.a.simple = 0;
+		obj->u.a.capacity = 0;
+		obj->u.a.array = NULL;
+		js_endtry(J);
+	}
+}
+
 static int jsR_hasproperty(js_State *J, js_Object *obj, const char *name)
 {
 	js_Property *ref;
@@ -527,6 +551,14 @@
 			js_pushnumber(J, obj->u.a.length);
 			return 1;
 		}
+		if (obj->u.a.simple) {
+			if (js_isarrayindex(J, name, &k)) {
+				if (k >= 0 && k < obj->u.a.length) {
+					js_pushvalue(J, obj->u.a.array[k]);
+					return 1;
+				}
+			}
+		}
 	}
 
 	else if (obj->type == JS_CSTRING) {
@@ -591,6 +623,45 @@
 		js_pushundefined(J);
 }
 
+static int jsR_hasindex(js_State *J, js_Object *obj, int k)
+{
+	char buf[32];
+	if (obj->type == JS_CARRAY && obj->u.a.simple && k >= 0 && k < obj->u.a.length) {
+		js_pushvalue(J, obj->u.a.array[k]);
+		return 1;
+	}
+	return jsR_hasproperty(J, obj, js_itoa(buf, k));
+}
+
+static void jsR_getindex(js_State *J, js_Object *obj, int k)
+{
+	if (!jsR_hasindex(J, obj, k))
+		js_pushundefined(J);
+}
+
+static void jsR_setarrayindex(js_State *J, js_Object *obj, int k, js_Value *value)
+{
+	int newlen = k + 1;
+	assert(obj->u.a.simple);
+	assert(k >= 0);
+	if (newlen > JS_ARRAYLIMIT)
+		js_rangeerror(J, "array too large");
+	if (newlen > obj->u.a.length) {
+		assert(newlen == obj->u.a.length + 1);
+		if (newlen > obj->u.a.capacity) {
+			int newcap = obj->u.a.capacity;
+			if (newcap == 0)
+				newcap = 8;
+			while (newcap < newlen)
+				newcap <<= 1;
+			obj->u.a.array = js_realloc(J, obj->u.a.array, newcap * sizeof(js_Value));
+			obj->u.a.capacity = newcap;
+		}
+		obj->u.a.length = newlen;
+	}
+	obj->u.a.array[k] = *value;
+}
+
 static void jsR_setproperty(js_State *J, js_Object *obj, const char *name, int transient)
 {
 	js_Value *value = stackidx(J, -1);
@@ -604,12 +675,30 @@
 			int newlen = jsV_numbertointeger(rawlen);
 			if (newlen != rawlen || newlen < 0)
 				js_rangeerror(J, "invalid array length");
+			if (newlen > JS_ARRAYLIMIT)
+				js_rangeerror(J, "array too large");
+			if (obj->u.a.simple) {
+				if (newlen <= obj->u.a.length) {
+					obj->u.a.length = newlen;
+					return;
+				}
+				jsR_unflattenarray(J, obj);
+			}
 			jsV_resizearray(J, obj, newlen);
 			return;
 		}
-		if (js_isarrayindex(J, name, &k))
-			if (k >= obj->u.a.length)
+
+		if (js_isarrayindex(J, name, &k)) {
+			if (obj->u.a.simple) {
+				if (k >= 0 && k <= obj->u.a.length) {
+					jsR_setarrayindex(J, obj, k, value);
+					return;
+				}
+				jsR_unflattenarray(J, obj);
+			}
+			if (k + 1 > obj->u.a.length)
 				obj->u.a.length = k + 1;
+		}
 	}
 
 	else if (obj->type == JS_CSTRING) {
@@ -679,6 +768,16 @@
 		js_typeerror(J, "'%s' is read-only", name);
 }
 
+static void jsR_setindex(js_State *J, js_Object *obj, int k, int transient)
+{
+	char buf[32];
+	if (obj->type == JS_CARRAY && obj->u.a.simple && k >= 0 && k <= obj->u.a.length) {
+		jsR_setarrayindex(J, obj, k, stackidx(J, -1));
+	} else {
+		jsR_setproperty(J, obj, js_itoa(buf, k), transient);
+	}
+}
+
 static void jsR_defproperty(js_State *J, js_Object *obj, const char *name,
 	int atts, js_Value *value, js_Object *getter, js_Object *setter,
 	int throw)
@@ -689,6 +788,8 @@
 	if (obj->type == JS_CARRAY) {
 		if (!strcmp(name, "length"))
 			goto readonly;
+		if (obj->u.a.simple)
+			jsR_unflattenarray(J, obj);
 	}
 
 	else if (obj->type == JS_CSTRING) {
@@ -750,6 +851,8 @@
 	if (obj->type == JS_CARRAY) {
 		if (!strcmp(name, "length"))
 			goto dontconf;
+		if (obj->u.a.simple)
+			jsR_unflattenarray(J, obj);
 	}
 
 	else if (obj->type == JS_CSTRING) {
@@ -889,6 +992,28 @@
 	return jsR_hasproperty(J, js_toobject(J, idx), name);
 }
 
+void js_getindex(js_State *J, int idx, int i)
+{
+	jsR_getindex(J, js_toobject(J, idx), i);
+}
+
+int js_hasindex(js_State *J, int idx, int i)
+{
+	return jsR_hasindex(J, js_toobject(J, idx), i);
+}
+
+void js_setindex(js_State *J, int idx, int i)
+{
+	jsR_setindex(J, js_toobject(J, idx), i, !js_isobject(J, idx));
+	js_pop(J, 1);
+}
+
+void js_delindex(js_State *J, int idx, int i)
+{
+	char buf[32];
+	js_delproperty(J, idx, js_itoa(buf, i));
+}
+
 /* Iterator */
 
 void js_pushiterator(js_State *J, int idx, int own)
@@ -1360,6 +1485,16 @@
 	js_stacktrace(J);
 }
 
+static int jsR_isindex(js_State *J, int idx, int *k)
+{
+	js_Value *v = stackidx(J, idx);
+	if (v->type == JS_TNUMBER) {
+		*k = v->u.number;
+		return *k == v->u.number && *k >= 0;
+	}
+	return 0;
+}
+
 static void jsR_run(js_State *J, js_Function *F)
 {
 	js_Function **FT = F->funtab;
@@ -1532,9 +1667,14 @@
 			break;
 
 		case OP_GETPROP:
-			str = js_tostring(J, -1);
-			obj = js_toobject(J, -2);
-			jsR_getproperty(J, obj, str);
+			if (jsR_isindex(J, -1, &ix)) {
+				obj = js_toobject(J, -2);
+				jsR_getindex(J, obj, ix);
+			} else {
+				str = js_tostring(J, -1);
+				obj = js_toobject(J, -2);
+				jsR_getproperty(J, obj, str);
+			}
 			js_rot3pop2(J);
 			break;
 
@@ -1546,10 +1686,16 @@
 			break;
 
 		case OP_SETPROP:
-			str = js_tostring(J, -2);
-			obj = js_toobject(J, -3);
-			transient = !js_isobject(J, -3);
-			jsR_setproperty(J, obj, str, transient);
+			if (jsR_isindex(J, -2, &ix)) {
+				obj = js_toobject(J, -3);
+				transient = !js_isobject(J, -3);
+				jsR_setindex(J, obj, ix, transient);
+			} else {
+				str = js_tostring(J, -2);
+				obj = js_toobject(J, -3);
+				transient = !js_isobject(J, -3);
+				jsR_setproperty(J, obj, str, transient);
+			}
 			js_rot3pop2(J);
 			break;
 
--- a/jsvalue.c
+++ b/jsvalue.c
@@ -423,7 +423,9 @@
 
 void js_newarray(js_State *J)
 {
-	js_pushobject(J, jsV_newobject(J, JS_CARRAY, J->Array_prototype));
+	js_Object *obj = jsV_newobject(J, JS_CARRAY, J->Array_prototype);
+	obj->u.a.simple = 1;
+	js_pushobject(J, obj);
 }
 
 void js_newboolean(js_State *J, int v)
--- a/jsvalue.h
+++ b/jsvalue.h
@@ -93,6 +93,9 @@
 		} s;
 		struct {
 			int length;
+			int simple; // true if array has only non-sparse array properties
+			int capacity;
+			js_Value *array;
 		} a;
 		struct {
 			js_Function *function;
@@ -140,6 +143,7 @@
 {
 	const char *name;
 	js_Iterator *next;
+	char buf[12]; /* for integer iterators */
 };
 
 /* jsrun.c */
@@ -149,6 +153,7 @@
 js_Object *js_toobject(js_State *J, int idx);
 void js_pushvalue(js_State *J, js_Value v);
 void js_pushobject(js_State *J, js_Object *v);
+void jsR_unflattenarray(js_State *J, js_Object *obj);
 
 /* jsvalue.c */
 int jsV_toboolean(js_State *J, js_Value *v);
@@ -158,7 +163,7 @@
 js_Object *jsV_toobject(js_State *J, js_Value *v);
 void jsV_toprimitive(js_State *J, js_Value *v, int preferred);
 
-const char *js_itoa(char buf[32], int a);
+const char *js_itoa(char *buf, int a);
 double js_stringtofloat(const char *s, char **ep);
 int jsV_numbertointeger(double n);
 int jsV_numbertoint32(double n);
@@ -181,6 +186,9 @@
 const char *jsV_nextiterator(js_State *J, js_Object *iter);
 
 void jsV_resizearray(js_State *J, js_Object *obj, int newlen);
+
+void jsV_unflattenarray(js_State *J, js_Object *obj);
+void jsV_growarray(js_State *J, js_Object *obj);
 
 /* jsdump.c */
 void js_dumpobject(js_State *J, js_Object *obj);