shithub: mc

Download patch

ref: 8b110be262c4839c498c09b2bd803f24a8b8e9b7
parent: fc888548ad40d62ba16abc7ab5e38b993bd4e59a
author: Ori Bernstein <ori@eigenstate.org>
date: Tue Aug 25 20:19:40 EDT 2015

Test and fix hash tables.

--- a/libstd/htab.myr
+++ b/libstd/htab.myr
@@ -10,6 +10,7 @@
 		eq	: (a : @k, b : @k -> bool)
 
 		nelt	: size
+		ndead	: size
 		keys	: @k[:]
 		vals	: @v[:]
 		hashes	: uint32[:]
@@ -28,6 +29,8 @@
 
 const Initsz = 32
 
+extern const put : (fmt : byte[:], args : ... -> size)
+
 generic hash = {ht, k
 	var h
 
@@ -39,12 +42,11 @@
 	;;
 }
 
-generic resize = {ht
+generic resize = {ht, sz
 	var oldk
 	var oldv
 	var oldh
 	var oldd
-	var sz
 	var i
 
 	oldk = ht.keys
@@ -51,7 +53,6 @@
 	oldv = ht.vals
 	oldh = ht.hashes
 	oldd = ht.dead
-	sz = 2*max(ht.keys.len, 1)
 	ht.keys = slalloc(sz)
 	ht.vals = slalloc(sz)
 	ht.hashes = slzalloc(sz)
@@ -58,6 +59,7 @@
 	ht.dead = slzalloc(sz)
 
 	ht.nelt = 0
+	ht.ndead = 0
 	for i = 0; i < oldk.len; i++
 		if oldh[i] != 0 && !oldd[i]
 			htput(ht, oldk[i], oldv[i])
@@ -82,15 +84,14 @@
 			i = (h + di) & (ht.keys.len - 1)
 		;;
 
-		if ht.hashes[i] == 0 || ht.dead[i]
+		if ht.hashes[i] == 0
 			-> `None
 		;;
-		if ht.eq(ht.keys[i], k)
+		if ht.hashes[i] == h && !ht.dead[i] && ht.eq(ht.keys[i], k)
 			break
-		else
-			di++
-			i = (h + di) & (ht.keys.len - 1)
 		;;
+		di++
+		i = (h + di) & (ht.keys.len - 1)
 	;;
 	-> `Some i
 }
@@ -104,6 +105,7 @@
 	ht.eq = eq
 
 	ht.nelt = 0
+	ht.ndead = 0
 	ht.keys = slalloc(Initsz)
 	ht.vals = slalloc(Initsz)
 	ht.hashes = slzalloc(Initsz)
@@ -129,14 +131,12 @@
 	i = h & (ht.keys.len - 1)
 	neltincr = 1
 	while ht.hashes[i] != 0 && !ht.dead[i]
-		/*
-		second insertion just overwrites first.
-		nb: comparing keys for dead values is bad.
-		*/
-		if ht.hashes[i] == h
+		/* second insertion overwrites */
+		if ht.hashes[i] == h && !ht.dead[i]
+			/* dead key, we can just insert here */
 			if ht.dead[i]
 				break
-			/* replacing a key shouldn't grow the element count */
+			/* replacing a key */
 			elif ht.eq(ht.keys[i], k)
 				neltincr = 0
 				break
@@ -151,7 +151,7 @@
 	ht.vals[i] = v
 	ht.dead[i] = false
 	if ht.keys.len < ht.nelt * 2
-		resize(ht)
+		resize(ht, 2*ht.keys.len)
 	;;
 }
 
@@ -160,9 +160,11 @@
 	| `Some i:
 		ht.dead[i] = true
 		ht.nelt--
+		ht.ndead++
+		std.put("ndead = {}\n", ht.ndead)
 		/* remove tombstones if we shrink enough */
-		if ht.keys.len > ht.nelt * 4
-			resize(ht)
+		if ht.keys.len < ht.ndead * 4
+			resize(ht, ht.keys.len)
 		;;
 	| _:	
 		/* do nothing */
--- /dev/null
+++ b/libstd/test/htab.myr
@@ -1,0 +1,104 @@
+use std
+
+const insertion = {
+	/*
+	var ht
+	var i
+
+	ht = std.mkht(idhash, ideq)
+	/* only a few values; shouldn't trigger growth */
+	for i = 0; i < 5; i++
+		std.htput(ht, i, i)
+	;;
+	for i = 0; i < 5; i++
+		std.assert(std.htgetv(ht, i, -1) == i, "returned incorrect value from hash table")
+	;;
+
+	/* and grow */
+	for i = 0; i < 5000; i++
+		std.htput(ht, i, i)
+	;;
+	for i = 0; i < 5000; i++
+		std.assert(std.htgetv(ht, i, -1) == i, "returned incorrect value from hash table")
+	;;
+	*/
+}
+
+const deletion = {
+	var ht
+	var i
+
+	ht = std.mkht(idhash, ideq)
+	/* create a hash table with a few hundred values */
+	for i = 0; i < 4000; i++
+		std.htput(ht, i, i)
+	;;
+	for i = 0; i < 200; i++
+		std.htdel(ht, i*2)
+	;;
+	for i = 0; i < 200; i++
+		std.assert(!std.hthas(ht, i*2), "deleted item still present")
+	;;
+	for i = 0; i < 200; i++
+		std.assert(std.hthas(ht, i*2+1), "undeleted item missing")
+	;;
+	for i = 400; i < 4000; i++
+		std.assert(std.hthas(ht, i), "undeleted item missing")
+	;;
+
+}
+
+const collision = {
+	var ht
+	var i
+
+	ht = std.mkht(idhash, ideq)
+	/* insert an element a few hundred times */
+	for i = 0; i < 500; i++
+		std.htput(ht, 0, i)
+	;;
+	std.assert(std.hthas(ht, 0), "inserted element not present")
+	std.assert(std.htgetv(ht, 0, -1) == 499, "inserted element has wrong value")
+	std.htdel(ht, 0)
+	std.assert(!std.hthas(ht, 0), "element left in table")
+}
+
+const tombstonefill = {
+	var ht
+	var i
+
+	ht = std.mkht(idhash, ideq)
+	/* 
+	insert an element into each slot in the hash table, and
+	delete it. With direct hashing, this is guaranteed to have
+	put a tombstone into each slot.
+	*/
+	for i = 0; i <= ht.keys.len; i++
+		std.htput(ht, i, i)
+		std.htdel(ht, i)
+	;;
+	/* make sure we haven't actually got anything in the table */
+	std.assert(ht.nelt == 0, "elements left in hash table")
+	std.assert(!std.hthas(ht, 1), "found phantom element")
+}
+
+const main = {
+	/* only a few elements */
+	std.put("insertion\n")
+	insertion()
+	std.put("deletion\n")
+	deletion()
+	std.put("collision\n")
+	collision()
+
+	/* what happens if we try to fill everything up with tombstones? */
+	tombstonefill()
+}
+
+const idhash = {x
+	-> x
+}
+
+const ideq = {a, b
+	-> a == b
+}
--- a/mk/c.mk
+++ b/mk/c.mk
@@ -6,7 +6,7 @@
 _LIBINCPATHS=$(addprefix -I, $(dir $(DEPS)))
 _LIBPATHS=$(addprefix -l, $(patsubst lib%.a,%,$(notdir $(DEPS))))
 
-CFLAGS += -O0 -Wall -Werror -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -g
+CFLAGS += -O -Wall -Werror -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -g
 CFLAGS += -MMD -MP -MF ${_DEPSDIR}/$(subst /,-,$*).d
 
 LIB ?= $(INSTLIB)
--- a/parse/htab.c
+++ b/parse/htab.c
@@ -150,7 +150,7 @@
     }
     if (!ht->hashes[i]) 
         return -1;
-    if (!ht->cmp(ht->keys[i], k) || (ht->hashes[i] == h && ht->dead[i]))
+    if ((ht->hashes[i] == h && ht->dead[i]) || !ht->cmp(ht->keys[i], k))
         goto searchmore; /* collision */
     return i;
 }