ref: 8b110be262c4839c498c09b2bd803f24a8b8e9b7
parent: fc888548ad40d62ba16abc7ab5e38b993bd4e59a
author: Ori Bernstein <ori@eigenstate.org>
date: Tue Aug 25 20:19:40 EDT 2015
Test and fix hash tables.
--- a/libstd/htab.myr
+++ b/libstd/htab.myr
@@ -10,6 +10,7 @@
eq : (a : @k, b : @k -> bool)
nelt : size
+ ndead : size
keys : @k[:]
vals : @v[:]
hashes : uint32[:]
@@ -28,6 +29,8 @@
const Initsz = 32
+extern const put : (fmt : byte[:], args : ... -> size)
+
generic hash = {ht, k
var h
@@ -39,12 +42,11 @@
;;
}
-generic resize = {ht
+generic resize = {ht, sz
var oldk
var oldv
var oldh
var oldd
- var sz
var i
oldk = ht.keys
@@ -51,7 +53,6 @@
oldv = ht.vals
oldh = ht.hashes
oldd = ht.dead
- sz = 2*max(ht.keys.len, 1)
ht.keys = slalloc(sz)
ht.vals = slalloc(sz)
ht.hashes = slzalloc(sz)
@@ -58,6 +59,7 @@
ht.dead = slzalloc(sz)
ht.nelt = 0
+ ht.ndead = 0
for i = 0; i < oldk.len; i++
if oldh[i] != 0 && !oldd[i]
htput(ht, oldk[i], oldv[i])
@@ -82,15 +84,14 @@
i = (h + di) & (ht.keys.len - 1)
;;
- if ht.hashes[i] == 0 || ht.dead[i]
+ if ht.hashes[i] == 0
-> `None
;;
- if ht.eq(ht.keys[i], k)
+ if ht.hashes[i] == h && !ht.dead[i] && ht.eq(ht.keys[i], k)
break
- else
- di++
- i = (h + di) & (ht.keys.len - 1)
;;
+ di++
+ i = (h + di) & (ht.keys.len - 1)
;;
-> `Some i
}
@@ -104,6 +105,7 @@
ht.eq = eq
ht.nelt = 0
+ ht.ndead = 0
ht.keys = slalloc(Initsz)
ht.vals = slalloc(Initsz)
ht.hashes = slzalloc(Initsz)
@@ -129,14 +131,12 @@
i = h & (ht.keys.len - 1)
neltincr = 1
while ht.hashes[i] != 0 && !ht.dead[i]
- /*
- second insertion just overwrites first.
- nb: comparing keys for dead values is bad.
- */
- if ht.hashes[i] == h
+ /* second insertion overwrites */
+ if ht.hashes[i] == h && !ht.dead[i]
+ /* dead key, we can just insert here */
if ht.dead[i]
break
- /* replacing a key shouldn't grow the element count */
+ /* replacing a key */
elif ht.eq(ht.keys[i], k)
neltincr = 0
break
@@ -151,7 +151,7 @@
ht.vals[i] = v
ht.dead[i] = false
if ht.keys.len < ht.nelt * 2
- resize(ht)
+ resize(ht, 2*ht.keys.len)
;;
}
@@ -160,9 +160,11 @@
| `Some i:
ht.dead[i] = true
ht.nelt--
+ ht.ndead++
+ std.put("ndead = {}\n", ht.ndead)
/* remove tombstones if we shrink enough */
- if ht.keys.len > ht.nelt * 4
- resize(ht)
+ if ht.keys.len < ht.ndead * 4
+ resize(ht, ht.keys.len)
;;
| _:
/* do nothing */
--- /dev/null
+++ b/libstd/test/htab.myr
@@ -1,0 +1,104 @@
+use std
+
+const insertion = {
+ /*
+ var ht
+ var i
+
+ ht = std.mkht(idhash, ideq)
+ /* only a few values; shouldn't trigger growth */
+ for i = 0; i < 5; i++
+ std.htput(ht, i, i)
+ ;;
+ for i = 0; i < 5; i++
+ std.assert(std.htgetv(ht, i, -1) == i, "returned incorrect value from hash table")
+ ;;
+
+ /* and grow */
+ for i = 0; i < 5000; i++
+ std.htput(ht, i, i)
+ ;;
+ for i = 0; i < 5000; i++
+ std.assert(std.htgetv(ht, i, -1) == i, "returned incorrect value from hash table")
+ ;;
+ */
+}
+
+const deletion = {
+ var ht
+ var i
+
+ ht = std.mkht(idhash, ideq)
+ /* create a hash table with a few hundred values */
+ for i = 0; i < 4000; i++
+ std.htput(ht, i, i)
+ ;;
+ for i = 0; i < 200; i++
+ std.htdel(ht, i*2)
+ ;;
+ for i = 0; i < 200; i++
+ std.assert(!std.hthas(ht, i*2), "deleted item still present")
+ ;;
+ for i = 0; i < 200; i++
+ std.assert(std.hthas(ht, i*2+1), "undeleted item missing")
+ ;;
+ for i = 400; i < 4000; i++
+ std.assert(std.hthas(ht, i), "undeleted item missing")
+ ;;
+
+}
+
+const collision = {
+ var ht
+ var i
+
+ ht = std.mkht(idhash, ideq)
+ /* insert an element a few hundred times */
+ for i = 0; i < 500; i++
+ std.htput(ht, 0, i)
+ ;;
+ std.assert(std.hthas(ht, 0), "inserted element not present")
+ std.assert(std.htgetv(ht, 0, -1) == 499, "inserted element has wrong value")
+ std.htdel(ht, 0)
+ std.assert(!std.hthas(ht, 0), "element left in table")
+}
+
+const tombstonefill = {
+ var ht
+ var i
+
+ ht = std.mkht(idhash, ideq)
+ /*
+ insert an element into each slot in the hash table, and
+ delete it. With direct hashing, this is guaranteed to have
+ put a tombstone into each slot.
+ */
+ for i = 0; i <= ht.keys.len; i++
+ std.htput(ht, i, i)
+ std.htdel(ht, i)
+ ;;
+ /* make sure we haven't actually got anything in the table */
+ std.assert(ht.nelt == 0, "elements left in hash table")
+ std.assert(!std.hthas(ht, 1), "found phantom element")
+}
+
+const main = {
+ /* only a few elements */
+ std.put("insertion\n")
+ insertion()
+ std.put("deletion\n")
+ deletion()
+ std.put("collision\n")
+ collision()
+
+ /* what happens if we try to fill everything up with tombstones? */
+ tombstonefill()
+}
+
+const idhash = {x
+ -> x
+}
+
+const ideq = {a, b
+ -> a == b
+}
--- a/mk/c.mk
+++ b/mk/c.mk
@@ -6,7 +6,7 @@
_LIBINCPATHS=$(addprefix -I, $(dir $(DEPS)))
_LIBPATHS=$(addprefix -l, $(patsubst lib%.a,%,$(notdir $(DEPS))))
-CFLAGS += -O0 -Wall -Werror -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -g
+CFLAGS += -O -Wall -Werror -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -g
CFLAGS += -MMD -MP -MF ${_DEPSDIR}/$(subst /,-,$*).d
LIB ?= $(INSTLIB)
--- a/parse/htab.c
+++ b/parse/htab.c
@@ -150,7 +150,7 @@
}
if (!ht->hashes[i])
return -1;
- if (!ht->cmp(ht->keys[i], k) || (ht->hashes[i] == h && ht->dead[i]))
+ if ((ht->hashes[i] == h && ht->dead[i]) || !ht->cmp(ht->keys[i], k))
goto searchmore; /* collision */
return i;
}