ref: bfbbf051c90844ab0e3e18c10d646013b17e38d9
parent: c2026ba77cad42c4cb4b277dd80061c53d79361c
author: JeffBezanson <jeff.bezanson@gmail.com>
date: Tue May 19 23:39:20 EDT 2009
doing a better job hashing circular structure. the hash function is now always fast and gives conservative correct answers, at the expense of fidelity on medium and large size structures. for example (hash (iota 15)) gives the same result as (hash (iota 14)).
--- a/femtolisp/equal.c
+++ b/femtolisp/equal.c
@@ -165,6 +165,7 @@
static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq)
{
+ cyc_compare_top:
if (a==b)
return fixnum(0);
if (iscons(a)) {
@@ -199,7 +200,9 @@
eq_union(table, a, b, ca, cb);
d = cyc_compare(aa, ab, table, eq);
if (numval(d)!=0) return d;
- return cyc_compare(da, db, table, eq);
+ a = da;
+ b = db;
+ goto cyc_compare_top;
}
else {
return fixnum(1);
@@ -256,11 +259,9 @@
#define doublehash(a) int64to32hash(a)
#endif
-// *flag means max recursion bound exceeded
-// *ut means this happened some time, so we had to start using the table
-static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
+// *ut means we had to start using the table
+static uptrint_t bounded_hash(value_t a, int bound, int *ut)
{
- *flag = 0;
double d;
numerictype_t nt;
size_t i, len;
@@ -273,8 +274,7 @@
if (h != (uptrint_t)HT_NOTFOUND)
return h;
}
- if (bound <= 0) { *ut = *flag = 1; return 0; }
- int bb, tg = tag(a);
+ int tg = tag(a);
switch(tg) {
case TAG_NUM :
case TAG_NUM1:
@@ -282,7 +282,7 @@
return doublehash(*(int64_t*)&d);
case TAG_FUNCTION:
if (uintval(a) > N_BUILTINS)
- return bounded_hash(((function_t*)ptr(a))->bcode, bound, flag, ut);
+ return bounded_hash(((function_t*)ptr(a))->bcode, bound, ut);
return inthash(a);
case TAG_SYM:
return ((symbol_t*)ptr(a))->hash;
@@ -297,32 +297,38 @@
data = cv_data(cv);
return memhash(data, cv_len(cv));
case TAG_VECTOR:
+ if (bound <= 0) {
+ h = ++(*ut) + (uptrint_t)HT_NOTFOUND;
+ ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
+ return h;
+ }
len = vector_size(a);
for(i=0; i < len; i++) {
- h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, flag, ut));
- if (*flag) {
- if (h == (uptrint_t)HT_NOTFOUND) h++;
- ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
- }
+ h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, ut)+1);
}
return h;
case TAG_CONS:
+ if (bound <= 0)
+ return 1;
+ return MIX(bounded_hash(car_(a), bound/2, ut),
+ bounded_hash(cdr_(a), bound/2, ut)+2);
+ // this should be able to hash long lists with greater fidelity,
+ // but it does not work yet.
+ /*
+ first = a;
bb = BOUNDED_HASH_BOUND;
do {
- h = MIX(h, bounded_hash(car_(a), bound-1, flag, ut)+1);
- if (*flag) {
- if (h == (uptrint_t)HT_NOTFOUND) h++;
- ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
- }
+ h = MIX(h, bounded_hash(car_(a), bound-1, ut));
a = cdr_(a);
bb--;
- if (bb <= 0) { *ut = *flag = 1; return h; }
- if (*ut) {
- if (ptrhash_get(&equal_eq_hashtable, (void*)a) != HT_NOTFOUND)
- return h;
+ if (bb <= 0) {
+ *ut = 1;
+ ptrhash_put(&equal_eq_hashtable, (void*)first, (void*)h);
+ return h;
}
} while (iscons(a));
- return MIX(h, bounded_hash(a, bound-1, flag, ut)+1);
+ return MIX(h, bounded_hash(a, bound-1, ut));
+ */
}
return 0;
}
@@ -336,8 +342,8 @@
uptrint_t hash_lispvalue(value_t a)
{
- int flag, ut=0;
- uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &flag, &ut);
+ int ut=0;
+ uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &ut);
if (ut)
htable_reset(&equal_eq_hashtable, 512);
return n;
--- a/femtolisp/flisp.c
+++ b/femtolisp/flisp.c
@@ -761,7 +761,6 @@
- put the stack in this state
- provide arg count
- respect tail position
- - call correct entry point (either eval_sexpr or apply_cl)
- restore SP
callee's responsibility:
--- a/femtolisp/test.lsp
+++ b/femtolisp/test.lsp
@@ -35,14 +35,6 @@
;(set! a (map-int identity 10000))
;(dotimes (i 200) (rfoldl cons () a))
-; iterative filter
-(define (ifilter pred lst)
- ((label f (lambda (accum lst)
- (cond ((null? lst) (nreverse accum))
- ((not (pred (car lst))) (f accum (cdr lst)))
- (#t (f (cons (car lst) accum) (cdr lst))))))
- () lst))
-
(define (sort l)
(if (or (null? l) (null? (cdr l))) l
(let* ((piv (car l))
@@ -76,11 +68,10 @@
`((lambda (,name) (set! ,name ,fn)) ()))
(define (square x) (* x x))
-(define (evenp x) (= x (* (/ x 2) 2)))
(define (expt b p)
(cond ((= p 0) 1)
((= b 0) 0)
- ((evenp p) (square (expt b (/ p 2))))
+ ((even? p) (square (expt b (div0 p 2))))
(#t (* b (expt b (- p 1))))))
(define (gcd a b)
--- a/femtolisp/unittest.lsp
+++ b/femtolisp/unittest.lsp
@@ -109,5 +109,50 @@
(18 . e) (15 . a) (12 . a) (10 . e) (6 . d) (5 . c) (4 . e)
(3 . d) (2 . c) (0 . b) (1 . a))))
+; hashing strange things
+(assert (equal?
+ (hash '#0=(1 1 #0# . #0#))
+ (hash '#1=(1 1 #1# 1 1 #1# . #1#))))
+
+(assert (not (equal?
+ (hash '#0=(1 1 #0# . #0#))
+ (hash '#1=(1 2 #1# 1 1 #1# . #1#)))))
+
+(assert (equal?
+ (hash '#0=((1 . #0#) . #0#))
+ (hash '#1=((1 . #1#) (1 . #1#) . #1#))))
+
+(assert (not (equal?
+ (hash '#0=((1 . #0#) . #0#))
+ (hash '#1=((2 . #1#) (1 . #1#) . #1#)))))
+
+(assert (not (equal?
+ (hash '#0=((1 . #0#) . #0#))
+ (hash '#1=((1 . #1#) (2 . #1#) . #1#)))))
+
+(assert (equal?
+ (hash #0=[1 [2 [#0#]] 3])
+ (hash #1=[1 [2 [[1 [2 [#1#]] 3]]] 3])))
+
+(assert (not (equal?
+ (hash #0=[1 [2 [#0#]] 3])
+ (hash #1=[1 [2 [[5 [2 [#1#]] 3]]] 3]))))
+
+(assert (equal?
+ (hash #0=[1 #0# [2 [#0#]] 3])
+ (hash #1=[1 #1# [2 [[1 #1# [2 [#1#]] 3]]] 3])))
+
+(assert (not (equal?
+ (hash #0=[1 #0# [2 [#0#]] 3])
+ (hash #1=[6 #1# [2 [[1 #1# [2 [#1#]] 3]]] 3]))))
+
+(assert (equal?
+ (hash [1 [2 [[1 1 [2 [1]] 3]]] 3])
+ (hash [1 [2 [[1 1 [2 [1]] 3]]] 3])))
+
+(assert (not (equal?
+ (hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
+ (hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
+
(princ "all tests pass\n")
#t