shithub: mc

ref: 76fc43acecea75b362d7e56557a0466cbda94572
dir: /lib/std/hashfuncs.myr/

View raw version
use "alloc"
use "chartype"
use "die"
use "getint"
use "sleq"
use "slpush"
use "types"
use "utf"

pkg std =
	const strhash	: (s : byte[:]	-> uint64)
	const streq	: (a : byte[:], b : byte[:]	-> bool)

	const strcasehash	: (s : byte[:]	-> uint64)
	const strcaseeq	: (a : byte[:], b : byte[:]	-> bool)

	generic ptrhash	: (p : @a#	-> uint64)
	generic ptreq	: (a : @a#, b : @a#	-> bool)

	generic inthash	: (v : @a::(integral,numeric)	-> uint64)
	generic inteq	: (a : @a::(integral,numeric), b : @a::(integral,numeric) -> bool)

	const murmurhash2	: (data : byte[:], seed : uint32 -> uint32)
	const siphash24	: (data : byte[:], seed : byte[16] -> uint64)

	generic slhash	: (sl : @a[:] -> uint64)
;;

const Seed : byte[16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

generic slhash = {data : @a[:]
	-> strhash(slbytes(data))
}

generic slbytes = {data : @a[:]
	var n

	n = data.len * sizeof(@a)
	-> (data : byte#)[:n]
}

const strhash = {s
	-> siphash24(s, Seed)
}

const strcaseeq = {a, b
	var ca, cb

	while true
		if a.len == 0 || b.len == 0
			break
		;;
		(ca, a) = std.charstep(a)
		(cb, b) = std.charstep(b)
		if std.tolower(ca) != std.tolower(cb)
			-> false
		;;
	;;
	-> a.len == b.len
}

const strcasehash = {s
	var chars
	var c, h

	chars = [][:]
	while s.len != 0
		(c, s) = std.charstep(s)
		std.slpush(&chars, std.tolower(c))
	;;
	h = siphash24(slbytes(chars), Seed)
	slfree(chars)
	-> h
}

const streq = {a, b
	-> sleq(a, b)
}

generic ptrhash = {p : @a#
	-> inthash((p : intptr))
}

generic ptreq = {a, b
	-> a == b
}

generic inthash = {v : @a::(integral,numeric)
	var p

	p = (&v : byte#)
	-> siphash24(p[0:sizeof(@a)], Seed)
}

generic inteq = {a, b
	-> a == b
}

const murmurhash2 = {data, seed
	const m = 0x5bd1e995;
	const r = 24
	var h, k
	
	h = seed ^ data.len
	while data.len >= 4
		k = (data[0] : uint32)
		k |= (data[1] : uint32) << 8
		k |= (data[2] : uint32) << 16
		k |= (data[3] : uint32) << 24

		k *= m
		k ^= k >> r
		k *= m

		h *= m
		h ^= k
		data = data[4:]
	;;

	match data.len
	| 3:
		h ^= (data[2] : uint32) << 16
		h ^= (data[1] : uint32) << 8
		h ^= (data[0] : uint32)
		h *= m
	| 2:
		h ^= (data[1] : uint32) << 8
		h ^= (data[0] : uint32)
		h *= m
	| 1:
		h ^= (data[0] : uint32)
		h *= m
	| 0:	/* nothing */
	| _:	die("0 < len < 4 must be true")
	;;

	h ^= h >> 13
	h *= m
	h ^= h >> 15

	-> h
}

const sipround = {v0, v1, v2, v3 -> (uint64, uint64, uint64, uint64)
	v0 += v1
	v1 = (v1 << 13) | (v1 >> 51)
	v1 ^= v0
	v0 = (v0 << 32) | (v0 >> 32)
	v2 += v3
	v3 = (v3 << 16) | (v3 >> 48)
	v3 ^= v2

	v2 += v1
	v1 = (v1 << 17) | (v1 >> 47)
	v1 ^= v2
	v2 = (v2 << 32) | (v2 >> 32)
	v0 += v3
	v3 = (v3 << 21) | (v3 >> 43)
	v3 ^= v0

	-> (v0, v1, v2, v3)
}

const siphash24 = {data, seed
	var k0, k1, m, v0, v1, v2, v3, w
	var tail : byte[8] = [0, 0, 0, 0, 0, 0, 0, 0]

	k0 = std.getle64(seed[0:8])
	k1 = std.getle64(seed[8:16])
	v0 = k0 ^ 0x736f6d6570736575
	v1 = k1 ^ 0x646f72616e646f6d
	v2 = k0 ^ 0x6c7967656e657261
	v3 = k1 ^ 0x7465646279746573
	w = (data.len + 8) / 8 - 1
	for var i = 0; i < w; i++
		m = std.getle64(data[8 * i:8 * (i + 1)])
		v3 ^= m
		(v0, v1, v2, v3) = sipround(v0, v1, v2, v3)
		(v0, v1, v2, v3) = sipround(v0, v1, v2, v3)
		v0 ^= m
	;;
	for var i = 0; i < data.len % 8; i++
		tail[i] = data[8 * w + i]
	;;
	tail[7] = (data.len % 256 : byte)
	m = std.getle64(tail[:])
	v3 ^= m
	(v0, v1, v2, v3) = sipround(v0, v1, v2, v3)
	(v0, v1, v2, v3) = sipround(v0, v1, v2, v3)
	v0 ^= m

	v2 ^= 0xff
	(v0, v1, v2, v3) = sipround(v0, v1, v2, v3)
	(v0, v1, v2, v3) = sipround(v0, v1, v2, v3)
	(v0, v1, v2, v3) = sipround(v0, v1, v2, v3)
	(v0, v1, v2, v3) = sipround(v0, v1, v2, v3)
	-> v0 ^ v1 ^ v2 ^ v3
}