shithub: femtolisp

ref: 8bf655dcb8c3e88224162024b1f29f1862ffc043
parent: dcefc02be84f75919752787fe81b66c9dbe982fc
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Wed Nov 6 20:46:15 EST 2024

replace lookup3 with SpookyHash

--- a/3rd/lookup3.c
+++ /dev/null
@@ -1,319 +1,0 @@
-/*
--------------------------------------------------------------------------------
-lookup3.c, by Bob Jenkins, May 2006, Public Domain.
-
-These are functions for producing 32-bit hashes for hash table lookup.
-hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
-are externally useful functions.  You can use this free for any purpose.
-It's in the public domain.  It has no warranty.
-
-If you want to find a hash of, say, exactly 7 integers, do
-  a = i1;  b = i2;  c = i3;
-  mix(a,b,c);
-  a += i4; b += i5; c += i6;
-  mix(a,b,c);
-  a += i7;
-  final(a,b,c);
-then use c as the hash value.  If you have a variable length array of
-4-byte integers to hash, use hashword().  If you have a byte array (like
-a character string), use hashlittle().  If you have several byte arrays, or
-a mix of things, see the comments above hashlittle().
-
-Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
-then mix those integers.  This is fast (you can do a lot more thorough
-mixing with 12*3 instructions on 3 integers than you can with 3 instructions
-on 1 byte), but shoehorning those bytes into integers efficiently is messy.
--------------------------------------------------------------------------------
-*/
-
-/*
- * My best guess at if you are big-endian or little-endian.  This may
- * need adjustment.
- */
-#if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && BYTE_ORDER == LITTLE_ENDIAN
-#define HASH_LITTLE_ENDIAN 1
-#define HASH_BIG_ENDIAN 0
-#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
-#define HASH_LITTLE_ENDIAN 0
-#define HASH_BIG_ENDIAN 1
-#else
-#error endianness unknown
-#endif
-
-#define hashsize(n) ((uint32_t)1<<(n))
-#define hashmask(n) (hashsize(n)-1)
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-
-/*
--------------------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-
-This is reversible, so any information in (a,b,c) before mix() is
-still in (a,b,c) after mix().
-
-If four pairs of (a,b,c) inputs are run through mix(), or through
-mix() in reverse, there are at least 32 bits of the output that
-are sometimes the same for one pair and different for another pair.
-This was tested for:
-* pairs that differed by one bit, by two bits, in any combination
-  of top bits of (a,b,c), or in any combination of bottom bits of
-  (a,b,c).
-* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
-  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-  is commonly produced by subtraction) look like a single 1-bit
-  difference.
-* the base values were pseudorandom, all zero but one bit set, or
-  all zero plus a counter that starts at zero.
-
-Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
-satisfy this are
-    4  6  8 16 19  4
-    9 15  3 18 27 15
-   14  9  3  7 17  3
-Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
-for "differ" defined as + with a one-bit base and a two-bit delta.  I
-used http://burtleburtle.net/bob/hash/avalanche.html to choose
-the operations, constants, and arrangements of the variables.
-
-This does not achieve avalanche.  There are input bits of (a,b,c)
-that fail to affect some output bits of (a,b,c), especially of a.  The
-most thoroughly mixed value is c, but it doesn't really even achieve
-avalanche in c.
-
-This allows some parallelism.  Read-after-writes are good at doubling
-the number of bits affected, so the goal of mixing pulls in the opposite
-direction as the goal of parallelism.  I did what I could.  Rotates
-seem to cost as much as shifts on every machine I could lay my hands
-on, and rotates are much kinder to the top and bottom bits, so I used
-rotates.
--------------------------------------------------------------------------------
-*/
-#define mix(a,b,c) \
-{ \
-  a -= c;  a ^= rot(c, 4);  c += b; \
-  b -= a;  b ^= rot(a, 6);  a += c; \
-  c -= b;  c ^= rot(b, 8);  b += a; \
-  a -= c;  a ^= rot(c,16);  c += b; \
-  b -= a;  b ^= rot(a,19);  a += c; \
-  c -= b;  c ^= rot(b, 4);  b += a; \
-}
-
-/*
--------------------------------------------------------------------------------
-final -- final mixing of 3 32-bit values (a,b,c) into c
-
-Pairs of (a,b,c) values differing in only a few bits will usually
-produce values of c that look totally different.  This was tested for
-* pairs that differed by one bit, by two bits, in any combination
-  of top bits of (a,b,c), or in any combination of bottom bits of
-  (a,b,c).
-* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
-  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-  is commonly produced by subtraction) look like a single 1-bit
-  difference.
-* the base values were pseudorandom, all zero but one bit set, or
-  all zero plus a counter that starts at zero.
-
-These constants passed:
- 14 11 25 16 4 14 24
- 12 14 25 16 4 14 24
-and these came close:
-  4  8 15 26 3 22 24
- 10  8 15 26 3 22 24
- 11  8 15 26 3 22 24
--------------------------------------------------------------------------------
-*/
-#define final(a,b,c) \
-{ \
-  c ^= b; c -= rot(b,14); \
-  a ^= c; a -= rot(c,11); \
-  b ^= a; b -= rot(a,25); \
-  c ^= b; c -= rot(b,16); \
-  a ^= c; a -= rot(c,4);  \
-  b ^= a; b -= rot(a,14); \
-  c ^= b; c -= rot(b,24); \
-}
-
-/*
- * hashlittle2: return 2 32-bit hash values
- *
- * This is identical to hashlittle(), except it returns two 32-bit hash
- * values instead of just one.  This is good enough for hash table
- * lookup with 2^^64 buckets, or if you want a second hash if you're not
- * happy with the first, or if you want a probably-unique 64-bit ID for
- * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
- * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
- */
-static void hashlittle2(
-  const void *key,       /* the key to hash */
-  size_t      length,    /* length of the key */
-  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
-  uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
-{
-  uint32_t a,b,c;                                          /* internal state */
-  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
-
-  /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
-  c += *pb;
-
-  u.ptr = key;
-  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
-    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
-    const uint8_t  *k8;
-
-    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      b += k[1];
-      c += k[2];
-      mix(a,b,c);
-      length -= 12;
-      k += 3;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    /*
-     * "k[2]&0xffffff" actually reads beyond the end of the string, but
-     * then masks off the part it's not allowed to read.  Because the
-     * string is aligned, the masked-off tail is in the same word as the
-     * rest of the string.  Every machine with memory protection I've seen
-     * does it on word boundaries, so is OK with this.  But VALGRIND will
-     * still catch it and complain.  The masking trick does make the hash
-     * noticeably faster for short strings (like English words).
-     */
-#ifndef VALGRIND
-    (void)k8;
-    switch(length)
-    {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
-    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
-    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
-    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
-    case 5 : b+=k[1]&0xff; a+=k[0]; break;
-    case 4 : a+=k[0]; break;
-    case 3 : a+=k[0]&0xffffff; break;
-    case 2 : a+=k[0]&0xffff; break;
-    case 1 : a+=k[0]&0xff; break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-
-#else /* make valgrind happy */
-
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
-    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
-    case 9 : c+=k8[8];                   /* fall through */
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
-    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
-    case 5 : b+=k8[4];                   /* fall through */
-    case 4 : a+=k[0]; break;
-    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
-    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
-    case 1 : a+=k8[0]; break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-
-#endif /* !valgrind */
-
-  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
-    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
-    const uint8_t  *k8;
-
-    /*--------------- all but last block: aligned reads and different mixing */
-    while (length > 12)
-    {
-      a += k[0] + (((uint32_t)k[1])<<16);
-      b += k[2] + (((uint32_t)k[3])<<16);
-      c += k[4] + (((uint32_t)k[5])<<16);
-      mix(a,b,c);
-      length -= 12;
-      k += 6;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
-    case 10: c+=k[4];
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 9 : c+=k8[8];                      /* fall through */
-    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
-    case 6 : b+=k[2];
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 5 : b+=k8[4];                      /* fall through */
-    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
-    case 2 : a+=k[0];
-             break;
-    case 1 : a+=k8[0];
-             break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-
-  } else {                        /* need to read the key one byte at a time */
-    const uint8_t *k = (const uint8_t *)key;
-
-    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      a += ((uint32_t)k[1])<<8;
-      a += ((uint32_t)k[2])<<16;
-      a += ((uint32_t)k[3])<<24;
-      b += k[4];
-      b += ((uint32_t)k[5])<<8;
-      b += ((uint32_t)k[6])<<16;
-      b += ((uint32_t)k[7])<<24;
-      c += k[8];
-      c += ((uint32_t)k[9])<<8;
-      c += ((uint32_t)k[10])<<16;
-      c += ((uint32_t)k[11])<<24;
-      mix(a,b,c);
-      length -= 12;
-      k += 12;
-    }
-
-    /*-------------------------------- last block: affect all 32 bits of (c) */
-    switch(length)                   /* all the case statements fall through */
-    {
-    case 12: c+=((uint32_t)k[11])<<24; // fallthrough
-    case 11: c+=((uint32_t)k[10])<<16; // fallthrough
-    case 10: c+=((uint32_t)k[9])<<8; // fallthrough
-    case 9 : c+=k[8]; // fallthrough
-    case 8 : b+=((uint32_t)k[7])<<24; // fallthrough
-    case 7 : b+=((uint32_t)k[6])<<16; // fallthrough
-    case 6 : b+=((uint32_t)k[5])<<8; // fallthrough
-    case 5 : b+=k[4]; // fallthrough
-    case 4 : a+=((uint32_t)k[3])<<24; // fallthrough
-    case 3 : a+=((uint32_t)k[2])<<16; // fallthrough
-    case 2 : a+=((uint32_t)k[1])<<8; // fallthrough
-    case 1 : a+=k[0];
-             break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-  }
-
-  final(a,b,c);
-  *pc=c; *pb=b;
-}
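
For context, the 64-bit combining pattern described in the hashlittle2() comment above ("*pc + (((uint64_t)*pb)<<32)") is exactly how femtolisp's old memhash used this function. A minimal sketch, with the initval constants taken from the hashing.c hunk below:

	uint64_t
	old_memhash(const char *buf, size_t n)
	{
		uint32_t c = 0xcafe8881;	/* IN: primary initval */
		uint32_t b = 0x4d6a087c;	/* IN: secondary initval */

		hashlittle2(buf, n, &c, &b);
		/* *pc is better mixed than *pb, so keep it in the low word */
		return (uint64_t)c | ((uint64_t)b << 32);
	}
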
--- /dev/null
+++ b/3rd/spooky.c
@@ -1,0 +1,526 @@
+//
+// SpookyHash - 128-bit noncryptographic hash function
+//
+// Written in 2012 by Bob Jenkins
+//
+// Converted to C in 2015 by Joergen Ibsen
+//
+// To the extent possible under law, the author(s) have dedicated all
+// copyright and related and neighboring rights to this software to the
+// public domain worldwide. This software is distributed without any
+// warranty. <http://creativecommons.org/publicdomain/zero/1.0/>
+//
+// Original comment from SpookyV2.cpp by Bob Jenkins:
+//
+// Spooky Hash
+// A 128-bit noncryptographic hash, for checksums and table lookup
+// By Bob Jenkins.  Public domain.
+//   Oct 31 2010: published framework, disclaimer ShortHash isn't right
+//   Nov 7 2010: disabled ShortHash
+//   Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
+//   April 10 2012: buffer overflow on platforms without unaligned reads
+//   July 12 2012: was passing out variables in final to in/out in short
+//   July 30 2012: I reintroduced the buffer overflow
+//   August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash
+
+#include "platform.h"
+#include "spooky.h"
+
+#define ALLOW_UNALIGNED_READS 1
+
+//
+// SC_CONST: a constant which:
+//  - is not zero
+//  - is odd
+//  - is a not-very-regular mix of 1's and 0's
+//  - does not need any other special mathematical properties
+//
+#define SC_CONST 0xDEADBEEFDEADBEEFULL
+
+#define ROTL64(x, k) (((x) << (k)) | ((x) >> (64 - (k))))
+
+#ifdef _MSC_VER
+# define restrict __restrict
+# define inline __forceinline
+#endif
+
+static bool
+spooky_is_aligned(const void *p, size_t size)
+{
+	return (uintptr_t) p % size == 0;
+}
+
+static bool
+spooky_is_little_endian(void)
+{
+	const union {
+		uint32_t i;
+		uint8_t c[sizeof(uint32_t)];
+	} x = { 1 };
+
+	return x.c[0];
+}
+
+//
+// Read uint64_t in little-endian order.
+//
+static inline uint64_t
+spooky_read_le64(const uint64_t *s)
+{
+	if (spooky_is_little_endian()) {
+		uint64_t v;
+		memcpy(&v, s, sizeof(v));
+		return v;
+	}
+	else {
+		const uint8_t *p = (const uint8_t *) s;
+		return (uint64_t) p[0]
+		    | ((uint64_t) p[1] << 8)
+		    | ((uint64_t) p[2] << 16)
+		    | ((uint64_t) p[3] << 24)
+		    | ((uint64_t) p[4] << 32)
+		    | ((uint64_t) p[5] << 40)
+		    | ((uint64_t) p[6] << 48)
+		    | ((uint64_t) p[7] << 56);
+	}
+}
+
+//
+// This is used if the input is 96 bytes long or longer.
+//
+// The internal state is fully overwritten every 96 bytes.
+// Every input bit appears to cause at least 128 bits of entropy
+// before 96 other bytes are combined, when run forward or backward
+//   For every input bit,
+//   Two inputs differing in just that input bit
+//   Where "differ" means xor or subtraction
+//   And the base value is random
+//   When run forward or backwards one Mix
+// I tried 3 pairs of each; they all differed by at least 212 bits.
+//
+static inline void
+spooky_mix(const uint64_t *restrict data, uint64_t *restrict s)
+{
+	s[0] += spooky_read_le64(&data[0]);          s[2] ^= s[10];
+	s[11] ^= s[0];   s[0] = ROTL64(s[0], 11);    s[11] += s[1];
+	s[1] += spooky_read_le64(&data[1]);          s[3] ^= s[11];
+	s[0] ^= s[1];    s[1] = ROTL64(s[1], 32);    s[0] += s[2];
+	s[2] += spooky_read_le64(&data[2]);          s[4] ^= s[0];
+	s[1] ^= s[2];    s[2] = ROTL64(s[2], 43);    s[1] += s[3];
+	s[3] += spooky_read_le64(&data[3]);          s[5] ^= s[1];
+	s[2] ^= s[3];    s[3] = ROTL64(s[3], 31);    s[2] += s[4];
+	s[4] += spooky_read_le64(&data[4]);          s[6] ^= s[2];
+	s[3] ^= s[4];    s[4] = ROTL64(s[4], 17);    s[3] += s[5];
+	s[5] += spooky_read_le64(&data[5]);          s[7] ^= s[3];
+	s[4] ^= s[5];    s[5] = ROTL64(s[5], 28);    s[4] += s[6];
+	s[6] += spooky_read_le64(&data[6]);          s[8] ^= s[4];
+	s[5] ^= s[6];    s[6] = ROTL64(s[6], 39);    s[5] += s[7];
+	s[7] += spooky_read_le64(&data[7]);          s[9] ^= s[5];
+	s[6] ^= s[7];    s[7] = ROTL64(s[7], 57);    s[6] += s[8];
+	s[8] += spooky_read_le64(&data[8]);          s[10] ^= s[6];
+	s[7] ^= s[8];    s[8] = ROTL64(s[8], 55);    s[7] += s[9];
+	s[9] += spooky_read_le64(&data[9]);          s[11] ^= s[7];
+	s[8] ^= s[9];    s[9] = ROTL64(s[9], 54);    s[8] += s[10];
+	s[10] += spooky_read_le64(&data[10]);        s[0] ^= s[8];
+	s[9] ^= s[10];   s[10] = ROTL64(s[10], 22);  s[9] += s[11];
+	s[11] += spooky_read_le64(&data[11]);        s[1] ^= s[9];
+	s[10] ^= s[11];  s[11] = ROTL64(s[11], 46);  s[10] += s[0];
+}
+
+//
+// Mix all 12 inputs together so that h0, h1 are a hash of them all.
+//
+// For two inputs differing in just the input bits
+// Where "differ" means xor or subtraction
+// And the base value is random, or a counting value starting at that bit
+// The final result will have each bit of h0, h1 flip
+// For every input bit,
+// with probability 50 +- .3%
+// For every pair of input bits,
+// with probability 50 +- 3%
+//
+// This does not rely on the last Mix() call having already mixed some.
+// Two iterations was almost good enough for a 64-bit result, but a
+// 128-bit result is reported, so End() does three iterations.
+//
+static inline void
+spooky_end_partial(uint64_t *h)
+{
+	h[11] += h[1];  h[2] ^= h[11];  h[1] = ROTL64(h[1], 44);
+	h[0] += h[2];   h[3] ^= h[0];   h[2] = ROTL64(h[2], 15);
+	h[1] += h[3];   h[4] ^= h[1];   h[3] = ROTL64(h[3], 34);
+	h[2] += h[4];   h[5] ^= h[2];   h[4] = ROTL64(h[4], 21);
+	h[3] += h[5];   h[6] ^= h[3];   h[5] = ROTL64(h[5], 38);
+	h[4] += h[6];   h[7] ^= h[4];   h[6] = ROTL64(h[6], 33);
+	h[5] += h[7];   h[8] ^= h[5];   h[7] = ROTL64(h[7], 10);
+	h[6] += h[8];   h[9] ^= h[6];   h[8] = ROTL64(h[8], 13);
+	h[7] += h[9];   h[10] ^= h[7];  h[9] = ROTL64(h[9], 38);
+	h[8] += h[10];  h[11] ^= h[8];  h[10] = ROTL64(h[10], 53);
+	h[9] += h[11];  h[0] ^= h[9];   h[11] = ROTL64(h[11], 42);
+	h[10] += h[0];  h[1] ^= h[10];  h[0] = ROTL64(h[0], 54);
+}
+
+static inline void
+spooky_end(const uint64_t *restrict data, uint64_t *restrict h)
+{
+	h[0] += spooky_read_le64(&data[0]);
+	h[1] += spooky_read_le64(&data[1]);
+	h[2] += spooky_read_le64(&data[2]);
+	h[3] += spooky_read_le64(&data[3]);
+	h[4] += spooky_read_le64(&data[4]);
+	h[5] += spooky_read_le64(&data[5]);
+	h[6] += spooky_read_le64(&data[6]);
+	h[7] += spooky_read_le64(&data[7]);
+	h[8] += spooky_read_le64(&data[8]);
+	h[9] += spooky_read_le64(&data[9]);
+	h[10] += spooky_read_le64(&data[10]);
+	h[11] += spooky_read_le64(&data[11]);
+	spooky_end_partial(h);
+	spooky_end_partial(h);
+	spooky_end_partial(h);
+}
+
+//
+// The goal is for each bit of the input to expand into 128 bits of
+//   apparent entropy before it is fully overwritten.
+// n trials both set and cleared at least m bits of h0 h1 h2 h3
+//   n: 2   m: 29
+//   n: 3   m: 46
+//   n: 4   m: 57
+//   n: 5   m: 107
+//   n: 6   m: 146
+//   n: 7   m: 152
+// when run forwards or backwards
+// for all 1-bit and 2-bit diffs
+// with diffs defined by either xor or subtraction
+// with a base of all zeros plus a counter, or plus another bit, or random
+//
+static inline void
+spooky_short_mix(uint64_t *h)
+{
+	h[2] = ROTL64(h[2], 50);  h[2] += h[3];  h[0] ^= h[2];
+	h[3] = ROTL64(h[3], 52);  h[3] += h[0];  h[1] ^= h[3];
+	h[0] = ROTL64(h[0], 30);  h[0] += h[1];  h[2] ^= h[0];
+	h[1] = ROTL64(h[1], 41);  h[1] += h[2];  h[3] ^= h[1];
+	h[2] = ROTL64(h[2], 54);  h[2] += h[3];  h[0] ^= h[2];
+	h[3] = ROTL64(h[3], 48);  h[3] += h[0];  h[1] ^= h[3];
+	h[0] = ROTL64(h[0], 38);  h[0] += h[1];  h[2] ^= h[0];
+	h[1] = ROTL64(h[1], 37);  h[1] += h[2];  h[3] ^= h[1];
+	h[2] = ROTL64(h[2], 62);  h[2] += h[3];  h[0] ^= h[2];
+	h[3] = ROTL64(h[3], 34);  h[3] += h[0];  h[1] ^= h[3];
+	h[0] = ROTL64(h[0], 5);   h[0] += h[1];  h[2] ^= h[0];
+	h[1] = ROTL64(h[1], 36);  h[1] += h[2];  h[3] ^= h[1];
+}
+
+//
+// Mix all 4 inputs together so that h0, h1 are a hash of them all.
+//
+// For two inputs differing in just the input bits
+// Where "differ" means xor or subtraction
+// And the base value is random, or a counting value starting at that bit
+// The final result will have each bit of h0, h1 flip
+// For every input bit,
+// with probability 50 +- .3% (it is probably better than that)
+// For every pair of input bits,
+// with probability 50 +- .75% (the worst case is approximately that)
+//
+static inline void
+spooky_short_end(uint64_t *h)
+{
+	h[3] ^= h[2];  h[2] = ROTL64(h[2], 15);  h[3] += h[2];
+	h[0] ^= h[3];  h[3] = ROTL64(h[3], 52);  h[0] += h[3];
+	h[1] ^= h[0];  h[0] = ROTL64(h[0], 26);  h[1] += h[0];
+	h[2] ^= h[1];  h[1] = ROTL64(h[1], 51);  h[2] += h[1];
+	h[3] ^= h[2];  h[2] = ROTL64(h[2], 28);  h[3] += h[2];
+	h[0] ^= h[3];  h[3] = ROTL64(h[3], 9);   h[0] += h[3];
+	h[1] ^= h[0];  h[0] = ROTL64(h[0], 47);  h[1] += h[0];
+	h[2] ^= h[1];  h[1] = ROTL64(h[1], 54);  h[2] += h[1];
+	h[3] ^= h[2];  h[2] = ROTL64(h[2], 32);  h[3] += h[2];
+	h[0] ^= h[3];  h[3] = ROTL64(h[3], 25);  h[0] += h[3];
+	h[1] ^= h[0];  h[0] = ROTL64(h[0], 63);  h[1] += h[0];
+}
+
+//
+// short hash ... it could be used on any message,
+// but it's used by Spooky just for short messages.
+//
+static void
+spooky_short(const void *restrict message, size_t length,
+             uint64_t *restrict hash1, uint64_t *restrict hash2)
+{
+	uint64_t buf[2 * SC_NUMVARS];
+	union {
+		const uint8_t *p8;
+		uint64_t *p64;
+	} u;
+
+	u.p8 = (const uint8_t *) message;
+
+	if (ALLOW_UNALIGNED_READS == 0 && !spooky_is_aligned(u.p8, 8)) {
+		memcpy(buf, message, length);
+		u.p64 = buf;
+	}
+
+	size_t left = length % 32;
+	uint64_t h[4];
+	h[0] = *hash1;
+	h[1] = *hash2;
+	h[2] = SC_CONST;
+	h[3] = SC_CONST;
+
+	if (length > 15) {
+		const uint64_t *end = u.p64 + (length / 32) * 4;
+
+		// handle all complete sets of 32 bytes
+		for (; u.p64 < end; u.p64 += 4) {
+			h[2] += spooky_read_le64(&u.p64[0]);
+			h[3] += spooky_read_le64(&u.p64[1]);
+			spooky_short_mix(h);
+			h[0] += spooky_read_le64(&u.p64[2]);
+			h[1] += spooky_read_le64(&u.p64[3]);
+		}
+
+		// Handle the case of 16+ remaining bytes.
+		if (left >= 16) {
+			h[2] += spooky_read_le64(&u.p64[0]);
+			h[3] += spooky_read_le64(&u.p64[1]);
+			spooky_short_mix(h);
+			u.p64 += 2;
+			left -= 16;
+		}
+	}
+
+	// Handle the last 0..15 bytes, and its length
+	h[3] += ((uint64_t) length) << 56;
+	switch (left) {
+	case 15:
+		h[3] += ((uint64_t) u.p8[14]) << 48; // fallthrough
+	case 14:
+		h[3] += ((uint64_t) u.p8[13]) << 40; // fallthrough
+	case 13:
+		h[3] += ((uint64_t) u.p8[12]) << 32; // fallthrough
+	case 12:
+		h[3] += ((uint64_t) u.p8[11]) << 24; // fallthrough
+	case 11:
+		h[3] += ((uint64_t) u.p8[10]) << 16; // fallthrough
+	case 10:
+		h[3] += ((uint64_t) u.p8[9]) << 8; // fallthrough
+	case 9:
+		h[3] += (uint64_t) u.p8[8]; // fallthrough
+	case 8:
+		h[2] += spooky_read_le64(&u.p64[0]);
+		break;
+	case 7:
+		h[2] += ((uint64_t) u.p8[6]) << 48; // fallthrough
+	case 6:
+		h[2] += ((uint64_t) u.p8[5]) << 40; // fallthrough
+	case 5:
+		h[2] += ((uint64_t) u.p8[4]) << 32; // fallthrough
+	case 4:
+		h[2] += ((uint64_t) u.p8[3]) << 24; // fallthrough
+	case 3:
+		h[2] += ((uint64_t) u.p8[2]) << 16; // fallthrough
+	case 2:
+		h[2] += ((uint64_t) u.p8[1]) << 8; // fallthrough
+	case 1:
+		h[2] += (uint64_t) u.p8[0];
+		break;
+	case 0:
+		h[2] += SC_CONST;
+		h[3] += SC_CONST;
+	}
+	spooky_short_end(h);
+	*hash1 = h[0];
+	*hash2 = h[1];
+}
+
+uint64_t
+spooky_hash64(const void *message, size_t length, uint64_t seed)
+{
+	uint64_t hash1 = seed;
+	spooky_hash128(message, length, &hash1, &seed);
+	return hash1;
+}
+
+uint32_t
+spooky_hash32(const void *message, size_t length, uint32_t seed)
+{
+	uint64_t hash1 = seed, hash2 = seed;
+	spooky_hash128(message, length, &hash1, &hash2);
+	return (uint32_t) hash1;
+}
+
+// do the whole hash in one call
+void
+spooky_hash128(const void *restrict message, size_t length,
+               uint64_t *restrict hash1, uint64_t *restrict hash2)
+{
+	if (length < SC_BUFSIZE) {
+		spooky_short(message, length, hash1, hash2);
+		return;
+	}
+
+	uint64_t h[SC_NUMVARS];
+	uint64_t buf[SC_NUMVARS];
+	uint64_t *end;
+	union {
+		const uint8_t *p8;
+		uint64_t *p64;
+	} u;
+	size_t left;
+
+	h[0] = h[3] = h[6] = h[9] = *hash1;
+	h[1] = h[4] = h[7] = h[10] = *hash2;
+	h[2] = h[5] = h[8] = h[11] = SC_CONST;
+
+	u.p8 = (const uint8_t *) message;
+	end = u.p64 + (length / SC_BLOCKSIZE) * SC_NUMVARS;
+
+	// handle all whole SC_BLOCKSIZE blocks of bytes
+	if (ALLOW_UNALIGNED_READS || spooky_is_aligned(u.p8, 8)) {
+		do {
+			spooky_mix(u.p64, h);
+			u.p64 += SC_NUMVARS;
+		} while (u.p64 < end);
+	}
+	else {
+		do {
+			memcpy(buf, u.p64, SC_BLOCKSIZE);
+			spooky_mix(buf, h);
+			u.p64 += SC_NUMVARS;
+		} while (u.p64 < end);
+	}
+
+	// handle the last partial block of SC_BLOCKSIZE bytes
+	left = length - ((const uint8_t *) end - (const uint8_t *) message);
+	memcpy(buf, end, left);
+	memset(((uint8_t *) buf) + left, 0, SC_BLOCKSIZE - left);
+	((uint8_t *) buf)[SC_BLOCKSIZE - 1] = (uint8_t) left;
+
+	// do some final mixing
+	spooky_end(buf, h);
+	*hash1 = h[0];
+	*hash2 = h[1];
+}
+
+/*
+// init spooky state
+void
+spooky_init(struct spooky_state *state, uint64_t seed1, uint64_t seed2)
+{
+	state->length = 0;
+	state->left = 0;
+	state->state[0] = seed1;
+	state->state[1] = seed2;
+}
+
+// add a message fragment to the state
+void
+spooky_update(struct spooky_state *restrict state,
+              const void *restrict message, size_t length)
+{
+	uint64_t h[SC_NUMVARS];
+	size_t newLength = length + state->left;
+	uint8_t left;
+	union {
+		const uint8_t *p8;
+		uint64_t *p64;
+	} u;
+	const uint64_t *end;
+
+	// Is this message fragment too short?  If it is, stuff it away.
+	if (newLength < SC_BUFSIZE) {
+		memcpy(&((uint8_t *) state->data)[state->left], message, length);
+		state->length = length + state->length;
+		state->left = (uint8_t) newLength;
+		return;
+	}
+
+	// init the variables
+	if (state->length < SC_BUFSIZE) {
+		h[0] = h[3] = h[6] = h[9] = state->state[0];
+		h[1] = h[4] = h[7] = h[10] = state->state[1];
+		h[2] = h[5] = h[8] = h[11] = SC_CONST;
+	}
+	else {
+		memcpy(h, state->state, sizeof(state->state));
+	}
+	state->length = length + state->length;
+
+	// if we've got anything stuffed away, use it now
+	if (state->left) {
+		uint8_t prefix = SC_BUFSIZE - state->left;
+		memcpy(&(((uint8_t *) state->data)[state->left]), message, prefix);
+		u.p64 = state->data;
+		spooky_mix(u.p64, h);
+		spooky_mix(&u.p64[SC_NUMVARS], h);
+		u.p8 = ((const uint8_t *) message) + prefix;
+		length -= prefix;
+	}
+	else {
+		u.p8 = (const uint8_t *) message;
+	}
+
+	// handle all whole blocks of SC_BLOCKSIZE bytes
+	end = u.p64 + (length / SC_BLOCKSIZE) * SC_NUMVARS;
+	left = (uint8_t) (length - ((const uint8_t *) end - u.p8));
+	if (ALLOW_UNALIGNED_READS || spooky_is_aligned(u.p8, 8)) {
+		while (u.p64 < end) {
+			spooky_mix(u.p64, h);
+			u.p64 += SC_NUMVARS;
+		}
+	}
+	else {
+		while (u.p64 < end) {
+			memcpy(state->data, u.p8, SC_BLOCKSIZE);
+			spooky_mix(state->data, h);
+			u.p64 += SC_NUMVARS;
+		}
+	}
+
+	// stuff away the last few bytes
+	state->left = left;
+	memcpy(state->data, end, left);
+
+	// stuff away the variables
+	memcpy(state->state, h, sizeof(state->state));
+}
+
+// report the hash for the concatenation of all message fragments so far
+void
+spooky_final(struct spooky_state *restrict state,
+             uint64_t *restrict hash1, uint64_t *restrict hash2)
+{
+	// init the variables
+	if (state->length < SC_BUFSIZE) {
+		*hash1 = state->state[0];
+		*hash2 = state->state[1];
+		spooky_short(state->data, state->length, hash1, hash2);
+		return;
+	}
+
+	const uint64_t *data = (const uint64_t *) state->data;
+	uint8_t left = state->left;
+
+	uint64_t h[SC_NUMVARS];
+	memcpy(h, state->state, sizeof(state->state));
+
+	if (left >= SC_BLOCKSIZE) {
+		// m_data can contain two blocks; handle any whole first block
+		spooky_mix(data, h);
+		data += SC_NUMVARS;
+		left -= SC_BLOCKSIZE;
+	}
+
+	// mix in the last partial block, and the length mod SC_BLOCKSIZE
+	memset(&((uint8_t *) data)[left], 0, (SC_BLOCKSIZE - left));
+
+	((uint8_t *) data)[SC_BLOCKSIZE - 1] = left;
+
+	// do some final mixing
+	spooky_end(data, h);
+
+	*hash1 = h[0];
+	*hash2 = h[1];
+}
+*/
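
The one-shot entry points above are the only part of the API femtolisp uses; the incremental interface (spooky_init/update/final) is left commented out. A short usage sketch, assuming a hosted C environment (names hypothetical):

	#include <stdint.h>
	#include <string.h>
	#include "spooky.h"

	void
	example(void)
	{
		static const char msg[] = "hello";
		uint64_t h1 = 0, h2 = 0;	/* seeds in, 128-bit hash out */

		spooky_hash128(msg, sizeof(msg)-1, &h1, &h2);

		/* the narrower variants wrap the same core: with matching
		 * zero seeds, spooky_hash64() returns h1 above, and
		 * spooky_hash32() returns its low 32 bits */
		uint64_t h64 = spooky_hash64(msg, sizeof(msg)-1, 0);
		uint32_t h32 = spooky_hash32(msg, sizeof(msg)-1, 0);
		(void)h64; (void)h32;
	}
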
--- /dev/null
+++ b/3rd/spooky.h
@@ -1,0 +1,65 @@
+/*
+ * SpookyHash - 128-bit noncryptographic hash function
+ *
+ * Written in 2012 by Bob Jenkins
+ *
+ * Converted to C in 2015 by Joergen Ibsen
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty. <http://creativecommons.org/publicdomain/zero/1.0/>
+ *
+ * Original comment from SpookyV2.h by Bob Jenkins:
+ *
+ * SpookyHash: a 128-bit noncryptographic hash function
+ * By Bob Jenkins, public domain
+ *   Oct 31 2010: alpha, framework + SpookyHash::Mix appears right
+ *   Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right
+ *   Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas
+ *   Feb  2 2012: production, same bits as beta
+ *   Feb  5 2012: adjusted definitions of uint* to be more portable
+ *   Mar 30 2012: 3 bytes/cycle, not 4.  Alpha was 4 but wasn't thorough enough.
+ *   August 5 2012: SpookyV2 (different results)
+ * 
+ * Up to 3 bytes/cycle for long messages.  Reasonably fast for short messages.
+ * All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit.
+ *
+ * This was developed for and tested on 64-bit x86-compatible processors.
+ * It assumes the processor is little-endian.  There is a macro
+ * controlling whether unaligned reads are allowed (by default they are).
+ * This should be an equally good hash on big-endian machines, but it will
+ * compute different results on them than on little-endian machines.
+ *
+ * Google's CityHash has similar specs to SpookyHash, and CityHash is faster
+ * on new Intel boxes.  MD4 and MD5 also have similar specs, but they are orders
+ * of magnitude slower.  CRCs are two or more times slower, but unlike 
+ * SpookyHash, they have nice math for combining the CRCs of pieces to form 
+ * the CRCs of wholes.  There are also cryptographic hashes, but those are even 
+ * slower than MD5.
+ */
+
+#pragma once
+
+// number of uint64_t's in internal state
+#define SC_NUMVARS 12U
+
+// size of the internal state
+#define SC_BLOCKSIZE (SC_NUMVARS * 8U)
+
+// size of buffer of unhashed data, in bytes
+#define SC_BUFSIZE (2U * SC_BLOCKSIZE)
+
+struct spooky_state {
+	uint64_t data[2 * SC_NUMVARS]; // unhashed data, for partial messages
+	uint64_t state[SC_NUMVARS];    // internal state of the hash
+	size_t length;                 // total length of the input so far
+	uint8_t left;                  // length of unhashed data stashed in data
+};
+
+void spooky_hash128(const void *message, size_t length, uint64_t *hash1, uint64_t *hash2);
+uint64_t spooky_hash64(const void *message, size_t length, uint64_t seed);
+uint32_t spooky_hash32(const void *message, size_t length, uint32_t seed);
+//void spooky_init(struct spooky_state *state, uint64_t seed1, uint64_t seed2);
+//void spooky_update(struct spooky_state *state, const void *message, size_t length);
+//void spooky_final(struct spooky_state *state, uint64_t *hash1, uint64_t *hash2);
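
The three size constants pin down the two code paths in spooky.c: one mix block is the 12-word internal state (96 bytes), and any message shorter than two blocks (192 bytes) is routed to spooky_short() by spooky_hash128(). A compile-time sketch of that arithmetic, assuming C11 static_assert:

	#include <assert.h>
	#include <stdint.h>
	#include <stddef.h>
	#include "spooky.h"

	static_assert(SC_BLOCKSIZE == SC_NUMVARS * sizeof(uint64_t),
	              "one mix block = 12 * 8 = 96 bytes");
	static_assert(SC_BUFSIZE == 2 * SC_BLOCKSIZE,
	              "inputs under 192 bytes take the spooky_short() path");
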
--- a/hashing.c
+++ b/hashing.c
@@ -1,5 +1,6 @@
 #include "llt.h"
 #include "hashing.h"
+#include "spooky.h"
 
 lltuint_t
 nextipow2(lltuint_t i)
@@ -55,22 +56,14 @@
 	return (uint32_t)key;
 }
 
-#include "lookup3.c"
-
 uint64_t
-memhash(const char* buf, size_t n)
+memhash(const char *buf, size_t n)
 {
-	uint32_t c = 0xcafe8881, b = 0x4d6a087c;
-
-	hashlittle2(buf, n, &c, &b);
-	return (uint64_t)c | (((uint64_t)b)<<32);
+	return spooky_hash64(buf, n, 0xcafe8881);
 }
 
 uint32_t
-memhash32(const char* buf, size_t n)
+memhash32(const char *buf, size_t n)
 {
-	uint32_t c = 0xcafe8881, b = 0x4d6a087c;
-
-	hashlittle2(buf, n, &c, &b);
-	return c;
+	return spooky_hash32(buf, n, 0xcafe8881);
 }
--- a/meson.build
+++ b/meson.build
@@ -52,6 +52,7 @@
 	'3rd/mp/u16.c',
 	'3rd/mp/u32.c',
 	'3rd/mp/u64.c',
+	'3rd/spooky.c',
 	'3rd/utf/rune.c',
 	'3rd/utf/runeistype.c',
 	'3rd/utf/runetotype.c',
--- a/mkfile
+++ b/mkfile
@@ -12,6 +12,7 @@
 OFILES=\
 	3rd/mt19937-64.$O\
 	3rd/wcwidth.$O\
+	3rd/spooky.$O\
 	bitreverse.$O\
 	bitvector-ops.$O\
 	bitvector.$O\