ref: 5e28ffc053ddf65898820550a7c7f1712bb10543
parent: 64e322cc776ca8c320796b5e7e1becccc240a13a
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Wed Dec 25 20:27:07 EST 2024
import brieflz and add bindings for it
--- /dev/null
+++ b/3rd/brieflz/LICENSE
@@ -1,0 +1,22 @@
+The zlib License (Zlib)
+
+Copyright (c) 2002-2020 Joergen Ibsen
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must
+ not claim that you wrote the original software. If you use this
+ software in a product, an acknowledgment in the product
+ documentation would be appreciated but is not required.
+
+ 2. Altered source versions must be plainly marked as such, and must
+ not be misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
--- /dev/null
+++ b/3rd/brieflz/NOTE
@@ -1,0 +1,4 @@
+A note to comply with the zlib license: this is a modification of a subset
+of BriefLZ v1.3.0, small fast Lempel-Ziv originally hosted at https://github.com/jibsen/brieflz
+
+Have a day.
--- /dev/null
+++ b/3rd/brieflz/brieflz.c
@@ -1,0 +1,655 @@
+//
+// BriefLZ - small fast Lempel-Ziv
+//
+// C packer
+//
+// Copyright (c) 2002-2020 Joergen Ibsen
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source
+// distribution.
+//
+
+#include "brieflz.h"
+
+#if _MSC_VER >= 1400
+# include <intrin.h>
+# define BLZ_BUILTIN_MSVC
+#elif defined(__clang__) && defined(__has_builtin)
+# if __has_builtin(__builtin_clz)
+# define BLZ_BUILTIN_GCC
+# endif
+#elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# define BLZ_BUILTIN_GCC
+#endif
+
+// Type used to store values in workmem.
+//
+// This is used to store positions and lengths, so src_size has to be within
+// the range of this type.
+//
+typedef uint32_t blz_word;
+
+#define BLZ_WORD_MAX UINT32_MAX
+
+// Number of bits of hash to use for lookup.
+//
+// The size of the lookup table (and thus workmem) depends on this.
+//
+// Values between 10 and 18 work well. Lower values generally make compression
+// speed faster but ratio worse. The default value 17 (128k entries) is a
+// compromise.
+//
+#ifndef BLZ_HASH_BITS
+# define BLZ_HASH_BITS 17
+#endif
+
+#define LOOKUP_SIZE (1UL << BLZ_HASH_BITS)
+
+#define NO_MATCH_POS ((blz_word) -1)
+
+// Internal data structure
+struct blz_state {
+ unsigned char *next_out;
+ unsigned char *tag_out;
+ unsigned int tag;
+ int bits_left;
+};
+
+#if !defined(BLZ_NO_LUT)
+static const unsigned short blz_gamma_lookup[512][2] = {
+ {0, 0},
+ {0, 0},
+
+ {0x00, 2}, {0x02, 2},
+
+ {0x04, 4}, {0x06, 4}, {0x0C, 4}, {0x0E, 4},
+
+ {0x14, 6}, {0x16, 6}, {0x1C, 6}, {0x1E, 6},
+ {0x34, 6}, {0x36, 6}, {0x3C, 6}, {0x3E, 6},
+
+ {0x54, 8}, {0x56, 8}, {0x5C, 8}, {0x5E, 8},
+ {0x74, 8}, {0x76, 8}, {0x7C, 8}, {0x7E, 8},
+ {0xD4, 8}, {0xD6, 8}, {0xDC, 8}, {0xDE, 8},
+ {0xF4, 8}, {0xF6, 8}, {0xFC, 8}, {0xFE, 8},
+
+ {0x154, 10}, {0x156, 10}, {0x15C, 10}, {0x15E, 10},
+ {0x174, 10}, {0x176, 10}, {0x17C, 10}, {0x17E, 10},
+ {0x1D4, 10}, {0x1D6, 10}, {0x1DC, 10}, {0x1DE, 10},
+ {0x1F4, 10}, {0x1F6, 10}, {0x1FC, 10}, {0x1FE, 10},
+ {0x354, 10}, {0x356, 10}, {0x35C, 10}, {0x35E, 10},
+ {0x374, 10}, {0x376, 10}, {0x37C, 10}, {0x37E, 10},
+ {0x3D4, 10}, {0x3D6, 10}, {0x3DC, 10}, {0x3DE, 10},
+ {0x3F4, 10}, {0x3F6, 10}, {0x3FC, 10}, {0x3FE, 10},
+
+ {0x554, 12}, {0x556, 12}, {0x55C, 12}, {0x55E, 12},
+ {0x574, 12}, {0x576, 12}, {0x57C, 12}, {0x57E, 12},
+ {0x5D4, 12}, {0x5D6, 12}, {0x5DC, 12}, {0x5DE, 12},
+ {0x5F4, 12}, {0x5F6, 12}, {0x5FC, 12}, {0x5FE, 12},
+ {0x754, 12}, {0x756, 12}, {0x75C, 12}, {0x75E, 12},
+ {0x774, 12}, {0x776, 12}, {0x77C, 12}, {0x77E, 12},
+ {0x7D4, 12}, {0x7D6, 12}, {0x7DC, 12}, {0x7DE, 12},
+ {0x7F4, 12}, {0x7F6, 12}, {0x7FC, 12}, {0x7FE, 12},
+ {0xD54, 12}, {0xD56, 12}, {0xD5C, 12}, {0xD5E, 12},
+ {0xD74, 12}, {0xD76, 12}, {0xD7C, 12}, {0xD7E, 12},
+ {0xDD4, 12}, {0xDD6, 12}, {0xDDC, 12}, {0xDDE, 12},
+ {0xDF4, 12}, {0xDF6, 12}, {0xDFC, 12}, {0xDFE, 12},
+ {0xF54, 12}, {0xF56, 12}, {0xF5C, 12}, {0xF5E, 12},
+ {0xF74, 12}, {0xF76, 12}, {0xF7C, 12}, {0xF7E, 12},
+ {0xFD4, 12}, {0xFD6, 12}, {0xFDC, 12}, {0xFDE, 12},
+ {0xFF4, 12}, {0xFF6, 12}, {0xFFC, 12}, {0xFFE, 12},
+
+ {0x1554, 14}, {0x1556, 14}, {0x155C, 14}, {0x155E, 14},
+ {0x1574, 14}, {0x1576, 14}, {0x157C, 14}, {0x157E, 14},
+ {0x15D4, 14}, {0x15D6, 14}, {0x15DC, 14}, {0x15DE, 14},
+ {0x15F4, 14}, {0x15F6, 14}, {0x15FC, 14}, {0x15FE, 14},
+ {0x1754, 14}, {0x1756, 14}, {0x175C, 14}, {0x175E, 14},
+ {0x1774, 14}, {0x1776, 14}, {0x177C, 14}, {0x177E, 14},
+ {0x17D4, 14}, {0x17D6, 14}, {0x17DC, 14}, {0x17DE, 14},
+ {0x17F4, 14}, {0x17F6, 14}, {0x17FC, 14}, {0x17FE, 14},
+ {0x1D54, 14}, {0x1D56, 14}, {0x1D5C, 14}, {0x1D5E, 14},
+ {0x1D74, 14}, {0x1D76, 14}, {0x1D7C, 14}, {0x1D7E, 14},
+ {0x1DD4, 14}, {0x1DD6, 14}, {0x1DDC, 14}, {0x1DDE, 14},
+ {0x1DF4, 14}, {0x1DF6, 14}, {0x1DFC, 14}, {0x1DFE, 14},
+ {0x1F54, 14}, {0x1F56, 14}, {0x1F5C, 14}, {0x1F5E, 14},
+ {0x1F74, 14}, {0x1F76, 14}, {0x1F7C, 14}, {0x1F7E, 14},
+ {0x1FD4, 14}, {0x1FD6, 14}, {0x1FDC, 14}, {0x1FDE, 14},
+ {0x1FF4, 14}, {0x1FF6, 14}, {0x1FFC, 14}, {0x1FFE, 14},
+ {0x3554, 14}, {0x3556, 14}, {0x355C, 14}, {0x355E, 14},
+ {0x3574, 14}, {0x3576, 14}, {0x357C, 14}, {0x357E, 14},
+ {0x35D4, 14}, {0x35D6, 14}, {0x35DC, 14}, {0x35DE, 14},
+ {0x35F4, 14}, {0x35F6, 14}, {0x35FC, 14}, {0x35FE, 14},
+ {0x3754, 14}, {0x3756, 14}, {0x375C, 14}, {0x375E, 14},
+ {0x3774, 14}, {0x3776, 14}, {0x377C, 14}, {0x377E, 14},
+ {0x37D4, 14}, {0x37D6, 14}, {0x37DC, 14}, {0x37DE, 14},
+ {0x37F4, 14}, {0x37F6, 14}, {0x37FC, 14}, {0x37FE, 14},
+ {0x3D54, 14}, {0x3D56, 14}, {0x3D5C, 14}, {0x3D5E, 14},
+ {0x3D74, 14}, {0x3D76, 14}, {0x3D7C, 14}, {0x3D7E, 14},
+ {0x3DD4, 14}, {0x3DD6, 14}, {0x3DDC, 14}, {0x3DDE, 14},
+ {0x3DF4, 14}, {0x3DF6, 14}, {0x3DFC, 14}, {0x3DFE, 14},
+ {0x3F54, 14}, {0x3F56, 14}, {0x3F5C, 14}, {0x3F5E, 14},
+ {0x3F74, 14}, {0x3F76, 14}, {0x3F7C, 14}, {0x3F7E, 14},
+ {0x3FD4, 14}, {0x3FD6, 14}, {0x3FDC, 14}, {0x3FDE, 14},
+ {0x3FF4, 14}, {0x3FF6, 14}, {0x3FFC, 14}, {0x3FFE, 14},
+
+ {0x5554, 16}, {0x5556, 16}, {0x555C, 16}, {0x555E, 16},
+ {0x5574, 16}, {0x5576, 16}, {0x557C, 16}, {0x557E, 16},
+ {0x55D4, 16}, {0x55D6, 16}, {0x55DC, 16}, {0x55DE, 16},
+ {0x55F4, 16}, {0x55F6, 16}, {0x55FC, 16}, {0x55FE, 16},
+ {0x5754, 16}, {0x5756, 16}, {0x575C, 16}, {0x575E, 16},
+ {0x5774, 16}, {0x5776, 16}, {0x577C, 16}, {0x577E, 16},
+ {0x57D4, 16}, {0x57D6, 16}, {0x57DC, 16}, {0x57DE, 16},
+ {0x57F4, 16}, {0x57F6, 16}, {0x57FC, 16}, {0x57FE, 16},
+ {0x5D54, 16}, {0x5D56, 16}, {0x5D5C, 16}, {0x5D5E, 16},
+ {0x5D74, 16}, {0x5D76, 16}, {0x5D7C, 16}, {0x5D7E, 16},
+ {0x5DD4, 16}, {0x5DD6, 16}, {0x5DDC, 16}, {0x5DDE, 16},
+ {0x5DF4, 16}, {0x5DF6, 16}, {0x5DFC, 16}, {0x5DFE, 16},
+ {0x5F54, 16}, {0x5F56, 16}, {0x5F5C, 16}, {0x5F5E, 16},
+ {0x5F74, 16}, {0x5F76, 16}, {0x5F7C, 16}, {0x5F7E, 16},
+ {0x5FD4, 16}, {0x5FD6, 16}, {0x5FDC, 16}, {0x5FDE, 16},
+ {0x5FF4, 16}, {0x5FF6, 16}, {0x5FFC, 16}, {0x5FFE, 16},
+ {0x7554, 16}, {0x7556, 16}, {0x755C, 16}, {0x755E, 16},
+ {0x7574, 16}, {0x7576, 16}, {0x757C, 16}, {0x757E, 16},
+ {0x75D4, 16}, {0x75D6, 16}, {0x75DC, 16}, {0x75DE, 16},
+ {0x75F4, 16}, {0x75F6, 16}, {0x75FC, 16}, {0x75FE, 16},
+ {0x7754, 16}, {0x7756, 16}, {0x775C, 16}, {0x775E, 16},
+ {0x7774, 16}, {0x7776, 16}, {0x777C, 16}, {0x777E, 16},
+ {0x77D4, 16}, {0x77D6, 16}, {0x77DC, 16}, {0x77DE, 16},
+ {0x77F4, 16}, {0x77F6, 16}, {0x77FC, 16}, {0x77FE, 16},
+ {0x7D54, 16}, {0x7D56, 16}, {0x7D5C, 16}, {0x7D5E, 16},
+ {0x7D74, 16}, {0x7D76, 16}, {0x7D7C, 16}, {0x7D7E, 16},
+ {0x7DD4, 16}, {0x7DD6, 16}, {0x7DDC, 16}, {0x7DDE, 16},
+ {0x7DF4, 16}, {0x7DF6, 16}, {0x7DFC, 16}, {0x7DFE, 16},
+ {0x7F54, 16}, {0x7F56, 16}, {0x7F5C, 16}, {0x7F5E, 16},
+ {0x7F74, 16}, {0x7F76, 16}, {0x7F7C, 16}, {0x7F7E, 16},
+ {0x7FD4, 16}, {0x7FD6, 16}, {0x7FDC, 16}, {0x7FDE, 16},
+ {0x7FF4, 16}, {0x7FF6, 16}, {0x7FFC, 16}, {0x7FFE, 16},
+ {0xD554, 16}, {0xD556, 16}, {0xD55C, 16}, {0xD55E, 16},
+ {0xD574, 16}, {0xD576, 16}, {0xD57C, 16}, {0xD57E, 16},
+ {0xD5D4, 16}, {0xD5D6, 16}, {0xD5DC, 16}, {0xD5DE, 16},
+ {0xD5F4, 16}, {0xD5F6, 16}, {0xD5FC, 16}, {0xD5FE, 16},
+ {0xD754, 16}, {0xD756, 16}, {0xD75C, 16}, {0xD75E, 16},
+ {0xD774, 16}, {0xD776, 16}, {0xD77C, 16}, {0xD77E, 16},
+ {0xD7D4, 16}, {0xD7D6, 16}, {0xD7DC, 16}, {0xD7DE, 16},
+ {0xD7F4, 16}, {0xD7F6, 16}, {0xD7FC, 16}, {0xD7FE, 16},
+ {0xDD54, 16}, {0xDD56, 16}, {0xDD5C, 16}, {0xDD5E, 16},
+ {0xDD74, 16}, {0xDD76, 16}, {0xDD7C, 16}, {0xDD7E, 16},
+ {0xDDD4, 16}, {0xDDD6, 16}, {0xDDDC, 16}, {0xDDDE, 16},
+ {0xDDF4, 16}, {0xDDF6, 16}, {0xDDFC, 16}, {0xDDFE, 16},
+ {0xDF54, 16}, {0xDF56, 16}, {0xDF5C, 16}, {0xDF5E, 16},
+ {0xDF74, 16}, {0xDF76, 16}, {0xDF7C, 16}, {0xDF7E, 16},
+ {0xDFD4, 16}, {0xDFD6, 16}, {0xDFDC, 16}, {0xDFDE, 16},
+ {0xDFF4, 16}, {0xDFF6, 16}, {0xDFFC, 16}, {0xDFFE, 16},
+ {0xF554, 16}, {0xF556, 16}, {0xF55C, 16}, {0xF55E, 16},
+ {0xF574, 16}, {0xF576, 16}, {0xF57C, 16}, {0xF57E, 16},
+ {0xF5D4, 16}, {0xF5D6, 16}, {0xF5DC, 16}, {0xF5DE, 16},
+ {0xF5F4, 16}, {0xF5F6, 16}, {0xF5FC, 16}, {0xF5FE, 16},
+ {0xF754, 16}, {0xF756, 16}, {0xF75C, 16}, {0xF75E, 16},
+ {0xF774, 16}, {0xF776, 16}, {0xF77C, 16}, {0xF77E, 16},
+ {0xF7D4, 16}, {0xF7D6, 16}, {0xF7DC, 16}, {0xF7DE, 16},
+ {0xF7F4, 16}, {0xF7F6, 16}, {0xF7FC, 16}, {0xF7FE, 16},
+ {0xFD54, 16}, {0xFD56, 16}, {0xFD5C, 16}, {0xFD5E, 16},
+ {0xFD74, 16}, {0xFD76, 16}, {0xFD7C, 16}, {0xFD7E, 16},
+ {0xFDD4, 16}, {0xFDD6, 16}, {0xFDDC, 16}, {0xFDDE, 16},
+ {0xFDF4, 16}, {0xFDF6, 16}, {0xFDFC, 16}, {0xFDFE, 16},
+ {0xFF54, 16}, {0xFF56, 16}, {0xFF5C, 16}, {0xFF5E, 16},
+ {0xFF74, 16}, {0xFF76, 16}, {0xFF7C, 16}, {0xFF7E, 16},
+ {0xFFD4, 16}, {0xFFD6, 16}, {0xFFDC, 16}, {0xFFDE, 16},
+ {0xFFF4, 16}, {0xFFF6, 16}, {0xFFFC, 16}, {0xFFFE, 16}
+};
+#endif
+
+static int
+blz_log2(unsigned long n)
+{
+ assert(n > 0);
+
+#if defined(BLZ_BUILTIN_MSVC)
+ unsigned long msb_pos;
+ _BitScanReverse(&msb_pos, n);
+ return (int) msb_pos;
+#elif defined(BLZ_BUILTIN_GCC)
+ return (int) sizeof(n) * CHAR_BIT - 1 - __builtin_clzl(n);
+#else
+ int bits = 0;
+
+ while (n >>= 1) {
+ ++bits;
+ }
+
+ return bits;
+#endif
+}
+
+static unsigned long
+blz_gamma_cost(unsigned long n)
+{
+ assert(n >= 2);
+
+ return 2 * (unsigned long) blz_log2(n);
+}
+
+static unsigned long
+blz_match_cost(unsigned long pos, unsigned long len)
+{
+ return 1 + blz_gamma_cost(len - 2) + blz_gamma_cost((pos >> 8) + 2) + 8;
+}
+
+// Heuristic to compare matches
+static int
+blz_match_better(unsigned long cur, unsigned long new_pos, unsigned long new_len,
+ unsigned long pos, unsigned long len)
+{
+ const unsigned long offs = cur - pos - 1;
+ const unsigned long new_offs = cur - new_pos - 1;
+
+ return (new_len > len + 1)
+ || (new_len >= len + 1 && new_offs / 8 <= offs);
+}
+
+// Heuristic to compare match with match at next position
+static int
+blz_next_match_better(unsigned long cur, unsigned long new_pos, unsigned long new_len,
+ unsigned long pos, unsigned long len)
+{
+ const unsigned long offs = cur - pos - 1;
+ const unsigned long new_offs = cur + 1 - new_pos - 1;
+
+ return (new_len > len + 1 && new_offs / 8 < offs)
+ || (new_len > len && new_offs < offs)
+ || (new_len >= len && new_offs < offs / 4);
+}
+
+static void
+blz_putbit(struct blz_state *bs, unsigned int bit)
+{
+ // Check if tag is full
+ if (!bs->bits_left--) {
+ // Store tag
+ bs->tag_out[0] = bs->tag & 0x00FF;
+ bs->tag_out[1] = (bs->tag >> 8) & 0x00FF;
+
+ // Init next tag
+ bs->tag_out = bs->next_out;
+ bs->next_out += 2;
+ bs->bits_left = 15;
+ }
+
+ // Shift bit into tag
+ bs->tag = (bs->tag << 1) + bit;
+}
+
+static void
+blz_putbits(struct blz_state *bs, unsigned long bits, int num)
+{
+ assert(num >= 0 && num <= 16);
+ assert((bits & (~0UL << num)) == 0);
+
+ // Shift num bits into tag
+ unsigned long tag = ((unsigned long) bs->tag << num) | bits;
+ bs->tag = (unsigned int) tag;
+
+ // Check if tag is full
+ if (bs->bits_left < num) {
+ const unsigned int top16 = (unsigned int) (tag >> (num - bs->bits_left));
+
+ // Store tag
+ bs->tag_out[0] = top16 & 0x00FF;
+ bs->tag_out[1] = (top16 >> 8) & 0x00FF;
+
+ // Init next tag
+ bs->tag_out = bs->next_out;
+ bs->next_out += 2;
+
+ bs->bits_left += 16;
+ }
+
+ bs->bits_left -= num;
+}
+
+// Encode val using a universal code based on Elias gamma.
+//
+// This outputs each bit of val (after the leading one bit) as a pair where
+// the first bit is the value, and the second is zero if this was the last
+// pair, and one otherwise.
+//
+// 2 = 10 -> 00
+// 3 = 11 -> 10
+// 4 = 100 -> 01 00
+// 5 = 101 -> 01 10
+// 6 = 110 -> 11 00
+// ...
+//
+// On modern hardware this variant is slower to decode because we cannot count
+// the leading zeroes to get the number of value bits and then read them
+// directly. However on constrained hardware, it has the advantage of being
+// decodable using only one variable (register) and a tiny loop:
+//
+// result = 1;
+// do { result = (result << 1) + getbit(); } while (getbit());
+//
+// Strictly speaking, this is order-1 exp-Golomb, where we interleave the
+// value bits with the bits of the unary coding of the length, but I've always
+// known it as the gamma2 code. I am not sure where it originated from, but I
+// can see I used it in aPLib around 1998.
+//
+static void
+blz_putgamma(struct blz_state *bs, unsigned long val)
+{
+ assert(val >= 2);
+
+#if !defined(BLZ_NO_LUT)
+ // Output small values using lookup
+ if (val < 512) {
+ const unsigned int bits = blz_gamma_lookup[val][0];
+ const unsigned int shift = blz_gamma_lookup[val][1];
+
+ blz_putbits(bs, bits, (int) shift);
+
+ return;
+ }
+#endif
+
+ // Create a mask for the second-highest bit of val
+#if defined(BLZ_BUILTIN_MSVC)
+ unsigned long msb_pos;
+ _BitScanReverse(&msb_pos, val);
+ unsigned long mask = 1UL << (msb_pos - 1);
+#elif defined(BLZ_BUILTIN_GCC)
+ unsigned long mask = 1UL << ((int) sizeof(val) * CHAR_BIT - 2 - __builtin_clzl(val));
+#else
+ unsigned long mask = val >> 1;
+
+ // Clear bits except highest
+ while (mask & (mask - 1)) {
+ mask &= mask - 1;
+ }
+#endif
+
+ // Output gamma2-encoded bits
+ blz_putbit(bs, (val & mask) ? 1 : 0);
+
+ while (mask >>= 1) {
+ blz_putbit(bs, 1);
+ blz_putbit(bs, (val & mask) ? 1 : 0);
+ }
+
+ blz_putbit(bs, 0);
+}
+
+static unsigned char*
+blz_finalize(struct blz_state *bs)
+{
+ // Trailing one bit to delimit any literal tags
+ blz_putbit(bs, 1);
+
+ // Shift last tag into position and store
+ bs->tag <<= bs->bits_left;
+ bs->tag_out[0] = bs->tag & 0x00FF;
+ bs->tag_out[1] = (bs->tag >> 8) & 0x00FF;
+
+ // Return pointer one past end of output
+ return bs->next_out;
+}
+
+// Hash four bytes starting a p.
+//
+// This is Fibonacci hashing, also known as Knuth's multiplicative hash. The
+// constant is a prime close to 2^32/phi.
+//
+static unsigned long
+blz_hash4_bits(const unsigned char *p, int bits)
+{
+ assert(bits > 0 && bits <= 32);
+
+ uint32_t val = (uint32_t) p[0]
+ | ((uint32_t) p[1] << 8)
+ | ((uint32_t) p[2] << 16)
+ | ((uint32_t) p[3] << 24);
+
+ return (val * 2654435761U) >> (32 - bits);
+}
+
+static unsigned long
+blz_hash4(const unsigned char *p)
+{
+ return blz_hash4_bits(p, BLZ_HASH_BITS);
+}
+
+size_t
+blz_max_packed_size(size_t src_size)
+{
+ return src_size + src_size / 8 + 64;
+}
+
+size_t
+blz_workmem_size(size_t src_size)
+{
+ USED(src_size);
+
+ return LOOKUP_SIZE * sizeof(blz_word);
+}
+
+// Simple LZSS using hashing.
+//
+// The lookup table stores the previous position in the input that had a given
+// hash value, or NO_MATCH_POS if none.
+//
+unsigned long
+blz_pack(const void *src, void *dst, unsigned long src_size, void *workmem)
+{
+ struct blz_state bs;
+ blz_word *const lookup = (blz_word *) workmem;
+ const unsigned char *const in = (const unsigned char *) src;
+ const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
+ unsigned long hash_pos = 0;
+ unsigned long cur;
+
+ assert(src_size < BLZ_WORD_MAX);
+
+ // Check for empty input
+ if (src_size == 0) {
+ return 0;
+ }
+
+ bs.next_out = (unsigned char *) dst;
+
+ // First byte verbatim
+ *bs.next_out++ = in[0];
+
+ // Check for 1 byte input
+ if (src_size == 1) {
+ return 1;
+ }
+
+ // Initialize first tag
+ bs.tag_out = bs.next_out;
+ bs.next_out += 2;
+ bs.tag = 0;
+ bs.bits_left = 16;
+
+ // Initialize lookup
+ for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) {
+ lookup[i] = NO_MATCH_POS;
+ }
+
+ // Main compression loop
+ for (cur = 1; cur <= last_match_pos; ) {
+ // Update lookup up to current position
+ while (hash_pos < cur) {
+ lookup[blz_hash4(&in[hash_pos])] = hash_pos;
+ hash_pos++;
+ }
+
+ // Look up match for current position
+ const unsigned long pos = lookup[blz_hash4(&in[cur])];
+ unsigned long len = 0;
+
+ // Check match
+ if (pos != NO_MATCH_POS) {
+ const unsigned long len_limit = src_size - cur;
+
+ while (len < len_limit
+ && in[pos + len] == in[cur + len]) {
+ ++len;
+ }
+ }
+
+ // Output match or literal
+ //
+ // When offs >= 0x1FFE00, encoding a match of length 4
+ // (37 bits) is longer than encoding 4 literals (36 bits).
+ //
+ // The value 0x7E00 is a heuristic that sacrifices some
+ // length 4 matches in the hope that there will be a better
+ // match at the next position.
+ if (len > 4 || (len == 4 && cur - pos - 1 < 0x7E00UL)) {
+ const unsigned long offs = cur - pos - 1;
+
+ // Output match tag
+ blz_putbit(&bs, 1);
+
+ // Output match length
+ blz_putgamma(&bs, len - 2);
+
+ // Output match offset
+ blz_putgamma(&bs, (offs >> 8) + 2);
+ *bs.next_out++ = offs & 0x00FF;
+
+ cur += len;
+ }
+ else {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+ }
+
+ // Output any remaining literals
+ while (cur < src_size) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+
+ // Trailing one bit to delimit any literal tags
+ blz_putbit(&bs, 1);
+
+ // Shift last tag into position and store
+ bs.tag <<= bs.bits_left;
+ bs.tag_out[0] = bs.tag & 0x00FF;
+ bs.tag_out[1] = (bs.tag >> 8) & 0x00FF;
+
+ // Return compressed size
+ return (unsigned long) (bs.next_out - (unsigned char *) dst);
+}
+
+// Include compression algorithms used by blz_pack_level
+#include "brieflz_btparse.h"
+#include "brieflz_hashbucket.h"
+#include "brieflz_lazy.h"
+#include "brieflz_leparse.h"
+
+size_t
+blz_workmem_size_level(size_t src_size, int level)
+{
+ switch (level) {
+ case 1:
+ return blz_workmem_size(src_size);
+ case 2:
+ return blz_lazy_workmem_size(src_size);
+ case 3:
+ return blz_hashbucket_workmem_size(src_size, 2);
+ case 4:
+ return blz_hashbucket_workmem_size(src_size, 4);
+ case 5:
+ case 6:
+ case 7:
+ return blz_leparse_workmem_size(src_size);
+ case 8:
+ case 9:
+ case 10:
+ return blz_btparse_workmem_size(src_size);
+ default:
+ return (size_t) -1;
+ }
+}
+
+unsigned long
+blz_pack_level(const void *src, void *dst, unsigned long src_size,
+ void *workmem, int level)
+{
+ switch (level) {
+ case 1:
+ return blz_pack(src, dst, src_size, workmem);
+ case 2:
+ return blz_pack_lazy(src, dst, src_size, workmem);
+ case 3:
+ return blz_pack_hashbucket(src, dst, src_size, workmem, 2, 16);
+ case 4:
+ return blz_pack_hashbucket(src, dst, src_size, workmem, 4, 16);
+ case 5:
+ return blz_pack_leparse(src, dst, src_size, workmem, 1, 16);
+ case 6:
+ return blz_pack_leparse(src, dst, src_size, workmem, 8, 32);
+ case 7:
+ return blz_pack_leparse(src, dst, src_size, workmem, 64, 64);
+ case 8:
+ return blz_pack_btparse(src, dst, src_size, workmem, 16, 96);
+ case 9:
+ return blz_pack_btparse(src, dst, src_size, workmem, 32, 224);
+ case 10:
+ return blz_pack_btparse(src, dst, src_size, workmem, ULONG_MAX, ULONG_MAX);
+ default:
+ return BLZ_ERROR;
+ }
+}
+
+// clang -g -O1 -fsanitize=fuzzer,address -DBLZ_FUZZING brieflz.c depack.c
+#if defined(BLZ_FUZZING)
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef BLZ_FUZZ_LEVEL
+# define BLZ_FUZZ_LEVEL 1
+#endif
+
+extern int
+LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+ if (size > ULONG_MAX / 2) { return 0; }
+ void *workmem = malloc(blz_workmem_size_level(size, BLZ_FUZZ_LEVEL));
+ void *packed = malloc(blz_max_packed_size(size));
+ void *depacked = malloc(size);
+ if (!workmem || !packed || !depacked) { abort(); }
+ unsigned long packed_size = blz_pack_level(data, packed, size, workmem, BLZ_FUZZ_LEVEL);
+ blz_depack(packed, depacked, size);
+ if (memcmp(data, depacked, size)) { abort(); }
+ free(depacked);
+ free(packed);
+ free(workmem);
+ return 0;
+}
+#endif
--- /dev/null
+++ b/3rd/brieflz/brieflz.h
@@ -1,0 +1,167 @@
+/*
+ * BriefLZ - small fast Lempel-Ziv
+ *
+ * C/C++ header file
+ *
+ * Copyright (c) 2002-2020 Joergen Ibsen
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+#ifndef BRIEFLZ_H_INCLUDED
+#define BRIEFLZ_H_INCLUDED
+
+#include "platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BLZ_VER_MAJOR 1 /**< Major version number */
+#define BLZ_VER_MINOR 3 /**< Minor version number */
+#define BLZ_VER_PATCH 0 /**< Patch version number */
+#define BLZ_VER_STRING "1.3.0" /**< Version number as a string */
+
+#ifdef BLZ_DLL
+# if defined(_WIN32) || defined(__CYGWIN__)
+# ifdef BLZ_DLL_EXPORTS
+# define BLZ_API __declspec(dllexport)
+# else
+# define BLZ_API __declspec(dllimport)
+# endif
+# define BLZ_LOCAL
+# else
+# if __GNUC__ >= 4
+# define BLZ_API __attribute__ ((visibility ("default")))
+# define BLZ_LOCAL __attribute__ ((visibility ("hidden")))
+# else
+# define BLZ_API
+# define BLZ_LOCAL
+# endif
+# endif
+#else
+# define BLZ_API
+# define BLZ_LOCAL
+#endif
+
+/**
+ * Return value on error.
+ *
+ * @see blz_depack_safe
+ */
+#ifndef BLZ_ERROR
+# define BLZ_ERROR ((unsigned long) (-1))
+#endif
+
+/**
+ * Get bound on compressed data size.
+ *
+ * @see blz_pack
+ *
+ * @param src_size number of bytes to compress
+ * @return maximum size of compressed data
+ */
+BLZ_API size_t
+blz_max_packed_size(size_t src_size);
+
+/**
+ * Get required size of `workmem` buffer.
+ *
+ * @see blz_pack
+ *
+ * @param src_size number of bytes to compress
+ * @return required size in bytes of `workmem` buffer
+ */
+BLZ_API size_t
+blz_workmem_size(size_t src_size);
+
+/**
+ * Compress `src_size` bytes of data from `src` to `dst`.
+ *
+ * @param src pointer to data
+ * @param dst pointer to where to place compressed data
+ * @param src_size number of bytes to compress
+ * @param workmem pointer to memory for temporary use
+ * @return size of compressed data
+ */
+BLZ_API unsigned long
+blz_pack(const void *src, void *dst, unsigned long src_size, void *workmem);
+
+/**
+ * Get required size of `workmem` buffer.
+ *
+ * @see blz_pack_level
+ *
+ * @param src_size number of bytes to compress
+ * @param level compression level
+ * @return required size in bytes of `workmem` buffer
+ */
+BLZ_API size_t
+blz_workmem_size_level(size_t src_size, int level);
+
+/**
+ * Compress `src_size` bytes of data from `src` to `dst`.
+ *
+ * Compression levels between 1 and 9 offer a trade-off between
+ * time/space and ratio. Level 10 is optimal but very slow.
+ *
+ * @param src pointer to data
+ * @param dst pointer to where to place compressed data
+ * @param src_size number of bytes to compress
+ * @param workmem pointer to memory for temporary use
+ * @param level compression level
+ * @return size of compressed data
+ */
+BLZ_API unsigned long
+blz_pack_level(const void *src, void *dst, unsigned long src_size,
+ void *workmem, int level);
+
+/**
+ * Decompress `depacked_size` bytes of data from `src` to `dst`.
+ *
+ * @param src pointer to compressed data
+ * @param dst pointer to where to place decompressed data
+ * @param depacked_size size of decompressed data
+ * @return size of decompressed data
+ */
+BLZ_API unsigned long
+blz_depack(const void *src, void *dst, unsigned long depacked_size);
+
+/**
+ * Decompress `depacked_size` bytes of data from `src` to `dst`.
+ *
+ * Reads at most `src_size` bytes from `src`.
+ * Writes at most `depacked_size` bytes to `dst`.
+ *
+ * @param src pointer to compressed data
+ * @param src_size size of compressed data
+ * @param dst pointer to where to place decompressed data
+ * @param depacked_size size of decompressed data
+ * @return size of decompressed data, `BLZ_ERROR` on error
+ */
+BLZ_API unsigned long
+blz_depack_safe(const void *src, unsigned long src_size,
+ void *dst, unsigned long depacked_size);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* BRIEFLZ_H_INCLUDED */
--- /dev/null
+++ b/3rd/brieflz/brieflz_btparse.h
@@ -1,0 +1,332 @@
+//
+// BriefLZ - small fast Lempel-Ziv
+//
+// Forwards dynamic programming parse using binary trees
+//
+// Copyright (c) 2016-2020 Joergen Ibsen
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source
+// distribution.
+//
+
+#ifndef BRIEFLZ_BTPARSE_H_INCLUDED
+#define BRIEFLZ_BTPARSE_H_INCLUDED
+
+static size_t
+blz_btparse_workmem_size(size_t src_size)
+{
+ return (5 * src_size + 3 + LOOKUP_SIZE) * sizeof(blz_word);
+}
+
+// Forwards dynamic programming parse using binary trees, checking all
+// possible matches.
+//
+// The match search uses a binary tree for each hash entry, which is updated
+// dynamically as it is searched by re-rooting the tree at the search string.
+//
+// This does not result in balanced trees on all inputs, but often works well
+// in practice, and has the advantage that we get the matches in order from
+// closest and back.
+//
+// A drawback is the memory requirement of 5 * src_size words, since we cannot
+// overlap the arrays in a forwards parse.
+//
+// This match search method is found in LZMA by Igor Pavlov, libdeflate
+// by Eric Biggers, and other libraries.
+//
+static unsigned long
+blz_pack_btparse(const void *src, void *dst, unsigned long src_size, void *workmem,
+ const unsigned long max_depth, const unsigned long accept_len)
+{
+ struct blz_state bs;
+ const unsigned char *const in = (const unsigned char *) src;
+ const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
+
+ assert(src_size < BLZ_WORD_MAX);
+
+ // Check for empty input
+ if (src_size == 0) {
+ return 0;
+ }
+
+ bs.next_out = (unsigned char *) dst;
+
+ // First byte verbatim
+ *bs.next_out++ = in[0];
+
+ // Check for 1 byte input
+ if (src_size == 1) {
+ return 1;
+ }
+
+ // Initialize first tag
+ bs.tag_out = bs.next_out;
+ bs.next_out += 2;
+ bs.tag = 0;
+ bs.bits_left = 16;
+
+ if (src_size < 4) {
+ for (unsigned long i = 1; i < src_size; ++i) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[i];
+ }
+
+ // Return compressed size
+ return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
+ }
+
+ blz_word *const cost = (blz_word *) workmem;
+ blz_word *const mpos = cost + src_size + 1;
+ blz_word *const mlen = mpos + src_size + 1;
+ blz_word *const nodes = mlen + src_size + 1;
+ blz_word *const lookup = nodes + 2 * src_size;
+
+ // Initialize lookup
+ for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) {
+ lookup[i] = NO_MATCH_POS;
+ }
+
+ // Since we are not processing the first literal, update tree for
+ // position 0
+ lookup[blz_hash4(&in[0])] = 0;
+ nodes[0] = NO_MATCH_POS;
+ nodes[1] = NO_MATCH_POS;
+
+ // Initialize to all literals with infinite cost
+ for (unsigned long i = 0; i <= src_size; ++i) {
+ cost[i] = BLZ_WORD_MAX;
+ mlen[i] = 1;
+ }
+
+ cost[0] = 0;
+ cost[1] = 8;
+
+ // Next position where we are going to check matches
+ //
+ // This is used to skip matching while still updating the trees when
+ // we find a match that is accept_len or longer.
+ //
+ unsigned long next_match_cur = 1;
+
+ // Phase 1: Find lowest cost path arriving at each position
+ for (unsigned long cur = 1; cur <= last_match_pos; ++cur) {
+ // Adjust remaining costs to avoid overflow
+ if (cost[cur] > BLZ_WORD_MAX - 128) {
+ blz_word min_cost = BLZ_WORD_MAX;
+
+ for (unsigned long i = cur; i <= src_size; ++i) {
+ min_cost = cost[i] < min_cost ? cost[i] : min_cost;
+ }
+
+ for (unsigned long i = cur; i <= src_size; ++i) {
+ if (cost[i] != BLZ_WORD_MAX) {
+ cost[i] -= min_cost;
+ }
+ }
+ }
+
+ // Check literal
+ if (cost[cur + 1] > cost[cur] + 9) {
+ cost[cur + 1] = cost[cur] + 9;
+ mlen[cur + 1] = 1;
+ }
+
+ if (cur > next_match_cur) {
+ next_match_cur = cur;
+ }
+
+ unsigned long max_len = 3;
+
+ // Look up first match for current position
+ //
+ // pos is the current root of the tree of strings with this
+ // hash. We are going to re-root the tree so cur becomes the
+ // new root.
+ //
+ const unsigned long hash = blz_hash4(&in[cur]);
+ unsigned long pos = lookup[hash];
+ lookup[hash] = cur;
+
+ blz_word *lt_node = &nodes[2 * cur];
+ blz_word *gt_node = &nodes[2 * cur + 1];
+ unsigned long lt_len = 0;
+ unsigned long gt_len = 0;
+
+ assert(pos == NO_MATCH_POS || pos < cur);
+
+ // If we are checking matches, allow lengths up to end of
+ // input, otherwise compare only up to accept_len
+ const unsigned long len_limit = cur == next_match_cur ? src_size - cur
+ : accept_len < src_size - cur ? accept_len
+ : src_size - cur;
+ unsigned long num_chain = max_depth;
+
+ // Check matches
+ for (;;) {
+ // If at bottom of tree, mark leaf nodes
+ //
+ // In case we reached max_depth, this also prunes the
+ // subtree we have not searched yet and do not know
+ // where belongs.
+ //
+ if (pos == NO_MATCH_POS || num_chain-- == 0) {
+ *lt_node = NO_MATCH_POS;
+ *gt_node = NO_MATCH_POS;
+
+ break;
+ }
+
+ // The string at pos is lexicographically greater than
+ // a string that matched in the first lt_len positions,
+ // and less than a string that matched in the first
+ // gt_len positions, so it must match up to at least
+ // the minimum of these.
+ unsigned long len = lt_len < gt_len ? lt_len : gt_len;
+
+ // Find match len
+ while (len < len_limit && in[pos + len] == in[cur + len]) {
+ ++len;
+ }
+
+ // Extend current match if possible
+ //
+ // Note that we are checking matches in order from the
+ // closest and back. This means for a match further
+ // away, the encoding of all lengths up to the current
+ // max length will always be longer or equal, so we need
+ // only consider the extension.
+ //
+ if (cur == next_match_cur && len > max_len) {
+ for (unsigned long i = max_len + 1; i <= len; ++i) {
+ unsigned long match_cost = blz_match_cost(cur - pos - 1, i);
+
+ assert(match_cost < BLZ_WORD_MAX - cost[cur]);
+
+ unsigned long cost_there = cost[cur] + match_cost;
+
+ if (cost_there < cost[cur + i]) {
+ cost[cur + i] = cost_there;
+ mpos[cur + i] = cur - pos - 1;
+ mlen[cur + i] = i;
+ }
+ }
+
+ max_len = len;
+
+ if (len >= accept_len) {
+ next_match_cur = cur + len;
+ }
+ }
+
+ // If we reach maximum match length, the string at pos
+ // is equal to cur, so we can assign the left and right
+ // subtrees.
+ //
+ // This removes pos from the tree, but we added cur
+ // which is equal and closer for future matches.
+ //
+ if (len >= accept_len || len == len_limit) {
+ *lt_node = nodes[2 * pos];
+ *gt_node = nodes[2 * pos + 1];
+
+ break;
+ }
+
+ // Go to previous match and restructure tree
+ //
+ // lt_node points to a node that is going to contain
+ // elements lexicographically less than cur (the search
+ // string).
+ //
+ // If the string at pos is less than cur, we set that
+ // lt_node to pos. We know that all elements in the
+ // left subtree are less than pos, and thus less than
+ // cur, so we point lt_node at the right subtree of
+ // pos and continue our search there.
+ //
+ // The equivalent applies to gt_node when the string at
+ // pos is greater than cur.
+ //
+ if (in[pos + len] < in[cur + len]) {
+ *lt_node = pos;
+ lt_node = &nodes[2 * pos + 1];
+ assert(*lt_node == NO_MATCH_POS || *lt_node < pos);
+ pos = *lt_node;
+ lt_len = len;
+ }
+ else {
+ *gt_node = pos;
+ gt_node = &nodes[2 * pos];
+ assert(*gt_node == NO_MATCH_POS || *gt_node < pos);
+ pos = *gt_node;
+ gt_len = len;
+ }
+ }
+ }
+
+ for (unsigned long cur = last_match_pos + 1; cur < src_size; ++cur) {
+ // Check literal
+ if (cost[cur + 1] > cost[cur] + 9) {
+ cost[cur + 1] = cost[cur] + 9;
+ mlen[cur + 1] = 1;
+ }
+ }
+
+ // Phase 2: Follow lowest cost path backwards gathering tokens
+ unsigned long next_token = src_size;
+
+ for (unsigned long cur = src_size; cur > 1; cur -= mlen[cur], --next_token) {
+ mlen[next_token] = mlen[cur];
+ mpos[next_token] = mpos[cur];
+ }
+
+ // Phase 3: Output tokens
+ unsigned long cur = 1;
+
+ for (unsigned long i = next_token + 1; i <= src_size; cur += mlen[i++]) {
+ if (mlen[i] == 1) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur];
+ }
+ else {
+ const unsigned long offs = mpos[i];
+
+ // Output match tag
+ blz_putbit(&bs, 1);
+
+ // Output match length
+ blz_putgamma(&bs, mlen[i] - 2);
+
+ // Output match offset
+ blz_putgamma(&bs, (offs >> 8) + 2);
+ *bs.next_out++ = offs & 0x00FF;
+ }
+ }
+
+ // Return compressed size
+ return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
+}
+
+#endif /* BRIEFLZ_BTPARSE_H_INCLUDED */
--- /dev/null
+++ b/3rd/brieflz/brieflz_hashbucket.h
@@ -1,0 +1,262 @@
+//
+// BriefLZ - small fast Lempel-Ziv
+//
+// Lazy parsing with multiple previous positions per hash
+//
+// Copyright (c) 2016-2020 Joergen Ibsen
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source
+// distribution.
+//
+
+#ifndef BRIEFLZ_HASHBUCKET_H_INCLUDED
+#define BRIEFLZ_HASHBUCKET_H_INCLUDED
+
+static size_t
+blz_hashbucket_workmem_size(size_t src_size, unsigned int bucket_size)
+{
+ USED(src_size);
+
+ assert(bucket_size > 0);
+ assert(sizeof(bucket_size) < sizeof(size_t)
+ );//|| bucket_size < SIZE_MAX / (LOOKUP_SIZE * sizeof(blz_word)));
+
+ return (LOOKUP_SIZE * bucket_size) * sizeof(blz_word);
+}
+
+// Lazy parsing with multiple previous positions per hash.
+//
+// Instead of storing only the previous position a given hash occured at,
+// this stores the last bucket_size such positions in lookup. This means we
+// can check each of these and choose the "best".
+//
+// There are multiple options for maintaining the entries of the buckets, we
+// simply insert at the front to maintain the order of matches and avoid extra
+// variables. This gives some overhead for moving elements, but as long as
+// bucket_size is small and everything fits in a cache line it is pretty fast.
+//
+// If we find a match that is accept_len or longer, we stop searching.
+//
+static unsigned long
+blz_pack_hashbucket(const void *src, void *dst, unsigned long src_size, void *workmem,
+ const unsigned int bucket_size, const unsigned long accept_len)
+{
+ struct blz_state bs;
+ blz_word *const lookup = (blz_word *) workmem;
+ const unsigned char *const in = (const unsigned char *) src;
+ const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
+ unsigned long hash_pos = 0;
+ unsigned long cur;
+
+ assert(src_size < BLZ_WORD_MAX);
+
+ // Check for empty input
+ if (src_size == 0) {
+ return 0;
+ }
+
+ bs.next_out = (unsigned char *) dst;
+
+ // First byte verbatim
+ *bs.next_out++ = in[0];
+
+ // Check for 1 byte input
+ if (src_size == 1) {
+ return 1;
+ }
+
+ // Initialize first tag
+ bs.tag_out = bs.next_out;
+ bs.next_out += 2;
+ bs.tag = 0;
+ bs.bits_left = 16;
+
+ assert(bucket_size > 0);
+ assert(sizeof(bucket_size) < sizeof(unsigned long)
+ );//|| bucket_size < ULONG_MAX / LOOKUP_SIZE);
+
+ // Initialize lookup
+ for (unsigned long i = 0; i < LOOKUP_SIZE * bucket_size; ++i) {
+ lookup[i] = NO_MATCH_POS;
+ }
+
+ // Main compression loop
+ for (cur = 1; cur <= last_match_pos; ) {
+ // Update lookup up to current position
+ while (hash_pos < cur) {
+ blz_word *const bucket = &lookup[blz_hash4(&in[hash_pos]) * bucket_size];
+ unsigned long next = hash_pos;
+
+ // Insert hash_pos at start of bucket
+ for (unsigned int i = 0; i < bucket_size; ++i) {
+ unsigned long tmp = bucket[i];
+ bucket[i] = next;
+ next = tmp;
+ }
+
+ hash_pos++;
+ }
+
+ unsigned long best_pos = NO_MATCH_POS;
+ unsigned long best_len = 0;
+
+ // Look up first match for current position
+ const blz_word *const bucket = &lookup[blz_hash4(&in[cur]) * bucket_size];
+ unsigned long pos = bucket[0];
+ unsigned int bucket_idx = 0;
+
+ const unsigned long len_limit = src_size - cur;
+
+ // Check matches
+ while (pos != NO_MATCH_POS) {
+ unsigned long len = 0;
+
+ // Check match
+ if (best_len < len_limit
+ && in[pos + best_len] == in[cur + best_len]) {
+ while (len < len_limit && in[pos + len] == in[cur + len]) {
+ ++len;
+ }
+ }
+
+ // Update best match
+ if (blz_match_better(cur, pos, len, best_pos, best_len)) {
+ best_pos = pos;
+ best_len = len;
+ if (best_len >= accept_len) {
+ break;
+ }
+ }
+
+ // Go to previous match
+ if (++bucket_idx == bucket_size) {
+ break;
+ }
+ pos = bucket[bucket_idx];
+ }
+
+ // Check if match at next position is better
+ if (best_len > 3 && best_len < accept_len && cur < last_match_pos) {
+ // Update lookup up to next position
+ {
+ blz_word *const next_bucket = &lookup[blz_hash4(&in[hash_pos]) * bucket_size];
+ unsigned long next = hash_pos;
+
+ // Insert hash_pos at start of bucket
+ for (unsigned int i = 0; i < bucket_size; ++i) {
+ unsigned long tmp = next_bucket[i];
+ next_bucket[i] = next;
+ next = tmp;
+ }
+
+ hash_pos++;
+ }
+
+ // Look up first match for next position
+ const blz_word *const next_bucket = &lookup[blz_hash4(&in[cur + 1]) * bucket_size];
+ unsigned long next_pos = next_bucket[0];
+ unsigned int next_bucket_idx = 0;
+
+ const unsigned long next_len_limit = src_size - (cur + 1);
+
+ // Check matches
+ while (next_pos != NO_MATCH_POS) {
+ unsigned long next_len = 0;
+
+ // Check match
+ if (best_len - 1 < next_len_limit
+ && in[next_pos + best_len - 1] == in[cur + 1 + best_len - 1]) {
+ while (next_len < next_len_limit
+ && in[next_pos + next_len] == in[cur + 1 + next_len]) {
+ ++next_len;
+ }
+ }
+
+ if (next_len >= best_len) {
+ // Replace with next match if it extends backwards
+ if (next_pos > 0 && in[next_pos - 1] == in[cur]) {
+ if (blz_match_better(cur, next_pos - 1, next_len + 1, best_pos, best_len)) {
+ best_pos = next_pos - 1;
+ best_len = next_len + 1;
+ }
+ }
+ else {
+ // Drop current match if next match is better
+ if (blz_next_match_better(cur, next_pos, next_len, best_pos, best_len)) {
+ best_len = 0;
+ break;
+ }
+ }
+ }
+
+ // Go to previous match
+ if (++next_bucket_idx == bucket_size) {
+ break;
+ }
+ next_pos = next_bucket[next_bucket_idx];
+ }
+ }
+
+ // Output match or literal
+ if (best_len > 4 || (best_len == 4 && cur - best_pos - 1 < 0x3FE00UL)) {
+ const unsigned long offs = cur - best_pos - 1;
+
+ // Output match tag
+ blz_putbit(&bs, 1);
+
+ // Output match length
+ blz_putgamma(&bs, best_len - 2);
+
+ // Output match offset
+ blz_putgamma(&bs, (offs >> 8) + 2);
+ *bs.next_out++ = offs & 0x00FF;
+
+ cur += best_len;
+ }
+ else {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+ }
+
+ // Output any remaining literals
+ while (cur < src_size) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+
+ // Trailing one bit to delimit any literal tags
+ blz_putbit(&bs, 1);
+
+ // Shift last tag into position and store
+ bs.tag <<= bs.bits_left;
+ bs.tag_out[0] = bs.tag & 0x00FF;
+ bs.tag_out[1] = (bs.tag >> 8) & 0x00FF;
+
+ // Return compressed size
+ return (unsigned long) (bs.next_out - (unsigned char *) dst);
+}
+
+#endif /* BRIEFLZ_HASHBUCKET_H_INCLUDED */
--- /dev/null
+++ b/3rd/brieflz/brieflz_hashchain.h
@@ -1,0 +1,221 @@
+//
+// BriefLZ - small fast Lempel-Ziv
+//
+// Lazy parsing with chains of previous positions
+//
+// Copyright (c) 2016-2020 Joergen Ibsen
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source
+// distribution.
+//
+
+#ifndef BRIEFLZ_HASHCHAIN_H_INCLUDED
+#define BRIEFLZ_HASHCHAIN_H_INCLUDED
+
+static size_t
+blz_hashchain_workmem_size(size_t src_size)
+{
+ return (LOOKUP_SIZE + src_size) * sizeof(blz_word);
+}
+
+// Lazy parsing with chains of previous positions.
+//
+// Use the lookup table to create chains of previous positions for a given
+// hash. This version uses an array of previous positions the size of the
+// input.
+//
+// Since the same four bytes may occur many times, it is often required to
+// limit the number of positions checked. The search is limited to following
+// max_depth positions down each chain.
+//
+static unsigned long
+blz_pack_hashchain(const void *src, void *dst, unsigned long src_size, void *workmem,
+ const unsigned long max_depth, const unsigned long accept_len)
+{
+ struct blz_state bs;
+ blz_word *const lookup = (blz_word *) workmem;
+ blz_word *const prev = lookup + LOOKUP_SIZE;
+ const unsigned char *const in = (const unsigned char *) src;
+ const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
+ unsigned long cur = 0;
+
+ assert(src_size < BLZ_WORD_MAX);
+
+ // Check for empty input
+ if (src_size == 0) {
+ return 0;
+ }
+
+ bs.next_out = (unsigned char *) dst;
+
+ // First byte verbatim
+ *bs.next_out++ = in[0];
+
+ // Check for 1 byte input
+ if (src_size == 1) {
+ return 1;
+ }
+
+ // Initialize first tag
+ bs.tag_out = bs.next_out;
+ bs.next_out += 2;
+ bs.tag = 0;
+ bs.bits_left = 16;
+
+ // Initialize lookup
+ for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) {
+ lookup[i] = NO_MATCH_POS;
+ }
+
+ // Build hash chains in prev
+ if (last_match_pos > 0) {
+ for (unsigned long i = 0; i <= last_match_pos; ++i) {
+ const unsigned long hash = blz_hash4(&in[i]);
+ prev[i] = lookup[hash];
+ lookup[hash] = i;
+ }
+ }
+
+ // Main compression loop
+ for (cur = 1; cur <= last_match_pos; ) {
+ unsigned long best_pos = NO_MATCH_POS;
+ unsigned long best_len = 0;
+
+ // Look up first match for current position
+ unsigned long pos = prev[cur];
+
+ const unsigned long len_limit = src_size - cur;
+ unsigned long num_chain = max_depth;
+
+ // Check matches
+ while (pos != NO_MATCH_POS && num_chain--) {
+ unsigned long len = 0;
+
+ // If next byte matches, so this has a chance to be a longer match
+ if (best_len < len_limit
+ && in[pos + best_len] == in[cur + best_len]) {
+ // Find match len
+ while (len < len_limit && in[pos + len] == in[cur + len]) {
+ ++len;
+ }
+ }
+
+ // Update best match
+ if (blz_match_better(cur, pos, len, best_pos, best_len)) {
+ best_pos = pos;
+ best_len = len;
+ if (best_len >= accept_len) {
+ break;
+ }
+ }
+
+ // Go to previous match
+ pos = prev[pos];
+ }
+
+ // Check if match at next position is better
+ if (best_len > 3 && best_len < accept_len && cur < last_match_pos) {
+ // Look up first match for next position
+ unsigned long next_pos = prev[cur + 1];
+
+ const unsigned long next_len_limit = src_size - (cur + 1);
+ num_chain = max_depth;
+
+ // Check matches
+ while (next_pos != NO_MATCH_POS && num_chain--) {
+ unsigned long next_len = 0;
+
+ // Check match
+ if (best_len - 1 < next_len_limit
+ && in[next_pos + best_len - 1] == in[cur + 1 + best_len - 1]) {
+ while (next_len < next_len_limit
+ && in[next_pos + next_len] == in[cur + 1 + next_len]) {
+ ++next_len;
+ }
+ }
+
+ if (next_len >= best_len) {
+ // Replace with next match if it extends backwards
+ if (next_pos > 0 && in[next_pos - 1] == in[cur]) {
+ if (blz_match_better(cur, next_pos - 1, next_len + 1, best_pos, best_len)) {
+ best_pos = next_pos - 1;
+ best_len = next_len + 1;
+ }
+ }
+ else {
+ // Drop current match if next match is better
+ if (blz_next_match_better(cur, next_pos, next_len, best_pos, best_len)) {
+ best_len = 0;
+ break;
+ }
+ }
+ }
+
+ // Go to previous match
+ next_pos = prev[next_pos];
+ }
+ }
+
+ // Output match or literal
+ if (best_len > 4 || (best_len == 4 && cur - best_pos - 1 < 0x3FE00UL)) {
+ const unsigned long offs = cur - best_pos - 1;
+
+ // Output match tag
+ blz_putbit(&bs, 1);
+
+ // Output match length
+ blz_putgamma(&bs, best_len - 2);
+
+ // Output match offset
+ blz_putgamma(&bs, (offs >> 8) + 2);
+ *bs.next_out++ = offs & 0x00FF;
+
+ cur += best_len;
+ }
+ else {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+ }
+
+ // Output any remaining literals
+ while (cur < src_size) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+
+ // Trailing one bit to delimit any literal tags
+ blz_putbit(&bs, 1);
+
+ // Shift last tag into position and store
+ bs.tag <<= bs.bits_left;
+ bs.tag_out[0] = bs.tag & 0x00FF;
+ bs.tag_out[1] = (bs.tag >> 8) & 0x00FF;
+
+ // Return compressed size
+ return (unsigned long) (bs.next_out - (unsigned char *) dst);
+}
+
+#endif /* BRIEFLZ_HASHCHAIN_H_INCLUDED */
--- /dev/null
+++ b/3rd/brieflz/brieflz_lazy.h
@@ -1,0 +1,192 @@
+//
+// BriefLZ - small fast Lempel-Ziv
+//
+// Lazy (non-greedy) parsing with one-byte-lookahead
+//
+// Copyright (c) 2016-2020 Joergen Ibsen
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source
+// distribution.
+//
+
+#ifndef BRIEFLZ_LAZY_H_INCLUDED
+#define BRIEFLZ_LAZY_H_INCLUDED
+
+static size_t
+blz_lazy_workmem_size(size_t src_size)
+{
+ USED(src_size);
+
+ return LOOKUP_SIZE * sizeof(blz_word);
+}
+
+// Lazy (non-greedy) parsing with one-byte-lookahead.
+//
+// Each time we find a match, we check if there is a better match at the next
+// position, and if so encode a literal instead.
+//
+static unsigned long
+blz_pack_lazy(const void *src, void *dst, unsigned long src_size, void *workmem)
+{
+ struct blz_state bs;
+ blz_word *const lookup = (blz_word *) workmem;
+ const unsigned char *const in = (const unsigned char *) src;
+ const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
+ unsigned long hash_pos = 0;
+ unsigned long cur;
+
+ assert(src_size < BLZ_WORD_MAX);
+
+ // Check for empty input
+ if (src_size == 0) {
+ return 0;
+ }
+
+ bs.next_out = (unsigned char *) dst;
+
+ // First byte verbatim
+ *bs.next_out++ = in[0];
+
+ // Check for 1 byte input
+ if (src_size == 1) {
+ return 1;
+ }
+
+ // Initialize first tag
+ bs.tag_out = bs.next_out;
+ bs.next_out += 2;
+ bs.tag = 0;
+ bs.bits_left = 16;
+
+ // Initialize lookup
+ for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) {
+ lookup[i] = NO_MATCH_POS;
+ }
+
+ // Main compression loop
+ for (cur = 1; cur <= last_match_pos; ) {
+ // Update lookup up to current position
+ while (hash_pos < cur) {
+ lookup[blz_hash4(&in[hash_pos])] = hash_pos;
+ hash_pos++;
+ }
+
+ // Look up match for current position
+ unsigned long pos = lookup[blz_hash4(&in[cur])];
+ unsigned long len = 0;
+
+ // Check match
+ if (pos != NO_MATCH_POS) {
+ const unsigned long len_limit = src_size - cur;
+
+ while (len < len_limit
+ && in[pos + len] == in[cur + len]) {
+ ++len;
+ }
+ }
+
+ // Check if match at next position is better
+ if (len > 3 && cur < last_match_pos) {
+ // Update lookup up to next position
+ lookup[blz_hash4(&in[hash_pos])] = hash_pos;
+ hash_pos++;
+
+ // Look up match for next position
+ const unsigned long next_pos = lookup[blz_hash4(&in[cur + 1])];
+ unsigned long next_len = 0;
+
+ // Check match
+ if (next_pos != NO_MATCH_POS && next_pos != pos + 1) {
+ const unsigned long next_len_limit = src_size - (cur + 1);
+
+ // If last byte matches, so this has a chance to be a better match
+ if (len - 1 < next_len_limit
+ && in[next_pos + len - 1] == in[cur + 1 + len - 1]) {
+ while (next_len < next_len_limit
+ && in[next_pos + next_len] == in[cur + 1 + next_len]) {
+ ++next_len;
+ }
+ }
+ }
+
+ if (next_len >= len) {
+ // Replace with next match if it extends backwards
+ if (next_pos > 0 && in[next_pos - 1] == in[cur]) {
+ if (blz_match_better(cur, next_pos - 1, next_len + 1, pos, len)) {
+ pos = next_pos - 1;
+ len = next_len + 1;
+ }
+ }
+ else {
+ // Drop current match if next match is better
+ if (blz_next_match_better(cur, next_pos, next_len, pos, len)) {
+ len = 0;
+ }
+ }
+
+ }
+ }
+
+ // Output match or literal
+ if (len > 4 || (len == 4 && cur - pos - 1 < 0x3FE00UL)) {
+ const unsigned long offs = cur - pos - 1;
+
+ // Output match tag
+ blz_putbit(&bs, 1);
+
+ // Output match length
+ blz_putgamma(&bs, len - 2);
+
+ // Output match offset
+ blz_putgamma(&bs, (offs >> 8) + 2);
+ *bs.next_out++ = offs & 0x00FF;
+
+ cur += len;
+ }
+ else {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+ }
+
+ // Output any remaining literals
+ while (cur < src_size) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+
+ // Trailing one bit to delimit any literal tags
+ blz_putbit(&bs, 1);
+
+ // Shift last tag into position and store
+ bs.tag <<= bs.bits_left;
+ bs.tag_out[0] = bs.tag & 0x00FF;
+ bs.tag_out[1] = (bs.tag >> 8) & 0x00FF;
+
+ // Return compressed size
+ return (unsigned long) (bs.next_out - (unsigned char *) dst);
+}
+
+#endif /* BRIEFLZ_LAZY_H_INCLUDED */
--- /dev/null
+++ b/3rd/brieflz/brieflz_leparse.h
@@ -1,0 +1,256 @@
+//
+// BriefLZ - small fast Lempel-Ziv
+//
+// Backwards dynamic programming parse with left-extension of matches
+//
+// Copyright (c) 2016-2020 Joergen Ibsen
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source
+// distribution.
+//
+
+#ifndef BRIEFLZ_LEPARSE_H_INCLUDED
+#define BRIEFLZ_LEPARSE_H_INCLUDED
+
+static size_t
+blz_leparse_workmem_size(size_t src_size)
+{
+ return (LOOKUP_SIZE < 2 * src_size ? 3 * src_size : src_size + LOOKUP_SIZE)
+ * sizeof(blz_word);
+}
+
+// Backwards dynamic programming parse with left-extension of matches.
+//
+// Whenever we find a match that improves the cost at the current position,
+// we try to extend this match to the left, and if possible we use that
+// left-extension for each position to the left. Since we are processing
+// the input from right to left, this matches repeated patterns without
+// searching at each position.
+//
+// Essentially, this improves the worst case for the parsing at a small cost
+// in ratio. The match finding is still O(n^2) in number of matches though,
+// so may have to limit max_depth on larger block sizes.
+//
+// This is usually within a few percent of the "optimal" parse with the same
+// parameters.
+//
+static unsigned long
+blz_pack_leparse(const void *src, void *dst, unsigned long src_size, void *workmem,
+ const unsigned long max_depth, const unsigned long accept_len)
+{
+ struct blz_state bs;
+ const unsigned char *const in = (const unsigned char *) src;
+ const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
+
+ assert(src_size < BLZ_WORD_MAX);
+
+ // Check for empty input
+ if (src_size == 0) {
+ return 0;
+ }
+
+ bs.next_out = (unsigned char *) dst;
+
+ // First byte verbatim
+ *bs.next_out++ = in[0];
+
+ // Check for 1 byte input
+ if (src_size == 1) {
+ return 1;
+ }
+
+ // Initialize first tag
+ bs.tag_out = bs.next_out;
+ bs.next_out += 2;
+ bs.tag = 0;
+ bs.bits_left = 16;
+
+ if (src_size < 4) {
+ for (unsigned long i = 1; i < src_size; ++i) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[i];
+ }
+
+ // Return compressed size
+ return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
+ }
+
+ // With a bit of careful ordering we can fit in 3 * src_size words.
+ //
+ // The idea is that the lookup is only used in the first phase to
+ // build the hash chains, so we overlap it with mpos and mlen.
+ // Also, since we are using prev from right to left in phase two,
+ // and that is the order we fill in cost, we can overlap these.
+ //
+ // One detail is that we actually use src_size + 1 elements of cost,
+ // but we put mpos after it, where we do not need the first element.
+ //
+ blz_word *const prev = (blz_word *) workmem;
+ blz_word *const mpos = prev + src_size;
+ blz_word *const mlen = mpos + src_size;
+ blz_word *const cost = prev;
+ blz_word *const lookup = mpos;
+
+ // Phase 1: Build hash chains
+ const int bits = 2 * src_size < LOOKUP_SIZE ? BLZ_HASH_BITS : blz_log2(src_size);
+
+ // Initialize lookup
+ for (unsigned long i = 0; i < (1UL << bits); ++i) {
+ lookup[i] = NO_MATCH_POS;
+ }
+
+ // Build hash chains in prev
+ if (last_match_pos > 0) {
+ for (unsigned long i = 0; i <= last_match_pos; ++i) {
+ const unsigned long hash = blz_hash4_bits(&in[i], bits);
+ prev[i] = lookup[hash];
+ lookup[hash] = i;
+ }
+ }
+
+ // Initialize last three positions as literals
+ mlen[src_size - 3] = 1;
+ mlen[src_size - 2] = 1;
+ mlen[src_size - 1] = 1;
+
+ cost[src_size - 3] = 27;
+ cost[src_size - 2] = 18;
+ cost[src_size - 1] = 9;
+ cost[src_size] = 0;
+
+ // Phase 2: Find lowest cost path from each position to end
+ for (unsigned long cur = last_match_pos; cur > 0; --cur) {
+ // Since we updated prev to the end in the first phase, we
+ // do not need to hash, but can simply look up the previous
+ // position directly.
+ unsigned long pos = prev[cur];
+
+ assert(pos == NO_MATCH_POS || pos < cur);
+
+ // Start with a literal
+ cost[cur] = cost[cur + 1] + 9;
+ mlen[cur] = 1;
+
+ unsigned long max_len = 3;
+
+ const unsigned long len_limit = src_size - cur;
+ unsigned long num_chain = max_depth;
+
+ // Go through the chain of prev matches
+ for (; pos != NO_MATCH_POS && num_chain--; pos = prev[pos]) {
+ unsigned long len = 0;
+
+ // If next byte matches, so this has a chance to be a longer match
+ if (max_len < len_limit && in[pos + max_len] == in[cur + max_len]) {
+ // Find match len
+ while (len < len_limit && in[pos + len] == in[cur + len]) {
+ ++len;
+ }
+ }
+
+ // Extend current match if possible
+ //
+ // Note that we are checking matches in order from the
+ // closest and back. This means for a match further
+ // away, the encoding of all lengths up to the current
+ // max length will always be longer or equal, so we need
+ // only consider the extension.
+ if (len > max_len) {
+ unsigned long min_cost = ULONG_MAX;
+ unsigned long min_cost_len = 3;
+
+ // Find lowest cost match length
+ for (unsigned long i = max_len + 1; i <= len; ++i) {
+ unsigned long match_cost = blz_match_cost(cur - pos - 1, i);
+ assert(match_cost < BLZ_WORD_MAX - cost[cur + i]);
+ unsigned long cost_here = match_cost + cost[cur + i];
+
+ if (cost_here < min_cost) {
+ min_cost = cost_here;
+ min_cost_len = i;
+ }
+ }
+
+ max_len = len;
+
+ // Update cost if cheaper
+ if (min_cost < cost[cur]) {
+ cost[cur] = min_cost;
+ mpos[cur] = pos;
+ mlen[cur] = min_cost_len;
+
+ // Left-extend current match if possible
+ if (pos > 0 && in[pos - 1] == in[cur - 1]) {
+ do {
+ --cur;
+ --pos;
+ ++min_cost_len;
+ unsigned long match_cost = blz_match_cost(cur - pos - 1, min_cost_len);
+ assert(match_cost < BLZ_WORD_MAX - cost[cur + min_cost_len]);
+ unsigned long cost_here = match_cost + cost[cur + min_cost_len];
+ cost[cur] = cost_here;
+ mpos[cur] = pos;
+ mlen[cur] = min_cost_len;
+ } while (pos > 0 && in[pos - 1] == in[cur - 1]);
+ break;
+ }
+ }
+ }
+
+ if (len >= accept_len || len == len_limit) {
+ break;
+ }
+ }
+ }
+
+ mpos[0] = 0;
+ mlen[0] = 1;
+
+ // Phase 3: Output compressed data, following lowest cost path
+ for (unsigned long i = 1; i < src_size; i += mlen[i]) {
+ if (mlen[i] == 1) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[i];
+ }
+ else {
+ const unsigned long offs = i - mpos[i] - 1;
+
+ // Output match tag
+ blz_putbit(&bs, 1);
+
+ // Output match length
+ blz_putgamma(&bs, mlen[i] - 2);
+
+ // Output match offset
+ blz_putgamma(&bs, (offs >> 8) + 2);
+ *bs.next_out++ = offs & 0x00FF;
+ }
+ }
+
+ // Return compressed size
+ return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
+}
+
+#endif /* BRIEFLZ_LEPARSE_H_INCLUDED */
--- /dev/null
+++ b/3rd/brieflz/brieflz_sliding.h
@@ -1,0 +1,261 @@
+//
+// BriefLZ - small fast Lempel-Ziv
+//
+// Lazy parsing with a sliding window of previous positions
+//
+// Copyright (c) 2016-2020 Joergen Ibsen
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source
+// distribution.
+//
+
+#ifndef BRIEFLZ_SLIDING_H_INCLUDED
+#define BRIEFLZ_SLIDING_H_INCLUDED
+
+static size_t
+blz_sliding_workmem_size(size_t src_size, int window_log)
+{
+ (void) src_size;
+
+ assert(window_log > 0 && window_log < blz_log2(BLZ_WORD_MAX));
+
+ return (LOOKUP_SIZE + ((size_t) 1 << window_log)) * sizeof(blz_word);
+}
+
+// Lazy parsing with a sliding window of previous positions.
+//
+// Like blz_pack_hashchain, but the buffer of previous positions is limited to
+// a sliding window of 1 << window_log bytes.
+//
+static unsigned long
+blz_pack_sliding(const void *src, void *dst, unsigned long src_size, void *workmem,
+ const int window_log, const unsigned long max_depth,
+ const unsigned long accept_len)
+{
+ struct blz_state bs;
+ blz_word *const lookup = (blz_word *) workmem;
+ blz_word *const prev = lookup + LOOKUP_SIZE;
+ const unsigned char *const in = (const unsigned char *) src;
+ const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
+ const unsigned long window_size = 1UL << window_log;
+ const unsigned long window_mask = window_size - 1;
+ unsigned long hash_pos = 0;
+ unsigned long cur = 0;
+
+ assert(src_size < BLZ_WORD_MAX);
+
+ // Check for empty input
+ if (src_size == 0) {
+ return 0;
+ }
+
+ bs.next_out = (unsigned char *) dst;
+
+ // First byte verbatim
+ *bs.next_out++ = in[0];
+
+ // Check for 1 byte input
+ if (src_size == 1) {
+ return 1;
+ }
+
+ // Initialize first tag
+ bs.tag_out = bs.next_out;
+ bs.next_out += 2;
+ bs.tag = 0;
+ bs.bits_left = 16;
+
+ // Initialize lookup
+ for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) {
+ lookup[i] = NO_MATCH_POS;
+ }
+
+ // Main compression loop
+ for (cur = 1; cur <= last_match_pos; ) {
+ const unsigned long window_limit = cur >= window_size ? cur - (window_size - 1) : 0;
+
+ if (hash_pos < window_limit) {
+ hash_pos = window_limit;
+ }
+
+ // Update lookup and prev up to current position
+ while (hash_pos < cur) {
+ const unsigned long hash = blz_hash4(&in[hash_pos]);
+
+ prev[hash_pos & window_mask] = lookup[hash];
+ lookup[hash] = hash_pos;
+ hash_pos++;
+ }
+
+ unsigned long best_pos = NO_MATCH_POS;
+ unsigned long best_len = 0;
+
+ // Look up first match for current position
+ unsigned long pos = lookup[blz_hash4(&in[cur])];
+
+ // Drop matches outside window. Technically this is not
+ // necessary, but it guarantees match positions are limited
+ // to the window size.
+ if (pos < window_limit) {
+ pos = NO_MATCH_POS;
+ }
+
+ const unsigned long len_limit = src_size - cur;
+ unsigned long num_chain = max_depth;
+
+ // Check matches
+ while (pos != NO_MATCH_POS && num_chain--) {
+ unsigned long len = 0;
+
+ // If next byte matches, so this has a chance to be a longer match
+ if (best_len < len_limit
+ && in[pos + best_len] == in[cur + best_len]) {
+ // Find match len
+ while (len < len_limit && in[pos + len] == in[cur + len]) {
+ ++len;
+ }
+ }
+
+ assert(cur - pos < window_size);
+
+ // Update best match
+ if (blz_match_better(cur, pos, len, best_pos, best_len)) {
+ best_pos = pos;
+ best_len = len;
+ if (best_len >= accept_len) {
+ break;
+ }
+ }
+
+ assert(prev[pos & window_mask] == NO_MATCH_POS || prev[pos & window_mask] < pos);
+
+ // Go to previous match
+ pos = prev[pos & window_mask];
+ if (pos < window_limit) {
+ pos = NO_MATCH_POS;
+ }
+ }
+
+ // Check if match at next position is better
+ if (best_len > 3 && best_len < accept_len && cur < last_match_pos) {
+ // Update lookup up to next position
+ const unsigned long hash = blz_hash4(&in[hash_pos]);
+
+ prev[hash_pos & window_mask] = lookup[hash];
+ lookup[hash] = hash_pos;
+ hash_pos++;
+
+ // Look up first match for next position
+ unsigned long next_pos = lookup[blz_hash4(&in[cur + 1])];
+ if (next_pos <= window_limit) {
+ next_pos = NO_MATCH_POS;
+ }
+
+ const unsigned long next_len_limit = src_size - (cur + 1);
+ num_chain = max_depth;
+
+ // Check matches
+ while (next_pos != NO_MATCH_POS && num_chain--) {
+ unsigned long next_len = 0;
+
+ // Check match
+ if (best_len - 1 < next_len_limit
+ && in[next_pos + best_len - 1] == in[cur + 1 + best_len - 1]) {
+ while (next_len < next_len_limit
+ && in[next_pos + next_len] == in[cur + 1 + next_len]) {
+ ++next_len;
+ }
+ }
+
+ assert(cur + 1 - next_pos < window_size);
+
+ if (next_len >= best_len) {
+ // Replace with next match if it extends backwards
+ if (next_pos > 0 && in[next_pos - 1] == in[cur]) {
+ if (blz_match_better(cur, next_pos - 1, next_len + 1, best_pos, best_len)) {
+ best_pos = next_pos - 1;
+ best_len = next_len + 1;
+ }
+ }
+ else {
+ // Drop current match if next match is better
+ if (blz_next_match_better(cur, next_pos, next_len, best_pos, best_len)) {
+ best_len = 0;
+ break;
+ }
+ }
+ }
+
+ assert(prev[next_pos & window_mask] == NO_MATCH_POS || prev[next_pos & window_mask] < next_pos);
+
+ // Go to previous match
+ next_pos = prev[next_pos & window_mask];
+ if (next_pos <= window_limit) {
+ next_pos = NO_MATCH_POS;
+ }
+ }
+ }
+
+ // Output match or literal
+ if (best_len > 4 || (best_len == 4 && cur - best_pos - 1 < 0x3FE00UL)) {
+ const unsigned long offs = cur - best_pos - 1;
+
+ // Output match tag
+ blz_putbit(&bs, 1);
+
+ // Output match length
+ blz_putgamma(&bs, best_len - 2);
+
+ // Output match offset
+ blz_putgamma(&bs, (offs >> 8) + 2);
+ *bs.next_out++ = offs & 0x00FF;
+
+ cur += best_len;
+ }
+ else {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+ }
+
+ // Output any remaining literals
+ while (cur < src_size) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[cur++];
+ }
+
+ // Trailing one bit to delimit any literal tags
+ blz_putbit(&bs, 1);
+
+ // Shift last tag into position and store
+ bs.tag <<= bs.bits_left;
+ bs.tag_out[0] = bs.tag & 0x00FF;
+ bs.tag_out[1] = (bs.tag >> 8) & 0x00FF;
+
+ // Return compressed size
+ return (unsigned long) (bs.next_out - (unsigned char *) dst);
+}
+
+#endif /* BRIEFLZ_SLIDING_H_INCLUDED */
--- /dev/null
+++ b/3rd/brieflz/brieflz_ssparse.h
@@ -1,0 +1,239 @@
+//
+// BriefLZ - small fast Lempel-Ziv
+//
+// Backwards dynamic programming parse
+//
+// Copyright (c) 2016-2020 Joergen Ibsen
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must
+// not claim that you wrote the original software. If you use this
+// software in a product, an acknowledgment in the product
+// documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must
+// not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source
+// distribution.
+//
+
+#ifndef BRIEFLZ_SSPARSE_H_INCLUDED
+#define BRIEFLZ_SSPARSE_H_INCLUDED
+
+static size_t
+blz_ssparse_workmem_size(size_t src_size)
+{
+ return (LOOKUP_SIZE < 2 * src_size ? 3 * src_size : src_size + LOOKUP_SIZE)
+ * sizeof(blz_word);
+}
+
+// Backwards dynamic programming parse like Storer–Szymanski, but checking all
+// possible matches.
+//
+// In BriefLZ, the length and offset of matches are encoded using a variable
+// length code, this means for instance choosing a literal and a match close
+// by might be better than one match further away. To get a bit optimal parse
+// for BriefLZ, we consider all possible matches for a given position, not
+// just the longest.
+//
+// So if max_depth and accept_len are ULONG_MAX, this version is bit-optimal
+// (up to possibly the final tag). However, this is worst-case O(n^3) for long
+// repeated patterns, which means in practice you usually need to limit the
+// search.
+//
+static unsigned long
+blz_pack_ssparse(const void *src, void *dst, unsigned long src_size, void *workmem,
+ const unsigned long max_depth, const unsigned long accept_len)
+{
+ struct blz_state bs;
+ const unsigned char *const in = (const unsigned char *) src;
+ const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
+
+ assert(src_size < BLZ_WORD_MAX);
+
+ // Check for empty input
+ if (src_size == 0) {
+ return 0;
+ }
+
+ bs.next_out = (unsigned char *) dst;
+
+ // First byte verbatim
+ *bs.next_out++ = in[0];
+
+ // Check for 1 byte input
+ if (src_size == 1) {
+ return 1;
+ }
+
+ // Initialize first tag
+ bs.tag_out = bs.next_out;
+ bs.next_out += 2;
+ bs.tag = 0;
+ bs.bits_left = 16;
+
+ if (src_size < 4) {
+ for (unsigned long i = 1; i < src_size; ++i) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[i];
+ }
+
+ // Return compressed size
+ return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
+ }
+
+ // With a bit of careful ordering we can fit in 3 * src_size words.
+ //
+ // The idea is that the lookup is only used in the first phase to
+ // build the hash chains, so we overlap it with mpos and mlen.
+ // Also, since we are using prev from right to left in phase two,
+ // and that is the order we fill in cost, we can overlap these.
+ //
+ // One detail is that we actually use src_size + 1 elements of cost,
+ // but we put mpos after it, where we do not need the first element.
+ //
+ blz_word *const prev = (blz_word *) workmem;
+ blz_word *const mpos = prev + src_size;
+ blz_word *const mlen = mpos + src_size;
+ blz_word *const cost = prev;
+ blz_word *const lookup = mpos;
+
+ // Phase 1: Build hash chains
+ const int bits = 2 * src_size < LOOKUP_SIZE ? BLZ_HASH_BITS : blz_log2(src_size);
+
+ // Initialize lookup
+ for (unsigned long i = 0; i < (1UL << bits); ++i) {
+ lookup[i] = NO_MATCH_POS;
+ }
+
+ // Build hash chains in prev
+ if (last_match_pos > 0) {
+ for (unsigned long i = 0; i <= last_match_pos; ++i) {
+ const unsigned long hash = blz_hash4_bits(&in[i], bits);
+ prev[i] = lookup[hash];
+ lookup[hash] = i;
+ }
+ }
+
+ // Initialize last three positions as literals
+ mlen[src_size - 3] = 1;
+ mlen[src_size - 2] = 1;
+ mlen[src_size - 1] = 1;
+
+ cost[src_size - 3] = 27;
+ cost[src_size - 2] = 18;
+ cost[src_size - 1] = 9;
+ cost[src_size] = 0;
+
+ // Phase 2: Find lowest cost path from each position to end
+ for (unsigned long cur = last_match_pos; cur > 0; --cur) {
+ // Since we updated prev to the end in the first phase, we
+ // do not need to hash, but can simply look up the previous
+ // position directly.
+ unsigned long pos = prev[cur];
+
+ assert(pos == NO_MATCH_POS || pos < cur);
+
+ // Start with a literal
+ cost[cur] = cost[cur + 1] + 9;
+ mlen[cur] = 1;
+
+ unsigned long max_len = 3;
+
+ const unsigned long len_limit = src_size - cur;
+ unsigned long num_chain = max_depth;
+
+ // Go through the chain of prev matches
+ for (; pos != NO_MATCH_POS && num_chain--; pos = prev[pos]) {
+ unsigned long len = 0;
+
+ // If next byte matches, so this has a chance to be a longer match
+ if (max_len < len_limit && in[pos + max_len] == in[cur + max_len]) {
+ // Find match len
+ while (len < len_limit && in[pos + len] == in[cur + len]) {
+ ++len;
+ }
+ }
+
+ // Extend current match if possible
+ //
+ // Note that we are checking matches in order from the
+ // closest and back. This means for a match further
+ // away, the encoding of all lengths up to the current
+ // max length will always be longer or equal, so we need
+ // only consider the extension.
+ if (len > max_len) {
+ unsigned long min_cost = ULONG_MAX;
+ unsigned long min_cost_len = 3;
+
+ // Find lowest cost match length
+ for (unsigned long i = max_len + 1; i <= len; ++i) {
+ unsigned long match_cost = blz_match_cost(cur - pos - 1, i);
+ assert(match_cost < BLZ_WORD_MAX - cost[cur + i]);
+ unsigned long cost_here = match_cost + cost[cur + i];
+
+ if (cost_here < min_cost) {
+ min_cost = cost_here;
+ min_cost_len = i;
+ }
+ }
+
+ max_len = len;
+
+ // Update cost if cheaper
+ if (min_cost < cost[cur]) {
+ cost[cur] = min_cost;
+ mpos[cur] = pos;
+ mlen[cur] = min_cost_len;
+ }
+ }
+
+ if (len >= accept_len || len == len_limit) {
+ break;
+ }
+ }
+ }
+
+ mpos[0] = 0;
+ mlen[0] = 1;
+
+ // Phase 3: Output compressed data, following lowest cost path
+ for (unsigned long i = 1; i < src_size; i += mlen[i]) {
+ if (mlen[i] == 1) {
+ // Output literal tag
+ blz_putbit(&bs, 0);
+
+ // Copy literal
+ *bs.next_out++ = in[i];
+ }
+ else {
+ const unsigned long offs = i - mpos[i] - 1;
+
+ // Output match tag
+ blz_putbit(&bs, 1);
+
+ // Output match length
+ blz_putgamma(&bs, mlen[i] - 2);
+
+ // Output match offset
+ blz_putgamma(&bs, (offs >> 8) + 2);
+ *bs.next_out++ = offs & 0x00FF;
+ }
+ }
+
+ // Return compressed size
+ return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
+}
+
+#endif /* BRIEFLZ_SSPARSE_H_INCLUDED */
--- /dev/null
+++ b/3rd/brieflz/depacks.c
@@ -1,0 +1,310 @@
+/*
+ * BriefLZ - small fast Lempel-Ziv
+ *
+ * C safe depacker
+ *
+ * Copyright (c) 2002-2018 Joergen Ibsen
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must
+ * not claim that you wrote the original software. If you use this
+ * software in a product, an acknowledgment in the product
+ * documentation would be appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must
+ * not be misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ */
+
+#include "brieflz.h"
+
+/* Internal data structure */
+struct blz_state {
+ const unsigned char *src;
+ unsigned char *dst;
+ unsigned long src_avail;
+ unsigned long dst_avail;
+ unsigned int tag;
+ int bits_left;
+};
+
+#if !defined(BLZ_NO_LUT)
+static const unsigned char blz_gamma_lookup[256][2] = {
+ /* 00xxxxxx = 2 */
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+ {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+
+ /* 0100xxxx = 4 */
+ {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4},
+ {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4},
+
+ /* 010100xx = 8 */
+ {8, 6}, {8, 6}, {8, 6}, {8, 6},
+
+ /* 01010100 = 16 01010101 = 16+ 01010110 = 17 01010111 = 17+ */
+ {16, 8}, {16, 0}, {17, 8}, {17, 0},
+
+ /* 010110xx = 9 */
+ {9, 6}, {9, 6}, {9, 6}, {9, 6},
+
+ /* 01011100 = 18 01011101 = 18+ 01011110 = 19 01011111 = 19+ */
+ {18, 8}, {18, 0}, {19, 8}, {19, 0},
+
+ /* 0110xxxx = 5 */
+ {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4},
+ {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4},
+
+ /* 011100xx = 10 */
+ {10, 6}, {10, 6}, {10, 6}, {10, 6},
+
+ /* 01110100 = 20 01110101 = 20+ 01110110 = 21 01110111 = 21+ */
+ {20, 8}, {20, 0}, {21, 8}, {21, 0},
+
+ /* 011110xx = 11 */
+ {11, 6}, {11, 6}, {11, 6}, {11, 6},
+
+ /* 01111100 = 22 01111101 = 22+ 01111110 = 23 01111111 = 23+ */
+ {22, 8}, {22, 0}, {23, 8}, {23, 0},
+
+ /* 10xxxxxx = 3 */
+ {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
+ {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
+ {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
+ {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
+ {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
+ {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
+ {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
+ {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
+
+ /* 1100xxxx = 6 */
+ {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4},
+ {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4},
+
+ /* 110100xx = 12 */
+ {12, 6}, {12, 6}, {12, 6}, {12, 6},
+
+ /* 11010100 = 24 11010101 = 24+ 11010110 = 25 11010111 = 25+ */
+ {24, 8}, {24, 0}, {25, 8}, {25, 0},
+
+ /* 110110xx = 13 */
+ {13, 6}, {13, 6}, {13, 6}, {13, 6},
+
+ /* 11011100 = 26 11011101 = 26+ 11011110 = 27 11011111 = 27+ */
+ {26, 8}, {26, 0}, {27, 8}, {27, 0},
+
+ /* 1110xxxx = 7 */
+ {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4},
+ {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4},
+
+ /* 111100xx = 14 */
+ {14, 6}, {14, 6}, {14, 6}, {14, 6},
+
+ /* 11110100 = 28 11110101 = 28+ 11110110 = 29 11110111 = 29+ */
+ {28, 8}, {28, 0}, {29, 8}, {29, 0},
+
+ /* 111110xx = 15 */
+ {15, 6}, {15, 6}, {15, 6}, {15, 6},
+
+ /* 11111100 = 30 11111101 = 30+ 11111110 = 31 11111111 = 31+ */
+ {30, 8}, {30, 0}, {31, 8}, {31, 0}
+};
+#endif
+
+static int
+blz_getbit_safe(struct blz_state *bs, unsigned int *result)
+{
+ unsigned int bit;
+
+ /* Check if tag is empty */
+ if (!bs->bits_left--) {
+ if (bs->src_avail < 2) {
+ return 0;
+ }
+ bs->src_avail -= 2;
+
+ /* Load next tag */
+ bs->tag = (unsigned int) bs->src[0]
+ | ((unsigned int) bs->src[1] << 8);
+ bs->src += 2;
+ bs->bits_left = 15;
+ }
+
+ /* Shift bit out of tag */
+ bit = (bs->tag & 0x8000) ? 1 : 0;
+ bs->tag <<= 1;
+
+ *result = bit;
+
+ return 1;
+}
+
+static int
+blz_getgamma_safe(struct blz_state *bs, unsigned long *result)
+{
+ unsigned int bit;
+ unsigned long v = 1;
+
+#if !defined(BLZ_NO_LUT)
+ /* Decode up to 8 bits of gamma2 code using lookup if possible */
+ if (bs->bits_left >= 8) {
+ unsigned int top8 = (bs->tag >> 8) & 0x00FF;
+ int shift;
+
+ v = blz_gamma_lookup[top8][0];
+ shift = (int) blz_gamma_lookup[top8][1];
+
+ if (shift) {
+ bs->tag <<= shift;
+ bs->bits_left -= shift;
+
+ *result = v;
+
+ return 1;
+ }
+
+ bs->tag <<= 8;
+ bs->bits_left -= 8;
+ }
+#endif
+
+ /* Input gamma2-encoded bits */
+ do {
+ if (!blz_getbit_safe(bs, &bit)) {
+ return 0;
+ }
+
+ if (v & 0x80000000UL) {
+ return 0;
+ }
+
+ v = (v << 1) + bit;
+
+ if (!blz_getbit_safe(bs, &bit)) {
+ return 0;
+ }
+ } while (bit);
+
+ *result = v;
+
+ return 1;
+}
+
+unsigned long
+blz_depack_safe(const void *src, unsigned long src_size,
+ void *dst, unsigned long depacked_size)
+{
+ struct blz_state bs;
+ unsigned long dst_size = 0;
+ unsigned int bit;
+
+ bs.src = (const unsigned char *) src;
+ bs.src_avail = src_size;
+ bs.dst = (unsigned char *) dst;
+ bs.dst_avail = depacked_size;
+
+ /* Initialise to one bit left in tag; that bit is zero (a literal) */
+ bs.bits_left = 1;
+ bs.tag = 0;
+
+ /* Main decompression loop */
+ while (dst_size < depacked_size) {
+ if (!blz_getbit_safe(&bs, &bit)) {
+ return BLZ_ERROR;
+ }
+
+ if (bit) {
+ unsigned long len;
+ unsigned long off;
+
+ /* Input match length and offset */
+ if (!blz_getgamma_safe(&bs, &len)) {
+ return BLZ_ERROR;
+ }
+ if (!blz_getgamma_safe(&bs, &off)) {
+ return BLZ_ERROR;
+ }
+
+ len += 2;
+ off -= 2;
+
+ if (off >= 0x00FFFFFFUL) {
+ return BLZ_ERROR;
+ }
+
+ if (!bs.src_avail--) {
+ return BLZ_ERROR;
+ }
+
+ off = (off << 8) + (unsigned long) *bs.src++ + 1;
+
+ if (off > depacked_size - bs.dst_avail) {
+ return BLZ_ERROR;
+ }
+
+ if (len > bs.dst_avail) {
+ return BLZ_ERROR;
+ }
+
+ bs.dst_avail -= len;
+
+ /* Copy match */
+ {
+ const unsigned char *p = bs.dst - off;
+ unsigned long i;
+
+ for (i = len; i > 0; --i) {
+ *bs.dst++ = *p++;
+ }
+ }
+
+ dst_size += len;
+ }
+ else {
+ /* Copy literal */
+ if (!bs.src_avail-- || !bs.dst_avail--) {
+ return BLZ_ERROR;
+ }
+ *bs.dst++ = *bs.src++;
+
+ dst_size++;
+ }
+ }
+
+ /* Return decompressed size */
+ return dst_size;
+}
+
+/* clang -g -O1 -fsanitize=fuzzer,address -DBLZ_FUZZING depacks.c */
+#if defined(BLZ_FUZZING)
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern int
+LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+ if (size > ULONG_MAX / 2) { return 0; }
+ void *depacked = malloc(4096);
+ if (!depacked) { abort(); }
+ blz_depack_safe(data, size, depacked, 4096);
+ free(depacked);
+ return 0;
+}
+#endif
--- a/LICENSE
+++ b/LICENSE
@@ -1,10 +1,13 @@
-9front portions (libmp, utf handling parts) is under MIT license.
+9front portions (3rd/mp, 3rd/utf) is under MIT license.
-SpookyHash implementation is under CC0 license.
+SpookyHash (3rd/spooky.[ch] implementation is under CC0 license.
QP tries (3rd/fn.[ch] 3rd/tbl.[ch]) is under CC0 license.
+BriefLZ (3rd/brieflz/*, modified - see 3rd/brieflz/NOTE) is under zlib license.
+
Copyright (c) 2008 Jeff Bezanson
+Copyright (c) 2025 Sigrid Solveig Haflínudóttir
All rights reserved.
--- /dev/null
+++ b/compress.c
@@ -1,0 +1,58 @@
+#include "flisp.h"
+#include "cvalues.h"
+#include "types.h"
+#include "brieflz.h"
+
+BUILTIN("lz-pack", lz_pack)
+{
+ if(nargs < 1)
+ argcount(nargs, 1);
+ if(nargs > 2)
+ argcount(nargs, 2);
+
+ if(!isarray(args[0]))
+ type_error("array", args[0]);
+ uint8_t *in;
+ size_t insz;
+ to_sized_ptr(args[0], &in, &insz);
+ int level = nargs > 1 ? tofixnum(args[1]) : 0;
+ if(level < 0)
+ level = 0;
+ else if(level > 10)
+ level = 10;
+
+ value_t v = cvalue(cv_class(ptr(args[0])), blz_max_packed_size(insz));
+ uint8_t *out = cvalue_data(v);
+
+ size_t worksz = level > 0
+ ? blz_workmem_size_level(insz, level)
+ : blz_workmem_size(insz);
+ uint8_t *work = MEM_ALLOC(worksz);
+ unsigned long n = level > 0
+ ? blz_pack_level(in, out, insz, work, level)
+ : blz_pack(in, out, insz, work);
+ MEM_FREE(work);
+ if(n == BLZ_ERROR)
+ lerrorf(FL(ArgError), "blz error");
+ cvalue_len(v) = n;
+ return v;
+}
+
+BUILTIN("lz-unpack", lz_unpack)
+{
+ argcount(nargs, 2);
+
+ uint8_t *in;
+ size_t insz;
+ to_sized_ptr(args[0], &in, &insz);
+ if(!isarray(args[0]))
+ type_error("array", args[0]);
+ size_t outsz = tosize(args[1]);
+ value_t v = cvalue(cv_class(ptr(args[0])), outsz);
+ uint8_t *out = cvalue_data(v);
+ unsigned long n = blz_depack_safe(in, insz, out, outsz);
+ if(n == BLZ_ERROR)
+ lerrorf(FL(ArgError), "blz error");
+ cvalue_len(v) = n;
+ return v;
+}
--- a/docs_extra.lsp
+++ b/docs_extra.lsp
@@ -13,4 +13,17 @@
"Print various VM-related information, such as the number of GC calls
so far, heap and stack size, etc.")
+(doc-for (lz-pack data (level 0))
+ "Return data compressed using Lempel-Ziv.
+The data must be an array, returned value will have the same type.
+The optional level is between 0 and 10. With level 0 a simple LZSS
+using hashing will be performed. Levels between 1 and 9 offer a
+trade-off between time/space and ratio. Level 10 is optimal but very
+slow.")
+
+(doc-for (lz-unpack data decompressed-bytes)
+ "Return decompressed data previously compressed using lz-pack.
+The decompressed-bytes parameter is the expected size of the
+decompressed data.")
+
(del! *syntax-environment* 'doc-for)
--- a/flisp.boot
+++ b/flisp.boot
@@ -14,7 +14,8 @@
#fn("6000n201l:" #()) #fn("6000n201m:" #()) 0 #fn("8000z0700}2:" #(vector))
#fn("8000z0700}2:" #(aset!)) 0 0 0 0 0 0 0 0 0 0 0 #fn("9000n3012082>1|:" #(#fn("6000n1A061:" #())))
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 #fn("8000z0700}2:" #(aref)) 0 0)
- *properties* #table(*funvars* #table(void? (x) length= (lst n) help (term) void rest *prompt* nil vm-stats nil) *doc* #table(void? "Return #t if x is #<void> and #f otherwise." length= "Bounded length test.\nUse this instead of (= (length lst) n), since it avoids unnecessary\nwork and always terminates." help "Display documentation for the specified term, if available." void "Return the constant #<void> while ignoring any arguments.\n#<void> is mainly used when a function has side effects but does not\nproduce any meaningful value to return, so even though #t or nil could\nbe returned instead, in case of #<void> alone, REPL will not print\nit." *prompt* "Function called by REPL to signal the user input is required.\nDefault function prints \"#;> \"." vm-stats "Print various VM-related information, such as the number of GC calls\nso far, heap and stack size, etc." *properties* "All properties of symbols recorded with putprop are recorded in this table."))
+ *properties* #table(*funvars* #table(lz-unpack (data decompressed-bytes) void? (x) length= (lst
+ n) help (term) void rest *prompt* nil lz-pack (data (level 0)) vm-stats nil) *doc* #table(lz-unpack "Return decompressed data previously compressed using lz-pack.\nThe decompressed-bytes parameter is the expected size of the\ndecompressed data." void? "Return #t if x is #<void> and #f otherwise." length= "Bounded length test.\nUse this instead of (= (length lst) n), since it avoids unnecessary\nwork and always terminates." help "Display documentation for the specified term, if available." void "Return the constant #<void> while ignoring any arguments.\n#<void> is mainly used when a function has side effects but does not\nproduce any meaningful value to return, so even though #t or nil could\nbe returned instead, in case of #<void> alone, REPL will not print\nit." *prompt* "Function called by REPL to signal the user input is required.\nDefault function prints \"#;> \"." lz-pack "Return data compressed using Lempel-Ziv.\nThe data must be an array, returned value will have the same type.\nThe optional level is between 0 and 10. With level 0 a simple LZSS\nusing hashing will be performed. Levels between 1 and 9 offer a\ntrade-off between time/space and ratio. Level 10 is optimal but very\nslow." vm-stats "Print various VM-related information, such as the number of GC calls\nso far, heap and stack size, etc." *properties* "All properties of symbols recorded with putprop are recorded in this table."))
*runestring-type* (array rune) *string-type* (array byte)
*syntax-environment* #table(unwind-protect #fn("A000n220502050218722q1e3e2e1232402286e12587e12686e2e3e3e387e1e3e3:" #(#fn(gensym)
let λ prog1 trycatch begin raise)) help #fn("<000n170021527002252853\\0738551474504863B07450475086P51@30O47450@B0732627051524745047860:" #(getprop
--- a/meson.build
+++ b/meson.build
@@ -25,13 +25,6 @@
language: 'c',
)
-utf = [
- '3rd/utf/rune.c',
- '3rd/utf/runeistype.c',
- '3rd/utf/runetotype.c',
- '3rd/utf/utfnlen.c',
-]
-
src = [
'3rd/fn.c',
'3rd/mt19937-64.c',
@@ -39,6 +32,7 @@
'3rd/tbl.c',
'bitvector.c',
'builtins.c',
+ 'compress.c',
'cvalues.c',
'equal.c',
'equalhash.c',
@@ -122,6 +116,18 @@
],
)
+brieflz = static_library(
+ 'brieflz',
+ [
+ '3rd/brieflz/brieflz.c',
+ '3rd/brieflz/depacks.c',
+ ],
+ include_directories: include_directories(
+ '3rd/brieflz',
+ 'posix',
+ ),
+)
+
mp = static_library(
'mp',
[
@@ -163,11 +169,24 @@
),
)
+utf = static_library(
+ 'utf',
+ [
+ '3rd/utf/rune.c',
+ '3rd/utf/runeistype.c',
+ '3rd/utf/runetotype.c',
+ '3rd/utf/utfnlen.c',
+ ],
+ include_directories: include_directories(
+ '3rd/utf',
+ 'posix',
+ ),
+)
+
flisp = executable(
'flisp',
sources: [
src,
- utf,
boot,
builtins,
],
@@ -176,12 +195,15 @@
] + libsixel_dep,
include_directories: include_directories(
'3rd',
+ '3rd/brieflz',
'3rd/mp',
'3rd/utf',
'posix',
),
link_with: [
+ brieflz,
mp,
+ utf,
],
)
--- a/mkfile
+++ b/mkfile
@@ -2,7 +2,7 @@
BIN=/$objtype/bin
TARG=flisp
-CFLAGS=$CFLAGS -p -I. -I3rd -Iplan9 -D__plan9__ -D__${objtype}__ -DNDEBUG
+CFLAGS=$CFLAGS -p -I. -I3rd -I3rd/brieflz -Iplan9 -D__plan9__ -D__${objtype}__ -DNDEBUG
CLEANFILES=plan9/flisp.boot.s plan9/builtin_fns.h
HFILES=\
@@ -12,6 +12,8 @@
plan9/platform.h\
OFILES=\
+ 3rd/brieflz/brieflz.$O\
+ 3rd/brieflz/depacks.$O\
3rd/fn.$O\
3rd/mt19937-64.$O\
3rd/spooky.$O\
@@ -20,6 +22,7 @@
bitvector.$O\
builtins.$O\
builtins_plan9`{test -f builtins_plan9_$objtype.s && echo -n _$objtype}.$O\
+ compress.$O\
cvalues.$O\
equal.$O\
equalhash.$O\
--- a/plan9/platform.h
+++ b/plan9/platform.h
@@ -1,3 +1,5 @@
+#pragma once
+
#include <u.h>
#include <libc.h>
#include <ctype.h>
@@ -78,6 +80,7 @@
#define INT64_MIN ((int64_t)0x8000000000000000LL)
#define INT64_MAX 0x7fffffffffffffffLL
#define UINT64_MAX 0xffffffffffffffffULL
+#define ULONG_MAX UINT32_MAX
#define PATHSEP '/'
#define PATHSEPSTRING "/"
--- a/test/unittest.lsp
+++ b/test/unittest.lsp
@@ -445,5 +445,17 @@
;; now allocate enough to trigger GC
(for 1 8000000 (λ (i) (cons 1 2)))
+;; brieflz bindings
+(let* ((level 10)
+ (s (file "unittest.lsp"))
+ (in (io-readall s))
+ (packed (lz-pack in level))
+ (unpacked (lz-unpack packed (sizeof in))))
+ (io-close s)
+ (assert (< (sizeof packed) (sizeof in)))
+ (assert (equal? in unpacked))
+ (princ "lz packing at level " level ": " (sizeof in) " → " (sizeof packed))
+ (newline))
+
(princ "all tests pass")
(newline)