ref: f03d44497b82a28ce1daa833fdc670f17e461507
parent: efdc17331aba84555fe9931748138aced8a45a63
author: Lennart Augustsson <lennart.augustsson@epicgames.com>
date: Sun Sep 29 14:05:58 EDT 2024
Some new compression stuff.
--- /dev/null
+++ b/bw.c
@@ -1,0 +1,194 @@
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+typedef uint32_t index_t;
+
+/*
+ * |.......................................|
+ * ^ ^
+ * a b
+ * <- n ->
+ * <- m ->
+ * <- o ->
+ */
+static uint8_t *compar_arg;
+static size_t compar_len;
+int compar(const void *pa, const void *pb)
+{
+ index_t a = *(index_t*)pa;
+ index_t b = *(index_t*)pb;
+ int r;
+ if (a == b)
+ return 0;
+ if (a < b) {
+ size_t n = compar_len - b; /* bytes until end of buffer */
+ r = memcmp(compar_arg + a, compar_arg + b, n);
+ if (r)
+ return r;
+ size_t m = b - a;
+ r = memcmp(compar_arg + a + n, compar_arg, m);
+ if (r)
+ return r;
+ size_t o = a;
+ return memcmp(compar_arg, compar_arg + m, o);
+ } else {
+ size_t n = compar_len - a; /* bytes until end of buffer */
+ r = memcmp(compar_arg + a, compar_arg + b, n);
+ if (r)
+ return r;
+ size_t m = a - b;
+ r = memcmp(compar_arg, compar_arg + b + n, m);
+ if (r)
+ return r;
+ size_t o = a;
+ return memcmp(compar_arg + m, compar_arg, o);
+
+ }
+ return 0;
+}
+
+/* Sort all rotations of buf, and the indices of the sorted strings in res. */
+void
+sort_buffer(uint8_t *buf, size_t buflen, index_t *res)
+{
+ for(size_t i = 0; i < buflen; i++)
+ res[i] = i;
+ compar_arg = buf;
+ compar_len = buflen;
+ qsort(res, buflen, sizeof(index_t), compar);
+}
+
+void
+put_rep(FILE *out, size_t n)
+{
+ if (n > 127)
+ put_rep(out, n / 128);
+ fputc(n % 128 + 128, out);
+}
+
+#define MINRLE 3
+
+void
+rle(FILE *out, uint8_t *data, size_t len)
+{
+ for(size_t i = 0; i < len; ) {
+ uint8_t c = data[i++];
+ size_t n;
+ for(n = 1; i < len && data[i] == c; i++, n++)
+ ;
+ if (n >= MINRLE) {
+ put_rep(out, n - 1);
+ fputc(c, out);
+ } else {
+ while(n-- > 0)
+ fputc(c, out);
+ }
+ }
+}
+
+size_t
+get_rep(FILE *in)
+{
+ size_t n = 0;
+ for(;;) {
+ int c = fgetc(in);
+ //fprintf(stderr,"get_rep %02x\n", c);
+ if (c < 0) abort();
+ if (c < 128) {
+ ungetc(c, in);
+ return n;
+ }
+ n = n * 128 + c - 128;
+ }
+}
+
+void
+unrle(FILE *in, uint8_t *data, size_t len)
+{
+ for(size_t i = 0; i < len; ) {
+ size_t n = get_rep(in);
+ int c = fgetc(stdin);
+ //fprintf(stderr,"unrle %02x\n", c);
+ if (c < 0) abort();
+ //fprintf(stderr, "n=%d c='%c'\n", (int)n, c);
+ n += 1;
+ for(size_t j = 0; j < n; j++) {
+ data[i++] = c;
+ }
+ //fprintf(stderr, "i=%d\n", (int)i);
+ }
+}
+
+#define MAXSIZE 10000000
+
+int
+main(int argc, char **argv)
+{
+ int encode = argc == 1;
+
+ if (encode) {
+ fprintf(stderr, "encode\n");
+ uint8_t *data = malloc(MAXSIZE);
+ size_t len = fread(data, 1, MAXSIZE, stdin);
+ index_t *res = malloc(len * sizeof(index_t));
+
+ sort_buffer((uint8_t*)data, len, res);
+#if 0
+ for(size_t i = 0; i < len; i++) {
+ index_t offs = res[i];
+ //fprintf(stderr, "offs=%d\n", (int)offs);
+ fwrite(data + offs, 1, len - offs, stdout);
+ fwrite(data,1, offs, stdout);
+ fputs("\n", stdout);
+ }
+#endif
+ uint8_t *last = malloc(len + 1);
+ index_t zero;
+ for(size_t i = 0; i < len; i++) {
+ index_t offs = res[i];
+ last[i] = data[(offs + len - 1) % len];
+ if (offs == 0)
+ zero = i;
+ }
+ last[len] = 0;
+ fprintf(stderr, "len=%d, zero=%d\n", (int)len, (int)zero);
+ // fprintf(stderr, "%s\n", last);
+ put_rep(stdout, len); fputc(0, stdout);
+ put_rep(stdout, zero); fputc(0, stdout);
+ rle(stdout, last, len);
+ // fwrite(last, 1, len, stdout);
+ } else {
+ fprintf(stderr, "decode\n");
+ size_t len = get_rep(stdin); if (fgetc(stdin) != 0) abort();
+ size_t zero = get_rep(stdin); if (fgetc(stdin) != 0) abort();
+ fprintf(stderr, "len=%d, zero=%d\n", (int)len, (int)zero);
+ uint8_t *data = malloc(len);
+ unrle(stdin, data, len);
+ // fwrite(data, 1, len, stdout);
+#define MAXCHAR 128
+ size_t count[MAXCHAR];
+ index_t *pred = malloc(len * sizeof(index_t));
+ uint8_t *odata = malloc(len);
+ for(size_t i = 0; i < MAXCHAR; i++) {
+ count[i] = 0;
+ }
+ for(size_t i = 0; i < len; i++) {
+ pred[i] = count[data[i]]++;
+ }
+ size_t sum = 0;
+ for(size_t i = 0; i < MAXCHAR; i++) {
+ size_t s = count[i];
+ count[i] = sum;
+ sum += s;
+ }
+ size_t i = zero;
+ for(size_t j = len; j > 0; j--) {
+ odata[j - 1] = data[i];
+ i = pred[i] + count[data[i]];
+ }
+ fwrite(odata, 1, len, stdout);
+ }
+ exit(0);
+}
--- a/src/runtime/bfile.c
+++ b/src/runtime/bfile.c
@@ -353,6 +353,160 @@
}
#endif /* WANT_LZ77 */
+
+#if WANT_RLE
+/***************** BFILE via RLE decompression *******************/
+/*
+ * Run Length Encoding for ASCII
+ * Format
+ * c - c one ASCII character
+ * 0x80+n c - n repetitions of ASCII character c
+ * 0x80+n 0x80+m c - n*128+m repetitions of ASCII character c
+ * ... for longer run lengths
+ */
+
+struct BFILE_rle {
+ BFILE mets;
+ BFILE *bfile; /* underlying BFILE */
+ size_t count;
+ int byte;
+ int unget;
+};
+
+int
+get_rep(BFILE *in)
+{
+ size_t n = 0;
+ for(;;) {
+ int c = getb(in);
+ //fprintf(stderr,"get_rep %02x\n", c);
+ if (c < 0)
+ return -1;
+ if (c < 128) {
+ ungetb(c, in);
+ return n;
+ }
+ n = n * 128 + c - 128;
+ }
+}
+
+int
+getb_rle(BFILE *bp)
+{
+ struct BFILE_rle *p = (struct BFILE_rle*)bp;
+ CHECKBFILE(bp, getb_rle);
+ if (p->unget >= 0) {
+ int c = p->unget;
+ p->unget = -1;
+ return c;
+ }
+ if (p->count) {
+ p->count--;
+ return p->byte;
+ } else {
+ int n = get_rep(p->bfile);
+ if (n < 0)
+ return -1;
+ p->count = n;
+ p->byte = getb(p->bfile);
+ return p->byte;
+ }
+}
+
+void
+ungetb_rle(int c, BFILE *bp)
+{
+ struct BFILE_rle *p = (struct BFILE_rle*)bp;
+ CHECKBFILE(bp, getb_rle);
+ p->unget = c;
+}
+
+void
+put_rep(BFILE *out, size_t n)
+{
+ if (n > 127)
+ put_rep(out, n / 128);
+ putb(n % 128 + 128, out);
+}
+
+void
+putb_rle(int b, BFILE *bp)
+{
+ struct BFILE_rle *p = (struct BFILE_rle*)bp;
+ CHECKBFILE(bp, getb_rle);
+
+ if (b == p->byte) {
+ p->count++;
+ } else {
+ if (p->count > 2) {
+ /* More than 2 repeating chars, it's worth compressing */
+ put_rep(p->bfile, p->count - 1);
+ putb(p->byte, p->bfile);
+ } else {
+ while(p->count-- > 0)
+ putb(p->byte, p->bfile);
+ }
+ p->count = 1;
+ p->byte = b;
+ }
+}
+
+void
+closeb_rle(BFILE *bp)
+{
+ struct BFILE_rle *p = (struct BFILE_rle*)bp;
+ CHECKBFILE(bp, getb_rle);
+
+ closeb(p->bfile);
+}
+
+void
+flushb_rle(BFILE *bp)
+{
+ /* There is nothing we can do */
+}
+
+BFILE *
+add_rle_decompressor(BFILE *file)
+{
+ struct BFILE_rle *p = MALLOC(sizeof(struct BFILE_rle));
+
+ if (!p)
+ memerr();
+ p->mets.getb = getb_rle;
+ p->mets.ungetb = ungetb_rle;
+ p->mets.putb = 0;
+ p->mets.flushb = 0;
+ p->mets.closeb = closeb_rle;
+ p->count = 0;
+ p->unget = -1;
+ p->bfile = file;
+
+ return (BFILE*)p;
+}
+
+BFILE *
+add_rle_compressor(BFILE *file)
+{
+ struct BFILE_rle *p = MALLOC(sizeof(struct BFILE_rle));
+
+ if (!p)
+ memerr();
+ p->mets.getb = getb_rle;
+ p->mets.ungetb = 0;
+ p->mets.putb = putb_rle;
+ p->mets.flushb = flushb_rle;
+ p->mets.closeb = closeb_rle;
+ p->count = 0;
+ p->byte = -1;
+ p->bfile = file;
+
+ return (BFILE*)p;
+}
+
+#endif /* WANT_RLE */
+
+
/***************** BFILE with UTF8 encode/decode *******************/
struct BFILE_utf8 {
--- a/src/runtime/eval.c
+++ b/src/runtime/eval.c
@@ -28,6 +28,10 @@
#define WANT_LZ77 1
#endif
+#if !defined(WANT_RLE)
+#define WANT_RLE 1
+#endif
+
#if WANT_LZ77
size_t lz77d(uint8_t *src, size_t srclen, uint8_t **bufp);
size_t lz77c(uint8_t *src, size_t srclen, uint8_t **bufp);