ref: be3809866ce89d2dcac0b02f2ec7b49b5393cb56
dir: /sys/src/libflate/deflate.c/
#include <u.h> #include <libc.h> #include <flate.h> typedef struct Chain Chain; typedef struct Chains Chains; typedef struct Dyncode Dyncode; typedef struct Huff Huff; typedef struct LZblock LZblock; typedef struct LZstate LZstate; enum { /* * deflate format paramaters */ DeflateUnc = 0, /* uncompressed block */ DeflateFix = 1, /* fixed huffman codes */ DeflateDyn = 2, /* dynamic huffman codes */ DeflateEob = 256, /* end of block code in lit/len book */ DeflateMaxBlock = 64*1024-1, /* maximum size of uncompressed block */ DeflateMaxExp = 10, /* maximum expansion for a block */ LenStart = 257, /* start of length codes in litlen */ Nlitlen = 288, /* number of litlen codes */ Noff = 30, /* number of offset codes */ Nclen = 19, /* number of codelen codes */ MaxOff = 32*1024, MinMatch = 3, /* shortest match possible */ MaxMatch = 258, /* longest match possible */ /* * huffman code paramaters */ MaxLeaf = Nlitlen, MaxHuffBits = 16, /* max bits in a huffman code */ ChainMem = 2 * (MaxHuffBits - 1) * MaxHuffBits, /* * coding of the lz parse */ LenFlag = 1 << 3, LenShift = 4, /* leaves enough space for MinMatchMaxOff */ MaxLitRun = LenFlag - 1, /* * internal lz paramaters */ DeflateOut = 4096, /* output buffer size */ BlockSize = 8192, /* attempted input read quanta */ DeflateBlock = DeflateMaxBlock & ~(BlockSize - 1), MinMatchMaxOff = 4096, /* max profitable offset for small match; * assumes 8 bits for len, 5+10 for offset * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS */ HistSlop = 512, /* must be at lead MaxMatch */ HistBlock = 64*1024, HistSize = HistBlock + HistSlop, HashLog = 13, HashSize = 1<<HashLog, MaxOffCode = 256, /* biggest offset looked up in direct table */ EstLitBits = 8, EstLenBits = 4, EstOffBits = 5, }; /* * knuth vol. 3 multiplicative hashing * each byte x chosen according to rules * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4 * with reasonable spread between the bytes & their complements * * the 3 byte value appears to be as almost good as the 4 byte value, * and might be faster on some machines */ /* #define hashit(c) (((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) */ #define hashit(c) ((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) /* * lempel-ziv style compression state */ struct LZstate { uchar hist[HistSize]; ulong pos; /* current location in history buffer */ ulong avail; /* data available after pos */ int eof; ushort hash[HashSize]; /* hash chains */ ushort nexts[MaxOff]; int now; /* pos in hash chains */ int dot; /* dawn of time in history */ int prevlen; /* lazy matching state */ int prevoff; int maxcheck; /* compressor tuning */ uchar obuf[DeflateOut]; uchar *out; /* current position in the output buffer */ uchar *eout; ulong bits; /* bit shift register */ int nbits; int rbad; /* got an error reading the buffer */ int wbad; /* got an error writing the buffer */ int (*w)(void*, void*, int); void *wr; ulong totr; /* total input size */ ulong totw; /* total output size */ int debug; }; struct LZblock { ushort parse[DeflateMaxBlock / 2 + 1]; int lastv; /* value being constucted for parse */ ulong litlencount[Nlitlen]; ulong offcount[Noff]; ushort *eparse; /* limit for parse table */ int bytes; /* consumed from the input */ int excost; /* cost of encoding extra len & off bits */ }; /* * huffman code table */ struct Huff { short bits; /* length of the code */ ushort encode; /* the code */ }; /* * encoding of dynamic huffman trees */ struct Dyncode { int nlit; int noff; int nclen; int ncode; Huff codetab[Nclen]; uchar codes[Nlitlen+Noff]; uchar codeaux[Nlitlen+Noff]; }; static int deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int)); static int lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish); static void wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*); static int bitcost(Huff*, ulong*, int); static int huffcodes(Dyncode*, Huff*, Huff*); static void wrdyncode(LZstate*, Dyncode*); static void lzput(LZstate*, ulong bits, int nbits); static void lzflushbits(LZstate*); static void lzflush(LZstate *lz); static void lzwrite(LZstate *lz, void *buf, int n); static int hufftabinit(Huff*, int, ulong*, int); static int mkgzprecode(Huff*, ulong *, int, int); static int mkprecode(Huff*, ulong *, int, int, ulong*); static void nextchain(Chains*, int); static void leafsort(ulong*, ushort*, int, int); /* conversion from len to code word */ static int lencode[MaxMatch]; /* * conversion from off to code word * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7] */ static int offcode[MaxOffCode]; static int bigoffcode[256]; /* litlen code words LenStart-285 extra bits */ static int litlenbase[Nlitlen-LenStart]; static int litlenextra[Nlitlen-LenStart] = { /* 257 */ 0, 0, 0, /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, /* 280 */ 4, 5, 5, 5, 5, 0, 0, 0 }; /* offset code word extra bits */ static int offbase[Noff]; static int offextra[] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 0, 0, }; /* order code lengths */ static int clenorder[Nclen] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; /* static huffman tables */ static Huff litlentab[Nlitlen]; static Huff offtab[Noff]; static Huff hofftab[Noff]; /* bit reversal for brain dead endian swap in huffman codes */ static uchar revtab[256]; static ulong nlits; static ulong nmatches; int deflateinit(void) { ulong bitcount[MaxHuffBits]; int i, j, ci, n; /* byte reverse table */ for(i=0; i<256; i++) for(j=0; j<8; j++) if(i & (1<<j)) revtab[i] |= 0x80 >> j; /* static Litlen bit lengths */ for(i=0; i<144; i++) litlentab[i].bits = 8; for(i=144; i<256; i++) litlentab[i].bits = 9; for(i=256; i<280; i++) litlentab[i].bits = 7; for(i=280; i<Nlitlen; i++) litlentab[i].bits = 8; memset(bitcount, 0, sizeof(bitcount)); bitcount[8] += 144 - 0; bitcount[9] += 256 - 144; bitcount[7] += 280 - 256; bitcount[8] += Nlitlen - 280; if(!hufftabinit(litlentab, Nlitlen, bitcount, 9)) return FlateInternal; /* static offset bit lengths */ for(i = 0; i < Noff; i++) offtab[i].bits = 5; memset(bitcount, 0, sizeof(bitcount)); bitcount[5] = Noff; if(!hufftabinit(offtab, Noff, bitcount, 5)) return FlateInternal; bitcount[0] = 0; bitcount[1] = 0; if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits)) return FlateInternal; /* conversion tables for lens & offs to codes */ ci = 0; for(i = LenStart; i < 286; i++){ n = ci + (1 << litlenextra[i - LenStart]); litlenbase[i - LenStart] = ci; for(; ci < n; ci++) lencode[ci] = i; } /* patch up special case for len MaxMatch */ lencode[MaxMatch-MinMatch] = 285; litlenbase[285-LenStart] = MaxMatch-MinMatch; ci = 0; for(i = 0; i < 16; i++){ n = ci + (1 << offextra[i]); offbase[i] = ci; for(; ci < n; ci++) offcode[ci] = i; } ci = ci >> 7; for(; i < 30; i++){ n = ci + (1 << (offextra[i] - 7)); offbase[i] = ci << 7; for(; ci < n; ci++) bigoffcode[ci] = i; } return FlateOk; } static void deflatereset(LZstate *lz, int level, int debug) { memset(lz->nexts, 0, sizeof lz->nexts); memset(lz->hash, 0, sizeof lz->hash); lz->totr = 0; lz->totw = 0; lz->pos = 0; lz->avail = 0; lz->out = lz->obuf; lz->eout = &lz->obuf[DeflateOut]; lz->prevlen = MinMatch - 1; lz->prevoff = 0; lz->now = MaxOff + 1; lz->dot = lz->now; lz->bits = 0; lz->nbits = 0; lz->maxcheck = (1 << level); lz->maxcheck -= lz->maxcheck >> 2; if(lz->maxcheck < 2) lz->maxcheck = 2; else if(lz->maxcheck > 1024) lz->maxcheck = 1024; lz->debug = debug; } int deflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug) { LZstate *lz; LZblock *lzb; int ok; lz = malloc(sizeof *lz + sizeof *lzb); if(lz == nil) return FlateNoMem; lzb = (LZblock*)&lz[1]; deflatereset(lz, level, debug); lz->w = w; lz->wr = wr; lz->wbad = 0; lz->rbad = 0; lz->eof = 0; ok = FlateOk; while(!lz->eof || lz->avail){ ok = deflateb(lz, lzb, rr, r); if(ok != FlateOk) break; } if(ok == FlateOk && lz->rbad) ok = FlateInputFail; if(ok == FlateOk && lz->wbad) ok = FlateOutputFail; free(lz); return ok; } static int deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int)) { Dyncode dyncode, hdyncode; Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen]; ulong litcount[Nlitlen]; long nunc, ndyn, nfix, nhuff; uchar *slop, *hslop; ulong ep; int i, n, m, mm, nslop; memset(lzb->litlencount, 0, sizeof lzb->litlencount); memset(lzb->offcount, 0, sizeof lzb->offcount); lzb->litlencount[DeflateEob]++; lzb->bytes = 0; lzb->eparse = lzb->parse; lzb->lastv = 0; lzb->excost = 0; slop = &lz->hist[lz->pos]; n = lz->avail; while(n < DeflateBlock && (!lz->eof || lz->avail)){ /* * fill the buffer as much as possible, * while leaving room for MaxOff history behind lz->pos, * and not reading more than we can handle. * * make sure we read at least HistSlop bytes. */ if(!lz->eof){ ep = lz->pos + lz->avail; if(ep >= HistBlock) ep -= HistBlock; m = HistBlock - MaxOff - lz->avail; if(m > HistBlock - n) m = HistBlock - n; if(m > (HistBlock + HistSlop) - ep) m = (HistBlock + HistSlop) - ep; if(m & ~(BlockSize - 1)) m &= ~(BlockSize - 1); /* * be nice to the caller: stop reads that are too small. * can only get here when we've already filled the buffer some */ if(m < HistSlop){ if(!m || !lzb->bytes) return FlateInternal; break; } mm = (*r)(rr, &lz->hist[ep], m); if(mm > 0){ /* * wrap data to end if we're read it from the beginning * this way, we don't have to wrap searches. * * wrap reads past the end to the beginning. * this way, we can guarantee minimum size reads. */ if(ep < HistSlop) memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep); else if(ep + mm > HistBlock) memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock); lz->totr += mm; n += mm; lz->avail += mm; }else{ if(mm < 0) lz->rbad = 1; lz->eof = 1; } } ep = lz->pos + lz->avail; if(ep > HistSize) ep = HistSize; if(lzb->bytes + ep - lz->pos > DeflateMaxBlock) ep = lz->pos + DeflateMaxBlock - lzb->bytes; m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof); lzb->bytes += m; lz->pos = (lz->pos + m) & (HistBlock - 1); lz->avail -= m; } if(lzb->lastv) *lzb->eparse++ = lzb->lastv; if(lzb->eparse > lzb->parse + nelem(lzb->parse)) return FlateInternal; nunc = lzb->bytes; if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits) || !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits)) return FlateInternal; ndyn = huffcodes(&dyncode, dlitlentab, dofftab); if(ndyn < 0) return FlateInternal; ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen) + bitcost(dofftab, lzb->offcount, Noff) + lzb->excost; memset(litcount, 0, sizeof litcount); nslop = nunc; if(nslop > &lz->hist[HistSize] - slop) nslop = &lz->hist[HistSize] - slop; for(i = 0; i < nslop; i++) litcount[slop[i]]++; hslop = &lz->hist[HistSlop - nslop]; for(; i < nunc; i++) litcount[hslop[i]]++; litcount[DeflateEob]++; if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits)) return FlateInternal; nhuff = huffcodes(&hdyncode, hlitlentab, hofftab); if(nhuff < 0) return FlateInternal; nhuff += bitcost(hlitlentab, litcount, Nlitlen); nfix = bitcost(litlentab, lzb->litlencount, Nlitlen) + bitcost(offtab, lzb->offcount, Noff) + lzb->excost; lzput(lz, lz->eof && !lz->avail, 1); if(lz->debug){ fprint(2, "block: bytes=%lud entries=%zd extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n", nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff); fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail); } if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){ lzput(lz, DeflateUnc, 2); lzflushbits(lz); lzput(lz, nunc & 0xff, 8); lzput(lz, (nunc >> 8) & 0xff, 8); lzput(lz, ~nunc & 0xff, 8); lzput(lz, (~nunc >> 8) & 0xff, 8); lzflush(lz); lzwrite(lz, slop, nslop); lzwrite(lz, &lz->hist[HistSlop], nunc - nslop); }else if(ndyn < nfix && ndyn < nhuff){ lzput(lz, DeflateDyn, 2); wrdyncode(lz, &dyncode); wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab); lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits); }else if(nhuff < nfix){ lzput(lz, DeflateDyn, 2); wrdyncode(lz, &hdyncode); m = 0; for(i = nunc; i > MaxLitRun; i -= MaxLitRun) lzb->parse[m++] = MaxLitRun; lzb->parse[m++] = i; wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab); lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits); }else{ lzput(lz, DeflateFix, 2); wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab); lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits); } if(lz->eof && !lz->avail){ lzflushbits(lz); lzflush(lz); } return FlateOk; } static void lzwrite(LZstate *lz, void *buf, int n) { int nw; if(n && lz->w){ nw = (*lz->w)(lz->wr, buf, n); if(nw != n){ lz->w = nil; lz->wbad = 1; }else lz->totw += n; } } static void lzflush(LZstate *lz) { lzwrite(lz, lz->obuf, lz->out - lz->obuf); lz->out = lz->obuf; } static void lzput(LZstate *lz, ulong bits, int nbits) { bits = (bits << lz->nbits) | lz->bits; for(nbits += lz->nbits; nbits >= 8; nbits -= 8){ *lz->out++ = bits; if(lz->out == lz->eout) lzflush(lz); bits >>= 8; } lz->bits = bits; lz->nbits = nbits; } static void lzflushbits(LZstate *lz) { if(lz->nbits) lzput(lz, 0, 8 - (lz->nbits & 7)); } /* * write out a block of n samples, * given lz encoding and counts for huffman tables */ static void wrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab) { ushort *off; int i, run, offset, lit, len, c; if(out->debug > 2){ for(off = soff; off < eoff; ){ offset = *off++; run = offset & MaxLitRun; if(run){ for(i = 0; i < run; i++){ lit = out->hist[litoff & (HistBlock - 1)]; litoff++; fprint(2, "\tlit %.2ux %c\n", lit, lit); } if(!(offset & LenFlag)) continue; len = offset >> LenShift; offset = *off++; }else if(offset & LenFlag){ len = offset >> LenShift; offset = *off++; }else{ len = 0; offset >>= LenShift; } litoff += len + MinMatch; fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch); } } for(off = soff; off < eoff; ){ offset = *off++; run = offset & MaxLitRun; if(run){ for(i = 0; i < run; i++){ lit = out->hist[litoff & (HistBlock - 1)]; litoff++; lzput(out, litlentab[lit].encode, litlentab[lit].bits); } if(!(offset & LenFlag)) continue; len = offset >> LenShift; offset = *off++; }else if(offset & LenFlag){ len = offset >> LenShift; offset = *off++; }else{ len = 0; offset >>= LenShift; } litoff += len + MinMatch; c = lencode[len]; lzput(out, litlentab[c].encode, litlentab[c].bits); c -= LenStart; if(litlenextra[c]) lzput(out, len - litlenbase[c], litlenextra[c]); if(offset < MaxOffCode) c = offcode[offset]; else c = bigoffcode[offset >> 7]; lzput(out, offtab[c].encode, offtab[c].bits); if(offextra[c]) lzput(out, offset - offbase[c], offextra[c]); } } /* * look for the longest, closest string which matches * the next prefix. the clever part here is looking for * a string 1 longer than the previous best match. * * follows the recommendation of limiting number of chains * which are checked. this appears to be the best heuristic. */ static int lzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m) { uchar *s, *t; int ml, off, last; ml = check; if(runlen >= 8) check >>= 2; *m = 0; if(p + runlen >= es) return runlen; last = 0; for(; check-- > 0; then = nexts[then & (MaxOff-1)]){ off = (ushort)(now - then); if(off <= last || off > MaxOff) break; s = p + runlen; t = hist + (((p - hist) - off) & (HistBlock-1)); t += runlen; for(; s >= p; s--){ if(*s != *t) goto matchloop; t--; } /* * we have a new best match. * extend it to it's maximum length */ t += runlen + 2; s += runlen + 2; for(; s < es; s++){ if(*s != *t) break; t++; } runlen = s - p; *m = off - 1; if(s == es || runlen > ml) break; matchloop:; last = off; } return runlen; } static int lzcomp(LZstate *lz, LZblock *lzb, uchar *ep, ushort *parse, int finish) { ulong cont, excost, *litlencount, *offcount; uchar *p, *q, *s, *es; ushort *nexts, *hash; int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer; litlencount = lzb->litlencount; offcount = lzb->offcount; nexts = lz->nexts; hash = lz->hash; now = lz->now; p = &lz->hist[lz->pos]; if(lz->prevlen != MinMatch - 1) p++; /* * hash in the links for any hanging link positions, * and calculate the hash for the current position. */ n = MinMatch; if(n > ep - p) n = ep - p; cont = 0; for(i = 0; i < n - 1; i++){ m = now - ((MinMatch-1) - i); if(m < lz->dot) continue; s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1)); cont = (s[0] << 16) | (s[1] << 8) | s[2]; h = hashit(cont); prevoff = 0; for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){ v = (ushort)(now - then); if(v <= prevoff || v >= (MinMatch-1) - i) break; prevoff = v; } if(then == (ushort)m) continue; nexts[m & (MaxOff-1)] = hash[h]; hash[h] = m; } for(i = 0; i < n; i++) cont = (cont << 8) | p[i]; /* * now must point to the index in the nexts array * corresponding to p's position in the history */ prevlen = lz->prevlen; prevoff = lz->prevoff; maxdefer = lz->maxcheck >> 2; excost = 0; v = lzb->lastv; for(;;){ es = p + MaxMatch; if(es > ep){ if(!finish || p >= ep) break; es = ep; } h = hashit(cont); runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m); /* * back out of small matches too far in the past */ if(runlen == MinMatch && m >= MinMatchMaxOff){ runlen = MinMatch - 1; m = 0; } /* * record the encoding and increment counts for huffman trees * if we get a match, defer selecting it until we check for * a longer match at the next position. */ if(prevlen >= runlen && prevlen != MinMatch - 1){ /* * old match at least as good; use that one */ n = prevlen - MinMatch; if(v || n){ *parse++ = v | LenFlag | (n << LenShift); *parse++ = prevoff; }else *parse++ = prevoff << LenShift; v = 0; n = lencode[n]; litlencount[n]++; excost += litlenextra[n - LenStart]; if(prevoff < MaxOffCode) n = offcode[prevoff]; else n = bigoffcode[prevoff >> 7]; offcount[n]++; excost += offextra[n]; runlen = prevlen - 1; prevlen = MinMatch - 1; nmatches++; }else if(runlen == MinMatch - 1){ /* * no match; just put out the literal */ if(++v == MaxLitRun){ *parse++ = v; v = 0; } litlencount[*p]++; nlits++; runlen = 1; }else{ if(prevlen != MinMatch - 1){ /* * longer match now. output previous literal, * update current match, and try again */ if(++v == MaxLitRun){ *parse++ = v; v = 0; } litlencount[p[-1]]++; nlits++; } prevoff = m; if(runlen < maxdefer){ prevlen = runlen; runlen = 1; }else{ n = runlen - MinMatch; if(v || n){ *parse++ = v | LenFlag | (n << LenShift); *parse++ = prevoff; }else *parse++ = prevoff << LenShift; v = 0; n = lencode[n]; litlencount[n]++; excost += litlenextra[n - LenStart]; if(prevoff < MaxOffCode) n = offcode[prevoff]; else n = bigoffcode[prevoff >> 7]; offcount[n]++; excost += offextra[n]; prevlen = MinMatch - 1; nmatches++; } } /* * update the hash for the newly matched data * this is constructed so the link for the old * match in this position must be at the end of a chain, * and will expire when this match is added, ie it will * never be examined by the match loop. * add to the hash chain only if we have the real hash data. */ for(q = p + runlen; p != q; p++){ if(p + MinMatch <= ep){ h = hashit(cont); nexts[now & (MaxOff-1)] = hash[h]; hash[h] = now; if(p + MinMatch < ep) cont = (cont << 8) | p[MinMatch]; } now++; } } /* * we can just store away the lazy state and * pick it up next time. the last block will have finish set * so we won't have any pending matches * however, we need to correct for how much we've encoded */ if(prevlen != MinMatch - 1) p--; lzb->excost += excost; lzb->eparse = parse; lzb->lastv = v; lz->now = now; lz->prevlen = prevlen; lz->prevoff = prevoff; return p - &lz->hist[lz->pos]; } /* * make up the dynamic code tables, and return the number of bits * needed to transmit them. */ static int huffcodes(Dyncode *dc, Huff *littab, Huff *offtab) { Huff *codetab; uchar *codes, *codeaux; ulong codecount[Nclen], excost; int i, n, m, v, c, nlit, noff, ncode, nclen; codetab = dc->codetab; codes = dc->codes; codeaux = dc->codeaux; /* * trim the sizes of the tables */ for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--) ; for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--) ; /* * make the code-length code */ for(i = 0; i < nlit; i++) codes[i] = littab[i].bits; for(i = 0; i < noff; i++) codes[i + nlit] = offtab[i].bits; /* * run-length compress the code-length code */ excost = 0; c = 0; ncode = nlit+noff; for(i = 0; i < ncode; ){ n = i + 1; v = codes[i]; while(n < ncode && v == codes[n]) n++; n -= i; i += n; if(v == 0){ while(n >= 11){ m = n; if(m > 138) m = 138; codes[c] = 18; codeaux[c++] = m - 11; n -= m; excost += 7; } if(n >= 3){ codes[c] = 17; codeaux[c++] = n - 3; n = 0; excost += 3; } } while(n--){ codes[c++] = v; while(n >= 3){ m = n; if(m > 6) m = 6; codes[c] = 16; codeaux[c++] = m - 3; n -= m; excost += 3; } } } memset(codecount, 0, sizeof codecount); for(i = 0; i < c; i++) codecount[codes[i]]++; if(!mkgzprecode(codetab, codecount, Nclen, 8)) return -1; for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--) ; dc->nlit = nlit; dc->noff = noff; dc->nclen = nclen; dc->ncode = c; return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost; } static void wrdyncode(LZstate *out, Dyncode *dc) { Huff *codetab; uchar *codes, *codeaux; int i, v, c; /* * write out header, then code length code lengths, * and code lengths */ lzput(out, dc->nlit-257, 5); lzput(out, dc->noff-1, 5); lzput(out, dc->nclen-4, 4); codetab = dc->codetab; for(i = 0; i < dc->nclen; i++) lzput(out, codetab[clenorder[i]].bits, 3); codes = dc->codes; codeaux = dc->codeaux; c = dc->ncode; for(i = 0; i < c; i++){ v = codes[i]; lzput(out, codetab[v].encode, codetab[v].bits); if(v >= 16){ if(v == 16) lzput(out, codeaux[i], 2); else if(v == 17) lzput(out, codeaux[i], 3); else /* v == 18 */ lzput(out, codeaux[i], 7); } } } static int bitcost(Huff *tab, ulong *count, int n) { ulong tot; int i; tot = 0; for(i = 0; i < n; i++) tot += count[i] * tab[i].bits; return tot; } static int mkgzprecode(Huff *tab, ulong *count, int n, int maxbits) { ulong bitcount[MaxHuffBits]; int i, nbits; nbits = mkprecode(tab, count, n, maxbits, bitcount); for(i = 0; i < n; i++){ if(tab[i].bits == -1) tab[i].bits = 0; else if(tab[i].bits == 0){ if(nbits != 0 || bitcount[0] != 1) return 0; bitcount[1] = 1; bitcount[0] = 0; nbits = 1; tab[i].bits = 1; } } if(bitcount[0] != 0) return 0; if(nbits == 0){ bitcount[1] = 1; nbits = 1; tab[0].bits = 1; } return hufftabinit(tab, n, bitcount, nbits); } static int hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits) { ulong code, nc[MaxHuffBits]; int i, bits; code = 0; for(bits = 1; bits <= nbits; bits++){ code = (code + bitcount[bits-1]) << 1; nc[bits] = code; } for(i = 0; i < n; i++){ bits = tab[i].bits; if(bits){ code = nc[bits]++ << (16 - bits); if(code & ~0xffff) return 0; tab[i].encode = revtab[code >> 8] | (revtab[code & 0xff] << 8); } } return 1; } /* * this should be in a library */ struct Chain { ulong count; /* occurances of everything in the chain */ ushort leaf; /* leaves to the left of chain, or leaf value */ char col; /* ref count for collecting unused chains */ char gen; /* need to generate chains for next lower level */ Chain *up; /* Chain up in the lists */ }; struct Chains { Chain *lists[(MaxHuffBits - 1) * 2]; ulong leafcount[MaxLeaf]; /* sorted list of leaf counts */ ushort leafmap[MaxLeaf]; /* map to actual leaf number */ int nleaf; /* number of leaves */ Chain chains[ChainMem]; Chain *echains; Chain *free; char col; int nlists; }; /* * fast, low space overhead algorithm for max depth huffman type codes * * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds., * pp 12-21, Springer Verlag, New York, 1995. */ static int mkprecode(Huff *tab, ulong *count, int n, int maxbits, ulong *bitcount) { Chains cs; Chain *c; int i, m, em, bits; /* * set up the sorted list of leaves */ m = 0; for(i = 0; i < n; i++){ tab[i].bits = -1; tab[i].encode = 0; if(count[i] != 0){ cs.leafcount[m] = count[i]; cs.leafmap[m] = i; m++; } } if(m < 2){ if(m != 0){ tab[cs.leafmap[0]].bits = 0; bitcount[0] = 1; }else bitcount[0] = 0; return 0; } cs.nleaf = m; leafsort(cs.leafcount, cs.leafmap, 0, m); for(i = 0; i < m; i++) cs.leafcount[i] = count[cs.leafmap[i]]; /* * set up free list */ cs.free = &cs.chains[2]; cs.echains = &cs.chains[ChainMem]; cs.col = 1; /* * initialize chains for each list */ c = &cs.chains[0]; c->count = cs.leafcount[0]; c->leaf = 1; c->col = cs.col; c->up = nil; c->gen = 0; cs.chains[1] = cs.chains[0]; cs.chains[1].leaf = 2; cs.chains[1].count = cs.leafcount[1]; for(i = 0; i < maxbits-1; i++){ cs.lists[i * 2] = &cs.chains[0]; cs.lists[i * 2 + 1] = &cs.chains[1]; } cs.nlists = 2 * (maxbits - 1); m = 2 * m - 2; for(i = 2; i < m; i++) nextchain(&cs, cs.nlists - 2); bits = 0; bitcount[0] = cs.nleaf; for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){ m = c->leaf; bitcount[bits++] -= m; bitcount[bits] = m; } m = 0; for(i = bits; i >= 0; i--) for(em = m + bitcount[i]; m < em; m++) tab[cs.leafmap[m]].bits = i; return bits; } /* * calculate the next chain on the list * we can always toss out the old chain */ static void nextchain(Chains *cs, int list) { Chain *c, *oc; int i, nleaf, sumc; oc = cs->lists[list + 1]; cs->lists[list] = oc; if(oc == nil) return; /* * make sure we have all chains needed to make sumc * note it is possible to generate only one of these, * use twice that value for sumc, and then generate * the second if that preliminary sumc would be chosen. * however, this appears to be slower on current tests */ if(oc->gen){ nextchain(cs, list - 2); nextchain(cs, list - 2); oc->gen = 0; } /* * pick up the chain we're going to add; * collect unused chains no free ones are left */ for(c = cs->free; ; c++){ if(c >= cs->echains){ cs->col++; for(i = 0; i < cs->nlists; i++) for(c = cs->lists[i]; c != nil; c = c->up) c->col = cs->col; c = cs->chains; } if(c->col != cs->col) break; } /* * pick the cheapest of * 1) the next package from the previous list * 2) the next leaf */ nleaf = oc->leaf; sumc = 0; if(list > 0 && cs->lists[list-1] != nil) sumc = cs->lists[list-2]->count + cs->lists[list-1]->count; if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){ c->count = sumc; c->leaf = oc->leaf; c->up = cs->lists[list-1]; c->gen = 1; }else if(nleaf >= cs->nleaf){ cs->lists[list + 1] = nil; return; }else{ c->leaf = nleaf + 1; c->count = cs->leafcount[nleaf]; c->up = oc->up; c->gen = 0; } cs->free = c + 1; cs->lists[list + 1] = c; c->col = cs->col; } static int pivot(ulong *c, int a, int n) { int j, pi, pj, pk; j = n/6; pi = a + j; /* 1/6 */ j += j; pj = pi + j; /* 1/2 */ pk = pj + j; /* 5/6 */ if(c[pi] < c[pj]){ if(c[pi] < c[pk]){ if(c[pj] < c[pk]) return pj; return pk; } return pi; } if(c[pj] < c[pk]){ if(c[pi] < c[pk]) return pi; return pk; } return pj; } static void leafsort(ulong *leafcount, ushort *leafmap, int a, int n) { ulong t; int j, pi, pj, pn; while(n > 1){ if(n > 10){ pi = pivot(leafcount, a, n); }else pi = a + (n>>1); t = leafcount[pi]; leafcount[pi] = leafcount[a]; leafcount[a] = t; t = leafmap[pi]; leafmap[pi] = leafmap[a]; leafmap[a] = t; pi = a; pn = a + n; pj = pn; for(;;){ do pi++; while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a])); do pj--; while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a])); if(pj < pi) break; t = leafcount[pi]; leafcount[pi] = leafcount[pj]; leafcount[pj] = t; t = leafmap[pi]; leafmap[pi] = leafmap[pj]; leafmap[pj] = t; } t = leafcount[a]; leafcount[a] = leafcount[pj]; leafcount[pj] = t; t = leafmap[a]; leafmap[a] = leafmap[pj]; leafmap[pj] = t; j = pj - a; n = n-j-1; if(j >= n){ leafsort(leafcount, leafmap, a, j); a += j+1; }else{ leafsort(leafcount, leafmap, a + (j+1), n); n = j; } } }