[hackers] [flate] deflate: freq/code overlap || nsz

From: <hg_AT_suckless.org>
Date: Tue, 11 Aug 2009 13:07:16 +0000 (UTC)

changeset: 96:f7385ae4153e
user: nsz <nszabolcs_AT_gmail.com>
date: Tue Aug 11 13:48:57 2009 +0200
files: deflate.c
description:
deflate: freq/code overlap

diff -r d3f42b7add43 -r f7385ae4153e deflate.c
--- a/deflate.c Tue Aug 11 10:06:10 2009 +0200
+++ b/deflate.c Tue Aug 11 13:48:57 2009 +0200
@@ -5,25 +5,28 @@
 
 /*
 TODO:
- (hufflen: parent+heap only)
- match: check backwards
- block end guess with lfreq, dfreq
- bisect len/distbase -> lookup table
- (rolling hash)
- bounds on the compressend size
- (zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree)
- overlap freq/code, rbuf/dstwin ?
- verify dstsize
- input from in+nin instead of src+srcend
- setting s->state..
- ushort vs int
-
-spec case tests:
- empty block
- huff overflow
- all zero (rle)
- dist = 32K
- test with small bufsize (1,2..9 bytes)
+ space opt:
+ overlap rbuf/dstwin
+ hufflen: overlap arrays (parent+heap only)
+ time opt:
+ match loop unroll
+ less branching in deflate_state (check avail > MinMatch once)
+ bisect len/distbase -> lookup table
+ rolling hash
+ better compression:
+ block split heuristics with lfreq, dfreq
+ (zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree)
+ code cleanups:
+ bounds on the compressend size
+ verify dstsize
+ input from in+nin instead of src+srcend
+ setting s->state..
+ ushort vs int
+ tests:
+ empty block
+ all zero (rle)
+ uncompressed block
+ test with small nin/nout (1,2..9 bytes)
 */
 
 enum {
@@ -77,8 +80,8 @@
         LzCode lzbuf[LzSize]; /* literal run length, match len, match dist */
         LzCode *lz; /* current pos in lzbuf */
         int run; /* literal run length */
- int lfreq[Nlitlen];
- int dfreq[Ndist];
+ ushort lfreq[Nlitlen];
+ ushort dfreq[Ndist];
         uchar dstbuf[DstSize];
 } State;
 
@@ -192,7 +195,7 @@
 }
 
 /* symbol frequencies -> code lengths (limited to 255) */
-static void hufflens(uchar *lens, int *freqs, int nsym, int limit) {
+static void hufflens(uchar *lens, ushort *freqs, int nsym, int limit) {
         /* 2 <= nsym <= Nlitlen, log(nsym) <= limit <= CodeBits-1 */
         int parent[2*Nlitlen-1];
         int count[CodeBits];
@@ -334,17 +337,18 @@
 }
 
 static void deflate_block(State *s) {
- int cfreq[Nclen];
         uchar codes[Nlitlen + Ndist], extra[Nlitlen + Ndist];
         uchar llen[Nlitlen], dlen[Ndist], clen[Nclen];
- ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen];
+ ushort cfreq[Nclen];
+ /* freq can be overwritten by code */
+ ushort *lcode = s->lfreq, *dcode = s->dfreq, *ccode = cfreq;
         int i, c, n, ncodes;
         int nlit, ndist, nclen;
         LzCode *lz;
         uchar *pc;
         int dynsize, fixsize, uncsize;
         int blocklen = s->pos - s->startpos;
-int dyntree;
+/*int dyntree;*/
 
         /* calc dynamic codes */
         hufflens(llen, s->lfreq, Nlitlen, CodeBits-1);
@@ -375,7 +379,7 @@
                 if (c == 18)
                         dynsize += 7;
         }
-dyntree = dynsize - 3;
+/*dyntree = dynsize - 3;*/
         for (lz = s->lzbuf, pc = s->win + s->startpos; lz != s->lz; lz++)
                 if (lz->bits & LzLitFlag)
                         for (n = lz->n; n > 0; n--, pc++) {
@@ -432,9 +436,11 @@
                 memcpy(s->dst, s->win + s->startpos, blocklen);
                 s->dst += blocklen;
         }
+/*
 fprintf(stderr, "blen: %d [%d,%d] lzlen: %d dynlen: %d (tree: %d, rate: %.3f) fixlen: %d (rate: %.3f) unclen: %d (rate: %.3f) avail %d\n",
         blocklen, s->startpos, s->pos, s->lz - s->lzbuf, dynsize, dyntree, dynsize/(float)blocklen,
         fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen, s->avail);
+*/
 }
 
 static int bisect(ushort *base, int len, int n) {
Received on Tue Aug 11 2009 - 13:07:16 UTC

This archive was generated by hypermail 2.2.0 : Sun Aug 16 2009 - 14:19:01 UTC