changeset: 96:f7385ae4153e
user: nsz <nszabolcs_AT_gmail.com>
date: Tue Aug 11 13:48:57 2009 +0200
files: deflate.c
description:
deflate: freq/code overlap
diff -r d3f42b7add43 -r f7385ae4153e deflate.c
--- a/deflate.c Tue Aug 11 10:06:10 2009 +0200
+++ b/deflate.c Tue Aug 11 13:48:57 2009 +0200
@@ -5,25 +5,28 @@
/*
TODO:
- (hufflen: parent+heap only)
- match: check backwards
- block end guess with lfreq, dfreq
- bisect len/distbase -> lookup table
- (rolling hash)
- bounds on the compressend size
- (zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree)
- overlap freq/code, rbuf/dstwin ?
- verify dstsize
- input from in+nin instead of src+srcend
- setting s->state..
- ushort vs int
-
-spec case tests:
- empty block
- huff overflow
- all zero (rle)
- dist = 32K
- test with small bufsize (1,2..9 bytes)
+ space opt:
+ overlap rbuf/dstwin
+ hufflen: overlap arrays (parent+heap only)
+ time opt:
+ match loop unroll
+ less branching in deflate_state (check avail > MinMatch once)
+ bisect len/distbase -> lookup table
+ rolling hash
+ better compression:
+ block split heuristics with lfreq, dfreq
+ (zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree)
+ code cleanups:
+ bounds on the compressend size
+ verify dstsize
+ input from in+nin instead of src+srcend
+ setting s->state..
+ ushort vs int
+ tests:
+ empty block
+ all zero (rle)
+ uncompressed block
+ test with small nin/nout (1,2..9 bytes)
*/
enum {
@@ -77,8 +80,8 @@
LzCode lzbuf[LzSize]; /* literal run length, match len, match dist */
LzCode *lz; /* current pos in lzbuf */
int run; /* literal run length */
- int lfreq[Nlitlen];
- int dfreq[Ndist];
+ ushort lfreq[Nlitlen];
+ ushort dfreq[Ndist];
uchar dstbuf[DstSize];
} State;
@@ -192,7 +195,7 @@
}
/* symbol frequencies -> code lengths (limited to 255) */
-static void hufflens(uchar *lens, int *freqs, int nsym, int limit) {
+static void hufflens(uchar *lens, ushort *freqs, int nsym, int limit) {
/* 2 <= nsym <= Nlitlen, log(nsym) <= limit <= CodeBits-1 */
int parent[2*Nlitlen-1];
int count[CodeBits];
@@ -334,17 +337,18 @@
}
static void deflate_block(State *s) {
- int cfreq[Nclen];
uchar codes[Nlitlen + Ndist], extra[Nlitlen + Ndist];
uchar llen[Nlitlen], dlen[Ndist], clen[Nclen];
- ushort lcode[Nlitlen], dcode[Ndist], ccode[Nclen];
+ ushort cfreq[Nclen];
+ /* freq can be overwritten by code */
+ ushort *lcode = s->lfreq, *dcode = s->dfreq, *ccode = cfreq;
int i, c, n, ncodes;
int nlit, ndist, nclen;
LzCode *lz;
uchar *pc;
int dynsize, fixsize, uncsize;
int blocklen = s->pos - s->startpos;
-int dyntree;
+/*int dyntree;*/
/* calc dynamic codes */
hufflens(llen, s->lfreq, Nlitlen, CodeBits-1);
@@ -375,7 +379,7 @@
if (c == 18)
dynsize += 7;
}
-dyntree = dynsize - 3;
+/*dyntree = dynsize - 3;*/
for (lz = s->lzbuf, pc = s->win + s->startpos; lz != s->lz; lz++)
if (lz->bits & LzLitFlag)
for (n = lz->n; n > 0; n--, pc++) {
@@ -432,9 +436,11 @@
memcpy(s->dst, s->win + s->startpos, blocklen);
s->dst += blocklen;
}
+/*
fprintf(stderr, "blen: %d [%d,%d] lzlen: %d dynlen: %d (tree: %d, rate: %.3f) fixlen: %d (rate: %.3f) unclen: %d (rate: %.3f) avail %d\n",
blocklen, s->startpos, s->pos, s->lz - s->lzbuf, dynsize, dyntree, dynsize/(float)blocklen,
fixsize, fixsize/(float)blocklen, uncsize, uncsize/(float)blocklen, s->avail);
+*/
}
static int bisect(ushort *base, int len, int n) {
Received on Tue Aug 11 2009 - 13:07:16 UTC
This archive was generated by hypermail 2.2.0 : Sun Aug 16 2009 - 14:19:01 UTC