[hackers] [flate] +sflate, inflate returns extra bytes after compressed input || nsz

From: <hg_AT_suckless.org>
Date: Fri, 21 Aug 2009 13:50:15 +0000 (UTC)

changeset: 110:044cf880002d
user: nsz <nszabolcs_AT_gmail.com>
date: Fri Aug 21 15:03:20 2009 +0200
files: Makefile TODO adler.c crc.c flate.h flatez.c inflate.c sflate.c
description:
+sflate, inflate returns extra bytes after compressed input

diff -r 5db820477dec -r 044cf880002d Makefile
--- a/Makefile Tue Aug 18 23:49:20 2009 +0200
+++ b/Makefile Fri Aug 21 15:03:20 2009 +0200
@@ -2,11 +2,14 @@
 CFLAGS=-O3 -Wall -ansi -pedantic
 LDFLAGS=
 SRC=inflate.c inflate_example.c inflate_simple.c \
- deflate.c deflate_example.c
+ deflate.c deflate_example.c \
+ sflate.c crc.c adler.c
 OBJ=${SRC:.c=.o}
-EXE=inflate inflate_simple deflate
+EXE=sflate inflate inflate_simple deflate
 
 all: ${EXE}
+sflate: sflate.o crc.o adler.o inflate.o deflate.o
+ ${CC} -o $@ $^ ${LDFLAGS}
 inflate: inflate.o inflate_example.o
         ${CC} -o $@ $^ ${LDFLAGS}
 inflate_simple: inflate_simple.o
diff -r 5db820477dec -r 044cf880002d TODO
--- a/TODO Tue Aug 18 23:49:20 2009 +0200
+++ b/TODO Fri Aug 21 15:03:20 2009 +0200
@@ -1,6 +1,8 @@
 flate
 -----
+fill output entirely
 man
+globals
 error message ?
 inflate assumes Flate* < 0
 _init _reset _free ?
@@ -15,7 +17,7 @@
 
 inflate
 -------
-init globals
+callback interface: reading past the end of compressed data: unreachable data
 (rev lookup vs revinc)
 (test/optimize uncompressed block)
 read less than 7 bits in clen decode
@@ -36,6 +38,7 @@
         (zlib huffcode trick: if same freq then shorter code goes to the one with smaller subtree)
         last block can be allowed to be larger
 code cleanups:
+ better organization (configurable blocksize)
         bounds on the compressend size
         input from in+nin instead of src+srcend
         setting s->state..
diff -r 5db820477dec -r 044cf880002d adler.c
--- a/adler.c Tue Aug 18 23:49:20 2009 +0200
+++ b/adler.c Fri Aug 21 15:03:20 2009 +0200
@@ -1,9 +1,11 @@
+#include "flate.h"
+
 enum {
         AdlerBase = 65521, /* largest 16bit prime */
         AdlerN = 5552 /* max iters before 32bit overflow */
 };
 
-uint adlersum(uchar *p, int n, uint adler) {
+uint adler32(uchar *p, int n, uint adler) {
         uint s1 = adler & 0xffff;
         uint s2 = (adler >> 16) & 0xffff;
         uchar *ep;
diff -r 5db820477dec -r 044cf880002d crc.c
--- a/crc.c Tue Aug 18 23:49:20 2009 +0200
+++ b/crc.c Fri Aug 21 15:03:20 2009 +0200
@@ -1,10 +1,14 @@
+#include "flate.h"
+
 uint crctab[256];
 
-void crcinit(void) {
+void crc32init(void) {
         static const uint poly = 0xEDB88320;
+ int i,j;
 
         for (i = 0; i < 256; ++i) {
- crc = i;
+ uint crc = i;
+
                 for (j = 0; j < 8; j++) {
                         if (crc & 1)
                                 crc = (crc >> 1) ^ poly;
@@ -15,11 +19,11 @@
         }
 }
 
-uint crcsum(uchar *p, int n, uint crc) {
+uint crc32(uchar *p, int n, uint crc) {
         uchar *ep = p + n;
 
         crc ^= 0xffffffff;
         while (p < ep)
- crc = crctab[(crc & 0xff) ^ *buf++] ^ (crc >> 8);
+ crc = crctab[(crc & 0xff) ^ *p++] ^ (crc >> 8);
         return crc ^ 0xffffffff;
 }
diff -r 5db820477dec -r 044cf880002d flate.h
--- a/flate.h Tue Aug 18 23:49:20 2009 +0200
+++ b/flate.h Fri Aug 21 15:03:20 2009 +0200
@@ -23,3 +23,7 @@
 int deflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata);
 int inflate(FlateStream *s);
 int inflate_callback(int (*r)(void *, int, void *), void *rdata, int (*w)(void *, int, void *), void *wdata);
+
+uint adler32(uchar *p, int n, uint adler);
+void crc32init(void);
+uint crc32(uchar *p, int n, uint crc);
diff -r 5db820477dec -r 044cf880002d flatez.c
--- a/flatez.c Tue Aug 18 23:49:20 2009 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,132 +0,0 @@
-
-static int checkfooter(uchar *p, uint sum) {
- return sum == ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]);
-}
-
-enum {
- ZlibCM = 7 << 4,
- ZlibCINFO = 8,
- ZlibFLEV = 3 << 6,
- ZlibFDICT = 1 << 5,
- ZlibFCHK = 31 - (((ZlibCM | ZlibCINFO) << 8) | ZlibFLEV) % 31,
-};
-
-int deflate_zlib_header(uchar *p, int n) {
- if (n < 2)
- return FlateErr;
- p[0] = ZlibCM | ZlibCINFO; /* deflate method, 32K window size */
- p[1] = ZlibFLEV | ZlibFCHK; /* highest compression */
- return 2;
-}
-
-int deflate_zlib_footer(uchar *p, int n, uint sum) {
- if (n < 4)
- return FlateErr;
- p[0] = sum >> 24;
- p[1] = sum >> 16;
- p[2] = sum >> 8;
- p[3] = sum;
- return 4;
-}
-
-int inflate_zlib_header(uchar *p, int n) {
- if (n < 2)
- return FlateErr;
- if (((p[0] << 8) | p[1]) % 31)
- return FlateErr;
- if ((p[0] & 0xf0) != ZlibCM || (p[0] & 0x0f) > ZlibCINFO)
- return FlateErr;
- if (p[1] & ZlibFDICT)
- return FlateErr;
- return 2;
-}
-
-int inflate_zlib_footer(uchar *p, int n, uint sum) {
- if (n < 4 || !checkfooter(p, sum))
- return FlateErr;
- return 4;
-}
-
-
-enum {
- GZipID1 = 0x1f,
- GZipID2 = 0x8b,
- GZipCM = 8,
- GZipFHCRC = 1 << 1,
- GZipFEXTRA = 1 << 2,
- GZipFNAME = 1 << 3,
- GZipFCOMM = 1 << 4,
- GZipXFL = 2,
- GZipOS = 255
-};
-
-int deflate_gzip_header(uchar *p, int n) {
- int k;
-
- if (n < 10)
- return FlateErr;
- for (k = 0; k < n; k++)
- p[k] = 0;
- p[0] = GZipID1;
- p[1] = GZipID2;
- p[2] = GZipCM;
- p[8] = GZipXFL;
- p[9] = GZipOS;
- return 10;
-}
-
-int deflate_gzip_footer(uchar *p, int n, uint sum, uint len) {
- if (n < 8)
- return FlateErr;
- p[0] = sum >> 24;
- p[1] = sum >> 16;
- p[2] = sum >> 8;
- p[3] = sum;
- p[4] = len >> 24;
- p[5] = len >> 16;
- p[6] = len >> 8;
- p[7] = len;
- return 8;
-}
-
-int inflate_gzip_header(uchar *p, int n) {
- int k = 10;
-
- if (k > n)
- return FlateErr;
- if (p[0] != GZipID1 || p[1] != GZipID2 || p[2] != GZipCM)
- return FlateErr;
- if (p[3] & GZipFEXTRA) {
- k += 2 + ((p[k] << 8) | p[k+1]);
- if (k > n)
- return FlateErr;
- }
- if (p[3] & GZipFNAME) {
- for (; k < n; k++)
- if (p[k] == 0)
- break;
- k++;
- if (k > n)
- return FlateErr;
- }
- if (p[3] & GZipFCOMM) {
- for (; k < n; k++)
- if (p[k] == 0)
- break;
- k++;
- if (k > n)
- return FlateErr;
- }
- if (p[3] & GZipFHCRC) {
- k += 2;
- if (k > n)
- return FlateErr;
- }
- return k;
-}
-
-int inflate_gzip_footer(uchar *p, int n, uint sum, uint len) {
- if (n < 8 || !checkfooter(p, sum) || !checkfooter(p+4, len))
- return FlateErr;
- return 8;
-}
diff -r 5db820477dec -r 044cf880002d inflate.c
--- a/inflate.c Tue Aug 18 23:49:20 2009 +0200
+++ b/inflate.c Fri Aug 21 15:03:20 2009 +0200
@@ -575,7 +575,7 @@
                         s->state = BlockHead;
                         break;
                 default:
- return s->err = "corrupt state.", FlateErr;
+ return s->err = "corrupt internal state.", FlateErr;
                 }
         }
 }
@@ -631,6 +631,11 @@
                         s->posout = 0;
         }
         if (n == FlateOk || n == FlateErr) {
+ if (s->nbits || s->src < s->srcend) {
+ s->nbits /= 8;
+ stream->in = s->src - s->nbits;
+ stream->nin = s->srcend - s->src + s->nbits;
+ }
                 stream->err = s->err;
                 free(s);
                 stream->state = 0;
diff -r 5db820477dec -r 044cf880002d sflate.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sflate.c Fri Aug 21 15:03:20 2009 +0200
@@ -0,0 +1,359 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "flate.h"
+
+/*
+ * Compare sum with the 32-bit value stored big-endian (most significant
+ * byte first) in p[0..3].
+ * Returns nonzero when they match, 0 otherwise.
+ */
+static int checkfooter(uchar *p, uint sum) {
+	/* convert every byte to uint before shifting: with a 32-bit int,
+	 * p[0] << 24 on the promoted value overflows when p[0] >= 0x80 */
+	uint stored = ((uint)p[0] << 24) | ((uint)p[1] << 16) | ((uint)p[2] << 8) | (uint)p[3];
+
+	return sum == stored;
+}
+
+/*
+ * Constants for the two zlib header bytes written by deflate_zlib_header
+ * and checked by inflate_zlib_header.
+ * NOTE(review): RFC 1950 calls the low nibble of byte 0 CM (8 = deflate) and
+ * the high nibble CINFO (7 = 32K window); the two names here look swapped,
+ * although the resulting byte value (0x78) is the usual one -- confirm.
+ */
+enum {
+	ZlibCM = 7 << 4, /* high nibble of header byte 0 */
+	ZlibCINFO = 8, /* low nibble of header byte 0 */
+	ZlibFLEV = 3 << 6, /* top two bits of header byte 1 */
+	ZlibFDICT = 1 << 5, /* bit of header byte 1; inflate_zlib_header rejects headers that set it */
+	ZlibFCHK = 31 - (((ZlibCM | ZlibCINFO) << 8) | ZlibFLEV) % 31 /* pads the 16-bit header up to a multiple of 31 */
+};
+
+/*
+ * Store the two-byte zlib header at p.
+ * p/n: output buffer and its capacity in bytes.
+ * Returns the header length (2), or FlateErr when n < 2.
+ */
+int deflate_zlib_header(uchar *p, int n) {
+	if (n >= 2) {
+		p[0] = ZlibCM | ZlibCINFO; /* deflate method, 32K window size */
+		p[1] = ZlibFLEV | ZlibFCHK; /* highest compression */
+		return 2;
+	}
+	return FlateErr;
+}
+
+/*
+ * Store the zlib trailer: sum as four bytes, most significant byte first.
+ * p/n: output buffer and its capacity; len and zlen are not used.
+ * Returns the trailer length (4), or FlateErr when n < 4.
+ */
+int deflate_zlib_footer(uchar *p, int n, uint sum, uint len, uint zlen) {
+	int shift;
+
+	if (n < 4)
+		return FlateErr;
+	for (shift = 24; shift >= 0; shift -= 8)
+		*p++ = sum >> shift;
+	return 4;
+}
+
+/*
+ * Validate the two-byte zlib header at p.
+ * p/n: input buffer and the number of valid bytes in it.
+ * Returns the header length (2) when the header is acceptable, FlateErr when
+ * the input is too short, the check bits are wrong, the method is not
+ * deflate, the window code is too large or a preset dictionary is requested.
+ */
+int inflate_zlib_header(uchar *p, int n) {
+	if (n < 2)
+		return FlateErr;
+	/* both bytes read as one big-endian number must be a multiple of 31 */
+	if (((p[0] << 8) | p[1]) % 31)
+		return FlateErr;
+	/* low nibble is the method and must be exactly 8 (deflate); high nibble
+	 * is the window size code, where any value up to 7 (32K) is valid.
+	 * The previous test had the nibbles the other way round: it demanded a
+	 * 32K window and let methods 0..7 through. */
+	if ((p[0] & 0x0f) != ZlibCINFO || (p[0] & 0xf0) > ZlibCM)
+		return FlateErr;
+	/* preset dictionaries are not supported */
+	if (p[1] & ZlibFDICT)
+		return FlateErr;
+	return 2;
+}
+
+/*
+ * Check the four-byte zlib trailer at p against the computed checksum sum.
+ * p/n: input buffer and the number of valid bytes; len and zlen are not used.
+ * Returns the trailer length (4) on a match, FlateErr when fewer than
+ * 4 bytes are available or the stored value differs.
+ */
+int inflate_zlib_footer(uchar *p, int n, uint sum, uint len, uint zlen) {
+	if (n >= 4 && checkfooter(p, sum))
+		return 4;
+	return FlateErr;
+}
+
+
+/* constants for the gzip member header handled by the *_gzip_header functions */
+enum {
+	GZipID1 = 0x1f, /* expected value of header byte 0 */
+	GZipID2 = 0x8b, /* expected value of header byte 1 */
+	GZipCM = 8, /* expected value of header byte 2 (compression method) */
+	GZipFHCRC = 1 << 1, /* flag in byte 3: 2 extra header bytes follow the other optional fields */
+	GZipFEXTRA = 1 << 2, /* flag in byte 3: length-prefixed extra field present */
+	GZipFNAME = 1 << 3, /* flag in byte 3: zero-terminated field present (file name per the flag's name) */
+	GZipFCOMM = 1 << 4, /* flag in byte 3: second zero-terminated field present (comment per the flag's name) */
+	GZipXFL = 2, /* value written to header byte 8 */
+	GZipOS = 255 /* value written to header byte 9 */
+};
+
+/*
+ * Store a minimal 10-byte gzip header at p: magic bytes, method, XFL and OS;
+ * every other header byte (flags and bytes 4..7) is 0.
+ * p/n: output buffer and its capacity in bytes.
+ * Returns the header length (10), or FlateErr when n < 10.
+ */
+int deflate_gzip_header(uchar *p, int n) {
+	enum { HeaderLen = 10 };
+
+	if (n < HeaderLen)
+		return FlateErr;
+	/* clear only the header itself; the old loop zeroed all n bytes of the
+	 * caller's buffer although just HeaderLen of them are ever used */
+	memset(p, 0, HeaderLen);
+	p[0] = GZipID1;
+	p[1] = GZipID2;
+	p[2] = GZipCM;
+	p[8] = GZipXFL;
+	p[9] = GZipOS;
+	return HeaderLen;
+}
+
+/* Store v in p[0..3], least significant byte first (gzip byte order). */
+static void putle32(uchar *p, uint v) {
+	p[0] = v;
+	p[1] = v >> 8;
+	p[2] = v >> 16;
+	p[3] = v >> 24;
+}
+
+/* Read the 32-bit value stored least significant byte first in p[0..3]. */
+static uint getle32(uchar *p) {
+	return (uint)p[0] | ((uint)p[1] << 8) | ((uint)p[2] << 16) | ((uint)p[3] << 24);
+}
+
+/*
+ * Store the 8-byte gzip trailer at p: checksum sum followed by the
+ * uncompressed length len. RFC 1952 stores all multi-byte numbers least
+ * significant byte first; the earlier code wrote them big-endian.
+ * p/n: output buffer and its capacity; zlen is not used.
+ * Returns the trailer length (8), or FlateErr when n < 8.
+ */
+int deflate_gzip_footer(uchar *p, int n, uint sum, uint len, uint zlen) {
+	if (n < 8)
+		return FlateErr;
+	putle32(p, sum);
+	putle32(p + 4, len);
+	return 8;
+}
+
+/*
+ * Validate a gzip member header at p and skip its optional fields.
+ * p/n: input buffer and the number of valid bytes in it.
+ * Returns the total header length, or FlateErr when the magic bytes or the
+ * method are wrong or the header does not fit into the n available bytes.
+ */
+int inflate_gzip_header(uchar *p, int n) {
+	int k = 10;
+
+	if (k > n)
+		return FlateErr;
+	if (p[0] != GZipID1 || p[1] != GZipID2 || p[2] != GZipCM)
+		return FlateErr;
+	if (p[3] & GZipFEXTRA) {
+		/* the two length bytes must be inside the buffer before they are read */
+		if (k + 2 > n)
+			return FlateErr;
+		/* XLEN is little-endian like every other gzip number */
+		k += 2 + (p[k] | (p[k+1] << 8));
+		if (k > n)
+			return FlateErr;
+	}
+	if (p[3] & GZipFNAME) {
+		/* skip up to and including the terminating 0 byte */
+		while (k < n && p[k] != 0)
+			k++;
+		k++;
+		/* k == n + 1 here means no terminator was found */
+		if (k > n)
+			return FlateErr;
+	}
+	if (p[3] & GZipFCOMM) {
+		while (k < n && p[k] != 0)
+			k++;
+		k++;
+		if (k > n)
+			return FlateErr;
+	}
+	if (p[3] & GZipFHCRC) {
+		/* the two header CRC bytes are skipped, not verified */
+		k += 2;
+		if (k > n)
+			return FlateErr;
+	}
+	return k;
+}
+
+/*
+ * Check the 8-byte gzip trailer at p against the computed checksum sum and
+ * the uncompressed length len (both stored little-endian).
+ * p/n: input buffer and the number of valid bytes; zlen is not used.
+ * Returns the trailer length (8) when both values match, FlateErr otherwise.
+ */
+int inflate_gzip_footer(uchar *p, int n, uint sum, uint len, uint zlen) {
+	if (n < 8 || getle32(p) != sum || getle32(p + 4) != len)
+		return FlateErr;
+	return 8;
+}
+
+
+/* example usage */
+
+/* state shared between main and the two stream functions */
+static int (*header)(uchar *, int); /* header writer/parser for the format selected in main */
+static int (*footer)(uchar *, int, uint, uint, uint); /* footer writer/checker for the selected format */
+static uint (*checksum)(uchar *, int, uint); /* running checksum over the uncompressed data */
+static char *err; /* message printed by main when the result is not FlateOk */
+static uint sum; /* current checksum value */
+static uint nin; /* total bytes read from the input file */
+static uint nout; /* total bytes written to the output file */
+static uint headerlen; /* length returned by header() */
+static uint footerlen; /* length returned by footer() */
+static uint extralen; /* input bytes left over after the footer (decompression only) */
+
+/* Placeholders main installs for the raw format: nothing is written or checked. */
+
+/* Raw streams have no header: always reports a header length of 0. */
+static int dummyheader(uchar *p, int n) {
+	(void)p;
+	(void)n;
+	return 0;
+}
+
+/* Raw streams have no footer: always reports a footer length of 0. */
+static int dummyfooter(uchar *p, int n, uint sum, uint len, uint zlen) {
+	(void)p;
+	(void)n;
+	(void)sum;
+	(void)len;
+	(void)zlen;
+	return 0;
+}
+
+/* Raw streams carry no checksum: the running value stays 0. */
+static uint dummysum(uchar *p, int n, uint sum) {
+	(void)p;
+	(void)n;
+	(void)sum;
+	return 0;
+}
+
+/*
+ * compress, using FlateStream interface
+ *
+ * Writes header(), the deflate output for the data read from in, and
+ * footer() to out. Updates the globals nin, nout, sum, headerlen and
+ * footerlen, and stores the error message (if any) in err.
+ * Returns FlateOk on success and FlateErr on failure.
+ */
+int compress_stream(FILE *in, FILE *out) {
+	FlateStream s;
+	int k, n;
+	enum {Nin = 1<<13, Nout = 1<<15};
+
+	s.in = malloc(Nin);
+	s.out = malloc(Nout);
+	if (!s.in || !s.out) {
+		/* free(NULL) is a no-op, so this releases whichever buffer was obtained */
+		free(s.in);
+		free(s.out);
+		err = "out of memory.";
+		return FlateErr;
+	}
+	s.nin = 0;
+	s.nout = Nout;
+	s.err = 0;
+	s.state = 0;
+
+	k = header(s.out, s.nout);
+	if (k == FlateErr) {
+		s.err = "header error.";
+		n = FlateErr;
+	} else {
+		/* enter the loop as if deflate had produced k bytes, so the
+		 * FlateOut case writes the header */
+		headerlen = s.nout = k;
+		n = FlateOut;
+	}
+	for (;; n = deflate(&s))
+		switch (n) {
+		case FlateOk:
+			k = footer(s.out, s.nout, sum, nin, nout - headerlen);
+			if (k == FlateErr) {
+				s.err = "footer error.";
+				n = FlateErr;
+			} else if ((size_t)k != fwrite(s.out, 1, k, out)) {
+				s.err = "write error.";
+				n = FlateErr;
+			} else {
+				footerlen = k;
+				nout += k;
+			}
+			/* fallthrough: success and failure share the cleanup */
+		case FlateErr:
+			free(s.in);
+			free(s.out);
+			err = s.err;
+			return n;
+		case FlateIn:
+			/* NOTE(review): presumably a 0-byte read tells deflate that
+			 * the input has ended -- deflate is not visible here */
+			s.nin = fread(s.in, 1, Nin, in);
+			nin += s.nin;
+			sum = checksum(s.in, s.nin, sum);
+			break;
+		case FlateOut:
+			k = fwrite(s.out, 1, s.nout, out);
+			/* NOTE(review): the error is only recorded in s.err; this
+			 * relies on the next deflate call noticing it -- confirm */
+			if (k != s.nout)
+				s.err = "write error.";
+			nout += k;
+			s.nout = Nout;
+			break;
+		}
+}
+
+/*
+ * decompress, using FlateStream interface
+ *
+ * Parses header() from the start of in, writes the inflated data to out and
+ * checks footer() on the bytes that follow the compressed stream. Updates the
+ * globals nin, nout, sum, headerlen, footerlen and extralen, and stores the
+ * error message (if any) in err.
+ * Returns FlateOk on success and FlateErr on failure.
+ */
+int decompress_stream(FILE *in, FILE *out) {
+	FlateStream s;
+	uchar *begin;
+	int k, n;
+	enum {Nin = 1<<13, Nout = 1<<15};
+
+	/* begin keeps the start of the input buffer because s.in moves */
+	s.in = begin = malloc(Nin);
+	s.out = malloc(Nout);
+	if (!begin || !s.out) {
+		/* free(NULL) is a no-op, so this releases whichever buffer was obtained */
+		free(begin);
+		free(s.out);
+		err = "out of memory.";
+		return FlateErr;
+	}
+	s.nout = Nout;
+	s.err = 0;
+	s.state = 0;
+
+	s.nin = fread(s.in, 1, Nin, in);
+	nin += s.nin;
+	k = header(s.in, s.nin);
+	if (k == FlateErr) {
+		s.err = "header error.";
+		n = FlateErr;
+	} else {
+		/* hand inflate only the bytes after the header */
+		headerlen = k;
+		s.nin -= k;
+		s.in += k;
+		n = inflate(&s);
+	}
+	for (;; n = inflate(&s))
+		switch (n) {
+		case FlateOk:
+			/* inflate reports the unconsumed input in s.in/s.nin: move it
+			 * to the front and top the buffer up BEHIND it. Reading to
+			 * begin, as before, overwrote the bytes just saved. */
+			memmove(begin, s.in, s.nin);
+			k = fread(begin + s.nin, 1, Nin-s.nin, in);
+			nin += k;
+			s.nin += k;
+			k = footer(begin, s.nin, sum, nout, nin - s.nin - headerlen);
+			if (k == FlateErr) {
+				s.err = "footer error.";
+				n = FlateErr;
+			} else {
+				footerlen = k;
+				extralen = s.nin - k;
+			}
+			/* fallthrough: success and failure share the cleanup */
+		case FlateErr:
+			free(begin);
+			free(s.out);
+			err = s.err;
+			return n;
+		case FlateIn:
+			s.in = begin;
+			s.nin = fread(s.in, 1, Nin, in);
+			nin += s.nin;
+			break;
+		case FlateOut:
+			k = fwrite(s.out, 1, s.nout, out);
+			/* NOTE(review): the error is only recorded in s.err; this
+			 * relies on the next inflate call noticing it -- confirm */
+			if (k != s.nout)
+				s.err = "write error.";
+			sum = checksum(s.out, k, sum);
+			nout += k;
+			s.nout = Nout;
+			break;
+		}
+}
+
+/*
+ * Command line front end: parses single-letter options, installs the
+ * header/footer/checksum functions for the chosen format and runs the
+ * compressor or decompressor from stdin to stdout.
+ * Returns the FlateOk/FlateErr result of the run, or -1 after printing the
+ * usage text for an unrecognized argument.
+ */
+int main(int argc, char *argv[]) {
+	char comp = 'c';
+	char fmt = 'r';
+	char verbose = 'q';
+	int (*call)(FILE *, FILE*);
+	int n, i;
+
+	for (i = 1; i < argc; i++) {
+		/* accept exactly "-x"; a recognized letter continues the loop,
+		 * anything else drops through to the usage text */
+		if (argv[i][0] == '-' && argv[i][1] && argv[i][2] == 0)
+			switch (argv[i][1]) {
+			case 'q':
+			case 'v':
+				verbose = argv[i][1];
+				continue;
+			case 'c':
+			case 'd':
+				comp = argv[i][1];
+				continue;
+			case 'r':
+			case 'g':
+			case 'z':
+			case 'p':
+				fmt = argv[i][1];
+				continue;
+			}
+		/* the title line was missing its newline and ran into " -q ..." */
+		fprintf(stderr, "usage: %s [-q|-v] [-c|-d] [-r|-g|-z|-p]\n\n"
+			"deflate stream compression\n"
+			" -q quiet (default)\n"
+			" -v verbose\n"
+			" -c compress (default)\n"
+			" -d decompress\n"
+			" -r raw (default)\n"
+			" -g gzip\n"
+			" -z zlib\n"
+			" -p pkzip\n", argv[0]);
+		return -1;
+	}
+	call = comp == 'c' ? compress_stream : decompress_stream;
+	switch (fmt) {
+	case 'r':
+		header = dummyheader;
+		footer = dummyfooter;
+		checksum = dummysum;
+		n = call(stdin, stdout);
+		break;
+	case 'g':
+		if (comp == 'c') {
+			header = deflate_gzip_header;
+			footer = deflate_gzip_footer;
+		} else {
+			header = inflate_gzip_header;
+			footer = inflate_gzip_footer;
+		}
+		checksum = crc32;
+		crc32init();
+		n = call(stdin, stdout);
+		break;
+	case 'z':
+		if (comp == 'c') {
+			header = deflate_zlib_header;
+			footer = deflate_zlib_footer;
+		} else {
+			header = inflate_zlib_header;
+			footer = inflate_zlib_footer;
+		}
+		checksum = adler32;
+		/* RFC 1950 starts Adler-32 at 1; adler32() takes its low sum
+		 * straight from this seed, so the zero default gives a checksum
+		 * other zlib implementations reject */
+		sum = 1;
+		n = call(stdin, stdout);
+		break;
+	case 'p':
+	default:
+		err = "uninplemented.";
+		n = FlateErr;
+		break;
+	}
+	/* the counters are uint (assumed to be unsigned int), so print them with %u */
+	if (verbose == 'v')
+		fprintf(stderr, "in:%u out:%u checksum: 0x%08x (header:%u compressed len:%u footer:%u extra input bytes:%s)\n",
+			nin, nout, sum, headerlen, (comp == 'c' ? nout : nin) - headerlen - footerlen - extralen,
+			footerlen, extralen ? "yes" : "no");
+	if (n != FlateOk)
+		fprintf(stderr, "error: %s\n", err);
+	return n;
+}
Received on Fri Aug 21 2009 - 13:50:15 UTC

This archive was generated by hypermail 2.2.0 : Fri Aug 21 2009 - 14:00:14 UTC