[hackers] [sbase] Rewrite od(1) || FRIGN

From: <git_AT_suckless.org>
Date: Mon, 26 Oct 2015 12:55:50 +0100 (CET)

commit 092c95b66c0dca434bde59d00420ace8a6d87050
Author: FRIGN <dev_AT_frign.de>
AuthorDate: Fri Oct 9 01:46:56 2015 +0200
Commit: sin <sin_AT_2f30.org>
CommitDate: Mon Oct 26 11:55:41 2015 +0000

    Rewrite od(1)
    
    Looking at the old code, it became clear that the desired
    functionality with the t-flag could not be added unless the
    underlying data-structures were reworked.
    Thus the only way to be successful was to rewrite the whole thing.
    
    od(1) allows giving arbitrarily many type-specs per call, both via
    -t x1o2... and -t x1 -t o2 and intermixed.
    This fortunately is easy to parse.
    Now, to be flexible, it should support more than just the lengths of
    the native integral types. Erroring out like this is unacceptable:
    
    $ echo -n "shrek"| od -t u3
    od: invalid type string ‘u3’;
    this system doesn't provide a 3-byte integral type
    
    Thus, this new od(1) just collects the bytes until shortly before
    printing, when the numbers are written into a long long with the
    proper offset.
    The number of bytes per line is the lcm of all given type-lengths,
    doubled until it is >= 16.
    It is equal to 16 for all types that the old od(1) was able to print.
    
    Endianness is of course also supported, though it needs some testing,
    especially on big-endian systems.

diff --git a/README b/README
index 0a09317..1201a90 100644
--- a/README
+++ b/README
@@ -56,7 +56,7 @@ The following tools are implemented:
 =*|o nice .
 #*|o nl .
 =*|o nohup .
- od -t
+=* o od .
 #*|o paste .
 =*|x printenv .
 #*|o printf .
diff --git a/od.1 b/od.1
index d1164b4..7aad1e4 100644
--- a/od.1
+++ b/od.1
@@ -31,3 +31,15 @@ he\fIx\fRadecimal. If unspecified, the default is octal.
 .It Fl v
 Always set. Write all input data, including duplicate lines.
 .El
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2013
+specification.
+.Pp
+The
+.Op Fl v
+flag is enabled by default and the 'd' parameter for the
+.Op Fl t
+flag is interpreted as 'u'.
diff --git a/od.c b/od.c
index 31f8179..50723c7 100644
--- a/od.c
+++ b/od.c
@@ -1,33 +1,44 @@
 /* See LICENSE file for copyright and license details. */
-#include <ctype.h>
-#include <inttypes.h>
+#include <endian.h>
+#include <stdint.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
+#include "queue.h"
 #include "util.h"
 
-static size_t bytes_per_line = 16;
-static off_t maxbytes = -1;
+struct type {
+ unsigned char format;
+ unsigned int len;
+ TAILQ_ENTRY(type) entry;
+};
+
+static TAILQ_HEAD(head, type) head = TAILQ_HEAD_INITIALIZER(head);
+static unsigned char addr_format = 'o';
 static off_t skip = 0;
-static unsigned char radix = 'o';
-static unsigned char type = 'o';
+static off_t max = -1;
+static size_t linelen = 1;
 
 static void
-printaddress(FILE *f, off_t addr)
+printaddress(off_t addr)
 {
- char fmt[] = "%07j# ";
+ char fmt[] = "%07j#";
 
- if (radix == 'n') {
- fputc(' ', f);
+ if (addr_format == 'n') {
+ fputc(' ', stdout);
         } else {
- fmt[4] = radix;
- fprintf(f, fmt, (intmax_t)addr);
+ fmt[4] = addr_format;
+ printf(fmt, (intmax_t)addr);
         }
 }
 
 static void
-printchar(FILE *f, unsigned char c)
-{
+printchunk(unsigned char *s, unsigned char format, size_t len) {
+ long long res, basefac;
+ size_t i;
+ char fmt[] = " %0*ll#";
+
         const char *namedict[] = {
                 "nul", "soh", "stx", "etx", "eot", "enq", "ack",
                 "bel", "bs", "ht", "nl", "vt", "ff", "cr",
@@ -41,72 +52,127 @@ printchar(FILE *f, unsigned char c)
                 ['\n'] = "\\n", ['\v'] = "\\v",
                 ['\f'] = "\\f", ['\r'] = "\\r",
         };
- const char *fmtdict[] = {
- ['d'] = "%4hhd ", ['o'] = "%03hho ",
- ['u'] = "%3hhu ", ['x'] = "%02hhx ",
- };
 
- switch (type) {
+ switch (format) {
         case 'a':
- c &= ~128; /* clear high bit as required by standard */
- if (c < LEN(namedict) || c == 127) {
- fprintf(f, "%3s ", (c == 127) ? "del" : namedict[c]);
+ *s &= ~128; /* clear high bit as required by standard */
+ if (*s < LEN(namedict) || *s == 127) {
+ printf(" %3s", (*s == 127) ? "del" : namedict[*s]);
                 } else {
- fprintf(f, "%3c ", c);
+ printf(" %3c", *s);
                 }
                 break;
         case 'c':
- if (strchr("\a\b\t\n\v\f\r\0", c)) {
- fprintf(f, "%3s ", escdict[c]);
+ if (strchr("\a\b\t\n\v\f\r\0", *s)) {
+ printf(" %3s", escdict[*s]);
                 } else {
- fprintf(f, "%3c ", c);
+ printf(" %3c", *s);
                 }
                 break;
         default:
- fprintf(f, fmtdict[type], c);
+ res = 0;
+ basefac = 1;
+#if __BYTE_ORDER == __BIG_ENDIAN
+ for (i = len; i; i--) {
+ res += s[i - 1] * basefac;
+#else
+ for (i = 0; i < len; i++) {
+ res += s[i] * basefac;
+#endif
+ basefac <<= 8;
+ }
+ fmt[6] = format;
+ printf(fmt, (int)(3 * len + len - 1), res);
+ }
+}
+
+static void
+printline(unsigned char *line, size_t len, off_t addr)
+{
+ struct type *t = NULL;
+ size_t i;
+ int first = 1;
+
+ if (TAILQ_EMPTY(&head))
+ goto once;
+ TAILQ_FOREACH(t, &head, entry) {
+once:
+ if (first) {
+ printaddress(addr);
+ first = 0;
+ } else {
+ printf("%*c", (addr_format == 'n') ? 1 : 7, ' ');
+ }
+ for (i = 0; i < len; ) {
+ printchunk(line + i, t ? t->format : 'o',
+ MIN(len - i, t ? t->len : 4));
+ i += MIN(len - i, t ? t->len : 4);
+ }
+ fputc('\n', stdout);
+ if (TAILQ_EMPTY(&head) || (!len && !first))
+ break;
         }
 }
 
 static void
-od(FILE *in, char *in_name, FILE *out, char *out_name)
+od(FILE *fp, char *fname, int last)
 {
- off_t addr;
- size_t i, chunklen;
+ static unsigned char *line;
+ static size_t lineoff;
+ size_t i;
         unsigned char buf[BUFSIZ];
+ static off_t addr;
+ size_t buflen;
 
- for (addr = 0; (chunklen = fread(buf, 1, BUFSIZ, in)); ) {
- for (i = 0; i < chunklen && (maxbytes == -1 ||
- (addr - skip) < maxbytes); ++i, ++addr) {
- if (addr - skip < 0)
- continue;
- if (((addr - skip) % bytes_per_line) == 0) {
- if (addr - skip)
- fputc('\n', out);
- printaddress(out, addr);
+ while (skip - addr) {
+ buflen = fread(buf, 1, MIN(skip - addr, BUFSIZ), fp);
+ addr += buflen;
+ if (feof(fp) || ferror(fp))
+ return;
+ }
+ if (!line)
+ line = emalloc(linelen);
+
+ while ((buflen = fread(buf, 1, max >= 0 ?
+ max - (addr - skip) : BUFSIZ, fp))) {
+ for (i = 0; i < buflen; i++, addr++) {
+ line[lineoff++] = buf[i];
+ if (lineoff == linelen) {
+ printline(line, lineoff, addr - lineoff + 1);
+ lineoff = 0;
                         }
- printchar(out, buf[i]);
                 }
- if (feof(in) || ferror(in) || ferror(out))
- break;
         }
- if (addr - skip > 0)
- fputc('\n', out);
- if (radix != 'n') {
- printaddress(out, MAX(addr, skip));
- fputc('\n', out);
+ if (lineoff)
+ printline(line, lineoff, addr - lineoff);
+ printline((unsigned char *)"", 0, addr);
+}
+
+static int
+lcm(unsigned int a, unsigned int b)
+{
+ unsigned int c, d, e;
+
+ for (c = a, d = b; c ;) {
+ e = c;
+ c = d % c;
+ d = e;
         }
+
+ return a / d * b;
 }
 
 static void
 usage(void)
 {
- eprintf("usage: %s [-A d|o|x|n] [-t a|c|d|o|u|x] [-v] [file ...]\n", argv0);
+ eprintf("usage: %s", argv0);
 }
 
 int
 main(int argc, char *argv[])
 {
         FILE *fp;
+ struct type *t;
         int ret = 0;
         char *s;
 
@@ -115,31 +181,78 @@ main(int argc, char *argv[])
                 s = EARGF(usage());
                 if (strlen(s) != 1 || !strchr("doxn", s[0]))
                         usage();
- radix = s[0];
+ addr_format = s[0];
                 break;
         case 'j':
                 if ((skip = parseoffset(EARGF(usage()))) < 0)
- return 1;
+ usage();
                 break;
         case 'N':
- if ((maxbytes = parseoffset(EARGF(usage()))) < 0)
- return 1;
+ if ((max = parseoffset(EARGF(usage()))) < 0)
+ usage();
                 break;
         case 't':
                 s = EARGF(usage());
- if (strlen(s) != 1 || !strchr("acdoux", s[0]))
- usage();
- type = s[0];
+ for (; *s; s++) {
+ t = emalloc(sizeof(struct type));
+ switch (*s) {
+ case 'a':
+ case 'c':
+ t->format = *s;
+ t->len = 1;
+ TAILQ_INSERT_TAIL(&head, t, entry);
+ break;
+ case 'd':
+ case 'o':
+ case 'u':
+ case 'x':
+ t->format = *s;
+ /* todo: allow multiple digits */
+ if (*(s+1) > '0' || *(s+1) <= '9') {
+ t->len = *(s+1) - '0';
+ s++;
+ } else {
+ switch (*(s + 1)) {
+ case 'C':
+ t->len = sizeof(char);
+ break;
+ case 'S':
+ t->len = sizeof(short);
+ break;
+ case 'I':
+ t->len = sizeof(int);
+ break;
+ case 'L':
+ t->len = sizeof(long);
+ break;
+ default:
+ t->len = 4;
+ }
+ }
+ TAILQ_INSERT_TAIL(&head, t, entry);
+ break;
+ default:
+ usage();
+ }
+ }
                 break;
         case 'v':
- /* Always set. Use "uniq -f 1 -c" to handle duplicate lines. */
+ /* always set - use uniq(1) to handle duplicate lines */
                 break;
         default:
                 usage();
         } ARGEND;
 
+ /* line length is lcm of type lengths and >= 16 by doubling */
+ TAILQ_FOREACH(t, &head, entry)
+ linelen = lcm(linelen, t->len);
+ if (TAILQ_EMPTY(&head))
+ linelen = 16;
+ while (linelen < 16)
+ linelen *= 2;
+
         if (!argc) {
- od(stdin, "<stdin>", stdout, "<stdout>");
+ od(stdin, "<stdin>", 1);
         } else {
                 for (; *argv; argc--, argv++) {
                         if (!strcmp(*argv, "-")) {
@@ -150,7 +263,7 @@ main(int argc, char *argv[])
                                 ret = 1;
                                 continue;
                         }
- od(fp, *argv, stdout, "<stdout>");
+ od(fp, *argv, (!*(argv + 1)));
                         if (fp != stdin && fshut(fp, *argv))
                                 ret = 1;
                 }
Received on Mon Oct 26 2015 - 12:55:50 CET

This archive was generated by hypermail 2.3.0 : Mon Oct 26 2015 - 13:00:16 CET