[hackers] [sbase] Audit col(1), add UTF-8-support || FRIGN

From: <git_AT_suckless.org>
Date: Mon, 23 Mar 2015 18:35:49 +0100 (CET)

commit e8e3689fb359ba1c2d8b137abcf239782a6cd8d0
Author: FRIGN <dev_AT_frign.de>
Date: Sun Mar 22 21:43:59 2015 +0100

    Audit col(1), add UTF-8-support
    
    Nothing special here, only renaming of variables and adding the
    Rune-utility-functions.
    Also, I refactored the manpage.

diff --git a/README b/README
index 090c841..e5d9b5e 100644
--- a/README
+++ b/README
@@ -19,7 +19,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
 =*| cksum yes none
 =*| cmp yes none
 #*| cols non-posix none
- col yes none
+#*| col non-posix none
 =*| comm yes none
 =*| cp yes none (-i)
 =*| cron non-posix none
diff --git a/col.1 b/col.1
index b9d1c4d..939d354 100644
--- a/col.1
+++ b/col.1
@@ -1,4 +1,4 @@
-.Dd March 6, 2014
+.Dd March 22, 2015
 .Dt COL 1
 .Os sbase
 .Sh NAME
@@ -9,20 +9,15 @@
 .Op Fl bfpx
 .Op Fl l Ar num
 .Sh DESCRIPTION
-The
 .Nm
-utility filters all the reverse (and half reverse) line feeds,
-as they are produced by
+filters all reverse (and half reverse) line feeds,
+as produced by
 .Xr nroff 1
-with .2C of
+with .2C,
 .Xr ms 6
-or by
+or
 .Xr tbl 1 .
-.Nm
-also replaces spaces by tabs when it is possible.
-The control sequences managed by
-.Nm
-are:
+The recognized control sequences are:
 .Bl -tag -width Ds
 .It ESC-7
 Reverse line-feed
@@ -42,25 +37,24 @@ Carriage return
 New line
 .El
 .Pp
-All the other control codes and escape sequences are removed.
+All other control codes and escape sequences are removed.
 .Nm
-transforms all the spaces into tabulators.
+converts all spaces to tabs.
 .Sh OPTIONS
 .Bl -tag -width Ds
 .It Fl p
-Print unknown escape sequences to the output.
+Print unknown escape sequences.
 .It Fl b
-Do not print backspaces in output,
-and print only the last overstriked character in the output.
+Do not print backspaces and instead only print the last
+character written to each column position.
 .It Fl f
 Allow forward half line feeds in the output.
 .It Fl x
-Do not convert spaces in tabulators.
+Do not convert spaces to tabs.
 .It Fl l Ar num
-Increment to
+Buffer
 .Ar num
-the number of lines buffered for
-.Nm
+lines in memory.
 .El
 .Sh SEE ALSO
 .Xr nroff 1 ,
@@ -68,13 +62,9 @@ the number of lines buffered for
 .Xr ms 6
 .Sh BUGS
 .Nm
-only process text with a maximum of 256 lines with 800 bytes per line,
-although the number of lines can be modified with the
-.Fl l
-option.
-When the number of lines is bigger,
-the buffer is flushed to the output,
-so new reverse line feeds can not operate in the flushed lines.
-This implementation ignores SI and SO selection character sets,
-because it is supposed to work only with UTF-8 strings,
-although the UTF-8 support is missed.
+only buffers up to 256 lines with up to 800 characters per line
+unless the number of lines has been changed with the
+.Fl l
+flag.
+When the input has more lines, the buffer is flushed and
+reverse line feeds cannot operate on the flushed lines.
diff --git a/col.c b/col.c
index 8b86240..46c3332 100644
--- a/col.c
+++ b/col.c
@@ -1,47 +1,47 @@
 /* See LICENSE file for copyright and license details. */
+#include <limits.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
-#include <ctype.h>
 
+#include "utf.h"
 #include "util.h"
 
 #define NLINES 256
 #define NCOLS 800
 
-static char **buff;
+static Rune **buf;
 
-static int obackspace, onotabs, ohalfline, oescape;
-static unsigned nline, ncol, nchar, nspaces, maxline, bs;
-static size_t pagsize = NLINES;
+static int backspace, notabs, halfline, escape;
+static size_t nline, ncol, nchar, nspaces, maxline, bs, pagesize = NLINES;
 
 static void
 flush(void)
 {
- int c;
- unsigned i, j;
+ Rune c;
+ size_t i, j;
 
         for (i = 0; i < maxline; ++i) {
- for (j = 0; j < NCOLS && (c = buff[i][j]) != '\0'; ++j)
- putchar(c);
+ for (j = 0; j < NCOLS && (c = buf[i][j]); ++j)
+ efputrune(&c, stdout, "<stdout>");
                 putchar('\n');
         }
         bs = nchar = nline = ncol = 0;
 }
 
 static void
-forward(unsigned n)
+forward(size_t n)
 {
- unsigned lim;
+ size_t lim;
 
- for (lim = ncol + n; ncol != lim && nchar < NCOLS-1; ++nchar) {
- switch (buff[nline][nchar]) {
+ for (lim = ncol + n; ncol != lim && nchar < NCOLS - 1; ++nchar) {
+ switch (buf[nline][nchar]) {
                 case '\b':
                         --ncol;
                         break;
                 case '\0':
- buff[nline][nchar] = ' ';
+ buf[nline][nchar] = ' ';
                         /* FALLTHROUGH */
                 default:
                         ++ncol;
@@ -53,31 +53,30 @@ forward(unsigned n)
 static void
 linefeed(int up, int rcarriage)
 {
- unsigned oncol = ncol;
+ size_t oncol = ncol;
 
         nspaces = 0;
         if (up > 0) {
- if (nline == pagsize-1) {
+ if (nline == pagesize - 1) {
                         flush();
                 } else {
                         if (++nline > maxline)
                                 maxline = nline;
                 }
- } else {
- if (nline > 0)
- --nline;
+ } else if (nline > 0) {
+ --nline;
         }
         bs = 0;
         if (rcarriage) {
                 forward(oncol);
- nchar = ncol = 0;
+ nchar = ncol = 0;
         }
 }
 
 static void
-newchar(int c)
+newchar(Rune c)
 {
- char *cp;
+ Rune *cp;
 
         forward(nspaces);
         nspaces = 0;
@@ -90,7 +89,7 @@ newchar(int c)
                 nchar = ncol = 0;
                 break;
         case '\t':
- forward(8 - ncol%8);
+ forward(8 - ncol % 8);
                 break;
         case '\b':
                 if (ncol > 0)
@@ -100,20 +99,18 @@ newchar(int c)
                 bs = 1;
                 break;
         default:
- cp = &buff[nline][nchar];
- if (*cp != '\0' && *cp != ' ' && bs && !obackspace) {
- if (nchar != NCOLS-3) {
- memmove(cp + 3, cp + 1, NCOLS - nchar - 2);
- cp[1] = '\b';
- nchar += 2;
- }
+ cp = &buf[nline][nchar];
+ if (*cp && *cp != ' ' && bs && !backspace && nchar != NCOLS - 3) {
+ memmove(cp + 3, cp + 1, (NCOLS - nchar - 2) * sizeof(*cp));
+ cp[1] = '\b';
+ nchar += 2;
                 }
- if (nchar != NCOLS-1) {
- for (cp = buff[nline]; cp < &buff[nline][nchar]; ++cp) {
+ if (nchar != NCOLS - 1) {
+ for (cp = buf[nline]; cp < &buf[nline][nchar]; ++cp) {
                                 if (*cp == '\0')
                                         *cp = ' ';
                         }
- buff[nline][nchar++] = c;
+ buf[nline][nchar++] = c;
                         ++ncol;
                 }
                 bs = 0;
@@ -123,50 +120,52 @@ newchar(int c)
 static void
 col(void)
 {
- int c;
+ Rune r;
+ int ret;
 
- while ((c = getchar()) != EOF) {
- switch (c) {
+ while (efgetrune(&r, stdin, "<stdin>")) {
+ switch (r) {
                 case '\x1b':
- switch (c = getchar()) {
+ ret = efgetrune(&r, stdin, "<stdin>");
+ switch (r) {
                         case '8': /* reverse half-line-feed */
                         case '7': /* reverse line-feed */
                                 linefeed(-1, 0);
                                 continue;
                         case '9': /* forward half-line-feed */
- if (ohalfline)
+ if (halfline)
                                         break;
                                 linefeed(1, 0);
                                 continue;
                         }
- if (!oescape)
+ if (!escape)
                                 continue;
                         newchar('\x1b');
- if (c != EOF)
- newchar(c);
+ if (ret)
+ newchar(r);
                         break;
                 case '\v':
                         linefeed(-1, 0);
                         break;
                 case ' ':
- if (!onotabs) {
+ if (!notabs) {
                                 if (++nspaces != 8)
                                         continue;
- c = '\t';
+ r = '\t';
                                 nspaces = 0;
                         }
                         /* FALLTHROUGH */
                 case '\r':
                 case '\b':
                 case '\t':
- newchar(c);
+ newchar(r);
                         break;
                 case '\n':
                         linefeed(1, 1);
                         break;
                 default:
- if (!iscntrl(c))
- newchar(c);
+ if (!iscntrlrune(r))
+ newchar(r);
                         break;
                 }
         }
@@ -175,17 +174,17 @@ col(void)
 static void
 allocbuf(void)
 {
- char **bp;
+ Rune **bp;
 
- buff = ereallocarray(NULL, pagsize, sizeof(*buff));
- for (bp = buff; bp < &buff[pagsize]; ++bp)
- *bp = emalloc(NCOLS);
+ buf = ereallocarray(NULL, pagesize, sizeof(*buf));
+ for (bp = buf; bp < buf + pagesize; ++bp)
+ *bp = ereallocarray(NULL, NCOLS, sizeof(**buf));
 }
 
 static void
 usage(void)
 {
- enprintf(2, "usage: %s [-p][-l num][-b][-f][-x]\n", argv0);
+ enprintf(2, "usage: %s [-pbfx] [-l num]\n", argv0);
 }
 
 int
@@ -193,35 +192,30 @@ main(int argc, char *argv[])
 {
         ARGBEGIN {
         case 'b':
- obackspace = 1;
+ backspace = 1;
                 break;
         case 'f':
- ohalfline = 1;
+ halfline = 1;
                 break;
         case 'l':
- pagsize = estrtonum(EARGF(usage()), 0, SIZE_MAX);
+ pagesize = estrtonum(EARGF(usage()), 0, MIN(SIZE_MAX, LLONG_MAX));
                 break;
         case 'p':
- oescape = 1;
+ escape = 1;
                 break;
         case 'x':
- onotabs = 1;
+ notabs = 1;
                 break;
         default:
                 usage();
         } ARGEND;
 
- if (argc > 0)
+ if (argc)
                 usage();
 
         allocbuf();
         col();
         flush();
 
- if (ferror(stdin))
- enprintf(1, "error reading input");
- if (ferror(stdout))
- enprintf(2, "error writing output");
-
         return 0;
 }
Received on Mon Mar 23 2015 - 18:35:49 CET

This archive was generated by hypermail 2.3.0 : Mon Mar 23 2015 - 18:36:24 CET