[hackers] [sbase] Audit col(1), add UTF-8-support || FRIGN
commit e8e3689fb359ba1c2d8b137abcf239782a6cd8d0
Author: FRIGN <dev_AT_frign.de>
Date: Sun Mar 22 21:43:59 2015 +0100
Audit col(1), add UTF-8-support
Nothing special here, only renaming of variables and adding the
Rune-utility-functions.
Also, I refactored the manpage.
diff --git a/README b/README
index 090c841..e5d9b5e 100644
--- a/README
+++ b/README
@@ -19,7 +19,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
=*| cksum yes none
=*| cmp yes none
#*| cols non-posix none
- col yes none
+#*| col non-posix none
=*| comm yes none
=*| cp yes none (-i)
=*| cron non-posix none
diff --git a/col.1 b/col.1
index b9d1c4d..939d354 100644
--- a/col.1
+++ b/col.1
@@ -1,4 +1,4 @@
-.Dd March 6, 2014
+.Dd March 22, 2014
.Dt COL 1
.Os sbase
.Sh NAME
@@ -9,20 +9,15 @@
.Op Fl bfpx
.Op Fl l Ar num
.Sh DESCRIPTION
-The
.Nm
-utility filters all the reverse (and half reverse) line feeds,
-as they are produced by
+filters all reverse (and half reverse) line feeds,
+as produced by
.Xr nroff 1
-with .2C of
+with .2C,
.Xr ms 6
-or by
+or
.Xr tbl 1 .
-.Nm
-also replaces spaces by tabs when it is possible.
-The control sequences managed by
-.Nm
-are:
+The recognized control sequences are:
.Bl -tag -width Ds
.It ESC-7
Reverse line-feed
@@ -42,25 +37,24 @@ Carriage return
New line
.El
.Pp
-All the other control codes and escape sequences are removed.
+All other control codes and escape sequences are removed.
.Nm
-transforms all the spaces into tabulators.
+converts all spaces to tabs.
.Sh OPTIONS
.Bl -tag -width Ds
.It Fl p
-Print unknown escape sequences to the output.
+Print unknown escape sequences.
.It Fl b
-Do not print backspaces in output,
-and print only the last overstriked character in the output.
+Do not print backspaces and instead only print the last
+character written to each column position.
.It Fl f
Allow forward half line feeds in the output.
.It Fl x
-Do not convert spaces in tabulators.
+Do not convert spaces to tabs.
.It Fl l Ar num
-Increment to
+Buffer
.Ar num
-the number of lines buffered for
-.Nm
+lines in memory.
.El
.Sh SEE ALSO
.Xr nroff 1 ,
@@ -68,13 +62,9 @@ the number of lines buffered for
.Xr ms 6
.Sh BUGS
.Nm
-only process text with a maximum of 256 lines with 800 bytes per line,
-although the number of lines can be modified with the
-.Fl l
-option.
-When the number of lines is bigger,
-the buffer is flushed to the output,
-so new reverse line feeds can not operate in the flushed lines.
-This implementation ignores SI and SO selection character sets,
-because it is supposed to work only with UTF-8 strings,
-although the UTF-8 support is missed.
+only buffers up to 256 lines with up to 800 bytes per line
+if the line-number hasn't been set differently with the
+.Op Fl l
+flag.
+When the number of lines is bigger, the buffer is flushed and
+reverse line feeds can not operate on the flushed lines.
diff --git a/col.c b/col.c
index 8b86240..46c3332 100644
--- a/col.c
+++ b/col.c
@@ -1,47 +1,47 @@
/* See LICENSE file for copyright and license details. */
+#include <limits.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
-#include <ctype.h>
+#include "utf.h"
#include "util.h"
#define NLINES 256
#define NCOLS 800
-static char **buff;
+static Rune **buf;
-static int obackspace, onotabs, ohalfline, oescape;
-static unsigned nline, ncol, nchar, nspaces, maxline, bs;
-static size_t pagsize = NLINES;
+static int backspace, notabs, halfline, escape;
+static size_t nline, ncol, nchar, nspaces, maxline, bs, pagesize = NLINES;
static void
flush(void)
{
- int c;
- unsigned i, j;
+ Rune c;
+ size_t i, j;
for (i = 0; i < maxline; ++i) {
- for (j = 0; j < NCOLS && (c = buff[i][j]) != '\0'; ++j)
- putchar(c);
+ for (j = 0; j < NCOLS && (c = buf[i][j]); ++j)
+ efputrune(&c, stdout, "<stdout>");
putchar('\n');
}
bs = nchar = nline = ncol = 0;
}
static void
-forward(unsigned n)
+forward(size_t n)
{
- unsigned lim;
+ size_t lim;
- for (lim = ncol + n; ncol != lim && nchar < NCOLS-1; ++nchar) {
- switch (buff[nline][nchar]) {
+ for (lim = ncol + n; ncol != lim && nchar < NCOLS - 1; ++nchar) {
+ switch (buf[nline][nchar]) {
case '\b':
--ncol;
break;
case '\0':
- buff[nline][nchar] = ' ';
+ buf[nline][nchar] = ' ';
/* FALLTHROUGH */
default:
++ncol;
@@ -53,31 +53,30 @@ forward(unsigned n)
static void
linefeed(int up, int rcarriage)
{
- unsigned oncol = ncol;
+ size_t oncol = ncol;
nspaces = 0;
if (up > 0) {
- if (nline == pagsize-1) {
+ if (nline == pagesize - 1) {
flush();
} else {
if (++nline > maxline)
maxline = nline;
}
- } else {
- if (nline > 0)
- --nline;
+ } else if (nline > 0) {
+ --nline;
}
bs = 0;
if (rcarriage) {
forward(oncol);
- nchar = ncol = 0;
+ nchar = ncol = 0;
}
}
static void
-newchar(int c)
+newchar(Rune c)
{
- char *cp;
+ Rune *cp;
forward(nspaces);
nspaces = 0;
@@ -90,7 +89,7 @@ newchar(int c)
nchar = ncol = 0;
break;
case '\t':
- forward(8 - ncol%8);
+ forward(8 - ncol % 8);
break;
case '\b':
if (ncol > 0)
@@ -100,20 +99,18 @@ newchar(int c)
bs = 1;
break;
default:
- cp = &buff[nline][nchar];
- if (*cp != '\0' && *cp != ' ' && bs && !obackspace) {
- if (nchar != NCOLS-3) {
- memmove(cp + 3, cp + 1, NCOLS - nchar - 2);
- cp[1] = '\b';
- nchar += 2;
- }
+ cp = &buf[nline][nchar];
+ if (*cp && *cp != ' ' && bs && !backspace && nchar != NCOLS - 3) {
+ memmove(cp + 3, cp + 1, (NCOLS - nchar - 2) * sizeof(*cp));
+ cp[1] = '\b';
+ nchar += 2;
}
- if (nchar != NCOLS-1) {
- for (cp = buff[nline]; cp < &buff[nline][nchar]; ++cp) {
+ if (nchar != NCOLS - 1) {
+ for (cp = buf[nline]; cp < &buf[nline][nchar]; ++cp) {
if (*cp == '\0')
*cp = ' ';
}
- buff[nline][nchar++] = c;
+ buf[nline][nchar++] = c;
++ncol;
}
bs = 0;
@@ -123,50 +120,52 @@ newchar(int c)
static void
col(void)
{
- int c;
+ Rune r;
+ int ret;
- while ((c = getchar()) != EOF) {
- switch (c) {
+ while (efgetrune(&r, stdin, "<stdin>")) {
+ switch (r) {
case '\x1b':
- switch (c = getchar()) {
+ ret = efgetrune(&r, stdin, "<stdin>");
+ switch (r) {
case '8': /* reverse half-line-feed */
case '7': /* reverse line-feed */
linefeed(-1, 0);
continue;
case '9': /* forward half-line-feed */
- if (ohalfline)
+ if (halfline)
break;
linefeed(1, 0);
continue;
}
- if (!oescape)
+ if (!escape)
continue;
newchar('\x1b');
- if (c != EOF)
- newchar(c);
+ if (ret)
+ newchar(r);
break;
case '\v':
linefeed(-1, 0);
break;
case ' ':
- if (!onotabs) {
+ if (!notabs) {
if (++nspaces != 8)
continue;
- c = '\t';
+ r = '\t';
nspaces = 0;
}
/* FALLTHROUGH */
case '\r':
case '\b':
case '\t':
- newchar(c);
+ newchar(r);
break;
case '\n':
linefeed(1, 1);
break;
default:
- if (!iscntrl(c))
- newchar(c);
+ if (!iscntrlrune(r))
+ newchar(r);
break;
}
}
@@ -175,17 +174,17 @@ col(void)
static void
allocbuf(void)
{
- char **bp;
+ Rune **bp;
- buff = ereallocarray(NULL, pagsize, sizeof(*buff));
- for (bp = buff; bp < &buff[pagsize]; ++bp)
- *bp = emalloc(NCOLS);
+ buf = ereallocarray(NULL, pagesize, sizeof(*buf));
+ for (bp = buf; bp < buf + pagesize; ++bp)
+ *bp = ereallocarray(NULL, NCOLS, sizeof(**buf));
}
static void
usage(void)
{
- enprintf(2, "usage: %s [-p][-l num][-b][-f][-x]\n", argv0);
+ enprintf(2, "usage: %s [-pbfx] [-l num]\n", argv0);
}
int
@@ -193,35 +192,30 @@ main(int argc, char *argv[])
{
ARGBEGIN {
case 'b':
- obackspace = 1;
+ backspace = 1;
break;
case 'f':
- ohalfline = 1;
+ halfline = 1;
break;
case 'l':
- pagsize = estrtonum(EARGF(usage()), 0, SIZE_MAX);
+ pagesize = estrtonum(EARGF(usage()), 0, MIN(SIZE_MAX, LLONG_MAX));
break;
case 'p':
- oescape = 1;
+ escape = 1;
break;
case 'x':
- onotabs = 1;
+ notabs = 1;
break;
default:
usage();
} ARGEND;
- if (argc > 0)
+ if (argc)
usage();
allocbuf();
col();
flush();
- if (ferror(stdin))
- enprintf(1, "error reading input");
- if (ferror(stdout))
- enprintf(2, "error writing output");
-
return 0;
}
Received on Mon Mar 23 2015 - 18:35:49 CET
This archive was generated by hypermail 2.3.0: Mon Mar 23 2015 - 18:36:24 CET