[hackers] [sbase] Add tablist support and a mandoc-manpage to expand(1) || FRIGN

From: <git_AT_suckless.org>
Date: Tue, 24 Mar 2015 23:53:16 +0100 (CET)

commit 692c11bf2b095cb6a5b221811656921e2a3f330d
Author: FRIGN <dev_AT_frign.de>
Date: Sun Jan 25 14:31:02 2015 +0100

    Add tablist support and a mandoc-manpage to expand(1)
    
    and mark it as finished in the README.
    
    This is another example showing how broken the GNU coreutils are:
    
    $ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
    äää    üüü    ööö
    $ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
    äää  üüü  ööö
    
    This is due to the fact that they are still not UTF8-aware and
    actually see "ä" as two single characters, expanding the "äää" with
    4 spaces to a tab of length 10.
    The correct way however is to expand the "äää" with 2 spaces to a
    tab of length 5.
    One can only imagine how this silently breaks a lot of code around
    the world.
    WHAT WERE THEY THINKING?

diff --git a/README b/README
index eb2441c..2826981 100644
--- a/README
+++ b/README
@@ -28,7 +28,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
 = du no -H, -L, (-x)
 =* echo yes none
 =* env yes none
-# expand yes none
+#* expand yes none
    expr yes none
 =* false yes none
    fold yes none
diff --git a/expand.1 b/expand.1
index 6ae819b..ffdaea6 100644
--- a/expand.1
+++ b/expand.1
@@ -1,25 +1,50 @@
-.TH EXPAND 1 sbase\-VERSION
-.SH NAME
-expand \- expand tabs to spaces
-.SH SYNOPSIS
-.B expand
-.RB [ \-t
-.IR n ]
-.RI [ file ...]
-.SH DESCRIPTION
-expand processes the named files or the standard input, writing the
-standard output with tabs changed into spaces. Backspace characters
-are preserved into the output and decrement the column count for tab
-calculations.
-.SH OPTIONS
-.TP
-.BI \-i
-Only change tabs to spaces at the start of lines.
-.TP
-.BI \-t " n"
-Expand tabs to
-.I n
-spaces. We currently support only a single numerical argument.
-.SH SEE ALSO
-.IR unexpand (1),
-.IR fold (1)
+.Dd January 25, 2015
+.Dt EXPAND 1 sbase\-VERSION
+.Sh NAME
+.Nm expand
+.Nd expand tabs to spaces
+.Sh SYNOPSIS
+.Nm expand
+.Op Fl i
+.Op Fl t Ar tablist
+.Op Ar file ...
+.Sh DESCRIPTION
+.Nm
+converts tabs to spaces in each
+.Ar file
+as specified in
+.Ar tablist .
+If no file is given,
+.Nm
+reads from stdin.
+.Pp
+Backspace characters are preserved and decrement the column count
+for tab calculations.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl i
+Only expand tabs at the beginning of lines, i.e. expand each
+line until a character different from '\et' and ' ' is reached.
+.It Fl t Ar tablist
+Specify tab size or tabstops.
+.Ar tablist
+is a list of one (in the former case) or multiple (in the latter case)
+strictly positive integers separated by ' ' or ','.
+.Pp
+The default
+.Ar tablist
+is "8".
+.El
+.Sh SEE ALSO
+.Xr unexpand 1 ,
+.Xr fold 1
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification.
+.Pp
+The
+.Op Fl i
+flag is an extension to that specification.
diff --git a/expand.c b/expand.c
index 35b7a9a..78b5454 100644
--- a/expand.c
+++ b/expand.c
@@ -1,89 +1,86 @@
 /* See LICENSE file for copyright and license details. */
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "utf.h"
 #include "util.h"
 
-static int expand(const char *, FILE *, int);
+static int iflag = 0;
+static size_t *tablist = NULL;
+static size_t tablistlen = 0;
 
-static int iflag = 0;
-
-static void
-usage(void)
+static size_t
+parselist(const char *s, size_t slen)
 {
- eprintf("usage: %s [-i] [-t n] [file...]\n", argv0);
-}
-
-int
-main(int argc, char *argv[])
-{
- FILE *fp;
- int tabstop = 8;
- int ret = 0;
+ size_t i, m, len;
+ char *sep;
 
- ARGBEGIN {
- case 'i':
- iflag = 1;
- break;
- case 't':
- tabstop = estrtol(EARGF(usage()), 0);
- if (!tabstop)
- eprintf("tab size cannot be zero\n");
- break;
- default:
- usage();
- } ARGEND;
+ if (s[0] == ',' || s[0] == ' ')
+ eprintf("expand: tablist can't begin with a ',' or ' '.\n");
+ if (s[slen - 1] == ',' || s[slen - 1] == ' ')
+ eprintf("expand: tablist can't end with a ',' or ' '.\n");
 
- if (argc == 0) {
- expand("<stdin>", stdin, tabstop);
- } else {
- for (; argc > 0; argc--, argv++) {
- if (!(fp = fopen(argv[0], "r"))) {
- weprintf("fopen %s:", argv[0]);
- ret = 1;
- continue;
- }
- expand(argv[0], fp, tabstop);
- fclose(fp);
+ len = 1;
+ for (i = 0; i < slen; i++) {
+ if (s[i] == ',' || s[i] == ' ') {
+ if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' '))
+ eprintf("expand: empty field in tablist.\n");
+ len++;
                 }
         }
- return ret;
+ tablist = emalloc((len + 1) * sizeof(size_t));
+
+ m = 0;
+ for (i = 0; i < slen; i += sep - (s + i) + 1) {
+ tablist[m++] = strtol(s + i, &sep, 0);
+ if (tablist[m - 1] == 0)
+ eprintf("expand: tab size can't be zero.\n");
+ if (*sep && *sep != ',' && *sep != ' ')
+ eprintf("expand: invalid number in tablist.\n");
+ if (m > 1 && tablist[m - 1] < tablist[m - 2])
+ eprintf("expand: tablist must be ascending.\n");
+ }
+
+ /* tab length = 1 for the overflowing case later in the matcher */
+ tablist[len] = 1;
+ return len;
 }
 
 static int
-expand(const char *file, FILE *fp, int tabstop)
+expand(const char *file, FILE *fp)
 {
- int col = 0;
+ size_t bol = 1, col = 0, i;
         Rune r;
- int bol = 1;
-
- for (;;) {
- if (!readrune(file, fp, &r))
- break;
 
+ while (readrune(file, fp, &r)) {
                 switch (r) {
                 case '\t':
+ if (tablistlen == 1)
+ i = 0;
+ else for (i = 0; i < tablistlen; i++)
+ if (col < tablist[i])
+ break;
                         if (bol || !iflag) {
                                 do {
                                         col++;
                                         putchar(' ');
- } while (col % tabstop);
+ } while (col % tablist[i]);
                         } else {
                                 putchar('\t');
- col += tabstop - col % tabstop;
+ col = tablist[i];
                         }
                         break;
                 case '\b':
+ bol = 0;
                         if (col)
                                 col--;
- bol = 0;
- writerune("<stdout>", stdout, &r);
+ putchar('\b');
                         break;
                 case '\n':
- col = 0;
                         bol = 1;
- writerune("<stdout>", stdout, &r);
+ col = 0;
+ putchar('\n');
                         break;
                 default:
                         col++;
@@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop)
 
         return 0;
 }
+
+static void
+usage(void)
+{
+ eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0);
+}
+
+int
+main(int argc, char *argv[])
+{
+ FILE *fp;
+ char *tl = "8";
+ int ret = 0;
+
+ ARGBEGIN {
+ case 'i':
+ iflag = 1;
+ break;
+ case 't':
+ tl = EARGF(usage());
+ if (!*tl)
+ eprintf("expand: tablist cannot be empty.\n");
+ break;
+ default:
+ usage();
+ } ARGEND;
+
+ tablistlen = parselist(tl, strlen(tl));
+
+ if (argc == 0)
+ expand("<stdin>", stdin);
+ else {
+ for (; argc > 0; argc--, argv++) {
+ if (!(fp = fopen(argv[0], "r"))) {
+ weprintf("fopen %s:", argv[0]);
+ ret = 1;
+ continue;
+ }
+ expand(argv[0], fp);
+ fclose(fp);
+ }
+ }
+ return ret;
+}
Received on Tue Mar 24 2015 - 23:53:16 CET

This archive was generated by hypermail 2.3.0 : Wed Mar 25 2015 - 00:02:57 CET