[hackers] [sbase] Add -m and -o flags to sort(1) || FRIGN

From: <git_AT_suckless.org>
Date: Mon, 23 Mar 2015 18:35:50 +0100 (CET)

commit cfd95178171d80e4b71172662c2a22f04e6fdb44
Author: FRIGN <dev_AT_frign.de>
Date: Sun Mar 22 23:37:37 2015 +0100

    Add -m and -o flags to sort(1)
    
    The m-flag comes pretty much for free: since no script can rely on the
    undefined behaviour of the input _not_ being sorted, we might as well
    just sort the already-sorted input like any other input.
    The only downside is memory usage, which can be an issue for large
    files.
    The o-flag was trivial to implement.

diff --git a/README b/README
index bbb6372..184f029 100644
--- a/README
+++ b/README
@@ -66,7 +66,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
 =*| sha256sum non-posix none
 =*| sha512sum non-posix none
 =*| sleep yes none
- sort no -m, -o, -d, -f, -i
+ sort no -d, -f, -i
 =*| split yes none
 =*| sponge non-posix none
 #*| strings yes none
diff --git a/sort.1 b/sort.1
index 9b40cb5..3d40280 100644
--- a/sort.1
+++ b/sort.1
@@ -1,4 +1,4 @@
-.Dd January 30, 2015
+.Dd March 22, 2015
 .Dt SORT 1
 .Os sbase
 .Sh NAME
@@ -6,17 +6,19 @@
 .Nd sort lines
 .Sh SYNOPSIS
 .Nm
-.Op Fl bnru
+.Op Fl Cbcmnru
+.Op Fl o Ar outfile
 .Op Fl t Ar delim
 .Op Fl k Ar key ...
 .Op Ar file ...
 .Sh DESCRIPTION
 .Nm
-writes the sorted concatenation of the given
-.Ar files
-to stdout. If no
+writes the sorted concatenation of each
+.Ar file
+to stdout.
+If no
 .Ar file
-is given,
+is given
 .Nm
 reads from stdin.
 .Sh OPTIONS
@@ -31,10 +33,10 @@ Skip leading whitespace of columns when sorting.
 .It Fl c
 The same as
 .Fl C
-except that when disorder is detected, a message is printed to stderr
+except that when disorder is detected, a message is written to stderr
 indicating the location of the disorder.
 .It Fl k Ar key
-Specifies a key definition of the form
+Specify a key definition of the form
 .Sm off
 .Sy S
 .No [.
@@ -67,12 +69,20 @@ can be used to specify options
 that only apply to this key definition.
 .Sy b
 is special in that it only applies to the column that it was specified after.
+.It Fl m
+Assume sorted input, merge only.
 .It Fl n
 Perform a numeric sort.
+.It Fl o Ar outfile
+Write output to
+.Ar outfile
+rather than stdout.
 .It Fl r
 Reverses the sort.
 .It Fl t Ar delim
-Specifies the field delimiter.
+Set
+.Ar delim
+as the field delimiter.
 .It Fl u
-Prints equal lines only once.
+Print equal lines only once.
 .El
diff --git a/sort.c b/sort.c
index 40b002d..cea25aa 100644
--- a/sort.c
+++ b/sort.c
@@ -107,6 +107,7 @@ linecmp(const char **a, const char **b)
                 free(s1);
                 free(s2);
         }
+
         return res;
 }
 
@@ -127,6 +128,8 @@ parse_flags(char **s, int *flags, int bflag)
                 default:
                         return -1;
                 }
+ }
+
         return 0;
 }
 
@@ -163,9 +166,8 @@ parse_keydef(struct keydef *kd, char *s, int flags)
                 if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0)
                         return -1;
         }
- if (*rest != '\0')
- return -1;
- return 0;
+
+ return -(*rest);
 }
 
 static char *
@@ -173,6 +175,7 @@ skipblank(char *s)
 {
         while (*s && isblank(*s))
                 s++;
+
         return s;
 }
 
@@ -224,16 +227,17 @@ columns(char *line, const struct keydef *kd)
 static void
 usage(void)
 {
- enprintf(2, "usage: %s [-Cbcnru] [-t delim] [-k def]... [file...]\n", argv0);
+ enprintf(2, "usage: %s [-Cbcmnru] [-o outfile] [-t delim] [-k def]... [file ...]\n", argv0);
 }
 
 int
 main(int argc, char *argv[])
 {
- size_t i;
- FILE *fp;
+ FILE *fp, *ofp = stdout;
         struct linebuf linebuf = EMPTY_LINEBUF;
+ size_t i;
         int global_flags = 0;
+ char *outfile = NULL;
 
         ARGBEGIN {
         case 'C':
@@ -248,9 +252,19 @@ main(int argc, char *argv[])
         case 'k':
                 addkeydef(EARGF(usage()), global_flags);
                 break;
+ case 'm':
+ /* more or less for free, but for performance reasons,
+ * we should keep this flag in mind and maybe some later
+ * day implement it properly so we don't run out of memory
+ * while merging large sorted files.
+ */
+ break;
         case 'n':
                 global_flags |= MOD_N;
                 break;
+ case 'o':
+ outfile = EARGF(usage());
+ break;
         case 'r':
                 global_flags |= MOD_R;
                 break;
@@ -270,15 +284,15 @@ main(int argc, char *argv[])
                 addkeydef("1", global_flags);
         addkeydef("1", global_flags & MOD_R);
 
- if (argc == 0) {
+ if (!argc) {
                 if (Cflag || cflag) {
                         check(stdin);
                 } else {
                         getlines(stdin, &linebuf);
                 }
- } else for (; argc > 0; argc--, argv++) {
- if (!(fp = fopen(argv[0], "r"))) {
- enprintf(2, "fopen %s:", argv[0]);
+ } else for (; *argv; argc--, argv++) {
+ if (!(fp = fopen(*argv, "r"))) {
+ enprintf(2, "fopen %s:", *argv);
                         continue;
                 }
                 if (Cflag || cflag) {
@@ -290,13 +304,16 @@ main(int argc, char *argv[])
         }
 
         if (!Cflag && !cflag) {
+ if (outfile && !(ofp = fopen(outfile, "w")))
+ eprintf("fopen %s:", outfile);
+
                 qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines,
                                 (int (*)(const void *, const void *))linecmp);
 
                 for (i = 0; i < linebuf.nlines; i++) {
                         if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i],
                                                 (const char **)&linebuf.lines[i-1])) {
- fputs(linebuf.lines[i], stdout);
+ fputs(linebuf.lines[i], ofp);
                         }
                 }
         }
Received on Mon Mar 23 2015 - 18:35:50 CET

This archive was generated by hypermail 2.3.0 : Mon Mar 23 2015 - 18:36:26 CET