[PATCH] sort: add -k, remove -u

From: Jakob Kramer <jakob.kramer_AT_gmx.de>
Date: Sat, 12 Apr 2014 17:53:10 +0200

Options that are specific to a single key definition are not
supported (e.g. "sort -k 2,3n -k 4,4"). Should you try to specify
such definitions, sort will return with EXIT_FAILURE and an error
message. Instead, all key definitions exclusively use the global
settings.

It always behaves as if -b were set.

I removed -u because the way it was implemented here does not work
correctly. It should be rewritten so that it checks whether the sort
comparison function considers the two lines to be equal.
---
 sort.1 |  20 ++++++--
 sort.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 169 insertions(+), 18 deletions(-)
diff --git a/sort.1 b/sort.1
index 7913357..80fa692 100644
--- a/sort.1
+++ b/sort.1
@@ -3,7 +3,10 @@
 sort \- sort lines
 .SH SYNOPSIS
 .B sort
-.RB [ \-nru ]
+.RB [ \-nr ]
+.RB [ \-k
+.I key
+.R ]...
 .RI [ file ...]
 .SH DESCRIPTION
 .B sort
@@ -17,5 +20,16 @@ perform a numeric sort.
 .B \-r
 reverses the sort.
 .TP
-.B \-u
-prints repeated lines only once.
+.B \-k key
+specifies a key definition of the form \fBS\fR[.\fBs\fR][,\fBE\fR[.\fBe\fR]],
+where
+.B S,
+.B s,
+.B E,
+and
+.B e
+are the starting column, starting character in that column, ending column and
+the ending character of that column respectively.  If they are not specified,
+s refers to the first character of the specified starting column, E refers to
+the last column of every line, and e refers to the last character of that last
+column.
diff --git a/sort.c b/sort.c
index 348e16b..f43464f 100644
--- a/sort.c
+++ b/sort.c
@@ -1,4 +1,5 @@
 /* See LICENSE file for copyright and license details. */
+#include <ctype.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -7,10 +8,30 @@
 #include "text.h"
 #include "util.h"
 
+struct keydef {
+	unsigned start_column;
+	unsigned end_column;
+	unsigned start_char;
+	unsigned end_char;
+};
+
+struct kdlist {
+	struct keydef keydef;
+	struct kdlist *next;
+};
+
+static struct kdlist *head = NULL;
+static struct kdlist *curr = NULL;
+
+static void addkeydef(char *);
+static void freelist(void);
 static int linecmp(const char **, const char **);
+static char *next_nonblank(char *);
+static char *next_blank(char *);
+static int parse_keydef(struct keydef *, char *);
+static char *columns(char *, const struct keydef *);
 
 static bool rflag = false;
-static bool uflag = false;
 static bool nflag = false;
 
 static struct linebuf linebuf = EMPTY_LINEBUF;
@@ -18,7 +39,7 @@ static struct linebuf linebuf = EMPTY_LINEBUF;
 static void
 usage(void)
 {
-	eprintf("usage: %s [-nru] [file...]\n", argv0);
+	enprintf(2, "usage: %s [-nr] [-k def]... [file...]\n", argv0);
 }
 
 int
@@ -34,18 +55,20 @@ main(int argc, char *argv[])
 	case 'r':
 		rflag = true;
 		break;
-	case 'u':
-		uflag = true;
+	case 'k':
+		addkeydef(EARGF(usage()));
 		break;
 	default:
 		usage();
 	} ARGEND;
 
+	addkeydef("1");
+
 	if(argc == 0) {
 		getlines(stdin, &linebuf);
 	} else for(; argc > 0; argc--, argv++) {
 		if(!(fp = fopen(argv[0], "r"))) {
-			weprintf("fopen %s:", argv[0]);
+			enprintf(2, "fopen %s:", argv[0]);
 			continue;
 		}
 		getlines(fp, &linebuf);
@@ -55,24 +78,138 @@ main(int argc, char *argv[])
 			(int (*)(const void *, const void *))linecmp);
 
 	for(i = 0; i < linebuf.nlines; i++) {
-		if(!uflag || i == 0 || strcmp(linebuf.lines[i],
-					linebuf.lines[i-1]) != 0) {
-			fputs(linebuf.lines[i], stdout);
-		}
+		fputs(linebuf.lines[i], stdout);
 	}
 
+	freelist();
 	return EXIT_SUCCESS;
 }
 
-int
+static void
+addkeydef(char *def)
+{
+	struct kdlist *node;
+
+	node = malloc(sizeof(*node));
+	if(!node)
+		enprintf(2, "malloc:");
+	if(!head)
+		head = node;
+	if(parse_keydef(&node->keydef, def))
+		enprintf(2, "parse_keydef:");
+	if(curr)
+		curr->next = node;
+	node->next = NULL;
+	curr = node;
+}
+
+static void
+freelist(void)
+{
+	struct kdlist *node;
+	struct kdlist *tmp;
+
+	for(node = head; node; node = tmp) {
+		tmp = node->next;
+		free(node);
+	}
+}
+
+static int
 linecmp(const char **a, const char **b)
 {
-	if (nflag) {
-		if (rflag)
-			return strtoul(*b, 0, 10) - strtoul(*a, 0, 10);
+	char *s1, *s2;
+	int res = 0;
+	struct kdlist *node;
+
+	for(node = head; node && res == 0; node = node->next) {
+		s1 = columns((char *)*a, &node->keydef);
+		s2 = columns((char *)*b, &node->keydef);
+
+		/* don't consider modifiers if it's the default key
+		 * definition that was implicitly added */
+		if(!(node == head) && !node->next)
+			res = strcmp(s1, s2);
+		else if(nflag)
+			res = strtoul(s1, 0, 10) - strtoul(s2, 0, 10);
 		else
-			return strtoul(*a, 0, 10) - strtoul(*b, 0, 10);
+			res = strcmp(s1, s2);
+
+		free(s1);
+		free(s2);
+	}
+	return rflag ? -res : res;
+}
+
+static int
+parse_keydef(struct keydef *kd, char *s)
+{
+	char *rest = s;
+	kd->start_column = 1;
+	kd->start_char = 1;
+	/* 0 means end of line */
+	kd->end_column = 0;
+	kd->end_char = 0;
+
+	kd->start_column = strtoul(rest, &rest, 10);
+	if(!kd->start_column)
+		enprintf(2, "starting column cannot be 0\n");
+	if(*rest == '.')
+		kd->start_char = strtoul(rest+1, &rest, 10);
+	if(*rest == ',') {
+		kd->end_column = strtoul(rest+1, &rest, 10);
+		if(kd->end_column < kd->start_column)
+			enprintf(2, ",%u is too small\n", kd->end_column);
 	}
-	return strcmp(*a, *b) * (rflag ? -1 : +1);
+	if(*rest == '.')
+		kd->end_char = strtoul(rest+1, &rest, 10);
+	if(*rest != '\0')
+		return -1;
+	return 0;
 }
 
+static char *
+next_nonblank(char *s)
+{
+	for(; *s && isblank(*s); s++);
+	return s;
+}
+
+static char *
+next_blank(char *s)
+{
+	for(; *s && !isblank(*s); s++);
+	return s;
+}
+
+static char *
+columns(char *line, const struct keydef *kd)
+{
+	char *rest;
+	char *start, *end;
+	unsigned i;
+	for(rest = line, i = 0; i < kd->start_column; i++) {
+		if(i != 0)
+			rest = next_blank(rest);
+		rest = next_nonblank(rest);
+	}
+	for(i = 1; i < kd->start_char && !isblank(*rest); i++, rest++);
+	start = rest;
+
+	if(kd->end_column) {
+		for(rest = line, i = 0; i < kd->end_column; i++) {
+			if(i != 0)
+				rest = next_blank(rest);
+			rest = next_nonblank(rest);
+		}
+		if(kd->end_char) {
+			for(i = 1; i < kd->end_char && *rest && !isblank(*rest); i++, rest++);
+		} else {
+			rest = next_blank(rest);
+		}
+		end = rest;
+	} else {
+		end = rest + strlen(rest);
+	}
+	return strndup(start, end - start);
+}
-- 
1.8.5.1
--------------050008090601030206000706--
Received on Mon Sep 17 2001 - 00:00:00 CEST

This archive was generated by hypermail 2.3.0 : Thu Apr 17 2014 - 15:24:03 CEST