[dev] [sbase] [PATCH] Add cut(1)

From: Truls Becken <truls.becken_AT_gmail.com>
Date: Tue, 8 Oct 2013 21:23:43 +0200

---
LICENSE  |   1 +
Makefile |   1 +
cut.1    |  60 +++++++++++++++++++++++
cut.c    | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 226 insertions(+)
create mode 100644 cut.1
create mode 100644 cut.c
diff --git a/LICENSE b/LICENSE
index 927f594..36f0d2c 100644
--- a/LICENSE
+++ b/LICENSE
@@ -14,6 +14,7 @@ MIT/X Consortium License
© 2012 Robert Ransom <rransom.8774_AT_gmail.com>
© 2013 Jakob Kramer <jakob.kramer_AT_gmx.de>
© 2013 Anselm R Garbe <anselm_AT_garbe.us>
+© 2013 Truls Becken <truls.becken_AT_gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
diff --git a/Makefile b/Makefile
index 3c4840d..ce7d95a 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ SRC = \
	cmp.c      \
	comm.c     \
	cp.c       \
+	cut.c      \
	date.c     \
	dirname.c  \
	echo.c     \
diff --git a/cut.1 b/cut.1
new file mode 100644
index 0000000..0e78ddd
--- /dev/null
+++ b/cut.1
@@ -0,0 +1,60 @@
+.TH CUT 1 sbase\-VERSION
+.SH NAME
+cut \- extract columns of data
+.SH SYNOPSIS
+.B cut \-b
+.I list
+.RB [ \-n ]
+.RI [ file ...]
+.br
+.B cut \-c
+.I list
+.RI [ file ...]
+.br
+.B cut \-f
+.I list
+.RB [ \-d
+.IR delim ]
+.RB [ \-s ]
+.RI [ file ...]
+.SH DESCRIPTION
+.B cut
+out bytes, characters, or delimited fields from each line of the given
+files and write to stdout. With no file, or when file is `-', cut reads
+from stdin.
+.P
+.I list
+is a comma or space separated list of numbers and ranges where numbering
+starts from 1. Ranges are of the form `N-M'. If N or M is missing, the
+beginning or end of line is assumed. Numbers and ranges may be repeated,
+overlapping, and in any order. Selected input is written in the same
+order that it is read, and is written exactly once.
+.SH OPTIONS
+.TP
+.BI \-b \ list
+The
+.I list
+specifies byte positions.
+.TP
+.BI \-c \ list
+The
+.I list
+specifies character positions.
+.TP
+.BI \-d \ delim
+Use first byte of
+.I delim
+as field delimiter, instead of tab.
+.TP
+.BI \-f \ list
+The
+.I list
+specifies field numbers. Lines not containing field delimiters are
+passed through untouched.
+.TP
+.B \-n
+Do not split characters. A character is output if its last byte is
+selected.
+.TP
+.B \-s
+Suppress lines not containing field delimiters.
diff --git a/cut.c b/cut.c
new file mode 100644
index 0000000..72c20bc
--- /dev/null
+++ b/cut.c
@@ -0,0 +1,164 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "text.h"
+#include "util.h"
+
+/*
+ * Report the three supported invocation forms (byte, character and
+ * field mode) through eprintf().
+ */
+static void
+usage(void)
+{
+	eprintf(
+		"usage: cut -b list [-n] [file...]\n"
+		"       cut -c list [file...]\n"
+		"       cut -f list [-d delim] [-s] [file...]\n"
+	);
+}
+
+typedef struct Range { /* one selected span of positions, kept in a singly linked list */
+	size_t min, max; /* 1-based inclusive bounds; max == 0 means open-ended (to end of line) */
+	struct Range *next; /* following range in the list, or NULL */
+} Range;
+
+static Range *list = NULL; /* head of the range list maintained by insert() */
+static char mode = 0; /* set from ARGC() in main() for -b/-c/-f; 0 until a list option is seen */
+static char delim = '\t'; /* field delimiter; -d replaces it with the first byte of its argument */
+static bool nflag = false; /* set by -n; consulted by seek() in byte mode */
+static bool sflag = false; /* set by -s; cut() then drops field-mode lines that have no delimiter */
+
+static void
+insert(Range *r)
+{	/*
+	 * Add r to the global list, keeping it sorted by min with no
+	 * overlapping or touching entries. r is either linked in as a new
+	 * node or folded into an existing node, in which case r itself is
+	 * left unlinked. A max of 0 stands for "to end of line".
+	 */
+	Range *l, *p, *t;
+
+	for(p = NULL, l = list; l; p = l, l = l->next) {
+		if(r->max && r->max+1 < l->min) { /* r ends before l with a gap: link r in front of l */
+			r->next = l;
+			break;
+		} else if(!l->max || r->min < l->max+2) { /* r overlaps or touches l: grow l instead */
+			l->min = MIN(r->min, l->min);
+			for(p = l, t = l->next; t; p = t, t = t->next) /* p ends on the last node r reaches, t on the first one beyond it */
+				if(r->max && r->max+1 < t->min) break;
+			l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0; /* an open end (0) on either side wins */
+			l->next = t; /* unlink the nodes swallowed by the merge */
+			return;
+		}
+	}
+	if(p) p->next = r; else list = r; /* link r after p, or make it the head when nothing precedes it */
+}
+
+/*
+ * Parse a list such as "1,3-5 7-" and pass every element to insert().
+ * Spaces in str are rewritten to commas in place. All Range nodes come
+ * from one malloc'd array that is not freed here, because the nodes
+ * linked into the list live inside it. An element is either N or N-M;
+ * a missing N means 1 and a missing M leaves max at 0.
+ */
+static void
+parselist(char *str)
+{
+	char *cur;
+	size_t left = 1;
+	Range *slot;
+
+	/* Normalise the separators and count the elements. */
+	for(cur = str; *cur; cur++) {
+		if(*cur == ' ')
+			*cur = ',';
+		if(*cur == ',')
+			left++;
+	}
+
+	slot = malloc(left * sizeof *slot);
+	if(!slot)
+		eprintf("malloc:");
+
+	cur = str;
+	while(left) {
+		/* Lower bound: a leading '-' means "from 1". */
+		if(*cur == '-')
+			slot->min = 1;
+		else
+			slot->min = strtoul(cur, &cur, 10);
+
+		/* Upper bound: only present after a '-'. */
+		if(*cur == '-') {
+			cur++;
+			slot->max = strtoul(cur, &cur, 10);
+		} else {
+			slot->max = slot->min;
+		}
+		slot->next = NULL;
+
+		if(!slot->min || (slot->max && slot->max < slot->min) || (*cur && *cur != ','))
+			eprintf("cut: bad list value\n");
+
+		insert(slot);
+		slot++;
+		left--;
+		cur++;	/* step over the separating comma */
+	}
+}
+
+static size_t
+seek(const char *s, size_t pos, size_t *prev, size_t count)
+{	/*
+	 * Return the number of bytes to advance from s in order to move from
+	 * position *prev to position pos, measured in the units of the
+	 * current mode (bytes, characters or fields). Scanning stops at the
+	 * terminating NUL. In byte mode *prev is advanced by the bytes
+	 * skipped, unless the string ends first, in which case it is left
+	 * untouched; in the other modes *prev is set to pos. count is only
+	 * read in field mode; cut() passes 0 for the first call on a line and
+	 * one more for each later call.
+	 */
+	const char *t;
+	size_t n = pos - *prev; /* units still to skip */
+
+	if(mode == 'b') {
+		if((t = memchr(s, 0, n))) /* the string ends within the next n bytes */
+			return t - s;
+		if(nflag)
+			while(n && !UTF8_POINT(s[n])) n--; /* back up while UTF8_POINT() rejects s[n]; presumably onto a character boundary */
+		*prev += n;
+		return n;
+	} else if(mode == 'c') {
+		for(n++, t = s; *t; t++) /* stop on the (n+1)th byte that UTF8_POINT() accepts */
+			if(UTF8_POINT(*t) && !--n) break;
+	} else {
+		for(t = (count < 2) ? s : s+1; n && *t; t++) /* from the third call on, scanning starts one byte past s */
+			if(*t == delim && !--n && count) break; /* with count == 0 there is no break, so t ends one past the delimiter */
+	}
+	*prev = pos;
+	return t - s;
+}
+
+/*
+ * Copy the selected parts of every line read from fp to stdout, driven
+ * by the global mode, list, delim and sflag settings. Lines are fetched
+ * with afgets() into a static buffer that is reused across calls.
+ */
+static void
+cut(FILE *fp)
+{
+	static char *buf = NULL;
+	static size_t size = 0;
+	char *s;
+	size_t i, n, p;
+	Range *r;
+
+	while(afgets(&buf, &size, fp)) {
+		/*
+		 * Strip the trailing newline. The length is tested first:
+		 * when a line starts with a NUL byte strlen() is 0 and
+		 * indexing with strlen(buf)-1 would access memory in front
+		 * of the buffer.
+		 */
+		if((i = strlen(buf)) && buf[i-1] == '\n')
+			buf[i-1] = 0;
+		/* Field mode: a line without any delimiter is passed through, or dropped with -s. */
+		if(mode == 'f' && !strchr(buf, delim)) {
+			if(!sflag)
+				puts(buf);
+			continue;
+		}
+		/* i numbers the seek() calls on this line; p is the position s currently stands at. */
+		for(i = 0, p = 1, s = buf, r = list; r; r = r->next, s += n) {
+			s += seek(s, r->min, &p, i++);
+			if(!*s) break;
+			if(!r->max) {
+				/* open-ended range: emit the rest of the line */
+				fputs(s, stdout);
+				break;
+			}
+			n = seek(s, r->max + 1, &p, i++);
+			if(fwrite(s, 1, n, stdout) != n)
+				eprintf("write error:");
+		}
+		putchar('\n');
+	}
+}
+
+/*
+ * Parse the options, then run cut() over stdin or over every file
+ * operand in turn; the operand "-" stands for stdin. A list option
+ * (-b, -c or -f) is required.
+ */
+int
+main(int argc, char *argv[])
+{
+	FILE *fp;
+	char *val;
+
+	ARGBEGIN {
+	case 'b':
+	case 'c':
+	case 'f':
+		mode = ARGC();
+		/* ARGF() yields NULL when the option has no argument; do not hand that to parselist() */
+		if(!(val = ARGF()))
+			usage();
+		parselist(val);
+		break;
+	case 'd':
+		/* same guard before dereferencing the delimiter argument */
+		if(!(val = ARGF()))
+			usage();
+		delim = *val;
+		break;
+	case 'n':
+		nflag = true;
+		break;
+	case 's':
+		sflag = true;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if(!mode)
+		usage();
+	if(!argc)
+		cut(stdin);
+	else for(; argc--; argv++) {
+		if(!(fp = strcmp(*argv, "-") ? fopen(*argv, "r") : stdin))
+			eprintf("fopen %s:", *argv);
+		cut(fp);
+		/* keep stdin open so that a later "-" operand does not read a closed stream */
+		if(fp != stdin)
+			fclose(fp);
+	}
+	return EXIT_SUCCESS;
+}
-- 
1.8.3.1
Received on Tue Oct 08 2013 - 21:23:43 CEST

This archive was generated by hypermail 2.3.0 : Tue Oct 08 2013 - 21:36:03 CEST