[dev] [PATCH] sbase: add cut

From: Strake <strake888_AT_gmail.com>
Date: Tue, 31 Jul 2012 23:07:11 -0500

Will now need libutf.

diff -r 8cf300476909 Makefile
--- a/Makefile Sat Jun 09 18:53:39 2012 +0100
+++ b/Makefile Tue Jul 31 23:06:28 2012 -0500
@@ -27,6 +27,7 @@
         cksum.c \
         cmp.c \
         cp.c \
+ cut.c \
         date.c \
         dirname.c \
         echo.c \
diff -r 8cf300476909 config.mk
--- a/config.mk Sat Jun 09 18:53:39 2012 +0100
+++ b/config.mk Tue Jul 31 23:06:28 2012 -0500
@@ -10,7 +10,7 @@
 LD = $(CC)
 CPPFLAGS = -D_POSIX_C_SOURCE=200112L
 CFLAGS = -g -ansi -Wall -pedantic $(CPPFLAGS)
-LDFLAGS = -g
+LDFLAGS = -g -lutf

 #CC = tcc
 #LD = $(CC)
diff -r 8cf300476909 cut.1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cut.1 Tue Jul 31 23:06:28 2012 -0500
@@ -0,0 +1,51 @@
+.TH CUT 1
+.SH NAME
+cut \- select columns of file
+.SH SYNOPSIS
+.B cut -f
+.I ranges
+[
+.B -d
+.I delimiter
+]
+[
+.I file ...
+]
+.br
+.B cut -c
+.I ranges
+[
+.I file ...
+]
+.br
+.B cut -b
+.I ranges
+[
+.I file ...
+]
+.br
+.SH OPERATION
+Cut reads from given files, or stdin if no files given, and for each line selects
+.TP
+.B columns,
+with -f flag
+.TP
+.B characters,
+with -c flag
+.TP
+.B bytes,
+with -b flag
+.LP
+within given comma- or space-delimited ranges.
+.LP
+.br
+Each range is either a single decimal number, or of this form:
+.br
+.I x
+-
+.I y
+.br
+where x and y are decimal numbers, or empty.
+Empty x means first, and empty y means last column/character/byte on the line.
+.LP
+If -d option given, then the first character of its argument is the delimiter; otherwise it is tab.
diff -r 8cf300476909 cut.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cut.c Tue Jul 31 23:06:28 2012 -0500
@@ -0,0 +1,192 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <utf.h>
+#include "text.h"
+
+/* One inclusive, 1-based range of positions on a line.
+   max == 0 means "through the last position";
+   an entry with min == 0 terminates an array of ranges. */
+typedef struct {
+    int min;
+    int max;
+} range;
+
+/* Return 1 when position n lies inside r (both ends inclusive), else 0.
+   A zero r.max leaves the upper end unbounded. */
+int inRange (range r, unsigned int n) {
+    return n >= r.min && (0 == r.max || n <= r.max);
+}
+
+/* Encode rune r with runetochar and write the resulting bytes to f.
+   Returns the number of bytes in the encoding; the result of fwrite
+   is not inspected. */
+int fputrune (Rune r, FILE *f) {
+    char buf[UTFmax];
+    int len = runetochar (buf, &r);
+
+    fwrite (buf, 1, len, f);
+    return len;
+}
+
+/* Print the fields of line x selected by rs, using rune d as delimiter.
+     d   field delimiter
+     s   if nonzero, a line containing no delimiter prints nothing;
+         otherwise such a line is printed whole
+     rs  array of ranges, terminated by an entry whose min is 0
+     x   NUL-terminated line without its trailing newline (not modified)
+   Fields are emitted range by range, in the order the ranges were given.
+   The delimiter is written between emitted fields only, so the output
+   never ends with a stray delimiter. No newline is written. */
+void cutLineF (Rune d, unsigned int s, range *rs, char *x) {
+    int ii, n;
+    int first = 1;      /* nothing emitted yet, so no delimiter is due */
+
+    if (!utfrune (x, d)) {
+        if (!s) fputs (x, stdout);
+        return;
+    }
+    for (ii = 0; rs[ii].min; ii++) {
+        char *y = x;    /* start of field n; null once past the last field */
+        for (n = 1; y; n++) {
+            char *z = utfrune (y, d);   /* delimiter ending field n, if any */
+            if (inRange (rs[ii], n)) {
+                if (!first) fputrune (d, stdout);
+                if (z) fwrite (y, 1, (size_t) (z - y), stdout);
+                else fputs (y, stdout);
+                first = 0;
+            }
+            /* step over the whole (possibly multi-byte) delimiter */
+            y = z ? z + runelen (d) : 0;
+        }
+    }
+}
+
+/* Print the characters (runes) of line x whose 1-based positions fall in
+   the ranges of rs, one pass over the line per range. rs ends at the
+   first entry whose min is 0. No newline is written. */
+void cutLineC (range *rs, char *x) {
+    Rune scratch;       /* decoded rune; only its encoded length is used */
+    range *r;
+
+    for (r = rs; r->min; r++) {
+        char *p = x;
+        int pos = 1;
+
+        while (*p) {
+            int len = chartorune (&scratch, p);
+            if (inRange (*r, pos)) fwrite (p, 1, len, stdout);
+            p += len;
+            pos++;
+        }
+    }
+}
+
+/* Print the bytes of line x whose 1-based positions fall in the ranges
+   of rs, one pass over the line per range. rs ends at the first entry
+   whose min is 0; a max of 0 means "through the end of the line".
+   The walk always stops at the terminating NUL, so a range that starts
+   or ends beyond the end of the line never reads outside the buffer.
+   No newline is written. */
+void cutLineB (range *rs, char *x) {
+    int ii, n;
+    for (ii = 0; rs[ii].min; ii++) {
+        /* n is a 0-based byte index; position n + 1 is selected when
+           min <= n + 1 <= max */
+        for (n = 0; x[n] && (0 == rs[ii].max || n < rs[ii].max); n++) {
+            if (n >= rs[ii].min - 1) fputc (x[n], stdout);
+        }
+    }
+}
+
+/* Read stdin line by line with afgets and print the selected part of each
+   line followed by a newline.
+     mode  'f', 'c' or 'b' (fields, characters, bytes); any other value
+           prints nothing
+     d     field delimiter, used in 'f' mode only
+     s     in 'f' mode, nonzero drops lines that contain no delimiter
+     rs    array of ranges, terminated by an entry whose min is 0
+   NOTE(review): the line buffer is never released here; presumably afgets
+   allocates it -- confirm against text.h. */
+void go (int mode, Rune d, unsigned int s, range *rs) {
+    char *line = 0;
+    size_t cap = 0;
+
+    while (afgets (&line, &cap, stdin)) {
+        char *p;
+
+        /* The newline is removed here and written back below; otherwise
+           it would be unknown whether the cut had included it. */
+        for (p = line; *p; p++) {
+            if (*p == '\n') *p = 0;
+        }
+
+        if (mode == 'f') {
+            if (utfrune (line, d)) {
+                cutLineF (d, s, rs, line);
+                fputc ('\n', stdout);
+            }
+            else if (!s) {
+                fputs (line, stdout);
+                fputc ('\n', stdout);
+            }
+        }
+        else if (mode == 'c') {
+            cutLineC (rs, line);
+            fputc ('\n', stdout);
+        }
+        else if (mode == 'b') {
+            cutLineB (rs, line);
+            fputc ('\n', stdout);
+        }
+    }
+}
+
+/* Return 1 if c separates entries in a range list (comma or whitespace). */
+static int isRangeSep (char c) {
+    return c == ',' || c == ' ' || c == '\f' || c == '\v' ||
+           c == '\t' || c == '\r' || c == '\n';
+}
+
+/* cut: parse options, then process each named file, or stdin if none.
+     -b/-c/-f ranges  select bytes, characters or fields; the next argument
+                      is the comma- or whitespace-separated range list
+     -d delimiter     first character of the next argument is the field
+                      delimiter (default: tab)
+     -s               in -f mode, drop lines that contain no delimiter
+   Unrecognised option letters are ignored. Returns 0 on success, 1 on a
+   usage error, allocation failure, or a file that cannot be opened. */
+int main (int argc, char *argu[]) {
+    int mode = 0;
+    Rune d = '\t';
+    unsigned int s = 0;
+    range *rs = 0;
+    int ii;
+
+    /* parse options */
+    for (ii = 1; ii < argc; ii++) {
+        int jj;
+        if (argu[ii][0] != '-') break;
+        for (jj = 1; argu[ii][jj]; jj++) switch (argu[ii][jj]) {
+        case 'b':
+        case 'c':
+        case 'f':
+        {
+            char *p;
+
+            mode = argu[ii][jj];
+
+            if (++ii >= argc) {
+                fputs ("No range argument\n", stderr);
+                return 1;
+            }
+
+            /* a repeated mode option replaces the earlier range list */
+            free (rs);
+            /* every range consumes at least one character, so the list
+               plus its terminator fits in length + 1 entries */
+            rs = malloc (sizeof (range) * (utflen (argu[ii]) + 1));
+            if (!rs) {
+                fputs ("Failed to allocate memory\n", stderr);
+                return 1;
+            }
+
+            /* parse ranges */
+            /* max = 0 to denote last; min = 0 terminates the list */
+            p = argu[ii];
+            jj = 0;
+            for (;;) {
+                /* skip separators explicitly; strtoul would otherwise
+                   stall on a trailing one without consuming anything */
+                while (isRangeSep (*p)) p++;
+                if (!*p) break;
+
+                if (*p == '-') rs[jj].min = 1;
+                else rs[jj].min = (int) strtoul (p, &p, 10);
+
+                if (*p == '-') {
+                    p++;
+                    /* only a digit right after the dash starts the upper
+                       bound; otherwise the range is open-ended and the
+                       next list entry must not be swallowed */
+                    if (*p >= '0' && *p <= '9')
+                        rs[jj].max = (int) strtoul (p, &p, 10);
+                    else
+                        rs[jj].max = 0;
+                }
+                else rs[jj].max = rs[jj].min;
+
+                /* positions are 1-based; a start of 0 would be mistaken
+                   for the list terminator */
+                if (rs[jj].min < 1 || rs[jj].max < 0 ||
+                    (*p && !isRangeSep (*p))) {
+                    fprintf (stderr, "Malformed ranges\n");
+                    return 1;
+                }
+                jj++;
+            }
+            if (jj == 0) {
+                fprintf (stderr, "Malformed ranges\n");
+                return 1;
+            }
+            rs[jj].min = 0;
+            goto nextArgument;
+        }
+        case 'd':
+            if (++ii >= argc) {
+                fputs ("No delimiter argument\n", stderr);
+                return 1;
+            }
+            chartorune (&d, argu[ii]);
+            goto nextArgument;
+        case 's':
+            s = 1;
+            break;
+        }
+nextArgument: ;
+    }
+
+    if (!mode) {
+        fprintf (stderr, "No mode given\n");
+        return 1;
+    }
+
+    if (ii < argc) {
+        for (; ii < argc; ii++) {
+            if (!freopen (argu[ii], "r", stdin)) {
+                fprintf (stderr, "Failed to open file %s\n", argu[ii]);
+                return 1;
+            }
+            go (mode, d, s, rs);
+        }
+    }
+    else go (mode, d, s, rs);
+
+    return 0;
+}
Received on Wed Aug 01 2012 - 06:07:11 CEST

This archive was generated by hypermail 2.3.0 : Wed Aug 01 2012 - 06:12:04 CEST