Re: [dev] [PATCH] sbase: add cut

From: Uriel <uriel_AT_berlinblue.org>
Date: Wed, 1 Aug 2012 14:36:46 +0200

Use awk.

On Wed, Aug 1, 2012 at 6:07 AM, Strake <strake888_AT_gmail.com> wrote:
> Will now need libutf.
>
> diff -r 8cf300476909 Makefile
> --- a/Makefile Sat Jun 09 18:53:39 2012 +0100
> +++ b/Makefile Tue Jul 31 23:06:28 2012 -0500
> @@ -27,6 +27,7 @@
> cksum.c \
> cmp.c \
> cp.c \
> + cut.c \
> date.c \
> dirname.c \
> echo.c \
> diff -r 8cf300476909 config.mk
> --- a/config.mk Sat Jun 09 18:53:39 2012 +0100
> +++ b/config.mk Tue Jul 31 23:06:28 2012 -0500
> @@ -10,7 +10,7 @@
> LD = $(CC)
> CPPFLAGS = -D_POSIX_C_SOURCE=200112L
> CFLAGS = -g -ansi -Wall -pedantic $(CPPFLAGS)
> -LDFLAGS = -g
> +LDFLAGS = -g -lutf
>
> #CC = tcc
> #LD = $(CC)
> diff -r 8cf300476909 cut.1
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/cut.1 Tue Jul 31 23:06:28 2012 -0500
> @@ -0,0 +1,51 @@
> +.TH CUT 1
> +.SH NAME
> +cut \- select columns of file
> +.SH SYNOPSIS
> +.B cut -f
> +.I ranges
> +[
> +.B -d
> +.I delimiter
> +]
> +[
> +.I file ...
> +]
> +.br
> +.B cut -c
> +.I ranges
> +[
> +.I file ...
> +]
> +.br
> +.B cut -b
> +.I ranges
> +[
> +.I file ...
> +]
> +.br
> +.SH OPERATION
> +Cut reads from given files, or stdin if no files given, and for each line selects
> +.TP
> +.B columns,
> +with -f flag
> +.TP
> +.B characters,
> +with -c flag
> +.TP
> +.B bytes,
> +with -b flag
> +.LP
> +within given comma- or space-delimited ranges.
> +.LP
> +.br
> +Each range is either a single decimal number, or of this form:
> +.br
> +.I x
> +-
> +.I y
> +.br
> +where x and y are decimal numbers, or empty.
> +Empty x means first, and empty y means last column/character/byte on the line.
> +.LP
> +If -d option given, then the first character of its argument is the delimiter; otherwise it is tab.
> diff -r 8cf300476909 cut.c
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/cut.c Tue Jul 31 23:06:28 2012 -0500
> @@ -0,0 +1,192 @@
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <utf.h>
> +#include "text.h"
> +/* Inclusive range of 1-based positions taken from one element of the range list.
> + * max == 0 means that the range runs up to the last position on the line.
> + * An entry with min == 0 marks the end of the array that main builds. */
> +typedef struct {
> + int min, max;
> +} range;
> +/* Tests whether position n falls inside range r.
> + * Returns 1 when n >= r.min and either r.max is 0 (open-ended) or n <= r.max,
> + * and 0 otherwise.
> + * NOTE(review): n is unsigned while r.min and r.max are int, so the comparisons
> + * convert the bounds to unsigned; a negative bound would act as a huge value. */
> +int inRange (range r, unsigned int n) {
> + if ((0 == r.max || n <= r.max) && n >= r.min) return 1;
> + else return 0;
> +}
> +/* Writes rune r to stream f in its encoded byte form.
> + * The rune is encoded with runetochar into a local buffer of UTFmax bytes and
> + * the n bytes it reports are passed to fwrite.
> + * Returns n, the byte count reported by runetochar.
> + * NOTE(review): the result of fwrite is not checked, so a short write goes unnoticed. */
> +int fputrune (Rune r, FILE *f) {
> + char x[UTFmax];
> + int n;
> + n = runetochar (x, &r);
> + fwrite (x, 1, n, f);
> + return n;
> +}
> +/* Field mode: prints the fields of line x selected by the ranges in rs.
> + *   d  - delimiter rune that separates fields
> + *   s  - when nonzero, a line that does not contain d is not printed
> + *   rs - array of ranges, ended by the first entry whose min is 0
> + *   x  - NUL-terminated line; modified temporarily while scanning and restored
> + * If utfrune finds no d in x, the whole line is written to stdout unless s is set.
> + * Otherwise the line is rescanned once per range, fields are numbered from 1, and
> + * every field whose number is in the current range is written followed by d.
> + * No newline is written here.
> + * NOTE(review): d is written after every selected field, the last one included,
> + * so the output ends with a trailing delimiter.
> + * NOTE(review): fields come out in range-list order, not in line order; compare
> + * with the POSIX description of cut before relying on this. */
> +void cutLineF (Rune d, unsigned int s, range *rs, char *x) {
> + int ii, n;
> + if (!utfrune (x, d)) {
> + if (!s) fputs (x, stdout);
> + return;
> + }
> + /* kludge; be warned */
> + for (ii = 0; rs[ii].min; ii++) { /* one full pass over the line per range */
> + char *y;
> + y = x;
> + for (n = 1; y; n++) { /* y is the start of field n; NULL once the last field is done */
> + char *z;
> + char ch; /* only assigned and read when z is non-NULL */
> + z = utfrune (y, d);
> + if (z) {
> + ch = *z;
> + *z = 0; /* cut the string at the delimiter so y reads as one field */
> + }
> + if (inRange (rs[ii], n)) {
> + fputs (y, stdout);
> + fputrune (d, stdout);
> + }
> + if (z) {
> + *z = ch; /* put the saved byte back */
> + z += runelen (d); /* step over the delimiter to the next field */
> + }
> + y = z;
> + }
> + }
> +}
> +/* Character mode: prints the characters of line x selected by the ranges in rs.
> + *   rs - array of ranges, ended by the first entry whose min is 0
> + *   x  - NUL-terminated line
> + * The line is rescanned once per range. chartorune reports the byte length of
> + * each character, characters are numbered from 1, and the bytes of every
> + * character whose number is in the current range are written to stdout.
> + * No newline is written here. */
> +void cutLineC (range *rs, char *x) {
> + Rune _r; /* receives the decoded rune; only the byte length is used */
> + int ii, n;
> + for (ii = 0; rs[ii].min; ii++) {
> + char *y;
> + y = x;
> + for (n = 1; *y; n++) {
> + int l = chartorune (&_r, y);
> + if (inRange (rs[ii], n)) fwrite (y, 1, l, stdout);
> + y += l;
> + }
> + }
> +}
> +/* Byte mode: prints the bytes of line x selected by the ranges in rs.
> + *   rs - array of ranges, ended by the first entry whose min is 0
> + *   x  - NUL-terminated line
> + * For each range the loop starts at index min - 1. With a nonzero max it runs
> + * until the index reaches max; with max == 0 it runs until a NUL byte.
> + * No newline is written here.
> + * NOTE(review): with a nonzero max the loop never tests x[n], so a range that
> + * extends past the end of the line reads and prints bytes beyond the terminating
> + * NUL. The same applies when min - 1 is already past the end. */
> +void cutLineB (range *rs, char *x) {
> + int ii, n;
> + for (ii = 0; rs[ii].min; ii++) {
> + for (n = rs[ii].min - 1; rs[ii].max ? n < rs[ii].max : x[n]; n++) {
> + fputc (x[n], stdout);
> + }
> + }
> +}
> +/* Processes stdin line by line in the given mode and writes the result to stdout.
> + *   mode - one of the option letters f, c or b; any other value prints nothing
> + *   d    - delimiter rune, used in f mode
> + *   s    - when nonzero in f mode, lines without the delimiter are dropped
> + *   rs   - array of ranges, ended by the first entry whose min is 0
> + * Each line is read with afgets, its newline bytes are replaced by NUL, and the
> + * matching cutLine function is called; a newline is written after each line
> + * that produces output.
> + * NOTE(review): afgets comes from text.h and presumably grows x as needed;
> + * x is never freed in this function - confirm who owns the buffer. */
> +void go (int mode, Rune d, unsigned int s, range *rs) {
> + char *x;
> + size_t size = 0;
> + x = 0;
> +
> + while (afgets (&x, &size, stdin)) {
> + int ii;
> + /* must delete newline here, and redo later;
> + otherwise, unknown whether it was included in cut */
> + for (ii = 0; x[ii]; ii++) if (x[ii] == '\n') x[ii] = 0;
> + switch (mode) {
> + case 'f':
> + if (!utfrune (x, d)) { /* no delimiter on this line */
> + if (!s) {
> + fputs (x, stdout);
> + fputc ('\n', stdout);
> + }
> + }
> + else {
> + cutLineF (d, s, rs, x);
> + fputc ('\n', stdout);
> + }
> + break;
> + case 'c': cutLineC (rs, x); fputc ('\n', stdout); break;
> + case 'b': cutLineB (rs, x); fputc ('\n', stdout); break;
> + }
> + }
> +}
> +/* Entry point: parses options, then runs go on each file operand or on stdin.
> + * Options: -b, -c and -f select the mode and take the next argument as the range
> + * list; -d takes the next argument and uses its first rune as delimiter; -s sets
> + * the s flag. Option parsing stops at the first argument that does not begin
> + * with a dash.
> + * Returns 1 after printing a message to stderr when the range argument is
> + * missing, allocation fails, the ranges are malformed, no mode was given, or a
> + * file cannot be opened; returns 0 otherwise.
> + * NOTE(review): -d does not check that a following argument exists, so a
> + * trailing -d hands argu[argc] to chartorune.
> + * NOTE(review): in the range parser a separator does not advance p; if the list
> + * ends with a separator, strtoul appears to consume nothing and the loop keeps
> + * incrementing jj past the allocation - confirm and guard.
> + * NOTE(review): option letters other than b, c, f, d and s are ignored silently,
> + * and rs is never freed. */
> +int main (int argc, char *argu[]) {
> + int mode = 0;
> + Rune d = '\t';
> + unsigned int s = 0;
> + range *rs = 0;
> + int ii;
> +
> + /* parse options */
> + for (ii = 1; ii < argc; ii++) {
> + int jj;
> + if (argu[ii][0] != '-') break;
> + for (jj = 1; argu[ii][jj]; jj++) switch (argu[ii][jj]) { /* each letter of the option cluster */
> + case 'b':
> + case 'c':
> + case 'f':
> + mode = argu[ii][jj];
> +
> + if (++ii >= argc) { /* the range list is the next argument */
> + fputs ("No range argument\n", stderr);
> + return 1;
> + }
> +
> + rs = malloc (sizeof (range) * (utflen (argu[ii]) + 1)); /* sized from the character count of the list plus one */
> + if (!rs) {
> + fputs ("Failed to allocate memory\n", stderr);
> + return 1;
> + }
> +
> + /* ensure space delimitation for strtoul */
> + for (jj = 0; argu[ii][jj]; jj++) if (argu[ii][jj] == ',')
argu[ii][jj] = ' ';
> +
> + /* parse ranges */
> + /* max = 0 to denote last */
> + {
> + char *p;
> + p = argu[ii];
> + jj = 0; /* from here on jj indexes rs, not the argument string */
> + while (*p) {
> + rs[jj].min = *p == '-' ? 1 : strtoul (p, &p, 10); /* a leading dash means the range starts at 1 */
> + rs[jj].max = *p == '-' ? strtoul (++p, &p, 10) : rs[jj].min; /* no dash: single position; dash with no number: max is 0 */
> + switch (*p) {
> + case '\0':
> + rs[++jj].min = 0; /* end of list: append the terminator entry */
> + break;
> + case ' ':
> + case '\f':
> + case '\v':
> + case '\t':
> + case '\r':
> + case '\n':
> + jj++; /* p stays on the separator; the next strtoul call skips it as leading whitespace */
> + break;
> + default:
> + fprintf (stderr, "Malformed ranges\n");
> + return 1;
> + }
> + }
> + }
> + goto nextArgument; /* argu[ii] was consumed as the range list */
> + case 'd':
> + chartorune (&d, argu[++ii]); /* first rune of the next argument becomes the delimiter */
> + goto nextArgument;
> + case 's':
> + s = 1;
> + break;
> + }
> +nextArgument: ;
> + }
> +
> + if (!mode) {
> + fprintf (stderr, "No mode given\n");
> + return 1;
> + }
> +
> + if (ii < argc) {
> + for (; ii < argc; ii++) {
> + if (!freopen (argu[ii], "r", stdin)) { /* each file operand replaces stdin, which go reads */
> + fprintf (stderr, "Failed to open file %s\n", argu[ii]);
> + return 1;
> + }
> + go (mode, d, s, rs);
> + }
> + }
> + else go (mode, d, s, rs);
> +
> + return 0;
> +}
>
Received on Wed Aug 01 2012 - 14:36:46 CEST

This archive was generated by hypermail 2.3.0 : Wed Aug 01 2012 - 14:48:03 CEST