--- LICENSE | 1 + Makefile | 1 + cut.1 | 60 +++++++++++++++++++++++ cut.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 226 insertions(+) create mode 100644 cut.1 create mode 100644 cut.c diff --git a/LICENSE b/LICENSE index 927f594..36f0d2c 100644 --- a/LICENSE +++ b/LICENSE @@ -14,6 +14,7 @@ MIT/X Consortium License © 2012 Robert Ransom <rransom.8774_AT_gmail.com> © 2013 Jakob Kramer <jakob.kramer_AT_gmx.de> © 2013 Anselm R Garbe <anselm_AT_garbe.us> +© 2013 Truls Becken <truls.becken_AT_gmail.com> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), diff --git a/Makefile b/Makefile index 3c4840d..ce7d95a 100644 --- a/Makefile +++ b/Makefile @@ -36,6 +36,7 @@ SRC = \ cmp.c \ comm.c \ cp.c \ + cut.c \ date.c \ dirname.c \ echo.c \ diff --git a/cut.1 b/cut.1 new file mode 100644 index 0000000..0e78ddd --- /dev/null +++ b/cut.1 @@ -0,0 +1,60 @@ +.TH CUT 1 sbase\-VERSION +.SH NAME +cut \- extract columns of data +.SH SYNOPSIS +.B cut \-b +.I list +.RB [ \-n ] +.RI [ file ...] +.br +.B cut \-c +.I list +.RI [ file ...] +.br +.B cut \-f +.I list +.RB [ \-d +.IR delim ] +.RB [ \-s ] +.RI [ file ...] +.SH DESCRIPTION +.B cut +out bytes, characters, or delimited fields from each line of the given +files and write to stdout. With no file, or when file is `-', cut reads +from stdin. +.P +.I list +is a comma or space separated list of numbers and ranges where numbering +starts from 1. Ranges are on the form `N-M'. If N or M is missing, the +beginning or end of line is assumed. Numbers and ranges may be repeated, +overlapping, and in any order. Selected input is written in the same +order that it is read, and is written exactly once. +.SH OPTIONS +.TP +.BI \-b \ list +The +.I list +specifies byte positions. +.TP +.BI \-c \ list +The +.I list +specifies character positions. 
+.TP +.BI \-d \ delim +Use first byte of +.I delim +as field delimiter, instead of tab. +.TP +.BI \-f \ list +The +.I list +specifies field numbers. Lines not containing field delimiters are +passed through untouched. +.TP +.B \-n +Do not split characters. A character is output if its last byte is +selected. +.TP +.B \-s +Suppress lines not containing field delimiters. diff --git a/cut.c b/cut.c new file mode 100644 index 0000000..72c20bc --- /dev/null +++ b/cut.c @@ -0,0 +1,164 @@ +/* See LICENSE file for copyright and license details. */ +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "text.h" +#include "util.h" + +static void +usage(void) +{ + eprintf("usage: cut -b list [-n] [file...]\n" + " cut -c list [file...]\n" + " cut -f list [-d delim] [-s] [file...]\n"); +} + +typedef struct Range { + size_t min, max; + struct Range *next; +} Range; + +static Range *list = NULL; +static char mode = 0; +static char delim = '\t'; +static bool nflag = false; +static bool sflag = false; + +static void +insert(Range *r) +{ + Range *l, *p, *t; + + for(p = NULL, l = list; l; p = l, l = l->next) { + if(r->max && r->max+1 < l->min) { + r->next = l; + break; + } else if(!l->max || r->min < l->max+2) { + l->min = MIN(r->min, l->min); + for(p = l, t = l->next; t; p = t, t = t->next) + if(r->max && r->max+1 < t->min) break; + l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0; + l->next = t; + return; + } + } + if(p) p->next = r; else list = r; +} + +static void +parselist(char *str) +{ + char *s; + size_t n = 1; + Range *r; + + for(s = str; *s; s++) { + if(*s == ' ') *s = ','; + if(*s == ',') n++; + } + if(!(r = malloc(n * sizeof(Range)))) + eprintf("malloc:"); + for(s = str; n; n--, s++) { + r->min = (*s == '-') ? 1 : strtoul(s, &s, 10); + r->max = (*s == '-') ? 
strtoul(++s, &s, 10) : r->min; + r->next = NULL; + if(!r->min || (r->max && r->max < r->min) || (*s && *s != ',')) + eprintf("cut: bad list value\n"); + insert(r++); + } +} + +static size_t +seek(const char *s, size_t pos, size_t *prev, size_t count) +{ + const char *t; + size_t n = pos - *prev; + + if(mode == 'b') { + if((t = memchr(s, 0, n))) + return t - s; + if(nflag) + while(n && !UTF8_POINT(s[n])) n--; + *prev += n; + return n; + } else if(mode == 'c') { + for(n++, t = s; *t; t++) + if(UTF8_POINT(*t) && !--n) break; + } else { + for(t = (count < 2) ? s : s+1; n && *t; t++) + if(*t == delim && !--n && count) break; + } + *prev = pos; + return t - s; +} + +static void +cut(FILE *fp) +{ + static char *buf = NULL; + static size_t size = 0; + char *s; + size_t i, n, p; + Range *r; + + while(afgets(&buf, &size, fp)) { + if(buf[i = strlen(buf)-1] == '\n') + buf[i] = 0; + if(mode == 'f' && !strchr(buf, delim)) { + if(!sflag) + puts(buf); + continue; + } + for(i = 0, p = 1, s = buf, r = list; r; r = r->next, s += n) { + s += seek(s, r->min, &p, i++); + if(!*s) break; + if(!r->max) { + fputs(s, stdout); + break; + } + n = seek(s, r->max + 1, &p, i++); + if(fwrite(s, 1, n, stdout) != n) + eprintf("write error:"); + } + putchar('\n'); + } +} + +int +main(int argc, char *argv[]) +{ + FILE *fp; + + ARGBEGIN { + case 'b': + case 'c': + case 'f': + mode = ARGC(); + parselist(ARGF()); + break; + case 'd': + delim = *ARGF(); + break; + case 'n': + nflag = true; + break; + case 's': + sflag = true; + break; + default: + usage(); + } ARGEND; + + if(!mode) + usage(); + if(!argc) + cut(stdin); + else for(; argc--; argv++) { + if(!(fp = strcmp(*argv, "-") ? fopen(*argv, "r") : stdin)) + eprintf("fopen %s:", *argv); + cut(fp); + fclose(fp); + } + return EXIT_SUCCESS; +} -- 1.8.3.1 Received on Tue Oct 08 2013 - 21:23:43 CEST
This archive was generated by hypermail 2.3.0 : Tue Oct 08 2013 - 21:36:03 CEST