--- Makefile | 1 + TODO | 2 - paste.1 | 122 +++++++++++++++++++++++++++++++++ paste.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 354 insertions(+), 2 deletions(-) create mode 100644 paste.1 create mode 100644 paste.c diff --git a/Makefile b/Makefile index c3c36fe..4e1f3e6 100644 --- a/Makefile +++ b/Makefile _AT_@ -43,6 +43,7 @@ SRC = \ mv.c \ nl.c \ nohup.c \ + paste.c \ pwd.c \ rm.c \ sleep.c \ diff --git a/TODO b/TODO index 20be4b8..d6b9e0b 100644 --- a/TODO +++ b/TODO _AT_@ -28,8 +28,6 @@ md5sum [-c] [file...] nice [-n N] [command] -paste [-s] [-d list] [file...] - printenv [variable...] printf [format] [data...] diff --git a/paste.1 b/paste.1 new file mode 100644 index 0000000..983b41f --- /dev/null +++ b/paste.1 _AT_@ -0,0 +1,122 @@ +.TH PASTE 1 paste-VERSION "Apr 2013" +.SH NAME +paste \- merge corresponding or subsequent lines of files +.SH "SYNOPSIS" +.PP +.B paste +[ +.B \-s +] +[ +.B \-d +.I list +] +.I file... +.SH DESCRIPTION +The +.B paste +utility concatenates the corresponding lines of the given input files, +and writes the resulting lines to standard output. The default operation +of +.B paste +concatenates the corresponding lines of the input files. +The newline of every line except the line from the last input file is +replaced with a tab. +If an end-of-file condition is detected on one or more input files, +but not all input files, +.B paste +behaves as though empty lines were read from the files on which +end-of-file was detected, unless the +.B \-s +option is specified. +.SH OPTIONS +.TP +.B \-d list +unless a backslash character appears in +.I list +each character is an element specifying a delimiter. +If a backslash character appears, that and one or more characters +following it are an element specifying a delimiter. +These elements specify one or more characters to use, +instead of the default tab, to replace the newline of the input +lines. 
The elements in +.I list +are used circularly; that is, when the +.I list +is exhausted the first element from the list is reused. +When the +.B \-s +option is specified, the last newline in a file is not modified. +The delimiter is reset to the first element of list after each file +operand is processed. +If a backslash character appears in list, it and the character following +it represent the following delimiters: +.RS +.TP +.I \en +newline character +.TP +.I \et +tab character +.TP +.I \e\e +backslash character +.TP +.I \e0 +empty string (not a null character) +.TP +If any other characters follow the backslash, results are unspecified. +.RE +.TP +.B \-s +concatenate all of the lines of each separate input file in command line +order. The newline of every line except the last line in each input file +is replaced with a tab, unless otherwise specified by the +.B \-d +option. +.PP +If '\-' is specified for one or more input files, the standard input is +used; standard input is read one line at a time, circularly for each +instance of '\-'. +.SH EXIT VALUES +The +.B paste +utility exits 0 on successful completion, and >0 if an error +occurs. +.SH ENVIRONMENT VARIABLES +The following environment variables affect the execution: +.TP +.B LANG +provide a default value for the internationalization variables +that are unset or null. +.TP +.B LC_ALL +if set to a non-empty string value, override the values of all the +other internationalization variables. +.TP +.B LC_CTYPE +determine the locale for the interpretation of sequences of bytes +of text data as characters (for example, single-byte as opposed to +multi-byte characters in arguments and input files). +.TP +.B LC_MESSAGES +determine the locale that should be used to affect the format and +contents of diagnostic messages written to standard error. +.SH CONFORMING TO +The +.B paste +utility is IEEE Std 1003.2 (POSIX.2) compatible. 
+.SH EXAMPLES
+.TP
+.I "ls | paste - - - -"
+.PP
+Write out a directory in four columns.
+.TP
+.I "paste -s -d '\et\en' file"
+.PP
+Combine pairs of lines from a file into single lines.
+.SH AUTHOR
+Written by Lorenzo Cogotti.
+.SH SEE ALSO
+.BR cut(1)
+.BR lam(1)
diff --git a/paste.c b/paste.c
new file mode 100644
index 0000000..0d0bcdb
--- /dev/null
+++ b/paste.c
@@ -0,0 +1,250 @@
+/* See LICENSE file for copyright and license details. */
+#include <locale.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include "util.h"
+
+/* an input operand: the open stream and the name used in diagnostics */
+typedef struct {
+	FILE *fp;
+	const char *name;
+} Fdescr;
+
+static void eusage(void);
+static size_t unescape(wchar_t *);
+static wint_t in(Fdescr *);
+static void out(wchar_t);
+static void sequential(Fdescr *, int, const wchar_t *, size_t);
+static void parallel(Fdescr *, int, const wchar_t *, size_t);
+
+/*
+ * Parse the options, decode the delimiter list, open every operand
+ * ("-" means standard input) and merge them serially (-s) or in parallel.
+ */
+int
+main(int argc, char **argv) {
+	const char *adelim = NULL;
+	bool seq = false;
+	wchar_t *delim;
+	size_t len;
+	Fdescr *dsc;
+	int i, c;
+
+	setlocale(LC_CTYPE, "");
+
+	while((c = getopt(argc, argv, "sd:")) != -1)
+		switch(c) {
+		case 's':
+			seq = true;
+			break;
+		case 'd':
+			adelim = optarg;
+			break;
+		case '?':
+		default:
+			eusage();
+			break;
+		}
+
+	argc -= optind;
+	argv += optind;
+	if(argc == 0)
+		eusage();
+
+	/* populate delimiters */
+	if(!adelim)
+		adelim = "\t";
+
+	len = mbstowcs(NULL, adelim, 0);
+	if(len == (size_t)-1)
+		eprintf("invalid delimiter\n");
+
+	delim = malloc((len + 1) * sizeof(*delim));
+	if(!delim)
+		eprintf("out of memory\n");
+
+	/* convert len + 1 elements so the terminator unescape() needs is stored */
+	mbstowcs(delim, adelim, len + 1);
+	len = unescape(delim);
+	if(len == 0)
+		eprintf("no delimiters specified\n");
+
+	/* populate file list */
+	dsc = malloc(argc * sizeof(*dsc));
+	if(!dsc)
+		eprintf("out of memory\n");
+
+	for(i = 0; i < argc; i++) {
+		const char *name = argv[i];
+
+		if(strcmp(name, "-") == 0)
+			dsc[i].fp = stdin;
+		else
+			dsc[i].fp = fopen(name, "r");
+
+		if(!dsc[i].fp)
+			eprintf("can't open '%s':", name);
+
+		dsc[i].name = name;
+	}
+
+	if(seq)
+		sequential(dsc, argc, delim, len);
+	else
+		parallel(dsc, argc, delim, len);
+
+	/* out() only sees errors on data already flushed; catch the rest */
+	if(fflush(stdout) == EOF)
+		eprintf("write error:");
+
+	for(i = 0; i < argc; i++) {
+		if(dsc[i].fp != stdin)
+			(void)fclose(dsc[i].fp);
+	}
+
+	free(delim);
+	free(dsc);
+	return 0;
+}
+
+/* print the usage message through eprintf() */
+static void
+eusage(void) {
+	eprintf("usage: paste [-s][-d list] file...\n");
+}
+
+/*
+ * Decode the backslash escapes of a -d list in place and return the
+ * number of delimiter elements.  "\0" (the empty string) is stored as a
+ * null element, so callers must rely on the returned count, not on a
+ * terminator.  Decoding stops at an unknown or truncated escape.
+ */
+static size_t
+unescape(wchar_t *delim) {
+	wchar_t c;
+	size_t i;
+	size_t len;
+
+	for(i = 0, len = 0; (c = delim[i++]) != '\0'; len++) {
+		if(c == '\\') {
+			switch(delim[i++]) {
+			case 'n':
+				delim[len] = '\n';
+				break;
+			case 't':
+				delim[len] = '\t';
+				break;
+			case '0':
+				delim[len] = '\0';
+				break;
+			case '\\':
+				delim[len] = '\\';
+				break;
+			case '\0':
+			default:
+				/* POSIX: unspecified results */
+				return len;
+			}
+		} else
+			delim[len] = c;
+	}
+
+	return len;
+}
+
+/* read one wide character from f, reporting a read error via eprintf() */
+static wint_t
+in(Fdescr *f) {
+	wint_t c = fgetwc(f->fp);
+
+	if(c == WEOF && ferror(f->fp))
+		eprintf("'%s' read error:", f->name);
+
+	return c;
+}
+
+/* write one wide character to stdout, reporting a write error via eprintf() */
+static void
+out(wchar_t c) {
+	putwchar(c);
+	if(ferror(stdout))
+		eprintf("write error:");
+}
+
+/*
+ * -s mode: write each file as a single line, replacing every newline but
+ * the last with the next delimiter; the list restarts for each file.
+ */
+static void
+sequential(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
+	int i;
+
+	for(i = 0; i < len; i++) {
+		size_t d = 0;
+		wint_t c, last = WEOF;
+
+		while((c = in(&dsc[i])) != WEOF) {
+			if(last == '\n') {
+				if(delim[d] != '\0')
+					out(delim[d]);
+
+				d++;
+				d %= cnt;
+			}
+
+			if(c != '\n')
+				out((wchar_t)c);
+
+			last = c;
+		}
+
+		if(last == '\n')
+			out((wchar_t)last);
+	}
+}
+
+/*
+ * Default mode: join the corresponding lines of all files.  The newline
+ * of file i becomes delim[i % cnt] (nothing for a null element); the last
+ * file keeps its newline.  Files already at end-of-file contribute empty
+ * fields until every file is exhausted.
+ */
+static void
+parallel(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
+	bool active;
+	int i, j;
+	wint_t c;
+
+	do {
+		/* set once the current output line is known to be non-empty */
+		active = false;
+		for(i = 0; i < len; i++) {
+			do {
+				c = in(&dsc[i]);
+				/* may still be the end of all input: emit nothing yet */
+				if(c == WEOF && !active)
+					break;
+
+				if(!active) {
+					/* pay the separators owed by exhausted files */
+					for(j = 0; j < i; j++) {
+						if(delim[j % cnt] != '\0')
+							out(delim[j % cnt]);
+					}
+					active = true;
+				}
+
+				if(c != '\n' && c != WEOF)
+					out((wchar_t)c);
+				else if(i == len - 1)
+					out('\n');
+				else if(delim[i % cnt] != '\0')
+					out(delim[i % cnt]);
+			} while(c != '\n' && c != WEOF);
+		}
+	} while(active);
+}
-- 
1.8.2
Received on Mon Apr 29 2013 - 18:54:36 CEST
This archive was generated by hypermail 2.3.0 : Mon Apr 29 2013 - 19:00:06 CEST