[dev][sbase][PATCH] Added POSIX paste(1) command implementation.

From: Lorenzo Cogotti <miciamail_AT_hotmail.it>
Date: Mon, 29 Apr 2013 18:54:36 +0200

This commit adds a simple implementation of the POSIX
standard paste(1) command, and its man page.
TODO and Makefile have been updated accordingly.
---
 Makefile |   1 +
 TODO     |   2 -
 paste.1  | 122 +++++++++++++++++++++++++++++++++
 paste.c  | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 354 insertions(+), 2 deletions(-)
 create mode 100644 paste.1
 create mode 100644 paste.c
diff --git a/Makefile b/Makefile
index c3c36fe..4e1f3e6 100644
--- a/Makefile
+++ b/Makefile
_AT_@ -43,6 +43,7 @@ SRC = \
 	mv.c       \
 	nl.c       \
 	nohup.c    \
+	paste.c    \
 	pwd.c      \
 	rm.c       \
 	sleep.c    \
diff --git a/TODO b/TODO
index 20be4b8..d6b9e0b 100644
--- a/TODO
+++ b/TODO
_AT_@ -28,8 +28,6 @@ md5sum [-c] [file...]
 
 nice [-n N] [command]
 
-paste [-s] [-d list] [file...]
-
 printenv [variable...]
 
 printf [format] [data...]
diff --git a/paste.1 b/paste.1
new file mode 100644
index 0000000..983b41f
--- /dev/null
+++ b/paste.1
_AT_@ -0,0 +1,122 @@
+.TH PASTE 1 paste-VERSION "Apr 2013"
+.SH NAME
+paste \- merge corresponding or subsequent lines of files
+.SH "SYNOPSIS"
+.PP
+.B paste
+[
+.B \-s
+]
+[
+.B \-d
+.I list
+]
+.I file...
+.SH DESCRIPTION
+The
+.B paste
+utility concatenates the corresponding lines of the given input files,
+and writes the resulting lines to standard output. The default operation
+of
+.B paste
+concatenates the corresponding lines of the input files.
+The newline of every line except the line from the last input file is
+replaced with a tab.
+If an end-of-file condition is detected on one or more input files, 
+but not all input files,
+.B paste
+behaves as though empty lines were read from the files on which
+end-of-file was detected, unless the
+.B \-s
+option is specified.
+.SH OPTIONS
+.TP
+.B \-d list
+unless a backslash character appears in
+.I list
+each character is an element specifying a delimiter.
+If a backslash character appears, that and one or more characters 
+following it are an element specifying a delimiter.
+These elements specify one or more characters to use, 
+instead of the default tab, to replace the newline of the input 
+lines. The elements in
+.I list
+are used circularly; that is, when the 
+.I list
+is exhausted the first element from the list is reused.
+When the
+.B \-s 
+option is specified, the last newline in a file is not modified.
+The delimiter is reset to the first element of list after each file
+operand is processed.
+If a backslash character appears in list, it and the character following
+it represent one of the following delimiters:
+.RS
+.TP
+.I \en
+newline character
+.TP
+.I \et
+tab character
+.TP
+.I \e\e
+backslash character
+.TP
+.I \e0
+empty string (not a null character)
+.PP
+If any other character follows the backslash, results are unspecified.
+.RE
+.TP
+.B \-s
+concatenate all of the lines of each separate input file in command line 
+order. The newline of every line except the last line in each input file
+is replaced with a tab, unless otherwise specified by the
+.B \-d
+option.
+.PP
+If '\-' is specified for one or more input files, the standard input is
+used; standard input is read one line at a time, circularly for each
+instance of '\-'.
+.SH EXIT VALUES
+The
+.B paste
+utility exits 0 on successful completion, and >0 if an error
+occurs.
+.SH ENVIRONMENT VARIABLES
+The following environment variables affect the execution:
+.TP
+.B LANG
+provide a default value for the internationalization variables
+that are unset or null.
+.TP
+.B LC_ALL
+if set to a non-empty string value, override the values of all the
+other internationalization variables.
+.TP
+.B LC_CTYPE
+determine the locale for the interpretation of sequences of bytes
+of text data as characters (for example, single-byte as opposed to
+multi-byte characters in arguments and input files).
+.TP
+.B LC_MESSAGES
+determine the locale that should be used to affect the format and
+contents of diagnostic messages written to standard error.
+.SH CONFORMING TO
+The
+.B paste
+utility is IEEE Std 1003.2 (POSIX.2) compatible.
+.SH EXAMPLES
+.TP
+.I "ls | paste - - - -"
+.PP
+Write out a directory in four columns.
+.TP
+.I "paste -s -d '\et\en' file"
+.PP
+Combine pairs of lines from a file into single lines.
+.SH AUTHOR
+Written by Lorenzo Cogotti.
+.SH SEE ALSO
+.BR cut (1),
+.BR lam (1)
diff --git a/paste.c b/paste.c
new file mode 100644
index 0000000..0d0bcdb
--- /dev/null
+++ b/paste.c
_AT_@ -0,0 +1,231 @@
+/* See LICENSE file for copyright and license details. */
+#include <locale.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include "util.h"
+
+/* One input operand of paste. */
+typedef struct {
+	FILE *fp;          /* stream to read from; main() uses stdin for "-" */
+	const char *name;  /* operand text from argv, quoted in read-error messages */
+} Fdescr;
+
+/* Forward declarations of the file-local helpers defined after main(). */
+static void eusage(void);
+static size_t unescape(wchar_t *);
+static wint_t in(Fdescr *);
+static void out(wchar_t);
+static void sequential(Fdescr *, int, const wchar_t *, size_t);
+static void parallel(Fdescr *, int, const wchar_t *, size_t);
+
+/*
+ * paste: merge corresponding (default) or subsequent (-s) lines of files.
+ *
+ * Parses the -s and -d options, converts the delimiter list to wide
+ * characters and unescapes it, opens every file operand ("-" selects
+ * stdin) and hands the descriptors to sequential() or parallel().
+ * Returns 0 once all operands have been processed; failures are reported
+ * through eprintf().
+ * NOTE(review): the code relies on eprintf()/eusage() not returning
+ * (e.g. delim is used right after the out-of-memory report); confirm
+ * against util.h.
+ */
+int
+main(int argc, char **argv) {
+	const char *adelim = NULL;
+	bool seq = false;
+	wchar_t *delim;
+	size_t len;
+	Fdescr *dsc;
+	int i, c;
+
+	/* the delimiter list and the input are decoded per the user's locale */
+	setlocale(LC_CTYPE, "");
+
+	while((c = getopt(argc, argv, "sd:")) != -1)
+		switch(c) {
+		case 's':
+			seq = true;
+			break;
+		case 'd':
+			adelim = optarg;
+			break;
+		case '?':
+		default:
+			eusage();
+			break;
+		}
+
+	argc -= optind;
+	argv += optind;
+	if(argc == 0)
+		eusage();
+
+	/* populate delimiters; a single tab is the default list */
+	if(!adelim)
+		adelim = "\t";
+
+	/* first pass only counts the wide characters needed */
+	len = mbstowcs(NULL, adelim, 0);
+	if(len == (size_t)-1)
+		eprintf("invalid delimiter\n");
+
+	delim = malloc((len + 1) * sizeof(*delim));
+	if(!delim)
+		eprintf("out of memory\n");
+
+	/*
+	 * Convert len + 1 elements so the terminating null is stored too:
+	 * with a limit of len, mbstowcs() stops before writing it and
+	 * unescape() would scan past the initialized part of the buffer.
+	 */
+	mbstowcs(delim, adelim, len + 1);
+	len = unescape(delim);
+	if(len == 0)
+		eprintf("no delimiters specified\n");
+
+	/* populate file list */
+	dsc = malloc(argc * sizeof(*dsc));
+	if(!dsc)
+		eprintf("out of memory\n");
+
+	for(i = 0; i < argc; i++) {
+		const char *name = argv[i];
+
+		if(strcmp(name, "-") == 0)
+			dsc[i].fp = stdin;
+		else
+			dsc[i].fp = fopen(name, "r");
+
+		if(!dsc[i].fp)
+			eprintf("can't open '%s':", name);
+
+		dsc[i].name = name;
+	}
+
+	if(seq)
+		sequential(dsc, argc, delim, len);
+	else
+		parallel(dsc, argc, delim, len);
+
+	/*
+	 * out() only sees errors that have already reached the stream;
+	 * data still sitting in the stdio buffer can fail on this last flush.
+	 */
+	if(fflush(stdout) == EOF)
+		eprintf("write error:");
+
+	/* stdin may appear several times in dsc and is never closed here */
+	for(i = 0; i < argc; i++) {
+		if(dsc[i].fp != stdin)
+			(void)fclose(dsc[i].fp);
+	}
+
+	free(delim);
+	free(dsc);
+	return 0;
+}
+
+/*
+ * Report the command synopsis through eprintf().
+ * NOTE(review): main() carries on as if this call never returned, so
+ * eprintf() presumably terminates the process; confirm in util.h.
+ */
+static void
+eusage(void) {
+	eprintf("usage: paste [-s][-d list] file...\n");
+}
+
+/*
+ * Expand the backslash escapes of a delimiter list in place.
+ *
+ * delim: null-terminated wide string; it is overwritten with the decoded
+ *        elements (\n, \t, \\ and \0, the latter stored as a null element
+ *        standing for an empty delimiter).
+ * Returns the number of decoded elements. Because an element may itself
+ * be null, callers must use this count rather than look for a terminator.
+ * Decoding stops at the first backslash followed by anything else,
+ * including the end of the string; elements decoded so far are kept.
+ */
+static size_t
+unescape(wchar_t *delim) {
+	const wchar_t *src = delim;
+	wchar_t *dst = delim;
+	wchar_t wc;
+
+	while((wc = *src++) != '\0') {
+		if(wc == '\\') {
+			wc = *src++;
+			if(wc == 'n')
+				wc = '\n';
+			else if(wc == 't')
+				wc = '\t';
+			else if(wc == '0')
+				wc = '\0';
+			else if(wc != '\\')
+				break; /* POSIX: unspecified results */
+		}
+		*dst++ = wc;
+	}
+
+	return (size_t)(dst - delim);
+}
+
+/*
+ * Read the next wide character from f.
+ * Returns the character, or WEOF when fgetwc() yields none; if the stream
+ * error indicator is set in that case, the failure is reported through
+ * eprintf() with the operand name.
+ */
+static wint_t
+in(Fdescr *f) {
+	wint_t wc;
+
+	wc = fgetwc(f->fp);
+	if(wc != WEOF)
+		return wc;
+
+	if(ferror(f->fp))
+		eprintf("'%s' read error:", f->name);
+
+	return WEOF;
+}
+
+/*
+ * Write one wide character to stdout and report a write error through
+ * eprintf() if the stream's error indicator is set afterwards.
+ */
+static void
+out(wchar_t c) {
+	(void)putwchar(c);
+
+	if(!ferror(stdout))
+		return;
+
+	eprintf("write error:");
+}
+
+/*
+ * -s mode: join all lines of each input into one output line, one input
+ * after the other.
+ *
+ * dsc/len:   the inputs and their number.
+ * delim/cnt: decoded delimiter list and its length (cnt must be non-zero);
+ *            a null element stands for an empty delimiter.
+ * Every newline that is followed by more data is replaced by the next
+ * delimiter, cycling through the list and restarting at its first element
+ * for each input. A trailing newline of the input is written unchanged.
+ */
+static void
+sequential(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
+	Fdescr *f;
+
+	for(f = dsc; f < dsc + len; f++) {
+		size_t next = 0;       /* delimiter to use for the next join */
+		bool pending = false;  /* previous character was a newline */
+		wint_t wc;
+
+		for(wc = in(f); wc != WEOF; wc = in(f)) {
+			/* more data after a newline: the newline becomes a delimiter */
+			if(pending) {
+				if(delim[next] != '\0')
+					out(delim[next]);
+
+				next = (next + 1) % cnt;
+			}
+
+			pending = (wc == '\n');
+			if(!pending)
+				out((wchar_t)wc);
+		}
+
+		/* the newline that ended the input is passed through */
+		if(pending)
+			out('\n');
+	}
+}
+
+/*
+ * Default mode: build each output line from one line of every input.
+ *
+ * dsc/len:   the inputs and their number.
+ * delim/cnt: decoded delimiter list and its length (cnt must be non-zero);
+ *            a null element stands for an empty delimiter and produces
+ *            no output at all.
+ * Column i is followed by delim[i % cnt], the last column by a newline.
+ * An exhausted input contributes an empty column for as long as at least
+ * one other input still delivers data; processing stops after the first
+ * round in which every input is at end-of-file.
+ */
+static void
+parallel(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
+	bool active;
+
+	do {
+		int i;
+
+		/* becomes true once any input yields data for this line */
+		active = false;
+		for(i = 0; i < len; i++) {
+			wchar_t d;
+			wint_t c;
+
+			while((c = in(&dsc[i])) != WEOF) {
+				if(!active) {
+					int k;
+
+					/*
+					 * First data of this line: the exhausted inputs
+					 * before this one are empty columns, so emit the
+					 * separator owed by each of them.
+					 */
+					for(k = 0; k < i; k++) {
+						d = delim[k % cnt];
+						if(d != '\0')
+							out(d);
+					}
+					active = true;
+				}
+
+				if(c == '\n')
+					break;
+
+				out((wchar_t)c);
+			}
+
+			/* nothing read so far this round: keep separators pending */
+			if(!active)
+				continue;
+
+			/* close the column, whether it ended by newline or EOF */
+			if(i == len - 1) {
+				out('\n');
+			} else {
+				d = delim[i % cnt];
+				if(d != '\0')
+					out(d);
+			}
+		}
+	} while(active);
+}
-- 
1.8.2
Received on Mon Apr 29 2013 - 18:54:36 CEST

This archive was generated by hypermail 2.3.0 : Mon Apr 29 2013 - 19:00:06 CEST