[dev] [sbase][RFC] Add a simplistic version of tr

From: Silvan Jegen <s.jegen_AT_gmail.com>
Date: Tue, 26 Nov 2013 12:01:01 -0800 (PST)

Hi

This is a braindead and incomplete implementation of tr that only
works for one-byte encodings. Do you think it makes sense to use this
implementation as some kind of stopgap measure until we have a more
robust version of tr?

If you would rather not take this version, what approach would
you take for the character set mapping when using UTF-8? A hashmap-
or B-tree-based solution, or something else entirely?


Cheers,

Silvan

-- >8 --- (use git am -c)
The approach taken works only for one-byte encodings and is rather slow.
---
 Makefile |   1 +
 tr.c     | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 tr.c
diff --git a/Makefile b/Makefile
index 2a72a1c..b78ad2c 100644
--- a/Makefile
+++ b/Makefile
_AT_@ -81,6 +81,7 @@ SRC = \
 	tee.c      \
 	test.c     \
 	touch.c    \
+	tr.c       \
 	true.c     \
 	tty.c      \
 	uname.c    \
diff --git a/tr.c b/tr.c
new file mode 100644
index 0000000..0053a52
--- /dev/null
+++ b/tr.c
_AT_@ -0,0 +1,117 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "text.h"
+#include "util.h"
+
+/* Report the command synopsis ("tr set1 [set2]") through eprintf(). */
+static void
+usage(void)
+{
+	eprintf("usage: tr set1 [set2]\n");
+}
+
+/*
+ * Decode one backslash escape in place.
+ *
+ * s points at the character that followed a backslash in a set operand.
+ * If that character names one of the escapes POSIX lists for tr
+ * (\a \b \f \n \r \t \v \\) it is overwritten with the character the
+ * escape stands for. Anything else, including a terminating NUL, is left
+ * as it is, so an unknown escape such as "\q" yields a plain 'q'.
+ */
+void
+handle_escapes(char *s)
+{
+	switch(*s) {
+	case 'a':
+		*s = '\a';
+		break;
+	case 'b':
+		*s = '\b';
+		break;
+	case 'f':
+		*s = '\f';
+		break;
+	case 'n':
+		*s = '\n';
+		break;
+	case 'r':
+		*s = '\r';
+		break;
+	case 't':
+		*s = '\t';
+		break;
+	case 'v':
+		*s = '\v';
+		break;
+	case '\\':	/* already the literal backslash it stands for */
+	default:
+		break;
+	}
+}
+
+/*
+ * Fill the translation table from the two set operands.
+ *
+ * set1      characters to act on; backslash escapes are decoded in place
+ *           with handle_escapes().
+ * set2      replacement characters, or NULL when no replacement set was
+ *           given. Its escapes are decoded in place as well. If set2 is
+ *           shorter than set1, its last character is reused for the rest
+ *           of set1; if it is empty, nothing is mapped.
+ * mappings  table indexed by byte value (taken as unsigned char), so it
+ *           needs one entry per possible byte. The entry of each set1
+ *           character receives its replacement, or the character itself
+ *           when set2 is NULL, which simply marks it as a member of set1.
+ *           Entries of other characters are not touched.
+ */
+void
+parse_mapping(char *set1, char *set2, char *mappings)
+{
+	char *from;
+	char *to = set2;
+	char repl = '\0';
+
+	for(from = set1; *from; from++) {
+		/*
+		 * A backslash at the very end has nothing to escape: keep it
+		 * literal rather than stepping onto (and then past) the NUL.
+		 */
+		if(*from == '\\' && from[1])
+			handle_escapes(++from);
+
+		if(!set2) {
+			repl = *from;
+		} else if(*to) {
+			if(*to == '\\' && to[1])
+				handle_escapes(++to);
+			repl = *to++;
+		}
+		/* otherwise set2 is used up: keep repeating its last character */
+
+		/* plain char may be negative for bytes >= 0x80; never index with it */
+		mappings[(unsigned char) *from] = repl;
+	}
+}
+
+/*
+ * Write the NUL-terminated string `in` to stdout, leaving out every
+ * character whose entry in mappings is non-zero.
+ *
+ * The table is indexed with the character converted to unsigned char, so
+ * bytes >= 0x80 cannot produce a negative index where plain char is
+ * signed.
+ */
+void
+map_to_null(const char *mappings, char *in)
+{
+	const char *s;
+
+	for(s = in; *s; s++) {
+		if(!mappings[(unsigned char) *s])
+			putchar((unsigned char) *s);
+	}
+}
+
+/*
+ * Write the NUL-terminated string `in` to stdout, replacing every
+ * character that has a non-zero entry in mappings with that entry and
+ * copying all other characters through unchanged.
+ *
+ * The table is indexed with the character converted to unsigned char, so
+ * bytes >= 0x80 cannot produce a negative index where plain char is
+ * signed.
+ */
+void
+map_to_set(const char *mappings, char *in)
+{
+	const char *s;
+	unsigned char c, repl;
+
+	for(s = in; *s; s++) {
+		c = (unsigned char) *s;
+		repl = (unsigned char) mappings[c];
+		putchar(repl ? repl : c);
+	}
+}
+
+/*
+ * tr set1 [set2]
+ *
+ * With two operands, characters of set1 are replaced by the corresponding
+ * characters of set2; with one operand, characters of set1 are dropped.
+ * Every buffer that afgets() delivers from stdin is passed through the
+ * selected mapping function (presumably one line at a time; afgets() is
+ * declared outside this file).
+ */
+int
+main(int argc, char *argv[])
+{
+	/* one entry per possible byte value (0..255); 0 means "not in set1" */
+	char mappings[256];
+	char *buf = NULL;
+	size_t size = 0;
+	void (*mapfunc) (const char*, char*);
+
+	memset(mappings, 0, sizeof(mappings));
+
+	ARGBEGIN {
+	default:
+		usage();
+	} ARGEND;
+
+	/* NOTE(review): relies on usage() not returning; confirm eprintf() exits */
+	if(!argc)
+		usage();
+
+	if(argc >= 2) {
+		parse_mapping(argv[0], argv[1], mappings);
+		mapfunc = map_to_set;
+	} else {
+		parse_mapping(argv[0], NULL, mappings);
+		mapfunc = map_to_null;
+	}
+
+	while(afgets(&buf, &size, stdin))
+		mapfunc(mappings, buf);
+
+	if(ferror(stdin)) {
+		eprintf("<stdin>: read error:");
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
-- 
1.8.4.2
Received on Tue Nov 26 2013 - 21:01:01 CET

This archive was generated by hypermail 2.3.0 : Tue Nov 26 2013 - 21:12:21 CET