--- Makefile | 1 + tr.1 | 50 ++++++++++++++++++++ tr.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 207 insertions(+) create mode 100644 tr.1 create mode 100644 tr.c diff --git a/Makefile b/Makefile index 81dfaf6..ee84221 100644 --- a/Makefile +++ b/Makefile _AT_@ -81,6 +81,7 @@ SRC = \ tee.c \ test.c \ touch.c \ + tr.c \ true.c \ tty.c \ uname.c \ diff --git a/tr.1 b/tr.1 new file mode 100644 index 0000000..7a81c0c --- /dev/null +++ b/tr.1 _AT_@ -0,0 +1,50 @@ +.TH TR 1 sbase\-VERSION +.SH NAME +tr \- translate characters +.SH SYNOPSIS +.B tr +.RB set1 +.RI [ set2 ] +.SH DESCRIPTION +.B tr +reads input from stdin replacing every character in +.B set1 +with the character at the same index in +.B set2. +If set2 is not given +.B tr +deletes the characters in set1 from the input. + +Sets are specified as strings of characters. Almost all represent themselves. The following ones will be interpreted: +.TP +\e\e +backslash +.TP +\ea +audible BEL +.TP +\ef +form feed +.TP +\en +new line +.TP +\er +return +.TP +\et +horizontal tab +.TP +\ev +vertical tab +.PP +If set1 is longer than set2 +.B tr +will map all the remaining characters to the last one in set2. In case set2 is longer than set1, the remaining characters from set2 will be ignored. +.B +.SH NOTES +.B tr +is Unicode-aware but does not yet handle character classes (e.g. [:alnum:] or [:digit:]). +.SH SEE ALSO +.IR sed(1) +.IR awk(1) diff --git a/tr.c b/tr.c new file mode 100644 index 0000000..4fdc28f --- /dev/null +++ b/tr.c _AT_@ -0,0 +1,156 @@ +/* See LICENSE file for copyright and license details. */ +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <locale.h> +#include <wchar.h> +#include "text.h" +#include "util.h" + +static void +usage(void) +{ + eprintf("usage: %s set1 [set2]\n", argv0); +} + +void +handleescapes(char *s) +{ + switch(*s) { + case 'n': + *s = '\n'; + break; + case 't': + *s = '\t'; + break; + case '\\': + *s = '\\'; + break; + case 'r': + *s = '\r'; + break; + case 'f': + *s = '\f'; + break; + case 'a': + *s = '\a'; + break; + case 'b': + *s = '\b'; + break; + case 'v': + *s = '\v'; + break; + } +} + +void +parsemapping(const char *set1, const char *set2, wchar_t *mappings) +{ + char *s; + wchar_t runeleft; + wchar_t runeright; + int leftbytes; + int rightbytes; + size_t n = 0; + size_t lset2; + + if(set2) { + lset2 = strnlen(set2, 255 * sizeof(wchar_t)); + } else { + set2 = &set1[0]; + lset2 = 0; + } + + s = (char *) set1; + while(*s) { + if(*s == '\\') { + handleescapes(++s); + } + + leftbytes = mbtowc(&runeleft, s, 4); + if(set2[n] != '\0') + rightbytes = mbtowc(&runeright, set2 + n, 4); + mappings[runeleft] = runeright; + + s += leftbytes; + if(n < lset2) + n += rightbytes; + } +} + +void +maptonull(const wchar_t *mappings, char *in) +{ + const char *s; + wchar_t runeleft; + int leftbytes = 0; + + s = in; + while(*s) { + leftbytes = mbtowc(&runeleft, s, 4); + if(!mappings[runeleft]) + putwchar(runeleft); + s += leftbytes; + } +} + +void +maptoset(const wchar_t *mappings, char *in) +{ + const char *s; + wchar_t runeleft; + int leftbytes = 0; + + s = in; + while(*s) { + leftbytes = mbtowc(&runeleft, s, 4); + if(!mappings[runeleft]) { + putwchar(runeleft); + } else { + putwchar(mappings[runeleft]); + } + s += leftbytes; + } +} + +int +main(int argc, char *argv[]) +{ + wchar_t *mappings; + char *buf = NULL; + size_t size = 0; + void (*mapfunc) (const wchar_t*, char*); + + setlocale(LC_ALL, ""); + + mappings = (wchar_t *) mmap(NULL, 0x110000 * sizeof(wchar_t), + PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + + ARGBEGIN { + default: + usage(); + } ARGEND; + + if(argc == 0) + usage(); + + if(argc >= 2) { + parsemapping(argv[0], argv[1], mappings); + mapfunc = maptoset; + } else { + parsemapping(argv[0], NULL, mappings); + mapfunc = maptonull; + } + + while(afgets(&buf, &size, stdin)) + mapfunc(mappings, buf); + free(buf); + + if(ferror(stdin)) + eprintf("<stdin>: read error:"); + + return EXIT_SUCCESS; +} -- 1.8.5.3Received on Sat Jan 18 2014 - 16:29:10 CET
This archive was generated by hypermail 2.3.0 : Sat Jan 18 2014 - 16:36:14 CET