[hackers] [sbase] Improved tr || Adria Garriga from git_AT_suckless.org on 2014-07-16 (hackers mail list archive)

From: <git_AT_suckless.org>
Date: Wed, 16 Jul 2014 21:41:23 +0200

commit b3a63a60e4c23daf63d155c22d29cbe3f1399b6f
Author: Adria Garriga <rhaps0dy_AT_installgentoo.com>
Date: Tue Jul 15 00:49:42 2014 +0200

    Improved tr

    - Added support for character ranges ( a-z )
    - Added support for complementary charset ( -c ), only in delete mode
    - Added support for octal escape sequences
    - Unicode now only works when there are no octal escape sequences,
      otherwise behavior is not predictable at first sight.
    - tr now supports null characters in the input
    - Does not yet have support for character classes ( [:upper:] )

diff --git a/tr.1 b/tr.1
index 81141d9..5c299c0 100644
--- a/tr.1
+++ b/tr.1
@@ -3,7 +3,7 @@
tr \- translate characters
.SH SYNOPSIS
.B tr
-.RB [ \-d ]
+.RB [ \-d ] [ \-c ]
.RB set1
.P
.B tr
@@ -13,6 +13,9 @@ tr \- translate characters
.TP
.B \-d
For compatibility. If given, characters in set1 will be deleted from the input and specifying set2 will result in an error.
+.B \-c
+Complementary, causes the specified character set to be inverted, this is all the characters not specified belong to it.
+It only works in conjunction with \-d, because order doesn't make much sense with translation.
.SH DESCRIPTION
.B tr
reads input from stdin replacing every character in
@@ -50,9 +53,15 @@ If set1 is longer than set2
.B tr
will map all the remaining characters to the last one in set2. In case set2 is longer than set1, the remaining characters from set2 will be ignored.
.B
+Character escape sequences, be them characters or octal numbers, are done preceding the token with a "\". You may specify three digits or less for it,
+digits will stop being read when a non-octal character or when three characters are read.
+.B
+Use "A-B" for ordered sets fom A to B.
+.B
.SH NOTES
.B tr
-is Unicode-aware but does not yet handle character classes (e.g. [:alnum:] or [:digit:]).
+is Unicode-aware, but only if you don't specify characters in octal (for example \012), because else it is not predictable. Does not support character
+classes.
.SH SEE ALSO
.IR sed(1)
.IR awk(1)
diff --git a/tr.c b/tr.c
index 388d42f..f902672 100644
--- a/tr.c
+++ b/tr.c
@@ -3,7 +3,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/mman.h>
#include <locale.h>
#include <wchar.h>
#include "text.h"
@@ -12,135 +11,316 @@
static void
usage(void)
{
- eprintf("usage: %s [-d] set1 [set2]\n", argv0);
+ eprintf("usage: %s [-d] [-c] set1 [set2]\n", argv0);
+}
+
+static int dflag, cflag;
+static wchar_t mappings[0x110000];
+
+struct wset_state {
+ char *s; /* current character */
+ wchar_t rfirst, rlast; /* first and last in range */
+ wchar_t prev; /* previous returned character */
+ int prev_was_range; /* was the previous character part of a c-c range? */
+};
+
+struct set_state {
+ char *s, rfirst, rlast, prev;
+ int prev_was_octal; /* was the previous returned character written in octal? */
+};
+
+static void
+set_state_defaults(struct set_state *s)
+{
+ s->rfirst = 1;
+ s->rlast = 0;
+ s->prev_was_octal = 1;
}

static void
-handleescapes(char *s)
+wset_state_defaults(struct wset_state *s)
{
+ s->rfirst = 1;
+ s->rlast = 0;
+ s->prev_was_range = 1;
+}
+
+/* sets *s to the char that was intended to be written.
+ * returns how many bytes the s pointer has to advance to skip the
+ * escape sequence if it was an octal, always zero otherwise. */
+static int
+resolve_escape(char *s)
+{
+ int i;
+ unsigned char c;
+
         switch(*s) {
         case 'n':
                 *s = '\n';
- break;
+ return 0;
         case 't':
                 *s = '\t';
- break;
- case '\\':
- *s = '\\';
- break;
+ return 0;
         case 'r':
                 *s = '\r';
- break;
+ return 0;
         case 'f':
                 *s = '\f';
- break;
+ return 0;
         case 'a':
                 *s = '\a';
- break;
+ return 0;
         case 'b':
                 *s = '\b';
- break;
+ return 0;
         case 'v':
                 *s = '\v';
- break;
+ return 0;
+ case '\\':
+ *s = '\\';
+ return 0;
+ case '
Received on Wed Jul 16 2014 - 21:41:23 CEST

This archive was generated by hypermail 2.3.0 : Wed Jul 16 2014 - 21:48:08 CEST