Re: [hackers] [PATCH v3][sbase] libutil/unescape.c: simplify and add \E

From: Mattias Andrée <maandree_AT_kth.se>
Date: Tue, 7 Feb 2017 01:03:44 +0100

On Mon, 06 Feb 2017 15:50:04 -0800
evan.gates_AT_gmail.com (Evan Gates) wrote:

> Mattias Andrée <maandree_AT_kth.se> wrote:
>
> > On Mon, 06 Feb 2017 15:05:32 -0800
> > evan.gates_AT_gmail.com (Evan Gates) wrote:
> >
> > > Mattias Andrée <maandree_AT_kth.se> wrote:
> > >
> > > > + } else if (escapes[*r & 255]) {
> > > > + *w++ = escapes[*r++ &
> > > > 255];
> > >
> > > Why do you & 255 here? I think a cast to unsigned char
> > > would accomplish what you are trying to do and be more
> > > correct (as char can default to either signed or
> > > unsigned). Although I may misunderstand what is going
> > > on here.
> >
> > Yes. I used &255 because it's does clutter as much.
>
> OK. I'm going to change to a cast to be on the safe side
> due to the "implementation-defined and undefined aspects"
> mentioned in C99 6.5p4:
>
> Some operators (the unary operator ~, and the binary
> operators <<, >>, &, ^, and |, collectively described as
> bitwise operators) are required to have operands that
> have integer type. These operators yield values that
> depend on the internal representations of integers, and
> have implementation-defined and undefined aspects for
> signed types.

Sure.

>
> > > I think this is clearer, even though it adds a
> > > conditional:
> > >
> > > q = q * 16 + isdigit(*r) ? *r - '0' : tolower(*r) -
> > > 'a' + 10;
> >
> > I think
> >
> > if (isdigit(*r))
> > q = q * 16 + (*r - '0');
> > else
> > q = q * 16 + (tolower(*r) - 'a') + 10;
> >
> > is clearer, or at least brackets around the ternary.
>
> You're right, the line was getting a bit long and
> convoluted with the ternary, bad habit of mine.
>
> Another thought just came to mind, isoctal is a reserved
> name. 7.26p1:
>
> The following names are grouped under individual headers
> for convenience. All external names described below are
> reserved no matter what headers are included by the
> program.
>
> 7.26.2p1:
>
> Function names that begin with either is or to, and a
> lowercase letter may be added to the declarations in the
> <ctype.h> header.
>
> I don't know if it's worth being anal about that,
> especially as we have already limited ourselves to C99 do
> we need to worry about future directions? For now I've
> changed isoctal() to is_odigit() but I'm open to
> discussion.

Sure, I didn't think about that, however, I think we can
be pretty confident that if isoctal is added, it will do
that same thing as that macro, is will not make a difference.
However, I think isodigit probably what such a function
will be called since there already is isxdigit, so why not
change it to is_odigit or ODIGIT.

>
> Updated patch for consideration:
>
> ----- 8< ----- 8< ----- 8< -----
> From 30fd43d7f3b8716054eb9867c835aadc423f652c Mon Sep 17
> 00:00:00 2001 From: =?UTF-8?q?Mattias=20Andr=C3=A9e?=
> <maandree_AT_kth.se> Date: Sun, 5 Feb 2017 00:44:35 +0100
> Subject: [PATCH] libutil/unescape.c: simplify and add \E
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Mattias Andrée <maandree_AT_kth.se>
> ---
> libutil/unescape.c | 101
> ++++++++++++++++++++++------------------------------- 1
> file changed, 42 insertions(+), 59 deletions(-)
>
> diff --git a/libutil/unescape.c b/libutil/unescape.c
> index d1503e6..7523db3 100644
> --- a/libutil/unescape.c
> +++ b/libutil/unescape.c
> _AT_@ -1,74 +1,57 @@
> /* See LICENSE file for copyright and license details. */
> +#include <ctype.h>
> #include <string.h>
>
> #include "../util.h"
>
> +#define is_odigit(c) ('0' <= c && c <= '7')
> +
> size_t
> unescape(char *s)
> {
> - size_t len, i, off, m, factor, q;
> -
> - len = strlen(s);
> + static const char escapes[256] = {
> + ['"'] = '"',
> + ['\''] = '\'',
> + ['\\'] = '\\',
> + ['a'] = '\a',
> + ['b'] = '\b',
> + ['E'] = 033,
> + ['e'] = 033,
> + ['f'] = '\f',
> + ['n'] = '\n',
> + ['r'] = '\r',
> + ['t'] = '\t',
> + ['v'] = '\v'
> + };
> + size_t m, q;
> + char *r, *w;
>
> - for (i = 0; i < len; i++) {
> - if (s[i] != '\\')
> + for (r = w = s; *r;) {
> + if (*r != '\\') {
> + *w++ = *r++;
> continue;
> - off = 0;
> -
> - switch (s[i + 1]) {
> - case '\\': s[i] = '\\'; off++; break;
> - case '\'': s[i] = '\'', off++; break;
> - case '"': s[i] = '"', off++; break;
> - case 'a': s[i] = '\a'; off++; break;
> - case 'b': s[i] = '\b'; off++; break;
> - case 'e': s[i] = 033; off++; break;
> - case 'f': s[i] = '\f'; off++; break;
> - case 'n': s[i] = '\n'; off++; break;
> - case 'r': s[i] = '\r'; off++; break;
> - case 't': s[i] = '\t'; off++; break;
> - case 'v': s[i] = '\v'; off++; break;
> - case 'x':
> - /* "\xH[H]" hexadecimal escape */
> - for (m = i + 2; m < i + 1 + 3 &&
> m < len; m++)
> - if ((s[m] < '0' && s[m]
> > '9') &&
> - (s[m] < 'A' && s[m]
> > 'F') &&
> - (s[m] < 'a' && s[m]
> > 'f'))
> - break;
> - if (m == i + 2)
> - eprintf("invalid escape
> sequence '\\%c'\n", s[i + 1]);
> - off += m - i - 1;
> - for (--m, q = 0, factor = 1; m >
> i + 1; m--) {
> - if (s[m] >= '0' && s[m]
> <= '9')
> - q += (s[m] -
> '0') * factor;
> - else if (s[m] >= 'A' &&
> s[m] <= 'F')
> - q += ((s[m] -
> 'A') + 10) * factor;
> - else if (s[m] >= 'a' &&
> s[m] <= 'f')
> - q += ((s[m] -
> 'a') + 10) * factor;
> - factor *= 16;
> - }
> - s[i] = q;
> - break;
> - case '\0':
> + }
> + r++;
> + if (!*r) {
> eprintf("null escape
> sequence\n");
> - default:
> - /* "\O[OOO]" octal escape */
> - for (m = i + 1; m < i + 1 + 4 &&
> m < len; m++)
> - if (s[m] < '0' || s[m] >
> '7')
> - break;
> - if (m == i + 1)
> - eprintf("invalid escape
> sequence '\\%c'\n", s[i + 1]);
> - off += m - i - 1;
> - for (--m, q = 0, factor = 1; m >
> i; m--) {
> - q += (s[m] - '0') *
> factor;
> - factor *= 8;
> - }
> - s[i] = (q > 255) ? 255 : q;
> + } else if (escapes[(unsigned char)*r]) {
> + *w++ = escapes[(unsigned
> char)*r++];
> + } else if (is_odigit(*r)) {
> + for (q = 0, m = 4; m &&
> is_odigit(*r); m--, r++)
> + q = q * 8 + (*r - '0');
> + *w++ = MIN(q, 255);
> + } else if (*r == 'x' && isxdigit(r[1])) {
> + r++;
> + for (q = 0, m = 2; m &&
> isxdigit(*r); m--, r++)
> + if (isdigit(*r))
> + q = q * 16 + (*r
> - '0');
> + else
> + q = q * 16 +
> (tolower(*r) - 'a' + 10);
> + *w++ = q;
> + } else {
> + eprintf("invalid escape sequence
> '\\%c'\n", *r); }
> -
> - for (m = i + 1; m <= len - off; m++)
> - s[m] = s[m + off];
> - len -= off;
> }
>
> - return len;
> + return w - s;
> }


Received on Tue Feb 07 2017 - 01:03:44 CET

This archive was generated by hypermail 2.3.0 : Tue Feb 07 2017 - 01:12:20 CET