---
libutil/unescape.c | 40 +++++++++++++++++++++++++++++++++++-----
printf.1 | 11 ++++++-----
2 files changed, 41 insertions(+), 10 deletions(-)
diff --git a/libutil/unescape.c b/libutil/unescape.c
index bed2c61..5845dd4 100644
--- a/libutil/unescape.c
+++ b/libutil/unescape.c
@@ -7,6 +7,32 @@
#define isoctal(c) ('0' <= c && c <= '7')
size_t
+utf8encode(size_t cp, char *out) /* write code point cp to out as UTF-8 (1 to 6 bytes, no NUL added); returns the byte count */
+{
+ char head = 0, headmask = (char)0x80, buf[7] = {0, 0, 0, 0, 0, 0, 0}; /* buf is filled right to left; buf[6] is scratch and is never copied out */
+ size_t n = 0; /* number of output bytes assembled so far */
+
+ if (cp < 0x80) /* values below 0x80 are emitted unchanged as a single byte */
+ return *out = (char)cp, 1;
+ if (cp > 0x7fffffffUL) /* more than 31 bits is rejected; NOTE(review): the loop below relies on eprintf not returning - confirm */
+ eprintf("invalid code point %zX\n", cp); /* %zX matches size_t; plain %X expects unsigned int */
+ while (cp) { /* peel off 6 payload bits per round, lowest bits first */
+ buf[6 - n] |= (char)0x80; /* the byte stored last round (scratch buf[6] on the first) is not the lead byte: tag it 10xxxxxx */
+ buf[6 - ++n] |= cp & 0x3f;
+ cp >>= 6;
+ head |= headmask; /* the lead-byte prefix gains one set bit per byte produced */
+ headmask >>= 1;
+ }
+ if (buf[6 - n] & (head | headmask)) { /* top payload overlaps the prefix bits or the 0 that ends the prefix: give the prefix a byte of its own */
+ buf[6 - n] |= (char)0x80;
+ n++, head |= headmask;
+ }
+ buf[6 - n] |= head; /* merge the length prefix into the lead byte */
+ memcpy(out, buf + 6 - n, n);
+ return n;
+}
+
+size_t
unescape(char *s)
{
static const char escapes[256] = {
@@ -23,8 +49,9 @@ unescape(char *s)
['t'] = '\t',
['v'] = '\v'
};
+ static const char hexlen[256] = {['x'] = 2, ['u'] = 4, ['U'] = 8};
size_t m, q;
- char *r, *w;
+ char *r, *w, hex;
for (r = w = s; *r;) {
if (*r != '\\') {
@@ -40,11 +67,14 @@ unescape(char *s)
for (q = 0, m = 4; m && isoctal(*r); m--, r++)
q = q * 8 + (*r & 7);
*w++ = q > 255 ? 255 : q;
- } else if (*r == 'x' && isxdigit(r[1])) {
- r++;
- for (q = 0, m = 2; m && isxdigit(*r); m--, r++)
+ } else if (hexlen[*r & 255] && isxdigit(r[1])) {
+ m = hexlen[(hex = *r++) & 255];
+ for (q = 0; m && isxdigit(*r); m--, r++)
q = q * 16 + (*r & 15) + 9 * !!isalpha(*r);
- *w++ = q;
+ if (hex == 'x')
+ *w++ = q;
+ else
+ w += utf8encode(q, w);
} else {
eprintf("invalid escape sequence '\\%c'\n", *r);
}
diff --git a/printf.1 b/printf.1
index 78ffb1e..00fa850 100644
--- a/printf.1
+++ b/printf.1
@@ -1,4 +1,4 @@
-.Dd 2015-10-08
+.Dd 2017-02-04
.Dt PRINTF 1
.Os sbase
.Sh NAME
@@ -17,9 +17,9 @@ using each
until drained.
.Pp
.Nm
-interprets the standard escape sequences \e\e, \e', \e", \ea, \eb, \ee,
-\ef, \en, \er, \et, \ev, \exH[H], \eO[OOO], the sequence \ec, which
-terminates further output if it's found inside
+interprets the standard escape sequences \e\e, \e', \e", \ea, \eb,
+\ef, \en, \er, \et, \ev, \exH[H], \eO[OOO], the sequences \ee, \eE, \euH[HHH],
+\eUH[HHHHHHH], and \ec, which terminates further output if it's found inside
.Ar format
or a %b format string, the format specification %b for an unescaped string and all C
.Xr printf 3
@@ -31,4 +31,5 @@ utility is compliant with the
.St -p1003.1-2013
specification.
.Pp
-The possibility of specifying 4-digit octals is an extension to that specification.
+The escape sequences \ee, \eE, \euH[HHH], \eUH[HHHHHHH], \exH[H] and the possibility of
+specifying 4-digit octals are extensions to that specification.
--
2.11.0
Received on Sun Feb 05 2017 - 01:00:54 CET
This archive was generated by hypermail 2.3.0 : Sun Feb 05 2017 - 01:12:19 CET