[hackers] [sbase][PATCH] Rework l command for the Unicode world.

From: Wolfgang Corcoran-Mathe <wcm_AT_sigwinch.xyz>
Date: Thu, 11 May 2017 22:42:39 -0400

Rather than printing byte sequences for any non-ASCII characters,
printable (via isprintrune()) Unicode characters are displayed
normally. The usual \$, \t, \b and \\ escapes are displayed, but
other non-printing characters are replaced with a Unicode escape
(\uXXXX).

This may be controversial, as it contradicts POSIX. Rationale:

* Replacing printing non-ASCII runes with byte sequences
is pointless. There is no reason to escape multibyte
characters.

* Non-printing characters should not be shown as raw UTF-8
byte sequences. It is far more useful to decode the sequence
and print the Unicode code point. '\u2028' is much easier to
understand than '\xe2\x80\xa8'--the reader is not forced to
decode the transformation format by hand.
---
 ed.c | 72 +++++++++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 39 insertions(+), 33 deletions(-)
diff --git a/ed.c b/ed.c
index 4b28848..e737d57 100644
--- a/ed.c
+++ b/ed.c
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "utf.h"
 #include "util.h"
 
 #define REGEXSIZE  100
@@ -653,48 +654,53 @@ doread(const char *fname)
 }
 
 static void
+lprint(char *s) /* write s, up to but not including its '\n', in escaped "l" form */
+{
+	int size; /* how far chartorune() says to advance s past the decoded rune */
+	Rune r;
+
+	while ((size = chartorune(&r, s)) > 0 && r != '\n') {
+		switch (r) {
+		case '$': /* escaped: doprint() ends each 'l' line with a bare '$' */
+			fputs("\\$", stdout);
+			break;
+		case '\t':
+			fputs("\\t", stdout);
+			break;
+		case '\b':
+			fputs("\\b", stdout);
+			break;
+		case '\\':
+			fputs("\\\\", stdout);
+			break;
+		default:
+			if (isprintrune(r))
+				fputrune(&r, stdout);
+			else /* \uXXXX up to U+FFFF, \UXXXXXXXX above it, so no code point is truncated */
+				printf(r > 0xFFFF ? "\\U%08x" : "\\u%04x", (unsigned)r);
+		}
+		s += size;
+	}
+}
+
+static void /* doprint: print lines line1..line2 per pflag ('n' numbers them, 'l' escapes them) */
 doprint(void)
 {
-	int i, c;
-	char *s, *str;
+	int i; /* current line number; curln becomes i - 1 once the loop is done */
+	char *s;
 
 	if (line1 <= 0 || line2 > lastln)
 		error("incorrect address");
 	for (i = line1; i <= line2; ++i) {
 		if (pflag == 'n')
 			printf("%d\t", i);
-		for (s = gettxt(i); (c = *s) != '\n'; ++s) {
-			if (pflag != 'l')
-				goto print_char;
-			switch (c) {
-			case '$':
-				str = "\\$";
-				goto print_str;
-			case '\t':
-				str = "\\t";
-				goto print_str;
-			case '\b':
-				str = "\\b";
-				goto print_str;
-			case '\\':
-				str = "\\\\";
-				goto print_str;
-			default:
-				if (!isprint(c)) {
-					printf("\\x%x", 0xFF & c);
-					break;
-				}
-			print_char:
-				putchar(c);
-				break;
-			print_str:
-				fputs(str, stdout);
-				break;
-			}
+		s = gettxt(i);
+		if (pflag == 'l') {
+			lprint(s);
+			fputs("$\n", stdout); /* lprint() stops before the '\n', so the '$' marker and newline go here */
+		} else {
+			fputs(s, stdout); /* no newline added: assumes s already ends in '\n' -- TODO confirm in gettxt() */
 		}
-		if (pflag == 'l')
-			fputs("$", stdout);
-		putc('\n', stdout);
 	}
 	curln = i - 1;
 }
-- 
2.9.0
Received on Fri May 12 2017 - 04:42:39 CEST

This archive was generated by hypermail 2.3.0 : Fri May 12 2017 - 04:48:17 CEST