[hackers] [PATCH 1/1] paste: Support -d '\0'

From: Richard Ipsum <richardipsum_AT_vx21.xyz>
Date: Mon, 9 Mar 2020 12:39:16 +0000

POSIX specifies that -d '\0' sets the delimiter to an empty string.
---
 libutf/utf.c          | 12 ++++++++++++
 libutf/utftorunestr.c | 12 ++++++++++++
 libutil/unescape.c    |  3 ++-
 paste.c               | 27 +++++++++++++++------------
 utf.h                 |  4 +++-
 5 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/libutf/utf.c b/libutf/utf.c
index 897c5ef..cf46e57 100644
--- a/libutf/utf.c
+++ b/libutf/utf.c
@@ -62,6 +62,18 @@ utfnlen(const char *s, size_t len)
 	return i;
 }
 
+size_t
+utfmemlen(const char *s, size_t len)
+{
+	const char *p = s;
+	size_t i;
+	Rune r;
+
+	for(i = 0; p - s < len; i++)
+		p += chartorune(&r, p);
+	return i;
+}
+
 char *
 utfrune(const char *s, Rune r)
 {
diff --git a/libutf/utftorunestr.c b/libutf/utftorunestr.c
index 005fe8a..5da9d5f 100644
--- a/libutf/utftorunestr.c
+++ b/libutf/utftorunestr.c
@@ -11,3 +11,15 @@ utftorunestr(const char *str, Rune *r)
 
 	return i;
 }
+
+int
+utfntorunestr(const char *str, size_t len, Rune *r)
+{
+	int i, n;
+	const char *p = str;
+
+	for(i = 0; (n = chartorune(&r[i], p)) && p - str < len; i++)
+		p += n;
+
+	return i;
+}
diff --git a/libutil/unescape.c b/libutil/unescape.c
index d8ed2a2..deca948 100644
--- a/libutil/unescape.c
+++ b/libutil/unescape.c
@@ -21,7 +21,8 @@ unescape(char *s)
 		['n'] = '\n',
 		['r'] = '\r',
 		['t'] = '\t',
-		['v'] = '\v'
+		['v'] = '\v',
+		['0'] = '\0'
 	};
 	size_t m, q;
 	char *r, *w;
diff --git a/paste.c b/paste.c
index b0ac761..051ca72 100644
--- a/paste.c
+++ b/paste.c
@@ -53,7 +53,8 @@ nextline:
 
 		for (; efgetrune(&c, dsc[i].fp, dsc[i].name) ;) {
 			for (m = last + 1; m < i; m++)
-				efputrune(&(delim[m % delimlen]), stdout, "<stdout>");
+				if (delim[m % delimlen] != '\0')
+					efputrune(&(delim[m % delimlen]), stdout, "<stdout>");
 			last = i;
 			if (c == '\n') {
 				if (i != fdescrlen - 1)
@@ -68,7 +69,8 @@ nextline:
 			if (i == fdescrlen - 1)
 				putchar('\n');
 			else
-				efputrune(&d, stdout, "<stdout>");
+				if (d != '\0')
+					efputrune(&d, stdout, "<stdout>");
 			last++;
 		}
 	}
@@ -86,18 +88,18 @@ int
 main(int argc, char *argv[])
 {
 	struct fdescr *dsc;
-	Rune *delim;
-	size_t delimlen, i;
+	Rune *delim_rune = NULL;
+	size_t delim_runelen, i, delim_bytelen = 1;
 	int seq = 0, ret = 0;
-	char *adelim = "\t";
+	char *delim = "\t";
 
 	ARGBEGIN {
 	case 's':
 		seq = 1;
 		break;
 	case 'd':
-		adelim = EARGF(usage());
-		unescape(adelim);
+		delim = EARGF(usage());
+		delim_bytelen = unescape(delim);
 		break;
 	default:
 		usage();
@@ -107,10 +109,11 @@ main(int argc, char *argv[])
 		usage();
 
 	/* populate delimiters */
-	/* TODO: fix libutf to accept sizes */
-	delim = ereallocarray(NULL, utflen(adelim) + 1, sizeof(*delim));
-	if (!(delimlen = utftorunestr(adelim, delim)))
+	delim_rune = ereallocarray(NULL,
+		utfmemlen(delim, delim_bytelen) + 1, sizeof(*delim_rune));
+	if (!(delim_runelen = utfntorunestr(delim, delim_bytelen, delim_rune))) {
 		usage();
+	}
 
 	/* populate file list */
 	dsc = ereallocarray(NULL, argc, sizeof(*dsc));
@@ -126,9 +129,9 @@ main(int argc, char *argv[])
 	}
 
 	if (seq) {
-		sequential(dsc, argc, delim, delimlen);
+		sequential(dsc, argc, delim_rune, delim_runelen);
 	} else {
-		parallel(dsc, argc, delim, delimlen);
+		parallel(dsc, argc, delim_rune, delim_runelen);
 	}
 
 	for (i = 0; i < argc; i++)
diff --git a/utf.h b/utf.h
index 23a9887..fa04f6b 100644
--- a/utf.h
+++ b/utf.h
@@ -38,6 +38,7 @@ int fullrune(const char *, size_t);
 char *utfecpy(char *, char *, const char *);
 size_t utflen(const char *);
 size_t utfnlen(const char *, size_t);
+size_t utfmemlen(const char *, size_t);
 char *utfrune(const char *, Rune);
 char *utfrrune(const char *, Rune);
 char *utfutf(const char *, const char *);
@@ -59,7 +60,8 @@ int isxdigitrune(Rune);
 Rune tolowerrune(Rune);
 Rune toupperrune(Rune);
 
-int utftorunestr(const char*, Rune *);
+int utftorunestr(const char *, Rune *);
+int utfntorunestr(const char *, size_t, Rune *);
 
 int fgetrune(Rune *, FILE *);
 int efgetrune(Rune *, FILE *, const char *);
-- 
2.25.1
Received on Mon Mar 09 2020 - 13:39:16 CET

This archive was generated by hypermail 2.3.0 : Mon Mar 09 2020 - 13:48:37 CET