[hackers] [sbase] [PATCH 11/11] tail: Process bytes with -c option, and add -m option for runes

From: Michael Forney <mforney_AT_mforney.org>
Date: Tue, 6 Dec 2016 02:17:03 -0800

POSIX says that -c specifies a number of bytes, not characters. This
flag is commonly used by scripts that operate on binary files to do
things like extract a header. Treating the offsets as character offsets
will break things in mysterious ways.

Instead, add a -m option (chosen to match `wc -m`, which also operates
on characters) to handle character offsets.
---
I'm tempted to just delete the character functionality instead of
introducing a new non-standard option. I can see the use of tail with
codepoints, but we definitely need to make -c work on bytes so that we
don't break scripts.
I'm also open to changing the option flag to something else. I just
chose -m because that's what wc uses for characters.
 tail.1 |  6 +++---
 tail.c | 29 ++++++++++++++++++++++++-----
 2 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/tail.1 b/tail.1
index 433404d..281560d 100644
--- a/tail.1
+++ b/tail.1
@@ -7,7 +7,7 @@
 .Sh SYNOPSIS
 .Nm
 .Op Fl f
-.Op Fl c Ar num | Fl n Ar num | Fl Ns Ar num
+.Op Fl c Ar num | Fl m Ar num | Fl n Ar num | Fl Ns Ar num
 .Op Ar file ...
 .Sh DESCRIPTION
 .Nm
@@ -20,10 +20,10 @@ is given,
 reads from stdin.
 .Sh OPTIONS
 .Bl -tag -width Ds
-.It Fl c Ar num | Fl n Ar num | Fl Ns Ar num
+.It Fl c Ar num | Fl m Ar num | Fl n Ar num | Fl Ns Ar num
 Display final
 .Ar num
-characters | lines |
+bytes | characters | lines |
 lines. If
 .Ar num
 begins with '+'
diff --git a/tail.c b/tail.c
index ce65a01..ce3be9d 100644
--- a/tail.c
+++ b/tail.c
@@ -26,12 +26,23 @@ dropinit(int fd, const char *fname, size_t count)
 		goto copy;
 	count--;  /* numbering starts at 1 */
 	while (count && (n = read(fd, buf, sizeof(buf))) > 0) {
-		if (mode == 'n') {
+		switch (mode) {
+		case 'n':  /* lines */
 			for (p = buf; count && n > 0; p++, n--) {
 				if (*p == '\n')
 					count--;
 			}
-		} else {
+			break;
+		case 'c':  /* bytes */
+			if (count > n) {
+				count -= n;
+			} else {
+				p = buf + count;
+				n -= count;
+				count = 0;
+			}
+			break;
+		case 'm':  /* runes */
 			for (p = buf; count && n > 0; p += nr, n -= nr, count--) {
 				nr = charntorune(&r, p, n);
 				if (!nr) {
@@ -42,6 +53,7 @@ dropinit(int fd, const char *fname, size_t count)
 					break;
 				}
 			}
+			break;
 		}
 	}
 	if (count) {
@@ -90,7 +102,8 @@ taketail(int fd, const char *fname, size_t count)
 		if (n == 0)
 			break;
 		len += n;
-		if (mode == 'n') {
+		switch (mode) {
+		case 'n':  /* lines */
 			/* ignore the last character; if it is a newline, it
 			 * ends the last line */
 			for (p = buf + len - 2, left = count; p >= buf; p--) {
@@ -102,7 +115,11 @@ taketail(int fd, const char *fname, size_t count)
 					break;
 				}
 			}
-		} else {
+			break;
+		case 'c':  /* bytes */
+			p = count < len ? buf + len - count : buf;
+			break;
+		case 'm':  /* runes */
 			for (p = buf + len - 1, left = count; p >= buf; p--) {
 				/* skip utf-8 continuation bytes */
 				if ((*p & 0xc0) == 0x80)
@@ -111,6 +128,7 @@ taketail(int fd, const char *fname, size_t count)
 				if (!left)
 					break;
 			}
+			break;
 		}
 		if (p > buf) {
 			len -= p - buf;
@@ -125,7 +143,7 @@ taketail(int fd, const char *fname, size_t count)
 static void
 usage(void)
 {
-	eprintf("usage: %s [-f] [-c num | -n num | -num] [file ...]\n", argv0);
+	eprintf("usage: %s [-f] [-c num | -m num | -n num | -num] [file ...]\n", argv0);
 }
 
 int
@@ -143,6 +161,7 @@ main(int argc, char *argv[])
 		fflag = 1;
 		break;
 	case 'c':
+	case 'm':
 	case 'n':
 		mode = ARGC();
 		numstr = EARGF(usage());
-- 
2.10.2
Received on Tue Dec 06 2016 - 11:17:03 CET

This archive was generated by hypermail 2.3.0 : Tue Dec 06 2016 - 11:24:58 CET