[hackers] [sbase] [PATCH v4 1/4] concat: Use plain read/write instead of buffered stdio

From: Michael Forney <mforney_AT_mforney.org>
Date: Mon, 3 Jul 2017 14:58:49 -0700

If we are just copying data from one file to another, we don't need to
fill a complete buffer; we can just read a chunk at a time and write it
to the output.
---
 cat.c            |  39 +++++------
 libutil/concat.c |  24 ++++---
 libutil/cp.c     |  49 ++++++--------
 sponge.c         |  31 +++++----
 tail.c           | 199 +++++++++++++++++++++++++++++++++----------------------
 text.h           |   1 -
 util.h           |   1 +
 xinstall.c       |  25 ++++---
 8 files changed, 200 insertions(+), 169 deletions(-)
diff --git a/cat.c b/cat.c
index e3741aa..211e8d1 100644
--- a/cat.c
+++ b/cat.c
@@ -1,22 +1,11 @@
 /* See LICENSE file for copyright and license details. */
-#include <stdio.h>
+#include <fcntl.h>
 #include <string.h>
 #include <unistd.h>
 
-#include "text.h"
 #include "util.h"
 
 static void
-uconcat(FILE *fp1, const char *s1, FILE *fp2, const char *s2)
-{
-	int c;
-
-	setbuf(fp2, NULL);
-	while ((c = getc(fp1)) != EOF)
-		putc(c, fp2);
-}
-
-static void
 usage(void)
 {
 	eprintf("usage: %s [-u] [file ...]\n", argv0);
@@ -25,37 +14,39 @@ usage(void)
 int
 main(int argc, char *argv[])
 {
-	FILE *fp;
-	int ret = 0;
-	void (*cat)(FILE *, const char *, FILE *, const char *) = &concat;
+	int fd, ret = 0;
 
 	ARGBEGIN {
 	case 'u':
-		cat = &uconcat;
 		break;
 	default:
 		usage();
 	} ARGEND
 
 	if (!argc) {
-		cat(stdin, "<stdin>", stdout, "<stdout>");
+		if (concat(0, "<stdin>", 1, "<stdout>") < 0)
+			ret = 1;
 	} else {
 		for (; *argv; argc--, argv++) {
 			if (!strcmp(*argv, "-")) {
 				*argv = "<stdin>";
-				fp = stdin;
-			} else if (!(fp = fopen(*argv, "r"))) {
-				weprintf("fopen %s:", *argv);
+				fd = 0;
+			} else if ((fd = open(*argv, O_RDONLY)) < 0) {
+				weprintf("open %s:", *argv);
 				ret = 1;
 				continue;
 			}
-			cat(fp, *argv, stdout, "<stdout>");
-			if (fp != stdin && fshut(fp, *argv))
+			switch (concat(fd, *argv, 1, "<stdout>")) {
+			case -1:
 				ret = 1;
+				break;
+			case -2:
+				return 1;  /* exit on write error */
+			}
+			if (fd != 0)
+				close(fd);
 		}
 	}
 
-	ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
-
 	return ret;
 }
diff --git a/libutil/concat.c b/libutil/concat.c
index fad9471..2e9aa52 100644
--- a/libutil/concat.c
+++ b/libutil/concat.c
@@ -1,19 +1,23 @@
 /* See LICENSE file for copyright and license details. */
-#include <stdio.h>
+#include <unistd.h>
 
-#include "../text.h"
 #include "../util.h"
 
-void
-concat(FILE *fp1, const char *s1, FILE *fp2, const char *s2)
+int
+concat(int f1, const char *s1, int f2, const char *s2)
 {
 	char buf[BUFSIZ];
-	size_t n;
+	ssize_t n;
 
-	while ((n = fread(buf, 1, sizeof(buf), fp1))) {
-		fwrite(buf, 1, n, fp2);
-
-		if (feof(fp1) || ferror(fp1) || ferror(fp2))
-			break;
+	while ((n = read(f1, buf, sizeof(buf))) > 0) {
+		if (writeall(f2, buf, n) < 0) {
+			weprintf("write %s:", s2);
+			return -2;
+		}
+	}
+	if (n < 0) {
+		weprintf("read %s:", s1);
+		return -1;
 	}
+	return 0;
 }
diff --git a/libutil/cp.c b/libutil/cp.c
index a8db0a2..208a073 100644
--- a/libutil/cp.c
+++ b/libutil/cp.c
@@ -12,7 +12,6 @@
 #include <utime.h>
 
 #include "../fs.h"
-#include "../text.h"
 #include "../util.h"
 
 int cp_aflag  = 0;
@@ -27,7 +26,7 @@ int
 cp(const char *s1, const char *s2, int depth)
 {
 	DIR *dp;
-	FILE *f1, *f2;
+	int f1, f2;
 	struct dirent *d;
 	struct stat st;
 	struct timespec times[2];
@@ -113,46 +112,38 @@ cp(const char *s1, const char *s2, int depth)
 			return 0;
 		}
 	} else {
-		if (!(f1 = fopen(s1, "r"))) {
-			weprintf("fopen %s:", s1);
+		if ((f1 = open(s1, O_RDONLY)) < 0) {
+			weprintf("open %s:", s1);
 			cp_status = 1;
 			return 0;
 		}
-		if (!(f2 = fopen(s2, "w"))) {
-			if (cp_fflag) {
-				if (unlink(s2) < 0 && errno != ENOENT) {
-					weprintf("unlink %s:", s2);
-					cp_status = 1;
-					fclose(f1);
-					return 0;
-				} else if (!(f2 = fopen(s2, "w"))) {
-					weprintf("fopen %s:", s2);
-					cp_status = 1;
-					fclose(f1);
-					return 0;
-				}
-			} else {
-				weprintf("fopen %s:", s2);
+		if ((f2 = creat(s2, st.st_mode)) < 0 && cp_fflag) {
+			if (unlink(s2) < 0 && errno != ENOENT) {
+				weprintf("unlink %s:", s2);
 				cp_status = 1;
-				fclose(f1);
+				close(f1);
 				return 0;
 			}
+			f2 = creat(s2, st.st_mode);
 		}
-		concat(f1, s1, f2, s2);
-
-		/* preserve permissions by default */
-		fchmod(fileno(f2), st.st_mode);
-
-		if (fclose(f2) == EOF) {
-			weprintf("fclose %s:", s2);
+		if (f2 < 0) {
+			weprintf("creat %s:", s2);
 			cp_status = 1;
+			close(f1);
 			return 0;
 		}
-		if (fclose(f1) == EOF) {
-			weprintf("fclose %s:", s1);
+		if (concat(f1, s1, f2, s2) < 0) {
 			cp_status = 1;
+			close(f1);
+			close(f2);
 			return 0;
 		}
+
+		/* preserve permissions by default */
+		fchmod(f2, st.st_mode);
+
+		close(f1);
+		close(f2);
 	}
 
 	if (cp_aflag || cp_pflag) {
diff --git a/sponge.c b/sponge.c
index baeac7f..da8b28c 100644
--- a/sponge.c
+++ b/sponge.c
@@ -1,7 +1,8 @@
 /* See LICENSE file for copyright and license details. */
-#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
 
-#include "text.h"
 #include "util.h"
 
 static void
@@ -13,24 +14,26 @@ usage(void)
 int
 main(int argc, char *argv[])
 {
-	FILE *fp, *tmpfp;
-	int ret = 0;
+	char tmp[] = "/tmp/sponge-XXXXXX";
+	int fd, tmpfd;
 
 	argv0 = argv[0], argc--, argv++;
 
 	if (argc != 1)
 		usage();
 
-	if (!(tmpfp = tmpfile()))
-		eprintf("tmpfile:");
-	concat(stdin, "<stdin>", tmpfp, "<tmpfile>");
-	rewind(tmpfp);
+	if ((tmpfd = mkstemp(tmp)) < 0)
+		eprintf("mkstemp:");
+	unlink(tmp);
+	if (concat(0, "<stdin>", tmpfd, "<tmpfile>") < 0)
+		return 1;
+	if (lseek(tmpfd, 0, SEEK_SET) < 0)
+		eprintf("lseek:");
 
-	if (!(fp = fopen(argv[0], "w")))
-		eprintf("fopen %s:", argv[0]);
-	concat(tmpfp, "<tmpfile>", fp, argv[0]);
+	if ((fd = creat(argv[0], 0666)) < 0)
+		eprintf("creat %s:", argv[0]);
+	if (concat(tmpfd, "<tmpfile>", fd, argv[0]) < 0)
+		return 1;
 
-	ret |= fshut(fp, argv[0]) | fshut(tmpfp, "<tmpfile>");
-
-	return ret;
+	return 0;
 }
diff --git a/tail.c b/tail.c
index 711707f..1ab9d18 100644
--- a/tail.c
+++ b/tail.c
@@ -1,80 +1,125 @@
 /* See LICENSE file for copyright and license details. */
 #include <sys/stat.h>
 
+#include <fcntl.h>
+#include <unistd.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
-#include "text.h"
 #include "utf.h"
 #include "util.h"
 
 static char mode = 'n';
 
-static void
-dropinit(FILE *fp, const char *str, size_t n)
+static int
+dropinit(int fd, const char *fname, size_t count)
 {
 	Rune r;
-	char *buf = NULL;
-	size_t size = 0, i = 1;
-	ssize_t len;
+	char buf[BUFSIZ], *p;
+	ssize_t n;
+	int nr;
+
+	if (count < 2)
+		goto copy;
+	count--;  /* numbering starts at 1 */
+	while (count && (n = read(fd, buf, sizeof(buf))) > 0) {
+		if (mode == 'n') {
+			for (p = buf; count && n > 0; p++, n--) {
+				if (*p == '\n')
+					count--;
+			}
+		} else {
+			for (p = buf; count && n > 0; p += nr, n -= nr, count--) {
+				nr = charntorune(&r, p, n);
+				if (!nr) {
+					/* we don't have a full rune, move
+					 * remaining data to beginning and read
+					 * again */
+					memmove(buf, p, n);
+					break;
+				}
+			}
+		}
+	}
+	if (count) {
+		if (n < 0)
+			weprintf("read %s:", fname);
+		if (n <= 0)
+			return n;
+	}
 
-	if (mode == 'n') {
-		while (i < n && (len = getline(&buf, &size, fp)) > 0)
-			if (len > 0 && buf[len - 1] == '\n')
-				i++;
-	} else {
-		while (i < n && efgetrune(&r, fp, str))
-			i++;
+	/* write the rest of the buffer */
+	if (writeall(1, p, n) < 0)
+		eprintf("write:");
+copy:
+	switch (concat(fd, fname, 1, "<stdout>")) {
+	case -1:  /* read error */
+		return -1;
+	case -2:  /* write error */
+		exit(1);
+	default:
+		return 0;
 	}
-	free(buf);
-	concat(fp, str, stdout, "<stdout>");
 }
 
-static void
-taketail(FILE *fp, const char *str, size_t n)
+static int
+taketail(int fd, const char *fname, size_t count)
 {
-	Rune *r = NULL;
-	struct line *ring = NULL;
-	size_t i, j, *size = NULL;
-	ssize_t len;
-	int seenln = 0;
-
-	if (!n)
-		return;
-
-	if (mode == 'n') {
-		ring = ecalloc(n, sizeof(*ring));
-		size = ecalloc(n, sizeof(*size));
-
-		for (i = j = 0; (len = getline(&ring[i].data,
-		     &size[i], fp)) > 0; seenln = 1) {
-			ring[i].len = len;
-			i = j = (i + 1) % n;
+	static char *buf = NULL;
+	static size_t size = 0;
+	char *p;
+	size_t len = 0, left;
+	ssize_t n;
+
+	if (!count)
+		return 0;
+	for (;;) {
+		if (len + BUFSIZ > size) {
+			/* make sure we have at least BUFSIZ to read */
+			size += 2 * BUFSIZ;
+			buf = erealloc(buf, size);
 		}
-	} else {
-		r = ecalloc(n, sizeof(*r));
-
-		for (i = j = 0; efgetrune(&r[i], fp, str); )
-			i = j = (i + 1) % n;
-	}
-	if (ferror(fp))
-		eprintf("%s: read error:", str);
-
-	do {
-		if (seenln && ring && ring[j].data) {
-			fwrite(ring[j].data, 1, ring[j].len, stdout);
-			free(ring[j].data);
-		} else if (r) {
-			efputrune(&r[j], stdout, "<stdout>");
+		n = read(fd, buf + len, size - len);
+		if (n < 0) {
+			weprintf("read %s:", fname);
+			return -1;
 		}
-	} while ((j = (j + 1) % n) != i);
-
-	free(ring);
-	free(size);
-	free(r);
+		if (n == 0)
+			break;
+		len += n;
+		if (mode == 'n') {
+			/* ignore the last character; if it is a newline, it
+			 * ends the last line */
+			for (p = buf + len - 2, left = count; p >= buf; p--) {
+				if (*p != '\n')
+					continue;
+				left--;
+				if (!left) {
+					p++;
+					break;
+				}
+			}
+		} else {
+			for (p = buf + len - 1, left = count; p >= buf; p--) {
+				/* skip utf-8 continuation bytes */
+				if ((*p & 0xc0) == 0x80)
+					continue;
+				left--;
+				if (!left)
+					break;
+			}
+		}
+		if (p > buf) {
+			len -= p - buf;
+			memmove(buf, p, len);
+		}
+	}
+	if (writeall(1, buf, len) < 0)
+		eprintf("write:");
+	return 0;
 }
 
 static void
@@ -87,11 +132,11 @@ int
 main(int argc, char *argv[])
 {
 	struct stat st1, st2;
-	FILE *fp;
-	size_t tmpsize, n = 10;
+	int fd;
+	size_t n = 10;
 	int fflag = 0, ret = 0, newline = 0, many = 0;
-	char *numstr, *tmp;
-	void (*tail)(FILE *, const char *, size_t) = taketail;
+	char *numstr;
+	int (*tail)(int, const char *, size_t) = taketail;
 
 	ARGBEGIN {
 	case 'f':
@@ -113,17 +158,18 @@ main(int argc, char *argv[])
 		usage();
 	} ARGEND
 
-	if (!argc)
-		tail(stdin, "<stdin>", n);
-	else {
+	if (!argc) {
+		if (tail(0, "<stdin>", n) < 0)
+			ret = 1;
+	} else {
 		if ((many = argc > 1) && fflag)
 			usage();
 		for (newline = 0; *argv; argc--, argv++) {
 			if (!strcmp(*argv, "-")) {
 				*argv = "<stdin>";
-				fp = stdin;
-			} else if (!(fp = fopen(*argv, "r"))) {
-				weprintf("fopen %s:", *argv);
+				fd = 0;
+			} else if ((fd = open(*argv, O_RDONLY)) < 0) {
+				weprintf("open %s:", *argv);
 				ret = 1;
 				continue;
 			}
@@ -134,27 +180,26 @@ main(int argc, char *argv[])
 			if (!(S_ISFIFO(st1.st_mode) || S_ISREG(st1.st_mode)))
 				fflag = 0;
 			newline = 1;
-			tail(fp, *argv, n);
+			if (tail(fd, *argv, n) < 0) {
+				ret = 1;
+				fflag = 0;
+			}
 
 			if (!fflag) {
-				if (fp != stdin && fshut(fp, *argv))
-					ret = 1;
+				if (fd != 0)
+					close(fd);
 				continue;
 			}
-			for (tmp = NULL, tmpsize = 0;;) {
-				while (getline(&tmp, &tmpsize, fp) > 0) {
-					fputs(tmp, stdout);
-					fflush(stdout);
-				}
-				if (ferror(fp))
-					eprintf("readline %s:", *argv);
-				clearerr(fp);
+			for (;;) {
+				if (concat(fd, *argv, 1, "<stdout>") < 0)
+					exit(1);
 				/* ignore error in case file was removed, we continue
 				 * tracking the existing open file descriptor */
 				if (!stat(*argv, &st2)) {
 					if (st2.st_size < st1.st_size) {
 						fprintf(stderr, "%s: file truncated\n", *argv);
-						rewind(fp);
+						if (lseek(fd, 0, SEEK_SET) < 0)
+							eprintf("lseek:");
 					}
 					st1 = st2;
 				}
@@ -163,7 +208,5 @@ main(int argc, char *argv[])
 		}
 	}
 
-	ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
-
 	return ret;
 }
diff --git a/text.h b/text.h
index bceda52..9858592 100644
--- a/text.h
+++ b/text.h
@@ -13,5 +13,4 @@ struct linebuf {
 #define EMPTY_LINEBUF {NULL, 0, 0,}
 void getlines(FILE *, struct linebuf *);
 
-void concat(FILE *, const char *, FILE *, const char *);
 int linecmp(struct line *, struct line *);
diff --git a/util.h b/util.h
index b39d918..1f3cd0c 100644
--- a/util.h
+++ b/util.h
@@ -64,6 +64,7 @@ int eregcomp(regex_t *, const char *, int);
 
 /* io */
 ssize_t writeall(int, const void *, size_t);
+int concat(int, const char *, int, const char *);
 
 /* misc */
 void enmasse(int, char **, int (*)(const char *, const char *, int));
diff --git a/xinstall.c b/xinstall.c
index d0069be..93ce842 100644
--- a/xinstall.c
+++ b/xinstall.c
@@ -2,6 +2,7 @@
 #include <grp.h>
 #include <pwd.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>
@@ -10,7 +11,6 @@
 #include <sys/wait.h>
 
 #include "util.h"
-#include "text.h"
 
 static int Dflag = 0;
 static gid_t group;
@@ -44,7 +44,7 @@ static int
 install(const char *s1, const char *s2, int depth)
 {
 	DIR *dp;
-	FILE *f1, *f2;
+	int f1, f2;
 	struct dirent *d;
 	struct stat st;
 	ssize_t r;
@@ -92,23 +92,22 @@ install(const char *s1, const char *s2, int depth)
 		else if (mknod(s2, (st.st_mode & ~07777) | mode, st.st_rdev) < 0)
 			eprintf("mknod %s:", s2);
 	} else {
-		if (!(f1 = fopen(s1, "r")))
-			eprintf("fopen %s:", s1);
-		if (!(f2 = fopen(s2, "w"))) {
+		if ((f1 = open(s1, O_RDONLY)) < 0)
+			eprintf("open %s:", s1);
+		if ((f2 = creat(s2, 0600)) < 0) {
 			if (unlink(s2) < 0 && errno != ENOENT)
 				eprintf("unlink %s:", s2);
-			else if (!(f2 = fopen(s2, "w")))
-				eprintf("fopen %s:", s2);
+			if ((f2 = creat(s2, 0600)) < 0)
+				eprintf("creat %s:", s2);
 		}
-		concat(f1, s1, f2, s2);
+		if (concat(f1, s1, f2, s2) < 0)
+			exit(1);
 
-		if (fchmod(fileno(f2), mode) < 0)
+		if (fchmod(f2, mode) < 0)
 			eprintf("fchmod %s:", s2);
 
-		if (fclose(f2) == EOF)
-			eprintf("fclose %s:", s2);
-		if (fclose(f1) == EOF)
-			eprintf("fclose %s:", s1);
+		close(f1);
+		close(f2);
 	}
 
 	if (lchown(s2, owner, group) < 0)
-- 
2.13.2
Received on Mon Jul 03 2017 - 23:58:49 CEST

This archive was generated by hypermail 2.3.0 : Tue Jul 04 2017 - 00:13:01 CEST