[hackers] [PATCH][sbase] tar: use bigger buffer size to increase performance

From: Silvan Jegen <s.jegen_AT_gmail.com>
Date: Sat, 9 Sep 2017 11:08:42 +0200

From: Jim Beveridge <jimbe_AT_chromium.org>

The original code is by Jim Beveridge working on Fuchsia. I merged it
with slight changes.

Time to tar two 1GB files:

Before patch:

real 0m6.428s
user 0m0.245s
sys 0m4.881s

real 0m6.454s
user 0m0.239s
sys 0m4.883s

real 0m6.515s
user 0m0.259s
sys 0m4.839s

After patch:

real 0m4.755s
user 0m0.026s
sys 0m1.598s

real 0m4.788s
user 0m0.063s
sys 0m1.578s

real 0m4.822s
user 0m0.007s
sys 0m1.662s

A similar speedup can be observed for untaring.

In addition to the buffer size increase we change the code to only create
directories for non-compliant tar files and we check for st to be NULL
in the recursive copy function.
---
 tar.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 55 insertions(+), 17 deletions(-)
diff --git a/tar.c b/tar.c
index 53a737c..8cd1abe 100644
--- a/tar.c
+++ b/tar.c
_AT_@ -16,6 +16,8 @@
 #include "util.h"
 
 #define BLKSIZ 512
+// COPY_CHUNK_SIZE must be a power of 2
+#define COPY_CHUNK_SIZE 8192
 
 enum Type {
 	REG       = '0',
_AT_@ -236,10 +238,13 @@ archive(const char *path)
 	ewrite(tarfd, b, BLKSIZ);
 
 	if (fd != -1) {
-		while ((l = eread(fd, b, BLKSIZ)) > 0) {
-			if (l < BLKSIZ)
-				memset(b + l, 0, BLKSIZ - l);
-			ewrite(tarfd, b, BLKSIZ);
+		char chunk[COPY_CHUNK_SIZE];
+		while ((l = eread(fd, chunk, COPY_CHUNK_SIZE)) > 0) {
+			// Ceiling to BLKSIZ boundary
+			int ceilsize = (l + (BLKSIZ-1)) & ~(BLKSIZ-1);
+			if (l < ceilsize)
+				memset(chunk + l, 0, ceilsize - l);
+			ewrite(tarfd, chunk, ceilsize);
 		}
 		close(fd);
 	}
_AT_@ -250,7 +255,7 @@ archive(const char *path)
 static int
 unarchive(char *fname, ssize_t l, char b[BLKSIZ])
 {
-	char lname[101], *tmp, *p;
+	char lname[101], *p;
 	long mode, major, minor, type, mtime, uid, gid;
 	struct header *h = (struct header *)b;
 	int fd = -1;
_AT_@ -261,9 +266,13 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
 	if (remove(fname) < 0 && errno != ENOENT)
 		weprintf("remove %s:", fname);
 
-	tmp = estrdup(fname);
-	mkdirp(dirname(tmp), 0777, 0777);
-	free(tmp);
+	// tar files normally create the directory chain. This is a fallback
+	// for noncompliant tar files.
+	if (h->type != DIRECTORY) {
+		char* tmp = estrdup(fname);
+		mkdirp(dirname(tmp), 0777, 0777);
+		free(tmp);
+	}
 
 	switch (h->type) {
 	case REG:
_AT_@ -319,9 +328,25 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
 		eprintf("strtol %s: invalid number\n", h->gid);
 
 	if (fd != -1) {
-		for (; l > 0; l -= BLKSIZ)
-			if (eread(tarfd, b, BLKSIZ) > 0)
-				ewrite(fd, b, MIN(l, BLKSIZ));
+		// Ceiling to BLKSIZ boundary
+		int readsize = (l + (BLKSIZ-1)) & ~(BLKSIZ-1);
+		char chunk[COPY_CHUNK_SIZE];
+		int lastread = 0;
+
+		for (; readsize > 0; l -= lastread, readsize -= lastread) {
+			int chunk_size = MIN(readsize, COPY_CHUNK_SIZE);
+			// Short reads are legal, so don't expect to read
+			// everything that was requested.
+			lastread = eread(tarfd, chunk, chunk_size);
+			if (lastread == 0) {
+				close(fd);
+				remove(fname);
+				eprintf("unexpected end of file reading %s.\n",
+					fname);
+			}
+
+			ewrite(fd, chunk, MIN(l, lastread));
+		}
 		close(fd);
 	}
 
_AT_@ -331,7 +356,7 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
 	times[0].tv_sec = times[1].tv_sec = mtime;
 	times[0].tv_nsec = times[1].tv_nsec = 0;
 	if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) < 0)
-		weprintf("utimensat %s:\n", fname);
+		weprintf("utimensat %s %d:\n", fname, errno);
 	if (h->type == SYMLINK) {
 		if (!getuid() && lchown(fname, uid, gid))
 			weprintf("lchown %s:\n", fname);
_AT_@ -349,10 +374,23 @@ static void
 skipblk(ssize_t l)
 {
 	char b[BLKSIZ];
-
-	for (; l > 0; l -= BLKSIZ)
-		if (!eread(tarfd, b, BLKSIZ))
-			break;
+	int lastread = 0;
+	// Ceiling to BLKSIZ boundary
+	int ceilsize = (l + (BLKSIZ-1)) & ~(BLKSIZ-1);
+
+	off_t offset = lseek(tarfd, ceilsize, SEEK_CUR);
+	if (offset >= ceilsize)
+		return;
+	if (errno != ESPIPE)
+		eprintf("unexpected end of file.\n");
+
+	// This is a pipe, socket or FIFO. Fall back to a sequential read.
+	for (; ceilsize > 0; ceilsize -= lastread) {
+		int chunk_size = MIN(ceilsize, BLKSIZ);
+		lastread = eread(tarfd, b, chunk_size);
+		if (lastread == 0)
+			eprintf("unexpected end of file %d.\n", errno);
+        }
 }
 
 static int
_AT_@ -370,7 +408,7 @@ c(const char *path, struct stat *st, void *data, struct recursor *r)
 	if (vflag)
 		puts(path);
 
-	if (S_ISDIR(st->st_mode))
+	if (st && S_ISDIR(st->st_mode))
 		recurse(path, NULL, r);
 }
 
-- 
2.7.4
Received on Sat Sep 09 2017 - 11:08:42 CEST

This archive was generated by hypermail 2.3.0 : Sat Sep 09 2017 - 11:12:24 CEST