[hackers] [PATCH sbase] libutil/recurse: Split into two functions

From: Michael Forney <mforney_AT_mforney.org>
Date: Tue, 23 Jun 2020 02:30:51 -0700

recurse() is peculiar function since it is used for two purposes:

1. To bootstrap the recursion process.
2. To perform recursion when a directory is encountered.

In the first case, we stat() the directory, record it in the history
list, then depending on the value of DIRFIRST, we call the function
and then recurse or vice-versa. The function then calls recurse()
again on the same directory, but since we have already added it to
the history list, this call gets pruned.

In the second case, we stat() the directory a second time (we already
did this when traversing the parent directory's entries), then call
the function on each entry.

This approach leads to a few problems:

- We stat() each directory in the hierarchy twice, once at the
  beginning of recurse(), and once for each contained directory.
- We need a DIRFIRST flag to specify whether the function recurses
  at the start or end, even though just running the function would
  do things in the correct order.
- History is only checked when we are about to recurse on a directory,
  *after* we have already operated on it. So while we do prevent
  infinite loops, we double-count the directory.

The last problem can be demonstrated easily with du(1):

$ mkdir D && ln -s . D/E
$ du -L D
8 D/E
16 D
$

The correct result is:

$ du -L D
8 D
$

To solve these problems, split recurse() into two functions:
recurse(), which initializes the recursor struct then calls the
function on the given path, and recursedir(), which reads the
directory entries, calling the function on each one.
---
 chgrp.c           |   2 +-
 chmod.c           |   4 +-
 chown.c           |   2 +-
 du.c              |   2 +-
 fs.h              |   5 +-
 libutil/recurse.c | 141 ++++++++++++++++++++++++----------------------
 libutil/rm.c      |   2 +-
 tar.c             |   4 +-
 8 files changed, 85 insertions(+), 77 deletions(-)
diff --git a/chgrp.c b/chgrp.c
index 4042a0d..6be7e1d 100644
--- a/chgrp.c
+++ b/chgrp.c
_AT_@ -24,7 +24,7 @@ chgrp(int dirfd, const char *name, struct stat *st, void *data, struct recursor
 		weprintf("chown %s:", r->path);
 		ret = 1;
 	} else if (S_ISDIR(st->st_mode)) {
-		recurse(dirfd, name, NULL, r);
+		recursedir(dirfd, name, NULL, r);
 	}
 }
 
diff --git a/chmod.c b/chmod.c
index c79488b..58f995b 100644
--- a/chmod.c
+++ b/chmod.c
_AT_@ -19,7 +19,7 @@ chmodr(int dirfd, const char *name, struct stat *st, void *data, struct recursor
 		weprintf("chmod %s:", r->path);
 		ret = 1;
 	} else if (S_ISDIR(st->st_mode)) {
-		recurse(dirfd, name, NULL, r);
+		recursedir(dirfd, name, NULL, r);
 	}
 }
 
_AT_@ -32,7 +32,7 @@ usage(void)
 int
 main(int argc, char *argv[])
 {
-	struct recursor r = { .fn = chmodr, .maxdepth = 1, .follow = 'H', .flags = DIRFIRST };
+	struct recursor r = { .fn = chmodr, .maxdepth = 1, .follow = 'H' };
 	size_t i;
 
 	argv0 = *argv, argv0 ? (argc--, argv++) : (void *)0;
diff --git a/chown.c b/chown.c
index 71628eb..27dd3a7 100644
--- a/chown.c
+++ b/chown.c
_AT_@ -28,7 +28,7 @@ chownpwgr(int dirfd, const char *name, struct stat *st, void *data, struct recur
 		weprintf("chown %s:", r->path);
 		ret = 1;
 	} else if (S_ISDIR(st->st_mode)) {
-		recurse(dirfd, name, NULL, r);
+		recursedir(dirfd, name, NULL, r);
 	}
 }
 
diff --git a/du.c b/du.c
index 1815a02..6388159 100644
--- a/du.c
+++ b/du.c
_AT_@ -42,7 +42,7 @@ du(int dirfd, const char *path, struct stat *st, void *data, struct recursor *r)
 
 	subtotal = nblks(st->st_blocks);
 	if (S_ISDIR(st->st_mode))
-		recurse(dirfd, path, &subtotal, r);
+		recursedir(dirfd, path, &subtotal, r);
 	*total += subtotal;
 
 	if (!r->depth)
diff --git a/fs.h b/fs.h
index 00ecd3b..9ee3c5d 100644
--- a/fs.h
+++ b/fs.h
_AT_@ -18,12 +18,12 @@ struct recursor {
 	int maxdepth;
 	int follow;
 	int flags;
+	dev_t dev;
 };
 
 enum {
 	SAMEDEV  = 1 << 0,
-	DIRFIRST = 1 << 1,
-	SILENT   = 1 << 2,
+	SILENT   = 1 << 1,
 };
 
 extern int cp_aflag;
_AT_@ -41,6 +41,7 @@ extern int rm_status;
 extern int recurse_status;
 
 void recurse(int, const char *, void *, struct recursor *);
+void recursedir(int, const char *, void *, struct recursor *);
 
 int cp(const char *, const char *, int);
 void rm(int, const char *, struct stat *st, void *, struct recursor *);
diff --git a/libutil/recurse.c b/libutil/recurse.c
index e66efaf..0403e16 100644
--- a/libutil/recurse.c
+++ b/libutil/recurse.c
_AT_@ -15,22 +15,37 @@
 
 int recurse_status = 0;
 
+/* returns 0 if we have seen this directory, 1 otherwise */
+static int
+histadd(struct recursor *r, struct stat *st)
+{
+	struct history *h;
+
+	if (S_ISDIR(st->st_mode)) {
+		for (h = r->hist; h; h = h->prev) {
+			if (h->ino == st->st_ino && h->dev == st->st_dev)
+				return 0;
+		}
+		h = emalloc(sizeof(struct history));
+		h->prev = r->hist;
+		r->hist = h;
+		h->dev  = st->st_dev;
+		h->ino  = st->st_ino;
+	}
+	return 1;
+}
+
 void
 recurse(int dirfd, const char *name, void *data, struct recursor *r)
 {
-	struct dirent *d;
-	struct history *new, *h;
-	struct stat st, dst;
-	DIR *dp;
-	int flags = 0, fd;
-	size_t pathlen = r->pathlen;
-
-	if (dirfd == AT_FDCWD)
-		pathlen = estrlcpy(r->path, name, sizeof(r->path));
+	struct history *h;
+	struct stat st;
+	int flags = 0;
 
-	if (r->follow == 'P' || (r->follow == 'H' && r->depth))
+	if (r->follow == 'P')
 		flags |= AT_SYMLINK_NOFOLLOW;
-
+	if (!r->pathlen)
+		r->pathlen = estrlcpy(r->path, name, sizeof(r->path));
 	if (fstatat(dirfd, name, &st, flags) < 0) {
 		if (!(r->flags & SILENT)) {
 			weprintf("stat %s:", r->path);
_AT_@ -38,71 +53,63 @@ recurse(int dirfd, const char *name, void *data, struct recursor *r)
 		}
 		return;
 	}
-	if (!S_ISDIR(st.st_mode)) {
-		r->fn(dirfd, name, &st, data, r);
-		return;
-	}
+	r->dev = st.st_dev;
+	histadd(r, &st);
+	r->fn(dirfd, name, &st, data, r);
 
-	new = emalloc(sizeof(struct history));
-	new->prev  = r->hist;
-	r->hist    = new;
-	new->dev   = st.st_dev;
-	new->ino   = st.st_ino;
+	while (r->hist) {
+		h = r->hist;
+		r->hist = r->hist->prev;
+		free(h);
+	}
+}
 
-	for (h = new->prev; h; h = h->prev)
-		if (h->ino == st.st_ino && h->dev == st.st_dev)
-			return;
+void
+recursedir(int dirfd, const char *name, void *data, struct recursor *r)
+{
+	struct dirent *d;
+	struct stat st;
+	DIR *dp;
+	int flags = 0, fd;
+	size_t pathlen = r->pathlen;
 
-	if (!r->depth && (r->flags & DIRFIRST))
-		r->fn(dirfd, name, &st, data, r);
+	if (r->maxdepth && r->depth + 1 >= r->maxdepth)
+		return;
 
-	if (!r->maxdepth || r->depth + 1 < r->maxdepth) {
-		fd = openat(dirfd, name, O_RDONLY | O_CLOEXEC | O_DIRECTORY);
-		if (fd < 0) {
-			weprintf("open %s:", r->path);
+	fd = openat(dirfd, name, O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+	if (fd < 0) {
+		weprintf("open %s:", r->path);
+		recurse_status = 1;
+	}
+	if (!(dp = fdopendir(fd))) {
+		if (!(r->flags & SILENT)) {
+			weprintf("fdopendir:");
 			recurse_status = 1;
 		}
-		if (!(dp = fdopendir(fd))) {
+		return;
+	}
+	if (r->path[pathlen - 1] != '/')
+		r->path[pathlen++] = '/';
+	if (r->follow == 'P' || r->follow == 'H')
+		flags |= AT_SYMLINK_NOFOLLOW;
+	while ((d = readdir(dp))) {
+		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+			continue;
+		r->pathlen = pathlen + estrlcpy(r->path + pathlen, d->d_name, sizeof(r->path) - pathlen);
+		if (fstatat(fd, d->d_name, &st, flags) < 0) {
 			if (!(r->flags & SILENT)) {
-				weprintf("fdopendir:");
+				weprintf("stat %s:", r->path);
 				recurse_status = 1;
 			}
-			return;
-		}
-		if (r->path[pathlen - 1] != '/')
-			r->path[pathlen++] = '/';
-		if (r->follow == 'H')
-			flags |= AT_SYMLINK_NOFOLLOW;
-		while ((d = readdir(dp))) {
-			if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
-				continue;
-			r->pathlen = pathlen + estrlcpy(r->path + pathlen, d->d_name, sizeof(r->path) - pathlen);
-			if (fstatat(fd, d->d_name, &dst, flags) < 0) {
-				if (!(r->flags & SILENT)) {
-					weprintf("stat %s:", r->path);
-					recurse_status = 1;
-				}
-			} else if ((r->flags & SAMEDEV) && dst.st_dev != st.st_dev) {
-				continue;
-			} else {
-				r->depth++;
-				r->fn(fd, d->d_name, &dst, data, r);
-				r->depth--;
-			}
-		}
-		r->path[pathlen - 1] = '\0';
-		r->pathlen = pathlen - 1;
-		closedir(dp);
-	}
-
-	if (!r->depth) {
-		if (!(r->flags & DIRFIRST))
-			r->fn(dirfd, name, &st, data, r);
-
-		while (r->hist) {
-			h = r->hist;
-			r->hist = r->hist->prev;
-			free(h);
+		} else if ((r->flags & SAMEDEV) && st.st_dev != r->dev) {
+			continue;
+		} else if (histadd(r, &st)) {
+			r->depth++;
+			r->fn(fd, d->d_name, &st, data, r);
+			r->depth--;
 		}
 	}
+	r->path[pathlen - 1] = '\0';
+	r->pathlen = pathlen - 1;
+	closedir(dp);
 }
diff --git a/libutil/rm.c b/libutil/rm.c
index 8d4be08..4891756 100644
--- a/libutil/rm.c
+++ b/libutil/rm.c
_AT_@ -15,7 +15,7 @@ void
 rm(int dirfd, const char *name, struct stat *st, void *data, struct recursor *r)
 {
 	if (!r->maxdepth && S_ISDIR(st->st_mode)) {
-		recurse(dirfd, name, NULL, r);
+		recursedir(dirfd, name, NULL, r);
 
 		if (unlinkat(dirfd, name, AT_REMOVEDIR) < 0) {
 			if (!(r->flags & SILENT))
diff --git a/tar.c b/tar.c
index b74c134..9937c96 100644
--- a/tar.c
+++ b/tar.c
_AT_@ -375,7 +375,7 @@ c(int dirfd, const char *name, struct stat *st, void *data, struct recursor *r)
 		puts(r->path);
 
 	if (S_ISDIR(st->st_mode))
-		recurse(dirfd, name, NULL, r);
+		recursedir(dirfd, name, NULL, r);
 }
 
 static void
_AT_@ -514,7 +514,7 @@ usage(void)
 int
 main(int argc, char *argv[])
 {
-	struct recursor r = { .fn = c, .follow = 'P', .flags = DIRFIRST };
+	struct recursor r = { .fn = c, .follow = 'P' };
 	struct stat st;
 	char *file = NULL, *dir = ".", mode = '\0';
 	int fd;
-- 
2.27.0
Received on Tue Jun 23 2020 - 11:30:51 CEST

This archive was generated by hypermail 2.3.0 : Tue Jun 23 2020 - 11:36:33 CEST