[hackers] [sbase][PATCH] grep -r: avoid loops

From: Mattias Andrée <maandree_AT_kth.se>
Date: Wed, 30 Mar 2016 18:51:56 +0200

Signed-off-by: Mattias Andrée <maandree_AT_kth.se>
---
 grep.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 62 insertions(+), 6 deletions(-)
diff --git a/grep.c b/grep.c
index fadd661..0f50cc2 100644
--- a/grep.c
+++ b/grep.c
_AT_@ -33,6 +33,16 @@ static int xflag;
 static int many;
 static int mode;
 
+struct fileid {
+	dev_t st_dev;
+	ino_t st_ino;
+};
+
+static struct fileid *visited;
+static size_t visited_sorted;
+static size_t visited_unsorted;
+static size_t visited_alloced;
+
 struct pattern {
 	char *pattern;
 	regex_t preg;
_AT_@ -178,6 +188,51 @@ isdir(const char *path)
 }
 
 static int
+fileidpcmp(const void *a_, const void *b_)
+{
+	const struct fileid *a = a_;
+	const struct fileid *b = b_;
+	if (a->st_ino != b->st_ino)
+		return a->st_ino < b->st_ino ? -1 : +1;
+	return a->st_dev < b->st_dev ? -1 : a->st_dev > b->st_dev;
+}
+
+static int
+visited_p(const char *path)
+{
+	struct stat st;
+	struct fileid id;
+	struct fileid *v = visited;
+	struct fileid *vend;
+	if (stat(path, &st))
+		return 0;
+	id.st_dev = st.st_dev;
+	id.st_ino = st.st_ino;
+	if (bsearch(&id, v, visited_sorted, sizeof(*visited), fileidpcmp))
+		return 1;
+	v += visited_sorted;
+	vend = v + visited_unsorted;
+	for (; v != vend; v++)
+		if (v->st_ino == id.st_ino && v->st_dev == id.st_dev)
+			return 1;
+	if (visited_sorted + visited_unsorted == visited_alloced) {
+		visited_alloced = visited_alloced ? visited_alloced * 2 : 1024;
+		visited = ereallocarray(visited, visited_alloced, sizeof(*visited));
+	}
+	if (!visited_sorted) {
+		visited[visited_sorted++] = id;
+	} else {
+		visited[visited_sorted + visited_unsorted++] = id;
+		if (visited_sorted == visited_unsorted) {
+			visited_sorted += visited_unsorted;
+			visited_unsorted = 0;
+			qsort(visited, visited_sorted, sizeof(*visited), fileidpcmp);
+		}
+	}
+	return 0;
+}
+
+static int
 grepdir(char *path)
 {
 	const char *path_proper;
_AT_@ -190,19 +245,23 @@ grepdir(char *path)
 	struct dir { char *path; TAILQ_ENTRY(dir) entry; } *elem;
 	TAILQ_HEAD(queue, dir) queue = TAILQ_HEAD_INITIALIZER(queue);
 
-	/* FIXME is there a portable way to avoid loops? */
-
 	elem = emalloc(sizeof(*elem));
 	elem->path = estrdup(path);
 	TAILQ_INSERT_TAIL(&queue, elem, entry);
 
 next:
+	if (TAILQ_EMPTY(&queue))
+		return match;
+
 	elem = TAILQ_FIRST(&queue);
 	path = elem->path;
 	TAILQ_REMOVE(&queue, elem, entry);
 	free(elem);
 
 	path_proper = *path ? path : ".";
+	if (visited_p(path_proper))
+		goto next;
+
 	if (!(dir = opendir(path_proper))) {
 		if (!sflag)
 			weprintf("opendir %s:", path_proper);
_AT_@ -252,10 +311,7 @@ next:
 	}
 	closedir(dir);
 
-	if (!TAILQ_EMPTY(&queue))
-		goto next;
-
-	return match;
+	goto next;
 }
 
 static void
-- 
2.7.4
Received on Wed Mar 30 2016 - 18:51:56 CEST

This archive was generated by hypermail 2.3.0 : Wed Mar 30 2016 - 19:00:20 CEST