[hackers] [sbase][PATCH v2] grep: add -r

From: Mattias Andrée <maandree_AT_kth.se>
Date: Mon, 4 Apr 2016 20:32:58 +0200

Unlike your usual grep -r, this implementation
uses breadth-first search. This usually
makes it find what you are looking for faster.

Signed-off-by: Mattias Andrée <maandree_AT_kth.se>
---
 fs.h              | 17 ++++++++++--
 grep.1            | 10 ++++---
 grep.c            | 78 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 libutil/recurse.c | 51 +++++++++++++++++++++++++++++++++---
 4 files changed, 138 insertions(+), 18 deletions(-)
diff --git a/fs.h b/fs.h
index 15ae5f4..6e0a4fc 100644
--- a/fs.h
+++ b/fs.h
_AT_@ -2,25 +2,36 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
+#include "queue.h"
+
 struct history {
 	struct history *prev;
 	dev_t dev;
 	ino_t ino;
 };
 
+struct pendingrecurse { /* one deferred recurse() call, queued by recurselater() */
+	char *path; /* heap copy of the path; released by recursenow() */
+	void *data; /* opaque pointer handed back to recurse() unchanged */
+	int depth; /* value of recursor.depth when the entry was queued */
+	TAILQ_ENTRY(pendingrecurse) entry; /* linkage in recursor.pending */
+};
+
 struct recursor {
-	void (*fn)(const char *, struct stat *st, void *, struct recursor *);
+	void (*fn)(const char *, struct stat *, void *, struct recursor *);
 	struct history *hist;
 	int depth;
 	int maxdepth;
 	int follow;
 	int flags;
+	TAILQ_HEAD(pending, pendingrecurse) pending;
 };
 
 enum {
 	SAMEDEV  = 1 << 0,
 	DIRFIRST = 1 << 1,
 	SILENT   = 1 << 2,
+	BFS      = 1 << 3,
 };
 
 extern int cp_aflag;
_AT_@ -38,6 +49,8 @@ extern int rm_status;
 extern int recurse_status;
 
 void recurse(const char *, void *, struct recursor *);
+void recurselater(const char *, void *, struct recursor *);
+void recursenow(struct recursor *);
 
 int cp(const char *, const char *, int);
-void rm(const char *, struct stat *st, void *, struct recursor *);
+void rm(const char *, struct stat *, void *, struct recursor *);
diff --git a/grep.1 b/grep.1
index 6f80175..90bcc1f 100644
--- a/grep.1
+++ b/grep.1
_AT_@ -1,4 +1,4 @@
-.Dd 2015-10-08
+.Dd 2016-03-30
 .Dt GREP 1
 .Os sbase
 .Sh NAME
_AT_@ -6,7 +6,7 @@
 .Nd search files for patterns
 .Sh SYNOPSIS
 .Nm
-.Op Fl EFHchilnqsvx
+.Op Fl EFHchilnqrsvx
 .Op Fl e Ar pattern
 .Op Fl f Ar file
 .Op Ar pattern
_AT_@ -55,6 +55,10 @@ Print only the names of files with matching lines.
 Prefix each matching line with its line number in the input.
 .It Fl q
 Print nothing, only return status.
+.It Fl r
+Search directories recursively. If no
+.Ar file
+has been specified, the current working directory is searched.
 .It Fl s
 Suppress the error messages ordinarily written for nonexistent or unreadable
 files.
_AT_@ -89,5 +93,5 @@ utility is compliant with the
 specification.
 .Pp
 The
-.Op Fl Hhw
+.Op Fl Hhrw
 flags are an extension to that specification.
diff --git a/grep.c b/grep.c
index 64ffbe2..0d2a8a6 100644
--- a/grep.c
+++ b/grep.c
_AT_@ -1,10 +1,15 @@
 /* See LICENSE file for copyright and license details. */
+#include <sys/stat.h>
+
+#include <dirent.h>
+#include <errno.h>
 #include <regex.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
 
+#include "fs.h"
 #include "queue.h"
 #include "util.h"
 
_AT_@ -21,12 +26,14 @@ static int eflag;
 static int fflag;
 static int hflag;
 static int iflag;
+static int rflag;
 static int sflag;
 static int vflag;
 static int wflag;
 static int xflag;
 static int many;
 static int mode;
+static int implicitcwd;
 
 struct pattern {
 	char *pattern;
_AT_@ -163,22 +170,60 @@ end:
 	return match;
 }
 
+static int
+isdir(const char *path) /* nonzero if path can be stat'ed and is a directory */
+{
+	struct stat st;
+	if (stat(path, &st)) /* a path that cannot be stat'ed is reported as "not a directory" */
+		return 0;
+	return S_ISDIR(st.st_mode); /* stat() follows symlinks, so the link target is what is tested */
+}
+
+static void
+grepdir(const char *path, struct stat *st, void *data, struct recursor *r)
+{
+	int m, *match = data; /* data points at the caller's running match status */
+	FILE *fp;
+	if (implicitcwd) { /* "." was searched implicitly: drop the leading "./" from names */
+		if (path[0] == '.' && path[1] == '/')
+			path += 2;
+	}
+	if (S_ISDIR(st->st_mode)) { /* directories are queued for later, not descended into now */
+		recurselater(path, data, r);
+		return;
+	}
+	if (!(fp = fopen(path, "r"))) {
+		if (!sflag)
+			weprintf("fopen %s:", path); /* pass the string itself to %s */
+		*match = Error;
+		return;
+	}
+	m = grep(fp, path);
+	if (m == Error || (*match != Error && m == Match)) /* Error is sticky; Match beats NoMatch */
+		*match = m;
+	if (fshut(fp, path))
+		*match = Error;
+}
+
 static void
 usage(void)
 {
-	enprintf(Error, "usage: %s [-EFHchilnqsvwx] [-e pattern] [-f file] "
+	enprintf(Error, "usage: %s [-EFHchilnqrsvwx] [-e pattern] [-f file] "
 	         "[pattern] [file ...]\n", argv0);
 }
 
 int
 main(int argc, char *argv[])
 {
+	struct recursor r = { .fn = grepdir, .hist = NULL, .depth = 0, .maxdepth = 0,
+	                      .follow = 'H', .flags = BFS };
 	struct pattern *pnode;
 	int m, flags = REG_NOSUB, match = NoMatch;
 	FILE *fp;
 	char *arg;
 
 	SLIST_INIT(&phead);
+	TAILQ_INIT(&r.pending);
 
 	ARGBEGIN {
 	case 'E':
_AT_@ -226,8 +271,12 @@ main(int argc, char *argv[])
 		flags |= REG_ICASE;
 		iflag = 1;
 		break;
+	case 'r':
+		rflag = 1;
+		break;
 	case 's':
 		sflag = 1;
+		r.flags |= SILENT;
 		break;
 	case 'v':
 		vflag = 1;
_AT_@ -259,29 +308,40 @@ main(int argc, char *argv[])
 		/* Compile regex for all search patterns */
 		SLIST_FOREACH(pnode, &phead, entry)
 			enregcomp(Error, &pnode->preg, pnode->pattern, flags);
-	many = (argc > 1);
-	if (argc == 0) {
+	many = (argc > 1) || rflag;
+	if (argc == 0 && !rflag) {
 		match = grep(stdin, "<stdin>");
+	} else if (argc == 0 && rflag) {
+		implicitcwd = 1;
+		recurse(".", &match, &r);
 	} else {
-		for (; *argv; argc--, argv++) {
+		for (; argc; argc--, argv++) {
 			if (!strcmp(*argv, "-")) {
 				*argv = "<stdin>";
 				fp = stdin;
+			} else if (rflag && isdir(*argv)) {
+				fp = 0;
 			} else if (!(fp = fopen(*argv, "r"))) {
 				if (!sflag)
 					weprintf("fopen %s:", *argv);
 				match = Error;
 				continue;
 			}
-			m = grep(fp, *argv);
-			if (m == Error || (match != Error && m == Match))
-				match = m;
-			if (fp != stdin && fshut(fp, *argv))
+			if (fp) {
+				m = grep(fp, *argv);
+				if (m == Error || (match != Error && m == Match))
+					match = m;
+			} else {
+				recurse(*argv, &match, &r);
+			}
+			if (fp && fp != stdin && fshut(fp, *argv))
 				match = Error;
 		}
 	}
 
-	if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>"))
+	recursenow(&r);
+
+	if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") | recurse_status)
 		match = Error;
 
 	return match;
diff --git a/libutil/recurse.c b/libutil/recurse.c
index e2b8a6e..bed4ed3 100644
--- a/libutil/recurse.c
+++ b/libutil/recurse.c
_AT_@ -96,12 +96,55 @@ recurse(const char *path, void *data, struct recursor *r)
 		if (!(r->flags & DIRFIRST))
 			(r->fn)(path, &st, data, r);
 
-		for (; r->hist; ) {
-			h = r->hist;
-			r->hist = r->hist->prev;
-			free(h);
+		if (!(r->flags & BFS)) {
+			while (r->hist) {
+				h = r->hist;
+				r->hist = r->hist->prev;
+				free(h);
+			}
 		}
 	}
 
 	closedir(dp);
 }
+
+void
+recurselater(const char *path, void *data, struct recursor *r) /* queue path for recursenow() */
+{
+	struct pendingrecurse *elem = malloc(sizeof(*elem));
+	if (!elem) { /* always bail out on allocation failure; SILENT only mutes the message */
+		if (!(r->flags & SILENT))
+			weprintf("malloc:");
+		recurse_status = 1;
+		return;
+	}
+	*elem = (struct pendingrecurse){ .path = estrdup(path), .data = data,
+	                                 .depth = r->depth }; /* path is copied: caller may reuse its buffer */
+	TAILQ_INSERT_TAIL(&r->pending, elem, entry); /* append, so entries are processed in FIFO order */
+}
+
+void
+recursenow(struct recursor *r) /* drain r->pending, calling recurse() on each queued path */
+{
+	struct pendingrecurse *elem;
+	char *path;
+	void *data;
+	struct history *h;
+
+	while (!TAILQ_EMPTY(&r->pending)) {
+		elem = TAILQ_FIRST(&r->pending); /* oldest entry first */
+		path = elem->path;
+		data = elem->data;
+		r->depth = elem->depth; /* restore the depth saved by recurselater() */
+		TAILQ_REMOVE(&r->pending, elem, entry);
+		free(elem); /* entry is unlinked and freed before recursing; the callback may queue more */
+		recurse(path, data, r);
+		free(path); /* path is the copy made with estrdup() when the entry was queued */
+	}
+
+	while (r->hist) { /* release the whole history chain once the queue is empty */
+		h = r->hist;
+		r->hist = r->hist->prev;
+		free(h);
+	}
+}
-- 
2.8.0
Received on Mon Apr 04 2016 - 20:32:58 CEST

This archive was generated by hypermail 2.3.0 : Mon Apr 04 2016 - 20:36:16 CEST