[dev] [sbase][PATCH v2] diff

From: Mattias Andrée <maandree_AT_kth.se>
Date: Sat, 30 Jan 2016 16:13:46 +0100

New command. Should be POSIX-compliant.

Extensions to POSIX:

1) In directories, sockets are not compared.
    POSIX specifies that special devices and FIFO:s
    shall never be compared, and that for other types
    than these and regular files and directories, it
    is implementation-specified.

2) Output is coloured when stdout is a tty.
    This was added to make it easier to spot errors.
    Perhaps this should be removed, but I left it in
    just in case.

There is a comment in the code referring to a post on
the mailing list, for a diff algorithm that could be
used to improve time and space complexity. However,
this algorithm does not produce a minimal list of
necessary changes, which POSIX specifies that it should
do. In GNU diff, the output is not minimal, even for
short files, unless -d (--minimal) is specified. Some
UNIX-like systems have bdiff that is able to compare
files too big for diff, I assume they produce minimal
output with diff, and use minimal complexity with bdiff.
---
 LICENSE  |   1 +
 Makefile |   1 +
 diff.c   | 873 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 875 insertions(+)
 create mode 100644 diff.c
diff --git a/LICENSE b/LICENSE
index cb5a797..2a26979 100644
--- a/LICENSE
+++ b/LICENSE
@@ -59,3 +59,4 @@ Authors/contributors include:
 © 2015 Quentin Rameau <quinq_AT_quinq.eu.org>
 © 2015 Dionysis Grigoropoulos <info_AT_erethon.com>
 © 2015 Wolfgang Corcoran-Mathe <first.lord.of.teal_AT_gmail.com>
+© 2016 Mattias Andrée <maandree_AT_kth.se>
diff --git a/Makefile b/Makefile
index 1c09cac..74e071e 100644
--- a/Makefile
+++ b/Makefile
@@ -89,6 +89,7 @@ BIN =\
 	cron\
 	cut\
 	date\
+	diff\
 	dirname\
 	du\
 	echo\
diff --git a/diff.c b/diff.c
new file mode 100644
index 0000000..3c99ae8
--- /dev/null
+++ b/diff.c
@@ -0,0 +1,873 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <time.h>
+#include <errno.h>
+#include <libgen.h>
+#include <dirent.h>
+#include <sys/stat.h>
+
+#include "arg.h"
+#include "util.h"
+
+/*
+ * Lines that only appear in file-1 are marked 1.
+ * Lines that only appear in file-2 are marked 2.
+ * Lines that appear in both files are marked 0.
+ */
+
+/* Sentinel value that marks the end of an edit path (see `struct trace.f`). */
+#define END_OF_PATH  127
+/* Text appended to the last line of a file that does not end with a <newline>. */
+#define NO_LF_MARK   "\n\033[7m\\ No newline at end of file\033[27m"
+
+/* Failures in this program exit with status 2. */
+#undef EXIT_FAILURE
+#define EXIT_FAILURE  2
+
+/* Allocation/printing helpers that exit with the EXIT_FAILURE defined above. */
+#define emalloc(...)   enmalloc(EXIT_FAILURE, __VA_ARGS__)
+#define erealloc(...)  enrealloc(EXIT_FAILURE, __VA_ARGS__)
+#define eprintf(...)   enprintf(EXIT_FAILURE, __VA_ARGS__)
+#define eperror(...)   (perror(__VA_ARGS__), exit(EXIT_FAILURE))
+
+/* Describe a `struct file_data *` for messages; a null pointer stands for a directory. */
+#define CLASSIFY(f)  (!(f) ? "directory" : (f)->is_empty ? "regular empty file" : "regular file")
+/* Expands to printf arguments: the given arguments wrapped in bold on/off
+ * escape sequences (empty strings when colour is disabled). The format string
+ * must begin and end with a matching "%s". */
+#define BOLD(...)    use_colour ? "\033[1m" : "", __VA_ARGS__, use_colour ? "\033[m" : ""
+
+/* An input file loaded into memory by load_lines(). */
+struct file_data {
+	char **lines;      /* line pointers; `lines[0]` is the whole content if `is_binary` */
+	size_t line_count; /* used as length of `lines[0]` if `is_binary` */
+	int lf_terminated; /* non-zero if the last byte of the file is '\n' */
+	int is_binary;     /* non-zero if the content contains a NUL byte */
+	int is_empty;      /* non-zero if the file has no content */
+	struct stat attr;  /* result of fstat(3) on the file */
+	const char *path;  /* the pathname given to load_lines() (not copied) */
+};
+
+/* One step of an edit path, as produced by enhance_trace(). */
+struct trace {
+	char f;       /* 0, 1 or 2 (see the marking note above), or END_OF_PATH */
+	int ch;       /* set on edit steps that are printed as a change ('!' or 'c') */
+	size_t d;     /* for unchanged lines: distance to the closest edit, SIZE_MAX if none */
+	size_t a_len; /* running count of file-1 lines, accumulated from the right */
+	size_t b_len; /* running count of file-2 lines, accumulated from the right */
+};
+
+/* A run of edit-path steps that is printed together (see get_diff_chunks()). */
+struct chunk {
+	size_t ai;           /* zero-based index in file 1 where the chunk starts */
+	size_t bi;           /* zero-based index in file 2 where the chunk starts */
+	int have_a;          /* chunk contains lines marked 1 */
+	int have_b;          /* chunk contains lines marked 2 */
+	struct trace *chunk; /* first step of the chunk; null in sentinel entries */
+};
+
+/* Command-line state, set in main(). */
+static int bflag = 0;       /* -b: strip trailing whitespace before comparing */
+static int cflag = 0;       /* -c / -C n: copied-context output */
+static int eflag = 0;       /* -e: ed-script output */
+static int fflag = 0;       /* -f: alternative ed-like output */
+static int uflag = 0;       /* -u / -U n: unified output */
+static int rflag = 0;       /* -r: recurse into common subdirectories */
+static int use_colour = 0;  /* non-zero when stdout is a terminal */
+static size_t n_context = 0; /* number of context lines around each edit */
+
+/* Print the synopsis and exit with EXIT_FAILURE (via eprintf). */
+static void
+usage(void)
+{
+	eprintf("usage: %s [-c | -C n | -e | -f | -u | -U n] [-br] file1 file2\n", argv0);
+}
+
+/*
+ * Read the file `pathname` ("-" means standard input) into memory and split
+ * it into lines.
+ *
+ * Returns a null pointer if `pathname` ends with '/' or names a directory.
+ * Otherwise returns a single heap allocation that holds, in order, the
+ * `struct file_data`, the null-terminated array of line pointers, and the
+ * file content followed by room for NO_LF_MARK; release it with free(3).
+ * `pathname` itself is stored, not copied. I/O errors terminate the program.
+ *
+ * A file is treated as binary if it contains a NUL byte; in that case
+ * `lines[0]` points to the raw content and `line_count` is its length.
+ */
+static struct file_data *
+load_lines(const char *pathname)
+{
+	int fd, bin = 0;
+	char *buffer;
+	char *p;
+	char *end;
+	size_t ptr, size, n;
+	ssize_t m;
+	struct file_data *rc;
+	struct stat attr;
+
+	/* A trailing slash can only refer to a directory. */
+	p = strrchr(pathname, '/');
+	if (p && !p[1])
+		return 0;
+
+	fd = open(strcmp(pathname, "-") ? pathname : "/dev/stdin", O_RDONLY);
+	if (fd == -1) {
+		if (errno == EISDIR)
+			return 0;
+		eperror(pathname);
+	}
+
+	if (fstat(fd, &attr))
+		eperror(pathname);
+	if (S_ISDIR(attr.st_mode)) {
+		/* Do not leak the descriptor; directories are walked recursively. */
+		close(fd);
+		return 0;
+	}
+
+	/* Slurp the whole file, doubling the buffer when it is full. */
+	ptr = 0;
+	size = attr.st_blksize ? attr.st_blksize : 8096;
+	buffer = emalloc(size + 1);
+	for (;;) {
+		if (ptr == size)
+			buffer = erealloc(buffer, (size <<= 1) + 1);
+		m = read(fd, buffer + ptr, size - ptr);
+		if (m < 0)
+			eperror(pathname);
+		if (m == 0)
+			break;
+		ptr += (size_t)m;
+	}
+	buffer[ptr] = 0;
+
+	/* Count lines; strchr stops at the first NUL, so an embedded NUL ends the scan early. */
+	for (n = 1, p = buffer;; n += 1) {
+		char *lf = strchr(p, '\n');
+		if (!lf)
+			break;
+		p = lf + 1;
+	}
+	/* If the first NUL is not the terminator we appended, the file is binary. */
+	bin = (strchr(p, '\0') != buffer + ptr);
+
+	/* Grow the allocation and shift the content behind the header and the line table. */
+	rc = erealloc(buffer, sizeof(*rc) + (n + 1) * sizeof(char *) + (ptr + 1 + sizeof(NO_LF_MARK)));
+	buffer = ((char *)rc) + sizeof(*rc) + (n + 1) * sizeof(char *);
+	memmove(buffer, rc, ptr);
+	rc->lines = (char **)((char *)rc + sizeof(*rc));
+	rc->lf_terminated = ptr && buffer[ptr - 1] == '\n';
+	rc->line_count = bin ? ptr : (n -= rc->lf_terminated);
+	buffer[ptr - rc->lf_terminated] = 0;
+	rc->attr = attr;
+	rc->path = pathname;
+	rc->is_binary = bin;
+	rc->is_empty = (ptr == 0);
+
+	close(fd);
+
+	/* Terminate the line table: one entry for binary data, `n` entries for text. */
+	rc->lines[bin ? 1 : n] = 0;
+	if (bin) {
+		rc->lines[0] = buffer;
+	} else {
+		for (ptr = 0, p = buffer; p; p = end) {
+			end = strchr(p, '\n');
+			if (end)
+				*end++ = 0;
+			rc->lines[ptr++] = p;
+		}
+	}
+
+	return rc;
+}
+
+/*
+ * Truncate `text` in place so that it has no trailing whitespace.
+ *
+ * The byte that is overwritten by the new terminator is stored in `*removed`
+ * and a pointer to the new terminator is returned, so the caller can undo
+ * the truncation with `*returned = *removed`.
+ */
+static char *
+rstrip(char *text, char *removed)
+{
+	char *end = strchr(text, '\0');
+	/* <ctype.h> functions require a value representable as unsigned char. */
+	while ((end != text) && isspace((unsigned char)end[-1]))
+		end--;
+	*removed = *end;
+	*end = '\0';
+	return end;
+}
+
+/*
+ * strcmp(3) variant that ignores trailing whitespace in `a`.
+ *
+ * The most recently stripped `a` is remembered in static storage; it is
+ * restored to its original content when the function is next called with a
+ * different `a`. Calling with a null `a` only performs that restoration and
+ * returns 0.
+ */
+static int
+strcmp_rstrip_a(char *a, char *b)
+{
+	static char *cached_line = NULL;
+	static char *cut_pos = NULL;
+	static char cut_char = 0;
+
+	/* Same line as last time: it is already stripped. */
+	if (a == cached_line)
+		return a ? strcmp(a, b) : 0;
+
+	/* Undo the truncation of the previously stripped line. */
+	if (cached_line)
+		*cut_pos = cut_char;
+	if (!a)
+		return 0;
+
+	cached_line = a;
+	cut_pos = rstrip(a, &cut_char);
+	return strcmp(a, b);
+}
+
+/*
+ * Compute an edit path between the line arrays `a` (`an` lines) and `b`
+ * (`bn` lines) with a longest-common-subsequence table. `cmp` must return 0
+ * for lines that are to be treated as equal.
+ *
+ * The result is a heap-allocated array that starts with END_OF_PATH; the
+ * returned pointer points one past its last element. Reading it backwards
+ * yields the marks in file order: 0 = line in both files, 1 = line only in
+ * `a`, 2 = line only in `b`.
+ *
+ * TODO use <20160128154757.GA20170@debian> when `an` is too large.
+ */
+static char *
+diff2_(char **a, char **b, size_t an, size_t bn, int (*cmp)(char *, char *))
+{
+	/* `map` and `matrix` are declared as pointers to variable-length 2-D
+	 * arrays living on the heap; the macros let the code below index them
+	 * as if they were ordinary arrays. */
+#define matrix (*matrix)
+#define map (*map)
+	/* map[ai][bi]: step taken to reach cell (ai, bi). */
+	char map[an + 1][bn + 1] = emalloc(sizeof(char[an + 1][bn + 1]));
+	/* Two rolling rows of common-subsequence lengths, initially zero. */
+	size_t matrix[2][bn + 1] = ecalloc(1, sizeof(size_t[2][bn + 1]));
+	char *rc;
+	size_t ai, bi, ri = 0, mi = 0;
+
+	/* Along the first row only lines from `b` can be consumed. */
+	memset(map[0], 2, bn + 1);
+
+	/* Switch to 1-based indexing to match the table. */
+	a--, b--;
+	for (ai = 1; ai <= an; ai++) {
+		size_t *last = matrix[mi];
+		size_t *this = matrix[mi ^= 1];
+		/* Along the first column only lines from `a` can be consumed. */
+		map[ai][0] = 1;
+		for (bi = 1; bi <= bn; bi++) {
+			if (!cmp(a[ai], b[bi])) {
+				this[bi] = last[bi - 1] + 1;
+				map[ai][bi] = 0;
+			} else {
+				size_t u = last[bi];
+				size_t l = this[bi - 1];
+				this[bi] = l >= u ? l : u;
+				map[ai][bi] = 1 + (l >= u);
+			}
+		}
+	}
+	/* Drop the macro so that `matrix` names the pointer itself again. */
+#undef matrix
+	free(matrix);
+
+	/* Walk back from the bottom-right corner, recording each step. */
+	rc = emalloc(an + bn + 1);
+	rc[ri++] = END_OF_PATH;
+	for (ai = an, bi = bn; ai + bi; ri++) {
+		rc[ri] = map[ai][bi];
+		ai -= rc[ri] != 2;
+		bi -= rc[ri] != 1;
+	}
+#undef map
+	free(map);
+
+	return rc + ri;
+}
+
+/*
+ * Convert a raw edit path, as returned by diff2_() (pointer one past the
+ * end of an array that begins with END_OF_PATH), into an array of
+ * `struct trace` in file order, terminated by an entry with
+ * `f == END_OF_PATH`.
+ *
+ * For each step the distance to the nearest edit, the change flag and the
+ * running chunk lengths are filled in, and within each run of edits the
+ * removals are placed before the additions. The raw path is freed; the
+ * caller frees the returned array.
+ */
+static struct trace *
+enhance_trace(char *path)
+{
+	char *p = path;
+	size_t len, a_len = 0, b_len = 0, i = 0, d = 0, a = 0, b = 0, j = 0;
+	int have_d = 0, ch = 0;
+	struct trace *rc;
+
+	/* Locate the start of the raw path to learn its length. */
+	while (*--p != END_OF_PATH);
+	len = (size_t)(path - p);
+	rc = ecalloc(len, sizeof(*rc));
+
+	/* Find distance from edits, and mark exchanges. (left-to-right) */
+	for (--len; i < len; i++) {
+		/* The raw path is stored back to front. */
+		rc[i].f = *--path;
+		if (rc[i].f) {
+			d = 0, have_d = 1;
+			ch |= ch ? ch : (3 - rc[i].f);
+			if (rc[i].f == ch)
+				rc[i].ch = 1;
+		} else {
+			ch = 0;
+			rc[i].d = (have_d ? ++d : SIZE_MAX);
+		}
+	}
+	rc[i].f = END_OF_PATH;
+
+	/* Find distance from edits, mark exchanges, and get chunk lengths. (right-to-left) */
+	for (i = len, d = 0, ch = have_d = 0; i-- > 0;) {
+		rc[i].a_len = a_len += (rc[i].f != 2);
+		rc[i].b_len = b_len += (rc[i].f != 1);
+		if (rc[i].f) {
+			d = 0, have_d = 1;
+			ch |= ch ? ch : (3 - rc[i].f);
+			if (rc[i].f == ch)
+				rc[i].ch = 1;
+		} else {
+			ch = 0;
+			if (have_d && (d + 1) < rc[i].d)
+				rc[i].d = ++d;
+			/* A line outside the context window ends the chunk. */
+			if (rc[i].d > n_context)
+				a_len = b_len = 0;
+		}
+	}
+
+	/* Put removals before additions. */
+	for (i = 0; i < len; i++) {
+		if (rc[i].f == 0) {
+			/* Rewrite the run of edits that just ended: `a` removals, then `b` additions. */
+			while (a--)
+				rc[j++].f = 1;
+			while (b--)
+				rc[j++].f = 2;
+			j = i + 1, a = b = 0;
+		} else if (rc[i].f == 1) {
+			a++;
+		} else {
+			b++;
+		}
+	}
+	while (a--)
+		rc[j++].f = 1;
+	while (b--)
+		rc[j++].f = 2;
+
+	/* `p` points at the start of the raw path allocation. */
+	free(p);
+	return rc;
+}
+
+/*
+ * Compare the null-terminated line arrays `a` (`an` lines) and `b` (`bn`
+ * lines) and return the enhanced edit path (see enhance_trace()), which the
+ * caller must free.
+ *
+ * If `do_rstrip` is non-zero, trailing whitespace is ignored: the lines of
+ * one file are stripped permanently and those of the other are stripped
+ * temporarily during comparison by strcmp_rstrip_a().
+ */
+static struct trace *
+diff2(char **a, char **b, size_t an, size_t bn, int do_rstrip)
+{
+	size_t skip_start = 0, skip_end = 0;
+	char *rc;
+	int (*cmp)(char *, char *) = (int (*)(char *, char *))strcmp;
+	/* diff2_() is called with the shorter file first. */
+	int transpose = bn < an; 
+
+	if (do_rstrip) {
+		char **lines;
+		char _c;
+		/* Strip the file that will be the second argument of diff2_(). */
+		for (lines = !transpose ? b : a; *lines; lines++)
+			rstrip(*lines, &_c);
+		cmp = strcmp_rstrip_a;
+	}
+
+	/* Reduce problem set, by skipping identical head. */
+	for (skip_start = 0;; skip_start++) {
+		char *a_elem = a[skip_start];
+		char *b_elem = b[skip_start];
+		if (!a_elem || !b_elem || cmp(a_elem, b_elem))
+			break;
+	}
+	a += skip_start, an -= skip_start;
+	b += skip_start, bn -= skip_start;
+	/* Reduce problem set, by skipping identical tail. */
+	for (skip_end = 0; an && bn; an--, bn--, skip_end++)
+		if (cmp(a[an - 1], b[bn - 1]))
+			break;
+
+	rc = !transpose ? diff2_(a, b, an, bn, cmp) : diff2_(b, a, bn, an, cmp);
+	if (transpose) {
+		/* The files were swapped, so swap the marks 1 and 2 back. */
+		char *path;
+		char trace;
+		for (path = rc; (trace = *--path) != END_OF_PATH;)
+			if (trace)
+				*path = 3 - trace;
+	}
+
+	/* Add skipped part to the path. */
+	if (skip_start || skip_end) {
+		char *path = rc;
+		size_t path_len;
+		while (*--path != END_OF_PATH);
+		/* Length including the leading END_OF_PATH element. */
+		path_len = (size_t)(rc - path);
+		path = erealloc(path, skip_end + path_len + skip_start);
+		/* The path is stored back to front: the tail goes right after
+		 * the sentinel and the head goes at the very end. */
+		if (skip_end) {
+			memmove(path + skip_end + 1, path + 1, path_len - 1);
+			memset(path + 1, 0, skip_end);
+		}
+		memset(path + skip_end + path_len, 0, skip_start);
+		rc = path + skip_end + path_len + skip_start;
+	}
+
+	return enhance_trace(rc);
+}
+
+/*
+ * Format the modification time in `attr` as local time for the header lines
+ * of context and unified output.
+ *
+ * Returns a pointer to a static buffer that is overwritten by the next
+ * call. When `st_mtime` is a macro (presumably meaning that the
+ * nanosecond-resolution `st_mtim` member exists -- confirm for the target
+ * libc), the fraction of the second is included.
+ */
+static char *
+get_time_string(const struct stat *attr)
+{
+	static char buf[sizeof("0000-00-00 00:00:00.000000000 +0000")];
+	struct tm *tm;
+
+	tm = localtime(&(attr->st_mtime));
+	if (tm == NULL)
+		eperror("localtime");
+
+#ifdef st_mtime
+	/* Format with a placeholder fraction, then overwrite it with the real one. */
+	strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S.000000000 %z", tm);
+	/* `tv_nsec` is a signed long, so it must be printed with %ld. */
+	snprintf(buf + (sizeof("0000-00-00 00:00:00.") - 1),
+	         sizeof(buf) - (sizeof("0000-00-00 00:00:00.") - 1),
+	         "%09ld", (long)attr->st_mtim.tv_nsec);
+	/* Replace the terminator written above with the original separator. */
+	buf[sizeof("0000-00-00 00:00:00.") - 1 + 9] = ' ';
+#else
+	strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S %z", tm);
+#endif
+	return buf;
+}
+
+/*
+ * Split the edit path `path` into chunks: maximal runs of steps whose
+ * distance `d` does not exceed `n_context`.
+ *
+ * A zero-initialised array of `struct chunk` is allocated. On return
+ * `*tail` points to its first element, which is an empty sentinel (null
+ * `chunk`), the chunks follow in file order, and `*head` points to the
+ * empty element just after the last chunk. The caller frees `*tail`.
+ *
+ * Returns non-zero if the path contains any edit.
+ */
+static int
+get_diff_chunks(struct trace *path, size_t an, size_t bn, struct chunk **head, struct chunk **tail)
+{
+	/* Let the body use the out-parameters as if they were plain pointers. */
+#define head  (*head)
+#define tail  (*tail)
+	struct trace trace;
+	size_t ai, bi;
+	int ret = 0, suppressed = 1, have_a = 0, have_b = 0;
+
+	head = ecalloc(an + bn + 1, sizeof(*head));
+	tail = head++;
+
+	for (ai = bi = 0; (trace = *path++).f != END_OF_PATH;) {
+		if (trace.d > n_context) {
+			/* Outside any context window: close the current chunk, if one is open. */
+			suppressed = 1;
+			if (head->chunk) {
+				head->have_a = have_a;
+				head->have_b = have_b;
+				head++;
+			}
+			have_a = have_b = 0;
+			goto next;
+		}
+		if (suppressed) {
+			/* First step of a new chunk. */
+			head->ai = ai;
+			head->bi = bi;
+			head->chunk = path - 1;
+		}
+		have_a |= trace.f == 1;
+		have_b |= trace.f == 2;
+		suppressed = 0;
+	next:
+		ret |= trace.f != 0;
+		ai += trace.f != 2;
+		bi += trace.f != 1;
+	}
+	/* Close the chunk that reaches the end of the path. */
+	if (head->chunk) {
+		head->have_a = have_a;
+		head->have_b = have_b;
+		head++;
+	}
+
+	return ret;
+#undef head
+#undef tail
+}
+
+/*
+ * Building blocks for the output functions below. They expect the
+ * parameters `old` and `new` (struct file_data *) to be in scope.
+ *
+ * OUTPUT_BEGIN declares the local variables used by the other macros.
+ */
+#define OUTPUT_BEGIN\
+	struct trace *path;\
+	size_t ai, bi;\
+	int ret = 0, have_a = 0, have_b = 0;\
+	struct trace *chunk;\
+	struct trace *chunk_old;\
+	struct chunk *head;\
+	struct chunk *tail;\
+	char **a = old->lines;\
+	char **b = new->lines\
+
+/* Print the two file header lines, prefixed with the markers A and B. */
+#define OUTPUT_HEAD(A, B)\
+	printf("%s"A" %s\t%s%s\n", BOLD(old->path, get_time_string(&(old->attr))));\
+	printf("%s"B" %s\t%s%s\n", BOLD(new->path, get_time_string(&(new->attr))))
+
+/*
+ * Compute the diff and open a loop that visits the chunks first to last.
+ * The loop body follows the macro and is closed by OUTPUT_END.
+ */
+#define OUTPUT_QUEUE\
+	path = diff2(a, b, old->line_count, new->line_count, bflag);\
+	ret = get_diff_chunks(path, old->line_count, new->line_count, &head, &tail);\
+	(void) chunk_old;\
+	for (head = tail;;) {\
+		head++;\
+		ai = head->ai;\
+		bi = head->bi;\
+		have_a = head->have_a;\
+		have_b = head->have_b;\
+		chunk = head->chunk;\
+		if (!chunk)\
+			break
+
+/*
+ * Like OUTPUT_QUEUE, but visits the chunks last to first, stopping at the
+ * empty sentinel at the start of the array.
+ */
+#define OUTPUT_STACK\
+	path = diff2(a, b, old->line_count, new->line_count, bflag);\
+	ret = get_diff_chunks(path, old->line_count, new->line_count, &head, &tail);\
+	(void) chunk_old;\
+	for (;;) {\
+		head--;\
+		ai = head->ai;\
+		bi = head->bi;\
+		have_a = head->have_a;\
+		have_b = head->have_b;\
+		chunk = head->chunk;\
+		if (!chunk)\
+			break
+
+/* Close the chunk loop, release the chunk array and the path, and return. */
+#define OUTPUT_END\
+	}\
+	free(tail);\
+	free(path);\
+	return ret
+
+/*
+ * Print the differences between `old` and `new` in unified format
+ * (-u / -U n): two header lines, then for each chunk a "@@ -l,n +l,n @@"
+ * line followed by the lines prefixed with ' ', '-' or '+'.
+ *
+ * Returns non-zero if the files differ.
+ */
+static int
+output_unified(struct file_data *old, struct file_data *new)
+{
+	struct trace *path;
+	struct trace *path_;
+	struct trace trace;
+	size_t ai, bi;
+	char **a;
+	char **b;
+	int ret = 0;
+	int suppressed = 1;
+
+	path = diff2(old->lines, new->lines, old->line_count, new->line_count, bflag);
+	/* Remember the start of the allocation; `path` is advanced below. */
+	path_ = path;
+
+	OUTPUT_HEAD("---", "+++");
+
+	a = old->lines, b = new->lines;
+	for (ai = bi = 0; (trace = *path++).f != END_OF_PATH;) {
+		char f = trace.f;
+		/* Lines too far from any edit are not printed. */
+		if (trace.d > n_context) {
+			suppressed = 1;
+			goto next;
+		}
+		if (suppressed) {
+			/* First printed line of a chunk: emit the chunk header.
+			 * An empty range is reported at the line before it. */
+			suppressed = 0;
+			printf("%s@@ -%zu", use_colour ? "\033[36m" : "", ai + 1 - !trace.a_len);
+			if (trace.a_len != 1)
+				printf(",%zu", trace.a_len);
+			printf(" +%zu", bi + 1 - !trace.b_len);
+			if (trace.b_len != 1)
+				printf(",%zu", trace.b_len);
+			printf(" @@%s\n",
+			       use_colour ? "\033[m" : "");
+		}
+		if (f == 0)
+			printf(" %s\n", a[ai]);
+		else if (use_colour)
+			printf("\033[3%im%c%s\033[m\n", f, " -+"[(int)f], f == 1 ? a[ai] : b[bi]);
+		else
+			printf("%c%s\n", " -+"[(int)f], f == 1 ? a[ai] : b[bi]);
+	next:
+		ret |= f != 0;
+		ai += f != 2;
+		bi += f != 1;
+	}
+
+	free(path_);
+	return ret;
+}
+
+/*
+ * Print the differences between `old` and `new` in the copied-context
+ * format (-c / -C n). Each chunk is printed as a file-1 part followed by a
+ * file-2 part. Returns non-zero if the files differ.
+ */
+static int
+output_copied(struct file_data *old, struct file_data *new)
+{
+	OUTPUT_BEGIN;
+	OUTPUT_HEAD("***", "---");
+	OUTPUT_QUEUE;
+/* Print one part of a chunk. L: `a` or `b`; C: the edit mark belonging to
+ * that file (1 or 2); S: prefix for plain edits; A: range line marker.
+ * Steps flagged `ch` are printed with '!' instead of S. */
+#define PRINT_PART(L, C, S, A)\
+	printf("%s"A" %zu", use_colour ? "\033[1;3"#C"m" : "", L##i + 1 - (!have_##L));\
+	if (chunk->L##_len > 1)\
+		printf(",%zu", L##i + chunk->L##_len);\
+	printf(" "A"%s\n", use_colour ? "\033[m" : "");\
+	for (; have_##L && chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {\
+		if (chunk->f == 0)\
+			printf("  %s\n", L[L##i]);\
+		else if (chunk->f == (3 - C));\
+		else if (use_colour)\
+			printf("\033[3%im%c %s\033[m\n", chunk->ch ? 3 : C, S"!"[chunk->ch], L[L##i]);\
+		else\
+			printf("%c %s\n", S"!"[chunk->ch], L[L##i]);\
+		L##i += chunk->f != (3 - C);\
+	}
+
+	printf("%s\n", use_colour ? "\033[36m***************\033[m" : "***************");
+	/* Both parts walk the same steps, so rewind between them. */
+	chunk_old = chunk;
+	PRINT_PART(a, 1, "-", "***");
+	chunk = chunk_old;
+	PRINT_PART(b, 2, "+", "---");
+#undef PRINT_PART
+	OUTPUT_END;
+}
+
+/*
+ * Print the differences between `old` and `new` in the default format:
+ * a "<range><d|a|c><range>" line per chunk, followed by the file-1 lines
+ * prefixed with "<" and the file-2 lines prefixed with ">".
+ * Returns non-zero if the files differ.
+ */
+static int
+output_default(struct file_data *old, struct file_data *new)
+{
+	OUTPUT_BEGIN;
+	OUTPUT_QUEUE;
+/* Print the lines of one file in the chunk. L: `a` or `b`; C: the edit
+ * mark belonging to that file (1 or 2); S: line prefix. */
+#define PRINT_PART(L, C, S)\
+	for (; have_##L && chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {\
+		if (chunk->f == 0)\
+			printf("  %s\n", L[L##i]);\
+		else if (chunk->f == (3 - C));\
+		else if (use_colour)\
+			printf("\033[3"#C"m"S" %s\033[m\n", L[L##i]);\
+		else\
+			printf(S" %s\n", L[L##i]);\
+		L##i += chunk->f != (3 - C);\
+	}
+
+	printf("%s%zu", use_colour ? "\033[36m" : "", ai + 1 - (!have_a));
+	if (chunk->a_len > 1)
+		printf(",%zu", ai + chunk->a_len);
+	/* Only file-1 lines: 'd'; only file-2 lines: 'a'; both: 'c'. */
+	printf("%c", " dac"[have_a + 2 * have_b]);
+	printf("%zu", bi + 1 - (!have_b));
+	if (chunk->b_len > 1)
+		printf(",%zu", bi + chunk->b_len);
+	printf("%s\n", use_colour ? "\033[m" : "");
+
+	/* Both parts walk the same steps, so rewind between them. */
+	chunk_old = chunk;
+	PRINT_PART(a, 1, "<");
+	if (have_a && have_b)
+		printf("%s\n", use_colour ? "\033[36m---\033[m" : "---");
+	chunk = chunk_old;
+	PRINT_PART(b, 2, ">");
+#undef PRINT_PART
+	OUTPUT_END;
+}
+
+/*
+ * Print the differences between `old` and `new` as an ed script (-e).
+ * Chunks are emitted last to first (OUTPUT_STACK). Returns non-zero if the
+ * files differ.
+ */
+static int
+output_ed(struct file_data *old, struct file_data *new)
+{
+	OUTPUT_BEGIN;
+	OUTPUT_STACK;
+	if (!have_b) {
+		printf("%zud\n", ai + 1);
+	} else {
+		int have_dot = 0;
+		printf("%zu", ai + 1 - (!have_a));
+		if (chunk->a_len > 1)
+			printf(",%zu", ai + chunk->a_len);
+		printf("%c\n", "ac"[chunk->ch]);
+		for (; chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {
+			if (chunk->f == 1);
+			/* A line that starts with '.' is written with an extra
+			 * leading '.', which is removed again below. */
+			else if (use_colour)
+				printf("\033[3%im%s%s\033[m\n", chunk->ch ? 3 : 2,
+				       b[bi][0] == '.' ? "." : "", b[bi]);
+			else
+				printf("%s%s\n",
+				       b[bi][0] == '.' ? "." : "", b[bi]);
+			have_dot = (chunk->f == 2 && b[bi][0] == '.');
+			/* End input mode, strip the extra '.', and resume appending. */
+			if (have_dot)
+				printf(".\ns/.//\na\n");
+			bi += chunk->f != 1;
+		}
+		if (!have_dot)
+			printf(".\n");
+	}
+	OUTPUT_END;
+}
+
+/*
+ * Print the differences between `old` and `new` in the alternative ed-like
+ * format (-f): the command letter precedes the line numbers and chunks are
+ * emitted first to last (OUTPUT_QUEUE). Returns non-zero if the files
+ * differ.
+ */
+static int
+output_ed_alternative(struct file_data *old, struct file_data *new)
+{
+	OUTPUT_BEGIN;
+	OUTPUT_QUEUE;
+	if (!have_b) {
+		printf("d%zu\n", ai + 1);
+	} else {
+		printf("%c%zu", "ac"[chunk->ch], ai + 1 - (!have_a));
+		if (chunk->a_len > 1)
+			printf(" %zu", ai + chunk->a_len);
+		printf("\n");
+		for (; chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {
+			/* Lines that exist only in file 1 have no text to print. */
+			if (chunk->f == 1);
+			else if (use_colour)
+				printf("\033[3%im%s\033[m\n", chunk->ch ? 3 : 2, b[bi]);
+			else
+				printf("%s\n", b[bi]);
+			bi += chunk->f != 1;
+		}
+		printf(".\n");
+	}
+	OUTPUT_END;
+}
+
+/*
+ * Compare two files byte by byte. Returns non-zero if they differ.
+ *
+ * A file that was loaded as text is first turned back into one contiguous
+ * byte sequence by replacing each line terminator with '\n'; its
+ * `line_count` then holds the byte length, as for binary files.
+ */
+static int
+do_binaries_differ(struct file_data *old, struct file_data *new)
+{
+#define TURN_INTO_BINARY(f)\
+	if (!f->is_binary) {\
+		char **lines = f->lines;\
+		size_t len = 0, part_len;\
+		for (; *lines; lines++) {\
+			len += 1 + (part_len = strlen(*lines));\
+			(*lines)[part_len] = '\n';\
+		}\
+		f->line_count = len - !f->lf_terminated;\
+	}
+
+	TURN_INTO_BINARY(old);
+	TURN_INTO_BINARY(new);
+
+	if (old->line_count != new->line_count)
+		return 1;
+
+	return memcmp(old->lines[0], new->lines[0], old->line_count);
+}
+
+/*
+ * Compare two loaded files and print their differences in the format
+ * selected on the command line.
+ *
+ * Returns 0 if the files are identical, the value of the selected output
+ * function (non-zero) if they differ, and 2 if binary files differ or if
+ * -e/-f was used on a file that lacks a trailing <newline>.
+ */
+static int
+compare_files(struct file_data *old, struct file_data *new)
+{
+	/* Must start at 0: identical binary files fall through without assigning it. */
+	int ret = 0;
+
+	if (old->is_binary || new->is_binary) {
+		if (do_binaries_differ(old, new)) {
+			printf("Binary files %s and %s differ\n", old->path, new->path);
+			ret = 2;
+		}
+		return ret;
+	}
+
+	/* The ed formats cannot express a missing final <newline>; the other
+	 * formats get a marker appended to the last line instead. */
+	if (!(eflag || fflag)) {
+		if (!old->lf_terminated)
+			strcpy(strchr(old->lines[old->line_count - 1], '\0'), NO_LF_MARK);
+		if (!new->lf_terminated)
+			strcpy(strchr(new->lines[new->line_count - 1], '\0'), NO_LF_MARK);
+	}
+
+	ret = (uflag ? output_unified :
+	       cflag ? output_copied :
+	       eflag ? output_ed :
+	       fflag ? output_ed_alternative :
+	               output_default)(old, new);
+
+	if (eflag || fflag) {
+		if (!old->lf_terminated)
+			fprintf(stderr, "%s: %s: No newline at end of file\n\n", argv0, old->path);
+		if (!new->lf_terminated)
+			fprintf(stderr, "%s: %s: No newline at end of file\n\n", argv0, new->path);
+		ret = (!old->lf_terminated || !new->lf_terminated) ? 2 : ret;
+	}
+
+	return ret;
+}
+
+/*
+ * Compare the contents of the directories `old` and `new`.
+ *
+ * The first pass walks `old`: entries missing from `new` are reported, and
+ * entries present in both are compared (recursively for subdirectories when
+ * -r is given). The second pass walks `new` and only reports entries that
+ * are missing from `old`. `diff_line` is the command line prefix printed
+ * before each pair of files that is compared.
+ *
+ * Returns the highest status seen: 0, 1 or 2.
+ */
+static int
+compare_directories(const char *old, const char *new, const char *diff_line)
+{
+#define GET_FILENAME(buf, i)\
+	(buf = emalloc(strlen(paths[i]) + strlen(file->d_name) + 2),\
+	 stpcpy(stpcpy(stpcpy(buf, paths[i]), "/"), file->d_name))
+
+	/* `i` is the directory being walked, `j` the one it is checked against. */
+	int ret = 0, r, i = 0, j = 1;
+	DIR *dir;
+	const char *paths[2] = { old, new };
+	struct dirent *file;
+	struct file_data *a;
+	struct file_data *b;
+	char *b_path;
+	char *a_path;
+	struct stat a_attr;
+	struct stat b_attr;
+
+again:
+	dir = opendir(paths[i]);
+	if (!dir)
+		eperror(paths[i]);
+	/* readdir(3) returns null both at the end and on error; errno tells them apart. */
+	while ((errno = 0, file = readdir(dir))) {
+		if (!strcmp(file->d_name, ".") || !strcmp(file->d_name, ".."))
+			continue;
+		GET_FILENAME(b_path, j);
+		if (access(b_path, F_OK)) {
+			printf("%sOnly in %s: %s%s\n", BOLD(paths[i], file->d_name));
+			ret = ret > 1 ? ret : 1;
+			goto next;
+		} else if (i == 1) {
+			/* Common entries were already handled in the first pass. */
+			goto next;
+		}
+		GET_FILENAME(a_path, i);
+
+		if (stat(a_path, &a_attr))
+			eperror(a_path);
+		if (stat(b_path, &b_attr))
+			eperror(b_path);
+
+		/* Both names refer to the very same file. */
+		if (a_attr.st_dev == b_attr.st_dev && a_attr.st_ino == b_attr.st_ino)
+			goto skip;
+		/* POSIX specifies that if a and b refer to the same special device,
+		 * there should be no comparison. This seems unnecessary since it
+		 * also specifies that special devices and FIFO:s shall not be compared.
+		 * We extend this to not compare sockets either. POSIX says that it
+		 * is implementation-specified for other types than special files,
+		 * FIFO:s, regular files and directories. */
+#define IS_INCOMMENSURABLE(mode)  (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || S_ISSOCK(mode))
+		if (IS_INCOMMENSURABLE(a_attr.st_mode) || IS_INCOMMENSURABLE(b_attr.st_mode))
+			goto skip;
+
+		/* load_lines() returns null for directories. */
+		a = load_lines(a_path);
+		b = load_lines(b_path);
+
+		if (!a ^ !b) {
+			printf("%sFile %s is a %s while file %s is a %s%s\n",
+			       BOLD(a_path, CLASSIFY(a), b_path, CLASSIFY(b)));
+			ret = ret > 1 ? ret : 1;
+		} else if (!a && !b && !rflag) {
+			printf("%sCommon subdirectories: %s and %s%s\n", BOLD(a_path, b_path));
+			ret = ret > 1 ? ret : 1;
+		} else if (!a && !b) {
+			r = compare_directories(a_path, b_path, diff_line);
+			ret = ret > r ? ret : r;
+		} else {
+			printf("%s%s %s %s%s\n", BOLD(diff_line, a_path, b_path));
+			r = compare_files(a, b);
+			ret = ret > r ? ret : r;
+		}
+
+		free(a);
+		free(b);
+	skip:
+		free(a_path);
+	next:
+		free(b_path);
+	}
+	if (errno)
+		eperror("readdir");
+	closedir(dir);
+
+
+	if (i)
+		return ret;
+	i = 1, j = 0;
+	goto again;
+}
+
+/*
+ * Entry point: parse the options, load the two operands and compare them.
+ *
+ * If exactly one operand is a directory, the other operand's basename is
+ * looked up inside that directory and the comparison is retried. If both
+ * are directories, their contents are compared.
+ *
+ * Exit status: 0 if no differences were found, 1 if differences were found,
+ * 2 (EXIT_FAILURE) on error.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct file_data *old;
+	struct file_data *new;
+	char *old_proper = 0;
+	char *new_proper = 0;
+	int ret;
+	char *diff_line = 0;
+	char *p;
+
+	/* Construct the 'diff OPTIONS FILE-1 FILE-2' line used when diffing directories. */
+	if (argc > 2) {
+		size_t len = 0;
+		int i;
+		p = strrchr(argv[0], '/');
+		if (p)
+			argv[0] = p + 1;
+		/* Everything except the two operands, separated by spaces. */
+		for (i = 0; i < argc - 2; i++)
+			len += strlen(argv[i]) + 1;
+		p = diff_line = emalloc(len + 1);
+		for (i = 0; i < argc - 2; i++)
+			p = stpcpy(stpcpy(p, argv[i]), " ");
+		/* Replace the trailing space with the terminator. */
+		p[-1] = 0;
+	}
+
+	ARGBEGIN {
+	case 'b':  bflag++;  break;
+	case 'c':  cflag++;  n_context = 3;                     break;
+	case 'C':  cflag++;  n_context = atol(EARGF(usage()));  break;
+	case 'e':  eflag++;  break;
+	case 'f':  fflag++;  break;
+	case 'u':  uflag++;  n_context = 3;                     break;
+	case 'U':  uflag++;  n_context = atol(EARGF(usage()));  break;
+	case 'r':  rflag++;  break;
+	default:
+		usage();
+	} ARGEND;
+	/* Use of `atol` is intentional, '-U -1' and '-C -1' shall display the entire file. */
+
+	if (argc != 2 || (bflag | rflag) > 1 || cflag + eflag + fflag + uflag > 1)
+		usage();
+
+	use_colour = isatty(STDOUT_FILENO);
+
+redo:
+	/* load_lines() returns null for directories. */
+	old = load_lines(old_proper ? old_proper : argv[0]);
+	new = load_lines(new_proper ? new_proper : argv[1]);
+
+	if ((old_proper || new_proper) && (!old || !new)) {
+		/* The substituted path is still a file/directory mismatch. */
+		printf("%sFile %s is a %s while file %s is a %s%s\n",
+		       BOLD(old_proper ? old_proper : argv[0], CLASSIFY(old),
+		            new_proper ? new_proper : argv[1], CLASSIFY(new)));
+		ret = 1;
+	} else if (!old && new) {
+		/* FILE-1 is a directory: compare FILE-1/basename(FILE-2) with FILE-2. */
+		old_proper = emalloc(strlen(argv[0]) + strlen(argv[1]) + 2);
+		stpcpy(stpcpy(stpcpy(old_proper, argv[0]), "/"), basename(argv[1]));
+		free(new);
+		goto redo;
+	} else if (old && !new) {
+		/* FILE-2 is a directory: compare FILE-1 with FILE-2/basename(FILE-1). */
+		new_proper = emalloc(strlen(argv[0]) + strlen(argv[1]) + 2);
+		stpcpy(stpcpy(stpcpy(new_proper, argv[1]), "/"), basename(argv[0]));
+		free(old);
+		goto redo;
+	} else if (!old && !new) {
+		ret = compare_directories(argv[0], argv[1], diff_line);
+	} else {
+		ret = compare_files(old, new);
+	}
+
+	if (fshut(stdout, "<stdout>"))
+		ret = EXIT_FAILURE;
+
+	free(old);
+	free(new);
+	free(old_proper);
+	free(new_proper);
+	free(diff_line);
+	return ret;
+}
-- 
2.7.0
Received on Sat Jan 30 2016 - 16:13:46 CET

This archive was generated by hypermail 2.3.0 : Sat Jan 30 2016 - 16:24:14 CET