---
Makefile | 12 +-
README | 2 +
TODO | 1 -
bdiff.1 | 84 ++++
diff.1 | 107 ++++
diff.c | 1397 ++++++++++++++++++++++++++++++++++++++++++++++++++++
libutil/ncprintf.c | 50 ++
util.h | 1 +
8 files changed, 1649 insertions(+), 5 deletions(-)
create mode 100644 bdiff.1
create mode 100644 diff.1
create mode 100644 diff.c
create mode 100644 libutil/ncprintf.c
diff --git a/Makefile b/Makefile
index 754f4d2..2ebf48a 100644
--- a/Makefile
+++ b/Makefile
_AT_@ -56,6 +56,7 @@ LIBUTILSRC =\
libutil/md5.c\
libutil/mkdirp.c\
libutil/mode.c\
+ libutil/ncprintf.c\
libutil/parseoffset.c\
libutil/putword.c\
libutil/reallocarray.c\
_AT_@ -89,6 +90,7 @@ BIN =\
cron\
cut\
date\
+ diff\
dirname\
du\
echo\
_AT_@ -169,7 +171,7 @@ LIBUTFOBJ = $(LIBUTFSRC:.c=.o)
LIBUTILOBJ = $(LIBUTILSRC:.c=.o)
OBJ = $(BIN:=.o) $(LIBUTFOBJ) $(LIBUTILOBJ)
SRC = $(BIN:=.c)
-MAN = $(BIN:=.1)
+MAN = $(BIN:=.1) bdiff.1
all: $(BIN)
_AT_@ -199,7 +201,7 @@ confstr_l.h limits_l.h sysconf_l.h pathconf_l.h: getconf.sh
install: all
mkdir -p $(DESTDIR)$(PREFIX)/bin
cp -f $(BIN) $(DESTDIR)$(PREFIX)/bin
- cd $(DESTDIR)$(PREFIX)/bin && ln -f test [ && chmod 755 $(BIN)
+ cd $(DESTDIR)$(PREFIX)/bin && ln -f test [ && ln -f diff bdiff && chmod 755 $(BIN)
mv -f $(DESTDIR)$(PREFIX)/bin/xinstall $(DESTDIR)$(PREFIX)/bin/install
mkdir -p $(DESTDIR)$(MANPREFIX)/man1
for m in $(MAN); do sed "s/^\.Os sbase/.Os sbase $(VERSION)/g" < "$$m" > $(DESTDIR)$(MANPREFIX)/man1/"$$m"; done
_AT_@ -207,8 +209,8 @@ install: all
mv -f $(DESTDIR)$(MANPREFIX)/man1/xinstall.1 $(DESTDIR)$(MANPREFIX)/man1/install.1
uninstall:
- cd $(DESTDIR)$(PREFIX)/bin && rm -f $(BIN) [ install
- cd $(DESTDIR)$(MANPREFIX)/man1 && rm -f $(MAN)
+ cd $(DESTDIR)$(PREFIX)/bin && rm -f $(BIN) [ install bdiff
+ cd $(DESTDIR)$(MANPREFIX)/man1 && rm -f $(MAN) install.1
dist: clean
mkdir -p sbase-$(VERSION)
_AT_@ -231,6 +233,7 @@ sbase-box: $(LIB) $(SRC)
echo 'int main(int argc, char *argv[]) { char *s = basename(argv[0]);' >> build/$_AT_.c
echo 'if(!strcmp(s,"sbase-box")) { argc--; argv++; s = basename(argv[0]); } if(0) ;' >> build/$_AT_.c
echo "else if (!strcmp(s, \"install\")) return xinstall_main(argc, argv);" >> build/$_AT_.c
+ echo "else if (!strcmp(s, \"bdiff\")) return diff_main(argc, argv);" >> build/$_AT_.c
echo "else if (!strcmp(s, \"[\")) return test_main(argc, argv);" >> build/$_AT_.c
for f in $(SRC); do echo "else if(!strcmp(s, \"$${f%.c}\")) return $${f%.c}_main(argc, argv);"; done >> build/$_AT_.c
echo 'else { fputs("[ ", stdout);' >> build/$_AT_.c
_AT_@ -245,6 +248,7 @@ sbase-box-install: sbase-box
chmod 755 $(DESTDIR)$(PREFIX)/bin/sbase-box
for f in $(BIN); do ln -sf sbase-box $(DESTDIR)$(PREFIX)/bin/"$$f"; done
ln -sf sbase-box $(DESTDIR)$(PREFIX)/bin/[
+ ln -sf sbase-box $(DESTDIR)$(PREFIX)/bin/bdiff
mv -f $(DESTDIR)$(PREFIX)/bin/xinstall $(DESTDIR)$(PREFIX)/bin/install
mkdir -p $(DESTDIR)$(MANPREFIX)/man1
for m in $(MAN); do sed "s/^\.Os sbase/.Os sbase $(VERSION)/g" < "$$m" > $(DESTDIR)$(MANPREFIX)/man1/"$$m"; done
diff --git a/README b/README
index cddf485..4f246cf 100644
--- a/README
+++ b/README
_AT_@ -13,6 +13,7 @@ The following tools are implemented:
UTILITY MISSING
------- -------
=*|o basename .
+=* x bdiff .
=*|o cal .
=*|o cat .
=*|o chgrp .
_AT_@ -27,6 +28,7 @@ The following tools are implemented:
=*|x cron .
#*|o cut .
=*|o date .
+=* o diff .
=*|o dirname .
=*|o du .
=*|o echo .
diff --git a/TODO b/TODO
index fc0730d..3d5474f 100644
--- a/TODO
+++ b/TODO
_AT_@ -6,7 +6,6 @@ sbase such as vi and sh are also not listed here.
at
awk
bc
-diff
ed manpage
patch
stty
diff --git a/bdiff.1 b/bdiff.1
new file mode 100644
index 0000000..1e114d6
--- /dev/null
+++ b/bdiff.1
_AT_@ -0,0 +1,84 @@
+.Dd 2016-02-21
+.Dt BDIFF 1
+.Os sbase
+.Sh NAME
+.Nm bdiff
+.Nd compare two big files
+.Sh SYNOPSIS
+.Nm
+.Op Fl c | C Ar n | Fl e | f | u | U Ar n
+.Op Fl bdDr
+.Ar file1 file2
+.Sh DESCRIPTION
+.Nm
+compares the contents of two files and prints
+the set of changes necessary to modify
+.Ar file1
+into
+.Ar file2 .
+.Pp
+Unlike
+.Xr diff 1 ,
+.Nm
+is designed to be able to compare huge files
+and files with vast about of differences. This
+comes at the cost of the result seldom being
+minimal.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl b
+Ignore changes in trailing whitespace.
+.It Fl c
+Produce output with three lines of copied context.
+.It Fl C Ar n
+Produce output with
+.Ar n
+lines of copied context. If
+.Ar n
+is -1, use the entire file for context.
+.It Fl d
+Ignored. Exists for symmetry with
+.Xr diff 1 .
+.It Fl D
+Produce colored output if stdout, is a terminal.
+Always produce colored output if used twice or more.
+.It Fl e
+Produce output suitable as input to
+.Xr ed 1 .
+.It Fl f
+Produce output similar to the format of
+.Fl e ,
+but intended for reading by a human.
+.It Fl r
+Apply
+.Nm
+recursively to subdirectories of
+.Ar file1
+and
+.Ar file2 .
+.It Fl u
+Produce output with three lines of unified context.
+.It Fl U Ar n
+Produce output with
+.Ar n
+lines of unified context. If
+.Ar n
+is -1, use the entire file for context.
+.El
+.Sh EXIT STATUS
+.Bl -tag -width Ds
+.It 0
+The files are identical.
+.It 1
+The files are different.
+.It 3
+Binary files are different.
+.It > 3
+An error occurred.
+.El
+.Sh SEE ALSO
+.Xr cmp 1 ,
+.Xr comm 1 ,
+.Xr diff 1 ,
+.Xr ed 1 ,
+.Xr patch 1
diff --git a/diff.1 b/diff.1
new file mode 100644
index 0000000..1898c84
--- /dev/null
+++ b/diff.1
_AT_@ -0,0 +1,107 @@
+.Dd 2016-02-18
+.Dt DIFF 1
+.Os sbase
+.Sh NAME
+.Nm diff
+.Nd compare two files
+.Sh SYNOPSIS
+.Nm
+.Op Fl c | C Ar n | Fl e | f | u | U Ar n
+.Op Fl bdDr
+.Ar file1 file2
+.Sh DESCRIPTION
+.Nm
+compares the contents of two files and prints
+the minimal set of changes necessary to modify
+.Pp
+If
+.Nm
+detemines that it is too difficult to calculate
+the minimal set of changes fast enough and without
+usage too much memory,
+.Nm
+will fall back to behave like
+.Xr bdiff 1 ,
+unless
+.Fl d
+is used, and the exit status will not be 2
+instead of 1.
+.Ar file1
+into
+.Ar file2 .
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl b
+Ignore changes in trailing whitespace.
+.It Fl c
+Produce output with three lines of copied context.
+.It Fl C Ar n
+Produce output with
+.Ar n
+lines of copied context. If
+.Ar n
+is -1, use the entire file for context.
+.It Fl d
+Do not fall back to behave like
+.Xr bdiff 1 ,
+even if its appears to be impossible to calculate
+the minimal set of changes.
+.It Fl D
+Produce colored output if stdout, is a terminal.
+Always produce colored output if used twice or more.
+.It Fl e
+Produce output suitable as input to
+.Xr ed 1 .
+.It Fl f
+Produce output similar to the format of
+.Fl e ,
+but intended for reading by a human.
+.It Fl r
+Apply
+.Nm
+recursively to subdirectories of
+.Ar file1
+and
+.Ar file2 .
+.It Fl u
+Produce output with three lines of unified context.
+.It Fl U Ar n
+Produce output with
+.Ar n
+lines of unified context. If
+.Ar n
+is -1, use the entire file for context.
+.El
+.Sh EXIT STATUS
+.Bl -tag -width Ds
+.It 0
+The files are identical.
+.It 1
+The files are different, output is minimal.
+.It 2
+The files are different, output is not necessarily minimal.
+.It 3
+Binary files are different.
+.It > 3
+An error occurred.
+.El
+.Sh SEE ALSO
+.Xr bdiff 1 ,
+.Xr cmp 1 ,
+.Xr comm 1 ,
+.Xr ed 1 ,
+.Xr patch 1
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2013
+specification, except exit status 1 has been split to
+exit status 1, 2, and 3, and exit status > 1 has been
+shifted to > 3, and the output is only minimal if
+determined to be feasible to calculate with generally
+exceptable time and memory usage.
+.Pp
+The
+.Op Fl dD
+flags is an extension to that specification.
diff --git a/diff.c b/diff.c
new file mode 100644
index 0000000..f425f10
--- /dev/null
+++ b/diff.c
_AT_@ -0,0 +1,1397 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <time.h>
+#include <errno.h>
+#include <libgen.h>
+#include <dirent.h>
+#include <sys/stat.h>
+
+#include "arg.h"
+#include "util.h"
+
+/*
+ * Lines that only appear in file1 are marked 1.
+ * Lines that only appear in file2 are marked 2.
+ * Lines that appear in both files are marked 0.
+ */
+
+/*
+ * Excluded optimisations.
+ *
+ * POSIX mandates that the output change set is minimal
+ * (of course, this is not always possibly.) It does
+ * however not mandate that the total output is minimal,
+ * and I see no compelling reason to minimise anything
+ * but the change set. There the size of the hunks are
+ * not minimalised, which could be done by shifting
+ * lumps of changes without the same hunk.
+ *
+ * After compressing the data set, it is possible to
+ * divide and “conquer” the problem. Find any lump
+ * that is at least as large as the sum of all other
+ * lumps. Solver the problem recursively on both sides
+ * of that lump. Ther the base case is not solved
+ * directly, but independently.
+ */
+
+#define END_OF_PATH 127
+#define NO_LF_MARK "\n\\ No newline at end of file"
+#define COLOURED_NO_LF_MARK "\n\033[7m\\ No newline at end of file\033[27m"
+
+#undef EXIT_FAILURE
+#define EXIT_FILES_DIFFER 1
+#define EXIT_NOT_MINIMAL 2
+#define EXIT_BINARIES_DIFFER 3
+#define EXIT_FAILURE 4
+
+#define emalloc(...) enmalloc(EXIT_FAILURE, __VA_ARGS__)
+#define erealloc(...) enrealloc(EXIT_FAILURE, __VA_ARGS__)
+#define eprintf(...) enprintf(EXIT_FAILURE, __VA_ARGS__)
+#define eperror(...) (perror(__VA_ARGS__), exit(EXIT_FAILURE))
+
+#define line_eq(a, b) ((a)->hash == (b)->hash && !strcmp((a)->line, (b)->line))
+#define sort(base, nel, cmp) qsort(base, nel, sizeof(*base), cmp)
+
+
+struct file_data {
+ char **lines;
+ size_t line_count; /* used as length of `lines[0]` if `is_binary` */
+ unsigned lf_terminated : 1;
+ unsigned is_binary : 1;
+ unsigned is_empty : 1;
+ struct stat attr;
+ const char *path;
+};
+
+struct trace {
+ char f;
+ int ch;
+ size_t d;
+ size_t a_len;
+ size_t b_len;
+};
+
+struct chunk {
+ size_t ai;
+ size_t bi;
+ unsigned have_a : 1;
+ unsigned have_b : 1;
+ struct trace *chunk;
+};
+
+struct line {
+ char *line;
+ char *end;
+ char old_end_char;
+ unsigned uncommon : 1;
+ size_t hash;
+ size_t compression;
+ size_t number;
+};
+
+
+static int bflag = 0;
+static int cflag = 0;
+static int eflag = 0;
+static int fflag = 0;
+static int uflag = 0;
+static int rflag = 0;
+static size_t n_context = 0;
+
+/* Minimal output or cheap comparsion? */
+static int bdiff = 0;
+static int dflag = 0;
+static int cheap_algorithm_used = 0;
+
+static int use_colour = 0;
+static int (*cprintf)(const char *, ...);
+
+
+static void
+usage(void)
+{
+ eprintf("usage: %s [-c | -C n | -e | -f | -u | -U n] [-bdDr] file1 file2\n", argv0);
+}
+
+static int
+is_minimal_feasible(size_t a, size_t b)
+{
+ size_t max = a > b ? a : b;
+
+ if (max > (SIZE_MAX / max) / 2)
+ return 0;
+
+ /* Keep in mind, this is not the size of the files, but rather the size of the
+ * set that is compared after reduction and compression. It is important to
+ * remember that the user may want to run diff one a large set of files, not
+ * just one file. */
+ return (a * b) < 1000UL * 1000UL;
+}
+
+#ifdef DEBUG
+# include <time.h>
+static void
+measure_time(const char *label)
+{
+ static clock_t clocks[10];
+ static int i = 0, j = 0;
+ clock_t duration;
+ double seconds;
+ if (!label) {
+ clocks[i++] = clock();
+ } else {
+ duration = clock() - clocks[--i];
+ seconds = duration;
+ seconds /= CLOCKS_PER_SEC;
+ fprintf(stderr, "\033[1;3%imTIME MEASUREMENT [%*s%s]: %lu clocks = %lf seconds\033[m\n",
+ (j ^= 1) ? 1 : 5, 2 * i, "", label, (unsigned long)duration, seconds);
+ }
+}
+#else
+# define measure_time(x) ((void)0)
+#endif
+
+
+/* Functions for diff:ing directories. { */
+
+static char *
+join_paths(const char *a, const char *b)
+{
+ char *rc = emalloc(strlen(a) + strlen(b) + sizeof("/"));
+ sprintf(rc, "%s/%s", a, b);
+ return rc;
+}
+
+static const char *
+classify(struct file_data *f)
+{
+ return !f ? "directory" : f->is_empty ? "regular empty file" : "regular file";
+}
+
+static int
+is_incommensurable(mode_t mode)
+{
+ /* POSIX specifies that if a and b refer to the same special device,
+ * there should be no comparision. This seems unnecessary since it
+ * also specifies that special devices and FIFO:s shall not be compared.
+ * We extend this to not compare sockets either. POSIX says that it
+ * is implementation-specified for other types than special files,
+ * FIFO:s, regular files and directories. */
+ return S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || S_ISSOCK(mode);
+}
+
+/* } */
+
+
+/* Comparsion functions for lines and pointers to lines. { */
+
+#define LPCMP(NAME, CMPS)\
+ static int NAME(const void *a_, const void *b_)\
+ { const struct line *const *ap = a_, *const *bp = b_; CMPS; return 0; }
+#define LCMP(NAME, CMPS)\
+ static int NAME(const void *a_, const void *b_)\
+ { const struct line *a = a_, *b = b_; CMPS; return 0; }
+#define DO(...) do { __VA_ARGS__ } while (0)
+#define LCMP_INT_(ATTR) DO(if (a->ATTR != b->ATTR) return (a->ATTR > b->ATTR) * 2 - 1;)
+#define LCMP_LINE DO(int cmp = strcmp(a->line, b->line); if (cmp) return cmp;)
+#define LCMP_HASH LCMP_INT_(hash)
+#define LCMP_NO LCMP_INT_(number)
+
+#define a (*ap)
+#define b (*bp)
+LPCMP(linepcmp_hash_line_stable, LCMP_HASH; LCMP_LINE; LCMP_NO)
+#undef a
+#undef b
+
+LCMP(linecmp_hash_line_stable, LCMP_HASH; LCMP_LINE; LCMP_NO)
+LCMP(linecmp_hash_line, LCMP_HASH; LCMP_LINE)
+
+/* } */
+
+
+/* Functions for supporting -b. { */
+
+static char *
+rstrip(char *text, char *removed)
+{
+ char *end = strchr(text, '\0');
+ while ((end != text) && isspace(*--end));
+ *removed = *end, *end = '\0';
+ return end;
+}
+
+static void
+rstrip_lines(struct line *lines, size_t n)
+{
+ size_t i;
+ for (i = 0; bflag && i < n; i++)
+ lines[i].end = rstrip(lines[i].line, &(lines[i].old_end_char));
+}
+
+static void
+unrstrip_lines(struct line *lines, size_t n)
+{
+ size_t i;
+ for (i = 0; bflag && i < n; i++)
+ *(lines[i].end) = lines[i].old_end_char;
+}
+
+/* } */
+
+
+/* Auxiliary functions used by the input reducers and the differs. { */
+
+static void
+hash_lines(struct line *lines, size_t n)
+{
+ size_t hash;
+ char *str;
+ while (n--) {
+ for (hash = 0, str = lines[n].line; *str; str++)
+ hash = 31 * hash + (size_t)*str;
+ lines[n].hash = hash;
+ }
+}
+
+static void
+retain_longest_increasing_subsequence(size_t *map, size_t n)
+{
+ size_t best_length = 0, low, high, mid, p, i;
+ ssize_t j;
+
+ /* Map from i to largest j such that j < i, where j
+ * is an included index in the same subsequence. */
+ size_t *previous = emalloc(n * sizeof(*previous));
+ /* Map from a length of a subsequence to the last
+ * included index in such subsequence. */
+ size_t *end_of = emalloc((n + 1) * sizeof(*end_of));
+
+ /* SIZE_MAX marks unset. We do this so that
+ * the last part of the algorithm terminates. */
+ memset(end_of, ~0, (n + 1) * sizeof(*end_of));
+
+ for (i = 0; i < n; i++) {
+ /* Find longest subsequence that can be extended. */
+ low = 1;
+ high = best_length;
+ while (low <= high) {
+ mid = (low + high + 1) / 2; /* Cannot overflow in our case. */
+ if (map[end_of[mid]] < map[i])
+ low = mid + 1;
+ else
+ high = mid - 1;
+ }
+
+ /* Extend that subsequence. */
+ previous[i] = end_of[low - 1];
+ end_of[low] = i;
+
+ /* Get the length of the longest, yet known, subsequence. */
+ if (low > best_length)
+ best_length = low;
+ }
+
+ /* Erase elements that where not selected, so that only
+ * the longest increasing subsequence is present. */
+ p = end_of[best_length];
+ j = (ssize_t)n - 1;
+ while (p < SIZE_MAX) {
+ while ((ssize_t)p < j)
+ map[j--] = SIZE_MAX;
+ j--;
+ p = previous[p];
+ }
+ while (j >= 0)
+ map[j--] = SIZE_MAX;
+
+ free(previous);
+ free(end_of);
+}
+
+/* } */
+
+
+/* Diff algorithm for large input. { */
+
+/* This algorithm runs in O(n log n) rather than O(n²) time, and
+ * O(n) rather than O(n²) space, and is used in the event that O(n²)
+ * is deemed too expensive. The output is far from optimal, but
+ * it does not matter too much, the program reduces the problem
+ * enough that this will probably not be used too often. */
+static char *
+diff2_cheap(struct line *a_, struct line *b_, size_t an, size_t bn)
+{
+ /*
+ * The idea behind the algorithm.
+ *
+ * For lines that appear in both files, create a map from line numbers in the old file to
+ * line numbers in the new file. For duplicate lines that do not appear the same number of
+ * times in both files, do not care too much.
+ *
+ * The map is not ordered, as find the longest increasing subsequence (there are gaps in
+ * subsequences) and erase the rest from the map.
+ *
+ * Removes lines from the old files are indicated by missing entries in the map
+ * (marked with SIZE_MAX in this implementation) and added lines can be inferred
+ * from gaps in the map.
+ */
+
+ size_t ai, bi, ri;
+ struct line *a = emalloc(an * sizeof(*a));
+ struct line *b = emalloc(bn * sizeof(*b));
+ size_t *map = emalloc(an * sizeof(*map));
+ char *rc;
+
+ memcpy(a, a_, an * sizeof(*a));
+ memcpy(b, b_, bn * sizeof(*b));
+
+ /* Remember the original position _and_ make stable sort possible. */
+ for (ai = 0; ai < an; ai++) a[ai].number = ai;
+ for (bi = 0; bi < bn; bi++) b[bi].number = bi;
+
+ /* Perform stable total ordering (not really proper sort) of line content. */
+ sort(a, an, linecmp_hash_line_stable);
+ sort(b, bn, linecmp_hash_line_stable);
+
+ /* Get new line numbers, and mark removed lines with SIZE_MAX. */
+ memset(map, ~0, an * sizeof(*map));
+ for (ai = bi = 0; ai < an && bi < bn;) {
+ int cmp = linecmp_hash_line(a + ai, b + bi);
+ if (!cmp)
+ map[a[ai].number] = b[bi].number;
+ ai += (cmp <= 0);
+ bi += (cmp >= 0);
+ }
+ free(a);
+ free(b);
+
+ /* Find the longest increasing subsequence of map. The map is not monotonic
+ * unless we do this. This is done in O(n log n), just like sorting, so we
+ * get a better result (compare to selecting naïvely) without any significant
+ * performance penalty. */
+ retain_longest_increasing_subsequence(map, an);
+
+ /* Create change set from removed lines and gaps in new line numbers. */
+ rc = emalloc(an + bn + 1);
+ ri = an + bn;
+ for (ai = bi = 0; ai < an; ai++) {
+ if (map[ai] == SIZE_MAX) {
+ rc[ri--] = 1;
+ } else {
+ while (map[ai] > bi++)
+ rc[ri--] = 2;
+ rc[ri--] = 0;
+ }
+ }
+ free(map);
+ while (bi++ < bn)
+ rc[ri--] = 2;
+ rc[ri] = END_OF_PATH;
+ memmove(rc, rc + ri, an + bn - ri + 1);
+
+ return rc + an + bn - ri + 1;
+}
+
+/* } */
+
+
+/* Diff algorithm for minimal output. { */
+
+static char *
+diff2_minimal(struct line *a, struct line *b, size_t an, size_t bn)
+{
+ /*
+ * This function calculates the minimal edit set for getting from `a`
+ * to `b`. But rather than using an algorithm that calculates the
+ * smallest changes set, this function uses an algorithm for calculating
+ * the longest common (non-consecutive) subsequence, which is an identical
+ * (with the allowed change operations) except for the value returned
+ * at the end, which is not inspected anyways. This algorithms has the
+ * advantages of requiring simpler initialisation.
+ */
+
+ char (*map)[an + 1][bn + 1] = emalloc(sizeof(char[an + 1][bn + 1]));
+ size_t (*matrix)[2][bn + 1] = ecalloc(1, sizeof(size_t[2][bn + 1]));
+ char *rc;
+ size_t ai, bi, ri = 0, mi = 0;
+
+ memset((*map)[0], 2, bn + 1);
+
+ a--, b--;
+ for (ai = 1; ai <= an; ai++) {
+ size_t *last = (*matrix)[mi];
+ size_t *this = (*matrix)[mi ^= 1];
+ (*map)[ai][0] = 1;
+ for (bi = 1; bi <= bn; bi++) {
+ size_t u = last[bi];
+ size_t d = last[bi - 1] + 1;
+ size_t l = this[bi - 1];
+ size_t lu = l >= u ? l : u;
+ if (line_eq(a + ai, b + bi) && d > lu) {
+ /* The > in this comparison makes changes happen as late
+ * as possible. >= would make the changes happen as early
+ * as possible. */
+ this[bi] = d;
+ (*map)[ai][bi] = 0;
+ } else {
+ this[bi] = lu;
+ (*map)[ai][bi] = 1 + (l >= u);
+ }
+ }
+ }
+ free(matrix);
+
+ rc = emalloc(an + bn + 1);
+ rc[ri++] = END_OF_PATH;
+ for (ai = an, bi = bn; ai + bi; ri++) {
+ rc[ri] = (*map)[ai][bi];
+ ai -= rc[ri] != 2;
+ bi -= rc[ri] != 1;
+ }
+ free(map);
+
+ return rc + ri;
+}
+
+static char *
+diff2_minimal_maybe_transpose(struct line *a, struct line *b, size_t an, size_t bn)
+{
+ /*
+ * The minimise memory usage in diff2_minimal, swap the files if it helps.
+ */
+
+ int transpose = bn < an;
+ char trace, *path, *rc = !transpose ? diff2_minimal(a, b, an, bn) : diff2_minimal(b, a, bn, an);
+ for (path = rc; transpose && (trace = *--path) != END_OF_PATH;)
+ if (trace) *path = 3 - trace;
+ return rc;
+}
+
+/* } */
+
+
+/* Result enhancements, better output and more details for the output functions. { */
+
+static struct trace *
+enhance_path(char *path)
+{
+ char *p = path;
+ size_t len, a_len = 0, b_len = 0, i = 0, d = 0, a = 0, b = 0, j = 0;
+ int have_d = 0, ch = 0, differs = 0;
+ struct trace *rc;
+
+ while (*--p != END_OF_PATH);
+ len = (size_t)(path - p);
+ rc = ecalloc(len, sizeof(*rc));
+
+ /* Find distance from edits, and mark exchanges. (left-to-right) */
+ for (--len; i < len; i++) {
+ rc[i].f = *--path;
+ if (rc[i].f) {
+ d = 0, have_d = 1;
+ ch |= ch ? ch : (3 - rc[i].f);
+ if (rc[i].f == ch)
+ rc[i].ch = 1;
+ } else {
+ ch = 0;
+ rc[i].d = (have_d ? ++d : SIZE_MAX);
+ }
+ }
+ rc[i].f = END_OF_PATH;
+
+ /* Find distance from edits, mark exchanges, and get chunk lengths. (right-to-left) */
+ for (i = len, d = 0, ch = have_d = 0; i-- > 0;) {
+ rc[i].a_len = a_len += (rc[i].f != 2);
+ rc[i].b_len = b_len += (rc[i].f != 1);
+ if (rc[i].f) {
+ d = 0, have_d = 1;
+ ch |= ch ? ch : (3 - rc[i].f);
+ if (rc[i].f == ch)
+ rc[i].ch = 1;
+ } else {
+ ch = 0;
+ if (have_d && (d + 1) < rc[i].d)
+ rc[i].d = ++d;
+ if (rc[i].d > n_context)
+ a_len = b_len = 0;
+ }
+ }
+
+ /* Put removals before additions. */
+ for (i = 0; i < len; i++) {
+ if (rc[i].f == 0) {
+ while (a--) rc[j++].f = 1;
+ while (b--) rc[j++].f = 2;
+ j = i + 1, a = b = 0;
+ } else if (rc[i].f == 1) {
+ a++, differs = 1;
+ } else {
+ b++, differs = 1;
+ }
+ }
+ while (a--) rc[j++].f = 1;
+ while (b--) rc[j++].f = 2;
+
+ free(p);
+ if (!differs) {
+ free(rc);
+ return 0;
+ }
+ return rc;
+}
+
+/* } */
+
+
+/* When you cannot reduce the time or space complexity, reduce the input. { */
+
+/*
+ * Anything that can be executed in O(n log n) time and space is fine.
+ * If it is above that, it is probably not a good idea. Anything that
+ * is above o(n²) time or space is pointless.
+ */
+
+static void
+reduce_problem(struct line **ap, struct line **bp, size_t *anp, size_t *bnp, size_t *start, size_t *end)
+{
+ size_t skip_start = 0, skip_end = 0, an = *anp, bn = *bnp;
+ struct line *a = *ap, *b = *bp;
+
+ /* Reduce problem set, by skipping identical head. */
+ for (skip_start = 0; skip_start < an && skip_start < bn; skip_start++)
+ if (strcmp(a[skip_start].line, b[skip_start].line))
+ break;
+
+ a += skip_start, an -= skip_start;
+ b += skip_start, bn -= skip_start;
+
+ /* Reduce problem set, by skipping identical tail. */
+ for (skip_end = 0; an && bn; an--, bn--, skip_end++)
+ if (strcmp(a[an - 1].line, b[bn - 1].line))
+ break;
+
+ *ap = a, *anp = an, *start = skip_start;
+ *bp = b, *bnp = bn, *end = skip_end;
+}
+
+static void
+unreduce_problem(char **pathp, struct line **ap, struct line **bp, size_t skip_start, size_t skip_end)
+{
+ char *path = *pathp;
+ size_t path_len;
+
+ if (!skip_start && !skip_end)
+ return;
+
+ while (*--path != END_OF_PATH);
+ path_len = (size_t)(*pathp - path);
+ path = erealloc(path, skip_end + path_len + skip_start);
+ if (skip_end) {
+ memmove(path + skip_end + 1, path + 1, path_len - 1);
+ memset(path + 1, 0, skip_end);
+ }
+ memset(path + skip_end + path_len, 0, skip_start);
+ *pathp = path + skip_end + path_len + skip_start;
+
+ *ap -= skip_start;
+ *bp -= skip_start;
+}
+
+
+static void
+compress_problem_1(struct line *a, struct line *b, size_t an, size_t bn,
+ struct line **acp, struct line **bcp, size_t *acnp, size_t *bcnp)
+{
+ /*
+ * The idea behind the algorithm.
+ *
+ * For lines that appear in both files the same number of times, create a
+ * map from line numbers in the old file to line numbers in the new file.
+ *
+ * The map is not ordered, as find the longest increasing subsequence
+ * (there are gaps in subsequences) and erase the rest from the map.
+ *
+ * Inspect the map for gaps in the old line numbers and the new line numbers.
+ * The gaps can be lumped together, but non-unique lines have to divide the
+ * lumps. Parts between gaps can be lumped together too.
+ */
+
+ size_t ai, bi, ao, bo;
+ struct line **a_sorted = emalloc(an * sizeof(*a_sorted));
+ struct line **b_sorted = emalloc(bn * sizeof(*b_sorted));
+ struct line *ac, *bc;
+ size_t *map = emalloc(an * sizeof(*map));
+
+ for (ai = 0; ai < an; ai++) (a_sorted[ai] = a + ai)->number = ai;
+ for (bi = 0; bi < bn; bi++) (b_sorted[bi] = b + bi)->number = bi;
+
+ sort(a_sorted, an, linepcmp_hash_line_stable);
+ sort(b_sorted, bn, linepcmp_hash_line_stable);
+
+ memset(map, ~0, an * sizeof(*map));
+ for (ai = bi = 0; ai < an && bi < bn;) {
+ int cmp = linecmp_hash_line(a_sorted[ai], b_sorted[bi]);
+ if (cmp < 0) {
+ a_sorted[ai++]->uncommon = 1;
+ } else if (cmp > 0) {
+ b_sorted[bi++]->uncommon = 1;
+ } else {
+ ao = ai, bo = bi;
+ while (++ai < an && line_eq(a_sorted[ai], a_sorted[ai - 1]));
+ while (++bi < bn && line_eq(b_sorted[bi], b_sorted[bi - 1]));
+ if (ai - ao == bi - bo)
+ while (ao < ai)
+ map[a_sorted[ao++]->number] = b_sorted[bo++]->number;
+ }
+ }
+ while (ai < an) a_sorted[ai++]->uncommon = 1;
+ while (bi < bn) b_sorted[bi++]->uncommon = 1;
+
+ free(a_sorted);
+ free(b_sorted);
+
+ retain_longest_increasing_subsequence(map, an);
+
+ for (ai = bi = ao = bo = 0; ai < an; ai++) {
+ if (map[ai] == SIZE_MAX) {
+ if (ai > 0 && a[ai - 1].uncommon && a[ai].uncommon) {
+ a[ao].compression += 1 + a[ai].compression;
+ } else if (a[ai].uncommon) {
+ ao = ai;
+ }
+ } else if (ai > 0 && map[ai] > 0 && map[ai - 1] == map[ai] - 1) {
+ a[ao].compression += 1 + a[ai].compression;
+ b[bo].compression += 1 + b[bi].compression;
+ bi++;
+ } else {
+ for (bo = SIZE_MAX; bi < map[ai]; bi++)
+ if (!b[bi].uncommon)
+ bo = SIZE_MAX;
+ else if (bo == SIZE_MAX)
+ bo = bi;
+ else
+ b[bo].compression += 1 + b[bi].compression;
+ ao = ai, bo = bi;
+ }
+ }
+ for (bo = SIZE_MAX; bi < bn; bi++)
+ if (!b[bi].uncommon)
+ bo = SIZE_MAX;
+ else if (bo == SIZE_MAX)
+ bo = bi;
+ else
+ b[bo].compression += 1 + b[bi].compression;
+
+ free(map);
+
+ *acp = ac = emalloc(an * sizeof(*ac));
+ *bcp = bc = emalloc(bn * sizeof(*bc));
+
+ for (ai = ao = 0; ai < an; ai++, ao++) ai += (ac[ao] = a[ai]).compression;
+ for (bi = bo = 0; bi < bn; bi++, bo++) bi += (bc[bo] = b[bi]).compression;
+
+ *acnp = ao;
+ *bcnp = bo;
+}
+
+static void
+compress_problem(struct line *a, struct line *b, size_t an, size_t bn,
+ struct line **acp, struct line **bcp, size_t *acnp, size_t *bcnp)
+{
+ if (bdiff) {
+ *acp = a, *acnp = an;
+ *bcp = b, *bcnp = bn;
+ return;
+ }
+
+ compress_problem_1(a, b, an, bn, acp, bcp, acnp, bcnp);
+}
+
+static void
+decompress_problem(size_t an, size_t bn, struct line *a_compressed, struct line *b_compressed, char **pathp)
+{
+ char *path = *pathp;
+ char *new_path;
+ char trace;
+ size_t ai = 0, bi = 0, pi = an + bn;
+
+ if (bdiff)
+ return;
+
+ new_path = emalloc(an + bn + 1);
+
+ while ((trace = *--path) != END_OF_PATH) {
+ size_t dups = (trace == 1 ? (a_compressed + ai) : (b_compressed + bi))->compression;
+ do
+ new_path[pi--] = trace;
+ while (dups-- > 0);
+ ai += trace != 2;
+ bi += trace != 1;
+ }
+ new_path[pi] = END_OF_PATH;
+ memmove(new_path, new_path + pi, an + bn - pi + 1);
+
+ *pathp = new_path + an + bn - pi + 1;
+
+ free(path);
+ free(a_compressed);
+ free(b_compressed);
+}
+
+/* } */
+
+
+/* diff implementation for files. { */
+
+static struct trace *
+diff2(char **a, char **b, size_t an, size_t bn)
+{
+ size_t skip_start, skip_end, i, an_compressed, bn_compressed;
+ char *path;
+ struct line *a_compressed, *b_compressed;
+ struct line *a_lines = ecalloc(an, sizeof(*a_lines));
+ struct line *b_lines = ecalloc(bn, sizeof(*b_lines));
+ char *(*algorithm)(struct line *, struct line *, size_t, size_t);
+
+ measure_time(0);
+
+ for (i = 0; i < an; i++) a_lines[i].line = a[i];
+ for (i = 0; i < bn; i++) b_lines[i].line = b[i];
+
+ rstrip_lines(a_lines, an);
+ rstrip_lines(b_lines, bn);
+
+ reduce_problem(&a_lines, &b_lines, &an, &bn, &skip_start, &skip_end);
+
+ hash_lines(a_lines, an);
+ hash_lines(b_lines, bn);
+
+ measure_time(0);
+ compress_problem(a_lines, b_lines, an, bn, &a_compressed, &b_compressed, &an_compressed, &bn_compressed);
+ measure_time("compress_problem");
+
+ if (bdiff) {
+ algorithm = diff2_cheap;
+ } else if (dflag || is_minimal_feasible(an_compressed, bn_compressed)) {
+ algorithm = diff2_minimal_maybe_transpose;
+ } else {
+ algorithm = diff2_cheap;
+ cheap_algorithm_used = 1;
+ }
+ measure_time(0);
+ path = algorithm(a_compressed, b_compressed, an_compressed, bn_compressed);
+ measure_time("diff");
+
+ decompress_problem(an, bn, a_compressed, b_compressed, &path);
+
+ unreduce_problem(&path, &a_lines, &b_lines, skip_start, skip_end);
+
+ unrstrip_lines(a_lines, an);
+ unrstrip_lines(b_lines, bn);
+
+ measure_time("diff2");
+
+ free(a_lines);
+ free(b_lines);
+ return enhance_path(path);
+}
+
+/* } */
+
+
+/* Functions for producing diff output. { */
+
+static char *
+get_time_string(const struct stat *attr)
+{
+ static char buf[sizeof("0000-00-00 00:00:00.000000000 +0000")];
+ struct tm *tm;
+
+ tm = localtime(&(attr->st_mtime));
+ if (tm == NULL)
+ eperror("localtime");
+
+#ifdef st_mtime
+ strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S.000000000 %z", tm);
+ sprintf(buf + (sizeof("0000-00-00 00:00:00.") - 1), "%09lu", attr->st_mtim.tv_nsec);
+ buf[sizeof("0000-00-00 00:00:00.") - 1 + 9] = ' ';
+#else
+ strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S %z", tm);
+#endif
+ return buf;
+}
+
+static void
+get_diff_chunks(struct trace *path, size_t an, size_t bn, struct chunk **headp, struct chunk **tailp)
+{
+ struct trace trace;
+ size_t ai, bi;
+ int suppressed = 1, have_a = 0, have_b = 0;
+ struct chunk *head = *headp;
+
+ head = ecalloc(an + bn + 1, sizeof(*head));
+ *tailp = head++;
+
+ for (ai = bi = 0; (trace = *path++).f != END_OF_PATH;) {
+ if (trace.d > n_context) {
+ suppressed = 1;
+ if (head->chunk) {
+ head->have_a = have_a;
+ head->have_b = have_b;
+ head++;
+ }
+ have_a = have_b = 0;
+ goto next;
+ }
+ if (suppressed) {
+ head->ai = ai;
+ head->bi = bi;
+ head->chunk = path - 1;
+ }
+ have_a |= trace.f == 1;
+ have_b |= trace.f == 2;
+ suppressed = 0;
+ next:
+ ai += trace.f != 2;
+ bi += trace.f != 1;
+ }
+ if (head->chunk) {
+ head->have_a = have_a;
+ head->have_b = have_b;
+ head++;
+ }
+
+ *headp = head;
+}
+
+#define OUTPUT_BEGIN\
+ struct trace *path;\
+ size_t ai, bi;\
+ int have_a = 0, have_b = 0;\
+ struct trace *chunk;\
+ struct trace *chunk_old;\
+ struct chunk *head;\
+ struct chunk *tail;\
+ char **a = old->lines;\
+ char **b = new->lines\
+
+#define OUTPUT_DIFF\
+ path = diff2(old->lines, new->lines, old->line_count, new->line_count);\
+ if (!path)\
+ return 0
+
+#define OUTPUT_HEAD(A, B)\
+ cprintf("\033[1m"A" %s\033[21m\t%s\033[0m\n", old->path, get_time_string(&(old->attr)));\
+ cprintf("\033[1m"B" %s\033[21m\t%s\033[0m\n", new->path, get_time_string(&(new->attr)))
+
+#define OUTPUT_QUEUE\
+ get_diff_chunks(path, old->line_count, new->line_count, &head, &tail);\
+ (void) chunk_old, (void) a, (void) b;\
+ for (head = tail;;) {\
+ head++;\
+ ai = head->ai;\
+ bi = head->bi;\
+ have_a = head->have_a;\
+ have_b = head->have_b;\
+ chunk = head->chunk;\
+ if (!chunk)\
+ break
+
+#define OUTPUT_STACK\
+ get_diff_chunks(path, old->line_count, new->line_count, &head, &tail);\
+ (void) chunk_old, (void) a, (void) b;\
+ for (;;) {\
+ head--;\
+ ai = head->ai;\
+ bi = head->bi;\
+ have_a = head->have_a;\
+ have_b = head->have_b;\
+ chunk = head->chunk;\
+ if (!chunk)\
+ break
+
+#define OUTPUT_END\
+ }\
+ free(tail);\
+ free(path);\
+ return EXIT_FILES_DIFFER
+
+static int
+output_unified(struct file_data *old, struct file_data *new)
+{
+ struct trace *path;
+ struct trace *path_;
+ struct trace trace;
+ size_t ai, bi;
+ char **a;
+ char **b;
+ int suppressed = 1;
+
+ OUTPUT_DIFF;
+ OUTPUT_HEAD("---", "+++");
+
+ a = old->lines, b = new->lines, path_ = path;
+ for (ai = bi = 0; (trace = *path++).f != END_OF_PATH;) {
+ char f = trace.f;
+ if (trace.d > n_context) {
+ suppressed = 1;
+ goto next;
+ }
+ if (suppressed) {
+ suppressed = 0;
+ cprintf("\033[1;34m_AT_@ -%zu", ai + 1 - !trace.a_len);
+ if (trace.a_len != 1)
+ printf(",%zu", trace.a_len);
+ printf(" +%zu", bi + 1 - !trace.b_len);
+ if (trace.b_len != 1)
+ printf(",%zu", trace.b_len);
+ cprintf(" _AT_@\033[m\n");
+ }
+ if (f == 0)
+ printf(" %s\n", a[ai]);
+ else
+ cprintf("\033[3%im%c%s\033[m\n", (int)f, " -+"[(int)f], f == 1 ? a[ai] : b[bi]);
+ next:
+ ai += f != 2;
+ bi += f != 1;
+ }
+
+ free(path_);
+ return EXIT_FILES_DIFFER;
+}
+
+static int
+output_copied(struct file_data *old, struct file_data *new)
+{
+#define PRINT_PART(L, C, S, A)\
+ cprintf("\033[1;3"#C"m"A" %zu", L##i + 1 - (!have_##L));\
+ if (chunk->L##_len > 1)\
+ printf(",%zu", L##i + chunk->L##_len);\
+ cprintf(" "A"\033[m\n");\
+ for (; have_##L && chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {\
+ if (chunk->f == 0)\
+ printf(" %s\n", L[L##i]);\
+ else if (chunk->f != (3 - C))\
+ cprintf("\033[3%im%c %s\033[m\n", chunk->ch ? 3 : C, S"!"[chunk->ch], L[L##i]);\
+ L##i += chunk->f != (3 - C);\
+ }
+
+ OUTPUT_BEGIN;
+ OUTPUT_DIFF;
+ OUTPUT_HEAD("***", "---");
+ OUTPUT_QUEUE;
+
+ cprintf("\033[1;34m***************\033[m\n");
+ chunk_old = chunk;
+ PRINT_PART(a, 1, "-", "***");
+ chunk = chunk_old;
+ PRINT_PART(b, 2, "+", "---");
+
+ OUTPUT_END;
+#undef PRINT_PART
+}
+
+static int
+output_default(struct file_data *old, struct file_data *new)
+{
+#define PRINT_PART(L, C, S)\
+ for (; have_##L && chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {\
+ if (chunk->f == 0)\
+ printf(" %s\n", L[L##i]);\
+ else if (chunk->f != (3 - C))\
+ cprintf("\033[3"#C"m"S" %s\033[m\n", L[L##i]);\
+ L##i += chunk->f != (3 - C);\
+ }
+
+ OUTPUT_BEGIN;
+ OUTPUT_DIFF;
+ OUTPUT_QUEUE;
+
+ cprintf("\033[1;34m%zu", ai + 1 - !have_a);
+ if (chunk->a_len > 1)
+ printf(",%zu", ai + chunk->a_len);
+ printf("%c", " dac"[have_a + 2 * have_b]);
+ printf("%zu", bi + 1 - !have_b);
+ if (chunk->b_len > 1)
+ printf(",%zu", bi + chunk->b_len);
+ cprintf("\033[m\n");
+
+ chunk_old = chunk;
+ PRINT_PART(a, 1, "<");
+ if (have_a && have_b)
+ cprintf("\033[34m---\033[m\n");
+ chunk = chunk_old;
+ PRINT_PART(b, 2, ">");
+
+ OUTPUT_END;
+#undef PRINT_PART
+}
+
+static int
+output_ed(struct file_data *old, struct file_data *new)
+{
+ OUTPUT_BEGIN;
+ OUTPUT_DIFF;
+ OUTPUT_STACK;
+ if (!have_b) {
+ printf("%zud\n", ai + 1);
+ } else {
+ int have_dot = 0;
+ printf("%zu", ai + 1 - !have_a);
+ if (chunk->a_len > 1)
+ printf(",%zu", ai + chunk->a_len);
+ printf("%c\n", "ac"[chunk->ch]);
+ for (; chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {
+ if (chunk->f != 1)
+ cprintf("\033[3%im%s%s\033[m\n", chunk->ch ? 3 : 2,
+ b[bi][0] == '.' ? "." : "", b[bi]);
+ have_dot = (chunk->f == 2 && b[bi][0] == '.');
+ if (have_dot)
+ printf(".\ns/.//\na\n");
+ bi += chunk->f != 1;
+ }
+ if (!have_dot)
+ printf(".\n");
+ }
+ OUTPUT_END;
+}
+
+static int
+output_ed_alternative(struct file_data *old, struct file_data *new)
+{
+ OUTPUT_BEGIN;
+ OUTPUT_DIFF;
+ OUTPUT_QUEUE;
+ if (!have_b) {
+ printf("d%zu\n", ai + 1);
+ } else {
+ printf("%c%zu", "ac"[chunk->ch], ai + 1 - !have_a);
+ if (chunk->a_len > 1)
+ printf(" %zu", ai + chunk->a_len);
+ printf("\n");
+ for (; chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {
+ if (chunk->f != 1)
+ cprintf("\033[3%im%s\033[m\n", chunk->ch ? 3 : 2, b[bi]);
+ bi += chunk->f != 1;
+ }
+ printf(".\n");
+ }
+ OUTPUT_END;
+}
+
+/* } */
+
+
+static struct file_data *
+load_lines(const char *pathname)
+{
+ int fd, bin = 0;
+ char *buffer;
+ char *p;
+ char *end;
+ size_t ptr, size, n;
+ ssize_t m;
+ struct file_data* rc;
+ struct stat attr;
+
+ p = strrchr(pathname, '/');
+ if (p && !p[1])
+ return 0;
+
+ fd = strcmp(pathname, "-") ? open(pathname, O_RDONLY) : STDIN_FILENO;
+ if (fd == -1) {
+ if (errno == EISDIR)
+ return 0;
+ eperror(pathname);
+ }
+
+ if (fstat(fd, &attr))
+ eperror(pathname);
+ if (S_ISDIR(attr.st_mode))
+ return 0;
+
+ ptr = 0;
+ size = attr.st_blksize ? attr.st_blksize : 8096;
+ buffer = emalloc(size + 1);
+ for (;;) {
+ if (ptr == size)
+ buffer = erealloc(buffer, (size <<= 1) + 1);
+ m = read(fd, buffer + ptr, size - ptr);
+ if (m < 0)
+ eperror(pathname);
+ if (m == 0)
+ break;
+ ptr += (size_t)m;
+ }
+ buffer[ptr] = 0;
+
+ for (n = 1, p = buffer;; n += 1) {
+ char *lf = strchr(p, '\n');
+ if (!lf)
+ break;
+ p = lf + 1;
+ }
+ bin = (strchr(p, '\0') != buffer + ptr);
+
+ /* This is a bit ugly, if not done this way, it would require unnecessarily many
+ * malloc:s to create rc and unnecessarily many free:s to destroy it. */
+ rc = erealloc(buffer, sizeof(*rc) + (n + 1) * sizeof(char *) + (ptr + 1 + sizeof(COLOURED_NO_LF_MARK)));
+ buffer = ((char *)rc) + sizeof(*rc) + (n + 1) * sizeof(char *);
+ memmove(buffer, rc, ptr);
+ rc->lines = (char **)((char *)rc + sizeof(*rc));
+ rc->lf_terminated = ptr && buffer[ptr - 1] == '\n';
+ rc->line_count = bin ? ptr : (n -= rc->lf_terminated);
+ buffer[ptr - rc->lf_terminated] = 0;
+ rc->attr = attr;
+ rc->path = pathname;
+ rc->is_binary = bin;
+ rc->is_empty = (ptr == 0);
+
+ close(fd);
+
+ n = bin ? n : 1;
+ rc->lines[n] = 0;
+ if (bin) {
+ rc->lines[0] = buffer;
+ } else {
+ for (ptr = 0, p = buffer; p; p = end) {
+ end = strchr(p, '\n');
+ if (end)
+ *end++ = 0;
+ rc->lines[ptr++] = p;
+ }
+ }
+
+ return rc;
+}
+
+static int
+do_binaries_differ(struct file_data *old, struct file_data *new)
+{
+ struct file_data *f = old;
+ do if (!f->is_binary) {
+ char **lines = f->lines;
+ size_t len = 0, part_len;
+ for (; *lines; lines++) {
+ len += 1 + (part_len = strlen(*lines));
+ (*lines)[part_len] = '\n';
+ }
+ f->line_count = len - !f->lf_terminated;
+ } while (f == old ? (f = new) : (void*)0);
+
+ if (old->line_count != new->line_count)
+ return 1;
+
+ return memcmp(old->lines[0], new->lines[0], old->line_count);
+}
+
+static int
+compare_files(struct file_data *old, struct file_data *new)
+{
+ int ret;
+
+ if (old->is_binary || new->is_binary) {
+ if (do_binaries_differ(old, new)) {
+ cprintf("\033[1mBinary files %s and %s differ\033[m\n", old->path, new->path);
+ return EXIT_BINARIES_DIFFER;
+ }
+ return 0;
+ }
+
+ if (!eflag && !fflag) {
+ if (!old->lf_terminated)
+ strcat(old->lines[old->line_count - 1], use_colour ? COLOURED_NO_LF_MARK : NO_LF_MARK);
+ if (!new->lf_terminated)
+ strcat(new->lines[new->line_count - 1], use_colour ? COLOURED_NO_LF_MARK : NO_LF_MARK);
+ }
+
+ ret = (uflag ? output_unified :
+ cflag ? output_copied :
+ eflag ? output_ed :
+ fflag ? output_ed_alternative :
+ output_default)(old, new);
+
+ if ((eflag || fflag) && 1 <= ret && ret <= 2) {
+ if (!old->lf_terminated)
+ ret = 2, fprintf(stderr, "%s: %s: No newline at end of file\n\n", argv0, old->path);
+ if (!new->lf_terminated)
+ ret = 2, fprintf(stderr, "%s: %s: No newline at end of file\n\n", argv0, new->path);
+ }
+
+ return ret;
+}
+
+static int
+compare_directories(const char *old, const char *new, const char *diff_line)
+{
+ int ret = 0, r, i = 0, j = 1;
+ DIR *dir;
+ const char *paths[2] = { old, new };
+ struct dirent *file;
+ struct file_data *a;
+ struct file_data *b;
+ char *b_path;
+ char *a_path;
+ struct stat a_attr;
+ struct stat b_attr;
+
+again:
+ dir = opendir(paths[i]);
+ if (!dir)
+ eperror(paths[i]);
+ while ((errno = 0, file = readdir(dir))) {
+ if (!strcmp(file->d_name, ".") || !strcmp(file->d_name, ".."))
+ continue;
+ b_path = join_paths(paths[j], file->d_name);
+ if (access(b_path, F_OK)) {
+ cprintf("\033[1mOnly in %s: %s\033[m\n", paths[i], file->d_name);
+ ret = ret > EXIT_FILES_DIFFER ? ret : EXIT_FILES_DIFFER;
+ goto next;
+ } else if (i == 1) {
+ goto next;
+ }
+ a_path = join_paths(paths[i], file->d_name);
+
+ if (stat(a_path, &a_attr)) eperror(a_path);
+ if (stat(b_path, &b_attr)) eperror(b_path);
+
+ if (a_attr.st_dev == b_attr.st_dev && a_attr.st_ino == b_attr.st_ino)
+ goto skip;
+ if (is_incommensurable(a_attr.st_mode) || is_incommensurable(b_attr.st_mode))
+ goto skip;
+
+ a = load_lines(a_path);
+ b = load_lines(b_path);
+
+ if (!a ^ !b) {
+ printf("\033[1mFile %s is a %s while file %s is a %s\033[m\n",
+ a_path, classify(a), b_path, classify(b));
+ r = EXIT_FILES_DIFFER;
+ } else if (!a && !b && !rflag) {
+ printf("\033[1mCommon subdirectories: %s and %s\033[m\n", a_path, b_path);
+ r = 0;
+ } else if (!a && !b) {
+ r = compare_directories(a_path, b_path, diff_line);
+ } else {
+ printf("\033[1m%s %s %s\033[m\n", diff_line, a_path, b_path);
+ r = compare_files(a, b);
+ }
+ ret = ret > r ? ret : r;
+
+ free(a);
+ free(b);
+ skip:
+ free(a_path);
+ next:
+ free(b_path);
+ }
+ if (errno)
+ eperror("readdir");
+ closedir(dir);
+
+
+ if (i)
+ return ret;
+ i = 1, j = 0;
+ goto again;
+}
+
+int
+main(int argc, char *argv[])
+{
+ struct file_data *old;
+ struct file_data *new;
+ char *old_proper = 0;
+ char *new_proper = 0;
+ int ret;
+ char *diff_line = 0;
+ char *p;
+
+ /* Construct the 'diff options file1 file2' line used diff:ing directories. */
+ if (argc > 2) {
+ size_t len = 0;
+ int i;
+ p = strrchr(argv[0], '/');
+ if (p)
+ argv[0] = p + 1;
+ for (i = 0; i < argc - 2; i++)
+ len += strlen(argv[i]) + 1;
+ p = diff_line = emalloc(len + 1);
+ for (i = 0; i < argc - 2; i++)
+ p += sprintf(p, "%s ", argv[i]);
+ p[-1] = 0;
+ }
+
+ ARGBEGIN {
+ case 'b': bflag++; break;
+ case 'c': cflag++; n_context = 3; break;
+ case 'C': cflag++; n_context = atol(EARGF(usage())); break;
+ case 'e': eflag++; break;
+ case 'f': fflag++; break;
+ case 'u': uflag++; n_context = 3; break;
+ case 'U': uflag++; n_context = atol(EARGF(usage())); break;
+ case 'r': rflag++; break;
+ case 'd': dflag++; break;
+ case 'D': use_colour++; break;
+ default:
+ usage();
+ } ARGEND;
+ /* Use of `atol` is intentional, '-U -1' and '-C -1' shall display the entire file.
+ * This is a not specified in POSIX, but appears in other implementations and is
+ * useful whilst removing complexity. */
+
+ if (argc != 2 || (bflag | rflag) > 1 || cflag + eflag + fflag + uflag > 1)
+ usage();
+
+ if (!strcmp(argv0, "bdiff")) {
+ bdiff = 1;
+ } else {
+ p = strrchr(argv0, '/');
+ if (p && !strcmp(p, "/bdiff"))
+ bdiff = 1;
+ }
+
+ use_colour = use_colour == 1 ? isatty(STDOUT_FILENO) : use_colour;
+ cprintf = use_colour ? printf : ncprintf;
+
+redo:
+ old = load_lines(old_proper ? old_proper : argv[0]);
+ new = load_lines(new_proper ? new_proper : argv[1]);
+
+ if ((old_proper || new_proper) && (!old || !new)) {
+ printf("\033[1mFile %s is a %s while file %s is a %s\033[m\n",
+ old_proper ? old_proper : argv[0], classify(old),
+ new_proper ? new_proper : argv[1], classify(new));
+ ret = 1;
+ } else if (!old && new) {
+ old_proper = join_paths(argv[0], basename(argv[1]));
+ goto redo;
+ } else if (old && !new) {
+ new_proper = join_paths(argv[1], basename(argv[0]));
+ goto redo;
+ } else if (!old && !new) {
+ ret = compare_directories(argv[0], argv[1], diff_line);
+ } else {
+ ret = compare_files(old, new);
+ }
+
+ if (fshut(stdout, "<stdout>"))
+ ret = EXIT_FAILURE;
+
+ free(old);
+ free(new);
+ free(old_proper);
+ free(new_proper);
+ free(diff_line);
+
+ if (!bdiff && cheap_algorithm_used && ret == EXIT_FILES_DIFFER)
+ ret = EXIT_NOT_MINIMAL;
+
+ return ret;
+}
diff --git a/libutil/ncprintf.c b/libutil/ncprintf.c
new file mode 100644
index 0000000..f745cfe
--- /dev/null
+++ b/libutil/ncprintf.c
_AT_@ -0,0 +1,50 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "../util.h"
+
+/* Variant of printf that removes colors from the format string. Only CSI m is
+ * recognized, no other appearances of ESC may occour. %i, %u and %c, but not
+ * other %-codes, may be used inside the CSI m sequence, but only if they refer
+ * to the very first arguments after the format string. Not thread-safe. */
+int
+ncprintf(const char *format, ...)
+{
+ static const char *prev_format = 0;
+ static size_t skips = 0;
+ static char fmt[128];
+ size_t r = 0, w = 0;
+ va_list args;
+ int rc, escape = 0;
+
+ va_start(args, format);
+
+ if (format == prev_format)
+ goto print;
+
+ prev_format = format;
+
+ skips = 0;
+ for (;; r++) {
+ if (escape) {
+ escape = (format[r] != 'm');
+ skips += (format[r] == '%');
+ } else if (format[r] == '\033') {
+ escape = 1;
+ } else {
+ if (!(fmt[w++] = format[r]))
+ break;
+ }
+ }
+ fmt[w] = 0;
+
+print:
+ for (r = skips; r--;)
+ (void) va_arg(args, int);
+
+ rc = vprintf(fmt, args);
+ va_end(args);
+ return rc;
+}
diff --git a/util.h b/util.h
index 4c973d1..62fd9ea 100644
--- a/util.h
+++ b/util.h
_AT_@ -76,3 +76,4 @@ long long enstrtonum(int, const char *, long long, long long);
long long estrtonum(const char *, long long, long long);
size_t unescape(char *);
int mkdirp(const char *);
+int ncprintf(const char *, ...);
--
2.7.1
Received on Wed Feb 24 2016 - 11:26:44 CET
This archive was generated by hypermail 2.3.0 : Wed Feb 24 2016 - 11:36:09 CET