Re: [dev] [sbase][PATCH v2] diff

From: Hiltjo Posthuma <hiltjo_AT_codemadness.org>
Date: Sun, 31 Jan 2016 12:39:34 +0100

On Sat, Jan 30, 2016 at 4:13 PM, Mattias Andrée <maandree_AT_kth.se> wrote:
> New command. Should be POSIX-compliant.
>
> Extensions to POSIX:
>
> 1) In directories, sockets are not compared.
> POSIX specifies that special devices and FIFOs
> shall never be compared, and that for other types
> than these and regular files and directories, it
> is implementation-defined.
>
> 2) Output is coloured when stdout is a tty.
> This was added to make it easier to spot errors.
> Perhaps this should be removed, but I left it in
> just in case.
>
> There is a comment in the code referring to a post on
> the mailing list, for a diff algorithm that could be
> used to improve time and space complexity. However,
> this algorithm does not produce a minimal list of
> necessary changes, which POSIX specifies that it should
> do. In GNU diff, the output is not minimal, even for
> short files, unless -d (--minimal) is specified. Some
> UNIX-like systems have bdiff, which is able to compare
> files too big for diff; I assume they produce minimal
> output with diff, and use minimal complexity with bdiff.
> ---
> LICENSE | 1 +
> Makefile | 1 +
> diff.c | 873 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 875 insertions(+)
> create mode 100644 diff.c
>
> diff --git a/LICENSE b/LICENSE
> index cb5a797..2a26979 100644
> --- a/LICENSE
> +++ b/LICENSE
> @@ -59,3 +59,4 @@ Authors/contributors include:
> © 2015 Quentin Rameau <quinq_AT_quinq.eu.org>
> © 2015 Dionysis Grigoropoulos <info_AT_erethon.com>
> © 2015 Wolfgang Corcoran-Mathe <first.lord.of.teal_AT_gmail.com>
> +© 2016 Mattias Andrée <maandree_AT_kth.se>
> diff --git a/Makefile b/Makefile
> index 1c09cac..74e071e 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -89,6 +89,7 @@ BIN =\
> cron\
> cut\
> date\
> + diff\
> dirname\
> du\
> echo\
> diff --git a/diff.c b/diff.c
> new file mode 100644
> index 0000000..3c99ae8
> --- /dev/null
> +++ b/diff.c
> @@ -0,0 +1,873 @@
> +/* See LICENSE file for copyright and license details. */
> +#include <stdio.h>
> +#include <fcntl.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdint.h>
> +#include <ctype.h>
> +#include <time.h>
> +#include <errno.h>
> +#include <libgen.h>
> +#include <dirent.h>
> +#include <sys/stat.h>
> +
> +#include "arg.h"
> +#include "util.h"
> +
> +/*
> + * Lines that only appear in file-1 are marked 1.
> + * Lines that only appear in file-2 are marked 2.
> + * Lines that appear in both files are marked 0.
> + */
> +
> +#define END_OF_PATH 127
> +#define NO_LF_MARK "\n\033[7m\\ No newline at end of file\033[27m"
> +
> +#undef EXIT_FAILURE
> +#define EXIT_FAILURE 2
> +
> +#define emalloc(...) enmalloc(EXIT_FAILURE, __VA_ARGS__)
> +#define erealloc(...) enrealloc(EXIT_FAILURE, __VA_ARGS__)
> +#define eprintf(...) enprintf(EXIT_FAILURE, __VA_ARGS__)
> +#define eperror(...) (perror(__VA_ARGS__), exit(EXIT_FAILURE))
> +
> +#define CLASSIFY(f) (!(f) ? "directory" : (f)->is_empty ? "regular empty file" : "regular file")
> +#define BOLD(...) use_colour ? "\033[1m" : "", __VA_ARGS__, use_colour ? "\033[m" : ""
> +
> +struct file_data {
> + char **lines;
> + size_t line_count; /* used as length of `lines[0]` if `is_binary` */
> + int lf_terminated;
> + int is_binary;
> + int is_empty;
> + struct stat attr;
> + const char *path;
> +};
> +
> +struct trace {
> + char f;
> + int ch;
> + size_t d;
> + size_t a_len;
> + size_t b_len;
> +};
> +
> +struct chunk {
> + size_t ai;
> + size_t bi;
> + int have_a;
> + int have_b;
> + struct trace *chunk;
> +};
> +
> +static int bflag = 0;
> +static int cflag = 0;
> +static int eflag = 0;
> +static int fflag = 0;
> +static int uflag = 0;
> +static int rflag = 0;
> +static int use_colour = 0;
> +static size_t n_context = 0;
> +
> +static void
> +usage(void)
> +{
> + eprintf("usage: %s [-c | -C n | -e | -f | -u | -U n] [-br] file1 file2\n", argv0);
> +}
> +
> +static struct file_data *
> +load_lines(const char *pathname)
> +{
> + int fd, bin = 0;
> + char *buffer;
> + char *p;
> + char *end;
> + size_t ptr, size, n;
> + ssize_t m;
> + struct file_data* rc;
> + struct stat attr;
> +
> + p = strrchr(pathname, '/');
> + if (p && !p[1])
> + return 0;
> +
> + fd = open(strcmp(pathname, "-") ? pathname : "/dev/stdin", O_RDONLY);
> + if (fd == -1) {
> + if (errno == EISDIR)
> + return 0;
> + eperror(pathname);
> + }
> +
> + fstat(fd, &attr);
> + if (S_ISDIR(attr.st_mode))
> + return 0;
> +
> + ptr = 0;
> + size = attr.st_blksize ? attr.st_blksize : 8096;
> + buffer = emalloc(size + 1);
> + for (;;) {
> + if (ptr == size)
> + buffer = erealloc(buffer, (size <<= 1) + 1);
> + m = read(fd, buffer + ptr, size - ptr);
> + if (m < 0)
> + eperror(pathname);
> + if (m == 0)
> + break;
> + ptr += (size_t)m;
> + }
> + buffer[ptr] = 0;
> +
> + for (n = 1, p = buffer;; n += 1) {
> + char *lf = strchr(p, '\n');
> + if (!lf)
> + break;
> + p = lf + 1;
> + }
> + bin = (strchr(p, '\0') != buffer + ptr);
> +
> + rc = erealloc(buffer, sizeof(*rc) + (n + 1) * sizeof(char *) + (ptr + 1 + sizeof(NO_LF_MARK)));
> + buffer = ((char *)rc) + sizeof(*rc) + (n + 1) * sizeof(char *);
> + memmove(buffer, rc, ptr);
> + rc->lines = (char **)((char *)rc + sizeof(*rc));
> + rc->lf_terminated = ptr && buffer[ptr - 1] == '\n';
> + rc->line_count = bin ? ptr : (n -= rc->lf_terminated);
> + buffer[ptr - rc->lf_terminated] = 0;
> + rc->attr = attr;
> + rc->path = pathname;
> + rc->is_binary = bin;
> + rc->is_empty = (ptr == 0);
> +
> + close(fd);
> +
> + rc->lines[bin ? n : 1] = 0;
> + if (bin) {
> + rc->lines[0] = buffer;
> + } else {
> + for (ptr = 0, p = buffer; p; p = end) {
> + end = strchr(p, '\n');
> + if (end)
> + *end++ = 0;
> + rc->lines[ptr++] = p;
> + }
> + }
> +
> + return rc;
> +}
> +
> +static char *
> +rstrip(char *text, char *removed)
> +{
> + char *end = strchr(text, '\0');
> + while ((end != text) && isspace(end[-1]))
> + end--;
> + *removed = *end;
> + *end = '\0';
> + return end;
> +}
> +
> +static int
> +strcmp_rstrip_a(char *a, char *b)
> +{
> + static char *last_a = NULL;
> + static char *a_p = NULL;
> + static char a_pc = 0;
> + if (a != last_a) {
> + if (last_a)
> + *a_p = a_pc;
> + if (a)
> + a_p = rstrip(last_a = a, &a_pc);
> + }
> + return a ? strcmp(a, b) : 0;
> +}
> +
> +/* TODO use <20160128154757.GA20170_AT_debian> when `an` is too large. */
> +static char *
> +diff2_(char **a, char **b, size_t an, size_t bn, int (*cmp)(char *, char *))
> +{
> +#define matrix (*matrix)
> +#define map (*map)
> + char map[an + 1][bn + 1] = emalloc(sizeof(char[an + 1][bn + 1]));
> + size_t matrix[2][bn + 1] = ecalloc(1, sizeof(size_t[2][bn + 1]));
> + char *rc;
> + size_t ai, bi, ri = 0, mi = 0;
> +
> + memset(map[0], 2, bn + 1);
> +
> + a--, b--;
> + for (ai = 1; ai <= an; ai++) {
> + size_t *last = matrix[mi];
> + size_t *this = matrix[mi ^= 1];
> + map[ai][0] = 1;
> + for (bi = 1; bi <= bn; bi++) {
> + if (!cmp(a[ai], b[bi])) {
> + this[bi] = last[bi - 1] + 1;
> + map[ai][bi] = 0;
> + } else {
> + size_t u = last[bi];
> + size_t l = this[bi - 1];
> + this[bi] = l >= u ? l : u;
> + map[ai][bi] = 1 + (l >= u);
> + }
> + }
> + }
> +#undef matrix
> + free(matrix);
> +
> + rc = emalloc(an + bn + 1);
> + rc[ri++] = END_OF_PATH;
> + for (ai = an, bi = bn; ai + bi; ri++) {
> + rc[ri] = map[ai][bi];
> + ai -= rc[ri] != 2;
> + bi -= rc[ri] != 1;
> + }
> +#undef map
> + free(map);
> +
> + return rc + ri;
> +}
> +
> +static struct trace *
> +enhance_trace(char *path)
> +{
> + char *p = path;
> + size_t len, a_len = 0, b_len = 0, i = 0, d = 0, a = 0, b = 0, j = 0;
> + int have_d = 0, ch = 0;
> + struct trace *rc;
> +
> + while (*--p != END_OF_PATH);
> + len = (size_t)(path - p);
> + rc = ecalloc(len, sizeof(*rc));
> +
> + /* Find distance from edits, and mark exchanges. (left-to-right) */
> + for (--len; i < len; i++) {
> + rc[i].f = *--path;
> + if (rc[i].f) {
> + d = 0, have_d = 1;
> + ch |= ch ? ch : (3 - rc[i].f);
> + if (rc[i].f == ch)
> + rc[i].ch = 1;
> + } else {
> + ch = 0;
> + rc[i].d = (have_d ? ++d : SIZE_MAX);
> + }
> + }
> + rc[i].f = END_OF_PATH;
> +
> + /* Find distance from edits, mark exchanges, and get chunk lengths. (right-to-left) */
> + for (i = len, d = 0, ch = have_d = 0; i-- > 0;) {
> + rc[i].a_len = a_len += (rc[i].f != 2);
> + rc[i].b_len = b_len += (rc[i].f != 1);
> + if (rc[i].f) {
> + d = 0, have_d = 1;
> + ch |= ch ? ch : (3 - rc[i].f);
> + if (rc[i].f == ch)
> + rc[i].ch = 1;
> + } else {
> + ch = 0;
> + if (have_d && (d + 1) < rc[i].d)
> + rc[i].d = ++d;
> + if (rc[i].d > n_context)
> + a_len = b_len = 0;
> + }
> + }
> +
> + /* Put removals before additions. */
> + for (i = 0; i < len; i++) {
> + if (rc[i].f == 0) {
> + while (a--)
> + rc[j++].f = 1;
> + while (b--)
> + rc[j++].f = 2;
> + j = i + 1, a = b = 0;
> + } else if (rc[i].f == 1) {
> + a++;
> + } else {
> + b++;
> + }
> + }
> + while (a--)
> + rc[j++].f = 1;
> + while (b--)
> + rc[j++].f = 2;
> +
> + free(p);
> + return rc;
> +}
> +
> +static struct trace *
> +diff2(char **a, char **b, size_t an, size_t bn, int do_rstrip)
> +{
> + size_t skip_start = 0, skip_end = 0;
> + char *rc;
> + int (*cmp)(char *, char *) = (int (*)(char *, char *))strcmp;
> + int transpose = bn < an;
> +
> + if (do_rstrip) {
> + char **lines;
> + char _c;
> + for (lines = !transpose ? b : a; *lines; lines++)
> + rstrip(*lines, &_c);
> + cmp = strcmp_rstrip_a;
> + }
> +
> + /* Reduce problem set, by skiping identical head. */
> + for (skip_start = 0;; skip_start++) {
> + char *a_elem = a[skip_start];
> + char *b_elem = b[skip_start];
> + if (!a_elem || !b_elem || cmp(a_elem, b_elem))
> + break;
> + }
> + a += skip_start, an -= skip_start;
> + b += skip_start, bn -= skip_start;
> + /* Reduce problem set, by skiping identical tail. */
> + for (skip_end = 0; an && bn; an--, bn--, skip_end++)
> + if (cmp(a[an - 1], b[bn - 1]))
> + break;
> +
> + rc = !transpose ? diff2_(a, b, an, bn, cmp) : diff2_(b, a, bn, an, cmp);
> + if (transpose) {
> + char *path;
> + char trace;
> + for (path = rc; (trace = *--path) != END_OF_PATH;)
> + if (trace)
> + *path = 3 - trace;
> + }
> +
> + /* Add skipped part to the path. */
> + if (skip_start || skip_end) {
> + char *path = rc;
> + size_t path_len;
> + while (*--path != END_OF_PATH);
> + path_len = (size_t)(rc - path);
> + path = erealloc(path, skip_end + path_len + skip_start);
> + if (skip_end) {
> + memmove(path + skip_end + 1, path + 1, path_len - 1);
> + memset(path + 1, 0, skip_end);
> + }
> + memset(path + skip_end + path_len, 0, skip_start);
> + rc = path + skip_end + path_len + skip_start;
> + }
> +
> + return enhance_trace(rc);
> +}
> +
> +static char *
> +get_time_string(const struct stat *attr)
> +{
> + static char buf[sizeof("0000-00-00 00:00:00.000000000 +0000")];
> + struct tm *tm;
> +
> + tm = localtime(&(attr->st_mtime));
> + if (tm == NULL)
> + eperror("localtime");
> +
> +#ifdef st_mtime
> + strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S.000000000 %z", tm);
> + sprintf(buf + (sizeof("0000-00-00 00:00:00.") - 1), "%09lu", attr->st_mtim.tv_nsec);
> + buf[sizeof("0000-00-00 00:00:00.") - 1 + 9] = ' ';
> +#else
> + strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S %z", tm);
> +#endif
> + return buf;
> +}
> +
> +static int
> +get_diff_chunks(struct trace *path, size_t an, size_t bn, struct chunk **head, struct chunk **tail)
> +{
> +#define head (*head)
> +#define tail (*tail)
> + struct trace trace;
> + size_t ai, bi;
> + int ret = 0, suppressed = 1, have_a = 0, have_b = 0;
> +
> + head = ecalloc(an + bn + 1, sizeof(*head));
> + tail = head++;
> +
> + for (ai = bi = 0; (trace = *path++).f != END_OF_PATH;) {
> + if (trace.d > n_context) {
> + suppressed = 1;
> + if (head->chunk) {
> + head->have_a = have_a;
> + head->have_b = have_b;
> + head++;
> + }
> + have_a = have_b = 0;
> + goto next;
> + }
> + if (suppressed) {
> + head->ai = ai;
> + head->bi = bi;
> + head->chunk = path - 1;
> + }
> + have_a |= trace.f == 1;
> + have_b |= trace.f == 2;
> + suppressed = 0;
> + next:
> + ret |= trace.f != 0;
> + ai += trace.f != 2;
> + bi += trace.f != 1;
> + }
> + if (head->chunk) {
> + head->have_a = have_a;
> + head->have_b = have_b;
> + head++;
> + }
> +
> + return ret;
> +#undef head
> +#undef tail
> +}
> +
> +#define OUTPUT_BEGIN\
> + struct trace *path;\
> + size_t ai, bi;\
> + int ret = 0, have_a = 0, have_b = 0;\
> + struct trace *chunk;\
> + struct trace *chunk_old;\
> + struct chunk *head;\
> + struct chunk *tail;\
> + char **a = old->lines;\
> + char **b = new->lines\
> +
> +#define OUTPUT_HEAD(A, B)\
> + printf("%s"A" %s\t%s%s\n", BOLD(old->path, get_time_string(&(old->attr))));\
> + printf("%s"B" %s\t%s%s\n", BOLD(new->path, get_time_string(&(new->attr))))
> +
> +#define OUTPUT_QUEUE\
> + path = diff2(a, b, old->line_count, new->line_count, bflag);\
> + ret = get_diff_chunks(path, old->line_count, new->line_count, &head, &tail);\
> + (void) chunk_old;\
> + for (head = tail;;) {\
> + head++;\
> + ai = head->ai;\
> + bi = head->bi;\
> + have_a = head->have_a;\
> + have_b = head->have_b;\
> + chunk = head->chunk;\
> + if (!chunk)\
> + break
> +
> +#define OUTPUT_STACK\
> + path = diff2(a, b, old->line_count, new->line_count, bflag);\
> + ret = get_diff_chunks(path, old->line_count, new->line_count, &head, &tail);\
> + (void) chunk_old;\
> + for (;;) {\
> + head--;\
> + ai = head->ai;\
> + bi = head->bi;\
> + have_a = head->have_a;\
> + have_b = head->have_b;\
> + chunk = head->chunk;\
> + if (!chunk)\
> + break
> +
> +#define OUTPUT_END\
> + }\
> + free(tail);\
> + free(path);\
> + return ret
> +
> +static int
> +output_unified(struct file_data *old, struct file_data *new)
> +{
> + struct trace *path;
> + struct trace *path_;
> + struct trace trace;
> + size_t ai, bi;
> + char **a;
> + char **b;
> + int ret = 0;
> + int suppressed = 1;
> +
> + path = diff2(old->lines, new->lines, old->line_count, new->line_count, bflag);
> + path_ = path;
> +
> + OUTPUT_HEAD("---", "+++");
> +
> + a = old->lines, b = new->lines;
> + for (ai = bi = 0; (trace = *path++).f != END_OF_PATH;) {
> + char f = trace.f;
> + if (trace.d > n_context) {
> + suppressed = 1;
> + goto next;
> + }
> + if (suppressed) {
> + suppressed = 0;
> + printf("%s@@ -%zu", use_colour ? "\033[36m" : "", ai + 1 - !trace.a_len);
> + if (trace.a_len != 1)
> + printf(",%zu", trace.a_len);
> + printf(" +%zu", bi + 1 - !trace.b_len);
> + if (trace.b_len != 1)
> + printf(",%zu", trace.b_len);
> + printf(" @@%s\n",
> + use_colour ? "\033[m" : "");
> + }
> + if (f == 0)
> + printf(" %s\n", a[ai]);
> + else if (use_colour)
> + printf("\033[3%im%c%s\033[m\n", f, " -+"[(int)f], f == 1 ? a[ai] : b[bi]);
> + else
> + printf("%c%s\n", " -+"[(int)f], f == 1 ? a[ai] : b[bi]);
> + next:
> + ret |= f != 0;
> + ai += f != 2;
> + bi += f != 1;
> + }
> +
> + free(path_);
> + return ret;
> +}
> +
> +static int
> +output_copied(struct file_data *old, struct file_data *new)
> +{
> + OUTPUT_BEGIN;
> + OUTPUT_HEAD("***", "---");
> + OUTPUT_QUEUE;
> +#define PRINT_PART(L, C, S, A)\
> + printf("%s"A" %zu", use_colour ? "\033[1;3"#C"m" : "", L##i + 1 - (!have_##L));\
> + if (chunk->L##_len > 1)\
> + printf(",%zu", L##i + chunk->L##_len);\
> + printf(" "A"%s\n", use_colour ? "\033[m" : "");\
> + for (; have_##L && chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {\
> + if (chunk->f == 0)\
> + printf(" %s\n", L[L##i]);\
> + else if (chunk->f == (3 - C));\
> + else if (use_colour)\
> + printf("\033[3%im%c %s\033[m\n", chunk->ch ? 3 : C, S"!"[chunk->ch], L[L##i]);\
> + else\
> + printf("%c %s\n", S"!"[chunk->ch], L[L##i]);\
> + L##i += chunk->f != (3 - C);\
> + }
> +
> + printf("%s\n", use_colour ? "\033[36m***************\033[m" : "***************");
> + chunk_old = chunk;
> + PRINT_PART(a, 1, "-", "***");
> + chunk = chunk_old;
> + PRINT_PART(b, 2, "+", "---");
> +#undef PRINT_PART
> + OUTPUT_END;
> +}
> +
> +static int
> +output_default(struct file_data *old, struct file_data *new)
> +{
> + OUTPUT_BEGIN;
> + OUTPUT_QUEUE;
> +#define PRINT_PART(L, C, S)\
> + for (; have_##L && chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {\
> + if (chunk->f == 0)\
> + printf(" %s\n", L[L##i]);\
> + else if (chunk->f == (3 - C));\
> + else if (use_colour)\
> + printf("\033[3"#C"m"S" %s\033[m\n", L[L##i]);\
> + else\
> + printf(S" %s\n", L[L##i]);\
> + L##i += chunk->f != (3 - C);\
> + }
> +
> + printf("%s%zu", use_colour ? "\033[36m" : "", ai + 1 - (!have_a));
> + if (chunk->a_len > 1)
> + printf(",%zu", ai + chunk->a_len);
> + printf("%c", " dac"[have_a + 2 * have_b]);
> + printf("%zu", bi + 1 - (!have_b));
> + if (chunk->b_len > 1)
> + printf(",%zu", bi + chunk->b_len);
> + printf("%s\n", use_colour ? "\033[m" : "");
> +
> + chunk_old = chunk;
> + PRINT_PART(a, 1, "<");
> + if (have_a && have_b)
> + printf("%s\n", use_colour ? "\033[36m---\033[m" : "---");
> + chunk = chunk_old;
> + PRINT_PART(b, 2, ">");
> +#undef PRINT_PART
> + OUTPUT_END;
> +}
> +
> +static int
> +output_ed(struct file_data *old, struct file_data *new)
> +{
> + OUTPUT_BEGIN;
> + OUTPUT_STACK;
> + if (!have_b) {
> + printf("%zud\n", ai + 1);
> + } else {
> + int have_dot = 0;
> + printf("%zu", ai + 1 - (!have_a));
> + if (chunk->a_len > 1)
> + printf(",%zu", ai + chunk->a_len);
> + printf("%c\n", "ac"[chunk->ch]);
> + for (; chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {
> + if (chunk->f == 1);
> + else if (use_colour)
> + printf("\033[3%im%s%s\033[m\n", chunk->ch ? 3 : 2,
> + b[bi][0] == '.' ? "." : "", b[bi]);
> + else
> + printf("%s%s\n",
> + b[bi][0] == '.' ? "." : "", b[bi]);
> + have_dot = (chunk->f == 2 && b[bi][0] == '.');
> + if (have_dot)
> + printf(".\ns/.//\na\n");
> + bi += chunk->f != 1;
> + }
> + if (!have_dot)
> + printf(".\n");
> + }
> + OUTPUT_END;
> +}
> +
> +static int
> +output_ed_alternative(struct file_data *old, struct file_data *new)
> +{
> + OUTPUT_BEGIN;
> + OUTPUT_QUEUE;
> + if (!have_b) {
> + printf("d%zu\n", ai + 1);
> + } else {
> + printf("%c%zu", "ac"[chunk->ch], ai + 1 - (!have_a));
> + if (chunk->a_len > 1)
> + printf(" %zu", ai + chunk->a_len);
> + printf("\n");
> + for (; chunk->f != END_OF_PATH && chunk->d <= n_context; chunk++) {
> + if (chunk->f == 1);
> + else if (use_colour)
> + printf("\033[3%im%s\033[m\n", chunk->ch ? 3 : 2, b[bi]);
> + else
> + printf("%s\n", b[bi]);
> + bi += chunk->f != 1;
> + }
> + printf(".\n");
> + }
> + OUTPUT_END;
> +}
> +
> +static int
> +do_binaries_differ(struct file_data *old, struct file_data *new)
> +{
> +#define TURN_INTO_BINARY(f)\
> + if (!f->is_binary) {\
> + char **lines = f->lines;\
> + size_t len = 0, part_len;\
> + for (; *lines; lines++) {\
> + len += 1 + (part_len = strlen(*lines));\
> + (*lines)[part_len] = '\n';\
> + }\
> + f->line_count = len - !f->lf_terminated;\
> + }
> +
> + TURN_INTO_BINARY(old);
> + TURN_INTO_BINARY(new);
> +
> + if (old->line_count != new->line_count)
> + return 1;
> +
> + return memcmp(old->lines[0], new->lines[0], old->line_count);
> +}
> +
> +static int
> +compare_files(struct file_data *old, struct file_data *new)
> +{
> + int ret;
> +
> + if (old->is_binary || new->is_binary) {
> + if (do_binaries_differ(old, new)) {
> + printf("Binary files %s and %s differ\n", old->path, new->path);
> + ret = 2;
> + }
> + return ret;
> + }
> +
> + if (!(eflag || fflag)) {
> + if (!old->lf_terminated)
> + strcpy(strchr(old->lines[old->line_count - 1], '\0'), NO_LF_MARK);
> + if (!new->lf_terminated)
> + strcpy(strchr(new->lines[new->line_count - 1], '\0'), NO_LF_MARK);
> + }
> +
> + ret = (uflag ? output_unified :
> + cflag ? output_copied :
> + eflag ? output_ed :
> + fflag ? output_ed_alternative :
> + output_default)(old, new);
> +
> + if (eflag || fflag) {
> + if (!old->lf_terminated)
> + fprintf(stderr, "%s: %s: No newline at end of file\n\n", argv0, old->path);
> + if (!new->lf_terminated)
> + fprintf(stderr, "%s: %s: No newline at end of file\n\n", argv0, new->path);
> + ret = (!old->lf_terminated || !new->lf_terminated) ? 2 : ret;
> + }
> +
> + return ret;
> +}
> +
> +static int
> +compare_directories(const char *old, const char *new, const char *diff_line)
> +{
> +#define GET_FILENAME(buf, i)\
> + (buf = emalloc(strlen(paths[i]) + strlen(file->d_name) + 2),\
> + stpcpy(stpcpy(stpcpy(buf, paths[i]), "/"), file->d_name))
> +
> + int ret = 0, r, i = 0, j = 1;
> + DIR *dir;
> + const char *paths[2] = { old, new };
> + struct dirent *file;
> + struct file_data *a;
> + struct file_data *b;
> + char *b_path;
> + char *a_path;
> + struct stat a_attr;
> + struct stat b_attr;
> +
> +again:
> + dir = opendir(paths[i]);
> + if (!dir)
> + eperror(paths[i]);
> + while ((errno = 0, file = readdir(dir))) {
> + if (!strcmp(file->d_name, ".") || !strcmp(file->d_name, ".."))
> + continue;
> + GET_FILENAME(b_path, j);
> + if (access(b_path, F_OK)) {
> + printf("%sOnly i %s: %s%s\n", BOLD(paths[i], file->d_name));
> + ret = ret > 1 ? ret : 1;
> + goto next;
> + } else if (i == 1) {
> + goto next;
> + }
> + GET_FILENAME(a_path, i);
> +
> + if (stat(a_path, &a_attr))
> + eperror(a_path);
> + if (stat(b_path, &b_attr))
> + eperror(a_path);
> +
> + if (a_attr.st_dev == b_attr.st_dev && a_attr.st_ino == b_attr.st_ino)
> + goto skip;
> + /* POSIX specifies that if a and b refer to the same special device,
> + * there should be no comparision. This seems unnecessary since it
> + * also specifies that special devices and FIFO:s shall not be compared.
> + * We extend this to not compare sockets either. POSIX says that it
> + * is implementation-specified for other types than special files,
> + * FIFO:s, regular files and directories. */
> +#define IS_INCOMMENSURABLE(mode) (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || S_ISSOCK(mode))
> + if (IS_INCOMMENSURABLE(a_attr.st_mode) || IS_INCOMMENSURABLE(b_attr.st_mode))
> + goto skip;
> +
> + a = load_lines(a_path);
> + b = load_lines(b_path);
> +
> + if (!a ^ !b) {
> + printf("%sFile %s is a %s while file %s is a %s%s\n",
> + BOLD(a_path, CLASSIFY(a), b_path, CLASSIFY(b)));
> + ret = ret > 1 ? ret : 1;
> + } else if (!a && !b && !rflag) {
> + printf("%sCommon subdirectories: %s and %s%s\n", BOLD(a_path, b_path));
> + ret = ret > 1 ? ret : 1;
> + } else if (!a && !b) {
> + r = compare_directories(a_path, b_path, diff_line);
> + ret = ret > r ? ret : r;
> + } else {
> + printf("%s%s %s %s%s\n", BOLD(diff_line, a_path, b_path));
> + r = compare_files(a, b);
> + ret = ret > r ? ret : r;
> + }
> +
> + free(a);
> + free(b);
> + skip:
> + free(a_path);
> + next:
> + free(b_path);
> + }
> + if (errno)
> + eperror("readdir");
> + closedir(dir);
> +
> +
> + if (i)
> + return ret;
> + i = 1, j = 0;
> + goto again;
> +}
> +
> +int
> +main(int argc, char *argv[])
> +{
> + struct file_data *old;
> + struct file_data *new;
> + char *old_proper = 0;
> + char *new_proper = 0;
> + int ret;
> + char *diff_line = 0;
> + char *p;
> +
> + /* Construct the 'diff OPTIONS FILE-1 FILE-2' line used diff:ing directories. */
> + if (argc > 2) {
> + size_t len = 0;
> + int i;
> + p = strrchr(argv[0], '/');
> + if (p)
> + argv[0] = p + 1;
> + for (i = 0; i < argc - 2; i++)
> + len += strlen(argv[i]) + 1;
> + p = diff_line = emalloc(len + 1);
> + for (i = 0; i < argc - 2; i++)
> + p = stpcpy(stpcpy(p, argv[i]), " ");
> + p[-1] = 0;
> + }
> +
> + ARGBEGIN {
> + case 'b': bflag++; break;
> + case 'c': cflag++; n_context = 3; break;
> + case 'C': cflag++; n_context = atol(EARGF(usage())); break;
> + case 'e': eflag++; break;
> + case 'f': fflag++; break;
> + case 'u': uflag++; n_context = 3; break;
> + case 'U': uflag++; n_context = atol(EARGF(usage())); break;
> + case 'r': rflag++; break;
> + default:
> + usage();
> + } ARGEND;
> + /* Use of `atol` is intentional, '-U -1' and '-C -1' shall display the entire file. */
> +
> + if (argc != 2 || (bflag | rflag) > 1 || cflag + eflag + fflag + uflag > 1)
> + usage();
> +
> + use_colour = isatty(STDOUT_FILENO);
> +
> +redo:
> + old = load_lines(old_proper ? old_proper : argv[0]);
> + new = load_lines(new_proper ? new_proper : argv[1]);
> +
> + if ((old_proper || new_proper) && (!old || !new)) {
> + printf("%sFile %s is a %s while file %s is a %s%s\n",
> + BOLD(old_proper ? old_proper : argv[0], CLASSIFY(old),
> + new_proper ? new_proper : argv[1], CLASSIFY(new)));
> + ret = 1;
> + } else if (!old && new) {
> + old_proper = emalloc(strlen(argv[0]) + strlen(argv[1]) + 2);
> + stpcpy(stpcpy(stpcpy(old_proper, argv[0]), "/"), basename(argv[1]));
> + goto redo;
> + } else if (old && !new) {
> + old_proper = emalloc(strlen(argv[0]) + strlen(argv[1]) + 2);
> + stpcpy(stpcpy(stpcpy(old_proper, argv[0]), "/"), basename(argv[1]));
> + goto redo;
> + } else if (!old && !new) {
> + ret = compare_directories(argv[0], argv[1], diff_line);
> + } else {
> + ret = compare_files(old, new);
> + }
> +
> +done:
> + if (fshut(stdout, "<stdout>"))
> + ret = EXIT_FAILURE;
> +
> + free(old);
> + free(new);
> + free(old_proper);
> + free(new_proper);
> + free(diff_line);
> + return ret;
> +}
> --
> 2.7.0
>
>

Some notes from me:

- I would avoid using variable-length arrays (VLAs); use a sane subset of C99.
- Use snprintf instead of sprintf, and check its return value.
- Use strlcpy or snprintf instead of strcpy.
- Avoid using the "inline" #defines.
- Disable colour output by default, maybe just add a flag for it to
explicitly enable it.
- We should also allow using stdin for one of the file inputs (so we
can't "reread" the file): diff file.c -

Kind regards,
Hiltjo
Received on Sun Jan 31 2016 - 12:39:34 CET

This archive was generated by hypermail 2.3.0 : Sun Jan 31 2016 - 12:48:13 CET