[dev] [sbase] diff

From: Mattias Andrée <maandree_AT_member.fsf.org>
Date: Wed, 27 Jan 2016 20:18:17 +0100

The beginning of an implementation of diff.

It only does the bare essentials, and it is not
usable implemention of diff yet.

Signed-off-by: Mattias Andrée <maandree_AT_kth.se>
---
 LICENSE  |   1 +
 Makefile |   1 +
 diff.c   | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 209 insertions(+)
 create mode 100644 diff.c
diff --git a/LICENSE b/LICENSE
index cb5a797..2a26979 100644
--- a/LICENSE
+++ b/LICENSE
_AT_@ -59,3 +59,4 @@ Authors/contributors include:
 © 2015 Quentin Rameau <quinq_AT_quinq.eu.org>
 © 2015 Dionysis Grigoropoulos <info_AT_erethon.com>
 © 2015 Wolfgang Corcoran-Mathe <first.lord.of.teal_AT_gmail.com>
+© 2016 Mattias Andrée <maandree_AT_kth.se>
diff --git a/Makefile b/Makefile
index 1c09cac..74e071e 100644
--- a/Makefile
+++ b/Makefile
_AT_@ -89,6 +89,7 @@ BIN =\
 	cron\
 	cut\
 	date\
+	diff\
 	dirname\
 	du\
 	echo\
diff --git a/diff.c b/diff.c
new file mode 100644
index 0000000..21f9849
--- /dev/null
+++ b/diff.c
_AT_@ -0,0 +1,207 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "util.h"
+
+#define END_OF_PATH  127
+
+#define printf(...)  do  {if (printf(__VA_ARGS__) < 0) perror("printf"), exit(EXIT_FAILURE); } while(0)
+
+struct file_data {
+	char **lines;
+	size_t line_count;
+	int lf_terminated;
+};
+
+struct output {
+	char colour;
+	char* text;
+};
+
+static struct output *red = 0;
+static struct output *green = 0;
+
+static struct file_data *
+load_lines(const char *pathname)
+{
+	int fd;
+	char *buffer;
+	char *p;
+	char *end;
+	size_t ptr, size, n;
+	ssize_t m;
+	struct file_data* rc;
+
+	fd = open(pathname, O_RDONLY);
+	if (fd == -1)
+		perror(pathname), exit(EXIT_FAILURE);
+
+	ptr = 0;
+	buffer = emalloc((size = 8096) + 1);
+	for (;;) {
+		if (ptr == size)
+			buffer = erealloc(buffer, (size <<= 1) + 1);
+		m = read(fd, buffer + ptr, size - ptr);
+		if (m < 0)
+			perror(pathname), exit(EXIT_FAILURE);
+		if (m == 0)
+			break;
+		ptr += (size_t)m;
+	}
+	buffer[ptr] = 0;
+
+	close(fd);
+
+	for (n = 1, p = buffer;; n += 1) {
+		char *lf = strchr(p, '\n');
+		if (!lf)
+			break;
+		p = lf + 1;
+	}
+	if (strchr(p, '\0') != buffer + ptr)
+		enprintf(1, "%s: file is binary\n", pathname);
+
+	rc = erealloc(buffer, sizeof(*rc) + (n + 1) * sizeof(char *) + (ptr + 1));
+	buffer = ((char *)rc) + sizeof(*rc) + (n + 1) * sizeof(char *);
+	memmove(buffer, rc, ptr);
+	rc->lines = (char **)((char *)rc + sizeof(*rc));
+	rc->lf_terminated = ptr && buffer[ptr - 1] == '\n';
+	rc->line_count = n -= rc->lf_terminated;
+	buffer[ptr - rc->lf_terminated] = 0;
+
+	rc->lines[n] = 0;
+	for (ptr = 0, p = buffer; p; p = end) {
+		end = strchr(p, '\n');
+		if (end)
+		  *end++ = 0;
+		rc->lines[ptr++] = p;
+	}
+
+	return rc;
+}
+
+static char *
+diff2(char **a, char **b, size_t an, size_t bn)
+{
+	size_t **matrix = emalloc((an + 1) * sizeof(size_t *) + (an + 1) * (bn + 1) * sizeof(size_t));
+	char   **map    = emalloc((an + 1) * sizeof(char   *) + (an + 1) * (bn + 1) * sizeof(char));
+	size_t ai, bi, ri = 0;
+	char *rc;
+
+	for (ai = 0; ai <= an; ai++) {
+		matrix[ai] = (size_t *)(matrix + an + 1) + ai * (bn + 1);
+		map   [ai] = (char   *)(map    + an + 1) + ai * (bn + 1);
+		matrix[ai][0] = ai;
+		map   [ai][0] = 1;
+	}
+	for (bi = 0; bi <= bn; bi++) {
+		matrix[0][bi] = bi;
+		map   [0][bi] = 2;
+	}
+	map[0][0] = 0;
+
+	a--, b--;
+
+	for (ai = 1; ai <= an; ai++) {
+		for (bi = 1; bi <= bn; bi++) {
+			size_t d = matrix[ai - 1][bi - 1];
+			size_t u = matrix[ai - 1][bi    ];
+			size_t l = matrix[ai    ][bi - 1];
+			size_t lu = 1 + (l < u ? l : u);
+			int ch = strcmp(a[ai], b[bi]); /* remember !! if using d += ch */
+			matrix[ai][bi] = (lu < d || ch) ? lu : d;
+			map   [ai][bi] = (lu < d || ch) ? 1 + (l < u) : 0;
+		}
+	}
+
+	rc = emalloc(an + bn + 2);
+	rc[ri++] = END_OF_PATH;
+	for (ai = an, bi = bn; ai + bi; ri++) {
+		rc[ri] = map[ai][bi];
+		ai -= rc[ri] != 2;
+		bi -= rc[ri] != 1;
+	}
+
+	free(matrix);
+	free(map);
+	return rc + ri;
+}
+
+static void
+flush_output(struct output *output, size_t n)
+{
+	if (!n)
+		return;
+
+	if (output->colour == 0) {
+		printf("%s%s\n", " ", output->text);
+	} else {
+		size_t colour_n[] = { [1] = 0, [2] = 0 }, i;
+		static size_t last_size = 0;
+
+		if (last_size < n) {
+			red   = erealloc(red,   n * sizeof(*red));
+			green = erealloc(green, n * sizeof(*green));
+			last_size = n;
+		}
+
+		for (i = 0; i < n; i++)
+			(output[i].colour == 1 ? red : green)[colour_n[(int)(output[i].colour)]++] = output[i];
+
+		for (i = 0, n = colour_n[1]; i < n; i++)
+			printf("\033[31m-%s\033[m\n", red[i].text);
+		for (i = 0, n = colour_n[2]; i < n; i++)
+			printf("\033[32m+%s\033[m\n", green[i].text);
+	}
+}
+
+int
+main(int argc, char *argv[])
+{
+	struct file_data *old;
+	struct file_data *new;
+	char *path;
+	char trace;
+	struct output *output;
+	size_t n, ai, bi;
+	char **a;
+	char **b;
+	int ret = 0;
+
+	argv0 = argv[0];
+
+	old = load_lines(argv[1]);
+	new = load_lines(argv[2]);
+
+	a = old->lines, b = new->lines;
+	path = diff2(a, b, old->line_count, new->line_count);
+	
+	output = emalloc((old->line_count + new->line_count) * sizeof(*output));
+	for (n = ai = bi = 0; (trace = *--path) != END_OF_PATH;) {
+		if (trace) {
+			output[n++] = (struct output){ trace, trace == 1 ? a[ai] : b[bi] };
+		} else {
+			flush_output(output, n), n = 0;
+			output[n++] = (struct output){ trace, a[ai] };
+			flush_output(output, n), n = 0;
+		}
+		ai += trace != 2;
+		bi += trace != 1;
+	}
+	flush_output(output, n);
+
+	if (fshut(stdout, "<stdout>"))
+		ret = 2;
+
+	free(red);
+	free(green);
+	free(old);
+	free(new);
+	free(path);
+	free(output);
+	return ret;
+}
-- 
2.7.0
Received on Wed Jan 27 2016 - 20:18:17 CET

This archive was generated by hypermail 2.3.0 : Wed Jan 27 2016 - 20:24:10 CET