---
diff.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/diff.c b/diff.c
index 558f834..95d3533 100644
--- a/diff.c
+++ b/diff.c
_AT_@ -213,6 +213,14 @@ strcmp_rstrip_a(char *a, char *b)
static char *
diff2_minimal(char **a, char **b, size_t an, size_t bn, int (*cmp)(char *, char *))
{
+ /* This function calculates the minimal edit set for getting from `a`
+ * to `b`. But rather than using an algorithm that calculates the
+ * smallest changes set, this function uses an algorithm for calculating
+ * the longest common non-consecutive subsequence, which is an identical
+ * (with the allowed change operations) except for the value returned
+ * at the end, which is not inspected anyways. This algorithms has the
+ * advantages of requiring simpler initialisation. */
+
char (*map)[an + 1][bn + 1] = emalloc(sizeof(char[an + 1][bn + 1]));
size_t (*matrix)[2][bn + 1] = ecalloc(1, sizeof(size_t[2][bn + 1]));
char *rc;
_AT_@ -226,13 +234,18 @@ diff2_minimal(char **a, char **b, size_t an, size_t bn, int (*cmp)(char *, char
size_t *this = (*matrix)[mi ^= 1];
(*map)[ai][0] = 1;
for (bi = 1; bi <= bn; bi++) {
- if (!cmp(a[ai], b[bi])) {
- this[bi] = last[bi - 1] + 1;
+ size_t u = last[bi];
+ size_t d = last[bi - 1] + 1;
+ size_t l = this[bi - 1];
+ size_t lu = l >= u ? l : u;
+ if (!cmp(a[ai], b[bi]) && d > lu) {
+ /* The > in this comparison makes changes happen as late
+ * as possible. >= would make the changes happen as early
+ * as possible. */
+ this[bi] = d;
(*map)[ai][bi] = 0;
} else {
- size_t u = last[bi];
- size_t l = this[bi - 1];
- this[bi] = l >= u ? l : u;
+ this[bi] = lu;
(*map)[ai][bi] = 1 + (l >= u);
}
}
--
2.7.0
Received on Sun Jan 31 2016 - 23:47:46 CET
This archive was generated by hypermail 2.3.0 : Mon Feb 01 2016 - 00:00:30 CET