--- diff.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/diff.c b/diff.c index 558f834..95d3533 100644 --- a/diff.c +++ b/diff.c _AT_@ -213,6 +213,14 @@ strcmp_rstrip_a(char *a, char *b) static char * diff2_minimal(char **a, char **b, size_t an, size_t bn, int (*cmp)(char *, char *)) { + /* This function calculates the minimal edit set for getting from `a` + * to `b`. But rather than using an algorithm that calculates the + * smallest changes set, this function uses an algorithm for calculating + * the longest common non-consecutive subsequence, which is an identical + * (with the allowed change operations) except for the value returned + * at the end, which is not inspected anyways. This algorithms has the + * advantages of requiring simpler initialisation. */ + char (*map)[an + 1][bn + 1] = emalloc(sizeof(char[an + 1][bn + 1])); size_t (*matrix)[2][bn + 1] = ecalloc(1, sizeof(size_t[2][bn + 1])); char *rc; _AT_@ -226,13 +234,18 @@ diff2_minimal(char **a, char **b, size_t an, size_t bn, int (*cmp)(char *, char size_t *this = (*matrix)[mi ^= 1]; (*map)[ai][0] = 1; for (bi = 1; bi <= bn; bi++) { - if (!cmp(a[ai], b[bi])) { - this[bi] = last[bi - 1] + 1; + size_t u = last[bi]; + size_t d = last[bi - 1] + 1; + size_t l = this[bi - 1]; + size_t lu = l >= u ? l : u; + if (!cmp(a[ai], b[bi]) && d > lu) { + /* The > in this comparison makes changes happen as late + * as possible. >= would make the changes happen as early + * as possible. */ + this[bi] = d; (*map)[ai][bi] = 0; } else { - size_t u = last[bi]; - size_t l = this[bi - 1]; - this[bi] = l >= u ? l : u; + this[bi] = lu; (*map)[ai][bi] = 1 + (l >= u); } } -- 2.7.0Received on Sun Jan 31 2016 - 23:47:46 CET
This archive was generated by hypermail 2.3.0 : Mon Feb 01 2016 - 00:00:30 CET