Re: [hackers] [PATCH][sbase] Add patch(1)

From: Mattias Andrée <maandree_AT_kth.se>
Date: Sun, 24 Sep 2017 18:28:57 +0200

On Sun, 24 Sep 2017 14:08:41 +0200
Silvan Jegen <s.jegen_AT_gmail.com> wrote:

> Heyho Mattias!
>
> I had a look at the patch. It's a lot of code (still only about 1/3 of
> GNU's patch size though) and it was rather hard for me to follow so more
> review should be done. Find my comments below.
>
> On Sun, Sep 03, 2017 at 07:13:20PM +0200, Mattias Andrée wrote:
> > +static void
> > +save_file_cpp(FILE *f, struct file_data *file)
> > +{
> > + size_t i, j, n;
> > + char annot = ' ';
> > +
> > + for (i = 0; i <= file->n; i++) {
> > + if ((n = file->d[i].nold)) {
>
> In other places you iterate with "i < file->n" (see save_file below for
> example) so I think this may be an off-by-one error.

There is an if-statement, that breaks the loop if `i == file->`,
after this clause, so this should be correct. I'll add blank lines
around that if-statement to make it clearer.

>
>
> > + fprintf(f, "%s\n", annot == '+' ? "#else" : ifndef);
> > + for (j = 0; j < n; j++) {
> > + fwriteline(f, file->d[i].old[j]);
> > + if (missinglf(file->d[i].old[j]))
> > + fprintf(f, "\n");
> > + }
> > + annot = '-';
> > + }
> > + if (i == file->n)
> > + break;
> > + if (annot == '-')
> > + fprintf(f, "%s\n", file->d[i].new ? "#else" : "#endif");
> > + else if (annot == ' ' && file->d[i].new)
> > + fprintf(f, "%s\n", ifdef);
> > + else if (annot == '+' && !file->d[i].new)
> > + fprintf(f, "#endif\n");
> > + fwriteline(f, file->d[i].line);
> > + if ((i + 1 < file->n || file->d[i].new) && missinglf(file->d[i].line))
> > + fprintf(f, "\n");
> > + annot = file->d[i].new ? '+' : ' ';
> > + }
> > + if (annot != ' ')
> > + fprintf(f, "#endif\n");
> > +}
> > +
> > +static void
> > +parse_hunk_copied(struct hunk *hunk, struct parsed_hunk *parsed)
> > +{
> > + struct hunk_content *old = &parsed->old, *new = &parsed->new;
> > + size_t i = 0, a, b;
> > + char *p;
> > +
> > + free(hunk->head->data);
> > +
> > + old->lines = enmalloc(FAILURE, hunk->nlines * sizeof(*old->lines));
> > + new->lines = enmalloc(FAILURE, hunk->nlines * sizeof(*new->lines));
> > + parsed->annot = enmalloc(FAILURE, hunk->nlines + 1);
> > +
> > + p = hunk->lines[i++].data + 4;
> > + old->start = strtoul(p, &p, 10);
> > + old->len = 0;
> > +
> > + for (; hunk->lines[i].data[1] == ' '; i++)
> > + subline(old->lines + old->len++, hunk->lines + i, 2);
> > +
> > + p = hunk->lines[i++].data + 4;
> > + new->start = strtoul(p, &p, 10);
> > + new->len = 0;
> > +
> > + if (old->len) {
> > + for (; i < hunk->nlines; i++)
> > + subline(new->lines + new->len++, hunk->lines + i, 2);
> > + } else {
> > + for (; i < hunk->nlines; i++) {
> > + subline(new->lines + new->len++, hunk->lines + i, 2);
> > + if (hunk->lines[i].data[0] != '+')
> > + subline(old->lines + old->len++, hunk->lines + i, 2);
> > + }
> > + }
>
> I think this if-else block can be rewritten like this.
>
> for (; i < hunk->nlines; i++) {
> subline(new->lines + new->len++, hunk->lines + i, 2);
> if (old->len == 0 && hunk->lines[i].data[0] != '+')
> subline(old->lines + old->len++, hunk->lines + i, 2);
> }

I will use `!old->len` instead of `old->len == 0`.

>
>
> > +
> > + if (!new->len)
> > + for (i = 0; i < old->len; i++)
> > + if (old->lines[i].data[-2] != '-')
>
> I think according to the standard, refering to data[-2] invokes undefined
> behaviour, since at this time, data points to the beginning of the array.

I'm not sure whether it is defined or undefined; I would think that it
defined, but that adding integers larger than INTPTR_MAX is undefined.
I will change to `*(data - 2)` as this is clearly defined.

>
>
> > + new->lines[new->len++] = old->lines[i];
> > +
> > +#define OLDLINE a < old->len && old->lines[a].data[-2]
> > +#define NEWLINE b < new->len && new->lines[b].data[-2]
> > +
> > + for (i = a = b = 0; a < old->len || b < new->len;) {
> > + if (OLDLINE == '-') parsed->annot[i++] = '-', a++;
> > + if (NEWLINE == '+') parsed->annot[i++] = '+', b++;
> > + while (OLDLINE == ' ' && NEWLINE == ' ')
> > + parsed->annot[i++] = ' ', a++, b++;
> > + while (OLDLINE == '!') parsed->annot[i++] = '<', a++;
> > + while (NEWLINE == '!') parsed->annot[i++] = '>', b++;
> > + }
> > + parsed->annot[i] = 0;
> > +}
> > +
> > +static void
> > +apply_contiguous_edit(struct file_data *f, size_t ln, size_t rm, size_t ad, struct line *newlines, const char *annot)
> > +{
> > +#define LN (f->d[ln])
> > +
> > + size_t rm_end, ad_end, n, a, b, start, extra, i, j, k;
> > + struct line_data *orig;
> > +
> > + rm_end = ln + rm;
> > + ad_end = ln + ad;
> > + n = f->n;
> > +
> > + orig = enmemdup(FAILURE, f->d + ln, (rm + 1) * sizeof(*f->d));
> > + memmove(f->d + ln, f->d + rm_end, (n - rm_end + 1) * sizeof(*f->d));
> > +
> > + f->n -= rm;
> > + if (f->n == 1 && !*(f->d->line.data))
> > + f->n--, n--;
> > + f->n += ad;
> > +
> > + f->d = enrealloc(FAILURE, f->d, (f->n + 1) * sizeof(*f->d));
> > + memmove(f->d + ad_end, f->d + ln, (n - rm_end + 1) * sizeof(*f->d));
> > + memset(f->d + ln, 0, ad * sizeof(*f->d));
> > +
> > + for (i = a = b = 0; a < rm || b < ad; ln++) {
> > + for (start = i, extra = 0; a < rm && strchr("<-", annot[i]); a++, i++)
> > + extra += orig[a].nold;
> > + if (start < i) {
> > + n = i - start + extra;
> > + a -= i - start;
> > + LN.old = enrealloc(FAILURE, LN.old, (LN.nold + n) * sizeof(*f->d->old));
> > + memmove(LN.old + n, LN.old, LN.nold * sizeof(*f->d->old));
> > + for (j = extra = 0; j < n - extra; a++) {
> > + for (k = 0; k < orig[a].nold; k++)
> > + linedup(LN.old + j++, orig[a].old + k);
> > + if (orig[a].orig)
> > + linedup(LN.old + j++, &orig[a].line);
> > + else
> > + extra++;
> > + }
> > + memmove(LN.old + n - extra, LN.old + n, LN.nold * sizeof(*f->d->old));
> > + LN.nold += n - extra;
> > + }
> > + if (b == ad)
> > + continue;
> > + if (annot[i++] == ' ') {
> > + LN.line = orig[a].line;
> > + LN.orig = orig[a].orig;
> > + LN.new = orig[a].new;
> > + a++, b++;
> > + } else {
> > + LN.new = 1;
> > + linedup(&LN.line, newlines + b++);
> > + }
> > + }
>
> I can't claim to understand everything you are doing in this loop but
> from what I can tell, you are moving and reallocating a lot of lines
> in f->d. It seems to me that using a linked-list there would simplify a
> lot of the patch handling since it would allow you to easily insert or
> replace lines in the out file instead of having to move around memory.

I'm not sure a linked list with simply the code, however, adding
functions for editing the lists might make the code look cleaner.

>
>
> > +
> > + free(orig);
> > +}
> > +
> > +static int
> > +parse_ed_script(struct patch *patch, struct file_data *file)
> > +{
> > + [...]
> > +
> > + for (i = 0, j = patch->nhunks - 1; i < j; i++, j--) {
> > + temp = patch->hunks[i];
> > + patch->hunks[i] = patch->hunks[j];
> > + patch->hunks[j] = temp;
> > + }
>
> Is there a particular reason for which you reversing the hunks order here?

If I recall correctly, it is because ed-patches lists edits
from the bottom rather than from the top.
See https://github.com/maandree/base-util-tests/blob/master/patch-test/ed/patch-e

>
>
> > +
> > + for (i = 0; i < patch->nhunks; i++) {
> > + hunk = patch->hunks + i;
> > + start = strtoul(hunk->head->data + 4, &p, 10);
> > + count = strtoul(p + 1, &p, 10);
> > + p = strchr(q = p + 2, ',') + 1;
> > + added = strtoul(p, 0, 10);
> > + q += sprintf(q, "%zu", start + offset - !added + !count);
> > + *q++ = ',';
> > + memmove(q, p, strlen(p) + 1);
> > + offset -= count;
> > + offset += added;
> > + hunk->head->len = strlen(hunk->head->data);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +int
> > +main(int argc, char *argv[])
> > +{
> > + [...]
> > +
> > + if (Dflag) {
> > + p = Dflag + (*Dflag == '!');
> > + if (!strcmp(p, "0") || !strcmp(p, "1")) {
> > + enasprintf(FAILURE, &ifdef, "#if %s", p);
> > + enasprintf(FAILURE, &ifndef, "#if %i", 1 - (*p - '0'));
> > + } else {
> > + enasprintf(FAILURE, &ifdef, "#ifdef %s", p);
> > + enasprintf(FAILURE, &ifndef, "#ifndef %s", p);
> > + }
> > + if (*Dflag == '!')
> > + p = ifdef, ifdef = ifndef, ifndef = p;
> > + }
> > +
> > + load_lines(patchfile, &patchfile_data, 1, 0);
> > + parse_patchfile(&patchset, get_lines(&patchfile_data));
>
> Nitpick: both patchfile_data and patchset seem to be out arguments. I
> would have them consistently in the same parameter position. Now
> patchfile_data is the second argument to load_lines but patchset is the
> first one of parse_patchfile.
>
> The memory allocated by get_lines is never freed but that shouldn't be
> an issue in practice.
>
> Thanks for the patch!
>
>
> Cheers,
>
> Silvan
>

Received on Sun Sep 24 2017 - 18:28:57 CEST

This archive was generated by hypermail 2.3.0 : Sun Sep 24 2017 - 18:36:23 CEST