ref: 14806f5a6b9c28df86a14e726260520c733faa17
parent: 33d0b194f3b0df7b830e643c5a9847b6f9a1f74c
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Feb 12 13:18:57 EST 2023
merge3: implement it.
--- a/diff.c
+++ b/diff.c
@@ -3,32 +3,9 @@
#include <bio.h>
#include "diff.h"
-#define DIRECTORY(s) ((s)->qid.type&QTDIR)
-#define REGULAR_FILE(s) ((s)->type == 'M' && !DIRECTORY(s))
-
-Biobuf stdout;
-char mode; /* '\0', 'e', 'f', 'h' */
-char bflag; /* ignore multiple and trailing blanks */
-char rflag; /* recurse down directory trees */
-char mflag; /* pseudo flag: doing multiple files, one dir */
-int anychange;
-
-static char *tmp[] = {"/tmp/diff1XXXXXXXXXXX", "/tmp/diff2XXXXXXXXXXX"};
-static int whichtmp;
-
-static void
-rmtmpfiles(void)
-{
- while (whichtmp > 0) {
- whichtmp--;
- remove(tmp[whichtmp]);
- }
-}
-
void
done(int status)
{
- rmtmpfiles();
switch(status)
{
case 0:
@@ -38,142 +15,9 @@
default:
exits("error");
}
- /*NOTREACHED*/
}
void
-panic(int status, char *fmt, ...)
-{
- va_list arg;
-
- Bflush(&stdout);
-
- fprint(2, "%s: ", argv0);
- va_start(arg, fmt);
- vfprint(2, fmt, arg);
- va_end(arg);
- if (status)
- done(status);
- /*NOTREACHED*/
-}
-
-static int
-catch(void *a, char *msg)
-{
- USED(a);
- panic(2, msg);
- return 1;
-}
-
-int
-mkpathname(char *pathname, char *path, char *name)
-{
- if (strlen(path) + strlen(name) > MAXPATHLEN) {
- panic(0, "pathname %s/%s too long\n", path, name);
- return 1;
- }
- sprint(pathname, "%s/%s", path, name);
- return 0;
-}
-
-static char *
-mktmpfile(int input, Dir **sb)
-{
- int fd, i;
- char *p;
- char buf[8192];
-
- atnotify(catch, 1);
- p = mktemp(tmp[whichtmp++]);
- fd = create(p, OWRITE, 0600);
- if (fd < 0) {
- panic(mflag ? 0: 2, "cannot create %s: %r\n", p);
- return 0;
- }
- while ((i = read(input, buf, sizeof(buf))) > 0) {
- if ((i = write(fd, buf, i)) < 0)
- break;
- }
- *sb = dirfstat(fd);
- close(fd);
- if (i < 0) {
- panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p);
- return 0;
- }
- return p;
-}
-
-static char *
-statfile(char *file, Dir **sb)
-{
- Dir *dir;
- int input;
-
- dir = dirstat(file);
- if(dir == nil) {
- if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) {
- panic(mflag ? 0: 2, "cannot stat %s: %r\n", file);
- return 0;
- }
- free(dir);
- return mktmpfile(0, sb);
- } else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) {
- free(dir);
- if ((input = open(file, OREAD)) == -1) {
- panic(mflag ? 0: 2, "cannot open %s: %r\n", file);
- return 0;
- }
- file = mktmpfile(input, sb);
- close(input);
- } else
- *sb = dir;
- return file;
-}
-
-void
-diff(char *f, char *t, int level)
-{
- char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
- Dir *fsb, *tsb;
-
- if ((fp = statfile(f, &fsb)) == 0)
- goto Return;
- if ((tp = statfile(t, &tsb)) == 0){
- free(fsb);
- goto Return;
- }
- if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
- if (rflag || level == 0)
- diffdir(fp, tp, level);
- else
- Bprint(&stdout, "Common subdirectories: %s and %s\n", fp, tp);
- }
- else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
- diffreg(fp, f, tp, t);
- else {
- if (REGULAR_FILE(fsb)) {
- if ((p = utfrrune(f, '/')) == 0)
- p = f;
- else
- p++;
- if (mkpathname(tb, tp, p) == 0)
- diffreg(fp, f, tb, t);
- } else {
- if ((p = utfrrune(t, '/')) == 0)
- p = t;
- else
- p++;
- if (mkpathname(fb, fp, p) == 0)
- diffreg(fb, f, tp, t);
- }
- }
- free(fsb);
- free(tsb);
-Return:
- rmtmpfiles();
-}
-
-void
usage(void)
{
fprint(2, "usage: %s [-abcefmnrw] file1 ... file2\n", argv0);
@@ -219,14 +63,14 @@
if (argc < 2)
usage();
if ((tsb = dirstat(argv[argc-1])) == nil)
- panic(2, "can't stat %s\n", argv[argc-1]);
+ sysfatal("can't stat %s", argv[argc-1]);
if (argc > 2) {
if (!DIRECTORY(tsb))
- panic(2, "not directory: %s", argv[argc-1]);
+ sysfatal("not directory: %s", argv[argc-1]);
mflag = 1;
} else {
if ((fsb = dirstat(argv[0])) == nil)
- panic(2, "can't stat %s\n", argv[0]);
+ sysfatal("can't stat %s", argv[0]);
if (DIRECTORY(fsb) && DIRECTORY(tsb))
mflag = 1;
free(fsb);
@@ -234,28 +78,7 @@
free(tsb);
for (i = 0; i < argc-1; i++)
diff(argv[i], argv[argc-1], 0);
+
done(anychange);
/*NOTREACHED*/
-}
-
-static char noroom[] = "out of memory - try diff -h\n";
-
-void *
-emalloc(unsigned n)
-{
- register void *p;
-
- if ((p = malloc(n)) == 0)
- panic(2, noroom);
- return p;
-}
-
-void *
-erealloc(void *p, unsigned n)
-{
- void *rp;
-
- if ((rp = realloc(p, n)) == 0)
- panic(2, noroom);
- return rp;
}
--- a/diff.h
+++ b/diff.h
@@ -41,6 +41,8 @@
char *file1;
char *file2;
Biobuf *input[2];
+ Biobuf *b0;
+ Biobuf *b1;
int firstchange;
Change *changes;
int nchanges;
@@ -55,15 +57,21 @@
#define MAXPATHLEN 1024
+#define DIRECTORY(s) ((s)->qid.type&QTDIR)
+#define REGULAR_FILE(s) ((s)->type == 'M' && !DIRECTORY(s))
+
int mkpathname(char *, char *, char *);
+char *mktmpfile(int, Dir **);
+char *statfile(char *, Dir **);
void *emalloc(unsigned);
void *erealloc(void *, unsigned);
void diff(char *, char *, int);
+void diffreg(char*, char*, char*, char*);
void diffdir(char *, char *, int);
-void diffreg(char *, char *, char *, char *);
+void calcdiff(Diff *, char *, char *, char *, char *);
Biobuf *prepare(Diff*, int, char *, char *);
-void panic(int, char *, ...);
void check(Diff *, Biobuf *, Biobuf *);
void change(Diff *, int, int, int, int);
-void flushchanges(Diff *d);
-
+void freediff(Diff *);
+void flushchanges(Diff *);
+void fetch(Diff *d, long *f, int a, int b, Biobuf *bp, char *s);
--- a/diffdir.c
+++ b/diffdir.c
@@ -111,3 +111,45 @@
free(dirf);
free(dirt);
}
+
+void
+diff(char *f, char *t, int level)
+{
+ char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
+ Dir *fsb, *tsb;
+
+ fsb = nil;
+ tsb = nil;
+ if ((fp = statfile(f, &fsb)) == 0)
+ goto Return;
+ if ((tp = statfile(t, &tsb)) == 0)
+ goto Return;
+ if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
+ if (rflag || level == 0)
+ diffdir(fp, tp, level);
+ else
+ Bprint(&stdout, "Common subdirectories: %s and %s\n", fp, tp);
+ }
+ else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
+ diffreg(fp, f, tp, t);
+ else {
+ if (REGULAR_FILE(fsb)) {
+ if ((p = utfrrune(f, '/')) == 0)
+ p = f;
+ else
+ p++;
+ if (mkpathname(tb, tp, p) == 0)
+ diffreg(fp, f, tb, t);
+ } else {
+ if ((p = utfrrune(t, '/')) == 0)
+ p = t;
+ else
+ p++;
+ if (mkpathname(fb, fp, p) == 0)
+ diffreg(fb, f, tp, t);
+ }
+ }
+Return:
+ free(fsb);
+ free(tsb);
+}
\ No newline at end of file
--- a/diffio.c
+++ b/diffio.c
@@ -111,7 +111,7 @@
bp = Bopen(arg, OREAD);
if (!bp) {
- panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
+ sysfatal("cannot open %s: %r", arg);
return 0;
}
if (d->binary)
@@ -209,7 +209,7 @@
Bprint(&stdout, "%s%d", separator, b);
}
-static void
+void
fetch(Diff *d, long *f, int a, int b, Biobuf *bp, char *s)
{
char buf[MAXLINELEN];
@@ -229,6 +229,7 @@
while (a++ <= b) {
readline(bp, buf);
Bprint(&stdout, "%s%s\n", s, buf);
+ Bflush(&stdout);
}
}
--- a/diffreg.c
+++ b/diffreg.c
@@ -253,44 +253,6 @@
d->J[q->x+d->pref] = q->y+d->pref;
}
-static void
-output(Diff *d)
-{
- int m, i0, i1, j0, j1;
-
- m = d->len[0];
- d->J[0] = 0;
- d->J[m+1] = d->len[1]+1;
- if (mode != 'e') {
- for (i0 = 1; i0 <= m; i0 = i1+1) {
- while (i0 <= m && d->J[i0] == d->J[i0-1]+1)
- i0++;
- j0 = d->J[i0-1]+1;
- i1 = i0-1;
- while (i1 < m && d->J[i1+1] == 0)
- i1++;
- j1 = d->J[i1+1]-1;
- d->J[i1] = j1;
- change(d, i0, i1, j0, j1);
- }
- } else {
- for (i0 = m; i0 >= 1; i0 = i1-1) {
- while (i0 >= 1 && d->J[i0] == d->J[i0+1]-1 && d->J[i0])
- i0--;
- j0 = d->J[i0+1]-1;
- i1 = i0+1;
- while (i1 > 1 && d->J[i1-1] == 0)
- i1--;
- j1 = d->J[i1-1]+1;
- d->J[i1] = j1;
- change(d, i1 , i0, j1, j0);
- }
- }
- if (m == 0)
- change(d, 1, 0, 1, d->len[1]);
- flushchanges(d);
-}
-
#define BUF 4096
static int
cmp(Biobuf* b1, Biobuf* b2)
@@ -338,23 +300,20 @@
}
void
-diffreg(char *f, char *fo, char *t, char *to)
+calcdiff(Diff *d, char *f, char *fo, char *t, char *to)
{
Biobuf *b0, *b1;
- Diff d;
int k;
- d.binary = 0;
- memset(&d, 0, sizeof(d));
- b0 = prepare(&d, 0, f, fo);
+ b0 = prepare(d, 0, f, fo);
if (!b0)
return;
- b1 = prepare(&d, 1, t, to);
+ b1 = prepare(d, 1, t, to);
if (!b1) {
Bterm(b0);
return;
}
- if (d.binary){
+ if (d->binary){
// could use b0 and b1 but this is simpler.
if (cmp(b0, b1))
print("binary files %s %s differ\n", f, t);
@@ -362,38 +321,91 @@
Bterm(b1);
return;
}
- d.clen = 0;
- prune(&d);
- sort(d.sfile[0], d.slen[0]);
- sort(d.sfile[1], d.slen[1]);
+ d->clen = 0;
+ prune(d);
+ sort(d->sfile[0], d->slen[0]);
+ sort(d->sfile[1], d->slen[1]);
- d.member = (int *)d.file[1];
- equiv(d.sfile[0], d.slen[0], d.sfile[1], d.slen[1], d.member);
- d.member = erealloc(d.member, (d.slen[1]+2)*sizeof(int));
+ d->member = (int *)d->file[1];
+ equiv(d->sfile[0], d->slen[0], d->sfile[1], d->slen[1], d->member);
+ d->member = erealloc(d->member, (d->slen[1]+2)*sizeof(int));
- d.class = (int *)d.file[0];
- unsort(d.sfile[0], d.slen[0], d.class);
- d.class = erealloc(d.class, (d.slen[0]+2)*sizeof(int));
+ d->class = (int *)d->file[0];
+ unsort(d->sfile[0], d->slen[0], d->class);
+ d->class = erealloc(d->class, (d->slen[0]+2)*sizeof(int));
- d.klist = emalloc((d.slen[0]+2)*sizeof(int));
- d.clist = emalloc(sizeof(Cand));
- k = stone(&d, d.class, d.slen[0], d.member, d.klist);
- free(d.member);
- free(d.class);
+ d->klist = emalloc((d->slen[0]+2)*sizeof(int));
+ d->clist = emalloc(sizeof(Cand));
+ k = stone(d, d->class, d->slen[0], d->member, d->klist);
+ free(d->member);
+ free(d->class);
- d.J = emalloc((d.len[0]+2)*sizeof(int));
- unravel(&d, d.klist[k]);
- free(d.clist);
- free(d.klist);
+ d->J = emalloc((d->len[0]+2)*sizeof(int));
+ unravel(d, d->klist[k]);
+ free(d->clist);
+ free(d->klist);
- d.ixold = emalloc((d.len[0]+2)*sizeof(long));
- d.ixnew = emalloc((d.len[1]+2)*sizeof(long));
+ d->ixold = emalloc((d->len[0]+2)*sizeof(long));
+ d->ixnew = emalloc((d->len[1]+2)*sizeof(long));
Bseek(b0, 0, 0); Bseek(b1, 0, 0);
- check(&d, b0, b1);
+ check(d, b0, b1);
+}
+
+static void
+output(Diff *d)
+{
+ int m, i0, i1, j0, j1;
+
+ m = d->len[0];
+ d->J[0] = 0;
+ d->J[m+1] = d->len[1]+1;
+ if (mode != 'e') {
+ for (i0 = 1; i0 <= m; i0 = i1+1) {
+ while (i0 <= m && d->J[i0] == d->J[i0-1]+1)
+ i0++;
+ j0 = d->J[i0-1]+1;
+ i1 = i0-1;
+ while (i1 < m && d->J[i1+1] == 0)
+ i1++;
+ j1 = d->J[i1+1]-1;
+ d->J[i1] = j1;
+ change(d, i0, i1, j0, j1);
+ }
+ } else {
+ for (i0 = m; i0 >= 1; i0 = i1-1) {
+ while (i0 >= 1 && d->J[i0] == d->J[i0+1]-1 && d->J[i0])
+ i0--;
+ j0 = d->J[i0+1]-1;
+ i1 = i0+1;
+ while (i1 > 1 && d->J[i1-1] == 0)
+ i1--;
+ j1 = d->J[i1-1]+1;
+ d->J[i1] = j1;
+ change(d, i1 , i0, j1, j0);
+ }
+ }
+ if (m == 0)
+ change(d, 1, 0, 1, d->len[1]);
+ flushchanges(d);
+}
+
+void
+diffreg(char *f, char *fo, char *t, char *to)
+{
+ Diff d;
+
+ memset(&d, 0, sizeof(d));
+ calcdiff(&d, f, fo, t, to);
output(&d);
- free(d.J);
- free(d.ixold);
- free(d.ixnew);
- Bterm(b0);
- Bterm(b1);
+ freediff(&d);
+}
+
+void
+freediff(Diff *d)
+{
+ Bterm(d->input[0]);
+ Bterm(d->input[1]);
+ free(d->J);
+ free(d->ixold);
+ free(d->ixnew);
}
--- /dev/null
+++ b/merge3.c
@@ -1,0 +1,143 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+static int
+changecmp(void *a, void *b)
+{
+ return ((Change*)a)->a - ((Change*)b)->a;
+}
+
+static void
+addchange(Diff *df, int a, int b, int c, int d)
+{
+ Change *ch;
+
+ if (a > b && c > d)
+ return;
+ if(df->nchanges%1024 == 0)
+ df->changes = erealloc(df->changes, (df->nchanges+1024)*sizeof(df->changes[0]));
+ ch = &df->changes[df->nchanges++];
+ ch->a = a;
+ ch->b = b;
+ ch->c = c;
+ ch->d = d;
+}
+
+static void
+collect(Diff *d)
+{
+ int m, i0, i1, j0, j1;
+
+ m = d->len[0];
+ d->J[0] = 0;
+ d->J[m+1] = d->len[1]+1;
+ for (i0 = m; i0 >= 1; i0 = i1-1) {
+ while (i0 >= 1 && d->J[i0] == d->J[i0+1]-1 && d->J[i0])
+ i0--;
+ j0 = d->J[i0+1]-1;
+ i1 = i0+1;
+ while (i1 > 1 && d->J[i1-1] == 0)
+ i1--;
+ j1 = d->J[i1-1]+1;
+ d->J[i1] = j1;
+ addchange(d, i1 , i0, j1, j0);
+ }
+ if (m == 0)
+ change(d, 1, 0, 1, d->len[1]);
+ qsort(d->changes, d->nchanges, sizeof(Change), changecmp);
+}
+
+static int
+overlaps(Change *a, Change *b)
+{
+ if(a == nil || b == nil)
+ return 0;
+ if(a->a <= b->a)
+ return a->b >= b->a;
+ else
+ return b->b >= a->a;
+}
+
+char*
+merge(Diff *l, Diff *r)
+{
+ int il, ir, a, b;
+ Change *lc, *rc;
+ vlong ln;
+
+ il = 0;
+ ir = 0;
+ ln = 0;
+ collect(l);
+ collect(r);
+ while(il < l->nchanges || ir < r->nchanges){
+ lc = nil;
+ rc = nil;
+ if(il < l->nchanges)
+ lc = &l->changes[il];
+ if(ir < r->nchanges)
+ rc = &r->changes[il];
+ if(overlaps(lc, rc)){
+ a = (lc->a < rc->a) ? lc->a : rc->a;
+ b = (lc->b > rc->b) ? lc->b : rc->b;
+ fetch(l, l->ixold, ln, a-1, l->input[0], "");
+ Bprint(&stdout, "<<<<<<<<<< %s\n", l->file2);
+ fetch(l, l->ixnew, lc->c, lc->d, l->input[1], "");
+ Bprint(&stdout, "========== common\n");
+ fetch(l, l->ixold, a, b, l->input[0], "");
+ Bprint(&stdout, "========== %s\n", r->file2);
+ fetch(r, r->ixnew, rc->c, rc->d, r->input[1], "");
+ Bprint(&stdout, ">>>>>>>>>>\n");
+ ln = b+1;
+ il++;
+ ir++;
+ }else if(rc == nil || lc->a < rc->a){
+ fetch(l, l->ixold, ln, lc->a-1, l->input[0], "");
+ fetch(l, l->ixnew, lc->c, lc->d, l->input[1], "");
+ ln = lc->b+1;
+ il++;
+ }else if(lc == nil || rc->a < lc->a){
+ fetch(l, l->ixold, ln, rc->a-1, l->input[0], "");
+ fetch(r, r->ixnew, rc->c, rc->d, r->input[1], "");
+ ln = lc->b+1;
+ ir++;
+ }else
+ abort();
+ }
+ if(ln < l->len[0])
+ fetch(l, l->ixold, ln, l->len[0], l->input[0], "");
+ return nil;
+}
+
+void
+usage(void)
+{
+ fprint(2, "usage: %s theirs base ours\n", argv0);
+ exits("usage");
+}
+
+void
+main(int argc, char **argv)
+{
+ Diff l, r;
+ char *x;
+
+ ARGBEGIN{
+ default:
+ usage();
+ }ARGEND;
+
+ if(argc != 3)
+ usage();
+ Binit(&stdout, 1, OWRITE);
+ memset(&l, 0, sizeof(l));
+ memset(&r, 0, sizeof(r));
+ calcdiff(&l, argv[1], argv[1], argv[0], argv[0]);
+ calcdiff(&r, argv[1], argv[1], argv[2], argv[2]);
+ x = merge(&l, &r);
+ freediff(&l);
+ freediff(&r);
+ exits(x);
+}
--- a/mkfile
+++ b/mkfile
@@ -1,10 +1,11 @@
< /$objtype/mkfile
-TARG=diff
+TARG=diff merge3
OFILES=\
diffdir.$O\
diffio.$O\
diffreg.$O\
+ util.$O
HFILES=diff.h
--- /dev/null
+++ b/util.c
@@ -1,0 +1,107 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+Biobuf stdout;
+char mode; /* '\0', 'e', 'f', 'h' */
+char bflag; /* ignore multiple and trailing blanks */
+char rflag; /* recurse down directory trees */
+char mflag; /* pseudo flag: doing multiple files, one dir */
+int anychange;
+
+static char *tmp[] = {"/tmp/diff1XXXXXXXXXXX", "/tmp/diff2XXXXXXXXXXX"};
+static int whichtmp;
+
+void *
+emalloc(unsigned n)
+{
+ register void *p;
+
+ if ((p = malloc(n)) == 0)
+ sysfatal("malloc: %r");
+ return p;
+}
+
+void *
+erealloc(void *p, unsigned n)
+{
+ void *rp;
+
+ if ((rp = realloc(p, n)) == 0)
+ sysfatal("realloc: %r");
+ return rp;
+}
+
+int
+mkpathname(char *pathname, char *path, char *name)
+{
+ if (strlen(path) + strlen(name) > MAXPATHLEN) {
+ sysfatal("pathname %s/%s too long", path, name);
+ return 1;
+ }
+ sprint(pathname, "%s/%s", path, name);
+ return 0;
+}
+
+char *
+mktmpfile(int input, Dir **sb)
+{
+ int fd, i;
+ char *p;
+ char buf[8192];
+
+ p = mktemp(tmp[whichtmp++]);
+ fd = create(p, OWRITE|ORCLOSE, 0600);
+ if (fd < 0) {
+ sysfatal("cannot create %s: %r", p);
+ return 0;
+ }
+ while ((i = read(input, buf, sizeof(buf))) > 0) {
+ if ((i = write(fd, buf, i)) < 0)
+ break;
+ }
+ *sb = dirfstat(fd);
+ close(fd);
+ if (i < 0) {
+ sysfatal("cannot read/write %s: %r", p);
+ return 0;
+ }
+ return p;
+}
+
+void
+rmtmpfiles(void)
+{
+ while (whichtmp > 0) {
+ whichtmp--;
+ remove(tmp[whichtmp]);
+ }
+}
+
+char *
+statfile(char *file, Dir **sb)
+{
+ Dir *dir;
+ int input;
+
+ dir = dirstat(file);
+ if(dir == nil) {
+ if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) {
+ sysfatal("cannot stat %s: %r", file);
+ return 0;
+ }
+ free(dir);
+ return mktmpfile(0, sb);
+ } else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) {
+ free(dir);
+ if ((input = open(file, OREAD)) == -1) {
+ sysfatal("cannot open %s: %r", file);
+ return 0;
+ }
+ file = mktmpfile(input, sb);
+ close(input);
+ } else
+ *sb = dir;
+ return file;
+}