shithub: gefs

Download patch

ref: c0e7c83c5eb13de7e799f7bf0b3359b6c1569b24
parent: 015bcdb965ed6055a2075ce1acdd94b4bb81990e
author: Ori Bernstein <ori@eigenstate.org>
date: Thu Dec 28 22:48:40 EST 2023

all: add tracing facility, on by default

--- a/blk.c
+++ b/blk.c
@@ -249,6 +249,7 @@
 	initblk(lb, o, -1, Tlog);
 	finalize(lb);
 	syncblk(lb);
+	traceb("logblk" , lb->bp);
 	return lb;
 }
 
@@ -335,6 +336,7 @@
 
 
 	dprint("loadlog %B\n", bp);
+	traceb("loadlog", bp);
 	while(1){
 		b = getblk(bp, 0);
 		dprint("\tload %B chain %B\n", bp, b->logp);
@@ -401,8 +403,11 @@
 	Bptr hd;
 	char *p;
 
-	if(a->logtl != nil && checkflag(a->logtl, Bdirty))
+	tracem("compresslog");
+	if(a->logtl != nil){
+		finalize(a->logtl);
 		syncblk(a->logtl);
+	}
 	/*
 	 * Prepare what we're writing back.
 	 * Arenas must be sized so that we can
@@ -650,6 +655,7 @@
 	b = cachepluck();
 	initblk(b, bp, t->memgen, ty);
 	b->alloced = getcallerpc(&t);
+	tracex("newblk" , b->bp, ty, -1);
 	return b;
 }
 
@@ -661,6 +667,7 @@
 	if((r = newblk(t, b->type, 0)) == nil)
 		return nil;
 
+	tracex("dup" , b->bp, b->type, t->gen);
 	setflag(r, Bdirty);
 	r->bp.hash = -1;
 	r->nval = b->nval;
@@ -720,6 +727,7 @@
 	int i;
 
 	i = ihash(bp.addr) % nelem(fs->blklk);
+	tracex("get" , bp, getcallerpc(&bp), -1);
 	qlock(&fs->blklk[i]);
 	if(waserror()){
 		qunlock(&fs->blklk[i]);
@@ -822,6 +830,7 @@
 		return;
 	}
 
+	tracex("freeb", bp, getcallerpc(&t), -1);
 	f = emalloc(sizeof(Bfree), 0);
 	f->op = DFblk;
 	f->bp = bp;
@@ -936,6 +945,7 @@
 	a = getarena(b->bp.addr);
 	holdblk(b);
 	finalize(b);
+	traceb("queueb", b->bp);
 	setflag(b, Bqueued);
 	b->queued = getcallerpc(&b);
 	qe.op = Qwrite;
@@ -1045,6 +1055,7 @@
 		qe = qpop(q);
 		switch(qe.op){
 		case Qfree:
+			tracex("qfreeb", qe.bp, qe.qgen, -1);
 			a = getarena(qe.bp.addr);
 			qlock(a);
 			cachedel(qe.bp.addr);
@@ -1054,6 +1065,7 @@
 			qunlock(a);
 			break;
 		case Qfence:
+			tracev("qfence", qe.qgen);
 			qlock(&fs->synclk);
 			if(--fs->syncing == 0)
 				rwakeupall(&fs->syncrz);
@@ -1060,6 +1072,7 @@
 			qunlock(&fs->synclk);
 			break;
 		case Qwrite:
+			tracex("qsyncb", qe.bp, qe.qgen, -1);
 			if(checkflag(qe.b, Bfreed) == 0)
 				syncblk(qe.b);
 			dropblk(qe.b);
--- a/cache.c
+++ b/cache.c
@@ -83,6 +83,7 @@
 	h = ihash(b->bp.addr);
 	bkt = &fs->bcache[h % fs->cmax];
 	qlock(&fs->lrulk);
+	traceb("cache", b->bp);
 	lock(bkt);
 	if(checkflag(b, Bcached)){
 		unlock(bkt);
@@ -110,6 +111,7 @@
 	if(addr == -1)
 		return;
 
+	tracex("uncache", Zb, addr, getcallerpc(&addr));
 	h = ihash(addr);
 	bkt = &fs->bcache[h % fs->cmax];
 	lock(bkt);
@@ -130,6 +132,7 @@
 cachedel(vlong addr)
 {
 	qlock(&fs->lrulk);
+	tracex("uncachelk", Zb, addr, getcallerpc(&addr));
 	cachedel_lk(addr);
 	qunlock(&fs->lrulk);
 }
--- a/cons.c
+++ b/cons.c
@@ -2,6 +2,7 @@
 #include <libc.h>
 #include <fcall.h>
 #include <avl.h>
+#include <bio.h>
 
 #include "dat.h"
 #include "fns.h"
@@ -296,6 +297,37 @@
 }
 
 static void
+savetrace(int fd, char **ap, int na)
+{
+	Biobuf *bfd;
+	Trace *t;
+	int i;
+
+	if(na == 0)
+		bfd = Bfdopen(dup(fd, -1), OWRITE);
+	else
+		bfd = Bopen(ap[0], OWRITE);
+	if(bfd == nil){
+		fprint(fd, "error opening output");
+		return;
+	}
+	for(i = 0; i < fs->tracesz; i++){
+		t = &fs->trace[(fs->traceidx + i) % fs->tracesz];
+		if(t->msg[0] == 0)
+			continue;
+		Bprint(bfd, "[%d@%d] %s", t->tid, t->qgen, t->msg);
+		if(t->bp.addr != -1)
+			Bprint(bfd, " %B", t->bp);
+		if(t->v0 != -1)
+			Bprint(bfd, " %llx", t->v0);
+		if(t->v1 != -1)
+			Bprint(bfd, " %llx", t->v1);
+		Bprint(bfd, "\n");
+	}
+	fprint(fd, "saved\n");
+}
+
+static void
 unreserve(int fd, char **ap, int)
 {
 	if(strcmp(ap[0], "on") == 0)
@@ -306,6 +338,7 @@
 		fprint(2, "unknown reserve %s\n", ap[0]);
 	fprint(fd, "reserve: %d → %d\n", !permissive, permissive);
 }
+
 static void
 help(int fd, char**, int)
 {
@@ -344,7 +377,7 @@
 	{.name="show",		.sub="users",	.minarg=0, .maxarg=0, .fn=showusers},
 	{.name="show",		.sub="bstate",	.minarg=0, .maxarg=0, .fn=showbstate},
 	{.name="debug",		.sub=nil,	.minarg=0, .maxarg=1, .fn=setdbg},
-
+	{.name="save",		.sub="trace",	.minarg=0, .maxarg=1, .fn=savetrace},
 	{.name=nil, .sub=nil},
 };
 
--- a/dat.h
+++ b/dat.h
@@ -20,6 +20,7 @@
 typedef struct Chan	Chan;
 typedef struct Syncq	Syncq;
 typedef struct Qent	Qent;
+typedef struct Trace	Trace;
 typedef struct Tree	Tree;
 typedef struct Dlist	Dlist;
 typedef struct Mount	Mount;
@@ -361,6 +362,7 @@
 };
 
 struct Errctx {
+	long	tid;
 	char	err[128];
 	jmp_buf	errlab[Estacksz];
 	int	nerrlab;
@@ -479,6 +481,15 @@
 	int	heapsz;
 };
 
+struct Trace {
+	int	tid;
+	int	qgen;
+	char	msg[16];
+	Bptr	bp;
+	vlong	v0;
+	vlong	v1;
+};
+
 /*
  * Overall state of the file sytem.
  * Shadows the superblock contents.
@@ -562,6 +573,10 @@
 
 	RWLock	flushq[Nflushtab];
 	int	flushop[Nflushtab];
+
+	Trace	*trace;
+	long	traceidx;
+	long	tracesz;
 };
 
 struct Arena {
--- a/fns.h
+++ b/fns.h
@@ -12,7 +12,7 @@
 extern int	permissive;
 extern int	usereserve;
 extern char*	reamuser;
-extern void**	errctx;
+extern Errctx**	errctx;
 extern Blk*	blkbuf;
 extern int	noneid;
 extern int	nogroupid;
@@ -131,6 +131,7 @@
 	do{ \
 		if(debug) fprint(2, __VA_ARGS__); \
 	}while(0)
+
 #define fatal(...) \
 	do{ \
 		fprint(2, __VA_ARGS__); \
@@ -137,15 +138,25 @@
 		abort(); \
 	}while(0)
 
+#define tracex(msg, bp, v0, v1) \
+	do{ \
+		if(fs->trace != nil) \
+			_trace(msg, bp, v0, v1); \
+	} while(0)
+
+#define traceb(msg, bp)	tracex(msg, bp, -1, -1)
+#define tracev(msg, v0)	tracex(msg, Zb, v0, -1)
+#define tracem(msg)	tracex(msg, Zb, -1, -1)
+
 jmp_buf*	_waserror(void);
 _Noreturn void	error(char*, ...);
 _Noreturn void	broke(char*, ...);
 _Noreturn void	nexterror(void);
 #define waserror()	(setjmp(*_waserror()))
-#define errmsg()	(((Errctx*)*errctx)->err)
-#define	poperror()	assert(((Errctx*)*errctx)->nerrlab-- > 0)
-#define estacksz()	(((Errctx*)*errctx)->nerrlab)
-
+#define errmsg()	((*errctx)->err)
+#define	poperror()	assert((*errctx)->nerrlab-- > 0)
+#define estacksz()	((*errctx)->nerrlab)
+void	_trace(char*, Bptr, vlong, vlong);
 char*	packstr(char*, char*, char*);
 
 void	dir2kv(vlong, Xdir*, Kvp*, char*, int);
--- a/fs.c
+++ b/fs.c
@@ -38,6 +38,7 @@
 	int i;
 	
 	aincv(&fs->qgen, 1);
+	tracev("barrier", fs->qgen);
 	fs->syncing = fs->nsyncers;
 	for(i = 0; i < fs->nsyncers; i++){
 		qe.op = Qfence;
@@ -50,6 +51,7 @@
 	aincv(&fs->qgen, 1);
 	while(fs->syncing != 0)
 		rsleep(&fs->syncrz);
+	tracev("flushed", fs->qgen);
 }
 
 static void
@@ -76,6 +78,7 @@
 	/* 
 	 * Wait for data that we're syncing to hit disk
 	 */
+	tracem("flush1");
 	wrbarrier();
 	/*
 	 * pass 0: Update all open snapshots, and
@@ -86,6 +89,7 @@
 	 *  can take a consistent snapshot.
          */
 	qlock(&fs->mutlk);
+	tracem("packb");
 	dlsync();
 	for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next)
 		updatesnap(&mnt->root, mnt->root, mnt->name);
@@ -124,6 +128,7 @@
 	 *  the block footers are consistent, and we can
 	 *  use them.
 	 */
+	tracem("arenas0");
 	for(i = 0; i < fs->narena; i++)
 		enqueue(fs->arenas[i].h0);
 	wrbarrier();
@@ -136,6 +141,7 @@
 	 * time around.
          */
 	qlock(&fs->mutlk);
+	tracem("supers");
 	syncblk(fs->sb0);
 	syncblk(fs->sb1);
 
@@ -144,6 +150,7 @@
 	 *  the block headers are consistent, and we can
 	 *  use them.
          */
+	tracem("arenas1");
 	for(i = 0; i < fs->narena; i++)
 		enqueue(fs->arenas[i].h1);
 
@@ -150,6 +157,7 @@
 	/*
 	 * Pass 4: clean up the old snap tree's deadlist
 	 */
+	tracem("snapdl");
 	freedl(&fs->snapdl, 1);
 	fs->snapdl.hd.addr = -1;
 	fs->snapdl.hd.hash = -1;
@@ -161,6 +169,7 @@
 	wrbarrier();
 	qunlock(&fs->mutlk);
 	qunlock(&fs->synclk);
+	tracem("synced");
 	poperror();
 }
 
@@ -2337,6 +2346,7 @@
 		}
 		switch(am->op){
 		case AOsync:
+			tracem("syncreq");
 			if(waserror()){
 				fprint(2, "sync error: %s\n", errmsg());
 				ainc(&fs->rdonly);
@@ -2352,7 +2362,7 @@
 				if(a->nlog < a->reserve/(10*Blksz)){
 					oldhd[i].addr = -1;
 					oldhd[i].hash = -1;
-					oldhd[i].gen = -1; //(Bptr){-1, -1, -1};
+					oldhd[i].gen = -1;
 					qunlock(a);
 					continue;
 				}
@@ -2390,6 +2400,7 @@
 			break;
 
 		case AOsnap:
+			tracem("snapreq");
 			if(waserror()){
 				fprint(2, "taking snap: %s\n", errmsg());
 				ainc(&fs->rdonly);
@@ -2418,6 +2429,7 @@
 			break;
 
 		case AOclear:
+			tracem("bgclear");
 			if(waserror()){
 				fprint(2, "clear file %llx: %s\n", am->qpath, errmsg());
 				ainc(&fs->rdonly);
--- a/main.c
+++ b/main.c
@@ -19,7 +19,9 @@
 int	permissive;
 int	usereserve;
 char	*reamuser;
+int	notrace;
 char	*dev;
+vlong	tracesz		= 16*MiB;
 vlong	cachesz 	= 512*MiB;
 char	*srvname 	= "gefs";
 int	noneid		= 0;
@@ -26,8 +28,24 @@
 int	nogroupid	= 9999;
 int	admid		= -1;
 Blk	*blkbuf;
-void	**errctx;
+Errctx	**errctx;
 
+void
+_trace(char *msg, Bptr bp, vlong v0, vlong v1)
+{
+	Trace *t;
+	int idx;
+
+	idx = aincl(&fs->traceidx, 1);
+	t = &fs->trace[(idx-1) % fs->tracesz];
+	strecpy(t->msg, t->msg+sizeof(t->msg), msg);
+	t->tid = (*errctx)->tid;
+	t->qgen = agetv(&fs->qgen);
+	t->bp = bp;
+	t->v0 = v0;
+	t->v1 = v1;
+}
+
 static void
 nokill(void)
 {
@@ -143,6 +161,10 @@
 	if((fs = mallocz(sizeof(Gefs), 1)) == nil)
 		sysfatal("malloc: %r");
 
+	if(!notrace){
+		fs->trace = emalloc(tracesz, 1);
+		fs->tracesz = tracesz/sizeof(Trace);
+	}
 	fs->lrurz.l = &fs->lrulk;
 	fs->syncrz.l = &fs->synclk;
 	fs->noauth = noauth;
@@ -184,6 +206,7 @@
 		id = aincl(&fs->nworker, 1);
 		if((*errctx = mallocz(sizeof(Errctx), 1)) == nil)
 			sysfatal("malloc: %r");
+		(*errctx)->tid = id;
 		procsetname("%s.%ld", text, id);
 		(*f)(id, arg);
 		exits("child returned");
@@ -272,6 +295,10 @@
 	case 'g':
 		grow = 1;
 		break;
+	case 't':
+		tracesz = strtoll(EARGF(usage()), &e, 0);
+		tracesz *= MiB;
+		break;
 	case 'm':
 		v = strtoll(EARGF(usage()), &e, 0);
 		switch(*e){
@@ -308,6 +335,9 @@
 		break;
 	case 'C':
 		check = 1;
+		break;
+	case 'T':
+		notrace=1;
 		break;
 	default:
 		usage();
--- a/snap.c
+++ b/snap.c
@@ -18,6 +18,7 @@
 	if(!checkflag(dl->ins, Bdirty))
 		return;
 
+	traceb("dlflush", dl->ins->bp);
 	enqueue(dl->ins);
 	m.op = Oinsert;
 	dlist2kv(dl, &m, kvbuf, sizeof(kvbuf));
@@ -165,10 +166,12 @@
 				fb.addr = UNPACK64(p);
 				fb.hash = -1;
 				fb.gen = -1;
+				traceb("dlclear", fb);
 				freeblk(nil, nil, fb);
 			}
 		}
 		bp = b->logp;
+		traceb("dlfreeb", b->bp);
 		freeblk(&fs->snap, b, b->bp);
 		dropblk(b);
 	}
@@ -402,6 +405,7 @@
 	if(!o->dirty)
 		return;
 
+	traceb("updatesnap", o->bp);
 	/* update the old kvp */
 	o->nlbl--;
 	o->nref++;
@@ -518,6 +522,7 @@
 {
 	Dlist *dl, *n;
 
+	tracem("dlsync");
 	for(dl = fs->dlhead; dl != nil; dl = n){
 		n = dl->cnext;
 		dlflush(dl);
@@ -546,6 +551,7 @@
 	 */
 	if(bp.gen <= t->base)
 		return;
+	traceb("killb", bp);
 	if(t == &fs->snap)
 		dl = &fs->snapdl;
 	else
--- a/tree.c
+++ b/tree.c
@@ -1282,6 +1282,7 @@
 	assert(rb->bp.addr != 0);
 
 	lock(&t->lk);
+	traceb("setroot", rb->bp);
 	t->ht += dh;
 	t->bp = rb->bp;
 	t->dirty = 1;
@@ -1291,8 +1292,10 @@
 	freepath(t, path, npath);
 	poperror();
 
-	if(npull != nmsg)
+	if(npull != nmsg){
+		tracem("short pull");
 		goto Again;
+	}
 }
 
 Blk*