shithub: gefs

Download patch

ref: b927f42f5f6ebbe9af2a7650a9bd504f2f0cd808
parent: 8e49cdfd1994af3ca0f112220b3614d685da4759
author: Ori Bernstein <ori@eigenstate.org>
date: Mon Dec 25 11:26:34 EST 2023

fs: don't drop mutlk between syncing the snaps and serializing blocks

--- a/blk.c
+++ b/blk.c
@@ -161,7 +161,6 @@
 		a = &fs->arenas[mid];
 		alo = a->h0->bp.addr;
 		ahi = alo + a->size + 2*Blksz;
-//print("getarena %d [%d] %d (%#llx %#llx) %llx\n", lo, mid, hi, alo, ahi, b);
 		if(b < alo)
 			hi = mid-1;
 		else if(b > ahi)
@@ -349,7 +348,7 @@
 			switch(op){
 			case LogSync:
 				gen = ent >> 8;
-				dprint("\tlog@%d: sync %llx\n", i, gen);
+				dprint("\tlog@%x: sync %lld\n", i, gen);
 				if(gen >= fs->qgen){
 					if(a->logtl == nil){
 						b->logsz = i;
@@ -364,7 +363,7 @@
 			case LogAlloc:
 			case LogAlloc1:
 				len = (op >= Log2wide) ? UNPACK64(d+8) : Blksz;
-				dprint("\tlog@%d alloc: %llx+%llx\n", i, off, len);
+				dprint("\tlog@%x alloc: %llx+%llx\n", i, off, len);
 				grabrange(a->free, off & ~0xff, len);
 				a->used += len;
 				break;
@@ -371,13 +370,13 @@
 			case LogFree:
 			case LogFree1:
 				len = (op >= Log2wide) ? UNPACK64(d+8) : Blksz;
-				dprint("\tlog@%d free: %llx+%llx\n", i, off, len);
+				dprint("\tlog@%x free: %llx+%llx\n", i, off, len);
 				freerange(a->free, off & ~0xff, len);
 				a->used -= len;
 				break;
 			default:
 				n = 0;
-				dprint("\tlog@%d: log op %d\n", i, op);
+				dprint("\tlog@%x: log op %d\n", i, op);
 				abort();
 				break;
 			}
@@ -924,10 +923,11 @@
 	Arena *a;
 	Qent qe;
 
-	b->enqueued = getcallerpc(&b);
-	a = getarena(b->bp.addr);
 	assert(checkflag(b, Bdirty));
 	assert(b->bp.addr >= 0);
+
+	b->enqueued = getcallerpc(&b);
+	a = getarena(b->bp.addr);
 	holdblk(b);
 	finalize(b);
 	setflag(b, Bqueued);
@@ -954,14 +954,10 @@
 {
 	if(a->qgen != b->qgen)
 		return (a->qgen < b->qgen) ? -1 : 1;
+	if(a->op != b->op)
+		return (a->op < b->op) ? -1 : 1;
 	if(a->bp.addr != b->bp.addr)
 		return (a->bp.addr < b->bp.addr) ? -1 : 1;
-	if(a->op != b->op){
-		if(a->op == Qfence)
-			return -1;
-		if(a->op == Qfree)
-			return 1;
-	}
 	return 0;
 }
 
@@ -1029,7 +1025,6 @@
 		e.b->queued = 0;
 	}
 	return e;
-
 }
 
 void
@@ -1068,119 +1063,4 @@
 		}
 		assert(estacksz() == 0);
 	}
-}
-
-void
-wrbarrier(void)
-{
-	Qent qe;
-	int i;
-	
-	aincv(&fs->qgen, 1);
-	fs->syncing = fs->nsyncers;
-	for(i = 0; i < fs->nsyncers; i++){
-		qe.op = Qfence;
-		qe.bp.addr = 0;
-		qe.bp.hash = -1;
-		qe.bp.gen = -1;
-		qe.b = nil;
-		qput(&fs->syncq[i], qe);
-	}
-	while(fs->syncing != 0)
-		rsleep(&fs->syncrz);
-}
-
-void
-sync(void)
-{
-	Arena *a;
-	int i;
-
-
-	if(fs->rdonly)
-		return;
-	qlock(&fs->synclk);
-	if(!fs->snap.dirty){
-		qunlock(&fs->synclk);
-		return;
-	}
-	if(waserror()){
-		fprint(2, "failed to sync: %s\n", errmsg());
-		qunlock(&fs->synclk);
-		nexterror();
-	}
-
-	/*
-	 * pass 0: Pack the blocks we want to sync
-	 *  while holding the write lock, and then
-	 *  wait until all the blocks they point at
-	 *  have hit disk; once they're on disk, we
-	 *  can take a consistent snapshot.
-         */
-	qlock(&fs->mutlk);
-	flushdlcache(1);
-	for(i = 0; i < fs->narena; i++){
-		a = &fs->arenas[i];
-		qlock(a);
-		setflag(a->logtl, Bdirty);
-		enqueue(a->logtl);
-		logbarrier(a, fs->qgen);
-
-		packarena(a->h0->data, Blksz, a);
-		packarena(a->h1->data, Blksz, a);
-		finalize(a->h0);
-		finalize(a->h1);
-		setflag(a->h0, Bdirty);
-		setflag(a->h1, Bdirty);
-		fs->arenabp[i] = a->h0->bp;
-		qunlock(a);
-	}
-
-	packsb(fs->sb0->buf, Blksz, fs);
-	packsb(fs->sb1->buf, Blksz, fs);
-	finalize(fs->sb0);
-	finalize(fs->sb1);
-	fs->snap.dirty = 0;
-	qunlock(&fs->mutlk);
-	wrbarrier();
-	/*
-	 * pass 1: sync block headers; if we crash here,
-	 *  the block footers are consistent, and we can
-	 *  use them.
-	 */	wrbarrier();
-	for(i = 0; i < fs->narena; i++)
-		enqueue(fs->arenas[i].h0);
-	wrbarrier();
-
-	/*
-	 * pass 2: sync superblock; we have a consistent
-	 * set of block headers, so if we crash, we can
-	 * use the loaded block headers; the footers will
-	 * get synced after so that we can use them next
-	 * time around.
-         */
-	syncblk(fs->sb0);
-	syncblk(fs->sb1);
-
-	/*
-	 * pass 3: sync block footers; if we crash here,
-	 *  the block headers are consistent, and we can
-	 *  use them.
-         */
-	for(i = 0; i < fs->narena; i++)
-		enqueue(fs->arenas[i].h1);
-
-	/*
-	 * Pass 4: clean up the old snap tree's deadlist
-	 */
-	freedl(&fs->snapdl, 1);
-	fs->snapdl.hd.addr = -1;
-	fs->snapdl.hd.hash = -1;
-	fs->snapdl.hd.gen = -1;
-	fs->snapdl.tl.addr = -1;
-	fs->snapdl.tl.hash = -1;
-	fs->snapdl.tl.gen = -1;
-	fs->snapdl.ins = nil;
-	qunlock(&fs->synclk);
-	poperror();
 }
--- a/dat.h
+++ b/dat.h
@@ -454,6 +454,7 @@
 };
 
 enum {
+	/* in priority order */
 	Qnone,
 	Qfence,
 	Qwrite,
@@ -461,7 +462,7 @@
 };
 
 struct Qent {
-	long	qgen;
+	vlong	qgen;
 	Bptr	bp;
 	Blk	*b;
 	int	op;
--- a/fns.h
+++ b/fns.h
@@ -61,6 +61,7 @@
 void	epochclean(void);
 void	limbo(Bfree*);
 void	freeblk(Tree*, Blk*, Bptr);
+int	logbarrier(Arena *, vlong);
 void	dlappend(Dlist *dl, Bptr);
 void	killblk(Tree*, Bptr);
 void	blkdealloc(vlong);
@@ -84,12 +85,11 @@
 void	growfs(char*);
 void	loadarena(Arena*, Bptr);
 void	loadfs(char*);
-void	sync(void);
 void	loadlog(Arena*, Bptr);
 int	scandead(Dlist*, int, void(*)(Bptr, void*), void*);
 int	endfs(void);
 int	compresslog(Arena*);
-void	flushdlcache(int);
+void	dlsync(void);
 void	setval(Blk*, Kvp*);
 
 Conn*	newconn(int, int);
--- a/fs.c
+++ b/fs.c
@@ -32,6 +32,137 @@
 }
 
 static void
+wrbarrier(void)
+{
+	Qent qe;
+	int i;
+	
+	aincv(&fs->qgen, 1);
+	fs->syncing = fs->nsyncers;
+	for(i = 0; i < fs->nsyncers; i++){
+		qe.op = Qfence;
+		qe.bp.addr = 0;
+		qe.bp.hash = -1;
+		qe.bp.gen = -1;
+		qe.b = nil;
+		qput(&fs->syncq[i], qe);
+	}
+	aincv(&fs->qgen, 1);
+	while(fs->syncing != 0)
+		rsleep(&fs->syncrz);
+}
+
+static void
+sync(void)
+{
+	Mount *mnt;
+	Arena *a;
+	int i;
+
+
+	if(fs->rdonly)
+		return;
+	qlock(&fs->synclk);
+	if(!fs->snap.dirty){
+		qunlock(&fs->synclk);
+		return;
+	}
+	if(waserror()){
+		fprint(2, "failed to sync: %s\n", errmsg());
+		qunlock(&fs->synclk);
+		nexterror();
+	}
+
+	/*
+	 * pass 0: update all open snapshots and
+	 *  pack the blocks we want to sync while
+	 *  holding the write lock, then wait until
+	 *  all the blocks they point at have hit
+	 *  disk; once those are on disk, we can
+	 *  take a consistent snapshot.
+	 */
+	qlock(&fs->mutlk);
+	lock(&fs->mountlk);
+	for(mnt = fs->mounts; mnt != nil; mnt = mnt->next)
+		updatesnap(&mnt->root, mnt->root, mnt->name);
+	unlock(&fs->mountlk);
+	dlsync();
+	for(i = 0; i < fs->narena; i++){
+		a = &fs->arenas[i];
+		qlock(a);
+		/*
+		 * because the log uses preallocated
+		 * blocks, we need to write the log
+		 * block out synchronously, or it may
+		 * get reused.
+		 */
+		logbarrier(a, fs->qgen);
+		finalize(a->logtl);
+		syncblk(a->logtl);
+
+		packarena(a->h0->data, Blksz, a);
+		packarena(a->h1->data, Blksz, a);
+		finalize(a->h0);
+		finalize(a->h1);
+		setflag(a->h0, Bdirty);
+		setflag(a->h1, Bdirty);
+		fs->arenabp[i] = a->h0->bp;
+		qunlock(a);
+	}
+
+	packsb(fs->sb0->buf, Blksz, fs);
+	packsb(fs->sb1->buf, Blksz, fs);
+	finalize(fs->sb0);
+	finalize(fs->sb1);
+	fs->snap.dirty = 0;
+	qunlock(&fs->mutlk);
+	wrbarrier();
+
+	/*
+	 * pass 1: sync block headers; if we crash here,
+	 *  the block footers are consistent, and we can use them.
+	 */
+	wrbarrier();
+	for(i = 0; i < fs->narena; i++)
+		enqueue(fs->arenas[i].h0);
+	wrbarrier();
+
+	/*
+	 * pass 2: sync superblock; we have a consistent
+	 * set of block headers, so if we crash, we can
+	 * use the loaded block headers; the footers will
+	 * get synced after so that we can use them next
+	 * time around.
+         */
+	syncblk(fs->sb0);
+	syncblk(fs->sb1);
+
+	/*
+	 * pass 3: sync block footers; if we crash here,
+	 *  the block headers are consistent, and we can
+	 *  use them.
+         */
+	for(i = 0; i < fs->narena; i++)
+		enqueue(fs->arenas[i].h1);
+	wrbarrier();
+
+	/*
+	 * pass 4: clean up the old snap tree's deadlist
+	 */
+	freedl(&fs->snapdl, 1);
+	fs->snapdl.hd.addr = -1;
+	fs->snapdl.hd.hash = -1;
+	fs->snapdl.hd.gen = -1;
+	fs->snapdl.tl.addr = -1;
+	fs->snapdl.tl.hash = -1;
+	fs->snapdl.tl.gen = -1;
+	fs->snapdl.ins = nil;
+	wrbarrier();
+	qunlock(&fs->synclk);
+	poperror();
+}
+
+static void
 snapfs(Amsg *a, Tree **tp)
 {
 	Tree *t, *s;
@@ -2190,7 +2321,6 @@
 	char buf[Offksz];
 	Bptr bp, nb, *oldhd;
 	vlong off;
-	Mount *mnt;
 	Tree *t;
 	Arena *a;
 	Amsg *am;
@@ -2231,17 +2361,8 @@
 				epochend(id);
 				epochclean();
 			}
-			qlock(&fs->mutlk);
 			if(am->halt)
 				ainc(&fs->rdonly);
-			epochstart(id);
-			lock(&fs->mountlk);
-			for(mnt = fs->mounts; mnt != nil; mnt = mnt->next)
-				updatesnap(&mnt->root, mnt->root, mnt->name);
-			unlock(&fs->mountlk);
-			qunlock(&fs->mutlk);
-			epochend(id);
-			epochclean();
 			sync();
 
 			for(i = 0; i < fs->narena; i++){
--- a/snap.c
+++ b/snap.c
@@ -8,7 +8,7 @@
 #include "atomic.h"
 
 static void
-dlsync(Dlist *dl)
+dlflush(Dlist *dl)
 {
 	char kvbuf[512];
 	Msg m;
@@ -128,7 +128,7 @@
 	dlcachedel(dl, 0);
 	while(fs->dltail != nil && fs->dlcount >= fs->dlcmax){
 		dt = fs->dltail;
-		dlsync(dt);
+		dlflush(dt);
 		dlcachedel(dt, 1);
 		dropblk(dt->ins);
 		free(dt);
@@ -306,7 +306,7 @@
 		nm++;
 	}
 	assert(nm <= nelem(m));
-	flushdlcache(1);
+	dlsync();
 	btupsert(&fs->snap, m, nm);
 	reclaimblocks(t->gen, succ, t->pred);
 	if(deltree){
@@ -514,23 +514,13 @@
 }
 
 void
-flushdlcache(int clear)
+dlsync(void)
 {
 	Dlist *dl, *n;
 
 	for(dl = fs->dlhead; dl != nil; dl = n){
 		n = dl->cnext;
-		dlsync(dl);
-		if(clear){
-			if(dl->ins != nil)
-				dropblk(dl->ins);
-			free(dl);
-		}
-	}
-	if(clear){
-		fs->dlhead = nil;
-		fs->dltail = nil;
-		memset(fs->dlcache, 0, fs->dlcmax*sizeof(Dlist*));
+		dlflush(dl);
 	}
 }