shithub: gefs

Download patch

ref: 231ad466a77c696cdf729b068ee79d3e77687d95
parent: 54321e719b4375e7c6b8076c1bd3f5649ef8f1f1
author: Ori Bernstein <ori@eigenstate.org>
date: Fri Dec 15 00:42:37 EST 2023

all: shuffle fs format a bit

no big wins here, but it makes it a little easier to have
variable sized arenas, and removes a few odd edge cases.

--- a/blk.c
+++ b/blk.c
@@ -54,7 +54,7 @@
 	assert(b->bp.addr >= 0);
 	clrflag(b, Bdirty);
 	if(pwrite(fs->fd, b->buf, Blksz, b->bp.addr) == -1)
-		broke("%s: %r", Eio);
+		broke("%B %s: %r", b->bp, Eio);
 }
 
 static Blk*
@@ -96,7 +96,7 @@
 	b->type = (flg&GBraw) ? Tdat : UNPACK16(b->buf+0);
 	switch(b->type){
 	default:
-		broke("invalid block type %d @%llx\n", b->type, bp);
+		broke("invalid block type %d @%llx", b->type, bp);
 		break;
 	case Tdat:
 	case Tsuper:
@@ -148,17 +148,30 @@
 Arena*
 getarena(vlong b)
 {
-	int i;
+	int hi, lo, mid;
+	vlong alo;
+	Arena *a;
 
-	i = b / fs->arenasz;
-	if(i < 0 || i >= fs->narena){
-		werrstr("out of range block %lld", b);
-		abort();
-		return nil;
+	lo = 0;
+	hi = fs->narena-1;	/* inclusive upper bound */
+	if(b == 0)
+		return &fs->arenas[0];
+	while(lo <= hi){	/* binary search over arenas by header address */
+		mid = lo + (hi - lo)/2;
+		a = &fs->arenas[mid];
+		alo = a->h0->bp.addr;
+		if(b < alo)
+			hi = mid-1;
+		else if(b >= alo + a->size + 2*Blksz)	/* end is exclusive: next arena's h0 lives there */
+			lo = mid+1;
+		else
+			return a;
 	}
-	return &fs->arenas[i];
+	werrstr("out of range block %lld", b);	/* no arena covers b */
+	abort();
+	return nil;
 }
 
 static void
 freerange(Avltree *t, vlong off, vlong len)
 {
@@ -249,7 +262,7 @@
 static void
 logappend(Arena *a, vlong off, vlong len, int op)
 {
-	vlong o;
+	vlong o, start, end;
 	Blk *nl, *lb;
 	char *p, *name;
 
@@ -257,9 +270,11 @@
 	assert((off & 0xff) == 0);
 	assert(op == LogAlloc || op == LogFree || op == LogSync);
 	if(op != LogSync){
+		start = a->h0->bp.addr;
+		end = start + a->size + 2*Blksz;
 		assert(lb == nil || lb->type == Tlog);
-		assert(off >= a->hd->bp.addr + Blksz);
-		assert(off < a->tl->bp.addr);
+		assert(off >= start);
+		assert(off <= end);
 	}
 	switch(op){
 	case LogAlloc:	name = "alloc";	break;
@@ -278,7 +293,8 @@
 	 * and chaining.
 	 */
 	if(lb == nil || lb->logsz >= Logspc - Logslop){
-		o = blkalloc_lk(a, 1);
+		if((o = blkalloc_lk(a, 1)) == -1)
+			error(Efs);
 		nl = mklogblk(a, o);
 		p = lb->data + lb->logsz;
 		PACK64(p, o|LogAlloc1);
@@ -418,7 +434,8 @@
 		return -1;
 	}
 	for(i = 0; i < nblks; i++)
-		blks[i] = blkalloc_lk(a, 1);
+		if((blks[i] = blkalloc_lk(a, 1)) == -1)
+			error(Efs);
 
 	/* fill up the log with the ranges from the tree */
 	i = 0;
@@ -489,7 +506,7 @@
 	t = a->free;
 	r = (Arange*)t->root;
 	if(!force && a->size - a->used <= a->reserve)
-		error(Efull);
+		return -1;
 	if(r == nil)
 		broke(Estuffed);
 
@@ -542,10 +559,8 @@
 	tries = 0;
 Again:
 	a = pickarena(ty, hint, tries);
-	if(a == nil || tries == fs->narena){
-		werrstr("no empty arenas");
-		return -1;
-	}
+	if(tries == fs->narena)
+		error(Efull);
 	/*
 	 * TODO: there's an extreme edge case
 	 * here.
@@ -559,8 +574,11 @@
 	 * correctly.
 	 */
 	tries++;
-	if(canqlock(a) == 0)
-		goto Again;
+	if(tries < fs->narena){
+		if(canqlock(a) == 0)
+			goto Again;
+	}else
+		qlock(a);
 	if(waserror()){
 		qunlock(a);
 		nexterror();
@@ -592,9 +610,11 @@
 	b->bp.gen = gen;
 	switch(ty){
 	case Tdat:
-	case Tarena:
 		b->data = b->buf;
 		break;
+	case Tarena:
+		b->data = b->buf+2;
+		break;
 	case Tdlist:
 	case Tlog:
 		b->logsz = 0;
@@ -708,8 +728,8 @@
 	}
 	if((b = cacheget(bp.addr)) != nil){
 		b->lasthold = getcallerpc(&bp);
-		poperror();
 		qunlock(&fs->blklk[i]);
+		poperror();
 		return b;
 	}
 	b = readblk(bp.addr, flg);
@@ -724,13 +744,13 @@
 			ck = b->bp.hash;
 		}
 		if(ck != xh)
-			error("%s: %llx != %llx", Ecorrupt, xh, ck);
+			broke("%s: %llux != %llux", Ecorrupt, xh, ck);
 	}
 	b->bp.gen = bp.gen;
 	b->lasthold = getcallerpc(&bp);
 	cacheins(b);
-	poperror();
 	qunlock(&fs->blklk[i]);
+	poperror();
 
 	return b;
 }
@@ -1077,7 +1097,7 @@
 		qunlock(&fs->synclk);
 		nexterror();
 	}
-	flushdlcache(0);
+	flushdlcache(1);
 	gen = aincv(&fs->qgen, 1);
 	fs->syncing = fs->nsyncers;
 	for(i = 0; i < fs->nsyncers; i++){
@@ -1107,9 +1127,9 @@
 	for(i = 0; i < fs->narena; i++){
 		a = &fs->arenas[i];
 		qlock(a);
-		packarena(a->hd->data, Blksz, a);
-		finalize(a->hd);
-		syncblk(a->hd);
+		packarena(a->h0->data, Blksz, a);
+		finalize(a->h0);
+		syncblk(a->h0);
 		qunlock(a);
 	}
 	/*
@@ -1120,7 +1140,7 @@
 	 * time around.
          */
 	for(i = 0; i < fs->narena; i++)
-		fs->arenabp[i] = fs->arenas[i].hd->bp;
+		fs->arenabp[i] = fs->arenas[i].h0->bp;
 	packsb(fs->sb0->buf, Blksz, fs);
 	packsb(fs->sb1->buf, Blksz, fs);
 	finalize(fs->sb0);
@@ -1136,9 +1156,9 @@
 	for(i = 0; i < fs->narena; i++){
 		a = &fs->arenas[i];
 		qlock(a);
-		packarena(a->tl->data, Blksz, a);
-		finalize(a->tl);
-		syncblk(a->tl);
+		packarena(a->h1->data, Blksz, a);
+		finalize(a->h1);
+		syncblk(a->h1);
 		qunlock(a);
 	}
 	/*
--- a/cons.c
+++ b/cons.c
@@ -210,6 +210,7 @@
 		size += a->size;
 		used += a->used;
 		qunlock(a);
+		fprint(fd, "arena %d: %llx/%llx (%.2f)\n", i, a->used, a->size, (double)a->used/(double)a->size);
 	}
 	hsize = size;
 	hused = used;
--- a/dat.h
+++ b/dat.h
@@ -198,8 +198,7 @@
  *			snapshot.
  *	rooth[8]	hash of root node
  *	narena[4]	number of arenas in tree
- *	arenasz[8]	maximum size of arenas;
- *			they may be smaller.
+ *	flags[8]	feature flags
  *	gen[8]		The flush generation
  *
  * The arena zone blocks have this layout, and
@@ -486,7 +485,7 @@
 	Tree	snap;
 	Dlist	snapdl;
 	int	narena;
-	vlong	arenasz;
+	vlong	flags;
 	vlong	nextqid;
 	vlong	nextgen;
 	vlong	qgen;
@@ -503,7 +502,6 @@
 	long	nsyncers;
 	long	nreaders;
 
-	int	gotinfo;
 	QLock	synclk;
 	Rendez	syncrz;
 
@@ -570,8 +568,8 @@
 	int	nqueue;
 	int	lbidx;
 	Blk	*logbuf[2];	/* preallocated log pages */
-	Blk	*hd;		/* arena header */
-	Blk	*tl;		/* arena footer */
+	Blk	*h0;		/* arena header, first copy */
+	Blk	*h1;		/* arena header, second copy (block after h0) */
 	Blk	**q;		/* write queue */
 	vlong	nq;
 	vlong	size;
--- a/dump.c
+++ b/dump.c
@@ -74,7 +74,10 @@
 			break;
 		case Onop:
 		case Oinsert:
-			n = fmtprint(fmt, "ptr:%B", unpackbp(v->v, v->nv));
+			if(v->nv == Ptrsz)
+				n = fmtprint(fmt, "ptr:%B", unpackbp(v->v, v->nv));
+			else
+				n = fmtprint(fmt, "BROKEN ptr %.*H", v->nk, v->k);
 			break;
 		}
 		break;
--- a/fns.h
+++ b/fns.h
@@ -81,7 +81,7 @@
 void	closesnap(Tree*);
 void	reamfs(char*);
 void	growfs(char*);
-void	loadarena(Arena*, Bptr, vlong);
+void	loadarena(Arena*, Bptr);
 void	loadfs(char*);
 void	sync(void);
 void	loadlog(Arena*, Bptr);
@@ -139,7 +139,6 @@
 	}while(0)
 
 jmp_buf*	_waserror(void);
-_Noreturn void	errorv(char*, va_list);
 _Noreturn void	error(char*, ...);
 _Noreturn void	broke(char*, ...);
 _Noreturn void	nexterror(void);
--- a/fs.c
+++ b/fs.c
@@ -39,6 +39,7 @@
 
 	lock(&fs->mountlk);
 	if(waserror()){
+		*tp = nil;
 		unlock(&fs->mountlk);
 		nexterror();
 	}
@@ -2089,11 +2090,24 @@
 {
 	Fmsg *m;
 	Amsg *a;
+	Fid *f;
 
 	while(1){
 		a = nil;
 		m = chrecv(fs->wrchan);
 		if(fs->rdonly){
+			/*
+			 * special case: even if Tremove fails, we need
+			 * to clunk the fid.
+			 */
+			if(m->type == Tremove){
+				if((f = getfid(m->conn, m->fid)) == nil){
+					rerror(m, Enofid);
+					continue;
+				}
+				clunkfid(m->conn, f);
+				putfid(f);
+			}
 			rerror(m, Erdonly);
 			continue;
  		}
@@ -2216,6 +2230,7 @@
 			if(waserror()){
 				fprint(2, "sync error: %s\n", errmsg());
 				ainc(&fs->rdonly);
+				break;
 			}
 			for(i = 0; i < fs->narena; i++){
 				a = &fs->arenas[i];
@@ -2269,6 +2284,7 @@
 			if(waserror()){
 				fprint(2, "taking snap: %s\n", errmsg());
 				ainc(&fs->rdonly);
+				break;
 			}
 
 			qlock(&fs->mutlk);
@@ -2365,8 +2381,10 @@
 		sleep(5000);
 		if(fs->rdonly)
 			continue;
-		if(waserror())
+		if(waserror()){
 			fprint(2, "task error: %s", errmsg());
+			continue;
+		}
 		a = emalloc(sizeof(Amsg), 1);
 		a->op = AOsync;
 		a->halt = 0;
--- a/load.c
+++ b/load.c
@@ -17,43 +17,45 @@
 }
 
 void
-loadarena(Arena *a, Bptr hdbp, vlong asz)
+loadarena(Arena *a, Bptr hd)
 {
-	Blk *hd, *tl, *b;
+	Blk *h0, *h1, *b;
 	Bptr bp;
 
 	/* try to load block pointers with consistency check */
-	bp = hdbp;
-	hd = nil;
-	tl = nil;
+	bp = hd;
+	h0 = nil;
+	h1 = nil;
 	if(!waserror()){
-		hd = getblk(bp, 0);
+		h0 = getblk(bp, 0);
 		poperror();
-	}
-	bp.addr += asz;
+	}else
+		print("error %s\n", errmsg());
+	bp.addr += Blksz;
 	if(!waserror()){
-		tl = getblk(bp, 0);
+		h1 = getblk(bp, 0);
 		poperror();
-	}
+	}else
+		print("error %s\n", errmsg());
 
 	/* if neither head nor tail is consistent, we're hosed */
-	b = (hd != nil) ? hd : tl;
+	b = (h0 != nil) ? h0 : h1;
 	if(b == nil)
 		error(Efs);
 
 	/* otherwise, we could have crashed mid-pass, just load the blocks */
-	bp = hdbp;
-	if(hd == nil)
-		hd = getblk(bp, GBnochk);
-	bp.addr += asz;
-	if(tl == nil)
-		tl = getblk(bp, GBnochk);
+	bp = hd;
+	if(h0 == nil)
+		h0 = getblk(bp, GBnochk);
+	bp.addr += Blksz;
+	if(h1 == nil)
+		h1 = getblk(bp, GBnochk);
 
 	unpackarena(a, b->data, Arenasz);
 	if((a->free = avlcreate(rangecmp)) == nil)
 		error(Enomem);
-	a->hd = hd;
-	a->tl = tl;
+	a->h0 = h0;
+	a->h1 = h1;
 	a->used = a->size;
 }
 
@@ -60,11 +62,13 @@
 void
 loadfs(char *dev)
 {
+	Bptr bhd, btl;
 	Mount *dump;
 	Arena *a;
-	Bptr bp;
 	Tree *t;
-	int i, k;
+	Dir *d;
+	int i;
+	vlong eb;
 
 	if((dump = mallocz(sizeof(*dump), 1)) == nil)
 		sysfatal("malloc: %r");
@@ -76,31 +80,39 @@
 	dump->root = &fs->snap;
 
 	fs->snapmnt = dump;
-	fs->gotinfo = 0;
 	fs->narena = 1;
 	if((fs->fd = open(dev, ORDWR)) == -1)
 		sysfatal("open %s: %r", dev);
-	bp = (Bptr){0, -1, -1};
-	fs->sb0 = getblk(bp, GBnochk);
-	bp = (Bptr){512*MiB, -1, -1};
-	fs->sb1 = getblk(bp, GBnochk);
-	unpacksb(fs, fs->sb0->buf, Blksz);
+	if((d = dirfstat(fs->fd)) == nil)
+		sysfatal("stat %s: %r", dev);
+
+	eb = d->length - (d->length%Blksz) - Blksz;	/* last whole block: backup superblock */
+	free(d);	/* dirfstat result is malloc'd; only the length was needed */
+	bhd = (Bptr){0, -1, -1};
+	btl = (Bptr){eb, -1, -1};
+	fs->sb0 = getblk(bhd, GBnochk);
+	fs->sb1 = getblk(btl, GBnochk);
+	if(!waserror()){
+		unpacksb(fs, fs->sb0->buf, Blksz);
+		poperror();
+	}else{
+		fprint(2, "unable to load primary superblock: %s\n", errmsg());
+		if(waserror()){
+			fprint(2, "unable to load backup superblock: %s\n", errmsg());
+			exits("corrupt");
+		}
+		unpacksb(fs, fs->sb1->buf, Blksz);
+		poperror();
+	}
+
 	if((fs->arenas = calloc(fs->narena, sizeof(Arena))) == nil)
 		sysfatal("malloc: %r");
 	for(i = 0; i < fs->narena; i++){
 		a = &fs->arenas[i];
-		memset(a, 0, sizeof(Arena));
-		loadarena(a, fs->arenabp[i], fs->arenasz);
+		loadarena(a, fs->arenabp[i]);
 		a->reserve = a->size / 1024;
 		if(a->reserve < 32*MiB)
 			a->reserve = 32*MiB;
-		if(!fs->gotinfo){
-			if((fs->arenas = realloc(fs->arenas, fs->narena*sizeof(Arena))) == nil)
-				sysfatal("malloc: %r");
-			for(k = 1; k < fs->narena; k++)
-				memset(&fs->arenas[k], 0, sizeof(Arena));
-			fs->gotinfo = 1;
-		}
 	}
 	for(i = 0; i < fs->narena; i++){
 		a = &fs->arenas[i];
@@ -119,7 +131,7 @@
 	fprint(2, "load %s:\n", dev);
 	fprint(2, "\tsnaptree:\t%B\n", fs->snap.bp);
 	fprint(2, "\tnarenas:\t%d\n", fs->narena);
-	fprint(2, "\tarenasz:\t%lld MiB\n", fs->arenasz/MiB);
+	fprint(2, "\tfeatures:\t%lld\n", fs->flags);
 	fprint(2, "\tnextqid:\t%lld\n", fs->nextqid);
 	fprint(2, "\tnextgen:\t%lld\n", fs->nextgen);
 	fprint(2, "\tblocksize:\t%lld\n", Blksz);
--- a/main.c
+++ b/main.c
@@ -62,13 +62,15 @@
 	return c->errlab + (c->nerrlab-1);
 }
 
-_Noreturn void
-errorv(char *fmt, va_list ap)
+_Noreturn static void
+errorv(char *fmt, va_list ap, int broke)
 {
 	Errctx *c;
 
 	c = *errctx;
 	vsnprint(c->err, sizeof(c->err), fmt, ap);
+	if(broke)
+		fprint(2, "%s\n", c->err);
 	assert(c->nerrlab > 0 && c->nerrlab < Estacksz);
 	longjmp(c->errlab[--c->nerrlab], -1);
 }
@@ -80,8 +82,8 @@
 
 	aincl(&fs->rdonly, 1);
 	va_start(ap, fmt);
-	vfprint(2, fmt, ap);
-	errorv(fmt, ap);
+//abort();
+	errorv(fmt, ap, 1);
 }
 
 _Noreturn void
@@ -90,7 +92,7 @@
 	va_list ap;
 
 	va_start(ap, fmt);
-	errorv(fmt, ap);
+	errorv(fmt, ap, 0);
 }
 
 _Noreturn void
--- a/pack.c
+++ b/pack.c
@@ -433,12 +433,16 @@
 }
 
 char*
-packsb(char *p, int sz, Gefs *fi)
+packsb(char *p0, int sz, Gefs *fi)
 {
+	uvlong h;
+	char *p;
 	int i;
 
 	assert(sz == Blksz);
-	memcpy(p, "gefs0008", 8);	p += 8;
+	assert(fi->narena < 256);	/* unpacksb rejects 256 or more arenas */
+	p = p0;
+	memcpy(p, "gefs0009", 8);	p += 8;
 	PACK32(p, Blksz);		p += 4;
 	PACK32(p, Bufspc);		p += 4;
 	PACK32(p, fi->snap.ht);		p += 4;
@@ -449,7 +453,7 @@
 	PACK64(p, fi->snapdl.tl.addr);	p += 8;
 	PACK64(p, fi->snapdl.tl.hash);	p += 8;
 	PACK32(p, fi->narena);		p += 4;
-	PACK64(p, fi->arenasz);		p += 8;
+	PACK64(p, fi->flags);		p += 8;
 	PACK64(p, fi->nextqid);		p += 8;
 	PACK64(p, fi->nextgen);		p += 8;
 	PACK64(p, fi->qgen);		p += 8;
@@ -457,16 +461,21 @@
 		PACK64(p, fi->arenabp[i].addr);	p += 8;
 		PACK64(p, fi->arenabp[i].hash);	p += 8;
 	}
+	h = bufhash(p0, p - p0);
+	PACK64(p, h);			p += 8;
 	return p;
 }
 
 char*
-unpacksb(Gefs *fi, char *p, int sz)
+unpacksb(Gefs *fi, char *p0, int sz)
 {
+	uvlong dh, xh;
+	char *p;
 	int i;
 
 	assert(sz == Blksz);
-	if(memcmp(p, "gefs0008", 8) != 0)
+	p = p0;
+	if(memcmp(p, "gefs0009", 8) != 0)
 		error("%s %.8s", Efsvers, p);
 	p += 8;
 	fi->blksz = UNPACK32(p);		p += 4;
@@ -481,17 +490,22 @@
 	fi->snapdl.tl.addr = UNPACK64(p);	p += 8;
 	fi->snapdl.tl.hash = UNPACK64(p);	p += 8;
 	fi->snapdl.gen = -1;			p += 0;
-	fi->snapdl.gen = 
 	fi->narena = UNPACK32(p);		p += 4;
-	fi->arenasz = UNPACK64(p);		p += 8;
+	if(fi->narena <= 0 || fi->narena >= 256)	/* check on-disk count before it sizes arenabp */
+		error("corrupt superblock: bad arena count %d", fi->narena);
+	fi->flags = UNPACK64(p);		p += 8;
 	fi->nextqid = UNPACK64(p);		p += 8;
 	fi->nextgen = UNPACK64(p);		p += 8;
 	fi->qgen = UNPACK64(p);	p += 8;
-	fi->arenabp = malloc(fi->narena * sizeof(Bptr));
+	fi->arenabp = emalloc(fi->narena * sizeof(Bptr), 0);
 	for(i = 0; i < fi->narena; i++){
 		fi->arenabp[i].addr = UNPACK64(p);	p += 8;
 		fi->arenabp[i].hash = UNPACK64(p);	p += 8;
 		fi->arenabp[i].gen = -1;
 	}
+	xh = bufhash(p0, p - p0);	/* hash covers everything unpacked so far */
+	dh = UNPACK64(p);			p += 8;
+	if(dh != xh)
+		error("corrupt superblock: %llx != %llx", dh, xh);
 	return p;
 }
--- a/ream.c
+++ b/ream.c
@@ -161,22 +161,14 @@
 }
 
 static void
-initarena(Arena *a, vlong start, vlong asz)
+initarena(Arena *a, uvlong hdaddr, vlong asz)
 {
-	vlong addr, bo, bh;
+	Blk *b, *h0, *h1;
+	uvlong addr;
 	char *p;
-	Blk *b, *hd, *tl;
 
 	b = cachepluck();
-	if(start == 512*MiB){
-		start += Blksz;
-		asz -= Blksz;
-	}
-	addr = start+Blksz;	/* leave room for arena hdr */
-	if(addr == 512*MiB){
-		addr += Blksz;
-		asz -= Blksz;
-	}
+	addr = hdaddr+2*Blksz;	/* leave room for arena hdr */
 
 	a->loghd.addr = -1;
 	a->loghd.hash = -1;
@@ -184,7 +176,7 @@
 
 	memset(b->buf, 0, sizeof(b->buf));
 	b->type = Tlog;
-	b->bp.addr = addr+Blksz;
+	b->bp.addr = addr;
 	b->logsz = 0;
 	b->logp = (Bptr){-1, -1, -1};
 	b->data = b->buf + Loghdsz;
@@ -193,14 +185,9 @@
 	p = b->buf + Loghdsz;
 	b->logp = (Bptr){-1, -1, -1};
 	PACK64(p, addr|LogFree);	p += 8;	/* addr */
-	PACK64(p, asz-Blksz);		p += 8;	/* len */
+	PACK64(p, asz-2*Blksz);		p += 8;	/* len */
 	PACK64(p, b->bp.addr|LogAlloc);	p += 8;	/* addr */
 	PACK64(p, Blksz);		p += 8;	/* len */
-	/* backup sb */
-	if(start <= 512*MiB && start+asz > 512*MiB){
-		PACK64(p, (512*MiB)|LogAlloc1);
-		p += 8;
-	}
 	PACK64(p, (uvlong)LogSync);	p += 8;	/* barrier */
 	b->logsz = p - b->data;
 	finalize(b);
@@ -207,38 +194,34 @@
 	syncblk(b);
 	dropblk(b);
 
-	bh = b->bp.hash;
-	bo = b->bp.addr;
-
-	a->loghd.addr = bo;
-	a->loghd.hash = bh;
+	a->loghd = b->bp;
 	a->loghd.gen = -1;
 	a->size = asz;
 	a->used = Blksz;
 
-	hd = cachepluck();
-	tl = cachepluck();
+	h0 = cachepluck();
+	h1 = cachepluck();
 
-	memset(hd->buf, 0, sizeof(hd->buf));
-	hd->type = Tarena;
-	hd->bp.addr = start;
-	hd->data = hd->buf+2;
-	finalize(hd);
+	memset(h0->buf, 0, sizeof(h0->buf));
+	h0->type = Tarena;
+	h0->bp.addr = hdaddr;
+	h0->data = h0->buf+2;
+	finalize(h0);
 
-	memset(tl->buf, 0, sizeof(tl->buf));
-	tl->type = Tarena;
-	tl->bp.addr = start+asz;
-	tl->data = tl->buf+2;
-	finalize(tl);
+	memset(h1->buf, 0, sizeof(h1->buf));
+	h1->type = Tarena;
+	h1->bp.addr = hdaddr+Blksz;
+	h1->data = h1->buf+2;
+	finalize(h1);
 
-	packarena(hd->data, Arenasz, a);
-	packarena(tl->data, Arenasz, a);
-	finalize(hd);
-	finalize(tl);
-	syncblk(hd);
-	syncblk(tl);
-	a->hd = hd;
-	a->tl = tl;
+	packarena(h0->data, Arenasz, a);
+	packarena(h1->data, Arenasz, a);
+	finalize(h0);
+	finalize(h1);
+	syncblk(h0);
+	syncblk(h1);
+	a->h0 = h0;
+	a->h1 = h1;
 }
 
 void
@@ -261,7 +244,7 @@
 	sz = d->length;
 	free(d);
 
-	if(sz < 512*MiB+Blksz)
+	if(sz < 128*MiB+Blksz)
 		sysfatal("ream: disk too small");
 	mnt = emalloc(sizeof(Mount), 1);
 	mnt->root = mallocz(sizeof(Tree), 1);
@@ -268,10 +251,10 @@
 	adm = mallocz(sizeof(Mount), 1);
 	adm->root = mallocz(sizeof(Tree), 1);
 
-	sz = sz - sz%Blksz;
+	sz = sz - sz%Blksz - 2*Blksz;
 	fs->narena = (sz + 4096ULL*GiB - 1) / (4096ULL*GiB);
-	if(fs->narena < 4)
-		fs->narena = 4;
+	if(fs->narena < 8)
+		fs->narena = 8;
 	if(fs->narena >= 32)
 		fs->narena = 32;
 	fs->arenas = emalloc(fs->narena*sizeof(Arena), 1);
@@ -280,12 +263,11 @@
 	off = Blksz;
 	asz = sz/fs->narena;
 	asz = asz - (asz % Blksz) - 2*Blksz;
-	fs->arenasz = asz;
 
 	sb0 = cachepluck();
 	sb1 = cachepluck();
 	sb0->bp = (Bptr){0, -1, -1};
-	sb1->bp = (Bptr){512*MiB, -1, -1};
+	sb1->bp = (Bptr){sz+Blksz, -1, -1};
 
 	fs->arenabp = emalloc(fs->narena * sizeof(Bptr), 1);
 	for(i = 0; i < fs->narena; i++){
@@ -292,7 +274,7 @@
 		a = &fs->arenas[i];
 		print("\tarena %d: %lld blocks at %llx\n", i, asz/Blksz, off);
 		initarena(a, off, asz);
-		fs->arenabp[i] = a->hd->bp;
+		fs->arenabp[i] = a->h0->bp;
 		off += asz+2*Blksz;
 
 	}
@@ -299,7 +281,7 @@
 	
 	for(i = 0; i < fs->narena; i++){
 		a = &fs->arenas[i];
-		loadarena(a, a->hd->bp, asz);
+		loadarena(a, a->h0->bp);
 		loadlog(a, a->loghd);
 	}
 
@@ -368,15 +350,15 @@
 		a = &fs->arenas[i];
 		finalize(a->logtl);
 		syncblk(a->logtl);
-		packarena(a->hd->data, Blksz, a);
-		finalize(a->hd);
-		syncblk(a->hd);
-		packarena(a->tl->data, Blksz, a);
-		finalize(a->tl);
-		syncblk(a->tl);
-		fs->arenabp[i] = a->hd->bp;
-		dropblk(a->hd);
-		dropblk(a->tl);
+		packarena(a->h0->data, Blksz, a);
+		finalize(a->h0);
+		syncblk(a->h0);
+		packarena(a->h1->data, Blksz, a);
+		finalize(a->h1);
+		syncblk(a->h1);
+		fs->arenabp[i] = a->h0->bp;
+		dropblk(a->h0);
+		dropblk(a->h1);
 	}
 
 	dropblk(mb);
@@ -403,10 +385,10 @@
 void
 growfs(char *dev)
 {
-	vlong sz, off;
+	vlong oldsz, newsz, asz, off, eb;
 	int i, narena;
-	Bptr bp;
 	Arena *a;
+	Bptr bp;
 	Dir *d;
 
 	if(waserror())
@@ -415,51 +397,65 @@
 		sysfatal("open %s: %r", dev);
 	if((d = dirfstat(fs->fd)) == nil)
 		sysfatal("ream: %r");
-	sz = d->length;
-	free(d);
 
 	bp = (Bptr){0, -1, -1};
-	if((fs->sb0 = getblk(bp, GBnochk)) == nil)
-		sysfatal("superblock: %r\n");
-	if(unpacksb(fs, fs->sb0->buf, Blksz) == nil)
-		sysfatal("superblock: %r");
+	fs->sb0 = getblk(bp, GBnochk);
+	unpacksb(fs, fs->sb0->buf, Blksz);
 	if((fs->arenas = calloc(fs->narena, sizeof(Arena))) == nil)
 		sysfatal("malloc: %r");
 	for(i = 0; i < fs->narena; i++){
 		a = &fs->arenas[i];
-		loadarena(a, fs->arenabp[i], fs->arenasz);
+		loadarena(a, fs->arenabp[i]);
+		fs->arenabp[i] = a->h0->bp;
 	}
-	narena = sz/fs->arenasz;
-	off = fs->arenasz * fs->narena;
-	if(narena <= fs->narena)
-		sysfatal("disk too small for more arenas");
+	a = &fs->arenas[fs->narena-1];
+	oldsz = a->h0->bp.addr + a->size + 2*Blksz;
+	newsz = d->length - d->length%Blksz - 2*Blksz;
+	if(newsz - oldsz < 64*MiB)
+		sysfatal("new arenas too small (%lld < %lld), not growing", newsz - oldsz, 64*MiB);
+	asz = (newsz - oldsz)/4;
+	asz = asz - asz % Blksz - 2*Blksz;
+	narena = fs->narena + 4;
+	assert(oldsz % Blksz == 0);
 	if((fs->arenas = realloc(fs->arenas, narena*sizeof(Arena))) == nil)
 		error(Enomem);
-	if((fs->arenabp = realloc(fs->arenas, narena*sizeof(Bptr))) == nil)
+	if((fs->arenabp = realloc(fs->arenabp, narena*sizeof(Bptr))) == nil)
 		error(Enomem);
+
+	off = oldsz;
 	for(i = fs->narena; i < narena; i++){
 		a = &fs->arenas[i];
-		print("\tadding %d: %lld blocks at %llx\n", i, fs->arenasz/Blksz, off);
-		initarena(&fs->arenas[i], off, fs->arenasz);
-		loadarena(a, fs->arenabp[i], fs->arenasz);
-		fs->arenabp[i] = a->hd->bp;
-		off += fs->arenasz;
-	}
-	fs->narena = narena;
-	for(i = 0; i < narena; i++){
+		print("\tnew arena %d: adding %lld blocks at %llx\n", i, asz/Blksz, off);
+		initarena(&fs->arenas[i], off, asz);
+		loadarena(a, a->h0->bp);
+		loadlog(a, a->loghd);
 		a = &fs->arenas[i];
-		packarena(a->hd->data, Blksz, a);
-		packarena(a->tl->data, Blksz, a);
-		finalize(a->hd);
-		finalize(a->tl);
-		syncblk(a->hd);
-		syncblk(a->tl);
+		packarena(a->h0->data, Blksz, a);
+		packarena(a->h1->data, Blksz, a);
+		finalize(a->h0);
+		finalize(a->h1);
+		syncblk(a->h0);
+		syncblk(a->h1);
+
+		fs->arenabp[i] = a->h0->bp;
+		off += asz+2*Blksz;
 	}
+	fs->narena = narena;
 	packsb(fs->sb0->buf, Blksz, fs);
-	packsb(fs->sb1->buf, Blksz, fs);
 	finalize(fs->sb0);
-	finalize(fs->sb1);
 	syncblk(fs->sb0);
-	syncblk(fs->sb1);
+	/*
+	 * We're being a bit tricksy here: because we're on a bigger
+	 * partition, we don't know where the end is; just load the
+	 * first block, and patch the address in to the right place
+	 * when we write it back.
+	 */
+	eb = d->length;
+	eb = eb - (eb%Blksz) - Blksz;
+	fs->sb0->bp = (Bptr){eb, -1, -1};
+	packsb(fs->sb0->buf, Blksz, fs);
+	finalize(fs->sb0);
+	syncblk(fs->sb0);
+	free(d);
 	poperror();
 }
--- a/tree.c
+++ b/tree.c
@@ -1154,7 +1154,7 @@
 
 	if((r = dupblk(t, b)) == nil)
 		error(Enomem);
-	
+
 	nbuf = r->nbuf;
 	for(i = 0; i < nmsg; i++)
 		setmsg(r, &msg[i]);