ref: 43d8da8b53e5f773e780928b3cf5ae2ce252d908
dir: /snap.c/
#include <u.h>
#include <libc.h>
#include <fcall.h>
#include <avl.h>
#include "dat.h"
#include "fns.h"
#include "atomic.h"
static char*
dlsync(Dlist *dl)
{
char kvbuf[512];
Msg m;
if(dl->ins == nil)
return nil;
if(checkflag(dl->ins, Bdirty))
enqueue(dl->ins);
dl->hd = dl->ins->bp;
if(dl->hd.addr == dl->tl.addr)
dl->tl = dl->hd;
m.op = Oinsert;
dlist2kv(dl, &m, kvbuf, sizeof(kvbuf));
return btupsert(&fs->snap, &m, 1);
}
static void
dlcachedel(Dlist *dl, int hdel)
{
uint h;
Dlist *d, **p;
h = ihash(dl->gen) ^ ihash(dl->bgen);
if(hdel){
p = &fs->dlcache[h % fs->dlcmax];
for(d = *p; d != nil; d = d->chain){
if(d->gen == dl->gen && d->bgen == dl->bgen)
break;
p = &d->chain;
}
if(d != nil)
*p = d->chain;
}
if(dl == fs->dlhead)
fs->dlhead = dl->cnext;
if(dl == fs->dltail)
fs->dltail = dl->cprev;
if(dl->cnext != nil)
dl->cnext->cprev = dl->cprev;
if(dl->cprev != nil)
dl->cprev->cnext = dl->cnext;
dl->cnext = nil;
dl->cprev = nil;
}
static Dlist*
dlcacheget(vlong gen, vlong bgen)
{
Dlist *dl;
uint h;
h = ihash(gen) ^ ihash(bgen);
for(dl = fs->dlcache[h % nelem(fs->dlcache)]; dl != nil; dl = dl->chain)
if(dl->gen == gen && dl->bgen == bgen)
break;
if(dl != nil)
dlcachedel(dl, 0);
return dl;
}
static Dlist*
getdl(vlong gen, vlong bgen)
{
char kbuf[Dlksz], kvbuf[Dlkvpsz];
Dlist *dl, **p;
uint h;
Msg m;
Kvp kv;
Key k;
if((dl = dlcacheget(gen, bgen)) != nil)
return dl;
if((dl = mallocz(sizeof(Dlist), 1)) == nil)
return nil;
kbuf[0] = Kdlist;
PACK64(kbuf+1, gen);
PACK64(kbuf+9, bgen);
k.k = kbuf;
k.nk = sizeof(kbuf);
/* load up existing dlist */
if(btlookup(&fs->snap, &k, &kv, kvbuf, sizeof(kvbuf)) == nil){
kv2dlist(&kv, dl);
if(dl->hd.addr != -1 && (dl->ins = getblk(dl->hd, 0)) == nil){
free(dl);
return nil;
}
goto Found;
}
/* create a new one if it didn't exist */
dl->gen = gen;
dl->bgen = bgen;
dl->hd.addr = -1;
dl->tl.addr = -1;
dl->ins = nil;
m.op = Oinsert;
dlist2kv(dl, &m, kvbuf, sizeof(kvbuf));
if(btupsert(&fs->snap, &m, 1) != nil){
free(dl);
return nil;
}
Found:
h = ihash(gen) ^ ihash(bgen);
p = &fs->dlcache[h % nelem(fs->dlcache)];
dl->chain = *p;
*p = dl;
return dl;
}
void
putdl(Dlist *dl)
{
Dlist *dt;
dlcachedel(dl, 0);
dl->cprev = nil;
dl->cnext = fs->dlhead;
fs->dlhead = dl;
while(fs->ctail != nil && fs->dlcount >= fs->dlcmax){
dt = fs->dltail;
dlsync(dt);
dlcachedel(dt, 1);
dropblk(dt->ins);
free(dt);
}
}
static char*
freedl(Dlist *dl, int docontents)
{
Bptr bp;
Blk *b;
char *p;
bp = dl->hd;
while(bp.addr != -1){
/* ugly: when we merge deadlists, we change their hash */
if((b = getblk(bp, GBnochk)) == nil)
return Efs;
if(docontents){
for(p = b->data; p != b->data+b->deadsz; p += 8){
if(blkrelease(UNPACK64(p)) == -1){
dropblk(b);
return Efs;
}
}
}
bp = b->deadp;
freeblk(&fs->snap, b);
dropblk(b);
}
return nil;
}
static char*
mergedl(vlong succ, vlong gen, vlong bgen)
{
char *err, buf[2][Kvmax];
Dlist *d, *m;
Msg msg[2];
Blk *b;
// vlong x;
err = nil;
d = getdl(gen, bgen);
m = getdl(succ, bgen);
if(d == nil || m == nil){
err = Efs;
goto Out;
}
if(m->ins == nil){
m->hd = d->hd;
m->tl = d->tl;
m->ins = d->ins;
}else{
m->hd = d->hd;
/* ugly: when we merge deadlists, we change their hash */
if((b = getblk(d->tl, GBnochk)) == nil)
goto Out;
msg[0].op = Odelete;
dlist2kv(d, &msg[0], buf[0], sizeof(buf[0]));
msg[1].op = Oinsert;
dlist2kv(m, &msg[1], buf[1], sizeof(buf[1]));
if((err = btupsert(&fs->snap, msg, 2)) != nil)
sysfatal("merge deadlists: %s", err);
packbp(b->buf+2, Blksz, &m->hd);
enqueue(b);
dropblk(b);
// TODO: merge
// if(m->ins->deadsz + d->ins->deadsz < Dlspc){
// p = d->ins->data;
// q = m->ins->data + m->ins->deadsz;
// for(i = 0; i < d->ins->deadsz; i += 8){
// m->ins->deadsz += 8;
// x = UNPACK64(p);
// PACK64(q, x);
// p += 8;
// q += 8;
// }
// }
}
Out:
if(d != nil)
putdl(d);
if(m != nil)
putdl(m);
return err;
}
static vlong
successor(vlong gen)
{
char *e, pfx[9];
vlong id, succ;
Scan s;
pfx[0] = Kslink;
succ = -1;
PACK64(pfx+1, gen);
btnewscan(&s, pfx, sizeof(pfx));
if((e = btenter(&fs->snap, &s)) != nil)
goto Err;
if((e = btnext(&fs->snap, &s, &s.kv)) != nil)
goto Err;
if(!s.done)
kv2link(&s.kv, &id, &succ);
if((e = btnext(&fs->snap, &s, &s.kv)) != nil)
goto Err;
if(!s.done)
sysfatal("multiple successors in cleanup");
btexit(&fs->snap, &s);
return succ;
Err:
fprint(2, "error getting snap successor: %s", e);
btexit(&fs->snap, &s);
return -1;
}
static char*
reclaimblocks(vlong gen, vlong prev)
{
char *e, pfx[9];
vlong succ;
Dlist dl;
Scan s;
Msg m;
succ = successor(gen);
pfx[0] = Kdlist;
if(succ == -1)
PACK64(pfx+1, gen);
else
PACK64(pfx+1, succ);
btnewscan(&s, pfx, sizeof(pfx));
if((e = btenter(&fs->snap, &s)) != nil)
return e;
while(1){
if((e = btnext(&fs->snap, &s, &s.kv)) != nil)
break;
if(s.done)
break;
kv2dlist(&s.kv, &dl);
/*
* delete the dlist before processing it; it's
* better to leak blocks than to double free.
*/
m.op = Odelete;
m.Kvp = s.kv;
if((e = btupsert(&fs->snap, &m, 1)) != nil)
break;
if(succ == -1)
e = freedl(&dl, 1);
else if(dl.bgen > prev)
e = freedl(&dl, 0);
else
e = mergedl(succ, dl.gen, dl.bgen);
if(e != nil)
break;
}
btexit(&fs->snap, &s);
return e;
}
/*
* Removes a label from a snapshot, allowing
* it to be reclaimed if it is not a direct
* predecessor of more than one other snapshot.
*
* If it has one successor and no label, then
* it will be merged with that successor.
*/
char*
delsnap(Tree *t, char *name)
{
char buf[2][Kvmax], *e;
Msg m[2];
int nm;
if(strcmp(name, "dump") == 0 || strcmp(name, "empty") == 0 || strcmp(name, "adm") == 0)
return Ename;
nm = 0;
m[nm].op = Odelete;
m[nm].k = buf[nm];
if((e = packlabel(buf[nm], sizeof(buf[nm]), name)) == nil)
return Ename;
m[nm].nk = e - m[nm].k;
m[nm].v = nil;
m[nm].nv = 0;
nm++;
t->nlbl--;
if(t->nlbl == 0 && t->nsucc <= 1){
m[nm].op = Odelete;
m[nm].k = buf[nm];
if((e = packsnap(buf[nm], sizeof(buf[nm]), t->gen)) == nil)
return Ename;
m[nm].nk = e - m[nm].k;
m[nm].v = nil;
m[nm].nv = 0;
nm++;
}else{
m[nm].op = Oinsert;
tree2kv(t, &m[nm], buf[nm], sizeof(buf[nm]));
nm++;
}
if((e = flushdlcache(1)) != nil)
return e;
if((e = btupsert(&fs->snap, m, nm)) != nil)
return e;
if((e = reclaimblocks(t->gen, t->prev)) != nil)
return e;
return nil;
}
/*
* Attaches a label to a tree, incrementing
* its reference count. This labelled snapshot
* will show up in the dump.
*/
char*
labelsnap(Tree *t, char *name)
{
char buf[2][Kvmax];
Msg m[2];
if(strcmp(name, "dump") == 0 || strcmp(name, "empty") == 0 || strcmp(name, "adm") == 0)
return Ename;
t->nlbl++;
m[0].op = Oinsert;
tree2kv(t, &m[0], buf[0], sizeof(buf[0]));
m[1].op = Oinsert;
lbl2kv(name, t->gen, &m[1], buf[1], sizeof(buf[1]));
return btupsert(&fs->snap, m, 2);
}
/*
* Updates a snapshot; keeps the generation the same if possible,
* otherwise moves to a new generation. A snapshot may only stay
* at the same generation as long as it is at the tip of a snapshot
* list; once it's observable by a derived snapshot it must be
* immutable.
*/
char*
updatesnap(Tree **r, Tree *o, char *lbl)
{
char buf[4][Kvmax], *e;
Msg m[4];
Tree *t;
if(!o->dirty)
return nil;
/* this snap can be modified in place, so do that */
if(o->nlbl == 1 && o->nsucc == 0){
m[0].op = Oinsert;
tree2kv(o, &m[0], buf[0], sizeof(buf[0]));
if((e = btupsert(&fs->snap, &m[0], 1)) != nil)
return e;
o->dirty = 0;
return nil;
}
/* update the old kvp */
o->nsucc++;
o->nlbl--;
m[0].op = Oinsert;
tree2kv(o, &m[0], buf[0], sizeof(buf[0]));
/* create the new one */
if((t = mallocz(sizeof(Tree), 1)) == nil)
return Enomem;
t->memref = 1;
t->dirty = 0;
t->nlbl = 1;
t->nsucc = 0;
t->ht = o->ht;
t->bp = o->bp;
t->prev = o->gen;
t->gen = aincv(&fs->nextgen, 1);
m[1].op = Oinsert;
tree2kv(t, &m[1], buf[1], sizeof(buf[1]));
m[2].op = Oinsert;
link2kv(t->prev, t->gen, &m[2], buf[2], sizeof(buf[2]));
m[3].op = Oinsert;
lbl2kv(lbl, t->gen, &m[3], buf[3], sizeof(buf[3]));
if((e = btupsert(&fs->snap, m, 4)) != nil){
free(t);
return e;
}
/* only update the dirty status after we sync */
o->dirty = 0;
closesnap(o);
*r = t;
return nil;
}
/*
* open snapshot by label, returning a tree.
*/
Tree*
opensnap(char *label)
{
char *p, buf[Kvmax];
Tree *t;
vlong gen;
Kvp kv;
Key k;
/* Klabel{"name"} => Ksnap{id} */
if((p = packlabel(buf, sizeof(buf), label)) == nil)
return nil;
k.k = buf;
k.nk = p - buf;
if(btlookup(&fs->snap, &k, &kv, buf, sizeof(buf)) != nil)
return nil;
if(kv.nv != Snapsz)
abort();
gen = UNPACK64(kv.v + 1);
if((t = mallocz(sizeof(Tree), 1)) == nil)
goto Error;
if((p = packsnap(buf, sizeof(buf), gen)) == nil)
goto Error;
k.k = buf;
k.nk = p - buf;
if(btlookup(&fs->snap, &k, &kv, buf, sizeof(buf)) != nil)
abort();
if(unpacktree(t, kv.v, kv.nv) == nil)
abort();
ainc(&t->memref);
return t;
Error:
free(t);
return nil;
}
/*
* close snapshot, flushing and freeing in-memory
* representation.
*/
void
closesnap(Tree *t)
{
if(t == nil || adec(&t->memref) != 0)
return;
assert(!t->dirty);
free(t);
}
char*
flushdlcache(int clear)
{
Dlist *dl, *n;
char *e;
for(dl = fs->dlhead; dl != nil; dl = n){
n = dl->cnext;
if((e = dlsync(dl)) != nil)
return e;
if(clear){
if(dl->ins != nil)
dropblk(dl->ins);
free(dl);
}
}
if(clear){
fs->dlhead = nil;
fs->dltail = nil;
memset(fs->dlcache, 0, fs->dlcmax*sizeof(Dlist*));
}
return nil;
}
/*
* Marks a block as killed by the tree
* t, which means that it will be free
* for use after t is reclaimed.
*
* t must be an active snapshot with
* no successors.
*/
int
killblk(Tree *t, Bptr bp)
{
Dlist *dl;
Blk *b;
char *p;
if((dl = getdl(t->gen, bp.gen)) == nil)
return -1;
if(dl->ins == nil || dl->ins->deadsz == Dlspc){
if((b = newblk(&fs->snap, Tdlist)) == nil){
putdl(dl);
return -1;
}
if(dl->ins != nil){
enqueue(dl->ins);
/* enqueuing updates the hashes */
if(dl->ins->bp.addr == dl->tl.addr)
dl->tl = dl->ins->bp;
b->deadp = dl->ins->bp;
}else{
dl->tl = b->bp;
b->deadp = (Bptr){-1, -1, -1};
}
dl->hd = b->bp;
dl->ins = b;
}
p = dl->ins->data + dl->ins->deadsz;
dl->ins->flag |= Bdirty;
dl->ins->deadsz += 8;
PACK64(p, bp.addr);
putdl(dl);
return 0;
}