ref: 08cdfaedb4d05f28fc75bade6a1c0a309c8df9a8
dir: /dat.h/
typedef struct Blk Blk;
typedef struct Amsg Amsg;
typedef struct Gefs Gefs;
typedef struct Fmsg Fmsg;
typedef struct Fid Fid;
typedef struct Msg Msg;
typedef struct Key Key;
typedef struct Val Val;
typedef struct Kvp Kvp;
typedef struct Xdir Xdir;
typedef struct Bptr Bptr;
typedef struct Bfree Bfree;
typedef struct Scan Scan;
typedef struct Dent Dent;
typedef struct Scanp Scanp;
typedef struct Arena Arena;
typedef struct Arange Arange;
typedef struct Bucket Bucket;
typedef struct Chan Chan;
typedef struct Syncq Syncq;
typedef struct Qent Qent;
typedef struct Tree Tree;
typedef struct Dlist Dlist;
typedef struct Mount Mount;
typedef struct User User;
typedef struct Stats Stats;
typedef struct Conn Conn;
enum {
KiB = 1024ULL,
MiB = 1024ULL*KiB,
GiB = 1024ULL*MiB,
TiB = 1024ULL*GiB,
Lgblk = 14,
Blksz = (1ULL<<Lgblk),
Nrefbuf = 1024, /* number of ref incs before syncing */
Nfidtab = 1024, /* number of fit hash entries */
Nflushtab = 1024, /* flush table size */
Ndtab = 1024, /* number of dir tab entries */
Max9p = 32*KiB, /* biggest message size we're willing to negotiate */
Nsec = 1000LL*1000*1000, /* nanoseconds to the second */
Maxname = 256, /* maximum size of a name element */
Maxent = 9+Maxname+1, /* maximum size of ent key, with terminator */
Maxtag = 1<<16, /* maximum tag in 9p */
/*
* Kpmax must be no more than 1/4 of pivspc, or
* there is no way to get a valid split of a
* maximally filled tree.
*/
Keymax = 128, /* key data limit */
Inlmax = 512, /* inline data limit */
Ptrsz = 24, /* off, hash, gen */
Pptrsz = 26, /* off, hash, gen, fill */
Fillsz = 2, /* block fill count */
Offksz = 17, /* type, qid, off */
Snapsz = 9, /* tag, snapid */
Dpfxsz = 9, /* directory prefix */
Upksz = 9, /* directory prefix */
Dlksz = 1+8+8, /* tag, death, birth */
Dlvsz = Ptrsz+Ptrsz, /* hd,tl of deadlist */
Dlkvpsz = Dlksz+Dlvsz, /* full size of dlist kvp */
Linksz = 1+8+8, /* gen, prev of snap */
Treesz = 4+4+4+4+8+8+8+Ptrsz, /* ref, ht, flg, gen, prev, base, root */
Kvmax = Keymax + Inlmax, /* Key and value */
Kpmax = Keymax + Ptrsz, /* Key and pointer */
Wstatmax = 4+8+8+8, /* mode, size, atime, mtime */
Arenasz = 8+8+8+8, /* loghd, loghash, size, used */
Pivhdsz = 10,
Leafhdsz = 6,
Loghdsz = 2+2+8+Ptrsz, /* type, len, hash, chain */
Rootsz = 4+Ptrsz, /* root pointer */
Pivsz = Blksz - Pivhdsz,
Bufspc = (Blksz - Pivhdsz)/2, /* pivot room */
Pivspc = Blksz - Pivhdsz - Bufspc,
Logspc = Blksz - Loghdsz,
Logslop = 16+16+8, /* val, nextb, chain */
Leafspc = Blksz - Leafhdsz,
Msgmax = 1 + (Kvmax > Kpmax ? Kvmax : Kpmax)
};
enum {
Eactive = 1UL<<30, /* epoch active flag */
};
enum {
/*
* dent: pqid[8] qid[8] -- a directory entry key.
* ptr: off[8] hash[8] gen[8] -- a key for an Dir block.
* dir: serialized Xdir
*/
/* fs keys */
Kdat, /* qid[8] off[8] => ptr: pointer to data page */
Kent, /* pqid[8] name[n] => dir[n]: serialized Dir */
Kup, /* qid[8] => Kent: parent dir */
/* snapshot keys */
Klabel, /* name[] => snapid[]: snapshot label */
Ktref, /* tag[8] = snapid[] scratch snapshot label */
Ksnap, /* sid[8] => ref[8], tree[52]: snapshot root */
Kdlist, /* snap[8] gen[8] => hd[ptr],tl[ptr] deadlist */
Kslink, /* snap[8] next[8] => [] snap successor list */
};
enum {
Bdirty = 1 << 0,
Bfinal = 1 << 1,
Bfreed = 1 << 2,
Bcached = 1 << 3,
};
enum {
Tforked = 1 << 0,
};
enum {
Lmut = 1 << 0,
};
enum {
Qdump = 1ULL << 63,
};
#define Zb (Bptr){-1, -1, -1}
/* internal errors */
#define Efs (abort(), "fs broke")
extern char Eimpl[];
extern char Ebotch[];
extern char Eio[];
extern char Enofid[];
extern char Efid[];
extern char Etype[];
extern char Edscan[];
extern char Esrch[];
extern char Eexist[];
extern char Emode[];
extern char Efull[];
extern char Eauth[];
extern char Elength[];
extern char Eperm[];
extern char Einuse[];
extern char Ebadf[];
extern char Ename[];
extern char Enomem[];
extern char Eattach[];
extern char Enosnap[];
extern char Esnap[];
extern char Edir[];
extern char Esyntax[];
extern char Enouser[];
extern char Efsize[];
extern char Ebadu[];
extern char Erdonly[];
extern char Elocked[];
extern char Eauthp[];
extern char Eauthd[];
extern char Eauthph[];
extern char Ephase[];
extern char Enone[];
extern char Enoauth[];
extern char Ewstatb[];
extern char Ewstatd[];
extern char Ewstatg[];
extern char Ewstatl[];
extern char Ewstatm[];
extern char Ewstato[];
extern char Ewstatp[];
extern char Ewstatq[];
extern char Ewstatu[];
extern char Ewstatv[];
extern char Enempty[];
/*
* All metadata blocks share a common header:
*
* type[2]
*
* The None type is reserved for file data blocks
* and refcount blocks.
*
* The superblock has this layout:
* version[8] always "gefsNNNNN"
* blksz[4] block size in bytes
* bufsz[4] portion of leaf nodes
* allocated to buffers,
* in bytes
* height[4] tree height of root node
* rootb[8] address of root in last
* snapshot.
* rooth[8] hash of root node
* narena[4] number of arenas in tree
* arenasz[8] maximum size of arenas;
* they may be smaller.
* gen[8] The flush generation
*
* The arena zone blocks have this layout, and
* are overwritten in place:
*
* log[8] The head of the alloc log
* logh[8] The hash of the alloc log
*
* The log blocks have this layout, and are one of
* two types of blocks that get overwritten in place:
*
* hash[8] The hash of the previous log block
*
* The remainder of the block is filled with log
* entries. Each log entry has at least 8 bytes
* of entry. Some are longer. The opcode is or'ed
* into the low order bits of the first vlong.
* These ops take the following form:
*
* Alloc, Free:
* off[8] len[8]
* Alloc1, Free1:
* off[8]
* Ref:
* off[8]
* Flush:
* gen[8]
*
* Pivots have the following layout:
*
* nval[2]
* valsz[2]
* nbuf[2]
* bufsz[2]
*
* Leaves have the following layout:
*
* nval[2]
* valsz[2]
* pad[4]sure,
*
* Within these nodes, pointers have the following
* layout:
*
* off[8] hash[8] fill[2]
*/
enum {
Tdat,
Tpivot,
Tleaf,
Tlog,
Tdlist,
Tmagic,
Tarena,
Tsuper = 0x6765, /* 'ge' bigendian */
};
enum {
Vinl, /* Inline value */
Vref, /* Block pointer */
};
enum {
GBraw = 1<<0,
GBwrite = 1<<1,
GBnochk = 1<<2,
};
enum {
Onop, /* nothing */
Oinsert, /* new kvp */
Odelete, /* delete kvp */
Oclearb, /* free block ptr if exists */
Oclobber, /* remove file if it exists */
Owstat, /* update kvp dirent */
Oretag, /* reference a snap id */
Nmsgtype, /* maximum message type */
};
enum {
Magic = 0x979b929e98969c8c,
};
/*
* Wstat ops come with associated data, in the order
* of the bit flags.
*/
enum{
/* wstat flags */
Owsize = 1<<0, /* [8]fsize: update file size */
Owmode = 1<<1, /* [4]mode: update file mode */
Owmtime = 1<<2, /* [8]mtime: update mtime, in nsec */
Owatime = 1<<3, /* [8]atime: update atime, in nsec */
Owuid = 1<<4, /* [4]uid: set uid */
Owgid = 1<<5, /* [4]uid: set gid */
Owmuid = 1<<6, /* [4]uid: set muid */
};
/*
* Operations for the allocation log.
*/
enum {
LogNop, /* unused */
/* 1-wide entries */
LogAlloc1, /* alloc a block */
LogFree1, /* free a block */
LogSync, /* sync barrier for replay */
/* 2-wide entries */
#define Log2wide LogAlloc
LogAlloc, /* alloc a range */
LogFree, /* free a range */
};
enum {
AOnone,
AOsnap,
AOsync,
AOclear,
};
struct Bptr {
vlong addr;
uvlong hash;
vlong gen;
};
struct Key{
char *k;
int nk;
};
struct Val {
short nv;
char *v;
};
struct Kvp {
Key;
Val;
};
struct Msg {
char op;
Kvp;
};
struct Dlist {
Dlist *cnext; /* cache next entry */
Dlist *cprev; /* cache prev entry */
Dlist *chain; /* hash table chain */
Blk *ins; /* loaded head */
vlong gen; /* deadlist gen */
vlong bgen; /* birth gen */
Bptr hd; /* deadlist head */
Bptr tl; /* deadlist tail */
};
struct Arange {
Avl;
vlong off;
vlong len;
};
struct Bucket {
Lock;
Blk *b;
};
struct Amsg {
int op;
int fd;
union {
struct { /* AOsnap */
char old[128];
char new[128];
int mutable;
int delete;
};
struct { /* AOsync */
int halt;
};
struct { /* AOclear */
Mount *mnt;
Dent *dent;
vlong qpath;
vlong off;
vlong length;
};
};
};
struct Fmsg {
Fcall;
Conn *conn;
int sz; /* the size of the message buf */
uchar buf[];
};
struct Tree {
/* in-memory */
Lock lk;
long memref; /* number of in-memory references to this */
vlong memgen; /* wip next generation */
int dirty;
/* on-disk */
int nsucc; /* number snapshots after us */
int nlbl; /* number of labels referring to us */
int ht; /* height of the tree */
uint flag; /* flag set */
Bptr bp; /* block pointer of root */
vlong gen; /* generation */
vlong prev; /* previous snapshot */
vlong base; /* base snapshot */
Msg mq[64];
int qsz;
int nq;
char buf[Bufspc];
};
enum {
DFblk,
DFtree,
};
struct Bfree {
Bfree *next;
int op;
Tree *t;
Blk *b;
Bptr bp;
};
struct User {
int id;
int lead;
int *memb;
int nmemb;
char name[128];
};
enum {
Qflush,
Qwrite,
Qfree,
};
struct Qent {
long qgen;
Bptr bp;
Blk *b;
int op;
};
struct Syncq {
QLock lk;
Rendez fullrz;
Rendez emptyrz;
Qent *heap;
int nheap;
int heapsz;
};
struct Stats {
vlong cachehit;
vlong cachelook;
};
/*
* Overall state of the file sytem.
* Shadows the superblock contents.
*/
struct Gefs {
int blksz;
int bufspc;
Tree snap;
Dlist snapdl;
int narena;
vlong arenasz;
vlong nextqid;
vlong nextgen;
vlong qgen;
Bptr *arenabp;
/* superblocks */
Blk *sb0; /* primary */
Blk *sb1; /* backup */
/* arena allocation */
Arena *arenas;
long roundrobin;
long syncing;
long nsyncers;
long nreaders;
int gotinfo;
QLock synclk;
Rendez syncrz;
Lock mountlk;
Mount *mounts;
Mount *snapmnt;
Lock connlk;
Conn *conns;
Chan *wrchan;
Chan *admchan;
Chan **rdchan;
QLock mutlk;
long nworker;
long epoch;
long lepoch[32];
Bfree *limbo[3];
Syncq syncq[32];
int fd;
long broken;
long rdonly;
int noauth;
/* user list */
RWLock userlk;
User *users;
int nusers;
/* open directory entries */
Lock dtablk;
Dent *dtab[Ndtab];
/* slow block io */
QLock blklk[32];
/* deadlist cache */
Dlist **dlcache;
Dlist *dlhead;
Dlist *dltail;
int dlcount;
int dlcmax;
/* block lru */
QLock lrulk;
Rendez lrurz;
Bucket *bcache;
Blk *chead;
Blk *ctail;
usize ccount;
usize cmax;
RWLock flushq[Nflushtab];
Stats stats;
};
struct Arena {
QLock;
Avltree *free;
Blk **queue;
int nqueue;
Blk *hd; /* arena header */
Blk *tl; /* arena footer */
Blk **q; /* write queue */
vlong nq;
vlong size;
vlong used;
vlong reserve;
/* allocation log */
vlong nlog; /* logged since last copression */
Bptr loghd; /* allocation log */
Blk *logtl; /* end of the log, open for writing */
Syncq *sync;
};
struct Xdir {
/* file data */
uvlong flag; /* storage flags */
Qid qid; /* unique id from server */
ulong mode; /* permissions */
vlong atime; /* last read time: nsec */
vlong mtime; /* last write time: nsec */
uvlong length; /* file length */
int uid; /* owner name */
int gid; /* group name */
int muid; /* last modifier name */
char *name; /* last element of path */
};
struct Dent {
RWLock;
Key;
Xdir;
Dent *next;
vlong up;
long ref;
char gone;
char buf[Maxent];
};
struct Mount {
Lock;
Mount *next;
long ref;
vlong gen;
char *name;
int mutable;
Tree *root; /* EBR protected */
};
struct Conn {
Conn *next;
QLock wrlk;
int rfd;
int wfd;
int iounit;
int versioned;
/* fid hash table */
Lock fidtablk[Nfidtab];
Fid *fidtab[Nfidtab];
};
struct Fid {
Lock;
Fid *next;
/*
* if opened with OEXEC, we want to use a snapshot,
* instead of the most recent root, to prevent
* paging in the wrong executable.
*/
Mount *mnt;
Scan *scan; /* in progres scan */
Dent *dent; /* (pqid, name) ref, modified on rename */
void *auth;
u32int fid;
vlong qpath;
vlong pqpath;
long ref;
int mode;
int iounit;
int uid;
int duid;
int dgid;
int dmode;
};
enum {
POmod,
POrot,
POsplit,
POmerge,
};
struct Scanp {
int bi;
int vi;
Blk *b;
};
struct Scan {
vlong offset; /* last read offset */
char first;
char done;
char overflow;
char present;
int ht;
Kvp kv;
Key pfx;
char kvbuf[Kvmax];
char pfxbuf[Keymax];
Scanp *path;
};
struct Blk {
/* cache entry */
Blk *cnext;
Blk *cprev;
Blk *hnext;
/* Freelist entry */
Blk *fnext;
long flag;
/* serialized to disk in header */
short type; /* @0, for all */
union {
struct {
short nval; /* @2, for Leaf, Pivot: data[0:2] */
short valsz; /* @4, for Leaf, Pivot: data[2:4] */
short nbuf; /* @6, for Pivot */
short bufsz; /* @8, for Pivot */
};
struct {
int logsz; /* @2 for allocation log */
uvlong logh; /* @4 for log body hash */
Bptr logp; /* @12 next deadlist chain */
};
};
/* debug */
uintptr lasthold;
uintptr lastdrop;
uintptr enqueued;
uintptr cached;
uintptr uncached;
uintptr alloced;
uintptr freed;
Bptr bp;
long ref;
char *data;
char buf[Blksz];
vlong magic;
};
struct Chan {
int size; /* size of queue */
long count; /* how many in queue (semaphore) */
long avail; /* how many available to send (semaphore) */
Lock rl, wl; /* circular pointers */
void **rp;
void **wp;
void* args[]; /* list of saved pointers, [->size] */
};