ref: ed3a576e8b103032b659febc5d3c62565c9cf7d7
dir: /sys/src/9/xen/sdxen.c/
/* * Xen block storage device frontend * * The present implementation follows the principle of * "what's the simplest thing that could possibly work?". * We can think about performance later. * We can think about dynamically attaching and removing devices later. */ #include "u.h" #include "../port/lib.h" #include "mem.h" #include "dat.h" #include "fns.h" #include "io.h" #include "ureg.h" #include "../port/error.h" #include "../port/sd.h" #define LOG(a) /* * conversions to machine page numbers, pages and addresses */ #define MFN(pa) (patomfn[(pa)>>PGSHIFT]) #define MFNPG(pa) (MFN(pa)<<PGSHIFT) #define PA2MA(pa) (MFNPG(pa) | PGOFF(pa)) #define VA2MA(va) PA2MA(PADDR(va)) #define VA2MFN(va) MFN(PADDR(va)) enum { Ndevs = 4, MajorDevSD = 0x800, MajorDevHDA = 0x300, MajorDevHDC = 0x1600, MajorDevXVD = 0xCA00, }; extern SDifc sdxenifc; typedef struct Ctlr Ctlr; struct Ctlr { int online; ulong secsize; ulong sectors; int backend; int devid; int evtchn; blkif_front_ring_t ring; int ringref; Lock ringlock; char *frame; QLock iolock; int iodone; Rendez wiodone; }; static int ringinit(Ctlr *ctlr, char *a) { blkif_sring_t *sr; sr = (blkif_sring_t*)a; memset(sr, 0, BY2PG); SHARED_RING_INIT(sr); FRONT_RING_INIT(&ctlr->ring, sr, BY2PG); ctlr->ringref = shareframe(ctlr->backend, sr, 1); return BY2PG; } static int vbdsend(Ctlr *ctlr, int write, int ref, int nb, uvlong bno) { blkif_request_t *req; int i, notify; ilock(&ctlr->ringlock); // XXX conservative i = ctlr->ring.req_prod_pvt; req = RING_GET_REQUEST(&ctlr->ring, i); // XXX overflow? req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; req->nr_segments = 1; req->handle = ctlr->devid; req->id = 1; req->sector_number = bno; req->seg[0].gref = ref; req->seg[0].first_sect = 0; req->seg[0].last_sect = nb-1; ctlr->ring.req_prod_pvt = i+1; RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->ring, notify); iunlock(&ctlr->ringlock); return notify; } static void backendconnect(Ctlr *ctlr) { char dir[64]; char buf[64]; sprint(dir, "device/vbd/%d/", ctlr->devid); xenstore_setd(dir, "ring-ref", ctlr->ringref); xenstore_setd(dir, "event-channel", ctlr->evtchn); xenstore_setd(dir, "state", XenbusStateInitialised); xenstore_gets(dir, "backend", buf, sizeof buf); sprint(dir, "%s/", buf); HYPERVISOR_yield(); xenstore_gets(dir, "state", buf, sizeof buf); while (strtol(buf, 0, 0) != XenbusStateConnected) { print("sdxen: waiting for vbd %d to connect\n", ctlr->devid); tsleep(&up->sleep, return0, 0, 50); xenstore_gets(dir, "state", buf, sizeof buf); } xenstore_gets(dir, "sector-size", buf, sizeof buf); ctlr->secsize = strtol(buf, 0, 0); xenstore_gets(dir, "sectors", buf, sizeof buf); ctlr->sectors = strtol(buf, 0, 0); print("sdxen: backend %s secsize %ld sectors %ld\n", dir, ctlr->secsize, ctlr->sectors); if (ctlr->secsize > BY2PG) panic("sdxen: sector size bigger than mmu page size"); } static void backendactivate(Ctlr *ctlr) { char dir[64]; sprint(dir, "device/vbd/%d/", ctlr->devid); xenstore_setd(dir, "state", XenbusStateConnected); } static SDev* xenpnp(void) { SDev *sdev[Ndevs]; static char idno[Ndevs] = { '0', 'C', 'D', 'E' }; static char nunit[Ndevs] = { 8, 2, 2, 8 }; int i; for (i = 0; i < Ndevs; i++) { sdev[i] = mallocz(sizeof(SDev), 1); sdev[i]->ifc = &sdxenifc; sdev[i]->idno = idno[i]; sdev[i]->nunit = nunit[i]; sdev[i]->ctlr = (Ctlr**)mallocz(sdev[i]->nunit*sizeof(Ctlr*), 1); if (i > 0) sdev[i]->next = sdev[i-1]; } return sdev[Ndevs-1]; } static int linuxdev(int idno, int subno) { switch (idno) { case '0': return MajorDevSD + 16*subno; case 'C': return MajorDevHDA + 64*subno; case 'D': return MajorDevHDC + 64*subno; case 'E': return MajorDevXVD + 16*subno; default: return 0; } } static int xenverify(SDunit *unit) { Ctlr *ctlr; char dir[64]; char buf[64]; int devid; int npage; char *p; if (unit->subno > unit->dev->nunit) return 0; devid = linuxdev(unit->dev->idno, unit->subno); sprint(dir, "device/vbd/%d/", devid); if (xenstore_gets(dir, "backend-id", buf, sizeof buf) <= 0) return 0; ctlr = mallocz(sizeof(Ctlr), 1); ((Ctlr**)unit->dev->ctlr)[unit->subno] = ctlr; ctlr->devid = devid; ctlr->backend = strtol(buf, 0, 0); npage = 2; p = xspanalloc(npage<<PGSHIFT, BY2PG, 0); p += ringinit(ctlr, p); ctlr->frame = p; ctlr->evtchn = xenchanalloc(ctlr->backend); backendconnect(ctlr); unit->inquiry[0] = 0; // XXX how do we know if it's a CD? unit->inquiry[2] = 2; unit->inquiry[3] = 2; unit->inquiry[4] = sizeof(unit->inquiry)-4; strcpy((char*)&unit->inquiry[8], "Xen block device"); return 1; } static int wiodone(void *a) { return ((Ctlr*)a)->iodone != 0; } static void sdxenintr(Ureg *, void *a) { Ctlr *ctlr = a; blkif_response_t *rsp; int i, avail; ilock(&ctlr->ringlock); // XXX conservative for (;;) { RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->ring, avail); if (!avail) break; i = ctlr->ring.rsp_cons; rsp = RING_GET_RESPONSE(&ctlr->ring, i); LOG(dprint("sdxen rsp %llud %d %d\n", rsp->id, rsp->operation, rsp->status);) if (rsp->status == BLKIF_RSP_OKAY) ctlr->iodone = 1; else ctlr->iodone = -1; ctlr->ring.rsp_cons = ++i; } iunlock(&ctlr->ringlock); if (ctlr->iodone) wakeup(&ctlr->wiodone); } static Ctlr *kickctlr; static void kickme(void) { Ctlr *ctlr = kickctlr; shared_info_t *s; if (ctlr) { s = HYPERVISOR_shared_info; dprint("tick %d %d prod %d cons %d pending %x mask %x\n", m->ticks, ctlr->iodone, ctlr->ring.sring->rsp_prod, ctlr->ring.rsp_cons, s->evtchn_pending[0], s->evtchn_mask[0]); sdxenintr(0, ctlr); } } static int xenonline(SDunit *unit) { Ctlr *ctlr; ctlr = ((Ctlr**)unit->dev->ctlr)[unit->subno]; unit->sectors = ctlr->sectors; unit->secsize = ctlr->secsize; if (ctlr->online == 0) { intrenable(ctlr->evtchn, sdxenintr, ctlr, BUSUNKNOWN, "vbd"); //kickctlr = ctlr; //addclock0link(kickme, 10000); backendactivate(ctlr); ctlr->online = 1; } return 1; } static int xenrio(SDreq*) { return -1; } static long xenbio(SDunit* unit, int lun, int write, void* data, long nb, uvlong bno) { Ctlr *ctlr; char *buf; long bcount, len; int ref; int n; USED(lun); // XXX meaningless ctlr = ((Ctlr**)unit->dev->ctlr)[unit->subno]; LOG(("xenbio %c %lux %ld %lld\n", write? 'w' : 'r', (ulong)data, nb, bno);) buf = data; // XXX extra copying & fragmentation could be avoided by // redefining sdmalloc() to get page-aligned buffers if ((ulong)data&(BY2PG-1)) buf = ctlr->frame; bcount = BY2PG/unit->secsize; qlock(&ctlr->iolock); for (n = nb; n > 0; n -= bcount) { ref = shareframe(ctlr->backend, buf, !write); if (bcount > n) bcount = n; len = bcount*unit->secsize; if (write && buf == ctlr->frame) memmove(buf, data, len); ctlr->iodone = 0; if (vbdsend(ctlr, write, ref, bcount, bno)) xenchannotify(ctlr->evtchn); LOG(dprint("sleeping %d prod %d cons %d pending %x mask %x \n", ctlr->iodone, ctlr->ring.sring->rsp_prod, ctlr->ring.rsp_cons, HYPERVISOR_shared_info->evtchn_pending[0], HYPERVISOR_shared_info->evtchn_mask[0]);) sleep(&ctlr->wiodone, wiodone, ctlr); xengrantend(ref); if (ctlr->iodone < 0) { qunlock(&ctlr->iolock); return -1; } if (buf == ctlr->frame) { if (!write) memmove(data, buf, len); data = (char*)data + len; } else buf += len; bno += bcount; } qunlock(&ctlr->iolock); return (nb-n)*unit->secsize; } static void xenclear(SDev *) { } SDifc sdxenifc = { "xen", /* name */ xenpnp, /* pnp */ 0, /* legacy */ 0, /* enable */ 0, /* disable */ xenverify, /* verify */ xenonline, /* online */ xenrio, /* rio */ 0, /* rctl */ 0, /* wctl */ xenbio, /* bio */ 0, /* probe */ xenclear, /* clear */ 0, /* stat */ };