ref: 41d1a883e53baf66918c35cb0e84eab899a16259
dir: /sys/src/cmd/vmx/virtio.c/
#include <u.h>
#include <libc.h>
#include <thread.h>
#include "dat.h"
#include "fns.h"
#include <ip.h> /* parseether() */
#include <libsec.h> /* genrandom() */

/*
 * Virtio device emulation: a generic queue layer (viogetbuf, vioputbuf,
 * vioqread, vioqwrite, vioqrem), the common I/O register handler (vioio),
 * and two devices built on top of it, a network device (mkvionet) and a
 * block device (mkvioblk).
 */

typedef struct VIODev VIODev;
typedef struct VIOQueue VIOQueue;
typedef struct VIOBuf VIOBuf;
typedef struct VIONetDev VIONetDev;
typedef struct VIOBlkDev VIOBlkDev;

enum {
	/* descriptor flags (VIOBuf.flags) */
	BUFCHAIN = 1,	/* another descriptor follows in the chain */
	BUFWR = 2,	/* buffer is filled by vioqwrite and skipped by vioqread */

	/* flag in the first 16-bit word of the avail area */
	USEDNOIRQ = 1,	/* no interrupt is raised when a buffer is returned */

	DRIVEROK = 4, /* devstat */
};

/*
 * One descriptor of a chain handed out by viogetbuf.
 * rptr and wptr are only meaningful on the first buffer of a chain:
 * they count the bytes consumed by vioqread and produced by vioqwrite.
 */
struct VIOBuf {
	u32int flags;		/* descriptor flags (BUFCHAIN, BUFWR) */
	VIOQueue *qu;		/* queue the buffer came from */
	void *p;		/* gptr() translation of addr */
	u64int addr;		/* address read from the descriptor */
	u32int len;		/* length read from the descriptor */
	u32int idx;		/* index of the descriptor in the table */
	VIOBuf *next, *head;	/* rest of the chain, first buffer of the chain */
	u32int rptr, wptr;
};

/*
 * A queue.  The embedded QLock protects the queue state; the embedded
 * Rendez is slept on by viogetbuf until buffers become available, and
 * livebufrend is slept on by a device reset until livebuf drops to zero.
 */
struct VIOQueue {
	QLock;
	Rendez;
	VIODev *d;
	u8int (*desc)[16], *avail, *used;	/* nil while no valid address is set */
	u16int size;				/* number of descriptors */
	u32int addr;				/* value last written to the queue address register */
	u16int availidx, usedidx;		/* next avail entry to consume, next used entry to fill */
	void (*notify)(VIOQueue*);
	int livebuf;				/* chains handed out by viogetbuf and not yet returned */
	Rendez livebufrend;
};

struct VIONetDev {
	int readfd, writefd;
	u8int mac[6];
	enum {
		VNETPROMISC = 1,
		VNETALLMULTI = 2,
		VNETALLUNI = 4,
		VNETNOMULTI = 8,
		VNETNOUNI = 16,
		VNETNOBCAST = 32,
		VNETHEADER = 1<<31,	/* prefix each transmitted packet with length and timestamp */
	} flags;
	u64int macbloom, multibloom;	/* 64-bit filters indexed by bloomhash() */
};

struct VIOBlkDev {
	int fd;
	uvlong size;	/* in 512-byte sectors */
};

struct VIODev {
	PCIDev *pci;
	u32int devfeat, guestfeat;
	u16int qsel;
	u8int devstat, isrstat;
	VIOQueue *qu;
	int nqu, allocqu;	/* queues in use, queues allocated */
	u32int (*io)(int, u16int, u32int, int, VIODev *);	/* device-specific registers (port >= 20) */
	void (*reset)(VIODev *);
	union {
		VIONetDev net;
		VIOBlkDev blk;
	};
};

/*
 * Notification callback queued by vioirq.  arg is a two-element array
 * holding the device and the interrupt value; a non-zero value is or'ed
 * into isrstat, zero clears it.  The array is freed here.
 */
static void
vioirq_(void *arg)
{
	VIODev *d;
	int val;

	d = ((void**)arg)[0];
	val = (uintptr)((void**)arg)[1];
	if(val != 0)
		d->isrstat |= val;
	else
		d->isrstat = 0;
	pciirq(d->pci, d->isrstat);
	free(arg);
}

/*
 * Schedule an update of the device's interrupt status through sendnotif.
 * val is or'ed into isrstat, or clears it when zero (see vioirq_).
 */
static void
vioirq(VIODev *d, int val)
{
	void **v;

	assert(d != nil);
	v = emalloc(sizeof(void*)*2);
	v[0] = d;
	/* go through uintptr so the cast mirrors the one in vioirq_ */
	v[1] = (void *)(uintptr)val;
	sendnotif(vioirq_, v);
}

/*
 * Return a pointer to descriptor i of q, or nil (after logging)
 * when i is not a valid index.
 */
static void *
checkdesc(VIOQueue *q, int i)
{
	if(i >= q->size){
		vmerror("virtio device %#x: invalid next pointer %d in queue (size %d), ignoring descriptor", q->d->pci->bdf, i, q->size);
		return nil;
	}
	return q->desc[i];
}

/*
 * Take the next available descriptor chain from q and return it as a
 * list of VIOBufs linked through next.  If nothing is available (or the
 * driver is not ready, or the queue has no valid address) the call
 * sleeps when wait is non-zero and returns nil otherwise.
 * A chain obtained here must be handed back with vioputbuf.
 *
 * All indices and addresses come from the guest, so nothing is trusted:
 * invalid entries are logged and skipped rather than dereferenced.
 * NOTE(review): relies on emalloc returning zeroed memory (next, rptr
 * and wptr are never cleared explicitly) -- confirm.
 */
VIOBuf *
viogetbuf(VIOQueue *q, int wait)
{
	u16int gidx;
	VIOBuf *b, *rb, **bp;
	void *dp;
	int n;

	qlock(q);
waitloop:
	while((q->d->devstat & DRIVEROK) == 0 || q->desc == nil || (gidx = GET16(q->avail, 2), gidx == q->availidx)){
		if(!wait){
			qunlock(q);
			return nil;
		}
		rsleep(q);
	}
	dp = checkdesc(q, GET16(q->avail, 4 + 2 * (q->availidx % q->size)));
	rb = nil;
	bp = &rb;
	/*
	 * dp is nil if the head index itself was out of range; the loop is
	 * then skipped and the entry dropped instead of dereferencing nil.
	 */
	for(n = 0; dp != nil; n++){
		/*
		 * A chain that is longer than the queue must contain a loop;
		 * stop instead of allocating buffers forever.
		 */
		if(n >= q->size){
			vmerror("virtio device %#x: descriptor chain longer than queue size %d, truncating", q->d->pci->bdf, q->size);
			break;
		}
		b = emalloc(sizeof(VIOBuf));
		b->qu = q;
		b->idx = (u8int(*)[16])dp - q->desc;
		b->addr = GET64(dp, 0);
		b->len = GET32(dp, 8);
		b->flags = GET16(dp, 12);
		b->p = gptr(b->addr, b->len);
		if(b->p == nil){
			vmerror("virtio device %#x: invalid buffer pointer %#p in queue, ignoring descriptor", q->d->pci->bdf, (void*)b->addr);
			free(b);
			break;
		}
		*bp = b;
		b->head = rb;
		bp = &b->next;
		if((b->flags & BUFCHAIN) == 0)
			break;
		dp = checkdesc(q, GET16(dp, 14));
	}
	q->availidx++;
	/* nothing usable in this entry: try the next one */
	if(rb == nil)
		goto waitloop;
	q->livebuf++;
	qunlock(q);
	return rb;
}

/*
 * Return a chain obtained from viogetbuf: publish it in the used area
 * with the number of bytes written (wptr), raise an interrupt unless
 * USEDNOIRQ is set, and free the VIOBufs.  b may be nil.
 *
 * If the driver is no longer ready (e.g. the device is being reset) the
 * used area is left alone, but livebuf is still decremented: a reset in
 * viodevstatset sleeps until livebuf reaches zero and would otherwise
 * never wake up.
 */
void
vioputbuf(VIOBuf *b)
{
	VIOBuf *bn;
	VIOQueue *q;
	u8int *p;
	int irq;

	if(b == nil)
		return;
	q = b->qu;
	irq = 0;
	qlock(q);
	if((q->d->devstat & DRIVEROK) != 0){
		if(q->used == nil)
			vmerror("virtio device %#x: address was set to an invalid value while holding buffer", q->d->pci->bdf);
		else{
			p = q->used + 4 + 8 * (q->usedidx % q->size);
			PUT32(p, 4, b->wptr);
			PUT32(p, 0, b->idx);
			PUT16(q->used, 2, ++q->usedidx);
		}
		/* sample the flag while the queue pointers cannot change */
		irq = q->avail != nil && (GET16(q->avail, 0) & USEDNOIRQ) == 0;
	}
	if(--q->livebuf <= 0)
		rwakeup(&q->livebufrend);
	qunlock(q);
	if(irq)
		vioirq(q->d, 1);
	while(b != nil){
		bn = b->next;
		free(b);
		b = bn;
	}
}

/*
 * Copy up to n bytes from the buffers of chain b that do not have BUFWR
 * set into v, continuing where the previous vioqread on b stopped.
 * Returns the number of bytes copied, which is less than n if the chain
 * runs out of data.
 */
ulong
vioqread(VIOBuf *b, void *v, ulong n)
{
	VIOBuf *c;
	u32int p;
	ulong rc, m;

	p = b->rptr;
	c = b;
	rc = 0;
	for(;;){
		if(rc >= n)
			return rc;
		/* find the buffer containing offset p, skipping BUFWR buffers */
		for(;;){
			if(c == nil)
				return rc;
			if((c->flags & BUFWR) == 0){
				if(p < c->len)
					break;
				p -= c->len;
			}
			c = c->next;
		}
		m = c->len - p;
		if(m > n - rc)
			m = n - rc;
		memmove(v, (u8int*)c->p + p, m);
		p += m, rc += m;
		v = (u8int*)v + m;
		b->rptr += m;
	}
}

/*
 * Copy up to n bytes from v into the buffers of chain b that have BUFWR
 * set, continuing where the previous vioqwrite on b stopped.
 * Returns the number of bytes copied, which is less than n if the chain
 * runs out of space.
 */
ulong
vioqwrite(VIOBuf *b, void *v, ulong n)
{
	VIOBuf *c;
	u32int p;
	ulong rc, m;

	p = b->wptr;
	c = b;
	rc = 0;
	for(;;){
		if(rc >= n)
			return rc;
		/* find the buffer containing offset p, skipping non-BUFWR buffers */
		for(;;){
			if(c == nil)
				return rc;
			if((c->flags & BUFWR) != 0){
				if(p < c->len)
					break;
				p -= c->len;
			}
			c = c->next;
		}
		m = c->len - p;
		if(m > n - rc)
			m = n - rc;
		memmove((u8int*)c->p + p, v, m);
		p += m, rc += m;
		v = (u8int*)v + m;
		b->wptr += m;
	}
}

/*
 * Return the number of bytes left in chain b: space that vioqwrite can
 * still fill when wr is 1, data that vioqread can still return when wr
 * is 0.  wr must be exactly 0 or 1, since it is compared with the
 * result of a boolean expression.
 */
ulong
vioqrem(VIOBuf *b, int wr)
{
	VIOBuf *c;
	u32int p;
	ulong rc;

	p = wr ? b->wptr : b->rptr;
	for(c = b;; c = c->next){
		if(c == nil)
			return 0;
		if(((c->flags & BUFWR) != 0) == wr){
			if(p < c->len)
				break;
			p -= c->len;
		}
	}
	rc = c->len - p;
	for(c = c->next; c != nil; c = c->next)
		if(((c->flags & BUFWR) != 0) == wr)
			rc += c->len;
	return rc;
}

/*
 * Handle a write to the queue address register.  addr is the value
 * written by the guest, a page frame number; the queue lives at
 * addr << 12 in guest memory.  The descriptor table (16 bytes per
 * entry) is followed by the avail area, and the used area starts at
 * the next 4096-byte boundary.  If the address cannot be mapped the
 * queue pointers are set to nil, which makes viogetbuf block.
 */
static void
vioqaddrset(VIOQueue *q, u64int addr)
{
	void *p;
	int sz1, sz;
	u32int pfn;

	/*
	 * Remember the value as written: q->addr is what a read of the
	 * register returns, and it is only 32 bits wide, so storing the
	 * shifted address would return a truncated and wrong value.
	 */
	pfn = addr;
	addr <<= 12;
	/* -(-x & -4096) rounds x up to a multiple of 4096 */
	sz1 = -(-(18 * q->size + 4) & -4096);
	sz = sz1 + (-(-(8 * q->size + 6) & -4096));
	p = gptr(addr, sz);
	if(p == nil)
		vmerror("virtio device %#x: attempt to set queue to invalid address %#p", q->d->pci->bdf, (void *) addr);
	qlock(q);
	q->addr = pfn;
	if(p == nil){
		q->desc = nil;
		q->avail = nil;
		q->used = nil;
	}else{
		q->desc = p;
		q->avail = (u8int*)p + 16 * q->size;
		q->used = (u8int*)p + sz1;
		rwakeupall(q);
	}
	qunlock(q);
}

/*
 * Forget the queue's address and indices.  Does no locking itself;
 * viodevstatset calls it with the queue locked.
 */
static void
vioqreset(VIOQueue *q)
{
	q->desc = nil;
	q->avail = nil;
	q->used = nil;
	q->addr = 0;
	q->availidx = 0;
	q->usedidx = 0;
}

/*
 * Handle a write to the device status register.  Writing zero resets
 * the device: the device-specific reset hook runs, the interrupt status
 * is cleared and every queue is reset once all of its outstanding
 * buffers have been returned.  Any other value notifies every queue.
 */
static void
viodevstatset(VIODev *v, u32int val)
{
	int i;

	v->devstat = val;
	if(val == 0){
		if(v->reset != nil)
			v->reset(v);
		v->guestfeat = 0;
		vioirq(v, 0);
		for(i = 0; i < v->nqu; i++){
			qlock(&v->qu[i]);
			while(v->qu[i].livebuf > 0)
				rsleep(&v->qu[i].livebufrend);
			vioqreset(&v->qu[i]);
			qunlock(&v->qu[i]);
		}
	}else{
		for(i = 0; i < v->nqu; i++)
			v->qu[i].notify(&v->qu[i]);
	}
}

/*
 * I/O port handler for the common virtio registers; vp is the VIODev.
 * The switch key is isin << 16 | port, so with isin == 1 the 0x1xxxx
 * cases are reads and the others are writes.  Ports from 20 upwards
 * are passed to the device-specific handler with 20 subtracted.
 * Anything unhandled is reported through iowhine.
 */
u32int
vioio(int isin, u16int port, u32int val, int sz, void *vp)
{
	VIODev *v;
	int rc;
	static char whinebuf[32];

	v = vp;
	switch(isin << 16 | port){
	case 0x4: v->guestfeat = val; return 0;
	case 0x8: if(v->qsel < v->nqu) vioqaddrset(&v->qu[v->qsel], val); return 0;
	case 0xe: v->qsel = val; return 0;
	case 0x10: if(val < v->nqu) v->qu[val].notify(&v->qu[val]); return 0;
	case 0x12: viodevstatset(v, val); return 0;
	case 0x10000: return v->devfeat;
	case 0x10004: return v->guestfeat;
	case 0x10008: return v->qsel >= v->nqu ? 0 : v->qu[v->qsel].addr;
	case 0x1000c: return v->qsel >= v->nqu ? 0 : v->qu[v->qsel].size;
	case 0x1000e: return v->qsel;
	case 0x10010: return 0;
	case 0x10012: return v->devstat;
	case 0x10013:
		/* reading the interrupt status also clears it */
		rc = v->isrstat;
		vioirq(v, 0);
		return rc;
	}
	if(port >= 20 && v->io != nil)
		return v->io(isin, port - 20, val, sz, v);
	snprint(whinebuf, sizeof(whinebuf), "virtio device %6x", v->pci->bdf);
	return iowhine(isin, port, val, sz, whinebuf);
}

/*
 * Allocate a virtio device with room for the given number of queues,
 * create its PCI device and register vioio on a 256-byte I/O BAR.
 * Queues are added afterwards with mkvioqueue.
 */
VIODev *
mkviodev(u16int devid, u32int pciclass, u32int subid, int queues)
{
	VIODev *d;

	d = emalloc(sizeof(VIODev));
	d->pci = mkpcidev(allocbdf(), devid << 16 | 0x1AF4, pciclass << 8, 1);
	d->pci->subid = subid << 16;
	mkpcibar(d->pci, BARIO, 0, 256, vioio, d);
	d->qu = emalloc(queues * sizeof(VIOQueue));
	d->allocqu = queues;
	return d;
}

/*
 * Queue notify function for queues served by a separate proc:
 * wake up anything sleeping in viogetbuf.
 */
static void
viowakeup(VIOQueue *q)
{
	qlock(q);
	rwakeupall(q);
	qunlock(q);
}

/*
 * Initialise the next unused queue of d with sz descriptors and notify
 * function fn.  sz must be a power of two no larger than 32768, and d
 * must have a free queue slot left.
 */
VIOQueue *
mkvioqueue(VIODev *d, int sz, void (*fn)(VIOQueue*))
{
	VIOQueue *q;

	assert(sz > 0 && sz <= 32768 && (sz & sz - 1) == 0 && fn != nil && d->nqu < d->allocqu);
	q = d->qu + d->nqu++;
	/* both rendezvous points are protected by the queue's own lock */
	q->Rendez.l = q;
	q->livebufrend.l = q;
	q->size = sz;
	q->d = d;
	q->notify = fn;
	vioqreset(q);
	return q;
}

/*
 * Hash a 6-byte MAC address to a bit number in the range 0..63,
 * used to index macbloom and multibloom.
 */
int
bloomhash(u8int *mac)
{
	int x;

	x = mac[0];
	x ^= mac[0] >> 6 ^ mac[1] << 2;
	x ^= mac[1] >> 4 ^ mac[2] << 4;
	x ^= mac[2] >> 2;
	x ^= mac[3];
	x ^= mac[3] >> 6 ^ mac[4] << 2;
	x ^= mac[4] >> 4 ^ mac[5] << 4;
	x ^= mac[5] >> 2;
	return x & 63;
}

/*
 * Decide whether a packet with destination address mac should be
 * delivered to the guest, according to the receive flags, the device's
 * own address and the two filters.  Returns non-zero to accept.
 */
int
viomacok(VIODev *d, u8int *mac)
{
	static u8int bcast[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

	if((d->net.flags & VNETPROMISC) != 0)
		return 1;
	if((mac[0] & 1) == 0){
		/* unicast */
		if((d->net.flags & (VNETNOUNI|VNETALLUNI)) != 0)
			return (d->net.flags & VNETNOUNI) == 0;
		if(memcmp(mac, d->net.mac, 6) == 0)
			return 1;
		if(d->net.macbloom == 0)
			return 0;
		return d->net.macbloom >> bloomhash(mac) & 1;
	}else if(memcmp(mac, bcast, 6) == 0)
		return (d->net.flags & VNETNOBCAST) == 0;
	else{
		/* multicast */
		if((d->net.flags & (VNETNOMULTI|VNETALLMULTI)) != 0)
			return (d->net.flags & VNETNOMULTI) == 0;
		if(d->net.multibloom == 0)
			return 0;
		return d->net.multibloom >> bloomhash(mac) & 1;
	}
}

/*
 * Receive proc of the network device: read packets from readfd and
 * pass those accepted by viomacok to the guest through queue 0, each
 * preceded by a 10-byte all-zero header.  Exits on read error or eof.
 */
void
vionetrproc(void *vp)
{
	VIODev *v;
	VIOQueue *q;
	VIOBuf *vb;
	uchar rxhead[10];
	uchar rxbuf[1600];
	int rc;

	threadsetname("vionetrproc");
	v = vp;
	q = &v->qu[0];
	memset(rxhead, 0, sizeof(rxhead));
	for(;;){
		rc = read(v->net.readfd, rxbuf, sizeof(rxbuf));
		if(rc == 0){
			vmerror("read(vionetrproc): eof");
			threadexits("read: eof");
		}
		if(rc < 0){
			vmerror("read(vionetrproc): %r");
			threadexits("read: %r");
		}
		if(rc < 14){
			vmerror("vionetrproc: short packet received (len=%d)", rc);
			continue;
		}
		if(!viomacok(v, rxbuf))
			continue;
		vb = viogetbuf(q, 1);
		if(vb == nil){
			vmerror("viogetbuf: %r");
			continue;
		}
		vioqwrite(vb, rxhead, sizeof(rxhead));
		vioqwrite(vb, rxbuf, rc);
		vioputbuf(vb);
	}
}

/*
 * Transmit proc of the network device: take packets from queue 1 and
 * write them to writefd.  The guest's 10-byte header is discarded.
 * Packets shorter than 14 bytes or filling the whole buffer are
 * dropped; packets shorter than 60 bytes are zero-padded to 60.
 * With VNETHEADER set each packet is preceded by a 10-byte header
 * holding the big-endian packet length (2 bytes) and nanosec()
 * timestamp (8 bytes).
 */
void
vionetwproc(void *vp)
{
	VIODev *v;
	VIOQueue *q;
	VIOBuf *vb;
	uchar txhead[10];
	uchar txbuf[1610];	/* 10 bytes reserved in front for the optional header */
	int rc, len, wlen;
	uvlong ns;

	threadsetname("vionetwproc");
	v = vp;
	q = &v->qu[1];
	for(;;){
		vb = viogetbuf(q, 1);
		if(vb == nil){
			vmerror("viogetbuf: %r");
			threadexits("viogetbuf: %r");
		}
		vioqread(vb, txhead, sizeof(txhead));
		len = vioqread(vb, txbuf+10, sizeof(txbuf)-10);
		if(len == sizeof(txbuf)-10){
			vmerror("virtio net: ignoring excessively long packet");
			vioputbuf(vb);
			continue;
		}
		if(len < 14){
			/* openbsd ends up sending lots of zero length packets sometimes */
			if(len != 0)
				vmerror("virtio net: ignoring short packet (length=%d)", len);
			vioputbuf(vb);
			continue;
		}else if(len < 60){
			/* openbsd doesn't seem to know about ethernet minimum packet lengths either */
			memset(txbuf + 10 + len, 0, 60 - len);
			len = 60;
		}
		if((v->net.flags & VNETHEADER) != 0){
			txbuf[0] = len >> 8;
			txbuf[1] = len;
			ns = nanosec();
			txbuf[2] = ns >> 56;
			txbuf[3] = ns >> 48;
			txbuf[4] = ns >> 40;
			txbuf[5] = ns >> 32;
			txbuf[6] = ns >> 24;
			txbuf[7] = ns >> 16;
			txbuf[8] = ns >> 8;
			txbuf[9] = ns;
			wlen = len + 10;
			rc = write(v->net.writefd, txbuf, wlen);
		}else{
			wlen = len;
			rc = write(v->net.writefd, txbuf + 10, wlen);
		}
		vioputbuf(vb);
		if(rc < 0){
			vmerror("write(vionetwproc): %r");
			continue;
		}
		/* compare with what was actually requested, header included */
		if(rc < wlen){
			vmerror("write(vionetwproc): incomplete write (%d < %d)", rc, wlen);
			continue;
		}
	}
}

/*
 * Device-specific registers of the network device (port is relative to
 * the start of that area).  Reads of offsets 0-5 return the MAC
 * address; the 16-bit value following it reads as 1.
 * NOTE(review): presumably that value is the link status -- confirm
 * against the virtio-net configuration layout.
 */
u32int
vionetio(int isin, u16int port, u32int val, int sz, VIODev *v)
{
	switch(isin << 16 | port){
	case 0x10000: case 0x10001: case 0x10002: case 0x10003:
		return GET32(v->net.mac, 0) >> (port & 3) * 8;
	case 0x10004: case 0x10005: case 0x10006: case 0x10007:
		return (GET16(v->net.mac, 4) | 1 << 16) >> (port & 3) * 8;
	}
	return iowhine(isin, port, val, sz, "virtio net");
}

/*
 * Read two address tables from b, each a 32-bit count followed by that
 * many 6-byte addresses, and rebuild macbloom from the first and
 * multibloom from the second.  Returns 0 on success; returns 1 and
 * leaves the filters untouched if the data ends early.
 */
int
vionettables(VIODev *d, VIOBuf *b)
{
	u8int buf[4];
	u8int mac[6];
	u64int bloom[2];
	int i, l;

	bloom[0] = 0;
	bloom[1] = 0;
	for(i = 0; i < 2; i++){
		if(vioqread(b, buf, 4) < 4)
			return 1;
		l = GET32(buf, 0);
		/* a bogus count is harmless: the loop stops when the data runs out */
		while(l--){
			if(vioqread(b, mac, 6) < 6)
				return 1;
			bloom[i] |= 1ULL<<bloomhash(mac);
		}
	}
	d->net.macbloom = bloom[0];
	d->net.multibloom = bloom[1];
	return 0;
}

/*
 * Notify function of the network control queue.  Processes every
 * pending command without blocking: a two-byte command code, followed
 * by its arguments, is read from the buffer and a one-byte ack
 * (0 success, 1 failure) is written back.
 */
void
vionetcmd(VIOQueue *q)
{
	VIODev *d;
	VIOBuf *b;
	u8int cmd[2], buf[6];
	u8int ack;
	int fl;

	d = q->d;
	for(; b = viogetbuf(q, 0), b != nil; vioputbuf(b)){
		if(vioqread(b, cmd, 2) < 2){
			ack = 1;
			vioqwrite(b, &ack, 1);
			continue;
		}
		ack = 0;
		switch(cmd[0] << 8 | cmd[1]){
		case 0x0000: fl = VNETPROMISC; goto flag;
		case 0x0001: fl = VNETALLMULTI; goto flag;
		case 0x0002: fl = VNETALLUNI; goto flag;
		case 0x0003: fl = VNETNOMULTI; goto flag;
		case 0x0004: fl = VNETNOUNI; goto flag;
		case 0x0005: fl = VNETNOBCAST; goto flag;
		flag:
			/* one argument byte: 1 sets the flag, 0 clears it */
			if(vioqread(b, buf, 1) < 1) ack = 1;
			else if(buf[0] == 1) d->net.flags |= fl;
			else if(buf[0] == 0) d->net.flags &= ~fl;
			else ack = 1;
			break;
		case 0x0100: /* MAC_TABLE_SET */
			ack = vionettables(d, b);
			break;
		case 0x0101: /* MAC_ADDR_SET */
			if(vioqread(b, buf, 6) < 6) ack = 1;
			else memmove(d->net.mac, buf, 6);
			break;
		default:
			ack = 1;
		}
		vioqwrite(b, &ack, 1);
	}
}

/*
 * Reset hook of the network device: clear all receive flags and both
 * filters.  VNETHEADER is kept, as it is set once in mkvionet and not
 * by the guest.
 */
void
vionetreset(VIODev *d)
{
	d->net.flags &= VNETHEADER;
	d->net.macbloom = 0;
	d->net.multibloom = 0;
}

/*
 * Create a network device.  net is a dial string or file name with
 * optional prefixes, in any order:
 *	hdr!	write a length/timestamp header before each packet
 *	file!	open the rest as a file instead of dialing it
 *	ea:X!	use X as the MAC address
 * Without a valid ea: prefix a random locally administered unicast
 * address is chosen.  Returns 0 on success and -1 on error.
 */
int
mkvionet(char *net)
{
	int fd, cfd;
	VIODev *d;
	char *ea;
	int flags;
	enum { VNETFILE = 1 };

	ea = nil;
	flags = 0;
	for(;;){
		if(strncmp(net, "hdr!", 4) == 0){
			net += 4;
			flags |= VNETHEADER;
		}else if(strncmp(net, "file!", 5) == 0){
			net += 5;
			flags |= VNETFILE;
		}else if(strncmp(net, "ea:", 3) == 0){
			net = strchr(ea = net+3, '!');
			if(net++ == nil){
				werrstr("missing: !");
				return -1;
			}
		}else
			break;
	}
	if((flags & VNETFILE) != 0){
		/* VNETFILE shares its value with VNETPROMISC, so it must not reach net.flags */
		flags &= ~VNETFILE;
		fd = open(net, ORDWR);
		if(fd < 0) return -1;
	}else{
		fd = dial(netmkaddr("-1", net, nil), nil, nil, &cfd);
		if(fd < 0)
			return -1;
		if(cfd >= 0) {
			write(cfd, "promiscuous", 11);
			write(cfd, "bridge", 6);
		}
	}

	d = mkviodev(0x1000, 0x020000, 1, 3);
	mkvioqueue(d, 1024, viowakeup);	/* queue 0: served by vionetrproc */
	mkvioqueue(d, 1024, viowakeup);	/* queue 1: served by vionetwproc */
	mkvioqueue(d, 32, vionetcmd);	/* queue 2: control commands */
	if(ea == nil || parseether(d->net.mac, ea)){
		genrandom(d->net.mac, 6);
		/* clear the group bit, set the locally administered bit */
		d->net.mac[0] = d->net.mac[0] & ~1 | 2;
	}
	d->net.flags = flags;
	d->devfeat = 1<<5|1<<16|1<<17|1<<18|1<<20;
	d->io = vionetio;
	d->reset = vionetreset;
	d->net.readfd = d->net.writefd = fd;
	proccreate(vionetrproc, d, 8192);
	proccreate(vionetwproc, d, 8192);
	return 0;
}

/*
 * Device-specific registers of the block device: reads of offsets 0-7
 * return the 64-bit size in sectors, low half first.
 */
u32int
vioblkio(int isin, u16int port, u32int val, int sz, VIODev *v)
{
	switch(isin << 16 | port){
	case 0x10000: case 0x10001: case 0x10002: case 0x10003:
		return (u32int)v->blk.size >> (port & 3) * 8;
	case 0x10004: case 0x10005: case 0x10006: case 0x10007:
		return (u32int)(v->blk.size >> 32) >> (port & 3) * 8;
	}
	return iowhine(isin, port, val, sz, "virtio blk");
}

/*
 * Request proc of the block device.  Each request starts with a 16-byte
 * header: a 32-bit type at offset 0 and a 64-bit sector number at
 * offset 8.  Type 0 reads from the file into the guest's buffers, type
 * 1 writes the guest's data to the file.  One status byte is written
 * back: 0 on success, 1 for an out-of-range or failed transfer, 2 for
 * a short header or unknown type.
 */
void
vioblkproc(void *vp)
{
	VIODev *v;
	VIOQueue *q;
	VIOBuf *b;
	u8int cmd[16];
	u8int ack;
	char buf[8192];
	uvlong addr;
	int rc, n, m;

	threadsetname("vioblkproc");
	v = vp;
	q = &v->qu[0];
	for(;;){
		b = viogetbuf(q, 1);
		if(b == nil){
			vmerror("vioblkproc: viogetbuf: %r");
			threadexits("vioblkproc: viogetbuf: %r");
		}
		ack = 0;
		if(vioqread(b, cmd, sizeof(cmd)) < sizeof(cmd)) goto nope;
		addr = GET64(cmd, 8);
		switch(GET32(cmd, 0)){
		case 0:
			/* the last writable byte is reserved for the status */
			n = vioqrem(b, 1) - 1;
			/*
			 * Range check without multiplying addr: the sector number
			 * comes from the guest and addr * 512 could wrap around.
			 */
			if(n < 0 || addr > v->blk.size || (uvlong)n > (v->blk.size - addr) * 512){
				ack = 1;
				break;
			}
			seek(v->blk.fd, addr << 9, 0);
			for(; n > 0; n -= rc){
				rc = sizeof(buf);
				if(n < rc)
					rc = n;
				rc = read(v->blk.fd, buf, rc);
				if(rc < 0) vmerror("read(vioblkproc): %r");
				if(rc <= 0){
					ack = 1;
					break;
				}
				vioqwrite(b, buf, rc);
			}
			break;
		case 1:
			n = vioqrem(b, 0);
			/* same overflow-safe range check as for reads */
			if(addr > v->blk.size || (uvlong)n > (v->blk.size - addr) * 512){
				ack = 1;
				break;
			}
			seek(v->blk.fd, addr << 9, 0);
			for(; n > 0; n -= rc){
				m = vioqread(b, buf, sizeof(buf));
				rc = write(v->blk.fd, buf, m);
				if(rc < 0) vmerror("write(vioblkproc): %r");
				if(rc < m){
					ack = 1;
					break;
				}
			}
			break;
		default:
		nope:
			ack = 2;
		}
		vioqwrite(b, &ack, 1);
		vioputbuf(b);
	}
}

/*
 * Create a block device backed by file fn, opened for reading and
 * writing.  The device size is the file size rounded down to whole
 * 512-byte sectors.  Returns 0 on success, -1 if the file cannot be
 * opened.
 */
int
mkvioblk(char *fn)
{
	int fd;
	VIODev *d;

	fd = open(fn, ORDWR);
	if(fd < 0)
		return -1;
	d = mkviodev(0x1001, 0x018000, 2, 1);
	mkvioqueue(d, 32, viowakeup);
	d->io = vioblkio;
	d->blk.fd = fd;
	d->blk.size = seek(fd, 0, 2) >> 9;
	proccreate(vioblkproc, d, 16384);
	return 0;
}