shithub: riscv

Download patch

ref: 1fe69754b6e51dbdcbcc7846e870386a6cf2a6e6
parent: e584f7f374b861bb31d3b5d92646542adb052cf4
author: mischief <mischief@offblast.org>
date: Sat Dec 6 17:13:44 EST 2014

pc, pc64: add and enable ethervirtio driver

many thanks to cinap_lenrek, erik quanstrom and david du colombier for feedback and testing.

--- /dev/null
+++ b/sys/src/9/pc/ethervirtio.c
@@ -1,0 +1,626 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+#include "etherif.h"
+
+/*
+ * virtio ethernet driver
+ * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
+ *
+ * TODO
+ *
+ * implement control queue
+ */
+/* short names for the structures defined below */
+typedef struct Vring Vring;
+typedef struct Vdesc Vdesc;
+typedef struct Vused Vused;
+typedef struct Vheader Vheader;
+typedef struct Vqueue Vqueue;
+typedef struct Ctlr Ctlr;
+
+enum {
+	/* §2.1 Device Status Field: bits written to Qstatus */
+	Sacknowledge = 1,
+	Sdriver = 2,
+	Sdriverok = 4,
+	Sfeatureok = 8,
+	Sfailed = 128,
+
+	/* §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout (byte offsets from Ctlr.port) */
+	Qdevfeat = 0,	/* device feature bits, read with inl */
+	Qdrvfeat = 4,	/* feature bits accepted by the driver, written with outl */
+	Qaddr = 8,	/* physical address of the selected queue divided by VBY2PG */
+	Qsize = 12,	/* number of entries in the selected queue, read with ins */
+	Qselect = 14,	/* queue number that Qsize and Qaddr refer to */
+	Qnotify = 16,	/* write a queue number here after adding buffers */
+	Qstatus = 18,	/* device status, S* bits above */
+	Qisr = 19,	/* interrupt status; interrupt() tests bit 0 */
+	Qmac = 20,	/* Eaddrlen bytes of MAC address, read with inb */
+	Qnetstatus = 26,	/* N* bits below */
+
+	/* flags in Qnetstatus */
+	Nlinkup = (1<<0),
+	Nannounce = (1<<1),
+
+	/* feature bits (Qdevfeat, Qdrvfeat); pciprobe() requires and accepts all three */
+	Fmac = (1<<5),
+	Fstatus = (1<<16),
+	Fctrlvq = (1<<17),
+
+	/* vring used flags */
+	Unonotify = 1,
+	/* vring avail flags */
+	Rnointerrupt = 1,
+
+	/* descriptor flags (Vdesc.flags) */
+	Dnext = 1,
+	Dwrite = 2,
+	Dindirect = 4,
+
+	/* struct sizes in bytes, used instead of sizeof when laying out rings */
+	VringSize = 4,
+	VdescSize = 16,
+	VusedSize = 8,
+	VheaderSize = 10,
+
+	/* §4.1.5.1.4.1 says pages are 4096 bytes
+	 * for the purposes of the driver.
+	 */
+	VBY2PG	= 4096,
+#define VPGROUND(s)	ROUND(s, VBY2PG)
+	/* queue numbers: index into Ctlr.queue[], also the value written to Qselect and Qnotify */
+	Vrxq	= 0,
+	Vtxq	= 1,
+	Vctlq	= 2,
+};
+/* two-field header at the start of both the avail and the used ring (VringSize bytes) */
+struct Vring
+{
+	u16int	flags;	/* Rnointerrupt (avail ring) or Unonotify (used ring) */
+	u16int	idx;	/* running count of published entries; masked before it is used as an index */
+};
+/* one descriptor table entry (VdescSize bytes) */
+struct Vdesc
+{
+	u64int	addr;	/* physical address of the buffer, from PADDR() */
+	u32int	len;	/* buffer length in bytes */
+	u16int	flags;	/* Dnext, Dwrite, Dindirect */
+	u16int	next;	/* index of the following descriptor when Dnext is set */
+};
+/* one used ring entry (VusedSize bytes); this driver only ever reads these */
+struct Vused
+{
+	u32int	id;	/* descriptor index of the returned chain; txproc/rxproc map it back to a Block slot */
+	u32int	len;	/* length reported for the chain; rxproc uses it to set the Block's wp */
+};
+/* header in front of every frame (VheaderSize bytes); one zeroed copy per queue is shared by all chains */
+struct Vheader
+{
+	u8int	flags;
+	u8int	segtype;
+	u16int	hlen;
+	u16int	seglen;
+	u16int	csumstart;
+	u16int	csumend;
+};
+
+/* §2.4 Virtqueues: driver-side state for one queue */
+struct Vqueue
+{
+	Rendez;	/* txproc/rxproc sleep on the queue; interrupt() wakes them */
+
+	uint	qsize;	/* number of descriptors; mkqueue() asserts it is a power of 2 */
+	uint	qmask;	/* qsize-1 */
+
+	Vdesc	*desc;	/* descriptor table, qsize entries */
+
+	Vring	*avail;	/* avail ring header */
+	u16int	*availent;	/* avail ring entries: head descriptor indices */
+	u16int	*availevent;	/* slot after the avail entries; not used by this driver */
+
+	Vring	*used;	/* used ring header, on its own VBY2PG boundary */
+	Vused	*usedent;	/* used ring entries */
+	u16int	*usedevent;	/* slot after the used entries; not used by this driver */
+	u16int	lastused;	/* count of used entries the driver has already retired */
+
+	Vheader *header;	/* shared header buffer every chain's first descriptor points at */
+	Block	**block;	/* Block in flight for each descriptor pair, nil when the slot is free */
+};
+/* per-device driver state, allocated by pciprobe() */
+struct Ctlr {
+	Lock;	/* taken by attach() */
+
+	int		attached;	/* set once attach() has started the kprocs */
+
+	int		port;	/* I/O port base of the legacy register block */
+	Pcidev*	pcidev;
+	Ctlr*	next;	/* link in the list headed by ctlrhead */
+	int		active;	/* claimed by an Ether in reset() */
+	int		id;	/* (did<<16)|vid */
+	int		typ;	/* value pciprobe() matched against PCI config word 0x2E */
+	ulong	feat;	/* device feature bits read from Qdevfeat */
+	int		nqueue;	/* number of queues pciprobe() managed to set up */
+
+	/* virtioether has 3 queues: rx, tx and ctl */
+	Vqueue	*queue[3];
+
+	/* MAC address */
+	uchar	ea[Eaddrlen];
+};
+
+static Ctlr *ctlrhead;	/* controllers returned by pciprobe(), linked through Ctlr.next */
+
+static int
+vhasroom(void *v)
+{
+	/* sleep/wakeup condition: the used ring holds entries that have not been retired yet */
+	return ((Vqueue*)v)->lastused != ((Vqueue*)v)->used->idx;
+}
+/* transmit kproc: hands Blocks from edev->oq to the device and frees them once they come back used */
+static void
+txproc(void *v)
+{
+	Ether *edev;
+	Ctlr *ctlr;
+	Vqueue *q;
+	Vused *u;
+	Block *b;
+	int i, j;
+
+	edev = v;
+	ctlr = edev->ctlr;
+	q = ctlr->queue[Vtxq];
+
+	while(waserror())
+		;
+	/* pair up descriptors: even = shared header, odd = packet data (filled in per send) */
+	for(i = 0; i < q->qsize/2; i++){
+		j = i << 1;
+		q->desc[j].addr = PADDR(q->header);
+		q->desc[j].len = VheaderSize;
+		q->desc[j].next = j | 1;
+		q->desc[j].flags = Dnext;
+
+		q->availent[i] = q->availent[i + q->qsize/2] = j;
+
+		j |= 1;
+		q->desc[j].next = 0;
+		q->desc[j].flags = 0;
+	}
+	/* Rnointerrupt is an avail-ring flag owned by the driver; the used ring belongs to the device */
+	q->avail->flags &= ~Rnointerrupt;
+
+	while((b = qbread(edev->oq, 1000000)) != nil){
+		i = q->avail->idx & (q->qmask >> 1);
+		if(q->block[i] == nil) {
+			/* slot free, fill in descriptor */
+			q->block[i] = b;
+			j = (i << 1) | 1;
+			q->desc[j].addr = PADDR(b->rp);
+			q->desc[j].len = BLEN(b);
+			coherence();	/* descriptor contents before the index that publishes them */
+			q->avail->idx++;
+			outs(ctlr->port+Qnotify, Vtxq);
+		} else {
+			/* transmit ring is full: drop this packet and wait for completions */
+			freeb(b);
+			if(!vhasroom(q))
+				sleep(q, vhasroom, q);
+		}
+
+		/* free completed packets */
+		while((i = q->lastused) != q->used->idx){
+			u = &q->usedent[i & q->qmask];
+			i = (u->id & q->qmask) >> 1;
+			if((b = q->block[i]) == nil)
+				break;
+			q->block[i] = nil;
+			freeb(b);
+			q->lastused++;
+		}
+	}
+
+	pexit("ether out queue closed", 1);
+}
+/* receive kproc: keeps the rx ring stocked with empty Blocks and passes filled ones to etheriq() */
+static void
+rxproc(void *v)
+{
+	Ether *edev;
+	Ctlr *ctlr;
+	Vqueue *q;
+	Vused *u;
+	Block *b;
+	int i, j;
+
+	edev = v;
+	ctlr = edev->ctlr;
+	q = ctlr->queue[Vrxq];
+
+	while(waserror())
+		;
+	/* pair up descriptors: even = shared header, odd = frame data; Dwrite lets the device fill both */
+	for(i = 0; i < q->qsize/2; i++){
+		j = i << 1;
+		q->desc[j].addr = PADDR(q->header);
+		q->desc[j].len = VheaderSize;
+		q->desc[j].next = j | 1;
+		q->desc[j].flags = Dwrite|Dnext;
+
+		q->availent[i] = q->availent[i + q->qsize/2] = j;
+
+		j |= 1;
+		q->desc[j].next = 0;
+		q->desc[j].flags = Dwrite;
+	}
+	/* Rnointerrupt is an avail-ring flag owned by the driver; the used ring belongs to the device */
+	q->avail->flags &= ~Rnointerrupt;
+
+	for(;;){
+		/* replenish receive ring */
+		do {
+			i = q->avail->idx & (q->qmask >> 1);
+			if(q->block[i] != nil)
+				break;
+			if((b = iallocb(ETHERMAXTU)) == nil)
+				break;
+			q->block[i] = b;
+			j = (i << 1) | 1;
+			q->desc[j].addr = PADDR(b->rp);
+			q->desc[j].len = BALLOC(b);
+			coherence();	/* descriptor contents before the index that publishes them */
+			q->avail->idx++;
+			outs(ctlr->port+Qnotify, Vrxq);
+		} while(q->avail->idx != q->used->idx);
+
+		/* wait for any packets to complete */
+		if(!vhasroom(q))
+			sleep(q, vhasroom, q);
+
+		/* retire completed packets */
+		while((i = q->lastused) != q->used->idx) {
+			u = &q->usedent[i & q->qmask];
+			i = (u->id & q->qmask) >> 1;
+			if((b = q->block[i]) == nil)
+				break;
+
+			q->block[i] = nil;
+			/* u->len covers the whole chain: the VheaderSize header plus the frame in b */
+			b->wp = b->rp + u->len - VheaderSize;
+			etheriq(edev, b, 1);
+			q->lastused++;
+		}
+	}
+}
+/* interrupt handler: wakes txproc and/or rxproc when their queue has used entries to retire */
+static void
+interrupt(Ureg*, void* arg)
+{
+	Ctlr *c;
+	Vqueue *txq, *rxq;
+
+	c = ((Ether*)arg)->ctlr;
+	/* nothing to do unless bit 0 of the ISR register is set */
+	if((inb(c->port+Qisr) & 1) == 0)
+		return;
+	txq = c->queue[Vtxq];
+	if(vhasroom(txq))
+		wakeup(txq);
+	rxq = c->queue[Vrxq];
+	if(vhasroom(rxq))
+		wakeup(rxq);
+}
+/* first attach starts the rx and tx kprocs and sets Sdriverok; later calls do nothing */
+static void
+attach(Ether* edev)
+{
+	Ctlr *c;
+	char kname[KNAMELEN];
+
+	c = edev->ctlr;
+	lock(c);
+	if(c->attached){	/* kprocs were started by an earlier attach */
+		unlock(c);
+		return;
+	}
+	c->attached = 1;
+
+	/* one kproc per direction, named after the controller number */
+	snprint(kname, sizeof kname, "#l%drx", edev->ctlrno);
+	kproc(kname, rxproc, edev);
+	snprint(kname, sizeof kname, "#l%dtx", edev->ctlrno);
+	kproc(kname, txproc, edev);
+
+	/* add Sdriverok to whatever status bits are already set */
+	outb(c->port+Qstatus, inb(c->port+Qstatus) | Sdriverok);
+	unlock(c);
+}
+/* build a text report (features, device and net status, per-queue indices) and return what readstr() copies into a */
+static long
+ifstat(Ether *edev, void *a, long n, ulong offset)
+{
+	int i, l;
+	char *p;
+	Ctlr *ctlr;
+	Vqueue *q;
+
+	ctlr = edev->ctlr;
+
+	p = smalloc(READSTR);
+
+	l = snprint(p, READSTR, "devfeat %32.32lub\n", ctlr->feat);
+	l += snprint(p+l, READSTR-l, "drvfeat %32.32lub\n", inl(ctlr->port+Qdrvfeat));
+	l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", inb(ctlr->port+Qstatus));
+	/* Qisr is not reported: reading it resets the status that interrupt() tests, which could lose a wakeup */
+	l += snprint(p+l, READSTR-l, "netstatus %8.8ub\n",  inb(ctlr->port+Qnetstatus));
+
+	for(i = 0; i < ctlr->nqueue; i++){
+		q = ctlr->queue[i];
+		l += snprint(p+l, READSTR-l, "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud\n",
+			i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused);
+	}
+
+	n = readstr(offset, a, n, p);
+	free(p);
+
+	return n;
+}
+
+/* XXX: not done -- stub: all three arguments are ignored and the result is always 0 */
+static long
+ctl(Ether *, void *, long)
+{
+	return 0;
+}
+
+/*
+ * XXX: not done -- stub for the promiscuous-mode hook;
+ * it looks up the controller and changes nothing
+ */
+static void
+promiscuous(void *v, int on)
+{
+	Ctlr *c;
+
+	c = ((Ether*)v)->ctlr;
+	USED(c, on);
+}
+
+/* XXX: not done -- writes 0 to the device status register, as pciprobe() does before initialization */
+static void
+shutdown(Ether* ether)
+{
+	Ctlr *ctlr;
+	/* the Ether is not itself a Ctlr: the controller hangs off ether->ctlr */
+	ctlr = ether->ctlr;
+
+	outb(ctlr->port+Qstatus, 0);
+}
+
+/*
+ * XXX: not done -- stub for the multicast hook; the uchar* and int
+ * arguments are ignored and nothing is sent to the device
+ */
+static void
+multicast(void *arg, uchar*, int)
+{
+	Ctlr *c;
+
+	c = ((Ether*)arg)->ctlr;
+	USED(c);
+}
+
+/* §2.4.2 legacy virtqueue layout: bytes needed for a queue of size entries, each part padded to VBY2PG */
+static ulong
+queuesize(ulong size)
+{
+	ulong drvpart = VPGROUND(VdescSize*size + sizeof(u16int)*(3+size));
+	return drvpart + VPGROUND(sizeof(u16int)*3 + VusedSize*size);
+}
+/* allocate a Vqueue with size descriptors and lay out its rings; returns nil when any allocation fails */
+static Vqueue*
+mkqueue(int size)
+{
+	Vqueue *q;
+	Vheader *hdr;
+	Block **blocks;
+	uchar *p;
+
+	/* §2.4: Queue Size value is always a power of 2 and <= 32768 */
+	assert(!(size & (size - 1)) && size <= 32768);
+
+	q = mallocz(sizeof(Vqueue), 1);
+	p = mallocalign(queuesize(size), VBY2PG, 0, 0);
+	blocks = mallocz(sizeof(Block*) * size, 1);
+	hdr = mallocz(VheaderSize, 1);
+	if(p == nil || q == nil || blocks == nil || hdr == nil){
+		print("ethervirtio: no memory for Vqueue\n");
+		free(hdr);
+		free(blocks);
+		free(p);
+		free(q);
+		return nil;
+	}
+	q->block = blocks;
+	q->header = hdr;
+	/* first part: descriptor table followed by the avail ring */
+	q->desc = (void*)p;
+	p += VdescSize*size;
+	q->avail = (void*)p;
+	p += VringSize;
+	q->availent = (void*)p;
+	p += sizeof(u16int)*size;
+	q->availevent = (void*)p;
+	p += sizeof(u16int);
+	/* second part: the used ring, starting on the next VBY2PG boundary */
+	p = (uchar*)VPGROUND((uintptr)p);
+	q->used = (void*)p;
+	p += VringSize;
+	q->usedent = (void*)p;
+	p += VusedSize*size;
+	q->usedevent = (void*)p;
+
+	q->qsize = size;
+	q->qmask = q->qsize - 1;
+	q->lastused = q->avail->idx = q->used->idx = 0;
+	/* disable interrupts; virtio spec says we still get interrupts if VnotifyEmpty is set in Drvfeat */
+	q->used->flags |= Rnointerrupt;	/* NOTE(review): Rnointerrupt is listed as an avail flag -- confirm the ring */
+	return q;
+}
+/* scan PCI for legacy virtio devices whose config word 0x2E equals typ; returns them linked through Ctlr.next */
+static Ctlr*
+pciprobe(int typ)
+{
+	Ctlr *c, *h, *t;
+	Pcidev *p;
+	int n, i;
+
+	h = t = nil;
+
+	/* §4.1.2 PCI Device Discovery */
+	for(p = nil; p = pcimatch(p, 0, 0);){
+		if(p->vid != 0x1AF4)
+			continue;
+		/* the two possible DIDs for virtio-net */
+		if(p->did != 0x1000 && p->did != 0x1041)
+			continue;
+		/* non-transitional devices will have a revision > 0 */
+		if(p->rid != 0)
+			continue;
+		/* non-transitional device will have typ+0x40 */
+		if(pcicfgr16(p, 0x2E) != typ)
+			continue;
+		/* zeroed on purpose: next, active, attached and queue[] are never set explicitly */
+		if((c = mallocz(sizeof(Ctlr), 1)) == nil){
+			print("ethervirtio: no memory for Ctlr\n");
+			break;
+		}
+		c->port = p->mem[0].bar & ~0x1;
+
+		if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
+			print("ethervirtio: port %ux in use\n", c->port);
+			free(c);
+			continue;
+		}
+
+		c->typ = typ;
+		c->pcidev = p;
+		c->id = (p->did<<16)|p->vid;
+
+		/* §3.1.2 Legacy Device Initialization */
+		outb(c->port+Qstatus, 0);
+
+		outb(c->port+Qstatus, Sacknowledge|Sdriver);
+
+		c->feat = inl(c->port+Qdevfeat);
+
+		if((c->feat & (Fmac|Fstatus|Fctrlvq)) != (Fmac|Fstatus|Fctrlvq)){
+			print("ethervirtio: feature mismatch %32.32lub\n", c->feat);
+			outb(c->port+Qstatus, Sfailed);
+			iofree(c->port);
+			free(c);
+			continue;
+		}
+
+		outl(c->port+Qdrvfeat, Fmac|Fstatus|Fctrlvq);
+
+		/* part of the 1.0 spec, not used in legacy */
+		/*
+		outb(vd->port+Status, inb(vd->port+Status) | FeatureOk);
+		i = inb(vd->port+Status);
+		if(!(i & FeatureOk)){
+			print("ethervirtio: feature mismatch %32.32lub\n", vd->feat);
+			outb(vd->port+Status, Failed);
+			iofree(vd->port);
+			free(vd);
+			continue;
+		}
+		*/
+
+		/* §4.1.5.1.4 Virtqueue Configuration */
+		for(i=0; i<nelem(c->queue); i++){
+			outs(c->port+Qselect, i);
+			n = ins(c->port+Qsize);
+			if(n == 0 || (n & (n-1)) != 0){
+				c->queue[i] = nil;
+				break;
+			}
+			if((c->queue[i] = mkqueue(n)) == nil)
+				break;
+			coherence();
+			outl(c->port+Qaddr, PADDR(c->queue[i]->desc)/VBY2PG);
+		}
+		c->nqueue = i;
+		/* NOTE(review): nqueue can be < 2 here, yet rxproc/txproc use queue[Vrxq]/queue[Vtxq] unchecked */
+		/* read virtio mac */
+		for(i = 0; i < Eaddrlen; i++)
+			c->ea[i] = inb(c->port+Qmac+i);
+
+		if(h == nil)
+			h = c;
+		else
+			t->next = c;
+		t = c;
+	}
+
+	return h;
+}
+
+/* addethercard() callback: bind edev to an unclaimed controller; returns 0 on success, -1 if none fits */
+static int
+reset(Ether* edev)
+{
+	Ctlr *c;
+	Pcidev *pci;
+
+	/* probe the bus whenever the controller list is still empty */
+	if(ctlrhead == nil)
+		ctlrhead = pciprobe(1);
+
+	/* skip controllers already claimed, and those on another port when one was requested */
+	c = ctlrhead;
+	while(c != nil && (c->active || (edev->port != 0 && edev->port != c->port)))
+		c = c->next;
+
+	if(c == nil)
+		return -1;
+
+	c->active = 1;
+
+	/* tie the controller and its PCI resources to this Ether */
+	pci = c->pcidev;
+	edev->ctlr = c;
+	edev->arg = edev;
+	edev->port = c->port;
+	edev->irq = pci->intl;
+	edev->tbdf = pci->tbdf;
+	memmove(edev->ea, c->ea, Eaddrlen);
+
+	/* link speed and state are fixed values here */
+	edev->mbps = 1000;
+	edev->link = 1;
+
+	/* driver entry points */
+	edev->attach = attach;
+	edev->shutdown = shutdown;
+	edev->interrupt = interrupt;
+	edev->ifstat = ifstat;
+	edev->ctl = ctl;
+	edev->promiscuous = promiscuous;
+	edev->multicast = multicast;
+
+	return 0;
+}
+/* registers reset() with addethercard() under the name "ethervirtio" */
+void
+ethervirtiolink(void)
+{
+	addethercard("ethervirtio", reset);
+}
+
--- a/sys/src/9/pc/pcf
+++ b/sys/src/9/pc/pcf
@@ -71,6 +71,7 @@
 	etherwavelan	wavelan devi82365 cis pci
 	etheriwl	pci wifi
 	etherrt2860	pci wifi
+	ethervirtio	pci
 	ethermedium
 	pcmciamodem
 	netdevmedium
--- a/sys/src/9/pc64/pc64
+++ b/sys/src/9/pc64/pc64
@@ -69,6 +69,7 @@
 #	etherwavelan	wavelan devi82365 cis pci
 	etheriwl	pci wifi
 #	etherrt2860	pci wifi
+	ethervirtio	pci
 	ethermedium
 #	pcmciamodem
 	netdevmedium