shithub: riscv

Download patch

ref: d5e55f51407fe5b8516f6488f5813d026d05fd56
parent: 8029c3d8c4bdae74b9a68beeff1edb8a21cceeed
author: aiju <devnull@localhost>
date: Tue Jun 13 10:18:14 EDT 2017

add vmx(1)

diff: cannot open b/sys/src/cmd/vmx//null: file does not exist: 'b/sys/src/cmd/vmx//null'
--- /dev/null
+++ b/sys/src/cmd/vmx/dat.h
@@ -1,0 +1,64 @@
+typedef struct PCIDev PCIDev;
+typedef struct PCICap PCICap;
+typedef struct PCIBar PCIBar;
+typedef struct Region Region;
+
+extern int halt, irqactive;
+
+enum {
+	BY2PG = 4096
+};
+
+#define RPC "pc"	/* register names in the form rget/rset/rpoke take them */
+#define RSP "sp"
+#define RAX "ax"
+#define RBX "bx"
+#define RCX "cx"
+#define RDX "dx"
+
+enum {	/* operation codes passed as first argument to handlers installed with registermmio */
+	MMIORD = 0,	/* read: the handler's return value is what the guest will load */
+	MMIOWRP = 1,	/* sent before a write is stepped; the result pre-loads the scratch page */
+	MMIOWR = 2,	/* sent after the write was stepped, with the value the guest stored */
+};
+
+struct Region {	/* one entry of the guest memory map; entries are chained from mmap */
+	uintptr start, end;	/* bounds of the region; putmmap reports end - start as its length */
+	enum { REGNO, REGMEM, REGFB } type;	/* putmmap only reports REGMEM entries */
+	char *segname;	/* NOTE(review): presumably the backing segment's name -- confirm */
+	uvlong segoff;	/* NOTE(review): presumably the offset inside that segment -- confirm */
+	void *v, *ve;	/* NOTE(review): presumably host start/end of the mapping -- confirm */
+	Region *next;
+};
+
+extern Region *mmap;
+
+#define BDF(b,d,f) ((b)<<16&0xff0000|(d)<<11&0xf800|(f)<<8&0x700)	/* pack b into bits 16-23, d into bits 11-15, f into bits 8-10 */
+
+struct PCIBar {	/* one base address register of an emulated PCI device */
+	PCIDev *d;	/* NOTE(review): presumably the owning device -- confirm */
+	u8int type;
+	u32int addr, length;	/* io() claims ports in [addr, addr+length) for this BAR */
+	PCIBar *busnext, *busprev;	/* circular list; io() walks busnext starting from iobars */
+	u32int (*io)(int, u16int, u32int, int, void *);	/* called by io() as (dir, port - addr, val, size, aux) */
+	void *aux;	/* opaque last argument for the io callback */
+};
+
+struct PCIDev {	/* state of one emulated PCI function */
+	u32int bdf, viddid, clrev, subid;	/* NOTE(review): bdf presumably built with BDF(); the others look like config-space words -- confirm */
+	u16int ctrl;
+	u8int irqno, irqactive;
+	PCIBar bar[6];
+	PCIDev *next;
+	PCICap *cap;	/* head of this device's PCICap list (PCICap has a next link) */
+	u8int capalloc;
+};
+
+struct PCICap {	/* one capability attached to a PCIDev, with its own accessors */
+	PCIDev *dev;
+	u8int length;
+	u8int addr;
+	u32int (*read)(PCICap *, u8int);	/* NOTE(review): u8int is presumably the offset inside the capability -- confirm */
+	void (*write)(PCICap *, u8int, u32int, u32int);	/* NOTE(review): trailing u32ints presumably value and mask -- confirm */
+	PCICap *next;
+};
--- /dev/null
+++ b/sys/src/cmd/vmx/exith.c
@@ -1,0 +1,387 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <bio.h>
+#include "dat.h"
+#include "fns.h"
+
+typedef struct ExitInfo ExitInfo;
+struct ExitInfo {	/* one decoded exit message; filled in by processexit */
+	char *raw;	/* the message as received, unparsed */
+	char *name;	/* first field; selects the handler in etypes */
+	uvlong qual;	/* second field, parsed as a number */
+	uvlong pa, va;	/* values of the optional "pa" and "va" fields, else 0 */
+	u32int ilen, iinfo;	/* values of the optional "ilen" and "iinfo" fields, else 0 */
+};
+
+static void	/* advance the guest pc by the ilen value reported with the exit */
+skipinstr(ExitInfo *ei)
+{
+	rset(RPC, rget(RPC) + ei->ilen);
+}
+
+static int	/* step one guest access to pa against the scratch page tmp; 0 on success, -1 after "step failed" */
+stepmmio(uvlong pa, uvlong *val, int size, ExitInfo *ei)
+{
+	extern uchar *tmp;
+	extern uvlong tmpoff;
+	void *targ;
+	uvlong pc, si;
+	char buf[ERRMAX];
+	extern int getexit;
+	
+	memset(tmp, 0, BY2PG);
+	targ = tmp + (pa & 0xfff);	/* same offset inside the scratch page as pa has in its page */
+	switch(size){	/* pre-load the scratch page with *val */
+	case 1: *(u8int*)targ = *val; break;
+	case 2: *(u16int*)targ = *val; break;
+	case 4: *(u32int*)targ = *val; break;
+	case 8: *(u64int*)targ = *val; break;
+	}
+	pc = rget(RPC);	/* kept only for the failure message */
+	si = rget("si");	/* NOTE(review): the value is never used afterwards */
+	rcflush(0);
+	if(ctl("step -map %#ullx vm %#ullx", pa & ~0xfff, tmpoff) < 0){	/* page of pa and tmpoff are the -map arguments */
+		rerrstr(buf, sizeof(buf));
+		if(strcmp(buf, "step failed") == 0){	/* the one error that is reported instead of being fatal */
+			vmerror("vmx step failure (old pc=%#ullx, new pc=%#ullx, cause=%#q)", pc, rget(RPC), ei->raw);
+			getexit++;
+			return -1;
+		}
+		sysfatal("ctl(stepmmio): %r");
+	}
+	switch(size){	/* hand back whatever the stepped instruction left in the page */
+	case 1: *val = *(u8int*)targ; break;
+	case 2: *val = *(u16int*)targ; break;
+	case 4: *val = *(u32int*)targ; break;
+	case 8: *val = *(u64int*)targ; break;
+	}
+	return 0;
+}
+
+extern u32int io(int, u16int, u32int, int);
+
+static void	/* "io" exit: decode the qualification, run the port access through io(), skip the instruction */
+iohandler(ExitInfo *ei)
+{
+	int port, len, isin;
+	u32int val;
+	u64int ax;
+	
+	port = ei->qual >> 16 & 0xffff;	/* port number: qual bits 16-31 */
+	len = (ei->qual & 7) + 1;	/* access width in bytes: low three bits plus one */
+	isin = (ei->qual & 8) != 0;	/* bit 3 set: input */
+	if((ei->qual & 1<<4) != 0){	/* bit 4 set: string instruction, answered with #ud */
+		vmerror("i/o string instruction not implemented");
+		postexc("#ud", 0);
+		return;
+	}
+	if(isin){
+		val = io(1, port, 0, len);
+		ax = rget(RAX);
+		if(len == 1) ax = ax & ~0xff | val & 0xff;	/* 1- and 2-byte reads keep the upper bits of ax */
+		else if(len == 2) ax = ax & ~0xffff | val & 0xffff;
+		else ax = val;	/* wider reads replace ax with the zero-extended value */
+		rset(RAX, ax);
+	}else{
+		ax = rget(RAX);
+		if(len == 1) ax = (u8int) ax;	/* truncate to the access width before the write */
+		else if(len == 2) ax = (u16int) ax;
+		io(0, port, ax, len);
+	}
+	skipinstr(ei);
+}
+
+typedef struct MemHandler MemHandler;
+struct MemHandler {	/* one MMIO range installed by registermmio */
+	uvlong lo, hi;	/* inclusive bounds: eptfault matches lo <= pa <= hi */
+	uvlong (*f)(int, uvlong, uvlong);	/* called as f(op, address, value) with op one of MMIORD/MMIOWRP/MMIOWR */
+};
+
+MemHandler memh[32];	/* registered ranges, searched in registration order */
+int nmemh;	/* number of entries of memh in use */
+
+static uvlong	/* fallback MMIO handler: log reads and writes nobody claimed; always yields 0 */
+defaultmmio(int op, uvlong addr, uvlong val)
+{
+	if(op == MMIORD){
+		vmerror("read from unmapped address %#ullx (pc=%#ullx)", addr, rget(RPC));
+		return 0;
+	}
+	if(op == MMIOWR){
+		vmerror("write to unmapped address %#ullx (val=%#ullx,pc=%#ullx)", addr, val, rget(RPC));
+		return 0;
+	}
+	return 0;	/* MMIOWRP and any other op: nothing to report */
+}
+
+static void	/* "eptfault" exit: route the access at ei->pa through the matching MMIO handler */
+eptfault(ExitInfo *ei)
+{
+	MemHandler *h;
+	static MemHandler def = {.f defaultmmio};	/* used when no registered range contains pa */
+	int size;
+	uvlong val;
+	
+	for(h = memh; h < memh + nmemh; h++)	/* first registered range containing pa wins */
+		if(ei->pa >= h->lo && ei->pa <= h->hi)
+			break;
+	if(h == memh + nmemh)
+		h = &def;
+	size = 8;	/* every access is transferred as 8 bytes */
+	if((ei->qual & 5) != 0){	/* qual bit 0 or 2 set: handled as a read */
+		val = h->f(MMIORD, ei->pa, 0);
+		stepmmio(ei->pa, &val, size, ei);	/* a failed step is not acted on here */
+	}else{	/* otherwise: handled as a write */
+		val = h->f(MMIOWRP, ei->pa, 0);	/* value the scratch page holds before the store */
+		if(stepmmio(ei->pa, &val, size, ei) < 0)
+			return;
+		h->f(MMIOWR, ei->pa, val);	/* deliver what the guest stored */
+	}
+}
+
+void	/* install f as handler for guest physical addresses lo..hi (inclusive, see eptfault) */
+registermmio(uvlong lo, uvlong hi, uvlong (*f)(int, uvlong, uvlong))
+{
+	assert(nmemh < nelem(memh));	/* the table is fixed size; running out is a programming error */
+	memh[nmemh].lo = lo;
+	memh[nmemh].hi = hi;
+	memh[nmemh].f = f;
+	nmemh++;
+}
+
+typedef struct CPUID CPUID;
+struct CPUID {	/* one line of aux/cpuid -r output as parsed by cpuidinit */
+	u32int idx;	/* first field of the line; getcpuid matches it against the requested leaf */
+	u32int ax, bx, cx, dx;	/* fields two to five */
+};
+static CPUID *cpuidf;	/* table grown one entry at a time by cpuidinit */
+static int ncpuidf;	/* number of entries in cpuidf */
+
+static void	/* child proc of cpuidinit: exec aux/cpuid -r with fd 1 redirected to pfd[0] */
+auxcpuidproc(void *vpfd)
+{
+	int *pfd;
+	
+	pfd = vpfd;
+	close(pfd[1]);	/* the parent keeps this end */
+	close(0);
+	open("/dev/null", OREAD);	/* NOTE(review): presumably lands on fd 0, the descriptor just closed -- confirm */
+	dup(pfd[0], 1);
+	close(pfd[0]);
+	procexecl(nil, "/bin/aux/cpuid", "cpuid", "-r", nil);
+	threadexits("exec: %r");	/* reached only when procexecl returns */
+}
+
+void	/* run aux/cpuid -r once and cache every five-field line it prints in cpuidf[] */
+cpuidinit(void)
+{
+	int pfd[2];
+	Biobuf *bp;
+	char *l, *f[5];
+	CPUID *cp;
+	if(pipe(pfd) < 0) sysfatal("pipe: %r");	/* was unchecked: pfd would hold garbage on failure */
+	procrfork(auxcpuidproc, pfd, 4096, RFFDG);
+	close(pfd[0]);	/* the child writes to its copy of this end */
+	bp = Bfdopen(pfd[1], OREAD);
+	if(bp == nil) sysfatal("Bopenfd: %r");
+	for(; l = Brdstr(bp, '\n', 1), l != nil; free(l)){
+		if(tokenize(l, f, 5) < 5) continue;	/* lines with fewer than five fields are skipped */
+		cpuidf = realloc(cpuidf, (ncpuidf + 1) * sizeof(CPUID));
+		if(cpuidf == nil) sysfatal("realloc: %r");	/* was unchecked: the stores below would go through nil */
+		cp = cpuidf + ncpuidf++;
+		cp->idx = strtoul(f[0], nil, 16);	/* all five fields are parsed as hexadecimal */
+		cp->ax = strtoul(f[1], nil, 16);
+		cp->bx = strtoul(f[2], nil, 16);
+		cp->cx = strtoul(f[3], nil, 16);
+		cp->dx = strtoul(f[4], nil, 16);
+	}
+	Bterm(bp);
+	close(pfd[1]);
+}
+
+CPUID *	/* return the cached entry whose idx equals idx, or nil when there is none */
+getcpuid(ulong idx)
+{
+	int i;
+	
+	for(i = 0; i < ncpuidf; i++)
+		if(cpuidf[i].idx == idx)
+			return &cpuidf[i];
+	return nil;
+}
+
+static void	/* ".cpuid" exit: answer leaf ax from the cached table, filtered per leaf, then skip the instruction */
+cpuid(ExitInfo *ei)
+{
+	u32int ax, bx, cx, dx;
+	CPUID *cp;
+	static CPUID def;	/* all-zero entry standing in for leaves missing from the table */
+	
+	ax = rget(RAX);
+	cp = getcpuid(ax);
+	if(cp == nil) cp = &def;
+	switch(ax){
+	case 0: /* highest register & GenuineIntel */
+		ax = 7;	/* reported maximum leaf is fixed at 7; bx/dx/cx come from the table */
+		bx = cp->bx;
+		dx = cp->dx;
+		cx = cp->cx;
+		break;
+	case 1: /* features */
+		ax = cp->ax;
+		bx = cp->bx & 0xffff;	/* only the bits selected by each mask are passed through */
+		cx = cp->cx & 0x60de2203;
+		dx = cp->dx & 0x0682a179;
+		break;
+	case 2: goto literal; /* cache stuff */
+	case 3: goto zero; /* processor serial number */
+	case 4: goto literal; /* cache stuff */
+	case 5: goto zero; /* monitor/mwait */
+	case 6: goto zero; /* thermal management */
+	case 7: goto zero; /* more features */
+	case 0x80000000: /* highest register */
+		ax = 0x80000008;
+		bx = cx = dx = 0;
+		break;
+	case 0x80000001: /* signature & ext features */
+		ax = cp->ax;
+		bx = 0;
+		cx = cp->cx & 0x121;
+		dx = cp->dx & 0x04100000;
+		break;
+	case 0x80000002: goto literal; /* brand string */
+	case 0x80000003: goto literal; /* brand string */
+	case 0x80000004: goto literal; /* brand string */
+	case 0x80000005: goto zero; /* reserved */
+	case 0x80000006: goto literal; /* cache info */
+	case 0x80000007: goto zero; /* invariant tsc */
+	case 0x80000008: goto literal; /* address bits */
+	literal:	/* pass the table entry through unchanged */
+		ax = cp->ax;
+		bx = cp->bx;
+		cx = cp->cx;
+		dx = cp->dx;
+		break;
+	default:
+		vmerror("unknown cpuid field eax=%#ux", ax);	/* logged, then falls into zero */
+	zero:	/* report all four registers as 0 */
+		ax = 0;
+		bx = 0;
+		cx = 0;
+		dx = 0;
+		break;
+	}
+	rset(RAX, ax);
+	rset(RBX, bx);
+	rset(RCX, cx);
+	rset(RDX, dx);
+	skipinstr(ei);
+}
+
+static void	/* ".rdmsr"/".wrmsr" exit: no MSR is implemented, so log the access and skip the instruction */
+rdwrmsr(ExitInfo *ei)
+{
+	u32int cx;
+	u64int val;
+	int rd;
+	
+	rd = ei->name[1] == 'r';	/* name is ".rdmsr" or ".wrmsr" (see etypes) */
+	cx = rget(RCX);	/* used as the MSR number in the messages below */
+	val = (uvlong)rget(RDX) << 32 | rget(RAX);	/* 64-bit value assembled as dx:ax */
+	switch(cx){
+	default:
+		if(rd)
+			vmerror("read from unknown MSR %#x ignored", cx);
+		else
+			vmerror("write to unknown MSR %#x ignored (val=%#ullx)", cx, val);
+		break;
+	}
+	if(rd){	/* val is still the dx:ax pair read above, so a read stores the same value back */
+		rset(RAX, val);
+		rset(RDX, val >> 32);
+	}
+	skipinstr(ei);
+}
+
+static void	/* ".hlt" exit: mark the guest halted unless an interrupt is already posted, then skip the instruction */
+hlt(ExitInfo *ei)
+{
+	if(irqactive < 0)	/* fix: -1 is the "nothing posted" value (picupdate/irqack); 0 is a valid vector, so "== 0" was wrong */
+		halt = 1;
+	skipinstr(ei);
+}
+
+static void	/* "*ack" notification: pass the qualification (the acknowledged vector) to irqack */
+irqackhand(ExitInfo *ei)
+{
+	irqack(ei->qual);
+}
+
+typedef struct ExitType ExitType;
+struct ExitType {	/* maps an exit name to its handler */
+	char *name;	/* compared with strcmp against the first field of the message */
+	void (*f)(ExitInfo *);
+};
+static ExitType etypes[] = {	/* processexit treats unknown '.' names as instructions and '*' names as notifications */
+	{"io", iohandler},
+	{".cpuid", cpuid},
+	{".hlt", hlt},
+	{"eptfault", eptfault},
+	{"*ack", irqackhand},
+	{".rdmsr", rdwrmsr},	/* both MSR exits share one handler, which tells them apart by name */
+	{".wrmsr", rdwrmsr},
+};
+
+void	/* parse one exit message ("name qual [key value]...") and dispatch it to the matching etypes handler */
+processexit(char *msg)
+{
+	static char msgc[1024];
+	char *f[32];
+	int nf;
+	ExitType *et;
+	int i;
+	ExitInfo ei;
+	extern int getexit;
+
+	strecpy(msgc, msgc + sizeof(msgc), msg);	/* fix: bounded copy (was strcpy); tokenize edits msgc, msg stays intact for messages */
+	nf = tokenize(msgc, f, nelem(f));
+	if(nf < 2) sysfatal("invalid wait message: %s", msg);
+	memset(&ei, 0, sizeof(ei));
+	ei.raw = msg;
+	ei.name = f[0];
+	if(strcmp(ei.name, "io") != 0 && strcmp(ei.name, "eptfault") != 0 && strcmp(ei.name, "*ack") != 0 && strcmp(ei.name, ".hlt") != 0) vmdebug("exit: %s", msg);
+	ei.qual = strtoull(f[1], nil, 0);
+	for(i = 2; i + 1 < nf; i += 2){	/* fix: a trailing key without a value is ignored instead of reading f[nf] */
+		if(strcmp(f[i], "pc") == 0)
+			rpoke(RPC, strtoull(f[i+1], nil, 0), 1);	/* registers reported with the exit go through rpoke with last argument 1 */
+		else if(strcmp(f[i], "sp") == 0)
+			rpoke(RSP, strtoull(f[i+1], nil, 0), 1);
+		else if(strcmp(f[i], "ax") == 0)
+			rpoke(RAX, strtoull(f[i+1], nil, 0), 1);
+		else if(strcmp(f[i], "ilen") == 0)
+			ei.ilen = strtoul(f[i+1], nil, 0);
+		else if(strcmp(f[i], "iinfo") == 0)
+			ei.iinfo = strtoul(f[i+1], nil, 0);
+		else if(strcmp(f[i], "pa") == 0)
+			ei.pa = strtoull(f[i+1], nil, 0);
+		else if(strcmp(f[i], "va") == 0)
+			ei.va = strtoull(f[i+1], nil, 0);
+	}
+	if(*f[0] == '*') getexit++;	/* counted for every '*' message, known or not */
+	for(et = etypes; et < etypes + nelem(etypes); et++)
+		if(strcmp(et->name, f[0]) == 0){
+			et->f(&ei);
+			return;
+		}
+	if(*f[0] == '.'){	/* unknown instruction exit: report and raise #ud */
+		vmerror("vmx: unknown instruction %s", f[0]+1);
+		postexc("#ud", 0);
+		return;
+	}
+	if(*f[0] == '*'){	/* unknown notification: report only */
+		vmerror("vmx: unknown notification %s", f[0]+1);
+		return;
+	}
+	sysfatal("vmx: unknown exit: %s", msg);
+}
--- /dev/null
+++ b/sys/src/cmd/vmx/fns.h
@@ -1,0 +1,32 @@
+void *emalloc(ulong);
+void loadkernel(char *);
+uvlong rget(char *);	/* read a guest register by name ("pc", "ax", ...) */
+void rpoke(char *, uvlong, int);
+#define rset(a,b) rpoke(a,b,0)	/* rpoke with 0 as last argument */
+void processexit(char *);	/* parse and dispatch one exit message (exith.c) */
+void pitadvance(void);
+void vmerror(char *, ...);
+#define vmdebug vmerror	/* debug output currently goes through vmerror */
+int ctl(char *, ...);	/* callers treat a negative return as failure */
+void registermmio(uvlong, uvlong, uvlong (*)(int, uvlong, uvlong));	/* (lo, hi, handler); range is inclusive */
+void irqline(int, int);	/* (line 0-15, state): 1 raise, 0 lower, -1 toggle */
+void irqack(int);
+void postexc(char *, u32int);
+void vgaresize(void);
+void uartinit(int, char *);	/* (uart index, "infile,outfile" or a single file for both) */
+void sendnotif(void (*)(void *), void *);
+PCIDev *mkpcidev(u32int, u32int, u32int, int);
+PCIBar *mkpcibar(PCIDev *, u8int, u32int, void *, void *);
+PCICap *mkpcicap(PCIDev *, u8int, u32int (*)(PCICap *, u8int), void(*)(PCICap *, u8int, u32int, u32int));
+u32int allocbdf(void);
+void *gptr(u64int, u64int);
+void *gend(void *);
+uintptr gpa(void *);
+uintptr gavail(void *);
+void pciirq(PCIDev *, int);
+u32int iowhine(int, u16int, u32int, int, void *);	/* log an unhandled port access; always returns 0 */
+void elcr(u16int);
+int mkvionet(char *);
+int mkvioblk(char *);
+char* rcflush(int);
+void i8042kick(void *);
--- /dev/null
+++ b/sys/src/cmd/vmx/io.c
@@ -1,0 +1,981 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <draw.h>
+#include <cursor.h>
+#include <mouse.h>
+#include "dat.h"
+#include "fns.h"
+
+static uchar	/* packed BCD: tens digit in the high nibble, ones digit in the low nibble */
+bcd(uchar c)
+{
+	return c / 10 * 16 + c % 10;
+}
+
+static u32int	/* ports 0x70/0x71: index register plus read-only data derived from the host clock and guest memory size */
+rtcio(int isin, u16int port, u32int val, int sz, void *)
+{
+	static u8int addr;	/* index last written to port 0x70 */
+	uintptr basemem, extmem;
+	Tm *tm;
+	
+	switch(isin << 16 | port){	/* reads are keyed as 0x10000|port */
+	case 0x10070: return addr;
+	case 0x70: addr = val; return 0;
+	case 0x10071:
+		tm = gmtime(time(nil));
+		basemem = gavail(gptr(0, 0)) >> 10;	/* gavail result from address 0, scaled down by 1024 */
+		if(basemem > 640) basemem = 640;
+		extmem = gavail(gptr(1<<20, 0)) >> 10;	/* same, measured from the 1 MB mark */
+		if(extmem >= 65535) extmem = 65535;	/* must fit the two bytes returned below */
+		switch(addr){
+		case 0x00: return bcd(tm->sec);
+		case 0x02: return bcd(tm->min);
+		case 0x04: return bcd(tm->hour);
+		case 0x06: return bcd(tm->wday + 1);
+		case 0x07: return bcd(tm->mday);
+		case 0x08: return bcd(tm->mon + 1);
+		case 0x09: return bcd(tm->year % 100);
+		case 0x0a: return 0x26;
+		case 0x0b: return 1<<1 | 1<<2;
+		case 0x0d: return 1<<7; /* cmos valid */
+		case 0x0e: return 0; /* diagnostics ok */
+		case 0x10: return 0; /* no floppies */
+		case 0x15: return basemem;	/* low byte, high byte follows at 0x16 */
+		case 0x16: return basemem >> 8;
+		case 0x17: return extmem;	/* low byte, high byte follows at 0x18 */
+		case 0x18: return extmem >> 8;
+		case 0x32: return bcd(tm->year / 100 + 19);	/* century */
+		default: vmerror("rtc read from unknown address %#x", addr); return 0;
+		}
+	}
+	return iowhine(isin, port, val, sz, "rtc");	/* everything else, including writes to 0x71 */
+}
+
+typedef struct Pic Pic;
+struct Pic {	/* state of one emulated interrupt controller; pic[0] is the master, pic[1] the slave */
+	enum {
+		AEOI = 1,	/* set by ICW4 bit 1; picaeoi clears the ISR bit right after an acknowledge */
+		ROTAEOI = 2,	/* with AEOI: picaeoi also moves prio to the acknowledged line */
+		MASKMODE = 4,	/* set/cleared by OCW3; see piccheck */
+		POLL = 8,	/* next read of the command port is answered as a poll */
+		READSR = 16,	/* command port reads return isr instead of irr */
+	} flags;
+	u8int lines;	/* current level of the eight input lines (irqline) */
+	u8int irr, isr;	/* pending requests / requests in service */
+	u8int imr;	/* mask; a set bit removes the line from piccheck */
+	u8int elcr;	/* per-line trigger selection used by irqline */
+	u8int init;	/* initialisation progress: 1..3 = waiting for the next ICW, 4 = operational */
+	u8int prio;	/* rotation base for picprio */
+	u8int base;	/* vector base written as ICW2 */
+} pic[2];
+int irqactive = -1;	/* vector currently posted with ctl("irq %d"), or -1 for none */
+
+static u8int	/* return v reduced to one set bit, chosen by rotating priority p; *n (if non-nil) gets that bit's index */
+picprio(u8int v, u8int p, u8int *n)
+{
+	p++;
+	v = v >> p | v << 8 - p;	/* rotate right by p+1 so the line after p sits at bit 0 */
+	v &= -v;	/* keep only the lowest set bit */
+	v = v << p | v >> 8 - p;	/* rotate back to the original position */
+	if(n != nil)
+		*n = ((v & 0xf0) != 0) << 2 | ((v & 0xcc) != 0) << 1 | (v & 0xaa) != 0;	/* bit index of the single set bit (0 when v is 0) */
+	return v;
+}
+
+static u8int	/* mask of the request that should be delivered next (0 if none); *n gets the line picprio chose */
+piccheck(Pic *p, u8int *n)
+{
+	u8int s;
+	
+	s = p->isr;
+	if((p->flags & MASKMODE) != 0 && p->imr != 0)	/* in mask mode with a non-zero mask, in-service lines do not block */
+		s = 0;
+	return picprio(p->irr & ~p->imr | s, p->prio, n) & ~s;	/* a winning in-service line yields 0: nothing new to deliver */
+}
+
+static void	/* automatic end-of-interrupt for line b: only acts when AEOI is enabled */
+picaeoi(Pic *p, u8int b)
+{
+	if((p->flags & AEOI) != 0){
+		p->isr &= ~(1<<b);
+		if((p->flags & ROTAEOI) != 0)
+			p->prio = b;
+	}
+}
+
+static void	/* re-evaluate p after a state change and post or withdraw the interrupt via ctl */
+picupdate(Pic *p)
+{
+	u8int m, n;
+	
+	if(p->init != 4) return;	/* initialisation sequence not finished yet */
+	m = piccheck(p, &n);
+	if(p == &pic[1])
+		irqline(2, m != 0);	/* the slave's output drives line 2 of the master */
+	else{
+		if(m != 0 && n == 2){	/* master line 2: take the request from the slave */
+			m = piccheck(&pic[1], &n);
+			n |= pic[1].base;
+		}else
+			n |= p->base;	/* vector = base | line */
+		if(m != 0 && irqactive != n){	/* new or different vector: post it and wake the guest */
+			if(ctl("irq %d", n) < 0)
+				sysfatal("ctl: %r");
+			halt = 0;
+			irqactive = n;
+		}else if(m == 0 && irqactive >= 0){	/* nothing left to deliver: withdraw the posted vector */
+			if(ctl("irq") < 0)
+				sysfatal("ctl: %r");
+			irqactive = -1;
+		}
+	}
+}
+
+void	/* change input line n (0-15): s = 1 raises, 0 lowers, -1 toggles it */
+irqline(int n, int s)
+{
+	Pic *p;
+	u8int ol, m;
+	
+	assert(n >= 0 && n <= 15);
+	p = &pic[n / 8];	/* lines 8-15 belong to the slave */
+	n %= 8;
+	ol = p->lines;	/* previous levels, needed for edge detection */
+	m = 1<<n;
+	if(s == 1)
+		p->lines |= m;
+	else if(s == 0)
+		p->lines &= ~m;
+	else if(s == -1)
+		p->lines ^= m;
+	if((p->elcr & m) != 0)
+		p->irr = p->irr & ~m | ~p->lines & m;	/* elcr bit set: the irr bit is the inverse of the line level */
+	else
+		p->irr |= p->lines & ~ol & m;	/* elcr bit clear: latch a request on a 0 -> 1 transition */
+	picupdate(p);
+}
+
+void	/* vector n was acknowledged: move the request from irr to isr on the controller that owns it */
+irqack(int n)
+{
+	Pic *p;
+	extern int nextexit;	/* NOTE(review): declared but not used in this function */
+	
+	irqactive = -1;	/* nothing is posted any more */
+	if((n & ~7) == pic[0].base)
+		p = &pic[0];
+	else if((n & ~7) == pic[1].base)
+		p = &pic[1];
+	else
+		return;	/* vector belongs to neither controller */
+	if(p == &pic[1]) irqack(pic[0].base + 2);	/* slave vector: also acknowledge line 2 on the master */
+	n &= 7;
+	p->irr &= ~(1<<n);
+	p->isr |= 1<<n;
+	picaeoi(p, n);
+	picupdate(p);
+}
+
+void	/* load both elcr registers at once: low byte for pic[0], high byte for pic[1] */
+elcr(u16int a)
+{
+	pic[0].elcr = a;
+	pic[1].elcr = a >> 8;
+}
+
+static u32int	/* port handler for both interrupt controllers (0x20/0x21, 0xa0/0xa1) and the elcr ports (0x4d0/0x4d1) */
+picio(int isin, u16int port, u32int val, int sz, void *)
+{
+	Pic *p;
+	u8int m, b;
+	
+	p = &pic[(port & 0x80) != 0];	/* port bit 7 selects the slave */
+	val = (u8int)val;
+	switch(isin << 16 | port){	/* reads are keyed as 0x10000|port */
+	case 0x20:
+	case 0xa0:
+		if((val & 1<<4) != 0){ /* ICW1 */
+			if(irqactive >= 0){	/* fix: -1 means nothing posted and 0 is a valid vector (was a plain truth test) */
+				if(ctl("irq") < 0)
+					sysfatal("ctl: %r");
+				irqactive = -1;
+			}
+			p->irr = 0;
+			p->isr = 0;
+			p->imr = 0;
+			p->prio = 7;
+			p->flags = 0;
+			if((val & 0x0b) != 0x01) vmerror("PIC%ld ICW1 with unsupported value %#ux", p-pic, val);
+			p->init = 1;
+			return 0;
+		}
+		if((val & 0x18) == 0){ /* OCW2 */
+			switch(val >> 5){
+			case 0: /* rotate in automatic eoi mode (clear) */
+				p->flags &= ~ROTAEOI;
+				break;
+			case 1: /* non-specific eoi command */
+				p->isr &= ~picprio(p->isr, p->prio, nil);
+				break;
+			case 2: /* no operation */
+				break;
+			case 3: /* specific eoi command */
+				p->isr &= ~(1<<(val & 7));	/* fix: clear only the named bit (the ~ was missing) */
+				break;
+			case 4: /* rotate in automatic eoi mode (set) */
+				p->flags |= ROTAEOI;
+				break;
+			case 5: /* rotate on non-specific eoi command */
+				p->isr &= ~picprio(p->isr, p->prio, &p->prio);
+				break;
+			case 6: /* set priority */
+				p->prio = val & 7;
+				break;
+			case 7: /* rotate on specific eoi command */
+				p->isr &= ~(1<<(val & 7));	/* fix: same missing ~ as the specific eoi above */
+				p->prio = val & 7;
+				break;
+			}
+			picupdate(p);
+			return 0;
+		}
+		if((val & 0x98) == 8){ /* OCW3 */
+			if((val & 0x40) != 0)
+				if((val & 0x20) != 0)
+					p->flags |= MASKMODE;
+				else
+					p->flags &= ~MASKMODE;
+			if((val & 4) != 0)
+				p->flags |= POLL;
+			if((val & 2) != 0)
+				if((val & 1) != 0)	/* fix: bit 0 picks isr over irr; "val & 10" was always true here */
+					p->flags |= READSR;
+				else
+					p->flags &= ~READSR;
+			picupdate(p);
+			
+		}
+		return 0;
+	case 0x21:
+	case 0xa1:
+		switch(p->init){	/* data port: meaning depends on how far initialisation got */
+		default:
+			vmerror("write to PIC%ld in init=%d state", p-pic, p->init);
+			return 0;
+		case 1:
+			p->base = val;
+			p->init = 2;
+			return 0;
+		case 2:
+			if(p == &pic[0] && val != 4 || p == &pic[1] && val != 2)
+				vmerror("PIC%ld ICW3 with unsupported value %#ux", p-pic, val);
+			p->init = 3;
+			return 0;
+		case 3:
+			if((val & 0xfd) != 1) vmerror("PIC%ld ICW4 with unsupported value %#ux", p-pic, val);
+			if((val & 2) != 0) p->flags |= AEOI;
+			p->init = 4;
+			picupdate(p);
+			return 0;
+		case 4:
+			p->imr = val;
+			picupdate(p); 
+			return 0;
+		}
+		break;
+	case 0x10020:
+	case 0x100a0:	/* fix: slave command-port reads were grouped with the imr reads below */
+		if((p->flags & READSR) != 0)
+			return p->isr;
+		if((p->flags & POLL) != 0){	/* poll: acknowledge the winning request and report its line */
+			p->flags &= ~POLL;
+			m = piccheck(p, &b);
+			if(m != 0){
+				p->irr &= ~m;
+				p->isr |= m;
+				picaeoi(p, b);
+				picupdate(p);
+				return 1<<7 | b;
+			}
+			return 0;
+		}
+		return p->irr;
+	case 0x10021:
+	case 0x100a1:
+		return p->imr;
+	case 0x4d0:
+	case 0x4d1:
+		pic[port & 1].elcr = val;
+		return 0;
+	case 0x104d0:
+	case 0x104d1:
+		return pic[port & 1].elcr;
+	}
+	return iowhine(isin, port, val, sz, "pic");
+}
+
+typedef struct PITChannel PITChannel;
+
+struct PITChannel {	/* one of the three timer channels */
+	u8int mode;	/* control word bits 1-3; only mode 3 is implemented */
+	u8int bcd;	/* control word bit 0 */
+	u8int access;	/* control word bits 4-5 */
+	u8int state;	/* 0 until a complete reload value was written, then 1 (running) */
+	u16int count, reload;	/* current counter value and the value it restarts from */
+	int latch;	/* count captured by a latch command */
+	enum { READLO, READHI, READLATLO, READLATHI } readstate;	/* which byte the next data-port read returns */
+	u8int writestate;	/* which byte the next data-port write sets (READLO/READHI) */
+	vlong lastnsec;	/* nsec() time up to which count has been advanced */
+};
+PITChannel pit[3];
+enum { PERIOD = 838 };	/* nanoseconds per timer tick as used by pitadvance */
+
+void	/* request a timer event at targ; only an earlier deadline replaces timerevent and interrupts the timer thread */
+settimer(vlong targ)
+{
+	extern vlong timerevent;
+	extern Lock timerlock;
+	extern int timerid;
+	int earlier;
+
+	lock(&timerlock);
+	earlier = targ < timerevent;
+	if(earlier)
+		timerevent = targ;
+	unlock(&timerlock);
+
+	if(!earlier)
+		return;
+	threadint(timerid);	/* done after unlocking, as before */
+}
+
+void	/* bring every running mode-3 channel up to the current time and schedule its next wrap */
+pitadvance(void)
+{
+	int i;
+	int nc;
+	PITChannel *p;
+	vlong nt, t;
+	int rel;
+
+	for(i = 0; i < 3; i++){
+		p = &pit[i];
+		nt = nsec();
+		t = nt - p->lastnsec;	/* nanoseconds since this channel was last advanced */
+		p->lastnsec = nt;
+		switch(p->mode){
+		case 3:
+			if(p->state != 0){
+				nc = 2 * (t / PERIOD);	/* the counter drops by two per elapsed tick */
+				if(p->count > nc)
+					p->count -= nc;
+				else{	/* counter ran out: restart from reload and toggle the output */
+					rel = p->reload;
+					if(rel <= 1) rel = 65536;
+					nc -= p->count;
+					nc %= rel;
+					p->count = rel - nc;
+					if(i == 0)
+						irqline(0, -1);	/* only channel 0 drives an interrupt line */
+				}
+				p->lastnsec -= t % PERIOD;	/* carry the unused part of a tick over to the next call */
+				settimer(p->lastnsec + p->count / 2 * PERIOD);	/* time at which count will run out */
+			}
+			break;
+		}
+	}
+}
+
+static void	/* store one byte of channel n's reload value (hi selects the upper byte); may start the counter */
+pitsetreload(int n, int hi, u8int v)
+{
+	PITChannel *p;
+	
+	p = &pit[n];
+	if(hi)
+		p->reload = p->reload & 0xff | v << 8;	/* fix: keep the low byte (">> 8" moved the old high byte down instead) */
+	else
+		p->reload = p->reload & 0xff00 | v;
+	switch(p->mode){
+	case 3:
+		if(p->state == 0 && (p->access != 3 || hi)){	/* start once the value is complete: single-byte access, or the second byte of two */
+			p->count = p->reload;
+			p->state = 1;
+			p->lastnsec = nsec();
+		}
+		break;
+	default:
+		vmerror("PIT reload in mode %d not implemented", p->mode);
+		break;
+	}	
+}
+
+static u32int	/* port handler for the timer: data ports 0x40-0x42 and control port 0x43 */
+pitio(int isin, u16int port, u32int val, int sz, void *)
+{
+	int n;
+
+	val = (u8int) val;
+	pitadvance();	/* bring the counters up to date before looking at them */
+	switch(isin << 16 | port){	/* reads are keyed as 0x10000|port */
+	case 0x10040:
+	case 0x10041:
+	case 0x10042:
+		n = port & 3;
+		switch(pit[n].readstate){
+		case READLO:
+			if(pit[n].access == 3)	/* two-byte access alternates between low and high */
+				pit[n].readstate = READHI;
+			return pit[n].count;
+		case READHI:
+			if(pit[n].access == 3)
+				pit[n].readstate = READLO;
+			return pit[n].count >> 8;
+		case READLATLO:
+			pit[n].readstate = READLATHI;
+			return pit[n].latch;
+		case READLATHI:
+			pit[n].readstate = pit[n].access == 2 ? READHI : READLO;	/* fix: access 2 is the high-byte-only mode, 1 is low-byte-only */
+			return pit[n].latch >> 8;
+		}
+		return 0;
+	case 0x40:
+	case 0x41:
+	case 0x42:
+		n = port & 3;
+		switch(pit[n].writestate){
+		case READLO:
+			if(pit[n].access == 3)
+				pit[n].writestate = READHI;
+			pitsetreload(n, 0, val);
+			break;
+		case READHI:
+			if(pit[n].access == 3)
+				pit[n].writestate = READLO;
+			pitsetreload(n, 1, val);
+			break;
+		}
+		return 0;
+	case 0x43:
+		n = val >> 6;	/* channel select */
+		if(n == 3) return 0;	/* selector 3 is not a channel; ignored */
+		if((val & 0x30) == 0){	/* fix: a latch command is access bits 00; the low four bits are don't-care */
+			pit[n].latch = pit[n].count;
+			pit[n].readstate = READLATLO;
+		}else{	/* mode programming: reset the channel */
+			pit[n].mode = val >> 1 & 7;
+			pit[n].access = val >> 4 & 3;
+			pit[n].bcd = val & 1;
+			pit[n].state = 0;
+			pit[n].count = 0;
+			pit[n].reload = 0;
+			pit[n].readstate = pit[n].access == 2 ? READHI : READLO;	/* fix: only access 2 starts (and stays) on the high byte */
+			pit[n].writestate = pit[n].access == 2 ? READHI : READLO;
+			pit[n].lastnsec = nsec();
+			if(n == 0)
+				irqline(0, 1);
+		}
+		return 0;
+	}
+	return iowhine(isin, port, val, sz, "pit");
+}
+
+typedef struct I8042 I8042;
+struct I8042 {	/* keyboard/mouse controller state */
+	u8int cfg, stat, oport;	/* configuration byte, status register, output port */
+	int cmd;	/* controller command waiting for its data byte on port 0x60, or -1 */
+	u16int buf; /* |0x100 == kbd, |0x200 == mouse, |0x400 == cmd */
+} i8042 = {
+	.cfg 0x34,
+	.stat 0x10,
+	.oport 0x01,
+	.cmd -1,
+};
+Channel *kbdch, *mousech;	/* input queues drained by i8042kick and updatemouse */
+typedef struct PCMouse PCMouse;
+struct PCMouse {	/* emulated mouse: accumulated motion plus protocol state */
+	Mouse;	/* xy holds motion not yet reported, buttons the latest button state */
+	u8int gotmouse;	/* set when updatemouse received an event since the last packet */
+	enum {
+		MOUSERESET,	/* low nibble: operating mode */
+		MOUSESTREAM,
+		MOUSEREMOTE,
+		MOUSEREP = 0x10,	/* flag: reporting enabled */
+		MOUSEWRAP = 0x20,	/* flag: wrap mode, bytes are echoed back */
+	} state;
+	u8int buf[64];	/* ring of bytes waiting to be handed to the controller */
+	u8int bufr, bufw;	/* free-running read/write counters, reduced & 63 on use */
+	u8int actcmd;	/* command waiting for its argument byte, or 0 */
+	u8int scaling21, res, rate;
+} mouse = {
+	.res = 2,
+	.rate = 100
+};
+#define mouseputc(c) mouse.buf[mouse.bufw++ & 63] = (c)	/* append one byte to the ring; no overflow check */
+
+static void	/* load the output buffer with val (0 empties it) and update status, output port and interrupt lines */
+i8042putbuf(u16int val)
+{
+	i8042.buf = val;
+	i8042.stat = i8042.stat & ~0x20 | val >> 4 & 0x20;	/* status bit 5 mirrors the 0x200 (mouse) tag */
+	if((i8042.cfg & 1) != 0 && (val & 0x100) != 0){	/* keyboard byte and cfg bit 0 set: raise line 1 */
+		irqline(1, 1);
+		i8042.oport |= 0x10;
+	}
+	if((i8042.cfg & 2) != 0 && (val & 0x200) != 0){	/* mouse byte and cfg bit 1 set: raise line 12 */
+		irqline(12, 1);
+		i8042.oport |= 0x20;
+	}
+	if(val == 0){	/* buffer emptied: drop both lines and try to refill at once */
+		irqline(1, 0);
+		irqline(12, 0);
+		i8042.oport &= ~0x30;
+		i8042.stat &= ~1;
+		i8042kick(nil);
+	}else
+		i8042.stat |= 1;	/* status bit 0: output buffer full */
+}
+
+static void	/* byte written to port 0x60 with no controller command pending: nothing is implemented, only logged */
+kbdcmd(u8int val)
+{
+	vmerror("unknown kbd command %#ux", val);
+}
+
+static void	/* drain mousech without blocking, accumulating motion and taking over the latest buttons */
+updatemouse(void)
+{
+	Mouse m;
+	
+	while(nbrecv(mousech, &m) > 0){
+		mouse.xy = addpt(mouse.xy, m.xy);	/* events are added up, so m.xy is treated as a delta */
+		mouse.buttons = m.buttons;
+		mouse.gotmouse = 1;
+	}
+}
+
+static void	/* discard pending motion: drain the channel first, then zero the accumulator */
+clearmouse(void)
+{
+	updatemouse();
+	mouse.xy = Pt(0, 0);
+	mouse.gotmouse = 0;
+}
+
+static void	/* queue one three-byte movement packet; without force nothing is sent unless an event arrived */
+mousepacket(int force)
+{
+	int dx, dy;
+	u8int b0;
+
+	updatemouse();
+	if(!mouse.gotmouse && !force)
+		return;
+	dx = mouse.xy.x;
+	dy = -mouse.xy.y;	/* the packet's y axis points the other way */
+	b0 = 8;	/* bit 3 is always set in the first byte */
+	if((ulong)(dx + 256) > 511) dx = dx < 0 ? -256 : 255;	/* fix: clamp to the 9-bit range; the old bit trick produced +256 for negative overflow */
+	if((ulong)(dy + 256) > 511) dy = dy < 0 ? -256 : 255;
+	b0 |= dx >> 5 & 0x10 | dy >> 4 & 0x20;	/* sign bits of dx and dy */
+	b0 |= (mouse.buttons * 0x111 & 0x421) % 7;	/* permute buttons: bit0 -> bit0, bit1 -> bit2, bit2 -> bit1 */
+	mouseputc(b0);
+	mouseputc((u8int)dx);
+	mouseputc((u8int)dy);
+	mouse.xy.x -= dx;	/* keep whatever did not fit for the next packet */
+	mouse.xy.y += dy;
+	mouse.gotmouse = 0;
+}
+
+static void	/* drop pending motion and restore the initial resolution (2) and rate (100) */
+mousedefaults(void)
+{
+	clearmouse();
+	mouse.res = 2;
+	mouse.rate = 100;
+}
+
+static void	/* handle one byte sent to the mouse (controller command 0xd4); replies are queued with mouseputc */
+mousecmd(u8int val)
+{
+	if((mouse.state & MOUSEWRAP) != 0 && val != 0xec && val != 0xff){	/* wrap mode echoes everything except 0xec and 0xff */
+		mouseputc(val);
+		i8042kick(nil);
+		return;
+	}
+	switch(mouse.actcmd){	/* a previous command may still be waiting for its argument */
+	case 0xe8: /* set resolution */
+		mouse.res = val;
+		mouseputc(0xfa);
+		mouse.actcmd = 0;
+		break;
+	case 0xf3: /* set sampling rate */
+		mouse.rate = val;
+		mouseputc(0xfa);
+		mouse.actcmd = 0;
+		break;
+	default:
+		switch(val){
+		case 0xf3: case 0xe8: mouseputc(0xfa); mouse.actcmd = val; break;	/* acknowledge now, take the argument with the next byte */
+		
+		case 0xff: mouseputc(0xfa); mousedefaults(); mouse.state = MOUSERESET; break; /* reset */
+		case 0xf6: mouseputc(0xfa); mousedefaults(); mouse.state = mouse.state & ~0xf | MOUSESTREAM; break; /* set defaults */
+		case 0xf5: mouseputc(0xfa); clearmouse(); if((mouse.state&0xf) == MOUSESTREAM) mouse.state &= ~MOUSEREP; break; /* disable reporting */
+		case 0xf4: mouseputc(0xfa); clearmouse(); if((mouse.state&0xf) == MOUSESTREAM) mouse.state |= MOUSEREP; break; /* enable reporting */
+		case 0xf2: mouseputc(0xfa); mouseputc(0x00); clearmouse(); break; /* report device id */
+		case 0xf0: mouseputc(0xfa); clearmouse(); mouse.state = mouse.state & ~0xf | MOUSEREMOTE; break; /* set remote mode */
+		case 0xee: mouseputc(0xfa); clearmouse(); mouse.state |= MOUSEWRAP; break; /* set wrap mode */
+		case 0xec: mouseputc(0xfa); clearmouse(); mouse.state &= ~MOUSEWRAP; break; /* reset wrap mode */
+		case 0xeb: mouseputc(0xfa); mousepacket(1); break; /* read data */
+		case 0xea: mouseputc(0xfa); clearmouse(); mouse.state = mouse.state & ~0xf | MOUSESTREAM; break; /* set stream mode */
+		case 0xe9: /* status request */
+			mouseputc(0xfa);
+			mouseputc(((mouse.state & 0xf) == MOUSEREMOTE) << 6 | ((mouse.state & MOUSEREP) != 0) << 5 | mouse.scaling21 << 4 | (mouse.buttons * 0x111 & 0x142) % 7);	/* buttons permuted: bit0 -> bit2, bit1 -> bit1, bit2 -> bit0 */
+			mouseputc(mouse.res);
+			mouseputc(mouse.rate);
+			break;
+		case 0xe7: mouseputc(0xfa); mouse.scaling21 = 1; break; /* set 2:1 scaling */
+		case 0xe6: mouseputc(0xfa); mouse.scaling21 = 0; break; /* set 1:1 scaling */
+		default: vmerror("unknown mouse command %#ux", val); mouseputc(0xfc);
+		}
+	}
+	i8042kick(nil);	/* let the controller pick up the queued reply */
+}
+
+static void	/* called by i8042kick when the mouse ring is empty: produce unsolicited bytes if the state calls for them */
+mousekick(void)
+{	
+	switch(mouse.state){
+	case MOUSERESET:	/* after a reset: send 0xaa 0x00 and enter stream mode */
+		mouseputc(0xaa);
+		mouseputc(0);
+		mouse.state = MOUSESTREAM;
+		break;
+	case MOUSESTREAM | MOUSEREP:	/* stream mode with reporting on: send a packet if there was movement */
+		if(mouse.actcmd == 0)	/* but not while a command waits for its argument */
+			mousepacket(0);
+		break;
+	}
+}
+
+
+void	/* if the output buffer is empty, refill it from the keyboard channel or the mouse ring */
+i8042kick(void *)
+{
+	ulong ch;
+	
+	if((i8042.cfg & 0x10) == 0 && i8042.buf == 0)	/* cfg bit 4 clear: keyboard port enabled */
+		if(nbrecv(kbdch, &ch) > 0)
+			i8042putbuf(0x100 | (u8int)ch);
+	if((i8042.cfg & 0x20) == 0 && i8042.buf == 0){	/* cfg bit 5 clear: mouse port enabled; keyboard had first pick */
+		if(mouse.bufr == mouse.bufw)
+			mousekick();	/* ring empty: give the mouse a chance to generate data */
+		if(mouse.bufr != mouse.bufw)
+			i8042putbuf(0x200 | mouse.buf[mouse.bufr++ & 63]);
+	}
+}
+
+static u32int	/* port handler for the keyboard controller: data port 0x60, command/status port 0x64 */
+i8042io(int isin, u16int port, u32int val, int sz, void *)
+{
+	int rc;
+
+	val = (u8int)val;
+	switch(isin << 16 | port){	/* reads are keyed as 0x10000|port */
+	case 0x60:
+		i8042.stat &= ~8;	/* status bit 3 clear: last write went to the data port */
+		switch(i8042.cmd){	/* the byte is the argument of a pending controller command, if any */
+		case 0x60: i8042.cfg = val; break;
+		case 0xd1:
+			i8042.oport = val;
+			irqline(1, i8042.oport >> 4 & 1);
+			irqline(12, i8042.oport >> 5 & 1);
+			break;
+		case 0xd2: i8042putbuf(0x100 | val); break;
+		case 0xd3: i8042putbuf(0x200 | val); break;
+		case 0xd4: mousecmd(val); break;
+		case -1: kbdcmd(val); break;	/* no command pending: the byte goes to the keyboard */
+		}
+		i8042.cmd = -1;
+		return 0;
+	case 0x10060:
+		i8042kick(nil);
+		rc = i8042.buf;	/* still carries the 0x100/0x200/0x400 source tag */
+		i8042putbuf(0);	/* reading empties the buffer */
+		return rc;
+	case 0x64:
+		i8042.stat |= 8;	/* status bit 3 set: last write went to the command port */
+		switch(val){
+		case 0x20: i8042putbuf(0x400 | i8042.cfg); return 0;
+		case 0xa1: i8042putbuf(0x4f1); return 0; /* no keyboard password */
+		case 0xa7: i8042.cfg |= 1<<5; return 0;
+		case 0xa8: i8042.cfg &= ~(1<<5); return 0;
+		case 0xa9: i8042putbuf(0x400); return 0; /* test second port */
+		case 0xaa: i8042putbuf(0x455); return 0; /* test controller */
+		case 0xab: i8042putbuf(0x400); return 0; /* test first port */
+		case 0xad: i8042.cfg |= 1<<4; return 0;
+		case 0xae: i8042.cfg &= ~(1<<4); return 0;
+		case 0xd0: i8042putbuf(0x400 | i8042.oport); return 0;
+		case 0x60: case 0xd1: case 0xd2: case 0xd3: case 0xd4:	/* commands that take their argument from port 0x60 */
+			i8042.cmd = val;
+			return 0;
+		}
+		vmerror("unknown i8042 command %#ux", val);
+		return 0;
+	case 0x10064:
+		i8042kick(nil);
+		return i8042.stat | i8042.cfg & 4;	/* cfg bit 2 is reported as part of the status */
+	}
+	return iowhine(isin, port, val, sz, "i8042");
+}
+
+typedef struct UART UART;
+struct UART {	/* one emulated serial port; uart[0] answers at 0x3f8, uart[1] at 0x2f8 */
+	u8int ier, fcr, lcr, lsr, mcr, scr, dll, dlh;	/* register file as accessed by uartio */
+	u8int rbr, tbr;	/* byte received for the guest / byte waiting to be sent */
+	enum {
+		UARTTXIRQ = 1,	/* transmitter became empty */
+		UARTRXIRQ = 2,	/* rbr holds an unread byte */
+	} irq;	/* pending causes; masked with ier to drive the interrupt line */
+	int infd, outfd;	/* host files opened by uartinit */
+	Channel *inch, *outch;	/* byte queues to/from the rx/tx procs; nil when that direction is unused */
+} uart[2] = { { .lsr = 0x60 }, { .lsr = 0x60 } };
+
+static void	/* move pending bytes between the channels and the registers, then update the interrupt line */
+uartkick(UART *p)
+{
+	char c;
+
+	irqline(4 - (p - uart), (p->irq & p->ier) != 0);	/* uart[0] drives line 4, uart[1] line 3 */
+	if((p->irq & UARTRXIRQ) == 0 && p->inch != nil && nbrecv(p->inch, &c) > 0){	/* rbr free: fetch the next input byte */
+		p->rbr = c;
+		p->irq |= UARTRXIRQ;
+	}
+	if((p->lsr & 1<<5) == 0){	/* lsr bit 5 clear: tbr holds a byte to send */
+		if(p->outch == nil){	/* no output configured: the byte is dropped */
+			p->lsr |= 3<<5;
+			p->irq |= UARTTXIRQ;
+		}else if(nbsend(p->outch, &p->tbr) > 0){	/* otherwise it stays in tbr until the channel accepts it */
+			p->tbr = 0;
+			p->lsr |= 3<<5;
+			p->irq |= UARTTXIRQ;
+		}
+	}
+	irqline(4 - (p - uart), (p->irq & p->ier) != 0);	/* again, now that irq may have changed */
+}
+
+static u32int	/* port handler for both serial ports; the low three port bits select the register */
+uartio(int isin, u16int port, u32int val, int sz, void *)
+{
+	UART *p;
+	int rc;
+
+	if((port & 0xff8) == 0x3f8) p = &uart[0];
+	else if((port & 0xff8) == 0x2f8) p = &uart[1];
+	else return 0;
+	
+	val = (u8int) val;
+	switch(isin << 4 | port & 7){	/* writes are 0x00-0x07, reads 0x10-0x17 */
+	case 0x00:
+		if((p->lcr & 1<<7) != 0)	/* lcr bit 7 set: offsets 0 and 1 address dll/dlh instead */
+			p->dll = val;
+		else{ /* transmit byte */
+			if((p->mcr & 1<<4) != 0){	/* mcr bit 4 set: the byte is looped back into rbr */
+				p->irq |= UARTRXIRQ;
+				p->rbr = val;
+				p->lsr |= 3<<5;
+			}else{
+				p->tbr = val;
+				p->lsr &= ~(3<<5);	/* transmitter busy until uartkick hands the byte on */
+				p->irq &= ~UARTTXIRQ;
+			}
+			uartkick(p);
+		}
+		return 0;
+	case 0x01:
+		if((p->lcr & 1<<7) != 0)
+			p->dlh = val;
+		else
+			p->ier = val & 15;
+		return 0;
+	case 0x02: p->fcr = val; return 0;
+	case 0x03: p->lcr = val; return 0;
+	case 0x04: p->mcr = val & 0x1f; return 0;
+	case 0x07: p->scr = val; return 0;
+	case 0x10:
+		if((p->lcr & 1<<7) != 0) return p->dll;
+		p->irq &= ~UARTRXIRQ;	/* reading rbr consumes the byte */
+		rc = p->rbr;
+		uartkick(p);	/* may already load the next one */
+		return rc;
+	case 0x11:
+		if((p->lcr & 1<<7) != 0) return p->dlh;
+		return p->ier;
+	case 0x12:
+		rc = (p->fcr & 1) != 0 ? 0x40 : 0;	/* bit 6 reflects fcr bit 0 */
+		uartkick(p);
+		if((p->irq & UARTRXIRQ) != 0)
+			return rc | 4;	/* receive data has priority */
+		else if((p->irq & UARTTXIRQ) != 0){
+			p->irq &= ~UARTTXIRQ;	/* reading the register clears the transmit cause */
+			uartkick(p);
+			return rc | 2;
+		}else
+			return rc | 1;	/* nothing pending */
+	case 0x13: return p->lcr;
+	case 0x14: return p->mcr;
+	case 0x15:
+		uartkick(p);
+		rc = p->lsr; /* line status */
+		if((p->irq & UARTRXIRQ) != 0)
+			rc |= 1;	/* bit 0: a received byte is waiting */
+		return rc;
+	case 0x16: /* modem status */
+		if((p->mcr & 0x10) != 0)	/* only in loopback: status bits are derived from mcr */
+			return p->mcr << 1 & 2 | p->mcr >> 1 & 1 | p->mcr & 0xc;
+		return 0;
+	case 0x17: return p->scr;
+	}
+	return iowhine(isin, port, val, sz, "uart");
+}
+
+static void	/* proc body: read the uart's input file and feed it byte by byte into inch */
+uartrxproc(void *uv)
+{
+	UART *u;
+	char buf[128], *p;
+	int rc;
+	
+	threadsetname("uart rx");
+	u = uv;
+	for(;;){
+		rc = read(u->infd, buf, sizeof(buf));
+		if(rc < 0){	/* read error or end of file ends this proc */
+			vmerror("read(uartrx): %r");
+			threadexits("read: %r");
+		}
+		if(rc == 0){
+			vmerror("read(uartrx): eof");
+			threadexits("read: eof");
+		}
+		for(p = buf; p < buf + rc; p++){
+			send(u->inch, p);	/* blocks while the channel is full */
+			sendnotif((void(*)(void*))uartkick, u);	/* request a uartkick(u) call via sendnotif */
+		}
+	}
+}
+
+static void	/* proc body: collect bytes from outch and write them to the uart's output file in batches */
+uarttxproc(void *uv)
+{
+	UART *u;
+	char buf[128], *p;
+	
+	threadsetname("uart tx");
+	u = uv;
+	for(;;){
+		p = buf;
+		recv(u->outch, p);	/* block for the first byte */
+		p++;
+		while(sendnotif((void(*)(void*))uartkick, u), p < buf+sizeof(buf) && nbrecv(u->outch, p) > 0)	/* request a kick, then take whatever else is queued */
+			p++;
+		if(write(u->outfd, buf, p - buf) < p - buf)	/* a short write is only logged */
+			vmdebug("write(uarttx): %r");
+	}
+}
+
+void	/* attach uart n to host files: cfg is "in,out", or one name used for both; an empty name leaves that side unused */
+uartinit(int n, char *cfg)
+{
+	char *p, *infn, *outfn;
+	
+	p = strchr(cfg, ',');
+	if(p == nil){
+		infn = cfg;
+		outfn = cfg;
+	}else{
+		*p = 0;	/* splits cfg in place */
+		infn = cfg;
+		outfn = p + 1;
+	}
+	if(infn != nil && *infn != 0){
+		uart[n].infd = open(infn, OREAD);
+		if(uart[n].infd < 0)
+			sysfatal("open: %r");
+		uart[n].inch = chancreate(sizeof(char), 256);
+		proccreate(uartrxproc, &uart[n], 4096);
+	}
+	if(outfn != nil && *outfn != 0){
+		uart[n].outfd = open(outfn, OWRITE);
+		if(uart[n].outfd < 0)
+			sysfatal("open: %r");
+		uart[n].outch = chancreate(sizeof(char), 256);
+		proccreate(uarttxproc, &uart[n], 4096);
+	}
+}
+
+static u32int	/* handler for ports that are claimed but do nothing: writes are dropped, reads yield 0 */
+nopio(int, u16int, u32int, int, void *)
+{
+	return 0;
+}
+
+u32int	/* log an access no handler implements; mod, if non-nil, is a module name printed as prefix; returns 0 */
+iowhine(int isin, u16int port, u32int val, int sz, void *mod)
+{
+	if(isin)
+		vmerror("%s%sread from unknown i/o port %#ux ignored (sz=%d)", mod != nil ? mod : "", mod != nil ? ": " : "", port, sz);
+	else
+		vmerror("%s%swrite to unknown i/o port %#ux ignored (val=%#ux, sz=%d)", mod != nil ? mod : "", mod != nil ? ": " : "", port, val, sz);
+	return 0;
+}
+
+typedef struct IOHandler IOHandler;
+struct IOHandler {	/* one fixed port range and the function serving it */
+	u16int lo, hi;	/* inclusive port range */
+	u32int (*io)(int, u16int, u32int, int, void *);	/* called as (dir, port, val, size, aux); dir is 1 for input */
+	void *aux;
+};
+
+u32int vgaio(int, u16int, u32int, int, void *);
+u32int pciio(int, u16int, u32int, int, void *);
+IOHandler handlers[] = {	/* searched in order by io(); the first matching range wins */
+	0x20, 0x21, picio, nil,
+	0x40, 0x43, pitio, nil,
+	0x70, 0x71, rtcio, nil,
+	0xa0, 0xa1, picio, nil,
+	0x60, 0x60, i8042io, nil,
+	0x64, 0x64, i8042io, nil,
+	0x2f8, 0x2ff, uartio, nil,
+	0x3d4, 0x3d5, vgaio, nil,
+	0x3f8, 0x3ff, uartio, nil,
+	0x4d0, 0x4d1, picio, nil,
+	0xcf8, 0xcff, pciio, nil,
+
+	0x061, 0x061, nopio, nil, /* pc speaker */
+	0x110, 0x110, nopio, nil, /* elnk3 */
+	0x170, 0x177, nopio, nil, /* ide secondary */
+	0x1f0, 0x1f7, nopio, nil, /* ide primary */
+	0x280, 0x28f, nopio, nil, /* 8003 */
+	0x378, 0x37a, nopio, nil, /* LPT1 */
+	0x3e0, 0x3e3, nopio, nil, /* cardbus */
+	0x3f0, 0x3f5, nopio, nil, /* floppy */
+	0x778, 0x77a, nopio, nil, /* LPT1 (ECP) */
+};
+
+u32int	/* perform one port access (dir 1 = input): fixed handlers first, then PCI I/O BARs, else log it and return 0 */
+io(int dir, u16int port, u32int val, int size)
+{
+	IOHandler *h;
+	extern PCIBar iobars;
+	PCIBar *p;
+
+	for(h = handlers; h < handlers + nelem(handlers); h++)
+		if(port >= h->lo && port <= h->hi)
+			return h->io(dir, port, val, size, h->aux);	/* fixed handlers see the absolute port */
+	for(p = iobars.busnext; p != &iobars; p = p->busnext)	/* iobars is the head of a circular list */
+		if(port >= p->addr && port < p->addr + p->length)
+			return p->io(dir, port - p->addr, val, size, p->aux);	/* BAR handlers see the offset into the BAR */
+	return iowhine(dir, port, val, size, nil);
+}
--- /dev/null
+++ b/sys/src/cmd/vmx/ksetup.c
@@ -1,0 +1,168 @@
+#include <u.h>
+#include <libc.h>
+#include "dat.h"
+#include "fns.h"
+
+/* hdr receives the first 8192 bytes of the kernel file; fd is that file (both set up by loadkernel) */
+static uchar hdr[8192];
+static int fd;
+
+/* boot module file names, defined elsewhere; putmods loads them */
+extern int bootmodn;
+extern char **bootmod;
+
+/*
+ * Write a multiboot memory map at p0 with one entry per REGMEM region of
+ * mmap and return its length in bytes (0 if there is no such region).
+ * An entry is 24 bytes: a 32-bit size field (20, which does not count the
+ * size field itself), the 64-bit base address, the 64-bit length and a
+ * 32-bit type (1 = available RAM).
+ */
+static int
+putmmap(uchar *p0)
+{
+	u32int *p;
+	Region *r;
+	uvlong base, len;
+
+	p = (u32int *) p0;
+	for(r = mmap; r != nil; r = r->next){
+		if(r->type != REGMEM) continue;
+		if(gavail(p) < 24) sysfatal("out of guest memory");
+		base = r->start;
+		len = r->end - r->start;
+		p[0] = 20;
+		p[1] = base;
+		p[2] = base >> 32;
+		p[3] = len;
+		p[4] = len >> 32;
+		p[5] = 1;
+		/* step past this entry so the next region gets its own slot and the returned length covers it */
+		p += 6;
+	}
+	return (uchar *) p - p0;
+}
+
+/*
+ * Store the kernel command line at p0: the cmdlinev words joined by
+ * single spaces, limited by gend(p0).
+ * Returns the number of bytes used including the terminator position,
+ * or 0 when there is no command line or no room at all.
+ */
+static int
+putcmdline(uchar *p0)
+{
+	extern int cmdlinen;
+	extern char **cmdlinev;
+	char *start, *cur, *lim;
+	int n;
+
+	if(cmdlinen == 0)
+		return 0;
+	start = (char*)p0;
+	lim = gend(p0);
+	if(start >= lim)
+		return 0;
+	cur = start;
+	n = 0;
+	while(n < cmdlinen){
+		cur = strecpy(cur, lim, cmdlinev[n]);
+		n++;
+		/* separator after every word but the last */
+		if(n != cmdlinen)
+			*cur++ = ' ';
+	}
+	return cur - start + 1;
+}
+
+/*
+ * Load the boot modules named in bootmod[] into guest memory and build
+ * the table describing them.
+ * The table, four 32-bit words per module, is placed at p0; the module
+ * contents follow it, each starting on a BY2PG boundary.
+ * Returns the number of bytes used counting from p0 (0 if there are no
+ * modules).  Any failure is fatal.
+ */
+static int
+putmods(uchar *p0)
+{
+	int i, fd, rc;
+	u32int *p;
+	uchar *q;
+	char dummy;
+
+	if(bootmodn == 0) return 0;
+	p = (u32int*)p0;
+	q = (uchar*)(p + 4 * bootmodn);	/* module data starts after the table */
+	for(i = 0; i < bootmodn; i++){
+		q = gptr(-(-gpa(q) & -BY2PG), 1);	/* round the guest address up to a page boundary */
+		if(q == nil) sysfatal("out of guest memory");
+		fd = open(bootmod[i], OREAD);
+		if(fd == -1) sysfatal("module open: %r");
+		p[0] = gpa(q);	/* guest address of the first byte */
+		rc = readn(fd, q, gavail(q));
+		if(rc < 0) sysfatal("module read: %r");
+		/* anything left to read means the file did not fit in the space given to readn */
+		if(read(fd, &dummy, 1) == 1) sysfatal("out of guest memory");
+		close(fd);
+		q += rc;
+		p[1] = gpa(q);	/* guest address just past the last byte */
+		p[2] = 0;
+		p[3] = 0;
+		p += 4;
+	}
+	bootmodn = ((uchar*)p - p0) / 16;
+	return q - p0;
+}
+
+/*
+ * Try to load the kernel in fd as a multiboot image, using the first
+ * bytes of the file already read into hdr.
+ * Returns 0 if hdr holds no multiboot header, 1 once the image, the
+ * multiboot information structure, memory map, command line and modules
+ * are in guest memory and the entry registers are set.  A header that is
+ * found but unusable is fatal.
+ */
+static int
+trymultiboot(void)
+{
+	u32int *p, *e, flags;
+	u32int header, load, loadend, bssend, entry;
+	u32int filestart;
+	uchar *gp;
+	uchar *modp;
+	int len;
+	int rc;
+
+	/*
+	 * Eight header words (p[0] to p[7]) are read below, so only accept a
+	 * magic that leaves room for all of them inside hdr.
+	 */
+	e = (u32int*)hdr + sizeof(hdr)/4 - 7;
+	for(p = (u32int*)hdr; p < e; p++)
+		if(*p == 0x1badb002)
+			break;
+	if(p == e)
+		return 0;
+	if((u32int)(p[0] + p[1] + p[2]) != 0)
+		sysfatal("invalid multiboot checksum");
+	flags = p[1];
+	if((flags & 1<<16) == 0)
+		sysfatal("no size info in multiboot header");
+	header = p[3];
+	load = p[4];
+	loadend = p[5];
+	bssend = p[6];
+	entry = p[7];
+	/* file offset of the byte that is loaded at address load */
+	filestart = (uchar*)p - hdr - (header - load);
+	gp = gptr(load, bssend != 0 ? bssend - load : loadend != 0 ? loadend - load : BY2PG);
+	if(gp == nil)
+		sysfatal("kernel image out of bounds");
+	seek(fd, filestart, 0);
+	if(loadend == 0){
+		/* no end address given: load the rest of the file */
+		rc = readn(fd, gp, gavail(gp));
+		if(rc <= 0) sysfatal("readn: %r");
+		loadend = load + rc;
+	}else{
+		rc = readn(fd, gp, loadend - load);
+		if(rc < 0) sysfatal("readn: %r");
+		if(rc < loadend - load) sysfatal("short kernel image");
+	}
+	if(bssend == 0) bssend = loadend;
+	bssend = -(-bssend & -BY2PG);	/* round up to a page boundary */
+	/* from here on p is the multiboot information structure, placed right after the image */
+	p = gptr(bssend, 128);
+	if(p == nil) sysfatal("no space for multiboot structure");
+	p[0] = 1<<0;
+	p[1] = gavail(gptr(0, 0)) >> 10;
+	if(p[1] > 640) p[1] = 640;
+	p[2] = gavail(gptr(1048576, 0)) >> 10;
+	/* memory map, command line and modules follow the 128-byte structure */
+	modp = gptr(bssend + 128, 1);
+	if(modp == nil) sysfatal("out of guest memory");
+	len = putmmap(modp);
+	if(len != 0){
+		p[0] |= 1<<6;
+		p[11] = len;
+		p[12] = gpa(modp);
+		modp += len;
+	}
+	len = putcmdline(modp);
+	if(len != 0){
+		p[0] |= 1<<2;
+		p[4] = gpa(modp);
+		modp += len + 7 & -8;
+	}
+	len = putmods(modp);
+	if(len != 0){
+		p[0] |= 1<<3;
+		p[5] = bootmodn;
+		p[6] = gpa(modp);
+		modp += len + 7 & -8;
+	}
+
+	USED(modp);
+	rset(RPC, entry);
+	rset(RAX, 0x2badb002);
+	rset(RBX, bssend);
+	return 1;
+}
+
+/*
+ * Open the kernel file fn, read its beginning into hdr and load it.
+ * Multiboot is the only format tried; anything else, and any I/O error,
+ * is fatal.
+ */
+void
+loadkernel(char *fn)
+{
+	long n;
+
+	if((fd = open(fn, OREAD)) < 0)
+		sysfatal("open: %r");
+	n = readn(fd, hdr, sizeof(hdr));
+	if(n <= 0)
+		sysfatal("readn: %r");
+	if(trymultiboot() == 0)
+		sysfatal("%s: unknown format", fn);
+	close(fd);
+}
--- /dev/null
+++ b/sys/src/cmd/vmx/mkfile
@@ -1,0 +1,15 @@
+# builds the program vmx from the objects listed in OFILES; the rules come from /sys/src/cmd/mkone
+</$objtype/mkfile
+
+BIN=/$objtype/bin
+TARG=vmx
+HFILES=dat.h fns.h
+OFILES=\
+	vmx.$O \
+	ksetup.$O \
+	exith.$O \
+	io.$O \
+	vga.$O \
+	pci.$O \
+	virtio.$O \
+
+</sys/src/cmd/mkone
--- /dev/null
+++ b/sys/src/cmd/vmx/pci.c
@@ -1,0 +1,304 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "dat.h"
+#include "fns.h"
+
+/* all devices created by mkpcidev, linked through PCIDev.next, newest first */
+PCIDev *pcidevs;
+/* heads of the circular lists (busnext/busprev) of the memory and I/O BARs that are currently decoded */
+PCIBar membars, iobars;
+
+/*
+ * Allocate a PCI device with the given bus/device/function, vendor and
+ * device id and class/revision words and put it at the head of pcidevs.
+ * irqno starts as 0 when needirq is set (pcibusmap gives such devices an
+ * interrupt line) and as 0xff otherwise.  Every BAR starts empty and
+ * linked only to itself; capabilities are allocated from offset 64 up.
+ */
+PCIDev *
+mkpcidev(u32int bdf, u32int viddid, u32int clrev, int needirq)
+{
+	PCIDev *dev;
+	PCIBar *b;
+
+	dev = emalloc(sizeof(PCIDev));
+	dev->bdf = bdf;
+	dev->viddid = viddid;
+	dev->clrev = clrev;
+	if(needirq)
+		dev->irqno = 0;
+	else
+		dev->irqno = 0xff;
+	dev->capalloc = 64;
+	for(b = dev->bar; b < dev->bar + nelem(dev->bar); b++){
+		b->d = dev;
+		b->busnext = b;
+		b->busprev = b;
+	}
+	dev->next = pcidevs;
+	pcidevs = dev;
+	return dev;
+}
+
+/*
+ * Return the address of the next unused device on bus 0, function 0.
+ * Device numbers are handed out in order starting at 1.
+ */
+u32int
+allocbdf(void)
+{
+	static int nextdev = 1;
+	u32int bdf;
+
+	bdf = BDF(0, nextdev, 0);
+	nextdev++;
+	return bdf;
+}
+
+/*
+ * Claim the first unused BAR of d and give it type bits t and length l.
+ * Bit 0 of t selects an I/O BAR; fn is then its port handler, called
+ * with aux.  The length is raised to at least 4 (I/O) or 4096 (memory)
+ * and rounded up to a power of two.
+ * It is fatal to ask for more BARs than the device has.
+ */
+PCIBar *
+mkpcibar(PCIDev *d, u8int t, u32int l, void *fn, void *aux)
+{
+	PCIBar *b;
+
+	assert((t & 1) == 0 || (t & 2) == 0);
+	assert((t & 1) != 0 || (t & 6) == 0);
+	if((t & 1) != 0 && l < 4) l = 4;
+	if((t & 1) == 0 && l < 4096) l = 4096;
+	if((l & l-1) != 0){
+		/* round up: clear low set bits until only the top one is left, then double it */
+		do
+			l &= l-1;
+		while((l & l-1) != 0);
+		l <<= 1;
+		assert(l != 0);
+	}
+	for(b = d->bar; b < d->bar + nelem(d->bar); b++)
+		if(b->length == 0)
+			break;
+	if(b == d->bar + nelem(d->bar))
+		sysfatal("mkpcibar (dev %#ux): out of BARs", d->bdf);
+	b->type = t;
+	b->length = l;
+	b->busnext = b;
+	b->busprev = b;
+	b->d = d;
+	if((b->type & 1) != 0)
+		b->io = fn;
+	b->aux = aux;
+	return b;
+}
+
+/*
+ * Bring the list membership of b up to date after its address or the
+ * device's control register changed.  b is first taken off whatever list
+ * it is on.  It is then appended to membars (memory BAR, control bit 1
+ * set) or to iobars (I/O BAR, control bit 0 set, non-zero address and a
+ * handler present); otherwise it stays unlinked.
+ */
+static void
+updatebar(PCIBar *b)
+{
+	PCIBar *head;
+
+	/* unlink; harmless when b is linked only to itself */
+	b->busprev->busnext = b->busnext;
+	b->busnext->busprev = b->busprev;
+	b->busnext = b->busprev = b;
+
+	if(b->length == 0)
+		return;
+	if((b->type & 1) != 0){
+		if((b->d->ctrl & 1) == 0 || b->addr == 0 || b->io == nil)
+			return;
+		head = &iobars;
+	}else{
+		if((b->d->ctrl & 2) == 0)
+			return;
+		head = &membars;
+	}
+	/* append at the tail of the circular list */
+	b->busnext = head;
+	b->busprev = head->busprev;
+	head->busprev = b;
+	b->busprev->busnext = b;
+}
+
+/*
+ * Recompute every interrupt line (0 to 15) that has a PCI device on it.
+ * irqline is called with 0 when a device on the line has irqactive set
+ * and with 1 when none has.
+ */
+static void
+pciirqupdate(void)
+{
+	PCIDev *dev;
+	int used, active, line, level;
+
+	used = active = 0;
+	for(dev = pcidevs; dev != nil; dev = dev->next){
+		if(dev->irqno >= 16)
+			continue;
+		used |= 1<<dev->irqno;
+		active |= dev->irqactive<<dev->irqno;
+	}
+	for(line = 0; line < 16; line++){
+		if((used>>line & 1) == 0)
+			continue;
+		level = (active>>line & 1) ^ 1;
+		irqline(line, level);
+	}
+}
+
+/*
+ * Add a capability of length bytes to the configuration space of d.
+ * Its address is the next free offset (capalloc) and it goes to the end
+ * of the device's capability list.  readf is required; writef may be nil.
+ * Running past offset 256 is fatal.
+ */
+PCICap *
+mkpcicap(PCIDev *d, u8int length, u32int (*readf)(PCICap *, u8int), void (*writef)(PCICap *, u8int, u32int, u32int))
+{
+	PCICap *cap, *tail;
+
+	assert(readf != nil);
+	if(d->capalloc + length > 256)
+		sysfatal("mkpcicap (dev %#ux): out of configuration space", d->bdf);
+	cap = emalloc(sizeof(PCICap));
+	cap->dev = d;
+	cap->length = length;
+	cap->read = readf;
+	cap->write = writef;
+	cap->addr = d->capalloc;
+	d->capalloc += length;
+	if(d->cap == nil)
+		d->cap = cap;
+	else{
+		for(tail = d->cap; tail->next != nil; tail = tail->next)
+			;
+		tail->next = cap;
+	}
+	return cap;
+}
+
+/* Return the device whose address equals bdf, or nil if there is none. */
+static PCIDev *
+findpcidev(u32int bdf)
+{
+	PCIDev *dev;
+
+	dev = pcidevs;
+	while(dev != nil && dev->bdf != bdf)
+		dev = dev->next;
+	return dev;
+}
+
+/*
+ * Return the capability of d whose range [addr, addr+length) contains
+ * the configuration space offset addr, or nil if there is none.
+ */
+static PCICap *
+findpcicap(PCIDev *d, u8int addr)
+{
+	PCICap *cap;
+
+	cap = d->cap;
+	while(cap != nil){
+		/* the unsigned compare also rejects addr below cap->addr */
+		if((uint)(addr - cap->addr) < cap->length)
+			break;
+		cap = cap->next;
+	}
+	return cap;
+}
+
+/*
+ * Read the 32-bit configuration space register of d at offset addr
+ * (pciio passes offsets with the low two bits cleared).
+ * The standard header registers are synthesized from the PCIDev fields.
+ * Other offsets are looked up in the capability list and read through
+ * the capability's read function; unknown offsets read as 0.
+ */
+static u32int
+pciread(PCIDev *d, int addr)
+{
+	u32int val;
+	PCICap *c;
+	int n;
+
+	switch(addr){
+	case 0x00: return d->viddid;
+	case 0x04: return 0xa00000 | (d->cap != nil ? 1<<20 : 0) | d->ctrl;	/* bit 20 is set when there are capabilities */
+	case 0x08: return d->clrev;
+	case 0x0c: return 0; /* BIST, Header Type, Latency Timer, Cache Size */
+	case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+		n = addr - 0x10 >> 2;
+		return d->bar[n].addr | d->bar[n].type;
+	case 0x28: return 0; /* Cardbus */
+	case 0x2c: return d->subid; /* Subsystem ID */
+	case 0x30: return 0; /* Expansion ROM */
+	case 0x34: return d->cap != nil ? d->cap->addr : 0; /* Capabilities */
+	case 0x38: return 0; /* Reserved */
+	case 0x3c: return 1 << 8 | d->irqno; /* Max_Lat, Min_Gnt, IRQ Pin, IRQ Line */
+	}
+	c = findpcicap(d, addr);
+	if(c != nil){
+		val = c->read(c, addr - c->addr);
+		if(addr == c->addr){
+			/* first word: bits 8-15 are replaced by the address of the next capability (0 at the end) */
+			val &= ~0xff00;
+			if(c->next != nil)
+				val |= c->next->addr << 8;
+		}
+		return val;
+	}
+	vmdebug("pcidev %.6ux: ignoring read from addr %#ux", d->bdf, addr);
+	return 0;
+}
+
+/*
+ * Write the 32-bit configuration space register of d at offset addr.
+ * Only the bits set in mask are taken from val; the others keep their
+ * current value.  Writable are the control register, the six BARs and
+ * the interrupt line; other offsets go to the write function of the
+ * capability covering them, if any, and are ignored otherwise.
+ */
+static void
+pciwrite(PCIDev *d, int addr, u32int val, u32int mask)
+{
+	int n;
+	PCICap *c;
+
+	switch(addr){
+	case 0x04:
+		d->ctrl = (d->ctrl & ~mask | val & mask) & 0x21f;
+		/* decoding may have been switched on or off: relink all BARs */
+		for(n = 0; n < nelem(d->bar); n++)
+			updatebar(&d->bar[n]);
+		return;
+	case 0x10: case 0x14: case 0x18: case 0x1c: case 0x20: case 0x24:
+		n = addr - 0x10 >> 2;
+		/* the low 2 bits of an I/O BAR and the low 4 bits of a memory BAR are not address bits */
+		val &= (d->bar[n].type & 1) != 0 ? ~3 : ~15;
+		/* the address is kept aligned to the BAR length */
+		d->bar[n].addr = (d->bar[n].addr & ~mask | val & mask) & ~(d->bar[n].length - 1);
+		updatebar(&d->bar[n]);
+		return;
+	case 0x3c: d->irqno = (d->irqno & ~mask | val & mask) & 0xff; pciirqupdate(); return;
+	}
+	c = findpcicap(d, addr);
+	if(c != nil && c->write != nil){
+		c->write(c, addr - c->addr, val, mask);
+		return;
+	}
+	vmdebug("pcidev %.6ux: ignoring write to addr %#ux, val %#ux", d->bdf, addr, val);
+}
+
+/*
+ * Port handler for the PCI configuration mechanism: port 0xcf8 holds the
+ * configuration address (cfgaddr), ports 0xcfc-0xcff access the selected
+ * 32-bit register.
+ * An access is only performed when bit 31 of cfgaddr is set and a device
+ * with the addressed bus/device/function exists; otherwise writes are
+ * dropped and reads return -1.
+ */
+u32int
+pciio(int isin, u16int port, u32int val, int sz, void *)
+{
+	static u32int cfgaddr;
+	u32int mask;
+	PCIDev *d;
+
+	switch(isin << 16 | port){
+	case 0x0cf8: cfgaddr = val; return 0;
+	case 0x10cf8: return cfgaddr;
+	case 0xcfc: case 0xcfd: case 0xcfe: case 0xcff:
+		/* move the value to its byte lanes in the register; mask selects the sz bytes being written */
+		val <<= 8 * (port & 3);
+		mask = -1UL >> 32 - 8 * sz << 8 * (port & 3);
+		if((cfgaddr & 1<<31) != 0 && (d = findpcidev(cfgaddr & 0xffff00), d != nil))
+			pciwrite(d, cfgaddr & 0xfc, val, mask);
+		return 0;
+	case 0x10cfc: case 0x10cfd: case 0x10cfe: case 0x10cff:
+		if((cfgaddr & 1<<31) == 0 || (d = findpcidev(cfgaddr & 0xffff00), d == nil))
+			return -1;
+		return pciread(d, cfgaddr & 0xfc) >> 8 * (port & 3);
+	}
+	return iowhine(isin, port, val, sz, "pci");
+}
+
+/*
+ * Set (status non-zero) or clear the interrupt request of device d and
+ * recompute the interrupt lines.
+ */
+void
+pciirq(PCIDev *d, int status)
+{
+	if(status != 0)
+		d->irqactive = 1;
+	else
+		d->irqactive = 0;
+	pciirqupdate();
+}
+
+/*
+ * Make the two BAR lists empty (each head linked to itself) and create
+ * the device at bus 0, device 0, function 0.
+ */
+void
+pciinit(void)
+{
+	membars.busnext = membars.busprev = &membars;
+	iobars.busnext = iobars.busprev = &iobars;
+	mkpcidev(BDF(0,0,0), 0x01008086, 0x06000000, 0);
+}
+
+/*
+ * Assign resources to all PCI devices:
+ * - I/O and memory decoding are switched on (control bits 0 and 1);
+ * - every I/O BAR gets the next free port range, counting up from 0x1000;
+ *   memory BARs are not supported and only reported;
+ * - devices whose irqno is 0 (created with needirq) get an interrupt line
+ *   from the set 5, 7, 9, 10, 11, 14, 15 in round-robin order.
+ * The lines handed out are then passed to elcr and driven to 1.
+ * NOTE(review): iop is not aligned to the BAR length before it is assigned,
+ * while pciwrite keeps BAR addresses length-aligned; that agrees only as
+ * long as all BARs have the same size - confirm.
+ */
+void
+pcibusmap(void)
+{
+	u16int iop;
+	u16int irqs, uirqs;
+	PCIDev *d;
+	PCIBar *b;
+	int irq;
+	int i;
+	
+	iop = 0x1000;
+	irqs = 1<<5|1<<7|1<<9|1<<10|1<<11|1<<14|1<<15;
+	uirqs = 0;
+	irq = 0;
+	for(d = pcidevs; d != nil; d = d->next){
+		d->ctrl |= 3;
+		for(b = d->bar; b < d->bar + nelem(d->bar); b++){
+			if(b->length == 0)
+				continue;
+			if((b->type & 1) == 0){
+				vmerror("pci device %.6ux: memory bars unsupported", d->bdf);
+				continue;
+			}
+			if(iop + b->length >= 0x10000){
+				vmerror("pci device %.6ux: not enough I/O address space for BAR%d (len=%d)", d->bdf, (int)(b - d->bar), b->length);
+				continue;
+			}
+			b->addr = iop;
+			iop += b->length;
+			updatebar(b);
+		}
+		if(d->irqno == 0){
+			/* advance to the next line allowed by irqs, wrapping around at 16 */
+			do
+				irq = irq + 1 & 15;
+			while((irqs & 1<<irq) == 0);
+			d->irqno = irq;
+			uirqs |= 1<<irq;
+		}
+	}
+	elcr(uirqs);
+	for(i = 0; i < 16; i++)
+		if((uirqs & 1<<i) != 0)
+			irqline(i, 1);
+}
--- /dev/null
+++ b/sys/src/cmd/vmx/vga.c
@@ -1,0 +1,431 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <thread.h>
+#include <draw.h>
+#include <cursor.h>
+#include <mouse.h>
+#include <keyboard.h>
+#include "dat.h"
+#include "fns.h"
+
+/* fb is the guest framebuffer: fbsz bytes at guest address fbaddr (mapped in vgainit) */
+static uchar *fb;
+uintptr fbsz;
+uintptr fbaddr;
+int textmode;	/* non-zero for the 80x25 text framebuffer */
+static ulong screenchan;	/* channel format of the framebuffer; 0 in text mode */
+
+/* picture size in pixels and bytes per framebuffer scanline */
+static int picw, pich, hbytes;
+static Image *img, *bg;
+static Mousectl *mc;
+static Rectangle picr;	/* where the picture is drawn in the window (centered by screeninit) */
+Channel *kbdch, *mousech;
+static u16int cursorpos;	/* text cursor position as set through vgaio, in character cells */
+u8int mousegrab;
+static uchar *sfb;	/* copy of fb as last drawn, used by drawfb to find changes */
+
+/*
+ * (Re)compute the picture rectangle, centered in the window, allocate
+ * the background and picture images and clear the window.
+ * Called at startup and again after every resize, so images from an
+ * earlier call are released first (freeimage accepts nil).
+ */
+static void
+screeninit(void)
+{
+	Point p;
+
+	p = divpt(addpt(screen->r.min, screen->r.max), 2);
+	picr = (Rectangle){subpt(p, Pt(picw/2, pich/2)), addpt(p, Pt((picw+1)/2, (pich+1)/2))};
+	freeimage(bg);
+	freeimage(img);
+	bg = allocimage(display, Rect(0, 0, 1, 1), screen->chan, 1, 0xCCCCCCFF);
+	img = allocimage(display, Rect(0, 0, picw, pich), screenchan == 0 ? screen->chan : screenchan, 0, 0);
+	if(bg == nil || img == nil)
+		sysfatal("allocimage: %r");
+	draw(screen, screen->r, bg, nil, ZP);
+}
+
+/*
+ * Port handler for the index/data register pair at 0x3d4/0x3d5.
+ * Only the two cursor position registers are implemented: index 14 is
+ * the high byte and index 15 the low byte of cursorpos.  Other indices
+ * are reported and read as 0.  Only the low 8 bits of val are used.
+ */
+u32int
+vgaio(int isin, u16int port, u32int val, int sz, void *)
+{
+	static u8int cgaidx;
+
+	val = (u8int) val;
+	switch(isin << 16 | port){
+	case 0x3d4:
+		cgaidx = val;
+		return 0;
+	case 0x103d4:
+		return cgaidx;
+	case 0x3d5:
+		switch(cgaidx){
+		case 14:
+			/* replace the high byte, keep the low byte */
+			cursorpos = cursorpos & 0xff | val << 8;
+			break;
+		case 15:
+			/* replace the low byte, keep the high byte */
+			cursorpos = cursorpos & 0xff00 | val;
+			break;
+		default:
+			vmerror("write to unknown VGA register, 3d5/%#ux (val=%#ux)", cgaidx, val);
+		}
+		return 0;
+	case 0x103d5:
+		switch(cgaidx){
+		case 14:
+			return cursorpos >> 8;
+		case 15:
+			return (u8int)cursorpos;
+		default:
+			vmerror("read from unknown VGA register, 3d5/%#ux", cgaidx);
+			return 0;
+		}
+	}
+	return iowhine(isin, port, val, sz, "vga");
+}
+
+/*
+ * Mapping from a rune r to the key code sent to the guest.
+ * kbdmap is a hash table of these, indexed by r modulo the table size
+ * and chained through next.
+ */
+typedef struct Key Key;
+struct Key {
+	Rune r;
+	int code;
+	Key *next;
+};
+Key *kbdmap[128];
+
+/*
+ * Map rune r to key code in kbdmap.  The first definition of a rune
+ * wins: if r is already present the call does nothing.
+ */
+static void
+defkey(Rune r, int code)
+{
+	Key *k, **link;
+
+	link = &kbdmap[r % nelem(kbdmap)];
+	while(*link != nil){
+		if((*link)->r == r)
+			return;
+		link = &(*link)->next;
+	}
+	k = emalloc(sizeof(Key));
+	k->code = code;
+	k->r = r;
+	*link = k;
+}
+
+/*
+ * Fill kbdmap: first a built-in set of special keys, then the entries of
+ * the keyboard map file fn.
+ * Each useful line of fn has at least three fields, "x y z": x and y are
+ * numbers and z is a number, 'c (the character c) or ^c (control-c).
+ * Only lines with x == 0 and a non-zero rune are used; they map rune z
+ * to key code y.  Malformed lines are skipped.
+ * Since defkey keeps the first definition of a rune, the second Kctl and
+ * Kup entries below have no effect.
+ */
+void
+kbdlayout(char *fn)
+{
+	Biobuf *bp;
+	char *s, *p, *f[10];
+	int nf, x, y;
+	Rune z;
+	
+	defkey(Kshift, 0x2a);
+	defkey(Kctl, 0x1d);
+	defkey(Kalt, 0x38);
+	defkey(Kctl, 0x11d);
+	defkey(Kprint, 0x137);
+	defkey(Kaltgr, 0x138);
+	defkey(Kbreak, 0x146);
+	defkey(Khome, 0x147);
+	defkey(Kup, 0x148);
+	defkey(Kpgup, 0x149);
+	defkey(Kleft, 0x14b);
+	defkey(Kright, 0x14d);
+	defkey(Kend, 0x14f);
+	defkey(Kdown, 0x150);
+	defkey(Kpgdown, 0x151);
+	defkey(Kins, 0x152);
+	defkey(Kdel, 0x153);
+	defkey(Kup, 0x179);
+
+	bp = Bopen(fn, OREAD);
+	if(bp == nil){
+		vmerror("kbdlayout: %r");
+		return;
+	}
+	for(;; free(s)){
+		s = Brdstr(bp, '\n', 1);
+		if(s == nil) break;
+		nf = getfields(s, f, nelem(f), 1, " \t");
+		if(nf < 3) continue;
+		x = strtol(f[0], &p, 0);
+		if(*p != 0) continue;
+		y = strtol(f[1], &p, 0);
+		if(*p != 0) continue;
+		if(*f[2] == '\'' || *f[2] == '^'){
+			chartorune(&z, f[2]+1);
+			if(*f[2] == '^') z -= '@';
+		}else{
+			z = strtol(f[2], &p, 0);
+			if(*p != 0) continue;
+		}
+		
+		if(x != 0 || z == 0) continue;
+		defkey(z, y);
+	}
+	Bterm(bp);
+}
+
+/*
+ * Keyboard proc: reads /dev/kbd and turns it into key codes on kbdch.
+ * A read can deliver several NUL-terminated messages; they are consumed
+ * one per loop iteration.  Only messages starting with 'k' or 'K' are
+ * used: the runes that follow are looked up in kbdmap and collected in
+ * the bitmap nkdown (one bit per key code).  The difference to the
+ * previous bitmap kdown gives the keys pressed and released.
+ * For each such key the low byte of the code is sent, with 0x80 set for
+ * a release and preceded by 0xe0 when the code is 0x100 or above, and
+ * i8042kick is scheduled through sendnotif.
+ * While the mouse is grabbed, key codes 29 and 56 (Kctl and Kalt in
+ * kbdlayout) held together release the grab.
+ */
+void
+keyproc(void *)
+{
+	int fd, n;
+	static char buf[256];
+	static uvlong kdown[8], nkdown[8];
+	uvlong set, rls;
+	int i, j;
+	char *s;
+	Rune r;
+	Key *k;
+
+	threadsetname("keyproc");
+	fd = open("/dev/kbd", OREAD);
+	if(fd < 0)
+		sysfatal("open: %r");
+	for(;;){
+		if(buf[0] != 0){
+			/* drop the message handled in the previous iteration */
+			n = strlen(buf)+1;
+			memmove(buf, buf+n, sizeof(buf)-n);
+		}
+		if(buf[0] == 0){
+			n = read(fd, buf, sizeof(buf)-1);
+			if(n <= 0)
+				sysfatal("read /dev/kbd: %r");
+			buf[n-1] = 0;
+			buf[n] = 0;
+		}
+		if(buf[0] != 'k' && buf[0] != 'K')
+			continue;
+		s = buf + 1;
+		memset(nkdown, 0, sizeof(nkdown));
+		while(*s != 0){
+			s += chartorune(&r, s);
+			for(k = kbdmap[r % nelem(kbdmap)]; k != nil; k = k->next)
+				if(k->r == r){
+					nkdown[k->code >> 6] |= 1ULL<<(k->code&63);
+					break;
+				}
+			if(k == nil) vmerror("unknown key %d", r);
+		}
+		if(mousegrab && (nkdown[0]>>29 & 1) != 0 && (nkdown[0]>>56 & 1) != 0){
+			mousegrab = 0;
+			setcursor(mc, nil);
+		}
+		for(i = 0; i < 8; i++){
+			if(nkdown[i] == kdown[i]) continue;
+			set = nkdown[i] & ~kdown[i];
+			rls = ~nkdown[i] & kdown[i];
+			for(j = 0; j < 64; j++, set>>=1, rls >>= 1)
+				if(((set|rls) & 1) != 0){
+					if(i >= 4)
+						sendul(kbdch, 0xe0);
+					sendul(kbdch, j | i<<6&0xff | ((rls&1) != 0 ? 0x80 : 0));
+					sendnotif(i8042kick, nil);
+				}
+			kdown[i] = nkdown[i];
+		}
+	}
+}
+
+/*
+ * Mouse proc: turns window mouse events into relative movements offered
+ * on mousech.
+ * The mouse is grabbed by releasing button 1 over the picture after
+ * pressing it there (not in text mode); the cursor is then blanked.
+ * While grabbed, movements are accumulated in mm, and the pointer is
+ * moved back to the middle of the picture whenever it gets more than 50
+ * pixels away from it.  Once a receiver has taken mm from mousech, the
+ * accumulated movement is reset and i8042kick is scheduled.
+ */
+void
+mousethread(void *)
+{
+	Mouse m;
+	static Mouse mm, om;
+	int gotm;
+	Point mid;
+	Rectangle grabout;
+	int clicked;
+	static Cursor blank;
+	
+	gotm = 0;
+	clicked = 0;
+	for(;;){
+		/* the send is only offered while there is unreported movement */
+		Alt a[] = {
+			{mc->c, &m, CHANRCV},
+			{mousech, &mm, gotm ? CHANSND : CHANNOP},
+			{nil, nil, CHANEND},
+		};
+		
+		switch(alt(a)){
+		case 0:
+			mid = divpt(addpt(picr.max, picr.min), 2);
+			grabout = insetrect(Rpt(mid, mid), -50);
+			if(!ptinrect(m.xy, picr)){
+				clicked = 0;
+				break;
+			}
+			if(!mousegrab){
+				if(clicked && (m.buttons & 1) == 0 && !textmode){
+					mousegrab = 1;
+					setcursor(mc, &blank);
+				}
+				clicked = m.buttons & 1;
+				break;
+			}
+			gotm = 1;
+			if(!ptinrect(m.xy, grabout)){
+				moveto(mc, mid);
+				m.xy = mid;
+				om.xy = mid;
+			}
+			mm.xy = addpt(mm.xy, subpt(m.xy, om.xy));
+			om = m;
+			mm.buttons = m.buttons;
+			break;
+		case 1:
+			sendnotif(i8042kick, nil);
+			mm.xy = Pt(0,0);
+			gotm = 0;
+			break;
+		}
+	}
+}
+
+/*
+ * Rune drawn for each 8-bit character code of the text framebuffer
+ * (a code page 437 table, going by its name); used by drawtext.
+ */
+static Rune cp437[256] = {
+	0x0020, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
+	0x25ba, 0x25c4, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8, 0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc, 
+	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 
+	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 
+	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 
+	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 
+	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 
+	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x2302, 
+	0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, 
+	0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, 
+	0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 
+	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, 
+	0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, 
+	0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 
+	0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, 
+	0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, 
+};
+
+/*
+ * Redraw the text screen: 25 rows of 80 cells, two framebuffer bytes per
+ * cell of which the first is the character (the second is not used here).
+ * Text is drawn white on black in the default font, with rows 16 pixels
+ * apart.  The cell at cursorpos, if on screen, is drawn inverted in an
+ * 8x16 box.
+ */
+static void
+drawtext(void)
+{
+	Rune buf[80];
+	uchar *p;
+	int y, x;
+	Point pt;
+	
+	draw(img, img->r, display->black, nil, ZP);
+	for(y = 0; y < 25; y++){
+		p = &fb[y * 160];
+		for(x = 0; x < 80; x++)
+			buf[x] = cp437[p[2*x]];
+		runestringn(img, Pt(0, 16 * y), display->white, ZP, display->defaultfont, buf, 80);
+	}
+	if(cursorpos < 80*25){
+		buf[0] = cp437[fb[cursorpos*2]];
+		pt = Pt(cursorpos % 80 * 8, cursorpos / 80 * 16);
+		draw(img, Rect(pt.x, pt.y, pt.x + 8, pt.y + 16), display->white, nil, ZP);
+		runestringn(img, pt, display->black, ZP, display->defaultfont, buf, 1);
+	}
+	draw(screen, picr, img, nil, ZP);
+	flushimage(display, 1);	
+}
+
+/*
+ * Redraw the graphics framebuffer.  fb is compared with the copy sfb one
+ * 32-bit word at a time; changed words are copied to sfb and the range
+ * of scanlines containing changes is tracked in upd.  Only those
+ * scanlines are then loaded and drawn; nothing is done when the
+ * framebuffer did not change.
+ */
+static void
+drawfb(void)
+{
+	u32int *p, *q;
+	Rectangle upd;
+	int xb, y;
+
+	p = (u32int *) fb;
+	q = (u32int *) sfb;
+	upd.min.y = upd.max.y = -1;
+	xb = 0;	/* byte offset of the current word within scanline y */
+	y = 0;
+	while(p < (u32int*)(fb + fbsz)){
+		if(*p != *q){
+			if(upd.min.y < 0) upd.min.y = y;
+			/* a word that extends past the end of the scanline also touches the next one */
+			upd.max.y = y + 1 + (xb + 4 > hbytes);
+			*q = *p;
+		}
+		p++;
+		q++;
+		xb += 4;
+		if(xb >= hbytes){
+			xb -= hbytes;
+			y++;
+		}
+	}
+	if(upd.min.y == upd.max.y) return;
+	upd.min.x = 0;
+	upd.max.x = picw;
+	if(screenchan != screen->chan){
+		/* formats differ: load into img and let draw do the conversion */
+		loadimage(img, upd, sfb + upd.min.y * hbytes, (upd.max.y - upd.min.y) * hbytes);
+		draw(screen, rectaddpt(upd, picr.min), img, nil, upd.min);
+	}else
+		loadimage(screen, rectaddpt(upd, picr.min), sfb + upd.min.y * hbytes, (upd.max.y - upd.min.y) * hbytes);
+	flushimage(display, 1);
+}
+
+/*
+ * Display proc: allocates the shadow framebuffer, then redraws the
+ * screen in a loop with a 20 ms sleep after each pass.  Pending resize
+ * events are handled before each redraw by reattaching to the window and
+ * calling screeninit.
+ */
+void
+drawproc(void *)
+{
+	ulong dummy;
+
+	threadsetname("draw");
+	sfb = emalloc(fbsz);
+	for(;;){
+		while(nbrecv(mc->resizec, &dummy) > 0){
+			if(getwindow(display, Refnone) < 0)
+				sysfatal("resize failed: %r");
+			screeninit();
+		}
+		if(!textmode)
+			drawfb();
+		else
+			drawtext();
+		sleep(20);
+	}
+}
+
+/*
+ * Parse the framebuffer specifier fbstring and set picw, pich, fbsz,
+ * fbaddr, screenchan and hbytes accordingly.
+ * "text" selects the 80x25 text framebuffer at 0xb8000, shown as a
+ * 640x400 picture.  Anything else must have the form WxHxCHAN@ADDR:
+ * decimal width and height, a channel descriptor accepted by strtochan
+ * and the guest address.
+ * A second call or a malformed specifier is fatal.
+ */
+void
+vgafbparse(char *fbstring)
+{
+	char buf[512];
+	char *p, *q;
+	uvlong addr;
+
+	if(picw != 0) sysfatal("vga specified twice");
+	if(strcmp(fbstring, "text") == 0){
+		picw = 640;
+		pich = 400;
+		fbsz = 80*25*2;
+		fbaddr = 0xb8000;
+		textmode++;
+		screenchan = 0;
+	}else{
+		/* work on a copy: the '@' is overwritten below */
+		strecpy(buf, buf + nelem(buf), fbstring);
+		picw = strtol(buf, &p, 10);
+		if(*p != 'x')
+		nope:
+			sysfatal("vgafbparse: invalid framebuffer specifier: %#q (should be WxHxCHAN@ADDR or 'text')", fbstring);
+		pich = strtol(p+1, &p, 10);
+		if(*p != 'x') goto nope;
+		q = strchr(p+1, '@');
+		if(q == nil) goto nope;
+		*q = 0;
+		screenchan = strtochan(p+1);
+		if(screenchan == 0) goto nope;
+		p = q + 1;
+		if(*p == 0) goto nope;
+		addr = strtoull(p, &p, 0);
+		fbaddr = addr;
+		if(fbaddr != addr) goto nope;	/* address does not fit in uintptr */
+		if(*p != 0) goto nope;
+		hbytes = chantodepth(screenchan) * picw + 7 >> 3;	/* bits per scanline rounded up to whole bytes */
+		fbsz = hbytes * pich;
+	}
+}
+
+/*
+ * Set up the display if a framebuffer was configured with vgafbparse
+ * (otherwise do nothing): map the framebuffer, open a new window 50
+ * pixels larger than the picture in each direction, load the keyboard
+ * map and start the mouse, keyboard and draw procs.
+ */
+void
+vgainit(void)
+{
+	char buf[512];
+
+	if(picw == 0) return;
+	fb = gptr(fbaddr, fbsz);
+	if(fb == nil)
+		sysfatal("got nil ptr for framebuffer");
+	snprint(buf, sizeof(buf), "-dx %d -dy %d", picw+50, pich+50);
+	newwindow(buf);
+	initdraw(nil, nil, "vmx");
+	screeninit();
+	flushimage(display, 1);
+	kbdlayout("/sys/lib/kbmap/us");
+	mc = initmouse(nil, screen);
+	kbdch = chancreate(sizeof(ulong), 128);
+	mousech = chancreate(sizeof(Mouse), 32);
+	proccreate(mousethread, nil, 4096);
+	proccreate(keyproc, nil, 4096);
+	proccreate(drawproc, nil, 4096);
+}
--- /dev/null
+++ b/sys/src/cmd/vmx/virtio.c
@@ -1,0 +1,655 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "dat.h"
+#include "fns.h"
+
+typedef struct VIODev VIODev;
+typedef struct VIOQueue VIOQueue;
+typedef struct VIOBuf VIOBuf;
+typedef struct VIONetDev VIONetDev;
+typedef struct VIOBlkDev VIOBlkDev;
+
+/*
+ * Read (GET) or write (PUT) an 8, 16, 32 or 64 bit value at byte offset
+ * n from pointer p, through a plain pointer cast (host byte order, no
+ * alignment handling).
+ */
+#define GET8(p,n) (*((u8int*)(p)+(n)))
+#define GET16(p,n) (*(u16int*)((u8int*)(p)+(n)))
+#define GET32(p,n) (*(u32int*)((u8int*)(p)+(n)))
+#define GET64(p,n) (*(u64int*)((u8int*)(p)+(n)))
+#define PUT8(p,n,v) (*((u8int*)(p)+(n)) = (v))
+#define PUT16(p,n,v) (*(u16int*)((u8int*)(p)+(n)) = (v))
+#define PUT32(p,n,v) (*(u32int*)((u8int*)(p)+(n)) = (v))
+#define PUT64(p,n,v) (*(u64int*)((u8int*)(p)+(n)) = (v))
+
+enum {
+	/* descriptor flags (VIOBuf.flags) */
+	BUFCHAIN = 1,	/* another descriptor follows in the chain */
+	BUFWR = 2,	/* buffer is filled by vioqwrite; vioqread skips it */
+	
+	/* flag in the first word of the avail ring */
+	USEDNOIRQ = 1,	/* vioputbuf raises no interrupt while this is set */
+};
+
+/*
+ * One descriptor of a chain taken from a queue by viogetbuf.
+ * The read and write positions of a whole chain are kept in rptr and
+ * wptr of its first buffer.
+ */
+struct VIOBuf {
+	u32int flags;	/* BUFCHAIN, BUFWR */
+	VIOQueue *qu;
+	void *p;	/* the buffer: len bytes at guest address addr */
+	u64int addr;
+	u32int len;
+	u32int idx;	/* index of the descriptor in the descriptor table */
+	VIOBuf *next, *head;	/* next buffer and first buffer of the chain */
+	u32int rptr, wptr;	/* bytes consumed by vioqread / produced by vioqwrite so far */
+};
+
+/*
+ * A virtqueue.  The QLock protects the fields below; the Rendez (tied to
+ * that QLock by mkvioqueue) is where viogetbuf sleeps waiting for
+ * buffers.
+ */
+struct VIOQueue {
+	QLock;
+	Rendez;
+	VIODev *d;
+	/* descriptor table, avail ring and used ring in guest memory; all nil while no valid address is set */
+	u8int (*desc)[16], *avail, *used;
+	u16int size;	/* number of descriptors */
+	u32int addr;	/* queue address as stored by vioqaddrset */
+	u16int availidx, usedidx;	/* next avail entry to take, next used entry to fill */
+	void (*notify)(VIOQueue*);	/* called when the guest writes this queue's number to the notify port */
+};
+
+/* State of a virtio network device. */
+struct VIONetDev {
+	int readfd, writefd;	/* packets are read from and written to these */
+	u8int mac[6];
+	/* receive filter switches set by the guest through vionetcmd and applied by viomacok */
+	enum {
+		VNETPROMISC = 1,
+		VNETALLMULTI = 2,
+		VNETALLUNI = 4,
+		VNETNOMULTI = 8,
+		VNETNOUNI = 16,
+		VNETNOBCAST = 32,
+	} flags;
+	/* 64-bit bloom filters (bit index from bloomhash) over the unicast and multicast address tables */
+	u64int macbloom, multibloom;
+};
+
+/* State of a virtio block device. */
+struct VIOBlkDev {
+	int fd;	/* backing file */
+	uvlong size;	/* size of the backing file in 512-byte sectors */
+};
+
+/* A virtio device and the registers common to all device types. */
+struct VIODev {
+	PCIDev *pci;
+	u32int devfeat, guestfeat;	/* features offered by the device / written back by the guest */
+	u16int qsel;	/* queue selected for the queue address and size registers */
+	u8int devstat, isrstat;
+	VIOQueue *qu;	/* array of nqu queues */
+	int nqu;
+	/* handler for the device-specific ports (port 20 and up, passed on relative to 20) */
+	u32int (*io)(int, u16int, u32int, int, VIODev *);
+	union {
+		VIONetDev net;
+		VIOBlkDev blk;
+	};
+};
+
+/*
+ * Notification function queued by vioirq.  arg is the two-element array
+ * built there: the device and the value.  A non-zero value is or'ed into
+ * the device's isrstat, zero clears it; the PCI interrupt follows
+ * isrstat.  arg is freed.
+ */
+static void
+vioirq_(void *arg)
+{
+	void **a;
+	VIODev *dev;
+	int bits;
+
+	a = arg;
+	dev = a[0];
+	bits = (int) a[1];
+	if(bits == 0)
+		dev->isrstat = 0;
+	else
+		dev->isrstat |= bits;
+	pciirq(dev->pci, dev->isrstat);
+	free(arg);
+}
+
+/*
+ * Ask for the interrupt status of d to be updated with val (see vioirq_).
+ * The update itself is not done here: it is passed to sendnotif together
+ * with a freshly allocated argument array, which vioirq_ frees.
+ */
+static void
+vioirq(VIODev *d, int val)
+{
+	void **args;
+
+	assert(d != nil);
+	args = emalloc(2 * sizeof(void*));
+	args[0] = d;
+	args[1] = (void *) val;
+	sendnotif(vioirq_, args);
+}
+
+/*
+ * Return a pointer to descriptor i of q, or nil (after logging) when i
+ * is not below the queue size.
+ */
+static void *
+checkdesc(VIOQueue *q, int i)
+{
+	if(i < q->size)
+		return q->desc[i];
+	vmerror("virtio device %#x: invalid next pointer %d in queue (size %d), ignoring descriptor", q->d->pci->bdf, i, q->size);
+	return nil;
+}
+
+/*
+ * Take the next available descriptor chain from q and return it as a
+ * list of VIOBufs (linked through next, each pointing to the first with
+ * head).
+ * If the queue has no address set or no buffer is available, nil is
+ * returned when wait is 0; otherwise the caller sleeps until woken up
+ * and tries again.
+ * A descriptor with an invalid index or buffer address ends the chain
+ * early; if that leaves nothing, the entry is skipped and the next one
+ * is tried.
+ * NOTE(review): a chain that loops back on itself is not detected.
+ */
+VIOBuf *
+viogetbuf(VIOQueue *q, int wait)
+{
+	u16int gidx;
+	VIOBuf *b, *rb, **bp;
+	void *dp;
+
+	qlock(q);
+waitloop:
+	while(q->desc == nil || (gidx = GET16(q->avail, 2), gidx == q->availidx)){
+		if(!wait){
+			qunlock(q);
+			return nil;
+		}
+		rsleep(q);
+	}
+	dp = checkdesc(q, GET16(q->avail, 4 + 2 * (q->availidx % q->size)));
+	rb = nil;
+	bp = &rb;
+	/* dp is nil if the head index (or a later next index) is out of range: stop there */
+	while(dp != nil){
+		b = emalloc(sizeof(VIOBuf));
+		b->qu = q;
+		b->idx = (u8int(*)[16])dp - q->desc;
+		b->addr = GET64(dp, 0);
+		b->len = GET32(dp, 8);
+		b->flags = GET16(dp, 12);
+		b->p = gptr(b->addr, b->len);
+		if(b->p == nil){
+			vmerror("virtio device %#x: invalid buffer pointer %p in queue, ignoring descriptor", q->d->pci->bdf, (void*)b->addr);
+			free(b);
+			break;
+		}
+		*bp = b;
+		b->head = rb;
+		bp = &b->next;
+		if((b->flags & BUFCHAIN) == 0) break;
+		dp = checkdesc(q, GET16(dp, 14));
+	}
+	q->availidx++;
+	if(rb == nil) goto waitloop;
+	qunlock(q);
+	return rb;
+}
+
+/*
+ * Hand the chain headed by b back to the guest and free it.
+ * An entry with the index of the head descriptor and the number of bytes
+ * written (wptr) is added to the used ring and the ring's index is
+ * advanced; this is skipped with a message if the queue address became
+ * invalid in the meantime.  An interrupt is requested unless the guest
+ * set USEDNOIRQ in the avail ring.  A nil b is ignored.
+ */
+void
+vioputbuf(VIOBuf *b)
+{
+	VIOBuf *bn;
+	VIOQueue *q;
+	u8int *p;
+	
+	if(b == nil) return;
+	q = b->qu;
+	qlock(q);
+	if(q->used == nil)
+		vmerror("virtio device %#x: address was set to an invalid value while holding buffer", q->d->pci->bdf);
+	else{
+		p = q->used + 4 + 8 * (q->usedidx % q->size);
+		PUT32(p, 4, b->wptr);
+		PUT32(p, 0, b->idx);
+		/* advancing the index last makes the entry visible to the guest */
+		PUT16(q->used, 2, ++q->usedidx);
+	}
+	qunlock(q);
+	if(q->avail != nil && (GET16(q->avail, 0) & USEDNOIRQ) == 0)
+		vioirq(q->d, 1);
+	while(b != nil){
+		bn = b->next;
+		free(b);
+		b = bn;
+	}
+}
+
+/*
+ * Copy up to n bytes into v from the buffers of the chain headed by b
+ * that do not have BUFWR set, continuing where the previous vioqread on
+ * this chain stopped (b->rptr).
+ * Returns the number of bytes copied; it is less than n when the chain
+ * has no more data.
+ */
+ulong
+vioqread(VIOBuf *b, void *v, ulong n)
+{
+	VIOBuf *c;
+	u32int p;
+	int rc;
+	ulong m;
+
+	p = b->rptr;	/* position still to be skipped, relative to the start of c */
+	c = b;
+	rc = 0;
+	for(;;){
+		if(rc >= n) return rc;
+		/* find the buffer containing position p */
+		for(;;){
+			if(c == nil) return rc;
+			if((c->flags & BUFWR) == 0){
+				if(p < c->len) break;
+				p -= c->len;
+			}
+			c = c->next;
+		}
+		m = c->len - p;
+		if(m > n - rc) m = n - rc;
+		memmove(v, (u8int*)c->p + p, m);
+		p += m, rc += m;
+		/* advance the destination by what was just copied, not by the offset within c */
+		v = (u8int*)v + m;
+		b->rptr += m;
+	}
+}
+
+/*
+ * Copy up to n bytes from v into the buffers of the chain headed by b
+ * that have BUFWR set, continuing where the previous vioqwrite on this
+ * chain stopped (b->wptr).
+ * Returns the number of bytes copied; it is less than n when the chain
+ * has no more room.
+ */
+ulong
+vioqwrite(VIOBuf *b, void *v, ulong n)
+{
+	VIOBuf *c;
+	u32int p;
+	int rc;
+	ulong m;
+
+	p = b->wptr;	/* position still to be skipped, relative to the start of c */
+	c = b;
+	rc = 0;
+	for(;;){
+		if(rc >= n) return rc;
+		/* find the buffer containing position p */
+		for(;;){
+			if(c == nil) return rc;
+			if((c->flags & BUFWR) != 0){
+				if(p < c->len) break;
+				p -= c->len;
+			}
+			c = c->next;
+		}
+		m = c->len - p;
+		if(m > n - rc) m = n - rc;
+		memmove((u8int*)c->p + p, v, m);
+		p += m, rc += m;
+		/* advance the source by what was just copied, not by the offset within c */
+		v = (u8int*)v + m;
+		b->wptr += m;
+	}
+}
+
+/*
+ * Set the location of queue q.  addr is the value written by the guest,
+ * in units of 4096 bytes.  The area holds the descriptor table (16 bytes
+ * per descriptor) followed by the avail ring, and, starting at the next
+ * 4096-byte boundary, the used ring.
+ * If the area is not valid guest memory the queue is disabled (all three
+ * pointers nil); otherwise sleepers on the queue are woken up.
+ * NOTE(review): q->addr keeps the shifted byte address (truncated to 32
+ * bits) and vioio returns it as is when the register is read back -
+ * confirm that this is what guests expect.
+ */
+static void
+vioqaddrset(VIOQueue *q, u64int addr)
+{
+	void *p;
+	int sz1, sz;
+
+	addr <<= 12;
+	/* sz1: descriptor table plus avail ring, sz: whole area; both rounded up to a multiple of 4096 */
+	sz1 = -(-(18 * q->size + 4) & -4096);
+	sz = sz1 + (-(-(8 * q->size + 6) & -4096));
+	p = gptr(addr, sz);
+	if(p == nil)
+		vmerror("virtio device %#x: attempt to set queue to invalid address %p", q->d->pci->bdf, (void *) addr);
+	qlock(q);
+	q->addr = addr;
+	if(p == nil){
+		q->desc = nil;
+		q->avail = nil;
+		q->used = nil;
+	}else{
+		q->desc = p;
+		q->avail = (u8int*)p + 16 * q->size;
+		q->used = (u8int*)p + sz1;
+		rwakeupall(q);
+	}
+	qunlock(q);
+}
+
+/*
+ * Port handler for the I/O BAR of a virtio device; vp is the VIODev and
+ * port is relative to the BAR.  Registers handled here:
+ *	0x00	device features (read only)
+ *	0x04	guest features
+ *	0x08	address of the selected queue
+ *	0x0c	size of the selected queue (read only)
+ *	0x0e	queue select
+ *	0x10	queue notify (write only; reads as 0)
+ *	0x12	device status
+ *	0x13	interrupt status (read only; reading it requests a clear)
+ * Ports from 20 on are passed to the device-specific handler with 20
+ * subtracted.  Everything else is reported through iowhine.
+ */
+u32int
+vioio(int isin, u16int port, u32int val, int sz, void *vp)
+{
+	VIODev *v;
+	int rc;
+	static char whinebuf[32];
+	
+	v = vp;
+	switch(isin << 16 | port){
+	case 0x4: v->guestfeat = val; return 0;
+	case 0x8: if(v->qsel < v->nqu) vioqaddrset(&v->qu[v->qsel], val); return 0;
+	case 0xe: v->qsel = val; return 0;
+	case 0x10: if(val < v->nqu) v->qu[val].notify(&v->qu[val]); return 0;
+	case 0x12: v->devstat = val; return 0;
+	case 0x10000: return v->devfeat;
+	case 0x10004: return v->guestfeat;
+	case 0x10008: return v->qsel >= v->nqu ? 0 : v->qu[v->qsel].addr;
+	case 0x1000c: return v->qsel >= v->nqu ? 0 : v->qu[v->qsel].size;
+	case 0x1000e: return v->qsel;
+	case 0x10010: return 0;
+	case 0x10012: return v->devstat;
+	case 0x10013: rc = v->isrstat; vioirq(v, 0); return rc;
+	}
+	if(port >= 20 && v->io != nil)
+		return v->io(isin, port - 20, val, sz, v);
+	snprint(whinebuf, sizeof(whinebuf), "virtio device %6x", v->pci->bdf);
+	return iowhine(isin, port, val, sz, whinebuf);
+}
+
+/*
+ * Create a virtio device: a PCI device with vendor id 0x1AF4 and the
+ * given device id, class and subsystem id at the next free bus address,
+ * with an interrupt and one 256-byte I/O BAR served by vioio.
+ */
+VIODev *
+mkviodev(u16int devid, u32int pciclass, u32int subid)
+{
+	VIODev *dev;
+	PCIDev *pci;
+
+	dev = emalloc(sizeof(VIODev));
+	pci = mkpcidev(allocbdf(), devid << 16 | 0x1AF4, pciclass << 8, 1);
+	pci->subid = subid << 16;
+	dev->pci = pci;
+	mkpcibar(pci, 1, 256, vioio, dev);
+	return dev;
+}
+
+/*
+ * Notify function for queues served by a proc sleeping in viogetbuf:
+ * wake up everything sleeping on q.
+ */
+static void
+viowakeup(VIOQueue *q)
+{
+	qlock(q);
+	rwakeupall(q);
+	qunlock(q);
+}
+
+/*
+ * Add a queue with sz descriptors (a power of two, at most 32768) to d;
+ * fn is called when the guest notifies the queue.
+ * The queues of a device live in one array that is grown here, so queues
+ * made earlier can move: pointers returned by earlier calls become
+ * invalid, and all queues must be created before anything locks or
+ * sleeps on them.
+ */
+VIOQueue *
+mkvioqueue(VIODev *d, int sz, void (*fn)(VIOQueue*))
+{
+	VIOQueue *q;
+	int i;
+
+	assert(sz > 0 && sz <= 32768 && (sz & sz - 1) == 0 && fn != nil);
+	d->qu = realloc(d->qu, (d->nqu + 1) * sizeof(VIOQueue));
+	if(d->qu == nil)
+		sysfatal("realloc: %r");
+	/* the array may have moved: make every existing Rendez point at the QLock of its own queue again */
+	for(i = 0; i < d->nqu; i++)
+		d->qu[i].Rendez.l = &d->qu[i];
+	q = d->qu + d->nqu++;
+	memset(q, 0, sizeof(VIOQueue));
+	q->Rendez.l = q;
+	q->size = sz;
+	q->d = d;
+	q->notify = fn;
+	return q;
+}
+
+/*
+ * Hash a 6-byte MAC address to a bit number from 0 to 63 for the bloom
+ * filters.  Each half of the address (3 bytes) contributes the same mix
+ * of shifted bytes; everything is xor'ed together and the low six bits
+ * are kept.
+ */
+int
+bloomhash(u8int *mac)
+{
+	u8int *m;
+	int h, i;
+
+	h = 0;
+	for(i = 0; i < 6; i += 3){
+		m = mac + i;
+		h ^= m[0] ^ m[0]>>6 ^ m[1]<<2 ^ m[1]>>4 ^ m[2]<<4 ^ m[2]>>2;
+	}
+	return h & 63;
+}
+
+/*
+ * Decide whether a packet with destination address mac is to be given
+ * to the guest of network device d.  Returns non-zero to accept.
+ * Promiscuous mode accepts everything.  Otherwise the address class
+ * (unicast, broadcast, multicast) is checked against the "all" and "no"
+ * switches for that class; failing those, unicast is accepted for the
+ * device's own address, and unicast and multicast when the bloom filter
+ * for the class has the address's bit set.
+ */
+int
+viomacok(VIODev *d, u8int *mac)
+{
+	static u8int bcast[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+	if((d->net.flags & VNETPROMISC) != 0) return 1;
+	if((mac[0] & 1) == 0){
+		if((d->net.flags & (VNETNOUNI|VNETALLUNI)) != 0)
+			return (d->net.flags & VNETNOUNI) == 0;
+		if(memcmp(mac, d->net.mac, 6) == 0) return 1;
+		if(d->net.macbloom == 0) return 0;
+		/* reduce to 0 or 1 before returning: the 64-bit mask would lose bits 32-63 as an int */
+		return (d->net.macbloom >> bloomhash(mac) & 1) != 0;
+	}else if(memcmp(mac, bcast, 6) == 0)
+		return (d->net.flags & VNETNOBCAST) == 0;
+	else{
+		if((d->net.flags & (VNETNOMULTI|VNETALLMULTI)) != 0)
+			return (d->net.flags & VNETNOMULTI) == 0;
+		if(d->net.multibloom == 0) return 0;
+		return (d->net.multibloom >> bloomhash(mac) & 1) != 0;
+	}
+}
+
+/*
+ * Receive proc of a network device (vp is the VIODev): reads packets
+ * from readfd and passes those accepted by viomacok to the guest through
+ * queue 0, each preceded by a 10-byte header of zeros.
+ * Packets shorter than 14 bytes are dropped.  The proc exits on end of
+ * file or a read error.
+ */
+void
+vionetrproc(void *vp)
+{
+	VIODev *v;
+	VIOQueue *q;
+	VIOBuf *vb;
+	uchar rxhead[10];
+	uchar rxbuf[1600];
+	int rc;
+
+	threadsetname("vionetrproc");
+	v = vp;
+	q = &v->qu[0];
+	/* the header is copied into guest memory: it must not carry stack garbage */
+	memset(rxhead, 0, sizeof(rxhead));
+	for(;;){
+		rc = read(v->net.readfd, rxbuf, sizeof(rxbuf));
+		if(rc == 0){
+			vmerror("read(vionetrproc): eof");
+			threadexits("read: eof");
+		}
+		if(rc < 0){
+			vmerror("read(vionetrproc): %r");
+			threadexits("read: %r");
+		}
+		if(rc < 14){
+			vmerror("vionetrproc: short packet received (len=%d)", rc);
+			continue;
+		}
+		if(!viomacok(v, rxbuf))
+			continue;
+		vb = viogetbuf(q, 1);
+		if(vb == nil){
+			vmerror("viogetbuf: %r");
+			continue;
+		}
+		vioqwrite(vb, rxhead, sizeof(rxhead));
+		vioqwrite(vb, rxbuf, rc);
+		vioputbuf(vb);
+	}
+}
+
+/*
+ * Transmit proc of a network device (vp is the VIODev): takes buffers
+ * from queue 1, skips the 10-byte header and writes the packet that
+ * follows to writefd.
+ * Packets that fill the whole 1600-byte buffer or are shorter than 14
+ * bytes are dropped.  Write errors are reported and otherwise ignored.
+ */
+void
+vionetwproc(void *vp)
+{
+	VIODev *v;
+	VIOQueue *q;
+	VIOBuf *vb;
+	uchar txhead[10];
+	uchar txbuf[1600];
+	int rc, len;
+
+	threadsetname("vionetwproc");
+	v = vp;
+	q = &v->qu[1];
+	for(;;){
+		vb = viogetbuf(q, 1);
+		if(vb == nil){
+			vmerror("viogetbuf: %r");
+			threadexits("viogetbuf: %r");
+		}
+		vioqread(vb, txhead, sizeof(txhead));
+		len = vioqread(vb, txbuf, sizeof(txbuf));
+		if(len == sizeof(txbuf)){
+			vmerror("virtio net: ignoring excessively long packet");
+			vioputbuf(vb);
+			continue;
+		}
+		if(len < 14){
+			vmerror("virtio net: ignoring short packet (length=%d)", len);
+			vioputbuf(vb);
+			continue;
+		}
+		rc = write(v->net.writefd, txbuf, len);
+		/* report before doing anything else, while %r still describes the write; errors first, as rc < 0 is also < len */
+		if(rc < 0)
+			vmerror("write(vionetwproc): %r");
+		else if(rc < len)
+			vmerror("write(vionetwproc): incomplete write");
+		vioputbuf(vb);
+	}
+}
+
+/*
+ * Handler for the device-specific ports of a network device (port is
+ * relative to the start of that area).  Read only:
+ * ports 0-3 give the first four bytes of the MAC address, ports 4-7 the
+ * last two bytes with bit 16 set above them (presumably the link status
+ * word - TODO confirm).  A read at an odd offset returns the word
+ * shifted down so that the addressed byte comes first.
+ */
+u32int
+vionetio(int isin, u16int port, u32int val, int sz, VIODev *v)
+{
+	switch(isin << 16 | port){
+	case 0x10000: case 0x10001: case 0x10002: case 0x10003:
+		return GET32(v->net.mac, 0) >> (port & 3) * 8;
+	case 0x10004: case 0x10005: case 0x10006: case 0x10007:
+		return (GET16(v->net.mac, 4) | 1 << 16) >> (port & 3) * 8;
+	}
+	return iowhine(isin, port, val, sz, "virtio net");
+}
+
+/*
+ * Read two address tables from buffer chain b and replace the bloom
+ * filters of d with them: the first table becomes macbloom, the second
+ * multibloom.  Each table is a 32-bit entry count followed by that many
+ * 6-byte addresses.
+ * Returns 0 on success and 1 if the data ends early, in which case the
+ * filters are left unchanged.
+ */
+int
+vionettables(VIODev *d, VIOBuf *b)
+{
+	u8int buf[4];
+	u8int mac[6];
+	u64int bloom[2];
+	int i, l;
+	
+	bloom[0] = 0;
+	bloom[1] = 0;
+	for(i = 0; i < 2; i++){
+		if(vioqread(b, buf, 4) < 4)
+			return 1;
+		l = GET32(buf, 0);
+		while(l--){
+			if(vioqread(b, mac, 6) < 6)
+				return 1;
+			bloom[i] |= 1ULL<<bloomhash(mac);
+		}
+	}
+	d->net.macbloom = bloom[0];
+	d->net.multibloom = bloom[1];
+	return 0;
+}
+
+/*
+ * Notify function of the control queue of a network device: handles all
+ * commands currently queued.
+ * A command is two bytes (class, command) followed by its data; one
+ * status byte is written back, 0 for success and 1 for failure.
+ * Class 0 switches one of the receive filter flags on (data byte 1) or
+ * off (data byte 0); class 1 sets the address tables (command 0) or the
+ * MAC address (command 1).
+ */
+void
+vionetcmd(VIOQueue *q)
+{
+	VIODev *d;
+	VIOBuf *b;
+	u8int cmd[2], buf[6];
+	u8int ack;
+	int fl;
+
+	d = q->d;
+	for(; b = viogetbuf(q, 0), b != nil; vioputbuf(b)){
+		if(vioqread(b, cmd, 2) < 2){
+			ack = 1;
+			vioqwrite(b, &ack, 1);
+			continue;
+		}
+		ack = 0;
+		switch(cmd[0] << 8 | cmd[1]){
+		case 0x0000: fl = VNETPROMISC; goto flag;
+		case 0x0001: fl = VNETALLMULTI; goto flag;
+		case 0x0002: fl = VNETALLUNI; goto flag;
+		case 0x0003: fl = VNETNOMULTI; goto flag;
+		case 0x0004: fl = VNETNOUNI; goto flag;
+		case 0x0005: fl = VNETNOBCAST; goto flag;
+		flag:
+			if(vioqread(b, buf, 1) < 1) ack = 1;
+			else if(buf[0] == 1) d->net.flags |= fl;
+			else if(buf[0] == 0) d->net.flags &= ~fl;
+			else ack = 1;
+			break;
+		case 0x0100: /* MAC_TABLE_SET */
+			ack = vionettables(d, b);
+			break;
+		case 0x0101: /* MAC_ADDR_SET */
+			if(vioqread(b, buf, 6) < 6) ack = 1;
+			else memmove(d->net.mac, buf, 6);
+			break;
+		default:
+			ack = 1;
+		}
+		vioqwrite(b, &ack, 1);
+	}
+}
+
+/*
+ * Create a virtio network device on top of the connection obtained by
+ * dialing net.  Returns 0, or -1 if the dial fails.
+ * The device gets three queues: 0 (receive) and 1 (transmit), each
+ * served by its own proc, and 2 (control), handled by vionetcmd.
+ * Its MAC address is random, with the low two bits of the first byte
+ * forced to 10 (not multicast, locally administered).
+ */
+int
+mkvionet(char *net)
+{
+	int fd, cfd;
+	VIODev *d;
+	int i;
+
+	fd = dial(netmkaddr("-1", net, nil), nil, nil, &cfd);
+	if(fd < 0) return -1;
+	if(cfd >= 0) fprint(cfd, "promiscuous");
+	d = mkviodev(0x1000, 0x020000, 1);
+	mkvioqueue(d, 1024, viowakeup);
+	mkvioqueue(d, 1024, viowakeup);
+	mkvioqueue(d, 32, vionetcmd);
+	for(i = 0; i < 6; i++)
+		d->net.mac[i] = rand();
+	d->net.mac[0] = d->net.mac[0] & ~1 | 2;
+	d->devfeat = 1<<5|1<<16|1<<17|1<<18|1<<20;
+	d->io = vionetio;
+	d->net.readfd = d->net.writefd = fd;
+	proccreate(vionetrproc, d, 8192);
+	proccreate(vionetwproc, d, 8192);
+	return 0;
+}
+
+/*
+ * Handler for the device-specific ports of a block device (port is
+ * relative to the start of that area).  Read only: ports 0-3 give the
+ * low and ports 4-7 the high 32 bits of the device size.  A read at an
+ * odd offset returns the word shifted down so that the addressed byte
+ * comes first.
+ */
+u32int
+vioblkio(int isin, u16int port, u32int val, int sz, VIODev *v)
+{
+	int key, shift;
+
+	key = isin << 16 | port;
+	shift = (port & 3) * 8;
+	if(key >= 0x10000 && key <= 0x10003)
+		return (u32int)v->blk.size >> shift;
+	if(key >= 0x10004 && key <= 0x10007)
+		return (u32int)(v->blk.size >> 32) >> shift;
+	return iowhine(isin, port, val, sz, "virtio blk");
+}
+
+/*
+ * Request proc of a block device (vp is the VIODev): serves requests
+ * from queue 0 one at a time.
+ * A request starts with a 16-byte header holding the 32-bit request type
+ * at offset 0 and the 64-bit sector number at offset 8.  Type 0 reads
+ * and type 1 writes one 512-byte sector of the backing file.
+ * One status byte is written back at the end: 0 for success, 1 for a
+ * failed or short transfer, 2 for a short header or unknown type.
+ */
+void
+vioblkproc(void *vp)
+{
+	VIODev *v;
+	VIOQueue *q;
+	VIOBuf *b;
+	u8int cmd[16];
+	u8int ack;
+	char buf[512];
+	uvlong addr;
+	int rc;
+	
+	threadsetname("vioblkproc");
+	v = vp;
+	q = &v->qu[0];
+	for(;;){
+		b = viogetbuf(q, 1);
+		if(b == nil){
+			vmerror("vioblkproc: viogetbuf: %r");
+			threadexits("vioblkproc: viogetbuf: %r");
+		}
+		ack = 0;
+		if(vioqread(b, cmd, sizeof(cmd)) < sizeof(cmd)) goto nope;
+		addr = GET64(cmd, 8);
+		switch(GET32(cmd, 0)){
+		case 0:
+			/* sector numbers with any of the top 9 bits set are rejected before being shifted to a byte offset */
+			if(addr >> 55 != 0) rc = 0;
+			else rc = pread(v->blk.fd, buf, 512, addr << 9);
+			if(rc < 0) vmerror("pread(vioblkproc): %r");
+			if(rc < 512){
+				memset(buf, 0, 512);
+				ack = 1;
+			}
+			vioqwrite(b, buf, 512);
+			break;
+		case 1:
+			if(vioqread(b, buf, 512) < 512) rc = 0;
+			else if(addr >> 55 != 0) rc = 0;
+			else rc = pwrite(v->blk.fd, buf, 512, addr << 9);
+			if(rc < 0) vmerror("pwrite(vioblkproc): %r");
+			if(rc < 512) ack = 1;
+			break;
+		default:
+		nope:
+			ack = 2;
+		}
+		vioqwrite(b, &ack, 1);
+		vioputbuf(b);
+	}
+}
+
+/*
+ * Create a virtio block device backed by the file fn, with one queue of
+ * 32 descriptors served by vioblkproc.  The device size is the file
+ * size in whole 512-byte sectors.
+ * Returns 0, or -1 if fn cannot be opened for reading and writing.
+ */
+int
+mkvioblk(char *fn)
+{
+	int fd;
+	VIODev *d;
+	
+	fd = open(fn, ORDWR);
+	if(fd < 0) return -1;
+	d = mkviodev(0x1000, 0x018000, 2);
+	mkvioqueue(d, 32, viowakeup);
+	d->io = vioblkio;
+	d->blk.fd = fd;
+	d->blk.size = seek(fd, 0, 2) >> 9;
+	proccreate(vioblkproc, d, 8192);
+	return 0;
+}
--- /dev/null
+++ b/sys/src/cmd/vmx/vmx.c
@@ -1,0 +1,555 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <draw.h>
+#include "dat.h"
+#include "fns.h"
+
+/* head of the list of guest memory regions; mkregion appends to it */
+Region *mmap;
+/* descriptors for #X/ctl, #X/regs and #X/wait, opened by vmxsetup */
+int ctlfd, regsfd, waitfd;
+/*
+ * channels read by runloop: exit messages from waitproc, ticks from
+ * sleeperproc and VmxNotif requests from sendnotif
+ */
+Channel *waitch, *sleepch, *notifch;
+/* times are in nanoseconds as returned by nsec(); MSEC is one millisecond */
+enum { MSEC = 1000*1000, MinSleep = MSEC, SleeperPoll = 2000*MSEC } ;
+/*
+ * getexit is incremented by launch and decremented when runloop receives
+ * an exit message; runloop only relaunches when it and halt are both zero
+ */
+int getexit, halt;
+/* a function and argument that sendnotif hands to runloop to be called there */
+typedef struct VmxNotif VmxNotif;
+struct VmxNotif {
+	void (*f)(void *);
+	void *arg;
+};
+
+/* presumably picked up by libthread as the stack size of the main thread - see thread(2) */
+int mainstacksize = 65536;
+
+/*
+ * Allocate sz zeroed bytes or die: calls sysfatal when malloc fails.
+ * The block is tagged with the caller's pc via setmalloctag.
+ */
+void *
+emalloc(ulong sz)
+{
+	void *mem;
+
+	if((mem = malloc(sz)) == nil)
+		sysfatal("malloc: %r");
+	memset(mem, 0, sz);
+	setmalloctag(mem, getcallerpc(&sz));
+	return mem;
+}
+
+/* Print a formatted message followed by a newline on file descriptor 2. */
+void
+vmerror(char *fmt, ...)
+{
+	va_list ap;
+	char line[256];
+	Fmt out;
+
+	fmtfdinit(&out, 2, line, sizeof line);
+	va_start(ap, fmt);
+	fmtvprint(&out, fmt, ap);
+	va_end(ap);
+	fmtprint(&out, "\n");
+	fmtfdflush(&out);
+}
+
+/*
+ * Format a command and write it to ctlfd.
+ * Returns whatever vfprint returns; callers treat a negative value as failure.
+ */
+int
+ctl(char *fmt, ...)
+{
+	va_list ap;
+	int n;
+
+	va_start(ap, fmt);
+	n = vfprint(ctlfd, fmt, ap);
+	va_end(ap);
+	return n;
+}
+
+/*
+ * Open the #X device files and initialise the VM: read #X/status, send
+ * "quit" first unless the status is "inactive", send "init", then write
+ * one line per segment-backed region of mmap to #X/map.
+ * Leaves ctlfd, regsfd and waitfd open; any failure is fatal.
+ */
+static void
+vmxsetup(void)
+{
+	static char status[128];
+	static int fd;
+	Region *reg;
+	int n;
+
+	fd = open("#X/status", OREAD);
+	if(fd < 0)
+		sysfatal("open: %r");
+	n = read(fd, status, sizeof(status)-1);
+	if(n < 0)
+		sysfatal("read: %r");
+	close(fd);
+	status[n] = 0;
+
+	ctlfd = open("#X/ctl", ORDWR);
+	if(ctlfd < 0)
+		sysfatal("open: %r");
+	/* anything but "inactive" gets a "quit" before the new "init" */
+	if(strcmp(status, "inactive\n") != 0 && ctl("quit") < 0)
+		sysfatal("ctl: %r");
+	if(ctl("init") < 0)
+		sysfatal("ctl: %r");
+
+	regsfd = open("#X/regs", ORDWR);
+	if(regsfd < 0)
+		sysfatal("open: %r");
+
+	fd = open("#X/map", OWRITE|OTRUNC);
+	if(fd < 0)
+		sysfatal("open: %r");
+	for(reg = mmap; reg != nil; reg = reg->next){
+		/* regions without a backing segment are not mapped */
+		if(reg->segname == nil)
+			continue;
+		if(fprint(fd, "rwx wb %#ullx %#ullx %s %#ullx\n", (uvlong)reg->start, (uvlong)reg->end, reg->segname, reg->segoff) < 0)
+			sysfatal("writing memory map: %r");
+	}
+	close(fd);
+
+	waitfd = open("#X/wait", OREAD);
+	if(waitfd < 0)
+		sysfatal("open: %r");
+}
+
+/*
+ * Register cache: slot i holds the register named rcname[i] (nil when the
+ * slot is unused) with value rcval[i].  rcvalid and rcdirty are bitmaps
+ * with one bit per slot (bit i&63 of word i>>6): valid means rcval[i] can
+ * be returned without reloading, dirty means it still has to be written
+ * out by rcflush.
+ */
+enum { RCENT = 256 };
+char *rcname[RCENT];
+uvlong rcval[RCENT];
+uvlong rcvalid[(RCENT+63)/64], rcdirty[(RCENT+63)/64];
+
+/* Return the cache slot whose name equals n, or -1 if there is none. */
+static int
+rclookup(char *n)
+{
+	int slot;
+
+	for(slot = 0; slot < RCENT; slot++){
+		if(rcname[slot] == nil)
+			continue;
+		if(strcmp(n, rcname[slot]) == 0)
+			return slot;
+	}
+	return -1;
+}
+
+/*
+ * Format all dirty registers into a static buffer, clear their dirty
+ * bits and invalidate the whole cache.
+ * With togo != 0 the entries are formatted as "name=value;" and only
+ * returned (launch appends them to the "go" command); with togo == 0
+ * they are formatted as "name value\n" and written to regsfd, a short
+ * write being fatal.
+ * Returns the static buffer, or nil when no register was dirty.
+ */
+char *
+rcflush(int togo)
+{
+	int i, j;
+	static char buf[4096];
+	char *p, *e;
+	uvlong v;
+	
+	p = buf;
+	e = buf + sizeof(buf);
+	*p = 0;
+	for(i = 0; i < (RCENT+63)/64; i++){
+		/* one bitmap word covers 64 slots; skip the inner loop when none is dirty */
+		if(v = rcdirty[i], v != 0){
+			for(j = 0; j < 64; j++)
+				if((v>>j & 1) != 0)
+					p = seprint(p, e, "%s%c%#ullx%c", rcname[i*64+j], togo?'=':' ', rcval[i*64+j], togo?';':'\n');
+			rcdirty[i] = 0;
+		}
+		/* every cached value is dropped, dirty or not */
+		rcvalid[i] = 0;
+	}
+	if(!togo && p != buf && write(regsfd, buf, p - buf) < p - buf)
+		sysfatal("rcflush: write: %r");
+	return p != buf ? buf : nil;
+}
+
+/*
+ * Refill the register cache from regsfd.
+ * Pending changes are written out first with rcflush(0).  The file is
+ * read from offset 0 and parsed as newline-terminated lines of the form
+ * "name value"; line i fills slot i, which is then marked valid.
+ * Parsing stops at the first line with fewer than two fields or without
+ * a terminating newline; all remaining slots are freed and marked invalid.
+ */
+static void
+rcload(void)
+{
+	char buf[4096];
+	char *p, *q, *f[2];
+	int nf;
+	int i, rc;
+
+	rcflush(0);
+	rc = pread(regsfd, buf, sizeof(buf) - 1, 0);
+	if(rc < 0) sysfatal("rcload: pread: %r");
+	buf[rc] = 0;
+	p = buf;
+	for(i = 0; i < nelem(rcname); i++){
+		q = strchr(p, '\n');
+		if(q == nil) break;
+		*q = 0;
+		nf = tokenize(p, f, nelem(f));
+		/* advance to the next line before looking at this one's fields */
+		p = q + 1;
+		if(nf < 2) break;
+		free(rcname[i]);
+		rcname[i] = strdup(f[0]);
+		rcval[i] = strtoull(f[1], nil, 0);
+		rcvalid[i>>6] |= 1ULL<<(i&63);
+	}
+	/* clear the slots that were not filled by this load */
+	for(; i < nelem(rcname); i++){
+		free(rcname[i]);
+		rcname[i] = 0;
+		rcvalid[i>>6] &= ~(1ULL<<(i&63));
+	}
+}
+
+/*
+ * Return the value of the register named reg.
+ * The cached value is used when the slot is valid; otherwise the cache
+ * is reloaded with rcload and the lookup repeated.  A name that is still
+ * unknown after the reload is fatal.
+ */
+uvlong
+rget(char *reg)
+{
+	int i;
+
+	i = rclookup(reg);
+	/*
+	 * The valid bit of slot i is bit i&63 of word i>>6.  The shift count
+	 * must be masked: shifting a 64-bit word by i itself is out of range
+	 * for slots 64 and up.
+	 */
+	if(i < 0 || (rcvalid[i>>6]>>(i&63)&1) == 0){
+		rcload();
+		i = rclookup(reg);
+		if(i < 0) sysfatal("unknown register %s", reg);
+	}
+	return rcval[i];
+}
+
+/*
+ * Store val in the cache entry for register reg, creating the entry in
+ * the first unused slot if the name is not cached yet (running out of
+ * slots trips an assert).  Nothing happens when the entry is already
+ * valid and holds val.  Unless clean is set, the entry is also marked
+ * dirty so that rcflush writes it out.
+ */
+void
+rpoke(char *reg, uvlong val, int clean)
+{
+	int slot;
+	uvlong bit;
+
+	slot = rclookup(reg);
+	if(slot >= 0){
+		/* a valid entry that already has this value needs no update */
+		if((rcvalid[slot>>6]>>(slot&63)&1) != 0 && rcval[slot] == val)
+			return;
+	}else{
+		for(slot = 0; slot < nelem(rcname); slot++)
+			if(rcname[slot] == nil)
+				break;
+		assert(slot < nelem(rcname));
+		rcname[slot] = strdup(reg);
+	}
+	bit = 1ULL<<(slot&63);
+	rcval[slot] = val;
+	rcvalid[slot>>6] |= bit;
+	if(!clean)
+		rcdirty[slot>>6] |= bit;
+}
+
+/*
+ * Append a new region of the given type to the end of the mmap list.
+ * pa must be page aligned (fatal otherwise); len is rounded up to a
+ * multiple of BY2PG.  The region covers [pa, pa+len) and is returned
+ * with all other fields zero.
+ */
+Region *
+mkregion(u64int pa, u64int len, int type)
+{
+	Region *reg, **tail;
+
+	assert(pa + len >= pa);
+	reg = emalloc(sizeof(Region));
+	if((pa & BY2PG-1) != 0)
+		sysfatal("address %p not page aligned", (void*)pa);
+	/* round len up to the next page boundary */
+	len = -(-len & -BY2PG);
+	reg->start = pa;
+	reg->end = pa + len;
+	reg->type = type;
+
+	/* walk to the terminating nil link and hook the region in there */
+	tail = &mmap;
+	while(*tail != nil)
+		tail = &(*tail)->next;
+	*tail = reg;
+	return reg;
+}
+
+/*
+ * Translate the guest physical range [addr, addr+len) into a host pointer.
+ * Returns nil when the range wraps around, when addr lies in no region,
+ * or when the range runs past the end of the region containing addr.
+ */
+void *
+gptr(u64int addr, u64int len)
+{
+	Region *reg;
+
+	if(addr + len < addr)
+		return nil;
+	for(reg = mmap; reg != nil; reg = reg->next){
+		if(addr < reg->start || addr >= reg->end)
+			continue;
+		/* only the first region containing addr is considered */
+		if(addr + len > reg->end)
+			return nil;
+		return (uchar *) reg->v + (addr - reg->start);
+	}
+	return nil;
+}
+
+/*
+ * Translate a host pointer into guest memory back to its guest physical
+ * address; the inverse of gptr.  Returns (uintptr)-1 when v does not
+ * point into any mapped region.
+ */
+uintptr
+gpa(void *v)
+{
+	Region *r;
+
+	for(r = mmap; r != nil; r = r->next)
+		if(v >= r->v && v < r->ve)
+			/* offset inside the region plus the region's guest base address */
+			return (uchar *) v - (uchar *) r->v + r->start;
+	return -1;
+}
+
+/*
+ * Return the number of bytes from host pointer v to the end of the
+ * region containing it, or 0 when v points into no region.
+ */
+uintptr
+gavail(void *v)
+{
+	Region *reg;
+
+	for(reg = mmap; reg != nil; reg = reg->next){
+		if(v < reg->v || v >= reg->ve)
+			continue;
+		return (uchar *) reg->ve - (uchar *) v;
+	}
+	return 0;
+}
+
+/*
+ * Return the host pointer just past the end of the region containing v;
+ * v itself when it points into no region (gavail returns 0 then).
+ */
+void *
+gend(void *v)
+{
+	uintptr left;
+
+	left = gavail(v);
+	return (u8int *) v + left;
+}
+
+/*
+ * Set by mksegment: tmp points at the spare page that follows the last
+ * region inside the guest memory segment, tmpoff is that page's offset
+ * from the start of the segment.
+ */
+void *tmp;
+uvlong tmpoff;
+
+/*
+ * Back all REGMEM and REGFB regions with one shared memory segment
+ * named sn.  The segment is sized as the sum of those regions plus one
+ * extra page, attached (and created through #g first if the attach
+ * fails), zeroed, and then carved up in list order: each region gets
+ * its segname, segoff, v and ve set.  The extra page at the end becomes
+ * tmp/tmpoff.  All failures are fatal.
+ */
+static void
+mksegment(char *sn)
+{
+	uintptr sz;
+	int fd;
+	Region *r;
+	char buf[256];
+	u8int *gmem, *p;
+
+	/* start with one page: the spare page tmp will point at */
+	sz = BY2PG;
+	for(r = mmap; r != nil; r = r->next){
+		switch(r->type){
+		case REGMEM: case REGFB: break;
+		default: continue;
+		}
+		r->segname = sn;
+		/* detect wrap-around of the running total */
+		if(sz + (r->end - r->start) < sz)
+			sysfatal("out of address space");
+		sz += r->end - r->start;
+	}
+	gmem = segattach(0, sn, nil, sz);
+	if(gmem == (void*)-1){
+		/* the segment does not exist yet: create it via #g and try again */
+		snprint(buf, sizeof(buf), "#g/%s", sn);
+		fd = create(buf, OREAD, DMDIR | 0777);
+		if(fd < 0) sysfatal("create: %r");
+		close(fd);
+		snprint(buf, sizeof(buf), "#g/%s/ctl", sn);
+		fd = open(buf, OWRITE|OTRUNC);
+		if(fd < 0) sysfatal("open: %r");
+		snprint(buf, sizeof(buf), "va %#ullx %#ullx fixed", 0x10000000ULL, (uvlong)sz);
+		if(write(fd, buf, strlen(buf)) < 0) sysfatal("write: %r");
+		close(fd);
+		gmem = segattach(0, sn, nil, sz);
+		if(gmem == (void*)-1) sysfatal("segattach: %r");
+	}
+	memset(gmem, 0, sz);
+	p = gmem;
+	/* lay the regions out back to back in list order */
+	for(r = mmap; r != nil; r = r->next){
+		if(r->segname == nil) continue;
+		r->segoff = p - gmem;
+		r->v = p;
+		p += r->end - r->start;
+		r->ve = p;
+	}
+	/* what is left after the last region is the spare page */
+	tmp = p;
+	tmpoff = p - gmem;
+}
+
+/*
+ * Send the "exc" control command for the exception called name.
+ * The second argument is unused.  A failing ctl write is fatal.
+ */
+void
+postexc(char *name, u32int)
+{
+	int rc;
+
+	rc = ctl("exc %s", name);
+	if(rc < 0)
+		sysfatal("ctl(postexc): %r");
+}
+
+/*
+ * Send the "go" control command, passing along the dirty registers
+ * collected by rcflush(1), and count one more exit message to expect.
+ * A failing ctl write is fatal.
+ */
+void
+launch(void)
+{
+	char *regs;
+
+	regs = rcflush(1);
+	if(regs == nil)
+		regs = "";
+	if(ctl("go %s", regs) < 0)
+		sysfatal("go: %r");
+	getexit++;
+}
+
+/*
+ * Proc that reads messages from waitfd forever and sends a strdup'ed
+ * copy of each, cut at its first newline, on waitch.  The receiver
+ * (runloop) frees the copy.  A failing read is fatal.
+ */
+static void
+waitproc(void *)
+{
+	static char msg[512];
+	char *nl;
+	int n;
+
+	threadsetname("waitexit");
+	for(;;){
+		n = read(waitfd, msg, sizeof(msg) - 1);
+		if(n < 0)
+			sysfatal("read: %r");
+		msg[n] = 0;
+		if((nl = strchr(msg, '\n')) != nil)
+			*nl = 0;
+		sendp(waitch, strdup(msg));
+	}
+}
+
+/*
+ * State of sleeperproc: timerevent is the nsec() time of the next wakeup
+ * it waits for and is accessed under timerlock; timerid is set by
+ * sleeperproc to its own thread id.
+ */
+vlong timerevent = -1;
+Lock timerlock;
+int timerid;
+
+/*
+ * Timer proc: sends a 0 on sleepch whenever the time in timerevent is
+ * reached.  It is started with timerlock held by runloop and releases
+ * the lock once timerid and the first deadline are set.
+ */
+static void
+sleeperproc(void *)
+{
+	vlong then, now;
+
+	timerid = threadid();
+	timerevent = nsec() + SleeperPoll;
+	/* releases the lock taken by runloop before this proc was created */
+	unlock(&timerlock);
+	threadsetname("sleeper");
+	for(;;){
+		lock(&timerlock);
+		then = timerevent;
+		now = nsec();
+		/* deadline already due: schedule the next one SleeperPoll from now */
+		if(then <= now) timerevent = now + SleeperPoll;
+		unlock(&timerlock);
+		if(then - now >= MinSleep){
+			/* far enough away to sleep (milliseconds), then look at the deadline again */
+			sleep((then - now) / MSEC);
+			continue;
+		}
+		/* less than MinSleep left: spin until the deadline */
+		while(nsec() < then)
+			;
+		sendul(sleepch, 0);
+	}
+}
+
+/*
+ * Main event loop; never returns.  Starts waitproc and sleeperproc,
+ * launches the VM and then dispatches events from the three channels:
+ * exit messages go to processexit, timer ticks to pitadvance and
+ * notifications are run by calling their function.  After each event
+ * the VM is launched again if no exit message is outstanding and halt
+ * is zero.
+ */
+static void
+runloop(void)
+{
+	char *waitmsg;
+	ulong ul;
+	VmxNotif notif;
+
+	/* held until sleeperproc has set up its state and unlocks it */
+	lock(&timerlock);
+	proccreate(waitproc, nil, 4096);
+	proccreate(sleeperproc, nil, 4096);
+	launch();
+	for(;;){
+		enum {
+			WAIT,
+			SLEEP,
+			NOTIF,
+		};
+		Alt a[] = {
+			[WAIT] {waitch, &waitmsg, CHANRCV},
+			[SLEEP] {sleepch, &ul, CHANRCV},
+			[NOTIF] {notifch, &notif, CHANRCV},
+			{nil, nil, CHANEND}
+		};
+		switch(alt(a)){
+		case WAIT:
+			/* one launch has completed; waitmsg was strdup'ed by waitproc */
+			getexit--;
+			processexit(waitmsg);
+			free(waitmsg);
+			break;
+		case SLEEP:
+			pitadvance();
+			break;
+		case NOTIF:
+			notif.f(notif.arg);
+			break;
+		}
+		if(getexit == 0 && halt == 0)
+			launch();
+	}
+}
+
+/* thread id of the thread running threadmain, recorded there */
+static int mainid;
+
+/*
+ * Have f(arg) executed by the main thread: called directly when the
+ * caller already is the main thread, otherwise queued on notifch for
+ * runloop to pick up.
+ */
+void
+sendnotif(void (*f)(void *), void *arg)
+{
+	VmxNotif req;
+
+	if(threadid() == mainid){
+		f(arg);
+		return;
+	}
+	req.f = f;
+	req.arg = arg;
+	send(notifch, &req);
+}
+
+/* initialisation and option parsing routines defined in other files, called from threadmain */
+extern void vgainit(void);
+extern void pciinit(void);
+extern void pcibusmap(void);
+extern void cpuidinit(void);
+extern void vgafbparse(char *);
+
+/* the arguments following the kernel file name on the command line, set by threadmain */
+int cmdlinen;
+char **cmdlinev;
+/* the arguments of all -m options, in command-line order */
+int bootmodn;
+char **bootmod;
+
+/*
+ * Parse a number with an optional binary size suffix: k/K, m/M or g/G
+ * multiply by 2^10, 2^20 or 2^30.  The number is read by strtoull with
+ * base 0.  Anything left after the number and suffix is fatal.
+ */
+static uvlong
+siparse(char *s)
+{
+	uvlong n;
+	char *end;
+	int shift;
+
+	n = strtoull(s, &end, 0);
+	if(*end == 'k' || *end == 'K')
+		shift = 10;
+	else if(*end == 'm' || *end == 'M')
+		shift = 20;
+	else if(*end == 'g' || *end == 'G')
+		shift = 30;
+	else
+		shift = 0;
+	if(shift != 0){
+		/* consume the suffix and scale */
+		end++;
+		n <<= shift;
+	}
+	if(*end != 0)
+		sysfatal("invalid argument: %s", s);
+	return n;
+}
+
+/*
+ * Print the two-line usage message on file descriptor 2 and terminate
+ * all threads with exit status "usage".  The second line is indented by
+ * as many blanks as argv0 is long so that it lines up with the first.
+ */
+static void
+usage(void)
+{
+	char *blanks, *p;
+	
+	blanks = strdup(argv0);
+	/* out of memory: print the second line without the padding */
+	if(blanks == nil)
+		blanks = "";
+	for(p = blanks; *p != 0; p++)
+		*p = ' ';
+	fprint(2, "usage: %s [ -M mem ] [ -c com1rd[,com1wr] ] [ -C com2rd[,com2wr] ] [ -n nic ]\n", argv0);
+	fprint(2, "       %s [ -d blockfile ] [ -m module ] [ -v vga ] kernel [ args ... ]\n", blanks);
+	threadexitsall("usage");
+}
+
+/*
+ * Program entry point: parses the command line, creates the guest
+ * memory regions and their backing segment, sets up the vmx device,
+ * loads the kernel named by the first non-option argument, creates the
+ * devices and enters runloop.
+ *
+ * Options: -m adds a boot module, -c and -C configure uart 0 and 1,
+ * -n and -d queue a virtio network or block device (created after the
+ * kernel is loaded), -M sets the guest memory size (siparse syntax),
+ * -v is passed to vgafbparse.  The arguments after the kernel name are
+ * left in cmdlinev/cmdlinen.
+ */
+void
+threadmain(int argc, char **argv)
+{
+	/* extra devices requested with -n/-d: constructor, description, argument */
+	static int (*edev[16])(char *);
+	static char *edevt[nelem(edev)];
+	static char *edevaux[nelem(edev)];
+	static int edevn;
+	static uvlong gmemsz = 64*1024*1024;
+	extern uintptr fbsz, fbaddr;
+	extern int textmode;
+	int i;
+
+	quotefmtinstall();
+	mainid = threadid();
+	cpuidinit();
+	waitch = chancreate(sizeof(char *), 32);
+	sleepch = chancreate(sizeof(ulong), 32);
+	notifch = chancreate(sizeof(VmxNotif), 16);
+	
+	ARGBEGIN {
+	case 'm':
+		bootmod = realloc(bootmod, (bootmodn + 1) * sizeof(char *));
+		if(bootmod == nil)
+			sysfatal("realloc: %r");
+		bootmod[bootmodn++] = strdup(EARGF(usage()));
+		break;
+	case 'c':
+		uartinit(0, EARGF(usage()));
+		break;
+	case 'C':
+		uartinit(1, EARGF(usage()));
+		break;
+	case 'n':
+		assert(edevn < nelem(edev));
+		edev[edevn] = mkvionet;
+		edevt[edevn] = "virtio network";
+		edevaux[edevn++] = strdup(EARGF(usage()));
+		break;
+	case 'd':
+		assert(edevn < nelem(edev));
+		edev[edevn] = mkvioblk;
+		edevt[edevn] = "virtio block";
+		edevaux[edevn++] = strdup(EARGF(usage()));
+		break;
+	case 'M':
+		gmemsz = siparse(EARGF(usage()));
+		/* the size has to survive the conversion to a host pointer-sized integer */
+		if(gmemsz != (uintptr) gmemsz) sysfatal("too much memory for address space");
+		break;
+	case 'v':
+		vgafbparse(EARGF(usage()));
+		break;
+	default:
+		usage();
+	} ARGEND;
+	if(argc < 1) usage();
+	cmdlinen = argc - 1;
+	cmdlinev = argv + 1;
+	
+	/* guest RAM occupies [0, gmemsz) */
+	mkregion(0, gmemsz, REGMEM);
+	if(fbsz != 0 && textmode == 0){
+		if(fbaddr + fbsz < fbaddr) sysfatal("invalid fb address");
+		/*
+		 * RAM starts at 0, so a non-empty framebuffer overlaps it exactly
+		 * when it starts below gmemsz, wherever it ends.
+		 */
+		if(fbaddr < gmemsz) sysfatal("framebuffer overlaps with physical memory");
+		mkregion(fbaddr, fbsz, REGFB);
+	}
+	mksegment("vm");
+	vmxsetup();
+	loadkernel(argv[0]);
+	pciinit();
+
+	vgainit();
+	for(i = 0; i < edevn; i++)
+		if(edev[i](edevaux[i]) < 0)
+			sysfatal("%s: %r", edevt[i]);
+
+	pcibusmap();
+	/* runloop does not return */
+	runloop();
+	exits(nil);
+}