shithub: riscv

ref: becb89bae53e01785bfd843478737eeb77a9419f
parent: 9fb2001658fb59fa0c7437f856491ef5f4214ebe
author: aiju <devnull@localhost>
date: Tue Jun 20 11:10:08 EDT 2017

pc, pc64: adapt devvmx to work on pc64
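
For context, the new vmxaddmsr() below fills the VMX MSR load/store areas pointed at by VMENTRY_MSRLDADDR, VMEXIT_MSRSTADDR and VMEXIT_MSRLDADDR. Each entry in those areas is 16 bytes, an MSR index followed by a 64-bit value, which is why the arrays are allocated as MAXMSR*16 bytes and stepped two u64ints at a time. A minimal sketch of one entry, with an illustrative struct name not used by the patch itself:

	typedef struct Msrent Msrent;
	struct Msrent {
		u64int	index;	/* MSR number in bits 31:0; bits 63:32 reserved */
		u64int	value;	/* value loaded into / stored from the MSR on VM entry/exit */
	};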

--- a/sys/src/9/pc/dat.h
+++ b/sys/src/9/pc/dat.h
@@ -332,6 +332,18 @@
 	Rdrnd	= 1<<30,	/* RDRAND support bit */
 };
 
+/* model-specific registers, for compatibility with pc64 code */
+enum {
+	Efer		= 0xc0000080,		/* Extended Feature Enable */
+	Star		= 0xc0000081,		/* Legacy Target IP and [CS]S */
+	Lstar		= 0xc0000082,		/* Long Mode Target IP */
+	Cstar		= 0xc0000083,		/* Compatibility Target IP */
+	Sfmask		= 0xc0000084,		/* SYSCALL Flags Mask */
+	FSbase		= 0xc0000100,		/* 64-bit FS Base Address */
+	GSbase		= 0xc0000101,		/* 64-bit GS Base Address */
+	KernelGSbase	= 0xc0000102,		/* SWAPGS instruction */
+};
+
 /*
  *  a parsed plan9.ini line
  */
--- a/sys/src/9/pc/devvmx.c
+++ b/sys/src/9/pc/devvmx.c
@@ -55,6 +55,7 @@
 	PROCB_EXITMOVDR = 1<<23,
 	PROCB_EXITIO = 1<<24,
 	PROCB_MONTRAP = 1<<27,
+	PROCB_MSRBITMAP = 1<<28,
 	PROCB_EXITMONITOR = 1<<29,
 	PROCB_EXITPAUSE = 1<<30,
 	PROCB_USECTLS2 = 1<<31,
@@ -69,6 +70,7 @@
 	PFAULT_MASK = 0x4006,
 	PFAULT_MATCH = 0x4008,
 	CR3_TARGCNT = 0x400a,
+	MSR_BITMAP = 0x2004,
 	
 	VMEXIT_CTLS = 0x400c,
 	VMEXIT_ST_DEBUG = 1<<2,
@@ -81,6 +83,9 @@
 	
 	VMEXIT_MSRSTCNT = 0x400e,
 	VMEXIT_MSRLDCNT = 0x4010,
+	VMEXIT_MSRSTADDR = 0x2006,
+	VMEXIT_MSRLDADDR = 0x2008,
+	VMENTRY_MSRLDADDR = 0x200A,
 	
 	VMENTRY_CTLS = 0x4012,
 	VMENTRY_LD_DEBUG = 1<<2,
@@ -204,10 +209,14 @@
 	CR4SMXE = 1<<14,
 	CR4PKE = 1<<22,
 	
-	CR0KERNEL = CR0RSVD | (uintptr)0xFFFFFFFF00000000ULL,
+	CR0KERNEL = CR0RSVD | 0x30 | (uintptr)0xFFFFFFFF00000000ULL,
 	CR4KERNEL = CR4RSVD | CR4VMXE | CR4SMXE | CR4MCE | CR4PKE | (uintptr)0xFFFFFFFF00000000ULL
 };
 
+enum {
+	MAXMSR = 512,
+};
+
 typedef struct Vmx Vmx;
 typedef struct VmCmd VmCmd;
 typedef struct VmMem VmMem;
@@ -237,9 +246,11 @@
 	} state;
 	char errstr[ERRMAX];
 	Ureg ureg;
+	uintptr cr2;
 	uintptr dr[8]; /* DR7 is also kept in VMCS */
 	FPsave *fp;
 	u8int launched;
+	u8int on;
 	u8int vpid;
 	enum {
 		FLUSHVPID = 1,
@@ -264,6 +275,10 @@
 	} got;
 	VmMem *stepmap;
 	VmIntr exc, irq, irqack;
+	
+	u64int *msrhost, *msrguest;
+	u32int *msrbits;
+	int nmsr;
 };
 
 struct VmCmd {
@@ -330,14 +345,13 @@
 }
 
 static uvlong
-parseval(char *s, int sz)
+parseval(char *s)
 {
 	uvlong v;
 	char *p;
-	
-	if(sz == 0) sz = sizeof(uintptr);
+
 	v = strtoull(s, &p, 0);
-	if(p == s || *p != 0 || v >> sz * 8 != 0) error("invalid value");
+	if(p == s || *p != 0) error("invalid value");
 	return v;
 }
 
@@ -363,13 +377,30 @@
 	return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
 }
 
+static void
+updatelma(void)
+{
+	uvlong cr0, efer, nefer, ectrl;
+
+	if(sizeof(uintptr) != 8) return;
+	cr0 = vmcsread(GUEST_CR0);
+	efer = vmcsread(GUEST_IA32_EFER);
+	nefer = efer & ~0x400 | efer << 2 & cr0 >> 21 & 0x400;
+	if(efer == nefer) return;
+	vmcswrite(GUEST_IA32_EFER, nefer);
+	ectrl = vmcsread(VMENTRY_CTLS);
+	ectrl = ectrl & ~0x200 | nefer >> 1 & 0x200;
+	vmcswrite(VMENTRY_CTLS, ectrl);
+}
+
 static int
 cr0realwrite(char *s)
 {
 	uvlong v;
 	
-	v = parseval(s, 8);
+	v = parseval(s);
 	vmcswrite(GUEST_CR0, vmcsread(GUEST_CR0) & CR0KERNEL | v & ~CR0KERNEL);
+	updatelma();
 	return 0;
 }
 
@@ -378,17 +409,28 @@
 {
 	uvlong v;
 	
-	v = parseval(s, 8);
-	vmcswrite(GUEST_CR0MASK, vmcsread(GUEST_CR0MASK) | CR0KERNEL);
+	v = parseval(s);
+	vmcswrite(GUEST_CR0MASK, v | CR0KERNEL);
 	return 0;
 }
 
 static int
+eferwrite(char *s)
+{
+	uvlong v;
+	
+	v = parseval(s);
+	vmcswrite(GUEST_IA32_EFER, v);
+	updatelma();
+	return 0;
+}
+
+static int
 cr4realwrite(char *s)
 {
 	uvlong v;
 	
-	v = parseval(s, 8);
+	v = parseval(s);
 	vmcswrite(GUEST_CR4, vmcsread(GUEST_CR4) & CR4KERNEL | v & ~CR4KERNEL);
 	return 0;
 }
@@ -398,8 +440,8 @@
 {
 	uvlong v;
 	
-	v = parseval(s, 8);
-	vmcswrite(GUEST_CR4MASK, vmcsread(GUEST_CR4MASK) | CR4KERNEL);
+	v = parseval(s);
+	vmcswrite(GUEST_CR4MASK, v | CR4KERNEL);
 	return 0;
 }
 
@@ -408,7 +450,7 @@
 {
 	uvlong v;
 	
-	v = (u32int) parseval(s, 8);
+	v = (u32int) parseval(s);
 	vmcswrite(GUEST_DR7, vmx.dr[7] = (u32int) v);
 	return 0;
 }
@@ -424,7 +466,7 @@
 {
 	uvlong v;
 	
-	v = parseval(s, 8);
+	v = parseval(s);
 	vmx.dr[6] = (u32int) v;
 	return 0;
 }
@@ -450,6 +492,16 @@
 	{UREG(bp), 0, "bp"},
 	{UREG(si), 0, "si"},
 	{UREG(di), 0, "di"},
+#ifdef RMACH
+	{UREG(r8), 0, "r8"},
+	{UREG(r9), 0, "r9"},
+	{UREG(r10), 0, "r10"},
+	{UREG(r11), 0, "r11"},
+	{UREG(r12), 0, "r12"},
+	{UREG(r13), 0, "r13"},
+	{UREG(r14), 0, "r14"},
+	{UREG(r15), 0, "r15"},
+#endif
 	{GUEST_GDTRBASE, 0, "gdtrbase"},
 	{GUEST_GDTRLIMIT, 4, "gdtrlimit"},
 	{GUEST_IDTRBASE, 0, "idtrbase"},
@@ -489,13 +541,13 @@
 	{GUEST_CR0, 0, "cr0real", nil, cr0realwrite},
 	{GUEST_CR0SHADOW, 0, "cr0fake", cr0fakeread},
 	{GUEST_CR0MASK, 0, "cr0mask", nil, cr0maskwrite},
-	{UREG(trap), 0, "cr2"},
+	{VMXVAR(cr2), 0, "cr2"},
 	{GUEST_CR3, 0, "cr3"},
 	{GUEST_CR4, 0, "cr4real", nil, cr4realwrite},
 	{GUEST_CR4SHADOW, 0, "cr4fake", cr4fakeread},
 	{GUEST_CR4MASK, 0, "cr4mask", nil, cr4maskwrite},
 	{GUEST_IA32_PAT, 8, "pat"},
-	{GUEST_IA32_EFER, 8, "efer"},
+	{GUEST_IA32_EFER, 8, "efer", nil, eferwrite},
 	{VMXVAR(dr[0]), 0, "dr0"},
 	{VMXVAR(dr[1]), 0, "dr1"},
 	{VMXVAR(dr[2]), 0, "dr2"},
@@ -529,6 +581,7 @@
 	int i;
 	
 	tab = vmx.pml4;
+	if(tab == nil) error(Egreg);
 	for(i = 3; i >= 1; i--){
 		tab += addr >> 12 + 9 * i & 0x1ff;
 		v = *tab;
@@ -550,6 +603,7 @@
 	int i;
 	uvlong v, *t;
 	
+	if(tab == nil) error(Egreg);
 	if(level < 3){
 		for(i = 0; i < 512; i++){
 			v = tab[i];
@@ -611,7 +665,7 @@
 cmdclearmeminfo(VmCmd *, va_list)
 {
 	VmMem *mp, *mn;
-	
+
 	eptfree(vmx.pml4, 0);
 	for(mp = vmx.mem.next; mp != &vmx.mem; mp = mn){
 		mn = mp->next;
@@ -633,7 +687,9 @@
 	char *f[10];
 	VmMem *mp;
 	int rc;
-	
+
+	if(vmx.pml4 == nil)
+		error(Egreg);	
 	p0 = va_arg(va, char *);
 	p = p0;
 	mp = nil;
@@ -725,11 +781,49 @@
 static void
 vmxshutdown(void)
 {
-	if(vmx.state != NOVMX && vmx.state != VMXINACTIVE)
+	if(vmx.on){
 		vmxoff();
+		vmx.on = 0;
+	}
 }
 
 static void
+vmxaddmsr(u32int msr, u64int gval)
+{
+	int i;
+
+	if(vmx.nmsr >= MAXMSR)
+		error("too many MSRs");
+	i = 2 * vmx.nmsr++;
+	vmx.msrhost[i] = msr;
+	rdmsr(msr, (vlong *) &vmx.msrhost[i+1]);
+	vmx.msrguest[i] = msr;
+	vmx.msrguest[i+1] = gval;
+	vmcswrite(VMENTRY_MSRLDCNT, vmx.nmsr);
+	vmcswrite(VMEXIT_MSRSTCNT, vmx.nmsr);
+	vmcswrite(VMEXIT_MSRLDCNT, vmx.nmsr);
+}
+
+static void
+vmxtrapmsr(u32int msr, enum { TRAPRD = 1, TRAPWR = 2 } state)
+{
+	u32int m;
+	
+	if(msr >= 0x2000 && (u32int)(msr - 0xc0000000) >= 0x2000)
+		return;
+	msr = msr & 0x1fff | msr >> 18 & 0x2000;
+	m = 1<<(msr & 31);
+	if((state & TRAPRD) != 0)
+		vmx.msrbits[msr / 32] |= m;
+	else
+		vmx.msrbits[msr / 32] &= ~m;
+	if((state & TRAPWR) != 0)
+		vmx.msrbits[msr / 32 + 512] |= m;
+	else
+		vmx.msrbits[msr / 32 + 512] &= ~m;
+}
+
+static void
 vmcsinit(void)
 {
 	vlong msr;
@@ -737,7 +831,7 @@
 	
 	memset(&vmx.ureg, 0, sizeof(vmx.ureg));
 	vmx.launched = 0;
-	vmx.onentry = 0;
+	vmx.onentry = 0;	
 	
 	if(rdmsr(VMX_BASIC_MSR, &msr) < 0) error("rdmsr(VMX_BASIC_MSR) failed");
 	if((msr & 1ULL<<55) != 0){
@@ -757,7 +851,7 @@
 	if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
 	x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
 	x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
-	x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR;
+	x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_MSRBITMAP;
 	x |= PROCB_USECTLS2;
 	x &= msr >> 32;
 	vmcswrite(PROCB_CTLS, x);
@@ -770,21 +864,17 @@
 	if(rdmsr(VMX_VMEXIT_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMEXIT_CTLS_MSR failed");
 	x = (u32int)msr;
 	if(sizeof(uintptr) == 8) x |= VMEXIT_HOST64;
-	x |= VMEXIT_LD_IA32_PAT | VMEXIT_LD_IA32_EFER | VMEXIT_ST_DEBUG;
+	x |= VMEXIT_LD_IA32_PAT | VMEXIT_LD_IA32_EFER | VMEXIT_ST_DEBUG | VMEXIT_ST_IA32_EFER;
 	x &= msr >> 32;
 	vmcswrite(VMEXIT_CTLS, x);
 	
 	if(rdmsr(VMX_VMENTRY_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMENTRY_CTLS_MSR failed");
 	x = (u32int)msr;
-	if(sizeof(uintptr) == 8) x |= VMENTRY_GUEST64;
 	x |= VMENTRY_LD_IA32_PAT | VMENTRY_LD_IA32_EFER | VMENTRY_LD_DEBUG;
 	x &= msr >> 32;
 	vmcswrite(VMENTRY_CTLS, x);
 	
 	vmcswrite(CR3_TARGCNT, 0);
-	vmcswrite(VMEXIT_MSRLDCNT, 0);
-	vmcswrite(VMEXIT_MSRSTCNT, 0);
-	vmcswrite(VMENTRY_MSRLDCNT, 0);
 	vmcswrite(VMENTRY_INTRINFO, 0);
 	vmcswrite(VMCS_LINK, -1);
 	
@@ -798,9 +888,9 @@
 	vmcswrite(HOST_CR0, getcr0() & ~0xe);
 	vmcswrite(HOST_CR3, getcr3());
 	vmcswrite(HOST_CR4, getcr4());
-	rdmsr(0xc0000100, &msr);
+	rdmsr(FSbase, &msr);
 	vmcswrite(HOST_FSBASE, msr);
-	rdmsr(0xc0000101, &msr);
+	rdmsr(GSbase, &msr);
 	vmcswrite(HOST_GSBASE, msr);
 	vmcswrite(HOST_TRBASE, (uintptr) m->tss);
 	vmcswrite(HOST_GDTR, (uintptr) m->gdt);
@@ -807,7 +897,7 @@
 	vmcswrite(HOST_IDTR, IDTADDR);
 	if(rdmsr(0x277, &msr) < 0) error("rdmsr(IA32_PAT) failed");
 	vmcswrite(HOST_IA32_PAT, msr);
-	if(rdmsr(0xc0000080, &msr) < 0) error("rdmsr(IA32_EFER) failed");
+	if(rdmsr(Efer, &msr) < 0) error("rdmsr(IA32_EFER) failed");
 	vmcswrite(HOST_IA32_EFER, msr);
 	
 	vmcswrite(EXC_BITMAP, 1<<18|1<<1);
@@ -836,16 +926,16 @@
 
 	vmcswrite(GUEST_CR0MASK, CR0KERNEL);
 	vmcswrite(GUEST_CR4MASK, CR4KERNEL);
-	vmcswrite(GUEST_CR0, getcr0() & ~(1<<31));
+	vmcswrite(GUEST_CR0, getcr0() & CR0KERNEL | 0x31);
 	vmcswrite(GUEST_CR3, 0);
-	vmcswrite(GUEST_CR4, getcr4());
-	vmcswrite(GUEST_CR0SHADOW, getcr0());
-	vmcswrite(GUEST_CR4SHADOW, getcr4() & ~CR4VMXE);
+	vmcswrite(GUEST_CR4, getcr4() & CR4KERNEL);
+	vmcswrite(GUEST_CR0SHADOW, getcr0() & CR0KERNEL | 0x31);
+	vmcswrite(GUEST_CR4SHADOW, getcr4() & ~CR4VMXE & CR4KERNEL);
 	
 	vmcswrite(GUEST_IA32_PAT, 0x0007040600070406ULL);
 	vmcswrite(GUEST_IA32_EFER, 0);
 	
-	vmcswrite(GUEST_TRBASE, (uintptr) m->tss);
+	vmcswrite(GUEST_TRBASE, 0);
 	vmcswrite(GUEST_TRLIMIT, 0xffff);
 	vmcswrite(GUEST_TRPERM, (SEGTSS|SEGPL(0)|SEGP) >> 8 | 2);
 	
@@ -864,6 +954,33 @@
 		error(Enomem);
 	fpinit();
 	fpsave(vmx.fp);
+	
+	vmx.msrhost = mallocalign(MAXMSR*16, 16, 0, 0);
+	vmx.msrguest = mallocalign(MAXMSR*16, 16, 0, 0);
+	vmx.msrbits = mallocalign(4096, 4096, 0, 0);
+	if(vmx.msrhost == nil || vmx.msrguest == nil || vmx.msrbits == nil)
+		error(Enomem);
+	memset(vmx.msrbits, -1, 4096);
+	vmxtrapmsr(Efer, 0);
+	vmcswrite(VMENTRY_MSRLDADDR, PADDR(vmx.msrguest));
+	vmcswrite(VMEXIT_MSRSTADDR, PADDR(vmx.msrguest));
+	vmcswrite(VMEXIT_MSRLDADDR, PADDR(vmx.msrhost));
+	vmcswrite(MSR_BITMAP, PADDR(vmx.msrbits));
+	
+	if(sizeof(uintptr) == 8){
+		vmxaddmsr(Star, 0);
+		vmxaddmsr(Lstar, 0);
+		vmxaddmsr(Cstar, 0);
+		vmxaddmsr(Sfmask, 0);
+		vmxaddmsr(KernelGSbase, 0);
+		vmxtrapmsr(Star, 0);
+		vmxtrapmsr(Lstar, 0);
+		vmxtrapmsr(Cstar, 0);
+		vmxtrapmsr(Sfmask, 0);
+		vmxtrapmsr(FSbase, 0);
+		vmxtrapmsr(GSbase, 0);
+		vmxtrapmsr(KernelGSbase, 0);
+	}
 }
 
 static void
@@ -870,9 +987,20 @@
 vmxstart(void)
 {
 	static uchar *vmcs; /* also vmxon region */
+	vlong msr, msr2;
+	uintptr cr;
 	vlong x;
 
-	putcr4(getcr4() | 0x2000);
+	putcr4(getcr4() | 0x2000); /* set VMXE */
+	putcr0(getcr0() | 0x20); /* set NE */
+	cr = getcr0();
+	if(rdmsr(VMX_CR0_FIXED0, &msr) < 0) error("rdmsr(VMX_CR0_FIXED0) failed");
+	if(rdmsr(VMX_CR0_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR0_FIXED1) failed");
+	if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR0 value");
+	cr = getcr4();
+	if(rdmsr(VMX_CR4_FIXED0, &msr) < 0) error("rdmsr(VMX_CR4_FIXED0) failed");
+	if(rdmsr(VMX_CR4_FIXED1, &msr2) < 0) error("rdmsr(VMX_CR4_FIXED1) failed");
+	if((cr & ~msr & ~msr2 | ~cr & msr & msr2) != 0) error("invalid CR4 value");
 
 	if(vmcs == nil){
 		vmcs = mallocalign(8192, 4096, 0, 0);
@@ -885,6 +1013,7 @@
 	*(ulong*)&vmcs[4096] = x;
 	if(vmxon(PADDR(vmcs + 4096)) < 0)
 		error("vmxon failed");
+	vmx.on = 1;
 	if(vmclear(PADDR(vmcs)) < 0)
 		error("vmclear failed");
 	if(vmptrld(PADDR(vmcs)) < 0)
@@ -931,16 +1060,25 @@
 cmdquit(VmCmd *p, va_list va)
 {
 	vmx.state = VMXENDING;
-	cmdclearmeminfo(p, va);
 	killcmds(p);
 
-	free(vmx.pml4);
-	vmx.pml4 = nil;
+	if(vmx.pml4 != nil){
+		cmdclearmeminfo(p, va);
+		free(vmx.pml4);
+		vmx.pml4 = nil;
+	}
 	vmx.got = 0;
 	vmx.onentry = 0;
 	vmx.stepmap = nil;
+	
+	free(vmx.msrhost);
+	free(vmx.msrguest);
+	vmx.msrhost = nil;
+	vmx.msrguest = nil;
+	vmx.nmsr = 0;
 
-	vmxoff();
+	if(vmx.on)
+		vmxoff();
 	vmx.state = VMXINACTIVE;
 	cmdrelease(p, 0);
 	pexit(Equit, 1);
@@ -973,7 +1111,7 @@
 			break;
 		}
 	if((vmx.onentry & STEP) != 0){
-		iprint("VMX: exit reason %#x when expected step...\n", reason & 0xffff);
+		print("VMX: exit reason %#x when expected step...\n", reason & 0xffff);
 		vmx.onentry &= ~STEP;
 		vmx.got |= GOTSTEP|GOTSTEPERR;
 	}
@@ -1042,7 +1180,7 @@
 		val = strtoull(f[1], &rp, 0);
 		sz = r->size;
 		if(sz == 0) sz = sizeof(uintptr);
-		if(rp == f[1] || *rp != 0 || val >> 8 * sz != 0) error("invalid value");
+		if(rp == f[1] || *rp != 0) error("invalid value");
 		if(r->offset >= 0)
 			vmcswrite(r->offset, val);
 		else{
@@ -1192,7 +1330,7 @@
 		if((vmx.got & GOTSTEP) != 0 || (vmx.onentry & STEP) != 0)
 			error(Einuse);
 		if(vmx.state != VMXREADY){
-			iprint("pre-step in state %s\n", statenames[vmx.state]);
+			print("pre-step in state %s\n", statenames[vmx.state]);
 			error("not ready");
 		}
 		vmx.stepmap = va_arg(va, VmMem *);
@@ -1202,7 +1340,7 @@
 		return 1;
 	case 1:
 		if(vmx.state != VMXREADY){
-			iprint("post-step in state %s\n", statenames[vmx.state]);
+			print("post-step in state %s\n", statenames[vmx.state]);
 			vmx.onentry &= ~STEP;
 			vmx.got &= ~(GOTSTEP|GOTSTEPERR);
 			error("not ready");
@@ -1404,6 +1542,7 @@
 {
 	int init, rc, x;
 	u32int procbctls, defprocbctls;
+	vlong v;
 
 	procwired(up, 0);
 	sched();
@@ -1462,11 +1601,17 @@
 			vmx.got &= ~GOTEXIT;
 			
 			x = splhi();
+			if(sizeof(uintptr) == 8){
+				rdmsr(FSbase, &v);
+				vmwrite(HOST_FSBASE, v);
+			}
 			if((vmx.dr[7] & ~0xd400) != 0)
 				putdr01236(vmx.dr);
-			fpsserestore0(vmx.fp);
+			fpsserestore(vmx.fp);
+			putcr2(vmx.cr2);
 			rc = vmlaunch(&vmx.ureg, vmx.launched);
-			fpssesave0(vmx.fp);
+			vmx.cr2 = getcr2();
+			fpssesave(vmx.fp);
 			splx(x);
 			if(rc < 0)
 				error("vmlaunch failed");
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -164,6 +164,7 @@
 void	procsetup(Proc*);
 void	procfork(Proc*);
 void	putcr0(ulong);
+void	putcr2(ulong);
 void	putcr3(ulong);
 void	putcr4(ulong);
 void	putdr(u32int*);
--- a/sys/src/9/pc/l.s
+++ b/sys/src/9/pc/l.s
@@ -437,6 +437,11 @@
 	MOVL	CR2, AX
 	RET
 
+TEXT putcr2(SB), $0
+	MOVL	cr2+0(FP), AX
+	MOVL	AX, CR2
+	RET
+
 TEXT getcr3(SB), $0				/* CR3 - page directory base */
 	MOVL	CR3, AX
 	RET
@@ -910,8 +915,6 @@
 	MOVL	resume+4(FP), AX
 	TESTL	AX, AX
 	MOVL	ureg+0(FP), DI
-	MOVL	32(DI), AX
-	MOVL	AX, CR2
 	MOVL	4(DI), SI
 	MOVL	8(DI), BP
 	MOVL	16(DI), BX
@@ -920,10 +923,10 @@
 	MOVL	28(DI), AX
 	MOVL	0(DI), DI
 	JNE	_vmresume
-	BYTE	$0x0f; BYTE $0x01; BYTE	$0xc2 /* VMLAUNCH	*/
+	BYTE	$0x0f; BYTE $0x01; BYTE	$0xc2 /* VMLAUNCH */
 	JMP	_vmout
 _vmresume:
-	BYTE	$0x0f; BYTE $0x01; BYTE $0xc3 /* VMRESUME	*/
+	BYTE	$0x0f; BYTE $0x01; BYTE $0xc3 /* VMRESUME */
 	JMP _vmout
 
 TEXT vmrestore(SB), $0
@@ -936,8 +939,6 @@
 	MOVL	DX, 20(DI)
 	MOVL	CX, 24(DI)
 	MOVL	AX, 28(DI)
-	MOVL	CR2, AX
-	MOVL	AX, 32(DI)
 	XORL	AX, AX
 	RET
 
--- a/sys/src/9/pc/main.c
+++ b/sys/src/9/pc/main.c
@@ -54,7 +54,7 @@
 	if((multiboot[0] & (1<<6)) != 0 && (l = multiboot[11]) >= 24){
 		cp = seprint(cp, ep, "*e820=");
 		m = KADDR(multiboot[12]);
-		while(m[0] >= 20 && m[0] <= l-4){
+		while(m[0] >= 20 && m[0]+4 <= l){
 			uvlong base, size;
 			m++;
 			base = ((uvlong)m[0] | (uvlong)m[1]<<32);
--- a/sys/src/9/pc64/fns.h
+++ b/sys/src/9/pc64/fns.h
@@ -34,6 +34,7 @@
 int	ecread(uchar addr);
 int	ecwrite(uchar addr, uchar val);
 #define	evenaddr(x)				/* x86 doesn't care */
+void	fpinit(void);
 void	(*fprestore)(FPsave*);
 void	(*fpsave)(FPsave*);
 void	fpsserestore(FPsave*);
@@ -108,6 +109,7 @@
 void	mtrrclock(void);
 int	mtrrprint(char *, long);
 void	mtrrsync(void);
+void	netconsole(void);
 void	noteret(void);
 uchar	nvramread(int);
 void	nvramwrite(int, uchar);
@@ -157,9 +159,11 @@
 void	procsetup(Proc*);
 void	procfork(Proc*);
 void	putcr0(u64int);
+void	putcr2(u64int);
 void	putcr3(u64int);
 void	putcr4(u64int);
 void	putdr(u64int*);
+void	putdr01236(u64int*);
 void	putdr6(u64int);
 void	putdr7(u64int);
 void*	rampage(void);
--- a/sys/src/9/pc64/l.s
+++ b/sys/src/9/pc64/l.s
@@ -379,6 +379,10 @@
 	MOVQ	CR2, AX
 	RET
 
+TEXT putcr2(SB), 1, $-4
+	MOVQ	BP, CR2
+	RET
+
 TEXT getcr3(SB), 1, $-4				/* PML4 Base */
 	MOVQ	CR3, AX
 	RET
@@ -694,9 +698,11 @@
 
 /* debug register access */
 
-TEXT putdr(SB), $0
+TEXT putdr(SB), 1, $-4
 	MOVQ	56(BP), AX
 	MOVQ	AX, DR7
+	/* wet floor */
+TEXT putdr01236(SB), 1, $-4
 	MOVQ	0(BP), AX
 	MOVQ	AX, DR0
 	MOVQ	8(BP), AX
@@ -709,16 +715,132 @@
 	MOVQ	AX, DR6
 	RET
 
-TEXT getdr6(SB), $0
+TEXT getdr6(SB), 1, $-4
 	MOVQ	DR6, AX
 	RET
 
-TEXT putdr6(SB), $0
+TEXT putdr6(SB), 1, $-4
 	MOVQ	BP, DR6
 	RET
 
-TEXT putdr7(SB), $0
+TEXT putdr7(SB), 1, $-4
 	MOVQ	BP, DR7
+	RET
+
+/* VMX instructions */
+TEXT vmxon(SB), 1, $-4
+	MOVQ	BP, 8(SP)
+	/* VMXON 8(SP) */
+	BYTE	$0xf3; BYTE $0x0f; BYTE $0xc7; BYTE $0x74; BYTE $0x24; BYTE $0x08
+	JMP	_vmout
+
+TEXT vmxoff(SB), 1, $-4
+	BYTE	$0x0f; BYTE $0x01; BYTE $0xc4
+	JMP	_vmout
+
+TEXT vmclear(SB), 1, $-4
+	MOVQ	BP, 8(SP)
+	/* VMCLEAR 8(SP) */
+	BYTE	$0x66;	BYTE $0x0f; BYTE $0xc7; BYTE $0x74; BYTE $0x24; BYTE $0x08
+	JMP	_vmout
+
+TEXT vmlaunch(SB), 1, $-4
+	MOVL	$0x6C14, DI
+	MOVQ	SP, DX
+	BYTE	$0x0f; BYTE $0x79; BYTE $0xfa /* VMWRITE DX, DI */
+	JBE	_vmout
+	MOVL	$0x6C16, DI
+	MOVQ	$vmrestore(SB), DX
+	BYTE	$0x0f; BYTE $0x79; BYTE $0xfa /* VMWRITE DX, DI */
+	JBE	_vmout
+	
+	MOVQ	BP, ureg+0(FP)
+	MOVL	resume+8(FP), AX
+	TESTL	AX, AX
+	MOVQ	0x00(BP), AX
+	MOVQ	0x08(BP), BX
+	MOVQ	0x10(BP), CX
+	MOVQ	0x18(BP), DX
+	MOVQ	0x20(BP), SI
+	MOVQ	0x28(BP), DI
+	MOVQ	0x38(BP), R8
+	MOVQ	0x40(BP), R9
+	MOVQ	0x48(BP), R10
+	MOVQ	0x50(BP), R11
+	MOVQ	0x58(BP), R12
+	MOVQ	0x60(BP), R13
+	MOVQ	0x68(BP), R14
+	MOVQ	0x70(BP), R15
+	MOVQ	0x30(BP), BP
+	JNE	_vmresume
+	BYTE	$0x0f; BYTE $0x01; BYTE	$0xc2 /* VMLAUNCH */
+	JMP	_vmout
+_vmresume:
+	BYTE	$0x0f; BYTE $0x01; BYTE $0xc3 /* VMRESUME */
+	JMP _vmout
+	
+TEXT vmrestore(SB), 1, $-4
+	PUSHQ	BP
+	MOVQ	ureg+0(FP), BP
+	MOVQ	AX, 0x00(BP)
+	MOVQ	BX, 0x08(BP)
+	MOVQ	CX, 0x10(BP)
+	MOVQ	DX, 0x18(BP)
+	MOVQ	SI, 0x20(BP)
+	MOVQ	DI, 0x28(BP)
+	POPQ	0x30(BP)
+	MOVQ	R8, 0x38(BP)
+	MOVQ	R9, 0x40(BP)
+	MOVQ	R10, 0x48(BP)
+	MOVQ	R11, 0x50(BP)
+	MOVQ	R12, 0x58(BP)
+	MOVQ	R13, 0x60(BP)
+	MOVQ	R14, 0x68(BP)
+	MOVQ	R15, 0x70(BP)
+	
+	BYTE	$0x65; MOVQ 0, RMACH /* MOVQ GS:(0), RMACH */
+	MOVQ	16(RMACH), RUSER
+	XORL	AX, AX
+	RET
+
+TEXT vmptrld(SB), 1, $-4
+	MOVQ	BP, 8(SP)
+	/* VMMPTRLD 8(SP) */
+	BYTE	$0x0f; BYTE $0xc7; BYTE $0x74; BYTE $0x24; BYTE $0x08
+	JMP _vmout
+
+TEXT vmwrite(SB), 1, $-4
+	MOVQ	val+8(FP), DX
+	/* VMWRITE DX, BP */
+	BYTE	$0x0f; BYTE $0x79; BYTE $0xea
+	JMP _vmout
+
+TEXT vmread(SB), 1, $-4
+	MOVQ	valp+8(FP), DI
+	/* VMREAD BP, (DI) */
+	BYTE	$0x0f; BYTE $0x78; BYTE $0x2f
+	JMP _vmout
+
+TEXT invept(SB), 1, $-4
+	/* INVEPT BP, 16(SP) */
+	BYTE	$0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x80; BYTE $0x6c; BYTE $0x24; BYTE $0x10
+	JMP _vmout
+
+TEXT invvpid(SB), 1, $-4
+	/* INVVPID BP, 16(SP) */
+	BYTE	$0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x81; BYTE $0x6c; BYTE $0x24; BYTE $0x10
+	JMP _vmout
+
+_vmout:
+	JC	_vmout1
+	JZ	_vmout2
+	XORL	AX, AX
+	RET
+_vmout1:
+	MOVQ	$-1, AX
+	RET
+_vmout2:
+	MOVQ	$-2, AX
 	RET
 
 /*
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -57,7 +57,7 @@
 	if((multiboot[0] & (1<<6)) != 0 && (l = multiboot[11]) >= 24){
 		cp = seprint(cp, ep, "*e820=");
 		m = KADDR(multiboot[12]);
-		while(m[0] >= 20 && m[0] <= l-4){
+		while(m[0] >= 20 && m[0]+4 <= l){
 			uvlong base, size;
 			m++;
 			base = ((uvlong)m[0] | (uvlong)m[1]<<32);
@@ -514,6 +514,7 @@
 	}else
 		links();
 	chandevreset();
+	netconsole();
 	preallocpages();
 	pageinit();
 	swapinit();
@@ -688,6 +689,24 @@
 	mathnote(up->fpsave.mxcsr & 0x3f, ureg->pc);
 }
 
+void
+fpinit(void)
+{
+	/*
+	 * A process tries to use the FPU for the
+	 * first time and generates a 'device not available'
+	 * exception.
+	 * Turn the FPU on and initialise it for use.
+	 * Set the precision and mask the exceptions
+	 * we don't care about from the generic Mach value.
+	 */
+	_clts();
+	_fninit();
+	_fwait();
+	_fldcw(0x0232);
+	_ldmxcsr(0x1900);
+}
+
 /*
  *  math coprocessor emulation fault
  */
@@ -703,19 +722,7 @@
 	}
 	switch(up->fpstate){
 	case FPinit:
-		/*
-		 * A process tries to use the FPU for the
-		 * first time and generates a 'device not available'
-		 * exception.
-		 * Turn the FPU on and initialise it for use.
-		 * Set the precision and mask the exceptions
-		 * we don't care about from the generic Mach value.
-		 */
-		_clts();
-		_fninit();
-		_fwait();
-		_fldcw(0x0232);
-		_ldmxcsr(0x1900);
+		fpinit();
 		up->fpstate = FPactive;
 		break;
 	case FPinactive:
--- a/sys/src/9/pc64/mem.h
+++ b/sys/src/9/pc64/mem.h
@@ -99,6 +99,7 @@
 #define	SELECTOR(i, t, p)	(((i)<<3) | (t) | (p))
 
 #define	NULLSEL	SELECTOR(NULLSEG, SELGDT, 0)
+#define KDSEL	NULLSEL
 #define	KESEL	SELECTOR(KESEG, SELGDT, 0)
 #define	UE32SEL	SELECTOR(UE32SEG, SELGDT, 3)
 #define	UDSEL	SELECTOR(UDSEG, SELGDT, 3)
--- a/sys/src/9/pc64/pc64
+++ b/sys/src/9/pc64/pc64
@@ -36,6 +36,9 @@
 #	i82365		cis
 	uart
 	usb
+	
+	segment
+	vmx
 
 link
 #	devpccard