ref: 8029c3d8c4bdae74b9a68beeff1edb8a21cceeed
parent: 13869bab113881f28ac72de7d8a68bb8bb5d9c38
author: aiju <devnull@localhost>
date: Tue Jun 13 10:15:09 EDT 2017
pc: add vmx device
--- /dev/null
+++ b/sys/src/9/pc/devvmx.c
@@ -1,0 +1,1690 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ureg.h"
+
+/*
+ * VMX instruction wrappers implemented outside this file.
+ * Where this file checks their result, a negative return is treated as failure.
+ */
+extern int vmxon(u64int);
+extern int vmxoff(void);
+extern int vmclear(u64int);
+extern int vmptrld(u64int);
+extern int vmlaunch(Ureg *, int, FPsave *);
+extern int vmread(u32int, uintptr *);
+extern int vmwrite(u32int, uintptr);
+extern int invept(u32int, uvlong, uvlong);
+extern int invvpid(u32int, uvlong, uvlong);
+
+/* control capability MSR values, read by vmcsinit() */
+static vlong procb_ctls, pinb_ctls;
+
+/*
+ * Constants used by this driver: MSR numbers passed to rdmsr(),
+ * field encodings passed to vmcsread()/vmcswrite(), and the bit
+ * values that are OR'ed into those fields.
+ */
+enum {
+ /* MSR numbers (arguments to rdmsr) */
+ VMX_BASIC_MSR = 0x480,
+ VMX_PINB_CTLS_MSR = 0x481,
+ VMX_PROCB_CTLS_MSR = 0x482,
+ VMX_VMEXIT_CTLS_MSR = 0x483,
+ VMX_VMENTRY_CTLS_MSR = 0x484,
+ VMX_MISC_MSR = 0x485,
+ VMX_CR0_FIXED0 = 0x486,
+ VMX_CR0_FIXED1 = 0x487,
+ VMX_CR4_FIXED0 = 0x488,
+ VMX_CR4_FIXED1 = 0x489,
+ VMX_VMCS_ENUM = 0x48A,
+ VMX_PROCB_CTLS2_MSR = 0x48B,
+ VMX_TRUE_PINB_CTLS_MSR = 0x48D,
+ VMX_TRUE_PROCB_CTLS_MSR = 0x48E,
+ VMX_TRUE_EXIT_CTLS_MSR = 0x48F,
+ VMX_TRUE_ENTRY_CTLS_MSR = 0x490,
+ VMX_VMFUNC_MSR = 0x491,
+
+ /* PINB_CTLS field and the bits set in it by vmcsinit() */
+ PINB_CTLS = 0x4000,
+ PINB_EXITIRQ = 1<<0,
+ PINB_EXITNMI = 1<<3,
+
+ /* PROCB_CTLS field and its bits */
+ PROCB_CTLS = 0x4002,
+ PROCB_IRQWIN = 1<<2,
+ PROCB_EXITHLT = 1<<7,
+ PROCB_EXITINVLPG = 1<<9,
+ PROCB_EXITMWAIT = 1<<10,
+ PROCB_EXITRDPMC = 1<<11,
+ PROCB_EXITRDTSC = 1<<12,
+ PROCB_EXITCR3LD = 1<<15,
+ PROCB_EXITCR3ST = 1<<16,
+ PROCB_EXITCR8LD = 1<<19,
+ PROCB_EXITCR8ST = 1<<20,
+ PROCB_EXITMOVDR = 1<<23,
+ PROCB_EXITIO = 1<<24,
+ PROCB_MONTRAP = 1<<27,
+ PROCB_EXITMONITOR = 1<<29,
+ PROCB_EXITPAUSE = 1<<30,
+ PROCB_USECTLS2 = 1<<31,
+
+ /* PROCB_CTLS2 field and its bits */
+ PROCB_CTLS2 = 0x401E,
+ PROCB_EPT = 1<<1,
+ PROCB_EXITGDT = 1<<2,
+ PROCB_VPID = 1<<5,
+ PROCB_UNRESTR = 1<<7,
+
+ EXC_BITMAP = 0x4004,
+ PFAULT_MASK = 0x4006,
+ PFAULT_MATCH = 0x4008,
+ CR3_TARGCNT = 0x400a,
+
+ VMEXIT_CTLS = 0x400c,
+ VMEXIT_HOST64 = 1<<9,
+
+ VMEXIT_MSRSTCNT = 0x400e,
+ VMEXIT_MSRLDCNT = 0x4010,
+
+ VMENTRY_CTLS = 0x4012,
+ VMENTRY_GUEST64 = 1<<9,
+
+ VMENTRY_MSRLDCNT = 0x4014,
+ /* event injected on the next entry (written by vmxproc) */
+ VMENTRY_INTRINFO = 0x4016,
+ VMENTRY_INTRCODE = 0x4018,
+ VMENTRY_INTRILEN = 0x401a,
+
+ VMCS_LINK = 0x2800,
+
+ /* guest state fields */
+ GUEST_ES = 0x800,
+ GUEST_CS = 0x802,
+ GUEST_SS = 0x804,
+ GUEST_DS = 0x806,
+ GUEST_FS = 0x808,
+ GUEST_GS = 0x80A,
+ GUEST_LDTR = 0x80C,
+ GUEST_TR = 0x80E,
+ GUEST_CR0 = 0x6800,
+ GUEST_CR3 = 0x6802,
+ GUEST_CR4 = 0x6804,
+ GUEST_ESLIMIT = 0x4800,
+ GUEST_CSLIMIT = 0x4802,
+ GUEST_SSLIMIT = 0x4804,
+ GUEST_DSLIMIT = 0x4806,
+ GUEST_FSLIMIT = 0x4808,
+ GUEST_GSLIMIT = 0x480A,
+ GUEST_LDTRLIMIT = 0x480C,
+ GUEST_TRLIMIT = 0x480E,
+ GUEST_GDTRLIMIT = 0x4810,
+ GUEST_IDTRLIMIT = 0x4812,
+ GUEST_ESPERM = 0x4814,
+ GUEST_CSPERM = 0x4816,
+ GUEST_SSPERM = 0x4818,
+ GUEST_DSPERM = 0x481A,
+ GUEST_FSPERM = 0x481C,
+ GUEST_GSPERM = 0x481E,
+ GUEST_LDTRPERM = 0x4820,
+ GUEST_TRPERM = 0x4822,
+ GUEST_CR0MASK = 0x6000,
+ GUEST_CR4MASK = 0x6002,
+ GUEST_CR0SHADOW = 0x6004,
+ GUEST_CR4SHADOW = 0x6006,
+ GUEST_ESBASE = 0x6806,
+ GUEST_CSBASE = 0x6808,
+ GUEST_SSBASE = 0x680A,
+ GUEST_DSBASE = 0x680C,
+ GUEST_FSBASE = 0x680E,
+ GUEST_GSBASE = 0x6810,
+ GUEST_LDTRBASE = 0x6812,
+ GUEST_TRBASE = 0x6814,
+ GUEST_GDTRBASE = 0x6816,
+ GUEST_IDTRBASE = 0x6818,
+ GUEST_DR7 = 0x681A,
+ GUEST_RSP = 0x681C,
+ GUEST_RIP = 0x681E,
+ GUEST_RFLAGS = 0x6820,
+
+ /* host state fields, filled in by vmcsinit() */
+ HOST_ES = 0xC00,
+ HOST_CS = 0xC02,
+ HOST_SS = 0xC04,
+ HOST_DS = 0xC06,
+ HOST_FS = 0xC08,
+ HOST_GS = 0xC0A,
+ HOST_TR = 0xC0C,
+ HOST_CR0 = 0x6C00,
+ HOST_CR3 = 0x6C02,
+ HOST_CR4 = 0x6C04,
+ HOST_FSBASE = 0x6C06,
+ HOST_GSBASE = 0x6C08,
+ HOST_TRBASE = 0x6C0A,
+ HOST_GDTR = 0x6C0C,
+ HOST_IDTR = 0x6C0E,
+ HOST_RSP = 0x6C14,
+ HOST_RIP = 0x6C16,
+
+ /* low two bits are tested by vmxproc before injecting an interrupt */
+ GUEST_CANINTR = 0x4824,
+
+ /* exit information fields (exposed read-only through guestregs) */
+ VM_INSTRERR = 0x4400,
+ VM_EXREASON = 0x4402,
+ VM_EXINTRINFO = 0x4404,
+ VM_EXINTRCODE = 0x4406,
+ VM_IDTVECINFO = 0x4408,
+ VM_IDTVECCODE = 0x440A,
+ VM_EXINSTRLEN = 0x440C,
+ VM_EXINSTRINFO = 0x440E,
+ VM_EXQUALIF = 0x6400,
+ VM_IORCX = 0x6402,
+ VM_IORSI = 0x6404,
+ VM_IORDI = 0x6406,
+ VM_IORIP = 0x6408,
+ VM_GUESTVA = 0x640A,
+ VM_GUESTPA = 0x2400,
+
+ VM_VPID = 0x000,
+ VM_EPTPIDX = 0x0004,
+
+ VM_EPTP = 0x201A,
+ VM_EPTPLA = 0x2024,
+
+ /* first argument passed to invept()/invvpid() */
+ INVLOCAL = 1,
+};
+
+typedef struct Vmx Vmx;
+typedef struct VmCmd VmCmd;
+typedef struct VmMem VmMem;
+typedef struct VmIntr VmIntr;
+
+/*
+ * One guest-physical memory range [lo, hi) and what backs it.
+ * Ranges are kept on a circular doubly linked list whose head is vmx.mem.
+ */
+struct VmMem {
+ uvlong lo, hi;
+ Segment *seg; /* backing segment, nil when the range has no access bits */
+ uintptr off; /* offset into seg */
+ VmMem *next, *prev;
+ u16int attr; /* added into each EPT entry by epttranslate() */
+};
+
+/* Event to inject: values written to VMENTRY_INTRINFO/INTRCODE/INTRILEN */
+struct VmIntr {
+ u32int info, code, ilen;
+};
+
+/* State of the single VM supported by this driver */
+struct Vmx {
+ enum {
+ NOVMX,
+ VMXINACTIVE,
+ VMXINIT,
+ VMXREADY,
+ VMXRUNNING,
+ VMXDEAD,
+ VMXENDING,
+ } state;
+ char errstr[ERRMAX]; /* error that moved the VM to VMXDEAD */
+ Ureg ureg; /* guest registers that are not VMCS fields */
+ FPsave *fp;
+ u8int launched; /* set after the first successful vmlaunch() */
+ u8int vpid;
+ /* work to do before the next VM entry */
+ enum {
+ FLUSHVPID = 1,
+ FLUSHEPT = 2,
+ STEP = 4,
+ POSTEX = 8,
+ POSTIRQ = 16,
+ } onentry;
+
+ Rendez cmdwait; /* vmxproc sleeps here until a command is queued */
+ Lock cmdlock; /* protects firstcmd/lastcmd */
+ VmCmd *firstcmd, **lastcmd;
+ VmCmd *postponed; /* commands that asked to be retried (CMDFPOSTP) */
+ uvlong *pml4; /* root EPT table */
+ VmMem mem; /* list head of mapped ranges */
+
+ /* events recorded for cmdwait()/cmdstep() */
+ enum {
+ GOTEXIT = 1,
+ GOTIRQACK = 2,
+ GOTSTEP = 4,
+ GOTSTEPERR = 8,
+ } got;
+ VmMem *stepmap; /* temporary mapping installed for a single step */
+ VmIntr exc, irq, irqack;
+};
+
+/*
+ * A request handed from a client process to vmxproc.
+ * vmxcmd() allocates it on the caller's stack and sleeps on the
+ * embedded Rendez until CMDFDONE is set.
+ */
+struct VmCmd {
+ enum {
+ CMDFDONE = 1,
+ CMDFFAIL = 2,
+ CMDFPOSTP = 4,
+ } flags;
+ u8int scratched; /* set by vmxcmd() when the waiting process took an error */
+ Rendez;
+ Lock;
+ int (*cmd)(VmCmd *, va_list);
+ int retval;
+ char *errstr; /* points at the requester's up->errstr */
+ va_list va;
+ VmCmd *next;
+};
+
+/* error string given to commands cancelled while the VM is shutting down */
+static char Equit[] = "vmx: ending";
+
+/* printable names indexed by Vmx.state */
+static char *statenames[] = {
+ [NOVMX] "novmx",
+ [VMXINACTIVE] "inactive",
+ [VMXINIT] "init",
+ [VMXREADY] "ready",
+ [VMXRUNNING] "running",
+ [VMXDEAD] "dead",
+ [VMXENDING]"ending"
+};
+
+/* the one and only VM */
+static Vmx vmx;
+
+/*
+ * Read VMCS field addr and return its value.
+ * When uintptr is 4 bytes and (addr & 0x6000) == 0x2000, the upper
+ * half is fetched with a second vmread() of addr|1.
+ * Raises an error if either vmread() fails.
+ */
+static u64int
+vmcsread(u32int addr)
+{
+	char msg[128];
+	u64int v;
+	int wide, failed;
+
+	v = 0;
+	wide = sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000;
+	failed = vmread(addr, (uintptr *) &v) < 0;
+	if(!failed && wide)
+		failed = vmread(addr | 1, (uintptr *) &v + 1) < 0;
+	if(failed){
+		snprint(msg, sizeof(msg), "vmcsread failed (%#.4ux)", addr);
+		error(msg);
+	}
+	return v;
+}
+
+/*
+ * Write val to VMCS field addr.
+ * When uintptr is 4 bytes and (addr & 0x6000) == 0x2000, the upper
+ * 32 bits go out with a second vmwrite() to addr|1.
+ * Raises an error if either vmwrite() fails.
+ */
+static void
+vmcswrite(u32int addr, u64int val)
+{
+	char msg[128];
+	int wide, failed;
+
+	wide = sizeof(uintptr) == 4 && (addr & 0x6000) == 0x2000;
+	failed = vmwrite(addr, val) < 0;
+	if(!failed && wide)
+		failed = vmwrite(addr | 1, val >> 32) < 0;
+	if(failed){
+		snprint(msg, sizeof(msg), "vmcswrite failed (%#.4ux = %#.16ullx)", addr, val);
+		error(msg);
+	}
+}
+
+/*
+ * Format the CR0 value as the guest observes it into [p, e) and
+ * return the new end of string.
+ * Bits set in GUEST_CR0MASK are host-owned: the guest reads them from
+ * the read shadow.  Bits clear in the mask come from the real guest CR0.
+ * (The previous code applied the mask the other way round.)
+ */
+static char *
+cr0read(char *p, char *e)
+{
+	uvlong guest, mask, shadow;
+
+	guest = vmcsread(GUEST_CR0);
+	mask = vmcsread(GUEST_CR0MASK);
+	shadow = vmcsread(GUEST_CR0SHADOW);
+	return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
+}
+
+/*
+ * Format the CR4 value as the guest observes it into [p, e) and
+ * return the new end of string.
+ * Bits set in GUEST_CR4MASK are host-owned: the guest reads them from
+ * the read shadow.  Bits clear in the mask come from the real guest CR4.
+ * (The previous code applied the mask the other way round.)
+ */
+static char *
+cr4read(char *p, char *e)
+{
+	uvlong guest, mask, shadow;
+
+	guest = vmcsread(GUEST_CR4);
+	mask = vmcsread(GUEST_CR4MASK);
+	shadow = vmcsread(GUEST_CR4SHADOW);
+	return seprint(p, e, "%#.*ullx", sizeof(uintptr) * 2, guest & ~mask | shadow & mask);
+}
+
+/*
+ * GuestReg write hook for registers that must not be modified:
+ * ignores its argument and always returns -1.
+ */
+static int
+readonly(char *)
+{
+ return -1;
+}
+
+/*
+ * Description of one register exposed to user space by name.
+ * offset >= 0 is a VMCS field encoding; a negative offset is the
+ * complement of a byte offset into vmx.ureg (see UREG below).
+ */
+typedef struct GuestReg GuestReg;
+struct GuestReg {
+ int offset;
+ u8int size; /* in bytes; 0 means == uintptr */
+ char *name;
+ char *(*read)(char *, char *);
+ int (*write)(char *); /* when non-nil, setregs() calls this instead of storing the value */
+};
+/* complemented offset of Ureg member x, so that it is negative when stored in GuestReg.offset */
+#define UREG(x) ~(ulong)&((Ureg*)0)->x
+static GuestReg guestregs[] = {
+ {GUEST_RIP, 0, "pc"},
+ {GUEST_RSP, 0, "sp"},
+ {GUEST_RFLAGS, 0, "flags"},
+ {UREG(ax), 0, "ax"},
+ {UREG(bx), 0, "bx"},
+ {UREG(cx), 0, "cx"},
+ {UREG(dx), 0, "dx"},
+ {UREG(bp), 0, "bp"},
+ {UREG(si), 0, "si"},
+ {UREG(di), 0, "di"},
+ {GUEST_GDTRBASE, 0, "gdtrbase"},
+ {GUEST_GDTRLIMIT, 4, "gdtrlimit"},
+ {GUEST_IDTRBASE, 0, "idtrbase"},
+ {GUEST_IDTRLIMIT, 4, "idtrlimit"},
+ {GUEST_CS, 2, "cs"},
+ {GUEST_CSBASE, 0, "csbase"},
+ {GUEST_CSLIMIT, 4, "cslimit"},
+ {GUEST_CSPERM, 4, "csperm"},
+ {GUEST_DS, 2, "ds"},
+ {GUEST_DSBASE, 0, "dsbase"},
+ {GUEST_DSLIMIT, 4, "dslimit"},
+ {GUEST_DSPERM, 4, "dsperm"},
+ {GUEST_ES, 2, "es"},
+ {GUEST_ESBASE, 0, "esbase"},
+ {GUEST_ESLIMIT, 4, "eslimit"},
+ {GUEST_ESPERM, 4, "esperm"},
+ {GUEST_FS, 2, "fs"},
+ {GUEST_FSBASE, 0, "fsbase"},
+ {GUEST_FSLIMIT, 4, "fslimit"},
+ {GUEST_FSPERM, 4, "fsperm"},
+ {GUEST_GS, 2, "gs"},
+ {GUEST_GSBASE, 0, "gsbase"},
+ {GUEST_GSLIMIT, 4, "gslimit"},
+ {GUEST_GSPERM, 4, "gsperm"},
+ {GUEST_SS, 2, "ss"},
+ {GUEST_SSBASE, 0, "ssbase"},
+ {GUEST_SSLIMIT, 4, "sslimit"},
+ {GUEST_SSPERM, 4, "ssperm"},
+ {GUEST_TR, 2, "tr"},
+ {GUEST_TRBASE, 0, "trbase"},
+ {GUEST_TRLIMIT, 4, "trlimit"},
+ {GUEST_TRPERM, 4, "trperm"},
+ {GUEST_LDTR, 2, "ldtr"},
+ {GUEST_LDTRBASE, 0, "ldtrbase"},
+ {GUEST_LDTRLIMIT, 4, "ldtrlimit"},
+ {GUEST_LDTRPERM, 4, "ldtrperm"},
+ {GUEST_CR0, 0, "cr0", cr0read, readonly},
+ /* NOTE(review): "cr2" is stored in the trap slot of Ureg - confirm against vmlaunch() */
+ {UREG(trap), 0, "cr2"},
+ {GUEST_CR3, 0, "cr3"},
+ {GUEST_CR4, 0, "cr4", cr4read, readonly},
+ {VM_INSTRERR, 4, "instructionerror", nil, readonly},
+ {VM_EXREASON, 4, "exitreason", nil, readonly},
+ {VM_EXQUALIF, 0, "exitqualification", nil, readonly},
+ {VM_EXINTRINFO, 4, "exitinterruptinfo", nil, readonly},
+ {VM_EXINTRCODE, 4, "exitinterruptcode", nil, readonly},
+ {VM_EXINSTRLEN, 4, "exitinstructionlen", nil, readonly},
+ {VM_EXINSTRINFO, 4, "exitinstructioninfo", nil, readonly},
+ {VM_GUESTVA, 0, "exitva", nil, readonly},
+ {VM_GUESTPA, 0, "exitpa", nil, readonly},
+ {VM_IDTVECINFO, 4, "idtinterruptinfo", nil, readonly},
+ {VM_IDTVECCODE, 4, "idtinterruptcode", nil, readonly},
+};
+
+/*
+ * Return non-zero when addr is 4K-aligned and has no bits set
+ * at or above bit 48.
+ */
+static int
+vmokpage(u64int addr)
+{
+	if((addr & 0xfff) != 0)
+		return 0;
+	return addr >> 48 == 0;
+}
+
+/*
+ * Return a pointer to the last-level EPT entry for guest-physical
+ * address addr, starting at vmx.pml4.  Missing intermediate tables
+ * are allocated, zeroed and linked in with attribute bits 0x407.
+ * Raises Enomem if a table cannot be allocated.
+ */
+static uvlong *
+eptwalk(uvlong addr)
+{
+	uvlong *ent, *fresh;
+	uvlong pte;
+	int lvl;
+
+	ent = vmx.pml4;
+	lvl = 3;
+	while(lvl >= 1){
+		/* 9 index bits per level above the 12-bit page offset */
+		ent += (addr >> (12 + 9 * lvl)) & 0x1ff;
+		pte = *ent;
+		if((pte & 3) == 0){
+			fresh = mallocalign(BY2PG, BY2PG, 0, 0);
+			if(fresh == nil)
+				error(Enomem);
+			memset(fresh, 0, BY2PG);
+			pte = PADDR(fresh) | 0x407;
+			*ent = pte;
+		}
+		ent = KADDR(pte & ~0xfff);
+		lvl--;
+	}
+	return ent + ((addr >> 12) & 0x1ff);
+}
+
+/*
+ * Recursively release the EPT tables below tab.  level 0 is the root:
+ * its entries are cleared but the root table itself is kept.
+ * Tables at levels 1..3 are freed; level 3 entries are not followed.
+ */
+static void
+eptfree(uvlong *tab, int level)
+{
+	uvlong ent;
+	int idx;
+
+	if(level < 3)
+		for(idx = 0; idx < 512; idx++){
+			ent = tab[idx];
+			if((ent & 3) != 0){
+				eptfree(KADDR(ent & ~0xfff), level + 1);
+				tab[idx] = 0;
+			}
+		}
+	if(level > 0)
+		free(tab);
+}
+
+/*
+ * Install EPT entries for every page of [mp->lo, mp->hi).
+ * With a segment, page i maps to the physical address of the
+ * segment's first page plus mp->off plus i*BY2PG; without one the
+ * base is 0.  mp->attr is added into every entry.
+ * Raises Egreg for a non-SG_FIXED segment, unaligned bounds, an attr
+ * of 0x1000 or more, or a range that runs past the segment's top.
+ * Requests an EPT flush on the next VM entry.
+ */
+static void
+epttranslate(VmMem *mp)
+{
+	uvlong gpa, base, *pte;
+
+	if(mp->seg != nil && (mp->seg->type & SG_TYPE) != SG_FIXED)
+		error(Egreg);
+	if((mp->lo & 0xfff) != 0 || (mp->hi & 0xfff) != 0)
+		error(Egreg);
+	if((uint)mp->attr >= 0x1000)
+		error(Egreg);
+
+	base = 0;
+	if(mp->seg != nil){
+		if(mp->seg->base + mp->off + (mp->hi - mp->lo) > mp->seg->top)
+			error(Egreg);
+		base = mp->seg->map[0]->pages[0]->pa + mp->off;
+	}
+	for(gpa = mp->lo; gpa < mp->hi; gpa += BY2PG){
+		pte = eptwalk(gpa);
+		*pte = base + (gpa - mp->lo) + mp->attr;
+	}
+	vmx.onentry |= FLUSHEPT;
+}
+
+/* two-letter memory type names; the index is stored in bits 3..5 of VmMem.attr */
+static char *mtype[] = {"uc", "wc", "02", "03", "wt", "wp", "wb", "07"};
+
+/*
+ * Command: print the memory map, one line per range, into the buffer
+ * whose start and end pointers are the two va arguments.
+ * Each line is "rwx type lo hi segment offset".  Returns bytes written.
+ */
+static int
+cmdgetmeminfo(VmCmd *, va_list va)
+{
+	VmMem *ent;
+	char *start, *end, *out, *name;
+	char perm[4];
+	char mt[4];
+
+	start = va_arg(va, char *);
+	end = va_arg(va, char *);
+	out = start;
+	for(ent = vmx.mem.next; ent != &vmx.mem; ent = ent->next){
+		perm[0] = (ent->attr & 1) == 0 ? '-' : 'r';
+		perm[1] = (ent->attr & 2) == 0 ? '-' : 'w';
+		perm[2] = (ent->attr & 4) == 0 ? '-' : 'x';
+		perm[3] = 0;
+		/* first two characters of the type name, then optional '!' */
+		name = mtype[ent->attr >> 3 & 7];
+		mt[0] = name[0];
+		mt[1] = name[1];
+		mt[2] = (ent->attr & 0x40) == 0 ? 0 : '!';
+		mt[3] = 0;
+		out = seprint(out, end, "%s %s %#llux %#llux %p %#llux\n", perm, mt, ent->lo, ent->hi, ent->seg, (uvlong)ent->off);
+	}
+	return out - start;
+}
+
+/*
+ * Command: drop the whole guest memory map.
+ * Frees every EPT table below the root, frees all VmMem list nodes,
+ * resets the list to empty and requests an EPT flush.
+ * Always returns 0.
+ */
+static int
+cmdclearmeminfo(VmCmd *, va_list)
+{
+	VmMem *mp, *mn;
+
+	/*
+	 * vmx.pml4 is still nil if startup failed before vmcsinit()
+	 * allocated it (state VMXDEAD); eptfree() would dereference it.
+	 */
+	if(vmx.pml4 != nil)
+		eptfree(vmx.pml4, 0);
+	for(mp = vmx.mem.next; mp != &vmx.mem; mp = mn){
+		mn = mp->next;
+		free(mp);
+	}
+	vmx.mem.prev = &vmx.mem;
+	vmx.mem.next = &vmx.mem;
+	vmx.onentry |= FLUSHEPT;
+	return 0;
+}
+
+extern Segment* (*_globalsegattach)(char*);
+
+/*
+ * Command: parse newline-terminated map lines from the buffer passed
+ * in va and add each as a VmMem range.
+ *
+ * A line has 4 fields (access type lo hi) or 6 fields
+ * (access type lo hi segment offset).  access is made of 'r', 'w',
+ * 'x' and '-'; type is a name from mtype[] optionally followed by '!'.
+ * Ranges with any access bit need the 6-field form.
+ *
+ * Returns the number of bytes consumed; text after the last newline
+ * is left unparsed.  Raises an error on the first malformed line;
+ * ranges added by earlier lines stay in place.
+ */
+static int
+cmdsetmeminfo(VmCmd *, va_list va)
+{
+	char *p0, *p, *q, *r;
+	int j;
+	char *f[10];
+	VmMem *mp;
+	int rc;
+
+	p0 = va_arg(va, char *);
+	p = p0;
+	mp = nil;
+	for(;;){
+		q = strchr(p, '\n');
+		if(q == 0) break;
+		*q = 0;
+		if(mp == nil){
+			mp = malloc(sizeof(VmMem));
+			if(mp == nil)
+				error(Enomem);
+		}
+		if(waserror()){
+			free(mp);
+			nexterror();
+		}
+		rc = tokenize(p, f, nelem(f));
+		p = q + 1;
+		if(rc == 0) goto next;
+		if(rc != 4 && rc != 6) error("number of fields wrong");
+		memset(mp, 0, sizeof(VmMem));
+		for(q = f[0]; *q != 0; q++)
+			switch(*q){
+			case 'r': if((mp->attr & 1) != 0) goto tinval; mp->attr |= 1; break;
+			case 'w': if((mp->attr & 2) != 0) goto tinval; mp->attr |= 2; break;
+			case 'x': if((mp->attr & 4) != 0) goto tinval; mp->attr |= 0x404; break;
+			case '-': break;
+			default: tinval: error("invalid access field");
+			}
+		for(j = 0; j < 8; j++)
+			if(strncmp(mtype[j], f[1], 2) == 0){
+				mp->attr |= j << 3;
+				break;
+			}
+		if(j == 8 || strlen(f[1]) > 3) error("invalid memory type");
+		if(f[1][2] == '!') mp->attr |= 0x40;
+		else if(f[1][2] != 0) error("invalid memory type");
+		mp->lo = strtoull(f[2], &r, 0);
+		if(*r != 0 || !vmokpage(mp->lo)) error("invalid low guest physical address");
+		mp->hi = strtoull(f[3], &r, 0);
+		if(*r != 0 || !vmokpage(mp->hi) || mp->hi <= mp->lo) error("invalid high guest physical address");
+		/* f[5] only exists on 6-field lines; it was previously read unconditionally */
+		if(rc == 6){
+			mp->off = strtoull(f[5], &r, 0);
+			if(*r != 0 || !vmokpage(mp->off)) error("invalid offset");
+		}
+		if((mp->attr & 7) != 0){
+			if(rc != 6) error("number of fields wrong");
+			mp->seg = _globalsegattach(f[4]);
+			if(mp->seg == nil) error("no such segment");
+			if(mp->seg->base + mp->off + (mp->hi - mp->lo) > mp->seg->top) error("out of bounds");
+		}
+		epttranslate(mp);
+		/* append to the tail of the circular list; ownership moves to the list */
+		mp->prev = vmx.mem.prev;
+		mp->next = &vmx.mem;
+		mp->prev->next = mp;
+		mp->next->prev = mp;
+		mp = nil;
+	next:
+		poperror();
+	}
+	free(mp);
+	return p - p0;
+}
+
+/*
+ * Device reset: decide whether VMX can be used.
+ * Leaves vmx.state at NOVMX unless cpuid reports the feature bit,
+ * MSR 0x3a has bits 0 and 2 set (setting them itself if bit 0 is
+ * still clear), and the control MSRs allow secondary controls, EPT
+ * and VPID.  On success the state becomes VMXINACTIVE and the
+ * command queue and memory list are initialised empty.
+ */
+static void
+vmxreset(void)
+{
+	enum {
+		Featctl = 0x3a,
+		Wanted = 5,
+	};
+	ulong regs[4];
+	vlong msr;
+
+	cpuid(1, regs);
+	if((regs[2] & 1<<5) == 0)
+		return;
+	/* check if disabled by BIOS */
+	if(rdmsr(Featctl, &msr) < 0)
+		return;
+	if((msr & Wanted) != Wanted && (msr & 1) == 0){
+		/* msr still unlocked */
+		wrmsr(Featctl, msr | Wanted);
+		if(rdmsr(Featctl, &msr) < 0)
+			return;
+	}
+	if((msr & Wanted) != Wanted)
+		return;
+	if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0)
+		return;
+	/* top bit: secondary controls may be enabled */
+	if(msr >= 0)
+		return;
+	if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0)
+		return;
+	if((msr >> 32 & PROCB_EPT) == 0)
+		return;
+	if((msr >> 32 & PROCB_VPID) == 0)
+		return;
+	vmx.state = VMXINACTIVE;
+	vmx.lastcmd = &vmx.firstcmd;
+	vmx.mem.next = &vmx.mem;
+	vmx.mem.prev = &vmx.mem;
+}
+
+/* Device shutdown: execute vmxoff() unless no VM was ever started. */
+static void
+vmxshutdown(void)
+{
+	if(vmx.state == NOVMX || vmx.state == VMXINACTIVE)
+		return;
+	vmxoff();
+}
+
+/*
+ * Fill in the current VMCS with the initial control, host and guest
+ * state, allocate the root EPT table and prepare the guest FPU save
+ * area.  Raises an error if a capability MSR cannot be read, a VMCS
+ * write fails or memory is exhausted.
+ *
+ * Changes from the previous version:
+ *  - the root EPT table allocation is checked before it is cleared;
+ *  - the FPU save area is allocated once and reused, since the
+ *    teardown in cmdquit() does not free it.
+ *    NOTE(review): confirm nothing else in the file frees vmx.fp.
+ */
+static void
+vmcsinit(void)
+{
+	vlong msr;
+	u32int x;
+
+	memset(&vmx.ureg, 0, sizeof(vmx.ureg));
+	vmx.launched = 0;
+	vmx.onentry = 0;
+
+	/* bit 55 of the basic MSR selects the "true" capability MSRs */
+	if(rdmsr(VMX_BASIC_MSR, &msr) < 0) error("rdmsr(VMX_BASIC_MSR) failed");
+	if((msr & 1ULL<<55) != 0){
+		if(rdmsr(VMX_TRUE_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_TRUE_PROCB_CTLS_MSR) failed");
+		if(rdmsr(VMX_TRUE_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_TRUE_PINB_CTLS_MSR) failed");
+	}else{
+		if(rdmsr(VMX_PROCB_CTLS_MSR, &procb_ctls) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR) failed");
+		if(rdmsr(VMX_PINB_CTLS_MSR, &pinb_ctls) < 0) error("rdmsr(VMX_PINB_CTLS_MSR) failed");
+	}
+
+	/*
+	 * Each control word starts from the low half of its capability
+	 * MSR plus the bits we want, and is then masked with the high half.
+	 */
+	if(rdmsr(VMX_PINB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PINB_CTLS_MSR failed");
+	x = (u32int)pinb_ctls | 1<<1 | 1<<2 | 1<<4; /* currently reserved default1 bits */
+	x |= PINB_EXITIRQ | PINB_EXITNMI;
+	x &= pinb_ctls >> 32;
+	vmcswrite(PINB_CTLS, x);
+
+	/* NOTE(review): masked with the non-"true" MSR here, unlike PINB_CTLS above */
+	if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
+	x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
+	x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
+	x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_EXITPAUSE;
+	x |= PROCB_USECTLS2;
+	x &= msr >> 32;
+	vmcswrite(PROCB_CTLS, x);
+
+	if(rdmsr(VMX_PROCB_CTLS2_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS2_MSR failed");
+	x = PROCB_EPT | PROCB_VPID | PROCB_UNRESTR;
+	x &= msr >> 32;
+	vmcswrite(PROCB_CTLS2, x);
+
+	if(rdmsr(VMX_VMEXIT_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMEXIT_CTLS_MSR failed");
+	x = (u32int)msr;
+	if(sizeof(uintptr) == 8) x |= VMEXIT_HOST64;
+	x &= msr >> 32;
+	vmcswrite(VMEXIT_CTLS, x);
+
+	if(rdmsr(VMX_VMENTRY_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_VMENTRY_CTLS_MSR failed");
+	x = (u32int)msr;
+	if(sizeof(uintptr) == 8) x |= VMENTRY_GUEST64;
+	x &= msr >> 32;
+	vmcswrite(VMENTRY_CTLS, x);
+
+	vmcswrite(CR3_TARGCNT, 0);
+	vmcswrite(VMEXIT_MSRLDCNT, 0);
+	vmcswrite(VMEXIT_MSRSTCNT, 0);
+	vmcswrite(VMENTRY_MSRLDCNT, 0);
+	vmcswrite(VMENTRY_INTRINFO, 0);
+	vmcswrite(VMCS_LINK, -1);
+
+	/* host state */
+	vmcswrite(HOST_CS, KESEL);
+	vmcswrite(HOST_DS, KDSEL);
+	vmcswrite(HOST_ES, KDSEL);
+	vmcswrite(HOST_FS, KDSEL);
+	vmcswrite(HOST_GS, KDSEL);
+	vmcswrite(HOST_SS, KDSEL);
+	vmcswrite(HOST_TR, TSSSEL);
+	vmcswrite(HOST_CR0, getcr0() & ~0xe);
+	vmcswrite(HOST_CR3, getcr3());
+	vmcswrite(HOST_CR4, getcr4());
+	rdmsr(0xc0000100, &msr);
+	vmcswrite(HOST_FSBASE, msr);
+	rdmsr(0xc0000101, &msr);
+	vmcswrite(HOST_GSBASE, msr);
+	vmcswrite(HOST_TRBASE, (uintptr) m->tss);
+	vmcswrite(HOST_GDTR, (uintptr) m->gdt);
+	vmcswrite(HOST_IDTR, IDTADDR);
+
+	vmcswrite(EXC_BITMAP, 1<<18);
+	vmcswrite(PFAULT_MASK, 0);
+	vmcswrite(PFAULT_MATCH, 0);
+
+	/* guest segments: flat, zero base, maximum limit */
+	vmcswrite(GUEST_CSBASE, 0);
+	vmcswrite(GUEST_DSBASE, 0);
+	vmcswrite(GUEST_ESBASE, 0);
+	vmcswrite(GUEST_FSBASE, 0);
+	vmcswrite(GUEST_GSBASE, 0);
+	vmcswrite(GUEST_SSBASE, 0);
+	vmcswrite(GUEST_CSLIMIT, -1);
+	vmcswrite(GUEST_DSLIMIT, -1);
+	vmcswrite(GUEST_ESLIMIT, -1);
+	vmcswrite(GUEST_FSLIMIT, -1);
+	vmcswrite(GUEST_GSLIMIT, -1);
+	vmcswrite(GUEST_SSLIMIT, -1);
+	vmcswrite(GUEST_CSPERM, (SEGG|SEGD|SEGP|SEGPL(0)|SEGEXEC|SEGR) >> 8 | 1);
+	vmcswrite(GUEST_DSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
+	vmcswrite(GUEST_ESPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
+	vmcswrite(GUEST_FSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
+	vmcswrite(GUEST_GSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
+	vmcswrite(GUEST_SSPERM, (SEGG|SEGB|SEGP|SEGPL(0)|SEGDATA|SEGW) >> 8 | 1);
+	vmcswrite(GUEST_LDTRPERM, 1<<16);
+
+	enum {
+		CR0RSVD = 0x1ffaffc0,
+		CR4RSVD = 0xff889000,
+		CR4VMXE = 1<<13,
+		CR4SMXE = 1<<14,
+	};
+	vmcswrite(GUEST_CR0MASK, CR0RSVD | (uintptr)0xFFFFFFFF00000000ULL);
+	vmcswrite(GUEST_CR4MASK, CR4RSVD | CR4VMXE | CR4SMXE | (uintptr)0xFFFFFFFF00000000ULL);
+	vmcswrite(GUEST_CR0, getcr0() & ~(1<<31));
+	vmcswrite(GUEST_CR3, 0);
+	vmcswrite(GUEST_CR4, getcr4());
+	vmcswrite(GUEST_CR0SHADOW, getcr0());
+	vmcswrite(GUEST_CR4SHADOW, getcr4() & ~CR4VMXE);
+
+	vmcswrite(GUEST_TRBASE, (uintptr) m->tss);
+	vmcswrite(GUEST_TRLIMIT, 0xffff);
+	vmcswrite(GUEST_TRPERM, (SEGTSS|SEGPL(0)|SEGP) >> 8 | 2);
+
+	vmx.pml4 = mallocalign(BY2PG, BY2PG, 0, 0);
+	if(vmx.pml4 == nil)
+		error(Enomem);
+	memset(vmx.pml4, 0, BY2PG);
+	vmcswrite(VM_EPTP, PADDR(vmx.pml4) | 3<<3);
+	vmx.vpid = 1;
+	vmcswrite(VM_VPID, vmx.vpid);
+
+	vmcswrite(GUEST_RFLAGS, 2);
+
+	vmx.onentry = FLUSHVPID | FLUSHEPT;
+
+	/* reuse the save area of a previous VM instead of leaking it */
+	if(vmx.fp == nil){
+		vmx.fp = mallocalign(512, 512, 0, 0);
+		if(vmx.fp == nil)
+			error(Enomem);
+	}
+	fpinit();
+	fpsave(vmx.fp);
+}
+
+/*
+ * Enter VMX operation on the current processor and make a fresh
+ * VMCS current, then initialise it with vmcsinit().
+ * One 8K buffer is allocated on first use and kept: the first page
+ * is the VMCS, the second the vmxon region.  Both start with the low
+ * 32 bits of VMX_BASIC_MSR.
+ * Raises an error if allocation, the MSR read or any VMX instruction
+ * fails.  The MSR read used to be unchecked, which could store an
+ * uninitialised value in both regions.
+ */
+static void
+vmxstart(void)
+{
+	static uchar *vmcs; /* also vmxon region */
+	vlong x;
+
+	putcr4(getcr4() | 0x2000);
+
+	if(vmcs == nil){
+		vmcs = mallocalign(8192, 4096, 0, 0);
+		if(vmcs == nil)
+			error(Enomem);
+	}
+	memset(vmcs, 0, 8192);
+	if(rdmsr(VMX_BASIC_MSR, &x) < 0)
+		error("rdmsr(VMX_BASIC_MSR) failed");
+	*(ulong*)vmcs = x;
+	*(ulong*)&vmcs[4096] = x;
+	if(vmxon(PADDR(vmcs + 4096)) < 0)
+		error("vmxon failed");
+	if(vmclear(PADDR(vmcs)) < 0)
+		error("vmclear failed");
+	if(vmptrld(PADDR(vmcs)) < 0)
+		error("vmptrld failed");
+	vmcsinit();
+}
+
+/*
+ * Mark command p finished (CMDFDONE plus the extra flags in f) and
+ * wake the process sleeping on it.  Flag update and wakeup both
+ * happen with p's lock held; vmxcmd() takes and drops the same lock
+ * before returning.
+ */
+static void
+cmdrelease(VmCmd *p, int f)
+{
+ lock(p);
+ p->flags |= CMDFDONE | f;
+ wakeup(p);
+ unlock(p);
+}
+
+/*
+ * Fail every postponed and every queued command with Equit, except
+ * notme, and leave both lists empty.  The pending queue is emptied
+ * while holding vmx.cmdlock.
+ */
+static void
+killcmds(VmCmd *notme)
+{
+	VmCmd *cur, *nxt;
+
+	cur = vmx.postponed;
+	while(cur != nil){
+		nxt = cur->next;
+		cur->next = nil;
+		if(cur != notme){
+			kstrcpy(cur->errstr, Equit, ERRMAX);
+			cmdrelease(cur, CMDFFAIL);
+		}
+		cur = nxt;
+	}
+	vmx.postponed = nil;
+
+	ilock(&vmx.cmdlock);
+	cur = vmx.firstcmd;
+	while(cur != nil){
+		nxt = cur->next;
+		cur->next = nil;
+		if(cur != notme){
+			kstrcpy(cur->errstr, Equit, ERRMAX);
+			cmdrelease(cur, CMDFFAIL);
+		}
+		cur = nxt;
+	}
+	vmx.firstcmd = nil;
+	vmx.lastcmd = &vmx.firstcmd;
+	iunlock(&vmx.cmdlock);
+}
+
+/*
+ * Command: tear the VM down.
+ * Marks the VM as ending, clears the memory map, fails all other
+ * outstanding commands, frees the root EPT table, leaves VMX
+ * operation and finally releases the requester before the calling
+ * process exits via pexit().
+ */
+static int
+cmdquit(VmCmd *p, va_list va)
+{
+ /* set first so that vmxcmd() refuses new commands */
+ vmx.state = VMXENDING;
+ cmdclearmeminfo(p, va);
+ killcmds(p);
+
+ free(vmx.pml4);
+ vmx.pml4 = nil;
+ vmx.got = 0;
+ vmx.onentry = 0;
+ vmx.stepmap = nil;
+
+ vmxoff();
+ vmx.state = VMXINACTIVE;
+ cmdrelease(p, 0);
+ pexit(Equit, 1);
+ return 0;
+}
+
+/*
+ * Classify the VM exit that just happened.
+ * Exit reasons 1 and 3..8 (without the top bit set) are ignored, so
+ * the VM stays running.  Reason 37 while stepping completes the step.
+ * Everything else stops the VM (VMXREADY) and records GOTEXIT; if a
+ * step was in progress it is reported as failed.
+ */
+static void
+processexit(void)
+{
+	u32int reason, basic;
+
+	reason = vmcsread(VM_EXREASON);
+	if((reason & 1<<31) == 0){
+		basic = reason & 0xffff;
+		/* 1: external interrupt, 3: INIT, 4: SIPI, 5: IO SMI, 6: SMI, 7: IRQ window, 8: NMI window */
+		if(basic == 1 || basic >= 3 && basic <= 8)
+			return;
+		if(basic == 37 && (vmx.onentry & STEP) != 0){
+			vmx.state = VMXREADY;
+			vmx.got |= GOTSTEP;
+			vmx.onentry &= ~STEP;
+			return;
+		}
+	}
+	if((vmx.onentry & STEP) != 0){
+		iprint("VMX: exit reason %#x when expected step...\n", reason & 0xffff);
+		vmx.onentry &= ~STEP;
+		vmx.got |= GOTSTEP|GOTSTEPERR;
+	}
+	vmx.state = VMXREADY;
+	vmx.got |= GOTEXIT;
+}
+
+/*
+ * Command: print every entry of guestregs as "name value\n" into the
+ * buffer whose start and end pointers are the two va arguments.
+ * Values come from the VMCS (offset >= 0) or from vmx.ureg, and are
+ * zero-padded to two hex digits per byte.  Returns bytes written.
+ */
+static int
+cmdgetregs(VmCmd *, va_list va)
+{
+	char *start, *end, *out;
+	GuestReg *r;
+	uvlong val;
+	int width, i;
+
+	start = va_arg(va, char *);
+	end = va_arg(va, char *);
+	out = start;
+	for(i = 0; i < nelem(guestregs); i++){
+		r = &guestregs[i];
+		if(r->offset < 0)
+			val = *(uintptr*)((uchar*)&vmx.ureg + ~r->offset);
+		else
+			val = vmcsread(r->offset);
+		width = r->size;
+		if(width == 0)
+			width = sizeof(uintptr);
+		out = seprint(out, end, "%s %#.*llux\n", r->name, width * 2, val);
+	}
+	return out - start;
+}
+
+/*
+ * Parse register assignments from p0 and apply them.
+ * Records end with the character rs; the name and value inside a
+ * record are separated by any character of fs.  Text after the last
+ * rs is left unparsed.  Returns the number of bytes consumed.
+ *
+ * Registers with a write hook are handed to the hook (its result is
+ * not checked).  Other values must fit the register's size; they are
+ * written to the VMCS (offset >= 0) or into vmx.ureg.
+ * Raises an error for a malformed record, an unknown register or an
+ * out-of-range value.
+ */
+static int
+setregs(char *p0, char rs, char *fs)
+{
+	char *p, *q, *rp;
+	char *f[10];
+	GuestReg *r;
+	uvlong val;
+	int sz;
+	int rc;
+
+	p = p0;
+	for(;;){
+		q = strchr(p, rs);
+		if(q == 0) break;
+		*q = 0;
+		rc = getfields(p, f, nelem(f), 1, fs);
+		p = q + 1;
+		if(rc == 0) continue;
+		if(rc != 2) error("number of fields wrong");
+
+		for(r = guestregs; r < guestregs + nelem(guestregs); r++)
+			if(strcmp(r->name, f[0]) == 0)
+				break;
+		if(r == guestregs + nelem(guestregs))
+			error("unknown register");
+		if(r->write != nil){
+			r->write(f[1]);
+			continue;
+		}
+		val = strtoull(f[1], &rp, 0);
+		sz = r->size;
+		if(sz == 0) sz = sizeof(uintptr);
+		/*
+		 * Range check only for registers narrower than val: shifting a
+		 * 64-bit value by 64 is undefined and made every nonzero
+		 * 8-byte value look out of range.
+		 */
+		if(*rp != 0 || sz < 8 && val >> 8 * sz != 0) error("invalid value");
+		if(r->offset >= 0)
+			vmcswrite(r->offset, val);
+		else{
+			assert((u32int)~r->offset + sz <= sizeof(Ureg));
+			switch(sz){
+			case 1: *(u8int*)((u8int*)&vmx.ureg + (u32int)~r->offset) = val; break;
+			case 2: *(u16int*)((u8int*)&vmx.ureg + (u32int)~r->offset) = val; break;
+			case 4: *(u32int*)((u8int*)&vmx.ureg + (u32int)~r->offset) = val; break;
+			case 8: *(u64int*)((u8int*)&vmx.ureg + (u32int)~r->offset) = val; break;
+			default: error(Egreg);
+			}
+		}
+	}
+	return p - p0;
+}
+
+/*
+ * Command: apply "name value" lines from the buffer passed in va.
+ * Lines end in newline; fields are separated by space or tab.
+ */
+static int
+cmdsetregs(VmCmd *, va_list va)
+{
+	char *text;
+
+	text = va_arg(va, char *);
+	return setregs(text, '\n', " \t");
+}
+
+/*
+ * Command: copy the guest FPU save area into the buffer passed in va.
+ * Always copies and returns sizeof(FPsave) bytes.
+ */
+static int
+cmdgetfpregs(VmCmd *, va_list va)
+{
+	uchar *dst;
+
+	dst = va_arg(va, uchar *);
+	memmove(dst, vmx.fp, sizeof(FPsave));
+	return sizeof(FPsave);
+}
+
+/*
+ * Command: overwrite part of the guest FPU save area.
+ * va carries the source buffer, the byte count and the offset into
+ * the save area.  An offset outside the area copies nothing; a count
+ * running past the end is clamped to the bytes that remain after the
+ * offset.  Returns the number of bytes copied.
+ */
+static int
+cmdsetfpregs(VmCmd *, va_list va)
+{
+	uchar *p;
+	ulong n;
+	vlong off;
+
+	p = va_arg(va, uchar *);
+	n = va_arg(va, ulong);
+	off = va_arg(va, vlong);
+	if(off < 0 || off >= sizeof(FPsave))
+		return 0;
+	/* was sizeof(FPsave) - n, which underflows and overruns vmx.fp */
+	if(off + n > sizeof(FPsave))
+		n = sizeof(FPsave) - off;
+	memmove((uchar*)vmx.fp + off, p, n);
+	return n;
+}
+
+/*
+ * Command: start running the guest.
+ * Requires state VMXREADY.  An optional string of "name=value;"
+ * register assignments in va is applied first.
+ */
+static int
+cmdgo(VmCmd *, va_list va)
+{
+	char *assign;
+
+	if(vmx.state != VMXREADY)
+		error("VM not ready");
+	assign = va_arg(va, char *);
+	if(assign != nil)
+		setregs(assign, ';', "=");
+	vmx.state = VMXRUNNING;
+	return 0;
+}
+
+static int
+cmdstop(VmCmd *, va_list)
+{
+ if(vmx.state != VMXREADY && vmx.state != VMXRUNNING)
+ error("VM not ready or running");
+ vmx.state = VMXREADY;
+ return 0;
+}
+
+/*
+ * Command: copy vmx.errstr into the ERRMAX-sized buffer passed in va
+ * and return the current state number.
+ */
+static int
+cmdstatus(VmCmd *, va_list va)
+{
+	char *dst;
+
+	dst = va_arg(va, char *);
+	kstrcpy(dst, vmx.errstr, ERRMAX);
+	return vmx.state;
+}
+
+/*
+ * Names printed by cmdwait(), indexed by the low 16 bits of the exit
+ * reason.  Gaps are nil and print as "?number".
+ */
+static char *exitreasons[] = {
+ [0] "exc", [1] "extirq", [2] "triplef", [3] "initsig", [4] "sipi", [5] "smiio", [6] "smiother", [7] "irqwin",
+ [8] "nmiwin", [9] "taskswitch", [10] ".cpuid", [11] ".getsec", [12] ".hlt", [13] ".invd", [14] ".invlpg", [15] ".rdpmc",
+ [16] ".rdtsc", [17] ".rsm", [18] ".vmcall", [19] ".vmclear", [20] ".vmlaunch", [21] ".vmptrld", [22] ".vmptrst", [23] ".vmread",
+ [24] ".vmresume", [25] ".vmwrite", [26] ".vmxoff", [27] ".vmxon", [28] "movcr", [29] ".movdr", [30] "io", [31] ".rdmsr",
+ [32] ".wrmsr", [33] "entrystate", [34] "entrymsr", [36] ".mwait", [37] "monitortrap", [39] ".monitor",
+ [40] ".pause", [41] "mcheck", [43] "tpr", [44] "apicacc", [45] "eoi", [46] "gdtr_idtr", [47] "ldtr_tr",
+ [48] "eptfault", [49] "eptinval", [50] ".invept", [51] ".rdtscp", [52] "preempt", [53] ".invvpid", [54] ".wbinvd", [55] ".xsetbv",
+ [56] "apicwrite", [57] ".rdrand", [58] ".invpcid", [59] ".vmfunc", [60] ".encls", [61] ".rdseed", [62] "pmlfull", [63] ".xsaves",
+ [64] ".xrstors",
+};
+
+/* exception names indexed by vector; used by cmdwait() for output and eventparse() for input */
+static char *except[] = {
+ [0] "#de", [1] "#db", [3] "#bp", [4] "#of", [5] "#br", [6] "#ud", [7] "#nm",
+ [8] "#df", [10] "#ts", [11] "#np", [12] "#ss", [13] "#gp", [14] "#pf",
+ [16] "#mf", [17] "#ac", [18] "#mc", [19] "#xm", [20] "#ve",
+};
+
+/*
+ * Command: report the next VM event as one line of text in the
+ * buffer whose start and end pointers are the two va arguments.
+ * A pending interrupt acknowledgement is reported first as "*ack n".
+ * Otherwise, if no exit has been recorded, the command postpones
+ * itself (CMDFPOSTP) and returns -1.  An exit is printed as its name
+ * followed by the qualification, pc, sp, instruction length and
+ * instruction info, plus extra fields for some exit kinds.
+ * Returns the number of bytes written.
+ */
+static int
+cmdwait(VmCmd *cp, va_list va)
+{
+ char *p, *p0, *e;
+ u32int reason, intr;
+ uvlong qual;
+ u16int rno;
+
+ /* the requester was interrupted while waiting */
+ if(cp->scratched)
+ error(Eintr);
+ p0 = p = va_arg(va, char *);
+ e = va_arg(va, char *);
+ if((vmx.got & GOTIRQACK) != 0){
+ p = seprint(p, e, "*ack %d\n", vmx.irqack.info & 0xff);
+ vmx.got &= ~GOTIRQACK;
+ return p - p0;
+ }
+ if((vmx.got & GOTEXIT) == 0){
+ cp->flags |= CMDFPOSTP;
+ return -1;
+ }
+ vmx.got &= ~GOTEXIT;
+ reason = vmcsread(VM_EXREASON);
+ qual = vmcsread(VM_EXQUALIF);
+ /* rno keeps only the low 16 bits of the reason */
+ rno = reason;
+ intr = vmcsread(VM_EXINTRINFO);
+ if((reason & 1<<31) != 0)
+ p = seprint(p, e, "!");
+ if(rno == 0 && (intr & 1<<31) != 0){
+ if((intr & 0xff) >= nelem(except) || except[intr & 0xff] == nil)
+ p = seprint(p, e, "#%d ", intr & 0xff);
+ else
+ p = seprint(p, e, "%s ", except[intr & 0xff]);
+ }else if(rno >= nelem(exitreasons) || exitreasons[rno] == nil)
+ p = seprint(p, e, "?%d ", rno);
+ else
+ p = seprint(p, e, "%s ", exitreasons[rno]);
+ p = seprint(p, e, "%#ullx pc %#ullx sp %#ullx ilen %#ullx iinfo %#ullx", qual, vmcsread(GUEST_RIP), vmcsread(GUEST_RSP), vmcsread(VM_EXINSTRLEN), vmcsread(VM_EXINSTRINFO));
+ if((intr & 1<<11) != 0) p = seprint(p, e, " excode %#ullx", vmcsread(VM_EXINTRCODE));
+ /* 48 is "eptfault" and 49 "eptinval" in exitreasons[] */
+ if(rno == 48 && (qual & 0x80) != 0) p = seprint(p, e, " va %#ullx", vmcsread(VM_GUESTVA));
+ if(rno == 48 || rno == 49) p = seprint(p, e, " pa %#ullx", vmcsread(VM_GUESTPA));
+ /* 30 is "io" in exitreasons[] */
+ if(rno == 30) p = seprint(p, e, " ax %#ullx", (uvlong)vmx.ureg.ax);
+ p = seprint(p, e, "\n");
+ return p - p0;
+}
+
+/*
+ * Command: single-step the guest.  Runs in two phases selected by
+ * cp->retval, which holds the value returned by the previous call.
+ * Phase 0 arms the step, starts the guest and postpones the command,
+ * returning 1.  Phase 1 is entered on retry: it keeps postponing
+ * until GOTSTEP is set, then reports success or "step failed".
+ * va carries an optional VmMem to map temporarily for the step.
+ */
+static int
+cmdstep(VmCmd *cp, va_list va)
+{
+ switch(cp->retval){
+ case 0:
+ if((vmx.got & GOTSTEP) != 0 || (vmx.onentry & STEP) != 0)
+ error(Einuse);
+ if(vmx.state != VMXREADY){
+ iprint("pre-step in state %s\n", statenames[vmx.state]);
+ error("not ready");
+ }
+ vmx.stepmap = va_arg(va, VmMem *);
+ vmx.onentry |= STEP;
+ vmx.state = VMXRUNNING;
+ cp->flags |= CMDFPOSTP;
+ return 1;
+ case 1:
+ if(vmx.state != VMXREADY){
+ iprint("post-step in state %s\n", statenames[vmx.state]);
+ vmx.onentry &= ~STEP;
+ vmx.got &= ~(GOTSTEP|GOTSTEPERR);
+ error("not ready");
+ }
+ if((vmx.got & GOTSTEP) == 0){
+ cp->flags |= CMDFPOSTP;
+ return 1;
+ }
+ if((vmx.got & GOTSTEPERR) != 0){
+ vmx.got &= ~(GOTSTEP|GOTSTEPERR);
+ error("step failed");
+ }
+ vmx.got &= ~(GOTSTEP|GOTSTEPERR);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Parse an event description "vector[,code[,ilen]]" from p into *vi.
+ * vector is a name from except[] or a decimal number 0..255,
+ * optionally prefixed with '#'.  On any parse error *vi is zeroed
+ * and Ebadctl is raised.
+ *
+ * The string is duplicated so that it can be cut in place.  The copy
+ * is tracked in buf and parsing uses a separate cursor; the previous
+ * code advanced the allocation pointer itself and then freed it.
+ */
+static void
+eventparse(char *p, VmIntr *vi)
+{
+	char *buf, *q, *r;
+	ulong n;
+	int i;
+
+	memset(vi, 0, sizeof(VmIntr));
+	buf = nil;
+	kstrdup(&buf, p);
+	if(waserror()){
+		free(buf);
+		memset(vi, 0, sizeof(VmIntr));
+		nexterror();
+	}
+	q = buf;
+	vi->info = 1<<31;
+	r = strchr(q, ',');
+	if(r != nil) *r++ = 0;
+	for(i = 0; i < nelem(except); i++)
+		if(except[i] != nil && strcmp(except[i], q) == 0)
+			break;
+	if(*q == '#'){
+		q++;
+		vi->info |= 3 << 8;
+	}
+	if(i == nelem(except)){
+		/* unsigned compare, so huge values cannot wrap negative and pass */
+		n = strtoul(q, &q, 10);
+		if(*q != 0 || n > 255) error(Ebadctl);
+		i = n;
+	}
+	vi->info |= i;
+	if((vi->info & 0x7ff) == 3 || (vi->info & 0x7ff) == 4)
+		vi->info += 3 << 8;
+	if(r == nil) goto out;
+	if(*r != ','){
+		vi->code = strtoul(r, &r, 0);
+		vi->info |= 1<<11;
+	}else r++;
+	if(*r == ',')
+		vi->ilen = strtoul(r + 1, &r, 0);
+	if(*r != 0) error(Ebadctl);
+out:
+	poperror();
+	free(buf);
+}
+
+/*
+ * Command: queue an exception for injection on the next VM entry.
+ * If one is already queued (POSTEX set) the command postpones itself
+ * and is retried later.  Always returns 0.
+ */
+static int
+cmdexcept(VmCmd *cp, va_list va)
+{
+	if(cp->scratched)
+		error(Eintr);
+	if((vmx.onentry & POSTEX) == 0){
+		eventparse(va_arg(va, char *), &vmx.exc);
+		vmx.onentry |= POSTEX;
+	}else
+		cp->flags |= CMDFPOSTP;
+	return 0;
+}
+
+/*
+ * Command: set or clear the pending interrupt.
+ * A nil argument withdraws it; otherwise the description is parsed
+ * into a local first, so that vmx.irq only changes on success.
+ */
+static int
+cmdirq(VmCmd *, va_list va)
+{
+	VmIntr parsed;
+	char *desc;
+
+	desc = va_arg(va, char *);
+	if(desc == nil){
+		vmx.onentry &= ~POSTIRQ;
+		return 0;
+	}
+	eventparse(desc, &parsed);
+	vmx.irq = parsed;
+	vmx.onentry |= POSTIRQ;
+	return 0;
+}
+
+
+/* sleep() condition for vmxproc: non-zero when the command queue is not empty. */
+static int
+gotcmd(void *)
+{
+	int pending;
+
+	ilock(&vmx.cmdlock);
+	pending = vmx.firstcmd != nil;
+	iunlock(&vmx.cmdlock);
+	return pending;
+}
+
+/*
+ * Finish handling of a freshly dequeued command p.
+ * If it asked to be postponed and did not fail, append it to the
+ * postponed list through the tail pointer *pp and advance the tail.
+ * Otherwise clear CMDFPOSTP and release the requester.
+ */
+static void
+markcmddone(VmCmd *p, VmCmd ***pp)
+{
+ if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP){
+ **pp = p;
+ *pp = &p->next;
+ }else{
+ p->flags = p->flags & ~CMDFPOSTP;
+ cmdrelease(p, 0);
+ }
+}
+
+/*
+ * Finish handling of the postponed command that *pp points at.
+ * If it postponed itself again without failing, it stays on the list
+ * and the link after it is returned.  Otherwise it is unlinked and
+ * released, and pp is returned so the caller looks at its successor.
+ */
+static VmCmd **
+markppcmddone(VmCmd **pp)
+{
+ VmCmd *p;
+
+ p = *pp;
+ if((p->flags & (CMDFFAIL|CMDFPOSTP)) == CMDFPOSTP)
+ return &p->next;
+ *pp = p->next;
+ p->next = nil;
+ p->flags = p->flags & ~CMDFPOSTP;
+ cmdrelease(p, 0);
+ return pp;
+}
+
+
+/*
+ * Execute commands on behalf of vmxproc.
+ * First every postponed command is retried; then the pending queue
+ * is drained one command at a time.  A command that raises an error
+ * is marked CMDFFAIL with the error text copied to its requester.
+ * Commands that postpone themselves end up on vmx.postponed.
+ */
+static void
+runcmd(void)
+{
+ VmCmd *p, **pp;
+
+ for(pp = &vmx.postponed; p = *pp, p != nil; ){
+ if(waserror()){
+ kstrcpy(p->errstr, up->errstr, ERRMAX);
+ p->flags |= CMDFFAIL;
+ pp = markppcmddone(pp);
+ continue;
+ }
+ p->flags &= ~CMDFPOSTP;
+ p->retval = p->cmd(p, p->va);
+ poperror();
+ pp = markppcmddone(pp);
+ }
+ /* pp now addresses the tail link of the postponed list */
+ for(;;){
+ ilock(&vmx.cmdlock);
+ p = vmx.firstcmd;
+ if(p == nil){
+ iunlock(&vmx.cmdlock);
+ break;
+ }
+ vmx.firstcmd = p->next;
+ /* p was the last element: reset the tail pointer */
+ if(vmx.lastcmd == &p->next)
+ vmx.lastcmd = &vmx.firstcmd;
+ iunlock(&vmx.cmdlock);
+ p->next = nil;
+ if(waserror()){
+ kstrcpy(p->errstr, up->errstr, ERRMAX);
+ p->flags |= CMDFFAIL;
+ markcmddone(p, &pp);
+ continue;
+ }
+ if(p->scratched) error(Eintr);
+ p->retval = p->cmd(p, p->va);
+ poperror();
+ markcmddone(p, &pp);
+ }
+}
+
+/*
+ * Bracket a single step.
+ * setup != 0: if a step mapping was supplied, remember the EPT entry
+ * it will replace and install it.
+ * setup == 0: clear PROCB_MONTRAP in the VMCS and, if a step mapping
+ * was installed, restore the saved entry and request an EPT flush.
+ */
+static void
+dostep(int setup)
+{
+	static uvlong oldmap;
+	static uvlong *mapptr;
+
+	if(!setup){
+		vmcswrite(PROCB_CTLS, vmcsread(PROCB_CTLS) & ~(uvlong)PROCB_MONTRAP);
+		if(vmx.stepmap == nil)
+			return;
+		*mapptr = oldmap;
+		vmx.stepmap = nil;
+		vmx.onentry |= FLUSHEPT;
+		return;
+	}
+	if(vmx.stepmap == nil)
+		return;
+	mapptr = eptwalk(vmx.stepmap->lo);
+	oldmap = *mapptr;
+	epttranslate(vmx.stepmap);
+}
+
+/*
+ * Kernel process that owns the VM.
+ * After wiring itself to processor 0 it starts VMX once, then loops:
+ * run queued commands and, while the state is VMXRUNNING, prepare
+ * the VMCS (stepping, event injection, TLB flushes), enter the guest
+ * and classify the exit.  When not running it sleeps until a command
+ * arrives.  Any error raised inside the loop is recorded in
+ * vmx.errstr and moves the VM to VMXDEAD; the loop then continues so
+ * that commands are still served.
+ */
+static void
+vmxproc(void *)
+{
+ int init;
+ u32int procbctls, defprocbctls;
+
+ procwired(up, 0);
+ sched();
+ init = 0;
+ defprocbctls = 0;
+ /* re-armed after every error, so the handler stays installed */
+ while(waserror()){
+ kstrcpy(vmx.errstr, up->errstr, ERRMAX);
+ vmx.state = VMXDEAD;
+ }
+ for(;;){
+ if(!init){
+ init = 1;
+ vmxstart();
+ vmx.state = VMXREADY;
+ defprocbctls = vmcsread(PROCB_CTLS);
+ }
+ runcmd();
+ if(vmx.state == VMXRUNNING){
+ procbctls = defprocbctls;
+ if((vmx.onentry & STEP) != 0){
+ procbctls |= PROCB_MONTRAP;
+ dostep(1);
+ /* undo the step setup if anything below fails */
+ if(waserror()){
+ dostep(0);
+ nexterror();
+ }
+ }
+ if((vmx.onentry & POSTEX) != 0){
+ vmcswrite(VMENTRY_INTRINFO, vmx.exc.info);
+ vmcswrite(VMENTRY_INTRCODE, vmx.exc.code);
+ vmcswrite(VMENTRY_INTRILEN, vmx.exc.ilen);
+ vmx.onentry &= ~POSTEX;
+ }
+ if((vmx.onentry & POSTIRQ) != 0 && (vmx.onentry & STEP) == 0){
+ /* inject now if possible, otherwise ask for an exit via PROCB_IRQWIN */
+ if((vmx.onentry & POSTEX) == 0 && (vmcsread(GUEST_RFLAGS) & 1<<9) != 0 && (vmcsread(GUEST_CANINTR) & 3) == 0){
+ vmcswrite(VMENTRY_INTRINFO, vmx.irq.info);
+ vmcswrite(VMENTRY_INTRCODE, vmx.irq.code);
+ vmcswrite(VMENTRY_INTRILEN, vmx.irq.ilen);
+ vmx.onentry &= ~POSTIRQ;
+ vmx.got |= GOTIRQACK;
+ vmx.irqack = vmx.irq;
+ }else
+ procbctls |= PROCB_IRQWIN;
+ }
+ if((vmx.onentry & FLUSHVPID) != 0){
+ if(invvpid(INVLOCAL, vmx.vpid, 0) < 0)
+ error("invvpid failed");
+ vmx.onentry &= ~FLUSHVPID;
+ }
+ if((vmx.onentry & FLUSHEPT) != 0){
+ if(invept(INVLOCAL, PADDR(vmx.pml4) | 3<<3, 0) < 0)
+ error("invept failed");
+ vmx.onentry &= ~FLUSHEPT;
+ }
+ vmcswrite(PROCB_CTLS, procbctls);
+ vmx.got &= ~GOTEXIT;
+ if(vmlaunch(&vmx.ureg, vmx.launched, vmx.fp) < 0)
+ error("vmlaunch failed");
+ vmx.launched = 1;
+ if((vmx.onentry & STEP) != 0){
+ dostep(0);
+ poperror();
+ }
+ processexit();
+ }else{
+ up->psstate = "Idle";
+ sleep(&vmx.cmdwait, gotcmd, nil);
+ up->psstate = nil;
+ }
+ }
+}
+
+enum { /* qid paths of the files served by this device; see vmxdir */
+ Qdir, /* the directory itself */
+ Qctl, /* "ctl" */
+ Qregs, /* "regs" */
+ Qstatus, /* "status" */
+ Qmap, /* "map" */
+ Qwait, /* "wait" */
+ Qfpregs, /* "fpregs" */
+};
+
+static Dirtab vmxdir[] = { /* directory table passed to devwalk/devstat/devopen/devdirread together with devgen */
+ ".", { Qdir, 0, QTDIR }, 0, 0550,
+ "ctl", { Qctl, 0, 0 }, 0, 0660, /* control messages, parsed in vmxwrite */
+ "regs", { Qregs, 0, 0 }, 0, 0660, /* read: cmdgetregs; write: cmdsetregs */
+ "status", { Qstatus, 0, 0 }, 0, 0440, /* vmxread reports vmx.state by name */
+ "map", { Qmap, 0, 0 }, 0, 0660, /* read: cmdgetmeminfo; write: cmdsetmeminfo; open with OTRUNC: cmdclearmeminfo */
+ "wait", { Qwait, 0, 0 }, 0, 0440, /* each read runs cmdwait */
+ "fpregs", { Qfpregs, 0, 0 }, 0, 0660, /* read: cmdgetfpregs; write: cmdsetfpregs */
+};
+
+enum { /* indices of the ctl messages listed in vmxctlmsg; vmxwrite switches on them */
+ CMinit, /* "init" */
+ CMquit, /* "quit" */
+ CMgo, /* "go" */
+ CMstop, /* "stop" */
+ CMstep, /* "step" */
+ CMexc, /* "exc" */
+ CMirq, /* "irq" */
+};
+
+static Cmdtab vmxctlmsg[] = { /* index, keyword, field count; presumably a count of 0 means lookupcmd does not check it -- confirm */
+ CMinit, "init", 1,
+ CMquit, "quit", 1,
+ CMgo, "go", 0, /* vmxwrite itself accepts 1 or 2 fields */
+ CMstop, "stop", 1,
+ CMstep, "step", 0, /* optionally followed by "-map" and three arguments */
+ CMexc, "exc", 2, /* one argument, handed to cmdexcept */
+ CMirq, "irq", 0, /* optional argument, handed to cmdirq */
+};
+
+static int	/* sleep() condition used by vmxcmd: reports whether CMDFDONE is set in the command's flags */
+iscmddone(void *cp)
+{
+	return (((VmCmd*)cp)->flags & CMDFDONE) ? 1 : 0;
+}
+
+static int /* queue command f (with its varargs) for the vmx kproc, sleep until it is marked done, return cmd.retval */
+vmxcmd(int (*f)(VmCmd *, va_list), ...)
+{
+ VmCmd cmd; /* lives in this stack frame while it is linked into the queue */
+
+ if(vmx.state == VMXINACTIVE)
+ error("no VM");
+ if(vmx.state == VMXENDING)
+ ending: /* also the target of the goto in the locked re-check below */
+ error(Equit);
+ memset(&cmd, 0, sizeof(VmCmd));
+ cmd.errstr = up->errstr; /* hand the caller's error buffer to the command */
+ cmd.cmd = f;
+ va_start(cmd.va, f);
+
+ ilock(&vmx.cmdlock);
+ if(vmx.state == VMXENDING){ /* re-check under the lock before enqueueing */
+ iunlock(&vmx.cmdlock);
+ goto ending;
+ }
+ *vmx.lastcmd = &cmd; /* append at the tail of the queue */
+ vmx.lastcmd = &cmd.next;
+ iunlock(&vmx.cmdlock);
+
+ while(waserror()) /* an error during the sleep only flags the command; the wait is then resumed */
+ cmd.scratched = 1;
+ wakeup(&vmx.cmdwait); /* vmxproc sleeps on cmdwait when idle */
+ do
+ sleep(&cmd, iscmddone, &cmd);
+ while(!iscmddone(&cmd));
+ poperror();
+ lock(&cmd); /* presumably waits until the completing side has let go of the command -- confirm */
+ unlock(&cmd);
+ if((cmd.flags & CMDFFAIL) != 0) /* failed command: re-raise with the string in up->errstr */
+ error(up->errstr);
+ return cmd.retval;
+}
+
+static Chan * /* attach hook: raises Enodev when vmx.state is NOVMX, otherwise attaches as device 'X' */
+vmxattach(char *spec)
+{
+ if(vmx.state == NOVMX) error(Enodev);
+ return devattach('X', spec);
+}
+
+static Walkqid* /* walk hook: delegates to devwalk over the vmxdir table */
+vmxwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+ return devwalk(c, nc, name, nname, vmxdir, nelem(vmxdir), devgen);
+}
+
+static int /* stat hook: delegates to devstat over the vmxdir table */
+vmxstat(Chan *c, uchar *dp, int n)
+{
+ return devstat(c, dp, n, vmxdir, nelem(vmxdir), devgen);
+}
+
+static Chan*	/* open hook: permission check, generic devopen, then the OTRUNC side effect on "map" */
+vmxopen(Chan* c, int omode)
+{
+	Chan *opened;
+
+	if(c->qid.path != Qdir && !iseve())	/* only the directory may be opened when iseve() is false */
+		error(Eperm);
+	opened = devopen(c, omode, vmxdir, nelem(vmxdir), devgen);
+	/* opening "map" with OTRUNC runs cmdclearmeminfo through the command queue */
+	if(opened->qid.path == Qmap && (omode & OTRUNC) != 0)
+		vmxcmd(cmdclearmeminfo);
+	return opened;
+}
+
+static void /* close hook: intentionally empty, nothing is done per channel */
+vmxclose(Chan*)
+{
+}
+
+static long /* read hook: dispatches on the qid path; most files are produced by a command run through vmxcmd */
+vmxread(Chan* c, void* a, long n, vlong off)
+{
+ static char regbuf[4096]; /* text of "regs", regenerated only by a read at offset 0 */
+ static char membuf[4096]; /* text of "map", regenerated only by a read at offset 0 */
+ int rc;
+ /* NOTE(review): regbuf and membuf are static, so all readers share them -- confirm this is acceptable */
+ switch((ulong)c->qid.path){
+ case Qdir:
+ return devdirread(c, a, n, vmxdir, nelem(vmxdir), devgen);
+ case Qregs:
+ if(off == 0) /* refresh the text at the start of a read sequence */
+ vmxcmd(cmdgetregs, regbuf, regbuf + sizeof(regbuf));
+ return readstr(off, a, n, regbuf);
+ case Qmap:
+ if(off == 0) /* refresh the text at the start of a read sequence */
+ vmxcmd(cmdgetmeminfo, membuf, membuf + sizeof(membuf));
+ return readstr(off, a, n, membuf);
+ case Qstatus:
+ {
+ char buf[ERRMAX+128];
+ char errbuf[ERRMAX];
+ int status;
+
+ status = vmx.state; /* sampled once so the tests below all see the same value */
+ if(status == VMXDEAD){ /* dead VM: append the string filled in by cmdstatus */
+ vmxcmd(cmdstatus, errbuf);
+ snprint(buf, sizeof(buf), "%s %#q\n", statenames[status], errbuf);
+ }else if(status >= 0 && status < nelem(statenames))
+ snprint(buf, sizeof(buf), "%s\n", statenames[status]);
+ else /* no name for this state: print the number */
+ snprint(buf, sizeof(buf), "%d\n", status);
+ return readstr(off, a, n, buf);
+ }
+ case Qwait:
+ {
+ char buf[512];
+
+ rc = vmxcmd(cmdwait, buf, buf + sizeof(buf)); /* presumably the number of bytes placed in buf -- confirm */
+ if(rc > n) rc = n; /* never copy more than the caller asked for */
+ if(rc > 0) memmove(a, buf, rc);
+ return rc;
+ }
+ case Qfpregs:
+ {
+ char buf[sizeof(FPsave)];
+
+ vmxcmd(cmdgetfpregs, buf);
+ if(n < 0 || off < 0 || off >= sizeof(buf)) n = 0; /* request lies outside the buffer: read nothing */
+ else if(off + n > sizeof(buf)) n = sizeof(buf) - off; /* clip to the end of the buffer */
+ if(n != 0) memmove(a, buf + off, n);
+ return n;
+ }
+ default:
+ error(Egreg);
+ break;
+ }
+ return 0;
+}
+
+static long /* write hook: parses ctl messages and forwards regs/map/fpregs data to the VM through vmxcmd */
+vmxwrite(Chan* c, void* a, long n, vlong off)
+{
+ static QLock initlock; /* serializes the state check and kproc creation in CMinit */
+ Cmdbuf *cb;
+ Cmdtab *ct;
+ char *s;
+ int rc;
+ int i;
+ VmMem tmpmem; /* mapping described by "step -map", passed by address to cmdstep */
+
+ switch((ulong)c->qid.path){
+ case Qdir:
+ error(Eperm); /* no break: relies on error() not returning */
+ case Qctl:
+ cb = parsecmd(a, n);
+ if(waserror()){ /* free the parsed command on any error below */
+ free(cb);
+ nexterror();
+ }
+ ct = lookupcmd(cb, vmxctlmsg, nelem(vmxctlmsg));
+ switch(ct->index){
+ case CMinit:
+ qlock(&initlock);
+ if(waserror()){
+ qunlock(&initlock);
+ nexterror();
+ }
+ if(vmx.state != VMXINACTIVE)
+ error("vmx already active");
+ vmx.state = VMXINIT;
+ kproc("kvmx", vmxproc, nil); /* start the kproc that runs vmxproc */
+ poperror();
+ qunlock(&initlock);
+ if(vmxcmd(cmdstatus, up->errstr) == VMXDEAD) /* report a VM that is already dead, with its error string */
+ error(up->errstr);
+ break;
+ case CMquit:
+ vmxcmd(cmdquit);
+ break;
+ case CMgo:
+ s = nil;
+ if(cb->nf == 2) kstrdup(&s, cb->f[1]); /* optional argument, copied before it is handed to the command */
+ else if(cb->nf != 1) error(Ebadarg);
+ if(waserror()){
+ free(s);
+ nexterror();
+ }
+ vmxcmd(cmdgo, s);
+ poperror();
+ free(s);
+ break;
+ case CMstop:
+ vmxcmd(cmdstop);
+ break;
+ case CMstep:
+ rc = 0; /* becomes 1 once a "-map" option has been parsed */
+ for(i = 1; i < cb->nf; i++)
+ if(strcmp(cb->f[i], "-map") == 0){
+ rc = 1;
+ if(i+4 > cb->nf) error("missing argument"); /* "-map" needs three more fields */
+ memset(&tmpmem, 0, sizeof(tmpmem));
+ tmpmem.lo = strtoull(cb->f[i+1], &s, 0);
+ if(*s != 0 || !vmokpage(tmpmem.lo)) error("invalid address");
+ tmpmem.hi = tmpmem.lo + BY2PG; /* the range covers BY2PG bytes */
+ tmpmem.attr = 0x407;
+ tmpmem.seg = _globalsegattach(cb->f[i+2]);
+ if(tmpmem.seg == nil) error("unknown segment");
+ tmpmem.off = strtoull(cb->f[i+3], &s, 0);
+ if(*s != 0 || !vmokpage(tmpmem.off)) error("invalid offset"); /* NOTE(review): tmpmem.seg is not released on this path -- confirm ownership */
+ i += 3; /* skip the three arguments just consumed */
+ }else
+ error(Ebadctl);
+ vmxcmd(cmdstep, rc ? &tmpmem : nil);
+ break;
+ case CMexc:
+ s = nil;
+ kstrdup(&s, cb->f[1]); /* vmxctlmsg lists "exc" with 2 fields, so f[1] is expected to exist */
+ if(waserror()){
+ free(s);
+ nexterror();
+ }
+ vmxcmd(cmdexcept, s);
+ poperror();
+ free(s);
+ break;
+ case CMirq:
+ s = nil;
+ if(cb->nf == 2) /* optional argument; s stays nil without it */
+ kstrdup(&s, cb->f[1]);
+ if(waserror()){
+ free(s);
+ nexterror();
+ }
+ vmxcmd(cmdirq, s);
+ poperror();
+ free(s);
+ break;
+ default:
+ error(Egreg);
+ }
+ poperror(); /* pairs with the waserror() that guards cb */
+ free(cb);
+ break;
+ case Qmap:
+ case Qregs:
+ s = malloc(n+1); /* private NUL-terminated copy of the written data */
+ if(s == nil) error(Enomem);
+ if(waserror()){
+ free(s);
+ nexterror();
+ }
+ memmove(s, a, n);
+ s[n] = 0;
+ rc = vmxcmd((ulong)c->qid.path == Qregs ? cmdsetregs : cmdsetmeminfo, s);
+ poperror();
+ free(s);
+ return rc; /* the command's return value, not n */
+ case Qfpregs:
+ {
+ char buf[sizeof(FPsave)];
+
+ if(n > sizeof(FPsave)) n = sizeof(FPsave); /* clip to the size of the staging buffer */
+ memmove(buf, a, n);
+ return vmxcmd(cmdsetfpregs, buf, n, off); /* off is passed through unchecked here */
+ }
+ default:
+ error(Egreg);
+ break;
+ }
+ return n; /* reached from the Qctl case */
+}
+
+Dev vmxdevtab = { /* device table entry for vmx; fields are positional */
+ 'X', /* same character vmxattach passes to devattach */
+ "vmx",
+
+ vmxreset,
+ devinit, /* dev* entries are not defined in this file; presumably the kernel's generic handlers */
+ vmxshutdown,
+ vmxattach,
+ vmxwalk,
+ vmxstat,
+ vmxopen,
+ devcreate,
+ vmxclose,
+ vmxread,
+ devbread,
+ vmxwrite,
+ devbwrite,
+ devremove,
+ devwstat,
+};
--- a/sys/src/9/pc/pcf
+++ b/sys/src/9/pc/pcf
@@ -38,6 +38,7 @@
usb
segment
+ vmx
link
segdesc