ref: 037bc7b4328948994f8b8fa2be487aac1cca835f
dir: /sys/src/9/xen/xensystem.c/
/*
 * xensystem.c
 *
 * TODO: we could handle mmu updates more efficiently by
 * using a multicall.
 * XXX perhaps we should check return values and panic on failure?
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "ureg.h"

#define LOG(a)

/*
 * These functions replace all the inlines that are used on Linux systems
 */

/* in xen.s */
int xencall1(int op);
int xencall2(int op, ulong arg1);
int xencall3(int op, ulong arg1, ulong arg2);
int xencall4(int op, ulong arg1, ulong arg2, ulong arg3);
int xencall5(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4);
int xencall6(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4, ulong arg5);
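/*
 * A sketch of the xen.s side, for orientation (not the actual source):
 * on 32-bit x86 a Xen hypercall puts the call number in AX and up to
 * five arguments in BX, CX, DX, SI, DI, then traps into the hypervisor
 * (INT $0x82 on older Xen; later versions go through a hypercall
 * transfer page).  xencall3 would then look something like:
 *
 *	TEXT xencall3(SB), $0
 *		MOVL	op+0(FP), AX
 *		MOVL	arg1+4(FP), BX
 *		MOVL	arg2+8(FP), CX
 *		INT	$0x82
 *		RET
 */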
int
HYPERVISOR_update_va_mapping(ulong va, uvlong newval, ulong flags)
{
	int ret;

	ret = xencall5(__HYPERVISOR_update_va_mapping, va, newval, newval>>32, flags);
	if(ret < 0)
		panic("update_va_mapping failed");
	return ret;
}

long
HYPERVISOR_set_timer_op(uvlong timeout)
{
	ulong hi, lo;

	hi = timeout>>32;
	lo = timeout;
	return xencall3(__HYPERVISOR_set_timer_op, lo, hi);
}

int
HYPERVISOR_set_trap_table(trap_info_t *table)
{
	return xencall2(__HYPERVISOR_set_trap_table, (ulong)table);
}

int
HYPERVISOR_mmu_update(mmu_update_t *req, int count, int *success_count, domid_t domid)
{
	return xencall5(__HYPERVISOR_mmu_update, (ulong)req, count, (ulong)success_count, domid);
}

int
HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, int *scount, domid_t domid)
{
	return xencall5(__HYPERVISOR_mmuext_op, (ulong)op, count, (ulong)scount, domid);
}

int
HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
{
	return xencall3(__HYPERVISOR_set_gdt, (ulong)frame_list, entries);
}

int
HYPERVISOR_stack_switch(ulong ss, ulong esp)
{
	return xencall3(__HYPERVISOR_stack_switch, ss, esp);
}

/* XXX match evfunc and fsfunc prototypes? */
int
HYPERVISOR_set_callbacks(ulong evss, ulong evfunc, ulong fsss, ulong fsfunc)
{
	return xencall5(__HYPERVISOR_set_callbacks, evss, evfunc, fsss, fsfunc);
}

int
HYPERVISOR_fpu_taskswitch(void)
{
	return xencall1(__HYPERVISOR_fpu_taskswitch);
}

int
HYPERVISOR_yield(void)
{
	return xencall3(__HYPERVISOR_sched_op, SCHEDOP_yield, 0);
}

int
HYPERVISOR_block(void)
{
	return xencall3(__HYPERVISOR_sched_op, SCHEDOP_block, 0);
}

int
HYPERVISOR_shutdown(int reboot)
{
	sched_shutdown_t arg;

	arg.reason = reboot? SHUTDOWN_reboot : SHUTDOWN_poweroff;
	return xencall3(__HYPERVISOR_sched_op, SCHEDOP_shutdown, (ulong)&arg);
}

int
HYPERVISOR_multicall(void *call_list, int nr_calls)
{
	return xencall3(__HYPERVISOR_multicall, (ulong)call_list, nr_calls);
}

int
HYPERVISOR_event_channel_op(void *op)
{
	return xencall2(__HYPERVISOR_event_channel_op, (ulong)op);
}

int
HYPERVISOR_xen_version(int cmd, void *arg)
{
	return xencall3(__HYPERVISOR_xen_version, cmd, (ulong)arg);
}

int
HYPERVISOR_console_io(int cmd, int count, char *str)
{
	return xencall4(__HYPERVISOR_console_io, cmd, count, (ulong)str);
}

int
HYPERVISOR_grant_table_op(int cmd, gnttab_setup_table_t *setup, int count)
{
	return xencall4(__HYPERVISOR_grant_table_op, cmd, (ulong)setup, count);
}

int
HYPERVISOR_memory_op(int cmd, struct xen_memory_reservation *arg)
{
	return xencall3(__HYPERVISOR_memory_op, cmd, (ulong)arg);
}

/*
 * XXX this comment is leftover from old code.  revisit and update.
 *
 * The use of 'barrier' in the following reflects their use as local-lock
 * operations.  Reentrancy must be prevented (e.g., __cli()) /before/
 * following critical operations are executed.  All critical operations
 * must complete /before/ reentrancy is permitted (e.g., __sti()).  Alpha
 * architecture also includes these barriers, for example.
 */

/*
 * conversions to machine page numbers, pages and addresses
 */
#define MFN(pa)		(patomfn[(pa)>>PGSHIFT])
#define MFNPG(pa)	((uvlong)MFN(pa)<<PGSHIFT)
#define PA2MA(pa)	(MFNPG(pa) | PGOFF(pa))
#define VA2MA(va)	PA2MA(PADDR(va))
#define VA2MFN(va)	MFN(PADDR(va))

ulong hypervisor_virt_start;
ulong xentop;
start_info_t *xenstart;
shared_info_t *HYPERVISOR_shared_info;
ulong *patomfn;
ulong *matopfn;

int
xenpdptpin(ulong va)
{
	struct mmuext_op op;
	ulong mfn;

	mfn = MFN(PADDR(va));
	LOG(dprint("pdptpin %lux %lux\n", va, mfn);)
	print("pdptpin %lux %lux\n", va, mfn);
	/* mark page readonly first */
	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
	/* L3 here refers to page directory pointer table (PAE mode) */
	op.cmd = MMUEXT_PIN_L3_TABLE;
	op.arg1.mfn = mfn;
	if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
		return 1;
	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
	return 0;
}

int
xenpgdpin(ulong va)
{
	struct mmuext_op op;
	ulong mfn;

	mfn = MFN(PADDR(va));
	LOG(dprint("pdpin %lux %lux\n", va, mfn);)
	/* mark page readonly first */
	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
	/* to confuse you, L2 here refers to page directories */
	op.cmd = MMUEXT_PIN_L2_TABLE;
	op.arg1.mfn = mfn;
	if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
		return 1;
	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
	return 0;
}

int
xenptpin(ulong va)
{
	struct mmuext_op op;
	ulong mfn;

	mfn = MFN(PADDR(va));
	LOG(dprint("pin %lux %lux\n", va, mfn);)
	/* mark page readonly first */
	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
	/* to confuse you, L1 here refers to page tables */
	op.cmd = MMUEXT_PIN_L1_TABLE;
	op.arg1.mfn = mfn;
	if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
		return 1;
	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
	return 0;
}

void
xenptunpin(ulong va)
{
	struct mmuext_op op;
	ulong mfn;

	mfn = MFN(PADDR(va));
	LOG(dprint("unpin %lux %lux\n", va, mfn);)
	op.cmd = MMUEXT_UNPIN_TABLE;
	op.arg1.mfn = mfn;
	if(HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF)<0)
		panic("xenptunpin va=%lux called from %lux", va, getcallerpc(&va));
	/* mark page read-write */
	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
}

void
xenptswitch(ulong pa)
{
	struct mmuext_op op;

	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = MFN(pa);
	if(HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF)<0)
		panic("xenptswitch");
}

void
xentlbflush(void)
{
	struct mmuext_op op;

	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF);
}

/* update a pte using a machine page frame number */
void
xenupdatema(ulong *ptr, uvlong val)
{
	mmu_update_t u;

	u.ptr = VA2MA(ptr);
	u.val = val;
	if(HYPERVISOR_mmu_update(&u, 1, 0, DOMID_SELF) < 0)
		panic("xenupdatema - pte %lux value %llux (was %llux) called from %lux",
			(ulong)ptr, val, *(uvlong*)ptr, getcallerpc(&ptr));
}

/* update a pte using a guest "physical" page number */
void
xenupdate(ulong *ptr, ulong val)
{
	mmu_update_t u;

	u.ptr = VA2MA(ptr);
	u.val = PA2MA(val);
	if(HYPERVISOR_mmu_update(&u, 1, 0, DOMID_SELF) < 0)
		panic("xenupdate - pte %lux value %lux (%llux) called from %lux",
			(ulong)ptr, val, PA2MA(val), getcallerpc(&ptr));
}
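/*
 * A worked example of the pa<->ma translation above, assuming the
 * usual 4K pages (PGSHIFT 12): for guest-"physical" pa 0x1234,
 * MFN(pa) is patomfn[1], the machine frame backing guest page 1;
 * MFNPG shifts that frame number back into an address; and PA2MA
 * reattaches the page offset 0x234.  Hence xenupdate takes a
 * guest-physical pte value and installs the corresponding machine
 * address, while xenupdatema is for callers that already hold a
 * machine address.
 */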
void
acceptframe(int ref, void *va)
{
	ulong mfn;

	mfn = xengrantend(ref);
	if (mfn == 0)
		panic("can't accept page frame");
	LOG(dprint("acceptframe ref %d va %lux mfn %lux\n", ref, (ulong)va, mfn);)
	VA2MFN(va) = mfn;
	mmumapframe((ulong)va, mfn);
}

int
donateframe(int domid, void *va)
{
	ulong mfn;
	int ref;
	ulong *pte;
	struct xen_memory_reservation mem;

	mfn = VA2MFN(va);
	ref = xengrant(domid, mfn, GTF_accept_transfer);
	LOG(dprint("grant transfer %lux (%lux) -> %d\n", (ulong)va, mfn, ref);)
	pte = mmuwalk(m->pdb, (ulong)va, 2, 0);
	xenupdatema(pte, 0);
	set_xen_guest_handle(mem.extent_start, &mfn);
	mem.nr_extents = 1;
	mem.extent_order = 0;
	mem.address_bits = 0;
	mem.domid = DOMID_SELF;
	if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, &mem) != 1)
		panic("XENMEM_decrease_reservation");
	VA2MFN(va) = ~0;
	return ref;
}

int
shareframe(int domid, void *va, int write)
{
	ulong mfn;
	int ref;
	int flags;

	mfn = VA2MFN(va);
	flags = GTF_permit_access;
	if (!write)
		flags |= GTF_readonly;
	ref = xengrant(domid, mfn, flags);
	LOG(dprint("grant shared %lux (%lux) -> %d\n", (ulong)va, mfn, ref);)
	return ref;
}

/*
 * Upcall from hypervisor, entered with evtchn_upcall_pending masked.
 */
void
xenupcall(Ureg *ureg)
{
	vcpu_info_t *vcpu;
	shared_info_t *s;
	ulong sel1, sel2, n1, n2, port;

	ureg->ecode = 0;
	s = HYPERVISOR_shared_info;
	vcpu = &HYPERVISOR_shared_info->vcpu_info[0];
	for (;;) {
		vcpu->evtchn_upcall_pending = 0;
		sel1 = xchgl((uint*)&vcpu->evtchn_pending_sel, 0);
		while(sel1) {
			n1 = ffs(sel1);
			sel1 &= ~(1<<n1);
			sel2 = xchgl((uint*)&s->evtchn_pending[n1], 0);
			while(sel2) {
				n2 = ffs(sel2);
				sel2 &= ~(1<<n2);
				port = (n1<<5) + n2;
				ureg->trap = 100+port;
				trap(ureg);
			}
		}
		if (vcpu->evtchn_upcall_pending)
			continue;
		vcpu->evtchn_upcall_mask = 0;
		if (vcpu->evtchn_upcall_pending == 0)
			break;
		vcpu->evtchn_upcall_mask = 1;
	}
}

static int
xenirqenable(Vctl *v, int shared)
{
	if(!shared){
		uint port = v->vno-100;
		HYPERVISOR_shared_info->evtchn_mask[port/32] &= ~(1<<(port%32));
	}
	return 0;
}

static int
xenirqdisable(Vctl *v, int shared)
{
	if(!shared){
		uint port = v->vno-100;
		HYPERVISOR_shared_info->evtchn_mask[port/32] |= (1<<(port%32));
	}
	return 0;
}

/*
 * tbdf field is abused to distinguish virqs from channels:
 *
 *	tbdf=BUSUNKNOWN -> irq is a virq to be bound to a channel
 *	tbdf=0 -> irq is a channel number
 */
int
xenintrassign(Vctl *v)
{
	evtchn_op_t op;
	uint port;

	if (v->tbdf != BUSUNKNOWN) {
		op.cmd = EVTCHNOP_bind_virq;
		op.u.bind_virq.virq = v->irq;
		op.u.bind_virq.vcpu = m->machno;
		if(HYPERVISOR_event_channel_op(&op) != 0){
			print("xenintrassign: bind %d failed\n", v->irq);
			return -1;
		}
		port = op.u.bind_virq.port;
	} else
		port = v->irq;
	if (port > 155)
		return -1;
	v->enable = xenirqenable;
	v->disable = xenirqdisable;
	return 100+port;
}

int
xenintrvecno(int irq)
{
	return irq;
}

int
islo(void)
{
	vcpu_info_t *cpu;

	cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno];	// XXX m->shared
	return (cpu->evtchn_upcall_mask == 0);
}
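/*
 * A paravirtualized domain has no direct access to CLI/STI, so the
 * spl* primitives below toggle evtchn_upcall_mask in the shared
 * vcpu_info structure instead.  Events arriving while masked stay
 * pending rather than being delivered, which is why spllo must nudge
 * the hypervisor (via a dummy hypercall) after unmasking.
 */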
/*
 * Note: Portable code expects spllo <= spl* <= spldone for
 * accounting purposes.  Let's hope the compiler doesn't reorder us.
 */
int
spllo(void)
{
	vcpu_info_t *cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno];	// XXX m->shared

	if(cpu->evtchn_upcall_mask == 0)
		return 0;
	m->splpc = 0;
	cpu->evtchn_upcall_mask = 0;
	/*
	 * If an event arrived while masked off,
	 * use a dummy call to trigger delivery
	 */
	if (cpu->evtchn_upcall_pending)
		HYPERVISOR_xen_version(0, 0);
	return 1;
}

int
splhi(void)
{
	ulong dummy;
	vcpu_info_t *cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno];	// XXX m->shared
	int oldmask;

	oldmask = xchgb(&cpu->evtchn_upcall_mask, 1);
	if (cpu->evtchn_upcall_mask != 1)
		panic("xchgb");
	/* XXX ad-hoc "getcallerpc" because we have no arguments */
	m->splpc = (&dummy)[1];
	return oldmask;
}

void
splx(int x)
{
	if(x)
		splhi();
	else
		spllo();
}

/* marker for profiling in portable code */
void
spldone(void)
{
}

/* allocate an event channel */
int
xenchanalloc(int dom)
{
	evtchn_op_t op;

	op.cmd = EVTCHNOP_alloc_unbound;
	op.u.alloc_unbound.dom = DOMID_SELF;
	op.u.alloc_unbound.remote_dom = dom;
	if (HYPERVISOR_event_channel_op(&op) != 0)
		panic("xenchanalloc");
	return op.u.alloc_unbound.port;
}

/* notify over an event channel */
void
xenchannotify(int port)
{
	evtchn_op_t op;

	op.cmd = EVTCHNOP_send;
	op.u.send.port = port;
	HYPERVISOR_event_channel_op(&op);
}

void
halt(void)
{
	extern int nrdy;

	splhi();
	if (nrdy) {
		spllo();
		return;
	}
	HYPERVISOR_block();
}

void
mb(void)
{
	coherence();
}
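/*
 * A typical pairing of the channel routines above, as a hypothetical
 * frontend driver might use them (backendid, myintr and "mydev" are
 * made-up names):
 *
 *	evtchn = xenchanalloc(backendid);		// unbound port for the peer
 *	intrenable(evtchn, myintr, arg, 0, "mydev");	// tbdf=0: irq is a channel
 *	...
 *	xenchannotify(evtchn);				// kick the other end
 *
 * With tbdf=0, xenintrassign treats the irq as a raw channel number;
 * virtual irqs (timer etc.) are bound with tbdf=BUSUNKNOWN instead.
 */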