shithub: riscv

Download patch

ref: b0377e84cf31c48afed765c433abe7eebb4135a5
parent: 599fe05b17e25a3c7028f9aa296a7fb5face2882
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sat Jul 8 05:25:39 EDT 2023

pc64: make FPU available in interrupts

Instead of only supporting FPU use from syscalls
and the pagefault handler, make it available everywhere,
enabling libmemdraw to potentially use SIMD instructions.

Each process now has an explicit kfpstate/kfpsave that
devproc makes available in the kfpregs file.

Each trap/interrupt protects/saves the FPU registers
by calling fpukenter() and restores them on fpukexit().

With separate kfpstate fields, the states have been
simplified (kernel state is just FPinit, FPactive
and FPinactive). User fpstate has an additional
FPprotected state for lazy saving.

--- a/sys/src/9/pc64/dat.h
+++ b/sys/src/9/pc64/dat.h
@@ -81,32 +81,22 @@
 
 enum
 {
-	/* this is a state */
-	FPinit=		0,
-	FPactive=	1,
-	FPinactive=	2,
+	FPinit,
+	FPactive,
+	FPinactive,
+	FPprotected,
 
-	/*
-	 * the following are bits that can be or'd into the state.
-	 *
-	 * this is biased so that FPinit, FPactive and FPinactive
-	 * without any flags refer to user fp state in fpslot[0].
-	 */
-	FPillegal=	1<<8,	/* fp forbidden in note handler */
-	FPpush=		2<<8,	/* trap on use and initialize new fpslot */
-	FPnouser=	4<<8,	/* fpslot[0] is kernel regs */
-	FPkernel=	8<<8,	/* fp use in kernel (user in fpslot[0] when !FPnouser) */
-
-	FPindexs=	16,
-	FPindex1=	1<<FPindexs,
-	FPindexm=	3<<FPindexs,
+	FPillegal=	0x100,	/* fp forbidden in note handler */
 };
 
+#define KFPSTATE
+
 struct PFPU
 {
 	int	fpstate;
-	FPsave	*fpsave;	/* fpslot[fpstate>>FPindexs] */
-	FPsave	*fpslot[(FPindexm+1)>>FPindexs];
+	int	kfpstate;
+	FPsave	*fpsave;
+	FPsave	*kfpsave;
 };
 
 struct Confmem
@@ -215,6 +205,9 @@
 	char	havepge;
 	char	havewatchpt8;
 	char	havenx;
+
+	int	fpstate;		/* FPU state for interrupts */
+	FPsave	*fpsave;
 
 	u64int*	pml4;			/* pml4 base for this processor (va) */
 	Tss*	tss;			/* tss for this processor */
--- a/sys/src/9/pc64/fns.h
+++ b/sys/src/9/pc64/fns.h
@@ -39,12 +39,13 @@
 void	fpinit(void);
 void	(*fprestore)(FPsave*);
 void	(*fpsave)(FPsave*);
-void	fpuprocsetup(Proc*);
+FPsave*	fpukenter(Ureg*);
+void	fpukexit(Ureg*, FPsave*);
 void	fpuprocfork(Proc*);
-void	fpuprocsave(Proc*);
 void	fpuprocrestore(Proc*);
-int	fpusave(void);
-void	fpurestore(int);
+void	fpuprocsave(Proc*);
+void	fpuprocsetup(Proc*);
+
 u64int	getcr0(void);
 u64int	getcr2(void);
 u64int	getcr3(void);
--- a/sys/src/9/pc64/fpu.c
+++ b/sys/src/9/pc64/fpu.c
@@ -30,6 +30,8 @@
 extern void _ldmxcsr(u32int);
 extern void _stts(void);
 
+static void mathemu(Ureg *ureg, void*);
+
 static void
 fpssesave(FPsave *s)
 {
@@ -76,6 +78,21 @@
 	_stts();
 }
 
+/*
+ *  Turn the FPU on and initialise it for use.
+ *  Set the precision and mask the exceptions
+ *  we don't care about from the generic Mach value.
+ */
+void
+fpinit(void)
+{
+	_clts();
+	_fninit();
+	_fwait();
+	_fldcw(0x0232);
+	_ldmxcsr(0x1900);
+}
+
 static char* mathmsg[] =
 {
 	nil,	/* handled below */
@@ -87,7 +104,7 @@
 };
 
 static void
-mathnote(ulong status, uintptr pc)
+mathnote(ulong status, uintptr pc, int kernel)
 {
 	char *msg, note[ERRMAX];
 	int i;
@@ -112,8 +129,10 @@
 		}else
 			msg = "invalid operation";
 	}
-	snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=0x%lux",
-		msg, pc, status);
+	snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=0x%lux", msg, pc, status);
+	if(kernel)
+		panic("%s", note);
+
 	postnote(up, 1, note, NDebug);
 }
 
@@ -121,14 +140,21 @@
  *  math coprocessor error
  */
 static void
-matherror(Ureg *, void*)
+matherror(Ureg *ureg, void*)
 {
-	/*
-	 * Save FPU state to check out the error.
-	 */
-	fpsave(up->fpsave);
-	up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
-	mathnote(up->fpsave->fsw, up->fpsave->rip);
+	if(!userureg(ureg)){
+		if(up == nil)
+			mathnote(m->fpsave->fsw, m->fpsave->rip, 1);
+		else
+			mathnote(up->kfpsave->fsw, up->kfpsave->rip, 1);
+		return;
+	}
+	if(up->fpstate != FPinactive){
+		_clts();
+		fpsave(up->fpsave);
+		up->fpstate = FPinactive;
+	}
+	mathnote(up->fpsave->fsw, up->fpsave->rip, 0);
 }
 
 /*
@@ -137,90 +163,19 @@
 static void
 simderror(Ureg *ureg, void*)
 {
-	fpsave(up->fpsave);
-	up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
-	mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc);
-}
-
-void
-fpinit(void)
-{
-	/*
-	 * A process tries to use the FPU for the
-	 * first time and generates a 'device not available'
-	 * exception.
-	 * Turn the FPU on and initialise it for use.
-	 * Set the precision and mask the exceptions
-	 * we don't care about from the generic Mach value.
-	 */
-	_clts();
-	_fninit();
-	_fwait();
-	_fldcw(0x0232);
-	_ldmxcsr(0x1900);
-}
-
-/*
- *  math coprocessor emulation fault
- */
-static void
-mathemu(Ureg *ureg, void*)
-{
-	ulong status, control;
-	int index;
-
-	if(up->fpstate & FPillegal){
-		/* someone did floating point in a note handler */
-		postnote(up, 1, "sys: floating point in note handler", NDebug);
+	if(!userureg(ureg)){
+		if(up == nil)
+			mathnote(m->fpsave->mxcsr & 0x3f, ureg->pc, 1);
+		else
+			mathnote(up->kfpsave->mxcsr & 0x3f, ureg->pc, 1);
 		return;
 	}
-	switch(up->fpstate & ~(FPnouser|FPkernel|FPindexm)){
-	case FPactive	| FPpush:
+	if(up->fpstate != FPinactive){
 		_clts();
 		fpsave(up->fpsave);
-	case FPinactive	| FPpush:
-		up->fpstate += FPindex1;
-	case FPinit	| FPpush:
-	case FPinit:
-		fpinit();
-		index = up->fpstate >> FPindexs;
-		if(index < 0 || index > (FPindexm>>FPindexs))
-			panic("fpslot index overflow: %d", index);
-		if(userureg(ureg)){
-			if(index != 0)
-				panic("fpslot index %d != 0 for user", index);
-		} else {
-			if(index == 0)
-				up->fpstate |= FPnouser;
-			up->fpstate |= FPkernel;
-		}
-		while(up->fpslot[index] == nil)
-			up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
-		up->fpsave = up->fpslot[index];
-		up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
-		break;
-	case FPinactive:
-		/*
-		 * Before restoring the state, check for any pending
-		 * exceptions, there's no way to restore the state without
-		 * generating an unmasked exception.
-		 * More attention should probably be paid here to the
-		 * exception masks and error summary.
-		 */
-		status = up->fpsave->fsw;
-		control = up->fpsave->fcw;
-		if((status & ~control) & 0x07F){
-			mathnote(status, up->fpsave->rip);
-			break;
-		}
-		fprestore(up->fpsave);
-		up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
-		break;
-	case FPactive:
-		panic("math emu pid %ld %s pc %#p", 
-			up->pid, up->text, ureg->pc);
-		break;
+		up->fpstate = FPinactive;
 	}
+	mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc, 0);
 }
 
 /*
@@ -227,8 +182,11 @@
  *  math coprocessor segment overrun
  */
 static void
-mathover(Ureg*, void*)
+mathover(Ureg *ureg, void*)
 {
+	if(!userureg(ureg))
+		panic("math overrun");
+
 	pexit("math overrun", 0);
 }
 
@@ -244,7 +202,7 @@
 }
 
 /*
- * fpuinit(), called from cpuidentify() for each cpu.
+ *  fpuinit(), called from cpuidentify() for each cpu.
  */
 void
 fpuinit(void)
@@ -274,13 +232,35 @@
 		fpsave = fpssesave;
 		fprestore = fpsserestore;
 	}
+
+	m->fpsave = nil;
+	m->fpstate = FPinit;
+	_stts();
 }
 
+static FPsave*
+fpalloc(void)
+{
+	FPsave *save;
+
+	while((save = mallocalign(sizeof(FPsave), FPalign, 0, 0)) == nil){
+		spllo();
+		resrcwait("no memory for FPsave");
+		splhi();
+	}
+	return save;
+}
+
+static void
+fpfree(FPsave *save)
+{
+	free(save);
+}
+
 void
 fpuprocsetup(Proc *p)
 {
 	p->fpstate = FPinit;
-	_stts();
 }
 
 void
@@ -288,21 +268,19 @@
 {
 	int s;
 
-	/* save floating point state */
 	s = splhi();
 	switch(up->fpstate & ~FPillegal){
-	case FPactive	| FPpush:
+	case FPprotected:
 		_clts();
+		/* wet floor */
 	case FPactive:
 		fpsave(up->fpsave);
-		up->fpstate = FPinactive | (up->fpstate & FPpush);
-	case FPactive	| FPkernel:
-	case FPinactive	| FPkernel:
-	case FPinactive	| FPpush:
+		up->fpstate = FPinactive;
+		/* wet floor */
 	case FPinactive:
-		while(p->fpslot[0] == nil)
-			p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
-		memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave));
+		if(p->fpsave == nil)
+			p->fpsave = fpalloc();
+		memmove(p->fpsave, up->fpsave, sizeof(FPsave));
 		p->fpstate = FPinactive;
 	}
 	splx(s);
@@ -311,63 +289,224 @@
 void
 fpuprocsave(Proc *p)
 {
-	switch(p->fpstate & ~(FPnouser|FPkernel|FPindexm)){
-	case FPactive	| FPpush:
-		_clts();
-	case FPactive:
-		if(p->state == Moribund){
+	if(p->state == Moribund){
+		if(p->fpstate == FPactive || p->kfpstate == FPactive){
 			_fnclex();
 			_stts();
-			break;
 		}
-		/*
-		 * Fpsave() stores without handling pending
-		 * unmasked exeptions. Postnote() can't be called
-		 * so the handling of pending exceptions is delayed
-		 * until the process runs again and generates an
-		 * emulation fault to activate the FPU.
-		 */
-		fpsave(p->fpsave);
-		p->fpstate = FPinactive | (p->fpstate & ~FPactive);
-		break;
+		fpfree(p->fpsave);
+		fpfree(p->kfpsave);
+		p->fpsave = p->kfpsave = nil;
+		p->fpstate = p->kfpstate = FPinit;
+		return;
 	}
+	if(p->kfpstate == FPactive){
+		fpsave(p->kfpsave);
+		p->kfpstate = FPinactive;
+		return;
+	}
+	if(p->fpstate == FPprotected)
+		_clts();
+	else if(p->fpstate != FPactive)
+		return;
+	fpsave(p->fpsave);
+	p->fpstate = FPinactive;
 }
 
 void
 fpuprocrestore(Proc*)
 {
+	/*
+	 * when the scheduler switches,
+	 * we can discard its fp state.
+	 */
+	switch(m->fpstate){
+	case FPactive:
+		_fnclex();
+		_stts();
+		/* wet floor */
+	case FPinactive:
+		fpfree(m->fpsave);
+		m->fpsave = nil;
+		m->fpstate = FPinit;
+	}
 }
 
-
 /*
- * Fpusave and fpurestore lazily save and restore FPU state across
- * system calls and the pagefault handler so that we can take
- * advantage of SSE instructions such as AES-NI in the kernel.
+ *  Protect or save FPU state and set up new state
+ *  (lazily in the case of user process) for the kernel.
+ *  All syscalls, traps and interrupts (except mathemu()!)
+ *  are handled between fpukenter() and fpukexit(),
+ *  so they can use floating point and vector instructions.
  */
-int
-fpusave(void)
+FPsave*
+fpukenter(Ureg *)
 {
-	int ostate = up->fpstate;
-	if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+	if(up == nil){
+		switch(m->fpstate){
+		case FPactive:
+			fpsave(m->fpsave);
+			/* wet floor */
+		case FPinactive:
+			m->fpstate = FPinit;
+			return m->fpsave;
+		}
+		return nil;
+	}
+
+	switch(up->fpstate){
+	case FPactive:
+		up->fpstate = FPprotected;
 		_stts();
-	up->fpstate = FPpush | (ostate & ~FPillegal);
-	return ostate;
+		/* wet floor */
+	case FPprotected:
+		return nil;
+	}
+
+	switch(up->kfpstate){
+	case FPactive:
+		fpsave(up->kfpsave);
+		/* wet floor */
+	case FPinactive:
+		up->kfpstate = FPinit;
+		return up->kfpsave;
+	}
+	return nil;
 }
+
 void
-fpurestore(int ostate)
+fpukexit(Ureg *ureg, FPsave *save)
 {
-	int astate = up->fpstate;
-	if(astate == (FPpush | (ostate & ~FPillegal))){
-		if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+	if(up == nil){
+		switch(m->fpstate){
+		case FPactive:
+			_fnclex();
+			_stts();
+			/* wet floor */
+		case FPinactive:
+			fpfree(m->fpsave);
+			m->fpstate = FPinit;
+		}
+		m->fpsave = save;
+		if(save != nil)
+			m->fpstate = FPinactive;
+		return;
+	}
+
+	if(up->fpstate == FPprotected){
+		if(userureg(ureg)){
+			up->fpstate = FPactive;
 			_clts();
-	} else {
-		if(astate == FPinit)	/* don't restore on procexec()/procsetup() */
+		}
+		return;
+	}
+
+	switch(up->kfpstate){
+	case FPactive:
+		_fnclex();
+		_stts();
+		/* wet floor */
+	case FPinactive:
+		fpfree(up->kfpsave);
+		up->kfpstate = FPinit;
+	}
+	up->kfpsave = save;
+	if(save != nil)
+		up->kfpstate = FPinactive;
+}
+
+/*
+ *  Before restoring the state, check for any pending
+ *  exceptions; there's no way to restore the state without
+ *  generating an unmasked exception.
+ *  More attention should probably be paid here to the
+ *  exception masks and error summary.
+ */
+static int
+fpcheck(FPsave *save, int kernel)
+{
+	ulong status, control;
+
+	status = save->fsw;
+	control = save->fcw;
+	if((status & ~control) & 0x07F){
+		mathnote(status, save->rip, kernel);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ *  math coprocessor emulation fault
+ */
+static void
+mathemu(Ureg *ureg, void*)
+{
+	if(!userureg(ureg)){
+		if(up == nil){
+			switch(m->fpstate){
+			case FPinit:
+				m->fpsave = fpalloc();
+				m->fpstate = FPactive;
+				fpinit();
+				break;
+			case FPinactive:
+				fpcheck(m->fpsave, 1);
+				fprestore(m->fpsave);
+				m->fpstate = FPactive;
+				break;
+			default:
+				panic("floating point error in irq");
+			}
 			return;
-		if((astate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
-			_stts();
-		up->fpsave = up->fpslot[ostate>>FPindexs];
-		if(ostate & FPactive)
-			ostate = FPinactive | (ostate & ~FPactive);
+		}
+
+		if(up->fpstate == FPprotected){
+			_clts();
+			fpsave(up->fpsave);
+			up->fpstate = FPinactive;
+		}
+
+		switch(up->kfpstate){
+		case FPinit:
+			up->kfpsave = fpalloc();
+			up->kfpstate = FPactive;
+			fpinit();
+			break;
+		case FPinactive:
+			fpcheck(up->kfpsave, 1);
+			fprestore(up->kfpsave);
+			up->kfpstate = FPactive;
+			break;
+		default:
+			panic("floating point error in trap");
+		}
+		return;
 	}
-	up->fpstate = ostate;
+
+	if(up->fpstate & FPillegal){
+		postnote(up, 1, "sys: floating point in note handler", NDebug);
+		return;
+	}
+	switch(up->fpstate){
+	case FPinit:
+		if(up->fpsave == nil)
+			up->fpsave = fpalloc();
+		up->fpstate = FPactive;
+		fpinit();
+		break;
+	case FPinactive:
+		if(fpcheck(up->fpsave, 0))
+			break;
+		fprestore(up->fpsave);
+		up->fpstate = FPactive;
+		break;
+	case FPprotected:
+		up->fpstate = FPactive;
+		_clts();
+		break;
+	case FPactive:
+		postnote(up, 1, "sys: floating point error", NDebug);
+		break;
+	}
 }
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -169,6 +169,9 @@
 	sp[3] = sp[2] = nil;
 	strcpy(sp[1] = (char*)&sp[4], "boot");
 	sp[0] = nil;
+
+	splhi();
+	fpukexit(nil, nil);
 	touser(sp);
 }
 
@@ -192,11 +195,12 @@
 	ramdiskinit();
 	confinit();
 	xinit();
+	trapinit();
+	mathinit();
 	if(i8237alloc != nil)
 		i8237alloc();
 	pcicfginit();
 	bootscreeninit();
-	trapinit();
 	printinit();
 	cpuidprint();
 	mmuinit();
@@ -203,7 +207,6 @@
 	if(arch->intrinit)
 		arch->intrinit();
 	timersinit();
-	mathinit();
 	if(arch->clockenable)
 		arch->clockenable();
 	procinit0();
--- a/sys/src/9/pc64/trap.c
+++ b/sys/src/9/pc64/trap.c
@@ -129,14 +129,17 @@
 	return 0;
 }
 
-/* go to user space */
 void
 trap(Ureg *ureg)
 {
 	int vno, user;
+	FPsave *f = nil;
 
-	user = kenter(ureg);
 	vno = ureg->type;
+	user = kenter(ureg);
+	if(vno != VectorCNA)
+		f = fpukenter(ureg);
+
 	if(!irqhandled(ureg, vno) && (!user || !usertrap(vno))){
 		if(!user){
 			void (*pc)(void);
@@ -150,12 +153,12 @@
 				if(vno == VectorGPF){
 					ureg->bp = -1;
 					ureg->pc += 2;
-					return;
+					goto out;
 				}
 			} else if(pc == _peekinst){
 				if(vno == VectorGPF || vno == VectorPF){
 					ureg->pc += 2;
-					return;
+					goto out;
 				}
 			}
 
@@ -173,13 +176,15 @@
 			panic("%s", excname[vno]);
 		panic("unknown trap/intr: %d", vno);
 	}
+out:
 	splhi();
-
 	if(user){
 		if(up->procctl || up->nnote)
 			notify(ureg);
 		kexit(ureg);
 	}
+	if(vno != VectorCNA)
+		fpukexit(ureg, f);
 }
 
 void
@@ -372,45 +377,35 @@
 faultamd64(Ureg* ureg, void*)
 {
 	uintptr addr;
-	int read, user, n, insyscall, f;
+	int read, user;
 	char buf[ERRMAX];
 
 	addr = getcr2();
 	read = !(ureg->error & 2);
 	user = userureg(ureg);
-	if(!user){
-		{
-			extern void _peekinst(void);
-			
-			if((void(*)(void))ureg->pc == _peekinst){
-				ureg->pc += 2;
-				return;
-			}
+	if(user)
+		up->insyscall = 1;
+	else {
+		extern void _peekinst(void);
+
+		if((void(*)(void))ureg->pc == _peekinst){
+			ureg->pc += 2;
+			return;
 		}
 		if(addr >= USTKTOP)
 			panic("kernel fault: bad address pc=%#p addr=%#p", ureg->pc, addr);
 		if(up == nil)
 			panic("kernel fault: no user process pc=%#p addr=%#p", ureg->pc, addr);
-	}
-	if(up == nil)
-		panic("user fault: up=0 pc=%#p addr=%#p", ureg->pc, addr);
-
-	insyscall = up->insyscall;
-	up->insyscall = 1;
-	f = fpusave();
-	if(!user && waserror()){
-		if(up->nerrlab == 0){
-			pprint("suicide: sys: %s\n", up->errstr);
-			pexit(up->errstr, 1);
+		if(waserror()){
+			if(up->nerrlab == 0){
+				pprint("suicide: sys: %s\n", up->errstr);
+				pexit(up->errstr, 1);
+			}
+			nexterror();
 		}
-		int s = splhi();
-		fpurestore(f);
-		up->insyscall = insyscall;
-		splx(s);
-		nexterror();
 	}
-	n = fault(addr, ureg->pc, read);
-	if(n < 0){
+
+	if(fault(addr, ureg->pc, read) < 0){
 		if(!user){
 			dumpregs(ureg);
 			panic("fault: %#p", addr);
@@ -420,10 +415,11 @@
 			read ? "read" : "write", addr);
 		postnote(up, 1, buf, NDebug);
 	}
-	if(!user) poperror();
-	splhi();
-	fpurestore(f);
-	up->insyscall = insyscall;
+
+	if(user)
+		up->insyscall = 0;
+	else
+		poperror();
 }
 
 /*
@@ -440,21 +436,21 @@
 	char *e;
 	uintptr	sp;
 	long long ret;
-	int	i, s, f;
+	int i, s;
 	ulong scallnr;
 	vlong startns, stopns;
 
 	if(!kenter(ureg))
 		panic("syscall: cs 0x%4.4lluX", ureg->cs);
+	fpukenter(ureg);
 
 	m->syscall++;
 	up->insyscall = 1;
-	up->pc = ureg->pc;
 
+	up->pc = ureg->pc;
 	sp = ureg->sp;
 	scallnr = ureg->bp;	/* RARG */
 	up->scallnr = scallnr;
-	f = fpusave();
 	spllo();
 
 	ret = -1;
@@ -518,13 +514,7 @@
 		splx(s);
 	}
 
-	splhi();
-	fpurestore(f);
-	up->insyscall = 0;
-	up->psstate = 0;
-
 	if(scallnr == NOTED){
-		noted(ureg, *((ulong*)up->s.args));
 		/*
 		 * normally, syscall() returns to forkret()
 		 * not restoring general registers when going
@@ -534,17 +524,28 @@
 		 * to it when returning form syscall()
 		 */
 		((void**)&ureg)[-1] = (void*)noteret;
+
+		noted(ureg, *((ulong*)up->s.args));
+		splhi();
+		up->fpstate &= ~FPillegal;
 	}
+	else
+		splhi();
 
-	if(scallnr!=RFORK && (up->procctl || up->nnote)){
-		notify(ureg);
+	if(scallnr != RFORK && (up->procctl || up->nnote) && notify(ureg))
 		((void**)&ureg)[-1] = (void*)noteret;	/* loads RARG */
-	}
 
+	up->insyscall = 0;
+	up->psstate = nil;
+
 	/* if we delayed sched because we held a lock, sched now */
-	if(up->delaysched)
+	if(up->delaysched){
 		sched();
+		splhi();
+	}
+
 	kexit(ureg);
+	fpukexit(ureg, nil);
 }
 
 /*
@@ -561,6 +562,7 @@
 		procctl();
 	if(up->nnote == 0)
 		return 0;
+
 	spllo();
 	qlock(&up->debug);
 	msg = popnote(ureg);
@@ -579,7 +581,6 @@
 	if(!okaddr((uintptr)up->notify, 1, 0)
 	|| !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
 		qunlock(&up->debug);
-		up->fpstate &= ~FPillegal;
 		pprint("suicide: bad address in notify\n");
 		pexit("Suicide", 0);
 	}
@@ -600,10 +601,7 @@
 	ureg->ss = UDSEL;
 	qunlock(&up->debug);
 	splhi();
-	if(up->fpstate == FPactive){
-		fpsave(up->fpsave);
-		up->fpstate = FPinactive;
-	}
+	fpuprocsave(up);
 	up->fpstate |= FPillegal;
 	return 1;
 }
@@ -617,8 +615,6 @@
 	Ureg *nureg;
 	uintptr oureg, sp;
 
-	up->fpstate &= ~FPillegal;
-	spllo();
 	qlock(&up->debug);
 	if(arg0!=NRSTR && !up->notified) {
 		qunlock(&up->debug);
@@ -663,7 +659,6 @@
 		}
 		qunlock(&up->debug);
 		sp = oureg-4*BY2WD-ERRMAX;
-		splhi();
 		ureg->sp = sp;
 		ureg->bp = oureg;		/* arg 1 passed in RARG */
 		((uintptr*)sp)[1] = oureg;	/* arg 1 0(FP) is ureg* */
--- a/sys/src/9/port/devproc.c
+++ b/sys/src/9/port/devproc.c
@@ -18,6 +18,9 @@
 	Qctl,
 	Qfd,
 	Qfpregs,
+#ifdef KFPSTATE
+	Qkfpregs,
+#endif
 	Qkregs,
 	Qmem,
 	Qnote,
@@ -86,6 +89,9 @@
 	"ctl",		{Qctl},		0,			0000,
 	"fd",		{Qfd},		0,			0444,
 	"fpregs",	{Qfpregs},	sizeof(FPsave),		0000,
+#ifdef KFPSTATE
+	"kfpregs",	{Qkfpregs},	sizeof(FPsave),		0400,
+#endif
 	"kregs",	{Qkregs},	sizeof(Ureg),		0400,
 	"mem",		{Qmem},		0,			0000,
 	"note",		{Qnote},	0,			0000,
@@ -418,6 +424,9 @@
 	case Qstatus:
 	case Qppid:
 	case Qproc:
+#ifdef KFPSTATE
+	case Qkfpregs:
+#endif
 	case Qkregs:
 	case Qsegment:
 	case Qns:
@@ -967,7 +976,14 @@
 		rptr = (uchar*)&kur;
 		rsize = sizeof(Ureg);
 		goto regread;
-
+#ifdef KFPSTATE
+	case Qkfpregs:
+		if(p->kfpstate != FPinactive)
+			error(Enoreg);
+		rptr = (uchar*)p->kfpsave;
+		rsize = sizeof(FPsave);
+		goto regread;
+#endif
 	case Qfpregs:
 		if(p->fpstate != FPinactive)
 			error(Enoreg);
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -656,6 +656,9 @@
 
 	p->psstate = "New";
 	p->fpstate = FPinit;
+#ifdef KFPSTATE
+	p->kfpstate = FPinit;
+#endif
 	p->procctl = 0;
 	p->ureg = nil;
 	p->dbgreg = nil;
@@ -1025,10 +1028,8 @@
 
 	if(up->notify == nil || up->notified){
 		qunlock(&up->debug);
-		if(up->lastnote->flag == NDebug){
-			up->fpstate &= ~FPillegal;
+		if(up->lastnote->flag == NDebug)
 			pprint("suicide: %s\n", up->lastnote->msg);
-		}
 		pexit(up->lastnote->msg, up->lastnote->flag!=NDebug);
 	}
 	up->notified = 1;
@@ -1202,7 +1203,6 @@
 	Chan *dot;
 	void (*pt)(Proc*, int, vlong);
 
-	up->fpstate &= ~FPillegal;
 	up->alarm = 0;
 	timerdel(up);
 	pt = proctrace;
@@ -1592,7 +1592,6 @@
 	switch(up->procctl) {
 	case Proc_exitbig:
 		spllo();
-		up->fpstate &= ~FPillegal;
 		pprint("Killed: Insufficient physical memory\n");
 		pexit("Killed: Insufficient physical memory", 1);