ref: 042f98784acb9f09013f65b0c93b7f56e1b8a10a
dir: /sys/src/9/omap/fpiarm.c/
/*
 * this doesn't attempt to implement ARM floating-point properties
 * that aren't visible in the Inferno environment.
 * all arithmetic is done in double precision.
 * the FP trap status isn't updated.
 */
#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"ureg.h"
#include	"arm.h"
#include	"fpi.h"
/* undef this if correct kernel r13 isn't in Ureg;
 * check calculation in fpiarm below
 */
#define	REG(ur, x) (*(long*)(((char*)(ur))+roff[(x)]))
#define	FR(ufp, x) (*(Internal*)(ufp)->regs[(x)&(Nfpctlregs - 1)])
typedef struct FP2 FP2;
typedef struct FP1 FP1;
struct FP2 {
	char*	name;
	void	(*f)(Internal, Internal, Internal*);
};
struct FP1 {
	char*	name;
	void	(*f)(Internal*, Internal*);
};
enum {
	N = 1<<31,
	Z = 1<<30,
	C = 1<<29,
	V = 1<<28,
	REGPC = 15,
};
enum {
	fpemudebug = 0,
};
#undef OFR
#define	OFR(X)	((ulong)&((Ureg*)0)->X)
static	int	roff[] = {
	OFR(r0), OFR(r1), OFR(r2), OFR(r3),
	OFR(r4), OFR(r5), OFR(r6), OFR(r7),
	OFR(r8), OFR(r9), OFR(r10), OFR(r11),
	OFR(r12), OFR(r13), OFR(r14), OFR(pc),
};
static Internal fpconst[8] = {		/* indexed by op&7 (ARM 7500 FPA) */
	/* s, e, l, h */
	{0, 0x1, 0x00000000, 0x00000000}, /* 0.0 */
	{0, 0x3FF, 0x00000000, 0x08000000},	/* 1.0 */
	{0, 0x400, 0x00000000, 0x08000000},	/* 2.0 */
	{0, 0x400, 0x00000000, 0x0C000000},	/* 3.0 */
	{0, 0x401, 0x00000000, 0x08000000},	/* 4.0 */
	{0, 0x401, 0x00000000, 0x0A000000},	/* 5.0 */
	{0, 0x3FE, 0x00000000, 0x08000000},	/* 0.5 */
	{0, 0x402, 0x00000000, 0x0A000000},	/* 10.0 */
};
/*
 * arm binary operations
 */
static void
fadd(Internal m, Internal n, Internal *d)
{
	(m.s == n.s? fpiadd: fpisub)(&m, &n, d);
}
static void
fsub(Internal m, Internal n, Internal *d)
{
	m.s ^= 1;
	(m.s == n.s? fpiadd: fpisub)(&m, &n, d);
}
static void
fsubr(Internal m, Internal n, Internal *d)
{
	n.s ^= 1;
	(n.s == m.s? fpiadd: fpisub)(&n, &m, d);
}
static void
fmul(Internal m, Internal n, Internal *d)
{
	fpimul(&m, &n, d);
}
static void
fdiv(Internal m, Internal n, Internal *d)
{
	fpidiv(&m, &n, d);
}
static void
fdivr(Internal m, Internal n, Internal *d)
{
	fpidiv(&n, &m, d);
}
/*
 * arm unary operations
 */
static void
fmov(Internal *m, Internal *d)
{
	*d = *m;
}
static void
fmovn(Internal *m, Internal *d)
{
	*d = *m;
	d->s ^= 1;
}
static void
fabsf(Internal *m, Internal *d)
{
	*d = *m;
	d->s = 0;
}
static void
frnd(Internal *m, Internal *d)
{
	short e;
	(m->s? fsub: fadd)(fpconst[6], *m, d);
	if(IsWeird(d))
		return;
	fpiround(d);
	e = (d->e - ExpBias) + 1;
	if(e <= 0)
		SetZero(d);
	else if(e > FractBits){
		if(e < 2*FractBits)
			d->l &= ~((1<<(2*FractBits - e))-1);
	}else{
		d->l = 0;
		if(e < FractBits)
			d->h &= ~((1<<(FractBits-e))-1);
	}
}
/*
 * ARM 7500 FPA opcodes
 */
static	FP1	optab1[16] = {	/* Fd := OP Fm */
[0]	{"MOVF",	fmov},
[1]	{"NEGF",	fmovn},
[2]	{"ABSF",	fabsf},
[3]	{"RNDF",	frnd},
[4]	{"SQTF",	/*fsqt*/0},
/* LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN all `deprecated' */
/* URD and NRM aren't implemented */
};
static	FP2	optab2[16] = {	/* Fd := Fn OP Fm */
[0]	{"ADDF",	fadd},
[1]	{"MULF",	fmul},
[2]	{"SUBF",	fsub},
[3]	{"RSUBF",	fsubr},
[4]	{"DIVF",	fdiv},
[5]	{"RDIVF",	fdivr},
/* POW, RPW deprecated */
[8]	{"REMF",	/*frem*/0},
[9]	{"FMF",	fmul},	/* fast multiply */
[10]	{"FDV",	fdiv},	/* fast divide */
[11]	{"FRD",	fdivr},	/* fast reverse divide */
/* POL deprecated */
};
/*
 * ARM VFP opcodes
 */
static	FP1	voptab1[32] = {	/* Vd := OP Vm */
[0]	{"MOVF",	fmov},
[1]	{"ABSF",	fabsf},
[2]	{"NEGF",	fmovn},
[15]	{"CVTF",	fmov},
};
static	FP2	voptab2[16] = {	/* Vd := Vn FOP Fm */
[4]	{"MULF",	fmul},
[6]	{"ADDF",	fadd},
[7]	{"SUBF",	fsub},
[8]	{"DIVF",	fdiv},
};
static ulong
fcmp(Internal *n, Internal *m)
{
	int i;
	Internal rm, rn;
	if(IsWeird(m) || IsWeird(n)){
		/* BUG: should trap if not masked */
		return V|C;
	}
	rn = *n;
	rm = *m;
	fpiround(&rn);
	fpiround(&rm);
	i = fpicmp(&rn, &rm);
	if(i > 0)
		return C;
	else if(i == 0)
		return C|Z;
	else
		return N;
}
static void
fld(void (*f)(Internal*, void*), int d, ulong ea, int n, FPsave *ufp)
{
	void *mem;
	mem = (void*)ea;
	(*f)(&FR(ufp, d), mem);
	if(fpemudebug)
		print("MOV%c #%lux, F%d\n", n==8? 'D': 'F', ea, d);
}
static void
fst(void (*f)(void*, Internal*), ulong ea, int s, int n, FPsave *ufp)
{
	Internal tmp;
	void *mem;
	mem = (void*)ea;
	tmp = FR(ufp, s);
	if(fpemudebug)
		print("MOV%c	F%d,#%lux\n", n==8? 'D': 'F', s, ea);
	(*f)(mem, &tmp);
}
static int
condok(int cc, int c)
{
	switch(c){
	case 0:	/* Z set */
		return cc&Z;
	case 1:	/* Z clear */
		return (cc&Z) == 0;
	case 2:	/* C set */
		return cc&C;
	case 3:	/* C clear */
		return (cc&C) == 0;
	case 4:	/* N set */
		return cc&N;
	case 5:	/* N clear */
		return (cc&N) == 0;
	case 6:	/* V set */
		return cc&V;
	case 7:	/* V clear */
		return (cc&V) == 0;
	case 8:	/* C set and Z clear */
		return cc&C && (cc&Z) == 0;
	case 9:	/* C clear or Z set */
		return (cc&C) == 0 || cc&Z;
	case 10:	/* N set and V set, or N clear and V clear */
		return (~cc&(N|V))==0 || (cc&(N|V)) == 0;
	case 11:	/* N set and V clear, or N clear and V set */
		return (cc&(N|V))==N || (cc&(N|V))==V;
	case 12:	/* Z clear, and either N set and V set or N clear and V clear */
		return (cc&Z) == 0 && ((~cc&(N|V))==0 || (cc&(N|V))==0);
	case 13:	/* Z set, or N set and V clear or N clear and V set */
		return (cc&Z) || (cc&(N|V))==N || (cc&(N|V))==V;
	case 14:	/* always */
		return 1;
	case 15:	/* never (reserved) */
		return 0;
	}
	return 0;	/* not reached */
}
static void
unimp(ulong pc, ulong op)
{
	char buf[60];
	snprint(buf, sizeof(buf), "sys: fp: pc=%lux unimp fp 0x%.8lux", pc, op);
	if(fpemudebug)
		print("FPE: %s\n", buf);
	error(buf);
	/* no return */
}
static void
fpaemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp)
{
	int rn, rd, tag, o;
	long off;
	ulong ea;
	Internal tmp, *fm, *fn;
	/* note: would update fault status here if we noted numeric exceptions */
	/*
	 * LDF, STF; 10.1.1
	 */
	if(((op>>25)&7) == 6){
		if(op & (1<<22))
			unimp(pc, op);	/* packed or extended */
		rn = (op>>16)&0xF;
		off = (op&0xFF)<<2;
		if((op & (1<<23)) == 0)
			off = -off;
		ea = REG(ur, rn);
		if(rn == REGPC)
			ea += 8;
		if(op & (1<<24))
			ea += off;
		rd = (op>>12)&7;
		if(op & (1<<20)){
			if(op & (1<<15))
				fld(fpid2i, rd, ea, 8, ufp);
			else
				fld(fpis2i, rd, ea, 4, ufp);
		}else{
			if(op & (1<<15))
				fst(fpii2d, ea, rd, 8, ufp);
			else
				fst(fpii2s, ea, rd, 4, ufp);
		}
		if((op & (1<<24)) == 0)
			ea += off;
		if(op & (1<<21))
			REG(ur, rn) = ea;
		return;
	}
	/*
	 * CPRT/transfer, 10.3
	 */
	if(op & (1<<4)){
		rd = (op>>12) & 0xF;
		/*
		 * compare, 10.3.1
		 */
		if(rd == 15 && op & (1<<20)){
			rn = (op>>16)&7;
			fn = &FR(ufp, rn);
			if(op & (1<<3)){
				fm = &fpconst[op&7];
				if(fpemudebug)
					tag = 'C';
			}else{
				fm = &FR(ufp, op&7);
				if(fpemudebug)
					tag = 'F';
			}
			switch((op>>21)&7){
			default:
				unimp(pc, op);
			case 4:	/* CMF: Fn :: Fm */
			case 6:	/* CMFE: Fn :: Fm (with exception) */
				ur->psr &= ~(N|C|Z|V);
				ur->psr |= fcmp(fn, fm);
				break;
			case 5:	/* CNF: Fn :: -Fm */
			case 7:	/* CNFE: Fn :: -Fm (with exception) */
				tmp = *fm;
				tmp.s ^= 1;
				ur->psr &= ~(N|C|Z|V);
				ur->psr |= fcmp(fn, &tmp);
				break;
			}
			if(fpemudebug)
				print("CMPF	%c%d,F%ld =%#lux\n",
					tag, rn, op&7, ur->psr>>28);
			return;
		}
		/*
		 * other transfer, 10.3
		 */
		switch((op>>20)&0xF){
		default:
			unimp(pc, op);
		case 0:	/* FLT */
			rn = (op>>16) & 7;
			fpiw2i(&FR(ufp, rn), ®(ur, rd));
			if(fpemudebug)
				print("MOVW[FD]	R%d, F%d\n", rd, rn);
			break;
		case 1:	/* FIX */
			if(op & (1<<3))
				unimp(pc, op);
			rn = op & 7;
			tmp = FR(ufp, rn);
			fpii2w(®(ur, rd), &tmp);
			if(fpemudebug)
				print("MOV[FD]W	F%d, R%d =%ld\n", rn, rd, REG(ur, rd));
			break;
		case 2:	/* FPSR := Rd */
			ufp->status = REG(ur, rd);
			if(fpemudebug)
				print("MOVW	R%d, FPSR\n", rd);
			break;
		case 3:	/* Rd := FPSR */
			REG(ur, rd) = ufp->status;
			if(fpemudebug)
				print("MOVW	FPSR, R%d\n", rd);
			break;
		case 4:	/* FPCR := Rd */
			ufp->control = REG(ur, rd);
			if(fpemudebug)
				print("MOVW	R%d, FPCR\n", rd);
			break;
		case 5:	/* Rd := FPCR */
			REG(ur, rd) = ufp->control;
			if(fpemudebug)
				print("MOVW	FPCR, R%d\n", rd);
			break;
		}
		return;
	}
	/*
	 * arithmetic
	 */
	if(op & (1<<3)){	/* constant */
		fm = &fpconst[op&7];
		if(fpemudebug)
			tag = 'C';
	}else{
		fm = &FR(ufp, op&7);
		if(fpemudebug)
			tag = 'F';
	}
	rd = (op>>12)&7;
	o = (op>>20)&0xF;
	if(op & (1<<15)){	/* monadic */
		FP1 *fp;
		fp = &optab1[o];
		if(fp->f == nil)
			unimp(pc, op);
		if(fpemudebug)
			print("%s	%c%ld,F%d\n", fp->name, tag, op&7, rd);
		(*fp->f)(fm, &FR(ufp, rd));
	} else {
		FP2 *fp;
		fp = &optab2[o];
		if(fp->f == nil)
			unimp(pc, op);
		rn = (op>>16)&7;
		if(fpemudebug)
			print("%s	%c%ld,F%d,F%d\n", fp->name, tag, op&7, rn, rd);
		(*fp->f)(*fm, FR(ufp, rn), &FR(ufp, rd));
	}
}
static void
vfpoptoi(Internal *fm, uchar o2, uchar o4)
{
	fm->s = o2>>3;
	fm->e = ((o2>>3) | ~(o2 & 4)) - 3 + ExpBias;
	fm->l = 0;
	fm->h = o4 << (20+NGuardBits);
	if(fm->e)
		fm->h |= HiddenBit;
	else
		fm->e++;
}
static void
vfpemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp)
{
	int sz, vd, o1, o2, o3, o4, o, tag;
	long off;
	ulong ea;
	Word w;
	
	Internal *fm, fc;
	/* note: would update fault status here if we noted numeric exceptions */
	sz = op & (1<<8);
	o1 = (op>>20) & 0xF;
	o2 = (op>>16) & 0xF;
	vd = (op>>12) & 0xF;
	switch((op>>24) & 0xF){
	default:
		unimp(pc, op);
	case 0xD:
		/* 
		 * Extension Register load/store A7.6 
		 */
		off = (op&0xFF)<<2;
		if((op & (1<<23)) == 0)
			off = -off;
		ea = REG(ur, o2) + off;
		switch(o1&0x7){	/* D(Bit 22) = 0 (5l) */
		default:
			unimp(pc, op);
		case 0:
			if(sz)
				fst(fpii2d, ea, vd, sz, ufp);
			else
				fst(fpii2s, ea, vd, sz, ufp);
			break;
		case 1:
			if(sz)
				fld(fpid2i, vd, ea, sz, ufp);
			else
				fld(fpis2i, vd, ea, sz, ufp);
			break;
		}
		break;
	case 0xE:
		if(op & (1<<4)){
			/* 
			 * Register transfer between Core & Extension A7.8
			 */
			if(sz)	/* C(Bit 8) != 0 */
				unimp(pc, op);
			switch(o1){
			default:
				unimp(pc, op);
			case 0:	/* Fn := Rt */
				*((Word*)&FR(ufp, o2)) = REG(ur, vd);
				if(fpemudebug)
					print("MOVWF	R%d, F%d\n", vd, o2);
				break;
			case 1:	/* Rt := Fn */
				REG(ur, vd) = *((Word*)&FR(ufp, o2));
				if(fpemudebug)
					print("MOVFW	F%d, R%d =%ld\n", o2, vd, REG(ur, vd));
				break;
			case 0xE:	/* FPSCR := Rt */
				ufp->status = REG(ur, vd);
				if(fpemudebug)
					print("MOVW	R%d, FPSCR\n", vd);
				break;
			case 0xF:	/* Rt := FPSCR */
				if(vd == 0xF){
					ur->psr = ufp->status;
					if(fpemudebug)
						print("MOVW	FPSCR, PSR\n");
				}else{
					REG(ur, vd) = ufp->status;
					if(fpemudebug)
						print("MOVW	FPSCR, R%d\n", vd);
				}
				break;
			}
		}
		else{
			/*
			 * VFP data processing instructions A7.5
			 * Note: As per 5l we ignore (D, N, M) bits
			 */
			if(fpemudebug)
				tag = 'F';
			o3 = (op>>6) & 0x3;
			o4 = op & 0xF;
			fm = &FR(ufp, o4);
			if(o1 == 0xB){	/* A7-17 */
				if(o3 & 0x1){
					switch(o2){
					default:
						o = (o2<<1) | (o3>>1);
						break;
					case 0x8:	/* CVT int -> float/double */
						w = *((Word*)fm);
						fpiw2i(&FR(ufp, vd), &w);
						if(fpemudebug)
							print("CVTW%c	F%d, F%d\n", sz?'D':'F', o4, vd);
						return;
					case 0xD:	/* CVT float/double -> int */
						fpii2w(&w, fm);
						*((Word*)&FR(ufp, vd)) = w;
						if(fpemudebug)
							print("CVT%cW	F%d, F%d\n", sz?'D':'F', o4, vd);
						return;
					case 0x5:	/* CMPF(E) */
							fm = &fpconst[0];
							if(fpemudebug)
								tag = 'C';
					case 0x4:	/* CMPF(E) */
						ufp->status &= ~(N|C|Z|V);
						ufp->status |= fcmp(&FR(ufp, vd), fm);
						if(fpemudebug)
							print("CMPF	%c%d,F%d =%#lux\n",
									tag, (o2&0x1)? 0: o4, vd, ufp->status>>28);
						return;
					}
				}else{	/* VMOV imm (VFPv3 & v4) (5l doesn't generate) */
					vfpoptoi(&fc, o2, o4);
					fm = &fc;
					o = 0;
					if(fpemudebug)
						tag = 'C';
				}
				FP1 *vfp;
				vfp = &voptab1[o];
				if(vfp->f == nil)
					unimp(pc, op);
				if(fpemudebug)
					print("%s	%c%d,F%d\n", vfp->name, tag, o4, vd);
				(*vfp->f)(fm, &FR(ufp, vd));
			}
			else {	/* A7-16 */
				FP2 *vfp;
				o = ((o1&0x3)<<1) | (o1&0x8) | (o3&0x1);
				vfp = &voptab2[o];
				if(vfp->f == nil)
					unimp(pc, op);
				if(fpemudebug)
					print("%s	F%d,F%d,F%d\n", vfp->name, o4, o2, vd);
				(*vfp->f)(*fm, FR(ufp, o2), &FR(ufp, vd));
			}
		}
		break;
	}
}
void
casemu(ulong pc, ulong op, Ureg *ur)
{
	ulong *rp, ro, rn, *rd;
	USED(pc);
	rp = (ulong*)ur;
	ro = rp[op>>16 & 0x7];
	rn = rp[op>>0 & 0x7];
	rd = rp + (op>>12 & 0x7);
	rp = (ulong*)*rd;
	validaddr((uintptr)rp, 4, 1);
	splhi();
	if(*rd = (*rp == ro))
		*rp = rn;
	spllo();
}
int ldrexvalid;
void
ldrex(ulong pc, ulong op, Ureg *ur)
{
	ulong *rp, *rd, *addr;
	USED(pc);
	rp = (ulong*)ur;
	rd = rp + (op>>16 & 0x7);
	addr = (ulong*)*rd;
	validaddr((uintptr)addr, 4, 0);
	ldrexvalid = 1;
	rp[op>>12 & 0x7] = *addr;
	if(fpemudebug)
		print("ldrex, r%ld = [r%ld]@0x%8.8p = 0x%8.8lux",
			op>>12 & 0x7, op>>16 & 0x7, addr, rp[op>>12 & 0x7]);
}
void
strex(ulong pc, ulong op, Ureg *ur)
{
	ulong *rp, rn, *rd, *addr;
	USED(pc);
	rp = (ulong*)ur;
	rd = rp + (op>>16 & 0x7);
	rn = rp[op>>0 & 0x7];
	addr = (ulong*)*rd;
	validaddr((uintptr)addr, 4, 1);
	splhi();
	if(ldrexvalid){
		if(fpemudebug)
			print("strex valid, [r%ld]@0x%8.8p = r%ld = 0x%8.8lux",
				op>>16 & 0x7, addr, op>>0 & 0x7, rn);
		*addr = rn;
		ldrexvalid = 0;
		rp[op>>12 & 0x7] = 0;
	}else{
		if(fpemudebug)
			print("strex invalid, r%ld = 1", op>>16 & 0x7);
		rp[op>>12 & 0x7] = 1;
	}
	spllo();
}
struct {
	ulong	opc;
	ulong	mask;
	void	(*f)(ulong, ulong, Ureg*);
} specialopc[] = {
	{ 0x01900f9f, 0x0ff00fff, ldrex },
	{ 0x01800f90, 0x0ff00ff0, strex },
	{ 0x0ed00100, 0x0ef08100, casemu },
	{ 0x00000000, 0x00000000, nil }
};
/*
 * returns the number of FP instructions emulated
 */
int
fpiarm(Ureg *ur)
{
	ulong op, o, cp;
	FPsave *ufp;
	int i, n;
	if(up == nil)
		panic("fpiarm not in a process");
	ufp = &up->fpsave;
	/*
	 * because all the emulated fp state is in the proc structure,
	 * it need not be saved/restored
	 */
	if(up->fpstate != FPactive){
//		assert(sizeof(Internal) == sizeof(ufp->regs[0]));
		up->fpstate = FPactive;
		ufp->control = 0;
		ufp->status = (0x01<<28)|(1<<12);	/* software emulation, alternative C flag */
		for(n = 0; n < Nfpctlregs; n++)
			FR(ufp, n) = fpconst[0];
	}
	for(n=0; ;n++){
		validaddr(ur->pc, 4, 0);
		op = *(ulong*)(ur->pc);
		if(fpemudebug)
			print("%#lux: %#8.8lux ", ur->pc, op);
		o = (op>>24) & 0xF;
		cp = (op>>8) & 0xF;
		for(i = 0; specialopc[i].f; i++)
			if((op & specialopc[i].mask) == specialopc[i].opc)
				break;
		if(specialopc[i].f){
			if(condok(ur->psr, op>>28))
				specialopc[i].f(ur->pc, op, ur);
		}
		else if(ISVFPOP(cp, o)){
			if(condok(ur->psr, op>>28))
				vfpemu(ur->pc, op, ur, ufp);
		}
		else if(ISFPAOP(cp, o)){
			if(condok(ur->psr, op>>28))
				fpaemu(ur->pc, op, ur, ufp);
		}
		else
			break;
		ur->pc += 4;		/* pretend cpu executed the instr */
	}
	if(fpemudebug)
		print("\n");
	return n;
}