shithub: riscv

Download patch

ref: 8738adf91dfbd656908a578fd567a811536ce557
parent: 200c6d18bf01353dc735b042a3c00135a3b39cdc
author: adventuresin9 <adventuresin9@gmail.com>
date: Wed Mar 8 08:37:23 EST 2023

mt7688 kernel

diff: cannot open b/sys/src/9/mt7688//null: file does not exist: 'b/sys/src/9/mt7688//null'
--- /dev/null
+++ b/sys/src/9/mt7688/arch.c
@@ -1,0 +1,127 @@
+/*
+ * Machine specific stuff
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "tos.h"
+
+#include "ureg.h"
+
+
+/*
+ * Some of these functions are expected by the
+ * port code, but might need to be implemented in
+ * ways specific to the architecture
+ */
+
+/* Idle hook: simply hands off to idle(), which is defined elsewhere. */
+void
+idlehands(void)
+{
+	idle();
+}
+
+
+/*
+ * called in sysfile.c
+ *
+ * Reject an address that is not 4-byte aligned: dump the stack,
+ * post a "sys: odd address" note to the current process and
+ * raise Ebadarg.  Aligned addresses return normally.
+ */
+void
+evenaddr(uintptr va)
+{
+	if((va & 3) == 0)
+		return;
+
+	dumpstack();
+	postnote(up, 1, "sys: odd address", NDebug);
+	error(Ebadarg);
+}
+
+/*
+ * Reset the floating point state of process p: mark it FPinit
+ * and copy only the status word from initfp (the full-structure
+ * copy below is disabled).
+ */
+void
+procsetup(Proc *p)
+{
+	p->fpstate = FPinit;
+	p->fpsave->fpstatus = initfp.fpstatus;
+
+//	memmove(p->fpsave, &initfp, sizeof(FPsave));
+}
+
+
+/* Stub: does nothing with its Proc argument. */
+void
+procfork(Proc*)
+{
+// stub
+}
+
+/* Stub: does nothing with its Proc argument. */
+void
+procsave(Proc*)
+{
+// stub
+}
+
+/* Stub: does nothing with its Proc argument. */
+void
+procrestore(Proc*)
+{
+}
+
+
+/* Return the pc held in the current process's debug registers (up->dbgreg). */
+ulong
+userpc(void)
+{
+	return ((Ureg*)up->dbgreg)->pc;
+}
+
+
+/*
+ * Copy n bytes of register data from uva to pureg on behalf of
+ * devproc, while keeping r27 and status in xp as they were: the
+ * user is not permitted to write those, so they are captured
+ * before the copy and put back afterwards.
+ */
+void
+setregisters(Ureg *xp, char *pureg, char *uva, int n)
+{
+	ulong savedr27 = xp->r27;	/* return PC for GEVector() */
+	ulong savedstatus = xp->status;
+
+	memmove(pureg, uva, n);
+	xp->status = savedstatus;
+	xp->r27 = savedr27;
+}
+
+/*
+ * Give enough context in the ureg to produce a kernel stack for
+ * a sleeping process
+ */
+void
+setkernur(Ureg *xp, Proc *p)
+{
+	/* pc and sp are taken from the process's saved scheduling label */
+	xp->pc = p->sched.pc;
+	xp->sp = p->sched.sp;
+	xp->r24 = (ulong)p;		/* up */
+	xp->r31 = (ulong)sched;		/* r31 is the MIPS link register; point it at sched() */
+}
+
+/* Return the pc from p's saved debug registers, or 0 when it has none. */
+ulong
+dbgpc(Proc *p)
+{
+	Ureg *regs;
+
+	regs = p->dbgreg;
+	return regs != nil ? regs->pc : 0;
+}
+
+/*
+ * Prime the scheduling label of a new kernel process so that it
+ * begins executing at entry.
+ */
+void
+kprocchild(Proc *p, void (*entry)(void))
+{
+	p->sched.pc = (ulong)entry;
+	/* NOTE(review): the stack pointer starts at the Proc's own address; presumably the kernel stack lies just below it -- confirm */
+	p->sched.sp = (ulong)p;
+}
--- /dev/null
+++ b/sys/src/9/mt7688/bootargs.c
@@ -1,0 +1,133 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+
+
+
+/* store plan9.ini contents here at least until we stash them in #ec */
+
+#define BOOTARGS	((char*)CONFADDR)	/* where writeconf() copies the config text */
+#define	BOOTARGSLEN	(16*1024)		/* limit in devenv.c */
+#define	MAXCONF		64			/* most name=value pairs that are kept */
+#define MAXCONFLINE	160			/* bytes reserved per value, NUL included */
+#define isascii(c) ((uchar)(c) > 0 && (uchar)(c) < 0177)	/* non-NUL, below DEL */
+
+static char confname[MAXCONF][KNAMELEN];	/* names; same index as confval */
+static char confval[MAXCONF][MAXCONFLINE];	/* values; same index as confname */
+int nconf;					/* number of entries in use */
+
+
+/* plan9.ini stuff */
+
+
+/*
+ * Look name up (ignoring case) among the stored configuration
+ * names; return its index, or -1 when it is not present.
+ */
+static int
+findconf(char *name)
+{
+	int idx;
+
+	idx = 0;
+	while(idx < nconf){
+		if(cistrcmp(confname[idx], name) == 0)
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/* Return the stored value for name, or nil when there is no such entry. */
+char*
+getconf(char *name)
+{
+	int idx;
+
+	idx = findconf(name);
+	if(idx < 0)
+		return nil;
+	return confval[idx];
+}
+
+/*
+ * Store name=val.  A new name is appended when there is room; it
+ * is silently dropped when the table is full or val is nil.  An
+ * existing name has its value overwritten (a nil val clears it).
+ * Names and values longer than their fixed-size slots are
+ * truncated by strecpy.
+ */
+static void
+addconf(char *name, char *val)
+{
+	int i;
+
+	i = findconf(name);
+	if(i < 0){
+		if(val == nil || nconf >= MAXCONF)
+			return;
+		i = nconf++;
+		strecpy(confname[i], confname[i]+sizeof(confname[i]), name);
+	}
+	/* values are copied, not referenced, so nil must not reach strecpy */
+	if(val == nil)
+		val = "";
+	strecpy(confval[i], confval[i]+sizeof(confval[i]), val);
+}
+
+/*
+ * Copy the configuration returned by getconfenv() to BOOTARGS as
+ * name=value\n text.  Raises an error when the text (with its
+ * terminating NUL) does not fit in BOOTARGSLEN.
+ * NOTE(review): the loop presumably relies on getconfenv()
+ * returning name\0value\0 pairs ended by an empty string -- confirm.
+ */
+static void
+writeconf(void)
+{
+	char *p, *q;
+	int n;
+
+	p = getconfenv();
+
+	/* release the buffer if anything below raises an error */
+	if(waserror()) {
+		free(p);
+		nexterror();
+	}
+
+	/* convert to name=value\n format */
+	for(q=p; *q; q++) {
+		q += strlen(q);		/* NUL after the name ... */
+		*q = '=';
+		q += strlen(q);		/* ... and NUL after the value */
+		*q = '\n';
+	}
+	n = q - p + 1;			/* include the final NUL */
+	if(n >= BOOTARGSLEN)
+		error("kernel configuration too large");
+	memmove(BOOTARGS, p, n);
+	poperror();
+	free(p);
+}
+
+/*
+ * Publish every stored configuration pair with ksetenv(): always
+ * with a final argument of 1, and additionally with 0 when the
+ * name does not start with '*'.
+ */
+void
+setconfenv(void)
+{
+	int i;
+	char *name, *val;
+
+	for(i = 0; i < nconf; i++){
+		name = confname[i];
+		val = confval[i];
+		if(name[0] != '*')
+			ksetenv(name, val, 0);
+		ksetenv(name, val, 1);
+	}
+}
+
+/*
+ * assumes that we have loaded our /cfg/pxe/mac 
+ * file at 0x10000 with tftp in u-boot.  
+ * no longer uses malloc, so can be called early.
+ *
+ * Parses the text at CONFADDR in place (newlines and the first
+ * '=' of each line are overwritten with NULs) and hands every
+ * name=value line to addconf().  Empty lines, lines starting
+ * with '#', and lines without '=' are skipped.  Parsing stops at
+ * a NUL or at a line that does not start with an ascii character.
+ */
+void
+plan9iniinit(void)
+{
+	char *k, *v, *next;
+
+	k = (char *)CONFADDR;
+	if(!isascii(*k))
+		return;
+
+	for(; k && *k != '\0'; k = next) {
+		if (!isascii(*k))		/* sanity check */
+			break;
+		next = strchr(k, '\n');
+		if (next)
+			*next++ = '\0';		/* terminate this line; next is nil on the last one */
+
+		if (*k == '\0' || *k == '\n' || *k == '#')
+			continue;
+		v = strchr(k, '=');
+		if(v == nil)
+			continue;		/* mal-formed line */
+		*v++ = '\0';			/* split into name (k) and value (v) */
+
+		addconf(k, v);
+	}
+}
--- /dev/null
+++ b/sys/src/9/mt7688/clock.c
@@ -1,0 +1,167 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+
+#include	"ureg.h"
+
+enum {
+	Cyccntres	= 2, /* counter advances at ½ clock rate */
+	Basetickfreq	= 580*Mhz / Cyccntres,	/* count register rate; assumes a 580MHz cpu clock -- TODO confirm */
+
+	Instrs		= 10*Mhz,	/* instructions timed by issue1loop() */
+};
+
+/*
+ * Timing loop for guessmips(): count i down from Instrs with a
+ * long unrolled run of decrements and return how many
+ * perfticks() elapsed.  The statement order and count are part
+ * of the measurement; do not rearrange.
+ */
+static long
+issue1loop(void)
+{
+	register int i;
+	long st;
+
+	i = Instrs;
+	st = perfticks();
+	do {
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i; --i; --i; --i; --i; --i;
+		--i; --i; --i; --i; --i;
+		/* omit 3 (--i) to account for conditional branch, nop & jump */
+		i -= 1+3;	 /* --i plus 3 omitted (--i) instructions */
+	} while(--i >= 0);
+	return perfticks() - st;
+}
+
+/*
+ * estimate instructions/s.
+ * Runs loop() at splhi and converts the elapsed tick count into
+ * millions of instructions per second, rounded.  The measurement
+ * is repeated while the tick count is not positive: a negative
+ * count is bogus and a zero count would be used as the divisor
+ * below.  The second argument is a label and is not used.
+ */
+static int
+guessmips(long (*loop)(void), char *)
+{
+	int s;
+	long cyc;
+
+	do {
+		s = splhi();
+		cyc = loop();
+		splx(s);
+		if (cyc <= 0)
+			iprint("again...");
+	} while (cyc <= 0);
+	/*
+	 * Instrs instructions took cyc cycles @ Basetickfreq Hz.
+	 * round the result.
+	 */
+	return (((vlong)Basetickfreq * Instrs) / cyc + Mhz/2) / Mhz;
+}
+
+/*
+ * Calibrate the cpu speed, fill in the timing fields of m and
+ * arm the first compare interrupt.
+ */
+void
+clockinit(void)
+{
+	int mips;
+
+	/*
+	 * calibrate fastclock
+	 */
+	mips = guessmips(issue1loop, "single");
+
+	/*
+	 * m->delayloop should be the number of delay loop iterations
+	 * needed to consume 1 ms, assuming 2 instr'ns in the delay loop.
+	 */
+	m->delayloop = mips*Mhz / (1000 * 2);
+	if(m->delayloop == 0)
+		m->delayloop = 1;
+
+	m->speed = mips;
+	m->hz = m->speed*Mhz;
+	m->cyclefreq = Basetickfreq;
+	m->maxperiod = Basetickfreq / HZ;		/* count ticks in one 1/HZ interval */
+	m->minperiod = Basetickfreq / (100*HZ);		/* a hundredth of that */
+	m->lastcount = rdcount();
+	wrcompare(m->lastcount+m->maxperiod);		/* first compare match one maxperiod from now */
+
+	intron(INTR7);
+}
+
+/*
+ * Clock tick: push the compare register one maxperiod past the
+ * current count and hand the tick to timerintr().
+ */
+void
+clock(Ureg *ur)
+{
+	wrcompare(rdcount()+m->maxperiod);	/* side-effect: dismiss intr */
+	timerintr(ur, 0);
+}
+
+/* Busy-wait until µs() has advanced by at least n microseconds. */
+void
+microdelay(int n)
+{
+	ulong start;
+
+	start = µs();
+	for(;;){
+		if(µs() - start >= n)
+			break;
+	}
+}
+
+/* Busy-wait for n milliseconds, one microdelay(1000) at a time. */
+void
+delay(int n)
+{
+	for(; n > 0; n--)
+		microdelay(1000);
+}
+
+/* Current fastticks() value converted to microseconds by fastticks2us(). */
+ulong
+µs(void)
+{
+	return fastticks2us(fastticks(nil));
+}
+
+/*
+ * Return the running 64-bit tick total kept in m->fastticks,
+ * after adding the count-register ticks that passed since the
+ * previous call.  When hz is non-nil, *hz is set to Basetickfreq.
+ */
+uvlong
+fastticks(uvlong *hz)
+{
+	int x;
+	ulong delta, count;
+
+	if(hz)
+		*hz = Basetickfreq;
+
+	/* avoid reentry on interrupt or trap, to prevent recursion */
+	x = splhi();
+	count = rdcount();
+	/* compare further than maxperiod ahead of count: pull it back in */
+	if(rdcompare() - count > m->maxperiod)
+		wrcompare(count+m->maxperiod);
+
+	if (count < m->lastcount)		/* wrapped around? */
+		delta = count + ((1ull<<32) - m->lastcount);
+	else
+		delta = count - m->lastcount;
+	m->lastcount = count;
+	m->fastticks += delta;
+	splx(x);
+	return m->fastticks;
+}
+
+/* Raw value of the count register, as read by rdcount(). */
+ulong
+perfticks(void)
+{
+	return rdcount();
+}
+
+/*
+ * Program the compare register so that the next clock interrupt
+ * arrives when fastticks() reaches `next'.  The distance is
+ * clamped: anything closer than minperiod (including a deadline
+ * that has already passed) fires after minperiod, and anything
+ * beyond maxperiod-minperiod fires after maxperiod.  A next of 0
+ * leaves the timer untouched.
+ */
+void
+timerset(Tval next)
+{
+	int x;
+	vlong period;
+
+	if(next == 0)
+		return;
+	x = splhi();			/* don't let us get scheduled */
+	/*
+	 * keep the difference signed and 64 bits wide: a negative
+	 * long compared against the ulong limits is converted to a
+	 * huge unsigned value, which used to select maxperiod for a
+	 * deadline that was already due.
+	 */
+	period = (vlong)next - (vlong)fastticks(nil);
+	if(period < (vlong)m->minperiod)
+		period = m->minperiod;
+	else if(period > (vlong)(m->maxperiod - m->minperiod))
+		period = m->maxperiod;
+	wrcompare(rdcount()+(ulong)period);
+	splx(x);
+}
--- /dev/null
+++ b/sys/src/9/mt7688/dat.h
@@ -1,0 +1,242 @@
+typedef struct Conf	Conf;
+typedef struct Confmem	Confmem;
+typedef struct FPsave	FPsave;
+typedef struct PFPU	PFPU;
+typedef struct KMap	KMap;
+typedef struct Lance	Lance;
+typedef struct Lancemem	Lancemem;
+typedef struct Label	Label;
+typedef struct Lock	Lock;
+typedef struct Mach	Mach;
+typedef struct MMU	MMU;
+typedef struct PMMU	PMMU;
+typedef struct Softtlb	Softtlb;
+typedef struct Ureg	Ureg;
+typedef struct Proc	Proc;
+typedef struct ISAConf	ISAConf;
+typedef uvlong		Tval;
+
+#define MAXSYSARG	5	/* for mount(fd, afd, mpt, flag, arg) */
+
+/*
+ *  parameters for sysproc.c and rebootcmd.c
+ */
+//#define AOUT_MAGIC	(P_MAGIC)
+/*
+ * Both macros expand to `A || magic == B' without outer
+ * parentheses, so they only make sense as the right-hand side of
+ * a comparison against a variable named magic at the point of use.
+ */
+#define AOUT_MAGIC	N_MAGIC || magic==P_MAGIC
+/* r3k or r4k boot images */
+#define BOOT_MAGIC	(0x160<<16) || magic == ((0x160<<16)|3)
+
+/* from legacy /mips/include/u.h */
+#define FPCOND	(1<<23)
+
+/*
+ *  machine dependent definitions used by ../port/dat.h
+ */
+
+struct Lock
+{
+	ulong	key;			/* semaphore (non-zero = locked) */
+	ulong	sr;
+	ulong	pc;
+	Proc	*p;
+	Mach	*m;
+	ushort	isilock;
+};
+
+/* saved stack pointer and program counter */
+struct Label
+{
+	uintptr	sp;
+	uintptr	pc;
+};
+
+struct Confmem
+{
+	ulong	base;
+	ulong	npage;
+	ulong	kbase;
+	ulong	klimit;
+};
+
+struct Conf
+{
+	ulong	nmach;		/* processors */
+	ulong	nproc;		/* processes */
+	Confmem	mem[4];
+	ulong	npage;		/* total physical pages of memory */
+	ulong	upages;		/* user page pool */
+	ulong	nimage;		/* number of page cache image headers */
+	ulong	nswap;		/* number of swap pages */
+	int	nswppo;		/* max # of pageouts per segment pass */
+	ulong	copymode;	/* 0 is copy on write, 1 is copy on reference */
+	ulong	ialloc;		/* bytes available for interrupt-time allocation */
+	ulong	pipeqsize;	/* size in bytes of pipe queues */
+	int	nuart;		/* number of uart devices */
+	int	monitor;
+	int	keyboard;
+};
+
+struct ISAConf
+{
+	char	*type;
+	ulong	port;
+	int	irq;
+	int	nopt;
+	char	*opt[1];
+};
+
+#define BUSUNKNOWN -1
+
+/*
+ * floating point registers
+ */
+enum {
+	Nfpregs		= 32,		/* floats; half as many doubles */
+};
+
+/*
+ * emulated floating point (mips32r2 with ieee fp regs)
+ * fpstate is separate, kept in Proc
+ */
+struct FPsave
+{
+	/* /dev/proc expects the registers to be first in FPsave */
+	ulong	reg[Nfpregs];		/* the canonical bits */
+	union {
+		ulong	fpstatus;	/* both are fcr31 */
+		ulong	fpcontrol;
+	};
+
+	int	fpdelayexec;		/* executing delay slot of branch */
+	uintptr	fpdelaypc;		/* pc to resume at after */
+	ulong	fpdelaysts;	/* save across user-mode delay-slot execution */
+
+	/* stuck-fault detection */
+	uintptr	fppc;			/* addr of last fault */
+	int	fpcnt;			/* how many consecutive at that addr */
+};
+
+int fpemudebug;
+
+/* per-process floating point state */
+struct PFPU
+{
+	int	fpstate;	/* one of the FP* states, e.g. FPinit */
+	FPsave	fpsave[1];	/* one-element array, so fpsave can be used like a pointer */
+};
+
+enum
+{
+	/* floating point state */
+	FPinit,
+	FPactive,
+	FPinactive,
+	FPemu,
+
+	/* bit meaning floating point illegal */
+	FPillegal= 0x100,
+};
+
+/*
+ *  mmu goo in the Proc structure
+ */
+struct PMMU
+{
+	int	pidonmach[MAXMACH];
+};
+
+#include "../port/portdat.h"
+
+/* per-processor state */
+struct Mach
+{
+	/* the following are all known by l.s and cannot be moved */
+	int	machno;			/* physical id of processor */
+	Softtlb*stb;
+	Proc*	proc;			/* process on this processor */
+	uintptr	splpc;			/* pc that called splhi() */
+	ulong	tlbfault;
+
+	/* the following is safe to move */
+	PMach;
+
+	int	lastpid;		/* last pid allocated on this machine */
+	Proc*	pidproc[NTLBPID];	/* proc that owns tlbpid on this mach */
+
+	KMap*	kactive;		/* active on this machine */
+	int	knext;
+	uchar	ktlbx[NTLB];		/* tlb index used for kmap */
+	uchar	ktlbnext;
+
+	/* for per-processor timers */
+	uvlong	fastticks;		/* running tick total kept by fastticks() */
+	ulong	lastcount;		/* count register value when fastticks was last updated */
+
+	ulong	hz;			/* speed*Mhz, set by clockinit() */
+	int	speed;			/* cpu speed */
+	ulong	delayloop;		/* for the delay() routine */
+	ulong	maxperiod;		/* longest compare interval: Basetickfreq/HZ */
+	ulong	minperiod;		/* shortest compare interval: Basetickfreq/(100*HZ) */
+
+	int	hashcoll;		/* soft-tlb hash collisions */
+	int	paststartup;		/* for putktlb */
+
+	uintptr	stack[1];
+};
+
+struct KMap
+{
+	Ref;
+	ulong	virt;
+	ulong	phys0;
+	ulong	phys1;
+	KMap*	next;
+	KMap*	konmach[MAXMACH];
+	Page*	pg;
+	ulong	pc;			/* of caller to kmap() */
+};
+
+#define	VA(k)		((k)->virt)
+#define PPN(x)		((ulong)(x)>>6)
+
+struct Softtlb
+{
+	ulong	virt;
+	ulong	phys0;
+	ulong	phys1;
+};
+
+struct
+{
+	char	machs[MAXMACH];		/* active cpus */
+	short	exiting;
+}active;
+
+enum {
+	Mhz		= 1000*1000,
+};
+
+
+extern register Mach	*m;
+extern register Proc	*up;
+
+extern Mach* machaddr[MAXMACH];
+#define	MACHP(n)	(machaddr[n])
+extern int	normalprint;
+
+extern FPsave initfp;
+
+#define CONSOLE	0
+
+/*
+ *  hardware info about a device
+ */
+typedef struct {
+	ulong	port;
+	int	size;
+} Devport;
+
+struct DevConf
+{
+	ulong	intnum;			/* interrupt number */
+	char	*type;			/* card type, malloced */
+	int	nports;			/* Number of ports */
+	Devport	*ports;			/* The ports themselves */
+};
--- /dev/null
+++ b/sys/src/9/mt7688/ether7688.c
@@ -1,0 +1,837 @@
+/*
+ * MediaTek Ethernet for the MT7688
+ * thank you to the folks at NetBSD
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+#include "../port/etherif.h"
+#include "../port/ethermii.h"
+
+
+/* RX Descriptor Format */
+#define RXD_LEN1(x)	(((x) >> 0) & 0x3fff)
+#define RXD_LAST1	(1 << 14)
+#define RXD_LEN0(x)	(((x) >> 16) & 0x3fff)
+#define RXD_LAST0	(1 << 30)
+#define RXD_DDONE	(1 << 31)
+#define RXD_FOE(x)	(((x) >> 0) & 0x3fff)
+#define RXD_FVLD	(1 << 14)
+#define RXD_INFO(x)	(((x) >> 16) & 0xff)
+#define RXD_PORT(x)	(((x) >> 24) & 0x7)
+#define RXD_INFO_CPU	(1 << 27)
+#define RXD_L4_FAIL	(1 << 28)
+#define RXD_IP_FAIL	(1 << 29)
+#define RXD_L4_VLD	(1 << 30)
+#define RXD_IP_VLD	(1 << 31)
+
+
+/* TX Descriptor Format */
+#define TXD_LEN1(x)	(((x) & 0x3fff) << 0)
+#define TXD_LAST1	(1 << 14)
+#define TXD_BURST	(1 << 15)
+#define TXD_LEN0(x)	(((x) & 0x3fff) << 16)
+#define TXD_LAST0	(1 << 30)
+#define TXD_DDONE	(1 << 31)
+#define TXD_VIDX(x)	(((x) & 0xf) << 0)
+#define TXD_VPRI(x)	(((x) & 0x7) << 4)
+#define TXD_VEN		(1 << 7)
+#define TXD_SIDX(x)	(((x) & 0xf) << 8)
+#define TXD_SEN(x)	(1 << 13)
+#define TXD_QN(x)	(((x) & 0x7) << 16)
+#define TXD_PN(x)	(((x) & 0x7) << 24)
+#define	TXD_PN_CPU	0
+#define	TXD_PN_GDMA1	1
+#define	TXD_PN_GDMA2	2
+#define TXD_TCP_EN	(1 << 29)
+#define TXD_UDP_EN	(1 << 30)
+#define TXD_IP_EN	(1 << 31)
+
+
+/* pdma global cfgs */
+#define  GLO_CFG_TX_WB_DDONE	(1 << 6)
+#define   GLO_CFG_BURST_SZ_4	(0 << 4)
+#define   GLO_CFG_BURST_SZ_8	(1 << 4)
+#define   GLO_CFG_BURST_SZ_16	(2 << 4)
+#define  GLO_CFG_RX_DMA_EN		(1 << 2)
+#define  GLO_CFG_TX_DMA_EN		(1 << 0)
+#define	RX_DMA_BUSY				(1 << 3)
+#define TX_DMA_BUSY				(1 << 1)
+
+
+/* interrupt masks */
+#define  INT_RX_DONE_INT1		(1 << 17)
+#define  INT_RX_DONE_INT0		(1 << 16)
+#define  INT_TX_DONE_INT3		(1 << 3)
+#define  INT_TX_DONE_INT2		(1 << 2)
+#define  INT_TX_DONE_INT1		(1 << 1)
+#define  INT_TX_DONE_INT0		(1 << 0)
+#define  INT_RX_DUKKHA			(1 << 31)
+#define  INT_TX_DUKKHA			(1 << 29)
+
+
+/* mii stuff */
+#define  PCTL0_WR_VAL(x)	(((x) & 0xffff) << 16)
+#define  PCTL0_RD_CMD		(1 << 14)
+#define  PCTL0_WR_CMD		(1 << 13)
+#define  PCTL0_REG(x)		(((x) & 0x1f) << 8)
+#define  PCTL0_ADDR(x)		(((x) & 0x1f) << 0)
+#define  PCTL1_RD_VAL(x)	(((x) >> 16) & 0xffff)
+#define  PCTL1_RD_DONE		(1 << 1)	/* read clear */
+#define  PCTL1_WR_DONE		(1 << 0)	/* read clear */
+
+
+/* Debugging options */
+/* compile-time switches; a non-zero value enables the matching iprint tracing */
+enum{
+	Miidebug	=	0,	/* miird/miiwr calls */
+	Ethdebug	=	0,	/* rxproc, txproc and interrupt activity */
+	Attchbug	=	0,	/* pnp/attach/reset progress */
+};
+
+
+enum{
+	Nrd		= 256,	/* Number rx descriptors */
+	Ntd		= 64,	/* Number tx descriptors */
+	Rbsz	= 2048,	/* block size */
+};
+
+
+typedef struct Desc Desc;
+typedef struct Ctlr Ctlr;
+
+
+/* one entry of an rx or tx descriptor ring; see the RXD_ and TXD_ bit definitions */
+struct Desc
+{
+	u32int	ptr0;		/* physical address of the packet buffer */
+	u32int	info1;		/* length, LAST0 and DDONE bits */
+	u32int	ptr1;
+	u32int	info2;
+};
+
+
+/* driver state for the single controller */
+struct Ctlr
+{
+	int		attached;	/* set by attach() */
+	QLock;				/* taken by attach() and ifstat() */
+	Ether	*edev;		/* point back */
+
+	struct {
+		Block	*b[Nrd];	/* block behind each rx descriptor */
+		Desc	*d;		/* rx descriptor ring */
+		Rendez;			/* rxproc sleeps here; woken by etherinterrupt */
+		Lock;
+	}	rx[1];
+
+	/* NOTE(review): tx has no Lock member of its own although txproc calls ilock(ctlr->tx) -- confirm which lock that resolves to */
+	struct {
+		Block	*b[Ntd];	/* block last queued on each tx descriptor */
+		Desc	*d;		/* tx descriptor ring */
+		Rendez;			/* txproc sleeps here; woken by etherinterrupt */
+	}	tx[1];
+
+	Mii	*mii;
+
+	QLock	statlock;
+	int		rxstat;		/* rxproc loop iterations */
+	int		rxintr;		/* interrupts with an rx-done bit set */
+	int		rxdmaerr;
+	int		txstat;		/* txproc loop iterations */
+	int		txintr;		/* interrupts with a tx-done bit set */
+	int		txdmaerr;
+	int		nointr;		/* interrupts with neither rx nor tx done set */
+	int		badrx;		/* rx descriptors processed with zero length */
+};
+
+
+/* read the 32-bit system control register at SYSCTLBASE+offset */
+static u32int
+sysrd(int offset)
+{
+	return *IO(u32int, (SYSCTLBASE + offset));
+}
+
+
+/* write val to the 32-bit system control register at SYSCTLBASE+offset */
+static void
+syswr(int offset, u32int val)
+{
+	*IO(u32int, (SYSCTLBASE + offset)) = val;
+}
+
+
+/* read the 32-bit ethernet register at ETHBASE+offset */
+static u32int
+ethrd(int offset)
+{
+	return *IO(u32int, (ETHBASE + offset));
+}
+
+
+/* write val to the 32-bit ethernet register at ETHBASE+offset */
+static void
+ethwr(int offset, u32int val)
+{
+	*IO(u32int, (ETHBASE + offset)) = val;
+}
+
+
+/* read the 32-bit switch register at SWCHBASE+offset */
+static u32int
+swrd(int offset)
+{
+	return *IO(u32int, (SWCHBASE + offset));
+}
+
+
+/* write val to the 32-bit switch register at SWCHBASE+offset */
+static void
+swwr(int offset, u32int val)
+{
+	*IO(u32int, (SWCHBASE + offset)) = val;
+}
+
+
+/*
+ * Read register ra of the phy at address pa through the switch's
+ * PCTL0/PCTL1 registers.  Returns the 16-bit value, or -1 when pa
+ * is above 5 or the read has not completed after 2000 polls made
+ * 100µs apart.  The mii argument is not used.
+ */
+static int
+miird(Mii *mii, int pa, int ra)
+{
+	int status, tries;
+
+	if(Miidebug)
+		iprint("miird, phy_addr; %d phy_reg: %d\n", pa, ra);
+
+	if(pa > 5)
+		return -1;
+
+	swwr(SW_PCTL0, PCTL0_RD_CMD | PCTL0_ADDR(pa) | PCTL0_REG(ra));
+	delay(1);
+
+	for(tries = 0; tries < 2000; tries++){
+		status = swrd(SW_PCTL1);
+		if(status & PCTL1_RD_DONE)
+			return PCTL1_RD_VAL(status);
+		microdelay(100);
+	}
+	return -1;
+}
+
+
+/*
+ * Write val to register ra of the phy at address pa through the
+ * switch's PCTL0/PCTL1 registers.  Returns 0 on completion, or -1
+ * when pa is above 5 or the write has not completed after 2000
+ * polls made 100µs apart.  The mii argument is not used.
+ */
+static int
+miiwr(Mii *mii, int pa, int ra, int val)
+{
+	int status, tries;
+
+	if(Miidebug)
+		iprint("miiwr, phy_addr; %d phy_reg: %d val: 0x%04X\n", pa, ra, val);
+
+	if(pa > 5)
+		return -1;
+
+	swwr(SW_PCTL0, PCTL0_WR_CMD | PCTL0_WR_VAL(val) | PCTL0_ADDR(pa) | PCTL0_REG(ra));
+	delay(1);
+
+	for(tries = 0; tries < 2000; tries++){
+		status = swrd(SW_PCTL1);
+		if(status & PCTL1_WR_DONE)
+			return 0;
+		microdelay(100);
+	}
+	return -1;
+}
+
+
+/*
+ * Fill edev->ea from the GDMA1 MAC registers: the low 16 bits of
+ * the MSB register give bytes 0-1, the LSB register bytes 2-5.
+ */
+static void
+getmacaddr(Ether *edev)
+{
+	ulong hi, lo;
+	int i;
+
+	lo = ethrd(GDMA1_MAC_LSB);
+	hi = ethrd(GDMA1_MAC_MSB);
+
+	edev->ea[0] = hi>>8;
+	edev->ea[1] = hi;
+	for(i = 0; i < 4; i++)
+		edev->ea[2+i] = lo >> (24 - 8*i);
+
+	if(Attchbug){
+		iprint("ether getmac: %04lX %08lX\n", (hi & 0xFFFF), lo);
+		delay(10);
+	}
+}
+
+
+/*
+ * Pulse bit 24 of SYSCTL_RST, then load a fixed sequence of phy
+ * register values through miiwr().  The values are opaque here;
+ * results of the individual writes are not checked.
+ */
+static void
+ethreset(Ether *edev)
+{
+	ulong buf;
+	int i;
+	Ctlr *ctlr = edev->ctlr;
+	Mii *mii = ctlr->mii;	/* only passed through; miiwr() does not use it */
+
+	iprint("reset eth and ephy\n");
+	delay(10);
+
+
+
+
+	/* set bit 24, wait 1ms, clear it again */
+	buf = sysrd(SYSCTL_RST);
+	buf |= (1<<24);
+	syswr(SYSCTL_RST, buf);
+	delay(1);
+	buf ^= (1<<24);
+	syswr(SYSCTL_RST, buf);
+
+
+	miiwr(mii, 0, 31, 0x2000);
+	miiwr(mii, 0, 26, 0x0020);
+
+	/* same settings for phy addresses 0 through 4 */
+	for(i = 0; i < 5; i++){
+		miiwr(mii, i, 31, 0x8000);
+		miiwr(mii, i, 0, 0x3100);
+		miiwr(mii, i, 30, 0xa000);
+		miiwr(mii, i, 31, 0xa000);
+		miiwr(mii, i, 16, 0x0606);
+		miiwr(mii, i, 23, 0x0f0e);
+		miiwr(mii, i, 24, 0x1610);
+		miiwr(mii, i, 30, 0x1f15);
+		miiwr(mii, i, 28, 0x6111);
+	}
+
+	miiwr(mii, 0, 31, 0x5000);
+	miiwr(mii, 0, 19, 0x004a);
+	miiwr(mii, 0, 20, 0x015a);
+	miiwr(mii, 0, 21, 0x00ee);
+	miiwr(mii, 0, 22, 0x0033);
+	miiwr(mii, 0, 23, 0x020a);
+	miiwr(mii, 0, 24, 0x0000);
+	miiwr(mii, 0, 25, 0x024a);
+	miiwr(mii, 0, 26, 0x035a);
+	miiwr(mii, 0, 27, 0x02ee);
+	miiwr(mii, 0, 28, 0x0233);
+	miiwr(mii, 0, 29, 0x000a);
+	miiwr(mii, 0, 30, 0x0000);
+	miiwr(mii, 0, 31, 0x4000);
+	miiwr(mii, 0, 29, 0x000d);
+	miiwr(mii, 0, 30, 0x0500);
+
+
+}
+
+/*
+ * Pulse bit 24 of SYSCTL_RST and then write a fixed set of
+ * switch registers.  edev is not used.
+ */
+static void
+doreset(Ether *edev)
+{
+	ulong buf;
+
+	/* set bit 24, wait 1ms, clear it again */
+	buf = sysrd(SYSCTL_RST);
+	buf |= (1<<24);
+	syswr(SYSCTL_RST, buf);
+	delay(1);
+	buf ^= (1<<24);
+	syswr(SYSCTL_RST, buf);
+
+	iprint("reset switch\n");
+	delay(10);
+
+	if(Attchbug){
+		iprint("ether did a reset\n");
+		delay(10);
+	}
+
+/* basic switch init */
+	swwr(SW_FCT0, 0xC8A07850);
+	swwr(SW_SGC2, 0x00000000);
+	swwr(SW_PFC1, 0x00405555);	//vlan options
+	swwr(SW_POC0, 0x00007f7f);
+	swwr(SW_POC2, 0x00007f7f);
+	swwr(SW_FCT2, 0x0002500c);
+	swwr(SW_SWGC, 0x0008a301);
+	swwr(SW_SOCPC, 0x02404040);
+	swwr(SW_FPORT, 0x3f502b28);
+	swwr(SW_FPA, 0x00000000);
+
+	USED(edev);
+
+}
+
+
+/*
+ * sleep() condition for rxproc: true once the descriptor passed
+ * in arg has its DDONE bit set.  The bit is bit 31, so the masked
+ * value must be tested against zero; comparing it with 1 can
+ * never succeed.
+ */
+static int
+rdfull(void *arg)
+{
+	Desc *d = arg;
+	return (d->info1 & RXD_DDONE) != 0;
+}
+
+
+/*
+ * Receive kproc.  Walks the rx ring starting after the current
+ * cpu index; for each descriptor it waits for DDONE, passes the
+ * filled block to etheriq(), installs a fresh block in its place
+ * and advances PDMA_RX0_CPU_IDX.  Never returns.
+ */
+static void
+rxproc(void *arg)
+{
+	Ether	*edev = arg;
+	Ctlr	*ctlr = edev->ctlr;
+	Block	*b;
+	Desc	*d;
+	int		len, i;
+
+	i = ethrd(PDMA_RX0_CPU_IDX);	/* get current index */
+
+	while(waserror())
+		;
+
+	for(;;){
+		ctlr->rxstat++;
+		i = (i + 1) % Nrd;
+		d = &ctlr->rx->d[i];
+
+		/*
+		 * a wakeup only says that some rx interrupt happened, so
+		 * test DDONE again after every return from sleep();
+		 * otherwise a descriptor the hardware has not filled yet
+		 * would be consumed and the ring index would run ahead.
+		 */
+		while((d->info1 & RXD_DDONE) == 0)
+			sleep(ctlr->rx, rdfull, d);
+
+		len = RXD_LEN0(d->info1);	/* get length of packet */
+		b = ctlr->rx->b[i];
+
+		if(len > 0){
+			b->wp = b->rp + len;
+			dcflush(b->rp, BLEN(b));	/* move block to ram */
+			etheriq(edev, b);			/* move block to ether input queue */
+
+			if(Ethdebug)
+				iprint("rxproc: (%d) len=%d | ", i, len);
+		} else {
+			ctlr->badrx++;
+			freeb(b);
+		}
+
+		/* replenish */
+		b = iallocb(Rbsz);
+		if(b == nil)
+			panic("NO RX BLOCKS");
+		ctlr->rx->b[i] = b;
+		dcflush(b->rp, Rbsz);
+		d->ptr0 = PADDR(b->rp);	/* point to fresh block */
+		d->info1 = 0;			/* clear out info1 & 2 */
+		d->info2 = 0;
+		d->info1 = RXD_LAST0;	/* clear ddone */
+
+		ethwr(PDMA_RX0_CPU_IDX, i);	/* move to next index */
+	}
+}
+
+
+/*
+ * sleep() condition for txproc: true when the descriptor passed
+ * in arg has both TXD_LAST0 and TXD_DDONE set.
+ */
+static int
+tdfree(void *arg)
+{
+	Desc *d = arg;
+	u32int want = TXD_LAST0 | TXD_DDONE;
+
+	return (d->info1 & want) == want;
+}
+
+
+/*
+ * Transmit kproc.  Takes blocks from edev->oq, pads short ones to
+ * 64 bytes, waits for the next tx descriptor to be free, points
+ * it at the block and advances PDMA_TX0_CPU_IDX.  The block stays
+ * recorded in tx->b[] and is freed when its slot is reused.
+ * Leaves the loop when qbread() returns nil.
+ */
+static void
+txproc(void *arg)
+{
+	Ether *edev = arg;
+	Ctlr *ctlr = edev->ctlr;
+	Block *b;
+	Desc *d;
+	int i, len, Δlen;
+
+	i = ethrd(PDMA_TX0_CPU_IDX);	/* get current index */
+
+	while(waserror())
+		;
+
+	for(;;){
+		ctlr->txstat++;
+		if((b = qbread(edev->oq, 100000)) == nil)	/* fetch packet from queue */
+			break;
+
+		/*
+		 * pad before the block is recorded anywhere and before
+		 * the lock is taken: padblock() may free b and return a
+		 * newly allocated block, so a pointer saved earlier would
+		 * be stale (freed again on slot reuse, new block leaked).
+		 */
+		len = BLEN(b);
+		if(len < 64){	/* tx needs at least 64 per packet */
+			Δlen = 64 - len;
+			b = padblock(b, -Δlen);
+			memset(b->wp, 0, Δlen);	/* don't send stale memory as padding */
+			len += Δlen;
+		}
+
+		d = &ctlr->tx->d[i];
+		while(!tdfree(d))
+			sleep(ctlr->tx, tdfree, d);
+
+		/* NOTE(review): tx has no Lock member of its own; confirm which lock ilock(ctlr->tx) resolves to */
+		ilock(ctlr->tx);	/* helps with packet loss */
+		if(ctlr->tx->b[i] != nil)
+			freeb(ctlr->tx->b[i]);
+		ctlr->tx->b[i] = b;
+
+		if(Ethdebug)
+			iprint("txproc: (%d) len=%d | ", i, len);
+
+		dcflush(b->rp, Rbsz);	/* move packet to ram */
+		d->ptr0 = PADDR(b->rp);
+	//	d->info2 = TXD_QN(3) | TXD_PN(TXD_PN_GDMA1);
+		d->info1 = TXD_LEN0(len) | TXD_LAST0;
+
+		i = (i + 1) % Ntd;
+		ethwr(PDMA_TX0_CPU_IDX, i);
+		iunlock(ctlr->tx);
+	}
+}
+
+
+/*
+ * Interrupt handler: read INT_STATUS, wake rxproc and/or txproc
+ * for the done bits that are set, count an interrupt with no
+ * done bit as spurious, count DMA error bits, and acknowledge
+ * everything that was read by writing it back.
+ */
+static void
+etherinterrupt(Ureg*, void *arg)
+{
+	Ether *edev = arg;
+	Ctlr *ctlr = edev->ctlr;
+	u32int irq;
+	int handled;
+
+	irq = ethrd(INT_STATUS);	/* get interrupt requests */
+
+	if(Ethdebug){
+		iprint("ether interrupt: %08uX |", irq);
+		delay(10);
+	}
+
+	handled = 0;
+
+	if(irq & (INT_RX_DONE_INT0 | INT_RX_DONE_INT1)){
+		ctlr->rxintr++;
+		wakeup(ctlr->rx);
+		handled = 1;
+	}
+
+	if(irq & (INT_TX_DONE_INT0 | INT_TX_DONE_INT1 |
+		INT_TX_DONE_INT2 | INT_TX_DONE_INT3)){
+		ctlr->txintr++;
+		wakeup(ctlr->tx);
+		handled = 1;
+	}
+
+	if(!handled){
+		ctlr->nointr++;
+		iprint("etherinterrupt: spurious %X\n", irq);
+	}
+
+	if(irq & INT_TX_DUKKHA)
+		ctlr->txdmaerr++;
+
+	if(irq & INT_RX_DUKKHA)
+		ctlr->rxdmaerr++;
+
+	ethwr(INT_STATUS, irq);	/* writing back 1's clears irqs */
+}
+
+
+/*
+ * Allocate ctlr->mii, hook up miird/miiwr, probe for a phy and
+ * start autonegotiation.  Returns 0 on success; returns -1 (with
+ * ctlr->mii left nil) when the allocation or the probe fails.
+ */
+static int
+initmii(Ctlr *ctlr)
+{
+
+/*
+ *	since the ethernet is wired right into 
+ *	a 7? port switch, much of the mii stuff
+ * 	is handled by the switch start up
+ */
+
+	MiiPhy *phy;
+	Ether	*edev = ctlr->edev;
+
+	if((ctlr->mii = malloc(sizeof(Mii))) == nil)
+		return -1;
+
+	ctlr->mii->ctlr	= ctlr;
+	ctlr->mii->mir	= miird;
+	ctlr->mii->miw	= miiwr;
+
+	if(mii(ctlr->mii, ~0) == 0 || (phy = ctlr->mii->curphy) == nil){
+		iprint("#l%d: init mii failure\n", edev->ctlrno);
+		free(ctlr->mii);
+		ctlr->mii = nil;
+		return -1;
+	}
+
+	iprint("#l%d: phy%d id %.8ux oui %x\n", 
+		edev->ctlrno, phy->phyno, phy->id, phy->oui);
+
+	miireset(ctlr->mii);
+
+	miiane(ctlr->mii, ~0, ~0, ~0);
+
+	return 0;
+}
+
+
+/*
+ * Bring the interface up on first use: reset the hardware, set
+ * up the mii, build the tx and rx descriptor rings, hand them to
+ * the PDMA engine, enable interrupts and DMA, and start the
+ * rxproc and txproc kprocs.  Later calls return at once.
+ * Raises an error when the mii setup or an allocation fails.
+ */
+static void
+attach(Ether *edev)	//keep it minimal
+{
+	int i;
+	ulong	buf;
+	Ctlr *ctlr;
+	Desc *d;
+	Block *b;
+	void *ring;
+
+	ctlr = edev->ctlr;
+
+	if(Attchbug){
+		iprint("ether attach called\n");
+		delay(10);
+	}
+
+	qlock(ctlr);
+	if(ctlr->attached){
+		qunlock(ctlr);
+
+		if(Attchbug){
+			iprint("ether attach already?\n");
+			delay(10);
+		}
+
+		return;
+	}
+
+	if(waserror()){
+		qunlock(ctlr);
+
+		if(Attchbug){
+			iprint("ether attach waserror?\n");
+			delay(10);
+		}
+
+		/*
+		 * give back the rx blocks allocated so far.  the rings
+		 * come from xspanalloc and are reached through a KSEG1
+		 * alias, so they must not be passed to free(); they are
+		 * kept and reused by the next attach attempt.
+		 */
+		for(i = 0; i < Nrd; i++){
+			if(ctlr->rx->b[i] != nil){
+				freeb(ctlr->rx->b[i]);
+				ctlr->rx->b[i] = nil;
+			}
+		}
+		nexterror();
+	}
+
+	doreset(edev);
+
+	ethreset(edev);
+
+	if(initmii(ctlr) < 0)
+		error("mii failed");
+
+	/*
+	 * Allocate Rx/Tx ring KSEG1, is uncached memory.
+	 * check for nil before or-ing in KSEG1 (afterwards the
+	 * pointer can no longer be nil), and push the ring out of the
+	 * cache before it is used through the uncached alias.
+	 * NOTE(review): assumes dcflush writes back and invalidates -- confirm.
+	 */
+	if(ctlr->tx->d == nil){
+		ring = xspanalloc(sizeof(Desc) * Ntd, CACHELINESZ, 0);
+		if(ring == nil)
+			error(Enomem);
+		dcflush(ring, sizeof(Desc) * Ntd);
+		ctlr->tx->d = (Desc *)KSEG1ADDR(ring);
+	}
+	if(ctlr->rx->d == nil){
+		ring = xspanalloc(sizeof(Desc) * Nrd, CACHELINESZ, 0);
+		if(ring == nil)
+			error(Enomem);
+		dcflush(ring, sizeof(Desc) * Nrd);
+		ctlr->rx->d = (Desc *)KSEG1ADDR(ring);
+	}
+
+	/* Allocate Rx blocks, initialize Rx ring. */
+	for(i = 0; i < Nrd; i++){
+		b = iallocb(Rbsz);
+		if(b == nil)
+			error("rxblock");
+		ctlr->rx->b[i] = b;
+		dcflush(b->rp, Rbsz);
+		d = &ctlr->rx->d[i];
+		d->ptr0 = PADDR(b->rp);
+		d->info1 = RXD_LAST0;
+	}
+
+	/* Initialize Tx ring */
+	for(i = 0; i < Ntd; i++){
+		ctlr->tx->b[i] = nil;
+		ctlr->tx->d[i].info1 = TXD_LAST0 | TXD_DDONE;
+		ctlr->tx->d[i].info2 = TXD_QN(3) | TXD_PN(TXD_PN_GDMA1);
+	}
+
+	if(Attchbug){
+		iprint("ether attach clear nic\n");
+		delay(10);
+	}
+
+	/* turn off and clear defaults */
+	buf = ethrd(PDMA_GLOBAL_CFG);
+	buf &= 0xFF;
+	ethwr(PDMA_GLOBAL_CFG, buf); 
+	delay(1);
+
+	/* give Tx ring to nic */
+	ethwr(PDMA_TX0_PTR, PADDR(ctlr->tx->d));
+	ethwr(PDMA_TX0_COUNT, Ntd);
+	ethwr(PDMA_TX0_CPU_IDX, 0);
+	ethwr(PDMA_IDX_RST, 1 << 0);
+	coherence();
+
+	/* give Rx ring to nic */
+	ethwr(PDMA_RX0_PTR, PADDR(ctlr->rx->d));
+	ethwr(PDMA_RX0_COUNT, Nrd);
+	ethwr(PDMA_RX0_CPU_IDX, (Nrd - 1));
+	ethwr(PDMA_IDX_RST, 1 << 16);
+	coherence();
+
+
+	/* clear pending irqs */
+	buf = ethrd(INT_STATUS);
+	ethwr(INT_STATUS, buf);
+
+	/* setup interrupts */
+	ethwr(INT_MASK,
+		INT_RX_DONE_INT1 |
+		INT_RX_DONE_INT0 |
+		INT_TX_DONE_INT3 |
+		INT_TX_DONE_INT2 |
+		INT_TX_DONE_INT1 |
+		INT_TX_DONE_INT0);
+
+	if(Attchbug){
+		iprint("ether attach start\n");
+		delay(10);
+	}
+
+
+	/* start dma */
+	ethwr(PDMA_GLOBAL_CFG, GLO_CFG_TX_WB_DDONE | GLO_CFG_RX_DMA_EN | GLO_CFG_TX_DMA_EN); 
+
+
+	if(Attchbug){
+		iprint("ether attach vlan\n");
+		delay(10);
+	}
+
+	/* outer vlan id */
+	ethwr(SDM_CON, 0x8100);
+
+	edev->link = 1;
+	ctlr->attached = 1;
+
+	if(Attchbug){
+		iprint("ether attach kprocs\n");
+		delay(10);
+	}
+
+	kproc("rxproc", rxproc, edev);
+	kproc("txproc", txproc, edev);
+
+	qunlock(ctlr);
+	poperror();
+
+	if(Attchbug)
+		iprint("ether attach done\n");
+}
+
+
+/* shutdown stub: nothing is done to the hardware */
+static void
+shutdown(Ether *edev)
+{
+	USED(edev);
+}
+
+/* promiscuous stub: both arguments are ignored */
+static void
+prom(void*, int)
+{
+}
+
+/* multicast stub: all arguments are ignored */
+static void
+multi(void*, uchar*, int)
+{
+}
+
+
+/*
+ * Format the driver counters and a snapshot of the PDMA and
+ * interrupt registers as text, and return the part selected by
+ * offset and n through readstr().
+ */
+static long
+ifstat(Ether* edev, void* a, long n, ulong offset)
+{
+	char* p;
+	Ctlr* ctlr;
+	int l;
+
+	ctlr = edev->ctlr;
+
+	p = smalloc(READSTR);
+	l = 0;
+	qlock(ctlr);
+	l += snprint(p+l, READSTR-l, "tx: %d\n", ctlr->txstat);
+	l += snprint(p+l, READSTR-l, "rx: %d\n", ctlr->rxstat);
+	l += snprint(p+l, READSTR-l, "txintr: %d\n", ctlr->txintr);
+	l += snprint(p+l, READSTR-l, "rxintr: %d\n", ctlr->rxintr);
+	l += snprint(p+l, READSTR-l, "nointr: %d\n", ctlr->nointr);
+	l += snprint(p+l, READSTR-l, "bad rx: %d\n", ctlr->badrx);
+	l += snprint(p+l, READSTR-l, "\n");
+	l += snprint(p+l, READSTR-l, "dma errs: tx: %d rx: %d\n", ctlr->txdmaerr, ctlr->rxdmaerr);
+	l += snprint(p+l, READSTR-l, "\n");
+	l += snprint(p+l, READSTR-l, "txptr: %08uX\n", ethrd(PDMA_TX0_PTR));
+	l += snprint(p+l, READSTR-l, "txcnt: %uX\n", ethrd(PDMA_TX0_COUNT));
+	l += snprint(p+l, READSTR-l, "txidx: %uX\n", ethrd(PDMA_TX0_CPU_IDX));
+	l += snprint(p+l, READSTR-l, "txdtx: %uX\n", ethrd(PDMA_TX0_DMA_IDX));
+	l += snprint(p+l, READSTR-l, "\n");
+	l += snprint(p+l, READSTR-l, "rxptr: %08uX\n", ethrd(PDMA_RX0_PTR));
+	l += snprint(p+l, READSTR-l, "rxcnt: %uX\n", ethrd(PDMA_RX0_COUNT));
+	l += snprint(p+l, READSTR-l, "rxidx: %uX\n", ethrd(PDMA_RX0_CPU_IDX));
+	l += snprint(p+l, READSTR-l, "rxdtx: %uX\n", ethrd(PDMA_RX0_DMA_IDX));
+	l += snprint(p+l, READSTR-l, "\n");
+	l += snprint(p+l, READSTR-l, "GLOBAL CFG: %08uX\n", ethrd(PDMA_GLOBAL_CFG));
+	l += snprint(p+l, READSTR-l, "INT STATUS: %08uX\n", ethrd(INT_STATUS));
+	l += snprint(p+l, READSTR-l, "INT   MASK: %08uX\n", ethrd(INT_MASK));
+	snprint(p+l, READSTR-l, "\n");
+
+	n = readstr(offset, a, n, p);
+	free(p);
+
+	qunlock(ctlr);
+
+	return n;
+}
+
+/*
+ * set Ether and Ctlr
+ *
+ * Binds edev to the single static Ctlr, fills in its port, irq,
+ * speed and callbacks, reads the MAC address and enables the
+ * interrupt.  Returns 0 on success, or -1 when edev already has a
+ * controller or its controller number is not 0.
+ */
+static int
+pnp(Ether *edev)
+{
+	static Ctlr ctlr[1];
+
+	if(Attchbug)
+		iprint("ether pnp called\n");
+
+	if(edev->ctlr != nil)
+		return -1;
+
+	/* only one controller */
+	if(edev->ctlrno != 0)
+		return -1;
+
+
+	ctlr->edev	=	edev;
+
+	edev->port	= (uintptr)(KSEG1|ETHBASE);
+	edev->ctlr	= ctlr;
+	edev->irq	= IRQethr;
+	edev->mbps	= 100;
+	edev->maxmtu = 1536;
+	edev->arg	= edev;
+
+	edev->attach = attach;
+	edev->shutdown = shutdown;
+	edev->ifstat = ifstat;
+//	edev->ctl = ctl;
+	edev->promiscuous = prom;
+	edev->multicast = multi;
+
+	getmacaddr(edev);
+
+	/* etherinterrupt receives edev as its argument */
+	intrenable(edev->irq, etherinterrupt, edev, 0, edev->name);
+
+	if(Attchbug)
+		iprint("ether pnp done\n");
+
+	return 0;
+}
+
+
+/* register this driver under the name "ether7688", with pnp() as its probe routine */
+void
+ether7688link(void)
+{
+	addethercard("ether7688", pnp);
+}
--- /dev/null
+++ b/sys/src/9/mt7688/faultmips.c
@@ -1,0 +1,249 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"ureg.h"
+#include	"../port/error.h"
+#include	"io.h"
+
+enum {
+	Debug = 0,
+};
+
+/* record of a fault, used by ckfaultstuck() to spot repeats */
+typedef struct Fault Fault;
+struct Fault {
+	uintptr	va;		/* faulting address (badvaddr) */
+	ulong	pid;		/* pid of the faulting process */
+	uintptr	pc;		/* pc at the fault */
+	int	cnt;		/* consecutive identical faults seen */
+	char	*prog;		/* copy of up->text, set only in maxflt */
+	int	code;		/* exception code */
+};
+
+extern char *excname[];
+
+static Fault lflt, maxflt;
+
+/* removed for fpimips */
+//ulong*
+//reg(Ureg *ur, int regno)
+//{
+//	ulong *l;
+//
+//	switch(regno) {
+//	case 31: return &ur->r31;
+//	case 30: return &ur->r30;
+//	case 29: return &ur->sp;
+//	default:
+//		l = &ur->r1;
+//		return &l[regno-1];
+//	}
+//}
+
+/*
+ * Ask if the instruction at EPC could have cause this badvaddr
+ */
+/*
+ * Decodes the instruction at ur->pc (or at pc+4 when the BD bit
+ * of cause is set).  For the load/store opcodes listed below it
+ * computes base register + sign-extended 16-bit offset.
+ * Returns 0 when that address equals ur->badvaddr, or when the
+ * instruction address is not in any segment of up; returns 1
+ * otherwise, including for opcodes that are not listed.
+ */
+int
+tstbadvaddr(Ureg *ur)
+{
+	int rn;
+	ulong iw, off, ea;
+
+	iw = ur->pc;
+	if(ur->cause & BD)
+		iw += 4;
+
+	if(seg(up, iw, 0) == 0)
+		return 0;
+
+	iw = *(ulong*)iw;	/* iw now holds the instruction word itself */
+
+/*	print("iw: %#lux\n", iw);	/**/
+
+	switch((iw>>26) & 0x3f) {
+	default:
+		return 1;
+	case 0x20:	/* LB */
+	case 0x24:	/* LBU */
+			/* LD */
+	case 0x35:
+	case 0x36:
+	case 0x37:	/* LDCz */
+	case 0x1A:	/* LDL */
+	case 0x1B:	/* LDR */
+	case 0x21:	/* LH */
+	case 0x25:	/* LHU */
+	case 0x30:	/* LL */
+	case 0x34:	/* LLD */
+	case 0x23:	/* LW */
+	case 0x31:
+	case 0x32:	/* LWCz possible 0x33 */
+	case 0x27:	/* LWU */
+	case 0x22:	/* LWL */
+	case 0x26:	/* LWR */
+		break;
+
+	case 0x28:	/* SB */
+	case 0x38:	/* SC */
+	case 0x3C:	/* SCD */
+	case 0x3D:
+	case 0x3E:
+	case 0x3F:	/* SDCz */
+	case 0x2C:	/* SDL */
+	case 0x2D:	/* SDR */
+	case 0x29:	/* SH */
+	case 0x2B:	/* SW */
+	case 0x39:
+	case 0x3A:	/* SWCz */
+	case 0x2A:	/* SWL */
+	case 0x2E:	/* SWR */
+		break;
+	}
+
+	/* sign-extend the 16-bit immediate */
+	off = iw & 0xffff;
+	if(off & 0x8000)
+		off |= ~0xffff;
+
+	rn = (iw>>21) & 0x1f;	/* base register number */
+	ea = *reg(ur, rn);
+	if(rn == 0)
+		ea = 0;		/* register 0 is taken as zero */
+	ea += off;
+
+	/* print("ea %#lux %#lux(R%d) bv %#lux pc %#lux\n", ea, off, rn, ur->badvaddr, ur->pc); /**/
+
+	if(ur->badvaddr == ea)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * we think we get consecutive page faults from unlucky combinations of
+ * scheduling and stlb hashes, and they only happen with 16K pages.
+ * however, we also get page faults while servicing the exact same fault.
+ * more than 5 consecutive faults is unusual, now that we have a better
+ * hash function.
+ *
+ * this can be helpful during mmu and cache debugging.
+ */
+/*
+ * Track runs of identical faults (same va, pid, pc and code) in
+ * lflt and the longest run in maxflt.  Panics at 1000 in a row;
+ * prints a report from the fifth repeat on for programs whose
+ * name starts with "8l".  Always returns 0.
+ */
+static int
+ckfaultstuck(Ureg *ur, int read, int code)
+{
+	uintptr pc, va;
+
+	va = ur->badvaddr;
+	pc = ur->pc;
+	if (va != lflt.va || up->pid != lflt.pid || pc != lflt.pc ||
+	    code != lflt.code) {
+		/* at least one address or cause is different from last time */
+		lflt.cnt = 1;
+		lflt.va = va;
+		lflt.pid = up->pid;
+		lflt.pc = pc;
+		lflt.code = code;
+		return 0;
+	}
+	++lflt.cnt;
+	if (lflt.cnt >= 1000)	/* fixfault() isn't fixing underlying cause? */
+		panic("fault: %d consecutive faults for va %#p", lflt.cnt, va);
+	if (lflt.cnt > maxflt.cnt) {
+		/* new longest run: remember where and in which program */
+		maxflt.cnt = lflt.cnt;
+		maxflt.va = va;
+		maxflt.pid = up->pid;
+		maxflt.pc = pc;
+		kstrdup(&maxflt.prog, up->text);
+	}
+
+	/* we're servicing that fault now! */
+	/* adjust the threshold and program name to suit */
+	if (lflt.cnt < 5 || strncmp(up->text, "8l", 2) != 0)
+		return 0;
+	iprint("%d consecutive faults for va %#p at pc %#p in %s "
+		"pid %ld\n", lflt.cnt, lflt.va, pc, up->text, lflt.pid);
+	iprint("\t%s: %s%s r31 %#lux tlbvirt %#lux\n",
+		excname[code], va == pc? "[instruction] ": "",
+		(read? "read": "write"), ur->r31, tlbvirt());
+	return 0;
+}
+
+/*
+ * When Debug is set, append the longest run of consecutive
+ * faults recorded in maxflt to the buffer [p, ep) and return the
+ * new end; otherwise return p unchanged.
+ */
+char *
+faultsprint(char *p, char *ep)
+{
+	if (!Debug)
+		return p;
+	return seprint(p, ep,
+		"max consecutive faults %d for va %#p in %s\n",
+		maxflt.cnt, maxflt.va, maxflt.prog);
+}
+
+/*
+ *  find out fault address and type of access.
+ *  Call common fault handler.
+ */
+/*
+ * ur is the trap frame, user is non-zero for a fault taken in
+ * user mode, code is the exception code.  If fault() cannot
+ * resolve it, a user process gets a "sys: trap: fault" note and
+ * a kernel fault dumps registers and panics.
+ */
+void
+faultmips(Ureg *ur, int user, int code)
+{
+	int read;
+	ulong addr;
+	char *p, buf[ERRMAX];
+
+	addr = ur->badvaddr;
+	addr &= ~(BY2PG-1);		/* round down to the page */
+
+	read = !(code==CTLBM || code==CTLBS);	/* these two codes are treated as writes */
+
+//	iprint("fault: %s code %d va %#p pc %#p r31 %#lux tlbvirt %#lux\n", up->text, code, ur->badvaddr, ur->pc, ur->r31, tlbvirt());
+//	delay(20);
+
+	/* ckfaultstuck() only runs when Debug is set; fault() returning 0 means handled */
+	if (Debug && ckfaultstuck(ur, read, code) || fault(addr, ur->pc, read) == 0)
+		return;
+
+	if(user) {
+		p = "store";
+		if(read)
+			p = "load";
+		snprint(buf, sizeof buf, "sys: trap: fault %s addr=%#lux r31=%#lux",
+			p, ur->badvaddr, ur->r31);
+		postnote(up, 1, buf, NDebug);
+		return;
+	}
+
+	/* unresolved fault in the kernel: report and panic */
+	splhi();
+	serialoq = nil;
+	print("kernel %s vaddr=%#lux\n", excname[code], ur->badvaddr);
+	print("st=%#lux pc=%#lux r31=%#lux sp=%#lux\n",
+		ur->status, ur->pc, ur->r31, ur->sp);
+	dumpregs(ur);
+	panic("fault");
+}
+
+/*
+ * called in syscallfmt.c, sysfile.c, sysproc.c
+ */
+/*
+ * Return normally when align is a power of two and addr is a
+ * multiple of it; otherwise post "sys: odd address" to the
+ * current process and raise Ebadarg.
+ */
+void
+validalign(uintptr addr, unsigned align)
+{
+	unsigned mask;
+
+	/*
+	 * Plan 9 is a 32-bit O/S, and the hardware it runs on
+	 * does not usually have instructions which move 64-bit
+	 * quantities directly, synthesizing the operations
+	 * with 32-bit move instructions. Therefore, the compiler
+	 * (and hardware) usually only enforce 32-bit alignment,
+	 * if at all.
+	 *
+	 * Take this out if the architecture warrants it.
+	 */
+	if(align == sizeof(vlong))
+		align = sizeof(long);
+
+	/*
+	 * Check align is a power of 2, then addr alignment.
+	 */
+	mask = align - 1;
+	if(align != 0 && (align & mask) == 0 && (addr & mask) == 0)
+		return;
+
+	postnote(up, 1, "sys: odd address", NDebug);
+	error(Ebadarg);
+	/*NOTREACHED*/
+}
--- /dev/null
+++ b/sys/src/9/mt7688/fns.h
@@ -1,0 +1,105 @@
+#include "../port/portfns.h"
+
+ulong	cankaddr(ulong);
+void	clock(Ureg*);
+void	clockinit(void);
+int	cmpswap(long*, long, long);
+void	coherence(void);
+void	cycles(uvlong *);
+void	dcflush(void*, ulong);
+void	evenaddr(uintptr);
+void	faultmips(Ureg*, int, int);
+ulong	fcr31(void);
+void	fpclear(void);
+void	fptrap(Ureg*);
+void	fpwatch(Ureg *);
+int		fpuemu(Ureg *);
+char*	getconf(char*);
+ulong	getpagemask(void);
+ulong	getrandom(void);
+int		gettlbp(ulong, ulong*);
+ulong	gettlbvirt(int);
+int		isaconfig(char*, int, ISAConf*);
+void	icflush(void *, ulong);
+void	idlehands(void);
+void	introff(int);
+void	intron(int);
+void	kfault(Ureg*);
+KMap*	kmap(Page*);
+void	kmapdump(void);
+void	kmapinit(void);
+void	kmapinval(void);
+void	kunmap(KMap*);
+void	links(void);
+void	outl(void*, void*, ulong);
+ulong	prid(void);
+void	procfork(Proc *);
+void	procrestore(Proc *);
+void	procsave(Proc *);
+void	procsetup(Proc *);
+void	purgetlb(int);
+void	puttlbx(int, ulong, ulong, ulong, int);
+ulong	rdcompare(void);
+ulong	rdcount(void);
+ulong*	reg(Ureg*, int);
+void	restfpregs(FPsave*, ulong);
+void	intrenable(int, void(*)(Ureg *, void *), void *, int, char*);
+void	intrdisable(int, void (*)(Ureg*, void *), void*, int, char*);
+void	screeninit(void);
+void	setpagemask(ulong);
+void	setwired(ulong);
+ulong	stlbhash(ulong);
+void	syscall(Ureg*);
+int	tas(void*);
+void	tlbinit(void);
+ulong	tlbvirt(void);
+void	touser(void*);
+#define	userureg(ur) ((ur)->status & KUSER)
+void	validalign(uintptr, unsigned);
+void	wrcompare(ulong);
+void	wrcount(ulong);
+
+/* casts between pointers and pointer-sized integers */
+#define PTR2UINT(p)	((uintptr)(p))
+#define UINT2PTR(i)	((void*)(i))
+
+//#define KADDR(a)	((void*)((ulong)(a)|KSEG0))
+/* clear the KSEGM bits of pa and OR in KZERO */
+#define KADDR(pa)	((void*)(KZERO | ((uintptr)(pa) & ~KSEGM)))
+//#define PADDR(a)	((ulong)(a)&~KSEGM)
+/* clear the KSEGM bits of va */
+#define PADDR(va)	(((uintptr)(va)) & ~KSEGM)
+/* w one-bits starting at bit o; computed in int, so o+w must stay below the int width */
+#define FMASK(o, w)	(((1<<(w))-1)<<(o))
+
+/* OR the KSEG1 bits into a */
+#define KSEG1ADDR(a)	((void*)((ulong)(a)|KSEG1))
+
+
+void	_uartputs(char*, int);
+int		_uartprint(char*, ...);
+void	uartinit(void);
+void	zoot(void);
+void	idle(void);
+ulong	getstatus(void);
+void	setstatus(ulong);
+ulong	getcause(void);
+ulong	getconfig(void);
+ulong	getconfig1(void);
+ulong	getconfig2(void);
+ulong	getconfig3(void);
+ulong	getconfig7(void);
+ulong	gethwreg3(void);
+void	plan9iniinit(void);
+void	noted(Ureg*, ulong);
+int		notify(Ureg*);
+void	intrinit(void);
+int		i8250console(void);
+void	setconfenv(void);
+void	uartkirkwoodconsole(void);
+void	serialputs(char*, int);
+void	intrclear(int);
+
+ulong	incraw(void);
+ulong	incmask(void);
+ulong	incstat(void);
+ulong	incsel(void);
+
+void	setwatchhi0(ulong);
+void	setwatchlo0(ulong);
+void	getfcr0(ulong);
--- /dev/null
+++ b/sys/src/9/mt7688/fpi.c
@@ -1,0 +1,305 @@
+/*
+ * Floating Point Interpreter.
+ * shamelessly stolen from an original by ark.
+ *
+ * NB: the Internal arguments to fpisub and fpidiv are reversed from
+ * what you might naively expect: they compute y-x and y/x, respectively.
+ */
+#include "fpi.h"
+
+/*
+ * Round the mantissa of *i to nearest, ties to even, at the LsBit
+ * position of the low word.  The guard bits are cleared; LsBit is
+ * added when they held more than half, or exactly half with LsBit
+ * already set.  A carry out of l goes into h, and a carry out of h
+ * shifts the whole mantissa right one place and bumps the exponent.
+ */
+void
+fpiround(Internal *i)
+{
+	unsigned long guard;
+
+	guard = i->l & GuardMask;
+	i->l &= ~GuardMask;
+	if(guard > (LsBit>>1) || (guard == (LsBit>>1) && (i->l & LsBit))){
+		i->l += LsBit;
+		if(i->l & CarryBit){
+			/* low word overflowed: carry into the high word */
+			i->l &= ~CarryBit;
+			i->h++;
+			if(i->h & CarryBit){
+				/* high word overflowed too: renormalise by one place */
+				if (i->h & 0x01)
+					i->l |= CarryBit;
+				i->l >>= 1;
+				i->h >>= 1;
+				i->e++;
+			}
+		}
+	}
+}
+
+/*
+ * Give x the exponent of y by shifting x's mantissa right by the
+ * difference (y->e - x->e).  Bits shifted off the bottom are not
+ * lost outright: they are ORed into bit 0 of l as a sticky bit.
+ * x is modified in place; y is only read.  When the difference is
+ * not positive no shifting happens and only x->e is overwritten.
+ */
+static void
+matchexponents(Internal *x, Internal *y)
+{
+	int count;
+
+	count = y->e - x->e;
+	x->e = y->e;
+	if(count >= 2*FractBits){
+		/* everything is shifted out: keep only the sticky bit */
+		x->l = x->l || x->h;
+		x->h = 0;
+		return;
+	}
+	if(count >= FractBits){
+		/* shift by a whole word: h moves down into l */
+		count -= FractBits;
+		x->l = x->h|(x->l != 0);
+		x->h = 0;
+	}
+	while(count > 0){
+		count--;
+		if(x->h & 0x01)
+			x->l |= CarryBit;	/* low bit of h enters the top of l */
+		if(x->l & 0x01)
+			x->l |= 2;		/* keep the bit about to fall off */
+		x->l >>= 1;
+		x->h >>= 1;
+	}
+}
+
+/*
+ * Shift the two-word mantissa of *i left by one bit, carrying the
+ * bit that leaves l into the bottom of h, and decrement the exponent.
+ */
+static void
+shift(Internal *i)
+{
+	i->h = i->h << 1;
+	i->l = i->l << 1;
+	if((i->l & CarryBit) != 0){
+		i->h |= 0x01;
+		i->l &= ~CarryBit;
+	}
+	i->e--;
+}
+
+/*
+ * Shift *i left until HiddenBit is set in the high word.
+ * Never terminates if both mantissa words are zero, so callers
+ * must filter zeros out first.
+ */
+static void
+normalise(Internal *i)
+{
+	for(;;){
+		if(i->h & HiddenBit)
+			return;
+		shift(i);
+	}
+}
+
+/*
+ * Bring the exponent of a freshly computed result back into range.
+ * An exponent below 1 is raised to 1 by shifting the mantissa right
+ * (keeping a sticky bit in bit 0 of l); the exponent is first clamped
+ * to -2*FractBits, which bounds the number of shifts.  An exponent
+ * at or above ExpInfinity turns the value into infinity.
+ */
+static void
+renormalise(Internal *i)
+{
+	if(i->e < -2 * FractBits)
+		i->e = -2 * FractBits;
+	while(i->e < 1){
+		i->e++;
+		if(i->h & 0x01)
+			i->l |= CarryBit;
+		i->h >>= 1;
+		i->l = (i->l>>1)|(i->l & 0x01);	/* old bit 0 stays as sticky */
+	}
+	if(i->e >= ExpInfinity)
+		SetInfinity(i);
+}
+
+/*
+ * Exported wrapper around normalise: infinities, NaNs and zeros
+ * are left untouched, anything else is shifted up until the hidden
+ * bit is set.
+ */
+void
+fpinormalise(Internal *x)
+{
+	if(IsWeird(x) || IsZero(x))
+		return;
+	normalise(x);
+}
+
+/*
+ * Add the magnitudes of x and y into i; the result takes x's sign.
+ * If either operand is infinite or NaN the result is a quiet NaN
+ * when a NaN is involved and infinity otherwise.
+ * The operand with the smaller exponent is denormalised in place by
+ * matchexponents, so the inputs should be disposable copies.
+ */
+void
+fpiadd(Internal *x, Internal *y, Internal *i)
+{
+	Internal *t;
+
+	i->s = x->s;
+	if(IsWeird(x) || IsWeird(y)){
+		if(IsNaN(x) || IsNaN(y))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	/* make y the operand with the larger exponent */
+	if(x->e > y->e){
+		t = x;
+		x = y;
+		y = t;
+	}
+	matchexponents(x, y);
+	i->e = x->e;
+	i->h = x->h + y->h;
+	i->l = x->l + y->l;
+	if(i->l & CarryBit){
+		i->h++;
+		i->l &= ~CarryBit;
+	}
+	/* sum overflowed past the hidden bit: shift down one, keeping a sticky bit */
+	if(i->h & (HiddenBit<<1)){
+		if(i->h & 0x01)
+			i->l |= CarryBit;
+		i->l = (i->l>>1)|(i->l & 0x01);
+		i->h >>= 1;
+		i->e++;
+	}
+	if(IsWeird(i))
+		SetInfinity(i);
+}
+
+/*
+ * Subtract magnitudes: the operands are first ordered so that y is
+ * the one with the larger magnitude, then i = |y| - |x| with y's
+ * sign.  A NaN in y gives a quiet NaN; infinity minus infinity gives
+ * a quiet NaN; infinity minus anything else stays infinite.
+ * x is denormalised in place by matchexponents.
+ */
+void
+fpisub(Internal *x, Internal *y, Internal *i)
+{
+	Internal *t;
+
+	/* swap so that y holds the larger magnitude */
+	if(y->e < x->e
+	   || (y->e == x->e && (y->h < x->h || (y->h == x->h && y->l < x->l)))){
+		t = x;
+		x = y;
+		y = t;
+	}
+	i->s = y->s;
+	if(IsNaN(y)){
+		SetQNaN(i);
+		return;
+	}
+	if(IsInfinity(y)){
+		if(IsInfinity(x))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	matchexponents(x, y);
+	i->e = y->e;
+	i->h = y->h - x->h;
+	i->l = y->l - x->l;
+	if(i->l < 0){
+		/* borrow from the high word */
+		i->l += CarryBit;
+		i->h--;
+	}
+	if(i->h == 0 && i->l == 0)
+		SetZero(i);
+	else while(i->e > 1 && (i->h & HiddenBit) == 0)
+		shift(i);	/* renormalise, but never below exponent 1 */
+}
+
+/*
+ * Helpers for fpimul.  Each FractBits-wide mantissa word is split
+ * into two CHUNK-bit digits so that digit products fit in a long.
+ */
+#define	CHUNK		(FractBits/2)
+#define	CMASK		((1<<CHUNK)-1)
+#define	HI(x)		((short)((x)>>CHUNK) & CMASK)	/* upper digit of a word */
+#define	LO(x)		((short)(x) & CMASK)		/* lower digit of a word */
+#define	SPILL(x)	((x)>>CHUNK)			/* carry out of a column */
+#define	M(x, y)		((long)a[x]*(long)b[y])		/* one digit product */
+#define	C(h, l)		(((long)((h) & CMASK)<<CHUNK)|((l) & CMASK))	/* two digits -> word */
+
+/*
+ * i = x * y.  The sign is the XOR of the operand signs.
+ * With an infinite or NaN operand the result is a quiet NaN if a NaN
+ * or a zero is involved, otherwise infinity; a zero operand otherwise
+ * gives zero.  The mantissas are multiplied digit by digit: c[k] is
+ * the sum of one column of digit products plus the carry from the
+ * column below, and f[] reassembles the columns into words.  Only
+ * the top two words are kept; anything non-zero below them sets the
+ * sticky bit.  x and y are normalised in place.
+ */
+void
+fpimul(Internal *x, Internal *y, Internal *i)
+{
+	long a[4], b[4], c[7], f[4];
+
+	i->s = x->s^y->s;
+	if(IsWeird(x) || IsWeird(y)){
+		if(IsNaN(x) || IsNaN(y) || IsZero(x) || IsZero(y))
+			SetQNaN(i);
+		else
+			SetInfinity(i);
+		return;
+	}
+	else if(IsZero(x) || IsZero(y)){
+		SetZero(i);
+		return;
+	}
+	normalise(x);
+	normalise(y);
+	i->e = x->e + y->e - (ExpBias - 1);
+
+	/* digits, most significant first */
+	a[0] = HI(x->h); b[0] = HI(y->h);
+	a[1] = LO(x->h); b[1] = LO(y->h);
+	a[2] = HI(x->l); b[2] = HI(y->l);
+	a[3] = LO(x->l); b[3] = LO(y->l);
+
+	/* columns, least significant (c[6]) first so carries ripple upward */
+	c[6] =                               M(3, 3);
+	c[5] =                     M(2, 3) + M(3, 2) + SPILL(c[6]);
+	c[4] =           M(1, 3) + M(2, 2) + M(3, 1) + SPILL(c[5]);
+	c[3] = M(0, 3) + M(1, 2) + M(2, 1) + M(3, 0) + SPILL(c[4]);
+	c[2] = M(0, 2) + M(1, 1) + M(2, 0)           + SPILL(c[3]);
+	c[1] = M(0, 1) + M(1, 0)                     + SPILL(c[2]);
+	c[0] = M(0, 0)                               + SPILL(c[1]);
+
+	f[0] = c[0];
+	f[1] = C(c[1], c[2]);
+	f[2] = C(c[3], c[4]);
+	f[3] = C(c[5], c[6]);
+
+	/* product lacks the hidden bit: shift the whole thing left one place */
+	if((f[0] & HiddenBit) == 0){
+		f[0] <<= 1;
+		f[1] <<= 1;
+		f[2] <<= 1;
+		f[3] <<= 1;
+		if(f[1] & CarryBit){
+			f[0] |= 1;
+			f[1] &= ~CarryBit;
+		}
+		if(f[2] & CarryBit){
+			f[1] |= 1;
+			f[2] &= ~CarryBit;
+		}
+		if(f[3] & CarryBit){
+			f[2] |= 1;
+			f[3] &= ~CarryBit;
+		}
+		i->e--;
+	}
+	i->h = f[0];
+	i->l = f[1];
+	if(f[2] || f[3])
+		i->l |= 1;	/* sticky bit for the discarded low words */
+	renormalise(i);
+}
+
+/*
+ * i = y / x (note the operand order).  The sign is the XOR of the
+ * operand signs.  NaN operands, infinity/infinity and zero/zero give
+ * a quiet NaN; a zero x or infinite y gives infinity; an infinite x
+ * or zero y gives zero.  Otherwise the quotient is developed one bit
+ * per iteration by compare-and-subtract until its hidden bit is set;
+ * a non-zero remainder sets the sticky bit.  x and y are modified.
+ */
+void
+fpidiv(Internal *x, Internal *y, Internal *i)
+{
+	i->s = x->s^y->s;
+	if(IsNaN(x) || IsNaN(y)
+	   || (IsInfinity(x) && IsInfinity(y)) || (IsZero(x) && IsZero(y))){
+		SetQNaN(i);
+		return;
+	}
+	else if(IsZero(x) || IsInfinity(y)){
+		SetInfinity(i);
+		return;
+	}
+	else if(IsInfinity(x) || IsZero(y)){
+		SetZero(i);
+		return;
+	}
+	normalise(x);
+	normalise(y);
+	i->h = 0;
+	i->l = 0;
+	/* each shift(i) below decrements i->e, hence the 2*FractBits head start */
+	i->e = y->e - x->e + (ExpBias + 2*FractBits - 1);
+	do{
+		if(y->h > x->h || (y->h == x->h && y->l >= x->l)){
+			/* divisor fits: emit a 1 bit and subtract */
+			i->l |= 0x01;
+			y->h -= x->h;
+			y->l -= x->l;
+			if(y->l < 0){
+				y->l += CarryBit;
+				y->h--;
+			}
+		}
+		shift(y);
+		shift(i);
+	}while ((i->h & HiddenBit) == 0);
+	if(y->h || y->l)
+		i->l |= 0x01;
+	renormalise(i);
+}
+
+/*
+ * Three-way compare of x against y: negative when x < y, zero when
+ * they compare equal, positive when x > y.
+ * Two NaNs and two zeros (of either sign) compare equal.  Operands of
+ * equal magnitude are ordered by sign alone.  Otherwise the magnitude
+ * ordering decides, using the sign of the operand with the larger
+ * magnitude.  A single NaN operand is not detected here; it is
+ * compared by its exponent and mantissa like any other value.
+ */
+int
+fpicmp(Internal *x, Internal *y)
+{
+	if(IsNaN(x) && IsNaN(y))
+		return 0;
+	if(IsInfinity(x) && IsInfinity(y))
+		return y->s - x->s;
+	if(IsZero(x) && IsZero(y))
+		return 0;
+	if(x->e == y->e && x->h == y->h && x->l == y->l)
+		return y->s - x->s;
+	/* |x| < |y|: the result depends only on which side of zero y is */
+	if(x->e < y->e
+	   || (x->e == y->e && (x->h < y->h || (x->h == y->h && x->l < y->l))))
+		return y->s ? 1: -1;
+	/* |x| > |y|: the result depends only on x's sign */
+	return x->s ? -1: 1;
+}
--- /dev/null
+++ b/sys/src/9/mt7688/fpi.h
@@ -1,0 +1,71 @@
+#ifndef nil
+#include <u.h>
+#endif
+
+typedef long Word;
+typedef long long Vlong;
+typedef unsigned long Single;
+
+/* use u.h's FPdbleword */
+#define Double	FPdbleword
+#define h hi
+#define l lo
+
+enum {
+	FractBits	= 28,
+	CarryBit	= 0x10000000,
+	HiddenBit	= 0x08000000,
+	MsBit		= HiddenBit,
+	NGuardBits	= 3,
+	GuardMask	= 0x07,
+	LsBit		= (1<<NGuardBits),
+
+	SingleExpBias	= 127,
+	SingleExpMax	= 255,
+	DoubleExpBias	= 1023,
+	DoubleExpMax	= 2047,
+
+	ExpBias		= DoubleExpBias,
+	ExpInfinity	= DoubleExpMax,
+};
+
+/*
+ * Unpacked floating-point value used by the fpi routines.
+ * The mantissa is held in two words of FractBits bits each (h above
+ * l); the low NGuardBits of l are guard bits used for rounding.
+ */
+typedef struct {
+	/* order matters: must start with s, e, l, h in that order */
+	unsigned char s;		/* sign: 1 when negative */
+	short e;			/* exponent, biased by ExpBias */
+	/* double bits */
+	long l;				/* 0000FFFFFFFFFFFFFFFFFFFFFFFFFGGG */
+	long h;				/* 0000HFFFFFFFFFFFFFFFFFFFFFFFFFFF */
+} Internal;
+
+/* infinity or NaN: exponent at (or beyond) the maximum */
+#define IsWeird(n)	((n)->e >= ExpInfinity)
+/* infinity: weird exponent with only the hidden bit set in the mantissa */
+#define	IsInfinity(n)	(IsWeird(n) && (n)->h == HiddenBit && (n)->l == 0)
+#define	SetInfinity(n)	((n)->e = ExpInfinity, (n)->h = HiddenBit, (n)->l = 0)
+/* NaN: weird exponent with any mantissa bit other than the hidden bit set */
+#define IsNaN(n)	(IsWeird(n) && (((n)->h & ~HiddenBit) || (n)->l))
+/* note that SetQNaN also clears the sign */
+#define	SetQNaN(n)	((n)->s = 0, (n)->e = ExpInfinity, 		\
+			 (n)->h = HiddenBit|(LsBit<<1), (n)->l = 0)
+/* zero is represented with exponent 1 and an all-zero mantissa; the sign is ignored */
+#define IsZero(n)	((n)->e == 1 && (n)->h == 0 && (n)->l == 0)
+#define SetZero(n)	((n)->e = 1, (n)->h = 0, (n)->l = 0)
+
+/*
+ * fpi.c
+ */
+extern void fpiround(Internal *);
+extern void fpiadd(Internal *, Internal *, Internal *);
+extern void fpisub(Internal *, Internal *, Internal *);
+extern void fpimul(Internal *, Internal *, Internal *);
+extern void fpidiv(Internal *, Internal *, Internal *);
+extern int fpicmp(Internal *, Internal *);
+extern void fpinormalise(Internal*);
+
+/*
+ * fpimem.c
+ */
+extern void fpis2i(Internal *, void *);
+extern void fpid2i(Internal *, void *);
+extern void fpiw2i(Internal *, void *);
+extern void fpiv2i(Internal *, void *);
+extern void fpii2s(void *, Internal *);
+extern void fpii2d(void *, Internal *);
+extern void fpii2w(Word *, Internal *);
+extern void fpii2v(Vlong *, Internal *);
--- /dev/null
+++ b/sys/src/9/mt7688/fpimem.c
@@ -1,0 +1,199 @@
+#include "fpi.h"
+
+/*
+ * the following routines depend on memory format, not the machine
+ */
+
+enum {
+	Sign	= 1u << 31,
+};
+
+/*
+ * Convert the IEEE single-precision value at v to Internal format.
+ *
+ * The three exponent classes are decoded from the raw exponent field
+ * rather than from the rebiased value (which is never zero, so the old
+ * test for denormals could not fire):
+ *	field 255	infinity or NaN: mapped to ExpInfinity so that
+ *			IsWeird/IsInfinity/IsNaN recognise it;
+ *	field 0		denormal (zero was handled above): there is no
+ *			hidden bit, so the fraction is normalised here;
+ *	otherwise	normal number: rebias and add the hidden bit.
+ */
+void
+fpis2i(Internal *i, void *v)
+{
+	Single *s = v;
+	int sexp;
+
+	i->s = (*s & Sign) ? 1: 0;
+	if((*s & ~Sign) == 0){
+		SetZero(i);
+		return;
+	}
+	sexp = (*s>>23) & 0x00FF;
+	i->h = (*s & 0x007FFFFF)<<(1+NGuardBits);
+	i->l = 0;
+	if(sexp == SingleExpMax){
+		i->e = ExpInfinity;
+		i->h |= HiddenBit;
+	}else if(sexp == 0){
+		/*
+		 * denormals have an effective exponent field of 1.
+		 * the fraction is non-zero here, so the loop ends, and
+		 * the double-range exponent cannot underflow.
+		 */
+		i->e = 1 - SingleExpBias + ExpBias;
+		while((i->h & HiddenBit) == 0){
+			i->h <<= 1;
+			i->e--;
+		}
+	}else{
+		i->e = sexp - SingleExpBias + ExpBias;
+		i->h |= HiddenBit;
+	}
+}
+
+/*
+ * Convert the IEEE double at v (as a Double, i.e. two 32-bit words)
+ * to Internal format.  The 52 fraction bits are split between h
+ * (top 27) and l (bottom 25, above the guard bits).  A zero exponent
+ * field becomes exponent 1 without a hidden bit; any other exponent
+ * is kept as is and the hidden bit is supplied.
+ */
+void
+fpid2i(Internal *i, void *v)
+{
+	Double *d;
+
+	d = v;
+	i->s = (d->h & Sign) != 0;
+	i->e = (d->h>>20) & 0x07FF;
+	i->l = (d->l & 0x01FFFFFF)<<NGuardBits;
+	i->h = ((d->h & 0x000FFFFF)<<(4+NGuardBits))|((d->l>>25) & 0x7F);
+	if(i->e == 0)
+		i->e = 1;
+	else
+		i->h |= HiddenBit;
+}
+
+/*
+ * Convert the signed 32-bit integer at v to Internal format.
+ *
+ * The magnitude is kept in an unsigned variable: negating the most
+ * negative Word leaves it negative, and the signed right shift that
+ * used to follow smeared sign bits into the top of the mantissa.
+ */
+void
+fpiw2i(Internal *i, void *v)
+{
+	Word word = *(Word*)v;
+	unsigned long mag, w;
+	short e;
+
+	if(word < 0){
+		i->s = 1;
+		mag = -(unsigned long)word;	/* well defined for every value */
+	}
+	else{
+		i->s = 0;
+		mag = word;
+	}
+	if(mag == 0){
+		SetZero(i);
+		return;
+	}
+	/* e = number of significant bits in the magnitude */
+	for (e = 0, w = mag; w; w >>= 1, e++)
+		;
+	if(e > FractBits){
+		/* top FractBits bits go to the high word, the rest to the top of the low word */
+		i->h = mag>>(e - FractBits);
+		i->l = (mag & ((1ul<<(e - FractBits)) - 1))<<(2*FractBits - e);
+	}
+	else {
+		i->h = mag<<(FractBits - e);
+		i->l = 0;
+	}
+	i->e = (e - 1) + ExpBias;
+}
+
+/*
+ * Convert the signed 64-bit integer at v to Internal format.
+ *
+ * The magnitude is handled as an unsigned 64-bit value so that the
+ * most negative input works, and magnitudes with more than
+ * 2*FractBits significant bits are handled explicitly: previously
+ * they led to a 32-bit mask overflow and a negative shift count.
+ * Bits that do not fit in the mantissa are folded into the sticky
+ * bit (bit 0 of the low word).
+ */
+void
+fpiv2i(Internal *i, void *v)
+{
+	Vlong word = *(Vlong*)v;
+	unsigned long long mag, w;
+	short e, drop;
+
+	if(word < 0){
+		i->s = 1;
+		mag = -(unsigned long long)word;	/* well defined for every value */
+	}
+	else{
+		i->s = 0;
+		mag = word;
+	}
+	if(mag == 0){
+		SetZero(i);
+		return;
+	}
+	/* e = number of significant bits in the magnitude */
+	for (e = 0, w = mag; w; w >>= 1, e++)
+		;
+	if(e > 2*FractBits){
+		/* too wide for the mantissa: drop the lowest bits */
+		drop = e - 2*FractBits;
+		i->h = mag>>(e - FractBits);
+		i->l = (mag>>drop) & (CarryBit - 1);
+		if(mag & ((1ull<<drop) - 1))
+			i->l |= 1;
+	}
+	else if(e > FractBits){
+		i->h = mag>>(e - FractBits);
+		i->l = (mag & ((1ull<<(e - FractBits)) - 1))<<(2*FractBits - e);
+	}
+	else {
+		i->h = mag<<(FractBits - e);
+		i->l = 0;
+	}
+	i->e = (e - 1) + ExpBias;
+}
+
+/*
+ * Note that all of these conversions from Internal format
+ * potentially alter *i, so it should be a disposable copy
+ * of the value to be converted.
+ */
+
+/*
+ * Pack *i into the IEEE single at v.
+ * The value is rounded with fpiround (at the Internal guard bits,
+ * not at single precision) and the fraction is then truncated to
+ * its top 23 bits.  Exponents at or below the smallest single
+ * exponent produce a signed zero (denormals are not generated);
+ * exponents at or above the single maximum produce the infinity
+ * pattern, so NaN payloads are not preserved.
+ * *i is modified.
+ */
+void
+fpii2s(void *v, Internal *i)
+{
+	short e;
+	Single *s = (Single*)v;
+
+	fpiround(i);
+	if(i->h & HiddenBit)
+		i->h &= ~HiddenBit;
+	else
+		i->e--;
+	*s = i->s ? Sign: 0;
+	e = i->e;
+	if(e < ExpBias){
+		/* underflow: leave just the sign, i.e. a signed zero */
+		if(e <= (ExpBias - SingleExpBias))
+			return;
+		e = SingleExpBias - (ExpBias - e);
+	}
+	else  if(e >= (ExpBias + (SingleExpMax-SingleExpBias))){
+		/* overflow (or infinity/NaN): all-ones exponent, zero fraction */
+		*s |= SingleExpMax<<23;
+		return;
+	}
+	else
+		e = SingleExpBias + (e - ExpBias);
+	*s |= (e<<23)|(i->h>>(1+NGuardBits));
+}
+
+/*
+ * Pack *i into the IEEE double at v (as a Double, two 32-bit words).
+ * The value is rounded with fpiround; the hidden bit is stripped, or,
+ * when it is absent, the exponent is decremented (turning exponent 1
+ * into the denormal/zero exponent field 0).  The guard bits are then
+ * shifted out and the remaining fraction bits are distributed over
+ * the two words.  *i is modified.
+ */
+void
+fpii2d(void *v, Internal *i)
+{
+	Double *d = (Double*)v;
+
+	fpiround(i);
+	if(i->h & HiddenBit)
+		i->h &= ~HiddenBit;
+	else
+		i->e--;
+	/* drop the guard bits: the low bits of h move into the top of l */
+	i->l = ((i->h & GuardMask)<<25)|(i->l>>NGuardBits);
+	i->h >>= NGuardBits;
+	d->h = i->s ? Sign: 0;
+	/* high word: sign, exponent, top 20 fraction bits */
+	d->h |= (i->e<<20)|((i->h & 0x00FFFFFF)>>4);
+	/* low word: remaining 4 bits of h above the 28 bits of l */
+	d->l = (i->h<<28)|i->l;
+}
+
+/*
+ * Convert *i to a signed 32-bit integer stored at word.
+ * The value is rounded with fpiround and the fraction discarded.
+ * Magnitudes below one give 0; magnitudes needing more than 31 bits
+ * saturate at 0x7FFFFFFF before the sign is applied.  *i is modified.
+ */
+void
+fpii2w(Word *word, Internal *i)
+{
+	Word mag;
+	short nbits;
+
+	fpiround(i);
+	/* number of integer bits in the value */
+	nbits = (i->e - ExpBias) + 1;
+	mag = 0;
+	if(nbits > 31)
+		mag = 0x7FFFFFFF;
+	else if(nbits > FractBits)
+		mag = (i->h<<(nbits - FractBits))|(i->l>>(2*FractBits - nbits));
+	else if(nbits > 0)
+		mag = i->h>>(FractBits-nbits);
+	*word = i->s? -mag: mag;
+}
+
+/*
+ * Convert *i to a signed 64-bit integer stored at word.
+ * The value is rounded with fpiround and the fraction discarded.
+ * Magnitudes below one give 0; magnitudes needing more than 63 bits
+ * saturate at the largest positive value before the sign is applied.
+ * *i is modified.
+ *
+ * Values with more than 2*FractBits integer bits need the low
+ * mantissa word shifted left, not right; the single expression used
+ * previously shifted by a negative count in that range.
+ */
+void
+fpii2v(Vlong *word, Internal *i)
+{
+	Vlong w;
+	short e;
+
+	fpiround(i);
+	/* number of integer bits in the value */
+	e = (i->e - ExpBias) + 1;
+	if(e <= 0)
+		w = 0;
+	else if(e > 63)
+		w = (1ull<<63) - 1;		/* maxlong */
+	else if(e > 2*FractBits)
+		w = (Vlong)i->h<<(e - FractBits) | (Vlong)i->l<<(e - 2*FractBits);
+	else if(e > FractBits)
+		w = (Vlong)i->h<<(e - FractBits) | i->l>>(2*FractBits - e);
+	else
+		w = i->h>>(FractBits-e);
+	if(i->s)
+		w = -w;
+	*word = w;
+}
--- /dev/null
+++ b/sys/src/9/mt7688/fpimips.c
@@ -1,0 +1,1495 @@
+/*
+ * this doesn't attempt to implement MIPS floating-point properties
+ * that aren't visible in the Inferno environment.
+ * all arithmetic is done in double precision.
+ * the FP trap status isn't updated.
+ *
+ * we emulate the original MIPS FP register model: 32-bits each,
+ * F(2n) and F(2n+1) are a double, with lower-order word first;
+ * note that this is little-endian order, unlike the rest of the
+ * machine, so double-word operations will need to swap the words
+ * when transferring between FP registers and memory.
+ *
+ *	This has been modified for spim (little-endian)
+ *
+ * on some machines, we can convert to an FP internal representation when
+ * moving to FPU registers and back (to integer, for example) when moving
+ * from them.  the MIPS is different: its conversion instructions operate
+ * on FP registers only, and there's no way to tell if data being moved
+ * into an FP register is integer or FP, so it must be possible to store
+ * integers in FP registers without conversion.  Furthermore, pairs of FP
+ * registers can be combined into a double.  So we keep the raw bits
+ * around as the canonical representation and convert only to and from
+ * Internal FP format when we must (i.e., before calling the common fpi
+ * code).
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"ureg.h"
+#include	"fpi.h"
+#include	"tos.h"
+
+#ifdef FPEMUDEBUG
+#define DBG(bits) (fpemudebug & (bits))
+#define intpr _intpr
+#define internsane _internsane
+#define dbgstuck _dbgstuck
+#else
+#define DBG(bits) (0)
+#define internsane(i, ur)	do { USED(ur); } while(0)
+#define intpr(i, reg, fmt, ufp)	do {} while(0)
+#define dbgstuck(pc, ur, ufp)	do {} while(0)
+#endif
+
+#define	OFR(memb) (uintptr)&((Ureg*)0)->memb	/* offset into Ureg of memb */
+#define	REG(ur, r) *acpureg(ur, r)			/* cpu reg in Ureg */
+#define	FREG(ufp, fr) (ufp)->reg[(fr) & REGMASK]	/* fp reg raw bits */
+
+/*
+ * instruction decoding for COP1 instructions; integer instructions
+ * are laid out differently.
+ */
+#define OP(ul)	 ((ul) >> 26)
+#define REGMASK MASK(5)				/* mask for a register number */
+#define FMT(ul)	 (((ul) >> 21) & REGMASK)	/* data type */
+#define REGT(ul) (((ul) >> 16) & REGMASK)	/* source2 register */
+#define REGS(ul) (((ul) >> 11) & REGMASK)	/* source1 register */
+#define REGD(ul) (((ul) >>  6) & REGMASK)	/* destination register */
+#define FUNC(ul) ((ul) & MASK(6))
+
+
+enum {
+	Dbgbasic = 1<<0,	/* base debugging: ops, except'ns */
+	Dbgmoves = 1<<1,	/* not very exciting usually */
+	Dbgregs	 = 1<<2,	/* print register contents around ops */
+	Dbgdelay = 1<<3,	/* branch-delay-slot-related machinery */
+
+	/* fpimips status codes */
+	Failed = -1,
+	Advpc,				/* advance pc normally */
+	Leavepc,			/* don't change the pc */
+	Leavepcret,			/* ... and return to user mode now */
+	Nomatch,
+
+	/* no-ops */
+	NOP	= 0x27,			/* NOR R0, R0, R0 */
+	MIPSNOP = 0,			/* SLL R0, R0, R0 */
+
+	/* fp op-codes */
+	COP1	= 0x11,			/* fpu op */
+	LWC1	= 0x31,			/* load float/long */
+	LDC1	= 0x35,			/* load double/vlong */
+	SWC1	= 0x39,			/* store float/long */
+	SDC1	= 0x3d,			/* store double/vlong */
+
+	N = 1<<31,			/* condition codes */
+	Z = 1<<30,
+	C = 1<<29,
+	V = 1<<28,
+
+	/* data types (format field values) */
+	MFC1	= 0,			/* and func == 0 ... */
+	DMFC1,				/* vlong move */
+	CFC1,				/* ctl word move */
+	MTC1	= 4,
+	DMTC1,
+	CTC1,				/* ... end `and func == 0' */
+	BRANCH	= 8,
+	Ffloat	= 16,
+	Fdouble,
+	Flong	= 20,
+	Fvlong,
+
+	/* fp control registers */
+	Fpimp	= 0,
+	Fpcsr	= 31,
+};
+
+typedef struct FP1 FP1;
+typedef struct FP2 FP2;
+typedef struct FPcvt FPcvt;
+typedef struct Instr Instr;
+
+struct Instr {	/* a COP1 instruction, broken out and registers converted */
+	int	iw;		/* whole word */
+	uintptr	pc;
+	int	o;		/* opcode or cop1 func code */
+	int	fmt;		/* operand format */
+	int	rm;		/* first operand register */
+	int	rn;		/* second operand register */
+	int	rd;		/* destination register */
+
+	Internal *fm;		/* converted from FREG(ufp, rm) */
+	Internal *fn;
+	char	*dfmt;
+	FPsave	*ufp;		/* fp state, including fp registers */
+	Ureg	*ur;		/* user registers */
+};
+
+struct FP2 {
+	char*	name;
+	void	(*f)(Internal*, Internal*, Internal*);
+};
+
+struct FP1 {
+	char*	name;
+	void	(*f)(Internal*, Internal*);
+};
+
+struct FPcvt {
+	char*	name;
+	void	(*f)(int, int, int, Ureg *, FPsave *);
+};
+
+static	int	roff[32] = {
+	0,       OFR(r1), OFR(r2), OFR(r3),
+	OFR(r4), OFR(r5), OFR(r6), OFR(r7),
+	OFR(r8), OFR(r9), OFR(r10), OFR(r11),
+	OFR(r12), OFR(r13), OFR(r14), OFR(r15),
+	OFR(r16), OFR(r17), OFR(r18), OFR(r19),
+	OFR(r20), OFR(r21), OFR(r22), OFR(r23),
+	OFR(r24), OFR(r25), OFR(r26), OFR(r27),
+	OFR(r28), OFR(sp),  OFR(r30), OFR(r31),
+};
+
+/*
+ * plan 9 assumes F24 initialized to 0.0, F26 to 0.5, F28 to 1.0, F30 to 2.0.
+ */
+enum {
+	FZERO = 24,
+	FHALF = 26,
+};
+static Internal fpconst[Nfpregs] = {		/* indexed by register no. */
+	/* s, e, l, h */
+[FZERO]	{0, 0x1, 0x00000000, 0x00000000},	/* 0.0 */
+[FHALF]	{0, 0x3FE, 0x00000000, 0x08000000},	/* 0.5 */
+[28]	{0, 0x3FF, 0x00000000, 0x08000000},	/* 1.0 */
+[30]	{0, 0x400, 0x00000000, 0x08000000},	/* 2.0 */
+};
+
+static char *fmtnames[] = {
+[MFC1]	"MF",
+[DMFC1]	"DMF",
+[CFC1]	"CF",
+[MTC1]	"MT",
+[DMTC1]	"DMT",
+[CTC1]	"CT",
+[BRANCH]"BR",
+
+[Ffloat]"F",
+[Fdouble]"D",
+[Flong]	"W",
+[Fvlong]"L",
+};
+
+static char *prednames[] = {
+[0]	"F",
+[1]	"UN",
+[2]	"EQ",
+[3]	"UEQ",
+[4]	"OLT",
+[5]	"ULT",
+[6]	"OLE",
+[7]	"ULE",
+[8]	"SF",
+[9]	"NGLE",
+[10]	"SEQ",
+[11]	"NGL",
+[12]	"LT",
+[13]	"NGE",
+[14]	"LE",
+[15]	"NGT",
+};
+
+int fpemudebug = 0;			/* settable via /dev/archctl , someday*/
+
+static ulong dummyr0;
+static QLock watchlock;			/* lock for watch-points */
+
+ulong	branch(Ureg*, ulong);
+int	isbranch(ulong *);
+
+static int	fpimips(ulong, ulong, Ureg *, FPsave *);
+
+/*
+ * Append the current fpemudebug setting to the buffer [p, ep) and
+ * return the new end of text.  When the emulator is built without
+ * FPEMUDEBUG nothing is written and p is returned unchanged.
+ */
+char *
+fpemuprint(char *p, char *ep)
+{
+#ifdef FPEMUDEBUG
+	return seprint(p, ep, "fpemudebug %d\n", fpemudebug);
+#else
+	USED(ep);
+	return p;
+#endif
+}
+
+/*
+ * Return the address of cpu register r within the saved Ureg.
+ * Register 0 (whose roff entry is 0, like any other register without
+ * a slot) is redirected to a scratch word that is reset to zero on
+ * every lookup.
+ */
+static ulong *
+acpureg(Ureg *ur, int r)
+{
+	int off;
+
+	off = roff[r & REGMASK];
+	if (off == 0) {
+		dummyr0 = 0;
+		return &dummyr0;
+	}
+	return (ulong *)((char*)ur + off);
+}
+
+/*
+ * Exported form of acpureg (for faultmips): address of cpu register
+ * r in the saved Ureg.
+ */
+ulong *
+reg(Ureg *ur, int r)
+{
+	return acpureg(ur, r);
+}
+
+/*
+ * Debugging check on an Internal value: raise an error naming
+ * ur->pc if the sign is not 0 or 1 or the exponent lies outside
+ * 0..DoubleExpMax.  Does nothing unless Dbgbasic debugging is on.
+ * Reached through the internsane macro only when FPEMUDEBUG is
+ * defined.
+ */
+static void
+_internsane(Internal *i, Ureg *ur)
+{
+	static char buf[ERRMAX];
+
+	USED(i);
+	if (!(DBG(Dbgbasic)))
+		return;
+	if ((unsigned)i->s > 1) {
+		snprint(buf, sizeof buf,
+			"fpuemu: bogus Internal sign at pc=%#p", ur->pc);
+		error(buf);
+	}
+	/* the unsigned cast makes a negative exponent fail the test too */
+	if ((unsigned)i->e > DoubleExpMax) {
+		snprint(buf, sizeof buf,
+			"fpuemu: bogus Internal exponent at pc=%#p", ur->pc);
+		error(buf);
+	}
+}
+
+/*
+ * mips binary operations (d = n operator m)
+ */
+
+/*
+ * d = n + m.  fpiadd and fpisub work on magnitudes, so operands of
+ * like sign are added and operands of unlike sign are subtracted.
+ */
+static void
+fadd(Internal *m, Internal *n, Internal *d)
+{
+	if(m->s == n->s)
+		fpiadd(m, n, d);
+	else
+		fpisub(m, n, d);
+}
+
+/*
+ * d = n - m, done as an addition after flipping m's sign.
+ * Note that the sign of *m is changed in place.
+ */
+static void
+fsub(Internal *m, Internal *n, Internal *d)
+{
+	m->s ^= 1;
+	if(m->s == n->s)
+		fpiadd(m, n, d);
+	else
+		fpisub(m, n, d);
+}
+
+/*
+ * mips unary operations
+ */
+
+/*
+ * d = m rounded to an integral value: one half is added to positive
+ * m (or subtracted from negative m) and the fraction bits of the
+ * result are then cleared.  Infinities and NaNs are passed through
+ * as produced by the addition.  The result is still in Internal
+ * format, not an integer.
+ */
+static void
+frnd(Internal *m, Internal *d)
+{
+	short e;
+	Internal tmp;
+
+	tmp = fpconst[FHALF];
+	(m->s? fsub: fadd)(&tmp, m, d);
+	if(IsWeird(d))
+		return;
+	fpiround(d);
+	/* e = number of integer bits in d */
+	e = (d->e - ExpBias) + 1;
+	if(e <= 0)
+		SetZero(d);
+	else if(e > FractBits){
+		/* integer part reaches into the low word: clear the bits below it */
+		if(e < 2*FractBits)
+			d->l &= ~((1<<(2*FractBits - e))-1);
+	}else{
+		/* integer part lies wholly in the high word */
+		d->l = 0;
+		if(e < FractBits)
+			d->h &= ~((1<<(FractBits-e))-1);
+	}
+}
+
+/*
+ * debugging: print internal representation of an fp reg
+ *
+ * Prints the raw register bits (a pair for Fdouble) next to the
+ * fields of *i, then delays briefly.  Does nothing unless Dbgregs
+ * debugging is on; reached through the intpr macro only when
+ * FPEMUDEBUG is defined.
+ */
+static void
+_intpr(Internal *i, int reg, int fmt, FPsave *ufp)
+{
+	USED(i);
+	if (!(DBG(Dbgregs)))
+		return;
+	if (fmt == Fdouble && reg < 31)
+		iprint("\tD%02d: l %08lux h %08lux =\ts %d e %04d h %08lux l %08lux\n",
+			reg, FREG(ufp, reg), FREG(ufp, reg+1),
+			i->s, i->e, i->h, i->l);
+	else
+		iprint("\tF%02d: %08lux =\ts %d e %04d h %08lux l %08lux\n",
+			reg, FREG(ufp, reg),
+			i->s, i->e, i->h, i->l);
+	delay(75);
+}
+
+/*
+ * little-endian, swapped the l and h
+ *
+ * Fetch the FP register pair containing reg as a double: the even
+ * register supplies the high word, the odd one the low word.
+ */
+static void
+dreg2dbl(Double *dp, int reg, FPsave *ufp)
+{
+	int even;
+
+	even = reg & ~1;
+	dp->l = FREG(ufp, even+1);
+	dp->h = FREG(ufp, even);
+}
+
+/*
+ * Store a double into the FP register pair containing reg: the high
+ * word goes to the even register, the low word to the odd one.
+ */
+static void
+dbl2dreg(int reg, Double *dp, FPsave *ufp)
+{
+	int even;
+
+	even = reg & ~1;
+	FREG(ufp, even+1) = dp->l;
+	FREG(ufp, even)   = dp->h;
+}
+
+/*
+ * Fetch the FP register pair containing reg as a 64-bit integer held
+ * in a Double: the even register supplies the low word, the odd one
+ * the high word.
+ */
+static void
+vreg2dbl(Double *dp, int reg, FPsave *ufp)
+{
+	int even;
+
+	even = reg & ~1;
+	dp->l = FREG(ufp, even);
+	dp->h = FREG(ufp, even+1);
+}
+
+/*
+ * Store a 64-bit integer held in a Double into the FP register pair
+ * containing reg: the low word goes to the even register, the high
+ * word to the odd one.
+ */
+static void
+dbl2vreg(int reg, Double *dp, FPsave *ufp)
+{
+	int even;
+
+	even = reg & ~1;
+	FREG(ufp, even)   = dp->l;
+	FREG(ufp, even+1) = dp->h;
+}
+
+/*
+ * convert fmt (rm) to double (rd)
+ *
+ * fmt names the format of the source register(s) rm; the result is
+ * written to the register pair containing rd.  A source format that
+ * is not one of the four known ones raises an error instead of
+ * storing an uninitialised value into the user's FP registers.
+ */
+static void
+fcvtd(int fmt, int rm, int rd, Ureg *ur, FPsave *ufp)
+{
+	Double d;
+	Internal intrn;
+
+	switch (fmt) {
+	case Ffloat:
+		fpis2i(&intrn, &FREG(ufp, rm));
+		internsane(&intrn, ur);
+		fpii2d(&d, &intrn);
+		break;
+	case Fdouble:
+		dreg2dbl(&d, rm, ufp);
+		break;
+	case Flong:
+		fpiw2i(&intrn, &FREG(ufp, rm));
+		internsane(&intrn, ur);
+		fpii2d(&d, &intrn);
+		break;
+	case Fvlong:
+		vreg2dbl(&d, rm, ufp);
+		fpiv2i(&intrn, &d);
+		internsane(&intrn, ur);
+		fpii2d(&d, &intrn);
+		break;
+	default:
+		error("sys: fp: unsupported cvt.d source format");
+		return;
+	}
+	dbl2dreg(rd, &d, ufp);
+	if (fmt != Fdouble && DBG(Dbgregs))
+		intpr(&intrn, rm, Fdouble, ufp);
+}
+
+/*
+ * convert fmt (rm) to single (rd)
+ *
+ * fmt names the format of the source register(s) rm.  A single
+ * source is copied bit for bit; the other formats go through the
+ * Internal representation.  An unknown source format raises an
+ * error instead of converting an uninitialised Internal.
+ */
+static void
+fcvts(int fmt, int rm, int rd, Ureg *ur, FPsave *ufp)
+{
+	Double d;
+	Internal intrn;
+
+	switch (fmt) {
+	case Ffloat:
+		FREG(ufp, rd) = FREG(ufp, rm);
+		break;
+	case Fdouble:
+		dreg2dbl(&d, rm, ufp);
+		fpid2i(&intrn, &d);
+		break;
+	case Flong:
+		fpiw2i(&intrn, &FREG(ufp, rm));
+		break;
+	case Fvlong:
+		vreg2dbl(&d, rm, ufp);
+		fpiv2i(&intrn, &d);
+		break;
+	default:
+		error("sys: fp: unsupported cvt.s source format");
+		return;
+	}
+	if (fmt != Ffloat) {
+		if(DBG(Dbgregs))
+			intpr(&intrn, rm, Ffloat, ufp);
+		internsane(&intrn, ur);
+		fpii2s(&FREG(ufp, rd), &intrn);
+	}
+}
+
+/*
+ * convert fmt (rm) to long (rd)
+ *
+ * fmt names the format of the source register(s) rm.  A long source
+ * is copied bit for bit; the other formats go through the Internal
+ * representation.  An unknown source format raises an error instead
+ * of converting an uninitialised Internal.
+ */
+static void
+fcvtw(int fmt, int rm, int rd, Ureg *ur, FPsave *ufp)
+{
+	Double d;
+	Internal intrn;
+
+	switch (fmt) {
+	case Ffloat:
+		fpis2i(&intrn, &FREG(ufp, rm));
+		break;
+	case Fdouble:
+		dreg2dbl(&d, rm, ufp);
+		fpid2i(&intrn, &d);
+		break;
+	case Flong:
+		FREG(ufp, rd) = FREG(ufp, rm);
+		break;
+	case Fvlong:
+		vreg2dbl(&d, rm, ufp);
+		fpiv2i(&intrn, &d);
+		break;
+	default:
+		error("sys: fp: unsupported cvt.w source format");
+		return;
+	}
+	if (fmt != Flong) {
+		if(DBG(Dbgregs))
+			intpr(&intrn, rm, Flong, ufp);
+		internsane(&intrn, ur);
+		fpii2w((long *)&FREG(ufp, rd), &intrn);
+	}
+}
+
+/*
+ * convert fmt (rm) to vlong (rd)
+ *
+ * fmt names the format of the source register(s) rm.  A vlong source
+ * is copied; the other formats go through the Internal
+ * representation.  An unknown source format raises an error instead
+ * of converting an uninitialised Internal.
+ *
+ * The 64-bit result is stored a word at a time into the even/odd
+ * pair containing rd (low word in the even register, as dbl2vreg
+ * does), so an odd rd can no longer write past the last register.
+ */
+static void
+fcvtv(int fmt, int rm, int rd, Ureg *ur, FPsave *ufp)
+{
+	Double d;
+	Internal intrn;
+	vlong res;
+
+	switch (fmt) {
+	case Ffloat:
+		fpis2i(&intrn, &FREG(ufp, rm));
+		break;
+	case Fdouble:
+		dreg2dbl(&d, rm, ufp);
+		fpid2i(&intrn, &d);
+		break;
+	case Flong:
+		fpiw2i(&intrn, &FREG(ufp, rm));
+		break;
+	case Fvlong:
+		vreg2dbl(&d, rm, ufp);
+		dbl2vreg(rd, &d, ufp);
+		break;
+	default:
+		error("sys: fp: unsupported cvt.l source format");
+		return;
+	}
+	if (fmt != Fvlong) {
+		if(DBG(Dbgregs))
+			intpr(&intrn, rm, Fvlong, ufp);
+		internsane(&intrn, ur);
+		fpii2v(&res, &intrn);
+		rd &= ~1;
+		FREG(ufp, rd)   = (ulong)res;
+		FREG(ufp, rd+1) = (ulong)(res>>32);
+	}
+}
+
+/*
+ * MIPS function codes
+ */
+
+static	FP2	optab2[] = {	/* Fd := Fn OP Fm (binary) */
+[0]	{"ADDF",	fadd},	/* can ignore fmt, just use doubles */
+[1]	{"SUBF",	fsub},
+[2]	{"MULF",	fpimul},
+[3]	{"DIVF",	fpidiv},
+};
+
+static	FP1	optab1[32] = {	/* Fd := OP Fm (unary) */
+[4]	{"SQTF",	/*fsqt*/0},
+[5]	{"ABSF",	/*fabsf*/0},	/* inline in unaryemu... */
+[6]	{"MOVF",	/*fmov*/0},
+[7]	{"NEGF",	/*fmovn*/0},
+[8]	{"ROUND.L",	/*froundl*/0},	/* 64-bit integer results ... */
+[9]	{"TRUNC.L",	/*ftruncl*/0},
+[10]	{"CEIL.L",	/*fceill*/0},
+[11]	{"FLOOR.L",	/*ffloorl*/0},
+[12]	{"ROUND.W",	frnd},		/* 32-bit integer results ... */
+[13]	{"TRUNC.W",	/*ftrunc*/0},
+[14]	{"CEIL.W",	/*fceil*/0},
+[15]	{"FLOOR.W",	/*ffloor*/0},
+/* 17—19 are newish MIPS32/64 conditional moves */
+/* 21, 22, 28—31 are newish reciprocal or sqrt */
+};
+
+static	FPcvt	optabcvt[] = {	/* Fd := OP(fmt, Fm) (unary) */
+[32]	{"CVT.S",	fcvts},		/* must honour fmt as src format */
+[33]	{"CVT.D",	fcvtd},
+[36]	{"CVT.W",	fcvtw},
+[37]	{"CVT.L",	fcvtv},
+};
+
+/*
+ * No type conversion is implied and the type of the cpu register is
+ * unknown, so copy the bits into reg.
+ * Later instructions will have to know the correct type and use the
+ * right format specifier to convert to or from Internal FP.
+ *
+ * Loads n (4 or 8) bytes from address ea into FP register d, or into
+ * the even/odd pair containing d when n is 8.
+ */
+static void
+fld(int d, ulong ea, int n, FPsave *ufp)
+{
+	if(DBG(Dbgmoves))
+		iprint("MOV%c #%lux, F%d\n", n==8? 'D': 'F', ea, d);
+	switch (n) {
+	case 4:
+		memmove(&FREG(ufp, d), (void *)ea, 4);
+		break;
+	case 8:
+		/* little-endian: the words are kept in memory order */
+		d &= ~1;
+		memmove(&FREG(ufp, d), (void *)ea, 4);
+		memmove(&FREG(ufp, d+1), (void *)(ea+4), 4);
+		break;
+	default:
+		panic("fld: n (%d) not 4 nor 8", n);
+	}
+}
+
+/*
+ * Store n (4 or 8) bytes to address ea from FP register s, or from
+ * the even/odd pair containing s when n is 8.  The bits are copied
+ * without conversion.
+ */
+static void
+fst(ulong ea, int s, int n, FPsave *ufp)
+{
+	if(DBG(Dbgmoves))
+		iprint("MOV%c	F%d,#%lux\n", n==8? 'D': 'F', s, ea);
+	switch (n) {
+	case 4:
+		memmove((void *)ea, &FREG(ufp, s), 4);
+		break;
+	case 8:
+		/* little-endian: the words are kept in memory order */
+		s &= ~1;
+		memmove((void *)ea, &FREG(ufp, s), 4);
+		memmove((void *)(ea+4), &FREG(ufp, s+1), 4);
+		break;
+	default:
+		panic("fst: n (%d) not 4 nor 8", n);
+	}
+}
+
+/*
+ * Report an FP instruction the emulator does not implement: format
+ * a message from the pc, the instruction word and msg, optionally
+ * print it, and raise it with error().  Does not return.
+ */
+void
+unimp(ulong pc, ulong op, char *msg)
+{
+	char note[120];
+
+	snprint(note, sizeof note, "sys: fp: pc=%#lux unimp fp %#.8lux: %s",
+		pc, op, msg);
+	if(DBG(Dbgbasic))
+		iprint("FPE: %s\n", note);
+	error(note);
+	/* no return */
+}
+
+/*
+ * Return 1 if the major opcode of instruction word iw is one of the
+ * FP opcodes handled here (COP1 or an FP load/store), otherwise 0.
+ */
+static int
+isfpop(ulong iw)
+{
+	ulong o;
+
+	o = OP(iw);
+	return o == COP1 || o == LWC1 || o == LDC1 || o == SWC1 || o == SDC1;
+}
+
+/*
+ * Emulate an FP load or store (LWC1, LDC1, SWC1, SDC1).
+ * The effective address is the base cpu register plus the signed
+ * offset held in the low half of the instruction; it is checked with
+ * validaddr before any data is moved.  Returns Advpc on success;
+ * any other opcode is reported through unimp.
+ */
+static int
+ldst(ulong op, Ureg *ur, FPsave *ufp)
+{
+	int base, freg, o, size, store;
+	short disp;
+	ulong ea;
+
+	/* we're using the COP1 macros, but the fields have diff'nt meanings */
+	o = OP(op);
+	base = FMT(op);
+	freg = REGT(op);
+	disp = op;		/* keeps the low half as a signed offset */
+	ea = REG(ur, base) + disp;
+
+	size = (o == LDC1 || o == SDC1)? 8: 4;
+	store = (o == SWC1 || o == SDC1);
+	validaddr(ea, size, store);
+
+	if (o == LWC1 || o == LDC1)
+		/* load an fp register (or pair) from memory */
+		fld(freg, ea, size, ufp);
+	else if (store)
+		/* store an fp register (or pair) into memory */
+		fst(ea, freg, size, ufp);
+	else {
+		unimp(ur->pc, op, "unknown non-COP1 load or store");
+		return Failed;
+	}
+	return Advpc;
+}
+
+/*
+ * Emulate the COP1 move instructions, selected by ip->fmt:
+ *	MTC1, MFC1	copy 32 bits between cpu register R(t) and F(s);
+ *	DMTC1, DMFC1	copy 64 bits between the cpu register pair
+ *			starting at even rt and the FP pair at even fs;
+ *	CFC1, CTC1	read or write an FP control register (only
+ *			Fpimp and Fpcsr are acted upon; others are
+ *			silently ignored).
+ * Returns Advpc when the instruction was one of these, Nomatch
+ * otherwise so the caller can keep decoding.
+ */
+static int
+cop1mov(Instr *ip)
+{
+	int fs, rt;
+	uvlong vl;
+	FPsave *ufp;
+	Ureg *ur;
+
+	fs = ip->rm;		/* F(s) aka rm */
+	rt = ip->rn;		/* R(t) aka rn */
+	ur = ip->ur;
+	ufp = ip->ufp;
+//iprint("fpemu: cop1 prob ld/st (R%d)=%#lux FREG%d\n", rt, REG(ip->ur, rt), fs);
+
+	/* MIPS fp register pairs are in little-endian order: low word first */
+	switch (ip->fmt) {
+	case MTC1:
+		/* load an fp register, F(s), from cpu register R(t) */
+		fld(fs, (uintptr)&REG(ur, rt), 4, ufp);
+		return Advpc;
+	case DMTC1:
+		/*
+		 * load an fp register pair, (F(s), F(s+1)),
+		 * from cpu registers (rt, rt+1)
+		 */
+		iprint("fpemu: 64-bit DMTC1 may have words backward\n");
+		rt &= ~1;
+		/* assemble the pair in a local so fld can copy it as memory */
+		vl = (uvlong)REG(ur, rt+1) << 32 | REG(ur, rt);
+		fld(fs & ~1, (uintptr)&vl, 8, ufp);
+		return Advpc;
+	case MFC1:
+		/* store an fp register, fs, into a cpu register rt */
+		fst((uintptr)&REG(ur, rt), fs, 4, ufp);
+		return Advpc;
+	case DMFC1:
+		/*
+		 * store an fp register pair, (F(s), F(s+1)),
+		 * into cpu registers (rt, rt+1)
+		 */
+		iprint("fpemu: 64-bit DMFC1 may have words backward\n");
+		fst((uintptr)&vl, fs & ~1, 8, ufp);
+		rt &= ~1;
+		REG(ur, rt) = (ulong)vl;
+		REG(ur, rt+1) = vl>>32;
+		return Advpc;
+	case CFC1:
+		switch (fs) {
+		case Fpimp:			/* MOVW FCR0,Rn */
+			REG(ur, rt) = 0x500;	/* claim to be r4k */
+			break;
+		case Fpcsr:
+			REG(ur, rt) = ufp->fpcontrol;
+			break;
+		}
+		if(DBG(Dbgbasic))
+			iprint("MOVW	FCR%d, R%d\n", fs, rt);
+		return Advpc;
+	case CTC1:
+		switch (fs) {
+		case Fpcsr:
+			ufp->fpcontrol = REG(ur, rt);
+			break;
+		}
+		if(DBG(Dbgbasic))
+			iprint("MOVW	R%d, FCR%d\n", rt, fs);
+		return Advpc;
+	}
+	return Nomatch;			/* not a load or store; keep looking */
+}
+
+/*
+ * Return a printable name for COP1 format field fmt, or "GOK" when
+ * the value has no name.  The format field is 5 bits wide but
+ * fmtnames only extends to its last initialised entry, so the index
+ * is range-checked before the lookup.
+ */
+static char *
+decodefmt(int fmt)
+{
+	if (fmt < 0 || fmt >= (int)(sizeof fmtnames / sizeof fmtnames[0]))
+		return "GOK";
+	if (fmtnames[fmt])
+		return fmtnames[fmt];
+	else
+		return "GOK";
+}
+
+/*
+ * Return the printable name of compare predicate pred, or "GOK"
+ * when the table has no entry for it.
+ */
+static char *
+predname(int pred)			/* predicate name */
+{
+	char *name;
+
+	name = prednames[pred];
+	return name? name: "GOK";
+}
+
+/*
+ * Evaluate MIPS compare predicate cond (0..15) on m and n; returns
+ * the truth value of the condition.
+ *
+ * The low three bits of cond select which relations make the
+ * predicate true: bit 0 unordered, bit 1 equal, bit 2 less than.
+ * Only a NaN operand makes the pair unordered; infinities take part
+ * in the ordinary comparison, which fpicmp handles.  NGL (11) is
+ * "equal or unordered", so for ordered operands it is true exactly
+ * when they are equal.
+ *
+ * m and n are passed by value because fpiround modifies them.
+ */
+static int
+fcmpf(Internal m, Internal n, int, int cond)
+{
+	int i;
+
+	if(IsNaN(&m) || IsNaN(&n)){
+		/* BUG: should trap if not masked */
+		return cond & 1;
+	}
+	fpiround(&n);
+	fpiround(&m);
+	i = fpicmp(&m, &n);		/* returns -1, 0, or 1 */
+	switch (cond) {
+	case 0:			/* F - false */
+	case 1:			/* UN - unordered */
+		return 0;
+	case 2:			/* EQ */
+	case 3:			/* UEQ */
+		return i == 0;
+	case 4:			/* OLT */
+	case 5:			/* ULT */
+		return i < 0;
+	case 6:			/* OLE */
+	case 7:			/* ULE */
+		return i <= 0;
+	case 8:			/* SF */
+	case 9:			/* NGLE - not >, < or = */
+		return 0;
+	case 10:		/* SEQ */
+	case 11:		/* NGL - not > or < */
+		return i == 0;
+	case 12:		/* LT */
+	case 13:		/* NGE */
+		return i < 0;
+	case 14:		/* LE */
+	case 15:		/* NGT */
+		return i <= 0;
+	}
+	return 0;
+}
+
+/*
+ * assuming that ur->pc points to a branch instruction,
+ * change it to point to the branch's target and return it.
+ * panics if branch() reports no branch there (returns 0).
+ */
+static uintptr
+followbr(Ureg *ur)
+{
+	uintptr target;
+
+	target = branch(ur, up->fpsave->fpstatus);
+	if(target == 0)
+		panic("fpemu: branch expected but not seen at %#p", ur->pc);
+	ur->pc = target;
+	return target;
+}
+
+/*
+ * emulate COP1 instruction in branch delay slot
+ *
+ * dsinsn is the instruction word found at ip->pc + 4.  ur->pc is
+ * preserved across the emulation; the status returned by fpimips
+ * is not examined.
+ */
+static void
+dsemu(Instr *ip, ulong dsinsn, Ureg *ur, FPsave *ufp)
+{
+	uintptr savedpc;
+
+	/* fpimips will change ur->pc, so remember it */
+	savedpc = ur->pc;
+	if(DBG(Dbgdelay))
+		iprint(">>> emulating br delay slot\n");
+
+	fpimips(ip->pc + 4, dsinsn, ur, ufp);
+
+	if(DBG(Dbgdelay))
+		iprint("<<< done emulating br delay slot\n");
+	ur->pc = savedpc;
+}
+
+/*
+ * execute non-COP1 instruction in branch delay slot, in user mode with
+ * user registers, then trap so we can finish up and take the branch.
+ *
+ * ip describes the FP branch (ip->pc is its address), ur is the user
+ * register set whose pc gets redirected, and ufp receives the pending
+ * delay-slot state (fpdelayexec, fpdelaypc, fpdelaysts) that fpwatch()
+ * later consumes.  watchlock is acquired here and is still held on
+ * return; fpwatch() releases it.
+ */
+static void
+dsexec(Instr *ip, Ureg *ur, FPsave *ufp)
+{
+	ulong dsaddr, wpaddr;
+	Tos *tos;
+
+	/*
+	 * copy delay slot, EHB, EHB, EHB to tos->kscr, flush caches,
+	 * point pc there, set watch point on tos->kscr[2], return.
+	 * this is safe since we've already checked for branches (and FP
+	 * instructions) in the delay slot, so the instruction can be
+	 * executed at any address.
+	 */
+	dsaddr = ip->pc + 4;
+	tos = (Tos*)(USTKTOP-sizeof(Tos));
+	tos->kscr[0] = *(ulong *)dsaddr;
+	tos->kscr[1] = 0xc0;		/* EHB; we could use some trap instead */
+	tos->kscr[2] = 0xc0;			/* EHB */
+	tos->kscr[3] = 0xc0;			/* EHB */
+	/* write the new instructions back, then flush the i-cache copy of them */
+	dcflush(tos->kscr, sizeof tos->kscr);
+	icflush(tos->kscr, sizeof tos->kscr);
+
+	wpaddr = (ulong)&tos->kscr[2] & ~7;	/* clear I/R/W bits */
+	ufp->fpdelayexec = 1;
+	ufp->fpdelaypc = ip->pc;		/* remember branch ip->pc */
+	ufp->fpdelaysts = ufp->fpstatus;	/* remember state of FPCOND */
+	ur->pc = (ulong)tos->kscr;		/* restart in tos */
+	qlock(&watchlock);			/* wait for first watchpoint */
+	setwatchlo0(wpaddr | 1<<2);	/* doubleword addr(!); i-fetches only */
+	setwatchhi0(TLBPID(tlbvirt())<<16);	/* asid; see mmu.c */
+	if (DBG(Dbgdelay))
+		iprint("fpemu: set %s watch point at %#lux, after br ds %#lux...",
+			up->text, wpaddr, *(ulong *)dsaddr);
+	/* return to user mode, await fpwatch() trap */
+}
+
+/*
+ * called on a watch-point trap.  the only watch point armed by this
+ * code is the one dsexec() sets after copying a branch delay slot to
+ * tos->kscr, so finish that sequence: disarm the watch point, release
+ * watchlock, restore the FP status saved at the branch and resume at
+ * the branch's target.  panics if no delay-slot execution is pending.
+ */
+void
+fpwatch(Ureg *ur)
+{
+	FPsave *ufp;
+
+	ufp = up->fpsave;
+	if(ufp->fpdelayexec == 0)
+		panic("fpwatch: unexpected watch trap");
+
+	/* assume we got here after branch-delay-slot execution */
+	ufp->fpdelayexec = 0;
+	setwatchlo0(0);
+	setwatchhi0(0);
+	qunlock(&watchlock);
+
+	ur->pc = ufp->fpdelaypc;	/* pc of fp branch */
+	/*
+	 * take no chances: the delay slot is done, so clear only BD
+	 * (as fpuemu does) and leave the other cause bits alone.
+	 */
+	ur->cause &= ~BD;
+	ufp->fpstatus = ufp->fpdelaysts;
+	followbr(ur);			/* sets ur->pc to fp branch target */
+	if (DBG(Dbgdelay))
+		iprint("delay slot executed; resuming at %#lux\n", ur->pc);
+}
+
+/* check that the 4 bytes at pc pass validaddr, then return the word there */
+static ulong
+validiw(uintptr pc)
+{
+	ulong *insp;
+
+	validaddr(pc, 4, 0);
+	insp = (ulong*)pc;
+	return *insp;
+}
+
+/*
+ * emulate an FP conditional branch.
+ * COP1 (6) | BRANCH (5) | cc (3) | likely | true | offset(16)
+ *	cc = ip->rn >> 2;			// assume cc == 0
+ *
+ * returns Advpc if the branch is not taken, Leavepc once ur->pc has
+ * been set to the branch target, or Leavepcret when the delay slot
+ * has been handed to dsexec() for execution in user mode.
+ */
+static int
+bremu(Instr *ip)
+{
+	int disp, taken, ontrue;
+	ulong dsinsn;
+	FPsave *ufp;
+	Ureg *ur;
+
+	if (ip->iw & (1<<17))
+		error("fpuemu: `likely' fp branch (obs)");
+
+	ufp = ip->ufp;
+	ontrue = ip->iw & (1<<16);	/* set for BCT, clear for BCF */
+	if (ufp->fpstatus & FPCOND)
+		taken = ontrue;		/* taken iff BCT */
+	else
+		taken = !ontrue;	/* taken iff BCF */
+
+	dsinsn = validiw(ip->pc + 4);	/* delay slot addressable? */
+	if(DBG(Dbgdelay)){
+		disp = (short)(ip->iw & MASK(16));
+		iprint("BFP%c\t%d(PC): %staken\n", (ontrue? 'T': 'F'),
+			disp, taken? "": "not ");
+		iprint("\tdelay slot: %08lux\n", dsinsn);
+		delay(75);
+	}
+
+	ur = ip->ur;
+	assert(ur->pc == ip->pc);
+	if(!taken)
+		return Advpc;	/* fall through into the delay slot */
+
+	/*
+	 * branch taken: deal with the delay slot first, then jump.
+	 * a NOP in the slot needs no work at all.
+	 */
+	if(dsinsn != NOP && dsinsn != MIPSNOP){
+		if(isbranch((ulong *)(ip->pc + 4)))
+			error("fpuemu: branch in fp branch delay slot");
+		else if (isfpop(dsinsn))
+			dsemu(ip, dsinsn, ur, ufp);	/* emulate it here */
+		else{
+			/*
+			 * the hard case: the slot must run in user mode
+			 * with user registers.  dsexec sets a watch point;
+			 * fpwatch() finishes the branch after the trap.
+			 */
+			dsexec(ip, ur, ufp);
+			return Leavepcret;
+		}
+	}
+	followbr(ur);
+	return Leavepc;
+}
+
+/*
+ * load fp register reg of the current process, read as fmt (float or
+ * double), into Internal *i.  any other fmt leaves a quiet NaN in *i.
+ */
+static void
+reg2intern(Internal *i, int reg, int fmt, Ureg *ur)
+{
+	Double dbl;
+	FPsave *ufp;
+
+	ufp = up->fpsave;
+	if(fmt == Ffloat){
+		fpis2i(i, &FREG(ufp, reg));
+		internsane(i, ur);
+	}else if(fmt == Fdouble){
+		dreg2dbl(&dbl, reg, ufp);
+		fpid2i(i, &dbl);
+		internsane(i, ur);
+	}else{
+		/*
+		 * conversions and unary ops may present other formats;
+		 * poison the value so that any use of it causes trouble.
+		 */
+		SetQNaN(i);
+	}
+}
+
+/*
+ * store Internal *i into fp register reg of the current process as
+ * fmt (float or double).  *i itself is not modified; any other fmt
+ * is a panic.
+ */
+static void
+intern2reg(int reg, Internal *i, int fmt, Ureg *ur)
+{
+	Double dbl;
+	FPsave *ufp;
+	Internal scratch;
+
+	ufp = up->fpsave;
+	scratch = *i;		/* work on a throw-away copy */
+	internsane(&scratch, ur);
+	if(fmt == Ffloat)
+		fpii2s(&FREG(ufp, reg), &scratch);
+	else if(fmt == Fdouble){
+		fpii2d(&dbl, &scratch);
+		dbl2dreg(reg, &dbl, ufp);
+	}else
+		panic("intern2reg: bad fmt %d", fmt);
+}
+
+/*
+ * comparisons - encoded slightly differently than arithmetic:
+ * COP1 (6) | fmt(5) | ft (5) | fs (5) | # same
+ *	cc (3) | 0 | A=0 |		# diff, was REGD
+ *	FC=11 | cond (4)		# FUNC
+ *
+ * ip->fm is expected to hold the first operand already; the second
+ * is decoded here.  the outcome is recorded in FPCOND of fpstatus.
+ * always returns Advpc.
+ */
+static int
+cmpemu(Instr *ip)
+{
+	int cc, pred;
+	FPsave *ufp;
+
+	ufp = ip->ufp;
+	cc = ip->rd >> 2;
+	pred = ip->o & MASK(4);
+	reg2intern(ip->fn, ip->rn, ip->fmt, ip->ur);
+
+	/* fpicmp args are swapped, so this is `n compare m' */
+	if (fcmpf(*ip->fm, *ip->fn, cc, pred))
+		ufp->fpstatus |= FPCOND;
+	else
+		ufp->fpstatus &= ~FPCOND;
+
+	if(DBG(Dbgbasic))
+		iprint("CMP%s.%s	F%d,F%d =%d\n", predname(pred), ip->dfmt,
+			ip->rm, ip->rn, (ufp->fpstatus & FPCOND? 1: 0));
+	if(DBG(Dbgregs)) {
+		intpr(ip->fm, ip->rm, ip->fmt, ufp);
+		intpr(ip->fn, ip->rn, ip->fmt, ufp);
+		delay(75);
+	}
+	return Advpc;
+}
+
+/*
+ * emulate a two-operand arithmetic instruction via optab2[ip->o].
+ * ip->fm must already hold the first operand; the second is decoded
+ * here and the result is stored in register ip->rd.  returns Advpc.
+ */
+static int
+binemu(Instr *ip)
+{
+	FP2 *op;
+	Internal result, shown;
+	Internal *second;
+
+	op = &optab2[ip->o];
+	if(op->f == nil)
+		unimp(ip->pc, ip->iw, "missing binary op");
+
+	/* bring in the second operand */
+	second = ip->fn;
+	reg2intern(second, ip->rn, ip->fmt, ip->ur);
+	if(DBG(Dbgregs))
+		intpr(second, ip->rn, ip->fmt, ip->ufp);
+
+	if(DBG(Dbgbasic)){
+		iprint("%s.%s\tF%d,F%d,F%d\n", op->name, ip->dfmt,
+			ip->rm, ip->rn, ip->rd);
+		delay(75);
+	}
+
+	/*
+	 * both operand Internals are scratch space for this one
+	 * instruction, so the fpi routines are free to clobber them.
+	 * NB: fpi routines take m and n (s and t) in reverse order.
+	 */
+	(*op->f)(second, ip->fm, &result);
+
+	/* write the result back, keeping a copy for the debug print */
+	if(DBG(Dbgregs))
+		shown = result;
+	intern2reg(ip->rd, &result, ip->fmt, ip->ur);
+	if(DBG(Dbgregs))
+		intpr(&shown, ip->rd, ip->fmt, ip->ufp);
+	return Advpc;
+}
+
+/*
+ * emulate a one-operand instruction.  MOV (6) copies raw register
+ * words; ABS (5) and NEG (7) adjust the sign of the operand already
+ * decoded into ip->fm; everything else goes through optab1[ip->o].
+ * returns Advpc.
+ */
+static int
+unaryemu(Instr *ip)
+{
+	int o, src, dst;
+	FP1 *op;
+	FPsave *ufp;
+	Internal scratch, shown;
+	Internal *res;
+
+	o = ip->o;
+	op = &optab1[o];
+	if(DBG(Dbgbasic)){
+		iprint("%s.%s\tF%d,F%d\n", op->name, ip->dfmt, ip->rm, ip->rd);
+		delay(75);
+	}
+
+	if(o == 6){			/* MOV */
+		ufp = ip->ufp;
+		dst = ip->rd;
+		src = ip->rm;
+		if(ip->fmt == Fdouble){
+			/* doubles use an even/odd register pair */
+			dst &= ~1;
+			src &= ~1;
+			FREG(ufp, dst+1) = FREG(ufp, src+1);
+		}
+		FREG(ufp, dst) = FREG(ufp, src);
+		return Advpc;
+	}
+
+	if(o == 5){			/* ABS */
+		res = ip->fm;		/* source Internal doubles as result */
+		res->s = 0;
+	}else if(o == 7){		/* NEG */
+		res = ip->fm;		/* source Internal doubles as result */
+		res->s ^= 1;
+	}else{
+		if(op->f == nil)
+			unimp(ip->pc, ip->iw, "missing unary op");
+		res = &scratch;
+		(*op->f)(ip->fm, res);
+	}
+
+	/* keep a copy of the result for the debug print */
+	if(DBG(Dbgregs))
+		shown = *res;
+	intern2reg(ip->rd, res, ip->fmt, ip->ur);
+	if(DBG(Dbgregs))
+		intpr(&shown, ip->rd, ip->fmt, ip->ufp);
+	return Advpc;
+}
+
+/*
+ * emulate a format conversion by calling the optabcvt[ip->o] routine
+ * with the raw register numbers.  returns Advpc.
+ */
+static int
+cvtemu(Instr *ip)
+{
+	FPcvt *cvt;
+
+	cvt = &optabcvt[ip->o];
+	if(cvt->f == nil)
+		unimp(ip->pc, ip->iw, "missing conversion op");
+
+	if(DBG(Dbgbasic)){
+		iprint("%s.%s\tF%d,F%d\n", cvt->name, ip->dfmt, ip->rm, ip->rd);
+		delay(75);
+	}
+
+	(*cvt->f)(ip->fmt, ip->rm, ip->rd, ip->ur, ip->ufp);
+	return Advpc;
+}
+
+/*
+ * fill in *ip for COP1 instruction word iw found at pc: the context
+ * pointers, the decoded instruction fields, and the two scratch
+ * Internals (imp, inp) the emulation routines use for operands.
+ * ip->dfmt is only set when basic debugging is on.
+ */
+static void
+cop1decode(Instr *ip, ulong iw, ulong pc, Ureg *ur, FPsave *ufp,
+	Internal *imp, Internal *inp)
+{
+	/* where the instruction came from */
+	ip->pc = pc;
+	ip->iw = iw;
+	ip->ur = ur;
+	ip->ufp = ufp;
+
+	/* scratch operands */
+	ip->fm = imp;
+	ip->fn = inp;
+
+	/* instruction fields */
+	ip->fmt = FMT(iw);
+	ip->o = FUNC(iw);
+	ip->rm = REGS(iw);		/* 1st operand */
+	ip->rn = REGT(iw);		/* 2nd operand (ignored by unary ops) */
+	ip->rd = REGD(iw);		/* destination */
+
+	if (DBG(Dbgbasic))
+		ip->dfmt = decodefmt(ip->fmt);
+}
+
+/*
+ * debugging aid, active only with Dbgbasic: count consecutive calls
+ * made for the same pc and panic once the count passes 4.
+ */
+void
+fpstuck(uintptr pc, FPsave *fp)
+{
+	USED(pc);
+	if(!(DBG(Dbgbasic)))
+		return;
+
+	if (fp->fppc != pc) {
+		/* a different instruction: start counting again */
+		fp->fppc = pc;
+		fp->fpcnt = 0;
+		return;
+	}
+
+	fp->fpcnt++;
+	if (fp->fpcnt > 4)
+		panic("fpuemu: cpu%d stuck at pid %ld %s pc %#p "
+			"instr %#8.8lux", m->machno, up->pid, up->text,
+			pc, *(ulong *)pc);
+}
+
+/* run the stuck-pc check, and report (if Dbgdelay) an FP op in a delay slot */
+static void
+_dbgstuck(ulong pc, Ureg *ur, FPsave *ufp)
+{
+	fpstuck(pc, ufp);
+	if (!DBG(Dbgdelay))
+		return;
+	if (ur->cause & BD)
+		iprint("fpuemu: FP in a branch delay slot\n");
+}
+
+/*
+ * decode the opcode and call common emulation code.
+ *
+ * pc is the address of instruction word op; ur and ufp are the user
+ * registers and emulated FP state.  the return value is whatever the
+ * chosen emulation routine returns (ldst, bremu, cop1mov, cvtemu,
+ * unaryemu, binemu or cmpemu), or Failed after unimp() for anything
+ * not recognised.
+ */
+static int
+fpimips(ulong pc, ulong op, Ureg *ur, FPsave *ufp)
+{
+	int r, o;
+	Instr insn;
+	Instr *ip;
+	Internal im, in;
+
+	/* note: would update fault status here if we noted numeric exceptions */
+	dummyr0 = 0;
+	switch (OP(op)) {
+	case LWC1:
+	case LDC1:
+	case SWC1:
+	case SDC1:
+		/* FP loads and stores are handled entirely by ldst */
+		dbgstuck(pc, ur, ufp);
+		return ldst(op, ur, ufp);
+	default:
+		unimp(pc, op, "non-FP instruction");
+		return Failed;
+	case COP1:
+		dbgstuck(pc, ur, ufp);
+		break;
+	}
+
+	/* im and in are the scratch operand Internals for this instruction */
+	ip = &insn;
+	cop1decode(ip, op, pc, ur, ufp, &im, &in);
+	if (ip->fmt == BRANCH) {		/* FP conditional branch? */
+		r = bremu(ip);
+		if(DBG(Dbgdelay)){
+			iprint("resuming after br, at %#lux", ur->pc);
+			if (r == Leavepcret)
+				iprint("...");	/* we'll be right back */
+			else
+				iprint("\n");
+		}
+		return r;
+	}
+	o = ip->o;
+	if (o == 0 && ip->rd == 0) {	/* *[TF]C1 load or store? */
+		r = cop1mov(ip);
+		if (r != Nomatch)
+			return r;
+		/* else wasn't a [tf]c1 move */
+	}
+	/* don't decode & print rm yet; it might be an integer */
+	if(o >= 32 && o < 40)		/* conversion? */
+		return cvtemu(ip);
+
+	/* decode the mandatory operand, rm */
+	reg2intern(ip->fm, ip->rm, ip->fmt, ip->ur);
+	if(DBG(Dbgregs))
+		intpr(&im, ip->rm, ip->fmt, ip->ufp);
+
+	/*
+	 * arithmetic
+	 * all operands must be of the same format
+	 */
+	if(o >= 4 && o < 32)		/* monadic */
+		return unaryemu(ip);
+	if(o < 4)			/* the few binary ops */
+		return binemu(ip);
+
+	/* the low two bits of rd must be zero for a comparison */
+	if(o >= 48 && (ip->rd & MASK(2)) == 0)	/* comparison? */
+		return cmpemu(ip);
+
+	/* don't recognise the opcode */
+	if(DBG(Dbgbasic))
+		iprint("fp at %#lux: %#8.8lux BOGON\n", pc, op);
+	unimp(pc, op, "unknown opcode");
+	return Failed;
+}
+
+/*
+ * return the current process's emulated FP state, initialising it on
+ * first use (fpstate FPinit becomes FPemu): control, status and the
+ * stuck-pc counters are cleared and each even/odd register pair is
+ * loaded from fpconst.  FPactive or FPinactive is an error, since fp
+ * is emulated here.
+ */
+static FPsave *
+fpinit(Ureg *ur)
+{
+	int reg, src;
+	Double d;
+	FPsave *ufp;
+	Internal tmp;
+
+	/*
+	 * all emulated fp state lives in the proc structure, so there
+	 * is nothing to save or restore.
+	 */
+	ufp = up->fpsave;
+	if(up->fpstate == FPactive || up->fpstate == FPinactive)
+		error("fpu (in)active but fp is emulated");
+	else if(up->fpstate == FPinit){
+		up->fpstate = FPemu;
+		ufp->fpcontrol = 0;
+		ufp->fpstatus = 0;
+		ufp->fpcnt = 0;
+		ufp->fppc = 0;
+		for(reg = 0; reg < Nfpregs-1; reg += 2) {
+			/* an uninitialised constant is treated as 0.0 */
+			src = fpconst[reg].h == 0? FZERO: reg;
+			tmp = fpconst[src];
+			internsane(&tmp, ur);
+			fpii2d(&d, &tmp);
+			dbl2dreg(reg, &d, ufp);
+		}
+	}
+	return ufp;
+}
+
+/*
+ * called from trap.c's CCPU case, only to deal with user-mode
+ * instruction faults.  
+ *
+ * libc/mips/lock.c reads FCR0 to determine what kind of system
+ * this is (and thus if it can use LL/SC or must use some
+ * system-dependent method).  So we simulate the move from FCR0.
+ * All modern mips have LL/SC, so just claim to be an r4k.
+ *
+ * emulates the faulting instruction and then keeps going for as long
+ * as the following instruction (after skipping NOPs) is also an FP op.
+ * returns 0 on success; if an error is raised, the error string is
+ * posted to the process as a note and -1 is returned.
+ */
+int
+fpuemu(Ureg *ureg)
+{
+	int s;
+	uintptr pc;
+	ulong iw, r;
+
+	if(waserror()){
+		postnote(up, 1, up->errstr, NDebug);
+		return -1;
+	}
+
+	if(up->fpstate & FPillegal)
+		error("floating point in note handler");
+	if(up->fpsave->fpdelayexec)
+		panic("fpuemu: entered with outstanding watch trap");
+
+	pc = ureg->pc;
+	validaddr(pc, 4, 0);
+	/* only the first instruction can be in a branch delay slot */
+	if(ureg->cause & BD) {
+		pc += 4;
+		validaddr(pc, 4, 0);		/* check branch delay slot */
+	}
+	iw = *(ulong*)pc;
+	do {
+		/* recognise & optimise a common case */
+		if (iw == 0x44410000){		/* MOVW FCR0,R1 (CFC1) */
+			ureg->r1 = 0x500;	/* claim an r4k */
+			r = Advpc;
+			if (DBG(Dbgbasic))
+				iprint("faked MOVW FCR0,R1\n");
+		}else{
+			/* emulate at low priority; restore spl on error too */
+			s = spllo();
+			if(waserror()){
+				splx(s);
+				nexterror();
+			}
+			r = fpimips(pc, iw, ureg, fpinit(ureg));
+			splx(s);
+			poperror();
+			/* Leavepcret: dsexec has arranged a return to user mode */
+			if (r == Failed || r == Leavepcret)
+				break;
+		}
+		if (r == Advpc)	/* simulation succeeded, advance the pc? */
+			if(ureg->cause & BD)
+				followbr(ureg);
+			else
+				ureg->pc += 4;
+		ureg->cause &= ~BD;
+
+		pc = ureg->pc;
+		iw = validiw(pc);
+		while (iw == NOP || iw == MIPSNOP) {	/* skip NOPs */
+			pc += 4;
+			ureg->pc = pc;
+			iw = validiw(pc);
+		}
+		/* is next ins'n also FP? */
+	} while (isfpop(iw));
+	if (r == Failed){
+		iprint("fpuemu: fp emulation failed for %#lux"
+			" at pc %#p in %lud %s\n",
+			iw, ureg->pc, up->pid, up->text);
+		unimp(ureg->pc, iw, "no fp instruction");
+		/* no return */
+	}
+	ureg->cause &= ~BD;
+	poperror();
+	return 0;
+}
+
+/*
+ * report whether the instruction word at pc is one of the jumps or
+ * branches this code knows about: JR, JALR, BLTZ, BGEZ, BLTZAL, BGEZAL,
+ * JMP, JAL, BEQ, BNE, BLEZ, BGTZ, or an FP BCT/BCF.
+ * returns 1 if so, 0 otherwise.
+ */
+int
+isbranch(ulong *pc)
+{
+	ulong iw, op, sub;
+
+	iw = *pc;
+	op = iw >> 26;
+
+	/* integer unit jumps first */
+	if(op == 0){			/* SPECIAL: JR or JALR */
+		sub = iw & 0x3F;
+		return sub == 0x08 || sub == 0x09;
+	}
+	if(op == 1){			/* BCOND: BLTZ, BGEZ, BLTZAL, BGEZAL */
+		sub = (iw>>16) & 0x1F;
+		return sub == 0x00 || sub == 0x01 || sub == 0x10 || sub == 0x11;
+	}
+	if(op >= 2 && op <= 7)		/* JMP, JAL, BEQ, BNE, BLEZ, BGTZ */
+		return 1;
+
+	/* floating point unit jumps */
+	if(op != COP1)
+		return 0;
+	sub = (iw>>16) & 0x3C1;
+	return sub == 0x100 || sub == 0x180 ||		/* BCF */
+		sub == 0x101 || sub == 0x181;		/* BCT */
+}
+
+/*
+ * if current instruction is a (taken) branch, return new pc and,
+ * for jump-and-links, set r31.
+ *
+ * ur->pc addresses the instruction to examine; fcr31 supplies FPCOND
+ * for the FP branches.  a recognised branch that is not taken returns
+ * the address after its delay slot (ur->pc + 8).  returns 0 if the
+ * instruction is not a recognised jump or branch.  ur->pc itself is
+ * not changed.
+ */
+ulong
+branch(Ureg *ur, ulong fcr31)
+{
+	ulong iw, npc, rs, rt, rd, offset, targ, next;
+
+	iw = ur->pc;
+	iw = *(ulong*)iw;
+	/* fetch the values of the rs and rt registers; register 0 reads as 0 */
+	rs = (iw>>21) & 0x1F;
+	if(rs)
+		rs = REG(ur, rs);
+	rt = (iw>>16) & 0x1F;
+	if(rt)
+		rt = REG(ur, rt);
+	offset = iw & ((1<<16)-1);
+	if(offset & (1<<15))	/* sign extend */
+		offset |= ~((1<<16)-1);
+	offset <<= 2;
+	targ = ur->pc + 4 + offset;	/* branch target */
+	/* ins'n after delay slot (assumes delay slot has already been exec'd) */
+	next = ur->pc + 8;
+	/*
+	 * Integer unit jumps first
+	 */
+	switch(iw>>26){
+	case 0:			/* SPECIAL: JR or JALR */
+		switch(iw&0x3F){
+		case 0x09:	/* JALR */
+			rd = (iw>>11) & 0x1F;
+			if(rd)
+				REG(ur, rd) = next;
+			/* fall through */
+		case 0x08:	/* JR */
+			return rs;
+		default:
+			return 0;
+		}
+	case 1:			/* BCOND */
+		switch((iw>>16) & 0x1F){
+		case 0x10:	/* BLTZAL */
+			ur->r31 = next;		/* linked whether or not taken */
+			/* fall through */
+		case 0x00:	/* BLTZ */
+			if((long)rs < 0)
+				return targ;
+			return next;
+		case 0x11:	/* BGEZAL */
+			ur->r31 = next;		/* linked whether or not taken */
+			/* fall through */
+		case 0x01:	/* BGEZ */
+			if((long)rs >= 0)
+				return targ;
+			return next;
+		default:
+			return 0;
+		}
+	case 3:			/* JAL */
+		ur->r31 = next;
+		/* fall through */
+	case 2:			/* JMP */
+		/* 26-bit word index within the 256MB region of ur->pc */
+		npc = iw & ((1<<26)-1);
+		npc <<= 2;
+		return npc | (ur->pc&0xF0000000);
+	case 4:			/* BEQ */
+		if(rs == rt)
+			return targ;
+		return next;
+	case 5:			/* BNE */
+		if(rs != rt)
+			return targ;
+		return next;
+	case 6:			/* BLEZ */
+		if((long)rs <= 0)
+			return targ;
+		return next;
+	case 7:			/* BGTZ */
+		if((long)rs > 0)
+			return targ;
+		return next;
+	}
+	/*
+	 * Floating point unit jumps
+	 */
+	if((iw>>26) == COP1)
+		switch((iw>>16) & 0x3C1){
+		case 0x101:	/* BCT */
+		case 0x181:	/* BCT */
+			if(fcr31 & FPCOND)
+				return targ;
+			return next;
+		case 0x100:	/* BCF */
+		case 0x180:	/* BCF */
+			if(!(fcr31 & FPCOND))
+				return targ;
+			return next;
+		}
+	/* shouldn't get here */
+	return 0;
+}
--- /dev/null
+++ b/sys/src/9/mt7688/init9.s
@@ -1,0 +1,8 @@
+/*
+ * _main: load R30 with $setR30(SB), then call startboot.  the address
+ * of boot is left in R1 and stored at 4(R29); a pointer to 0(FP)
+ * (R29+12) is stored at 8(R29).
+ */
+TEXT	_main(SB), $8
+	MOVW	$setR30(SB), R30
+	MOVW	$boot(SB), R1
+	ADDU	$12, R29, R2	/* get a pointer to 0(FP) */
+	MOVW	R1, 4(R29)
+	MOVW	R2, 8(R29)
+	JAL	startboot(SB)
+
--- /dev/null
+++ b/sys/src/9/mt7688/io.h
@@ -1,0 +1,317 @@
+/*
+ *  various things to IO with
+ */
+
+/* pointer of type t* to address x with KSEG1 or'ed in; x is parenthesized so an expression argument is cast as a whole */
+#define	IO(t,x)		((t*)(KSEG1|((ulong)(x))))
+
+/* for mt7688 testing on onion Ω 2 + */
+#define	SYSCTLBASE	0x10000000
+#define TIMERBASE	0x10000100
+#define IRQBASE		0x10000200
+#define MEMCBASE	0x10000300
+#define RBUSBASE	0x10000400
+#define	MCNTBASE	0x10000500
+#define GPIOBASE	0x10000600
+#define	I2CBASE		0x10000900
+#define I2SBASE		0x10000A00
+#define SPIBASE		0x10000B00
+#define UARTLBASE	0x10000C00
+#define UART1BASE	0x10000D00
+#define UART2BASE	0x10000E00
+
+#define	DMABASE		0x10002800
+#define AESBASE		0x10004000	/* crypto engine */
+
+#define ETHBASE		0x10100000
+#define SWCHBASE	0x10110000
+#define	PCIBASE		0x10140000
+#define PCIWIN		0x10150000
+#define	WIFIBASE	0x10300000
+#define USBBASE		0x101C0000
+
+
+
+/*
+ *  duarts, frequency and registers
+ */
+#define DUARTFREQ	40000000  /* mt7688 has a 40MHz clock */	
+
+#define UART_RBR	0x00
+#define UART_THR	0x00
+#define	UART_IER	0x04
+#define UART_IIR	0x08
+#define	UART_FCR	0x08
+#define	UART_LCR	0x0C
+#define UART_MCR	0x10
+#define UART_LSR	0x14
+#define	UART_MSR	0x18
+#define	UART_SCR	0x1C
+#define UART_DLL	0x00
+#define UART_DLM	0x04
+
+
+/*
+ *	system control
+ */
+
+#define SYSCTL_RST			0x34
+
+
+/*
+ *  interrupt levels
+ */
+
+/* no trailing semicolon, so the macro can be used inside an expression */
+#define IRQshift	8	/* NOTE(review): presumably the bit position of INTR0 in STATUS/CAUSE - confirm */
+
+/*
+ * irq numbers handed to intrenable() and friends.
+ * 0-7 are the cpu's own interrupt lines (INTR0-INTR7); from IRQinc0
+ * on, each value is IRQinc0 plus a bit number on the SoC interrupt
+ * controller (compare the INC_* defines below).  the IRQincN names
+ * only fill slots for which no INC_* source is defined.
+ *
+ * NOTE(review): IRQwdog lands at IRQinc0+23 but INC_WDOG is 24, so the
+ * positional rule does not hold for the watchdog; a filler for bit 23
+ * looks to be missing.  adding one also moves IRQmax past 32, which
+ * the 32-entry tables in irq.c would have to follow - confirm.
+ */
+/* for cpu */
+enum {
+	IRQsw1		=	0,	//INTR0
+	IRQsw2,
+	IRQlow,				//INTR2
+	IRQhigh,
+	IRQpci,
+	IRQethr,
+	IRQwifi,
+	IRQtimer,			//INTR7
+	IRQinc0,				// pseudo numbers for INC
+	IRQsys,
+	IRQtimer0,
+	IRQillacc,
+	IRQpcm,
+	IRQinc5,
+	IRQgpio,
+	IRQdma,
+	IRQinc8,
+	IRQinc9,
+	IRQi2s,
+	IRQuartf,
+	IRQspi,
+	IRQcrypto,
+	IRQnand,
+	IRQperf,
+	IRQinc16,
+	IRQethsw,
+	IRQusbh,
+	IRQusbd,
+	IRQuartl,
+	IRQuart1,
+	IRQuart2,
+	IRQwdog,
+	IRQmax,
+};
+
+
+/*
+ * Interrupts on side controller
+ *
+ * each value is a bit number: irq.c shifts 1 left by it to build the
+ * masks written to the controller's registers.  entries marked //?
+ * are unconfirmed.
+ */
+
+#define INC_SYSCTL		1
+#define INC_TIMER0		2
+#define INC_ILLACC		3
+#define INC_PCM			4
+
+#define INC_GPIO		6
+#define INC_DMA			7
+#define INC_I2S			10
+#define	INC_UARTF		11
+#define INC_SPI			12 //?
+#define INC_CRYPTO		13 //?
+#define INC_NAND		14
+#define INC_PERF		15
+#define INC_ETHSW		17
+#define	INC_USBH		18
+#define	INC_USBD		19
+#define	INC_UARTL		20
+#define	INC_UART1		21
+#define	INC_UART2		22
+#define INC_WDOG		24
+
+#define INC_GLOBAL		31
+
+
+
+/* disabled: these numbers collide with INC_NAND and INC_PERF above */
+//#define INC_SDHC		14 //?
+//#define INC_R2P			15 //?
+
+
+
+/*
+ * Interrupt Controller Registers
+ *
+ * byte offsets from IRQBASE, as used by incread() and incwrite()
+ * in irq.c.
+ */
+
+#define IRQ_STAT		0x9C
+#define FIQ_STAT		0xA0
+#define IRQ_SEL0		0x00	/* set as IRQ */
+#define	FIQ_SEL			0x6C	/* set as FIQ */
+#define INT_PURE		0xA4	/* raw */
+#define	IRQ_MASK		0x70	/* mask */
+#define IRQ_MASK_SET	0x80	/* enable */
+#define IRQ_MASK_CLR	0x78	/* disable */
+#define	IRQ_EOI			0x88	/* call end to irq */
+
+
+/*
+ * timer controls
+ */
+
+#define TIME_GLB	0x00
+
+#define CLK0_CTL	0x10
+#define CLK0_LOAD	0x14
+#define CLK0_TIME	0x18
+
+#define WDOG_CTL	0x20
+#define WDOG_LOAD	0x24
+#define WDOG_TIME	0x28
+
+#define GLB_T0_IRQ  (1<<0)
+#define GLB_WD_IRQ	(1<<1)
+#define GLB_T1_IRQ	(1<<2)
+#define GLB_T0_RST	(1<<8)
+#define	GLB_WD_RST	(1<<9)
+#define	GLB_T1_RST	(1<<10)
+
+#define TIMER_EN	(1<<7)  /* used on X_CTL regs */
+#define AUTOLOAD	(1<<4)
+#define CLK_PRSC(x)	((x)<<16)
+
+
+/* for MIPS CNT */
+#define MCNT_CFG	0x00
+#define MCNT_CMP	0x04
+#define	MCNT_CNT	0x08
+
+#define MCNT_EN		1	/* for MCNT_CFG */
+
+
+/* Frame Engine, ethernet controller */
+
+#define TX_BASE_PTR_0	0x800	/*  TX Ring #0 Base Pointer */
+#define	TX_MAX_CNT_0	0x804	/*  TX Ring #0 Maximum Count */
+#define TX_CTX_IDX_0	0x808	/*  TX Ring #0 CPU pointer */
+#define TX_DTX_IDX_0	0x80c	/*  TX Ring #0 DMA pointer */
+#define PDMA_TX0_PTR	TX_BASE_PTR_0
+#define PDMA_TX0_COUNT	TX_MAX_CNT_0
+#define PDMA_TX0_CPU_IDX	TX_CTX_IDX_0
+#define PDMA_TX0_DMA_IDX	TX_DTX_IDX_0
+#define TX_BASE_PTR_1	0x810	/*  TX Ring #1 Base Pointer */
+#define TX_MAX_CNT_1	0x814	/*  TX Ring #1 Maximum Count */
+#define TX_CTX_IDX_1	0x818	/*  TX Ring #1 CPU pointer */
+#define TX_DTX_IDX_1	0x81c	/*  TX Ring #1 DMA pointer */
+#define TX_BASE_PTR_2	0x820	/*  TX Ring #2 Base Pointer */
+#define TX_MAX_CNT_2	0x824	/*  TX Ring #2 Maximum Count */
+#define TX_CTX_IDX_2	0x828	/*  TX Ring #2 CPU pointer */
+#define TX_DTX_IDX_2	0x82c	/*  TX Ring #2 DMA pointer */
+#define TX_BASE_PTR_3	0x830	/*  TX Ring #3 Base Pointer */
+#define TX_MAX_CNT_3	0x834	/*  TX Ring #3 Maximum Count */
+#define TX_CTX_IDX_3	0x838	/*  TX Ring #3 CPU pointer */
+#define TX_DTX_IDX_3	0x83c	/*  TX Ring #3 DMA pointer */
+#define RX_BASE_PTR_0	0x900	/*  RX Ring #0 Base Pointer */
+#define RX_MAX_CNT_0	0x904	/*  RX Ring #0 Maximum Count */
+#define RX_CRX_IDX_0	0x908	/*  RX Ring #0 CPU pointer */
+#define RX_DRX_IDX_0	0x90c	/*  RX Ring #0 DMA pointer */
+#define PDMA_RX0_PTR	RX_BASE_PTR_0
+#define PDMA_RX0_COUNT	RX_MAX_CNT_0
+#define PDMA_RX0_CPU_IDX	RX_CRX_IDX_0
+#define PDMA_RX0_DMA_IDX	RX_DRX_IDX_0
+#define RX_BASE_PTR_1	0x910	/*  RX Ring #1 Base Pointer */
+#define RX_MAX_CNT_1	0x914	/*  RX Ring #1 Maximum Count */
+#define RX_CRX_IDX_1	0x918	/*  RX Ring #1 CPU pointer */
+#define RX_DRX_IDX_1	0x91c	/*  RX Ring #1 DMA pointer */
+#define PDMA_INFO		0xa00	/*  PDMA Information */
+#define PDMA_GLOBAL_CFG	0xa04	/*  PDMA Global Configuration */
+#define	PDMA_IDX_RST	0xa08	/*	ring index reset ? */
+#define DELAY_INT_CFG	0xa0c	/*  Delay Interrupt Configuration */
+#define FREEQ_THRES		0xa10	/*  Free Queue Threshold */
+#define INT_STATUS		0xa20	/*  Interrupt Status */
+#define INT_MASK		0xa28	/*  Interrupt Mask */
+#define PDMA_SCH		0xa80	/*  Scheduler Configuration for Q0&Q1 */
+#define PDMA_WRR		0xa84	/*  Scheduler Configuration for Q2&Q3 */
+#define SDM_CON			0xc00	/*  Switch DMA Control */
+#define SDM_RING		0xc04	/*  Switch DMA Rx Ring */
+#define SDM_TRING		0xc08	/*  Switch DMA TX Ring */
+#define SDM_MAC_ADRL	0xc0c	/*  Switch MAC Address LSB */
+#define SDM_MAC_ADRH	0xc10	/*  Switch MAC Address MSB */
+#define GDMA1_MAC_LSB	SDM_MAC_ADRL
+#define GDMA1_MAC_MSB	SDM_MAC_ADRH
+#define SDM_TPCNT		0xd00	/*  Switch DMA Tx Packet Count */
+#define SDM_TBCNT		0xd04	/*  Switch DMA TX Byte Count */
+#define SDM_RPCNT		0xd08	/*  Switch DMA RX Packet Count */
+#define SDM_RBCNT		0xd0c	/*  Switch DMA RX Byte Count */
+#define SDM_CS_ERR		0xd10	/*  Switch DMA RX Checksum Error */
+
+
+/*
+ * 10/100 Switch registers
+ */
+#define SW_ISR		0x00
+#define SW_IMR		0x04
+#define SW_FCT0		0x08
+#define  SW_FCT0_FC_RLS_TH(x)	(((x) & 0xff) << 24)
+#define  SW_FCT0_FC_SET_TH(x)	(((x) & 0xff) << 16)
+#define  SW_FCT0_DROP_RLS_TH(x)	(((x) & 0xff) << 8)
+#define  SW_FCT0_DROP_SET_TH(x)	(((x) & 0xff) << 0)
+#define SW_FCT1		0x0C
+#define  SW_FCT1_PORT_TH(x)	(((x) & 0xff) << 0)
+#define SW_PFC0		0x10
+#define SW_PFC1		0x14
+#define SW_PFC2		0x18
+#define SW_QCS0		0x1C
+#define SW_QCS1		0x20
+#define SW_ATS		0x24
+#define	SW_ATS0		0x28
+#define SW_ATS1		0x2C
+#define	SW_ATS2		0x30
+#define SW_WMAD0	0x34
+#define SW_WMAD1	0x38
+#define SW_WMAD2	0x3C
+#define SW_PVIDC0	0x40
+#define SW_PVIDC1	0x44
+#define SW_PVIDC2	0x48
+#define SW_PVIDC3	0x4C
+#define SW_VLANI0	0x50
+#define SW_VLANI1	0x54
+#define SW_VLANI2	0x58
+#define SW_VLANI3	0x5C
+#define SW_VLANI4	0x60
+#define SW_VLANI5	0x64
+#define SW_VLANI6	0x68
+#define SW_VLANI7	0x6C
+#define SW_VMSC0	0x70
+#define SW_VMSC1	0x74
+#define SW_VMSC2	0x78
+#define SW_VMSC3	0x7C
+#define SW_POA		0x80
+#define SW_FPA		0x84
+#define SW_PTS		0x88
+#define SW_SOCPC	0x8C
+#define SW_POC0		0x90
+#define SW_POC1		0x94
+#define SW_POC2		0x98
+#define SW_SWGC		0x9C
+#define SW_RST		0xA0
+#define SW_LEDP0	0xA4
+#define SW_LEDP1	0xA8
+#define SW_LEDP2	0xAC
+#define SW_LEDP3	0xB0
+#define SW_LEDP4	0xB4
+#define SW_WDOG		0xB8
+#define SW_DBG		0xBC
+#define SW_PCTL0	0xC0	/* PCR0 */
+#define SW_PCTL1	0xC4	/* PCR1 */
+#define SW_FPORT	0xC8
+#define SW_FCT2		0xCC
+#define SW_QSS0		0xD0
+#define SW_QSS1		0xD4
+#define SW_DBGC		0xD8
+#define SW_MTI1		0xDC
+#define SW_PPC		0xE0
+#define SW_SGC2		0xE4
+#define SW_PCNT0	0xE8
+#define SW_PCNT1	0xEC
+#define SW_PCNT2	0xF0
+#define SW_PCNT3	0xF4
+#define SW_PCNT4	0xF8
+#define SW_PCNT5	0xFC
+
+
--- /dev/null
+++ b/sys/src/9/mt7688/irq.c
@@ -1,0 +1,320 @@
+/*
+ *  Interrupt Handling for the MT7688
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"ureg.h"
+#include	"io.h"
+#include	"../port/error.h"
+
+
+
+/*
+ * map the irq number to its bit number on the SoC interrupt controller.
+ * cpu interrupt lines have no such bit and map to -1.  the IRQincN
+ * filler slots are not listed and so read as 0.
+ */
+static const int irq2inc[32] = {
+	/* cpu based interrupts */
+	[IRQsw1]	=	-1,
+	[IRQsw2]	=	-1,
+	[IRQlow]	=	-1,
+	[IRQhigh]	=	-1,
+	[IRQpci]	=	-1,
+	[IRQethr]	=	-1,
+	[IRQwifi]	=	-1,
+	[IRQtimer]	=	-1,
+
+	/* irqs on the SoC interrupt controller */
+	[IRQsys]	=	INC_SYSCTL,
+	[IRQtimer0]	=	INC_TIMER0,
+	[IRQwdog]	=	INC_WDOG,
+	[IRQillacc]	=	INC_ILLACC,
+	[IRQpcm]	=	INC_PCM,
+	[IRQuartf]	=	INC_UARTF,
+	[IRQgpio]	=	INC_GPIO,
+	[IRQdma]	=	INC_DMA,
+	[IRQnand]	=	INC_NAND,
+	[IRQperf]	=	INC_PERF,
+	[IRQi2s]	=	INC_I2S,
+	[IRQspi]	=	INC_SPI,
+	[IRQuartl]	=	INC_UARTL,
+	/* without these two, UART1 and UART2 would map to bit 0 */
+	[IRQuart1]	=	INC_UART1,
+	[IRQuart2]	=	INC_UART2,
+	[IRQcrypto]	=	INC_CRYPTO,
+//	[IRQsdhc]	=	INC_SDHC,
+//	[IRQr2p]	=	INC_R2P,
+	[IRQethsw]	=	INC_ETHSW,
+	[IRQusbh]	=	INC_USBH,
+	[IRQusbd]	=	INC_USBD,
+};
+
+
+/*
+ * map a bit number on the SoC interrupt controller back to its irq
+ * number; the inverse of irq2inc.  bits with no entry read as 0.
+ */
+static const int inc2irq[32] = {
+	[INC_SYSCTL]	=	IRQsys,
+	[INC_TIMER0]	=	IRQtimer0,
+	[INC_WDOG]		=	IRQwdog,
+	[INC_ILLACC]	=	IRQillacc,
+	[INC_PCM]		=	IRQpcm,
+	[INC_UARTF]		=	IRQuartf,
+	[INC_GPIO]		=	IRQgpio,
+	[INC_DMA]		=	IRQdma,
+	[INC_NAND]		=	IRQnand,
+	[INC_PERF]		=	IRQperf,
+	[INC_I2S]		=	IRQi2s,
+	[INC_SPI]		=	IRQspi,
+	[INC_UARTL]		=	IRQuartl,
+	/* UART1 and UART2 were missing from the reverse map */
+	[INC_UART1]		=	IRQuart1,
+	[INC_UART2]		=	IRQuart2,
+	[INC_CRYPTO]	=	IRQcrypto,
+//	[INC_SDHC]		=	IRQsdhc,
+//	[INC_R2P]		=	IRQr2p,
+	[INC_ETHSW]		=	IRQethsw,
+	[INC_USBH]		=	IRQusbh,
+	[INC_USBD]		=	IRQusbd,
+};
+
+
+
+
+typedef struct Handler Handler;
+
+/*
+ * one registered interrupt handler.  handlers[irq] is the head of
+ * the list for that irq; further handlers for the same irq are
+ * chained through next (see intrenable).
+ */
+struct Handler {
+	Handler *next;		/* next handler for the same irq, or nil */
+	void 	(*f)(Ureg*, void *);	/* routine to call; nil if the slot is unused */
+	void	*arg;		/* passed to f as its second argument */
+	int		irq;		/* irq number this entry was registered for */
+};
+
+/* intrlock guards changes to the handler lists */
+static Lock intrlock;
+static Handler handlers[IRQmax+1];
+
+
+void incintr(Ureg*, void*);
+
+
+
+/* read the interrupt controller register at byte offset `offset' from IRQBASE */
+static u32int
+incread(int offset)
+{
+	u32int *reg;
+
+	reg = IO(u32int, (IRQBASE + offset));
+	return *reg;
+}
+
+
+/* write val to the interrupt controller register at byte offset `offset' from IRQBASE */
+static void
+incwrite(int offset, u32int val)
+{
+	u32int *reg;
+
+	reg = IO(u32int, (IRQBASE + offset));
+	*reg = val;
+}
+
+/*
+ * called by main().  masks every source on the SoC interrupt
+ * controller, then hooks incintr onto cpu interrupt 2 (IRQlow) so
+ * the controller's interrupts are relayed through it.  the hook for
+ * cpu interrupt 3 (IRQhigh) is currently disabled.
+ */
+
+void
+intrinit(void)
+{
+	/* start with all SoC sources masked */
+	incwrite(IRQ_MASK_CLR, 0xFFFFFFFF);
+
+	/* incintr's arg picks the status register: 0 reads IRQ_STAT */
+	intrenable(IRQlow, incintr, nil, 0, "inclow");
+//	intrenable(IRQhigh, incintr, (void *)1, 1, "inchigh");
+}
+
+
+/*
+ * called by drivers to set up irqs.
+ *
+ * registers f(ureg, arg) as a handler for irq, appending it to any
+ * handlers already present, then unmasks the interrupt: on the SoC
+ * controller for irqs above IRQtimer (where priority is or'ed into
+ * FIQ_SEL at the source's bit), otherwise on the cpu itself.
+ * name is only used in the panic message.
+ */
+void
+intrenable(int irq, void (*f)(Ureg*, void *), void *arg, int priority, char *name)
+{
+	Handler *hp;
+	u32int r;
+
+
+	/*
+	 * IRQmax is one past the last irq, and equals the number of
+	 * entries in irq2inc, so it must be rejected too.
+	 */
+	if(irq >= IRQmax || irq < 0)
+		panic("intrenable: %s gave bad irq number of %d", name, irq);
+
+	/* debugging */
+	if(irq == 0 || irq == 1)
+		iprint("software irq enabled?");
+
+	hp = &handlers[irq];
+	ilock(&intrlock);
+
+	if(hp->f != nil) {
+		/* slot already in use: chain a new entry on the end */
+		for(; hp->next != nil; hp = hp->next)
+			;
+		if((hp->next = xalloc(sizeof *hp)) == nil)
+			panic("intrenable: out of memory");
+		hp = hp->next;
+		hp->next = nil;
+	}
+
+	hp->f = f;
+	hp->arg = arg;
+	hp->irq = irq;
+
+	iunlock(&intrlock);
+
+	if(irq > IRQtimer) {
+		r = incread(FIQ_SEL);
+		r |= (priority << irq2inc[irq]);
+		incwrite(FIQ_SEL, r);
+		incwrite(IRQ_MASK_SET, (1 << irq2inc[irq]));
+	} else {
+		intron(INTR0 << irq);
+	}
+
+}
+
+
+
+/*
+ * mask irq again, on the SoC controller or on the cpu as appropriate.
+ * the handler, arg and priority arguments are ignored and the
+ * registered handler stays on its list; name is only used in the
+ * panic message.
+ */
+void
+intrdisable(int irq, void (*)(Ureg*, void *), void*, int, char *name)
+{
+	/* IRQmax itself is not an irq and would index past irq2inc */
+	if(irq >= IRQmax || irq < 0)
+		panic("intrdisable: %s gave bad irq number of %d", name, irq);
+
+	if(irq > IRQtimer) {
+		incwrite(IRQ_MASK_CLR, (1 << irq2inc[irq]));
+	} else {
+		introff(INTR0 << irq);
+	}
+}
+
+
+/*
+ * called by trap to service pending cpu interrupts.  INTR7 goes to
+ * clock(); INTR2 up to (not including) INTR7 go to the handlers
+ * registered for irqs IRQlow onwards.  returns true if a clock
+ * interrupt was among them.
+ */
+int
+intr(Ureg* ur)
+{
+	int clockintr, irq;
+	ulong pending, bit;
+	Handler *hp;
+
+	m->intr++;
+
+	/*
+	 * only look at interrupts that are both raised and enabled,
+	 * and never at the two software interrupts.
+	 */
+	pending = ur->cause & ur->status & INTMASK;
+	pending &= ~(INTR1|INTR0);
+
+	if (pending == 0)
+		iprint("spurious interrupt\n");
+
+	clockintr = 0;
+	if(pending & INTR7){
+		clock(ur);
+		pending &= ~INTR7;
+		clockintr = 1;
+	}
+
+	/* walk the cpu lines in step with their slots in handlers[] */
+	irq = IRQlow;
+	for(bit = INTR2; pending != 0 && bit < INTR7; bit <<= 1, irq++){
+		if((pending & bit) == 0)
+			continue;
+		for(hp = &handlers[irq]; hp != nil; hp = hp->next){
+			if(hp->f == nil)
+				continue;
+			hp->f(ur, hp->arg);
+			pending &= ~bit;
+		}
+	}
+	if(pending != 0)
+		iprint("unhandled interrupts %lux\n", pending);
+
+	/* preemptive scheduling */
+	if(up != nil && !clockintr)
+		preempted();
+	/* for a clock interrupt, sched is called at the end of trap() */
+	return clockintr;
+}
+
+
+/* off to handle requests for the SoC interrupt controller */
+/*
+ * the interrupt controller on the mt7688 SoC can be mapped to
+ * either CPU interrupt 2 or 3.  So when those are tripped,
+ * this code then checks the secondary interrupt controller
+ * to see which IRQ it has.  The controller defines CPU INTR2
+ * as "low priority" IRQ, and INTR3 as "high priority" FIQ.
+ *
+ * arg selects the status register: 0 reads IRQ_STAT, anything else
+ * FIQ_STAT.  each pending bit is translated to an irq number through
+ * inc2irq; bits with no entry there fall back to IRQinc0 + bit.
+ * a bit whose irq number lies outside handlers[] is left pending and
+ * reported as unhandled rather than followed off the end of the array.
+ */
+
+void
+incintr(Ureg *ureg, void *arg)
+{
+	u32int p;
+	u32int reg;
+	u32int pending;
+	u32int mask;
+	int bit, irq;
+	Handler *hp;
+
+
+	p = (uintptr)arg;
+	reg = (p == 0) ? IRQ_STAT : FIQ_STAT;
+	pending = incread(reg);
+
+	/* bit 31 (INC_GLOBAL) is deliberately not dispatched */
+	for(bit = 0; pending != 0 && bit < 31; bit++) {
+		mask = (u32int)1 << bit;
+		if((pending & mask) == 0)
+			continue;
+
+		/* e.g. INC_WDOG (24) is IRQwdog, not IRQinc0+24 */
+		irq = inc2irq[bit];
+		if(irq == 0)
+			irq = IRQinc0 + bit;
+		if(irq >= (int)nelem(handlers))
+			continue;
+
+		for(hp = &handlers[irq]; hp != nil; hp = hp->next) {
+			if(hp->f != nil) {
+				hp->f(ureg, hp->arg);
+				pending &= ~mask;
+			}
+		}
+	}
+
+	if(pending != 0){
+		iprint("unhandled inc interrupts %uX\n", pending);
+		delay(2000);
+	}
+}
+
+
+/*
+ * signal end-of-interrupt for irq on the SoC interrupt controller.
+ * cpu interrupt lines (which map to -1 in irq2inc) and out-of-range
+ * numbers have no bit there, so they are ignored.
+ */
+void
+intrclear(int irq)
+{
+	if(irq <= IRQtimer || irq >= IRQmax)
+		return;
+	incwrite(IRQ_EOI, 1 << irq2inc[irq]);
+}
+
+
+/*
+ * turn interrupts off for shutdown: every cpu interrupt line, then
+ * every source on the SoC interrupt controller (all 32 bits, matching
+ * intrinit; the sources above bit 15 must be masked as well).
+ */
+void
+intrshutdown(void)
+{
+	introff(INTMASK);
+	incwrite(IRQ_MASK_CLR, 0xFFFFFFFF);
+	coherence();
+
+}
+
+/*
+ * left over debugging stuff
+ */
+
+/* debugging: current value of the controller's INT_PURE (raw) register */
+ulong
+incraw(void)
+{
+	ulong v;
+
+	v = incread(INT_PURE);
+	return v;
+}
+
+/* debugging: current value of the controller's IRQ_MASK register */
+ulong
+incmask(void)
+{
+	ulong v;
+
+	v = incread(IRQ_MASK);
+	return v;
+}
+
+/* debugging: current value of the controller's IRQ_STAT register */
+ulong
+incstat(void)
+{
+	ulong v;
+
+	v = incread(IRQ_STAT);
+	return v;
+}
+
+/* debugging: current value of the controller's IRQ_SEL0 register */
+ulong
+incsel(void)
+{
+	ulong v;
+
+	v = incread(IRQ_SEL0);
+	return v;
+}
+
--- /dev/null
+++ b/sys/src/9/mt7688/l.s
@@ -1,0 +1,909 @@
+/*
+ * mips 24k machine assist for mt7688
+ */
+
+#include "mem.h"
+#include "mips24k.s"
+
+
+#define SANITY 0x12345678
+
+	NOSCHED
+
+/*
+ * Boot only processor
+ */
+
+/*
+ * Kernel entry point.  In order: turn interrupts off, check the
+ * sanity word, reset COMPARE/STATUS/CAUSE, write zero to the register
+ * at KSEG1|0x10000038, clean both caches, set up WIRED/CONTEXT/CONFIG,
+ * zero the Mach area and bss, then call main().
+ */
+TEXT	start(SB), $-4
+	MOVW	$setR30(SB), R30
+
+	DI(0)
+
+	/* the data word sanity must read back as SANITY, else go to insane */
+	MOVW	sanity(SB), R1
+	CONST(SANITY, R2)
+	SUBU	R1, R2, R2
+	BNE	R2, insane
+	NOP
+
+
+	MOVW	R0, M(COMPARE)
+	EHB
+
+	/* don't enable any interrupts nor FP, but leave BEV on. */
+	MOVW	$BEV,R1
+	MOVW	R1, M(STATUS)
+	UBARRIERS(7, R7, stshb)		/* returns to kseg1 space */
+	MOVW	R0, M(CAUSE)
+	EHB
+
+	/* disable watchdog and other resets */
+	MOVW	$(KSEG1|0x10000038), R1
+	MOVW	R0, (R1)			/* set no action */
+	SYNC
+
+	MOVW	$PE, R1
+	MOVW	R1, M(CACHEECC)		/* aka ErrCtl */
+	EHB
+	JAL	cleancache(SB)
+	NOOP
+
+
+	/* WIRED = TLBROFF */
+	MOVW	$TLBROFF, R1
+	MOVW	R1, M(WIRED)
+
+	MOVW	R0, M(CONTEXT)
+	EHB
+
+	/* set KSEG0 cachability before trying LL/SC in lock code */
+	MOVW	M(CONFIG), R1
+	AND	$~CFG_K0, R1
+	/* make kseg0 cachable, enable write-through merging */
+	OR	$((PTECACHED>>3)|CFG_MM), R1
+	MOVW	R1, M(CONFIG)
+	BARRIERS(7, R7, cfghb)			/* back to kseg0 space */
+
+	MOVW	$setR30(SB), R30		/* again */
+
+	/* initialize Mach, including stack */
+	MOVW	$MACHADDR, R(MACH)
+	ADDU	$(MACHSIZE-BY2V), R(MACH), SP
+	MOVW	R(MACH), R1
+	/* zero a word at a time from MACHADDR up to (not including) SP */
+clrmach:
+	MOVW	R0, (R1)
+	ADDU	$BY2WD, R1
+	BNE	R1, SP, clrmach
+	NOOP
+
+	/* zero bss a byte at a time, from edata up to end */
+	MOVW	$edata(SB), R1
+	MOVW	$end(SB), R2
+clrbss:
+	MOVB	R0, (R1)
+	ADDU	$1, R1
+	BNE	R1, R2, clrbss
+	NOOP
+
+	/* load recognisable constants into R16-R23 */
+	MOVW	$0x16, R16
+	MOVW	$0x17, R17
+	MOVW	$0x18, R18
+	MOVW	$0x19, R19
+	MOVW	$0x20, R20
+	MOVW	$0x21, R21
+	MOVW	$0x22, R22
+	MOVW	$0x23, R23
+
+	MOVW	R0, HI
+	MOVW	R0, LO
+
+	MOVW	R0, 0(R(MACH))			/* m->machno = 0 */
+	MOVW	R0, R(USER)			/* up = nil */
+
+	JAL	main(SB)
+	NOOP
+
+/*
+ * only reached if main() returns: print 'X' on the console.
+ * NOTE(review): there is no loop or jump here, so execution continues
+ * into insane below and then into whatever code follows it.
+ */
+PUTC('X', R1, R2)
+NOOP
+
+
+/* sanity word mismatch: print 'D'; nothing stops execution afterwards */
+insane:
+	PUTC('D', R1, R2)
+	NOOP
+
+TEXT	arcs(SB), $256
+	MOVW	R24, 0x80(SP)
+	MOVW	R25, 0x84(SP)
+	MOVW	R26, 0x88(SP)
+	MOVW	R27, 0x8C(SP)
+
+	MOVW	$SPBADDR, R4
+	MOVW	0x20(R4), R5
+	ADDU	R1, R5
+	MOVW	(R5), R2
+
+	MOVW	16(FP), R7
+	MOVW	12(FP), R6
+	MOVW	8(FP), R5
+	MOVW	4(FP), R4
+
+	JAL	(R2)
+	NOOP
+
+	MOVW	$setR30(SB), R30
+
+	MOVW	0x80(SP), R24
+	MOVW	0x84(SP), R25
+	MOVW	0x88(SP), R26
+	MOVW	0x8C(SP), R27
+
+	MOVW	R2, R1
+	RETURN
+
+/*
+ * Take first processor into user mode
+ * 	- argument is stack pointer to user
+ */
+
+/*
+ * touser(sp): R1 holds the user stack pointer.  Sets EPC to UTZERO+32,
+ * loads SP from R1, rewrites the mode bits of STATUS and leaves the
+ * kernel with ERET.
+ */
+TEXT	touser(SB), $-4
+	MOVW	M(STATUS), R4
+	MOVW	$(UTZERO+32), R2	/* header appears in text */
+	MOVW	R2, M(EPC)		/* ERET will resume here */
+	MOVW	R1, SP
+	AND	$(~KMODEMASK), R4	/* clear the low five mode bits (KMODEMASK) */
+	OR	$(KUSER|IE|EXL), R4	/* switch to user mode, intrs on, exc */
+	MOVW	R4, M(STATUS)		/* " */
+	NOOP
+	ERET				/* clears EXL */
+	NOOP
+
+
+/* target for JALRHB in BARRIERS */
+TEXT ret(SB), $-4
+	JMP	(R22)
+	NOP
+
+/* the i and d caches may be different sizes, so clean them separately */
+/*
+ * Write back and invalidate the whole I cache, then the whole D cache,
+ * by index.  Interrupts are off for the duration; the saved STATUS in
+ * R10 is put back before returning.  Uses R1, R7, R9, R10 and (via the
+ * barrier macros) R22.
+ */
+TEXT	cleancache(SB), $-4
+	DI(10)				/* intrs off, old status -> R10 */
+
+	UBARRIERS(7, R7, cchb);		/* return to kseg1 (uncached) */
+	MOVW	R0, R1			/* index, not address */
+	MOVW	$ICACHESIZE, R9		/* bytes still to cover */
+iccache:
+	CACHE	PI+IWBI, (R1)		/* flush & invalidate I by index */
+	SUBU	$CACHELINESZ, R9
+	BGTZ	R9, iccache
+	ADDU	$CACHELINESZ, R1	/* delay slot */
+
+	BARRIERS(7, R7, cc2hb);		/* return to kseg0 (cached) */
+
+	MOVW	R0, R1			/* index, not address */
+	MOVW	$DCACHESIZE, R9		/* bytes still to cover */
+dccache:
+	CACHE	PD+IWBI, (R1)		/* flush & invalidate D by index */
+	SUBU	$CACHELINESZ, R9
+	BGTZ	R9, dccache
+	ADDU	$CACHELINESZ, R1	/* delay slot */
+
+	SYNC
+	MOVW	R10, M(STATUS)		/* restore the STATUS saved by DI above */
+	JRHB(31)			/* return and clear all hazards */
+
+/*
+ * manipulate interrupts
+ */
+
+
+/* enable an interrupt; bit is in R1 */
+TEXT	intron(SB), $0
+	MOVW	M(STATUS), R2
+	OR	R1, R2
+	MOVW	R2, M(STATUS)
+	EHB
+	RETURN
+
+/* disable an interrupt; bit is in R1 */
+TEXT	introff(SB), $0
+	MOVW	M(STATUS), R2
+	XOR	$-1, R1
+	AND	R1, R2
+	MOVW	R2, M(STATUS)
+	EHB
+	RETURN
+
+/*
+ * splhi(): turn interrupts off and return the previous STATUS in R1.
+ * The caller's return address is recorded in m->splpc.
+ */
+TEXT	splhi(SB), $0
+	EHB
+	MOVW	R31, 12(R(MACH))	/* save PC in m->splpc */
+	DI(1)				/* old M(STATUS) into R1 */
+	EHB
+	RETURN
+
+/*
+ * splx(s): R1 holds a STATUS value saved earlier.  Only its IE bit is
+ * used; every other bit of the current STATUS is kept as it is.
+ * The caller's return address is recorded in m->splpc.
+ */
+TEXT	splx(SB), $0
+	EHB
+	MOVW	R31, 12(R(MACH))	/* save PC in m->splpc */
+	MOVW	M(STATUS), R2
+	AND	$IE, R1			/* R1 = IE bit of the saved value */
+	AND	$~IE, R2		/* R2 = current STATUS without IE */
+	OR	R2, R1
+	MOVW	R1, M(STATUS)
+	EHB
+	RETURN
+
+/* spllo(): turn interrupts on and return the previous STATUS in R1 */
+TEXT	spllo(SB), $0
+	EHB
+	EI(1)				/* old M(STATUS) into R1 */
+	EHB
+	RETURN
+
+TEXT	spldone(SB), $0
+	RETURN
+
+TEXT	islo(SB), $0
+	MOVW	M(STATUS), R1
+	AND	$IE, R1
+	RETURN
+
+
+TEXT	coherence(SB), $-4
+	BARRIERS(7, R7, cohhb)
+	SYNC
+	EHB
+	RETURN
+
+/*
+ * idle(): enable interrupts, keeping the old STATUS in R1, then fall
+ * into wait below.
+ */
+TEXT	idle(SB), $-4
+	EI(1)				/* old M(STATUS) into R1 */
+	EHB
+	/* fall through */
+
+/*
+ * wait: execute WAIT, then write R1 back to STATUS.
+ * NOTE(review): when entered directly rather than through idle, R1 is
+ * whatever the caller left there -- confirm callers pass a STATUS value.
+ */
+TEXT	wait(SB), $-4
+	WAIT
+	  NOP
+
+	MOVW	R1, M(STATUS)		/* interrupts restored */
+	EHB
+	RETURN
+
+/*
+ * process switching
+ */
+
+/*
+ * setlabel(l): R1 points at the label.  Stores SP at offset 0 and the
+ * return address (R31) at offset 4, then returns 0.
+ */
+TEXT	setlabel(SB), $-4
+	MOVW	SP, 0(R1)
+	MOVW	R31, 4(R1)
+	MOVW	R0, R1			/* return value 0 */
+	RETURN
+
+/*
+ * gotolabel(l): R1 points at the label.  Reloads SP from offset 0 and
+ * R31 from offset 4, so the RETURN resumes at the saved address with
+ * a return value of 1.
+ */
+TEXT	gotolabel(SB), $-4
+	MOVW	0(R1), SP
+	MOVW	4(R1), R31
+	MOVW	$1, R1			/* return value 1 */
+	RETURN
+
+/*
+ * the tlb routines need to be called at splhi.
+ */
+
+TEXT	getwired(SB),$0
+	MOVW	M(WIRED), R1
+	RETURN
+
+TEXT	setwired(SB),$0
+	MOVW	R1, M(WIRED)
+	RETURN
+
+TEXT	getrandom(SB),$0
+	MOVW	M(RANDOM), R1
+	RETURN
+
+TEXT	getpagemask(SB),$0
+	MOVW	M(PAGEMASK), R1
+	RETURN
+
+TEXT	setpagemask(SB),$0
+	MOVW	R1, M(PAGEMASK)
+	MOVW	R0, R1			/* prevent accidents */
+	RETURN
+
+/*
+ * Write one tlb entry at the given index: R1 is the index, the three
+ * stack arguments are virt, phys0 and phys1.
+ * NOTE(review): the comment on the TEXT line lists a fifth pagemask
+ * argument, but no fifth argument is read; PAGEMASK is always loaded
+ * with the constant computed from BY2PG below.
+ */
+TEXT	puttlbx(SB), $0	/* puttlbx(index, virt, phys0, phys1, pagemask) */
+	MOVW	4(FP), R2
+	MOVW	8(FP), R3
+	MOVW	12(FP), R4
+	MOVW	$((2*BY2PG-1) & ~0x1fff), R5
+	MOVW	R2, M(TLBVIRT)
+	MOVW	R3, M(TLBPHYS0)
+	MOVW	R4, M(TLBPHYS1)
+	MOVW	R5, M(PAGEMASK)
+	MOVW	R1, M(INDEX)
+	NOOP
+	NOOP
+	TLBWI
+	NOOP
+	RETURN
+
+TEXT	tlbvirt(SB), $0
+	MOVW	M(TLBVIRT), R1
+	NOOP
+	RETURN
+
+TEXT	gettlbx(SB), $0			/* gettlbx(index, &entry) */
+	MOVW	4(FP), R4
+	MOVW	R1, M(INDEX)
+	NOOP
+	NOOP
+	TLBR
+	NOOP
+	NOOP
+	NOOP
+	MOVW	M(TLBVIRT), R1
+	MOVW	M(TLBPHYS0), R2
+	MOVW	M(TLBPHYS1), R3
+	NOOP
+	MOVW	R1, 0(R4)
+	MOVW	R2, 4(R4)
+	MOVW	R3, 8(R4)
+	RETURN
+
+/*
+ * Probe the tlb for the TLBVIRT value in R1.  Returns INDEX in R1; it
+ * is negative when the probe found nothing.  On a hit the entry's
+ * virt, phys0 and phys1 words are stored at offsets 0, 4 and 8 of the
+ * second argument.
+ */
+TEXT	gettlbp(SB), $0			/* gettlbp(tlbvirt, &entry) */
+	MOVW	4(FP), R5
+	MOVW	R1, M(TLBVIRT)
+	NOOP
+	NOOP
+	NOOP
+	TLBP
+	NOOP
+	NOOP
+	MOVW	M(INDEX), R1
+	NOOP
+	BLTZ	R1, gettlbp1
+	/*
+	 * NOTE(review): with NOSCHED in force this TLBR directly follows
+	 * the branch, so it looks like it sits in the delay slot and runs
+	 * even when the probe missed -- confirm that is intended.
+	 */
+	TLBR
+	NOOP
+	NOOP
+	NOOP
+	MOVW	M(TLBVIRT), R2
+	MOVW	M(TLBPHYS0), R3
+	MOVW	M(TLBPHYS1), R4
+	NOOP
+	MOVW	R2, 0(R5)
+	MOVW	R3, 4(R5)
+	MOVW	R4, 8(R5)
+gettlbp1:
+	RETURN
+
+TEXT	gettlbvirt(SB), $0		/* gettlbvirt(index) */
+	MOVW	R1, M(INDEX)
+	NOOP
+	NOOP
+	TLBR
+	NOOP
+	NOOP
+	NOOP
+	MOVW	M(TLBVIRT), R1
+	NOOP
+	RETURN
+
+/*
+ * compute stlb hash index.
+ *
+ * M(TLBVIRT) [page & asid] in arg, result in arg.
+ * stir in swizzled asid; we get best results with asid in both high & low bits.
+ */
+#define STLBHASH(arg, tmp)		\
+	AND	$0xFF, arg, tmp;	\
+	SRL	$(PGSHIFT+1), arg;	\
+	XOR	tmp, arg;		\
+	SLL	$(STLBLOG-8), tmp;	\
+	XOR	tmp, arg;		\
+	CONST	(STLBSIZE-1, tmp);	\
+	AND	tmp, arg
+
+TEXT	stlbhash(SB), $0		/* for mmu.c */
+	STLBHASH(R1, R2)
+	RETURN
+
+/*
+ * Fast tlb refill from the software tlb (m->stb).
+ * Entered with the return address in R27 (vector0 loads it with eret).
+ * Only R26 and R27 are used.  The faulting TLBVIRT is hashed to an stb
+ * slot; if that slot holds the same virt and a non-zero phys word for
+ * the faulting half of the page pair, the entry is written to the
+ * hardware tlb and control returns through the saved R27.  Otherwise
+ * control falls into gevector with R27 restored.
+ */
+TEXT	utlbmiss(SB), $-4
+	GETMACH(R26)
+	MOVW	R27, 12(R26)		/* m->splpc = R27 */
+
+	MOVW	16(R26), R27
+	ADDU	$1, R27
+	MOVW	R27,16(R26)		/* m->tlbfault++ */
+
+	MOVW	M(TLBVIRT), R27
+	NOOP
+	STLBHASH(R27, R26)
+
+	/* scale to a byte index (multiply by 12) */
+	SLL	$1, R27, R26		/* × 2 */
+	ADDU	R26, R27		/* × 3 */
+	SLL	$2, R27			/* × 12 */
+
+	GETMACH(R26)
+	MOVW	4(R26), R26
+	ADDU	R26, R27		/* R27 = &m->stb[hash] */
+
+	MOVW	M(BADVADDR), R26
+	NOOP
+	AND	$BY2PG, R26
+
+	BNE	R26, utlbodd		/* odd page? */
+	NOOP
+
+utlbeven:
+	MOVW	4(R27), R26		/* R26 = m->stb[hash].phys0 */
+	BEQ	R26, stlbm		/* nothing cached? do it the hard way */
+	NOOP
+	MOVW	R26, M(TLBPHYS0)
+	MOVW	8(R27), R26		/* R26 = m->stb[hash].phys1 */
+	JMP	utlbcom
+	MOVW	R26, M(TLBPHYS1)	/* branch delay slot */
+
+utlbodd:
+	MOVW	8(R27), R26		/* R26 = m->stb[hash].phys1 */
+	BEQ	R26, stlbm		/* nothing cached? do it the hard way */
+	NOOP
+	MOVW	R26, M(TLBPHYS1)
+	MOVW	4(R27), R26		/* R26 = m->stb[hash].phys0 */
+	MOVW	R26, M(TLBPHYS0)
+
+utlbcom:
+	EHB
+	MOVW	M(TLBVIRT), R26
+	MOVW	0(R27), R27		/* R27 = m->stb[hash].virt */
+	BEQ	R27, stlbm		/* nothing cached? do it the hard way */
+	NOOP
+	/* is the stlb entry for the right virtual address? */
+	BNE	R26, R27, stlbm		/* M(TLBVIRT) != m->stb[hash].virt? */
+	NOOP
+
+	/* if an entry exists, overwrite it, else write a random one */
+	CONST	(PGSZ, R27)
+	MOVW	R27, M(PAGEMASK)	/* select page size */
+	TLBP				/* probe tlb */
+	NOOP
+	NOOP
+	MOVW	M(INDEX), R26
+	NOOP
+	BGEZ	R26, utlbindex		/* if tlb entry found, rewrite it */
+	NOOP
+	MOVW	M(RANDOM), R26
+	MOVW	R26, M(INDEX)
+utlbindex:
+	NOOP
+	NOOP
+	TLBWI				/* write indexed tlb entry */
+	NOOP
+
+	/* refill done: go back through the address saved in m->splpc */
+utlbret:
+	GETMACH(R26)
+	MOVW	12(R26), R27		/* R27 = m->splpc */
+	MOVW	M(EPC), R26
+	JMP	(R27)
+	NOOP
+
+	/* soft tlb miss: restore R27 and take the general exception path */
+stlbm:
+	GETMACH(R26)
+	MOVW	12(R26), R27		/* R27 = m->splpc */
+
+	/* fall through */
+
+/*
+ * General exception entry.  Picks the kernel stack (the current one if
+ * the KUSER bit of STATUS is clear, otherwise the address loaded from
+ * m->proc), makes a UREGSIZE frame on it, saves state with saveregs,
+ * reloads the MACH, USER and R30 registers, and then calls syscall()
+ * or trap() with a pointer to the saved registers.
+ * The trap path returns into forkret; the syscall path falls into it.
+ */
+TEXT	gevector(SB), $-4
+	MOVW	M(STATUS), R26
+	NOOP
+	AND	$KUSER, R26
+
+	BNE	R26, wasuser
+	MOVW	SP, R26			/* delay slot, old SP in R26 */
+
+waskernel:
+	JMP	dosave
+	SUBU	$UREGSIZE, SP		/* delay slot, allocate frame on kernel stack */
+
+wasuser:				/* get kernel stack for this user process */
+	GETMACH	(SP)
+	MOVW	8(SP), SP		/*  m->proc */
+	SUBU	$(UREGSIZE), SP
+
+dosave:
+	MOVW	R31, 0x28(SP)		/* save R31 before JAL overwrites it */
+
+	JAL	saveregs(SB)
+	MOVW	R26, 0x10(SP)		/* delay slot, save old SP */
+
+	GETMACH(R(MACH))
+	MOVW	8(R(MACH)), R(USER)	/* R24 = m->proc */
+	NOOP
+	MOVW	$setR30(SB), R30
+
+	/* saveregs leaves R26 == 0 exactly when the cause code is CSYS */
+	BEQ	R26, dosys		/* set by saveregs() */
+	NOOP
+
+dotrap:
+	MOVW	$forkret(SB), R31	/* make trap() return into forkret */
+	JMP	trap(SB)
+	MOVW	4(SP), R1		/* delay slot, first arg to trap() */
+
+dosys:
+	JAL	syscall(SB)
+	MOVW	4(SP), R1		/* delay slot, first arg to syscall() */
+
+	/* fall through */
+
+/*
+ * Common exception exit.  Calls restregs with the saved CAUSE in R1,
+ * reloads R31 and SP from the frame, and jumps through R27 (restregs
+ * reloads R27 from the frame).
+ */
+TEXT	forkret(SB), $-4
+	JAL	restregs(SB)		/* restores old PC in R26 */
+	MOVW	0x14(SP), R1		/* delay slot, CAUSE */
+
+	MOVW	0x28(SP), R31
+
+	JMP	(R27)
+	MOVW	0x10(SP), SP		/* delay slot */
+
+/*
+ * SP->	0x00	--- (spill R31)
+ *	0x04	--- (trap()/syscall() arg1)
+ *	0x08	status
+ *	0x0C	pc
+ *	0x10	sp/usp
+ *	0x14	cause
+ *	0x18	badvaddr
+ *	0x1C	tlbvirt
+ *	0x20	hi
+ *	0x24	lo
+ *	0x28	r31
+ *	.....
+ *	0x9c	r1
+ */
+
+/*
+ * Save processor state into the frame at SP (layout in the table above).
+ * Always saved: R1, R2, STATUS, EPC, CAUSE, R27, R28, R30.
+ * When the exception code is CSYS the rest (BADVADDR, TLBVIRT, HI, LO,
+ * R3-R25) is skipped.  On return R26 is zero for a system call and
+ * non-zero otherwise; 0x04(SP) holds the address of the register area
+ * (SP+8).
+ */
+TEXT	saveregs(SB), $-4
+	MOVW	R1, 0x9C(SP)
+	MOVW	R2, 0x98(SP)
+	MOVW	M(STATUS), R2
+	ADDU	$8, SP, R1
+	MOVW	R1, 0x04(SP)		/* arg to base of regs */
+	MOVW	$~KMODEMASK, R1
+	AND	R2, R1			/* STATUS with the low five mode bits cleared */
+	MOVW	R1, M(STATUS)		/* so we can take another trap */
+	MOVW	R2, 0x08(SP)		/* the frame keeps the unmodified STATUS */
+	MOVW	M(EPC), R2
+	MOVW	M(CAUSE), R1
+	MOVW	R2, 0x0C(SP)
+	MOVW	R1, 0x14(SP)
+	AND	$(EXCMASK<<2), R1
+	SUBU	$(CSYS<<2), R1, R26	/* R26 = 0 iff the cause code is CSYS */
+
+	BEQ	R26, notsaved		/* is syscall? */
+	MOVW	R27, 0x34(SP)		/* delay slot */
+
+	MOVW	M(BADVADDR), R1
+	MOVW	M(TLBVIRT), R2
+	MOVW	R1, 0x18(SP)
+	MOVW	R2, 0x1C(SP)
+
+	MOVW	HI, R1
+	MOVW	LO, R2
+	MOVW	R1, 0x20(SP)
+	MOVW	R2, 0x24(SP)
+
+	MOVW	R25, 0x3C(SP)
+	MOVW	R24, 0x40(SP)
+	MOVW	R23, 0x44(SP)
+	MOVW	R22, 0x48(SP)
+	MOVW	R21, 0x4C(SP)
+	MOVW	R20, 0x50(SP)
+	MOVW	R19, 0x54(SP)
+	MOVW	R18, 0x58(SP)
+	MOVW	R17, 0x5C(SP)
+	MOVW	R16, 0x60(SP)
+	MOVW	R15, 0x64(SP)
+	MOVW	R14, 0x68(SP)
+	MOVW	R13, 0x6C(SP)
+	MOVW	R12, 0x70(SP)
+	MOVW	R11, 0x74(SP)
+	MOVW	R10, 0x78(SP)
+	MOVW	R9, 0x7C(SP)
+	MOVW	R8, 0x80(SP)
+	MOVW	R7, 0x84(SP)
+	MOVW	R6, 0x88(SP)
+	MOVW	R5, 0x8C(SP)
+	MOVW	R4, 0x90(SP)
+	MOVW	R3, 0x94(SP)
+
+notsaved:
+	MOVW	R30, 0x2C(SP)
+
+	RET
+	MOVW	R28, 0x30(SP)		/* delay slot */
+
+/*
+ * Reload processor state from the frame at SP; the inverse of saveregs.
+ * Entered with the saved CAUSE in R1.  When its code is CSYS only R27,
+ * STATUS, EPC, R28, R30 and R1 are reloaded; otherwise R2-R25, HI and
+ * LO are reloaded as well.  On return R26 holds the saved PC.
+ */
+TEXT	restregs(SB), $-4
+	AND	$(EXCMASK<<2), R1
+	SUBU	$(CSYS<<2), R1, R26
+
+	BEQ	R26, notrestored	/* is syscall? */
+	MOVW	0x34(SP), R27		/* delay slot */
+
+	MOVW	0x3C(SP), R25
+	MOVW	0x40(SP), R24
+	MOVW	0x44(SP), R23
+	MOVW	0x48(SP), R22
+	MOVW	0x4C(SP), R21
+	MOVW	0x50(SP), R20
+	MOVW	0x54(SP), R19
+	MOVW	0x58(SP), R18
+	MOVW	0x5C(SP), R17
+	MOVW	0x60(SP), R16
+	MOVW	0x64(SP), R15
+	MOVW	0x68(SP), R14
+	MOVW	0x6C(SP), R13
+	MOVW	0x70(SP), R12
+	MOVW	0x74(SP), R11
+	MOVW	0x78(SP), R10
+	MOVW	0x7C(SP), R9
+	MOVW	0x80(SP), R8
+	MOVW	0x84(SP), R7
+	MOVW	0x88(SP), R6
+	MOVW	0x8C(SP), R5
+	MOVW	0x90(SP), R4
+	MOVW	0x94(SP), R3
+
+	MOVW	0x24(SP), R2
+	MOVW	0x20(SP), R1
+	MOVW	R2, LO
+	MOVW	R1, HI
+
+	MOVW	0x98(SP), R2
+
+notrestored:
+	MOVW	0x08(SP), R1
+	MOVW	R1, M(STATUS)		/* saved STATUS goes back first */
+	MOVW	0x0C(SP), R26		/* old PC */
+	MOVW	R26, M(EPC)
+
+	MOVW	0x30(SP), R28
+	MOVW	0x2C(SP), R30
+
+	RET
+	MOVW	0x9C(SP), R1		/* delay slot */
+
+/*
+ * hardware interrupt vectors
+ */
+
+/*
+ * Stub that machinit copies to KSEG0+0x0.  Loads the handler address
+ * stored at SPBADDR+0x18 (machinit puts utlbmiss there), sets R27 to
+ * the address of eret and jumps to the handler.
+ */
+TEXT	vector0(SB), $-4
+	NOOP
+	CONST	(SPBADDR+0x18, R26)
+	MOVW	$eret(SB), R27
+	MOVW	(R26), R26
+	JMP	(R26)
+	NOOP
+
+/*
+ * Stub that machinit copies to KSEG0+0x180.  Loads the handler address
+ * stored at SPBADDR+0x14 (machinit puts gevector there), sets R27 to
+ * the address of eret and jumps to the handler.
+ */
+TEXT	vector180(SB), $-4
+	NOOP
+	CONST	(SPBADDR+0x14, R26)
+	MOVW	$eret(SB), R27
+	MOVW	(R26), R26
+	JMP	(R26)
+	NOOP
+
+TEXT	eret(SB), $-4		
+	ERET
+	NOOP
+
+/*
+ *  floating-point stuff
+ */
+
+/*
+ * degenerate floating-point stuff ad9!
+ */
+
+TEXT	clrfpintr(SB), $0
+	RETURN
+
+TEXT	savefpregs(SB), $0
+	RETURN
+
+TEXT	restfpregs(SB), $0
+	RETURN
+
+TEXT	fcr31(SB), $0			/* fp csr */
+	MOVW	R0, R1
+	RETURN
+
+
+
+/*
+ * Emulate 68020 test and set: load linked / store conditional
+ */
+
+/*
+ * tas(key): R1 is the address of the key.  Loads the old value with
+ * LL, stores 1 with SC, and retries until the SC succeeds.  The old
+ * value is returned in R1.
+ */
+TEXT	tas(SB), $0
+TEXT	_tas(SB), $0
+	MOVW	R1, R2		/* address of key */
+tas1:
+	MOVW	$1, R3
+	LL(2, 1)		/* R1 = (R2) */
+	NOOP
+	SC(2, 3)		/* (R2) = R3 if unchanged; R3 = success */
+	NOOP
+	BEQ	R3, tas1	/* store failed, try again */
+	NOOP
+	RETURN
+
+/* used by the semaphore implementation */
+/*
+ * cmpswap(key, old, new): if the word at key equals old, try to store
+ * new there with SC.  Returns the SC result (non-zero on success), or
+ * 0 when the word did not equal old.  A failed SC is not retried here.
+ */
+TEXT cmpswap(SB), $0
+	MOVW	R1, R2		/* address of key */
+	MOVW	old+4(FP), R3	/* old value */
+	MOVW	new+8(FP), R4	/* new value */
+	LL(2, 1)		/* R1 = (R2) */
+	NOOP
+	BNE	R1, R3, fail
+	NOOP
+	MOVW	R4, R1
+	SC(2, 1)	/* (R2) = R1 if (R2) hasn't changed; R1 = success */
+	NOOP
+	RETURN
+fail:
+	MOVW	R0, R1
+	RETURN
+
+/*
+ *  cache manipulation
+ */
+
+/*
+ * Write back D cache lines and invalidate I cache lines covering
+ * [virtaddr, virtaddr+count).  The range is widened to 64-byte
+ * boundaries and processed 64 bytes per loop pass.  Interrupts are off
+ * for the duration; the saved STATUS in R10 is put back at the end.
+ */
+TEXT	icflush(SB), $-4			/* icflush(virtaddr, count) */
+	MOVW	4(FP), R9
+	DI(10)						/* intrs off, old status -> R10 */
+	UBARRIERS(7, R7, ichb);		/* return to kseg1 (uncached) */
+	ADDU	R1, R9			/* R9 = last address */
+	MOVW	$(~0x3f), R8
+	AND	R1, R8			/* R8 = first address, rounded down */
+	ADDU	$0x3f, R9
+	AND	$(~0x3f), R9		/* round last address up */
+	SUBU	R8, R9			/* R9 = revised count */
+icflush1:			/* 16-byte steps; mem.h gives CACHELINESZ as 32, so each line is touched twice */
+	CACHE	PD+HWB, 0x00(R8)
+	CACHE	PI+HINV, 0x00(R8)
+	CACHE	PD+HWB, 0x10(R8)
+	CACHE	PI+HINV, 0x10(R8)
+	CACHE	PD+HWB, 0x20(R8)
+	CACHE	PI+HINV, 0x20(R8)
+	CACHE	PD+HWB, 0x30(R8)
+	CACHE	PI+HINV, 0x30(R8)
+	SUBU	$0x40, R9
+	BGTZ	R9, icflush1
+	ADDU	$0x40, R8			/* delay slot */
+	BARRIERS(7, R7, ic2hb);		/* return to kseg0 (cached) */
+	MOVW	R10, M(STATUS)
+	JRHB(31)
+
+/*
+ * Write back (without invalidating) the D cache lines covering
+ * [virtaddr, virtaddr+count).  The range is widened to 64-byte
+ * boundaries and processed 64 bytes per loop pass.  Interrupts are off
+ * for the duration; the saved STATUS in R10 is put back at the end.
+ */
+TEXT	dcflush(SB), $-4			/* dcflush(virtaddr, count) */
+	MOVW	4(FP), R9
+	DI(10)						/* intrs off, old status -> R10 */
+	SYNC
+	EHB
+	ADDU	R1, R9			/* R9 = last address */
+	MOVW	$(~0x3f), R8
+	AND	R1, R8			/* R8 = first address, rounded down */
+	ADDU	$0x3f, R9
+	AND	$(~0x3f), R9		/* round last address up */
+	SUBU	R8, R9			/* R9 = revised count */
+dcflush1:			/* 16-byte steps; mem.h gives CACHELINESZ as 32, so each line is touched twice */
+	CACHE	PD+HWB, 0x00(R8)
+	CACHE	PD+HWB, 0x10(R8)
+	CACHE	PD+HWB, 0x20(R8)
+	CACHE	PD+HWB, 0x30(R8)
+	SUBU	$0x40, R9
+	BGTZ	R9, dcflush1
+	ADDU	$0x40, R8			/* delay slot */
+	SYNC
+	EHB
+	MOVW	R10, M(STATUS)
+	RETURN
+
+/*
+ * outl(port, buf, n): store n consecutive words from buf to the single
+ * address in R1.  The second argument is the buffer address and the
+ * third the word count (it is shifted left by 2 to get bytes).
+ */
+TEXT	outl(SB), $0
+	MOVW	4(FP), R2
+	MOVW	8(FP), R3
+	SLL	$2, R3
+	ADDU	R2, R3			/* R3 = address just past the buffer */
+outl1:
+	BEQ	R2, R3, outl2
+	/*
+	 * NOTE(review): with NOSCHED in force this load looks like the
+	 * branch delay slot, so it also runs once with R2 == R3 (one
+	 * word past the buffer) -- confirm.
+	 */
+	MOVW	(R2), R4
+	MOVW	R4, (R1)
+	JMP	outl1
+	ADDU	$4, R2
+outl2:
+	RETURN
+
+/*
+ * access to CP0 registers
+ */
+
+TEXT	prid(SB), $0
+	MOVW	M(PRID), R1
+	RETURN
+
+TEXT	rdcount(SB), $0
+	MOVW	M(COUNT), R1
+	RETURN
+
+TEXT	wrcount(SB), $0
+	MOVW	R1, M(COUNT)
+	RETURN
+
+TEXT	wrcompare(SB), $0
+	MOVW	R1, M(COMPARE)
+	RETURN
+
+TEXT	rdcompare(SB), $0
+	MOVW	M(COMPARE), R1
+	RETURN
+
+TEXT	getstatus(SB), $0
+	MOVW	M(STATUS), R1
+	RETURN
+
+TEXT	setstatus(SB), $0
+	MOVW	R1, M(STATUS)
+	EHB
+	RETURN
+
+TEXT	getcause(SB), $-4
+	MOVW	M(CAUSE), R1
+	RETURN
+
+TEXT	getconfig(SB), $-4
+	MOVW	M(CONFIG), R1
+	RETURN
+
+TEXT	getconfig1(SB), $-4
+	MFC0(CONFIG, 1, 1)
+	RETURN
+
+TEXT	getconfig2(SB), $-4
+	MFC0(CONFIG, 2, 1)
+	RETURN
+
+TEXT	getconfig3(SB), $-4
+	MFC0(CONFIG, 3, 1)
+	RETURN
+
+TEXT	getconfig4(SB), $-4
+	MFC0(CONFIG, 4, 1)
+	RETURN
+
+TEXT	getconfig7(SB), $-4
+	MFC0(CONFIG, 7, 1)
+	RETURN
+
+TEXT	gethwreg3(SB), $-4
+	RDHWR(3, 1)
+	RETURN
+
+TEXT	getdebugreg(SB), $0
+	MOVW	M(DEBUGREG), R1
+	RETURN
+
+TEXT	setwatchhi0(SB), $0
+	MOVW	R1, M(WATCHHI)
+	EHB
+	RETURN
+
+/*
+ * beware that the register takes a double-word address, so it's not
+ * precise to the individual instruction.
+ */
+TEXT	setwatchlo0(SB), $0
+	MOVW	R1, M(WATCHLO)
+	EHB
+	RETURN
+
+TEXT	getfcr0(SB), $0
+	MOVW	FCR0, R1
+	RET
+
+/* zoot is just for debug waves */
+TEXT	zoot(SB), $0
+	PUTC('W', R1, R2)
+	NOP
+	RETURN
+
+	GLOBL	sanity(SB), $4
+	DATA	sanity(SB)/4, $SANITY
+
+	SCHED
--- /dev/null
+++ b/sys/src/9/mt7688/main.c
@@ -1,0 +1,317 @@
+#include	"u.h"
+#include	"tos.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	<pool.h>
+#include	"../ip/ip.h"
+#include	<../port/error.h>
+
+
+FPsave initfp;
+
+/*
+ * software tlb simulation
+ */
+static Softtlb stlb[MAXMACH][STLBSIZE];
+
+Conf	conf;
+Mach* machaddr[MAXMACH];
+
+int normalprint;
+
+/* print the COUNT and COMPARE registers and m->speed, then pause 20ms */
+static void
+checkclock0(void)
+{
+	print("count=%luX compare=%luX %d\n", rdcount(), rdcompare(), m->speed);
+	delay(20);
+}
+
+
+/* debugging print called from init0 */
+static void
+checkconf0(void)
+{
+	/*
+	 * NOTE(review): getfcr0 is named without a call, so iprint is
+	 * handed the function's address, not the register value.  getfcr0
+	 * reads FCR0, presumably a coprocessor 1 register; confirm that is
+	 * safe on this cpu before adding the parentheses.
+	 */
+	iprint("frc0 check = %uX \n", getfcr0);
+// for debug stuff
+}
+
+/*
+ * Print a boot-time description of the cpu: name decoded from PRID,
+ * clock, endianness bit, revision fields, and the fpu, tlb and cache
+ * fields of Config1.  The delays between prints are kept as they were.
+ */
+static void
+prcpuid(void)
+{
+	ulong id, company, model, cfg1;
+	char *name;
+
+	/* fields used here: bits 23-16 pick the vendor, bits 15-8 the processor */
+	id = prid();
+	company = (id>>16) & MASK(8);
+	model = (id>>8) & MASK(8);
+	if(company == 0)
+		name = "old mips";
+	else if(company != 1)
+		name = "other mips";
+	else if(model == 0x93)
+		name = "mips 24k";
+	else if(model == 0x96)
+		name = "mips 24KEc";
+	else
+		name = "mips";
+	delay(20);
+	print("cpu%d: %ldMHz %s %s v%ld %ld rev %ld, ",
+		m->machno, m->hz / Mhz, name, getconfig() & (1<<15)? "b": "l",
+		(id>>5) & MASK(3), (id>>2) & MASK(3), id & MASK(2));
+	delay(200);
+
+	/* Config1: bit 0 fpu present, bits 30-25 tlb size-1, set counts at 22 and 13 */
+	cfg1 = getconfig1();
+	print("%s fpu\n", (cfg1 & 1? "has": "no"));
+	print("cpu%d: %ld tlb entries, using %dK pages\n", m->machno,
+		((cfg1>>25) & MASK(6)) + 1, BY2PG/1024);
+	delay(50);
+	print("cpu%d: l1 i cache: %d sets 4 ways 32 bytes/line\n", m->machno,
+		64 << ((cfg1>>22) & MASK(3)));
+	delay(50);
+	print("cpu%d: l1 d cache: %d sets 4 ways 32 bytes/line\n", m->machno,
+		64 << ((cfg1>>13) & MASK(3)));
+	delay(500);
+
+	/* always printed; the original guarded this with a constant-true test */
+	print("cpu%d: cycle counter res = %ld\n",
+		m->machno, gethwreg3());
+}
+
+
+/* thin wrapper around printinit(); no caller is visible in this file's main() */
+static void
+fmtinit(void)
+{
+	printinit();
+
+}
+
+/*
+ * Check that the cpu accepts a page mask: write it to PAGEMASK at
+ * splhi, read it back, and compare.  Returns 0 if the value stuck,
+ * otherwise prints a diagnostic (size is only used there) and
+ * returns -1.
+ */
+static int
+ckpagemask(ulong mask, ulong size)
+{
+	int pri;
+	ulong readback;
+
+	pri = splhi();
+	setpagemask(mask);
+	readback = getpagemask();
+	splx(pri);
+
+	if(readback == mask)
+		return 0;
+
+	iprint("page size %ldK not supported on this cpu; "
+		"mask %#lux read back as %#lux\n", size/1024, mask, readback);
+	return -1;
+}
+
+
+/*
+ * C entry point, called from start in l.s.  Brings up the console,
+ * memory configuration, Mach structure, interrupts, tlb and page
+ * allocator, then the process and device layers, and finally enters
+ * the scheduler.  schedinit() is not expected to return.
+ */
+void
+main(void)
+{
+	savefpregs(&initfp);	/* savefpregs is an empty stub in l.s */
+
+	uartinit();
+	quotefmtinstall();
+
+	confinit();
+	machinit();			/* calls clockinit */
+	active.exiting = 0;
+	active.machs[0] = 1;
+	normalprint = 1;
+
+	kmapinit();
+	xinit();
+	timersinit();
+	plan9iniinit();
+	intrinit();
+
+
+	/* boot banner and cpu diagnostics */
+	iprint("\nPlan 9 \n");
+	prcpuid();
+	delay(50);
+	checkclock0();
+	print("(m)status %lub\n", getstatus());
+
+	/* the result of the page-mask check is not acted on */
+	ckpagemask(PGSZ, BY2PG);
+	if (PTECACHED == PTENONCOHERWT)
+		print("caches configured as write-through\n");
+	tlbinit();
+	pageinit();
+	delay(50);
+
+	printinit();	/* what does this do? */
+	procinit0();
+	initseg();
+	links();
+	chandevreset();
+	userinit();
+
+	schedinit();
+
+	panic("schedinit returned");
+}
+
+/*
+ *  initialize a processor's mach structure.  each processor does this
+ *  for itself.
+ */
+void
+machinit(void)
+{
+	extern void gevector(void);	/* l.s */
+	extern void utlbmiss(void);
+	extern void vector0(void);
+	extern void vector180(void);
+
+	/* table of handler addresses that the vector stubs in l.s read */
+	void **sbp = (void*)SPBADDR;
+
+	MACHP(0) = (Mach*)MACHADDR;
+
+	/*
+	 * NOTE(review): this zeroes the Mach structure (sizeof(Mach) bytes
+	 * only) even though the comment below says not to; confirm which
+	 * is intended.
+	 */
+	memset(m, 0, sizeof(Mach));
+	m->machno = 0;
+	machaddr[m->machno] = m;
+
+	/*
+	 *  set up CPU's mach structure
+	 *  cpu0's was zeroed in l.s and our stack is in Mach, so don't zero it.
+	 */
+	m->speed = 580;			/* initial guess at MHz */
+	m->hz = m->speed * Mhz;
+	conf.nmach = 1;
+
+
+	m->stb = stlb[m->machno];	/* soft tlb used by utlbmiss */
+	m->ticks = 1;
+	m->perf.period = 1;
+
+
+	/* install exception handlers */
+	sbp[0x18/4] = utlbmiss;		/* read by vector0 */
+	sbp[0x14/4] = gevector;		/* read by vector180 */
+
+	/* we could install our own vectors directly, but we'll try to play nice */
+	if(1){
+		/* copy the two stubs to KSEG0+0 and KSEG0+0x180 and flush them */
+		memmove((void*)(KSEG0+0x0), (void*)vector0, 0x80);
+		memmove((void*)(KSEG0+0x180), (void*)vector180, 0x80);
+		icflush((void*)(KSEG0+0x0), 0x80);
+		icflush((void*)(KSEG0+0x180), 0x80);
+	}
+
+	/* clear BEV, which start in l.s had left set */
+	setstatus(getstatus() & ~BEV);
+
+	up = nil;
+
+	/* Ensure CU1 is off */
+	clrfpintr();			/* an empty stub in l.s */
+	clockinit();
+}
+
+/*
+ * Finish initialisation: set up the console and devices, publish the
+ * cputype/terminal/service environment variables, start the alarm
+ * kproc, build the initial user stack with the single argument "boot",
+ * and switch to user mode.
+ */
+void
+init0(void)
+{
+	char buf[128], **sp;
+
+	i8250console();
+
+	chandevinit();
+
+	/* errors while setting the environment are ignored */
+	if(!waserror()){
+		ksetenv("cputype", "spim", 0);
+		snprint(buf, sizeof buf, "mips %s", conffile);
+		ksetenv("terminal", buf, 0);
+		if(cpuserver)
+			ksetenv("service", "cpu", 0);
+		else
+			ksetenv("service", "terminal", 0);
+
+		setconfenv();
+
+		poperror();
+	}
+
+	checkconf0();
+
+	kproc("alarm", alarmkproc, 0);
+
+	/*
+	 * four pointer slots just below the Tos at the top of the user
+	 * stack: sp[0] points at the string "boot", which is stored
+	 * immediately after the four slots; sp[1..3] are nil.
+	 */
+	sp = (char**)(USTKTOP-sizeof(Tos) - 8 - sizeof(sp[0])*4);
+	sp[3] = sp[2] = sp[1] = nil;
+	strcpy(sp[0] = (char*)&sp[4], "boot");
+
+	touser(sp);
+}
+
+/*
+ * Kernel exit: announce it, call cpushutdown() and raise to splhi.
+ * The argument is ignored.
+ * NOTE(review): nothing here halts or loops, so the function returns
+ * to its caller afterwards -- confirm that is intended.
+ */
+void
+exit(int)
+{
+	iprint("main exit called\n");
+	delay(50);
+	cpushutdown();
+	splhi();
+}
+
+/* reboot is not implemented here: all three arguments are ignored and nothing is done */
+void
+reboot(void *, void *, ulong)
+{
+}
+
+
+/*
+ * Fill in conf: the fixed table sizes, the single bank of memory above
+ * the kernel image, the split between user pages and kernel memory,
+ * and the size limit of the main allocation pool.
+ */
+void
+confinit(void)
+{
+	ulong kpages, ktop;
+
+	/*
+	 *  set the table sizes first: the memory split below charges
+	 *  nproc, nimage, nswap and nswppo against the kernel's share,
+	 *  so they must hold their final values before it is computed.
+	 *  (they used to be assigned at the end and so counted as zero.)
+	 */
+	conf.nproc = 2000;
+	conf.nswap = 262144;
+	conf.nswppo = 4096;
+	conf.nimage = 200;
+
+	conf.copymode = 0;		/* copy on write */
+
+	/*
+	 *  divide memory twixt user pages and kernel.
+	 *  usable memory starts at the first page boundary after the
+	 *  kernel image (end) and runs up to the fixed MEMSIZE.
+	 */
+	conf.mem[0].base = ktop = PADDR(PGROUND((ulong)end));
+	conf.mem[0].npage = MEMSIZE/BY2PG - ktop/BY2PG;
+	conf.npage = conf.mem[0].npage;
+
+	/* the kernel gets what is left once users have 80% of the pages ... */
+	kpages = conf.npage - (conf.npage*80)/100;
+	/* ... capped at 64MB plus the Page array, plus the kernel stacks */
+	if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){
+		kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG;
+		kpages += (conf.nproc*KSTACK)/BY2PG;
+	}
+	conf.upages = conf.npage - kpages;
+	conf.ialloc = (kpages/2)*BY2PG;
+
+	/* the main pool may use the kernel's bytes less the fixed tables */
+	kpages *= BY2PG;
+	kpages -= conf.upages*sizeof(Page)
+		+ conf.nproc*sizeof(Proc)
+		+ conf.nimage*sizeof(Image)
+		+ conf.nswap
+		+ conf.nswppo*sizeof(Page);
+	mainmem->maxsize = kpages;
+//	mainmem->flags |= POOL_PARANOIA;
+}
+
+/*
+ * Watchpoints are not supported: a request for one or more raises
+ * the error "no watchpoints"; an empty request is accepted.
+ */
+void
+setupwatchpts(Proc *, Watchpt *, int n)
+{
+	if(n <= 0)
+		return;
+	error("no watchpoints");
+}
+
+/* no ISA configuration is provided: the arguments are ignored and 0 is always returned */
+int
+isaconfig(char *, int, ISAConf*)
+{
+	return 0;
+}
+
+
--- /dev/null
+++ b/sys/src/9/mt7688/mem.h
@@ -1,0 +1,281 @@
+/*
+ * Memory and machine-specific definitions.  Used in C and assembler.
+ */
+
+#define MIN(a, b)	((a) < (b)? (a): (b))
+#define MAX(a, b)	((a) > (b)? (a): (b))
+
+#define PHYSCONS	(KSEG1|0x10000C00)
+
+#define	CONFADDR	0x80010000
+
+#define MEMSIZE	(128*MB)
+
+/*
+ * Sizes
+ */
+
+#define	BI2BY		8			/* bits per byte */
+#define	BI2WD		32			/* bits per word */
+#define	BY2WD		4			/* bytes per word */
+#define	BY2V		8			/* bytes per vlong */
+
+#define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))
+#define	PGROUND(s)	ROUND(s, BY2PG)
+
+#define MAXBY2PG (16*1024) /* rounding for UTZERO in executables; see mkfile */
+#define UTROUND(t)	ROUNDUP((t), MAXBY2PG)
+
+#ifndef BIGPAGES
+#define	BY2PG		4096			/* bytes per page */
+#define	PGSHIFT		12			/* log2(BY2PG) */
+#define	PGSZ		PGSZ4K
+#else
+/* 16K pages work very poorly */
+#define	BY2PG		(16*1024)		/* bytes per page */
+#define	PGSHIFT		14			/* log2(BY2PG) */
+#define PGSZ		PGSZ16K
+#endif
+
+#define	KSTACK		(8*1024)		/* Size of kernel stack */
+#define MACHSIZE	(BY2PG+KSTACK)
+//#define MACHSIZE	(2*BY2PG)
+#define	WD2PG		(BY2PG/BY2WD)		/* words per page */
+
+#define	MAXMACH		1   /* max # cpus system can run; see active.machs */
+#define STACKALIGN(sp)	((sp) & ~7)		/* bug: assure with alloc */
+#define	BLOCKALIGN	16
+#define CACHELINESZ	32			/* mips24k */
+#define ICACHESIZE	(64*1024)		/* rb450g */
+#define DCACHESIZE	(32*1024)		/* rb450g */
+
+#define MASK(w)		FMASK(0, w)
+
+/*
+ * Time
+ */
+#define	HZ		100			/* clock frequency */
+#define	MS2HZ		(1000/HZ)		/* millisec per clock tick */
+#define	TK2SEC(t)	((t)/HZ)		/* ticks to seconds */
+
+/*
+ * CP0 registers
+ */
+
+#define INDEX		0
+#define RANDOM		1
+#define TLBPHYS0	2	/* aka ENTRYLO0 */
+#define TLBPHYS1	3	/* aka ENTRYLO1 */
+#define CONTEXT		4
+#define PAGEMASK	5
+#define WIRED		6
+#define BADVADDR	8
+#define COUNT		9
+#define TLBVIRT		10	/* aka ENTRYHI */
+#define COMPARE		11
+#define STATUS		12
+#define CAUSE		13
+#define EPC			14
+#define	PRID		15
+#define	CONFIG		16
+#define	LLADDR		17
+#define	WATCHLO		18
+#define	WATCHHI		19
+#define DEBUGREG	23
+#define DEPC		24
+#define PERFCOUNT	25
+#define	CACHEECC	26
+#define	CACHEERR	27
+#define	TAGLO		28
+#define	TAGHI		29
+#define	ERROREPC	30
+#define DESAVE		31
+
+/*
+ * M(STATUS) bits
+ */
+#define KMODEMASK	0x0000001f
+#define IE			0x00000001	/* master interrupt enable */
+#define EXL			0x00000002	/* exception level */
+#define ERL			0x00000004	/* error level */
+#define KSUPER		0x00000008
+#define KUSER		0x00000010
+#define KSU			0x00000018
+#define UX			0x00000020
+#define SX			0x00000040
+#define KX			0x00000080
+#define INTMASK		0x0000ff00
+#define SW0			0x00000100
+#define SW1			0x00000200
+#define INTR0		0x00000100	/* interrupt enable bits */
+#define INTR1		0x00000200
+#define INTR2		0x00000400
+#define INTR3		0x00000800
+#define INTR4		0x00001000
+#define INTR5		0x00002000
+#define INTR6		0x00004000
+#define INTR7		0x00008000
+#define DE			0x00010000
+#define TS			0x00200000	/* tlb shutdown; on 24k at least */
+#define BEV			0x00400000	/* bootstrap exception vectors */
+#define RE			0x02000000	/* reverse-endian in user mode */
+#define FR			0x04000000	/* enable 32 FP regs */
+#define CU0			0x10000000
+#define CU1			0x20000000	/* FPU enable */
+
+/*
+ * M(CONFIG) bits
+ */
+
+#define CFG_K0		7	/* kseg0 cachability */
+#define CFG_MM		(1<<18)	/* write-through merging enabled */
+
+/*
+ * M(CAUSE) bits
+ */
+
+#define BD		(1<<31)	/* last excep'n occurred in branch delay slot */
+
+/*
+ * Exception codes
+ */
+#define	EXCMASK	0x1f		/* mask of all causes */
+#define	CINT	 0		/* external interrupt */
+#define	CTLBM	 1		/* TLB modification: store to unwritable page */
+#define	CTLBL	 2		/* TLB miss (load or fetch) */
+#define	CTLBS	 3		/* TLB miss (store) */
+#define	CADREL	 4		/* address error (load or fetch) */
+#define	CADRES	 5		/* address error (store) */
+#define	CBUSI	 6		/* bus error (fetch) */
+#define	CBUSD	 7		/* bus error (data load or store) */
+#define	CSYS	 8		/* system call */
+#define	CBRK	 9		/* breakpoint */
+#define	CRES	10		/* reserved instruction */
+#define	CCPU	11		/* coprocessor unusable */
+#define	COVF	12		/* arithmetic overflow */
+#define	CTRAP	13		/* trap */
+#define	CVCEI	14		/* virtual coherence exception (instruction) */
+#define	CFPE	15		/* floating point exception */
+#define CTLBRI	19		/* tlb read-inhibit */
+#define CTLBXI	20		/* tlb execute-inhibit */
+#define	CWATCH	23		/* watch exception */
+#define CMCHK	24		/* machine check */
+#define CCACHERR 30		/* cache error */
+#define	CVCED	31		/* virtual coherence exception (data) */
+
+/*
+ * M(CACHEECC) a.k.a. ErrCtl bits
+ */
+#define PE	(1<<31)
+#define LBE	(1<<25)
+#define WABE	(1<<24)
+
+/*
+ * Trap vectors
+ */
+
+#define	UTLBMISS	(KSEG0+0x000)
+#define	XEXCEPTION	(KSEG0+0x080)
+#define	CACHETRAP	(KSEG0+0x100)
+#define	EXCEPTION	(KSEG0+0x180)
+
+/*
+ * Magic registers
+ */
+
+#define	USER		24		/* R24 is up-> */
+#define	MACH		25		/* R25 is m-> */
+
+#define UREGSIZE	0xA0		/* sizeof(Ureg)+8 */
+
+/*
+ * MMU
+ */
+#define	PGSZ4K		(0x00<<13)
+#define PGSZ16K		(0x03<<13)	/* on 24k */
+#define	PGSZ64K		(0x0F<<13)
+#define	PGSZ256K	(0x3F<<13)
+#define	PGSZ1M		(0xFF<<13)
+#define	PGSZ4M		(0x3FF<<13)
+#define PGSZ8M		(0x7FF<<13)	/* not on 24k */
+#define	PGSZ16M		(0xFFF<<13)
+#define PGSZ64M		(0x3FFF<<13)	/* on 24k */
+#define PGSZ256M	(0xFFFF<<13)	/* on 24k */
+
+/* mips address spaces, tlb-mapped unless marked otherwise */
+#define	KUSEG	0x00000000	/* user process */
+#define KSEG0	0x80000000	/* kernel (direct mapped, cached) */
+#define KSEG1	0xA0000000	/* kernel (direct mapped, uncached: i/o) */
+#define	KSEG2	0xC0000000	/* kernel, was used for TSTKTOP */
+#define	KSEG3	0xE0000000	/* kernel, used by kmap */
+#define	KSEGM	0xE0000000	/* mask to check which seg */
+
+/*
+ * Fundamental addresses
+ */
+
+#define	REBOOTADDR	KADDR(0x1000)	/* just above vectors */
+#define	MACHADDR	0x80005000
+#define	KMAPADDR	0xE0000000	/* kmap'd addresses */
+#define SPBADDR		0x80001000
+
+#define PIDXSHFT	12
+#ifndef BIGPAGES
+#define NCOLOR		8
+#define PIDX		((NCOLOR-1)<<PIDXSHFT)
+#define getpgcolor(a)	(((ulong)(a)>>PIDXSHFT) % NCOLOR)
+#else
+/* no cache aliases are possible with pages of 16K or larger */
+#define NCOLOR		1
+#define PIDX		0
+#define getpgcolor(a)	0
+#endif
+#define KMAPSHIFT	15
+
+#define	PTEGLOBL	(1<<0)
+#define	PTEVALID	(1<<1)
+#define	PTEWRITE	(1<<2)
+#define PTERONLY	0
+#define PTEALGMASK	(7<<3)
+#define PTENONCOHERWT	(0<<3)		/* cached, write-through (slower) */
+#define PTEUNCACHED	(2<<3)
+#define PTENONCOHERWB	(3<<3)		/* cached, write-back */
+#define PTEUNCACHEDACC	(7<<3)
+/* rest are reserved on 24k */
+#define PTECOHERXCL	(4<<3)
+#define PTECOHERXCLW	(5<<3)
+#define PTECOHERUPDW	(6<<3)
+
+/* how much faster is it? mflops goes from about .206 (WT) to .37 (WB) */
+#define PTECACHED PTENONCOHERWT	/* 24k erratum 48 disallows WB */
+//#define PTECACHED	PTENONCOHERWB
+
+#define	PTEPID(n)	(n)
+#define PTEMAPMEM	(1024*1024)
+#define	PTEPERTAB	(PTEMAPMEM/BY2PG)
+#define SEGMAPSIZE	512
+#define SSEGMAPSIZE	16
+
+#define STLBLOG		15
+#define STLBSIZE	(1<<STLBLOG)	/* entries in the soft TLB */
+/* page # bits that don't fit in STLBLOG bits */
+#define HIPFNBITS	(BI2WD - (PGSHIFT+1) - STLBLOG)
+#define KPTELOG		8
+#define KPTESIZE	(1<<KPTELOG)	/* entries in the kfault soft TLB */
+
+#define TLBPID(n) ((n)&0xFF)
+#define	NTLBPID	256		/* # of pids (affects size of Mach) */
+#define	NTLB	32		/* # of entries (mips 24k) */
+#define TLBOFF	1		/* first tlb entry (0 used within mmuswitch) */
+#define NKTLB	2		/* # of initial kfault tlb entries */
+#define TLBROFF	(TLBOFF+NKTLB)	/* first large IO window tlb entry */
+
+/*
+ * Address spaces
+ */
+#define	UZERO	KUSEG			/* base of user address space */
+#define	UTZERO	(UZERO+MAXBY2PG)	/* 1st user text address; see mkfile */
+#define	USTKTOP	(KZERO-BY2PG)		/* byte just beyond user stack */
+#define	USTKSIZE (8*1024*1024)		/* size of user stack */
+#define	KZERO	KSEG0			/* base of kernel address space */
+#define	KTZERO	(KZERO+0x20000)	/* first address in kernel text */
--- /dev/null
+++ b/sys/src/9/mt7688/mips24k.s
@@ -1,0 +1,65 @@
+
+#define	SP	R29
+
+#define NOOP		NOR R0, R0, R0
+#define NOP			NOR R0, R0, R0
+#define RETURN		RET; NOOP
+#define CONST(i, v)	MOVW $((i) & 0xffff0000), v; OR $((i) & 0xffff), v;
+#define GETMACH(r)	CONST(MACHADDR, r)
+
+#define PUTC(c, r1, r2)	CONST(PHYSCONS, r1); MOVW $(c), r2; MOVW r2, (r1); NOOP
+
+
+/* new instructions in mips 24k (mips32r2) */
+#define DI(rt)	WORD $(0x41606000|((rt)<<16))	/* interrupts off */
+#define EI(rt)	WORD $(0x41606020|((rt)<<16))	/* interrupts on */
+#define EHB	WORD $0xc0
+
+#define SYNC	WORD $0xf			/* all sync barriers */
+#define WAIT	WORD $0x42000020		/* wait for interrupt */
+
+/* all barriers, clears all hazards; clobbers r/Reg and R22 */
+#define BARRIERS(r, Reg, label) \
+	SYNC; EHB; MOVW $ret(SB), Reg; JALRHB(r)
+
+/* same but return to KSEG1 */
+#define UBARRIERS(r, Reg, label) \
+	SYNC; EHB; MOVW $ret(SB), Reg; OR $KSEG1, Reg; JALRHB(r)
+
+/* jalr with hazard barrier, link in R22 */
+#define JALRHB(r) WORD $(((r)<<21)|(22<<11)|(1<<10)|9); NOOP
+
+/* jump register with hazard barrier */
+#define JRHB(r)	WORD $(((r)<<21)|(1<<10)|8); NOOP
+
+/*
+ *  R4000 instructions
+ */
+
+/* a mips 24k erratum requires a NOP after; experience dictates EHB before */
+#define	ERET	EHB; WORD $0x42000018; NOP
+//#define	ERET		WORD	$0x42000018
+#define	LL(base, rt)	WORD	$((060<<26)|((base)<<21)|((rt)<<16))
+#define	SC(base, rt)	WORD	$((070<<26)|((base)<<21)|((rt)<<16))
+
+#define MFC0(src,sel,dst) WORD $(0x40000000|((src)<<11)|((dst)<<16)|(sel))
+#define MTC0(src,dst,sel) WORD $(0x40800000|((dst)<<11)|((src)<<16)|(sel))
+#define RDHWR(hwr, r)	WORD $(0x7c00003b|((hwr)<<11)|((r)<<16))
+
+/*
+ *  cache manipulation
+ */
+#define	CACHE	BREAK		/* overloaded op-code */
+
+#define	PI	R((0		/* primary I cache */
+#define	PD	R((1		/* primary D cache */
+#define	SD	R((3		/* secondary combined I/D cache */
+
+#define	IWBI	(0<<2)))	/* index write-back invalidate */
+#define	ILT	(1<<2)))	/* index load tag */
+#define	IST	(2<<2)))	/* index store tag */
+#define	CDE	(3<<2)))	/* create dirty exclusive */
+#define	HINV	(4<<2)))	/* hit invalidate */
+#define	HWBI	(5<<2)))	/* hit write back invalidate */
+#define	HWB	(6<<2)))	/* hit write back */
+#define	HSV	(7<<2)))	/* hit set virtual */
--- /dev/null
+++ b/sys/src/9/mt7688/mkfile
@@ -1,0 +1,94 @@
+CONF=mt7688
+CONFLIST=mt7688
+
+objtype=spim
+</$objtype/mkfile
+p=9
+# must match mem.h
+KTZERO=0x80020000
+UTZERO=0x4020
+
+# CFLAGS=$CFLAGS -DFPEMUDEBUG
+# CFLAGS=$CFLAGS -BIGPAGES
+
+DEVS=`{rc ../port/mkdevlist $CONF}
+
+PORT=\
+	alarm.$O\
+	alloc.$O\
+	allocb.$O\
+	auth.$O\
+	cache.$O\
+	chan.$O\
+	clock.$O\
+	dev.$O\
+	edf.$O\
+	fault.$O\
+	mul64fract.$O\
+	page.$O\
+	parse.$O\
+	pgrp.$O\
+	portclock.$O\
+	print.$O\
+	proc.$O\
+	qio.$O\
+	qlock.$O\
+	rdb.$O\
+	rebootcmd.$O\
+	segment.$O\
+	syscallfmt.$O\
+	sysfile.$O\
+	sysproc.$O\
+	taslock.$O\
+	tod.$O\
+	xalloc.$O\
+	userinit.$O\
+
+OBJ=\
+	l.$O\
+	arch.$O\
+	bootargs.$O\
+	clock.$O\
+	faultmips.$O\
+	irq.$O\
+	main.$O\
+	mmu.$O\
+	random.$O\
+	syscall.$O\
+	trap.$O\
+	$CONF.root.$O\
+	$CONF.rootc.$O\
+	$DEVS\
+	$PORT\
+
+LIB=\
+	/$objtype/lib/libmemlayer.a\
+	/$objtype/lib/libmemdraw.a\
+	/$objtype/lib/libdraw.a\
+	/$objtype/lib/libauth.a\
+	/$objtype/lib/libsec.a\
+	/$objtype/lib/libmp.a\
+	/$objtype/lib/libip.a\
+	/$objtype/lib/libc.a\
+
+$p$CONF:	$OBJ $CONF.$O $LIB
+	$LD -o $target -l -R4 -H6 -T$KTZERO $prereq 
+
+install:V:	$p$CONF
+	cp $p$CONF /$objtype/
+
+<../boot/bootmkfile
+<../port/portmkfile
+<|../port/mkbootrules $CONF
+
+initcode.out: init9.$O initcode.$O /$objtype/lib/libc.a
+	$LD -T$UTZERO -R4 -s -o $target $prereq 
+
+l.$O: mips24k.s
+fpimips.$O arch.$O faultmips.$O mmu.$O syscall.$O trap.$O irq.$O: /$objtype/include/ureg.h
+main.$O:	/$objtype/include/ureg.h errstr.h
+main.$O trap.$O syscall.$O fpimips.$O: /sys/include/tos.h
+fpi.$O fpimips.$O fpimem.$O: fpi.h
+
+%.clean:V:
+	rm -f $stem.c [9bz]$stem [9bz]$stem.gz boot$stem.*
--- /dev/null
+++ b/sys/src/9/mt7688/mmu.c
@@ -1,0 +1,470 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"ureg.h"
+
+/*
+ *  tlb entry 0 is used only by mmuswitch() to set the current tlb pid.
+ *
+ *  It is apparently assumed that user tlb entries are not
+ *  overwritten during start-up, so ...
+ *  During system start-up (before up first becomes non-nil),
+ *  Kmap entries start at tlb index 1 and work their way up until
+ *  kmapinval() removes them.  They then restart at 1.  As long as there
+ *  are few kmap entries they will not pass tlbroff (the WIRED tlb entry
+ *  limit) and interfere with user tlb entries.
+ *  Once start-up is over, we combine the kernel and user tlb pools into one,
+ *  in the hope of making better use of the tlb on systems with small ones.
+ *
+ *  All invalidations of the tlb are via indexed entries.  The virtual
+ *  address used is always 'KZERO | (x<<(PGSHIFT+1)) | currentpid' where
+ *  'x' is the index into the tlb.  This ensures that the current pid doesn't
+ *  change and that no two invalidated entries have matching virtual
+ *  addresses just in case SGI/MIPS ever makes a chip that cares (as
+ *  they keep threatening).  These entries should never be used in
+ *  lookups since accesses to KZERO addresses don't go through the tlb
+ *  (actually only true of KSEG0 and KSEG1; KSEG2 and KSEG3 do go
+ *  through the tlb).
+ */
+
+#define TLBINVAL(x, pid) puttlbx(x, KZERO|((x)<<(PGSHIFT+1))|(pid), 0, 0, PGSZ)
+
+enum {
+	Debugswitch	= 0,
+	Debughash	= 0,
+};
+
+static ulong ktime[8];		/* only for first 8 cpus */
+
+void
+tlbinit(void)
+{
+	int slot;
+
+	for(slot = 0; slot < NTLB; slot++)	/* invalidate every entry under pid 0 */
+		TLBINVAL(slot, 0);
+}
+
+Lock	kmaplock;
+KMap	kpte[KPTESIZE];
+KMap*	kmapfree;
+
+static int minfree = KPTESIZE;
+static int lastfree;
+static int tlbroff = TLBROFF;
+
+static void
+nfree(void)
+{
+	KMap *kp;
+	int count;
+
+	/* walk the free list to count the unused KMap slots */
+	for(count = 0, kp = kmapfree; kp != 0; kp = kp->next)
+		count++;
+	lastfree = count;
+	if(count < minfree){		/* new low-water mark: report it */
+		minfree = count;
+		iprint("%d free\n", count);
+	}
+}
+
+void
+kmapinit(void)
+{
+	int i;
+
+	lock(&kmaplock);
+	/* chain kpte[0..KPTESIZE-1] into the free list; the last entry ends it */
+	for(i = 0; i < KPTESIZE-1; i++)
+		kpte[i].next = &kpte[i+1];
+	kpte[KPTESIZE-1].next = 0;
+	kmapfree = &kpte[0];
+	unlock(&kmaplock);
+
+	m->ktlbnext = TLBOFF;	/* putktlb() hands out tlb slots starting here */
+}
+
+void
+kmapdump(void)
+{
+	KMap *k;
+
+	for(k = kpte; k < &kpte[KPTESIZE]; k++)	/* index, ref count, pc saved by kmap() */
+		iprint("%d: %lud pc=%#lux - ", (int)(k - kpte), k->ref, k->pc);
+}
+
+static int
+putktlb(KMap *k)		/* enter k's mapping in the tlb; returns the index used */
+{
+	int x;
+	ulong virt;
+	ulong tlbent[3];
+
+	virt = k->virt & ~BY2PG | TLBPID(tlbvirt());	/* clear even/odd bit, add current tlb pid */
+	x = gettlbp(virt, tlbent);	/* x < 0: no entry matches virt */
+	if (!m->paststartup)
+		if (up) {			/* startup just ended? */
+			tlbroff = 1;
+			setwired(tlbroff);	/* share all-but-one entries */
+			m->paststartup = 1;
+		} else if (x < 0) {		/* no such entry? use next */
+			x = m->ktlbnext++;
+			if(m->ktlbnext >= tlbroff)
+				m->ktlbnext = TLBOFF;
+		}
+	if (x < 0)		/* no entry for va? overwrite random one */
+		x = getrandom();
+	puttlbx(x, virt, k->phys0, k->phys1, PGSZ);
+	m->ktlbx[x] = 1;	/* kmapinval() invalidates the slots marked here */
+	return x;
+}
+
+/*
+ *  Arrange that the KMap'd virtual address will hit the same
+ *  primary cache line as pg->va by making bits 14...12 of the
+ *  tag the same as virtual address.  These bits are the index
+ *  into the primary cache and are checked whenever accessing
+ *  the secondary cache through the primary.  Violation causes
+ *  a VCE trap.
+ */
+KMap *
+kmap(Page *pg)		/* map pg into kernel virtual space; returns its KMap */
+{
+	int s, printed = 0;
+	ulong pte, virt;
+	KMap *k;
+
+	s = splhi();
+	lock(&kmaplock);
+
+	if(kmapfree == 0) {	/* free list empty */
+retry:
+		unlock(&kmaplock);
+		kmapinval();		/* try and free some */
+		lock(&kmaplock);
+		if(kmapfree == 0){
+			unlock(&kmaplock);
+			splx(s);
+			if(printed++ == 0){	/* complain only once per call */
+			/* using iprint here we get mixed up with other prints */
+				print("%d KMAP RETRY %#lux ktime %ld %ld %ld %ld %ld %ld %ld %ld\n",
+					m->machno, getcallerpc(&pg),
+					ktime[0], ktime[1], ktime[2], ktime[3],
+					ktime[4], ktime[5], ktime[6], ktime[7]);
+				delay(200);
+			}
+			splhi();
+			lock(&kmaplock);
+			goto retry;
+		}
+	}
+
+	k = kmapfree;		/* take the first free KMap */
+	kmapfree = k->next;
+
+	k->pg = pg;
+	/*
+	 * One for the allocation,
+	 * One for kactive
+	 */
+	k->pc = getcallerpc(&pg);
+	k->ref = 2;
+	k->konmach[m->machno] = m->kactive;	/* push k on this cpu's active list */
+	m->kactive = k;
+
+	virt = pg->va;
+	/* bits 14..12 form the secondary-cache virtual index */
+	virt &= PIDX;
+	virt |= KMAPADDR | ((k-kpte)<<KMAPSHIFT);	/* kfault() recovers the kpte index from this */
+
+	k->virt = virt;
+	pte = PPN(pg->pa)|PTECACHED|PTEGLOBL|PTEWRITE|PTEVALID;
+	if(virt & BY2PG) {	/* odd page: the valid pte goes in phys1 */
+		k->phys0 = PTEGLOBL | PTECACHED;
+		k->phys1 = pte;
+	}
+	else {
+		k->phys0 = pte;
+		k->phys1 = PTEGLOBL | PTECACHED;
+	}
+
+	putktlb(k);
+	unlock(&kmaplock);
+
+	splx(s);
+	return k;
+}
+
+void
+kunmap(KMap *k)		/* drop one reference to k; the last one frees it */
+{
+	int s;
+
+	s = splhi();
+	if(decref(k) == 0) {
+		k->virt = 0;	/* kfault() panics on a fault against a slot with virt == 0 */
+		k->phys0 = 0;
+		k->phys1 = 0;
+		k->pg = 0;
+
+		lock(&kmaplock);
+		k->next = kmapfree;	/* push k back on the free list */
+		kmapfree = k;
+//		nfree();
+		unlock(&kmaplock);
+	}
+	splx(s);
+}
+
+void
+kfault(Ureg *ur)			/* called from trap() */
+{
+	ulong index, addr;
+	KMap *k, *f;
+
+	addr = ur->badvaddr;
+	index = (addr & ~KSEGM) >> KMAPSHIFT;	/* kmap() put the kpte index in these bits */
+	if(index >= KPTESIZE)
+		panic("kmapfault: va=%#lux", addr);
+
+	k = &kpte[index];
+	if(k->virt == 0)	/* slot is not currently mapped */
+		panic("kmapfault: unmapped %#lux", addr);
+
+	for(f = m->kactive; f; f = f->konmach[m->machno])	/* already on this cpu's active list? */
+		if(f == k)
+			break;
+	if(f == 0) {		/* no: take a reference and add it */
+		incref(k);
+		k->konmach[m->machno] = m->kactive;
+		m->kactive = k;
+	}
+	putktlb(k);		/* reload the tlb entry */
+}
+
+void
+kmapinval(void)		/* invalidate this cpu's kmap tlb entries and release its active KMaps */
+{
+	int mno, i, curpid;
+	KMap *k, *next;
+	uchar *ktlbx;
+
+	if(m->machno < nelem(ktime))
+		ktime[m->machno] = MACHP(0)->ticks;	/* kmap() prints these when it has to retry */
+	if(m->kactive == 0)
+		return;
+
+	curpid = PTEPID(TLBPID(tlbvirt()));
+	ktlbx = m->ktlbx;
+	for(i = 0; i < NTLB; i++, ktlbx++){
+		if(*ktlbx == 0)		/* slot was not filled by putktlb() */
+			continue;
+		TLBINVAL(i, curpid);
+		*ktlbx = 0;
+	}
+
+	mno = m->machno;
+	for(k = m->kactive; k; k = next) {
+		next = k->konmach[mno];	/* fetch the link before kunmap() may free k */
+		kunmap(k);
+	}
+
+	m->kactive = 0;
+	m->ktlbnext = TLBOFF;
+}
+
+/*
+ * Process must be splhi
+ */
+static int
+newtlbpid(Proc *p)	/* assign p a tlb pid on this cpu and return it */
+{
+	int i, s;
+	Proc **h;
+
+	i = m->lastpid;
+	h = m->pidproc;
+	for(s = 0; s < NTLBPID; s++) {	/* look for an unowned pid, starting after lastpid */
+		i++;
+		if(i >= NTLBPID)
+			i = 1;		/* wrap around, skipping pid 0 */
+		if(h[i] == 0)
+			break;
+	}
+
+	if(h[i])		/* none free: reclaim this one */
+		purgetlb(i);
+	if(h[i] != 0)
+		panic("newtlb");
+
+	m->pidproc[i] = p;
+	p->pidonmach[m->machno] = i;
+	m->lastpid = i;
+
+	return i;
+}
+
+void
+mmuswitch(Proc *p)		/* make p's tlb pid the current one on this cpu */
+{
+	int tp;
+	static char lasttext[32];
+
+	if(Debugswitch && !p->kp){	/* debugging: print the name when it changes */
+		if(strncmp(lasttext, p->text, sizeof lasttext) != 0)
+			iprint("[%s]", p->text);
+		strncpy(lasttext, p->text, sizeof lasttext);
+	}
+
+	if(p->newtlb) {		/* flush requested: forget p's pids so a new one is assigned */
+		memset(p->pidonmach, 0, sizeof p->pidonmach);
+		p->newtlb = 0;
+	}
+	tp = p->pidonmach[m->machno];
+	if(tp == 0)		/* 0 means p has no pid on this cpu */
+		tp = newtlbpid(p);
+	puttlbx(0, KZERO|PTEPID(tp), 0, 0, PGSZ);	/* tlb entry 0 carries the current pid */
+}
+
+void
+mmurelease(Proc *p)
+{
+	memset(&p->pidonmach[0], 0, sizeof(p->pidonmach));	/* p no longer holds a tlb pid on any cpu */
+}
+
+
+/* tlbvirt also has TLBPID() in its low byte as the asid */
+static Softtlb*
+putstlb(ulong tlbvirt, ulong tlbphys)	/* record a pte in the software tlb; returns its entry */
+{
+	int odd;
+	Softtlb *entry;
+
+	/* identical calculation in l.s/utlbmiss */
+	entry = &m->stb[stlbhash(tlbvirt)];
+	odd = tlbvirt & BY2PG;		/* even/odd bit */
+	tlbvirt &= ~BY2PG;		/* zero even/odd bit */
+	if(entry->virt != tlbvirt) {	/* not my entry? overwrite it */
+		if(entry->virt != 0) {
+			m->hashcoll++;
+			if (Debughash)
+				iprint("putstlb: hash collision: %#lx old virt "
+					"%#lux new virt %#lux page %#lux\n",
+					entry - m->stb, entry->virt, tlbvirt,
+					tlbvirt >> (PGSHIFT+1));
+		}
+		entry->virt = tlbvirt;
+		entry->phys0 = 0;
+		entry->phys1 = 0;
+	}
+
+	if(odd)			/* each entry holds an even/odd page pair */
+		entry->phys1 = tlbphys;
+	else
+		entry->phys0 = tlbphys;
+
+	if(entry->phys0 == 0 && entry->phys1 == 0)	/* nothing mapped: mark the slot unused */
+		entry->virt = 0;
+
+	return entry;
+}
+
+void
+putmmu(ulong tlbvirt, ulong tlbphys, Page *pg)	/* enter a mapping for up in the soft and hardware tlbs */
+{
+	short tp;
+	ulong tlbent[3];
+	Softtlb *entry;
+	int s, x;
+
+	s = splhi();
+	tp = up->pidonmach[m->machno];
+	if(tp == 0)		/* up has no tlb pid on this cpu yet */
+		tp = newtlbpid(up);
+
+	tlbvirt |= PTEPID(tp);	/* tag the address with that pid */
+	entry = putstlb(tlbvirt, tlbphys);
+	x = gettlbp(tlbvirt, tlbent);
+	if(x < 0) x = getrandom();	/* not in the hardware tlb: replace a random entry */
+	puttlbx(x, entry->virt, entry->phys0, entry->phys1, PGSZ);
+	if(needtxtflush(pg)){
+		icflush((void*)pg->va, BY2PG);	/* flush the instruction cache for this page */
+		donetxtflush(pg);
+	}
+	splx(s);
+}
+
+void
+purgetlb(int pid)		/* free tlb pid `pid' and any stale pids on this cpu */
+{
+	int i, mno;
+	Proc *sp, **pidproc;
+	Softtlb *entry, *etab;
+
+	m->tlbpurge++;
+
+	/*
+	 * find all pid entries that are no longer used by processes
+	 */
+	mno = m->machno;
+	pidproc = m->pidproc;
+	for(i=1; i<NTLBPID; i++) {
+		sp = pidproc[i];
+		if(sp && sp->pidonmach[mno] != i)	/* owner has since dropped or changed its pid */
+			pidproc[i] = 0;
+	}
+
+	/*
+	 * shoot down the one we want
+	 */
+	sp = pidproc[pid];
+	if(sp != 0)
+		sp->pidonmach[mno] = 0;
+	pidproc[pid] = 0;
+
+	/*
+	 * clean out all dead pids from the stlb;
+	 */
+	entry = m->stb;
+	for(etab = &entry[STLBSIZE]; entry < etab; entry++)
+		if(pidproc[TLBPID(entry->virt)] == 0)
+			entry->virt = 0;
+
+	/*
+	 * clean up the hardware
+	 */
+	for(i=tlbroff; i<NTLB; i++)	/* entries below tlbroff are left alone */
+		if(pidproc[TLBPID(gettlbvirt(i))] == 0)
+			TLBINVAL(i, pid);
+}
+
+void
+flushmmu(void)
+{
+	int pri;
+
+	pri = splhi();
+	up->newtlb = 1;		/* mmuswitch() will discard up's tlb pids */
+	mmuswitch(up);
+	splx(pri);
+}
+
+void
+checkmmu(ulong, ulong)		/* stub: both arguments are ignored */
+{
+}
+
+/*
+ * Return the number of bytes that can be accessed via KADDR(pa).
+ * If pa is not a valid argument to KADDR, return 0.
+ */
+ulong
+cankaddr(ulong pa)
+{
+	if(pa < KZERO)
+		return -KZERO - pa;
+	return 0;
+}
--- /dev/null
+++ b/sys/src/9/mt7688/mt7688
@@ -1,0 +1,57 @@
+# Onion Omega 2  mt7688
+dev
+	root
+	cons
+	swap
+	env
+	pipe
+	proc
+	mnt
+	srv
+	shr
+	dup
+#	arch
+#	draw
+#	rtc
+#	ssl
+	tls
+	cap
+	kprof
+	fs
+	sd
+	ether		netif
+	ip		arp chandial inferno ip ipv6 ipaux iproute netlog nullmedium pktmedium inferno
+	uart
+
+link
+	loopbackmedium
+	ethermedium
+	netdevmedium
+	ether7688	ethermii
+
+misc
+	uarti8250
+# emulated fp
+	fpi
+	fpimips
+	fpimem
+
+ip
+	tcp
+	udp
+	ipifc
+	icmp
+	icmp6
+	gre
+	ipmux
+	esp
+
+port
+	int cpuserver = 0;
+
+bootdir
+	/$objtype/bin/paqfs
+	/$objtype/bin/auth/factotum
+	bootfs.paq
+	boot
+#	nvram
--- /dev/null
+++ b/sys/src/9/mt7688/syscall.c
@@ -1,0 +1,296 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../port/systab.h"
+
+#include "tos.h"
+#include "ureg.h"
+
+FPsave initfp;
+
+
+/*
+ * called directly from assembler, not via trap()
+ */
+void
+syscall(Ureg* ureg)		/* run the system call whose number the user put in r1 */
+{
+	char *e;
+	u32int s;
+	ulong sp;
+	long ret;
+	int i;
+	vlong startns = 0, stopns;	/* startns stays 0 if tracing is enabled mid-call */
+	ulong scallnr;
+
+	if(!kenter(ureg))
+		panic("syscall from kernel");
+
+	m->syscall++;
+	up->insyscall = 1;
+	up->pc = ureg->pc;
+
+	scallnr = ureg->r1;
+	up->scallnr = ureg->r1;
+	spllo();
+	sp = ureg->sp;
+
+	up->nerrlab = 0;
+	ret = -1;
+
+	if(!waserror()){
+		/* reject numbers past the table and any unassigned (nil) slot in it */
+		if(scallnr >= nsyscall || systab[scallnr] == nil){
+			iprint("bad sys call number %lud pc %#lux\n",
+				scallnr, ureg->pc);
+			postnote(up, 1, "sys: bad sys call", NDebug);
+			error(Ebadarg);
+		}
+
+		if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)))
+			validaddr(sp, sizeof(Sargs), 0);
+
+		up->s = *((Sargs*)(sp));	/* spim's libc is different to mips ... */
+		/* trace only now that up->s holds this call's arguments */
+		if(up->procctl == Proc_tracesyscall){
+			iprint("tracesyscall\n");
+			delay(50);
+			syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
+			s = splhi();
+			up->procctl = Proc_stopme;
+			procctl();
+			splx(s);
+			startns = todget(nil);
+		}
+
+		up->psstate = sysctab[scallnr];
+
+//		iprint("[%luX] %s: syscall %s\n", (ulong)&ureg, up->text, sysctab[scallnr]?sysctab[scallnr]:"huh?");
+//		delay(20);
+//		dumpregs(ureg);
+
+		ret = systab[scallnr]((va_list)up->s.args);
+		poperror();
+	}else{
+		/* failure: save the error buffer for errstr */
+		e = up->syserrstr;
+		up->syserrstr = up->errstr;
+		up->errstr = e;
+//		iprint("[%lud %s] syscall %lud: %s\n",up->pid, up->text, scallnr, up->errstr);
+	}
+
+	if(up->nerrlab){	/* a waserror() was left without its poperror() */
+		iprint("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
+		for(i = 0; i < NERR; i++)
+			iprint("sp=%#p pc=%#p\n",
+				up->errlab[i].sp, up->errlab[i].pc);
+		panic("error stack");
+	}
+
+	/*
+	 *  Put return value in frame.  On the x86 the syscall is
+	 *  just another trap and the return value from syscall is
+	 *  ignored.  On other machines the return value is put into
+	 *  the results register by caller of syscall.
+	 */
+	ureg->pc += 4;
+	ureg->r1 = ret;
+
+	if(up->procctl == Proc_tracesyscall){
+		stopns = todget(nil);
+		sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
+		s = splhi();
+		up->procctl = Proc_stopme;
+		procctl();
+		splx(s);
+	}
+
+	up->insyscall = 0;
+	up->psstate = 0;
+
+	if(scallnr == NOTED)
+		noted(ureg, *((ulong*)up->s.args));
+
+	splhi();
+	if(scallnr != RFORK && (up->procctl || up->nnote))
+		notify(ureg);
+
+	/* if we delayed sched because we held a lock, sched now */
+	if(up->delaysched){
+		sched();
+	}
+
+	kexit(ureg);
+
+	/* restore EXL in status */
+	setstatus(getstatus() | EXL);
+
+}
+
+
+int
+notify(Ureg *ur)		/* set up delivery of a pending note; returns 1 if the handler will run */
+{
+	int s;
+	ulong sp;
+	char *msg;
+
+	if(up->procctl)
+		procctl();
+	if(up->nnote == 0)
+		return 0;
+
+	s = spllo();
+	qlock(&up->debug);
+	up->fpstate |= FPillegal;	/* trap() rejects fp instructions while this is set */
+	msg = popnote(ur);
+	if(msg == nil){
+		qunlock(&up->debug);
+		splx(s);
+		return 0;
+	}
+
+
+	sp = ur->usp - sizeof(Ureg) - BY2WD; /* spim libc */
+
+	if(!okaddr((ulong)up->notify, BY2WD, 0) ||
+	   !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)) {
+		iprint("suicide: bad address or sp in notify\n");
+		qunlock(&up->debug);
+		pexit("Suicide", 0);
+	}
+
+	memmove((Ureg*)sp, ur, sizeof(Ureg));	/* push user regs */
+	*(Ureg**)(sp-BY2WD) = up->ureg;	/* word under Ureg is old up->ureg */
+	up->ureg = (void*)sp;
+
+	sp -= BY2WD+ERRMAX;
+	memmove((char*)sp, msg, ERRMAX);	/* push err string */
+
+	sp -= 3*BY2WD;
+	*(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;	/* arg 2 is string */
+	ur->r1 = (long)up->ureg;		/* arg 1 is ureg* */
+	((ulong*)sp)[1] = (ulong)up->ureg;	/* arg 1 0(FP) is ureg* */
+	((ulong*)sp)[0] = 0;			/* arg 0 is pc */
+	ur->usp = sp;
+	/*
+	 * arrange to resume at user's handler as if handler(ureg, errstr)
+	 * were being called.
+	 */
+	ur->pc = (ulong)up->notify;
+
+	qunlock(&up->debug);
+	splx(s);
+	return 1;
+}
+
+
+/*
+ * Return user to state before notify(); called from user's handler.
+ */
+void
+noted(Ureg *kur, ulong arg0)	/* kur: registers to return with; arg0: the N* action requested */
+{
+	Ureg *nur;
+	ulong oureg, sp;
+
+	qlock(&up->debug);
+	if(arg0!=NRSTR && !up->notified) {
+		qunlock(&up->debug);
+		pprint("call to noted() when not notified\n");
+		pexit("Suicide", 0);
+	}
+	up->notified = 0;
+
+	up->fpstate &= ~FPillegal;	/* clear the flag notify() set */
+
+	nur = up->ureg;		/* the Ureg notify() pushed on the user stack */
+
+	oureg = (ulong)nur;
+	if((oureg & (BY2WD-1)) || !okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
+		pprint("bad up->ureg in noted or call to noted() when not notified\n");
+		qunlock(&up->debug);
+		pexit("Suicide", 0);
+	}
+
+	setregisters(kur, (char*)kur, (char*)up->ureg, sizeof(Ureg));	/* copy the saved user Ureg over *kur */
+	switch(arg0) {
+	case NCONT:
+	case NRSTR:				/* only used by APE */
+		if(!okaddr(nur->pc, BY2WD, 0) || !okaddr(nur->usp, BY2WD, 0)){
+			pprint("suicide: trap in noted\n");
+			qunlock(&up->debug);
+			pexit("Suicide", 0);
+		}
+		up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));	/* word under the Ureg is the previous up->ureg */
+		qunlock(&up->debug);
+		splhi();
+		break;
+
+	case NSAVE:				/* only used by APE */
+		if(!okaddr(nur->pc, BY2WD, 0) || !okaddr(nur->usp, BY2WD, 0)){
+			pprint("suicide: trap in noted\n");
+			qunlock(&up->debug);
+			pexit("Suicide", 0);
+		}
+		qunlock(&up->debug);
+		sp = oureg-4*BY2WD-ERRMAX;
+		splhi();
+		kur->sp = sp;
+		kur->r1 = oureg;		/* arg 1 is ureg* */
+		((ulong*)sp)[1] = oureg;	/* arg 1 0(FP) is ureg* */
+		((ulong*)sp)[0] = 0;		/* arg 0 is pc */
+		break;
+
+	default:
+		pprint("unknown noted arg %#lux\n", arg0);
+		up->lastnote->flag = NDebug;
+		/* fall through */
+
+	case NDFLT:
+		if(up->lastnote->flag == NDebug)
+			pprint("suicide: %s\n", up->lastnote->msg);
+		qunlock(&up->debug);
+		pexit(up->lastnote->msg, up->lastnote->flag!=NDebug);
+	}
+}
+
+
+void
+forkchild(Proc *p, Ureg *ur)
+{
+	ulong ksp;
+	Ureg *child;
+
+	/* the child resumes in forkret() with a copy of the parent's registers */
+	ksp = (ulong)p - UREGSIZE;
+	child = (Ureg*)(ksp + 2*BY2WD);
+	memmove(child, ur, sizeof(Ureg));
+	child->pc += 4;		/* step past the syscall, as syscall() does for the parent */
+	child->r1 = 0;		/* r1 carries the return value: 0 in the child */
+	p->sched.pc = (ulong)forkret;
+	p->sched.sp = ksp;
+}
+
+
+/* set up user registers before return from exec() */
+uintptr
+execregs(ulong entry, ulong ssize, ulong nargs)
+{
+	ulong *argp;
+	Ureg *regs;
+
+	/* store the argument count in the word below the top ssize bytes of stack */
+	argp = (ulong*)(USTKTOP - ssize) - 1;
+	*argp = nargs;
+
+	/* aim the saved user registers at the new stack and entry point */
+	regs = (Ureg*)up->dbgreg;
+	regs->pc = entry - 4;		/* syscall() adds 4 before returning */
+	regs->usp = (ulong)argp;
+
+	/* the return value is the address of the kernel/user shared data */
+	return USTKTOP-sizeof(Tos);
+}
--- /dev/null
+++ b/sys/src/9/mt7688/trap.c
@@ -1,0 +1,381 @@
+/*
+ * traps, exceptions, faults and interrupts on mt7688
+ */
+#include	"u.h"
+#include	"tos.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"ureg.h"
+#include	"io.h"
+#include	"../port/error.h"
+
+
+int	intr(Ureg*);
+void	kernfault(Ureg*, int);
+void	noted(Ureg*, ulong);
+void	rfnote(Ureg**);
+
+char *excname[] =	/* indexed by (cause>>2)&EXCMASK, see trap() */
+{
+	"trap: external interrupt",
+	"trap: TLB modification (store to unwritable)",
+	"trap: TLB miss (load or fetch)",
+	"trap: TLB miss (store)",
+	"trap: address error (load or fetch)",
+	"trap: address error (store)",
+	"trap: bus error (fetch)",
+	"trap: bus error (data load or store)",
+	"trap: system call",
+	"breakpoint",
+	"trap: reserved instruction",
+	"trap: coprocessor unusable",
+	"trap: arithmetic overflow",
+	"trap: TRAP exception",
+	"trap: VCE (instruction)",
+	"trap: floating-point exception",
+	"trap: coprocessor 2 implementation-specific", /* used as sys call for debugger */
+	"trap: corextend unusable",
+	"trap: precise coprocessor 2 exception",
+	"trap: TLB read-inhibit",
+	"trap: TLB execute-inhibit",
+	"trap: undefined 21",
+	"trap: undefined 22",
+	"trap: WATCH exception",
+	"trap: machine checkcore",
+	"trap: undefined 25",
+	"trap: undefined 26",
+	"trap: undefined 27",
+	"trap: undefined 28",
+	"trap: undefined 29",
+	"trap: cache error",
+	"trap: VCE (data)",
+};
+
+char *fpcause[] =	/* indexed by bit number, see fpexcname() */
+{
+	"inexact operation",
+	"underflow",
+	"overflow",
+	"division by zero",
+	"invalid operation",
+};
+char	*fpexcname(Ureg*, ulong, char*, uint);
+#define FPEXPMASK	(0x3f<<12)	/* Floating exception bits in fcr31 */
+
+struct {
+	char	*name;
+	uint	off;
+} regname[] = {		/* name and Ureg offset of each register dumpregs() prints */
+	"STATUS", offsetof(Ureg, status),
+	"PC",	offsetof(Ureg, pc),
+	"SP",	offsetof(Ureg, sp),
+	"CAUSE",offsetof(Ureg, cause),
+	"BADADDR", offsetof(Ureg, badvaddr),
+	"TLBVIRT", offsetof(Ureg, tlbvirt),
+	"HI",	offsetof(Ureg, hi),
+	"LO",	offsetof(Ureg, lo),
+	"R31",	offsetof(Ureg, r31),
+	"R30",	offsetof(Ureg, r30),
+	"R28",	offsetof(Ureg, r28),
+	"R27",	offsetof(Ureg, r27),
+	"R26",	offsetof(Ureg, r26),
+	"R25",	offsetof(Ureg, r25),
+	"R24",	offsetof(Ureg, r24),
+	"R23",	offsetof(Ureg, r23),
+	"R22",	offsetof(Ureg, r22),
+	"R21",	offsetof(Ureg, r21),
+	"R20",	offsetof(Ureg, r20),
+	"R19",	offsetof(Ureg, r19),
+	"R18",	offsetof(Ureg, r18),
+	"R17",	offsetof(Ureg, r17),
+	"R16",	offsetof(Ureg, r16),
+	"R15",	offsetof(Ureg, r15),
+	"R14",	offsetof(Ureg, r14),
+	"R13",	offsetof(Ureg, r13),
+	"R12",	offsetof(Ureg, r12),
+	"R11",	offsetof(Ureg, r11),
+	"R10",	offsetof(Ureg, r10),
+	"R9",	offsetof(Ureg, r9),
+	"R8",	offsetof(Ureg, r8),
+	"R7",	offsetof(Ureg, r7),
+	"R6",	offsetof(Ureg, r6),
+	"R5",	offsetof(Ureg, r5),
+	"R4",	offsetof(Ureg, r4),
+	"R3",	offsetof(Ureg, r3),
+	"R2",	offsetof(Ureg, r2),
+	"R1",	offsetof(Ureg, r1),
+};
+
+
+void
+kvce(Ureg *ur, int ecode)	/* print diagnostics for a VCE trap; called from trap() */
+{
+	char c;
+	Pte **p;
+	Page **pg;
+	Segment *s;
+	ulong addr, soff;
+
+	c = 'D';
+	if(ecode == CVCEI)
+		c = 'I';
+	print("Trap: VCE%c: addr=%#lux\n", c, ur->badvaddr);
+	if(up && !(ur->badvaddr & KSEGM)) {	/* address has no KSEGM bits set: look it up in up's segments */
+		addr = ur->badvaddr;
+		s = seg(up, addr, 0);
+		if(s == nil){
+			print("kvce: no seg for %#lux\n", addr);
+			for(;;);	/* hang here */
+		}
+		addr &= ~(BY2PG-1);	/* round down to the page */
+		soff = addr - s->base;
+		p = &s->map[soff/PTEMAPMEM];
+		if(*p){
+			pg = &(*p)->pages[(soff&(PTEMAPMEM-1))/BY2PG];
+			if(*pg)
+				print("kvce: pa=%#lux, va=%#lux\n",
+					(*pg)->pa, (*pg)->va);
+			else
+				print("kvce: no *pg\n");
+		}else
+			print("kvce: no *p\n");
+	}
+}
+
+void
+trap(Ureg *ur)		/* handle the exception or interrupt described by ur->cause */
+{
+	int ecode, clockintr, user, cop, x, fpchk;
+	ulong fpfcr31;
+	char buf[2*ERRMAX], buf1[ERRMAX], *fpexcep;
+	static int dumps;
+
+	if (up && (char *)(ur) - ((char *)up - KSTACK) < 1024 && dumps++ == 0) {	/* reported once only */
+		iprint("trap: proc %ld kernel stack getting full\n", up->pid);
+		dumpregs(ur);
+		dumpstack();
+		for(;;);
+	}
+	if (up == nil &&
+	    (char *)(ur) - (char *)m->stack < 1024 && dumps++ == 0) {
+		iprint("trap: cpu%d kernel stack getting full\n", m->machno);
+		dumpregs(ur);
+		dumpstack();
+		for(;;);
+	}
+	user = kenter(ur);
+	if (ur->cause & TS)
+		panic("trap: tlb shutdown");
+	ecode = (ur->cause>>2)&EXCMASK;	/* also the index into excname[] */
+	fpchk = 0;
+	clockintr = 0;
+	switch(ecode){
+	case CINT:
+		clockintr = intr(ur);
+		break;
+
+	case CFPE:
+		panic("FP exception but no FPU");	/* no fpu on 24KEc */
+		break;
+
+	case CTLBM:
+	case CTLBL:
+	case CTLBS:
+		if(up == nil || !user && (ur->badvaddr & KSEGM) == KSEG3) {	/* kernel kmap address */
+			kfault(ur);
+			break;
+		}
+		x = up->insyscall;	/* saved and restored around the fault handler */
+		up->insyscall = 1;
+		spllo();
+		faultmips(ur, user, ecode);
+		up->insyscall = x;
+		break;
+
+	case CVCEI:
+	case CVCED:
+		kvce(ur, ecode);
+		goto Default;
+
+	case CWATCH:
+		if(!user)
+			panic("watchpoint trap from kernel mode pc=%#p",
+				ur->pc);
+		fpwatch(ur);
+		break;
+
+	case CCPU:
+		cop = (ur->cause>>28)&3;
+		if(user && up && cop == 1) {
+			if(up->fpstate & FPillegal) {
+				/* someone used floating point in a note handler */
+				postnote(up, 1,
+					"sys: floating point in note handler",
+					NDebug);
+				break;
+			}
+			/* no fpu, so we can only emulate fp ins'ns */
+			if (fpuemu(ur) < 0)
+				postnote(up, 1,
+					"sys: fp instruction not emulated",
+					NDebug);
+			else
+				fpchk = 1;	/* check the emulated fp status below */
+			break;
+		}
+		/* Fallthrough */
+
+	Default:
+	default:
+		if(user) {		/* user fault: post a note to the process */
+			spllo();
+			snprint(buf, sizeof buf, "sys: %s", excname[ecode]);
+			postnote(up, 1, buf, NDebug);
+			break;
+		}
+		if (ecode == CADREL || ecode == CADRES)
+			iprint("kernel addr exception for va %#p pid %#ld %s\n",
+				ur->badvaddr, (up? up->pid: 0),
+				(up? up->text: ""));
+		print("cpu%d: kernel %s pc=%#lux\n",
+			m->machno, excname[ecode], ur->pc);
+		dumpregs(ur);
+		dumpstack();
+		if(m->machno == 0)
+			spllo();
+		exit(1);
+	}
+
+	if(fpchk) {
+		fpfcr31 = up->fpsave->fpstatus;
+		if((fpfcr31>>12) & ((fpfcr31>>7)|0x20) & 0x3f) {
+			spllo();
+			fpexcep	= fpexcname(ur, fpfcr31, buf1, sizeof buf1);
+			snprint(buf, sizeof buf, "sys: fp: %s", fpexcep);
+			postnote(up, 1, buf, NDebug);
+		}
+	}
+
+	splhi();
+
+	/* delaysched set because we held a lock or because our quantum ended */
+	if(up && up->delaysched && clockintr){
+		sched();
+		splhi();
+	}
+
+	if(user){
+		notify(ur);
+		/* replicate fpstate to ureg status */
+	//	if(up->fpstate != FPactive)
+	//		ur->status &= ~CU1;
+		kexit(ur);
+	}
+}
+
+
+char*
+fpexcname(Ureg *ur, ulong fcr31, char *buf, uint size)
+{
+	int bit;
+	char *why;
+	ulong pc, pending;
+
+	why = 0;
+	if(fcr31 & (1<<17))
+		why = "unimplemented operation";
+	else{
+		/* trap enable bits (from bit 7) anded with the exception bits (from bit 12) */
+		pending = (fcr31>>7) & (fcr31>>12);
+		for(bit = 0; bit < 5; bit++)	/* the highest set bit wins */
+			if(pending & (1<<bit))
+				why = fpcause[bit];
+	}
+
+	if(why == 0)
+		return "no floating point exception";
+
+	pc = ur->pc;
+	if(ur->cause & BD)	/* branch delay */
+		pc += 4;
+	snprint(buf, size, "%s fppc=%#lux", why, pc);
+	return buf;
+}
+
+
+static void
+getpcsp(ulong *pc, ulong *sp)	/* store the caller's pc in *pc and a stack address in *sp */
+{
+	*pc = getcallerpc(&pc);
+	*sp = (ulong)&pc-4;	/* computed from where the first argument lives */
+}
+
+void
+callwithureg(void (*fn)(Ureg*))	/* call fn with a Ureg holding only pc, sp and r31 */
+{
+	Ureg ureg;
+
+	memset(&ureg, 0, sizeof ureg);	/* every other register reads as zero */
+	getpcsp((ulong*)&ureg.pc, (ulong*)&ureg.sp);
+	ureg.r31 = getcallerpc(&fn);
+	fn(&ureg);
+}
+
+static void
+_dumpstack(Ureg *ureg)		/* print stack words that look like kernel text addresses */
+{
+	ulong l, v, top, i;
+	extern ulong etext;
+
+	iprint("ktrace /kernel/path %.8lux %.8lux %.8lux\n",
+		ureg->pc, ureg->sp, ureg->r31);
+	if(up == nil)
+		top = (ulong)MACHADDR + MACHSIZE;
+	else
+		top = (ulong)up;
+	i = 0;
+	for(l=ureg->sp; l < top; l += BY2WD) {
+		v = *(ulong*)l;
+		if(KTZERO < v && v < (ulong)&etext) {	/* value lies within kernel text */
+			iprint("%.8lux=%.8lux ", l, v);
+			if((++i%4) == 0){	/* four entries per line */
+				iprint("\n");	/* iprint like the data, so the output stays in order */
+				delay(200);
+			}
+		}
+	}
+	iprint("\n");
+}
+
+void
+dumpstack(void)		/* run _dumpstack() on a Ureg built by callwithureg() */
+{
+	callwithureg(_dumpstack);
+}
+
+static ulong
+R(Ureg *ur, int i)
+{
+	ulong *field;
+
+	field = (ulong*)((uchar*)ur + regname[i].off);	/* regname[i]'s slot inside *ur */
+	return *field;
+}
+
+void
+dumpregs(Ureg *ur)
+{
+	int n;
+
+	if(up == nil)
+		iprint("registers for kernel\n");
+	else
+		iprint("registers for %s %lud\n", up->text, up->pid);
+
+	/* two registers per output line, so regname must hold an even number of entries */
+	for(n = 0; n < nelem(regname); n += 2)
+		iprint("%s\t%#.8lux\t%s\t%#.8lux\n", regname[n].name, R(ur, n),
+			regname[n+1].name, R(ur, n+1));
+}
--- /dev/null
+++ b/sys/src/9/mt7688/uarti8250.c
@@ -1,0 +1,850 @@
+/*
+ * 8250-like UART
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+enum {
+	Pollstuckoutput = 1,	/* only mentioned in commented-out code in i8250enable */
+};
+
+
+enum {					/* registers: indices into Ctlr.io */
+	Rbr		= 0,		/* Receiver Buffer (RO) */
+	Thr		= 0,		/* Transmitter Holding (WO) */
+	Ier		= 1,		/* Interrupt Enable */
+	Iir		= 2,		/* Interrupt Identification (RO) */
+	Fcr		= 2,		/* FIFO Control (WO) */
+	Lcr		= 3,		/* Line Control */
+	Mcr		= 4,		/* Modem Control */
+	Lsr		= 5,		/* Line Status */
+	Msr		= 6,		/* Modem Status */
+	Scr		= 7,		/* Scratch Pad; also sizes Ctlr.sticky */
+//	Mdr		= 8,		/* Mode Def'n (omap rw) missing on mt7688*/
+//	Usr		= 31,		/* Uart Status Register; missing in omap? */
+	Dll		= 0,		/* Divisor Latch LSB (accessed here with Dlab set) */
+	Dlm		= 1,		/* Divisor Latch MSB (accessed here with Dlab set) */
+};
+
+enum {					/* Usr; only used under #ifdef notdef */
+	Busy		= 0x01,
+};
+
+enum {					/* Ier */
+	Erda		= 0x01,		/* Enable Received Data Available */
+	Ethre		= 0x02,		/* Enable Thr Empty */
+	Erls		= 0x04,		/* Enable Receiver Line Status */
+	Ems		= 0x08,		/* Enable Modem Status */
+};
+
+enum {					/* Iir */
+	Ims		= 0x00,		/* Ms interrupt */
+	Ip		= 0x01,		/* Interrupt Pending (not): set means none pending */
+	Ithre		= 0x02,		/* Thr Empty */
+	Irda		= 0x04,		/* Received Data Available */
+	Irls		= 0x06,		/* Receiver Line Status */
+	Ictoi		= 0x0C,		/* Character Time-out Indication */
+	IirMASK		= 0x3F,		/* bits the interrupt handler switches on */
+	Ifena		= 0xC0,		/* FIFOs enabled */
+};
+
+enum {					/* Fcr */
+	FIFOena		= 0x01,		/* FIFO enable */
+	FIFOrclr	= 0x02,		/* clear Rx FIFO */
+	FIFOtclr	= 0x04,		/* clear Tx FIFO */
+//	FIFOdma		= 0x08,
+	FIFO1		= 0x00,		/* Rx FIFO trigger level 1 byte */
+	FIFO4		= 0x40,		/*	4 bytes */
+	FIFO8		= 0x80,		/*	8 bytes */
+	FIFO14		= 0xC0,		/*	14 bytes */
+};
+
+enum {					/* Lcr */
+	Wls5		= 0x00,		/* Word Length Select 5 bits/byte */
+	Wls6		= 0x01,		/*	6 bits/byte */
+	Wls7		= 0x02,		/*	7 bits/byte */
+	Wls8		= 0x03,		/*	8 bits/byte */
+	WlsMASK		= 0x03,
+	Stb		= 0x04,		/* 2 stop bits */
+	Pen		= 0x08,		/* Parity Enable */
+	Eps		= 0x10,		/* Even Parity Select */
+	Stp		= 0x20,		/* Stick Parity */
+	Brk		= 0x40,		/* Break */
+	Dlab		= 0x80,		/* Divisor Latch Access Bit */
+};
+
+enum {					/* Mcr */
+	Dtr		= 0x01,		/* Data Terminal Ready */
+	Rts		= 0x02,		/* Ready To Send */
+	Out1		= 0x04,		/* no longer in use */
+//	Ie		= 0x08,		/* IRQ Enable (cd_sts_ch on omap) */
+	Dm		= 0x10,		/* Diagnostic Mode loopback */
+};
+
+enum {					/* Lsr */
+	Dr		= 0x01,		/* Data Ready */
+	Oe		= 0x02,		/* Overrun Error */
+	Pe		= 0x04,		/* Parity Error */
+	Fe		= 0x08,		/* Framing Error */
+	Bi		= 0x10,		/* Break Interrupt */
+	Thre		= 0x20,		/* Thr Empty */
+	Temt		= 0x40,		/* Transmitter Empty */
+	FIFOerr		= 0x80,		/* error in receiver FIFO */
+};
+
+enum {					/* Msr */
+	Dcts		= 0x01,		/* Delta Cts */
+	Ddsr		= 0x02,		/* Delta Dsr */
+	Teri		= 0x04,		/* Trailing Edge of Ri */
+	Ddcd		= 0x08,		/* Delta Dcd */
+	Cts		= 0x10,		/* Clear To Send */
+	Dsr		= 0x20,		/* Data Set Ready */
+	Ri		= 0x40,		/* Ring Indicator */
+	Dcd		= 0x80,		/* Carrier Detect */
+};
+
+enum {					/* Mdr; its register index is commented out above */
+	Modemask	= 7,
+	Modeuart	= 0,
+};
+
+
+typedef struct Ctlr {
+	u32int*	io;		/* register base; the csr8 macros index it by register number */
+	int	irq;		/* passed to intrenable and intrdisable */
+	int	tbdf;		/* initialised to -1 in i8250ctlr; not otherwise used in this file */
+	int	iena;		/* set once i8250enable has called intrenable */
+	int	poll;		/* nonzero: i8250enable never calls intrenable */
+
+	uchar	sticky[Scr+1];	/* per-register bits that csr8w ORs into each write */
+
+	Lock;			/* held (ilock) around the FIFO probe and FIFO level changes */
+	int	hasfifo;	/* set when Iir showed Ifena after FIFOena was written */
+	int	checkfifo;	/* set after the one-time FIFO probe in i8250enable */
+	int	fena;		/* level last passed to i8250fifo; the i%d status field */
+} Ctlr;
+
+extern PhysUart i8250physuart;
+
+static Ctlr i8250ctlr[] = {	/* one controller: the console UART at PHYSCONS */
+{	.io	= (u32int*)PHYSCONS,
+	.irq	= IRQuartl,
+	.tbdf	= -1,
+	.poll	= 0, },		/* 0: i8250enable may install the interrupt handler */
+};
+
+static Uart i8250uart[] = {	/* indexed with CONSOLE elsewhere in this file */
+{	.regs	= &i8250ctlr[0], /* the only Ctlr entry (not [2]) */
+	.name	= "uartL",
+	.freq	= 3686000,	/* Not used; i8250freq appears only under #ifdef notdef */
+	.phys	= &i8250physuart,
+	.console = 1,
+	.next	= nil, },
+};
+
+#define csr8r(c, r)	((c)->io[r])	/* read register r */
+#define csr8w(c, r, v)	((c)->io[r] = (c)->sticky[r] | (v))	/* write v OR'd with sticky[r]; r is evaluated twice */
+#define csr8o(c, r, v)	((c)->io[r] = (v))	/* write v alone, ignoring sticky[r] */
+
+static long
+i8250status(Uart* uart, void* buf, long n, long offset)
+{
+	char *p;
+	Ctlr *ctlr;
+	uchar ier, lcr, mcr, msr;
+	/* format line settings and error counters, then return them via readstr */
+	ctlr = uart->regs;
+	p = smalloc(READSTR);	/* was malloc() with the result used unchecked */
+	mcr = ctlr->sticky[Mcr];
+	msr = csr8r(ctlr, Msr);	/* the only value read from the chip; the rest are sticky copies */
+	ier = ctlr->sticky[Ier];
+	lcr = ctlr->sticky[Lcr];
+	snprint(p, READSTR,
+		"b%d c%d d%d e%d l%d m%d p%c r%d s%d i%d\n"
+		"dev(%d) type(%d) framing(%d) overruns(%d) "
+		"berr(%d) serr(%d)%s%s%s%s\n",
+		/* first line: settings, in format order */
+		uart->baud,
+		uart->hup_dcd,
+		(msr & Dsr) != 0,
+		uart->hup_dsr,
+		(lcr & WlsMASK) + 5,
+		(ier & Ems) != 0,
+		(lcr & Pen) ? ((lcr & Eps) ? 'e': 'o'): 'n',
+		(mcr & Rts) != 0,
+		(lcr & Stb) ? 2: 1,
+		ctlr->fena,
+		/* second line: device numbers, error counters, modem lines */
+		uart->dev,
+		uart->type,
+		uart->ferr,
+		uart->oerr,
+		uart->berr,
+		uart->serr,
+		(msr & Cts) ? " cts": "",
+		(msr & Dsr) ? " dsr": "",
+		(msr & Dcd) ? " dcd": "",
+		(msr & Ri) ? " ring": ""
+	);
+	n = readstr(offset, buf, n, p);	/* copy out the part selected by offset and n */
+	free(p);
+
+	return n;
+}
+
+static void
+i8250fifo(Uart* uart, int level)	/* level: 0 = FIFO off, else receive trigger level */
+{
+	Ctlr *ctlr;
+
+	ctlr = uart->regs;
+	if(ctlr->hasfifo == 0)	/* hasfifo is only ever set by the probe in i8250enable */
+		return;
+
+	/*
+	 * Changing the FIFOena bit in Fcr flushes data
+	 * from both receive and transmit FIFOs; there's
+	 * no easy way to guarantee not losing data on
+	 * the receive side, but it's possible to wait until
+	 * the transmitter is really empty.
+	 */
+	ilock(ctlr);
+	while(!(csr8r(ctlr, Lsr) & Temt))	/* unbounded spin with the lock held */
+		;
+
+	/*
+	 * Set the trigger level, default is the max.
+	 * value.
+	 * Some UARTs require FIFOena to be set before
+	 * other bits can take effect, so set it twice.
+	 */
+	ctlr->fena = level;	/* remembered as given; i8250status reports it */
+	switch(level){
+	case 0:			/* level stays 0, so FIFOena is written clear */
+		break;
+	case 1:
+		level = FIFO1|FIFOena;
+		break;
+	case 4:
+		level = FIFO4|FIFOena;
+		break;
+	case 8:
+		level = FIFO8|FIFOena;
+		break;
+	default:		/* any other value selects the 14-byte trigger */
+		level = FIFO14|FIFOena;
+		break;
+	}
+	csr8w(ctlr, Fcr, level);
+	csr8w(ctlr, Fcr, level);
+	iunlock(ctlr);
+}
+
+static void
+i8250dtr(Uart* uart, int on)
+{
+	Ctlr *c;
+
+	/*
+	 * Set or clear DTR in the sticky Mcr bits; csr8w writes them out.
+	 */
+	c = uart->regs;
+	if(!on)
+		c->sticky[Mcr] &= ~Dtr;
+	else
+		c->sticky[Mcr] |= Dtr;
+	csr8w(c, Mcr, 0);
+}
+
+static void
+i8250rts(Uart* uart, int on)
+{
+	Ctlr *c;
+
+	/*
+	 * Set or clear RTS in the sticky Mcr bits; csr8w writes them out.
+	 */
+	c = uart->regs;
+	if(!on)
+		c->sticky[Mcr] &= ~Rts;
+	else
+		c->sticky[Mcr] |= Rts;
+	csr8w(c, Mcr, 0);
+}
+
+static void
+i8250modemctl(Uart* uart, int on)
+{
+	Ctlr *c;
+
+	c = uart->regs;
+	ilock(&uart->tlock);
+	if(!on){
+		/* no modem-status interrupts: record CTS as asserted */
+		c->sticky[Ier] &= ~Ems;
+		csr8w(c, Ier, 0);
+		uart->modem = 0;
+		uart->cts = 1;
+	}else{
+		c->sticky[Ier] |= Ems;
+		csr8w(c, Ier, 0);
+		uart->modem = 1;
+		uart->cts = csr8r(c, Msr) & Cts;	/* sample CTS from Msr now */
+	}
+	iunlock(&uart->tlock);
+
+	/* modem needs fifo */
+	(*uart->phys->fifo)(uart, on);
+}
+
+static int
+i8250parity(Uart* uart, int parity)
+{
+	int bits;
+	Ctlr *c;
+
+	/*
+	 * Translate the parity letter into Lcr bits;
+	 * anything but 'e', 'o' or 'n' is refused.
+	 */
+	if(parity == 'e')
+		bits = Eps|Pen;
+	else if(parity == 'o')
+		bits = Pen;
+	else if(parity == 'n')
+		bits = 0;
+	else
+		return -1;	/* nothing has been written at this point */
+
+	/* swap the new bits into the sticky Lcr copy and write it out */
+	c = uart->regs;
+	c->sticky[Lcr] = (c->sticky[Lcr] & ~(Eps|Pen)) | bits;
+	csr8w(c, Lcr, 0);
+
+	uart->parity = parity;
+
+	return 0;
+}
+
+static int
+i8250stop(Uart* uart, int stop)
+{
+	Ctlr *c;
+	int twobits;
+
+	/* only 1 or 2 stop bits can be selected */
+	if(stop != 1 && stop != 2)
+		return -1;
+	twobits = (stop == 2)? Stb: 0;
+
+	/*
+	 * Replace Stb in the sticky Lcr copy,
+	 * then write the register so the chip
+	 * is given the new value.
+	 */
+	c = uart->regs;
+	c->sticky[Lcr] = (c->sticky[Lcr] & ~Stb) | twobits;
+	csr8w(c, Lcr, 0);
+
+	uart->stop = stop;
+
+	return 0;
+}
+
+static int
+i8250bits(Uart* uart, int bits)
+{
+	int wls;
+	Ctlr *c;
+
+	switch(bits){		/* word-length code for 5 to 8 data bits */
+	case 5:
+		wls = Wls5;
+		break;
+	case 6:
+		wls = Wls6;
+		break;
+	case 7:
+		wls = Wls7;
+		break;
+	case 8:
+		wls = Wls8;
+		break;
+	default:
+		return -1;	/* unsupported size; nothing written */
+	}
+
+	/* update the sticky Lcr copy, then the register itself */
+	c = uart->regs;
+	c->sticky[Lcr] = (c->sticky[Lcr] & ~WlsMASK) | wls;
+	csr8w(c, Lcr, 0);
+
+	uart->bits = bits;
+
+	return 0;
+}
+
+static int
+i8250baud(Uart* uart, int baud)
+{
+#ifdef notdef				/* compiled out: don't change the speed */
+	ulong bgc;
+	Ctlr *ctlr;
+	extern int i8250freq;	/* In the config file */
+
+	/*
+	 * Set the Baud rate by calculating and setting the Baud rate
+	 * Generator Constant. This will work with fairly non-standard
+	 * Baud rates.
+	 */
+	if(i8250freq == 0 || baud <= 0)
+		return -1;
+	bgc = (i8250freq+8*baud-1)/(16*baud);
+
+	ctlr = uart->regs;
+	while(csr8r(ctlr, Usr) & Busy)	/* NOTE(review): Usr is commented out of the register enum */
+		delay(1);
+	csr8w(ctlr, Lcr, Dlab);		/* begin kludge */
+	csr8o(ctlr, Dlm, bgc>>8);
+	csr8o(ctlr, Dll, bgc);
+	csr8w(ctlr, Lcr, 0);
+#endif
+	uart->baud = baud;	/* with notdef undefined this is all that happens: no check, no divisor write */
+	return 0;
+}
+
+static void
+i8250break(Uart* uart, int ms)
+{
+	Ctlr *c;
+	int hold;
+
+	/* the tsleep below sleeps on up->sleep, so a process is required */
+	if(up == nil)
+		panic("i8250break: nil up");
+
+	/* assert Brk for ms, or for 200 when ms is not positive */
+	hold = (ms > 0)? ms: 200;
+
+	c = uart->regs;
+	csr8w(c, Lcr, Brk);
+	tsleep(&up->sleep, return0, 0, hold);
+	csr8w(c, Lcr, 0);
+}
+
+static void
+emptyoutstage(Uart *uart, int n)
+{
+	_uartputs((char *)uart->op, n);	/* send n staged bytes through the polled console writer */
+	uart->op = uart->oe = uart->ostage;	/* reset both staging pointers to the buffer start */
+}
+
+static void
+i8250kick(Uart* uart)
+{
+	int i;
+	Ctlr *ctlr;
+	/* push staged or queued output to the chip; does nothing while uart->blocked */
+	if(/* uart->cts == 0 || */ uart->blocked)
+		return;
+
+	if(!normalprint) {			/* early: drain everything via _uartputs */
+		if (uart->op < uart->oe)
+			emptyoutstage(uart, uart->oe - uart->op);
+		while ((i = uartstageoutput(uart)) > 0)
+			emptyoutstage(uart, i);
+		return;
+	}
+
+	/* nothing more to send? then disable xmit intr */
+	ctlr = uart->regs;
+	if (uart->op >= uart->oe && qlen(uart->oq) == 0 &&
+	    csr8r(ctlr, Lsr) & Temt) {
+		ctlr->sticky[Ier] &= ~Ethre;
+		csr8w(ctlr, Ier, 0);
+		return;
+	}
+
+	/*
+	 *  128 here is an arbitrary limit to make sure
+	 *  we don't stay in this loop too long.  If the
+	 *  chip's output queue is longer than 128, too
+	 *  bad -- presotto
+	 */
+	for(i = 0; i < 128; i++){
+		if(!(csr8r(ctlr, Lsr) & Thre))	/* holding register still full */
+			break;
+		if(uart->op >= uart->oe && uartstageoutput(uart) == 0)	/* stage empty and no refill */
+			break;
+		csr8o(ctlr, Thr, *uart->op++);		/* start tx */
+		ctlr->sticky[Ier] |= Ethre;
+		csr8w(ctlr, Ier, 0);			/* intr when done */
+	}
+}
+
+void
+serialkick(void)
+{
+	uartkick(&i8250uart[CONSOLE]);	/* kick output on the console entry of i8250uart */
+}
+
+static void
+i8250interrupt(Ureg*, void* arg)
+{
+	Ctlr *ctlr;
+	Uart *uart;
+	int iir, lsr, old, r;
+	/* service every cause Iir reports; arg is the Uart (also called directly with a nil Ureg) */
+	uart = arg;
+	ctlr = uart->regs;
+	for(iir = csr8r(ctlr, Iir); !(iir & Ip); iir = csr8r(ctlr, Iir)){	/* until Ip is set */
+		switch(iir & IirMASK){
+		case Ims:		/* Ms interrupt */
+			r = csr8r(ctlr, Msr);
+			if(r & Dcts){
+				ilock(&uart->tlock);
+				old = uart->cts;
+				uart->cts = r & Cts;
+				if(old == 0 && uart->cts)	/* CTS just went from clear to set */
+					uart->ctsbackoff = 2;
+				iunlock(&uart->tlock);
+			}
+		 	if(r & Ddsr){
+				old = r & Dsr;	/* despite the name, this is the new DSR state */
+				if(uart->hup_dsr && uart->dsr && !old)
+					uart->dohup = 1;
+				uart->dsr = old;
+			}
+		 	if(r & Ddcd){
+				old = r & Dcd;	/* likewise the new DCD state */
+				if(uart->hup_dcd && uart->dcd && !old)
+					uart->dohup = 1;
+				uart->dcd = old;
+			}
+			break;
+		case Ithre:		/* Thr Empty */
+			uartkick(uart);
+			break;
+		case Irda:		/* Received Data Available */
+		case Irls:		/* Receiver Line Status */
+		case Ictoi:		/* Character Time-out Indication */
+			/*
+			 * Consume any received data.
+			 * If the received byte came in with a break,
+			 * parity or framing error, throw it away;
+			 * overrun is an indication that something has
+			 * already been tossed.
+			 */
+			while((lsr = csr8r(ctlr, Lsr)) & Dr){
+				if(lsr & (FIFOerr|Oe))
+					uart->oerr++;
+				if(lsr & Pe)
+					uart->perr++;
+				if(lsr & Fe)
+					uart->ferr++;
+				r = csr8r(ctlr, Rbr);	/* always read the byte, even if it is discarded */
+				if(!(lsr & (Bi|Fe|Pe)))
+					uartrecv(uart, r);
+			}
+			break;
+
+		default:
+			iprint("weird uart interrupt type %#2.2uX\n", iir);
+			break;
+		}
+	}
+}
+
+static void
+i8250disable(Uart* uart)
+{
+	Ctlr *c;
+
+	/* drop DTR and RTS and switch the FIFO off via the PhysUart hooks */
+	(*uart->phys->dtr)(uart, 0);
+	(*uart->phys->rts)(uart, 0);
+	(*uart->phys->fifo)(uart, 0);
+
+	/* clear every interrupt enable bit in Ier */
+	c = uart->regs;
+	c->sticky[Ier] = 0;
+	csr8w(c, Ier, 0);
+
+	/* remove the handler only if iena says it was installed */
+	if(c->iena == 0)
+		return;
+	intrdisable(c->irq, i8250interrupt, uart, 0, uart->name);
+	c->iena = 0;
+}
+
+static void
+i8250clock(void)	/* its only registration (addclock0link in i8250enable) is commented out */
+{
+	i8250interrupt(nil, &i8250uart[CONSOLE]);	/* run the interrupt handler on the console uart */
+}
+
+static void
+i8250enable(Uart* uart, int ie)
+{
+	Ctlr *ctlr;
+
+	/* set 8 bits no parity, probe once for a FIFO, then program Ier and Mcr */
+	if (up == nil)
+		return;				/* too soon */
+
+	ctlr = uart->regs;
+
+	ctlr->sticky[Lcr] = Wls8;		/* no parity */
+	csr8w(ctlr, Lcr, 0);
+
+	/*
+	 * Check if there is a FIFO.
+	 * Changing the FIFOena bit in Fcr flushes data
+	 * from both receive and transmit FIFOs; there's
+	 * no easy way to guarantee not losing data on
+	 * the receive side, but it's possible to wait until
+	 * the transmitter is really empty.
+	 * Also, reading the Iir outwith i8250interrupt()
+	 * can be dangerous, but this should only happen
+	 * once, before interrupts are enabled.
+	 */
+	ilock(ctlr);
+	if(!ctlr->checkfifo){
+		/*
+		 * Wait until the transmitter is really empty.
+		 */
+		while(!(csr8r(ctlr, Lsr) & Temt))
+			;
+		csr8w(ctlr, Fcr, FIFOena);
+		if(csr8r(ctlr, Iir) & Ifena)
+			ctlr->hasfifo = 1;
+		csr8w(ctlr, Fcr, 0);
+		ctlr->checkfifo = 1;
+	}
+	iunlock(ctlr);
+
+	/*
+	 * Enable interrupts and turn on DTR and RTS.
+	 * Be careful if this is called to set up a polled serial line
+	 * early on not to try to enable interrupts as interrupt-
+	 * -enabling mechanisms might not be set up yet.
+	 */
+	if(ie){
+		if(ctlr->iena == 0 && !ctlr->poll){
+			intrenable(ctlr->irq, i8250interrupt, uart, 0, uart->name);
+			ctlr->iena = 1;
+		}
+		ctlr->sticky[Ier] = Erda;
+//		ctlr->sticky[Mcr] |= Ie;		/* not on omap */
+		ctlr->sticky[Mcr] = 0;
+	}
+	else{
+		ctlr->sticky[Ier] = 0;
+		ctlr->sticky[Mcr] = 0;
+	}
+	csr8w(ctlr, Ier, 0);
+	csr8w(ctlr, Mcr, 0);
+
+	(*uart->phys->dtr)(uart, 1);
+	(*uart->phys->rts)(uart, 1);
+
+	/*
+	 * During startup, the i8259 interrupt controller is reset.
+	 * This may result in a lost interrupt from the i8250 uart.
+	 * The i8250 thinks the interrupt is still outstanding and does not
+	 * generate any further interrupts. The workaround is to call the
+	 * interrupt handler to clear any pending interrupt events.
+	 * Note: this must be done after setting Ier.
+	 */
+	if(ie){
+		i8250interrupt(nil, uart);
+		/*
+		 * force output to resume if stuck.  shouldn't be needed.
+		 */
+//		if (Pollstuckoutput)
+//			addclock0link(i8250clock, 10);
+	}
+}
+
+static Uart*
+i8250pnp(void)
+{
+	return i8250uart;	/* no probing: always the static table above */
+}
+
+static int
+i8250getc(Uart* uart)
+{
+	Ctlr *c;
+
+	/* poll Lsr, with delay(1) between tries, until Dr is set; then read Rbr */
+	for(c = uart->regs; !(csr8r(c, Lsr) & Dr); )
+		delay(1);
+	return csr8r(c, Rbr);
+}
+
+static void
+i8250putc(Uart* uart, int c)
+{
+	int i, s;
+	Ctlr *ctlr;
+
+	if (!normalprint) {		/* too early; use brute force */
+		s = splhi();	/* reuse the outer s; a second declaration here shadowed it */
+		/* spin on the PHYSCONS registers directly, with no retry limit */
+		while (!(((ulong *)PHYSCONS)[Lsr] & Thre))
+			;
+		((ulong *)PHYSCONS)[Thr] = c;
+		splx(s);
+		return;
+	}
+	/* normal path: at most 128 delay(1) waits before the write and 128 after it */
+	ctlr = uart->regs;
+	s = splhi();
+	for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++)
+		delay(1);
+	csr8o(ctlr, Thr, (uchar)c);	/* written even if Thre never came up */
+	for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++)
+		delay(1);
+	splx(s);
+}
+
+void
+serialputc(int c)
+{
+	i8250putc(&i8250uart[CONSOLE], c);	/* polled write of one character to the console uart */
+}
+
+void
+serialputs(char* s, int n)
+{
+	_uartputs(s, n);	/* same as _uartputs: n bytes, with '\r' sent before each '\n' */
+}
+
+#ifdef notdef	/* compiled out; the matching .poll entry in i8250physuart is commented out too */
+static void
+i8250poll(Uart* uart)
+{
+	Ctlr *ctlr;
+
+	/*
+	 * If PhysUart has a non-nil .poll member, this
+	 * routine will be called from the uartclock timer.
+	 * If the Ctlr .poll member is non-zero, when the
+	 * Uart is enabled interrupts will not be enabled
+	 * and the result is polled input and output.
+	 * Not very useful here, but ports to new hardware
+	 * or simulators can use this to get serial I/O
+	 * without setting up the interrupt mechanism.
+	 */
+	ctlr = uart->regs;
+	if(ctlr->iena || !ctlr->poll)	/* interrupt-driven, or polling not requested */
+		return;
+	i8250interrupt(nil, uart);	/* service the chip exactly as the interrupt would */
+}
+#endif
+
+PhysUart i8250physuart = {	/* operations table; i8250uart[].phys points here */
+	.name		= "i8250",
+	.pnp		= i8250pnp,
+	.enable		= i8250enable,
+	.disable	= i8250disable,
+	.kick		= i8250kick,
+	.dobreak	= i8250break,
+	.baud		= i8250baud,
+	.bits		= i8250bits,
+	.stop		= i8250stop,
+	.parity		= i8250parity,
+	.modemctl	= i8250modemctl,
+	.rts		= i8250rts,
+	.dtr		= i8250dtr,
+	.status		= i8250status,
+	.fifo		= i8250fifo,
+	.getc		= i8250getc,
+	.putc		= i8250putc,
+//	.poll		= i8250poll,		/* only in 9k, not 9 */
+};
+
+static void
+i8250dumpregs(Ctlr* ctlr)
+{
+	int dlm, dll;
+	int _uartprint(char*, ...);
+	/* print both divisor latch registers; not called from anywhere in this file */
+	csr8w(ctlr, Lcr, Dlab);	/* sticky Lcr bits with Dlab added */
+	dlm = csr8r(ctlr, Dlm);
+	dll = csr8r(ctlr, Dll);
+	csr8w(ctlr, Lcr, 0);	/* back to the sticky Lcr value, Dlab clear */
+
+	_uartprint("dlm %#ux dll %#ux\n", dlm, dll);
+}
+
+Uart*	uartenable(Uart *p);
+
+/* must call this from a process's context; returns -1 while up is nil, otherwise 0 */
+int
+i8250console(void)
+{
+	Uart *uart = &i8250uart[CONSOLE];
+
+	if (up == nil)
+		return -1;			/* too early */
+
+	if(uartenable(uart) != nil /* && uart->console */){
+//		iprint("i8250console: enabling console uart\n");
+//		serialoq = uart->oq;
+/*
+ * on mt7688
+ * uart->oq seems to fill and block, this bypasses that
+ * see port/devcons, putstrn0
+ */
+		serialoq = nil;  
+		uart->opens++;
+		consuart = uart;
+//		i8250disable(uart);
+		i8250enable(uart, 1);	/* second enable, this time asking for interrupts */
+//		screenputs = _uartputs;
+	}
+	uartctl(uart, "b115200 l8 pn s1");	/* runs even if uartenable failed; result ignored */
+	return 0;
+}
+
+void
+_uartputs(char* s, int n)
+{
+	Uart *cons = &i8250uart[CONSOLE];
+
+	while(n-- > 0){		/* each '\n' goes out as '\r' then '\n' */
+		if(*s == '\n')
+			i8250putc(cons, '\r');
+		i8250putc(cons, *s++);
+	}
+}
+
+int
+_uartprint(char* fmt, ...)
+{
+	va_list ap;
+	char line[PRINTSIZE], *end;
+
+	/* format into a stack buffer, then send it with _uartputs */
+	va_start(ap, fmt);
+	end = vseprint(line, line+sizeof(line), fmt, ap);
+	va_end(ap);
+	_uartputs(line, end - line);
+
+	return end - line;
+}
+
+
+void
+uartinit(void)
+{
+	consuart = &i8250uart[CONSOLE];	/* only sets the pointer; no register is touched here */
+}
--- /dev/null
+++ b/sys/src/9/mt7688/words
@@ -1,0 +1,84 @@
+9 Front kernel for the MediaTek MT7688
+
+currently tested on;
+Onion Omega 2
+Hi-Link HKL7688A
+
+
+
+FPU Emulation;
+The MIPS 24KEc core used in the MT7688 does 
+not have an FPU.  Because of the way MIPS 
+handles branch delay slots, tos.h needs to 
+include scratch space for fpimips.c.
+
+	/* scratch space for kernel use (e.g., mips fp delay-slot execution) */
+	ulong	kscr[4];
+	/* top of stack is here */
+
+Since lots of things use tos.h, the entire 
+spim environment needs to be nuked and reinstalled 
+to keep things consistent.
+
+
+Build Environment;
+Because libc for spim ends up using a combination 
+of portable code and mips- and spim-specific machine 
+code, I have seen issues where things being 
+built in the wrong order cause failures in the 
+kernel.  For best results:
+
+cd /sys/src
+objtype=spim
+mk nuke
+mk libs
+mk install
+
+
+Drivers;
+The first UART, often documented as 
+"UART Lite" or "UARTL", is set up, but no others.
+
+The MT7688 has 1 Ethernet device wired directly 
+into a 7 port switch, usually on port 5.  Port 0 
+is typically used as the WAN port on routers, and 
+ports 1-4 are the LAN ports.  
+
+Right now, the Ethernet driver, ether7688.c, just 
+sets up the switch to run ports 1-5 as a simple 
+unmanaged switch to allow outside connections.  
+This has been tested on the HKL7688A, but not on 
+the Onion Omega 2 with the Ethernet expansion.
+
+WiFi, SPI, I²C, GPIO, USB, MMC, and a dedicated 
+switch driver are works in progress.
+
+
+u-boot;
+The kernel currently assumes that it will be 
+loaded at 0x80020000 and that plan9.ini will 
+be loaded at 0x80010000 (CONFADDR)
+
+
+nvram;
+In order to connect to your auth server and file server, 
+copy an nvram image with the credentials you want into 
+a file, and add the location of that file (e.g. 
+/usr/glenda/nvram) to the bootdir section of the kernel 
+configuration file (mt7688).
+
+In plan9.ini, specify to use the nvram in the 
+/boot directory in the kernel;
+nvram=/boot/nvram
+nvroff=0
+nvrlen=512
+
+
+Erratum 48;
+There is a known issue with the 24K series cores, 
+where data is lost if 3 cache writes are done in 
+a row.  The mips linker in some legacy Plan 9 
+distributions has a change in vl/noop.c where it 
+can be set to insert a no-op between stores to 
+avoid this issue.
+