shithub: riscv

Download patch

ref: c6ad540af56be95b458008ae3abd3432b71d49dd
parent: 1a7c224b3e36342623d4050953ca0cf3cb8a8bd5
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Fri May 3 19:14:57 EDT 2019

bcm64: add experimental work in progress arm64 kernel for raspberry pi 3

diff: cannot open b/sys/src/9/bcm64//null: file does not exist: 'b/sys/src/9/bcm64//null'
--- /dev/null
+++ b/sys/src/9/bcm64/archbcm3.c
@@ -1,0 +1,168 @@
+/*
+ * bcm2837 (e.g. raspberry pi 3) architecture-specific stuff
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+#include "sysreg.h"
+
+typedef struct Mbox Mbox;
+typedef struct Mboxes Mboxes;
+
+#define	POWERREGS	(VIRTIO+0x100000)
+
+Soc soc = {
+	.dramsize	= GiB,
+	.physio		= 0x3F000000,
+	.busdram	= 0xC0000000,
+	.busio		= 0x7E000000,
+	.armlocal	= 0x40000000,
+};
+
+enum {
+	Wdogfreq	= 65536,
+	Wdogtime	= 10,	/* seconds, ≤ 15 */
+};
+
+/*
+ * Power management / watchdog registers
+ */
+enum {
+	Rstc		= 0x1c>>2,
+		Password	= 0x5A<<24,
+		CfgMask		= 0x03<<4,
+		CfgReset	= 0x02<<4,
+	Rsts		= 0x20>>2,
+	Wdog		= 0x24>>2,
+};
+
+/*
+ * Arm local regs for smp
+ */
+struct Mbox {
+	u32int	doorbell;
+	u32int	mbox1;
+	u32int	mbox2;
+	u32int	startcpu;
+};
+struct Mboxes {
+	Mbox	set[4];
+	Mbox	clr[4];
+};
+
+enum {
+	Mboxregs	= 0x80,
+};
+
+void
+archreset(void)
+{
+}
+
+void
+archreboot(void)
+{
+	u32int *r;
+	/* reboot: load the watchdog with a count of 1, select CfgReset, then spin */
+	r = (u32int*)POWERREGS;
+	r[Wdog] = Password | 1;	/* every write to these registers is or'ed with Password */
+	r[Rstc] = Password | (r[Rstc] & ~CfgMask) | CfgReset;	/* replace the Cfg field with CfgReset */
+	coherence();
+	for(;;)	/* never returns; presumably the watchdog resets the board - confirm against the SoC documentation */
+		;
+}
+
+void
+wdogfeed(void)
+{
+	u32int *r;
+	/* reload the watchdog count and (re)select CfgReset */
+	r = (u32int*)POWERREGS;
+	r[Wdog] = Password | (Wdogtime * Wdogfreq);	/* Wdogtime seconds' worth of counts at Wdogfreq per second */
+	r[Rstc] = Password | (r[Rstc] & ~CfgMask) | CfgReset;	/* Cfg field := CfgReset, other bits kept */
+}
+
+void
+wdogoff(void)
+{
+	u32int *r;
+	/* clear the Cfg field of Rstc (undoing the CfgReset selected by wdogfeed) */
+	r = (u32int*)POWERREGS;
+	r[Rstc] = Password | (r[Rstc] & ~CfgMask);	/* all other Rstc bits are written back unchanged */
+}
+
+
+/* format the cpu part name and " rNpM" suffix into buf[size]; returns buf */
+char *
+cputype2name(char *buf, int size)
+{
+	u32int midr, partno;
+	char *s, *e;
+
+	/* the part number is taken from MIDR_EL1 bits 15:4 */
+	e = buf + size;
+	midr = sysrd(MIDR_EL1);
+	partno = (midr >> 4) & 0xFFF;
+	if(partno == 0xc07)
+		s = seprint(buf, e, "Cortex-A7");
+	else if(partno == 0xd03)
+		s = seprint(buf, e, "Cortex-A53");
+	else
+		s = seprint(buf, e, "Unknown-%#x", partno);
+
+	/* append MIDR_EL1 bits 23:20 and 3:0 as " rNpM" */
+	seprint(s, e, " r%udp%ud", (midr >> 20) & 0xF, midr & 0xF);
+	return buf;
+}
+
+void
+cpuidprint(void)
+{
+	char name[64];
+
+	cputype2name(name, sizeof name);
+	iprint("cpu%d: %dMHz ARM %s\n", m->machno, m->cpumhz, name);
+}
+
+int
+getncpus(void)
+{
+	int ncpu, limit;
+	char *s;
+
+	ncpu = MAXMACH < 4 ? MAXMACH : 4;	/* 4 cpus unless MAXMACH is smaller */
+	s = getconf("*ncpu");
+	limit = s != nil ? atoi(s) : 0;
+	if(limit > 0 && limit < ncpu)
+		ncpu = limit;	/* a positive *ncpu can only lower the count */
+	return ncpu;
+}
+
+void
+mboxclear(uint cpu)
+{
+	Mboxes *mb;
+	/* write 1 to cpu's mbox1 in the clr bank; cpu indexes a 4-entry array and is not range checked */
+	mb = (Mboxes*)(ARMLOCAL + Mboxregs);
+	mb->clr[cpu].mbox1 = 1;	/* presumably clears the bit that wakecpu sets - confirm against the SoC documentation */
+}
+
+void
+wakecpu(uint cpu)
+{
+	Mboxes *mb;
+	/* write 1 to cpu's mbox1 in the set bank; cpu indexes a 4-entry array and is not range checked */
+	mb = (Mboxes*)(ARMLOCAL + Mboxregs);
+	mb->set[cpu].mbox1 = 1;	/* presumably raises a mailbox interrupt on that cpu - confirm against the SoC documentation */
+}
+
+void
+archbcm3link(void)
+{
+//	addclock0link(wdogfeed, HZ);
+}
--- /dev/null
+++ b/sys/src/9/bcm64/cache.v8.s
@@ -1,0 +1,212 @@
+#include "sysreg.h"
+
+#undef	SYSREG
+#define	SYSREG(op0,op1,Cn,Cm,op2)	SPR(((op0)<<19|(op1)<<16|(Cn)<<12|(Cm)<<8|(op2)<<5))
+
+/*
+ * instruction cache operations
+ */
+TEXT cacheiinvse(SB), 1, $-4	// cacheiinvse(va, len): IC IVAU on every line covering [va, va+len)
+	MOVWU	len+8(FP), R2	// R0 = va (first argument), R2 = len
+	ADD	R0, R2		// R2 = va + len, the end address
+	// interrupts stay masked between selecting the cache and reading its geometry
+	MRS	DAIF, R11	// save the interrupt mask
+	MSR	$0x2, DAIFSet	// same mask operation as splhi
+	MOVWU	$1, R10
+	MSR	R10, CSSELR_EL1	// select cache 1 (level 1 instruction cache per the ARMv8 CSSELR layout)
+	ISB	$SY
+	MRS	CCSIDR_EL1, R4	// geometry of the selected cache
+
+	ANDW	$7, R4
+	ADDW	$4, R4		// log2(linelen)
+	LSL	R4, R10		// R10 was 1, so R10 = line length in bytes
+	LSR	R4, R0
+	LSL	R4, R0		// va rounded down to a line boundary
+
+_iinvse:
+	IC	R0, 3,7,5,1	// IVAU
+	ADD	R10, R0		// next line
+	CMP	R0, R2
+	BGT	_iinvse		// repeat until the end address is reached; the body always runs at least once
+	DSB	$NSH
+	ISB	$SY
+	MSR	R11, DAIF	// restore the saved interrupt mask
+	RETURN
+
+TEXT cacheiinv(SB), 1, $-4
+	IC	R0, 0,7,5,0	// IALLU
+	DSB	$NSH
+	ISB	$SY
+	RETURN
+
+TEXT cacheuwbinv(SB), 1, $0
+	BL	cachedwbinv(SB)
+	BL	cacheiinv(SB)
+	RETURN
+
+/*
+ * data cache operations
+ */
+TEXT cachedwbse(SB), 1, $-4
+	MOV	LR, R29
+	BL	cachedva<>(SB)
+TEXT dccvac(SB), 1, $-4
+	DC	R0, 3,7,10,1	// CVAC
+	RETURN
+
+TEXT cacheduwbse(SB), 1, $-4
+	MOV	LR, R29
+	BL	cachedva<>(SB)
+TEXT dccvau(SB), 1, $-4
+	DC	R0, 3,7,11,1	// CVAU
+	RETURN
+
+TEXT cachedinvse(SB), 1, $-4
+	MOV	LR, R29
+	BL	cachedva<>(SB)
+TEXT dcivac(SB), 1, $-4
+	DC	R0, 0,7,6,1	// IVAC
+	RETURN
+
+TEXT cachedwbinvse(SB), 1, $-4
+	MOV	LR, R29
+	BL	cachedva<>(SB)
+TEXT dccivac(SB), 1, $-4
+	DC	R0, 3,7,14,1	// CIVAC
+	RETURN
+
+TEXT cachedva<>(SB), 1, $-4	// shared loop of the cached*se routines: apply one DC op to each line of [va, va+len)
+	MOV	LR, R1		// R1 = op: each caller does BL from directly in front of its DC instruction
+	MOVWU	len+8(FP), R2	// R0 = va (caller's first argument), R2 = len
+	ADD	R0, R2		// R2 = va + len, the end address
+
+	MRS	DAIF, R11	// save the interrupt mask
+	MSR	$0x2, DAIFSet	// same mask operation as splhi
+	MOVWU	$0, R10
+	MSR	R10, CSSELR_EL1	// select cache 0 (level 1 data cache per the ARMv8 CSSELR layout)
+	ISB	$SY
+	MRS	CCSIDR_EL1, R4	// geometry of the selected cache
+
+	ANDW	$7, R4
+	ADDW	$4, R4		// log2(linelen)
+	MOVWU	$1, R10
+	LSL	R4, R10		// R10 = line length in bytes
+	LSR	R4, R0
+	LSL	R4, R0		// va rounded down to a line boundary
+
+	DSB	$SY
+	ISB	$SY
+_cachedva:
+	BL	(R1)		// op(va): the caller's DC instruction followed by its RETURN
+	ADD	R10, R0		// next line
+	CMP	R0, R2
+	BGT	_cachedva	// repeat until the end address is reached; the body always runs at least once
+	DSB	$SY
+	ISB	$SY
+	MSR	R11, DAIF	// restore the saved interrupt mask
+	RET	R29		// callers stored their own return address in R29 before the BL
+
+/*
+ * l1 cache operations
+ */
+TEXT cachedwb(SB), 1, $-4
+	MOVWU	$0, R0
+_cachedwb:
+	MOV	LR, R29
+	BL	cachedsw<>(SB)
+TEXT dccsw(SB), 1, $-4
+	DC	R0, 0,7,10,2	// CSW
+	RETURN
+
+TEXT cachedinv(SB), 1, $-4
+	MOVWU	$0, R0
+_cachedinv:
+	MOV	LR, R29
+	BL	cachedsw<>(SB)
+TEXT dcisw(SB), 1, $-4
+	DC	R0, 0,7,6,2	// ISW
+	RETURN
+
+TEXT cachedwbinv(SB), 1, $-4
+	MOVWU	$0, R0
+_cachedwbinv:
+	MOV	LR, R29
+	BL	cachedsw<>(SB)
+TEXT dccisw(SB), 1, $-4
+	DC	R0, 0,7,14,2	// CISW
+	RETURN
+
+/*
+ * l2 cache operations
+ */
+TEXT l2cacheuwb(SB), 1, $-4
+	MOVWU	$1, R0
+	B	_cachedwb
+TEXT l2cacheuinv(SB), 1, $-4
+	MOVWU	$1, R0
+	B	_cachedinv
+TEXT l2cacheuwbinv(SB), 1, $-4
+	MOVWU	$1, R0
+	B	_cachedwbinv
+
+TEXT cachesize(SB), 1, $-4	// returns the raw CCSIDR_EL1 value for the cache selected by the argument
+	MRS	DAIF, R11	// save the interrupt mask
+	MSR	$0x2, DAIFSet	// mask interrupts, presumably so nothing else changes CSSELR before the read
+	MSR	R0, CSSELR_EL1	// the argument is written to CSSELR_EL1 unmodified
+	ISB	$SY
+	MRS	CCSIDR_EL1, R0	// result in R0
+	MSR	R11, DAIF	// restore the saved interrupt mask
+	RETURN
+
+TEXT cachedsw<>(SB), 1, $-4
+	MOV	LR, R1
+
+	MRS	DAIF, R11
+	MSR	$0x2, DAIFSet
+	ADDW	R0, R0, R8
+	MSR	R8, CSSELR_EL1
+	ISB	$SY
+	MRS	CCSIDR_EL1, R4
+
+	LSR	$3, R4, R7
+	ANDW	$1023, R7	// lastway
+	ADDW	$1, R7, R5	// #ways
+
+	LSR	$13, R4, R2
+	ANDW	$32767, R2	// lastset
+	ADDW	$1, R2		// #sets
+
+	ANDW	$7, R4
+	ADDW	$4, R4		// log2(linelen)
+
+	MOVWU	$32, R3		// wayshift = 32 - log2(#ways)
+_countlog2ways:
+	CBZ	R7, _loop	// lastway == 0?
+	LSR	$1, R7		// lastway >>= 1
+	SUB	$1, R3		// wayshift--
+	B _countlog2ways
+_loop:
+	DSB	$SY
+	ISB	$SY
+_nextway:
+	MOVWU	$0, R6		// set
+_nextset:
+	LSL	R3, R7, R0	// way<<wayshift
+	LSL	R4, R6, R9	// set<<log2(linelen)
+	ORRW	R8, R0		// level
+	ORRW	R9, R0		// setway
+
+	BL	(R1)		// op(setway)
+
+	ADDW	$1, R6		// set++
+	CMPW	R2, R6
+	BLT	_nextset
+
+	ADDW	$1, R7		// way++
+	CMPW	R5, R7
+	BLT	_nextway
+
+	DSB	$SY
+	ISB	$SY
+	MSR	R11, DAIF
+	RET	R29
--- /dev/null
+++ b/sys/src/9/bcm64/clock.c
@@ -1,0 +1,267 @@
+/*
+ * bcm283[56] timers
+ *	System timers run at 1MHz (timers 1 and 2 are used by GPU)
+ *	ARM timer usually runs at 250MHz (may be slower in low power modes)
+ *	Cycle counter runs at 700MHz (unless overclocked)
+ *    All are free-running up-counters
+ *
+ * Use system timer 3 (64 bits) for hzclock interrupts and fastticks
+ *   For smp on bcm2836, use local generic timer for interrupts on cpu1-3
+ * Use ARM timer (32 bits) for perfticks
+ * Use ARM timer to force immediate interrupt
+ * Use cycle counter for cycles()
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "sysreg.h"
+
+enum {
+	SYSTIMERS	= VIRTIO+0x3000,
+	ARMTIMER	= VIRTIO+0xB400,
+
+	Localctl	= 0x00,
+	Prescaler	= 0x08,
+	GPUirqroute	= 0x0C,
+
+	SystimerFreq	= 1*Mhz,
+	MaxPeriod	= SystimerFreq / HZ,
+	MinPeriod	= 10,
+};
+
+typedef struct Systimers Systimers;
+typedef struct Armtimer Armtimer;
+
+struct Systimers {
+	u32int	cs;
+	u32int	clo;
+	u32int	chi;
+	u32int	c0;
+	u32int	c1;
+	u32int	c2;
+	u32int	c3;
+};
+
+struct Armtimer {
+	u32int	load;
+	u32int	val;
+	u32int	ctl;
+	u32int	irqack;
+	u32int	irq;
+	u32int	maskedirq;
+	u32int	reload;
+	u32int	predivider;
+	u32int	count;
+};
+
+enum {
+	CntPrescaleShift= 16,	/* freq is sys_clk/(prescale+1) */
+	CntPrescaleMask	= 0xFF,
+	CntEnable	= 1<<9,
+	TmrDbgHalt	= 1<<8,
+	TmrEnable	= 1<<7,
+	TmrIntEnable	= 1<<5,
+	TmrPrescale1	= 0x00<<2,
+	TmrPrescale16	= 0x01<<2,
+	TmrPrescale256	= 0x02<<2,
+	CntWidth16	= 0<<1,
+	CntWidth32	= 1<<1,
+
+	/* generic timer (cortex-a7) */
+	Enable	= 1<<0,
+	Imask	= 1<<1,
+	Istatus = 1<<2,
+};
+
+static void
+clockintr(Ureg *ureg, void *)
+{
+	Systimers *tn;
+
+	if(m->machno != 0)
+		panic("cpu%d: unexpected system timer interrupt", m->machno);
+	tn = (Systimers*)SYSTIMERS;
+	/* dismiss interrupt */
+	tn->cs = 1<<3;
+	timerintr(ureg, 0);
+}
+
+static void
+localclockintr(Ureg *ureg, void *)
+{
+	if(m->machno == 0)
+		panic("cpu0: Unexpected local generic timer interrupt");
+	timerintr(ureg, 0);
+}
+
+void
+clockshutdown(void)
+{
+	Armtimer *tm;
+
+	tm = (Armtimer*)ARMTIMER;
+	tm->ctl = 0;
+}
+
+void
+clockinit(void)
+{
+	Systimers *tn;
+	Armtimer *tm;
+	ulong t0, t1, tstart;
+	/* per-cpu timer setup, then calibrate m->cpuhz against the 1MHz system timer */
+	syswr(PMCR_EL0, 1<<6 | 7);
+	syswr(PMCNTENSET, 1<<31);
+	syswr(PMUSERENR_EL0, 1<<2);
+
+	syswr(CNTP_TVAL_EL0, ~0UL);
+	if(m->machno == 0){
+		syswr(CNTP_CTL_EL0, Imask);
+
+		*(u32int*)(ARMLOCAL + GPUirqroute) = 0;
+
+		/* input clock is 19.2Mhz crystal */
+		*(u32int*)(ARMLOCAL + Localctl) = 0;
+		/* divide by (2^31/Prescaler) */
+		*(u32int*)(ARMLOCAL + Prescaler) = (((uvlong)SystimerFreq<<31)/19200000)&~1UL;
+	} else {
+		syswr(CNTP_CTL_EL0, Enable);
+		intrenable(IRQcntpns, localclockintr, nil, 0, "clock");
+	}
+
+	tn = (Systimers*)SYSTIMERS;
+	tstart = tn->clo;
+	do{
+		t0 = lcycles();
+	}while(tn->clo == tstart);
+	/* count cycles for 1/100 s; comparing elapsed ticks stays correct when the 32-bit clo wraps */
+	do{
+		t1 = lcycles();
+	}while((u32int)(tn->clo - tstart) < SystimerFreq/100);
+	t1 -= t0;
+	m->cpuhz = 100 * t1;
+	m->cpumhz = (m->cpuhz + Mhz/2 - 1) / Mhz;
+	m->cyclefreq = m->cpuhz;
+
+	if(m->machno == 0){
+		tn->cs = 1<<3;	/* same write clockintr uses to dismiss the timer 3 interrupt */
+		tn->c3 = tn->clo - 1;
+		intrenable(IRQtimer3, clockintr, nil, 0, "clock");
+
+		tm = (Armtimer*)ARMTIMER;
+		tm->load = 0;
+		tm->ctl = TmrPrescale1|CntEnable|CntWidth32;	/* free-running counter read by perfticks */
+	}
+}
+
+void
+timerset(uvlong next)
+{
+	Systimers *tn;
+	uvlong now;
+	long period;
+	/* arm this cpu's timer for fastticks time next, clamped to [MinPeriod, MaxPeriod] ticks from now */
+	now = fastticks(nil);
+	period = next - now;	/* a time already in the past gives a negative period, raised to MinPeriod below */
+	if(period < MinPeriod)
+		period = MinPeriod;
+	else if(period > MaxPeriod)
+		period = MaxPeriod;
+	if(m->machno)
+		syswr(CNTP_TVAL_EL0, period);	/* cpus other than 0 use the local generic timer */
+	else{
+		tn = (Systimers*)SYSTIMERS;
+		tn->c3 = tn->clo + period;	/* cpu0 uses system timer compare register 3 */
+	}
+}
+
+uvlong
+fastticks(uvlong *hz)
+{
+	Systimers *tn;
+	ulong lo, hi;
+	uvlong now;
+	/* returns the 64-bit system timer count; if hz is non-nil, *hz is set to SystimerFreq */
+	if(hz)
+		*hz = SystimerFreq;
+	tn = (Systimers*)SYSTIMERS;
+	do{
+		hi = tn->chi;
+		lo = tn->clo;
+	}while(tn->chi != hi);	/* retry if chi changed while clo was being read */
+	now = (uvlong)hi<<32 | lo;
+	return now;
+}
+
+ulong
+perfticks(void)
+{
+	Armtimer *tm;
+
+	tm = (Armtimer*)ARMTIMER;
+	return tm->count;
+}
+
+void
+armtimerset(int n)
+{
+	Armtimer *tm;
+	/* n > 0: enable the ARM timer and its interrupt with load value n; otherwise disable both */
+	tm = (Armtimer*)ARMTIMER;
+	if(n > 0){
+		tm->ctl |= TmrEnable|TmrIntEnable;
+		tm->load = n;
+	}else{
+		tm->load = 0;
+		tm->ctl &= ~(TmrEnable|TmrIntEnable);
+		tm->irq = 1;	/* NOTE(review): written into the field named irq, not irqack - presumably dismisses a pending interrupt; confirm */
+	}
+}
+
+ulong
+µs(void)
+{
+	if(SystimerFreq != 1*Mhz)
+		return fastticks2us(fastticks(nil));
+	return ((Systimers*)SYSTIMERS)->clo;
+}
+
+void
+microdelay(int n)
+{
+	ulong now;
+
+	now = µs();
+	while(µs() - now < n);
+}
+
+void
+delay(int n)
+{
+	while(--n >= 0)
+		microdelay(1000);
+}
+
+void
+synccycles(void)
+{
+	static Ref r1, r2;
+	int s;
+	/* two-phase rendezvous: returns only after conf.nmach callers have passed both counters */
+	s = splhi();
+	r2.ref = 0;
+	incref(&r1);
+	while(r1.ref != conf.nmach)	/* phase 1: spin until every cpu has arrived */
+		;
+//	syswr(PMCR_EL0, 1<<6 | 7);
+	incref(&r2);
+	while(r2.ref != conf.nmach)	/* phase 2: spin until every cpu has left phase 1 */
+		;
+	r1.ref = 0;	/* reset for the next call; r2 is reset on entry */
+	splx(s);
+}
--- /dev/null
+++ b/sys/src/9/bcm64/dat.h
@@ -1,0 +1,272 @@
+/*
+ * Time.
+ *
+ * HZ should divide 1000 evenly, ideally.
+ * 100, 125, 200, 250 and 333 are okay.
+ */
+#define	HZ		100			/* clock frequency */
+#define	MS2HZ		(1000/HZ)		/* millisec per clock tick */
+#define	TK2SEC(t)	((t)/HZ)		/* ticks to seconds */
+
+enum {
+	Mhz	= 1000 * 1000,
+};
+
+typedef struct Conf	Conf;
+typedef struct Confmem	Confmem;
+typedef struct FPsave	FPsave;
+typedef struct PFPU	PFPU;
+typedef struct ISAConf	ISAConf;
+typedef struct Label	Label;
+typedef struct Lock	Lock;
+typedef struct Memcache	Memcache;
+typedef struct MMMU	MMMU;
+typedef struct Mach	Mach;
+typedef struct Page	Page;
+typedef struct PhysUart	PhysUart;
+typedef struct PMMU	PMMU;
+typedef struct Proc	Proc;
+typedef u64int		PTE;
+typedef struct Soc	Soc;
+typedef struct Uart	Uart;
+typedef struct Ureg	Ureg;
+typedef uvlong		Tval;
+typedef void		KMap;
+
+#pragma incomplete Ureg
+
+#define MAXSYSARG	5	/* for mount(fd, mpt, flag, arg, srv) */
+
+/*
+ *  parameters for sysproc.c
+ */
+#define AOUT_MAGIC	(R_MAGIC)
+
+struct Lock
+{
+	ulong	key;
+	u32int	sr;
+	uintptr	pc;
+	Proc*	p;
+	Mach*	m;
+	int	isilock;
+};
+
+struct Label
+{
+	uintptr	sp;
+	uintptr	pc;
+};
+
+struct FPsave
+{
+	uvlong	regs[32][2];
+
+	ulong	control;
+	ulong	status;
+};
+
+struct PFPU
+{
+	FPsave	fpsave[1];
+
+	int	fpstate;
+};
+
+enum
+{
+	FPinit,
+	FPactive,
+	FPinactive,
+
+	/* bits or'd with the state */
+	FPillegal= 0x100,
+};
+
+struct Confmem
+{
+	uintptr	base;
+	usize	npage;
+	uintptr	limit;
+	uintptr	kbase;
+	uintptr	klimit;
+};
+
+struct Conf
+{
+	ulong	nmach;		/* processors */
+	ulong	nproc;		/* processes */
+	Confmem	mem[1];		/* physical memory */
+	ulong	npage;		/* total physical pages of memory */
+	usize	upages;		/* user page pool */
+	ulong	copymode;	/* 0 is copy on write, 1 is copy on reference */
+	ulong	ialloc;		/* max interrupt time allocation in bytes */
+	ulong	pipeqsize;	/* size in bytes of pipe queues */
+	ulong	nimage;		/* number of page cache image headers */
+	ulong	nswap;		/* number of swap pages */
+	int	nswppo;		/* max # of pageouts per segment pass */
+	ulong	hz;		/* processor cycle freq */
+	ulong	mhz;
+	int	monitor;	/* flag */
+};
+
+/*
+ *  MMU stuff in Mach.
+ */
+struct MMMU
+{
+	PTE*	mmul1;		/* l1 for this processor */
+};
+
+/*
+ *  MMU stuff in proc
+ */
+#define NCOLOR	1		/* 1 level cache, don't worry about VCE's */
+
+struct PMMU
+{
+	Page*	mmul1;
+	Page*	mmul1tail;
+
+	Page*	mmul2;
+	Page*	mmul2tail;
+
+	Page*	mmufree;
+
+	int	asid;
+
+	uintptr	tpidr;
+};
+
+#include "../port/portdat.h"
+
+struct Mach
+{
+	int	machno;			/* physical id of processor */
+	uintptr	splpc;			/* pc of last caller to splhi */
+
+	Proc*	proc;			/* current process */
+
+	MMMU;
+	int	flushmmu;		/* flush current proc mmu state */
+
+	ulong	ticks;			/* of the clock since boot time */
+	Label	sched;			/* scheduler wakeup */
+	Lock	alarmlock;		/* access to alarm list */
+	void*	alarm;			/* alarms bound to this clock */
+
+	Proc*	readied;		/* for runproc */
+	ulong	schedticks;		/* next forced context switch */
+
+	int	cputype;
+	ulong	delayloop;
+
+	/* stats */
+	int	tlbfault;
+	int	tlbpurge;
+	int	pfault;
+	int	cs;
+	int	syscall;
+	int	load;
+	int	intr;
+	uvlong	fastclock;		/* last sampled value */
+	uvlong	inidle;			/* time spent in idlehands() */
+	ulong	spuriousintr;
+	int	lastintr;
+	int	ilockdepth;
+	Perf	perf;			/* performance counters */
+
+	int	cpumhz;
+	uvlong	cpuhz;			/* speed of cpu */
+	uvlong	cyclefreq;		/* Frequency of user readable cycle counter */
+
+	int	stack[1];
+};
+
+struct
+{
+	char	machs[MAXMACH];		/* active CPUs */
+	int	exiting;		/* shutdown */
+}active;
+
+#define MACHP(n)	((Mach*)MACHADDR(n))
+
+extern register Mach* m;			/* R27 */
+extern register Proc* up;			/* R26 */
+extern int normalprint;
+extern ulong memsize;
+
+/*
+ *  a parsed plan9.ini line
+ */
+#define NISAOPT		8
+
+struct ISAConf {
+	char	*type;
+	ulong	port;
+	int	irq;
+	ulong	dma;
+	ulong	mem;
+	ulong	size;
+	ulong	freq;
+
+	int	nopt;
+	char	*opt[NISAOPT];
+};
+
+/*
+ * Horrid. But the alternative is 'defined'.
+ */
+#ifdef _DBGC_
+#define DBGFLG		(dbgflg[_DBGC_])
+#else
+#define DBGFLG		(0)
+#endif /* _DBGC_ */
+
+int vflag;
+extern char dbgflg[256];
+
+#define dbgprint	print		/* for now */
+
+/*
+ *  hardware info about a device
+ */
+typedef struct {
+	ulong	port;
+	int	size;
+} Devport;
+
+struct DevConf
+{
+	ulong	intnum;			/* interrupt number */
+	char	*type;			/* card type, malloced */
+	int	nports;			/* Number of ports */
+	Devport	*ports;			/* The ports themselves */
+};
+
+struct Soc {			/* SoC dependent configuration */
+	ulong	dramsize;
+	uintptr	physio;
+	uintptr	busdram;
+	uintptr	busio;
+	uintptr	armlocal;
+	u32int	l1ptedramattrs;
+	u32int	l2ptedramattrs;
+};
+extern Soc soc;
+
+#define BUSUNKNOWN -1
+
+/*
+ * GPIO
+ */
+enum {
+	Input	= 0x0,
+	Output	= 0x1,
+	Alt0	= 0x4,
+	Alt1	= 0x5,
+	Alt2	= 0x6,
+	Alt3	= 0x7,
+	Alt4	= 0x3,
+	Alt5	= 0x2,
+};
--- /dev/null
+++ b/sys/src/9/bcm64/devgen.c
@@ -1,0 +1,34 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+/* fills dp from one tab entry and returns 1, or returns -1 if there is none.
+ * the zeroth element of the table MUST be the directory itself for ..
+ */
+int
+devgen(Chan *c, char *name, Dirtab *tab, int ntab, int i, Dir *dp)
+{
+	if(tab == 0)
+		return -1;
+	if(i == DEVDOTDOT){
+		/* nothing: tab already points at element zero */
+	}else if(name){	/* look name up, never matching element zero */
+		for(i=1; i<ntab; i++)
+			if(strcmp(tab[i].name, name) == 0)
+				break;
+		if(i >= ntab)	/* not found; >= also rejects ntab < 1 */
+			return -1;
+		tab += i;
+	}else{
+		/* skip over the first element, that for . itself */
+		i++;
+		if(i >= ntab)
+			return -1;
+		tab += i;
+	}
+	devdir(c, tab->qid, tab->name, tab->length, eve, tab->perm, dp);
+	return 1;
+}
--- /dev/null
+++ b/sys/src/9/bcm64/fns.h
@@ -1,0 +1,168 @@
+#include "../port/portfns.h"
+
+#define	waserror()	(up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))
+
+/* l.s */
+extern void sev(void);
+extern int tas(void *);
+extern int cmpswap(long*, long, long);
+extern void coherence(void);
+extern void idlehands(void);
+extern uvlong cycles(void);
+extern int splfhi(void);
+extern void splflo(void);
+extern void touser(uintptr sp);
+extern void forkret(void);
+extern void noteret(void);
+extern void returnto(void*);
+extern void fpsaveregs(void*);
+extern void fploadregs(void*);
+extern void magic(void);
+
+extern void setttbr(uintptr pa);
+extern uintptr getfar(void);
+
+extern void flushasidva(uintptr asidva);
+extern void tlbivae1is(uintptr asidva);
+
+extern void flushasidvall(uintptr asidva);
+extern void tlbivale1is(uintptr asidva);
+
+extern void flushasid(uintptr asid);
+extern void tlbiaside1is(uintptr asid);
+
+extern void flushtlb(void);
+extern void tlbivmalle1(void);
+
+/* cache */
+extern ulong cachesize(int level);
+
+extern void cacheiinvse(void*, int);
+extern void cacheuwbinv(void);
+extern void cacheiinv(void);
+
+extern void cachedwbse(void*, int);
+extern void cacheduwbse(void*, int);
+extern void cachedinvse(void*, int);
+extern void cachedwbinvse(void*, int);
+
+extern void cachedwb(void);
+extern void cachedinv(void);
+extern void cachedwbinv(void);
+
+extern void l2cacheuwb(void);
+extern void l2cacheuinv(void);
+extern void l2cacheuwbinv(void);
+
+/* mmu */
+#define	getpgcolor(a)	0
+extern uintptr paddr(void*);
+#define PADDR(a) paddr((void*)(a))
+extern uintptr cankaddr(uintptr);
+extern void* kaddr(uintptr);
+#define KADDR(a) kaddr(a)
+extern void kmapinval(void);
+#define	VA(k)	((uintptr)(k))
+extern KMap *kmap(Page*);
+extern void kunmap(KMap*);
+extern uintptr mmukmap(uintptr, uintptr, usize);
+
+extern void mmu0init(uintptr*);
+extern void mmu0clear(uintptr*);
+extern void mmu1init(void);
+
+extern void putasid(Proc*);
+
+/* clock */
+extern void clockinit(void);
+extern void synccycles(void);
+extern void armtimerset(int);
+
+/* fpu */
+extern void fpuinit(void);
+extern void fpoff(void);
+extern void fpinit(void);
+extern void fpclear(void);
+extern void fpsave(FPsave*);
+extern void fprestore(FPsave*);
+extern void mathtrap(Ureg*);
+
+/* trap */
+extern void trapinit(void);
+extern int userureg(Ureg*);
+extern void evenaddr(uintptr);
+extern void setkernur(Ureg*, Proc*);
+extern void procfork(Proc*);
+extern void procsetup(Proc*);
+extern void procsave(Proc*);
+extern void procrestore(Proc *);
+extern void trap(Ureg*);
+extern void syscall(Ureg*);
+extern void noted(Ureg*, ulong);
+extern void faultarm64(Ureg*);
+extern void dumpstack(void);
+extern void dumpregs(Ureg*);
+
+/* irq */
+extern void intrcpushutdown(void);
+extern void intrsoff(void);
+#define intrenable(i, f, a, b, n)	irqenable((i), (f), (a))
+extern void irqenable(int, void (*)(Ureg*, void*), void*);
+extern int irq(Ureg*);
+extern void fiq(Ureg*);
+
+/* sysreg */
+extern uvlong	sysrd(ulong);
+extern void	syswr(ulong, uvlong);
+
+/* gpio */
+extern void gpiosel(uint, int);
+extern void gpiopull(uint, int);
+extern void gpiopullup(uint);
+extern void gpiopulloff(uint);
+extern void gpiopulldown(uint);
+extern void gpioout(uint, int);
+extern int gpioin(uint);
+extern void gpioselevent(uint, int, int);
+extern int gpiogetevent(uint);
+extern void gpiomeminit(void);
+
+/* arch */
+extern char *cputype2name(char*, int);
+extern void cpuidprint(void);
+extern void uartconsinit(void);
+extern void links(void);
+extern int getncpus(void);
+extern int startcpu(uint);
+extern void okay(int);
+
+/* dma */
+extern uintptr dmaaddr(void*);
+extern void dmastart(int, int, int, void*, void*, int);
+extern int dmawait(int);
+
+/* vcore */
+extern void* fbinit(int set, int *width, int *height, int *depth);
+extern int fbblank(int blank);
+extern void setpower(int dev, int on);
+extern int getpower(int dev);
+extern char* getethermac(void);
+extern uint getboardrev(void);
+extern uint getfirmware(void);
+extern void getramsize(Confmem *mem);
+extern ulong getclkrate(int clkid);
+extern void setclkrate(int clkid, ulong hz);
+extern uint getcputemp(void);
+extern void vgpinit(void);
+extern void vgpset(uint port, int on);
+
+/* bootargs */
+extern void bootargsinit(void);
+extern char *getconf(char *name);
+extern void setconfenv(void);
+extern void writeconf(void);
+
+/* screen */
+extern void screeninit(void);
+
+extern int isaconfig(char*, int, ISAConf*);
--- /dev/null
+++ b/sys/src/9/bcm64/fpu.c
@@ -1,0 +1,92 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "ureg.h"
+#include "sysreg.h"
+
+/* libc */
+extern ulong getfcr(void);
+extern void setfcr(ulong fcr);
+extern ulong getfsr(void);
+extern void setfsr(ulong fsr);
+
+void
+fpuinit(void)
+{
+	fpoff();
+}
+
+void
+fpon(void)
+{
+	syswr(CPACR_EL1, 3<<20);
+}
+
+void
+fpoff(void)
+{
+	syswr(CPACR_EL1, 0<<20);
+}
+
+void
+fpinit(void)
+{
+	fpon();
+	setfcr(0);
+	setfsr(0);
+}
+
+void
+fpclear(void)
+{
+	fpoff();
+}
+
+void
+fpsave(FPsave *p)
+{
+	p->control = getfcr();	/* save control and status words first, */
+	p->status = getfsr();
+	fpsaveregs(p->regs);	/* then the register file, */
+	fpoff();		/* then turn the fpu off via CPACR_EL1 (see fpoff) */
+}
+
+void
+fprestore(FPsave *p)
+{
+	fpon();			/* the fpu must be on before its registers are touched */
+	setfcr(p->control);	/* reload what fpsave stored in *p */
+	setfsr(p->status);
+	fploadregs(p->regs);
+}
+
+void
+mathtrap(Ureg*)
+{
+	int s;
+
+	/* FPillegal set: post the note-handler message and do nothing else */
+	if(up->fpstate & FPillegal){
+		postnote(up, 1, "sys: floating point in note handler", NDebug);
+		return;
+	}
+	/* trapped although the state is already FPactive */
+	if(up->fpstate == FPactive){
+		postnote(up, 1, "sys: floating point error", NDebug);
+		return;
+	}
+	/* any state other than FPinit or FPinactive is ignored */
+	if(up->fpstate != FPinit && up->fpstate != FPinactive)
+		return;
+	/* first use or saved state: bring the fpu up with interrupts off */
+	s = splhi();
+	if(up->fpstate == FPinit)
+		fpinit();
+	else
+		fprestore(up->fpsave);
+	up->fpstate = FPactive;
+	splx(s);
+}
--- /dev/null
+++ b/sys/src/9/bcm64/init9.s
@@ -1,0 +1,4 @@
+TEXT main(SB), 1, $8
+	MOV	$setSB(SB), R28		/* load the SB */
+	MOV	$boot(SB), R0
+	B	startboot(SB)
--- /dev/null
+++ b/sys/src/9/bcm64/l.s
@@ -1,0 +1,749 @@
+#include "mem.h"
+#include "sysreg.h"
+
+#undef	SYSREG
+#define	SYSREG(op0,op1,Cn,Cm,op2)	SPR(((op0)<<19|(op1)<<16|(Cn)<<12|(Cm)<<8|(op2)<<5))
+
+TEXT _start(SB), 1, $-4
+	MOV	$setSB-KZERO(SB), R28
+	BL	svcmode<>(SB)
+
+	/* use dedicated stack pointer per exception level */
+	MOVWU	$1, R1
+	MSR	R1, SPSel
+
+	BL	mmudisable<>(SB)
+
+	/* invalidate local caches */
+	BL	cachedinv(SB)
+	BL	cacheiinv(SB)
+
+	MOV	$(MACHADDR(0)-KZERO), R27
+	MRS	MPIDR_EL1, R1
+	ANDW	$(MAXMACH-1), R1
+	MOVWU	$MACHSIZE, R2
+	MULW	R1, R2, R2
+	SUB	R2, R27
+
+	ADD	$(MACHSIZE-16), R27, R2
+	MOV	R2, SP
+
+	CBNZ	R1, _startup
+
+	/* clear page table and machs */
+	MOV	$(L1-KZERO), R1
+	MOV	$(MACHADDR(-1)-KZERO), R2
+_zerol1:
+	MOV	ZR, (R1)8!
+	CMP	R1, R2
+	BNE	_zerol1
+
+	/* clear BSS */
+	MOV	$edata-KZERO(SB), R1
+	MOV	$end-KZERO(SB), R2
+_zerobss:
+	MOV	ZR, (R1)8!
+	CMP	R1, R2
+	BNE	_zerobss
+
+	/* setup page tables */
+	MOV	$(L1-KZERO), R0
+	BL	mmu0init(SB)
+
+	BL	cachedwbinv(SB)
+	BL	l2cacheuwbinv(SB)
+	SEVL
+_startup:
+	WFE
+	BL	mmuenable<>(SB)
+
+	MOV	$0, R26
+	ORR	$KZERO, R27
+	MSR	R27, TPIDR_EL1
+	MOV	$setSB(SB), R28
+
+	BL	main(SB)
+
+TEXT	stop<>(SB), 1, $-4
+_stop:
+	WFE
+	B	_stop
+
+TEXT sev(SB), 1, $-4
+	SEV
+	WFE
+	RETURN
+
+TEXT PUTC(SB), 1, $-4
+	MOVWU $(0x3F000000+0x215040), R14
+	MOVB R0, (R14)
+	RETURN
+
+TEXT svcmode<>(SB), 1, $-4
+	MSR	$0xF, DAIFSet
+	MRS	CurrentEL, R0
+	ANDW	$(3<<2), R0
+	CMPW	$(1<<2), R0
+	BEQ	el1
+	CMPW	$(2<<2), R0
+	BEQ	el2
+	B	stop<>(SB)
+el2:
+	MOV	$0, R0
+	MSR	R0, MDCR_EL2
+	ISB	$SY
+
+	/* HCR = RW, HCD, SWIO, BSU, FB */
+	MOVWU	$(1<<31 | 1<<29 | 1<<2 | 0<<10 | 0<<9), R0
+	MSR	R0, HCR_EL2
+	ISB	$SY
+
+	/* SCTLR = RES1 */
+	MOVWU	$(3<<4 | 1<<11 | 1<<16 | 1<<18 | 3<<22 | 3<<28), R0
+	ISB	$SY
+	MSR	R0, SCTLR_EL2
+	ISB	$SY
+
+	/* set VMID to zero */
+	MOV	$0, R0
+	MSR	R0, VTTBR_EL2
+	ISB	$SY
+
+	MOVWU	$(0xF<<6 | 4), R0
+	MSR	R0, SPSR_EL2
+	MSR	LR, ELR_EL2
+	ERET
+el1:
+	RETURN
+
+TEXT mmudisable<>(SB), 1, $-4
+#define SCTLRCLR \
+	/* RES0 */	( 3<<30 \
+	/* RES0 */	| 1<<27 \
+	/* UCI */	| 1<<26 \
+	/* EE */	| 1<<25 \
+	/* RES0 */	| 1<<21 \
+	/* E0E */	| 1<<24 \
+	/* WXN */	| 1<<19 \
+	/* nTWE */	| 1<<18 \
+	/* RES0 */	| 1<<17 \
+	/* nTWI */	| 1<<16 \
+	/* UCT */	| 1<<15 \
+	/* DZE */	| 1<<14 \
+	/* RES0 */	| 1<<13 \
+	/* RES0 */	| 1<<10 \
+	/* UMA */	| 1<<9 \
+	/* SA0 */	| 1<<4 \
+	/* SA */	| 1<<3 \
+	/* A */		| 1<<1 )
+#define SCTLRSET \
+	/* RES1 */	( 3<<28 \
+	/* RES1 */	| 3<<22 \
+	/* RES1 */	| 1<<20 \
+	/* RES1 */	| 1<<11 )
+#define SCTLRMMU \
+	/* I */		( 1<<12 \
+	/* C */		| 1<<2 \
+	/* M */		| 1<<0 )
+
+	/* initialise SCTLR, MMU and caches off */
+	ISB	$SY
+	MRS	SCTLR_EL1, R0
+	BIC	$(SCTLRCLR | SCTLRMMU), R0
+	ORR	$SCTLRSET, R0
+	ISB	$SY
+	MSR	R0, SCTLR_EL1
+	ISB	$SY
+
+	B	flushlocaltlb(SB)
+
+TEXT mmuenable<>(SB), 1, $-4
+	/* return to virtual */
+	ORR	$KZERO, LR
+	MOV	LR, -16(RSP)!
+
+	BL	cachedwbinv(SB)
+	BL	flushlocaltlb(SB)
+
+	/* memory attributes */
+#define MAIRINIT \
+	( 0xFF << MA_MEM_WB*8 \
+	| 0x33 << MA_MEM_WT*8 \
+	| 0x44 << MA_MEM_UC*8 \
+	| 0x00 << MA_DEV_nGnRnE*8 \
+	| 0x04 << MA_DEV_nGnRE*8 \
+	| 0x08 << MA_DEV_nGRE*8 \
+	| 0x0C << MA_DEV_GRE*8 )
+	MOV	$MAIRINIT, R1
+	MSR	R1, MAIR_EL1
+	ISB	$SY
+
+	/* translation control */
+#define TCRINIT \
+	/* TBI1 */	( 0<<38 \
+	/* TBI0 */	| 0<<37 \
+	/* AS */	| 0<<36 \
+	/* TG1 */	| (((3<<16|1<<14|2<<12)>>PGSHIFT)&3)<<30 \
+	/* SH1 */	| SHARE_INNER<<28 \
+	/* ORGN1 */	| CACHE_WB<<26 \
+	/* IRGN1 */	| CACHE_WB<<24 \
+	/* EPD1 */	| 0<<23 \
+	/* A1 */	| 0<<22 \
+	/* T1SZ */	| (64-EVASHIFT)<<16 \
+	/* TG0 */	| (((1<<16|2<<14|0<<12)>>PGSHIFT)&3)<<14 \
+	/* SH0 */	| SHARE_INNER<<12 \
+	/* ORGN0 */	| CACHE_WB<<10 \
+	/* IRGN0 */	| CACHE_WB<<8 \
+	/* EPD0 */	| 0<<7 \
+	/* T0SZ */	| (64-EVASHIFT)<<0 )
+	MOV	$TCRINIT, R1
+	MRS	ID_AA64MMFR0_EL1, R2
+	ANDW	$0xF, R2	// IPS
+	ADD	R2<<32, R1
+	MSR	R1, TCR_EL1
+	ISB	$SY
+
+	/* load the page tables */
+	MOV	$(L1TOP-KZERO), R0
+	ISB	$SY
+	MSR	R0, TTBR0_EL1
+	MSR	R0, TTBR1_EL1
+	ISB	$SY
+
+	/* enable MMU and caches */
+	MRS	SCTLR_EL1, R1
+	ORR	$SCTLRMMU, R1
+	ISB	$SY
+	MSR	R1, SCTLR_EL1
+	ISB	$SY
+
+	MOV	RSP, R1
+	ORR	$KZERO, R1
+	MOV	R1, RSP
+	MOV	(RSP)16!, LR
+	B	cacheiinv(SB)
+
+TEXT touser(SB), 1, $-4
+	MSR	$0x3, DAIFSet	// interrupts off
+	MOVWU	$0x10028, R1	// entry
+	MOVWU	$0, R2		// psr
+	MSR	R0, SP_EL0	// sp
+	MSR	R1, ELR_EL1
+	MSR	R2, SPSR_EL1
+	ERET
+
+TEXT cas(SB), 1, $-4		// int cmpswap(long *addr, long ov, long nv): if *addr == ov, store nv and return 1; else return 0
+TEXT cmpswap(SB), 1, $-4
+	MOVWU	ov+8(FP), R1	// zero-extend: LDXRW zero-extends and CMP compares all 64 bits
+	MOVWU	nv+16(FP), R2
+_cas1:
+	LDXRW	(R0), R3	// exclusive load of *addr (R0 = addr, first argument)
+	CMP	R3, R1
+	BNE	_cas0		// *addr != ov: fail
+	STXRW	R2, (R0), R4	// try to store nv; R4 != 0 means the store did not happen
+	CBNZ	R4, _cas1	// so retry from the load
+	MOVW	$1, R0		// swapped
+	DMB	$ISH
+	RETURN
+_cas0:
+	CLREX			// drop the exclusive reservation taken by LDXRW
+	MOVW	$0, R0		// nothing stored
+	RETURN
+
+TEXT tas(SB), 1, $-4		// int tas(void *addr): store 0xdeaddead into the word at addr, return its previous value
+TEXT _tas(SB), 1, $-4
+	MOVW	$0xdeaddead, R2
+_tas1:
+	LDXRW	(R0), R1	// old value
+	STXRW	R2, (R0), R3	// R3 != 0 means the exclusive store did not happen
+	CBNZ	R3, _tas1	// so retry
+	MOVW	R1, R0		// return the old value
+	// no RETURN here: tas falls through into coherence for the barrier and the return
+TEXT coherence(SB), 1, $-4
+	DMB	$ISH
+	RETURN
+
+TEXT islo(SB), 1, $-4
+	MRS	DAIF, R0
+	AND	$(0x2<<6), R0
+	EOR	$(0x2<<6), R0
+	RETURN
+
+TEXT splhi(SB), 1, $-4
+	MRS	DAIF, R0
+	MSR	$0x2, DAIFSet
+	RETURN
+
+TEXT splfhi(SB), 1, $-4
+	MRS	DAIF, R0
+	MSR	$0x3, DAIFSet
+	RETURN
+
+TEXT spllo(SB), 1, $-4
+	MSR	$0x3, DAIFClr
+	RETURN
+
+TEXT splflo(SB), 1, $-4
+	MSR	$0x1, DAIFClr
+	RETURN
+
+TEXT splx(SB), 1, $-4
+	MSR	R0, DAIF
+	RETURN
+
+TEXT cycles(SB), 1, $-4
+TEXT lcycles(SB), 1, $-4
+	MRS	PMCCNTR_EL0, R0
+	RETURN
+
+TEXT setlabel(SB), 1, $-4	// setlabel(Label *l): record the caller's sp and return pc in *l; returns 0
+	MOV	LR, 8(R0)	// second word of the Label (pc) = return address
+	MOV	SP, R1
+	MOV	R1, 0(R0)	// first word of the Label (sp) = stack pointer
+	MOVW	$0, R0		// a direct call returns 0; gotolabel makes the same call site see 1
+	RETURN
+
+TEXT gotolabel(SB), 1, $-4	// gotolabel(Label *l): resume at the pc and sp stored in *l, returning 1 there
+	MOV	8(R0), LR	/* link */
+	MOV	0(R0), R1	/* sp */
+	MOV	R1, SP
+	MOVW	$1, R0		// value seen at the resumed location
+	RETURN
+
+TEXT returnto(SB), 1, $-4
+	MOV	R0, 0(SP)
+	RETURN
+
+TEXT getfar(SB), 1, $-4
+	MRS	FAR_EL1, R0
+	RETURN
+
+TEXT setttbr(SB), 1, $-4
+	DSB	$ISHST
+	MSR	R0, TTBR0_EL1
+	DSB	$ISH
+	ISB	$SY
+
+	B	cacheiinv(SB)
+
+TEXT magic(SB), 1, $-4
+	DSB	$SY
+	ISB	$SY
+	DSB	$SY
+	ISB	$SY
+	DSB	$SY
+	ISB	$SY
+	DSB	$SY
+	ISB	$SY
+	RETURN
+
+/*
+ * TLB maintenance operations.
+ * these broadcast to all cpus in the cluster
+ * (inner shareable domain).
+ */
+TEXT flushasidva(SB), 1, $-4
+TEXT tlbivae1is(SB), 1, $-4
+	DSB	$ISHST
+	TLBI	R0, 0,8,3,1	/* VAE1IS */
+	DSB	$ISH
+	ISB	$SY
+	RETURN
+
+TEXT flushasidvall(SB), 1, $-4
+TEXT tlbivale1is(SB), 1, $-4
+	DSB	$ISHST
+	TLBI	R0, 0,8,3,5	/* VALE1IS */
+	DSB	$ISH
+	ISB	$SY
+	RETURN
+
+TEXT flushasid(SB), 1, $-4
+TEXT tlbiaside1is(SB), 1, $-4
+	DSB	$ISHST
+	TLBI	R0, 0,8,3,2	/* ASIDE1IS */
+	DSB	$ISH
+	ISB	$SY
+	RETURN
+
+TEXT flushtlb(SB), 1, $-4
+TEXT tlbivmalle1is(SB), 1, $-4
+	DSB	$ISHST
+	TLBI	R0, 0,8,3,0	/* VMALLE1IS */
+	DSB	$ISH
+	ISB	$SY
+	RETURN
+
+/*
+ * flush the tlb of this cpu. no broadcast.
+ */
+TEXT flushlocaltlb(SB), 1, $-4
+TEXT tlbivmalle1(SB), 1, $-4
+	DSB	$NSHST
+	TLBI	R0, 0,8,7,0	/* VMALLE1 */
+	DSB	$NSH
+	ISB	$SY
+	RETURN
+
+TEXT fpsaveregs(SB), 1, $-4
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 0)  /* MOV { V0, V1, V2, V3  }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 4)  /* MOV { V4, V5, V6, V7  }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 8)  /* MOV { V8, V9, V10,V11 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 12) /* MOV { V12,V13,V14,V15 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 16) /* MOV { V16,V17,V18,V19 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 20) /* MOV { V20,V21,V22,V23 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 24) /* MOV { V24,V25,V26,V27 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 28) /* MOV { V28,V29,V30,V31 }, (R0)64! */
+	RETURN
+
+TEXT fploadregs(SB), 1, $-4
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 0)  /* MOV (R0)64!, { V0, V1, V2, V3  } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 4)  /* MOV (R0)64!, { V4, V5, V6, V7  } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 8)  /* MOV (R0)64!, { V8, V9, V10,V11 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 12) /* MOV (R0)64!, { V12,V13,V14,V15 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 16) /* MOV (R0)64!, { V16,V17,V18,V19 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 20) /* MOV (R0)64!, { V20,V21,V22,V23 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 24) /* MOV (R0)64!, { V24,V25,V26,V27 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 28) /* MOV (R0)64!, { V28,V29,V30,V31 } */
+	RETURN
+
+// syscall or trap from EL0; reached from vsys with R0 = ESR_EL1 and R0, R17, R30 already stored in the frame
+TEXT vsys0(SB), 1, $-4
+	LSRW	$26, R0, R17	// ec: exception class is the 32-bit ESR value shifted right by 26
+	CMPW	$0x15, R17	// SVC trap?
+	BNE	_itsatrap	// nope.
+	// SVC path: R1-R25 are not written to the frame here
+	MOV	R26, 224(RSP)	// special
+	MOV	R27, 232(RSP)	// special
+	MOV	R28, 240(RSP)	// sb
+	MOV	R29, 248(RSP)	// special
+
+	MRS	SP_EL0, R1
+	MRS	ELR_EL1, R2
+	MRS	SPSR_EL1, R3
+
+	MOV	R0, 288(RSP)	// type
+	MOV	R1, 264(RSP)	// sp
+	MOV	R2, 272(RSP)	// pc
+	MOV	R3, 280(RSP)	// psr
+
+	MOV	$setSB(SB), R28	// kernel static base
+	MRS	TPIDR_EL1, R27	// per-cpu pointer kept in TPIDR_EL1 (presumably m)
+	MOV	16(R27), R26	// presumably up, read from offset 16 of that structure; confirm against Mach
+
+	ADD	$16, RSP, R0	// ureg
+	BL	syscall(SB)	// on return, execution continues into forkret below
+
+TEXT forkret(SB), 1, $-4	// syscall return path: reload R0, sp, pc, psr, R26-R29 and link from the frame, then ERET
+	MSR	$0x3, DAIFSet	// interrupts off
+
+	ADD	$16, RSP, R0	// ureg; NOTE(review): R0 is overwritten by the next load, so this looks dead
+
+	MOV	16(RSP), R0	// ret
+	MOV	264(RSP), R1	// sp
+	MOV	272(RSP), R2	// pc
+	MOV	280(RSP), R3	// psr
+
+	MSR	R1, SP_EL0
+	MSR	R2, ELR_EL1
+	MSR	R3, SPSR_EL1
+
+	MOV	224(RSP), R26	// special
+	MOV	232(RSP), R27	// special
+	MOV	240(RSP), R28	// sb
+	MOV	248(RSP), R29	// special
+
+	MOV	256(RSP), R30	// link
+
+	ADD	$TRAPFRAMESIZE, RSP	// pop the trap frame
+	ERET
+
+TEXT itsatrap<>(SB), 1, $-4	// not an SVC: store the registers vsys skipped, then fall through into vtrap0
+_itsatrap:
+	MOV	R1, 24(RSP)
+	MOV	R2, 32(RSP)
+	MOV	R3, 40(RSP)
+	MOV	R4, 48(RSP)
+	MOV	R5, 56(RSP)
+	MOV	R6, 64(RSP)
+	MOV	R7, 72(RSP)
+	MOV	R8, 80(RSP)
+	MOV	R9, 88(RSP)
+	MOV	R10, 96(RSP)
+	MOV	R11, 104(RSP)
+	MOV	R12, 112(RSP)
+	MOV	R13, 120(RSP)
+	MOV	R14, 128(RSP)
+	MOV	R15, 136(RSP)
+	MOV	R16, 144(RSP)
+	// R17 (slot 152) was already stored by vsys
+	MOV	R18, 160(RSP)
+	MOV	R19, 168(RSP)
+	MOV	R20, 176(RSP)
+	MOV	R21, 184(RSP)
+	MOV	R22, 192(RSP)
+	MOV	R23, 200(RSP)
+	MOV	R24, 208(RSP)
+	MOV	R25, 216(RSP)
+
+// trap/irq/fiq/serr from EL0; R0-R25 and link are already in the frame and R0 holds the type
+TEXT vtrap0(SB), 1, $-4
+	MOV	R26, 224(RSP)	// special
+	MOV	R27, 232(RSP)	// special
+	MOV	R28, 240(RSP)	// sb
+	MOV	R29, 248(RSP)	// special
+
+	MRS	SP_EL0, R1
+	MRS	ELR_EL1, R2
+	MRS	SPSR_EL1, R3
+
+	MOV	R0, 288(RSP)	// type
+	MOV	R1, 264(RSP)	// sp
+	MOV	R2, 272(RSP)	// pc
+	MOV	R3, 280(RSP)	// psr
+
+	MOV	$setSB(SB), R28	// kernel static base
+	MRS	TPIDR_EL1, R27	// per-cpu pointer kept in TPIDR_EL1 (presumably m)
+	MOV	16(R27), R26	// presumably up, read from offset 16 of that structure; confirm against Mach
+
+	ADD	$16, RSP, R0	// ureg
+	BL	trap(SB)	// on return, execution continues into noteret below
+
+TEXT noteret(SB), 1, $-4	// return to EL0 reloading the whole register set from the frame
+	MSR	$0x3, DAIFSet	// interrupts off
+
+	ADD	$16, RSP, R0	// ureg; NOTE(review): R0 is reloaded at _intrreturn, so this looks dead
+
+	MOV	264(RSP), R1	// sp
+	MOV	272(RSP), R2	// pc
+	MOV	280(RSP), R3	// psr
+
+	MSR	R1, SP_EL0
+	MSR	R2, ELR_EL1
+	MSR	R3, SPSR_EL1
+
+	MOV	224(RSP), R26	// special
+	MOV	232(RSP), R27	// special
+	MOV	240(RSP), R28	// sb
+	MOV	248(RSP), R29	// special
+
+_intrreturn:	// common tail, also the target of vtrap1: reload R0-R25 and link, pop the frame, ERET
+	MOV	16(RSP), R0
+	MOV	24(RSP), R1
+	MOV	32(RSP), R2
+	MOV	40(RSP), R3
+	MOV	48(RSP), R4
+	MOV	56(RSP), R5
+	MOV	64(RSP), R6
+	MOV	72(RSP), R7
+	MOV	80(RSP), R8
+	MOV	88(RSP), R9
+	MOV	96(RSP), R10
+	MOV	104(RSP), R11
+	MOV	112(RSP), R12
+	MOV	120(RSP), R13
+	MOV	128(RSP), R14
+	MOV	136(RSP), R15
+	MOV	144(RSP), R16
+	MOV	152(RSP), R17
+	MOV	160(RSP), R18
+	MOV	168(RSP), R19
+	MOV	176(RSP), R20
+	MOV	184(RSP), R21
+	MOV	192(RSP), R22
+	MOV	200(RSP), R23
+	MOV	208(RSP), R24
+	MOV	216(RSP), R25
+
+	MOV	256(RSP), R30	// link
+
+	ADD	$TRAPFRAMESIZE, RSP	// pop the trap frame
+	ERET
+
+// irq/fiq/trap/serr from EL1; of the special registers only R29 is saved and restored here
+TEXT vtrap1(SB), 1, $-4
+	MOV	R29, 248(RSP)	// special
+
+	ADD	$TRAPFRAMESIZE, RSP, R1	// sp as it was before the frame was pushed
+	MRS	ELR_EL1, R2
+	MRS	SPSR_EL1, R3
+
+	MOV	R0, 288(RSP)	// type
+	MOV	R1, 264(RSP)	// sp
+	MOV	R2, 272(RSP)	// pc
+	MOV	R3, 280(RSP)	// psr
+
+	ADD	$16, RSP, R0	// ureg
+	BL	trap(SB)
+
+	MSR	$0x3, DAIFSet	// interrupts off
+
+	MOV	272(RSP), R2	// pc
+	MOV	280(RSP), R3	// psr
+
+	MSR	R2, ELR_EL1
+	MSR	R3, SPSR_EL1
+
+	MOV	248(RSP), R29	// special
+	B	_intrreturn	// reload R0-R25 and link, pop the frame, ERET
+
+// vector tables
+TEXT vsys(SB), 1, $-4	// template copied to vector offset 0x400 by trapinit: minimal save, then the patched branch to vsys0
+	SUB	$TRAPFRAMESIZE, RSP
+
+	MOV	R0, 16(RSP)
+	MOV	R30, 256(RSP)	// link
+
+	MOV	R17, 152(RSP)	// temp: vsys0 uses R17 for the exception class
+
+	MRS	ESR_EL1, R0	// type
+
+_vsyspatch:
+	B	_vsyspatch	// branch to vsys0() patched in
+
+TEXT vtrap(SB), 1, $-4	// trap vector template; setupvector copies at most 0x80 bytes (32 instructions) and patches the final branch
+	SUB	$TRAPFRAMESIZE, RSP
+
+	MOV	R0, 16(RSP)
+	MOV	R1, 24(RSP)
+	MOV	R2, 32(RSP)
+	MOV	R3, 40(RSP)
+	MOV	R4, 48(RSP)
+	MOV	R5, 56(RSP)
+	MOV	R6, 64(RSP)
+	MOV	R7, 72(RSP)
+	MOV	R8, 80(RSP)
+	MOV	R9, 88(RSP)
+	MOV	R10, 96(RSP)
+	MOV	R11, 104(RSP)
+	MOV	R12, 112(RSP)
+	MOV	R13, 120(RSP)
+	MOV	R14, 128(RSP)
+	MOV	R15, 136(RSP)
+	MOV	R16, 144(RSP)
+	MOV	R17, 152(RSP)
+	MOV	R18, 160(RSP)
+	MOV	R19, 168(RSP)
+	MOV	R20, 176(RSP)
+	MOV	R21, 184(RSP)
+	MOV	R22, 192(RSP)
+	MOV	R23, 200(RSP)
+	MOV	R24, 208(RSP)
+	MOV	R25, 216(RSP)
+
+	MOV	R30, 256(RSP)	// link
+
+	MRS	ESR_EL1, R0	// type
+
+_vtrappatch:
+	B	_vtrappatch	// branch to vtrapX() patched in
+
+TEXT virq(SB), 1, $-4	// irq vector template; same save sequence as vtrap but the type in R0 is the constant 1<<32
+	SUB	$TRAPFRAMESIZE, RSP
+
+	MOV	R0, 16(RSP)
+	MOV	R1, 24(RSP)
+	MOV	R2, 32(RSP)
+	MOV	R3, 40(RSP)
+	MOV	R4, 48(RSP)
+	MOV	R5, 56(RSP)
+	MOV	R6, 64(RSP)
+	MOV	R7, 72(RSP)
+	MOV	R8, 80(RSP)
+	MOV	R9, 88(RSP)
+	MOV	R10, 96(RSP)
+	MOV	R11, 104(RSP)
+	MOV	R12, 112(RSP)
+	MOV	R13, 120(RSP)
+	MOV	R14, 128(RSP)
+	MOV	R15, 136(RSP)
+	MOV	R16, 144(RSP)
+	MOV	R17, 152(RSP)
+	MOV	R18, 160(RSP)
+	MOV	R19, 168(RSP)
+	MOV	R20, 176(RSP)
+	MOV	R21, 184(RSP)
+	MOV	R22, 192(RSP)
+	MOV	R23, 200(RSP)
+	MOV	R24, 208(RSP)
+	MOV	R25, 216(RSP)
+
+	MOV	R30, 256(RSP)	// link
+
+	MOV	$(1<<32), R0	// type irq
+
+_virqpatch:
+	B	_virqpatch	// branch to vtrapX() patched in
+
+TEXT vfiq(SB), 1, $-4	// fiq vector template; same save sequence as vtrap but the type in R0 is the constant 2<<32
+	SUB	$TRAPFRAMESIZE, RSP
+
+	MOV	R0, 16(RSP)
+	MOV	R1, 24(RSP)
+	MOV	R2, 32(RSP)
+	MOV	R3, 40(RSP)
+	MOV	R4, 48(RSP)
+	MOV	R5, 56(RSP)
+	MOV	R6, 64(RSP)
+	MOV	R7, 72(RSP)
+	MOV	R8, 80(RSP)
+	MOV	R9, 88(RSP)
+	MOV	R10, 96(RSP)
+	MOV	R11, 104(RSP)
+	MOV	R12, 112(RSP)
+	MOV	R13, 120(RSP)
+	MOV	R14, 128(RSP)
+	MOV	R15, 136(RSP)
+	MOV	R16, 144(RSP)
+	MOV	R17, 152(RSP)
+	MOV	R18, 160(RSP)
+	MOV	R19, 168(RSP)
+	MOV	R20, 176(RSP)
+	MOV	R21, 184(RSP)
+	MOV	R22, 192(RSP)
+	MOV	R23, 200(RSP)
+	MOV	R24, 208(RSP)
+	MOV	R25, 216(RSP)
+
+	MOV	R30, 256(RSP)	// link
+	MOV	$(2<<32), R0	// type fiq
+
+_vfiqpatch:
+	B	_vfiqpatch	// branch to vtrapX() patched in
+
+TEXT vserr(SB), 1, $-4	// serr vector template; same save sequence as vtrap, type is ESR_EL1 with 3<<32 or-ed in
+	SUB	$TRAPFRAMESIZE, RSP
+
+	MOV	R0, 16(RSP)
+	MOV	R1, 24(RSP)
+	MOV	R2, 32(RSP)
+	MOV	R3, 40(RSP)
+	MOV	R4, 48(RSP)
+	MOV	R5, 56(RSP)
+	MOV	R6, 64(RSP)
+	MOV	R7, 72(RSP)
+	MOV	R8, 80(RSP)
+	MOV	R9, 88(RSP)
+	MOV	R10, 96(RSP)
+	MOV	R11, 104(RSP)
+	MOV	R12, 112(RSP)
+	MOV	R13, 120(RSP)
+	MOV	R14, 128(RSP)
+	MOV	R15, 136(RSP)
+	MOV	R16, 144(RSP)
+	MOV	R17, 152(RSP)
+	MOV	R18, 160(RSP)
+	MOV	R19, 168(RSP)
+	MOV	R20, 176(RSP)
+	MOV	R21, 184(RSP)
+	MOV	R22, 192(RSP)
+	MOV	R23, 200(RSP)
+	MOV	R24, 208(RSP)
+	MOV	R25, 216(RSP)
+
+	MOV	R30, 256(RSP)	// link
+
+	MRS	ESR_EL1, R0
+	ORR	$(3<<32), R0	// type
+_vserrpatch:
+	B	_vserrpatch	// branch to vtrapX() patched in
--- /dev/null
+++ b/sys/src/9/bcm64/main.c
@@ -1,0 +1,337 @@
+#include "u.h"
+#include "tos.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "io.h"
+#include "init.h"
+#include "sysreg.h"
+
+#include <pool.h>
+#include <libsec.h>
+
+Conf conf;	/* machine configuration, filled in by confinit */
+ulong memsize = GiB;	/* dram size used by confinit; replaced when *maxmem is set */
+
+/*
+ *  starting place for first process (userinit points sched.pc here); ends by calling touser
+ */
+void
+init0(void)
+{
+	char buf[2*KNAMELEN], **sp;
+
+	up->nerrlab = 0;
+	spllo();
+
+	/*
+	 * These are o.k. because rootinit is null.
+	 * Then early kproc's will have a root and dot.
+	 */
+	up->slash = namec("#/", Atodir, 0, 0);
+	pathclose(up->slash->path);
+	up->slash->path = newpath("/");
+	up->dot = cclone(up->slash);
+	chandevinit();
+	/* environment setup is best effort: an error raised inside skips the rest of it */
+	if(!waserror()){
+		snprint(buf, sizeof(buf), "%s %s", "ARM64", conffile);
+		ksetenv("terminal", buf, 0);
+		ksetenv("cputype", "arm64", 0);
+		if(cpuserver)
+			ksetenv("service", "cpu", 0);
+		else
+			ksetenv("service", "terminal", 0);
+		snprint(buf, sizeof(buf), "-a %s", getethermac());
+		ksetenv("etherargs", buf, 0);
+
+		/* convert plan9.ini variables to #e and #ec */
+		setconfenv();
+		poperror();
+	}
+	kproc("alarm", alarmkproc, 0);
+	/* initial user stack below Tos: sp[0] = &sp[1], sp[1] = "boot" (stored at &sp[4]), sp[2] = sp[3] = nil */
+	sp = (char**)(USTKTOP-sizeof(Tos) - 8 - sizeof(sp[0])*4);
+	sp[3] = sp[2] = sp[1] = nil;
+	strcpy(sp[1] = (char*)&sp[4], "boot");
+	sp[0] = (void*)&sp[1];
+
+	touser((uintptr)sp);
+
+	assert(0);			/* shouldn't have returned */
+}
+
+/*
+ *  create the first process: kernel stack aimed at init0, one zeroed user stack page, one text page holding initcode
+ */
+void
+userinit(void)
+{
+	Proc *p;
+	Segment *s;
+	KMap *k;
+	Page *pg;
+
+	/* no processes yet */
+	up = nil;
+
+	p = newproc();
+	p->pgrp = newpgrp();
+	p->egrp = smalloc(sizeof(Egrp));
+	p->egrp->ref = 1;
+	p->fgrp = dupfgrp(nil);
+	p->rgrp = newrgrp();
+	p->procmode = 0640;
+
+	kstrdup(&eve, "");
+	kstrdup(&p->text, "*init*");
+	kstrdup(&p->user, eve);
+
+	/*
+	 * Kernel Stack
+	 */
+	p->sched.pc = (uintptr)init0;	/* the first scheduled run starts in init0 */
+	p->sched.sp = (uintptr)p->kstack+KSTACK-sizeof(up->s.args)-sizeof(uintptr);
+	p->sched.sp = STACKALIGN(p->sched.sp);
+	*(void**)p->sched.sp = kproc; // fake
+
+	/*
+	 * User Stack
+	 *
+	 * Technically, newpage can't be called here because it
+	 * should only be called when in a user context as it may
+	 * try to sleep if there are no pages available, but that
+	 * shouldn't be the case here.
+	 */
+	s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
+	s->flushme++;
+	p->seg[SSEG] = s;
+	pg = newpage(1, 0, USTKTOP-BY2PG);
+	segpage(s, pg);
+	k = kmap(pg);
+	memset((void*)VA(k), 0, BY2PG);	/* top stack page starts out zeroed */
+	kunmap(k);
+
+	/*
+	 * Text
+	 */
+	s = newseg(SG_TEXT, UTZERO, 1);
+	p->seg[TSEG] = s;
+	pg = newpage(1, 0, UTZERO);
+	pg->txtflush = ~0;	/* all bits set: putmmu tests and clears one bit per cpu */
+	segpage(s, pg);
+	k = kmap(s->map[0]->pages[0]);
+	memmove((void*)VA(k), initcode, sizeof initcode);	/* initcode[] comes from init.h */
+	kunmap(k);
+
+	ready(p);
+}
+/* fill in conf from the boot configuration: service type, memory banks, user/kernel page split and table sizes */
+void
+confinit(void)
+{
+	int i, userpcnt;
+	ulong kpages;
+	uintptr pa;
+	char *p;
+
+	if(p = getconf("service")){
+		if(strcmp(p, "cpu") == 0)
+			cpuserver = 1;
+		else if(strcmp(p,"terminal") == 0)
+			cpuserver = 0;
+	}
+
+	if(p = getconf("*kernelpercent"))
+		userpcnt = 100 - strtol(p, 0, 0);
+	else
+		userpcnt = 0;	/* below 10, so the default split further down applies */
+
+	if((p = getconf("*maxmem")) != nil){
+		memsize = strtoul(p, 0, 0) - PHYSDRAM;
+		if (memsize < 16*MB)		/* sanity */
+			memsize = 16*MB;
+	}
+
+	getramsize(&conf.mem[0]);
+	if(conf.mem[0].limit == 0){
+		conf.mem[0].base = PHYSDRAM;
+		conf.mem[0].limit = PHYSDRAM + memsize;
+	}else if(p != nil)	/* p is still the *maxmem setting: it overrides the reported size */
+		conf.mem[0].limit = conf.mem[0].base + memsize;
+
+	conf.npage = 0;
+	pa = PADDR(PGROUND((uintptr)end));	/* first page boundary past the kernel image */
+
+	/*
+	 *  we assume that the kernel is at the beginning of one of the
+	 *  contiguous chunks of memory and fits therein.
+	 */
+	for(i=0; i<nelem(conf.mem); i++){
+		/* take kernel out of allocatable space */
+		if(pa > conf.mem[i].base && pa < conf.mem[i].limit)
+			conf.mem[i].base = pa;
+
+		conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG;
+		conf.npage += conf.mem[i].npage;
+	}
+
+	if(userpcnt < 10)
+		userpcnt = 60 + cpuserver*10;
+	kpages = conf.npage - (conf.npage*userpcnt)/100;
+
+	/*
+	 * can't go past the end of virtual memory
+	 * (uintptr)-KZERO is 2^64 - KZERO, the size of the KZERO window
+	 */
+	if(kpages > ((uintptr)-KZERO)/BY2PG)
+		kpages = ((uintptr)-KZERO)/BY2PG;
+
+	conf.upages = conf.npage - kpages;
+	conf.ialloc = (kpages/2)*BY2PG;
+
+	conf.nmach = getncpus();
+
+	/* set up other configuration parameters */
+	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
+	if(cpuserver)
+		conf.nproc *= 3;
+	if(conf.nproc > 2000)
+		conf.nproc = 2000;
+	conf.nswap = conf.npage*3;
+	conf.nswppo = 4096;
+	conf.nimage = 200;
+
+	conf.copymode = conf.nmach > 1;
+
+	/*
+	 * Guess how much is taken by the large permanent
+	 * datastructures. Mntcache and Mntrpc are not accounted for.
+	 */
+	kpages = conf.npage - conf.upages;
+	kpages *= BY2PG;
+	kpages -= conf.upages*sizeof(Page)
+		+ conf.nproc*sizeof(Proc)
+		+ conf.nimage*sizeof(Image)
+		+ conf.nswap
+		+ conf.nswppo*sizeof(Page*);
+	mainmem->maxsize = kpages;
+	if(!cpuserver)
+		/*
+		 * give terminals lots of image memory, too; the dynamic
+		 * allocation will balance the load properly, hopefully.
+		 * be careful with 32-bit overflow.
+		 */
+		imagmem->maxsize = kpages;
+}
+/* per-cpu setup run first thing in main: seed the tick and perf counters and mark this cpu in active.machs */
+void
+machinit(void)
+{
+	m->ticks = 1;
+	m->perf.period = 1;
+	active.machs[m->machno] = 1;
+}
+/* start the other cpus: publish the physical address of _start in their spin table slots and signal them with sev */
+void
+mpinit(void)
+{
+	extern void _start(void);
+	int i;
+
+	for(i = 0; i < MAXMACH; i++)
+		((uintptr*)SPINTABLE)[i] = 0;
+
+	for(i = 1; i < conf.nmach; i++)
+		MACHP(i)->machno = i;
+
+	cachedwbinv();	/* presumably so the new cpus see the machno values from memory; confirm */
+
+	for(i = 1; i < conf.nmach; i++)
+		((uintptr*)SPINTABLE)[i] = PADDR(_start);
+
+	cachedwbinvse((void*)SPINTABLE, MAXMACH*8);	/* MAXMACH slots of 8 bytes each */
+	sev();
+	delay(100);
+	sev();
+	synccycles();
+}
+/* idle hook: intentionally empty on this port */
+void
+idlehands(void)
+{
+}
+/* C entry point for every cpu: cpus with a non-zero machno take the short path, cpu0 does the full boot */
+void
+main(void)
+{
+	machinit();
+	if(m->machno){
+		trapinit();
+		fpuinit();
+		clockinit();
+		cpuidprint();
+		synccycles();
+		timersinit();
+		flushtlb();
+		mmu1init();
+		delay(4000);	/* usb initialization is busted multicore, let cpu0 do it */
+		m->ticks = MACHP(0)->ticks;
+		schedinit();
+		return;
+	}
+	bootargsinit();
+	confinit();
+	xinit();
+	printinit();
+	fmtinstall('H', encodefmt);
+	quotefmtinstall();
+	uartconsinit();
+	screeninit();
+	print("\nPlan 9\n");
+	xsummary();
+
+	/* set clock rate to arm_freq from config.txt */
+	setclkrate(ClkArm, 0);
+
+	trapinit();
+	fpuinit();
+	clockinit();
+	cpuidprint();
+	timersinit();
+	pageinit();
+	procinit0();
+	initseg();
+	links();
+	chandevreset();
+	userinit();
+	mpinit();
+	mmu0clear((uintptr*)L1);	/* drop the boot identity map of dram */
+	flushtlb();
+	mmu1init();
+	schedinit();
+}
+/* calls cpushutdown and then spins forever; the status argument is ignored */
+void
+exit(int)
+{
+	cpushutdown();
+	for(;;);
+}
+/* always raises Egreg; the arguments are ignored */
+void
+reboot(void*, void*, ulong)
+{
+	error(Egreg);
+}
+
+/*
+ * stub for ../omap/devether.c: always returns 0 and ignores its arguments
+ */
+int
+isaconfig(char *, int, ISAConf *)
+{
+	return 0;
+}
--- /dev/null
+++ b/sys/src/9/bcm64/mem.h
@@ -1,0 +1,139 @@
+/*
+ * Memory and machine-specific definitions.  Used in C and assembler.
+ */
+#define KiB		1024u			/* Kibi 0x0000000000000400 */
+#define MiB		1048576u		/* Mebi 0x0000000000100000 */
+#define GiB		1073741824u		/* Gibi 0x0000000040000000 */
+/* rounding helpers; arguments are evaluated more than once, so avoid side effects */
+#define	HOWMANY(x,y)	(((x)+((y)-1))/(y))
+#define	ROUNDUP(x,y)	(HOWMANY((x),(y))*(y))
+#define	PGROUND(s)	ROUNDUP(s, BY2PG)
+#define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))	/* sz must be a power of two */
+
+/*
+ * Sizes mapped by one entry at each table level, one row per page size:
+ * 	L0	L1	L2	L3
+ *	4K	2M	1G	512G
+ *	16K	32M	64G	128T
+ *	64K	512M	4T	-
+ */
+#define	PGSHIFT		12		/* log(BY2PG) */
+#define	BY2PG		(1ULL<<PGSHIFT)	/* bytes per page */
+
+/* effective virtual address space */
+#define EVASHIFT	36
+#define EVAMASK		((1ULL<<EVASHIFT)-1)
+
+#define PTSHIFT		(PGSHIFT-3)	/* log2 of entries per table: a page of 8-byte entries */
+#define PTLEVELS	HOWMANY(EVASHIFT-PGSHIFT, PTSHIFT)	/* table levels needed to cover EVASHIFT bits */
+#define PTLX(v, l)	((((v) & EVAMASK) >> (PGSHIFT + (l)*PTSHIFT)) & ((1 << PTSHIFT)-1))	/* index of v within a level l table */
+#define PGLSZ(l)	(1ULL << (PGSHIFT + (l)*PTSHIFT))	/* bytes mapped by one level l entry */
+/* the L1 area is used as an array of uintptr holding several tables; these give indices into it */
+#define PTL1X(v, l)	(L1TABLEX(v, l) | PTLX(v, l))
+#define L1TABLEX(v, l)	(L1TABLE(v, l) << PTSHIFT)
+#define L1TABLES	HOWMANY(-KZERO, PGLSZ(2))
+#define L1TABLE(v, l)	(L1TABLES-1 - ((PTLX(v, 2) % L1TABLES) >> (((l)-1)*PTSHIFT)) + (l)-1)
+#define L1TOPSIZE	(1ULL << (EVASHIFT - PTLEVELS*PTSHIFT))
+
+#define	MAXMACH		4			/* max # cpus system can run */
+#define	MACHSIZE	(8*KiB)			/* spacing of the per-cpu areas, see MACHADDR */
+
+#define KSTACK		(8*KiB)			/* kernel stack bytes per process */
+#define STACKALIGN(sp)	((sp) & ~7)		/* bug: assure with alloc */
+#define TRAPFRAMESIZE	(38*8)			/* bytes pushed by the vector code in l.s; ureg starts 16 bytes in */
+
+/*
+ * Address spaces.
+ * KTZERO is used by kprof and dumpstack (if any).
+ *
+ * KZERO is mapped to physical 0 (start of ram).
+ */
+
+#define	KZERO		0xFFFFFFFF80000000ULL	/* kernel address space */
+
+#define SPINTABLE	(KZERO+0xd8)		/* per-cpu start addresses, written by mpinit */
+#define CONFADDR	(KZERO+0x100)
+#define	REBOOTADDR	(0x1c00)		/* reboot code - physical address */
+#define	VCBUFFER	(KZERO+0x3400)		/* videocore mailbox buffer */
+
+#define L1		(L1TOP-L1SIZE)		/* start of the boot page table area */
+#define L1SIZE		((L1TABLES+PTLEVELS-3)*BY2PG)
+#define L1TOP		((MACHADDR(MAXMACH-1)-L1TOPSIZE)&-BY2PG)	/* placed below the lowest MACHADDR */
+
+#define MACHADDR(n)	(KTZERO-((n)+1)*MACHSIZE)	/* per-cpu areas are stacked downwards from KTZERO */
+
+#define	KTZERO		(KZERO+0x80000)		/* kernel text start */
+#define FRAMEBUFFER	0xFFFFFFFFC0000000ULL
+#define VIRTIO		0xFFFFFFFFE0000000ULL	/* i/o registers */
+#define	ARMLOCAL	(VIRTIO+IOSIZE)
+#define	VGPIO		0			/* virtual gpio for pi3 ACT LED */
+
+#define	UZERO		0ULL			/* user segment */
+#define	UTZERO		(UZERO+0x10000)		/* user text start */
+#define	USTKTOP		((EVAMASK>>1)-0xFFFF)	/* user segment end +1 */
+#define	USTKSIZE	(16*1024*1024)		/* user stack size */
+
+#define BLOCKALIGN	64			/* only used in allocb.c */
+
+/*
+ * Sizes
+ */
+#define BI2BY		8			/* bits per byte */
+#define BY2SE		4
+#define BY2WD		8
+#define BY2V		8			/* only used in xalloc.c */
+
+#define	PTEMAPMEM	(1024*1024)
+#define	PTEPERTAB	(PTEMAPMEM/BY2PG)
+#define	SEGMAPSIZE	1984
+#define	SSEGMAPSIZE	16
+#define	PPN(x)		((x)&~(BY2PG-1))	/* x with the offset within the page cleared */
+/* arguments for PTESH() */
+#define SHARE_NONE	0
+#define SHARE_OUTER	2
+#define SHARE_INNER	3
+
+#define CACHE_UC	0
+#define CACHE_WB	1
+#define CACHE_WT	2
+#define CACHE_WB_NA	3
+/* arguments for PTEMA(); presumably indices into the attributes programmed in MAIR_EL1 (confirm in l.s) */
+#define MA_MEM_WB	0
+#define MA_MEM_WT	1
+#define MA_MEM_UC	2
+#define MA_DEV_nGnRnE	3
+#define MA_DEV_nGnRE	4
+#define MA_DEV_nGRE	5
+#define MA_DEV_GRE	6
+/* descriptor bits: PTEVALID is or-ed with one of the type values below */
+#define	PTEVALID	1
+#define PTEBLOCK	0
+#define PTETABLE	2
+#define PTEPAGE		2
+
+#define PTEMA(x)	((x)<<2)
+#define PTEAP(x)	((x)<<6)
+#define PTESH(x)	((x)<<8)
+
+#define PTEAF		(1<<10)
+#define PTENG		(1<<11)
+
+#define PTEKERNEL	PTEAP(0)
+#define PTEUSER		PTEAP(1)
+#define PTEWRITE	PTEAP(0)
+#define PTERONLY	PTEAP(2)
+
+#define PTEWT		PTEMA(MA_MEM_WT)
+#define	PTEUNCACHED	PTEMA(MA_MEM_UC)
+#define	PTEDEVICE	PTEMA(MA_DEV_nGnRnE)
+
+/*
+ * Physical machine information from here on.
+ *	PHYS addresses as seen from the arm cpu.
+ *	BUS  addresses as seen from the videocore gpu.
+ */
+#define	PHYSDRAM	0
+#define	IOSIZE		(16*MiB)
+/* both macros evaluate their arguments twice */
+#define MIN(a, b)	((a) < (b)? (a): (b))
+#define MAX(a, b)	((a) > (b)? (a): (b))
--- /dev/null
+++ b/sys/src/9/bcm64/mkfile
@@ -1,0 +1,146 @@
+CONF=pi3
+CONFLIST=pi3
+EXTRACOPIES=
+# link address given to the loader with -T; same value as KTZERO in mem.h
+loadaddr=0xffffffff80080000
+
+objtype=arm64
+</$objtype/mkfile
+p=9
+
+OS=7
+
+DEVS=`{rc ../port/mkdevlist $CONF}
+# object lists: PORT and OBJ are linked into the kernel together with the libraries in LIB
+PORT=\
+	alarm.$O\
+	alloc.$O\
+	allocb.$O\
+	auth.$O\
+	cache.$O\
+	chan.$O\
+	dev.$O\
+	edf.$O\
+	fault.$O\
+	mul64fract.$O\
+	page.$O\
+	parse.$O\
+	pgrp.$O\
+	portclock.$O\
+	print.$O\
+	proc.$O\
+	qio.$O\
+	qlock.$O\
+	rdb.$O\
+	rebootcmd.$O\
+	segment.$O\
+	syscallfmt.$O\
+	sysfile.$O\
+	sysproc.$O\
+	taslock.$O\
+	tod.$O\
+	xalloc.$O\
+
+OBJ=\
+	l.$O\
+	cache.v8.$O\
+	bootargs.$O\
+	clock.$O\
+	fpu.$O\
+	irq.$O\
+	main.$O\
+	mmu.$O\
+	sysreg.$O\
+	random.$O\
+	trap.$O\
+	$CONF.root.$O\
+	$CONF.rootc.$O\
+	$DEVS\
+	$PORT\
+
+# HFILES=
+
+LIB=\
+	/$objtype/lib/libmemlayer.a\
+	/$objtype/lib/libmemdraw.a\
+	/$objtype/lib/libdraw.a\
+	/$objtype/lib/libip.a\
+	/$objtype/lib/libsec.a\
+	/$objtype/lib/libmp.a\
+	/$objtype/lib/libc.a\
+9:V: $p$CONF s$p$CONF
+# two links of the same objects: $p$CONF is the raw image (-H6), s$p$CONF is linked without -H6 and keeps its symbols
+$p$CONF:DQ:	$CONF.c $OBJ $LIB mkfile
+	$CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
+	echo '# linking raw kernel'	# H6: no headers, data segment aligned
+	$LD -l -o $target -H6 -R4096 -T$loadaddr $OBJ $CONF.$O $LIB
+
+s$p$CONF:DQ:	$CONF.$O $OBJ $LIB
+	echo '# linking kernel with symbols'
+	$LD -l -o $target -R4096 -T$loadaddr $OBJ $CONF.$O $LIB
+	size $target
+
+$p$CONF.gz:D:	$p$CONF
+	gzip -9 <$p$CONF >$target
+
+$OBJ: $HFILES
+
+install:V: /$objtype/$p$CONF
+
+/$objtype/$p$CONF:D: $p$CONF s$p$CONF
+	cp -x $p$CONF s$p$CONF /$objtype/ &
+	for(i in $EXTRACOPIES)
+		{ 9fs $i && cp $p$CONF s$p$CONF /n/$i/$objtype && echo -n $i... & }
+	wait
+	echo
+	touch $target
+
+# objects listed by mkfilelist for ../bcm are compiled from the ../bcm sources, with -I. so this directory's headers are searched
+REPCC=`{../port/mkfilelist ../bcm}
+^($REPCC)\.$O:R:	'../bcm/\1.c'
+	$CC $CFLAGS -I. -. ../bcm/$stem1.c
+
+<../boot/bootmkfile
+<../port/portmkfile
+<|../port/mkbootrules $CONF
+
+arch.$O clock.$O fpiarm.$O main.$O mmu.$O screen.$O syscall.$O trap.$O: \
+	/$objtype/include/ureg.h
+
+l.$O cache.v8.$O lexception.$O lproc.$O mmu.$O: mem.h
+l.$O cache.v8.$O archbcm3.$O clock.$O fpu.$O trap.$O mmu.$O: sysreg.h
+
+devmouse.$O mouse.$O screen.$O: screen.h
+usbdwc.$O: dwcotg.h ../port/usb.h
+# generated one-line headers that just include the ../bcm versions
+io.h:D:	../bcm/io.h
+	echo '#include "../bcm/io.h"' > io.h
+screen.h:D: ../bcm/screen.h
+	echo '#include "../bcm/screen.h"' > screen.h
+dwcotg.h:D: ../bcm/dwcotg.h
+	echo '#include "../bcm/dwcotg.h"' > dwcotg.h
+# init.h: the linked initcode program dumped as the C byte array initcode[]
+init.h:D:	../port/initcode.c init9.s
+	$CC ../port/initcode.c
+	$AS init9.s
+	$LD -l -R1 -s -o init.out init9.$O initcode.$O /$objtype/lib/libc.a
+	{echo 'uchar initcode[]={'
+	 xd -1x <init.out |
+		sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+	 echo '};'} > init.h
+# the reboot.h rule is disabled (commented out) in this port
+#reboot.h:D:	rebootcode.s arm.s arm.h mem.h
+#	$AS rebootcode.s
+#	# -T arg is REBOOTADDR
+#	$LD -l -s -T0x1c00 -R4 -o reboot.out rebootcode.$O
+#	{echo 'uchar rebootcode[]={'
+#	 xd -1x reboot.out |
+#		sed -e '1,2d' -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+#	 echo '};'} > reboot.h
+
+errstr.h:D:	../port/mkerrstr ../port/error.h
+	rc ../port/mkerrstr > errstr.h
+
+$CONF.clean:
+	rm -rf $p$CONF s$p$CONF errstr.h reboot.h io.h screen.h dwcotg.h $CONF.c boot$CONF.c
--- /dev/null
+++ b/sys/src/9/bcm64/mmu.c
@@ -1,0 +1,384 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "sysreg.h"
+/* build the boot page tables in l1: dram mapped 1:1 and again at KZERO, i/o registers at VIRTIO, all as level 1 blocks */
+void
+mmu0init(uintptr *l1)
+{
+	uintptr va, pa, pe;
+
+	/* 0 identity map */
+	pe = PHYSDRAM + soc.dramsize;
+	for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(1))
+		l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
+			 | PTEKERNEL | PTESH(SHARE_INNER);
+	if(PTLEVELS > 2)	/* higher levels hold table entries pointing at the level below */
+	for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(2))
+		l1[PTL1X(pa, 2)] = (uintptr)&l1[L1TABLEX(pa, 1)] | PTEVALID | PTETABLE;
+	if(PTLEVELS > 3)
+	for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(3))
+		l1[PTL1X(pa, 3)] = (uintptr)&l1[L1TABLEX(pa, 2)] | PTEVALID | PTETABLE;
+
+	/* KZERO */
+	for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
+		l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
+			| PTEKERNEL | PTESH(SHARE_INNER);
+	if(PTLEVELS > 2)
+	for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
+		l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
+	if(PTLEVELS > 3)
+	for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
+		l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
+
+	/* VIRTIO */
+	pe = -VIRTIO + soc.physio;	/* everything from VIRTIO to the top of the address space */
+	for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
+		l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
+			| PTEKERNEL | PTESH(SHARE_OUTER) | PTEDEVICE;
+	if(PTLEVELS > 2)
+	for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
+		l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
+	if(PTLEVELS > 3)
+	for(pa = soc.physio, va = VIRTIO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
+		l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
+}
+
+void
+mmu0clear(uintptr *l1)
+{
+	uintptr va, pa, pe;
+	int l;
+
+	/*
+	 * Undo the identity map of dram made by mmu0init, working from
+	 * the top table level down to the level 1 block entries.
+	 * An identity slot that is also the KZERO slot for the same
+	 * memory is left alone so the kernel mapping survives.
+	 */
+	pe = PHYSDRAM + soc.dramsize;
+	for(l = PTLEVELS-1; l >= 1; l--){
+		va = KZERO;
+		for(pa = PHYSDRAM; pa < pe; pa += PGLSZ(l)){
+			if(PTL1X(pa, l) != PTL1X(va, l))
+				l1[PTL1X(pa, l)] = 0;
+			va += PGLSZ(l);
+		}
+	}
+}
+
+void
+mmu1init(void)
+{
+	/* this cpu's own table area: L1SIZE plus the top level table, zero filled */
+	if((m->mmul1 = mallocalign(L1SIZE+L1TOPSIZE, BY2PG, L1SIZE, 0)) == nil)
+		panic("mmu1init: no memory for mmul1");
+	memset(m->mmul1, 0, L1SIZE+L1TOPSIZE);
+	mmuswitch(nil);	/* load the new tables with no user process attached */
+}
+
+uintptr
+paddr(void *va)
+{
+	/* only addresses at or above KZERO translate by simple subtraction */
+	if((uintptr)va < KZERO)
+		panic("paddr: va=%#p pc=%#p", va, getcallerpc(&va));
+	return (uintptr)va-KZERO;
+}
+
+uintptr
+cankaddr(uintptr pa)
+{
+	/* bytes from pa to the end of the KZERO window; 0 when pa lies outside it */
+	uintptr top = (uintptr)-KZERO;
+	return pa < top ? top - pa : 0;
+}
+
+void*
+kaddr(uintptr pa)
+{
+	/* physical addresses at or beyond -KZERO have no alias in the KZERO window */
+	if(pa >= (uintptr)-KZERO)
+		panic("kaddr: pa=%#p pc=%#p", pa, getcallerpc(&pa));
+	return (void*)(pa + KZERO);
+}
+/* empty on this port: kmap makes no temporary mappings, so there is nothing to invalidate */
+void
+kmapinval(void)
+{
+}
+/* a page's KMap is simply its KZERO address; kaddr panics if p->pa is outside that window */
+KMap*
+kmap(Page *p)
+{
+	return kaddr(p->pa);
+}
+/* nothing to undo: kmap only computes an address */
+void
+kunmap(KMap*)
+{
+}
+/* map size bytes of device memory at pa to kernel va using level 1 blocks; returns the va of pa, or 0 when va is 0 */
+uintptr
+mmukmap(uintptr va, uintptr pa, usize size)
+{
+	uintptr a, pe, off;
+
+	if(va == 0)
+		return 0;
+	assert((va % PGLSZ(1)) == 0);	/* va must be block aligned; pa need not be */
+	off = pa % PGLSZ(1);
+	a = va + off;			/* the result keeps pa's offset within its block */
+	pe = (pa + size + (PGLSZ(1)-1)) & -PGLSZ(1);
+	pa -= off;	/* descriptors take the block base: stray low bits would land in the attribute field */
+	while(pa < pe){
+		((uintptr*)L1)[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
+			| PTEKERNEL | PTESH(SHARE_OUTER) | PTEDEVICE;
+		pa += PGLSZ(1);
+		va += PGLSZ(1);
+	}
+	flushtlb();
+	return a;
+}
+/* return the level `level' entry for va in this cpu's tables, adding missing tables from up->mmufree; nil if that list is empty */
+static uintptr*
+mmuwalk(uintptr va, int level)
+{
+	uintptr *table, pte;
+	Page *pg;
+	int i, x;
+
+	x = PTLX(va, PTLEVELS-1);
+	table = &m->mmul1[L1TABLEX(va, PTLEVELS-1)];
+	for(i = PTLEVELS-2; i >= level; i--){
+		pte = table[x];
+		if(pte & PTEVALID) {
+			if(pte & (0xFFFFULL<<48))
+				iprint("strange pte %#p va %#p\n", pte, va);
+			pte &= ~(0xFFFFULL<<48 | BY2PG-1);	/* keep only the address of the next table */
+			table = KADDR(pte);
+		} else {
+			if(i < 2){
+				pg = up->mmufree;
+				if(pg == nil)
+					return nil;	/* putmmu refills up->mmufree and retries */
+				up->mmufree = pg->next;
+				switch(i){	/* remember the page on the per-process list for its level */
+				case 0:
+					pg->va = va & -PGLSZ(1);
+					if((pg->next = up->mmul1) == nil)
+						up->mmul1tail = pg;
+					up->mmul1 = pg;
+					break;
+				case 1:
+					pg->va = va & -PGLSZ(2);
+					if((pg->next = up->mmul2) == nil)
+						up->mmul2tail = pg;
+					up->mmul2 = pg;
+					break;
+				}
+				memset(KADDR(pg->pa), 0, BY2PG);
+				coherence();	/* zero the new table before the entry below makes it reachable */
+				table[x] = pg->pa | PTEVALID | PTETABLE;
+				table = KADDR(pg->pa);
+			} else {
+				table[x] = PADDR(&m->mmul1[L1TABLEX(va, 2)]) | PTEVALID | PTETABLE;
+				table = &m->mmul1[L1TABLEX(va, 2)];
+			}
+		}
+		x = PTLX(va, (uintptr)i);
+	}
+	return &table[x];
+}
+
+static Proc *asidlist[256];	/* asid -> owning proc; slot 0 is never handed out */
+/* pick an asid for p, trying its previous one first; returns nonzero when the slot was not already p's (mmuswitch then flushes it) */
+static int
+allocasid(Proc *p)
+{
+	static Lock lk;
+	Proc *x;
+	int a;
+
+	lock(&lk);
+	a = p->asid;
+	if(a < 0)
+		a = -a;	/* negative means released by putasid */
+	if(a == 0)
+		a = p->pid;	/* no previous asid: start the search from the pid */
+	for(;; a++){
+		a %= nelem(asidlist);
+		if(a == 0)
+			continue;	// reserved
+		x = asidlist[a];
+		if(x == p || x == nil || (x->asid < 0 && x->mach == nil))
+			break;
+	}
+	p->asid = a;
+	asidlist[a] = p;
+	unlock(&lk);
+
+	return x != p;
+}
+
+static void
+freeasid(Proc *p)
+{
+	int slot;
+
+	/* the asid may be stored negated (see putasid); the table slot is its magnitude */
+	slot = p->asid;
+	slot = slot < 0 ? -slot : slot;
+	if(slot > 0 && asidlist[slot] == p)
+		asidlist[slot] = nil;	/* only give up a slot that is still ours */
+	p->asid = 0;
+}
+
+void
+putasid(Proc *p)
+{
+	int a;
+
+	/*
+	 * Scenario being guarded against on a multiprocessor:
+	 *	pX sleeps on cpuA, its page tables still linked into cpuA's mmul1;
+	 *	pX wakes up on cpuB and exits, freeing those page table pages;
+	 *	pY on cpuB gets one of the freed pages and fills it with data;
+	 *	cpuA takes an interrupt while still pointing at the stale tables.
+	 * Only user address space tables are affected and mmuswitch clears mmul1
+	 * before dispatching a user process, so in theory it is harmless; empirically
+	 * it correlates with weird problems, eg resetting of the core clock at
+	 * 0x4000001C which confuses local timers.
+	 */
+	if(conf.nmach > 1)
+		mmuswitch(nil);
+	if((a = p->asid) > 0)
+		p->asid = -a;	/* negated: p may get it back, but allocasid can now reuse it */
+}
+/* install the user mapping va -> pa for the current process; table pages come from up->mmufree, refilled with newpage when empty */
+void
+putmmu(uintptr va, uintptr pa, Page *pg)
+{
+	uintptr *pte, old;
+	int s;
+
+// iprint("cpu%d: putmmu va %#p asid %d proc %lud %s\n", m->machno, va, up->asid, up->pid, up->text);
+	s = splhi();
+	while((pte = mmuwalk(va, 0)) == nil){
+		spllo();	/* interrupts are enabled around newpage */
+		assert(up->mmufree == nil);
+		up->mmufree = newpage(0, nil, 0);
+		splhi();
+	}
+	old = *pte;
+	*pte = 0;	/* the entry is cleared and the tlb flushed before the new value is written */
+	if((old & PTEVALID) != 0)
+		flushasidvall((uvlong)up->asid<<48 | va>>12);
+	else
+		flushasidva((uvlong)up->asid<<48 | va>>12);
+	*pte = pa | PTEPAGE | PTEUSER | PTENG | PTEAF | PTESH(SHARE_INNER);
+	if(pg->txtflush & (1UL<<m->machno)){
+		/* pio() sets PG_TXTFLUSH whenever a text pg has been written */
+		cachedwbinvse((void*)KADDR(pg->pa), BY2PG);
+		cacheiinvse((void*)va, BY2PG);
+		pg->txtflush &= ~(1UL<<m->machno);	/* done for this cpu only */
+	}
+	splx(s);
+}
+/* release p's asid and move all of its page table pages (mmul1 and mmul2 lists) onto p->mmufree */
+static void
+mmufree(Proc *p)
+{
+	freeasid(p);
+
+	if(p->mmul1 == nil){
+		assert(p->mmul2 == nil);	/* the lists are expected to be empty or non-empty together */
+		return;
+	}
+	p->mmul1tail->next = p->mmufree;
+	p->mmufree = p->mmul1;
+	p->mmul1 = p->mmul1tail = nil;
+
+	if(PTLEVELS > 2){
+		p->mmul2tail->next = p->mmufree;
+		p->mmufree = p->mmul2;
+		p->mmul2 = p->mmul2tail = nil;
+	}
+}
+/* make this cpu's tables describe p: clear the user entries, relink p's table pages, load ttbr with p's asid; nil p means no user mappings */
+void
+mmuswitch(Proc *p)
+{
+	uintptr va;
+	Page *t;
+
+	for(va = UZERO; va < USTKTOP; va += PGLSZ(PTLEVELS-1))
+		m->mmul1[PTL1X(va, PTLEVELS-1)] = 0;
+
+	if(p == nil){
+		setttbr(PADDR(&m->mmul1[L1TABLEX(0, PTLEVELS-1)]));	/* no asid bits in this value */
+		return;
+	}
+
+	if(p->newtlb){	/* set by flushmmu: discard the process's current tables */
+		mmufree(p);
+		p->newtlb = 0;
+	}
+
+	if(PTLEVELS == 2){
+		for(t = p->mmul1; t != nil; t = t->next){
+			va = t->va;
+			m->mmul1[PTL1X(va, 1)] = t->pa | PTEVALID | PTETABLE;
+		}
+	} else {
+		for(t = p->mmul2; t != nil; t = t->next){
+			va = t->va;
+			m->mmul1[PTL1X(va, 2)] = t->pa | PTEVALID | PTETABLE;
+			if(PTLEVELS > 3)
+				m->mmul1[PTL1X(va, 3)] = PADDR(&m->mmul1[L1TABLEX(va, 2)]) |
+					PTEVALID | PTETABLE;
+		}
+	}
+
+	if(allocasid(p))	/* nonzero: the asid slot was not already p's */
+		flushasid((uvlong)p->asid<<48);
+
+// iprint("cpu%d: mmuswitch asid %d proc %lud %s\n", m->machno, p->asid, p->pid, p->text);
+	setttbr((uvlong)p->asid<<48 | PADDR(&m->mmul1[L1TABLEX(0, PTLEVELS-1)]));
+}
+
+void
+mmurelease(Proc *p)
+{
+	Page *pg;
+
+	/* drop the user mappings on this cpu, then gather p's table pages on p->mmufree */
+	mmuswitch(nil);
+	mmufree(p);
+	if(p->mmufree == nil)
+		return;		/* nothing to hand back */
+	while((pg = p->mmufree) != nil){
+		p->mmufree = pg->next;
+		if(--pg->ref != 0)
+			panic("mmurelease: bad page ref");
+		pagechainhead(pg);
+	}
+	pagechaindone();
+}
+/* throw away the current process's page tables: newtlb makes mmuswitch free them before reloading */
+void
+flushmmu(void)
+{
+	int x;
+
+	x = splhi();
+	up->newtlb = 1;
+	mmuswitch(up);
+	splx(x);
+}
+/* empty on this port; both arguments are ignored */
+void
+checkmmu(uintptr, uintptr)
+{
+}
--- /dev/null
+++ b/sys/src/9/bcm64/pi3
@@ -1,0 +1,53 @@
+dev
+	root
+	cons
+	swap
+	env
+	pipe
+	proc
+	mnt
+	srv
+	shr
+	dup
+	arch
+	ssl
+	tls
+	cap
+	fs
+	ip		arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium inferno
+	draw	screen swcursor
+	mouse	mouse
+	uart	gpio
+#	gpio	gpio
+	sd
+	usb
+
+link
+	loopbackmedium
+	ethermedium
+	archbcm3
+	usbdwc
+
+ip
+	tcp
+	udp
+	ipifc
+	icmp
+	icmp6
+	ipmux
+
+misc
+	uartmini
+	uartpl011
+	sdmmc	emmc
+	dma
+	vcore
+
+port
+	int cpuserver = 0;
+
+bootdir
+	/$objtype/bin/paqfs
+	/$objtype/bin/auth/factotum
+	bootfs.paq
+	boot
--- /dev/null
+++ b/sys/src/9/bcm64/sysreg.c
@@ -1,0 +1,58 @@
+/*
+ * ARMv8 system registers
+ * mainly to cope with arm hard-wiring register numbers into instructions.
+ *
+ * these routines must be callable from KZERO.
+ *
+ * on a multiprocessor, process switching to another cpu is assumed
+ * to be inhibited by the caller as these registers are local to the cpu.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+/* return a callable two-word routine "wd; RETURN" kept in ib[], creating it on first use; panics when ib[] is full */
+static void*
+mkinstr(ulong wd)
+{
+	static ulong ib[256], *ep[MAXMACH+1];
+	static Lock lk;
+	ulong *ip, *ie;
+
+	ie = ep[m->machno];	/* end of the entries this cpu has already looked up */
+	for(ip = ib; ip < ie; ip += 2)
+		if(*ip == wd)
+			return ip;
+
+	ilock(&lk);
+	ie = ep[MAXMACH];	/* ep[MAXMACH] is the shared end of the used part of ib[] */
+	for(; ip < ie; ip += 2)
+		if(*ip == wd)
+			goto Found;
+	if(ip >= &ib[nelem(ib)])
+		panic("mkinstr: out of instruction buffer");
+	ip[0] = wd;
+	ip[1] = 0xd65f03c0;	// RETURN
+	ep[MAXMACH] = ie = ip + 2;
+	cachedwbinvse(ip, 2*sizeof(*ip));	/* write the new words back from the data cache */
+Found:
+	iunlock(&lk);
+	cacheiinv();	/* invalidate the instruction cache before the entry is run on this cpu */
+	ep[m->machno] = ie;
+	return ip;
+}
+
+uvlong
+sysrd(ulong spr)
+{
+	/* 0xd5380000 | spr is the instruction word run to read the register; its result is returned */
+	return ((uvlong(*)(void))mkinstr(0xd5380000UL | spr))();
+}
+
+void
+syswr(ulong spr, uvlong val)
+{
+	/* 0xd5180000 | spr is the instruction word run to write the register; val is its argument */
+	((void(*)(uvlong))mkinstr(0xd5180000UL | spr))(val);
+}
--- /dev/null
+++ b/sys/src/9/bcm64/sysreg.h
@@ -1,0 +1,53 @@
+#define MIDR_EL1			SYSREG(3,0,0,0,0)	/* arguments are op0,op1,Cn,Cm,op2; SYSREG is defined below */
+#define MPIDR_EL1			SYSREG(3,0,0,0,5)
+#define ID_AA64MMFR0_EL1		SYSREG(3,0,0,7,0)
+#define SCTLR_EL1			SYSREG(3,0,1,0,0)
+#define CPACR_EL1			SYSREG(3,0,1,0,2)
+#define MAIR_EL1			SYSREG(3,0,10,2,0)
+#define TCR_EL1				SYSREG(3,0,2,0,2)
+#define TTBR0_EL1			SYSREG(3,0,2,0,0)
+#define TTBR1_EL1			SYSREG(3,0,2,0,1)
+#define ESR_EL1				SYSREG(3,0,5,2,0)
+#define FAR_EL1				SYSREG(3,0,6,0,0)
+#define VBAR_EL1			SYSREG(3,0,12,0,0)
+#define VTTBR_EL2			SYSREG(3,4,2,1,0)
+#define SP_EL0				SYSREG(3,0,4,1,0)
+#define SP_EL1				SYSREG(3,4,4,1,0)
+#define SP_EL2				SYSREG(3,6,4,1,0)
+#define SCTLR_EL2			SYSREG(3,4,1,0,0)
+#define HCR_EL2				SYSREG(3,4,1,1,0)
+#define MDCR_EL2			SYSREG(3,4,1,1,1)
+#define PMCR_EL0			SYSREG(3,3,9,12,0)
+#define PMCNTENSET			SYSREG(3,3,9,12,1)
+#define PMCCNTR_EL0			SYSREG(3,3,9,13,0)
+#define PMUSERENR_EL0			SYSREG(3,3,9,14,0)
+
+#define CNTP_TVAL_EL0			SYSREG(3,3,14,2,0)
+#define CNTP_CTL_EL0			SYSREG(3,3,14,2,1)
+#define CNTP_CVAL_EL0			SYSREG(3,3,14,2,2)
+
+#define TPIDR_EL0			SYSREG(3,3,13,0,2)
+#define TPIDR_EL1			SYSREG(3,0,13,0,4)
+
+#define CCSIDR_EL1			SYSREG(3,1,0,0,0)
+#define CSSELR_EL1			SYSREG(3,2,0,0,0)
+
+#define ACTLR_EL2			SYSREG(3,4,1,0,1)
+#define CPUACTLR_EL1			SYSREG(3,1,15,2,0)
+#define CPUECTLR_EL1			SYSREG(3,1,15,2,1)
+
+/* l.s redefines this for the assembler; this C form is the value sysrd/syswr or into their instruction word */
+#define SYSREG(op0,op1,Cn,Cm,op2)	((op0)<<19|(op1)<<16|(Cn)<<12|(Cm)<<8|(op2)<<5)
+/* four-bit option values built as group<<2 | kind; presumably the barrier instruction options of the same names (confirm) */
+#define	OSHLD	(0<<2 | 1)
+#define OSHST	(0<<2 | 2)
+#define	OSH	(0<<2 | 3)
+#define NSHLD	(1<<2 | 1)
+#define NSHST	(1<<2 | 2)
+#define NSH	(1<<2 | 3)
+#define ISHLD	(2<<2 | 1)
+#define ISHST	(2<<2 | 2)
+#define ISH	(2<<2 | 3)
+#define LD	(3<<2 | 1)
+#define ST	(3<<2 | 2)
+#define SY	(3<<2 | 3)
--- /dev/null
+++ b/sys/src/9/bcm64/trap.c
@@ -1,0 +1,752 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../port/systab.h"
+
+#include <tos.h>
+#include "ureg.h"
+#include "sysreg.h"
+
+/*
+ * SPSR bits user can modify: bits 31:28, the N, Z, C and V
+ * condition flags.  noted() and setregisters() take these bits
+ * from the user supplied value and keep every other psr bit.
+ */
+#define USPSRMASK	(0xFULL<<28)
+
+/*
+ * Fill one vector slot at v from the template code at t.
+ * Instruction words are copied one by one, at most 0x80 bytes,
+ * until the placeholder word 0x14000000 (a branch with a zero
+ * offset) has been copied.  The placeholder's 26-bit offset field
+ * is then set to the distance, counted in instruction words, from
+ * the copied word to f, and the function returns.
+ * A template without a placeholder is a bug and panics.
+ */
+static void
+setupvector(u32int *v, void (*t)(void), void (*f)(void))
+{
+	u32int *src, *dst, *end;
+
+	src = (u32int*)t;
+	end = v + 0x80/4;
+	for(dst = v; dst < end; dst++, src++){
+		*dst = *src;
+		if(*dst != 0x14000000)
+			continue;
+		/* found the branch: aim it at f, relative to its new home */
+		*dst |= ((u32int*)f - dst) & 0x3ffffff;
+		return;
+	}
+	panic("bug in vector code");
+}
+
+/*
+ * Install the exception vector table on the calling cpu.
+ *
+ * The table (16 slots of 0x80 bytes, 2KB aligned) is built only on
+ * the first call, with all interrupts switched off, by instantiating
+ * the assembly templates with setupvector(), and is then written
+ * back from the data cache.  Every call invalidates the instruction
+ * cache over the table, loads its address into VBAR_EL1 and finally
+ * unmasks serror and debug exceptions.
+ */
+void
+trapinit(void)
+{
+	extern void vsys(void);
+	extern void vtrap(void);
+	extern void virq(void);
+	extern void vfiq(void);
+	extern void vserr(void);
+
+	extern void vsys0(void);
+	extern void vtrap0(void);
+	extern void vtrap1(void);
+
+	enum {
+		Vecsize	= 0x80,		/* bytes per vector slot */
+		Nvec	= 16,		/* slots in the table */
+	};
+	/* one entry per slot, in table order */
+	static struct {
+		void	(*tmpl)(void);		/* code copied into the slot */
+		void	(*target)(void);	/* where its branch is aimed */
+	} slot[Nvec] = {
+		/* 0x000 - 0x180 */
+		{ vtrap,	vtrap0 },
+		{ virq,		vtrap0 },
+		{ vfiq,		vtrap0 },
+		{ vserr,	vtrap0 },
+		/* 0x200 - 0x380 */
+		{ vtrap,	vtrap1 },
+		{ virq,		vtrap1 },
+		{ vfiq,		vtrap1 },
+		{ vserr,	vtrap1 },
+		/* 0x400 - 0x580: synchronous entry goes to the syscall path */
+		{ vsys,		vsys0 },
+		{ virq,		vtrap0 },
+		{ vfiq,		vtrap0 },
+		{ vserr,	vtrap0 },
+		/* 0x600 - 0x780 */
+		{ vtrap,	vtrap0 },
+		{ virq,		vtrap0 },
+		{ vfiq,		vtrap0 },
+		{ vserr,	vtrap0 },
+	};
+	static u32int *vectab;
+	int i;
+
+	intrcpushutdown();
+	if(vectab == nil){
+		/* disable everything */
+		intrsoff();
+
+		vectab = mallocalign(Nvec*Vecsize, 1<<11, 0, 0);
+		if(vectab == nil)
+			panic("no memory for vector table");
+
+		for(i = 0; i < Nvec; i++)
+			setupvector(vectab + i*(Vecsize/4), slot[i].tmpl, slot[i].target);
+
+		cacheduwbse(vectab, Nvec*Vecsize);
+	}
+	cacheiinvse(vectab, Nvec*Vecsize);
+	syswr(VBAR_EL1, (uintptr)vectab);
+	splx(0x3<<6);	// unmask serr and debug
+}
+
+/*
+ * Cycle accounting on the way back to user mode.
+ * The Tos structure sits at the very top of the user stack
+ * (USTKTOP-sizeof(Tos)); it receives the cycles spent in the
+ * kernel since up->kentry was recorded, the process cycle count
+ * and the pid.  The Ureg argument is not used.
+ */
+void
+kexit(Ureg*)
+{
+	uvlong now;
+	Tos *tos;
+
+	now = cycles();
+	tos = (Tos*)(USTKTOP-sizeof(Tos));
+
+	tos->kcycles += now - up->kentry;
+	tos->pcycles = now + up->pcycles;
+	tos->pid = up->pid;
+}
+
+/*
+ * Note text posted by trap() for exceptions taken from user mode
+ * that have no dedicated handler, indexed by the 6-bit exception
+ * class.  Classes without an entry are reported with entry 0.
+ */
+static char *traps[64] = {
+	[0x00]	"sys: trap: unknown",
+	[0x01]	"sys: trap: WFI or WFE instruction execution",
+	[0x0E]	"sys: trap: illegal execution state",
+	[0x18]	"sys: trap: illegal MSR/MRS access",
+	[0x22]	"sys: trap: misaligned pc",
+	[0x26]	"sys: trap: stack pointer misaligned",
+	[0x30]	"sys: trap: breakpoint",
+	[0x32]	"sys: trap: software step",
+	[0x34]	"sys: trap: watchpoint",
+	[0x3C]	"sys: trap: BRK instruction",
+};
+
+/*
+ * Common C entry for exceptions and interrupts.
+ *
+ * ureg->type carries two things: the upper 32 bits say which kind
+ * of vector was taken (as tested below: 1 irq, 2 fiq, 3 serror)
+ * and bits 31:26 of the lower word are used as the exception class.
+ * (Presumably the vector stubs build it from ESR_EL1; that code is
+ * not in this file.)
+ *
+ * Aborts go to faultarm64(), floating point traps to mathtrap(),
+ * interrupts to irq()/fiq().  Anything else panics when it came
+ * from the kernel and posts a debug note when it came from user
+ * mode.  Before returning to user mode pending notes are delivered
+ * and kexit() does the cycle accounting.
+ */
+void
+trap(Ureg *ureg)
+{
+	u32int type, intr;
+	
+	intr = ureg->type >> 32;
+	/* fiq is handled and returned from before anything else is touched */
+	if(intr == 2){
+		fiq(ureg);
+		return;
+	}
+	splflo();
+	if(userureg(ureg)){
+		/* entered from user mode: remember registers and entry time */
+		up->dbgreg = ureg;
+		up->kentry = cycles();
+	}
+	type = (u32int)ureg->type >> 26;	/* exception class, 0..63 */
+	switch(type){
+	case 0x20:	// instruction abort from lower level
+	case 0x21:	// instruction abort from same level
+	case 0x24:	// data abort from lower level
+	case 0x25:	// data abort from same level
+		faultarm64(ureg);
+		break;
+	case 0x07:	// SIMD/FP
+	case 0x2C:	// FPU exception (A64 only)
+		mathtrap(ureg);
+		break;
+	case 0x00:	// unknown
+		/* class 0 is also what interrupt entries arrive with */
+		if(intr == 1){
+			if(irq(ureg) && up != nil && up->delaysched)
+				sched();
+			break;
+		}
+		/*
+		 * the case label below sits inside the if on purpose:
+		 * both intr == 3 and class 0x2F end up in the panic,
+		 * while class 0 with any other intr falls through.
+		 */
+		if(intr == 3){
+	case 0x2F:	// SError interrupt
+			dumpregs(ureg);
+			panic("SError interrupt");
+			break;
+		}
+		/* wet floor */
+	case 0x01:	// WFI or WFE instruction execution
+	case 0x03:	// MCR or MRC access to CP15 (A32 only)
+	case 0x04:	// MCRR or MRRC access to CP15 (A32 only)
+	case 0x05:	// MCR or MRC access to CP14 (A32 only)
+	case 0x06:	// LDC or STC access to CP14 (A32 only)
+	case 0x08:	// MCR or MRC to CP10 (A32 only)
+	case 0x0C:	// MRRC access to CP14 (A32 only)
+	case 0x0E:	// Illegal Execution State
+	case 0x11:	// SVC instruction execution (A32 only)
+	case 0x12:	// HVC instruction execution (A32 only)
+	case 0x13:	// SMC instruction execution (A32 only)
+	case 0x15:	// SVC instruction execution (A64 only)
+	case 0x16:	// HVC instruction execution (A64 only)
+	case 0x17:	// SMC instruction execution (A64 only)
+	case 0x18:	// MSR/MRS (A64)
+	case 0x22:	// misaligned pc
+	case 0x26:	// stack pointer misaligned
+	case 0x28:	// FPU exception (A32 only)
+	case 0x30:	// breakpoint from lower level
+	case 0x31:	// breakpoint from same level
+	case 0x32:	// software step from lower level
+	case 0x33:	// software step from same level
+	case 0x34:	// watchpoint exception from lower level
+	case 0x35:	// watchpoint exception from same level
+	case 0x38:	// breakpoint (A32 only)
+	case 0x3A:	// vector catch exception (A32 only)
+	case 0x3C:	// BRK instruction (A64 only)
+	default:
+		/* no handler: fatal in the kernel, a debug note for user code */
+		if(!userureg(ureg)){
+			dumpregs(ureg);
+			panic("unhandled trap");
+		}
+		if(traps[type] == nil) type = 0;	// unknown
+		postnote(up, 1, traps[type], NDebug);
+		break;
+	}
+	splhi();
+	if(userureg(ureg)){
+		/* going back to user mode: deliver notes, then account time */
+		if(up->procctl || up->nnote)
+			notify(ureg);
+		kexit(ureg);
+	}
+}
+
+/*
+ * System call entry.
+ *
+ * The call number is taken from ureg->r0 and the arguments are
+ * copied from the user stack, starting one word above ureg->sp,
+ * into up->s.  The handler from systab[] is run under an error
+ * label; its result, or -1 if it raised an error, is stored back
+ * into ureg->r0.  On the way out pending notes are delivered, a
+ * delayed reschedule is honoured and kexit() does the cycle
+ * accounting.
+ */
+void
+syscall(Ureg *ureg)
+{
+	vlong startns, stopns;
+	uintptr sp, ret;
+	ulong scallnr;
+	int i, s;
+	char *e;
+
+	up->kentry = cycles();
+	
+	m->syscall++;
+	up->insyscall = 1;
+	up->pc = ureg->pc;
+	up->dbgreg = ureg;
+	
+	sp = ureg->sp;
+	up->scallnr = scallnr = ureg->r0;
+
+	spllo();
+	
+	up->nerrlab = 0;
+	startns = 0;
+	ret = -1;	/* what the caller sees if the call raises an error */
+	if(!waserror()){
+		/*
+		 * an sp within the topmost page of the user stack that
+		 * leaves room for the arguments skips the explicit
+		 * address validation; anything else is checked.
+		 */
+		if(sp < USTKTOP - BY2PG || sp > USTKTOP - sizeof(Sargs) - BY2WD){
+			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+			evenaddr(sp);
+		}
+		up->s = *((Sargs*) (sp + BY2WD));
+
+		/* traced process: format the call, then stop until resumed */
+		if(up->procctl == Proc_tracesyscall){
+			syscallfmt(scallnr, ureg->pc, (va_list) up->s.args);
+			s = splhi();
+			up->procctl = Proc_stopme;
+			procctl();
+			splx(s);
+			startns = todget(nil);
+		}
+		
+		if(scallnr >= nsyscall || systab[scallnr] == nil){
+			pprint("bad sys call number %lud pc %#p", scallnr, ureg->pc);
+			postnote(up, 1, "sys: bad sys call", NDebug);
+			error(Ebadarg);
+		}
+		up->psstate = sysctab[scallnr];
+		ret = systab[scallnr]((va_list)up->s.args);
+		poperror();
+	}else{
+		/* the call failed: exchange the errstr and syserrstr buffers */
+		e = up->syserrstr;
+		up->syserrstr = up->errstr;
+		up->errstr = e;
+	}
+	/* every waserror() must have been popped again by now */
+	if(up->nerrlab){
+		print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
+		for(i = 0; i < NERR; i++)
+			print("sp=%#p pc=%#p\n", up->errlab[i].sp, up->errlab[i].pc);
+		panic("error stack");
+	}
+	ureg->r0 = ret;
+	/* traced process: format the result, then stop once more */
+	if(up->procctl == Proc_tracesyscall){
+		stopns = todget(nil);
+		sysretfmt(scallnr, (va_list) up->s.args, ret, startns, stopns);
+		s = splhi();
+		up->procctl = Proc_stopme;
+		procctl();
+		splx(s);
+	}
+	
+	up->insyscall = 0;
+	up->psstate = 0;
+	if(scallnr == NOTED){
+		noted(ureg, *((ulong*) up->s.args));
+		/*
+		 * normally, syscall() returns to forkret()
+		 * not restoring general registers when going
+		 * to userspace. to completely restore the
+		 * interrupted context, we have to return thru
+		 * noteret(). we override the return pc to jump
+		 * to it when returning from syscall()
+		 */
+		returnto(noteret);
+	}
+
+	/*
+	 * NOTE(review): notes are not delivered on return from rfork;
+	 * the reason is not visible in this file.
+	 */
+	if(scallnr != RFORK && (up->procctl || up->nnote)){
+		splhi();
+		notify(ureg);
+	}
+	if(up->delaysched)
+		sched();
+	kexit(ureg);
+}
+
+/*
+ * Deliver the first pending note of the current process.
+ *
+ * If the process has a note handler and is not already inside one,
+ * a frame is built on the user stack and ureg is redirected so that
+ * the return to user mode enters the handler.  From high to low
+ * addresses the frame holds:
+ *	a copy of *ureg (up->ureg is set to point at it)
+ *	one word with the previous value of up->ureg
+ *	ERRMAX bytes with the note text
+ *	three words: pointer to the note text, pointer to the
+ *	Ureg copy, and one word that is left unwritten
+ *
+ * Returns 1 when a handler was set up, 0 when nothing was
+ * delivered.  Does not return when the note kills the process.
+ */
+int
+notify(Ureg *ureg)
+{
+	int l;
+	uintptr s, sp;
+	Note *n;
+
+	if(up->procctl)
+		procctl();
+	if(up->nnote == 0)
+		return 0;
+	/* save live fp state; fp stays marked illegal until noted() clears it */
+	if(up->fpstate == FPactive){
+		fpsave(up->fpsave);
+		up->fpstate = FPinactive;
+	}
+	up->fpstate |= FPillegal;
+
+	s = spllo();
+	qlock(&up->debug);
+	up->notepending = 0;
+	n = &up->note[0];
+	/* notes starting with "sys:" get the pc appended, truncating if needed */
+	if(strncmp(n->msg, "sys:", 4) == 0){
+		l = strlen(n->msg);
+		if(l > ERRMAX-23)	/* " pc=0x0123456789abcdef\0" */
+			l = ERRMAX-23;
+		sprint(n->msg+l, " pc=%#p", ureg->pc);
+	}
+
+	/* a non-user note with no handler, or while in a handler, is fatal */
+	if(n->flag!=NUser && (up->notified || up->notify==0)){
+		qunlock(&up->debug);
+		if(n->flag == NDebug)
+			pprint("suicide: %s\n", n->msg);
+		pexit(n->msg, n->flag!=NDebug);
+	}
+
+	/* already inside a handler: leave the note queued */
+	if(up->notified){
+		qunlock(&up->debug);
+		splhi();
+		return 0;
+	}
+
+	if(!up->notify){
+		qunlock(&up->debug);
+		pexit(n->msg, n->flag!=NDebug);
+	}
+	sp = ureg->sp;
+	sp -= 256;	/* debugging: preserve context causing problem */
+	sp -= sizeof(Ureg);
+	sp = STACKALIGN(sp);
+
+	/* handler address and the whole frame must be usable and aligned */
+	if(!okaddr((uintptr)up->notify, 1, 0)
+	|| !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)
+	|| ((uintptr) up->notify & 3) != 0
+	|| (sp & 7) != 0){
+		qunlock(&up->debug);
+		pprint("suicide: bad address in notify: handler=%#p sp=%#p\n",
+			up->notify, sp);
+		pexit("Suicide", 0);
+	}
+
+	memmove((Ureg*)sp, ureg, sizeof(Ureg));
+	*(Ureg**)(sp-BY2WD) = up->ureg;	/* word under Ureg is old up->ureg */
+	up->ureg = (void*)sp;
+	sp -= BY2WD+ERRMAX;
+	memmove((char*)sp, up->note[0].msg, ERRMAX);
+	sp -= 3*BY2WD;
+	*(uintptr*)(sp+2*BY2WD) = sp+3*BY2WD;	/* address of the note text */
+	*(uintptr*)(sp+1*BY2WD) = (uintptr)up->ureg;	/* address of the Ureg copy */
+	ureg->r0 = (uintptr) up->ureg;
+	ureg->sp = sp;
+	ureg->pc = (uintptr) up->notify;
+	ureg->link = 0;
+	up->notified = 1;
+	/* dequeue the note, keeping a copy in lastnote for noted() */
+	up->nnote--;
+	memmove(&up->lastnote, &up->note[0], sizeof(Note));
+	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
+
+	qunlock(&up->debug);
+	splx(s);
+	return 1;
+}
+
+/*
+ * Return from a note handler; called by syscall() for NOTED.
+ *
+ * The Ureg that notify() saved on the user stack (up->ureg) is
+ * copied back into ureg after its psr has been sanitized: only
+ * the USPSRMASK bits are taken from the user's copy, the rest
+ * comes from the current ureg.  arg0 then selects what happens:
+ *	NCONT, NRSTR	resume the restored context and pop
+ *			up->ureg back to the value stored in the
+ *			word below the saved Ureg
+ *	NSAVE		resume with a rebuilt handler argument
+ *			frame; up->ureg is left as it is
+ *	NDFLT		exit with the last note as exit status
+ *	anything else	as NDFLT, but treated as a debug note
+ * Bad addresses or a call while not notified kill the process.
+ */
+void
+noted(Ureg *ureg, ulong arg0)
+{
+	Ureg *nureg;
+	uintptr oureg, sp;
+	
+	qlock(&up->debug);
+	if(arg0 != NRSTR && !up->notified){
+		qunlock(&up->debug);
+		pprint("call to noted() when not notified\n");
+		pexit("Suicide", 0);
+	}
+	up->notified = 0;
+	
+	nureg = up->ureg;
+	up->fpstate &= ~FPillegal;	/* set by notify() */
+	
+	/* the saved Ureg and the word below it must be readable and aligned */
+	oureg = (uintptr) nureg;
+	if(!okaddr(oureg - BY2WD, BY2WD + sizeof(Ureg), 0) || (oureg & 7) != 0){
+		qunlock(&up->debug);
+		pprint("bad ureg in noted or call to noted when not notified\n");
+		pexit("Suicide", 0);
+	}
+
+	/* the user may only have changed the USPSRMASK bits of psr */
+	nureg->psr = (nureg->psr & USPSRMASK) | (ureg->psr & ~USPSRMASK);
+	memmove(ureg, nureg, sizeof(Ureg));
+	
+	switch(arg0){
+	case NCONT: case NRSTR:
+		if(!okaddr(nureg->pc, BY2WD, 0) || !okaddr(nureg->sp, BY2WD, 0) ||
+				(nureg->pc & 3) != 0 || (nureg->sp & 7) != 0){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		/* pop: the word below the saved Ureg holds the older up->ureg */
+		up->ureg = (Ureg *) (*(uintptr*) (oureg - BY2WD));
+		qunlock(&up->debug);
+		break;
+	
+	case NSAVE:
+		if(!okaddr(nureg->pc, BY2WD, 0) || !okaddr(nureg->sp, BY2WD, 0) ||
+				(nureg->pc & 3) != 0 || (nureg->sp & 7) != 0){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		qunlock(&up->debug);
+		/* same place below the saved Ureg where notify() put its frame */
+		sp = oureg - 4 * BY2WD - ERRMAX;
+		splhi();
+		ureg->sp = sp;
+		ureg->r0 = (uintptr) oureg;
+		((uintptr *) sp)[1] = oureg;
+		((uintptr *) sp)[0] = 0;
+		break;
+	
+	default:
+		/* unknown argument: exit as for a debug note */
+		up->lastnote.flag = NDebug;
+		/* fall through */
+	
+	case NDFLT:
+		qunlock(&up->debug);
+		if(up->lastnote.flag == NDebug)
+			pprint("suicide: %s\n", up->lastnote.msg);
+		pexit(up->lastnote.msg, up->lastnote.flag != NDebug);
+	}
+}
+
+/*
+ * Handle an instruction or data abort; called from trap().
+ *
+ * The faulting address comes from getfar(), the access direction
+ * from bit 6 of ureg->type (clear means read) and the kind of
+ * fault from the low six bits.  Translation, access flag,
+ * permission and tlb conflict faults are given to fault(); when
+ * that fails, and for every other kind, a fault in the kernel
+ * panics and a fault in user mode posts a "sys: trap: fault" note.
+ *
+ * For faults taken in the kernel an error label is set up first:
+ * an error raised underneath is passed on with nexterror(), or
+ * kills the process when no outer label exists.
+ */
+void
+faultarm64(Ureg *ureg)
+{
+	extern void checkpages(void);
+	char buf[ERRMAX];
+	int read, insyscall;
+	uintptr addr;
+
+	/* run as if in a system call while the fault is serviced */
+	insyscall = up->insyscall;
+	up->insyscall = 1;
+
+	if(!userureg(ureg) && waserror()){
+		if(up->nerrlab == 0){
+			pprint("suicide: sys: %s\n", up->errstr);
+			pexit(up->errstr, 1);
+		}
+		up->insyscall = insyscall;
+		nexterror();
+	}
+
+	addr = getfar();
+	read = (ureg->type & (1<<6)) == 0;
+
+	switch((u32int)ureg->type & 0x3F){
+	case  4: case  5: case  6: case  7:	// Translation fault.
+	case  8: case  9: case 10: case 11:	// Access flag fault.
+	case 12: case 13: case 14: case 15:	// Permission fault.
+	case 48:				// tlb conflict fault.
+		if(fault(addr, read) == 0)
+			break;
+
+		/* wet floor */
+	case  0: case  1: case  2: case  3:	// Address size fault.
+	case 16: 				// synchronous external abort.
+	case 24: 				// synchronous parity error on a memory access.
+	case 20: case 21: case 22: case 23:	// synchronous external abort on a table walk.
+	case 28: case 29: case 30: case 31:	// synchronous parity error on table walk.
+	case 33:				// alignment fault.
+	case 52:				// implementation defined, lockdown abort.
+	case 53:				// implementation defined, unsupported exclusive.
+	case 61:				// first level domain fault
+	case 62:				// second level domain fault
+	default:
+		if(!userureg(ureg)){
+			dumpregs(ureg);
+			panic("fault: %s addr=%#p", read ? "read" : "write", addr);
+		}
+		checkpages();
+		sprint(buf, "sys: trap: fault %s addr=%#p", read ? "read" : "write", addr);
+		postnote(up, 1, buf, NDebug);
+	}
+
+	/* drop the error label set up above for kernel faults */
+	if(!userureg(ureg))
+		poperror();
+
+	up->insyscall = insyscall;
+}
+
+/*
+ * Report whether ureg was saved on entry from user mode:
+ * true (1) when the low four bits of the saved psr are all
+ * zero, false (0) otherwise.
+ */
+int
+userureg(Ureg* ureg)
+{
+	if(ureg->psr & 15)
+		return 0;
+	return 1;
+}
+
+/*
+ * Return the pc held in the current process' debug registers,
+ * i.e. the Ureg recorded in up->dbgreg by trap() and syscall().
+ * up->dbgreg is not checked for nil.
+ */
+uintptr
+userpc(void)
+{
+	Ureg *ureg;
+
+	ureg = up->dbgreg;
+	return ureg->pc;
+}
+
+/*
+ * Return the pc recorded in the debug registers of process p
+ * (the Ureg that trap() and syscall() store in dbgreg), or 0
+ * when p has none.
+ *
+ * The lookup goes through p rather than up: the caller asks about
+ * a specific process, which need not be the one running here, and
+ * up may be nil when no process is running on this cpu.
+ */
+uintptr
+dbgpc(Proc *p)
+{
+	Ureg *ur;
+
+	ur = p->dbgreg;
+	if(ur == nil)
+		return 0;
+	return ur->pc;
+}
+
+/*
+ * Machine dependent part of fork: give the new process p a copy
+ * of the current process' floating point state, its tpidr value
+ * and its kernel entry time, and start p's process cycle count
+ * from there.
+ *
+ * Live fp registers are first saved into up->fpsave (making the
+ * parent FPinactive) so that the copy is current.  When the parent
+ * has no saved state (neither FPactive nor FPinactive, ignoring
+ * the FPillegal flag) the child's fp fields are left untouched.
+ */
+void
+procfork(Proc *p)
+{
+	int s, fps;
+
+	s = splhi();
+	fps = up->fpstate & ~FPillegal;
+	if(fps == FPactive){
+		fpsave(up->fpsave);
+		up->fpstate = FPinactive;
+	}
+	if(fps == FPactive || fps == FPinactive){
+		memmove(p->fpsave, up->fpsave, sizeof(FPsave));
+		p->fpstate = FPinactive;
+	}
+	splx(s);
+
+	p->tpidr = up->tpidr;
+
+	p->kentry = up->kentry;
+	p->pcycles = -p->kentry;
+}
+
+/*
+ * Reset the machine dependent state of p to that of a fresh
+ * process: floating point state FPinit with the fpu switched
+ * off, tpidr zero (also written to TPIDR_EL0 right away) and
+ * cycle accounting restarted at the current cycle count.
+ */
+void
+procsetup(Proc *p)
+{
+	/* floating point */
+	p->fpstate = FPinit;
+	fpoff();
+
+	/* user thread pointer */
+	p->tpidr = 0;
+	syswr(TPIDR_EL0, p->tpidr);
+
+	/* time accounting */
+	p->kentry = cycles();
+	p->pcycles = -p->kentry;
+}
+
+/*
+ * Save the machine dependent state of p when it is switched out.
+ *
+ * Live fp registers are stored in p->fpsave, or simply discarded
+ * with fpclear() when p is Moribund; either way p becomes
+ * FPinactive.  For processes that are not kernel processes the
+ * current TPIDR_EL0 is remembered in p->tpidr.  The address space
+ * id is released and the cycle counters are adjusted by the
+ * current cycle count, the reverse of what procrestore() does.
+ */
+void
+procsave(Proc *p)
+{
+	uvlong now;
+
+	if(p->fpstate == FPactive){
+		if(p->state != Moribund)
+			fpsave(p->fpsave);
+		else
+			fpclear();
+		p->fpstate = FPinactive;
+	}
+
+	if(!p->kp)
+		p->tpidr = sysrd(TPIDR_EL0);
+
+	putasid(p);	// release asid
+
+	now = cycles();
+	p->kentry -= now;
+	p->pcycles += now;
+}
+
+/*
+ * Restore the machine dependent state of p when it is switched
+ * in: reload TPIDR_EL0 from p->tpidr and adjust the cycle
+ * counters by the current cycle count, the reverse of what
+ * procsave() does.  Nothing is done for kernel processes.
+ */
+void
+procrestore(Proc *p)
+{
+	uvlong now;
+
+	if(!p->kp){
+		syswr(TPIDR_EL0, p->tpidr);
+
+		now = cycles();
+		p->kentry += now;
+		p->pcycles -= now;
+	}
+}
+
+/*
+ * First code run by a kernel process created with kprocchild():
+ * lower the interrupt priority, call the function and argument
+ * stored in up->kpfun and up->kparg, and exit the process should
+ * that function ever return.
+ */
+static void
+linkproc(void)
+{
+	spllo();
+	(*up->kpfun)(up->kparg);
+	pexit("kproc dying", 0);
+}
+
+/*
+ * Prepare p to run as a kernel process executing func(arg).
+ * The scheduler label is set so that p starts in linkproc() with
+ * its stack pointer 16 bytes below the top of its kernel stack;
+ * the word at that address is filled with a fake value (the
+ * address of kprocchild itself).
+ */
+void
+kprocchild(Proc* p, void (*func)(void*), void* arg)
+{
+	uintptr top;
+
+	p->kpfun = func;
+	p->kparg = arg;
+
+	top = (uintptr)p->kstack + KSTACK - 16;
+	*(void**)top = kprocchild;	/* fake */
+	p->sched.sp = top;
+	p->sched.pc = (uintptr)linkproc;
+}
+
+/*
+ * Prepare the child p of a fork so that it resumes in forkret().
+ * Its scheduler stack pointer is placed TRAPFRAMESIZE bytes below
+ * the top of its kernel stack and a copy of the parent's ureg is
+ * stored 16 bytes above that, with r0 (the register syscall()
+ * uses for the result) set to 0.
+ */
+void
+forkchild(Proc *p, Ureg *ureg)
+{
+	Ureg *child;
+
+	p->sched.sp = (uintptr)p->kstack + KSTACK - TRAPFRAMESIZE;
+	p->sched.pc = (uintptr)forkret;
+
+	child = (Ureg*)(p->sched.sp + 16);
+	memmove(child, ureg, sizeof(Ureg));
+	child->r0 = 0;
+
+	p->insyscall = 0;
+	p->psstate = 0;
+}
+
+/*
+ * Set up the user registers of the current process for a freshly
+ * exec'ed program.  nargs is stored in the word just below the
+ * ssize bytes at the top of the user stack and the user sp is
+ * pointed at it; pc becomes entry and link is cleared.
+ * Returns the address of the Tos structure, USTKTOP-sizeof(Tos).
+ */
+uintptr
+execregs(uintptr entry, ulong ssize, ulong nargs)
+{
+	uintptr *argc;
+	Ureg *ureg;
+
+	argc = (uintptr*)(USTKTOP - ssize) - 1;
+	*argc = nargs;
+
+	ureg = up->dbgreg;
+	ureg->link = 0;
+	ureg->pc = entry;
+	ureg->sp = (uintptr)argc;
+
+	return USTKTOP-sizeof(Tos);
+}
+
+/*
+ * Check that addr is a multiple of four.  If it is not, the
+ * current process gets a "sys: odd address" debug note and
+ * Ebadarg is raised.
+ */
+void
+evenaddr(uintptr addr)
+{
+	if((addr & 3) == 0)
+		return;
+
+	postnote(up, 1, "sys: odd address", NDebug);
+	error(Ebadarg);
+}
+
+/*
+ * Call f with a Ureg describing the caller's context: pc is the
+ * return address into the caller and sp the address of the
+ * argument on the stack.
+ *
+ * link is cleared explicitly: dumpstackwithureg() prints pc, sp
+ * and link, so leaving it unset would print stack garbage as the
+ * link register.  The remaining registers are not filled in.
+ */
+void
+callwithureg(void (*f) (Ureg *))
+{
+	Ureg u;
+
+	u.pc = getcallerpc(&f);
+	u.sp = (uintptr) &f;
+	u.link = 0;
+	f(&u);
+}
+
+/*
+ * Fill in ureg from the scheduler label of p: pc and sp are the
+ * values saved in p->sched, and link is set to the address of
+ * sched().  No other field of ureg is written.
+ */
+void
+setkernur(Ureg *ureg, Proc *p)
+{
+	ureg->link = (uintptr)sched;
+	ureg->sp = p->sched.sp;
+	ureg->pc = p->sched.pc;
+}
+
+/*
+ * Hardware watchpoints are not implemented here: the function
+ * ignores all of its arguments and does nothing.
+ */
+void
+setupwatchpts(Proc*, Watchpt*, int)
+{
+}
+
+/*
+ * Copy n bytes from uva to pureg without letting the copy change
+ * protected processor state: psr is saved first and afterwards
+ * only the USPSRMASK bits keep their new value, every other psr
+ * bit is put back.  (pureg is presumably a location inside *ureg,
+ * written on behalf of a debugger -- the callers are not in this
+ * file.)
+ *
+ * The saved psr is kept at its full 64-bit width, as noted() does
+ * for the same merge.  A ulong temporary is only 32 bits wide in
+ * Plan 9 C and would clear bits 63:32 of psr on every call.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+	u64int v;
+
+	v = ureg->psr;
+	memmove(pureg, uva, n);
+	ureg->psr = (ureg->psr & USPSRMASK) | (v & ~USPSRMASK);
+}
+
+/*
+ * Print a kernel stack dump on the console, starting from the
+ * context in ureg.
+ *
+ * Nothing is dumped when the configuration variable *nodumpstack
+ * is set to something other than "0".  Otherwise a ktrace command
+ * line with pc, sp and link is printed, followed after a two
+ * second pause by every stack word between sp and the end of the
+ * stack that looks like a kernel text address (above KTZERO, below
+ * etext, word aligned), four "address=value" pairs per line.  Both
+ * numbers are printed truncated to 32 bits.
+ *
+ * An sp below KZERO or not 8-byte aligned is replaced by the
+ * address of the local argument.  The stack ends at the top of
+ * the current process' kernel stack when sp is not above it,
+ * else at the end of the Mach structure.
+ */
+static void
+dumpstackwithureg(Ureg *ureg)
+{
+	uintptr word, top, addr;
+	char *conf;
+	int ncol;
+
+	conf = getconf("*nodumpstack");
+	if(conf != nil && strcmp(conf, "0") != 0){
+		iprint("dumpstack disabled\n");
+		return;
+	}
+	iprint("ktrace /kernel/path %#p %#p %#p # pc, sp, link\n",
+		ureg->pc, ureg->sp, ureg->link);
+	delay(2000);
+
+	addr = ureg->sp;
+	if(addr < KZERO || (addr & 7) != 0)
+		addr = (uintptr)&ureg;
+
+	if(up != nil && addr <= (uintptr)up->kstack+KSTACK)
+		top = (uintptr)up->kstack+KSTACK;
+	else
+		top = (uintptr)m+MACHSIZE;
+
+	if(addr > top){
+		if(up == nil)
+			iprint("&m %#p sp %#p\n", m, addr);
+		else
+			iprint("&up->kstack %#p sp %#p\n", up->kstack, addr);
+		return;
+	}
+
+	ncol = 0;
+	while(addr < top){
+		word = *(uintptr*)addr;
+		if(KTZERO < word && word < (uintptr)etext && (word & 3) == 0){
+			iprint("%#8.8lux=%#8.8lux ", (ulong)addr, (ulong)word);
+			ncol++;
+		}
+		if(ncol == 4){
+			ncol = 0;
+			iprint("\n");
+		}
+		addr += sizeof(uintptr);
+	}
+	if(ncol)
+		iprint("\n");
+}
+
+/*
+ * Dump the kernel stack of the caller: callwithureg() builds a
+ * Ureg for the calling context and hands it to
+ * dumpstackwithureg().
+ */
+void
+dumpstack(void)
+{
+	callwithureg(dumpstackwithureg);
+}
+
+/*
+ * Print the contents of ureg on the console with interrupts
+ * masked: a header naming the cpu (and the current process, if
+ * there is one), the thirty 64-bit words starting at ureg->r0
+ * labelled R0 to R29, three per line, and finally pc, sp, link,
+ * psr and type.
+ */
+void
+dumpregs(Ureg *ureg)
+{
+	u64int *reg;
+	int n, s;
+
+	s = splhi();
+	if(up == nil)
+		iprint("cpu%d: dumpregs ureg %#p\n", m->machno, ureg);
+	else
+		iprint("cpu%d: dumpregs ureg %#p process %lud: %s\n", m->machno, ureg,
+			up->pid, up->text);
+
+	reg = &ureg->r0;
+	for(n = 0; n < 30; n += 3)
+		iprint("R%d %.16llux  R%d %.16llux  R%d %.16llux\n",
+			n, reg[n], n+1, reg[n+1], n+2, reg[n+2]);
+
+	iprint("PC %#p  SP %#p  LR %#p  PSR %llux  TYPE %llux\n",
+		ureg->pc, ureg->sp, ureg->link,
+		ureg->psr, ureg->type);
+	splx(s);
+}