shithub: riscv

Download patch

ref: cb2103879e7e1cb869ed1eb8455c468a756e7ef0
parent: e08cc065177138fe821abb84dc6381fd0400e944
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Mon Dec 29 11:02:57 EST 2014

zynq: lilu dallas, multicore

implement multiprocessor support.

--- a/sys/src/9/zynq/dat.h
+++ b/sys/src/9/zynq/dat.h
@@ -140,7 +140,6 @@
 	Proc*	readied;		/* for runproc */
 	ulong	schedticks;		/* next forced context switch */
 
-	int	cputype;
 	ulong	delayloop;
 
 	/* stats */
--- a/sys/src/9/zynq/fns.h
+++ b/sys/src/9/zynq/fns.h
@@ -9,6 +9,7 @@
 void procsave(Proc *);
 void procrestore(Proc *);
 void idlehands(void);
+void sendevent(void);
 void coherence(void);
 void procfork(Proc *);
 void procsetup(Proc *);
@@ -37,7 +38,9 @@
 void links(void);
 void* vmap(uintptr, ulong);
 void timerinit(void);
+void synccycles(void);
 void setpmcr(ulong);
+void setpmcnten(ulong);
 void* tmpmap(uintptr);
 void tmpunmap(void*);
 void flushpg(void *);
--- a/sys/src/9/zynq/intr.c
+++ b/sys/src/9/zynq/intr.c
@@ -44,6 +44,9 @@
 	mpcore[ICCICR] = 7;
 	mpcore[ICCBPR] = 3;
 	mpcore[ICCPMR] = 255;
+
+	if(m->machno != 0)
+		return;
 	
 	/* disable all irqs and clear any pending interrupts */
 	for(i = 0; i < NINTR/32; i++){
@@ -66,7 +69,7 @@
 		panic("intrenable: invalid irq %d", irq);
 	if(type != LEVEL && type != EDGE)
 		panic("intrenable: invalid type %d", type);
-	if(irqs[irq].f != nil)
+	if(irqs[irq].f != nil && irqs[irq].f != f)
 		panic("intrenable: handler already assigned");
 	if(irq >= NPRIVATE){
 		e = &mpcore[ICDIPTR + (irq >> 2)];
--- a/sys/src/9/zynq/l.s
+++ b/sys/src/9/zynq/l.s
@@ -18,7 +18,7 @@
 
 	PUTC('l')
 	MOVW $SECSZ, R0
-	MOVW $(CPU0L1-KZERO), R4
+	MOVW $(MACHL1(0)-KZERO), R4
 	MOVW $KZERO, R1
 	ADD R1>>(SECSH-2), R4, R1
 	MOVW $(L1SEC|L1CACHED|L1KERRW), R2
@@ -42,13 +42,20 @@
 	BGE _start2
 
 	MOVW $(UART_BASE|L2VALID|L2DEVICE|L2KERRW), R0
-	MOVW $(VMAPL2 - KZERO), R1
+	MOVW $(VMAPL2-KZERO), R1
 	MOVW R0, (R1)
-	
+
 	PUTC('n')
+
+	MOVW $(MACH(0)-KZERO), R(Rmach)
+_start3:
+	/* enable MMU permission checking */
+	MOVW $0x55555555, R0
+	MCR 15, 0, R0, C(3), C(0), 0
+
 	MOVW $0, R0
 	MCR 15, 0, R0, C(8), C(7), 0
-	MOVW $(CPU0L1 - KZERO | TTBATTR), R1
+	ADD $TTBATTR, R4, R1
 	MCR 15, 0, R1, C(2), C(0), 0
 	MOVW $0x20c5047b, R1
 	MOVW $_virt(SB), R2
@@ -59,17 +66,18 @@
 TEXT _virt(SB), $-4
 	DSB
 	ISB
-	
-	MOVW $(MACH(0) + MACHSIZE), R13
-	MOVW $(MACH(0) + 12), R0
+
+	ADD $KZERO, R(Rmach)
+	MOVW R(Rmach), R13
+	ADD $MACHSIZE, R13
+
+	MOVW R(Rmach), R0
+	ADD $12, R0
 	BL loadsp(SB)
+
 	MOVW $vectors(SB), R1
 	MCR 15, 0, R1, C(12), C(0)
 	
-	/* enable MMU permission checking */
-	MOVW $0x55555555, R0
-	MCR 15, 0, R0, C(3), C(0), 0
-	
 	/* enable maths coprocessors in CPACR but disable them in FPEXC */
 	MRC 15, 0, R0, C(1), C(0), 2
 	ORR $(15<<20), R0
@@ -96,14 +104,25 @@
 	MOVW $(VMAP+0x30), R8
 	PUTC('9')
 	
+	/* kernel Mach* in TPIDRPRW */
+	MCR 15, 0, R(Rmach), C(13), C(0), 4
+
 	MOVW $setR12(SB), R12
-	MOVW $MACH(0), R(Rmach)
 	MOVW $0, R(Rup)
+
 	BL main(SB)
 	B idlehands(SB)
 
 	BL _div(SB) /* hack to load _div */
 
+TEXT mpbootstrap(SB), $-4	/* entry point whose physical address mpinit() (main.c) publishes for the second cpu */
+	MOVW $0xE0001030, R8	/* NOTE(review): presumably the physical uart register PUTC writes through R8 -- confirm */
+	PUTC('M')
+	PUTC('P')
+	MOVW $(MACH(1)-KZERO), R(Rmach)	/* physical address of cpu1's Mach; _virt adds KZERO back */
+	MOVW $(MACHL1(1)-KZERO), R4	/* physical address of cpu1's L1 table; _start3 loads R4+TTBATTR as translation table base */
+	B _start3
+
 TEXT touser(SB), $-4
 	CPS(CPSID)
 
@@ -238,6 +257,10 @@
 	WFE
 	RET
 
+TEXT sendevent(SB), $0
+	SEV	/* ARM send-event instruction (encoded in mem.h); wakes cpus waiting in WFE */
+	RET
+
 TEXT ttbget(SB), $0
 	MRC 15, 0, R0, C(2), C(0), 0
 	BIC $0x7f, R0
@@ -282,6 +305,10 @@
 	MCR 15, 0, R0, C(9), C(12), 0
 	RET
 
+TEXT setpmcnten(SB), $0
+	MCR 15, 0, R0, C(9), C(12), 1	/* write the argument (R0) to cp15 c9, c12, 1; NOTE(review): PMCNTENSET per the ARMv7 manual -- confirm */
+	RET
+
 TEXT perfticks(SB), $0
 	MRC 15, 0, R0, C(9), C(13), 0
 	RET
@@ -453,3 +480,4 @@
 	DSB
 	MRC 15, 0, R0, C(7), C(4), 0
 	RET
+
--- a/sys/src/9/zynq/ltrap.s
+++ b/sys/src/9/zynq/ltrap.s
@@ -39,7 +39,8 @@
 	SUB $(18*4), R13
 	MOVM.IA [R0-R14], (R13)
 
-	MOVW $MACH(0), R(Rmach) /* FIXME */
+	/* get Mach* from TPIDRPRW */
+	MRC 15, 0, R(Rmach), C(13), C(0), 4
 	MOVW 8(R(Rmach)), R(Rup)
 	MOVW $setR12(SB), R12
 	
@@ -79,7 +80,8 @@
 	MOVM.DB.S [R0-R14], (R13)
 	SUB $(15*4), R13
 	
-	MOVW $MACH(0), R(Rmach) /* FIXME */
+	/* get Mach* from TPIDRPRW */
+	MRC 15, 0, R(Rmach), C(13), C(0), 4
 	MOVW 8(R(Rmach)), R(Rup)
 	MOVW $setR12(SB), R12
 	
--- a/sys/src/9/zynq/main.c
+++ b/sys/src/9/zynq/main.c
@@ -182,9 +182,9 @@
 	int i;
 
 	conf.nmach = 1;
-	conf.nproc = 100;
+	conf.nproc = 2000;
 	conf.ialloc = 16*1024*1024;
-	conf.nimage = conf.nproc;
+	conf.nimage = 200;
 	conf.mem[0].base = PGROUND((ulong)end - KZERO);
 	conf.mem[0].limit = 1024*1024*1024;
 	conf.npage = 0;
@@ -347,9 +347,63 @@
 }
 
 void
+cpuidprint(void)
+{
+	print("\ncpu%d: %dMHz ARM Cortex-A9\n", m->machno, m->cpumhz);	/* identify the running cpu; cpumhz is assigned in timerinit() */
+}
+
+void
+mpinit(void)	/* prepare and release the second cpu; called on cpu0 from main() after userinit() */
+{
+	extern void mpbootstrap(void);	/* l.s */
+	Mach *m1;
+	ulong *v;
+	int i;
+
+	if(getconf("*nomp"))	/* *nomp in the configuration leaves conf.nmach at 1 */
+		return;
+
+	conf.nmach = 2;
+	conf.copymode = 1;
+
+	m1 = MACHP(1);	/* cpu1's Mach, at KZERO+MACHSIZE */
+	memset(m1, 0, MACHSIZE);
+	m1->machno = 1;
+	m1->l1.pa = MACHL1(m1->machno)-KZERO;
+	m1->l1.va = KADDR(m1->l1.pa);
+
+	memset(m1->l1.va, 0, L1SZ);	/* empty L1 for cpu1, then copy this cpu's VMAP and KZERO-and-up entries into it */
+	for(i=0; i<L1X(VMAPSZ); i++)
+		m1->l1.va[L1X(VMAP)+i] = m->l1.va[L1X(VMAP)+i];
+	for(i=0; i<L1X(-KZERO); i++)
+		m1->l1.va[L1X(KZERO)+i] = m->l1.va[L1X(KZERO)+i];
+	coherence();
+	cleandse((uchar*)KZERO, (uchar*)0xFFFFFFFF);	/* NOTE(review): presumably so cpu1 sees kernel memory before its caches are on -- confirm */
+
+	v = tmpmap(0xFFFFF000);	/* page containing physical address 0xFFFFFFF0 */
+	v[0xFF0/4] = PADDR(mpbootstrap);	/* NOTE(review): presumably the word the Zynq boot code has the parked cpu1 jump through -- confirm in UG585 */
+	coherence();
+	cleandse(v, (uchar*)v+BY2PG);
+	tmpunmap(v);
+
+	sendevent();	/* SEV, see l.s */
+	synccycles();	/* rendezvous with cpu1, which calls synccycles() from main() */
+}
+
+void
 main(void)
 {
-	active.machs = 1;
+	active.machs |= (1 << m->machno);
+	if(m->machno != 0){
+		mmuinit();
+		intrinit();
+		timerinit();
+		cpuidprint();
+		synccycles();
+		timersinit();
+		schedinit();
+		return;
+	}
 	uartinit();
 	mmuinit();
 	l2init();
@@ -361,6 +415,7 @@
 	xinit();
 	printinit();
 	quotefmtinstall();
+	cpuidprint();
 	sanity();
 	todinit();
 	timersinit();
@@ -376,5 +431,6 @@
 	swapinit();
 	screeninit();
 	userinit();
+	mpinit();
 	schedinit();
 }
--- a/sys/src/9/zynq/mem.h
+++ b/sys/src/9/zynq/mem.h
@@ -40,8 +40,8 @@
 #define MACHSIZE 8192
 #define MACH(n) (KZERO+(n)*MACHSIZE)
 #define MACHP(n) ((Mach *)MACH(n))
-#define CPU0L1 ROUND(MACH(MAXMACH), L1SZ)
-#define VMAPL2 (CPU0L1 + L1SZ)
+#define MACHL1(n) (ROUND(MACH(MAXMACH), L1SZ) + (n)*L1SZ)
+#define VMAPL2 MACHL1(MAXMACH)
 #define VMAPL2SZ (L2SZ * (VMAPSZ / SECSZ))
 #define TMAPL2(n) (VMAPL2 + VMAPL2SZ + (n) * L2SZ)
 #define TMAPL2SZ (MAXMACH * L2SZ)
@@ -80,6 +80,7 @@
 #define DSB WORD $0xf57ff04f
 #define ISB WORD $0xf57ff06f
 #define WFE WORD $0xe320f002
+#define SEV WORD $0xe320f004
 #define CPS(m) WORD $(0xf1000000|(m))
 #define CPSMODE (1<<17)
 #define CPSIE (3<<6|2<<18)
@@ -121,4 +122,4 @@
 #define L2WRITE 0
 #define L2LOCAL (1<<11)
 
-#define TTBATTR (1<<6|1<<3)
+#define TTBATTR (1<<6|1<<3|1<<1)
--- a/sys/src/9/zynq/mmu.c
+++ b/sys/src/9/zynq/mmu.c
@@ -12,10 +12,13 @@
 {
 	m->l1.pa = ttbget();
 	m->l1.va = KADDR(m->l1.pa);
-	mpcore = vmap(MPCORE_BASE, 0x2000);
-	slcr = vmap(SLCR_BASE, 0x1000);
+	memset((uchar*)TMAPL2(m->machno), 0, TMAPL2SZ);
 	m->l1.va[L1X(TMAP)] = PADDR(TMAPL2(m->machno)) | L1PT;
 	incref(&m->l1);
+	if(mpcore != nil)
+		return;
+	mpcore = vmap(MPCORE_BASE, 0x2000);
+	slcr = vmap(SLCR_BASE, 0x1000);
 }
 
 void
@@ -38,29 +41,28 @@
 	int s;
 
 	s = splhi();
-	if(m->l1free != nil){
-		p = m->l1free;
+	p = m->l1free;
+	if(p != nil){
+		m->l1free = p->next;
 		p->next = nil;
-		m->l1free = m->l1free->next;
 		m->nfree--;
 		splx(s);
 		return p;
-	}else{
-		p = smalloc(sizeof(L1));
-		for(;;){
-			p->va = mallocalign(L1SZ, L1SZ, 0, 0);
-			if(p->va != nil)
-				break;
-			if(!waserror()){
-				resrcwait("no memory for L1 table");
-				poperror();
-			}
+	}
+	splx(s);
+	p = smalloc(sizeof(L1));
+	for(;;){
+		p->va = mallocalign(L1SZ, L1SZ, 0, 0);
+		if(p->va != nil)
+			break;
+		if(!waserror()){
+			resrcwait("no memory for L1 table");
+			poperror();
 		}
-		memmove(p->va, m->l1.va, L1SZ);
-		p->pa = PADDR(p->va);
-		splx(s);
-		return p;
 	}
+	p->pa = PADDR(p->va);
+	memmove(p->va, m->l1.va, L1SZ);
+	return p;
 }
 
 static void
@@ -89,11 +91,9 @@
 	p = l1alloc();
 	s = splhi();
 	if(up->l1 != nil)
-		l1free(p);
-	else{
-		up->l1 = p;
-		l1switch(p, 1);
-	}
+		panic("upalloc1: up->l1 != nil");
+	up->l1 = p;
+	l1switch(p, 1);
 	splx(s);
 }
 
@@ -114,6 +114,7 @@
 		*t = 0;
 		l = &p->next;
 	}
+	proc->l1->va[L1X(TMAP)] = 0;
 	*l = proc->mmufree;
 	proc->mmufree = proc->mmuused;
 	proc->mmuused = 0;
@@ -139,6 +140,7 @@
 	ulong *e;
 	ulong *l2;
 	PTE old;
+	char *ctl;
 	uintptr l2p;
 	int s;
 
@@ -178,10 +180,11 @@
 	splx(s);
 	if((old & L2VALID) != 0)
 		flushpg((void *) va);
-	if(pg->cachectl[0] == PG_TXTFLUSH){
+	ctl = &pg->cachectl[m->machno];
+	if(*ctl == PG_TXTFLUSH){
 		cleandse((void *) va, (void *) (va + BY2PG));
 		invalise((void *) va, (void *) (va + BY2PG));
-		pg->cachectl[0] = PG_NOFLUSH;
+		*ctl = PG_NOFLUSH;
 	}
 }
 
@@ -188,7 +191,6 @@
 void
 checkmmu(uintptr, uintptr)
 {
-	print("checkmmu\n");
 }
 
 void
@@ -286,7 +288,7 @@
 	e = &up->l1->va[L1X(KMAP)];
 	if((*e & 3) == 0){
 		if(up->kmaptable != nil)
-			panic("kmaptable");
+			panic("kmaptable != nil");
 		up->kmaptable = newpage(0, 0, 0);
 		s = splhi();
 		v = tmpmap(up->kmaptable->pa);
@@ -300,7 +302,7 @@
 		return (KMap *) KMAP;
 	}
 	if(up->kmaptable == nil)
-		panic("kmaptable");
+		panic("kmaptable == nil");
 	e = (ulong *) (KMAP + NKMAP * BY2PG);
 	for(i = 0; i < NKMAP; i++)
 		if((e[i] & 3) == 0){
@@ -338,7 +340,6 @@
 tmpmap(ulong pa)
 {
 	ulong *u, *ub, *ue;
-	void *v;
 
 	if(islo())
 		panic("tmpmap: islow %#p", getcallerpc(&pa));
@@ -349,9 +350,13 @@
 	for(u = ub; u < ue; u++)
 		if((*u & 3) == 0){
 			*u = pa | L2VALID | L2CACHED | L2KERRW;
+
+			assert(m->l1.va[L1X(TMAP)] != 0);
+			if(up != nil && up->l1 != nil)
+				up->l1->va[L1X(TMAP)] = m->l1.va[L1X(TMAP)];
+
 			coherence();
-			v = (void *) ((u - ub) * BY2PG + TMAP);
-			return v;
+			return (void *) ((u - ub) * BY2PG + TMAP);
 		}
 	panic("tmpmap: full (pa=%#.8lux)", pa);
 	return nil;
@@ -361,7 +366,7 @@
 tmpunmap(void *v)
 {
 	ulong *u;
-	
+
 	if(v >= (void*) KZERO)
 		return;
 	if(v < (void*)TMAP || v >= (void*)(TMAP + TMAPSZ))
--- a/sys/src/9/zynq/timer.c
+++ b/sys/src/9/zynq/timer.c
@@ -80,12 +80,39 @@
 void
 timerinit(void)
 {
-	int mhz;
-	
-	mhz = PS_CLK * (slcr[ARM_PLL_CTRL] >> 12 & 0x7f) / (slcr[ARM_CLK_CTRL] >> 8 & 0x3f);
-	timerhz = mhz * 500000;
+	m->cpumhz = PS_CLK * (slcr[ARM_PLL_CTRL] >> 12 & 0x7f) / (slcr[ARM_CLK_CTRL] >> 8 & 0x3f);
+	m->cpuhz = m->cpumhz * 1000000;
+	timerhz = m->cpuhz / 2;
 	mpcore[GTIMERCTL] = TIMERDIV - 1 << 8 | 3;
 	mpcore[LTIMERCTL] = LTIMERDIV - 1 << 8 | 4;
 	intrenable(TIMERIRQ, timerirq, nil, EDGE, "clock");
+
+	/* enable and reset cycle counter register */
+	m->cyclefreq = m->cpuhz;
+	setpmcnten((1<<31));
+	coherence();
 	setpmcr(7);
+}
+
+/*
+ * synchronize all cpus' cycle counter registers; spins until conf.nmach cpus have entered, so every cpu must call it
+ */
+void
+synccycles(void)
+{
+	static Ref r1, r2;	/* arrival counts for the two rendezvous points below */
+	int s;
+
+	s = splhi();
+	r2.ref = 0;
+	incref(&r1);
+	while(r1.ref != conf.nmach)	/* first rendezvous: wait until conf.nmach cpus have arrived */
+		;
+	setpmcr(7);	/* same value timerinit() writes to enable and reset the cycle counter */
+	m->cycleshi = MACHP(0)->cycleshi;	/* take cycleshi from cpu0's Mach */
+	incref(&r2);
+	while(r2.ref != conf.nmach)	/* second rendezvous, before r1 is cleared for the next use */
+		;
+	r1.ref = 0;
+	splx(s);
+}
--- a/sys/src/9/zynq/trap.c
+++ b/sys/src/9/zynq/trap.c
@@ -20,7 +20,7 @@
 		iprint("dumpstack disabled\n");
 		return;
 	}
-	iprint("dumpstack\n");
+	iprint("cpu%d: dumpstack\n", m->machno);
 
 	x = 0;
 	x += iprint("ktrace /arm/9zynq %.8lux %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp, ureg->r14);
@@ -57,7 +57,7 @@
 {
 	int user, insyscall, read, n;
 	static char buf[ERRMAX];
-	
+
 	read = (fsr & (1<<11)) == 0;
 	user = userureg(ureg);
 	if(!user){
@@ -88,6 +88,8 @@
 static void
 mathtrap(Ureg *, ulong)
 {
+	int s;
+
 	if((up->fpstate & FPillegal) != 0){
 		postnote(up, 1, "sys: floating point in note handler", NDebug);
 		return;
@@ -94,12 +96,16 @@
 	}
 	switch(up->fpstate){
 	case FPinit:
+		s = splhi();
 		fpinit();
 		up->fpstate = FPactive;
+		splx(s);
 		break;
 	case FPinactive:
+		s = splhi();
 		fprestore(&up->fpsave);
 		up->fpstate = FPactive;
+		splx(s);
 		break;
 	case FPactive:
 		postnote(up, 1, "sys: floating point error", NDebug);
@@ -138,6 +144,7 @@
 			postnote(up, 1, "sys: trap: invalid opcode", NDebug);
 			break;
 		}
+		dumpregs(ureg);
 		panic("invalid opcode at pc=%#.8lux lr=%#.8lux", ureg->pc, ureg->r14);
 		break;
 	case PsrMiabt:
@@ -153,7 +160,7 @@
 		intr(ureg);
 		break;
 	default:
-		print("unknown trap type %ulx\n", ureg->type);
+		iprint("cpu%d: unknown trap type %ulx\n", m->machno, ureg->type);
 	}
 	splhi();
 	if(user){
@@ -408,9 +415,17 @@
 }
 
 void
-dumpregs(Ureg *)
+dumpregs(Ureg *ureg)
 {
-	print("dumpregs\n");
+	iprint("trap: %lux psr %8.8lux type %2.2lux pc %8.8lux link %8.8lux\n",
+		ureg->type, ureg->psr, ureg->type, ureg->pc, ureg->link);
+	iprint("R14 %8.8lux R13 %8.8lux R12 %8.8lux R11 %8.8lux R10 %8.8lux\n",
+		ureg->r14, ureg->r13, ureg->r12, ureg->r11, ureg->r10);
+	iprint("R9  %8.8lux R8  %8.8lux R7  %8.8lux R6  %8.8lux R5  %8.8lux\n",
+		ureg->r9, ureg->r8, ureg->r7, ureg->r6, ureg->r5);
+	iprint("R4  %8.8lux R3  %8.8lux R2  %8.8lux R1  %8.8lux R0  %8.8lux\n",
+		ureg->r4, ureg->r3, ureg->r2, ureg->r1, ureg->r0);
+	iprint("pc %#lux link %#lux\n", ureg->pc, ureg->link);
 }
 
 void
@@ -476,7 +491,7 @@
 	cycles(&t);
 	p->kentry -= t;
 	p->pcycles += t;
-	
+
 	l1switch(&m->l1, 0);
 }