shithub: riscv

Download patch

ref: cc17845f6cf72cc78097e996d296cb57e317eb7e
parent: 69793014c2b902bae193b03e992e5d9a43628f85
author: Jacob Moody <moody@posixcafe.org>
date: Sat Apr 6 21:56:57 EDT 2024

ape: bring in entrypoint changes from libc and fix profiling across the board

* copy _callmain structure from libc
* assembly functions without a prelude cannot be profiled: give them a frame ($0) or mark them no-profile (TEXT flag 1)
* add missing files for profiling on some archs
* reduce minor style differences between ape/libc profile code

--- a/sys/src/ape/lib/ap/386/main9.s
+++ b/sys/src/ape/lib/ap/386/main9.s
@@ -1,31 +1,8 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_main(SB), 1, $(12+4+128+NPRIVATES*4)
-
-	/* _tos = arg */
+TEXT	_main(SB), 1, $0
 	MOVL	AX, _tos(SB)
-	LEAL	12(SP), AX
-	MOVL	AX, _errnoloc(SB)
-	ADDL	$4, AX
-	MOVL	AX, _plan9err(SB)
-	ADDL	$128, AX
-	MOVL	AX, _privates(SB)
-	MOVL	$NPRIVATES, _nprivates(SB)
-
-	CALL	_envsetup(SB)
-	MOVL	inargc-4(FP), AX
-	MOVL	AX, 0(SP)
-	LEAL	inargv+0(FP), AX
-	MOVL	AX, 4(SP)
-	MOVL	environ(SB), AX
-	MOVL	AX, 8(SP)
-	CALL	main(SB)
-	MOVL	AX, 0(SP)
-	CALL	exit(SB)
-	RET
+	MOVL	$_apemain(SB), AX	/* AX = address of _apemain */
+	PUSHL	AX	/* push it; presumably becomes f, the first parameter of _callmain (callmain.c) */
+	PUSHL	$0	/* push a zero word; presumably stands in for the return PC a CALL would have pushed */
+	JMPF	_callmain(SB)	/* enter _callmain by jump rather than CALL */
--- a/sys/src/ape/lib/ap/386/main9p.s
+++ b/sys/src/ape/lib/ap/386/main9p.s
@@ -1,47 +1,11 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_mainp(SB), 1, $(12+4+128+NPRIVATES*4)
-
-	/* _tos = arg */
+TEXT	_mainp(SB), 1, $0
 	MOVL	AX, _tos(SB)
-	LEAL	12(SP), AX
-	MOVL	AX, _errnoloc(SB)
-	ADDL	$4, AX
-	MOVL	AX, _plan9err(SB)
-	ADDL	$128, AX
-	MOVL	AX, _privates(SB)
-	MOVL	$NPRIVATES, _nprivates(SB)
-
-	/* _profmain(); */
-	CALL	_profmain(SB)
-
-	/* _tos->prof.pp = _tos->prof.next; */
-	MOVL	_tos+0(SB),DX
-	MOVL	4(DX),CX
-	MOVL	CX,(DX)
-
-	CALL	_envsetup(SB)
-
-	/* main(argc, argv, environ); */
-	MOVL	inargc-4(FP), AX
-	MOVL	AX, 0(SP)
-	LEAL	inargv+0(FP), AX
-	MOVL	AX, 4(SP)
-	MOVL	environ(SB), AX
-	MOVL	AX, 8(SP)
-	CALL	main(SB)
-loop:
-	MOVL	AX, 0(SP)
-	CALL	exit(SB)
-	MOVL	$_profin(SB), AX	/* force loading of profile */
-	MOVL	$0, AX
-	JMP	loop
+	MOVL	$_profmain(SB), AX	/* AX = address of _profmain */
+	PUSHL	AX	/* push it; presumably becomes f, the first parameter of _callmain (callmain.c) */
+	PUSHL	$0	/* push a zero word; presumably stands in for the return PC a CALL would have pushed */
+	JMPF	_callmain(SB)	/* enter _callmain by jump rather than CALL */
 
 TEXT	_saveret(SB), 1, $0
 TEXT	_savearg(SB), 1, $0
--- a/sys/src/ape/lib/ap/68020/main9.s
+++ b/sys/src/ape/lib/ap/68020/main9.s
@@ -1,33 +1,8 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_main(SB), 1, $(4+128+NPRIVATES*4)
+TEXT	_main(SB), 1, $0
 	MOVL	$a6base(SB), A6
-
-	/* _tos = arg */
-	MOVL	R0, _tos(SB)		/* return value of sys exec!! */
-
-	MOVL	A7, A1
-	MOVL	A1, _errnoloc(SB)
-	ADDL	$4, A1
-	MOVL	A1, _plan9err(SB)
-	ADDL	$128, A1
-	MOVL	A1, _privates(SB)
-	MOVL	$NPRIVATES, _nprivates(SB)
-
-	BSR	_envsetup(SB)
-
-	/* main(argc, argv, environ); */
-	MOVL	environ(SB), TOS
-	PEA	inargv+0(FP)
-	MOVL	inargc-4(FP), TOS
-	BSR	main(SB)
-
-	MOVL	R0,TOS
-	BSR	exit(SB)
-	RTS
+	MOVL	R0, _tos(SB)	/* _tos = incoming R0 (the line this replaces called it the return value of sys exec) */
+	PEA	_apemain(SB)	/* push the address of _apemain */
+	MOVL	$0, TOS	/* push a zero word on top of it; presumably the return-PC slot — TODO confirm */
+	JMP	_callmain(SB)	/* enter _callmain by jump rather than BSR */
--- a/sys/src/ape/lib/ap/68020/main9p.s
+++ b/sys/src/ape/lib/ap/68020/main9p.s
@@ -1,47 +1,13 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_mainp(SB), 1, $(4+128+NPRIVATES*4)
+TEXT	_mainp(SB), 1, $0
 	MOVL	$a6base(SB), A6
+	MOVL	R0, _tos(SB)	/* _tos = incoming R0 (the line this replaces called it the return value of sys exec) */
+	PEA	_profmain(SB)	/* push the address of _profmain */
+	MOVL	$0, TOS	/* push a zero word on top of it; presumably the return-PC slot — TODO confirm */
+	JMP	_callmain(SB)	/* enter _callmain by jump rather than BSR */
 
-	/* _tos = arg */
-	MOVL	R0, _tos(SB)		/* return value of sys exec!! */
-
-	MOVL	A7, A1
-	MOVL	A1, _errnoloc(SB)
-	ADDL	$4, A1
-	MOVL	A1, _plan9err(SB)
-	ADDL	$128, A1
-	MOVL	A1, _privates(SB)
-	MOVL	$NPRIVATES, _nprivates(SB)
-
-	/* _profmain(); */
-	BSR	_profmain(SB)
-
-	/* _tos->prof.pp = _tos->prof.next; */
-	MOVL	_tos+0(SB),A1
-	MOVL	4(A1),(A1)
-
-	BSR	_envsetup(SB)
-
-	/* main(argc, argv, environ); */
-	MOVL	environ(SB), TOS
-	PEA	inargv+0(FP)
-	MOVL	inargc-4(FP), TOS
-	BSR	main(SB)
-
-loop:
-	MOVL	R0,TOS
-	BSR	exit(SB)
-	LEA	_profin(SB), A0		/* force loading of profile */
-	BRA	loop
-
-
+TEXT	_saveret(SB), 1, $0
 TEXT	_savearg(SB), 1, $0
 	RTS
 
--- a/sys/src/ape/lib/ap/amd64/main9.s
+++ b/sys/src/ape/lib/ap/amd64/main9.s
@@ -1,33 +1,8 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $8
-GLOBL	_errnoloc(SB), $8
-GLOBL	_plan9err(SB), $8
-GLOBL	_privates(SB), $8
-GLOBL	_nprivates(SB), $8
 
-TEXT	_main(SB), 1, $(24+8+128+NPRIVATES*8)
-
-	/* _tos = arg */
+TEXT	_main(SB), 1, $0
 	MOVQ	AX, _tos(SB)
-
-	LEAQ	24(SP), AX
-	MOVQ	AX, _errnoloc(SB)
-	ADDQ	$8, AX
-	MOVQ	AX, _plan9err(SB)
-	ADDQ	$128, AX
-	MOVQ	AX, _privates(SB)
-	MOVQ	$NPRIVATES, _nprivates(SB)
-
-	CALL	_envsetup(SB)
-
-	MOVL	inargc-8(FP), RARG
-	LEAQ	inargv+0(FP), AX
-	MOVQ	AX, 8(SP)
-	MOVQ	environ(SB), AX
-	MOVQ	AX, 16(SP)
-	CALL	main(SB)
-
-	MOVQ	AX, RARG
-	CALL	exit(SB)
-	RET
+	MOVQ	$_apemain(SB), RARG	/* RARG = address of _apemain */
+	PUSHQ	RARG	/* push the same value onto the stack as well */
+	PUSHQ	$0	/* push a zero quadword; presumably stands in for the return PC a CALL would have pushed */
+	JMPF	_callmain(SB)	/* enter _callmain by jump rather than CALL */
--- a/sys/src/ape/lib/ap/amd64/main9p.s
+++ b/sys/src/ape/lib/ap/amd64/main9p.s
@@ -1,55 +1,18 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $8
-GLOBL	_errnoloc(SB), $8
-GLOBL	_plan9err(SB), $8
-GLOBL	_privates(SB), $8
-GLOBL	_nprivates(SB), $8
 
-TEXT	_mainp(SB), 1, $(24+8+128+NPRIVATES*8)
-
-	/* _tos = arg */
+TEXT	_mainp(SB), 1, $0
 	MOVQ	AX, _tos(SB)
+	MOVQ	$_profmain(SB), RARG	/* RARG = address of _profmain */
+	PUSHQ	RARG	/* push the same value onto the stack as well */
+	PUSHQ	$0	/* push a zero quadword; presumably stands in for the return PC a CALL would have pushed */
+	JMPF	_callmain(SB)	/* enter _callmain by jump rather than CALL */
 
-	LEAQ	24(SP), AX
-	MOVQ	AX, _errnoloc(SB)
-	ADDQ	$8, AX
-	MOVQ	AX, _plan9err(SB)
-	ADDQ	$128, AX
-	MOVQ	AX, _privates(SB)
-	MOVQ	$NPRIVATES, _nprivates(SB)
-
-	/* _profmain(); */
-	CALL	_profmain(SB)
-
-	/* _tos->prof.pp = _tos->prof.next; */
-	MOVQ	_tos+0(SB),DX
-	MOVQ	4(DX),CX
-	MOVQ	CX,(DX)
-
-	CALL	_envsetup(SB)
-
-	/* main(argc, argv, environ); */
-	MOVL	inargc-8(FP), RARG
-	LEAQ	inargv+0(FP), AX
-	MOVQ	AX, 8(SP)
-	MOVQ	environ(SB), AX
-	MOVQ	AX, 16(SP)
-	CALL	main(SB)
-
-loop:
-	MOVL	AX, RARG
-	CALL	exit(SB)
-	MOVQ	$_profin(SB), AX	/* force loading of profile */
-	MOVL	$0, AX
-	JMP	loop
-
-TEXT	_saveret(SB), 1, $0
-	RET
-
 TEXT	_savearg(SB), 1, $0
 	MOVQ	RARG, AX
 	RET
+
+TEXT	_saveret(SB), 1, $0
+	RET				/* we want RARG in RARG */
 
 TEXT	_callpc(SB), 1, $0
 	MOVQ	8(RARG), AX
--- a/sys/src/ape/lib/ap/arm/main9.s
+++ b/sys/src/ape/lib/ap/arm/main9.s
@@ -1,43 +1,17 @@
 arg=0
 sp=13
 sb=12
+lr=14
 
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_main(SB), 1, $(16+4+128+NPRIVATES*4)
-
+TEXT	_main(SB), 1, $0
+	SUB	$8, R(sp)
 	MOVW	$setR12(SB), R(sb)
-
-	/* _tos = arg */
 	MOVW	R(arg), _tos(SB)
 
-	MOVW	$16(R(sp)), R1
-	MOVW	R1, _errnoloc(SB)
-	ADD	$4, R1
-	MOVW	R1, _plan9err(SB)
-	ADD	$128, R1
-	MOVW	R1, _privates(SB)
-	MOVW	$NPRIVATES, R1
-	MOVW	R1, _nprivates(SB)
+	MOVW	$_apemain(SB), R(arg)	/* R0 (arg=0 above) = address of _apemain, handed to _callmain */
+	MOVW	$0, R(lr)	/* zero R14 (lr=14 above) */
+	B	_callmain(SB)	/* plain branch, not BL */
 
-	BL	_envsetup(SB)
-
-	/* main(argc, argv, environ); */
-	MOVW	environ(SB), R(arg)
-	MOVW	R(arg), 12(R(sp))
-	MOVW	$inargv+0(FP), R(arg)
-	MOVW	R(arg), 8(R(sp))
-	MOVW	inargc-4(FP), R(arg)
-	MOVW	R(arg), 4(R(sp))
-	BL	main(SB)
-loop:
-	MOVW	R(arg), 4(R(sp))
-	BL	exit(SB)
-	BL	_div(SB)
-	B	loop
+	BL	_div(SB)	/* force loading of div */
--- a/sys/src/ape/lib/ap/arm/main9p.s
+++ b/sys/src/ape/lib/ap/arm/main9p.s
@@ -1,60 +1,25 @@
 arg=0
 sp=13
 sb=12
+lr=14
 
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_mainp(SB), 1, $(16+4+128+NPRIVATES*4)
-
+TEXT	_mainp(SB), 1, $0
+	SUB	$8, R(sp)
 	MOVW	$setR12(SB), R(sb)
-
-	/* _tos = arg */
 	MOVW	R(arg), _tos(SB)
 
-	MOVW	$16(R(sp)), R1
-	MOVW	R1, _errnoloc(SB)
-	ADD	$4, R1
-	MOVW	R1, _plan9err(SB)
-	ADD	$128, R1
-	MOVW	R1, _privates(SB)
-	MOVW	$NPRIVATES, R1
-	MOVW	R1, _nprivates(SB)
+	MOVW	$_profmain(SB), R(arg)	/* R0 (arg=0 above) = address of _profmain, handed to _callmain */
+	MOVW	$0, R(lr)	/* zero R14 (lr=14 above) */
+	B	_callmain(SB)	/* plain branch, not BL */
 
-	/* _profmain(); */
-	BL	_profmain(SB)
-
-	/* _tos->prof.pp = _tos->prof.next; */
-	MOVW	_tos+0(SB),R1
-	MOVW	4(R1), R2
-	MOVW	R2, 0(R1)
-
-	BL	_envsetup(SB)
-
-	/* main(argc, argv, environ); */
-	MOVW	environ(SB), R(arg)
-	MOVW	R(arg), 12(R(sp))
-	MOVW	$inargv+0(FP), R(arg)
-	MOVW	R(arg), 8(R(sp))
-	MOVW	inargc-4(FP), R(arg)
-	MOVW	R(arg), 4(R(sp))
-	BL	main(SB)
-loop:
-	MOVW	R(arg), 4(R(sp))
-	BL	exit(SB)
 	MOVW	$_div(SB), R(arg)	/* force loading of div */
-	MOVW	$_profin(SB), R(arg)	/* force loading of profile */
-	B	loop
 
 TEXT	_saveret(SB), 1, $0
 TEXT	_savearg(SB), 1, $0
 	RET
 
-TEXT	_callpc(SB), 1, $0
-	MOVW	argp-4(FP), R(arg)
+TEXT	_callpc(SB), 1, $-4
+	MOVW	0(R13), R(arg)
 	RET
--- a/sys/src/ape/lib/ap/arm/memmove.s
+++ b/sys/src/ape/lib/ap/arm/memmove.s
@@ -5,9 +5,8 @@
 TMP = 3					/* N and TMP don't overlap */
 TMP1 = 4
 
-TEXT memcpy(SB), $-4
-	B	_memmove
-TEXT memmove(SB), $-4
+TEXT memcpy(SB), 1, $-4
+TEXT memmove(SB), $0
 _memmove:
 	MOVW	R(TS), to+0(FP)		/* need to save for return value */
 	MOVW	from+4(FP), R(FROM)
--- a/sys/src/ape/lib/ap/arm/strchr.s
+++ b/sys/src/ape/lib/ap/arm/strchr.s
@@ -1,4 +1,4 @@
-TEXT strchr(SB), $-4
+TEXT strchr(SB), $0
 	MOVBU	c+4(FP), R1
 	CMP	$0, R1
 	BEQ	_null
--- a/sys/src/ape/lib/ap/arm/strcmp.s
+++ b/sys/src/ape/lib/ap/arm/strcmp.s
@@ -1,4 +1,4 @@
-TEXT strcmp(SB), $-4
+TEXT strcmp(SB), $0
 	MOVW	R0, R1
 	MOVW	s2+4(FP), R2
 
--- a/sys/src/ape/lib/ap/arm/strcpy.s
+++ b/sys/src/ape/lib/ap/arm/strcpy.s
@@ -1,4 +1,4 @@
-TEXT strcpy(SB), $-4
+TEXT strcpy(SB), $0
 	MOVW		R0, to+0(FP)	/* need to save for return value */
 	MOVW		from+4(FP), R1
 	MOVW		$0xFF, R2	/* mask */
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm64/cycles.s
@@ -1,0 +1,7 @@
+#define	SYSREG(op0,op1,Cn,Cm,op2)	SPR(((op0)<<19|(op1)<<16|(Cn)<<12|(Cm)<<8|(op2)<<5))
+#define CNTVCT_EL0			SYSREG(3,3,14,0,2)
+
+TEXT _cycles(SB), 1, $-4
+	MRS	CNTVCT_EL0, R1	/* R1 = system register CNTVCT_EL0, encoded by the SYSREG macro above */
+	MOV	R1, (R0)	/* store R1 at the address in R0; presumably the uvlong* argument — verify against caller */
+	RETURN
--- a/sys/src/ape/lib/ap/arm64/main9.s
+++ b/sys/src/ape/lib/ap/arm64/main9.s
@@ -1,39 +1,11 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $8
-GLOBL	_errnoloc(SB), $8
-GLOBL	_plan9err(SB), $128
-GLOBL	_privates(SB), $8
-GLOBL	_nprivates(SB), $4
 
-TEXT	_main(SB), 1, $(32 + 8+128 + NPRIVATES*8)
+TEXT	_main(SB), 1, $0
+	SUB	$16, SP
 	MOV	$setSB(SB), R28
 	MOV	R0, _tos(SB)
 
-	ADD	$32, RSP, R1
+	MOV	$_apemain(SB), R0
 
-	MOV	R1, _errnoloc(SB)
-	ADD	$8, R1
-
-	MOV	R1, _plan9err(SB)
-	ADD	$128, R1
-
-	MOV	R1, _privates(SB)
-	MOVW	$NPRIVATES, R2
-	MOVW	R2, _nprivates(SB)
-
-	BL	_envsetup(SB)
-
-	MOV	environ(SB), R2
-	MOV	R2, 24(RSP)
-
-	MOV	$inargv+0(FP), R1
-	MOV	R1, 16(RSP)
-
-	MOVW	inargc-8(FP), R0
-	MOV	R0, 8(RSP)
-
-	BL	main(SB)
-loop:
-	BL	exit(SB)
-	B	loop
+	MOV	$0, R30	/* zero R30, the arm64 link register */
+	B	_callmain(SB)	/* plain branch, not BL */
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm64/main9p.s
@@ -1,0 +1,17 @@
+GLOBL	_tos(SB), $8
+
+TEXT	_mainp(SB), 1, $0
+	SUB	$16, SP	/* lower SP by 16 bytes before entering C code */
+	MOV	$setSB(SB), R28	/* R28 = address of setSB; presumably the static base register — TODO confirm */
+	MOV	R0, _tos(SB)	/* _tos = incoming R0 */
+
+	MOV	$_profmain(SB), R0	/* R0 = address of _profmain, handed to _callmain */
+
+	MOV	$0, R30	/* zero R30, the arm64 link register */
+	B	_callmain(SB)	/* plain branch, not BL */
+
+TEXT	_callpc(SB), 1, $-4
+	MOV	0(SP), R0	/* R0 = word at 0(SP); no RETURN here, so execution continues into the RETURN below */
+TEXT	_saveret(SB), 1, $-4
+TEXT	_savearg(SB), 1, $-4
+	RETURN	/* shared by _callpc, _saveret and _savearg; the latter two execute nothing else */
--- a/sys/src/ape/lib/ap/arm64/memmove.s
+++ b/sys/src/ape/lib/ap/arm64/memmove.s
@@ -1,5 +1,5 @@
-TEXT memcpy(SB), $-4
-TEXT memmove(SB), $-4
+TEXT memcpy(SB), 1, $-4
+TEXT memmove(SB), $0
 	MOV	from+8(FP), R1
 	MOVWU	n+16(FP), R2
 
--- a/sys/src/ape/lib/ap/arm64/memset.s
+++ b/sys/src/ape/lib/ap/arm64/memset.s
@@ -1,4 +1,4 @@
-TEXT memset(SB), $-4
+TEXT memset(SB), $0
 	MOVBU	c+8(FP), R1
 	MOVWU	n+16(FP), R2
 
--- a/sys/src/ape/lib/ap/arm64/mkfile
+++ b/sys/src/ape/lib/ap/arm64/mkfile
@@ -3,9 +3,11 @@
 LIB=/$objtype/lib/ape/libap.a
 OFILES=\
 	_seek.$O\
+	cycles.$O\
 	getfcr.$O\
 	lock.$O\
 	main9.$O\
+	main9p.$O\
 	memmove.$O\
 	memset.$O\
 	notetramp.$O\
--- a/sys/src/ape/lib/ap/mips/cycles.c
+++ b/sys/src/ape/lib/ap/mips/cycles.c
@@ -1,3 +1,5 @@
+#pragma profile off
+
 void
 _cycles(unsigned long long *u)
 {
--- a/sys/src/ape/lib/ap/mips/main9.s
+++ b/sys/src/ape/lib/ap/mips/main9.s
@@ -1,38 +1,10 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_main(SB), 1, $(16+4+128+NPRIVATES*4)
+TEXT	_main(SB), 1, $0
+	ADD	$-8, R29
 	MOVW	$setR30(SB), R30
-
-	/* _tos = arg */
 	MOVW	R1, _tos(SB)
 
-	MOVW	$16(R29), R1
-	MOVW	R1, _errnoloc(SB)
-	ADDU	$4, R1
-	MOVW	R1, _plan9err(SB)
-	ADDU	$128, R1
-	MOVW	R1, _privates(SB)
-	MOVW	$NPRIVATES, R1
-	MOVW	R1, _nprivates(SB)
-
-	JAL	_envsetup(SB)
-
-	/* main(argc, argv, environ); */
-	MOVW	inargc-4(FP), R1
-	MOVW	$inargv+0(FP), R2
-	MOVW	environ(SB), R3
-	MOVW	R1, 4(R29)
-	MOVW	R2, 8(R29)
-	MOVW	R3, 12(R29)
-
-	JAL	main(SB)
-loop:
-	MOVW	R1, 4(R29)
-	JAL	exit(SB)
-	JMP	loop
+	MOVW	$_apemain(SB), R1	/* R1 = address of _apemain, handed to _callmain */
+	MOVW	$0, R31	/* zero R31; presumably the link register — TODO confirm */
+	JMP	_callmain(SB)	/* jump rather than JAL */
--- a/sys/src/ape/lib/ap/mips/main9p.s
+++ b/sys/src/ape/lib/ap/mips/main9p.s
@@ -1,49 +1,13 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_mainp(SB), 1, $(16+4+128+NPRIVATES*4)
+TEXT	_mainp(SB), 1, $0
+	ADD	$-8, R29
 	MOVW	$setR30(SB), R30
-
-	/* _tos = arg */
 	MOVW	R1, _tos(SB)
 
-	MOVW	$16(R29), R1
-	MOVW	R1, _errnoloc(SB)
-	ADDU	$4, R1
-	MOVW	R1, _plan9err(SB)
-	ADDU	$128, R1
-	MOVW	R1, _privates(SB)
-	MOVW	$NPRIVATES, R1
-	MOVW	R1, _nprivates(SB)
-
-	/* _profmain(); */
-	JAL	_profmain(SB)
-
-	/* _tos->prof.pp = _tos->prof.next; */
-	MOVW	_tos+0(SB),R1
-	MOVW	4(R1),R2
-	MOVW	R2,(R1)
-
-	JAL	_envsetup(SB)
-
-	/* main(argc, argv, environ); */
-	MOVW	inargc-4(FP), R1
-	MOVW	$inargv+0(FP), R2
-	MOVW	environ(SB), R3
-	MOVW	R1, 4(R29)
-	MOVW	R2, 8(R29)
-	MOVW	R3, 12(R29)
-	JAL	main(SB)
-loop:
-	MOVW	R1, 4(R29)
-	JAL	exit(SB)
-	MOVW	$_profin(SB), R0	/* force loading of profile */
-	JMP	loop
+	MOVW	$_profmain(SB), R1	/* R1 = address of _profmain, handed to _callmain */
+	MOVW	$0, R31	/* zero R31; presumably the link register — TODO confirm */
+	JMP	_callmain(SB)	/* jump rather than JAL */
 
 TEXT	_saveret(SB), 1, $0
 TEXT	_savearg(SB), 1, $0
--- a/sys/src/ape/lib/ap/plan9/_envsetup.c
+++ b/sys/src/ape/lib/ap/plan9/_envsetup.c
@@ -21,7 +21,6 @@
 char **environ;
 int *_errnoloc;
 unsigned long _clock;
-static void fdsetup(char *, char *);
 static void sigsetup(char *, char *);
 
 enum {
--- /dev/null
+++ b/sys/src/ape/lib/ap/plan9/callmain.c
@@ -1,0 +1,52 @@
+#include "lib.h"
+#include "sys9.h"
+#include <stdlib.h>
+
+extern	void _envsetup(void);
+extern	char **environ;
+extern	int *_errnoloc;
+
+void **_privates;
+int _nprivates;
+char *_plan9err;
+
+enum{
+	NPRIVATES=16,
+};
+
+#pragma profile off
+
+/*
+ * C half of the process entry point, reached from the per-architecture
+ * _main/_mainp stubs.  The storage for errno, the Plan 9 error string
+ * and the privates array is carved out of this frame, which stays live
+ * for the rest of the process because exit() is called from here.
+ *
+ * f is the function to run (_apemain or _profmain), argc the argument
+ * count, and arg0 the first argv slot; &arg0 is handed on as argv.
+ */
+void
+_callmain(int (*f)(int, char**), int argc, char *arg0)
+{
+	int errno;
+	char err[ERRMAX];
+	void *privates[NPRIVATES];
+
+	errno = 0;	/* ISO C: errno is zero at program startup */
+	err[0] = '\0';
+	_privates = privates;
+	_nprivates = NPRIVATES;
+	_errnoloc = &errno;
+	_plan9err = &err[0];
+	exit(f(argc, &arg0));
+}
+
+int
+_apemain(int argc, char **argv)
+{
+	_envsetup();	/* run _envsetup (defined elsewhere) before main is entered */
+	extern int main(int, char**, char**);	/* the program's main, declared with a third parameter */
+	return main(argc, argv, environ);	/* pass environ as that third argument and hand back main's result */
+}
+
+#pragma profile on
--- a/sys/src/ape/lib/ap/plan9/mkfile
+++ b/sys/src/ape/lib/ap/plan9/mkfile
@@ -20,6 +20,7 @@
 	access.$O\
 	alarm.$O\
 	brk.$O\
+	callmain.$O\
 	cfgetospeed.$O\
 	chdir.$O\
 	chroot.$O\
--- a/sys/src/ape/lib/ap/plan9/profile.c
+++ b/sys/src/ape/lib/ap/plan9/profile.c
@@ -23,8 +23,8 @@
 
 extern	void*	sbrk(ulong);
 extern	long	_callpc(void**);
-extern	void*	_savearg(void);
 extern	void*	_saveret(void);
+extern	void*	_savearg(void);
 extern	void	_cycles(uvlong*);	/* 64-bit value of the cycle counter if there is one, 0 if there isn't */
 
 static ulong	khz;
@@ -45,7 +45,7 @@
 #pragma profile off
 
 static void*
-_restore(void*, void *ret)
+_restore(void*, void* ret)
 {
 	return ret;
 }
@@ -56,7 +56,7 @@
 	void *dummy;
 	long pc;
 	Plink *pp, *p;
-	void *ret, *arg;
+	void *arg, *ret;
 	vlong t;
 
 	ret = _saveret();
@@ -65,7 +65,6 @@
 	pp = _tos->prof.pp;
 	if(pp == 0 || (_tos->prof.pid && _tos->pid != _tos->prof.pid))
 		return _restore(arg, ret);
-
 	for(p=pp->down; p; p=p->link)
 		if(p->pc == pc)
 			goto out;
@@ -95,7 +94,7 @@
 		/* Add kernel cycles on proc entry */
 		p->time = p->time + _tos->kcycles;
 		/* fall through */
-	case Proftime:	
+	case Proftime:
 	proftime:		/* Subtract cycle counter on proc entry */
 		_cycles((uvlong*)&t);
 		p->time = p->time - t;
@@ -118,7 +117,7 @@
 	arg = _savearg();
 	p = _tos->prof.pp;
 	if (p == NULL || (_tos->prof.pid != 0 && _tos->pid != _tos->prof.pid))
-		return arg;	/* Not our process */
+		return _restore(arg, ret);	/* Not our process */
 	switch(_tos->prof.what){
 	case Profkernel:		/* Add proc cycles on proc entry */
 		p->time = p->time + _tos->pcycles;
@@ -126,7 +125,7 @@
 	case Profuser:			/* Subtract kernel cycles on proc entry */
 		p->time = p->time - _tos->kcycles;
 		/* fall through */
-	case Proftime:	
+	case Proftime:
 	proftime:				/* Add cycle counter on proc entry */
 		_cycles((uvlong*)&t);
 		p->time = p->time + t;
@@ -262,7 +261,6 @@
 	}
 	write(f, (char*)_tos->prof.first, vp - (char*)_tos->prof.first);
 	close(f);
-
 }
 
 void
@@ -279,8 +277,8 @@
 	_tos->clock = 1;
 }
 
-void
-_profmain(void)
+int
+_profmain(int argc, char **argv)
 {
 	char ename[50];
 	int n, f;
@@ -290,7 +288,7 @@
 		khz = _tos->cyclefreq / 1000;	/* Report times in milliseconds */
 		havecycles = 1;
 	}
-	f = open("/env/profsize", OREAD);
+	f = open("/env/profsize", OREAD|OCEXEC);
 	if(f >= 0) {
 		memset(ename, 0, sizeof(ename));
 		read(f, ename, sizeof(ename)-1);
@@ -298,7 +296,7 @@
 		n = atol(ename);
 	}
 	_tos->prof.what = Profuser;
-	f = open("/env/proftype", OREAD);
+	f = open("/env/proftype", OREAD|OCEXEC);
 	if(f >= 0) {
 		memset(ename, 0, sizeof(ename));
 		read(f, ename, sizeof(ename)-1);
@@ -319,6 +317,9 @@
 	_tos->prof.pid = _tos->pid;
 	atexit(_profdump);
 	_tos->clock = 1;
+	_tos->prof.pp = _tos->prof.next;
+	extern int _apemain(int, char**);
+	return _apemain(argc, argv);
 }
 
 void prof(void (*fn)(void*), void *arg, int entries, int what)
--- a/sys/src/ape/lib/ap/power/main9.s
+++ b/sys/src/ape/lib/ap/power/main9.s
@@ -1,37 +1,12 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_main(SB), 1, $(12+4+128+NPRIVATES*4)
-
+TEXT	_main(SB), 1, $0
+	SUB	$8, R1
 	MOVW	$setSB(SB), R2
-
-	/* _tos = arg */
 	MOVW	R3, _tos(SB)
 
-	MOVW	$12(R1), R3
-	MOVW	R3, _errnoloc(SB)
-	ADD	$4, R3
-	MOVW	R3, _plan9err(SB)
-	ADD	$128, R3
-	MOVW	R3, _privates(SB)
-	MOVW	$NPRIVATES, R3
-	MOVW	R3, _nprivates(SB)
-
-	BL	_envsetup(SB)
-
-	MOVW	inargc-4(FP), R3
-	MOVW	$inargv+0(FP), R4
-	MOVW	environ(SB), R5
-	MOVW	R3, 4(R1)
-	MOVW	R4, 8(R1)
-	MOVW	R5, 12(R1)
-	BL	main(SB)
-loop:
-	MOVW	R3, 4(R1)
-	BL	exit(SB)
-	BR	loop
+	MOVW	$_apemain(SB), R3	/* R3 = address of _apemain, handed to _callmain */
+	MOVW	R0, LR	/* LR = R0; assumes R0 holds zero here — TODO confirm */
+	MOVW	$_callmain(SB), R4	/* R4 = address of _callmain */
+	MOVW	R4, CTR	/* CTR = R4 */
+	BR	(CTR)	/* branch to _callmain through CTR, without linking */
--- a/sys/src/ape/lib/ap/power/main9p.s
+++ b/sys/src/ape/lib/ap/power/main9p.s
@@ -1,54 +1,20 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_mainp(SB), 1, $(12+4+128+NPRIVATES*4)
-
+TEXT	_mainp(SB), 1, $0
+	SUB	$8, R1
 	MOVW	$setSB(SB), R2
-
-	/* _tos = arg */
 	MOVW	R3, _tos(SB)
 
-	MOVW	$12(R1), R3
-	MOVW	R3, _errnoloc(SB)
-	ADD	$4, R3
-	MOVW	R3, _plan9err(SB)
-	ADD	$128, R3
-	MOVW	R3, _privates(SB)
-	MOVW	$NPRIVATES, R3
-	MOVW	R3, _nprivates(SB)
+	MOVW	$_profmain(SB), R3	/* R3 = address of _profmain, handed to _callmain */
+	MOVW	R0, LR	/* LR = R0; assumes R0 holds zero here — TODO confirm */
+	MOVW	$_callmain(SB), R4	/* R4 = address of _callmain */
+	MOVW	R4, CTR	/* CTR = R4 */
+	BR	(CTR)	/* branch to _callmain through CTR, without linking */
 
-	/* _profmain(); */
-	BL	_envsetup(SB)
-
-	/* _tos->prof.pp = _tos->prof.next; */
-	MOVW	_tos+0(SB),R3
-	MOVW	4(R3),R2
-	MOVW	R2,(R3)
-
-	/* main(argc, argv, environ); */
-	MOVW	inargc-4(FP), R3
-	MOVW	$inargv+0(FP), R4
-	MOVW	environ(SB), R5
-	MOVW	R3, 4(R1)
-	MOVW	R4, 8(R1)
-	MOVW	R5, 12(R1)
-	BL	main(SB)
-loop:
-	MOVW	R3, 4(R1)
-	BL	exit(SB)
-	MOVW	$_profin(SB), R4	/* force loading of profile */
-	BR	loop
-
 TEXT	_saveret(SB), 1, $0
 TEXT	_savearg(SB), 1, $0
 	RETURN
 
 TEXT	_callpc(SB), 1, $0
-	MOVW	argp+0(FP), R3
-	MOVW	4(R3), R3
+	MOVW	argp-4(FP), R3
 	RETURN
--- a/sys/src/ape/lib/ap/sparc/main9.s
+++ b/sys/src/ape/lib/ap/sparc/main9.s
@@ -1,16 +1,10 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_plan9err(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_main(SB), 1, $(12+4+128+NPRIVATES*4)
+TEXT	_main(SB), 1, $0
+	SUB	$8, R1
 	MOVW	$setSB(SB), R2
-
-	/* _tos = arg */
 	MOVW	R7, _tos(SB)
+
 /*
 	MOVW	_fpsr+0(SB), FSR
 	FMOVD	$0.5, F26
@@ -18,27 +12,9 @@
 	FADDD	F26, F26, F28
 	FADDD	F28, F28, F30
 */
-	MOVW	$12(R1), R7
-	MOVW	R7, _errnoloc(SB)
-	ADD	$4, R7
-	MOVW	R7, _plan9err(SB)
-	ADD	$128, R7
-	MOVW	R7, _privates(SB)
-	MOVW	$NPRIVATES, R7
-	MOVW	R7, _nprivates(SB)
 
-	JMPL	_envsetup(SB)
-
-	MOVW	inargc-4(FP), R7
-	MOVW	$inargv+0(FP), R8
-	MOVW	environ(SB), R9
-	MOVW	R7, 4(R1)
-	MOVW	R8, 8(R1)
-	MOVW	R9, 12(R1)
-	JMPL	main(SB)
-
-loop:
-	MOVW	R7, 4(R1)
-	JMPL	exit(SB)
-	MOVW	$_mul(SB),R7
-	JMP	loop
+	MOVW	$_apemain(SB), R7	/* R7 = address of _apemain, handed to _callmain */
+	MOVW	$_callmain(SB), R24	/* R24 = address of _callmain */
+	MOVW	$0, R15	/* zero R15; presumably the link register — TODO confirm */
+	JMP	(R24)	/* jump to _callmain through R24 rather than JMPL */
+	MOVW	$_mul(SB), R8	/* force loading of muldiv */
--- a/sys/src/ape/lib/ap/sparc/main9p.s
+++ b/sys/src/ape/lib/ap/sparc/main9p.s
@@ -1,15 +1,10 @@
-#define NPRIVATES	16
-
 GLOBL	_tos(SB), $4
-GLOBL	_errnoloc(SB), $4
-GLOBL	_privates(SB), $4
-GLOBL	_nprivates(SB), $4
 
-TEXT	_mainp(SB), 1, $(16+NPRIVATES*4)
+TEXT	_mainp(SB), 1, $0
+	SUB	$8, R1
 	MOVW	$setSB(SB), R2
-
-	/* _tos = arg */
 	MOVW	R7, _tos(SB)
+
 /*
 	MOVW	_fpsr+0(SB), FSR
 	FMOVD	$0.5, F26
@@ -17,39 +12,12 @@
 	FADDD	F26, F26, F28
 	FADDD	F28, F28, F30
 */
-	MOVW	$12(R1), R7
-	MOVW	R7, _errnoloc(SB)
-	ADD	$4, R7
-	MOVW	R7, _plan9err(SB)
-	ADD	$128, R7
-	MOVW	R7, _privates(SB)
-	MOVW	$NPRIVATES, R7
-	MOVW	R7, _nprivates(SB)
 
-	/* _profmain(); */
-	JMPL	_profmain(SB)
-
-	/* _tos->prof.pp = _tos->prof.next; */
-	MOVW	_tos+0(SB),R7
-	MOVW	4(R7),R8
-	MOVW	R8,(R7)
-
-	JMPL	_envsetup(SB)
-
-	/* main(argc, argv, environ); */
-	MOVW	inargc-4(FP), R7
-	MOVW	$inargv+0(FP), R8
-	MOVW	environ(SB), R9
-	MOVW	R7, 4(R1)
-	MOVW	R8, 8(R1)
-	MOVW	R9, 12(R1)
-	JMPL	main(SB)
-
-loop:
-	JMPL	exit(SB)
-	MOVW	$_mul(SB), R0		/* force loading of muldiv */
-	MOVW	$_profin(SB), R0	/* force loading of profile */
-	JMP	loop
+	MOVW	$_profmain(SB), R7	/* R7 = address of _profmain, handed to _callmain */
+	MOVW	$_callmain(SB), R24	/* R24 = address of _callmain */
+	MOVW	$0, R15	/* zero R15; presumably the link register — TODO confirm */
+	JMP	(R24)	/* jump to _callmain through R24 rather than JMPL */
+	MOVW	$_mul(SB), R8		/* force loading of muldiv */
 
 TEXT	_saveret(SB), 1, $0
 TEXT	_savearg(SB), 1, $0
--- a/sys/src/ape/lib/ap/sparc/tas.s
+++ b/sys/src/ape/lib/ap/sparc/tas.s
@@ -1,7 +1,7 @@
 /*
  *	tas uses LDSTUB
  */
-	TEXT	tas(SB),$-4
+	TEXT	tas(SB),1,$-4
 
 	TAS	(R7),R7
 	RETURN
--- a/sys/src/libc/port/profile.c
+++ b/sys/src/libc/port/profile.c
@@ -18,7 +18,7 @@
 	Plink	*link;
 	long	pc;
 	long	count;
-	vlong time;
+	vlong	time;
 };
 
 #pragma profile off
@@ -48,7 +48,7 @@
 		if(p->pc == pc)
 			goto out;
 	p = _tos->prof.next + 1;
-	if(p >= _tos->prof.last) {
+	if(p >= _tos->prof.last){
 		_tos->prof.pp = 0;
 		perr++;
 		return _restore(arg, ret);
--- a/sys/src/libc/sparc/tas.s
+++ b/sys/src/libc/sparc/tas.s
@@ -1,7 +1,7 @@
 /*
  *	tas uses LDSTUB
  */
-	TEXT	_tas(SB),$-4
+	TEXT	_tas(SB),1,$-4
 
 	TAS	(R7),R7
 	RETURN