shithub: riscv

Download patch

ref: f811708ffcb776be52a81637224c06b700177566
parent: 3d05e77ca1f743e5b4091c6bfe311460175ed9ae
author: cinap_lenrek <cinap_lenrek@gmx.de>
date: Sat Sep 21 15:55:52 EDT 2013

ape: change tas/sleep locks to cas/semacquire/semrelease locks (from sources)

--- a/sys/include/ape/lock.h
+++ b/sys/include/ape/lock.h
@@ -10,7 +10,8 @@
 
 typedef struct
 {
-	int	val;
+	long	key;	/* lock word: ainc/adec count on 386/arm/mips/power, tas flag on 68020/alpha/sparc */
+	long	sem;	/* passed to _SEMACQUIRE/_SEMRELEASE by the ainc/adec versions of lock.c */
 } Lock;
 
 #ifdef __cplusplus
--- /dev/null
+++ b/sys/src/ape/lib/ap/386/atom.s
@@ -1,0 +1,121 @@
+TEXT ainc(SB), 1, $-4				/* long ainc(long*); atomically add 1 to *arg */
+	MOVL	arg+0(FP), BX			/* BX = address of the counter */
+	MOVL	$1, AX
+	LOCK; BYTE $0x0f; BYTE $0xc1; BYTE $0x03/* XADDL AX, (BX): old value -> AX, *BX += 1 */
+	ADDL	$1, AX				/* return the new value; overflow if -ve or 0 */
+	RET
+
+TEXT adec(SB), 1, $-4				/* long adec(long*); atomically subtract 1 from *arg */
+	MOVL	arg+0(FP), BX			/* BX = address of the counter */
+	MOVL	$-1, AX
+	LOCK; BYTE $0x0f; BYTE $0xc1; BYTE $0x03/* XADDL AX, (BX): old value -> AX, *BX -= 1 */
+	SUBL	$1, AX				/* return the new value; underflow if -ve */
+	RET
+
+/*
+ * int cas32(u32int *p, u32int ov, u32int nv);
+ * int cas(uint *p, int ov, int nv);
+ * int casp(void **p, void *ov, void *nv);
+ * int casl(ulong *p, ulong ov, ulong nv);
+ */
+
+/*
+ * CMPXCHG (CX), DX: 0000 1111 1011 000w oorr rmmm,
+ * mmm = CX = 001; rrr = DX = 010
+ */
+
+#define CMPXCHG		BYTE $0x0F; BYTE $0xB1; BYTE $0x11
+
+TEXT	cas32+0(SB),0,$0		/* four entry points share the body below */
+TEXT	cas+0(SB),0,$0
+TEXT	casp+0(SB),0,$0
+TEXT	casl+0(SB),0,$0
+	MOVL	p+0(FP), CX
+	MOVL	ov+4(FP), AX	/* accumulator: value CMPXCHG compares with (CX) */
+	MOVL	nv+8(FP), DX	/* stored to (CX) only if the compare matches */
+	LOCK
+	CMPXCHG
+	JNE	fail		/* *p != ov: nothing was stored */
+	MOVL	$1,AX		/* swapped: return 1 */
+	RET
+fail:
+	MOVL	$0,AX		/* not swapped: return 0 */
+	RET
+
+/*
+ * int cas64(u64int *p, u64int ov, u64int nv);
+ */
+
+/*
+ * CMPXCHG64 (DI): 0000 1111 1100 0111 0000 1111
+ */
+
+#define CMPXCHG64		BYTE $0x0F; BYTE $0xC7; BYTE $0x0F
+
+TEXT	cas64+0(SB),0,$0
+	MOVL	p+0(FP), DI
+	MOVL	ov+0x4(FP), AX	/* DX:AX = ov */
+	MOVL	ov+0x8(FP), DX
+	MOVL	nv+0xc(FP), BX	/* CX:BX = nv */
+	MOVL	nv+0x10(FP), CX
+	LOCK
+	CMPXCHG64
+	JNE	fail		/* shares the fail: label of cas above, which returns 0 */
+	MOVL	$1,AX		/* swapped: return 1 */
+	RET
+
+/*
+ * Versions of compare-and-swap that return the old value
+ * (i.e., the value of *p at the time of the operation).
+ * 	xcas(p, o, n) == o
+ * yields the same value as
+ *	cas(p, o, n)
+ * xcas can be used in constructs like
+ *	for(o = *p; (oo = xcas(p, o, o+1)) != o; o = oo)
+ *		;
+ * to avoid the extra dereference of *p (the example is a silly
+ * way to increment *p atomically)
+ *
+ * u32int	xcas32(u32int *p, u32int ov, u32int nv);
+ * u64int	xcas64(u64int *p, u64int ov, u64int nv);
+ * int		xcas(int *p, int ov, int nv);
+ * void*	xcasp(void **p, void *ov, void *nv);
+ * ulong	xcasl(ulong *p, ulong ov, ulong nv);
+ */
+
+TEXT	xcas32+0(SB),0,$0		/* four entry points share the body below */
+TEXT	xcas+0(SB),0,$0
+TEXT	xcasp+0(SB),0,$0
+TEXT	xcasl+0(SB),0,$0
+	MOVL	p+0(FP), CX
+	MOVL	ov+4(FP), AX	/* accumulator */
+	MOVL	nv+8(FP), DX
+	LOCK
+	CMPXCHG
+	RET			/* AX: still ov if swapped, else the value found in *p */
+	
+/*
+ * The CMPXCHG8B instruction also requires three operands:
+ * a 64-bit value in EDX:EAX, a 64-bit value in ECX:EBX,
+ * and a destination operand in memory. The instruction
+ * compares the 64-bit value in the EDX:EAX registers with the
+ * destination operand. If they are equal, the 64-bit value
+ * in the ECX:EBX register is stored in the destination
+ * operand. If the EDX:EAX register and the destination
+ * are not equal, the destination is loaded in the EDX:EAX
+ * register. The CMPXCHG8B instruction can be combined with
+ * the LOCK prefix to perform the operation atomically.
+ */
+
+TEXT	xcas64+0(SB),0,$0
+	MOVL	p+4(FP), DI	/* arguments start at 4(FP): 0(FP) holds .ret, used below */
+	MOVL	ov+0x8(FP), AX	/* DX:AX = ov */
+	MOVL	ov+0xc(FP), DX
+	MOVL	nv+0x10(FP), BX	/* CX:BX = nv */
+	MOVL	nv+0x14(FP), CX
+	LOCK
+	CMPXCHG64
+	MOVL	.ret+0x0(FP),CX	/* pointer to return value */
+	MOVL	AX,0x0(CX)	/* store DX:AX (old value of *p) through it */
+	MOVL	DX,0x4(CX)
+	RET
--- a/sys/src/ape/lib/ap/386/lock.c
+++ b/sys/src/ape/lib/ap/386/lock.c
@@ -1,26 +1,35 @@
-#define _LOCK_EXTENSION
+#include "../plan9/lib.h"
 #include "../plan9/sys9.h"
+#define _LOCK_EXTENSION
 #include <lock.h>
 
-int	tas(int*);
-
 void
-lock(Lock *lk)
+lock(Lock *l)
 {
-	while(tas(&lk->val))
-		_SLEEP(0);
+	if(ainc(&l->key) == 1)
+		return;	/* changed from 0 -> 1: we hold lock */
+	/* otherwise wait in kernel until unlock() posts sem */
+	while(_SEMACQUIRE(&l->sem, 1) < 0){
+		/* interrupted; try again */
+	}
 }
 
-int
-canlock(Lock *lk)
+void
+unlock(Lock *l)
 {
-	if(tas(&lk->val))
-		return 0;
-	return 1;
+	if(adec(&l->key) == 0)
+		return;	/* changed from 1 -> 0: no contention */
+	_SEMRELEASE(&l->sem, 1);	/* key still non-zero: post sem for a process waiting in lock() */
 }
 
-void
-unlock(Lock *lk)
+int
+canlock(Lock *l)
 {
-	lk->val = 0;
+	/*
+	 * Take the lock only by changing key 0 -> 1.  No ainc/adec on
+	 * failure: that pair posts sem with nobody asleep on it, and a
+	 * later lock() would then get sem while the lock is still held.
+	 */
+	extern int cas(long*, long, long);	/* atom.s; 1 if it swapped */
+	return cas(&l->key, 0, 1);
 }
--- a/sys/src/ape/lib/ap/386/mkfile
+++ b/sys/src/ape/lib/ap/386/mkfile
@@ -2,6 +2,7 @@
 <$APE/config
 LIB=/$objtype/lib/ape/libap.a
 OFILES=\
+	atom.$O\
 	cycles.$O\
 	lock.$O\
 	main9.$O\
--- a/sys/src/ape/lib/ap/68020/lock.c
+++ b/sys/src/ape/lib/ap/68020/lock.c
@@ -2,12 +2,10 @@
 #include "../plan9/sys9.h"
 #include <lock.h>
 
-int	tas(int*);
-
 void
 lock(Lock *lk)
 {
-	while(tas(&lk->val))
+	while(tas((int*)&lk->key))
 		_SLEEP(0);
 }
 
@@ -14,7 +12,7 @@
 int
 canlock(Lock *lk)
 {
-	if(tas(&lk->val))
+	if(tas((int*)&lk->key))
 		return 0;
 	return 1;
 }
@@ -22,5 +20,5 @@
 void
 unlock(Lock *lk)
 {
-	lk->val = 0;
+	lk->key = 0;
 }
--- a/sys/src/ape/lib/ap/alpha/lock.c
+++ b/sys/src/ape/lib/ap/alpha/lock.c
@@ -2,12 +2,10 @@
 #include "../plan9/sys9.h"
 #include <lock.h>
 
-int	tas(int*);
-
 void
 lock(Lock *lk)
 {
-	while(tas(&lk->val))
+	while(tas((int*)&lk->key))
 		_SLEEP(0);
 }
 
@@ -14,7 +12,7 @@
 int
 canlock(Lock *lk)
 {
-	if(tas(&lk->val))
+	if(tas((int*)&lk->key))
 		return 0;
 	return 1;
 }
@@ -22,5 +20,5 @@
 void
 unlock(Lock *lk)
 {
-	lk->val = 0;
+	lk->key = 0;
 }
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm/atom.s
@@ -1,0 +1,58 @@
+#define	CLREX		WORD	$0xf57ff01f
+#define	LDREX(a,r)	WORD	$(0xe<<28|0x01900f9f | (a)<<16 | (r)<<12)
+/* `The order of operands is from left to right in dataflow order' - asm man */
+#define	STREX(v,a,r)	WORD	$(0xe<<28|0x01800f90 | (a)<<16 | (r)<<12 | (v)<<0)
+
+/*
+ * int cas(ulong *p, ulong ov, ulong nv);
+ */
+
+TEXT	cas+0(SB),0,$0		/* r0 holds p */
+TEXT	casp+0(SB),0,$0		/* r0 holds p */
+	MOVW	ov+4(FP), R1
+	MOVW	nv+8(FP), R2
+spincas:
+	LDREX(0,3)	/*	LDREX	0(R0),R3	*/
+	CMP.S	R3, R1
+	BNE	fail		/* *p != ov: give up without storing */
+	STREX(2,0,4)	/*	STREX	0(R0),R2,R4	*/
+	CMP.S	$0, R4
+	BNE	spincas		/* R4 != 0: store did not happen, retry */
+	MOVW	$1, R0		/* swapped: return 1 */
+	RET
+fail:
+	CLREX
+	MOVW	$0, R0		/* not swapped: return 0 */
+	RET
+
+TEXT _xinc(SB), $0	/* void	_xinc(long *); */
+TEXT ainc(SB), $0	/* long ainc(long *); */
+spinainc:
+	LDREX(0,3)	/*	LDREX	0(R0),R3	*/
+	ADD	$1,R3
+	STREX(3,0,4)	/*	STREX	0(R0),R3,R4	*/
+	CMP.S	$0, R4
+	BNE	spinainc	/* R4 != 0: store did not happen, reload and retry */
+	MOVW	R3, R0		/* return the incremented value */
+	RET
+
+TEXT _xdec(SB), $0	/* long _xdec(long *); */
+TEXT adec(SB), $0	/* long adec(long *); */
+spinadec:
+	LDREX(0,3)	/*	LDREX	0(R0),R3	*/
+	SUB	$1,R3
+	STREX(3,0,4)	/*	STREX	0(R0),R3,R4	*/
+	CMP.S	$0, R4
+	BNE	spinadec	/* R4 != 0: store did not happen, reload and retry */
+	MOVW	R3, R0		/* return the decremented value */
+	RET
+
+TEXT loadlinked(SB), $0	/* long loadlinked(long *); NOTE(review): power/atom.s names its counterpart loadlink */
+	LDREX(0,0)	/*	LDREX	0(R0),R0	*/
+	RET		/* the loaded word is returned in R0 */
+
+TEXT storecond(SB), $0	/* int storecond(long *, long); */
+	MOVW	ov+4(FP), R3	/* second argument: the value to store (named ov here) */
+	STREX(3,0,0)	/*	STREX	0(R0),R3,R0	*/
+	RSB	$1, R0		/* R0 = 1 - R0: turn STREX status 0/1 into result 1/0 */
+	RET
--- a/sys/src/ape/lib/ap/arm/lock.c
+++ b/sys/src/ape/lib/ap/arm/lock.c
@@ -1,26 +1,35 @@
-#define _LOCK_EXTENSION
+#include "../plan9/lib.h"
 #include "../plan9/sys9.h"
+#define _LOCK_EXTENSION
 #include <lock.h>
 
-int	tas(int*);
-
 void
-lock(Lock *lk)
+lock(Lock *l)
 {
-	while(tas(&lk->val))
-		_SLEEP(0);
+	if(ainc(&l->key) == 1)
+		return;	/* changed from 0 -> 1: we hold lock */
+	/* otherwise wait in kernel until unlock() posts sem */
+	while(_SEMACQUIRE(&l->sem, 1) < 0){
+		/* interrupted; try again */
+	}
 }
 
-int
-canlock(Lock *lk)
+void
+unlock(Lock *l)
 {
-	if(tas(&lk->val))
-		return 0;
-	return 1;
+	if(adec(&l->key) == 0)
+		return;	/* changed from 1 -> 0: no contention */
+	_SEMRELEASE(&l->sem, 1);	/* key still non-zero: post sem for a process waiting in lock() */
 }
 
-void
-unlock(Lock *lk)
+int
+canlock(Lock *l)
 {
-	lk->val = 0;
+	/*
+	 * Take the lock only by changing key 0 -> 1.  No ainc/adec on
+	 * failure: that pair posts sem with nobody asleep on it, and a
+	 * later lock() would then get sem while the lock is still held.
+	 */
+	extern int cas(long*, long, long);	/* atom.s; 1 if it swapped */
+	return cas(&l->key, 0, 1);
 }
--- a/sys/src/ape/lib/ap/arm/mkfile
+++ b/sys/src/ape/lib/ap/arm/mkfile
@@ -2,6 +2,7 @@
 <$APE/config
 LIB=/$objtype/lib/ape/libap.a
 OFILES=\
+	atom.$O\
 	cycles.$O\
 	div.$O\
 	getfcr.$O\
--- /dev/null
+++ b/sys/src/ape/lib/ap/mips/atom.s
@@ -1,0 +1,52 @@
+/*
+ *	R4000 user-level atomic operations
+ */
+
+#define	LL(base, rt)	WORD	$((060<<26)|((base)<<21)|((rt)<<16))
+#define	SC(base, rt)	WORD	$((070<<26)|((base)<<21)|((rt)<<16))
+#define	NOOP		WORD	$0x27
+
+TEXT ainc(SB), 1, $-4			/* long ainc(long *); */
+TEXT _xinc(SB), 1, $-4			/* void _xinc(long *); */
+	MOVW	R1, R2			/* address of counter */
+loop:	MOVW	$1, R3			/* reloaded every pass: SC below overwrites R3 */
+	LL(2, 1)			/* R1 = *R2 */
+	NOOP
+	ADDU	R1, R3			/* R3 = old value + 1 */
+	MOVW	R3, R1			/* return new value */
+	SC(2, 3)			/* *R2 = R3 */
+	NOOP
+	BEQ	R3,loop			/* R3 == 0 means store failed */
+	RET
+
+TEXT adec(SB), 1, $-4			/* long adec(long*); */
+TEXT _xdec(SB), 1, $-4			/* long _xdec(long *); */
+	MOVW	R1, R2			/* address of counter */
+loop1:	MOVW	$-1, R3			/* reloaded every pass: SC below overwrites R3 */
+	LL(2, 1)			/* R1 = *R2 */
+	NOOP
+	ADDU	R1, R3			/* R3 = old value - 1 */
+	MOVW	R3, R1			/* return new value */
+	SC(2, 3)			/* *R2 = R3 */
+	NOOP
+	BEQ	R3,loop1		/* R3 == 0 means store failed */
+	RET
+
+/*
+ * int cas(uint* p, int ov, int nv);
+ */
+TEXT cas(SB), 1, $-4
+	MOVW	ov+4(FP), R2
+spincas:
+	MOVW	nv+8(FP), R3		/* reload each pass: a failed SC leaves 0 in R3 */
+	LL(1, 4)			/* R4 = *R1 */
+	NOOP
+	BNE	R2, R4, fail		/* *p != ov: return 0 without storing */
+	SC(1, 3)			/* *R1 = R3 */
+	NOOP
+	BEQ	R3, spincas		/* R3 == 0 means store failed */
+	MOVW	$1, R1			/* swapped: return 1 */
+	RET
+fail:
+	MOVW	$0, R1
+	RET
--- a/sys/src/ape/lib/ap/mips/lock.c
+++ b/sys/src/ape/lib/ap/mips/lock.c
@@ -1,171 +1,35 @@
-#define _LOCK_EXTENSION
-#include <stdlib.h>
-#include <string.h>
+#include "../plan9/lib.h"
 #include "../plan9/sys9.h"
+#define _LOCK_EXTENSION
 #include <lock.h>
 
-enum
+void
+lock(Lock *l)
 {
-	Pagesize	= 4096,
-	Semperpg	= Pagesize/(16*sizeof(unsigned int)),
-	Lockaddr	= 0x60000000,
-
-	POWER		= 0x320,
-	MAGNUM		= 0x330,
-	MAGNUMII	= 0x340,
-	R4K		= 0x500,
-};
-
-static	int arch;
-extern	int C_3ktas(int*);
-extern	int C_4ktas(int*);
-extern	int C_fcr0(void);
-
-static void
-lockinit(void)
-{
-	int n;
-
-	if(arch != 0)
-		return;	/* allow multiple calls */
-	arch = C_fcr0();
-	switch(arch) {
-	case POWER:
-		if(_SEGATTACH(0,  "lock", (void*)Lockaddr, Pagesize) == (void*)-1) {
-			arch = MAGNUM;
-			break;
-		}
-		memset((void*)Lockaddr, 0, Pagesize);
-		break;
-	case MAGNUM:
-	case MAGNUMII:
-	case R4K:
-		break;
-	default:
-		abort();
+	if(ainc(&l->key) == 1)
+		return;	/* changed from 0 -> 1: we hold lock */
+	/* otherwise wait in kernel until unlock() posts sem */
+	while(_SEMACQUIRE(&l->sem, 1) < 0){
+		/* interrupted; try again */
 	}
-	
 }
 
 void
-lock(Lock *lk)
+unlock(Lock *l)
 {
-	int *hwsem;
-	int hash;
-
-retry:
-	switch(arch) {
-	case 0:
-		lockinit();
-		goto retry;
-	case MAGNUM:
-	case MAGNUMII:
-		while(C_3ktas(&lk->val))
-			_SLEEP(0);
-		return;
-	case R4K:
-		for(;;){
-			while(lk->val)
-				;
-			if(C_4ktas(&lk->val) == 0)
-				return;
-		}
-		break;
-	case POWER:
-		/* Use low order lock bits to generate hash */
-		hash = ((int)lk/sizeof(int)) & (Semperpg-1);
-		hwsem = (int*)Lockaddr+hash;
-
-		for(;;) {
-			if((*hwsem & 1) == 0) {
-				if(lk->val)
-					*hwsem = 0;
-				else {
-					lk->val = 1;
-					*hwsem = 0;
-					return;
-				}
-			}
-			while(lk->val)
-				;
-		}
-	}	
+	if(adec(&l->key) == 0)
+		return;	/* changed from 1 -> 0: no contention */
+	_SEMRELEASE(&l->sem, 1);	/* key still non-zero: post sem for a process waiting in lock() */
 }
 
 int
-canlock(Lock *lk)
+canlock(Lock *l)
 {
-	int *hwsem;
-	int hash;
-
-retry:
-	switch(arch) {
-	case 0:
-		lockinit();
-		goto retry;
-	case MAGNUM:
-	case MAGNUMII:
-		if(C_3ktas(&lk->val))
-			return 0;
-		return 1;
-	case R4K:
-		if(C_4ktas(&lk->val))
-			return 0;
-		return 1;
-	case POWER:
-		/* Use low order lock bits to generate hash */
-		hash = ((int)lk/sizeof(int)) & (Semperpg-1);
-		hwsem = (int*)Lockaddr+hash;
-
-		if((*hwsem & 1) == 0) {
-			if(lk->val)
-				*hwsem = 0;
-			else {
-				lk->val = 1;
-				*hwsem = 0;
-				return 1;
-			}
-		}
-		return 0;
-	}	
-}
-
-void
-unlock(Lock *lk)
-{
-	lk->val = 0;
-}
-
-int
-tas(int *p)
-{
-	int *hwsem;
-	int hash;
-
-retry:
-	switch(arch) {
-	case 0:
-		lockinit();
-		goto retry;
-	case MAGNUM:
-	case MAGNUMII:
-		return C_3ktas(p);
-	case R4K:
-		return C_4ktas(p);
-	case POWER:
-		/* Use low order lock bits to generate hash */
-		hash = ((int)p/sizeof(int)) & (Semperpg-1);
-		hwsem = (int*)Lockaddr+hash;
-
-		if((*hwsem & 1) == 0) {
-			if(*p)
-				*hwsem = 0;
-			else {
-				*p = 1;
-				*hwsem = 0;
-				return 0;
-			}
-		}
-		return 1;
-	}	
+	/*
+	 * Take the lock only by changing key 0 -> 1.  No ainc/adec on
+	 * failure: that pair posts sem with nobody asleep on it, and a
+	 * later lock() would then get sem while the lock is still held.
+	 */
+	extern int cas(long*, long, long);	/* atom.s; 1 if it swapped */
+	return cas(&l->key, 0, 1);
 }
--- a/sys/src/ape/lib/ap/mips/mkfile
+++ b/sys/src/ape/lib/ap/mips/mkfile
@@ -2,6 +2,7 @@
 <$APE/config
 LIB=/$objtype/lib/ape/libap.a
 OFILES=\
+	atom.$O\
 	cycles.$O\
 	getfcr.$O\
 	lock.$O\
--- a/sys/src/ape/lib/ap/mips/tas.s
+++ b/sys/src/ape/lib/ap/mips/tas.s
@@ -17,6 +17,7 @@
 	BLTZ	R1, btas
 	RET
 
+	TEXT	tas(SB),$0
 	TEXT	C_4ktas(SB), $0
 	MOVW	R1, R2		/* address of key */
 tas1:
--- a/sys/src/ape/lib/ap/plan9/sys9.h
+++ b/sys/src/ape/lib/ap/plan9/sys9.h
@@ -106,8 +106,11 @@
 extern	int	_SEGFLUSH(void*, unsigned long);
 extern	int	_SEGFREE(void*, unsigned long);
 extern	long long	_SEEK(int, long long, int);
+extern	int	_SEMACQUIRE(long*, int);
+extern	long	_SEMRELEASE(long*, long);
 extern	int	_SLEEP(long);
 extern	int	_STAT(const char*, unsigned char*, int);
+extern	int	_TSEMACQUIRE(long*, unsigned long);
 extern	Waitmsg*	_WAIT(void);
 extern	long	_WRITE(int, const void*, long);
 extern	int	_WSTAT(const char*, unsigned char*, int);
@@ -119,3 +122,9 @@
 extern	int	__link(char *, int);
 extern	int	__stat(char *, struct stat *);
 extern	int	__unlink(char *);
+
+/*
+ * atomic add/subtract 1; both return the new value (atom.s on 386, arm, mips, power)
+ */
+extern long	ainc(long*);
+extern long	adec(long*);
--- /dev/null
+++ b/sys/src/ape/lib/ap/power/atom.s
@@ -1,0 +1,63 @@
+TEXT	_xinc(SB),$0	/* void _xinc(long *); */
+TEXT	ainc(SB),$0	/* long ainc(long *); */
+	MOVW	R3, R4		/* R4 = address of the counter; R3 is reused for the value */
+xincloop:
+	LWAR	(R4), R3
+	ADD	$1, R3
+	DCBT	(R4)				/* fix 405 errata cpu_210 */
+	STWCCC	R3, (R4)
+	BNE	xincloop	/* conditional store failed: reload and retry */
+	RETURN			/* R3 = the incremented value */
+
+TEXT	_xdec(SB),$0	/* long _xdec(long *); */
+TEXT	adec(SB),$0	/* long adec(long *); */
+	MOVW	R3, R4		/* R4 = address of the counter; R3 is reused for the value */
+xdecloop:
+	LWAR	(R4), R3
+	ADD	$-1, R3
+	DCBT	(R4)				/* fix 405 errata cpu_210 */
+	STWCCC	R3, (R4)
+	BNE	xdecloop	/* conditional store failed: reload and retry */
+	RETURN			/* R3 = the decremented value */
+
+TEXT	loadlink(SB), $0	/* NOTE(review): arm/atom.s names its counterpart loadlinked */
+
+	LWAR	(R3), R3	/* load the word at the address in R3 into R3 */
+	RETURN
+
+TEXT	storecond(SB), $0
+
+	MOVW	val+4(FP), R4
+	DCBT	(R3)				/* fix 405 errata cpu_210 */
+	STWCCC	R4, (R3)
+	BNE	storecondfail
+	MOVW	$1, R3		/* stored: return 1 */
+	RETURN
+storecondfail:
+	MOVW	$0, R3		/* not stored: return 0 */
+	RETURN
+
+/*
+ * int cas(uint *p, int ov, int nv);
+ * int casp(void **p, void *ov, void *nv);
+ */
+
+TEXT	cas+0(SB),0,$0
+TEXT	casp+0(SB),0,$0
+	MOVW	ov+4(FP),R4
+	MOVW	nv+8(FP),R8
+	LWAR	(R3),R5
+	CMP	R5,R4
+	BNE	fail		/* *p != ov */
+	DCBT	(R3)				/* fix 405 errata cpu_210 */
+	STWCCC	R8,(R3)
+	BNE	fail1		/* no retry: a failed conditional store also returns 0 */
+	MOVW	$1,R3		/* swapped: return 1 */
+	RETURN
+fail:
+	DCBT	(R3)				/* fix 405 errata cpu_210 */
+	STWCCC	R5,(R3)	/* give up exclusive access */
+fail1:
+	MOVW	R0,R3		/* return value taken from R0; presumably R0 holds 0 here - confirm */
+	RETURN
+	END
--- a/sys/src/ape/lib/ap/power/lock.c
+++ b/sys/src/ape/lib/ap/power/lock.c
@@ -3,43 +3,33 @@
 #define _LOCK_EXTENSION
 #include <lock.h>
 
-int	tas(int*);
-
 void
-lock(Lock *lk)
+lock(Lock *l)
 {
-	int i;
-
-	/* once fast */
-	if(!tas(&lk->val))
-		return;
-	/* a thousand times pretty fast */
-	for(i=0; i<1000; i++){
-		if(!tas(&lk->val))
-			return;
-		_SLEEP(0);
+	if(ainc(&l->key) == 1)
+		return;	/* changed from 0 -> 1: we hold lock */
+	/* otherwise wait in kernel until unlock() posts sem */
+	while(_SEMACQUIRE(&l->sem, 1) < 0){
+		/* interrupted; try again */
 	}
-	/* now nice and slow */
-	for(i=0; i<1000; i++){
-		if(!tas(&lk->val))
-			return;
-		_SLEEP(100);
-	}
-	/* take your time */
-	while(tas(&lk->val))
-		_SLEEP(1000);
 }
 
-int
-canlock(Lock *lk)
+void
+unlock(Lock *l)
 {
-	if(tas(&lk->val))
-		return 0;
-	return 1;
+	if(adec(&l->key) == 0)
+		return;	/* changed from 1 -> 0: no contention */
+	_SEMRELEASE(&l->sem, 1);	/* key still non-zero: post sem for a process waiting in lock() */
 }
 
-void
-unlock(Lock *lk)
+int
+canlock(Lock *l)
 {
-	lk->val = 0;
+	/*
+	 * Take the lock only by changing key 0 -> 1.  No ainc/adec on
+	 * failure: that pair posts sem with nobody asleep on it, and a
+	 * later lock() would then get sem while the lock is still held.
+	 */
+	extern int cas(long*, long, long);	/* atom.s; 1 if it swapped */
+	return cas(&l->key, 0, 1);
 }
--- a/sys/src/ape/lib/ap/power/main9p.s
+++ b/sys/src/ape/lib/ap/power/main9p.s
@@ -1,7 +1,7 @@
 #define NPRIVATES	16
 
 GLOBL	_tos(SB), $4
-GLOBAL	_errnoloc(SB), $4
+GLOBL	_errnoloc(SB), $4
 GLOBL	_privates(SB), $4
 GLOBL	_nprivates(SB), $4
 
--- a/sys/src/ape/lib/ap/power/mkfile
+++ b/sys/src/ape/lib/ap/power/mkfile
@@ -2,6 +2,7 @@
 <$APE/config
 LIB=/$objtype/lib/ape/libap.a
 OFILES=\
+	atom.$O\
 	cycles.$O\
 	getfcr.$O\
 	lock.$O\
--- a/sys/src/ape/lib/ap/sparc/lock.c
+++ b/sys/src/ape/lib/ap/sparc/lock.c
@@ -2,12 +2,10 @@
 #include "../plan9/sys9.h"
 #include <lock.h>
 
-int	tas(int*);
-
 void
 lock(Lock *lk)
 {
-	while(tas(&lk->val))
+	while(tas((int*)&lk->key))
 		_SLEEP(0);
 }
 
@@ -14,7 +12,7 @@
 int
 canlock(Lock *lk)
 {
-	if(tas(&lk->val))
+	if(tas((int*)&lk->key))
 		return 0;
 	return 1;
 }
@@ -22,5 +20,5 @@
 void
 unlock(Lock *lk)
 {
-	lk->val = 0;
+	lk->key = 0;
 }
--