shithub: riscv

Download patch

ref: cdc2c30e99f2fb3d65dfbc8ef73efd433a3f1966
parent: b4cdfc6c5517390d6be05b2c01e56bacc9e85ea8
author: cinap_lenrek <cinap_lenrek@gmx.de>
date: Thu Sep 26 18:24:31 EDT 2013

reverting semaphore lock changes from sources (r41ccd6d221da, rb28756e5ba29)

semaphore locks have much higher overhead than initially presented
in the "Semaphores in Plan 9" paper. until the reason for this has
been found, i will revert the changes.

--- a/sys/include/ape/lock.h
+++ b/sys/include/ape/lock.h
@@ -10,8 +10,7 @@
 
 typedef struct
 {
-	long	key;
-	long	sem;
+	int	val;
 } Lock;
 
 #ifdef __cplusplus
--- a/sys/include/libc.h
+++ b/sys/include/libc.h
@@ -410,18 +410,11 @@
 extern	void	prof(void (*fn)(void*), void *arg, int entries, int what);
 
 /*
- *  atomic
- */
-extern	long	ainc(long*);
-extern	long	adec(long*);
-
-/*
  *  synchronization
  */
 typedef
 struct Lock {
-	long	key;
-	long	sem;
+	int	val;
 } Lock;
 
 extern int	_tas(int*);
@@ -707,6 +700,9 @@
 extern	char*	sysname(void);
 extern	void	werrstr(char*, ...);
 #pragma	varargck	argpos	werrstr	1
+
+extern	long	ainc(long*);
+extern	long	adec(long*);
 
 extern char *argv0;
 #define	ARGBEGIN	for((argv0||(argv0=*argv)),argv++,argc--;\
--- a/sys/src/ape/lib/ap/386/atom.s
+++ /dev/null
@@ -1,121 +1,0 @@
-TEXT ainc(SB), 1, $-4				/* int ainc(int*); */
-	MOVL	arg+0(FP), BX
-	MOVL	$1, AX
-	LOCK; BYTE $0x0f; BYTE $0xc1; BYTE $0x03/* XADDL AX, (BX) */
-	ADDL	$1, AX				/* overflow if -ve or 0 */
-	RET
-
-TEXT adec(SB), 1, $-4				/* int adec(int*); */
-	MOVL	arg+0(FP), BX
-	MOVL	$-1, AX
-	LOCK; BYTE $0x0f; BYTE $0xc1; BYTE $0x03/* XADDL AX, (BX) */
-	SUBL	$1, AX				/* underflow if -ve */
-	RET
-
-/*
- * int cas32(u32int *p, u32int ov, u32int nv);
- * int cas(uint *p, int ov, int nv);
- * int casp(void **p, void *ov, void *nv);
- * int casl(ulong *p, ulong ov, ulong nv);
- */
-
-/*
- * CMPXCHG (CX), DX: 0000 1111 1011 000w oorr rmmm,
- * mmm = CX = 001; rrr = DX = 010
- */
-
-#define CMPXCHG		BYTE $0x0F; BYTE $0xB1; BYTE $0x11
-
-TEXT	cas32+0(SB),0,$0
-TEXT	cas+0(SB),0,$0
-TEXT	casp+0(SB),0,$0
-TEXT	casl+0(SB),0,$0
-	MOVL	p+0(FP), CX
-	MOVL	ov+4(FP), AX
-	MOVL	nv+8(FP), DX
-	LOCK
-	CMPXCHG
-	JNE	fail
-	MOVL	$1,AX
-	RET
-fail:
-	MOVL	$0,AX
-	RET
-
-/*
- * int cas64(u64int *p, u64int ov, u64int nv);
- */
-
-/*
- * CMPXCHG64 (DI): 0000 1111 1100 0111 0000 1110,
- */
-
-#define CMPXCHG64		BYTE $0x0F; BYTE $0xC7; BYTE $0x0F
-
-TEXT	cas64+0(SB),0,$0
-	MOVL	p+0(FP), DI
-	MOVL	ov+0x4(FP), AX
-	MOVL	ov+0x8(FP), DX
-	MOVL	nv+0xc(FP), BX
-	MOVL	nv+0x10(FP), CX
-	LOCK
-	CMPXCHG64
-	JNE	fail
-	MOVL	$1,AX
-	RET
-
-/*
- * Versions of compare-and-swap that return the old value
- * (i.e., the value of *p at the time of the operation
- * 	xcas(p, o, n) == o
- * yields the same value as
- *	cas(p, o, n)
- * xcas can be used in constructs like
- *	for(o = *p; (oo = xcas(p, o, o+1)) != o; o = oo)
- *		;
- * to avoid the extra dereference of *p (the example is a silly
- * way to increment *p atomically)
- *
- * u32int	xcas32(u32int *p, u32int ov, u32int nv);
- * u64int	xcas64(u64int *p, u64int ov, u64int nv);
- * int		xcas(int *p, int ov, int nv);
- * void*	xcasp(void **p, void *ov, void *nv);
- * ulong	xcasl(ulong *p, ulong ov, ulong nv);
- */
-
-TEXT	xcas32+0(SB),0,$0
-TEXT	xcas+0(SB),0,$0
-TEXT	xcasp+0(SB),0,$0
-TEXT	xcasl+0(SB),0,$0
-	MOVL	p+0(FP), CX
-	MOVL	ov+4(FP), AX	/* accumulator */
-	MOVL	nv+8(FP), DX
-	LOCK
-	CMPXCHG
-	RET
-	
-/*
- * The CMPXCHG8B instruction also requires three operands:
- * a 64-bit value in EDX:EAX, a 64-bit value in ECX:EBX,
- * and a destination operand in memory. The instruction compar
- * es the 64-bit value in the EDX:EAX registers with the
- * destination operand. If they are equal, the 64-bit value
- * in the ECX:EBX register is stored in the destination
- * operand. If the EDX:EAX register and the destination ar
- * e not equal, the destination is loaded in the EDX:EAX
- * register. The CMPXCHG8B instruction can be combined with
- * the LOCK prefix to perform the operation atomically
- */
-
-TEXT	xcas64+0(SB),0,$0
-	MOVL	p+4(FP), DI
-	MOVL	ov+0x8(FP), AX
-	MOVL	ov+0xc(FP), DX
-	MOVL	nv+0x10(FP), BX
-	MOVL	nv+0x14(FP), CX
-	LOCK
-	CMPXCHG64
-	MOVL	.ret+0x0(FP),CX	/* pointer to return value */
-	MOVL	AX,0x0(CX)
-	MOVL	DX,0x4(CX)
-	RET
--- a/sys/src/ape/lib/ap/386/lock.c
+++ b/sys/src/ape/lib/ap/386/lock.c
@@ -1,35 +1,26 @@
-#include "../plan9/lib.h"
-#include "../plan9/sys9.h"
 #define _LOCK_EXTENSION
+#include "../plan9/sys9.h"
 #include <lock.h>
 
+int	tas(int*);
+
 void
-lock(Lock *l)
+lock(Lock *lk)
 {
-	if(ainc(&l->key) == 1)
-		return;	/* changed from 0 -> 1: we hold lock */
-	/* otherwise wait in kernel */
-	while(_SEMACQUIRE(&l->sem, 1) < 0){
-		/* interrupted; try again */
-	}
+	while(tas(&lk->val))
+		_SLEEP(0);
 }
 
-void
-unlock(Lock *l)
+int
+canlock(Lock *lk)
 {
-	if(adec(&l->key) == 0)
-		return;	/* changed from 1 -> 0: no contention */
-	_SEMRELEASE(&l->sem, 1);
+	if(tas(&lk->val))
+		return 0;
+	return 1;
 }
 
-int
-canlock(Lock *l)
+void
+unlock(Lock *lk)
 {
-	if(ainc(&l->key) == 1)
-		return 1;	/* changed from 0 -> 1: success */
-	/* Undo increment (but don't miss wakeup) */
-	if(adec(&l->key) == 0)
-		return 0;	/* changed from 1 -> 0: no contention */
-	_SEMRELEASE(&l->sem, 1);
-	return 0;
+	lk->val = 0;
 }
--- a/sys/src/ape/lib/ap/386/mkfile
+++ b/sys/src/ape/lib/ap/386/mkfile
@@ -2,7 +2,6 @@
 <$APE/config
 LIB=/$objtype/lib/ape/libap.a
 OFILES=\
-	atom.$O\
 	cycles.$O\
 	lock.$O\
 	main9.$O\
--- a/sys/src/ape/lib/ap/68020/lock.c
+++ b/sys/src/ape/lib/ap/68020/lock.c
@@ -2,10 +2,12 @@
 #include "../plan9/sys9.h"
 #include <lock.h>
 
+int	tas(int*);
+
 void
 lock(Lock *lk)
 {
-	while(tas((int*)&lk->key))
+	while(tas(&lk->val))
 		_SLEEP(0);
 }
 
@@ -12,7 +14,7 @@
 int
 canlock(Lock *lk)
 {
-	if(tas((int*)&lk->key))
+	if(tas(&lk->val))
 		return 0;
 	return 1;
 }
@@ -20,5 +22,5 @@
 void
 unlock(Lock *lk)
 {
-	lk->key = 0;
+	lk->val = 0;
 }
--- a/sys/src/ape/lib/ap/alpha/lock.c
+++ b/sys/src/ape/lib/ap/alpha/lock.c
@@ -2,10 +2,12 @@
 #include "../plan9/sys9.h"
 #include <lock.h>
 
+int	tas(int*);
+
 void
 lock(Lock *lk)
 {
-	while(tas((int*)&lk->key))
+	while(tas(&lk->val))
 		_SLEEP(0);
 }
 
@@ -12,7 +14,7 @@
 int
 canlock(Lock *lk)
 {
-	if(tas((int*)&lk->key))
+	if(tas(&lk->val))
 		return 0;
 	return 1;
 }
@@ -20,5 +22,5 @@
 void
 unlock(Lock *lk)
 {
-	lk->key = 0;
+	lk->val = 0;
 }
--- a/sys/src/ape/lib/ap/arm/atom.s
+++ /dev/null
@@ -1,58 +1,0 @@
-#define	CLREX		WORD	$0xf57ff01f
-#define	LDREX(a,r)	WORD	$(0xe<<28|0x01900f9f | (a)<<16 | (r)<<12)
-/* `The order of operands is from left to right in dataflow order' - asm man */
-#define	STREX(v,a,r)	WORD	$(0xe<<28|0x01800f90 | (a)<<16 | (r)<<12 | (v)<<0)
-
-/*
- * int cas(ulong *p, ulong ov, ulong nv);
- */
-
-TEXT	cas+0(SB),0,$0		/* r0 holds p */
-TEXT	casp+0(SB),0,$0		/* r0 holds p */
-	MOVW	ov+4(FP), R1
-	MOVW	nv+8(FP), R2
-spincas:
-	LDREX(0,3)	/*	LDREX	0(R0),R3	*/
-	CMP.S	R3, R1
-	BNE	fail
-	STREX(2,0,4)	/*	STREX	0(R0),R2,R4	*/
-	CMP.S	$0, R4
-	BNE	spincas
-	MOVW	$1, R0
-	RET
-fail:
-	CLREX
-	MOVW	$0, R0
-	RET
-
-TEXT _xinc(SB), $0	/* void	_xinc(long *); */
-TEXT ainc(SB), $0	/* long ainc(long *); */
-spinainc:
-	LDREX(0,3)	/*	LDREX	0(R0),R3	*/
-	ADD	$1,R3
-	STREX(3,0,4)	/*	STREX	0(R0),R3,R4	*/
-	CMP.S	$0, R4
-	BNE	spinainc
-	MOVW	R3, R0
-	RET
-
-TEXT _xdec(SB), $0	/* long _xdec(long *); */
-TEXT adec(SB), $0	/* long adec(long *); */
-spinadec:
-	LDREX(0,3)	/*	LDREX	0(R0),R3	*/
-	SUB	$1,R3
-	STREX(3,0,4)	/*	STREX	0(R0),R3,R4	*/
-	CMP.S	$0, R4
-	BNE	spinadec
-	MOVW	R3, R0
-	RET
-
-TEXT loadlinked(SB), $0	/* long loadlinked(long *); */
-	LDREX(0,0)	/*	LDREX	0(R0),R0	*/
-	RET
-
-TEXT storecond(SB), $0	/* int storecond(long *, long); */
-	MOVW	ov+4(FP), R3
-	STREX(3,0,0)	/*	STREX	0(R0),R3,R0	*/
-	RSB	$1, R0
-	RET
--- a/sys/src/ape/lib/ap/arm/lock.c
+++ b/sys/src/ape/lib/ap/arm/lock.c
@@ -1,35 +1,26 @@
-#include "../plan9/lib.h"
-#include "../plan9/sys9.h"
 #define _LOCK_EXTENSION
+#include "../plan9/sys9.h"
 #include <lock.h>
 
+int	tas(int*);
+
 void
-lock(Lock *l)
+lock(Lock *lk)
 {
-	if(ainc(&l->key) == 1)
-		return;	/* changed from 0 -> 1: we hold lock */
-	/* otherwise wait in kernel */
-	while(_SEMACQUIRE(&l->sem, 1) < 0){
-		/* interrupted; try again */
-	}
+	while(tas(&lk->val))
+		_SLEEP(0);
 }
 
-void
-unlock(Lock *l)
+int
+canlock(Lock *lk)
 {
-	if(adec(&l->key) == 0)
-		return;	/* changed from 1 -> 0: no contention */
-	_SEMRELEASE(&l->sem, 1);
+	if(tas(&lk->val))
+		return 0;
+	return 1;
 }
 
-int
-canlock(Lock *l)
+void
+unlock(Lock *lk)
 {
-	if(ainc(&l->key) == 1)
-		return 1;	/* changed from 0 -> 1: success */
-	/* Undo increment (but don't miss wakeup) */
-	if(adec(&l->key) == 0)
-		return 0;	/* changed from 1 -> 0: no contention */
-	_SEMRELEASE(&l->sem, 1);
-	return 0;
+	lk->val = 0;
 }
--- a/sys/src/ape/lib/ap/arm/mkfile
+++ b/sys/src/ape/lib/ap/arm/mkfile
@@ -2,7 +2,6 @@
 <$APE/config
 LIB=/$objtype/lib/ape/libap.a
 OFILES=\
-	atom.$O\
 	cycles.$O\
 	div.$O\
 	getfcr.$O\
--- a/sys/src/ape/lib/ap/mips/atom.s
+++ /dev/null
@@ -1,52 +1,0 @@
-/*
- *	R4000 user-level atomic operations
- */
-
-#define	LL(base, rt)	WORD	$((060<<26)|((base)<<21)|((rt)<<16))
-#define	SC(base, rt)	WORD	$((070<<26)|((base)<<21)|((rt)<<16))
-#define	NOOP		WORD	$0x27
-
-TEXT ainc(SB), 1, $-4			/* long ainc(long *); */
-TEXT _xinc(SB), 1, $-4			/* void _xinc(long *); */
-	MOVW	R1, R2			/* address of counter */
-loop:	MOVW	$1, R3
-	LL(2, 1)
-	NOOP
-	ADDU	R1, R3
-	MOVW	R3, R1			/* return new value */
-	SC(2, 3)
-	NOOP
-	BEQ	R3,loop
-	RET
-
-TEXT adec(SB), 1, $-4			/* long adec(long*); */
-TEXT _xdec(SB), 1, $-4			/* long _xdec(long *); */
-	MOVW	R1, R2			/* address of counter */
-loop1:	MOVW	$-1, R3
-	LL(2, 1)
-	NOOP
-	ADDU	R1, R3
-	MOVW	R3, R1			/* return new value */
-	SC(2, 3)
-	NOOP
-	BEQ	R3,loop1
-	RET
-
-/*
- * int cas(uint* p, int ov, int nv);
- */
-TEXT cas(SB), 1, $-4
-	MOVW	ov+4(FP), R2
-	MOVW	nv+8(FP), R3
-spincas:
-	LL(1, 4)			/* R4 = *R1 */
-	NOOP
-	BNE	R2, R4, fail
-	SC(1, 3)			/* *R1 = R3 */
-	NOOP
-	BEQ	R3, spincas		/* R3 == 0 means store failed */
-	MOVW	$1, R1
-	RET
-fail:
-	MOVW	$0, R1
-	RET
--- a/sys/src/ape/lib/ap/mips/lock.c
+++ b/sys/src/ape/lib/ap/mips/lock.c
@@ -1,35 +1,171 @@
-#include "../plan9/lib.h"
-#include "../plan9/sys9.h"
 #define _LOCK_EXTENSION
+#include <stdlib.h>
+#include <string.h>
+#include "../plan9/sys9.h"
 #include <lock.h>
 
-void
-lock(Lock *l)
+enum
 {
-	if(ainc(&l->key) == 1)
-		return;	/* changed from 0 -> 1: we hold lock */
-	/* otherwise wait in kernel */
-	while(_SEMACQUIRE(&l->sem, 1) < 0){
-		/* interrupted; try again */
+	Pagesize	= 4096,
+	Semperpg	= Pagesize/(16*sizeof(unsigned int)),
+	Lockaddr	= 0x60000000,
+
+	POWER		= 0x320,
+	MAGNUM		= 0x330,
+	MAGNUMII	= 0x340,
+	R4K		= 0x500,
+};
+
+static	int arch;
+extern	int C_3ktas(int*);
+extern	int C_4ktas(int*);
+extern	int C_fcr0(void);
+
+static void
+lockinit(void)
+{
+	int n;
+
+	if(arch != 0)
+		return;	/* allow multiple calls */
+	arch = C_fcr0();
+	switch(arch) {
+	case POWER:
+		if(_SEGATTACH(0,  "lock", (void*)Lockaddr, Pagesize) == (void*)-1) {
+			arch = MAGNUM;
+			break;
+		}
+		memset((void*)Lockaddr, 0, Pagesize);
+		break;
+	case MAGNUM:
+	case MAGNUMII:
+	case R4K:
+		break;
+	default:
+		abort();
 	}
+	
 }
 
 void
-unlock(Lock *l)
+lock(Lock *lk)
 {
-	if(adec(&l->key) == 0)
-		return;	/* changed from 1 -> 0: no contention */
-	_SEMRELEASE(&l->sem, 1);
+	int *hwsem;
+	int hash;
+
+retry:
+	switch(arch) {
+	case 0:
+		lockinit();
+		goto retry;
+	case MAGNUM:
+	case MAGNUMII:
+		while(C_3ktas(&lk->val))
+			_SLEEP(0);
+		return;
+	case R4K:
+		for(;;){
+			while(lk->val)
+				;
+			if(C_4ktas(&lk->val) == 0)
+				return;
+		}
+		break;
+	case POWER:
+		/* Use low order lock bits to generate hash */
+		hash = ((int)lk/sizeof(int)) & (Semperpg-1);
+		hwsem = (int*)Lockaddr+hash;
+
+		for(;;) {
+			if((*hwsem & 1) == 0) {
+				if(lk->val)
+					*hwsem = 0;
+				else {
+					lk->val = 1;
+					*hwsem = 0;
+					return;
+				}
+			}
+			while(lk->val)
+				;
+		}
+	}	
 }
 
 int
-canlock(Lock *l)
+canlock(Lock *lk)
 {
-	if(ainc(&l->key) == 1)
-		return 1;	/* changed from 0 -> 1: success */
-	/* Undo increment (but don't miss wakeup) */
-	if(adec(&l->key) == 0)
-		return 0;	/* changed from 1 -> 0: no contention */
-	_SEMRELEASE(&l->sem, 1);
-	return 0;
+	int *hwsem;
+	int hash;
+
+retry:
+	switch(arch) {
+	case 0:
+		lockinit();
+		goto retry;
+	case MAGNUM:
+	case MAGNUMII:
+		if(C_3ktas(&lk->val))
+			return 0;
+		return 1;
+	case R4K:
+		if(C_4ktas(&lk->val))
+			return 0;
+		return 1;
+	case POWER:
+		/* Use low order lock bits to generate hash */
+		hash = ((int)lk/sizeof(int)) & (Semperpg-1);
+		hwsem = (int*)Lockaddr+hash;
+
+		if((*hwsem & 1) == 0) {
+			if(lk->val)
+				*hwsem = 0;
+			else {
+				lk->val = 1;
+				*hwsem = 0;
+				return 1;
+			}
+		}
+		return 0;
+	}	
+}
+
+void
+unlock(Lock *lk)
+{
+	lk->val = 0;
+}
+
+int
+tas(int *p)
+{
+	int *hwsem;
+	int hash;
+
+retry:
+	switch(arch) {
+	case 0:
+		lockinit();
+		goto retry;
+	case MAGNUM:
+	case MAGNUMII:
+		return C_3ktas(p);
+	case R4K:
+		return C_4ktas(p);
+	case POWER:
+		/* Use low order lock bits to generate hash */
+		hash = ((int)p/sizeof(int)) & (Semperpg-1);
+		hwsem = (int*)Lockaddr+hash;
+
+		if((*hwsem & 1) == 0) {
+			if(*p)
+				*hwsem = 0;
+			else {
+				*p = 1;
+				*hwsem = 0;
+				return 0;
+			}
+		}
+		return 1;
+	}	
 }
--- a/sys/src/ape/lib/ap/mips/mkfile
+++ b/sys/src/ape/lib/ap/mips/mkfile
@@ -2,7 +2,6 @@
 <$APE/config
 LIB=/$objtype/lib/ape/libap.a
 OFILES=\
-	atom.$O\
 	cycles.$O\
 	getfcr.$O\
 	lock.$O\
--- a/sys/src/ape/lib/ap/mips/tas.s
+++ b/sys/src/ape/lib/ap/mips/tas.s
@@ -17,7 +17,6 @@
 	BLTZ	R1, btas
 	RET
 
-	TEXT	tas(SB),$0
 	TEXT	C_4ktas(SB), $0
 	MOVW	R1, R2		/* address of key */
 tas1:
--- a/sys/src/ape/lib/ap/plan9/sys9.h
+++ b/sys/src/ape/lib/ap/plan9/sys9.h
@@ -106,11 +106,8 @@
 extern	int	_SEGFLUSH(void*, unsigned long);
 extern	int	_SEGFREE(void*, unsigned long);
 extern	long long	_SEEK(int, long long, int);
-extern	int	_SEMACQUIRE(long*, int);
-extern	long	_SEMRELEASE(long*, long);
 extern	int	_SLEEP(long);
 extern	int	_STAT(const char*, unsigned char*, int);
-extern	int	_TSEMACQUIRE(long*, unsigned long);
 extern	Waitmsg*	_WAIT(void);
 extern	long	_WRITE(int, const void*, long);
 extern	int	_WSTAT(const char*, unsigned char*, int);
@@ -122,9 +119,3 @@
 extern	int	__link(char *, int);
 extern	int	__stat(char *, struct stat *);
 extern	int	__unlink(char *);
-
-/*
- * atomic
- */
-extern long	ainc(long*);
-extern long	adec(long*);
--- a/sys/src/ape/lib/ap/power/atom.s
+++ /dev/null
@@ -1,63 +1,0 @@
-TEXT	_xinc(SB),$0	/* void _xinc(long *); */
-TEXT	ainc(SB),$0	/* long ainc(long *); */
-	MOVW	R3, R4
-xincloop:
-	LWAR	(R4), R3
-	ADD	$1, R3
-	DCBT	(R4)				/* fix 405 errata cpu_210 */
-	STWCCC	R3, (R4)
-	BNE	xincloop
-	RETURN
-
-TEXT	_xdec(SB),$0	/* long _xdec(long *); */
-TEXT	adec(SB),$0	/* long adec(long *); */
-	MOVW	R3, R4
-xdecloop:
-	LWAR	(R4), R3
-	ADD	$-1, R3
-	DCBT	(R4)				/* fix 405 errata cpu_210 */
-	STWCCC	R3, (R4)
-	BNE	xdecloop
-	RETURN
-
-TEXT	loadlink(SB), $0
-
-	LWAR	(R3), R3
-	RETURN
-
-TEXT	storecond(SB), $0
-
-	MOVW	val+4(FP), R4
-	DCBT	(R3)				/* fix 405 errata cpu_210 */
-	STWCCC	R4, (R3)
-	BNE	storecondfail
-	MOVW	$1, R3
-	RETURN
-storecondfail:
-	MOVW	$0, R3
-	RETURN
-
-/*
- * int cas(uint *p, int ov, int nv);
- * int casp(void **p, void *ov, void *nv);
- */
-
-TEXT	cas+0(SB),0,$0
-TEXT	casp+0(SB),0,$0
-	MOVW	ov+4(FP),R4
-	MOVW	nv+8(FP),R8
-	LWAR	(R3),R5
-	CMP	R5,R4
-	BNE	fail
-	DCBT	(R3)				/* fix 405 errata cpu_210 */
-	STWCCC	R8,(R3)
-	BNE	fail1
-	MOVW	$1,R3
-	RETURN
-fail:
-	DCBT	(R3)				/* fix 405 errata cpu_210 */
-	STWCCC	R5,(R3)	/* give up exclusive access */
-fail1:
-	MOVW	R0,R3
-	RETURN
-	END
--- a/sys/src/ape/lib/ap/power/lock.c
+++ b/sys/src/ape/lib/ap/power/lock.c
@@ -3,33 +3,43 @@
 #define _LOCK_EXTENSION
 #include <lock.h>
 
+int	tas(int*);
+
 void
-lock(Lock *l)
+lock(Lock *lk)
 {
-	if(ainc(&l->key) == 1)
-		return;	/* changed from 0 -> 1: we hold lock */
-	/* otherwise wait in kernel */
-	while(_SEMACQUIRE(&l->sem, 1) < 0){
-		/* interrupted; try again */
+	int i;
+
+	/* once fast */
+	if(!tas(&lk->val))
+		return;
+	/* a thousand times pretty fast */
+	for(i=0; i<1000; i++){
+		if(!tas(&lk->val))
+			return;
+		_SLEEP(0);
 	}
+	/* now nice and slow */
+	for(i=0; i<1000; i++){
+		if(!tas(&lk->val))
+			return;
+		_SLEEP(100);
+	}
+	/* take your time */
+	while(tas(&lk->val))
+		_SLEEP(1000);
 }
 
-void
-unlock(Lock *l)
+int
+canlock(Lock *lk)
 {
-	if(adec(&l->key) == 0)
-		return;	/* changed from 1 -> 0: no contention */
-	_SEMRELEASE(&l->sem, 1);
+	if(tas(&lk->val))
+		return 0;
+	return 1;
 }
 
-int
-canlock(Lock *l)
+void
+unlock(Lock *lk)
 {
-	if(ainc(&l->key) == 1)
-		return 1;	/* changed from 0 -> 1: success */
-	/* Undo increment (but don't miss wakeup) */
-	if(adec(&l->key) == 0)
-		return 0;	/* changed from 1 -> 0: no contention */
-	_SEMRELEASE(&l->sem, 1);
-	return 0;
+	lk->val = 0;
 }
--- a/sys/src/ape/lib/ap/power/mkfile
+++ b/sys/src/ape/lib/ap/power/mkfile
@@ -2,7 +2,6 @@
 <$APE/config
 LIB=/$objtype/lib/ape/libap.a
 OFILES=\
-	atom.$O\
 	cycles.$O\
 	getfcr.$O\
 	lock.$O\
--- a/sys/src/ape/lib/ap/sparc/lock.c
+++ b/sys/src/ape/lib/ap/sparc/lock.c
@@ -2,10 +2,12 @@
 #include "../plan9/sys9.h"
 #include <lock.h>
 
+int	tas(int*);
+
 void
 lock(Lock *lk)
 {
-	while(tas((int*)&lk->key))
+	while(tas(&lk->val))
 		_SLEEP(0);
 }
 
@@ -12,7 +14,7 @@
 int
 canlock(Lock *lk)
 {
-	if(tas((int*)&lk->key))
+	if(tas(&lk->val))
 		return 0;
 	return 1;
 }
@@ -20,5 +22,5 @@
 void
 unlock(Lock *lk)
 {
-	lk->key = 0;
+	lk->val = 0;
 }
--- /dev/null
+++ b/sys/src/libc/68000/cycles.c
@@ -1,0 +1,7 @@
+#include <u.h>
+#include <libc.h>
+
+void	cycles(uvlong*u)
+{
+	*u = 0LL;
+}
--- a/sys/src/libc/68000/mkfile
+++ b/sys/src/libc/68000/mkfile
@@ -24,6 +24,7 @@
 	strlen.s\
 
 CFILES=\
+	cycles.c\
 	notejmp.c\
 	vlrt.c\
 
--- a/sys/src/libc/68020/lock.c
+++ /dev/null
@@ -1,41 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-void
-lock(Lock *lk)
-{
-	int i;
-
-	/* once fast */
-	if(!_tas((int*)&lk->key))
-		return;
-	/* a thousand times pretty fast */
-	for(i=0; i<1000; i++){
-		if(!_tas((int*)&lk->key))
-			return;
-		sleep(0);
-	}
-	/* now nice and slow */
-	for(i=0; i<1000; i++){
-		if(!_tas((int*)&lk->key))
-			return;
-		sleep(100);
-	}
-	/* take your time */
-	while(_tas((int*)&lk->key))
-		sleep(1000);
-}
-
-int
-canlock(Lock *lk)
-{
-	if(_tas((int*)&lk->key))
-		return 0;
-	return 1;
-}
-
-void
-unlock(Lock *lk)
-{
-	lk->key = 0;
-}
--- a/sys/src/libc/68020/mkfile
+++ b/sys/src/libc/68020/mkfile
@@ -26,7 +26,7 @@
 	vlop.s\
 
 CFILES=\
-	lock.c\
+	cycles.c\
 	notejmp.c\
 	vlrt.c\
 
--- /dev/null
+++ b/sys/src/libc/alpha/cycles.c
@@ -1,0 +1,7 @@
+#include <u.h>
+#include <libc.h>
+
+void	cycles(uvlong*u)
+{
+	*u = 0LL;
+}
--- a/sys/src/libc/alpha/lock.c
+++ /dev/null
@@ -1,41 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-void
-lock(Lock *lk)
-{
-	int i;
-
-	/* once fast */
-	if(!_tas((int*)&lk->key))
-		return;
-	/* a thousand times pretty fast */
-	for(i=0; i<1000; i++){
-		if(!_tas((int*)&lk->key))
-			return;
-		sleep(0);
-	}
-	/* now nice and slow */
-	for(i=0; i<1000; i++){
-		if(!_tas((int*)&lk->key))
-			return;
-		sleep(100);
-	}
-	/* take your time */
-	while(_tas((int*)&lk->key))
-		sleep(1000);
-}
-
-int
-canlock(Lock *lk)
-{
-	if(_tas((int*)&lk->key))
-		return 0;
-	return 1;
-}
-
-void
-unlock(Lock *lk)
-{
-	lk->key = 0;
-}
--- a/sys/src/libc/alpha/mkfile
+++ b/sys/src/libc/alpha/mkfile
@@ -18,7 +18,7 @@
 
 CFILES=\
 	_seek.c\
-	lock.c\
+	cycles.c\
 	notejmp.c\
 
 HFILES=/sys/include/libc.h
--- /dev/null
+++ b/sys/src/libc/arm/cycles.c
@@ -1,0 +1,10 @@
+#include <u.h>
+#include <libc.h>
+
+#pragma profile off
+
+void
+cycles(uvlong*u)
+{
+	*u = 0LL;
+}
--- a/sys/src/libc/arm/mkfile
+++ b/sys/src/libc/arm/mkfile
@@ -20,6 +20,7 @@
 	vlop.s\
 
 CFILES=\
+	cycles.c\
 	notejmp.c\
 	vlrt.c\
 
--- a/sys/src/libc/mips/atom.s
+++ b/sys/src/libc/mips/atom.s
@@ -12,8 +12,7 @@
 loop:	MOVW	$1, R3
 	LL(2, 1)
 	NOOP
-	ADDU	R1, R3
-	MOVW	R3, R1			/* return new value */
+	ADD	R1,R3,R3
 	SC(2, 3)
 	NOOP
 	BEQ	R3,loop
@@ -25,8 +24,8 @@
 loop1:	MOVW	$-1, R3
 	LL(2, 1)
 	NOOP
-	ADDU	R1, R3
-	MOVW	R3, R1			/* return new value */
+	ADD	R1,R3,R3
+	MOVW	R3, R1
 	SC(2, 3)
 	NOOP
 	BEQ	R3,loop1
@@ -49,4 +48,10 @@
 	RET
 fail:
 	MOVW	$0, R1
+	RET
+
+/* general-purpose abort */
+_trap:
+	MOVD	$0, R0
+	MOVD	0(R0), R0
 	RET
--- /dev/null
+++ b/sys/src/libc/mips/lock.c
@@ -1,0 +1,171 @@
+#include <u.h>
+#include <libc.h>
+
+enum
+{
+	Pagesize	= 4096,
+	Semperpg	= Pagesize/(16*sizeof(uint)),
+	Lockaddr	= 0x60000000,
+
+	POWER		= 0x320,
+	MAGNUM		= 0x330,
+	MAGNUMII	= 0x340,
+	R4K		= 0x500,
+};
+
+static	int arch;
+extern	int C_3ktas(int*);
+extern	int C_4ktas(int*);
+extern	int C_fcr0(void);
+
+static void
+lockinit(void)
+{
+	void *v;
+
+	if(arch != 0)
+		return;	/* allow multiple calls */
+	arch = C_fcr0();
+	switch(arch) {
+	case POWER:
+		v = (void*)Lockaddr;
+		if(segattach(SG_CEXEC, "lock", v, Pagesize) == (void*)-1) {
+			arch = MAGNUM;
+			break;
+		}
+		memset(v, 0, Pagesize);
+		break;
+	case MAGNUM:
+	case MAGNUMII:
+	case R4K:
+		break;
+	default:
+		arch = R4K;
+		break;
+	}
+}
+
+void
+lock(Lock *lk)
+{
+	int *hwsem;
+	int hash;
+
+retry:
+	switch(arch) {
+	case 0:
+		lockinit();
+		goto retry;
+	case MAGNUM:
+	case MAGNUMII:
+		while(C_3ktas(&lk->val))
+			sleep(0);
+		return;
+	case R4K:
+		for(;;){
+			while(lk->val)
+				;
+			if(C_4ktas(&lk->val) == 0)
+				return;
+		}
+		break;
+	case POWER:
+		/* Use low order lock bits to generate hash */
+		hash = ((int)lk/sizeof(int)) & (Semperpg-1);
+		hwsem = (int*)Lockaddr+hash;
+
+		for(;;) {
+			if((*hwsem & 1) == 0) {
+				if(lk->val)
+					*hwsem = 0;
+				else {
+					lk->val = 1;
+					*hwsem = 0;
+					return;
+				}
+			}
+			while(lk->val)
+				;
+		}
+	}
+}
+
+int
+canlock(Lock *lk)
+{
+	int *hwsem;
+	int hash;
+
+retry:
+	switch(arch) {
+	case 0:
+		lockinit();
+		goto retry;
+	case MAGNUM:
+	case MAGNUMII:
+		if(C_3ktas(&lk->val))
+			return 0;
+		return 1;
+	case R4K:
+		if(C_4ktas(&lk->val))
+			return 0;
+		return 1;
+	case POWER:
+		/* Use low order lock bits to generate hash */
+		hash = ((int)lk/sizeof(int)) & (Semperpg-1);
+		hwsem = (int*)Lockaddr+hash;
+
+		if((*hwsem & 1) == 0) {
+			if(lk->val)
+				*hwsem = 0;
+			else {
+				lk->val = 1;
+				*hwsem = 0;
+				return 1;
+			}
+		}
+		break;
+	}
+	return 0;
+}
+
+void
+unlock(Lock *lk)
+{
+	lk->val = 0;
+}
+
+int
+_tas(int *p)
+{
+	int *hwsem;
+	int hash;
+
+retry:
+	switch(arch) {
+	case 0:
+		lockinit();
+		goto retry;
+	case MAGNUM:
+	case MAGNUMII:
+		return C_3ktas(p);
+	case R4K:
+		return C_4ktas(p);
+	case POWER:
+		/* Use low order lock bits to generate hash */
+		hash = ((int)p/sizeof(int)) & (Semperpg-1);
+		hwsem = (int*)Lockaddr+hash;
+
+		if((*hwsem & 1) == 0) {
+			if(*p)
+				*hwsem = 0;
+			else {
+				*p = 1;
+				*hwsem = 0;
+				return 0;
+			}
+		}
+		break;
+	}
+	return 1;
+}
--- a/sys/src/libc/mips/mkfile
+++ b/sys/src/libc/mips/mkfile
@@ -23,6 +23,7 @@
 
 CFILES=\
 	cycles.c\
+	lock.c\
 	notejmp.c\
 	sqrt.c\
 	vlrt.c\
--- a/sys/src/libc/mips/tas.s
+++ b/sys/src/libc/mips/tas.s
@@ -17,7 +17,6 @@
 	BLTZ	R1, btas
 	RET
 
-	TEXT	_tas(SB),$0
 	TEXT	C_4ktas(SB), $0
 	MOVW	R1, R2		/* address of key */
 tas1:
--- a/sys/src/libc/mkfile
+++ b/sys/src/libc/mkfile
@@ -37,7 +37,7 @@
 		cd $i
 		mk $MKFLAGS update
 	}
-	update $UPDATEFLAGS /$objtype/lib/libc.a
+	update $UPDATEFLAGS /386/lib/libc.a
 
 installall:V:
 	for(objtype in $CPUS) mk $MKFLAGS install
--- a/sys/src/libc/port/cycles.c
+++ /dev/null
@@ -1,7 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-void	cycles(uvlong*u)
-{
-	*u = 0LL;
-}
--- a/sys/src/libc/port/lock.c
+++ b/sys/src/libc/port/lock.c
@@ -2,32 +2,40 @@
 #include <libc.h>
 
 void
-lock(Lock *l)
+lock(Lock *lk)
 {
-	if(ainc(&l->key) == 1)
-		return;	/* changed from 0 -> 1: we hold lock */
-	/* otherwise wait in kernel */
-	while(semacquire(&l->sem, 1) < 0){
-		/* interrupted; try again */
+	int i;
+
+	/* once fast */
+	if(!_tas(&lk->val))
+		return;
+	/* a thousand times pretty fast */
+	for(i=0; i<1000; i++){
+		if(!_tas(&lk->val))
+			return;
+		sleep(0);
 	}
+	/* now nice and slow */
+	for(i=0; i<1000; i++){
+		if(!_tas(&lk->val))
+			return;
+		sleep(100);
+	}
+	/* take your time */
+	while(_tas(&lk->val))
+		sleep(1000);
 }
 
-void
-unlock(Lock *l)
+int
+canlock(Lock *lk)
 {
-	if(adec(&l->key) == 0)
-		return;	/* changed from 1 -> 0: no contention */
-	semrelease(&l->sem, 1);
+	if(_tas(&lk->val))
+		return 0;
+	return 1;
 }
 
-int
-canlock(Lock *l)
+void
+unlock(Lock *lk)
 {
-	if(ainc(&l->key) == 1)
-		return 1;	/* changed from 0 -> 1: success */
-	/* Undo increment (but don't miss wakeup) */
-	if(adec(&l->key) == 0)
-		return 0;	/* changed from 1 -> 0: no contention */
-	semrelease(&l->sem, 1);
-	return 0;
+	lk->val = 0;
 }
--- a/sys/src/libc/port/malloc.acid
+++ b/sys/src/libc/port/malloc.acid
@@ -122,18 +122,16 @@
 Profkernel = 2;
 Proftime = 3;
 Profsample = 4;
-sizeofLock = 8;
+sizeofLock = 4;
 aggr Lock
 {
-	'D' 0 key;
-	'D' 4 sem;
+	'D' 0 val;
 };
 
 defn
 Lock(addr) {
 	complex Lock addr;
-	print("	key	", addr.key, "\n");
-	print("	sem	", addr.sem, "\n");
+	print("	val	", addr.val, "\n");
 };
 
 sizeofQLp = 12;
@@ -152,13 +150,13 @@
 	print("	state	", addr.state, "\n");
 };
 
-sizeofQLock = 20;
+sizeofQLock = 16;
 aggr QLock
 {
 	Lock 0 lock;
-	'D' 8 locked;
-	'A' QLp 12 $head;
-	'A' QLp 16 $tail;
+	'D' 4 locked;
+	'A' QLp 8 $head;
+	'A' QLp 12 $tail;
 };
 
 defn
@@ -172,14 +170,14 @@
 	print("	$tail	", addr.$tail\X, "\n");
 };
 
-sizeofRWLock = 24;
+sizeofRWLock = 20;
 aggr RWLock
 {
 	Lock 0 lock;
-	'D' 8 readers;
-	'D' 12 writer;
-	'A' QLp 16 $head;
-	'A' QLp 20 $tail;
+	'D' 4 readers;
+	'D' 8 writer;
+	'A' QLp 12 $head;
+	'A' QLp 16 $tail;
 };
 
 defn
@@ -440,12 +438,12 @@
 };
 
 complex Tos _tos;
-sizeofPrivate = 16;
+sizeofPrivate = 12;
 aggr Private
 {
 	Lock 0 lk;
-	'D' 8 pid;
-	'D' 12 printfd;
+	'D' 4 pid;
+	'D' 8 printfd;
 };
 
 defn
--- a/sys/src/libc/port/mkfile
+++ b/sys/src/libc/port/mkfile
@@ -19,7 +19,6 @@
 	cleanname.c\
 	crypt.c\
 	ctype.c\
-	cycles.c\
 	encodefmt.c\
 	execl.c\
 	exp.c\
--- a/sys/src/libc/port/pool.acid
+++ b/sys/src/libc/port/pool.acid
@@ -122,18 +122,16 @@
 Profkernel = 2;
 Proftime = 3;
 Profsample = 4;
-sizeofLock = 8;
+sizeofLock = 4;
 aggr Lock
 {
-	'D' 0 key;
-	'D' 4 sem;
+	'D' 0 val;
 };
 
 defn
 Lock(addr) {
 	complex Lock addr;
-	print("	key	", addr.key, "\n");
-	print("	sem	", addr.sem, "\n");
+	print("	val	", addr.val, "\n");
 };
 
 sizeofQLp = 12;
@@ -152,13 +150,13 @@
 	print("	state	", addr.state, "\n");
 };
 
-sizeofQLock = 20;
+sizeofQLock = 16;
 aggr QLock
 {
 	Lock 0 lock;
-	'D' 8 locked;
-	'A' QLp 12 $head;
-	'A' QLp 16 $tail;
+	'D' 4 locked;
+	'A' QLp 8 $head;
+	'A' QLp 12 $tail;
 };
 
 defn
@@ -172,14 +170,14 @@
 	print("	$tail	", addr.$tail\X, "\n");
 };
 
-sizeofRWLock = 24;
+sizeofRWLock = 20;
 aggr RWLock
 {
 	Lock 0 lock;
-	'D' 8 readers;
-	'D' 12 writer;
-	'A' QLp 16 $head;
-	'A' QLp 20 $tail;
+	'D' 4 readers;
+	'D' 8 writer;
+	'A' QLp 12 $head;
+	'A' QLp 16 $tail;
 };
 
 defn
@@ -506,20 +504,34 @@
 complex Free checklist:t;
 complex Free checklist:q;
 complex Free checktree:t;
+complex Free ltreewalk:t;
+complex Free ltreewalk:f;
+complex Free treeinsert:tree;
+complex Free treeinsert:node;
+complex Free treeinsert:loc;
+complex Free treeinsert:repl;
+complex Free treedelete:tree;
+complex Free treedelete:node;
+complex Free treedelete:loc;
+complex Free treedelete:lsucc;
+complex Free treedelete:succ;
 complex Free treelookupgt:t;
 complex Free treelookupgt:lastgood;
-complex Free treesplay:t;
-complex Free treesplay:N;
-complex Free treesplay:l;
-complex Free treesplay:r;
-complex Free treesplay:y;
+complex Free listadd:list;
+complex Free listadd:node;
+complex Free listdelete:list;
+complex Free listdelete:node;
 complex Pool pooladd:p;
 complex Alloc pooladd:anode;
+complex Free pooladd:lst;
+complex Free pooladd:olst;
 complex Free pooladd:node;
-complex Free pooladd:root;
+complex Free pooladd:parent;
 complex Pool pooldel:p;
 complex Free pooldel:node;
-complex Free pooldel:root;
+complex Free pooldel:lst;
+complex Free pooldel:olst;
+complex Free pooldel:parent;
 complex Pool dsize2bsize:p;
 complex Pool bsize2asize:p;
 complex Pool blockmerge:pool;
--- a/sys/src/libc/sparc/lock.c
+++ /dev/null
@@ -1,41 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-void
-lock(Lock *lk)
-{
-	int i;
-
-	/* once fast */
-	if(!_tas((int*)&lk->key))
-		return;
-	/* a thousand times pretty fast */
-	for(i=0; i<1000; i++){
-		if(!_tas((int*)&lk->key))
-			return;
-		sleep(0);
-	}
-	/* now nice and slow */
-	for(i=0; i<1000; i++){
-		if(!_tas((int*)&lk->key))
-			return;
-		sleep(100);
-	}
-	/* take your time */
-	while(_tas((int*)&lk->key))
-		sleep(1000);
-}
-
-int
-canlock(Lock *lk)
-{
-	if(_tas((int*)&lk->key))
-		return 0;
-	return 1;
-}
-
-void
-unlock(Lock *lk)
-{
-	lk->key = 0;
-}
--- a/sys/src/libc/sparc/mkfile
+++ b/sys/src/libc/sparc/mkfile
@@ -22,7 +22,7 @@
 	vlop.s
 
 CFILES=\
-	lock.c\
+	cycles.c\
 	notejmp.c\
 	sqrt.c\
 	vlrt.c\
--