shithub: mc


ref: b2d29ed0eebd0e3e2c162b5e83d6d0e8b6ddee57
parent: d21343766fc75075e9246b386b60e264bfd1860e
parent: 920e16ca93f648b551af2b688de6a1d5fe1cb802
author: Ori Bernstein <ori@eigenstate.org>
date: Thu May 5 18:59:19 EDT 2016

Merge ../integrate-libthread

--- /dev/null
+++ b/lib/thread/atomic-impl+plan9-x64.s
@@ -1,0 +1,71 @@
+TEXT thread$xget32+0(SB),1,$0
+	MOVL	(DI), AX
+	RET
+TEXT thread$xget64+0(SB),1,$0
+	MOVQ	(DI), AX
+	RET
+TEXT thread$xgetp+0(SB),1,$0
+	MOVQ	(DI), AX
+	RET
+
+TEXT thread$xset32+0(SB),1,$0
+	MOVL	SI, (DI)
+	RET
+TEXT thread$xset64+0(SB),1,$0
+	MOVQ	SI, (DI)
+	RET
+TEXT thread$xsetp+0(SB),1,$0
+	MOVQ	SI, (DI)
+	RET
+
+TEXT thread$xadd32+0(SB),1,$0
+	LOCK; XADDL	SI, (DI)
+	MOVL	SI, AX
+	RET
+TEXT thread$xadd64+0(SB),1,$0
+	LOCK; XADDQ	SI, (DI)
+	MOVQ	SI, AX
+	RET
+TEXT thread$xaddp+0(SB),1,$0
+	LOCK; XADDQ	SI, (DI)
+	MOVQ	SI, AX
+	RET
+
+TEXT thread$xsub32+0(SB),1,$0
+	NEGL	SI
+	LOCK; XADDL	SI, (DI)
+	MOVL	SI, AX
+	RET
+TEXT thread$xsub64+0(SB),1,$0
+	NEGQ	SI
+	LOCK; XADDQ	SI, (DI)
+	MOVQ	SI, AX
+	RET
+TEXT thread$xsubp+0(SB),1,$0
+	NEGQ	SI
+	LOCK; XADDQ	SI, (DI)
+	MOVQ	SI, AX
+	RET
+
+TEXT thread$xcas32+0(SB),1,$0
+	MOVL	SI, AX
+	LOCK; CMPXCHGL	DX, (DI)
+	RET
+TEXT thread$xcas64+0(SB),1,$0
+	MOVQ	SI, AX
+	LOCK; CMPXCHGQ	DX, (DI)
+	RET
+TEXT thread$xcasp+0(SB),1,$0
+	MOVQ	SI, AX
+	LOCK; CMPXCHGQ	DX, (DI)
+	RET
+
+TEXT thread$xchg32+0(SB),1,$0
+	MOVL	SI, AX
+	LOCK; XCHGL	(DI), AX
+	RET
+TEXT thread$xchg64+0(SB),1,$0
+	MOVQ	SI, AX
+	LOCK; XCHGQ	(DI), AX
+	RET
+TEXT thread$xchgp+0(SB),1,$0
+	MOVQ	SI, AX
+	LOCK; XCHGQ	(DI), AX
+	RET
--- /dev/null
+++ b/lib/thread/atomic-impl+x64.s
@@ -1,0 +1,90 @@
+.globl thread$xget32
+.globl _thread$xget32
+thread$xget32:
+_thread$xget32:
+	movl	(%rdi), %eax
+	ret
+.globl thread$xget64
+.globl thread$xgetp
+.globl _thread$xget64
+.globl _thread$xgetp
+thread$xget64:
+thread$xgetp:
+_thread$xget64:
+_thread$xgetp:
+	movq	(%rdi), %rax
+	ret
+
+.globl thread$xset32
+.globl _thread$xset32
+thread$xset32:
+_thread$xset32:
+	movl	%esi, (%rdi)
+	ret
+.globl thread$xset64
+.globl thread$xsetp
+.globl _thread$xset64
+.globl _thread$xsetp
+thread$xset64:
+thread$xsetp:
+_thread$xset64:
+_thread$xsetp:
+	movq	%rsi, (%rdi)
+	ret
+
+.globl thread$xadd32
+.globl _thread$xadd32
+thread$xadd32:
+_thread$xadd32:
+	lock xaddl	%esi, (%rdi)
+	movl %esi,%eax
+	ret
+.globl thread$xadd64
+.globl thread$xaddp
+.globl _thread$xadd64
+.globl _thread$xaddp
+thread$xadd64:
+thread$xaddp:
+_thread$xadd64:
+_thread$xaddp:
+	lock xaddq	%rsi, (%rdi)
+	movq %rsi,%rax
+	ret
+
+.globl thread$xcas32
+.globl _thread$xcas32
+thread$xcas32:
+_thread$xcas32:
+	movl	%esi, %eax
+	lock cmpxchgl	%edx, (%rdi)
+	ret
+.globl thread$xcas64
+.globl thread$xcasp
+.globl _thread$xcas64
+.globl _thread$xcasp
+thread$xcas64:
+thread$xcasp:
+_thread$xcas64:
+_thread$xcasp:
+	movq	%rsi, %rax
+	lock cmpxchgq	%rdx, (%rdi)
+	ret
+
+.globl thread$xchg32
+.globl _thread$xchg32
+thread$xchg32:
+_thread$xchg32:
+	movl	%esi, %eax
+	lock xchgl	(%rdi), %eax
+	ret
+.globl thread$xchg64
+.globl thread$xchgp
+.globl _thread$xchg64
+.globl _thread$xchgp
+thread$xchg64:
+thread$xchgp:
+_thread$xchg64:
+_thread$xchgp:
+	movq	%rsi, %rax
+	lock xchgq	(%rdi), %rax
+	ret
--- /dev/null
+++ b/lib/thread/atomic.myr
@@ -1,0 +1,78 @@
+use std
+
+pkg thread =
+	trait atomic @a::(integral,numeric) =
+		xget	: (p : @a# -> @a)
+		xset	: (p : @a#, v : @a -> void)
+		xadd	: (p : @a#, v : @a -> @a)
+		xcas	: (p : @a#, old : @a, new : @a -> @a)
+		xchg	: (p : @a#, new : @a -> @a)
+	;;
+
+	impl atomic int32
+	impl atomic int64
+	impl atomic uint32
+	impl atomic uint64
+;;
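+
+/*
+Usage sketch (illustrative only, not part of this change). The trait
+dispatches on the operand type, so a caller might write:
+
+	var refs : uint32 = 0
+	thread.xadd(&refs, 1)
+	if thread.xcas(&refs, 1, 0) == 1
+		std.put("dropped last ref\n")
+	;;
+
+xadd and xcas both return the value that was in memory before the
+operation.
+*/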
+
+impl atomic int32 =
+	xget	= {p; -> xget32(p castto(uint32#)) castto(int32)}
+	xset	= {p, v; xset32(p castto(uint32#), v castto(uint32))}
+	xadd	= {p, v; -> xadd32(p castto(uint32#), v castto(uint32)) castto(int32)}
+	xcas	= {p, old, new; -> xcas32(p castto(uint32#), old castto(uint32), new castto(uint32)) castto(int32)}
+	xchg	= {p, v; -> xchg32(p castto(uint32#), v castto(uint32)) castto(int32)}
+;;
+
+
+impl atomic int64 =
+	xget	= {p; -> xget64(p castto(uint64#)) castto(int64)}
+	xset	= {p, v; xset64(p castto(uint64#), v castto(uint64))}
+	xadd	= {p, v; -> xadd64(p castto(uint64#), v castto(uint64)) castto(int64)}
+	xcas	= {p, old, new; -> xcas64(p castto(uint64#), old castto(uint64), new castto(uint64)) castto(int64)}
+	xchg	= {p, v; -> xchg64(p castto(uint64#), v castto(uint64)) castto(int64)}
+;;
+
+impl atomic uint32 =
+	xget	= {p; -> xget32(p)}
+	xset	= {p, v; xset32(p, v)}
+	xadd	= {p, v; -> xadd32(p, v)}
+	xcas	= {p, old, new; -> xcas32(p, old, new)}
+	xchg	= {p, v; -> xchg32(p, v)}
+;;
+
+
+impl atomic uint64 =
+	xget	= {p; -> xget64(p)}
+	xset	= {p, v; xset64(p, v)}
+	xadd	= {p, v; -> xadd64(p, v)}
+	xcas	= {p, old, new; -> xcas64(p, old, new)}
+	xchg	= {p, v; -> xchg64(p, v)}
+;;
+
+impl atomic std.intptr =
+	xget	= {p; -> xgetp(p)}
+	xset	= {p, v; xsetp(p, v)}
+	xadd	= {p, v; -> xaddp(p, v)}
+	xcas	= {p, old, new; -> xcasp(p, old, new)}
+	xchg	= {p, v; -> xchgp(p, v)}
+;;
+
+extern const xget32	: (p : uint32# -> uint32)
+extern const xget64	: (p : uint64# -> uint64)
+extern const xgetp	: (p : std.intptr# -> std.intptr)
+
+extern const xset32	: (p : uint32#, v : uint32 -> void)
+extern const xset64	: (p : uint64#, v : uint64 -> void)
+extern const xsetp	: (p : std.intptr#, v : std.intptr -> void)
+
+extern const xadd32	: (p : uint32#, v : uint32 -> uint32)
+extern const xadd64	: (p : uint64#, v : uint64 -> uint64)
+extern const xaddp	: (p : std.intptr#, v : std.intptr -> std.intptr)
+
+extern const xcas32	: (p : uint32#, old: uint32, new : uint32 -> uint32)
+extern const xcas64	: (p : uint64#, old: uint64, new : uint64 -> uint64)
+extern const xcasp	: (p : std.intptr#, old: std.intptr, new : std.intptr -> std.intptr)
+
+extern const xchg32	: (p : uint32#, v : uint32 -> uint32)
+extern const xchg64	: (p : uint64#, v : uint64 -> uint64)
+extern const xchgp	: (p : std.intptr#, v : std.intptr -> std.intptr)
--- /dev/null
+++ b/lib/thread/bld.proj
@@ -1,0 +1,31 @@
+lib thread =
+	common.myr
+
+	# linux impl of basic thread primitives
+	condvar+linux.myr
+	mutex+linux.myr
+	spawn+linux.myr
+	exit+linux-x64.s
+
+	# freebsd impl of thread primitives
+	condvar+freebsd.myr
+	mutex+freebsd.myr
+	spawn+freebsd.myr
+	exit+freebsd-x64.s
+
+	# osx impl of thread primitives
+	#condvar+osx.myr
+	mutex+osx.myr
+	spawn+osx.myr
+	start+osx-x64.s
+
+	# 9front impl of thread primitives
+	#condvar+plan9.myr
+	mutex+plan9.myr
+	spawn+plan9.myr
+	atomic-impl+plan9-x64.s
+
+	atomic-impl+x64.s
+	atomic.myr
+;;
+
--- /dev/null
+++ b/lib/thread/common.myr
@@ -1,0 +1,5 @@
+use std
+
+pkg thread = 
+	generic Zptr = 0 castto(@a#)
+;;
--- /dev/null
+++ b/lib/thread/condvar+freebsd.myr
@@ -1,0 +1,59 @@
+use std
+use sys
+
+use "atomic.use"
+use "common.use"
+use "mutex.use"
+
+pkg thread =
+	type cond = struct
+		_mtx	: mutex#
+		_seq	: uint32
+	;;
+
+	const mkcond	: (mtx : mutex# -> cond)
+	const condwait	: (cond : cond# -> void)
+	const condsignal	: (cond : cond# -> void)
+	const condbroadcast	: (cond : cond# -> void)
+;;
+
+const mkcond = {mtx
+	-> [._mtx = mtx, ._seq = 0]
+}
+
+const condwait = {cond
+	var seq
+	var mtx
+
+	mtx = cond._mtx
+	seq = cond._seq
+
+	mtxunlock(mtx)
+	sys.umtx_op(&cond._seq castto(void#), \
+		sys.Umtxwaituintpriv, \
+		seq castto(uint64), \
+		Zptr, Zptr)
+
+	/*
+	We need to atomically set the mutex to Contended. This passes the
+	responsibility for waking any remaining waiters on to whoever
+	unlocks the mutex.
+	*/
+	while xchg(&mtx._state, Contended) != Unlocked
+		sys.umtx_op(&mtx._state castto(void#), \
+			sys.Umtxwaituintpriv, \
+			Contended castto(uint64), \
+			Zptr, Zptr)
+	;;
+}
+
+const condsignal = {cond : cond#
+	xadd(&cond._seq, 1)
+	sys.umtx_op(&cond._seq castto(void#), sys.Umtxwakepriv, 1, Zptr, Zptr)
+}
+
+const condbroadcast = {cond : cond#
+	xadd(&cond._seq, 1)
+	sys.umtx_op(&cond._seq castto(void#), sys.Umtxwakepriv, 0x7fffffff, Zptr, Zptr)
+}
+
--- /dev/null
+++ b/lib/thread/condvar+linux.myr
@@ -1,0 +1,61 @@
+use std
+use sys
+
+use "atomic.use"
+use "common.use"
+use "mutex.use"
+
+pkg thread =
+	type cond = struct
+		_mtx	: mutex#
+		_seq	: int32
+	;;
+
+	const mkcond	: (mtx : mutex# -> cond)
+	const condwait	: (cond : cond# -> void)
+	const condsignal	: (cond : cond# -> void)
+	const condbroadcast	: (cond : cond# -> void)
+;;
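+
+/*
+The condition variable is a sequence number guarded by the caller's
+mutex: condwait() snapshots _seq, releases the mutex, and sleeps in
+futex() until the sequence changes; signal and broadcast bump _seq and
+wake waiters. A woken waiter reacquires the mutex as Contended so the
+next unlocker knows to keep waking.
+*/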
+
+const mkcond = {mtx
+	-> [._mtx = mtx, ._seq = 0]
+}
+
+const condwait = {cond
+	var seq
+	var mtx
+
+	mtx = cond._mtx
+	seq = cond._seq
+
+	mtxunlock(mtx)
+	sys.futex(&cond._seq, sys.Futexwait | sys.Futexpriv, seq, Zptr, Zptr, 0)
+
+	/*
+	We need to atomically set the mutex to Contended. This passes the
+	responsibility for waking any remaining waiters on to whoever
+	unlocks the mutex.
+	*/
+	while xchg(&mtx._state, Contended) != Unlocked
+		sys.futex(&mtx._state, sys.Futexwait | sys.Futexpriv, \
+			Contended, Zptr, Zptr, 0)
+	;;
+}
+
+const condsignal = {cond : cond#
+	xadd(&cond._seq, 1)
+	sys.futex(&cond._seq, sys.Futexwake | sys.Futexpriv, 1, Zptr, Zptr, 0)
+}
+
+const condbroadcast = {cond : cond#
+	xadd(&cond._seq, 1)
+	/*
+	For Futexcmprequeue, the kernel reuses the timeout argument as val2,
+	the maximum number of waiters to requeue onto the second futex, so
+	we pass the requeue cap there rather than a timespec.
+	*/
+	sys.futex(&cond._seq, sys.Futexcmprequeue | sys.Futexpriv, \
+		1, 0x7fffffff castto(sys.timespec#), \
+		&cond._mtx._state, cond._seq)
+}
+
--- /dev/null
+++ b/lib/thread/exit+freebsd-x64.s
@@ -1,0 +1,23 @@
+/*
+const thread.exit	: (stacksz : std.size -> void)
+NOTE: must be called from the bottom of the stack, since
+we assume that %rbp is in the top 4k of the stack.
+*/
+.globl thread$exit
+thread$exit:
+	/* find top of stack */
+	movq	%rbp,%rdi	/* addr */
+	andq	$~0xfff,%rdi	/* align it */
+	addq	$0x1000,%rdi
+
+	/* munmap(base, size) */
+	movq	$73,%rax	/* munmap */
+	movq	-8(%rdi),%rsi	/* size */
+	subq	%rsi,%rdi	/* move to base ptr */
+	syscall
+
+	/* thr_exit(null) */
+	movq	$431,%rax	/* exit */
+	xorq	%rdi,%rdi	/* 0 */
+	syscall
+	
--- /dev/null
+++ b/lib/thread/exit+linux-x64.s
@@ -1,0 +1,23 @@
+/*
+const thread.exit	: (stacksz : std.size -> void)
+NOTE: must be called from the bottom of the stack, since
+we assume that %rbp is in the top 4k of the stack.
+*/
+.globl thread$exit
+thread$exit:
+	/* find top of stack */
+	movq	%rbp,%rdi	/* addr */
+	andq	$~0xfff,%rdi	/* align it */
+	addq	$0x1000,%rdi
+
+	/* munmap(base, size) */
+	movq	$11,%rax	/* munmap */
+	movq	-8(%rdi),%rsi	/* size */
+	subq	%rsi,%rdi	/* move to base ptr */
+	syscall
+
+	/* thread_exit(0) */
+	movq	$60,%rax	/* exit */
+	xorq	%rdi,%rdi	/* 0 */
+	syscall
+	
--- /dev/null
+++ b/lib/thread/future.myr
@@ -1,0 +1,63 @@
+use std
+
+use "mutex.use"
+
+pkg thread =
+	type future(@a) = struct
+		mtx	: mutex
+		set	: bool
+		val	: @a
+	;;
+
+	generic mkfut	: (-> future(@a))
+	generic futset	: (fut : future(@a)#, val : @a -> bool)
+	generic futget	: (fut : future(@a)# -> @a)
+	generic futtryget	: (fut : future(@a)# -> std.option(@a))
+	generic futclear	: (fut : future(@a)# -> void)
+;;
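+
+/*
+The mutex here is used as a gate rather than a lock: mkfut() returns the
+future with the gate already held, futset() publishes the value and
+releases it, and futget() blocks by acquiring it, so readers sleep until
+a value has been set.
+*/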
+
+const Unset = 0
+const Waiting = 1
+const Set = 2
+
+generic mkfut = {
+	var fut
+
+	fut = [.mtx = mkmtx() ]
+	mtxlock(&fut.mtx)
+	-> fut
+}
+
+generic futset = {fut, val
+	if fut.set
+		-> false
+	;;
+	/* the compiler does not reorder these stores */
+	fut.val = val
+	fut.set = true
+	mtxunlock(&fut.mtx)
+	-> true
+}
+
+generic futtryget = {fut
+	var val
+
+	if !fut.set
+		-> `std.None
+	;;
+	mtxlock(&fut.mtx)
+	val = fut.val
+	mtxunlock(&fut.mtx)
+	-> `std.Some val
+}
+
+generic futget = {fut
+	var val
+
+	mtxlock(&fut.mtx)
+	val = fut.val
+	mtxunlock(&fut.mtx)
+	-> val
+}
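+
+/*
+futclear is declared in the pkg block above but not defined in this
+patch; a minimal sketch of the assumed semantics (make the future
+reusable) would be to re-take the gate and drop the value:
+*/
+generic futclear = {fut
+	mtxlock(&fut.mtx)
+	fut.set = false
+}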
+
+
--- /dev/null
+++ b/lib/thread/mutex+freebsd.myr
@@ -1,0 +1,80 @@
+use std
+use sys
+
+use "atomic.use"
+use "common.use"
+
+pkg thread =
+	type mutex = struct
+		_state	: uint32
+	;;	
+
+	const mkmtx	: (-> mutex)
+	const mtxlock	: (mtx : mutex# -> void)
+	const mtxtrylock	: (mtx : mutex# -> bool)
+	const mtxunlock	: (mtx : mutex# -> void)
+
+	pkglocal const Unlocked = 0
+	pkglocal const Locked = 1
+	pkglocal const Contended = 2
+;;
+
+var nspin = 10	/* FIXME: pick a sane number, based on CPU count */
+
+const mkmtx = {
+	-> [._state = Unlocked]
+}
+
+const mtxlock = {mtx
+	var c
+
+	/* 
+	Uncontended case: we get an unlocked mutex, and we lock it.
+	*/
+	c = Locked
+	for var i = 0; i < nspin; i++
+		c = xcas(&mtx._state, Unlocked, Locked)
+		if c == Unlocked
+			-> void
+		;;
+	;;
+
+	/*
+	Contended case: we set the lock state to Contended. This indicates
+	that the lock is held and that there may be threads waiting on it,
+	so the unlocker will need to wake them.
+	*/
+	if c == Locked
+		c = xchg(&mtx._state, Contended)
+	;;
+
+	while c != Unlocked
+		sys.umtx_op( \
+			&mtx._state castto(void#), \
+			sys.Umtxwaituintpriv, \
+			Contended castto(uint64), \
+			Zptr, Zptr)
+		c = xchg(&mtx._state, Contended)
+	;;
+}
+
+const mtxtrylock = {mtx
+	-> xcas(&mtx._state, Unlocked, Locked) == Unlocked
+}
+
+const mtxunlock = {mtx
+	/*
+	Uncontended case: if the state was not Contended, and the xchg()
+	still sees plain Locked, then nobody was waiting on us and it is
+	safe to simply return without waking anyone.
+	*/
+	if mtx._state == Contended
+		mtx._state = Unlocked
+	elif xchg(&mtx._state, Unlocked) == Locked
+		->
+	;;
+
+	/* wake one waiter */
+	sys.umtx_op(&mtx._state castto(void#), sys.Umtxwakepriv, 1, Zptr, Zptr)
+}
+
--- /dev/null
+++ b/lib/thread/mutex+linux.myr
@@ -1,0 +1,76 @@
+use std
+use sys
+
+use "atomic.use"
+use "common.use"
+
+pkg thread =
+	type mutex = struct
+		_state	: int32
+	;;	
+
+	const mkmtx	: (-> mutex)
+	const mtxlock	: (mtx : mutex# -> void)
+	const mtxtrylock	: (mtx : mutex# -> bool)
+	const mtxunlock	: (mtx : mutex# -> void)
+
+	pkglocal const Unlocked = 0
+	pkglocal const Locked = 1
+	pkglocal const Contended = 2
+;;
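+
+/*
+This is the usual three-state futex mutex: Unlocked, Locked (held with
+no waiters), and Contended (held, possibly with waiters). Lockers spin
+briefly, then publish Contended and sleep in futex(); an unlocker only
+issues a wake if it cannot prove the state was plain Locked.
+*/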
+
+var nspin = 10	/* FIXME: pick a sane number, based on CPU count */
+
+const mkmtx = {
+	-> [._state = Unlocked]
+}
+
+const mtxlock = {mtx
+	var c
+
+	/* 
+	Uncontended case: we get an unlocked mutex, and we lock it.
+	*/
+	c = Locked
+	for var i = 0; i < nspin; i++
+		c = xcas(&mtx._state, Unlocked, Locked)
+		if c == Unlocked
+			-> void
+		;;
+	;;
+
+	/*
+	Contended case: we set the lock state to Contended. This indicates
+	that the lock is held and that there may be threads waiting on it,
+	so the unlocker will need to wake them.
+	*/
+	if c == Locked
+		c = xchg(&mtx._state, Contended)
+	;;
+
+	while c != Unlocked
+		sys.futex(&mtx._state, sys.Futexwait | sys.Futexpriv, Contended, Zptr, Zptr, 0)
+		c = xchg(&mtx._state, Contended)
+	;;
+}
+
+const mtxtrylock = {mtx
+	-> xcas(&mtx._state, Unlocked, Locked) == Unlocked
+}
+
+const mtxunlock = {mtx
+	/*
+	Uncontended case: if the state was not Contended, and the xchg()
+	still sees plain Locked, then nobody was waiting on us and it is
+	safe to simply return without waking anyone.
+	*/
+	if mtx._state == Contended
+		mtx._state = Unlocked
+	elif xchg(&mtx._state, Unlocked) == Locked
+		-> void
+	;;
+
+	/* wake one thread */
+	sys.futex(&mtx._state, sys.Futexwake | sys.Futexpriv, 1, Zptr, Zptr, 0)
+}
+
--- /dev/null
+++ b/lib/thread/mutex+osx.myr
@@ -1,0 +1,65 @@
+use std
+use sys
+
+
+use "atomic.use"
+use "common.use"
+
+pkg thread =
+	type mutex = struct
+		_state	: uint32
+	;;	
+
+	const mkmtx	: (-> mutex)
+	const mtxlock	: (mtx : mutex# -> void)
+	const mtxtrylock	: (mtx : mutex# -> bool)
+	const mtxunlock	: (mtx : mutex# -> void)
+;;
+
+const mkmtx = {
+	-> [._state = 0]
+}
+
+/* a shitty spinlock */
+const mtxlock = {mtx
+	/* first fast */
+	for var i = 0; i < 1000; i++
+		if xcas(&mtx._state, 0, 1) == 0
+			-> void
+		;;
+		std.nanosleep(0)
+	;;
+	
+	/* then slower */
+	for var i = 0; i < 1000; i++
+		if xcas(&mtx._state, 0, 1) == 0
+			-> void
+		;;
+		std.nanosleep(10_000) /* 10 us */
+	;;
+
+	/* even slower */
+	for var i = 0; i < 1000; i++
+		if xcas(&mtx._state, 0, 1) == 0
+			-> void
+		;;
+		std.nanosleep(100_000) /* 100 us */
+	;;
+
+	/* I'm rip van winkle! */
+	while true
+		if xcas(&mtx._state, 0, 1) == 0
+			-> void
+		;;
+		std.nanosleep(1_000_000) /* 1 ms */
+	;;
+}
+
+const mtxtrylock = {mtx
+	-> xcas(&mtx._state, 0, 1) == 0
+}
+
+	
+const mtxunlock = {mtx
+	xset(&mtx._state, 0)
+}
--- /dev/null
+++ b/lib/thread/mutex+plan9.myr
@@ -1,0 +1,47 @@
+use std
+use sys
+
+
+use "atomic.use"
+use "common.use"
+
+pkg thread =
+	type mutex = struct
+		_state	: uint32
+		_sem	: uint32
+	;;	
+
+	const mkmtx	: (-> mutex)
+	const mtxlock	: (mtx : mutex# -> void)
+	const mtxtrylock	: (mtx : mutex# -> bool)
+	const mtxunlock	: (mtx : mutex# -> void)
+;;
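+
+/*
+Plan 9 variant: _state counts the holder plus any waiters and _sem is a
+kernel semaphore. mtxlock() bumps the count and only sleeps in
+semacquire() if someone already held the lock; mtxunlock() drops the
+count and posts the semaphore only if waiters remain.
+*/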
+
+const mkmtx = {
+	-> [._state = 0, ._sem=0]
+}
+
+const mtxlock = {mtx
+	/* if the old value was 0, we aren't contended */
+	if xadd(&mtx._state, 1) == 0
+		-> void
+	;;
+	
+	while sys.semacquire(&mtx._sem, 1) < 0
+		/* interrupted; retry */
+	;;
+}
+
+const mtxtrylock = {mtx
+	-> xcas(&mtx._state, 0, 1) == 0
+}
+
+	
+const mtxunlock = {mtx
+	/* if the old count was 1, we held the lock alone; there are no waiters to wake */
+	if xadd(&mtx._state, -1) == 1
+		-> void
+	;;
+
+	sys.semrelease(&mtx._sem, 1)
+}
--- /dev/null
+++ b/lib/thread/spawn+freebsd.myr
@@ -1,0 +1,74 @@
+use sys
+use std
+
+pkg thread =
+	type tid = uint64
+
+	const spawn : (fn : (-> void) -> std.result(tid, byte[:]))
+;;
+
+
+const Stacksz = 8*std.MiB
+extern const exit : (-> void)
+
+const spawn = {fn
+	-> spawnstk(fn, Stacksz)
+}
+
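+/*
+Stack layout set up below, at the top of the new thread's mapping:
+	[mapping size]	read back by thread$exit to munmap the stack
+	[fn]		the function to run; startthread is pointed at this slot
+*/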
+const spawnstk = {fn, sz
+	var stk : byte#, tid, ctid, ret
+	var szp, fp, tos
+
+	stk = getstk(sz)
+	if stk == sys.Mapbad
+		-> `std.Fail "couldn't get stack"
+	;;
+	tid = -1
+	/* find top of stack */
+	tos = (stk castto(std.intptr)) + (sz castto(std.intptr))
+
+	/* store the stack size */
+	tos -= sizeof(sys.size)
+	sz -= sizeof(sys.size)
+	szp = tos castto(sys.size#)
+	szp# = Stacksz
+
+	/* store the function we call */
+	tos -= sizeof((->void))
+	sz -= sizeof((->void))
+	fp = tos castto((->void)#)
+	fp# = fn
+
+	ret = sys.thr_new(&[
+		.startfn = startthread castto(void#),
+		.arg = tos castto(void#),
+		.stkbase = stk castto(byte#),
+		.stksz = sz,
+		.tid = &ctid,
+		.ptid = &tid,
+		.flags = 2,
+		.rtp = 0 castto(sys.rtprio#)
+	], sizeof(sys.thrparam))
+
+	if ret < 0
+		-> `std.Fail "couldn't spawn thread"
+	;;
+	-> `std.Ok tid castto(tid)
+}
+
+const getstk = {sz
+	var p, m
+
+	p = sys.mmap(0 castto(byte#), sz, sys.Mprotrw, sys.Mpriv | sys.Manon, -1, 0)
+	if p == sys.Mapbad
+		-> p
+	;;
+	m = p castto(std.intptr)
+	-> m castto(byte#)
+}
+
+const startthread = {fn : (-> void)#
+	fn#()
+	exit()
+}
+
--- /dev/null
+++ b/lib/thread/spawn+linux.myr
@@ -1,0 +1,68 @@
+use sys
+use std
+
+pkg thread =
+	type tid = sys.pid
+
+	const spawn : (fn : (-> void) -> std.result(tid, byte[:]))
+;;
+
+extern const exit : (-> void)
+
+/* Holy shit flag mania. */
+const Thrflag = sys.Clonevm | sys.Clonefs | sys.Clonefiles  | \
+	sys.Clonesighand | sys.Clonethread |sys.Clonesysvsem | \
+	sys.Clonesettls | sys.Cloneparentsettid | sys.Clonechildcleartid
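+/*
+These flags share the address space, fs state, fd table, signal
+handlers, thread group and sysv semaphore undo list with the parent,
+set up TLS for the child, report the child tid to the parent, and clear
+(and futex-wake) the child tid word when the thread exits.
+*/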
+
+const Stacksz = 8*std.MiB
+
+const spawn = {fn
+	-> spawnstk(fn, Stacksz)
+}
+
+const spawnstk = {fn, sz
+	var stk : byte#, tid, ctid, ret
+	var szp, fp, tos
+
+	stk = getstk(sz)
+	if stk == sys.Mapbad
+		-> `std.Fail "couldn't get stack"
+	;;
+	tos = stk castto(std.intptr)
+	tos -= sizeof(int64)
+	szp = tos castto(sys.size#)
+	szp# = Stacksz
+	tos -= sizeof((->void))
+	fp = tos castto((->void)#)
+	fp# = fn
+
+	ret = sys.fnclone(Thrflag, \
+		tos castto(byte#),\
+		&tid, 0 castto(byte#), \
+		&ctid, 0 castto(byte#), \
+		startthread castto(void#)) castto(tid)
+	if ret < 0
+		std.put("errno={}\n", -ret)
+		-> `std.Fail "couldn't spawn thread"
+	;;
+	-> `std.Ok ret
+}
+
+const getstk = {sz
+	var p, m
+
+	p = sys.mmap(0 castto(byte#), sz, sys.Mprotrw, sys.Mpriv | sys.Manon, -1, 0)
+	if p == sys.Mapbad
+		-> p
+	;;
+	/* stack starts at the top of memory and grows down. */
+	m = p castto(std.intptr)
+	m += sz castto(std.intptr)
+	-> m castto(byte#)
+}
+
+const startthread = {fn : (-> void)
+	fn()
+	exit()
+}
+
--- /dev/null
+++ b/lib/thread/spawn+osx.myr
@@ -1,0 +1,60 @@
+use sys
+use std
+
+pkg thread =
+	type tid = uint64
+
+	const spawn : (fn : (-> void) -> std.result(tid, byte[:]))
+;;
+
+
+const Stacksz = 8*std.MiB
+extern const exit : (-> void)
+extern const start : (-> void)
+
+const __init__ = {
+	var ret
+
+	ret = sys.bsdthread_register(\
+		start castto(void#), \	/* start */
+		0 castto(void#), \	/* wqthread */
+		0 castto(uint32), \	/* sz */
+		0 castto(uint32), \	/* dummy */
+		0 castto(void#), \	/* targconc */
+		0 castto(uint32))	/* queueoff */
+	if ret < 0
+		std.fatal("unable to init threads: {}", ret)
+	;;
+}
+
+
+
+const spawn = {fn
+	-> spawnstk(fn, Stacksz)
+}
+
+const spawnstk = {fn, sz
+	var tid : tid, ret
+
+	ret = sys.bsdthread_create( \
+		fn castto(void#), \
+		envptr(&fn), \
+		sz castto(void#), \
+		0 castto(void#), \
+		0)
+
+	if ret == (-1 castto(void#))
+		-> `std.Fail "couldn't spawn thread"
+	;;
+	-> `std.Ok ret castto(tid)
+}
+
+const envptr = {fn
+	var repr : std.intptr[2]
+
+	repr = (fn castto(std.intptr[2]#))#
+	-> repr[0] castto(void#)
+}
+
--- /dev/null
+++ b/lib/thread/spawn+plan9.myr
@@ -1,0 +1,18 @@
+use std
+use sys
+
+pkg thread =
+	type tid = uint64
+
+	const spawn : (fn : (-> void) -> std.result(tid, byte[:]))
+;;
+
+const spawn = {fn
+	match sys.rfork(sys.Rfproc | sys.Rfmem | sys.Rfnowait)
+	| 0:
+		fn()
+		std.exit(0)
+	| -1:	-> `std.Fail "unable to spawn thread"
+	| thr:	-> `std.Ok thr castto(tid)
+	;;
+}
\ No newline at end of file
--- /dev/null
+++ b/lib/thread/start+osx-x64.s
@@ -1,0 +1,22 @@
+// The entry point for thread start, registered with bsdthread_register
+//      %rdi: pthread (0, for us)
+//      %rsi: mach thread port (ignored)
+//      %rdx: func
+//      %rcx: env
+//      %r8: stack
+//      %r9: flags (= 0)
+//      %rsp: stack - C_64_REDZONE_LEN (= stack - 128)
+.globl _thread$start
+_thread$start:
+	/* call the function */
+#	movq	%r8, %rsp	/* set up stack */
+	movq	%rcx, %rax	/* set up env */
+	callq	*%rdx		/* call function */
+
+	/* exit the thread */
+	movq	$0x2000169, %rax	/* Sysbsdthread_terminate */
+	movq	%rsp, %rdi	/* stack */
+	movq	$0, %rsi	/* len */
+	movq	$0, %rdx	/* sem */
+	syscall
+	
--- /dev/null
+++ b/lib/thread/test/atomic.myr
@@ -1,0 +1,29 @@
+use std
+use thread
+
+use "test/util.use"
+
+const Nherd = 20
+
+var val : uint64 = 0
+var done : uint32 = 0
+
+const main = {
+	done = 0
+	val = 0
+	mkherd(Nherd, incvar)
+	while thread.xget(&done) != Nherd
+		/* nothing */
+	;;
+	std.assert(val == 2_000_000, "atomics are broken\n")
+}
+
+const incvar = {
+	var i
+
+	for i = 0; i < 100_000; i++
+		thread.xadd(&val, 1)
+	;;
+	thread.xadd(&done, 1)
+}
+
--- /dev/null
+++ b/lib/thread/test/condvar.myr
@@ -1,0 +1,88 @@
+use std
+use thread
+
+use "test/util.use"
+
+const Nwakes = 1000
+
+var cv
+var mtx
+var val
+
+var done : int32
+var nwoken : int32
+var nready : int32
+var locked : int32
+
+const main = {
+	done = 0
+	val = 123
+
+	mtx = thread.mkmtx()
+	cv = thread.mkcond(&mtx)
+	thread.spawn(cvwait)
+	thread.spawn(cvwake)
+	while done == 0
+		/* nothing */
+	;;
+	std.assert(nwoken == Nwakes, "wrong number of wakes")
+	std.assert(val == 123, "wrong val after all are done")
+
+	nwoken = 0
+	nready = 0
+	mkherd(100, cvwaitonce)
+
+	/* wait until the herd is ready */
+	while nready != 100
+		/* nothing */
+	;;
+	while locked == 0
+		/* nothing */
+	;;
+	thread.condbroadcast(&cv)
+	while nwoken != 100
+		/* nothing */
+	;;
+	std.assert(nwoken == 100, "wrong thread count woken")
+
+}
+
+const cvwait = {
+	for var i = 0; i < Nwakes; i++
+		thread.mtxlock(&mtx)
+		thread.condwait(&cv)
+		std.assert(val == 456, "wrong val after signal\n")
+		val = 123
+		thread.mtxunlock(&mtx)
+
+		thread.xadd(&nwoken, 1)
+	;;
+	val = 123
+	thread.xadd(&done, 1)
+
+}
+
+const cvwake = {
+	while true
+		thread.mtxlock(&mtx)
+		val = 456
+		thread.mtxunlock(&mtx)
+
+		thread.condsignal(&cv)
+		if nwoken >= Nwakes
+			break
+		;;
+	;;
+}
+
+const cvwaitonce = {
+	thread.xadd(&nready, 1)
+
+	thread.mtxlock(&mtx)
+	thread.xadd(&locked, 1)
+	thread.condwait(&cv)
+	thread.mtxunlock(&mtx)
+
+	thread.xadd(&nwoken, 1)
+}
+
--- /dev/null
+++ b/lib/thread/test/future.myr
@@ -1,0 +1,50 @@
+use std
+use sys
+use thread
+
+use "test/util.use"
+
+var fut
+var nready : int32
+var ndone : int32
+
+const main = {
+	nready = 0
+	ndone = 0
+	fut = thread.mkfut()
+	/* set after we have some waiters */
+	mkherd(100, getfuture)
+	while nready != 100
+		/* spin */
+	;;
+	std.put("done waiting for ready\n")
+	std.assert(ndone == 0, "thread proceeded too soon\n")
+	thread.futset(&fut, 666)
+	std.assert(thread.futset(&fut, 1) == false, "double set future\n")
+	while ndone != 100
+		/* spin */
+	;;
+	std.put("double set future ok\n")
+	/* start up a few more to make sure we can still read */
+	mkherd(50, getfuture)
+	while ndone != 150
+		/* spin */
+	;;
+
+	
+	/* set ahead of time */
+	ndone = 0
+	fut = thread.mkfut()
+	thread.futset(&fut, 666)
+	std.assert(thread.futset(&fut, 666) == false, "double set future\n")
+	mkherd(100, getfuture)
+	while ndone != 100
+		/* spin */
+	;;
+}
+
+const getfuture = {
+	thread.xadd(&nready, 1)
+	std.assert(thread.futget(&fut) == 666, "wrong value gotten from future")
+	thread.xadd(&ndone, 1)
+}
--- /dev/null
+++ b/lib/thread/test/mutex.myr
@@ -1,0 +1,33 @@
+use std
+use thread
+
+use "test/util.use"
+
+const Nherd = 20
+
+var val : uint64 = 0
+var done : uint32 = 0
+var mtx : thread.mutex
+
+const main = {
+	done = 0
+	val = 0
+
+	mtx = thread.mkmtx()
+	mkherd(Nherd, incvar)
+	while thread.xget(&done) != Nherd
+		/* nothing */
+	;;
+	if val != 10_000 * 20
+		std.fatal("mutexes are broken, got {}\n", val)
+	;;
+}
+
+const incvar = {
+	for var i = 0; i < 10_000; i++
+		thread.mtxlock(&mtx)
+		val++
+		thread.mtxunlock(&mtx)
+	;;
+	thread.xadd(&done, 1)
+}
--- /dev/null
+++ b/lib/thread/test/spawn.myr
@@ -1,0 +1,25 @@
+use std
+use thread
+
+var done : int32
+var capture
+
+const main = {
+	var ptr
+
+	capture = 666
+	ptr = &capture
+	thread.spawn({
+		std.assert(capture==666, "wrong captured value\n")
+		std.assert(ptr#==666, "wrong captured ptr value\n")
+		ptr# = 333
+		thread.xadd(&done, 1)
+	})
+
+	while done == 0
+		/* nothing */
+	;;
+
+	std.assert(capture == 333, "capture wasn't written to correctly\n")
+}
+
--- /dev/null
+++ b/lib/thread/test/util.myr
@@ -1,0 +1,12 @@
+use std
+use thread
+
+pkg =
+	const mkherd : (n : uint32, fn : (-> void) ->void)
+;;
+
+const mkherd = {n, fn
+	for var i = 0; i < n; i++
+		std.try(thread.spawn(fn))
+	;;
+}