ref: b2d29ed0eebd0e3e2c162b5e83d6d0e8b6ddee57
parent: d21343766fc75075e9246b386b60e264bfd1860e
parent: 920e16ca93f648b551af2b688de6a1d5fe1cb802
author: Ori Bernstein <ori@eigenstate.org>
date: Thu May 5 18:59:19 EDT 2016
Merge ../integrate-libthread
--- /dev/null
+++ b/lib/thread/atomic-impl+plan9-x64.s
@@ -1,0 +1,71 @@
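+/*
+Register convention (assumed, matching the SysV AMD64 file in this
+patch): DI holds the pointer argument, SI the value (DX the new value
+for cas), and results are returned in AX.
+*/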
+TEXT thread$xget32+0(SB),1,$0
+ MOVL (DI), AX
+ RET
+TEXT thread$xget64+0(SB),1,$0
+ MOVQ (DI), AX
+ RET
+TEXT thread$xgetp+0(SB),1,$0
+ MOVQ (DI), AX
+ RET
+
+TEXT thread$xset32+0(SB),1,$0
+ MOVL SI, (DI)
+ RET
+TEXT thread$xset64+0(SB),1,$0
+ MOVQ SI, (DI)
+ RET
+TEXT thread$xsetp+0(SB),1,$0
+ MOVQ SI, (DI)
+ RET
+
+TEXT thread$xadd32+0(SB),1,$0
+ LOCK; XADDL SI, (DI)
+ MOVL SI, AX
+ RET
+TEXT thread$xadd64+0(SB),1,$0
+ LOCK; XADDQ SI, (DI)
+ MOVQ SI, AX
+ RET
+TEXT thread$xaddp+0(SB),1,$0
+ LOCK; XADDQ SI, (DI)
+ MOVQ SI, AX
+ RET
+
+/* there is no XSUB instruction; negate and use a locked XADD instead */
+TEXT thread$xsub32+0(SB),1,$0
+	NEGL SI
+	LOCK; XADDL SI, (DI)
+	MOVL SI, AX
+	RET
+TEXT thread$xsub64+0(SB),1,$0
+	NEGQ SI
+	LOCK; XADDQ SI, (DI)
+	MOVQ SI, AX
+	RET
+TEXT thread$xsubp+0(SB),1,$0
+	NEGQ SI
+	LOCK; XADDQ SI, (DI)
+	MOVQ SI, AX
+	RET
+
+TEXT thread$xcas32+0(SB),1,$0
+ MOVL SI, AX
+ LOCK; CMPXCHGL DX, (DI)
+ RET
+TEXT thread$xcas64+0(SB),1,$0
+ MOVQ SI, AX
+ LOCK; CMPXCHGQ DX, (DI)
+ RET
+TEXT thread$xcasp+0(SB),1,$0
+ MOVQ SI, AX
+ LOCK; CMPXCHGQ DX, (DI)
+ RET
+
+TEXT thread$xchg32+0(SB),1,$0
+ MOVL SI, AX
+ LOCK; XCHGL (DI), AX
+ RET
+TEXT thread$xchg64+0(SB),1,$0
+ MOVQ SI, AX
+ LOCK; XCHGQ (DI), AX
+ RET
+TEXT thread$xchgp+0(SB),1,$0
+ MOVQ SI, AX
+ LOCK; XCHGQ (DI), AX
+ RET
--- /dev/null
+++ b/lib/thread/atomic-impl+x64.s
@@ -1,0 +1,90 @@
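+/*
+System V AMD64 calling convention: the pointer argument arrives in %rdi,
+the value in %esi/%rsi (and the "new" value for cas in %edx/%rdx);
+results are returned in %eax/%rax.
+*/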
+.globl thread$xget32
+.globl _thread$xget32
+thread$xget32:
+_thread$xget32:
+ movl (%rdi), %eax
+ ret
+.globl thread$xget64
+.globl thread$xgetp
+.globl _thread$xget64
+.globl _thread$xgetp
+thread$xget64:
+thread$xgetp:
+_thread$xget64:
+_thread$xgetp:
+ movq (%rdi), %rax
+ ret
+
+.globl thread$xset32
+.globl _thread$xset32
+thread$xset32:
+_thread$xset32:
+ movl %esi, (%rdi)
+ ret
+.globl thread$xset64
+.globl thread$xsetp
+.globl _thread$xset64
+.globl _thread$xsetp
+thread$xset64:
+thread$xsetp:
+_thread$xset64:
+_thread$xsetp:
+ movq %rsi, (%rdi)
+ ret
+
+.globl thread$xadd32
+.globl _thread$xadd32
+thread$xadd32:
+_thread$xadd32:
+ lock xaddl %esi, (%rdi)
+ movl %esi,%eax
+ ret
+.globl thread$xadd64
+.globl thread$xaddp
+.globl _thread$xadd64
+.globl _thread$xaddp
+thread$xadd64:
+thread$xaddp:
+_thread$xadd64:
+_thread$xaddp:
+ lock xaddq %rsi, (%rdi)
+ movq %rsi,%rax
+ ret
+
+.globl thread$xcas32
+.globl _thread$xcas32
+thread$xcas32:
+_thread$xcas32:
+ movl %esi, %eax
+ lock cmpxchgl %edx, (%rdi)
+ ret
+.globl thread$xcas64
+.globl thread$xcasp
+.globl _thread$xcas64
+.globl _thread$xcasp
+thread$xcas64:
+thread$xcasp:
+_thread$xcas64:
+_thread$xcasp:
+ movq %rsi, %rax
+ lock cmpxchgq %rdx, (%rdi)
+ ret
+
+.globl thread$xchg32
+.globl _thread$xchg32
+thread$xchg32:
+_thread$xchg32:
+ movl %esi, %eax
+ lock xchgl (%rdi), %eax
+ ret
+.globl thread$xchg64
+.globl thread$xchgp
+.globl _thread$xchg64
+.globl _thread$xchgp
+thread$xchg64:
+thread$xchgp:
+_thread$xchg64:
+_thread$xchgp:
+ movq %rsi, %rax
+ lock xchgq (%rdi), %rax
+ ret
--- /dev/null
+++ b/lib/thread/atomic.myr
@@ -1,0 +1,78 @@
+use std
+
+pkg thread =
+ trait atomic @a::(integral,numeric) =
+ xget : (p : @a# -> @a)
+ xset : (p : @a#, v : @a -> void)
+ xadd : (p : @a#, v : @a -> @a)
+ xcas : (p : @a#, old : @a, new : @a -> @a)
+ xchg : (p : @a#, new : @a -> @a)
+ ;;
+
+ impl atomic int32
+ impl atomic int64
+ impl atomic uint32
+ impl atomic uint64
+;;
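+
+/*
+Usage sketch (hypothetical caller, not part of this package):
+
+	var count : uint32 = 0
+	thread.xset(&count, 1)			/* atomic store */
+	old = thread.xadd(&count, 1)		/* returns the value before the add */
+	if thread.xcas(&count, 2, 0) == 2	/* old value == expected means the swap happened */
+		...
+	;;
+*/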
+
+impl atomic int32 =
+ xget = {p; -> xget32(p castto(uint32#)) castto(int32)}
+ xset = {p, v; xset32(p castto(uint32#), v castto(uint32))}
+ xadd = {p, v; -> xadd32(p castto(uint32#), v castto(uint32)) castto(int32)}
+ xcas = {p, old, new; -> xcas32(p castto(uint32#), old castto(uint32), new castto(uint32)) castto(int32)}
+ xchg = {p, v; -> xchg32(p castto(uint32#), v castto(uint32)) castto(int32)}
+;;
+
+
+impl atomic int64 =
+ xget = {p; -> xget64(p castto(uint64#)) castto(int64)}
+ xset = {p, v; xset64(p castto(uint64#), v castto(uint64))}
+ xadd = {p, v; -> xadd64(p castto(uint64#), v castto(uint64)) castto(int64)}
+ xcas = {p, old, new; -> xcas64(p castto(uint64#), old castto(uint64), new castto(uint64)) castto(int64)}
+ xchg = {p, v; -> xchg64(p castto(uint64#), v castto(uint64)) castto(int64)}
+;;
+
+impl atomic uint32 =
+ xget = {p; -> xget32(p)}
+ xset = {p, v; xset32(p, v)}
+ xadd = {p, v; -> xadd32(p, v)}
+ xcas = {p, old, new; -> xcas32(p, old, new)}
+ xchg = {p, v; -> xchg32(p, v)}
+;;
+
+
+impl atomic uint64 =
+ xget = {p; -> xget64(p)}
+ xset = {p, v; xset64(p, v)}
+ xadd = {p, v; -> xadd64(p, v)}
+ xcas = {p, old, new; -> xcas64(p, old, new)}
+ xchg = {p, v; -> xchg64(p, v)}
+;;
+
+impl atomic std.intptr =
+ xget = {p; -> xgetp(p)}
+ xset = {p, v; xsetp(p, v)}
+ xadd = {p, v; -> xaddp(p, v)}
+ xcas = {p, old, new; -> xcasp(p, old, new)}
+ xchg = {p, v; -> xchgp(p, v)}
+;;
+
+extern const xget32 : (p : uint32# -> uint32)
+extern const xget64 : (p : uint64# -> uint64)
+extern const xgetp : (p : std.intptr# -> std.intptr)
+
+extern const xset32 : (p : uint32#, v : uint32 -> void)
+extern const xset64 : (p : uint64#, v : uint64 -> void)
+extern const xsetp : (p : std.intptr#, v : std.intptr -> void)
+
+extern const xadd32 : (p : uint32#, v : uint32 -> uint32)
+extern const xadd64 : (p : uint64#, v : uint64 -> uint64)
+extern const xaddp : (p : std.intptr#, v : std.intptr -> std.intptr)
+
+extern const xcas32 : (p : uint32#, old: uint32, new : uint32 -> uint32)
+extern const xcas64 : (p : uint64#, old: uint64, new : uint64 -> uint64)
+extern const xcasp : (p : std.intptr#, old: std.intptr, new : std.intptr -> std.intptr)
+
+extern const xchg32 : (p : uint32#, v : uint32 -> uint32)
+extern const xchg64 : (p : uint64#, v : uint64 -> uint64)
+extern const xchgp : (p : std.intptr#, v : std.intptr -> std.intptr)
--- /dev/null
+++ b/lib/thread/bld.proj
@@ -1,0 +1,31 @@
+lib thread =
+ common.myr
+
+ # linux impl of basic thread primitives
+ condvar+linux.myr
+ mutex+linux.myr
+ spawn+linux.myr
+ exit+linux-x64.s
+
+ # freebsd impl of thread primitives
+ condvar+freebsd.myr
+ mutex+freebsd.myr
+ spawn+freebsd.myr
+ exit+freebsd-x64.s
+
+ # osx impl of thread primitives
+ #condvar+osx.myr
+ mutex+osx.myr
+ spawn+osx.myr
+ start+osx-x64.s
+
+ # 9front impl of thread primitives
+ #condvar+plan9.myr
+ mutex+plan9.myr
+ spawn+plan9.myr
+ atomic-impl+plan9-x64.s
+
+ atomic-impl+x64.s
+ atomic.myr
+;;
+
--- /dev/null
+++ b/lib/thread/common.myr
@@ -1,0 +1,5 @@
+use std
+
+pkg thread =
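+	/* a null pointer of arbitrary type, used for "ignored" pointer arguments (eg, to futex/umtx_op) */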
+ generic Zptr = 0 castto(@a#)
+;;
--- /dev/null
+++ b/lib/thread/condvar+freebsd.myr
@@ -1,0 +1,59 @@
+use std
+use sys
+
+use "atomic.use"
+use "common.use"
+use "mutex.use"
+
+pkg thread =
+ type cond = struct
+ _mtx : mutex#
+ _seq : uint32
+ ;;
+
+ const mkcond : (mtx : mutex# -> cond)
+ const condwait : (cond : cond# -> void)
+ const condsignal : (cond : cond# -> void)
+ const condbroadcast : (cond : cond# -> void)
+;;
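+
+/*
+Usage sketch (hypothetical caller): the mutex passed to mkcond must be
+held around condwait, and the condition should be rechecked in a loop,
+since a waiter may wake spuriously or lose the race to another thread:
+
+	thread.mtxlock(&mtx)
+	while !ready
+		thread.condwait(&cv)	/* drops and reacquires the mutex */
+	;;
+	thread.mtxunlock(&mtx)
+*/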
+
+const mkcond = {mtx
+ -> [._mtx = mtx, ._seq = 0]
+}
+
+const condwait = {cond
+ var seq
+ var mtx
+
+ mtx = cond._mtx
+ seq = cond._seq
+
+ mtxunlock(mtx)
+ sys.umtx_op(&cond._seq castto(void#), \
+ sys.Umtxwaituintpriv, \
+ seq castto(uint64), \
+ Zptr, Zptr)
+
+ /*
+	We need to atomically set the mutex to Contended. This passes
+	responsibility for waking any remaining waiters on to whoever
+	unlocks the mutex.
+ */
+ while xchg(&mtx._state, Contended) != Unlocked
+ sys.umtx_op(&mtx._state castto(void#), \
+ sys.Umtxwaituintpriv, \
+ Contended castto(uint64), \
+ Zptr, Zptr)
+ ;;
+}
+
+const condsignal = {cond : cond#
+ xadd(&cond._seq, 1)
+ sys.umtx_op(&cond._seq castto(void#), sys.Umtxwakepriv, 1, Zptr, Zptr)
+}
+
+const condbroadcast = {cond : cond#
+ xadd(&cond._seq, 1)
+	sys.umtx_op(&cond._seq castto(void#), sys.Umtxwakepriv, 0x7fffffff, Zptr, Zptr)
+}
+
--- /dev/null
+++ b/lib/thread/condvar+linux.myr
@@ -1,0 +1,61 @@
+use std
+use sys
+
+use "atomic.use"
+use "common.use"
+use "mutex.use"
+
+pkg thread =
+ type cond = struct
+ _mtx : mutex#
+ _seq : int32
+ ;;
+
+ const mkcond : (mtx : mutex# -> cond)
+ const condwait : (cond : cond# -> void)
+ const condsignal : (cond : cond# -> void)
+ const condbroadcast : (cond : cond# -> void)
+;;
+
+const mkcond = {mtx
+ -> [._mtx = mtx, ._seq = 0]
+}
+
+const condwait = {cond
+ var seq
+ var mtx
+
+ mtx = cond._mtx
+ seq = cond._seq
+
+ mtxunlock(mtx)
+ sys.futex(&cond._seq, sys.Futexwait | sys.Futexpriv, seq, Zptr, Zptr, 0)
+
+ /*
+	We need to atomically set the mutex to Contended. This passes
+	responsibility for waking any remaining waiters on to whoever
+	unlocks the mutex.
+ */
+ while xchg(&mtx._state, Contended) != Unlocked
+ sys.futex(&mtx._state, sys.Futexwait | sys.Futexpriv, \
+ Contended, Zptr, Zptr, 0)
+ ;;
+}
+
+const condsignal = {cond : cond#
+ xadd(&cond._seq, 1)
+ sys.futex(&cond._seq, sys.Futexwake | sys.Futexpriv, 1, Zptr, Zptr, 0)
+}
+
+const condbroadcast = {cond : cond#
+ xadd(&cond._seq, 1)
+ /*
+	For FUTEX_CMP_REQUEUE, the timeout argument is not a timeout at all:
+	the kernel reinterprets it as the maximum number of waiters to
+	requeue onto the mutex.
+ */
+ sys.futex(&cond._seq, sys.Futexcmprequeue | sys.Futexpriv, \
+ 1, 0x7fffffff castto(sys.timespec#), \
+ &cond._mtx._state, cond._seq)
+}
+
--- /dev/null
+++ b/lib/thread/exit+freebsd-x64.s
@@ -1,0 +1,23 @@
+/*
+const thread.exit : (stacksz : std.size -> void)
+NOTE: must be called from the bottom of the stack, since
+we assume that %rbp is in the top 4k of the stack.
+*/
+.globl thread$exit
+thread$exit:
+ /* find top of stack */
+ movq %rbp,%rdi /* addr */
+ andq $~0xfff,%rdi /* align it */
+ addq $0x1000,%rdi
+
+ /* munmap(base, size) */
+ movq $73,%rax /* munmap */
+ movq -8(%rdi),%rsi /* size */
+ subq %rsi,%rdi /* move to base ptr */
+ syscall
+
+ /* thr_exit(null) */
+ movq $431,%rax /* exit */
+ xorq %rdi,%rdi /* 0 */
+ syscall
+
--- /dev/null
+++ b/lib/thread/exit+linux-x64.s
@@ -1,0 +1,23 @@
+/*
+const thread.exit : (stacksz : std.size -> void)
+NOTE: must be called from the bottom of the stack, since
+we assume that %rbp is in the top 4k of the stack.
+*/
+.globl thread$exit
+thread$exit:
+ /* find top of stack */
+ movq %rbp,%rdi /* addr */
+ andq $~0xfff,%rdi /* align it */
+ addq $0x1000,%rdi
+
+ /* munmap(base, size) */
+ movq $11,%rax /* munmap */
+ movq -8(%rdi),%rsi /* size */
+ subq %rsi,%rdi /* move to base ptr */
+ syscall
+
+	/* exit(0): terminates only the calling thread */
+ movq $60,%rax /* exit */
+ xorq %rdi,%rdi /* 0 */
+ syscall
+
--- /dev/null
+++ b/lib/thread/future.myr
@@ -1,0 +1,63 @@
+use std
+
+use "mutex.use"
+
+pkg thread =
+ type future(@a) = struct
+ mtx : mutex
+ set : bool
+ val : @a
+ ;;
+
+ generic mkfut : (-> future(@a))
+ generic futset : (fut : future(@a)#, val : @a -> bool)
+ generic futget : (fut : future(@a)# -> @a)
+ generic futtryget : (fut : future(@a)# -> std.option(@a))
+ generic futclear : (fut : future(@a)# -> void)
+;;
+
+const Unset = 0
+const Waiting = 1
+const Set = 2
+
+generic mkfut = {
+ var fut
+
+ fut = [.mtx = mkmtx() ]
+ mtxlock(&fut.mtx)
+ -> fut
+}
+
+generic futset = {fut, val
+ if fut.set
+ -> false
+ ;;
+	/* NB: relies on the compiler not reordering these two stores */
+ fut.val = val
+ fut.set = true
+ mtxunlock(&fut.mtx)
+ -> true
+}
+
+generic futtryget = {fut
+ var val
+
+ if !fut.set
+ -> `std.None
+ ;;
+ mtxlock(&fut.mtx)
+ val = fut.val
+ mtxunlock(&fut.mtx)
+ -> `std.Some val
+}
+
+generic futget = {fut
+ var val
+
+ mtxlock(&fut.mtx)
+ val = fut.val
+ mtxunlock(&fut.mtx)
+ -> val
+}
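+
+/*
+The pkg block above declares futclear but no definition was given; this
+is a minimal sketch. It re-arms the future the way mkfut leaves it:
+unset, with the mutex held so futget blocks until the next futset. It
+assumes no other thread touches the future while it is being cleared.
+*/
+generic futclear = {fut
+	fut.set = false
+	mtxlock(&fut.mtx)
+}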
+
+
--- /dev/null
+++ b/lib/thread/mutex+freebsd.myr
@@ -1,0 +1,80 @@
+use std
+use sys
+
+use "atomic.use"
+use "common.use"
+
+pkg thread =
+ type mutex = struct
+ _state : uint32
+ ;;
+
+ const mkmtx : (-> mutex)
+ const mtxlock : (mtx : mutex# -> void)
+ const mtxtrylock : (mtx : mutex# -> bool)
+ const mtxunlock : (mtx : mutex# -> void)
+
+ pkglocal const Unlocked = 0
+ pkglocal const Locked = 1
+ pkglocal const Contended = 2
+;;
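+
+/*
+State transitions (a summary of the code below, not a separate protocol):
+	Unlocked -> Locked	uncontended fast path, taken with xcas
+	Locked -> Contended	a waiter arrived and will sleep in umtx_op
+	Contended -> Unlocked	the unlocker resets the state and wakes a sleeper
+*/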
+
+var nspin = 10 /* FIXME: pick a sane number, based on CPU count */
+
+const mkmtx = {
+ -> [._state = Unlocked]
+}
+
+const mtxlock = {mtx
+ var c
+
+ /*
+ Uncontended case: we get an unlocked mutex, and we lock it.
+ */
+ c = Locked
+ for var i = 0; i < nspin; i++
+ c = xcas(&mtx._state, Unlocked, Locked)
+ if c == Unlocked
+			-> void
+ ;;
+ ;;
+
+ /*
+	Contended case: we set the lock state to Contended. This indicates
+	that the lock is held and that there may be threads waiting on it,
+	which means the unlocker will need to wake them.
+ */
+ if c == Locked
+ c = xchg(&mtx._state, Contended)
+ ;;
+
+ while c != Unlocked
+ sys.umtx_op( \
+ &mtx._state castto(void#), \
+ sys.Umtxwaituintpriv, \
+ Contended castto(uint64), \
+ Zptr, Zptr)
+ c = xchg(&mtx._state, Contended)
+ ;;
+}
+
+const mtxtrylock = {mtx
+ -> xcas(&mtx._state, Unlocked, Locked) == Unlocked
+}
+
+const mtxunlock = {mtx
+ /*
+	Uncontended case: if the state was merely Locked (not Contended), and
+	the xchg() below confirms that nobody marked it Contended in the
+	meantime, it is safe to simply return: nobody was waiting on us.
+	Otherwise, fall through and wake a waiter.
+ */
+ if mtx._state == Contended
+ mtx._state = Unlocked
+ elif xchg(&mtx._state, Unlocked) == Locked
+		-> void
+ ;;
+
+	/* wake one waiting thread */
+ sys.umtx_op(&mtx._state castto(void#), sys.Umtxwakepriv, 1, Zptr, Zptr)
+}
+
--- /dev/null
+++ b/lib/thread/mutex+linux.myr
@@ -1,0 +1,76 @@
+use std
+use sys
+
+use "atomic.use"
+use "common.use"
+
+pkg thread =
+ type mutex = struct
+ _state : int32
+ ;;
+
+ const mkmtx : (-> mutex)
+ const mtxlock : (mtx : mutex# -> void)
+ const mtxtrylock : (mtx : mutex# -> bool)
+ const mtxunlock : (mtx : mutex# -> void)
+
+ pkglocal const Unlocked = 0
+ pkglocal const Locked = 1
+ pkglocal const Contended = 2
+;;
+
+var nspin = 10 /* FIXME: pick a sane number, based on CPU count */
+
+const mkmtx = {
+ -> [._state = Unlocked]
+}
+
+const mtxlock = {mtx
+ var c
+
+ /*
+ Uncontended case: we get an unlocked mutex, and we lock it.
+ */
+ c = Locked
+ for var i = 0; i < nspin; i++
+ c = xcas(&mtx._state, Unlocked, Locked)
+ if c == Unlocked
+ -> void
+ ;;
+ ;;
+
+ /*
+	Contended case: we set the lock state to Contended. This indicates
+	that the lock is held and that there may be threads waiting on it,
+	which means the unlocker will need to wake them.
+ */
+ if c == Locked
+ c = xchg(&mtx._state, Contended)
+ ;;
+
+ while c != Unlocked
+ sys.futex(&mtx._state, sys.Futexwait | sys.Futexpriv, Contended, Zptr, Zptr, 0)
+ c = xchg(&mtx._state, Contended)
+ ;;
+}
+
+const mtxtrylock = {mtx
+ -> xcas(&mtx._state, Unlocked, Locked) == Unlocked
+}
+
+const mtxunlock = {mtx
+ /*
+	Uncontended case: if the state was merely Locked (not Contended), and
+	the xchg() below confirms that nobody marked it Contended in the
+	meantime, it is safe to simply return: nobody was waiting on us.
+	Otherwise, fall through and wake a waiter.
+ */
+ if mtx._state == Contended
+ mtx._state = Unlocked
+ elif xchg(&mtx._state, Unlocked) == Locked
+ -> void
+ ;;
+
+ /* wake one thread */
+ sys.futex(&mtx._state, sys.Futexwake | sys.Futexpriv, 1, Zptr, Zptr, 0)
+}
+
--- /dev/null
+++ b/lib/thread/mutex+osx.myr
@@ -1,0 +1,65 @@
+use std
+use sys
+
+
+use "atomic.use"
+use "common.use"
+
+pkg thread =
+ type mutex = struct
+ _state : uint32
+ ;;
+
+ const mkmtx : (-> mutex)
+ const mtxlock : (mtx : mutex# -> void)
+ const mtxtrylock : (mtx : mutex# -> bool)
+ const mtxunlock : (mtx : mutex# -> void)
+;;
+
+const mkmtx = {
+ -> [._state = 0]
+}
+
+/* a crude spinlock with sleep-based backoff */
+const mtxlock = {mtx
+ /* first fast */
+ for var i = 0; i < 1000; i++
+ if xcas(&mtx._state, 0, 1) == 0
+ -> void
+ ;;
+ std.nanosleep(0)
+ ;;
+
+ /* then slower */
+ for var i = 0; i < 1000; i++
+ if xcas(&mtx._state, 0, 1) == 0
+ -> void
+ ;;
+ std.nanosleep(10_000) /* 10 us */
+ ;;
+
+ /* even slower */
+ for var i = 0; i < 1000; i++
+ if xcas(&mtx._state, 0, 1) == 0
+ -> void
+ ;;
+ std.nanosleep(100_000) /* 100 us */
+ ;;
+
+ /* I'm rip van winkle! */
+ while true
+ if xcas(&mtx._state, 0, 1) == 0
+ -> void
+ ;;
+ std.nanosleep(1000_000) /* 1 ms */
+ ;;
+}
+
+const mtxtrylock = {mtx
+ -> xcas(&mtx._state, 0, 1) == 0
+}
+
+
+const mtxunlock = {mtx
+ xset(&mtx._state, 0)
+}
--- /dev/null
+++ b/lib/thread/mutex+plan9.myr
@@ -1,0 +1,47 @@
+use std
+use sys
+
+
+use "atomic.use"
+use "common.use"
+
+pkg thread =
+ type mutex = struct
+ _state : uint32
+ _sem : uint32
+ ;;
+
+ const mkmtx : (-> mutex)
+ const mtxlock : (mtx : mutex# -> void)
+ const mtxtrylock : (mtx : mutex# -> bool)
+ const mtxunlock : (mtx : mutex# -> void)
+;;
+
+const mkmtx = {
+ -> [._state = 0, ._sem=0]
+}
+
+const mtxlock = {mtx
+ /* if the old value was 0, we aren't contended */
+ if xadd(&mtx._state, 1) == 0
+ -> void
+ ;;
+
+ while sys.semacquire(&mtx._sem, 1) < 0
+ /* interrupted; retry */
+ ;;
+}
+
+const mtxtrylock = {mtx
+ -> xcas(&mtx._state, 0, 1) == 0
+}
+
+
+const mtxunlock = {mtx
+ /* if we were the only thread waiting on the lock, there was no contention */
+ if xadd(&mtx._state, -1) == 1
+ -> void
+ ;;
+
+ sys.semrelease(&mtx._sem, 1)
+}
--- /dev/null
+++ b/lib/thread/spawn+freebsd.myr
@@ -1,0 +1,74 @@
+use sys
+use std
+
+pkg thread =
+ type tid = uint64
+
+ const spawn : (fn : (-> void) -> std.result(tid, byte[:]))
+;;
+
+
+const Stacksz = 8*std.MiB
+extern const exit : (-> void)
+
+const spawn = {fn
+ -> spawnstk(fn, Stacksz)
+}
+
+const spawnstk = {fn, sz
+ var stk : byte#, tid, ctid, ret
+ var szp, fp, tos
+
+ stk = getstk(sz)
+ if stk == sys.Mapbad
+ -> `std.Fail "couldn't get stack"
+ ;;
+ tid = -1
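+
+	/*
+	Stack handoff (as set up below): the allocation size is stored in
+	the topmost word so thread$exit can find and munmap the mapping,
+	the function value is stored just below it, and its address (tos)
+	is what startthread receives as its argument.
+	*/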
+ /* find top of stack */
+ tos = (stk castto(std.intptr)) + (sz castto(std.intptr))
+
+ /* store the stack size */
+ tos -= sizeof(sys.size)
+ sz -= sizeof(sys.size)
+ szp = tos castto(sys.size#)
+ szp# = Stacksz
+
+ /* store the function we call */
+ tos -= sizeof((->void))
+ sz -= sizeof((->void))
+ fp = tos castto((->void)#)
+ fp# = fn
+
+ ret = sys.thr_new(&[
+ .startfn = startthread castto(void#),
+ .arg = tos castto(void#),
+ .stkbase = stk castto(byte#),
+ .stksz = sz,
+ .tid = &ctid,
+ .ptid = &tid,
+ .flags = 2,
+ .rtp = 0 castto(sys.rtprio#)
+ ], sizeof(sys.thrparam))
+
+ if ret < 0
+ -> `std.Fail "couldn't spawn thread"
+ ;;
+ -> `std.Ok tid castto(tid)
+}
+
+const getstk = {sz
+ var p, m
+
+ p = sys.mmap(0 castto(byte#), sz, sys.Mprotrw, sys.Mpriv | sys.Manon, -1, 0)
+ if p == sys.Mapbad
+ -> p
+ ;;
+ m = p castto(std.intptr)
+ -> m castto(byte#)
+}
+
+const startthread = {fn : (-> void)#
+ fn#()
+ exit()
+}
+
--- /dev/null
+++ b/lib/thread/spawn+linux.myr
@@ -1,0 +1,68 @@
+use sys
+use std
+
+pkg thread =
+ type tid = sys.pid
+
+ const spawn : (fn : (-> void) -> std.result(tid, byte[:]))
+;;
+
+extern const exit : (-> void)
+
+/* Holy shit flag mania. */
+const Thrflag = sys.Clonevm | sys.Clonefs | sys.Clonefiles | \
+ sys.Clonesighand | sys.Clonethread |sys.Clonesysvsem | \
+ sys.Clonesettls | sys.Cloneparentsettid | sys.Clonechildcleartid
+
+const Stacksz = 8*std.MiB
+
+const spawn = {fn
+ -> spawnstk(fn, Stacksz)
+}
+
+const spawnstk = {fn, sz
+ var stk : byte#, tid, ctid, ret
+ var szp, fp, tos
+
+ stk = getstk(sz)
+ if stk == sys.Mapbad
+ -> `std.Fail "couldn't get stack"
+ ;;
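+	/*
+	stk already points at the top of the mapping (getstk returns base + size);
+	store the allocation size in the topmost word for thread$exit, the
+	function value below it, and hand its address to the new thread.
+	*/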
+ tos = stk castto(std.intptr)
+ tos -= sizeof(int64)
+ szp = tos castto(sys.size#)
+ szp# = Stacksz
+ tos -= sizeof((->void))
+ fp = tos castto((->void)#)
+ fp# = fn
+
+ ret = sys.fnclone(Thrflag, \
+ tos castto(byte#),\
+ &tid, 0 castto(byte#), \
+ &ctid, 0 castto(byte#), \
+ startthread castto(void#)) castto(tid)
+ if ret < 0
+ std.put("errno={}\n", -ret)
+ -> `std.Fail "couldn't spawn thread"
+ ;;
+ -> `std.Ok ret
+}
+
+const getstk = {sz
+ var p, m
+
+ p = sys.mmap(0 castto(byte#), sz, sys.Mprotrw, sys.Mpriv | sys.Manon, -1, 0)
+ if p == sys.Mapbad
+ -> p
+ ;;
+ /* stack starts at the top of memory and grows down. */
+ m = p castto(std.intptr)
+ m += sz castto(std.intptr)
+ -> m castto(byte#)
+}
+
+const startthread = {fn : (-> void)
+ fn()
+ exit()
+}
+
--- /dev/null
+++ b/lib/thread/spawn+osx.myr
@@ -1,0 +1,60 @@
+use sys
+use std
+
+pkg thread =
+ type tid = uint64
+
+ const spawn : (fn : (-> void) -> std.result(tid, byte[:]))
+;;
+
+
+const Stacksz = 8*std.MiB
+extern const exit : (-> void)
+extern const start : (-> void)
+
+const __init__ = {
+ var ret
+
+ ret = sys.bsdthread_register(\
+ start castto(void#), \ /* start */
+ 0 castto(void#), \ /* wqthread */
+ 0 castto(uint32), \ /* sz */
+ 0 castto(uint32), \ /* dummy */
+ 0 castto(void#), \ /* targconc */
+ 0 castto(uint32)) /* queueoff */
+ if ret < 0
+ std.fatal("unable to init threads: {}", ret)
+ ;;
+}
+
+
+
+const spawn = {fn
+ -> spawnstk(fn, Stacksz)
+}
+
+const spawnstk = {fn, sz
+ var tid : tid, ret
+
+
+ std.put("...hi? fn={}\n", fn castto(void#))
+ ret = sys.bsdthread_create( \
+ fn castto(void#), \
+ envptr(&fn), \
+ sz castto(void#), \
+ 0 castto(void#), \
+ 0)
+
+ if ret == (-1 castto(void#))
+ -> `std.Fail "couldn't spawn thread"
+ ;;
+ -> `std.Ok ret castto(tid)
+}
+
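+/*
+Assumption, not verified against the ABI: a function value is laid out
+as two pointer-sized words, and the word pulled out here is the
+environment pointer that bsdthread_create passes to the new thread.
+*/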
+const envptr = {fn
+ var repr : std.intptr[2]
+
+ repr = (fn castto(std.intptr[2]#))#
+ -> repr[0] castto(void#)
+}
+
--- /dev/null
+++ b/lib/thread/spawn+plan9.myr
@@ -1,0 +1,18 @@
+use std
+use sys
+
+pkg thread =
+ type tid = uint64
+
+ const spawn : (fn : (-> void) -> std.result(tid, byte[:]))
+;;
+
+const spawn = {fn
+ match sys.rfork(sys.Rfproc | sys.Rfmem | sys.Rfnowait)
+ | 0:
+ fn()
+ std.exit(0)
+ | -1: -> `std.Fail "unable to spawn thread"
+ | thr: -> `std.Ok thr castto(tid)
+ ;;
+}
\ No newline at end of file
--- /dev/null
+++ b/lib/thread/start+osx-x64.s
@@ -1,0 +1,22 @@
+// The entry point for thread start, registered with bsdthread_register
+// %rdi: pthread (0, for us)
+// %rsi: mach thread port (ignored)
+// %rdx: func
+// %rcx: env
+// %r8: stack
+// %r9: flags (= 0)
+// %rsp: stack - C_64_REDZONE_LEN (= stack - 128)
+.globl _thread$start
+_thread$start:
+ /* call the function */
+# movq %r8, %rsp /* set up stack */
+ movq %rcx, %rax /* set up env */
+ callq *%rdx /* call function */
+
+ /* exit the thread */
+ movq $0x2000169, %rax /* Sysbsdthread_terminate */
+ movq %rsp, %rdi /* stack */
+ movq $0, %rsi /* len */
+ movq $0, %rdx /* sem */
+ syscall
+
--- /dev/null
+++ b/lib/thread/test/atomic.myr
@@ -1,0 +1,29 @@
+use std
+use thread
+
+use "test/util.use"
+
+const Nherd = 20
+
+var val : uint64 = 0
+var done : uint32 = 0
+
+const main = {
+ done = 0
+ val = 0
+ mkherd(Nherd, incvar)
+ while thread.xget(&done) != Nherd
+ /* nothing */
+ ;;
+ std.assert(val == 2_000_000, "atomics are broken\n")
+}
+
+const incvar = {
+ var i
+
+ for i = 0; i < 100_000; i++
+ thread.xadd(&val, 1)
+ ;;
+ thread.xadd(&done, 1)
+}
+
--- /dev/null
+++ b/lib/thread/test/condvar.myr
@@ -1,0 +1,88 @@
+use std
+use thread
+
+use "test/util.use"
+
+const Nwakes = 1000
+
+var cv
+var mtx
+var val
+
+var done : int32
+var nwoken : int32
+var nready : int32
+var locked : int32
+
+const main = {
+ done = 0
+ val = 123
+
+ mtx = thread.mkmtx()
+ cv = thread.mkcond(&mtx)
+ thread.spawn(cvwait)
+ thread.spawn(cvwake)
+ while done == 0
+ /* nothing */
+ ;;
+ std.assert(nwoken == Nwakes, "wrong number of wakes")
+ std.assert(val == 123, "wrong val after all are done")
+
+ nwoken = 0
+ nready = 0
+ mkherd(100, cvwaitonce)
+
+ /* wait until the herd is ready */
+	while nready != 100
+ /* nothing */
+ ;;
+ while locked == 0
+ /* nothing */
+ ;;
+ thread.condbroadcast(&cv)
+ while nwoken != 100
+ /* nothing */
+ ;;
+ std.assert(nwoken == 100, "wrong thread count woken")
+
+}
+
+const cvwait = {
+ for var i = 0; i < Nwakes; i++
+ thread.mtxlock(&mtx)
+ thread.condwait(&cv)
+ std.assert(val == 456, "wrong val after signal\n")
+ val = 123
+ thread.mtxunlock(&mtx)
+
+ thread.xadd(&nwoken, 1)
+ ;;
+ val = 123
+ thread.xadd(&done, 1)
+
+}
+
+const cvwake = {
+ while true
+ thread.mtxlock(&mtx)
+ val = 456
+ thread.mtxunlock(&mtx)
+
+ thread.condsignal(&cv)
+ if nwoken >= Nwakes
+ break
+ ;;
+ ;;
+}
+
+const cvwaitonce = {
+ thread.xadd(&nready, 1)
+
+ thread.mtxlock(&mtx)
+ thread.xadd(&locked, 1)
+ thread.condwait(&cv)
+ thread.mtxunlock(&mtx)
+
+ thread.xadd(&nwoken, 1)
+}
+
--- /dev/null
+++ b/lib/thread/test/future.myr
@@ -1,0 +1,50 @@
+use std
+use sys
+use thread
+
+use "test/util.use"
+
+var fut
+var nready : int32
+var ndone : int32
+
+const main = {
+ nready = 0
+ ndone = 0
+ fut = thread.mkfut()
+ /* set after we have some waiters */
+ mkherd(100, getfuture)
+ while nready != 100
+ /* spin */
+ ;;
+ std.put("done waiting for ready\n")
+ std.assert(ndone == 0, "thread proceeded too soon\n")
+ thread.futset(&fut, 666)
+ std.assert(thread.futset(&fut, 1) == false, "double set future\n")
+ while ndone != 100
+ /* spin */
+ ;;
+	std.put("double set future ok\n")
+ /* start up a few more to make sure we can still read */
+ mkherd(50, getfuture)
+ while ndone != 150
+ /* spin */
+ ;;
+
+
+ /* set ahead of time */
+ ndone = 0
+ fut = thread.mkfut()
+ thread.futset(&fut, 666)
+ std.assert(thread.futset(&fut, 666) == false, "double set future\n")
+ mkherd(100, getfuture)
+ while ndone != 100
+ /* spin */
+ ;;
+}
+
+const getfuture = {
+ thread.xadd(&nready, 1)
+ std.assert(thread.futget(&fut) == 666, "wrong value gotten from future")
+ thread.xadd(&ndone, 1)
+}
--- /dev/null
+++ b/lib/thread/test/mutex.myr
@@ -1,0 +1,33 @@
+use std
+use thread
+
+use "test/util.use"
+
+const Nherd = 20
+
+var val : uint64 = 0
+var done : uint32 = 0
+var mtx : thread.mutex
+
+const main = {
+ done = 0
+ val = 0
+
+ mtx = thread.mkmtx()
+ mkherd(Nherd, incvar)
+ while thread.xget(&done) != Nherd
+ /* nothing */
+ ;;
+ if val != 10_000 * 20
+ std.fatal("mutexes are broken, got {}\n", val)
+ ;;
+}
+
+const incvar = {
+ for var i = 0; i < 10_000; i++
+ thread.mtxlock(&mtx)
+ val++
+ thread.mtxunlock(&mtx)
+ ;;
+ thread.xadd(&done, 1)
+}
--- /dev/null
+++ b/lib/thread/test/spawn.myr
@@ -1,0 +1,25 @@
+use std
+use thread
+
+var done : int32
+var capture
+
+const main = {
+ var ptr
+
+ capture = 666
+ ptr = &capture
+ thread.spawn({
+ std.assert(capture==666, "wrong captured value\n")
+ std.assert(ptr#==666, "wrong captured ptr value\n")
+ ptr# = 333
+ thread.xadd(&done, 1)
+ })
+
+ while done == 0
+ /* nothing */
+ ;;
+
+ std.assert(capture == 333, "capture wasn't written to correctly\n")
+}
+
--- /dev/null
+++ b/lib/thread/test/util.myr
@@ -1,0 +1,12 @@
+use std
+use thread
+
+pkg =
+	const mkherd : (n : uint32, fn : (-> void) -> void)
+;;
+
+const mkherd = {n, fn
+ for var i = 0; i < n; i++
+ std.try(thread.spawn(fn))
+ ;;
+}