shithub: mc

Download patch

ref: 08442dd9c3426e2dbe598ef1f6fb10f2f67638aa
parent: 76086513de9efd7a000d0bc229c3e990f0af5a2f
author: Ori Bernstein <ori@eigenstate.org>
date: Fri Oct 2 19:27:11 EDT 2015

Implement some asm optimized memcpy/memmove checks.

    TODO: memcmp

--- a/bench/bld.sub
+++ b/bench/bld.sub
@@ -31,6 +31,14 @@
 	lib @/lib/sys:sys
 	lib @/lib/regex:regex
 ;;
+
+bin many-memcpy =
+	memcpy.myr
+	lib @/lib/std:std
+	lib @/lib/sys:sys
+;;
+
+# benchmark runner
 bin runbench =
 	runbench.myr
 	lib @/lib/std:std
@@ -45,4 +53,5 @@
 		bigfactorial
 		mandelbrot
 		regex-match
+		many-memcpy
 ;;
--- a/bench/copious-allocs.myr
+++ b/bench/copious-allocs.myr
@@ -5,32 +5,31 @@
 ;;
 
 const main = {
-	var i, j
 	var a : blob#[10000]
 
-	for j = 0; j < 100; j++
+	for var j = 0; j < 100; j++
 		/* alloc forwards, dealloc forwards */
-		for i = 0; i < a.len; i++
+		for var i = 0; i < a.len; i++
 			a[i] = std.alloc()
 		;;
-		for i = 0; i < a.len; i++
+		for var i = 0; i < a.len; i++
 			std.free(a[i])
 		;;
 
 		/* alloc forwards, dealloc backwards */
-		for i = 0; i < a.len; i++
+		for var i = 0; i < a.len; i++
 			a[i] = std.alloc()
 		;;
-		for i = a.len; i > 0; i--
+		for var i = a.len; i > 0; i--
 			std.free(a[i - 1])
 		;;
 
 		/* alloc forwards, dealloc randomly */
-		for i = 0; i < a.len; i++
+		for var i = 0; i < a.len; i++
 			a[i] = std.alloc()
 		;;
 		shuffle(a[:])
-		for i = a.len; i > 0; i--
+		for var i = a.len; i > 0; i--
 			std.free(a[i - 1])
 		;;
 	;;
@@ -39,10 +38,11 @@
 const shuffle = {a
 	var t
 	var rng
-	var i, j
+	var j
 
+	/* we want determinism for benchmarking */
 	rng = std.mksrng(123)
-	for i = 0; i < a.len - 1; i++
+	for var i = 0; i < a.len - 1; i++
 		j = std.rngrand(rng, i, a.len)
 		t = a[j]
 		a[j] = a[i]
--- /dev/null
+++ b/bench/many-memcpy.myr
@@ -1,0 +1,27 @@
+use std
+
+const main = {
+	var a : uint64[100000]
+
+	for var j = 0; j < 100; j++
+		/* independent copies forward */
+		for var i = 0; i < 10; i++
+			std.slcp(a[:a.len/2-1], a[a.len/2+1:])
+		;;
+		/* independent copies backward */
+		for var i = 0; i < 10; i++
+			std.slcp(a[:a.len/2-1], a[a.len/2+1:])
+		;;
+
+		/* dependent copies forward */
+		for var i = 0; i < 10; i++
+			std.slcp(a[:a.len/2+1000], a[a.len/2-1000:])
+		;;
+		/* dependent copies backward */
+		for var i = 0; i < 10; i++
+			std.slcp(a[a.len/2-1000:], a[:a.len/2+1000])
+		;;
+	;;
+}
+
+
--- a/lib/std/alloc.myr
+++ b/lib/std/alloc.myr
@@ -3,6 +3,7 @@
 use "types.use"
 use "units.use"
 use "syswrap.use"
+use "memops.use"
 
 /*
 The allocator implementation here is based on Bonwick's slab allocator.
@@ -181,11 +182,15 @@
 
 /* Grows a slice, filling new entries with zero bytes */
 generic slzgrow = {sl : @a[:], len
-	var oldsz
+	var oldlen
+	var base
 
-	oldsz = sl.len*sizeof(@a)
+	oldlen = sl.len
 	sl = slgrow(sl, len)
-	zfill((sl castto(byte#))[oldsz:len*sizeof(@a)])
+	base = sl castto(byte#) castto(intptr)
+	if oldlen < len
+		memfill(sl[oldlen:] castto(byte#), 0, (len - oldlen)*sizeof(@a))
+	;;
 	-> sl
 }
 
@@ -200,16 +205,10 @@
 	var p
 
 	p = bytealloc(sz)
-	zfill(p[0:sz])
+	memfill(p, 0, sz)
 	-> p
 }
 
-const zfill = {sl
-	for var i = 0; i < sl.len; i++
-		sl[i] = 0
-	;;
-}
-
 /* Allocates a blob that is 'sz' bytes long. Dies if the allocation fails */
 const bytealloc = {sz
 	var bkt, p
@@ -229,12 +228,8 @@
 /* frees a blob that is 'sz' bytes long. */
 const bytefree = {p, sz
 	var bkt
-	var b, i
 
-	b = (p castto(uint64#))[:sz/8]
-	for i = 0; i < sz>>3; i++
-		b[i] = 0xa8a8a8a8a8a8a8a8
-	;;
+	memfill(p, 0xa8, sz)
 	if (sz < Bktmax)
 		bkt = &buckets[bktnum(sz)]
 		bktfree(bkt, p)
--- a/lib/std/bld.sub
+++ b/lib/std/bld.sub
@@ -62,6 +62,11 @@
 	utf.myr
 	varargs.myr
 
+	# asm optimizations
+	memops.myr
+	memops-impl.myr
+	memops-impl+posixy-x64.s
+
 	# platform specific files
 	env+plan9.myr
 	env+posixy.myr
--- /dev/null
+++ b/lib/std/memops-impl+posixy-x64.s
@@ -1,0 +1,50 @@
+/*
+std.memblit	: (dst : byte#, src : byte#, len : std.size -> void)
+std.memfill	: (dst : byte#, val : byte, len : std.size -> void)
+*/
+.globl _std$memblit
+.globl std$memblit
+_std$memblit:
+std$memblit:
+	cmpq	%rdi,%rsi
+	jz	.done
+	jg	.fwdcpy
+	movq	%rsi,%rax
+	subq	%rdi,%rax
+	cmpq	%rax,%rcx
+	jg	.revcpy
+.fwdcpy:
+	movq	%rdx,%rcx
+	shrq	$3,%rcx
+	rep  movsq
+	movq	%rdx,%rcx
+	andq	$7,%rcx
+	rep  movsb
+	jmp	.done
+.revcpy:
+	std
+	movq	%rdx,%rcx
+	leaq	-1(%rdx,%rsi),%rsi
+	leaq	-1(%rdx,%rdi),%rdi
+	rep movsb
+	cld
+.done:
+	ret
+
+.globl _std$memfill
+.globl std$memfill
+_std$memfill:
+std$memfill:
+	/* generate 8 bytes of fill */
+	movzbq	%sil,%rbx
+	mov	$0x101010101010101,%rax
+	imul	%rbx,%rax
+
+	/* and fill */
+	movq	%rdx,%rcx
+	shrq	$3,%rcx
+	rep stosq
+	movq	%rdx,%rcx
+	andq	$7,%rcx
+	rep stosb 
+	ret
--- /dev/null
+++ b/lib/std/memops-impl.myr
@@ -1,0 +1,39 @@
+use "types.use"
+
+pkg std =
+	pkglocal const memblit	: (dst : byte#, src : byte#, len : std.size -> void)
+	pkglocal const memfill	: (dst : byte#, val : byte, len : std.size -> void)
+;;
+
+
+const memblit = {dst, src, len
+	var sa, da
+	var s, d
+
+	da = dst castto(intptr)
+	sa = src castto(intptr)
+	d = dst[:len]
+	s = src[:len]
+
+	if da == sa
+		->
+	elif da < sa
+		for var i = 0; i < d.len; i++
+			d[i] = s[i]
+		;;
+	else
+		for var i = d.len; i > 0; i--
+			d[i - 1] = s[i - 1]
+		;;
+	;;
+}
+
+const memfill = {dst, val, len
+	var d
+
+	d = dst[:len]
+	for var i = 0; i < d.len; i++
+		d[i] = val
+	;;
+}
+
--- /dev/null
+++ b/lib/std/memops.myr
@@ -1,0 +1,7 @@
+use "types.use"
+
+pkg std =
+	pkglocal extern const memblit	: (src : byte#, dst : byte#, len : std.size -> void)
+	pkglocal extern const memfill	: (src : byte#, val : byte, len : std.size -> void)
+;;
+
--- a/lib/std/slcp.myr
+++ b/lib/std/slcp.myr
@@ -1,5 +1,6 @@
 use "die.use"
 use "types.use"
+use "memops.use"
 
 pkg std =
 	generic slcp : (a : @a[:], b : @a[:] -> void)
@@ -6,20 +7,6 @@
 ;;
 
 generic slcp = {a : @a[:], b : @a[:]
-	var addr_a, addr_b
-
-	assert(a.len == b.len, "arguments to slcp() must be of equal length")
-
-	addr_a = a castto(@a#) castto(intptr)
-	addr_b = b castto(@a#) castto(intptr)
-	if addr_a <= addr_b
-		for var i = 0; i < a.len; i++
-			a[i] = b[i]
-		;;
-	else
-		for var i = a.len; i > 0; i--
-			a[i - 1] = b[i - 1]
-		;;
-	;;
-		
+	assert(a.len == b.len, "arguments to slcp() must be of equal length\n")
+	memblit(a castto(byte#), b castto(byte#), a.len * sizeof(@a))
 }
--- a/lib/std/test/slcp.myr
+++ b/lib/std/test/slcp.myr
@@ -9,6 +9,8 @@
 
 	std.slcp(a[:a.len-2], a[2:])
 	std.slcp(b[2:], b[:b.len-2])
+	std.put("a: {}, a_cped: {}\n", a[:], a_cped[:])
+	std.put("b: {}, b_cped: {}\n", b[:], b_cped[:])
 	std.assert(std.sleq(a[:], a_cped[:]), "slcp of a failed")
-	std.assert(std.sleq(b[:], b_cped[:]), "slcp of a failed")
+	std.assert(std.sleq(b[:], b_cped[:]), "slcp of b failed")
 }