shithub: riscv

Download patch

ref: 4e7fbabfc9f33f6ea5e5a46d5d6d7532d5f68301
parent: 1046d3e30be09d7f1f07bf0a49f0e32584e86874
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Wed May 8 22:32:09 EDT 2019

libc: provide arm64 assembly versions for memmove() and memset()

just a first approximation, uses unaligned 8 byte loads and stores.
MOVP not yet implemented by the linker. no ZVA cache magic yet.

--- /dev/null
+++ b/sys/src/ape/lib/ap/arm64/memmove.s
@@ -1,0 +1,66 @@
+/*
+ * memmove(to, from, n), also entered as memcpy: copy n bytes from
+ * from to to, coping with overlap, and return to.
+ *
+ * to arrives in R0 and the result leaves in R0, so R0 is never
+ * advanced; the forward loops step a separate cursor (R2) instead.
+ * n is read as an unsigned 32-bit value.
+ *
+ *	R0	to, kept intact as the return value
+ *	R1	source cursor
+ *	R2	n, then the destination cursor on the forward path
+ *	R3	to + n (forward end / backward destination cursor)
+ *	R4	address at which the 8-byte loop stops
+ *	R5	data in flight
+ */
+TEXT memcpy(SB), $-4
+TEXT memmove(SB), $-4
+	MOV	from+8(FP), R1
+	MOVWU	n+16(FP), R2
+
+	CMP	R0, R1
+	BEQ	_done			/* from == to: nothing to move */
+	BLT	_backward		/* from < to (signed compare): copy top down */
+
+_forward:
+	ADD	R0, R2, R3		/* R3 = to + n */
+	BIC	$7, R2, R4		/* R4 = n rounded down to a multiple of 8 */
+	MOV	R0, R2			/* n is no longer needed: R2 = write cursor */
+	CBZ	R4, _floop1
+	ADD	R0, R4, R4		/* R4 = end of the 8-byte part */
+
+_floop8:
+	MOV	(R1)8!, R5
+	MOV	R5, (R2)8!
+	CMP	R4, R2
+	BNE	_floop8
+
+_floop1:
+	CMP	R3, R2
+	BEQ	_done
+	MOVBU	(R1)1!, R5
+	MOVBU	R5, (R2)1!
+	B	_floop1
+
+_done:
+	RETURN
+
+_backward:
+	ADD	R2, R1, R1		/* R1 = from + n */
+	ADD	R2, R0, R3		/* R3 = to + n */
+	BIC	$7, R2, R4
+	CBZ	R4, _bloop1
+	SUB	R4, R3, R4		/* R4 = start of the 8-byte part */
+
+_bloop8:
+	MOV	-8(R1)!, R5
+	MOV	R5, -8(R3)!
+	CMP	R4, R3
+	BNE	_bloop8
+
+_bloop1:
+	CMP	R0, R3
+	BEQ	_done
+	MOVBU	-1(R1)!, R5
+	MOVBU	R5, -1(R3)!
+	B	_bloop1
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm64/memset.s
@@ -1,0 +1,42 @@
+/*
+ * memset(p, c, n): store the byte c into the n bytes starting at p
+ * and return p.
+ *
+ * p arrives in R0 and the result leaves in R0, so the stores go
+ * through a separate cursor (R2) and R0 is left untouched.
+ * n is read as an unsigned 32-bit value.
+ *
+ *	R0	p, kept intact as the return value
+ *	R1	c; replicated into all 8 bytes before the 8-byte loop
+ *	R2	n, then the write cursor
+ *	R3	p + n
+ *	R4	address at which the 8-byte loop stops
+ */
+TEXT memset(SB), $-4
+	MOVBU	c+8(FP), R1
+	MOVWU	n+16(FP), R2
+
+	ADD	R0, R2, R3		/* R3 = p + n */
+	BIC	$7, R2, R4		/* R4 = n rounded down to a multiple of 8 */
+	MOV	R0, R2			/* n is no longer needed: R2 = write cursor */
+	CBZ	R4, _loop1
+	ADD	R0, R4, R4		/* R4 = end of the 8-byte part */
+
+	ORR	R1<<8, R1		/* 1 byte -> 2 */
+	ORR	R1<<16, R1		/* 2 bytes -> 4 */
+	ORR	R1<<32, R1		/* 4 bytes -> 8 */
+
+_loop8:
+	MOV	R1, (R2)8!
+	CMP	R4, R2
+	BNE	_loop8
+
+_loop1:
+	CMP	R3, R2
+	BEQ	_done
+
+	MOVBU	R1, (R2)1!
+	B	_loop1
+
+_done:
+	RETURN
--- a/sys/src/ape/lib/ap/arm64/mkfile
+++ b/sys/src/ape/lib/ap/arm64/mkfile
@@ -6,6 +6,8 @@
 	getfcr.$O\
 	lock.$O\
 	main9.$O\
+	memmove.$O\
+	memset.$O\
 	notetramp.$O\
 	setjmp.$O\
 	tas.$O\
--- /dev/null
+++ b/sys/src/libc/arm64/memmove.s
@@ -1,0 +1,66 @@
+/*
+ * memmove(to, from, n), also entered as memcpy: copy n bytes from
+ * from to to, coping with overlap, and return to.
+ *
+ * to arrives in R0 and the result leaves in R0, so R0 is never
+ * advanced; the forward loops step a separate cursor (R2) instead.
+ * n is read as an unsigned 32-bit value.
+ *
+ *	R0	to, kept intact as the return value
+ *	R1	source cursor
+ *	R2	n, then the destination cursor on the forward path
+ *	R3	to + n (forward end / backward destination cursor)
+ *	R4	address at which the 8-byte loop stops
+ *	R5	data in flight
+ */
+TEXT memcpy(SB), $-4
+TEXT memmove(SB), $-4
+	MOV	from+8(FP), R1
+	MOVWU	n+16(FP), R2
+
+	CMP	R0, R1
+	BEQ	_done			/* from == to: nothing to move */
+	BLT	_backward		/* from < to (signed compare): copy top down */
+
+_forward:
+	ADD	R0, R2, R3		/* R3 = to + n */
+	BIC	$7, R2, R4		/* R4 = n rounded down to a multiple of 8 */
+	MOV	R0, R2			/* n is no longer needed: R2 = write cursor */
+	CBZ	R4, _floop1
+	ADD	R0, R4, R4		/* R4 = end of the 8-byte part */
+
+_floop8:
+	MOV	(R1)8!, R5
+	MOV	R5, (R2)8!
+	CMP	R4, R2
+	BNE	_floop8
+
+_floop1:
+	CMP	R3, R2
+	BEQ	_done
+	MOVBU	(R1)1!, R5
+	MOVBU	R5, (R2)1!
+	B	_floop1
+
+_done:
+	RETURN
+
+_backward:
+	ADD	R2, R1, R1		/* R1 = from + n */
+	ADD	R2, R0, R3		/* R3 = to + n */
+	BIC	$7, R2, R4
+	CBZ	R4, _bloop1
+	SUB	R4, R3, R4		/* R4 = start of the 8-byte part */
+
+_bloop8:
+	MOV	-8(R1)!, R5
+	MOV	R5, -8(R3)!
+	CMP	R4, R3
+	BNE	_bloop8
+
+_bloop1:
+	CMP	R0, R3
+	BEQ	_done
+	MOVBU	-1(R1)!, R5
+	MOVBU	R5, -1(R3)!
+	B	_bloop1
--- /dev/null
+++ b/sys/src/libc/arm64/memset.s
@@ -1,0 +1,42 @@
+/*
+ * memset(p, c, n): store the byte c into the n bytes starting at p
+ * and return p.
+ *
+ * p arrives in R0 and the result leaves in R0, so the stores go
+ * through a separate cursor (R2) and R0 is left untouched.
+ * n is read as an unsigned 32-bit value.
+ *
+ *	R0	p, kept intact as the return value
+ *	R1	c; replicated into all 8 bytes before the 8-byte loop
+ *	R2	n, then the write cursor
+ *	R3	p + n
+ *	R4	address at which the 8-byte loop stops
+ */
+TEXT memset(SB), $-4
+	MOVBU	c+8(FP), R1
+	MOVWU	n+16(FP), R2
+
+	ADD	R0, R2, R3		/* R3 = p + n */
+	BIC	$7, R2, R4		/* R4 = n rounded down to a multiple of 8 */
+	MOV	R0, R2			/* n is no longer needed: R2 = write cursor */
+	CBZ	R4, _loop1
+	ADD	R0, R4, R4		/* R4 = end of the 8-byte part */
+
+	ORR	R1<<8, R1		/* 1 byte -> 2 */
+	ORR	R1<<16, R1		/* 2 bytes -> 4 */
+	ORR	R1<<32, R1		/* 4 bytes -> 8 */
+
+_loop8:
+	MOV	R1, (R2)8!
+	CMP	R4, R2
+	BNE	_loop8
+
+_loop1:
+	CMP	R3, R2
+	BEQ	_done
+
+	MOVBU	R1, (R2)1!
+	B	_loop1
+
+_done:
+	RETURN
--- a/sys/src/libc/arm64/mkfile
+++ b/sys/src/libc/arm64/mkfile
@@ -11,6 +11,8 @@
 	getfcr.s\
 	main9.s\
 	main9p.s\
+	memmove.s\
+	memset.s\
 	setjmp.s\
 	tas.s\