ref: 4e7fbabfc9f33f6ea5e5a46d5d6d7532d5f68301
parent: 1046d3e30be09d7f1f07bf0a49f0e32584e86874
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Wed May 8 22:32:09 EDT 2019
libc: provide arm64 assembly versions of memmove() and memset(). This is just a first approximation: it uses unaligned 8-byte loads and stores. MOVP is not yet implemented by the linker, and there is no DC ZVA cache magic yet.
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm64/memmove.s
@@ -1,0 +1,51 @@
+TEXT memcpy(SB), $-4
+TEXT memmove(SB), $-4
+	MOV	from+8(FP), R1		// R0 = to (also the return value), R1 = from
+	MOVWU	n+16(FP), R2		// n is ulong (32 bits), zero-extend into R2
+
+	CMP	R0, R1
+	BEQ	_done			// to == from: nothing to copy
+	BLT	_backward		// from < to: copy downward to survive overlap
+					// NOTE(review): BLT is a signed compare; ok while
+					// both pointers lie within one object — confirm
+
+_forward:
+	MOV	R0, R6			// advance R6, not R0: memmove must return to
+	ADD	R6, R2, R3		// R3 = end of destination
+	BIC	$7, R2, R4		// R4 = n rounded down to a multiple of 8
+	CBZ	R4, _floop1
+	ADD	R6, R4, R4		// R4 = end of the 8-byte-chunk region
+
+_floop8:
+	MOV	(R1)8!, R5		// unaligned 8-byte copy, post-increment
+	MOV	R5, (R6)8!
+	CMP	R4, R6
+	BNE	_floop8
+
+_floop1:
+	CMP	R3, R6
+	BEQ	_done
+	MOVBU	(R1)1!, R5		// trailing n&7 bytes, one at a time
+	MOVBU	R5, (R6)1!
+	B	_floop1
+
+_done:
+	RETURN				// R0 still holds to
+
+_backward:
+	ADD	R2, R1, R1		// R1 = end of source
+	ADD	R2, R0, R3		// R3 = end of destination; R0 stays = to
+	BIC	$7, R2, R4
+	CBZ	R4, _bloop1
+	SUB	R4, R3, R4		// R4 = to + n%8, lower bound of chunk region
+
+_bloop8:
+	MOV	-8(R1)!, R5		// unaligned 8-byte copy, pre-decrement
+	MOV	R5, -8(R3)!
+	CMP	R4, R3
+	BNE	_bloop8
+
+_bloop1:
+	CMP	R0, R3
+	BEQ	_done
+	MOVBU	-1(R1)!, R5		// leading n&7 bytes
+	MOVBU	R5, -1(R3)!
+	B	_bloop1
--- /dev/null
+++ b/sys/src/ape/lib/ap/arm64/memset.s
@@ -1,0 +1,28 @@
+TEXT memset(SB), $-4
+	MOVBU	c+8(FP), R1		// R0 = s (also the return value), R1 = fill byte
+	MOVWU	n+16(FP), R2		// n is ulong (32 bits), zero-extend into R2
+
+	MOV	R0, R6			// advance R6, not R0: memset must return s
+	ADD	R6, R2, R3		// R3 = end of buffer
+	BIC	$7, R2, R4		// R4 = n rounded down to a multiple of 8
+	CBZ	R4, _loop1
+	ADD	R6, R4, R4		// R4 = end of the 8-byte-chunk region
+
+	ORR	R1<<8, R1		// replicate the fill byte into all 8 bytes
+	ORR	R1<<16, R1
+	ORR	R1<<32, R1
+
+_loop8:
+	MOV	R1, (R6)8!		// unaligned 8-byte store, post-increment
+	CMP	R4, R6
+	BNE	_loop8
+
+_loop1:
+	CMP	R3, R6
+	BEQ	_done
+
+	MOVBU	R1, (R6)1!		// trailing n&7 bytes (low byte of R1 is c)
+	B	_loop1
+
+_done:
+	RETURN				// R0 still holds s
--- a/sys/src/ape/lib/ap/arm64/mkfile
+++ b/sys/src/ape/lib/ap/arm64/mkfile
@@ -6,6 +6,8 @@
getfcr.$O\
lock.$O\
main9.$O\
+ memmove.$O\
+ memset.$O\
notetramp.$O\
setjmp.$O\
tas.$O\
--- /dev/null
+++ b/sys/src/libc/arm64/memmove.s
@@ -1,0 +1,51 @@
+TEXT memcpy(SB), $-4
+TEXT memmove(SB), $-4
+	MOV	from+8(FP), R1		// R0 = to (also the return value), R1 = from
+	MOVWU	n+16(FP), R2		// n is ulong (32 bits), zero-extend into R2
+
+	CMP	R0, R1
+	BEQ	_done			// to == from: nothing to copy
+	BLT	_backward		// from < to: copy downward to survive overlap
+					// NOTE(review): BLT is a signed compare; ok while
+					// both pointers lie within one object — confirm
+
+_forward:
+	MOV	R0, R6			// advance R6, not R0: memmove must return to
+	ADD	R6, R2, R3		// R3 = end of destination
+	BIC	$7, R2, R4		// R4 = n rounded down to a multiple of 8
+	CBZ	R4, _floop1
+	ADD	R6, R4, R4		// R4 = end of the 8-byte-chunk region
+
+_floop8:
+	MOV	(R1)8!, R5		// unaligned 8-byte copy, post-increment
+	MOV	R5, (R6)8!
+	CMP	R4, R6
+	BNE	_floop8
+
+_floop1:
+	CMP	R3, R6
+	BEQ	_done
+	MOVBU	(R1)1!, R5		// trailing n&7 bytes, one at a time
+	MOVBU	R5, (R6)1!
+	B	_floop1
+
+_done:
+	RETURN				// R0 still holds to
+
+_backward:
+	ADD	R2, R1, R1		// R1 = end of source
+	ADD	R2, R0, R3		// R3 = end of destination; R0 stays = to
+	BIC	$7, R2, R4
+	CBZ	R4, _bloop1
+	SUB	R4, R3, R4		// R4 = to + n%8, lower bound of chunk region
+
+_bloop8:
+	MOV	-8(R1)!, R5		// unaligned 8-byte copy, pre-decrement
+	MOV	R5, -8(R3)!
+	CMP	R4, R3
+	BNE	_bloop8
+
+_bloop1:
+	CMP	R0, R3
+	BEQ	_done
+	MOVBU	-1(R1)!, R5		// leading n&7 bytes
+	MOVBU	R5, -1(R3)!
+	B	_bloop1
--- /dev/null
+++ b/sys/src/libc/arm64/memset.s
@@ -1,0 +1,28 @@
+TEXT memset(SB), $-4
+	MOVBU	c+8(FP), R1		// R0 = s (also the return value), R1 = fill byte
+	MOVWU	n+16(FP), R2		// n is ulong (32 bits), zero-extend into R2
+
+	MOV	R0, R6			// advance R6, not R0: memset must return s
+	ADD	R6, R2, R3		// R3 = end of buffer
+	BIC	$7, R2, R4		// R4 = n rounded down to a multiple of 8
+	CBZ	R4, _loop1
+	ADD	R6, R4, R4		// R4 = end of the 8-byte-chunk region
+
+	ORR	R1<<8, R1		// replicate the fill byte into all 8 bytes
+	ORR	R1<<16, R1
+	ORR	R1<<32, R1
+
+_loop8:
+	MOV	R1, (R6)8!		// unaligned 8-byte store, post-increment
+	CMP	R4, R6
+	BNE	_loop8
+
+_loop1:
+	CMP	R3, R6
+	BEQ	_done
+
+	MOVBU	R1, (R6)1!		// trailing n&7 bytes (low byte of R1 is c)
+	B	_loop1
+
+_done:
+	RETURN				// R0 still holds s
--- a/sys/src/libc/arm64/mkfile
+++ b/sys/src/libc/arm64/mkfile
@@ -11,6 +11,8 @@
getfcr.s\
main9.s\
main9p.s\
+ memmove.s\
+ memset.s\
setjmp.s\
tas.s\