shithub: riscv

Download patch

ref: dbe0a995f03f26ea2b6859d21df3bd67856d672d
parent: 59d16c3900ebfdf0725ebab61e77499dfb7f86de
author: mischief <mischief@offblast.org>
date: Mon Aug 24 21:58:41 EDT 2015

libc: import more endianness fixes (thanks cherry9)

from https://bitbucket.org/cherry9/plan9-loongson/

--- /dev/null
+++ b/sys/src/libc/spim/memccpy.s
@@ -1,0 +1,20 @@
+TEXT	memccpy(SB), $0			/* void *memccpy(void *s1, void *s2, int c, ulong n) */
+	MOVW	R1, 0(FP)		/* spill 1st arg (Plan 9 MIPS: arrives in R1) back to the frame */
+	MOVW	n+12(FP), R1		/* R1 = n, byte count */
+	BEQ	R1, ret			/* n == 0: R1 is already 0, return nil */
+	MOVW	s1+0(FP), R3		/* R3 = destination pointer */
+	MOVW	s2+4(FP), R2		/* R2 = source pointer */
+	MOVBU	c+8(FP), R4		/* R4 = c, the byte that stops the copy */
+	ADDU	R1, R2, R5		/* R5 = source end (s2 + n) */
+
+l1:	MOVBU	(R2), R6		/* copy one byte: R6 = *src++ */
+	ADDU	$1, R2
+	MOVBU	R6, (R3)		/* *dst++ = R6 (byte is copied even when it equals c) */
+	ADDU	$1, R3
+	BEQ	R4, R6, eq		/* copied byte == c: done */
+	BNE	R2, R5, l1		/* loop until n bytes moved */
+	MOVW	$0, R1			/* c not found in first n bytes: return nil */
+	RET
+
+eq:	MOVW	R3, R1			/* R3 already points one past the copied c */
+ret:	RET
--- /dev/null
+++ b/sys/src/libc/spim/memchr.s
@@ -1,0 +1,39 @@
+TEXT	memchr(SB), $0			/* void *memchr(void *s1, int c, ulong n) */
+	MOVW	R1, 0(FP)		/* spill 1st arg (Plan 9 MIPS: arrives in R1) back to the frame */
+
+	MOVW	n+8(FP), R1		/* R1 = n, byte count */
+	MOVW	s1+0(FP), R2		/* R2 = scan pointer */
+	MOVBU	c+4(FP), R3		/* R3 = byte value to find */
+	ADDU	R1, R2, R6		/* R6 = end pointer (s1 + n) */
+
+	AND	$(~1), R1, R5		/* R5 = n rounded down to a multiple of 2 */
+	ADDU	R2, R5			/* R5 = end of the 2-bytes-per-pass loop */
+	BEQ	R2, R5, lt2		/* fewer than 2 bytes: go straight to the tail */
+
+l1:					/* unrolled: examine two bytes per iteration */
+	MOVBU	0(R2), R4
+	MOVBU	1(R2), R7
+	BEQ	R3, R4, eq0		/* match at offset 0: R2 not yet advanced */
+	ADDU	$2, R2
+	BEQ	R3, R7, eq		/* match at offset 1: R2 already 2 past it */
+	BNE	R2, R5, l1
+
+lt2:
+	BEQ	R2, R6, zret		/* no leftover byte */
+
+l2:					/* tail: one byte at a time (at most 1 here) */
+	MOVBU	(R2), R4
+	ADDU	$1, R2
+	BEQ	R3, R4, eq
+	BNE	R2, R6, l2
+zret:
+	MOVW	R0, R1			/* not found: return nil (R0 is hardwired 0) */
+	RET
+
+eq0:
+	MOVW	R2, R1			/* match at the current pointer */
+	RET
+
+eq:
+	SUBU	$1,R2, R1		/* pointer was advanced past the match: back up one */
+	RET
--- /dev/null
+++ b/sys/src/libc/spim/memcmp.s
@@ -1,0 +1,117 @@
+TEXT	memcmp(SB), $0			/* int memcmp(void *s1, void *s2, ulong n) */
+	MOVW R1, 0(FP)			/* spill 1st arg (Plan 9 MIPS: arrives in R1) back to the frame */
+
+/*
+ * performance (word path, currently disabled below):
+ *	aligned about 1.0us/call and 17.4mb/sec
+ *	unaligned is about 3.1mb/sec
+ */
+
+	MOVW	n+8(FP), R3		/* R3 is count */
+	MOVW	s1+0(FP), R4		/* R4 is pointer1 */
+	MOVW	s2+4(FP), R5		/* R5 is pointer2 */
+	ADDU	R3,R4, R6		/* R6 is end pointer1 */
+
+	/* TODO(mischief): fix multibyte copy */
+	JMP	out			/* word loops below assume big endian; spim is little endian, so everything down to `out' is skipped (dead) */
+
+/*
+ * if not at least 4 chars,
+ * don't even mess around.
+ * 3 chars to guarantee any
+ * rounding up to a word
+ * boundary and 4 characters
+ * to get at least maybe one
+ * full word cmp.
+ */
+	SGT	$4,R3, R1		/* R1 = (4 > n) */
+	BNE	R1, out
+
+/*
+ * test if both pointers
+ * are similarly word aligned
+ */
+	XOR	R4,R5, R1		/* low 2 bits differ => can never co-align */
+	AND	$3, R1
+	BNE	R1, out
+
+/*
+ * byte at a time to word align
+ */
+l1:
+	AND	$3,R4, R1
+	BEQ	R1, l2
+	MOVBU	0(R4), R8
+	MOVBU	0(R5), R9
+	ADDU	$1, R4
+	BNE	R8,R9, ne
+	ADDU	$1, R5
+	JMP	l1
+
+/*
+ * turn R3 into end pointer1-15
+ * cmp 16 at a time while there's room
+ */
+l2:
+	ADDU	$-15,R6, R3
+l3:
+	SGTU	R3,R4, R1		/* R1 = room for 16 more bytes */
+	BEQ	R1, l4
+	MOVW	0(R4), R8
+	MOVW	0(R5), R9
+	MOVW	4(R4), R10		/* loads interleaved with compares to hide latency */
+	BNE	R8,R9, ne
+	MOVW	4(R5), R11
+	MOVW	8(R4), R8
+	BNE	R10,R11, ne1
+	MOVW	8(R5), R9
+	MOVW	12(R4), R10
+	BNE	R8,R9, ne
+	MOVW	12(R5), R11
+	ADDU	$16, R4
+	BNE	R10,R11, ne1
+	BNE	R8,R9, ne		/* re-checks words at offset 8 (already equal here) */
+	ADDU	$16, R5
+	JMP	l3
+
+/*
+ * turn R3 into end pointer1-3
+ * cmp 4 at a time while there's room
+ */
+l4:
+	ADDU	$-3,R6, R3
+l5:
+	SGTU	R3,R4, R1
+	BEQ	R1, out
+	MOVW	0(R4), R8
+	MOVW	0(R5), R9
+	ADDU	$4, R4
+	BNE	R8,R9, ne	/* ne's SGTU word ordering only works because big endian */
+	ADDU	$4, R5
+	JMP	l5
+
+/*
+ * last loop, cmp byte at a time
+ * (the only live path while the JMP out above remains)
+ */
+out:
+	SGTU	R6,R4, R1		/* R1 = bytes remaining? */
+	BEQ	R1, ret			/* done: R1 == 0 here, i.e. memcmp() == 0 */
+	MOVBU	0(R4), R8
+	MOVBU	0(R5), R9
+	ADDU	$1, R4
+	BNE	R8,R9, ne
+	ADDU	$1, R5
+	JMP	out
+
+ne1:
+	SGTU	R10,R11, R1		/* R1 = 1 if *s1 > *s2 */
+	BNE	R1, ret
+	MOVW	$-1,R1			/* else *s1 < *s2 */
+	RET
+ne:
+	SGTU	R8,R9, R1		/* R1 = 1 if *s1 > *s2 */
+	BNE	R1, ret
+	MOVW	$-1,R1			/* else *s1 < *s2 */
+ret:
+	RET
+	END
--- a/sys/src/libc/spim/mkfile
+++ b/sys/src/libc/spim/mkfile
@@ -9,13 +9,9 @@
 	getfcr.s\
 	main9.s\
 	main9p.s\
-	memccpy.s\
-	memchr.s\
-	memcmp.s\
 	memmove.s\
 	memset.s\
 	setjmp.s\
-	strchr.s\
 	strcmp.s\
 	strcpy.s\
 	tas.s\
@@ -26,6 +22,10 @@
 	notejmp.c\
 
 SFILES=\
+	memccpy.s\
+	memchr.s\
+	memcmp.s\
+	strchr.s\
 	vlop.s\
 
 CFILES=\
--- /dev/null
+++ b/sys/src/libc/spim/strchr.s
@@ -1,0 +1,63 @@
+TEXT	strchr(SB), $0			/* char *strchr(char *s, int c) */
+	MOVW R1, 0(FP)			/* spill 1st arg (Plan 9 MIPS: arrives in R1) back to the frame */
+	MOVB	c+4(FP), R4		/* R4 = c (sign-extended, matching the MOVB loads below) */
+	MOVW	s+0(FP), R3		/* R3 = scan pointer */
+
+	BEQ	R4, l2			/* c == NUL gets the word-at-a-time scan */
+
+/*
+ * char is not null: byte at a time,
+ * stopping at a match or at the terminating NUL
+ */
+l1:
+	MOVB	(R3), R1
+	ADDU	$1, R3
+	BEQ	R1, ret			/* hit NUL first: R1 = 0 = nil */
+	BNE	R1,R4, l1
+	JMP	rm1			/* matched: pointer is one past it */
+
+/*
+ * char is null
+ * align to word
+ */
+l2:
+	AND	$3,R3, R1
+	BEQ	R1, l3
+	MOVB	(R3), R1
+	ADDU	$1, R3
+	BNE	R1, l2
+	JMP	rm1			/* NUL found while aligning */
+
+l3:
+	MOVW	$0xff000000, R6		/* byte 3 mask: highest address (little endian) */
+	MOVW	$0x00ff0000, R7		/* byte 2 mask */
+
+l4:					/* test all 4 bytes of each word for NUL, lowest address first */
+	MOVW	(R3), R5
+	ADDU	$4, R3
+	AND	$0xff,R5, R1		/* byte 0: lowest address on little-endian spim */
+	AND	$0xff00,R5, R2		/* byte 1 */
+	BEQ	R1, b0
+	AND	R7,R5, R1		/* byte 2 */
+	BEQ	R2, b1
+	AND	R6,R5, R2		/* byte 3 */
+	BEQ	R1, b2
+	BNE	R2, l4			/* no NUL in this word */
+
+rm1:
+	ADDU	$-1,R3, R1		/* NUL (or match) one byte back */
+	JMP	ret
+
+b2:
+	ADDU	$-2,R3, R1		/* NUL two bytes back */
+	JMP	ret
+
+b1:
+	ADDU	$-3,R3, R1		/* NUL three bytes back */
+	JMP	ret
+
+b0:
+	ADDU	$-4,R3, R1		/* NUL at start of this word */
+	JMP	ret
+
+ret:
+	RET