ref: 1f83575018b39d12410407dc08bdc9c445504406
parent: bc26e300d1ef47040df247923c40491c0e31863d
author: Martin Storsjö <martin@martin.st>
date: Thu Oct 3 07:31:17 EDT 2019
arm64: cdef: Use loads with postincrement in more places in the padding function
--- a/src/arm/64/cdef.S
+++ b/src/arm/64/cdef.S
@@ -129,6 +129,14 @@
3:
.endm
+.macro load_n_incr dst, src, incr, w // Load 4 (w == 4) or 8 bytes from [src] into vector register dst, then advance src by incr
+.if \w == 4 // w is compared at assembly time; only one of the two loads is emitted
+ ld1 {\dst\().s}[0], [\src], \incr // 32-bit load into lane 0 only, other lanes of dst keep their old contents; post-index adds incr to src
+.else // any w other than 4 takes the 8-byte form
+ ld1 {\dst\().8b}, [\src], \incr // 8-byte load using the .8b arrangement; post-index adds incr to src
+.endif
+.endm
+
// void dav1d_cdef_paddingX_neon(uint16_t *tmp, const pixel *src,
// ptrdiff_t src_stride, const pixel (*left)[2],
// /*const*/ pixel *const top[2], int h,
@@ -163,9 +171,8 @@
// CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
ld1 {v0.h}[0], [x3], #2
- ldr \rn\()1, [x1]
ldr h2, [x1, #\w]
- add x1, x1, x2
+ load_n_incr v1, x1, x2, \w
subs w5, w5, #1
uxtl v0.8h, v0.8b
uxtl v1.8h, v1.8b
@@ -179,11 +186,7 @@
1:
// CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
ld1 {v0.h}[0], [x3], #2
-.if \w == 8
- ld1 {v1.8b}, [x1], x2
-.else
- ld1 {v1.s}[0], [x1], x2
-.endif
+ load_n_incr v1, x1, x2, \w
subs w5, w5, #1
uxtl v0.8h, v0.8b
uxtl v1.8h, v1.8b
@@ -198,9 +201,8 @@
b.eq 1f
// !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
- ldr \rn\()0, [x1]
ldr h1, [x1, #\w]
- add x1, x1, x2
+ load_n_incr v0, x1, x2, \w
subs w5, w5, #1
uxtl v0.8h, v0.8b
uxtl v1.8h, v1.8b
@@ -212,11 +214,7 @@
b 3f
1:
// !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
-.if \w == 8
- ld1 {v0.8b}, [x1], x2
-.else
- ld1 {v0.s}[0], [x1], x2
-.endif
+ load_n_incr v0, x1, x2, \w
subs w5, w5, #1
uxtl v0.8h, v0.8b
str s31, [x0]