ref: 13fad75d96942a7c895d8f0f5e1577edbf7c0df4
parent: 3bfe8c7c8a553728e2d6556e4a95f5cd246d1c92
author: Martin Storsjö <martin@martin.st>
date: Sat Aug 29 09:29:07 EDT 2020
arm64: mc: Use more descriptive element specifiers for loads/stores in 16 bpc put_neon. For loads and stores of a half/full register, the actual size of the elements doesn't matter, but using the 16-bit (.4h/.8h) specifiers makes the code more readable and understandable.
--- a/src/arm/64/mc16.S
+++ b/src/arm/64/mc16.S
@@ -1004,11 +1004,11 @@
b.gt 2b
ret
4:
- ld1 {v0.8b}, [x2], x3
- ld1 {v1.8b}, [x2], x3
+ ld1 {v0.4h}, [x2], x3
+ ld1 {v1.4h}, [x2], x3
subs w5, w5, #2
- st1 {v0.8b}, [x0], x1
- st1 {v1.8b}, [x0], x1
+ st1 {v0.4h}, [x0], x1
+ st1 {v1.4h}, [x0], x1
b.gt 4b
ret
80:
@@ -1017,11 +1017,11 @@
add x9, x2, x3
lsl x3, x3, #1
8:
- ld1 {v0.16b}, [x2], x3
- ld1 {v1.16b}, [x9], x3
+ ld1 {v0.8h}, [x2], x3
+ ld1 {v1.8h}, [x9], x3
subs w5, w5, #2
- st1 {v0.16b}, [x0], x1
- st1 {v1.16b}, [x8], x1
+ st1 {v0.8h}, [x0], x1
+ st1 {v1.8h}, [x8], x1
b.gt 8b
ret
16: