ref: 18ef9556b71e3b6b839c35ae614ef0bb5b6a2179
parent: c204da0ff33a0d563d6c632b42799e4fbc48f402
author: Martin Storsjö <martin@martin.st>
date: Wed Feb 27 06:29:14 EST 2019
arm: looprestoration: Simplify a few padding cases in wiener_filter_h_neon
--- a/src/arm/32/looprestoration.S
+++ b/src/arm/32/looprestoration.S
@@ -283,14 +283,12 @@
.word 66f - L(variable_shift_tbl) + CONFIG_THUMB
.word 77f - L(variable_shift_tbl) + CONFIG_THUMB
+44: // 4 pixels valid in d2/d16 (the low halves of q1/q8), fill d3/d17 (the high halves) with padding.
+ vmov d3, d4 // d4 is the low half of q2, the register the other cases shift padding in from.
+ vmov d17, d18 // Same for q8: its high half takes the low half of q9.
+ b 88f
// Shift q1 right, shifting out invalid pixels,
// shift q1 left to the original offset, shifting in padding pixels.
-44: // 4 pixels valid
- vext.8 q1, q1, q1, #8
- vext.8 q1, q1, q2, #8
- vext.8 q8, q8, q8, #8
- vext.8 q8, q8, q9, #8
- b 88f
55: // 5 pixels valid
vext.8 q1, q1, q1, #10
vext.8 q1, q1, q2, #6
--- a/src/arm/64/looprestoration.S
+++ b/src/arm/64/looprestoration.S
@@ -224,14 +224,12 @@
mov v3.16b, v28.16b
mov v5.16b, v29.16b
br x11
+44: // 4 pixels valid in the low halves of v2/v4, fill the high halves with padding.
+ ins v2.d[1], v3.d[0] // Upper 64 bits of v2 <- lowest 64 bits (4 pixels) of the padding in v3.
+ ins v4.d[1], v5.d[0] // Same for v4, taking the padding from v5.
+ b 88f
// Shift v2 right, shifting out invalid pixels,
// shift v2 left to the original offset, shifting in padding pixels.
-44: // 4 pixels valid
- ext v2.16b, v2.16b, v2.16b, #8
- ext v2.16b, v2.16b, v3.16b, #8
- ext v4.16b, v4.16b, v4.16b, #8
- ext v4.16b, v4.16b, v5.16b, #8
- b 88f
55: // 5 pixels valid
ext v2.16b, v2.16b, v2.16b, #10
ext v2.16b, v2.16b, v3.16b, #6
@@ -238,17 +236,13 @@
ext v4.16b, v4.16b, v4.16b, #10
ext v4.16b, v4.16b, v5.16b, #6
b 88f
-66: // 6 pixels valid
- ext v2.16b, v2.16b, v2.16b, #12
- ext v2.16b, v2.16b, v3.16b, #4
- ext v4.16b, v4.16b, v4.16b, #12
- ext v4.16b, v4.16b, v5.16b, #4
+66: // 6 pixels valid in v2/v4, fill the upper 2 pixels with padding.
+ ins v2.s[3], v3.s[0] // Top 32-bit lane of v2 (pixels 6-7) <- lowest 2 pixels of the padding in v3.
+ ins v4.s[3], v5.s[0] // Same for v4, taking the padding from v5.
b 88f
-77: // 7 pixels valid
- ext v2.16b, v2.16b, v2.16b, #14
- ext v2.16b, v2.16b, v3.16b, #2
- ext v4.16b, v4.16b, v4.16b, #14
- ext v4.16b, v4.16b, v5.16b, #2
+77: // 7 pixels valid in v2/v4, fill the last pixel with padding.
+ ins v2.h[7], v3.h[0] // Top 16-bit lane of v2 (pixel 7) <- lowest pixel of the padding in v3.
+ ins v4.h[7], v5.h[0] // Same for v4, taking the padding from v5.
b 88f
L(variable_shift_tbl):