ref: b252334a3a5c76cfc2e6d4e2a57f08d3dc551766
parent: 78d27b7d1c923f632bc266470436e7f46a940d70
author: Martin Storsjö <martin@martin.st>
date: Wed Dec 9 09:06:47 EST 2020
arm: loopfilter: Compare L != 0 before doing a splat
--- a/src/arm/32/loopfilter.S
+++ b/src/arm/32/loopfilter.S
@@ -783,11 +783,11 @@
vld1.8 {d6[]}, [r5] // sharp[1]
sub r5, r5, #8
vbif d1, d0, d3 // if (!l[0][0]) L = l[offset][0]
+ vtst.32 d2, d1, d2 // L != 0
vmul.i32 d1, d1, d4 // L
.ifc \type, y
vdup.32 d15, r2 // vmask[2]
.endif
- vtst.32 d2, d1, d2 // L != 0
vdup.32 d14, r7 // vmask[1]
vmov r10, r11, d2
orrs r10, r10, r11
--- a/src/arm/64/loopfilter.S
+++ b/src/arm/64/loopfilter.S
@@ -1034,11 +1034,11 @@
ld1r {v6.16b}, [x5] // sharp[1]
sub x5, x5, #8
bif v1.16b, v0.16b, v3.16b // if (!l[0][0]) L = l[offset][0]
+ cmtst v2.4s, v1.4s, v2.4s // L != 0
mul v1.4s, v1.4s, v4.4s // L
.ifc \type, y
dup v15.4s, w2 // vmask[2]
.endif
- cmtst v2.4s, v1.4s, v2.4s // L != 0
dup v14.4s, w7 // vmask[1]
mov x16, v2.d[0]
mov x17, v2.d[1]
--- a/src/arm/64/loopfilter16.S
+++ b/src/arm/64/loopfilter16.S
@@ -808,11 +808,11 @@
ld1r {v6.8b}, [x5] // sharp[1]
sub x5, x5, #8
bif v1.8b, v0.8b, v3.8b // if (!l[0][0]) L = l[offset][0]
+ cmtst v2.2s, v1.2s, v2.2s // L != 0
mul v1.2s, v1.2s, v4.2s // L
.ifc \type, y
dup v15.2s, w2 // vmask[2]
.endif
- cmtst v2.2s, v1.2s, v2.2s // L != 0
dup v14.2s, w7 // vmask[1]
mov x16, v2.d[0]
cmp x16, #0