shithub: dav1d

Download patch

ref: a364126820f4d7d1786d79b08340149a38dc00a3
parent: 2eaabafce3b01fc291b570d54612260fd2c163b1
author: Martin Storsjö <martin@martin.st>
date: Tue Oct 8 05:40:17 EDT 2019

arm64: looprestoration: Use ld2r instead of ld1+dup+dup

--- a/src/arm/64/looprestoration.S
+++ b/src/arm/64/looprestoration.S
@@ -1894,12 +1894,12 @@
 //                               const int16_t wt[2]);
 function sgr_weighted2_neon, export=1
         ldr             x8,  [sp]
-        ld1             {v31.s}[0], [x8]
         cmp             x7,  #2
         add             x10, x0,  x1
         add             x11, x2,  x3
         add             x12, x4,  #2*FILTER_OUT_STRIDE
         add             x13, x5,  #2*FILTER_OUT_STRIDE
+        ld2r            {v30.8h, v31.8h}, [x8] // wt[0], wt[1]
         mov             x8,  #4*FILTER_OUT_STRIDE
         lsl             x1,  x1,  #1
         lsl             x3,  x3,  #1
@@ -1908,8 +1908,6 @@
         sub             x1,  x1,  x9
         sub             x3,  x3,  x9
         sub             x8,  x8,  x9, lsl #1
-        dup             v30.8h, v31.h[0] // wt[0]
-        dup             v31.8h, v31.h[1] // wt[1]
         mov             x9,  x6
         b.lt            2f
 1: