shithub: openh264

Download patch

ref: cf6ae234130ce180688eb20299078765bb7dc55c
parent: 894f073e4b64d66c9825c245b6f2c5da10494056
author: Martin Storsjö <martin@martin.st>
date: Fri Jun 27 19:43:08 EDT 2014

Prefer modes in the same order as the reference in the aarch64 combined intra SATD/SAD functions

This fixes encoding tests on aarch64.

--- a/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S
+++ b/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S
@@ -97,14 +97,22 @@
 #ifdef __APPLE__
 .macro SELECT_BEST_COST
     cmp     w1, $0
-    csel    $0, $0, w1, hs
-    cset    w7, lo
+    csel    $0, $0, w1, $2      // keep the 1st-arg register if the 3rd-arg condition holds, else take w1
+    cset    w7, $1              // w7 = 1 when the 2nd-arg condition holds for the same compare, else 0
     cmp     w2, $0
     mov     w6, #2
-    csel    $0, $0, w2, hs
-    csel    w7, w7, w6, hs
+    csel    $0, $0, w2, $2      // same selection, now against w2
+    csel    w7, w7, w6, $2      // w7 unchanged if the register was kept, else w7 = 2 (from w6)
 .endm
 
+.macro SELECT_BEST_COST_PREFER_HIGHER arg0
+    SELECT_BEST_COST \arg0, ls, hi      // ls/hi: w1 or w2 replaces the cost when unsigned <= it, so a tie selects the later candidate
+.endm
+
+.macro SELECT_BEST_COST_PREFER_LOWER arg0
+    SELECT_BEST_COST \arg0, lo, hs      // lo/hs: w1 or w2 replaces the cost only when unsigned < it, so a tie keeps the earlier candidate
+.endm
+
 .macro LOAD_CHROMA_DATA
     sub     x9, $0, x1
     ld1     {$1}, [x9]      //top_cb
@@ -173,16 +181,24 @@
     add     $7, $7, v4.4s
 .endm
 #else
-.macro SELECT_BEST_COST arg0
+.macro SELECT_BEST_COST arg0, arg1, arg2      // arg0: cost register (read and updated); arg1: cset condition; arg2: csel condition
     cmp     w1, \arg0
-    csel    \arg0, \arg0, w1, hs
-    cset    w7, lo
+    csel    \arg0, \arg0, w1, \arg2      // keep arg0 if condition arg2 holds, else take w1
+    cset    w7, \arg1                    // w7 = 1 when condition arg1 holds for the same compare, else 0
     cmp     w2, \arg0
     mov     w6, #2
-    csel    \arg0, \arg0, w2, hs
-    csel    w7, w7, w6, hs
+    csel    \arg0, \arg0, w2, \arg2      // same selection, now against w2
+    csel    w7, w7, w6, \arg2            // w7 unchanged if arg0 was kept, else w7 = 2 (from w6)
 .endm
 
+.macro SELECT_BEST_COST_PREFER_HIGHER arg0
+    SELECT_BEST_COST \arg0, ls, hi      // ls/hi: w1 or w2 replaces the cost when unsigned <= it, so a tie selects the later candidate
+.endm
+
+.macro SELECT_BEST_COST_PREFER_LOWER arg0
+    SELECT_BEST_COST \arg0, lo, hs      // lo/hs: w1 or w2 replaces the cost only when unsigned < it, so a tie keeps the earlier candidate
+.endm
+
 .macro LOAD_CHROMA_DATA arg0, arg1, arg2
     sub     x9, \arg0, x1
     ld1     {\arg1}, [x9]      //top_cb
@@ -347,7 +363,7 @@
     saddlv  s31, v31.8h
     fmov    w0, s31
 
-    SELECT_BEST_COST w0
+    SELECT_BEST_COST_PREFER_HIGHER w0
 
     str     w7, [x4]
 WELS_ASM_ARCH64_FUNC_END
@@ -399,7 +415,7 @@
     fmov    w2, s31
     add     w2, w2, w5, lsl #1
 
-    SELECT_BEST_COST w0
+    SELECT_BEST_COST_PREFER_LOWER w0
 
     str     w7, [x4]
 WELS_ASM_ARCH64_FUNC_END
@@ -464,7 +480,7 @@
     add     w2, w2, w6
 
     mov     w10, w0
-    SELECT_BEST_COST w10
+    SELECT_BEST_COST_PREFER_HIGHER w10
 
     str     w7, [x5]
 
@@ -579,7 +595,7 @@
     addv    s31, v31.4s
     fmov    w0, s31
 
-    SELECT_BEST_COST w0
+    SELECT_BEST_COST_PREFER_HIGHER w0
 
     str     w7, [x4]
 WELS_ASM_ARCH64_FUNC_END
@@ -656,7 +672,7 @@
     fmov    w2, s31
     add     w2, w2, w5, lsl #1
 
-    SELECT_BEST_COST w0
+    SELECT_BEST_COST_PREFER_LOWER w0
 
     str     w7, [x4]