shithub: openh264

Download patch

ref: 86b3a999d548ae9d544029d262b56ad0b30132f0
parent: 3a1cc63649759469ebbec91da3aaa21719b9a96f
author: Martin Storsjö <martin@martin.st>
date: Tue Jul 8 05:29:23 EDT 2014

Use mov.16b instead of mov.8h

According to the ARM Architecture Reference Manual, the mov (vector)
instruction can only use the arrangement specifiers '8b' and '16b'.
The Apple tools still accept the '8h' form, but it assembles into the
same encoding as '16b'. (When copying one vector register to another, the
element size of the vectors doesn't matter.)

This fixes building with GNU binutils.

--- a/codec/encoder/core/arm64/reconstruct_aarch64_neon.S
+++ b/codec/encoder/core/arm64/reconstruct_aarch64_neon.S
@@ -73,7 +73,7 @@
     cmgt    $4.8h, $0.8h, #0      // if true, location of coef == 11111111
     bif     $3.16b, $1.16b, $4.16b      // if (x<0) reserved part; else keep 0 untouched
     shl     $3.8h, $3.8h, #1
-    mov.8h   $6, $1
+    mov.16b $6, $1
     sub     $1.8h, $1.8h, $3.8h      // if x > 0, -= 0; else x-= 2x
 //  }
 .endm
@@ -315,7 +315,7 @@
 cmgt    \arg4\().8h, \arg0\().8h, #0      // if true, location of coef == 11111111
 bif     \arg3\().16b, \arg1\().16b, \arg4\().16b      // if (x<0) reserved part; else keep 0 untouched
 shl     \arg3\().8h, \arg3\().8h, #1
-mov     \arg6\().8h, \arg1\().8h
+mov     \arg6\().16b, \arg1\().16b
 sub     \arg1\().8h, \arg1\().8h, \arg3\().8h      // if x > 0, -= 0; else x-= 2x
 //  }
 .endm
@@ -533,7 +533,7 @@
     ld1     {v2.8h}, [x1]
     ld1     {v0.8h, v1.8h}, [x0]
     ld1     {v3.8h}, [x2]
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS   v0, v2, v3, v5, v6, v7
     st1     {v2.8h}, [x0], #16
     NEWQUANT_COEF_EACH_16BITS   v1, v4, v3, v5, v6, v7
@@ -545,7 +545,7 @@
     ld1     {v0.8h, v1.8h}, [x0]
     dup     v2.8h, w1      // even ff range [0, 768]
     dup     v3.8h, w2
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS   v0, v2, v3, v5, v6, v7
     st1     {v2.8h}, [x0], #16
     NEWQUANT_COEF_EACH_16BITS   v1, v4, v3, v5, v6, v7
@@ -559,10 +559,10 @@
 
 .rept 4
     ld1     {v0.8h, v1.8h}, [x0], #32
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS   v0, v4, v3, v5, v6, v7
     st1     {v4.8h}, [x1], #16
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS   v1, v4, v3, v5, v6, v7
     st1     {v4.8h}, [x1], #16
 .endr
@@ -575,18 +575,18 @@
     mov     x1, x0
 
     ld1     {v0.8h, v1.8h}, [x0], #32
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS_MAX   v0, v4, v3, v5, v6, v7, v16
     st1     {v4.8h}, [x1], #16
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS_MAX   v1, v4, v3, v5, v6, v7, v17
     st1     {v4.8h}, [x1], #16   // then 1st 16 elem in v16  & v17
 
     ld1     {v0.8h, v1.8h}, [x0], #32
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS_MAX   v0, v4, v3, v5, v6, v7, v18
     st1     {v4.8h}, [x1], #16
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS_MAX   v1, v4, v3, v5, v6, v7, v19
     st1     {v4.8h}, [x1], #16   // then 2st 16 elem in v18 & v19
 
@@ -593,18 +593,18 @@
     SELECT_MAX_IN_ABS_COEF  v16, v17, v18, v19, h20, h21
 
     ld1     {v0.8h, v1.8h}, [x0], #32
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS_MAX   v0, v4, v3, v5, v6, v7, v16
     st1     {v4.8h}, [x1], #16
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS_MAX   v1, v4, v3, v5, v6, v7, v17
     st1     {v4.8h}, [x1], #16   // then 1st 16 elem in v16  & v17
 
     ld1     {v0.8h, v1.8h}, [x0], #32
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS_MAX   v0, v4, v3, v5, v6, v7, v18
     st1     {v4.8h}, [x1], #16
-    mov.8h  v4, v2
+    mov.16b v4, v2
     NEWQUANT_COEF_EACH_16BITS_MAX   v1, v4, v3, v5, v6, v7, v19
     st1     {v4.8h}, [x1], #16   // then 2st 16 elem in v18 & v19
 
@@ -944,4 +944,4 @@
     st1       {v3.16b}, [x0], x1
 .endr
 WELS_ASM_AARCH64_FUNC_END
-#endif
\ No newline at end of file
+#endif