shithub: openh264

Download patch

ref: cde30c155b7a8c1f756ba805e6bd8a0e48992faa
parent: 43bc9d7c07854fc6d5846dbe89a2d8dac4890bf8
author: Martin Storsjö <martin@martin.st>
date: Fri Mar 14 17:39:13 EDT 2014

Avoid clobbering the registers q4-q7 in DeblockingBSCalcEnc_neon

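The registers q4-q7 (d8-d15) are callee-saved under the ARM procedure
call standard, so they must not be modified without being restored.

In BS_COMPARE_MV, remap q5 to q8, q6 to q9, q7 to q10 and q8 to q11.
In DeblockingBSCalcEnc_neon, push q4 to the stack on entry and pop it
before returning. Since the push moves sp by another 16 bytes, the
stack offset used to load the BS pointer changes from 12 to 28.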

This was missed previously since the codec unit tests don't yet
test encoding with the loop filter enabled.

--- a/codec/common/deblocking_neon.S
+++ b/codec/common/deblocking_neon.S
@@ -860,24 +860,24 @@
 
 .macro BS_COMPARE_MV //in: $0,$1(const),$2(const),$3(const),$4(const); out:$5, $6
     mov       r6, #4
-    vabd.s16  q5, $0, $1
-    vabd.s16  q6, $1, $2
+    vabd.s16  q8, $0, $1
+    vabd.s16  q9, $1, $2
 	vdup.s16  $0, r6
-    vabd.s16  q7, $2, $3
-    vabd.s16  q8, $3, $4
+    vabd.s16  q10, $2, $3
+    vabd.s16  q11, $3, $4
 
-    vcge.s16  q5, $0
-    vcge.s16  q6, $0
-    vcge.s16  q7, $0
     vcge.s16  q8, $0
+    vcge.s16  q9, $0
+    vcge.s16  q10, $0
+    vcge.s16  q11, $0
 
-	vpadd.i16 d10, d10, d11
-    vpadd.i16 d11, d12, d13
-    vpadd.i16 d12, d14, d15
-    vpadd.i16 d13, d16, d17
+	vpadd.i16 d16, d16, d17
+    vpadd.i16 d17, d18, d19
+    vpadd.i16 d18, d20, d21
+    vpadd.i16 d19, d22, d23
 
-    vaddhn.i16  $5, q5, q5
-    vaddhn.i16  $6, q6, q6
+    vaddhn.i16  $5, q8, q8
+    vaddhn.i16  $6, q9, q9
 .endm
 
 .macro BS_MV_CHECK
@@ -953,24 +953,24 @@
 
 .macro BS_COMPARE_MV  arg0, arg1, arg2, arg3, arg4, arg5, arg6 //in: $0,$1(const),$2(const),$3(const),$4(const); out:$5, $6
     mov       r6, #4
-    vabd.s16  q5, \arg0, \arg1
-    vabd.s16  q6, \arg1, \arg2
+    vabd.s16  q8, \arg0, \arg1
+    vabd.s16  q9, \arg1, \arg2
     vdup.s16  \arg0, r6
-    vabd.s16  q7, \arg2, \arg3
-    vabd.s16  q8, \arg3, \arg4
+    vabd.s16  q10, \arg2, \arg3
+    vabd.s16  q11, \arg3, \arg4
 
-    vcge.s16  q5, \arg0
-    vcge.s16  q6, \arg0
-    vcge.s16  q7, \arg0
     vcge.s16  q8, \arg0
+    vcge.s16  q9, \arg0
+    vcge.s16  q10, \arg0
+    vcge.s16  q11, \arg0
 
-    vpadd.i16 d10, d10, d11
-    vpadd.i16 d11, d12, d13
-    vpadd.i16 d12, d14, d15
-    vpadd.i16 d13, d16, d17
+    vpadd.i16 d16, d16, d17
+    vpadd.i16 d17, d18, d19
+    vpadd.i16 d18, d20, d21
+    vpadd.i16 d19, d22, d23
 
-    vaddhn.i16  \arg5, q5, q5
-    vaddhn.i16  \arg6, q6, q6
+    vaddhn.i16  \arg5, q8, q8
+    vaddhn.i16  \arg6, q9, q9
 .endm
 
 .macro BS_MV_CHECK  arg0, arg1, arg2, arg3, arg4, arg5, arg6
@@ -1013,8 +1013,9 @@
 WELS_ASM_FUNC_BEGIN DeblockingBSCalcEnc_neon
 
 	stmdb sp!, {r5-r7}
+	vpush {q4}
 
-	ldr  r5, [sp, #12]	//Save BS to r5
+	ldr  r5, [sp, #28]	//Save BS to r5
 
 	/* Checking the nzc status */
 	BS_NZC_CHECK r0, r2, r3, q14, q15 //q14,q15 save the nzc status
@@ -1045,6 +1046,7 @@
 
 	//vstm r5, {q0, q1}
     vst1.32 {q0, q1}, [r5]
+	vpop {q4}
 	ldmia sp!, {r5-r7}
 WELS_ASM_FUNC_END
 #endif