shithub: openh264

Download patch

ref: 218adc7e29c346614033fbe94006dc47d26781bc
parent: 3567a2e316b81b32cd8036110ff16a149512986f
author: dongzhang <dongzha@cisco.com>
date: Fri May 9 09:50:55 EDT 2014

Fix a bug in deblocking for the 32-bit ARM NEON implementation

--- a/codec/common/arm/deblocking_neon.S
+++ b/codec/common/arm/deblocking_neon.S
@@ -58,13 +58,16 @@
 
 
 .macro	DIFF_LUMA_LT4_P1_Q1
-    vabd.u8	$9, $0, $2
-    vclt.u8	$9, $9, $4
+    vmov.i8 $9, #128
     vrhadd.u8	$8, $2, $3
     vhadd.u8	$8, $0, $8
-    vsub.s8	$8, $8, $1
+    vsub.s8	$8, $8, $9
+    vsub.s8	$9, $1, $9
+    vqsub.s8	$8, $8, $9
     vmax.s8	$8, $8, $5
     vmin.s8	$8, $8, $6
+    vabd.u8	$9, $0, $2
+    vclt.u8	$9, $9, $4
     vand.s8	$8, $8, $9
     vand.s8	$8, $8, $7
     vadd.u8	$8, $1, $8
@@ -76,7 +79,7 @@
     vsubl.u8	$6, $2, $1
     vshl.s16	$6, $6, #2
     vadd.s16	$5, $5, $6
-    vrshrn.s16		$4, $5, #3
+    vqrshrn.s16		$4, $5, #3
 .endm
 
 .macro	DIFF_LUMA_EQ4_P2P1P0
@@ -172,13 +175,16 @@
 .endm
 
 .macro	DIFF_LUMA_LT4_P1_Q1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
-    vabd.u8	\arg9, \arg0, \arg2
-    vclt.u8	\arg9, \arg9, \arg4
+    vmov.i8 \arg9, #128
     vrhadd.u8	\arg8, \arg2, \arg3
     vhadd.u8	\arg8, \arg0, \arg8
-    vsub.s8	\arg8, \arg8, \arg1
+    vsub.s8	\arg8, \arg8, \arg9
+    vsub.s8	\arg9, \arg1, \arg9
+    vqsub.s8    \arg8, \arg8, \arg9
     vmax.s8	\arg8, \arg8, \arg5
     vmin.s8	\arg8, \arg8, \arg6
+    vabd.u8	\arg9, \arg0, \arg2
+    vclt.u8	\arg9, \arg9, \arg4
     vand.s8	\arg8, \arg8, \arg9
     vand.s8	\arg8, \arg8, \arg7
     vadd.u8	\arg8, \arg1, \arg8
@@ -190,7 +196,7 @@
     vsubl.u8	\arg6, \arg2, \arg1
     vshl.s16	\arg6, \arg6, #2
     vadd.s16	\arg5, \arg5, \arg6
-    vrshrn.s16		\arg4, \arg5, #3
+    vqrshrn.s16		\arg4, \arg5, #3
 .endm