shithub: openh264

ref: d15534ecb84bd0c3236d14053afcfb0fec78e354
parent: 8662a55bc3c1307b3f04e44ae2c9771b66229560
author: Martin Storsjö <martin@martin.st>
date: Tue Jun 17 06:00:07 EDT 2014

Get rid of mixed tabs and spaces in the aarch64 assembly
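
For reference, a whitespace cleanup like this one can be reproduced mechanically. The sketch below is a hypothetical Python helper, not the author's actual tooling (the commit does not say how the conversion was done): it expands hard tabs assuming 4-column tab stops, which matches most of the re-aligned operand columns in the patch; the .macro declaration lines were instead normalized to a single space.

#!/usr/bin/env python3
"""Hypothetical tab-expansion helper; assumes 4-column tab stops."""
import sys

TAB_WIDTH = 4  # assumption: matches most of the alignment in this patch

def expand_tabs(path):
    with open(path, encoding="utf-8") as f:
        text = f.read()
    # str.expandtabs pads each '\t' out to the next TAB_WIDTH column,
    # so operands that were already space-aligned keep their columns.
    fixed = "\n".join(line.expandtabs(TAB_WIDTH) for line in text.split("\n"))
    with open(path, "w", encoding="utf-8") as f:
        f.write(fixed)

if __name__ == "__main__":
    for p in sys.argv[1:]:  # e.g. codec/common/arm64/*.S
        expand_tabs(p)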

--- a/codec/common/arm64/deblocking_aarch64_neon.S
+++ b/codec/common/arm64/deblocking_aarch64_neon.S
@@ -36,7 +36,7 @@
 #include "arm_arch64_common_macro.S"
 #ifdef __APPLE__
 
-.macro	MASK_MATRIX
+.macro MASK_MATRIX
   uabd    $6.16b, $1.16b, $2.16b
   cmhi    $6.16b, $4.16b, $6.16b
 
@@ -49,9 +49,9 @@
   and     $6.16b, $6.16b, $4.16b
 .endm
 
-.macro	DIFF_LUMA_LT4_P1_Q1 //(Use Tmp v23, v24)
+.macro DIFF_LUMA_LT4_P1_Q1 //(Use Tmp v23, v24)
   //v0, v1, v2, v3, v17(beta), v18(-Tc0), v6(Tc0), v7(flag), v19, v20
-  urhadd	$8.16b, $2.16b, $3.16b
+  urhadd    $8.16b, $2.16b, $3.16b
   uhadd   $8.16b, $0.16b, $8.16b
   usubl   $9.8h, $8.8b, $1.8b
   sqxtn   $9.8b, $9.8h
@@ -59,9 +59,9 @@
   sqxtn2  $9.16b, $8.8h
   smax    $8.16b, $9.16b, $5.16b
 //
-  smin	$8.16b, $8.16b, $6.16b
-  uabd	$9.16b, $0.16b, $2.16b
-  cmhi	$9.16b, $4.16b, $9.16b
+  smin  $8.16b, $8.16b, $6.16b
+  uabd  $9.16b, $0.16b, $2.16b
+  cmhi  $9.16b, $4.16b, $9.16b
   and     $8.16b, $8.16b, $9.16b
   and     $8.16b, $8.16b, $7.16b
   add     $8.16b, $1.16b, $8.16b
@@ -68,29 +68,29 @@
   abs     $9.16b, $9.16b
 .endm
 
-.macro	DIFF_LUMA_LT4_P0_Q0_1
-  usubl	$5.8h, $0.8b, $3.8b
-  usubl	$6.8h, $2.8b, $1.8b
+.macro DIFF_LUMA_LT4_P0_Q0_1
+  usubl $5.8h, $0.8b, $3.8b
+  usubl $6.8h, $2.8b, $1.8b
   shl     $6.8h, $6.8h, #2
   add     $5.8h, $5.8h, $6.8h
   sqrshrn  $4.8b, $5.8h, #3
 .endm
 
-.macro	DIFF_LUMA_LT4_P0_Q0_2
-  usubl2	$5.8h, $0.16b, $3.16b
-  usubl2	$6.8h, $2.16b, $1.16b
+.macro DIFF_LUMA_LT4_P0_Q0_2
+  usubl2    $5.8h, $0.16b, $3.16b
+  usubl2    $6.8h, $2.16b, $1.16b
   shl     $6.8h, $6.8h, #2
   add     $5.8h, $5.8h, $6.8h
   sqrshrn2  $4.16b, $5.8h, #3
 .endm
 
-.macro	EXTRACT_DELTA_INTO_TWO_PART
-  cmge	$1.16b, $0.16b, #0
+.macro EXTRACT_DELTA_INTO_TWO_PART
+  cmge  $1.16b, $0.16b, #0
   and     $1.16b, $0.16b, $1.16b
   sub     $0.16b, $1.16b, $0.16b
 .endm
 
-.macro	DIFF_LUMA_EQ4_P2P1P0_1
+.macro DIFF_LUMA_EQ4_P2P1P0_1
   uaddl $8.8h, $1.8b, $2.8b
   uaddl $9.8h, $3.8b, $4.8b
   add   $9.8h, $9.8h, $8.8h
@@ -99,22 +99,22 @@
   shl   $8.8h, $8.8h, #1
   add   $8.8h, $9.8h, $8.8h
 
-  rshrn	$0.8b, $9.8h, #2
-  rshrn	$7.8b, $8.8h, #3
+  rshrn $0.8b, $9.8h, #2
+  rshrn $7.8b, $8.8h, #3
   shl     $9.8h, $9.8h, #1
   usubl   $8.8h, $5.8b, $1.8b
   add     $9.8h, $8.8h, $9.8h
 
-  uaddl	$8.8h, $2.8b, $5.8b
-  uaddw	$8.8h, $8.8h, $2.8b
-  uaddw	$8.8h, $8.8h, $3.8b
+  uaddl $8.8h, $2.8b, $5.8b
+  uaddw $8.8h, $8.8h, $2.8b
+  uaddw $8.8h, $8.8h, $3.8b
 
-  rshrn	$9.8b, $9.8h, #3
-  rshrn	$8.8b, $8.8h, #2
-  bsl		$6.8b, $9.8b, $8.8b
+  rshrn $9.8b, $9.8h, #3
+  rshrn $8.8b, $8.8h, #2
+  bsl       $6.8b, $9.8b, $8.8b
 .endm
 
-.macro	DIFF_LUMA_EQ4_P2P1P0_2
+.macro DIFF_LUMA_EQ4_P2P1P0_2
   uaddl2 $8.8h, $1.16b, $2.16b
   uaddl2 $9.8h, $3.16b, $4.16b
   add   $9.8h, $9.8h, $8.8h
@@ -123,23 +123,23 @@
   shl   $8.8h, $8.8h, #1
   add   $8.8h, $9.8h, $8.8h
 
-  rshrn2	$0.16b, $9.8h, #2
-  rshrn2	$7.16b, $8.8h, #3
+  rshrn2    $0.16b, $9.8h, #2
+  rshrn2    $7.16b, $8.8h, #3
   shl     $9.8h, $9.8h, #1
   usubl2   $8.8h, $5.16b, $1.16b
   add     $9.8h, $8.8h, $9.8h
 
-  uaddl2	$8.8h, $2.16b, $5.16b
-  uaddw2	$8.8h, $8.8h, $2.16b
-  uaddw2	$8.8h, $8.8h, $3.16b
+  uaddl2    $8.8h, $2.16b, $5.16b
+  uaddw2    $8.8h, $8.8h, $2.16b
+  uaddw2    $8.8h, $8.8h, $3.16b
 
-  rshrn2	$9.16b, $9.8h, #3
-  rshrn2	$8.16b, $8.8h, #2
-  bsl		$6.16b, $9.16b, $8.16b
+  rshrn2    $9.16b, $9.8h, #3
+  rshrn2    $8.16b, $8.8h, #2
+  bsl       $6.16b, $9.16b, $8.16b
 .endm
 
 
-.macro	DIFF_CHROMA_EQ4_P0Q0_1
+.macro DIFF_CHROMA_EQ4_P0Q0_1
   uaddl $4.8h, $0.8b, $3.8b
   shl   $4.8h, $4.8h, #1
   usubl $5.8h, $1.8b, $3.8b
@@ -150,7 +150,7 @@
   rshrn $7.8b, $5.8h, #2
 .endm
 
-.macro	DIFF_CHROMA_EQ4_P0Q0_2
+.macro DIFF_CHROMA_EQ4_P0Q0_2
   uaddl2 $4.8h, $0.16b, $3.16b
   shl   $4.8h, $4.8h, #1
   usubl2 $5.8h, $1.16b, $3.16b
@@ -161,40 +161,40 @@
   rshrn2 $7.16b, $5.8h, #2
 .endm
 
-.macro	DIFF_LUMA_EQ4_MASK
-  mov.16b	$3, $2
-  bsl	$3.16b, $0.16b, $1.16b
+.macro DIFF_LUMA_EQ4_MASK
+  mov.16b   $3, $2
+  bsl   $3.16b, $0.16b, $1.16b
 .endm
 
-.macro	LOAD_LUMA_DATA_3
-  ld3	{$0.b, $1.b, $2.b} [$6], [x2], x1
-  ld3	{$3.b, $4.b, $5.b} [$6], [x0], x1
+.macro LOAD_LUMA_DATA_3
+  ld3   {$0.b, $1.b, $2.b} [$6], [x2], x1
+  ld3   {$3.b, $4.b, $5.b} [$6], [x0], x1
 .endm
 
-.macro	LOAD_LUMA_DATA_4
-  ld4	{$0.b, $1.b, $2.b, $3.b} [$8], [x3], x1
-  ld4	{$4.b, $5.b, $6.b, $7.b} [$8], [x0], x1
+.macro LOAD_LUMA_DATA_4
+  ld4   {$0.b, $1.b, $2.b, $3.b} [$8], [x3], x1
+  ld4   {$4.b, $5.b, $6.b, $7.b} [$8], [x0], x1
 .endm
 
-.macro	STORE_LUMA_DATA_4
-  st4	{$0.b, $1.b, $2.b, $3.b} [$4], [x0], x1
-  st4	{$0.b, $1.b, $2.b, $3.b} [$5], [x2], x1
+.macro STORE_LUMA_DATA_4
+  st4   {$0.b, $1.b, $2.b, $3.b} [$4], [x0], x1
+  st4   {$0.b, $1.b, $2.b, $3.b} [$5], [x2], x1
 .endm
 
-.macro	STORE_LUMA_DATA_3
+.macro STORE_LUMA_DATA_3
   st3 {$0.b, $1.b, $2.b} [$6], [x3], x1
-  st3	{$3.b, $4.b, $5.b} [$6], [x0], x1
+  st3   {$3.b, $4.b, $5.b} [$6], [x0], x1
 .endm
 
-.macro	LOAD_CHROMA_DATA_4
-  ld4	{$0.b, $1.b, $2.b, $3.b} [$5], [$4], x2
+.macro LOAD_CHROMA_DATA_4
+  ld4   {$0.b, $1.b, $2.b, $3.b} [$5], [$4], x2
 .endm
 
-.macro	STORE_CHROMA_DATA_2
-  st2	{$0.b, $1.b} [$3], [$2], x2
+.macro STORE_CHROMA_DATA_2
+  st2   {$0.b, $1.b} [$3], [$2], x2
 .endm
 
-.macro	ZERO_JUMP_END
+.macro ZERO_JUMP_END
   mov $1, $0.d[0]
   mov $2, $0.d[1]
   orr $1, $1, $2
@@ -294,7 +294,7 @@
 
 #else
 
-.macro	MASK_MATRIX arg0, arg1, arg2, arg3, arg4, arg5, arg6
+.macro MASK_MATRIX arg0, arg1, arg2, arg3, arg4, arg5, arg6
   uabd    \arg6.16b, \arg1.16b, \arg2.16b
   cmhi    \arg6.16b, \arg4.16b, \arg6.16b
 
@@ -307,9 +307,9 @@
   and     \arg6.16b, \arg6.16b, \arg4.16b
 .endm
 
-.macro	DIFF_LUMA_LT4_P1_Q1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
+.macro DIFF_LUMA_LT4_P1_Q1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
   //v0, v1, v2, v3, v17(beta), v18(-Tc0), v6(Tc0), v7(flag), v19, v20
-  urhadd	\arg8.16b, \arg2.16b, \arg3.16b
+  urhadd    \arg8.16b, \arg2.16b, \arg3.16b
   uhadd   \arg8.16b, \arg0.16b, \arg8.16b
   usubl   \arg9.8h, \arg8.8b, \arg1.8b
   sqxtn   \arg9.8b, \arg9.8h
@@ -317,9 +317,9 @@
   sqxtn2  \arg9.16b, \arg8.8h
   smax    \arg8.16b, \arg9.16b, \arg5.16b
   //
-  smin	\arg8.16b, \arg8.16b, \arg6.16b
-  uabd	\arg9.16b, \arg0.16b, \arg2.16b
-  cmhi	\arg9.16b, \arg4.16b, \arg9.16b
+  smin  \arg8.16b, \arg8.16b, \arg6.16b
+  uabd  \arg9.16b, \arg0.16b, \arg2.16b
+  cmhi  \arg9.16b, \arg4.16b, \arg9.16b
   and     \arg8.16b, \arg8.16b, \arg9.16b
   and     \arg8.16b, \arg8.16b, \arg7.16b
   add     \arg8.16b, \arg1.16b, \arg8.16b
@@ -326,29 +326,29 @@
   abs     \arg9.16b, \arg9.16b
 .endm
 
-.macro	DIFF_LUMA_LT4_P0_Q0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6
-  usubl	\arg5.8h, \arg0.8b, \arg3.8b
-  usubl	\arg6.8h, \arg2.8b, \arg1.8b
+.macro DIFF_LUMA_LT4_P0_Q0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6
+  usubl \arg5.8h, \arg0.8b, \arg3.8b
+  usubl \arg6.8h, \arg2.8b, \arg1.8b
   shl     \arg6.8h, \arg6.8h, #2
   add     \arg5.8h, \arg5.8h, \arg6.8h
   sqrshrn  \arg4.8b, \arg5.8h, #3
 .endm
 
-.macro	DIFF_LUMA_LT4_P0_Q0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6
-  usubl2	\arg5.8h, \arg0.16b, \arg3.16b
-  usubl2	\arg6.8h, \arg2.16b, \arg1.16b
+.macro DIFF_LUMA_LT4_P0_Q0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6
+  usubl2    \arg5.8h, \arg0.16b, \arg3.16b
+  usubl2    \arg6.8h, \arg2.16b, \arg1.16b
   shl     \arg6.8h, \arg6.8h, #2
   add     \arg5.8h, \arg5.8h, \arg6.8h
   sqrshrn2  \arg4.16b, \arg5.8h, #3
 .endm
 
-.macro	EXTRACT_DELTA_INTO_TWO_PART arg0, arg1
-  cmge	\arg1.16b, \arg0.16b, #0
+.macro EXTRACT_DELTA_INTO_TWO_PART arg0, arg1
+  cmge  \arg1.16b, \arg0.16b, #0
   and     \arg1.16b, \arg0.16b, \arg1.16b
   sub     \arg0.16b, \arg1.16b, \arg0.16b
 .endm
 
-.macro	DIFF_LUMA_EQ4_P2P1P0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
+.macro DIFF_LUMA_EQ4_P2P1P0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
   uaddl \arg8.8h, \arg1.8b, \arg2.8b
   uaddl \arg9.8h, \arg3.8b, \arg4.8b
   add   \arg9.8h, \arg9.8h, \arg8.8h
@@ -357,22 +357,22 @@
   shl   \arg8.8h, \arg8.8h, #1
   add   \arg8.8h, \arg9.8h, \arg8.8h
 
-  rshrn	\arg0.8b, \arg9.8h, #2
-  rshrn	\arg7.8b, \arg8.8h, #3
+  rshrn \arg0.8b, \arg9.8h, #2
+  rshrn \arg7.8b, \arg8.8h, #3
   shl     \arg9.8h, \arg9.8h, #1
   usubl   \arg8.8h, \arg5.8b, \arg1.8b
   add     \arg9.8h, \arg8.8h, \arg9.8h
 
-  uaddl	\arg8.8h, \arg2.8b, \arg5.8b
-  uaddw	\arg8.8h, \arg8.8h, \arg2.8b
-  uaddw	\arg8.8h, \arg8.8h, \arg3.8b
+  uaddl \arg8.8h, \arg2.8b, \arg5.8b
+  uaddw \arg8.8h, \arg8.8h, \arg2.8b
+  uaddw \arg8.8h, \arg8.8h, \arg3.8b
 
-  rshrn	\arg9.8b, \arg9.8h, #3
-  rshrn	\arg8.8b, \arg8.8h, #2
-  bsl		\arg6.8b, \arg9.8b, \arg8.8b
+  rshrn \arg9.8b, \arg9.8h, #3
+  rshrn \arg8.8b, \arg8.8h, #2
+  bsl       \arg6.8b, \arg9.8b, \arg8.8b
 .endm
 
-.macro	DIFF_LUMA_EQ4_P2P1P0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
+.macro DIFF_LUMA_EQ4_P2P1P0_2 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9
   uaddl2 \arg8.8h, \arg1.16b, \arg2.16b
   uaddl2 \arg9.8h, \arg3.16b, \arg4.16b
   add   \arg9.8h, \arg9.8h, \arg8.8h
@@ -381,23 +381,23 @@
   shl   \arg8.8h, \arg8.8h, #1
   add   \arg8.8h, \arg9.8h, \arg8.8h
 
-  rshrn2	\arg0.16b, \arg9.8h, #2
-  rshrn2	\arg7.16b, \arg8.8h, #3
+  rshrn2    \arg0.16b, \arg9.8h, #2
+  rshrn2    \arg7.16b, \arg8.8h, #3
   shl     \arg9.8h, \arg9.8h, #1
   usubl2   \arg8.8h, \arg5.16b, \arg1.16b
   add     \arg9.8h, \arg8.8h, \arg9.8h
 
-  uaddl2	\arg8.8h, \arg2.16b, \arg5.16b
-  uaddw2	\arg8.8h, \arg8.8h, \arg2.16b
-  uaddw2	\arg8.8h, \arg8.8h, \arg3.16b
+  uaddl2    \arg8.8h, \arg2.16b, \arg5.16b
+  uaddw2    \arg8.8h, \arg8.8h, \arg2.16b
+  uaddw2    \arg8.8h, \arg8.8h, \arg3.16b
 
-  rshrn2	\arg9.16b, \arg9.8h, #3
-  rshrn2	\arg8.16b, \arg8.8h, #2
-  bsl		\arg6.16b, \arg9.16b, \arg8.16b
+  rshrn2    \arg9.16b, \arg9.8h, #3
+  rshrn2    \arg8.16b, \arg8.8h, #2
+  bsl       \arg6.16b, \arg9.16b, \arg8.16b
 .endm
 
 
-.macro	DIFF_CHROMA_EQ4_P0Q0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
+.macro DIFF_CHROMA_EQ4_P0Q0_1 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
   uaddl \arg4.8h, \arg0.8b, \arg3.8b
   shl   \arg4.8h, \arg4.8h, #1
   usubl \arg5.8h, \arg1.8b, \arg3.8b
@@ -408,7 +408,7 @@
   rshrn \arg7.8b, \arg5.8h, #2
 .endm
 
-.macro	DIFF_CHROMA_EQ4_P0Q0_2  arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
+.macro DIFF_CHROMA_EQ4_P0Q0_2  arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
   uaddl2 \arg4.8h, \arg0.16b, \arg3.16b
   shl   \arg4.8h, \arg4.8h, #1
   usubl2 \arg5.8h, \arg1.16b, \arg3.16b
@@ -419,40 +419,40 @@
   rshrn2 \arg7.16b, \arg5.8h, #2
 .endm
 
-.macro	DIFF_LUMA_EQ4_MASK arg0, arg1, arg2, arg3
-  mov.16b	\arg3, \arg2
-  bsl	\arg3.16b, \arg0.16b, \arg1.16b
+.macro DIFF_LUMA_EQ4_MASK arg0, arg1, arg2, arg3
+  mov.16b   \arg3, \arg2
+  bsl   \arg3.16b, \arg0.16b, \arg1.16b
 .endm
 
-.macro	LOAD_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6
-  ld3	{\arg0.b, \arg1.b, \arg2.b} [\arg6], [x2], x1
-  ld3	{\arg3.b, \arg4.b, \arg5.b} [\arg6], [x0], x1
+.macro LOAD_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6
+  ld3   {\arg0.b, \arg1.b, \arg2.b} [\arg6], [x2], x1
+  ld3   {\arg3.b, \arg4.b, \arg5.b} [\arg6], [x0], x1
 .endm
 
-.macro	LOAD_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8
-  ld4	{\arg0.b, \arg1.b, \arg2.b, \arg3.b} [\arg8], [x3], x1
-  ld4	{\arg4.b, \arg5.b, \arg6.b, \arg7.b} [\arg8], [x0], x1
+.macro LOAD_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8
+  ld4   {\arg0.b, \arg1.b, \arg2.b, \arg3.b} [\arg8], [x3], x1
+  ld4   {\arg4.b, \arg5.b, \arg6.b, \arg7.b} [\arg8], [x0], x1
 .endm
 
-.macro	STORE_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5
-  st4	{\arg0.b, \arg1.b, \arg2.b, \arg3.b} [\arg4], [x0], x1
-  st4	{\arg0.b, \arg1.b, \arg2.b, \arg3.b} [\arg5], [x2], x1
+.macro STORE_LUMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5
+  st4   {\arg0.b, \arg1.b, \arg2.b, \arg3.b} [\arg4], [x0], x1
+  st4   {\arg0.b, \arg1.b, \arg2.b, \arg3.b} [\arg5], [x2], x1
 .endm
 
-.macro	STORE_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6
+.macro STORE_LUMA_DATA_3 arg0, arg1, arg2, arg3, arg4, arg5, arg6
   st3   {\arg0.b, \arg1.b, \arg2.b} [\arg6], [x3], x1
-  st3	{\arg3.b, \arg4.b, \arg5.b} [\arg6], [x0], x1
+  st3   {\arg3.b, \arg4.b, \arg5.b} [\arg6], [x0], x1
 .endm
 
-.macro	LOAD_CHROMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5
-  ld4	{\arg0.b, \arg1.b, \arg2.b, \arg3.b} [\arg5], [\arg4], x2
+.macro LOAD_CHROMA_DATA_4 arg0, arg1, arg2, arg3, arg4, arg5
+  ld4   {\arg0.b, \arg1.b, \arg2.b, \arg3.b} [\arg5], [\arg4], x2
 .endm
 
-.macro	STORE_CHROMA_DATA_2 arg0, arg1, arg2, arg3
-  st2	{\arg0.b, \arg1.b} [\arg3], [\arg2], x2
+.macro STORE_CHROMA_DATA_2 arg0, arg1, arg2, arg3
+  st2   {\arg0.b, \arg1.b} [\arg3], [\arg2], x2
 .endm
 
-.macro	ZERO_JUMP_END arg0, arg1, arg2, arg3
+.macro ZERO_JUMP_END arg0, arg1, arg2, arg3
   mov \arg1, \arg0.d[0]
   mov \arg2, \arg0.d[1]
   orr \arg1, \arg1, \arg2
@@ -579,7 +579,7 @@
   ld1 {v3.16b}, [x0], x1
   ld1 {v4.16b}, [x0], x1
   ld1 {v5.16b}, [x0]
-  sub	x2, x2, x1
+  sub   x2, x2, x1
   ld4r {v18.8b, v19.8b, v20.8b, v21.8b}, [x4]
   trn1 v18.2s, v18.2s, v19.2s
   trn1 v20.2s, v20.2s, v21.2s
@@ -586,39 +586,39 @@
   trn1 v6.2d, v18.2d, v20.2d // iTc0: 0000, 1111, 2222, 3333
   cmge v7.16b, v6.16b, #0 // iTc0 Flag
 
-  MASK_MATRIX	v1, v2, v3, v4, v16, v17, v18
-  and	v7.16b, v7.16b, v18.16b // need filter flag
+  MASK_MATRIX   v1, v2, v3, v4, v16, v17, v18
+  and   v7.16b, v7.16b, v18.16b // need filter flag
 
   ZERO_JUMP_END v7, x3, x4, DeblockLumaLt4V_AArch64_neon_end
 
-  eor	v18.16b, v18.16b, v18.16b
+  eor   v18.16b, v18.16b, v18.16b
   sub v18.16b, v18.16b, v6.16b // -iTc0: 0000, 1111, 2222, 3333
 
-  DIFF_LUMA_LT4_P1_Q1	v0, v1, v2, v3, v17, v18, v6, v7, v19, v20
-  st1	{v19.16b}, [x2], x1
+  DIFF_LUMA_LT4_P1_Q1   v0, v1, v2, v3, v17, v18, v6, v7, v19, v20
+  st1   {v19.16b}, [x2], x1
 
-  DIFF_LUMA_LT4_P1_Q1	v5, v4, v3, v2, v17, v18, v6, v7, v21, v22
+  DIFF_LUMA_LT4_P1_Q1   v5, v4, v3, v2, v17, v18, v6, v7, v21, v22
 
-  abs	v20.16b, v20.16b
-  abs	v22.16b, v22.16b
-  add	v6.16b, v6.16b, v20.16b
-  add	v6.16b, v6.16b, v22.16b
-  eor	v18.16b, v18.16b, v18.16b
-  sub	v18.16b, v18.16b, v6.16b
+  abs   v20.16b, v20.16b
+  abs   v22.16b, v22.16b
+  add   v6.16b, v6.16b, v20.16b
+  add   v6.16b, v6.16b, v22.16b
+  eor   v18.16b, v18.16b, v18.16b
+  sub   v18.16b, v18.16b, v6.16b
 
-  DIFF_LUMA_LT4_P0_Q0_1	v1, v2, v3, v4, v19, v20, v22
-  DIFF_LUMA_LT4_P0_Q0_2	v1, v2, v3, v4, v19, v20, v22
+  DIFF_LUMA_LT4_P0_Q0_1 v1, v2, v3, v4, v19, v20, v22
+  DIFF_LUMA_LT4_P0_Q0_2 v1, v2, v3, v4, v19, v20, v22
 
-  smax	v19.16b, v19.16b, v18.16b
-  smin	v19.16b, v19.16b, v6.16b
+  smax  v19.16b, v19.16b, v18.16b
+  smin  v19.16b, v19.16b, v6.16b
   and     v19.16b, v19.16b, v7.16b
 
-  EXTRACT_DELTA_INTO_TWO_PART	v19, v20
-  uqadd	v2.16b, v2.16b, v20.16b
-  uqsub	v2.16b, v2.16b, v19.16b
+  EXTRACT_DELTA_INTO_TWO_PART   v19, v20
+  uqadd v2.16b, v2.16b, v20.16b
+  uqsub v2.16b, v2.16b, v19.16b
   st1     {v2.16b}, [x2], x1
-  uqsub	v3.16b, v3.16b, v20.16b
-  uqadd	v3.16b, v3.16b, v19.16b
+  uqsub v3.16b, v3.16b, v20.16b
+  uqadd v3.16b, v3.16b, v19.16b
   st1     {v3.16b}, [x2], x1
   st1     {v21.16b}, [x2]
 DeblockLumaLt4V_AArch64_neon_end:
@@ -640,19 +640,19 @@
   ld1     {v7.16b}, [x0]
 
   sub     x3, x3, x1, lsl #1
-  MASK_MATRIX	v2, v3, v4, v5, v16, v17, v18
-  lsr		w2, w2, #2
-  add		w2, w2, #2
+  MASK_MATRIX   v2, v3, v4, v5, v16, v17, v18
+  lsr       w2, w2, #2
+  add       w2, w2, #2
   dup     v16.16b, w2 //((alpha >> 2) + 2)
-  uabd	v19.16b, v3.16b, v4.16b
-  cmhi	v20.16b, v16.16b, v19.16b //iDetaP0Q0 < ((iAlpha >> 2) + 2)
+  uabd  v19.16b, v3.16b, v4.16b
+  cmhi  v20.16b, v16.16b, v19.16b //iDetaP0Q0 < ((iAlpha >> 2) + 2)
 
-  uabd 	v21.16b, v1.16b, v3.16b
-  cmhi	v21.16b, v17.16b, v21.16b //bDetaP2P0
+  uabd  v21.16b, v1.16b, v3.16b
+  cmhi  v21.16b, v17.16b, v21.16b //bDetaP2P0
   and     v21.16b, v21.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaP2P0
 
-  uabd	v22.16b, v6.16b, v4.16b
-  cmhi	v22.16b, v17.16b, v22.16b //bDetaQ2Q0
+  uabd  v22.16b, v6.16b, v4.16b
+  cmhi  v22.16b, v17.16b, v22.16b //bDetaQ2Q0
   and     v22.16b, v22.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaQ2Q0
   and     v20.16b, v20.16b, v18.16b //(iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0&&(iDetaP0Q0 < ((iAlpha >> 2) + 2))
 
@@ -660,33 +660,33 @@
   mov.16b v24, v21
 
   mov.16b v25, v0
-  DIFF_LUMA_EQ4_P2P1P0_1		v0, v1, v2, v3, v4, v5, v23, v19, v17, v16
-  DIFF_LUMA_EQ4_P2P1P0_2		v25, v1, v2, v3, v4, v5, v24, v19, v17, v16
+  DIFF_LUMA_EQ4_P2P1P0_1        v0, v1, v2, v3, v4, v5, v23, v19, v17, v16
+  DIFF_LUMA_EQ4_P2P1P0_2        v25, v1, v2, v3, v4, v5, v24, v19, v17, v16
   ins v0.d[1], v25.d[1]
   ins v23.d[1], v24.d[1]
-  and	v21.16b, v20.16b, v21.16b
-  DIFF_LUMA_EQ4_MASK	v19, v1, v21, v17
-  st1	{v17.16b}, [x3], x1
-  DIFF_LUMA_EQ4_MASK	v0, v2, v21, v17
-  st1	{v17.16b}, [x3], x1
-  DIFF_LUMA_EQ4_MASK	v23, v3, v18, v17
-  st1	{v17.16b}, [x3], x1
+  and   v21.16b, v20.16b, v21.16b
+  DIFF_LUMA_EQ4_MASK    v19, v1, v21, v17
+  st1   {v17.16b}, [x3], x1
+  DIFF_LUMA_EQ4_MASK    v0, v2, v21, v17
+  st1   {v17.16b}, [x3], x1
+  DIFF_LUMA_EQ4_MASK    v23, v3, v18, v17
+  st1   {v17.16b}, [x3], x1
 
 
   mov.16b v23, v22
   mov.16b v24, v22
   mov.16b v25, v7
-  DIFF_LUMA_EQ4_P2P1P0_1		v7, v6, v5, v4, v3, v2, v23, v19, v17, v16
-  DIFF_LUMA_EQ4_P2P1P0_2		v25, v6, v5, v4, v3, v2, v24, v19, v17, v16
+  DIFF_LUMA_EQ4_P2P1P0_1        v7, v6, v5, v4, v3, v2, v23, v19, v17, v16
+  DIFF_LUMA_EQ4_P2P1P0_2        v25, v6, v5, v4, v3, v2, v24, v19, v17, v16
   ins v7.d[1], v25.d[1]
   ins v23.d[1], v24.d[1]
-  and	v22.16b, v20.16b, v22.16b
-  DIFF_LUMA_EQ4_MASK	v23, v4, v18, v17
-  st1	{v17.16b}, [x3], x1
-  DIFF_LUMA_EQ4_MASK	v7, v5, v22, v17
-  st1	{v17.16b}, [x3], x1
-  DIFF_LUMA_EQ4_MASK	v19, v6, v22, v17
-  st1	{v17.16b}, [x3], x1
+  and   v22.16b, v20.16b, v22.16b
+  DIFF_LUMA_EQ4_MASK    v23, v4, v18, v17
+  st1   {v17.16b}, [x3], x1
+  DIFF_LUMA_EQ4_MASK    v7, v5, v22, v17
+  st1   {v17.16b}, [x3], x1
+  DIFF_LUMA_EQ4_MASK    v19, v6, v22, v17
+  st1   {v17.16b}, [x3], x1
 DeblockLumaEq4V_AArch64_neon_end:
 WELS_ASM_ARCH64_FUNC_END
 
@@ -697,23 +697,23 @@
   sub x2, x0, #3
   movi v23.16b, #128
 
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 0
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 1
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 2
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 3
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 4
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 5
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 6
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 7
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 0
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 1
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 2
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 3
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 4
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 5
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 6
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 7
 
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 8
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 9
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 10
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 11
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 12
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 13
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 14
-  LOAD_LUMA_DATA_3		v0, v1, v2, v3, v4, v5, 15
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 8
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 9
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 10
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 11
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 12
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 13
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 14
+  LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 15
 
   sub x0, x0, x1, lsl #4
 
@@ -723,55 +723,55 @@
   trn1 v6.2d, v18.2d, v20.2d // iTc0: 0000, 1111, 2222, 3333
   cmge v7.16b, v6.16b, #0 // iTc0 Flag
 
-  MASK_MATRIX	v1, v2, v3, v4, v16, v17, v18
-  and	v7.16b, v7.16b, v18.16b // need filter flag
+  MASK_MATRIX   v1, v2, v3, v4, v16, v17, v18
+  and   v7.16b, v7.16b, v18.16b // need filter flag
 
   ZERO_JUMP_END v7, x3, x4, DeblockLumaLt4H_AArch64_neon_end
 
-  eor	v18.16b, v18.16b, v18.16b
+  eor   v18.16b, v18.16b, v18.16b
   sub v18.16b, v18.16b, v6.16b // -iTc0: 0000, 1111, 2222, 3333
 
-  DIFF_LUMA_LT4_P1_Q1	v0, v1, v2, v3, v17, v18, v6, v7, v19, v20 //Use Tmp v23,v24
+  DIFF_LUMA_LT4_P1_Q1   v0, v1, v2, v3, v17, v18, v6, v7, v19, v20 //Use Tmp v23,v24
   mov.16b v25, v19
 
-  DIFF_LUMA_LT4_P1_Q1	v5, v4, v3, v2, v17, v18, v6, v7, v21, v22 //Use Tmp v23,v24
+  DIFF_LUMA_LT4_P1_Q1   v5, v4, v3, v2, v17, v18, v6, v7, v21, v22 //Use Tmp v23,v24
 
-  abs	v20.16b, v20.16b
-  abs	v22.16b, v22.16b
-  add	v6.16b, v6.16b, v20.16b
-  add	v6.16b, v6.16b, v22.16b
-  eor	v18.16b, v18.16b, v18.16b
-  sub	v18.16b, v18.16b, v6.16b
+  abs   v20.16b, v20.16b
+  abs   v22.16b, v22.16b
+  add   v6.16b, v6.16b, v20.16b
+  add   v6.16b, v6.16b, v22.16b
+  eor   v18.16b, v18.16b, v18.16b
+  sub   v18.16b, v18.16b, v6.16b
 
-  DIFF_LUMA_LT4_P0_Q0_1	v1, v2, v3, v4, v19, v20, v22
-  DIFF_LUMA_LT4_P0_Q0_2	v1, v2, v3, v4, v19, v20, v22
+  DIFF_LUMA_LT4_P0_Q0_1 v1, v2, v3, v4, v19, v20, v22
+  DIFF_LUMA_LT4_P0_Q0_2 v1, v2, v3, v4, v19, v20, v22
 
-  smax	v19.16b, v19.16b, v18.16b
-  smin	v19.16b, v19.16b, v6.16b
+  smax  v19.16b, v19.16b, v18.16b
+  smin  v19.16b, v19.16b, v6.16b
   and     v19.16b, v19.16b, v7.16b
 
-  EXTRACT_DELTA_INTO_TWO_PART	v19, v20
-  uqadd	v2.16b, v2.16b, v20.16b
-  uqsub	v2.16b, v2.16b, v19.16b
+  EXTRACT_DELTA_INTO_TWO_PART   v19, v20
+  uqadd v2.16b, v2.16b, v20.16b
+  uqsub v2.16b, v2.16b, v19.16b
   mov.16b v26, v2
-  uqsub	v3.16b, v3.16b, v20.16b
-  uqadd	v3.16b, v3.16b, v19.16b
+  uqsub v3.16b, v3.16b, v20.16b
+  uqadd v3.16b, v3.16b, v19.16b
   mov.16b v27, v3
   mov.16b v28, v21
 
-  sub	x0, x0, #2
-  add	x2, x0, x1
-  lsl	x1, x1, #1
+  sub   x0, x0, #2
+  add   x2, x0, x1
+  lsl   x1, x1, #1
 
-  STORE_LUMA_DATA_4		v25, v26, v27, v28, 0, 1
-  STORE_LUMA_DATA_4		v25, v26, v27, v28, 2, 3
-  STORE_LUMA_DATA_4		v25, v26, v27, v28, 4, 5
-  STORE_LUMA_DATA_4		v25, v26, v27, v28, 6, 7
+  STORE_LUMA_DATA_4     v25, v26, v27, v28, 0, 1
+  STORE_LUMA_DATA_4     v25, v26, v27, v28, 2, 3
+  STORE_LUMA_DATA_4     v25, v26, v27, v28, 4, 5
+  STORE_LUMA_DATA_4     v25, v26, v27, v28, 6, 7
 
-  STORE_LUMA_DATA_4		v25, v26, v27, v28, 8, 9
-  STORE_LUMA_DATA_4		v25, v26, v27, v28, 10, 11
-  STORE_LUMA_DATA_4		v25, v26, v27, v28, 12, 13
-  STORE_LUMA_DATA_4		v25, v26, v27, v28, 14, 15
+  STORE_LUMA_DATA_4     v25, v26, v27, v28, 8, 9
+  STORE_LUMA_DATA_4     v25, v26, v27, v28, 10, 11
+  STORE_LUMA_DATA_4     v25, v26, v27, v28, 12, 13
+  STORE_LUMA_DATA_4     v25, v26, v27, v28, 14, 15
 DeblockLumaLt4H_AArch64_neon_end:
 WELS_ASM_ARCH64_FUNC_END
 
@@ -781,42 +781,42 @@
   dup     v17.16b, w3 //beta
   sub     x3, x0, #4
 
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 0
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 1
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 2
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 3
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 4
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 5
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 6
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 7
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 0
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 1
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 2
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 3
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 4
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 5
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 6
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 7
 
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 8
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 9
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 10
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 11
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 12
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 13
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 14
-  LOAD_LUMA_DATA_4		v0, v1, v2, v3, v4, v5, v6, v7, 15
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 8
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 9
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 10
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 11
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 12
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 13
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 14
+  LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 15
 
   sub x0, x0, x1, lsl #4
   sub x3, x0, #3
-  MASK_MATRIX	v2, v3, v4, v5, v16, v17, v18
+  MASK_MATRIX   v2, v3, v4, v5, v16, v17, v18
 
   ZERO_JUMP_END v18, x4, x5, DeblockLumaEq4H_AArch64_neon_end
 
-  lsr		w2, w2, #2
-  add		w2, w2, #2
+  lsr       w2, w2, #2
+  add       w2, w2, #2
   dup     v16.16b, w2 //((alpha >> 2) + 2)
-  uabd	v19.16b, v3.16b, v4.16b
-  cmhi	v20.16b, v16.16b, v19.16b //iDetaP0Q0 < ((iAlpha >> 2) + 2)
+  uabd  v19.16b, v3.16b, v4.16b
+  cmhi  v20.16b, v16.16b, v19.16b //iDetaP0Q0 < ((iAlpha >> 2) + 2)
 
-  uabd	v21.16b, v1.16b, v3.16b
-  cmhi	v21.16b, v17.16b, v21.16b //bDetaP2P0
+  uabd  v21.16b, v1.16b, v3.16b
+  cmhi  v21.16b, v17.16b, v21.16b //bDetaP2P0
   and     v21.16b, v21.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaP2P0
 
-  uabd	v22.16b, v6.16b, v4.16b
-  cmhi	v22.16b, v17.16b, v22.16b //bDetaQ2Q0
+  uabd  v22.16b, v6.16b, v4.16b
+  cmhi  v22.16b, v17.16b, v22.16b //bDetaQ2Q0
   and     v22.16b, v22.16b, v20.16b //(iDetaP0Q0 < ((iAlpha >> 2) + 2))&&bDetaQ2Q0
   and     v20.16b, v20.16b, v18.16b //(iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0&&(iDetaP0Q0 < ((iAlpha >> 2) + 2))
 
@@ -824,16 +824,16 @@
   mov.16b v24, v21
 
   mov.16b v25, v0
-  DIFF_LUMA_EQ4_P2P1P0_1		v0, v1, v2, v3, v4, v5, v23, v19, v17, v16
-  DIFF_LUMA_EQ4_P2P1P0_2		v25, v1, v2, v3, v4, v5, v24, v19, v17, v16
+  DIFF_LUMA_EQ4_P2P1P0_1        v0, v1, v2, v3, v4, v5, v23, v19, v17, v16
+  DIFF_LUMA_EQ4_P2P1P0_2        v25, v1, v2, v3, v4, v5, v24, v19, v17, v16
   ins v0.d[1], v25.d[1]
   ins v23.d[1], v24.d[1]
-  and	v21.16b, v20.16b, v21.16b
-  DIFF_LUMA_EQ4_MASK	v19, v1, v21, v17
+  and   v21.16b, v20.16b, v21.16b
+  DIFF_LUMA_EQ4_MASK    v19, v1, v21, v17
   mov.16b v26, v17
-  DIFF_LUMA_EQ4_MASK	v0, v2, v21, v17
+  DIFF_LUMA_EQ4_MASK    v0, v2, v21, v17
   mov.16b v27, v17
-  DIFF_LUMA_EQ4_MASK	v23, v3, v18, v17
+  DIFF_LUMA_EQ4_MASK    v23, v3, v18, v17
   mov.16b v28, v17
 
 
@@ -840,34 +840,34 @@
   mov.16b v23, v22
   mov.16b v24, v22
   mov.16b v25, v7
-  DIFF_LUMA_EQ4_P2P1P0_1		v7, v6, v5, v4, v3, v2, v23, v19, v17, v16
-  DIFF_LUMA_EQ4_P2P1P0_2		v25, v6, v5, v4, v3, v2, v24, v19, v17, v16
+  DIFF_LUMA_EQ4_P2P1P0_1        v7, v6, v5, v4, v3, v2, v23, v19, v17, v16
+  DIFF_LUMA_EQ4_P2P1P0_2        v25, v6, v5, v4, v3, v2, v24, v19, v17, v16
   ins v7.d[1], v25.d[1]
   ins v23.d[1], v24.d[1]
-  and	v22.16b, v20.16b, v22.16b
-  DIFF_LUMA_EQ4_MASK	v23, v4, v18, v17
+  and   v22.16b, v20.16b, v22.16b
+  DIFF_LUMA_EQ4_MASK    v23, v4, v18, v17
   mov.16b v29, v17
-  DIFF_LUMA_EQ4_MASK	v7, v5, v22, v17
+  DIFF_LUMA_EQ4_MASK    v7, v5, v22, v17
   mov.16b v30, v17
-  DIFF_LUMA_EQ4_MASK	v19, v6, v22, v17
+  DIFF_LUMA_EQ4_MASK    v19, v6, v22, v17
   mov.16b v31, v17
 
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 0
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 1
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 2
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 3
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 4
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 5
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 6
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 7
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 8
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 9
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 10
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 11
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 12
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 13
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 14
-  STORE_LUMA_DATA_3		v26, v27, v28, v29, v30, v31, 15
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 0
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 1
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 2
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 3
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 4
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 5
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 6
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 7
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 8
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 9
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 10
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 11
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 12
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 13
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 14
+  STORE_LUMA_DATA_3     v26, v27, v28, v29, v30, v31, 15
 DeblockLumaEq4H_AArch64_neon_end:
 WELS_ASM_ARCH64_FUNC_END
 
@@ -894,28 +894,28 @@
   zip1 v6.4s, v18.4s, v20.4s //iTc0: 0011,2233,0011,2233
   cmgt v7.16b, v6.16b, #0 // iTc0 Flag
 
-  MASK_MATRIX	v0, v1, v2, v3, v16, v17, v18
-  and	v7.16b, v7.16b, v18.16b // need filter flag
+  MASK_MATRIX   v0, v1, v2, v3, v16, v17, v18
+  and   v7.16b, v7.16b, v18.16b // need filter flag
 
   ZERO_JUMP_END v7, x4, x5, DeblockChromaLt4V_AArch64_neon_end
 
-  eor	v18.16b, v18.16b, v18.16b
+  eor   v18.16b, v18.16b, v18.16b
   sub v18.16b, v18.16b, v6.16b //-iTc0: 0011,2233,0011,2233
 
-  DIFF_LUMA_LT4_P0_Q0_1	v0, v1, v2, v3, v19, v20, v22
-  DIFF_LUMA_LT4_P0_Q0_2	v0, v1, v2, v3, v19, v20, v22
+  DIFF_LUMA_LT4_P0_Q0_1 v0, v1, v2, v3, v19, v20, v22
+  DIFF_LUMA_LT4_P0_Q0_2 v0, v1, v2, v3, v19, v20, v22
 
-  smax	v19.16b, v19.16b, v18.16b
-  smin	v19.16b, v19.16b, v6.16b
+  smax  v19.16b, v19.16b, v18.16b
+  smin  v19.16b, v19.16b, v6.16b
   and     v19.16b, v19.16b, v7.16b
 
-  EXTRACT_DELTA_INTO_TWO_PART	v19, v20
-  uqadd	v1.16b, v1.16b, v20.16b
-  uqsub	v1.16b, v1.16b, v19.16b
+  EXTRACT_DELTA_INTO_TWO_PART   v19, v20
+  uqadd v1.16b, v1.16b, v20.16b
+  uqsub v1.16b, v1.16b, v19.16b
   st1     {v1.d} [0], [x6], x2
   st1     {v1.d} [1], [x7], x2
-  uqsub	v2.16b, v2.16b, v20.16b
-  uqadd	v2.16b, v2.16b, v19.16b
+  uqsub v2.16b, v2.16b, v20.16b
+  uqadd v2.16b, v2.16b, v19.16b
   st1     {v2.d} [0], [x6]
   st1     {v2.d} [1], [x7]
 DeblockChromaLt4V_AArch64_neon_end:
@@ -927,23 +927,23 @@
   sub x6, x0, #2 //pPixCb-2
   sub x7, x1, #2 //pPixCr-2
 
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 0
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 1
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 2
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 3
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 4
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 5
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 6
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 7
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 0
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 1
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 2
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 3
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 4
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 5
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 6
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 7
 
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 8
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 9
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 10
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 11
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 12
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 13
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 14
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 15
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 8
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 9
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 10
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 11
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 12
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 13
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 14
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 15
 
   sub x0, x0, #1
   sub x1, x1, #1
@@ -954,25 +954,25 @@
   zip1 v6.4s, v18.4s, v20.4s //iTc0: 0011,2233,0011,2233
   cmgt v7.16b, v6.16b, #0 // iTc0 Flag
 
-  MASK_MATRIX	v0, v1, v2, v3, v16, v17, v18
-  and	v7.16b, v7.16b, v18.16b // need filter flag
+  MASK_MATRIX   v0, v1, v2, v3, v16, v17, v18
+  and   v7.16b, v7.16b, v18.16b // need filter flag
 
   ZERO_JUMP_END v7, x4, x5, DeblockChromaLt4H_AArch64_neon_end
-  eor	v18.16b, v18.16b, v18.16b
+  eor   v18.16b, v18.16b, v18.16b
   sub v18.16b, v18.16b, v6.16b //-iTc0: 0011,2233,0011,2233
 
-  DIFF_LUMA_LT4_P0_Q0_1	v0, v1, v2, v3, v19, v20, v22
-  DIFF_LUMA_LT4_P0_Q0_2	v0, v1, v2, v3, v19, v20, v22
+  DIFF_LUMA_LT4_P0_Q0_1 v0, v1, v2, v3, v19, v20, v22
+  DIFF_LUMA_LT4_P0_Q0_2 v0, v1, v2, v3, v19, v20, v22
 
-  smax	v19.16b, v19.16b, v18.16b
-  smin	v19.16b, v19.16b, v6.16b
+  smax  v19.16b, v19.16b, v18.16b
+  smin  v19.16b, v19.16b, v6.16b
   and     v19.16b, v19.16b, v7.16b
 
-  EXTRACT_DELTA_INTO_TWO_PART	v19, v20
-  uqadd	v1.16b, v1.16b, v20.16b
-  uqsub	v1.16b, v1.16b, v19.16b
-  uqsub	v2.16b, v2.16b, v20.16b
-  uqadd	v2.16b, v2.16b, v19.16b
+  EXTRACT_DELTA_INTO_TWO_PART   v19, v20
+  uqadd v1.16b, v1.16b, v20.16b
+  uqsub v1.16b, v1.16b, v19.16b
+  uqsub v2.16b, v2.16b, v20.16b
+  uqadd v2.16b, v2.16b, v19.16b
 
   STORE_CHROMA_DATA_2 v1, v2, x0, 0
   STORE_CHROMA_DATA_2 v1, v2, x0, 1
@@ -1010,7 +1010,7 @@
   ld1 {v2.d} [1], [x1], x2
   ld1 {v3.d} [1], [x1]
 
-  MASK_MATRIX	v0, v1, v2, v3, v16, v17, v7
+  MASK_MATRIX   v0, v1, v2, v3, v16, v17, v7
 
   ZERO_JUMP_END v7, x3, x4, DeblockChromaEq4V_AArch64_neon_end
 
@@ -1036,27 +1036,27 @@
   sub x6, x0, #2 //pPixCb-2
   sub x7, x1, #2 //pPixCr-2
 
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 0
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 1
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 2
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 3
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 4
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 5
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 6
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x6, 7
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 0
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 1
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 2
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 3
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 4
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 5
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 6
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x6, 7
 
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 8
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 9
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 10
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 11
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 12
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 13
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 14
-  LOAD_CHROMA_DATA_4		v0, v1, v2, v3, x7, 15
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 8
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 9
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 10
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 11
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 12
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 13
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 14
+  LOAD_CHROMA_DATA_4        v0, v1, v2, v3, x7, 15
   sub x0, x0, #1
   sub x1, x1, #1
 
-  MASK_MATRIX	v0, v1, v2, v3, v16, v17, v7
+  MASK_MATRIX   v0, v1, v2, v3, v16, v17, v7
 
   ZERO_JUMP_END v7, x3, x4, DeblockChromaEq4H_AArch64_neon_end