shithub: openh264

Download patch

ref: c8e1a41c298f89d054757bcfad09195eac8b2a6b
parent: 6cb48fc54709b7e9a72b3218a17958c3787c10bf
author: Licai Guo <guolicai@gmail.com>
date: Thu Apr 17 06:06:48 EDT 2014

Move copy_mb neon code to common folder

--- a/codec/build/iOS/common/common.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/common/common.xcodeproj/project.pbxproj
@@ -16,6 +16,7 @@
 		4C3406CF18D96EA600DFA14A /* deblocking_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C3406C618D96EA600DFA14A /* deblocking_common.cpp */; };
 		4C3406D018D96EA600DFA14A /* logging.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C3406C718D96EA600DFA14A /* logging.cpp */; };
 		4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */; };
+		4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */; };
 		4CE443D918B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
 		4CE443E718B722CD0017DF25 /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443E618B722CD0017DF25 /* XCTest.framework */; };
 		4CE443E818B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
@@ -71,6 +72,7 @@
 		4C3406C618D96EA600DFA14A /* deblocking_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deblocking_common.cpp; sourceTree = "<group>"; };
 		4C3406C718D96EA600DFA14A /* logging.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = logging.cpp; sourceTree = "<group>"; };
 		4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = WelsThreadLib.cpp; sourceTree = "<group>"; };
+		4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = copy_mb_neon.S; sourceTree = "<group>"; };
 		4CE443D518B722CD0017DF25 /* libcommon.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libcommon.a; sourceTree = BUILT_PRODUCTS_DIR; };
 		4CE443D818B722CD0017DF25 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
 		4CE443E518B722CD0017DF25 /* commonTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = commonTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
@@ -111,6 +113,7 @@
 		4C3406B118D96EA600DFA14A /* arm */ = {
 			isa = PBXGroup;
 			children = (
+				4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */,
 				4C3406B218D96EA600DFA14A /* arm_arch_common_macro.S */,
 				4C3406B318D96EA600DFA14A /* deblocking_neon.S */,
 				4C3406B418D96EA600DFA14A /* expand_picture_neon.S */,
@@ -300,6 +303,7 @@
 				4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */,
 				4C3406CC18D96EA600DFA14A /* mc_neon.S in Sources */,
 				4C3406CB18D96EA600DFA14A /* expand_picture_neon.S in Sources */,
+				4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */,
 				4C3406CD18D96EA600DFA14A /* cpu.cpp in Sources */,
 				4C3406CA18D96EA600DFA14A /* deblocking_neon.S in Sources */,
 				F0B204F918FD23BF005DA23F /* copy_mb.cpp in Sources */,
--- /dev/null
+++ b/codec/common/arm/copy_mb_neon.S
@@ -1,0 +1,201 @@
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef  HAVE_NEON
+.text
+#include "arm_arch_common_macro.S"
+
+#ifdef __APPLE__
+.macro	LOAD_ALIGNED_DATA_WITH_STRIDE
+//	{	//	input: $0~$3 = NEON registers to fill (one row each), $4 = src* (used with the :128 alignment qualifier), $5 = src_stride
    vld1.64	{$0}, [$4,:128], $5
    vld1.64	{$1}, [$4,:128], $5
    vld1.64	{$2}, [$4,:128], $5
    vld1.64	{$3}, [$4,:128], $5
+//	}	//	every load post-increments $4 by $5, so $4 leaves the macro 4 strides further on
+.endm
+
+.macro	STORE_ALIGNED_DATA_WITH_STRIDE
+//	{	//	input: $0~$3 = NEON registers to write out (one row each), $4 = dst* (used with the :128 alignment qualifier), $5 = dst_stride
    vst1.64	{$0}, [$4,:128], $5
    vst1.64	{$1}, [$4,:128], $5
    vst1.64	{$2}, [$4,:128], $5
    vst1.64	{$3}, [$4,:128], $5
+//	}	//	every store post-increments $4 by $5, so $4 leaves the macro 4 strides further on
+.endm
+
+.macro	LOAD_UNALIGNED_DATA_WITH_STRIDE
+//	{	//	input: $0~$3 = NEON registers to fill (one row each), $4 = src* (no alignment qualifier), $5 = src_stride
    vld1.64	{$0}, [$4], $5
    vld1.64	{$1}, [$4], $5
    vld1.64	{$2}, [$4], $5
    vld1.64	{$3}, [$4], $5
+//	}	//	every load post-increments $4 by $5, so $4 leaves the macro 4 strides further on
+.endm
+
+.macro	STORE_UNALIGNED_DATA_WITH_STRIDE
+//	{	//	input: $0~$3 = NEON registers to write out (one row each), $4 = dst* (no alignment qualifier), $5 = dst_stride
    vst1.64	{$0}, [$4], $5
    vst1.64	{$1}, [$4], $5
    vst1.64	{$2}, [$4], $5
    vst1.64	{$3}, [$4], $5
+//	}	//	every store post-increments $4 by $5, so $4 leaves the macro 4 strides further on
+.endm
+#else
+.macro	LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+//	{	//	input: \arg0~\arg3 = NEON registers to fill (one row each), \arg4 = src* (used with the :128 alignment qualifier), \arg5 = src_stride
    vld1.64	{\arg0}, [\arg4,:128], \arg5
    vld1.64	{\arg1}, [\arg4,:128], \arg5
    vld1.64	{\arg2}, [\arg4,:128], \arg5
    vld1.64	{\arg3}, [\arg4,:128], \arg5
+//	}	//	every load post-increments \arg4 by \arg5, so \arg4 leaves the macro 4 strides further on
+.endm
+
+.macro	STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+//	{	//	input: \arg0~\arg3 = NEON registers to write out (one row each), \arg4 = dst* (used with the :128 alignment qualifier), \arg5 = dst_stride
    vst1.64	{\arg0}, [\arg4,:128], \arg5
    vst1.64	{\arg1}, [\arg4,:128], \arg5
    vst1.64	{\arg2}, [\arg4,:128], \arg5
    vst1.64	{\arg3}, [\arg4,:128], \arg5
+//	}	//	every store post-increments \arg4 by \arg5, so \arg4 leaves the macro 4 strides further on
+.endm
+
+.macro	LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+//	{	//	input: \arg0~\arg3 = NEON registers to fill (one row each), \arg4 = src* (no alignment qualifier), \arg5 = src_stride
    vld1.64	{\arg0}, [\arg4], \arg5
    vld1.64	{\arg1}, [\arg4], \arg5
    vld1.64	{\arg2}, [\arg4], \arg5
    vld1.64	{\arg3}, [\arg4], \arg5
+//	}	//	every load post-increments \arg4 by \arg5, so \arg4 leaves the macro 4 strides further on
+.endm
+
+.macro	STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+//	{	//	input: \arg0~\arg3 = NEON registers to write out (one row each), \arg4 = dst* (no alignment qualifier), \arg5 = dst_stride
    vst1.64	{\arg0}, [\arg4], \arg5
    vst1.64	{\arg1}, [\arg4], \arg5
    vst1.64	{\arg2}, [\arg4], \arg5
    vst1.64	{\arg3}, [\arg4], \arg5
+//	}	//	every store post-increments \arg4 by \arg5, so \arg4 leaves the macro 4 strides further on
+.endm
+
+#endif
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy8x8_neon
+// Copies 8 rows of 8 bytes (one d register per row), no alignment qualifier: r2/r3 = src*/src_stride, r0/r1 = dst*/dst_stride.
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r0, r1
+// rows 4-7; r2 and r0 were already advanced by the first load/store pair
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy16x16_neon
+// Copies 16 rows of 16 bytes (one q register per row): r2/r3 = src*/src_stride, r0/r1 = dst*/dst_stride. Every row address is accessed with the :128 qualifier on both sides.
+	LOAD_ALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
+
+	STORE_ALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
+// rows 4-7; q4-q7 are skipped - presumably because d8-d15 are callee-saved under AAPCS (TODO confirm)
+	LOAD_ALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
+
+	STORE_ALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
+// rows 8-11, reusing q0-q3
+	LOAD_ALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
+
+	STORE_ALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
+// rows 12-15, reusing q8-q11
+	LOAD_ALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
+
+	STORE_ALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy16x16NotAligned_neon
+// Copies 16 rows of 16 bytes (one q register per row), no alignment qualifier: r2/r3 = src*/src_stride, r0/r1 = dst*/dst_stride.
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
+// rows 4-7; q4-q7 are skipped - presumably because d8-d15 are callee-saved under AAPCS (TODO confirm)
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
+// rows 8-11, reusing q0-q3
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
+// rows 12-15, reusing q8-q11
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy16x8NotAligned_neon
+// Copies 8 rows of 16 bytes (one q register per row), no alignment qualifier: r2/r3 = src*/src_stride, r0/r1 = dst*/dst_stride.
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
+// rows 4-7; q4-q7 are skipped - presumably because d8-d15 are callee-saved under AAPCS (TODO confirm)
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy8x16_neon
+// Copies 16 rows of 8 bytes (one d register per row), no alignment qualifier: r2/r3 = src*/src_stride, r0/r1 = dst*/dst_stride.
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r0, r1
+// rows 4-7
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r0, r1
+// rows 8-11, reusing d0-d3
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r0, r1
+// rows 12-15, reusing d4-d7
+	LOAD_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r2, r3
+
+	STORE_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r0, r1
+
+WELS_ASM_FUNC_END
+
+#endif
--- a/codec/encoder/core/arm/reconstruct_neon.S
+++ b/codec/encoder/core/arm/reconstruct_neon.S
@@ -35,42 +35,6 @@
 #include "arm_arch_common_macro.S"
 
 #ifdef __APPLE__
-.macro	LOAD_ALIGNED_DATA_WITH_STRIDE
-//	{	//	input: $0~$3, src*, src_stride
-    vld1.64	{$0}, [$4,:128], $5
-    vld1.64	{$1}, [$4,:128], $5
-    vld1.64	{$2}, [$4,:128], $5
-    vld1.64	{$3}, [$4,:128], $5
-//	}
-.endm
-
-.macro	STORE_ALIGNED_DATA_WITH_STRIDE
-//	{	//	input: $0~$3, dst*, dst_stride
-    vst1.64	{$0}, [$4,:128], $5
-    vst1.64	{$1}, [$4,:128], $5
-    vst1.64	{$2}, [$4,:128], $5
-    vst1.64	{$3}, [$4,:128], $5
-//	}
-.endm
-
-.macro	LOAD_UNALIGNED_DATA_WITH_STRIDE
-//	{	//	input: $0~$3, src*, src_stride
-    vld1.64	{$0}, [$4], $5
-    vld1.64	{$1}, [$4], $5
-    vld1.64	{$2}, [$4], $5
-    vld1.64	{$3}, [$4], $5
-//	}
-.endm
-
-.macro	STORE_UNALIGNED_DATA_WITH_STRIDE
-//	{	//	input: $0~$3, dst*, dst_stride
-    vst1.64	{$0}, [$4], $5
-    vst1.64	{$1}, [$4], $5
-    vst1.64	{$2}, [$4], $5
-    vst1.64	{$3}, [$4], $5
-//	}
-.endm
-
 .macro	LOAD_4x4_DATA_FOR_DCT
 //	{	//	input: $0~$3, src1*, src1_stride, src2*, src2_stride
     vld2.16	{$0[0],$1[0]}, [$4], $5
@@ -315,42 +279,6 @@
 //	}
 .endm
 #else
-.macro	LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-//	{	//	input: \arg0~\arg3, src*, src_stride
-    vld1.64	{\arg0}, [\arg4,:128], \arg5
-    vld1.64	{\arg1}, [\arg4,:128], \arg5
-    vld1.64	{\arg2}, [\arg4,:128], \arg5
-    vld1.64	{\arg3}, [\arg4,:128], \arg5
-//	}
-.endm
-
-.macro	STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-//	{	//	input: \arg0~\arg3, dst*, dst_stride
-    vst1.64	{\arg0}, [\arg4,:128], \arg5
-    vst1.64	{\arg1}, [\arg4,:128], \arg5
-    vst1.64	{\arg2}, [\arg4,:128], \arg5
-    vst1.64	{\arg3}, [\arg4,:128], \arg5
-//	}
-.endm
-
-.macro	LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-//	{	//	input: \arg0~\arg3, src*, src_stride
-    vld1.64	{\arg0}, [\arg4], \arg5
-    vld1.64	{\arg1}, [\arg4], \arg5
-    vld1.64	{\arg2}, [\arg4], \arg5
-    vld1.64	{\arg3}, [\arg4], \arg5
-//	}
-.endm
-
-.macro	STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-//	{	//	input: \arg0~\arg3, dst*, dst_stride
-    vst1.64	{\arg0}, [\arg4], \arg5
-    vst1.64	{\arg1}, [\arg4], \arg5
-    vst1.64	{\arg2}, [\arg4], \arg5
-    vst1.64	{\arg3}, [\arg4], \arg5
-//	}
-.endm
-
 .macro	LOAD_4x4_DATA_FOR_DCT arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
 //	{	//	input: \arg0~\arg3, src1*, src1_stride, src2*, src2_stride
     vld2.16	{\arg0[0],\arg1[0]}, [\arg4], \arg5
@@ -595,96 +523,6 @@
 //	}
 .endm
 #endif
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy8x8_neon
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r0, r1
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy16x16_neon
-
-	LOAD_ALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
-
-	STORE_ALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
-
-	LOAD_ALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
-
-	STORE_ALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
-
-	LOAD_ALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
-
-	STORE_ALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
-
-	LOAD_ALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
-
-	STORE_ALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy16x16NotAligned_neon
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy16x8NotAligned_neon
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	q0, q1, q2, q3, r0, r1
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	q8, q9, q10, q11, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy8x16_neon
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r0, r1
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r0, r1
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	d0, d1, d2, d3, r0, r1
-
-	LOAD_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r2, r3
-
-	STORE_UNALIGNED_DATA_WITH_STRIDE	d4, d5, d6, d7, r0, r1
-
-WELS_ASM_FUNC_END
-
 
 
 WELS_ASM_FUNC_BEGIN WelsDctT4_neon