ref: c8e1a41c298f89d054757bcfad09195eac8b2a6b
parent: 6cb48fc54709b7e9a72b3218a17958c3787c10bf
author: Licai Guo <guolicai@gmail.com>
date: Thu Apr 17 06:06:48 EDT 2014
Move copy_mb neon code to common folder
--- a/codec/build/iOS/common/common.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/common/common.xcodeproj/project.pbxproj
@@ -16,6 +16,7 @@
4C3406CF18D96EA600DFA14A /* deblocking_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C3406C618D96EA600DFA14A /* deblocking_common.cpp */; };
4C3406D018D96EA600DFA14A /* logging.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C3406C718D96EA600DFA14A /* logging.cpp */; };
4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */; };
+ 4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */; };
4CE443D918B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
4CE443E718B722CD0017DF25 /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443E618B722CD0017DF25 /* XCTest.framework */; };
4CE443E818B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
@@ -71,6 +72,7 @@
4C3406C618D96EA600DFA14A /* deblocking_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deblocking_common.cpp; sourceTree = "<group>"; };
4C3406C718D96EA600DFA14A /* logging.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = logging.cpp; sourceTree = "<group>"; };
4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = WelsThreadLib.cpp; sourceTree = "<group>"; };
+ 4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = copy_mb_neon.S; sourceTree = "<group>"; };
4CE443D518B722CD0017DF25 /* libcommon.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libcommon.a; sourceTree = BUILT_PRODUCTS_DIR; };
4CE443D818B722CD0017DF25 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
4CE443E518B722CD0017DF25 /* commonTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = commonTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
@@ -111,6 +113,7 @@
4C3406B118D96EA600DFA14A /* arm */ = {
isa = PBXGroup;
children = (
+ 4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */,
4C3406B218D96EA600DFA14A /* arm_arch_common_macro.S */,
4C3406B318D96EA600DFA14A /* deblocking_neon.S */,
4C3406B418D96EA600DFA14A /* expand_picture_neon.S */,
@@ -300,6 +303,7 @@
4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */,
4C3406CC18D96EA600DFA14A /* mc_neon.S in Sources */,
4C3406CB18D96EA600DFA14A /* expand_picture_neon.S in Sources */,
+ 4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */,
4C3406CD18D96EA600DFA14A /* cpu.cpp in Sources */,
4C3406CA18D96EA600DFA14A /* deblocking_neon.S in Sources */,
F0B204F918FD23BF005DA23F /* copy_mb.cpp in Sources */,
--- /dev/null
+++ b/codec/common/arm/copy_mb_neon.S
@@ -1,0 +1,201 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON
+.text
+#include "arm_arch_common_macro.S"
+
+#ifdef __APPLE__
+.macro LOAD_ALIGNED_DATA_WITH_STRIDE
+// { // input: $0~$3 = registers to fill, $4 = src* (each address must satisfy the :128, i.e. 16-byte, alignment qualifier), $5 = src_stride register added to $4 after every load
+ vld1.64 {$0}, [$4,:128], $5
+ vld1.64 {$1}, [$4,:128], $5
+ vld1.64 {$2}, [$4,:128], $5
+ vld1.64 {$3}, [$4,:128], $5
+// } // on exit $4 has been post-incremented four times by $5
+.endm
+
+.macro STORE_ALIGNED_DATA_WITH_STRIDE
+// { // input: $0~$3 = registers to write out, $4 = dst* (each address must satisfy the :128, i.e. 16-byte, alignment qualifier), $5 = dst_stride register added to $4 after every store
+ vst1.64 {$0}, [$4,:128], $5
+ vst1.64 {$1}, [$4,:128], $5
+ vst1.64 {$2}, [$4,:128], $5
+ vst1.64 {$3}, [$4,:128], $5
+// } // on exit $4 has been post-incremented four times by $5
+.endm
+
+.macro LOAD_UNALIGNED_DATA_WITH_STRIDE
+// { // input: $0~$3 = registers to fill, $4 = src* (no alignment qualifier is applied), $5 = src_stride register added to $4 after every load
+ vld1.64 {$0}, [$4], $5
+ vld1.64 {$1}, [$4], $5
+ vld1.64 {$2}, [$4], $5
+ vld1.64 {$3}, [$4], $5
+// } // on exit $4 has been post-incremented four times by $5
+.endm
+
+.macro STORE_UNALIGNED_DATA_WITH_STRIDE
+// { // input: $0~$3 = registers to write out, $4 = dst* (no alignment qualifier is applied), $5 = dst_stride register added to $4 after every store
+ vst1.64 {$0}, [$4], $5
+ vst1.64 {$1}, [$4], $5
+ vst1.64 {$2}, [$4], $5
+ vst1.64 {$3}, [$4], $5
+// } // on exit $4 has been post-incremented four times by $5
+.endm
+#else
+.macro LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // input: \arg0~\arg3 = registers to fill, \arg4 = src* (each address must satisfy the :128, i.e. 16-byte, alignment qualifier), \arg5 = src_stride register added to \arg4 after every load
+ vld1.64 {\arg0}, [\arg4,:128], \arg5
+ vld1.64 {\arg1}, [\arg4,:128], \arg5
+ vld1.64 {\arg2}, [\arg4,:128], \arg5
+ vld1.64 {\arg3}, [\arg4,:128], \arg5
+// } // on exit \arg4 has been post-incremented four times by \arg5
+.endm
+
+.macro STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // input: \arg0~\arg3 = registers to write out, \arg4 = dst* (each address must satisfy the :128, i.e. 16-byte, alignment qualifier), \arg5 = dst_stride register added to \arg4 after every store
+ vst1.64 {\arg0}, [\arg4,:128], \arg5
+ vst1.64 {\arg1}, [\arg4,:128], \arg5
+ vst1.64 {\arg2}, [\arg4,:128], \arg5
+ vst1.64 {\arg3}, [\arg4,:128], \arg5
+// } // on exit \arg4 has been post-incremented four times by \arg5
+.endm
+
+.macro LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // input: \arg0~\arg3 = registers to fill, \arg4 = src* (no alignment qualifier is applied), \arg5 = src_stride register added to \arg4 after every load
+ vld1.64 {\arg0}, [\arg4], \arg5
+ vld1.64 {\arg1}, [\arg4], \arg5
+ vld1.64 {\arg2}, [\arg4], \arg5
+ vld1.64 {\arg3}, [\arg4], \arg5
+// } // on exit \arg4 has been post-incremented four times by \arg5
+.endm
+
+.macro STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // input: \arg0~\arg3 = registers to write out, \arg4 = dst* (no alignment qualifier is applied), \arg5 = dst_stride register added to \arg4 after every store
+ vst1.64 {\arg0}, [\arg4], \arg5
+ vst1.64 {\arg1}, [\arg4], \arg5
+ vst1.64 {\arg2}, [\arg4], \arg5
+ vst1.64 {\arg3}, [\arg4], \arg5
+// } // on exit \arg4 has been post-incremented four times by \arg5
+.endm
+
+#endif
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy8x8_neon
+// Copies 8 rows of 8 bytes: reads from [r2] stepping by r3, writes to [r0] stepping by r1; no alignment qualifiers are used. NOTE(review): presumably called from C as (dst, dst_stride, src, src_stride) - confirm against the prototype.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
+// Rows 4-7 go through d4-d7; r2 and r0 were already advanced past rows 0-3 by the macros above.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy16x16_neon
+// Copies 16 rows of 16 bytes from [r2] (stride r3) to [r0] (stride r1). Loads and stores both carry the :128 qualifier, so r0, r2 and both strides must keep every row address 16-byte aligned.
+ LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+ STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+// NOTE(review): presumably q8-q11 are used rather than q4-q7 because d8-d15 are callee-saved under AAPCS - confirm.
+ LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+// Rows 8-15 repeat the same two load/store groups; the pointers keep advancing through the macro post-increments.
+ LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+ STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+ LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy16x16NotAligned_neon
+// Copies 16 rows of 16 bytes from [r2] (stride r3) to [r0] (stride r1) without any alignment qualifier on either side.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+// NOTE(review): presumably q8-q11 are used rather than q4-q7 because d8-d15 are callee-saved under AAPCS - confirm.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+// Rows 8-15 repeat the same two load/store groups; the pointers keep advancing through the macro post-increments.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy16x8NotAligned_neon
+// Copies 8 rows of 16 bytes from [r2] (stride r3) to [r0] (stride r1) without any alignment qualifier on either side.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
+// Rows 4-7 go through q8-q11. NOTE(review): presumably q4-q7 are skipped because d8-d15 are callee-saved under AAPCS - confirm.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
+
+WELS_ASM_FUNC_END
+
+
+WELS_ASM_FUNC_BEGIN WelsCopy8x16_neon
+// Copies 16 rows of 8 bytes from [r2] (stride r3) to [r0] (stride r1); no alignment qualifiers are used.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
+// Each load/store pair below moves four more rows, alternating between d0-d3 and d4-d7.
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
+
+ STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
+
+WELS_ASM_FUNC_END
+
+#endif
--- a/codec/encoder/core/arm/reconstruct_neon.S
+++ b/codec/encoder/core/arm/reconstruct_neon.S
@@ -35,42 +35,6 @@
#include "arm_arch_common_macro.S"
#ifdef __APPLE__
-.macro LOAD_ALIGNED_DATA_WITH_STRIDE
-// { // input: $0~$3, src*, src_stride
- vld1.64 {$0}, [$4,:128], $5
- vld1.64 {$1}, [$4,:128], $5
- vld1.64 {$2}, [$4,:128], $5
- vld1.64 {$3}, [$4,:128], $5
-// }
-.endm
-
-.macro STORE_ALIGNED_DATA_WITH_STRIDE
-// { // input: $0~$3, dst*, dst_stride
- vst1.64 {$0}, [$4,:128], $5
- vst1.64 {$1}, [$4,:128], $5
- vst1.64 {$2}, [$4,:128], $5
- vst1.64 {$3}, [$4,:128], $5
-// }
-.endm
-
-.macro LOAD_UNALIGNED_DATA_WITH_STRIDE
-// { // input: $0~$3, src*, src_stride
- vld1.64 {$0}, [$4], $5
- vld1.64 {$1}, [$4], $5
- vld1.64 {$2}, [$4], $5
- vld1.64 {$3}, [$4], $5
-// }
-.endm
-
-.macro STORE_UNALIGNED_DATA_WITH_STRIDE
-// { // input: $0~$3, dst*, dst_stride
- vst1.64 {$0}, [$4], $5
- vst1.64 {$1}, [$4], $5
- vst1.64 {$2}, [$4], $5
- vst1.64 {$3}, [$4], $5
-// }
-.endm
-
.macro LOAD_4x4_DATA_FOR_DCT
// { // input: $0~$3, src1*, src1_stride, src2*, src2_stride
vld2.16 {$0[0],$1[0]}, [$4], $5
@@ -315,42 +279,6 @@
// }
.endm
#else
-.macro LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-// { // input: \arg0~\arg3, src*, src_stride
- vld1.64 {\arg0}, [\arg4,:128], \arg5
- vld1.64 {\arg1}, [\arg4,:128], \arg5
- vld1.64 {\arg2}, [\arg4,:128], \arg5
- vld1.64 {\arg3}, [\arg4,:128], \arg5
-// }
-.endm
-
-.macro STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-// { // input: \arg0~\arg3, dst*, dst_stride
- vst1.64 {\arg0}, [\arg4,:128], \arg5
- vst1.64 {\arg1}, [\arg4,:128], \arg5
- vst1.64 {\arg2}, [\arg4,:128], \arg5
- vst1.64 {\arg3}, [\arg4,:128], \arg5
-// }
-.endm
-
-.macro LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-// { // input: \arg0~\arg3, src*, src_stride
- vld1.64 {\arg0}, [\arg4], \arg5
- vld1.64 {\arg1}, [\arg4], \arg5
- vld1.64 {\arg2}, [\arg4], \arg5
- vld1.64 {\arg3}, [\arg4], \arg5
-// }
-.endm
-
-.macro STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
-// { // input: \arg0~\arg3, dst*, dst_stride
- vst1.64 {\arg0}, [\arg4], \arg5
- vst1.64 {\arg1}, [\arg4], \arg5
- vst1.64 {\arg2}, [\arg4], \arg5
- vst1.64 {\arg3}, [\arg4], \arg5
-// }
-.endm
-
.macro LOAD_4x4_DATA_FOR_DCT arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7
// { // input: \arg0~\arg3, src1*, src1_stride, src2*, src2_stride
vld2.16 {\arg0[0],\arg1[0]}, [\arg4], \arg5
@@ -595,96 +523,6 @@
// }
.endm
#endif
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy8x8_neon
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy16x16_neon
-
- LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
- LOAD_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_ALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_ALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy16x16NotAligned_neon
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy16x8NotAligned_neon
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q0, q1, q2, q3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE q8, q9, q10, q11, r0, r1
-
-WELS_ASM_FUNC_END
-
-
-WELS_ASM_FUNC_BEGIN WelsCopy8x16_neon
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d0, d1, d2, d3, r0, r1
-
- LOAD_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r2, r3
-
- STORE_UNALIGNED_DATA_WITH_STRIDE d4, d5, d6, d7, r0, r1
-
-WELS_ASM_FUNC_END
-
WELS_ASM_FUNC_BEGIN WelsDctT4_neon