shithub: openh264

Download patch

ref: eace9b7b00bf5f87a0ace0e55ca5b13eb8c9407c
parent: 95ac333f3bf2a915ef2736888e695f1b284b3660
author: dongzhang <dongzha@cisco.com>
date: Tue Jul 8 07:18:45 EDT 2014

add MemoryZero Arm64 code and UT

--- a/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj
@@ -48,6 +48,7 @@
 		9AED665019469FC1009A3567 /* welsCodecTrace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AED664C19469FC1009A3567 /* welsCodecTrace.cpp */; };
 		9AED66661946A2B3009A3567 /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AED66651946A2B3009A3567 /* utils.cpp */; };
 		F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */; };
+		F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXCopyFilesBuildPhase section */
@@ -157,6 +158,7 @@
 		9AED66651946A2B3009A3567 /* utils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = utils.cpp; path = ../../../common/src/utils.cpp; sourceTree = "<group>"; };
 		9AED66671946A2C4009A3567 /* utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = utils.h; path = ../../../common/inc/utils.h; sourceTree = "<group>"; };
 		F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = reconstruct_aarch64_neon.S; path = arm64/reconstruct_aarch64_neon.S; sourceTree = "<group>"; };
+		F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = memory_aarch64_neon.S; path = arm64/memory_aarch64_neon.S; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -186,6 +188,7 @@
 		4CB8F2B219235FAC005D6386 /* arm64 */ = {
 			isa = PBXGroup;
 			children = (
+				F5BE8004196B913200ED02ED /* memory_aarch64_neon.S */,
 				F5617A4F196A833A006E2B20 /* reconstruct_aarch64_neon.S */,
 				4C23BC5F195A77E0003B81FC /* intra_pred_sad_3_opt_aarch64_neon.S */,
 				4CBC1B82194ACBB400214D9E /* intra_pred_aarch64_neon.S */,
@@ -431,6 +434,7 @@
 				4C23BC60195A77E0003B81FC /* intra_pred_sad_3_opt_aarch64_neon.S in Sources */,
 				4CE4472B18BC605C0017DF25 /* wels_preprocess.cpp in Sources */,
 				4CE4470E18BC605C0017DF25 /* au_set.cpp in Sources */,
+				F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */,
 				4CBC1B83194ACBB400214D9E /* intra_pred_aarch64_neon.S in Sources */,
 				4CE4471718BC605C0017DF25 /* mc.cpp in Sources */,
 				F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */,
--- /dev/null
+++ b/codec/encoder/core/arm64/memory_aarch64_neon.S
@@ -1,0 +1,63 @@
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef  HAVE_NEON_AARCH64
+.text
+#include "arm_arch64_common_macro.S"
+
+// void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize); x0 = pDst, w1 = iSize
+WELS_ASM_AARCH64_FUNC_BEGIN WelsSetMemZero_AArch64_neon
+  eor v0.16b, v0.16b, v0.16b        // v0 = 16 zero bytes, the source of every store below
+  cmp w1, #32                       // iSize is int32_t: test w1, bits 63:32 of x1 are unspecified
+  b.eq mem_zero_32_neon_start       // iSize == 32
+  b.lt mem_zero_24_neon_start       // iSize < 32: always stores 24 bytes, whatever iSize is
+mem_zero_loop:                      // iSize > 32: 64 bytes per pass
+  subs w1, w1, #64
+  st1 {v0.16b}, [x0], #16
+  st1 {v0.16b}, [x0], #16
+  st1 {v0.16b}, [x0], #16
+  st1 {v0.16b}, [x0], #16
+  b.ne mem_zero_loop                // exits only when the count hits exactly 0: iSize must be a multiple of 64
+  b mem_zero_end
+
+mem_zero_32_neon_start:
+  st1 {v0.16b}, [x0], #16
+  st1 {v0.16b}, [x0], #16
+  b mem_zero_end
+mem_zero_24_neon_start:
+  st1 {v0.16b}, [x0], #16
+  st1 {v0.8b}, [x0], #8             // low 8 bytes of v0 complete the 24-byte store
+mem_zero_end:
+
+WELS_ASM_AARCH64_FUNC_END
+
+#endif
--- a/codec/encoder/core/inc/encoder.h
+++ b/codec/encoder/core/inc/encoder.h
@@ -129,6 +129,8 @@
 void WelsPrefetchZero_mmx (int8_t const* kpDst);
 #elif defined(HAVE_NEON)
 void WelsSetMemZero_neon (void* pDst, int32_t iSize);
+#elif defined(HAVE_NEON_AARCH64)
+void WelsSetMemZero_AArch64_neon (void* pDst, int32_t iSize); // AArch64 NEON zero-fill, defined in core/arm64/memory_aarch64_neon.S
 #endif
 
 #if defined(__cplusplus)
--- a/codec/encoder/core/src/encoder.cpp
+++ b/codec/encoder/core/src/encoder.cpp
@@ -179,6 +179,14 @@
   }
 #endif
 
+#if defined(HAVE_NEON_AARCH64)
+  if (uiCpuFlag & WELS_CPU_NEON) {
+    pFuncList->pfSetMemZeroSize8	= WelsSetMemZero_AArch64_neon;
+    pFuncList->pfSetMemZeroSize64Aligned16	= WelsSetMemZero_AArch64_neon;
+    pFuncList->pfSetMemZeroSize64	= WelsSetMemZero_AArch64_neon;
+  }
+#endif
+
   InitExpandPictureFunc (& (pFuncList->sExpandPicFunc), uiCpuFlag);
 
   /* Intra_Prediction_fn*/
--- a/codec/encoder/targets.mk
+++ b/codec/encoder/targets.mk
@@ -61,6 +61,7 @@
 ENCODER_ASM_ARM64_SRCS=\
 	$(ENCODER_SRCDIR)/core/arm64/intra_pred_aarch64_neon.S\
 	$(ENCODER_SRCDIR)/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S\
+	$(ENCODER_SRCDIR)/core/arm64/memory_aarch64_neon.S\
 	$(ENCODER_SRCDIR)/core/arm64/pixel_aarch64_neon.S\
         $(ENCODER_SRCDIR)/core/arm64/reconstruct_aarch64_neon.S\
 
--- /dev/null
+++ b/test/encoder/EncUT_MemoryZero.cpp
@@ -1,0 +1,94 @@
+#include<gtest/gtest.h>
+#include<math.h>
+#include<stdlib.h>
+#include<time.h>
+
+#include "cpu_core.h"
+#include "cpu.h"
+#include "macros.h"
+#include "wels_func_ptr_def.h"
+#include "../../codec/encoder/core/src/encoder.cpp"
+
+using namespace WelsSVCEnc;
+#define MEMORYZEROTEST_NUM 1000
+// Checks every pfSetMemZero* entry selected for the detected CPU against WelsSetMemZero_c on 0xFF-filled buffers.
+TEST (SetMemZeroFunTest, WelsSetMemZero) {
+  int32_t iLen =64;
+  int32_t iCpuCores = 0;
+  SWelsFuncPtrList sFuncPtrList;
+  uint32_t uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores);
+  /* Start from the portable C routine, then override entries according to the detected CPU flags */
+  sFuncPtrList.pfSetMemZeroSize8	= WelsSetMemZero_c;		// confirmed_safe_unsafe_usage
+  sFuncPtrList.pfSetMemZeroSize64Aligned16	= WelsSetMemZero_c;	// confirmed_safe_unsafe_usage
+  sFuncPtrList.pfSetMemZeroSize64	= WelsSetMemZero_c;	// confirmed_safe_unsafe_usage
+#if defined(X86_ASM)
+  if (uiCpuFlag & WELS_CPU_MMXEXT) {
+    sFuncPtrList.pfSetMemZeroSize8	= WelsSetMemZeroSize8_mmx;		// confirmed_safe_unsafe_usage
+    sFuncPtrList.pfSetMemZeroSize64Aligned16	= WelsSetMemZeroSize64_mmx;	// confirmed_safe_unsafe_usage
+    sFuncPtrList.pfSetMemZeroSize64	= WelsSetMemZeroSize64_mmx;	// confirmed_safe_unsafe_usage
+  }
+  if (uiCpuFlag & WELS_CPU_SSE2) {
+    sFuncPtrList.pfSetMemZeroSize64Aligned16	= WelsSetMemZeroAligned64_sse2;	// confirmed_safe_unsafe_usage
+  }
+#endif//X86_ASM
+  // HAVE_NEON build: a single NEON routine backs all three entries
+#if defined(HAVE_NEON)
+  if (uiCpuFlag & WELS_CPU_NEON) {
+    sFuncPtrList.pfSetMemZeroSize8	= WelsSetMemZero_neon;
+    sFuncPtrList.pfSetMemZeroSize64Aligned16	= WelsSetMemZero_neon;
+    sFuncPtrList.pfSetMemZeroSize64	= WelsSetMemZero_neon;
+  }
+#endif
+  // HAVE_NEON_AARCH64 build: a single AArch64 NEON routine backs all three entries
+#if defined(HAVE_NEON_AARCH64)
+  if (uiCpuFlag & WELS_CPU_NEON) {
+    sFuncPtrList.pfSetMemZeroSize8	= WelsSetMemZero_AArch64_neon;
+    sFuncPtrList.pfSetMemZeroSize64Aligned16	= WelsSetMemZero_AArch64_neon;
+    sFuncPtrList.pfSetMemZeroSize64	= WelsSetMemZero_AArch64_neon;
+  }
+#endif
+  // Two rows of 64*101 bytes: row 0 takes the C reference result, row 1 the result under test (presumably 16-byte aligned, per the macro's last argument; confirm in macros.h)
+  ENFORCE_STACK_ALIGN_2D (uint8_t, pInputAlign, 2, 64*101, 16)
+  // Case 1: pfSetMemZeroSize64Aligned16 at the row start, random length that is a multiple of 64 in [64, 6400]
+  for (int32_t k = 0; k < MEMORYZEROTEST_NUM; k++) {
+    memset(pInputAlign[0], 255, 64*101);
+    memset(pInputAlign[1], 255, 64*101);
+    iLen = 64*(1+(rand()%100)); // NOTE(review): no srand() call in this test, so the lengths presumably repeat on every run
+    WelsSetMemZero_c(pInputAlign[0],iLen);
+    sFuncPtrList.pfSetMemZeroSize64Aligned16(pInputAlign[1],iLen);
+    for (int32_t i = 0 ; i < 64*101; i++) { // compare the whole row so writes past iLen are caught as well
+        ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
+    }
+  }
+  // Case 2: pfSetMemZeroSize64 with the destination moved 1 byte past the row start, same length distribution
+  for (int32_t k = 0; k < MEMORYZEROTEST_NUM; k++) {
+    memset(pInputAlign[0], 255, 64*101);
+    memset(pInputAlign[1], 255, 64*101);
+    iLen = 64*(1+(rand()%100));
+    WelsSetMemZero_c(pInputAlign[0]+1,iLen);
+    sFuncPtrList.pfSetMemZeroSize64(pInputAlign[1]+1,iLen);
+    for (int32_t i = 0 ; i < 64*101; i++) {
+      ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
+    }
+  }
+  // Case 3: pfSetMemZeroSize8 with a fixed 32-byte length at offset 1
+  memset(pInputAlign[0], 255, 64*101);
+  memset(pInputAlign[1], 255, 64*101);
+  iLen = 32;
+  WelsSetMemZero_c(pInputAlign[0]+1,iLen);
+  sFuncPtrList.pfSetMemZeroSize8(pInputAlign[1]+1,iLen);
+  for (int32_t i = 0 ; i < 64*101; i++) {
+    ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
+  }
+  // Case 4: pfSetMemZeroSize8 with a fixed 24-byte length at offset 1
+  memset(pInputAlign[0], 255, 64*101);
+  memset(pInputAlign[1], 255, 64*101);
+  iLen = 24;
+  WelsSetMemZero_c(pInputAlign[0]+1,iLen);
+  sFuncPtrList.pfSetMemZeroSize8(pInputAlign[1]+1,iLen);
+  for (int32_t i = 0 ; i < 64*101; i++) {
+    ASSERT_EQ (pInputAlign[0][i], pInputAlign[1][i]);
+  }
+}
+
+
--- a/test/encoder/targets.mk
+++ b/test/encoder/targets.mk
@@ -7,6 +7,7 @@
 	$(ENCODER_UNITTEST_SRCDIR)/EncUT_ExpGolomb.cpp\
 	$(ENCODER_UNITTEST_SRCDIR)/EncUT_GetIntraPredictor.cpp\
 	$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryAlloc.cpp\
+	$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryZero.cpp\
 	$(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionEstimate.cpp\
         $(ENCODER_UNITTEST_SRCDIR)/EncUT_Reconstruct.cpp\
 	$(ENCODER_UNITTEST_SRCDIR)/EncUT_Sample.cpp\