shithub: openh264

Download patch

ref: 039a54780478b233626007993b49d43b51709ff1
parent: 427da1c990923fa68632f4a7b99294da78cbfeb2
author: Licai Guo <guolicai@gmail.com>
date: Fri Apr 18 20:33:23 EDT 2014

Give accurate alignment information for the MC copy functions.
This can improve performance on targets such as JavaScript.

--- a/codec/common/inc/ls_defines.h
+++ b/codec/common/inc/ls_defines.h
@@ -51,11 +51,36 @@
 #define LD16(a) (((struct tagUnaligned_16 *) (a))->l)
 #define LD32(a) (((struct tagUnaligned_32 *) (a))->l)
 #define LD64(a) (((struct tagUnaligned_64 *) (a))->l)
+
+#define STRUCTA(size, align) struct tagUnaligned_##size##_##align { /* wrapper type: one uintN_t accessed with alignment `align` */ \
+    uint##size##_t l; \
+} __attribute__ ((packed, aligned(align))) /* packed is required: aligned() alone can only raise a struct's alignment, never lower it */
+STRUCTA(16,2);
+STRUCTA(32,2);
+STRUCTA(32,4);
+STRUCTA(64,2);
+STRUCTA(64,4);
+STRUCTA(64,8);
 //#define _USE_STRUCT_INT_CVT
 //	#ifdef _USE_STRUCT_INT_CVT
 #define ST16(a, b) (((struct tagUnaligned_16 *) (a))->l) = (b)
 #define ST32(a, b) (((struct tagUnaligned_32 *) (a))->l) = (b)
 #define ST64(a, b) (((struct tagUnaligned_64 *) (a))->l) = (b)
+
+#define LDA(a, size, align) (((struct tagUnaligned_##size##_##align *) (a))->l)
+#define STA(a, b, size, align) (((struct tagUnaligned_##size##_##align *) (a))->l) = (b)
+#define LD16A2(a) LDA(a, 16, 2)
+#define LD32A2(a) LDA(a, 32, 2)
+#define LD32A4(a) LDA(a, 32, 4)
+#define LD64A2(a) LDA(a, 64, 2)
+#define LD64A4(a) LDA(a, 64, 4)
+#define LD64A8(a) LDA(a, 64, 8)
+#define ST16A2(a, b) STA(a, b, 16, 2)
+#define ST32A2(a, b) STA(a, b, 32, 2)
+#define ST32A4(a, b) STA(a, b, 32, 4)
+#define ST64A2(a, b) STA(a, b, 64, 2)
+#define ST64A4(a, b) STA(a, b, 64, 4)
+#define ST64A8(a, b) STA(a, b, 64, 8)
 //	#else
 //		inline void __ST16(void *dst, uint16_t v) { memcpy(dst, &v, 2); }
 //		inline void __ST32(void *dst, uint32_t v) { memcpy(dst, &v, 4); }
@@ -75,6 +100,18 @@
 #define ST16(a, b) *((uint16_t*)(a)) = (b)
 #define ST32(a, b) *((uint32_t*)(a)) = (b)
 #define ST64(a, b) *((uint64_t*)(a)) = (b)
+#define LD16A2 LD16
+#define LD32A2 LD32
+#define LD32A4 LD32
+#define LD64A2 LD64
+#define LD64A4 LD64
+#define LD64A8 LD64
+#define ST16A2 ST16
+#define ST32A2 ST32
+#define ST32A4 ST32
+#define ST64A2 ST64
+#define ST64A4 ST64
+#define ST64A8 ST64
 
 #endif /* !__GNUC__ */
 
--- a/codec/decoder/core/src/mc.cpp
+++ b/codec/decoder/core/src/mc.cpp
@@ -94,7 +94,7 @@
                                        int32_t iHeight) {
   int32_t i;
   for (i = 0; i < iHeight; i++) { // iWidth == 2 only for chroma
-    ST16 (pDst, LD16 (pSrc));
+    ST16A2 (pDst, LD16 (pSrc));
     pDst += iDstStride;
     pSrc += iSrcStride;
   }
@@ -104,7 +104,7 @@
                                        int32_t iHeight) {
   int32_t i;
   for (i = 0; i < iHeight; i++) {
-    ST32 (pDst, LD32 (pSrc));
+    ST32A4 (pDst, LD32 (pSrc));
     pDst += iDstStride;
     pSrc += iSrcStride;
   }
@@ -114,7 +114,7 @@
                                        int32_t iHeight) {
   int32_t i;
   for (i = 0; i < iHeight; i++) {
-    ST64 (pDst, LD64 (pSrc));
+    ST64A8 (pDst, LD64 (pSrc));
     pDst += iDstStride;
     pSrc += iSrcStride;
   }
@@ -124,8 +124,8 @@
                                         int32_t iHeight) {
   int32_t i;
   for (i = 0; i < iHeight; i++) {
-    ST64 (pDst  , LD64 (pSrc));
-    ST64 (pDst + 8, LD64 (pSrc + 8));
+    ST64A8 (pDst  , LD64 (pSrc));
+    ST64A8 (pDst + 8, LD64 (pSrc + 8));
     pDst += iDstStride;
     pSrc += iSrcStride;
   }
@@ -202,7 +202,7 @@
 
 static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
                                    int32_t iHeight) {
-  int16_t iTmp[16 + 5] = {0}; //16
+  int16_t iTmp[16 + 5]; //16
   int32_t i, j, k;
 
   for (i = 0; i < iHeight; i++) {