shithub: openh264

Download patch

ref: 0a14d4c79b88098eebfaa44dbbd78ae98accf08a
parent: 21687726d05641bd8d627128e6ce564df3a56aa2
author: Martin Storsjö <martin@martin.st>
date: Wed Jan 29 09:45:47 EST 2014

Make the deblocking handling endian independent

This avoids having to set a define indicating the endianness.

This doesn't incur any significant measurable slowdown.

--- a/codec/common/deblocking_common.h
+++ b/codec/common/deblocking_common.h
@@ -15,12 +15,6 @@
                             int8_t* pTc);
 void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
 
-#ifdef WORDS_BIGENDIAN
-#define DEBLOCK_BS_SHIFTED(x) ((x) | ((x) << 8))
-#else
-#define DEBLOCK_BS_SHIFTED(x) ((x) | ((x) >> 8))
-#endif
-
 #if defined(__cplusplus)
 extern "C" {
 #endif//__cplusplus
--- a/codec/decoder/core/src/deblocking.cpp
+++ b/codec/decoder/core/src/deblocking.cpp
@@ -145,7 +145,6 @@
 
 void_t inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
   uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
-  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4);
 
   uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
   uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
@@ -152,27 +151,23 @@
   uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
   uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
 
-  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b0) << iLShiftFactor;
-  nBS[0][1][0] = uiBsx3[0];
-  nBS[0][2][0] = uiBsx3[1];
-  nBS[0][3][0] = uiBsx3[2];
+  nBS[0][1][0] = (pNnzTab[0] | pNnzTab[1]) << iLShiftFactor;
+  nBS[0][2][0] = (pNnzTab[1] | pNnzTab[2]) << iLShiftFactor;
+  nBS[0][3][0] = (pNnzTab[2] | pNnzTab[3]) << iLShiftFactor;
 
-  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b1) << iLShiftFactor;
-  nBS[0][1][1] = uiBsx3[0];
-  nBS[0][2][1] = uiBsx3[1];
-  nBS[0][3][1] = uiBsx3[2];
+  nBS[0][1][1] = (pNnzTab[4] | pNnzTab[5]) << iLShiftFactor;
+  nBS[0][2][1] = (pNnzTab[5] | pNnzTab[6]) << iLShiftFactor;
+  nBS[0][3][1] = (pNnzTab[6] | pNnzTab[7]) << iLShiftFactor;
   * (uint32_t*)nBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor;
 
-  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b2) << iLShiftFactor;
-  nBS[0][1][2] = uiBsx3[0];
-  nBS[0][2][2] = uiBsx3[1];
-  nBS[0][3][2] = uiBsx3[2];
+  nBS[0][1][2] = (pNnzTab[8]  | pNnzTab[9])  << iLShiftFactor;
+  nBS[0][2][2] = (pNnzTab[9]  | pNnzTab[10]) << iLShiftFactor;
+  nBS[0][3][2] = (pNnzTab[10] | pNnzTab[11]) << iLShiftFactor;
   * (uint32_t*)nBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor;
 
-  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b3) << iLShiftFactor;
-  nBS[0][1][3] = uiBsx3[0];
-  nBS[0][2][3] = uiBsx3[1];
-  nBS[0][3][3] = uiBsx3[2];
+  nBS[0][1][3] = (pNnzTab[12] | pNnzTab[13]) << iLShiftFactor;
+  nBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor;
+  nBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor;
   * (uint32_t*)nBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor;
 
 }
@@ -188,22 +183,26 @@
   uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
   uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
 
-  * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b0);
+  for (int i = 0; i < 3; i++)
+      uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
   nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
   nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
   nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
 
-  * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b1);
+  for (int i = 0; i < 3; i++)
+      uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
   nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
   nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
   nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
 
-  * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b2);
+  for (int i = 0; i < 3; i++)
+      uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
   nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
   nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
   nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
 
-  * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b3);
+  for (int i = 0; i < 3; i++)
+      uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
   nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
   nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
   nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
--- a/codec/encoder/core/src/deblocking.cpp
+++ b/codec/encoder/core/src/deblocking.cpp
@@ -156,7 +156,6 @@
 
 void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t uiBS[2][4][4], int32_t iLShiftFactor) {
   uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
-  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4);
 
   uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
   uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
@@ -163,27 +162,23 @@
   uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
   uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
 
-  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b0) << iLShiftFactor;
-  uiBS[0][1][0] = uiBsx3[0];
-  uiBS[0][2][0] = uiBsx3[1];
-  uiBS[0][3][0] = uiBsx3[2];
+  uiBS[0][1][0] = (pNnzTab[0] | pNnzTab[1]) << iLShiftFactor;
+  uiBS[0][2][0] = (pNnzTab[1] | pNnzTab[2]) << iLShiftFactor;
+  uiBS[0][3][0] = (pNnzTab[2] | pNnzTab[3]) << iLShiftFactor;
 
-  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b1) << iLShiftFactor;
-  uiBS[0][1][1] = uiBsx3[0];
-  uiBS[0][2][1] = uiBsx3[1];
-  uiBS[0][3][1] = uiBsx3[2];
+  uiBS[0][1][1] = (pNnzTab[4] | pNnzTab[5]) << iLShiftFactor;
+  uiBS[0][2][1] = (pNnzTab[5] | pNnzTab[6]) << iLShiftFactor;
+  uiBS[0][3][1] = (pNnzTab[6] | pNnzTab[7]) << iLShiftFactor;
   * (uint32_t*)uiBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor;
 
-  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b2) << iLShiftFactor;
-  uiBS[0][1][2] = uiBsx3[0];
-  uiBS[0][2][2] = uiBsx3[1];
-  uiBS[0][3][2] = uiBsx3[2];
+  uiBS[0][1][2] = (pNnzTab[8]  | pNnzTab[9])  << iLShiftFactor;
+  uiBS[0][2][2] = (pNnzTab[9]  | pNnzTab[10]) << iLShiftFactor;
+  uiBS[0][3][2] = (pNnzTab[10] | pNnzTab[11]) << iLShiftFactor;
   * (uint32_t*)uiBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor;
 
-  * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b3) << iLShiftFactor;
-  uiBS[0][1][3] = uiBsx3[0];
-  uiBS[0][2][3] = uiBsx3[1];
-  uiBS[0][3][3] = uiBsx3[2];
+  uiBS[0][1][3] = (pNnzTab[12] | pNnzTab[13]) << iLShiftFactor;
+  uiBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor;
+  uiBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor;
   * (uint32_t*)uiBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor;
 
 }
@@ -197,22 +192,26 @@
   uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
   uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
 
-  * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b0);
+  for (int i = 0; i < 3; i++)
+      uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
   uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0);
   uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1);
   uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2);
 
-  * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b1);
+  for (int i = 0; i < 3; i++)
+      uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
   uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4);
   uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5);
   uiBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 7, 6);
 
-  * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b2);
+  for (int i = 0; i < 3; i++)
+      uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
   uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8);
   uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9);
   uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10);
 
-  * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b3);
+  for (int i = 0; i < 3; i++)
+      uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
   uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12);
   uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13);
   uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14);