ref: cadbec75d8d8421704f4d3991d7646b612fd9281
parent: 47ec78843a97673fe821922bd2e5dc10e2f1de2b
author: Martin Storsjö <martin@martin.st>
date: Wed Jan 29 07:49:38 EST 2014
Fix shifting tricks in deblocking for big endian The code interprets an array of 4 uint8_t values as one uint32_t and does shifts on the value. The same optimization can be kept in big endian as well, but the shift has to be done in the other direction. This code could be made truly independent of endianness, but that could cause some minimal performance degradaion, at least in theory. This makes "make test" pass on big endian, assuming that WORDS_BIGENDIAN is defined while building.
--- a/codec/common/deblocking_common.h
+++ b/codec/common/deblocking_common.h
@@ -15,6 +15,12 @@
int8_t* pTc);
void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+#ifdef WORDS_BIGENDIAN
+#define DEBLOCK_BS_SHIFTED(x) ((x) | ((x) << 8))
+#else
+#define DEBLOCK_BS_SHIFTED(x) ((x) | ((x) >> 8))
+#endif
+
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
--- a/codec/decoder/core/src/deblocking.cpp
+++ b/codec/decoder/core/src/deblocking.cpp
@@ -152,24 +152,24 @@
uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
- * (uint32_t*)uiBsx3 = (uiNnz32b0 | (uiNnz32b0 >> 8)) << iLShiftFactor;
+ * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b0) << iLShiftFactor;
nBS[0][1][0] = uiBsx3[0];
nBS[0][2][0] = uiBsx3[1];
nBS[0][3][0] = uiBsx3[2];
- * (uint32_t*)uiBsx3 = (uiNnz32b1 | (uiNnz32b1 >> 8)) << iLShiftFactor;
+ * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b1) << iLShiftFactor;
nBS[0][1][1] = uiBsx3[0];
nBS[0][2][1] = uiBsx3[1];
nBS[0][3][1] = uiBsx3[2];
* (uint32_t*)nBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor;
- * (uint32_t*)uiBsx3 = (uiNnz32b2 | (uiNnz32b2 >> 8)) << iLShiftFactor;
+ * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b2) << iLShiftFactor;
nBS[0][1][2] = uiBsx3[0];
nBS[0][2][2] = uiBsx3[1];
nBS[0][3][2] = uiBsx3[2];
* (uint32_t*)nBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor;
- * (uint32_t*)uiBsx3 = (uiNnz32b3 | (uiNnz32b3 >> 8)) << iLShiftFactor;
+ * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b3) << iLShiftFactor;
nBS[0][1][3] = uiBsx3[0];
nBS[0][2][3] = uiBsx3[1];
nBS[0][3][3] = uiBsx3[2];
@@ -188,22 +188,22 @@
uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
- * (uint32_t*)uiBsx4 = (uiNnz32b0 | (uiNnz32b0 >> 8));
+ * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b0);
nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
- * (uint32_t*)uiBsx4 = (uiNnz32b1 | (uiNnz32b1 >> 8));
+ * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b1);
nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
- * (uint32_t*)uiBsx4 = (uiNnz32b2 | (uiNnz32b2 >> 8));
+ * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b2);
nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
- * (uint32_t*)uiBsx4 = (uiNnz32b3 | (uiNnz32b3 >> 8));
+ * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b3);
nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
--- a/codec/encoder/core/src/deblocking.cpp
+++ b/codec/encoder/core/src/deblocking.cpp
@@ -164,24 +164,24 @@
uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
- * (uint32_t*)uiBsx3 = (uiNnz32b0 | (uiNnz32b0 >> 8)) << iLShiftFactor;
+ * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b0) << iLShiftFactor;
uiBS[0][1][0] = uiBsx3[0];
uiBS[0][2][0] = uiBsx3[1];
uiBS[0][3][0] = uiBsx3[2];
- * (uint32_t*)uiBsx3 = (uiNnz32b1 | (uiNnz32b1 >> 8)) << iLShiftFactor;
+ * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b1) << iLShiftFactor;
uiBS[0][1][1] = uiBsx3[0];
uiBS[0][2][1] = uiBsx3[1];
uiBS[0][3][1] = uiBsx3[2];
* (uint32_t*)uiBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor;
- * (uint32_t*)uiBsx3 = (uiNnz32b2 | (uiNnz32b2 >> 8)) << iLShiftFactor;
+ * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b2) << iLShiftFactor;
uiBS[0][1][2] = uiBsx3[0];
uiBS[0][2][2] = uiBsx3[1];
uiBS[0][3][2] = uiBsx3[2];
* (uint32_t*)uiBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor;
- * (uint32_t*)uiBsx3 = (uiNnz32b3 | (uiNnz32b3 >> 8)) << iLShiftFactor;
+ * (uint32_t*)uiBsx3 = DEBLOCK_BS_SHIFTED (uiNnz32b3) << iLShiftFactor;
uiBS[0][1][3] = uiBsx3[0];
uiBS[0][2][3] = uiBsx3[1];
uiBS[0][3][3] = uiBsx3[2];
@@ -198,22 +198,22 @@
uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
- * (uint32_t*)uiBsx4 = (uiNnz32b0 | (uiNnz32b0 >> 8));
+ * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b0);
uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0);
uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1);
uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2);
- * (uint32_t*)uiBsx4 = (uiNnz32b1 | (uiNnz32b1 >> 8));
+ * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b1);
uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4);
uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5);
uiBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 7, 6);
- * (uint32_t*)uiBsx4 = (uiNnz32b2 | (uiNnz32b2 >> 8));
+ * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b2);
uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8);
uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9);
uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10);
- * (uint32_t*)uiBsx4 = (uiNnz32b3 | (uiNnz32b3 >> 8));
+ * (uint32_t*)uiBsx4 = DEBLOCK_BS_SHIFTED (uiNnz32b3);
uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12);
uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13);
uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14);