ref: a4370b8ca8d66eda8209c4e0f8c4cb2c126c8e2e
parent: 6a10553dcec652f085c72fcceb3bf733af479456
parent: 89f78e92112b3b7cd5a867008f672c01f41bace6
author: dongzha <dongzha@cisco.com>
date: Tue Dec 16 11:31:10 EST 2014
Merge pull request #1637 from dongzha/SpeedUpSkipMode: astyle and speed up decoding by removing unnecessary memset
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -101,7 +101,7 @@
++iCountNumMb;
if (!pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite
pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
- pCtx->pDec->iMbEcedPropNum += (pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex]? 1 : 0);
+ pCtx->pDec->iMbEcedPropNum += (pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0);
++pCtx->iTotalNumMbRec;
}
@@ -529,7 +529,7 @@
//get uiCbp for I4x4
WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
pCurLayer->pCbp[iMbXy] = uiCbp;
- pSlice->iLastDeltaQp = uiCbp == 0 ? 0: pSlice->iLastDeltaQp;
+ pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
uiCbpChroma = uiCbp >> 4;
uiCbpLuma = uiCbp & 15;
} else { //I16x16;
@@ -542,7 +542,7 @@
WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurLayer));
}
- memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+
ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
@@ -558,6 +558,7 @@
}
if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
int32_t iQpDelta, iId8x8, iId4x4;
WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range
@@ -722,18 +723,6 @@
}
}
- if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
- WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
- pCurLayer->pCbp[iMbXy] = uiCbp;
- pSlice->iLastDeltaQp = uiCbp == 0 ? 0: pSlice->iLastDeltaQp;
- uiCbpChroma = pCurLayer->pCbp[iMbXy] >> 4;
- uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
- }
-
- pCtx->sBlockFunc.pWelsBlockZero16x16Func (pCurLayer->pScaledTCoeff[iMbXy], 16);
- pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256, 8);
- pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256 + 64, 8);
-
ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
@@ -741,7 +730,17 @@
ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+ if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+ WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
+ pCurLayer->pCbp[iMbXy] = uiCbp;
+ pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
+ uiCbpChroma = pCurLayer->pCbp[iMbXy] >> 4;
+ uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+ }
+
if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+
int32_t iQpDelta, iId8x8, iId4x4;
WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
@@ -863,7 +862,7 @@
pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
- pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[0]&&ppRefPic[0]->bIsComplete);
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete);
//predict mv
PredPSkipMvFromNeighbor (pCurLayer, pMv);
for (i = 0; i < 16; i++) {
@@ -871,9 +870,9 @@
ST32 (pCurLayer->pMvd[0][iMbXy][i], 0);
}
- if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
- memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
- }
+ //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
+ // memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+ //}
//reset rS
pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb
@@ -1071,7 +1070,7 @@
pCurLayer->pLumaQp[iMbXy] = 0;
pCurLayer->pChromaQp[iMbXy] = 0;
memset (pNzc, 16, sizeof (pCurLayer->pNzc[iMbXy])); //Rec. 9.2.1 for PCM, nzc=16
- WELS_READ_VERIFY(InitReadBits (pBs, 0));
+ WELS_READ_VERIFY (InitReadBits (pBs, 0));
return 0;
} else if (0 == uiMbType) { //reference to JM
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
@@ -1101,7 +1100,6 @@
WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer));
}
- memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
ST32A4 (&pNzc[0], 0);
ST32A4 (&pNzc[4], 0);
ST32A4 (&pNzc[8], 0);
@@ -1117,6 +1115,7 @@
}
if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+ memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
int32_t iQpDelta, iId8x8, iId4x4;
WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
@@ -1551,7 +1550,7 @@
pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
- pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[0]&&ppRefPic[0]->bIsComplete);
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete);
//predict iMv
PredPSkipMvFromNeighbor (pCurLayer, iMv);
for (i = 0; i < 16; i++) {
@@ -1558,9 +1557,9 @@
ST32A2 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)iMv);
}
- if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
- memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
- }
+ //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
+ // memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+ //}
//reset rS
if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||