ref: 3022d4f6559596e0a6a1fae4d638f3c02f7d2a9e
parent: b304687197cf59ed24ee6fc4756f7f91b72f613f
parent: 63926a3d10ca35db6ecaa35a6271abc43acbe06f
author: huili2 <huili2@cisco.com>
date: Thu Mar 5 04:39:31 EST 2015
Merge pull request #1832 from sijchen/imp_mc [Encoder] replace conditional judgements with faster operations
--- a/codec/encoder/core/inc/md.h
+++ b/codec/encoder/core/inc/md.h
@@ -122,6 +122,7 @@
uint8_t* pQuarPixBest;
uint8_t* pQuarPixTmp;
+PCopyFunc pfCopyBlockByMode;
} SMeRefinePointer;
void FillNeighborCacheIntra (SMbCache* pMbCache, SMB* pCurMb, int32_t iMbWidth/*, bool constrained_intra_pred_flag*/);
--- a/codec/encoder/core/inc/svc_enc_frame.h
+++ b/codec/encoder/core/inc/svc_enc_frame.h
@@ -93,6 +93,8 @@
int16_t iMbHeight; // MB height of this picture, equal to pSps.iMbHeight;
bool bBaseLayerAvailableFlag; // whether base layer is available for prediction?
+bool bSatdInMdFlag; // whether SATD is calculated in ME and integer-pel MD
+
uint8_t iLoopFilterDisableIdc; // 0: on, 1: off, 2: on except for slice boundaries
int8_t iLoopFilterAlphaC0Offset;// AlphaOffset: valid range [-6, 6], default 0
int8_t iLoopFilterBetaOffset; // BetaOffset: valid range [-6, 6], default 0
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -3046,6 +3046,10 @@
pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
}
}
+
+ // cache layer-dependent flags once per layer so the same checks are not repeated at MB level
+ pCurLayer->bSatdInMdFlag = ((pFuncList->sSampleDealingFuncs.pfMeCost == pFuncList->sSampleDealingFuncs.pfSampleSatd)
+ && (pFuncList->sSampleDealingFuncs.pfMdCost == pFuncList->sSampleDealingFuncs.pfSampleSatd));
}
/*!
--- a/codec/encoder/core/src/md.cpp
+++ b/codec/encoder/core/src/md.cpp
@@ -598,8 +598,7 @@
int32_t iCurCost;
int32_t iBestHalfPix;
- if ((pFunc->sSampleDealingFuncs.pfMeCost == pFunc->sSampleDealingFuncs.pfSampleSatd)
- && (pFunc->sSampleDealingFuncs.pfMdCost == pFunc->sSampleDealingFuncs.pfSampleSatd)) {
+ if (pEncCtx->pCurDqLayer->bSatdInMdFlag) {
iBestCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, iMvx - pMe->sMvp.iMvX, iMvy - pMe->sMvp.iMvY);
} else {
iBestCost = pFunc->sSampleDealingFuncs.pfMeCost[pMe->uiBlockSize] (pEncData, kiStrideEnc, pRef, kiStrideRef) +
@@ -765,17 +764,8 @@
pBestPredInter = pRef;
iInterBlk4Stride = kiStrideRef;
}
- if (MB_WIDTH_LUMA == iWidth && MB_HEIGHT_LUMA == iHeight) { //P16x16
- pFunc->pfCopy16x16NotAligned (pMemPredInterMb, MB_WIDTH_LUMA, pBestPredInter,
- iInterBlk4Stride); // dst can be align with 16 bytes, but not sure at pSrc, 12/29/2011
- } else if (MB_WIDTH_LUMA == iWidth && MB_HEIGHT_CHROMA == iHeight) { //P16x8
- pFunc->pfCopy16x8NotAligned (pMemPredInterMb, MB_WIDTH_LUMA, pBestPredInter,
- iInterBlk4Stride); // dst can be align with 16 bytes, but not sure at pSrc, 12/29/2011
- } else if (MB_WIDTH_CHROMA == iWidth && MB_HEIGHT_LUMA == iHeight) { //P8x16
- pFunc->pfCopy8x16Aligned (pMemPredInterMb, MB_WIDTH_LUMA, pBestPredInter, iInterBlk4Stride);
- } else { //P8x8
- pFunc->pfCopy8x8Aligned (pMemPredInterMb, MB_WIDTH_LUMA, pBestPredInter, iInterBlk4Stride);
- }
+ pMeRefine->pfCopyBlockByMode (pMemPredInterMb, MB_WIDTH_LUMA, pBestPredInter,
+ iInterBlk4Stride);
}
void InitBlkStrideWithRef (int32_t* pBlkStride, const int32_t kiStrideRef) {
--- a/codec/encoder/core/src/svc_base_layer_md.cpp
+++ b/codec/encoder/core/src/svc_base_layer_md.cpp
@@ -1148,7 +1148,8 @@
}
}
-void WelsMdInterFinePartitionVaa (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, int32_t iBestCost) {
+void WelsMdInterFinePartitionVaa (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb,
+ int32_t iBestCost) {
SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
// SMbCache *pMbCache = &pSlice->sMbCacheInfo;
int32_t iCostP8x16, iCostP16x8, iCostP8x8;
@@ -1428,6 +1429,7 @@
void WelsMdInterMbRefinement (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache) {
SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
+ SWelsFuncPtrList* pFunc = pEncCtx->pFuncList;
uint8_t* pTmpRefCb, *pTmpRefCr, *pTmpDstCb, *pTmpDstCr;
int32_t iMvStride, iRefBlk4Stride, iDstBlk4Stride;
SMVUnitXY* pMv;
@@ -1448,6 +1450,8 @@
case MB_TYPE_16x16:
//luma
InitMeRefinePointer (&sMeRefine, pMbCache, 0);
+ sMeRefine.pfCopyBlockByMode =
+ pFunc->pfCopy16x16NotAligned; // dst can be align with 16 bytes, but not sure at pSrc, 12/29/2011
MeRefineFracPixel (pEncCtx, pDstLuma, &pWelsMd->sMe.sMe16x16, &sMeRefine, 16, 16);
UpdateP16x16MotionInfo (pMbCache, pCurMb, pWelsMd->uiRef, &pWelsMd->sMe.sMe16x16.sMv);
@@ -1474,6 +1478,8 @@
case MB_TYPE_16x8:
iPixStride = 0;
+ sMeRefine.pfCopyBlockByMode =
+ pFunc->pfCopy16x8NotAligned; // dst can be align with 16 bytes, but not sure at pSrc, 12/29/2011
for (i = 0; i < 2; i++) {
//luma
iIdx = i << 3;
@@ -1503,6 +1509,7 @@
case MB_TYPE_8x16:
iPixStride = 0;
+ sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x16Aligned;
for (i = 0; i < 2; i++) {
//luma
iIdx = i << 2;
@@ -1528,8 +1535,8 @@
pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 4, 8); //Cr
}
break;
-
case MB_TYPE_8x8:
+ sMeRefine.pfCopyBlockByMode = pFunc->pfCopy8x8Aligned;
for (i = 0; i < 4; i++) {
int32_t iBlk8Idx = i << 2; //0, 4, 8, 12
int32_t iBlk4X, iBlk4Y;
@@ -1558,8 +1565,10 @@
pTmpDstCb = pDstCb + iDstBlk4Stride;
pTmpRefCr = pRefCr + iRefBlk4Stride;
pTmpDstCr = pDstCr + iDstBlk4Stride;
- pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY, 4, 4); //Cb
- pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY, 4, 4); //Cr
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCb + iMvStride, iLineSizeRefUV, pTmpDstCb, 8, pMv->iMvX, pMv->iMvY,
+ 4, 4); //Cb
+ pEncCtx->pFuncList->sMcFuncs.pMcChromaFunc (pTmpRefCr + iMvStride, iLineSizeRefUV, pTmpDstCr, 8, pMv->iMvX, pMv->iMvY,
+ 4, 4); //Cr
}
break;