ref: 6cb48fc54709b7e9a72b3218a17958c3787c10bf
parent: bbc0cb2b2cd7e6ad1421477c374b98a4dda3918b
parent: f41424183406efe176b9fcb52fbc92e3fc7923e8
author: volvet <qizh@cisco.com>
date: Wed Apr 16 13:41:17 EDT 2014
Merge pull request #690 from sijchen/fme_merge65 [Encoder ME] Add calling of FME preprocess calculation Approved by Xiaolin.
--- a/codec/encoder/core/inc/picture.h
+++ b/codec/encoder/core/inc/picture.h
@@ -56,18 +56,6 @@
bool bRefBlockFeatureCalculated; // flag of whether pre-process is done
} SScreenBlockFeatureStorage; //should be stored with RefPic, one for each frame
-typedef struct TagFeatureSearchPreparation{
- SScreenBlockFeatureStorage* pRefBlockFeature;//point the the ref frame storage
-
- uint16_t* pFeatureOfBlock; // Feature of every block (8x8), begin with the point
- uint8_t uiFeatureStrategyIndex;// index of hash strategy
-
- /* for FME frame-level switch */
- bool bFMESwitchFlag;
- uint8_t uiFMEGoodFrameCount;
- int32_t iHighFreMbCount;
-}SFeatureSearchPreparation;//maintain only one
-
/*
* Reconstructed Picture definition
* It is used to express reference picture, also consequent reconstruction picture for output
--- a/codec/encoder/core/inc/svc_enc_frame.h
+++ b/codec/encoder/core/inc/svc_enc_frame.h
@@ -56,8 +56,20 @@
///////////////////////////////////DQ Layer level///////////////////////////////////
typedef struct TagDqLayer SDqLayer;
-typedef SDqLayer* pDqLayer;
+typedef SDqLayer* pDqLayer;
+typedef struct TagFeatureSearchPreparation{
+ SScreenBlockFeatureStorage* pRefBlockFeature;//points to the ref frame's feature storage
+
+ uint16_t* pFeatureOfBlock; // Feature of every block (8x8), begin with the point; buffer is allocated in RequestFeatureSearchPreparation
+ uint8_t uiFeatureStrategyIndex;// index of hash strategy
+
+ /* for FME frame-level switch */
+ bool bFMESwitchFlag; // result of CalcFMESwitchFlag on P slices; set to true at allocation and on non-P slices
+ uint8_t uiFMEGoodFrameCount; // set to FME_DEFAULT_GOOD_FRAME_NUM at allocation and on non-P slices
+ int32_t iHighFreMbCount; // reset to 0 by the per-frame FME init code in encoder_ext.cpp
+}SFeatureSearchPreparation;//maintain only one
+
typedef struct TagLayerInfo {
SNalUnitHeaderExt sNalHeaderExt;
SSlice*
@@ -97,6 +109,8 @@
int32_t* pNumSliceCodedOfPartition; // for dynamic slicing mode
int32_t* pLastCodedMbIdxOfPartition; // for dynamic slicing mode
int32_t* pLastMbIdxOfPartition; // for dynamic slicing mode
+
+ SFeatureSearchPreparation* pFeatureSearchPreparation;
SDqLayer* pRefLayer; // pointer to referencing dq_layer of current layer to be decoded
--- a/codec/encoder/core/inc/svc_motion_estimate.h
+++ b/codec/encoder/core/inc/svc_motion_estimate.h
@@ -222,6 +222,8 @@
#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
+#define FMESWITCH_MBSAD_THRESHOLD 30 // empirically set.
+
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
@@ -231,6 +233,13 @@
int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage );
+int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
+ SFeatureSearchPreparation* pFeatureSearchPreparation);
+int32_t ReleaseFeatureSearchPreparation( CMemoryAlign *pMa, uint16_t*& pFeatureOfBlock);
+#define FME_DEFAULT_GOOD_FRAME_NUM (2)
+#define FME_DEFAULT_FEATURE_INDEX (0)
+void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
+ SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
//inline functions
inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
const int32_t kiMaxMvRange,
@@ -250,6 +259,15 @@
{
return (CheckMvInRange(ksCurrentMv.iMvX, ksMinMv.iMvX, ksMaxMv.iMvX)
&& CheckMvInRange(ksCurrentMv.iMvY, ksMinMv.iMvY, ksMaxMv.iMvY));
+}
+//FME switch related: returns true when scrolling was detected, or when uiFMEGoodFrameCount>0 and iAvgMbSAD exceeds FMESWITCH_MBSAD_THRESHOLD.
+inline bool CalcFMESwitchFlag(const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage,
+ const int32_t iAvgMbSAD, const bool bScrollingDetected ) {
+ return ( bScrollingDetected ||( uiFMEGoodFrameCount>0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD ) );
+ //TODO: add the logic of iHighFreMbPrecentage (the parameter is currently ignored); the draft below is not compiled and names iFrameSAD / FMESWITCH_FRAMESAD_THRESHOLD, neither of which is a parameter here
+ //return ( iHighFreMbPrecentage > 2
+ // && ( bScrollingDetected || iHighFreMbPrecentage >15
+ // ||( uiFMEGoodFrameCount>0 && iFrameSAD > FMESWITCH_FRAMESAD_THRESHOLD ) ) );
}
}
#endif
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -708,7 +708,7 @@
iDlayerCount = pParam->iSpatialLayerNum;
iNumRef = pParam->iNumRefFrame;
- const int32_t kiFeatureStrategyIndex = 0;
+ const int32_t kiFeatureStrategyIndex = FME_DEFAULT_FEATURE_INDEX;
const int32_t kiMe16x16 = ME_DIA_CROSS;
const int32_t kiMe8x8 = ME_DIA_CROSS_FME;
const int32_t kiNeedFeatureStorage = (pParam->iUsageType != SCREEN_CONTENT_REAL_TIME)?0:
@@ -733,7 +733,7 @@
pRefList = (SRefList*)pMa->WelsMallocz (sizeof (SRefList), "pRefList");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList), FreeMemorySvc (ppCtx))
do {
- pRefList->pRef[i] = AllocPicture (pMa, kiWidth, kiHeight, true, kiNeedFeatureStorage); // to use actual size of current layer
+ pRefList->pRef[i] = AllocPicture (pMa, kiWidth, kiHeight, true, (iDlayerIndex == iDlayerCount-1)?kiNeedFeatureStorage:0); // to use actual size of current layer
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList->pRef[i]), FreeMemorySvc (ppCtx))
++ i;
} while (i < 1 + iNumRef);
@@ -830,6 +830,18 @@
}
}
+ // Feature-search preparation state is allocated only for the last layer (iDlayerIndex==iDlayerCount-1) and only when kiNeedFeatureStorage is non-zero; every other layer gets NULL.
+ if (kiNeedFeatureStorage && iDlayerIndex==iDlayerCount-1)
+ {
+ pDqLayer->pFeatureSearchPreparation = static_cast<SFeatureSearchPreparation*> (pMa->WelsMallocz (sizeof (SFeatureSearchPreparation), "pFeatureSearchPreparation"));
+ WELS_VERIFY_RETURN_PROC_IF (1, NULL==pDqLayer->pFeatureSearchPreparation, FreeMemorySvc (ppCtx));
+ int32_t iReturn = RequestFeatureSearchPreparation(pMa, pDlayer->iFrameWidth, pDlayer->iFrameHeight, kiNeedFeatureStorage,
+ pDqLayer->pFeatureSearchPreparation);
+ WELS_VERIFY_RETURN_PROC_IF (1, ENC_RETURN_SUCCESS!=iReturn, FreeMemorySvc (ppCtx)); // NOTE(review): pDqLayer is stored into ppDqLayerList only after this block, so confirm FreeMemorySvc can reach these allocations on the two failure paths above
+ } else {
+ pDqLayer->pFeatureSearchPreparation = NULL;
+ }
+
(*ppCtx)->ppDqLayerList[iDlayerIndex] = pDqLayer;
++ iDlayerIndex;
@@ -1572,6 +1584,12 @@
pDq->pLastMbIdxOfPartition = NULL;
}
+ if (pDq->pFeatureSearchPreparation) {
+ ReleaseFeatureSearchPreparation(pMa, pDq->pFeatureSearchPreparation->pFeatureOfBlock);
+ pMa->WelsFree (pDq->pFeatureSearchPreparation, "pFeatureSearchPreparation");
+ pDq->pFeatureSearchPreparation = NULL;
+ }
+
pMa->WelsFree (pDq, "pDq");
pDq = NULL;
pCtx->ppDqLayerList[ilayer] = NULL;
@@ -2427,8 +2445,41 @@
pFuncList->pfCalculateSatd = CalculateSatdCost;
pFuncList->pfInterFineMd = WelsMdInterFinePartition;
}
+ }
+ //to init at each frame will be needed when dealing with hybrid content (camera+screen)
+ if (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) {
+ SFeatureSearchPreparation* pFeatureSearchPreparation = pCurLayer->pFeatureSearchPreparation;
+ if (pFeatureSearchPreparation) { //layers allocated without feature storage carry NULL here
+ if (P_SLICE == pCtx->eSliceType) {
+ //calculate bFMESwitchFlag; iHighFreMbCount is read here and is therefore cleared only at the end of this block
+ SVAAFrameInfoExt *pVaaExt = static_cast<SVAAFrameInfoExt *>(pCtx->pVaa);
+ const int32_t kiMbSize = pCurLayer->iMbHeight*pCurLayer->iMbWidth;
+ pFeatureSearchPreparation->bFMESwitchFlag = CalcFMESwitchFlag( pFeatureSearchPreparation->uiFMEGoodFrameCount,
+ pFeatureSearchPreparation->iHighFreMbCount*100/kiMbSize, pCtx->pVaa->sVaaCalcInfo.iFrameSad/kiMbSize,
+ pVaaExt->sScrollDetectInfo.bScrollDetectFlag);
+
+ //PerformFMEPreprocess: only while the switch is on, and skipped when the reference's features are already calculated
+ SScreenBlockFeatureStorage* pScreenBlockFeatureStorage = pCurLayer->pRefPic->pScreenBlockFeatureStorage;
+ pFeatureSearchPreparation->pRefBlockFeature = pScreenBlockFeatureStorage;
+ if (pFeatureSearchPreparation->bFMESwitchFlag
+ && !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
+ pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureSearchPreparation->pFeatureOfBlock;
+ PerformFMEPreprocess( pFuncList, pCurLayer->pRefPic, pScreenBlockFeatureStorage );
+ }
+
+ //assign ME pointer
+ if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
+ //TBC int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
+ }
+ } else {
+ //reset some status when at I_SLICE
+ pFeatureSearchPreparation->bFMESwitchFlag = true;
+ pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
+ }
+ pFeatureSearchPreparation->iHighFreMbCount = 0; //per-frame reset for every slice type, done after the percentage above has been taken
+ }
}
}
--- a/codec/encoder/core/src/picture_handle.cpp
+++ b/codec/encoder/core/src/picture_handle.cpp
@@ -113,7 +113,6 @@
pPic->pScreenBlockFeatureStorage = static_cast<SScreenBlockFeatureStorage*> (pMa->WelsMallocz (sizeof (SScreenBlockFeatureStorage), "pScreenBlockFeatureStorage"));
int32_t iReturn = RequestScreenBlockFeatureStorage(pMa, kiWidth, kiHeight, iNeedFeatureStorage,
pPic->pScreenBlockFeatureStorage );
-
WELS_VERIFY_RETURN_PROC_IF (NULL, ENC_RETURN_SUCCESS != iReturn, FreePicture (pMa, &pPic));
} else {
pPic->pScreenBlockFeatureStorage = NULL;
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -45,6 +45,18 @@
namespace WelsSVCEnc {
+const int32_t QStepx16ByQp[52] = { /* QStep<<4 stored as int32_t, one entry per QP 0..51 */
+ 10, 11, 13, 14, 16, 18, /* 0~5 */
+ 20, 22, 26, 28, 32, 36, /* 6~11 */
+ 40, 44, 52, 56, 64, 72, /* 12~17 */
+ 80, 88, 104, 112, 128, 144, /* 18~23 */
+ 160, 176, 208, 224, 256, 288, /* 24~29 */
+ 320, 352, 416, 448, 512, 576, /* 30~35 */
+ 640, 704, 832, 896, 1024, 1152, /* 36~41 */
+ 1280, 1408, 1664, 1792, 2048, 2304, /* 42~47 */
+ 2560, 2816, 3328, 3584 /* 48~51 */
+};
+
static inline void UpdateMeResults( const SMVUnitXY ksBestMv, const uint32_t kiBestSadCost, uint8_t* pRef, SWelsME * pMe )
{
pMe->sMv = ksBestMv;
@@ -313,9 +325,9 @@
#if defined (X86_ASM)
void CalcMvdCostx8_c( uint16_t *pMvdCost, const int32_t kiStartMv, uint16_t* pMvdTable, const uint16_t kiFixedCost )
{
- uint16_t *pBaseCost = pMvdCost;
- const int32_t kiOffset = (kiStartMv<<2);
- uint16_t *pMvd = pMvdTable+kiOffset;
+ uint16_t *pBaseCost = pMvdCost;
+ const int32_t kiOffset = (kiStartMv<<2);
+ uint16_t *pMvd = pMvdTable+kiOffset;
for (int32_t i = 0; i < 8; ++ i) {
pBaseCost[i] = ((*pMvd) + kiFixedCost);
pMvd += 4;
@@ -327,58 +339,58 @@
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch ) {
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
- SWelsME *pMe = static_cast<SWelsME *>(vpMe);
- uint8_t* kpEncMb = pMe->pEncMb;
- const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
- uint8_t* pRef = &pMe->pColoRefMb[(kiMinPos - kiCurMeBlockPix)*kiRefStride];
- const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
- const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
+ SWelsME *pMe = static_cast<SWelsME *>(vpMe);
+ uint8_t* kpEncMb = pMe->pEncMb;
+ const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
+ uint8_t* pRef = &pMe->pColoRefMb[(kiMinPos - kiCurMeBlockPix)*kiRefStride];
+ const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
+ const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
- PTransposeMatrixBlockFunc TransposeMatrixBlock = kIsBlock16x16 ? TransposeMatrixBlock16x16_sse2 : TransposeMatrixBlock8x8_mmx;
- PTransposeMatrixBlocksFunc TransposeMatrixBlocks= kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 : TransposeMatrixBlocksx8_mmx;
+ PTransposeMatrixBlockFunc TransposeMatrixBlock = kIsBlock16x16 ? TransposeMatrixBlock16x16_sse2 : TransposeMatrixBlock8x8_mmx;
+ PTransposeMatrixBlocksFunc TransposeMatrixBlocks= kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 : TransposeMatrixBlocksx8_mmx;
- const int32_t kiDiff = kiMaxPos - kiMinPos;
- const int32_t kiRowNum = WELS_ALIGN((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
- const int32_t kiBlocksNum = kIsBlock16x16 ? (kiRowNum>>4) : (kiRowNum>>3);
- int32_t iCountLoop8 = (kiRowNum-kiEdgeBlocks) >> 3;
- const int32_t kiRemainingVectors = kiDiff - (iCountLoop8<<3);
- const int32_t kiMatrixStride = MAX_VERTICAL_MV_RANGE;
- ENFORCE_STACK_ALIGN_2D( uint8_t, uiMatrixRef, 16, kiMatrixStride, 16 ); // transpose matrix result for ref
- ENFORCE_STACK_ALIGN_2D( uint8_t, uiMatrixEnc, 16, 16, 16 ); // transpose matrix result for enc
- assert(kiRowNum <= kiMatrixStride); // make sure effective memory
+ const int32_t kiDiff = kiMaxPos - kiMinPos;
+ const int32_t kiRowNum = WELS_ALIGN((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
+ const int32_t kiBlocksNum = kIsBlock16x16 ? (kiRowNum>>4) : (kiRowNum>>3);
+ int32_t iCountLoop8 = (kiRowNum-kiEdgeBlocks) >> 3;
+ const int32_t kiRemainingVectors = kiDiff - (iCountLoop8<<3);
+ const int32_t kiMatrixStride = MAX_VERTICAL_MV_RANGE;
+ ENFORCE_STACK_ALIGN_2D( uint8_t, uiMatrixRef, 16, kiMatrixStride, 16 ); // transpose matrix result for ref
+ ENFORCE_STACK_ALIGN_2D( uint8_t, uiMatrixEnc, 16, 16, 16 ); // transpose matrix result for enc
+ assert(kiRowNum <= kiMatrixStride); // make sure effective memory
TransposeMatrixBlock( &uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride );
TransposeMatrixBlocks( &uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum );
ENFORCE_STACK_ALIGN_1D( uint16_t, uiBaseCost, 8, 16 );
- int32_t iTargetPos = kiMinPos;
- int16_t iBestPos = pMe->sMv.iMvX;
- uint32_t uiBestCost = pMe->uiSadCost;
+ int32_t iTargetPos = kiMinPos;
+ int16_t iBestPos = pMe->sMv.iMvX;
+ uint32_t uiBestCost = pMe->uiSadCost;
uint32_t uiCostMin;
int32_t iIndexMinPos;
- kpEncMb = &uiMatrixEnc[0][0];
- pRef = &uiMatrixRef[0][0];
+ kpEncMb = &uiMatrixEnc[0][0];
+ pRef = &uiMatrixRef[0][0];
while(iCountLoop8 > 0) {
CalcMvdCostx8_c(uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
uiCostMin = pSampleSadHor8( kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos );
if (uiCostMin < uiBestCost) {
- uiBestCost = uiCostMin;
- iBestPos = iTargetPos+iIndexMinPos;
+ uiBestCost = uiCostMin;
+ iBestPos = iTargetPos+iIndexMinPos;
}
- iTargetPos += 8;
+ iTargetPos += 8;
pRef += 8;
-- iCountLoop8;
}
if (kiRemainingVectors > 0) {
- kpEncMb = pMe->pEncMb;
- pRef = &pMe->pColoRefMb[(iTargetPos - kiCurMeBlockPix)*kiRefStride];
+ kpEncMb = pMe->pEncMb;
+ pRef = &pMe->pColoRefMb[(iTargetPos - kiCurMeBlockPix)*kiRefStride];
while (iTargetPos < kiMaxPos) {
- const uint16_t pMvdCost = pMvdTable[iTargetPos<<2];
- uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + pMvdCost);
+ const uint16_t pMvdCost = pMvdTable[iTargetPos<<2];
+ uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + pMvdCost);
if (uiSadCost < uiBestCost) {
- uiBestCost = uiSadCost;
- iBestPos = iTargetPos;
+ uiBestCost = uiSadCost;
+ iBestPos = iTargetPos;
}
pRef += kiRefStride;
++iTargetPos;
@@ -399,20 +411,20 @@
const bool bVerticalSearch )
{
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
- SWelsME *pMe = static_cast<SWelsME *>(vpMe);
- uint8_t *kpEncMb = pMe->pEncMb;
- const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixX;
- uint8_t *pRef = &pMe->pColoRefMb[kiMinPos - kiCurMeBlockPix];
- const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
+ SWelsME *pMe = static_cast<SWelsME *>(vpMe);
+ uint8_t *kpEncMb = pMe->pEncMb;
+ const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixX;
+ uint8_t *pRef = &pMe->pColoRefMb[kiMinPos - kiCurMeBlockPix];
+ const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
ENFORCE_STACK_ALIGN_1D( uint16_t, uiBaseCost, 8, 16 );
- const int32_t kiNumVector = kiMaxPos - kiMinPos;
- int32_t iCountLoop8 = kiNumVector >> 3;
- const int32_t kiRemainingLoop8 = kiNumVector & 7;
- int32_t iTargetPos = kiMinPos;
- int16_t iBestPos = pMe->sMv.iMvX;
- uint32_t uiBestCost = pMe->uiSadCost;
+ const int32_t kiNumVector = kiMaxPos - kiMinPos;
+ int32_t iCountLoop8 = kiNumVector >> 3;
+ const int32_t kiRemainingLoop8 = kiNumVector & 7;
+ int32_t iTargetPos = kiMinPos;
+ int16_t iBestPos = pMe->sMv.iMvX;
+ uint32_t uiBestCost = pMe->uiSadCost;
uint32_t uiCostMin;
int32_t iIndexMinPos;
@@ -420,20 +432,20 @@
CalcMvdCostx8_c(uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
uiCostMin = pSampleSadHor8( kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos );
if (uiCostMin < uiBestCost) {
- uiBestCost = uiCostMin;
- iBestPos = iTargetPos+iIndexMinPos;
+ uiBestCost = uiCostMin;
+ iBestPos = iTargetPos+iIndexMinPos;
}
- iTargetPos += 8;
+ iTargetPos += 8;
pRef += 8;
-- iCountLoop8;
}
if ( kiRemainingLoop8 > 0 ) {
while (iTargetPos < kiMaxPos) {
- const uint16_t pMvdCost = pMvdTable[iTargetPos<<2];
- uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + pMvdCost);
+ const uint16_t pMvdCost = pMvdTable[iTargetPos<<2];
+ uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + pMvdCost);
if (uiSadCost < uiBestCost) {
- uiBestCost = uiSadCost;
- iBestPos = iTargetPos;
+ uiBestCost = uiSadCost;
+ iBestPos = iTargetPos;
}
++pRef;
++iTargetPos;
@@ -447,10 +459,10 @@
}
}
#endif
-void LineFullSearch_c( void *pFunc, void *vpMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
- const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+void LineFullSearch_c( void *pFunc, void *vpMe,
+ uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ const int32_t kiEncStride, const int32_t kiRefStride,
+ const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch ) {
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
SWelsME *pMe = static_cast<SWelsME *>(vpMe);
@@ -482,9 +494,9 @@
}
void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SWelsME * pMe,
- const SSlice* pSlice, const int32_t kiEncStride, const int32_t kiRefStride) {
- PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
- PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
+ const SSlice* pSlice, const int32_t kiEncStride, const int32_t kiRefStride) {
+ PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
+ PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
const int32_t iCurMeBlockQpelPixX = ((iCurMeBlockPixX)<<2);
@@ -515,9 +527,10 @@
// Feature Search Basics
/////////////////////////
//memory related
-int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFeatureStrategyIndex,
- const int32_t kiFrameWidth, const int32_t kiFrameHeight, const bool bFme8x8,
- uint16_t*& pFeatureOfBlock) {
+int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
+ SFeatureSearchPreparation* pFeatureSearchPreparation) {
+ const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage>>16;
+ const bool bFme8x8 = ((iNeedFeatureStorage & 0x0000FF & ME_FME)==ME_FME);
const int32_t kiMarginSize = bFme8x8?8:16;
const int32_t kiFrameSize = (kiFrameWidth-kiMarginSize) * (kiFrameHeight-kiMarginSize);
int32_t iListOfFeatureOfBlock;
@@ -528,10 +541,15 @@
iListOfFeatureOfBlock = sizeof(uint16_t) * kiFrameSize +
(kiFrameWidth-kiMarginSize) * sizeof(uint32_t) + kiFrameWidth * 8 * sizeof(uint8_t);
}
- pFeatureOfBlock =
+ pFeatureSearchPreparation->pFeatureOfBlock =
(uint16_t *)pMa->WelsMalloc(iListOfFeatureOfBlock, "pFeatureOfBlock");
- WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == pFeatureOfBlock)
+ WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == (pFeatureSearchPreparation->pFeatureOfBlock) )
+ pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
+ pFeatureSearchPreparation->bFMESwitchFlag = true;
+ pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
+ pFeatureSearchPreparation->iHighFreMbCount = 0;
+
return ENC_RETURN_SUCCESS;
}
int32_t ReleaseFeatureSearchPreparation( CMemoryAlign *pMa, uint16_t*& pFeatureOfBlock) {
@@ -568,7 +586,13 @@
pScreenBlockFeatureStorage->pLocationPointer = (uint16_t*)pMa->WelsMalloc(2*kiFrameSize*sizeof(uint16_t), "pScreenBlockFeatureStorage->pLocationPointer");
WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationPointer)
- pScreenBlockFeatureStorage->iActualListSize = kiListSize;
+ pScreenBlockFeatureStorage->pFeatureOfBlockPointer = NULL; // attached by the encoder just before PerformFMEPreprocess is called
+ pScreenBlockFeatureStorage->iIs16x16 = !bIsBlock8x8;
+ pScreenBlockFeatureStorage->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
+ pScreenBlockFeatureStorage->iActualListSize = kiListSize;
+ memset(pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL*sizeof(uint32_t)); // memset stores (unsigned char)UINT_MAX == 0xFF in every byte, so each 32-bit entry reads back as 0xFFFFFFFF; assumes uint32_t elements, as the sizeof implies
+ pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false;
+
return ENC_RETURN_SUCCESS;
}
int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage ) {
@@ -588,11 +612,9 @@
}
//preprocess related
-int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
-{
+int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride) { // returns the sum of the 8x8 bytes starting at pRef; rows are kiRefStride bytes apart
int32_t iSum = 0, i;
- for(i = 0; i < 8; i++)
- {
+ for(i = 0; i < 8; i++) { // one 8-byte row per iteration
iSum += pRef[0] + pRef[1] + pRef[2] + pRef[3];
iSum += pRef[4] + pRef[5] + pRef[6] + pRef[7];
pRef += kiRefStride;
@@ -599,11 +621,9 @@
}
return iSum;
}
-int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride)
-{
+int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride) {
int32_t iSum = 0, i;
- for(i = 0; i < 16; i++)
- {
+ for(i = 0; i < 16; i++) {
iSum += pRef[0] + pRef[1] + pRef[2] + pRef[3];
iSum += pRef[4] + pRef[5] + pRef[6] + pRef[7];
iSum += pRef[8] + pRef[9] + pRef[10] + pRef[11];
@@ -681,6 +701,7 @@
pSrcPointer += kiWidth;
}
}
+
void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
{
@@ -710,10 +731,17 @@
}
void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
- SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
-{
+ SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
CalculateFeatureOfBlock(pFunc, pRef, pScreenBlockFeatureStorage );
pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = true;
+ // Derive the per-block-size SAD cost thresholds from the reference picture's average QP.
+ uint32_t uiRefPictureAvgQstepx16 = QStepx16ByQp[WelsMedian(0, pRef->iFrameAverageQp, 51)]; // presumably WelsMedian clamps the QP into the table's 0..51 index range - confirm
+ uint32_t uiSadCostThreshold16x16 = ((30 * (uiRefPictureAvgQstepx16 + 160))>>3); // 30*(Qstepx16+160)/8
+ pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x16] = uiSadCostThreshold16x16;
+ pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x8] = (uiSadCostThreshold16x16>>2); // a quarter of the 16x16 threshold
+ pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x8]
+ = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x16]
+ = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_4x4] = UINT_MAX; // these three block sizes are set to UINT_MAX
}
//search related