ref: fbc179fb823061ea886bff925dec90a2f687a04d
parent: 1dbc856ed03a866378438ec0b8e472bdea0ac401
parent: e5001c87cac3397a013d019a894dc674cdc8cc3f
author: Licai Guo <licaguo@cisco.com>
date: Mon Apr 21 18:18:55 EDT 2014
Merge pull request #722 from sijchen/fme_merge75 [Encoder ME] add FME switch logic
--- a/codec/encoder/core/inc/svc_motion_estimate.h
+++ b/codec/encoder/core/inc/svc_motion_estimate.h
@@ -193,7 +193,7 @@
int32_t& iBestSadCost);
// Cross Search Basics
-void LineFullSearch_c( void *pFunc, void *vpMe,
+void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
@@ -219,9 +219,12 @@
void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SWelsME * pMe, const SSlice* pSlice);
// Feature Search Basics
-#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
-#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
-#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
+#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
+#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
+#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
+
+#define FME_DEFAULT_FEATURE_INDEX (0)
+#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
#define FMESWITCH_MBSAD_THRESHOLD 30 // empirically set.
int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
@@ -236,10 +239,15 @@
int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
SFeatureSearchPreparation* pFeatureSearchPreparation);
int32_t ReleaseFeatureSearchPreparation( CMemoryAlign *pMa, uint16_t*& pFeatureOfBlock);
-#define FME_DEFAULT_GOOD_FRAME_NUM (2)
+
+#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
#define FME_DEFAULT_FEATURE_INDEX (0)
void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
+
+void UpdateFMESwitch(SDqLayer* pCurLayer);
+void UpdateFMESwitchNull(SDqLayer* pCurLayer);
+
//inline functions
inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
const int32_t kiMaxMvRange,
@@ -262,7 +270,8 @@
}
//FME switch related
inline bool CalcFMESwitchFlag(const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage,
- const int32_t iAvgMbSAD, const bool bScrollingDetected ) {
+ const int32_t iAvgMbSAD, const bool bScrollingDetected )
+{
return ( bScrollingDetected ||( uiFMEGoodFrameCount>0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD ) );
//TODO: add the logic of iHighFreMbPrecentage
//return ( iHighFreMbPrecentage > 2
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -150,6 +150,7 @@
typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
typedef int32_t (*PCalculateSingleBlockFeature)(uint8_t *pRef, const int32_t kiRefStride);
+typedef void (*PUpdateFMESwitch)(SDqLayer* pCurLayer);
#define MAX_BLOCK_TYPE 5 // prev 7
typedef struct TagSampleDealingFunc {
@@ -209,10 +210,12 @@
PSearchMethodFunc pfSearchMethod[BLOCK_SIZE_ALL];
PCalculateSatdFunc pfCalculateSatd;
PCheckDirectionalMv pfCheckDirectionalMv;
+
PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16
PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16
PLineFullSearchFunc pfVerticalFullSearch;
PLineFullSearchFunc pfHorizontalFullSearch;
+ PUpdateFMESwitch pfUpdateFMESwitch;
PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
PCopyFunc pfCopy16x16NotAligned; //md.c
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -2468,6 +2468,7 @@
if (pFeatureSearchPreparation->bFMESwitchFlag
&& !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureSearchPreparation->pFeatureOfBlock;
+ //TODO: use ORIGIN of reference when preprocessing is ready
PerformFMEPreprocess( pFuncList, pCurLayer->pRefPic, pScreenBlockFeatureStorage );
}
@@ -2475,10 +2476,17 @@
if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
//TBC int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
}
+
+ //assign UpdateFMESwitch pointer
+ if (pFeatureSearchPreparation->bFMESwitchFlag) {
+ pFuncList->pfUpdateFMESwitch = UpdateFMESwitch;
+ } else {
+ pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
+ }
} else {
//reset some status when at I_SLICE
pFeatureSearchPreparation->bFMESwitchFlag = true;
- pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
+ pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
}
}
}
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -74,6 +74,11 @@
void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent ) {
if (!bScreenContent) {
pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse;
+ pFuncList->pfCalculateBlockFeatureOfFrame[0] =
+ pFuncList->pfCalculateBlockFeatureOfFrame[1] = NULL;
+ pFuncList->pfCalculateSingleBlockFeature[0] =
+ pFuncList->pfCalculateSingleBlockFeature[1] = NULL;
+ pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
} else {
pFuncList->pfCheckDirectionalMv = CheckDirectionalMv;
@@ -87,6 +92,7 @@
pFuncList->pfVerticalFullSearch = VerticalFullSearchUsingSSE41;
pFuncList->pfHorizontalFullSearch = HorizontalFullSearchUsingSSE41;
}
+#endif
//for feature search
pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
@@ -94,7 +100,7 @@
//TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
-#endif
+ pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
}
}
@@ -547,7 +553,7 @@
pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
pFeatureSearchPreparation->bFMESwitchFlag = true;
- pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
+ pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
pFeatureSearchPreparation->iHighFreMbCount = 0;
return ENC_RETURN_SUCCESS;
@@ -672,12 +678,10 @@
}
void InitializeHashforFeature_c( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
- uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList )
-{
+ uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList ) {
//assign location pointer
uint16_t *pBufPos = pBuf;
- for( int32_t i = 0 ; i < kiListSize; ++i )
- {
+ for( int32_t i = 0 ; i < kiListSize; ++i ) {
pLocationOfFeature[i] =
pFeatureValuePointerList[i] = pBufPos;
pBufPos += (pTimesOfFeatureValue[i]<<1);
@@ -684,15 +688,12 @@
}
}
void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
- uint16_t** pFeatureValuePointerList )
-{
+ uint16_t** pFeatureValuePointerList ) {
//assign each pixel's position
uint16_t* pSrcPointer = pFeatureOfBlock;
int32_t iQpelY = 0;
- for(int32_t y = 0; y < kiHeight; y++)
- {
- for(int32_t x = 0; x < kiWidth; x++)
- {
+ for(int32_t y = 0; y < kiHeight; y++) {
+ for(int32_t x = 0; x < kiWidth; x++) {
uint16_t uiFeature = pSrcPointer[x];
ST32( &pFeatureValuePointerList[uiFeature][0], ((iQpelY<<16)|(x<<2)) );
pFeatureValuePointerList[uiFeature] += 2;
@@ -703,8 +704,7 @@
}
void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
- SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
-{
+ SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
uint16_t** pLocationOfFeature = pScreenBlockFeatureStorage->pLocationOfFeature;
@@ -867,6 +867,41 @@
}
}
+//switch related
+static uint32_t CountFMECostDown( const SDqLayer* pCurLayer ) { // Returns the sum of uiSliceFMECostDown over the slices of pCurLayer.
+ uint32_t uiCostDownSum = 0;
+ const int32_t kiSliceCount = GetCurrentSliceNum( pCurLayer->pSliceEncCtx ); // how many slices to accumulate, as reported by GetCurrentSliceNum
+ if ( kiSliceCount >= 1 ) { // with a count below 1 nothing is read and the sum stays 0
+ int32_t iSliceIndex = 0;
+ SSlice *pSlice = &pCurLayer->sLayerInfo.pSliceInLayer[iSliceIndex]; // start at element 0 of pSliceInLayer
+ while( iSliceIndex < kiSliceCount ) {
+ uiCostDownSum += pSlice->uiSliceFMECostDown;
+ ++ pSlice; // advance to the next element of pSliceInLayer
+ ++ iSliceIndex;
+ }
+ }
+ return uiCostDownSum;
+}
+#define FMESWITCH_MBAVERCOSTSAVING_THRESHOLD (2) //empirically set.
+#define FMESWITCH_GOODFRAMECOUNT_MAX (5) //empirically set.
+static void UpdateFMEGoodFrameCount(const uint32_t iAvMBNormalizedRDcostDown, uint8_t& uiFMEGoodFrameCount) { // Moves uiFMEGoodFrameCount (modified in place through the reference) by at most one step per call.
+ //this strategy may be changed, here the number is derived from empirical-numbers
+ // uiFMEGoodFrameCount lies in [0,FMESWITCH_GOODFRAMECOUNT_MAX]
+ if ( iAvMBNormalizedRDcostDown > FMESWITCH_MBAVERCOSTSAVING_THRESHOLD ) { // value strictly above the threshold: count up
+ if ( uiFMEGoodFrameCount < FMESWITCH_GOODFRAMECOUNT_MAX ) // only while below the maximum
+ ++ uiFMEGoodFrameCount;
+ } else { // value at or below the threshold: count down
+ if ( uiFMEGoodFrameCount > 0 ) // only while above zero, so the uint8_t never wraps
+ -- uiFMEGoodFrameCount;
+ }
+}
+void UpdateFMESwitch(SDqLayer* pCurLayer) { // Updates pFeatureSearchPreparation->uiFMEGoodFrameCount of pCurLayer from the summed slice cost-down divided by iMbWidth*iMbHeight.
+ const uint32_t iFMECost = CountFMECostDown( pCurLayer ); // sum of uiSliceFMECostDown over the layer's slices
+ const uint32_t iAvMBNormalizedRDcostDown = iFMECost / (pCurLayer->iMbWidth*pCurLayer->iMbHeight); // integer division; NOTE(review): assumes iMbWidth*iMbHeight is non-zero -- confirm against callers
+ UpdateFMEGoodFrameCount( iAvMBNormalizedRDcostDown, pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount ); // counter is passed by reference and adjusted in place
+}
+void UpdateFMESwitchNull(SDqLayer* pCurLayer) { // Does nothing; has the same signature as UpdateFMESwitch so it can be stored in pfUpdateFMESwitch.
+}
/////////////////////////
// Search function options
/////////////////////////