shithub: openh264

Download patch

ref: e5001c87cac3397a013d019a894dc674cdc8cc3f
parent: 3f2ea77908b70e382b967b7af4d49658b12e0bb2
author: sijchen <sijchen@cisco.com>
date: Mon Apr 21 12:36:59 EDT 2014

add FME switch logic

--- a/codec/encoder/core/inc/svc_motion_estimate.h
+++ b/codec/encoder/core/inc/svc_motion_estimate.h
@@ -193,7 +193,7 @@
                       int32_t& iBestSadCost);
 
 // Cross Search Basics
-void LineFullSearch_c(   void *pFunc, void *vpMe,
+void LineFullSearch_c(  void *pFunc, void *vpMe,
                         uint16_t* pMvdTable, const int32_t kiFixedMvd,
                         const int32_t kiEncStride, const int32_t kiRefStride,
                         const int32_t kiMinPos, const int32_t kiMaxPos,
@@ -219,9 +219,12 @@
 void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList,  SDqLayer* pCurLayer, SWelsME * pMe, const SSlice* pSlice);
 
 // Feature Search Basics
-#define LIST_SIZE_SUM_16x16  0x0FF01    //(256*255+1)
-#define LIST_SIZE_SUM_8x8      0x03FC1    //(64*255+1)
-#define LIST_SIZE_MSE_16x16  0x00878    //(avg+mse)/2, max= (255+16*255)/2
+#define LIST_SIZE_SUM_16x16 0x0FF01  //(256*255+1)
+#define LIST_SIZE_SUM_8x8     0x03FC1  //(64*255+1)
+#define LIST_SIZE_MSE_16x16 0x00878  //(avg+mse)/2, max= (255+16*255)/2
+
+#define FME_DEFAULT_FEATURE_INDEX (0) // NOTE(review): defined again with the same value further down in this header
+#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2) // value uiFMEGoodFrameCount is reset to; NOTE(review): also defined again further down in this header
 #define FMESWITCH_MBSAD_THRESHOLD   30 // empirically set.
 
 int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride);
@@ -236,10 +239,15 @@
 int32_t RequestFeatureSearchPreparation( CMemoryAlign *pMa, const int32_t kiFrameWidth,  const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
                                          SFeatureSearchPreparation* pFeatureSearchPreparation);
 int32_t ReleaseFeatureSearchPreparation( CMemoryAlign *pMa, uint16_t*& pFeatureOfBlock);
-#define FME_DEFAULT_GOOD_FRAME_NUM (2)
+
+#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
 #define FME_DEFAULT_FEATURE_INDEX (0)
 void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
                           SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
+
+void UpdateFMESwitch(SDqLayer* pCurLayer);     // updates pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount from the slices' FME cost-down
+void UpdateFMESwitchNull(SDqLayer* pCurLayer); // no-op with the same signature
+
 //inline functions
 inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
                         const int32_t kiMaxMvRange,
@@ -262,7 +270,8 @@
 }
 //FME switch related
 inline bool CalcFMESwitchFlag(const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage,
-       const int32_t iAvgMbSAD, const bool bScrollingDetected ) {
+       const int32_t iAvgMbSAD, const bool bScrollingDetected )
+{
   return ( bScrollingDetected ||( uiFMEGoodFrameCount>0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD ) );
   //TODO: add the logic of iHighFreMbPrecentage
   //return ( iHighFreMbPrecentage > 2
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -150,6 +150,7 @@
 typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
                                               uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
 typedef int32_t (*PCalculateSingleBlockFeature)(uint8_t *pRef, const int32_t kiRefStride);
+typedef void (*PUpdateFMESwitch)(SDqLayer* pCurLayer);
 
 #define     MAX_BLOCK_TYPE 5 // prev 7
 typedef struct TagSampleDealingFunc {
@@ -209,10 +210,12 @@
   PSearchMethodFunc pfSearchMethod[BLOCK_SIZE_ALL];
   PCalculateSatdFunc pfCalculateSatd;
   PCheckDirectionalMv pfCheckDirectionalMv;
+
   PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16
   PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16
   PLineFullSearchFunc pfVerticalFullSearch;
   PLineFullSearchFunc pfHorizontalFullSearch;
+  PUpdateFMESwitch pfUpdateFMESwitch;
 
   PCopyFunc      pfCopy16x16Aligned;    //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
   PCopyFunc      pfCopy16x16NotAligned;  //md.c
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -2468,6 +2468,7 @@
         if (pFeatureSearchPreparation->bFMESwitchFlag
           && !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
             pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureSearchPreparation->pFeatureOfBlock;
+            //TODO: use ORIGIN of reference when preprocessing is ready
             PerformFMEPreprocess( pFuncList, pCurLayer->pRefPic, pScreenBlockFeatureStorage );
         }
 
@@ -2475,10 +2476,17 @@
         if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
           //TBC int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
         }
+
+        //assign UpdateFMESwitch pointer
+        if (pFeatureSearchPreparation->bFMESwitchFlag) {
+          pFuncList->pfUpdateFMESwitch = UpdateFMESwitch;
+        } else {
+          pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
+        }
       } else {
         //reset some status when at I_SLICE
         pFeatureSearchPreparation->bFMESwitchFlag = true;
-        pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
+        pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
       }
     }
   }
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -74,6 +74,11 @@
 void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent ) {
   if (!bScreenContent) {
     pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse;
+    pFuncList->pfCalculateBlockFeatureOfFrame[0] =
+      pFuncList->pfCalculateBlockFeatureOfFrame[1] = NULL;
+    pFuncList->pfCalculateSingleBlockFeature[0] =
+      pFuncList->pfCalculateSingleBlockFeature[1] = NULL;
+    pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
   } else {
     pFuncList->pfCheckDirectionalMv = CheckDirectionalMv;
 
@@ -87,6 +92,7 @@
       pFuncList->pfVerticalFullSearch = VerticalFullSearchUsingSSE41;
       pFuncList->pfHorizontalFullSearch = HorizontalFullSearchUsingSSE41;
     }
+#endif
 
     //for feature search
     pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
@@ -94,7 +100,7 @@
     //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
     pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
     pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
-#endif
+    pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
   }
 }
 
@@ -547,7 +553,7 @@
 
   pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
   pFeatureSearchPreparation->bFMESwitchFlag = true;
-  pFeatureSearchPreparation->uiFMEGoodFrameCount = FME_DEFAULT_GOOD_FRAME_NUM;
+  pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
   pFeatureSearchPreparation->iHighFreMbCount = 0;
 
   return ENC_RETURN_SUCCESS;
@@ -672,12 +678,10 @@
 }
 
 void InitializeHashforFeature_c( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
-                                uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList )
-{
+                                uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList ) {
   //assign location pointer
   uint16_t *pBufPos  = pBuf;
-  for( int32_t i = 0 ; i < kiListSize; ++i )
-  {
+  for( int32_t i = 0 ; i < kiListSize; ++i ) {
     pLocationOfFeature[i] =
       pFeatureValuePointerList[i] = pBufPos;
     pBufPos      += (pTimesOfFeatureValue[i]<<1);
@@ -684,15 +688,12 @@
   }
 }
 void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
-                                       uint16_t** pFeatureValuePointerList )
-{
+                                       uint16_t** pFeatureValuePointerList ) {
   //assign each pixel's position
   uint16_t* pSrcPointer  =  pFeatureOfBlock;
   int32_t iQpelY = 0;
-  for(int32_t y = 0; y < kiHeight; y++)
-  {
-    for(int32_t x = 0; x < kiWidth; x++)
-    {
+  for(int32_t y = 0; y < kiHeight; y++) {
+    for(int32_t x = 0; x < kiWidth; x++) {
       uint16_t uiFeature = pSrcPointer[x];
       ST32( &pFeatureValuePointerList[uiFeature][0], ((iQpelY<<16)|(x<<2)) );
       pFeatureValuePointerList[uiFeature] += 2;
@@ -703,8 +704,7 @@
 }
 
 void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
-                         SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
-{
+                         SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
   uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
   uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
   uint16_t** pLocationOfFeature  = pScreenBlockFeatureStorage->pLocationOfFeature;
@@ -867,6 +867,41 @@
   }
 }
 
+//switch related
+static uint32_t CountFMECostDown( const SDqLayer* pCurLayer ) {
+  // Adds up uiSliceFMECostDown over the slices of pCurLayer; the number of slices
+  // visited is whatever GetCurrentSliceNum() reports for the layer's slice context.
+  const int32_t kiNumSlices = GetCurrentSliceNum( pCurLayer->pSliceEncCtx );
+  const SSlice* pSlices     = pCurLayer->sLayerInfo.pSliceInLayer;
+  uint32_t uiTotal          = 0;
+
+  // a slice count below 1 skips the loop, so the result is then 0
+  for ( int32_t iIdx = 0; iIdx < kiNumSlices; ++iIdx ) {
+    uiTotal += pSlices[iIdx].uiSliceFMECostDown;
+  }
+
+  return uiTotal;
+}
+#define FMESWITCH_MBAVERCOSTSAVING_THRESHOLD (2) //empirically set; the per-MB average cost-down must exceed this for the good-frame count to go up
+#define FMESWITCH_GOODFRAMECOUNT_MAX (5) //empirically set; the good-frame count is not incremented once it reaches this value
+static void UpdateFMEGoodFrameCount(const uint32_t iAvMBNormalizedRDcostDown, uint8_t& uiFMEGoodFrameCount) {
+  // Saturating counter: one step up when the per-MB cost saving exceeds
+  // FMESWITCH_MBAVERCOSTSAVING_THRESHOLD, one step down otherwise; no step is taken upwards at
+  // FMESWITCH_GOODFRAMECOUNT_MAX or downwards at 0. The numbers are empirical and may change.
+  const bool bGoodFrame = ( iAvMBNormalizedRDcostDown > FMESWITCH_MBAVERCOSTSAVING_THRESHOLD );
+  if ( bGoodFrame && uiFMEGoodFrameCount < FMESWITCH_GOODFRAMECOUNT_MAX ) {
+    ++ uiFMEGoodFrameCount;
+  } else if ( !bGoodFrame && uiFMEGoodFrameCount > 0 ) {
+    -- uiFMEGoodFrameCount;
+  }
+}
+void UpdateFMESwitch(SDqLayer* pCurLayer) { // feeds the layer's per-MB average FME cost-down into uiFMEGoodFrameCount
+  const int32_t kiMbNum = pCurLayer->iMbWidth * pCurLayer->iMbHeight; // divisor below; assumes both fields fit int32_t - TODO confirm
+  const uint32_t iAvMBNormalizedRDcostDown = ( kiMbNum > 0 ) ? ( CountFMECostDown( pCurLayer ) / kiMbNum ) : 0; // no MBs: average taken as 0 instead of dividing by zero
+  UpdateFMEGoodFrameCount( iAvMBNormalizedRDcostDown, pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount );
+}
+void UpdateFMESwitchNull(SDqLayer* pCurLayer) { // intentionally empty: no-op that can be installed as pfUpdateFMESwitch; pCurLayer is unused
+}
 /////////////////////////
 // Search function options
 /////////////////////////