ref: 646d94320025b2ee70f24e83df28da28f428791d
parent: 02b08d278a9cb306afe90d60464eabf1ea1b070a
parent: 29f300dde9262c4ea9784ea90649c836eeec7089
author: ruil2 <ruil2@cisco.com>
date: Fri Apr 4 05:48:44 EDT 2014
Merge pull request #621 from sijchen/fme_merge42 [Encoder ME] add Preprocess functions for FME
--- a/codec/encoder/core/inc/picture.h
+++ b/codec/encoder/core/inc/picture.h
@@ -30,7 +30,7 @@
*
*/
-//picture.h - reconstruction picture/ reference picture/ residual picture are declared here
+//picture.h - reconstruction picture/ reference picture/ residual picture are declared here
#ifndef WELS_PICTURE_H__
#define WELS_PICTURE_H__
@@ -39,84 +39,89 @@
#include "wels_common_basis.h"
namespace WelsSVCEnc {
-#define LIST_SIZE 0x10000 //(256*256)
+#define LIST_SIZE 0x10000 //(256*256)
typedef struct TagScreenBlockFeatureStorage
{
- uint32_t* pTimesOfFeatureValue; // times of every value in Feature
- uint16_t** pLocationOfFeature; // uint16_t *pLocationOfFeature[LIST_SIZE], pLocationOfFeature[i] saves all the location(x,y) whose Feature = i;
- uint16_t* pLocationPointer; // buffer of position array
- int32_t iActualListSize; // actual list size
+ // Input (read by CalculateFeatureOfBlock)
+ uint16_t* pFeatureOfBlockPointer; // buffer receiving one feature value per block position; presumably aliases SFeatureSearchPreparation::pFeatureOfBlock -- confirm
+ int32_t iIs16x16; // selects the feature block size and indexes pfCalculateBlockFeatureOfFrame[]: 0 = 8x8, 1 = 16x16
+ uint8_t uiFeatureStrategyIndex;// index of hash strategy; 0 selects the block-sum feature
+ //Modify
+ uint32_t* pTimesOfFeatureValue; // pTimesOfFeatureValue[v] = number of block positions whose feature value is v
+ uint16_t** pLocationOfFeature; // one entry per feature value: pLocationOfFeature[i] points at all the locations (x,y) whose feature == i
+ uint16_t* pLocationPointer; // backing buffer of the position lists, two uint16_t per stored position
+ int32_t iActualListSize; // number of feature values, i.e. number of entries in pTimesOfFeatureValue
uint32_t uiSadCostThreshold[BLOCK_SIZE_ALL];
- bool bRefBlockFeatureCalculated; // flag of whether pre-process is done
+ bool bRefBlockFeatureCalculated; // set to true by PerformFMEPreprocess once the feature tables are built
} SScreenBlockFeatureStorage; //should be stored with RefPic, one for each frame
typedef struct TagFeatureSearchPreparation{
- SScreenBlockFeatureStorage* pRefBlockFeature;//point the the ref frame storage
+ SScreenBlockFeatureStorage* pRefBlockFeature;// points to the feature storage of the reference frame
- uint16_t* pFeatureOfBlock; // Feature of every block (8x8), begin with the point
- uint8_t uiFeatureStrategyIndex;// index of hash strategy
+ uint16_t* pFeatureOfBlock; // feature value of every block, one entry per block start position -- NOTE(review): the old comment said 8x8 only, but 16x16 features are computed as well; confirm
+ uint8_t uiFeatureStrategyIndex;// index of hash strategy
- /* for FME frame-level switch */
- bool bFMESwitchFlag;
- uint8_t uiFMEGoodFrameCount;
- int32_t iHighFreMbCount;
+ /* for FME frame-level switch */
+ bool bFMESwitchFlag;
+ uint8_t uiFMEGoodFrameCount;
+ int32_t iHighFreMbCount;
}SFeatureSearchPreparation;//maintain only one
/*
- * Reconstructed Picture definition
- * It is used to express reference picture, also consequent reconstruction picture for output
+ * Reconstructed Picture definition
+ * It is used to express reference picture, also consequent reconstruction picture for output
*/
typedef struct TagPicture {
/************************************payload pData*********************************/
- uint8_t* pBuffer; // pointer to the first allocated byte, basical offset of pBuffer, dimension:
- uint8_t* pData[3]; // pointer to picture planes respectively
- int32_t iLineSize[3]; // iLineSize of picture planes respectively
+ uint8_t* pBuffer; // pointer to the first allocated byte, basical offset of pBuffer, dimension:
+ uint8_t* pData[3]; // pointer to picture planes respectively
+ int32_t iLineSize[3]; // iLineSize of picture planes respectively
// picture information
/*******************************from other standard syntax****************************/
/*from pSps*/
- int32_t iWidthInPixel; // picture width in pixel
- int32_t iHeightInPixel;// picture height in pixel
- int32_t iPictureType; // got from sSliceHeader(): eSliceType
- int32_t iFramePoc; // frame POC
+ int32_t iWidthInPixel; // picture width in pixel
+ int32_t iHeightInPixel;// picture height in pixel
+ int32_t iPictureType; // got from sSliceHeader(): eSliceType
+ int32_t iFramePoc; // frame POC
- float fFrameRate; // MOVE
- int32_t iFrameNum; // frame number //for pRef pic management
+ float fFrameRate; // MOVE
+ int32_t iFrameNum; // frame number //for pRef pic management
- uint32_t* uiRefMbType; // for iMbWidth*iMbHeight
- uint8_t* pRefMbQp; // for iMbWidth*iMbHeight
+ uint32_t* uiRefMbType; // for iMbWidth*iMbHeight
+ uint8_t* pRefMbQp; // for iMbWidth*iMbHeight
int32_t* pMbSkipSad; //for iMbWidth*iMbHeight
- SMVUnitXY* sMvList;
+ SMVUnitXY* sMvList;
/*******************************sef_definition for misc use****************************/
- int32_t iMarkFrameNum;
- int32_t iLongTermPicNum;
+ int32_t iMarkFrameNum;
+ int32_t iLongTermPicNum;
- bool bUsedAsRef; //for pRef pic management
- bool bIsLongRef; // long term reference frame flag //for pRef pic management
+ bool bUsedAsRef; //for pRef pic management
+ bool bIsLongRef; // long term reference frame flag //for pRef pic management
bool bIsSceneLTR; //long term reference & large scene change
- uint8_t uiRecieveConfirmed;
- uint8_t uiTemporalId;
- uint8_t uiSpatialId;
+ uint8_t uiRecieveConfirmed;
+ uint8_t uiTemporalId;
+ uint8_t uiSpatialId;
int32_t iFrameAverageQp;
} SPicture;
/*
- * Residual Picture
+ * Residual Picture
*/
//typedef struct Rs_Picture_s{
-// int16_t *pBuffer[4]; // base pBuffer
-// int16_t *pData[4]; // pData pBuffer
-// int32_t real_linesize[4];// actual iLineSize of picture planes respectively
-// int32_t used_linesize[4];// iLineSize of picture planes respectively used currently
-// int32_t planes; // planes of YUV
+// int16_t *pBuffer[4]; // base pBuffer
+// int16_t *pData[4]; // pData pBuffer
+// int32_t real_linesize[4];// actual iLineSize of picture planes respectively
+// int32_t used_linesize[4];// iLineSize of picture planes respectively used currently
+// int32_t planes; // planes of YUV
//}Rs_Picture_t;
-} // end of namespace WelsSVCEnc {
+} // end of namespace WelsSVCEnc {
#endif//WELS_PICTURE_H__
--- a/codec/encoder/core/inc/svc_motion_estimate.h
+++ b/codec/encoder/core/inc/svc_motion_estimate.h
@@ -29,11 +29,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*
- * \file svc motion estimate.h
+ * \file svc motion estimate.h
*
- * \brief Interfaces introduced in svc mb motion estimation
+ * \brief Interfaces introduced in svc mb motion estimation
*
- * \date 08/11/2009 Created
+ * \date 08/11/2009 Created
*
*************************************************************************************
*/
@@ -46,10 +46,10 @@
namespace WelsSVCEnc {
#define CAMERA_STARTMV_RANGE (64)
-#define ITERATIVE_TIMES (16)
+#define ITERATIVE_TIMES (16)
#define CAMERA_MV_RANGE (CAMERA_STARTMV_RANGE+ITERATIVE_TIMES)
#define CAMERA_MVD_RANGE ((CAMERA_MV_RANGE+1)<<1) //mvd=mv_range*2;
-#define BASE_MV_MB_NMB ((2*CAMERA_MV_RANGE/MB_WIDTH_LUMA)-1)
+#define BASE_MV_MB_NMB ((2*CAMERA_MV_RANGE/MB_WIDTH_LUMA)-1)
#define CAMERA_HIGHLAYER_MVD_RANGE (243)//mvd range;
#define EXPANDED_MV_RANGE (504) //=512-8 rather than 511 to sacrifice same edge point but save complexity in assemblys
#define EXPANDED_MVD_RANGE ((504+1)<<1)
@@ -56,42 +56,42 @@
enum
{
- ME_DIA = 0x01, // LITTLE DIAMOND= 0x01
- ME_CROSS = 0x02, // CROSS= 0x02
- ME_FME = 0x04, // FME = 0x04
- ME_FULL = 0x10, // FULL
+ ME_DIA = 0x01, // LITTLE DIAMOND= 0x01
+ ME_CROSS = 0x02, // CROSS= 0x02
+ ME_FME = 0x04, // FME = 0x04
+ ME_FULL = 0x10, // FULL
// derived ME methods combination
- ME_DIA_CROSS = (ME_DIA|ME_CROSS), // DIA+CROSS
- ME_DIA_CROSS_FME = (ME_DIA_CROSS|ME_FME), // DIA+CROSS+FME
+ ME_DIA_CROSS = (ME_DIA|ME_CROSS), // DIA+CROSS
+ ME_DIA_CROSS_FME = (ME_DIA_CROSS|ME_FME), // DIA+CROSS+FME
};
union SadPredISatdUnit {
-uint32_t uiSadPred;
-uint32_t uiSatd; //reuse the sad_pred as a temp satd pData
+uint32_t uiSadPred;
+uint32_t uiSatd; //reuse the sad_pred as a temp satd pData
};
typedef struct TagWelsME {
/* input */
-uint16_t* pMvdCost;
-union SadPredISatdUnit uSadPredISatd; //reuse the sad_pred as a temp pData
-uint32_t uiSadCost; //used by ME and RC //max SAD should be max_delta*size+lambda*mvdsize = 255*256+91*33*2 = 65280 + 6006 = 71286 > (2^16)-1 = 65535
-uint32_t uiSatdCost; /* satd + lm * nbits */
-uint32_t uiSadCostThreshold;
-int32_t iCurMeBlockPixX;
-int32_t iCurMeBlockPixY;
-uint8_t uiBlockSize; /* BLOCK_WxH */
-uint8_t uiReserved;
+uint16_t* pMvdCost;
+union SadPredISatdUnit uSadPredISatd; //reuse the sad_pred as a temp pData
+uint32_t uiSadCost; //used by ME and RC //max SAD should be max_delta*size+lambda*mvdsize = 255*256+91*33*2 = 65280 + 6006 = 71286 > (2^16)-1 = 65535
+uint32_t uiSatdCost; /* satd + lm * nbits */
+uint32_t uiSadCostThreshold;
+int32_t iCurMeBlockPixX;
+int32_t iCurMeBlockPixY;
+uint8_t uiBlockSize; /* BLOCK_WxH */
+uint8_t uiReserved;
-uint8_t* pEncMb;
-uint8_t* pRefMb;
-uint8_t* pColoRefMb;
+uint8_t* pEncMb;
+uint8_t* pRefMb;
+uint8_t* pColoRefMb;
-SMVUnitXY sMvp;
-SMVUnitXY sMvBase;
-SMVUnitXY sDirectionalMv;
+SMVUnitXY sMvp;
+SMVUnitXY sMvBase;
+SMVUnitXY sDirectionalMv;
/* output */
-SMVUnitXY sMv;
+SMVUnitXY sMv;
} SWelsME;
typedef struct TagFeatureSearchIn{
@@ -134,37 +134,37 @@
void WelsInitMeFunc( SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent );
/*!
- * \brief BL mb motion estimate search
+ * \brief BL mb motion estimate search
*
- * \param enc Wels encoder context
- * \param m Wels me information
+ * \param enc Wels encoder context
+ * \param m Wels me information
*
- * \return NONE
+ * \return NONE
*/
void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice);
/*!
- * \brief BL mb motion estimate initial point testing
+ * \brief BL mb motion estimate initial point testing
*
- * \param enc Wels encoder context
- * \param m Wels me information
- * \param mv_range search range in motion estimate
- * \param point the best match point in motion estimation
+ * \param enc Wels encoder context
+ * \param m Wels me information
+ * \param mv_range search range in motion estimate
+ * \param point the best match point in motion estimation
*
- * \return NONE
+ * \return NONE
*/
/*!
- * \brief EL mb motion estimate initial point testing
+ * \brief EL mb motion estimate initial point testing
*
- * \param pix_func SSampleDealingFunc
- * \param m Wels me information
- * \param mv_range search range in motion estimate
- * \param point the best match point in motion estimation
+ * \param pix_func SSampleDealingFunc
+ * \param m Wels me information
+ * \param mv_range search range in motion estimate
+ * \param point the best match point in motion estimation
*
- * \return NONE
+ * \return NONE
*/
bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
@@ -171,13 +171,13 @@
const int32_t kiStrideEnc, const int32_t kiStrideRef);
/*!
- * \brief mb iterative motion estimate search
+ * \brief mb iterative motion estimate search
*
- * \param enc Wels encoder context
- * \param m Wels me information
- * \param point the best match point in motion estimation
+ * \param enc Wels encoder context
+ * \param m Wels me information
+ * \param point the best match point in motion estimation
*
- * \return NONE
+ * \return NONE
*/
void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, void* pLpme, void* pLpslice, const int32_t kiEncStride, const int32_t kiRefStride);
@@ -193,18 +193,30 @@
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
int32_t& iBestSadCost);
-void LineFullSearch_c( void *pFunc, void *vpMe,
+// Cross Search Basics
+void LineFullSearch_c( void *pFunc, void *vpMe,
uint16_t* pMvdTable, const int32_t kiFixedMvd,
const int32_t kiEncStride, const int32_t kiRefStride,
const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch );
void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
- const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+ uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ const int32_t kiEncStride, const int32_t kiRefStride,
+ const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch );
void WelsMotionCrossSearch(SWelsFuncPtrList *pFuncList, SDqLayer* pCurLayer, SWelsME * pMe, const SSlice* pSlice);
+// Feature Search Basics
+#define LIST_SIZE_SUM_16x16 0x0FF01 // 256*255+1: count of distinct sums of 256 8-bit samples (0..65280). NOTE(review): still also defined inside RequestScreenBlockFeatureStorage in svc_motion_estimate.cpp
+#define LIST_SIZE_SUM_8x8 0x03FC1 // 64*255+1: count of distinct sums of 64 8-bit samples (0..16320). NOTE(review): duplicated in svc_motion_estimate.cpp as well
+#define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2
+int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride); // sum of the 8x8 samples starting at pRef
+int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride); // sum of the 16x16 samples starting at pRef
+void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
+ uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); // 8x8 sum at every position of a kiWidth x kiHeight grid, plus a count per sum value
+void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
+ uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); // same as above with 16x16 blocks
+//inline functions
inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
const int32_t kiMaxMvRange,
SMVUnitXY* pMvMin, SMVUnitXY* pMvMax)
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -141,11 +141,14 @@
typedef bool (*PCheckDirectionalMv) (PSampleSadSatdCostFunc pSad, void * vpMe,
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
int32_t& iBestSadCost);
-typedef void (*PLineFullSearchFunc) ( void *pFunc, void *vpMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
- const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+typedef void (*PLineFullSearchFunc) ( void *pFunc, void *vpMe,
+ uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ const int32_t kiEncStride, const int32_t kiRefStride,
+ const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch );
+typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
+ uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); // frame-level feature calculator: writes one feature per position and counts occurrences per feature value
+typedef int32_t (*PCalculateSingleBlockFeature)(uint8_t *pRef, const int32_t kiRefStride); // returns the feature value of the single block starting at pRef
#define MAX_BLOCK_TYPE 5 // prev 7
typedef struct TagSampleDealingFunc {
@@ -175,15 +178,15 @@
typedef bool (*PUpdateRefListFunc) (void* pCtx);
struct TagWelsFuncPointerList {
- PExpandPictureFunc pfExpandLumaPicture;
+ PExpandPictureFunc pfExpandLumaPicture;
PExpandPictureFunc
pfExpandChromaPicture[2];// 0: for chroma unalignment && width_uv >= 16; 1: for chroma alignment && width_uv >= 16;
PFillInterNeighborCacheFunc pfFillInterNeighborCache;
- PGetVarianceFromIntraVaaFunc pfGetVarianceFromIntraVaa;
- PGetMbSignFromInterVaaFunc pfGetMbSignFromInterVaa;
- PUpdateMbMvFunc pfUpdateMbMv;
+ PGetVarianceFromIntraVaaFunc pfGetVarianceFromIntraVaa;
+ PGetMbSignFromInterVaaFunc pfGetMbSignFromInterVaa;
+ PUpdateMbMvFunc pfUpdateMbMv;
PInterMdFirstIntraModeFunc pfFirstIntraMode; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
PIntraFineMdFunc
pfIntraFineMd; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
@@ -193,11 +196,11 @@
PInterMdBackgroundDecisionFunc pfInterMdBackgroundDecision;
PInterMdBackgroundInfoUpdateFunc pfInterMdBackgroundInfoUpdate;
- SMcFunc sMcFuncs;
+ SMcFunc sMcFuncs;
SSampleDealingFunc sSampleDealingFuncs;
- PGetIntraPredFunc pfGetLumaI16x16Pred[I16_PRED_DC_A];
- PGetIntraPredFunc pfGetLumaI4x4Pred[I4_PRED_A];
- PGetIntraPredFunc pfGetChromaPred[C_PRED_A];
+ PGetIntraPredFunc pfGetLumaI16x16Pred[I16_PRED_DC_A];
+ PGetIntraPredFunc pfGetLumaI4x4Pred[I4_PRED_A];
+ PGetIntraPredFunc pfGetChromaPred[C_PRED_A];
PMotionSearchFunc
pfMotionSearch[BLOCK_STATIC_IDC_ALL]; //svc_encode_slice.c svc_mode_decision.c svc_enhance_layer_md.c svc_base_layer_md.c
@@ -205,55 +208,57 @@
PCalculateSatdFunc pfCalculateSatd;
PCheckDirectionalMv pfCheckDirectionalMv;
PLineFullSearchFunc pfLineFullSearch;
+ PCalculateBlockFeatureOfFrame pfCalculateBlockFeatureOfFrame[2];//0 - for 8x8, 1 for 16x16
+ PCalculateSingleBlockFeature pfCalculateSingleBlockFeature[2];//0 - for 8x8, 1 for 16x16
- PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
- PCopyFunc pfCopy16x16NotAligned; //md.c
- PCopyFunc pfCopy8x8Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c md.c
- PCopyFunc pfCopy16x8NotAligned; //for MeRefineFracPixel 16x8 based
- PCopyFunc pfCopy8x16Aligned; //for MeRefineFracPixel 8x16 based
+ PCopyFunc pfCopy16x16Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c
+ PCopyFunc pfCopy16x16NotAligned; //md.c
+ PCopyFunc pfCopy8x8Aligned; //svc_encode_slice.c svc_mode_decision.c svc_base_layer_md.c md.c
+ PCopyFunc pfCopy16x8NotAligned; //for MeRefineFracPixel 16x8 based
+ PCopyFunc pfCopy8x16Aligned; //for MeRefineFracPixel 8x16 based
//svc_encode_mb.c encode_mb_aux.c
- PDctFunc pfDctT4;
- PDctFunc pfDctFourT4;
+ PDctFunc pfDctT4;
+ PDctFunc pfDctFourT4;
- PCalculateSingleCtrFunc pfCalculateSingleCtr4x4;
- PScanFunc pfScan4x4; //DC/AC
- PScanFunc pfScan4x4Ac;
+ PCalculateSingleCtrFunc pfCalculateSingleCtr4x4;
+ PScanFunc pfScan4x4; //DC/AC
+ PScanFunc pfScan4x4Ac;
- PQuantizationFunc pfQuantization4x4;
- PQuantizationFunc pfQuantizationFour4x4;
- PQuantizationDcFunc pfQuantizationDc4x4;
- PQuantizationMaxFunc pfQuantizationFour4x4Max;
- PQuantizationHadamardFunc pfQuantizationHadamard2x2;
- PQuantizationSkipFunc pfQuantizationHadamard2x2Skip;
+ PQuantizationFunc pfQuantization4x4;
+ PQuantizationFunc pfQuantizationFour4x4;
+ PQuantizationDcFunc pfQuantizationDc4x4;
+ PQuantizationMaxFunc pfQuantizationFour4x4Max;
+ PQuantizationHadamardFunc pfQuantizationHadamard2x2;
+ PQuantizationSkipFunc pfQuantizationHadamard2x2Skip;
- PTransformHadamard4x4Func pfTransformHadamard4x4Dc;
+ PTransformHadamard4x4Func pfTransformHadamard4x4Dc;
- PGetNoneZeroCountFunc pfGetNoneZeroCount;
+ PGetNoneZeroCountFunc pfGetNoneZeroCount;
- PDeQuantizationFunc pfDequantization4x4;
- PDeQuantizationFunc pfDequantizationFour4x4;
- PDeQuantizationHadamardFunc pfDequantizationIHadamard4x4;
- PIDctFunc pfIDctFourT4;
- PIDctFunc pfIDctT4;
- PIDctFunc pfIDctI16x16Dc;
+ PDeQuantizationFunc pfDequantization4x4;
+ PDeQuantizationFunc pfDequantizationFour4x4;
+ PDeQuantizationHadamardFunc pfDequantizationIHadamard4x4;
+ PIDctFunc pfIDctFourT4;
+ PIDctFunc pfIDctT4;
+ PIDctFunc pfIDctI16x16Dc;
// OPTI: if MT under diff uiSliceMode, need change here
- //PDynamicSlicingStepBackFunc dynslc_funcpointer_stepback;//svc_encode_slice.c
- //DYNSLC_LNGTH_CRTL dynslc_funcpointer_slcsize_ctrl;
+ //PDynamicSlicingStepBackFunc dynslc_funcpointer_stepback;//svc_encode_slice.c
+ //DYNSLC_LNGTH_CRTL dynslc_funcpointer_slcsize_ctrl;
/* For Deblocking */
DeblockingFunc pfDeblocking;
PSetNoneZeroCountZeroFunc pfSetNZCZero;
- SWelsRcFunc pfRc;
+ SWelsRcFunc pfRc;
PAccumulateSadFunc pfAccumulateSadForRc;
- PSetMemoryZero pfSetMemZeroSize8; // for size is times to 8
- PSetMemoryZero pfSetMemZeroSize64Aligned16; // for size is times of 64, and address is align to 16
- PSetMemoryZero pfSetMemZeroSize64; // for size is times of 64, and don't know address is align to 16 or not
+ PSetMemoryZero pfSetMemZeroSize8; // for size is times to 8
+ PSetMemoryZero pfSetMemZeroSize64Aligned16; // for size is times of 64, and address is align to 16
+ PSetMemoryZero pfSetMemZeroSize64; // for size is times of 64, and don't know address is align to 16 or not
PBuildRefListFunc pBuildRefList;
PMarkPicFunc pMarkPic;
@@ -260,6 +265,6 @@
PUpdateRefListFunc pUpdateRefList;
};
-} //end of namespace WelsSVCEnc {
+} //end of namespace WelsSVCEnc {
#endif//WELS_ENCODER_FUNCTION_POINTERS_DEFINITION_H_
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -29,16 +29,17 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*
- * \file svc motion estimate.c
+ * \file svc motion estimate.c
*
- * \brief Interfaces introduced in svc mb motion estimation
+ * \brief Interfaces introduced in svc mb motion estimation
*
- * \date 08/11/2009 Created
+ * \date 08/11/2009 Created
*
*************************************************************************************
*/
#include "cpu_core.h"
+#include "ls_defines.h"
#include "svc_motion_estimate.h"
namespace WelsSVCEnc {
@@ -67,16 +68,23 @@
pFuncList->pfLineFullSearch = LineFullSearch_c;
if ( uiCpuFlag & WELS_CPU_SSE41 ) {
}
+
+ //for feature search
+ pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
+ pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_c;
+ //TODO: could widths that are a multiple of 8 be special-cased to speed up the calculation?
+ pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
+ pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
}
}
/*!
- * \brief BL mb motion estimate search
+ * \brief BL mb motion estimate search
*
- * \param enc Wels encoder context
- * \param pMe Wels me information
+ * \param enc Wels encoder context
+ * \param pMe Wels me information
*
- * \return NONE
+ * \return NONE
*/
void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice) {
@@ -96,20 +104,20 @@
}
/*!
- * \brief EL mb motion estimate initial point testing
+ * \brief EL mb motion estimate initial point testing
*
- * \param pix_pFuncList SSampleDealingFunc
- * \param pMe Wels me information
- * \param mv_range search range in motion estimate
- * \param point the best match point in motion estimation
+ * \param pix_pFuncList SSampleDealingFunc
+ * \param pMe Wels me information
+ * \param mv_range search range in motion estimate
+ * \param point the best match point in motion estimation
*
- * \return NONE
+ * \return NONE
*/
bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, int32_t iStrideEnc,
int32_t iStrideRef) {
- PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
- const uint16_t* kpMvdCost = pMe->pMvdCost;
- uint8_t* const kpEncMb = pMe->pEncMb;
+ PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
+ const uint16_t* kpMvdCost = pMe->pMvdCost;
+ uint8_t* const kpEncMb = pMe->pEncMb;
int16_t iMvc0, iMvc1;
int32_t iSadCost;
int32_t iBestSadCost;
@@ -116,17 +124,17 @@
uint8_t* pRefMb;
uint8_t* pFref2;
uint32_t i;
- const uint32_t kuiMvcNum = pSlice->uiMvcNum;
- const SMVUnitXY* kpMvcList = &pSlice->sMvc[0];
- const SMVUnitXY ksMvStartMin = pSlice->sMvStartMin;
- const SMVUnitXY ksMvStartMax = pSlice->sMvStartMax;
- const SMVUnitXY ksMvp = pMe->sMvp;
+ const uint32_t kuiMvcNum = pSlice->uiMvcNum;
+ const SMVUnitXY* kpMvcList = &pSlice->sMvc[0];
+ const SMVUnitXY ksMvStartMin = pSlice->sMvStartMin;
+ const SMVUnitXY ksMvStartMax = pSlice->sMvStartMax;
+ const SMVUnitXY ksMvp = pMe->sMvp;
SMVUnitXY sMv;
// Step 1: Initial point prediction
// init with sMvp
- sMv.iMvX = WELS_CLIP3 ((2 + ksMvp.iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
- sMv.iMvY = WELS_CLIP3 ((2 + ksMvp.iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
+ sMv.iMvX = WELS_CLIP3 ((2 + ksMvp.iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
+ sMv.iMvY = WELS_CLIP3 ((2 + ksMvp.iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
pRefMb = &pMe->pRefMb[sMv.iMvY * iStrideRef + sMv.iMvX];
@@ -171,7 +179,7 @@
void CalculateSatdCost( PSampleSadSatdCostFunc pSatd, void * vpMe,
const int32_t kiEncStride, const int32_t kiRefStride ) {
- SWelsME* pMe = static_cast<SWelsME *>(vpMe);
+ SWelsME* pMe = static_cast<SWelsME *>(vpMe);
pMe->uSadPredISatd.uiSatd = pSatd(pMe->pEncMb, kiEncStride, pMe->pRefMb, kiRefStride);
pMe->uiSatdCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, pMe->sMv.iMvX - pMe->sMvp.iMvX,
pMe->sMv.iMvY - pMe->sMvp.iMvY);
@@ -266,7 +274,7 @@
bool CheckDirectionalMv(PSampleSadSatdCostFunc pSad, void * vpMe,
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
int32_t& iBestSadCost) {
- SWelsME* pMe = static_cast<SWelsME *>(vpMe);
+ SWelsME* pMe = static_cast<SWelsME *>(vpMe);
const int16_t kiMvX = pMe->sDirectionalMv.iMvX;
const int16_t kiMvY = pMe->sDirectionalMv.iMvY;
@@ -295,34 +303,34 @@
// Cross Search Basics
/////////////////////////
void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
- const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+ uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ const int32_t kiEncStride, const int32_t kiRefStride,
+ const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch ) {
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
- SWelsME *pMe = static_cast<SWelsME *>(vpMe);
+ SWelsME *pMe = static_cast<SWelsME *>(vpMe);
}
-void LineFullSearch_c( void *pFunc, void *vpMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
- const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+void LineFullSearch_c( void *pFunc, void *vpMe,
+ uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ const int32_t kiEncStride, const int32_t kiRefStride,
+ const int32_t kiMinPos, const int32_t kiMaxPos,
const bool bVerticalSearch ) {
SWelsFuncPtrList *pFuncList = static_cast<SWelsFuncPtrList *>(pFunc);
- SWelsME *pMe = static_cast<SWelsME *>(vpMe);
+ SWelsME *pMe = static_cast<SWelsME *>(vpMe);
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
- const int32_t kiCurMeBlockPix = bVerticalSearch?pMe->iCurMeBlockPixY:pMe->iCurMeBlockPixX;
+ const int32_t kiCurMeBlockPix = bVerticalSearch?pMe->iCurMeBlockPixY:pMe->iCurMeBlockPixX;
const int32_t kiStride = bVerticalSearch?kiRefStride:1;
- uint8_t* pRef = &pMe->pColoRefMb[(kiMinPos - kiCurMeBlockPix)*kiStride];
+ uint8_t* pRef = &pMe->pColoRefMb[(kiMinPos - kiCurMeBlockPix)*kiStride];
uint16_t* pMvdCost = &(pMvdTable[kiMinPos<<2]);
- uint32_t uiBestCost = 0xFFFFFFFF;
- int32_t iBestPos = 0;
+ uint32_t uiBestCost = 0xFFFFFFFF;
+ int32_t iBestPos = 0;
for ( int32_t iTargetPos = kiMinPos; iTargetPos < kiMaxPos; ++ iTargetPos ) {
- uint8_t* const kpEncMb = pMe->pEncMb;
+ uint8_t* const kpEncMb = pMe->pEncMb;
uint32_t uiSadCost = pSad( kpEncMb, kiEncStride, pRef, kiRefStride ) + (kiFixedMvd + *pMvdCost);
if (uiSadCost < uiBestCost) {
- uiBestCost = uiSadCost;
- iBestPos = iTargetPos;
+ uiBestCost = uiSadCost;
+ iBestPos = iTargetPos;
}
pRef += kiStride;
pMvdCost+=4;
@@ -400,8 +408,8 @@
int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFeatureStrategyIndex,
const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t kiMe16x16, const int32_t kiMe8x8,
SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
-#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
-#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
+#define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1)
+#define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1)
if (((kiMe8x8&ME_FME)==ME_FME) && ((kiMe16x16&ME_FME)==ME_FME)) {
return ENC_RETURN_UNSUPPORTED_PARA;
@@ -411,7 +419,7 @@
const bool bIsBlock8x8 = ((kiMe8x8 & ME_FME)==ME_FME);
const int32_t kiMarginSize = bIsBlock8x8?8:16;
const int32_t kiFrameSize = (kiFrameWidth-kiMarginSize) * (kiFrameHeight-kiMarginSize);
- const int32_t kiListSize = (0==kiFeatureStrategyIndex)?(bIsBlock8x8 ? LIST_SIZE_SUM_8x8 : LIST_SIZE_SUM_16x16):256;
+ const int32_t kiListSize = (0==kiFeatureStrategyIndex)?(bIsBlock8x8 ? LIST_SIZE_SUM_8x8 : LIST_SIZE_SUM_16x16):256;
pScreenBlockFeatureStorage->pTimesOfFeatureValue = (uint32_t*)pMa->WelsMalloc(kiListSize*sizeof(uint32_t),"pScreenBlockFeatureStorage->pTimesOfFeatureValue");
WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pTimesOfFeatureValue)
@@ -422,7 +430,7 @@
pScreenBlockFeatureStorage->pLocationPointer = (uint16_t*)pMa->WelsMalloc(2*kiFrameSize*sizeof(uint16_t), "pScreenBlockFeatureStorage->pLocationPointer");
WELS_VERIFY_RETURN_IF(ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationPointer)
- pScreenBlockFeatureStorage->iActualListSize = kiListSize;
+ pScreenBlockFeatureStorage->iActualListSize = kiListSize;
return ENC_RETURN_SUCCESS;
}
int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage ) {
@@ -440,6 +448,137 @@
}
return ENC_RETURN_UNEXPECTED;
}
+
+//preprocess related
+int32_t SumOf8x8SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride) // returns the sum of the 8x8 samples whose top-left sample is pRef[0]; consecutive rows are kiRefStride bytes apart
+{
+ int32_t iSum = 0, i;
+ for(i = 0; i < 8; i++) // one iteration per row of the block
+ {
+ iSum += pRef[0] + pRef[1] + pRef[2] + pRef[3];
+ iSum += pRef[4] + pRef[5] + pRef[6] + pRef[7];
+ pRef += kiRefStride; // step to the same column of the next row
+ }
+ return iSum; // at most 64*255 = 16320 for 8-bit samples
+}
+int32_t SumOf16x16SingleBlock_c(uint8_t* pRef, const int32_t kiRefStride) // returns the sum of the 16x16 samples whose top-left sample is pRef[0]; consecutive rows are kiRefStride bytes apart
+{
+ int32_t iSum = 0, i;
+ for(i = 0; i < 16; i++) // one iteration per row of the block
+ {
+ iSum += pRef[0] + pRef[1] + pRef[2] + pRef[3];
+ iSum += pRef[4] + pRef[5] + pRef[6] + pRef[7];
+ iSum += pRef[8] + pRef[9] + pRef[10] + pRef[11];
+ iSum += pRef[12] + pRef[13] + pRef[14] + pRef[15];
+ pRef += kiRefStride; // step to the same column of the next row
+ }
+ return iSum; // at most 256*255 = 65280 for 8-bit samples, still representable in uint16_t
+}
+
+void SumOf8x8BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
+ uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) // computes the 8x8 block sum at every (x,y) of a kiWidth x kiHeight grid and counts how often each sum occurs
+{
+ int32_t x, y;
+ uint8_t *pRef;
+ uint16_t *pBuffer;
+ int32_t iSum;
+ for(y = 0; y < kiHeight; y++) {
+ pRef = pRefPicture + kiRefStride * y; // first sample of picture row y
+ pBuffer = pFeatureOfBlock + kiWidth * y; // output is densely packed: kiWidth entries per row
+ for(x = 0; x < kiWidth; x++) {
+ iSum = SumOf8x8SingleBlock_c(pRef + x, kiRefStride); // reads 8 columns and 8 rows starting at (x,y), so the picture must extend 7 samples past the grid
+
+ pBuffer[x] = iSum;
+ pTimesOfFeatureValue[iSum]++; // only increments: the caller must have zeroed the counters beforehand
+ }
+ }
+}
+
+void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
+ uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) // computes the 16x16 block sum at every (x,y) of a kiWidth x kiHeight grid and counts how often each sum occurs
+{//TODO: this is similar to SumOf8x8BlockOfFrame_c except for the single-block function that is called, refactor-able?
+ int32_t x, y;
+ uint8_t *pRef;
+ uint16_t *pBuffer;
+ int32_t iSum;
+ for(y = 0; y < kiHeight; y++) {
+ pRef = pRefPicture + kiRefStride * y; // first sample of picture row y
+ pBuffer = pFeatureOfBlock + kiWidth * y; // output is densely packed: kiWidth entries per row
+ for(x = 0; x < kiWidth; x++) {
+ iSum = SumOf16x16SingleBlock_c(pRef + x, kiRefStride); // reads 16 columns and 16 rows starting at (x,y), so the picture must extend 15 samples past the grid
+
+ pBuffer[x] = iSum;
+ pTimesOfFeatureValue[iSum]++; // only increments: the caller must have zeroed the counters beforehand
+ }
+ }
+}
+
+void InitializeHashforFeature_c( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
+ uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList ) // carves pBuf into kiListSize consecutive segments, one per feature value, sized from the occurrence counts
+{
+ //assign location pointer
+ uint16_t *pBufPos = pBuf;
+ for( int32_t i = 0 ; i < kiListSize; ++i )
+ {
+ pLocationOfFeature[i] = // both tables receive the start of segment i
+ pFeatureValuePointerList[i] = pBufPos;
+ pBufPos += (pTimesOfFeatureValue[i]<<1); // each occurrence takes two uint16_t entries
+ }
+}
+void FillQpelLocationByFeatureValue_c( uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
+ uint16_t** pFeatureValuePointerList ) // appends the position (scaled by 4, quarter-pel per the function name) of every grid entry to the list of its feature value; advances the pointers in pFeatureValuePointerList
+{
+ //assign each pixel's position
+ uint16_t* pSrcPointer = pFeatureOfBlock;
+ int32_t iQpelY = 0; // equals y*4 throughout the outer loop
+ for(int32_t y = 0; y < kiHeight; y++)
+ {
+ for(int32_t x = 0; x < kiWidth; x++)
+ {
+ uint16_t uiFeature = pSrcPointer[x];
+ ST32( &pFeatureValuePointerList[uiFeature][0], ((iQpelY<<16)|(x<<2)) ); // NOTE(review): ST32 is defined outside this file; presumably a single 32-bit store over two uint16_t slots, in which case which slot holds x and which holds y depends on byte order -- confirm
+ pFeatureValuePointerList[uiFeature] += 2; // move this feature's write cursor past the pair just written
+ }
+ iQpelY += 4;
+ pSrcPointer += kiWidth; // next row of the densely packed feature grid
+ }
+}
+// Builds the feature tables of the reference picture pRef for feature-based motion search.
+// pFunc supplies the frame-level feature calculators (pfCalculateBlockFeatureOfFrame[0] = 8x8, [1] = 16x16).
+// pRef is the reference picture; only its luma plane (pData[0], iLineSize[0]) and pixel dimensions are read.
+// pScreenBlockFeatureStorage supplies the inputs (feature buffer, block-size flag, list size) and receives:
+// - one feature value per block position in pFeatureOfBlockPointer,
+// - the occurrence count of every feature value in pTimesOfFeatureValue,
+// - the per-feature position lists in pLocationOfFeature / pLocationPointer.
+void CalculateFeatureOfBlock( SWelsFuncPtrList *pFunc, SPicture* pRef,
+ SScreenBlockFeatureStorage* pScreenBlockFeatureStorage)
+{
+ uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
+ uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
+ uint16_t** pLocationOfFeature = pScreenBlockFeatureStorage->pLocationOfFeature;
+ uint16_t* pBuf = pScreenBlockFeatureStorage->pLocationPointer;
+
+ uint8_t* pRefData = pRef->pData[0];
+ const int32_t iRefStride = pRef->iLineSize[0];
+ // normalise the flag to 0/1 so that it can never index past the two-entry function table
+ const int32_t kiBlockFuncIdx = pScreenBlockFeatureStorage->iIs16x16 ? 1 : 0;
+ const int32_t iEdgeDiscard = (kiBlockFuncIdx?16:8);//this is to save complexity of padding on pRef
+ const int32_t iWidth = pRef->iWidthInPixel - iEdgeDiscard;
+ const int32_t kiHeight = pRef->iHeightInPixel - iEdgeDiscard;
+ const int32_t kiActualListSize = pScreenBlockFeatureStorage->iActualListSize;
+ // per-feature write cursors used while the position lists are filled.
+ // NOTE(review): 0xFF01 pointers (about 510KB with 8-byte pointers) live on the stack here; consider moving this into pre-allocated storage
+ uint16_t* pFeatureValuePointerList[WELS_MAX(LIST_SIZE_SUM_16x16,LIST_SIZE_MSE_16x16)] = {0};
+
+ // the frame-level calculators only increment the counters, so clear them first
+ memset(pTimesOfFeatureValue, 0, sizeof(*pTimesOfFeatureValue)*kiActualListSize);
+ (pFunc->pfCalculateBlockFeatureOfFrame[kiBlockFuncIdx])(pRefData,iWidth, kiHeight, iRefStride, pFeatureOfBlock, pTimesOfFeatureValue);
+
+ //assign pLocationOfFeature pointer
+ InitializeHashforFeature_c( pTimesOfFeatureValue, pBuf, kiActualListSize,
+ pLocationOfFeature, pFeatureValuePointerList );
+
+ //assign each pixel's pLocationOfFeature
+ FillQpelLocationByFeatureValue_c(pFeatureOfBlock, iWidth, kiHeight, pFeatureValuePointerList);
+}
+
+void PerformFMEPreprocess( SWelsFuncPtrList *pFunc, SPicture* pRef,
+ SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) // runs the feature pre-process on reference picture pRef and marks the storage as calculated
+{
+ CalculateFeatureOfBlock(pFunc, pRef, pScreenBlockFeatureStorage ); // fills the feature values, their counts and the position lists
+ pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = true; // records that the tables for this reference are now built
+}
+
//search related
void SetFeatureSearchIn( SWelsFuncPtrList *pFunc, const SWelsME& sMe,
const SSlice *pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
@@ -446,7 +585,7 @@
const int32_t kiEncStride, const int32_t kiRefStride,
SFeatureSearchIn* pFeatureSearchIn ) {
pFeatureSearchIn->pSad = pFunc->sSampleDealingFuncs.pfSampleSad[sMe.uiBlockSize];
- //pFeatureSearchIn->iFeatureOfCurrent=
+ pFeatureSearchIn->iFeatureOfCurrent=pFunc->pfCalculateSingleBlockFeature[BLOCK_16x16==sMe.uiBlockSize](sMe.pEncMb, kiEncStride);
pFeatureSearchIn->pEnc = sMe.pEncMb;
pFeatureSearchIn->pColoRef = sMe.pColoRefMb;