ref: 2c6185a22d5a2b53c1ee002aa1db2a2ed2af311b
dir: /codec/encoder/core/inc/svc_motion_estimate.h/
/*! * \copy * Copyright (c) 2009-2013, Cisco Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * * \file svc motion estimate.h * * \brief Interfaces introduced in svc mb motion estimation * * \date 08/11/2009 Created * ************************************************************************************* */ #ifndef SVC_MOTION_ESTIMATE_ #define SVC_MOTION_ESTIMATE_ #include "typedefs.h" #include "encoder_context.h" #include "wels_func_ptr_def.h" namespace WelsEnc { #define CAMERA_STARTMV_RANGE (64) #define ITERATIVE_TIMES (16) #define CAMERA_MV_RANGE (CAMERA_STARTMV_RANGE+ITERATIVE_TIMES) #define CAMERA_MVD_RANGE ((CAMERA_MV_RANGE+1)<<1) //mvd=mv_range*2; #define BASE_MV_MB_NMB ((2*CAMERA_MV_RANGE/MB_WIDTH_LUMA)-1) #define CAMERA_HIGHLAYER_MVD_RANGE (243)//mvd range; #define EXPANDED_MV_RANGE (504) //=512-8 rather than 511 to sacrifice same edge point but save complexity in assemblys #define EXPANDED_MVD_RANGE ((504+1)<<1) enum { ME_DIA = 0x01, // LITTLE DIAMOND= 0x01 ME_CROSS = 0x02, // CROSS= 0x02 ME_FME = 0x04, // FME = 0x04 ME_FULL = 0x10, // FULL // derived ME methods combination ME_DIA_CROSS = (ME_DIA | ME_CROSS), // DIA+CROSS ME_DIA_CROSS_FME = (ME_DIA_CROSS | ME_FME) // DIA+CROSS+FME }; union SadPredISatdUnit { uint32_t uiSadPred; uint32_t uiSatd; //reuse the sad_pred as a temp satd pData }; typedef struct TagWelsME { /* input */ uint16_t* pMvdCost; union SadPredISatdUnit uSadPredISatd; //reuse the sad_pred as a temp pData uint32_t uiSadCost; //used by ME and RC //max SAD should be max_delta*size+lambda*mvdsize = 255*256+91*33*2 = 65280 + 6006 = 71286 > (2^16)-1 = 65535 uint32_t uiSatdCost; /* satd + lm * nbits */ uint32_t uiSadCostThreshold; int32_t iCurMeBlockPixX; int32_t iCurMeBlockPixY; uint8_t uiBlockSize; /* BLOCK_WxH */ uint8_t uiReserved; uint8_t* pEncMb; uint8_t* pRefMb; uint8_t* pColoRefMb; SMVUnitXY sMvp; SMVUnitXY sMvBase; SMVUnitXY sDirectionalMv; SScreenBlockFeatureStorage* pRefFeatureStorage; /* output */ SMVUnitXY sMv; } SWelsME; typedef struct TagFeatureSearchIn { PSampleSadSatdCostFunc pSad; uint32_t* pTimesOfFeature; uint16_t** pQpelLocationOfFeature; uint16_t* pMvdCostX; uint16_t* pMvdCostY; uint8_t* pEnc; uint8_t* pColoRef; int32_t iEncStride; int32_t iRefStride; uint16_t uiSadCostThresh; int32_t iFeatureOfCurrent; int32_t iCurPixX; int32_t iCurPixY; int32_t iCurPixXQpel; int32_t iCurPixYQpel; int32_t iMinQpelX; int32_t iMinQpelY; int32_t iMaxQpelX; int32_t iMaxQpelY; } SFeatureSearchIn; typedef struct TagFeatureSearchOut { SMVUnitXY sBestMv; uint32_t uiBestSadCost; uint8_t* pBestRef; } SFeatureSearchOut; #define COST_MVD(table, mx, my) (table[mx] + table[my]) extern const int32_t QStepx16ByQp[52]; // Function definitions below void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent); /*! * \brief BL mb motion estimate search * * \param enc Wels encoder context * \param m Wels me information * * \return NONE */ void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice); void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice); void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice); /*! * \brief BL mb motion estimate initial point testing * * \param enc Wels encoder context * \param m Wels me information * \param mv_range search range in motion estimate * \param point the best match point in motion estimation * * \return NONE */ /*! * \brief EL mb motion estimate initial point testing * * \param pix_func SSampleDealingFunc * \param m Wels me information * \param mv_range search range in motion estimate * \param point the best match point in motion estimation * * \return NONE */ bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, const int32_t kiStrideEnc, const int32_t kiStrideRef); /*! * \brief mb iterative motion estimate search * * \param enc Wels encoder context * \param m Wels me information * \param point the best match point in motion estimation * * \return NONE */ void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride, const int32_t kiRefStride); bool WelsMeSadCostSelect (int32_t* pSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx, const int32_t kiDy, int32_t* pIx, int32_t* pIy); void CalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, const int32_t kiEncStride, const int32_t kiRefStride); void NotCalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, const int32_t kiEncStride, const int32_t kiRefStride); bool CheckDirectionalMv (PSampleSadSatdCostFunc pSad, SWelsME* pMe, const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride, int32_t& iBestSadCost); bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, SWelsME* pMe, const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride, int32_t& iBestSadCost); // Cross Search Basics void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe, uint16_t* pMvdTable, const int32_t kiEncStride, const int32_t kiRefStride, const int16_t kiMinMv, const int16_t kiMaxMv, const bool bVerticalSearch); #ifdef X86_ASM extern "C" { uint32_t SampleSad8x8Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_t*, int32_t*); uint32_t SampleSad16x16Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_t*, int32_t*); } void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe, uint16_t* pMvdTable, const int32_t kiEncStride, const int32_t kiRefStride, const int16_t kiMinMv, const int16_t kiMaxMv, const bool bVerticalSearch); void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe, uint16_t* pMvdTable, const int32_t kiEncStride, const int32_t kiRefStride, const int16_t kiMinMv, const int16_t kiMaxMv, const bool bVerticalSearch); #endif void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride, const int32_t kiRefStride); void WelsDiamondCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride, const int32_t kiRefStride); // Feature Search Basics #define LIST_SIZE_SUM_16x16 0x0FF01 //(256*255+1) #define LIST_SIZE_SUM_8x8 0x03FC1 //(64*255+1) #define LIST_SIZE_MSE_16x16 0x00878 //(avg+mse)/2, max= (255+16*255)/2 #define FME_DEFAULT_FEATURE_INDEX (0) #define FMESWITCH_DEFAULT_GOODFRAME_NUM (2) #define FMESWITCH_MBSAD_THRESHOLD 30 // empirically set. void InitializeHashforFeature_c (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); void FillQpelLocationByFeatureValue_c (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList); int32_t SumOf8x8SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride); int32_t SumOf16x16SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride); void SumOf8x8BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); void SumOf16x16BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); #ifdef X86_ASM extern "C" { void InitializeHashforFeature_sse2 (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); void FillQpelLocationByFeatureValue_sse2 (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList); int32_t SumOf8x8SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride); int32_t SumOf16x16SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride); void SumOf8x8BlockOfFrame_sse2 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); void SumOf16x16BlockOfFrame_sse2 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); void SumOf8x8BlockOfFrame_sse4 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); void SumOf16x16BlockOfFrame_sse4 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); } #endif #ifdef HAVE_NEON extern "C" { void InitializeHashforFeature_neon (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); void FillQpelLocationByFeatureValue_neon (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList); int32_t SumOf8x8SingleBlock_neon (uint8_t* pRef, const int32_t kiRefStride); int32_t SumOf16x16SingleBlock_neon (uint8_t* pRef, const int32_t kiRefStride); void SumOf8x8BlockOfFrame_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); void SumOf16x16BlockOfFrame_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); } #endif #ifdef HAVE_NEON_AARCH64 extern "C" { void InitializeHashforFeature_AArch64_neon (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize, uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList); void FillQpelLocationByFeatureValue_AArch64_neon (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList); int32_t SumOf8x8SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); int32_t SumOf16x16SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride); void SumOf8x8BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); void SumOf16x16BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]); } #endif int32_t RequestScreenBlockFeatureStorage (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage); int32_t ReleaseScreenBlockFeatureStorage (CMemoryAlign* pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage); int32_t RequestFeatureSearchPreparation (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage, SFeatureSearchPreparation* pFeatureSearchPreparation); int32_t ReleaseFeatureSearchPreparation (CMemoryAlign* pMa, uint16_t*& pFeatureOfBlock); #define FMESWITCH_DEFAULT_GOODFRAME_NUM (2) #define FME_DEFAULT_FEATURE_INDEX (0) void PerformFMEPreprocess (SWelsFuncPtrList* pFunc, SPicture* pRef, uint16_t* pFeatureOfBlock, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage); bool SetFeatureSearchIn (SWelsFuncPtrList* pFunc, const SWelsME& sMe, const SSlice* pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage, const int32_t kiEncStride, const int32_t kiRefStride, SFeatureSearchIn* pFeatureSearchIn); void MotionEstimateFeatureFullSearch (SFeatureSearchIn& sFeatureSearchIn, const uint32_t kuiMaxSearchPoint, SWelsME* pMe); void UpdateFMESwitch (SDqLayer* pCurLayer); void UpdateFMESwitchNull (SDqLayer* pCurLayer); void WelsDiamondCrossFeatureSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride, const int32_t kiRefStride); //inline functions inline void SetMvWithinIntegerMvRange (const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY, const int32_t kiMaxMvRange, SMVUnitXY* pMvMin, SMVUnitXY* pMvMax) { pMvMin->iMvX = WELS_MAX (-1 * ((kiMbX + 1) * (1 << 4)) + INTPEL_NEEDED_MARGIN, -1 * kiMaxMvRange); pMvMin->iMvY = WELS_MAX (-1 * ((kiMbY + 1) * (1 << 4)) + INTPEL_NEEDED_MARGIN, -1 * kiMaxMvRange); pMvMax->iMvX = WELS_MIN (((kiMbWidth - kiMbX) * (1 << 4)) - INTPEL_NEEDED_MARGIN, kiMaxMvRange); pMvMax->iMvY = WELS_MIN (((kiMbHeight - kiMbY) * (1 << 4)) - INTPEL_NEEDED_MARGIN, kiMaxMvRange); } inline bool CheckMvInRange (const SMVUnitXY ksCurrentMv, const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv) { return (CheckInRangeCloseOpen (ksCurrentMv.iMvX, ksMinMv.iMvX, ksMaxMv.iMvX) && CheckInRangeCloseOpen (ksCurrentMv.iMvY, ksMinMv.iMvY, ksMaxMv.iMvY)); } //FME switch related inline bool CalcFMESwitchFlag (const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage, const int32_t iAvgMbSAD, const bool bScrollingDetected) { return (bScrollingDetected || (uiFMEGoodFrameCount > 0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD)); //TODO: add the logic of iHighFreMbPrecentage //return ( iHighFreMbPrecentage > 2 // && ( bScrollingDetected || iHighFreMbPrecentage >15 // ||( uiFMEGoodFrameCount>0 && iFrameSAD > FMESWITCH_FRAMESAD_THRESHOLD ) ) ); } } #endif