shithub: openh264

Download patch

ref: 8ed4e83e93124ffdbf900f6a816a191fea5df9cd
parent: b896faa47cd847e15dcd37ad30790402dfdd573d
parent: 54c677cdd7717cbdef490ef3c64a89da7d1cc28a
author: ruil2 <ruil2@cisco.com>
date: Tue Nov 4 10:12:38 EST 2014

Merge pull request #1490 from sijchen/after_review

[Encoder] Fixing for large number of slices

--- a/codec/common/inc/macros.h
+++ b/codec/common/inc/macros.h
@@ -42,6 +42,7 @@
 
 #include <math.h>
 #include <assert.h>
+#include <string.h>
 #include "typedefs.h"
 
 
@@ -273,8 +274,58 @@
 #endif
 
 inline bool CheckInRangeCloseOpen (const int16_t kiCurrent, const int16_t kiMin, const int16_t kiMax) {
-return ((kiCurrent >= kiMin) && (kiCurrent < kiMax));
+  return ((kiCurrent >= kiMin) && (kiCurrent < kiMax));
 }
 
+/*!
+ * \brief  Fill iSizeOfData consecutive uint32_t elements starting at pDst with iValue.
+ *         A non-positive iSizeOfData writes nothing.
+ */
+static inline void WelsSetMemUint32_c (uint32_t* pDst, uint32_t iValue, int32_t iSizeOfData) {
+  for (int i = 0; i < iSizeOfData; i++) {
+    pDst[i] = iValue;
+  }
+}
+
+/*!
+ * \brief  Fill iSizeOfData consecutive uint16_t elements starting at pDst with iValue.
+ *         A non-positive iSizeOfData writes nothing.
+ */
+static inline void WelsSetMemUint16_c (uint16_t* pDst, uint16_t iValue, int32_t iSizeOfData) {
+  for (int i = 0; i < iSizeOfData; i++) {
+    pDst[i] = iValue;
+  }
+}
+
+/*!
+ * \brief  memset-like fill for elements that are 1, 2 or 4 bytes wide.
+ *
+ * \param  pDst               destination buffer; must hold iSizeOfData elements of iDataLengthOfData bytes each
+ * \param  iValue             element value; cast down to the element width before it is stored
+ * \param  iSizeOfData        number of ELEMENTS (not bytes) to set; nothing is written when it is <= 0
+ * \param  iDataLengthOfData  element width in bytes: 1, 2 or 4 (checked by assert only)
+ */
+inline void WelsSetMemMultiplebytes_c (void* pDst, uint32_t iValue, int32_t iSizeOfData, int32_t iDataLengthOfData) {
+  assert (4 == iDataLengthOfData || 2 == iDataLengthOfData || 1 == iDataLengthOfData);
+
+  // memset takes a size_t: a negative count would turn into a huge length, so treat it as "nothing to do".
+  if (iSizeOfData <= 0) {
+    return;
+  }
+
+  // TODO: consider add assembly for these functions
+  if (0 != iValue) {
+    if (4 == iDataLengthOfData) {
+      WelsSetMemUint32_c (static_cast<uint32_t*> (pDst), static_cast<uint32_t> (iValue), iSizeOfData);
+    } else if (2 == iDataLengthOfData) {
+      WelsSetMemUint16_c (static_cast<uint16_t*> (pDst), static_cast<uint16_t> (iValue), iSizeOfData);
+    } else {
+      memset (static_cast<uint8_t*> (pDst), static_cast<uint8_t> (iValue), static_cast<size_t> (iSizeOfData));
+    }
+  } else {
+    // An all-zero pattern does not depend on the element width; multiply in size_t to avoid int overflow.
+    memset (static_cast<uint8_t*> (pDst), 0, static_cast<size_t> (iSizeOfData) * iDataLengthOfData);
+  }
+}
 
 #endif//WELS_MACRO_UTILIZATIONS_H__
--- a/codec/encoder/core/inc/svc_enc_macroblock.h
+++ b/codec/encoder/core/inc/svc_enc_macroblock.h
@@ -68,12 +68,12 @@
 
 uint8_t		uiLumaQp;		// uiLumaQp: pPps->iInitialQp + sSliceHeader->delta_qp + mb->dquant.
 uint8_t		uiChromaQp;
-uint8_t		uiSliceIdc;	// AVC: pFirstMbInSlice?; SVC: (pFirstMbInSlice << 7) | ((uiDependencyId << 4) | uiQualityId);
+uint16_t		uiSliceIdc;	// 2^16=65536 > MaxFS(36864) of level 5.1; AVC: pFirstMbInSlice?; SVC: (pFirstMbInSlice << 7) | ((uiDependencyId << 4) | uiQualityId);
 uint32_t    uiChromPredMode;
 int32_t     iLumaDQp;
 SMVUnitXY   sMvd[4];
 int32_t     iCbpDc;
-uint8_t		reserved_filling_bytes[1];	// filling bytes reserved to make structure aligned with 4 bytes, higher cache hit on less structure size by 2 cache lines( 2 * 64 bytes) once hit
+//uint8_t		reserved_filling_bytes[1];	// not deleting this line for further changes of this structure. filling bytes reserved to make structure aligned with 4 bytes, higher cache hit on less structure size by 2 cache lines( 2 * 64 bytes) once hit
 } SMB, *PMb;
 
 }
--- a/codec/encoder/core/inc/svc_enc_slice_segment.h
+++ b/codec/encoder/core/inc/svc_enc_slice_segment.h
@@ -80,7 +80,7 @@
 int16_t			iMbHeight;			/* height of picture size in mb */
 int16_t			iSliceNumInFrame;	/* count number of slices in frame; */
 int32_t			iMbNumInFrame;	/* count number of MBs in frame */
-uint8_t*			pOverallMbMap;	/* overall MB map in frame, store virtual slice idc; */
+uint16_t*			pOverallMbMap;	/* overall MB map in frame, store virtual slice idc; */
 int16_t*			pFirstMbInSlice;	/* first MB address top-left based in every slice respectively; */
 int32_t*			pCountMbNumInSlice;	/* count number of MBs in every slice respectively; */
 uint32_t		uiSliceSizeConstraint;/*in byte*/
@@ -140,7 +140,7 @@
  *
- * \return	uiSliceIdc - successful; (uint8_t)(-1) - failed;
+ * \return	uiSliceIdc - successful; (uint16_t)(-1) - failed;
  */
-uint8_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY);
+uint16_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY);
 
 /*!
  * \brief	Get first mb in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO)
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -597,7 +597,7 @@
     bool     bLeftTop;
     bool     bRightTop;
     int32_t  iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
-    uint8_t  uiSliceIdc;
+    uint16_t  uiSliceIdc;
 
     pList[iIdx].iMbX = pEnc->pStrideTab->pMbIndexX[kiDlayerId][iIdx];
     pList[iIdx].iMbY = pEnc->pStrideTab->pMbIndexY[kiDlayerId][iIdx];
@@ -2272,7 +2272,7 @@
     bool     bTop;
     bool     bLeftTop;
     bool     bRightTop;
-    int32_t  uiSliceIdc;
+    uint16_t  uiSliceIdc;
     int32_t   iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
 
     uiSliceIdc = WelsMbToSliceIdc (pSliceCtx, kiMbXY);
@@ -2347,7 +2347,8 @@
     }
     pSliceCtx->pFirstMbInSlice[i]	=	iFirstMbIdx;
 
-    memset (pSliceCtx->pOverallMbMap + iFirstMbIdx, (uint8_t)i, pSliceCtx->pCountMbNumInSlice[i]*sizeof (uint8_t));
+    WelsSetMemMultiplebytes_c(pSliceCtx->pOverallMbMap + iFirstMbIdx, i,
+                              pSliceCtx->pCountMbNumInSlice[i], sizeof(uint16_t));
 
     // for next partition(or pSlice)
     iFirstMbIdx	+= pSliceCtx->pCountMbNumInSlice[i];
@@ -3924,15 +3925,19 @@
   iMaxSliceNum *= SLICE_NUM_EXPAND_COEF;
 
   SWelsNalRaw* pNalList = (SWelsNalRaw*)pMA->WelsMalloc (iCountNals * sizeof (SWelsNalRaw), "pOut->sNalList");
-  if (NULL == pNalList)
+  if (NULL == pNalList) {
+    WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pNalList is NULL");
     return ENC_RETURN_MEMALLOCERR;
+  }
   memcpy (pNalList, pCtx->pOut->sNalList, sizeof (SWelsNalRaw) * pCtx->pOut->iCountNals);
   pMA->WelsFree (pCtx->pOut->sNalList, "pOut->sNalList");
   pCtx->pOut->sNalList = pNalList;
 
   int32_t* pNalLen = (int32_t*)pMA->WelsMalloc (iCountNals * sizeof (int32_t), "pOut->pNalLen");
-  if (NULL == pNalLen)
+  if (NULL == pNalLen) {
+    WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pNalLen is NULL");
     return ENC_RETURN_MEMALLOCERR;
+  }
   memcpy (pNalLen, pCtx->pOut->pNalLen, sizeof (int32_t) * pCtx->pOut->iCountNals);
   pMA->WelsFree (pCtx->pOut->pNalLen, "pOut->pNalLen");
   pCtx->pOut->pNalLen = pNalLen;
@@ -3948,8 +3953,10 @@
   }
 
   SSlice* pSlice = (SSlice*)pMA->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "Slice");
-  if (NULL == pSlice)
+  if (NULL == pSlice) {
+    WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pSlice is NULL");
     return ENC_RETURN_MEMALLOCERR;
+  }
   memcpy (pSlice, pCurLayer->sLayerInfo.pSliceInLayer, sizeof (SSlice) * iMaxSliceNumOld);
   int32_t uiSliceIdx;
   uiSliceIdx = iMaxSliceNumOld;
@@ -3963,8 +3970,11 @@
       pSliceIdx->pSliceBsa = &pCtx->pSliceBs[uiSliceIdx].sBsWrite;
     else
       pSliceIdx->pSliceBsa = &pCtx->pOut->sBsWrite;
-    if (AllocMbCacheAligned (&pSliceIdx->sMbCacheInfo, pMA))
+    if (AllocMbCacheAligned (&pSliceIdx->sMbCacheInfo, pMA)) {
+      WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: realloc MbCache not successful at slice_idx=%d (max-slice=%d)",
+               uiSliceIdx, iMaxSliceNum);
       return ENC_RETURN_MEMALLOCERR;
+    }
 
     pSliceIdx->bSliceHeaderExtFlag = pBaseSlice->bSliceHeaderExtFlag;
     pSHExt->sSliceHeader.iPpsId = pBaseSHExt->sSliceHeader.iPpsId;
@@ -3983,8 +3993,10 @@
   pCurLayer->sLayerInfo.pSliceInLayer = pSlice;
 
   int16_t* pFirstMbInSlice = (int16_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (int16_t), "pSliceSeg->pFirstMbInSlice");
-  if (NULL == pFirstMbInSlice)
+  if (NULL == pFirstMbInSlice) {
+    WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pFirstMbInSlice is NULL");
     return ENC_RETURN_MEMALLOCERR;
+  }
   memset (pFirstMbInSlice, 0, sizeof (int16_t) * iMaxSliceNum);
   memcpy (pFirstMbInSlice, pCurLayer->pSliceEncCtx->pFirstMbInSlice, sizeof (int16_t) * iMaxSliceNumOld);
   pMA->WelsFree (pCurLayer->pSliceEncCtx->pFirstMbInSlice, "pSliceSeg->pFirstMbInSlice");
@@ -3992,8 +4004,10 @@
 
   int32_t* pCountMbNumInSlice = (int32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (int32_t),
                                 "pSliceSeg->pCountMbNumInSlice");
-  if (NULL == pCountMbNumInSlice)
+  if (NULL == pCountMbNumInSlice) {
+    WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: realloc pCountMbNumInSlice not successful");
     return ENC_RETURN_MEMALLOCERR;
+  }
   memcpy (pCountMbNumInSlice, pCurLayer->pSliceEncCtx->pCountMbNumInSlice, sizeof (int32_t) * iMaxSliceNumOld);
   uiSliceIdx = iMaxSliceNumOld;
   while (uiSliceIdx < iMaxSliceNum) {
@@ -4004,8 +4018,10 @@
   pCurLayer->pSliceEncCtx->pCountMbNumInSlice = pCountMbNumInSlice;
 
   SRCSlicing* pSlcingOverRc = (SRCSlicing*)pMA->WelsMalloc (iMaxSliceNum * sizeof (SRCSlicing), "SlicingOverRC");
-  if (NULL == pSlcingOverRc)
+  if (NULL == pSlcingOverRc) {
+    WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: realloc pSlcingOverRc not successful");
     return ENC_RETURN_MEMALLOCERR;
+  }
   memcpy (pSlcingOverRc, pCtx->pWelsSvcRc->pSlicingOverRc, sizeof (SRCSlicing) * iMaxSliceNumOld);
   uiSliceIdx = iMaxSliceNumOld;
   SRCSlicing* pSORC = &pSlcingOverRc[uiSliceIdx];
@@ -4069,9 +4085,13 @@
 
     if (iSliceIdx >= (pSliceCtx->iMaxSliceNumConstraint - kiSliceIdxStep)) {	// insufficient memory in pSliceInLayer[]
       if (pCtx->iActiveThreadsNum == 1) {
-        if (DynSliceRealloc (pCtx, pFrameBSInfo, pLayerBsInfo)) //only single thread support re-alloc now
+        //only single thread support re-alloc now
+        if (DynSliceRealloc (pCtx, pFrameBSInfo, pLayerBsInfo)) {
+          WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::WelsCodeOnePicPartition: DynSliceRealloc not successful");
           return ENC_RETURN_MEMALLOCERR;
+        }
       } else if (iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
+        WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::WelsCodeOnePicPartition: iSliceIdx(%d) over iMaxSliceNumConstraint(%d)", iSliceIdx, pSliceCtx->iMaxSliceNumConstraint);
         return ENC_RETURN_MEMALLOCERR;
       }
     }
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -73,7 +73,7 @@
 void UpdateMbListNeighborParallel (SSliceCtx* pSliceCtx,
                                    SMB* pMbList,
                                    const int32_t uiSliceIdc) {
-  const uint8_t* kpMbMap			= pSliceCtx->pOverallMbMap;
+  const uint16_t* kpMbMap			= pSliceCtx->pOverallMbMap;
   const int32_t kiMbWidth			= pSliceCtx->iMbWidth;
   int32_t iIdx						= pSliceCtx->pFirstMbInSlice[uiSliceIdc];
   const int32_t kiEndMbInSlice	= iIdx + pSliceCtx->pCountMbNumInSlice[uiSliceIdc] - 1;
--- a/codec/encoder/core/src/svc_enc_slice_segment.cpp
+++ b/codec/encoder/core/src/svc_enc_slice_segment.cpp
@@ -78,7 +78,8 @@
       const int16_t kiFirstMb = uiSliceIdx * kiMbWidth;
       pSliceSeg->pCountMbNumInSlice[uiSliceIdx]	= kiMbWidth;
       pSliceSeg->pFirstMbInSlice[uiSliceIdx]		= kiFirstMb;
-      memset (pSliceSeg->pOverallMbMap + kiFirstMb, (uint8_t)uiSliceIdx, kiMbWidth * sizeof (uint8_t));
+      WelsSetMemMultiplebytes_c(pSliceSeg->pOverallMbMap + kiFirstMb, uiSliceIdx,
+                                kiMbWidth, sizeof(uint16_t));
       ++ uiSliceIdx;
     }
 
@@ -89,7 +90,7 @@
     const int32_t* kpSlicesAssignList				= (int32_t*) & (kpMso->sSliceArgument.uiSliceMbNum[0]);
     const int32_t kiCountNumMbInFrame		= pSliceSeg->iMbNumInFrame;
     const int32_t kiCountSliceNumInFrame	= pSliceSeg->iSliceNumInFrame;
-    int32_t iSliceIdx						= 0;
+    uint16_t iSliceIdx						= 0;
     int16_t iMbIdx							= 0;
 
     do {
@@ -392,7 +393,7 @@
   }
 
   if (SM_SINGLE_SLICE == uiSliceMode) {
-    pSliceSeg->pOverallMbMap	= (uint8_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint8_t), "pSliceSeg->pOverallMbMap");
+    pSliceSeg->pOverallMbMap	= (uint16_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap");
 
     WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap)
     pSliceSeg->iSliceNumInFrame	= 1;
@@ -419,11 +420,12 @@
         && uiSliceMode != SM_DYN_SLICE && uiSliceMode != SM_AUTO_SLICE)
       return 1;
 
-    pSliceSeg->pOverallMbMap	= (uint8_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint8_t), "pSliceSeg->pOverallMbMap");
+    pSliceSeg->pOverallMbMap	= (uint16_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap");
 
     WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap)
 
-    memset (pSliceSeg->pOverallMbMap, 0, kiCountMbNum * sizeof (uint8_t));
+    WelsSetMemMultiplebytes_c(pSliceSeg->pOverallMbMap, 0, kiCountMbNum, sizeof(uint16_t));
+
     //SM_DYN_SLICE: init, set pSliceSeg->iSliceNumInFrame	= 1;
     pSliceSeg->iSliceNumInFrame = GetInitialSliceNum (kiMbWidth, kiMbHeight, pMso);
 
@@ -550,10 +552,10 @@
  *
- * \return	uiSliceIdc - successful; -1 - failed;
+ * \return	uiSliceIdc - successful; (uint16_t)(-1) - failed;
  */
-uint8_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY) {
+uint16_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY) {
   if (NULL != pSliceCtx && kiMbXY < pSliceCtx->iMbNumInFrame && kiMbXY >= 0)
     return pSliceCtx->pOverallMbMap[ kiMbXY ];
-  return (uint8_t) (-1);
+  return (uint16_t) (-1);
 }
 
 /*!
@@ -688,7 +690,8 @@
     pSliceCtx->pFirstMbInSlice[iSliceIdx]			= iFirstMbIdx;
     pSliceCtx->pCountMbNumInSlice[iSliceIdx]		= kiSliceRun;
 
-    memset (pSliceCtx->pOverallMbMap + iFirstMbIdx, (uint8_t)iSliceIdx, kiSliceRun * sizeof (uint8_t));
+    WelsSetMemMultiplebytes_c(pSliceCtx->pOverallMbMap + iFirstMbIdx, iSliceIdx,
+                              kiSliceRun, sizeof(uint16_t));
 
     iFirstMbIdx += kiSliceRun;
 
--- a/codec/encoder/core/src/svc_encode_slice.cpp
+++ b/codec/encoder/core/src/svc_encode_slice.cpp
@@ -776,7 +776,7 @@
     bool     bLeftTop;
     bool     bRightTop;
     int32_t   iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
-    const uint8_t  kuiSliceIdc		= WelsMbToSliceIdc (pSliceCtx, kiMbXY);
+    const uint16_t  kuiSliceIdc		= WelsMbToSliceIdc (pSliceCtx, kiMbXY);
 
     pMb->uiSliceIdc	= kuiSliceIdc;
     iLeftXY = kiMbXY - 1;
@@ -814,9 +814,9 @@
                        int32_t iFirstMbIdxOfNextSlice, const int32_t kiLastMbIdxInPartition) {
   SDqLayer*	pCurLayer = pEncCtx->pCurDqLayer;
   int32_t		iCurMbIdx		= pCurMb->iMbXY;
-  int32_t		iCurSliceIdc	= pSliceCtx->pOverallMbMap[ iCurMbIdx ];
+  uint16_t		iCurSliceIdc	= pSliceCtx->pOverallMbMap[ iCurMbIdx ];
   const int32_t kiSliceIdxStep = pEncCtx->iActiveThreadsNum;
-  int32_t		iNextSliceIdc	= iCurSliceIdc + kiSliceIdxStep;
+  uint16_t		iNextSliceIdc	= iCurSliceIdc + kiSliceIdxStep;
   SSlice*		pNextSlice		= NULL;
 
   SMB* pMbList					= pCurLayer->sMbDataP;
@@ -839,10 +839,9 @@
           sizeof (SSliceHeaderExt));	// confirmed_safe_unsafe_usage
 
   pSliceCtx->pFirstMbInSlice[iNextSliceIdc] = iFirstMbIdxOfNextSlice;
+  WelsSetMemMultiplebytes_c (pSliceCtx->pOverallMbMap + iFirstMbIdxOfNextSlice, iNextSliceIdc,
+                             (kiLastMbIdxInPartition - iFirstMbIdxOfNextSlice + 1), sizeof(uint16_t));
 
-  memset (pSliceCtx->pOverallMbMap + iFirstMbIdxOfNextSlice, (uint8_t)iNextSliceIdc,
-          (kiLastMbIdxInPartition - iFirstMbIdxOfNextSlice + 1)*sizeof (uint8_t));
-
   //DYNAMIC_SLICING_ONE_THREAD: update pMbList slice_neighbor_info
   UpdateMbNeighbourInfoForNextSlice (pSliceCtx, pMbList, iFirstMbIdxOfNextSlice, kiLastMbIdxInPartition);
 }
@@ -874,6 +873,10 @@
   if ((kbCurMbNotFirstMbOfCurSlice
        && JUMPPACKETSIZE_JUDGE (uiLen, iCurMbIdx, pSliceCtx->uiSliceSizeConstraint)) /*jump_avoiding_pack_exceed*/
       && kbCurMbNotLastMbOfCurPartition) { //decide to add new pSlice
+
+    WelsLog (&pEncCtx->sLogCtx, WELS_LOG_DETAIL,
+             "DynSlcJudgeSliceBoundaryStepBack: AddSliceBoundary: iCurMbIdx=%d, uiLen=%d, uiSliceIdx=%d", iCurMbIdx, uiLen,
+             pCurSlice->uiSliceIdx);
 
     if (pEncCtx->pSvcParam->iMultipleThreadIdc > 1) {
       WelsMutexLock (&pEncCtx->pSliceThreading->mutexSliceNumUpdate);
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -718,7 +718,7 @@
   pScreenBlockFeatureStorage->iIs16x16 = !bIsBlock8x8;
   pScreenBlockFeatureStorage->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
   pScreenBlockFeatureStorage->iActualListSize = kiListSize;
-  memset (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL * sizeof (uint32_t));
+  WelsSetMemMultiplebytes_c (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL, sizeof(uint32_t));
   pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false;
 
   return ENC_RETURN_SUCCESS;