shithub: openh264

Download patch

ref: b6ddfabf2b8d3c5605fe0583f18e8e1949789657
parent: 240729288232fb6369397231b82ae6acc8a894c7
parent: e70621c1941215cdda325cfce8e64b65def48559
author: HaiboZhu <haibozhu@cisco.com>
date: Thu Jul 16 07:38:24 EDT 2015

Merge pull request #2028 from sijchen/mt42

[Encoder] save memory usage and improve error return logic under VLCOVERFLOW

--- a/codec/encoder/core/inc/slice_multi_threading.h
+++ b/codec/encoder/core/inc/slice_multi_threading.h
@@ -65,7 +65,7 @@
                            int32_t iCurDid);
 
 int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pParam, const int32_t kiCountBsLen,
-                           const int32_t kiTargetSpatialBsSize);
+                           const int32_t kiTargetSpatialBsSize, bool bDynamicSlice);
 
 void ReleaseMtResource (sWelsEncCtx** ppCtx);
 
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -702,7 +702,12 @@
       if (iDIndex == 0)
         iCountNumNals += MAX_SLICES_NUM;
       // MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME ensured at svc_enc_slice_segment.h
-      assert (iCountNumNals - iOrgNumNals <= MAX_NAL_UNITS_IN_LAYER);
+      if (iCountNumNals - iOrgNumNals > MAX_NAL_UNITS_IN_LAYER) {
+        WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
+                 "AcquireLayersNals(), num_of_slice(%d) > existing slice(%d) at (iDid= %d), max=%d",
+                 iCountNumNals, iOrgNumNals, iDIndex, MAX_NAL_UNITS_IN_LAYER);
+        return 1;
+      }
     } else { /*if ( SM_SINGLE_SLICE != pDLayer->sSliceCfg.uiSliceMode )*/
       const int32_t kiNumOfSlice = GetInitialSliceNum ((pDLayer->iVideoWidth + 0x0f) >> 4,
                                    (pDLayer->iVideoHeight + 0x0f) >> 4,
@@ -1754,6 +1759,11 @@
   const int32_t kiPpsSize = (*ppCtx)->GetNeededPpsNum() * PPS_BUFFER_SIZE;
   iNonVclLayersBsSizeCount = SSEI_BUFFER_SIZE + kiSpsSize + kiPpsSize;
 
+  bool    bDynamicSlice = false;
+  uint32_t uiMaxSliceNumEstimation = 0;
+  int32_t iSliceBufferSize = 0;
+  int32_t iMaxSliceBufferSize = 0;
+  int32_t iTotalLength = 0;
   int32_t iLayerBsSize = 0;
   iIndex = 0;
   while (iIndex < pParam->iSpatialLayerNum) {
@@ -1765,11 +1775,29 @@
                    MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
     iLayerBsSize = WELS_ALIGN (iLayerBsSize, 4); // 4 bytes alinged
     iVclLayersBsSizeCount += iLayerBsSize;
+
+    SSliceConfig* pMso = & (fDlp->sSliceCfg);
+    if (pMso->uiSliceMode == SM_DYN_SLICE) {
+      bDynamicSlice = true;
+      uiMaxSliceNumEstimation = WELS_MIN (AVERSLICENUM_CONSTRAINT,
+                                          (iLayerBsSize / pMso->sSliceArgument.uiSliceSizeConstraint) + 1);
+      (*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, uiMaxSliceNumEstimation);
+      iSliceBufferSize = (WELS_MAX(pMso->sSliceArgument.uiSliceSizeConstraint, iLayerBsSize/uiMaxSliceNumEstimation)<<1) + MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
+    } else {
+      (*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, pMso->sSliceArgument.uiSliceNum);
+      iSliceBufferSize = ((iLayerBsSize / pMso->sSliceArgument.uiSliceNum)<<1) + MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
+    }
+    iMaxSliceBufferSize = WELS_MAX(iMaxSliceBufferSize, iSliceBufferSize);
+
     ++ iIndex;
   }
   iTargetSpatialBsSize = iLayerBsSize;
   iCountBsLen = iNonVclLayersBsSizeCount + iVclLayersBsSizeCount;
 
+  iMaxSliceBufferSize = WELS_MIN (iMaxSliceBufferSize, iTargetSpatialBsSize);
+  iTotalLength = (pParam->iMultipleThreadIdc == 1) ? iCountBsLen : (iCountBsLen + ((*ppCtx)->iMaxSliceCount - 1) *
+                 iMaxSliceBufferSize);
+
   pParam->iNumRefFrame = WELS_CLIP3 (pParam->iNumRefFrame, MIN_REF_PIC_COUNT,
                                       (pParam->iUsageType == CAMERA_VIDEO_REAL_TIME ? MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA :
                                        MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN));
@@ -1787,20 +1815,14 @@
   (*ppCtx)->pOut->iCountNals = iCountNals;
   (*ppCtx)->pOut->iNalIndex = 0;
 
-  if (pParam->iMultipleThreadIdc > 1) {
-    const int32_t iTotalLength = iCountBsLen + (iTargetSpatialBsSize * ((*ppCtx)->iMaxSliceCount - 1));
-    (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iTotalLength, "pFrameBs");
-    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pFrameBs), FreeMemorySvc (ppCtx))
-    (*ppCtx)->iFrameBsSize = iTotalLength;
-  } else {
-    (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pFrameBs");
-    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pFrameBs), FreeMemorySvc (ppCtx))
-    (*ppCtx)->iFrameBsSize = iCountBsLen;
-  }
+
+  (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iTotalLength, "pFrameBs");
+  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pFrameBs), FreeMemorySvc (ppCtx))
+  (*ppCtx)->iFrameBsSize = iTotalLength;
   (*ppCtx)->iPosBsBuffer = 0;
 
   // for pSlice bs buffers
-  if (pParam->iMultipleThreadIdc > 1 && RequestMtResource (ppCtx, pParam, iCountBsLen, iTargetSpatialBsSize)) {
+  if (pParam->iMultipleThreadIdc > 1 && RequestMtResource (ppCtx, pParam, iCountBsLen, iMaxSliceBufferSize, bDynamicSlice)) {
     WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMtResource failed!");
     FreeMemorySvc (ppCtx);
     return 1;
@@ -4075,7 +4097,7 @@
                 // pick up succeeding slice for threading
                 // thread_id equal to iEventId per implementation here
                 pCtx->pSliceThreading->pThreadPEncCtx[iEventId].iSliceIndex = iIndexOfSliceToBeCoded;
-                SetOneSliceBsBufferUnderMultithread(pCtx, iEventId, iIndexOfSliceToBeCoded);
+                SetOneSliceBsBufferUnderMultithread (pCtx, iEventId, iIndexOfSliceToBeCoded);
                 WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iEventId]);
                 WelsEventSignal (&pCtx->pSliceThreading->pThreadMasterEvent[iEventId]);
                 ++ iIndexOfSliceToBeCoded;
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -295,6 +295,7 @@
   if (NULL == (*ppCtx)->pSliceBs) {
     return ENC_RETURN_MEMALLOCERR;
   }
+
   if (iSlice0Len <= 0) {
     return ENC_RETURN_UNEXPECTED;
   }
@@ -310,19 +311,19 @@
   if (iSlice1Len <= 0) {
     return ENC_RETURN_UNEXPECTED;
   }
+  if ((*ppCtx)->iFrameBsSize < (iSlice0Len + (iMaxSliceNum - 1)*iSlice1Len)) {
+    return ENC_RETURN_MEMALLOCERR;
+  }
   for (int32_t k = 1; k < iMaxSliceNum; k++) {
     (*ppCtx)->pSliceBs[k].uiSize = iSlice1Len;
     (*ppCtx)->pSliceBs[k].pBs    = (*ppCtx)->pSliceBs[k - 1].pBs + (*ppCtx)->pSliceBs[k - 1].uiSize;
   }
-  if ((*ppCtx)->iFrameBsSize < (iSlice0Len + (iMaxSliceNum - 1)*iSlice1Len)) {
-    return ENC_RETURN_MEMALLOCERR;
-  }
   return ENC_RETURN_SUCCESS;
 
 }
 
 int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen,
-                           const int32_t iTargetSpatialBsSize) {
+                           const int32_t iMaxSliceBufferSize, bool bDynamicSlice) {
   CMemoryAlign* pMa             = NULL;
   SWelsSvcCodingParam* pPara = NULL;
   SSliceThreading* pSmt         = NULL;
@@ -331,8 +332,6 @@
   int32_t iIdx                  = 0;
   int16_t iMaxSliceNum          = 1;
   int32_t iReturn = ENC_RETURN_SUCCESS;
-  bool    bDynamicSlice = false;
-  uint32_t uiMaxSliceSizeConstraint = 0;
 
   if (NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0)
     return 1;
@@ -372,13 +371,6 @@
       pSmt->pSliceConsumeTime[iIdx]     = NULL;
       pSmt->pSliceComplexRatio[iIdx]    = NULL;
     }
-
-    if (pMso->uiSliceMode == SM_DYN_SLICE) {
-      bDynamicSlice = true;
-      if (uiMaxSliceSizeConstraint < pMso->sSliceArgument.uiSliceSizeConstraint) {
-        uiMaxSliceSizeConstraint = pMso->sSliceArgument.uiSliceSizeConstraint;
-      }
-    }
     ++ iIdx;
   }
   // NULL for pSliceConsumeTime[iIdx]: iIdx from iNumSpatialLayers to MAX_DEPENDENCY_LAYERS
@@ -428,8 +420,7 @@
     MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d", iIdx,
                   (void*)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno);
 
-
-    pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMalloc (iTargetSpatialBsSize, "pSmt->pThreadBsBuffer");
+    pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pSmt->pThreadBsBuffer");
     WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx]), FreeMemorySvc (ppCtx))
 
     ++ iIdx;
@@ -442,8 +433,9 @@
   err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name);
   MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno);
 
+
   iReturn = SetMultiSliceBuffer (ppCtx, pMa, pSmt, iMaxSliceNum,
-                                 iTargetSpatialBsSize, //TODO: may use uiMaxSliceSizeConstraint<<1 when bDynamicSlice, but need more twist
+                                 iMaxSliceBufferSize,
                                  iCountBsLen,
                                  bDynamicSlice);
   WELS_VERIFY_RETURN_PROC_IF (iReturn, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx))
--- a/codec/encoder/core/src/svc_encode_slice.cpp
+++ b/codec/encoder/core/src/svc_encode_slice.cpp
@@ -516,7 +516,7 @@
 
 
     iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb);
-    if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) {
+    if ((iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) && (pCurMb->uiLumaQp < 50)) {
       pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
       UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset);
       goto TRY_REENCODING;
@@ -585,7 +585,7 @@
     UpdateNonZeroCountCache (pCurMb, pMbCache);
 
     iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb);
-    if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) {
+    if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND && (pCurMb->uiLumaQp < 50)) {
       pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
       UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset);
       goto TRY_REENCODING;
@@ -991,7 +991,7 @@
     //step (6): begin to write bit stream; if the pSlice size is controlled, the writing may be skipped
 
     iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb);
-    if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) {
+    if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND && (pCurMb->uiLumaQp < 50)) {
       pSlice->iMbSkipRun = pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
       UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset);
       goto TRY_REENCODING;
@@ -1097,7 +1097,7 @@
 
 
     iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb);
-    if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) {
+    if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND  && (pCurMb->uiLumaQp < 50)) {
       pSlice->iMbSkipRun = pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
       UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset);
       goto TRY_REENCODING;
--- a/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
+++ b/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
@@ -250,7 +250,7 @@
   assert (iLeftLength > 0);
 
   if (iLeftLength < MAX_MACROBLOCK_SIZE_IN_BYTE_x2) {
-    return ENC_RETURN_MEMALLOCERR;
+    return ENC_RETURN_VLCOVERFLOWFOUND;//ENC_RETURN_MEMALLOCERR;
     //TODO: call the realloc&copy instead
   }
   return ENC_RETURN_SUCCESS;
--- a/codec/encoder/plus/src/welsEncoderExt.cpp
+++ b/codec/encoder/plus/src/welsEncoderExt.cpp
@@ -402,7 +402,7 @@
   const int32_t kiEncoderReturn = WelsEncoderEncodeExt (m_pEncContext, pBsInfo, pSrcPic);
   const int64_t kiCurrentFrameMs = (WelsTime() - kiBeforeFrameUs) / 1000;
 
-  if (kiEncoderReturn == ENC_RETURN_MEMALLOCERR) {
+  if ((kiEncoderReturn == ENC_RETURN_MEMALLOCERR) || (kiEncoderReturn == ENC_RETURN_MEMOVERFLOWFOUND) || (kiEncoderReturn == ENC_RETURN_VLCOVERFLOWFOUND)) {
     WelsUninitEncoderExt (&m_pEncContext);
     return cmMallocMemeError;
   } else if ((kiEncoderReturn != ENC_RETURN_SUCCESS) && (kiEncoderReturn == ENC_RETURN_CORRECTED)) {
--- a/test/api/encode_decode_api_test.cpp
+++ b/test/api/encode_decode_api_test.cpp
@@ -3459,13 +3459,12 @@
   {true, false, true, 30, 110, 296, 50, SM_DYN_SLICE, 500, 7.5, 1, ""},
   {true, false, true, 30, 104, 416, 44, SM_DYN_SLICE, 500, 7.5, 1, ""},
   {true, false, true, 30, 16, 16, 2, SM_DYN_SLICE, 500, 7.5, 1, ""},
-  // enable the following when all random input is supported
-  //{true, true, true, 30, 600, 460, 1, SM_DYN_SLICE, 450, 15.0, 1, ""},
-  //{true, true, true, 30, 340, 96, 24, SM_DYN_SLICE, 1000, 30.0, 1, ""},
-  //{true, true, true, 30, 140, 196, 51, SM_DYN_SLICE, 500, 7.5, 1, ""},
-  //{true, true, true, 30, 110, 296, 50, SM_DYN_SLICE, 500, 7.5, 1, ""},
-  //{true, true, true, 30, 104, 416, 44, SM_DYN_SLICE, 500, 7.5, 1, ""},
-  //{true, true, true, 30, 16, 16, 2, SM_DYN_SLICE, 500, 7.5, 1, ""},
+  {true, true, true, 30, 600, 460, 1, SM_DYN_SLICE, 450, 15.0, 1, ""},
+  {true, true, true, 30, 340, 96, 24, SM_DYN_SLICE, 1000, 30.0, 1, ""},
+  {true, true, true, 30, 140, 196, 51, SM_DYN_SLICE, 500, 7.5, 1, ""},
+  {true, true, true, 30, 110, 296, 50, SM_DYN_SLICE, 500, 7.5, 1, ""},
+  {true, true, true, 30, 104, 416, 44, SM_DYN_SLICE, 500, 7.5, 1, ""},
+  {true, true, true, 30, 16, 16, 2, SM_DYN_SLICE, 500, 7.5, 1, ""},
   {false, false, true, 3, 4096, 2304, 2, SM_SINGLE_SLICE, 0, 7.5, 1, ""}, // large picture size
   {false, true, false, 30, 32, 16, 2, SM_DYN_SLICE, 500, 7.5, 1, ""},
   {false, true, false, 30, 600, 460, 1, SM_DYN_SLICE, 450, 15.0, 4, ""},
@@ -3474,8 +3473,7 @@
   {false, true, false, 30, 110, 296, 50, SM_DYN_SLICE, 500, 7.5, 2, ""},
   {false, true, false, 30, 104, 416, 44, SM_DYN_SLICE, 500, 7.5, 2, ""},
   {false, true, false, 30, 16, 16, 2, SM_DYN_SLICE, 500, 7.5, 3, ""},
-  //{false, true, false, 30, 32, 16, 2, SM_DYN_SLICE, 500, 7.5, 3, ""},
-  //disable the above for now, enable when multi-thread error is correctly handled
+  {false, true, false, 30, 32, 16, 2, SM_DYN_SLICE, 500, 7.5, 3, ""},
 };
 
 class EncodeTestAPI : public ::testing::TestWithParam<EncodeOptionParam>, public ::EncodeDecodeTestAPIBase {
@@ -3543,6 +3541,12 @@
   int iIdx = 0;
   int iLen;
   unsigned char* pData[3] = { NULL };
+
+  //FIXME: remove this after the multi-thread case is correctly handled in encoder
+  if (p.iThreads>1 && SM_DYN_SLICE == p.eSliceMode) {
+    p.bAllRandom = false;
+  }
+
   while (iIdx <= p.iNumframes) {
     EncodeOneFrameRandom (0, p.bAllRandom);
     encToDecData (info, iLen);