shithub: openh264

Download patch

ref: 4c19823d443501351a0d3267727a0107971bba7d
parent: 2d3071e37cd17d44b05abcac9abb577b91d5349a
parent: b001785eeeff8d165ff8ee5fdb564600a8b1ece8
author: HaiboZhu <haibozhu@cisco.com>
date: Tue Nov 24 05:35:43 EST 2015

Merge pull request #2267 from shihuade/MultiThread_V4.2_SSliceCtx_SliceConSumeTime_Pull

remove pSliceConsumeTime in SSliceCtx and SliceThreading

--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -80,7 +80,6 @@
 
 WELS_MUTEX                      mutexSliceNumUpdate;    // for dynamic slicing mode MT
 
-uint32_t*                       pSliceConsumeTime[MAX_DEPENDENCY_LAYER];        // consuming time for each slice, [iSpatialIdx][uiSliceIdx]
 int32_t*                        pSliceComplexRatio[MAX_DEPENDENCY_LAYER]; // *INT_MULTIPLY
 
 #ifdef MT_DEBUG
--- a/codec/encoder/core/inc/slice.h
+++ b/codec/encoder/core/inc/slice.h
@@ -181,6 +181,7 @@
 SCabacCtx       sCabacCtx;
 int32_t         iCabacInitIdc;
 int32_t         iMbSkipRun;
+uint32_t        uiSliceConsumeTime;
 } SSlice, *PSlice;
 
 }
--- a/codec/encoder/core/inc/slice_multi_threading.h
+++ b/codec/encoder/core/inc/slice_multi_threading.h
@@ -55,7 +55,7 @@
                                    SMB* pMbList,
                                    const int32_t kiSliceIdc);
 
-void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume);
+void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq);
 
 int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t kiSliceNum);
 
--- a/codec/encoder/core/inc/svc_enc_slice_segment.h
+++ b/codec/encoder/core/inc/svc_enc_slice_segment.h
@@ -90,7 +90,6 @@
 uint32_t                uiSliceSizeConstraint;  /* in byte */
 int32_t                 iMaxSliceNumConstraint; /* maximal number of slices constraint */
 
-uint32_t*               pSliceConsumeTime;
 } SSliceCtx;
 
 
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -3507,7 +3507,7 @@
 // writing parasets for (simulcast) svc
 int32_t WriteSsvcParaset (sWelsEncCtx* pCtx, const int32_t kiSpatialNum,
                           SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) {
-  int32_t iNonVclSize = 0, iCountNal = 0, iReturn;
+  int32_t iNonVclSize = 0, iCountNal = 0, iReturn = 0;
   iReturn = WelsWriteParameterSets (pCtx, &pLayerBsInfo->pNalLengthInByte[0], &iCountNal, &iNonVclSize);
   WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
 
@@ -4299,8 +4299,7 @@
         && pSvcParam->bUseLoadBalancing
         && pSvcParam->iMultipleThreadIdc > 1 &&
         pSvcParam->iMultipleThreadIdc >= pParam->sSliceArgument.uiSliceNum) {
-      CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer,
-                             pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]);
+      CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer);
 #if defined(MT_DEBUG)
       TrackSliceComplexities (pCtx, iCurDid);
 #endif//#if defined(MT_DEBUG)
@@ -4813,17 +4812,6 @@
   }
   pMA->WelsFree (pCurLayer->sSliceEncCtx.pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
   pCurLayer->sSliceEncCtx.pCountMbNumInSlice = pCountMbNumInSlice;
-
-  uint32_t* pSliceConsumeTime = (uint32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (uint32_t),
-                                                          "pSliceSeg->pSliceConsumeTime");
-  if (NULL == pSliceConsumeTime) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-             "CWelsH264SVCEncoder::DynSliceRealloc: realloc pSliceConsumeTime not successful");
-    return ENC_RETURN_MEMALLOCERR;
-  }
-  memcpy (pSliceConsumeTime, pCurLayer->sSliceEncCtx.pSliceConsumeTime, sizeof (int32_t) * iMaxSliceNumOld);
-  pMA->WelsFree (pCurLayer->sSliceEncCtx.pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
-  pCurLayer->sSliceEncCtx.pSliceConsumeTime = pSliceConsumeTime;
 
   //deal with rate control variables
   const int32_t kiCurDid = pCtx->uiDependencyId;
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -128,23 +128,23 @@
   } while (iIdx <= kiEndMbInSlice);
 }
 
-void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume) {
+void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq) {
   SSliceCtx* pSliceCtx          = &pCurDq->sSliceEncCtx;
+  SSlice*    pSliceInLayer      = pCurDq->sLayerInfo.pSliceInLayer;
   int32_t* pRatioList           = (int32_t*)pRatio;
-  int32_t iAvI[MAX_SLICES_NUM];
   int32_t iSumAv                = 0;
-  uint32_t* pSliceTime          = (uint32_t*)pSliceConsume;
   int32_t* pCountMbInSlice      = (int32_t*)pSliceCtx->pCountMbNumInSlice;
   const int32_t kiSliceCount    = pSliceCtx->iSliceNumInFrame;
   int32_t iSliceIdx             = 0;
+  int32_t iAvI[MAX_SLICES_NUM];
 
   WelsEmms();
 
   while (iSliceIdx < kiSliceCount) {
-    iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceTime[iSliceIdx]);
-    MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), pSliceConsumeTime[%d]= %d us, slice_run= %d",
+    iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceInLayer[iSliceIdx].uiSliceConsumeTime);
+    MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d",
                   iSliceIdx,
-                  pSliceTime[iSliceIdx], pCountMbInSlice[iSliceIdx]);
+                  pSliceInLayer[iSliceIdx].uiSliceConsumeTime, pCountMbInSlice[iSliceIdx]);
     iSumAv += iAvI[iSliceIdx];
 
     ++ iSliceIdx;
@@ -154,8 +154,10 @@
   }
 }
 
-int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t iSliceNum) {
-  uint32_t* pSliceConsume       = (uint32_t*)pConsumeTime;
+int32_t NeedDynamicAdjust (SSlice* pSliceInLayer, const int32_t iSliceNum) {
+  if ( NULL == pSliceInLayer )
+    return false;
+
   uint32_t uiTotalConsume       = 0;
   int32_t iSliceIdx             = 0;
   int32_t iNeedAdj              = false;
@@ -163,7 +165,7 @@
   WelsEmms();
 
   while (iSliceIdx < iSliceNum) {
-    uiTotalConsume += pSliceConsume[iSliceIdx];
+    uiTotalConsume += pSliceInLayer[iSliceIdx].uiSliceConsumeTime;
     iSliceIdx ++;
   }
   if (uiTotalConsume == 0) {
@@ -178,7 +180,7 @@
   float fRmse                   = .0f;  // root mean square error of pSlice consume ratios
   const float kfMeanRatio       = 1.0f / iSliceNum;
   do {
-    const float fRatio = 1.0f * pSliceConsume[iSliceIdx] / uiTotalConsume;
+    const float fRatio = 1.0f * pSliceInLayer[iSliceIdx].uiSliceConsumeTime / uiTotalConsume;
     const float fDiffRatio = fRatio - kfMeanRatio;
     fRmse += (fDiffRatio * fDiffRatio);
     ++ iSliceIdx;
@@ -370,12 +372,9 @@
         && (pPara->bUseLoadBalancing)
         && (pPara->iMultipleThreadIdc > 1)
         && (pPara->iMultipleThreadIdc >= kiSliceNum)) {
-      pSmt->pSliceConsumeTime[iIdx] = (uint32_t*)pMa->WelsMallocz (kiSliceNum * sizeof (uint32_t), "pSliceConsumeTime[]");
-      WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceConsumeTime[iIdx]), FreeMemorySvc (ppCtx))
       pSmt->pSliceComplexRatio[iIdx] = (int32_t*)pMa->WelsMalloc (kiSliceNum * sizeof (int32_t), "pSliceComplexRatio[]");
       WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceComplexRatio[iIdx]), FreeMemorySvc (ppCtx))
     } else {
-      pSmt->pSliceConsumeTime[iIdx]     = NULL;
       pSmt->pSliceComplexRatio[iIdx]    = NULL;
     }
 
@@ -384,7 +383,6 @@
     }
     ++ iIdx;
   }
-  // NULL for pSliceConsumeTime[iIdx]: iIdx from iNumSpatialLayers to MAX_DEPENDENCY_LAYERS
 
 #ifdef MT_DEBUG
   // file handle for MT debug
@@ -554,11 +552,7 @@
 
   iIdx = 0;
   while (iIdx < pCodingParam->iSpatialLayerNum) {
-    if (pSmt->pSliceConsumeTime[iIdx]) {
-      pMa->WelsFree (pSmt->pSliceConsumeTime[iIdx], "pSliceConsumeTime[]");
-      pSmt->pSliceConsumeTime[iIdx] = NULL;
-    }
-    if (pSmt->pSliceComplexRatio[iIdx] != NULL) {
+   if (pSmt->pSliceComplexRatio[iIdx] != NULL) {
       pMa->WelsFree (pSmt->pSliceComplexRatio[iIdx], "pSliceComplexRatio[]");
       pSmt->pSliceComplexRatio[iIdx] = NULL;
     }
@@ -814,12 +808,12 @@
         pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx);
 
         if (bDsaFlag) {
-          pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx] = (uint32_t) (
+            pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime = (uint32_t) (
                 WelsTime() - iSliceStart);
           MT_TRACE_LOG (& (pEncPEncCtx->sLogCtx), WELS_LOG_INFO,
-                        "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
+                        "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
                         pEncPEncCtx->iCodingIndex, iSliceIdx,
-                        pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx], iSliceSize,
+                        pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime, iSliceSize,
                         pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
                         pCurDq->sSliceEncCtx.pCountMbNumInSlice[iSliceIdx]);
         }
@@ -1067,10 +1061,9 @@
 #endif//MT_DEBUG
 
   pCtx->pCurDqLayer = pCurDq;
-  memcpy ((pCtx->pSliceThreading->pSliceConsumeTime[0]), (pCurDq->sSliceEncCtx.pSliceConsumeTime),
-          pCurDq->sSliceEncCtx.iSliceNumInFrame * sizeof (uint32_t));
+
   // do not need adjust due to not different at both slices of consumed time
-  iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[0], pCurDq->sSliceEncCtx.iSliceNumInFrame);
+  iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[0]->sLayerInfo.pSliceInLayer, pCurDq->sSliceEncCtx.iSliceNumInFrame);
   if (iNeedAdj)
     DynamicAdjustSlicing (pCtx,
                           pCurDq,
@@ -1100,12 +1093,10 @@
                                      && (pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE
                                          && pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sSpatialLayers[iCurDid -
                                              1].sSliceArgument.uiSliceNum);
-  memcpy ((pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]), (pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime),
-          pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame * sizeof (uint32_t));
 
   if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation
     // do not need adjust due to not different at both slices of consumed time
-    iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid - 1],
+    iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid - 1]->sLayerInfo.pSliceInLayer,
                                   pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame);
     if (iNeedAdj)
       DynamicAdjustSlicing (pCtx,
@@ -1115,7 +1106,7 @@
                            );
   } else { // use temporal layer for complexity estimation
     // do not need adjust due to not different at both slices of consumed time
-    iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid],
+    iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid]->sLayerInfo.pSliceInLayer,
                                   pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame);
     if (iNeedAdj)
       DynamicAdjustSlicing (pCtx,
@@ -1163,11 +1154,11 @@
 
   pPara = pCtx->pSvcParam;
   while (iSpatialIdx < iSpatialNum) {
-    const int32_t kiDid         = pDidList[iSpatialIdx];
-    SSpatialLayerInternal* pDlp = &pPara->sDependencyLayers[kiDid];
-    SSliceConfig* pSliceArgument          = &pDlp->sSliceArgument;
-    SDqLayer* pCurDq            = pCtx->ppDqLayerList[kiDid];
-    SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
+    const int32_t kiDid             = pDidList[iSpatialIdx];
+    SSliceConfig* pSliceArgument    = &pPara->sDependencyLayers[kiDid].sSliceArgument;
+    SDqLayer* pCurDq                = pCtx->ppDqLayerList[kiDid];
+    SSlice* pSliceInLayer           = pCurDq->sLayerInfo.pSliceInLayer;
+    SSliceCtx* pSliceCtx            = &pCurDq->sSliceEncCtx;
     const uint32_t kuiCountSliceNum = pSliceCtx->iSliceNumInFrame;
     if (pCtx->pSliceThreading) {
       if (pCtx->pSliceThreading->pFSliceDiff
@@ -1178,11 +1169,12 @@
         uint32_t uiMaxT = 0;
         int32_t iMaxI = 0;
         while (i < kuiCountSliceNum) {
-          if (pCtx->pSliceThreading->pSliceConsumeTime[kiDid] != NULL)
+          // pSliceInLayer[i] is an SSlice object, not a pointer; test the array pointer itself
+          if (pSliceInLayer != NULL)
             fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time coding_idx %d iDid %d pSlice %d\n",
-                     pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i], pCtx->iCodingIndex, kiDid, i /*/ 1000*/);
-          if (pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i] > uiMaxT) {
-            uiMaxT = pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i];
+                     pSliceInLayer[i].uiSliceConsumeTime, pCtx->iCodingIndex, kiDid, i /*/ 1000*/);
+          if (pSliceInLayer[i].uiSliceConsumeTime > uiMaxT) {
+            uiMaxT = pSliceInLayer[i].uiSliceConsumeTime;
             iMaxI = i;
           }
           ++ i;
--- a/codec/encoder/core/src/svc_enc_slice_segment.cpp
+++ b/codec/encoder/core/src/svc_enc_slice_segment.cpp
@@ -388,11 +388,7 @@
 
       pSliceSeg->pCountMbNumInSlice = NULL;
     }
-    if (NULL != pSliceSeg->pSliceConsumeTime) {
-      pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
 
-      pSliceSeg->pSliceConsumeTime = NULL;
-    }
     // just for safe
     pSliceSeg->iSliceNumInFrame = 0;
     pSliceSeg->iMbNumInFrame    = 0;
@@ -411,7 +407,6 @@
                                     "pSliceSeg->pCountMbNumInSlice");
 
     WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
-    pSliceSeg->pSliceConsumeTime = NULL;
     pSliceSeg->uiSliceMode              = uiSliceMode;
     pSliceSeg->iMbWidth                 = kiMbWidth;
     pSliceSeg->iMbHeight                = kiMbHeight;
@@ -438,10 +433,6 @@
                                     "pSliceSeg->pCountMbNumInSlice");
     WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
 
-    pSliceSeg->pSliceConsumeTime = (uint32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (uint32_t),
-                                                             "pSliceSeg->pSliceConsumeTime");
-    WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pSliceConsumeTime)
-
     pSliceSeg->uiSliceMode      = pSliceArgument->uiSliceMode;
 
     pSliceSeg->iMbWidth         = kiMbWidth;
@@ -488,11 +479,6 @@
       pMa->WelsFree (pSliceSeg->pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
 
       pSliceSeg->pCountMbNumInSlice = NULL;
-    }
-    if (NULL != pSliceSeg->pSliceConsumeTime) {
-      pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
-
-      pSliceSeg->pSliceConsumeTime = NULL;
     }
 
     pSliceSeg->iMbNumInFrame    = 0;
--- a/codec/encoder/core/src/wels_task_encoder.cpp
+++ b/codec/encoder/core/src/wels_task_encoder.cpp
@@ -216,12 +216,12 @@
 void CWelsLoadBalancingSlicingEncodingTask::FinishTask() {
   CWelsSliceEncodingTask::FinishTask();
 
-  m_pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime[m_iSliceIdx] = (uint32_t) (WelsTime() - m_iSliceStart);
+  m_pSlice->uiSliceConsumeTime = (uint32_t) (WelsTime() - m_iSliceStart);
   WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG,
-           "[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
+           "[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
            m_pCtx->iCodingIndex,
            m_iSliceIdx,
-           m_pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime[m_iSliceIdx],
+           m_pSlice->uiSliceConsumeTime,
            m_iSliceSize,
            m_pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[m_iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
            m_pCtx->pCurDqLayer->sSliceEncCtx.pCountMbNumInSlice[m_iSliceIdx]);