ref: 4c19823d443501351a0d3267727a0107971bba7d
parent: 2d3071e37cd17d44b05abcac9abb577b91d5349a
parent: b001785eeeff8d165ff8ee5fdb564600a8b1ece8
author: HaiboZhu <haibozhu@cisco.com>
date: Tue Nov 24 05:35:43 EST 2015
Merge pull request #2267 from shihuade/MultiThread_V4.2_SSliceCtx_SliceConSumeTime_Pull: remove pSliceConsumeTime in SSliceCtx and SliceThreading
--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -80,7 +80,6 @@
WELS_MUTEX mutexSliceNumUpdate; // for dynamic slicing mode MT
-uint32_t* pSliceConsumeTime[MAX_DEPENDENCY_LAYER]; // consuming time for each slice, [iSpatialIdx][uiSliceIdx]
int32_t* pSliceComplexRatio[MAX_DEPENDENCY_LAYER]; // *INT_MULTIPLY
#ifdef MT_DEBUG
--- a/codec/encoder/core/inc/slice.h
+++ b/codec/encoder/core/inc/slice.h
@@ -181,6 +181,7 @@
SCabacCtx sCabacCtx;
int32_t iCabacInitIdc;
int32_t iMbSkipRun;
+uint32_t uiSliceConsumeTime;
} SSlice, *PSlice;
}
--- a/codec/encoder/core/inc/slice_multi_threading.h
+++ b/codec/encoder/core/inc/slice_multi_threading.h
@@ -55,7 +55,7 @@
SMB* pMbList,
const int32_t kiSliceIdc);
-void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume);
+void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq);
int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t kiSliceNum);
--- a/codec/encoder/core/inc/svc_enc_slice_segment.h
+++ b/codec/encoder/core/inc/svc_enc_slice_segment.h
@@ -90,7 +90,6 @@
uint32_t uiSliceSizeConstraint; /* in byte */
int32_t iMaxSliceNumConstraint; /* maximal number of slices constraint */
-uint32_t* pSliceConsumeTime;
} SSliceCtx;
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -3507,7 +3507,7 @@
// writing parasets for (simulcast) svc
int32_t WriteSsvcParaset (sWelsEncCtx* pCtx, const int32_t kiSpatialNum,
SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) {
- int32_t iNonVclSize = 0, iCountNal = 0, iReturn;
+ int32_t iNonVclSize = 0, iCountNal = 0, iReturn = 0;
iReturn = WelsWriteParameterSets (pCtx, &pLayerBsInfo->pNalLengthInByte[0], &iCountNal, &iNonVclSize);
WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
@@ -4299,8 +4299,7 @@
&& pSvcParam->bUseLoadBalancing
&& pSvcParam->iMultipleThreadIdc > 1 &&
pSvcParam->iMultipleThreadIdc >= pParam->sSliceArgument.uiSliceNum) {
- CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer,
- pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]);
+ CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer);
#if defined(MT_DEBUG)
TrackSliceComplexities (pCtx, iCurDid);
#endif//#if defined(MT_DEBUG)
@@ -4813,17 +4812,6 @@
}
pMA->WelsFree (pCurLayer->sSliceEncCtx.pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
pCurLayer->sSliceEncCtx.pCountMbNumInSlice = pCountMbNumInSlice;
-
- uint32_t* pSliceConsumeTime = (uint32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (uint32_t),
- "pSliceSeg->pSliceConsumeTime");
- if (NULL == pSliceConsumeTime) {
- WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
- "CWelsH264SVCEncoder::DynSliceRealloc: realloc pSliceConsumeTime not successful");
- return ENC_RETURN_MEMALLOCERR;
- }
- memcpy (pSliceConsumeTime, pCurLayer->sSliceEncCtx.pSliceConsumeTime, sizeof (int32_t) * iMaxSliceNumOld);
- pMA->WelsFree (pCurLayer->sSliceEncCtx.pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
- pCurLayer->sSliceEncCtx.pSliceConsumeTime = pSliceConsumeTime;
//deal with rate control variables
const int32_t kiCurDid = pCtx->uiDependencyId;
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -128,23 +128,23 @@
} while (iIdx <= kiEndMbInSlice);
}
-void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq, uint32_t* pSliceConsume) {
+void CalcSliceComplexRatio (void* pRatio, SDqLayer* pCurDq) {
SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
+ SSlice* pSliceInLayer = pCurDq->sLayerInfo.pSliceInLayer;
int32_t* pRatioList = (int32_t*)pRatio;
- int32_t iAvI[MAX_SLICES_NUM];
int32_t iSumAv = 0;
- uint32_t* pSliceTime = (uint32_t*)pSliceConsume;
int32_t* pCountMbInSlice = (int32_t*)pSliceCtx->pCountMbNumInSlice;
const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame;
int32_t iSliceIdx = 0;
+ int32_t iAvI[MAX_SLICES_NUM];
WelsEmms();
while (iSliceIdx < kiSliceCount) {
- iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceTime[iSliceIdx]);
- MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), pSliceConsumeTime[%d]= %d us, slice_run= %d",
+ iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * pCountMbInSlice[iSliceIdx], pSliceInLayer[iSliceIdx].uiSliceConsumeTime);
+ MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d",
iSliceIdx,
- pSliceTime[iSliceIdx], pCountMbInSlice[iSliceIdx]);
+ pSliceInLayer[iSliceIdx].uiSliceConsumeTime, pCountMbInSlice[iSliceIdx]);
iSumAv += iAvI[iSliceIdx];
++ iSliceIdx;
@@ -154,8 +154,10 @@
}
}
-int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t iSliceNum) {
- uint32_t* pSliceConsume = (uint32_t*)pConsumeTime;
+int32_t NeedDynamicAdjust (SSlice* pSliceInLayer, const int32_t iSliceNum) {
+ if ( NULL == pSliceInLayer )
+ return false;
+
uint32_t uiTotalConsume = 0;
int32_t iSliceIdx = 0;
int32_t iNeedAdj = false;
@@ -163,7 +165,7 @@
WelsEmms();
while (iSliceIdx < iSliceNum) {
- uiTotalConsume += pSliceConsume[iSliceIdx];
+ uiTotalConsume += pSliceInLayer[iSliceIdx].uiSliceConsumeTime;
iSliceIdx ++;
}
if (uiTotalConsume == 0) {
@@ -178,7 +180,7 @@
float fRmse = .0f; // root mean square error of pSlice consume ratios
const float kfMeanRatio = 1.0f / iSliceNum;
do {
- const float fRatio = 1.0f * pSliceConsume[iSliceIdx] / uiTotalConsume;
+ const float fRatio = 1.0f * pSliceInLayer[iSliceIdx].uiSliceConsumeTime / uiTotalConsume;
const float fDiffRatio = fRatio - kfMeanRatio;
fRmse += (fDiffRatio * fDiffRatio);
++ iSliceIdx;
@@ -370,12 +372,9 @@
&& (pPara->bUseLoadBalancing)
&& (pPara->iMultipleThreadIdc > 1)
&& (pPara->iMultipleThreadIdc >= kiSliceNum)) {
- pSmt->pSliceConsumeTime[iIdx] = (uint32_t*)pMa->WelsMallocz (kiSliceNum * sizeof (uint32_t), "pSliceConsumeTime[]");
- WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceConsumeTime[iIdx]), FreeMemorySvc (ppCtx))
pSmt->pSliceComplexRatio[iIdx] = (int32_t*)pMa->WelsMalloc (kiSliceNum * sizeof (int32_t), "pSliceComplexRatio[]");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceComplexRatio[iIdx]), FreeMemorySvc (ppCtx))
} else {
- pSmt->pSliceConsumeTime[iIdx] = NULL;
pSmt->pSliceComplexRatio[iIdx] = NULL;
}
@@ -384,7 +383,6 @@
}
++ iIdx;
}
- // NULL for pSliceConsumeTime[iIdx]: iIdx from iNumSpatialLayers to MAX_DEPENDENCY_LAYERS
#ifdef MT_DEBUG
// file handle for MT debug
@@ -554,11 +552,7 @@
iIdx = 0;
while (iIdx < pCodingParam->iSpatialLayerNum) {
- if (pSmt->pSliceConsumeTime[iIdx]) {
- pMa->WelsFree (pSmt->pSliceConsumeTime[iIdx], "pSliceConsumeTime[]");
- pSmt->pSliceConsumeTime[iIdx] = NULL;
- }
- if (pSmt->pSliceComplexRatio[iIdx] != NULL) {
+ if (pSmt->pSliceComplexRatio[iIdx] != NULL) {
pMa->WelsFree (pSmt->pSliceComplexRatio[iIdx], "pSliceComplexRatio[]");
pSmt->pSliceComplexRatio[iIdx] = NULL;
}
@@ -814,12 +808,12 @@
pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx);
if (bDsaFlag) {
- pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx] = (uint32_t) (
+ pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime = (uint32_t) (
WelsTime() - iSliceStart);
MT_TRACE_LOG (& (pEncPEncCtx->sLogCtx), WELS_LOG_INFO,
- "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
+ "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
pEncPEncCtx->iCodingIndex, iSliceIdx,
- pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx], iSliceSize,
+ pEncPEncCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].uiSliceConsumeTime, iSliceSize,
pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
pCurDq->sSliceEncCtx.pCountMbNumInSlice[iSliceIdx]);
}
@@ -1067,10 +1061,9 @@
#endif//MT_DEBUG
pCtx->pCurDqLayer = pCurDq;
- memcpy ((pCtx->pSliceThreading->pSliceConsumeTime[0]), (pCurDq->sSliceEncCtx.pSliceConsumeTime),
- pCurDq->sSliceEncCtx.iSliceNumInFrame * sizeof (uint32_t));
+
// do not need adjust due to not different at both slices of consumed time
- iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[0], pCurDq->sSliceEncCtx.iSliceNumInFrame);
+ iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[0]->sLayerInfo.pSliceInLayer, pCurDq->sSliceEncCtx.iSliceNumInFrame);
if (iNeedAdj)
DynamicAdjustSlicing (pCtx,
pCurDq,
@@ -1100,12 +1093,10 @@
&& (pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE
&& pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sSpatialLayers[iCurDid -
1].sSliceArgument.uiSliceNum);
- memcpy ((pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]), (pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime),
- pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame * sizeof (uint32_t));
if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation
// do not need adjust due to not different at both slices of consumed time
- iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid - 1],
+ iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid - 1]->sLayerInfo.pSliceInLayer,
pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame);
if (iNeedAdj)
DynamicAdjustSlicing (pCtx,
@@ -1115,7 +1106,7 @@
);
} else { // use temporal layer for complexity estimation
// do not need adjust due to not different at both slices of consumed time
- iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid],
+ iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid]->sLayerInfo.pSliceInLayer,
pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame);
if (iNeedAdj)
DynamicAdjustSlicing (pCtx,
@@ -1163,11 +1154,11 @@
pPara = pCtx->pSvcParam;
while (iSpatialIdx < iSpatialNum) {
- const int32_t kiDid = pDidList[iSpatialIdx];
- SSpatialLayerInternal* pDlp = &pPara->sDependencyLayers[kiDid];
- SSliceConfig* pSliceArgument = &pDlp->sSliceArgument;
- SDqLayer* pCurDq = pCtx->ppDqLayerList[kiDid];
- SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
+ const int32_t kiDid = pDidList[iSpatialIdx];
+ SSliceConfig* pSliceArgument = &pPara->sDependencyLayers[kiDid].sSliceArgument;
+ SDqLayer* pCurDq = pCtx->ppDqLayerList[kiDid];
+ SSlice* pSliceInLayer = pCurDq->sLayerInfo.pSliceInLayer;
+ SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
const uint32_t kuiCountSliceNum = pSliceCtx->iSliceNumInFrame;
if (pCtx->pSliceThreading) {
if (pCtx->pSliceThreading->pFSliceDiff
@@ -1178,11 +1169,11 @@
uint32_t uiMaxT = 0;
int32_t iMaxI = 0;
while (i < kuiCountSliceNum) {
- if (pCtx->pSliceThreading->pSliceConsumeTime[kiDid] != NULL)
+ if (pSliceInLayer[i] != NULL)
fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time coding_idx %d iDid %d pSlice %d\n",
- pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i], pCtx->iCodingIndex, kiDid, i /*/ 1000*/);
- if (pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i] > uiMaxT) {
- uiMaxT = pCtx->pSliceThreading->pSliceConsumeTime[kiDid][i];
+ pSliceInLayer[i].uiSliceConsumeTime, pCtx->iCodingIndex, kiDid, i /*/ 1000*/);
+ if (pSliceInLayer[i].uiSliceConsumeTime > uiMaxT) {
+ uiMaxT = pSliceInLayer[i].uiSliceConsumeTime;
iMaxI = i;
}
++ i;
--- a/codec/encoder/core/src/svc_enc_slice_segment.cpp
+++ b/codec/encoder/core/src/svc_enc_slice_segment.cpp
@@ -388,11 +388,7 @@
pSliceSeg->pCountMbNumInSlice = NULL;
}
- if (NULL != pSliceSeg->pSliceConsumeTime) {
- pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
- pSliceSeg->pSliceConsumeTime = NULL;
- }
// just for safe
pSliceSeg->iSliceNumInFrame = 0;
pSliceSeg->iMbNumInFrame = 0;
@@ -411,7 +407,6 @@
"pSliceSeg->pCountMbNumInSlice");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
- pSliceSeg->pSliceConsumeTime = NULL;
pSliceSeg->uiSliceMode = uiSliceMode;
pSliceSeg->iMbWidth = kiMbWidth;
pSliceSeg->iMbHeight = kiMbHeight;
@@ -438,10 +433,6 @@
"pSliceSeg->pCountMbNumInSlice");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
- pSliceSeg->pSliceConsumeTime = (uint32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (uint32_t),
- "pSliceSeg->pSliceConsumeTime");
- WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pSliceConsumeTime)
-
pSliceSeg->uiSliceMode = pSliceArgument->uiSliceMode;
pSliceSeg->iMbWidth = kiMbWidth;
@@ -488,11 +479,6 @@
pMa->WelsFree (pSliceSeg->pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
pSliceSeg->pCountMbNumInSlice = NULL;
- }
- if (NULL != pSliceSeg->pSliceConsumeTime) {
- pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
-
- pSliceSeg->pSliceConsumeTime = NULL;
}
pSliceSeg->iMbNumInFrame = 0;
--- a/codec/encoder/core/src/wels_task_encoder.cpp
+++ b/codec/encoder/core/src/wels_task_encoder.cpp
@@ -216,12 +216,12 @@
void CWelsLoadBalancingSlicingEncodingTask::FinishTask() {
CWelsSliceEncodingTask::FinishTask();
- m_pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime[m_iSliceIdx] = (uint32_t) (WelsTime() - m_iSliceStart);
+ m_pSlice->uiSliceConsumeTime = (uint32_t) (WelsTime() - m_iSliceStart);
WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG,
- "[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
+ "[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
m_pCtx->iCodingIndex,
m_iSliceIdx,
- m_pCtx->pCurDqLayer->sSliceEncCtx.pSliceConsumeTime[m_iSliceIdx],
+ m_pSlice->uiSliceConsumeTime,
m_iSliceSize,
m_pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[m_iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
m_pCtx->pCurDqLayer->sSliceEncCtx.pCountMbNumInSlice[m_iSliceIdx]);