ref: 76ca56498ad3423ff5ad11397c7c6760b4f74194
parent: 76b428a453bb44a8400b383d77f2702e31de52ff
author: sijchen <sijchen@cisco.com>
date: Wed Dec 9 04:55:04 EST 2015
Add tasks and thread pool call for SM_SIZELIMITED_SLICE mode
--- a/codec/encoder/core/inc/wels_const.h
+++ b/codec/encoder/core/inc/wels_const.h
@@ -166,7 +166,8 @@
ENC_RETURN_CORRECTED = 0x08, //unexpected value but corrected by encoder
ENC_RETURN_INVALIDINPUT = 0x10, //invalid input
ENC_RETURN_MEMOVERFLOWFOUND = 0x20,
- ENC_RETURN_VLCOVERFLOWFOUND = 0x40
+ ENC_RETURN_VLCOVERFLOWFOUND = 0x40,
+ ENC_RETURN_KNOWN_ISSUE = 0x80
};
//TODO: need to complete the return checking in encoder and fill in more types if needed
--- a/codec/encoder/core/inc/wels_task_encoder.h
+++ b/codec/encoder/core/inc/wels_task_encoder.h
@@ -92,7 +92,8 @@
class CWelsLoadBalancingSlicingEncodingTask : public CWelsSliceEncodingTask {
public:
- CWelsLoadBalancingSlicingEncodingTask(sWelsEncCtx* pCtx, const int32_t iSliceIdx) : CWelsSliceEncodingTask (pCtx, iSliceIdx) {
+ CWelsLoadBalancingSlicingEncodingTask (sWelsEncCtx* pCtx, const int32_t iSliceIdx) : CWelsSliceEncodingTask (pCtx,
+ iSliceIdx) {
};
virtual WelsErrorType InitTask();
@@ -105,24 +106,21 @@
int64_t m_iSliceStart;
};
-/*
-class CWelsConstrainedSizeSlicingEncodingTask : public CWelsSliceEncodingTask {
+// Slice-encoding task created when the slice mode is SM_SIZELIMITED_SLICE; it declares its own ExecuteTask() on top of CWelsLoadBalancingSlicingEncodingTask.
+class CWelsConstrainedSizeSlicingEncodingTask : public CWelsLoadBalancingSlicingEncodingTask {
public:
- CWelsConstrainedSizeSlicingEncodingTask (sWelsEncCtx* pCtx);
- ~CWelsConstrainedSizeSlicingEncodingTask();
+ CWelsConstrainedSizeSlicingEncodingTask (sWelsEncCtx* pCtx,
+ const int32_t iSliceIdx) : CWelsLoadBalancingSlicingEncodingTask (pCtx, iSliceIdx) {
+ };
- virtual WelsErrorType Execute();
+ virtual WelsErrorType ExecuteTask(); // codes the slices of one partition in a loop; returns an ENC_RETURN_* code
- WelsErrorType InitTask ();
- virtual void FinishTask();
-
virtual uint32_t GetTaskType() const {
return WELS_ENC_TASK_ENCODE_SLICE_SIZECONSTRAINED;
}
- protected:
};
-*/
+
class CWelsUpdateMbMapTask : public CWelsBaseTask {
public:
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -3975,7 +3975,6 @@
WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
} else {
//other multi-slice uiSliceMode
- int32_t iRet = 0;
// THREAD_FULLY_FIRE_MODE/THREAD_PICK_UP_MODE for any mode of non-SM_SIZELIMITED_SLICE
if ((SM_SIZELIMITED_SLICE != pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) {
iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer);
@@ -4000,7 +3999,6 @@
pLayerBsInfo->iNalCount = 0;
pCtx->pTaskManage->ExecuteTasks();
- iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount);
if (pCtx->iEncoderError) {
WelsLog (pLogCtx, WELS_LOG_ERROR,
"WelsEncoderEncodeExt(), multi-slice (mode %d) encoding error!",
@@ -4007,11 +4005,15 @@
pParam->sSliceArgument.uiSliceMode);
return pCtx->iEncoderError;
}
+
+ iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount);
}
// THREAD_FULLY_FIRE_MODE && SM_SIZELIMITED_SLICE
else if ((SM_SIZELIMITED_SLICE == pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) {
const int32_t kiPartitionCnt = pCtx->iActiveThreadsNum; //pSvcParam->iCountThreadsNum;
+#if 0 //TODO: temporarily use this to keep old codes for a while, will remove old codes later
+ int32_t iRet = 0;
// to fire slice coding threads
iRet = FiredSliceThreads (pCtx, &pCtx->pSliceThreading->pThreadPEncCtx[0],
&pCtx->pSliceThreading->pReadySliceCodingEvent[0],
@@ -4027,6 +4029,41 @@
WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0],
&pCtx->pSliceThreading->pSliceCodedMasterEvent);
WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
+#else
+ int32_t iEndMbIdx = pCtx->pCurDqLayer->sSliceEncCtx.iMbNumInFrame;
+ for (int32_t iIdx = kiPartitionCnt - 1; iIdx >= 0; --iIdx) {
+ const int32_t iFirstMbIdx = pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
+ pCtx->pSliceThreading->pThreadPEncCtx[iIdx].iStartMbIndex = iFirstMbIdx;
+ pCtx->pSliceThreading->pThreadPEncCtx[iIdx].iEndMbIndex = iEndMbIdx;
+ iEndMbIdx = iFirstMbIdx;
+ }
+
+ //TODO: use a function to remove duplicate code here and ln3994
+ int32_t iLayerBsIdx = pCtx->pOut->iLayerBsIndex;
+ SLayerBSInfo* pLbi = &pFbi->sLayerInfo[iLayerBsIdx];
+ pLbi->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
+ pLbi->uiLayerType = VIDEO_CODING_LAYER;
+ pLbi->uiSpatialId = pCtx->uiDependencyId;
+ pLbi->uiTemporalId = pCtx->uiTemporalId;
+ pLbi->uiQualityId = 0;
+ pLbi->iNalCount = 0;
+
+ int32_t iIdx = 0;
+ while (iIdx < kiPartitionCnt) {
+ pCtx->pSliceThreading->pThreadPEncCtx[iIdx].pFrameBsInfo = pFbi;
+ pCtx->pSliceThreading->pThreadPEncCtx[iIdx].iSliceIndex = iIdx;
+ SetOneSliceBsBufferUnderMultithread (pCtx, iIdx, iIdx);
+ ++ iIdx;
+ }
+ pCtx->pTaskManage->ExecuteTasks();
+
+ if (pCtx->iEncoderError) {
+ WelsLog (pLogCtx, WELS_LOG_ERROR,
+ "WelsEncoderEncodeExt(), multi-slice (mode %d) encoding error = %d!",
+ pParam->sSliceArgument.uiSliceMode, pCtx->iEncoderError);
+ return pCtx->iEncoderError;
+ }
+#endif
iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, kiPartitionCnt);
} else { // for non-dynamic-slicing mode single threading branch..
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -319,7 +319,7 @@
iIdx = 0;
while (iIdx < iNumSpatialLayers) {
SSliceArgument* pSliceArgument = &pPara->sSpatialLayers[iIdx].sSliceArgument;
- if (pSliceArgument->uiSliceMode == SM_FIXEDSLCNUM_SLICE || pSliceArgument->uiSliceMode == SM_RASTER_SLICE) {
+ if (pSliceArgument->uiSliceMode == SM_FIXEDSLCNUM_SLICE || pSliceArgument->uiSliceMode == SM_RASTER_SLICE || pSliceArgument->uiSliceMode == SM_SIZELIMITED_SLICE) {
bWillUseTaskManage = true;
}
++ iIdx;
@@ -763,12 +763,15 @@
pCurDq->pLastMbIdxOfPartition[kiPartitionId] = kiEndMbInPartition - 1;
pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId] = 0;
-
while (iAnyMbLeftInPartition > 0) {
if (iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
// TODO: need exception handler for not large enough of MAX_SLICES_NUM related memory usage
// No idea about its solution due MAX_SLICES_NUM is fixed lenght in relevent pData structure
uiThrdRet = 1;
+ WelsLog (&pEncPEncCtx->sLogCtx, WELS_LOG_WARNING,
+ "[MT] CodingSliceThreadProc Too many slices: coding_idx %d, iSliceIdx %d, pSliceCtx->iMaxSliceNumConstraint %d",
+ pEncPEncCtx->iCodingIndex,
+ iSliceIdx, pSliceCtx->iMaxSliceNumConstraint);
WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent,
pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent,
iEventIdx);
@@ -810,6 +813,11 @@
iReturn = WriteSliceBs (pEncPEncCtx, pSliceBs, iSliceIdx, iSliceSize);
if (ENC_RETURN_SUCCESS != iReturn) {
uiThrdRet = iReturn;
+ WelsLog (&pEncPEncCtx->sLogCtx, WELS_LOG_WARNING,
+ "[MT] CodingSliceThreadProc, WriteSliceBs not successful: coding_idx %d, iSliceIdx %d, BufferSize %d, m_iSliceSize %d, iPayloadSize %d",
+ pEncPEncCtx->iCodingIndex,
+ iSliceIdx, pSliceBs->uiSize, iSliceSize, pSliceBs->sNalList[0].iPayloadSize);
+
WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent,
pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent,
iEventIdx);
--- a/codec/encoder/core/src/wels_task_encoder.cpp
+++ b/codec/encoder/core/src/wels_task_encoder.cpp
@@ -55,7 +55,8 @@
namespace WelsEnc {
-CWelsSliceEncodingTask::CWelsSliceEncodingTask (sWelsEncCtx* pCtx, const int32_t iSliceIdx) : m_eTaskResult(ENC_RETURN_SUCCESS) {
+CWelsSliceEncodingTask::CWelsSliceEncodingTask (sWelsEncCtx* pCtx,
+ const int32_t iSliceIdx) : m_eTaskResult (ENC_RETURN_SUCCESS) {
m_pCtx = pCtx;
m_iSliceIdx = iSliceIdx;
}
@@ -211,7 +212,7 @@
m_pSlice->uiSliceConsumeTime = (uint32_t) (WelsTime() - m_iSliceStart);
WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG,
- "[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, uiSliceConsumeTime %d, iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
+ "[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, uiSliceConsumeTime %d, m_iSliceSize %d, iFirstMbInSlice %d, count_num_mb_in_slice %d",
m_pCtx->iCodingIndex,
m_iSliceIdx,
m_pSlice->uiSliceConsumeTime,
@@ -218,6 +219,99 @@
m_iSliceSize,
m_pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[m_iSliceIdx].sSliceHeaderExt.sSliceHeader.iFirstMbInSlice,
m_pSlice->iCountMbNumInSlice);
+}
+
+// CWelsConstrainedSizeSlicingEncodingTask::ExecuteTask: codes the slices of one partition, one slice per loop pass.
+WelsErrorType CWelsConstrainedSizeSlicingEncodingTask::ExecuteTask() {
+ // Returns ENC_RETURN_SUCCESS, ENC_RETURN_KNOWN_ISSUE once the slice index reaches iMaxSliceNumConstraint, or the error code of WelsCodeOneSlice/WriteSliceBs.
+ SDqLayer* pCurDq = m_pCtx->pCurDqLayer;
+
+ SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx;
+ const int32_t kiSliceIdxStep = m_pCtx->iActiveThreadsNum; // consecutive slices of this task are this many indices apart
+
+
+ SSliceHeaderExt* pStartSliceHeaderExt = &pCurDq->sLayerInfo.pSliceInLayer[m_iSliceIdx].sSliceHeaderExt;
+
+ //deal with partition: TODO: here SSliceThreadPrivateData only carries partition info and has little relationship with the thread buffer, and iThreadIndex is not used in the threadpool model; rename after removing the old logic to avoid confusion
+ const int32_t kiPartitionId = m_iSliceIdx%kiSliceIdxStep;
+ SSliceThreadPrivateData* pPrivateData = & (m_pCtx->pSliceThreading->pThreadPEncCtx[kiPartitionId]);
+ const int32_t kiFirstMbInPartition = pPrivateData->iStartMbIndex; // inclusive
+ const int32_t kiEndMbInPartition = pPrivateData->iEndMbIndex; // exclusive
+ pStartSliceHeaderExt->sSliceHeader.iFirstMbInSlice = kiFirstMbInPartition;
+ pCurDq->pNumSliceCodedOfPartition[kiPartitionId] =
+ 1; // one pSlice per partition initialized, dynamic slicing inside
+ pCurDq->pLastMbIdxOfPartition[kiPartitionId] = kiEndMbInPartition - 1;
+
+ pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId] = 0;
+ //end of deal with partition
+
+ int32_t iAnyMbLeftInPartition = kiEndMbInPartition - kiFirstMbInPartition;
+ int32_t iLocalSliceIdx = m_iSliceIdx;
+ while (iAnyMbLeftInPartition > 0) {
+ if (iLocalSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
+ WelsLog (&m_pCtx->sLogCtx, WELS_LOG_WARNING,
+ "[MT] CWelsConstrainedSizeSlicingEncodingTask ExecuteTask() coding_idx %d, uiLocalSliceIdx %d, pSliceCtx->iMaxSliceNumConstraint %d",
+ m_pCtx->iCodingIndex,
+ iLocalSliceIdx, pSliceCtx->iMaxSliceNumConstraint);
+ return ENC_RETURN_KNOWN_ISSUE;
+ }
+
+ SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, iLocalSliceIdx);
+ m_pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iLocalSliceIdx];
+ m_pSliceBs = &m_pCtx->pSliceBs[iLocalSliceIdx];
+
+ m_pSliceBs->uiBsPos = 0;
+ m_pSliceBs->iNalIndex = 0;
+ InitBits (&m_pSliceBs->sBsWrite, m_pSliceBs->pBsBuffer, m_pSliceBs->uiSize);
+
+ if (m_bNeedPrefix) {
+ if (m_eNalRefIdc != NRI_PRI_LOWEST) {
+ WelsLoadNalForSlice (m_pSliceBs, NAL_UNIT_PREFIX, m_eNalRefIdc);
+ WelsWriteSVCPrefixNal (&m_pSliceBs->sBsWrite, m_eNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == m_eNalType));
+ WelsUnloadNalForSlice (m_pSliceBs);
+ } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension
+ WelsLoadNalForSlice (m_pSliceBs, NAL_UNIT_PREFIX, m_eNalRefIdc);
+ // No need write any syntax of prefix NAL Unit RBSP here
+ WelsUnloadNalForSlice (m_pSliceBs);
+ }
+ }
+
+ WelsLoadNalForSlice (m_pSliceBs, m_eNalType, m_eNalRefIdc);
+ int32_t iReturn = WelsCodeOneSlice (m_pCtx, iLocalSliceIdx, m_eNalType);
+ if (ENC_RETURN_SUCCESS != iReturn) {
+ return iReturn;
+ }
+ WelsUnloadNalForSlice (m_pSliceBs);
+
+ iReturn = WriteSliceBs (m_pCtx, m_pSliceBs, iLocalSliceIdx, m_iSliceSize);
+ if (ENC_RETURN_SUCCESS != iReturn) {
+ WelsLog (&m_pCtx->sLogCtx, WELS_LOG_WARNING,
+ "[MT] CWelsConstrainedSizeSlicingEncodingTask ExecuteTask(), WriteSliceBs not successful: coding_idx %d, uiLocalSliceIdx %d, BufferSize %d, m_iSliceSize %d, iPayloadSize %d",
+ m_pCtx->iCodingIndex,
+ iLocalSliceIdx, m_pSliceBs->uiSize, m_iSliceSize, m_pSliceBs->sNalList[0].iPayloadSize);
+ return iReturn;
+ }
+
+ m_pCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, m_pCtx->pFuncList, iLocalSliceIdx);
+
+ WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DETAIL,
+ "@pSlice=%-6d sliceType:%c idc:%d size:%-6d\n",
+ iLocalSliceIdx,
+ (m_pCtx->eSliceType == P_SLICE ? 'P' : 'I'),
+ m_eNalRefIdc,
+ m_iSliceSize
+ );
+
+ WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG,
+ "[MT] CWelsConstrainedSizeSlicingEncodingTask(), coding_idx %d, iPartitionId %d, m_iThreadIdx %d, iLocalSliceIdx %d, m_iSliceSize %d, iEndMbInPartition %d, pCurDq->pLastCodedMbIdxOfPartition[%d] %d\n",
+ m_pCtx->iCodingIndex, kiPartitionId, m_iThreadIdx, iLocalSliceIdx, m_iSliceSize,
+ kiEndMbInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
+ // NOTE(review): assumes pLastCodedMbIdxOfPartition[kiPartitionId] was advanced while the slice was coded (not visible here) - confirm
+ iAnyMbLeftInPartition = kiEndMbInPartition - (1 + pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
+ iLocalSliceIdx += kiSliceIdxStep;
+ }
+
+ return ENC_RETURN_SUCCESS;
}
--- a/codec/encoder/core/src/wels_task_management.cpp
+++ b/codec/encoder/core/src/wels_task_management.cpp
@@ -123,8 +123,9 @@
WelsErrorType CWelsTaskManageBase::CreateTasks (sWelsEncCtx* pEncCtx, const int32_t kiCurDid) {
CWelsBaseTask* pTask = NULL;
int32_t kiTaskCount;
+ uint32_t uiSliceMode = pEncCtx->pSvcParam->sSpatialLayers[0].sSliceArgument.uiSliceMode;
- if (pEncCtx->pSvcParam->sSpatialLayers[0].sSliceArgument.uiSliceMode != SM_SIZELIMITED_SLICE) {
+ if (uiSliceMode != SM_SIZELIMITED_SLICE) {
kiTaskCount = m_iTaskNum[kiCurDid] = pEncCtx->pSvcParam->sSpatialLayers[kiCurDid].sSliceArgument.uiSliceNum;
} else {
kiTaskCount = m_iTaskNum[kiCurDid] = pEncCtx->iActiveThreadsNum;
@@ -137,10 +138,14 @@
}
for (int idx = 0; idx < kiTaskCount; idx++) {
+ if (uiSliceMode==SM_SIZELIMITED_SLICE) {
+ pTask = WELS_NEW_OP (CWelsConstrainedSizeSlicingEncodingTask (pEncCtx, idx), CWelsConstrainedSizeSlicingEncodingTask);
+ } else {
if (pEncCtx->pSvcParam->bUseLoadBalancing) {
pTask = WELS_NEW_OP (CWelsLoadBalancingSlicingEncodingTask (pEncCtx, idx), CWelsLoadBalancingSlicingEncodingTask);
} else {
pTask = WELS_NEW_OP (CWelsSliceEncodingTask (pEncCtx, idx), CWelsSliceEncodingTask);
+ }
}
WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pTask)
m_cEncodingTaskList[kiCurDid]->push_back (pTask);
--- a/test/api/encode_options_test.cpp
+++ b/test/api/encode_options_test.cpp
@@ -1443,11 +1443,12 @@
SEncParamExt sParam;
encoder_->GetDefaultParams (&sParam);
prepareParamDefault (iSpatialLayerNum, 1, iWidth, iHeight, fFrameRate, &sParam);
- sParam.iMultipleThreadIdc = (rand() % 4) + 1;
+ sParam.iMultipleThreadIdc = (rand() % 2) ? 4 : ((rand() % 4) + 1);
sParam.bSimulcastAVC = 1;
sParam.sSpatialLayers[0].iVideoWidth = (iWidth >> 2);
sParam.sSpatialLayers[0].iVideoHeight = (iHeight >> 2);
- sParam.sSpatialLayers[0].sSliceArgument.uiSliceMode = SM_RASTER_SLICE;
+ sParam.sSpatialLayers[0].sSliceArgument.uiSliceMode = SM_SIZELIMITED_SLICE;
+ sParam.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = 1500;
sParam.sSpatialLayers[1].iVideoWidth = iWidth;
sParam.sSpatialLayers[1].iVideoHeight = iHeight;