ref: 100e9522316a1103550b33f2af821f1a78c22bc7
parent: 1b0735c3a99e2515ef648e8f6b0299a446b684fa
parent: f161566458447a169ca83253121011c40ce12092
author: sijchen <sijchen@cisco.com>
date: Thu Dec 17 07:02:00 EST 2015
Merge pull request #2314 from shihuade/MultiThread_V4.5_SliceBsRefact_V1 remove pSliceBs from ctx
--- a/codec/encoder/core/inc/encoder_context.h
+++ b/codec/encoder/core/inc/encoder_context.h
@@ -114,7 +114,6 @@
SLogContext sLogCtx;
// Input
SWelsSvcCodingParam* pSvcParam; // SVC parameter, WelsSVCParamConfig in svc_param_settings.h
- SWelsSliceBs* pSliceBs; // bitstream buffering for various slices, [uiSliceIdx]
int32_t* pSadCostMb;
/* MVD cost tables for Inter MB */
@@ -199,6 +198,7 @@
int32_t iPosBsBuffer; // current writing position of frame bs pBuffer
SSpatialPicIndex sSpatialIndexMap[MAX_DEPENDENCY_LAYER];
+ int32_t iSliceBufferSize[MAX_DEPENDENCY_LAYER];
bool bRefOfCurTidIsLtr[MAX_DEPENDENCY_LAYER][MAX_TEMPORAL_LEVEL];
uint16_t uiIdrPicId; // IDR picture id: [0, 65535], this one is used for LTR
--- a/codec/encoder/core/inc/slice.h
+++ b/codec/encoder/core/inc/slice.h
@@ -42,6 +42,7 @@
#include "parameter_sets.h"
#include "svc_enc_slice_segment.h"
#include "set_mb_syn_cabac.h"
+#include "nal_encap.h"
namespace WelsEnc {
@@ -157,6 +158,7 @@
// mainly for multiple threads imp.
SMbCache sMbCacheInfo; // MBCache is introduced within slice dependency
SBitStringAux* pSliceBsa;
+SWelsSliceBs sSliceBs;
/*******************************sSliceHeader****************************/
SSliceHeaderExt sSliceHeaderExt;
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -1095,15 +1095,37 @@
}
static inline int32_t InitpSliceInLayer (sWelsEncCtx** ppCtx, SDqLayer* pDqLayer, CMemoryAlign* pMa,
- const int32_t iMaxSliceNum, bool bMultithread) {
- int32_t iSliceIdx = 0;
+ const int32_t iMaxSliceNum, const int32_t kiDlayerIndex) {
+ int32_t iMaxSliceBufferSize = (*ppCtx)->iSliceBufferSize[kiDlayerIndex];
+ int32_t iSliceIdx = 0;
+ SliceModeEnum uiSliceMode = (*ppCtx)->pSvcParam->sSpatialLayers[kiDlayerIndex].sSliceArgument.uiSliceMode;
+
+ //SM_SINGLE_SLICE mode using single-thread bs writer pOut->sBsWrite
+ //even though multi-thread is on for other layers
+ bool bIndependenceBsBuffer = ((*ppCtx)->pSvcParam->iMultipleThreadIdc > 1 &&
+ SM_SINGLE_SLICE != uiSliceMode) ? true : false;
+
+ if ( iMaxSliceBufferSize <= 0) {
+ return ENC_RETURN_UNEXPECTED;
+ }
+
while (iSliceIdx < iMaxSliceNum) {
SSlice* pSlice = &pDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx];
- pSlice->uiSliceIdx = iSliceIdx;
- if (bMultithread)
- pSlice->pSliceBsa = & (*ppCtx)->pSliceBs[iSliceIdx].sBsWrite;
- else
- pSlice->pSliceBsa = & (*ppCtx)->pOut->sBsWrite;
+
+ pSlice->uiSliceIdx = iSliceIdx;
+ pSlice->sSliceBs.uiSize = iMaxSliceBufferSize;
+ pSlice->sSliceBs.uiBsPos = 0;
+ if (bIndependenceBsBuffer){
+ pSlice->pSliceBsa = &pSlice->sSliceBs.sBsWrite;
+ pSlice->sSliceBs.pBs = (uint8_t*)pMa->WelsMalloc (iMaxSliceBufferSize, "SliceBs");
+ if ( NULL == pSlice->sSliceBs.pBs) {
+ return ENC_RETURN_MEMALLOCERR;
+ }
+ } else {
+ pSlice->pSliceBsa = & (*ppCtx)->pOut->sBsWrite;
+ pSlice->sSliceBs.pBs = NULL;
+ }
+
if (AllocMbCacheAligned (&pSlice->sMbCacheInfo, pMa)) {
FreeMemorySvc (ppCtx);
return ENC_RETURN_MEMALLOCERR;
@@ -1211,7 +1233,7 @@
pDqLayer->sLayerInfo.pSliceInLayer = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "pSliceInLayer");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer->sLayerInfo.pSliceInLayer), FreeMemorySvc (ppCtx))
- int32_t iReturn = InitpSliceInLayer (ppCtx, pDqLayer, pMa, iMaxSliceNum, pParam->iMultipleThreadIdc > 1);
+ int32_t iReturn = InitpSliceInLayer (ppCtx, pDqLayer, pMa, iMaxSliceNum, iDlayerIndex);
WELS_VERIFY_RETURN_PROC_IF (1, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx))
}
@@ -1809,8 +1831,8 @@
(*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, (int) pSliceArgument->uiSliceNum);
iSliceBufferSize = ((iLayerBsSize / pSliceArgument->uiSliceNum)<<1) + MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
}
- iMaxSliceBufferSize = WELS_MAX(iMaxSliceBufferSize, iSliceBufferSize);
-
+ iMaxSliceBufferSize = WELS_MAX(iMaxSliceBufferSize, iSliceBufferSize);
+ (*ppCtx)->iSliceBufferSize[iIndex] = iSliceBufferSize;
++ iIndex;
}
iTargetSpatialBsSize = iLayerBsSize;
@@ -1817,8 +1839,7 @@
iCountBsLen = iNonVclLayersBsSizeCount + iVclLayersBsSizeCount;
iMaxSliceBufferSize = WELS_MIN (iMaxSliceBufferSize, iTargetSpatialBsSize);
- iTotalLength = (pParam->iMultipleThreadIdc == 1) ? iCountBsLen : (iCountBsLen + (*ppCtx)->iMaxSliceCount *
- iMaxSliceBufferSize);
+ iTotalLength = iCountBsLen;
pParam->iNumRefFrame = WELS_CLIP3 (pParam->iNumRefFrame, MIN_REF_PIC_COUNT,
(pParam->iUsageType == CAMERA_VIDEO_REAL_TIME ? MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA :
@@ -2125,6 +2146,12 @@
while (iSliceIdx < iSliceNum) {
SSlice* pSlice = &pDq->sLayerInfo.pSliceInLayer[iSliceIdx];
FreeMbCache (&pSlice->sMbCacheInfo, pMa);
+
+ //slice bs buffer
+ if(NULL != pSlice->sSliceBs.pBs) {
+ pMa->WelsFree(pSlice->sSliceBs.pBs,"sSliceBs.pBs");
+ pSlice->sSliceBs.pBs = NULL;
+ }
++ iSliceIdx;
}
pMa->WelsFree (pDq->sLayerInfo.pSliceInLayer, "pSliceInLayer");
@@ -3896,11 +3923,6 @@
WelsLoadNal (pCtx->pOut, eNalType, eNalRefIdc);
- //the following line is to fix a problem with a specific setting as in test DiffSlicingInDlayerMixed:
- // (multi-th on with SM_SINGLE_SLICE in one of the D layers)
- //TODO: this may not be needed any more after the slice buffer refactoring
- pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[0].pSliceBsa = &(pCtx->pOut->sBsWrite);
-
pCtx->iEncoderError = WelsCodeOneSlice (pCtx, 0, eNalType);
WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
@@ -4734,7 +4756,7 @@
SSliceHeaderExt* pSHExt = &pSliceIdx->sSliceHeaderExt;
pSliceIdx->uiSliceIdx = uiSliceIdx;
if (pCtx->pSvcParam->iMultipleThreadIdc > 1)
- pSliceIdx->pSliceBsa = &pCtx->pSliceBs[uiSliceIdx].sBsWrite;
+ pSliceIdx->pSliceBsa = &pSliceIdx->sSliceBs.sBsWrite;
else
pSliceIdx->pSliceBsa = &pCtx->pOut->sBsWrite;
if (AllocMbCacheAligned (&pSliceIdx->sMbCacheInfo, pMA)) {
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -248,39 +248,6 @@
pCurDqLayer->bNeedAdjustingSlicing = !DynamicAdjustSlicePEncCtxAll (pCurDqLayer, iRunLen);
}
-int32_t SetMultiSliceBuffer (sWelsEncCtx** ppCtx, CMemoryAlign* pMa, SSliceThreading* pSmt,
- int32_t iMaxSliceNum, int32_t iSlice1Len, int32_t iSlice0Len, bool bDynamicSlice) {
- (*ppCtx)->pSliceBs = (SWelsSliceBs*)pMa->WelsMalloc (sizeof (SWelsSliceBs) * iMaxSliceNum, "pSliceBs");
- if (NULL == (*ppCtx)->pSliceBs) {
- return ENC_RETURN_MEMALLOCERR;
- }
-
- if (iSlice0Len <= 0) {
- return ENC_RETURN_UNEXPECTED;
- }
- //slice 0
- (*ppCtx)->pSliceBs[0].uiSize = iSlice1Len;
- (*ppCtx)->pSliceBs[0].pBs = (*ppCtx)->pFrameBs + iSlice0Len;
- (*ppCtx)->pSliceBs[0].uiBsPos = 0;
- (*ppCtx)->pSliceBs[0].pBsBuffer = pSmt->pThreadBsBuffer[0];
- if ((iMaxSliceNum == 1) && (!bDynamicSlice)) {
- return ENC_RETURN_SUCCESS;
- }
- //slice >0
- if (iSlice1Len <= 0) {
- return ENC_RETURN_UNEXPECTED;
- }
- if ((*ppCtx)->iFrameBsSize < (iSlice0Len + (iMaxSliceNum - 1)*iSlice1Len)) {
- return ENC_RETURN_MEMALLOCERR;
- }
- for (int32_t k = 1; k < iMaxSliceNum; k++) {
- (*ppCtx)->pSliceBs[k].uiSize = iSlice1Len;
- (*ppCtx)->pSliceBs[k].pBs = (*ppCtx)->pSliceBs[k - 1].pBs + (*ppCtx)->pSliceBs[k - 1].uiSize;
- }
- return ENC_RETURN_SUCCESS;
-
-}
-
int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen,
const int32_t iMaxSliceBufferSize, bool bDynamicSlice) {
CMemoryAlign* pMa = NULL;
@@ -289,7 +256,6 @@
int32_t iNumSpatialLayers = 0;
int32_t iThreadNum = 0;
int32_t iIdx = 0;
- int16_t iMaxSliceNum = 1;
int32_t iReturn = ENC_RETURN_SUCCESS;
bool bWillUseTaskManage = false;
@@ -300,7 +266,6 @@
pPara = pCodingParam;
iNumSpatialLayers = pPara->iSpatialLayerNum;
iThreadNum = pPara->iMultipleThreadIdc;
- iMaxSliceNum = (*ppCtx)->iMaxSliceCount;
pSmt = (SSliceThreading*)pMa->WelsMalloc (sizeof (SSliceThreading), "SSliceThreading");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt), FreeMemorySvc (ppCtx))
@@ -385,12 +350,6 @@
MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno);
//previous conflict ends
- iReturn = SetMultiSliceBuffer (ppCtx, pMa, pSmt, iMaxSliceNum,
- iMaxSliceBufferSize,
- iCountBsLen,
- bDynamicSlice);
- WELS_VERIFY_RETURN_PROC_IF (iReturn, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx))
-
iReturn = WelsMutexInit (&pSmt->mutexSliceNumUpdate);
WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))
@@ -408,24 +367,20 @@
MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "RequestMtResource(), iThreadNum=%d, iMultipleThreadIdc= %d",
pPara->iMultipleThreadIdc,
- iMaxSliceNum);
-
+ (*ppCtx)->iMaxSliceCount);
return 0;
}
void ReleaseMtResource (sWelsEncCtx** ppCtx) {
- SWelsSliceBs* pSliceB = NULL;
SSliceThreading* pSmt = NULL;
CMemoryAlign* pMa = NULL;
int32_t iIdx = 0;
int32_t iThreadNum = 0;
- int16_t uiSliceNum = 0;
if (NULL == ppCtx || NULL == *ppCtx)
return;
pMa = (*ppCtx)->pMemAlign;
- uiSliceNum = (*ppCtx)->iMaxSliceCount;
iThreadNum = (*ppCtx)->pSvcParam->iMultipleThreadIdc;
pSmt = (*ppCtx)->pSliceThreading;
@@ -470,20 +425,6 @@
}
memset (&pSmt->bThreadBsBufferUsage, 0, MAX_THREADS_NUM * sizeof (bool));
- pSliceB = (*ppCtx)->pSliceBs;
- iIdx = 0;
- while (pSliceB != NULL && iIdx < uiSliceNum) {
- pSliceB->pBsBuffer = NULL;
- pSliceB->uiSize = 0;
- pSliceB->uiBsPos = 0;
- ++ iIdx;
- ++ pSliceB;
- }
- if ((*ppCtx)->pSliceBs != NULL) {
- pMa->WelsFree ((*ppCtx)->pSliceBs, "pSliceBs");
- (*ppCtx)->pSliceBs = NULL;
- }
-
if ((*ppCtx)->pTaskManage != NULL) {
delete (*ppCtx)->pTaskManage;
(*ppCtx)->pTaskManage = NULL;
@@ -503,6 +444,7 @@
int32_t AppendSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, const int32_t iSliceCount) {
SWelsSvcCodingParam* pCodingParam = pCtx->pSvcParam;
SSpatialLayerConfig* pDlp = &pCodingParam->sSpatialLayers[pCtx->uiDependencyId];
+ SSlice* pSliceInlayer = pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer;
SWelsSliceBs* pSliceBs = NULL;
const bool kbIsDynamicSlicingMode = (pDlp->sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE);
@@ -511,9 +453,9 @@
int32_t iSliceIdx = 0;
if (!kbIsDynamicSlicingMode) {
- pSliceBs = &pCtx->pSliceBs[0];
iNalIdxBase = pLbi->iNalCount = 0;
while (iSliceIdx < iSliceCount) {
+ pSliceBs = &pSliceInlayer[iSliceIdx].sSliceBs;
if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) {
int32_t iNalIdx = 0;
const int32_t iCountNal = pSliceBs->iNalIndex;
@@ -535,7 +477,6 @@
iNalIdxBase += iCountNal;
}
++ iSliceIdx;
- ++ pSliceBs;
}
} else { // for SM_SIZELIMITED_SLICE
const int32_t kiPartitionCnt = iSliceCount;
@@ -549,7 +490,7 @@
iSliceIdx = iPartitionIdx;
while (iIdx < kiCountSlicesCoded) {
- pSliceBs = &pCtx->pSliceBs[iSliceIdx];
+ pSliceBs = &pSliceInlayer[iSliceIdx].sSliceBs;
if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) {
memmove (pCtx->pFrameBs + pCtx->iPosBsBuffer, pSliceBs->pBs, pSliceBs->uiBsPos); // confirmed_safe_unsafe_usage
pCtx->iPosBsBuffer += pSliceBs->uiBsPos;
@@ -665,7 +606,7 @@
bool bDsaFlag = false;
iSliceIdx = pPrivateData->iSliceIndex;
pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
- pSliceBs = &pEncPEncCtx->pSliceBs[iSliceIdx];
+ pSliceBs = &pSlice->sSliceBs;
bDsaFlag = ((pParamD->sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE) &&
pCodingParam->iMultipleThreadIdc > 1 &&
@@ -777,7 +718,7 @@
SetOneSliceBsBufferUnderMultithread (pEncPEncCtx, kiPartitionId, iSliceIdx);
pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
- pSliceBs = &pEncPEncCtx->pSliceBs[iSliceIdx];
+ pSliceBs = &pSlice->sSliceBs;
pSliceBs->uiBsPos = 0;
pSliceBs->iNalIndex = 0;
@@ -1086,9 +1027,9 @@
#endif//#if defined(MT_DEBUG)
void SetOneSliceBsBufferUnderMultithread (sWelsEncCtx* pCtx, const int32_t kiThreadIdx, const int32_t iSliceIdx) {
- pCtx->pSliceBs[iSliceIdx].pBsBuffer = pCtx->pSliceThreading->pThreadBsBuffer[kiThreadIdx];
- pCtx->pSliceBs[iSliceIdx].uiBsPos = 0;
- //printf("SetOneSliceBsBufferUnderMultithread, thread %d, slice %d, buffer=%x\n", kiThreadIdx, iSliceIdx, pCtx->pSliceBs[iSliceIdx].pBsBuffer);
+ SWelsSliceBs* pSliceBs = &pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceBs;
+ pSliceBs->pBsBuffer = pCtx->pSliceThreading->pThreadBsBuffer[kiThreadIdx];
+ pSliceBs->uiBsPos = 0;
}
}
--- a/codec/encoder/core/src/wels_task_encoder.cpp
+++ b/codec/encoder/core/src/wels_task_encoder.cpp
@@ -112,7 +112,7 @@
SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, m_iSliceIdx);
m_pSlice = &m_pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[m_iSliceIdx];
- m_pSliceBs = &m_pCtx->pSliceBs[m_iSliceIdx];
+ m_pSliceBs = &m_pSlice->sSliceBs;
m_pSliceBs->uiBsPos = 0;
m_pSliceBs->iNalIndex = 0;
@@ -258,7 +258,7 @@
SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, iLocalSliceIdx);
m_pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iLocalSliceIdx];
- m_pSliceBs = &m_pCtx->pSliceBs[iLocalSliceIdx];
+ m_pSliceBs = &m_pSlice->sSliceBs;
m_pSliceBs->uiBsPos = 0;
m_pSliceBs->iNalIndex = 0;