ref: 040974f7355a2829ada1fe7451bead94eaa6aec1
parent: 321c772536943e516523dc2ce10c1f8a3a8879c9
parent: 5e8a716c1d9a247fe2de4c98935bc785edfee7b2
author: HaiboZhu <haibozhu@cisco.com>
date: Thu Feb 25 09:40:56 EST 2016
Merge pull request #2378 from shihuade/MultiThread_V4.9_V5 add thread-based slice buffer and refactor reallocate process
--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -44,7 +44,9 @@
#include "codec_app_def.h"
#include "wels_const.h"
#include "WelsThreadLib.h"
+#include "slice.h"
+using namespace WelsEnc;
/*
* MT_DEBUG: output trace MT related into log file
*/
@@ -87,6 +89,12 @@
uint8_t* pThreadBsBuffer[MAX_THREADS_NUM]; //actual memory for slice buffer
bool bThreadBsBufferUsage[MAX_THREADS_NUM];
WELS_MUTEX mutexThreadBsBufferUsage;
+
+SSlice* pSliceInThread[MAX_THREADS_NUM]; //slice buffer
+int32_t* piSliceIndexInThread[MAX_THREADS_NUM];
+int32_t iMaxSliceNumInThread[MAX_THREADS_NUM];
+int32_t iEncodedSliceNumInThread[MAX_THREADS_NUM];
+
} SSliceThreading;
#endif//MULTIPLE_THREADING_DEFINES_H__
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -4783,7 +4783,7 @@
iRet = WelsEncoderParamAdjust (ppCtx, &sConfig);
return iRet;
}
-int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
+int32_t FrameBsRealloc (sWelsEncCtx* pCtx,
SFrameBSInfo* pFrameBsInfo,
SLayerBSInfo* pLayerBsInfo) {
CMemoryAlign* pMA = pCtx->pMemAlign;
@@ -4823,6 +4823,17 @@
pLBI1->pNalLengthInByte = pLBI2->pNalLengthInByte + pLBI2->iNalCount;
}
+ return ENC_RETURN_SUCCESS;
+
+}
+
+int32_t SliceBufferRealloc (sWelsEncCtx* pCtx) {
+ CMemoryAlign* pMA = pCtx->pMemAlign;
+ SDqLayer* pCurLayer = pCtx->pCurDqLayer;
+ int32_t iMaxSliceNumOld = pCurLayer->sSliceEncCtx.iMaxSliceNumConstraint;
+ int32_t iMaxSliceNum = iMaxSliceNumOld;
+ iMaxSliceNum *= SLICE_NUM_EXPAND_COEF;
+
SSlice* pSlice = (SSlice*)pMA->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "Slice");
if (NULL == pSlice) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pSlice is NULL");
@@ -4882,6 +4893,20 @@
pCurLayer->sSliceEncCtx.iMaxSliceNumConstraint = iMaxSliceNum;
pCurLayer->iMaxSliceNum = iMaxSliceNum;
return ENC_RETURN_SUCCESS;
+}
+
+// Runs FrameBsRealloc and then SliceBufferRealloc, stopping at the first step
+// that does not return ENC_RETURN_SUCCESS and handing that code to the caller.
+int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
+ SFrameBSInfo* pFrameBsInfo,
+ SLayerBSInfo* pLayerBsInfo) {
+ const int32_t kiFrameBsRet = FrameBsRealloc (pCtx, pFrameBsInfo, pLayerBsInfo);
+ if (kiFrameBsRet != ENC_RETURN_SUCCESS) {
+ return kiFrameBsRet;
+ }
+
+ // The first step succeeded, so the second step's status is the final result.
+ return SliceBufferRealloc (pCtx);
}
int32_t WelsCodeOnePicPartition (sWelsEncCtx* pCtx,
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -251,22 +251,27 @@
int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen,
const int32_t iMaxSliceBufferSize, bool bDynamicSlice) {
CMemoryAlign* pMa = NULL;
- SWelsSvcCodingParam* pPara = NULL;
+ SWelsSvcCodingParam* pPara = NULL;
SSliceThreading* pSmt = NULL;
int32_t iNumSpatialLayers = 0;
int32_t iThreadNum = 0;
int32_t iIdx = 0;
- int32_t iReturn = ENC_RETURN_SUCCESS;
+ int32_t iReturn = ENC_RETURN_SUCCESS;
+ int32_t iMaxSliceNumInThread = 0;
+
if (NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0)
return 1;
#if defined(ENABLE_TRACE_MT)
SLogContext* pLogCtx = & ((*ppCtx)->sLogCtx);
#endif
- pMa = (*ppCtx)->pMemAlign;
- pPara = pCodingParam;
- iNumSpatialLayers = pPara->iSpatialLayerNum;
- iThreadNum = pPara->iMultipleThreadIdc;
+ pMa = (*ppCtx)->pMemAlign;
+ pPara = pCodingParam;
+ iNumSpatialLayers = pPara->iSpatialLayerNum;
+ iThreadNum = pPara->iMultipleThreadIdc;
+ assert (iThreadNum > 0);
+ if (iThreadNum <= 0) return 1; // assert is compiled out under NDEBUG; never divide by zero below
+ iMaxSliceNumInThread = ((*ppCtx)->iMaxSliceCount / iThreadNum + 1) * 2; // per-thread slice capacity
pSmt = (SSliceThreading*)pMa->WelsMalloc (sizeof (SSliceThreading), "SSliceThreading");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt), FreeMemorySvc (ppCtx))
(*ppCtx)->pSliceThreading = pSmt;
@@ -299,10 +304,12 @@
iIdx = 0;
while (iIdx < iThreadNum) {
- pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void*) *ppCtx;
- pSmt->pThreadPEncCtx[iIdx].iSliceIndex = iIdx;
- pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx;
- pSmt->pThreadHandles[iIdx] = 0;
+ pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void*) *ppCtx;
+ pSmt->pThreadPEncCtx[iIdx].iSliceIndex = iIdx;
+ pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx;
+ pSmt->iMaxSliceNumInThread[iIdx] = iMaxSliceNumInThread;
+ pSmt->iEncodedSliceNumInThread[iIdx] = 0;
+ pSmt->pThreadHandles[iIdx] = 0;
WelsSnprintf (name, SEM_NAME_MAX, "ee%d%s", iIdx, pSmt->eventNamespace);
err = WelsEventOpen (&pSmt->pExitEncodeEvent[iIdx], name);
@@ -329,10 +336,18 @@
pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pSmt->pThreadBsBuffer");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx]), FreeMemorySvc (ppCtx))
+ pSmt->pSliceInThread[iIdx] = (SSlice*)pMa->WelsMalloc (sizeof (SSlice)*iMaxSliceNumInThread, "pSmt->pSliceInThread");
+ WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceInThread[iIdx]), FreeMemorySvc (ppCtx))
+ // WelsMalloc takes a size in bytes: reserve iMaxSliceNumInThread int32_t entries, not that many bytes.
+ pSmt->piSliceIndexInThread[iIdx] = (int32_t *)pMa->WelsMalloc (sizeof (int32_t) * iMaxSliceNumInThread, "pSmt->piSliceIndexInThread");
+ WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->piSliceIndexInThread[iIdx]), FreeMemorySvc (ppCtx))
+
++ iIdx;
}
for (; iIdx < MAX_THREADS_NUM; iIdx++) {
- pSmt->pThreadBsBuffer[iIdx] = NULL;
+ pSmt->pThreadBsBuffer[iIdx] = NULL;
+ pSmt->pSliceInThread[iIdx] = NULL;
+ pSmt->piSliceIndexInThread[iIdx] = NULL;
}
WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
@@ -408,6 +423,16 @@
if (pSmt->pThreadBsBuffer[i]) {
pMa->WelsFree (pSmt->pThreadBsBuffer[i], "pSmt->pThreadBsBuffer");
pSmt->pThreadBsBuffer[i] = NULL;
+ }
+
+ if (pSmt->pSliceInThread[i]) {
+ pMa->WelsFree (pSmt->pSliceInThread[i], "pSmt->pSliceInThread");
+ pSmt->pSliceInThread[i] = NULL;
+ }
+
+ if (pSmt->piSliceIndexInThread[i]) {
+ pMa->WelsFree (pSmt->piSliceIndexInThread[i], "pSmt->piSliceIndexInThread");
+ pSmt->piSliceIndexInThread[i] = NULL;
}
}
memset (&pSmt->bThreadBsBufferUsage, 0, MAX_THREADS_NUM * sizeof (bool));