ref: b91e1e5960be5987d96f0f21bdb805c63921971c
dir: /codec/encoder/core/src/slice_multi_threading.cpp/
/*! * \copy * Copyright (c) 2010-2013, Cisco Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * * \file slice_multi_threading.h * * \brief pSlice based multiple threading * * \date 04/16/2010 Created * ************************************************************************************* */ #include <assert.h> #if !defined(_WIN32) #include <semaphore.h> #include <unistd.h> #endif//!_WIN32 #ifndef SEM_NAME_MAX // length of semaphore name should be system constrained at least on mac 10.7 #define SEM_NAME_MAX 32 #endif//SEM_NAME_MAX #include "slice_multi_threading.h" #include "mt_defs.h" #include "nal_encap.h" #include "utils.h" #include "encoder.h" #include "svc_encode_slice.h" #include "deblocking.h" #include "svc_enc_golomb.h" #include "crt_util_safe_x.h" // for safe crt like calls #include "rc.h" #include "cpu.h" #include "measure_time.h" #include "wels_task_management.h" #if defined(ENABLE_TRACE_MT) #define MT_TRACE_LOG(pLog, x, ...) WelsLog(pLog, x, __VA_ARGS__) #else #define MT_TRACE_LOG(x, ...) #endif namespace WelsEnc { void UpdateMbListNeighborParallel (SDqLayer* pCurDq, SMB* pMbList, const int32_t uiSliceIdc) { SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; const int32_t kiMbWidth = pSliceCtx->iMbWidth; int32_t iIdx = pCurDq->pFirstMbIdxOfSlice[uiSliceIdc]; const int32_t kiEndMbInSlice = iIdx + pCurDq->pCountMbNumInSlice[uiSliceIdc] - 1; do { UpdateMbNeighbor (pCurDq, &pMbList[iIdx], kiMbWidth, uiSliceIdc); ++ iIdx; } while (iIdx <= kiEndMbInSlice); } void CalcSliceComplexRatio (SDqLayer* pCurDq) { SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; SSlice** ppSliceInLayer = pCurDq->ppSliceInLayer; int32_t iSumAv = 0; const int32_t kiSliceCount = pSliceCtx->iSliceNumInFrame; int32_t iSliceIdx = 0; int32_t iAvI[MAX_SLICES_NUM]; assert (kiSliceCount <= MAX_SLICES_NUM); WelsEmms(); while (iSliceIdx < kiSliceCount) { iAvI[iSliceIdx] = WELS_DIV_ROUND (INT_MULTIPLY * ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice, ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime); MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] CalcSliceComplexRatio(), uiSliceConsumeTime[%d]= %d us, slice_run= %d", iSliceIdx, ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime, ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice); iSumAv += iAvI[iSliceIdx]; ++ iSliceIdx; } while (-- iSliceIdx >= 0) { ppSliceInLayer[iSliceIdx]->iSliceComplexRatio = WELS_DIV_ROUND (INT_MULTIPLY * iAvI[iSliceIdx], iSumAv); } } int32_t NeedDynamicAdjust (SSlice** ppSliceInLayer, const int32_t iSliceNum) { if (NULL == ppSliceInLayer) { return false; } uint32_t uiTotalConsume = 0; int32_t iSliceIdx = 0; int32_t iNeedAdj = false; WelsEmms(); while (iSliceIdx < iSliceNum) { if (NULL == ppSliceInLayer[iSliceIdx]) { return false; } uiTotalConsume += ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime; iSliceIdx ++; } if (uiTotalConsume == 0) { MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] NeedDynamicAdjust(), herein do no adjust due first picture, iCountSliceNum= %d", iSliceNum); return false; } iSliceIdx = 0; float fThr = EPSN; // threshold for various cores cases float fRmse = .0f; // root mean square error of pSlice consume ratios const float kfMeanRatio = 1.0f / iSliceNum; do { const float fRatio = 1.0f * ppSliceInLayer[iSliceIdx]->uiSliceConsumeTime / uiTotalConsume; const float fDiffRatio = fRatio - kfMeanRatio; fRmse += (fDiffRatio * fDiffRatio); ++ iSliceIdx; } while (iSliceIdx + 1 < iSliceNum); fRmse = sqrtf (fRmse / iSliceNum); if (iSliceNum >= 8) { fThr += THRESHOLD_RMSE_CORE8; } else if (iSliceNum >= 4) { fThr += THRESHOLD_RMSE_CORE4; } else if (iSliceNum >= 2) { fThr += THRESHOLD_RMSE_CORE2; } else fThr = 1.0f; if (fRmse > fThr) iNeedAdj = true; MT_TRACE_LOG (NULL, WELS_LOG_DEBUG, "[MT] NeedDynamicAdjust(), herein adjustment decision is made (iNeedAdj= %d) by: fRmse of pSlice complexity ratios %.6f, the corresponding threshold %.6f, iCountSliceNum %d", iNeedAdj, fRmse, fThr, iSliceNum); return iNeedAdj; } void DynamicAdjustSlicing (sWelsEncCtx* pCtx, SDqLayer* pCurDqLayer, int32_t iCurDid) { SSliceCtx* pSliceCtx = &pCurDqLayer->sSliceEncCtx; SSlice** ppSliceInLayer = pCurDqLayer->ppSliceInLayer; const int32_t kiCountSliceNum = pSliceCtx->iSliceNumInFrame; const int32_t kiCountNumMb = pSliceCtx->iMbNumInFrame; int32_t iMinimalMbNum = pSliceCtx->iMbWidth; // in theory we need only 1 SMB, here let it as one SMB row required int32_t iMaximalMbNum = 0; // dynamically assign later int32_t iMbNumLeft = kiCountNumMb; int32_t iRunLen[MAX_THREADS_NUM] = {0}; int32_t iSliceIdx = 0; int32_t iNumMbInEachGom = 0; SWelsSvcRc* pWelsSvcRc = &pCtx->pWelsSvcRc[iCurDid]; if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) { iNumMbInEachGom = pWelsSvcRc->iNumberMbGom; if (iNumMbInEachGom <= 0) { WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "[MT] DynamicAdjustSlicing(), invalid iNumMbInEachGom= %d from RC, iDid= %d, iCountNumMb= %d", iNumMbInEachGom, iCurDid, kiCountNumMb); return; } // do not adjust in case no extra iNumMbInEachGom based left for slicing adjustment, // extra MB of non integrated GOM assigned at the last pSlice in default, keep up on early initial result. if (iNumMbInEachGom * kiCountSliceNum >= kiCountNumMb) { return; } iMinimalMbNum = iNumMbInEachGom; } if (kiCountSliceNum < 2 || (kiCountSliceNum & 0x01)) // we need suppose uiSliceNum is even for multiple threading return; iMaximalMbNum = kiCountNumMb - (kiCountSliceNum - 1) * iMinimalMbNum; WelsEmms(); MT_TRACE_LOG (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iDid= %d, iCountNumMb= %d", iCurDid, kiCountNumMb); iSliceIdx = 0; while (iSliceIdx + 1 < kiCountSliceNum) { int32_t iNumMbAssigning = WELS_DIV_ROUND (kiCountNumMb * ppSliceInLayer[iSliceIdx]->iSliceComplexRatio, INT_MULTIPLY); // GOM boundary aligned if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) { iNumMbAssigning = iNumMbAssigning / iNumMbInEachGom * iNumMbInEachGom; } // make sure one GOM at least in each pSlice for safe if (iNumMbAssigning < iMinimalMbNum) iNumMbAssigning = iMinimalMbNum; else if (iNumMbAssigning > iMaximalMbNum) iNumMbAssigning = iMaximalMbNum; assert (iNumMbAssigning > 0); iMbNumLeft -= iNumMbAssigning; if (iMbNumLeft <= 0) { // error due to we can not support slice_skip now yet, do not adjust this time assert (0); return; } iRunLen[iSliceIdx] = iNumMbAssigning; MT_TRACE_LOG (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, iSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d", iSliceIdx, ppSliceInLayer[iSliceIdx]->iSliceComplexRatio * 1.0f / INT_MULTIPLY, ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice, iNumMbAssigning); ++ iSliceIdx; iMaximalMbNum = iMbNumLeft - (kiCountSliceNum - iSliceIdx - 1) * iMinimalMbNum; // get maximal num_mb in left parts } iRunLen[iSliceIdx] = iMbNumLeft; MT_TRACE_LOG (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "[MT] DynamicAdjustSlicing(), iSliceIdx= %d, pSliceComplexRatio= %.2f, slice_run_org= %d, slice_run_adj= %d", iSliceIdx, ppSliceInLayer[iSliceIdx]->iSliceComplexRatio * 1.0f / INT_MULTIPLY, ppSliceInLayer[iSliceIdx]->iCountMbNumInSlice, iMbNumLeft); pCurDqLayer->bNeedAdjustingSlicing = !DynamicAdjustSlicePEncCtxAll (pCurDqLayer, iRunLen); } int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen, const int32_t iMaxSliceBufferSize, bool bDynamicSlice) { CMemoryAlign* pMa = NULL; SWelsSvcCodingParam* pPara = NULL; SSliceThreading* pSmt = NULL; int32_t iNumSpatialLayers = 0; int32_t iThreadNum = 0; int32_t iIdx = 0; int32_t iReturn = ENC_RETURN_SUCCESS; if (NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0) return 1; #if defined(ENABLE_TRACE_MT) SLogContext* pLogCtx = & ((*ppCtx)->sLogCtx); #endif pMa = (*ppCtx)->pMemAlign; pPara = pCodingParam; iNumSpatialLayers = pPara->iSpatialLayerNum; iThreadNum = pPara->iMultipleThreadIdc; assert (iThreadNum > 0); pSmt = (SSliceThreading*)pMa->WelsMalloc (sizeof (SSliceThreading), "SSliceThreading"); WELS_VERIFY_RETURN_IF (1, (NULL == pSmt)) memset (pSmt, 0, sizeof (SSliceThreading)); (*ppCtx)->pSliceThreading = pSmt; pSmt->pThreadPEncCtx = (SSliceThreadPrivateData*)pMa->WelsMalloc (sizeof (SSliceThreadPrivateData) * iThreadNum, "pThreadPEncCtx"); WELS_VERIFY_RETURN_IF (1, (NULL == pSmt->pThreadPEncCtx)) #ifdef _WIN32 // Dummy event namespace, the windows events don't actually use this WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p", (void*) *ppCtx); #else WelsSnprintf (pSmt->eventNamespace, sizeof (pSmt->eventNamespace), "%p%x", (void*) *ppCtx, getpid()); #endif//!_WIN32 #ifdef MT_DEBUG // file handle for MT debug pSmt->pFSliceDiff = NULL; if (pSmt->pFSliceDiff) { fclose (pSmt->pFSliceDiff); pSmt->pFSliceDiff = NULL; } pSmt->pFSliceDiff = fopen ("slice_time.txt", "wt+"); #endif//MT_DEBUG MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "encpEncCtx= 0x%p", (void*) *ppCtx); char name[SEM_NAME_MAX] = {0}; WELS_GCC_UNUSED WELS_THREAD_ERROR_CODE err = 0; iIdx = 0; while (iIdx < iThreadNum) { pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void*) *ppCtx; pSmt->pThreadPEncCtx[iIdx].iSliceIndex = iIdx; pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx; pSmt->pThreadHandles[iIdx] = 0; // length of semaphore name should be system constrained at least on mac 10.7 WelsSnprintf (name, SEM_NAME_MAX, "ud%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pUpdateMbListEvent[iIdx], name); MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); WelsSnprintf (name, SEM_NAME_MAX, "fu%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pFinUpdateMbListEvent[iIdx], name); MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pFinUpdateMbListEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); WelsSnprintf (name, SEM_NAME_MAX, "sc%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pSliceCodedEvent[iIdx], name); MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedEvent%d named(%s) ret%d err%d", iIdx, name, err, errno); WelsSnprintf (name, SEM_NAME_MAX, "rc%d%s", iIdx, pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pReadySliceCodingEvent[iIdx], name); MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d", iIdx, (void*)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno); ++ iIdx; } WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace); err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name); MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno); iReturn = WelsMutexInit (&pSmt->mutexSliceNumUpdate); WELS_VERIFY_RETURN_IF (1, (WELS_THREAD_ERROR_OK != iReturn)) (*ppCtx)->pTaskManage = IWelsTaskManage::CreateTaskManage (*ppCtx, iNumSpatialLayers, bDynamicSlice); WELS_VERIFY_RETURN_IF (1, (NULL == (*ppCtx)->pTaskManage)) int32_t iThreadBufferNum = WELS_MIN ((*ppCtx)->pTaskManage->GetThreadPoolThreadNum(), MAX_THREADS_NUM); for (iIdx = 0; iIdx < iThreadBufferNum; iIdx++) { pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMallocz (iCountBsLen, "pSmt->pThreadBsBuffer"); WELS_VERIFY_RETURN_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx])) } iReturn = WelsMutexInit (&pSmt->mutexThreadBsBufferUsage); WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx)) iReturn = WelsMutexInit (&pSmt->mutexEvent); WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx)); iReturn = WelsMutexInit (&pSmt->mutexThreadSlcBuffReallocate); WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx)) iReturn = WelsMutexInit (& (*ppCtx)->mutexEncoderError); WELS_VERIFY_RETURN_IF (1, (WELS_THREAD_ERROR_OK != iReturn)) MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "RequestMtResource(), iThreadNum=%d, iMultipleThreadIdc= %d", pPara->iMultipleThreadIdc, (*ppCtx)->iMaxSliceCount); return 0; } void ReleaseMtResource (sWelsEncCtx** ppCtx) { SSliceThreading* pSmt = NULL; CMemoryAlign* pMa = NULL; int32_t iIdx = 0; int32_t iThreadNum = 0; if (NULL == ppCtx || NULL == *ppCtx) return; pMa = (*ppCtx)->pMemAlign; iThreadNum = (*ppCtx)->pSvcParam->iMultipleThreadIdc; pSmt = (*ppCtx)->pSliceThreading; if (NULL == pSmt) return; char ename[SEM_NAME_MAX] = {0}; while (iIdx < iThreadNum) { // length of semaphore name should be system constrained at least on mac 10.7 WelsSnprintf (ename, SEM_NAME_MAX, "sc%d%s", iIdx, pSmt->eventNamespace); WelsEventClose (&pSmt->pSliceCodedEvent[iIdx], ename); WelsSnprintf (ename, SEM_NAME_MAX, "rc%d%s", iIdx, pSmt->eventNamespace); WelsEventClose (&pSmt->pReadySliceCodingEvent[iIdx], ename); WelsSnprintf (ename, SEM_NAME_MAX, "ud%d%s", iIdx, pSmt->eventNamespace); WelsEventClose (&pSmt->pUpdateMbListEvent[iIdx], ename); WelsSnprintf (ename, SEM_NAME_MAX, "fu%d%s", iIdx, pSmt->eventNamespace); WelsEventClose (&pSmt->pFinUpdateMbListEvent[iIdx], ename); ++ iIdx; } WelsSnprintf (ename, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace); WelsEventClose (&pSmt->pSliceCodedMasterEvent, ename); WelsMutexDestroy (&pSmt->mutexSliceNumUpdate); WelsMutexDestroy (&pSmt->mutexThreadBsBufferUsage); WelsMutexDestroy (&pSmt->mutexThreadSlcBuffReallocate); WelsMutexDestroy (& ((*ppCtx)->mutexEncoderError)); WelsMutexDestroy (&pSmt->mutexEvent); if (pSmt->pThreadPEncCtx != NULL) { pMa->WelsFree (pSmt->pThreadPEncCtx, "pThreadPEncCtx"); pSmt->pThreadPEncCtx = NULL; } for (int i = 0; i < MAX_THREADS_NUM; i++) { if (pSmt->pThreadBsBuffer[i]) { pMa->WelsFree (pSmt->pThreadBsBuffer[i], "pSmt->pThreadBsBuffer"); pSmt->pThreadBsBuffer[i] = NULL; } } memset (&pSmt->bThreadBsBufferUsage, 0, MAX_THREADS_NUM * sizeof (bool)); if ((*ppCtx)->pTaskManage != NULL) { WELS_DELETE_OP ((*ppCtx)->pTaskManage); } #ifdef MT_DEBUG // file handle for debug if (pSmt->pFSliceDiff) { fclose (pSmt->pFSliceDiff); pSmt->pFSliceDiff = NULL; } #endif//MT_DEBUG pMa->WelsFree ((*ppCtx)->pSliceThreading, "SSliceThreading"); (*ppCtx)->pSliceThreading = NULL; } int32_t AppendSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, const int32_t iSliceCount) { SSlice** ppSliceInlayer = pCtx->pCurDqLayer->ppSliceInLayer; SWelsSliceBs* pSliceBs = NULL; int32_t iLayerSize = 0; int32_t iNalIdxBase = pLbi->iNalCount; int32_t iSliceIdx = 0; iNalIdxBase = pLbi->iNalCount = 0; while (iSliceIdx < iSliceCount) { pSliceBs = &ppSliceInlayer[iSliceIdx]->sSliceBs; if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) { int32_t iNalIdx = 0; const int32_t iCountNal = pSliceBs->iNalIndex; #if MT_DEBUG_BS_WR assert (pSliceBs->bSliceCodedFlag); #endif//MT_DEBUG_BS_WR memmove (pCtx->pFrameBs + pCtx->iPosBsBuffer, pSliceBs->pBs, pSliceBs->uiBsPos); // confirmed_safe_unsafe_usage pCtx->iPosBsBuffer += pSliceBs->uiBsPos; iLayerSize += pSliceBs->uiBsPos; while (iNalIdx < iCountNal) { pLbi->pNalLengthInByte[iNalIdxBase + iNalIdx] = pSliceBs->iNalLen[iNalIdx]; ++ iNalIdx; } pLbi->iNalCount += iCountNal; iNalIdxBase += iCountNal; } ++ iSliceIdx; } return iLayerSize; } int32_t WriteSliceBs (sWelsEncCtx* pCtx, SWelsSliceBs* pSliceBs, const int32_t iSliceIdx, int32_t& iSliceSize) { const int32_t kiNalCnt = pSliceBs->iNalIndex; int32_t iNalIdx = 0; int32_t iNalSize = 0; int32_t iReturn = ENC_RETURN_SUCCESS; int32_t iTotalLeftLength = pSliceBs->uiSize - pSliceBs->uiBsPos; SNalUnitHeaderExt* pNalHdrExt = &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt; uint8_t* pDst = pSliceBs->pBs; assert (kiNalCnt <= 2); if (kiNalCnt > 2) return 0; iSliceSize = 0; while (iNalIdx < kiNalCnt) { iNalSize = 0; iReturn = WelsEncodeNal (&pSliceBs->sNalList[iNalIdx], pNalHdrExt, iTotalLeftLength - iSliceSize, pDst, &iNalSize); WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS) pSliceBs->iNalLen[iNalIdx] = iNalSize; iSliceSize += iNalSize; pDst += iNalSize; ++ iNalIdx; } pSliceBs->uiBsPos = iSliceSize; return iReturn; } // thread process for coding one pSlice int32_t DynamicDetectCpuCores() { WelsLogicalProcessInfo info; WelsQueryLogicalProcessInfo (&info); return info.ProcessorCount; } int32_t AdjustBaseLayer (sWelsEncCtx* pCtx) { SDqLayer* pCurDq = pCtx->ppDqLayerList[0]; int32_t iNeedAdj = 1; #ifdef MT_DEBUG int64_t iT0 = WelsTime(); #endif//MT_DEBUG pCtx->pCurDqLayer = pCurDq; // do not need adjust due to not different at both slices of consumed time iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[0]->ppSliceInLayer , pCurDq->sSliceEncCtx.iSliceNumInFrame); if (iNeedAdj) DynamicAdjustSlicing (pCtx, pCurDq, 0); #ifdef MT_DEBUG iT0 = WelsTime() - iT0; if (pCtx->pSliceThreading->pFSliceDiff) { fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6" PRId64" us adjust time at base spatial layer, iNeedAdj %d, DynamicAdjustSlicing()\n", iT0, iNeedAdj); } #endif//MT_DEBUG return iNeedAdj; } int32_t AdjustEnhanceLayer (sWelsEncCtx* pCtx, int32_t iCurDid) { #ifdef MT_DEBUG int64_t iT1 = WelsTime(); #endif//MT_DEBUG int32_t iNeedAdj = 1; // uiSliceMode of referencing spatial should be SM_FIXEDSLCNUM_SLICE // if using spatial base layer for complexity estimation const bool kbModelingFromSpatial = (pCtx->pCurDqLayer->pRefLayer != NULL && iCurDid > 0) && (pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE && pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceNum); if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation // do not need adjust due to not different at both slices of consumed time iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid - 1]->ppSliceInLayer, pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame); if (iNeedAdj) DynamicAdjustSlicing (pCtx, pCtx->pCurDqLayer, iCurDid ); } else { // use temporal layer for complexity estimation // do not need adjust due to not different at both slices of consumed time iNeedAdj = NeedDynamicAdjust (pCtx->ppDqLayerList[iCurDid]->ppSliceInLayer, pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame); if (iNeedAdj) DynamicAdjustSlicing (pCtx, pCtx->pCurDqLayer, iCurDid ); } #ifdef MT_DEBUG iT1 = WelsTime() - iT1; if (pCtx->pSliceThreading->pFSliceDiff) { fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6" PRId64" us adjust time at spatial layer %d, iNeedAdj %d, DynamicAdjustSlicing()\n", iT1, iCurDid, iNeedAdj); } #endif//MT_DEBUG return iNeedAdj; } #if defined(MT_DEBUG) void TrackSliceComplexities (sWelsEncCtx* pCtx, const int32_t iCurDid) { const int32_t kiCountSliceNum = pCtx->pCurDqLayer->sSliceEncCtx.iSliceNumInFrame; SSlice** ppSliceInLayer = pCtx->pCurDqLayer->ppSliceInLayer; if (kiCountSliceNum > 0) { int32_t iSliceIdx = 0; do { fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6.3f complexity pRatio at iDid %d pSlice %d\n", ppSliceInLayer[iSliceIdx]->iSliceComplexRatio, iCurDid, iSliceIdx); ++ iSliceIdx; } while (iSliceIdx < kiCountSliceNum); } } #endif #if defined(MT_DEBUG) void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t iSpatialNum) { SWelsSvcCodingParam* pPara = NULL; int32_t iSpatialIdx = 0; if (iSpatialNum > MAX_DEPENDENCY_LAYER) return; pPara = pCtx->pSvcParam; while (iSpatialIdx < iSpatialNum) { const int32_t kiDid = pDidList[iSpatialIdx]; SSliceArgument* pSliceArgument = &pPara->sSpatialLayers[kiDid].sSliceArgument; SDqLayer* pCurDq = pCtx->ppDqLayerList[kiDid]; SSlice** ppSliceInLayer = pCurDq->ppSliceInLayer; SSliceCtx* pSliceCtx = &pCurDq->sSliceEncCtx; const uint32_t kuiCountSliceNum = pSliceCtx->iSliceNumInFrame; if (pCtx->pSliceThreading) { if (pCtx->pSliceThreading->pFSliceDiff && ((pSliceArgument->uiSliceMode == SM_FIXEDSLCNUM_SLICE) || (pSliceArgument->uiSliceMode == SM_SIZELIMITED_SLICE)) && pPara->iMultipleThreadIdc > 1 && pPara->iMultipleThreadIdc >= kuiCountSliceNum) { uint32_t i = 0; uint32_t uiMaxT = 0; int32_t iMaxI = 0; while (i < kuiCountSliceNum) { fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time coding_idx %d iDid %d pSlice %d\n", ppSliceInLayer[i]->uiSliceConsumeTime, pCtx->iCodingIndex, kiDid, i /*/ 1000*/); if (ppSliceInLayer[i]->uiSliceConsumeTime > uiMaxT) { uiMaxT = ppSliceInLayer[i]->uiSliceConsumeTime; iMaxI = i; } ++ i; } fprintf (pCtx->pSliceThreading->pFSliceDiff, "%6d us consume_time_max coding_idx %d iDid %d pSlice %d\n", uiMaxT, pCtx->iCodingIndex, kiDid, iMaxI /*/ 1000*/); } } ++ iSpatialIdx; } } #endif//#if defined(MT_DEBUG) void SetOneSliceBsBufferUnderMultithread (sWelsEncCtx* pCtx, const int32_t kiThreadIdx, SSlice* pSlice) { SWelsSliceBs* pSliceBs = &pSlice->sSliceBs; pSliceBs->pBsBuffer = pCtx->pSliceThreading->pThreadBsBuffer[kiThreadIdx]; pSliceBs->uiBsPos = 0; } }