ref: 6e9df6627280cd89ca27a9417482c01b6201f20c
parent: 33017fa722a6d7f8675ebc76043785da7353072c
parent: 10a4fb04c7990e497f73e622b87d3aaba0a7edfa
author: Ethan Hugg <ethanhugg@gmail.com>
date: Fri Feb 28 03:28:18 EST 2014
Merge pull request #369 from sijchen/mt_refactor3 [Encoder] remove macros to clean up the code
--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -46,48 +46,6 @@
#include "WelsThreadLib.h"
/*
- * Dynamic Slicing Assignment (DSA)
- */
-#define DYNAMIC_SLICE_ASSIGN
-/*
- * Try to do dynamic slicing for multiple threads sync based on history slicing complexity result,
- * valid in case DYNAMIC_SLICE_ASSIGN enabled. In case it is disabled using step interval slicing map for DSA
- */
-#define TRY_SLICING_BALANCE
-/*
- * not absolute balancing, tolerant conditions for dynamic adjustment
- */
-#define NOT_ABSOLUTE_BALANCING
-/*
- * using root mean square error of slice complexity ratios for balancing
- */
-#define USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
-
-/*
- * REQUIREMENT FROM NOT BEING ABLE TO SUPPORT ASO ON GPU BASED DECODER
- */
-#define RASTER_SCAN_ORDER_PACKING // Arbitary SSlice Ordering (ASO) exclusive
-
-/*
- * Parallel slice bs output without memcpy used
- * NOTE: might be not applicable for SVC 2.0/2.1 client application layer implementation
- * due bs of various slices need be continuous within a layer packing
- */
-//#define PACKING_ONE_SLICE_PER_LAYER // MEAN packing only slice for a pLayerBs, disabled at SVC 2.0/2.1 in case Multi-Threading (MT) & Multi-SSlice (MS)
-
-//#define FIXED_PARTITION_ASSIGN // for dynamic slicing parallelization, mean same partition number used in P or I slices
-
-/*
- * Need disable PACKING_ONE_SLICE_PER_LAYER if RASTER_SCAN_ORDER_PACKING enabled
- * PACKING_ONE_SLICE_PER_LAYER might potentially introduce disordering slice packing into layer info for application layer
- */
-#if defined(RASTER_SCAN_ORDER_PACKING)
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
-#undef PACKING_ONE_SLICE_PER_LAYER
-#endif//PACKING_ONE_SLICE_PER_LAYER
-#endif//RASTER_SCAN_ORDER_PACKING
-
-/*
* MT_DEBUG: output trace MT related into log file
*/
//#define MT_DEBUG
@@ -105,38 +63,8 @@
#endif//MT_ENABLED
-/*
- * TO Check macros dependencies MT related
- */
-
-#if !defined(DYNAMIC_SLICE_ASSIGN)
-
-#if defined(TRY_SLICING_BALANCE)
-#undef TRY_SLICING_BALANCE
-#endif//TRY_SLICING_BALANCE
-
-#endif//!DYNAMIC_SLICE_ASSIGN
-
-#if !defined(DYNAMIC_SLICE_ASSIGN) || !defined(TRY_SLICING_BALANCE)
-
-#if defined(NOT_ABSOLUTE_BALANCING)
-#undef NOT_ABSOLUTE_BALANCING
-#endif//NOT_ABSOLUTE_BALANCING
-
-#if defined(USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING)
-#undef USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
-#endif//USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
-
-#endif//!DYNAMIC_SLICE_ASSIGN || !TRY_SLICING_BALANCE
-
#if !defined(MT_ENABLED)
-#if defined(DYNAMIC_SLICE_ASSIGN)
-#undef DYNAMIC_SLICE_ASSIGN
-#endif//DYNAMIC_SLICE_ASSIGN
-#if defined(TRY_SLICING_BALANCE)
-#undef TRY_SLICING_BALANCE
-#endif//TRY_SLICING_BALANCE
#if defined(MT_DEBUG)
#undef MT_DEBUG
#endif//MT_DEBUG
@@ -143,30 +71,12 @@
#if defined(ENABLE_TRACE_MT)
#undef ENABLE_TRACE_MT
#endif//ENABLE_TRACE_MT
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
-#undef PACKING_ONE_SLICE_PER_LAYER
-#endif//PACKING_ONE_SLICE_PER_LAYER
-#ifdef NOT_ABSOLUTE_BALANCING
-#undef NOT_ABSOLUTE_BALANCING
-#endif//NOT_ABSOLUTE_BALANCING
-#ifdef USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
-#undef USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
-#endif//USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
#endif//!MT_ENABLED
-
-#ifdef NOT_ABSOLUTE_BALANCING
-#ifdef USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
#define THRESHOLD_RMSE_CORE8 0.0320f // v1.1: 0.0320f; v1.0: 0.02f
#define THRESHOLD_RMSE_CORE4 0.0215f // v1.1: 0.0215f; v1.0: 0.03f
#define THRESHOLD_RMSE_CORE2 0.0200f // v1.1: 0.0200f; v1.0: 0.04f
-#else
-#define TOLERANT_BALANCING_RATIO_LOSS 0.08f
-#define TOLERANT_BALANCING_RATIO_LOWER(n) ((1.0f-TOLERANT_BALANCING_RATIO_LOSS)/(n))
-#define TOLERANT_BALANCING_RATIO_UPPER(n) ((1.0f+TOLERANT_BALANCING_RATIO_LOSS)/(n))
-#endif//USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
-#endif//NOT_ABSOLUTE_BALANCING
typedef struct TagSliceThreadPrivateData {
void* pWelsPEncCtx;
@@ -192,7 +102,6 @@
WELS_EVENT* pReadySliceCodingEvent[MAX_THREADS_NUM]; // events for slice coding ready, [iThreadIdx]
#endif//_WIN32
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#if defined(__GNUC__)
WELS_THREAD_HANDLE* pUpdateMbListThrdHandles; // thread handles for update mb list thread, [iThreadIdx]
#endif//__GNUC__
@@ -203,24 +112,16 @@
WELS_EVENT* pUpdateMbListEvent[MAX_THREADS_NUM]; // signal to update mb list neighbor for various slices
WELS_EVENT* pFinUpdateMbListEvent[MAX_THREADS_NUM]; // signal to indicate finish updating mb list
#endif//_WIN32
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
WELS_MUTEX mutexSliceNumUpdate; // for dynamic slicing mode MT
-#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
uint32_t* pSliceConsumeTime[MAX_DEPENDENCY_LAYER]; // consuming time for each slice, [iSpatialIdx][uiSliceIdx]
-#endif//DYNAMIC_SLICE_ASSIGN || MT_DEBUG
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
float* pSliceComplexRatio[MAX_DEPENDENCY_LAYER];
-#endif//DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE
#ifdef MT_DEBUG
FILE* pFSliceDiff; // file handle for debug
#endif//MT_DEBUG
-#ifdef PACKING_ONE_SLICE_PER_LAYER
-uint32_t* pCountBsSizeInPartition;
-#endif//PACKING_ONE_SLICE_PER_LAYER
} SSliceThreading;
#endif//MULTIPLE_THREADING_DEFINES_H__
--- a/codec/encoder/core/inc/slice_multi_threading.h
+++ b/codec/encoder/core/inc/slice_multi_threading.h
@@ -58,22 +58,15 @@
void CalcSliceComplexRatio (void* pRatio, SSliceCtx* pSliceCtx, uint32_t* pSliceConsume);
-#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN) && defined(NOT_ABSOLUTE_BALANCING)
+#if defined(MT_ENABLED)
int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t kiSliceNum);
-#endif//..
+#endif
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
void DynamicAdjustSlicing (sWelsEncCtx* pCtx,
SDqLayer* pCurDqLayer,
void* pComplexRatio,
int32_t iCurDid);
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
-#ifdef PACKING_ONE_SLICE_PER_LAYER
-void reset_env_mt (sWelsEncCtx* pCtx);
-#endif//PACKING_ONE_SLICE_PER_LAYER
-
-
int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pParam, const int32_t kiCountBsLen,
const int32_t kiTargetSpatialBsSize);
@@ -82,20 +75,14 @@
int32_t AppendSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, const int32_t kiSliceCount);
int32_t WriteSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, uint8_t* pFrameBsBuffer, const int32_t iSliceIdx, int32_t& iSliceSize);
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#if defined(__GNUC__)
WELS_THREAD_ROUTINE_TYPE UpdateMbListThreadProc (void* arg);
#endif//__GNUC__
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg);
int32_t CreateSliceThreads (sWelsEncCtx* pCtx);
-#ifdef PACKING_ONE_SLICE_PER_LAYER
-void ResetCountBsSizeInPartitions (uint32_t* pCountBsSizeList, const int32_t kiPartitionCnt);
-#endif//PACKING_ONE_SLICE_PER_LAYER
-
#ifdef _WIN32
int32_t FiredSliceThreads (SSliceThreadPrivateData* pPriData, WELS_EVENT* pEventsList, SLayerBSInfo* pLayerBsInfo,
const uint32_t kuiNumThreads/*, int32_t *iLayerNum*/, SSliceCtx* pSliceCtx, const bool kbIsDynamicSlicingMode);
@@ -106,21 +93,21 @@
int32_t DynamicDetectCpuCores();
-#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN)
+#if defined(MT_ENABLED)
int32_t AdjustBaseLayer (sWelsEncCtx* pCtx);
int32_t AdjustEnhanceLayer (sWelsEncCtx* pCtx, int32_t iCurDid);
-#endif//MT_ENABLED && DYNAMIC_SLICE_ASSIGN
+#endif//MT_ENABLED
#if defined(MT_ENABLED)
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE) && defined(MT_DEBUG)
+#if defined(MT_DEBUG)
void TrackSliceComplexities (sWelsEncCtx* pCtx, const int32_t kiCurDid);
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(MT_DEBUG)
+#endif
+#if defined(MT_DEBUG)
void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t kiSpatialNum);
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(MT_DEBUG)
+#endif//defined(MT_DEBUG)
#endif//MT_ENABLED
}
--- a/codec/encoder/core/src/encoder.cpp
+++ b/codec/encoder/core/src/encoder.cpp
@@ -273,11 +273,6 @@
#if defined(STAT_OUTPUT)
memset (&pEncCtx->sPerInfo, 0, sizeof (SStatSliceInfo));
#endif//FRAME_INFO_OUTPUT
-
-#if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER)
- if (pEncCtx->pSvcParam->iMultipleThreadIdc > 1)
- reset_env_mt (pEncCtx);
-#endif
}
EFrameType DecideFrameType (sWelsEncCtx* pEncCtx, const int8_t kiSpatialNum) {
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -54,9 +54,7 @@
#if defined(MT_ENABLED)
#include "slice_multi_threading.h"
#endif//MT_ENABLED
-#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
#include "measure_time.h"
-#endif//DYNAMIC_SLICE_ASSIGN
namespace WelsSVCEnc {
@@ -467,9 +465,6 @@
int32_t iCountNumNals = 0;
int32_t iNumDependencyLayers = 0;
int32_t iDIndex = 0;
-#if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER)
- int32_t iNumLayersPack = 0;
-#endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER
if (NULL == pParam || NULL == ppCtx || NULL == *ppCtx)
return 1;
@@ -491,12 +486,7 @@
if (iDIndex == 0)
iCountNumNals += MAX_SLICES_NUM;
// MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME ensured at svc_enc_slice_segment.h
-#if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER)
- assert (MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME);
- // iNumLayersPack += MAX_SLICES_NUM; // do not count it for dynamic slicing mode
-#else//!MT_ENABLED || !PACKING_ONE_SLICE_PER_LAYER
assert (iCountNumNals - iOrgNumNals <= MAX_NAL_UNITS_IN_LAYER);
-#endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER
} else { /*if ( SM_SINGLE_SLICE != pDLayer->sSliceCfg.uiSliceMode )*/
const int32_t kiNumOfSlice = GetInitialSliceNum ((pDLayer->iFrameWidth + 0x0f) >> 4,
(pDLayer->iFrameHeight + 0x0f) >> 4,
@@ -507,12 +497,7 @@
// plus prefix NALs
if (iDIndex == 0)
iCountNumNals += kiNumOfSlice;
-#if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER)
- assert (num_of_slice <= MAX_SLICES_NUM && MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME);
- iNumLayersPack += num_of_slice;
-#else//!MT_ENABLED || !PACKING_ONE_SLICE_PER_LAYER
assert (iCountNumNals - iOrgNumNals <= MAX_NAL_UNITS_IN_LAYER);
-#endif//MT_ENALBED && PACKING_ONE_SLICE_PER_LAYER
if (kiNumOfSlice > MAX_SLICES_NUM) {
WelsLog (*ppCtx, WELS_LOG_ERROR,
"AcquireLayersNals(), num_of_slice(%d) > MAX_SLICES_NUM(%d) per (iDid= %d, qid= %d) settings!\n",
@@ -520,14 +505,13 @@
return 1;
}
}
-#if !defined(MT_ENABLED) || !defined(PACKING_ONE_SLICE_PER_LAYER)
+
if (iCountNumNals - iOrgNumNals > MAX_NAL_UNITS_IN_LAYER) {
WelsLog (*ppCtx, WELS_LOG_ERROR,
- "AcquireLayersNals(), num_of_nals(%d) > MAX_NAL_UNITS_IN_LAYER(%d) per (iDid= %d, qid= %d) settings!\n",
- (iCountNumNals - iOrgNumNals), MAX_NAL_UNITS_IN_LAYER, iDIndex, 0);
+ "AcquireLayersNals(), num_of_nals(%d) > MAX_NAL_UNITS_IN_LAYER(%d) per (iDid= %d, qid= %d) settings!\n",
+ (iCountNumNals - iOrgNumNals), MAX_NAL_UNITS_IN_LAYER, iDIndex, 0);
return 1;
}
-#endif//!MT_ENABLED) || !PACKING_ONE_SLICE_PER_LAYER
iCountNumLayers ++;
@@ -536,9 +520,6 @@
iCountNumNals += 1 + iNumDependencyLayers + (iCountNumLayers << 1) +
iCountNumLayers; // plus iCountNumLayers for reserved application
-#if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER)
- iNumLayersPack += 1 + iNumDependencyLayers + (iCountNumLayers << 1);
-#endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER
// to check number of layers / nals / slices dependencies, 12/8/2010
#if !defined(MT_ENABLED)
@@ -548,19 +529,11 @@
return 1;
}
#else//MT_ENABLED
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
- if (iNumLayersPack > MAX_LAYER_NUM_OF_FRAME) {
- WelsLog (*ppCtx, WELS_LOG_ERROR, "AcquireLayersNals(), num_layers_pack_overall(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
- iNumLayersPack, MAX_LAYER_NUM_OF_FRAME);
- return 1;
- }
-#else//!PACKING_ONE_SLICE_PER_LAYER
if (iCountNumLayers > MAX_LAYER_NUM_OF_FRAME) {
WelsLog (*ppCtx, WELS_LOG_ERROR, "AcquireLayersNals(), iCountNumLayers(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
iCountNumLayers, MAX_LAYER_NUM_OF_FRAME);
return 1;
}
-#endif//PACKING_ONE_SLICE_PER_LAYER
#endif//!MT_ENABLED
if (NULL != pCountLayers)
@@ -1770,14 +1743,9 @@
const int32_t kiMbHeight = (pDlp->iFrameHeight + 15) >> 4;
const int32_t kiMbNumInFrame = kiMbWidth * kiMbHeight;
#if defined(MT_ENABLED)
-#if defined(DYNAMIC_SLICE_ASSIGN)
int32_t iSliceNum = (SM_FIXEDSLCNUM_SLICE == pMso->uiSliceMode
|| SM_DYN_SLICE == pMso->uiSliceMode) ? kiCpuCores :
pSlcArg->uiSliceNum; // uiSliceNum per input has been validated at ParamValidationExt()
-#else//!DYNAMIC_SLICE_ASSIGN
- int32_t iSliceNum = (SM_DYN_SLICE == pMso->uiSliceMode) ? kiCpuCores :
- pSlcArg->uiSliceNum; // uiSliceNum per input has been validated at ParamValidationExt()
-#endif//DYNAMIC_SLICE_ASSIGN
#else//!MT_ENABLED
int16_t iSliceNum = pSlcArg->uiSliceNum; // uiSliceNum per input has been validated at ParamValidationExt()
#endif//MT_ENABLED
@@ -2215,7 +2183,6 @@
res);
(*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx] = 0;
}
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
if ((*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx]) {
res = WelsThreadCancel ((*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx]);
WelsLog (*ppCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), WelsThreadCancel(pUpdateMbListThrdHandles%d) return %d..\n",
@@ -2225,7 +2192,6 @@
iThreadIdx, res);
(*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx] = 0;
}
-#endif//DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE
++ iThreadIdx;
}
#endif//WIN32
@@ -2306,10 +2272,8 @@
#ifdef MT_ENABLED
if (pCtx->pSvcParam->iMultipleThreadIdc > 1) {
iPartitionNum = pCtx->pSvcParam->iCountThreadsNum;
-#if !defined(FIXED_PARTITION_ASSIGN)
if (P_SLICE == pCtx->eSliceType)
iPartitionNum = 1;
-#endif//!FIXED_PARTITION_ASSIGN
}
return iPartitionNum;
#else
@@ -2947,71 +2911,6 @@
}
/*
- * post process of dynamic slicing bs writing in case PACKING_ONE_SLICE_PER_LAYER
- * include: count bs size of over all the slices in layer,
- * return: count number of slices in layer
- */
-#if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER)
-int32_t PostProcDynamicSlicingBsWriting (sWelsEncCtx* pCtx, SLayerBSInfo* pLayerBsInfo, int32_t* pLayerSize,
- const int32_t kiPartitionCnt) {
- SDqLayer* pCurDq = pCtx->pCurDqLayer;
- int32_t iPartitionIdx = 0;
- int32_t iCheckingIdx = 0;
- int32_t iSwappingIdx = -1;
- int32_t iSliceCount = 0;
- int32_t iLayerSize = 0;
-
- // count number of slices in layer and layer size
- while (iPartitionIdx < kiPartitionCnt) {
- const int32_t coded_slice_cnt = pCurDq->pNumSliceCodedOfPartition[iPartitionIdx];
- iLayerSize += pCtx->pSliceThreading->pCountBsSizeInPartition[iPartitionIdx];
- iSliceCount += coded_slice_cnt;
- ++ iPartitionIdx;
- }
- *pLayerSize = iLayerSize;
-
- // reordering pLayerBs pointers, but do not ensure raster scan order of picture
- // just maintain discontinuous items,i.e,
- // input:
- // partition 1: uiSliceIdx: 0 2 4 6
- // partition 2: uiSliceIdx: 1 3 5 7 9 11 13
- // output:
- // uiSliceIdx: 0 1 2 3 4 5 6 7 8 9 10
- iCheckingIdx = 0;
- while (true) {
- bool bMatchFlag = false;
- iPartitionIdx = 0;
- while (iPartitionIdx < kiPartitionCnt) {
- const int32_t coded_slice_cnt = pCurDq->pNumSliceCodedOfPartition[iPartitionIdx];
- // iCheckingIdx need convert to iIndex of iPartitionIdx based to avoid linear searching
- // belong this partition and not exceed the number of slices coded in partition
- if (iPartitionIdx == (iCheckingIdx % kiPartitionCnt)
- && iCheckingIdx / kiPartitionCnt < coded_slice_cnt) {
- if (iSwappingIdx >= 0) {
- // memory swapping
- memmove (pLayerBsInfo + iSwappingIdx, LayerBsInfo + iCheckingIdx, sizeof (SLayerBSInfo)); // confirmed_safe_unsafe_usage
- ++ iSwappingIdx; // record iSwappingIdx
- }
- ++ iCheckingIdx;
- bMatchFlag = true;
- break;
- }
- ++ iPartitionIdx;
- }
- if (!bMatchFlag) {
- if (iSwappingIdx < 0)
- iSwappingIdx = iCheckingIdx;
- ++ iCheckingIdx;
- }
- if (iSwappingIdx >= iSliceCount)
- break;
- }
-
- return iSliceCount;
-}
-#endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER
-
-/*
* Force coding IDR as follows
*/
int32_t ForceCodingIDR (sWelsEncCtx* pCtx) {
@@ -3072,9 +2971,9 @@
SPicture* fsnr = NULL;
#endif//ENABLE_FRAME_DUMP || ENABLE_PSNR_CALC
SPicture* pEncPic = NULL; // to be decided later
-#if defined(MT_ENABLED) && (defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG))
+#if defined(MT_ENABLED)
int32_t did_list[MAX_DEPENDENCY_LAYER] = {0};
-#endif//MT_ENABLED && DYNAMIC_SLICE_ASSIGN
+#endif//MT_ENABLED
int32_t iLayerNum = 0;
int32_t iLayerSize = 0;
int32_t iSpatialNum = 0; // available count number of spatial layers due to frame size changed in this given frame
@@ -3164,21 +3063,21 @@
iCurWidth = param_d->iFrameWidth;
iCurHeight = param_d->iFrameHeight;
-#if defined(MT_ENABLED) && (defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG))
+#if defined(MT_ENABLED)
did_list[iSpatialIdx] = iCurDid;
-#endif//MT_ENABLED && DYNAMIC_SLICE_ASSIGN
+#endif//MT_ENABLED
// Encoding this picture might mulitiple sQualityStat layers potentially be encoded as followed
switch (param_d->sSliceCfg.uiSliceMode) {
case SM_FIXEDSLCNUM_SLICE: {
-#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN)
+#if defined(MT_ENABLED)
if ((iCurDid > 0) && (pSvcParam->iMultipleThreadIdc > 1) &&
(pSvcParam->sDependencyLayers[iCurDid].sSliceCfg.uiSliceMode == SM_FIXEDSLCNUM_SLICE
&& pSvcParam->iMultipleThreadIdc >= pSvcParam->sDependencyLayers[iCurDid].sSliceCfg.sSliceArgument.uiSliceNum)
)
AdjustEnhanceLayer (pCtx, iCurDid);
-#endif//MT_ENABLED && DYNAMIC_SLICE_ASSIGN
+#endif//MT_ENABLED
break;
}
case SM_DYN_SLICE: {
@@ -3307,13 +3206,7 @@
// THREAD_FULLY_FIRE_MODE/THREAD_PICK_UP_MODE for any mode of non-SM_DYN_SLICE
if ((SM_DYN_SLICE != param_d->sSliceCfg.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) {
iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer->pSliceEncCtx);
- if (iLayerNum +
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
- iSliceCount
-#else
- 1
-#endif//PACKING_ONE_SLICE_PER_LAYER
- >= MAX_LAYER_NUM_OF_FRAME) { // check available layer_bs_info for further writing as followed
+ if (iLayerNum + 1 >= MAX_LAYER_NUM_OF_FRAME) { // check available layer_bs_info for further writing as followed
WelsLog (pCtx, WELS_LOG_ERROR,
"WelsEncoderEncodeExt(), iLayerNum(%d) overflow(max:%d) at iDid= %d uiSliceMode= %d, iSliceCount= %d!",
iLayerNum, MAX_LAYER_NUM_OF_FRAME, iCurDid, param_d->sSliceCfg.uiSliceMode, iSliceCount);
@@ -3327,17 +3220,9 @@
}
if (pSvcParam->iCountThreadsNum >= iSliceCount) { //THREAD_FULLY_FIRE_MODE
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
- int32_t iSliceIdx = 1;
- int32_t iOrgSlicePos[MAX_SLICES_NUM] = {0};
- iOrgSlicePos[0] = pCtx->iPosBsBuffer;
- while (uiSliceIdx < iSliceCount) {
- iOrgSlicePos[uiSliceIdx] = pCtx->pSliceBs[uiSliceIdx].uiBsPos;
- ++ uiSliceIdx;
- }
-#elif defined(MT_DEBUG)
+#if defined(MT_DEBUG)
int64_t t_bs_append = 0;
-#endif//PACKING_ONE_SLICE_PER_LAYER
+#endif
pCtx->iActiveThreadsNum = iSliceCount;
// to fire slice coding threads
@@ -3357,16 +3242,6 @@
WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
// append exclusive slice 0 bs to pFrameBs
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
- iLayerSize = pCtx->iPosBsBuffer - iOrgSlicePos[0];
- uiSliceIdx = 1;
- while (uiSliceIdx < iSliceCount) {
- iLayerSize += pCtx->pSliceBs[uiSliceIdx].uiBsPos - iOrgSlicePos[uiSliceIdx];
- ++ uiSliceIdx;
- }
- iLayerNum += iSliceCount; // each slice stickly output as layer info for performance improvement directly
- pLayerBsInfo += iSliceCount;
-#else
#if defined(MT_DEBUG)
t_bs_append = WelsTime();
#endif//MT_DEBUG
@@ -3383,20 +3258,10 @@
t_bs_append, pCtx->iCodingIndex, iCurDid, 0);
}
#endif//MT_DEBUG
-#endif//PACKING_ONE_SLICE_PER_LAYER
} else { //THREAD_PICK_UP_MODE
int32_t iNumThreadsRunning = 0;
int32_t iNumThreadsScheduled = 0;
int32_t iIndexOfSliceToBeCoded = 0;
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
- int32_t iSliceIdx = 1;
- int32_t iOrgSlicePos[MAX_SLICES_NUM] = {0};
- iOrgSlicePos[0] = pCtx->iPosBsBuffer;
- while (uiSliceIdx < iSliceCount) {
- iOrgSlicePos[uiSliceIdx] = pCtx->pSliceBs[uiSliceIdx].uiBsPos;
- ++ uiSliceIdx;
- }
-#endif//PACKING_ONE_SLICE_PER_LAYER
pCtx->iActiveThreadsNum = pSvcParam->iCountThreadsNum;
iNumThreadsScheduled = pCtx->iActiveThreadsNum;
@@ -3428,9 +3293,6 @@
// pick up succeeding slice for threading
// thread_id equal to iEventId per implementation here
pCtx->pSliceThreading->pThreadPEncCtx[iEventId].iSliceIndex = iIndexOfSliceToBeCoded;
-#ifdef PACKING_ONE_SLICE_PER_LAYER
- pCtx->pSliceThreading->pThreadPEncCtx[iEventId].pLayerBs = pLayerBsInfo + iIndexOfSliceToBeCoded;
-#endif//PACKING_ONE_SLICE_PER_LAYER
WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iEventId]);
++ iIndexOfSliceToBeCoded;
@@ -3452,9 +3314,6 @@
if (iIndexOfSliceToBeCoded >= iSliceCount)
break;
pCtx->pSliceThreading->pThreadPEncCtx[iThreadIdx].iSliceIndex = iIndexOfSliceToBeCoded;
-#ifdef PACKING_ONE_SLICE_PER_LAYER
- pCtx->pSliceThreading->pThreadPEncCtx[iThreadIdx].pLayerBs = pLayerBsInfo + iIndexOfSliceToBeCoded;
-#endif//PACKING_ONE_SLICE_PER_LAYER
WelsEventSignal (pCtx->pSliceThreading->pReadySliceCodingEvent[iThreadIdx]);
++ iIndexOfSliceToBeCoded;
@@ -3468,31 +3327,14 @@
#endif//_WIN32
}//while(1)
-// all slices are finished coding here
+ // all slices are finished coding here
// append exclusive slice 0 bs to pFrameBs
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
- iLayerSize = pCtx->iPosBsBuffer - iOrgSlicePos[0];
- uiSliceIdx = 1;
- while (uiSliceIdx < iSliceCount) {
- iLayerSize += pCtx->pSliceBs[uiSliceIdx].uiBsPos - iOrgSlicePos[uiSliceIdx];
- ++ uiSliceIdx;
- }
- iLayerNum += iSliceCount; // each slice stickly output as layer info for performance improvement directly
- pLayerBsInfo += iSliceCount;
-#else
iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount);
-#endif//PACKING_ONE_SLICE_PER_LAYER
}
}
// THREAD_FULLY_FIRE_MODE && SM_DYN_SLICE
else if ((SM_DYN_SLICE == param_d->sSliceCfg.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) {
const int32_t kiPartitionCnt = pCtx->iActiveThreadsNum; //pSvcParam->iCountThreadsNum;
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
- ResetCountBsSizeInPartitions (pCtx->pSliceThreading->pCountBsSizeInPartition, kiPartitionCnt);
- pCtx->pCurDqLayer->pSliceEncCtx->iMaxSliceNumConstraint = WELS_MIN (MAX_SLICES_NUM,
- DynamicMaxSliceNumConstraint (MAX_LAYER_NUM_OF_FRAME, iLayerNum, 1 + /*( num_qlayer - 1) +*/ (((iCurDid == 0)
- && (pSvcParam->uiGopSize > 1)) ? 1 : 0)));
-#endif//PACKING_ONE_SLICE_PER_LAYER
// to fire slice coding threads
err = FiredSliceThreads (&pCtx->pSliceThreading->pThreadPEncCtx[0], &pCtx->pSliceThreading->pReadySliceCodingEvent[0],
@@ -3507,14 +3349,7 @@
WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
- iSliceCount = PostProcDynamicSlicingBsWriting (pCtx, pLayerBsInfo, &iLayerSize, kiPartitionCnt);
- assert (iLayerNum + iSliceCount < MAX_LAYER_NUM_OF_FRAME);
- pLayerBsInfo += iSliceCount;
- iLayerNum += iSliceCount;
-#else
iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, kiPartitionCnt);
-#endif//PACKING_ONE_SLICE_PER_LAYER
} else // for non-dynamic-slicing mode single threading branch..
#endif//MT_ENABLED
{
@@ -3681,17 +3516,9 @@
}
#endif//STAT_OUTPUT
-#if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER)
- if (pSvcParam->iMultipleThreadIdc <= 1 || SM_SINGLE_SLICE == param_d->sSliceCfg.uiSliceMode) // sigle thread actually used
-#else
- if (1)
-#endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER
- {
- ++ iLayerNum;
- ++ pLayerBsInfo;
- }
+ ++ iLayerNum;
+ ++ pLayerBsInfo;
-
pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
if (pSvcParam->iPaddingFlag && pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize > 0) {
@@ -3722,7 +3549,7 @@
++ iLayerNum;
}
-#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
+#if defined(MT_ENABLED)
if (param_d->sSliceCfg.uiSliceMode == SM_FIXEDSLCNUM_SLICE && pSvcParam->iMultipleThreadIdc > 1 &&
pSvcParam->iMultipleThreadIdc >= param_d->sSliceCfg.sSliceArgument.uiSliceNum) {
CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer->pSliceEncCtx,
@@ -3731,7 +3558,7 @@
TrackSliceComplexities (pCtx, iCurDid);
#endif//#if defined(MT_DEBUG)
}
-#endif//MT_ENABLED && DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE
+#endif//MT_ENABLED
++ iSpatialIdx;
@@ -3762,7 +3589,7 @@
TrackSliceConsumeTime (pCtx, did_list, iSpatialNum);
#endif//MT_ENABLED && MT_DEBUG
-#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN)
+#if defined(MT_ENABLED)
if (pSvcParam->iMultipleThreadIdc > 1 && did_list[0] == BASE_DEPENDENCY_ID
&& pSvcParam->sDependencyLayers[0].sSliceCfg.uiSliceMode == SM_FIXEDSLCNUM_SLICE
&& pSvcParam->iMultipleThreadIdc >= pSvcParam->sDependencyLayers[0].sSliceCfg.sSliceArgument.uiSliceNum
@@ -3771,7 +3598,7 @@
1]].sSliceCfg.sSliceArgument.uiSliceNum) {
AdjustBaseLayer (pCtx);
}
-#endif//DYNAMIC_SLICE_ASSIGN
+#endif
#ifdef ENABLE_FRAME_DUMP
DumpRecFrame (fsnr, &pSvcParam->sDependencyLayers[pSvcParam->iSpatialLayerNum -
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -63,9 +63,7 @@
#include "cpu.h"
#endif//X86_ASM
-#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
#include "measure_time.h"
-#endif//DYNAMIC_SLICE_ASSIGN
namespace WelsSVCEnc {
void UpdateMbListNeighborParallel (SSliceCtx* pSliceCtx,
SMB* pMbList,
@@ -144,12 +142,8 @@
}
}
-#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN) && defined(NOT_ABSOLUTE_BALANCING)
+#if defined(MT_ENABLED)
int32_t NeedDynamicAdjust (void* pConsumeTime, const int32_t iSliceNum) {
-#if !defined(USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING)
- const float fRatioLower = TOLERANT_BALANCING_RATIO_LOWER (uiSliceNum);
- const float fRatioUpper = TOLERANT_BALANCING_RATIO_UPPER (uiSliceNum);
-#endif//USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
uint32_t* pSliceConsume = (uint32_t*)pConsumeTime;
uint32_t uiTotalConsume = 0;
int32_t iSliceIdx = 0;
@@ -172,7 +166,6 @@
}
iSliceIdx = 0;
-#if defined(USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING)
float fThr = EPSN; // threshold for various cores cases
float fRmse = .0f; // root mean square error of pSlice consume ratios
const float kfMeanRatio = 1.0f / iSliceNum;
@@ -198,27 +191,11 @@
"[MT] NeedDynamicAdjust(), herein adjustment decision is made (iNeedAdj= %d) by: fRmse of pSlice complexity ratios %.6f, the corresponding threshold %.6f, iCountSliceNum %d\n",
iNeedAdj, fRmse, fThr, iSliceNum);
#endif//ENABLE_TRACE_MT
-#else
- do {
- const float kfRatio = 1.0f * pSliceConsume[uiSliceIdx] / uiTotalConsume;
- if (kfRatio + EPSN < fRatioLower || kfRatio > ratio_upper + EPSN) {
-#if defined(ENABLE_TRACE_MT)
- WelsLog (NULL, WELS_LOG_DEBUG,
- "[MT] NeedDynamicAdjust(), herein adjustment decision is made by pSlice consume time not balanced at all, uiSliceIdx= %d, comp_ratio= %.6f, pSliceConsumeTime= %d, total_consume_time= %d, iCountSliceNum= %d\n",
- uiSliceIdx, kfRatio, pSliceConsume[uiSliceIdx], uiTotalConsume, uiSliceNum);
-#endif//ENABLE_TRACE_MT
- iNeedAdj = true;
- break;
- }
- ++ uiSliceIdx;
- } while (uiSliceIdx + 1 < uiSliceNum);
-#endif//USE_RMSE_SLICE_COMPLEXITY_RATIO_FOR_BALANCING
return iNeedAdj;
}
-#endif//..
+#endif
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
void DynamicAdjustSlicing (sWelsEncCtx* pCtx,
SDqLayer* pCurDqLayer,
void* pComplexRatio,
@@ -320,21 +297,8 @@
WelsMultipleEventsWaitAllBlocking (kiThreadNum, &pCtx->pSliceThreading->pFinUpdateMbListEvent[0]);
}
}
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
-#ifdef PACKING_ONE_SLICE_PER_LAYER
-void ResetEnvMt (sWelsEncCtx* pCtx) {
- const int16_t kiSliceCount = pCtx->iMaxSliceCount;
- int32_t iIdx = 0;
- while (iIdx < kiSliceCount) {
- SWelsSliceBs* pSliceBs = &pCtx->pSliceBs[iIdx];
- pSliceBs->uiBsPos = 0;
- ++ iIdx;
- }
-}
-#endif//PACKING_ONE_SLICE_PER_LAYER
-
int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen,
const int32_t iTargetSpatialBsSize) {
CMemoryAlign* pMa = NULL;
@@ -378,7 +342,6 @@
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pFinSliceCodingEvent), FreeMemorySvc (ppCtx))
#endif//_WIN32
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#if defined(__GNUC__)
pSmt->pUpdateMbListThrdHandles = (WELS_THREAD_HANDLE*)pMa->WelsMalloc (sizeof (WELS_THREAD_HANDLE) * iThreadNum,
"pUpdateMbListThrdHandles");
@@ -390,7 +353,6 @@
pSmt->pFinUpdateMbListEvent = (WELS_EVENT*)pMa->WelsMalloc (sizeof (WELS_EVENT) * iThreadNum, "pFinUpdateMbListEvent");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pFinUpdateMbListEvent), FreeMemorySvc (ppCtx))
#endif//_WIN32
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#ifdef _WIN32
pSmt->pExitEncodeEvent = (WELS_EVENT*)pMa->WelsMalloc (sizeof (WELS_EVENT) * iThreadNum, "pExitEncodeEvent");
@@ -397,7 +359,6 @@
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pExitEncodeEvent), FreeMemorySvc (ppCtx))
#endif//_WIN32
-#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
iIdx = 0;
while (iIdx < iNumSpatialLayers) {
SSliceConfig* pMso = &pPara->sDependencyLayers[iIdx].sSliceCfg;
@@ -406,20 +367,15 @@
&& pPara->iMultipleThreadIdc >= kiSliceNum) {
pSmt->pSliceConsumeTime[iIdx] = (uint32_t*)pMa->WelsMallocz (kiSliceNum * sizeof (uint32_t), "pSliceConsumeTime[]");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceConsumeTime[iIdx]), FreeMemorySvc (ppCtx))
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
pSmt->pSliceComplexRatio[iIdx] = (float*)pMa->WelsMalloc (kiSliceNum * sizeof (float), "pSliceComplexRatio[]");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceComplexRatio[iIdx]), FreeMemorySvc (ppCtx))
-#endif//TRY_SLICING_BALANCE
} else {
pSmt->pSliceConsumeTime[iIdx] = NULL;
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
pSmt->pSliceComplexRatio[iIdx] = NULL;
-#endif//TRY_SLICING_BALANCE
}
++ iIdx;
}
// NULL for pSliceConsumeTime[iIdx]: iIdx from iNumSpatialLayers to MAX_DEPENDENCY_LAYERS
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
#ifdef MT_DEBUG
// file handle for MT debug
@@ -447,7 +403,6 @@
pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx;
pSmt->pThreadHandles[iIdx] = 0;
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#ifdef _WIN32
WelsEventInit (&pSmt->pUpdateMbListEvent[iIdx]);
WelsEventInit (&pSmt->pFinUpdateMbListEvent[iIdx]);
@@ -464,7 +419,6 @@
WelsLog ((*ppCtx), WELS_LOG_INFO, "[MT] Open pFinUpdateMbListEvent%d named(%s) ret%d err%d\n", iIdx, name, err, errno);
#endif
#endif//_WIN32
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#ifdef _WIN32
WelsEventInit (&pSmt->pSliceCodedEvent[iIdx]);
@@ -488,11 +442,6 @@
++ iIdx;
}
-#ifdef PACKING_ONE_SLICE_PER_LAYER
- pSmt->pCountBsSizeInPartition = (uint32_t*)pMa->WelsMalloc (sizeof (uint32_t) * iThreadNum, "pCountBsSizeInPartition");
- WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pCountBsSizeInPartition), FreeMemorySvc (ppCtx))
-#endif//PACKING_ONE_SLICE_PER_LAYER
-
(*ppCtx)->pSliceBs = (SWelsSliceBs*)pMa->WelsMalloc (sizeof (SWelsSliceBs) * iMaxSliceNum, "pSliceBs");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSliceBs), FreeMemorySvc (ppCtx))
@@ -566,12 +515,10 @@
WelsEventDestroy (&pSmt->pFinSliceCodingEvent[iIdx]);
if (pSmt->pExitEncodeEvent != NULL)
WelsEventDestroy (&pSmt->pExitEncodeEvent[iIdx]);
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
if (pSmt->pUpdateMbListEvent != NULL)
WelsEventDestroy (&pSmt->pUpdateMbListEvent[iIdx]);
if (pSmt->pFinUpdateMbListEvent != NULL)
WelsEventDestroy (&pSmt->pFinUpdateMbListEvent[iIdx]);
-#endif//DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE
#else
char ename[SEM_NAME_MAX] = {0};
// length of semaphore name should be system constrained at least on mac 10.7
@@ -579,12 +526,10 @@
WelsEventClose (pSmt->pSliceCodedEvent[iIdx], ename);
WelsSnprintf (ename, SEM_NAME_MAX, "rc%d%p", iIdx, (void*) (*ppCtx));
WelsEventClose (pSmt->pReadySliceCodingEvent[iIdx], ename);
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
WelsSnprintf (ename, SEM_NAME_MAX, "ud%d%p", iIdx, (void*) (*ppCtx));
WelsEventClose (pSmt->pUpdateMbListEvent[iIdx], ename);
WelsSnprintf (ename, SEM_NAME_MAX, "fu%d%p", iIdx, (void*) (*ppCtx));
WelsEventClose (pSmt->pFinUpdateMbListEvent[iIdx], ename);
-#endif//DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE
#endif//_WIN32
++ iIdx;
@@ -609,13 +554,6 @@
}
#endif//_WIN32
-#ifdef PACKING_ONE_SLICE_PER_LAYER
- if (NULL != pSmt->pCountBsSizeInPartition) {
- pMa->WelsFree (pSmt->pCountBsSizeInPartition, "pCountBsSizeInPartition");
- pSmt->pCountBsSizeInPartition = NULL;
- }
-#endif//PACKING_ONE_SLICE_PER_LAYER
-
WelsMutexDestroy (&pSmt->mutexSliceNumUpdate);
WelsMutexDestroy (&((*ppCtx)->mutexEncoderError));
@@ -643,7 +581,6 @@
pMa->WelsFree ((*ppCtx)->pSliceBs, "pSliceBs");
(*ppCtx)->pSliceBs = NULL;
}
-#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
iIdx = 0;
while (iIdx < pCodingParam->iSpatialLayerNum) {
if (pSmt->pSliceConsumeTime[iIdx]) {
@@ -650,18 +587,13 @@
pMa->WelsFree (pSmt->pSliceConsumeTime[iIdx], "pSliceConsumeTime[]");
pSmt->pSliceConsumeTime[iIdx] = NULL;
}
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
if (pSmt->pSliceComplexRatio[iIdx] != NULL) {
pMa->WelsFree (pSmt->pSliceComplexRatio[iIdx], "pSliceComplexRatio[]");
pSmt->pSliceComplexRatio[iIdx] = NULL;
}
-#endif//TRY_SLICING_BALANCE
++ iIdx;
}
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
-
#ifdef _WIN32
if (pSmt->pUpdateMbListEvent != NULL) {
pMa->WelsFree (pSmt->pUpdateMbListEvent, "pUpdateMbListEvent");
@@ -678,8 +610,6 @@
}
#endif//_WIN32
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
-
#ifdef MT_DEBUG
// file handle for debug
if (pSmt->pFSliceDiff) {
@@ -781,12 +711,8 @@
const int32_t kiNalCnt = pSliceBs->iNalIndex;
int32_t iNalIdx = 0;
int32_t iNalSize = 0;
-#if !defined(PACKING_ONE_SLICE_PER_LAYER)
const int32_t iFirstSlice = (iSliceIdx == 0);
int32_t iNalBase = iFirstSlice ? 0 : pLbi->iNalCount;
-#else
- int32_t iNalBase = 0;
-#endif//!PACKING_ONE_SLICE_PER_LAYER
int32_t iReturn = ENC_RETURN_SUCCESS;
const int32_t kiWrittenLength = pCtx->iPosBsBuffer;
iSliceSize = 0;
@@ -803,7 +729,6 @@
++ iNalIdx;
}
-#if !defined(PACKING_ONE_SLICE_PER_LAYER)
pSliceBs->uiBsPos = iSliceSize;
if (iFirstSlice) {
// pBsBuffer has been updated at coding_slice_0_in_encoder_mother_thread()
@@ -816,14 +741,6 @@
} else {
pLbi->iNalCount += kiNalCnt;
}
-#else
- pLbi->uiLayerType = VIDEO_CODING_LAYER;
- pLbi->uiSpatialId = pNalHdrExt->uiDependencyId;
- pLbi->uiTemporalId = pNalHdrExt->uiTemporalId;
- pLbi->uiQualityId = 0;
- pLbi->uiPriorityId = 0;
- pLbi->iNalCount = kiNalCnt;
-#endif//PACKING_ONE_SLICE_PER_LAYER
return ENC_RETURN_SUCCESS;
}
@@ -858,7 +775,6 @@
return iReturn;
}
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#if defined(__GNUC__) && !defined(_WIN32)
WELS_THREAD_ROUTINE_TYPE UpdateMbListThreadProc (void* arg) {
SSliceThreadPrivateData* pPrivateData = (SSliceThreadPrivateData*)arg;
@@ -899,7 +815,6 @@
WELS_THREAD_ROUTINE_RETURN (uiThrdRet);
}
#endif//__GNUC__
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
// thread process for coding one pSlice
WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) {
@@ -936,9 +851,7 @@
#ifdef _WIN32
pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pReadySliceCodingEvent[iEventIdx];
pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pExitEncodeEvent[iEventIdx];
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pUpdateMbListEvent[iEventIdx];
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#endif//_WIN32
do {
@@ -973,17 +886,13 @@
pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
pSliceBs = &pEncPEncCtx->pSliceBs[iSliceIdx];
-#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
bDsaFlag = (pParamD->sSliceCfg.uiSliceMode == SM_FIXEDSLCNUM_SLICE &&
pCodingParam->iMultipleThreadIdc > 1 &&
pCodingParam->iMultipleThreadIdc >= pParamD->sSliceCfg.sSliceArgument.uiSliceNum);
if (bDsaFlag)
iSliceStart = WelsTime();
-#endif//DYNAMIC_SLICE_ASSIGN || MT_DEBUG
-#if !defined(PACKING_ONE_SLICE_PER_LAYER)
pSliceBs->uiBsPos = 0;
-#endif//!PACKING_ONE_SLICE_PER_LAYER
pSliceBs->iNalIndex = 0;
assert ((void*) (&pSliceBs->sBsWrite) == (void*)pSlice->pSliceBsa);
InitBits (&pSliceBs->sBsWrite, pSliceBs->pBsBuffer, pSliceBs->uiSize);
@@ -1014,7 +923,6 @@
WelsUnloadNalForSlice (pSliceBs);
-#if !defined(PACKING_ONE_SLICE_PER_LAYER)
if (0 == iSliceIdx) {
pLbi->pBsBuf = pEncPEncCtx->pFrameBs + pEncPEncCtx->iPosBsBuffer;
iReturn = WriteSliceToFrameBs (pEncPEncCtx, pLbi, pLbi->pBsBuf, iSliceIdx, iSliceSize);
@@ -1031,25 +939,6 @@
break;
}
}
-#else// PACKING_ONE_SLICE_PER_LAYER
- if (0 == iSliceIdx) {
- pLbi->pBsBuf = pEncPEncCtx->pFrameBs + pEncPEncCtx->iPosBsBuffer;
- iReturn = WriteSliceToFrameBs (pEncPEncCtx, pLbi, pLbi->pBsBuf, iSliceIdx, &iSliceSize);
- if (ENC_RETURN_SUCCESS!=iReturn) {
- uiThrdRet = iReturn;
- break;
- }
- pEncPEncCtx->iPosBsBuffer += iSliceSize;
- } else {
- pLbi->pBsBuf = pSliceBs->bs + pSliceBs->uiBsPos;
- iReturn = WriteSliceToFrameBs (pEncPEncCtx, pLbi, pLbi->pBsBuf, iSliceIdx, &iSliceSize);
- if (ENC_RETURN_SUCCESS!=iReturn) {
- uiThrdRet = iReturn;
- break;
- }
- pSliceBs->uiBsPos += iSliceSize;
- }
-#endif//!PACKING_ONE_SLICE_PER_LAYER
if (pCurDq->bDeblockingParallelFlag && pSlice->sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc != 1
#if !defined(ENABLE_FRAME_DUMP)
@@ -1060,7 +949,6 @@
DeblockingFilterSliceAvcbase (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx);
}
-#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
if (bDsaFlag) {
pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx] = (uint32_t) (
WelsTime() - iSliceStart);
@@ -1072,7 +960,6 @@
pCurDq->pSliceEncCtx->pFirstMbInSlice[iSliceIdx], pCurDq->pSliceEncCtx->pCountMbNumInSlice[iSliceIdx]);
#endif//ENABLE_TRACE_MT
}
-#endif//DYNAMIC_SLICE_ASSIGN || MT_DEBUG
#if defined(SLICE_INFO_OUTPUT)
fprintf (stderr,
@@ -1095,9 +982,6 @@
WelsEventSignal (pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice
#endif//WIN32
} else { // for SM_DYN_SLICE parallelization
-#ifdef PACKING_ONE_SLICE_PER_LAYER
- SLayerBSInfo* pLbiPacking = NULL;
-#endif//PACKING_ONE_SLICE_PER_LAYER
SSliceCtx* pSliceCtx = pCurDq->pSliceEncCtx;
const int32_t kiPartitionId = iThreadIdx;
const int32_t kiSliceIdxStep = pEncPEncCtx->iActiveThreadsNum;
@@ -1124,9 +1008,7 @@
pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
pSliceBs = &pEncPEncCtx->pSliceBs[iSliceIdx];
-#if !defined(PACKING_ONE_SLICE_PER_LAYER)
pSliceBs->uiBsPos = 0;
-#endif//!PACKING_ONE_SLICE_PER_LAYER
pSliceBs->iNalIndex = 0;
InitBits (&pSliceBs->sBsWrite, pSliceBs->pBsBuffer, pSliceBs->uiSize);
@@ -1152,7 +1034,6 @@
WelsUnloadNalForSlice (pSliceBs);
-#if !defined(PACKING_ONE_SLICE_PER_LAYER)
if (0 == kiPartitionId) {
if (0 == iSliceIdx)
pLbi->pBsBuf = pEncPEncCtx->pFrameBs + pEncPEncCtx->iPosBsBuffer;
@@ -1170,29 +1051,7 @@
break;
}
}
-#else// PACKING_ONE_SLICE_PER_LAYER
- pLbiPacking = pLbi + (iSliceIdx - kiPartitionId);
- if (0 == kiPartitionId) {
- pLbiPacking->pBsBuf = pEncPEncCtx->pFrameBs + pEncPEncCtx->iPosBsBuffer;
- iReturn = WriteSliceToFrameBs (pEncPEncCtx, pLbiPacking, pLbiPacking->pBsBuf, iSliceIdx, iSliceSize);
- if (ENC_RETURN_SUCCESS!=iReturn) {
- uiThrdRet = iReturn;
- break;
- }
- pEncPEncCtx->iPosBsBuffer += iSliceSize;
- } else {
- pLbiPacking->pBsBuf = pSliceBs->bs + pSliceBs->uiBsPos;
- iReturn = WriteSliceToFrameBs (pEncPEncCtx, pLbiPacking, pLbiPacking->pBsBuf, iSliceIdx, iSliceSize);
- if (ENC_RETURN_SUCCESS!=iReturn) {
- uiThrdRet = iReturn;
- break;
- }
- pSliceBs->uiBsPos += iSliceSize;
- }
- pEncPEncCtx->pSliceThreading->pCountBsSizeInPartition[kiPartitionId] += iSliceSize;
-#endif//!PACKING_ONE_SLICE_PER_LAYER
-
if (pCurDq->bDeblockingParallelFlag && pSlice->sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc != 1
#if !defined(ENABLE_FRAME_DUMP)
&& (eNalRefIdc != NRI_PRI_LOWEST) &&
@@ -1238,7 +1097,6 @@
uiThrdRet = 0;
break;
}
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
else if (WELS_THREAD_ERROR_WAIT_OBJECT_0 + 2 == iWaitRet) { // update pMb list singal
iSliceIdx =
iEventIdx; // pPrivateData->iSliceIndex; old threads can not be terminated, pPrivateData is not correct for applicable
@@ -1247,7 +1105,6 @@
WelsEventSignal (
&pEncPEncCtx->pSliceThreading->pFinUpdateMbListEvent[iEventIdx]); // mean finished update pMb list for this pSlice
}
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#endif//WIN32
else { // WELS_THREAD_ERROR_WAIT_TIMEOUT, or WELS_THREAD_ERROR_WAIT_FAILED
WelsLog (pEncPEncCtx, WELS_LOG_WARNING,
@@ -1299,12 +1156,10 @@
// We need extra threads for update_mb_list_proc on __GNUC__ like OS (mac/linux)
// due to WelsMultipleEventsWaitSingleBlocking implememtation can not work well
// in case waiting pUpdateMbListEvent and pReadySliceCodingEvent events at the same time
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
#if defined(__GNUC__) && !defined(_WIN32)
WelsThreadCreate (&pCtx->pSliceThreading->pUpdateMbListThrdHandles[iIdx], UpdateMbListThreadProc,
&pCtx->pSliceThreading->pThreadPEncCtx[iIdx], 0);
#endif//__GNUC__
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
++ iIdx;
}
@@ -1314,14 +1169,6 @@
return 0;
}
-#ifdef PACKING_ONE_SLICE_PER_LAYER
-void ResetCountBsSizeInPartitions (uint32_t* pCountBsSizeList, const int32_t iPartitionCnt) {
- if (pCountBsSizeList != NULL && iPartitionCnt > 0) {
- memset (pCountBsSizeList, 0, sizeof (pCountBsSizeList[0]) * iPartitionCnt);
- }
-}
-#endif//PACKING_ONE_SLICE_PER_LAYER
-
#ifdef _WIN32
int32_t FiredSliceThreads (SSliceThreadPrivateData* pPriData, WELS_EVENT* pEventsList, SLayerBSInfo* pLbi,
const uint32_t uiNumThreads, SSliceCtx* pSliceCtx, const bool bIsDynamicSlicingMode)
@@ -1341,37 +1188,10 @@
return 1;
}
-#if defined(PACKING_ONE_SLICE_PER_LAYER)
////////////////////////////////////////
if (bIsDynamicSlicingMode) {
iEndMbIdx = pSliceCtx->iMbNumInFrame;
for (iIdx = kiEventCnt - 1; iIdx >= 0; --iIdx) {
- const int32_t kiFirstMbIdx = pSliceCtx->pFirstMbInSlice[iIdx];
- pPriData[iIdx].iStartMbIndex = kiFirstMbIdx;
- pPriData[iIdx].iEndMbIndex = iEndMbIdx;
- iEndMbIdx = kiFirstMbIdx;
- }
- }
-
- iIdx = 0;
- while (iIdx < kiEventCnt) {
- pPriData[iIdx].pLayerBs = pLbi;
- pPriData[iIdx].iSliceIndex = iIdx;
-#ifdef _WIN32
- if (pEventsList[iIdx])
- WelsEventSignal (&pEventsList[iIdx]);
-#else
- WelsEventSignal (pEventsList[iIdx]);
-#endif//WIN32
- ++ pLbi;
- ++ iIdx;
- }
- ////////////////////////////////////////
-#else
- ////////////////////////////////////////
- if (bIsDynamicSlicingMode) {
- iEndMbIdx = pSliceCtx->iMbNumInFrame;
- for (iIdx = kiEventCnt - 1; iIdx >= 0; --iIdx) {
const int32_t iFirstMbIdx = pSliceCtx->pFirstMbInSlice[iIdx];
pPriData[iIdx].iStartMbIndex = iFirstMbIdx;
pPriData[iIdx].iEndMbIndex = iEndMbIdx;
@@ -1391,8 +1211,6 @@
#endif//WIN32
++ iIdx;
}
- ////////////////////////////////////////
-#endif//PACKING_ONE_SLICE_PER_LAYER
return 0;
}
@@ -1403,8 +1221,7 @@
return info.ProcessorCount;
}
-#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN)
-
+#if defined(MT_ENABLED)
int32_t AdjustBaseLayer (sWelsEncCtx* pCtx) {
SDqLayer* pCurDq = pCtx->ppDqLayerList[0];
int32_t iNeedAdj = 1;
@@ -1411,20 +1228,16 @@
#ifdef MT_DEBUG
int64_t iT0 = WelsTime();
#endif//MT_DEBUG
-#ifdef TRY_SLICING_BALANCE
pCtx->pCurDqLayer = pCurDq;
-#ifdef NOT_ABSOLUTE_BALANCING
// do not need adjust due to not different at both slices of consumed time
iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[0], pCurDq->pSliceEncCtx->iSliceNumInFrame);
if (iNeedAdj)
-#endif//NOT_ABSOLUTE_BALANCING
DynamicAdjustSlicing (pCtx,
pCurDq,
pCtx->pSliceThreading->pSliceComplexRatio[0],
0);
-#endif//TRY_SLICING_BALANCE
#ifdef MT_DEBUG
iT0 = WelsTime() - iT0;
if (pCtx->pSliceThreading->pFSliceDiff) {
@@ -1455,33 +1268,25 @@
1].sSliceCfg.sSliceArgument.uiSliceNum);
if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation
-#ifdef TRY_SLICING_BALANCE
-#ifdef NOT_ABSOLUTE_BALANCING
// do not need adjust due to not different at both slices of consumed time
iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid - 1],
pCtx->pCurDqLayer->pSliceEncCtx->iSliceNumInFrame);
if (iNeedAdj)
-#endif//NOT_ABSOLUTE_BALANCING
DynamicAdjustSlicing (pCtx,
pCtx->pCurDqLayer,
pCtx->pSliceThreading->pSliceComplexRatio[iCurDid - 1],
iCurDid
);
-#endif//TRY_SLICING_BALANCE
} else { // use temporal layer for complexity estimation
-#ifdef TRY_SLICING_BALANCE
-#ifdef NOT_ABSOLUTE_BALANCING
// do not need adjust due to not different at both slices of consumed time
iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[iCurDid],
pCtx->pCurDqLayer->pSliceEncCtx->iSliceNumInFrame);
if (iNeedAdj)
-#endif//NOT_ABSOLUTE_BALANCING
DynamicAdjustSlicing (pCtx,
pCtx->pCurDqLayer,
pCtx->pSliceThreading->pSliceComplexRatio[iCurDid],
iCurDid
);
-#endif//TRY_SLICING_BALANCE
}
#ifdef MT_DEBUG
@@ -1500,11 +1305,11 @@
return iNeedAdj;
}
-#endif//#if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN)
+#endif//#if defined(MT_ENABLED)
#if defined(MT_ENABLED)
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE) && defined(MT_DEBUG)
+#if defined(MT_DEBUG)
void TrackSliceComplexities (sWelsEncCtx* pCtx, const int32_t iCurDid) {
const int32_t kiCountSliceNum = pCtx->pCurDqLayer->pSliceEncCtx->iSliceNumInFrame;
if (kiCountSliceNum > 0) {
@@ -1516,9 +1321,9 @@
} while (iSliceIdx < kiCountSliceNum);
}
}
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE)
+#endif
-#if defined(DYNAMIC_SLICE_ASSIGN) && defined(MT_DEBUG)
+#if defined(MT_DEBUG)
void TrackSliceConsumeTime (sWelsEncCtx* pCtx, int32_t* pDidList, const int32_t iSpatialNum) {
SWelsSvcCodingParam* pPara = NULL;
int32_t iSpatialIdx = 0;
@@ -1557,7 +1362,7 @@
++ iSpatialIdx;
}
}
-#endif//#if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)
+#endif//#if defined(MT_DEBUG)
#endif//MT_ENABLED
}