ref: e793bd35046a35569bc1b3db3be463912dea0582
parent: a87c15361223d536a95ddb4fe9490464790ee9d8
parent: 006d982fde9ae972c83ca816755be667b4c114a5
author: huili2 <huili2@cisco.com>
date: Thu Sep 10 05:09:49 EDT 2020
Merge pull request #3332 from xiaotianshi2/threading-decoding-fixes 1. fix multiple issues in threaded-decoding mode.
--- a/codec/console/dec/src/h264dec.cpp
+++ b/codec/console/dec/src/h264dec.cpp
@@ -114,6 +114,7 @@
int32_t pps_count = 0;
int32_t non_idr_pict_count = 0;
int32_t idr_pict_count = 0;
+ int32_t nal_deliminator = 0;
pSpsBuf = NULL;
sps_byte_count = 0;
while (read_bytes < bytes_available - 4) {
@@ -143,12 +144,22 @@
}
} else if (nal_unit_type == 7) {
pSpsBuf = ptr + (has4ByteStartCode ? 4 : 3);
- if ((++sps_count == 1) && (non_idr_pict_count == 1 || idr_pict_count == 1)) {
+ if ((++sps_count >= 1) && (non_idr_pict_count >= 1 || idr_pict_count >= 1)) {
return read_bytes;
}
+ if (sps_count == 2) {
+ return read_bytes;
+ }
} else if (nal_unit_type == 8) {
if (++pps_count == 1 && sps_count == 1) {
sps_byte_count = int32_t (ptr - pSpsBuf);
+ }
+ if (pps_count >= 1 && (non_idr_pict_count >= 1 || idr_pict_count >= 1)) {
+ return read_bytes;
+ }
+ } else if (nal_unit_type == 9) {
+ if (++nal_deliminator == 2) {
+ return read_bytes;
}
}
if (read_bytes >= bytes_available - 4) {
--- a/codec/decoder/core/inc/decoder_context.h
+++ b/codec/decoder/core/inc/decoder_context.h
@@ -557,6 +557,25 @@
}
return iThreadCount;
}
+//GetPrevFrameNum only applies when thread count >= 2. It returns the frame number held by the thread context whose uiDecodingTimeStamp is exactly one less than pCtx's; if none is found it returns pCtx->pLastDecPicInfo->iPrevFrameNum.
+static inline int32_t GetPrevFrameNum (PWelsDecoderContext pCtx) {
+ if (pCtx->uiDecodingTimeStamp > 0) { // the search below matches against (timestamp - 1), so it is skipped when the timestamp is 0
+ PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx; // dereferenced below without a NULL check
+ int32_t iThreadCount = int32_t (pThreadCtx->sThreadInfo.uiThrMaxNum); // loop bound: uiThrMaxNum of this thread context
+ int32_t uiThrNum = int32_t (pThreadCtx->sThreadInfo.uiThrNum); // uiThrNum of this thread context, used as the index origin below
+ for (int32_t i = 0; i < iThreadCount; ++i) {
+ int32_t id = i - uiThrNum; // offset relative to pThreadCtx; NOTE(review): presumably pThreadCtx is slot uiThrNum of a contiguous array, so pThreadCtx[id] is slot i - confirm
+ if (id != 0 && pThreadCtx[id].pCtx->uiDecodingTimeStamp == pCtx->uiDecodingTimeStamp - 1) { // id == 0 is pThreadCtx itself and is skipped
+ if (pThreadCtx[id].pCtx->pDec != NULL) {
+ int32_t iFrameNum = pThreadCtx[id].pCtx->pDec->iFrameNum; // prefer the frame number stored on that context's pDec
+ if (iFrameNum >= 0) return iFrameNum; // a negative value falls through to the context's own iFrameNum
+ }
+ return pThreadCtx[id].pCtx->iFrameNum; // pDec is NULL or its iFrameNum is negative
+ }
+ }
+ }
+ return pCtx->pLastDecPicInfo->iPrevFrameNum; // fallback: timestamp is 0 or no context matched
+}
//#ifdef __cplusplus
//}
//#endif//__cplusplus
--- a/codec/decoder/core/src/decoder.cpp
+++ b/codec/decoder/core/src/decoder.cpp
@@ -438,8 +438,9 @@
iNumRefFrames = MAX_REF_PIC_COUNT + 2;
} else {
iNumRefFrames = pCtx->pSps->iNumRefFrames + 2;
- if (GetThreadCount (pCtx) > 1) {
- iNumRefFrames = MAX_REF_PIC_COUNT + 1;
+ int32_t iThreadCount = GetThreadCount (pCtx);
+ if (iThreadCount > 1) {
+ iNumRefFrames = MAX_REF_PIC_COUNT;
}
}
--- a/codec/decoder/core/src/decoder_core.cpp
+++ b/codec/decoder/core/src/decoder_core.cpp
@@ -2421,9 +2421,6 @@
int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) {
int32_t iRet = ERR_NONE;
- if (GetThreadCount (pCtx) > 1 && pCtx->bNewSeqBegin) {
- WelsResetRefPic (pCtx);
- }
if (pCtx->eSliceType == B_SLICE) {
iRet = WelsInitBSliceRefList (pCtx, iPoc);
CreateImplicitWeightTable (pCtx);
@@ -2525,37 +2522,20 @@
SLayerInfo pLayerInfo;
PSliceHeaderExt pShExt = NULL;
PSliceHeader pSh = NULL;
-
- if (pLastThreadCtx != NULL) {
- pSh = &pNalCur->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
- if (pSh->iFirstMbInSlice == 0) {
- if (pLastThreadCtx->pCtx->pDec != NULL && pLastThreadCtx->pCtx->pDec->bIsUngroupedMultiSlice) {
- WAIT_EVENT (&pLastThreadCtx->sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE);
- }
- pCtx->pDec = NULL;
- pCtx->iTotalNumMbRec = 0;
- } else if (pLastThreadCtx->pCtx->pDec != NULL) {
- if (pSh->iFrameNum == pLastThreadCtx->pCtx->pDec->iFrameNum
- && pSh->iPicOrderCntLsb == pLastThreadCtx->pCtx->pDec->iFramePoc) {
- WAIT_EVENT (&pLastThreadCtx->sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE);
- pCtx->pDec = pLastThreadCtx->pCtx->pDec;
- pCtx->pDec->bIsUngroupedMultiSlice = true;
- pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic;
- pCtx->iTotalNumMbRec = pLastThreadCtx->pCtx->iTotalNumMbRec;
- }
- }
- }
bool isNewFrame = true;
if (iThreadCount > 1) {
isNewFrame = pCtx->pDec == NULL;
}
if (pCtx->pDec == NULL) {
- if (pLastThreadCtx != NULL && iIdx == 0) {
+ //call PrefetchPic first, before updating the reference lists in threaded mode;
+ //this prevents a possible hang in threaded decoding
+ pCtx->pDec = PrefetchPic (pCtx->pPicBuff);
+ if (pLastThreadCtx != NULL) {
pLastThreadCtx->pDec->bUsedAsRef = pLastThreadCtx->pCtx->uiNalRefIdc > 0;
if (pLastThreadCtx->pDec->bUsedAsRef) {
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
uint32_t i = 0;
- while (i < MAX_DPB_COUNT && pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]) {
+ while (i < MAX_REF_PIC_COUNT && pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]) {
pLastThreadCtx->pDec->pRefPic[listIdx][i] = pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i];
++i;
}
@@ -2567,7 +2547,11 @@
pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic;
}
}
- pCtx->pDec = PrefetchPic (pCtx->pPicBuff);
+ //WelsResetRefPic needs to be called when a new sequence is encountered;
+ //otherwise artifacts are observed in the decoded yuv in a couple of unit tests with multiple-slice frames
+ if (GetThreadCount (pCtx) > 1 && pCtx->bNewSeqBegin) {
+ WelsResetRefPic (pCtx);
+ }
if (pCtx->iTotalNumMbRec != 0)
pCtx->iTotalNumMbRec = 0;
@@ -2580,7 +2564,6 @@
return ERR_INFO_REF_COUNT_OVERFLOW;
}
if (pThreadCtx != NULL) {
- pCtx->pDec->bIsUngroupedMultiSlice = false;
pThreadCtx->pDec = pCtx->pDec;
if (iThreadCount > 1) ++pCtx->pDec->iRefCount;
uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
@@ -2683,25 +2666,8 @@
// Subclause 8.2.5.2 Decoding process for gaps in frame_num
int32_t iPrevFrameNum = pCtx->pLastDecPicInfo->iPrevFrameNum;
if (pLastThreadCtx != NULL) {
- if (pCtx->bNewSeqBegin) {
- iPrevFrameNum = 0;
- } else if (pLastThreadCtx->pDec != NULL) {
- if (pLastThreadCtx->pDec->uiTimeStamp == pCtx->uiTimeStamp - 1) {
- iPrevFrameNum = pLastThreadCtx->pDec->iFrameNum;
- if (iPrevFrameNum == -1) iPrevFrameNum = pLastThreadCtx->pCtx->iFrameNum;
- } else {
- int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
- for (int32_t i = 0; i < iThreadCount; ++i) {
- if (pThreadCtx[i - id].pCtx->uiTimeStamp == pCtx->uiTimeStamp - 1) {
- if (pThreadCtx[i - id].pDec != NULL) iPrevFrameNum = pThreadCtx[i - id].pDec->iFrameNum;
- if (iPrevFrameNum == -1) iPrevFrameNum = pThreadCtx[i - id].pCtx->iFrameNum;
- break;
- }
- }
- }
- } else {
- iPrevFrameNum = pCtx->bNewSeqBegin ? 0 : pLastThreadCtx->pCtx->iFrameNum;
- }
+ //call GetPrevFrameNum() to get correct iPrevFrameNum to prevent frame gap warning
+ iPrevFrameNum = pCtx->bNewSeqBegin ? 0 : GetPrevFrameNum (pCtx);
}
if (!kbIdrFlag &&
pSh->iFrameNum != iPrevFrameNum &&
@@ -2727,6 +2693,7 @@
if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID && isNewFrame) {
iRet = InitRefPicList (pCtx, pCtx->uiNalRefIdc, pSh->iPicOrderCntLsb);
+ if (iThreadCount > 1) isNewFrame = false;
if (iRet) {
pCtx->bRPLRError = true;
bAllRefComplete = false; // RPLR error, set ref pictures complete flag false