ref: 8f3c129f254b29a00dcca65b29dda8aafba8ac53
dir: /codec/decoder/core/src/decode_slice.cpp/
/*! * \copy * Copyright (c) 2008-2013, Cisco Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * * Abstract * current slice decoding * * History * 07/10/2008 Created * 08/09/2013 Modified * *****************************************************************************/ #include <memory.h> #include "typedefs.h" #include "dec_golomb.h" #include "fmo.h" #include "deblocking.h" #include "utils.h" #include "decode_slice.h" #include "error_code.h" #include "decode_mb_aux.h" #include "parse_mb_syn_cavlc.h" #include "rec_mb.h" #include "mv_pred.h" #include "as264_common.h" #include "cpu_core.h" #include "expand_pic.h" namespace WelsDec { int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) { int32_t iPreQP = 0; PDqLayer pCurLayer = pCtx->pCurDqLayer; PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader; int32_t iTotalMbTargetLayer = pSliceHeader->pSps->uiTotalMbCount; int32_t iCurLayerWidth = pCurLayer->iMbWidth << 4; int32_t iCurLayerHeight = pCurLayer->iMbHeight << 4; int32_t iNextMbXyIndex = 0; PFmo pFmo = pCtx->pFmo; int32_t iTotalNumMb = pCurSlice->iTotalMbInCurSlice; int32_t iCountNumMb = 0; PDeblockingFilterMbFunc pDeblockMb; if (!pCtx->bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) { return -1; } iNextMbXyIndex = pSliceHeader->iFirstMbInSlice; pCurLayer->iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; pCurLayer->iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; pCurLayer->iMbXyIndex = iNextMbXyIndex; if (0 == iNextMbXyIndex) { pCurLayer->pDec->iSpsId = pSliceHeader->iSpsId; pCurLayer->pDec->iPpsId = pSliceHeader->iPpsId; pCurLayer->pDec->uiQualityId = pCurLayer->sLayerInfo.sNalHeaderExt.uiQualityId; } do { iPreQP = pCurLayer->pLumaQp[pCurLayer->iMbXyIndex]; if (WelsTargetMbConstruction (pCtx)) { WelsLog (pCtx, WELS_LOG_WARNING, "WelsTargetSliceConstruction():::MB(%d, %d) construction error. 
pCurSlice_type:%d\n", pCurLayer->iMbX, pCurLayer->iMbY, pCurSlice->eSliceType); return -1; } ++iCountNumMb; ++pCurLayer->pDec->iTotalNumMbRec; if (iCountNumMb >= iTotalNumMb) { break; } if (pCurLayer->pDec->iTotalNumMbRec > iTotalMbTargetLayer) { WelsLog (pCtx, WELS_LOG_WARNING, "WelsTargetSliceConstruction():::fdec->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d\n", pCurLayer->pDec->iTotalNumMbRec, iTotalMbTargetLayer); return -1; } if (pSliceHeader->pPps->uiNumSliceGroups > 1) { iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex); } else { ++iNextMbXyIndex; } if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbTargetLayer) { // slice group boundary or end of a frame break; } pCurLayer->iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; pCurLayer->iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; pCurLayer->iMbXyIndex = iNextMbXyIndex; } while (1); pCtx->pDec->iWidthInPixel = iCurLayerWidth; pCtx->pDec->iHeightInPixel = iCurLayerHeight; if ((pCurSlice->eSliceType != I_SLICE) && (pCurSlice->eSliceType != P_SLICE)) return 0; pDeblockMb = WelsDeblockingMb; if (1 == pSliceHeader->uiDisableDeblockingFilterIdc) { return 0;//NO_SUPPORTED_FILTER_IDX } else { WelsDeblockingFilterSlice (pCtx, pDeblockMb); } // any other filter_idc not supported here, 7/22/2010 return 0; } int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) { int32_t iMbXy = pCurLayer->iMbXyIndex; int32_t i, iIndex, iOffset; WelsChromaDcIdct (pCurLayer->pScaledTCoeff[iMbXy] + 256); // 256 = 16*16 WelsChromaDcIdct (pCurLayer->pScaledTCoeff[iMbXy] + 320); // 320 = 16*16 + 16*4 for (i = 0; i < 16; i++) { //luma iIndex = g_kuiMbNonZeroCountIdx[i]; if (pCurLayer->pNzc[iMbXy][iIndex]) { iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2); pCtx->pIdctResAddPredFunc (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4)); } } for (i = 0; i < 4; i++) { //chroma iIndex = 
g_kuiMbNonZeroCountIdx[i + 16]; //Cb if (pCurLayer->pNzc[iMbXy][iIndex] || * (pCurLayer->pScaledTCoeff[iMbXy] + ((i + 16) << 4))) { iOffset = (((iIndex - 16) >> 2) << 2) * iStrideC + (((iIndex - 16) % 4) << 2); pCtx->pIdctResAddPredFunc (pDstU + iOffset, iStrideC, pCurLayer->pScaledTCoeff[iMbXy] + ((i + 16) << 4)); } iIndex = g_kuiMbNonZeroCountIdx[i + 20]; //Cr if (pCurLayer->pNzc[iMbXy][iIndex] || * (pCurLayer->pScaledTCoeff[iMbXy] + ((i + 20) << 4))) { iOffset = (((iIndex - 18) >> 2) << 2) * iStrideC + (((iIndex - 18) % 4) << 2); pCtx->pIdctResAddPredFunc (pDstV + iOffset, iStrideC , pCurLayer->pScaledTCoeff[iMbXy] + ((i + 20) << 4)); } } return 0; } int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) { int32_t iMbX = pCurLayer->iMbX; int32_t iMbY = pCurLayer->iMbY; uint8_t* pDstY, *pDstCb, *pDstCr; int32_t iLumaStride = pCtx->pDec->iLinesize[0]; int32_t iChromaStride = pCtx->pDec->iLinesize[1]; pDstY = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); GetInterPred (pDstY, pDstCb, pDstCr, pCtx); WelsMbInterSampleConstruction (pCtx, pCurLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride); pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (NULL, pCurLayer->pNzc[pCurLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti! 
return 0; } void_t WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp) { const int32_t kiQMul = g_kuiDequantCoeff[iQp][0]; #define STRIDE 16 int32_t i; int32_t iTemp[16]; //FIXME check if this is a good idea int16_t* pBlk = pBlock; static const int32_t kiXOffset[4] = {0, STRIDE, STRIDE << 2, 5 * STRIDE}; static const int32_t kiYOffset[4] = {0, STRIDE << 1, STRIDE << 3, 10 * STRIDE}; for (i = 0; i < 4; i++) { const int32_t kiOffset = kiYOffset[i]; const int32_t kiX1 = kiOffset + kiXOffset[2]; const int32_t kiX2 = STRIDE + kiOffset; const int32_t kiX3 = kiOffset + kiXOffset[3]; const int32_t kiI4 = i << 2; // 4*i const int32_t kiZ0 = pBlk[kiOffset] + pBlk[kiX1]; const int32_t kiZ1 = pBlk[kiOffset] - pBlk[kiX1]; const int32_t kiZ2 = pBlk[kiX2] - pBlk[kiX3]; const int32_t kiZ3 = pBlk[kiX2] + pBlk[kiX3]; iTemp[kiI4] = kiZ0 + kiZ3; iTemp[1 + kiI4] = kiZ1 + kiZ2; iTemp[2 + kiI4] = kiZ1 - kiZ2; iTemp[3 + kiI4] = kiZ0 - kiZ3; } for (i = 0; i < 4; i++) { const int32_t kiOffset = kiXOffset[i]; const int32_t kiI4 = 4 + i; const int32_t kiZ0 = iTemp[i] + iTemp[4 + kiI4]; const int32_t kiZ1 = iTemp[i] - iTemp[4 + kiI4]; const int32_t kiZ2 = iTemp[kiI4] - iTemp[8 + kiI4]; const int32_t kiZ3 = iTemp[kiI4] + iTemp[8 + kiI4]; pBlk[kiOffset] = ((kiZ0 + kiZ3) * kiQMul + 2) >> 2; //FIXME think about merging this into decode_resdual pBlk[kiYOffset[1] + kiOffset] = ((kiZ1 + kiZ2) * kiQMul + 2) >> 2; pBlk[kiYOffset[2] + kiOffset] = ((kiZ1 - kiZ2) * kiQMul + 2) >> 2; pBlk[kiYOffset[3] + kiOffset] = ((kiZ0 - kiZ3) * kiQMul + 2) >> 2; } #undef STRIDE } int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool_t bOutput) { //seems IPCM should not enter this path int32_t iMbXy = pCurLayer->iMbXyIndex; FORCE_STACK_ALIGN_1D (int16_t, pTempScaledTCoeff, MB_COEFF_LIST_SIZE, 16); memcpy (pTempScaledTCoeff, pCurLayer->pScaledTCoeff[iMbXy], 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0])); WelsFillRecNeededMbInfo (pCtx, bOutput, pCurLayer); if (IS_INTRA16x16 
(pCurLayer->pMbType[iMbXy])) { int32_t i, j; // really need? for (i = 0; i < 16; i++) { j = g_kuiLumaDcZigzagScan[i]; pTempScaledTCoeff[j] = pCurLayer->pScaledTCoeff[iMbXy][j]; } WelsLumaDcDequantIdct (pTempScaledTCoeff, pCurLayer->pLumaQp[iMbXy]); RecI16x16Mb (iMbXy, pCtx, pTempScaledTCoeff, pCurLayer); return 0; } if (IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) RecI4x4Mb (iMbXy, pCtx, pTempScaledTCoeff, pCurLayer); return 0; } int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) { int32_t iMbX = pCurLayer->iMbX; int32_t iMbY = pCurLayer->iMbY; uint8_t* pDstY, *pDstCb, *pDstCr; int32_t iLumaStride = pCtx->pDec->iLinesize[0]; int32_t iChromaStride = pCtx->pDec->iLinesize[1]; pDstY = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); GetInterPred (pDstY, pDstCb, pDstCr, pCtx); return 0; } void_t WelsMbCopy (uint8_t* pDst, int32_t iStrideDst, uint8_t* pSrc, int32_t iStrideSrc, int32_t iHeight, int32_t iWidth) { int32_t i; int32_t iOffsetDst = 0, iOffsetSrc = 0; for (i = 0; i < iHeight; i++) { memcpy (pDst + iOffsetDst, pSrc + iOffsetSrc, iWidth); iOffsetDst += iStrideDst; iOffsetSrc += iStrideSrc; } } int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) { PDqLayer pCurLayer = pCtx->pCurDqLayer; if (MB_TYPE_INTRA_PCM == pCurLayer->pMbType[pCurLayer->iMbXyIndex]) { //copy cs into fdec int32_t iCsStrideL = pCurLayer->iCsStride[0]; int32_t iCsStrideC = pCurLayer->iCsStride[1]; int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0]; int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1]; int32_t iCsOffsetL = (pCurLayer->iMbX + pCurLayer->iMbY * iCsStrideL) << 4; int32_t iCsOffsetC = (pCurLayer->iMbX + pCurLayer->iMbY * iCsStrideC) << 3; int32_t iDecOffsetL = (pCurLayer->iMbX + pCurLayer->iMbY * iDecStrideL) << 4; int32_t iDecOffsetC = (pCurLayer->iMbX + pCurLayer->iMbY * iDecStrideC) << 3; 
uint8_t* pSrcY = pCurLayer->pCsData[0] + iCsOffsetL; uint8_t* pSrcU = pCurLayer->pCsData[1] + iCsOffsetC; uint8_t* pSrcV = pCurLayer->pCsData[2] + iCsOffsetC; uint8_t* pDecY = pCurLayer->pDec->pData[0] + iDecOffsetL; uint8_t* pDecU = pCurLayer->pDec->pData[1] + iDecOffsetC; uint8_t* pDecV = pCurLayer->pDec->pData[2] + iDecOffsetC; WelsMbCopy (pDecY, iDecStrideL, pSrcY, iCsStrideL, 16, 16); WelsMbCopy (pDecU, iDecStrideC, pSrcU, iCsStrideC, 8, 8); WelsMbCopy (pDecV, iDecStrideC, pSrcV, iCsStrideC, 8, 8); return 0; } else if (IS_INTRA (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) { WelsMbIntraPredictionConstruction (pCtx, pCurLayer, 1); } else if (IS_INTER (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) { //InterMB if (0 == pCurLayer->pCbp[pCurLayer->iMbXyIndex]) { //uiCbp==0 include SKIP WelsMbInterPrediction (pCtx, pCurLayer); } else { WelsMbInterConstruction (pCtx, pCurLayer); } } else { WelsLog (pCtx, WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d\n", pCurLayer->pMbType[pCurLayer->iMbXyIndex]); return -1; } return 0; } void_t WelsChromaDcIdct (int16_t* pBlock) { int32_t iStride = 32; int32_t iXStride = 16; int32_t iStride1 = iXStride + iStride; int16_t* pBlk = pBlock; int32_t iA, iB, iC, iD, iE; iA = pBlk[0]; iB = pBlk[iXStride]; iC = pBlk[iStride]; iD = pBlk[iStride1]; iE = iA - iB; iA += iB; iB = iC - iD; iC += iD; pBlk[0] = (iA + iC) >> 1; pBlk[iXStride] = (iE + iB) >> 1; pBlk[iStride] = (iA - iC) >> 1; pBlk[iStride1] = (iE - iB) >> 1; } int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool_t bFirstSliceInLayer, PNalUnit pNalCur) { PDqLayer pCurLayer = pCtx->pCurDqLayer; PFmo pFmo = pCtx->pFmo; int32_t i, iRet; int32_t iNextMbXyIndex, iSliceIdc; PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt; PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader; int32_t iMbX, iMbY; const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi 
slice PBitStringAux pBs = pCurLayer->pBitStringAux; int32_t iUsedBits = 0; PWelsDecMbCavlcFunc pDecMbCavlcFunc; pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding. if (P_SLICE == pSliceHeader->eSliceType) { pDecMbCavlcFunc = WelsDecodeMbCavlcPSlice; } else { //I_SLICE pDecMbCavlcFunc = WelsDecodeMbCavlcISlice; } if (pSliceHeader->pPps->bConstainedIntraPredFlag) { pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain1Intra4x4; pCtx->pParseIntra4x4ModeFunc = ParseIntra4x4ModeConstrain1; pCtx->pParseIntra16x16ModeFunc = ParseIntra16x16ModeConstrain1; } else { pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain0Intra4x4; pCtx->pParseIntra4x4ModeFunc = ParseIntra4x4ModeConstrain0; pCtx->pParseIntra16x16ModeFunc = ParseIntra16x16ModeConstrain0; } pCtx->eSliceType = pSliceHeader->eSliceType; if (pCurLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) { //CABAC encoding is unsupported yet! return -1; } iNextMbXyIndex = pSliceHeader->iFirstMbInSlice; if (iNextMbXyIndex >= kiCountNumMb) { WelsLog (pCtx, WELS_LOG_ERROR, "WelsDecodeSlice()::iFirstMbInSlice(%d) > pSps->kiTotalMb(%d). ERROR!!! 
resolution change....\n", iNextMbXyIndex, kiCountNumMb); pCtx->iErrorCode |= dsNoParamSets; return dsNoParamSets; } iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009 pSlice->iMbSkipRun = -1; iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurLayer->uiLayerDqId; pCurLayer->iMbX = iMbX; pCurLayer->iMbY = iMbY; pCurLayer->iMbXyIndex = iNextMbXyIndex; if (pSliceHeaderExt->bSliceSkipFlag == 1) { for (i = 0; i < (int32_t)pSliceHeaderExt->uiNumMbsInSlice; i++) { pCurLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc; pCurLayer->pResidualPredFlag[iNextMbXyIndex] = 1; if (pSliceHeaderExt->sSliceHeader.pPps->uiNumSliceGroups > 1) { iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex); } else { ++iNextMbXyIndex; } iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; iMbY = iNextMbXyIndex % pCurLayer->iMbHeight; pCurLayer->iMbX = iMbX; pCurLayer->iMbY = iMbY; pCurLayer->iMbXyIndex = iNextMbXyIndex; } return 0; } do { pCurLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc; iRet = pDecMbCavlcFunc (pCtx, pNalCur); if (iRet != ERR_NONE) { return iRet; } ++pSlice->iTotalMbInCurSlice; if (pSliceHeader->pPps->uiNumSliceGroups > 1) { iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex); } else { ++iNextMbXyIndex; } if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame break; } // check whether there is left bits to read next time in case multiple slices iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits); if (iUsedBits == pBs->iBits && 0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun) { // slice boundary break; } if (iUsedBits > pBs->iBits) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash. 
WelsLog (pCtx, WELS_LOG_WARNING, "WelsDecodeSlice()::::pBs incomplete, iUsedBits:%d > pBs->iBits:%d, MUST stop decoding.\n", iUsedBits, pBs->iBits); return -1; } iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; pCurLayer->iMbX = iMbX; pCurLayer->iMbY = iMbY; pCurLayer->iMbXyIndex = iNextMbXyIndex; } while (1); return ERR_NONE; } int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { SVlcTable* pVlcTable = &pCtx->sVlcTable; PDqLayer pCurLayer = pCtx->pCurDqLayer; PBitStringAux pBs = pCurLayer->pBitStringAux; PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; SNeighAvail sNeighAvail; int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; int32_t iMbX = pCurLayer->iMbX; int32_t iMbY = pCurLayer->iMbY; int32_t iMbXy = pCurLayer->iMbXyIndex; int32_t iNMbMode, i; uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; FORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; uiMbType = BsGetUe (pBs); if (uiMbType > 25) { return ERR_INFO_INVALID_MB_TYPE; } if (25 == uiMbType) { int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0]; int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1]; int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4; int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3; uint8_t* pDecY = pCurLayer->pCsData[0] + iOffsetL; uint8_t* pDecU = pCurLayer->pCsData[1] + iOffsetC; uint8_t* pDecV = pCurLayer->pCsData[2] + iOffsetC; uint8_t* pTmpBsBuf; int32_t i; int32_t iCopySizeY = (sizeof (uint8_t) << 4); int32_t iCopySizeUV = (sizeof (uint8_t) << 3); int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2; pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; //step 1: locating bit-stream pointer [must align into integer byte] pBs->pCurBuf -= iIndex; //step 2: 
copy pixel from bit-stream into fdec [reconstruction] pTmpBsBuf = pBs->pCurBuf; for (i = 0; i < 16; i++) { //luma memcpy (pDecY , pTmpBsBuf, iCopySizeY); pDecY += iDecStrideL; pTmpBsBuf += 16; } for (i = 0; i < 8; i++) { //cb memcpy (pDecU, pTmpBsBuf, iCopySizeUV); pDecU += iDecStrideC; pTmpBsBuf += 8; } for (i = 0; i < 8; i++) { //cr memcpy (pDecV, pTmpBsBuf, iCopySizeUV); pDecV += iDecStrideC; pTmpBsBuf += 8; } pBs->pCurBuf += 384; InitReadBits (pBs); //step 3: update QP and pNonZeroCount pCurLayer->pLumaQp[iMbXy] = 0; pCurLayer->pChromaQp[iMbXy] = 0; memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy])); //JVT-x201wcm1.doc, page229, 2009.10.23 return 0; } else if (0 == uiMbType) { //reference to JM FORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) { return -1; } //uiCbp uiCbp = BsGetUe (pBs); //G.9.1 Alternative parsing process for coded pBlock pattern if (uiCbp > 47) return ERR_INFO_INVALID_CBP; uiCbp = g_kuiIntra4x4CbpTable[uiCbp]; pCurLayer->pCbp[iMbXy] = uiCbp; uiCbpC = uiCbp >> 4; uiCbpL = uiCbp & 15; } else { //I_PCM exclude, we can ignore it pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16; pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; uiCbpC = pCurLayer->pCbp[iMbXy] >> 4; uiCbpL = pCurLayer->pCbp[iMbXy] & 15; WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer); if (pCtx->pParseIntra16x16ModeFunc (&sNeighAvail, pBs, pCurLayer)) { return -1; } } iNMbMode = BASE_MB; memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0])); ST32 (&pCurLayer->pNzc[iMbXy][0], 0); ST32 (&pCurLayer->pNzc[iMbXy][4], 0); ST32 (&pCurLayer->pNzc[iMbXy][8], 0); ST32 (&pCurLayer->pNzc[iMbXy][12], 0); ST32 
(&pCurLayer->pNzc[iMbXy][16], 0); ST32 (&pCurLayer->pNzc[iMbXy][20], 0); if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) { pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQp[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] + pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)]; } if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { int32_t iQpDelta, iId8x8, iId4x4; iQpDelta = BsGetSe (pBs); if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range return ERR_INFO_INVALID_QP; } pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp + iQpDelta; //update iLastMbQp //refer to JVT-X201wcm1.doc equation(7-35) if ((unsigned) (pCurLayer->pLumaQp[iMbXy]) > 51) { if (pCurLayer->pLumaQp[iMbXy] < 0) { pCurLayer->pLumaQp[iMbXy] += 52; } else { pCurLayer->pLumaQp[iMbXy] -= 52; } } //QP should be in the range of [0, 51] if (pCurLayer->pLumaQp[iMbXy] < 0 || pCurLayer->pLumaQp[iMbXy] > 51) { return ERR_INFO_INVALID_QP; } pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy]; pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQp[WELS_CLIP3 (pSlice->iLastMbQp + pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)]; BsStartCavlc (pBs); if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { //step1: Luma DC if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], iNMbMode, pCurLayer->pLumaQp[iMbXy], pCtx)) { return -1;//abnormal } //step2: Luma AC if (uiCbpL) { for (i = 0; i < 16; i++) { if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), iNMbMode, pCurLayer->pLumaQp[iMbXy], pCtx)) { return -1;//abnormal } } ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 
(&pNonZeroCount[1 + 8 * 3])); ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } else { //pNonZeroCount = 0 ST32 (&pCurLayer->pNzc[iMbXy][0], 0); ST32 (&pCurLayer->pNzc[iMbXy][4], 0); ST32 (&pCurLayer->pNzc[iMbXy][8], 0); ST32 (&pCurLayer->pNzc[iMbXy][12], 0); } } else { //non-MB_TYPE_INTRA16x16 for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { if (uiCbpL & (1 << iId8x8)) { int32_t iIndex = (iId8x8 << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { //Luma (DC and AC decoding together) if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), iNMbMode, pCurLayer->pLumaQp[iMbXy], pCtx)) { return -1;//abnormal } iIndex++; } } else { ST16 (&pNonZeroCount[g_kuiCacheNzcScanIdx[ (iId8x8 << 2)]], 0); ST16 (&pNonZeroCount[g_kuiCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); } } ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } //chroma //step1: DC if (1 == uiCbpC || 2 == uiCbpC) { for (i = 0; i < 2; i++) { //Cb Cr if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, CHROMA_DC, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), iNMbMode, pCurLayer->pChromaQp[iMbXy], pCtx)) { return -1;//abnormal } } } //step2: AC if (2 == uiCbpC) { for (i = 0; i < 2; i++) { //Cb Cr int32_t iIndex = 16 + (i << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), CHROMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), iNMbMode, pCurLayer->pChromaQp[iMbXy], pCtx)) { return -1;//abnormal } iIndex++; } } ST16 (&pCurLayer->pNzc[iMbXy][16], 
LD16 (&pNonZeroCount[6 + 8 * 1])); ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); } else { ST16 (&pCurLayer->pNzc[iMbXy][16], 0); ST16 (&pCurLayer->pNzc[iMbXy][20], 0); ST16 (&pCurLayer->pNzc[iMbXy][18], 0); ST16 (&pCurLayer->pNzc[iMbXy][22], 0); } BsEndCavlc (pBs); } else { ST32 (&pCurLayer->pNzc[iMbXy][0], 0); ST32 (&pCurLayer->pNzc[iMbXy][4], 0); ST32 (&pCurLayer->pNzc[iMbXy][8], 0); ST32 (&pCurLayer->pNzc[iMbXy][12], 0); ST32 (&pCurLayer->pNzc[iMbXy][16], 0); ST32 (&pCurLayer->pNzc[iMbXy][20], 0); } return 0; } int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur) { PDqLayer pCurLayer = pCtx->pCurDqLayer; PBitStringAux pBs = pCurLayer->pBitStringAux; PSliceHeaderExt pSliceHeaderExt = &pCurLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt; int32_t iBaseModeFlag; int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15 if (pSliceHeaderExt->bAdaptiveBaseModeFlag == 1) { iBaseModeFlag = BsGetOneBit (pBs); } else { iBaseModeFlag = pSliceHeaderExt->bDefaultBaseModeFlag; } if (!iBaseModeFlag) { iRet = WelsActualDecodeMbCavlcISlice (pCtx); } else { WelsLog (pCtx, WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.\n", iBaseModeFlag); return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP); } if (iRet) { //occur error when parsing, MUST STOP decoding return iRet; } return 0; } int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { SVlcTable* pVlcTable = &pCtx->sVlcTable; PDqLayer pCurLayer = pCtx->pCurDqLayer; PBitStringAux pBs = pCurLayer->pBitStringAux; PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; SNeighAvail sNeighAvail; int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; int32_t 
iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; int32_t iMbX = pCurLayer->iMbX; int32_t iMbY = pCurLayer->iMbY; int32_t iMbXy = pCurLayer->iMbXyIndex; int32_t iNMbMode, i; uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; FORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23 uiMbType = BsGetUe (pBs); if (uiMbType < 5) { //inter MB type int16_t iMotionVector[LIST_A][30][MV_A]; int8_t iRefIndex[LIST_A][30]; pCurLayer->pMbType[iMbXy] = g_ksInterMbTypeInfo[uiMbType].iType; WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer); if (ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) { return -1;//abnormal } if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) { pCurLayer->pResidualPredFlag[iMbXy] = BsGetOneBit (pBs); } else { pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; } if (pCurLayer->pResidualPredFlag[iMbXy] == 0) { iNMbMode = BASE_MB; pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; } else { WelsLog (pCtx, WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.\n"); return -1; } } else { //intra MB type uiMbType -= 5; if (uiMbType > 25) { return ERR_INFO_INVALID_MB_TYPE; } if (25 == uiMbType) { int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0]; int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1]; int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4; int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3; uint8_t* pDecY = pCurLayer->pCsData[0] + iOffsetL; uint8_t* pDecU = pCurLayer->pCsData[1] + iOffsetC; uint8_t* pDecV = pCurLayer->pCsData[2] + iOffsetC; uint8_t* pTmpBsBuf; int32_t i; int32_t iCopySizeY = (sizeof (uint8_t) << 4); int32_t iCopySizeUV = (sizeof (uint8_t) << 3); int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2; pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; //step 1: locating bit-stream pointer [must align into integer byte] pBs->pCurBuf -= iIndex; //step 2: copy pixel from bit-stream into fdec 
[reconstruction] pTmpBsBuf = pBs->pCurBuf; for (i = 0; i < 16; i++) { //luma memcpy (pDecY , pTmpBsBuf, iCopySizeY); pDecY += iDecStrideL; pTmpBsBuf += 16; } for (i = 0; i < 8; i++) { //cb memcpy (pDecU, pTmpBsBuf, iCopySizeUV); pDecU += iDecStrideC; pTmpBsBuf += 8; } for (i = 0; i < 8; i++) { //cr memcpy (pDecV, pTmpBsBuf, iCopySizeUV); pDecV += iDecStrideC; pTmpBsBuf += 8; } pBs->pCurBuf += 384; InitReadBits (pBs); //step 3: update QP and pNonZeroCount pCurLayer->pLumaQp[iMbXy] = 0; pCurLayer->pChromaQp[iMbXy] = 0; ST32 (&pCurLayer->pNzc[iMbXy][0], 0); ST32 (&pCurLayer->pNzc[iMbXy][4], 0); ST32 (&pCurLayer->pNzc[iMbXy][8], 0); ST32 (&pCurLayer->pNzc[iMbXy][12], 0); return 0; } else { if (0 == uiMbType) { FORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) { return -1; } iNMbMode = BASE_MB; } else { //I_PCM exclude, we can ignore it pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16; pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; uiCbpC = pCurLayer->pCbp[iMbXy] >> 4; uiCbpL = pCurLayer->pCbp[iMbXy] & 15; WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer); if (pCtx->pParseIntra16x16ModeFunc (&sNeighAvail, pBs, pCurLayer)) { return -1; } iNMbMode = BASE_MB; } } } if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { uiCbp = BsGetUe (pBs); { if (uiCbp > 47) return ERR_INFO_INVALID_CBP; if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy]) { uiCbp = g_kuiIntra4x4CbpTable[uiCbp]; } else //inter uiCbp = g_kuiInterCbpTable[uiCbp]; } pCurLayer->pCbp[iMbXy] = uiCbp; uiCbpC = pCurLayer->pCbp[iMbXy] >> 4; uiCbpL = pCurLayer->pCbp[iMbXy] & 15; } if (iNMbMode == BASE_MB) { pCtx->sBlockFunc.pWelsBlockZero16x16Func (pCurLayer->pScaledTCoeff[iMbXy], 16); 
pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256, 8); pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256 + 64, 8); ST32 (&pCurLayer->pNzc[iMbXy][0], 0); ST32 (&pCurLayer->pNzc[iMbXy][4], 0); ST32 (&pCurLayer->pNzc[iMbXy][8], 0); ST32 (&pCurLayer->pNzc[iMbXy][12], 0); ST32 (&pCurLayer->pNzc[iMbXy][20], 0); if (pCurLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurLayer->pMbType[iMbXy]) && !IS_I_BL (pCurLayer->pMbType[iMbXy])) { pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQp[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] + pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)]; } } if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { int32_t iQpDelta, iId8x8, iId4x4; iQpDelta = BsGetSe (pBs); if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range return ERR_INFO_INVALID_QP; } pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp + iQpDelta; //update iLastMbQp //refer to JVT-X201wcm1.doc equation(7-35) if ((unsigned) (pCurLayer->pLumaQp[iMbXy]) > 51) { if (pCurLayer->pLumaQp[iMbXy] < 0) { pCurLayer->pLumaQp[iMbXy] += 52; } else { pCurLayer->pLumaQp[iMbXy] -= 52; } } //QP should be in the range of [0, 51] if (pCurLayer->pLumaQp[iMbXy] < 0 || pCurLayer->pLumaQp[iMbXy] > 51) { return ERR_INFO_INVALID_QP; } pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy]; pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQp[WELS_CLIP3 (pSlice->iLastMbQp + pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)]; BsStartCavlc (pBs); if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { //step1: Luma DC if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], iNMbMode, pCurLayer->pLumaQp[iMbXy], pCtx)) { return -1;//abnormal } //step2: Luma AC if (uiCbpL) { for (i = 0; i < 16; i++) { if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX 
(iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), iNMbMode, pCurLayer->pLumaQp[iMbXy], pCtx)) { return -1;//abnormal } } ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } else { //pNonZeroCount = 0 ST32 (&pCurLayer->pNzc[iMbXy][0], 0); ST32 (&pCurLayer->pNzc[iMbXy][4], 0); ST32 (&pCurLayer->pNzc[iMbXy][8], 0); ST32 (&pCurLayer->pNzc[iMbXy][12], 0); } } else { //non-MB_TYPE_INTRA16x16 for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { if (uiCbpL & (1 << iId8x8)) { int32_t iIndex = (iId8x8 << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { //Luma (DC and AC decoding together) if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), iNMbMode, pCurLayer->pLumaQp[iMbXy], pCtx)) { return -1;//abnormal } iIndex++; } } else { ST16 (&pNonZeroCount[g_kuiCacheNzcScanIdx[iId8x8 << 2]], 0); ST16 (&pNonZeroCount[g_kuiCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); } } ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } //chroma //step1: DC if (1 == uiCbpC || 2 == uiCbpC) { for (i = 0; i < 2; i++) { //Cb Cr if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, CHROMA_DC, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), iNMbMode, pCurLayer->pChromaQp[iMbXy], pCtx)) { return -1;//abnormal } } } else { } //step2: AC if (2 == uiCbpC) { for (i = 0; i < 2; i++) { //Cb Cr int32_t iIndex = 16 + (i << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { if 
(WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), CHROMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), iNMbMode, pCurLayer->pChromaQp[iMbXy], pCtx)) { return -1;//abnormal } iIndex++; } } ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); } else { ST32 (&pCurLayer->pNzc[iMbXy][16], 0); ST32 (&pCurLayer->pNzc[iMbXy][20], 0); } BsEndCavlc (pBs); } else { ST32 (&pCurLayer->pNzc[iMbXy][0], 0); ST32 (&pCurLayer->pNzc[iMbXy][4], 0); ST32 (&pCurLayer->pNzc[iMbXy][8], 0); ST32 (&pCurLayer->pNzc[iMbXy][12], 0); ST32 (&pCurLayer->pNzc[iMbXy][16], 0); ST32 (&pCurLayer->pNzc[iMbXy][20], 0); } return 0; }

/*!
 * \brief   Parse the current macroblock of a CAVLC-coded P slice.
 *
 * When pSlice->iMbSkipRun is -1 a new run length is read with BsGetUe().
 * While the run counter is non-zero the macroblock at pCurLayer->iMbXyIndex
 * is set up as MB_TYPE_SKIP: its non-zero-count entries, reference indices
 * and coded block pattern are cleared and all 16 motion vectors are set to
 * the vector returned by PredPSkipMvFromNeighbor(). Otherwise the base-mode
 * flag is resolved and parsing continues in WelsActualDecodeMbCavlcPSlice().
 *
 * \param   pCtx     decoder context; layer, bit-stream reader and slice are
 *                   taken from pCtx->pCurDqLayer
 * \param   pNalCur  current NAL unit; only its quality and dependency ids
 *                   are read here
 * \return  0 on success; -1 when the skip-run length is negative; the
 *          ERR_INFO_UNSUPPORTED_ILP error code when base mode is requested;
 *          otherwise the value returned by WelsActualDecodeMbCavlcPSlice()
 */
int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur) {
  PDqLayer pCurLayer        = pCtx->pCurDqLayer;
  PBitStringAux pBs         = pCurLayer->pBitStringAux;
  PSlice pSlice             = &pCurLayer->sLayerInfo.sSliceInLayer;
  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
  const int32_t iMbXy       = pCurLayer->iMbXyIndex;
  int32_t iBaseModeFlag;
  int32_t i;

  if (-1 == pSlice->iMbSkipRun) {
    pSlice->iMbSkipRun = BsGetUe (pBs);
    // Any negative value is rejected: it is not a usable count, and because
    // the test below is a plain non-zero check followed by a decrement, it
    // would otherwise mark every following macroblock as skipped.
    if ((int32_t)pSlice->iMbSkipRun < 0) {
      return -1;
    }
  }

  if (pSlice->iMbSkipRun--) {
    int16_t iMv[2]      = {0};
    uint32_t uiPackedMv = 0;

    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;

    // clear the 24 non-zero-count entries, four bytes per store
    for (i = 0; i < 24; i += 4) {
      ST32 (&pCurLayer->pNzc[iMbXy][i], 0);
    }

    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);

    //predict iMv
    PredPSkipMvFromNeighbor (pCurLayer, iMv);

    // Pack both 16-bit components into one 32-bit word with memcpy();
    // dereferencing iMv through a uint32_t* would access an int16_t array
    // via an incompatible and possibly misaligned pointer.
    memcpy (&uiPackedMv, iMv, sizeof (uiPackedMv));
    for (i = 0; i < 16; i++) {
      ST32 (pCurLayer->pMv[0][iMbXy][i], uiPackedMv);
    }

    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
      memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
    }

    //reset rS
    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
        (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
      pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
      pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQp[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
                                    pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
    }

    pCurLayer->pCbp[iMbXy] = 0;
    return 0;
  }

  if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
    iBaseModeFlag = BsGetOneBit (pBs);
  } else {
    iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
  }

  if (iBaseModeFlag) {
    WelsLog (pCtx, WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.\n",
             iBaseModeFlag);
    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
  }

  // a non-zero result means parsing failed and decoding must stop
  return WelsActualDecodeMbCavlcPSlice (pCtx);
}

/*!
 * \brief   Fill an iWidth x iHeight block of 16-bit coefficients row by row.
 *
 * \param   pBlock   first coefficient of the block
 * \param   iWidth   number of coefficients written in each row
 * \param   iHeight  number of rows
 * \param   iStride  distance between the starts of consecutive rows, in
 *                   int16_t elements
 * \param   uiVal    byte value passed to memset(); every byte of every
 *                   coefficient is set to it, so only 0 produces elements
 *                   whose value equals uiVal
 */
void_t WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal) {
  for (int32_t iRow = 0; iRow < iHeight; ++iRow) {
    memset (pBlock + iRow * iStride, uiVal, iWidth * sizeof (int16_t));
  }
}

/*!
 * \brief   Install the block helper function pointers.
 *
 * The C implementations are installed first. When X86_ASM is defined and
 * iCpu has WELS_CPU_SSE2 set, the two block-zero pointers are replaced by
 * their SSE2 versions.
 *
 * \param   pFunc  function table to fill
 * \param   iCpu   CPU feature bit mask; only WELS_CPU_SSE2 is tested
 */
void_t WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
  pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_c;
  pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_c;
  pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_c;

#ifdef X86_ASM
  if (iCpu & WELS_CPU_SSE2) {
    pFunc->pWelsBlockZero16x16Func = WelsResBlockZero16x16_sse2;
    pFunc->pWelsBlockZero8x8Func   = WelsResBlockZero8x8_sse2;
  }
#endif
}

/*!
 * \brief   Zero a 16x16 block of coefficients.
 * \param   pBlock   first coefficient of the block
 * \param   iStride  row pitch in int16_t elements
 */
void_t WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
  WelsBlockInit (pBlock, 16, 16, iStride, 0);
}

/*!
 * \brief   Zero an 8x8 block of coefficients.
 * \param   pBlock   first coefficient of the block
 * \param   iStride  row pitch in int16_t elements
 */
void_t WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
  WelsBlockInit (pBlock, 8, 8, iStride, 0);
}

void_t
SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount) {
  // Collapse each of the 24 entries addressed through g_kuiMbNonZeroCountIdx
  // to a flag: 1 when the stored count is non-zero, 0 otherwise.
  // pBlock is not read or written by this C implementation.
  for (int32_t iEntry = 0; iEntry < 24; ++iEntry) {
    int8_t* pCount = &pNonZeroCount[g_kuiMbNonZeroCountIdx[iEntry]];
    *pCount = (*pCount != 0);
  }
}

} // namespace WelsDec