ref: 1699775cbbb131a2c8b81016e47a0c204428a33e
parent: 9a602cac1b52b22e4e1e2e925247263ad554b0da
author: zhuling13 <zhulingwei2012@gmail.com>
date: Tue Dec 9 19:59:43 EST 2014
support scaling list astyle change bugs fix security issue fix the issue pps sps order change according to wayne's suggestion change according to wayne's
--- /dev/null
+++ b/\
@@ -1,0 +1,1731 @@
+/*!
+ * \copy
+ * Copyright (c) 2008-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Abstract
+ * current slice decoding
+ *
+ * History
+ * 07/10/2008 Created
+ * 08/09/2013 Modified
+ *
+ *****************************************************************************/
+
+
+#include "deblocking.h"
+
+#include "decode_slice.h"
+
+#include "parse_mb_syn_cavlc.h"
+#include "parse_mb_syn_cabac.h"
+#include "rec_mb.h"
+#include "mv_pred.h"
+
+#include "cpu_core.h"
+
+namespace WelsDec {
+
+/*!
+ * \brief  Reconstruct every macroblock of the current slice, then deblock the slice.
+ *
+ * Starting at the slice's first MB, each macroblock is rebuilt through
+ * WelsTargetMbConstruction() and flagged in pMbCorrectlyDecodedFlag. The walk
+ * follows FmoNextMb() when the PPS declares more than one slice group and plain
+ * index order otherwise.
+ *
+ * \param  pCtx  decoder context; pCurDqLayer, pFmo and pDec are read and updated
+ * \return 0 on success (also when the slice is not deblocked here),
+ *         -1 on a layer-width mismatch, an MB construction failure, or when the
+ *         reconstructed-MB counter exceeds the SPS macroblock count
+ */
+int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
+  PDqLayer pLayer = pCtx->pCurDqLayer;
+  PSlice pSlice = &pLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PFmo pFmo = pCtx->pFmo;
+
+  const int32_t kiMbCountInLayer = pHeader->pSps->uiTotalMbCount;
+  const int32_t kiMbCountInSlice = pSlice->iTotalMbInCurSlice;
+  const int32_t kiWidthInPixel = pLayer->iMbWidth << 4;
+  const int32_t kiHeightInPixel = pLayer->iMbHeight << 4;
+
+  if (!pCtx->bAvcBasedFlag && kiWidthInPixel != pCtx->iCurSeqIntervalMaxPicWidth) {
+    return -1;
+  }
+
+  // position the layer cursor on the first MB of the slice
+  int32_t iMbXy = pHeader->iFirstMbInSlice;
+  pLayer->iMbX = iMbXy % pLayer->iMbWidth;
+  pLayer->iMbY = iMbXy / pLayer->iMbWidth;
+  pLayer->iMbXyIndex = iMbXy;
+
+  if (0 == iMbXy) {
+    // a slice starting at MB 0 records the parameter-set ids and quality id on the picture
+    pLayer->pDec->iSpsId = pHeader->iSpsId;
+    pLayer->pDec->iPpsId = pHeader->iPpsId;
+
+    pLayer->pDec->uiQualityId = pLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
+  }
+
+  int32_t iMbDone = 0;
+  while (iMbDone < kiMbCountInSlice) {
+    if (WelsTargetMbConstruction (pCtx)) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+               "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d",
+               pLayer->iMbX, pLayer->iMbY, pSlice->eSliceType);
+
+      return -1;
+    }
+    ++iMbDone;
+
+    // count an MB only the first time it is reconstructed; a repeat just overwrites it
+    if (!pLayer->pMbCorrectlyDecodedFlag[iMbXy]) {
+      pLayer->pMbCorrectlyDecodedFlag[iMbXy] = true;
+      ++pCtx->iTotalNumMbRec;
+    }
+
+    if (pCtx->iTotalNumMbRec > kiMbCountInLayer) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+               "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d",
+               pCtx->iTotalNumMbRec, kiMbCountInLayer);
+
+      return -1;
+    }
+
+    if (pHeader->pPps->uiNumSliceGroups <= 1) {
+      ++iMbXy;
+    } else {
+      iMbXy = FmoNextMb (pFmo, iMbXy);
+    }
+    if (-1 == iMbXy || iMbXy >= kiMbCountInLayer) { // slice group boundary or end of a frame
+      break;
+    }
+
+    pLayer->iMbX = iMbXy % pLayer->iMbWidth;
+    pLayer->iMbY = iMbXy / pLayer->iMbWidth;
+    pLayer->iMbXyIndex = iMbXy;
+  }
+
+  pCtx->pDec->iWidthInPixel = kiWidthInPixel;
+  pCtx->pDec->iHeightInPixel = kiHeightInPixel;
+
+  // Only I and P slices are deblocked here, and only when the slice header does
+  // not set disable_deblocking_filter_idc to 1.
+  const bool kbIOrPSlice = (I_SLICE == pSlice->eSliceType) || (P_SLICE == pSlice->eSliceType);
+  if (kbIOrPSlice && 1 != pHeader->uiDisableDeblockingFilterIdc) {
+    PDeblockingFilterMbFunc pDeblockMb = WelsDeblockingMb;
+    WelsDeblockingFilterSlice (pCtx, pDeblockMb);
+  }
+
+  return 0;
+}
+
+/*!
+ * \brief  Add the inverse-transformed residual of the current MB onto the samples at pDst*.
+ *
+ * \param  pCtx       decoder context providing pIdctResAddPredFunc
+ * \param  pCurLayer  current layer; pScaledTCoeff and pNzc of MB iMbXyIndex are read
+ * \param  pDstY      top-left luma sample of the MB
+ * \param  pDstU      top-left Cb sample of the MB
+ * \param  pDstV      top-left Cr sample of the MB
+ * \param  iStrideL   luma stride
+ * \param  iStrideC   chroma stride
+ * \return always 0
+ */
+int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer,
+                                       uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) {
+  const int32_t kiMbXy = pCurLayer->iMbXyIndex;
+  int16_t* pCoeff = pCurLayer->pScaledTCoeff[kiMbXy];
+
+  // chroma DC transforms first: one at offset 256 (16*16), one at 320 (16*16 + 16*4)
+  WelsChromaDcIdct (pCoeff + 256);
+  WelsChromaDcIdct (pCoeff + 320);
+
+  // luma: 16 blocks of 4x4, skipped when the block has no non-zero coefficient
+  for (int32_t iBlk = 0; iBlk < 16; ++iBlk) {
+    const int32_t kiScanIdx = g_kuiMbCountScan4Idx[iBlk];
+    if (!pCurLayer->pNzc[kiMbXy][kiScanIdx]) {
+      continue;
+    }
+    const int32_t kiRow = kiScanIdx >> 2;
+    const int32_t kiCol = kiScanIdx % 4;
+    const int32_t kiDstOffset = (kiRow << 2) * iStrideL + (kiCol << 2);
+    pCtx->pIdctResAddPredFunc (pDstY + kiDstOffset, iStrideL, pCoeff + (iBlk << 4));
+  }
+
+  // chroma: 4 blocks per plane; a block is also processed when only its first (DC) coefficient is set
+  for (int32_t iBlk = 0; iBlk < 4; ++iBlk) {
+    int16_t* pCbRes = pCoeff + ((iBlk + 16) << 4);
+    const int32_t kiCbIdx = g_kuiMbCountScan4Idx[iBlk + 16]; //Cb
+    if (pCurLayer->pNzc[kiMbXy][kiCbIdx] || *pCbRes) {
+      const int32_t kiPos = kiCbIdx - 16;
+      const int32_t kiDstOffset = ((kiPos >> 2) << 2) * iStrideC + ((kiPos % 4) << 2);
+      pCtx->pIdctResAddPredFunc (pDstU + kiDstOffset, iStrideC, pCbRes);
+    }
+
+    int16_t* pCrRes = pCoeff + ((iBlk + 20) << 4);
+    const int32_t kiCrIdx = g_kuiMbCountScan4Idx[iBlk + 20]; //Cr
+    if (pCurLayer->pNzc[kiMbXy][kiCrIdx] || *pCrRes) {
+      const int32_t kiPos = kiCrIdx - 18;
+      const int32_t kiDstOffset = ((kiPos >> 2) << 2) * iStrideC + ((kiPos % 4) << 2);
+      pCtx->pIdctResAddPredFunc (pDstV + kiDstOffset, iStrideC, pCrRes);
+    }
+  }
+
+  return 0;
+}
+/*!
+ * \brief  Build an inter MB that carries residual: inter prediction, residual add, NZC update.
+ *
+ * \param  pCtx       decoder context (strides come from pCtx->pDec)
+ * \param  pCurLayer  current layer (MB position and destination planes come from here)
+ * \return always 0
+ */
+int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
+  const int32_t kiStrideY = pCtx->pDec->iLinesize[0];
+  const int32_t kiStrideUV = pCtx->pDec->iLinesize[1];
+
+  // an MB covers 16x16 luma and 8x8 chroma samples, hence the << 4 and << 3
+  const int32_t kiLumaOffset = (pCurLayer->iMbY * kiStrideY + pCurLayer->iMbX) << 4;
+  const int32_t kiChromaOffset = (pCurLayer->iMbY * kiStrideUV + pCurLayer->iMbX) << 3;
+
+  uint8_t* pY = pCurLayer->pDec->pData[0] + kiLumaOffset;
+  uint8_t* pCb = pCurLayer->pDec->pData[1] + kiChromaOffset;
+  uint8_t* pCr = pCurLayer->pDec->pData[2] + kiChromaOffset;
+
+  GetInterPred (pY, pCb, pCr, pCtx);
+  WelsMbInterSampleConstruction (pCtx, pCurLayer, pY, pCb, pCr, kiStrideY, kiStrideUV);
+
+  // set every non-zero nzc to 1 (the deblocking stage could be optimized here)
+  pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (pCurLayer->pNzc[pCurLayer->iMbXyIndex]);
+  return 0;
+}
+
+/*!
+ * \brief  In-place 4x4 butterfly transform and dequantization of the 16 luma DC terms.
+ *
+ * The DC terms are read and written at offsets 0, 16, 32, ... 240 of pBlock.
+ *
+ * \param  pBlock  coefficient buffer of the macroblock
+ * \param  iQp     luma QP, used to index the dequantization tables
+ * \param  pCtx    decoder context; selects the scaling-list table when bUseScalingList is set
+ */
+void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx) {
+  // offsets (in int16_t units) that together address the 16 DC positions
+  static const int32_t kiColOffset[4] = {0, 16, 64, 80};
+  static const int32_t kiRowOffset[4] = {0, 32, 128, 160};
+
+  int32_t iQMul;
+  if (pCtx->bUseScalingList) {
+    iQMul = pCtx->pDequant_coeff4x4[0][iQp][0] >> 4;
+  } else {
+    iQMul = g_kuiDequantCoeff[iQp][0];
+  }
+
+  int32_t iStage[16]; // intermediate results of the first pass
+
+  // first pass: butterfly over the four terms of each row group
+  for (int32_t iRow = 0; iRow < 4; ++iRow) {
+    const int16_t* pSrc = pBlock + kiRowOffset[iRow];
+    const int32_t kiSum02 = pSrc[0] + pSrc[kiColOffset[2]];
+    const int32_t kiDiff02 = pSrc[0] - pSrc[kiColOffset[2]];
+    const int32_t kiDiff13 = pSrc[kiColOffset[1]] - pSrc[kiColOffset[3]];
+    const int32_t kiSum13 = pSrc[kiColOffset[1]] + pSrc[kiColOffset[3]];
+    int32_t* pOut = iStage + (iRow << 2);
+
+    pOut[0] = kiSum02 + kiSum13;
+    pOut[1] = kiDiff02 + kiDiff13;
+    pOut[2] = kiDiff02 - kiDiff13;
+    pOut[3] = kiSum02 - kiSum13;
+  }
+
+  // second pass: butterfly across the rows, then scale with rounding
+  for (int32_t iCol = 0; iCol < 4; ++iCol) {
+    const int32_t kiSum02 = iStage[iCol] + iStage[8 + iCol];
+    const int32_t kiDiff02 = iStage[iCol] - iStage[8 + iCol];
+    const int32_t kiDiff13 = iStage[4 + iCol] - iStage[12 + iCol];
+    const int32_t kiSum13 = iStage[4 + iCol] + iStage[12 + iCol];
+    int16_t* pDst = pBlock + kiColOffset[iCol];
+
+    pDst[kiRowOffset[0]] = ((kiSum02 + kiSum13) * iQMul + 2) >> 2;
+    pDst[kiRowOffset[1]] = ((kiDiff02 + kiDiff13) * iQMul + 2) >> 2;
+    pDst[kiRowOffset[2]] = ((kiDiff02 - kiDiff13) * iQMul + 2) >> 2;
+    pDst[kiRowOffset[3]] = ((kiSum02 - kiSum13) * iQMul + 2) >> 2;
+  }
+}
+
+/*!
+ * \brief  Reconstruct an intra 16x16 or intra 4x4 macroblock.
+ *
+ * I_PCM macroblocks are not expected here: WelsTargetMbConstruction() returns
+ * before calling this function for them.
+ *
+ * \param  pCtx       decoder context
+ * \param  pCurLayer  current layer; the MB at iMbXyIndex is reconstructed
+ * \param  bOutput    forwarded to WelsFillRecNeededMbInfo()
+ * \return always 0
+ */
+int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput) {
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+
+  WelsFillRecNeededMbInfo (pCtx, bOutput, pCurLayer);
+
+  if (IS_INTRA16x16 (pCurLayer->pMbType[iMbXy])) {
+    // The luma DC transform needs the context to choose between the scaling-list
+    // table and the default dequantization table, so pCtx must be passed along.
+    WelsLumaDcDequantIdct (pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx);
+    RecI16x16Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
+
+    return 0;
+  }
+
+  if (IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) {
+    RecI4x4Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
+  }
+
+  return 0;
+}
+
+/*!
+ * \brief  Run inter prediction for the current MB directly into the output picture.
+ *
+ * \param  pCtx       decoder context (strides come from pCtx->pDec)
+ * \param  pCurLayer  current layer (MB position and destination planes come from here)
+ * \return always 0
+ */
+int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
+  const int32_t kiStrideY = pCtx->pDec->iLinesize[0];
+  const int32_t kiStrideUV = pCtx->pDec->iLinesize[1];
+
+  // an MB covers 16x16 luma and 8x8 chroma samples, hence the << 4 and << 3
+  const int32_t kiLumaOffset = (pCurLayer->iMbY * kiStrideY + pCurLayer->iMbX) << 4;
+  const int32_t kiChromaOffset = (pCurLayer->iMbY * kiStrideUV + pCurLayer->iMbX) << 3;
+
+  GetInterPred (pCurLayer->pDec->pData[0] + kiLumaOffset,
+                pCurLayer->pDec->pData[1] + kiChromaOffset,
+                pCurLayer->pDec->pData[2] + kiChromaOffset,
+                pCtx);
+
+  return 0;
+}
+
+/*!
+ * \brief  Reconstruct the current macroblock according to its type.
+ *
+ * \param  pCtx  decoder context; the MB at pCurDqLayer->iMbXyIndex is processed
+ * \return 0 on success, -1 when the MB type is neither I_PCM, intra nor inter
+ */
+int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) {
+  PDqLayer pLayer = pCtx->pCurDqLayer;
+  const int32_t kiMbXy = pLayer->iMbXyIndex;
+
+  if (MB_TYPE_INTRA_PCM == pLayer->pMbType[kiMbXy]) {
+    // already decoded and reconstructed when parsing
+    return 0;
+  }
+
+  if (IS_INTRA (pLayer->pMbType[kiMbXy])) {
+    WelsMbIntraPredictionConstruction (pCtx, pLayer, 1);
+    return 0;
+  }
+
+  if (IS_INTER (pLayer->pMbType[kiMbXy])) {
+    if (0 != pLayer->pCbp[kiMbXy]) {
+      WelsMbInterConstruction (pCtx, pLayer);
+    } else { // cbp == 0, which includes SKIP: prediction only, no residual
+      WelsMbInterPrediction (pCtx, pLayer);
+    }
+    return 0;
+  }
+
+  WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d",
+           pLayer->pMbType[kiMbXy]);
+  return -1;
+}
+
+/*!
+ * \brief  In-place 2x2 butterfly on the four chroma DC terms of one plane.
+ *
+ * The terms are read and written at offsets 0, 16, 32 and 48 of pBlock; each
+ * result is halved with a right shift by one.
+ *
+ * \param  pBlock  first coefficient of the plane's chroma residual
+ */
+void WelsChromaDcIdct (int16_t* pBlock) {
+  enum { kOffTopRight = 16, kOffBottomLeft = 32, kOffBottomRight = 48 };
+
+  const int32_t kiTopLeft = pBlock[0];
+  const int32_t kiTopRight = pBlock[kOffTopRight];
+  const int32_t kiBottomLeft = pBlock[kOffBottomLeft];
+  const int32_t kiBottomRight = pBlock[kOffBottomRight];
+
+  const int32_t kiTopSum = kiTopLeft + kiTopRight;
+  const int32_t kiTopDiff = kiTopLeft - kiTopRight;
+  const int32_t kiBottomSum = kiBottomLeft + kiBottomRight;
+  const int32_t kiBottomDiff = kiBottomLeft - kiBottomRight;
+
+  pBlock[0] = (kiTopSum + kiBottomSum) >> 1;
+  pBlock[kOffTopRight] = (kiTopDiff + kiBottomDiff) >> 1;
+  pBlock[kOffBottomLeft] = (kiTopSum - kiBottomSum) >> 1;
+  pBlock[kOffBottomRight] = (kiTopDiff - kiBottomDiff) >> 1;
+}
+
+/*!
+ * \brief  Mark neighbouring-sample availability in the 4x4 cache from MB-level availability.
+ *
+ * Entries are only ever set to 1; entries of unavailable neighbours are left untouched.
+ *
+ * \param  pNeighAvail   neighbouring MB availability flags
+ * \param  pSampleAvail  availability cache with a row pitch of 6 entries
+ */
+void WelsMap4x4NeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+  if (pNeighAvail->iLeftAvail) { //left: first entry of rows 1..4
+    for (int32_t iRow = 1; iRow <= 4; ++iRow) {
+      pSampleAvail[6 * iRow] = 1;
+    }
+  }
+  if (pNeighAvail->iLeftTopAvail) { //top_left
+    pSampleAvail[0] = 1;
+  }
+  if (pNeighAvail->iTopAvail) { //top: entries 1..4 of row 0
+    for (int32_t iCol = 1; iCol <= 4; ++iCol) {
+      pSampleAvail[iCol] = 1;
+    }
+  }
+  if (pNeighAvail->iRightTopAvail) { //top_right
+    pSampleAvail[5] = 1;
+  }
+}
+
+/*!
+ * \brief  Same mapping as WelsMap4x4NeighToSampleNormal, but a neighbour only counts
+ *         when it is available AND its MB type is intra.
+ *
+ * \param  pNeighAvail   neighbouring MB availability flags and types
+ * \param  pSampleAvail  availability cache with a row pitch of 6 entries
+ */
+void WelsMap4x4NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) { //left: first entry of rows 1..4
+    for (int32_t iRow = 1; iRow <= 4; ++iRow) {
+      pSampleAvail[6 * iRow] = 1;
+    }
+  }
+  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) { //top_left
+    pSampleAvail[0] = 1;
+  }
+  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) { //top: entries 1..4 of row 0
+    for (int32_t iCol = 1; iCol <= 4; ++iCol) {
+      pSampleAvail[iCol] = 1;
+    }
+  }
+  if (pNeighAvail->iRightTopAvail && IS_INTRA (pNeighAvail->iRightTopType)) { //top_right
+    pSampleAvail[5] = 1;
+  }
+}
+/*!
+ * \brief  Pack MB-level neighbour availability into a bit mask:
+ *         bit 2 = left, bit 1 = top-left, bit 0 = top.
+ *
+ * The left bit is stored with a plain assignment, so it replaces whatever
+ * *pSampleAvail held; the other two bits are OR-ed in.
+ *
+ * \param  pNeighAvail   neighbouring MB availability flags
+ * \param  pSampleAvail  output mask
+ */
+void WelsMap16x16NeighToSampleNormal (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
+  if (pNeighAvail->iLeftAvail) {
+    *pSampleAvail = 0x04;
+  }
+  if (pNeighAvail->iLeftTopAvail) {
+    *pSampleAvail |= 0x02;
+  }
+  if (pNeighAvail->iTopAvail) {
+    *pSampleAvail |= 0x01;
+  }
+}
+
+/*!
+ * \brief  Same mask as WelsMap16x16NeighToSampleNormal (bit 2 = left, bit 1 = top-left,
+ *         bit 0 = top), but a neighbour only counts when it is available AND intra.
+ *
+ * The left bit is stored with a plain assignment, so it replaces whatever
+ * *pSampleAvail held; the other two bits are OR-ed in.
+ *
+ * \param  pNeighAvail   neighbouring MB availability flags and types
+ * \param  pSampleAvail  output mask
+ */
+void WelsMap16x16NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
+  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {
+    *pSampleAvail = 0x04;
+  }
+  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {
+    *pSampleAvail |= 0x02;
+  }
+  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {
+    *pSampleAvail |= 0x01;
+  }
+}
+
+/*!
+ * \brief  Parse the sixteen intra 4x4 luma prediction modes and the chroma prediction mode
+ *         of the current MB, from CABAC or from the raw bit string depending on the PPS.
+ *
+ * \param  pCtx            decoder context
+ * \param  pNeighAvail     neighbouring MB availability
+ * \param  pIntraPredMode  prediction-mode cache, read for prediction and updated per block
+ * \param  pBs             bit string used on the non-CABAC path
+ * \param  pCurDqLayer     current layer; final modes are stored for MB iMbXyIndex
+ * \return ERR_NONE on success, ERR_INFO_INVALID_I4x4_PRED_MODE or
+ *         ERR_INFO_INVALID_I_CHROMA_PRED_MODE on an invalid mode, or the error
+ *         propagated by WELS_READ_VERIFY from a failed read
+ */
+int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
+                           PBitStringAux pBs,
+                           PDqLayer pCurDqLayer) {
+  const int32_t kiMbXy = pCurDqLayer->iMbXyIndex;
+  const bool kbCabac = pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag;
+  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
+  uint32_t uiCode;
+  int32_t iCode;
+
+  pCtx->pMap4x4NeighToSampleFunc (pNeighAvail, iSampleAvail);
+  // MB-level mask taken before the per-block loop marks decoded blocks as available:
+  // bit 2 = left, bit 1 = top-left, bit 0 = top
+  uint8_t uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
+
+  for (int32_t iBlk = 0; iBlk < 16; ++iBlk) {
+    int8_t iBestMode;
+
+    if (kbCabac) {
+      // CABAC: -1 means "use the predicted mode", otherwise the value is the remaining mode
+      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
+      const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, iBlk);
+      if (-1 == iCode) {
+        iBestMode = kiPredMode;
+      } else {
+        iBestMode = iCode + (iCode >= kiPredMode);
+      }
+    } else {
+      // raw bits: a one-bit flag selects the predicted mode, else 3 bits carry the remaining mode
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
+      const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, iBlk);
+      if (uiCode) {
+        iBestMode = kiPredMode;
+      } else {
+        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
+        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
+      }
+    }
+
+    const int32_t kiFinalMode = CheckIntra4x4PredMode (&iSampleAvail[0], &iBestMode, iBlk);
+    if (ERR_INVALID_INTRA4X4_MODE == kiFinalMode) {
+      return ERR_INFO_INVALID_I4x4_PRED_MODE;
+    }
+
+    pCurDqLayer->pIntra4x4FinalMode[kiMbXy][g_kuiScan4[iBlk]] = kiFinalMode;
+    pIntraPredMode[g_kuiScan8[iBlk]] = iBestMode;
+    iSampleAvail[g_kuiCache30ScanIdx[iBlk]] = 1; // this block is now available to later ones
+  }
+
+  // keep the cache's last row (4 entries) and the last entry of rows 1..3 for this MB
+  ST32 (&pCurDqLayer->pIntraPredMode[kiMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
+  for (int32_t iRow = 1; iRow <= 3; ++iRow) {
+    pCurDqLayer->pIntraPredMode[kiMbXy][3 + iRow] = pIntraPredMode[4 + 8 * iRow];
+  }
+
+  if (kbCabac) {
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
+    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
+      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+    }
+    pCurDqLayer->pChromaPredMode[kiMbXy] = iCode;
+  } else {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
+    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
+      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+    }
+    pCurDqLayer->pChromaPredMode[kiMbXy] = uiCode;
+  }
+
+  if (-1 == pCurDqLayer->pChromaPredMode[kiMbXy]) {
+    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+  }
+  if (CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[kiMbXy])) {
+    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+  }
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief  Validate the intra 16x16 luma prediction mode already stored for the current MB
+ *         and parse its chroma prediction mode.
+ *
+ * \param  pCtx         decoder context
+ * \param  pNeighAvail  neighbouring MB availability
+ * \param  pBs          bit string used on the non-CABAC path
+ * \param  pCurDqLayer  current layer; modes of MB iMbXyIndex are checked and stored
+ * \return ERR_NONE on success, ERR_INFO_INVALID_I16x16_PRED_MODE or
+ *         ERR_INFO_INVALID_I_CHROMA_PRED_MODE on an invalid mode, or the error
+ *         propagated by WELS_READ_VERIFY from a failed read
+ */
+int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs,
+                             PDqLayer pCurDqLayer) {
+  const int32_t kiMbXy = pCurDqLayer->iMbXyIndex;
+
+  // low three bits: left, top-left, top (1: avail, 0: unavail)
+  uint8_t uiNeighAvail = 0;
+  pCtx->pMap16x16NeighToSampleFunc (pNeighAvail, &uiNeighAvail);
+
+  // an invalid luma mode stops decoding of this MB
+  if (CheckIntra16x16PredMode (uiNeighAvail, &pCurDqLayer->pIntraPredMode[kiMbXy][7])) {
+    return ERR_INFO_INVALID_I16x16_PRED_MODE;
+  }
+
+  if (!pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+    uint32_t uiCode;
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
+    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
+      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+    }
+    pCurDqLayer->pChromaPredMode[kiMbXy] = uiCode;
+  } else {
+    int32_t iCode;
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
+    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
+      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+    }
+    pCurDqLayer->pChromaPredMode[kiMbXy] = iCode;
+  }
+
+  if (-1 == pCurDqLayer->pChromaPredMode[kiMbXy]) {
+    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+  }
+  if (CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[kiMbXy])) {
+    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+  }
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief  Parse one CABAC-coded macroblock of an I slice.
+ *
+ * Reads the MB type, the intra prediction modes, the coded block pattern, the QP
+ * delta and the luma/chroma residual of the MB at pCurDqLayer->iMbXyIndex, then
+ * the end-of-slice flag.
+ *
+ * \param  pCtx       decoder context
+ * \param  uiEosFlag  [out] end-of-slice flag as parsed by ParseEndOfSliceCabac()
+ * \return ERR_NONE on success, ERR_INFO_INVALID_MB_TYPE for an MB type above 25,
+ *         ERR_INFO_INVALID_QP for a QP delta outside [-26, 25], or the error
+ *         propagated by WELS_READ_VERIFY from a failed parse
+ */
+int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux = pCurLayer->pBitStringAux;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  SWelsNeighAvail sNeighAvail;
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType));
+  if (uiMbType > 25) {
+    return ERR_INFO_INVALID_MB_TYPE;
+  } else if (25 == uiMbType) { //I_PCM
+    WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+    pSlice->iLastDeltaQp = 0;
+    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+    if (uiEosFlag) {
+      RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+    }
+    return ERR_NONE;
+  } else if (0 == uiMbType) { //I4x4
+    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+    pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+    //get uiCbp for I4x4
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
+    uiCbpChroma = uiCbp >> 4;
+    uiCbpLuma = uiCbp & 15;
+  } else { //I16x16: prediction mode and cbp are derived from the MB type itself
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+    uiCbpChroma = pCurLayer->pCbp[iMbXy] >> 4;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurLayer));
+  }
+
+  // clear the coefficients and non-zero counts of this MB before parsing the residual
+  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+  pCurLayer->pCbfDc[iMbXy] = 0;
+
+  // an I4x4 MB without residual carries no QP delta and inherits the previous MB's QP
+  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset), 0, 51)];
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    int32_t iQpDelta, iId8x8, iId4x4;
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return ERR_INFO_INVALID_QP;
+    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset), 0, 51)];
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC,
+                            pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else { //pNonZeroCount = 0
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+        if (uiCbpLuma & (1 << iId8x8)) {
+          int32_t iIdx = (iId8x8 << 2);
+          for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+            //Luma (DC and AC decoding together)
+            WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                              g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+                              pCurLayer->pLumaQp[iMbXy], pCtx));
+            iIdx++;
+          }
+        } else {
+          // 8x8 block not coded: zero the four cached counts of its 4x4 blocks
+          ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+          ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+        }
+      }
+      ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+      ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+      ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+      ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+    }
+    int32_t iMBproperty;
+    //chroma
+    //step1: DC
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      //Cb Cr
+      for (i = 0; i < 2; i++) {
+        iMBproperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        // Block index and coefficient offset follow the plane: Cb uses block 16 at
+        // offset 256, Cr uses block 20 at offset 320. Both must depend on i, as in
+        // the chroma AC loop below.
+        WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
+                          iMBproperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy], pCtx));
+      }
+    }
+
+    //step2: AC
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        iMBproperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        int32_t iIdx = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMBproperty,
+                            pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pChromaQp[iMbXy], pCtx));
+          iIdx++;
+        }
+      }
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST16 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][20], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][18], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][22], 0);
+    }
+  } else {
+    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+  }
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) {
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+  return ERR_NONE;
+}
+
+/*!
+ * \brief  Decode one CABAC macroblock of an I slice; forwards to the base-mode parser.
+ *
+ * \param  pCtx       decoder context
+ * \param  pNalCur    current NAL unit (not used by this function)
+ * \param  uiEosFlag  [out] end-of-slice flag set by the base-mode parser
+ * \return ERR_NONE on success, otherwise the error propagated by WELS_READ_VERIFY
+ */
+int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  const int32_t kiRet = WelsDecodeMbCabacISliceBaseMode0 (pCtx, uiEosFlag);
+  WELS_READ_VERIFY (kiRet);
+  return ERR_NONE;
+}
+
+/*!
+ * \brief  Parse one non-skipped CABAC-coded macroblock of a P slice.
+ *
+ * Reads the MB type, then motion info (inter) or intra prediction modes (intra),
+ * the coded block pattern, the QP delta and the luma/chroma residual of the MB
+ * at pCurDqLayer->iMbXyIndex, then the end-of-slice flag.
+ *
+ * \param  pCtx         decoder context
+ * \param  pNeighAvail  neighbouring MB availability
+ * \param  uiEosFlag    [out] end-of-slice flag as parsed by ParseEndOfSliceCabac()
+ * \return ERR_NONE on success, ERR_INFO_INVALID_MB_TYPE for an invalid MB type,
+ *         ERR_INFO_INVALID_QP for a QP delta outside [-26, 25], or the error
+ *         propagated by WELS_READ_VERIFY from a failed parse
+ */
+int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux = pCurLayer->pBitStringAux;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t iMBproperty;
+  int32_t i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+
+  WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType));
+  // uiMbType = 4 is not allowed: the unsigned subtraction below wraps it to a
+  // value above 25, which is rejected as an invalid MB type.
+  if (uiMbType < 4) { //Inter mode
+    int16_t pMotionVector[LIST_A][30][MV_A];
+    int16_t pMvdCache[LIST_A][30][MV_A];
+    int8_t pRefIndex[LIST_A][30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
+    WELS_READ_VERIFY (ParseInterMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex));
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  } else { //Intra mode
+    uiMbType -= 5;
+    if (uiMbType > 25) {
+      return ERR_INFO_INVALID_MB_TYPE;
+    }
+
+    if (25 == uiMbType) { //I_PCM
+      WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+      pSlice->iLastDeltaQp = 0;
+      WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+      if (uiEosFlag) {
+        RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+      }
+      return ERR_NONE;
+    } else { //normal Intra mode
+      if (0 == uiMbType) { //Intra4x4
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        pCtx->pFillInfoCacheIntra4x4Func (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+        WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+      } else { //Intra16x16: prediction mode and cbp are derived from the MB type itself
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+        uiCbpChroma = pCurLayer->pCbp[iMbXy] >> 4;
+        uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+        WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
+      }
+    }
+  }
+
+  // every type except Intra16x16 carries an explicit coded block pattern
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
+    uiCbpChroma = pCurLayer->pCbp[iMbXy] >> 4;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+  }
+
+  // clear the coefficients and non-zero counts of this MB before parsing the residual
+  pCtx->sBlockFunc.pWelsBlockZero16x16Func (pCurLayer->pScaledTCoeff[iMbXy], 16);
+  pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256, 8);
+  pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256 + 64, 8);
+
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    // Same range check as the I-slice path: without it a large negative delta can
+    // make the modulo below negative, and the resulting QP is later used as a
+    // table index.
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return ERR_INFO_INVALID_QP;
+    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC,
+                            pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      iMBproperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+      for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+        if (uiCbpLuma & (1 << iId8x8)) {
+          int32_t iIdx = (iId8x8 << 2);
+          for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+            //Luma (DC and AC decoding together)
+            WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                              g_kuiZigzagScan + iScanIdxStart, iMBproperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+                              pCurLayer->pLumaQp[iMbXy], pCtx));
+            iIdx++;
+          }
+        } else {
+          // 8x8 block not coded: zero the four cached counts of its 4x4 blocks
+          ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
+          ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+        }
+      }
+      ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+      ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+      ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+      ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMBproperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMBproperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+        WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
+                          iMBproperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy], pCtx));
+      }
+    }
+    //step2: AC
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMBproperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMBproperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+        int32_t index = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                            iMBproperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy], pCtx));
+          index++;
+        }
+      }
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+    }
+  } else {
+    // no residual and not Intra16x16: no QP delta is coded, inherit the previous MB's QP
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
+  }
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) {
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief  Decode one macroblock of a CABAC-coded P slice.
+ *
+ * The skip flag is parsed first. A non-skipped macroblock is handed to
+ * WelsDecodeMbCabacPSliceBaseMode0(); a skipped one gets its per-MB state
+ * (type, non-zero counts, reference indices, motion vectors, QP) filled in
+ * here and is followed by the end-of-slice check.
+ *
+ * \param  pCtx       decoder context; pCtx->pCurDqLayer selects the macroblock
+ * \param  pNalCur    current NAL unit (not used by this function)
+ * \param  uiEosFlag  receives the end-of-slice indication
+ * \return ERR_NONE on success; WELS_READ_VERIFY hands back the error of any
+ *         failing parse call
+ */
+int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  SWelsNeighAvail sNeighAvail;
+  uint32_t uiSkipFlag;
+
+  // per-MB defaults, set before anything is parsed
+  pCurLayer->pCbp[iMbXy] = 0;
+  pCurLayer->pCbfDc[iMbXy] = 0;
+  pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
+
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &sNeighAvail, uiSkipFlag));
+
+  if (!uiSkipFlag) {
+    // regular (non-skipped) macroblock
+    WELS_READ_VERIFY (WelsDecodeMbCabacPSliceBaseMode0 (pCtx, &sNeighAvail, uiEosFlag));
+    return ERR_NONE;
+  }
+
+  // ---- skipped macroblock ----
+  pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
+
+  // all 24 non-zero-count entries are cleared, four bytes per store
+  for (int32_t iNzcOffset = 0; iNzcOffset < 24; iNzcOffset += 4) {
+    ST32 (&pCurLayer->pNzc[iMbXy][iNzcOffset], 0);
+  }
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
+
+  // the predicted skip MV is replicated to all 16 blocks; MVDs are zero
+  int16_t iSkipMv[2] = {0};
+  PredPSkipMvFromNeighbor (pCurLayer, iSkipMv);
+  for (int32_t iBlk = 0; iBlk < 16; iBlk++) {
+    ST32 (pCurLayer->pMv[0][iMbXy][iBlk], * (uint32_t*)iSkipMv);
+    ST32 (pCurLayer->pMvd[0][iMbXy][iBlk], 0);
+  }
+
+  if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+  }
+
+  // a skipped MB carries no QP delta: inherit the QP of the previous MB
+  pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+  pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
+
+  // kept for the CABAC context selection of the next macroblock
+  pSlice->iLastDeltaQp = 0;
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  return ERR_NONE;
+}
+/*!
+ * \brief  Calculate the 4x4 dequantisation tables derived from the scaling lists.
+ *
+ * When neither the SPS nor the PPS signals a scaling matrix, scaling lists are
+ * switched off and nothing else happens. Otherwise each of the six 4x4 lists is
+ * multiplied with g_kuiDequantCoeff for every QP row handled by the loop and
+ * stored in pCtx->pDequant_coeff_buffer4x4. The PPS list is used when the PPS
+ * flag is set, the SPS list otherwise.
+ *
+ * The tables are rebuilt only on first use or when the active PPS id differs
+ * from the id they were built for. That id is now recorded after every rebuild;
+ * previously it was never stored here, so the "PPS changed" test compared
+ * against a stale value.
+ *
+ * \param  pCtx  decoder context providing pSps, pPps and the table storage
+ * \return always ERR_NONE
+ */
+int32_t WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) {
+  if (!pCtx->pSps->bSeqScalingMatrixPresentFlag && !pCtx->pPps->bSeqScalingMatrixPresentFlag) {
+    pCtx->bUseScalingList = false;
+    return ERR_NONE;
+  }
+
+  pCtx->bUseScalingList = true;
+
+  // tables already built for this PPS: nothing to do
+  if (pCtx->bDequant_coeff4x4_Init && (pCtx->iDequant_coeff_ppsid == pCtx->pPps->iPpsId)) {
+    return ERR_NONE;
+  }
+
+  const bool kbUsePpsList = pCtx->pPps->bSeqScalingMatrixPresentFlag;
+
+  for (int i = 0; i < 6; i++) {
+    pCtx->pDequant_coeff4x4[i] = pCtx->pDequant_coeff_buffer4x4[i];
+
+    // NOTE(review): the loop stops at q == 50 although luma QP elsewhere in this
+    // file is clipped to 0..51; confirm the table dimensions before widening it.
+    for (int q = 0; q < 51; q++) {
+      for (int x = 0; x < 16; x++) {
+        pCtx->pDequant_coeff4x4[i][q][x] = kbUsePpsList
+                                           ? pCtx->pPps->iScalingList4x4[i][x] * g_kuiDequantCoeff[q][x & 0x07]
+                                           : pCtx->pSps->iScalingList4x4[i][x] * g_kuiDequantCoeff[q][x & 0x07];
+      }
+    }
+  }
+
+  pCtx->bDequant_coeff4x4_Init = true;
+  pCtx->iDequant_coeff_ppsid = pCtx->pPps->iPpsId; // remember which PPS the tables belong to
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief  Decode all macroblocks of the current slice.
+ *
+ * Picks the per-macroblock decoder from the entropy coding mode and the slice
+ * type, installs the intra neighbour helpers matching the constrained intra
+ * prediction flag, initialises CABAC when it is in use, refreshes the scaling
+ * list tables and then walks the slice macroblock by macroblock until the
+ * end-of-slice flag is raised, the next index is -1 or it reaches the
+ * macroblock count of the SPS.
+ *
+ * \param  pCtx                decoder context
+ * \param  bFirstSliceInLayer  not used by this function
+ * \param  pNalCur             current NAL unit, forwarded to the MB decoder
+ * \return ERR_NONE on success, dsBitstreamError when inter-layer prediction
+ *         flags are combined with CABAC, otherwise the first error reported
+ *         by a macroblock decoder or by the CABAC engine initialisation
+ */
+int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PFmo pFmo = pCtx->pFmo;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt;
+  PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader;
+  const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice
+  const bool kbPSlice = (P_SLICE == pSliceHeader->eSliceType);
+  uint32_t uiEosFlag = 0;
+  PWelsDecMbFunc pDecMbFunc;
+  int32_t iNextMbXyIndex, iSliceIdc;
+
+  pSlice->iTotalMbInCurSlice = 0; // restart the per-slice MB counter
+
+  // ---- macroblock decoder selection ----
+  if (pCtx->pPps->bEntropyCodingModeFlag) {
+    const bool kbIlpRequested = pSliceHeaderExt->bAdaptiveMotionPredFlag ||
+                                pSliceHeaderExt->bAdaptiveBaseModeFlag ||
+                                pSliceHeaderExt->bAdaptiveResidualPredFlag;
+    if (kbIlpRequested) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+               "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!");
+      pCtx->iErrorCode |= dsBitstreamError;
+      return dsBitstreamError;
+    }
+    // anything that is not a P slice is treated as I slice (B slices unsupported)
+    pDecMbFunc = kbPSlice ? WelsDecodeMbCabacPSlice : WelsDecodeMbCabacISlice;
+  } else {
+    pDecMbFunc = kbPSlice ? WelsDecodeMbCavlcPSlice : WelsDecodeMbCavlcISlice;
+  }
+
+  // ---- intra neighbour helpers ----
+  if (!pSliceHeader->pPps->bConstainedIntraPredFlag) {
+    pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain0Intra4x4;
+    pCtx->pMap4x4NeighToSampleFunc = WelsMap4x4NeighToSampleNormal;
+    pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleNormal;
+  } else {
+    pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain1Intra4x4;
+    pCtx->pMap4x4NeighToSampleFunc = WelsMap4x4NeighToSampleConstrain1;
+    pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleConstrain1;
+  }
+
+  pCtx->eSliceType = pSliceHeader->eSliceType;
+
+  // ---- CABAC start-up ----
+  if (pCurLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) {
+    int32_t iSliceQp = pSliceHeader->iSliceQp;
+    int32_t iInitIdc = pSliceHeader->iCabacInitIdc;
+    WelsCabacContextInit (pCtx, pSlice->eSliceType, iInitIdc, iSliceQp);
+    pSlice->iLastDeltaQp = 0;
+    WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux));
+  }
+
+  // dequantisation tables derived from the scaling lists, if any
+  WelsCalcDeqCoeffScalingList (pCtx);
+
+  // ---- position of the first macroblock ----
+  iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
+  pSlice->iMbSkipRun = -1;
+  iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurLayer->uiLayerDqId;
+
+  pCurLayer->iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
+  pCurLayer->iMbY = iNextMbXyIndex / pCurLayer->iMbWidth;
+  pCurLayer->iMbXyIndex = iNextMbXyIndex;
+
+  // ---- macroblock loop: stops at a slice group boundary or the end of the frame ----
+  while ((-1 != iNextMbXyIndex) && (iNextMbXyIndex < kiCountNumMb)) {
+    pCurLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
+
+    const int32_t kiRet = pDecMbFunc (pCtx, pNalCur, uiEosFlag);
+    if (kiRet != ERR_NONE) {
+      return kiRet;
+    }
+
+    ++pSlice->iTotalMbInCurSlice;
+    if (uiEosFlag) { //end of slice
+      break;
+    }
+
+    iNextMbXyIndex = (pSliceHeader->pPps->uiNumSliceGroups > 1)
+                     ? FmoNextMb (pFmo, iNextMbXyIndex)
+                     : (iNextMbXyIndex + 1);
+
+    pCurLayer->iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
+    pCurLayer->iMbY = iNextMbXyIndex / pCurLayer->iMbWidth;
+    pCurLayer->iMbXyIndex = iNextMbXyIndex;
+  }
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief  Parse one macroblock of a CAVLC-coded I slice (no base mode).
+ *
+ * Reads mb_type and handles the three intra flavours:
+ *  - I_PCM: raw samples are copied from the bit-stream into the picture,
+ *  - Intra4x4: prediction modes and coded_block_pattern are parsed,
+ *  - Intra16x16: prediction mode and CBP are derived from mb_type.
+ * For the latter two, mb_qp_delta and the luma/chroma residual blocks follow
+ * when the CBP is non-zero or the macroblock is Intra16x16.
+ *
+ * Changes against the previous revision: iMBproperty is declared at function
+ * scope (it used to be declared inside the I_PCM branch but assigned in the
+ * chroma code, which is outside that scope), and the per-plane chroma property
+ * it holds is now passed to WelsResidualBlockCavlc(), as the P-slice decoder
+ * in this file does, instead of being computed and dropped.
+ *
+ * \param  pCtx  decoder context; pCtx->pCurDqLayer selects the macroblock
+ * \return 0 on success; ERR_INFO_INVALID_MB_TYPE, ERR_INFO_INVALID_CBP or
+ *         ERR_INFO_INVALID_QP for out-of-range syntax elements; -1 when a
+ *         residual block cannot be parsed; WELS_READ_VERIFY hands back the
+ *         error of any failing read
+ */
+int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
+  SVlcTable* pVlcTable = &pCtx->sVlcTable;
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBs = pCurLayer->pBitStringAux;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  SWelsNeighAvail sNeighAvail;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t i;
+  int32_t iMBproperty; // residual property of the chroma plane being parsed
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
+  uiMbType = uiCode;
+  if (uiMbType > 25) {
+    return ERR_INFO_INVALID_MB_TYPE;
+  }
+
+  if (25 == uiMbType) { // I_PCM
+    int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
+    int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
+
+    int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
+    int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
+
+    uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
+    uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
+    uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
+
+    uint8_t* pTmpBsBuf;
+
+    int32_t iCopySizeY = (sizeof (uint8_t) << 4);
+    int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
+
+    int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
+
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+
+    //step 1: locating bit-stream pointer [must align into integer byte]
+    pBs->pCurBuf -= iIndex;
+
+    //step 2: copy pixel from bit-stream into fdec [reconstruction]
+    // NOTE(review): nothing here verifies that 384 bytes are still available
+    // in the bit-stream buffer; confirm that a caller guarantees it.
+    pTmpBsBuf = pBs->pCurBuf;
+    for (i = 0; i < 16; i++) { //luma
+      memcpy (pDecY , pTmpBsBuf, iCopySizeY);
+      pDecY += iDecStrideL;
+      pTmpBsBuf += 16;
+    }
+    for (i = 0; i < 8; i++) { //cb
+      memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
+      pDecU += iDecStrideC;
+      pTmpBsBuf += 8;
+    }
+    for (i = 0; i < 8; i++) { //cr
+      memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
+      pDecV += iDecStrideC;
+      pTmpBsBuf += 8;
+    }
+
+    pBs->pCurBuf += 384; // 256 luma + 2 * 64 chroma samples
+
+    //step 3: update QP and pNonZeroCount
+    pCurLayer->pLumaQp[iMbXy] = 0;
+    pCurLayer->pChromaQp[iMbXy] = 0;
+    memset (pNzc, 16, sizeof (pCurLayer->pNzc[iMbXy])); //Rec. 9.2.1 for PCM, nzc=16
+    WELS_READ_VERIFY (InitReadBits (pBs, 0));
+    return 0;
+  } else if (0 == uiMbType) { //reference to JM
+    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+    pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+
+    //uiCbp
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
+    uiCbp = uiCode;
+    //G.9.1 Alternative parsing process for coded pBlock pattern
+    if (uiCbp > 47)
+      return ERR_INFO_INVALID_CBP;
+
+    uiCbp = g_kuiIntra4x4CbpTable[uiCbp];
+
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    uiCbpC = uiCbp >> 4;
+    uiCbpL = uiCbp & 15;
+  } else { //I_PCM exclude, we can ignore it
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+    uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
+    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer));
+  }
+
+  // start from an all-zero coefficient buffer and all-zero non-zero counts
+  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+  ST32A4 (&pNzc[0], 0);
+  ST32A4 (&pNzc[4], 0);
+  ST32A4 (&pNzc[8], 0);
+  ST32A4 (&pNzc[12], 0);
+  ST32A4 (&pNzc[16], 0);
+  ST32A4 (&pNzc[20], 0);
+
+  // Intra4x4 without residual: no mb_qp_delta is present, inherit the last QP
+  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
+
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
+    iQpDelta = iCode;
+
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return ERR_INFO_INVALID_QP;
+    }
+
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset, 0,
+                                  51)];
+
+
+    BsStartCavlc (pBs);
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16,
+                                  g_kuiLumaDcZigzagScan, I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) {
+        return -1;//abnormal
+      }
+      //step2: Luma AC
+      if (uiCbpL) {
+        for (i = 0; i < 16; i++) {
+          if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i,
+                                      iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                                      I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
+            return -1;//abnormal
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+        if (uiCbpL & (1 << iId8x8)) {
+          int32_t iIndex = (iId8x8 << 2);
+          for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+            //Luma (DC and AC decoding together)
+            if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
+                                        iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart,
+                                        LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
+              return -1;//abnormal
+            }
+            iIndex++;
+          }
+        } else {
+          // 8x8 block not coded: clear its four cache entries
+          ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0);
+          ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+        }
+      }
+      ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+      ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+      ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+      ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpC || 2 == uiCbpC) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        iMBproperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs,
+                                    16 + (i << 2), 4, g_kuiChromaDcScan, iMBproperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6),
+                                    pCurLayer->pChromaQp[iMbXy], pCtx)) {
+          return -1;//abnormal
+        }
+      }
+    }
+
+    //step2: AC
+    if (2 == uiCbpC) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        iMBproperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        int32_t iIndex = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
+                                      iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                                      iMBproperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pChromaQp[iMbXy], pCtx)) {
+            return -1;//abnormal
+          }
+          iIndex++;
+        }
+      }
+      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
+      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+    }
+    BsEndCavlc (pBs);
+  }
+
+  return 0;
+}
+
+/*!
+ * \brief  Decode one macroblock of a CAVLC-coded I slice and detect the slice end.
+ *
+ * Determines base_mode_flag (read from the stream when the slice uses adaptive
+ * base mode, otherwise the slice default). Base mode means inter-layer
+ * prediction, which is rejected; otherwise the macroblock is parsed by
+ * WelsActualDecodeMbCavlcISlice(). Afterwards the number of consumed bits is
+ * compared with the payload size to raise the end-of-slice flag or to detect
+ * an over-read.
+ *
+ * The log format string now has whitespace around PRId64: without it C++11
+ * lexes the macro name as a literal suffix. The emitted text is unchanged.
+ *
+ * \param  pCtx       decoder context
+ * \param  pNalCur    current NAL unit (not used by this function)
+ * \param  uiEosFlag  set to 1 when the slice data is exhausted
+ * \return 0 on success, the unsupported-ILP error for base mode, the error of
+ *         the macroblock parser, or -1 when more bits were consumed than the
+ *         bit-stream holds
+ */
+int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBs = pCurLayer->pBitStringAux;
+  PSliceHeaderExt pSliceHeaderExt = &pCurLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
+  int32_t iBaseModeFlag;
+  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
+  uint32_t uiCode;
+  intX_t iUsedBits;
+
+  if (pSliceHeaderExt->bAdaptiveBaseModeFlag == 1) {
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
+    iBaseModeFlag = uiCode;
+  } else {
+    iBaseModeFlag = pSliceHeaderExt->bDefaultBaseModeFlag;
+  }
+
+  if (iBaseModeFlag) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+             iBaseModeFlag);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+  }
+
+  iRet = WelsActualDecodeMbCavlcISlice (pCtx);
+  if (iRet) { //occur error when parsing, MUST STOP decoding
+    return iRet;
+  }
+
+  // check whether there is left bits to read next time in case multiple slices
+  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
+  // sub 1, for stop bit
+  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
+    uiEosFlag = 1;
+  }
+  if (iUsedBits > (pBs->iBits -
+                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t) iUsedBits, pBs->iBits);
+    return -1;
+  }
+  return 0;
+}
+
+/*!
+ * \brief  Parse one non-skipped macroblock of a CAVLC-coded P slice (no base mode).
+ *
+ * mb_type below 5 selects an inter macroblock: motion information is parsed
+ * and residual prediction is rejected as unsupported. Larger values select an
+ * intra macroblock (I_PCM, Intra4x4 or Intra16x16) after subtracting 5.
+ * coded_block_pattern, mb_qp_delta and the luma/chroma residual blocks follow
+ * as far as they are present.
+ *
+ * Fix against the previous revision: for inter macroblocks the Cb plane was
+ * given the Cr residual property (both arms of the selection named the _V_
+ * constant) for chroma DC and AC. The Cb plane now gets CHROMA_DC_U_INTER /
+ * CHROMA_AC_U_INTER, matching the CABAC decoder in this file.
+ *
+ * \param  pCtx  decoder context; pCtx->pCurDqLayer selects the macroblock
+ * \return 0 on success; ERR_INFO_INVALID_MB_TYPE, ERR_INFO_INVALID_CBP or
+ *         ERR_INFO_INVALID_QP for out-of-range syntax elements; -1 for parse
+ *         failures and unsupported residual prediction; WELS_READ_VERIFY hands
+ *         back the error of any failing read
+ */
+int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
+  SVlcTable* pVlcTable = &pCtx->sVlcTable;
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBs = pCurLayer->pBitStringAux;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+
+  SWelsNeighAvail sNeighAvail;
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+  int32_t iMBproperty; // residual property passed to the block parser
+
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
+  uiMbType = uiCode;
+  if (uiMbType < 5) { //inter MB type
+    int16_t iMotionVector[LIST_A][30][MV_A];
+    int8_t iRefIndex[LIST_A][30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
+    if (ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) {
+      return -1;//abnormal
+    }
+
+    if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
+      pCurLayer->pResidualPredFlag[iMbXy] = uiCode;
+    } else {
+      pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+    }
+
+    if (pCurLayer->pResidualPredFlag[iMbXy] == 0) {
+      pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
+      return -1;
+    }
+  } else { //intra MB type
+    uiMbType -= 5;
+    if (uiMbType > 25) {
+      return ERR_INFO_INVALID_MB_TYPE;
+    }
+
+    if (25 == uiMbType) { // I_PCM
+      int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
+      int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
+
+      int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
+      int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
+
+      uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
+      uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
+      uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
+
+      uint8_t* pTmpBsBuf;
+
+      int32_t iCopySizeY = (sizeof (uint8_t) << 4);
+      int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
+
+      int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
+
+      pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+
+      //step 1: locating bit-stream pointer [must align into integer byte]
+      pBs->pCurBuf -= iIndex;
+
+      //step 2: copy pixel from bit-stream into fdec [reconstruction]
+      // NOTE(review): nothing here verifies that 384 bytes are still available
+      // in the bit-stream buffer; confirm that a caller guarantees it.
+      pTmpBsBuf = pBs->pCurBuf;
+      for (i = 0; i < 16; i++) { //luma
+        memcpy (pDecY , pTmpBsBuf, iCopySizeY);
+        pDecY += iDecStrideL;
+        pTmpBsBuf += 16;
+      }
+
+      for (i = 0; i < 8; i++) { //cb
+        memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
+        pDecU += iDecStrideC;
+        pTmpBsBuf += 8;
+      }
+      for (i = 0; i < 8; i++) { //cr
+        memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
+        pDecV += iDecStrideC;
+        pTmpBsBuf += 8;
+      }
+
+      pBs->pCurBuf += 384; // 256 luma + 2 * 64 chroma samples
+
+      //step 3: update QP and pNonZeroCount
+      pCurLayer->pLumaQp[iMbXy] = 0;
+      pCurLayer->pChromaQp[iMbXy] = 0;
+      //Rec. 9.2.1 for PCM, nzc=16
+      ST32A4 (&pNzc[0], 0x10101010);
+      ST32A4 (&pNzc[4], 0x10101010);
+      ST32A4 (&pNzc[8], 0x10101010);
+      ST32A4 (&pNzc[12], 0x10101010);
+      ST32A4 (&pNzc[16], 0x10101010);
+      ST32A4 (&pNzc[20], 0x10101010);
+      WELS_READ_VERIFY (InitReadBits (pBs, 0));
+      return 0;
+    } else {
+      if (0 == uiMbType) {
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+        if (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
+          return -1;
+        }
+      } else { //I_PCM exclude, we can ignore it
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+        uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
+        uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+        if (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)) {
+          return -1;
+        }
+      }
+    }
+  }
+
+  // Intra16x16 carries its CBP inside mb_type; everything else codes it explicitly
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
+    uiCbp = uiCode;
+    {
+      if (uiCbp > 47)
+        return ERR_INFO_INVALID_CBP;
+
+      if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy]) {
+        uiCbp = g_kuiIntra4x4CbpTable[uiCbp];
+      } else //inter
+        uiCbp = g_kuiInterCbpTable[uiCbp];
+    }
+
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
+    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+  }
+
+  // start from an all-zero coefficient buffer and all-zero non-zero counts
+  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
+
+  ST32A4 (&pNzc[0], 0);
+  ST32A4 (&pNzc[4], 0);
+  ST32A4 (&pNzc[8], 0);
+  ST32A4 (&pNzc[12], 0);
+  ST32A4 (&pNzc[16], 0);
+  ST32A4 (&pNzc[20], 0);
+
+  // no residual and no Intra16x16: no mb_qp_delta is present, inherit the last QP
+  if (pCurLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurLayer->pMbType[iMbXy]) && !IS_I_BL (pCurLayer->pMbType[iMbXy])) {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
+    iQpDelta = iCode;
+
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return ERR_INFO_INVALID_QP;
+    }
+
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset, 0,
+                                  51)];
+
+    BsStartCavlc (pBs);
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan,
+                                  I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) {
+        return -1;//abnormal
+      }
+      //step2: Luma AC
+      if (uiCbpL) {
+        for (i = 0; i < 16; i++) {
+          if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i,
+                                      iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                                      I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
+            return -1;//abnormal
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+        iMBproperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+        if (uiCbpL & (1 << iId8x8)) {
+          int32_t iIndex = (iId8x8 << 2);
+          for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+            //Luma (DC and AC decoding together)
+            if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
+                                        iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart, iMBproperty,
+                                        pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
+              return -1;//abnormal
+            }
+            iIndex++;
+          }
+        } else {
+          // 8x8 block not coded: clear its four cache entries
+          ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+          ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+        }
+      }
+      ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+      ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+      ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+      ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+    }
+
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpC || 2 == uiCbpC) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMBproperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMBproperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER; // i == 0 is Cb, hence the U property
+
+        if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs,
+                                    16 + (i << 2), 4, g_kuiChromaDcScan, iMBproperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6),
+                                    pCurLayer->pChromaQp[iMbXy], pCtx)) {
+          return -1;//abnormal
+        }
+      }
+    }
+    //step2: AC
+    if (2 == uiCbpC) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMBproperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMBproperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER; // i == 0 is Cb, hence the U property
+
+        int32_t iIndex = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
+                                      iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                                      iMBproperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pChromaQp[iMbXy], pCtx)) {
+            return -1;//abnormal
+          }
+          iIndex++;
+        }
+      }
+      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
+      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+    }
+    BsEndCavlc (pBs);
+  }
+
+  return 0;
+}
+
+/*!
+ * \brief  Decode one macroblock of a CAVLC-coded P slice and detect the slice end.
+ *
+ * mb_skip_run is read whenever the stored run is -1. While the run is
+ * non-zero the macroblock is reconstructed as a skipped one (zero residual,
+ * reference index 0, predicted motion vector). Otherwise base_mode_flag is
+ * determined and the macroblock is parsed by WelsActualDecodeMbCavlcPSlice();
+ * base mode (inter-layer prediction) is rejected. Finally the number of
+ * consumed bits is compared with the payload size to raise the end-of-slice
+ * flag or to detect an over-read.
+ *
+ * Changes against the previous revision: the log format string has whitespace
+ * around PRId64 (required by C++11), the over-read message names this function
+ * instead of the I-slice one, and the skip MV buffer is zero-initialised like
+ * its CABAC counterpart.
+ *
+ * \param  pCtx       decoder context
+ * \param  pNalCur    current NAL unit; its quality/dependency ids are read
+ * \param  uiEosFlag  set to 1 when the slice data is exhausted
+ * \return 0 on success, the unsupported-ILP error for base mode, the error of
+ *         the macroblock parser, or -1 for an invalid skip run or an over-read
+ */
+int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBs = pCurLayer->pBitStringAux;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  intX_t iUsedBits;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t iBaseModeFlag, i;
+  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
+  uint32_t uiCode;
+
+  // -1 marks "no run pending": fetch the next mb_skip_run
+  if (-1 == pSlice->iMbSkipRun) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
+    pSlice->iMbSkipRun = uiCode;
+    if (-1 == pSlice->iMbSkipRun) {
+      return -1;
+    }
+  }
+  if (pSlice->iMbSkipRun--) {
+    int16_t iMv[2] = {0};
+
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
+    ST32A4 (&pNzc[0], 0);
+    ST32A4 (&pNzc[4], 0);
+    ST32A4 (&pNzc[8], 0);
+    ST32A4 (&pNzc[12], 0);
+    ST32A4 (&pNzc[16], 0);
+    ST32A4 (&pNzc[20], 0);
+
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
+
+    //predict iMv
+    PredPSkipMvFromNeighbor (pCurLayer, iMv);
+    for (i = 0; i < 16; i++) {
+      ST32A2 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)iMv);
+    }
+
+    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
+      memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+    }
+
+    //reset rS
+    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
+        (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
+      pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+      pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                    pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
+    }
+
+    pCurLayer->pCbp[iMbXy] = 0;
+  } else {
+    if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
+      iBaseModeFlag = uiCode;
+    } else {
+      iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
+    }
+    if (!iBaseModeFlag) {
+      iRet = WelsActualDecodeMbCavlcPSlice (pCtx);
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+               iBaseModeFlag);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+    }
+    if (iRet) { //occur error when parsing, MUST STOP decoding
+      return iRet;
+    }
+  }
+  // check whether there is left bits to read next time in case multiple slices
+  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
+  // sub 1, for stop bit
+  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
+    uiEosFlag = 1;
+  }
+  if (iUsedBits > (pBs->iBits -
+                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcPSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t) iUsedBits, pBs->iBits);
+    return -1;
+  }
+  return 0;
+}
+
+/*!
+ * \brief  Fill the block helper function table.
+ *
+ * Every entry starts out with its C implementation and is then overridden by
+ * an assembly variant when the matching build option is defined and the CPU
+ * feature bit is present in iCpu.
+ *
+ * \param  pFunc  table to fill
+ * \param  iCpu   CPU feature bit mask (WELS_CPU_*)
+ */
+void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
+  // portable defaults
+  pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_c;
+  pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_c;
+  pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_c;
+
+#ifdef HAVE_NEON
+  if (iCpu & WELS_CPU_NEON) {
+    pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_neon;
+    pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_neon;
+    pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_neon;
+  }
+#endif
+
+#ifdef HAVE_NEON_AARCH64
+  if (iCpu & WELS_CPU_NEON) {
+    pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_AArch64_neon;
+    pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_AArch64_neon;
+    pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_AArch64_neon;
+  }
+#endif
+
+#if defined(X86_ASM)
+  // only the block-zero helpers have an SSE2 variant here
+  if (iCpu & WELS_CPU_SSE2) {
+    pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_sse2;
+    pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_sse2;
+  }
+#endif
+}
+
+/*!
+ * \brief  Reduce 24 non-zero counts to flags: every non-zero entry becomes 1.
+ *
+ * \param  pNonZeroCount  array of 24 counts, rewritten in place
+ */
+void SetNonZeroCount_c (int8_t* pNonZeroCount) {
+  int8_t* pCur = pNonZeroCount;
+  int8_t* const kpEnd = pNonZeroCount + 24;
+
+  while (pCur != kpEnd) {
+    *pCur = (*pCur != 0) ? 1 : 0;
+    ++pCur;
+  }
+}
+
+/*!
+ * \brief  Fill a rectangular block of 16-bit values row by row with memset().
+ *
+ * uiVal is the byte written by memset(), so each 16-bit element ends up with
+ * that byte in both halves (0 stays 0).
+ *
+ * \param  pBlock   first element of the block
+ * \param  iW       elements per row to fill
+ * \param  iH       number of rows
+ * \param  iStride  distance between row starts, in elements
+ * \param  uiVal    byte value used for the fill
+ */
+void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) {
+  int16_t* pRow = pBlock;
+  int iRowsLeft = iH;
+
+  while (iRowsLeft-- > 0) {
+    memset (pRow, uiVal, iW * sizeof (int16_t));
+    pRow += iStride;
+  }
+}
+/*!
+ * \brief  Zero a 16x16 block of 16-bit values.
+ *
+ * \param  pBlock   first element of the block
+ * \param  iStride  distance between row starts, in elements
+ */
+void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
+  for (int32_t iRow = 0; iRow < 16; iRow++) {
+    memset (pBlock + iRow * iStride, 0, 16 * sizeof (int16_t));
+  }
+}
+
+/*!
+ * \brief  Zero an 8x8 block of 16-bit values.
+ *
+ * \param  pBlock   first element of the block
+ * \param  iStride  distance between row starts, in elements
+ */
+void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
+  for (int32_t iRow = 0; iRow < 8; iRow++) {
+    memset (pBlock + iRow * iStride, 0, 8 * sizeof (int16_t));
+  }
+}
+
+} // namespace WelsDec